How to parse XML from URL with Node
I don't see a way for xml2js to parse XML chunk by chunk, so we need to buffer the entire HTTP response. To do that I have used a global variable here, but it is better to use something like concat-stream
(I'll post that sometime later).
I have tried this one and it works for me:
var eyes = require('eyes');
var https = require('https');
var fs = require('fs');
var xml2js = require('xml2js');

var parser = new xml2js.Parser();

// Print any error the parser emits.
parser.on('error', function(err) {
  console.log('Parser error', err);
});

// The response body is accumulated here and parsed only once the response
// has ended, because the parser is given the complete document in one call.
var data = '';

https.get('https://tsdrapi.uspto.gov/ts/cd/casestatus/sn78787878/info.xml', function(res) {
  if (res.statusCode >= 200 && res.statusCode < 400) {
    // Decode at the stream level so that a multi-byte UTF-8 character split
    // across two chunks is reassembled instead of being corrupted by a
    // per-chunk toString().
    res.setEncoding('utf8');

    res.on('data', function(data_) {
      data += data_;
    });

    res.on('end', function() {
      console.log('data', data);
      parser.parseString(data, function(err, result) {
        console.log('FINISHED', err, result);
      });
    });
  } else {
    console.log('Unexpected status code', res.statusCode);
    // The body must still be consumed, otherwise the connection is kept busy.
    res.resume();
  }
}).on('error', function(err) {
  // Request-level failures (DNS lookup, refused connection, TLS errors) are
  // emitted on the request object, not on the response.
  console.log('Request error', err);
});
We parse the XML only once the response has finished arriving. xml2js uses sax, which seems to have streaming support, but I am not sure whether xml2js takes advantage of it.
I have created a small example which uses chunk-by-chunk parsing (similar to your example), but it fails with a parse error because an individual chunk is not valid XML on its own - that's why we need to buffer the entire response.
If your XML is very big, try a different parser such as sax, which has streaming support.
You can also add an error handler to the parser so that it can print any errors it encounters.
Concat stream
With concat-stream you can more elegantly concatenate the chunks from all the .on('data'...) calls:
var https = require('https');
var xml2js = require('xml2js');
var parser = new xml2js.Parser();
var concat = require('concat-stream');

// Print any error the parser emits.
parser.on('error', function(err) {
  console.log('Parser error', err);
});

https.get('https://tsdrapi.uspto.gov/ts/cd/casestatus/sn78787878/info.xml', function(resp) {
  resp.on('error', function(err) {
    console.log('Error while reading', err);
  });

  // concat-stream collects every chunk written to it and invokes the
  // callback once with the combined result when the response ends.
  resp.pipe(concat(function(buffer) {
    var str = buffer.toString();
    parser.parseString(str, function(err, result) {
      console.log('Finished parsing:', err, result);
    });
  }));
}).on('error', function(err) {
  // Failures that happen before a response exists (DNS lookup, refused
  // connection, TLS errors) are emitted on the request object; without this
  // handler they would surface as an unhandled 'error' event.
  console.log('Error while requesting', err);
});
You can use sax to avoid buffering the entire file (in case your XML files are big). It is more low-level; however, piping the response into it as a stream is very similar.
Based on your question the solution should be something like this.
Both options work as expected and give a valid JSON object for the XML. You can configure how the XML is parsed as described in the README of xml2js
Native
var eyes = require('eyes');
var https = require('https');
var fs = require('fs');
var xml2js = require('xml2js');
var parser = new xml2js.Parser();

https.get('https://tsdrapi.uspto.gov/ts/cd/casestatus/sn78787878/info.xml', function(res) {
  // Whole response body; it is parsed only after the 'end' event.
  var response_data = '';

  // Have the stream deliver decoded strings rather than Buffers.
  res.setEncoding('utf8');

  res.on('data', function(chunk) {
    response_data += chunk;
  });

  res.on('end', function() {
    parser.parseString(response_data, function(err, result) {
      if (err) {
        console.log('Got error: ' + err.message);
      } else {
        eyes.inspect(result);
        console.log('Done.');
      }
    });
  });

  res.on('error', function(err) {
    console.log('Got error: ' + err.message);
  });
}).on('error', function(err) {
  // Connection-level failures are emitted on the request object, not on the
  // response, so they need their own handler.
  console.log('Got error: ' + err.message);
});
ASYNC *Without the callback hell
var eyes = require('eyes');
var https = require('https');
var async = require('async');
var xml2js = require('xml2js');

async.waterfall([
  // Step 1: download the XML document and pass its text to the next step.
  function(callback) {
    // The step callback must run exactly once, but a failure can be reported
    // by either the request or the response, so guard against a second call.
    var finished = false;
    function finish(err, body) {
      if (finished) {
        return;
      }
      finished = true;
      if (err) {
        callback(err);
      } else {
        callback(null, body);
      }
    }

    https.get('https://tsdrapi.uspto.gov/ts/cd/casestatus/sn78787878/info.xml', function(res) {
      var response_data = '';
      res.setEncoding('utf8');
      res.on('data', function(chunk) {
        response_data += chunk;
      });
      res.on('end', function() {
        finish(null, response_data);
      });
      res.on('error', function(err) {
        finish(err);
      });
    }).on('error', function(err) {
      // Connection-level failures are emitted on the request object; route
      // them into the waterfall so the final callback still runs.
      finish(err);
    });
  },

  // Step 2: parse the XML text with xml2js.
  function(xml, callback) {
    var parser = new xml2js.Parser();
    parser.parseString(xml, function(err, result) {
      if (err) {
        callback(err);
      } else {
        callback(null, result);
      }
    });
  },

  // Step 3: consume the parsed result.
  function(json, callback) {
    // do something useful with the json
    eyes.inspect(json);
    callback();
  }
], function(err, result) {
  if (err) {
    console.log('Got error');
    console.log(err);
  } else {
    console.log('Done.');
  }
});
Using xml2js, it's very simple.
var parseString = require('xml2js').parseString;

// Placeholder: replace this with the XML text fetched from the URL.
var xmldata = "XML output from the url";
console.log(xmldata);

parseString(xmldata, function (err, result) {
  if (err) {
    // Report the failure instead of silently ignoring it.
    console.log('Parser error', err);
    return;
  }
  // Result contains XML data in JSON format
});