How to parse XML from URL with Node How to parse XML from URL with Node xml xml

How to parse XML from URL with Node


I don't see a way for xml2js to parse XML chunk by chunk, so we need to buffer the entire HTTP response. To do that I have used a global variable here, but it is better to use something like concat-stream (I'll post that sometime later).

I have tried this one and it works for me:

 // Fetch an XML document over HTTPS, buffer the whole body, then parse it
 // with xml2js once the response has ended.
 var eyes = require('eyes');
 var https = require('https');
 var fs = require('fs');
 var xml2js = require('xml2js');

 var parser = new xml2js.Parser();
 parser.on('error', function(err) { console.log('Parser error', err); });

 // Accumulates the decoded response body across 'data' events.
 var data = '';

 https.get('https://tsdrapi.uspto.gov/ts/cd/casestatus/sn78787878/info.xml', function(res) {
   if (res.statusCode < 200 || res.statusCode >= 400) {
     // Report the failure instead of silently doing nothing, and drain the
     // body so the underlying socket can be released.
     console.log('Request failed with status', res.statusCode);
     res.resume();
     return;
   }

   // Let the stream decode UTF-8 so a multi-byte character split across two
   // chunks is not corrupted (per-chunk Buffer#toString() would mangle it).
   res.setEncoding('utf8');
   res.on('data', function(chunk) { data += chunk; });
   res.on('error', function(err) { console.log('Response error', err); });
   res.on('end', function() {
     console.log('data', data);
     parser.parseString(data, function(err, result) {
       console.log('FINISHED', err, result);
     });
   });
 }).on('error', function(err) {
   // Connection-level failures (DNS, TLS, refused socket) are emitted on the
   // request object; without a listener they would throw.
   console.log('Request error', err);
 });

We parse the XML only once the response has finished sending. xml2js uses sax, which seems to have streaming support, but I'm not sure whether xml2js takes advantage of it.

I have created a small example which uses chunk-by-chunk parsing (similar to your example), but it fails with a parsing error because an individual chunk is not valid XML on its own - that's why we need to buffer the entire response.

If your XML is very big, try a different parser, such as sax, that has stream support.

You can also add an error handler to the parser so that it prints any errors it encounters.

Concat stream

With concat-stream you can more elegantly concatenate the chunks delivered by all the .on('data', ...) calls:

// Fetch an XML document over HTTPS, collect the whole body with
// concat-stream, then parse it with xml2js.
var https = require('https');
var xml2js = require('xml2js');
var parser = new xml2js.Parser();
var concat = require('concat-stream');

parser.on('error', function(err) { console.log('Parser error', err); });

https.get('https://tsdrapi.uspto.gov/ts/cd/casestatus/sn78787878/info.xml', function(resp) {
  if (resp.statusCode < 200 || resp.statusCode >= 400) {
    // An error page is not the XML we asked for; report it and drain the
    // body so the underlying socket can be released.
    console.log('Request failed with status', resp.statusCode);
    resp.resume();
    return;
  }

  resp.on('error', function(err) {
    console.log('Error while reading', err);
  });

  // concat-stream invokes the callback once with the complete body.
  resp.pipe(concat(function(buffer) {
    var str = buffer.toString();
    parser.parseString(str, function(err, result) {
      console.log('Finished parsing:', err, result);
    });
  }));
}).on('error', function(err) {
  // Connection-level failures (DNS, TLS, refused socket) are emitted on the
  // request object; without a listener they would throw.
  console.log('Request error', err);
});

You can use sax to avoid buffering the entire file (in case your XML files are big). It is lower level; however, piping the response into it as a stream is very similar.


Based on your question the solution should be something like this.

Both options work as expected and give a valid JSON object for the XML. You can configure how the XML is parsed as described in the README of xml2js.

Native


// Fetch an XML document over HTTPS using only core modules for transport,
// buffer the body as a UTF-8 string, then parse it with xml2js and
// pretty-print the result with eyes.
var eyes = require('eyes');
var https = require('https');
var fs = require('fs');
var xml2js = require('xml2js');
var parser = new xml2js.Parser();

https.get('https://tsdrapi.uspto.gov/ts/cd/casestatus/sn78787878/info.xml', function(res) {
    var response_data = '';

    // Decode at the stream level so multi-byte characters that span two
    // chunks are reassembled correctly.
    res.setEncoding('utf8');

    res.on('data', function(chunk) {
        response_data += chunk;
    });

    res.on('end', function() {
        parser.parseString(response_data, function(err, result) {
            if (err) {
                console.log('Got error: ' + err.message);
            } else {
                eyes.inspect(result);
                console.log('Done.');
            }
        });
    });

    res.on('error', function(err) {
        console.log('Got error: ' + err.message);
    });
}).on('error', function(err) {
    // Connection-level failures (DNS, TLS, refused socket) are emitted on the
    // request object, not the response; without a listener they would throw.
    console.log('Got error: ' + err.message);
});

ASYNC *Without the callback hell


// Same fetch-then-parse flow expressed as an async.waterfall pipeline:
// download the XML, parse it with xml2js, then use the parsed object.
// Any step that passes an error skips straight to the final handler.
var eyes = require('eyes');
var https = require('https');
var async = require('async');
var xml2js = require('xml2js');

async.waterfall([
    // Step 1: download the document and pass the body on as a string.
    function(callback) {
        // Request and response can both report a failure for the same
        // exchange; make sure the waterfall callback fires only once.
        var settled = false;
        function settle(err, body) {
            if (settled) {
                return;
            }
            settled = true;
            callback(err, body);
        }

        https.get('https://tsdrapi.uspto.gov/ts/cd/casestatus/sn78787878/info.xml', function(res) {
            var response_data = '';
            res.setEncoding('utf8');
            res.on('data', function(chunk) {
                response_data += chunk;
            });
            res.on('end', function() {
                settle(null, response_data);
            });
            res.on('error', function(err) {
                settle(err);
            });
        }).on('error', function(err) {
            // Connection-level failures (DNS, TLS, refused socket) are
            // emitted on the request; without this the waterfall would
            // never complete and the error would be thrown.
            settle(err);
        });
    },
    // Step 2: parse the XML string.
    function(xml, callback) {
        var parser = new xml2js.Parser();
        parser.parseString(xml, function(err, result) {
            if (err) {
                callback(err);
            } else {
                callback(null, result);
            }
        });
    },
    // Step 3: consume the parsed object.
    function(json, callback) {
        // do something useful with the json
        eyes.inspect(json);
        callback();
    }
], function(err, result) {
    if (err) {
        console.log('Got error');
        console.log(err);
    } else {
        console.log('Done.');
    }
});


Using xml2js, it's very simple.

// Minimal xml2js usage: parse an XML string that has already been fetched.
var parseString = require('xml2js').parseString;

// Placeholder: substitute the XML text downloaded from the URL.
var xmldata = "XML output from the url";
console.log(xmldata);

parseString(xmldata, function (err, result) {
  if (err) {
    // Report malformed XML instead of silently ignoring the failure.
    console.log('Parser error', err);
    return;
  }
  // Result contains XML data in JSON format
});