A simple node.js rss parser using sax-js
The XML parser sax-js, written by isaacs (the creator of npm, the de facto package manager of node.js), comes with a few examples that deal with local XML files. I couldn't find one that parses XML data from a remote host (think RSS), so I decided to write one. In this example I borrowed code heavily from both sax-js's example code and the node-rss source code.
The code
cat saxrss.js
var sax=require('sax'); var http=require('http'); var callback=function(){}; exports.get_rss=function(host,port,path, cb) { callback=cb var parser = sax.parser(true) var item = null var currentTag = null var items=[] var cnt=0 parser.onclosetag = function (tagName) { var tag_name=tagName.toLowerCase(); if (tag_name === 'item' || tag_name === 'entry') { currentTag = item = null cnt++ return } if (currentTag && currentTag.parent) { var p = currentTag.parent delete currentTag.parent currentTag = p } } parser.onopentag = function (tag) { var tag_name=tag.name.toLowerCase() if (tag_name !== 'item' && tag_name !== 'entry' && !item) return if (tag_name === 'item') { item = tag items[cnt]={} } tag.parent = currentTag tag.children = [] tag.parent && tag.parent.children.push(tag) currentTag = tag } parser.ontext = function (text) { if (currentTag) { items[cnt][currentTag.name.toLowerCase()]=text } } parser.onend = function () { callback(items) } var body=''; http.get( { host:host, path:path, port:port }, function(res) { res.addListener('end', function() { parser.write(body).end() }); res.setEncoding('utf8'); res.on('data', function(d) { body+=d; }); }); }
cat test1.js
// Demo driver: fetch and print the Yahoo Finance headline feed for AAPL.
var rss = require('./saxrss.js');

var host = 'feeds.finance.yahoo.com';
// The query string needs a literal "&region=" parameter; "&reg" must not be
// decoded as the (R) HTML entity.
var path = '/rss/2.0/headline?s=aapl&region=US&lang=en-US';

rss.get_rss(host, 80, path, function (items) {
  console.log(items);
});
To run
node test1.js
Required node modules:
sax
References:
[ UPDATE 2/20/2012 ]
With xml-simple module, the above example can be written as
// Fetch an RSS feed over HTTP and convert the XML to a JavaScript object
// using the xml-simple module.
var http = require('http');
var simplexml = require('xml-simple');

var config = {
  host: 'feeds.finance.yahoo.com',
  // The query string needs a literal "&region=" parameter; "&reg" must not
  // be decoded as the (R) HTML entity.
  path: '/rss/2.0/headline?s=aapl&region=US&lang=en-US',
  port: 80
};
var body = '';

var request = http.get(config, function (res) {
  res.setEncoding('utf8');
  res.on('data', function (d) {
    body += d;
  });
  res.addListener('end', function () {
    simplexml.parse(body, function (e, parsed) {
      // Report a parse failure instead of dereferencing an undefined result.
      if (e) {
        console.error(e);
        return;
      }
      console.log(parsed.channel.item);
    });
  });
});

// Without this handler a DNS or connection failure is an uncaught 'error'
// event and crashes the process.
request.on('error', function (err) {
  console.error(err);
});
To install xml-simple, simply run: npm install -g xml-simple
Categories: javascript, node.js, Programming
Comments (0)
Trackbacks (0)
Leave a comment
Trackback