Home > javascript, node.js, Programming > A simple node.js rss parser using sax-js

A simple node.js rss parser using sax-js

The XML parser sax-js, written by isaacs (the creator of npm, the de facto package manager of node.js), comes with a few examples that deal with local XML files. I couldn’t find one that parses XML data from a remote host (think RSS), so I decided to write one. In this example I borrowed heavily from both sax-js’s example code and the node-rss source code.

The code

cat saxrss.js

var sax=require('sax');
var http=require('http');

var callback=function(){};

exports.get_rss=function(host,port,path, cb) {
	callback=cb
	var parser = sax.parser(true)
	var item = null
	var currentTag = null
	var items=[]
	var cnt=0

	parser.onclosetag = function (tagName) {
		var tag_name=tagName.toLowerCase();
		if (tag_name === 'item' || tag_name === 'entry') {
			currentTag = item = null
			cnt++
			return
		}
		if (currentTag && currentTag.parent) {
			var p = currentTag.parent
			delete currentTag.parent
			currentTag = p
		}
	}

	parser.onopentag = function (tag) {
		var tag_name=tag.name.toLowerCase()
		if (tag_name !== 'item' && tag_name !== 'entry' && !item) return
		if (tag_name === 'item') {
			item = tag
				items[cnt]={}
		}
		tag.parent = currentTag
		tag.children = []
		tag.parent && tag.parent.children.push(tag)
		currentTag = tag
	}

	parser.ontext = function (text) {
		if (currentTag) {
			items[cnt][currentTag.name.toLowerCase()]=text
		}
	}

	parser.onend = function () {
		callback(items)
	}

	var body='';
	http.get( { host:host, path:path, port:port }, function(res) {
		res.addListener('end', function() {
			parser.write(body).end()
		});
		res.setEncoding('utf8');
		res.on('data', function(d) {
			body+=d;
		});
	});
}

cat test1.js

// Demo: fetch a feed with saxrss.js and print the parsed items.
var saxrss = require('./saxrss.js');

// Yahoo Finance headline feed for the stock AAPL.
var feed = {
	host: 'feeds.finance.yahoo.com',
	port: 80,
	path: '/rss/2.0/headline?s=aapl&region=US&lang=en-US'
};

// Print whatever the parser collected.
function printItems(items) {
	console.log(items);
}

saxrss.get_rss(feed.host, feed.port, feed.path, printItems);
To run

node test1.js

Required node modules:

sax

References:

node-rss
sax-js

[ UPDATE 2/20/2012 ]
With the xml-simple module, the above example can be written as:

// Example: fetch a feed over HTTP and convert its XML to a plain object
// with xml-simple, then print the feed's items.
var http = require('http');
var simplexml = require('xml-simple');

var config = {
	host: 'feeds.finance.yahoo.com',
	path: '/rss/2.0/headline?s=aapl&region=US&lang=en-US',
	port: 80
};
var body = '';

var request = http.get(config, function (res) {
	res.setEncoding('utf8');
	res.on('data', function (d) {
		body += d;
	});
	res.addListener('end', function () {
		simplexml.parse(body, function (e, parsed) {
			// Report a parse failure instead of dereferencing a missing result.
			if (e) {
				console.error('Failed to parse feed:', e);
				return;
			}
			if (!parsed || !parsed.channel) {
				console.error('Feed has no channel element');
				return;
			}
			console.log(parsed.channel.item);
			// To dump the whole document instead: console.log(JSON.stringify(parsed));
		});
	});
});

// Without this listener a DNS or connection failure is an uncaught exception.
request.on('error', function (err) {
	console.error('Request failed:', err);
});

To install xml-simple, simply run npm install -g xml-simple.

  1. No comments yet.
  1. No trackbacks yet.

Leave a comment