Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,11 @@ You can also check out this nice [working implementation](https://github.com/scr
you should set the `feedurl` option. Otherwise, feel free to ignore this option.

- `resume_saxerror` - Set to `false` to override Feedparser's default behavior, which
is to silently collect any `SAXError` in `feedparser.errors` and then automatically
resume parsing. In my experience, `SAXErrors` are not usually fatal, so this is
usually helpful behavior. If you prefer to abort parsing the feed when there's a
`SAXError`, set `resume_saxerror` to `false`, which will cause the `SAXError` to be
emitted on `error` and abort parsing.

## Examples

Expand All @@ -104,7 +105,7 @@ Each readable chunk is an object representing an article in the feed.
### Events Emitted

* `meta` - called with feed `meta` when it has been parsed
* `error` - called with `error` whenever there is a fatal Feedparser error. SAXErrors are only emitted here when `resume_saxerror` is `false`; otherwise they are silently collected in `feedparser.errors`.

## What is the parsed output produced by feedparser?

Expand Down
10 changes: 6 additions & 4 deletions lib/feedparser/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,11 @@ FeedParser.prototype.handleEnd = function (){

/**
 * Handle an error reported by the SAX parser.
 *
 * When `options.resume_saxerror` is falsy the error is handed to
 * `handleError` and nothing is resumed. Otherwise the error is appended to
 * `this.errors` without being emitted, and `resumeSaxError` is called.
 *
 * @this {FeedParserInstance}
 * @param {Error} e - the error raised by the SAX parser
 */
FeedParser.prototype.handleSaxError = function (e) {
  if (!this.options.resume_saxerror) {
    return this.handleError(e);
  }
  // Resuming: record the error quietly rather than emitting 'error'.
  this.errors.push(e);
  this.resumeSaxError();
};

/** @this {FeedParserInstance} */
Expand All @@ -146,7 +147,8 @@ FeedParser.prototype.resumeSaxError = function () {
};

/**
 * Record an error on the parser and emit it.
 *
 * The error is appended to `this.errors` before the `error` event fires, so
 * listeners can already see it in that list.
 *
 * @this {FeedParserInstance}
 * @param {Error} e - the error to record and emit
 */
FeedParser.prototype.handleError = function (e) {
  this.errors.push(e);
  this.emit('error', e);
};

Expand Down
54 changes: 54 additions & 0 deletions test/bad.js
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,58 @@ describe('bad feeds', function(){
});

});

describe('SAXError handling', function () {

  // Fixture: an otherwise ordinary RSS feed whose second item ("Bad Item")
  // carries a raw, unescaped `&` in its <link>.
  // feedparser defaults to non-strict SAX mode, so every test below passes
  // `strict: true`.
  var feed = __dirname + '/feeds/saxerror.xml';

  /**
   * Pipe the fixture into a new FeedParser and record the title of every
   * item as it becomes readable.
   *
   * @param {Object} options - options handed to the FeedParser constructor
   * @returns {{parser: Object, titles: string[]}} the parser and the live
   *   array that titles are appended to
   */
  function startParsing(options) {
    var titles = [];
    var parser = new FeedParser(options);
    fs.createReadStream(feed).pipe(parser);
    parser.on('readable', function () {
      for (var article = parser.read(); article; article = parser.read()) {
        titles.push(article.title);
      }
    });
    return { parser: parser, titles: titles };
  }

  describe('resume_saxerror: true (default)', function () {

    it('should silently collect the error and continue parsing all items', function (done) {
      var run = startParsing({ strict: true });
      var feedparser = run.parser;
      var items = run.titles;

      // Any emitted error fails the test: in this mode nothing should be emitted.
      feedparser.on('error', function (err) {
        done(err);
      });

      feedparser.on('end', function () {
        assert.equal(feedparser.errors.length, 1);
        assert.ok(feedparser.errors[0] instanceof Error);
        assert.equal(items.length, 3);
        assert.deepEqual(items, ['Good Item', 'Bad Item', 'Item After Error']);
        done();
      });
    });

  });

  describe('resume_saxerror: false', function () {

    it('should emit the SAXError and abort parsing', function (done) {
      var run = startParsing({ strict: true, resume_saxerror: false });
      var feedparser = run.parser;
      var items = run.titles;

      feedparser.on('error', function (err) {
        assert.ok(err instanceof Error);
        assert.equal(feedparser.errors.length, 1);
        assert.strictEqual(feedparser.errors[0], err);
        // Only the item that precedes the error should have been read
        assert.deepEqual(items, ['Good Item']);
        done();
      });
    });

  });

});
});
26 changes: 26 additions & 0 deletions test/feeds/saxerror.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>SAXError Test Feed</title>
<link>http://example.com/</link>
<description>A feed with a SAX error in one item.</description>
<item>
<title>Good Item</title>
<link>http://example.com/good</link>
<description>This item is fine.</description>
<guid>http://example.com/good</guid>
</item>
<item>
<title>Bad Item</title>
<link>http://example.com/bad?foo=1&bar=2</link>
<description>This item has an unescaped ampersand in the link.</description>
<guid>http://example.com/bad</guid>
</item>
<item>
<title>Item After Error</title>
<link>http://example.com/after</link>
<description>This item comes after the SAX error.</description>
<guid>http://example.com/after</guid>
</item>
</channel>
</rss>