From 51b7a14763e9cfb9b27cdd22f26fea8836e494c5 Mon Sep 17 00:00:00 2001 From: Dan MacTough Date: Thu, 26 Mar 2026 19:06:44 -0400 Subject: [PATCH] Fix SAXError handling: stop emitting error when resume_saxerror is true Emitting an error event is incompatible with resuming parsing because readable-stream immediately unpibes the destination on error. When resume_saxerror is true (default), SAXErrors are now silently collected in feedparser.errors and parsing continues. When false, the error is emitted and parsing aborts. Adds tests for both behaviors. Co-Authored-By: Claude Sonnet 4.6 --- README.md | 9 ++++--- lib/feedparser/index.js | 10 +++++--- test/bad.js | 54 +++++++++++++++++++++++++++++++++++++++++ test/feeds/saxerror.xml | 26 ++++++++++++++++++++ 4 files changed, 91 insertions(+), 8 deletions(-) create mode 100644 test/feeds/saxerror.xml diff --git a/README.md b/README.md index bfef599..40dbeb3 100644 --- a/README.md +++ b/README.md @@ -85,10 +85,11 @@ You can also check out this nice [working implementation](https://github.com/scr you should set the `feedurl` option. Otherwise, feel free to ignore this option. - `resume_saxerror` - Set to `false` to override Feedparser's default behavior, which - is to emit any `SAXError` on `error` and then automatically resume parsing. In + is to silently handle them and then automatically resume parsing. In my experience, `SAXErrors` are not usually fatal, so this is usually helpful - behavior. If you want total control over handling these errors and optionally - aborting parsing the feed, use this option. + behavior. If you prefer to abort parsing the feed when there's a `SAXError`, + set `resume_saxerror` to `false`, which will cause the `SAXError` to be emitted + on `error` and abort parsing. ## Examples @@ -104,7 +105,7 @@ Each readable chunk is an object representing an article in the feed. ### Events Emitted * `meta` - called with feed `meta` when it has been parsed -* `error` - called with `error` whenever there is a Feedparser error of any kind (SAXError, Feedparser error, etc.) +* `error` - called with `error` whenever there is a fatal Feedparser error. SAXErrors are only emitted here when `resume_saxerror` is `false`; otherwise they are silently collected in `feedparser.errors`. ## What is the parsed output produced by feedparser? diff --git a/lib/feedparser/index.js b/lib/feedparser/index.js index da2861c..984aee0 100644 --- a/lib/feedparser/index.js +++ b/lib/feedparser/index.js @@ -131,10 +131,11 @@ FeedParser.prototype.handleEnd = function (){ /** @this {FeedParserInstance} */ FeedParser.prototype.handleSaxError = function (e) { - this.emit('error', e); - if (this.options.resume_saxerror) { - this.resumeSaxError(); + if (!this.options.resume_saxerror) { + return this.handleError(e); } + this.errors.push(e); + this.resumeSaxError(); }; /** @this {FeedParserInstance} */ @@ -146,7 +147,8 @@ FeedParser.prototype.resumeSaxError = function () { }; /** @this {FeedParserInstance} */ -FeedParser.prototype.handleError = function (e){ +FeedParser.prototype.handleError = function (e) { + this.errors.push(e); this.emit('error', e); }; diff --git a/test/bad.js b/test/bad.js index af987dd..b74b98e 100644 --- a/test/bad.js +++ b/test/bad.js @@ -67,4 +67,58 @@ describe('bad feeds', function(){ }); }); + + describe('SAXError handling', function () { + + // The fixture is a valid RSS feed with an unescaped & in one item's link. + // strict: true is required because feedparser defaults to non-strict SAX mode. + var feed = __dirname + '/feeds/saxerror.xml'; + + describe('resume_saxerror: true (default)', function () { + + it('should silently collect the error and continue parsing all items', function (done) { + var items = []; + var feedparser = new FeedParser({ strict: true }); + fs.createReadStream(feed).pipe(feedparser); + feedparser.on('readable', function () { + var item; + while ((item = this.read())) items.push(item.title); + }) + .on('error', function (err) { + done(err); + }) + .on('end', function () { + assert.equal(feedparser.errors.length, 1); + assert.ok(feedparser.errors[0] instanceof Error); + assert.equal(items.length, 3); + assert.deepEqual(items, ['Good Item', 'Bad Item', 'Item After Error']); + done(); + }); + }); + + }); + + describe('resume_saxerror: false', function () { + + it('should emit the SAXError and abort parsing', function (done) { + var items = []; + var feedparser = new FeedParser({ strict: true, resume_saxerror: false }); + fs.createReadStream(feed).pipe(feedparser); + feedparser.on('readable', function () { + var item; + while ((item = this.read())) items.push(item.title); + }) + .on('error', function (err) { + assert.ok(err instanceof Error); + assert.equal(feedparser.errors.length, 1); + assert.strictEqual(feedparser.errors[0], err); + // Only the item before the error should have been parsed + assert.deepEqual(items, ['Good Item']); + done(); + }); + }); + + }); + + }); }); diff --git a/test/feeds/saxerror.xml b/test/feeds/saxerror.xml new file mode 100644 index 0000000..6347035 --- /dev/null +++ b/test/feeds/saxerror.xml @@ -0,0 +1,26 @@ + + + + SAXError Test Feed + http://example.com/ + A feed with a SAX error in one item. + + Good Item + http://example.com/good + This item is fine. + http://example.com/good + + + Bad Item + http://example.com/bad?foo=1&bar=2 + This item has an unescaped ampersand in the link. + http://example.com/bad + + + Item After Error + http://example.com/after + This item comes after the SAX error. + http://example.com/after + + +