Skip to content

Commit e8b4789

Browse files
authored
Merge pull request #302 from danmactough/fix-saxerror-handling
Fix SAXError handling: stop emitting error when resume_saxerror is true
2 parents 4606b49 + 51b7a14 commit e8b4789

4 files changed

Lines changed: 91 additions & 8 deletions

File tree

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,11 @@ You can also check out this nice [working implementation](https://github.com/scr
8585
you should set the `feedurl` option. Otherwise, feel free to ignore this option.
8686

8787
- `resume_saxerror` - Set to `false` to override Feedparser's default behavior, which
88-
is to emit any `SAXError` on `error` and then automatically resume parsing. In
88+
is to silently handle them and then automatically resume parsing. In
8989
my experience, `SAXErrors` are not usually fatal, so this is usually helpful
90-
behavior. If you want total control over handling these errors and optionally
91-
aborting parsing the feed, use this option.
90+
behavior. If you prefer to abort parsing the feed when there's a `SAXError`,
91+
set `resume_saxerror` to `false`, which will cause the `SAXError` to be emitted
92+
on `error` and abort parsing.
9293

9394
## Examples
9495

@@ -104,7 +105,7 @@ Each readable chunk is an object representing an article in the feed.
104105
### Events Emitted
105106

106107
* `meta` - called with feed `meta` when it has been parsed
107-
* `error` - called with `error` whenever there is a Feedparser error of any kind (SAXError, Feedparser error, etc.)
108+
* `error` - called with `error` whenever there is a fatal Feedparser error. SAXErrors are only emitted here when `resume_saxerror` is `false`; otherwise they are silently collected in `feedparser.errors`.
108109

109110
## What is the parsed output produced by feedparser?
110111

lib/feedparser/index.js

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,11 @@ FeedParser.prototype.handleEnd = function (){
131131

132132
/** @this {FeedParserInstance} */
133133
FeedParser.prototype.handleSaxError = function (e) {
134-
this.emit('error', e);
135-
if (this.options.resume_saxerror) {
136-
this.resumeSaxError();
134+
if (!this.options.resume_saxerror) {
135+
return this.handleError(e);
137136
}
137+
this.errors.push(e);
138+
this.resumeSaxError();
138139
};
139140

140141
/** @this {FeedParserInstance} */
@@ -146,7 +147,8 @@ FeedParser.prototype.resumeSaxError = function () {
146147
};
147148

148149
/** @this {FeedParserInstance} */
149-
FeedParser.prototype.handleError = function (e){
150+
FeedParser.prototype.handleError = function (e) {
151+
this.errors.push(e);
150152
this.emit('error', e);
151153
};
152154

test/bad.js

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,58 @@ describe('bad feeds', function(){
6767
});
6868

6969
});
70+
71+
describe('SAXError handling', function () {
72+
73+
// The fixture is a valid RSS feed with an unescaped & in one item's link.
74+
// strict: true is required because feedparser defaults to non-strict SAX mode.
75+
var feed = __dirname + '/feeds/saxerror.xml';
76+
77+
describe('resume_saxerror: true (default)', function () {
78+
79+
it('should silently collect the error and continue parsing all items', function (done) {
80+
var items = [];
81+
var feedparser = new FeedParser({ strict: true });
82+
fs.createReadStream(feed).pipe(feedparser);
83+
feedparser.on('readable', function () {
84+
var item;
85+
while ((item = this.read())) items.push(item.title);
86+
})
87+
.on('error', function (err) {
88+
done(err);
89+
})
90+
.on('end', function () {
91+
assert.equal(feedparser.errors.length, 1);
92+
assert.ok(feedparser.errors[0] instanceof Error);
93+
assert.equal(items.length, 3);
94+
assert.deepEqual(items, ['Good Item', 'Bad Item', 'Item After Error']);
95+
done();
96+
});
97+
});
98+
99+
});
100+
101+
describe('resume_saxerror: false', function () {
102+
103+
it('should emit the SAXError and abort parsing', function (done) {
104+
var items = [];
105+
var feedparser = new FeedParser({ strict: true, resume_saxerror: false });
106+
fs.createReadStream(feed).pipe(feedparser);
107+
feedparser.on('readable', function () {
108+
var item;
109+
while ((item = this.read())) items.push(item.title);
110+
})
111+
.on('error', function (err) {
112+
assert.ok(err instanceof Error);
113+
assert.equal(feedparser.errors.length, 1);
114+
assert.strictEqual(feedparser.errors[0], err);
115+
// Only the item before the error should have been parsed
116+
assert.deepEqual(items, ['Good Item']);
117+
done();
118+
});
119+
});
120+
121+
});
122+
123+
});
70124
});

test/feeds/saxerror.xml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<?xml version="1.0"?>
2+
<rss version="2.0">
3+
<channel>
4+
<title>SAXError Test Feed</title>
5+
<link>http://example.com/</link>
6+
<description>A feed with a SAX error in one item.</description>
7+
<item>
8+
<title>Good Item</title>
9+
<link>http://example.com/good</link>
10+
<description>This item is fine.</description>
11+
<guid>http://example.com/good</guid>
12+
</item>
13+
<item>
14+
<title>Bad Item</title>
15+
<link>http://example.com/bad?foo=1&bar=2</link>
16+
<description>This item has an unescaped ampersand in the link.</description>
17+
<guid>http://example.com/bad</guid>
18+
</item>
19+
<item>
20+
<title>Item After Error</title>
21+
<link>http://example.com/after</link>
22+
<description>This item comes after the SAX error.</description>
23+
<guid>http://example.com/after</guid>
24+
</item>
25+
</channel>
26+
</rss>

0 commit comments

Comments
 (0)