Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
**/*.ts
98 changes: 98 additions & 0 deletions index.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/// <reference types="node" />

import stream = require("stream");

export = FeedParser;

/**
 * Transform stream that is written raw feed XML and read as parsed items.
 *
 * The typed overloads below cover the `"meta"`, `"readable"` and `"error"`
 * events; every other event name falls through to the generic signature.
 */
declare class FeedParser extends stream.Transform {
    /**
     * @param options Parser settings; may be omitted entirely.
     */
    constructor(options?: FeedParser.Options);

    /** Feed-level metadata collected by the parser. */
    meta: FeedParser.Meta;

    /** The options this instance is running with. */
    options: FeedParser.Options;

    /**
     * Pulls the next parsed item off the stream.
     *
     * @returns The next item, or `null` when none is available.
     */
    read(): FeedParser.Item | null;

    /**
     * Clears the error recorded on the underlying sax parser (if there is
     * one) and resumes it.
     */
    resumeSaxError(): void;

    on(event: "meta", listener: (meta: FeedParser.Meta) => void): this;
    on(event: "readable", listener: (this: FeedParser) => void): this;
    on(event: "error", listener: (err: Error) => void): this;
    on(event: string, listener: (...args: any[]) => void): this;

    addListener(event: "meta", listener: (meta: FeedParser.Meta) => void): this;
    addListener(event: "readable", listener: (this: FeedParser) => void): this;
    addListener(event: "error", listener: (err: Error) => void): this;
    addListener(event: string, listener: (...args: any[]) => void): this;

    once(event: "meta", listener: (meta: FeedParser.Meta) => void): this;
    once(event: "readable", listener: (this: FeedParser) => void): this;
    once(event: "error", listener: (err: Error) => void): this;
    once(event: string, listener: (...args: any[]) => void): this;
}

/**
 * Types describing the parser's options and the objects it produces.
 */
declare namespace FeedParser {
    /** Feed formats the parser distinguishes; exposed as `Meta["#type"]`. */
    type Type = "atom" | "rss" | "rdf";

    /** Settings accepted by the `FeedParser` constructor. All are optional. */
    interface Options {
        /**
         * NOTE(review): presumably forwarded to sax as its strict-mode flag;
         * confirm against the constructor.
         */
        strict?: boolean;
        /** The constructor sets this to `true` when the key is absent. */
        normalize?: boolean;
        /**
         * The constructor sets this to `true` when the key is absent.
         * Presumably controls whether `Item.meta` is populated; confirm.
         */
        addmeta?: boolean;
        /**
         * When set, the constructor seeds the parser's xml:base stack with
         * this URL.
         */
        feedurl?: string;
        /**
         * When truthy, the parser resumes after emitting a sax error.
         * The constructor sets this to `true` when the key is absent.
         */
        resume_saxerror?: boolean;
        /**
         * Assigned to the sax module's `MAX_BUFFER_LENGTH`. When the key is
         * absent, 16 MiB is used instead of sax's 64K default. Pass
         * `Infinity` for unlimited buffers.
         */
        MAX_BUFFER_LENGTH?: number;
    }

    /** Image attached to a feed. */
    interface Image {
        url: string;
        title: string;
    }

    /**
     * Feed-level metadata. Keys beyond the ones listed here (for example
     * `"#xml"`, filled in from the XML declaration) are reachable through
     * the index signature.
     */
    interface Meta {
        /** Namespace declarations collected from the feed. */
        "#ns": Array<Record<string, string>>;
        "#type": Type;
        "#version": string;
        /** Attributes of the feed's root element. */
        "@": Record<string, any>;
        title: string;
        description: string;
        date: Date | null;
        pubdate: Date | null;
        link: string;
        xmlurl: string;
        author: string;
        language: string;
        image: Image;
        favicon: string;
        copyright: string;
        generator: string;
        categories: string[];
        [key: string]: any;
    }

    /**
     * Media attached to an item. For media-content elements the parser
     * fills `length` from the `filesize` attribute and falls back to
     * `medium` when `type` is missing.
     */
    interface Enclosure {
        url: string;
        type?: string;
        length?: string;
    }

    /** Origin feed of an item. */
    interface Source {
        title: string;
        url: string;
    }

    /**
     * A single article or entry returned by `FeedParser#read`. Additional
     * keys are reachable through the index signature.
     */
    interface Item {
        title: string;
        description: string;
        summary: string;
        date: Date | null;
        pubdate: Date | null;
        link: string;
        origlink: string;
        author: string;
        guid: string;
        comments: string;
        /** Only the URL is typed for item images. */
        image: Pick<Image, "url">;
        categories: string[];
        source: Source;
        enclosures: Enclosure[];
        meta: Meta;
        [key: string]: any;
    }
}
124 changes: 114 additions & 10 deletions lib/feedparser/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ var sax = require('sax')
* - generator {String}
* - categories {Array}
*
* @param {Object} options
* @api public
* @this {FeedParserInstance}
* @param {import('../../index').Options} [options]
*/
function FeedParser (options) {
if (!(this instanceof FeedParser)) return new FeedParser(options);
Expand All @@ -72,9 +72,13 @@ function FeedParser (options) {
if (!('normalize' in this.options)) this.options.normalize = true;
if (!('addmeta' in this.options)) this.options.addmeta = true;
if (!('resume_saxerror' in this.options)) this.options.resume_saxerror = true;
// MAX_BUFFER_LENGTH is not part of the public API of sax, but we need to be
// able to handle nodes that are larger than the 64K default
if ('MAX_BUFFER_LENGTH' in this.options) {
// @ts-expect-error - private API of sax
sax.MAX_BUFFER_LENGTH = this.options.MAX_BUFFER_LENGTH; // set to Infinity to have unlimited buffers
} else {
// @ts-expect-error
sax.MAX_BUFFER_LENGTH = 16 * 1024 * 1024; // 16M versus the 64K default
}
if (this.options.feedurl) this.xmlbase.unshift({ '#name': 'xml', '#': this.options.feedurl});
Expand All @@ -96,6 +100,7 @@ util.inherits(FeedParser, TransformStream);
*
* Initializes the class-variables
*/
/** @this {FeedParserInstance} */
FeedParser.prototype.init = function (){
this.meta = {
'#ns': [],
Expand All @@ -113,6 +118,7 @@ FeedParser.prototype.init = function (){
this.errors = [];
};

/** @this {FeedParserInstance} */
FeedParser.prototype.handleEnd = function (){
// We made it to the end without throwing, but let's make sure we were actually
// parsing a feed
Expand All @@ -123,26 +129,33 @@ FeedParser.prototype.handleEnd = function (){
this.push(null);
};

/**
 * Listener for errors raised by the underlying sax stream.
 *
 * The error is always re-emitted as an 'error' event on the FeedParser.
 * Afterwards, if the `resume_saxerror` option is on, the sax parser is
 * resumed so parsing can continue.
 *
 * @this {FeedParserInstance}
 * @param {Error} e - The error reported by sax
 */
FeedParser.prototype.handleSaxError = function (e) {
  this.emit('error', e);

  // Nothing more to do unless the caller opted into resuming.
  if (!this.options.resume_saxerror) return;

  this.resumeSaxError();
};

/**
 * Clears the error recorded on the underlying sax parser and resumes it.
 * Does nothing when the sax stream has no parser attached.
 *
 * @this {FeedParserInstance}
 */
FeedParser.prototype.resumeSaxError = function () {
  // `_parser` is the parser object sax keeps on its stream wrapper.
  var saxParser = this.stream._parser;
  if (!saxParser) return;

  saxParser.error = null;
  saxParser.resume();
};

/**
 * Re-emits the given error as an 'error' event on this FeedParser.
 * Unlike handleSaxError, this never attempts to resume the sax parser.
 *
 * @this {FeedParserInstance}
 * @param {Error} e - The error to report
 */
FeedParser.prototype.handleError = function (e){
this.emit('error', e);
};

// parses the xml declaration, which looks like:
// <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
/**
* @this {FeedParserInstance}
* @param {SaxProcessingInstruction} node
*/
FeedParser.prototype.handleProcessingInstruction = function (node) {
if (node.name === 'xml') {
this.meta['#xml'] = node.body.trim().split(/\s+/).reduce(function (map, attr) {
Expand All @@ -155,6 +168,10 @@ FeedParser.prototype.handleProcessingInstruction = function (node) {
}
};

/**
* @this {FeedParserInstance}
* @param {import('sax').QualifiedTag} node
*/
FeedParser.prototype.handleOpenTag = function (node){
var n = {};
n['#name'] = node.name; // Avoid namespace collissions later...
Expand Down Expand Up @@ -204,6 +221,7 @@ FeedParser.prototype.handleOpenTag = function (node){
this.stack.unshift(n);
};

/** @this {FeedParserInstance} */
FeedParser.prototype.handleCloseTag = function (el){
var node = {
'#name': el,
Expand Down Expand Up @@ -355,6 +373,10 @@ FeedParser.prototype.handleCloseTag = function (el){
}
};

/**
* @this {FeedParserInstance}
* @param {string} text
*/
FeedParser.prototype.handleText = function (text){
if (this.in_xhtml) {
this.xhtml['#'] += text;
Expand All @@ -369,6 +391,12 @@ FeedParser.prototype.handleText = function (text){
}
};

/**
* @this {FeedParserInstance}
* @param {Object.<string, import('sax').QualifiedAttribute>} attrs
* @param {string} el
* @returns {Object.<string, string>}
*/
FeedParser.prototype.handleAttributes = function handleAttributes (attrs, el) {
/*
* Using the sax.js option { xmlns: true }
Expand All @@ -382,14 +410,14 @@ FeedParser.prototype.handleAttributes = function handleAttributes (attrs, el) {
*/

var basepath = ''
, simplifiedAttributes = {}
, simplifiedAttributes = /** @type {Object.<string, string>} */ ({})
;

if (this.xmlbase && this.xmlbase.length) {
basepath = this.xmlbase[0]['#'];
}

Object.keys(attrs).forEach(function(key){
Object.keys(attrs).forEach(/** @this {FeedParserInstance} */ function(key){
var attr = attrs[key]
, ns = {}
, prefix = ''
Expand Down Expand Up @@ -425,6 +453,13 @@ FeedParser.prototype.handleAttributes = function handleAttributes (attrs, el) {
return simplifiedAttributes;
};

/**
* @this {FeedParserInstance}
* @param {ParsedNode} node
* @param {import('../../index').Type} type
* @param {import('../../index').Options} options
* @returns {Object}
*/
FeedParser.prototype.handleMeta = function handleMeta (node, type, options) {
if (!type || !node) return {};

Expand Down Expand Up @@ -772,6 +807,13 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) {
return meta;
};

/**
* @this {FeedParserInstance}
* @param {ParsedNode} node
* @param {import('../../index').Type} type
* @param {import('../../index').Options} options
* @returns {Object}
*/
FeedParser.prototype.handleItem = function handleItem (node, type, options){
if (!type || !node) return {};

Expand Down Expand Up @@ -830,7 +872,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){
if (link['@']['rel'] == 'self' && !item.link) item.link = link['@']['href'];
if (link['@']['rel'] == 'replies') item.comments = link['@']['href'];
if (link['@']['rel'] == 'enclosure') {
enclosure = {};
enclosure = /** @type {import('../../index').Enclosure} */ ({});
enclosure.url = link['@']['href'];
enclosure.type = _.get(link['@'], 'type');
enclosure.length = _.get(link['@'], 'length');
Expand All @@ -853,7 +895,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){
if (el['@']['rel'] == 'self' && !item.link) item.link = el['@']['href'];
if (el['@']['rel'] == 'replies') item.comments = el['@']['href'];
if (el['@']['rel'] == 'enclosure') {
enclosure = {};
enclosure = /** @type {import('../../index').Enclosure} */ ({});
enclosure.url = el['@']['href'];
enclosure.type = _.get(el['@'], 'type');
enclosure.length = _.get(el['@'], 'length');
Expand Down Expand Up @@ -932,7 +974,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){
case('enclosure'):
if (Array.isArray(el)) {
el.forEach(function (enc){
enclosure = {};
enclosure = /** @type {import('../../index').Enclosure} */ ({});
enclosure.url = _.get(enc['@'], 'url');
enclosure.type = _.get(enc['@'], 'type');
enclosure.length = _.get(enc['@'], 'length');
Expand All @@ -943,7 +985,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){
}
});
} else {
enclosure = {};
enclosure = /** @type {import('../../index').Enclosure} */ ({});
enclosure.url = _.get(el['@'], 'url');
enclosure.type = _.get(el['@'], 'type');
enclosure.length = _.get(el['@'], 'length');
Expand All @@ -958,7 +1000,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){
var optionalAttributes = ['bitrate', 'framerate', 'samplingrate', 'duration', 'height', 'width'];
if (Array.isArray(el)) {
el.forEach(function (enc){
enclosure = {};
enclosure = /** @type {import('../../index').Enclosure} */ ({});
enclosure.url = _.get(enc['@'], 'url');
enclosure.type = _.get(enc['@'], 'type') || _.get(enc['@'], 'medium');
enclosure.length = _.get(enc['@'], 'filesize');
Expand All @@ -976,7 +1018,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){
}
});
} else {
enclosure = {};
enclosure = /** @type {import('../../index').Enclosure} */ ({});
enclosure.url = _.get(el['@'], 'url');
enclosure.type = _.get(el['@'], 'type') || _.get(el['@'], 'medium');
enclosure.length = _.get(el['@'], 'filesize');
Expand Down Expand Up @@ -1112,6 +1154,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){
};

// Naive Stream API
/** @this {FeedParserInstance} */
FeedParser.prototype._transform = function (data, encoding, done) {
try {
this.stream.write(data);
Expand All @@ -1123,6 +1166,7 @@ FeedParser.prototype._transform = function (data, encoding, done) {
}
};

/** @this {FeedParserInstance} */
FeedParser.prototype._flush = function (done) {
try {
this.stream.end();
Expand All @@ -1133,4 +1177,64 @@ FeedParser.prototype._flush = function (done) {
}
};

/**
* @typedef {Object} ParsedNode
* The internal accumulator object that handleOpenTag builds and pushes onto
* this.stack. Keys accumulate as child elements are parsed. String keys
* '#name', '#prefix', '#local', '#uri' hold element namespace info; '@' holds
* simplified attributes; '#' holds text content. Named keys hold child element
* values which may be strings, nested ParsedNodes, or arrays of either.
*/

/**
* @typedef {Object} XmlBaseEntry
* An entry in the this.xmlbase stack. The '#name' key holds the element name
* that established the base URL; the '#' key holds the xml:base URL value.
*/

/**
* @typedef {Object} SaxProcessingInstruction
* Payload of the sax 'processinginstruction' event.
* @property {string} name - Processing instruction target, e.g. "xml"
* @property {string} body - The rest of the processing instruction content
*/

/**
* @typedef {Object} AddressParserResult
* Shape of each item returned by the addressparser module.
* @property {string} [name]
* @property {string} [address]
*/

/**
* @typedef {Object} FeedParserState
* Instance properties set up by FeedParser.prototype.init and the constructor.
* @property {Object} meta - Parsed feed metadata; shape evolves during parsing
* @property {import('../../index').Options} options
* @property {Object.<string, string>} _namespaces
* @property {boolean} _emitted_meta
* @property {Array.<ParsedNode>} stack
* @property {Array.<XmlBaseEntry>} xmlbase
* @property {boolean} in_xhtml
* @property {Object} xhtml
* @property {Error[]} errors
* @property {import('sax').SAXStream} stream - The underlying sax stream
* @property {function(): void} init
* @property {function(): void} handleEnd
* @property {function(Error): void} handleSaxError
* @property {function(): void} resumeSaxError
* @property {function(Error): void} handleError
* @property {function(SaxProcessingInstruction): void} handleProcessingInstruction
* @property {function(import('sax').QualifiedTag): void} handleOpenTag
* @property {function(string): void} handleCloseTag
* @property {function(string): void} handleText
* @property {function(Object.<string, import('sax').QualifiedAttribute>, string): Object.<string, string>} handleAttributes
* @property {function(ParsedNode, import('../../index').Type, import('../../index').Options): Object} handleMeta
* @property {function(ParsedNode, import('../../index').Type, import('../../index').Options): Object} handleItem
*/

/**
* @typedef {import('readable-stream').Transform & FeedParserState} FeedParserInstance
*/

exports = module.exports = FeedParser;
Loading