Skip to content

Commit 52ff9e1

Browse files
committed
Add OpenGraphScraperOptions.jsonLDOptions.throwOnJSONParseError and change default behavior to not throw on JSON-LD string parse errors
1 parent 686fcc5 commit 52ff9e1

3 files changed

Lines changed: 94 additions & 1 deletion

File tree

lib/extract.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,16 @@ export default function extractMetaTags(body: string, options: OpenGraphScraperO
9999
if (scriptText) {
100100
scriptText = scriptText.replace(/(\r\n|\n|\r)/gm, ''); // remove newlines
101101
scriptText = unescapeScriptText(scriptText);
102-
ogObject.jsonLD.push(JSON.parse(scriptText));
102+
try {
103+
ogObject.jsonLD.push(JSON.parse(scriptText));
104+
} catch (error: unknown) {
105+
if (options.jsonLDOptions?.logOnJSONParseError) {
106+
console.error('Error parsing JSON-LD script tag:', error);
107+
}
108+
if (options.jsonLDOptions?.throwOnJSONParseError) {
109+
throw error;
110+
}
111+
}
103112
}
104113
}
105114
});

lib/types.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ export interface OpenGraphScraperOptions {
3838
timeout?: number;
3939
url?: string;
4040
urlValidatorSettings?: ValidatorSettings;
41+
jsonLDOptions?: JSONLDOptions;
4142
}
4243

4344
/**
@@ -67,6 +68,14 @@ export interface ValidatorSettings {
6768
validate_length: boolean;
6869
}
6970

71+
/**
72+
* Options for the JSON-LD parser
* Both flags are optional. When neither is set, a JSON-LD script tag whose
* text fails JSON.parse is skipped: nothing is pushed, thrown, or logged.
73+
*/
74+
export interface JSONLDOptions {
75+
/** When true, the error raised by JSON.parse for a JSON-LD script tag is rethrown to the caller. */
throwOnJSONParseError?: boolean;
76+
/** When true, the parse error is reported via console.error (this happens before any rethrow). */
logOnJSONParseError?: boolean;
77+
}
78+
7079
/**
7180
* The type for user defined custom meta tags you want to scrape.
7281
*

tests/unit/static.spec.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,81 @@ describe('static check meta tags', function () {
279279
});
280280
});
281281

282+
it('jsonLD - invalid JSON string that cannot be parsed does not throw error', function () {
283+
const metaHTML = `<html><head>
284+
<script type="application/ld+json">
285+
{
286+
"@context": "http://schema.org",
287+
"@type": "Organization",
288+
"name": "Aer ",
289+
290+
"logo": "https:\\\u003csvg xmlns=\\\\"http:\\\\/\\\\/www.w3.org\\\\/2000\\\\/svg\\\\" width=\\\\"75\\\\" height=\\\\"36\\\\" viewbox=\\\\"0 0 75 36\\\\" fill=\\\\"none\\\\"\\\u003e\\\\n\\\u003cpath d=\\\\"M12.266 0L0 34.633H8.674L10.911 27.797H22.072L24.249 34.633H33.188L21.031 0H12.266ZM19.808 20.69H13.236L16.567 10.513L19.808 20.69Z\\\\" fill=\\\\"white\\\\"\\\u003e\\\u003c\\\\/path\\\u003e\\\\n\\\u003cpath d=\\\\"M54.859 13.105C53.8044 11.4222 52.2911 10.0757 50.497 9.224C48.667 8.39754 46.6777 7.98375 44.67 8.012C43.0131 7.9555 41.3629 8.24795 39.8264 8.87038C38.2899 9.49281 36.9014 10.4313 35.751 11.625C33.463 14.025 32.303 17.481 32.303 21.91C32.303 26.654 33.603 30.147 36.155 32.292C38.5883 34.3718 41.6921 35.5006 44.893 35.47C48.878 35.47 52.032 34.188 54.265 31.67C55.5727 30.3563 56.4349 28.6651 56.73 26.835L56.817 26.066H49.368L49.182 26.432C48.9141 26.994 48.5379 27.4976 48.075 27.914C47.2248 28.5969 46.153 28.9429 45.064 28.886C43.9848 28.9182 42.9201 28.6316 42.003 28.062C41.3698 27.6198 40.8495 27.035 40.4839 26.3548C40.1182 25.6746 39.9176 24.918 39.898 24.146H57.049L57.059 23.468C57.1136 21.7984 57.0431 20.1271 56.848 18.468C56.5874 16.5558 55.9083 14.7246 54.859 13.105ZM40.137 18.526C40.3378 17.5453 40.7935 16.6347 41.458 15.886C41.877 15.4681 42.3806 15.1448 42.935 14.9378C43.4894 14.7308 44.0816 14.645 44.672 14.686C45.8585 14.6426 47.0162 15.0577 47.905 15.845C48.6522 16.5522 49.1254 17.5007 49.241 18.523L40.137 18.526Z\\\\" fill=\\\\"white\\\\"\\\u003e\\\u003c\\\\/path\\\u003e\\\\n\\\u003cpath d=\\\\"M73.4671 8.07805C71.6259 8.00296 69.8193 8.59448 68.3791 9.74405C67.8289 10.2221 67.3384 10.7646 66.9181 11.36V8.66005H59.4651V34.629H67.2541V22.188C67.1697 20.8038 67.3952 19.4181 67.9141 18.132C68.6731 16.678 70.1671 15.971 72.4821 15.971H74.9991V8.07104L73.4671 8.07805Z\\\\" fill=\\\\"white\\\\"\\\u003e\\\u003c\\\\/path\\\u003e\\\\n\\\u003c\\\\/svg\\\u003e",
291+
292+
"sameAs": [
293+
"https:\\\\/\\\\/twitter.com\\\\/aer_sf?lang=en"
294+
"https:\\\\/\\\\/www.facebook.com\\\\/aersf\\\\/"
295+
""
296+
"https:\\\\/\\\\/www.instagram.com\\\\/aer_sf\\\\/"
297+
""
298+
""
299+
"https:\\\\/\\\\/www.youtube.com\\\\/@aerdesigns"
300+
""
301+
],
302+
"url": "https:\\\\/\\\\/aersf.com"
303+
}
304+
305+
</script>
306+
</head></html>`;
307+
308+
mockAgent.get('http://www.test.com')
309+
.intercept({ path: '/' })
310+
.reply(200, metaHTML);
311+
312+
// No jsonLDOptions are passed, so this exercises the default behaviour:
// the malformed JSON-LD above must not make the scrape fail.
return ogs({ url: 'www.test.com' })
313+
.then(function (data) {
314+
expect(data.result.success).to.be.eql(true);
315+
expect(data.result.requestUrl).to.be.eql('http://www.test.com');
316+
// The unparseable script tag contributes no entry, so jsonLD stays empty.
expect(data.result.jsonLD).to.be.eql([]);
317+
expect(data.html).to.be.eql(metaHTML);
318+
expect(data.response).to.be.a('response');
319+
});
320+
});
321+
322+
it('jsonLD - invalid JSON string that cannot be parsed throws error when options.jsonLDOptions.throwOnJSONParseError = true', function () {
323+
const metaHTML = `<html><head>
324+
<script type="application/ld+json">
325+
{
326+
"@context": "http://schema.org",
327+
"@type": "Organization",
328+
"name": "Aer ",
329+
330+
"logo": "https:\\\u003csvg xmlns=\\\\"http:\\\\/\\\\/www.w3.org\\\\/2000\\\\/svg\\\\" width=\\\\"75\\\\" height=\\\\"36\\\\" viewbox=\\\\"0 0 75 36\\\\" fill=\\\\"none\\\\"\\\u003e\\\\n\\\u003cpath d=\\\\"M12.266 0L0 34.633H8.674L10.911 27.797H22.072L24.249 34.633H33.188L21.031 0H12.266ZM19.808 20.69H13.236L16.567 10.513L19.808 20.69Z\\\\" fill=\\\\"white\\\\"\\\u003e\\\u003c\\\\/path\\\u003e\\\\n\\\u003cpath d=\\\\"M54.859 13.105C53.8044 11.4222 52.2911 10.0757 50.497 9.224C48.667 8.39754 46.6777 7.98375 44.67 8.012C43.0131 7.9555 41.3629 8.24795 39.8264 8.87038C38.2899 9.49281 36.9014 10.4313 35.751 11.625C33.463 14.025 32.303 17.481 32.303 21.91C32.303 26.654 33.603 30.147 36.155 32.292C38.5883 34.3718 41.6921 35.5006 44.893 35.47C48.878 35.47 52.032 34.188 54.265 31.67C55.5727 30.3563 56.4349 28.6651 56.73 26.835L56.817 26.066H49.368L49.182 26.432C48.9141 26.994 48.5379 27.4976 48.075 27.914C47.2248 28.5969 46.153 28.9429 45.064 28.886C43.9848 28.9182 42.9201 28.6316 42.003 28.062C41.3698 27.6198 40.8495 27.035 40.4839 26.3548C40.1182 25.6746 39.9176 24.918 39.898 24.146H57.049L57.059 23.468C57.1136 21.7984 57.0431 20.1271 56.848 18.468C56.5874 16.5558 55.9083 14.7246 54.859 13.105ZM40.137 18.526C40.3378 17.5453 40.7935 16.6347 41.458 15.886C41.877 15.4681 42.3806 15.1448 42.935 14.9378C43.4894 14.7308 44.0816 14.645 44.672 14.686C45.8585 14.6426 47.0162 15.0577 47.905 15.845C48.6522 16.5522 49.1254 17.5007 49.241 18.523L40.137 18.526Z\\\\" fill=\\\\"white\\\\"\\\u003e\\\u003c\\\\/path\\\u003e\\\\n\\\u003cpath d=\\\\"M73.4671 8.07805C71.6259 8.00296 69.8193 8.59448 68.3791 9.74405C67.8289 10.2221 67.3384 10.7646 66.9181 11.36V8.66005H59.4651V34.629H67.2541V22.188C67.1697 20.8038 67.3952 19.4181 67.9141 18.132C68.6731 16.678 70.1671 15.971 72.4821 15.971H74.9991V8.07104L73.4671 8.07805Z\\\\" fill=\\\\"white\\\\"\\\u003e\\\u003c\\\\/path\\\u003e\\\\n\\\u003c\\\\/svg\\\u003e",
331+
332+
"sameAs": [
333+
"https:\\\\/\\\\/twitter.com\\\\/aer_sf?lang=en"
334+
"https:\\\\/\\\\/www.facebook.com\\\\/aersf\\\\/"
335+
""
336+
"https:\\\\/\\\\/www.instagram.com\\\\/aer_sf\\\\/"
337+
""
338+
""
339+
"https:\\\\/\\\\/www.youtube.com\\\\/@aerdesigns"
340+
""
341+
],
342+
"url": "https:\\\\/\\\\/aersf.com"
343+
}
344+
345+
</script>
346+
</head></html>`;
347+
348+
mockAgent.get('http://www.test.com')
349+
.intercept({ path: '/' })
350+
.reply(200, metaHTML);
351+
352+
// With throwOnJSONParseError enabled the scrape is expected to reject.
// A bare `.catch()` would let this test pass vacuously if `ogs` resolved
// (no assertion would ever run), so the fulfilment handler fails explicitly.
// The two-argument form of `.then` is used so that this deliberate failure
// is not swallowed by the rejection handler.
return ogs({ url: 'www.test.com', jsonLDOptions: { throwOnJSONParseError: true } })
  .then(
    function () {
      throw new Error('expected ogs to reject when jsonLDOptions.throwOnJSONParseError is true');
    },
    function (data) {
      expect(data.result.success).to.be.eql(false);
    },
  );
355+
});
356+
282357
it('encoding - utf-8', function () {
283358
/* eslint-disable max-len */
284359
const metaHTML = `<html><head>

0 commit comments

Comments
 (0)