Skip to content

Commit d3fe2d9

Browse files
committed
2 parents 35a382f + fb811f6 commit d3fe2d9

1 file changed

Lines changed: 89 additions & 10 deletions

File tree

src/common.ts

Lines changed: 89 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,16 @@ const COLORS = {
88
Crash: "#E52F18",
99
};
1010

11+
/**
 * Verbs that mark where the "what happened" part of an article's first
 * sentence starts when no location could be extracted. Each one is searched
 * for as a space-delimited word; the earliest match wins.
 */
const happeningsKeywords = [
  'stated',
  'landed',
  'departed',
  'performed',
];
12+
/**
 * Phrases after which the location description begins. The location is
 * taken to start immediately after the earliest occurrence of any of these.
 */
const locationKeywords = [
  ', was',
  ', had been',
];
13+
14+
/**
 * Builds the "location + date" headline fragment.
 *
 * The location is included only when `locationExists` is true; the date is
 * always appended. The combined text is trimmed and its first character is
 * upper-cased.
 *
 * @param locationExists - whether `location` holds a usable value
 * @param location - location text; ignored when `locationExists` is false
 * @param date - date text appended after the location
 * @returns the trimmed, capitalised fragment, or an empty string when there
 *          is nothing left after trimming
 */
function formatLocationAndDate(locationExists: boolean, location: string, date: string): string {
  const combined = ((locationExists ? location : '') + date).trim();

  // charAt(0) yields '' for an empty string, whereas combined[0] would be
  // undefined and make .toUpperCase() throw.
  return combined.charAt(0).toUpperCase() + combined.slice(1);
}
20+
1121
export async function generateHTML(articleId: string) {
1222
const url = `https://avherald.com/h?article=${articleId}`;
1323

@@ -50,20 +60,89 @@ export async function generateHTML(articleId: string) {
5060
.map((node, i) => {
5161
if (node.nodeType === NodeType.TEXT_NODE) {
5262
if (i === 0) {
53-
// extract the location, which means the stuff between "was" and "when", and what happened, which begins after "when".
63+
// extract the location, which means the stuff between a location keyword and "when" (or when sentence ends and a new one begins), and what happened ("happenings"), which begins after "when" (or the next sentence).
5464
const nodeTextContent = node.text;
55-
const wasIndex = nodeTextContent.indexOf(' was ');
56-
const whenIndex = nodeTextContent.indexOf(' when ');
65+
const locationStartIndex = Math.min(
66+
...locationKeywords.map((keyword) => {
67+
const index = nodeTextContent.indexOf(keyword)
5768

58-
// if things go unexpectedly, abort
59-
if (wasIndex >= whenIndex || wasIndex < 0) return node.text;
69+
if (index >= 0) {
70+
return index + keyword.length;
71+
}
6072

61-
let location = nodeTextContent.substring(wasIndex + 5, whenIndex);
62-
location = location[0].toUpperCase() + location.slice(1);
63-
let happenings = nodeTextContent.substring(whenIndex + 6);
64-
happenings = happenings[0].toUpperCase() + happenings.slice(1);
73+
return index;
74+
}).filter((i) => i >= 0)
75+
);
6576

66-
return `📌 ${location + date}\n\n${happenings}`;
77+
const whenIndex = nodeTextContent.indexOf(' when ');
78+
79+
// index of the first occurence of a sentence ending in a full-stop and the next beginning with a capital letter
80+
const firstSentenceBoundaryIndex = Math.min(
81+
...nodeTextContent.split('.').map((sentence, i, arr) => {
82+
if (sentence.length === 0) return Infinity; // empty sentence ... wouldnt want to select one of those
83+
84+
// the index of the next sentence is the sum of the lengths of all previous sentences + fullstops
85+
const nextIndex = arr.slice(0, i + 1).map(sentence => sentence.length + 1).reduce((partialSum, currentValue) => partialSum + currentValue, 0);
86+
87+
const nextsentence = arr[i + 1];
88+
89+
// if this is the last sentence, return index of this sentence's fullstop
90+
if (i === arr.length - 1) {
91+
return nextIndex - 1;
92+
}
93+
94+
// if this isnt the last sentence and the next one begins with a space and capital letter, return index of this sentence's fullstop
95+
if (i < arr.length - 1 && nextsentence[0] === ' ' && nextsentence[1] === nextsentence[1].toUpperCase()) return nextIndex - 1;
96+
97+
// fallback (does this ever happen?)
98+
return Infinity;
99+
})
100+
);
101+
102+
// based on where "when" and the first sentence boundary are, calculate where the location part ends and "happenings" starts
103+
let happeningsStartIndex: number;
104+
105+
const locationEndIndex = (() => {
106+
if (whenIndex === -1 || firstSentenceBoundaryIndex < whenIndex) {
107+
// we go with sentence
108+
happeningsStartIndex = firstSentenceBoundaryIndex + 2;
109+
return firstSentenceBoundaryIndex;
110+
} else {
111+
// we go with "when"
112+
happeningsStartIndex = whenIndex + 6;
113+
return whenIndex;
114+
}
115+
})();
116+
117+
let locationExists: boolean;
118+
let location: string;
119+
let happenings: string;
120+
121+
// if somehow the location is supposed to start before it ends, or if it has no start, then there is no location
122+
if (locationStartIndex >= locationEndIndex || locationStartIndex <= 0) {
123+
// location doesn't exist
124+
locationExists = false;
125+
// so "happenings" starts with keyword, not "when"
126+
happeningsStartIndex = Math.min( // whichever is first in the text
127+
...happeningsKeywords
128+
.map(word => nodeTextContent.indexOf(' ' + word + ' ')) // map by index of first occurence
129+
.filter(i => i >= 0) // discard situationkeywords that dont exist in the text, because Math.min() prefers them (-1)
130+
) + 1;
131+
} else {
132+
// location exists
133+
location = nodeTextContent.slice(locationStartIndex, locationEndIndex);
134+
location = location[0].toUpperCase() + location.slice(1);
135+
locationExists = true;
136+
}
137+
138+
if (happenings === '') {
139+
happenings = nodeTextContent;
140+
} else {
141+
happenings = nodeTextContent.slice(happeningsStartIndex);
142+
happenings = happenings[0].toUpperCase() + happenings.slice(1);
143+
}
144+
145+
return `📌 ${formatLocationAndDate(locationExists, location, date)}\n\n${happenings}`;
67146
} else return node.text;
68147
} else {
69148
if (node.nodeType == NodeType.ELEMENT_NODE && (node as unknown as Element).tagName === "BR") {

0 commit comments

Comments
 (0)