@@ -8,6 +8,16 @@ const COLORS = {
88 Crash : "#E52F18" ,
99} ;
1010
/**
 * Verbs that can mark the start of the "what happened" part of an article's
 * first sentence. `generateHTML` falls back to these when no location could be
 * extracted: it searches for each word surrounded by single spaces and starts
 * the "happenings" text at the earliest match.
 */
const happeningsKeywords = ['stated', 'landed', 'departed', 'performed'];

/**
 * Phrases that introduce the location part of an article's first sentence.
 * `generateHTML` treats the text immediately after the earliest occurrence of
 * one of these phrases as the beginning of the location.
 */
const locationKeywords = [', was', ', had been'];
13+
/**
 * Builds the headline fragment made of the (optional) location followed by the
 * date, trimmed and with its first character upper-cased.
 *
 * @param locationExists - Whether `location` should be included at all. When
 *   `false`, `location` is ignored entirely (it may even be unassigned).
 * @param location - Location text; concatenated directly in front of `date`
 *   without any separator being inserted.
 * @param date - Date text appended after the location.
 * @returns The trimmed, capitalized text, or an empty string when there is
 *   nothing left after trimming.
 */
function formatLocationAndDate(locationExists: boolean, location: string, date: string): string {
  const combined = ((locationExists ? location : '') + date).trim();

  // Guard the empty case: indexing the first character of an empty string
  // yields undefined, and calling toUpperCase() on it would throw.
  if (combined.length === 0) {
    return combined;
  }

  return combined.charAt(0).toUpperCase() + combined.slice(1);
}
20+
1121export async function generateHTML ( articleId : string ) {
1222 const url = `https://avherald.com/h?article=${ articleId } ` ;
1323
@@ -50,20 +60,89 @@ export async function generateHTML(articleId: string) {
5060 . map ( ( node , i ) => {
5161 if ( node . nodeType === NodeType . TEXT_NODE ) {
5262 if ( i === 0 ) {
53- // extract the location, which means the stuff between "was" and "when", and what happened, which begins after "when".
63+ // extract the location, which means the stuff between a location keyword and "when" (or when sentence ends and a new one begins) , and what happened ("happenings") , which begins after "when" (or the next sentence) .
5464 const nodeTextContent = node . text ;
55- const wasIndex = nodeTextContent . indexOf ( ' was ' ) ;
56- const whenIndex = nodeTextContent . indexOf ( ' when ' ) ;
65+ const locationStartIndex = Math . min (
66+ ...locationKeywords . map ( ( keyword ) => {
67+ const index = nodeTextContent . indexOf ( keyword )
5768
58- // if things go unexpectedly, abort
59- if ( wasIndex >= whenIndex || wasIndex < 0 ) return node . text ;
69+ if ( index >= 0 ) {
70+ return index + keyword . length ;
71+ }
6072
61- let location = nodeTextContent . substring ( wasIndex + 5 , whenIndex ) ;
62- location = location [ 0 ] . toUpperCase ( ) + location . slice ( 1 ) ;
63- let happenings = nodeTextContent . substring ( whenIndex + 6 ) ;
64- happenings = happenings [ 0 ] . toUpperCase ( ) + happenings . slice ( 1 ) ;
73+ return index ;
74+ } ) . filter ( ( i ) => i >= 0 )
75+ ) ;
6576
66- return `📌 ${ location + date } \n\n${ happenings } ` ;
77+ const whenIndex = nodeTextContent . indexOf ( ' when ' ) ;
78+
79+ // index of the first occurence of a sentence ending in a full-stop and the next beginning with a capital letter
80+ const firstSentenceBoundaryIndex = Math . min (
81+ ...nodeTextContent . split ( '.' ) . map ( ( sentence , i , arr ) => {
82+ if ( sentence . length === 0 ) return Infinity ; // empty sentence ... wouldnt want to select one of those
83+
84+ // the index of the next sentence is the sum of the lengths of all previous sentences + fullstops
85+ const nextIndex = arr . slice ( 0 , i + 1 ) . map ( sentence => sentence . length + 1 ) . reduce ( ( partialSum , currentValue ) => partialSum + currentValue , 0 ) ;
86+
87+ const nextsentence = arr [ i + 1 ] ;
88+
89+ // if this is the last sentence, return index of this sentence's fullstop
90+ if ( i === arr . length - 1 ) {
91+ return nextIndex - 1 ;
92+ }
93+
94+ // if this isnt the last sentence and the next one begins with a space and capital letter, return index of this sentence's fullstop
95+ if ( i < arr . length - 1 && nextsentence [ 0 ] === ' ' && nextsentence [ 1 ] === nextsentence [ 1 ] . toUpperCase ( ) ) return nextIndex - 1 ;
96+
97+ // fallback (does this ever happen?)
98+ return Infinity ;
99+ } )
100+ ) ;
101+
102+ // based on where "when" and the first sentence boundary are, calculate where the location part ends and "happenings" starts
103+ let happeningsStartIndex : number ;
104+
105+ const locationEndIndex = ( ( ) => {
106+ if ( whenIndex === - 1 || firstSentenceBoundaryIndex < whenIndex ) {
107+ // we go with sentence
108+ happeningsStartIndex = firstSentenceBoundaryIndex + 2 ;
109+ return firstSentenceBoundaryIndex ;
110+ } else {
111+ // we go with "when"
112+ happeningsStartIndex = whenIndex + 6 ;
113+ return whenIndex ;
114+ }
115+ } ) ( ) ;
116+
117+ let locationExists : boolean ;
118+ let location : string ;
119+ let happenings : string ;
120+
121+ // if somehow the location is supposed to start before it ends, or if it has no start, then there is no location
122+ if ( locationStartIndex >= locationEndIndex || locationStartIndex <= 0 ) {
123+ // location doesnt exist
124+ locationExists = false ;
125+ // so "happenings" starts with keyword, not "when"
126+ happeningsStartIndex = Math . min ( // whichever is first in the text
127+ ...happeningsKeywords
128+ . map ( word => nodeTextContent . indexOf ( ' ' + word + ' ' ) ) // map by index of first occurence
129+ . filter ( i => i >= 0 ) // discard situationkeywords that dont exist in the text, because Math.min() prefers them (-1)
130+ ) + 1 ;
131+ } else {
132+ // location exists
133+ location = nodeTextContent . slice ( locationStartIndex , locationEndIndex ) ;
134+ location = location [ 0 ] . toUpperCase ( ) + location . slice ( 1 ) ;
135+ locationExists = true ;
136+ }
137+
138+ if ( happenings === '' ) {
139+ happenings = nodeTextContent ;
140+ } else {
141+ happenings = nodeTextContent . slice ( happeningsStartIndex ) ;
142+ happenings = happenings [ 0 ] . toUpperCase ( ) + happenings . slice ( 1 ) ;
143+ }
144+
145+ return `📌 ${ formatLocationAndDate ( locationExists , location , date ) } \n\n${ happenings } ` ;
67146 } else return node . text ;
68147 } else {
69148 if ( node . nodeType == NodeType . ELEMENT_NODE && ( node as unknown as Element ) . tagName === "BR" ) {
0 commit comments