@@ -26,8 +26,6 @@ type ConsoleLocation = ReturnType<ConsoleMessage['location']>;
2626
2727type ReadableArticle = { title ?: string ; content ?: string ; url : string } ;
2828
29- type SearchResult = { title : string ; link : string ; snippet : string ; content ?: string } ;
30-
3129type StartOptions = {
3230 port : number ;
3331 profile : boolean ;
@@ -52,13 +50,6 @@ type ConsoleOptions = {
5250 noSerialize ?: boolean ;
5351} ;
5452
55- type SearchOptions = {
56- port : number ;
57- count : number ;
58- content ?: boolean ;
59- timeout ?: number ;
60- } ;
61-
6253type ContentOptions = { port : number ; timeout ?: number } ;
6354
6455type CookiesOptions = { port : number } ;
@@ -776,115 +767,6 @@ program
776767 }
777768 } ) ;
778769
779- program
780- . command ( 'search <query...>' )
781- . description ( 'Google search with optional readable content extraction.' )
782- . option (
783- '--port <number>' ,
784- 'Debugger port (default: 9222)' ,
785- ( value ) => Number . parseInt ( value , 10 ) ,
786- DEFAULT_PORT ,
787- )
788- . option (
789- '-n, --count <number>' ,
790- 'Number of results to return (default: 5, max: 50)' ,
791- ( value ) => Number . parseInt ( value , 10 ) ,
792- 5 ,
793- )
794- . option ( '--content' , 'Fetch readable content for each result (slower).' , false )
795- . option (
796- '--timeout <seconds>' ,
797- 'Per-navigation timeout in seconds (default: 10).' ,
798- ( value ) => Number . parseInt ( value , 10 ) ,
799- 10 ,
800- )
801- . action ( async ( queryWords : string [ ] , options : SearchOptions ) => {
802- const port = options . port ;
803- const count = Math . max ( 1 , Math . min ( options . count , 50 ) ) ;
804- const fetchContent = Boolean ( options . content ) ;
805- const timeoutMs = Math . max ( 3 , options . timeout ?? 10 ) * 1000 ;
806- const query = queryWords . join ( ' ' ) ;
807-
808- const { browser, page } = await getActivePage ( port ) ;
809- try {
810- const results : SearchResult [ ] = [ ] ;
811- let start = 0 ;
812- while ( results . length < count ) {
813- const searchUrl = `https://www.google.com/search?q=${ encodeURIComponent ( query ) } &start=${ start } ` ;
814- await page
815- . goto ( searchUrl , { waitUntil : 'domcontentloaded' , timeout : timeoutMs } )
816- . catch ( ( ) => undefined ) ;
817- await page . waitForSelector ( 'div.MjjYud' , { timeout : 3000 } ) . catch ( ( ) => undefined ) ;
818-
819- const pageResults = await page . evaluate ( ( ) : SearchResult [ ] => {
820- const items : SearchResult [ ] = [ ] ;
821- for ( const result of document . querySelectorAll ( 'div.MjjYud' ) ) {
822- const titleEl = result . querySelector ( 'h3' ) ;
823- const linkEl = result . querySelector ( 'a' ) ;
824- const snippetEl = result . querySelector ( 'div.VwiC3b, div[data-sncf]' ) ;
825- const link = linkEl ?. getAttribute ( 'href' ) ?? '' ;
826- if ( titleEl && linkEl && link && ! link . startsWith ( 'https://www.google.com' ) ) {
827- items . push ( {
828- title : titleEl . textContent ?. trim ( ) ?? '' ,
829- link,
830- snippet : snippetEl ?. textContent ?. trim ( ) ?? '' ,
831- } ) ;
832- }
833- }
834- return items ;
835- } ) ;
836-
837- for ( const result of pageResults ) {
838- if ( results . length >= count ) {
839- break ;
840- }
841- if ( ! results . some ( ( existing ) => existing . link === result . link ) ) {
842- results . push ( result ) ;
843- }
844- }
845-
846- if ( pageResults . length === 0 || start >= 90 ) {
847- break ;
848- }
849- start += 10 ;
850- }
851-
852- if ( fetchContent ) {
853- for ( const result of results ) {
854- try {
855- await page
856- . goto ( result . link , { waitUntil : 'networkidle2' , timeout : timeoutMs } )
857- . catch ( ( ) => undefined ) ;
858- const article = await extractReadableContent ( page ) ;
859- result . content = article . content ?? '(No readable content)' ;
860- } catch ( error ) {
861- const message = error instanceof Error ? error . message : String ( error ) ;
862- result . content = `(Error fetching content: ${ message } )` ;
863- }
864- }
865- }
866-
867- for ( const [ index , r ] of results . entries ( ) ) {
868- console . log ( `--- Result ${ index + 1 } ---` ) ;
869- console . log ( `Title: ${ r . title } ` ) ;
870- console . log ( `Link: ${ r . link } ` ) ;
871- if ( r . snippet ) {
872- console . log ( `Snippet: ${ r . snippet } ` ) ;
873- }
874- if ( r . content ) {
875- console . log ( `Content:\n${ r . content } ` ) ;
876- }
877- console . log ( '' ) ;
878- }
879-
880- if ( results . length === 0 ) {
881- console . log ( 'No results found.' ) ;
882- }
883- } finally {
884- await browser . disconnect ( ) ;
885- }
886- } ) ;
887-
888770program
889771 . command ( 'content <url>' )
890772 . description ( 'Extract readable content from a URL as markdown-like text.' )
0 commit comments