@@ -35,11 +35,6 @@ pub mod session;
3535use std:: fs;
3636use std:: path:: Path ;
3737use std:: sync:: OnceLock ;
38- use std:: time:: Duration ;
39-
40- use chromiumoxide:: browser:: { Browser , BrowserConfig , HeadlessMode } ;
41- use chromiumoxide:: handler:: viewport:: Viewport ;
42- use futures:: StreamExt ;
4338
4439use crate :: config:: BrowserConfig as AppBrowserConfig ;
4540
@@ -163,11 +158,9 @@ fn which_browser(name: &str) -> Option<String> {
163158
164159/// Fetch HTML content using headless browser and convert to readable markdown
165160///
166- /// This function:
167- /// 1. Requires Chrome/Chromium to be installed
168- /// 2. Renders page with headless browser (handles SPAs/JS)
169- /// 3. Applies readability algorithm to extract main content
170- /// 4. Converts to markdown for token-efficient representation
161+ /// This is the one-shot convenience function. It launches a browser, navigates,
162+ /// waits for the page to settle, extracts readable content, and tears down the
163+ /// browser — all using the same code path as persistent browser sessions.
171164///
172165/// Browser settings (executable path, profile) are read from the global
173166/// BrowserContext initialized at app startup.
@@ -184,30 +177,21 @@ pub async fn fetch_html(url: &str, max_length: Option<usize>) -> Result<FetchHtm
184177 ) ) ;
185178 }
186179
187- // Get browser settings from global context
188- let ctx = browser_context ( ) ;
189-
190- // Resolve browser executable: context -> auto-detect
191- let browser_path = ctx
192- . and_then ( |c| c. chrome_executable . clone ( ) )
193- . or_else ( detect_browser)
194- . ok_or_else ( || {
195- "No Chrome/Chromium browser found. Install chromium or google-chrome to use this tool."
196- . to_string ( )
197- } ) ?;
198-
199- // Get settings from context
200- let ctx = ctx. cloned ( ) . unwrap_or_default ( ) ;
180+ // Launch browser, navigate, wait for settle — shared with session module
181+ let ( mut browser, page, handler_task, temp_dir) = session:: launch_browser ( url) . await ?;
201182
202- let html = fetch_with_browser ( url, & browser_path, & ctx) . await ?;
183+ // Extract content
184+ let content = session:: extract_page_content ( & page, url) . await ?;
203185
204- // Apply readability to extract main content
205- let readable = extract_readable_content ( & html, url) ?;
206-
207- // Convert to markdown
208- let mut markdown = html_to_markdown ( & readable. content ) ;
186+ // Tear down immediately (one-shot, no persistent session)
187+ let _ = browser. close ( ) . await ;
188+ handler_task. abort ( ) ;
189+ if let Some ( ref temp) = temp_dir {
190+ let _ = std:: fs:: remove_dir_all ( temp) ;
191+ }
209192
210193 // Truncate if needed
194+ let mut markdown = content. markdown ;
211195 if markdown. len ( ) > max_length {
212196 let mut end = max_length;
213197 while end > 0 && !markdown. is_char_boundary ( end) {
@@ -223,185 +207,11 @@ pub async fn fetch_html(url: &str, max_length: Option<usize>) -> Result<FetchHtm
223207
224208 Ok ( FetchHtmlResult {
225209 content : markdown,
226- title : readable . title ,
210+ title : content . title ,
227211 url : url. to_string ( ) ,
228212 } )
229213}
230214
231- /// Fetch page using headless browser (handles JavaScript rendering)
232- ///
233- /// # Arguments
234- /// * `url` - The URL to fetch
235- /// * `browser_path` - Path to Chrome/Chromium executable
236- /// * `ctx` - Browser context with profile, viewport, and timing settings
237- async fn fetch_with_browser (
238- url : & str ,
239- browser_path : & str ,
240- ctx : & BrowserContext ,
241- ) -> Result < String , String > {
242- // Copy profile to isolated temp directory to avoid SingletonLock conflicts.
243- // See module doc comment for full explanation of why this is necessary.
244- let temp_dir = if let ( Some ( data_dir) , Some ( prof) ) =
245- ( & ctx. chrome_user_data_dir , & ctx. chrome_profile )
246- {
247- let source_profile = std:: path:: Path :: new ( data_dir) . join ( prof) ;
248- if source_profile. exists ( ) {
249- // Use PID to isolate temp dirs between concurrent Codey instances
250- let temp_base =
251- std:: env:: temp_dir ( ) . join ( format ! ( "codey-browser-{}" , std:: process:: id( ) ) ) ;
252- // Clean up any previous temp dir to ensure fresh copy
253- if temp_base. exists ( ) {
254- let _ = std:: fs:: remove_dir_all ( & temp_base) ;
255- }
256- std:: fs:: create_dir_all ( & temp_base)
257- . map_err ( |e| format ! ( "Failed to create temp browser dir: {}" , e) ) ?;
258-
259- // Copy profile to temp_base/Default (becomes the default profile)
260- let dest_profile = temp_base. join ( "Default" ) ;
261- copy_dir_recursive ( & source_profile, & dest_profile)
262- . map_err ( |e| format ! ( "Failed to copy browser profile: {}" , e) ) ?;
263-
264- Some ( temp_base)
265- } else {
266- return Err ( format ! (
267- "Browser profile not found: {}" ,
268- source_profile. display( )
269- ) ) ;
270- }
271- } else {
272- None
273- } ;
274-
275- // Configure browser
276- let headless_mode = if ctx. headless {
277- HeadlessMode :: True
278- } else {
279- HeadlessMode :: False
280- } ;
281-
282- // When using a copied profile, we need real Keychain access to decrypt cookies.
283- //
284- // chromiumoxide's DEFAULT_ARGS (from Puppeteer) include:
285- // "--password-store=basic" - Uses basic password store instead of OS keychain
286- // "--use-mock-keychain" - Uses mock keychain for testing
287- //
288- // These flags are designed for automation/testing where you don't want system prompts,
289- // but they prevent Chrome from decrypting cookies that were encrypted with the real
290- // Keychain key. We must disable defaults and provide our own args list.
291- //
292- // Reference: chromiumoxide 0.7.0 browser.rs DEFAULT_ARGS
293- // Original source: https://github.com/nickelc/chromiumoxide/blob/v0.7.0/src/browser.rs
294- let using_profile = temp_dir. is_some ( ) ;
295-
296- let mut config = BrowserConfig :: builder ( )
297- . no_sandbox ( )
298- . headless_mode ( headless_mode)
299- . window_size ( ctx. viewport_width , ctx. viewport_height )
300- . viewport ( Viewport {
301- width : ctx. viewport_width ,
302- height : ctx. viewport_height ,
303- device_scale_factor : None ,
304- emulating_mobile : false ,
305- is_landscape : false ,
306- has_touch : false ,
307- } ) ;
308-
309- if using_profile {
310- // Disable chromiumoxide defaults and add our own (without mock keychain flags)
311- config = config. disable_default_args ( ) . args ( [
312- "--disable-background-networking" ,
313- "--enable-features=NetworkService,NetworkServiceInProcess" ,
314- "--disable-background-timer-throttling" ,
315- "--disable-backgrounding-occluded-windows" ,
316- "--disable-breakpad" ,
317- "--disable-client-side-phishing-detection" ,
318- "--disable-component-extensions-with-background-pages" ,
319- "--disable-default-apps" ,
320- "--disable-dev-shm-usage" ,
321- "--disable-extensions" ,
322- "--disable-features=TranslateUI" ,
323- "--disable-hang-monitor" ,
324- "--disable-ipc-flooding-protection" ,
325- "--disable-popup-blocking" ,
326- "--disable-prompt-on-repost" ,
327- "--disable-renderer-backgrounding" ,
328- "--disable-sync" ,
329- "--force-color-profile=srgb" ,
330- "--metrics-recording-only" ,
331- "--no-first-run" ,
332- "--enable-automation" ,
333- // OMITTED: "--password-store=basic" - need real password store for cookies
334- // OMITTED: "--use-mock-keychain" - need real Keychain access
335- "--enable-blink-features=IdleDetection" ,
336- "--lang=en_US" ,
337- "--disable-gpu" ,
338- ] ) ;
339- } else {
340- config = config. arg ( "--disable-gpu" ) ;
341- }
342-
343- config = config. chrome_executable ( browser_path) ;
344-
345- // Use temp dir if we copied a profile, otherwise use user_data_dir directly
346- if let Some ( ref temp) = temp_dir {
347- config = config. user_data_dir ( temp) ;
348- } else if let Some ( ref dir) = ctx. chrome_user_data_dir {
349- // Fallback: use user_data_dir directly (may conflict with running Chrome)
350- config = config. user_data_dir ( dir) ;
351- if let Some ( ref prof) = ctx. chrome_profile {
352- config = config. arg ( format ! ( "--profile-directory={}" , prof) ) ;
353- }
354- }
355-
356- let config = config
357- . build ( )
358- . map_err ( |e| format ! ( "Failed to configure browser: {:?}" , e) ) ?;
359-
360- // Launch browser with timeout
361- let launch_result =
362- tokio:: time:: timeout ( Duration :: from_secs ( 30 ) , Browser :: launch ( config) ) . await ;
363-
364- let ( mut browser, mut handler) = match launch_result {
365- Ok ( Ok ( ( browser, handler) ) ) => ( browser, handler) ,
366- Ok ( Err ( e) ) => return Err ( format ! ( "Failed to launch browser: {}" , e) ) ,
367- Err ( _) => return Err ( "Browser launch timed out after 30 seconds" . to_string ( ) ) ,
368- } ;
369-
370- // Spawn handler task (required by chromiumoxide)
371- let handle = tokio:: spawn ( async move {
372- while let Some ( _event) = handler. next ( ) . await {
373- // Process browser events
374- }
375- } ) ;
376-
377- // Navigate to page
378- let page_result = tokio:: time:: timeout ( Duration :: from_secs ( 60 ) , async {
379- let page = browser
380- . new_page ( url)
381- . await
382- . map_err ( |e| format ! ( "Failed to navigate: {}" , e) ) ?;
383-
384- // Wait for JavaScript to render (configurable for SPAs like Twitter)
385- tokio:: time:: sleep ( Duration :: from_millis ( ctx. page_load_wait_ms ) ) . await ;
386-
387- // Get rendered HTML
388- page. content ( )
389- . await
390- . map_err ( |e| format ! ( "Failed to get page content: {}" , e) )
391- } )
392- . await ;
393-
394- // Clean up
395- let _ = browser. close ( ) . await ;
396- handle. abort ( ) ;
397-
398- match page_result {
399- Ok ( Ok ( html) ) => Ok ( html) ,
400- Ok ( Err ( e) ) => Err ( e) ,
401- Err ( _) => Err ( "Page load timed out after 60 seconds" . to_string ( ) ) ,
402- }
403- }
404-
405215/// Recursively copy a directory and its contents
406216pub ( crate ) fn copy_dir_recursive ( src : & Path , dst : & Path ) -> std:: io:: Result < ( ) > {
407217 fs:: create_dir_all ( dst) ?;
0 commit comments