|
6 | 6 |
|
7 | 7 | from __future__ import annotations |
8 | 8 |
|
import logging
import subprocess
from pathlib import Path
from typing import TYPE_CHECKING, Literal, get_args
10 | 13 |
|
11 | 14 | from bs4 import BeautifulSoup |
12 | 15 | from playwright.sync_api import sync_playwright |
13 | 16 |
|
14 | 17 | from .. import parser, types |
15 | 18 |
|
16 | 19 | if TYPE_CHECKING: |
17 | | - from playwright.sync_api import Browser, Playwright |
| 20 | + from playwright.sync_api import Browser, BrowserType, Playwright |
18 | 21 |
|
| 22 | +logger = logging.getLogger(__name__) |
19 | 23 |
|
20 | | -def get_browser(playwright: Playwright, name: str) -> Browser: |
21 | | - if not hasattr(playwright, name): |
| 24 | +BrowserName = Literal["chromium", "firefox", "webkit"] |
| 25 | +BrowserChannel = Literal[ |
| 26 | + "chrome", |
| 27 | + "msedge", |
| 28 | + "chrome-beta", |
| 29 | + "msedge-beta", |
| 30 | + "chrome-dev", |
| 31 | + "msedge-dev", |
| 32 | + "chrome-canary", |
| 33 | + "msedge-canary", |
| 34 | +] |
| 35 | +BrowserLabel = BrowserName | BrowserChannel |
| 36 | + |
| 37 | + |
| 38 | +def get_browser(playwright: Playwright, name: BrowserLabel) -> Browser: |
| 39 | + browser_name = "chromium" |
| 40 | + browser_channel = None |
| 41 | + if name in BrowserName.__args__: # type: ignore[attr-defined] |
| 42 | + browser_name = name |
| 43 | + elif name in BrowserChannel.__args__: # type: ignore[attr-defined] |
| 44 | + browser_channel = name |
| 45 | + else: |
22 | 46 | raise ValueError(f"Browser type '{name}' is not supported.") |
23 | | - return playwright.chromium.launch(channel=name) |
| 47 | + |
| 48 | + # Install browser automatically. |
| 49 | + logger.info(f"Now installing browser '{name}' automatically.") |
| 50 | + subprocess.run(f"playwright install {name}".split()) |
| 51 | + |
| 52 | + return getattr(playwright, browser_name).launch(channel=browser_channel) |
24 | 53 |
|
25 | 54 |
|
26 | 55 | def fetch( |
27 | 56 | url: str, |
28 | 57 | fuzzy_mode: bool = False, |
29 | | - browser_name: str = "chromium", |
| 58 | + browser_name: BrowserLabel = "chromium", |
30 | 59 | ) -> types.Metadata | types.MetadataFuzzy: |
31 | 60 | """Fetch and parse HTTP content.""" |
32 | 61 | with sync_playwright() as p: |
|
0 commit comments