src/brightdata/api/async_unblocker.py (31 changes: 8 additions & 23 deletions)

@@ -70,7 +70,7 @@ async def trigger(
         zone: str,
         url: str,
         customer: Optional[str] = None,
-        **kwargs  # Additional params like country, format, etc.
+        **kwargs,  # Additional params like country, format, etc.
     ) -> Optional[str]:
         """
         Trigger async unblocker request.
@@ -105,21 +105,14 @@ async def trigger(
         payload.update(kwargs)

         async with self.engine.post_to_url(
-            f"{self.engine.BASE_URL}{self.TRIGGER_ENDPOINT}",
-            params=params,
-            json_data=payload
+            f"{self.engine.BASE_URL}{self.TRIGGER_ENDPOINT}", params=params, json_data=payload
         ) as response:
             # Extract response_id from x-response-id header
             # Note: This is different from datasets API which returns snapshot_id in body
             response_id = response.headers.get("x-response-id")
             return response_id

-    async def get_status(
-        self,
-        zone: str,
-        response_id: str,
-        customer: Optional[str] = None
-    ) -> str:
+    async def get_status(self, zone: str, response_id: str, customer: Optional[str] = None) -> str:
         """
         Check if response is ready.

@@ -142,18 +135,14 @@ async def get_status(
         >>> if status == "ready":
         ...     # Fetch results
         """
-        params = {
-            "zone": zone,
-            "response_id": response_id
-        }
+        params = {"zone": zone, "response_id": response_id}

         # Add customer to query params if provided
         if customer:
             params["customer"] = customer

         async with self.engine.get_from_url(
-            f"{self.engine.BASE_URL}{self.FETCH_ENDPOINT}",
-            params=params
+            f"{self.engine.BASE_URL}{self.FETCH_ENDPOINT}", params=params
         ) as response:
             if response.status == 200:
                 return "ready"
@@ -168,7 +157,7 @@ async def fetch_result(
         zone: str,
         response_id: str,
         response_format: str = "json",
-        customer: Optional[str] = None
+        customer: Optional[str] = None,
     ) -> Any:
         """
         Fetch results when ready.
@@ -203,18 +192,14 @@ async def fetch_result(
         ...     customer="hl_67e5ed38"
         ... )
         """
-        params = {
-            "zone": zone,
-            "response_id": response_id
-        }
+        params = {"zone": zone, "response_id": response_id}

         # Add customer to query params if provided
         if customer:
             params["customer"] = customer

         async with self.engine.get_from_url(
-            f"{self.engine.BASE_URL}{self.FETCH_ENDPOINT}",
-            params=params
+            f"{self.engine.BASE_URL}{self.FETCH_ENDPOINT}", params=params
         ) as response:
             if response.status == 200:
                 # Success - parse based on format
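Taken together, trigger(), get_status(), and fetch_result() form a trigger/poll/fetch lifecycle, with the job handle coming from the x-response-id header rather than the response body (unlike the datasets API, which returns a snapshot_id). A minimal usage sketch; the zone name is a placeholder and the two-second poll interval is an arbitrary choice, not something this diff prescribes:

    import asyncio

    async def scrape_with_unblocker(api, zone: str, url: str):
        # trigger() returns the job handle taken from the x-response-id header
        response_id = await api.trigger(zone=zone, url=url)

        # Poll until get_status() reports the result is ready
        while await api.get_status(zone, response_id) != "ready":
            await asyncio.sleep(2)

        # Fetch the finished payload in the requested format
        return await api.fetch_result(zone, response_id, response_format="json")
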
src/brightdata/api/scrape_service.py (2 changes: 0 additions & 2 deletions)

@@ -182,5 +182,3 @@ def instagram(self):
             bearer_token=self._client.token, engine=self._client.engine
         )
         return self._instagram
-
-
src/brightdata/api/search_service.py (3 changes: 0 additions & 3 deletions)

@@ -103,7 +103,6 @@ async def google(
             **kwargs,
         )

-
     async def bing(
         self,
         query: Union[str, List[str]],
@@ -132,7 +131,6 @@ async def bing(
             **kwargs,
         )

-
     async def yandex(
         self,
         query: Union[str, List[str]],
@@ -161,7 +159,6 @@ async def yandex(
             **kwargs,
        )

-
     @property
     def amazon(self):
         """
src/brightdata/api/serp/base.py (1 change: 0 additions & 1 deletion)

@@ -153,7 +153,6 @@ async def search(
             **kwargs,
         )

-
     async def _search_single_async(
         self,
         query: str,
src/brightdata/api/web_unlocker.py (6 changes: 2 additions & 4 deletions)

@@ -308,7 +308,7 @@ async def _scrape_single_async_unblocker(
                 url=url,
                 format=response_format,
                 method=method,
-                country=country.upper() if country else None
+                country=country.upper() if country else None,
             )
         except Exception as e:
             return ScrapeResult(
@@ -370,9 +370,7 @@ async def _scrape_single_async_unblocker(

         try:
             data = await self.async_unblocker.fetch_result(
-                zone,
-                response_id,
-                response_format=response_format
+                zone, response_id, response_format=response_format
             )

             root_domain = extract_root_domain(url)
src/brightdata/cli/commands/scrape.py (4 changes: 1 addition & 3 deletions)

@@ -50,9 +50,7 @@ def scrape_url(ctx: click.Context, url: str, country: str, response_format: str)
    """Scrape any URL using Web Unlocker."""
    try:
        client = create_client(ctx.obj["api_key"])
-        result = client.scrape_url(
-            url=url, country=country, response_format=response_format
-        )
+        result = client.scrape_url(url=url, country=country, response_format=response_format)
        output_result(result, ctx.obj["output_format"], ctx.obj["output_file"])
    except Exception as e:
        handle_error(e)
src/brightdata/client.py (7 changes: 1 addition & 6 deletions)

@@ -507,7 +507,6 @@ async def scrape_url(
             poll_timeout=poll_timeout,
         )

-
     async def __aenter__(self):
         """Async context manager entry."""
         await self.engine.__aenter__()
@@ -517,9 +516,7 @@ async def __aenter__(self):
         is_valid = await self.test_connection()
         if not is_valid:
             await self.engine.__aexit__(None, None, None)
-            raise AuthenticationError(
-                "Token validation failed. Please check your API token."
-            )
+            raise AuthenticationError("Token validation failed. Please check your API token.")

         await self._ensure_zones()
         return self
@@ -533,5 +530,3 @@ def __repr__(self) -> str:
         token_preview = f"{self.token[:10]}...{self.token[-5:]}" if self.token else "None"
         status = "Connected" if self._is_connected else "Not tested"
         return f"<BrightDataClient token={token_preview} status='{status}'>"
-
-
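Because __aenter__() validates the token via test_connection() and closes the engine again on failure, the client is evidently meant to be entered as an async context manager. A minimal sketch, assuming the import path and a token keyword argument (neither is confirmed by this diff):

    import asyncio
    from brightdata import BrightDataClient  # import path assumed

    async def main():
        # __aenter__ starts the engine, validates the token, and raises
        # AuthenticationError (after closing the engine) if validation fails
        async with BrightDataClient(token="your-api-token") as client:
            print(repr(client))  # <BrightDataClient token=... status='Connected'>

    asyncio.run(main())
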
src/brightdata/core/engine.py (4 changes: 3 additions & 1 deletion)

@@ -24,7 +24,9 @@
 warnings.filterwarnings("ignore", category=ResourceWarning, message="unclosed.*<ssl.SSLSocket")
 # Suppress RuntimeWarning for coroutines not awaited in __del__ cleanup
 # This happens because aiohttp's connector.close() is async in 3.x
-warnings.filterwarnings("ignore", category=RuntimeWarning, message="coroutine.*TCPConnector.close.*never awaited")
+warnings.filterwarnings(
+    "ignore", category=RuntimeWarning, message="coroutine.*TCPConnector.close.*never awaited"
+)


 class AsyncEngine:
src/brightdata/scrapers/amazon/scraper.py (18 changes: 6 additions & 12 deletions)

@@ -90,7 +90,6 @@ async def products(

         return await self._scrape_urls(url=url, dataset_id=self.DATASET_ID, timeout=timeout)

-
     def products_sync(
         self,
         url: Union[str, List[str]],
@@ -101,9 +100,11 @@ def products_sync(

         See products() for full documentation.
         """
+
         async def _run():
             async with self.engine:
                 return await self.products(url, timeout)
+
         return asyncio.run(_run())

     # ============================================================================
@@ -139,7 +140,6 @@ async def products_trigger(
             urls=url, sdk_function=sdk_function or "products_trigger"
         )

-
     def products_trigger_sync(self, url: Union[str, List[str]]) -> ScrapeJob:
         """Trigger Amazon products scrape (sync version)."""
         return asyncio.run(self.products_trigger(url))
@@ -159,7 +159,6 @@ async def products_status(self, snapshot_id: str) -> str:
         """
         return await self._check_status_async(snapshot_id)

-
     def products_status_sync(self, snapshot_id: str) -> str:
         """Check Amazon products scrape status (sync version)."""
         return asyncio.run(self.products_status(snapshot_id))
@@ -179,7 +178,6 @@ async def products_fetch(self, snapshot_id: str) -> Any:
         """
         return await self._fetch_results_async(snapshot_id)

-
     def products_fetch_sync(self, snapshot_id: str) -> Any:
         """Fetch Amazon products scrape results (sync version)."""
         return asyncio.run(self.products_fetch(snapshot_id))
@@ -276,7 +274,6 @@ async def reviews(
             return results
         return result

-
     def reviews_sync(
         self,
         url: Union[str, List[str]],
@@ -290,9 +287,11 @@ def reviews_sync(

         See reviews() for full documentation.
         """
+
         async def _run():
             async with self.engine:
                 return await self.reviews(url, pastDays, keyWord, numOfReviews, timeout)
+
         return asyncio.run(_run())

     # ============================================================================
@@ -332,7 +331,6 @@ async def reviews_trigger(
             sdk_function=sdk_function or "reviews_trigger",
         )

-
     def reviews_trigger_sync(
         self,
         url: Union[str, List[str]],
@@ -347,7 +345,6 @@ async def reviews_status(self, snapshot_id: str) -> str:
         """Check Amazon reviews scrape status."""
         return await self._check_status_async(snapshot_id)

-
     def reviews_status_sync(self, snapshot_id: str) -> str:
         """Check Amazon reviews scrape status (sync version)."""
         return asyncio.run(self.reviews_status(snapshot_id))
@@ -356,7 +353,6 @@ async def reviews_fetch(self, snapshot_id: str) -> Any:
         """Fetch Amazon reviews scrape results."""
         return await self._fetch_results_async(snapshot_id)

-
     def reviews_fetch_sync(self, snapshot_id: str) -> Any:
         """Fetch Amazon reviews scrape results (sync version)."""
         return asyncio.run(self.reviews_fetch(snapshot_id))
@@ -397,7 +393,6 @@ async def sellers(

         return await self._scrape_urls(url=url, dataset_id=self.DATASET_ID_SELLERS, timeout=timeout)

-
     def sellers_sync(
         self,
         url: Union[str, List[str]],
@@ -408,9 +403,11 @@ def sellers_sync(

         See sellers() for full documentation.
         """
+
         async def _run():
             async with self.engine:
                 return await self.sellers(url, timeout)
+
         return asyncio.run(_run())

     # ============================================================================
@@ -444,7 +441,6 @@ async def sellers_trigger(
             sdk_function=sdk_function or "sellers_trigger",
         )

-
     def sellers_trigger_sync(self, url: Union[str, List[str]]) -> ScrapeJob:
         """Trigger Amazon sellers scrape (sync version)."""
         return asyncio.run(self.sellers_trigger(url))
@@ -453,7 +449,6 @@ async def sellers_status(self, snapshot_id: str) -> str:
         """Check Amazon sellers scrape status."""
         return await self._check_status_async(snapshot_id)

-
     def sellers_status_sync(self, snapshot_id: str) -> str:
         """Check Amazon sellers scrape status (sync version)."""
         return asyncio.run(self.sellers_status(snapshot_id))
@@ -462,7 +457,6 @@ async def sellers_fetch(self, snapshot_id: str) -> Any:
         """Fetch Amazon sellers scrape results."""
         return await self._fetch_results_async(snapshot_id)

-
     def sellers_fetch_sync(self, snapshot_id: str) -> Any:
         """Fetch Amazon sellers scrape results (sync version)."""
         return asyncio.run(self.sellers_fetch(snapshot_id))
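Every *_sync wrapper in this file follows the same shape: define a local coroutine that runs the async method inside the engine's context manager, then hand it to asyncio.run(). The blank lines Black adds around _run() account for all six additions here. A generic sketch of the pattern, with illustrative names that are not part of the SDK:

    import asyncio

    class ExampleScraper:
        def __init__(self, engine):
            self.engine = engine  # async context manager owning the HTTP session

        async def fetch(self, url: str):
            ...  # real async work would go here

        def fetch_sync(self, url: str):
            """Blocking wrapper around fetch(), mirroring the pattern above."""

            async def _run():
                async with self.engine:
                    return await self.fetch(url)

            # asyncio.run() creates and tears down an event loop per call,
            # so these wrappers cannot be invoked from a running event loop
            return asyncio.run(_run())
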
src/brightdata/scrapers/amazon/search.py (3 changes: 2 additions & 1 deletion)

@@ -164,7 +164,6 @@ async def products(
             timeout=timeout,
         )

-
     def products_sync(
         self,
         keyword: Optional[Union[str, List[str]]] = None,
@@ -182,6 +181,7 @@ def products_sync(

         See products() for full documentation.
         """
+
         async def _run():
             async with self.engine:
                 return await self.products(
@@ -195,6 +195,7 @@ async def _run():
                     country=country,
                     timeout=timeout,
                 )
+
         return asyncio.run(_run())