From 3bfba4032aa2a14aa6a324bbb4850f1322dddf23 Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Tue, 30 Jun 2026 13:05:01 +0200
Subject: [PATCH 1/4] feat: provide Request instances in skipped request
 callbacks

---
 .../respect_robots_on_skipped_request.py      |  6 +-
 src/crawlee/_utils/requests.py                | 32 +++++++++
 .../_abstract_http/_abstract_http_crawler.py  | 53 ++++++++-------
 src/crawlee/crawlers/_basic/_basic_crawler.py | 67 +++++++++++++++----
 .../_playwright/_playwright_crawler.py        | 53 ++++++++-------
 tests/unit/_utils/test_requests.py            | 26 ++++++-
 .../crawlers/_basic/test_basic_crawler.py     | 48 ++++++++++++-
 .../test_beautifulsoup_crawler.py             | 22 +++---
 .../crawlers/_parsel/test_parsel_crawler.py   | 22 +++---
 .../_playwright/test_playwright_crawler.py    | 22 +++---
 10 files changed, 254 insertions(+), 97 deletions(-)

diff --git a/docs/examples/code_examples/respect_robots_on_skipped_request.py b/docs/examples/code_examples/respect_robots_on_skipped_request.py
index 5c7eca173f..8b9cf7b18e 100644
--- a/docs/examples/code_examples/respect_robots_on_skipped_request.py
+++ b/docs/examples/code_examples/respect_robots_on_skipped_request.py
@@ -1,6 +1,6 @@
 import asyncio
 
-from crawlee import SkippedReason
+from crawlee import Request, SkippedReason
 from crawlee.crawlers import (
     BeautifulSoupCrawler,
     BeautifulSoupCrawlingContext,
@@ -18,7 +18,9 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
     # highlight-start
     # This handler is called when a request is skipped
     @crawler.on_skipped_request
-    async def skipped_request_handler(url: str, reason: SkippedReason) -> None:
+    async def skipped_request_handler(request: Request, reason: SkippedReason) -> None:
+        url = request.url
+
         # Check if the request was skipped due to robots.txt rules
         if reason == 'robots_txt':
             crawler.log.info(f'Skipped {url} due to robots.txt rules.')
diff --git a/src/crawlee/_utils/requests.py b/src/crawlee/_utils/requests.py
index fa31d4621d..cb9a41b490 100644
--- a/src/crawlee/_utils/requests.py
+++ b/src/crawlee/_utils/requests.py
@@ -3,11 +3,15 @@
 from logging import getLogger
 from typing import TYPE_CHECKING
 
+from pydantic import ValidationError
 from yarl import URL
 
 from crawlee._utils.crypto import compute_short_hash
 
 if TYPE_CHECKING:
+    from logging import Logger
+
+    from crawlee._request import Request, RequestOptions
     from crawlee._types import HttpHeaders, HttpMethod, HttpPayload
 
 logger = getLogger(__name__)
@@ -110,6 +114,34 @@ def compute_unique_key(
     return normalized_url
 
 
+def create_request_from_options(request_options: RequestOptions, logger: Logger | None = None) -> Request | None:
+    """Build a `Request` from `RequestOptions`, returning `None` if the URL is invalid.
+
+    Shared by the crawlers' `extract_links` implementations to turn extracted URLs into `Request`
+    objects. A URL that fails validation (a malformed URL or an unsupported, non-`http(s)` scheme) is
+    logged at the debug level (if a `logger` is given) and skipped by returning `None`, rather than raising.
+
+    Args:
+        request_options: The options passed to `Request.from_url`.
+        logger: An optional logger used to report a skipped, invalid URL.
+
+    Returns:
+        The created `Request`, or `None` if the URL was invalid.
+    """
+    # Imported lazily to avoid a circular import (`crawlee._request` imports from this module).
+    from crawlee._request import Request  # noqa: PLC0415
+
+    try:
+        return Request.from_url(**request_options)
+    except ValidationError as exc:
+        if logger is not None:
+            logger.debug(
+                f'Skipping URL "{request_options["url"]}" due to invalid format: {exc}. '
+                'This may be caused by a malformed URL or an unsupported URL scheme.'
+            )
+        return None
+
+
 def _get_payload_hash(payload: HttpPayload | None) -> str:
     payload_in_bytes = b'' if payload is None else payload
     return compute_short_hash(payload_in_bytes)
diff --git a/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py b/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
index 8d15a1d801..6a67b851ef 100644
--- a/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
+++ b/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
@@ -7,11 +7,11 @@
 from typing import TYPE_CHECKING, Generic
 
 from more_itertools import partition
-from pydantic import ValidationError
 from typing_extensions import NotRequired, TypeVar
 
 from crawlee._request import Request, RequestOptions, RequestState
 from crawlee._utils.docs import docs_group
+from crawlee._utils.requests import create_request_from_options
 from crawlee._utils.time import SharedTimeout
 from crawlee._utils.urls import to_absolute_url_iterator
 from crawlee.crawlers._basic import BasicCrawler, BasicCrawlerOptions, ContextPipeline
@@ -206,6 +206,7 @@ async def extract_links(
             **kwargs: Unpack[EnqueueLinksKwargs],
         ) -> list[Request]:
             requests = list[Request]()
+            skipped = list[Request]()
 
             base_user_data = user_data or {}
 
@@ -214,6 +215,21 @@ async def extract_links(
             kwargs.setdefault('strategy', 'same-hostname')
             strategy = kwargs.get('strategy', 'same-hostname')
 
+            def to_request(url: str) -> Request | None:
+                """Build a `Request` from a single extracted URL, applying the user-provided transform."""
+                request_options = RequestOptions(
+                    url=url, user_data={**base_user_data}, label=label, enqueue_strategy=strategy
+                )
+
+                if transform_request_function:
+                    transform_request_options = transform_request_function(request_options)
+                    if transform_request_options == 'skip':
+                        return None
+                    if transform_request_options != 'unchanged':
+                        request_options = transform_request_options
+
+                return create_request_from_options(request_options, context.log)
+
             links_iterator: Iterator[str] = iter(
                 self._parser.find_links(parsed_content, selector=selector, attribute=attribute)
             )
@@ -227,34 +243,19 @@ async def extract_links(
             )
             links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)
 
+            # Requests disallowed by robots.txt are reported to the skipped-request callback; the rest
+            # continue to the enqueue filter. Both paths go through `to_request` for consistent building.
             if robots_txt_file:
-                skipped, links_iterator = partition(robots_txt_file.is_allowed, links_iterator)
-            else:
-                skipped = iter([])
+                skipped_iterator, links_iterator = partition(robots_txt_file.is_allowed, links_iterator)
+                for url in skipped_iterator:
+                    request = to_request(url)
+                    if request is not None:
+                        skipped.append(request)
 
             for url in self._enqueue_links_filter_iterator(links_iterator, context.request.url, **kwargs):
-                request_options = RequestOptions(
-                    url=url, user_data={**base_user_data}, label=label, enqueue_strategy=strategy
-                )
-
-                if transform_request_function:
-                    transform_request_options = transform_request_function(request_options)
-                    if transform_request_options == 'skip':
-                        continue
-                    if transform_request_options != 'unchanged':
-                        request_options = transform_request_options
-
-                try:
-                    request = Request.from_url(**request_options)
-                except ValidationError as exc:
-                    context.log.debug(
-                        f'Skipping URL "{url}" due to invalid format: {exc}. '
-                        'This may be caused by a malformed URL or unsupported URL scheme. '
-                        'Please ensure the URL is correct and retry.'
-                    )
-                    continue
-
-                requests.append(request)
+                request = to_request(url)
+                if request is not None:
+                    requests.append(request)
 
             skipped_tasks = [
                 asyncio.create_task(self._handle_skipped_request(request, 'robots_txt')) for request in skipped
diff --git a/src/crawlee/crawlers/_basic/_basic_crawler.py b/src/crawlee/crawlers/_basic/_basic_crawler.py
index be3da6dd27..78e02ad82d 100644
--- a/src/crawlee/crawlers/_basic/_basic_crawler.py
+++ b/src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -4,6 +4,7 @@
 
 import asyncio
 import functools
+import inspect
 import logging
 import signal
 import sys
@@ -17,7 +18,7 @@
 from http import HTTPStatus
 from io import StringIO
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Generic, Literal, ParamSpec, cast
+from typing import TYPE_CHECKING, Any, Generic, Literal, ParamSpec, cast, get_type_hints
 from weakref import WeakKeyDictionary
 
 from cachetools import LRUCache
@@ -110,7 +111,35 @@
 
 ErrorHandler = Callable[[TCrawlingContext, Exception], Awaitable[Request | None]]
 FailedRequestHandler = Callable[[TCrawlingContext, Exception], Awaitable[None]]
-SkippedRequestCallback = Callable[[str, SkippedReason], Awaitable[None]]
+SkippedRequestCallback = (
+    Callable[[str, SkippedReason], Awaitable[None]] | Callable[[Request, SkippedReason], Awaitable[None]]
+)
+"""A skipped-request callback receives either the URL `str` or the full `Request`.
+
+For backward compatibility, callbacks whose first parameter is annotated as `str` (or is unannotated)
+receive `request.url`; callbacks that annotate it as `Request` receive the `Request` object. See
+`_skipped_request_callback_expects_request`.
+"""
+
+
+def _skipped_request_callback_expects_request(callback: Callable[..., Awaitable[None]]) -> bool:
+    """Whether a skipped-request callback wants the full `Request` rather than the URL string.
+
+    The first parameter's resolved type annotation decides: a callback annotating it as `Request`
+    receives the `Request` object, while a callback annotating it as `str` (or leaving it unannotated)
+    receives `request.url`, preserving the original `(url: str, reason)` signature. Anything that
+    cannot be introspected falls back to the backward-compatible `str` form.
+    """
+    try:
+        parameters = list(inspect.signature(callback).parameters.values())
+        type_hints = get_type_hints(callback)
+    except Exception:  # Any introspection failure falls back to the backward-compatible `str` form.
+        return False
+
+    if not parameters:
+        return False
+
+    return type_hints.get(parameters[0].name) is Request
 
 
 class _BasicCrawlerOptions(TypedDict):
@@ -417,6 +446,7 @@ def __init__(
         self._error_handler: ErrorHandler[TCrawlingContext | BasicCrawlingContext] | None = None
         self._failed_request_handler: FailedRequestHandler[TCrawlingContext | BasicCrawlingContext] | None = None
         self._on_skipped_request: SkippedRequestCallback | None = None
+        self._on_skipped_request_expects_request = False
         self._abort_on_error = abort_on_error
 
         # Crawler callbacks
@@ -678,8 +708,13 @@ def on_skipped_request(self, callback: SkippedRequestCallback) -> SkippedRequest
         """Register a function to handle skipped requests.
 
         The skipped request handler is invoked when a request is skipped due to a collision or other reasons.
+
+        The callback receives either the request URL as a `str` or the full `Request` object, depending on
+        how its first parameter is annotated. Annotate it as `Request` to access request metadata such as
+        `user_data`; a `str` annotation (or no annotation) keeps the original URL-only behavior.
         """
         self._on_skipped_request = callback
+        self._on_skipped_request_expects_request = _skipped_request_callback_expects_request(callback)
         return callback
 
     async def run(
@@ -826,12 +861,14 @@ async def add_requests(
             wait_for_all_requests_to_be_added: If True, wait for all requests to be added before returning.
             wait_for_all_requests_to_be_added_timeout: Timeout for waiting for all requests to be added.
         """
-        allowed_requests = []
-        skipped = []
-
-        for request in requests:
-            check_url = request.url if isinstance(request, Request) else request
-            if await self._is_allowed_based_on_robots_txt_file(check_url):
+        allowed_requests: list[Request] = []
+        skipped: list[Request] = []
+
+        for original in requests:
+            # Normalize `str` URLs to `Request` once, so robots-skipped items always reach
+            # `_handle_skipped_request` as a `Request`; the callback itself may still receive only the URL.
+            request = original if isinstance(original, Request) else Request.from_url(original)
+            if await self._is_allowed_based_on_robots_txt_file(request.url):
                 allowed_requests.append(request)
             else:
                 skipped.append(request)
@@ -1210,17 +1247,19 @@ async def _handle_failed_request(self, context: TCrawlingContext | BasicCrawling
                 raise UserDefinedErrorHandlerError('Exception thrown in user-defined failed request handler') from e
 
     async def _handle_skipped_request(
-        self, request: Request | str, reason: SkippedReason, *, need_mark: bool = False
+        self, request: Request, reason: SkippedReason, *, need_mark: bool = False
     ) -> None:
-        if need_mark and isinstance(request, Request):
+        if need_mark:
             request.state = RequestState.SKIPPED
             await self._mark_request_as_handled(request)
 
-        url = request.url if isinstance(request, Request) else request
-
-        if self._on_skipped_request:
+        if self._on_skipped_request is not None:
+            # Pass the full `Request` or just its URL, depending on how the callback annotated its first
+            # parameter (see `on_skipped_request`). The cast reflects that dual-dispatch contract.
+            callback = cast('Callable[[str | Request, SkippedReason], Awaitable[None]]', self._on_skipped_request)
+            argument: str | Request = request if self._on_skipped_request_expects_request else request.url
             try:
-                await self._on_skipped_request(url, reason)
+                await callback(argument, reason)
             except Exception as e:
                 raise UserDefinedErrorHandlerError('Exception thrown in user-defined skipped request callback') from e
 
diff --git a/src/crawlee/crawlers/_playwright/_playwright_crawler.py b/src/crawlee/crawlers/_playwright/_playwright_crawler.py
index bbbc03b1fa..5eee40c25d 100644
--- a/src/crawlee/crawlers/_playwright/_playwright_crawler.py
+++ b/src/crawlee/crawlers/_playwright/_playwright_crawler.py
@@ -9,13 +9,13 @@
 
 import playwright.async_api
 from more_itertools import partition
-from pydantic import ValidationError
 from typing_extensions import NotRequired, TypedDict, TypeVar
 
 from crawlee._request import Request, RequestOptions, RequestState
 from crawlee._types import BasicCrawlingContext, ConcurrencySettings
 from crawlee._utils.blocked import RETRY_CSS_SELECTORS
 from crawlee._utils.docs import docs_group
+from crawlee._utils.requests import create_request_from_options
 from crawlee._utils.robots import RobotsTxtFile
 from crawlee._utils.time import SharedTimeout
 from crawlee._utils.urls import to_absolute_url_iterator
@@ -462,6 +462,7 @@ async def extract_links(
             The `PlaywrightCrawler` implementation of the `ExtractLinksFunction` function.
             """
             requests = list[Request]()
+            skipped = list[Request]()
 
             base_user_data = user_data or {}
 
@@ -470,6 +471,21 @@ async def extract_links(
             kwargs.setdefault('strategy', 'same-hostname')
             strategy = kwargs.get('strategy', 'same-hostname')
 
+            def to_request(url: str) -> Request | None:
+                """Build a `Request` from a single extracted URL, applying the user-provided transform."""
+                request_options = RequestOptions(
+                    url=url, user_data={**base_user_data}, label=label, enqueue_strategy=strategy
+                )
+
+                if transform_request_function:
+                    transform_request_options = transform_request_function(request_options)
+                    if transform_request_options == 'skip':
+                        return None
+                    if transform_request_options != 'unchanged':
+                        request_options = transform_request_options
+
+                return create_request_from_options(request_options, context.log)
+
             elements = await context.page.query_selector_all(selector)
             links_iterator: Iterator[str] = iter(
                 [url for element in elements if (url := await element.get_attribute(attribute)) is not None]
@@ -481,34 +497,19 @@ async def extract_links(
 
             links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)
 
+            # Requests disallowed by robots.txt are reported to the skipped-request callback; the rest
+            # continue to the enqueue filter. Both paths go through `to_request` for consistent building.
             if robots_txt_file:
-                skipped, links_iterator = partition(robots_txt_file.is_allowed, links_iterator)
-            else:
-                skipped = iter([])
+                skipped_iterator, links_iterator = partition(robots_txt_file.is_allowed, links_iterator)
+                for url in skipped_iterator:
+                    request = to_request(url)
+                    if request is not None:
+                        skipped.append(request)
 
             for url in self._enqueue_links_filter_iterator(links_iterator, context.request.url, **kwargs):
-                request_options = RequestOptions(
-                    url=url, user_data={**base_user_data}, label=label, enqueue_strategy=strategy
-                )
-
-                if transform_request_function:
-                    transform_request_options = transform_request_function(request_options)
-                    if transform_request_options == 'skip':
-                        continue
-                    if transform_request_options != 'unchanged':
-                        request_options = transform_request_options
-
-                try:
-                    request = Request.from_url(**request_options)
-                except ValidationError as exc:
-                    context.log.debug(
-                        f'Skipping URL "{url}" due to invalid format: {exc}. '
-                        'This may be caused by a malformed URL or unsupported URL scheme. '
-                        'Please ensure the URL is correct and retry.'
-                    )
-                    continue
-
-                requests.append(request)
+                request = to_request(url)
+                if request is not None:
+                    requests.append(request)
 
             skipped_tasks = [
                 asyncio.create_task(self._handle_skipped_request(request, 'robots_txt')) for request in skipped
diff --git a/tests/unit/_utils/test_requests.py b/tests/unit/_utils/test_requests.py
index 8198909592..537fccd361 100644
--- a/tests/unit/_utils/test_requests.py
+++ b/tests/unit/_utils/test_requests.py
@@ -1,9 +1,12 @@
 from __future__ import annotations
 
+import logging
+
 import pytest
 
+from crawlee._request import RequestOptions
 from crawlee._types import HttpHeaders
-from crawlee._utils.requests import compute_unique_key, normalize_url
+from crawlee._utils.requests import compute_unique_key, create_request_from_options, normalize_url
 
 
 @pytest.mark.parametrize(
@@ -132,3 +135,24 @@ def test_compute_unique_key_with_whitespace_in_headers() -> None:
 
     uk_2 = compute_unique_key(url, headers=headers_with_whitespaces, use_extended_unique_key=True)
     assert uk_2 == expected_output
+
+
+def test_create_request_from_options_valid_url() -> None:
+    """A valid `http(s)` URL yields a `Request` carrying that URL."""
+    request = create_request_from_options(RequestOptions(url='https://crawlee.dev'))
+    assert request is not None
+    assert request.url == 'https://crawlee.dev'
+
+
+def test_create_request_from_options_invalid_url_returns_none() -> None:
+    """An unsupported, non-`http(s)` URL is dropped (returns `None`) instead of raising."""
+    assert create_request_from_options(RequestOptions(url='ftp://crawlee.dev')) is None
+
+
+def test_create_request_from_options_logs_invalid_url(caplog: pytest.LogCaptureFixture) -> None:
+    """When a logger is provided, an invalid URL is reported at the debug level."""
+    logger = logging.getLogger('test_create_request_from_options')
+    with caplog.at_level(logging.DEBUG, logger=logger.name):
+        assert create_request_from_options(RequestOptions(url='ftp://crawlee.dev'), logger) is None
+
+    assert any('ftp://crawlee.dev' in record.message for record in caplog.records)
diff --git a/tests/unit/crawlers/_basic/test_basic_crawler.py b/tests/unit/crawlers/_basic/test_basic_crawler.py
index 0391d65843..bacc8863a5 100644
--- a/tests/unit/crawlers/_basic/test_basic_crawler.py
+++ b/tests/unit/crawlers/_basic/test_basic_crawler.py
@@ -19,12 +19,13 @@
 
 import pytest
 
-from crawlee import ConcurrencySettings, Glob, service_locator
+from crawlee import ConcurrencySettings, Glob, SkippedReason, service_locator
 from crawlee._request import Request, RequestState
 from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs, HttpMethod
 from crawlee._utils.robots import RobotsTxtFile
 from crawlee.configuration import Configuration
 from crawlee.crawlers import BasicCrawler
+from crawlee.crawlers._basic._basic_crawler import _skipped_request_callback_expects_request
 from crawlee.errors import RequestCollisionError, SessionError, UserDefinedErrorHandlerError
 from crawlee.events import Event, EventCrawlerStatusData, LocalEventManager
 from crawlee.request_loaders import RequestList, RequestManagerTandem, ThrottlingRequestManager
@@ -1647,6 +1648,51 @@ async def test_lock_with_get_robots_txt_file_for_url(server_url: URL) -> None:
         assert spy.call_count == 1
 
 
+def test_skipped_request_callback_dispatch_by_annotation() -> None:
+    """The skipped-request callback receives the full `Request` only when its first parameter annotates it."""
+
+    async def expects_request(_request: Request, _reason: SkippedReason) -> None: ...
+    async def expects_url(_url: str, _reason: SkippedReason) -> None: ...
+    async def expects_url_unannotated(_url, _reason) -> None: ...  # noqa: ANN001
+
+    assert _skipped_request_callback_expects_request(expects_request) is True
+    assert _skipped_request_callback_expects_request(expects_url) is False
+    assert _skipped_request_callback_expects_request(expects_url_unannotated) is False
+
+
+async def test_add_requests_reports_disallowed_url_to_skipped_callback(server_url: URL) -> None:
+    """A bare `str` URL disallowed by robots.txt reaches the `add_requests` skipped callback as a `Request`."""
+    crawler = BasicCrawler(respect_robots_txt_file=True)
+    skip = Mock()
+
+    @crawler.on_skipped_request
+    async def skipped_hook(request: Request, _reason: SkippedReason) -> None:
+        skip(request)
+
+    # `page_1` is disallowed by the test server's robots.txt; pass it as a plain string.
+    await crawler.add_requests([str(server_url / 'page_1')])
+
+    skipped_requests = [call.args[0] for call in skip.call_args_list]
+    assert all(isinstance(request, Request) for request in skipped_requests)
+    assert {request.url for request in skipped_requests} == {str(server_url / 'page_1')}
+
+
+async def test_skipped_request_callback_receives_url_for_str_signature(server_url: URL) -> None:
+    """A callback whose first parameter is annotated as `str` keeps receiving the URL string (backward compatible)."""
+    crawler = BasicCrawler(respect_robots_txt_file=True)
+    skip = Mock()
+
+    @crawler.on_skipped_request
+    async def skipped_hook(url: str, _reason: SkippedReason) -> None:
+        skip(url)
+
+    await crawler.add_requests([str(server_url / 'page_1')])
+
+    skipped_urls = [call.args[0] for call in skip.call_args_list]
+    assert all(isinstance(url, str) for url in skipped_urls)
+    assert set(skipped_urls) == {str(server_url / 'page_1')}
+
+
 async def test_reduced_logs_from_timed_out_request_handler(caplog: pytest.LogCaptureFixture) -> None:
     caplog.set_level(logging.INFO)
     crawler = BasicCrawler(
diff --git a/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py b/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py
index 9a828b0078..4d8f603910 100644
--- a/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py
+++ b/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py
@@ -246,18 +246,22 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
         await context.enqueue_links()
 
     @crawler.on_skipped_request
-    async def skipped_hook(url: str, _reason: SkippedReason) -> None:
-        skip(url)
+    async def skipped_hook(request: Request, _reason: SkippedReason) -> None:
+        skip(request)
 
     await crawler.run([str(server_url / 'start_enqueue')])
 
-    expected_skip_calls = [
-        mock.call(str(server_url / 'page_1')),
-        mock.call(str(server_url / 'page_2')),
-        mock.call(str(server_url / 'page_3')),
-        mock.call(str(server_url / 'page_4')),
-    ]
-    skip.assert_has_calls(expected_skip_calls, any_order=True)
+    expected_skip_urls = {
+        str(server_url / 'page_1'),
+        str(server_url / 'page_2'),
+        str(server_url / 'page_3'),
+        str(server_url / 'page_4'),
+    }
+
+    requests = [call.args[0] for call in skip.call_args_list]
+
+    assert all(isinstance(request, Request) for request in requests)
+    assert {request.url for request in requests} == expected_skip_urls
 
 
 async def test_extract_links(server_url: URL, http_client: HttpClient) -> None:
diff --git a/tests/unit/crawlers/_parsel/test_parsel_crawler.py b/tests/unit/crawlers/_parsel/test_parsel_crawler.py
index 02f5b61a86..56e70acdc6 100644
--- a/tests/unit/crawlers/_parsel/test_parsel_crawler.py
+++ b/tests/unit/crawlers/_parsel/test_parsel_crawler.py
@@ -330,18 +330,22 @@ async def request_handler(context: ParselCrawlingContext) -> None:
         await context.enqueue_links()
 
     @crawler.on_skipped_request
-    async def skipped_hook(url: str, _reason: SkippedReason) -> None:
-        skip(url)
+    async def skipped_hook(request: Request, _reason: SkippedReason) -> None:
+        skip(request)
 
     await crawler.run([str(server_url / 'start_enqueue')])
 
-    expected_skip_calls = [
-        mock.call(str(server_url / 'page_1')),
-        mock.call(str(server_url / 'page_2')),
-        mock.call(str(server_url / 'page_3')),
-        mock.call(str(server_url / 'page_4')),
-    ]
-    skip.assert_has_calls(expected_skip_calls, any_order=True)
+    expected_skip_urls = {
+        str(server_url / 'page_1'),
+        str(server_url / 'page_2'),
+        str(server_url / 'page_3'),
+        str(server_url / 'page_4'),
+    }
+
+    requests = [call.args[0] for call in skip.call_args_list]
+
+    assert all(isinstance(request, Request) for request in requests)
+    assert {request.url for request in requests} == expected_skip_urls
 
 
 async def test_extract_links(server_url: URL, http_client: HttpClient) -> None:
diff --git a/tests/unit/crawlers/_playwright/test_playwright_crawler.py b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
index 78d1789f99..f4208aa339 100644
--- a/tests/unit/crawlers/_playwright/test_playwright_crawler.py
+++ b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
@@ -765,18 +765,22 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:
         await context.enqueue_links()
 
     @crawler.on_skipped_request
-    async def skipped_hook(url: str, _reason: SkippedReason) -> None:
-        skip(url)
+    async def skipped_hook(request: Request, _reason: SkippedReason) -> None:
+        skip(request)
 
     await crawler.run([str(server_url / 'start_enqueue')])
 
-    expected_skip_calls = [
-        mock.call(str(server_url / 'page_1')),
-        mock.call(str(server_url / 'page_2')),
-        mock.call(str(server_url / 'page_3')),
-        mock.call(str(server_url / 'page_4')),
-    ]
-    skip.assert_has_calls(expected_skip_calls, any_order=True)
+    expected_skip_urls = {
+        str(server_url / 'page_1'),
+        str(server_url / 'page_2'),
+        str(server_url / 'page_3'),
+        str(server_url / 'page_4'),
+    }
+
+    requests = [call.args[0] for call in skip.call_args_list]
+
+    assert all(isinstance(request, Request) for request in requests)
+    assert {request.url for request in requests} == expected_skip_urls
 
 
 async def test_send_request(server_url: URL) -> None:

From af58657b180f8da9dfba524b5e423ed1792bad73 Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Tue, 30 Jun 2026 14:00:38 +0200
Subject: [PATCH 2/4] docs: remove stray blank line in robots.txt skipped
 request example

---
 docs/examples/code_examples/respect_robots_on_skipped_request.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/examples/code_examples/respect_robots_on_skipped_request.py b/docs/examples/code_examples/respect_robots_on_skipped_request.py
index 8b9cf7b18e..277e6d90f3 100644
--- a/docs/examples/code_examples/respect_robots_on_skipped_request.py
+++ b/docs/examples/code_examples/respect_robots_on_skipped_request.py
@@ -20,7 +20,6 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
     @crawler.on_skipped_request
     async def skipped_request_handler(request: Request, reason: SkippedReason) -> None:
         url = request.url
-
         # Check if the request was skipped due to robots.txt rules
         if reason == 'robots_txt':
             crawler.log.info(f'Skipped {url} due to robots.txt rules.')

From 564183cf4c4b743e6dd108eef40fba30f993f89c Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Wed, 1 Jul 2026 09:22:08 +0200
Subject: [PATCH 3/4] fix(crawlers): robust skipped-request callback dispatch
 and keep robots skips out of transform

---
 .../_abstract_http/_abstract_http_crawler.py  | 17 +++++--
 src/crawlee/crawlers/_basic/_basic_crawler.py | 47 +++++++++++++++----
 .../_playwright/_playwright_crawler.py        | 17 +++++--
 .../_basic/_deferred_skipped_request_hook.py  | 27 +++++++++++
 .../crawlers/_basic/test_basic_crawler.py     | 25 +++++++++-
 .../test_beautifulsoup_crawler.py             | 32 +++++++++++++
 .../_playwright/test_playwright_crawler.py    | 30 ++++++++++++
 7 files changed, 176 insertions(+), 19 deletions(-)
 create mode 100644 tests/unit/crawlers/_basic/_deferred_skipped_request_hook.py

diff --git a/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py b/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
index 6a67b851ef..2cfe08721b 100644
--- a/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
+++ b/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
@@ -215,13 +215,19 @@ async def extract_links(
             kwargs.setdefault('strategy', 'same-hostname')
             strategy = kwargs.get('strategy', 'same-hostname')
 
-            def to_request(url: str) -> Request | None:
-                """Build a `Request` from a single extracted URL, applying the user-provided transform."""
+            def to_request(url: str, *, apply_transform: bool = True) -> Request | None:
+                """Build a `Request` from a single extracted URL.
+
+                `transform_request_function` is applied only to links that will actually be enqueued
+                (`apply_transform=True`); robots.txt-skipped links are reported to the skipped-request
+                callback verbatim, so a transform returning `'skip'` cannot hide a robots-blocked URL
+                from that audit.
+                """
                 request_options = RequestOptions(
                     url=url, user_data={**base_user_data}, label=label, enqueue_strategy=strategy
                 )
 
-                if transform_request_function:
+                if apply_transform and transform_request_function:
                     transform_request_options = transform_request_function(request_options)
                     if transform_request_options == 'skip':
                         return None
@@ -244,11 +250,12 @@ def to_request(url: str) -> Request | None:
             links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)
 
             # Requests disallowed by robots.txt are reported to the skipped-request callback; the rest
-            # continue to the enqueue filter. Both paths go through `to_request` for consistent building.
+            # continue to the enqueue filter. The transform shapes only the enqueued set, so it is not
+            # applied to skipped links (see `to_request`).
             if robots_txt_file:
                 skipped_iterator, links_iterator = partition(robots_txt_file.is_allowed, links_iterator)
                 for url in skipped_iterator:
-                    request = to_request(url)
+                    request = to_request(url, apply_transform=False)
                     if request is not None:
                         skipped.append(request)
 
diff --git a/src/crawlee/crawlers/_basic/_basic_crawler.py b/src/crawlee/crawlers/_basic/_basic_crawler.py
index 78e02ad82d..e7743e2cba 100644
--- a/src/crawlee/crawlers/_basic/_basic_crawler.py
+++ b/src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import ast
 import asyncio
 import functools
 import inspect
@@ -18,7 +19,7 @@
 from http import HTTPStatus
 from io import StringIO
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Generic, Literal, ParamSpec, cast, get_type_hints
+from typing import TYPE_CHECKING, Any, Generic, Literal, ParamSpec, cast, get_args
 from weakref import WeakKeyDictionary
 
 from cachetools import LRUCache
@@ -125,21 +126,51 @@
 def _skipped_request_callback_expects_request(callback: Callable[..., Awaitable[None]]) -> bool:
     """Whether a skipped-request callback wants the full `Request` rather than the URL string.
 
-    The first parameter's resolved type annotation decides: a callback annotating it as `Request`
-    receives the `Request` object, while a callback annotating it as `str` (or leaving it unannotated)
-    receives `request.url`, preserving the original `(url: str, reason)` signature. Anything that
-    cannot be introspected falls back to the backward-compatible `str` form.
+    The first parameter's type annotation decides: a callback annotating it as `Request` (including a
+    union such as `Request | None`) receives the `Request` object, while a callback annotating it as
+    `str` (or leaving it unannotated) receives `request.url`, preserving the original `(url, reason)`
+    signature.
+
+    Annotations are matched leniently. Under `from __future__ import annotations` the annotation is a
+    string, and a hook whose module imports `Request` only under `TYPE_CHECKING` (the idiomatic style)
+    cannot be resolved to the class at runtime. Rather than silently degrading such hooks to the `str`
+    form, string annotations are matched by inspecting their syntax for a bare `Request` name.
     """
     try:
         parameters = list(inspect.signature(callback).parameters.values())
-        type_hints = get_type_hints(callback)
-    except Exception:  # Any introspection failure falls back to the backward-compatible `str` form.
+    except (TypeError, ValueError):  # Uninspectable callable falls back to the backward-compatible `str` form.
         return False
 
     if not parameters:
         return False
 
-    return type_hints.get(parameters[0].name) is Request
+    annotation = parameters[0].annotation
+
+    if annotation is inspect.Parameter.empty:
+        return False
+
+    # A string annotation (PEP 563, or an explicitly quoted forward reference) may not resolve to the
+    # class when `Request` is a `TYPE_CHECKING`-only import, so match it by name instead. This handles
+    # unions like `Request | None` and `Optional[Request]` without misfiring on names like `RequestOptions`.
+    if isinstance(annotation, str):
+        return _annotation_names_request(annotation)
+
+    # An already-resolved annotation (a class or a typing construct): match `Request` directly or inside a union.
+    return annotation is Request or Request in get_args(annotation)
+
+
+def _annotation_names_request(annotation: str) -> bool:
+    """Whether a string type annotation references `Request` as a bare name (e.g. `Request`, `Request | None`)."""
+    try:
+        tree = ast.parse(annotation, mode='eval')
+    except SyntaxError:
+        return False
+
+    return any(
+        (isinstance(node, ast.Name) and node.id == Request.__name__)
+        or (isinstance(node, ast.Attribute) and node.attr == Request.__name__)
+        for node in ast.walk(tree)
+    )
 
 
 class _BasicCrawlerOptions(TypedDict):
diff --git a/src/crawlee/crawlers/_playwright/_playwright_crawler.py b/src/crawlee/crawlers/_playwright/_playwright_crawler.py
index 5eee40c25d..33eb3bbad3 100644
--- a/src/crawlee/crawlers/_playwright/_playwright_crawler.py
+++ b/src/crawlee/crawlers/_playwright/_playwright_crawler.py
@@ -471,13 +471,19 @@ async def extract_links(
             kwargs.setdefault('strategy', 'same-hostname')
             strategy = kwargs.get('strategy', 'same-hostname')
 
-            def to_request(url: str) -> Request | None:
-                """Build a `Request` from a single extracted URL, applying the user-provided transform."""
+            def to_request(url: str, *, apply_transform: bool = True) -> Request | None:
+                """Build a `Request` from a single extracted URL.
+
+                `transform_request_function` is applied only to links that will actually be enqueued
+                (`apply_transform=True`); robots.txt-skipped links are reported to the skipped-request
+                callback verbatim, so a transform returning `'skip'` cannot hide a robots-blocked URL
+                from that audit.
+                """
                 request_options = RequestOptions(
                     url=url, user_data={**base_user_data}, label=label, enqueue_strategy=strategy
                 )
 
-                if transform_request_function:
+                if apply_transform and transform_request_function:
                     transform_request_options = transform_request_function(request_options)
                     if transform_request_options == 'skip':
                         return None
@@ -498,11 +504,12 @@ def to_request(url: str) -> Request | None:
             links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)
 
             # Requests disallowed by robots.txt are reported to the skipped-request callback; the rest
-            # continue to the enqueue filter. Both paths go through `to_request` for consistent building.
+            # continue to the enqueue filter. The transform shapes only the enqueued set, so it is not
+            # applied to skipped links (see `to_request`).
             if robots_txt_file:
                 skipped_iterator, links_iterator = partition(robots_txt_file.is_allowed, links_iterator)
                 for url in skipped_iterator:
-                    request = to_request(url)
+                    request = to_request(url, apply_transform=False)
                     if request is not None:
                         skipped.append(request)
 
diff --git a/tests/unit/crawlers/_basic/_deferred_skipped_request_hook.py b/tests/unit/crawlers/_basic/_deferred_skipped_request_hook.py
new file mode 100644
index 0000000000..ae32400677
--- /dev/null
+++ b/tests/unit/crawlers/_basic/_deferred_skipped_request_hook.py
@@ -0,0 +1,27 @@
+"""Skipped-request hooks written in the idiomatic deferred-annotation style.
+
+Used by `test_basic_crawler.py` to check that `_skipped_request_callback_expects_request` still
+recognizes a `Request` annotation when the hook's module uses `from __future__ import annotations`
+(PEP 563) and imports `Request` only under `TYPE_CHECKING`, so the name is not available at runtime.
+The module is loaded by file path so it is never collected by pytest and does not rely on the test
+package layout.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from crawlee import Request, SkippedReason
+
+
+async def expects_request(_request: Request, _reason: SkippedReason) -> None:
+    """First parameter annotated `Request`, resolvable only under `TYPE_CHECKING`."""
+
+
+async def expects_optional_request(_request: Request | None, _reason: SkippedReason) -> None:
+    """First parameter annotated with a `Request | None` union."""
+
+
+async def expects_url(_url: str, _reason: SkippedReason) -> None:
+    """First parameter annotated `str` keeps the legacy URL-only behavior."""
diff --git a/tests/unit/crawlers/_basic/test_basic_crawler.py b/tests/unit/crawlers/_basic/test_basic_crawler.py
index bacc8863a5..996a4cdac4 100644
--- a/tests/unit/crawlers/_basic/test_basic_crawler.py
+++ b/tests/unit/crawlers/_basic/test_basic_crawler.py
@@ -2,6 +2,7 @@
 from __future__ import annotations
 
 import asyncio
+import importlib.util
 import json
 import logging
 import os
@@ -14,6 +15,7 @@
 from dataclasses import dataclass
 from datetime import timedelta
 from itertools import product
+from pathlib import Path
 from typing import TYPE_CHECKING, Any, Literal, cast
 from unittest.mock import AsyncMock, Mock, call, patch
 
@@ -37,7 +39,6 @@
 
 if TYPE_CHECKING:
     from collections.abc import Callable, Sequence
-    from pathlib import Path
 
     from yarl import URL
 
@@ -1660,6 +1661,28 @@ async def expects_url_unannotated(_url, _reason) -> None: ...  # noqa: ANN001
     assert _skipped_request_callback_expects_request(expects_url_unannotated) is False
 
 
+def test_skipped_request_callback_dispatch_accepts_optional_and_deferred_request_annotation() -> None:
+    """Detection recognizes a `Request` annotation given as a union or resolvable only under `TYPE_CHECKING`."""
+
+    # A `Request | None` union still means "give me the Request".
+    async def expects_optional_request(_request: Request | None, _reason: SkippedReason) -> None: ...
+
+    assert _skipped_request_callback_expects_request(expects_optional_request) is True
+
+    # Hooks living in a module that imports `Request` only under `TYPE_CHECKING` (with PEP 563 deferred
+    # annotations, the style Crawlee itself uses) must not silently degrade to the URL-only form.
+    hook_path = Path(__file__).parent / '_deferred_skipped_request_hook.py'
+    spec = importlib.util.spec_from_file_location('_deferred_skipped_request_hook', hook_path)
+    assert spec is not None
+    assert spec.loader is not None
+    hooks = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(hooks)
+
+    assert _skipped_request_callback_expects_request(hooks.expects_request) is True
+    assert _skipped_request_callback_expects_request(hooks.expects_optional_request) is True
+    assert _skipped_request_callback_expects_request(hooks.expects_url) is False
+
+
 async def test_add_requests_reports_disallowed_url_to_skipped_callback(server_url: URL) -> None:
     """A bare `str` URL disallowed by robots.txt reaches the `add_requests` skipped callback as a `Request`."""
     crawler = BasicCrawler(respect_robots_txt_file=True)
diff --git a/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py b/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py
index 4d8f603910..f8186db45b 100644
--- a/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py
+++ b/tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py
@@ -264,6 +264,38 @@ async def skipped_hook(request: Request, _reason: SkippedReason) -> None:
     assert {request.url for request in requests} == expected_skip_urls
 
 
+async def test_on_skipped_request_reports_robots_url_despite_transform_skip(
+    server_url: URL, http_client: HttpClient
+) -> None:
+    """A `transform_request_function` returning `'skip'` must not hide a robots-disallowed URL from the callback."""
+    crawler = BeautifulSoupCrawler(http_client=http_client, respect_robots_txt_file=True)
+    skip = mock.Mock()
+
+    def transform_request_function(request_options: RequestOptions) -> RequestOptions | RequestTransformAction:
+        # The user only means "do not enqueue page_3"; this must not affect robots.txt reporting.
+        if 'page_3' in request_options['url']:
+            return 'skip'
+        return 'unchanged'
+
+    @crawler.router.default_handler
+    async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
+        await context.enqueue_links(transform_request_function=transform_request_function)
+
+    @crawler.on_skipped_request
+    async def skipped_hook(request: Request, _reason: SkippedReason) -> None:
+        skip(request.url)
+
+    await crawler.run([str(server_url / 'start_enqueue')])
+
+    reported = {call.args[0] for call in skip.call_args_list}
+    assert reported == {
+        str(server_url / 'page_1'),
+        str(server_url / 'page_2'),
+        str(server_url / 'page_3'),  # robots-blocked; the transform skip must not hide it here
+        str(server_url / 'page_4'),
+    }
+
+
 async def test_extract_links(server_url: URL, http_client: HttpClient) -> None:
     crawler = BeautifulSoupCrawler(http_client=http_client)
     extracted_links: list[str] = []
diff --git a/tests/unit/crawlers/_playwright/test_playwright_crawler.py b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
index f4208aa339..d25e59a1e7 100644
--- a/tests/unit/crawlers/_playwright/test_playwright_crawler.py
+++ b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
@@ -783,6 +783,36 @@ async def skipped_hook(request: Request, _reason: SkippedReason) -> None:
     assert {request.url for request in requests} == expected_skip_urls
 
 
+async def test_on_skipped_request_reports_robots_url_despite_transform_skip(server_url: URL) -> None:
+    """A `transform_request_function` returning `'skip'` must not hide a robots-disallowed URL from the callback."""
+    crawler = PlaywrightCrawler(respect_robots_txt_file=True)
+    skip = mock.Mock()
+
+    def transform_request_function(request_options: RequestOptions) -> RequestOptions | RequestTransformAction:
+        # The user only means "do not enqueue page_3"; this must not affect robots.txt reporting.
+        if 'page_3' in request_options['url']:
+            return 'skip'
+        return 'unchanged'
+
+    @crawler.router.default_handler
+    async def request_handler(context: PlaywrightCrawlingContext) -> None:
+        await context.enqueue_links(transform_request_function=transform_request_function)
+
+    @crawler.on_skipped_request
+    async def skipped_hook(request: Request, _reason: SkippedReason) -> None:
+        skip(request.url)
+
+    await crawler.run([str(server_url / 'start_enqueue')])
+
+    reported = {call.args[0] for call in skip.call_args_list}
+    assert reported == {
+        str(server_url / 'page_1'),
+        str(server_url / 'page_2'),
+        str(server_url / 'page_3'),  # robots-blocked; the transform skip must not hide it here
+        str(server_url / 'page_4'),
+    }
+
+
 async def test_send_request(server_url: URL) -> None:
     check_data: dict[str, Any] = {}
 

From 608fbb6ff70c192520dde7da6ac850a4e7fec5ce Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Wed, 1 Jul 2026 09:31:33 +0200
Subject: [PATCH 4/4] docs: shorten skipped-request callback comments and drop
 helper module docstring

---
 .../_abstract_http/_abstract_http_crawler.py  | 11 ++++-----
 src/crawlee/crawlers/_basic/_basic_crawler.py | 23 +++++++------------
 .../_playwright/_playwright_crawler.py        | 11 ++++-----
 .../_basic/_deferred_skipped_request_hook.py  |  9 --------
 .../crawlers/_basic/test_basic_crawler.py     |  3 +--
 5 files changed, 17 insertions(+), 40 deletions(-)

diff --git a/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py b/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
index 2cfe08721b..5dcd18d0be 100644
--- a/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
+++ b/src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
@@ -218,10 +218,8 @@ async def extract_links(
             def to_request(url: str, *, apply_transform: bool = True) -> Request | None:
                 """Build a `Request` from a single extracted URL.
 
-                `transform_request_function` is applied only to links that will actually be enqueued
-                (`apply_transform=True`); robots.txt-skipped links are reported to the skipped-request
-                callback verbatim, so a transform returning `'skip'` cannot hide a robots-blocked URL
-                from that audit.
+                The transform is applied only to enqueued links (`apply_transform=True`), so a
+                transform returning `'skip'` cannot hide a robots-skipped URL from the callback.
                 """
                 request_options = RequestOptions(
                     url=url, user_data={**base_user_data}, label=label, enqueue_strategy=strategy
@@ -249,9 +247,8 @@ def to_request(url: str, *, apply_transform: bool = True) -> Request | None:
             )
             links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)
 
-            # Requests disallowed by robots.txt are reported to the skipped-request callback; the rest
-            # continue to the enqueue filter. The transform shapes only the enqueued set, so it is not
-            # applied to skipped links (see `to_request`).
+            # Robots-disallowed requests go to the skipped-request callback (without the transform, see
+            # `to_request`); the rest continue to the enqueue filter.
             if robots_txt_file:
                 skipped_iterator, links_iterator = partition(robots_txt_file.is_allowed, links_iterator)
                 for url in skipped_iterator:
diff --git a/src/crawlee/crawlers/_basic/_basic_crawler.py b/src/crawlee/crawlers/_basic/_basic_crawler.py
index e7743e2cba..660f210817 100644
--- a/src/crawlee/crawlers/_basic/_basic_crawler.py
+++ b/src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -126,19 +126,14 @@
 def _skipped_request_callback_expects_request(callback: Callable[..., Awaitable[None]]) -> bool:
     """Whether a skipped-request callback wants the full `Request` rather than the URL string.
 
-    The first parameter's type annotation decides: a callback annotating it as `Request` (including a
-    union such as `Request | None`) receives the `Request` object, while a callback annotating it as
-    `str` (or leaving it unannotated) receives `request.url`, preserving the original `(url, reason)`
-    signature.
-
-    Annotations are matched leniently. Under `from __future__ import annotations` the annotation is a
-    string, and a hook whose module imports `Request` only under `TYPE_CHECKING` (the idiomatic style)
-    cannot be resolved to the class at runtime. Rather than silently degrading such hooks to the `str`
-    form, string annotations are matched by inspecting their syntax for a bare `Request` name.
+    The first parameter's annotation decides: `Request` (or a union such as `Request | None`) gets the
+    `Request` object; `str` or no annotation keeps the legacy `(url, reason)` signature. String
+    annotations (PEP 563, or a `TYPE_CHECKING`-only `Request` import) are matched by name so such hooks
+    don't silently degrade to the `str` form.
     """
     try:
         parameters = list(inspect.signature(callback).parameters.values())
-    except (TypeError, ValueError):  # Uninspectable callable falls back to the backward-compatible `str` form.
+    except (TypeError, ValueError):  # Uninspectable callable falls back to the `str` form.
         return False
 
     if not parameters:
@@ -149,18 +144,16 @@ def _skipped_request_callback_expects_request(callback: Callable[..., Awaitable[
     if annotation is inspect.Parameter.empty:
         return False
 
-    # A string annotation (PEP 563, or an explicitly quoted forward reference) may not resolve to the
-    # class when `Request` is a `TYPE_CHECKING`-only import, so match it by name instead. This handles
-    # unions like `Request | None` and `Optional[Request]` without misfiring on names like `RequestOptions`.
+    # A string annotation may not resolve to the class (e.g. a `TYPE_CHECKING`-only import), so match by name.
     if isinstance(annotation, str):
         return _annotation_names_request(annotation)
 
-    # An already-resolved annotation (a class or a typing construct): match `Request` directly or inside a union.
+    # A resolved annotation: match `Request` directly or inside a union.
     return annotation is Request or Request in get_args(annotation)
 
 
 def _annotation_names_request(annotation: str) -> bool:
-    """Whether a string type annotation references `Request` as a bare name (e.g. `Request`, `Request | None`)."""
+    """Whether a string annotation names `Request` (e.g. `Request`, `Request | None`), not `RequestOptions`."""
     try:
         tree = ast.parse(annotation, mode='eval')
     except SyntaxError:
diff --git a/src/crawlee/crawlers/_playwright/_playwright_crawler.py b/src/crawlee/crawlers/_playwright/_playwright_crawler.py
index 33eb3bbad3..5bf2232661 100644
--- a/src/crawlee/crawlers/_playwright/_playwright_crawler.py
+++ b/src/crawlee/crawlers/_playwright/_playwright_crawler.py
@@ -474,10 +474,8 @@ async def extract_links(
             def to_request(url: str, *, apply_transform: bool = True) -> Request | None:
                 """Build a `Request` from a single extracted URL.
 
-                `transform_request_function` is applied only to links that will actually be enqueued
-                (`apply_transform=True`); robots.txt-skipped links are reported to the skipped-request
-                callback verbatim, so a transform returning `'skip'` cannot hide a robots-blocked URL
-                from that audit.
+                The transform is applied only to enqueued links (`apply_transform=True`), so a
+                transform returning `'skip'` cannot hide a robots-skipped URL from the callback.
                 """
                 request_options = RequestOptions(
                     url=url, user_data={**base_user_data}, label=label, enqueue_strategy=strategy
@@ -503,9 +501,8 @@ def to_request(url: str, *, apply_transform: bool = True) -> Request | None:
 
             links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)
 
-            # Requests disallowed by robots.txt are reported to the skipped-request callback; the rest
-            # continue to the enqueue filter. The transform shapes only the enqueued set, so it is not
-            # applied to skipped links (see `to_request`).
+            # Robots-disallowed requests go to the skipped-request callback (without the transform, see
+            # `to_request`); the rest continue to the enqueue filter.
             if robots_txt_file:
                 skipped_iterator, links_iterator = partition(robots_txt_file.is_allowed, links_iterator)
                 for url in skipped_iterator:
diff --git a/tests/unit/crawlers/_basic/_deferred_skipped_request_hook.py b/tests/unit/crawlers/_basic/_deferred_skipped_request_hook.py
index ae32400677..448f749be5 100644
--- a/tests/unit/crawlers/_basic/_deferred_skipped_request_hook.py
+++ b/tests/unit/crawlers/_basic/_deferred_skipped_request_hook.py
@@ -1,12 +1,3 @@
-"""Skipped-request hooks written in the idiomatic deferred-annotation style.
-
-Used by `test_basic_crawler.py` to check that `_skipped_request_callback_expects_request` still
-recognizes a `Request` annotation when the hook's module uses `from __future__ import annotations`
-(PEP 563) and imports `Request` only under `TYPE_CHECKING`, so the name is not available at runtime.
-The module is loaded by file path so it is never collected by pytest and does not rely on the test
-package layout.
-"""
-
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
diff --git a/tests/unit/crawlers/_basic/test_basic_crawler.py b/tests/unit/crawlers/_basic/test_basic_crawler.py
index 996a4cdac4..a762a7c392 100644
--- a/tests/unit/crawlers/_basic/test_basic_crawler.py
+++ b/tests/unit/crawlers/_basic/test_basic_crawler.py
@@ -1669,8 +1669,7 @@ async def expects_optional_request(_request: Request | None, _reason: SkippedRea
 
     assert _skipped_request_callback_expects_request(expects_optional_request) is True
 
-    # Hooks living in a module that imports `Request` only under `TYPE_CHECKING` (with PEP 563 deferred
-    # annotations, the style Crawlee itself uses) must not silently degrade to the URL-only form.
+    # Hooks in a module that imports `Request` only under `TYPE_CHECKING` must not degrade to the URL form.
     hook_path = Path(__file__).parent / '_deferred_skipped_request_hook.py'
     spec = importlib.util.spec_from_file_location('_deferred_skipped_request_hook', hook_path)
     assert spec is not None