# Source code for pyfetcher.transports.cloudscraper

"""Cloudscraper transport implementation for :mod:`pyfetcher`.

Purpose:
    Provide synchronous fetching using ``cloudscraper`` as the underlying
    HTTP client library.  Cloudscraper automatically handles Cloudflare's
    anti-bot challenges (JavaScript challenges, CAPTCHAs, etc.), allowing
    transparent access to Cloudflare-protected sites.

Design:
    - ``cloudscraper`` is imported lazily so the package remains optional.
    - One transport instance owns a long-lived scraper session that is
      lazily created on first use.
    - Only synchronous fetch is supported because ``cloudscraper`` is
      built on top of :mod:`requests`, which is inherently synchronous.

Examples:
    ::

        >>> transport = CloudscraperTransport()
        >>> hasattr(transport, "fetch")
        True
"""

from __future__ import annotations

from time import perf_counter
from typing import TYPE_CHECKING

from pyfetcher.contracts.request import FetchRequest
from pyfetcher.contracts.response import FetchResponse

if TYPE_CHECKING:
    import cloudscraper


class CloudscraperTransport:
    """Synchronous cloudscraper transport for Cloudflare challenge bypass.

    Manages a long-lived :func:`cloudscraper.create_scraper` session that is
    lazily initialized on first use.  The *browser* parameter controls which
    browser profile cloudscraper uses for challenge solving.

    The transport can also be used as a context manager; leaving the
    ``with`` block calls :meth:`close`.

    Args:
        browser: Browser profile identifier passed to
            :func:`cloudscraper.create_scraper`.  Defaults to ``"chrome"``.

    Note:
        This transport only supports synchronous fetch.  Cloudscraper is
        built on :mod:`requests` and does not provide an async API.

    Examples:
        ::

            >>> transport = CloudscraperTransport()
            >>> hasattr(transport, "fetch")
            True
            >>> with CloudscraperTransport() as transport:
            ...     hasattr(transport, "close")
            True
    """

    def __init__(self, *, browser: str = "chrome") -> None:
        self._browser = browser
        # Created on first use by ``_get_scraper`` so that constructing the
        # transport never imports the optional ``cloudscraper`` package.
        self._scraper: cloudscraper.CloudScraper | None = None

    def __enter__(self) -> CloudscraperTransport:
        """Enter a ``with`` block and return the transport itself."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Close the owned session on leaving a ``with`` block.

        Exceptions raised inside the block are not suppressed.
        """
        self.close()

    def _get_scraper(self) -> cloudscraper.CloudScraper:
        """Get or lazily create the cloudscraper session.

        Returns:
            A configured :class:`cloudscraper.CloudScraper` instance.  The
            same instance is returned on every call until :meth:`close`.
        """
        if self._scraper is None:
            # Imported here rather than at module level so the dependency
            # stays optional.
            import cloudscraper as _cloudscraper  # noqa: PLC0415

            self._scraper = _cloudscraper.create_scraper(
                browser={"browser": self._browser, "mobile": False},
            )
        return self._scraper

    def fetch(self, request: FetchRequest) -> FetchResponse:
        """Fetch a request synchronously using cloudscraper.

        Automatically handles Cloudflare anti-bot challenges, retrying
        internally when a challenge page is encountered.

        Args:
            request: The fetch request to execute.

        Returns:
            A normalized :class:`~pyfetcher.contracts.response.FetchResponse`.

        Raises:
            cloudscraper.exceptions.CloudflareChallengeError: If the
                Cloudflare challenge cannot be solved.
            requests.exceptions.HTTPError: If the response status indicates
                an error.
        """
        scraper = self._get_scraper()
        start = perf_counter()
        response = scraper.request(
            request.method,
            request.url.unicode_string(),
            # Empty params/headers are sent as ``None``.
            params=request.params or None,
            headers=request.headers or None,
            data=request.data,
            json=request.json_data,
            allow_redirects=request.allow_redirects,
            verify=request.verify_ssl,
            # NOTE(review): ``total_seconds`` is passed without being called;
            # this assumes it is an attribute on the project's timeout type
            # (not a ``timedelta``-style method) -- confirm against the
            # request contract.
            timeout=request.timeout.total_seconds,
        )
        # Timing is taken before ``raise_for_status`` so it covers only the
        # ``scraper.request`` call.
        elapsed_ms = (perf_counter() - start) * 1000.0
        response.raise_for_status()
        return FetchResponse(
            request_url=request.url.unicode_string(),
            final_url=str(response.url),
            status_code=response.status_code,
            headers=dict(response.headers),
            content_type=response.headers.get("content-type"),
            text=response.text,
            body=response.content,
            backend="cloudscraper",
            elapsed_ms=elapsed_ms,
        )

    def close(self) -> None:
        """Close the owned scraper session if present.

        Releases resources held by the underlying :mod:`requests` session.
        Calling this when no session exists is a no-op, and the transport
        creates a fresh session if it is used again afterwards.
        """
        # Drop our reference first so that a failing ``close()`` cannot leave
        # a half-closed session cached for later reuse.
        scraper, self._scraper = self._scraper, None
        if scraper is not None:
            scraper.close()