Source code for pyfetcher.config
"""Application configuration for :mod:`pyfetcher`.
Purpose:
Provide a centralized, environment-aware configuration object using
``pydantic-settings``. Reads from environment variables (prefixed with
``PYFETCHER_``) and ``.env`` files.
Design:
- All infrastructure connection details (Postgres, MinIO) are configurable.
- Pipeline concurrency and behavior are tunable per deployment.
- Defaults are suitable for local development with Docker Compose.
Examples:
::
>>> config = PyfetcherConfig()
>>> config.database_url
'postgresql+asyncpg://pyfetcher:pyfetcher@localhost:5432/pyfetcher'
"""
from __future__ import annotations
from pydantic_settings import BaseSettings, SettingsConfigDict
[docs]
class PyfetcherConfig(BaseSettings):
    """Settings for pyfetcher's storage back ends and worker pipeline.

    Every field can be supplied through an environment variable carrying
    the ``PYFETCHER_`` prefix or through a UTF-8 ``.env`` file. The
    defaults below target local development with the provided Docker
    Compose setup.

    Attributes:
        database_url: SQLAlchemy async connection string for PostgreSQL.
        db_pool_size: Base connection pool size for asyncpg.
        db_max_overflow: Maximum overflow connections beyond
            ``db_pool_size``.
        minio_endpoint: MinIO server endpoint, written as ``host:port``.
        minio_access_key: MinIO access key.
        minio_secret_key: MinIO secret key.
        minio_secure: Whether MinIO connections use HTTPS.
        minio_bucket: Default bucket name for storing assets.
        crawl_concurrency: Maximum concurrent crawl workers.
        scrape_concurrency: Maximum concurrent scrape workers.
        download_concurrency: Maximum concurrent download workers.
        default_crawl_delay_seconds: Default politeness delay between
            requests to the same host.
        max_retries: Default maximum retry attempts for failed jobs.
    """

    # Settings-source options: where values come from and how keys that
    # match no declared field are treated.
    model_config = SettingsConfigDict(
        extra="ignore",
        env_file=".env",
        env_file_encoding="utf-8",
        env_prefix="PYFETCHER_",
    )

    # --- PostgreSQL -------------------------------------------------------
    database_url: str = "postgresql+asyncpg://pyfetcher:pyfetcher@localhost:5432/pyfetcher"
    db_pool_size: int = 10
    db_max_overflow: int = 20

    # --- MinIO / S3-compatible object storage -----------------------------
    minio_endpoint: str = "localhost:9000"
    minio_access_key: str = "minioadmin"
    minio_secret_key: str = "minioadmin"
    minio_secure: bool = False
    minio_bucket: str = "pyfetcher"

    # --- Pipeline: worker limits, politeness delay, retry budget ----------
    crawl_concurrency: int = 10
    scrape_concurrency: int = 20
    download_concurrency: int = 5
    default_crawl_delay_seconds: float = 1.0
    max_retries: int = 3