# breakpilot-compliance/consent-tester/routes_matrix.py

"""POST /scan-matrix — browser-matrix stage-1 endpoint.

Runs the existing consent_scanner once per browser profile and
returns the aggregated robustness-score per browser plus a
worst-of/best-of summary. Kept in its own module so main.py stays
under the 500-LOC cap.

KNOWN LIMITATION (stage 1.a):
  The underlying `run_consent_test` does not yet accept a
  `browser_profile` kwarg — all profiles currently execute on the
  same Chromium instance. Engine diversity (real Firefox/WebKit
  contexts) ships in stage 1.b once consent_scanner is split.
"""

from __future__ import annotations

import logging
from datetime import datetime, timezone

from fastapi import APIRouter
from pydantic import BaseModel

from services.consent_scanner import run_consent_test
from services.multi_browser_scanner import run_matrix

logger = logging.getLogger(__name__)
router = APIRouter()


class MatrixScanRequest(BaseModel):
    """Request body accepted by the POST /scan-matrix endpoint."""

    # URL to scan; passed through to run_matrix and echoed back in the
    # response under the "url" key.
    url: str
    # Forwarded unchanged to the scanner as `timeout_per_phase`.
    # NOTE(review): the unit is not visible in this module — presumably
    # seconds; confirm against consent_scanner.
    timeout_per_phase: int = 10
    # Forwarded to the scanner as `categories`; defaults to no categories.
    # NOTE(review): the literal `[]` default relies on pydantic copying
    # field defaults per instance — confirm for the pinned pydantic version.
    categories: list[str] = []
    # Resolved against browser_profiles.resolve_profiles. None or
    # empty list → default 4 profiles (chromium/firefox/webkit/iphone).
    browser_profiles: list[str] | None = None


async def _scanner_shim(
    url: str,
    browser_profile: dict | None = None,
    timeout_per_phase: int = 10,
    categories: list[str] | None = None,
):
    """Adapt `run_consent_test` to a scanner signature that takes a profile.

    `browser_profile` is accepted but deliberately discarded, because
    `run_consent_test` has no parameter for it yet. The remaining
    arguments are forwarded positionally.

    Args:
        url: Page to scan.
        browser_profile: Ignored; present only for signature compatibility.
        timeout_per_phase: Passed through to `run_consent_test` unchanged.
        categories: Passed through; `None` or an empty list becomes `[]`.

    Returns:
        Whatever `run_consent_test` returns for the given arguments.
    """
    # Normalise a missing/empty category list to a fresh empty list.
    selected_categories = categories if categories else []
    scan_result = await run_consent_test(
        url, timeout_per_phase, selected_categories
    )
    return scan_result


@router.post("/scan-matrix")
async def scan_matrix(req: MatrixScanRequest):
    """Run the consent scan for each requested browser profile.

    Delegates to `run_matrix` with `_scanner_shim` as the scanner, then
    annotates the result with the scanned URL and a UTC ISO-8601
    timestamp taken after the matrix run has finished.

    Args:
        req: Validated request body (URL, timeout, categories, profiles).

    Returns:
        The object returned by `run_matrix`, with the keys `"url"` and
        `"scanned_at"` set on it.
    """
    target_url = req.url
    wanted_profiles = req.browser_profiles

    # A missing or empty profile list is logged as the literal "default".
    logger.info(
        "Matrix scan for %s profiles=%s",
        target_url,
        wanted_profiles or "default",
    )

    report = await run_matrix(
        _scanner_shim,
        target_url,
        requested_profiles=wanted_profiles,
        timeout_per_phase=req.timeout_per_phase,
        categories=req.categories,
    )

    # Attach request metadata to the scan result in place.
    report["url"] = target_url
    finished_at = datetime.now(timezone.utc)
    report["scanned_at"] = finished_at.isoformat()
    return report