7e426c31f1
cmp_extractor.py refactored to thin coordinator (123 LOC, was 223). Discovers all CMP modules via cmp_library/_registry.py:load_all() at import time. Restart consent-tester to pick up new modules. New cmp_library/ folder: - _registry.py: auto-discovers all modules with MATCHER + reconstruct() - epaas.py: BMW Group ePaaS (extracted from cmp_extractor) - onetrust.py: cdn.cookielaw.org Groups/Cookies schema - cookiebot.py: consent.cookiebot.com Categories schema - usercentrics.py: api.usercentrics.eu services schema - didomi.py: sdk.privacy-center.org notice + vendors + purposes - trustarc.py: consent.trustarc.com categories + vendors Each module: - MATCHER: re.Pattern matching the CMP JSON endpoint URL - reconstruct(d: dict) -> str: builds German Markdown cookie-policy text Phase E (self-improving) will write auto_*.py files into the same folder; _registry already picks those up via pkgutil.iter_modules.
56 lines
1.9 KiB
Python
56 lines
1.9 KiB
Python
"""
|
|
CMP Library Registry — discovers and registers all CMP modules.
|
|
|
|
Each CMP module exports two module-level symbols:
|
|
- MATCHER: a compiled regex matching the JSON endpoint URL
|
|
- reconstruct(data: dict) -> str: builds the cookie-policy text from JSON
|
|
|
|
The registry auto-discovers:
|
|
1. Hand-written modules: epaas, onetrust, cookiebot, usercentrics,
|
|
didomi, trustarc
|
|
2. Auto-promoted modules: any file matching `auto_*.py` in this folder
|
|
(created by Phase E when an LLM successfully discovers a new pattern)
|
|
|
|
A consent-tester restart picks up new auto_*.py files automatically.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import importlib
|
|
import logging
|
|
import pkgutil
|
|
from pathlib import Path
|
|
from typing import Callable
|
|
|
|
logger = logging.getLogger(__name__)

# One registry row per CMP module, in this slot order:
#   (cmp_name, url_pattern, reconstruct_fn)
# The pattern slot is deliberately typed as the loose forward reference
# "object"; the reconstruct slot is a callable taking a dict and returning str.
Registry = list[
    tuple[
        str,
        "object",
        Callable[[dict], str],
    ]
]
|
|
|
|
|
|
def load_all() -> Registry:
    """Import every module in this package and collect MATCHER + reconstruct.

    Every module found on the ``services.cmp_library`` package path whose
    name does not start with ``_`` is imported. A module is registered only
    when it exposes a non-None ``MATCHER`` attribute and a callable
    ``reconstruct`` attribute; otherwise it is logged and skipped. A module
    that raises while being imported is also logged and skipped, so one
    broken module does not prevent the others from loading.

    Returns:
        A list of ``(module_name, MATCHER, reconstruct)`` tuples, in the
        order ``pkgutil.iter_modules`` yields the modules.

    Raises:
        ImportError: if the ``services.cmp_library`` package itself cannot
            be imported (that import is intentionally not guarded).
    """
    import services.cmp_library as pkg  # type: ignore[import-not-found]

    registry: Registry = []

    # Walk the package's own search path instead of deriving a directory from
    # pkg.__file__: __file__ is None for a namespace package (a folder with no
    # __init__.py), and Path(None) would raise before any module is loaded.
    search_path = list(pkg.__path__)

    for module_info in pkgutil.iter_modules(search_path):
        name = module_info.name
        if name.startswith("_"):
            # Private helpers (including this registry module) are not CMPs.
            continue

        try:
            module = importlib.import_module(f"{pkg.__name__}.{name}")
            matcher = getattr(module, "MATCHER", None)
            reconstruct = getattr(module, "reconstruct", None)
        except Exception as e:
            # Best-effort discovery: importing a module runs arbitrary
            # top-level code, so any failure is logged (with traceback, to
            # make broken modules diagnosable) and the module is skipped.
            logger.warning(
                "CMP module %s failed to load: %s", name, e, exc_info=True,
            )
            continue

        if matcher is None or not callable(reconstruct):
            logger.warning(
                "CMP module %s missing MATCHER or reconstruct() — skipped", name,
            )
            continue

        registry.append((name, matcher, reconstruct))
        logger.info("CMP loaded: %s", name)

    return registry
|