9d79cf1576
Know-how-Freeze der Website-Compliance-Runde (DSE/Cookie/Impressum/AGB). docs: platform_evidence_v1 (Evidenz-/Qualitaetsnachweis, echte Zahlen), nutzungsbedingungen_mapping (neues Modul = Mapping, empirisch belegt), platform_checker_matrix (Meta-Modell verification_method x decision_method), verification_method, platform_validation_v1. code: checkers/ (reusable Pruefer-Library base+reference+embedding+llm, im Container validiert), agb/ (decision_method-Routing + Checker-Prototypen, 71% FP -> ~0 validiert). Dev-only, kein Prod-Push; Benchmark-GTs/Korpora im internen Archiv (data-retention). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
83 lines
2.8 KiB
Python
83 lines
2.8 KiB
Python
"""Checker library — shared interface. See docs platform_checker_matrix.md.

A checker verifies ONE control against ONE document and reports: present / missing
/ unclear (+ evidence). Modules (DSE/Impressum/AGB/...) supply only control metadata
via `ControlSpec` (verification_method + decision_method + checker-specific
config); the engine routes method-agnostically to the matching checker.

Goal of the platform: 14k controls -> 7 checker types -> a few checkers. A new
module thereby becomes a classification problem, not a research problem.
"""
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Optional, Protocol, runtime_checkable
|
|
|
|
|
|
class VerificationMethod:
    """Axis 1 of the routing model: WHICH type (category) of checker applies.

    The members are plain string constants, not ``enum.Enum`` members, so
    each one is an ordinary ``str`` whose value equals its own name.
    """

    FIELD = "FIELD"
    REFERENCE = "REFERENCE"
    BEHAVIOR = "BEHAVIOR"
    PRESENTATION = "PRESENTATION"
    CONTENT = "CONTENT"
    PROCESS = "PROCESS"
    TECHNICAL = "TECHNICAL"
    CONTRACTUAL = "CONTRACTUAL"
|
|
|
|
|
|
class DecisionMethod:
    """Axis 2 of the routing model: HOW the decision is made (concrete mechanism).

    The members are plain string constants, not ``enum.Enum`` members, so
    each one is an ordinary ``str`` whose value equals its own name.
    """

    REGEX = "REGEX"
    EMBEDDING = "EMBEDDING"
    LLM = "LLM"
    LINK_RESOLVER = "LINK_RESOLVER"
    PLAYWRIGHT = "PLAYWRIGHT"
    AUDIT = "AUDIT"
    SCANNER = "SCANNER"
|
|
|
|
|
|
@dataclass
class ControlSpec:
    """Routing metadata plus checker-specific configuration for one control.

    Modules fill in only the fields that are relevant for their
    ``decision_method``; every other field keeps its default.
    """

    # --- identity and routing (required) ---
    control_id: str
    verification_method: str
    decision_method: str

    # --- descriptive metadata ---
    label: str = ""
    severity: str = "MEDIUM"

    # --- checker-specific configuration ---
    # Used by FIELD/REGEX and REFERENCE.
    patterns: list[str] = field(default_factory=list)
    # Used by CONTENT (EMBEDDING/LLM).
    paraphrases: list[str] = field(default_factory=list)
    # EMBEDDING: per-control threshold.
    embed_threshold: Optional[float] = None
    # LLM: section retrieval.
    topic_regex: str = ""
    # LLM: the question to check.
    question: str = ""
    # Free-form additional settings; a fresh dict per instance.
    extra: dict[str, Any] = field(default_factory=dict)
|
|
|
|
|
|
@dataclass
class DocContext:
    """The artifact under inspection.

    ``text`` holds the full text; ``url`` and ``rendered`` are meant for
    PRESENTATION/BEHAVIOR (Playwright) checks, to be used later.
    """

    # Full text of the document.
    text: str = ""
    # Location of the document; empty when not provided.
    url: str = ""
    # Rendered representation; type intentionally left open (``Any``).
    rendered: Any = None
|
|
|
|
|
|
@dataclass
class CheckResult:
    """Outcome of checking one control against one document."""

    # True = fulfilled, False = missing, None = unclear (fail-safe).
    # Required: this is the only field without a default.
    present: Optional[bool]
    # Supporting evidence for the verdict.
    evidence: str = ""
    confidence: float = 0.0
    # Which checker/tier produced the answer.
    source: str = ""
    # Additional structured details; a fresh dict per instance.
    detail: dict[str, Any] = field(default_factory=dict)
|
|
|
|
|
|
@runtime_checkable
class Checker(Protocol):
    """Structural interface shared by every checker.

    All checkers expose the same signature, which keeps the engine
    method-agnostic: it routes only via ``ctrl.verification_method`` /
    ``ctrl.decision_method``.
    """

    # Checker category this implementation declares.
    verification_method: str

    async def check(self, ctrl: ControlSpec, doc: DocContext) -> CheckResult:
        """Check one control against one document.

        Args:
            ctrl: Routing metadata and configuration of the control.
            doc: The artifact to inspect.

        Returns:
            The verdict for ``ctrl`` on ``doc``.
        """
        ...
|