feat: Add 76 Level-2 regex checks for document correctness verification

Split dsi_document_checker.py (466 LOC) into the doc_checks/ package (9 files).

Two-pass L1→L2 logic: L1 checks "Is it mentioned?", L2 checks "Is it correct?" (e.g. controller has full address, specific Art. 6 lit., concrete time periods).

138 total checks (62 L1 + 76 L2) across 7 doc types:
- DSE Art. 13: 31, Impressum §5 TMG: 16, Cookie §25 TDDDG: 15
- Widerruf §355: 15, AGB §305ff: 21, Social Media Art. 26: 20, DSFA Art. 35: 18

Frontend: hierarchical L1→L2 display with dual progress bars (green = completeness, blue = correctness).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-07 12:37:03 +02:00
parent 3c12e06faf
commit b363c28539
12 changed files with 2083 additions and 496 deletions
@@ -48,6 +48,9 @@ class CheckItem(BaseModel):
    passed: bool
    severity: str
    matched_text: str = ""
+    level: int = 1
+    parent: str | None = None
+    skipped: bool = False


 class DocCheckResult(BaseModel):
@@ -56,6 +59,7 @@ class DocCheckResult(BaseModel):
    doc_type: str
    word_count: int = 0
    completeness_pct: int = 0
+    correctness_pct: int = 0
    checks: list[CheckItem] = []
    findings_count: int = 0
    error: str = ""
@@ -229,27 +233,30 @@ async def _check_single_document(entry: DocCheckEntry) -> list[DocCheckResult]:

 def _run_checklist(text: str, doc_type: str, label: str, url: str, word_count: int = 0) -> DocCheckResult:
    """Run checklist against text and return structured result."""
-    import re as _re
    findings = check_document_completeness(text, doc_type, label, url)

    all_checks: list[CheckItem] = []
    completeness = 0
+    correctness = 0
    for f in findings:
        if "SCORE" in f.get("code", ""):
            for c in f.get("all_checks", []):
                all_checks.append(CheckItem(
                    id=c["id"], label=c["label"], passed=c["passed"],
                    severity=c["severity"], matched_text=c.get("matched_text", ""),
+                    level=c.get("level", 1),
+                    parent=c.get("parent"),
+                    skipped=c.get("skipped", False),
                ))
-            pct_match = _re.search(r"(\d+)%", f.get("text", ""))
-            if pct_match:
-                completeness = int(pct_match.group(1))
+            completeness = f.get("completeness_pct", 0)
+            correctness = f.get("correctness_pct", 0)

    non_score = [f for f in findings if "SCORE" not in f.get("code", "")]
    return DocCheckResult(
        label=label, url=url, doc_type=doc_type,
        word_count=word_count or len(text.split()),
        completeness_pct=completeness,
+        correctness_pct=correctness,
        checks=all_checks, findings_count=len(non_score),
    )

@@ -374,11 +381,15 @@ def _build_report(results: list[DocCheckResult], cookie_result: dict | None) ->
        status = "OK" if r.completeness_pct == 100 else "LUECKENHAFT" if r.completeness_pct >= 50 else "MANGELHAFT"
        if r.error:
            status = "FEHLER"
-        parts.append(f"[{status}] {r.label} ({r.completeness_pct}%, {r.word_count} Woerter)")
+        detail = f", Korrektheit {r.correctness_pct}%" if r.correctness_pct else ""
+        parts.append(f"[{status}] {r.label} ({r.completeness_pct}%{detail}, {r.word_count} Woerter)")

        for check in r.checks:
+            if check.skipped:
+                continue
            icon = "+" if check.passed else "!!"
-            parts.append(f"  [{icon}] {check.label}")
+            indent = "    " if check.level == 2 else "  "
+            parts.append(f"{indent}[{icon}] {check.label}")

        if r.error:
            parts.append(f"  FEHLER: {r.error}")