feat: Add 76 Level-2 regex checks for document correctness verification
Split dsi_document_checker.py (466 LOC) into doc_checks/ package (9 files).

Two-pass L1→L2 logic: L1 checks "Is it mentioned?", L2 checks "Is it correct?" (e.g. controller has full address, specific Art. 6 lit., concrete time periods).

138 total checks (62 L1 + 76 L2) across 7 doc types:
- DSE Art. 13: 31, Impressum §5 TMG: 16, Cookie §25 TDDDG: 15
- Widerruf §355: 15, AGB §305ff: 21, Social Media Art. 26: 20, DSFA Art. 35: 18

Frontend: hierarchical L1→L2 display with dual progress bars (green=completeness, blue=correctness).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -48,6 +48,9 @@ class CheckItem(BaseModel):
|
||||
passed: bool
|
||||
severity: str
|
||||
matched_text: str = ""
|
||||
level: int = 1
|
||||
parent: str | None = None
|
||||
skipped: bool = False
|
||||
|
||||
|
||||
class DocCheckResult(BaseModel):
|
||||
@@ -56,6 +59,7 @@ class DocCheckResult(BaseModel):
|
||||
doc_type: str
|
||||
word_count: int = 0
|
||||
completeness_pct: int = 0
|
||||
correctness_pct: int = 0
|
||||
checks: list[CheckItem] = []
|
||||
findings_count: int = 0
|
||||
error: str = ""
|
||||
@@ -229,27 +233,30 @@ async def _check_single_document(entry: DocCheckEntry) -> list[DocCheckResult]:
|
||||
|
||||
def _run_checklist(text: str, doc_type: str, label: str, url: str, word_count: int = 0) -> DocCheckResult:
|
||||
"""Run checklist against text and return structured result."""
|
||||
import re as _re
|
||||
findings = check_document_completeness(text, doc_type, label, url)
|
||||
|
||||
all_checks: list[CheckItem] = []
|
||||
completeness = 0
|
||||
correctness = 0
|
||||
for f in findings:
|
||||
if "SCORE" in f.get("code", ""):
|
||||
for c in f.get("all_checks", []):
|
||||
all_checks.append(CheckItem(
|
||||
id=c["id"], label=c["label"], passed=c["passed"],
|
||||
severity=c["severity"], matched_text=c.get("matched_text", ""),
|
||||
level=c.get("level", 1),
|
||||
parent=c.get("parent"),
|
||||
skipped=c.get("skipped", False),
|
||||
))
|
||||
pct_match = _re.search(r"(\d+)%", f.get("text", ""))
|
||||
if pct_match:
|
||||
completeness = int(pct_match.group(1))
|
||||
completeness = f.get("completeness_pct", 0)
|
||||
correctness = f.get("correctness_pct", 0)
|
||||
|
||||
non_score = [f for f in findings if "SCORE" not in f.get("code", "")]
|
||||
return DocCheckResult(
|
||||
label=label, url=url, doc_type=doc_type,
|
||||
word_count=word_count or len(text.split()),
|
||||
completeness_pct=completeness,
|
||||
correctness_pct=correctness,
|
||||
checks=all_checks, findings_count=len(non_score),
|
||||
)
|
||||
|
||||
@@ -374,11 +381,15 @@ def _build_report(results: list[DocCheckResult], cookie_result: dict | None) ->
|
||||
status = "OK" if r.completeness_pct == 100 else "LUECKENHAFT" if r.completeness_pct >= 50 else "MANGELHAFT"
|
||||
if r.error:
|
||||
status = "FEHLER"
|
||||
parts.append(f"[{status}] {r.label} ({r.completeness_pct}%, {r.word_count} Woerter)")
|
||||
detail = f", Korrektheit {r.correctness_pct}%" if r.correctness_pct else ""
|
||||
parts.append(f"[{status}] {r.label} ({r.completeness_pct}%{detail}, {r.word_count} Woerter)")
|
||||
|
||||
for check in r.checks:
|
||||
if check.skipped:
|
||||
continue
|
||||
icon = "+" if check.passed else "!!"
|
||||
parts.append(f" [{icon}] {check.label}")
|
||||
indent = " " if check.level == 2 else " "
|
||||
parts.append(f"{indent}[{icon}] {check.label}")
|
||||
|
||||
if r.error:
|
||||
parts.append(f" FEHLER: {r.error}")
|
||||
|
||||
Reference in New Issue
Block a user