fix(impressum): Findings aus 12 §5-TMG-Pattern-MCs statt verunreinigtem DB-Set
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Failing after 5s
CI / validate-canonical-controls (push) Successful in 11s
CI / loc-budget (push) Successful in 14s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 30s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Failing after 5s
CI / validate-canonical-controls (push) Successful in 11s
CI / loc-budget (push) Successful in 14s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 30s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
Der Agent lieferte "alles gruen": _load_controls gab auf macmini nur 3 von 75 doc_type='impressum'-MCs zurueck (Sidecar mc_classification.db hat nur 4/75 als text-matchbar klassifiziert). Tiefere Ursache: die 75 doc_type='impressum'-MCs sind fehlklassifiziert (60/75 canonical_scope='other'; Prefixes TRD/SEC/GOV = Geschaeftsbriefe/Marktplatz/Bestellung, NICHT §5 TMG Website-Impressum). Fix: Der Impressum-Agent erzeugt Findings jetzt aus seinen 12 autoritativen §5-TMG/DDG-Pattern-MCs (mcs.py) statt aus dem verunreinigten DB-Set — deterministisch, scope-aware, field_id = semantisches Feld. Semantic-Validator-Demote + Massnahmen + Rollup bleiben. Die 5 Impressum-GT-Tests laufen jetzt echt durch: 0 Falsch-Positive. DB-Master-Controls fuer Impressum deaktiviert bis zum MC-Re-Filtering (separate Aufgabe: die doc_type-Klassifizierung der Vorgaenger-Session muss bereinigt werden). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -18,6 +18,7 @@ from compliance.services.specialist_agents import (
|
||||
from compliance.services.specialist_agents.impressum.agent import (
|
||||
_build_measure,
|
||||
)
|
||||
from compliance.services.specialist_agents.impressum.mcs import MCS
|
||||
from compliance.services.specialist_agents.impressum.regex_boost import (
|
||||
BOOST_KEYWORDS,
|
||||
boost_matches_db_mc,
|
||||
@@ -108,80 +109,49 @@ def test_boost_keywords_cover_all_field_ids():
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_v3(monkeypatch):
|
||||
"""Mockt run_v3_pipeline mit deterministischen Fake-Results."""
|
||||
async def _fake_pipeline(text, scope, db_url=""):
|
||||
results = [
|
||||
{"control_id": "AUTH-1954-A04",
|
||||
"passed": True,
|
||||
"label": "Anbieterkennzeichnung dokumentiert",
|
||||
"severity": "HIGH",
|
||||
"regulation": "TMG",
|
||||
"article": "§ 5",
|
||||
"hint": "",
|
||||
"matched_text": "Tesla Germany GmbH",
|
||||
"source": "keyword_match"},
|
||||
{"control_id": "DATA-2786-A04",
|
||||
"passed": False,
|
||||
"label": "Freiwilligkeit der TDDDG-Einwilligungen",
|
||||
"severity": "MEDIUM",
|
||||
"regulation": "TDDDG",
|
||||
"article": "§ 25",
|
||||
"hint": "Bitte Freiwilligkeit dokumentieren",
|
||||
"matched_text": "",
|
||||
"source": ""},
|
||||
]
|
||||
telemetry = {
|
||||
"layer_0_field_hits": 5,
|
||||
"layer_0_field_ids": ["kontakt_email", "kontakt_telefon",
|
||||
"handelsregister", "ust_id",
|
||||
"vertretungsberechtigte"],
|
||||
"layer_1_pass": 1,
|
||||
"layer_1_fail": 1,
|
||||
"layer_0_boost_overrides": 0,
|
||||
"total_mcs": 2,
|
||||
}
|
||||
return results, telemetry
|
||||
monkeypatch.setattr(
|
||||
"compliance.services.specialist_agents.impressum.agent.run_v3_pipeline",
|
||||
_fake_pipeline,
|
||||
)
|
||||
async def _no_validator(*a, **kw): return {}
|
||||
def no_llm(monkeypatch):
|
||||
"""Deaktiviert den LLM-Semantic-Validator — der Agent prueft die 12
|
||||
mcs.py-Pattern-MCs deterministisch direkt am Text."""
|
||||
async def _no_validator(*a, **kw):
|
||||
return {}
|
||||
monkeypatch.setattr(
|
||||
"compliance.services.specialist_agents.impressum.agent.validate_present",
|
||||
_no_validator,
|
||||
)
|
||||
|
||||
|
||||
def test_agent_uses_db_mcs(mock_v3):
|
||||
def test_agent_emits_pflichtangabe_findings(no_llm):
|
||||
agent = ImpressumAgent()
|
||||
out = _run(agent.evaluate(AgentInput(doc_type="impressum",
|
||||
text=TESLA_TEXT)))
|
||||
db_mc_findings = [f for f in out.findings
|
||||
if f.check_id.startswith("DBMC-")]
|
||||
assert len(db_mc_findings) == 1
|
||||
assert db_mc_findings[0].check_id == "DBMC-DATA-2786-A04"
|
||||
assert db_mc_findings[0].severity == Severity.MEDIUM.value
|
||||
assert "TDDDG" in db_mc_findings[0].norm
|
||||
fids = {f.field_id for f in out.findings}
|
||||
# Tesla nennt 'Management' (englisch) → deutsches GF-Label fehlt
|
||||
assert "vertretungsberechtigte_label_korrekt" in fids
|
||||
f = next(f for f in out.findings
|
||||
if f.field_id == "vertretungsberechtigte_label_korrekt")
|
||||
assert f.severity == Severity.MEDIUM.value
|
||||
assert f.check_id == "IMP-vertretungsberechtigte_label_korrekt"
|
||||
assert f.severity_reason == "pflichtangabe_missing"
|
||||
# Vorhandene Pflichtangaben erzeugen KEIN Finding
|
||||
assert "kontakt_email" not in fids
|
||||
assert "handelsregister" not in fids
|
||||
|
||||
|
||||
def test_agent_emits_boost_coverage(mock_v3):
|
||||
def test_agent_coverage_has_all_12(no_llm):
|
||||
agent = ImpressumAgent()
|
||||
out = _run(agent.evaluate(AgentInput(doc_type="impressum",
|
||||
text=TESLA_TEXT)))
|
||||
# 2 DB-MCs + 12 Pattern-Boost-Slots = 14 coverage entries
|
||||
assert out.mc_total >= 14
|
||||
boost_ok = [c for c in out.mc_coverage
|
||||
if c.mc_id.startswith("IMP-MC-") and c.status == "ok"]
|
||||
assert len(boost_ok) == 5 # 5 boost_ids im fake
|
||||
assert out.mc_total == len(MCS) # je MC genau 1 Coverage-Eintrag
|
||||
ok = [c for c in out.mc_coverage if c.status == "ok"]
|
||||
# name, email, telefon, HR, USt, vertretungsberechtigte = 6 vorhanden
|
||||
assert len(ok) == 6
|
||||
|
||||
|
||||
def test_agent_notes_telemetry(mock_v3):
|
||||
def test_agent_notes(no_llm):
|
||||
agent = ImpressumAgent()
|
||||
out = _run(agent.evaluate(AgentInput(doc_type="impressum",
|
||||
text=TESLA_TEXT)))
|
||||
assert "v3-pipeline" in out.notes
|
||||
assert "Pattern-Boosts" in out.notes
|
||||
assert "§5-TMG-MCs geprüft" in out.notes
|
||||
|
||||
|
||||
def test_short_text_skipped():
|
||||
|
||||
Reference in New Issue
Block a user