feat(agents): Sprint 1.12 Phase 2 — Cookie-Policy v3 + ImpressumAgent v3 finetune
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / validate-canonical-controls (push) Successful in 11s
CI / loc-budget (push) Successful in 14s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 30s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / sbom-scan (push) Has been skipped

ImpressumAgent v3 (Refactor):
  - v3_engine: laedt direkt alle 75 doc_check_controls['impressum'] ohne
    Sidecar-Filter (Sidecar war zu streng, lieferte nur 3 von 75 MCs).
  - Layer 0 Boost prueft pass_criteria + fail_criteria gegen die 12
    agenteigenen Patterns mit erweiterten Initial-Seeds (User-Vorgabe
    2026-06-09: manuelle Initial-Seeds OK, Auto-Learning erweitert zur
    Laufzeit).
  - ETO-Smoke: 75 DB-MCs · 7 Pattern-Boosts · 24 Boost-Overrides
    (versus 3 DB-MCs vorher).

CookiePolicyAgent v3 (Refactor):
  - cookie_policy/v3_engine.py + cookie_policy/regex_boost.py
  - Laedt direkt alle 381 Cookie-MCs aus doc_check_controls
  - Layer 0 mit 12 eigenen Patterns als Initial-Seed
  - KB-Layer (CMP-Vendor-Cross-Check) bleibt erhalten
  - agent_version='3.0'

Tests: 27/27 gruen (12 v3-impressum, 6 cookie-policy, 9 cross-placement).
Alte v2-cookie-tests umgeschrieben auf v3-Pipeline-Mock.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-09 09:23:12 +02:00
parent 216c7b8eca
commit bd4882e143
7 changed files with 659 additions and 352 deletions
@@ -70,6 +70,27 @@ def test_boost_matches_db_mc_returns_none_when_unrelated():
assert boost_matches_db_mc(boosts, pass_crit) is None
def test_boost_matches_db_mc_uses_fail_criteria():
    """Words from fail_criteria should also contribute to the mapping.

    The pass criteria used here ("Sichtbar") contain no address wording;
    only the fail criteria mention the postal address.
    """
    failing = ["Keine Postadresse angegeben", "Adresse fehlt"]
    passing = ["Sichtbar"]
    hits = {"name_anbieter"}
    assert boost_matches_db_mc(hits, passing, failing) == "name_anbieter"
def test_boost_matches_db_mc_eto_address_case():
    """Concrete ETO case: AUTH-1954-A07 'Postadresse + Geschäftssitz'."""
    address_criteria = [
        "Vollständige Postadresse (Straße, Hausnummer, PLZ, Ort, Land)",
        "Oder: Eindeutige Angabe des Geschäftssitzes",
        "Adresse ist aktuell und korrekt",
    ]
    hits = {"name_anbieter"}
    # No fail criteria are passed here: the pass criteria alone must be
    # enough to map the control onto the boost.
    assert boost_matches_db_mc(hits, address_criteria) == "name_anbieter"
def test_boost_keywords_cover_all_field_ids():
"""Jedes mcs.py field_id muss in BOOST_KEYWORDS ein Eintrag haben."""
from compliance.services.specialist_agents.impressum.mcs import MCS
@@ -1,4 +1,4 @@
"""Tests für Cookie-Policy-Agent."""
"""Tests für Cookie-Policy-Agent v3 (Sprint 1.12 Phase 2)."""
from __future__ import annotations
@@ -22,122 +22,123 @@ Wir verwenden auf unserer Website verschiedene Cookies. Diese werden
in folgende Kategorien eingeteilt:
1. Essentielle Cookies (unbedingt erforderlich)
Zweck: Diese Cookies dienen der grundlegenden Funktion der Website.
Zweck: grundlegende Funktion der Website.
Rechtsgrundlage: § 25 Abs. 2 TDDDG
Laufzeit: Session
2. Funktionale Cookies
Zweck: Speichern Ihre Präferenzen wie Sprache und Region.
Rechtsgrundlage: Art. 6 Abs. 1 lit. a DSGVO
Laufzeit: 30 Tage
2. Funktionale Cookies — Zweck: Präferenzen speichern. Laufzeit: 30 Tage
3. Analytics-Cookies (Performance)
Drittanbieter: Google LLC, USA
Zweck: Nutzungsstatistiken erheben.
Laufzeit: 24 Monate
Cookies: _ga, _gid
Drittland: USA — Standardvertragsklauseln + Data Privacy Framework
3. Analytics-Cookies — Drittanbieter: Google LLC, USA
Cookies: _ga, _gid · Laufzeit: 24 Monate
Drittland: USA — Standardvertragsklauseln + DPF
4. Marketing-Cookies (Tracking)
Drittanbieter: Meta Platforms Inc., USA
Cookies: _fbp, _fbc
Laufzeit: 90 Tage
Sie können Ihre Cookie-Einstellungen jederzeit ändern über den Link
unten oder das Banner erneut öffnen.
Browser-Einstellungen: Auch in Chrome, Firefox, Safari und Edge
können Sie Cookies blockieren oder löschen.
4. Marketing — Drittanbieter: Meta Platforms Inc.
Cookies: _fbp, _fbc · Laufzeit: 90 Tage
Cookie-Einstellungen jederzeit ändern.
Browser-Einstellungen: Chrome, Firefox, Safari, Edge.
Kontakt: datenschutz@example.com
Datenschutzbeauftragter: Max Mustermann
"""
# Sparse cookie-policy fixture: two generic sentences that name no cookie
# categories, no vendors and no concrete retention period.
GAPPY_POLICY = """Cookies
Wir verwenden Cookies um die Website zu betreiben.
Cookies werden so lange gespeichert wie nötig.
"""
def _run(coro):
    """Run *coro* to completion on a private event loop and return its result.

    ``asyncio.get_event_loop()`` is deprecated when no loop is current and
    raises ``RuntimeError`` on newer Python versions, so a fresh loop is
    created for every call. The loop is closed afterwards so it does not
    leak, and the thread's current-loop setting is left untouched.
    """
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(coro)
    finally:
        loop.close()
def test_agent_is_registered():
agent = REGISTRY.get("cookie_policy")
assert agent is not None
assert agent.doc_type == "cookie"
def test_short_text_skipped(monkeypatch):
async def _no_cascade(*a, **kw): return None, []
@pytest.fixture
def mock_v3_pipeline(monkeypatch):
"""Mockt run_v3_pipeline für deterministische Tests offline."""
async def _fake(text, scope):
results = [
{"control_id": "COOKIE-MC-001",
"passed": True, "severity": "MEDIUM",
"label": "Cookie-Kategorien benannt",
"regulation": "TDDDG", "article": "§ 25",
"hint": "", "matched_text": "essentiell", "source": "kw"},
{"control_id": "COOKIE-MC-002",
"passed": False, "severity": "HIGH",
"label": "Versionsdatum / Stand der Policy",
"regulation": "DSGVO", "article": "Art. 5",
"hint": "Bitte 'Stand: TT.MM.JJJJ' angeben",
"matched_text": "", "source": ""},
]
telemetry = {
"layer_0_field_hits": 4,
"layer_0_field_ids": ["categories_named", "purpose_described",
"retention_duration", "version_date"],
"layer_1_pass": 1,
"layer_0_boost_overrides": 0,
"total_mcs": 2,
}
return results, telemetry
monkeypatch.setattr(
"compliance.services.specialist_agents.cookie_policy.agent.cascade",
_no_cascade,
"compliance.services.specialist_agents.cookie_policy.agent.run_v3_pipeline",
_fake,
)
async def _no_validator(*a, **kw): return {}
monkeypatch.setattr(
"compliance.services.specialist_agents.cookie_policy.agent.validate_present",
_no_validator,
)
def test_agent_is_registered():
    """The registry exposes the cookie-policy agent with version 3.0."""
    entry = REGISTRY.get("cookie_policy")
    assert entry is not None
    assert (entry.doc_type, entry.agent_version) == ("cookie", "3.0")
def test_short_text_skipped(mock_v3_pipeline):
    """A one-character text yields only skipped coverage and no findings."""
    agent = CookiePolicyAgent()
    tiny_input = AgentInput(doc_type="cookie", text="x")
    result = _run(agent.evaluate(tiny_input))
    assert result.mc_total > 0
    assert all(entry.status == "skipped" for entry in result.mc_coverage)
    assert not result.findings
def test_full_policy_has_few_high_findings(monkeypatch):
async def _no_cascade(*a, **kw): return None, []
monkeypatch.setattr(
"compliance.services.specialist_agents.cookie_policy.agent.cascade",
_no_cascade,
)
agent = CookiePolicyAgent()
out = _run(agent.evaluate(AgentInput(doc_type="cookie", text=FULL_POLICY)))
high = [f for f in out.findings if f.severity == Severity.HIGH.value]
assert not high, f"unexpected HIGH findings: {[f.field_id for f in high]}"
def test_gappy_policy_triggers_high(monkeypatch):
async def _no_cascade(*a, **kw): return None, []
monkeypatch.setattr(
"compliance.services.specialist_agents.cookie_policy.agent.cascade",
_no_cascade,
)
def test_agent_uses_db_mcs(mock_v3_pipeline):
agent = CookiePolicyAgent()
out = _run(agent.evaluate(AgentInput(doc_type="cookie",
text=GAPPY_POLICY)))
field_ids = {f.field_id for f in out.findings}
# 4 Kategorien fehlen, Vendoren fehlen, Opt-Out fehlt, Tabelle fehlt
assert "categories_named" in field_ids
assert "vendor_recipients" in field_ids
assert "opt_out_mechanism" in field_ids
text=FULL_POLICY)))
db_findings = [f for f in out.findings
if f.check_id.startswith("DBMC-")]
assert len(db_findings) == 1
assert db_findings[0].check_id == "DBMC-COOKIE-MC-002"
assert db_findings[0].severity == Severity.HIGH.value
def test_cmp_vendor_cross_check_emits_finding(monkeypatch):
async def _no_cascade(*a, **kw): return None, []
monkeypatch.setattr(
"compliance.services.specialist_agents.cookie_policy.agent.cascade",
_no_cascade,
)
def test_agent_emits_boost_coverage(mock_v3_pipeline):
    """Coverage lists the DB-MCs plus the pattern-boost slots."""
    agent = CookiePolicyAgent()
    request = AgentInput(doc_type="cookie", text=FULL_POLICY)
    result = _run(agent.evaluate(request))
    # 2 DB-MCs + 12 pattern-boost slots = at least 14 coverage entries
    assert result.mc_total >= 14
    # NOTE(review): 4 presumably mirrors the four field ids reported by the
    # mocked pipeline telemetry — confirm against the fixture.
    confirmed_boosts = [
        entry for entry in result.mc_coverage
        if entry.mc_id.startswith("CP-MC-") and entry.status == "ok"
    ]
    assert len(confirmed_boosts) == 4
def test_agent_notes_telemetry(mock_v3_pipeline):
    """The agent's notes mention the v3 pipeline and the pattern boosts."""
    agent = CookiePolicyAgent()
    request = AgentInput(doc_type="cookie", text=FULL_POLICY)
    result = _run(agent.evaluate(request))
    for marker in ("v3-pipeline", "Pattern-Boosts"):
        assert marker in result.notes
def test_cmp_vendor_cross_check_emits_finding(mock_v3_pipeline):
"""KB-Layer: CMP-Vendoren-Cross-Check bleibt erhalten in v3."""
agent = CookiePolicyAgent()
out = _run(agent.evaluate(AgentInput(
doc_type="cookie", text=FULL_POLICY,
context={"cmp_vendors": [
{"name": "Hotjar"}, # NICHT in Policy
{"name": "Google LLC"}, # IN Policy
{"name": "Hotjar"}, # nicht in Policy
{"name": "Google LLC"}, # in Policy
]},
)))
field_ids = {f.field_id for f in out.findings}
assert "vendor_consistency" in field_ids
cmp_f = next(f for f in out.findings
if f.field_id == "vendor_consistency")
assert "Hotjar" in cmp_f.evidence
assert "Google" not in cmp_f.evidence
def test_recommendations_are_built():
agent = CookiePolicyAgent()
out = _run(agent.evaluate(AgentInput(doc_type="cookie",
text=GAPPY_POLICY)))
assert out.recommendations
# Jede Recommendation hat mind. ein related_finding
for r in out.recommendations:
assert r.related_finding_ids
f = next(f for f in out.findings
if f.field_id == "vendor_consistency")
assert "Hotjar" in f.evidence