Files
breakpilot-compliance/backend-compliance/tests/test_specialist_cookie_policy.py
T
Benjamin Admin bd4882e143
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / validate-canonical-controls (push) Successful in 11s
CI / loc-budget (push) Successful in 14s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 30s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / sbom-scan (push) Has been skipped
feat(agents): Sprint 1.12 Phase 2 — Cookie-Policy v3 + ImpressumAgent v3 finetune
ImpressumAgent v3 (Refactor):
  - v3_engine: laedt direkt alle 75 doc_check_controls['impressum'] ohne
    Sidecar-Filter (Sidecar war zu streng, lieferte nur 3 von 75 MCs).
  - Layer 0 Boost prueft pass+fail_criteria gegen meine 12 Patterns mit
    erweiterten Initial-Seeds (User-Vorgabe 2026-06-09:
    manuelle Initial-Seeds OK, Auto-Learning erweitert zur Laufzeit).
  - ETO-Smoke: 75 DB-MCs · 7 Pattern-Boosts · 24 Boost-Overrides
    (versus 3 DB-MCs vorher).

CookiePolicyAgent v3 (Refactor):
  - cookie_policy/v3_engine.py + cookie_policy/regex_boost.py
  - Laedt direkt alle 381 Cookie-MCs aus doc_check_controls
  - Layer 0 mit 12 eigenen Patterns als Initial-Seed
  - KB-Layer (CMP-Vendor-Cross-Check) bleibt erhalten
  - agent_version='3.0'

Tests: 27/27 gruen (12 v3-impressum, 6 cookie-policy, 9 cross-placement).
Alte v2-cookie-tests umgeschrieben auf v3-Pipeline-Mock.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-09 09:23:12 +02:00

145 lines
4.7 KiB
Python

"""Tests für Cookie-Policy-Agent v3 (Sprint 1.12 Phase 2)."""
from __future__ import annotations
import asyncio
import pytest
from compliance.services.specialist_agents import (
REGISTRY,
AgentInput,
CookiePolicyAgent,
Severity,
)
# Sample German cookie policy passed as the ``text`` input by the tests in this
# module. It names Google LLC and Meta Platforms Inc. as third parties and does
# not mention Hotjar. The text is runtime test data: keep it verbatim.
FULL_POLICY = """Cookie-Richtlinie
Stand: 1. Juni 2026
Wir verwenden auf unserer Website verschiedene Cookies. Diese werden
in folgende Kategorien eingeteilt:
1. Essentielle Cookies (unbedingt erforderlich)
Zweck: grundlegende Funktion der Website.
Rechtsgrundlage: § 25 Abs. 2 TDDDG
Laufzeit: Session
2. Funktionale Cookies — Zweck: Präferenzen speichern. Laufzeit: 30 Tage
3. Analytics-Cookies — Drittanbieter: Google LLC, USA
Cookies: _ga, _gid · Laufzeit: 24 Monate
Drittland: USA — Standardvertragsklauseln + DPF
4. Marketing — Drittanbieter: Meta Platforms Inc.
Cookies: _fbp, _fbc · Laufzeit: 90 Tage
Cookie-Einstellungen jederzeit ändern.
Browser-Einstellungen: Chrome, Firefox, Safari, Edge.
Kontakt: datenschutz@example.com
Datenschutzbeauftragter: Max Mustermann
"""
def _run(coro):
    """Run the awaitable *coro* to completion and return its result.

    A private event loop is created for every call and closed afterwards.
    This avoids ``asyncio.get_event_loop()``, which is deprecated when no loop
    is running and raises ``RuntimeError`` on newer Python versions, and it
    leaves the process-wide "current event loop" untouched so other test
    modules are not affected.
    """
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(coro)
    finally:
        # Always release the loop's resources, even when the coroutine raises.
        loop.close()
@pytest.fixture
def mock_v3_pipeline(monkeypatch):
    """Swap the agent's v3 pipeline and validator for offline stubs.

    Patches ``run_v3_pipeline`` and ``validate_present`` on the cookie-policy
    agent module so the tests see fixed, deterministic data: one passing
    control, one failing control and a small telemetry dict.
    """

    async def _stub_pipeline(text, scope):
        # Both arguments are ignored; the canned payload is rebuilt on every
        # call so no mutable state is shared between invocations.
        passing_control = {
            "control_id": "COOKIE-MC-001",
            "passed": True,
            "severity": "MEDIUM",
            "label": "Cookie-Kategorien benannt",
            "regulation": "TDDDG",
            "article": "§ 25",
            "hint": "",
            "matched_text": "essentiell",
            "source": "kw",
        }
        failing_control = {
            "control_id": "COOKIE-MC-002",
            "passed": False,
            "severity": "HIGH",
            "label": "Versionsdatum / Stand der Policy",
            "regulation": "DSGVO",
            "article": "Art. 5",
            "hint": "Bitte 'Stand: TT.MM.JJJJ' angeben",
            "matched_text": "",
            "source": "",
        }
        layer_stats = {
            "layer_0_field_hits": 4,
            "layer_0_field_ids": [
                "categories_named",
                "purpose_described",
                "retention_duration",
                "version_date",
            ],
            "layer_1_pass": 1,
            "layer_0_boost_overrides": 0,
            "total_mcs": 2,
        }
        return [passing_control, failing_control], layer_stats

    async def _stub_validator(*_args, **_kwargs):
        # Accepts any call signature and reports nothing.
        return {}

    replacements = (
        (
            "compliance.services.specialist_agents.cookie_policy.agent.run_v3_pipeline",
            _stub_pipeline,
        ),
        (
            "compliance.services.specialist_agents.cookie_policy.agent.validate_present",
            _stub_validator,
        ),
    )
    for dotted_target, stub in replacements:
        monkeypatch.setattr(dotted_target, stub)
def test_agent_is_registered():
    """The registry exposes a "cookie_policy" agent for doc type "cookie", v3.0."""
    registered = REGISTRY.get("cookie_policy")
    assert registered is not None
    # Check identity metadata only; behaviour is covered by the other tests.
    assert registered.doc_type == "cookie"
    assert registered.agent_version == "3.0"
def test_short_text_skipped(mock_v3_pipeline):
    """A one-character document yields only skipped coverage and no findings."""
    agent = CookiePolicyAgent()
    request = AgentInput(doc_type="cookie", text="x")
    result = _run(agent.evaluate(request))
    # NOTE(review): this check also passes when mc_coverage is empty — confirm
    # whether a non-empty coverage list should be required here.
    not_skipped = [entry for entry in result.mc_coverage
                   if entry.status != "skipped"]
    assert not not_skipped
    assert not result.findings
def test_agent_uses_db_mcs(mock_v3_pipeline):
    """The single failing mocked control surfaces as one "DBMC-" finding."""
    agent = CookiePolicyAgent()
    request = AgentInput(doc_type="cookie", text=FULL_POLICY)
    result = _run(agent.evaluate(request))

    db_backed = []
    for finding in result.findings:
        if finding.check_id.startswith("DBMC-"):
            db_backed.append(finding)

    # The mocked pipeline returns exactly one control with passed=False.
    assert len(db_backed) == 1
    (failed_control,) = db_backed
    assert failed_control.check_id == "DBMC-COOKIE-MC-002"
    assert failed_control.severity == Severity.HIGH.value
def test_agent_emits_boost_coverage(mock_v3_pipeline):
    """Coverage includes the pattern-boost slots, four of them with status "ok"."""
    agent = CookiePolicyAgent()
    request = AgentInput(doc_type="cookie", text=FULL_POLICY)
    result = _run(agent.evaluate(request))

    # 2 DB controls + 12 pattern-boost slots = at least 14 coverage entries.
    assert result.mc_total >= 14

    # Presumably matches the four layer_0_field_ids in the mocked telemetry.
    ok_boost_count = sum(
        1
        for entry in result.mc_coverage
        if entry.mc_id.startswith("CP-MC-") and entry.status == "ok"
    )
    assert ok_boost_count == 4
def test_agent_notes_telemetry(mock_v3_pipeline):
    """The output notes mention both the v3 pipeline and the pattern boosts."""
    agent = CookiePolicyAgent()
    request = AgentInput(doc_type="cookie", text=FULL_POLICY)
    result = _run(agent.evaluate(request))
    for marker in ("v3-pipeline", "Pattern-Boosts"):
        assert marker in result.notes
def test_cmp_vendor_cross_check_emits_finding(mock_v3_pipeline):
    """KB layer: the CMP vendor cross-check is still performed in v3."""
    cmp_vendors = [
        {"name": "Hotjar"},      # not mentioned in the policy text
        {"name": "Google LLC"},  # mentioned in the policy text
    ]
    agent = CookiePolicyAgent()
    request = AgentInput(
        doc_type="cookie",
        text=FULL_POLICY,
        context={"cmp_vendors": cmp_vendors},
    )
    result = _run(agent.evaluate(request))

    vendor_findings = [finding for finding in result.findings
                       if finding.field_id == "vendor_consistency"]
    assert vendor_findings
    # The first vendor-consistency finding must name the undeclared vendor.
    assert "Hotjar" in vendor_findings[0].evidence