feat(platform): live-wire AGB v2 + DSE v3 + Architektur-Tab (#29)
CI / detect-changes (push) Successful in 7s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 9s
CI / validate-canonical-controls (push) Successful in 12s
CI / loc-budget (push) Successful in 24s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m11s
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 24s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / detect-changes (push) Successful in 7s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 9s
CI / validate-canonical-controls (push) Successful in 12s
CI / loc-budget (push) Successful in 24s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m11s
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 24s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
AGB v2 (decision_method routing, 71%FP->~0) + DSE v3 (4-layer, recovered from container) + Architektur-Tab into /sdk/agent live path. Incl CI robustness (detect-changes.sh + PR-head checkout) + security (hardcoded Qdrant key removed, gitleaks allowlist). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit was merged in pull request #29.
This commit is contained in:
@@ -1,12 +1,27 @@
|
||||
"""AGBAgent — kuratierte §§-305-ff-BGB-Checkliste (ChecklistAgent-Subclass)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
"""AGBAgent (v2, routed). Embedding/LLM offline-gestubbt → kein Netzwerk."""
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
|
||||
import compliance.services.specialist_agents.agb._pipeline as pipeline
|
||||
from compliance.services.checkers.base import CheckResult
|
||||
from compliance.services.specialist_agents import REGISTRY, AgentInput
|
||||
|
||||
|
||||
class _Stub:
|
||||
def __init__(self, present):
|
||||
self._p = present
|
||||
|
||||
async def check(self, ctrl, doc):
|
||||
return CheckResult(present=self._p)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _offline(monkeypatch):
    """Replace the pipeline's ``_EMB`` and ``_LLM`` with stubs reporting ``present=None``."""
    for attr_name in ("_EMB", "_LLM"):
        # A fresh stub per attribute, exactly one setattr each.
        monkeypatch.setattr(pipeline, attr_name, _Stub(None))
|
||||
|
||||
|
||||
def _run(text: str):
    """Evaluate *text* with the registered ``agb`` agent and return the agent output."""
    evaluate = REGISTRY.get("agb").evaluate
    request = AgentInput(doc_type="agb", text=text)
    return asyncio.run(evaluate(request))
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
"""AGB routed-Pipeline: Gate, Reference-/Embedding-Rescue, LLM-skip, Re-Tiering.
|
||||
Embedding + LLM offline-gestubbt → deterministisch, kein Netzwerk (Reference = echtes Regex)."""
|
||||
import asyncio
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
import compliance.services.specialist_agents.agb._pipeline as pipeline
|
||||
from compliance.services.checkers.base import CheckResult
|
||||
from compliance.services.specialist_agents._base import AgentInput
|
||||
from compliance.services.specialist_agents.agb.agent import AGBAgent
|
||||
|
||||
|
||||
class _Stub:
|
||||
def __init__(self, present):
|
||||
self._p = present
|
||||
|
||||
async def check(self, ctrl, doc):
|
||||
return CheckResult(present=self._p)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _offline(monkeypatch):
    """Patch ``pipeline._EMB`` and ``pipeline._LLM`` with stubs that answer ``present=None``."""
    for checker_attr in ("_EMB", "_LLM"):
        monkeypatch.setattr(pipeline, checker_attr, _Stub(None))
|
||||
|
||||
|
||||
def _routed(field_ids, text, context=None):
    """Run ``pipeline.run_routed`` on minimal findings built from *field_ids*.

    Each finding is a ``SimpleNamespace`` exposing only ``field_id``. A missing
    or falsy *context* is replaced by an empty dict. Returns whatever
    ``run_routed`` returns.
    """
    findings = []
    for fid in field_ids:
        findings.append(SimpleNamespace(field_id=fid))
    ctx = context or {}
    return asyncio.run(pipeline.run_routed(findings, text, ctx))
|
||||
|
||||
|
||||
def test_gate_termination_na_for_oneoff_shop():
    """Both termination fields are gated and nothing is kept for a withdrawal-notice text."""
    sentence = "Widerrufsbelehrung: Sie koennen binnen 14 Tagen widerrufen. "
    kept, _resolved, gated = _routed(
        ["termination", "termination_form"], sentence * 5)
    assert set(gated) == {"termination", "termination_form"}
    assert kept == []
|
||||
|
||||
|
||||
def test_reference_rescues_data_protection():
    """A text pointing at the privacy policy resolves ``data_protection`` and keeps nothing."""
    sentence = "Einzelheiten zur Verarbeitung in unserer Datenschutzerklaerung. "
    kept, resolved, _gated = _routed(["data_protection"], sentence * 5)
    assert "data_protection" in resolved
    assert kept == []
|
||||
|
||||
|
||||
def test_embedding_rescue_resolves(monkeypatch):
    """With the embedding stub answering ``present=True``, ``scope`` lands in *resolved*."""
    positive_embedding = _Stub(True)
    monkeypatch.setattr(pipeline, "_EMB", positive_embedding)
    _kept, resolved, _gated = _routed(["scope"], "x" * 200)
    assert "scope" in resolved
|
||||
|
||||
|
||||
def test_llm_skipped_keeps_finding():
    """With ``skip_llm`` set, the finding stays in *kept* and nothing is resolved."""
    kept, resolved, _gated = _routed(
        ["delivery_timeframe"], "x" * 200, {"skip_llm": True})
    kept_ids = [f.field_id for f in kept]
    assert kept_ids == ["delivery_timeframe"]
    assert resolved == []
|
||||
|
||||
|
||||
def test_evaluate_retiers_low_out_of_findings():
    """``AGBAgent.evaluate`` reports agent ``agb`` v2.0 and only HIGH/MEDIUM findings."""
    paragraph = ("Allgemeine Geschaeftsbedingungen. Vertragsschluss durch Bestellung. "
                 "Haftung beschraenkt. Gerichtsstand Muenchen. ")
    agent = AGBAgent()
    request = AgentInput(doc_type="agb", text=paragraph * 6)
    out = asyncio.run(agent.evaluate(request))

    assert out.agent == "agb"
    assert out.agent_version == "2.0"
    allowed = ("HIGH", "MEDIUM")
    assert all(f.severity in allowed for f in out.findings)
|
||||
@@ -0,0 +1,14 @@
|
||||
"""AGB muss im LIVE-Pfad verdrahtet sein (_TOPIC_AGENTS), nicht nur per Snapshot."""
|
||||
from compliance.api.agent_check._agent_outputs import _TOPIC_AGENTS
|
||||
|
||||
|
||||
def test_agb_wired_into_live_topic_agents():
    """The ``agb`` topic must map to ``agb`` in ``_TOPIC_AGENTS``."""
    wired = _TOPIC_AGENTS.get("agb")
    assert wired == "agb"
|
||||
|
||||
|
||||
def test_dse_wired_into_live_topic_agents():
    """The ``dse`` topic must map to ``dse`` in ``_TOPIC_AGENTS``."""
    wired = _TOPIC_AGENTS.get("dse")
    assert wired == "dse"
|
||||
|
||||
|
||||
def test_impressum_still_wired():
    """The ``impressum`` topic must still map to ``impressum`` in ``_TOPIC_AGENTS``."""
    wired = _TOPIC_AGENTS.get("impressum")
    assert wired == "impressum"
|
||||
@@ -0,0 +1,83 @@
|
||||
"""Unit-Tests der Prüfer-Library. Embedding + LLM gemockt → kein Netzwerk."""
|
||||
import asyncio
|
||||
|
||||
import compliance.services.llm_cascade as cascade_mod
|
||||
import compliance.services.mc_embedding_matcher as emb_mod
|
||||
from compliance.services.checkers.base import (
|
||||
ControlSpec,
|
||||
DecisionMethod,
|
||||
DocContext,
|
||||
VerificationMethod,
|
||||
)
|
||||
from compliance.services.checkers.embedding_checker import EmbeddingChecker
|
||||
from compliance.services.checkers.llm_checker import LLMChecker
|
||||
from compliance.services.checkers.reference_checker import ReferenceChecker
|
||||
|
||||
|
||||
def _run(coro):
|
||||
return asyncio.run(coro)
|
||||
|
||||
|
||||
def test_reference_present_and_absent():
    """``ReferenceChecker`` reports a hit with an https link, and a miss without a match."""
    checker = ReferenceChecker()
    spec = ControlSpec(
        "data_protection",
        VerificationMethod.REFERENCE,
        DecisionMethod.LINK_RESOLVER,
        patterns=[r"datenschutz(erkl|bestimmung|hinweis)"],
    )

    with_link = DocContext(
        text="Details in unserer Datenschutzerklaerung: https://x.de/datenschutz")
    hit = _run(checker.check(spec, with_link))
    assert hit.present is True
    assert hit.detail.get("link", "").startswith("https://")

    without_match = DocContext(text="Keine Angabe zum Datenschutz-Thema.")
    miss = _run(checker.check(spec, without_match))
    assert miss.present is False
|
||||
|
||||
|
||||
def test_embedding_threshold(monkeypatch):
    """``EmbeddingChecker`` is present above the threshold and absent below it.

    The embedding module is patched so no service is contacted: chunking
    returns the text as one chunk, embedding returns a constant vector, and
    ``_cosine`` is pinned to 0.90 and then 0.20.
    """
    monkeypatch.setattr(emb_mod, "DIM", 3, raising=False)
    monkeypatch.setattr(emb_mod, "_chunk_text", lambda t: [t], raising=False)

    async def _embed(texts):
        return [[1.0, 0.0, 0.0] for _ in texts]

    monkeypatch.setattr(emb_mod, "_embed_texts", _embed, raising=False)

    checker = EmbeddingChecker()
    spec = ControlSpec(
        "scope_t",
        VerificationMethod.CONTENT,
        DecisionMethod.EMBEDDING,
        paraphrases=["Geltungsbereich"],
        embed_threshold=0.58,
    )

    # Similarity above the 0.58 threshold.
    monkeypatch.setattr(emb_mod, "_cosine", lambda a, b: 0.90, raising=False)
    above = _run(checker.check(spec, DocContext(text="x" * 200)))
    assert above.present is True
    assert above.confidence >= 0.58

    # Similarity below the threshold.
    monkeypatch.setattr(emb_mod, "_cosine", lambda a, b: 0.20, raising=False)
    below = _run(checker.check(spec, DocContext(text="x" * 200)))
    assert below.present is False
|
||||
|
||||
|
||||
def test_embedding_offline_returns_none(monkeypatch):
    """A ``ConnectionError`` from the embedding call yields ``present is None`` (fail-safe)."""

    async def _boom(texts):
        raise ConnectionError("embedding-service down")

    monkeypatch.setattr(emb_mod, "_embed_texts", _boom, raising=False)

    checker = EmbeddingChecker()
    spec = ControlSpec(
        "scope_off",
        VerificationMethod.CONTENT,
        DecisionMethod.EMBEDDING,
        paraphrases=["x"],
        embed_threshold=0.6,
    )
    result = _run(checker.check(spec, DocContext(text="y" * 200)))
    assert result.present is None  # fail-safe
|
||||
|
||||
|
||||
def test_llm_present_and_absent(monkeypatch):
    """``LLMChecker`` maps an ERFUELLT verdict to present and a FEHLT verdict to absent.

    ``call_with_cascade`` is replaced by canned async responses, so no model is called.
    """
    checker = LLMChecker()
    spec = ControlSpec(
        "delivery_timeframe",
        VerificationMethod.CONTENT,
        DecisionMethod.LLM,
        topic_regex=r"liefer",
        question="Konkrete Lieferfrist?",
    )
    section = ("1. Lieferung\nDie Ware wird innerhalb von 2 Werktagen "
               "geliefert.\n")
    doc = DocContext(text=section * 4)

    async def _erfuellt(system, user, **kw):
        return {"text": '{"verdict":"ERFUELLT","zitat":"2 Werktagen","begruendung":"x"}',
                "source": "qwen", "confidence": 0.7}

    monkeypatch.setattr(cascade_mod, "call_with_cascade", _erfuellt, raising=False)
    fulfilled = _run(checker.check(spec, doc))
    assert fulfilled.present is True

    async def _fehlt(system, user, **kw):
        return {"text": '{"verdict":"FEHLT"}', "source": "qwen"}

    monkeypatch.setattr(cascade_mod, "call_with_cascade", _fehlt, raising=False)
    missing = _run(checker.check(spec, doc))
    assert missing.present is False
|
||||
@@ -1,65 +1,153 @@
|
||||
"""DSEAgent — kuratierte Art-13/14-Checkliste (kein Library-Firehose)."""
|
||||
"""DSE-Agent v3 — DB-Controls (doc_check_controls) via run_v3_pipeline +
|
||||
kuratierter Art-13-Regex-Boost (Layer 0). Volle Parität zu impressum/cookie.
|
||||
|
||||
Die Tests prüfen die deterministischen Bausteine (regex_boost/mcs) ohne DB und
|
||||
den Agent-Pfad mit gemocktem run_v3_pipeline (CI hat keine DB).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
import compliance.services.specialist_agents.dse.agent as dse_agent
|
||||
from compliance.services.specialist_agents import REGISTRY, AgentInput
|
||||
from compliance.services.specialist_agents.dse.mcs import MCS, MC_IDS
|
||||
from compliance.services.specialist_agents.dse.regex_boost import (
|
||||
boost_matches_db_mc,
|
||||
compute_regex_boosts,
|
||||
criteria_on_topic,
|
||||
)
|
||||
|
||||
_DSE_SAMPLE = (
|
||||
"Datenschutzerklaerung. Verantwortlich im Sinne der DSGVO ist die Muster "
|
||||
"GmbH, Musterstrasse 1, 12345 Berlin. E-Mail: info@muster.de. "
|
||||
"Datenschutzbeauftragter: dsb@muster.de. Zwecke der Verarbeitung und "
|
||||
"Rechtsgrundlage Art. 6 Abs. 1 lit. f berechtigtes Interesse. Empfaenger "
|
||||
"Ihrer Daten sind Auftragsverarbeiter. Speicherdauer der Daten richtet "
|
||||
"sich nach Aufbewahrungsfristen. Sie haben das Recht auf Auskunft, das "
|
||||
"Recht auf Berichtigung, das Recht auf Loeschung sowie ein "
|
||||
"Widerspruchsrecht. Beschwerde bei der Aufsichtsbehoerde moeglich. Stand: "
|
||||
"Januar 2026. ") * 3
|
||||
|
||||
|
||||
def _run(text: str):
    """Evaluate *text* with the registered ``dse`` agent and return the agent output.

    NOTE(review): a later ``_run(text, context=None)`` in this listing rebinds
    the same name; confirm which of the two definitions is meant to survive.
    """
    evaluate = REGISTRY.get("dse").evaluate
    request = AgentInput(doc_type="dse", text=text)
    return asyncio.run(evaluate(request))
|
||||
|
||||
|
||||
# ── Registrierung ────────────────────────────────────────────────────────
|
||||
def test_dse_agent_registered():
    """The ``dse`` agent is registered and reports version 3.0 for doc type ``dse``."""
    # One lookup is enough; the original repeated the lookup and the
    # not-None assertion back to back.
    agent = REGISTRY.get("dse")
    assert agent is not None
    assert agent.agent_version == "3.0"
    assert agent.doc_type == "dse"
|
||||
|
||||
|
||||
def test_dse_detects_core_obligations():
    """A policy text naming controller and legal basis yields matching ``ok`` coverage labels."""
    paragraph = (
        "Datenschutzerklaerung. Verantwortlich im Sinne der DSGVO ist die "
        "Muster GmbH, Musterstrasse 1, 12345 Berlin. E-Mail: info@muster.de. "
        "Datenschutzbeauftragter: dsb@muster.de. Zwecke der Verarbeitung und "
        "Rechtsgrundlage Art. 6 Abs. 1. Empfaenger Ihrer Daten. Speicherdauer "
        "der Daten. Ihre Rechte: Auskunft, Loeschung, Widerspruch, Beschwerde "
        "bei der Aufsichtsbehoerde. ")
    out = _run(paragraph * 3)
    assert out.agent == "dse"
    # The 10 L1 mandatory items always count, plus the L2 details whose parent
    # is present (missing parents → L2 skipped, no 'na' noise).
    assert 10 <= out.mc_total <= 33
    ok_labels = [entry.label for entry in out.mc_coverage if entry.status == "ok"]
    assert any("Verantwortlich" in label for label in ok_labels)
    assert any("Rechtsgrundlage" in label for label in ok_labels)
|
||||
def test_owned_mc_ids_match_checklist():
    """``MC_IDS`` mirrors the ``mc_id`` of every entry in ``MCS`` and has at least 10 ids."""
    # owned_mc_ids = the boost-pattern ids (lifted from ART13_CHECKLIST).
    expected_ids = tuple(entry.mc_id for entry in MCS)
    assert MC_IDS == expected_ids
    # At least the 10 L1 mandatory fields, plus L2.
    assert len(MC_IDS) >= 10
|
||||
|
||||
|
||||
def test_dse_missing_obligations_are_findings():
    """Filler text without any policy content produces findings, at least one of them HIGH."""
    filler = "Lorem ipsum dolor sit amet consectetur adipiscing elit. "
    out = _run(filler * 6)
    assert out.findings
    severities = (finding.severity for finding in out.findings)
    assert any(sev == "HIGH" for sev in severities)
|
||||
# ── Layer-0 Regex-Boost (deterministisch, ohne DB) ───────────────────────
|
||||
def test_regex_boost_detects_core_fields():
    """``compute_regex_boosts`` recognises the central Art. 13 fields in the sample text."""
    boosts = compute_regex_boosts(_DSE_SAMPLE)
    # The central Art. 13 fields must be recognised.
    core_fields = ("controller", "legal_basis", "rights", "complaint",
                   "retention", "dse_version_date")
    for field in core_fields:
        assert field in boosts, f"{field} nicht erkannt: {sorted(boosts)}"
|
||||
|
||||
|
||||
def test_regex_boost_empty_on_short_text():
    """A very short text yields an empty boost set."""
    boosts = compute_regex_boosts("zu kurz")
    assert boosts == set()
|
||||
|
||||
|
||||
def test_criteria_on_topic_accepts_dse_rejects_foreign():
    """``criteria_on_topic`` accepts privacy criteria, rejects shop criteria, keeps empty lists."""
    privacy_criteria = [
        "Rechtsgrundlage gemäß Art. 6 DSGVO benannt",
        "Speicherdauer und Löschfrist angegeben",
    ]
    assert criteria_on_topic(privacy_criteria) is True

    shop_criteria = [
        "Bestellbestätigung wird per E-Mail versendet",
        "Versandkosten werden im Warenkorb angezeigt",
    ]
    assert criteria_on_topic(shop_criteria) is False

    # Empty criteria → conservatively kept as on-topic.
    assert criteria_on_topic([]) is True
|
||||
|
||||
|
||||
def test_boost_matches_db_mc_third_country():
    """Third-country criteria match the ``third_country`` boost; no boosts give ``None``."""
    criteria = [
        "Standardvertragsklauseln für Drittland benannt",
        "Geeignete Garantien bei Übermittlung in ein Drittland",
    ]
    active_boosts = {"third_country", "controller"}
    assert boost_matches_db_mc(active_boosts, criteria) == "third_country"
    # Without matching boosts → None.
    assert boost_matches_db_mc(set(), criteria) is None
|
||||
|
||||
|
||||
# ── Agent-Pfad mit gemocktem run_v3_pipeline ─────────────────────────────
|
||||
def _mock_v3(results, telemetry=None):
|
||||
async def _fake(text, scope, db_url="", skip_embedding=False):
|
||||
return results, (telemetry or {
|
||||
"total_mcs": len(results), "layer_0_field_hits": 0,
|
||||
"layer_0_field_ids": [], "layer_0_boost_overrides": 0,
|
||||
"sector_dropped": 0, "offtopic_dropped": 0})
|
||||
return _fake
|
||||
|
||||
|
||||
def _run(text, context=None):
    """Evaluate *text* with the registered ``dse`` agent.

    A missing or falsy *context* is passed on as an empty dict. Returns the
    agent output.
    """
    evaluate = REGISTRY.get("dse").evaluate
    request = AgentInput(doc_type="dse", text=text, context=context or {})
    return asyncio.run(evaluate(request))
|
||||
|
||||
|
||||
def test_dse_short_text_skips():
    """A too-short text gives confidence 0.0 and a non-empty, all-``skipped`` coverage."""
    out = _run("zu kurz")
    assert out.confidence == 0.0
    # A bare ``all(...)`` passes vacuously on an empty coverage list, so the
    # non-empty check is part of the same assertion. The original also carried
    # the weaker bare form, which this assertion already implies.
    assert out.mc_coverage and all(
        c.status == "skipped" for c in out.mc_coverage)
|
||||
|
||||
|
||||
def test_third_country_high_when_applicable_no_na_detail_short_action():
    """Text without a third-country section, scan context flagging a transfer.

    Expectations (from the original notes):
    - third_country (L1) is missing → HIGH (not a soft MEDIUM)
    - transfer mechanism (L2) → NO 'na' entry (skipped, the parent covers it)
    - title/action stay short (no 280-character hint as recommendation title)
    """
    paragraph = ("Datenschutz. Verantwortlich ist die Muster GmbH, info@muster.de. "
                 "Zwecke und Rechtsgrundlage Art. 6. Speicherdauer. Ihre Rechte. ")
    agent = REGISTRY.get("dse")
    request = AgentInput(
        doc_type="dse",
        text=paragraph * 4,
        context={"scan_context": {"third_country_transfer": "yes"}},
    )
    out = asyncio.run(agent.evaluate(request))

    third_country = [f for f in out.findings if "Drittland" in f.title]
    assert third_country and third_country[0].severity == "HIGH"
    assert not any(
        entry.status == "na" and "Transfermechanismus" in entry.label
        for entry in out.mc_coverage)
    assert all(len(f.action) < 110 for f in out.findings)
    # The detailed reasoning is retained as evidence.
    assert any(f.evidence for f in out.findings)
|
||||
def test_dse_findings_from_failed_db_mc(monkeypatch):
    """A failed DB control becomes a finding; a passed one does not.

    ``run_v3_pipeline`` is mocked to return one failed and one passed control.
    """
    failed_mc = {
        "control_id": "DATA-525-A17", "passed": False, "severity": "HIGH",
        "label": "Berechtigte Interessen ausweisen", "regulation": None,
        "article": None, "_pass_criteria": ["berechtigtes interesse benannt"],
        "matched_text": "", "source": "keyword_match",
    }
    passed_mc = {
        "control_id": "AUTH-2051-A11", "passed": True, "severity": "LOW",
        "label": "Prägnante Form", "regulation": None, "article": None,
        "_pass_criteria": [], "matched_text": "ok",
    }
    monkeypatch.setattr(
        dse_agent, "run_v3_pipeline", _mock_v3([failed_mc, passed_mc]))

    out = _run(_DSE_SAMPLE, context={"skip_llm": True})

    finding_ids = {f.field_id for f in out.findings}
    assert "DATA-525-A17" in finding_ids  # failed → finding
    assert "AUTH-2051-A11" not in finding_ids  # passed → no finding

    failed = next(f for f in out.findings if f.field_id == "DATA-525-A17")
    assert failed.severity == "HIGH"
    assert failed.norm == "DSGVO Art. 13/14"  # NULL regulation → fallback norm
    assert len(failed.action) < 410
|
||||
|
||||
|
||||
def test_dse_third_country_override_to_high(monkeypatch):
    """A failed MEDIUM third-country control is raised to HIGH when the scan context documents a transfer."""
    third_country_mc = {
        "control_id": "DATA-900-A01", "passed": False, "severity": "MEDIUM",
        "label": "Drittlandtransfer Schutzgarantien benennen",
        "regulation": None, "article": None,
        "_pass_criteria": ["standardvertragsklauseln", "drittland garantien"],
        "matched_text": "", "source": "keyword_match",
    }
    monkeypatch.setattr(
        dse_agent, "run_v3_pipeline", _mock_v3([third_country_mc]))

    context = {
        "skip_llm": True,
        "scan_context": {"third_country_transfer": "yes"},
    }
    out = _run(_DSE_SAMPLE, context=context)

    finding = next(f for f in out.findings if f.field_id == "DATA-900-A01")
    assert finding.severity == "HIGH"
    assert finding.severity_reason == "db_mc_failed_third_country_transfer"
|
||||
|
||||
|
||||
def test_dse_no_transfer_keeps_medium(monkeypatch):
    """Without a transfer in the scan context, the failed third-country control stays MEDIUM."""
    third_country_mc = {
        "control_id": "DATA-900-A01", "passed": False, "severity": "MEDIUM",
        "label": "Drittlandtransfer Schutzgarantien benennen",
        "regulation": None, "article": None,
        "_pass_criteria": ["standardvertragsklauseln", "drittland garantien"],
        "matched_text": "", "source": "keyword_match",
    }
    monkeypatch.setattr(
        dse_agent, "run_v3_pipeline", _mock_v3([third_country_mc]))

    out = _run(_DSE_SAMPLE, context={"skip_llm": True})

    finding = next(f for f in out.findings if f.field_id == "DATA-900-A01")
    assert finding.severity == "MEDIUM"
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
"""Tests fuer das DSE-Applicability-Gate (_classification_gate).
|
||||
|
||||
Deckt die reine Split-Logik (apply_gate) und das defensive Verhalten von
|
||||
load_dse_gate ohne DB ab. Die DB-Abfrage selbst ist I/O und wird hier nicht
|
||||
gegen eine echte DB getestet (defensiver Pfad: kein DSN -> leeres Dict)."""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from compliance.services.specialist_agents.dse._classification_gate import (
|
||||
apply_gate,
|
||||
load_dse_gate,
|
||||
)
|
||||
|
||||
|
||||
def test_apply_gate_splits_findings_and_organizational():
    """``apply_gate`` keeps the ungated control and moves the gated one to *organizational*.

    The organizational entry carries the control's id and title plus the
    gate's ``applicable_artifacts`` and ``check_intent``.
    """
    retention_control = {"control_id": "AUTH-2051-A02", "title": "Speicherdauer nennen"}
    vvt_control = {"control_id": "AUTH-2049-A01", "title": "VVT fuehren"}
    gate = {
        "AUTH-2049-A01": {
            "obligation_type": "EVIDENCE",
            "check_intent": "DIRECT_EVIDENCE",
            "applicable_artifacts": ["VVT", "AUDIT"],
            "reference_allowed": "NO",
        }
    }

    kept, organizational = apply_gate([retention_control, vvt_control], gate)

    kept_ids = [control["control_id"] for control in kept]
    assert kept_ids == ["AUTH-2051-A02"]
    assert len(organizational) == 1

    moved = organizational[0]
    assert moved["control_id"] == "AUTH-2049-A01"
    assert moved["title"] == "VVT fuehren"
    assert moved["applicable_artifacts"] == ["VVT", "AUDIT"]
    assert moved["check_intent"] == "DIRECT_EVIDENCE"
|
||||
|
||||
|
||||
def test_apply_gate_empty_gate_keeps_all():
    """With an empty gate, both controls are kept and nothing is organizational."""
    controls = [{"control_id": cid} for cid in ("X-1", "X-2")]
    kept, organizational = apply_gate(controls, {})
    assert len(kept) == 2
    assert organizational == []
|
||||
|
||||
|
||||
def test_load_dse_gate_without_dsn_is_defensive():
    """No DSN and no env variables -> empty dict (no filter), no error."""
    env_keys = ("DATABASE_URL", "COMPLIANCE_DATABASE_URL")
    # Remove both variables for the call and remember what was there.
    stashed = {key: os.environ.pop(key, None) for key in env_keys}
    try:
        gate = asyncio.run(load_dse_gate(""))
        assert gate == {}
    finally:
        # Restore only the variables that were set beforehand.
        for key, previous in stashed.items():
            if previous is not None:
                os.environ[key] = previous
|
||||
@@ -0,0 +1,67 @@
|
||||
"""DSE Embedding-Recall — deterministische semantische Schicht (gecacht).
|
||||
|
||||
Testet die reine Logik OHNE Embedding-Service: Cache-Treffer-Pfad,
|
||||
Schwellen-Filter, Kandidaten-Schnitt, Reachability-Guard. Das Einbetten selbst
|
||||
(Embedding-Service) ist Integration und wird auf macmini/Prod validiert.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
import compliance.services.specialist_agents.dse._embedding_recall as er
|
||||
|
||||
|
||||
_TEXT = ("Datenschutzerklaerung der Muster GmbH. " * 20) # > 100 Zeichen
|
||||
|
||||
|
||||
def _seed_cache(tmp_path, scores: dict[str, float]) -> str:
    """Write a cache file mapping the hash of ``_TEXT`` to *scores*; return its path as str."""
    cache_file = tmp_path / "dse_embed_cache.json"
    payload = {er._doc_hash(_TEXT): scores}
    cache_file.write_text(json.dumps(payload))
    return str(cache_file)
|
||||
|
||||
|
||||
def test_doc_hash_deterministic():
    """``_doc_hash`` is stable for equal text and differs for different text."""
    # Fixed function: same text → same hash (reproducibility).
    first = er._doc_hash(_TEXT)
    second = er._doc_hash(_TEXT)
    assert first == second
    assert er._doc_hash("a") != er._doc_hash("b")
|
||||
|
||||
|
||||
def test_cache_hit_threshold_filter(tmp_path, monkeypatch):
    """On a cache hit, only candidates scoring at or above the threshold are returned."""
    # Cache hit: no embedding service needed. Only scores >= threshold AND
    # among the candidates are returned.
    scores = {"DATA-1": 0.71, "DATA-2": 0.60, "AUTH-3": 0.68, "SEC-4": 0.50}
    seeded_path = _seed_cache(tmp_path, scores)
    monkeypatch.setenv("DSE_EMBED_CACHE", seeded_path)
    monkeypatch.setattr(er, "_CACHE_PATH", str(tmp_path / "dse_embed_cache.json"))

    candidates = ["DATA-1", "DATA-2", "AUTH-3", "SEC-4"]
    recalled = asyncio.run(er.embedding_recall(_TEXT, candidates, threshold=0.65))
    # >= 0.65: DATA-1 (0.71), AUTH-3 (0.68). NOT DATA-2 (0.60), SEC-4 (0.50).
    assert recalled == {"DATA-1", "AUTH-3"}
|
||||
|
||||
|
||||
def test_cache_hit_candidate_intersection(tmp_path, monkeypatch):
    """Cached scores for ids outside the candidate list are ignored."""
    # Only candidates (failed controls) count — others are ignored.
    cache_file = tmp_path / "c.json"
    scores = {"DATA-1": 0.90, "DATA-2": 0.90}
    monkeypatch.setattr(er, "_CACHE_PATH", str(cache_file))
    cache_file.write_text(json.dumps({er._doc_hash(_TEXT): scores}))

    recalled = asyncio.run(er.embedding_recall(_TEXT, ["DATA-1"], threshold=0.65))
    assert recalled == {"DATA-1"}  # DATA-2 is not among the candidates
|
||||
|
||||
|
||||
def test_empty_inputs():
    """A too-short text or an empty candidate list both yield an empty set."""
    short_text_result = asyncio.run(er.embedding_recall("zu kurz", ["X"]))
    assert short_text_result == set()
    no_candidates_result = asyncio.run(er.embedding_recall(_TEXT, []))
    assert no_candidates_result == set()
|
||||
|
||||
|
||||
def test_service_down_returns_empty(tmp_path, monkeypatch):
    """No cache file and an unreachable service give an empty result instead of hanging."""

    async def _unreachable(timeout=2.0):
        return False

    # No cache + service not reachable → empty (the deterministic layer
    # carries the result), NO hang.
    monkeypatch.setattr(er, "_CACHE_PATH", str(tmp_path / "none.json"))
    monkeypatch.setattr(er, "_embedding_reachable", _unreachable)

    recalled = asyncio.run(er.embedding_recall(_TEXT, ["DATA-1"]))
    assert recalled == set()
|
||||
Reference in New Issue
Block a user