"""Unit tests for the checker library.

Embedding and LLM back ends are mocked, so the tests need no network.
"""
import asyncio

import compliance.services.llm_cascade as cascade_mod
import compliance.services.mc_embedding_matcher as emb_mod
from compliance.services.checkers.base import (
    ControlSpec,
    DecisionMethod,
    DocContext,
    VerificationMethod,
)
from compliance.services.checkers.embedding_checker import EmbeddingChecker
from compliance.services.checkers.llm_checker import LLMChecker
from compliance.services.checkers.reference_checker import ReferenceChecker


def _run(coro):
    """Drive the coroutine *coro* to completion and return its result."""
    return asyncio.run(coro)


def test_reference_present_and_absent():
    """ReferenceChecker: a matching text with a URL is present, another is not."""
    checker = ReferenceChecker()
    spec = ControlSpec(
        "data_protection",
        VerificationMethod.REFERENCE,
        DecisionMethod.LINK_RESOLVER,
        patterns=[r"datenschutz(erkl|bestimmung|hinweis)"],
    )

    # Text that matches the pattern and carries an https link.
    with_link = DocContext(
        text="Details in unserer Datenschutzerklaerung: https://x.de/datenschutz"
    )
    found = _run(checker.check(spec, with_link))
    assert found.present is True
    assert found.detail.get("link", "").startswith("https://")

    # Text that mentions the topic but does not match the pattern.
    without_link = DocContext(text="Keine Angabe zum Datenschutz-Thema.")
    missing = _run(checker.check(spec, without_link))
    assert missing.present is False


def test_embedding_threshold(monkeypatch):
    """EmbeddingChecker: a stubbed cosine above/below the threshold flips `present`."""
    # raising=False: patch the attribute even if the module does not define it.
    monkeypatch.setattr(emb_mod, "DIM", 3, raising=False)
    monkeypatch.setattr(emb_mod, "_chunk_text", lambda t: [t], raising=False)

    async def _fake_embed(texts):
        # One fixed 3-component vector per input text.
        return [[1.0, 0.0, 0.0] for _ in texts]

    monkeypatch.setattr(emb_mod, "_embed_texts", _fake_embed, raising=False)

    checker = EmbeddingChecker()
    spec = ControlSpec(
        "scope_t",
        VerificationMethod.CONTENT,
        DecisionMethod.EMBEDDING,
        paraphrases=["Geltungsbereich"],
        embed_threshold=0.58,
    )

    # Stubbed similarity above the 0.58 threshold.
    monkeypatch.setattr(emb_mod, "_cosine", lambda a, b: 0.90, raising=False)
    above = _run(checker.check(spec, DocContext(text="x" * 200)))
    assert above.present is True
    assert above.confidence >= 0.58

    # Stubbed similarity below the threshold.
    monkeypatch.setattr(emb_mod, "_cosine", lambda a, b: 0.20, raising=False)
    below = _run(checker.check(spec, DocContext(text="x" * 200)))
    assert below.present is False


def test_embedding_offline_returns_none(monkeypatch):
    """EmbeddingChecker: a ConnectionError while embedding yields `present is None`."""

    async def _unreachable(texts):
        raise ConnectionError("embedding-service down")

    monkeypatch.setattr(emb_mod, "_embed_texts", _unreachable, raising=False)

    checker = EmbeddingChecker()
    spec = ControlSpec(
        "scope_off",
        VerificationMethod.CONTENT,
        DecisionMethod.EMBEDDING,
        paraphrases=["x"],
        embed_threshold=0.6,
    )
    outcome = _run(checker.check(spec, DocContext(text="y" * 200)))
    assert outcome.present is None  # fail-safe


def test_llm_present_and_absent(monkeypatch):
    """LLMChecker: verdict ERFUELLT maps to present=True, FEHLT to present=False."""
    checker = LLMChecker()
    spec = ControlSpec(
        "delivery_timeframe",
        VerificationMethod.CONTENT,
        DecisionMethod.LLM,
        topic_regex=r"liefer",
        question="Konkrete Lieferfrist?",
    )
    paragraph = (
        "1. Lieferung\nDie Ware wird innerhalb von 2 Werktagen "
        "geliefert.\n"
    )
    doc = DocContext(text=paragraph * 4)

    async def _answer_fulfilled(system, user, **kw):
        return {
            "text": '{"verdict":"ERFUELLT","zitat":"2 Werktagen","begruendung":"x"}',
            "source": "qwen",
            "confidence": 0.7,
        }

    monkeypatch.setattr(
        cascade_mod, "call_with_cascade", _answer_fulfilled, raising=False
    )
    fulfilled = _run(checker.check(spec, doc))
    assert fulfilled.present is True

    async def _answer_missing(system, user, **kw):
        return {"text": '{"verdict":"FEHLT"}', "source": "qwen"}

    monkeypatch.setattr(
        cascade_mod, "call_with_cascade", _answer_missing, raising=False
    )
    absent = _run(checker.check(spec, doc))
    assert absent.present is False