feat(agent): Impressum Rechtsform-Gates + USt-optional (Phase 3)
Die 8 Audit-Klassifizierungs-Felder (scan_context) treiben jetzt den business_scope der Agenten (vorher gespeichert, aber nicht genutzt). Rechtsform-Gates als opt-out (excludes_scope): Verein -> kein Handelsregister-Finding, e.K. -> kein Vertretungsberechtigte-Finding; unbekannte Rechtsform bleibt anwendbar. USt-IdNr optional -> fehlt = kein Finding. Rechts-Zuordnung vom Domain-Experten bestaetigt.

- _classification.py: scan_context_to_scope (8 Felder -> scope-Tokens)
- mcs.py: MC.excludes_scope + MC.optional; IMP-MC-004/006 Gate-Tokens; IMP-MC-005 optional; scope_matches respektiert excludes_scope
- agent.py: optional -> kein Finding bei Abwesenheit
- _agent_outputs.py: scope = scan_context vereinigt LLM-Profil-Fallback
- Tests gruen: v3 25, Groundtruth 13, CI-Pfad 14 (+ SSE-Loop-Fix)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,122 @@
|
||||
"""Phase 3: scan_context → business_scope Normalisierung + Rechtsform-Gates.
|
||||
|
||||
Rechts-Zuordnung vom Domain-Experten bestätigt (2026-06-10):
|
||||
e.K. ist registerpflichtig (Handelsregister-Finding) aber ohne gesonderte
|
||||
Vertretungsberechtigte; Verein umgekehrt; USt-IdNr fehlt → kein Finding;
|
||||
healthcare triggert NICHT regulated_profession.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
|
||||
from compliance.services.specialist_agents import AgentInput
|
||||
from compliance.services.specialist_agents.impressum._classification import (
|
||||
scan_context_to_scope,
|
||||
)
|
||||
from compliance.services.specialist_agents.impressum.agent import ImpressumAgent
|
||||
|
||||
|
||||
def _scope(**kw) -> set[str]:
    """Return the scope tokens produced for the given scan_context fields.

    The keyword arguments are passed to ``scan_context_to_scope`` as one
    dict, and its result is materialised as a set so that tests can use
    plain membership checks.
    """
    scan_context = kw
    tokens = scan_context_to_scope(scan_context)
    return set(tokens)
|
||||
|
||||
|
||||
def test_gmbh_no_exclusions():
    """A GmbH yields no exclusion tokens, so both gated MCs stay applicable."""
    tokens = _scope(legal_form="gmbh")
    assert "kein_handelsregister" not in tokens
    assert "keine_vertretung" not in tokens
|
||||
|
||||
|
||||
def test_einzelkaufmann_register_but_no_vertretung():
    """An e.K. must be registered (no exclusion), but naming the owner suffices."""
    tokens = _scope(legal_form="ek")
    # The register check stays applicable ...
    assert "kein_handelsregister" not in tokens
    # ... while the authorised-representative check is opted out.
    assert "keine_vertretung" in tokens
|
||||
|
||||
|
||||
def test_verein_vertretung_but_no_register():
    """An e.V. is in the register of associations, NOT the commercial register."""
    tokens = _scope(legal_form="verein")
    # The commercial-register (HR) check is excluded ...
    assert "kein_handelsregister" in tokens
    # ... while the authorised-representative check stays applicable.
    assert "keine_vertretung" not in tokens
|
||||
|
||||
|
||||
def test_branche_tokens():
    """Each classification field value maps to its expected scope token."""
    # (expected token, scan_context fields), checked in this order.
    expectations = (
        ("ecommerce", {"industry": "ecommerce"}),
        ("ecommerce", {"direct_sales": "yes"}),
        ("b2c", {"business_model": "b2c"}),
        ("b2c", {"business_model": "both"}),
        ("automotive", {"industry": "automotive"}),
        ("editorial", {"industry": "media"}),
        ("insurance", {"industry": "insurance"}),
        ("financial_services", {"industry": "banking"}),
        ("public_authority", {"legal_form": "behoerde"}),
        ("public_authority", {"industry": "public"}),
    )
    for token, scan_context in expectations:
        assert token in _scope(**scan_context), (token, scan_context)
|
||||
|
||||
|
||||
def test_healthcare_does_not_imply_regulated_profession():
    """The healthcare industry alone must not add ``regulated_profession``."""
    # A hospital GmbH is not a pharmacy, so the industry alone does not trigger it.
    tokens = _scope(industry="healthcare", legal_form="gmbh")
    assert "regulated_profession" not in tokens
|
||||
|
||||
|
||||
def test_unknown_legal_form_no_exclusions():
    """Without a legal form no exclusion tokens are emitted; the MCs stay applicable."""
    # (The 4-status model INSUFFICIENT_EVIDENCE follows in the next phase.)
    tokens = _scope(industry="ecommerce")  # deliberately no legal_form
    assert "kein_handelsregister" not in tokens
    assert "keine_vertretung" not in tokens
|
||||
|
||||
|
||||
# ── Agent-Verhalten mit den Gates ──────────────────────────────────
|
||||
|
||||
# Minimal Impressum text used as agent input: a heading, a name, a postal
# address, e-mail and phone. It contains no register entry, no named
# representative and no VAT id.
IMPRESSUM_MINIMAL = "\n".join(
    (
        "Angaben gemäß § 5 TMG",
        "",  # blank line after the heading
        "Beispiel Firma",
        "Musterstraße 1",
        "12345 Berlin",
        "",  # blank line after the address
        "E-Mail: info@example.com",
        "Telefon: +49 30 1234567",
        "Mehr Informationen auf unserer Website.",
        "",  # produces the trailing newline
    )
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _llm_offline(monkeypatch):
    """Swap the agent module's ``validate_present`` for an async stub in every test.

    The stub accepts any arguments and returns an empty dict.
    NOTE(review): presumably this keeps the tests from reaching a real LLM;
    confirm against the agent module.
    """
    target = "compliance.services.specialist_agents.impressum.agent.validate_present"

    async def _stub_validate(*_args, **_kwargs):
        return {}

    # raising=False: do not fail when the attribute is absent on the module.
    monkeypatch.setattr(target, _stub_validate, raising=False)
|
||||
|
||||
|
||||
def _finding_fields(legal_form: str) -> set[str]:
    """Evaluate the minimal Impressum for *legal_form* and return the finding field ids.

    The business scope is derived solely from ``{"legal_form": legal_form}``
    via ``scan_context_to_scope``. The agent coroutine is driven to
    completion with ``asyncio.run``.
    """
    agent = ImpressumAgent()
    agent_input = AgentInput(
        doc_type="impressum",
        text=IMPRESSUM_MINIMAL,
        business_scope=scan_context_to_scope({"legal_form": legal_form}),
    )
    result = asyncio.run(agent.evaluate(agent_input))
    return {finding.field_id for finding in result.findings}
|
||||
|
||||
|
||||
def test_einzelkaufmann_handelsregister_finding_no_vertretung():
    """For an e.K. the register finding is raised, the representative finding is not."""
    reported = _finding_fields("ek")
    assert "handelsregister" in reported  # subject to registration
    assert "vertretungsberechtigte" not in reported  # naming the owner suffices
|
||||
|
||||
|
||||
def test_gmbh_both_findings():
    """For a GmbH both the register and the representative finding are raised."""
    reported = _finding_fields("gmbh")
    assert "handelsregister" in reported
    assert "vertretungsberechtigte" in reported
|
||||
|
||||
|
||||
def test_ust_id_absent_yields_no_finding():
    """A VAT id missing from the text is optional and yields NO finding."""
    # Checked for both legal forms: the result must not depend on the legal form.
    for legal_form in ("gmbh", "ek"):
        assert "ust_id" not in _finding_fields(legal_form)
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Phase 2: SSE-Plumbing für den Compliance-Check.
|
||||
|
||||
Deckt emit (Queue-Push), _format_sse (SSE-Zeilenformat) und den
|
||||
event_generator (hello → Events → stream_close bei 'complete') ab.
|
||||
Queue + Generator laufen innerhalb eines asyncio.run (sonst bindet
|
||||
asyncio.Queue in Py3.9 an einen ggf. geschlossenen Loop).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -11,15 +11,8 @@ import asyncio
|
||||
from compliance.api.agent_check import _sse
|
||||
|
||||
|
||||
def test_emit_pushes_and_format():
|
||||
cid = "sse-test-1"
|
||||
_sse.new_queue(cid)
|
||||
_sse.emit(cid, {"type": "topic", "topic": "impressum", "output": {"x": 1}})
|
||||
q = _sse._check_queues[cid]
|
||||
assert q.qsize() == 1
|
||||
ev = q.get_nowait()
|
||||
assert ev["type"] == "topic" and ev["topic"] == "impressum"
|
||||
line = _sse._format_sse(ev)
|
||||
def test_format_sse_line():
|
||||
line = _sse._format_sse({"type": "topic", "topic": "impressum"})
|
||||
assert line.startswith("data: ") and line.endswith("\n\n")
|
||||
assert '"impressum"' in line
|
||||
|
||||
@@ -29,21 +22,20 @@ def test_emit_is_noop_without_queue():
|
||||
_sse.emit("does-not-exist-xyz", {"type": "topic"})
|
||||
|
||||
|
||||
def test_event_generator_streams_topic_then_closes_on_complete():
|
||||
cid = "sse-test-gen"
|
||||
_sse.new_queue(cid)
|
||||
_sse.emit(cid, {"type": "topic", "topic": "impressum", "output": {}})
|
||||
_sse.emit(cid, {"type": "complete", "status": "completed"})
|
||||
|
||||
async def collect():
|
||||
out = []
|
||||
def test_emit_and_event_generator_streams_then_closes():
|
||||
async def scenario():
|
||||
cid = "sse-test-gen"
|
||||
_sse.new_queue(cid)
|
||||
_sse.emit(cid, {"type": "topic", "topic": "impressum", "output": {}})
|
||||
_sse.emit(cid, {"type": "complete", "status": "completed"})
|
||||
out: list[str] = []
|
||||
async for line in _sse.event_generator(cid):
|
||||
out.append(line)
|
||||
if len(out) > 12: # safety
|
||||
break
|
||||
return out
|
||||
|
||||
blob = "".join(asyncio.run(collect()))
|
||||
blob = "".join(asyncio.run(scenario()))
|
||||
assert '"type": "hello"' in blob
|
||||
assert '"topic": "impressum"' in blob
|
||||
assert '"type": "complete"' in blob
|
||||
|
||||
Reference in New Issue
Block a user