From 97575cc9c08fad0712e5a8f3906bd156b103db4c Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 10 Jun 2026 22:38:11 +0200 Subject: [PATCH] =?UTF-8?q?feat(agent):=204-Status-Modell=20(NOT=5FAPPLICA?= =?UTF-8?q?BLE/INSUFFICIENT=5FEVIDENCE/POSSIBLY=5FAPPLICABLE)=20f=C3=BCr?= =?UTF-8?q?=20Impressum?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kanonisches Compliance-Datenmodell, Impressum-Agent als Referenz: - CheckStatus-Enum + Finding.status GETRENNT von severity (Verdikt ≠ Risiko) - Unbestimmte Rechtsform (weder Text noch Wizard) → INSUFFICIENT_EVIDENCE (INFO) statt hartem HIGH-FAIL; legal_form_dependent-Gate + detect_legal_form_present - §18-MStV-Graubereich (Corporate-Blog via has_editorial_content) → POSSIBLY_APPLICABLE (LOW Prüf-Hinweis); 3-stufig via scope_disposition - Recommendations nur aus echten FAILs; mc_insufficient/mc_possibly-Aggregate - Frontend: Verdikt-Pill + Coverage-Vokabular - 19 neue Tests (test_four_status.py, AgentFindingCard); CI-Suite 204 grün, v3 25 / GT 13 unverändert Co-Authored-By: Claude Opus 4.7 --- .../agent/_components/AgentFindingCard.tsx | 23 ++- .../sdk/agent/_components/AgentMcCoverage.tsx | 4 + .../__tests__/AgentFindingCard.test.tsx | 33 +++ .../app/sdk/agent/_components/_agentTypes.ts | 30 ++- .../api/agent_check/_agent_outputs.py | 7 + .../services/specialist_agents/_base.py | 33 ++- .../impressum/_classification.py | 7 + .../specialist_agents/impressum/agent.py | 103 ++++++++- .../specialist_agents/impressum/mcs.py | 50 +++++ .../compliance/tests/test_four_status.py | 195 ++++++++++++++++++ 10 files changed, 473 insertions(+), 12 deletions(-) create mode 100644 admin-compliance/app/sdk/agent/_components/__tests__/AgentFindingCard.test.tsx create mode 100644 backend-compliance/compliance/tests/test_four_status.py diff --git a/admin-compliance/app/sdk/agent/_components/AgentFindingCard.tsx b/admin-compliance/app/sdk/agent/_components/AgentFindingCard.tsx index 
766c1442..2d6b59c7 100644 --- a/admin-compliance/app/sdk/agent/_components/AgentFindingCard.tsx +++ b/admin-compliance/app/sdk/agent/_components/AgentFindingCard.tsx @@ -25,6 +25,8 @@ import { METHODIK_SHORT, SEVERITY_BG, SEVERITY_COLOR, + STATUS_LABEL, + STATUS_STYLE, } from './_agentTypes' export function AgentFindingCard({ f }: { f: Finding }) { @@ -32,6 +34,10 @@ export function AgentFindingCard({ f }: { f: Finding }) { const color = SEVERITY_COLOR[sev] const bg = SEVERITY_BG[sev] const sources = f.sources || [] + // Verdikt-Pill nur für Nicht-FAIL-Status (Applicability/Unknown) — + // macht klar: kein Verstoß, sondern Hinweis/unbestimmt. + const statusLabel = f.status ? STATUS_LABEL[f.status] : undefined + const statusStyle = f.status ? STATUS_STYLE[f.status] : undefined return (
{sev} + {statusLabel && statusStyle && ( + + {statusLabel} + + )} {f.check_id} {sources.map((s, i) => ( @@ -78,9 +92,12 @@ export function AgentFindingCard({ f }: { f: Finding }) { s.source_type === 'llm_cloud' ) ? 'Empfehlung (LLM-Vorschlag)' - : sev === 'HIGH' - ? 'Pflicht-Maßnahme' - : 'Best-Practice-Empfehlung' + : f.status === 'insufficient_evidence' || + f.status === 'possibly_applicable' + ? 'Prüf-Hinweis' + : sev === 'HIGH' + ? 'Pflicht-Maßnahme' + : 'Best-Practice-Empfehlung' } tone="green" > diff --git a/admin-compliance/app/sdk/agent/_components/AgentMcCoverage.tsx b/admin-compliance/app/sdk/agent/_components/AgentMcCoverage.tsx index 96885fa0..498d4a80 100644 --- a/admin-compliance/app/sdk/agent/_components/AgentMcCoverage.tsx +++ b/admin-compliance/app/sdk/agent/_components/AgentMcCoverage.tsx @@ -16,6 +16,8 @@ const STATUS_COLOR: Record = { high: '#dc2626', medium: '#f59e0b', low: '#3b82f6', + insufficient_evidence: '#64748b', + possibly_applicable: '#ca8a04', } const STATUS_LABEL: Record = { @@ -25,6 +27,8 @@ const STATUS_LABEL: Record = { high: 'HIGH', medium: 'MEDIUM', low: 'LOW', + insufficient_evidence: 'unklar', + possibly_applicable: 'evtl. relevant', } export function AgentMcCoverage({ coverage }: { coverage: McCoverage[] }) { diff --git a/admin-compliance/app/sdk/agent/_components/__tests__/AgentFindingCard.test.tsx b/admin-compliance/app/sdk/agent/_components/__tests__/AgentFindingCard.test.tsx new file mode 100644 index 00000000..c5280aa0 --- /dev/null +++ b/admin-compliance/app/sdk/agent/_components/__tests__/AgentFindingCard.test.tsx @@ -0,0 +1,33 @@ +import { describe, it, expect } from 'vitest' +import { render, screen } from '@testing-library/react' + +import { AgentFindingCard } from '../AgentFindingCard' +import type { Finding } from '../_agentTypes' + +const BASE: Finding = { + check_id: 'IMP-handelsregister', agent: 'impressum', agent_version: '3.0', + field_id: 'handelsregister', severity: 'HIGH', title: 'X', + norm: '§ 5 Abs. 
1 Nr. 4 TMG', evidence: '', action: 'Tu etwas.', + confidence: 0.4, + sources: [{ source_type: 'regex', source_id: 'IMP-MC-004', confidence: 0.4 }], +} + +describe('AgentFindingCard — 4-Status', () => { + it('INSUFFICIENT_EVIDENCE zeigt Verdikt-Pill + Prüf-Hinweis statt FAIL', () => { + const f: Finding = { + ...BASE, status: 'insufficient_evidence', severity: 'INFO', + title: 'Handelsregister-Eintrag: Rechtsform nicht erkennbar', + } + render() + expect(screen.getByText('Unzureichende Evidenz')).toBeInTheDocument() + expect(screen.getByText('Prüf-Hinweis')).toBeInTheDocument() + expect(screen.queryByText('Pflicht-Maßnahme')).not.toBeInTheDocument() + }) + + it('FAIL/HIGH zeigt KEINE Verdikt-Pill, aber Pflicht-Maßnahme', () => { + const f: Finding = { ...BASE, status: 'fail', severity: 'HIGH' } + render() + expect(screen.queryByText('Unzureichende Evidenz')).not.toBeInTheDocument() + expect(screen.getByText('Pflicht-Maßnahme')).toBeInTheDocument() + }) +}) diff --git a/admin-compliance/app/sdk/agent/_components/_agentTypes.ts b/admin-compliance/app/sdk/agent/_components/_agentTypes.ts index 7c3805b4..5248e185 100644 --- a/admin-compliance/app/sdk/agent/_components/_agentTypes.ts +++ b/admin-compliance/app/sdk/agent/_components/_agentTypes.ts @@ -10,6 +10,15 @@ export type Severity = 'HIGH' | 'MEDIUM' | 'LOW' | 'INFO' +// Verdikt eines Checks — getrennt vom Risiko (severity). +// Applicability ≠ Compliance · Unknown ≠ Fail. 
+export type CheckStatus =
+  | 'pass'
+  | 'fail'
+  | 'not_applicable'
+  | 'insufficient_evidence'
+  | 'possibly_applicable'
+
 export type SourceType =
   | 'mc'
   | 'regex'
@@ -31,6 +40,7 @@ export interface Finding {
   agent: string
   agent_version: string
   field_id?: string
+  status?: CheckStatus
   severity: Severity
   severity_reason?: string
   title: string
@@ -52,7 +62,8 @@ export interface Recommendation {
 
 export interface McCoverage {
   mc_id: string
-  status: 'ok' | 'na' | 'high' | 'medium' | 'low' | 'skipped'
+  status: 'ok' | 'na' | 'high' | 'medium' | 'low' | 'skipped' |
+    'insufficient_evidence' | 'possibly_applicable' // both are written into mc_coverage by the backend agent
   reason?: string
 }
 
@@ -79,6 +90,8 @@ export interface SlotOutput {
   mc_high: number
   mc_medium: number
   mc_low: number
+  mc_insufficient?: number
+  mc_possibly?: number
   duration_ms: number
   confidence: number
   notes?: string
@@ -152,6 +165,21 @@ export const SEVERITY_BG: Record<Severity, string> = {
   INFO: '#f8fafc',
 }
 
+// Verdict pill — only for the non-FAIL statuses (a FAIL is conveyed by its severity).
+export const STATUS_LABEL: Partial<Record<CheckStatus, string>> = {
+  not_applicable: 'Nicht anwendbar',
+  insufficient_evidence: 'Unzureichende Evidenz',
+  possibly_applicable: 'Evtl. relevant',
+}
+
+export const STATUS_STYLE: Partial<
+  Record<CheckStatus, { bg: string; fg: string }>
+> = {
+  not_applicable: { bg: '#f1f5f9', fg: '#64748b' },
+  insufficient_evidence: { bg: '#e2e8f0', fg: '#475569' },
+  possibly_applicable: { bg: '#fef9c3', fg: '#854d0e' },
+}
+
 // Ein Output gilt als "übersprungen" (Dokument nicht ladbar), wenn MCs
 // existieren, aber keiner ausgewertet wurde.
export function isOutputSkipped(o: SlotOutput): boolean { diff --git a/backend-compliance/compliance/api/agent_check/_agent_outputs.py b/backend-compliance/compliance/api/agent_check/_agent_outputs.py index 751eb9b4..2e048893 100644 --- a/backend-compliance/compliance/api/agent_check/_agent_outputs.py +++ b/backend-compliance/compliance/api/agent_check/_agent_outputs.py @@ -43,6 +43,13 @@ def _derive_scope(profile_dict: dict) -> list[str]: scope.add("regulated_profession") if profile_dict.get("industry") in ("insurance", "Finance", "finance"): scope.add("insurance") + # §18 MStV — 3-stufig: Medienunternehmen (Verlag/Presse) = harte Pflicht; + # nur Blog/News-Inhalte (has_editorial_content) = Graubereich → der Agent + # wertet 'editorial_possible' als POSSIBLY_APPLICABLE (Pruef-Hinweis). + if profile_dict.get("industry") == "media": + scope.add("editorial") + elif profile_dict.get("has_editorial_content"): + scope.add("editorial_possible") return sorted(scope) diff --git a/backend-compliance/compliance/services/specialist_agents/_base.py b/backend-compliance/compliance/services/specialist_agents/_base.py index 74658a39..d66d1f9a 100644 --- a/backend-compliance/compliance/services/specialist_agents/_base.py +++ b/backend-compliance/compliance/services/specialist_agents/_base.py @@ -29,6 +29,21 @@ class Severity(str, Enum): INFO = "INFO" +class CheckStatus(str, Enum): + """Verdikt eines Checks — GETRENNT vom Risiko (severity). + + User-Vorgabe 2026-06-10 (kanonisches Datenmodell): + - Applicability ≠ Compliance: NOT_APPLICABLE ist KEIN FAIL. + - Unknown ≠ Fail: nicht bestimmbar → INSUFFICIENT_EVIDENCE, kein FAIL. + severity bleibt die Risiko-Achse (HIGH/…/INFO); status ist das Urteil. + """ + PASS = "pass" + FAIL = "fail" + NOT_APPLICABLE = "not_applicable" + INSUFFICIENT_EVIDENCE = "insufficient_evidence" + POSSIBLY_APPLICABLE = "possibly_applicable" + + class SourceType(str, Enum): """Wo kommt das Finding her? 
Für die auditfeste Beweiskette.""" MC = "mc" # Machine-Check (deterministisch) @@ -50,12 +65,14 @@ class EvidenceSource(BaseModel): class Finding(BaseModel): """Ein einzelnes Audit-Finding aus einem Specialist-Agent.""" - model_config = ConfigDict(use_enum_values=True) + model_config = ConfigDict(use_enum_values=True, validate_default=True) check_id: str # z.B. IMPRESSUM-AGENT-HANDELSREGISTER agent: str # impressum_v2 agent_version: str # 2.0 field_id: str = "" # field-key innerhalb des Agenten + # Verdikt (was IST der Fall) — getrennt vom Risiko (severity). + status: CheckStatus = CheckStatus.FAIL severity: Severity severity_reason: str = "" title: str @@ -78,9 +95,17 @@ class Recommendation(BaseModel): class McCoverage(BaseModel): - """Welche MC hat der Agent geprüft + Ergebnis.""" + """Welche MC hat der Agent geprüft + Ergebnis. + + status-Vokabular (mappt auf CheckStatus): + ok → PASS + high | medium | low → FAIL (Risiko = severity der Quelle) + na → NOT_APPLICABLE (Rechtsform/Branche) + insufficient_evidence → INSUFFICIENT_EVIDENCE (nicht bestimmbar) + skipped → Dokument nicht ladbar / zu kurz + """ mc_id: str - status: str # ok | high | medium | low | na | skipped + status: str reason: str = "" @@ -127,6 +152,8 @@ class AgentOutput(BaseModel): mc_high: int = 0 mc_medium: int = 0 mc_low: int = 0 + mc_insufficient: int = 0 + mc_possibly: int = 0 # Verbotene Wörter im Output — sicherheitshalber, damit kein Agent diff --git a/backend-compliance/compliance/services/specialist_agents/impressum/_classification.py b/backend-compliance/compliance/services/specialist_agents/impressum/_classification.py index 716c2cfa..0ea24635 100644 --- a/backend-compliance/compliance/services/specialist_agents/impressum/_classification.py +++ b/backend-compliance/compliance/services/specialist_agents/impressum/_classification.py @@ -59,4 +59,11 @@ def scan_context_to_scope(scan_context: dict | None) -> list[str]: if legal_form in _NON_VERTRETUNG_FORMS: scope.add("keine_vertretung") + 
# ── 4-Status: Rechtsform ueberhaupt bekannt? ── + # Hat der Wizard eine Rechtsform geliefert, ist die Register-/Vertretungs- + # pflicht belastbar entscheidbar (FAIL bei Fehlen). Fehlt sie hier UND im + # Text → INSUFFICIENT_EVIDENCE (Entscheidung trifft der Agent). + if legal_form: + scope.add("legal_form_known") + return sorted(scope) diff --git a/backend-compliance/compliance/services/specialist_agents/impressum/agent.py b/backend-compliance/compliance/services/specialist_agents/impressum/agent.py index de9e0dc7..0e91436a 100644 --- a/backend-compliance/compliance/services/specialist_agents/impressum/agent.py +++ b/backend-compliance/compliance/services/specialist_agents/impressum/agent.py @@ -28,6 +28,7 @@ from .._base import ( AgentInput, AgentOutput, BaseSpecialistAgent, + CheckStatus, EscalationLog, EvidenceSource, Finding, @@ -39,7 +40,13 @@ from .._base import ( from .._pattern_library import record as record_pattern from .._rollup import rollup from .._semantic_validator import build_rename_action, validate_present -from .mcs import MC_IDS, MCS, detect_automotive, scope_matches +from .mcs import ( + MC_IDS, + MCS, + detect_automotive, + detect_legal_form_present, + scope_disposition, +) logger = logging.getLogger(__name__) @@ -113,11 +120,18 @@ class ImpressumAgent(BaseSpecialistAgent): # Pattern-MCs die Findings-Quelle. field_id = semantisches Feld # (passt zum Semantic-Validator + den GT-Tests). is_auto = "automotive" in scope + # 4-Status: ist die Rechtsform ueberhaupt bestimmbar (Wizard ODER + # im Text genannt)? Wenn nicht, duerfen rechtsform-abhaengige Pflichten + # NICHT hart als FAIL gewertet werden → INSUFFICIENT_EVIDENCE. 
+ form_known = ( + "legal_form_known" in scope or detect_legal_form_present(text) + ) for mc in MCS: - if not scope_matches(mc, scope, is_auto): + disp = scope_disposition(mc, scope, is_auto) + if disp == "na": coverage.append(McCoverage( mc_id=mc.mc_id, status="na", - reason="nicht im Business-Scope", + reason="nicht anwendbar (Rechtsform/Branche)", )) continue if any(p.search(text) for p in mc.patterns): @@ -133,12 +147,78 @@ class ImpressumAgent(BaseSpecialistAgent): reason="optional — nicht angegeben", )) continue + if disp == "possible": + # Graubereich (z.B. Corporate-Blog → §18 MStV evtl.) → + # POSSIBLY_APPLICABLE: Pruef-Hinweis (LOW), kein Verstoss. + findings.append(Finding( + check_id=f"IMP-{mc.field_id}", + agent=self.agent_id, + agent_version=self.agent_version, + field_id=mc.field_id, + status=CheckStatus.POSSIBLY_APPLICABLE, + severity=Severity.LOW, + severity_reason="graubereich", + title=f"{mc.label}: ggf. relevant — manuell prüfen", + norm=mc.norm, + evidence="", + action=( + "Bei journalistisch-redaktionellen Inhalten " + "(Nachrichten/Magazin) ist ein Verantwortlicher nach " + "§ 18 MStV anzugeben. Bei reinem Corporate-Blog meist " + "nicht erforderlich — bitte prüfen." + ), + confidence=0.5, + sources=[EvidenceSource( + source_type=SourceType.REGEX, + source_id=mc.mc_id, + detail="Graubereich-Signal (Blog/News), kein hartes Gate", + confidence=0.5, + )], + )) + coverage.append(McCoverage( + mc_id=mc.mc_id, status="possibly_applicable", + reason="Graubereich — manuelle Prüfung", + )) + continue + if mc.legal_form_dependent and not form_known: + # Rechtsform unbestimmt → kein hartes FAIL, sondern + # 'unzureichende Evidenz' (severity INFO, Hinweis statt Verstoss). 
+ findings.append(Finding( + check_id=f"IMP-{mc.field_id}", + agent=self.agent_id, + agent_version=self.agent_version, + field_id=mc.field_id, + status=CheckStatus.INSUFFICIENT_EVIDENCE, + severity=Severity.INFO, + severity_reason="rechtsform_unbestimmt", + title=f"{mc.label}: Rechtsform nicht erkennbar", + norm=mc.norm, + evidence="", + action=( + "Rechtsform im Impressum nicht eindeutig erkennbar — " + "bitte pruefen, ob das Unternehmen registerpflichtig " + "ist; falls ja, die Pflichtangabe ergaenzen." + ), + confidence=0.4, + sources=[EvidenceSource( + source_type=SourceType.REGEX, + source_id=mc.mc_id, + detail="keine Rechtsform im Text + kein legal_form im Scope", + confidence=0.4, + )], + )) + coverage.append(McCoverage( + mc_id=mc.mc_id, status="insufficient_evidence", + reason="Rechtsform unbestimmt", + )) + continue sev = _SEV_TO_ENUM.get(mc.severity_if_missing, Severity.MEDIUM) findings.append(Finding( check_id=f"IMP-{mc.field_id}", agent=self.agent_id, agent_version=self.agent_version, field_id=mc.field_id, + status=CheckStatus.FAIL, severity=sev, severity_reason="pflichtangabe_missing", title=f"Pflichtangabe fehlt: {mc.label}", @@ -157,9 +237,14 @@ class ImpressumAgent(BaseSpecialistAgent): mc_id=mc.mc_id, status=sev.value.lower(), reason="kein Pattern-Treffer", )) + n_fail = sum(1 for f in findings + if f.status == CheckStatus.FAIL.value) + n_unklar = sum(1 for f in findings + if f.status == CheckStatus.INSUFFICIENT_EVIDENCE.value) notes_parts.append( f"{len(MCS)} §5-TMG-MCs geprüft · " - f"{len(findings)} Pflichtangabe(n) offen" + f"{n_fail} Pflichtangabe(n) offen" + + (f" · {n_unklar} unklar (Rechtsform)" if n_unklar else "") ) # ── Layer 3: Semantic-Validator nur für HIGH/MEDIUM-Fails ── @@ -246,7 +331,11 @@ class ImpressumAgent(BaseSpecialistAgent): notes: str = "", ) -> AgentOutput: end = datetime.now(timezone.utc) - recs = rollup(findings) + # Recommendations nur aus echten FAILs — INSUFFICIENT_EVIDENCE / + # POSSIBLY_APPLICABLE sind Hinweise, 
keine Pflicht-Massnahmen + # (User-Datenmodell: Finding → Remediation nur bei echtem Verstoss). + recs = rollup([f for f in findings + if f.status == CheckStatus.FAIL.value]) out = AgentOutput( agent=self.agent_id, agent_version=self.agent_version, @@ -265,5 +354,9 @@ class ImpressumAgent(BaseSpecialistAgent): mc_high=sum(1 for c in coverage if c.status == "high"), mc_medium=sum(1 for c in coverage if c.status == "medium"), mc_low=sum(1 for c in coverage if c.status == "low"), + mc_insufficient=sum( + 1 for c in coverage if c.status == "insufficient_evidence"), + mc_possibly=sum( + 1 for c in coverage if c.status == "possibly_applicable"), ) return lint_output(out) diff --git a/backend-compliance/compliance/services/specialist_agents/impressum/mcs.py b/backend-compliance/compliance/services/specialist_agents/impressum/mcs.py index a5c32d18..64b43f93 100644 --- a/backend-compliance/compliance/services/specialist_agents/impressum/mcs.py +++ b/backend-compliance/compliance/services/specialist_agents/impressum/mcs.py @@ -32,6 +32,14 @@ class MC: # Wenn True: fehlt die Angabe → KEIN Finding (z.B. USt-IdNr — # Kleinunternehmer §19 haben legitim keine). Nur wenn vorhanden relevant. optional: bool = False + # Wenn True: die Pflicht haengt an der Rechtsform (Handelsregister, + # Vertretungsberechtigte). Ist die Rechtsform weder im Text noch im + # Scope bestimmbar → INSUFFICIENT_EVIDENCE statt hartem FAIL. + legal_form_dependent: bool = False + # Graubereich: liegt eines dieser Tokens vor (aber NICHT requires_scope), + # ist die MC NICHT hart anwendbar, sondern POSSIBLY_APPLICABLE — Pruef- + # Hinweis (severity LOW) statt FAIL. Z.B. Corporate-Blog (§18 MStV evtl.). + possibly_applies_scope: tuple[str, ...] = field(default_factory=tuple) MCS: tuple[MC, ...] = ( @@ -86,6 +94,7 @@ MCS: tuple[MC, ...] = ( norm="§ 5 Abs. 1 Nr. 
4 TMG", severity_if_missing="HIGH", excludes_scope=("kein_handelsregister",), + legal_form_dependent=True, patterns=( re.compile(r"\bHR[BA]\s+\d", re.IGNORECASE), re.compile(r"Handelsregister", re.IGNORECASE), @@ -113,6 +122,7 @@ MCS: tuple[MC, ...] = ( norm="§ 5 Abs. 1 Nr. 1 TMG (juristische Personen)", severity_if_missing="HIGH", excludes_scope=("keine_vertretung",), + legal_form_dependent=True, patterns=( re.compile( r"(?:Gesch(?:ae|ä)ftsf(?:ue|ü)hr(?:er|ung|erin)|" @@ -171,6 +181,7 @@ MCS: tuple[MC, ...] = ( norm="§ 18 MStV (bei Blog/News/Magazin/Newsroom Pflicht)", severity_if_missing="MEDIUM", requires_scope=("editorial",), + possibly_applies_scope=("editorial_possible",), patterns=(re.compile( r"(?:Verantwortlich(?:er|e)?\s+(?:f(?:ue|ü)r|i\.S\.d\.|" r"nach|gem(?:ae|ä)ß)\s+§\s*18|" @@ -235,6 +246,45 @@ def scope_matches(mc: MC, scope: set[str], is_automotive: bool) -> bool: return any(s in scope for s in mc.requires_scope) +_LEGAL_FORM_RE = re.compile( + r"(?:\bGmbH\b|\bgGmbH\b|\bmbH\b|\bUG\b|\bAG\b|\bSE\b|\bKGaA\b|" + r"\bKG\b|\bOHG\b|\bGbR\b|\bPartG(?:mbB)?\b|" + r"\be\.?\s?K(?:fm|fr)?\.?\b|\be\.?\s?V\.?\b|\bStiftung\b|" + r"\bLtd\.?\b|\bLimited\b|\bLLC\b|\bS\.A\.|\bN\.V\.|\bB\.V\.|" + r"\bEinzelunternehm\w*|\bKaufmann\b|\bKauffrau\b|\bFreiberuf\w*)", + re.IGNORECASE, +) + + +def scope_disposition(mc: MC, scope: set[str], is_automotive: bool) -> str: + """3-Wege-Anwendbarkeit: 'applies' (hart) | 'possible' (Graubereich) | + 'na' (nicht anwendbar). + + 'possible' nur, wenn die MC NICHT hart anwendbar ist, aber ein + possibly_applies_scope-Token vorliegt (z.B. Corporate-Blog → §18 MStV + evtl. 
relevant) → POSSIBLY_APPLICABLE statt FAIL/NA.""" + if mc.excludes_scope and any(s in scope for s in mc.excludes_scope): + return "na" + if scope_matches(mc, scope, is_automotive): + return "applies" + if mc.possibly_applies_scope and any( + s in scope for s in mc.possibly_applies_scope + ): + return "possible" + return "na" + + +def detect_legal_form_present(text: str) -> bool: + """Nennt der Text ueberhaupt eine Rechtsform? + + Grundlage fuer INSUFFICIENT_EVIDENCE: ohne erkennbare Rechtsform (und + ohne legal_form im Scope) kann der Agent die Register-/Vertretungs- + pflicht nicht belastbar behaupten → kein hartes FAIL, sondern + 'unzureichende Evidenz' (User-Vorgabe 2026-06-10: 'Muster Consulting' + ohne Rechtsform darf kein 'Handelsregister fehlt' ausloesen).""" + return bool(_LEGAL_FORM_RE.search(text or "")) + + def detect_automotive(text: str) -> bool: """KFZ-Hersteller/-Vertrieb → triggert KBA-Hint.""" if re.search( diff --git a/backend-compliance/compliance/tests/test_four_status.py b/backend-compliance/compliance/tests/test_four_status.py new file mode 100644 index 00000000..4f757bef --- /dev/null +++ b/backend-compliance/compliance/tests/test_four_status.py @@ -0,0 +1,195 @@ +"""4-Status-Modell: Applicability ≠ Compliance, Unknown ≠ Fail. + +User-Datenmodell 2026-06-10: + - Rechtsform-abhaengige Pflicht (Handelsregister/Vertretungsberechtigte) + bei UNBESTIMMTER Rechtsform → INSUFFICIENT_EVIDENCE (severity INFO), + NICHT hartes FAIL ('Muster Consulting' ohne Rechtsform). + - Rechtsform im Text ODER im Wizard-Scope → FAIL bei Fehlen (wie bisher). + - Ausgeschlossene Rechtsform (Verein→Handelsregister) → NOT_APPLICABLE. + - status (Verdikt) ist getrennt von severity (Risiko). 
+""" + +from __future__ import annotations + +import asyncio + +import pytest + +from compliance.api.agent_check._agent_outputs import _derive_scope +from compliance.services.specialist_agents import AgentInput +from compliance.services.specialist_agents._base import CheckStatus, Severity +from compliance.services.specialist_agents.impressum._classification import ( + scan_context_to_scope, +) +from compliance.services.specialist_agents.impressum.agent import ImpressumAgent +from compliance.services.specialist_agents.impressum.mcs import ( + MCS, + detect_legal_form_present, + scope_disposition, +) + +_MC009 = next(m for m in MCS if m.mc_id == "IMP-MC-009") + +# Pflicht-Felder vorhanden (Name/Email/Telefon) ABER keine Rechtsform, +# kein HR, keine Vertretungsangabe im Text. +TEXT_NO_LEGAL_FORM = ( + "Angaben gemäß § 5 TMG\n\n" + "Muster Consulting\n" + "Musterstraße 1\n" + "12345 Berlin\n\n" + "E-Mail: info@example.com\n" + "Telefon: +49 30 1234567\n" + "Weitere Hinweise finden Sie auf unserer Seite.\n" +) +# Gleicher Text, aber Rechtsform GmbH im Text → registerpflichtig erkennbar. 
+TEXT_WITH_GMBH = TEXT_NO_LEGAL_FORM.replace( + "Muster Consulting", "Muster Consulting GmbH") + + +@pytest.fixture(autouse=True) +def _llm_offline(monkeypatch): + async def _no_validate(*_a, **_kw): + return {} + monkeypatch.setattr( + "compliance.services.specialist_agents.impressum.agent.validate_present", + _no_validate, raising=False, + ) + + +def _run(scan_context: dict | None, text: str = TEXT_NO_LEGAL_FORM): + agent = ImpressumAgent() + scope = scan_context_to_scope(scan_context) if scan_context else [] + return asyncio.run(agent.evaluate(AgentInput( + doc_type="impressum", text=text, business_scope=scope))) + + +def _by_field(out, field_id): + return next((f for f in out.findings if f.field_id == field_id), None) + + +# ── detect_legal_form_present ─────────────────────────────────────── + + +def test_detector_recognizes_rechtsform(): + assert detect_legal_form_present("Muster Consulting GmbH") + assert detect_legal_form_present("Beispiel AG, Berlin") + assert detect_legal_form_present("Max Mustermann e.K.") + + +def test_detector_no_rechtsform(): + assert not detect_legal_form_present(TEXT_NO_LEGAL_FORM) + assert not detect_legal_form_present("Muster Consulting, Berlin") + + +# ── Unknown ≠ Fail: unbestimmte Rechtsform → INSUFFICIENT_EVIDENCE ── + + +def test_unknown_legal_form_is_insufficient_not_fail(): + out = _run(None) # kein Wizard-Scope, keine Rechtsform im Text + hr = _by_field(out, "handelsregister") + vt = _by_field(out, "vertretungsberechtigte") + assert hr is not None and vt is not None + for f in (hr, vt): + assert f.status == CheckStatus.INSUFFICIENT_EVIDENCE.value + assert f.severity == Severity.INFO.value # kein HIGH! 
+ assert f.severity_reason == "rechtsform_unbestimmt" + + +def test_insufficient_evidence_counted_in_aggregate(): + out = _run(None) + assert out.mc_insufficient >= 2 + cov = {c.mc_id: c.status for c in out.mc_coverage} + assert cov["IMP-MC-004"] == "insufficient_evidence" + assert cov["IMP-MC-006"] == "insufficient_evidence" + + +def test_insufficient_findings_produce_no_recommendation(): + # Hinweise sind keine Pflicht-Massnahmen → kein Rollup. + out = _run(None) + insuf_ids = {f.check_id for f in out.findings + if f.status == CheckStatus.INSUFFICIENT_EVIDENCE.value} + for rec in out.recommendations: + assert not (set(rec.related_finding_ids) & insuf_ids) + + +# ── Rechtsform bekannt → FAIL (Text oder Wizard) ──────────────────── + + +def test_rechtsform_in_text_yields_fail(): + out = _run(None, text=TEXT_WITH_GMBH) + hr = _by_field(out, "handelsregister") + assert hr is not None + assert hr.status == CheckStatus.FAIL.value + assert hr.severity == Severity.HIGH.value + + +def test_rechtsform_in_wizard_scope_yields_fail(): + out = _run({"legal_form": "gmbh"}) # Text ohne Rechtsform, Wizard kennt sie + hr = _by_field(out, "handelsregister") + assert hr is not None + assert hr.status == CheckStatus.FAIL.value + assert hr.severity == Severity.HIGH.value + + +# ── Applicability ≠ Compliance: Verein → NOT_APPLICABLE ───────────── + + +def test_verein_handelsregister_not_applicable(): + out = _run({"legal_form": "verein"}) + assert _by_field(out, "handelsregister") is None # kein Finding + cov = {c.mc_id: c.status for c in out.mc_coverage} + assert cov["IMP-MC-004"] == "na" + + +def test_default_finding_status_is_fail(): + # Nicht-rechtsform-abhaengige Pflicht (Name) bleibt FAIL bei Fehlen. 
+ out = _run(None, text="Angaben gemäß § 5 TMG\n" + "x" * 120) + name = _by_field(out, "name_anbieter") + assert name is not None + assert name.status == CheckStatus.FAIL.value + + +# ── POSSIBLY_APPLICABLE: §18-MStV-Graubereich (editorial) ─────────── + + +def test_scope_disposition_three_way(): + assert scope_disposition(_MC009, {"editorial"}, False) == "applies" + assert scope_disposition(_MC009, {"editorial_possible"}, False) == "possible" + assert scope_disposition(_MC009, set(), False) == "na" + + +def test_editorial_hard_yields_fail(): + # industry=media (Verlag/Presse) → harte §18-Pflicht. + out = _run({"industry": "media"}) + red = _by_field(out, "verantwortlicher_redaktion") + assert red is not None + assert red.status == CheckStatus.FAIL.value + assert red.severity == Severity.MEDIUM.value + + +def test_editorial_possible_yields_possibly_applicable(): + # Corporate-Blog (has_editorial_content, kein media) → Graubereich. + out = ImpressumAgent() + out = asyncio.run(out.evaluate(AgentInput( + doc_type="impressum", text=TEXT_NO_LEGAL_FORM, + business_scope=["editorial_possible"]))) + red = _by_field(out, "verantwortlicher_redaktion") + assert red is not None + assert red.status == CheckStatus.POSSIBLY_APPLICABLE.value + assert red.severity == Severity.LOW.value + assert out.mc_possibly >= 1 + + +def test_editorial_absent_is_not_applicable(): + out = _run(None) # kein editorial-Signal + assert _by_field(out, "verantwortlicher_redaktion") is None + cov = {c.mc_id: c.status for c in out.mc_coverage} + assert cov["IMP-MC-009"] == "na" + + +def test_derive_scope_editorial_tiers(): + assert "editorial_possible" in _derive_scope({"has_editorial_content": True}) + assert "editorial" in _derive_scope({"industry": "media"}) + # Medienunternehmen gewinnt — nicht beide Tokens. + s = _derive_scope({"industry": "media", "has_editorial_content": True}) + assert "editorial" in s and "editorial_possible" not in s