diff --git a/admin-compliance/app/sdk/agent/_components/AgentFindingCard.tsx b/admin-compliance/app/sdk/agent/_components/AgentFindingCard.tsx index 2d6b59c7..65adea4d 100644 --- a/admin-compliance/app/sdk/agent/_components/AgentFindingCard.tsx +++ b/admin-compliance/app/sdk/agent/_components/AgentFindingCard.tsx @@ -58,9 +58,8 @@ export function AgentFindingCard({ f }: { f: Finding }) { {statusLabel} )} - {f.check_id} {sources.map((s, i) => ( - + ))} {f.confidence !== undefined && ( diff --git a/admin-compliance/app/sdk/agent/_components/AgentMcCoverage.tsx b/admin-compliance/app/sdk/agent/_components/AgentMcCoverage.tsx deleted file mode 100644 index 498d4a80..00000000 --- a/admin-compliance/app/sdk/agent/_components/AgentMcCoverage.tsx +++ /dev/null @@ -1,67 +0,0 @@ -'use client' - -/** - * "Was wurde geprüft" — listet alle MCs eines Agents mit ihrem Status. - * Standardmäßig collapsed; zeigt sofort, was Methodik des Agents war. - */ - -import React, { useState } from 'react' - -import type { McCoverage } from './_agentTypes' - -const STATUS_COLOR: Record = { - ok: '#10b981', - na: '#94a3b8', - skipped: '#cbd5e1', - high: '#dc2626', - medium: '#f59e0b', - low: '#3b82f6', - insufficient_evidence: '#64748b', - possibly_applicable: '#ca8a04', -} - -const STATUS_LABEL: Record = { - ok: 'OK', - na: 'n/a', - skipped: 'übersprungen', - high: 'HIGH', - medium: 'MEDIUM', - low: 'LOW', - insufficient_evidence: 'unklar', - possibly_applicable: 'evtl. relevant', -} - -export function AgentMcCoverage({ coverage }: { coverage: McCoverage[] }) { - const [open, setOpen] = useState(false) - if (!coverage?.length) return null - return ( -
- - {open && ( -
- {coverage.map(c => ( -
- - {c.mc_id} - - {STATUS_LABEL[c.status] || c.status} - - {c.reason && ( - — {c.reason} - )} -
- ))} -
- )} -
- ) -} diff --git a/admin-compliance/app/sdk/agent/_components/AgentPflichtTable.tsx b/admin-compliance/app/sdk/agent/_components/AgentPflichtTable.tsx new file mode 100644 index 00000000..e6d83a5d --- /dev/null +++ b/admin-compliance/app/sdk/agent/_components/AgentPflichtTable.tsx @@ -0,0 +1,82 @@ +'use client' + +/** + * AgentPflichtTable — die geprüften Pflichtangaben als menschliche Tabelle: + * Status-Icon + Feldname + tatsächlich gefundener Text. Ersetzt die alte + * MC-ID-Liste. + * + * WICHTIG: zeigt NIE die mc_id (Reverse-Engineering-Schutz der MC-Bibliothek) + * — nur das menschliche `label`. Generisch für jeden Agenten verwendbar. + */ + +import React from 'react' + +import type { McCoverage } from './_agentTypes' + +const DISP: Record = { + ok: { icon: '✓', text: 'vorhanden', color: '#16a34a' }, + high: { icon: '✗', text: 'fehlt', color: '#dc2626' }, + medium: { icon: '✗', text: 'fehlt', color: '#d97706' }, + low: { icon: '✗', text: 'fehlt', color: '#2563eb' }, + possibly_applicable: { icon: '?', text: 'zu prüfen', color: '#ca8a04' }, + insufficient_evidence: { icon: '?', text: 'unklar', color: '#64748b' }, + na: { icon: '–', text: 'nicht anwendbar', color: '#94a3b8' }, + skipped: { icon: '–', text: 'nicht geprüft', color: '#cbd5e1' }, +} + +// Reihenfolge: Probleme zuerst, dann erfüllt, dann n/a. +const RANK: Record = { + high: 0, medium: 1, low: 2, possibly_applicable: 3, + insufficient_evidence: 4, ok: 5, na: 6, skipped: 7, +} + +export function AgentPflichtTable({ coverage }: { coverage: McCoverage[] }) { + if (!coverage?.length) return null + const rows = [...coverage].sort( + (a, b) => (RANK[a.status] ?? 9) - (RANK[b.status] ?? 9), + ) + const count = (s: string) => coverage.filter(c => c.status === s).length + const ok = count('ok') + const fehlt = count('high') + count('medium') + count('low') + const pruefen = count('possibly_applicable') + count('insufficient_evidence') + const na = count('na') + count('skipped') + + return ( +
+
+ Pflichtangaben — {ok} vorhanden + {fehlt > 0 && <> · {fehlt} fehlt} + {pruefen > 0 && ( + <> · {pruefen} zu prüfen + )} + {na > 0 && <> · {na} n/a} +
+
+ {rows.map((c, i) => { + const d = DISP[c.status] || DISP.skipped + return ( +
+ + {d.icon} + + + {c.label || 'Angabe'} + + + {c.status === 'ok' ? ( + {c.found || 'vorhanden'} + ) : ( + {d.text} + )} + +
+ ) + })} +
+
+ ) +} diff --git a/admin-compliance/app/sdk/agent/_components/AgentResultView.tsx b/admin-compliance/app/sdk/agent/_components/AgentResultView.tsx index bfa853d7..a1aa4d4a 100644 --- a/admin-compliance/app/sdk/agent/_components/AgentResultView.tsx +++ b/admin-compliance/app/sdk/agent/_components/AgentResultView.tsx @@ -15,7 +15,7 @@ import React, { useState } from 'react' import type { Severity, SlotOutput } from './_agentTypes' import { AgentFindingCard } from './AgentFindingCard' -import { AgentMcCoverage } from './AgentMcCoverage' +import { AgentPflichtTable } from './AgentPflichtTable' import { AgentRecommendationCard } from './AgentRecommendationCard' import { AgentSpeedometer } from './AgentSpeedometer' @@ -42,7 +42,7 @@ export function AgentResultView({ output }: { output: SlotOutput }) { )} - + = { +export const SCENARIO_LABELS: Record = { regenerate: { label: 'Neugenerierung', color: 'text-red-700', bg: 'bg-red-100' }, fix: { label: 'Korrekturen', color: 'text-amber-700', bg: 'bg-amber-100' }, import: { label: 'Konform', color: 'text-green-700', bg: 'bg-green-100' }, missing: { label: 'Fehlt', color: 'text-gray-600', bg: 'bg-gray-100' }, } -const DOC_TYPE_LABELS: Record = { +export const DOC_TYPE_LABELS: Record = { dse: 'DSI', agb: 'AGB', impressum: 'Impressum', cookie: 'Cookie', widerruf: 'Widerruf', other: 'Sonstiges', social_media: 'Social Media', dsfa: 'DSFA', joint_controller: 'Art. 26', @@ -46,7 +46,7 @@ interface GroupedCheck { children: CheckItem[] } -function groupChecks(checks: CheckItem[]): GroupedCheck[] { +export function groupChecks(checks: CheckItem[]): GroupedCheck[] { const l1 = checks.filter(c => (c.level ?? 1) === 1) return l1.map(c => ({ check: c, @@ -54,7 +54,7 @@ function groupChecks(checks: CheckItem[]): GroupedCheck[] { })) } -function CheckIcon({ passed, skipped, isInfo }: { passed: boolean; skipped?: boolean; isInfo?: boolean }) { +export function CheckIcon({ passed, skipped, isInfo }: { passed: boolean; skipped?: boolean; isInfo?: boolean }) { if (skipped) { return ( diff --git a/admin-compliance/app/sdk/agent/_components/ComplianceResultTabs.tsx b/admin-compliance/app/sdk/agent/_components/ComplianceResultTabs.tsx index 89b9aa15..84f1a465 100644 --- a/admin-compliance/app/sdk/agent/_components/ComplianceResultTabs.tsx +++ b/admin-compliance/app/sdk/agent/_components/ComplianceResultTabs.tsx @@ -10,21 +10,16 @@ import React, { useState } from 'react' -import type { SlotOutput } from './_agentTypes' -import { AgentResultTab } from './AgentResultTab' -import { ChecklistView } from './ChecklistView' +import { ChecklistView, DOC_TYPE_LABELS, type DocResult } from './ChecklistView' +import { DocResultView } from './DocResultView' import { MigrationPanel } from './MigrationPanel' -const TOPIC_LABELS: Record = { - impressum: 'Impressum', - cookie: 'Cookie-Banner', -} - export function ComplianceResultTabs({ results }: { results: any }) { - const agentOutputs: Record = results.agent_outputs || {} - const topicKeys = Object.keys(agentOutputs) - const tabs = [...topicKeys, 'raw'] - const [active, setActive] = useState(tabs[0]) + // Themen-Tabs aus der HAUPT-Engine (result.results) — nicht aus dem + // v3-Agent. Jedes Dokument = ein Tab mit der genauen Pflichtangaben-Tabelle. + const docs: DocResult[] = results.results || [] + const tabs = docs.map((_: DocResult, i: number) => String(i)).concat('raw') + const [active, setActive] = useState(tabs[0] ?? 'raw') return (
@@ -112,26 +107,30 @@ export function ComplianceResultTabs({ results }: { results: any }) {
)} - {/* Tab-Leiste — Themen-Agenten + Roh-Checkliste */} + {/* Tab-Leiste — ein Tab je Dokument (Haupt-Engine) + Übersicht */}
{tabs.map(t => { - const count = t !== 'raw' ? (agentOutputs[t]?.findings?.length ?? 0) : 0 + const tabClass = `px-3 py-1.5 text-sm font-medium border-b-2 -mb-px transition-colors flex items-center gap-1.5 ${ + active === t + ? 'border-purple-500 text-purple-700' + : 'border-transparent text-gray-500 hover:text-gray-700' + }` + if (t === 'raw') { + return ( + + ) + } + const doc = docs[Number(t)] + const dot = doc.error ? 'bg-gray-300' + : doc.scenario === 'import' ? 'bg-green-500' + : doc.scenario === 'fix' ? 'bg-amber-500' + : doc.scenario === 'regenerate' ? 'bg-red-500' : 'bg-gray-400' return ( - ) })} @@ -140,11 +139,8 @@ export function ComplianceResultTabs({ results }: { results: any }) { {/* Tab-Inhalt */} {active === 'raw' ? ( - ) : agentOutputs[active] ? ( - + ) : docs[Number(active)] ? ( + ) : null} {/* Check-Footer (themenübergreifend) */} diff --git a/admin-compliance/app/sdk/agent/_components/DocResultView.tsx b/admin-compliance/app/sdk/agent/_components/DocResultView.tsx new file mode 100644 index 00000000..472f287e --- /dev/null +++ b/admin-compliance/app/sdk/agent/_components/DocResultView.tsx @@ -0,0 +1,144 @@ +'use client' + +/** + * DocResultView — EIN Dokument-Prüfergebnis der HAUPT-Engine als saubere, + * immer-offene Pflichtangaben-Tabelle: Verdikt + Gruppen + extrahierte Texte + * (matched_text) pro Prüfpunkt. + * + * Quelle = result.results[doc] (die genaue Haupt-Doc-Check-Engine), NICHT + * der v3-Agent. Zeigt menschliche Labels + gefundene Snippets, keine internen + * IDs. Wiederverwendet die Render-Bausteine aus ChecklistView. + */ + +import React from 'react' + +import { + CheckIcon, + type DocResult, + groupChecks, + SCENARIO_LABELS, +} from './ChecklistView' + +function Snippet({ text }: { text: string }) { + return ( +
+ „…{text}…" +
+ ) +} + +function ScoreBar({ label, pct, blue }: { label: string; pct: number; blue?: boolean }) { + const color = blue + ? pct >= 80 ? 'bg-blue-400' : 'bg-blue-300' + : pct === 100 ? 'bg-green-500' : pct >= 50 ? 'bg-yellow-500' : 'bg-red-500' + return ( +
+ {label} +
+
+
+ {pct}% +
+ ) +} + +export function DocResultView({ doc }: { doc: DocResult }) { + if (doc.error) { + return ( +
+ {doc.error} +
+ ) + } + const grouped = groupChecks(doc.checks) + const l1 = doc.checks.filter(c => (c.level ?? 1) === 1) + const l1Score = l1.filter(c => c.severity !== 'INFO') + const l1Passed = l1Score.filter(c => c.passed).length + const l2 = doc.checks.filter(c => (c.level ?? 1) === 2 && !c.skipped) + const l2Passed = l2.filter(c => c.passed).length + const sc = doc.scenario ? SCENARIO_LABELS[doc.scenario] : null + + return ( +
+ {/* Verdikt-Kopf */} +
+ {sc && ( + + {sc.label} + + )} + + {l1Passed}/{l1Score.length} Pflichtangaben + {l2.length > 0 && <>, {l2Passed}/{l2.length} Detailprüfungen} + +
+ + {l2.length > 0 && ( + + )} +
+
+ + {/* Pflichtangaben-Tabelle */} +
+ {grouped.map(g => { + const l1Info = g.check.severity === 'INFO' && !g.check.passed + return ( +
+
+ +
+
+ {g.check.label} +
+ {g.check.passed && g.check.matched_text && g.children.length === 0 && ( + + )} + {!g.check.passed && g.check.hint && ( +
+ {g.check.hint} +
+ )} +
+
+ {g.children.length > 0 && ( +
+ {g.children.map(ch => { + const chInfo = ch.severity === 'INFO' && !ch.passed && !ch.skipped + return ( +
+ +
+
+ {ch.label}{ch.skipped && ' (übersprungen)'} +
+ {ch.passed && ch.matched_text && } + {!ch.passed && !ch.skipped && ch.hint && ( +
+ {ch.hint} +
+ )} +
+
+ ) + })} +
+ )} +
+ ) + })} +
+ + {doc.word_count > 0 && ( +
{doc.word_count} Wörter analysiert
+ )} +
+ ) +} diff --git a/admin-compliance/app/sdk/agent/_components/__tests__/AgentPflichtTable.test.tsx b/admin-compliance/app/sdk/agent/_components/__tests__/AgentPflichtTable.test.tsx new file mode 100644 index 00000000..f625659f --- /dev/null +++ b/admin-compliance/app/sdk/agent/_components/__tests__/AgentPflichtTable.test.tsx @@ -0,0 +1,29 @@ +import { describe, it, expect } from 'vitest' +import { render, screen } from '@testing-library/react' + +import { AgentPflichtTable } from '../AgentPflichtTable' +import type { McCoverage } from '../_agentTypes' + +const COV: McCoverage[] = [ + { mc_id: 'IMP-MC-002', status: 'ok', label: 'Email-Adresse', + found: 'kundenbetreuung@bmw.de' }, + { mc_id: 'IMP-MC-010', status: 'possibly_applicable', + label: 'Verbraucher-Streitbeilegung-Hinweis' }, + { mc_id: 'IMP-MC-009', status: 'na', label: 'Verantwortlicher § 18 MStV' }, +] + +describe('AgentPflichtTable', () => { + it('zeigt Label + gefundenen Wert, aber KEINE mc_id', () => { + render() + expect(screen.getByText('Email-Adresse')).toBeInTheDocument() + expect(screen.getByText('kundenbetreuung@bmw.de')).toBeInTheDocument() + // Reverse-Engineering-Schutz: mc_id darf NICHT erscheinen. + expect(screen.queryByText(/IMP-MC-/)).not.toBeInTheDocument() + }) + + it('Verdikt-Header zählt die Status', () => { + render() + expect(screen.getByText(/1 vorhanden/)).toBeInTheDocument() + expect(screen.getByText(/1 zu prüfen/)).toBeInTheDocument() + }) +}) diff --git a/admin-compliance/app/sdk/agent/_components/__tests__/AgentResultTab.test.tsx b/admin-compliance/app/sdk/agent/_components/__tests__/AgentResultTab.test.tsx index 5e522d46..fc5b9dd9 100644 --- a/admin-compliance/app/sdk/agent/_components/__tests__/AgentResultTab.test.tsx +++ b/admin-compliance/app/sdk/agent/_components/__tests__/AgentResultTab.test.tsx @@ -69,27 +69,32 @@ describe('AgentResultTab', () => { }) const DOC_RESULT = { - label: 'Impressum-Rohdaten', url: 'https://example.com/impressum', - doc_type: 'impressum', word_count: 50, completeness_pct: 80, - correctness_pct: 90, checks: [], findings_count: 2, error: '', + label: 'Impressum', url: 'https://example.com/impressum', + doc_type: 'impressum', word_count: 50, completeness_pct: 100, + correctness_pct: 100, findings_count: 0, error: '', scenario: 'import', + checks: [ + { id: 'name', label: 'Name des Anbieters', passed: true, severity: 'HIGH', + matched_text: 'Bayerische Motoren Werke Aktiengesellschaft', level: 1 }, + { id: 'email', label: 'E-Mail-Adresse', passed: true, severity: 'HIGH', + matched_text: 'kundenbetreuung@bmw.de', level: 1 }, + ], } describe('ComplianceResultTabs', () => { - it('zeigt den Impressum-Tab zuerst und wechselt auf die Roh-Checkliste', () => { - const result = { - agent_outputs: { impressum: IMPRESSUM_OUTPUT }, - results: [DOC_RESULT], - } + it('rendert das Dokument-Tab der Haupt-Engine mit extrahierten Texten', () => { + // Themen-Tabs kommen aus result.results (Haupt-Engine), NICHT agent_outputs. + const result = { results: [DOC_RESULT] } render() - // beide Tabs vorhanden + // Dokument-Tab + Übersicht expect(screen.getByRole('button', { name: /Impressum/ })).toBeInTheDocument() - expect(screen.getByRole('button', { name: /Alle Checks \(roh\)/ })).toBeInTheDocument() - // Impressum-Tab aktiv → Finding sichtbar - expect(screen.getByText('Pflichtangabe fehlt: Email-Adresse')).toBeInTheDocument() - // Wechsel auf die Roh-Checkliste - fireEvent.click(screen.getByRole('button', { name: /Alle Checks \(roh\)/ })) - // Impressum-Finding ist weg (umgeschaltet), Komponente intakt - expect(screen.queryByText('Pflichtangabe fehlt: Email-Adresse')).not.toBeInTheDocument() - expect(screen.getByRole('button', { name: /Alle Checks \(roh\)/ })).toBeInTheDocument() + expect(screen.getByRole('button', { name: /Alle Checks/ })).toBeInTheDocument() + // DocResultView: menschliches Label + gefundener Text sichtbar + expect(screen.getByText('Name des Anbieters')).toBeInTheDocument() + expect(screen.getByText(/Bayerische Motoren Werke/)).toBeInTheDocument() + // Wechsel auf die Übersicht + fireEvent.click(screen.getByRole('button', { name: /Alle Checks/ })) + expect( + screen.getByText(/Dokumenten-Pruefung/), + ).toBeInTheDocument() }) }) diff --git a/admin-compliance/app/sdk/agent/_components/_agentTypes.ts b/admin-compliance/app/sdk/agent/_components/_agentTypes.ts index 5248e185..e7176273 100644 --- a/admin-compliance/app/sdk/agent/_components/_agentTypes.ts +++ b/admin-compliance/app/sdk/agent/_components/_agentTypes.ts @@ -63,8 +63,10 @@ export interface Recommendation { export interface McCoverage { mc_id: string status: 'ok' | 'na' | 'high' | 'medium' | 'low' | 'skipped' | - 'insufficient_evidence' + 'insufficient_evidence' | 'possibly_applicable' reason?: string + label?: string // menschlicher Feldname (KEINE mc_id im Frontend zeigen) + found?: string // gefundener Text/Wert bei status=ok } export interface EscalationLog { diff --git a/backend-compliance/compliance/services/profile_extractor.py b/backend-compliance/compliance/services/profile_extractor.py index 4ac1a8b4..0aec5738 100644 --- a/backend-compliance/compliance/services/profile_extractor.py +++ b/backend-compliance/compliance/services/profile_extractor.py @@ -10,13 +10,14 @@ Returns a dict that maps to CompanyProfile and ScopeProfilingAnswer fields. import logging import re +from typing import Optional logger = logging.getLogger(__name__) def extract_profile_from_documents( doc_texts: dict[str, str], - business_profile: dict | None = None, + business_profile: Optional[dict] = None, ) -> dict: """Extract Company Profile fields from document texts. @@ -100,28 +101,38 @@ def _extract_company_info(text: str, result: dict) -> None: """Extract company name, legal form, address from text.""" cp = result["company_profile"] - # GmbH / AG / UG / e.K. etc. - legal_forms = { - r"(\S+(?:\s+\S+){0,4})\s+gmbh\b": ("GmbH", "gmbh"), - r"(\S+(?:\s+\S+){0,4})\s+ag\b": ("AG", "ag"), - r"(\S+(?:\s+\S+){0,4})\s+ug\b": ("UG", "ug"), - r"(\S+(?:\s+\S+){0,4})\s+e\.?\s*k\.?\b": ("e.K.", "ek"), - r"(\S+(?:\s+\S+){0,4})\s+gbr\b": ("GbR", "gbr"), - r"(\S+(?:\s+\S+){0,4})\s+ohg\b": ("OHG", "ohg"), - r"(\S+(?:\s+\S+){0,4})\s+gmbh\s*&\s*co\.?\s*kg": ("GmbH & Co. KG", "gmbh_co_kg"), - } + # Rechtsform + Firmenname. Die Reihenfolge der Muster ist NICHT die + # Priorität — wir nehmen den FRUEHESTEN Treffer im Text: ein Impressum + # nennt den Betreiber zuerst; spätere Erwähnungen (z.B. "juris GmbH" im + # Hinweis auf gesetze-im-internet.de) sind nicht der Anbieter. Ausge- + # schriebene Formen ("Aktiengesellschaft") zählen mit (sonst wird BMW AG + # nicht erkannt und faelschlich die naechste GmbH gegriffen). + legal_forms = [ + (r"(\S+(?:\s+\S+){0,4})\s+gmbh\s*&\s*co\.?\s*kg\b", "gmbh_co_kg"), + (r"(\S+(?:\s+\S+){0,4})\s+(?:aktiengesellschaft|ag)\b", "ag"), + (r"(\S+(?:\s+\S+){0,4})\s+(?:unternehmergesellschaft|ug)\b", "ug"), + (r"(\S+(?:\s+\S+){0,4})\s+gmbh\b", "gmbh"), + (r"(\S+(?:\s+\S+){0,4})\s+e\.?\s*k\.?\b", "ek"), + (r"(\S+(?:\s+\S+){0,4})\s+gbr\b", "gbr"), + (r"(\S+(?:\s+\S+){0,4})\s+ohg\b", "ohg"), + ] text_lower = text.lower() - for pattern, (form_label, form_id) in legal_forms.items(): + best = None # (start, end, form_id) — frühester Treffer + for pattern, form_id in legal_forms: m = re.search(pattern, text_lower) - if m: - raw_name = m.group(0).strip() - # Clean up: take from uppercase start - for i, ch in enumerate(text[m.start():m.end()]): - if ch.isupper(): - cp["companyName"] = text[m.start() + i:m.end()].strip() - break - cp["legalForm"] = form_id - break + # frühester Treffer gewinnt; bei Gleichstand die Listen-Reihenfolge + # (GmbH & Co. KG vor GmbH). + if m and (best is None or m.start() < best[0]): + best = (m.start(), m.end(), form_id) + if best: + start, end, form_id = best + # Firmenname ab dem ersten Grossbuchstaben im Treffer (schneidet + # führende Kleinwörter wie "von der" ab). + for i, ch in enumerate(text[start:end]): + if ch.isupper(): + cp["companyName"] = text[start + i:end].strip() + break + cp["legalForm"] = form_id # PLZ + Ort plz_match = re.search( diff --git a/backend-compliance/compliance/services/specialist_agents/_base.py b/backend-compliance/compliance/services/specialist_agents/_base.py index d66d1f9a..113e6b92 100644 --- a/backend-compliance/compliance/services/specialist_agents/_base.py +++ b/backend-compliance/compliance/services/specialist_agents/_base.py @@ -107,6 +107,11 @@ class McCoverage(BaseModel): mc_id: str status: str reason: str = "" + # Menschlicher Feldname (für die Pflichtangaben-Tabelle im Frontend — + # NICHT die mc_id zeigen, sonst Reverse-Engineering der MC-Bibliothek). + label: str = "" + # Der tatsächlich gefundene Text/Wert (Snippet) bei status=ok. + found: str = "" class EscalationLog(BaseModel): diff --git a/backend-compliance/compliance/services/specialist_agents/impressum/agent.py b/backend-compliance/compliance/services/specialist_agents/impressum/agent.py index 0e91436a..17fc7ecd 100644 --- a/backend-compliance/compliance/services/specialist_agents/impressum/agent.py +++ b/backend-compliance/compliance/services/specialist_agents/impressum/agent.py @@ -78,6 +78,25 @@ def _build_measure(label: str, norm: str) -> str: return msg +def _line_of(text: str, start_pos: int, end_pos: int) -> str: + """Die Zeile um einen Regex-Treffer — als 'gefundener Wert' für die + Pflichtangaben-Tabelle. Gekappt + bereinigt.""" + start = text.rfind("\n", 0, start_pos) + 1 + end = text.find("\n", end_pos) + if end == -1: + end = len(text) + return " ".join(text[start:end].split())[:160] + + +def _coverage(mc, status: str, reason: str, found: str = "") -> McCoverage: + """McCoverage mit menschlichem Label (mc.label) — das Frontend zeigt + NIE die mc_id (Reverse-Engineering-Schutz).""" + return McCoverage( + mc_id=mc.mc_id, status=status, reason=reason, + label=mc.label, found=found, + ) + + class ImpressumAgent(BaseSpecialistAgent): agent_id = "impressum" agent_version = "3.0" @@ -103,10 +122,7 @@ class ImpressumAgent(BaseSpecialistAgent): if len(text) < 100: # Doc zu kurz — alle eigenen Pattern-IDs als skipped for mc in MCS: - coverage.append(McCoverage( - mc_id=mc.mc_id, status="skipped", - reason="text too short", - )) + coverage.append(_coverage(mc, "skipped", "text too short")) return self._finalize( start, findings, esc_logs, coverage, confidence=0.0, @@ -129,27 +145,35 @@ class ImpressumAgent(BaseSpecialistAgent): for mc in MCS: disp = scope_disposition(mc, scope, is_auto) if disp == "na": - coverage.append(McCoverage( - mc_id=mc.mc_id, status="na", - reason="nicht anwendbar (Rechtsform/Branche)", - )) + coverage.append(_coverage( + mc, "na", "nicht anwendbar (Rechtsform/Branche)")) continue - if any(p.search(text) for p in mc.patterns): - coverage.append(McCoverage( - mc_id=mc.mc_id, status="ok", reason="Pattern-Treffer", + matched = None + for p in mc.patterns: + m = p.search(text) + if m: + matched = m + break + if matched is not None: + coverage.append(_coverage( + mc, "ok", "Pattern-Treffer", + found=_line_of(text, matched.start(), matched.end()), )) continue if mc.optional: # fehlt + optional → KEIN Finding (z.B. USt-IdNr; # Kleinunternehmer §19 haben legitim keine). - coverage.append(McCoverage( - mc_id=mc.mc_id, status="na", - reason="optional — nicht angegeben", - )) + coverage.append(_coverage( + mc, "na", "optional — nicht angegeben")) continue if disp == "possible": - # Graubereich (z.B. Corporate-Blog → §18 MStV evtl.) → - # POSSIBLY_APPLICABLE: Pruef-Hinweis (LOW), kein Verstoss. + # Graubereich (z.B. Corporate-Blog → §18, OEM-Markenseite → + # §36 VSBG) → POSSIBLY_APPLICABLE: Pruef-Hinweis (LOW), + # kein Verstoss. Hinweistext kommt MC-spezifisch. + hint = mc.possibly_hint or ( + f"Diese Angabe ist nur situativ Pflicht ({mc.norm}). " + "Bitte prüfen, ob sie auf Ihre Seite zutrifft." + ) findings.append(Finding( check_id=f"IMP-{mc.field_id}", agent=self.agent_id, @@ -161,12 +185,7 @@ class ImpressumAgent(BaseSpecialistAgent): title=f"{mc.label}: ggf. relevant — manuell prüfen", norm=mc.norm, evidence="", - action=( - "Bei journalistisch-redaktionellen Inhalten " - "(Nachrichten/Magazin) ist ein Verantwortlicher nach " - "§ 18 MStV anzugeben. Bei reinem Corporate-Blog meist " - "nicht erforderlich — bitte prüfen." - ), + action=hint, confidence=0.5, sources=[EvidenceSource( source_type=SourceType.REGEX, @@ -175,10 +194,8 @@ class ImpressumAgent(BaseSpecialistAgent): confidence=0.5, )], )) - coverage.append(McCoverage( - mc_id=mc.mc_id, status="possibly_applicable", - reason="Graubereich — manuelle Prüfung", - )) + coverage.append(_coverage( + mc, "possibly_applicable", "Graubereich — manuelle Prüfung")) continue if mc.legal_form_dependent and not form_known: # Rechtsform unbestimmt → kein hartes FAIL, sondern @@ -207,10 +224,8 @@ class ImpressumAgent(BaseSpecialistAgent): confidence=0.4, )], )) - coverage.append(McCoverage( - mc_id=mc.mc_id, status="insufficient_evidence", - reason="Rechtsform unbestimmt", - )) + coverage.append(_coverage( + mc, "insufficient_evidence", "Rechtsform unbestimmt")) continue sev = _SEV_TO_ENUM.get(mc.severity_if_missing, Severity.MEDIUM) findings.append(Finding( @@ -233,10 +248,8 @@ class ImpressumAgent(BaseSpecialistAgent): confidence=0.9, )], )) - coverage.append(McCoverage( - mc_id=mc.mc_id, status=sev.value.lower(), - reason="kein Pattern-Treffer", - )) + coverage.append(_coverage( + mc, sev.value.lower(), "kein Pattern-Treffer")) n_fail = sum(1 for f in findings if f.status == CheckStatus.FAIL.value) n_unklar = sum(1 for f in findings diff --git a/backend-compliance/compliance/services/specialist_agents/impressum/mcs.py b/backend-compliance/compliance/services/specialist_agents/impressum/mcs.py index 64b43f93..5d5a23ef 100644 --- a/backend-compliance/compliance/services/specialist_agents/impressum/mcs.py +++ b/backend-compliance/compliance/services/specialist_agents/impressum/mcs.py @@ -40,6 +40,9 @@ class MC: # ist die MC NICHT hart anwendbar, sondern POSSIBLY_APPLICABLE — Pruef- # Hinweis (severity LOW) statt FAIL. Z.B. Corporate-Blog (§18 MStV evtl.). possibly_applies_scope: tuple[str, ...] = field(default_factory=tuple) + # MC-spezifischer Pruef-Hinweis fuer den POSSIBLY_APPLICABLE-Fall + # (warum Graubereich + was der Nutzer pruefen soll). + possibly_hint: str = "" MCS: tuple[MC, ...] = ( @@ -182,6 +185,11 @@ MCS: tuple[MC, ...] = ( severity_if_missing="MEDIUM", requires_scope=("editorial",), possibly_applies_scope=("editorial_possible",), + possibly_hint=( + "Bei journalistisch-redaktionellen Inhalten (Nachrichten/Magazin) " + "ist ein Verantwortlicher nach § 18 MStV anzugeben. Bei reinem " + "Corporate-Blog meist nicht erforderlich — bitte prüfen." + ), patterns=(re.compile( r"(?:Verantwortlich(?:er|e)?\s+(?:f(?:ue|ü)r|i\.S\.d\.|" r"nach|gem(?:ae|ä)ß)\s+§\s*18|" @@ -194,9 +202,17 @@ MCS: tuple[MC, ...] = ( mc_id="IMP-MC-010", field_id="verbraucher_streitbeilegung", label="Verbraucher-Streitbeilegung-Hinweis", - norm="§ 36 VSBG (B2C-Anbieter Pflicht)", + norm="§ 36 VSBG (Verbraucherverträge über die Website)", severity_if_missing="MEDIUM", - requires_scope=("ecommerce", "b2c"), + # Hart nur bei echtem Online-Verkauf; reine B2C-Orientierung (z.B. + # OEM-Markenseite, Verkauf über Händler) = Graubereich → Prüf-Hinweis. + requires_scope=("ecommerce",), + possibly_applies_scope=("b2c",), + possibly_hint=( + "§ 36 VSBG gilt, wenn auf dieser Seite Verbraucherverträge " + "geschlossen werden. Bei reiner Marken-/Info-Seite (Verkauf über " + "Händler/Vertragspartner) meist nicht erforderlich — bitte prüfen." + ), patterns=(re.compile( r"(?:Verbraucherschlichtungs|VSBG|" r"Streitbeilegung|" diff --git a/backend-compliance/compliance/tests/test_four_status.py b/backend-compliance/compliance/tests/test_four_status.py index 4f757bef..71a6f055 100644 --- a/backend-compliance/compliance/tests/test_four_status.py +++ b/backend-compliance/compliance/tests/test_four_status.py @@ -178,6 +178,7 @@ def test_editorial_possible_yields_possibly_applicable(): assert red.status == CheckStatus.POSSIBLY_APPLICABLE.value assert red.severity == Severity.LOW.value assert out.mc_possibly >= 1 + assert "§ 18 MStV" in red.action # MC-spezifischer Hinweis def test_editorial_absent_is_not_applicable(): @@ -193,3 +194,30 @@ def test_derive_scope_editorial_tiers(): # Medienunternehmen gewinnt — nicht beide Tokens. s = _derive_scope({"industry": "media", "has_editorial_content": True}) assert "editorial" in s and "editorial_possible" not in s + + +# ── §36 VSBG Graubereich (BMW-Fall): reines b2c ≠ harter Verstoß ──── + + +def test_vsbg_b2c_is_possibly_applicable(): + # Reine B2C-Orientierung (z.B. OEM-Markenseite, Verkauf über Händler) → + # §36 VSBG = Graubereich, KEIN MEDIUM-FAIL (BMW-False-Positive-Fix). + out = asyncio.run(ImpressumAgent().evaluate(AgentInput( + doc_type="impressum", text=TEXT_NO_LEGAL_FORM, + business_scope=["b2c"]))) + vsbg = _by_field(out, "verbraucher_streitbeilegung") + assert vsbg is not None + assert vsbg.status == CheckStatus.POSSIBLY_APPLICABLE.value + assert vsbg.severity == Severity.LOW.value + assert "VSBG" in vsbg.action # §36-Hinweis, nicht §18 + + +def test_vsbg_ecommerce_is_hard_fail(): + # Echter Online-Shop (ecommerce) → §36 VSBG harte Pflicht (MEDIUM). + out = asyncio.run(ImpressumAgent().evaluate(AgentInput( + doc_type="impressum", text=TEXT_NO_LEGAL_FORM, + business_scope=["ecommerce"]))) + vsbg = _by_field(out, "verbraucher_streitbeilegung") + assert vsbg is not None + assert vsbg.status == CheckStatus.FAIL.value + assert vsbg.severity == Severity.MEDIUM.value