feat(agent): Impressum-Tab auf Haupt-Engine + Profil/§36-Fixes

Ergebnis-Tab rendert jetzt result.results (Haupt-Doc-Check) statt des
abweichenden v3-Agenten — BMW korrekt statt False Positives:
- DocResultView: ein Dokument als Pflichtangaben-Tabelle (Label + gefundener
  Text + 3-Tier-Status), KEINE MC-IDs. ComplianceResultTabs speist Tabs aus
  result.results; ChecklistView-Bausteine exportiert + wiederverwendet.
- profile_extractor: Firmenname/Rechtsform = fruehester Treffer + ausge-
  schriebene Formen (Aktiengesellschaft) -> BMW AG statt "juris GmbH".
- §36 VSBG (MC-010): reines b2c -> POSSIBLY_APPLICABLE (Pruef-Hinweis) statt
  MEDIUM-FAIL; hart nur bei ecommerce. possibly_hint pro MC.
- McCoverage traegt label + found (Snippet); mc_possibly-Aggregat.
- AgentFindingCard/Methodik: interne check_id/mc_id nicht mehr angezeigt.

Tests: test_four_status (16) + Frontend-Vitest gruen; CI-Suite 206, v3/GT
unveraendert. Nur eigene Dateien (geteilter Tree).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-10 23:44:01 +02:00
parent a7dc12f30f
commit 3f23a64d5f
15 changed files with 450 additions and 187 deletions
@@ -58,9 +58,8 @@ export function AgentFindingCard({ f }: { f: Finding }) {
{statusLabel} {statusLabel}
</span> </span>
)} )}
<code className="text-[11px] text-gray-500">{f.check_id}</code>
{sources.map((s, i) => ( {sources.map((s, i) => (
<MethodikBadge key={i} src={s.source_type} sourceId={s.source_id} /> <MethodikBadge key={i} src={s.source_type} />
))} ))}
{f.confidence !== undefined && ( {f.confidence !== undefined && (
<span className="text-[10px] text-gray-500 ml-auto"> <span className="text-[10px] text-gray-500 ml-auto">
@@ -1,67 +0,0 @@
'use client'
/**
* "Was wurde geprüft" — listet alle MCs eines Agents mit ihrem Status.
* Standardmäßig collapsed; zeigt sofort, was Methodik des Agents war.
*/
import React, { useState } from 'react'
import type { McCoverage } from './_agentTypes'
// Hex color of the status dot, keyed by coverage status.
// Statuses missing from this table fall back to '#cbd5e1' where it is read.
const STATUS_COLOR: Record<string, string> = {
ok: '#10b981',
na: '#94a3b8',
skipped: '#cbd5e1',
high: '#dc2626',
medium: '#f59e0b',
low: '#3b82f6',
insufficient_evidence: '#64748b',
possibly_applicable: '#ca8a04',
}
// Short display label, keyed by coverage status.
// Statuses missing from this table are shown verbatim where it is read.
const STATUS_LABEL: Record<string, string> = {
ok: 'OK',
na: 'n/a',
skipped: 'übersprungen',
high: 'HIGH',
medium: 'MEDIUM',
low: 'LOW',
insufficient_evidence: 'unklar',
possibly_applicable: 'evtl. relevant',
}
/**
 * Collapsible "what was checked" panel.
 *
 * Shows one row per coverage entry: colored status dot, the mc_id, the
 * status label and, when present, the reason. The panel starts collapsed
 * and renders nothing at all when `coverage` is empty or missing.
 */
export function AgentMcCoverage({ coverage }: { coverage: McCoverage[] }) {
  const [expanded, setExpanded] = useState(false)
  if (!coverage?.length) return null

  const toggle = () => setExpanded(prev => !prev)

  // One list row; unknown statuses fall back to a neutral dot and the raw status text.
  const renderRow = (entry: McCoverage) => {
    const dotColor = STATUS_COLOR[entry.status] || '#cbd5e1'
    const statusText = STATUS_LABEL[entry.status] || entry.status
    return (
      <div key={entry.mc_id} className="flex items-center gap-2 text-xs">
        <span
          className="w-2 h-2 rounded-full inline-block"
          style={{ background: dotColor }}
        />
        <code className="text-gray-500">{entry.mc_id}</code>
        <span className="text-gray-700">
          {statusText}
        </span>
        {entry.reason && (
          <span className="text-gray-400 italic"> {entry.reason}</span>
        )}
      </div>
    )
  }

  // The row list is only built while the panel is expanded.
  const details = expanded ? (
    <div className="border-t bg-white p-2 space-y-0.5 max-h-60 overflow-y-auto">
      {coverage.map(entry => renderRow(entry))}
    </div>
  ) : null

  return (
    <div className="border rounded bg-slate-50">
      <button
        onClick={toggle}
        className="w-full text-left px-3 py-2 text-xs font-semibold uppercase text-gray-700 flex justify-between items-center"
      >
        <span>Was wurde geprüft? ({coverage.length} MCs)</span>
        <span className="text-gray-400">{expanded ? '▾' : '▸'}</span>
      </button>
      {details}
    </div>
  )
}
@@ -0,0 +1,82 @@
'use client'
/**
* AgentPflichtTable — die geprüften Pflichtangaben als menschliche Tabelle:
* Status-Icon + Feldname + tatsächlich gefundener Text. Ersetzt die alte
* MC-ID-Liste.
*
* WICHTIG: zeigt NIE die mc_id (Reverse-Engineering-Schutz der MC-Bibliothek)
* — nur das menschliche `label`. Generisch für jeden Agenten verwendbar.
*/
import React from 'react'
import type { McCoverage } from './_agentTypes'
// Presentation per coverage status: icon glyph, user-facing text and hex color.
// Statuses missing from this table are rendered with the `skipped` entry.
const DISP: Record<string, { icon: string; text: string; color: string }> = {
ok: { icon: '✓', text: 'vorhanden', color: '#16a34a' },
high: { icon: '✗', text: 'fehlt', color: '#dc2626' },
medium: { icon: '✗', text: 'fehlt', color: '#d97706' },
low: { icon: '✗', text: 'fehlt', color: '#2563eb' },
possibly_applicable: { icon: '?', text: 'zu prüfen', color: '#ca8a04' },
insufficient_evidence: { icon: '?', text: 'unklar', color: '#64748b' },
na: { icon: '', text: 'nicht anwendbar', color: '#94a3b8' },
skipped: { icon: '', text: 'nicht geprüft', color: '#cbd5e1' },
}
// Sort order: problems first, then fulfilled entries, then n/a.
// Statuses missing from this table get rank 9 where it is read, i.e. they sort last.
const RANK: Record<string, number> = {
high: 0, medium: 1, low: 2, possibly_applicable: 3,
insufficient_evidence: 4, ok: 5, na: 6, skipped: 7,
}
/**
 * Table of the checked mandatory fields.
 *
 * Header: counts of present / missing / to-review / not-applicable entries.
 * Body: one row per coverage entry (status icon, human label, and either the
 * found text for `ok` entries or the status text otherwise), ordered by RANK.
 * The mc_id is never rendered. Returns null for an empty or missing list.
 */
export function AgentPflichtTable({ coverage }: { coverage: McCoverage[] }) {
  if (!coverage?.length) return null

  // Tally all statuses in a single pass.
  const tally = new Map<string, number>()
  for (const entry of coverage) {
    tally.set(entry.status, (tally.get(entry.status) ?? 0) + 1)
  }
  const countOf = (status: string) => tally.get(status) ?? 0

  const ok = countOf('ok')
  const fehlt = countOf('high') + countOf('medium') + countOf('low')
  const pruefen = countOf('possibly_applicable') + countOf('insufficient_evidence')
  const na = countOf('na') + countOf('skipped')

  // Sort a copy so the caller's array is left untouched; unknown statuses go last.
  const rankOf = (status: string) => RANK[status] ?? 9
  const ordered = coverage.slice()
  ordered.sort((left, right) => rankOf(left.status) - rankOf(right.status))

  return (
    <div className="border rounded overflow-hidden">
      <div className="px-3 py-2 text-xs font-semibold uppercase text-gray-700 border-b bg-slate-50">
        Pflichtangaben <span className="text-green-700">{ok} vorhanden</span>
        {fehlt > 0 && <> · <span className="text-red-600">{fehlt} fehlt</span></>}
        {pruefen > 0 && (
          <> · <span className="text-yellow-700">{pruefen} zu prüfen</span></>
        )}
        {na > 0 && <> · <span className="text-gray-400">{na} n/a</span></>}
      </div>
      <div className="divide-y divide-gray-100">
        {ordered.map((entry, index) => {
          const look = DISP[entry.status] || DISP.skipped
          // `ok` rows show what was found; every other row shows its status text.
          let valueCell
          if (entry.status === 'ok') {
            valueCell = <span className="italic">{entry.found || 'vorhanden'}</span>
          } else {
            valueCell = <span style={{ color: look.color }}>{look.text}</span>
          }
          return (
            <div key={index} className="flex items-start gap-2 px-3 py-1.5 text-xs">
              <span
                className="font-bold w-4 text-center shrink-0"
                style={{ color: look.color }}
                aria-label={look.text}
              >
                {look.icon}
              </span>
              <span className="font-medium text-gray-800 w-52 shrink-0">
                {entry.label || 'Angabe'}
              </span>
              <span className="text-gray-500 flex-1 min-w-0 break-words">
                {valueCell}
              </span>
            </div>
          )
        })}
      </div>
    </div>
  )
}
@@ -15,7 +15,7 @@ import React, { useState } from 'react'
import type { Severity, SlotOutput } from './_agentTypes' import type { Severity, SlotOutput } from './_agentTypes'
import { AgentFindingCard } from './AgentFindingCard' import { AgentFindingCard } from './AgentFindingCard'
import { AgentMcCoverage } from './AgentMcCoverage' import { AgentPflichtTable } from './AgentPflichtTable'
import { AgentRecommendationCard } from './AgentRecommendationCard' import { AgentRecommendationCard } from './AgentRecommendationCard'
import { AgentSpeedometer } from './AgentSpeedometer' import { AgentSpeedometer } from './AgentSpeedometer'
@@ -42,7 +42,7 @@ export function AgentResultView({ output }: { output: SlotOutput }) {
</div> </div>
)} )}
<AgentMcCoverage coverage={output.mc_coverage} /> <AgentPflichtTable coverage={output.mc_coverage} />
<AgentSpeedometer <AgentSpeedometer
total={output.mc_total} total={output.mc_total}
@@ -2,7 +2,7 @@
import React, { useState } from 'react' import React, { useState } from 'react'
interface CheckItem { export interface CheckItem {
id: string id: string
label: string label: string
passed: boolean passed: boolean
@@ -14,7 +14,7 @@ interface CheckItem {
hint?: string hint?: string
} }
interface DocResult { export interface DocResult {
label: string label: string
url: string url: string
doc_type: string doc_type: string
@@ -27,14 +27,14 @@ interface DocResult {
scenario?: string // regenerate | fix | import | skip scenario?: string // regenerate | fix | import | skip
} }
const SCENARIO_LABELS: Record<string, { label: string; color: string; bg: string }> = { export const SCENARIO_LABELS: Record<string, { label: string; color: string; bg: string }> = {
regenerate: { label: 'Neugenerierung', color: 'text-red-700', bg: 'bg-red-100' }, regenerate: { label: 'Neugenerierung', color: 'text-red-700', bg: 'bg-red-100' },
fix: { label: 'Korrekturen', color: 'text-amber-700', bg: 'bg-amber-100' }, fix: { label: 'Korrekturen', color: 'text-amber-700', bg: 'bg-amber-100' },
import: { label: 'Konform', color: 'text-green-700', bg: 'bg-green-100' }, import: { label: 'Konform', color: 'text-green-700', bg: 'bg-green-100' },
missing: { label: 'Fehlt', color: 'text-gray-600', bg: 'bg-gray-100' }, missing: { label: 'Fehlt', color: 'text-gray-600', bg: 'bg-gray-100' },
} }
const DOC_TYPE_LABELS: Record<string, string> = { export const DOC_TYPE_LABELS: Record<string, string> = {
dse: 'DSI', agb: 'AGB', impressum: 'Impressum', dse: 'DSI', agb: 'AGB', impressum: 'Impressum',
cookie: 'Cookie', widerruf: 'Widerruf', other: 'Sonstiges', cookie: 'Cookie', widerruf: 'Widerruf', other: 'Sonstiges',
social_media: 'Social Media', dsfa: 'DSFA', joint_controller: 'Art. 26', social_media: 'Social Media', dsfa: 'DSFA', joint_controller: 'Art. 26',
@@ -46,7 +46,7 @@ interface GroupedCheck {
children: CheckItem[] children: CheckItem[]
} }
function groupChecks(checks: CheckItem[]): GroupedCheck[] { export function groupChecks(checks: CheckItem[]): GroupedCheck[] {
const l1 = checks.filter(c => (c.level ?? 1) === 1) const l1 = checks.filter(c => (c.level ?? 1) === 1)
return l1.map(c => ({ return l1.map(c => ({
check: c, check: c,
@@ -54,7 +54,7 @@ function groupChecks(checks: CheckItem[]): GroupedCheck[] {
})) }))
} }
function CheckIcon({ passed, skipped, isInfo }: { passed: boolean; skipped?: boolean; isInfo?: boolean }) { export function CheckIcon({ passed, skipped, isInfo }: { passed: boolean; skipped?: boolean; isInfo?: boolean }) {
if (skipped) { if (skipped) {
return ( return (
<svg className="w-4 h-4 text-gray-300 mt-0.5 shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24"> <svg className="w-4 h-4 text-gray-300 mt-0.5 shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
@@ -10,21 +10,16 @@
import React, { useState } from 'react' import React, { useState } from 'react'
import type { SlotOutput } from './_agentTypes' import { ChecklistView, DOC_TYPE_LABELS, type DocResult } from './ChecklistView'
import { AgentResultTab } from './AgentResultTab' import { DocResultView } from './DocResultView'
import { ChecklistView } from './ChecklistView'
import { MigrationPanel } from './MigrationPanel' import { MigrationPanel } from './MigrationPanel'
const TOPIC_LABELS: Record<string, string> = {
impressum: 'Impressum',
cookie: 'Cookie-Banner',
}
export function ComplianceResultTabs({ results }: { results: any }) { export function ComplianceResultTabs({ results }: { results: any }) {
const agentOutputs: Record<string, SlotOutput> = results.agent_outputs || {} // Themen-Tabs aus der HAUPT-Engine (result.results) — nicht aus dem
const topicKeys = Object.keys(agentOutputs) // v3-Agent. Jedes Dokument = ein Tab mit der genauen Pflichtangaben-Tabelle.
const tabs = [...topicKeys, 'raw'] const docs: DocResult[] = results.results || []
const [active, setActive] = useState<string>(tabs[0]) const tabs = docs.map((_: DocResult, i: number) => String(i)).concat('raw')
const [active, setActive] = useState<string>(tabs[0] ?? 'raw')
return ( return (
<div className="bg-white border border-gray-200 rounded-xl p-6 shadow-sm space-y-4"> <div className="bg-white border border-gray-200 rounded-xl p-6 shadow-sm space-y-4">
@@ -112,26 +107,30 @@ export function ComplianceResultTabs({ results }: { results: any }) {
</div> </div>
)} )}
{/* Tab-Leiste — Themen-Agenten + Roh-Checkliste */} {/* Tab-Leiste — ein Tab je Dokument (Haupt-Engine) + Übersicht */}
<div className="flex gap-1 border-b border-gray-200 flex-wrap"> <div className="flex gap-1 border-b border-gray-200 flex-wrap">
{tabs.map(t => { {tabs.map(t => {
const count = t !== 'raw' ? (agentOutputs[t]?.findings?.length ?? 0) : 0 const tabClass = `px-3 py-1.5 text-sm font-medium border-b-2 -mb-px transition-colors flex items-center gap-1.5 ${
active === t
? 'border-purple-500 text-purple-700'
: 'border-transparent text-gray-500 hover:text-gray-700'
}`
if (t === 'raw') {
return (
<button key={t} onClick={() => setActive(t)} className={tabClass}>
Alle Checks
</button>
)
}
const doc = docs[Number(t)]
const dot = doc.error ? 'bg-gray-300'
: doc.scenario === 'import' ? 'bg-green-500'
: doc.scenario === 'fix' ? 'bg-amber-500'
: doc.scenario === 'regenerate' ? 'bg-red-500' : 'bg-gray-400'
return ( return (
<button <button key={t} onClick={() => setActive(t)} className={tabClass}>
key={t} <span className={`w-2 h-2 rounded-full ${dot}`} />
onClick={() => setActive(t)} {DOC_TYPE_LABELS[doc.doc_type] || doc.doc_type}
className={`px-3 py-1.5 text-sm font-medium border-b-2 -mb-px transition-colors ${
active === t
? 'border-purple-500 text-purple-700'
: 'border-transparent text-gray-500 hover:text-gray-700'
}`}
>
{t === 'raw' ? 'Alle Checks (roh)' : (TOPIC_LABELS[t] || t)}
{count > 0 && (
<span className="ml-1.5 text-xs bg-gray-100 text-gray-600 rounded-full px-1.5">
{count}
</span>
)}
</button> </button>
) )
})} })}
@@ -140,11 +139,8 @@ export function ComplianceResultTabs({ results }: { results: any }) {
{/* Tab-Inhalt */} {/* Tab-Inhalt */}
{active === 'raw' ? ( {active === 'raw' ? (
<ChecklistView results={results.results} /> <ChecklistView results={results.results} />
) : agentOutputs[active] ? ( ) : docs[Number(active)] ? (
<AgentResultTab <DocResultView doc={docs[Number(active)]} />
topicLabel={TOPIC_LABELS[active] || active}
output={agentOutputs[active]}
/>
) : null} ) : null}
{/* Check-Footer (themenübergreifend) */} {/* Check-Footer (themenübergreifend) */}
@@ -0,0 +1,144 @@
'use client'
/**
* DocResultView — EIN Dokument-Prüfergebnis der HAUPT-Engine als saubere,
* immer-offene Pflichtangaben-Tabelle: Verdikt + Gruppen + extrahierte Texte
* (matched_text) pro Prüfpunkt.
*
* Quelle = result.results[doc] (die genaue Haupt-Doc-Check-Engine), NICHT
* der v3-Agent. Zeigt menschliche Labels + gefundene Snippets, keine internen
* IDs. Wiederverwendet die Render-Bausteine aus ChecklistView.
*/
import React from 'react'
import {
CheckIcon,
type DocResult,
groupChecks,
SCENARIO_LABELS,
} from './ChecklistView'
/**
 * Small monospace line showing the text a check matched, wrapped in quotes.
 *
 * The snippet previously rendered only a trailing quote after the text; both
 * quotes are now emitted (as entities, so the markup stays lint-clean).
 */
function Snippet({ text }: { text: string }) {
  return (
    <div className="text-xs text-gray-500 mt-0.5 font-mono break-words">
      &quot;{text}&quot;
    </div>
  )
}
/**
 * Compact labelled percentage bar.
 *
 * Color rule: with `blue` set, a darker blue from 80 % upwards and a lighter
 * blue below; otherwise green at exactly 100 %, yellow from 50 %, red below.
 */
function ScoreBar({ label, pct, blue }: { label: string; pct: number; blue?: boolean }) {
  let fillClass: string
  if (blue) {
    fillClass = pct >= 80 ? 'bg-blue-400' : 'bg-blue-300'
  } else if (pct === 100) {
    fillClass = 'bg-green-500'
  } else if (pct >= 50) {
    fillClass = 'bg-yellow-500'
  } else {
    fillClass = 'bg-red-500'
  }
  const barWidth = `${pct}%`

  return (
    <div className="flex items-center gap-1.5">
      <span className="text-[10px] text-gray-400">{label}</span>
      <div className="w-12 h-1.5 bg-gray-200 rounded-full overflow-hidden">
        <div className={`h-full rounded-full ${fillClass}`} style={{ width: barWidth }} />
      </div>
      <span className="text-gray-600 w-9 text-right">{pct}%</span>
    </div>
  )
}
/**
 * Renders a single document check result as an always-open table.
 *
 * - If `doc.error` is set, only the error message is shown.
 * - Otherwise: a verdict header (scenario badge, passed/total counts, score
 *   bars), one row per level-1 check with its child checks nested below, and
 *   a word-count footer when `doc.word_count` is positive.
 *
 * A passed check shows its `matched_text` as a snippet (level-1 checks only
 * when they have no children); a failed check shows its `hint`.
 */
export function DocResultView({ doc }: { doc: DocResult }) {
if (doc.error) {
return (
<div className="text-sm text-amber-700 bg-amber-50 rounded p-3">
{doc.error}
</div>
)
}
// Level-1 checks paired with their child checks (see groupChecks in ChecklistView).
const grouped = groupChecks(doc.checks)
// Checks without an explicit level count as level 1.
const l1 = doc.checks.filter(c => (c.level ?? 1) === 1)
// INFO-severity checks are left out of the level-1 passed/total count.
const l1Score = l1.filter(c => c.severity !== 'INFO')
const l1Passed = l1Score.filter(c => c.passed).length
// Level-2 checks that were actually run (skipped ones are not counted).
const l2 = doc.checks.filter(c => (c.level ?? 1) === 2 && !c.skipped)
const l2Passed = l2.filter(c => c.passed).length
// Badge config for the scenario, or null when the document has none.
const sc = doc.scenario ? SCENARIO_LABELS[doc.scenario] : null
return (
<div className="space-y-3">
{/* Verdict header */}
<div className="flex items-center flex-wrap gap-3 border rounded-lg px-4 py-3 bg-slate-50">
{sc && (
<span className={`text-xs font-semibold px-2 py-0.5 rounded-full ${sc.bg} ${sc.color}`}>
{sc.label}
</span>
)}
<span className="text-sm text-gray-700">
{l1Passed}/{l1Score.length} Pflichtangaben
{l2.length > 0 && <>, {l2Passed}/{l2.length} Detailprüfungen</>}
</span>
<div className="flex gap-3 ml-auto">
<ScoreBar label="Pflicht" pct={doc.completeness_pct} />
{l2.length > 0 && (
<ScoreBar label="Detail" pct={doc.correctness_pct ?? 0} blue />
)}
</div>
</div>
{/* Mandatory-fields table */}
<div className="border rounded-lg divide-y divide-gray-100">
{grouped.map(g => {
// A failed INFO-severity check is rendered as informational, not as an error.
const l1Info = g.check.severity === 'INFO' && !g.check.passed
return (
<div key={g.check.id} className="px-4 py-2">
<div className="flex items-start gap-2">
<CheckIcon passed={g.check.passed} isInfo={l1Info} />
<div className="flex-1 min-w-0">
<div className={`text-sm ${
g.check.passed ? 'text-gray-800'
: l1Info ? 'text-gray-500' : 'text-red-700 font-medium'
}`}>
{g.check.label}
</div>
{g.check.passed && g.check.matched_text && g.children.length === 0 && (
<Snippet text={g.check.matched_text} />
)}
{!g.check.passed && g.check.hint && (
<div className={`text-xs mt-0.5 ${l1Info ? 'text-gray-400' : 'text-red-600/80'}`}>
{g.check.hint}
</div>
)}
</div>
</div>
{g.children.length > 0 && (
<div className="ml-6 mt-1 space-y-1 border-l-2 border-gray-200 pl-3">
{g.children.map(ch => {
// Same informational rule for children; skipped children are never informational.
const chInfo = ch.severity === 'INFO' && !ch.passed && !ch.skipped
return (
<div key={ch.id} className="flex items-start gap-2">
<CheckIcon passed={ch.passed} skipped={ch.skipped} isInfo={chInfo} />
<div className="flex-1 min-w-0">
<div className={`text-xs ${
ch.skipped ? 'text-gray-400 italic'
: ch.passed ? 'text-gray-600'
: chInfo ? 'text-gray-400' : 'text-red-600 font-medium'
}`}>
{ch.label}{ch.skipped && ' (übersprungen)'}
</div>
{ch.passed && ch.matched_text && <Snippet text={ch.matched_text} />}
{!ch.passed && !ch.skipped && ch.hint && (
<div className={`text-xs mt-0.5 ${chInfo ? 'text-gray-400' : 'text-red-500/80'}`}>
{ch.hint}
</div>
)}
</div>
</div>
)
})}
</div>
)}
</div>
)
})}
</div>
{doc.word_count > 0 && (
<div className="text-xs text-gray-400">{doc.word_count} Wörter analysiert</div>
)}
</div>
)
}
@@ -0,0 +1,29 @@
import { describe, it, expect } from 'vitest'
import { render, screen } from '@testing-library/react'
import { AgentPflichtTable } from '../AgentPflichtTable'
import type { McCoverage } from '../_agentTypes'
// Fixture: one `ok` entry with a found value, one `possibly_applicable`
// entry and one `na` entry.
const COV: McCoverage[] = [
{ mc_id: 'IMP-MC-002', status: 'ok', label: 'Email-Adresse',
found: 'kundenbetreuung@bmw.de' },
{ mc_id: 'IMP-MC-010', status: 'possibly_applicable',
label: 'Verbraucher-Streitbeilegung-Hinweis' },
{ mc_id: 'IMP-MC-009', status: 'na', label: 'Verantwortlicher § 18 MStV' },
]
describe('AgentPflichtTable', () => {
// Label and found value are rendered; no mc_id text appears in the output.
it('zeigt Label + gefundenen Wert, aber KEINE mc_id', () => {
render(<AgentPflichtTable coverage={COV} />)
expect(screen.getByText('Email-Adresse')).toBeInTheDocument()
expect(screen.getByText('kundenbetreuung@bmw.de')).toBeInTheDocument()
// Reverse-engineering protection: the mc_id must NOT be rendered.
expect(screen.queryByText(/IMP-MC-/)).not.toBeInTheDocument()
})
// The header shows the per-status counts derived from the fixture.
it('Verdikt-Header zählt die Status', () => {
render(<AgentPflichtTable coverage={COV} />)
expect(screen.getByText(/1 vorhanden/)).toBeInTheDocument()
expect(screen.getByText(/1 zu prüfen/)).toBeInTheDocument()
})
})
@@ -69,27 +69,32 @@ describe('AgentResultTab', () => {
}) })
const DOC_RESULT = { const DOC_RESULT = {
label: 'Impressum-Rohdaten', url: 'https://example.com/impressum', label: 'Impressum', url: 'https://example.com/impressum',
doc_type: 'impressum', word_count: 50, completeness_pct: 80, doc_type: 'impressum', word_count: 50, completeness_pct: 100,
correctness_pct: 90, checks: [], findings_count: 2, error: '', correctness_pct: 100, findings_count: 0, error: '', scenario: 'import',
checks: [
{ id: 'name', label: 'Name des Anbieters', passed: true, severity: 'HIGH',
matched_text: 'Bayerische Motoren Werke Aktiengesellschaft', level: 1 },
{ id: 'email', label: 'E-Mail-Adresse', passed: true, severity: 'HIGH',
matched_text: 'kundenbetreuung@bmw.de', level: 1 },
],
} }
describe('ComplianceResultTabs', () => { describe('ComplianceResultTabs', () => {
it('zeigt den Impressum-Tab zuerst und wechselt auf die Roh-Checkliste', () => { it('rendert das Dokument-Tab der Haupt-Engine mit extrahierten Texten', () => {
const result = { // Themen-Tabs kommen aus result.results (Haupt-Engine), NICHT agent_outputs.
agent_outputs: { impressum: IMPRESSUM_OUTPUT }, const result = { results: [DOC_RESULT] }
results: [DOC_RESULT],
}
render(<ComplianceResultTabs results={result} />) render(<ComplianceResultTabs results={result} />)
// beide Tabs vorhanden // Dokument-Tab + Übersicht
expect(screen.getByRole('button', { name: /Impressum/ })).toBeInTheDocument() expect(screen.getByRole('button', { name: /Impressum/ })).toBeInTheDocument()
expect(screen.getByRole('button', { name: /Alle Checks \(roh\)/ })).toBeInTheDocument() expect(screen.getByRole('button', { name: /Alle Checks/ })).toBeInTheDocument()
// Impressum-Tab aktiv → Finding sichtbar // DocResultView: menschliches Label + gefundener Text sichtbar
expect(screen.getByText('Pflichtangabe fehlt: Email-Adresse')).toBeInTheDocument() expect(screen.getByText('Name des Anbieters')).toBeInTheDocument()
// Wechsel auf die Roh-Checkliste expect(screen.getByText(/Bayerische Motoren Werke/)).toBeInTheDocument()
fireEvent.click(screen.getByRole('button', { name: /Alle Checks \(roh\)/ })) // Wechsel auf die Übersicht
// Impressum-Finding ist weg (umgeschaltet), Komponente intakt fireEvent.click(screen.getByRole('button', { name: /Alle Checks/ }))
expect(screen.queryByText('Pflichtangabe fehlt: Email-Adresse')).not.toBeInTheDocument() expect(
expect(screen.getByRole('button', { name: /Alle Checks \(roh\)/ })).toBeInTheDocument() screen.getByText(/Dokumenten-Pruefung/),
).toBeInTheDocument()
}) })
}) })
@@ -63,8 +63,10 @@ export interface Recommendation {
export interface McCoverage { export interface McCoverage {
mc_id: string mc_id: string
status: 'ok' | 'na' | 'high' | 'medium' | 'low' | 'skipped' | status: 'ok' | 'na' | 'high' | 'medium' | 'low' | 'skipped' |
'insufficient_evidence' 'insufficient_evidence' | 'possibly_applicable'
reason?: string reason?: string
label?: string // menschlicher Feldname (KEINE mc_id im Frontend zeigen)
found?: string // gefundener Text/Wert bei status=ok
} }
export interface EscalationLog { export interface EscalationLog {
@@ -10,13 +10,14 @@ Returns a dict that maps to CompanyProfile and ScopeProfilingAnswer fields.
import logging import logging
import re import re
from typing import Optional
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def extract_profile_from_documents( def extract_profile_from_documents(
doc_texts: dict[str, str], doc_texts: dict[str, str],
business_profile: dict | None = None, business_profile: Optional[dict] = None,
) -> dict: ) -> dict:
"""Extract Company Profile fields from document texts. """Extract Company Profile fields from document texts.
@@ -100,28 +101,38 @@ def _extract_company_info(text: str, result: dict) -> None:
"""Extract company name, legal form, address from text.""" """Extract company name, legal form, address from text."""
cp = result["company_profile"] cp = result["company_profile"]
# GmbH / AG / UG / e.K. etc. # Rechtsform + Firmenname. Die Reihenfolge der Muster ist NICHT die
legal_forms = { # Priorität — wir nehmen den FRUEHESTEN Treffer im Text: ein Impressum
r"(\S+(?:\s+\S+){0,4})\s+gmbh\b": ("GmbH", "gmbh"), # nennt den Betreiber zuerst; spätere Erwähnungen (z.B. "juris GmbH" im
r"(\S+(?:\s+\S+){0,4})\s+ag\b": ("AG", "ag"), # Hinweis auf gesetze-im-internet.de) sind nicht der Anbieter. Ausge-
r"(\S+(?:\s+\S+){0,4})\s+ug\b": ("UG", "ug"), # schriebene Formen ("Aktiengesellschaft") zählen mit (sonst wird BMW AG
r"(\S+(?:\s+\S+){0,4})\s+e\.?\s*k\.?\b": ("e.K.", "ek"), # nicht erkannt und faelschlich die naechste GmbH gegriffen).
r"(\S+(?:\s+\S+){0,4})\s+gbr\b": ("GbR", "gbr"), legal_forms = [
r"(\S+(?:\s+\S+){0,4})\s+ohg\b": ("OHG", "ohg"), (r"(\S+(?:\s+\S+){0,4})\s+gmbh\s*&\s*co\.?\s*kg\b", "gmbh_co_kg"),
r"(\S+(?:\s+\S+){0,4})\s+gmbh\s*&\s*co\.?\s*kg": ("GmbH & Co. KG", "gmbh_co_kg"), (r"(\S+(?:\s+\S+){0,4})\s+(?:aktiengesellschaft|ag)\b", "ag"),
} (r"(\S+(?:\s+\S+){0,4})\s+(?:unternehmergesellschaft|ug)\b", "ug"),
(r"(\S+(?:\s+\S+){0,4})\s+gmbh\b", "gmbh"),
(r"(\S+(?:\s+\S+){0,4})\s+e\.?\s*k\.?\b", "ek"),
(r"(\S+(?:\s+\S+){0,4})\s+gbr\b", "gbr"),
(r"(\S+(?:\s+\S+){0,4})\s+ohg\b", "ohg"),
]
text_lower = text.lower() text_lower = text.lower()
for pattern, (form_label, form_id) in legal_forms.items(): best = None # (start, end, form_id) — frühester Treffer
for pattern, form_id in legal_forms:
m = re.search(pattern, text_lower) m = re.search(pattern, text_lower)
if m: # frühester Treffer gewinnt; bei Gleichstand die Listen-Reihenfolge
raw_name = m.group(0).strip() # (GmbH & Co. KG vor GmbH).
# Clean up: take from uppercase start if m and (best is None or m.start() < best[0]):
for i, ch in enumerate(text[m.start():m.end()]): best = (m.start(), m.end(), form_id)
if ch.isupper(): if best:
cp["companyName"] = text[m.start() + i:m.end()].strip() start, end, form_id = best
break # Firmenname ab dem ersten Grossbuchstaben im Treffer (schneidet
cp["legalForm"] = form_id # führende Kleinwörter wie "von der" ab).
break for i, ch in enumerate(text[start:end]):
if ch.isupper():
cp["companyName"] = text[start + i:end].strip()
break
cp["legalForm"] = form_id
# PLZ + Ort # PLZ + Ort
plz_match = re.search( plz_match = re.search(
@@ -107,6 +107,11 @@ class McCoverage(BaseModel):
mc_id: str mc_id: str
status: str status: str
reason: str = "" reason: str = ""
# Menschlicher Feldname (für die Pflichtangaben-Tabelle im Frontend —
# NICHT die mc_id zeigen, sonst Reverse-Engineering der MC-Bibliothek).
label: str = ""
# Der tatsächlich gefundene Text/Wert (Snippet) bei status=ok.
found: str = ""
class EscalationLog(BaseModel): class EscalationLog(BaseModel):
@@ -78,6 +78,25 @@ def _build_measure(label: str, norm: str) -> str:
return msg return msg
def _line_of(text: str, start_pos: int, end_pos: int) -> str:
"""Die Zeile um einen Regex-Treffer — als 'gefundener Wert' für die
Pflichtangaben-Tabelle. Gekappt + bereinigt."""
start = text.rfind("\n", 0, start_pos) + 1
end = text.find("\n", end_pos)
if end == -1:
end = len(text)
return " ".join(text[start:end].split())[:160]
def _coverage(mc, status: str, reason: str, found: str = "") -> McCoverage:
    """Build the McCoverage entry for ``mc``, including its human-readable label.

    The label travels with the entry so the frontend can display it instead of
    the mc_id (reverse-engineering protection for the MC library).
    """
    fields = {
        "mc_id": mc.mc_id,
        "label": mc.label,
        "status": status,
        "reason": reason,
        "found": found,
    }
    return McCoverage(**fields)
class ImpressumAgent(BaseSpecialistAgent): class ImpressumAgent(BaseSpecialistAgent):
agent_id = "impressum" agent_id = "impressum"
agent_version = "3.0" agent_version = "3.0"
@@ -103,10 +122,7 @@ class ImpressumAgent(BaseSpecialistAgent):
if len(text) < 100: if len(text) < 100:
# Doc zu kurz — alle eigenen Pattern-IDs als skipped # Doc zu kurz — alle eigenen Pattern-IDs als skipped
for mc in MCS: for mc in MCS:
coverage.append(McCoverage( coverage.append(_coverage(mc, "skipped", "text too short"))
mc_id=mc.mc_id, status="skipped",
reason="text too short",
))
return self._finalize( return self._finalize(
start, findings, esc_logs, coverage, start, findings, esc_logs, coverage,
confidence=0.0, confidence=0.0,
@@ -129,27 +145,35 @@ class ImpressumAgent(BaseSpecialistAgent):
for mc in MCS: for mc in MCS:
disp = scope_disposition(mc, scope, is_auto) disp = scope_disposition(mc, scope, is_auto)
if disp == "na": if disp == "na":
coverage.append(McCoverage( coverage.append(_coverage(
mc_id=mc.mc_id, status="na", mc, "na", "nicht anwendbar (Rechtsform/Branche)"))
reason="nicht anwendbar (Rechtsform/Branche)",
))
continue continue
if any(p.search(text) for p in mc.patterns): matched = None
coverage.append(McCoverage( for p in mc.patterns:
mc_id=mc.mc_id, status="ok", reason="Pattern-Treffer", m = p.search(text)
if m:
matched = m
break
if matched is not None:
coverage.append(_coverage(
mc, "ok", "Pattern-Treffer",
found=_line_of(text, matched.start(), matched.end()),
)) ))
continue continue
if mc.optional: if mc.optional:
# fehlt + optional → KEIN Finding (z.B. USt-IdNr; # fehlt + optional → KEIN Finding (z.B. USt-IdNr;
# Kleinunternehmer §19 haben legitim keine). # Kleinunternehmer §19 haben legitim keine).
coverage.append(McCoverage( coverage.append(_coverage(
mc_id=mc.mc_id, status="na", mc, "na", "optional — nicht angegeben"))
reason="optional — nicht angegeben",
))
continue continue
if disp == "possible": if disp == "possible":
# Graubereich (z.B. Corporate-Blog → §18 MStV evtl.) # Graubereich (z.B. Corporate-Blog → §18, OEM-Markenseite
# POSSIBLY_APPLICABLE: Pruef-Hinweis (LOW), kein Verstoss. # §36 VSBG) → POSSIBLY_APPLICABLE: Pruef-Hinweis (LOW),
# kein Verstoss. Hinweistext kommt MC-spezifisch.
hint = mc.possibly_hint or (
f"Diese Angabe ist nur situativ Pflicht ({mc.norm}). "
"Bitte prüfen, ob sie auf Ihre Seite zutrifft."
)
findings.append(Finding( findings.append(Finding(
check_id=f"IMP-{mc.field_id}", check_id=f"IMP-{mc.field_id}",
agent=self.agent_id, agent=self.agent_id,
@@ -161,12 +185,7 @@ class ImpressumAgent(BaseSpecialistAgent):
title=f"{mc.label}: ggf. relevant — manuell prüfen", title=f"{mc.label}: ggf. relevant — manuell prüfen",
norm=mc.norm, norm=mc.norm,
evidence="", evidence="",
action=( action=hint,
"Bei journalistisch-redaktionellen Inhalten "
"(Nachrichten/Magazin) ist ein Verantwortlicher nach "
"§ 18 MStV anzugeben. Bei reinem Corporate-Blog meist "
"nicht erforderlich — bitte prüfen."
),
confidence=0.5, confidence=0.5,
sources=[EvidenceSource( sources=[EvidenceSource(
source_type=SourceType.REGEX, source_type=SourceType.REGEX,
@@ -175,10 +194,8 @@ class ImpressumAgent(BaseSpecialistAgent):
confidence=0.5, confidence=0.5,
)], )],
)) ))
coverage.append(McCoverage( coverage.append(_coverage(
mc_id=mc.mc_id, status="possibly_applicable", mc, "possibly_applicable", "Graubereich — manuelle Prüfung"))
reason="Graubereich — manuelle Prüfung",
))
continue continue
if mc.legal_form_dependent and not form_known: if mc.legal_form_dependent and not form_known:
# Rechtsform unbestimmt → kein hartes FAIL, sondern # Rechtsform unbestimmt → kein hartes FAIL, sondern
@@ -207,10 +224,8 @@ class ImpressumAgent(BaseSpecialistAgent):
confidence=0.4, confidence=0.4,
)], )],
)) ))
coverage.append(McCoverage( coverage.append(_coverage(
mc_id=mc.mc_id, status="insufficient_evidence", mc, "insufficient_evidence", "Rechtsform unbestimmt"))
reason="Rechtsform unbestimmt",
))
continue continue
sev = _SEV_TO_ENUM.get(mc.severity_if_missing, Severity.MEDIUM) sev = _SEV_TO_ENUM.get(mc.severity_if_missing, Severity.MEDIUM)
findings.append(Finding( findings.append(Finding(
@@ -233,10 +248,8 @@ class ImpressumAgent(BaseSpecialistAgent):
confidence=0.9, confidence=0.9,
)], )],
)) ))
coverage.append(McCoverage( coverage.append(_coverage(
mc_id=mc.mc_id, status=sev.value.lower(), mc, sev.value.lower(), "kein Pattern-Treffer"))
reason="kein Pattern-Treffer",
))
n_fail = sum(1 for f in findings n_fail = sum(1 for f in findings
if f.status == CheckStatus.FAIL.value) if f.status == CheckStatus.FAIL.value)
n_unklar = sum(1 for f in findings n_unklar = sum(1 for f in findings
@@ -40,6 +40,9 @@ class MC:
# ist die MC NICHT hart anwendbar, sondern POSSIBLY_APPLICABLE — Pruef- # ist die MC NICHT hart anwendbar, sondern POSSIBLY_APPLICABLE — Pruef-
# Hinweis (severity LOW) statt FAIL. Z.B. Corporate-Blog (§18 MStV evtl.). # Hinweis (severity LOW) statt FAIL. Z.B. Corporate-Blog (§18 MStV evtl.).
possibly_applies_scope: tuple[str, ...] = field(default_factory=tuple) possibly_applies_scope: tuple[str, ...] = field(default_factory=tuple)
# MC-spezifischer Pruef-Hinweis fuer den POSSIBLY_APPLICABLE-Fall
# (warum Graubereich + was der Nutzer pruefen soll).
possibly_hint: str = ""
MCS: tuple[MC, ...] = ( MCS: tuple[MC, ...] = (
@@ -182,6 +185,11 @@ MCS: tuple[MC, ...] = (
severity_if_missing="MEDIUM", severity_if_missing="MEDIUM",
requires_scope=("editorial",), requires_scope=("editorial",),
possibly_applies_scope=("editorial_possible",), possibly_applies_scope=("editorial_possible",),
possibly_hint=(
"Bei journalistisch-redaktionellen Inhalten (Nachrichten/Magazin) "
"ist ein Verantwortlicher nach § 18 MStV anzugeben. Bei reinem "
"Corporate-Blog meist nicht erforderlich — bitte prüfen."
),
patterns=(re.compile( patterns=(re.compile(
r"(?:Verantwortlich(?:er|e)?\s+(?:f(?:ue|ü)r|i\.S\.d\.|" r"(?:Verantwortlich(?:er|e)?\s+(?:f(?:ue|ü)r|i\.S\.d\.|"
r"nach|gem(?:ae|ä)ß)\s+§\s*18|" r"nach|gem(?:ae|ä)ß)\s+§\s*18|"
@@ -194,9 +202,17 @@ MCS: tuple[MC, ...] = (
mc_id="IMP-MC-010", mc_id="IMP-MC-010",
field_id="verbraucher_streitbeilegung", field_id="verbraucher_streitbeilegung",
label="Verbraucher-Streitbeilegung-Hinweis", label="Verbraucher-Streitbeilegung-Hinweis",
norm="§ 36 VSBG (B2C-Anbieter Pflicht)", norm="§ 36 VSBG (Verbraucherverträge über die Website)",
severity_if_missing="MEDIUM", severity_if_missing="MEDIUM",
requires_scope=("ecommerce", "b2c"), # Hart nur bei echtem Online-Verkauf; reine B2C-Orientierung (z.B.
# OEM-Markenseite, Verkauf über Händler) = Graubereich → Prüf-Hinweis.
requires_scope=("ecommerce",),
possibly_applies_scope=("b2c",),
possibly_hint=(
"§ 36 VSBG gilt, wenn auf dieser Seite Verbraucherverträge "
"geschlossen werden. Bei reiner Marken-/Info-Seite (Verkauf über "
"Händler/Vertragspartner) meist nicht erforderlich — bitte prüfen."
),
patterns=(re.compile( patterns=(re.compile(
r"(?:Verbraucherschlichtungs|VSBG|" r"(?:Verbraucherschlichtungs|VSBG|"
r"Streitbeilegung|" r"Streitbeilegung|"
@@ -178,6 +178,7 @@ def test_editorial_possible_yields_possibly_applicable():
assert red.status == CheckStatus.POSSIBLY_APPLICABLE.value assert red.status == CheckStatus.POSSIBLY_APPLICABLE.value
assert red.severity == Severity.LOW.value assert red.severity == Severity.LOW.value
assert out.mc_possibly >= 1 assert out.mc_possibly >= 1
assert "§ 18 MStV" in red.action # MC-spezifischer Hinweis
def test_editorial_absent_is_not_applicable(): def test_editorial_absent_is_not_applicable():
@@ -193,3 +194,30 @@ def test_derive_scope_editorial_tiers():
# Medienunternehmen gewinnt — nicht beide Tokens. # Medienunternehmen gewinnt — nicht beide Tokens.
s = _derive_scope({"industry": "media", "has_editorial_content": True}) s = _derive_scope({"industry": "media", "has_editorial_content": True})
assert "editorial" in s and "editorial_possible" not in s assert "editorial" in s and "editorial_possible" not in s
# ── §36 VSBG Graubereich (BMW-Fall): reines b2c ≠ harter Verstoß ────
def test_vsbg_b2c_is_possibly_applicable():
    # A purely B2C-oriented site (e.g. an OEM brand site selling via dealers)
    # puts § 36 VSBG in the grey area: a review hint, NOT a MEDIUM fail
    # (fix for the BMW false positive).
    agent_input = AgentInput(
        doc_type="impressum",
        text=TEXT_NO_LEGAL_FORM,
        business_scope=["b2c"],
    )
    result = asyncio.run(ImpressumAgent().evaluate(agent_input))

    finding = _by_field(result, "verbraucher_streitbeilegung")
    assert finding is not None
    assert finding.status == CheckStatus.POSSIBLY_APPLICABLE.value
    assert finding.severity == Severity.LOW.value
    # The action text must be the § 36 hint, not the § 18 one.
    assert "VSBG" in finding.action
def test_vsbg_ecommerce_is_hard_fail():
    # A real online shop (ecommerce scope) makes § 36 VSBG a hard
    # requirement: a missing notice is a MEDIUM fail.
    agent_input = AgentInput(
        doc_type="impressum",
        text=TEXT_NO_LEGAL_FORM,
        business_scope=["ecommerce"],
    )
    result = asyncio.run(ImpressumAgent().evaluate(agent_input))

    finding = _by_field(result, "verbraucher_streitbeilegung")
    assert finding is not None
    assert finding.status == CheckStatus.FAIL.value
    assert finding.severity == Severity.MEDIUM.value