From 0d0e705117cec45979e02d5a1f620b81be54bb25 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Mon, 11 May 2026 20:56:10 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20Unified=20Compliance-Check=20=E2=80=94?= =?UTF-8?q?=208=20document=20types=20in=20one=20form?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New 3-tab structure: Website-Scan, Compliance-Check, Banner-Check. Compliance-Check Tab (replaces Dokumenten-Pruefung + Impressum-Check): - 8 document rows: DSI, Impressum, Social Media, Cookie, AGB, Nutzungsbedingungen, Widerruf, DSB-Kontakt - Each row: URL input + "Text laden" + file upload + manual text - "Text laden" extracts via consent-tester, shows in editable textarea - User verifies/corrects text before checking - Empty fields = "not present" → own finding Business Profiler (business_profiler.py): - Detects B2B/B2C/B2G from all documents together - Recognizes regulated professions, online shops, editorial content - Context-aware: INFO checks become PASS/FAIL based on profile Backend: /compliance-check + /extract-text endpoints Frontend: ComplianceCheckTab.tsx + DocumentRow.tsx API proxies: compliance-check/route.ts + extract-text/route.ts Also: Impressum regex fixes (Telefon, AG, Geschaeftsfuehrung) and INFO severity for context-dependent checks. 
Co-Authored-By: Claude Opus 4.6 (1M context)
---
 .../sdk/v1/agent/compliance-check/route.ts | 51 ++
 .../api/sdk/v1/agent/extract-text/route.ts | 33 ++
 .../agent/_components/ComplianceCheckTab.tsx | 352 ++++++++++++++
 .../app/sdk/agent/_components/DocumentRow.tsx | 163 +++++++
 admin-compliance/app/sdk/agent/page.tsx | 13 +-
 .../api/agent_compliance_check_routes.py | 439 ++++++++++++++++++
 .../compliance/services/business_profiler.py | 223 +++++++++
 backend-compliance/main.py | 4 +
 8 files changed, 1270 insertions(+), 8 deletions(-)
 create mode 100644 admin-compliance/app/api/sdk/v1/agent/compliance-check/route.ts
 create mode 100644 admin-compliance/app/api/sdk/v1/agent/extract-text/route.ts
 create mode 100644 admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx
 create mode 100644 admin-compliance/app/sdk/agent/_components/DocumentRow.tsx
 create mode 100644 backend-compliance/compliance/api/agent_compliance_check_routes.py
 create mode 100644 backend-compliance/compliance/services/business_profiler.py
diff --git a/admin-compliance/app/api/sdk/v1/agent/compliance-check/route.ts b/admin-compliance/app/api/sdk/v1/agent/compliance-check/route.ts
new file mode 100644
index 0000000..65bc882
--- /dev/null
+++ b/admin-compliance/app/api/sdk/v1/agent/compliance-check/route.ts
@@ -0,0 +1,51 @@
+/**
+ * Unified Compliance Check Proxy
+ * POST: start check for all documents, GET: poll status
+ */
+
+import { NextRequest, NextResponse } from 'next/server'
+
+const BACKEND_URL = process.env.BACKEND_API_URL || 'http://backend-compliance:8002'
+
+/**
+ * Starts a check: forwards the raw request body to the backend and relays
+ * the backend's JSON payload together with its HTTP status.
+ * Answers 503 if the backend call throws (unreachable, 30 s timeout, non-JSON reply).
+ */
+export async function POST(request: NextRequest) {
+  try {
+    const body = await request.text()
+    const response = await fetch(`${BACKEND_URL}/api/compliance/agent/compliance-check`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body,
+      signal: AbortSignal.timeout(30000),
+    })
+    const data = await response.json()
+    return NextResponse.json(data, { status: response.status })
+  } catch {
+    return NextResponse.json({ error: 'Pruefung konnte nicht gestartet werden' }, { status: 503 })
+  }
+}
+
+/**
+ * Polls a running check identified by the `check_id` query parameter.
+ * Answers 400 if the parameter is missing and 503 if the backend call throws;
+ * otherwise relays the backend's JSON payload and HTTP status.
+ */
+export async function GET(request: NextRequest) {
+  const checkId = request.nextUrl.searchParams.get('check_id')
+  if (!checkId) return NextResponse.json({ error: 'check_id required' }, { status: 400 })
+  try {
+    const response = await fetch(
+      // check_id is caller-supplied: encode it so it stays a single path segment
+      `${BACKEND_URL}/api/compliance/agent/compliance-check/${encodeURIComponent(checkId)}`,
+      { signal: AbortSignal.timeout(10000) },
+    )
+    const data = await response.json()
+    // Pass the backend status through (as POST does) so clients can test `res.ok`
+    return NextResponse.json(data, { status: response.status })
+  } catch {
+    return NextResponse.json({ error: 'Status-Abfrage fehlgeschlagen' }, { status: 503 })
+  }
+}
diff --git a/admin-compliance/app/api/sdk/v1/agent/extract-text/route.ts b/admin-compliance/app/api/sdk/v1/agent/extract-text/route.ts
new file mode 100644
index 0000000..7997bb2
--- /dev/null
+++ b/admin-compliance/app/api/sdk/v1/agent/extract-text/route.ts
@@ -0,0 +1,33 @@
+/**
+ * Text Extraction Proxy — extract text from a URL via consent-tester
+ * POST: { url: string } -> { text, word_count, title, error }
+ */
+
+import { NextRequest, NextResponse } from 'next/server'
+
+const BACKEND_URL = process.env.BACKEND_API_URL || 'http://backend-compliance:8002'
+
+/**
+ * Forwards the raw request body to the backend extract-text endpoint and relays
+ * the backend's JSON payload with its HTTP status.
+ * Answers 503 with an empty result of the same shape if the backend call throws
+ * (unreachable, 120 s timeout, non-JSON reply).
+ */
+export async function POST(request: NextRequest) {
+  try {
+    const body = await request.text()
+    const response = await fetch(`${BACKEND_URL}/api/compliance/agent/extract-text`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body,
+      signal: AbortSignal.timeout(120000),
+    })
+    const data = await response.json()
+    return NextResponse.json(data, { status: response.status })
+  } catch {
+    return NextResponse.json(
+      { text: '', word_count: 0, title: '', error: 'Text-Extraktion fehlgeschlagen' },
+      { status: 503 },
+    )
+  }
+}
diff --git a/admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx b/admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx
new file mode 100644
index 0000000..4d5bb5f
--- /dev/null
+++ 
b/admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx
@@ -0,0 +1,370 @@
+'use client'
+
+import React, { useState, useCallback } from 'react'
+import { ChecklistView } from './ChecklistView'
+import { DocumentRow } from './DocumentRow'
+
+/** Document types the form offers a row for. */
+const DOCUMENT_TYPES = [
+  { id: 'dse', label: 'DSI (Datenschutzinformation)', required: true },
+  { id: 'impressum', label: 'Impressum', required: true },
+  { id: 'social_media', label: 'Social Media DSE', required: false },
+  { id: 'cookie', label: 'Cookie-Richtlinie', required: false },
+  { id: 'agb', label: 'AGB', required: false },
+  { id: 'nutzungsbedingungen', label: 'Nutzungsbedingungen', required: false },
+  { id: 'widerruf', label: 'Widerrufsbelehrung', required: false },
+  { id: 'dsb', label: 'DSB-Kontakt', required: false },
+] as const
+
+/** Union of the `id` literals in DOCUMENT_TYPES. */
+type DocTypeId = typeof DOCUMENT_TYPES[number]['id']
+
+/** Form state of one document row. */
+interface DocState {
+  url: string
+  text: string
+  loading: boolean
+  error: string | null
+}
+
+/** Form state of all rows, keyed by document type id. */
+type DocsState = Record<DocTypeId, DocState>
+
+// localStorage keys used by this tab
+const STORAGE_KEY_STATE = 'compliance-check-state'
+const STORAGE_KEY_RESULTS = 'compliance-check-results'
+const STORAGE_KEY_HISTORY = 'compliance-check-history'
+
+/** Returns the state of a row with nothing entered. */
+function emptyDocState(): DocState {
+  return { url: '', text: '', loading: false, error: null }
+}
+
+/** Returns `value` if it is a string, otherwise the empty string. */
+function asString(value: unknown): string {
+  return typeof value === 'string' ? value : ''
+}
+
+/**
+ * Builds the initial form state.
+ * Without `window` (server render) or without usable saved data every row is
+ * empty; otherwise url/text come from localStorage and loading/error are reset.
+ */
+function initState(): DocsState {
+  const blank = (): DocsState =>
+    Object.fromEntries(DOCUMENT_TYPES.map(d => [d.id, emptyDocState()])) as DocsState
+  if (typeof window === 'undefined') return blank()
+  try {
+    const saved = localStorage.getItem(STORAGE_KEY_STATE)
+    if (saved) {
+      // Saved JSON is unvalidated: fall back to '' for anything that is not a
+      // string so later string operations on url/text cannot throw.
+      const parsed = JSON.parse(saved) as Record<string, { url?: unknown; text?: unknown } | undefined>
+      return Object.fromEntries(
+        DOCUMENT_TYPES.map(d => [d.id, {
+          ...emptyDocState(),
+          url: asString(parsed[d.id]?.url),
+          text: asString(parsed[d.id]?.text),
+        }])
+      ) as DocsState
+    }
+  } catch { /* ignore */ }
+  return blank()
+}
+
+/** Counts whitespace-separated words; blank or empty text counts as 0. */
+function countWords(text: string): number {
+  if (!text.trim()) return 0
+  return text.trim().split(/\s+/).length
+}
+
+interface 
HistoryEntry { + date: string + docCount: number + findings: number + resultKey: string +} + +export function ComplianceCheckTab() { + const [docs, setDocs] = useState(initState) + const [useAgent, setUseAgent] = useState(false) + const [loading, setLoading] = useState(false) + const [progress, setProgress] = useState('') + const [results, setResults] = useState(() => { + if (typeof window === 'undefined') return null + try { const s = localStorage.getItem(STORAGE_KEY_RESULTS); return s ? JSON.parse(s) : null } catch { return null } + }) + const [error, setError] = useState(null) + const [history, setHistory] = useState(() => { + if (typeof window === 'undefined') return [] + try { return JSON.parse(localStorage.getItem(STORAGE_KEY_HISTORY) || '[]') } catch { return [] } + }) + + // Persist URLs and texts (not loading/error state) + React.useEffect(() => { + const toSave: Record = {} + for (const [key, val] of Object.entries(docs)) { + toSave[key] = { url: val.url, text: val.text } + } + try { localStorage.setItem(STORAGE_KEY_STATE, JSON.stringify(toSave)) } catch { /* quota */ } + }, [docs]) + + const updateDoc = useCallback((docType: DocTypeId, patch: Partial) => { + setDocs(prev => ({ ...prev, [docType]: { ...prev[docType], ...patch } })) + }, []) + + const handleFetchText = useCallback(async (docType: DocTypeId) => { + const url = docs[docType].url.trim() + if (!url) return + + updateDoc(docType, { loading: true, error: null }) + try { + const res = await fetch('/api/sdk/v1/agent/extract-text', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ url }), + }) + if (!res.ok) { + const msg = res.status === 404 + ? 'Seite nicht erreichbar' + : `Fehler beim Laden (${res.status})` + throw new Error(msg) + } + const data = await res.json() + updateDoc(docType, { text: data.text || '', loading: false }) + } catch (e) { + updateDoc(docType, { + loading: false, + error: e instanceof Error ? 
e.message : 'Text konnte nicht geladen werden', + }) + } + }, [docs, updateDoc]) + + const handleFileUpload = useCallback(async (docType: DocTypeId, file: File) => { + // For now, read as text. PDF/DOCX parsing can be added server-side later. + const reader = new FileReader() + reader.onload = () => { + updateDoc(docType, { text: reader.result as string }) + } + reader.readAsText(file) + }, [updateDoc]) + + const filledCount = Object.values(docs).filter(d => d.url.trim() || d.text.trim()).length + + const handleSubmit = async () => { + if (filledCount === 0) return + + setLoading(true) + setError(null) + setResults(null) + setProgress('Compliance-Check wird gestartet...') + + try { + const entries = DOCUMENT_TYPES + .filter(dt => docs[dt.id].url.trim() || docs[dt.id].text.trim()) + .map(dt => ({ + doc_type: dt.id, + label: dt.label, + url: docs[dt.id].url.trim(), + text: docs[dt.id].text.trim() || undefined, + })) + + const startRes = await fetch('/api/sdk/v1/agent/compliance-check', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + entries, + use_agent: useAgent, + }), + }) + if (!startRes.ok) throw new Error(`Pruefung konnte nicht gestartet werden: ${startRes.status}`) + const { check_id } = await startRes.json() + if (!check_id) throw new Error('Keine Check-ID erhalten') + + // Poll for results + let attempts = 0 + while (attempts < 120) { + await new Promise(r => setTimeout(r, 3000)) + const pollRes = await fetch(`/api/sdk/v1/agent/compliance-check?check_id=${check_id}`) + if (!pollRes.ok) { attempts++; continue } + const pollData = await pollRes.json() + if (pollData.progress) setProgress(pollData.progress) + if (pollData.status === 'completed' && pollData.result) { + setResults(pollData.result) + setProgress('') + localStorage.setItem(STORAGE_KEY_RESULTS, JSON.stringify(pollData.result)) + + const resultKey = `compliance-check-result-${Date.now()}` + try { localStorage.setItem(resultKey, 
JSON.stringify(pollData.result)) } catch { /* quota */ } + const entry: HistoryEntry = { + date: new Date().toISOString(), + docCount: entries.length, + findings: pollData.result.total_findings || 0, + resultKey, + } + const updated = [entry, ...history].slice(0, 30) + setHistory(updated) + localStorage.setItem(STORAGE_KEY_HISTORY, JSON.stringify(updated)) + break + } + if (pollData.status === 'failed') { + throw new Error(pollData.error || 'Pruefung fehlgeschlagen') + } + attempts++ + } + if (attempts >= 120) throw new Error('Zeitlimit ueberschritten') + } catch (e) { + setError(e instanceof Error ? e.message : 'Unbekannter Fehler') + setProgress('') + } finally { + setLoading(false) + } + } + + const loadFromHistory = (entry: HistoryEntry) => { + if (entry.resultKey) { + try { + const saved = localStorage.getItem(entry.resultKey) + if (saved) { setResults(JSON.parse(saved)); return } + } catch { /* ignore */ } + } + try { + const last = localStorage.getItem(STORAGE_KEY_RESULTS) + if (last) setResults(JSON.parse(last)) + } catch { /* ignore */ } + } + + return ( +
+ {/* Info box */} +
+

Compliance-Check (Alle Dokumente)

+

+ Geben Sie die URLs Ihrer Rechtstexte ein oder laden Sie die Dokumente hoch. + Das System prueft alle Pflichtangaben nach DSGVO, TDDDG, TMG und UWG. + Pflichtdokumente sind mit * markiert. +

+
+ + {/* Document rows */} +
+ {DOCUMENT_TYPES.map(dt => ( + updateDoc(dt.id, { url })} + onFetchText={() => handleFetchText(dt.id)} + onTextChange={text => updateDoc(dt.id, { text })} + onFileUpload={file => handleFileUpload(dt.id, file)} + /> + ))} +
+ + {/* Agent toggle + submit */} +
+ + + + {filledCount} von {DOCUMENT_TYPES.length} Dokumenten ausgefuellt + +
+ + {/* Submit button */} + + + {/* Progress */} + {progress && ( +
+ + + + + {progress} +
+ )} + + {/* Error */} + {error && ( +
{error}
+ )} + + {/* Results */} + {results && results.results && ( +
+ + + {/* Email status */} + {results.email_status && ( +
+ + E-Mail: {results.email_status === 'sent' ? 'Gesendet' : results.email_status} +
+ )} +
+ )} + + {/* History */} + {history.length > 0 && ( +
+

Letzte Compliance-Checks

+
+ {history.map((h, i) => ( + + ))} +
+
+ )} +
+ ) +} diff --git a/admin-compliance/app/sdk/agent/_components/DocumentRow.tsx b/admin-compliance/app/sdk/agent/_components/DocumentRow.tsx new file mode 100644 index 0000000..0bb1f96 --- /dev/null +++ b/admin-compliance/app/sdk/agent/_components/DocumentRow.tsx @@ -0,0 +1,163 @@ +'use client' + +import React, { useState, useRef } from 'react' + +interface DocumentRowProps { + label: string + docType: string + required?: boolean + url: string + text: string + loading: boolean + error: string | null + wordCount: number + onUrlChange: (url: string) => void + onFetchText: () => void + onTextChange: (text: string) => void + onFileUpload: (file: File) => void +} + +export function DocumentRow({ + label, + docType, + required, + url, + text, + loading, + error, + wordCount, + onUrlChange, + onFetchText, + onTextChange, + onFileUpload, +}: DocumentRowProps) { + const [showText, setShowText] = useState(false) + const fileRef = useRef(null) + + const textVisible = showText || text.length > 0 + + const handleFileChange = (e: React.ChangeEvent) => { + const file = e.target.files?.[0] + if (!file) return + + // Read text-based files directly + const reader = new FileReader() + reader.onload = () => { + const content = reader.result as string + onTextChange(content) + } + reader.onerror = () => { + // Let parent handle via onFileUpload for binary formats + onFileUpload(file) + } + + if (file.name.endsWith('.txt') || file.type === 'text/plain') { + reader.readAsText(file) + } else { + // PDF, DOCX — pass to parent for server-side parsing + onFileUpload(file) + } + + // Reset input so the same file can be re-selected + e.target.value = '' + } + + return ( +
+ {/* Header row: label + inputs */} +
+
+ + {label} + {required && *} + +
+ + onUrlChange(e.target.value)} + placeholder="https://example.com/datenschutz" + className="flex-1 px-3 py-2 border border-gray-300 rounded-lg text-sm focus:ring-2 focus:ring-purple-500 focus:border-transparent" + /> + + {/* Fetch text button */} + + + {/* File upload button */} + + + + {/* Toggle text area */} + + + {/* Word count badge */} + {wordCount > 0 && ( + + {wordCount.toLocaleString('de-DE')} W. + + )} +
+ + {/* Error */} + {error && ( +
{error}
+ )} + + {/* Collapsible textarea */} + {textVisible && ( +