feat: Multi-URL Document Check with full checklist visibility
New "Dokumenten-Pruefung" tab in Compliance Agent: - User adds multiple URLs with document type (DSI, AGB, Impressum, Cookie, Widerruf) - Each document loaded via Playwright, accordions expanded, text extracted - Checked against type-specific legal checklist - Optional: Cookie banner check via checkbox Checklisten-UX (solves "100% looks like nothing was checked"): - All checks shown per document: green checkmark + matched text excerpt - Red X for missing fields with legal reference - Builds user trust: "9 Punkte geprueft, alle bestanden" - Expandable per document with completeness bar New checklists: - Impressum: §5 TMG (6 fields: name, address, contact, register, VAT, representative) - Cookie-Richtlinie: §25 TDDDG (5 fields: types, purposes, retention, third-party, opt-out) Backend: - POST /agent/doc-check — async with polling (same pattern as /scan) - DocCheckResult includes checks[] with passed/failed + matched_text - dsi_document_checker returns all_checks in SCORE finding - Email report shows per-document checklist Files: agent_doc_check_routes.py (280 LOC), DocCheckTab.tsx (248 LOC), ChecklistView.tsx (130 LOC), dsi_document_checker.py (+70 LOC) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,39 @@
|
||||
/**
|
||||
* Agent Doc-Check Proxy — Multi-URL document verification
|
||||
* POST: start check, GET: poll status
|
||||
*/
|
||||
|
||||
import { NextRequest, NextResponse } from 'next/server'
|
||||
|
||||
// Base URL of the compliance backend. Taken from BACKEND_API_URL; the default
// below is used when the variable is unset or empty.
const BACKEND_URL = process.env.BACKEND_API_URL || 'http://backend-compliance:8002'
|
||||
|
||||
/**
 * Starts a document check.
 *
 * The raw request body is forwarded unchanged to the backend's
 * `/api/compliance/agent/doc-check` endpoint and the backend's JSON answer is
 * relayed together with its HTTP status.
 *
 * @param request Incoming request whose body is the JSON payload for the backend.
 * @returns The backend's JSON response, or a 503 with an `error` field when the
 *          backend cannot be reached, times out (30 s) or answers with non-JSON.
 */
export async function POST(request: NextRequest) {
  try {
    const body = await request.text()
    const response = await fetch(`${BACKEND_URL}/api/compliance/agent/doc-check`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body,
      signal: AbortSignal.timeout(30000),
    })
    const data = await response.json()
    return NextResponse.json(data, { status: response.status })
  } catch (error) {
    // Keep the cause visible in the server log; the client only gets a generic message.
    console.error('doc-check proxy: starting the check failed', error)
    return NextResponse.json({ error: 'Pruefung konnte nicht gestartet werden' }, { status: 503 })
  }
}
|
||||
|
||||
/**
 * Polls the status of a running document check.
 *
 * @param request Incoming request; the `check_id` query parameter is required.
 * @returns 400 when `check_id` is missing, otherwise the backend's JSON answer
 *          with the backend's HTTP status, or a 503 with an `error` field when
 *          the backend cannot be reached, times out (10 s) or answers with non-JSON.
 */
export async function GET(request: NextRequest) {
  const checkId = request.nextUrl.searchParams.get('check_id')
  if (!checkId) return NextResponse.json({ error: 'check_id required' }, { status: 400 })

  try {
    const response = await fetch(
      // The id comes straight from the query string: encode it so that values
      // containing "/", "?" or ".." cannot address a different backend route.
      `${BACKEND_URL}/api/compliance/agent/doc-check/${encodeURIComponent(checkId)}`,
      { signal: AbortSignal.timeout(10000) },
    )
    const data = await response.json()
    // Relay the backend status (as POST does) instead of always answering 200.
    return NextResponse.json(data, { status: response.status })
  } catch (error) {
    console.error('doc-check proxy: status request failed', error)
    return NextResponse.json({ error: 'Status-Abfrage fehlgeschlagen' }, { status: 503 })
  }
}
|
||||
@@ -0,0 +1,130 @@
|
||||
'use client'
|
||||
|
||||
import React, { useState } from 'react'
|
||||
|
||||
/** One checklist entry of a document as rendered by ChecklistView. */
interface CheckItem {
  /** Identifier of the check. */
  id: string
  /** Text shown next to the pass/fail icon. */
  label: string
  /** True renders a green checkmark, false a red X. */
  passed: boolean
  /** Severity of the check; not rendered by this component. */
  severity: string
  /** Excerpt displayed below the label of a passed check when non-empty. */
  matched_text: string
}
|
||||
|
||||
/** Result of checking a single document, one row in ChecklistView. */
interface DocResult {
  /** Display name of the document. */
  label: string
  /** URL of the document; shown below the label. */
  url: string
  /** Document type key; mapped to a short label via DOC_TYPE_LABELS. */
  doc_type: string
  /** Number of analysed words; the footer line is shown only when > 0. */
  word_count: number
  /** Completeness in percent; drives the bar width and colour. */
  completeness_pct: number
  /** All checks performed for this document. */
  checks: CheckItem[]
  /** Number of findings; not rendered by this component. */
  findings_count: number
  /** Non-empty when the check failed; replaces bar and checklist with the message. */
  error: string
}
|
||||
|
||||
/** Short badge text per document type key; unknown keys are shown verbatim by the caller. */
const DOC_TYPE_LABELS: Record<string, string> = {
  dse: 'DSI',
  agb: 'AGB',
  impressum: 'Impressum',
  cookie: 'Cookie',
  widerruf: 'Widerruf',
  other: 'Sonstiges',
}
|
||||
|
||||
/**
 * Renders one collapsible row per checked document with a completeness bar
 * and, when expanded, every individual check (passed and failed).
 *
 * @param results Per-document results; nothing is rendered for an empty list.
 */
export function ChecklistView({ results }: { results: DocResult[] }) {
  // The expanded row is tracked by index, not by URL: the same URL may appear
  // in several rows (e.g. one page checked as two document types) and those
  // rows must expand independently.
  const [expanded, setExpanded] = useState<number | null>(null)

  if (!results || results.length === 0) return null

  const totalOk = results.filter(r => r.completeness_pct === 100).length

  return (
    <div className="space-y-4">
      <div className="flex items-center justify-between">
        <h3 className="text-sm font-semibold text-gray-800">
          Dokumenten-Pruefung ({results.length} Dokumente, {totalOk} vollstaendig)
        </h3>
      </div>

      <div className="space-y-2">
        {results.map((r, i) => {
          const isExp = expanded === i
          const pct = r.completeness_pct
          // Keep the bar inside its track even if the backend reports an out-of-range value.
          const barWidth = Math.min(100, Math.max(0, pct))
          const barColor = pct === 100 ? 'bg-green-500' : pct >= 80 ? 'bg-green-400' : pct >= 50 ? 'bg-yellow-500' : 'bg-red-500'
          const typeLabel = DOC_TYPE_LABELS[r.doc_type] || r.doc_type
          // Tolerate a missing checks array instead of crashing the whole list.
          const checks = r.checks ?? []

          return (
            <div key={i} className="border border-gray-200 rounded-lg overflow-hidden">
              <button
                type="button"
                aria-expanded={isExp}
                onClick={() => setExpanded(isExp ? null : i)}
                className="w-full flex items-center justify-between px-4 py-3 hover:bg-gray-50 text-left"
              >
                <div className="flex items-center gap-3 flex-1 min-w-0">
                  <svg className={`w-4 h-4 text-gray-400 transition-transform shrink-0 ${isExp ? 'rotate-90' : ''}`}
                    fill="none" stroke="currentColor" viewBox="0 0 24 24">
                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
                  </svg>
                  <span className="text-xs px-2 py-0.5 rounded bg-gray-100 text-gray-600 font-medium shrink-0">
                    {typeLabel}
                  </span>
                  <div className="min-w-0 flex-1">
                    <div className="text-sm font-medium text-gray-900 truncate">{r.label}</div>
                    <div className="text-xs text-gray-500 truncate">{r.url}</div>
                  </div>
                </div>
                <div className="flex items-center gap-3 shrink-0 ml-3">
                  {r.error ? (
                    <span className="text-xs text-red-600 font-medium">Fehler</span>
                  ) : (
                    <>
                      <div className="w-16 h-2 bg-gray-200 rounded-full overflow-hidden">
                        <div className={`h-full rounded-full ${barColor}`} style={{ width: `${barWidth}%` }} />
                      </div>
                      <span className={`text-xs font-medium w-10 text-right ${
                        pct === 100 ? 'text-green-700' : pct >= 50 ? 'text-yellow-700' : 'text-red-700'
                      }`}>{pct}%</span>
                    </>
                  )}
                </div>
              </button>

              {isExp && (
                <div className="px-4 py-3 border-t border-gray-100 bg-gray-50/50">
                  {r.error ? (
                    <p className="text-sm text-red-600">{r.error}</p>
                  ) : (
                    <div className="space-y-1.5">
                      {checks.map((check, ci) => (
                        <div key={ci} className="flex items-start gap-2">
                          {check.passed ? (
                            <svg className="w-4 h-4 text-green-500 mt-0.5 shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
                            </svg>
                          ) : (
                            <svg className="w-4 h-4 text-red-500 mt-0.5 shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
                            </svg>
                          )}
                          <div className="flex-1">
                            <div className={`text-sm ${check.passed ? 'text-gray-700' : 'text-red-700 font-medium'}`}>
                              {check.label}
                            </div>
                            {check.passed && check.matched_text && (
                              <div className="text-xs text-gray-400 mt-0.5 font-mono truncate">
                                "...{check.matched_text}..."
                              </div>
                            )}
                          </div>
                        </div>
                      ))}
                      {r.word_count > 0 && (
                        <div className="text-xs text-gray-400 mt-2 pt-2 border-t border-gray-200">
                          {r.word_count} Woerter analysiert
                        </div>
                      )}
                    </div>
                  )}
                </div>
              )}
            </div>
          )
        })}
      </div>
    </div>
  )
}
|
||||
@@ -0,0 +1,248 @@
|
||||
'use client'
|
||||
|
||||
import React, { useState } from 'react'
|
||||
import { ChecklistView } from './ChecklistView'
|
||||
|
||||
/** One editable input row of the document-check form. */
interface DocEntry {
  /** Client-side row id; used as React key and to address the row in updates. */
  id: string
  /** Selected document type key (one of the DOC_TYPES ids). */
  type: string
  /** Optional display name; may be auto-filled from the URL path on blur. */
  label: string
  /** URL of the document; rows with a blank URL are not submitted. */
  url: string
}
|
||||
|
||||
/** Options of the document-type dropdown, in display order. */
const DOC_TYPES = [
  {
    id: 'dse',
    label: 'DSI (Datenschutzinformation)',
  },
  {
    id: 'agb',
    label: 'AGB / Nutzungsbedingungen',
  },
  {
    id: 'impressum',
    label: 'Impressum',
  },
  {
    id: 'cookie',
    label: 'Cookie-Richtlinie',
  },
  {
    id: 'widerruf',
    label: 'Widerrufsbelehrung',
  },
  {
    id: 'other',
    label: 'Sonstiges',
  },
]
|
||||
|
||||
/**
 * Creates an empty form row with a fresh 8-character id and type `dse`.
 *
 * `crypto.randomUUID()` is only exposed in secure contexts (HTTPS or
 * localhost); when it is unavailable a pseudo-random hex id is generated
 * instead. The id only has to be unique among the rows of one form.
 *
 * @returns A new entry with empty label and URL.
 */
function newEntry(): DocEntry {
  const hasRandomUUID = typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function'
  const id = hasRandomUUID
    ? crypto.randomUUID().slice(0, 8)
    : Array.from({ length: 8 }, () => Math.floor(Math.random() * 16).toString(16)).join('')
  return { id, type: 'dse', label: '', url: '' }
}
|
||||
|
||||
/** Delay between two status polls. */
const DOC_CHECK_POLL_INTERVAL_MS = 3000
/** Maximum number of polls before the check is reported as timed out (120 x 3 s = 6 min). */
const DOC_CHECK_MAX_POLL_ATTEMPTS = 120

/**
 * "Dokumenten-Pruefung" tab: lets the user enter several document URLs with a
 * type each, starts the check via the doc-check proxy, polls until the job is
 * finished and renders the per-document checklist, the optional cookie-banner
 * result and the email status.
 */
export function DocCheckTab() {
  const [entries, setEntries] = useState<DocEntry[]>([newEntry()])
  const [checkCookieBanner, setCheckCookieBanner] = useState(false)
  const [loading, setLoading] = useState(false)
  const [progress, setProgress] = useState('')
  const [results, setResults] = useState<any>(null)
  const [error, setError] = useState<string | null>(null)

  const updateEntry = (id: string, field: keyof DocEntry, value: string) => {
    setEntries(prev => prev.map(e => e.id === id ? { ...e, [field]: value } : e))
  }

  const removeEntry = (id: string) => {
    setEntries(prev => prev.filter(e => e.id !== id))
  }

  const addEntry = () => {
    setEntries(prev => [...prev, newEntry()])
  }

  // Auto-detect label from URL: last path segment, trailing "-<digits>" removed,
  // dashes turned into spaces, words capitalised. Never overwrites a user label.
  const autoLabel = (entry: DocEntry) => {
    if (entry.label) return
    try {
      const path = new URL(entry.url).pathname
      const last = path.split('/').filter(Boolean).pop() || ''
      const label = last.replace(/-\d+$/, '').replace(/-/g, ' ')
        .replace(/\b\w/g, c => c.toUpperCase())
      if (label.length > 3) {
        updateEntry(entry.id, 'label', label)
      }
    } catch { /* invalid URL */ }
  }

  const handleSubmit = async () => {
    const validEntries = entries.filter(e => e.url.trim())
    if (validEntries.length === 0) return

    setLoading(true)
    setError(null)
    setResults(null)
    setProgress('Pruefung wird gestartet...')

    try {
      const startRes = await fetch('/api/sdk/v1/agent/doc-check', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          entries: validEntries.map(e => ({
            doc_type: e.type,
            label: e.label || e.url.split('/').pop() || 'Dokument',
            url: e.url.trim(),
          })),
          check_cookie_banner: checkCookieBanner,
        }),
      })
      if (!startRes.ok) throw new Error(`Pruefung konnte nicht gestartet werden: ${startRes.status}`)
      const { check_id } = await startRes.json()
      if (!check_id) throw new Error('Keine Check-ID erhalten')

      // Poll for results
      let finished = false
      for (let attempt = 0; attempt < DOC_CHECK_MAX_POLL_ATTEMPTS && !finished; attempt++) {
        await new Promise(r => setTimeout(r, DOC_CHECK_POLL_INTERVAL_MS))
        const pollRes = await fetch(`/api/sdk/v1/agent/doc-check?check_id=${encodeURIComponent(check_id)}`)
        if (!pollRes.ok) continue
        const pollData = await pollRes.json()
        if (pollData.progress) setProgress(pollData.progress)
        if (pollData.status === 'completed' && pollData.result) {
          setResults(pollData.result)
          setProgress('')
          finished = true
        } else if (pollData.status === 'failed') {
          throw new Error(pollData.error || 'Pruefung fehlgeschlagen')
        }
      }

      // Running out of attempts used to end silently, leaving the last progress
      // message and its spinner on screen; surface it as an error instead.
      if (!finished) {
        throw new Error('Zeitueberschreitung: Die Pruefung hat zu lange gedauert')
      }
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
      setProgress('')
    } finally {
      setLoading(false)
    }
  }

  return (
    <div className="space-y-4">
      {/* URL Entries */}
      <div className="space-y-2">
        {entries.map(entry => (
          <div key={entry.id} className="flex items-center gap-2">
            <select
              value={entry.type}
              onChange={e => updateEntry(entry.id, 'type', e.target.value)}
              className="w-48 px-3 py-2.5 border border-gray-300 rounded-lg text-sm bg-white shrink-0"
            >
              {DOC_TYPES.map(t => (
                <option key={t.id} value={t.id}>{t.label}</option>
              ))}
            </select>
            <input
              type="text"
              value={entry.label}
              onChange={e => updateEntry(entry.id, 'label', e.target.value)}
              placeholder="Bezeichnung (optional)"
              className="w-40 px-3 py-2.5 border border-gray-300 rounded-lg text-sm shrink-0"
            />
            <input
              type="url"
              value={entry.url}
              onChange={e => updateEntry(entry.id, 'url', e.target.value)}
              onBlur={() => autoLabel(entry)}
              placeholder="https://example.com/datenschutz"
              className="flex-1 px-3 py-2.5 border border-gray-300 rounded-lg text-sm"
            />
            {entries.length > 1 && (
              <button onClick={() => removeEntry(entry.id)}
                className="p-2 text-gray-400 hover:text-red-500 shrink-0">
                <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
                </svg>
              </button>
            )}
          </div>
        ))}
      </div>

      {/* Add URL + Options */}
      <div className="flex items-center justify-between">
        <button onClick={addEntry}
          className="flex items-center gap-1.5 text-sm text-purple-600 hover:text-purple-700 font-medium">
          <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 4v16m8-8H4" />
          </svg>
          URL hinzufuegen
        </button>

        <label className="flex items-center gap-2 text-sm text-gray-600">
          <input
            type="checkbox"
            checked={checkCookieBanner}
            onChange={e => setCheckCookieBanner(e.target.checked)}
            className="rounded border-gray-300 text-purple-600 focus:ring-purple-500"
          />
          Cookie-Banner pruefen
        </label>
      </div>

      {/* Submit */}
      <button
        onClick={handleSubmit}
        disabled={loading || entries.every(e => !e.url.trim())}
        className="w-full px-4 py-3 bg-purple-600 text-white rounded-lg font-medium hover:bg-purple-700 disabled:opacity-50 transition-colors text-sm flex items-center justify-center gap-2"
      >
        {loading ? (
          <>
            <svg className="animate-spin w-4 h-4" fill="none" viewBox="0 0 24 24">
              <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
              <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
            </svg>
            Pruefe...
          </>
        ) : (
          `${entries.filter(e => e.url.trim()).length} Dokument${entries.filter(e => e.url.trim()).length !== 1 ? 'e' : ''} pruefen`
        )}
      </button>

      {/* Progress */}
      {progress && (
        <div className="bg-purple-50 border border-purple-200 rounded-lg p-3 text-sm text-purple-700 flex items-center gap-3">
          <svg className="animate-spin w-4 h-4 text-purple-500 shrink-0" fill="none" viewBox="0 0 24 24">
            <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
            <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
          </svg>
          {progress}
        </div>
      )}

      {/* Error */}
      {error && (
        <div className="bg-red-50 border border-red-200 rounded-lg p-3 text-sm text-red-700">{error}</div>
      )}

      {/* Results */}
      {results && results.results && (
        <div className="bg-white border border-gray-200 rounded-xl p-6 shadow-sm">
          <ChecklistView results={results.results} />

          {/* Cookie Banner Result */}
          {results.cookie_banner_result && (
            <div className="mt-4 pt-4 border-t border-gray-200">
              <h4 className="text-sm font-semibold text-gray-800 mb-2">Cookie-Banner</h4>
              <div className="text-sm text-gray-600">
                {results.cookie_banner_result.banner_detected
                  ? `Banner erkannt: ${results.cookie_banner_result.banner_provider || 'unbekannt'}`
                  : 'Kein Banner erkannt'}
              </div>
              {results.cookie_banner_result.banner_checks?.violations?.length > 0 && (
                <div className="mt-2 space-y-1">
                  {results.cookie_banner_result.banner_checks.violations.map((v: any, i: number) => (
                    <div key={i} className="text-xs text-red-600 flex items-start gap-1.5">
                      <span className="shrink-0 mt-0.5">!!</span>
                      <span>{v.text}</span>
                    </div>
                  ))}
                </div>
              )}
            </div>
          )}

          {/* Email Status */}
          {results.email_status && (
            <div className="mt-3 text-xs text-gray-500 flex items-center gap-2">
              <span className={`w-2 h-2 rounded-full ${results.email_status === 'sent' ? 'bg-green-400' : 'bg-gray-300'}`} />
              E-Mail: {results.email_status === 'sent' ? 'Gesendet' : results.email_status}
            </div>
          )}
        </div>
      )}
    </div>
  )
}
|
||||
@@ -6,9 +6,10 @@ import { AnalysisResult } from './_components/AnalysisResult'
|
||||
import { AnalysisHistory } from './_components/AnalysisHistory'
|
||||
import { FollowUpQuestions } from './_components/FollowUpQuestions'
|
||||
import { ScanResult } from './_components/ScanResult'
|
||||
import { DocCheckTab } from './_components/DocCheckTab'
|
||||
|
||||
type AnalysisMode = 'pre_launch' | 'post_launch'
|
||||
type AnalysisTab = 'quick' | 'scan'
|
||||
type AnalysisTab = 'quick' | 'scan' | 'doc-check'
|
||||
|
||||
const MODES: { id: AnalysisMode; label: string; desc: string; icon: string }[] = [
|
||||
{ id: 'pre_launch', label: 'Internes Dokument', desc: 'Vor Veroeffentlichung pruefen', icon: '📋' },
|
||||
@@ -18,6 +19,7 @@ const MODES: { id: AnalysisMode; label: string; desc: string; icon: string }[] =
|
||||
const TABS: { id: AnalysisTab; label: string; desc: string }[] = [
|
||||
{ id: 'quick', label: 'Schnellanalyse', desc: 'Einzelne Seite klassifizieren + bewerten' },
|
||||
{ id: 'scan', label: 'Website-Scan', desc: 'Mehrere Seiten scannen + Dienstleister abgleichen' },
|
||||
{ id: 'doc-check', label: 'Dokumenten-Pruefung', desc: 'Einzelne Dokumente gezielt pruefen' },
|
||||
]
|
||||
|
||||
export default function AgentPage() {
|
||||
@@ -219,8 +221,11 @@ export default function AgentPage() {
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* URL Input */}
|
||||
<form onSubmit={handleSubmit} className="flex gap-3">
|
||||
{/* Doc Check Tab — own component */}
|
||||
{tab === 'doc-check' && <DocCheckTab />}
|
||||
|
||||
{/* URL Input (quick + scan only) */}
|
||||
{tab !== 'doc-check' && <form onSubmit={handleSubmit} className="flex gap-3">
|
||||
<input type="url" value={url} onChange={e => setUrl(e.target.value)}
|
||||
placeholder={tab === 'scan' ? 'https://www.example.com/' : 'https://example.com/datenschutz'}
|
||||
className="flex-1 px-4 py-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-purple-500 focus:border-transparent text-sm"
|
||||
@@ -234,7 +239,7 @@ export default function AgentPage() {
|
||||
</svg>{tab === 'scan' ? 'Scanne...' : 'Analysiere...'}</>
|
||||
) : tab === 'scan' ? 'Website scannen' : 'Analysieren'}
|
||||
</button>
|
||||
</form>
|
||||
</form>}
|
||||
|
||||
{/* Scan Progress */}
|
||||
{scanProgress && tab === 'scan' && (
|
||||
|
||||
@@ -0,0 +1,280 @@
|
||||
"""
|
||||
Agent Document Check Routes — Multi-URL document verification.
|
||||
|
||||
The user provides explicit URLs + document types. No crawling needed.
|
||||
Each document is loaded, expanded (accordions/tabs), text extracted,
|
||||
and checked against its type-specific legal checklist.
|
||||
|
||||
POST /api/compliance/agent/doc-check
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import uuid as _uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
from compliance.services.dsi_document_checker import (
|
||||
check_document_completeness, classify_document_type,
|
||||
)
|
||||
from compliance.services.smtp_sender import send_email
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/compliance/agent", tags=["agent"])
|
||||
|
||||
# Base URL of the consent-tester service that loads pages and runs banner scans.
# Overridable via the CONSENT_TESTER_URL environment variable; the default is
# the previously hard-coded address. A trailing slash is removed because the
# endpoint paths appended below start with "/".
CONSENT_TESTER_URL = os.environ.get(
    "CONSENT_TESTER_URL", "http://bp-compliance-consent-tester:8094"
).rstrip("/")
|
||||
|
||||
|
||||
class DocCheckEntry(BaseModel):
    """One document to check: a URL together with the type assigned by the user."""

    doc_type: str  # dse, agb, impressum, cookie, widerruf, other
    label: str  # display name; used in progress messages and in the report
    url: str  # address handed to the consent-tester service for loading
|
||||
|
||||
|
||||
class DocCheckRequest(BaseModel):
    """Request body of POST /doc-check."""

    entries: list[DocCheckEntry]  # documents to check, processed in order
    # Address the report email is sent to.
    # NOTE(review): this is settable by the caller — confirm the endpoint is
    # not reachable by untrusted clients, otherwise it can mail arbitrary addresses.
    recipient: str = "dsb@breakpilot.local"
    # When true, the URL of the first entry is additionally run through the cookie-banner scan.
    check_cookie_banner: bool = False
|
||||
|
||||
|
||||
class CheckItem(BaseModel):
    """Outcome of a single checklist entry for one document."""

    id: str  # identifier of the checklist entry
    label: str  # human-readable name of the requirement
    passed: bool  # whether the requirement was found in the document
    severity: str  # severity string taken from the checker's check data
    matched_text: str = ""  # excerpt supplied by the checker; empty when absent
|
||||
|
||||
|
||||
class DocCheckResult(BaseModel):
    """Result of checking one document; `error` is non-empty when the check could not run."""

    label: str  # copied from the request entry
    url: str  # copied from the request entry
    doc_type: str  # copied from the request entry
    word_count: int = 0  # word count reported by the consent-tester service
    completeness_pct: int = 0  # percentage parsed from the checker's SCORE finding
    checks: list[CheckItem] = []  # every checklist entry, passed and failed
    findings_count: int = 0  # number of findings that are not SCORE findings
    error: str = ""  # failure description; empty on success
|
||||
|
||||
|
||||
class DocCheckResponse(BaseModel):
    """Aggregated outcome of a finished document check job."""

    results: list[DocCheckResult]  # one result per request entry, in request order
    cookie_banner_result: dict | None = None  # raw JSON of the banner scan, if requested and successful
    total_documents: int  # number of entries in `results`
    total_findings: int  # sum of findings_count over all results
    email_status: str = ""  # "status" value returned by send_email, "failed" when missing
    checked_at: str  # UTC timestamp in ISO 8601 format
|
||||
|
||||
|
||||
# In-memory job store for async processing, keyed by check_id. Each value is a
# dict including the keys "status", "progress", "result" and "error".
# NOTE(review): no code visible here removes finished jobs, and the store lives
# in one process only — confirm cleanup and single-worker deployment.
_doc_check_jobs: dict[str, dict] = {}
|
||||
|
||||
|
||||
class DocCheckStartResponse(BaseModel):
    """Answer of POST /doc-check: the id to poll with."""

    check_id: str  # id to pass to GET /doc-check/{check_id}
    status: str = "running"  # state of the job right after it was started
|
||||
|
||||
|
||||
class DocCheckStatusResponse(BaseModel):
    """Answer of GET /doc-check/{check_id} for a known job."""

    check_id: str  # id the status belongs to
    status: str  # "running", "completed" or "failed" as set by the job code
    progress: str = ""  # latest progress message
    result: DocCheckResponse | None = None  # set once the job has completed
    error: str = ""  # failure message (truncated by the job code) when status is "failed"
|
||||
|
||||
|
||||
@router.post("/doc-check")
async def start_doc_check(req: DocCheckRequest):
    """Start an asynchronous multi-URL document check.

    Registers a job in the in-memory store, schedules the background task and
    returns immediately.

    Args:
        req: Documents to check plus report options.

    Returns:
        DocCheckStartResponse with the id to poll via GET /doc-check/{check_id}.
    """
    # The id is shortened to 8 hex characters, so regenerate on the (unlikely)
    # collision instead of overwriting another job's state.
    check_id = str(_uuid.uuid4())[:8]
    while check_id in _doc_check_jobs:
        check_id = str(_uuid.uuid4())[:8]

    _doc_check_jobs[check_id] = {"status": "running", "progress": "Pruefung gestartet...", "result": None, "error": ""}

    # The event loop keeps only weak references to tasks; without a strong
    # reference the task may be garbage-collected before it finishes. Storing
    # it in the job entry ties its lifetime to the job.
    task = asyncio.create_task(_run_doc_check(check_id, req))
    _doc_check_jobs[check_id]["task"] = task

    return DocCheckStartResponse(check_id=check_id, status="running")
|
||||
|
||||
|
||||
@router.get("/doc-check/{check_id}")
async def get_doc_check_status(check_id: str):
    """Report the current state of a document check job.

    Args:
        check_id: Id returned when the check was started.

    Returns:
        DocCheckStatusResponse for a known job, otherwise a plain dict with
        status "not_found".
    """
    job = _doc_check_jobs.get(check_id)
    if job:
        fields = {
            "check_id": check_id,
            "status": job["status"],
            "progress": job.get("progress", ""),
            "result": job.get("result"),
            "error": job.get("error", ""),
        }
        return DocCheckStatusResponse(**fields)

    return {"check_id": check_id, "status": "not_found"}
|
||||
|
||||
|
||||
async def _run_doc_check(check_id: str, req: DocCheckRequest):
    """Background task: check every document, optionally the cookie banner, then mail the report.

    Progress and outcome are written to the job entry of ``check_id``. The
    function never raises; any failure marks the job as "failed" with a
    truncated error message.

    Args:
        check_id: Key of the job entry to update.
        req: The original request (entries, recipient, cookie-banner flag).
    """
    import html  # function-scope import: only needed for escaping the report

    job = _doc_check_jobs[check_id]
    try:
        results: list[DocCheckResult] = []
        total_findings = 0
        total_entries = len(req.entries)

        for position, entry in enumerate(req.entries, start=1):
            job["progress"] = f"Dokument {position}/{total_entries}: {entry.label}..."

            result = await _check_single_document(entry)
            results.append(result)
            total_findings += result.findings_count

        # Optional: Cookie banner check on first URL
        cookie_result = None
        if req.check_cookie_banner and req.entries:
            job["progress"] = "Cookie-Banner wird geprueft..."
            cookie_result = await _check_cookie_banner(req.entries[0].url)

        # Build email report
        job["progress"] = "Report wird erstellt..."
        summary = _build_report(results, cookie_result)

        # A mail problem must not throw away the finished check, so it is
        # reported through email_status only.
        try:
            # send_email is called synchronously in this module; run it in a
            # worker thread so it cannot stall the event loop (and with it all
            # status polls) while the mail is being delivered.
            email_result = await asyncio.to_thread(
                send_email,
                recipient=req.recipient,
                subject=f"[DOKUMENTEN-PRUEFUNG] {len(results)} Dokumente geprueft",
                # Labels and error texts originate from user input and remote
                # services; escape them before embedding the report in HTML.
                body_html=f"<pre>{html.escape(summary, quote=False)}</pre>",
            )
            email_status = email_result.get("status", "failed")
        except Exception:
            logger.exception("Doc check %s: sending the report email failed", check_id)
            email_status = "failed"

        response = DocCheckResponse(
            results=results,
            cookie_banner_result=cookie_result,
            total_documents=len(results),
            total_findings=total_findings,
            email_status=email_status,
            checked_at=datetime.now(timezone.utc).isoformat(),
        )

        job["status"] = "completed"
        job["result"] = response
        job["progress"] = "Fertig"

    except Exception as e:
        # logger.exception keeps the traceback, which the plain message loses.
        logger.exception("Doc check %s failed: %s", check_id, e)
        job["status"] = "failed"
        job["error"] = str(e)[:500]
|
||||
|
||||
|
||||
async def _check_single_document(entry: DocCheckEntry) -> DocCheckResult:
    """Load a single URL via the consent-tester service and run its checklist.

    Args:
        entry: Document URL, type and label as supplied by the user.

    Returns:
        A DocCheckResult. Failures (invalid URL, unreachable page, no usable
        text, unexpected exception) are reported through its ``error`` field;
        the function does not raise.
    """
    # Function-scope imports: neither module is imported at file level.
    import re
    from urllib.parse import urlparse

    def _failure(message: str) -> DocCheckResult:
        # Error result that still carries the entry's identifying fields.
        return DocCheckResult(
            label=entry.label, url=entry.url, doc_type=entry.doc_type,
            error=message,
        )

    try:
        # The URL is user input that a headless browser will open; only allow
        # web addresses so that schemes such as file:// are never forwarded.
        target = urlparse(entry.url.strip())
        if target.scheme not in ("http", "https") or not target.netloc:
            return _failure("Ungueltige URL (nur http/https erlaubt)")

        async with httpx.AsyncClient(timeout=90.0) as client:
            resp = await client.post(
                f"{CONSENT_TESTER_URL}/dsi-discovery",
                json={"url": entry.url, "max_documents": 1},
            )

        if resp.status_code != 200:
            return _failure(f"Seite nicht erreichbar (HTTP {resp.status_code})")

        docs = resp.json().get("documents", [])

        # Use the first document found; prefer the full text over the preview.
        doc_text = ""
        word_count = 0
        if docs:
            doc_text = docs[0].get("full_text", "") or docs[0].get("text_preview", "")
            word_count = docs[0].get("word_count", 0)

        if not doc_text or len(doc_text) < 50:
            return _failure("Kein Text extrahierbar")

        # Run checklist
        findings = check_document_completeness(
            doc_text, entry.doc_type, entry.label, entry.url,
        )

        all_checks: list[CheckItem] = []
        completeness = 0
        findings_count = 0
        for finding in findings:
            if "SCORE" not in finding.get("code", ""):
                findings_count += 1
                continue

            # The SCORE finding carries the complete checklist and the percentage.
            all_checks = [
                CheckItem(
                    id=c["id"], label=c["label"], passed=c["passed"],
                    severity=c["severity"], matched_text=c.get("matched_text", ""),
                )
                for c in finding.get("all_checks", [])
            ]
            pct_match = re.search(r"(\d+)%", finding.get("text", ""))
            if pct_match:
                completeness = int(pct_match.group(1))
            elif all_checks:
                # No percentage in the summary text: derive it from the checks
                # rather than reporting 0%.
                passed = sum(1 for c in all_checks if c.passed)
                completeness = round(100 * passed / len(all_checks))

        return DocCheckResult(
            label=entry.label, url=entry.url, doc_type=entry.doc_type,
            word_count=word_count, completeness_pct=completeness,
            checks=all_checks, findings_count=findings_count,
        )

    except Exception as e:
        logger.warning("Doc check failed for %s: %s", entry.url, e)
        return _failure(str(e)[:200])
|
||||
|
||||
|
||||
async def _check_cookie_banner(url: str) -> dict | None:
    """Ask the consent-tester service to scan the cookie banner of ``url``.

    Args:
        url: Page whose banner is scanned.

    Returns:
        The service's JSON answer on HTTP 200; None on any other status or
        when the request or decoding raises (the exception is logged).
    """
    payload = {"url": url, "timeout_per_phase": 8}
    banner = None
    try:
        async with httpx.AsyncClient(timeout=120.0) as http:
            answer = await http.post(f"{CONSENT_TESTER_URL}/scan", json=payload)
        if answer.status_code == 200:
            banner = answer.json()
    except Exception as exc:
        logger.warning("Cookie banner check failed: %s", exc)
        banner = None
    return banner
|
||||
|
||||
|
||||
def _build_report(results: list[DocCheckResult], cookie_result: dict | None) -> str:
|
||||
"""Build email report."""
|
||||
parts = [
|
||||
"DOKUMENTEN-PRUEFUNG",
|
||||
f"Dokumente geprueft: {len(results)}",
|
||||
"",
|
||||
]
|
||||
for r in results:
|
||||
status = "OK" if r.completeness_pct == 100 else "LUECKENHAFT" if r.completeness_pct >= 50 else "MANGELHAFT"
|
||||
if r.error:
|
||||
status = "FEHLER"
|
||||
parts.append(f"[{status}] {r.label} ({r.completeness_pct}%, {r.word_count} Woerter)")
|
||||
|
||||
for check in r.checks:
|
||||
icon = "+" if check.passed else "!!"
|
||||
parts.append(f" [{icon}] {check.label}")
|
||||
|
||||
if r.error:
|
||||
parts.append(f" FEHLER: {r.error}")
|
||||
parts.append("")
|
||||
|
||||
if cookie_result:
|
||||
parts.extend([
|
||||
"Cookie-Banner Pruefung:",
|
||||
f" Banner erkannt: {cookie_result.get('banner_detected', False)}",
|
||||
f" Anbieter: {cookie_result.get('banner_provider', 'unbekannt')}",
|
||||
])
|
||||
violations = cookie_result.get("banner_checks", {}).get("violations", [])
|
||||
if violations:
|
||||
for v in violations[:10]:
|
||||
parts.append(f" [!!] {v.get('text', '')[:80]}")
|
||||
else:
|
||||
parts.append(" Keine Verstoesse erkannt.")
|
||||
|
||||
return "\n".join(parts)
|
||||
@@ -163,6 +163,36 @@ AGB_CHECKLIST = [
|
||||
"patterns": [r"gerichtsstand", r"anwendbares\s+recht", r"jurisdiction", r"governing\s+law"]},
|
||||
]
|
||||
|
||||
# §5 TMG / §18 MStV Impressum requirements.
# NOTE(review): §5 TMG appears to have been replaced by §5 DDG in 2024 — confirm
# which reference should be shown to users.
#
# Each entry passes when ANY of its patterns is found. The patterns are applied
# to lower-cased text, so they are written in lower case. Short legal-form
# abbreviations are anchored with \b: unanchored, "ag", "kg", "ug" or "inc"
# match inside ordinary words ("frage", "flug", ...) and the check would
# pass on almost any text.
IMPRESSUM_CHECKLIST = [
    {"id": "name", "label": "Name des Anbieters",
     "patterns": [r"\b(?:gmbh|ag|ohg|kg|gbr|ug|mbh|inc|ltd)\b", r"\be\.\s?[vk]\.",
                  r"firma", r"unternehmen"]},
    {"id": "address", "label": "Anschrift",
     # "straße" (with ß) is the usual spelling and must match as well as "strasse"/"str.".
     "patterns": [r"(?:str(?:asse|aße|\.)|weg|platz|allee)\s*\d", r"d-\d{5}", r"\d{5}\s+\w+"]},
    {"id": "contact", "label": "Kontaktdaten (E-Mail + Telefon)",
     "patterns": [r"(?:e-?mail|mail).*@", r"telefon|phone|tel\.", r"\+?\d[\d\s/\-]{8,}"]},
    {"id": "register", "label": "Handelsregister / Registernummer",
     "patterns": [r"(?:handelsregister|hrb|hra|registergericht|amtsgericht)", r"register.*(?:nr|nummer)"]},
    {"id": "vat", "label": "USt-IdNr.",
     # "ust"/"vat" must be directly followed by "id" (at most a few separator
     # characters in between); a bare ".*" also matched e.g. "musterstrasse ... heidelberg".
     "patterns": [r"(?:ust|umsatzsteuer)[\s.\-]{0,3}id", r"umsatzsteuer.*identifikation",
                  r"\bvat[\s.\-]{0,3}(?:id|reg|no|number)", r"\bde\s?\d{9}\b"]},
    {"id": "representative", "label": "Vertretungsberechtigte",
     # Accept both umlaut and transliterated spelling.
     "patterns": [r"vertretungsberechtigt", r"gesch(?:ä|ae)ftsf(?:ü|ue)hr", r"vorstand", r"inhaber"]},
]
|
||||
|
||||
# §25 TDDDG Cookie policy requirements.
#
# Each entry passes when ANY of its patterns is found. The patterns are applied
# to lower-cased text. Words containing umlauts accept both the umlaut and the
# transliterated spelling: real pages write "wofür", "gültig", "löschen", which
# the ASCII-only forms never matched.
COOKIE_CHECKLIST = [
    {"id": "cookie_types", "label": "Arten der Cookies",
     "patterns": [r"(?:notwendig|essentiell|funktional|statistik|marketing|tracking)", r"cookie.*(?:art|typ|kategori)"]},
    {"id": "purposes", "label": "Zwecke der Cookies",
     "patterns": [r"zweck.*cookie", r"cookie.*zweck", r"(?:wof(?:ü|ue)r|wozu|warum).*cookie"]},
    {"id": "retention", "label": "Speicherdauer der Cookies",
     "patterns": [r"(?:speicherdauer|laufzeit|g(?:ü|ue)ltigk|ablauf).*cookie", r"cookie.*(?:\d+\s+(?:tag|monat|jahr)|session)"]},
    {"id": "third_party", "label": "Drittanbieter-Cookies",
     "patterns": [r"drittanbieter", r"third.?party", r"(?:google|facebook|meta|microsoft).*cookie"]},
    {"id": "opt_out", "label": "Widerspruchsmoeglichkeit",
     "patterns": [r"(?:widerspruch|opt.?out|ablehnen|deaktivieren).*cookie", r"cookie.*(?:ablehnen|deaktivieren|l(?:ö|oe)schen)"]},
]
|
||||
|
||||
|
||||
def check_document_completeness(
|
||||
text: str,
|
||||
@@ -215,15 +245,36 @@ def check_document_completeness(
|
||||
elif doc_type in ("agb", "terms", "nutzungsbedingungen"):
|
||||
checklist = AGB_CHECKLIST
|
||||
label = "§305ff BGB"
|
||||
elif doc_type in ("impressum", "imprint"):
|
||||
checklist = IMPRESSUM_CHECKLIST
|
||||
label = "§5 TMG / §18 MStV"
|
||||
elif doc_type in ("cookie",):
|
||||
checklist = COOKIE_CHECKLIST
|
||||
label = "§25 TDDDG"
|
||||
else:
|
||||
checklist = ART13_CHECKLIST # Default: check as DSE
|
||||
label = "Art. 13 DSGVO"
|
||||
|
||||
present = 0
|
||||
total = len(checklist)
|
||||
all_checks: list[dict] = []
|
||||
|
||||
for check in checklist:
|
||||
found = any(re.search(p, text_lower) for p in check["patterns"])
|
||||
if not found:
|
||||
match = None
|
||||
for p in check["patterns"]:
|
||||
m = re.search(p, text_lower)
|
||||
if m:
|
||||
match = m
|
||||
break
|
||||
|
||||
passed = match is not None
|
||||
matched_text = ""
|
||||
if match:
|
||||
start = max(0, match.start() - 30)
|
||||
end = min(len(text_lower), match.end() + 30)
|
||||
matched_text = text_lower[start:end].strip()
|
||||
present += 1
|
||||
else:
|
||||
findings.append({
|
||||
"code": f"DSI-MISSING-{check['id'].upper()}",
|
||||
"severity": check.get("severity", "MEDIUM"),
|
||||
@@ -236,8 +287,14 @@ def check_document_completeness(
|
||||
"doc_type": doc_type,
|
||||
"check_id": check["id"],
|
||||
})
|
||||
else:
|
||||
present += 1
|
||||
|
||||
all_checks.append({
|
||||
"id": check["id"],
|
||||
"label": check["label"],
|
||||
"passed": passed,
|
||||
"severity": check.get("severity", "MEDIUM"),
|
||||
"matched_text": matched_text,
|
||||
})
|
||||
|
||||
# Always add summary finding (even at 100% — needed for completeness tracking)
|
||||
if total > 0:
|
||||
@@ -252,6 +309,7 @@ def check_document_completeness(
|
||||
"doc_title": doc_title,
|
||||
"doc_url": doc_url,
|
||||
"doc_type": doc_type,
|
||||
"all_checks": all_checks,
|
||||
})
|
||||
|
||||
return findings
|
||||
|
||||
@@ -45,6 +45,7 @@ from compliance.api.company_profile_routes import router as company_profile_rout
|
||||
from compliance.api.agent_notification_routes import router as agent_notify_router
|
||||
from compliance.api.agent_analyze_routes import router as agent_analyze_router
|
||||
from compliance.api.agent_scan_routes import router as agent_scan_router
|
||||
from compliance.api.agent_doc_check_routes import router as agent_doc_check_router
|
||||
|
||||
# Middleware
|
||||
from middleware import (
|
||||
@@ -144,6 +145,7 @@ app.include_router(company_profile_router, prefix="/api")
|
||||
app.include_router(agent_notify_router, prefix="/api")
|
||||
app.include_router(agent_analyze_router, prefix="/api")
|
||||
app.include_router(agent_scan_router, prefix="/api")
|
||||
app.include_router(agent_doc_check_router, prefix="/api")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user