Compare commits
12 Commits
cfc130a544
...
4c43253a53
| Author | SHA1 | Date | |
|---|---|---|---|
| 4c43253a53 | |||
| 0f1fae61a6 | |||
| 711b9b3146 | |||
| d0dc284cd5 | |||
| 24fb1e14e0 | |||
| 6aa753146f | |||
| acd2d5f944 | |||
| 2a6f526c88 | |||
| 1988274420 | |||
| cb5aa2949b | |||
| 41fd7e36d1 | |||
| f7483f5724 |
@@ -0,0 +1,38 @@
|
|||||||
|
/**
|
||||||
|
* Agent Scan API Proxy
|
||||||
|
* POST /api/sdk/v1/agent/scan → backend-compliance /api/compliance/agent/scan
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { NextRequest, NextResponse } from 'next/server'
|
||||||
|
|
||||||
|
const BACKEND_URL = process.env.BACKEND_API_URL || 'http://backend-compliance:8002'
|
||||||
|
|
||||||
|
/**
 * Proxies a scan request to the compliance backend.
 *
 * The raw request body is passed through untouched as JSON to
 * `${BACKEND_URL}/api/compliance/agent/scan`. A successful backend answer is
 * relayed as JSON; a non-2xx answer is relayed with the backend's status code
 * and its body text in `detail`. Anything that throws (network failure,
 * timeout, unparsable backend JSON) is logged and answered with 503.
 */
export async function POST(request: NextRequest) {
  const scanEndpoint = `${BACKEND_URL}/api/compliance/agent/scan`

  try {
    const payload = await request.text()

    const upstream = await fetch(scanEndpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: payload,
      // 3 minutes: a multi-page scan plus LLM work can take a while
      signal: AbortSignal.timeout(180000),
    })

    if (upstream.ok) {
      const scanResult = await upstream.json()
      return NextResponse.json(scanResult)
    }

    // Hand the backend's status and message back to the caller
    const detail = await upstream.text()
    return NextResponse.json(
      { error: `Backend: ${upstream.status}`, detail },
      { status: upstream.status }
    )
  } catch (failure) {
    console.error('Agent scan proxy error:', failure)
    return NextResponse.json(
      { error: 'Scan fehlgeschlagen oder Timeout' },
      { status: 503 }
    )
  }
}
|
||||||
@@ -0,0 +1,91 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import React from 'react'
|
||||||
|
import type { FollowUpQuestion } from '../_hooks/useAgentAnalysis'
|
||||||
|
|
||||||
|
/**
 * Per-severity look of an open follow-up question: Tailwind border and
 * background classes for the card, plus the short marker shown in its badge.
 */
const SEVERITY_STYLE: Record<string, { border: string; bg: string; icon: string }> = {
  high: {
    border: 'border-red-300',
    bg: 'bg-red-50',
    icon: '!!',
  },
  medium: {
    border: 'border-yellow-300',
    bg: 'bg-yellow-50',
    icon: '!',
  },
  low: {
    border: 'border-blue-300',
    bg: 'bg-blue-50',
    icon: 'i',
  },
}
|
||||||
|
|
||||||
|
/** Inputs of the FollowUpQuestions component. */
interface Props {
  /** All follow-up questions to display, answered or not. */
  questions: FollowUpQuestion[]
  /** Answers keyed by question id; a missing key means "not answered yet". */
  answers: Record<string, boolean>
  /** Invoked with the question id and `true` (Ja) or `false` (Nein) when a button is clicked. */
  onAnswer: (questionId: string, answer: boolean) => void
}
|
||||||
|
|
||||||
|
/**
 * Shows the manual follow-up questions of an analysis.
 *
 * Questions without an entry in `answers` are rendered first as cards with
 * "Ja"/"Nein" buttons; questions that already have an answer follow as
 * compact rows, and a "Nein" row additionally shows the question's
 * `finding_if_no` text. Renders nothing when there are no questions at all.
 */
export function FollowUpQuestions({ questions, answers, onAnswer }: Props) {
  if (questions.length === 0) return null

  // Split into open and answered questions, keeping the incoming order in each group
  const pending: FollowUpQuestion[] = []
  const resolved: FollowUpQuestion[] = []
  questions.forEach(entry => {
    const bucket = answers[entry.id] === undefined ? pending : resolved
    bucket.push(entry)
  })

  // Colour of the round severity badge; anything that is not high/medium is shown in blue
  const badgeTone = (level: string): string => {
    switch (level) {
      case 'high':
        return 'bg-red-200 text-red-800'
      case 'medium':
        return 'bg-yellow-200 text-yellow-800'
      default:
        return 'bg-blue-200 text-blue-800'
    }
  }

  const renderPending = (entry: FollowUpQuestion) => {
    // Unknown severities borrow the "medium" card colours and get a '?' marker
    const look = SEVERITY_STYLE[entry.severity] || SEVERITY_STYLE.medium
    const marker = SEVERITY_STYLE[entry.severity]?.icon || '?'

    return (
      <div key={entry.id} className={`border ${look.border} ${look.bg} rounded-lg p-4`}>
        <div className="flex items-start gap-3">
          <span className={`mt-0.5 w-6 h-6 rounded-full flex items-center justify-center text-xs font-bold ${badgeTone(entry.severity)}`}>
            {marker}
          </span>
          <div className="flex-1">
            <p className="text-sm font-medium text-gray-900">{entry.question}</p>
            <p className="text-xs text-gray-500 mt-1">Rechtsgrundlage: {entry.legal_basis}</p>
            <div className="flex gap-2 mt-3">
              <button
                onClick={() => onAnswer(entry.id, true)}
                className="px-4 py-1.5 text-sm bg-green-600 text-white rounded-md hover:bg-green-700 transition-colors"
              >
                Ja
              </button>
              <button
                onClick={() => onAnswer(entry.id, false)}
                className="px-4 py-1.5 text-sm bg-red-600 text-white rounded-md hover:bg-red-700 transition-colors"
              >
                Nein
              </button>
            </div>
          </div>
        </div>
      </div>
    )
  }

  const renderResolved = (entry: FollowUpQuestion) => {
    const saidYes = answers[entry.id]
    const frame = saidYes ? 'border-green-200 bg-green-50' : 'border-red-200 bg-red-50'
    const markTone = saidYes ? 'text-green-700' : 'text-red-700'
    const verdictTone = saidYes ? 'text-green-600' : 'text-red-600'

    return (
      <div key={entry.id} className={`border rounded-lg p-3 ${frame}`}>
        <div className="flex items-center gap-2">
          <span className={`text-sm ${markTone}`}>
            {saidYes ? '✓' : '✗'}
          </span>
          <span className="text-sm text-gray-700">{entry.question}</span>
          <span className={`ml-auto text-xs font-medium ${verdictTone}`}>
            {saidYes ? 'Ja — OK' : 'Nein — Finding erstellt'}
          </span>
        </div>
        {saidYes ? null : (
          <p className="text-xs text-red-600 mt-1 ml-6">{entry.finding_if_no}</p>
        )}
      </div>
    )
  }

  return (
    <div className="space-y-3">
      <h4 className="text-sm font-medium text-gray-700 flex items-center gap-2">
        <svg className="w-4 h-4 text-amber-500" fill="none" stroke="currentColor" viewBox="0 0 24 24">
          <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M8.228 9c.549-1.165 2.03-2 3.772-2 2.21 0 4 1.343 4 3 0 1.4-1.278 2.575-3.006 2.907-.542.104-.994.54-.994 1.093m0 3h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
        </svg>
        Rueckfragen zur manuellen Pruefung ({pending.length} offen)
      </h4>

      {/* Open questions */}
      {pending.map(renderPending)}

      {/* Questions that already have an answer */}
      {resolved.map(renderResolved)}
    </div>
  )
}
|
||||||
@@ -0,0 +1,170 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import React, { useState } from 'react'
|
||||||
|
|
||||||
|
/** One third-party service reported by the website scan. */
interface ServiceInfo {
  /** Display name of the service. */
  name: string
  /** Category label, shown next to the name in the table. */
  category: string
  provider: string
  /** Shown in the "Land" column. */
  country: string
  /** Shown as a check or cross in the "EU" column. */
  eu_adequate: boolean
  requires_consent: boolean
  legal_ref: string
  /** Shown as Ja/Nein in the "In DSE" column. */
  in_dse: boolean
  /** The component distinguishes 'ok', 'undocumented' and 'outdated'. */
  status: string
}
|
||||||
|
|
||||||
|
/** One finding produced by the website scan. */
interface ScanFinding {
  /** Finding code as delivered by the backend. */
  code: string
  /** The component styles 'HIGH', 'MEDIUM' and 'LOW'; 'HIGH' is counted as critical. */
  severity: string
  /** Description shown to the user. */
  text: string
  /** Suggested correction text; the expand/copy controls appear only when it is non-empty. */
  correction: string
}
|
||||||
|
|
||||||
|
/** Complete scan response as consumed by the ScanResult component. */
interface ScanData {
  /** Number shown as "Seiten gescannt". */
  pages_scanned: number
  /** Services listed in the comparison table. */
  services: ServiceInfo[]
  /** Findings listed below the table. */
  findings: ScanFinding[]
  /** Drives the "KI erkannt" / "Keine KI erkannt" badge. */
  ai_detected: boolean
  /** Drives the chatbot badge. */
  chatbot_detected: boolean
  /** Provider name shown in the chatbot badge when a chatbot was detected. */
  chatbot_provider: string
  // The two fields below are part of the payload but are not rendered by ScanResult
  missing_pages: Record<string, number>
  email_status: string
}
|
||||||
|
|
||||||
|
/** Symbol and Tailwind text colour used in the status column, keyed by service status. */
const STATUS_ICON: Record<string, { icon: string; color: string }> = {
  ok: {
    icon: '✓',
    color: 'text-green-600',
  },
  undocumented: {
    icon: '✗',
    color: 'text-red-600',
  },
  outdated: {
    icon: '~',
    color: 'text-yellow-600',
  },
}
|
||||||
|
|
||||||
|
/** Card background/border classes and label colour for a finding, keyed by upper-case severity. */
const SEV_STYLE: Record<string, { bg: string; text: string }> = {
  HIGH: {
    bg: 'bg-red-50 border-red-200',
    text: 'text-red-800',
  },
  MEDIUM: {
    bg: 'bg-yellow-50 border-yellow-200',
    text: 'text-yellow-800',
  },
  LOW: {
    bg: 'bg-blue-50 border-blue-200',
    text: 'text-blue-800',
  },
}
|
||||||
|
|
||||||
|
/**
 * Renders the outcome of a website scan: a four-tile summary, AI/chatbot
 * badges, the service comparison table and the list of findings with an
 * expandable, copyable correction text per finding.
 *
 * @param data - Scan response. The caller hands over unvalidated JSON, so
 *   missing `services` / `findings` lists are treated as empty.
 */
export function ScanResult({ data }: { data: ScanData }) {
  // Position of the finding whose correction is open. Tracked by list index,
  // not by `code`, because nothing guarantees codes are unique and findings
  // sharing a code would otherwise open and close together.
  const [expandedCorrection, setExpandedCorrection] = useState<number | null>(null)

  const services = data.services ?? []
  const findings = data.findings ?? []

  const undocCount = services.filter(s => s.status === 'undocumented').length
  const okCount = services.filter(s => s.status === 'ok').length
  const outdatedCount = services.filter(s => s.status === 'outdated').length
  const highCount = findings.filter(f => f.severity === 'HIGH').length

  // Shown for a status the table does not know. Falling back to the green
  // check would present the service as documented although the "Dokumentiert"
  // counter above does not include it.
  const unknownStatus = { icon: '?', color: 'text-gray-500' }

  // navigator.clipboard is undefined outside secure contexts and writeText()
  // may reject (e.g. permission denied); neither should break the click handler.
  const copyCorrection = (text: string) => {
    void navigator.clipboard?.writeText(text).catch(err => {
      console.error('Copy to clipboard failed:', err)
    })
  }

  return (
    <div className="space-y-5">
      {/* Summary Bar */}
      <div className="grid grid-cols-4 gap-3">
        <div className="bg-gray-50 rounded-lg p-3 text-center">
          <p className="text-2xl font-bold text-gray-900">{data.pages_scanned}</p>
          <p className="text-xs text-gray-500">Seiten gescannt</p>
        </div>
        <div className="bg-green-50 rounded-lg p-3 text-center">
          <p className="text-2xl font-bold text-green-700">{okCount}</p>
          <p className="text-xs text-gray-500">Dokumentiert</p>
        </div>
        <div className="bg-red-50 rounded-lg p-3 text-center">
          <p className="text-2xl font-bold text-red-700">{undocCount}</p>
          <p className="text-xs text-gray-500">Nicht in DSE</p>
        </div>
        <div className="bg-yellow-50 rounded-lg p-3 text-center">
          <p className="text-2xl font-bold text-yellow-700">{outdatedCount}</p>
          <p className="text-xs text-gray-500">Veraltet</p>
        </div>
      </div>

      {/* AI / Chatbot Detection */}
      <div className="flex gap-3">
        <span className={`px-3 py-1 rounded-full text-xs font-medium ${data.ai_detected ? 'bg-purple-100 text-purple-800' : 'bg-gray-100 text-gray-600'}`}>
          {data.ai_detected ? 'KI erkannt' : 'Keine KI erkannt'}
        </span>
        <span className={`px-3 py-1 rounded-full text-xs font-medium ${data.chatbot_detected ? 'bg-blue-100 text-blue-800' : 'bg-gray-100 text-gray-600'}`}>
          {data.chatbot_detected ? `Chatbot: ${data.chatbot_provider}` : 'Kein Chatbot'}
        </span>
      </div>

      {/* Services Table */}
      <div>
        <h4 className="text-sm font-medium text-gray-700 mb-2">Dienstleister-Abgleich (SOLL/IST)</h4>
        <div className="border rounded-lg overflow-hidden">
          <table className="w-full text-sm">
            <thead className="bg-gray-50">
              <tr>
                <th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Status</th>
                <th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Dienst</th>
                <th className="px-3 py-2 text-left text-xs font-medium text-gray-500">Land</th>
                <th className="px-3 py-2 text-left text-xs font-medium text-gray-500">EU</th>
                <th className="px-3 py-2 text-left text-xs font-medium text-gray-500">In DSE</th>
              </tr>
            </thead>
            <tbody className="divide-y divide-gray-100">
              {services.map((s, i) => {
                const st = STATUS_ICON[s.status] || unknownStatus
                return (
                  <tr key={i} className={s.status === 'undocumented' ? 'bg-red-50' : ''}>
                    <td className={`px-3 py-2 font-bold ${st.color}`}>{st.icon}</td>
                    <td className="px-3 py-2">
                      <span className="font-medium text-gray-900">{s.name}</span>
                      <span className="text-gray-400 text-xs ml-2">{s.category}</span>
                    </td>
                    <td className="px-3 py-2 text-gray-600">{s.country}</td>
                    <td className="px-3 py-2">{s.eu_adequate ? '✓' : '✗'}</td>
                    <td className="px-3 py-2">{s.in_dse ? 'Ja' : <span className="text-red-600 font-medium">Nein</span>}</td>
                  </tr>
                )
              })}
            </tbody>
          </table>
        </div>
      </div>

      {/* Findings */}
      {findings.length > 0 && (
        <div>
          <h4 className="text-sm font-medium text-gray-700 mb-2">
            Findings ({findings.length}, davon {highCount} kritisch)
          </h4>
          <div className="space-y-2">
            {findings.map((f, i) => {
              // Unknown severities use the MEDIUM colours
              const sev = SEV_STYLE[f.severity] || SEV_STYLE.MEDIUM
              const isExpanded = expandedCorrection === i
              return (
                <div key={i} className={`border rounded-lg p-3 ${sev.bg}`}>
                  <div className="flex items-start gap-2">
                    <span className={`text-xs font-bold px-2 py-0.5 rounded ${sev.text} bg-white`}>
                      {f.severity}
                    </span>
                    <p className="text-sm text-gray-800 flex-1">{f.text}</p>
                  </div>
                  {f.correction && (
                    <div className="mt-2">
                      <button
                        onClick={() => setExpandedCorrection(isExpanded ? null : i)}
                        className="text-xs text-purple-600 hover:text-purple-800 font-medium"
                      >
                        {isExpanded ? '▼ Korrekturvorschlag ausblenden' : '▶ Korrekturvorschlag anzeigen'}
                      </button>
                      {isExpanded && (
                        <div className="mt-2 bg-white border border-gray-200 rounded-lg p-3 relative">
                          <pre className="text-xs text-gray-700 whitespace-pre-wrap font-sans">{f.correction}</pre>
                          <button
                            onClick={() => copyCorrection(f.correction)}
                            className="absolute top-2 right-2 text-xs bg-gray-100 hover:bg-gray-200 px-2 py-1 rounded"
                            title="Kopieren"
                          >
                            Kopieren
                          </button>
                        </div>
                      )}
                    </div>
                  )}
                </div>
              )
            })}
          </div>
        </div>
      )}
    </div>
  )
}
|
||||||
@@ -2,6 +2,14 @@
|
|||||||
|
|
||||||
import { useState } from 'react'
|
import { useState } from 'react'
|
||||||
|
|
||||||
|
export interface FollowUpQuestion {
|
||||||
|
id: string
|
||||||
|
question: string
|
||||||
|
legal_basis: string
|
||||||
|
severity: 'high' | 'medium' | 'low'
|
||||||
|
finding_if_no: string
|
||||||
|
}
|
||||||
|
|
||||||
export interface AnalysisResult {
|
export interface AnalysisResult {
|
||||||
url: string
|
url: string
|
||||||
classification: string
|
classification: string
|
||||||
@@ -14,6 +22,8 @@ export interface AnalysisResult {
|
|||||||
summary: string
|
summary: string
|
||||||
email_status: string
|
email_status: string
|
||||||
analyzed_at: string
|
analyzed_at: string
|
||||||
|
follow_up_questions: FollowUpQuestion[]
|
||||||
|
follow_up_answers: Record<string, boolean>
|
||||||
}
|
}
|
||||||
|
|
||||||
const ESCALATION_ROLES: Record<string, string> = {
|
const ESCALATION_ROLES: Record<string, string> = {
|
||||||
@@ -23,29 +33,22 @@ const ESCALATION_ROLES: Record<string, string> = {
|
|||||||
E3: 'DSB + Rechtsabteilung',
|
E3: 'DSB + Rechtsabteilung',
|
||||||
}
|
}
|
||||||
|
|
||||||
const SDK_HEADERS = {
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'X-Tenant-ID': '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e',
|
|
||||||
'X-User-ID': '00000000-0000-0000-0000-000000000001',
|
|
||||||
}
|
|
||||||
|
|
||||||
export function useAgentAnalysis() {
|
export function useAgentAnalysis() {
|
||||||
const [loading, setLoading] = useState(false)
|
const [loading, setLoading] = useState(false)
|
||||||
const [error, setError] = useState<string | null>(null)
|
const [error, setError] = useState<string | null>(null)
|
||||||
const [result, setResult] = useState<AnalysisResult | null>(null)
|
const [result, setResult] = useState<AnalysisResult | null>(null)
|
||||||
const [history, setHistory] = useState<AnalysisResult[]>([])
|
const [history, setHistory] = useState<AnalysisResult[]>([])
|
||||||
|
|
||||||
async function analyze(url: string) {
|
async function analyze(url: string, mode: string = 'post_launch') {
|
||||||
setLoading(true)
|
setLoading(true)
|
||||||
setError(null)
|
setError(null)
|
||||||
setResult(null)
|
setResult(null)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Step 1: Fetch and classify
|
|
||||||
const fetchRes = await fetch('/api/sdk/v1/agent/analyze', {
|
const fetchRes = await fetch('/api/sdk/v1/agent/analyze', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ url }),
|
body: JSON.stringify({ url, mode }),
|
||||||
})
|
})
|
||||||
|
|
||||||
if (!fetchRes.ok) {
|
if (!fetchRes.ok) {
|
||||||
@@ -65,6 +68,8 @@ export function useAgentAnalysis() {
|
|||||||
summary: data.summary || '',
|
summary: data.summary || '',
|
||||||
email_status: data.email_status || 'pending',
|
email_status: data.email_status || 'pending',
|
||||||
analyzed_at: new Date().toISOString(),
|
analyzed_at: new Date().toISOString(),
|
||||||
|
follow_up_questions: data.follow_up_questions || [],
|
||||||
|
follow_up_answers: {},
|
||||||
}
|
}
|
||||||
|
|
||||||
setResult(analysisResult)
|
setResult(analysisResult)
|
||||||
@@ -76,5 +81,26 @@ export function useAgentAnalysis() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return { analyze, loading, error, result, history }
|
function answerFollowUp(questionId: string, answer: boolean) {
|
||||||
|
if (!result) return
|
||||||
|
const question = result.follow_up_questions.find(q => q.id === questionId)
|
||||||
|
const newAnswers = { ...result.follow_up_answers, [questionId]: answer }
|
||||||
|
const newFindings = [...result.findings]
|
||||||
|
|
||||||
|
// If user answered "no" → add the finding
|
||||||
|
if (!answer && question) {
|
||||||
|
newFindings.push(question.finding_if_no)
|
||||||
|
}
|
||||||
|
|
||||||
|
const updated = {
|
||||||
|
...result,
|
||||||
|
findings: newFindings,
|
||||||
|
follow_up_answers: newAnswers,
|
||||||
|
}
|
||||||
|
setResult(updated)
|
||||||
|
// Update history too
|
||||||
|
setHistory(prev => prev.map(h => h.analyzed_at === result.analyzed_at ? updated : h))
|
||||||
|
}
|
||||||
|
|
||||||
|
return { analyze, answerFollowUp, loading, error, result, history }
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,80 +4,142 @@ import React, { useState } from 'react'
|
|||||||
import { useAgentAnalysis } from './_hooks/useAgentAnalysis'
|
import { useAgentAnalysis } from './_hooks/useAgentAnalysis'
|
||||||
import { AnalysisResult } from './_components/AnalysisResult'
|
import { AnalysisResult } from './_components/AnalysisResult'
|
||||||
import { AnalysisHistory } from './_components/AnalysisHistory'
|
import { AnalysisHistory } from './_components/AnalysisHistory'
|
||||||
|
import { FollowUpQuestions } from './_components/FollowUpQuestions'
|
||||||
|
import { ScanResult } from './_components/ScanResult'
|
||||||
|
|
||||||
|
type AnalysisMode = 'pre_launch' | 'post_launch'
|
||||||
|
type AnalysisTab = 'quick' | 'scan'
|
||||||
|
|
||||||
|
const MODES: { id: AnalysisMode; label: string; desc: string; icon: string }[] = [
|
||||||
|
{ id: 'pre_launch', label: 'Internes Dokument', desc: 'Vor Veroeffentlichung pruefen', icon: '📋' },
|
||||||
|
{ id: 'post_launch', label: 'Live-Website', desc: 'Bereits online analysieren', icon: '🌐' },
|
||||||
|
]
|
||||||
|
|
||||||
|
const TABS: { id: AnalysisTab; label: string; desc: string }[] = [
|
||||||
|
{ id: 'quick', label: 'Schnellanalyse', desc: 'Einzelne Seite klassifizieren + bewerten' },
|
||||||
|
{ id: 'scan', label: 'Website-Scan', desc: 'Mehrere Seiten scannen + Dienstleister abgleichen' },
|
||||||
|
]
|
||||||
|
|
||||||
export default function AgentPage() {
|
export default function AgentPage() {
|
||||||
const [url, setUrl] = useState('')
|
const [url, setUrl] = useState('')
|
||||||
const { analyze, loading, error, result, history } = useAgentAnalysis()
|
const [mode, setMode] = useState<AnalysisMode>('post_launch')
|
||||||
|
const [tab, setTab] = useState<AnalysisTab>('quick')
|
||||||
|
const [scanLoading, setScanLoading] = useState(false)
|
||||||
|
const [scanError, setScanError] = useState<string | null>(null)
|
||||||
|
const [scanData, setScanData] = useState<any>(null)
|
||||||
|
const { analyze, answerFollowUp, loading, error, result, history } = useAgentAnalysis()
|
||||||
|
|
||||||
const handleSubmit = (e: React.FormEvent) => {
|
const handleSubmit = async (e: React.FormEvent) => {
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
if (!url.trim()) return
|
if (!url.trim()) return
|
||||||
analyze(url.trim())
|
|
||||||
|
if (tab === 'quick') {
|
||||||
|
analyze(url.trim(), mode)
|
||||||
|
} else {
|
||||||
|
setScanLoading(true)
|
||||||
|
setScanError(null)
|
||||||
|
setScanData(null)
|
||||||
|
try {
|
||||||
|
const res = await fetch('/api/sdk/v1/agent/scan', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ url: url.trim(), mode }),
|
||||||
|
})
|
||||||
|
if (!res.ok) throw new Error(`Scan fehlgeschlagen: ${res.status}`)
|
||||||
|
setScanData(await res.json())
|
||||||
|
} catch (e) {
|
||||||
|
setScanError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||||
|
} finally {
|
||||||
|
setScanLoading(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const isLoading = tab === 'quick' ? loading : scanLoading
|
||||||
|
const currentError = tab === 'quick' ? error : scanError
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="space-y-6 max-w-4xl">
|
<div className="space-y-6 max-w-4xl">
|
||||||
{/* Header */}
|
|
||||||
<div>
|
<div>
|
||||||
<h1 className="text-2xl font-bold text-gray-900">Compliance Agent</h1>
|
<h1 className="text-2xl font-bold text-gray-900">Compliance Agent</h1>
|
||||||
<p className="text-gray-500 mt-1">
|
<p className="text-gray-500 mt-1">Analysiere Dokumente und Webseiten auf DSGVO-Konformitaet.</p>
|
||||||
Analysiere Webseiten auf DSGVO-Konformitaet. Der Agent holt das Dokument,
|
</div>
|
||||||
klassifiziert es, bewertet das Risiko und weist die Aufgabe der zustaendigen Rolle zu.
|
|
||||||
</p>
|
{/* Mode Selection */}
|
||||||
|
<div className="grid grid-cols-2 gap-3">
|
||||||
|
{MODES.map(m => (
|
||||||
|
<button key={m.id} onClick={() => setMode(m.id)}
|
||||||
|
className={`p-3 rounded-xl border-2 text-left transition-all ${
|
||||||
|
mode === m.id ? 'border-purple-500 bg-purple-50' : 'border-gray-200 hover:border-gray-300'}`}>
|
||||||
|
<div className="flex items-center gap-3">
|
||||||
|
<span className="text-xl">{m.icon}</span>
|
||||||
|
<div>
|
||||||
|
<p className={`text-sm font-semibold ${mode === m.id ? 'text-purple-900' : 'text-gray-900'}`}>{m.label}</p>
|
||||||
|
<p className="text-xs text-gray-500">{m.desc}</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</button>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Tab Selection */}
|
||||||
|
<div className="flex border-b border-gray-200">
|
||||||
|
{TABS.map(t => (
|
||||||
|
<button key={t.id} onClick={() => setTab(t.id)}
|
||||||
|
className={`px-4 py-2.5 text-sm font-medium border-b-2 transition-colors ${
|
||||||
|
tab === t.id
|
||||||
|
? 'border-purple-500 text-purple-700'
|
||||||
|
: 'border-transparent text-gray-500 hover:text-gray-700'}`}>
|
||||||
|
{t.label}
|
||||||
|
</button>
|
||||||
|
))}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* URL Input */}
|
{/* URL Input */}
|
||||||
<form onSubmit={handleSubmit} className="flex gap-3">
|
<form onSubmit={handleSubmit} className="flex gap-3">
|
||||||
<input
|
<input type="url" value={url} onChange={e => setUrl(e.target.value)}
|
||||||
type="url"
|
placeholder={tab === 'scan' ? 'https://www.example.com/' : 'https://example.com/datenschutz'}
|
||||||
value={url}
|
|
||||||
onChange={e => setUrl(e.target.value)}
|
|
||||||
placeholder="https://example.com/datenschutz"
|
|
||||||
className="flex-1 px-4 py-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-purple-500 focus:border-transparent text-sm"
|
className="flex-1 px-4 py-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-purple-500 focus:border-transparent text-sm"
|
||||||
disabled={loading}
|
disabled={isLoading} required />
|
||||||
required
|
<button type="submit" disabled={isLoading || !url.trim()}
|
||||||
/>
|
className="px-6 py-3 bg-purple-600 text-white rounded-lg hover:bg-purple-700 disabled:opacity-50 transition-colors flex items-center gap-2 text-sm font-medium">
|
||||||
<button
|
{isLoading ? (
|
||||||
type="submit"
|
<><svg className="animate-spin w-4 h-4" fill="none" viewBox="0 0 24 24">
|
||||||
disabled={loading || !url.trim()}
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||||
className="px-6 py-3 bg-purple-600 text-white rounded-lg hover:bg-purple-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors flex items-center gap-2 text-sm font-medium"
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
|
||||||
>
|
</svg>{tab === 'scan' ? 'Scanne...' : 'Analysiere...'}</>
|
||||||
{loading ? (
|
) : tab === 'scan' ? 'Website scannen' : 'Analysieren'}
|
||||||
<>
|
|
||||||
<svg className="animate-spin w-4 h-4" fill="none" viewBox="0 0 24 24">
|
|
||||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
|
||||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
|
|
||||||
</svg>
|
|
||||||
Analysiere...
|
|
||||||
</>
|
|
||||||
) : (
|
|
||||||
'Analysieren'
|
|
||||||
)}
|
|
||||||
</button>
|
</button>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
{/* Error */}
|
{/* Error */}
|
||||||
{error && (
|
{currentError && (
|
||||||
<div className="bg-red-50 border border-red-200 rounded-lg p-4 text-sm text-red-700">
|
<div className="bg-red-50 border border-red-200 rounded-lg p-4 text-sm text-red-700">{currentError}</div>
|
||||||
{error}
|
|
||||||
</div>
|
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Result */}
|
{/* Quick Analysis Result */}
|
||||||
{result && (
|
{tab === 'quick' && result && (
|
||||||
<div className="bg-white border border-gray-200 rounded-xl p-6 shadow-sm">
|
<div className="bg-white border border-gray-200 rounded-xl p-6 shadow-sm space-y-6">
|
||||||
<AnalysisResult result={result} />
|
<AnalysisResult result={result} />
|
||||||
|
{result.follow_up_questions.length > 0 && (
|
||||||
|
<div className="border-t pt-4">
|
||||||
|
<FollowUpQuestions questions={result.follow_up_questions} answers={result.follow_up_answers} onAnswer={answerFollowUp} />
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* History */}
|
{/* Scan Result */}
|
||||||
<AnalysisHistory
|
{tab === 'scan' && scanData && (
|
||||||
history={history}
|
<div className="bg-white border border-gray-200 rounded-xl p-6 shadow-sm">
|
||||||
onSelect={r => {
|
<ScanResult data={scanData} />
|
||||||
setUrl(r.url)
|
</div>
|
||||||
analyze(r.url)
|
)}
|
||||||
}}
|
|
||||||
/>
|
{/* History (quick only) */}
|
||||||
|
{tab === 'quick' && (
|
||||||
|
<AnalysisHistory history={history} onSelect={r => { setUrl(r.url); analyze(r.url, mode) }} />
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ export function EditorTab({
|
|||||||
{/* Variables */}
|
{/* Variables */}
|
||||||
<div className="flex flex-wrap gap-1.5">
|
<div className="flex flex-wrap gap-1.5">
|
||||||
<span className="text-xs text-gray-500 mr-1">Variablen:</span>
|
<span className="text-xs text-gray-500 mr-1">Variablen:</span>
|
||||||
{(template.variables || []).map(v => (
|
{(Array.isArray(template.variables) ? template.variables : []).map(v => (
|
||||||
<button
|
<button
|
||||||
key={v}
|
key={v}
|
||||||
onClick={() => onHtmlChange(html + `{{${v}}}`)}
|
onClick={() => onHtmlChange(html + `{{${v}}}`)}
|
||||||
|
|||||||
@@ -30,12 +30,12 @@ export function TemplateCard({ template, onEdit }: TemplateCardProps) {
|
|||||||
<p className="text-xs text-gray-500 mt-2 line-clamp-2">{template.description}</p>
|
<p className="text-xs text-gray-500 mt-2 line-clamp-2">{template.description}</p>
|
||||||
)}
|
)}
|
||||||
<div className="mt-3 flex flex-wrap gap-1">
|
<div className="mt-3 flex flex-wrap gap-1">
|
||||||
{(template.variables || []).slice(0, 4).map(v => (
|
{(Array.isArray(template.variables) ? template.variables : []).slice(0, 4).map(v => (
|
||||||
<span key={v} className="px-1.5 py-0.5 bg-gray-50 text-gray-500 rounded text-xs font-mono">
|
<span key={v} className="px-1.5 py-0.5 bg-gray-50 text-gray-500 rounded text-xs font-mono">
|
||||||
{`{{${v}}}`}
|
{`{{${v}}}`}
|
||||||
</span>
|
</span>
|
||||||
))}
|
))}
|
||||||
{(template.variables || []).length > 4 && (
|
{Array.isArray(template.variables) && template.variables.length > 4 && (
|
||||||
<span className="text-xs text-gray-400">+{template.variables.length - 4}</span>
|
<span className="text-xs text-gray-400">+{template.variables.length - 4}</span>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -54,6 +54,7 @@ export function SidebarModuleList({ collapsed, projectId, pendingCRCount }: Side
|
|||||||
<AdditionalModuleItem href="/sdk/ai-act" icon={<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 10V3L4 14h7v7l9-11h-7z" /></svg>} label="AI Act" isActive={pathname?.startsWith('/sdk/ai-act') ?? false} collapsed={collapsed} projectId={projectId} />
|
<AdditionalModuleItem href="/sdk/ai-act" icon={<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 10V3L4 14h7v7l9-11h-7z" /></svg>} label="AI Act" isActive={pathname?.startsWith('/sdk/ai-act') ?? false} collapsed={collapsed} projectId={projectId} />
|
||||||
<AdditionalModuleItem href="/sdk/ai-registration" icon={<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 21V5a2 2 0 00-2-2H7a2 2 0 00-2 2v16m14 0h2m-2 0h-5m-9 0H3m2 0h5M9 7h1m-1 4h1m4-4h1m-1 4h1m-5 10v-5a1 1 0 011-1h2a1 1 0 011 1v5m-4 0h4" /></svg>} label="EU Registrierung" isActive={pathname?.startsWith('/sdk/ai-registration') ?? false} collapsed={collapsed} projectId={projectId} />
|
<AdditionalModuleItem href="/sdk/ai-registration" icon={<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 21V5a2 2 0 00-2-2H7a2 2 0 00-2 2v16m14 0h2m-2 0h-5m-9 0H3m2 0h5M9 7h1m-1 4h1m4-4h1m-1 4h1m-5 10v-5a1 1 0 011-1h2a1 1 0 011 1v5m-4 0h4" /></svg>} label="EU Registrierung" isActive={pathname?.startsWith('/sdk/ai-registration') ?? false} collapsed={collapsed} projectId={projectId} />
|
||||||
<AdditionalModuleItem href="/sdk/compliance-optimizer" icon={<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 7h8m0 0v8m0-8l-8 8-4-4-6 6" /></svg>} label="Compliance Optimizer" isActive={pathname?.startsWith('/sdk/compliance-optimizer') ?? false} collapsed={collapsed} projectId={projectId} />
|
<AdditionalModuleItem href="/sdk/compliance-optimizer" icon={<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 7h8m0 0v8m0-8l-8 8-4-4-6 6" /></svg>} label="Compliance Optimizer" isActive={pathname?.startsWith('/sdk/compliance-optimizer') ?? false} collapsed={collapsed} projectId={projectId} />
|
||||||
|
<AdditionalModuleItem href="/sdk/agent" icon={<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9.75 17L9 20l-1 1h8l-1-1-.75-3M3 13h18M5 17h14a2 2 0 002-2V5a2 2 0 00-2-2H5a2 2 0 00-2 2v10a2 2 0 002 2z" /></svg>} label="Compliance Agent" isActive={pathname?.startsWith('/sdk/agent') ?? false} collapsed={collapsed} projectId={projectId} />
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Payment / Terminal */}
|
{/* Payment / Terminal */}
|
||||||
|
|||||||
@@ -41,6 +41,15 @@ SDK_HEADERS = {
|
|||||||
class AnalyzeRequest(BaseModel):
|
class AnalyzeRequest(BaseModel):
|
||||||
url: str
|
url: str
|
||||||
recipient: str = "dsb@breakpilot.local"
|
recipient: str = "dsb@breakpilot.local"
|
||||||
|
mode: str = "post_launch" # "pre_launch" or "post_launch"
|
||||||
|
|
||||||
|
|
||||||
|
class FollowUpQuestion(BaseModel):
    """Question handed back to the caller when an automated check is inconclusive.

    Instances are created by the website compliance checks and returned in the
    ``follow_up_questions`` field of the analyze response.
    """

    id: str  # stable identifier of the question, e.g. "cancel_button_312k"
    question: str  # question text shown to the user
    legal_basis: str  # legal reference the question is based on
    severity: str  # "high", "medium", "low"
    finding_if_no: str  # finding text to record if the user answers "no"
|
||||||
|
|
||||||
|
|
||||||
class AnalyzeResponse(BaseModel):
|
class AnalyzeResponse(BaseModel):
|
||||||
@@ -55,6 +64,7 @@ class AnalyzeResponse(BaseModel):
|
|||||||
summary: str
|
summary: str
|
||||||
email_status: str
|
email_status: str
|
||||||
analyzed_at: str
|
analyzed_at: str
|
||||||
|
follow_up_questions: list[FollowUpQuestion] = []
|
||||||
|
|
||||||
|
|
||||||
@router.post("/analyze", response_model=AnalyzeResponse)
|
@router.post("/analyze", response_model=AnalyzeResponse)
|
||||||
@@ -62,7 +72,7 @@ async def analyze_url(req: AnalyzeRequest):
|
|||||||
"""Fetch URL, classify, assess compliance, and notify responsible role."""
|
"""Fetch URL, classify, assess compliance, and notify responsible role."""
|
||||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||||
# Step 1: Fetch and clean
|
# Step 1: Fetch and clean
|
||||||
text = await _fetch_and_clean(client, req.url)
|
text, raw_html = await _fetch_and_clean(client, req.url)
|
||||||
|
|
||||||
# Step 2: Classify via SDK LLM
|
# Step 2: Classify via SDK LLM
|
||||||
classification = await _classify(client, text)
|
classification = await _classify(client, text)
|
||||||
@@ -74,15 +84,27 @@ async def analyze_url(req: AnalyzeRequest):
|
|||||||
esc_level = assessment.get("escalation_level", "E0")
|
esc_level = assessment.get("escalation_level", "E0")
|
||||||
role = ESCALATION_ROLES.get(esc_level, ESCALATION_ROLES["E0"])
|
role = ESCALATION_ROLES.get(esc_level, ESCALATION_ROLES["E0"])
|
||||||
|
|
||||||
# Step 5: Build summary
|
# Step 5: Website compliance checks (§312k BGB etc.)
|
||||||
|
site_findings, follow_ups = await _check_website_compliance(client, req.url, raw_html)
|
||||||
|
|
||||||
|
# Step 6: Merge findings
|
||||||
findings = assessment.get("triggered_rules", [])
|
findings = assessment.get("triggered_rules", [])
|
||||||
controls = assessment.get("required_controls", [])
|
controls = assessment.get("required_controls", [])
|
||||||
summary = _build_summary(req.url, classification, assessment, role)
|
findings_str = _to_string_list(findings) + site_findings
|
||||||
|
controls_str = _to_string_list(controls)
|
||||||
|
|
||||||
# Step 6: Send notification
|
# Escalate if website checks found issues
|
||||||
|
if site_findings and esc_level == "E0":
|
||||||
|
esc_level = "E1"
|
||||||
|
role = ESCALATION_ROLES["E1"]
|
||||||
|
|
||||||
|
summary = _build_summary(req.url, classification, assessment, role, findings_str, controls_str, req.mode)
|
||||||
|
|
||||||
|
# Step 7: Send notification
|
||||||
|
mode_label = "INTERNE PRUEFUNG" if req.mode == "pre_launch" else "LIVE-WEBSITE"
|
||||||
email_result = send_email(
|
email_result = send_email(
|
||||||
recipient=req.recipient,
|
recipient=req.recipient,
|
||||||
subject=f"Compliance-Finding: {classification} — {req.url[:60]}",
|
subject=f"[{mode_label}] Compliance-Finding: {classification} — {req.url[:60]}",
|
||||||
body_html=f"<div>{summary}</div>",
|
body_html=f"<div>{summary}</div>",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -93,16 +115,17 @@ async def analyze_url(req: AnalyzeRequest):
|
|||||||
risk_score=assessment.get("risk_score", 0),
|
risk_score=assessment.get("risk_score", 0),
|
||||||
escalation_level=esc_level,
|
escalation_level=esc_level,
|
||||||
responsible_role=role,
|
responsible_role=role,
|
||||||
findings=findings if isinstance(findings, list) else [str(findings)],
|
findings=findings_str,
|
||||||
required_controls=controls if isinstance(controls, list) else [str(controls)],
|
required_controls=controls_str,
|
||||||
summary=summary,
|
summary=summary,
|
||||||
email_status=email_result.get("status", "failed"),
|
email_status=email_result.get("status", "failed"),
|
||||||
analyzed_at=datetime.now(timezone.utc).isoformat(),
|
analyzed_at=datetime.now(timezone.utc).isoformat(),
|
||||||
|
follow_up_questions=follow_ups,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def _fetch_and_clean(client: httpx.AsyncClient, url: str) -> str:
|
async def _fetch_and_clean(client: httpx.AsyncClient, url: str) -> tuple[str, str]:
|
||||||
"""Fetch URL and strip HTML to plain text."""
|
"""Fetch URL. Returns (clean_text, raw_html)."""
|
||||||
resp = await client.get(url, follow_redirects=True, headers={
|
resp = await client.get(url, follow_redirects=True, headers={
|
||||||
"User-Agent": "BreakPilot-Compliance-Agent/1.0",
|
"User-Agent": "BreakPilot-Compliance-Agent/1.0",
|
||||||
})
|
})
|
||||||
@@ -112,7 +135,7 @@ async def _fetch_and_clean(client: httpx.AsyncClient, url: str) -> str:
|
|||||||
clean = re.sub(r"<[^>]+>", " ", clean)
|
clean = re.sub(r"<[^>]+>", " ", clean)
|
||||||
clean = re.sub(r" ", " ", clean)
|
clean = re.sub(r" ", " ", clean)
|
||||||
clean = re.sub(r"\s+", " ", clean).strip()
|
clean = re.sub(r"\s+", " ", clean).strip()
|
||||||
return clean[:4000]
|
return clean[:4000], html
|
||||||
|
|
||||||
|
|
||||||
async def _classify(client: httpx.AsyncClient, text: str) -> str:
|
async def _classify(client: httpx.AsyncClient, text: str) -> str:
|
||||||
@@ -204,6 +227,117 @@ async def _assess(client: httpx.AsyncClient, text: str, classification: str) ->
|
|||||||
return {"risk_level": "unknown", "risk_score": 0, "escalation_level": "E0"}
|
return {"risk_level": "unknown", "risk_score": 0, "escalation_level": "E0"}
|
||||||
|
|
||||||
|
|
||||||
|
async def _check_website_compliance(
    client: httpx.AsyncClient, url: str, html: str,
) -> tuple[list[str], list[FollowUpQuestion]]:
    """Scan a public page for consumer-protection compliance (§312k BGB etc.).

    Runs four heuristic checks on the page markup: cancellation button,
    imprint link, privacy-policy link and cookie-consent tooling.

    Args:
        client: Shared HTTP client, used to probe well-known cancellation URLs.
        url: URL of the analysed page; only scheme and host are used for probing.
        html: Raw HTML of the analysed page.

    Returns:
        ``(findings, follow_ups)`` — finding texts for checks that failed, and
        follow-up questions for checks that cannot be decided from the markup.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urlparse

    findings: list[str] = []
    follow_ups: list[FollowUpQuestion] = []
    html_lower = html.lower()

    def _matches_any(patterns: list[str]) -> bool:
        return any(re.search(p, html_lower) for p in patterns)

    # Derive the site origin with a real URL parser. The previous regex kept
    # "?query" / "#fragment" in the host when no path was present and returned
    # the whole input for scheme-less URLs.
    parsed = urlparse(url if "://" in url else f"https://{url}")
    scheme = parsed.scheme if parsed.scheme in ("http", "https") else "https"
    origin = f"{scheme}://{parsed.netloc}" if parsed.netloc else ""

    # --- §312k BGB: cancellation button ---
    cancel_patterns = [
        r'href="[^"]*(?:kuendig|kündig|cancel|vertrag.?beenden|abo.?beenden|mitgliedschaft.?beenden)[^"]*"',
        r'(?:kündigen|kuendigen|vertrag beenden|abo beenden|mitgliedschaft kündigen)',
    ]
    has_cancel_link = _matches_any(cancel_patterns)

    # Also probe common cancellation URLs when the markup shows nothing.
    if not has_cancel_link and origin:
        cancel_paths = (
            "/kuendigen",
            "/cancel",
            "/vertrag-kuendigen",
            "/abo-kuendigen",
            "/account/cancel",
        )
        for path in cancel_paths:
            try:
                probe = await client.head(f"{origin}{path}", follow_redirects=True, timeout=5.0)
            except Exception:
                # Best effort: an unreachable probe URL simply counts as "not found".
                continue
            if probe.status_code < 400:
                has_cancel_link = True
                break

    if not has_cancel_link:
        findings.append(
            "[§312k BGB] Kein oeffentlich sichtbarer Kuendigungsbutton gefunden. "
            "Seit 01.07.2022 muessen online geschlossene Vertraege mit max. 2 Klicks kuendbar sein."
        )
        follow_ups.append(FollowUpQuestion(
            id="cancel_button_312k",
            question="Koennen Sie nach Login im Kundenbereich innerhalb von 2 Klicks Ihren Vertrag kuendigen?",
            legal_basis="§ 312k BGB (Kuendigungsbutton), Omnibus-Richtlinie (EU) 2019/2161",
            severity="high",
            finding_if_no=(
                "[§312k BGB] VERSTOSS: Kein funktionaler Kuendigungsbutton vorhanden. "
                "Der Anbieter ist verpflichtet, einen leicht auffindbaren Kuendigungsbutton "
                "bereitzustellen (max. 2 Klicks). Ein Zwang zur telefonischen Kuendigung "
                "oder Kuendigung per Brief ist rechtswidrig."
            ),
        ))

    # --- Imprint obligation (§5 TMG / §18 MStV) ---
    imprint_patterns = [
        r'href="[^"]*(?:impressum|imprint|legal.?notice|about.?us/legal)[^"]*"',
        r'>impressum<',
    ]
    if not _matches_any(imprint_patterns):
        findings.append(
            "[§5 TMG] Kein Impressum-Link auf der Seite gefunden. "
            "Geschaeftsmaessige Online-Dienste muessen ein leicht erreichbares Impressum bereitstellen."
        )

    # --- Privacy policy linked? ---
    privacy_patterns = [
        r'href="[^"]*(?:datenschutz|privacy|dsgvo)[^"]*"',
        r'>datenschutz<',
    ]
    if not _matches_any(privacy_patterns):
        findings.append(
            "[Art. 13 DSGVO] Kein Link zur Datenschutzerklaerung gefunden. "
            "Nutzer muessen ueber die Verarbeitung personenbezogener Daten informiert werden."
        )

    # --- Cookie consent banner ---
    # Absence of known consent tooling is not proof of a violation, so this
    # only raises a follow-up question instead of a finding.
    cookie_patterns = [
        r'(?:cookie.?consent|cookie.?banner|consent.?manager|didomi|cookiebot|onetrust|usercentrics)',
        r'(?:gdpr|dsgvo).?(?:consent|einwilligung)',
    ]
    if not _matches_any(cookie_patterns):
        follow_ups.append(FollowUpQuestion(
            id="cookie_consent",
            question="Wird beim ersten Besuch der Website ein Cookie-Consent-Banner angezeigt?",
            legal_basis="§ 25 TDDDG (ehem. TTDSG), Art. 5(3) ePrivacy-Richtlinie",
            severity="medium",
            finding_if_no=(
                "[§25 TDDDG] Kein Cookie-Consent-Banner erkannt. "
                "Vor dem Setzen nicht-essentieller Cookies ist eine Einwilligung erforderlich."
            ),
        ))

    return findings, follow_ups
|
||||||
|
|
||||||
|
|
||||||
|
def _to_string_list(items: list) -> list[str]:
|
||||||
|
"""Convert list of dicts or strings to list of strings."""
|
||||||
|
result = []
|
||||||
|
for item in (items or []):
|
||||||
|
if isinstance(item, dict):
|
||||||
|
# UCCA returns {code, category, description} or {id, name, description}
|
||||||
|
desc = item.get("description", item.get("name", item.get("code", str(item))))
|
||||||
|
code = item.get("code", item.get("id", ""))
|
||||||
|
result.append(f"[{code}] {desc}" if code else str(desc))
|
||||||
|
else:
|
||||||
|
result.append(str(item))
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def _risk_to_escalation(risk_level: str) -> str:
|
def _risk_to_escalation(risk_level: str) -> str:
|
||||||
"""Map UCCA risk level to escalation level."""
|
"""Map UCCA risk level to escalation level."""
|
||||||
mapping = {
|
mapping = {
|
||||||
@@ -215,19 +349,30 @@ def _risk_to_escalation(risk_level: str) -> str:
|
|||||||
return mapping.get(risk_level.upper() if risk_level else "", "E0")
|
return mapping.get(risk_level.upper() if risk_level else "", "E0")
|
||||||
|
|
||||||
|
|
||||||
def _build_summary(url: str, classification: str, assessment: dict, role: str) -> str:
|
def _build_summary(
|
||||||
"""Build a German manager summary."""
|
url: str, classification: str, assessment: dict, role: str,
|
||||||
|
findings_str: list[str], controls_str: list[str],
|
||||||
|
mode: str = "post_launch",
|
||||||
|
) -> str:
|
||||||
|
"""Build a German manager summary, adapted to pre/post-launch context."""
|
||||||
risk = assessment.get("risk_level", "unbekannt")
|
risk = assessment.get("risk_level", "unbekannt")
|
||||||
score = assessment.get("risk_score", 0)
|
score = assessment.get("risk_score", 0)
|
||||||
findings = assessment.get("triggered_rules", [])
|
|
||||||
controls = assessment.get("required_controls", [])
|
|
||||||
recommendation = assessment.get("recommendation", "")
|
recommendation = assessment.get("recommendation", "")
|
||||||
dsfa = assessment.get("dsfa_recommended", False)
|
dsfa = assessment.get("dsfa_recommended", False)
|
||||||
|
is_live = mode == "post_launch"
|
||||||
|
|
||||||
findings_text = "\n".join(f"- {f}" for f in findings[:5]) if findings else "Keine"
|
findings_text = "\n".join(f"- {f}" for f in findings_str[:5]) if findings_str else "Keine"
|
||||||
controls_text = "\n".join(f"- {c}" for c in controls[:5]) if controls else "Keine"
|
controls_text = "\n".join(f"- {c}" for c in controls_str[:5]) if controls_str else "Keine"
|
||||||
|
|
||||||
|
mode_header = (
|
||||||
|
"PRUEFUNG LIVE-WEBSITE — Das Dokument ist bereits oeffentlich zugaenglich."
|
||||||
|
if is_live else
|
||||||
|
"INTERNE PRUEFUNG — Das Dokument ist noch nicht veroeffentlicht."
|
||||||
|
)
|
||||||
|
|
||||||
parts = [
|
parts = [
|
||||||
|
mode_header,
|
||||||
|
"",
|
||||||
f"Dokumenttyp: {classification}",
|
f"Dokumenttyp: {classification}",
|
||||||
f"Quelle: {url}",
|
f"Quelle: {url}",
|
||||||
f"Risikobewertung: {risk} ({score}/100)",
|
f"Risikobewertung: {risk} ({score}/100)",
|
||||||
@@ -238,6 +383,19 @@ def _build_summary(url: str, classification: str, assessment: dict, role: str) -
|
|||||||
"",
|
"",
|
||||||
f"Erforderliche Massnahmen:\n{controls_text}",
|
f"Erforderliche Massnahmen:\n{controls_text}",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if is_live and findings_str:
|
||||||
|
parts.extend([
|
||||||
|
"",
|
||||||
|
"ACHTUNG: Diese Maengel sind bereits oeffentlich sichtbar. "
|
||||||
|
"Sofortige Nachbesserung empfohlen um Abmahnrisiken zu minimieren.",
|
||||||
|
])
|
||||||
|
elif not is_live and controls_str:
|
||||||
|
parts.extend([
|
||||||
|
"",
|
||||||
|
"Empfehlung: Implementieren Sie die erforderlichen Kontrollen vor der Veroeffentlichung.",
|
||||||
|
])
|
||||||
|
|
||||||
if recommendation:
|
if recommendation:
|
||||||
parts.extend(["", f"Empfehlung: {recommendation}"])
|
parts.extend(["", f"Weitere Empfehlung: {recommendation}"])
|
||||||
return "\n".join(parts)
|
return "\n".join(parts)
|
||||||
|
|||||||
@@ -0,0 +1,302 @@
|
|||||||
|
"""
|
||||||
|
Agent Website Scan Routes — deep scan endpoint that performs multi-page
|
||||||
|
website analysis with SOLL/IST service comparison.
|
||||||
|
|
||||||
|
POST /api/compliance/agent/scan
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from fastapi import APIRouter
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from compliance.services.website_scanner import scan_website, DetectedService
|
||||||
|
from compliance.services.dse_service_extractor import extract_dse_services, compare_services
|
||||||
|
from compliance.services.smtp_sender import send_email
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

router = APIRouter(prefix="/compliance/agent", tags=["agent"])

# Base URL of the AI SDK service; can be overridden via the AI_SDK_URL env var.
SDK_URL = os.environ.get("AI_SDK_URL", "http://bp-compliance-ai-sdk:8090")
# NOTE(review): tenant and user IDs are hard-coded here — confirm whether
# they should come from configuration or the request context.
TENANT_ID = "9282a473-5c95-4b3a-bf78-0ecc0ec71d3e"
USER_ID = "00000000-0000-0000-0000-000000000001"
# Headers sent with every request to the AI SDK.
SDK_HEADERS = {
    "Content-Type": "application/json",
    "X-Tenant-ID": TENANT_ID,
    "X-User-ID": USER_ID,
}
|
||||||
|
|
||||||
|
|
||||||
|
class ScanRequest(BaseModel):
    """Request body of the deep website scan endpoint."""

    url: str  # start URL of the website to scan
    mode: str = "post_launch"  # "post_launch" is treated as a live site; any other value as an internal check
    recipient: str = "dsb@breakpilot.local"  # e-mail recipient of the scan summary
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceInfo(BaseModel):
    """One third-party service in the scan result, with its documentation status."""

    name: str
    category: str
    provider: str
    country: str
    eu_adequate: bool
    requires_consent: bool
    legal_ref: str
    in_dse: bool  # whether the privacy policy (DSE) mentions the service
    status: str  # "ok", "undocumented", "outdated"
|
||||||
|
|
||||||
|
|
||||||
|
class ScanFinding(BaseModel):
    """A single compliance finding produced by the website scan."""

    code: str  # machine-readable code, e.g. "DSE-MISSING-<SERVICE_ID>"
    severity: str  # "HIGH", "MEDIUM" or "LOW" as set by the finding builder
    text: str  # human-readable description (German)
    correction: str = ""  # suggested fix text; filled only in pre-launch mode
|
||||||
|
|
||||||
|
|
||||||
|
class ScanResponse(BaseModel):
    """Response body of the deep website scan endpoint."""

    url: str
    pages_scanned: int  # number of pages the scanner visited
    services: list[ServiceInfo]
    findings: list[ScanFinding]
    ai_detected: bool  # True when the scanner reported at least one AI mention
    chatbot_detected: bool
    chatbot_provider: str
    missing_pages: dict  # url -> HTTP status code
    summary: str
    email_status: str  # status reported by the mail sender, "failed" if absent
    scanned_at: str  # ISO-8601 timestamp (UTC)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/scan", response_model=ScanResponse)
async def scan_website_endpoint(req: ScanRequest):
    """Deep website scan: multi-page crawl + SOLL/IST service comparison.

    Crawls the site, extracts the services named in the privacy policy (DSE)
    via LLM, compares both sets, derives findings, e-mails a summary to
    ``req.recipient`` and returns the structured result.

    NOTE(review): ``req.url`` is fetched server-side without any allow-list;
    confirm that upstream validation prevents requests to internal hosts.
    """
    # Local import keeps this block self-contained.
    from html import escape

    is_live = req.mode == "post_launch"

    # Step 1: Scan website (5-10 pages)
    scan = await scan_website(req.url)
    logger.info("Scanned %d pages, found %d services", len(scan.pages_scanned), len(scan.detected_services))

    # Step 2: Fetch privacy policy text for SOLL extraction
    dse_text = await _fetch_dse_text(req.url, scan.pages_scanned)

    # Step 3: Extract services mentioned in DSE via LLM
    dse_services = await extract_dse_services(dse_text) if dse_text else []
    logger.info("DSE mentions %d services", len(dse_services))

    # Step 4: SOLL/IST comparison
    detected_dicts = [_service_to_dict(s) for s in scan.detected_services]
    comparison = compare_services(detected_dicts, dse_services)

    # Step 5: Generate findings
    services_info, findings = _build_findings(comparison, scan, is_live)

    # Step 6: Generate corrections for pre-launch mode
    if not is_live and findings:
        await _add_corrections(findings, dse_text)

    # Step 7: Build summary
    summary = _build_scan_summary(req.url, scan, comparison, findings, is_live)

    # Step 8: Send notification
    mode_label = "INTERNE PRUEFUNG" if not is_live else "LIVE-WEBSITE"
    email_result = send_email(
        recipient=req.recipient,
        subject=f"[{mode_label}] Website-Scan: {req.url[:50]}",
        # The summary contains the caller-supplied URL and names taken from
        # scraped pages / LLM output, so it must be escaped before it is
        # embedded in HTML.
        body_html=f"<pre>{escape(summary)}</pre>",
    )

    return ScanResponse(
        url=req.url,
        pages_scanned=len(scan.pages_scanned),
        services=services_info,
        findings=findings,
        ai_detected=len(scan.ai_mentions) > 0,
        chatbot_detected=scan.chatbot_detected,
        chatbot_provider=scan.chatbot_provider,
        missing_pages=scan.missing_pages,
        summary=summary,
        email_status=email_result.get("status", "failed"),
        scanned_at=datetime.now(timezone.utc).isoformat(),
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_dse_text(url: str, scanned_pages: list[str]) -> str:
    """Find and fetch the privacy policy (DSE) page and return its plain text.

    Args:
        url: URL supplied by the caller; used as fallback when none of the
            scanned pages looks like a privacy policy.
        scanned_pages: URLs visited by the scanner.

    Returns:
        Up to 4000 characters of tag-stripped text, or ``""`` when the page
        cannot be fetched or answers with an HTTP error status.
    """
    import re  # kept function-scoped, as in the original

    # Pick the first scanned page whose URL looks like a privacy policy.
    dse_url = next(
        (page for page in scanned_pages if re.search(r"datenschutz|privacy|dsgvo", page, re.IGNORECASE)),
        None,
    )
    if not dse_url:
        dse_url = url  # Fallback to provided URL

    try:
        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
            resp = await client.get(dse_url, headers={"User-Agent": "BreakPilot-Compliance-Agent/1.0"})
    except Exception as exc:
        logger.warning("Fetching DSE page %s failed: %s", dse_url, exc)
        return ""

    if resp.status_code >= 400:
        # The body of an error page is not a privacy policy; feeding it to the
        # LLM extraction would only produce noise.
        logger.warning("DSE page %s returned HTTP %d", dse_url, resp.status_code)
        return ""

    html = resp.text
    # Drop script/style blocks including their content, then all remaining tags.
    clean = re.sub(r"<(script|style)[^>]*>.*?</\1>", "", html, flags=re.DOTALL | re.IGNORECASE)
    clean = re.sub(r"<[^>]+>", " ", clean)
    clean = re.sub(r"\s+", " ", clean).strip()
    return clean[:4000]
|
||||||
|
|
||||||
|
|
||||||
|
def _service_to_dict(svc: DetectedService) -> dict:
    """Convert a detected service into the plain dict used for the DSE comparison.

    Only the descriptive fields are copied; ``found_on`` is intentionally not
    part of the result.
    """
    return {
        "id": svc.id,
        "name": svc.name,
        "category": svc.category,
        "provider": svc.provider,
        "country": svc.country,
        "eu_adequate": svc.eu_adequate,
        "requires_consent": svc.requires_consent,
        "legal_ref": svc.legal_ref,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _build_findings(
    comparison: dict, scan, is_live: bool,
) -> tuple[list[ServiceInfo], list[ScanFinding]]:
    """Build the service info list and the findings from the SOLL/IST comparison.

    Args:
        comparison: Dict with the keys ``undocumented``, ``documented`` and
            ``outdated`` as produced by the service comparison.
        scan: Scan result; only ``missing_pages`` (url -> status code) is read.
        is_live: True for an already published site; raises the severity of
            undocumented services from MEDIUM to HIGH.

    Returns:
        ``(services, findings)``.
    """

    def _text(value) -> str:
        # Entries of "outdated" come from LLM output and may contain null or
        # non-string values, which the str-typed response fields would reject.
        return "" if value is None else str(value)

    services: list[ServiceInfo] = []
    findings: list[ScanFinding] = []

    # Undocumented services (on website, NOT in DSE)
    undocumented_severity = "HIGH" if is_live else "MEDIUM"
    for svc in comparison["undocumented"]:
        services.append(ServiceInfo(
            name=svc["name"], category=svc.get("category", "other"),
            provider=svc.get("provider", ""), country=svc.get("country", ""),
            eu_adequate=svc.get("eu_adequate", False),
            requires_consent=svc.get("requires_consent", False),
            legal_ref=svc.get("legal_ref", ""), in_dse=False, status="undocumented",
        ))
        findings.append(ScanFinding(
            code=f"DSE-MISSING-{svc['id'].upper()}",
            severity=undocumented_severity,
            text=f"{svc['name']} ({svc.get('provider', '')}, {svc.get('country', '')}) "
                 f"ist auf der Website eingebunden aber NICHT in der Datenschutzerklaerung "
                 f"dokumentiert (Art. 13 DSGVO).",
        ))

    # Documented services (OK)
    for item in comparison["documented"]:
        svc = item["detected"]
        services.append(ServiceInfo(
            name=svc["name"], category=svc.get("category", "other"),
            provider=svc.get("provider", ""), country=svc.get("country", ""),
            eu_adequate=svc.get("eu_adequate", False),
            requires_consent=svc.get("requires_consent", False),
            legal_ref=svc.get("legal_ref", ""), in_dse=True, status="ok",
        ))
        # Check third-country transfer
        if not svc.get("eu_adequate", False):
            findings.append(ScanFinding(
                code=f"TRANSFER-{svc['id'].upper()}",
                severity="MEDIUM",
                text=f"{svc['name']} ({svc.get('country', '')}) — Drittlandtransfer. "
                     f"Pruefen ob SCCs oder Angemessenheitsbeschluss dokumentiert sind.",
            ))

    # Outdated services (in DSE, NOT on website)
    for svc in comparison["outdated"]:
        name = _text(svc.get("name"))
        services.append(ServiceInfo(
            name=name, category="other",
            provider=_text(svc.get("provider")), country=_text(svc.get("country")),
            eu_adequate=True, requires_consent=False,
            legal_ref="", in_dse=True, status="outdated",
        ))
        findings.append(ScanFinding(
            code=f"DSE-OUTDATED-{name.upper().replace(' ', '_')[:20]}",
            severity="LOW",
            text=f"{name} in Datenschutzerklaerung erwaehnt aber auf der Website "
                 f"nicht mehr gefunden. Eintrag bei naechster Aktualisierung entfernen.",
        ))

    # Missing pages (e.g., /impressum returns 404)
    for page_url, status_code in scan.missing_pages.items():
        if "impressum" in page_url.lower():
            findings.append(ScanFinding(
                code="MISSING-IMPRESSUM",
                severity="HIGH",
                text=f"Impressum-Seite gibt HTTP {status_code} zurueck (§5 TMG Verstoss).",
            ))

    return services, findings
|
||||||
|
|
||||||
|
|
||||||
|
async def _add_corrections(findings: list[ScanFinding], dse_text: str) -> None:
    """Add correction suggestions for pre-launch mode via LLM.

    For every HIGH/MEDIUM finding about a service missing from the privacy
    policy (code prefix ``DSE-MISSING-``) an LLM is asked for a ready-to-use
    text snippet, which is stored in ``finding.correction``. Findings are
    modified in place; a failure for one finding is logged and skipped.

    Args:
        findings: Findings to enrich.
        dse_text: Privacy policy text. Currently unused; kept for interface
            compatibility.
    """
    import re  # kept function-scoped, as in the original

    prefix = "DSE-MISSING-"
    # Match on the prefix, not on the substring "MISSING": otherwise
    # "MISSING-IMPRESSUM" would trigger a snippet for a bogus service name.
    targets = [
        f for f in findings
        if f.severity in ("HIGH", "MEDIUM") and f.code.startswith(prefix)
    ]
    if not targets:
        return

    # One client for all requests instead of a new connection pool per finding.
    async with httpx.AsyncClient(timeout=30.0) as client:
        for finding in targets:
            service_name = finding.code[len(prefix):].replace("_", " ").title()
            try:
                resp = await client.post(f"{SDK_URL}/sdk/v1/llm/chat", headers=SDK_HEADERS, json={
                    "messages": [
                        {"role": "system", "content": (
                            "/no_think\n"
                            "Du bist Datenschutzexperte. Erstelle einen einbaufertigen "
                            "Textbaustein fuer eine deutsche Datenschutzerklaerung fuer "
                            f"den Dienst '{service_name}'. Enthalte: Ueberschrift, "
                            "Anbietername, Zweck, Rechtsgrundlage nach DSGVO, "
                            "Drittlandtransfer-Hinweis wenn noetig, "
                            "Widerspruchsmoeglichkeit. Max 150 Woerter."
                        )},
                        {"role": "user", "content": f"Erstelle DSE-Textbaustein fuer: {service_name}"},
                    ],
                })
                data = resp.json()
                # The answer is read from "response" or, failing that, "message.content".
                raw = (
                    data.get("response", "")
                    or (data.get("message", {}) or {}).get("content", "")
                    or ""
                ).strip()
                # Remove any <think>…</think> block from the model output.
                raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
                if raw:
                    finding.correction = raw
            except Exception as e:
                logger.warning("Correction generation failed for %s: %s", service_name, e)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_scan_summary(
    url: str, scan, comparison: dict, findings: list[ScanFinding], is_live: bool,
) -> str:
    """Build the German plain-text summary of a website scan.

    Args:
        url: Scanned start URL.
        scan: Scan result; only ``pages_scanned`` is read.
        comparison: Dict with the keys ``undocumented``, ``documented``, ``outdated``.
        findings: All findings; at most the first ten are listed.
        is_live: True for an already published site; adds a warning when
            HIGH findings exist.

    Returns:
        The summary as newline-joined text.
    """
    mode = "PRUEFUNG LIVE-WEBSITE" if is_live else "INTERNE PRUEFUNG"
    n_undoc = len(comparison["undocumented"])
    n_ok = len(comparison["documented"])
    n_outdated = len(comparison["outdated"])
    n_findings = len(findings)
    high = sum(1 for f in findings if f.severity == "HIGH")

    parts = [
        f"{mode} — Website-Scan",
        f"URL: {url}",
        f"Seiten gescannt: {len(scan.pages_scanned)}",
        "",
        "Dienstleister-Abgleich (DSE vs. Website):",
        f"  Korrekt dokumentiert: {n_ok}",
        f"  NICHT in DSE (Verstoss): {n_undoc}",
        f"  Veraltet in DSE: {n_outdated}",
        "",
        f"Findings: {n_findings} ({high} mit hoher Prioritaet)",
    ]

    if findings:
        parts.append("")
        # Only the first ten findings are listed to keep the summary short.
        for f in findings[:10]:
            marker = "!!" if f.severity == "HIGH" else "!" if f.severity == "MEDIUM" else "i"
            parts.append(f"  [{marker}] {f.text}")

    if is_live and high > 0:
        parts.extend([
            "",
            "ACHTUNG: Verstoesse auf einer bereits veroeffentlichten Website. "
            "Sofortige Korrektur empfohlen.",
        ])

    return "\n".join(parts)
|
||||||
@@ -0,0 +1,127 @@
|
|||||||
|
"""
|
||||||
|
DSE Service Extractor — extracts mentioned third-party services from
|
||||||
|
a privacy policy text using LLM (Qwen) and compares against detected services.
|
||||||
|
|
||||||
|
Produces SOLL/IST comparison: what's in the DSE vs. what's on the website.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Base URL of the AI SDK service; can be overridden via the AI_SDK_URL env var.
SDK_URL = os.environ.get("AI_SDK_URL", "http://bp-compliance-ai-sdk:8090")
# NOTE(review): tenant and user IDs are hard-coded here — confirm whether
# they should come from shared configuration.
TENANT_ID = "9282a473-5c95-4b3a-bf78-0ecc0ec71d3e"
USER_ID = "00000000-0000-0000-0000-000000000001"

# Headers sent with every request to the AI SDK.
SDK_HEADERS = {
    "Content-Type": "application/json",
    "X-Tenant-ID": TENANT_ID,
    "X-User-ID": USER_ID,
}
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_dse_services(dse_text: str) -> list[dict]:
    """Extract the services mentioned in a privacy policy text via LLM.

    Args:
        dse_text: Plain text of the privacy policy; only the first 3500
            characters are sent to the model.

    Returns:
        The dict entries of the JSON array found in the model answer that
        carry a non-empty string ``name``. Returns ``[]`` when the request
        fails, no JSON array is found or the array cannot be parsed.
    """
    import json

    prompt = (
        "/no_think\n"
        "Extrahiere aus dieser Datenschutzerklaerung ALLE erwaehnten Dienstleister, "
        "Tools und externen Dienste. Fuer jeden nenne:\n"
        "- name: Name des Dienstes (z.B. 'Google Analytics')\n"
        "- purpose: Zweck (z.B. 'Webanalyse')\n"
        "- country: Land/Sitz (z.B. 'USA')\n"
        "- legal_basis: Genannte Rechtsgrundlage (z.B. 'Einwilligung')\n\n"
        "Antworte als JSON-Array. Wenn keine Dienstleister erwaehnt werden, "
        "antworte mit [].\n"
        "Beispiel: [{\"name\": \"Google Analytics\", \"purpose\": \"Webanalyse\", "
        "\"country\": \"USA\", \"legal_basis\": \"Einwilligung\"}]"
    )
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            resp = await client.post(f"{SDK_URL}/sdk/v1/llm/chat", headers=SDK_HEADERS, json={
                "messages": [
                    {"role": "system", "content": prompt},
                    {"role": "user", "content": dse_text[:3500]},
                ],
            })
            data = resp.json()
            # The answer is read from "response" or, failing that, "message.content".
            raw = (
                data.get("response", "")
                or (data.get("message", {}) or {}).get("content", "")
                or ""
            ).strip()
            # Remove any <think>…</think> block from the model output.
            raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
            # Extract JSON array from response
            match = re.search(r"\[.*\]", raw, re.DOTALL)
            if match:
                parsed = json.loads(match.group())
                # LLM output is untrusted: keep only well-formed entries so
                # callers can rely on every item being a dict with a name.
                return [
                    entry for entry in parsed
                    if isinstance(entry, dict)
                    and isinstance(entry.get("name"), str)
                    and entry["name"].strip()
                ]
    except Exception as e:
        logger.warning("DSE service extraction failed: %s", e)
    return []
|
||||||
|
|
||||||
|
|
||||||
|
def compare_services(
    detected: list[dict], dse_services: list[dict],
) -> dict:
    """Compare detected website services against DSE-mentioned services.

    Entries that are not dicts or lack a non-empty string ``name`` are
    ignored, because ``dse_services`` originates from LLM output and may be
    malformed.

    Args:
        detected: Services found on the website.
        dse_services: Services mentioned in the privacy policy (DSE).

    Returns:
        Dict with three categories:
        - undocumented: on website but NOT in DSE (Art. 13 violation)
        - outdated: in DSE but NOT on website (cleanup)
        - documented: on website AND in DSE (OK, check details), each as
          ``{"detected": ..., "dse": ..., "status": "ok"}``
    """

    # Normalize names for matching
    def normalize(name: str) -> str:
        return re.sub(r"[^a-z0-9]", "", name.lower())

    def index_by_name(entries) -> dict:
        # Later entries with the same normalized name replace earlier ones.
        return {
            normalize(entry["name"]): entry
            for entry in (entries or [])
            if isinstance(entry, dict)
            and isinstance(entry.get("name"), str)
            and entry["name"].strip()
        }

    detected_names = index_by_name(detected)
    dse_names = index_by_name(dse_services)

    undocumented = []
    documented = []
    outdated = []

    for key, svc in detected_names.items():
        # Skip CMP — consent managers don't need DSE mention
        if svc.get("category") == "other" and svc.get("id") == "cmp":
            continue
        # First DSE entry that matches exactly or fuzzily, if any.
        dse_match = next(
            (
                dse_svc for dse_key, dse_svc in dse_names.items()
                if key == dse_key or _fuzzy_match(svc["name"], dse_svc["name"])
            ),
            None,
        )
        if dse_match is None:
            undocumented.append(svc)
        else:
            documented.append({"detected": svc, "dse": dse_match, "status": "ok"})

    for key, dse_svc in dse_names.items():
        still_in_use = any(
            key == det_key or _fuzzy_match(dse_svc["name"], det_svc["name"])
            for det_key, det_svc in detected_names.items()
        )
        if not still_in_use:
            outdated.append(dse_svc)

    return {
        "undocumented": undocumented,
        "documented": documented,
        "outdated": outdated,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _fuzzy_match(a: str, b: str) -> bool:
|
||||||
|
"""Simple fuzzy matching — checks if one name contains the core of the other."""
|
||||||
|
a_lower = a.lower()
|
||||||
|
b_lower = b.lower()
|
||||||
|
# Direct substring
|
||||||
|
if a_lower in b_lower or b_lower in a_lower:
|
||||||
|
return True
|
||||||
|
# Core word match (e.g., "Google" in "Google Analytics" and "Google Ireland")
|
||||||
|
a_words = set(re.findall(r"\w{4,}", a_lower))
|
||||||
|
b_words = set(re.findall(r"\w{4,}", b_lower))
|
||||||
|
return bool(a_words & b_words)
|
||||||
@@ -0,0 +1,248 @@
|
|||||||
|
"""
|
||||||
|
Website Scanner — scans multiple pages of a website for third-party services,
|
||||||
|
chatbots, tracking, AI indicators, and compares against privacy policy.
|
||||||
|
|
||||||
|
Used by the Compliance Agent for SOLL/IST analysis.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from urllib.parse import urljoin, urlparse
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
USER_AGENT = "BreakPilot-Compliance-Agent/1.0"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DetectedService:
    # One third-party service found in a page's HTML. Instances are built by
    # _detect_services from a SERVICE_REGISTRY metadata entry plus the page URL.
    id: str  # stable registry identifier, used to deduplicate detections
    name: str  # human-readable service name
    category: str  # "tracking", "chatbot", "cdn", "payment", "marketing", "other"
    provider: str  # company named in the registry entry
    country: str  # country value from the registry entry (e.g. "US", "EU/Self")
    eu_adequate: bool  # registry flag; presumably "no third-country transfer concern" — confirm
    requires_consent: bool  # registry flag; presumably "needs user consent" — confirm
    legal_ref: str  # free-text legal reference from the registry entry
    found_on: str = ""  # URL where detected
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ScanResult:
    # Aggregated outcome of scan_website; filled in incrementally while scanning.
    pages_scanned: list[str] = field(default_factory=list)  # every URL that returned a response, error statuses included
    detected_services: list[DetectedService] = field(default_factory=list)  # deduplicated by service id at the end of the scan
    ai_mentions: list[str] = field(default_factory=list)  # "<url>: ...<text context>..." snippets
    chatbot_detected: bool = False  # True if any detected service has category "chatbot"
    chatbot_provider: str = ""  # name of the first detected chatbot service, if any
    missing_pages: dict[str, int] = field(default_factory=dict)  # url -> status_code (only statuses >= 400)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Service Registry ──────────────────────────────────────────────────────────
|
||||||
|
# Each entry: regex pattern -> service metadata
|
||||||
|
# Maps a regex (searched case-insensitively in the raw HTML by _detect_services)
# to the keyword arguments used to build a DetectedService.
#
# Identifier-style alternatives (GA4 "G-XXXXXXXX", Universal Analytics
# "UA-12345-1") are wrapped in (?-i:...) and anchored with \b so they stay
# case-sensitive even under re.IGNORECASE. Otherwise "G-\w{5,}" also matches
# the standard reCAPTCHA class name "g-recaptcha" and reports Google Analytics
# on every page that merely embeds a captcha.
SERVICE_REGISTRY: dict[str, dict] = {
    # --- Tracking & Analytics ---
    r"google.?analytics|gtag\(|\b(?-i:UA-\d{4,})|\b(?-i:G-[A-Z0-9]{6,})\b": {
        "id": "google_analytics", "name": "Google Analytics", "category": "tracking",
        "provider": "Google LLC", "country": "US", "eu_adequate": False,
        "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG",
    },
    r"googletagmanager|gtm\.js": {
        "id": "google_tag_manager", "name": "Google Tag Manager", "category": "tracking",
        "provider": "Google LLC", "country": "US", "eu_adequate": False,
        "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO",
    },
    r"facebook\.net/.*fbevents|fbq\(": {
        "id": "facebook_pixel", "name": "Meta/Facebook Pixel", "category": "marketing",
        "provider": "Meta Platforms", "country": "US", "eu_adequate": False,
        "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG",
    },
    r"hotjar\.com|_hjSettings": {
        "id": "hotjar", "name": "Hotjar", "category": "tracking",
        "provider": "Hotjar Ltd", "country": "MT", "eu_adequate": True,
        "requires_consent": True, "legal_ref": "§25 TDDDG (Session Recording)",
    },
    r"clarity\.ms": {
        "id": "ms_clarity", "name": "Microsoft Clarity", "category": "tracking",
        "provider": "Microsoft", "country": "US", "eu_adequate": False,
        "requires_consent": True, "legal_ref": "§25 TDDDG (Session Replay), Art. 44 DSGVO",
    },
    r"matomo|piwik": {
        "id": "matomo", "name": "Matomo", "category": "tracking",
        "provider": "InnoCraft/Self-hosted", "country": "EU/Self", "eu_adequate": True,
        "requires_consent": False, "legal_ref": "Cookieless moeglich, §25 TDDDG",
    },
    r"plausible\.io": {
        "id": "plausible", "name": "Plausible Analytics", "category": "tracking",
        "provider": "Plausible Insights", "country": "EE", "eu_adequate": True,
        "requires_consent": False, "legal_ref": "EU-Anbieter, cookieless",
    },
    # --- CDN & Fonts ---
    r"fonts\.googleapis\.com|fonts\.gstatic\.com": {
        "id": "google_fonts", "name": "Google Fonts (remote)", "category": "cdn",
        "provider": "Google LLC", "country": "US", "eu_adequate": False,
        "requires_consent": True, "legal_ref": "LG Muenchen I, Az. 3 O 17493/20",
    },
    r"cdn\.cloudflare\.com|cdnjs\.cloudflare\.com": {
        "id": "cloudflare_cdn", "name": "Cloudflare CDN", "category": "cdn",
        "provider": "Cloudflare Inc", "country": "US", "eu_adequate": False,
        "requires_consent": False, "legal_ref": "Art. 44-49 DSGVO, berechtigtes Interesse",
    },
    # --- Chatbots ---
    r"widget\.intercom\.io|intercomcdn": {
        "id": "intercom", "name": "Intercom", "category": "chatbot",
        "provider": "Intercom Inc", "country": "US", "eu_adequate": False,
        "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, KI-gestuetzt",
    },
    r"tidio\.co|tidioChatApi": {
        "id": "tidio", "name": "Tidio Chat", "category": "chatbot",
        "provider": "Tidio LLC", "country": "PL", "eu_adequate": True,
        "requires_consent": False, "legal_ref": "EU-Anbieter",
    },
    r"zendesk\.com/embeddable|zdassets": {
        "id": "zendesk", "name": "Zendesk", "category": "chatbot",
        "provider": "Zendesk Inc", "country": "US", "eu_adequate": False,
        "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO",
    },
    # --- Payment ---
    r"js\.stripe\.com|stripe\.com/v3": {
        "id": "stripe", "name": "Stripe", "category": "payment",
        "provider": "Stripe Inc", "country": "US", "eu_adequate": False,
        "requires_consent": False, "legal_ref": "Art. 6(1)(b) Vertragserfuellung, SCCs",
    },
    r"paypal\.com/sdk|paypalobjects": {
        "id": "paypal", "name": "PayPal", "category": "payment",
        "provider": "PayPal Holdings", "country": "US", "eu_adequate": False,
        "requires_consent": False, "legal_ref": "Art. 6(1)(b) Vertragserfuellung",
    },
    r"klarna\.com|klarna-payments": {
        "id": "klarna", "name": "Klarna", "category": "payment",
        "provider": "Klarna AB", "country": "SE", "eu_adequate": True,
        "requires_consent": False, "legal_ref": "EU, aber Art. 22 DSGVO bei Bonitaetspruefung!",
    },
    # --- Captcha ---
    r"recaptcha|grecaptcha": {
        "id": "recaptcha", "name": "Google reCAPTCHA", "category": "other",
        "provider": "Google LLC", "country": "US", "eu_adequate": False,
        "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG",
    },
    # --- Video ---
    r"youtube\.com/embed|youtube-nocookie|ytimg": {
        "id": "youtube", "name": "YouTube", "category": "other",
        "provider": "Google LLC", "country": "US", "eu_adequate": False,
        "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, 2-Klick empfohlen",
    },
    # --- Consent Management ---
    r"didomi|cookiebot|onetrust|usercentrics|consentmanager|quantcast": {
        "id": "cmp", "name": "Consent Management Platform", "category": "other",
        "provider": "Various", "country": "EU", "eu_adequate": True,
        "requires_consent": False, "legal_ref": "CMP vorhanden — gut",
    },
}
|
||||||
|
|
||||||
|
# Regexes that indicate an AI/ML mention in visible page text; they are applied
# case-insensitively by _detect_ai_mentions.
# The two-letter abbreviations "KI"/"AI" are anchored with \b so they cannot
# match as the tail of an unrelated word (e.g. "Dubai-powered").
AI_TEXT_PATTERNS = [
    r"k(?:ue|ü)nstliche.?intelligenz",
    r"artificial.?intelligence",
    r"machine.?learning",
    r"maschinelles.?lernen",
    r"\bKI.?gest(?:ue|ü)tzt",
    r"\bAI.?powered",
    r"chatgpt|openai",
    r"deep.?learning",
    r"neural.?net",
    r"automatisierte.?entscheidung",
]
|
||||||
|
|
||||||
|
# (regex, label) pairs used to discover legal/footer pages on the start page.
# Group 1 of each regex is the href value. Both double- and single-quoted
# attribute values are accepted, since HTML allows either.
FOOTER_LINK_PATTERNS = [
    (r'href=["\']([^"\']*(?:impressum|imprint|legal-notice)[^"\']*)["\']', "impressum"),
    (r'href=["\']([^"\']*(?:datenschutz|privacy|dsgvo)[^"\']*)["\']', "datenschutz"),
    (r'href=["\']([^"\']*(?:agb|terms|nutzungsbedingungen)[^"\']*)["\']', "agb"),
    (r'href=["\']([^"\']*(?:cookie)[^"\']*)["\']', "cookies"),
]
|
||||||
|
|
||||||
|
|
||||||
|
async def scan_website(base_url: str) -> ScanResult:
    """Scan a website: start page + footer links for services and AI indicators.

    The start page of the site's origin is fetched first. Links matching
    FOOTER_LINK_PATTERNS are collected from it, and up to 10 same-origin pages
    are scanned for third-party services and AI/ML text mentions.

    Args:
        base_url: Any absolute URL on the site; its scheme and host define the
            origin that is scanned. The URL itself is scanned as well.

    Returns:
        A ScanResult. If the start page cannot be fetched, the result is
        returned as-is, without any detected services or AI mentions.
    """
    result = ScanResult()
    parsed = urlparse(base_url)
    origin = f"{parsed.scheme}://{parsed.netloc}"

    async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
        # 1. Fetch start page
        start_html = await _fetch_page(client, origin, result)
        if not start_html:
            return result

        # 2. Discover footer links.
        # A dict keeps insertion order, so the start page and the requested URL
        # always survive the page cap below and the scan order is deterministic
        # (slicing a set would pick an arbitrary subset).
        page_urls: dict[str, None] = {origin: None}

        def remember(candidate: str) -> None:
            # "origin" and "origin/" are the same document; collapse them so the
            # start page is neither fetched nor analysed twice.
            page_urls.setdefault(origin if candidate == f"{origin}/" else candidate, None)

        remember(base_url)  # Also scan the provided URL
        for pattern, _ in FOOTER_LINK_PATTERNS:
            for match in re.finditer(pattern, start_html, re.IGNORECASE):
                # urljoin resolves every relative form ("/x", "x.html", "#y")
                # and leaves absolute URLs untouched.
                target = urlparse(urljoin(origin, match.group(1)))
                # Compare scheme and host exactly; a plain prefix check would
                # accept look-alike hosts such as "https://example.com.evil.tld".
                if (target.scheme, target.netloc) != (parsed.scheme, parsed.netloc):
                    continue
                # Fragments address the same document, so drop them.
                remember(target._replace(fragment="").geturl())

        # 3. Scan all pages (max 10)
        for url in list(page_urls)[:10]:
            html = start_html if url == origin else await _fetch_page(client, url, result)
            if html:
                _detect_services(html, url, result)
                _detect_ai_mentions(html, url, result)

    # Deduplicate services, keeping the first detection of each service id
    first_by_id: dict[str, DetectedService] = {}
    for svc in result.detected_services:
        first_by_id.setdefault(svc.id, svc)
    result.detected_services = list(first_by_id.values())

    chatbot = next(
        (s for s in result.detected_services if s.category == "chatbot"), None
    )
    result.chatbot_detected = chatbot is not None
    if chatbot is not None:
        result.chatbot_provider = chatbot.name

    return result
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_page(
    client: httpx.AsyncClient, url: str, result: ScanResult,
) -> str:
    """Download one page and record the outcome on ``result``.

    Every URL that yields a response is appended to ``result.pages_scanned``.
    Responses with status 400 or above are additionally stored in
    ``result.missing_pages``.

    Returns:
        The response body as text, or "" for error statuses and for any
        exception raised while fetching (which is logged as a warning).
    """
    request_headers = {"User-Agent": USER_AGENT}
    try:
        response = await client.get(url, headers=request_headers)
        result.pages_scanned.append(url)
        status = response.status_code
        if status < 400:
            return response.text
        # Error status: remember it so callers can report the missing page.
        result.missing_pages[url] = status
    except Exception as exc:
        logger.warning("Failed to fetch %s: %s", url, exc)
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_services(html: str, url: str, result: ScanResult) -> None:
    """Append a DetectedService for every registry pattern found in ``html``.

    Each SERVICE_REGISTRY regex is searched case-insensitively in the raw HTML.
    Matches are recorded on ``result.detected_services`` with ``found_on`` set
    to ``url``. No deduplication happens here.
    """
    hits = [
        DetectedService(found_on=url, **service_meta)
        for regex, service_meta in SERVICE_REGISTRY.items()
        if re.search(regex, html, re.IGNORECASE)
    ]
    result.detected_services.extend(hits)
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_ai_mentions(html: str, url: str, result: ScanResult) -> None:
    """Record AI/ML text mentions found in the visible text of a page.

    Script and style elements are removed and remaining tags are replaced by
    spaces before searching. For each AI_TEXT_PATTERNS regex, the first match
    is stored on ``result.ai_mentions`` together with up to 40 characters of
    context on either side.
    """
    # Reduce the markup to text so that code and CSS cannot trigger matches.
    text_only = re.sub(r"<(script|style)[^>]*>.*?</\1>", "", html, flags=re.DOTALL | re.IGNORECASE)
    text_only = re.sub(r"<[^>]+>", " ", text_only)

    first_hits = (re.search(regex, text_only, re.IGNORECASE) for regex in AI_TEXT_PATTERNS)
    for hit in first_hits:
        if hit is None:
            continue
        window_start = max(0, hit.start() - 40)
        snippet = text_only[window_start:hit.end() + 40].strip()
        result.ai_mentions.append(f"{url}: ...{snippet}...")
|
||||||
@@ -44,6 +44,7 @@ from compliance.api.company_profile_routes import router as company_profile_rout
|
|||||||
# Agent (ZeroClaw compliance agent)
|
# Agent (ZeroClaw compliance agent)
|
||||||
from compliance.api.agent_notification_routes import router as agent_notify_router
|
from compliance.api.agent_notification_routes import router as agent_notify_router
|
||||||
from compliance.api.agent_analyze_routes import router as agent_analyze_router
|
from compliance.api.agent_analyze_routes import router as agent_analyze_router
|
||||||
|
from compliance.api.agent_scan_routes import router as agent_scan_router
|
||||||
|
|
||||||
# Middleware
|
# Middleware
|
||||||
from middleware import (
|
from middleware import (
|
||||||
@@ -142,6 +143,7 @@ app.include_router(company_profile_router, prefix="/api")
|
|||||||
# Agent (ZeroClaw compliance agent → analyze + email via SMTP)
|
# Agent (ZeroClaw compliance agent → analyze + email via SMTP)
|
||||||
app.include_router(agent_notify_router, prefix="/api")
|
app.include_router(agent_notify_router, prefix="/api")
|
||||||
app.include_router(agent_analyze_router, prefix="/api")
|
app.include_router(agent_analyze_router, prefix="/api")
|
||||||
|
app.include_router(agent_scan_router, prefix="/api")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -0,0 +1,793 @@
|
|||||||
|
# Plan: Control Relevance Filter — Generische Controls kontextsensitiv filtern
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
|
||||||
|
Die UCCA-Engine empfiehlt Controls pauschal basierend auf Intake-Flags (Boolean-Felder wie
|
||||||
|
`personal_data: true`, `marketing: true`). Sie prueft NICHT, ob der analysierte Text die
|
||||||
|
Bedingungen fuer einen spezifischen Control tatsaechlich erfuellt.
|
||||||
|
|
||||||
|
### Konkretes Beispiel (Opodo-Test, 2026-04-28)
|
||||||
|
|
||||||
|
- **Control:** `[C_TRANSPARENCY] Nutzer informieren dass sie mit KI interagieren`
|
||||||
|
- **Quelle:** AI Act Art. 50 (im Kommissionsentwurf noch Art. 52) — nur relevant, wenn KI eingesetzt wird
|
||||||
|
- **Opodo sagt:** "automated processing" (kann regelbasierte Software sein, muss keine KI sein)
|
||||||
|
- **Ergebnis:** False Positive — Control wird empfohlen obwohl kein KI-Einsatz belegt ist
|
||||||
|
|
||||||
|
### Skalierung
|
||||||
|
|
||||||
|
Von ~166.740 Controls in der RAG-Datenbank wird ein unbekannter Prozentsatz
|
||||||
|
bei jeder Bewertung generisch empfohlen. Jedes False Positive untergräbt das
|
||||||
|
Vertrauen des Nutzers und macht das Tool fuer Abmahnungen unbrauchbar.
|
||||||
|
|
||||||
|
## Loesung: 3-Stufen Relevance Filter
|
||||||
|
|
||||||
|
### Stufe 1: Regelbasierter Vorfilter (deterministisch, schnell)
|
||||||
|
|
||||||
|
Jeder Control bekommt ein `relevance_conditions` Feld (JSON):
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"control_id": "C_TRANSPARENCY",
|
||||||
|
"relevance_conditions": {
|
||||||
|
"text_must_contain_any": ["KI", "kuenstliche Intelligenz", "artificial intelligence",
|
||||||
|
"machine learning", "maschinelles Lernen", "neural", "deep learning",
|
||||||
|
"AI system", "AI-System", "algorith"],
|
||||||
|
"text_must_not_contain": [],
|
||||||
|
"requires_intake_flag": "automation",
|
||||||
|
"min_confidence": 0.5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementierung:**
|
||||||
|
- Neues Feld `relevance_conditions` in `compliance.canonical_controls` (JSONB)
|
||||||
|
- Funktion `check_relevance(control, source_text) -> (relevant: bool, confidence: float)`
|
||||||
|
- Laeuft NACH dem UCCA-Assessment, BEVOR das Ergebnis zurueckgegeben wird
|
||||||
|
- Filtert Controls raus, deren Keywords im Quelltext nicht vorkommen
|
||||||
|
|
||||||
|
**Aufwand:** ~200 LOC Python, kein LLM-Call noetig
|
||||||
|
**Datei:** `ai-compliance-sdk/internal/ucca/relevance_filter.go` oder `backend-compliance/compliance/services/relevance_filter.py`
|
||||||
|
|
||||||
|
### Stufe 2: LLM-Validierung (fuer High-Value Controls)
|
||||||
|
|
||||||
|
Fuer Controls mit `severity >= HIGH` oder wenn der regelbasierte Filter unsicher ist
|
||||||
|
(confidence < 0.7), wird Qwen gefragt:
|
||||||
|
|
||||||
|
```
|
||||||
|
Gegeben dieser Dokumenttext:
|
||||||
|
"[...Auszug...]"
|
||||||
|
|
||||||
|
Ist der folgende Control relevant fuer dieses Dokument?
|
||||||
|
Control: "[C_TRANSPARENCY] Nutzer informieren dass sie mit KI interagieren"
|
||||||
|
Rechtsgrundlage: Art. 50 AI Act
|
||||||
|
|
||||||
|
Antworte NUR mit: JA (mit Begruendung) oder NEIN (mit Begruendung)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementierung:**
|
||||||
|
- Neuer Endpoint: `POST /sdk/v1/ucca/validate-controls`
|
||||||
|
- Nimmt: `assessment_id`, `source_text`, `controls[]`
|
||||||
|
- Gibt zurueck: `controls[]` mit `relevant: bool`, `reason: string`
|
||||||
|
- Cached: Gleicher Text + Control = gleiche Antwort (24h TTL)
|
||||||
|
|
||||||
|
**Aufwand:** ~150 LOC, 1 LLM-Call pro Control (parallelisierbar)
|
||||||
|
|
||||||
|
### Stufe 3: Follow-Up-Fragen an den Nutzer (Hybrid)
|
||||||
|
|
||||||
|
Wenn weder Regel noch LLM sicher entscheiden koennen:
|
||||||
|
|
||||||
|
```
|
||||||
|
Follow-Up: "Setzt der Anbieter KI oder maschinelles Lernen ein?"
|
||||||
|
→ Ja: Control bleibt
|
||||||
|
→ Nein: Control wird entfernt
|
||||||
|
→ Unsicher: Control bleibt mit Hinweis "Nicht verifizierbar"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Bereits implementiert:** Das `follow_up_questions` System im Agent-Endpoint.
|
||||||
|
|
||||||
|
## Datenmodell-Aenderung
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Neues Feld in canonical_controls
|
||||||
|
ALTER TABLE compliance.canonical_controls
|
||||||
|
ADD COLUMN IF NOT EXISTS relevance_conditions JSONB DEFAULT '{}';
|
||||||
|
|
||||||
|
-- Index fuer schnelle Abfrage
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_controls_relevance
|
||||||
|
ON compliance.canonical_controls USING gin (relevance_conditions);
|
||||||
|
```
|
||||||
|
|
||||||
|
## Architektur
|
||||||
|
|
||||||
|
```
|
||||||
|
UCCA Assessment
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌────────────────────┐
|
||||||
|
│ Stufe 1: Regelfilter│ ← text_must_contain_any, intake_flags
|
||||||
|
│ (deterministisch) │
|
||||||
|
└────────┬───────────┘
|
||||||
|
│ unsicher oder high-severity
|
||||||
|
▼
|
||||||
|
┌────────────────────┐
|
||||||
|
│ Stufe 2: LLM-Check │ ← Qwen validiert Relevanz
|
||||||
|
│ (1 Call/Control) │
|
||||||
|
└────────┬───────────┘
|
||||||
|
│ immer noch unsicher
|
||||||
|
▼
|
||||||
|
┌────────────────────┐
|
||||||
|
│ Stufe 3: Follow-Up │ ← Nutzer beantwortet Frage
|
||||||
|
│ (Frontend) │
|
||||||
|
└────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## Implementierungsreihenfolge
|
||||||
|
|
||||||
|
### Phase 1: Regelfilter (1 Tag)
|
||||||
|
|
||||||
|
1. Migration: `relevance_conditions` Feld zu `canonical_controls`
|
||||||
|
2. Seed-Script: Top-20 generische Controls mit Bedingungen versehen
|
||||||
|
(C_TRANSPARENCY, C_EXPLICIT_CONSENT, C_DSFA_REQUIRED, etc.)
|
||||||
|
3. Filter-Funktion in `agent_analyze_routes.py`
|
||||||
|
4. Test: Opodo erneut analysieren — C_TRANSPARENCY sollte rausfallen
|
||||||
|
|
||||||
|
### Phase 2: LLM-Validierung (1 Tag)
|
||||||
|
|
||||||
|
1. Neuer SDK-Endpoint `/sdk/v1/ucca/validate-controls`
|
||||||
|
2. Integration in den Agent-Workflow
|
||||||
|
3. Caching-Layer (Redis/Valkey)
|
||||||
|
|
||||||
|
### Phase 3: Batch-Seeding (2-3 Tage)
|
||||||
|
|
||||||
|
1. Pipeline-Job: Fuer alle 166k Controls `relevance_conditions` generieren
|
||||||
|
(LLM-gestuetzt: "Welche Keywords im Quelltext wuerden diesen Control relevant machen?")
|
||||||
|
2. Qualitaetspruefung: Stichprobe von 100 Controls manuell validieren
|
||||||
|
|
||||||
|
## Betroffene Dateien
|
||||||
|
|
||||||
|
| Datei | Aenderung |
|
||||||
|
|-------|-----------|
|
||||||
|
| `backend-compliance/compliance/api/agent_analyze_routes.py` | Filter-Integration |
|
||||||
|
| `backend-compliance/compliance/services/relevance_filter.py` | NEU: Regelfilter |
|
||||||
|
| `ai-compliance-sdk/internal/ucca/relevance_filter.go` | NEU: SDK-seitig (alternativ) |
|
||||||
|
| `ai-compliance-sdk/internal/api/handlers/ucca_handlers.go` | Neuer Endpoint |
|
||||||
|
| Migration | `relevance_conditions` Spalte |
|
||||||
|
| `control-pipeline/` | Batch-Seeding Job (Phase 3) |
|
||||||
|
|
||||||
|
## Phase 4: Website-Scan (Multi-Page Crawl)
|
||||||
|
|
||||||
|
### Problem
|
||||||
|
|
||||||
|
Aktuell analysieren wir nur EINE URL (z.B. `/datenschutz/`). Aber relevante Hinweise
|
||||||
|
auf KI, Chatbots, automatisierte Entscheidungen oder Tracking koennen auf ANDEREN
|
||||||
|
Seiten der Website stehen:
|
||||||
|
|
||||||
|
- Chatbot-Widget auf der Startseite (nicht auf der Datenschutzseite)
|
||||||
|
- "Powered by ChatGPT" im Footer
|
||||||
|
- KI-gestuetzte Produktempfehlungen auf der Shopseite
|
||||||
|
- Cookie-Scripts die Tracking-Dienste laden (Google Analytics, Meta Pixel, etc.)
|
||||||
|
- Chatbot-Anbieter wie Intercom, Drift, Zendesk, Tidio im HTML
|
||||||
|
|
||||||
|
### Loesung: Lightweight Website-Scan
|
||||||
|
|
||||||
|
Kein vollstaendiger Crawl (zu langsam, zu invasiv), sondern ein gezielter Scan
|
||||||
|
von 5-10 strategischen Seiten:
|
||||||
|
|
||||||
|
```
|
||||||
|
Eingabe: https://www.opodo.de/datenschutz/
|
||||||
|
|
||||||
|
Automatisch gescannte Seiten:
|
||||||
|
1. Startseite: https://www.opodo.de/
|
||||||
|
2. Datenschutz (bereits): https://www.opodo.de/datenschutz/
|
||||||
|
3. Impressum: https://www.opodo.de/impressum/ (aus Footer-Links)
|
||||||
|
4. AGB: https://www.opodo.de/agb/ (aus Footer-Links)
|
||||||
|
5. Cookie-Policy: https://www.opodo.de/cookies/ (falls vorhanden)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Scan-Logik
|
||||||
|
|
||||||
|
**Schritt 1: Startseite holen + Footer-Links extrahieren**
|
||||||
|
```python
|
||||||
|
# Aus der Startseite die typischen Footer-Links extrahieren:
|
||||||
|
footer_patterns = [
|
||||||
|
r'href="([^"]*(?:impressum|imprint|legal)[^"]*)"',
|
||||||
|
r'href="([^"]*(?:datenschutz|privacy|dsgvo)[^"]*)"',
|
||||||
|
r'href="([^"]*(?:agb|terms|nutzungsbedingungen)[^"]*)"',
|
||||||
|
r'href="([^"]*(?:cookie|cookies)[^"]*)"',
|
||||||
|
r'href="([^"]*(?:kontakt|contact)[^"]*)"',
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
**Schritt 2: Jede Seite auf KI/Chatbot/Tracking-Indikatoren scannen**
|
||||||
|
```python
|
||||||
|
AI_INDICATORS = {
|
||||||
|
# Chatbot-Widgets (JavaScript-Einbindungen)
|
||||||
|
"chatbot_widgets": [
|
||||||
|
r"intercom", # Intercom (KI-gestuetzt)
|
||||||
|
r"drift\.com", # Drift Chatbot
|
||||||
|
r"tidio", # Tidio Chat
|
||||||
|
r"zendesk", # Zendesk Chat
|
||||||
|
r"crisp\.chat", # Crisp Chat
|
||||||
|
r"livechat", # LiveChat
|
||||||
|
r"hubspot.*chat", # HubSpot Chat
|
||||||
|
r"tawk\.to", # Tawk.to
|
||||||
|
r"freshchat", # Freshworks
|
||||||
|
r"dialogflow", # Google Dialogflow
|
||||||
|
r"watson.*assistant", # IBM Watson
|
||||||
|
r"chatgpt|openai", # OpenAI/ChatGPT
|
||||||
|
r"anthropic|claude", # Anthropic/Claude
|
||||||
|
],
|
||||||
|
# KI-Hinweise im Text
|
||||||
|
"ai_text_mentions": [
|
||||||
|
r"k(?:ue|ü)nstliche.?intelligenz",
|
||||||
|
r"artificial.?intelligence",
|
||||||
|
r"machine.?learning",
|
||||||
|
r"maschinelles.?lernen",
|
||||||
|
r"KI.?gest(?:ue|ü)tzt",
|
||||||
|
r"AI.?powered",
|
||||||
|
r"algorithm",
|
||||||
|
r"automatisierte.?entscheidung",
|
||||||
|
r"automated.?decision",
|
||||||
|
r"profiling",
|
||||||
|
r"personalisier", # Personalisierung
|
||||||
|
],
|
||||||
|
# Tracking & Analytics (EU + non-EU)
|
||||||
|
"tracking_analytics": [
|
||||||
|
# Google (USA)
|
||||||
|
r"google.?analytics|gtag|UA-\d+|G-\w+",
|
||||||
|
r"googletagmanager|gtm\.js",
|
||||||
|
r"google.?ads|googleads|adwords",
|
||||||
|
r"doubleclick\.net",
|
||||||
|
# Meta (USA)
|
||||||
|
r"facebook.?pixel|fbq\(|connect\.facebook",
|
||||||
|
r"meta.?pixel",
|
||||||
|
# Microsoft (USA)
|
||||||
|
r"clarity\.ms", # Microsoft Clarity
|
||||||
|
r"bing\.com/bat", # Bing Ads
|
||||||
|
r"linkedin\.com/insight", # LinkedIn Insight
|
||||||
|
# Analytics-Anbieter
|
||||||
|
r"hotjar", # Hotjar (Malta/EU — OK)
|
||||||
|
r"segment\.com", # Segment (USA)
|
||||||
|
r"mixpanel", # Mixpanel (USA)
|
||||||
|
r"amplitude", # Amplitude (USA)
|
||||||
|
r"heap\.io", # Heap (USA)
|
||||||
|
r"posthog", # PostHog (USA, self-host moeglich)
|
||||||
|
r"matomo|piwik", # Matomo (EU — self-host = OK, Cloud = pruefen)
|
||||||
|
r"plausible", # Plausible (EU — OK)
|
||||||
|
r"fathom", # Fathom (Kanada — Angemessenheitsbeschluss)
|
||||||
|
r"pirsch", # Pirsch (DE — OK)
|
||||||
|
r"umami", # Umami (self-host)
|
||||||
|
],
|
||||||
|
# CDN und Drittanbieter-Dienste (Drittlandtransfer-Risiko)
|
||||||
|
"third_party_services": [
|
||||||
|
# CDN (pruefen ob Drittland)
|
||||||
|
r"cdn\.cloudflare\.com", # Cloudflare (USA)
|
||||||
|
r"fastly\.net", # Fastly (USA)
|
||||||
|
r"akamai", # Akamai (USA)
|
||||||
|
r"cdn\.jsdelivr\.net", # jsDelivr (international)
|
||||||
|
r"unpkg\.com", # unpkg (USA)
|
||||||
|
r"cdnjs\.cloudflare\.com", # cdnjs (USA)
|
||||||
|
r"stackpath", # StackPath (USA)
|
||||||
|
r"bunny\.net|bunnycdn", # BunnyCDN (Slowenien/EU — OK)
|
||||||
|
r"keycdn", # KeyCDN (Schweiz — Angemessenheit)
|
||||||
|
# Fonts (IP-Uebermittlung!)
|
||||||
|
r"fonts\.googleapis\.com", # Google Fonts (USA — DSGVO-Verstoss!)
|
||||||
|
r"fonts\.gstatic\.com", # Google Fonts CDN
|
||||||
|
r"use\.typekit\.net", # Adobe Fonts (USA)
|
||||||
|
# Captcha
|
||||||
|
r"recaptcha|grecaptcha", # Google reCAPTCHA (USA)
|
||||||
|
r"hcaptcha", # hCaptcha (USA)
|
||||||
|
r"turnstile.*cloudflare", # Cloudflare Turnstile (USA)
|
||||||
|
# Maps
|
||||||
|
r"maps\.googleapis\.com", # Google Maps (USA)
|
||||||
|
r"maps\.google\.com",
|
||||||
|
r"openstreetmap", # OpenStreetMap (EU — OK)
|
||||||
|
r"mapbox", # Mapbox (USA)
|
||||||
|
# Video
|
||||||
|
r"youtube\.com|youtube-nocookie", # YouTube (USA)
|
||||||
|
r"vimeo\.com", # Vimeo (USA)
|
||||||
|
r"wistia", # Wistia (USA)
|
||||||
|
# Social Media Embeds
|
||||||
|
r"platform\.twitter\.com|x\.com/embed", # X/Twitter (USA)
|
||||||
|
r"instagram\.com/embed", # Instagram (USA)
|
||||||
|
r"linkedin\.com/embed", # LinkedIn (USA)
|
||||||
|
# Content Moderation
|
||||||
|
r"besedo", # Besedo (Schweden/EU — OK, aber pruefen)
|
||||||
|
# Payment (PCI-DSS relevant)
|
||||||
|
r"stripe\.com|js\.stripe", # Stripe (USA)
|
||||||
|
r"paypal\.com", # PayPal (USA)
|
||||||
|
r"adyen", # Adyen (NL/EU — OK)
|
||||||
|
r"mollie", # Mollie (NL/EU — OK)
|
||||||
|
# Andere
|
||||||
|
r"sentry\.io|sentry-cdn", # Sentry Error Tracking (USA)
|
||||||
|
r"intercom\.io", # Intercom (USA) — auch in chatbot_widgets
|
||||||
|
r"zendesk\.com", # Zendesk (USA)
|
||||||
|
r"freshdesk|freshworks", # Freshworks (USA/Indien)
|
||||||
|
],
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Drittland-Erkennung
|
||||||
|
|
||||||
|
Fuer jeden erkannten externen Dienst wird geprueft ob er aus einem Drittland stammt
|
||||||
|
(kein EU/EWR-Staat, kein Angemessenheitsbeschluss). Dafuer wird eine Registry gepflegt:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Statische Registry — ca. 80 Eintraege
|
||||||
|
THIRD_PARTY_REGISTRY = {
|
||||||
|
"google_analytics": {"provider": "Google LLC", "country": "US", "eu_adequate": False, "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, Schrems II"},
|
||||||
|
"google_fonts": {"provider": "Google LLC", "country": "US", "eu_adequate": False, "requires_consent": True, "legal_ref": "LG Muenchen I, Az. 3 O 17493/20 (Google Fonts Urteil)"},
|
||||||
|
"facebook_pixel": {"provider": "Meta Platforms", "country": "US", "eu_adequate": False, "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO"},
|
||||||
|
"cloudflare_cdn": {"provider": "Cloudflare Inc", "country": "US", "eu_adequate": False, "requires_consent": False, "legal_ref": "Art. 44-49 DSGVO, berechtigtes Interesse moeglich"},
|
||||||
|
"matomo_cloud": {"provider": "Matomo (InnoCraft)", "country": "NZ", "eu_adequate": True, "requires_consent": True, "legal_ref": "Neuseeland hat Angemessenheitsbeschluss"},
|
||||||
|
"matomo_selfhost": {"provider": "Self-hosted", "country": "depends", "eu_adequate": True, "requires_consent": False, "legal_ref": "Kein Drittlandtransfer bei Self-Hosting"},
|
||||||
|
"plausible": {"provider": "Plausible Insights", "country": "EE", "eu_adequate": True, "requires_consent": False, "legal_ref": "EU-Anbieter, cookieless"},
|
||||||
|
"bunnycdn": {"provider": "BunnyCDN d.o.o.", "country": "SI", "eu_adequate": True, "requires_consent": False, "legal_ref": "EU-Anbieter"},
|
||||||
|
"stripe": {"provider": "Stripe Inc", "country": "US", "eu_adequate": False, "requires_consent": False, "legal_ref": "Art. 6(1)(b) Vertragserfuellung, SCCs"},
|
||||||
|
"besedo": {"provider": "Besedo AB", "country": "SE", "eu_adequate": True, "requires_consent": False, "legal_ref": "EU-Anbieter"},
|
||||||
|
# ... ~80 weitere Eintraege
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Generierte Findings
|
||||||
|
|
||||||
|
**Beispiel: Opodo mit erweitertem Scan:**
|
||||||
|
```
|
||||||
|
Externe Dienste erkannt:
|
||||||
|
- Google Analytics (G-03F834EHLM) — USA, kein Angemessenheitsbeschluss
|
||||||
|
→ FINDING: Drittlandtransfer USA ohne Einwilligung (Art. 44 DSGVO)
|
||||||
|
- Google Fonts (fonts.googleapis.com) — USA
|
||||||
|
→ FINDING: Google Fonts Einbindung (LG Muenchen I, Az. 3 O 17493/20)
|
||||||
|
- Didomi CMP — Frankreich (EU — OK)
|
||||||
|
- Bootstrap CDN (jsdelivr.net) — International, pruefen
|
||||||
|
→ FOLLOW-UP: "Wird das CDN aus der EU oder einem Drittland geladen?"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Phase 4b: Soll-Ist-Abgleich (Dienstleister DSE vs. Website)
|
||||||
|
|
||||||
|
Der wertvollste Output des Agents: automatischer Abgleich zwischen dem was in der
|
||||||
|
Datenschutzerklaerung STEHT und dem was tatsaechlich auf der Website EINGEBUNDEN ist.
|
||||||
|
|
||||||
|
**Schritt 1: IST — Website scannen (bereits in Phase 4)**
|
||||||
|
Alle eingebundenen externen Dienste per HTML/Script-Analyse erkennen.
|
||||||
|
|
||||||
|
**Schritt 2: SOLL — Datenschutzerklaerung parsen**
|
||||||
|
Aus dem DSE-Text extrahieren welche Dienstleister erwaehnt werden:
|
||||||
|
```python
|
||||||
|
# Qwen/LLM extrahiert strukturiert:
|
||||||
|
PROMPT = """
|
||||||
|
Extrahiere aus dieser Datenschutzerklaerung ALLE erwaehnten Dienstleister/Tools.
|
||||||
|
Fuer jeden Dienstleister nenne:
|
||||||
|
- Name (z.B. "Google Analytics")
|
||||||
|
- Zweck (z.B. "Webanalyse")
|
||||||
|
- Land/Sitz (z.B. "USA")
|
||||||
|
- Genannte Rechtsgrundlage (z.B. "Einwilligung" oder "berechtigtes Interesse")
|
||||||
|
- Genannte Schutzmassnahme (z.B. "Standardvertragsklauseln")
|
||||||
|
|
||||||
|
Antworte als JSON-Array.
|
||||||
|
"""
|
||||||
|
```
|
||||||
|
|
||||||
|
**Schritt 3: Abgleich → 3 Kategorien**
|
||||||
|
|
||||||
|
| Kategorie | Bedeutung | Finding-Typ |
|
||||||
|
|-----------|-----------|-------------|
|
||||||
|
| Eingebunden + NICHT in DSE | Informationspflicht verletzt | HIGH — Art. 13 DSGVO Verstoss |
|
||||||
|
| In DSE + NICHT eingebunden | Veraltete/irrefuehrende DSE | LOW — Aufraeumbedarf |
|
||||||
|
| Eingebunden + in DSE | Korrekt dokumentiert | OK — nur Drittland pruefen |
|
||||||
|
|
||||||
|
**Beispiel-Output fuer Opodo:**
|
||||||
|
```
|
||||||
|
Dienstleister-Abgleich (opodo.de)
|
||||||
|
══════════════════════════════════
|
||||||
|
|
||||||
|
Eingebunden auf Website In DSE erwaehnt? Status
|
||||||
|
─────────────────────────────── ─────────────────── ───────
|
||||||
|
Google Analytics (G-03F834EHLM) Ja (Abschnitt 3.6) ✓ OK — aber USA, SCCs pruefen
|
||||||
|
Didomi CMP Ja (Cookie Notice) ✓ OK — Frankreich/EU
|
||||||
|
Bootstrap CDN (jsdelivr) Nein ✗ FINDING: Nicht in DSE
|
||||||
|
Google Tag Manager Ja (Abschnitt 3.6) ✓ OK
|
||||||
|
|
||||||
|
In DSE erwaehnt Auf Website gefunden? Status
|
||||||
|
─────────────────────────────── ───────────────────── ───────
|
||||||
|
Amadeus IT (Buchungssystem) Nicht pruefbar ? Backend-Dienst
|
||||||
|
Adyen (Zahlungsabwicklung) Nicht pruefbar ? Backend-Dienst
|
||||||
|
Salesforce (CRM) Nicht pruefbar ? Backend-Dienst
|
||||||
|
|
||||||
|
Zusammenfassung:
|
||||||
|
- 1 Dienstleister eingebunden aber NICHT in DSE dokumentiert (jsdelivr CDN)
|
||||||
|
- 3 Backend-Dienste in DSE erwaehnt, nicht im Frontend pruefbar
|
||||||
|
- Empfehlung: jsdelivr CDN in Datenschutzerklaerung aufnehmen oder lokal hosten
|
||||||
|
```
|
||||||
|
|
||||||
|
Dieser Output allein ist fuer einen Datenschutzbeauftragten Gold wert — er spart
|
||||||
|
Stunden manueller Arbeit und deckt Luecken auf die bei Website-Updates entstehen.
|
||||||
|
|
||||||
|
### Controls die durch Drittland-Dienste ausgeloest werden
|
||||||
|
|
||||||
|
| Erkannter Dienst | Control |
|
||||||
|
|-----------------|---------|
|
||||||
|
| Jeder US-Dienst ohne SCCs | C_THIRD_COUNTRY_TRANSFER: Drittlandtransfer absichern (Art. 44-49 DSGVO) |
|
||||||
|
| Google Fonts remote | C_GOOGLE_FONTS: Fonts lokal einbinden (LG Muenchen I Urteil) |
|
||||||
|
| Tracking ohne Consent-Banner | C_EXPLICIT_CONSENT: Einwilligung vor Tracking einholen |
|
||||||
|
| reCAPTCHA | C_CAPTCHA_PRIVACY: Datenschutzkonformen Captcha-Dienst nutzen |
|
||||||
|
| YouTube Embed | C_VIDEO_EMBED: 2-Klick-Loesung oder youtube-nocookie verwenden |
|
||||||
|
|
||||||
|
|
||||||
|
**Schritt 3: Ergebnis aggregieren**
|
||||||
|
```python
|
||||||
|
scan_result = {
|
||||||
|
"pages_scanned": 5,
|
||||||
|
"chatbot_detected": True, # z.B. Intercom auf Startseite
|
||||||
|
"chatbot_provider": "intercom", # Identifizierter Anbieter
|
||||||
|
"ai_mentions_found": False, # Kein expliziter KI-Text
|
||||||
|
"tracking_services": ["google_analytics", "facebook_pixel"],
|
||||||
|
"tracking_count": 2,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Schritt 4: Scan-Ergebnis in Relevanzpruefung einbeziehen**
|
||||||
|
- Chatbot erkannt → C_TRANSPARENCY wird relevant (auch ohne KI-Text)
|
||||||
|
- Tracking erkannt → C_EXPLICIT_CONSENT wird relevant
|
||||||
|
- Kein KI-Nachweis auf gesamter Website → C_TRANSPARENCY faellt weg
|
||||||
|
|
||||||
|
### Implementierung
|
||||||
|
|
||||||
|
**Neue Datei:** `backend-compliance/compliance/services/website_scanner.py` (~200 LOC)
|
||||||
|
|
||||||
|
```python
|
||||||
|
class WebsiteScanner:
|
||||||
|
async def scan(self, base_url: str) -> ScanResult:
|
||||||
|
"""Scan 5-10 pages for AI, chatbot, and tracking indicators."""
|
||||||
|
pages = await self._discover_pages(base_url)
|
||||||
|
indicators = {}
|
||||||
|
for page_url in pages[:10]:
|
||||||
|
html = await self._fetch(page_url)
|
||||||
|
indicators[page_url] = self._detect_indicators(html)
|
||||||
|
return self._aggregate(indicators)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Integration in Agent-Workflow:**
|
||||||
|
- Zwischen Schritt 1 (Fetch) und Schritt 3 (UCCA Assess)
|
||||||
|
- Scan-Ergebnis fliesst in die Intake-Flags UND in den Relevanzfilter
|
||||||
|
- Scan-Ergebnis wird im Response zurueckgegeben (Transparenz)
|
||||||
|
|
||||||
|
**Frontend-Erweiterung:**
|
||||||
|
- "Erweiterte Analyse" Toggle: Nur Einzelseite vs. Website-Scan
|
||||||
|
- Scan-Ergebnis als aufklappbare Sektion: "5 Seiten gescannt, Chatbot auf Startseite erkannt"
|
||||||
|
|
||||||
|
### Aufwand
|
||||||
|
|
||||||
|
| Komponente | LOC | Zeit |
|
||||||
|
|-----------|-----|------|
|
||||||
|
| `website_scanner.py` | ~200 | 0.5 Tage |
|
||||||
|
| Integration in `agent_analyze_routes.py` | ~50 | 2h |
|
||||||
|
| Frontend: Scan-Ergebnis anzeigen | ~80 | 2h |
|
||||||
|
| Tests | ~100 | 2h |
|
||||||
|
|
||||||
|
### Beispiel: Opodo mit Website-Scan
|
||||||
|
|
||||||
|
```
|
||||||
|
Seiten gescannt: 5
|
||||||
|
- https://www.opodo.de/ → Didomi Cookie-Consent, Google Analytics
|
||||||
|
- https://www.opodo.de/datenschutz/ → Datenschutzerklaerung
|
||||||
|
- https://www.opodo.de/impressum/ → 404 (FINDING!)
|
||||||
|
- https://www.opodo.de/agb/ → AGB vorhanden
|
||||||
|
- https://www.opodo.de/cookies/ → Cookie-Policy
|
||||||
|
|
||||||
|
Chatbot erkannt: Nein
|
||||||
|
KI-Hinweise: Nein
|
||||||
|
Tracking: Google Analytics (G-03F834EHLM), Didomi CMP
|
||||||
|
|
||||||
|
→ C_TRANSPARENCY: NICHT relevant (kein KI-Nachweis auf gesamter Website)
|
||||||
|
→ C_EXPLICIT_CONSENT: Relevant (Google Analytics + Didomi = Tracking aktiv)
|
||||||
|
→ Impressum-Finding: 404 auf /impressum/ (Verstoss gegen §5 DDG, vormals §5 TMG)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Phase 5: Branchenspezifische Pruefmodule
|
||||||
|
|
||||||
|
### 5a: Zahlungsanbieter (PCI-DSS + DSGVO)
|
||||||
|
|
||||||
|
Pruefung der korrekten Einbindung von Payment-Anbietern:
|
||||||
|
|
||||||
|
```
|
||||||
|
Erkennungsmuster → Pruefpunkte
|
||||||
|
|
||||||
|
Stripe (js.stripe.com)
|
||||||
|
✓ Wird Stripe.js korrekt ueber stripe.com geladen (nicht self-hosted)?
|
||||||
|
✓ Ist "Payment" oder "Zahlung" in der DSE mit Stripe erwaehnt?
|
||||||
|
✓ Rechtsgrundlage: Art. 6(1)(b) Vertragserfuellung angegeben?
|
||||||
|
✓ SCCs oder DPF fuer USA-Transfer dokumentiert?
|
||||||
|
✗ Werden Kreditkartendaten an eigenen Server gesendet (PCI-Verstoss)?
|
||||||
|
|
||||||
|
PayPal (paypal.com/sdk)
|
||||||
|
✓ PayPal in DSE erwaehnt?
|
||||||
|
✓ Rechtsgrundlage angegeben?
|
||||||
|
✓ Hinweis auf PayPal-eigene DSE verlinkt?
|
||||||
|
|
||||||
|
Adyen (adyen.com)
|
||||||
|
✓ EU-Anbieter (NL) — kein Drittlandtransfer
|
||||||
|
✓ In DSE erwaehnt?
|
||||||
|
|
||||||
|
Mollie (mollie.com)
|
||||||
|
✓ EU-Anbieter (NL) — kein Drittlandtransfer
|
||||||
|
✓ In DSE erwaehnt?
|
||||||
|
|
||||||
|
Klarna (klarna.com)
|
||||||
|
✓ EU-Anbieter (SE) — kein Drittlandtransfer
|
||||||
|
✓ Bonitaetspruefung erwaehnt? (Art. 22 DSGVO — automatisierte Entscheidung!)
|
||||||
|
✓ SCHUFA/Auskunftei-Hinweis vorhanden?
|
||||||
|
```
|
||||||
|
|
||||||
|
**Besonderer Check:** Wenn ein Payment-Anbieter Bonitaetspruefungen durchfuehrt
|
||||||
|
(Klarna, PayPal, Ratenzahlung), MUSS Art. 22 DSGVO in der DSE erwaehnt werden
|
||||||
|
(automatisierte Einzelentscheidung). Das ist ein haeufig uebersehener Verstoss.
|
||||||
|
|
||||||
|
### 5b: Marketing & Tracking (ePrivacy + DSGVO)
|
||||||
|
|
||||||
|
Systematische Pruefung aller Marketing-/Tracking-Einbindungen:
|
||||||
|
|
||||||
|
```
|
||||||
|
Kategorie: Webanalyse
|
||||||
|
──────────────────────
|
||||||
|
Google Analytics → Einwilligung PFLICHT, DSE-Eintrag, IP-Anonymisierung pruefen
|
||||||
|
Matomo (Cloud) → Einwilligung empfohlen, DSE-Eintrag
|
||||||
|
Matomo (Self-Host) → Keine Einwilligung noetig wenn cookieless, DSE-Eintrag
|
||||||
|
Plausible/Pirsch → Keine Einwilligung noetig (cookieless, EU), DSE-Eintrag
|
||||||
|
|
||||||
|
Kategorie: Werbenetzwerke
|
||||||
|
─────────────────────────
|
||||||
|
Google Ads/AdSense → Einwilligung PFLICHT, DSE-Eintrag, Drittlandtransfer
|
||||||
|
Meta/Facebook Pixel → Einwilligung PFLICHT, DSE-Eintrag, Drittlandtransfer
|
||||||
|
TikTok Pixel → Einwilligung PFLICHT, DSE-Eintrag, Drittlandtransfer (China!)
|
||||||
|
Pinterest Tag → Einwilligung PFLICHT, DSE-Eintrag, Drittlandtransfer
|
||||||
|
LinkedIn Insight → Einwilligung PFLICHT, DSE-Eintrag, Drittlandtransfer
|
||||||
|
Twitter/X Pixel → Einwilligung PFLICHT, DSE-Eintrag, Drittlandtransfer
|
||||||
|
Criteo → Einwilligung PFLICHT, DSE-Eintrag (FR/EU aber Tracking)
|
||||||
|
|
||||||
|
Kategorie: Remarketing / Retargeting
|
||||||
|
─────────────────────────────────────
|
||||||
|
Google Remarketing → Einwilligung PFLICHT, eigene DSE-Sektion empfohlen
|
||||||
|
Facebook Custom Audiences → Einwilligung PFLICHT, Hochladen von Kundenlisten pruefen
|
||||||
|
|
||||||
|
Kategorie: Heatmaps & Session Recording
|
||||||
|
────────────────────────────────────────
|
||||||
|
Hotjar → Einwilligung PFLICHT (zeichnet Nutzerverhalten auf!)
|
||||||
|
Microsoft Clarity → Einwilligung PFLICHT (Session Replay!)
|
||||||
|
FullStory → Einwilligung PFLICHT, DSE-Eintrag, Drittlandtransfer
|
||||||
|
Mouseflow → Einwilligung PFLICHT, DSE-Eintrag
|
||||||
|
|
||||||
|
Kategorie: A/B Testing
|
||||||
|
───────────────────────
|
||||||
|
Google Optimize → Eingestellt, aber Legacy-Code pruefen
|
||||||
|
Optimizely → Einwilligung je nach Implementierung
|
||||||
|
VWO → Einwilligung wenn Cookies gesetzt werden
|
||||||
|
|
||||||
|
Kategorie: Newsletter / E-Mail Marketing
|
||||||
|
─────────────────────────────────────────
|
||||||
|
Mailchimp → Drittlandtransfer (USA), SCCs pruefen
|
||||||
|
Brevo (ehem. Sendinblue) → EU (FR) — OK
|
||||||
|
CleverReach → EU (DE) — OK
|
||||||
|
ActiveCampaign → USA, Drittlandtransfer
|
||||||
|
HubSpot → USA, Drittlandtransfer
|
||||||
|
Rapidmail → EU (DE) — OK
|
||||||
|
```
|
||||||
|
|
||||||
|
### Prueflogik pro Marketing-Dienst
|
||||||
|
|
||||||
|
```python
|
||||||
|
def check_marketing_service(service: DetectedService, dse_text: str) -> list[Finding]:
|
||||||
|
findings = []
|
||||||
|
registry = THIRD_PARTY_REGISTRY[service.id]
|
||||||
|
|
||||||
|
# 1. In DSE erwaehnt?
|
||||||
|
if not service_mentioned_in_dse(service, dse_text):
|
||||||
|
findings.append(Finding(
|
||||||
|
severity="HIGH",
|
||||||
|
code=f"MARKETING-{service.id}-NOT-IN-DSE",
|
||||||
|
text=f"{service.name} ist auf der Website eingebunden aber nicht in der "
|
||||||
|
f"Datenschutzerklaerung erwaehnt (Art. 13 DSGVO Verstoss)."
|
||||||
|
))
|
||||||
|
|
||||||
|
# 2. Einwilligung vorhanden?
|
||||||
|
if registry["requires_consent"] and not consent_banner_detected:
|
||||||
|
findings.append(Finding(
|
||||||
|
severity="HIGH",
|
||||||
|
code=f"MARKETING-{service.id}-NO-CONSENT",
|
||||||
|
text=f"{service.name} erfordert eine Einwilligung vor Aktivierung "
|
||||||
|
f"(§25 TDDDG). Kein funktionierender Consent-Banner erkannt."
|
||||||
|
))
|
||||||
|
|
||||||
|
# 3. Drittlandtransfer?
|
||||||
|
if not registry["eu_adequate"]:
|
||||||
|
if not sccs_mentioned_in_dse(service, dse_text):
|
||||||
|
findings.append(Finding(
|
||||||
|
severity="MEDIUM",
|
||||||
|
code=f"TRANSFER-{service.id}-NO-SCCS",
|
||||||
|
text=f"{service.name} ({registry['country']}) — Drittlandtransfer "
|
||||||
|
f"ohne dokumentierte Schutzmaßnahme ({registry['legal_ref']})."
|
||||||
|
))
|
||||||
|
|
||||||
|
return findings
|
||||||
|
```
|
||||||
|
|
||||||
|
## Phase 6: Automatische Korrekturvorschlaege (Pre-Launch Modus)
|
||||||
|
|
||||||
|
Im **internen Pruefmodus** (pre_launch) erstellt der Agent bei JEDER Abweichung
|
||||||
|
einen konkreten, einbaufertigen Korrekturvorschlag:
|
||||||
|
|
||||||
|
### Korrekturtypen
|
||||||
|
|
||||||
|
**Typ 1: DSE-Textbaustein (Dienstleister fehlt in DSE)**
|
||||||
|
|
||||||
|
Wenn ein Dienstleister auf der Website erkannt wird, aber nicht in der DSE steht,
|
||||||
|
generiert der Agent einen fertigen Textbaustein:
|
||||||
|
|
||||||
|
```
|
||||||
|
ABWEICHUNG: Google Analytics (G-03F834EHLM) eingebunden, nicht in DSE dokumentiert.
|
||||||
|
|
||||||
|
KORREKTURVORSCHLAG (einbaufertig):
|
||||||
|
──────────────────────────────────
|
||||||
|
Webanalyse
|
||||||
|
|
||||||
|
Wir nutzen Google Analytics, einen Webanalysedienst der Google Ireland Limited
|
||||||
|
(Gordon House, Barrow Street, Dublin 4, Irland). Google Analytics verwendet Cookies,
|
||||||
|
die eine Analyse der Benutzung der Website ermoeglichen.
|
||||||
|
|
||||||
|
Rechtsgrundlage: Art. 6 Abs. 1 lit. a DSGVO (Einwilligung).
|
||||||
|
|
||||||
|
Die durch Cookies erzeugten Informationen werden in der Regel an einen Server von
|
||||||
|
Google in den USA uebertragen. Wir haben mit Google einen Auftragsverarbeitungsvertrag
|
||||||
|
abgeschlossen. Die Uebermittlung in die USA wird auf Standardvertragsklauseln
|
||||||
|
der EU-Kommission gestuetzt.
|
||||||
|
|
||||||
|
Sie koennen die Speicherung der Cookies durch eine entsprechende Einstellung Ihrer
|
||||||
|
Browser-Software verhindern. Sie koennen darueber hinaus die Erfassung der durch
|
||||||
|
das Cookie erzeugten und auf Ihre Nutzung der Website bezogenen Daten an Google
|
||||||
|
sowie die Verarbeitung dieser Daten durch Google verhindern, indem Sie das unter
|
||||||
|
dem folgenden Link verfuegbare Browser-Add-On herunterladen und installieren:
|
||||||
|
https://tools.google.com/dlpage/gaoptout
|
||||||
|
|
||||||
|
Weitere Informationen: https://policies.google.com/privacy
|
||||||
|
──────────────────────────────────
|
||||||
|
|
||||||
|
EINFUEGEN NACH: Abschnitt "Cookies" oder "Webanalyse"
|
||||||
|
PRIORITAET: Hoch — vor Veroeffentlichung korrigieren
|
||||||
|
```
|
||||||
|
|
||||||
|
**Typ 2: Rechtsgrundlage fehlt/falsch**
|
||||||
|
|
||||||
|
```
|
||||||
|
ABWEICHUNG: Klarna als Zahlungsanbieter genannt, aber kein Hinweis auf
|
||||||
|
automatisierte Bonitaetspruefung (Art. 22 DSGVO).
|
||||||
|
|
||||||
|
KORREKTURVORSCHLAG:
|
||||||
|
──────────────────
|
||||||
|
Ergaenzen Sie im Abschnitt "Zahlungsabwicklung":
|
||||||
|
|
||||||
|
"Bei Auswahl der Zahlungsart 'Rechnung' oder 'Ratenzahlung' ueber Klarna
|
||||||
|
wird eine automatisierte Bonitaetspruefung durchgefuehrt. Klarna uebermittelt
|
||||||
|
hierzu Ihre Angaben an Wirtschaftsauskunfteien (z.B. SCHUFA). Rechtsgrundlage
|
||||||
|
ist Art. 6 Abs. 1 lit. b DSGVO (Vertragserfuellung). Sie haben gemaess
|
||||||
|
Art. 22 Abs. 3 DSGVO das Recht, Ihren Standpunkt darzulegen und die
|
||||||
|
Entscheidung anzufechten."
|
||||||
|
──────────────────
|
||||||
|
```
|
||||||
|
|
||||||
|
**Typ 3: Dienst nicht mehr eingebunden (Aufraeumen)**
|
||||||
|
|
||||||
|
```
|
||||||
|
ABWEICHUNG: "Facebook Pixel" in DSE Abschnitt 4.2 erwaehnt, aber auf der
|
||||||
|
Website nicht mehr eingebunden.
|
||||||
|
|
||||||
|
KORREKTURVORSCHLAG:
|
||||||
|
──────────────────
|
||||||
|
Entfernen Sie den Absatz zu Facebook Pixel in Abschnitt 4.2 oder
|
||||||
|
kennzeichnen Sie ihn als "nicht mehr aktiv". Veraltete Eintraege in der
|
||||||
|
DSE sind zwar kein Rechtsverstoß, koennen aber bei einer Pruefung durch
|
||||||
|
die Aufsichtsbehoerde Fragen aufwerfen.
|
||||||
|
──────────────────
|
||||||
|
PRIORITAET: Niedrig — bei naechster DSE-Aktualisierung bereinigen
|
||||||
|
```
|
||||||
|
|
||||||
|
### Implementierung der Korrekturvorschlaege
|
||||||
|
|
||||||
|
```python
|
||||||
|
async def generate_correction(
|
||||||
|
service: DetectedService,
|
||||||
|
finding_type: str, # "missing_in_dse", "wrong_legal_basis", "outdated"
|
||||||
|
dse_text: str,
|
||||||
|
mode: str,
|
||||||
|
) -> str:
|
||||||
|
"""Generiere einbaufertigen Korrekturvorschlag via Qwen."""
|
||||||
|
if mode != "pre_launch":
|
||||||
|
return "" # Nur im internen Modus
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
/no_think
|
||||||
|
Du bist ein Datenschutzexperte. Erstelle einen einbaufertigen Textbaustein
|
||||||
|
fuer eine deutsche Datenschutzerklaerung.
|
||||||
|
|
||||||
|
Dienstleister: {service.name}
|
||||||
|
Anbieter: {service.provider}
|
||||||
|
Land: {service.country}
|
||||||
|
Zweck: {service.purpose}
|
||||||
|
Finding: {finding_type}
|
||||||
|
|
||||||
|
Der Textbaustein muss enthalten:
|
||||||
|
1. Ueberschrift (z.B. "Webanalyse" oder "Zahlungsabwicklung")
|
||||||
|
2. Name und Sitz des Anbieters
|
||||||
|
3. Zweck der Verarbeitung
|
||||||
|
4. Rechtsgrundlage (korrekt nach DSGVO)
|
||||||
|
5. Drittlandtransfer-Hinweis wenn nicht EU
|
||||||
|
6. Widerspruchsmoeglichkeit
|
||||||
|
|
||||||
|
Antworte NUR mit dem fertigen Textbaustein, ohne Erklaerung.
|
||||||
|
"""
|
||||||
|
# LLM-Call via SDK
|
||||||
|
response = await sdk_llm_chat(prompt)
|
||||||
|
return response
|
||||||
|
```
|
||||||
|
|
||||||
|
### Ergebnis-Format im Agent-Output
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"findings": [...],
|
||||||
|
"corrections": [
|
||||||
|
{
|
||||||
|
"finding_code": "MARKETING-google_analytics-NOT-IN-DSE",
|
||||||
|
"type": "missing_in_dse",
|
||||||
|
"service": "Google Analytics",
|
||||||
|
"priority": "high",
|
||||||
|
"insert_after": "Abschnitt Cookies / Webanalyse",
|
||||||
|
"correction_text": "Webanalyse\n\nWir nutzen Google Analytics...",
|
||||||
|
"legal_refs": ["Art. 13 DSGVO", "Art. 44-49 DSGVO", "§25 TDDDG"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Frontend: Korrekturvorschlaege anzeigen
|
||||||
|
|
||||||
|
Im Pre-Launch-Modus zeigt das Frontend fuer jedes Finding einen aufklappbaren
|
||||||
|
Korrekturvorschlag mit Copy-Button:
|
||||||
|
|
||||||
|
```
|
||||||
|
[!] Google Analytics eingebunden, nicht in DSE
|
||||||
|
Prioritaet: Hoch | Art. 13 DSGVO
|
||||||
|
|
||||||
|
▼ Korrekturvorschlag anzeigen
|
||||||
|
┌─────────────────────────────────────────┐
|
||||||
|
│ Webanalyse │
|
||||||
|
│ │
|
||||||
|
│ Wir nutzen Google Analytics, einen │
|
||||||
|
│ Webanalysedienst der Google Ireland │
|
||||||
|
│ Limited... │
|
||||||
|
│ [📋] │
|
||||||
|
└─────────────────────────────────────────┘
|
||||||
|
Einfuegen nach: Abschnitt "Cookies"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Risiken
|
||||||
|
|
||||||
|
| Risiko | Mitigation |
|
||||||
|
|--------|------------|
|
||||||
|
| Zu aggressive Filterung (False Negatives) | Stufe 1 nur fuer klare Faelle, Stufe 2 als Fallback |
|
||||||
|
| LLM-Kosten bei vielen Controls | Caching + nur high-severity Controls |
|
||||||
|
| Datenbank-Migration auf Production | `ADD COLUMN IF NOT EXISTS` ist non-blocking |
|
||||||
|
| 166k Controls ohne relevance_conditions | Default `{}` = kein Filter = bisheriges Verhalten |
|
||||||
|
| Qwen-generierte Textbausteine rechtlich falsch | Review-Pflicht: "Vom KI-Assistenten erstellt, Pruefung durch DSB empfohlen" |
|
||||||
|
| Website-Scan zu langsam (10 Seiten fetchen) | Parallel fetchen, max 5s Timeout pro Seite, max 10 Seiten |
|
||||||
|
| Payment-Einbindung nicht im HTML sichtbar | Follow-Up: "Welche Zahlungsanbieter nutzen Sie?" |
|
||||||
|
|
||||||
|
## Testfaelle
|
||||||
|
|
||||||
|
1. **Opodo-Test:** C_TRANSPARENCY sollte NICHT mehr empfohlen werden (kein KI-Nachweis)
|
||||||
|
2. **Chatbot-Anbieter:** C_TRANSPARENCY SOLL empfohlen werden (KI explizit erwaehnt)
|
||||||
|
3. **Arztpraxis-Website:** C_DSFA_REQUIRED SOLL empfohlen werden (Gesundheitsdaten)
|
||||||
|
4. **Blog ohne Tracking:** Nur minimale Controls (Impressum, Datenschutzerklaerung)
|
||||||
|
5. **Shop mit Klarna:** Art. 22 DSGVO Finding wenn Bonitaetspruefung nicht in DSE
|
||||||
|
6. **Website mit Google Fonts:** LG Muenchen I Finding + Korrekturvorschlag "lokal einbinden"
|
||||||
|
7. **Pre-Launch DSE-Entwurf:** Korrekturvorschlaege fuer fehlende Dienstleister-Eintraege
|
||||||
|
8. **Opodo Soll-Ist:** jsdelivr CDN eingebunden, aber nicht in DSE → Finding + Textbaustein
|
||||||
Reference in New Issue
Block a user