Compare commits
49 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 872145d883 | |||
| 9bdaa28038 | |||
| e2be51b0aa | |||
| bd65b6f318 | |||
| c771d8ecb9 | |||
| 772ff35e8d | |||
| 8cbb513e2c | |||
| 6c35bcf116 | |||
| 19d4b12e07 | |||
| 2e87b74749 | |||
| 94233b7c66 | |||
| 6263462ba3 | |||
| eb48c5bd1e | |||
| 081e4f057a | |||
| 16fd406c1a | |||
| c5c168592b | |||
| d0274674a0 | |||
| 2eb7349577 | |||
| 4434e3827b | |||
| 07cc00da11 | |||
| 1451873194 | |||
| dfac940272 | |||
| cb5dad1a2f | |||
| e411c4f0d3 | |||
| 7335f64f4f | |||
| 138d9068c4 | |||
| c281464071 | |||
| 6dc427a754 | |||
| 309c10c203 | |||
| 4183379dc5 | |||
| c93c88577c | |||
| 3207acea3e | |||
| 9f06911ff9 | |||
| 338e03d3b0 | |||
| c491af5d02 | |||
| 4171cf0efd | |||
| 30e43afba6 | |||
| df8832c521 | |||
| 7842c95532 | |||
| 08671adfdf | |||
| 50fc0ecc59 | |||
| 94057b1536 | |||
| 9c11b5463c | |||
| 50ed0f45af | |||
| e1df24cad7 | |||
| e5b4672f2a | |||
| 0d5c76ea98 | |||
| 54f5a06c2f | |||
| 86b4a263d2 |
@@ -55,5 +55,9 @@ EXPOSE 3000
|
||||
# Set hostname
|
||||
ENV HOSTNAME="0.0.0.0"
|
||||
|
||||
# P83 — Build-SHA fuer check-rebuild-needed.sh
|
||||
ARG BUILD_SHA="unknown"
|
||||
ENV BUILD_SHA=${BUILD_SHA}
|
||||
|
||||
# Start the application
|
||||
CMD ["node", "server.js"]
|
||||
|
||||
@@ -2,30 +2,41 @@
|
||||
|
||||
import React, { useState } from 'react'
|
||||
import { ChecklistView } from './ChecklistView'
|
||||
import { ResultsTabsView } from './ResultsTabsView'
|
||||
import { PreScanWizard, useScanContext, isContextComplete } from './PreScanWizard'
|
||||
import { safeSetItem } from './storageHelpers'
|
||||
|
||||
interface DocEntry {
|
||||
id: string
|
||||
type: string
|
||||
label: string
|
||||
url: string
|
||||
text: string // P-Paste: User kopiert Doc-Text direkt rein
|
||||
mode: 'url' | 'text' // welcher Input wird aktiv genutzt
|
||||
}
|
||||
|
||||
const DOC_TYPES = [
|
||||
{ id: 'dse', label: 'DSI (Datenschutzinformation)' },
|
||||
{ id: 'dse', label: 'Datenschutzerklärung / DSI' },
|
||||
{ id: 'cookie', label: 'Cookie-Richtlinie' },
|
||||
{ id: 'impressum', label: 'Impressum' },
|
||||
{ id: 'agb', label: 'AGB' },
|
||||
{ id: 'nutzungsbedingungen', label: 'Nutzungsbedingungen' },
|
||||
{ id: 'widerruf', label: 'Widerrufsbelehrung' },
|
||||
{ id: 'social_media', label: 'DSE Social Media (Art. 26)' },
|
||||
{ id: 'dsfa', label: 'DSFA (Art. 35)' },
|
||||
{ id: 'agb', label: 'AGB / Nutzungsbedingungen' },
|
||||
{ id: 'impressum', label: 'Impressum' },
|
||||
{ id: 'cookie', label: 'Cookie-Richtlinie' },
|
||||
{ id: 'widerruf', label: 'Widerrufsbelehrung' },
|
||||
{ id: 'dsa', label: 'DSA / Digital Services Act' },
|
||||
{ id: 'legal_notice', label: 'Rechtliche Hinweise (IP, Forward-Looking)' },
|
||||
{ id: 'lizenzhinweise', label: 'Lizenzhinweise Dritter (OSS)' },
|
||||
{ id: 'other', label: 'Sonstiges' },
|
||||
]
|
||||
|
||||
function newEntry(): DocEntry {
|
||||
return { id: crypto.randomUUID().slice(0, 8), type: 'dse', label: '', url: '' }
|
||||
return { id: crypto.randomUUID().slice(0, 8), type: 'dse', label: '',
|
||||
url: '', text: '', mode: 'url' }
|
||||
}
|
||||
|
||||
export function DocCheckTab() {
|
||||
const [scanContext, setScanContext] = useScanContext()
|
||||
const [entries, setEntries] = useState<DocEntry[]>(() => {
|
||||
if (typeof window === 'undefined') return [newEntry()]
|
||||
try { const s = localStorage.getItem('doc-check-entries'); return s ? JSON.parse(s) : [newEntry()] } catch { return [newEntry()] }
|
||||
@@ -74,7 +85,7 @@ export function DocCheckTab() {
|
||||
}
|
||||
|
||||
const handleSubmit = async () => {
|
||||
const validEntries = entries.filter(e => e.url.trim())
|
||||
const validEntries = entries.filter(e => e.url.trim() || e.text.trim())
|
||||
if (validEntries.length === 0) return
|
||||
|
||||
setLoading(true)
|
||||
@@ -89,11 +100,17 @@ export function DocCheckTab() {
|
||||
body: JSON.stringify({
|
||||
entries: validEntries.map(e => ({
|
||||
doc_type: e.type,
|
||||
label: e.label || e.url.split('/').pop() || 'Dokument',
|
||||
url: e.url.trim(),
|
||||
label: e.label
|
||||
|| (e.url ? e.url.split('/').pop() : '')
|
||||
|| `${e.type}-paste`,
|
||||
url: e.mode === 'text' ? '' : e.url.trim(),
|
||||
// Backend nimmt text > url. Wenn beide gefuellt sind und
|
||||
// mode='url', schicken wir den text NICHT mit.
|
||||
text: e.mode === 'text' ? e.text.trim() : '',
|
||||
})),
|
||||
check_cookie_banner: checkCookieBanner,
|
||||
use_agent: useAgent,
|
||||
scan_context: scanContext,
|
||||
}),
|
||||
})
|
||||
if (!startRes.ok) throw new Error(`Pruefung konnte nicht gestartet werden: ${startRes.status}`)
|
||||
@@ -111,13 +128,13 @@ export function DocCheckTab() {
|
||||
if (pollData.status === 'completed' && pollData.result) {
|
||||
setResults(pollData.result)
|
||||
setProgress('')
|
||||
localStorage.setItem('doc-check-results', JSON.stringify(pollData.result))
|
||||
safeSetItem('doc-check-results', JSON.stringify(pollData.result))
|
||||
const resultKey = `doc-check-result-${Date.now()}`
|
||||
try { localStorage.setItem(resultKey, JSON.stringify(pollData.result)) } catch { /* quota */ }
|
||||
safeSetItem(resultKey, JSON.stringify(pollData.result))
|
||||
const entry = { date: new Date().toISOString(), urls: validEntries.length, findings: pollData.result.total_findings || 0, resultKey }
|
||||
const updated = [entry, ...history].slice(0, 30)
|
||||
setHistory(updated)
|
||||
localStorage.setItem('doc-check-history', JSON.stringify(updated))
|
||||
safeSetItem('doc-check-history', JSON.stringify(updated))
|
||||
break
|
||||
}
|
||||
if (pollData.status === 'failed') {
|
||||
@@ -133,43 +150,90 @@ export function DocCheckTab() {
|
||||
}
|
||||
}
|
||||
|
||||
const contextReady = isContextComplete(scanContext)
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* URL Entries */}
|
||||
<div className="space-y-2">
|
||||
{/* P79 Pre-Scan-Wizard — 8 Pflichtfelder */}
|
||||
<PreScanWizard value={scanContext} onChange={setScanContext} />
|
||||
|
||||
{/* URL / Text Entries */}
|
||||
<div className="space-y-3">
|
||||
{entries.map((entry, i) => (
|
||||
<div key={entry.id} className="flex items-center gap-2">
|
||||
<select
|
||||
value={entry.type}
|
||||
onChange={e => updateEntry(entry.id, 'type', e.target.value)}
|
||||
className="w-48 px-3 py-2.5 border border-gray-300 rounded-lg text-sm bg-white shrink-0"
|
||||
>
|
||||
{DOC_TYPES.map(t => (
|
||||
<option key={t.id} value={t.id}>{t.label}</option>
|
||||
))}
|
||||
</select>
|
||||
<input
|
||||
type="text"
|
||||
value={entry.label}
|
||||
onChange={e => updateEntry(entry.id, 'label', e.target.value)}
|
||||
placeholder={entry.type === 'other' ? 'Dokumentname' : 'Version / Stand (optional)'}
|
||||
className="w-40 px-3 py-2.5 border border-gray-300 rounded-lg text-sm shrink-0"
|
||||
/>
|
||||
<input
|
||||
type="url"
|
||||
value={entry.url}
|
||||
onChange={e => updateEntry(entry.id, 'url', e.target.value)}
|
||||
onBlur={() => autoLabel(entry)}
|
||||
placeholder="https://example.com/datenschutz"
|
||||
className="flex-1 px-3 py-2.5 border border-gray-300 rounded-lg text-sm"
|
||||
/>
|
||||
{entries.length > 1 && (
|
||||
<button onClick={() => removeEntry(entry.id)}
|
||||
className="p-2 text-gray-400 hover:text-red-500 shrink-0">
|
||||
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
|
||||
</svg>
|
||||
</button>
|
||||
<div key={entry.id} className="space-y-1.5">
|
||||
<div className="flex items-center gap-2">
|
||||
<select
|
||||
value={entry.type}
|
||||
onChange={e => updateEntry(entry.id, 'type', e.target.value)}
|
||||
className="w-48 px-3 py-2.5 border border-gray-300 rounded-lg text-sm bg-white shrink-0"
|
||||
>
|
||||
{DOC_TYPES.map(t => (
|
||||
<option key={t.id} value={t.id}>{t.label}</option>
|
||||
))}
|
||||
</select>
|
||||
<input
|
||||
type="text"
|
||||
value={entry.label}
|
||||
onChange={e => updateEntry(entry.id, 'label', e.target.value)}
|
||||
placeholder={entry.type === 'other' ? 'Dokumentname' : 'Version / Stand (optional)'}
|
||||
className="w-40 px-3 py-2.5 border border-gray-300 rounded-lg text-sm shrink-0"
|
||||
/>
|
||||
|
||||
{/* Mode-Toggle URL / Text */}
|
||||
<div className="inline-flex border border-gray-300 rounded-lg overflow-hidden text-xs shrink-0">
|
||||
<button type="button"
|
||||
onClick={() => updateEntry(entry.id, 'mode', 'url')}
|
||||
className={`px-3 py-2 ${entry.mode === 'url'
|
||||
? 'bg-purple-600 text-white' : 'bg-white text-gray-600 hover:bg-gray-50'}`}>
|
||||
URL
|
||||
</button>
|
||||
<button type="button"
|
||||
onClick={() => updateEntry(entry.id, 'mode', 'text')}
|
||||
className={`px-3 py-2 ${entry.mode === 'text'
|
||||
? 'bg-purple-600 text-white' : 'bg-white text-gray-600 hover:bg-gray-50'}`}>
|
||||
Text einfügen
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{entry.mode === 'url' && (
|
||||
<input
|
||||
type="url"
|
||||
value={entry.url}
|
||||
onChange={e => updateEntry(entry.id, 'url', e.target.value)}
|
||||
onBlur={() => autoLabel(entry)}
|
||||
placeholder="https://example.com/datenschutz"
|
||||
className="flex-1 px-3 py-2.5 border border-gray-300 rounded-lg text-sm"
|
||||
/>
|
||||
)}
|
||||
|
||||
{entries.length > 1 && (
|
||||
<button onClick={() => removeEntry(entry.id)}
|
||||
className="p-2 text-gray-400 hover:text-red-500 shrink-0">
|
||||
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
|
||||
</svg>
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{entry.mode === 'text' && (
|
||||
<div className="ml-[400px]">
|
||||
<textarea
|
||||
value={entry.text}
|
||||
onChange={e => updateEntry(entry.id, 'text', e.target.value)}
|
||||
placeholder={
|
||||
entry.type === 'cookie'
|
||||
? 'Kopiere hier die komplette Cookie-Tabelle rein (Tab-getrennt oder mit | als Trenner — wir parsen alle Spalten deterministisch)…'
|
||||
: 'Kopiere hier den vollständigen Doc-Text rein. Wir erkennen automatisch ob es zu „' + (DOC_TYPES.find(t => t.id === entry.type)?.label ?? entry.type) + '" passt.'
|
||||
}
|
||||
className="w-full h-32 px-3 py-2 border border-gray-300 rounded-lg text-xs font-mono resize-y"
|
||||
/>
|
||||
<div className="text-[10px] text-gray-500 mt-1">
|
||||
{entry.text.trim().length > 0
|
||||
? `${entry.text.trim().length.toLocaleString('de-DE')} Zeichen · ${entry.text.trim().split(/\s+/).length.toLocaleString('de-DE')} Wörter`
|
||||
: 'Der Crawler wird übersprungen — die Analyse läuft direkt auf dem eingefügten Text.'}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
@@ -212,8 +276,11 @@ export function DocCheckTab() {
|
||||
{/* Submit */}
|
||||
<button
|
||||
onClick={handleSubmit}
|
||||
disabled={loading || entries.every(e => !e.url.trim())}
|
||||
disabled={loading
|
||||
|| entries.every(e => !e.url.trim() && !e.text.trim())
|
||||
|| !contextReady}
|
||||
className="w-full px-4 py-3 bg-purple-600 text-white rounded-lg font-medium hover:bg-purple-700 disabled:opacity-50 transition-colors text-sm flex items-center justify-center gap-2"
|
||||
title={!contextReady ? 'Bitte zuerst die 8 Pflichtfelder ausfüllen' : undefined}
|
||||
>
|
||||
{loading ? (
|
||||
<>
|
||||
@@ -223,6 +290,8 @@ export function DocCheckTab() {
|
||||
</svg>
|
||||
Pruefe...
|
||||
</>
|
||||
) : !contextReady ? (
|
||||
`Klassifizierung unvollständig (8 Pflichtfelder)`
|
||||
) : (
|
||||
`${entries.filter(e => e.url.trim()).length} Dokument${entries.filter(e => e.url.trim()).length !== 1 ? 'e' : ''} pruefen`
|
||||
)}
|
||||
@@ -244,41 +313,9 @@ export function DocCheckTab() {
|
||||
<div className="bg-red-50 border border-red-200 rounded-lg p-3 text-sm text-red-700">{error}</div>
|
||||
)}
|
||||
|
||||
{/* Results */}
|
||||
{/* Results — als Tab-Ansicht (Übersicht/Cookies/DSE/Impressum/AGB/Banner/Mail) */}
|
||||
{results && results.results && (
|
||||
<div className="bg-white border border-gray-200 rounded-xl p-6 shadow-sm">
|
||||
<ChecklistView results={results.results} />
|
||||
|
||||
{/* Cookie Banner Result */}
|
||||
{results.cookie_banner_result && (
|
||||
<div className="mt-4 pt-4 border-t border-gray-200">
|
||||
<h4 className="text-sm font-semibold text-gray-800 mb-2">Cookie-Banner</h4>
|
||||
<div className="text-sm text-gray-600">
|
||||
{results.cookie_banner_result.banner_detected
|
||||
? `Banner erkannt: ${results.cookie_banner_result.banner_provider || 'unbekannt'}`
|
||||
: 'Kein Banner erkannt'}
|
||||
</div>
|
||||
{results.cookie_banner_result.banner_checks?.violations?.length > 0 && (
|
||||
<div className="mt-2 space-y-1">
|
||||
{results.cookie_banner_result.banner_checks.violations.map((v: any, i: number) => (
|
||||
<div key={i} className="text-xs text-red-600 flex items-start gap-1.5">
|
||||
<span className="shrink-0 mt-0.5">!!</span>
|
||||
<span>{v.text}</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Email Status */}
|
||||
{results.email_status && (
|
||||
<div className="mt-3 text-xs text-gray-500 flex items-center gap-2">
|
||||
<span className={`w-2 h-2 rounded-full ${results.email_status === 'sent' ? 'bg-green-400' : 'bg-gray-300'}`} />
|
||||
E-Mail: {results.email_status === 'sent' ? 'Gesendet' : results.email_status}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<ResultsTabsView results={results} />
|
||||
)}
|
||||
|
||||
{/* History */}
|
||||
|
||||
@@ -0,0 +1,269 @@
|
||||
'use client'
|
||||
|
||||
/**
|
||||
* P79 — Pre-Scan-Wizard (8 Pflichtfelder).
|
||||
*
|
||||
* 8 Pflichtfelder die vor dem Lauf abgefragt werden. Werte landen im
|
||||
* scan_context und filtern später die MC-Auswertung (zusammen mit P72
|
||||
* scope_doc_type + applicable_industries). Erwartete Noise-Reduktion:
|
||||
* 70-80% bei falsch zugeordneten HIGH-MCs.
|
||||
*/
|
||||
|
||||
import React, { useState, useEffect } from 'react'
|
||||
|
||||
export interface ScanContext {
|
||||
industry: string
|
||||
business_model: string
|
||||
direct_sales: string
|
||||
legal_form: string
|
||||
group_structure: string
|
||||
employee_count: string
|
||||
special_data: string[]
|
||||
third_country_transfer: string
|
||||
}
|
||||
|
||||
const INDUSTRIES = [
|
||||
{ id: '', label: '— bitte wählen —' },
|
||||
{ id: 'automotive', label: 'Automotive / OEM' },
|
||||
{ id: 'ecommerce', label: 'E-Commerce / Online-Handel' },
|
||||
{ id: 'saas', label: 'SaaS / Software' },
|
||||
{ id: 'banking', label: 'Banking / Finance' },
|
||||
{ id: 'insurance', label: 'Insurance / Versicherung' },
|
||||
{ id: 'healthcare', label: 'Healthcare / Gesundheit' },
|
||||
{ id: 'education', label: 'Bildung / Schule' },
|
||||
{ id: 'public', label: 'Öffentliche Verwaltung' },
|
||||
{ id: 'manufacturing', label: 'Industrie / Manufacturing' },
|
||||
{ id: 'media', label: 'Medien / Verlag' },
|
||||
{ id: 'other', label: 'Sonstige' },
|
||||
]
|
||||
|
||||
const LEGAL_FORMS = [
|
||||
{ id: '', label: '— bitte wählen —' },
|
||||
{ id: 'ag', label: 'AG (Aktiengesellschaft)' },
|
||||
{ id: 'gmbh', label: 'GmbH' },
|
||||
{ id: 'gmbh_co_kg', label: 'GmbH & Co. KG' },
|
||||
{ id: 'kg', label: 'KG' },
|
||||
{ id: 'ohg', label: 'OHG' },
|
||||
{ id: 'ug', label: 'UG (haftungsbeschränkt)' },
|
||||
{ id: 'ek', label: 'e.K. / Einzelunternehmen' },
|
||||
{ id: 'verein', label: 'Verein' },
|
||||
{ id: 'stiftung', label: 'Stiftung' },
|
||||
{ id: 'behoerde', label: 'Behörde / Körperschaft öff. Rechts' },
|
||||
{ id: 'other', label: 'Sonstige' },
|
||||
]
|
||||
|
||||
const GROUP_STRUCTURES = [
|
||||
{ id: '', label: '— bitte wählen —' },
|
||||
{ id: 'standalone', label: 'Eigenständig' },
|
||||
{ id: 'parent', label: 'Konzern-Mutter' },
|
||||
{ id: 'subsidiary', label: 'Konzern-Tochter' },
|
||||
{ id: 'joint_venture', label: 'Joint Venture' },
|
||||
{ id: 'processor', label: 'Reiner Auftragsverarbeiter' },
|
||||
]
|
||||
|
||||
const EMPLOYEE_COUNTS = [
|
||||
{ id: '', label: '— bitte wählen —' },
|
||||
{ id: 'lt10', label: 'unter 10' },
|
||||
{ id: '10_19', label: '10-19' },
|
||||
{ id: '20_49', label: '20-49 (DSB ab 20 Pflicht)' },
|
||||
{ id: '50_249', label: '50-249 (Whistleblower-Pflicht)' },
|
||||
{ id: '250_499', label: '250-499' },
|
||||
{ id: '500_999', label: '500-999' },
|
||||
{ id: '1000_plus', label: '1.000+ (Konzern)' },
|
||||
]
|
||||
|
||||
const SPECIAL_DATA_OPTIONS = [
|
||||
{ id: 'health', label: 'Gesundheitsdaten' },
|
||||
{ id: 'biometric', label: 'Biometrische Daten' },
|
||||
{ id: 'ethnicity', label: 'Religiöse / ethnische Herkunft' },
|
||||
{ id: 'sexual', label: 'Sexuelle Orientierung' },
|
||||
{ id: 'criminal', label: 'Strafrechtliche Daten' },
|
||||
{ id: 'minors', label: 'Minderjährige (<16)' },
|
||||
{ id: 'none', label: 'Keine besonderen Daten' },
|
||||
]
|
||||
|
||||
const STORAGE_KEY = 'compliance-scan-context'
|
||||
|
||||
/**
 * Builds a fresh, completely unanswered scan context.
 *
 * All single-choice answers start as the empty string and the special-data
 * selection starts as an empty list. Every call returns a new object with a
 * new array, so callers may mutate the result freely.
 */
function emptyContext(): ScanContext {
  const unanswered = ''
  const context: ScanContext = {
    industry: unanswered,
    business_model: unanswered,
    direct_sales: unanswered,
    legal_form: unanswered,
    group_structure: unanswered,
    employee_count: unanswered,
    special_data: [],
    third_country_transfer: unanswered,
  }
  return context
}
|
||||
|
||||
/**
 * Reports whether all eight mandatory classification answers are present.
 *
 * The seven single-choice answers must be non-empty strings and at least one
 * special-data option must be selected.
 *
 * The context may have been restored from `localStorage`, so `special_data`
 * is not trusted to be an array: anything else counts as "not answered"
 * instead of throwing on `.length`.
 *
 * @param ctx - The scan context to inspect.
 * @returns `true` only when every mandatory answer is filled in.
 */
export function isContextComplete(ctx: ScanContext): boolean {
  const singleChoiceAnswers = [
    ctx.industry,
    ctx.business_model,
    ctx.direct_sales,
    ctx.legal_form,
    ctx.group_structure,
    ctx.employee_count,
    ctx.third_country_transfer,
  ]
  if (singleChoiceAnswers.some(answer => !answer)) return false

  return Array.isArray(ctx.special_data) && ctx.special_data.length > 0
}
|
||||
|
||||
export function PreScanWizard({
|
||||
value,
|
||||
onChange,
|
||||
}: {
|
||||
value: ScanContext
|
||||
onChange: (ctx: ScanContext) => void
|
||||
}) {
|
||||
// Emits a copy of the current context with exactly one field replaced;
// the incoming `value` object itself is never mutated.
const update = <K extends keyof ScanContext>(key: K, val: ScanContext[K]) => {
  const patched = { ...value, [key]: val }
  onChange(patched)
}
|
||||
|
||||
// Toggles one special-data checkbox and emits the updated context.
//
// 'none' is mutually exclusive with every other option:
//   - ticking 'none' clears all other selections,
//   - ticking any other option removes 'none'.
// Clicking an already-ticked 'none' now unticks it; previously it was
// written back as ['none'] and the checkbox could not be cleared.
const toggleSpecialData = (id: string) => {
  // The context may come from storage, so do not assume an array here.
  const current: string[] = Array.isArray(value.special_data) ? value.special_data : []
  const wasSelected = current.includes(id)

  let next: string[]
  if (id === 'none') {
    next = wasSelected ? [] : ['none']
  } else {
    const others = current.filter(x => x !== 'none' && x !== id)
    next = wasSelected ? others : [...others, id]
  }

  onChange({ ...value, special_data: next })
}
|
||||
|
||||
return (
|
||||
<div style={{
|
||||
background: '#f0f9ff',
|
||||
border: '1px solid #bfdbfe',
|
||||
borderRadius: 8,
|
||||
padding: '14px 16px',
|
||||
marginBottom: 14,
|
||||
}}>
|
||||
<div style={{ fontSize: 11, color: '#1e40af', textTransform: 'uppercase',
|
||||
letterSpacing: 1.2, marginBottom: 4, fontWeight: 600 }}>
|
||||
Pflichtangaben zur Klassifizierung des Audits
|
||||
</div>
|
||||
<h3 style={{ margin: '0 0 6px', fontSize: 14, color: '#1e293b' }}>
|
||||
Vor dem Scan: 8 Angaben zum Unternehmen
|
||||
</h3>
|
||||
<p style={{ margin: '0 0 12px', fontSize: 11, color: '#475569', lineHeight: 1.5 }}>
|
||||
Diese Angaben filtern irrelevante Compliance-Themen heraus (z.B. eHealth-
|
||||
Vorschriften bei einem Autobauer) und liefern eine realistische
|
||||
Einschätzung statt pauschaler Verstoss-Listen.
|
||||
</p>
|
||||
|
||||
<div style={{ display: 'grid', gridTemplateColumns: 'repeat(2, 1fr)', gap: 10 }}>
|
||||
<Field label="1. Branche*">
|
||||
<select value={value.industry} onChange={e => update('industry', e.target.value)} style={inputStyle}>
|
||||
{INDUSTRIES.map(o => <option key={o.id} value={o.id}>{o.label}</option>)}
|
||||
</select>
|
||||
</Field>
|
||||
|
||||
<Field label="2. Geschäftsmodell*">
|
||||
<select value={value.business_model} onChange={e => update('business_model', e.target.value)} style={inputStyle}>
|
||||
<option value="">— bitte wählen —</option>
|
||||
<option value="b2b">B2B</option>
|
||||
<option value="b2c">B2C</option>
|
||||
<option value="both">Beides (B2B + B2C)</option>
|
||||
</select>
|
||||
</Field>
|
||||
|
||||
<Field label="3. Direkt-Vertrieb (Webshop/Buchung)*">
|
||||
<select value={value.direct_sales} onChange={e => update('direct_sales', e.target.value)} style={inputStyle}>
|
||||
<option value="">— bitte wählen —</option>
|
||||
<option value="yes">Ja</option>
|
||||
<option value="no">Nein</option>
|
||||
<option value="lead_funnel">Nur Lead-Funnel (Probefahrten, Anfragen)</option>
|
||||
</select>
|
||||
</Field>
|
||||
|
||||
<Field label="4. Rechtsform*">
|
||||
<select value={value.legal_form} onChange={e => update('legal_form', e.target.value)} style={inputStyle}>
|
||||
{LEGAL_FORMS.map(o => <option key={o.id} value={o.id}>{o.label}</option>)}
|
||||
</select>
|
||||
</Field>
|
||||
|
||||
<Field label="5. Konzern-Struktur*">
|
||||
<select value={value.group_structure} onChange={e => update('group_structure', e.target.value)} style={inputStyle}>
|
||||
{GROUP_STRUCTURES.map(o => <option key={o.id} value={o.id}>{o.label}</option>)}
|
||||
</select>
|
||||
</Field>
|
||||
|
||||
<Field label="6. Mitarbeiterzahl*">
|
||||
<select value={value.employee_count} onChange={e => update('employee_count', e.target.value)} style={inputStyle}>
|
||||
{EMPLOYEE_COUNTS.map(o => <option key={o.id} value={o.id}>{o.label}</option>)}
|
||||
</select>
|
||||
</Field>
|
||||
|
||||
<Field label="7. Besondere Datenkategorien*" colSpan={2}>
|
||||
<div style={{ display: 'flex', flexWrap: 'wrap', gap: 8 }}>
|
||||
{SPECIAL_DATA_OPTIONS.map(o => (
|
||||
<label key={o.id} style={{ fontSize: 12, display: 'inline-flex',
|
||||
alignItems: 'center', gap: 4,
|
||||
padding: '4px 8px', background: '#fff',
|
||||
border: '1px solid #cbd5e1',
|
||||
borderRadius: 4 }}>
|
||||
<input type="checkbox"
|
||||
checked={value.special_data.includes(o.id)}
|
||||
onChange={() => toggleSpecialData(o.id)} />
|
||||
{o.label}
|
||||
</label>
|
||||
))}
|
||||
</div>
|
||||
</Field>
|
||||
|
||||
<Field label="8. Bekannter Drittland-Transfer*" colSpan={2}>
|
||||
<select value={value.third_country_transfer} onChange={e => update('third_country_transfer', e.target.value)} style={inputStyle}>
|
||||
<option value="">— bitte wählen —</option>
|
||||
<option value="yes">Ja (USA, CN, IN, UK, ...)</option>
|
||||
<option value="no">Nein (nur EU/EWR)</option>
|
||||
<option value="unknown">Weiß nicht (bitte automatisch prüfen)</option>
|
||||
</select>
|
||||
</Field>
|
||||
</div>
|
||||
|
||||
{!isContextComplete(value) && (
|
||||
<div style={{ marginTop: 10, fontSize: 11, color: '#92400e',
|
||||
background: '#fef3c7', padding: '6px 10px',
|
||||
borderRadius: 4, border: '1px solid #fde68a' }}>
|
||||
Bitte alle 8 Pflichtfelder ausfüllen — der Scan-Button wird erst aktiv,
|
||||
wenn die Klassifizierung komplett ist.
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const inputStyle: React.CSSProperties = {
|
||||
width: '100%',
|
||||
padding: '6px 8px',
|
||||
fontSize: 12,
|
||||
border: '1px solid #cbd5e1',
|
||||
borderRadius: 4,
|
||||
background: '#fff',
|
||||
}
|
||||
|
||||
/**
 * Labelled grid cell used by the wizard form.
 *
 * Renders a small caption above `children`. When `colSpan` is given the cell
 * spans that many grid columns; otherwise no `gridColumn` style is set.
 */
function Field({ label, children, colSpan }: { label: string; children: React.ReactNode; colSpan?: number }) {
  const cellStyle: React.CSSProperties = {
    gridColumn: colSpan ? `span ${colSpan}` : undefined,
  }
  const captionStyle: React.CSSProperties = {
    display: 'block',
    fontSize: 11,
    color: '#475569',
    marginBottom: 4,
    fontWeight: 600,
  }

  return (
    <div style={cellStyle}>
      <label style={captionStyle}>{label}</label>
      {children}
    </div>
  )
}
|
||||
|
||||
/**
 * React hook that holds the pre-scan classification and mirrors it to
 * `localStorage` under `STORAGE_KEY`.
 *
 * The initial state is read once from storage. When there is no `window`,
 * no stored value, or the stored value cannot be parsed, an empty context is
 * used. Stored data is sanitised before use so that a malformed entry cannot
 * put non-string answers or a non-array `special_data` into the state.
 *
 * @returns A `[context, setContext]` tuple, like `useState`.
 */
export function useScanContext(): [ScanContext, (ctx: ScanContext) => void] {
  const [ctx, setCtx] = useState<ScanContext>(() => {
    // Without `window` there is no localStorage to read from.
    if (typeof window === 'undefined') return emptyContext()
    try {
      const raw = localStorage.getItem(STORAGE_KEY)
      return raw ? restoreStoredContext(JSON.parse(raw)) : emptyContext()
    } catch {
      // Unreadable storage or invalid JSON: start from scratch.
      return emptyContext()
    }
  })

  useEffect(() => {
    try {
      localStorage.setItem(STORAGE_KEY, JSON.stringify(ctx))
    } catch {
      // Deliberately best effort: a failed write (e.g. storage full or
      // unavailable) must not break the form.
    }
  }, [ctx])

  return [ctx, setCtx]
}

/**
 * Turns an arbitrary parsed JSON value into a well-formed `ScanContext`.
 *
 * Anything that is not a plain object yields an empty context. For objects,
 * stored keys are layered over the empty defaults; afterwards every known
 * answer with the wrong type is reset to its empty default.
 */
function restoreStoredContext(stored: unknown): ScanContext {
  if (typeof stored !== 'object' || stored === null || Array.isArray(stored)) {
    return emptyContext()
  }

  const merged: ScanContext = { ...emptyContext(), ...(stored as Partial<ScanContext>) }

  const textFields = [
    'industry',
    'business_model',
    'direct_sales',
    'legal_form',
    'group_structure',
    'employee_count',
    'third_country_transfer',
  ] as const
  for (const field of textFields) {
    if (typeof merged[field] !== 'string') merged[field] = ''
  }

  merged.special_data = Array.isArray(merged.special_data)
    ? merged.special_data.filter((entry): entry is string => typeof entry === 'string')
    : []

  return merged
}
|
||||
@@ -0,0 +1,353 @@
|
||||
'use client'
|
||||
|
||||
/**
|
||||
* ResultsTabsView — strukturierte Tab-Ansicht der Audit-Ergebnisse.
|
||||
*
|
||||
* Statt einer langen Scroll-Seite gibt es:
|
||||
* 1. Übersicht (Score + GF-Kurzfassung)
|
||||
* 2. Cookies (3-Quellen-Compliance-Vergleich + Vendor-/Cookie-Listen)
|
||||
* 3. Datenschutzerklärung
|
||||
* 4. Impressum
|
||||
* 5. AGB / Widerruf
|
||||
* 6. Banner (Cookie-Banner-Checks)
|
||||
* 7. Vollständige Mail (HTML-Preview)
|
||||
*
|
||||
* Tab-Headers sticky oben, Content scrollbar unten.
|
||||
*/
|
||||
|
||||
import React, { useState, useMemo } from 'react'
|
||||
import { ChecklistView } from './ChecklistView'
|
||||
|
||||
interface ResultsTabsViewProps {
|
||||
results: any
|
||||
}
|
||||
|
||||
type TabId = 'overview' | 'cookies' | 'dse' | 'impressum' | 'agb' | 'banner' | 'mail'
|
||||
|
||||
const TABS: { id: TabId; label: string; icon: string }[] = [
|
||||
{ id: 'overview', label: 'Übersicht', icon: '◉' },
|
||||
{ id: 'cookies', label: 'Cookies & VVT', icon: '🍪' },
|
||||
{ id: 'dse', label: 'Datenschutzerkl.', icon: '📄' },
|
||||
{ id: 'impressum', label: 'Impressum', icon: '🏢' },
|
||||
{ id: 'agb', label: 'AGB / Widerruf', icon: '⚖️' },
|
||||
{ id: 'banner', label: 'Cookie-Banner', icon: '🎛' },
|
||||
{ id: 'mail', label: 'Mail-Vorschau', icon: '✉️' },
|
||||
]
|
||||
|
||||
/**
 * Tabbed view of one audit result.
 *
 * Renders a sticky tab bar and, below it, the content of the active tab.
 * Missing parts of `results` are replaced by empty defaults so every tab can
 * render without data.
 *
 * @param results - Raw result object as returned by the backend (untyped).
 */
export function ResultsTabsView({ results }: ResultsTabsViewProps) {
  const [active, setActive] = useState<TabId>('overview')

  const r = results || {}
  const rawDocs = r.results
  // The banner result may arrive under either key.
  const banner = r.banner_result || r.cookie_banner_result || {}
  const cmpVendors: any[] = r.cmp_vendors || []
  const cookieAudit = r.cookie_audit || {}

  // First document per lower-cased doc type. The memo depends on the raw
  // `results.results` reference: a locally created `[]` fallback would be a
  // new array on every render and defeat the cache.
  const docsByType = useMemo(() => {
    const byType: Record<string, any> = {}
    for (const d of (rawDocs || []) as any[]) {
      const t = (d.doc_type || '').toLowerCase()
      if (!byType[t]) byType[t] = d
    }
    return byType
  }, [rawDocs])

  return (
    <div className="border border-gray-200 rounded-lg overflow-hidden bg-white">
      {/* Sticky tab header */}
      <div className="flex border-b border-gray-200 bg-gray-50 overflow-x-auto sticky top-0 z-10">
        {TABS.map(t => (
          <button
            key={t.id}
            // Explicit type so a surrounding <form> is never submitted.
            type="button"
            onClick={() => setActive(t.id)}
            className={`px-4 py-3 text-sm font-medium whitespace-nowrap border-b-2 transition-colors ${
              active === t.id
                ? 'border-purple-600 text-purple-700 bg-white'
                : 'border-transparent text-gray-600 hover:bg-gray-100'
            }`}
          >
            <span className="mr-1.5">{t.icon}</span>
            {t.label}
          </button>
        ))}
      </div>

      {/* Tab content */}
      <div className="p-4 min-h-[400px]">
        {active === 'overview' && <OverviewTab results={r} />}
        {active === 'cookies' && (
          <CookiesTab
            audit={cookieAudit}
            vendors={cmpVendors}
            banner={banner}
          />
        )}
        {active === 'dse' && <DocTab doc={docsByType['dse']} label="Datenschutzerklärung" />}
        {active === 'impressum' && <DocTab doc={docsByType['impressum']} label="Impressum" />}
        {active === 'agb' && <AgbWiderrufTab docs={docsByType} />}
        {active === 'banner' && <BannerTab banner={banner} />}
        {active === 'mail' && <MailPreviewTab results={r} />}
      </div>
    </div>
  )
}
|
||||
|
||||
// ── Übersicht ──────────────────────────────────────────────────────────
|
||||
/**
 * Overview tab: four KPI tiles, the e-mail status (if any) and a short hint
 * explaining where the details live.
 *
 * The score is taken from the banner result (`compliance_score`, falling
 * back to `completeness_pct`); when neither exists a dash is shown.
 */
function OverviewTab({ results }: { results: any }) {
  const documentCount = results.total_documents || (results.results?.length ?? 0)
  const findingCount = results.total_findings ?? 0
  const bannerInfo = results.banner_result || results.cookie_banner_result || {}
  const score = bannerInfo.compliance_score ?? bannerInfo.completeness_pct ?? null
  const mailState = results.email_status
  const mailSent = mailState === 'sent'

  // Traffic-light colouring of the score tile.
  let scoreTone = 'bad'
  if (score === null) {
    scoreTone = 'neutral'
  } else if (score >= 80) {
    scoreTone = 'ok'
  } else if (score >= 60) {
    scoreTone = 'warn'
  }

  const scoreText = score !== null ? `${score}%` : '—'
  const mailClasses = mailSent ? 'bg-green-50 text-green-800' : 'bg-gray-100 text-gray-700'

  return (
    <div className="space-y-4">
      <div className="grid grid-cols-2 md:grid-cols-4 gap-3">
        <Kpi label="Geprüfte Dokumente" value={documentCount} />
        <Kpi label="Findings gesamt" value={findingCount} tone={findingCount > 5 ? 'warn' : 'ok'} />
        <Kpi label="Vendors erkannt" value={results.cmp_vendors?.length || 0} />
        <Kpi label="Score" value={scoreText} tone={scoreTone} />
      </div>

      {mailState && (
        <div className={`text-sm px-3 py-2 rounded ${mailClasses}`}>
          E-Mail: {mailSent ? '✓ Gesendet an Empfänger' : mailState}
        </div>
      )}

      <div className="bg-blue-50 border border-blue-200 rounded p-3 text-xs text-blue-900">
        <strong>Wo welcher Inhalt steckt:</strong> in den Tabs oben findest du die
        Detail-Auswertung pro Doc-Typ. Im Cookie-Tab steht der 3-Quellen-Compliance-
        Vergleich (deklariert vs Browser vs Library) — das ist der wichtigste
        rechtliche Knackpunkt. Banner-Tab zeigt die echten Browser-Phasen-Checks.
      </div>
    </div>
  )
}
|
||||
|
||||
/**
 * Small KPI tile showing a caption and a large value.
 *
 * @param label - Caption shown above the value.
 * @param value - Value to render; passed straight to React.
 * @param tone  - One of 'ok' | 'warn' | 'bad' | 'neutral'. Any other string
 *                falls back to the neutral colours instead of leaking the
 *                literal class name `undefined` into the markup.
 */
function Kpi({ label, value, tone = 'neutral' }: { label: string; value: any; tone?: string }) {
  const colors: Record<string, string> = {
    ok: 'text-green-700 bg-green-50 border-green-200',
    warn: 'text-amber-700 bg-amber-50 border-amber-200',
    bad: 'text-red-700 bg-red-50 border-red-200',
    neutral: 'text-gray-700 bg-gray-50 border-gray-200',
  }
  const toneClasses = colors[tone] ?? colors.neutral

  return (
    <div className={`border rounded p-3 ${toneClasses}`}>
      <div className="text-[10px] uppercase tracking-wider opacity-70">{label}</div>
      <div className="text-2xl font-bold mt-1">{value}</div>
    </div>
  )
}
|
||||
|
||||
// ── Cookies & VVT ──────────────────────────────────────────────────────
|
||||
/**
 * Cookie tab: count tiles, a three-column comparison of cookie lists and the
 * vendor table.
 *
 * All lists and counts are read defensively from `audit`; missing values
 * render as zero / empty. The `banner` prop is accepted but not used here.
 */
function CookiesTab({ audit, vendors, banner }: { audit: any; vendors: any[]; banner: any }) {
  const declared = audit?.declared_count ?? 0
  const browser = audit?.browser_count ?? 0

  const compliantList = audit?.compliant ?? []
  const undeclaredList = audit?.undeclared_in_browser ?? []
  const notLoadedList = audit?.declared_not_loaded ?? []

  const both = compliantList.length
  const undecl = undeclaredList.length
  const decOnly = notLoadedList.length

  return (
    <div className="space-y-4">
      {/* Count tiles */}
      <div className="grid grid-cols-3 md:grid-cols-5 gap-2">
        <Kpi label="Deklariert" value={declared} />
        <Kpi label="Im Browser" value={browser} />
        <Kpi label="Compliant" value={both} tone="ok" />
        <Kpi label="Undokumentiert" value={undecl} tone={undecl > 0 ? 'bad' : 'ok'} />
        <Kpi label="Nicht geladen" value={decOnly} tone={decOnly > 0 ? 'warn' : 'neutral'} />
      </div>

      {/* Three-column comparison */}
      <div className="grid grid-cols-1 md:grid-cols-3 gap-3">
        <CookieColumn
          title={`❌ Undokumentiert (${undecl})`}
          tone="bad"
          subtitle="Geladen ABER nicht in der Richtlinie — Art. 13(1)(c) DSGVO Verstoß"
          cookies={undeclaredList}
        />
        <CookieColumn
          title={`✓ Compliant (${both})`}
          tone="ok"
          subtitle="Beide Quellen stimmen überein"
          cookies={compliantList}
        />
        <CookieColumn
          title={`⚠️ Nicht geladen (${decOnly})`}
          tone="warn"
          subtitle="In Richtlinie deklariert, aber bei diesem Lauf nicht im Browser"
          cookies={notLoadedList}
        />
      </div>

      {/* Vendor table */}
      <div>
        <h3 className="text-sm font-semibold mb-2 text-gray-800">
          Vendor-Liste ({vendors.length} unique nach Deduplizierung)
        </h3>
        <div className="overflow-x-auto border border-gray-200 rounded">
          <table className="w-full text-xs">
            <thead className="bg-gray-50">
              <tr>
                <th className="text-left px-3 py-2">Vendor</th>
                <th className="text-left px-3 py-2">Kategorie</th>
                <th className="text-left px-3 py-2">Quelle</th>
                <th className="text-right px-3 py-2">Cookies</th>
              </tr>
            </thead>
            <tbody>
              {vendors.map((vendor, rowIndex) => {
                const cookieCount = (vendor.cookies || []).length
                return (
                  <tr key={rowIndex} className="border-t border-gray-100 hover:bg-gray-50">
                    <td className="px-3 py-2 font-medium">{vendor.name}</td>
                    <td className="px-3 py-2 text-gray-600">{vendor.category || '—'}</td>
                    <td className="px-3 py-2 text-gray-500 font-mono text-[10px]">
                      {vendor.source || '—'}
                    </td>
                    <td className="px-3 py-2 text-right">{cookieCount}</td>
                  </tr>
                )
              })}
            </tbody>
          </table>
        </div>
      </div>
    </div>
  )
}
|
||||
|
||||
/**
 * One tinted card of the cookie comparison grid: a heading, a one-line
 * explanation and a scrollable list of cookie names.
 *
 * @param title    Heading rendered at the top of the card.
 * @param tone     Colour scheme key: 'bad' (red), 'ok' (green) or 'warn' (amber).
 *                 Any other value falls back to a neutral gray card.
 * @param subtitle Explanatory line rendered below the heading.
 * @param cookies  Cookie names to list; an empty array renders a placeholder.
 */
function CookieColumn({ title, tone, subtitle, cookies }: {
  title: string; tone: string; subtitle: string; cookies: string[]
}) {
  const colors: Record<string, string> = {
    bad: 'bg-red-50 border-red-200 text-red-900',
    ok: 'bg-green-50 border-green-200 text-green-900',
    warn: 'bg-amber-50 border-amber-200 text-amber-900',
  }
  // Without a fallback an unrecognised tone would put the literal text
  // "undefined" into the class list and leave the card unstyled.
  const toneClasses = colors[tone] ?? 'bg-gray-50 border-gray-200 text-gray-900'

  return (
    <div className={`border rounded p-3 ${toneClasses}`}>
      <div className="text-xs font-semibold mb-1">{title}</div>
      <div className="text-[10px] opacity-80 mb-2">{subtitle}</div>
      <div className="font-mono text-[10px] max-h-56 overflow-auto">
        {cookies.length === 0 && <span className="opacity-60">— keine —</span>}
        {cookies.map((c, i) => (
          <div key={i} className="py-0.5">{c}</div>
        ))}
      </div>
    </div>
  )
}
|
||||
|
||||
// ── Generic Doc-Tab ────────────────────────────────────────────────────
|
||||
/**
 * Generic tab for a single checked document.
 *
 * Shows a summary line (word count, number of findings, number of passed
 * checks), a link to the document URL when one is present, and the full
 * checklist. Renders the empty placeholder when no document is given.
 *
 * @param doc   Check result for one document (reads `checks`, `word_count`, `url`).
 * @param label Heading text; also used for the empty placeholder.
 */
function DocTab({ doc, label }: { doc: any; label: string }) {
  if (!doc) return <Empty label={label} />

  // A check counts as a finding when it neither passed nor was skipped.
  const tally = (doc.checks || []).reduce(
    (acc: { findings: number; ok: number }, check: any) => {
      if (check.passed) acc.ok += 1
      else if (!check.skipped) acc.findings += 1
      return acc
    },
    { findings: 0, ok: 0 },
  )
  const wordCountText = doc.word_count?.toLocaleString('de-DE') || 0

  const sourceLink = doc.url && (
    <a
      href={doc.url}
      target="_blank"
      rel="noreferrer"
      className="text-xs text-blue-600 hover:underline break-all"
    >
      {doc.url}
    </a>
  )

  return (
    <div className="space-y-3">
      <div className="flex items-center justify-between">
        <h3 className="text-sm font-semibold">{label}</h3>
        <div className="text-xs text-gray-600">
          {wordCountText} Wörter ·{' '}
          <span className="text-red-600">{tally.findings} Findings</span> ·{' '}
          <span className="text-green-600">{tally.ok} OK</span>
        </div>
      </div>
      {sourceLink}
      <ChecklistView results={[doc]} />
    </div>
  )
}
|
||||
|
||||
/**
 * Combined tab for terms & conditions and the withdrawal notice.
 *
 * Each section shows the checklist for its document, or an inline empty
 * placeholder when that document is missing from `docs`.
 *
 * @param docs Check results keyed by document type; reads 'agb'
 *             (falling back to 'nutzungsbedingungen') and 'widerruf'.
 */
function AgbWiderrufTab({ docs }: { docs: Record<string, any> }) {
  const sections = [
    {
      heading: 'AGB / Nutzungsbedingungen',
      emptyLabel: 'AGB',
      result: docs['agb'] || docs['nutzungsbedingungen'],
    },
    {
      heading: 'Widerrufsbelehrung',
      emptyLabel: 'Widerruf',
      result: docs['widerruf'],
    },
  ]

  return (
    <div className="space-y-6">
      {sections.map(({ heading, emptyLabel, result }) => (
        <div key={emptyLabel}>
          <h3 className="text-sm font-semibold mb-2">{heading}</h3>
          {result ? <ChecklistView results={[result]} /> : <Empty label={emptyLabel} inline />}
        </div>
      ))}
    </div>
  )
}
|
||||
|
||||
/**
 * Cookie-banner tab.
 *
 * Shows whether a banner was detected, its provider and the number of
 * violations, lists the violations when there are any, and renders one small
 * card per scan phase with its cookie and vendor counts. A missing or empty
 * `banner` object renders the empty placeholder.
 *
 * @param banner Banner scan result (reads `phases`, `banner_checks.violations`,
 *               `banner_detected`, `banner_provider`).
 */
function BannerTab({ banner }: { banner: any }) {
  const hasData = Boolean(banner) && Object.keys(banner).length > 0
  if (!hasData) return <Empty label="Cookie-Banner" />

  const violations = banner.banner_checks?.violations || []
  const phaseEntries = Object.entries(banner.phases || {})

  // Only rendered when at least one violation exists.
  const violationBox = violations.length > 0 && (
    <div className="border border-red-200 bg-red-50 rounded p-3">
      <div className="text-xs font-semibold text-red-800 mb-2">Verstöße</div>
      <ul className="text-xs text-red-900 space-y-1">
        {violations.map((violation: any, idx: number) => (
          <li key={idx}>• {violation.label || violation.message || JSON.stringify(violation)}</li>
        ))}
      </ul>
    </div>
  )

  const phaseCards = phaseEntries.map(([phaseName, phase]: [string, any]) => (
    <div key={phaseName} className="border border-gray-200 rounded p-2">
      <div className="text-[10px] uppercase text-gray-500">{phaseName}</div>
      <div className="text-xs mt-1">
        Cookies: <strong>{phase.cookies?.length || 0}</strong>
      </div>
      <div className="text-xs">
        Vendors: <strong>{phase.vendors?.length || 0}</strong>
      </div>
    </div>
  ))

  return (
    <div className="space-y-3">
      <div className="text-xs text-gray-700">
        Banner erkannt: <strong>{banner.banner_detected ? 'Ja' : 'Nein'}</strong> ·{' '}
        Provider: <strong>{banner.banner_provider || '—'}</strong> ·{' '}
        Verstöße: <strong>{violations.length}</strong>
      </div>
      {violationBox}
      <div className="grid grid-cols-3 gap-2">
        {phaseCards}
      </div>
    </div>
  )
}
|
||||
|
||||
/**
 * Mail tab: states whether the report mail was sent or only created, shows
 * the snapshot ID and, when an ID is present, links to the PDF of the mail.
 *
 * @param results Run result (reads `email_status` and `check_id`).
 */
function MailPreviewTab({ results }: { results: any }) {
  const checkId = results.check_id
  // The ID becomes a URL path segment, so it is percent-encoded; characters
  // such as '/', '?' or '#' would otherwise change which endpoint is hit.
  const pdfHref = checkId
    ? `/api/compliance/agent/snapshots/${encodeURIComponent(checkId)}/pdf`
    : undefined

  return (
    <div className="text-xs text-gray-600 space-y-2">
      <p>
        Die vollständige Mail wurde {results.email_status === 'sent' ? 'gesendet' : 'erstellt'}.
        Snapshot-ID:{' '}
        <code className="bg-gray-100 px-1.5 py-0.5 rounded">{checkId || '—'}</code>
      </p>
      {checkId && (
        <a
          href={pdfHref}
          target="_blank" rel="noreferrer"
          className="inline-block text-purple-600 hover:underline"
        >
          → PDF der Mail herunterladen
        </a>
      )}
    </div>
  )
}
|
||||
|
||||
/**
 * Placeholder shown when a tab or section has no data for the current run.
 *
 * @param label  Name of the missing item, quoted inside the message.
 * @param inline When true, the vertical padding and centering are omitted.
 */
function Empty({ label, inline }: { label: string; inline?: boolean }) {
  const spacing = inline ? '' : 'py-8 text-center'
  return (
    <div className={`text-xs text-gray-500 ${spacing}`}>
      Keine Daten für „{label}" in diesem Lauf.
    </div>
  )
}
|
||||
@@ -0,0 +1,71 @@
|
||||
/**
 * P47 — localStorage quota management.
 *
 * When old compliance-check results fill up the browser storage, safeSetItem
 * catches the QuotaExceededError thrown by setItem, prunes old
 * doc-check-result-* entries (oldest first) and retries the write.
 *
 * Used by DocCheckTab/BannerCheckTab/etc. when persisting result blobs.
 */
|
||||
|
||||
// Key prefix of the persisted result blobs. When pruning, the part of the key
// after this prefix is parsed as a number and used to order entries oldest first.
const RESULT_KEY_PREFIX = 'doc-check-result-'
|
||||
const MAX_KEEP = 10 // Keep at most 10 old result blobs.
|
||||
|
||||
/**
 * Writes `value` under `key` into localStorage, freeing space when the quota
 * is exhausted.
 *
 * On a quota error the old result blobs are pruned down to the default
 * retention and the write is retried; if that still fails, every result blob
 * is pruned and the write is retried once more. Any other error is logged and
 * not retried.
 *
 * @param key   localStorage key to write.
 * @param value Serialized value to store.
 * @returns true when the value was stored, false when it had to be dropped.
 */
export function safeSetItem(key: string, value: string): boolean {
  try {
    localStorage.setItem(key, value)
    return true
  } catch (err: unknown) {
    if (!isQuotaExceeded(err)) {
      console.warn('localStorage setItem failed:', err)
      return false
    }
  }

  // Escalating retries: `undefined` selects pruneOldResults' default
  // retention, `0` removes every stored result blob.
  for (const keep of [undefined, 0]) {
    pruneOldResults(keep)
    try {
      localStorage.setItem(key, value)
      return true
    } catch {
      // Still failing after this pruning level — try the next one.
    }
  }

  console.warn('localStorage immer noch voll, wert wird verworfen')
  return false
}

/**
 * Reports whether a caught value looks like a storage-quota failure.
 * Matches the standard error name, the legacy Firefox name, and the legacy
 * numeric codes 22 and 1014.
 */
function isQuotaExceeded(err: unknown): boolean {
  if (typeof err !== 'object' || err === null) return false
  const { name, code } = err as { name?: unknown; code?: unknown }
  return name === 'QuotaExceededError'
    || name === 'NS_ERROR_DOM_QUOTA_REACHED'
    || code === 22
    || code === 1014
}
|
||||
|
||||
/**
 * Removes the oldest persisted result blobs so that at most `keep` remain.
 *
 * Only keys starting with RESULT_KEY_PREFIX are considered. Their age is taken
 * from the numeric suffix of the key; a non-numeric suffix counts as 0 and is
 * therefore treated as oldest. All storage errors are swallowed (best effort).
 *
 * @param keep Number of newest result blobs to retain (defaults to MAX_KEEP).
 */
function pruneOldResults(keep: number = MAX_KEEP): void {
  try {
    const candidates = Array.from({ length: localStorage.length }, (_, idx) => localStorage.key(idx))
      .filter((name): name is string => !!name && name.startsWith(RESULT_KEY_PREFIX))
      .map((name) => ({ key: name, ts: Number(name.slice(RESULT_KEY_PREFIX.length)) || 0 }))
      .sort((left, right) => left.ts - right.ts) // oldest first

    const surplus = Math.max(0, candidates.length - keep)
    candidates.slice(0, surplus).forEach(({ key }) => {
      try { localStorage.removeItem(key) } catch {}
    })
  } catch {}
}
|
||||
|
||||
/**
 * Estimates how much localStorage is in use.
 *
 * Sums the string lengths of every key and its value and divides the total by
 * 1024 * 1024. If storage access throws, the sum collected so far is used.
 * NOTE(review): string lengths are UTF-16 code units, not bytes — confirm
 * that callers expect this approximation.
 *
 * @returns Summed key and value lengths divided by 1024 * 1024.
 */
export function getStorageUsageMB(): number {
  let total = 0
  try {
    let idx = 0
    while (idx < localStorage.length) {
      const name = localStorage.key(idx)
      idx += 1
      if (name) {
        const stored = localStorage.getItem(name) || ''
        total += name.length + stored.length
      }
    }
  } catch {}
  return total / (1024 * 1024)
}
|
||||
@@ -362,6 +362,16 @@ export default function AIActPage() {
|
||||
)}
|
||||
</StepHeader>
|
||||
|
||||
<div className="px-4 py-2 bg-emerald-50 border border-emerald-200 rounded-lg text-xs text-emerald-800 flex items-start gap-2">
|
||||
<span className="font-semibold">Quellen & Lizenz:</span>
|
||||
<span>
|
||||
Inhalte gemaess <strong>EU-Verordnung 2024/1689 (KI-Verordnung / AI Act)</strong> —
|
||||
Lizenzregel R1 (EU_LAW, woertlich uebernehmbar).
|
||||
Risiko-Klassifizierungslogik basiert auf Anhang III der Verordnung.{' '}
|
||||
<a href="/sdk/licenses" className="underline">Quellenverzeichnis</a>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Tabs */}
|
||||
<div className="flex items-center gap-1 bg-gray-100 p-1 rounded-lg w-fit">
|
||||
{TABS.map(tab => (
|
||||
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
CATEGORY_OPTIONS,
|
||||
} from '../control-library/components/helpers'
|
||||
import { ControlDetail } from '../control-library/components/ControlDetail'
|
||||
import { SourceBadge } from '@/components/sdk/SourceBadge'
|
||||
|
||||
// =============================================================================
|
||||
// TYPES
|
||||
@@ -310,6 +311,7 @@ export default function AtomicControlsPage() {
|
||||
<TargetAudienceBadge audience={ctrl.target_audience} />
|
||||
<GenerationStrategyBadge strategy={ctrl.generation_strategy} pipelineInfo={ctrl} />
|
||||
<ObligationTypeBadge type={ctrl.generation_metadata?.obligation_type as string} />
|
||||
<SourceBadge controlUuid={ctrl.id} compact />
|
||||
</div>
|
||||
<h3 className="text-sm font-medium text-gray-900 group-hover:text-violet-700">{ctrl.title}</h3>
|
||||
<p className="text-xs text-gray-500 mt-1 line-clamp-2">{ctrl.objective}</p>
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import React, { useState } from 'react'
|
||||
import { useRouter } from 'next/navigation'
|
||||
import { StepHeader, STEP_EXPLANATIONS } from '@/components/sdk/StepHeader'
|
||||
import { LicenseModuleBanner } from '@/components/sdk/LicenseModuleBanner'
|
||||
import { useAuditChecklist } from './_hooks/useAuditChecklist'
|
||||
import { ChecklistItemCard } from './_components/ChecklistItemCard'
|
||||
import { LoadingSkeleton } from './_components/LoadingSkeleton'
|
||||
@@ -89,6 +90,12 @@ export default function AuditChecklistPage() {
|
||||
</div>
|
||||
</StepHeader>
|
||||
|
||||
<LicenseModuleBanner
|
||||
rule={3}
|
||||
sourceLabel="BreakPilot-Audit-Methodik"
|
||||
detail="Eigene Audit-Checklisten und -Workflows. Zitierte Rechtsquellen (DSGVO/ISO 27001/...) jeweils mit eigener Lizenzregel."
|
||||
/>
|
||||
|
||||
{error && (
|
||||
<div className="p-4 bg-red-50 border border-red-200 rounded-lg text-red-700 flex items-center justify-between">
|
||||
<span>{error}</span>
|
||||
|
||||
@@ -232,14 +232,25 @@ export function StateBadge({ state }: { state: string }) {
|
||||
|
||||
export function LicenseRuleBadge({ rule }: { rule: number | null | undefined }) {
|
||||
if (!rule) return null
|
||||
const config: Record<number, { bg: string; label: string }> = {
|
||||
1: { bg: 'bg-green-100 text-green-700', label: 'Free Use' },
|
||||
2: { bg: 'bg-blue-100 text-blue-700', label: 'Zitation' },
|
||||
3: { bg: 'bg-amber-100 text-amber-700', label: 'Reformuliert' },
|
||||
// Corrected labels per Task #21 LICENSE_RULES.md mapping:
|
||||
// R1 = woertlich (Hoheitsrecht/Public Domain, no attribution required)
|
||||
// R2 = woertlich + Attribution-Pflicht (CC-BY, OWASP, OECD, ENISA)
|
||||
// R3 = nur Identifier zitieren (DIN/ANSI/IEC/DGUV/proprietary — pipeline drops full text)
|
||||
const config: Record<number, { bg: string; label: string; title: string }> = {
|
||||
1: { bg: 'bg-emerald-100 text-emerald-800', label: 'R1', title: 'Woertlich uebernehmbar (Hoheitsrecht/Public Domain)' },
|
||||
2: { bg: 'bg-amber-100 text-amber-800', label: 'R2', title: 'Woertlich mit Attribution (CC-BY/OWASP/OECD/ENISA)' },
|
||||
3: { bg: 'bg-slate-100 text-slate-700', label: 'R3', title: 'Nur Identifier-Verweis (DIN/ANSI/IEC/proprietaer)' },
|
||||
}
|
||||
const c = config[rule]
|
||||
if (!c) return null
|
||||
return <span className={`inline-flex items-center px-2 py-0.5 rounded text-xs font-medium ${c.bg}`}>{c.label}</span>
|
||||
return (
|
||||
<span
|
||||
className={`inline-flex items-center px-2 py-0.5 rounded text-xs font-medium ${c.bg}`}
|
||||
title={c.title}
|
||||
>
|
||||
{c.label}
|
||||
</span>
|
||||
)
|
||||
}
|
||||
|
||||
export function VerificationMethodBadge({ method }: { method: string | null }) {
|
||||
|
||||
@@ -99,6 +99,16 @@ export default function CRAProjectsPage() {
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="mb-4 px-4 py-2 bg-emerald-50 border border-emerald-200 rounded-lg text-xs text-emerald-800 flex items-start gap-2">
|
||||
<span className="font-semibold">Quellen & Lizenz:</span>
|
||||
<span>
|
||||
Inhalte gemaess <strong>EU-Verordnung 2024/2847 (Cyber Resilience Act)</strong> —
|
||||
Lizenzregel R1 (EU_LAW, woertlich uebernehmbar). ENISA-Implementation-Guidance
|
||||
ergaenzend (R1 EU_PUBLIC).{' '}
|
||||
<a href="/sdk/licenses" className="underline">Quellenverzeichnis</a>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{error && (
|
||||
<div className="mb-4 bg-red-50 border border-red-200 rounded-lg p-4 text-sm text-red-700">
|
||||
{error}
|
||||
|
||||
@@ -297,6 +297,16 @@ function DocumentGeneratorPageInner() {
|
||||
tips={stepInfo.tips}
|
||||
/>
|
||||
|
||||
<div className="px-4 py-2 bg-slate-50 border border-slate-200 rounded-lg text-xs text-slate-700 flex items-start gap-2">
|
||||
<span className="font-semibold">Quellen & Lizenz:</span>
|
||||
<span>
|
||||
Die 91 Standard-Vorlagen sind <strong>BreakPilot-Eigenwerke</strong> (Lizenzregel R3 — Identifier-Verweis,
|
||||
eigene Lizenz). Vorlagen mit gesetzlicher Grundlage (z.B. VVT nach Art. 30 DSGVO,
|
||||
Loeschkonzept nach Art. 17 DSGVO) zitieren die jeweilige Rechtsquelle als R1.{' '}
|
||||
<a href="/sdk/licenses" className="underline">Quellenverzeichnis</a>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Status bar */}
|
||||
<div className="grid grid-cols-3 gap-4">
|
||||
<div className="bg-white rounded-xl border border-gray-200 p-5">
|
||||
|
||||
@@ -132,6 +132,16 @@ export default function DSFAPage() {
|
||||
)}
|
||||
</StepHeader>
|
||||
|
||||
<div className="px-4 py-2 bg-emerald-50 border border-emerald-200 rounded-lg text-xs text-emerald-800 flex items-start gap-2">
|
||||
<span className="font-semibold">Quellen & Lizenz:</span>
|
||||
<span>
|
||||
Inhalte gemaess <strong>DSGVO Art. 35</strong> (EU 2016/679) — Lizenzregel R1
|
||||
(Hoheitsrecht/EU_LAW, woertlich uebernehmbar). Vorlagen-Texte aus
|
||||
Aufsichtsbehoerden ebenfalls R1.{' '}
|
||||
<a href="/sdk/licenses" className="underline">Quellenverzeichnis</a>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* DSFA Requirement Check */}
|
||||
{dsfaCheck.required && dsfas.length === 0 && (
|
||||
<div className="bg-red-50 border border-red-200 rounded-xl p-5">
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import type { FoundingWizardState } from '@/lib/sdk/founding/types'
|
||||
|
||||
interface Props {
|
||||
@@ -9,8 +10,73 @@ interface Props {
|
||||
|
||||
export function StepBasics({ state, update }: Props) {
|
||||
const b = state.basics
|
||||
const [prefillStatus, setPrefillStatus] = useState<'idle' | 'loading' | 'success' | 'error'>('idle')
|
||||
|
||||
/**
 * Loads the company profile and copies matching values into the wizard's
 * basics step.
 *
 * Profile values take precedence over the current form values, except for the
 * purpose description, which is only filled when the current one is blank.
 * The legal form is only taken over when the profile says 'UG' or 'GmbH'.
 * Sets the prefill status to 'loading', then 'success' or 'error'; failures
 * are logged to the console.
 */
async function prefillFromCompanyProfile() {
  setPrefillStatus('loading')
  try {
    const response = await fetch('/api/sdk/v1/company-profile', { cache: 'no-store' })
    if (!response.ok) throw new Error(`HTTP ${response.status}`)
    const body = await response.json()
    // The profile may be wrapped in { profile } or be the payload itself.
    const profile = body?.profile ?? body
    if (!profile || typeof profile !== 'object') throw new Error('leeres Profil')

    const industryList = Array.isArray(profile.industry) ? profile.industry.filter(Boolean) : []
    let industry
    if (industryList.length > 0) {
      industry = industryList.join(', ')
    } else {
      industry = profile.industryOther || b.industry
    }

    const zipAndCity = [profile.headquartersZip, profile.headquartersCity].filter(Boolean).join(' ')
    const joinedAddress = [profile.headquartersStreet, zipAndCity].filter(Boolean).join(', ')

    // Derive a purpose text from business model and offerings, if available.
    const purposeParts = [
      profile.businessModel ? `Geschäftsmodell: ${profile.businessModel}` : null,
      Array.isArray(profile.offerings) && profile.offerings.length > 0
        ? `Leistungen: ${profile.offerings.join(', ')}`
        : null,
    ].filter((part): part is string => part !== null)

    const legalForm = profile.legalForm === 'UG' || profile.legalForm === 'GmbH'
      ? profile.legalForm
      : b.legal_form

    const currentPurpose = b.company_purpose_description
    const derivedPurpose = purposeParts.length > 0 ? purposeParts.join('; ') : currentPurpose

    update('basics', {
      ...b,
      company_name: profile.companyName || b.company_name,
      legal_form: legalForm,
      company_seat: profile.headquartersCity || b.company_seat,
      company_address: joinedAddress || b.company_address,
      industry,
      company_purpose_description: currentPurpose.trim() === '' ? derivedPurpose : currentPurpose,
    })
    setPrefillStatus('success')
  } catch (err) {
    console.error('[founding-wizard] prefill failed', err)
    setPrefillStatus('error')
  }
}
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
<div className="flex items-center justify-between">
|
||||
<p className="text-sm text-gray-600">
|
||||
Stammdaten der Gesellschaft. Pflicht für Satzung, HRB-Anmeldung und SHA.
|
||||
</p>
|
||||
<button
|
||||
type="button"
|
||||
onClick={prefillFromCompanyProfile}
|
||||
disabled={prefillStatus === 'loading'}
|
||||
className="px-3 py-1.5 text-sm rounded-lg border border-blue-300 bg-blue-50 hover:bg-blue-100 disabled:opacity-50"
|
||||
>
|
||||
{prefillStatus === 'loading' ? 'Lade…' : 'Aus Unternehmensprofil vorbefüllen'}
|
||||
</button>
|
||||
</div>
|
||||
{prefillStatus === 'success' && (
|
||||
<div className="text-xs text-green-700 bg-green-50 border border-green-200 rounded px-2 py-1">
|
||||
Daten aus Unternehmensprofil übernommen. Bitte prüfen und ergänzen.
|
||||
</div>
|
||||
)}
|
||||
{prefillStatus === 'error' && (
|
||||
<div className="text-xs text-amber-700 bg-amber-50 border border-amber-200 rounded px-2 py-1">
|
||||
Konnte Unternehmensprofil nicht laden — bitte Felder manuell ausfüllen.
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="grid grid-cols-2 gap-4">
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-1">Firmenname</label>
|
||||
@@ -78,6 +144,35 @@ export function StepBasics({ state, update }: Props) {
|
||||
className="w-full px-3 py-2 border rounded-lg"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-1">
|
||||
Registergericht
|
||||
</label>
|
||||
<input
|
||||
data-testid="register-court"
|
||||
type="text"
|
||||
value={b.register_court || ''}
|
||||
onChange={e => update('basics', { ...b, register_court: e.target.value })}
|
||||
placeholder="z.B. Amtsgericht Stuttgart"
|
||||
className="w-full px-3 py-2 border rounded-lg"
|
||||
/>
|
||||
<p className="text-xs text-gray-500 mt-1">
|
||||
Zuständiges Amtsgericht für HRB-Eintragung
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-1">
|
||||
HRB-Nummer <span className="text-gray-400">(optional)</span>
|
||||
</label>
|
||||
<input
|
||||
data-testid="hrb-number"
|
||||
type="text"
|
||||
value={b.hrb_number || ''}
|
||||
onChange={e => update('basics', { ...b, hrb_number: e.target.value })}
|
||||
placeholder="z.B. HRB 12345 (leer falls noch nicht eingetragen)"
|
||||
className="w-full px-3 py-2 border rounded-lg"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
|
||||
@@ -14,7 +14,7 @@ export function StepGesellschafter({ state, addGesellschafter, updateGesellschaf
|
||||
const [form, setForm] = useState({
|
||||
name: '', geburtsdatum: '', adresse: '', email: '',
|
||||
nennbetrag_eur: 12500, is_geschaeftsfuehrer: true, internal_role: '',
|
||||
has_academic_background: false,
|
||||
has_academic_background: false, ip_areas: '',
|
||||
})
|
||||
|
||||
const totalNennbetrag = state.gesellschafter.reduce((s, g) => s + g.nennbetrag_eur, 0)
|
||||
@@ -22,6 +22,8 @@ export function StepGesellschafter({ state, addGesellschafter, updateGesellschaf
|
||||
|
||||
const handleAdd = () => {
|
||||
if (!form.name.trim()) return
|
||||
const ip_areas = form.ip_areas
|
||||
.split('\n').map(s => s.trim()).filter(Boolean)
|
||||
addGesellschafter({
|
||||
rolle: 'founder',
|
||||
name: form.name,
|
||||
@@ -32,9 +34,10 @@ export function StepGesellschafter({ state, addGesellschafter, updateGesellschaf
|
||||
is_geschaeftsfuehrer: form.is_geschaeftsfuehrer,
|
||||
internal_role: form.internal_role || undefined,
|
||||
has_academic_background: form.has_academic_background,
|
||||
ip_areas: ip_areas.length > 0 ? ip_areas : undefined,
|
||||
})
|
||||
setForm({ name: '', geburtsdatum: '', adresse: '', email: '', nennbetrag_eur: 12500,
|
||||
is_geschaeftsfuehrer: true, internal_role: '', has_academic_background: false })
|
||||
is_geschaeftsfuehrer: true, internal_role: '', has_academic_background: false, ip_areas: '' })
|
||||
}
|
||||
|
||||
return (
|
||||
@@ -82,13 +85,22 @@ export function StepGesellschafter({ state, addGesellschafter, updateGesellschaf
|
||||
onChange={e => setForm({ ...form, nennbetrag_eur: parseInt(e.target.value) || 0 })}
|
||||
className="px-3 py-2 border rounded"
|
||||
/>
|
||||
<input
|
||||
<select
|
||||
data-testid="gs-role"
|
||||
placeholder="Interne Rolle (z.B. CEO, CTO)"
|
||||
value={form.internal_role}
|
||||
onChange={e => setForm({ ...form, internal_role: e.target.value })}
|
||||
className="px-3 py-2 border rounded"
|
||||
/>
|
||||
className="px-3 py-2 border rounded bg-white"
|
||||
>
|
||||
<option value="">Rolle wählen…</option>
|
||||
<option value="CEO">CEO (Chief Executive Officer)</option>
|
||||
<option value="CTO">CTO (Chief Technical Officer)</option>
|
||||
<option value="CFO">CFO (Chief Financial Officer)</option>
|
||||
<option value="COO">COO (Chief Operating Officer)</option>
|
||||
<option value="CPO">CPO (Chief Product Officer)</option>
|
||||
<option value="Geschäftsführer">Geschäftsführer (ohne Spezialisierung)</option>
|
||||
<option value="Gesellschafter">Gesellschafter (kein GF)</option>
|
||||
<option value="Sonstige">Sonstige</option>
|
||||
</select>
|
||||
<div className="flex items-center gap-2">
|
||||
<input
|
||||
type="checkbox"
|
||||
@@ -108,6 +120,23 @@ export function StepGesellschafter({ state, addGesellschafter, updateGesellschaf
|
||||
<label className="text-sm">Akademischer Hintergrund</label>
|
||||
</div>
|
||||
</div>
|
||||
<div className="mt-3">
|
||||
<label className="block text-sm font-medium text-gray-700 mb-1">
|
||||
IP-Bereiche, die diese Person in die Gesellschaft einbringt
|
||||
<span className="text-gray-400"> (optional, eine Zeile pro Bereich)</span>
|
||||
</label>
|
||||
<textarea
|
||||
data-testid="gs-ip-areas"
|
||||
value={form.ip_areas}
|
||||
onChange={e => setForm({ ...form, ip_areas: e.target.value })}
|
||||
rows={3}
|
||||
placeholder={'z.B.\nCompliance-Engine (Quellcode + Architektur)\nRAG-Pipeline\nKonfigurationsdaten'}
|
||||
className="w-full px-3 py-2 border rounded font-mono text-xs"
|
||||
/>
|
||||
<p className="text-xs text-gray-500 mt-1">
|
||||
Bei mehreren Gründern wird pro Person ein eigener IP-Assignment-Vertrag generiert.
|
||||
</p>
|
||||
</div>
|
||||
<button
|
||||
data-testid="add-gesellschafter"
|
||||
onClick={handleAdd}
|
||||
@@ -139,7 +168,14 @@ export function StepGesellschafter({ state, addGesellschafter, updateGesellschaf
|
||||
{state.gesellschafter.map(g => (
|
||||
<tr key={g.id} className="border-t" data-testid={`gs-row-${g.anteil_nr}`}>
|
||||
<td className="px-3 py-2">{g.anteil_nr}</td>
|
||||
<td className="px-3 py-2 font-medium">{g.name}{g.internal_role ? ` (${g.internal_role})` : ''}</td>
|
||||
<td className="px-3 py-2 font-medium">
|
||||
{g.name}{g.internal_role ? ` (${g.internal_role})` : ''}
|
||||
{g.ip_areas && g.ip_areas.length > 0 && (
|
||||
<div className="text-xs text-gray-500 mt-0.5">
|
||||
IP: {g.ip_areas.join(', ')}
|
||||
</div>
|
||||
)}
|
||||
</td>
|
||||
<td className="px-3 py-2">{g.geburtsdatum || '—'}</td>
|
||||
<td className="px-3 py-2 text-right">{g.nennbetrag_eur.toLocaleString('de-DE')} €</td>
|
||||
<td className="px-3 py-2 text-right">{((g.nennbetrag_eur / Math.max(target, 1)) * 100).toFixed(2)}%</td>
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { calculateAP } from './useFMEA'
|
||||
|
||||
describe('calculateAP — AIAG-VDA 2019 Handbook Action Priority', () => {
  // Each entry: [test title, severity, occurrence, detection, expected priority].
  const cases: Array<[string, number, number, number, string]> = [
    ['returns H for severity 10 with mid occurrence', 10, 5, 5, 'H'],
    ['returns H for severity 9 with low detection', 9, 4, 7, 'H'],
    ['returns M for severity 9 with low occurrence and good detection', 9, 2, 5, 'M'],
    ['returns L for severity 9 with very low occurrence and detection', 9, 1, 4, 'L'],
    ['returns H for severity 7 with high occurrence', 7, 5, 1, 'H'],
    ['returns M for severity 7 with mid occurrence', 7, 3, 5, 'M'],
    ['returns L for low-severity well-controlled mode', 3, 1, 1, 'L'],
    ['returns L for severity 5 with very low occurrence and detection', 5, 1, 1, 'L'],
  ]

  for (const [title, severity, occurrence, detection, expected] of cases) {
    it(title, () => {
      expect(calculateAP(severity, occurrence, detection)).toBe(expected)
    })
  }
})
|
||||
@@ -156,5 +156,52 @@ export function useFMEA(projectId: string) {
|
||||
// Get unique components for the suggest button
|
||||
const components = [...new Map(rows.map((r) => [r.component.id, r.component])).values()]
|
||||
|
||||
return { rows, loading, stats, components, suggestFMs, suggesting, suggestions, suggestSource, setSuggestions }
|
||||
/**
 * Accept a suggested failure mode for a component.
 *
 * Builds an FMEA row from the failure mode's default ratings (each falling
 * back to 5 when missing or 0), adds it to the rows, re-sorts the rows by
 * descending RPZ, and removes the failure mode from the suggestion list.
 *
 * @param fm          The suggested failure mode.
 * @param componentId ID of the component the row is created for.
 * @returns true when a row was added; false when the component is unknown or
 *          the (component, fm.id) pair already exists. In the duplicate case
 *          the suggestion is still removed from the list.
 */
function acceptSuggestion(fm: FailureMode, componentId: string): boolean {
  const comp = components.find((candidate) => candidate.id === componentId)
  if (!comp) return false

  const alreadyListed = rows.some(
    (row) => row.component.id === componentId && row.failureMode.id === fm.id,
  )
  if (alreadyListed) {
    // Drop the suggestion anyway so the UI stops offering it.
    rejectSuggestion(fm.id)
    return false
  }

  const severity = fm.default_severity || 5
  const occurrence = fm.default_occurrence || 5
  const detection = fm.default_detection || 5
  const newRow: FMEARow = {
    component: comp,
    failureMode: fm,
    severity,
    occurrence,
    detection,
    rpz: severity * occurrence * detection,
    ap: calculateAP(severity, occurrence, detection),
  }

  setRows((current) => [newRow].concat(current).sort((a, b) => b.rpz - a.rpz))
  rejectSuggestion(fm.id)
  return true
}
|
||||
|
||||
/**
 * Remove the suggestion with the given failure-mode ID from the suggestion
 * list without touching the table rows.
 */
function rejectSuggestion(fmId: string) {
  setSuggestions((current) => {
    const remaining = current.filter((candidate) => candidate.id !== fmId)
    return remaining
  })
}
|
||||
|
||||
return {
|
||||
rows,
|
||||
loading,
|
||||
stats,
|
||||
components,
|
||||
suggestFMs,
|
||||
suggesting,
|
||||
suggestions,
|
||||
suggestSource,
|
||||
setSuggestions,
|
||||
acceptSuggestion,
|
||||
rejectSuggestion,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import { useEffect, useState } from 'react'
|
||||
import { useParams } from 'next/navigation'
|
||||
import { useFMEA, type FMEARow } from './_hooks/useFMEA'
|
||||
|
||||
@@ -27,8 +27,17 @@ function rpzLabel(rpz: number): string {
|
||||
|
||||
export default function FMEAPage() {
|
||||
const { projectId } = useParams<{ projectId: string }>()
|
||||
const { rows, loading, stats, components, suggestFMs, suggesting, suggestions, suggestSource, setSuggestions } = useFMEA(projectId)
|
||||
const { rows, loading, stats, components, suggestFMs, suggesting, suggestions, suggestSource, setSuggestions, acceptSuggestion, rejectSuggestion } = useFMEA(projectId)
|
||||
const [suggestComp, setSuggestComp] = useState<string | null>(null)
|
||||
const [acceptedCount, setAcceptedCount] = useState(0)
|
||||
|
||||
// Reset accepted-count when a fresh suggestion run is loaded or the panel closes.
|
||||
useEffect(() => {
|
||||
if (suggesting) setAcceptedCount(0)
|
||||
}, [suggesting])
|
||||
useEffect(() => {
|
||||
if (suggestions.length === 0) setAcceptedCount(0)
|
||||
}, [suggestions.length])
|
||||
|
||||
if (loading) {
|
||||
return (
|
||||
@@ -97,26 +106,60 @@ export default function FMEAPage() {
|
||||
{suggestions.length > 0 && (
|
||||
<div className="bg-purple-50 dark:bg-purple-900/20 border border-purple-200 dark:border-purple-800 rounded-xl p-4">
|
||||
<div className="flex items-center justify-between mb-3">
|
||||
<h3 className="text-sm font-semibold text-purple-800 dark:text-purple-300">
|
||||
KI-Vorschlaege ({suggestions.length}) — {suggestSource === 'llm' ? 'LLM-generiert' : 'Bibliothek'}
|
||||
</h3>
|
||||
<div>
|
||||
<h3 className="text-sm font-semibold text-purple-800 dark:text-purple-300">
|
||||
KI-Vorschlaege ({suggestions.length}) — {suggestSource === 'llm' ? 'LLM-generiert' : 'Bibliothek-Fallback'}
|
||||
</h3>
|
||||
{acceptedCount > 0 && (
|
||||
<div className="text-xs text-green-700 dark:text-green-400 mt-0.5">
|
||||
{acceptedCount} Vorschlag{acceptedCount > 1 ? 'e' : ''} uebernommen
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<button onClick={() => setSuggestions([])} className="text-xs text-purple-600 hover:text-purple-800">Schliessen</button>
|
||||
</div>
|
||||
<div className="space-y-2">
|
||||
{suggestions.map((fm, i) => (
|
||||
<div key={i} className="flex items-center justify-between bg-white dark:bg-gray-800 rounded-lg p-3 border border-purple-100 dark:border-purple-800">
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="text-sm font-medium text-gray-900 dark:text-white">{fm.name_de}</div>
|
||||
<div className="text-xs text-gray-500 mt-0.5">{fm.effect}</div>
|
||||
<div className="flex gap-3 mt-1 text-xs text-gray-400">
|
||||
<span>S={fm.default_severity}</span>
|
||||
<span>O={fm.default_occurrence}</span>
|
||||
<span>D={fm.default_detection}</span>
|
||||
<span className="font-bold">RPZ={fm.default_severity * fm.default_occurrence * fm.default_detection}</span>
|
||||
{suggestions.map((fm) => {
|
||||
const rpz = fm.default_severity * fm.default_occurrence * fm.default_detection
|
||||
return (
|
||||
<div key={fm.id} className="flex items-start justify-between gap-3 bg-white dark:bg-gray-800 rounded-lg p-3 border border-purple-100 dark:border-purple-800">
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="text-sm font-medium text-gray-900 dark:text-white">{fm.name_de}</div>
|
||||
<div className="text-xs text-gray-500 mt-0.5">{fm.effect}</div>
|
||||
<div className="flex gap-3 mt-1 text-xs text-gray-400">
|
||||
<span>S={fm.default_severity}</span>
|
||||
<span>O={fm.default_occurrence}</span>
|
||||
<span>D={fm.default_detection}</span>
|
||||
<span className={`font-bold ${rpz > 200 ? 'text-red-600' : rpz > 100 ? 'text-orange-600' : 'text-gray-500'}`}>RPZ={rpz}</span>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-1.5 shrink-0">
|
||||
<button
|
||||
onClick={() => {
|
||||
if (!suggestComp) return
|
||||
const ok = acceptSuggestion(fm, suggestComp)
|
||||
if (ok) setAcceptedCount((c) => c + 1)
|
||||
}}
|
||||
disabled={!suggestComp}
|
||||
className="px-3 py-1.5 bg-green-600 hover:bg-green-700 disabled:opacity-50 disabled:cursor-not-allowed text-white text-xs font-medium rounded transition-colors"
|
||||
title="Diesen Fehlermodus der FMEA-Tabelle hinzufuegen"
|
||||
>
|
||||
Uebernehmen
|
||||
</button>
|
||||
<button
|
||||
onClick={() => rejectSuggestion(fm.id)}
|
||||
className="px-3 py-1.5 bg-gray-200 dark:bg-gray-700 hover:bg-gray-300 dark:hover:bg-gray-600 text-gray-700 dark:text-gray-300 text-xs font-medium rounded transition-colors"
|
||||
title="Diesen Vorschlag verwerfen"
|
||||
>
|
||||
Ablehnen
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
<div className="text-[10px] text-purple-700 dark:text-purple-400 mt-3">
|
||||
Hinweis: Uebernommene Fehlermodi erscheinen sofort in der Tabelle unten. Bewertung (S/O/D) ist anpassbar — Standardwerte aus der Bibliothek.
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -39,11 +39,19 @@ export function HazardTable({ hazards, lifecyclePhases, onDelete }: {
|
||||
.map((hazard) => (
|
||||
<tr key={hazard.id} className="hover:bg-gray-50 dark:hover:bg-gray-750 transition-colors">
|
||||
<td className="px-4 py-3">
|
||||
<div className="flex items-center gap-2">
|
||||
<div className="flex items-center gap-2 flex-wrap">
|
||||
<div className="text-sm font-medium text-gray-900 dark:text-white">{hazard.name}</div>
|
||||
{hazard.name.startsWith('Auto:') && (
|
||||
<span className="inline-flex items-center px-1.5 py-0.5 rounded text-xs font-medium bg-green-100 text-green-700">Auto</span>
|
||||
)}
|
||||
{(hazard as { pattern_id?: string }).pattern_id && (
|
||||
<span
|
||||
className="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] font-mono font-medium bg-slate-100 text-slate-700 border border-slate-200 cursor-help"
|
||||
title={`Quelle: BreakPilot IACE Pattern-Engine (${(hazard as { pattern_id?: string }).pattern_id}). Lizenzregel R3 — Eigenwerk, kein externer Lizenz-Footer noetig. Pattern-Definition mit Norm-Referenzen siehe Library.`}
|
||||
>
|
||||
{(hazard as { pattern_id?: string }).pattern_id} · R3
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
{hazard.description && (
|
||||
<div className="text-xs text-gray-500 truncate max-w-[250px]">{hazard.description}</div>
|
||||
|
||||
@@ -0,0 +1,218 @@
|
||||
'use client'
|
||||
|
||||
// LLM Gap-Review Modal — Task #8.
|
||||
//
|
||||
// Triggers POST /projects/:id/llm-gap-review on mount and lists the
|
||||
// LLM's gap suggestions with an Adopt / Reject UX. Adoption goes through
|
||||
// the regular CreateHazard / CreateMitigation endpoints — the modal
|
||||
// itself never mutates project state on its own.
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
|
||||
/**
 * One gap suggestion as returned in `Response.suggestions`.
 *
 * The modal renders `title` / `description` as the card body and shows the
 * optional fields as small badges or footnotes when they are present.
 */
type Suggestion = {
|
||||
// Decides which adopt callback is invoked and which badge label is shown.
kind: 'hazard' | 'mitigation'
|
||||
title: string
|
||||
description: string
|
||||
category?: string
|
||||
// Rendered after "Bezogen auf:" when set.
hazard_ref?: string
|
||||
pattern_ref?: string
|
||||
// Each entry is rendered as its own monospace badge.
norm_refs?: string[]
|
||||
// Used as the lookup key into CONF_COLOR.
confidence?: 'high' | 'medium' | 'low'
|
||||
rationale?: string
|
||||
}
|
||||
|
||||
/**
 * JSON body expected from POST /projects/:id/llm-gap-review.
 *
 * NOTE(review): this alias shadows the global DOM `Response` type inside
 * this module — confirm that is intended before reusing the name elsewhere.
 */
type Response = {
|
||||
project_id: string
|
||||
// 'llm_gap_review' is displayed as "LLM (<model>)", anything else as the static fallback list.
source: 'llm_gap_review' | 'fallback_static'
|
||||
// Shown in the source line; the UI prints 'unbekannt' when it is missing.
model?: string
|
||||
suggestions: Suggestion[]
|
||||
// Counters echoed in the "Eingabe:" summary line of the modal.
input_summary: {
|
||||
hazard_count: number
|
||||
mitigation_count: number
|
||||
limits_form_fields: number
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Tailwind classes for the confidence badge, keyed by confidence level.
 *
 * The key type is the closed set of levels (instead of `string`) so that a
 * misspelled or missing level is reported by the compiler.
 */
const CONF_COLOR: Readonly<Record<'high' | 'medium' | 'low', string>> = {
  high: 'bg-emerald-100 text-emerald-800 border-emerald-200',
  medium: 'bg-amber-100 text-amber-800 border-amber-200',
  low: 'bg-slate-100 text-slate-600 border-slate-200',
}
|
||||
|
||||
/** Props of LLMGapReviewModal. */
interface Props {
|
||||
// Interpolated into the llm-gap-review request URL; the request is re-run when it changes.
projectId: string
|
||||
// Invoked by both the header "×" button and the footer "Schliessen" button.
onClose: () => void
|
||||
// Awaited when a suggestion of kind 'hazard' is adopted; a rejection is shown as an error.
onAdoptHazard?: (s: Suggestion) => Promise<void>
|
||||
// Awaited when a suggestion of kind 'mitigation' is adopted; a rejection is shown as an error.
onAdoptMitigation?: (s: Suggestion) => Promise<void>
|
||||
}
|
||||
|
||||
/** Turns a thrown value into a short message for the error banner. */
function describeError(e: unknown): string {
  return e instanceof Error ? e.message : String(e)
}

/**
 * Modal that requests gap suggestions for a project and lets the user adopt
 * or reject each one.
 *
 * On mount (and whenever `projectId` changes) it POSTs to the llm-gap-review
 * endpoint. Adoption is delegated to `onAdoptHazard` / `onAdoptMitigation`;
 * the modal itself only tracks which list indices were adopted or rejected.
 *
 * @param projectId         Project whose hazards/mitigations are reviewed.
 * @param onClose           Called when the user dismisses the modal.
 * @param onAdoptHazard     Persists an adopted hazard suggestion. When it is
 *                          missing, hazard suggestions cannot be adopted.
 * @param onAdoptMitigation Persists an adopted mitigation suggestion. When it
 *                          is missing, mitigation suggestions cannot be adopted.
 */
export function LLMGapReviewModal({ projectId, onClose, onAdoptHazard, onAdoptMitigation }: Props) {
  const [data, setData] = useState<Response | null>(null)
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const [adopted, setAdopted] = useState<Set<number>>(new Set())
  const [rejected, setRejected] = useState<Set<number>>(new Set())
  const [adopting, setAdopting] = useState<number | null>(null)

  useEffect(() => {
    const controller = new AbortController()

    // Indices in adopted/rejected refer to the previous suggestion list, so
    // everything derived from the old response is cleared before reloading.
    setLoading(true)
    setError(null)
    setData(null)
    setAdopted(new Set())
    setRejected(new Set())

    async function load() {
      try {
        const res = await fetch(`/api/sdk/v1/iace/projects/${projectId}/llm-gap-review`, {
          method: 'POST',
          signal: controller.signal,
        })
        if (!res.ok) throw new Error(`HTTP ${res.status}`)
        const body: Response = await res.json()
        if (!controller.signal.aborted) setData(body)
      } catch (e) {
        // An aborted request belongs to an unmounted modal or an outdated
        // projectId; its outcome must not touch the current state.
        if (controller.signal.aborted) return
        setError(describeError(e))
      } finally {
        if (!controller.signal.aborted) setLoading(false)
      }
    }

    void load()
    return () => controller.abort()
  }, [projectId])

  /** Returns the adopt callback responsible for a suggestion, if one was provided. */
  function handlerFor(s: Suggestion): ((s: Suggestion) => Promise<void>) | undefined {
    if (s.kind === 'hazard') return onAdoptHazard
    if (s.kind === 'mitigation') return onAdoptMitigation
    return undefined
  }

  /** Adopts the suggestion at `idx` through the matching callback. */
  async function adopt(idx: number) {
    // Only one adoption at a time: `adopting` holds a single index.
    if (!data || adopting !== null) return
    const s = data.suggestions[idx]
    if (!s) return

    const handler = handlerFor(s)
    if (!handler) {
      // Without a handler nothing would be persisted, so the suggestion must
      // not be flagged as adopted.
      setError('Uebernahme nicht moeglich: kein Handler fuer diesen Vorschlagstyp konfiguriert.')
      return
    }

    setAdopting(idx)
    setError(null)
    try {
      await handler(s)
      setAdopted((prev) => new Set(prev).add(idx))
    } catch (e) {
      setError(`Adopt fehlgeschlagen: ${describeError(e)}`)
    } finally {
      setAdopting(null)
    }
  }

  /** Marks the suggestion at `idx` as rejected (local UI state only). */
  function reject(idx: number) {
    setRejected((prev) => new Set(prev).add(idx))
  }

  return (
    <div
      className="fixed inset-0 z-50 flex items-center justify-center bg-black/50"
      role="dialog"
      aria-modal="true"
      aria-label="KI-Gap-Review"
    >
      <div className="bg-white rounded-xl shadow-2xl w-full max-w-3xl max-h-[90vh] overflow-hidden flex flex-col">
        <div className="px-6 py-4 border-b border-gray-200 flex items-center justify-between flex-shrink-0">
          <div>
            <h2 className="text-lg font-semibold text-gray-900">KI-Gap-Review</h2>
            <p className="text-xs text-gray-500 mt-0.5">
              LLM-gestuetzte Suche nach fehlenden Gefaehrdungen und Schutzmassnahmen — Vorschlaege sind unverbindlich bis explizit uebernommen.
            </p>
          </div>
          <button
            type="button"
            onClick={onClose}
            aria-label="Schliessen"
            className="text-gray-400 hover:text-gray-600 text-2xl leading-none"
          >
            ×
          </button>
        </div>

        <div className="flex-1 overflow-y-auto p-6 space-y-3">
          {loading && (
            <div className="text-center py-12">
              <div className="animate-spin rounded-full h-10 w-10 border-b-2 border-purple-600 mx-auto" />
              <p className="text-sm text-gray-500 mt-3">LLM laeuft (Qwen/Claude). Das kann bis zu 30 Sekunden dauern.</p>
            </div>
          )}
          {error && (
            <div className="bg-red-50 border border-red-200 rounded-lg p-4 text-sm text-red-700">
              Fehler: {error}
            </div>
          )}
          {data && (
            <>
              <div className="text-xs text-gray-500 flex items-center gap-3 border-b border-gray-100 pb-2">
                <span>
                  Eingabe: {data.input_summary.hazard_count} Gefaehrdungen,{' '}
                  {data.input_summary.mitigation_count} Massnahmen, {data.input_summary.limits_form_fields} Grenzen-Felder
                </span>
                <span className="text-gray-300">·</span>
                <span>
                  Quelle: {data.source === 'llm_gap_review'
                    ? `LLM (${data.model ?? 'unbekannt'})`
                    : 'Statische Fallback-Liste'}
                </span>
              </div>

              {data.suggestions.length === 0 && (
                <div className="text-center text-gray-500 py-12 text-sm">
                  Keine Lueckenvorschlaege. Die deterministische Pattern-Engine hat vermutlich bereits alle Standard-Gefaehrdungen abgedeckt.
                </div>
              )}

              {data.suggestions.map((s, i) => {
                const isAdopted = adopted.has(i)
                const isRejected = rejected.has(i)
                const isWorking = adopting === i
                const canAdopt = handlerFor(s) !== undefined
                return (
                  <div
                    key={i}
                    className={`border rounded-lg p-3 ${
                      isAdopted ? 'border-emerald-200 bg-emerald-50' :
                      isRejected ? 'border-slate-200 bg-slate-50 opacity-50' :
                      'border-gray-200 bg-white'
                    }`}
                  >
                    <div className="flex items-start justify-between gap-3">
                      <div className="flex-1 min-w-0">
                        <div className="flex items-center gap-2 flex-wrap mb-1">
                          <span className={`px-1.5 py-0.5 text-[10px] rounded font-medium ${
                            s.kind === 'hazard' ? 'bg-red-100 text-red-700' : 'bg-blue-100 text-blue-700'
                          }`}>
                            {s.kind === 'hazard' ? 'Gefaehrdung' : 'Massnahme'}
                          </span>
                          {s.category && (
                            <span className="px-1.5 py-0.5 text-[10px] rounded bg-gray-100 text-gray-700">{s.category}</span>
                          )}
                          {s.confidence && (
                            // The response is unvalidated JSON; fall back to the "low" style for an unexpected level.
                            <span className={`px-1.5 py-0.5 text-[10px] rounded border ${CONF_COLOR[s.confidence] ?? CONF_COLOR.low}`}>
                              {s.confidence}
                            </span>
                          )}
                          {(s.norm_refs ?? []).map((n, j) => (
                            // Index is part of the key because the same reference may be listed twice.
                            <span key={`${n}-${j}`} className="px-1.5 py-0.5 text-[10px] rounded bg-indigo-50 text-indigo-700 font-mono">{n}</span>
                          ))}
                          {s.pattern_ref && (
                            <span className="px-1.5 py-0.5 text-[10px] rounded bg-purple-50 text-purple-700 font-mono">{s.pattern_ref}</span>
                          )}
                        </div>
                        <h3 className="text-sm font-semibold text-gray-900">{s.title}</h3>
                        <p className="text-xs text-gray-600 mt-1">{s.description}</p>
                        {s.hazard_ref && (
                          <p className="text-[11px] text-gray-500 mt-1">Bezogen auf: <em>{s.hazard_ref}</em></p>
                        )}
                        {s.rationale && (
                          <p className="text-[11px] text-gray-400 mt-1 italic">{s.rationale}</p>
                        )}
                      </div>
                      <div className="flex flex-col gap-1 flex-shrink-0">
                        {!isAdopted && !isRejected && (
                          <>
                            <button
                              type="button"
                              onClick={() => adopt(i)}
                              disabled={adopting !== null || !canAdopt}
                              title={canAdopt ? undefined : 'Uebernahme ist in dieser Ansicht nicht verfuegbar.'}
                              className="px-3 py-1 text-xs bg-emerald-600 text-white rounded hover:bg-emerald-700 disabled:opacity-50"
                            >
                              {isWorking ? '…' : 'Uebernehmen'}
                            </button>
                            <button
                              type="button"
                              onClick={() => reject(i)}
                              className="px-3 py-1 text-xs text-gray-600 border border-gray-300 rounded hover:bg-gray-50"
                            >
                              Verwerfen
                            </button>
                          </>
                        )}
                        {isAdopted && <span className="text-xs text-emerald-700 font-medium">✓ Uebernommen</span>}
                        {isRejected && <span className="text-xs text-gray-500">Verworfen</span>}
                      </div>
                    </div>
                  </div>
                )
              })}
            </>
          )}
        </div>

        <div className="px-6 py-3 border-t border-gray-200 bg-gray-50 flex items-center justify-between flex-shrink-0">
          <p className="text-[11px] text-gray-500">
            Hinweis: LLM-Vorschlaege sind NICHT die deterministische Engine-Output. Jede Uebernahme wird als <code>source=llm_gap_review</code> markiert.
          </p>
          <button type="button" onClick={onClose} className="px-3 py-1.5 text-sm border border-gray-300 rounded hover:bg-white">
            Schliessen
          </button>
        </div>
      </div>
    </div>
  )
}
|
||||
|
||||
export default LLMGapReviewModal
|
||||
@@ -12,6 +12,7 @@ import type { ResidualFilter } from './_components/ResidualRiskPanel'
|
||||
import { LibraryModal } from './_components/LibraryModal'
|
||||
import { AutoSuggestPanel } from './_components/AutoSuggestPanel'
|
||||
import { CustomHazardModal } from './_components/CustomHazardModal'
|
||||
import { LLMGapReviewModal } from './_components/LLMGapReviewModal'
|
||||
import { useHazards } from './_hooks/useHazards'
|
||||
|
||||
type ViewMode = 'list' | 'risk' | 'blocks'
|
||||
@@ -22,6 +23,7 @@ export default function HazardsPage() {
|
||||
const h = useHazards(projectId)
|
||||
const [view, setView] = useState<ViewMode>('risk')
|
||||
const [showCustomModal, setShowCustomModal] = useState(false)
|
||||
const [showGapReview, setShowGapReview] = useState(false)
|
||||
const [residualFilter, setResidualFilter] = useState<ResidualFilter>('all')
|
||||
const [decisions, setDecisions] = useState<Record<string, boolean | null>>({})
|
||||
|
||||
@@ -104,6 +106,15 @@ export default function HazardsPage() {
|
||||
</svg>
|
||||
Eigene Gefaehrdung
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setShowGapReview(true)}
|
||||
title="LLM (Qwen/Claude) prueft auf fehlende Gefaehrdungen und Massnahmen — Vorschlaege sind unverbindlich."
|
||||
className="flex items-center gap-2 px-3 py-2 border border-indigo-300 text-indigo-700 rounded-lg hover:bg-indigo-50 transition-colors text-sm">
|
||||
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z" />
|
||||
</svg>
|
||||
KI-Gap-Review
|
||||
</button>
|
||||
<button onClick={() => h.setShowForm(true)}
|
||||
className="flex items-center gap-2 px-4 py-2 bg-purple-600 text-white rounded-lg hover:bg-purple-700 transition-colors text-sm">
|
||||
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
@@ -170,6 +181,13 @@ export default function HazardsPage() {
|
||||
onClose={() => setShowCustomModal(false)} />
|
||||
)}
|
||||
|
||||
{showGapReview && (
|
||||
<LLMGapReviewModal
|
||||
projectId={projectId}
|
||||
onClose={() => setShowGapReview(false)}
|
||||
/>
|
||||
)}
|
||||
|
||||
{h.hazards.length > 0 ? (
|
||||
view === 'risk' ? (
|
||||
<>
|
||||
|
||||
@@ -9,6 +9,7 @@ import { ObjectivesTab } from './_components/ObjectivesTab'
|
||||
import { AuditsTab } from './_components/AuditsTab'
|
||||
import { ReviewsTab } from './_components/ReviewsTab'
|
||||
import { AssetsTab } from './_components/AssetsTab'
|
||||
import { LicenseModuleBanner } from '@/components/sdk/LicenseModuleBanner'
|
||||
|
||||
// =============================================================================
|
||||
// MAIN PAGE
|
||||
@@ -38,6 +39,13 @@ export default function ISMSPage() {
|
||||
<p className="text-xs text-amber-600 mt-2">
|
||||
Hinweis: Basierend auf eigenen Pruefaspekten, kein ISO-Normtext. Ersetzt kein Zertifizierungsaudit.
|
||||
</p>
|
||||
<div className="mt-3">
|
||||
<LicenseModuleBanner
|
||||
rule={3}
|
||||
sourceLabel="BreakPilot-ISMS-Methodik mit Verweis auf ISO/IEC 27001"
|
||||
detail="ISO-Normtexte sind copyright-geschuetzt (R3 — nur Identifier-Verweise). Eigene Pruefaspekte sind BreakPilot-Eigenwerk."
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Tabs */}
|
||||
|
||||
@@ -0,0 +1,160 @@
|
||||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
|
||||
// Stufe 1 of the Attribution Renderer (Task #23): the global
|
||||
// "Quellen & Lizenzen" overview. Aggregates all 314k canonical_controls
|
||||
// by their license_rule and shows the source regulations behind each
|
||||
// bucket. Drives the footer link and gives auditors a one-page view of
|
||||
// what licence classes the platform is operating under.
|
||||
|
||||
/**
 * One source row inside a RuleBucket, rendered as a table row on the
 * licences page. Nullable text columns are displayed as '—' when null.
 */
type SourceCount = {
|
||||
// Part of the React row key; also the display fallback when regulation_name_de is null.
regulation_id: string
|
||||
regulation_name_de: string | null
|
||||
license_rule: number
|
||||
license_type: string | null
|
||||
attribution: string | null
|
||||
jurisdiction: string | null
|
||||
source_type: string | null
|
||||
// Shown right-aligned, formatted with the de-DE locale.
n_controls: number
|
||||
}
|
||||
|
||||
/**
 * Aggregate for one licence rule: summary counters plus the list of sources
 * in that bucket. Rendered once as a summary card and once as a table section.
 */
type RuleBucket = {
|
||||
// Used as React key and as lookup key into RULE_COLOR / RULE_BADGE.
rule: number
|
||||
label_de: string
|
||||
label_en: string
|
||||
// true → 'Attribution-Pflicht in Output', false → 'keine Attribution-Pflicht'.
attribution_required: boolean
|
||||
// true → 'Volltext-Anzeige erlaubt', false → 'Nur Identifier-Verweis'.
render_full_text: boolean
|
||||
total_controls: number
|
||||
distinct_sources: number
|
||||
sources: SourceCount[]
|
||||
}
|
||||
|
||||
/** JSON body expected from GET /api/sdk/v1/compliance/licenses/overview. */
type Overview = {
|
||||
// Grand total shown in the page intro text.
total_controls: number
|
||||
buckets: RuleBucket[]
|
||||
}
|
||||
|
||||
// Border/background classes for a bucket summary card, keyed by licence rule.
// Lookups go through `?? 'border-slate-200'`, so unknown rule numbers are tolerated.
const RULE_COLOR: Record<number, string> = {
|
||||
1: 'border-emerald-200 bg-emerald-50',
|
||||
2: 'border-amber-200 bg-amber-50',
|
||||
3: 'border-slate-200 bg-slate-50',
|
||||
}
|
||||
|
||||
// Classes for the round "R<n>" pill, keyed by licence rule.
// Lookups go through `?? 'bg-slate-600 text-white'`, so unknown rule numbers are tolerated.
const RULE_BADGE: Record<number, string> = {
|
||||
1: 'bg-emerald-600 text-white',
|
||||
2: 'bg-amber-600 text-white',
|
||||
3: 'bg-slate-600 text-white',
|
||||
}
|
||||
|
||||
/** Round "R<n>" pill for a licence rule; unknown rules use the slate style. */
function RulePill({ rule }: { rule: number }) {
  return (
    <span className={`inline-flex items-center justify-center w-7 h-7 rounded-full text-xs font-bold ${RULE_BADGE[rule] ?? 'bg-slate-600 text-white'}`}>
      R{rule}
    </span>
  )
}

/**
 * "Quellen & Lizenzen" overview page.
 *
 * Loads the licence overview once on mount and renders a summary card plus a
 * source table per licence-rule bucket. While loading, or after a failed
 * request, only the heading and a status line are shown.
 */
export default function LicensesPage() {
  const [data, setData] = useState<Overview | null>(null)
  const [error, setError] = useState<string | null>(null)

  useEffect(() => {
    const controller = new AbortController()

    async function load() {
      try {
        const res = await fetch('/api/sdk/v1/compliance/licenses/overview', { signal: controller.signal })
        if (!res.ok) throw new Error(`HTTP ${res.status}`)
        const overview: Overview = await res.json()
        if (!controller.signal.aborted) setData(overview)
      } catch (e) {
        // Aborted means the page was unmounted; there is nothing left to update.
        if (controller.signal.aborted) return
        setError(e instanceof Error ? e.message : String(e))
      }
    }

    void load()
    return () => controller.abort()
  }, [])

  if (error) {
    return (
      <div className="p-6">
        <h1 className="text-xl font-semibold mb-2">Quellen & Lizenzen</h1>
        <p className="text-red-600">Fehler beim Laden: {error}</p>
      </div>
    )
  }
  if (!data) {
    return (
      <div className="p-6">
        <h1 className="text-xl font-semibold">Quellen & Lizenzen</h1>
        <p className="text-slate-500 mt-2">Lade …</p>
      </div>
    )
  }

  return (
    <div className="p-6 max-w-7xl">
      <header className="mb-6">
        <h1 className="text-2xl font-semibold">Quellen & Lizenzen</h1>
        <p className="text-sm text-slate-600 mt-1">
          Diese Plattform stützt sich auf {data.total_controls.toLocaleString('de-DE')}{' '}
          klassifizierte Compliance-Controls aus den unten genannten Quellen.
          Jeder Control trägt eine deterministische Lizenzregel (R1–R3), die das
          Render-Verhalten in Berichten und im Frontend steuert.
        </p>
      </header>

      <section className="mb-8">
        <h2 className="text-lg font-medium mb-3">Klassifizierungs-Schema</h2>
        <div className="grid grid-cols-1 md:grid-cols-3 gap-3 text-sm">
          {data.buckets.map((b) => (
            <div key={b.rule} className={`rounded border ${RULE_COLOR[b.rule] ?? 'border-slate-200'} p-3`}>
              <div className="flex items-center gap-2 mb-2">
                <RulePill rule={b.rule} />
                <span className="font-medium">{b.label_de}</span>
              </div>
              <ul className="text-xs text-slate-700 space-y-1">
                <li>{b.total_controls.toLocaleString('de-DE')} Controls</li>
                <li>{b.distinct_sources} Quellen</li>
                <li>{b.render_full_text ? 'Volltext-Anzeige erlaubt' : 'Nur Identifier-Verweis'}</li>
                <li>{b.attribution_required ? 'Attribution-Pflicht in Output' : 'keine Attribution-Pflicht'}</li>
              </ul>
            </div>
          ))}
        </div>
      </section>

      {data.buckets.map((b) => (
        <section key={b.rule} className="mb-8">
          <h2 className="text-lg font-medium mb-3 flex items-center gap-2">
            <RulePill rule={b.rule} />
            {b.label_de}{' '}
            <span className="text-sm text-slate-500 font-normal">
              ({b.total_controls.toLocaleString('de-DE')} Controls aus {b.distinct_sources} Quellen)
            </span>
          </h2>

          <div className="overflow-x-auto border rounded">
            <table className="w-full text-sm">
              <thead className="bg-slate-100 text-slate-700">
                <tr>
                  <th className="text-left p-2">Quelle</th>
                  <th className="text-left p-2">Lizenztyp</th>
                  <th className="text-left p-2">Rechtsraum</th>
                  <th className="text-left p-2">Attribution</th>
                  <th className="text-right p-2">Controls</th>
                </tr>
              </thead>
              <tbody>
                {b.sources.map((s) => (
                  <tr key={`${b.rule}-${s.regulation_id}`} className="border-t">
                    <td className="p-2">{s.regulation_name_de ?? s.regulation_id}</td>
                    <td className="p-2 text-slate-600">{s.license_type ?? '—'}</td>
                    <td className="p-2 text-slate-600">{s.jurisdiction ?? '—'}</td>
                    <td className="p-2 text-slate-600">{s.attribution ?? '—'}</td>
                    <td className="p-2 text-right tabular-nums">{s.n_controls.toLocaleString('de-DE')}</td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </section>
      ))}

      <footer className="text-xs text-slate-500 border-t pt-4 mt-8">
        Klassifizierung: deterministisch über parent_control_uuid-Vererbung,
        control_parent_links → regulation_registry, source_citation,
        canonical_processed_chunks (Pipeline-Ground-Truth) und LLM-Aggregat-
        Identifikation für eigene Werke. Audit-Skripte unter
        breakpilot-core/control-pipeline/scripts/.
      </footer>
    </div>
  )
}
|
||||
@@ -5,6 +5,7 @@ import { SecurityItemCard } from './_components/SecurityItemCard'
|
||||
import { ItemModal } from './_components/ItemModal'
|
||||
import { useSecurityBacklog, EMPTY_NEW_ITEM } from './_hooks/useSecurityBacklog'
|
||||
import type { SecurityItem } from './_hooks/useSecurityBacklog'
|
||||
import { LicenseModuleBanner } from '@/components/sdk/LicenseModuleBanner'
|
||||
|
||||
export default function SecurityBacklogPage() {
|
||||
const [filter, setFilter] = useState<string>('all')
|
||||
@@ -37,6 +38,11 @@ export default function SecurityBacklogPage() {
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<LicenseModuleBanner
|
||||
rule={2}
|
||||
sourceLabel="OWASP Top 10 / ASVS / SAMM (CC-BY-SA 4.0) + NIST SP 800-53 (US PD)"
|
||||
detail="OWASP-Inhalte zitiert mit Pflicht-Attribution 'OWASP Foundation, CC BY-SA 4.0'. NIST woertlich (R1)."
|
||||
/>
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
|
||||
@@ -4,6 +4,7 @@ import React from 'react'
|
||||
import { useRouter } from 'next/navigation'
|
||||
import { useTOMGenerator } from '@/lib/sdk/tom-generator'
|
||||
import { TOM_GENERATOR_STEPS } from '@/lib/sdk/tom-generator/types'
|
||||
import { LicenseModuleBanner } from '@/components/sdk/LicenseModuleBanner'
|
||||
|
||||
/**
|
||||
* TOM Generator Landing Page
|
||||
@@ -45,6 +46,14 @@ export default function TOMGeneratorPage() {
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="mb-6">
|
||||
<LicenseModuleBanner
|
||||
rule={1}
|
||||
sourceLabel="DSGVO Art. 32 (EU 2016/679) — TOM-Anforderungen"
|
||||
detail="Generator-Logik und Vorlagen sind BreakPilot-Eigenwerk (R3); zitierte Rechtsquelle EU_LAW (R1)."
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Progress Card */}
|
||||
{hasProgress && (
|
||||
<div className="bg-white rounded-xl border border-gray-200 p-6 mb-8">
|
||||
|
||||
@@ -350,7 +350,12 @@ function ActivityCard({ activity, onEdit, onDelete }: { activity: VVTActivity; o
|
||||
<span className="px-2 py-0.5 text-xs bg-purple-100 text-purple-700 rounded-full">DSFA</span>
|
||||
)}
|
||||
{(activity as any).sourceTemplateId && (
|
||||
<span className="px-2 py-0.5 text-xs bg-indigo-100 text-indigo-700 rounded-full">Vorlage</span>
|
||||
<span
|
||||
className="px-2 py-0.5 text-xs bg-indigo-100 text-indigo-700 rounded-full cursor-help"
|
||||
title="Erstellt aus Bundeslaender-DSGVO-Vorlage (Art. 30 DSGVO). Lizenzregel R1 — Hoheitsrecht/DE_LAW, woertlich uebernehmbar."
|
||||
>
|
||||
Vorlage · R1
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<h3 className="text-base font-semibold text-gray-900 truncate">{activity.name || '(Ohne Namen)'}</h3>
|
||||
|
||||
@@ -195,12 +195,18 @@ export default function CatalogTable({
|
||||
)}
|
||||
<td className="px-4 py-2.5">
|
||||
{entry.source === 'system' ? (
|
||||
<span className="inline-flex items-center px-2 py-0.5 rounded text-xs font-medium bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300">
|
||||
System
|
||||
<span
|
||||
className="inline-flex items-center px-2 py-0.5 rounded text-xs font-medium bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-300 cursor-help"
|
||||
title="System-Katalog — Quellen aus EU-Recht, BAuA, NIST u.a. Lizenzregel je Eintrag (siehe /sdk/licenses)."
|
||||
>
|
||||
System · R1/R2/R3
|
||||
</span>
|
||||
) : (
|
||||
<span className="inline-flex items-center px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/40 text-blue-700 dark:text-blue-300">
|
||||
Benutzerdefiniert
|
||||
<span
|
||||
className="inline-flex items-center px-2 py-0.5 rounded text-xs font-medium bg-blue-100 dark:bg-blue-900/40 text-blue-700 dark:text-blue-300 cursor-help"
|
||||
title="Benutzerdefinierter Eintrag — BreakPilot/Anwender-Eigenwerk. Lizenzregel R3 (Identifier-Verweis), keine externe Attribution noetig."
|
||||
>
|
||||
Benutzerdefiniert · R3
|
||||
</span>
|
||||
)}
|
||||
</td>
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
'use client'
|
||||
|
||||
// Reusable licence-source banner placed at the top of an SDK module page.
|
||||
// One-line context that tells the user (and any auditor) which sources
|
||||
// the module draws on and which BreakPilot licence rule applies.
|
||||
//
|
||||
// Usage:
|
||||
// <LicenseModuleBanner
|
||||
// rule={1}
|
||||
// sourceLabel="DSGVO Art. 30 (EU 2016/679)"
|
||||
// />
|
||||
//
|
||||
// For modules that are pure BreakPilot eigenwerk:
|
||||
// <LicenseModuleBanner rule={3} sourceLabel="BreakPilot-Eigenwerk" />
|
||||
|
||||
/** Props of LicenseModuleBanner. */
type Props = {
|
||||
// Licence rule; selects the RULE_META entry and is printed in the pill as "R<rule>".
rule: 1 | 2 | 3
|
||||
// Text shown directly after the "Quellen & Lizenz:" label.
sourceLabel: string
|
||||
/** Optional extended note shown after sourceLabel */
|
||||
detail?: string
|
||||
}
|
||||
|
||||
/**
 * Presentation data per licence rule: banner background/border (`bg`), text
 * colour (`text`), pill colours (`pill`) and the short rule description
 * (`descr`) appended after the source label.
 *
 * Keys are restricted to the three known rules so that a missing or extra
 * entry is a compile error and a lookup by a valid rule is never undefined.
 */
const RULE_META: Readonly<Record<1 | 2 | 3, { bg: string; text: string; pill: string; descr: string }>> = {
  1: {
    bg: 'bg-emerald-50 border-emerald-200',
    text: 'text-emerald-800',
    pill: 'bg-emerald-600 text-white',
    descr: 'Hoheitsrecht/Public Domain — woertlich uebernehmbar',
  },
  2: {
    bg: 'bg-amber-50 border-amber-200',
    text: 'text-amber-800',
    pill: 'bg-amber-600 text-white',
    descr: 'Woertlich mit Attribution-Pflicht',
  },
  3: {
    bg: 'bg-slate-50 border-slate-200',
    text: 'text-slate-700',
    pill: 'bg-slate-600 text-white',
    descr: 'Identifier-Verweis / BreakPilot-Eigenwerk',
  },
}
|
||||
|
||||
/**
 * One-line licence banner for the top of an SDK module page.
 *
 * Shows an "R<rule>" pill, the source label, the rule's short description,
 * an optional detail line and a link to the /sdk/licenses overview.
 *
 * @param rule        Licence rule (1, 2 or 3) that selects colours and description.
 * @param sourceLabel Human-readable name of the source(s) the module draws on.
 * @param detail      Optional extra note rendered on its own line.
 */
export function LicenseModuleBanner({ rule, sourceLabel, detail }: Props) {
  // The prop type only allows 1 | 2 | 3, but an untyped or cast value could
  // still arrive at runtime; use the R3 styling instead of dereferencing undefined.
  const meta = RULE_META[rule] ?? RULE_META[3]
  return (
    <div className={`px-3 py-2 ${meta.bg} border rounded-lg text-xs ${meta.text} flex items-start gap-2`}>
      <span className={`inline-flex items-center justify-center w-6 h-6 rounded-full text-[10px] font-bold ${meta.pill} flex-shrink-0`}>
        R{rule}
      </span>
      <div className="flex-1">
        <span className="font-semibold">Quellen & Lizenz:</span>{' '}
        <span>{sourceLabel}</span>
        <span className="text-slate-500"> — {meta.descr}.</span>
        {detail && <span className="block mt-0.5 text-[11px] opacity-80">{detail}</span>}
        <a href="/sdk/licenses" className="underline ml-1">Quellenverzeichnis</a>
      </div>
    </div>
  )
}
|
||||
|
||||
export default LicenseModuleBanner
|
||||
@@ -224,6 +224,19 @@ export function SDKSidebar({ collapsed = false, onCollapsedChange }: SDKSidebarP
|
||||
<span>Exportieren</span>
|
||||
</button>
|
||||
)}
|
||||
|
||||
{!collapsed && (
|
||||
<a
|
||||
href="/sdk/licenses"
|
||||
className="mt-2 w-full flex items-center justify-center gap-2 px-4 py-2 text-xs text-gray-500 hover:text-gray-700 hover:bg-gray-100 rounded-lg transition-colors"
|
||||
title="Quellen und Lizenzen aller verwendeten Compliance-Controls"
|
||||
>
|
||||
<svg className="w-3.5 h-3.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
|
||||
</svg>
|
||||
<span>Quellen & Lizenzen</span>
|
||||
</a>
|
||||
)}
|
||||
</div>
|
||||
</aside>
|
||||
)
|
||||
|
||||
@@ -73,6 +73,7 @@ export function SidebarModuleList({ collapsed, projectId, pendingCRCount }: Side
|
||||
<AdditionalModuleItem href="/sdk/ai-registration" icon={<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 21V5a2 2 0 00-2-2H7a2 2 0 00-2 2v16m14 0h2m-2 0h-5m-9 0H3m2 0h5M9 7h1m-1 4h1m4-4h1m-1 4h1m-5 10v-5a1 1 0 011-1h2a1 1 0 011 1v5m-4 0h4" /></svg>} label="EU Registrierung" isActive={pathname?.startsWith('/sdk/ai-registration') ?? false} collapsed={collapsed} projectId={projectId} />
|
||||
<AdditionalModuleItem href="/sdk/compliance-optimizer" icon={<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 7h8m0 0v8m0-8l-8 8-4-4-6 6" /></svg>} label="Compliance Optimizer" isActive={pathname?.startsWith('/sdk/compliance-optimizer') ?? false} collapsed={collapsed} projectId={projectId} />
|
||||
<AdditionalModuleItem href="/sdk/agent" icon={<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9.75 17L9 20l-1 1h8l-1-1-.75-3M3 13h18M5 17h14a2 2 0 002-2V5a2 2 0 00-2-2H5a2 2 0 00-2 2v10a2 2 0 002 2z" /></svg>} label="Compliance Agent" isActive={pathname?.startsWith('/sdk/agent') ?? false} collapsed={collapsed} projectId={projectId} />
|
||||
<AdditionalModuleItem href="/sdk/benchmark" icon={<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z" /></svg>} label="Branchen-Benchmark" isActive={pathname?.startsWith('/sdk/benchmark') ?? false} collapsed={collapsed} projectId={projectId} />
|
||||
</div>
|
||||
|
||||
{/* CRA Compliance */}
|
||||
|
||||
@@ -0,0 +1,138 @@
|
||||
'use client'
|
||||
|
||||
import { useEffect, useState } from 'react'
|
||||
|
||||
// Stufe 3 of the Attribution Renderer (Task #23): an inline source
|
||||
// badge that any rendered control/hazard/measure can attach to itself.
|
||||
//
|
||||
// Visually a small license-rule pill (R1/R2/R3); on hover/click it
|
||||
// reveals the underlying regulation, license type, and — for Rule 2 —
|
||||
// the mandatory attribution string.
|
||||
//
|
||||
// Usage:
|
||||
// <SourceBadge controlUuid={hazard.id} />
|
||||
//
|
||||
// The component lazily fetches /licenses/source-info/{uuid} on first
|
||||
// expand so the surrounding list view stays cheap.
|
||||
|
||||
/**
 * Licence/source details for a single control, as fetched from
 * /api/sdk/v1/compliance/licenses/source-info/{uuid} or passed in via the
 * `prefetched` prop.
 */
type SourceInfo = {
|
||||
control_uuid: string
|
||||
// Selects the pill colour and tooltip; null is rendered as '?' / 'Lizenz unbekannt'.
license_rule: number | null
|
||||
// Heading of the popover; falls back to 'Lizenz unbekannt' when null.
license_label_de: string | null
|
||||
attribution_required: boolean
|
||||
render_full_text: boolean
|
||||
regulation_id: string | null
|
||||
// Shown after "Quelle:" in the popover when present.
regulation_name_de: string | null
|
||||
license_type: string | null
|
||||
attribution: string | null
|
||||
source_url: string | null
|
||||
}
|
||||
|
||||
// Pill classes (background, text, border) keyed by licence rule.
// SourceBadge falls back to the entry for rule 3 when a rule has no entry here.
const RULE_BADGE: Record<number, string> = {
|
||||
1: 'bg-emerald-100 text-emerald-800 border-emerald-300',
|
||||
2: 'bg-amber-100 text-amber-800 border-amber-300',
|
||||
3: 'bg-slate-100 text-slate-700 border-slate-300',
|
||||
}
|
||||
|
||||
// Tooltip (title attribute) text of the pill button, keyed by licence rule.
const RULE_TITLE: Record<number, string> = {
|
||||
1: 'R1 — wörtlich übernehmbar',
|
||||
|
||||
2: 'R2 — wörtlich mit Attribution',
|
||||
|
||||
3: 'R3 — nur Identifier zitieren',
|
||||
|
||||
}
|
||||
|
||||
/** Props of SourceBadge. */
interface SourceBadgeProps {
|
||||
// Interpolated into the source-info request URL when the badge is opened without data.
controlUuid: string
|
||||
/** Optional: skip the fetch and render from already-known data. */
|
||||
prefetched?: SourceInfo
|
||||
/** Compact mode for tight UI rows (smaller pill). */
|
||||
compact?: boolean
|
||||
}
|
||||
|
||||
/**
 * Inline license-rule pill with a click-to-open popover.
 *
 * The pill shows `R<rule>` (or `?` when the rule is unknown). The popover
 * lists license label, regulation, license type, attribution and a link to
 * the original source. Source data is fetched only when the popover is
 * opened and no data is available yet.
 *
 * @param controlUuid - control whose source info is shown
 * @param prefetched - optional data that makes the fetch unnecessary
 * @param compact - smaller pill for tight rows
 */
export function SourceBadge({ controlUuid, prefetched, compact }: SourceBadgeProps) {
  const [data, setData] = useState<SourceInfo | null>(prefetched ?? null)
  const [open, setOpen] = useState(false)
  const [loading, setLoading] = useState(false)
  const [error, setError] = useState<string | null>(null)

  // When the instance is reused for another control (or new prefetched data
  // arrives), discard whatever was loaded before so the popover never shows
  // the previous control's license.
  useEffect(() => {
    setData(prefetched ?? null)
    setError(null)
  }, [controlUuid, prefetched])

  useEffect(() => {
    if (!open || data) return

    // Lets us cancel the request when the popover closes, the uuid changes
    // or the component unmounts, so a late response cannot overwrite state.
    const controller = new AbortController()
    setError(null) // a retry must not keep showing the previous failure
    setLoading(true)

    fetch(`/api/sdk/v1/compliance/licenses/source-info/${encodeURIComponent(controlUuid)}`, {
      signal: controller.signal,
    })
      .then((r) => (r.ok ? r.json() : Promise.reject(new Error(`HTTP ${r.status}`))))
      .then((info: SourceInfo) => setData(info))
      .catch((e: unknown) => {
        if (controller.signal.aborted) return
        setError(e instanceof Error ? e.message : String(e))
      })
      .finally(() => {
        if (!controller.signal.aborted) setLoading(false)
      })

    return () => {
      controller.abort()
      setLoading(false)
    }
  }, [open, data, controlUuid])

  const rule = data?.license_rule ?? prefetched?.license_rule ?? null
  const badgeClass = rule ? RULE_BADGE[rule] ?? RULE_BADGE[3] : 'bg-slate-100 text-slate-500 border-slate-200'
  // Unknown rule numbers get the same "unknown" tooltip as a missing rule
  const badgeTitle = (rule ? RULE_TITLE[rule] : undefined) ?? 'Lizenz unbekannt'
  const sizeClass = compact ? 'text-[10px] px-1.5 py-0.5' : 'text-xs px-2 py-0.5'

  return (
    <span className="relative inline-block">
      <button
        type="button"
        onClick={() => setOpen((v) => !v)}
        className={`inline-flex items-center gap-1 rounded border font-medium ${sizeClass} ${badgeClass} hover:opacity-80 transition`}
        title={badgeTitle}
        aria-expanded={open}
      >
        <svg width="10" height="10" viewBox="0 0 16 16" fill="currentColor" aria-hidden>
          <path d="M8 0a8 8 0 1 0 0 16A8 8 0 0 0 8 0Zm0 4.5a1 1 0 1 1 0 2 1 1 0 0 1 0-2ZM7 8h2v4.5H7V8Z" />
        </svg>
        {rule ? `R${rule}` : '?'}
      </button>

      {open && (
        <div className="absolute left-0 mt-1 z-40 w-80 rounded-md border border-slate-200 bg-white shadow-lg p-3 text-xs">
          {loading && <p className="text-slate-500">Lade Quellen-Info…</p>}
          {error && <p className="text-red-600">Fehler: {error}</p>}
          {data && (
            <div className="space-y-2">
              <div className="font-semibold text-slate-800">
                {data.license_label_de ?? 'Lizenz unbekannt'}
              </div>
              {data.regulation_name_de && (
                <div>
                  <span className="text-slate-500">Quelle:</span>{' '}
                  <span className="text-slate-800">{data.regulation_name_de}</span>
                </div>
              )}
              {data.license_type && (
                <div>
                  <span className="text-slate-500">Lizenztyp:</span>{' '}
                  <span className="text-slate-700">{data.license_type}</span>
                </div>
              )}
              {data.attribution && (
                <div className="rounded bg-amber-50 border border-amber-200 px-2 py-1.5">
                  <div className="text-[10px] font-semibold text-amber-800 uppercase tracking-wide">
                    Attribution-Pflicht
                  </div>
                  <div className="text-amber-900">{data.attribution}</div>
                </div>
              )}
              {!data.render_full_text && (
                <div className="text-[10px] text-slate-500 italic">
                  Volltext wird im Output nicht gerendert — nur Identifier-Verweis.
                </div>
              )}
              {data.source_url && (
                <a
                  href={data.source_url}
                  target="_blank"
                  rel="noopener noreferrer"
                  className="inline-block text-[10px] text-blue-600 hover:underline mt-1"
                >
                  Originalquelle öffnen ↗
                </a>
              )}
            </div>
          )}
        </div>
      )}
    </span>
  )
}
|
||||
|
||||
export default SourceBadge
|
||||
@@ -0,0 +1,355 @@
|
||||
/**
|
||||
* E2E-Test fuer den Founding-Wizard
|
||||
*
|
||||
* Prueft den vollstaendigen 8-Step-Flow:
|
||||
* - Application-Errors / Console-Errors auf jeder Seite
|
||||
* - StepBasics: Prefill-Button + Registergericht/HRB-Felder
|
||||
* - StepGesellschafter: Rollen-Dropdown + IP-Bereiche fuer 2 Gruender
|
||||
* - Per-Person Generation: 2 IP-Assignment-Dokumente
|
||||
* - localStorage-Persistenz
|
||||
*
|
||||
* Backend wird per route.fulfill() gemockt — Test ist hermetisch.
|
||||
*/
|
||||
|
||||
import { test, expect, type Page, type ConsoleMessage } from '@playwright/test'
|
||||
|
||||
// Origin of the app under test; overridable via PLAYWRIGHT_BASE_URL
// (an empty value also falls back to the local default).
const BASE = process.env.PLAYWRIGHT_BASE_URL || 'http://localhost:3002'
// Route of the wizard page, appended to BASE
const WIZARD_PATH = '/sdk/founding-wizard'
|
||||
|
||||
/**
 * Tells whether a browser console message is a genuine application error.
 * Only messages of type `error` qualify, and those containing one of the
 * known-noise fragments below are ignored.
 */
function isRealAppError(msg: ConsoleMessage): boolean {
  // Fragments of messages that are expected noise and must not fail a test
  const noise = [
    'Failed to load resource', // 404s for icons etc.
    'Download the React DevTools', // React hint
    'net::ERR_', // network (mocked → expected misses)
    'Hydration failed because', // Next 15 pseudo-errors in dev
    '[founding-wizard] prefill failed', // intentional UX logging in the prefill error path
  ]

  if (msg.type() === 'error') {
    const message = msg.text()
    return noise.every((fragment) => !message.includes(fragment))
  }
  return false
}
|
||||
|
||||
/** Message fragments of uncaught page errors that the tests tolerate. */
const IGNORED_PAGE_ERRORS = [
  // Hydration mismatches caused by dynamic timestamps ("Gerade eben" vs
  // "vor 1 Min") in the SDK header — a dev-mode symptom, not an app bug.
  'Hydration failed because the server rendered text didn',
  'There was an error while hydrating',
  // Next.js dev-mode signal for hydration issues
  'Text content does not match server-rendered HTML',
]

/** Returns true when the error message contains one of the tolerated fragments. */
function isIgnoredPageError(err: Error): boolean {
  for (const fragment of IGNORED_PAGE_ERRORS) {
    if (err.message.includes(fragment)) {
      return true
    }
  }
  return false
}
|
||||
|
||||
/**
 * Attaches console and page-error listeners to the page and collects every
 * message that is not filtered out as noise.
 *
 * @returns an object whose `assertNoErrors()` throws, listing all collected
 *          messages, if any were recorded up to the moment it is called.
 */
function installErrorTraps(page: Page): { assertNoErrors: () => void } {
  const seenPageErrors: string[] = []
  const seenConsoleErrors: string[] = []

  page.on('pageerror', (err) => {
    if (isIgnoredPageError(err)) return
    seenPageErrors.push(`${err.name}: ${err.message}`)
  })
  page.on('console', (msg) => {
    if (!isRealAppError(msg)) return
    seenConsoleErrors.push(msg.text())
  })

  const assertNoErrors = (): void => {
    // Page errors first, then console errors — each tagged with its origin
    const report = [
      ...seenPageErrors.map((e) => `[pageerror] ${e}`),
      ...seenConsoleErrors.map((e) => `[console.error] ${e}`),
    ]
    if (report.length === 0) return
    throw new Error(`Application-Errors waehrend des Flows:\n${report.join('\n')}`)
  }

  return { assertNoErrors }
}
|
||||
|
||||
/**
 * Registers route mocks for the two API endpoints the wizard calls, so the
 * tests run without a backend.
 */
async function mockBackend(page: Page) {
  // 1) Company-profile prefill: always answers with a fixed profile
  await page.route('**/api/sdk/v1/company-profile**', (route) =>
    route.fulfill({
      status: 200,
      contentType: 'application/json',
      body: JSON.stringify({
        profile: {
          companyName: 'Breakpilot GmbH',
          legalForm: 'GmbH',
          industry: ['Software', 'KI/ML'],
          businessModel: 'SaaS',
          offerings: ['SaaS-Plattform', 'Compliance-API'],
          headquartersStreet: 'Königstraße 1',
          headquartersZip: '70173',
          headquartersCity: 'Stuttgart',
        },
      }),
    })
  )

  // 2) Founding-wizard generate: one document per selected type, except for
  //    the per-person types, which yield one document per matching founder.
  await page.route('**/api/v1/founding-wizard/generate', async (route) => {
    const payload = JSON.parse(route.request().postData() || '{}')
    const selected: string[] = payload.selected_documents || []
    const founders: Array<{ name?: string; is_geschaeftsfuehrer?: boolean }> = payload.gesellschafter || []

    const perPersonTypes = ['ip_assignment_agreement', 'managing_director_employment_contract']
    const stubDocx = 'UEsDBBQAAAAIAA==' // base64 stub; the tests never open the file

    // All mocked documents share everything except type, title and filename
    const makeDoc = (docType: string, title: string, filename: string) => ({
      document_type: docType,
      title,
      filename,
      download_url: `data:application/vnd.openxmlformats-officedocument.wordprocessingml.document;base64,${stubDocx}`,
      size_bytes: 12345,
      generated_at: '2026-05-21T12:00:00Z',
    })

    const docs: unknown[] = selected.flatMap((docType) => {
      if (!perPersonTypes.includes(docType)) {
        return [makeDoc(docType, docType, `${docType}.docx`)]
      }
      // Employment contracts only for managing directors, IP assignments for everyone
      const persons =
        docType === 'managing_director_employment_contract'
          ? founders.filter((g) => g.is_geschaeftsfuehrer)
          : founders
      return persons.map((p) =>
        makeDoc(docType, `${docType} — ${p.name}`, `${docType}_${(p.name || 'X').replace(/\s/g, '_')}.docx`)
      )
    })

    await route.fulfill({
      status: 200,
      contentType: 'application/json',
      body: JSON.stringify({ documents: docs, warnings: [] }),
    })
  })
}
|
||||
|
||||
/**
 * Registers an init script that removes the persisted wizard state and
 * stores a minimal cookie consent, so the CookieBannerOverlay does not
 * intercept clicks during the test.
 */
async function resetWizardState(page: Page) {
  // The callback is executed inside the page, so it must not reference
  // anything from this module's scope.
  await page.addInitScript(() => {
    try {
      const storage = window.localStorage
      storage.removeItem('breakpilot:founding-wizard:state:v1')

      // CookieBannerOverlay reads 'bp-sdk-cookie-consent' and hides itself
      // as soon as an entry exists; a minimal consent is enough.
      const consent = {
        necessary: true, statistics: false, marketing: false, functional: false,
        ewrOnly: false, blockedVendors: [], timestamp: new Date().toISOString(),
      }
      storage.setItem('bp-sdk-cookie-consent', JSON.stringify(consent))
    } catch {}
  })
}
|
||||
|
||||
test.describe('Founding-Wizard E2E', () => {
|
||||
// Every test starts from a clean wizard state with the default backend mocks
test.beforeEach(async ({ page }) => {
  await resetWizardState(page)
  await mockBackend(page)
})
|
||||
|
||||
test('vollstaendiger 8-Step-Flow ohne Application-Errors', async ({ page }) => {
  const errors = installErrorTraps(page)

  // Small local shorthands; they issue exactly the same locator calls
  const field = (id: string) => page.getByTestId(id)
  const advance = () => field('next-step').click()
  const expectStep = (n: number) => expect(field(`step-content-${n}`)).toBeVisible()

  // Enters one founder through the step-2 form and waits for its table row
  const addFounder = async (
    founder: { name: string; birthdate: string; address: string; email: string; nennbetrag: string; role: string; ipAreas: string },
    rowTestId: string
  ) => {
    await field('gs-name').fill(founder.name)
    await field('gs-birthdate').fill(founder.birthdate)
    await field('gs-address').fill(founder.address)
    await field('gs-email').fill(founder.email)
    await field('gs-nennbetrag').fill(founder.nennbetrag)
    await field('gs-role').selectOption(founder.role)
    await field('gs-ip-areas').fill(founder.ipAreas)
    await field('add-gesellschafter').click()
    await expect(field(rowTestId)).toBeVisible()
  }

  await page.goto(`${BASE}${WIZARD_PATH}`)
  await expect(field('founding-wizard')).toBeVisible()
  await expectStep(1)

  // --- Step 1: basics + prefill ---
  await page.getByRole('button', { name: /Aus Unternehmensprofil vorbef/i }).click()
  await expect(field('company-name')).toHaveValue('Breakpilot GmbH', { timeout: 5000 })
  await expect(field('company-seat')).toHaveValue('Stuttgart')

  // Required field: company purpose (at least 10 characters)
  await field('company-purpose').fill(
    'die Entwicklung, Bereitstellung und der Betrieb von KI-gestuetzten Compliance-Werkzeugen sowie damit verbundener Beratungsleistungen.'
  )

  // New fields: register court + HRB number
  await field('register-court').fill('Amtsgericht Stuttgart')
  await field('hrb-number').fill('') // not registered yet

  await advance()

  // --- Step 2: shareholders ---
  await expectStep(2)

  // Benjamin (CEO, IP: compliance + RAG)
  await addFounder(
    {
      name: 'Benjamin Bönisch',
      birthdate: '1985-01-15',
      address: 'Teststraße 1, 70173 Stuttgart',
      email: 'benjamin@breakpilot.ai',
      nennbetrag: '12500',
      role: 'CEO',
      ipAreas: 'Compliance-Engine (Quellcode + Architektur)\nRAG-Pipeline\nProdukt-Konzepte',
    },
    'gs-row-1'
  )

  // Sharang (CTO, IP: security + infrastructure)
  await addFounder(
    {
      name: 'Sharang Parnerkar',
      birthdate: '1990-06-20',
      address: 'Teststraße 2, 70173 Stuttgart',
      email: 'sharang@breakpilot.ai',
      nennbetrag: '12500',
      role: 'CTO',
      ipAreas: 'Security-Modul\nInfrastructure-as-Code',
    },
    'gs-row-2'
  )

  // Sum of nominal amounts must equal the share capital (25.000)
  await expect(field('gs-total')).toContainText('25.000')

  await advance()

  // --- Step 3: managing-director assignment (defaults are fine) ---
  await expectStep(3)
  await expect(field('gf-assignment-table')).toBeVisible()
  await advance()

  // --- Step 4: capital (default: 25000) ---
  await expectStep(4)
  await expect(field('stammkapital')).toHaveValue('25000')
  await advance()

  // --- Step 5: notary ---
  await expectStep(5)
  await field('notary-name').fill('Dr. Max Mustermann')
  await field('notary-place').fill('Stuttgart')
  await field('notary-address').fill('Königstraße 99, 70173 Stuttgart')
  await field('notarial-date').fill('2026-06-15')
  await advance()

  // --- Step 6: SHA options (defaults are fine) ---
  await expectStep(6)
  await expect(field('has-sha')).toBeChecked()
  await advance()

  // --- Step 7: managing-director contracts (one per director) ---
  await expectStep(7)
  // Both contract cards must be present
  await expect(page.locator('[data-testid^="contract-"]')).toHaveCount(2)
  // Touch the salary in every card so the contracts get registered (the
  // proceed condition); a non-default value (default 84000) makes onChange fire.
  const salaryInputs = page.locator('[data-testid^="salary-"]')
  const salaryCount = await salaryInputs.count()
  for (let idx = 0; idx < salaryCount; idx += 1) {
    await salaryInputs.nth(idx).fill('90000')
  }
  // Wait until "next" is enabled
  await expect(field('next-step')).toBeEnabled()
  await advance()

  // --- Step 8: generate ---
  await expectStep(8)
  const summary = field('generate-summary')
  await expect(summary).toContainText('Breakpilot GmbH')
  await expect(summary).toContainText('2', { useInnerText: true })

  // Select the notary-appointment bundle
  await field('select-notary-bundle').click()

  // Generate (backend is mocked)
  await field('generate-docs').click()

  // The generated-docs block must appear
  await expect(field('generated-docs')).toBeVisible({ timeout: 10000 })

  // Per-person check: two IP-assignment downloads expected
  await expect(page.locator('[data-testid="download-ip_assignment_agreement"]')).toHaveCount(2)

  // Per-person check: two managing-director contracts expected
  await expect(page.locator('[data-testid="download-managing_director_employment_contract"]')).toHaveCount(2)

  // No generate-error visible
  await expect(field('generate-error')).toBeHidden()

  // Finally: nothing was recorded by the error traps
  errors.assertNoErrors()
})
|
||||
|
||||
test('Prefill-Button setzt Fehler bei Backend-Fehler ohne Application-Error', async ({ page }) => {
  // Dedicated mock for this test: the company-profile endpoint answers 500
  await page.route('**/api/sdk/v1/company-profile**', (route) =>
    route.fulfill({ status: 500, body: 'boom' })
  )

  const errors = installErrorTraps(page)
  await page.goto(`${BASE}${WIZARD_PATH}`)

  const prefillButton = page.getByRole('button', { name: /Aus Unternehmensprofil vorbef/i })
  await prefillButton.click()

  // The UI has to show an error message instead of crashing
  const failureNotice = page.getByText('Konnte Unternehmensprofil nicht laden')
  await expect(failureNotice).toBeVisible()

  errors.assertNoErrors()
})
|
||||
|
||||
test('Step-Navigation: Zurueck und Reset funktionieren ohne Errors', async ({ page }) => {
  const errors = installErrorTraps(page)
  const field = (id: string) => page.getByTestId(id)

  await page.goto(`${BASE}${WIZARD_PATH}`)

  // Fill the minimum of step 1
  await field('company-name').fill('Breakpilot GmbH')
  await field('company-seat').fill('Stuttgart')
  await field('company-purpose').fill('die Entwicklung von Compliance-Software fuer Unternehmen.')

  // Forward to step 2 ...
  await field('next-step').click()
  await expect(field('step-content-2')).toBeVisible()

  // ... and back to step 1
  await field('prev-step').click()
  await expect(field('step-content-1')).toBeVisible()

  // The entered values must still be there after navigating back
  const companyName = field('company-name')
  await expect(companyName).toHaveValue('Breakpilot GmbH')

  // Reset (the confirmation dialog is accepted)
  page.once('dialog', (dialog) => dialog.accept())
  await field('reset-wizard').click()
  await expect(companyName).toHaveValue('')

  errors.assertNoErrors()
})
|
||||
|
||||
test('IP-Areas + Rollen-Dropdown in Step 2', async ({ page }) => {
  const errors = installErrorTraps(page)
  const field = (id: string) => page.getByTestId(id)

  await page.goto(`${BASE}${WIZARD_PATH}`)

  // Get through step 1 quickly
  await field('company-name').fill('Breakpilot GmbH')
  await field('company-seat').fill('Stuttgart')
  await field('company-purpose').fill('die Entwicklung von Compliance-Software fuer Unternehmen.')
  await field('next-step').click()

  // The role control has to be a <select>, not an <input>
  const roleSelect = field('gs-role')
  await expect(roleSelect).toHaveJSProperty('tagName', 'SELECT')

  // The CEO option must be selectable
  await field('gs-name').fill('Benjamin Bönisch')
  await field('gs-address').fill('Test 1')
  await field('gs-nennbetrag').fill('25000')
  await roleSelect.selectOption('CEO')
  await field('gs-ip-areas').fill('Compliance-Engine\nRAG-Pipeline')
  await field('add-gesellschafter').click()

  // The table row has to show name, role and the IP areas
  const firstRow = field('gs-row-1')
  for (const expectedText of ['Benjamin Bönisch', 'CEO', 'Compliance-Engine']) {
    await expect(firstRow).toContainText(expectedText)
  }

  errors.assertNoErrors()
})
|
||||
})
|
||||
@@ -25,6 +25,8 @@ export interface Gesellschafter {
|
||||
internal_role?: string
|
||||
/** Falls Gründer akademischen Hintergrund hat (Professur etc.) */
|
||||
has_academic_background?: boolean
|
||||
/** IP-Bereiche die der Gründer für die GmbH einbringt (z.B. ["Compliance-Engine", "RAG-Pipeline"]) */
|
||||
ip_areas?: string[]
|
||||
}
|
||||
|
||||
export interface NotarData {
|
||||
@@ -46,6 +48,10 @@ export interface CompanyBasics {
|
||||
industry: string
|
||||
business_year: string // z.B. "Kalenderjahr"
|
||||
has_research_focus: boolean
|
||||
/** Registergericht (z.B. "Amtsgericht Stuttgart"). Pflicht für HRB-Anmeldung. */
|
||||
register_court?: string
|
||||
/** HRB-Nummer (z.B. "HRB 12345"). Leer falls noch nicht eingetragen. */
|
||||
hrb_number?: string
|
||||
}
|
||||
|
||||
export interface CapitalConfig {
|
||||
@@ -145,6 +151,8 @@ export function defaultFoundingWizardState(): FoundingWizardState {
|
||||
industry: '',
|
||||
business_year: 'Kalenderjahr',
|
||||
has_research_focus: false,
|
||||
register_court: '',
|
||||
hrb_number: '',
|
||||
},
|
||||
gesellschafter: [],
|
||||
capital: {
|
||||
|
||||
@@ -0,0 +1,288 @@
|
||||
package handlers
|
||||
|
||||
// LLM Gap-Review handler — Task #7.
|
||||
//
|
||||
// After the deterministic Pattern-Engine has generated hazards and
|
||||
// mitigations for an IACE project, this endpoint asks a configured LLM
|
||||
// (Qwen / Claude / OpenAI) to spot what the engine MISSED. The LLM is
|
||||
// fed the Limits-Form, the current hazard list, and a compressed
|
||||
// pattern catalogue summary; it returns a list of suggested additional
|
||||
// hazards or mitigations.
|
||||
//
|
||||
// Important guardrails:
|
||||
// - Every suggestion must point to an existing pattern_id or norm
|
||||
// identifier — pure free-form LLM hallucinations are filtered.
|
||||
// - The response is provenance-tagged source="llm_gap_review" so
|
||||
// the frontend renders an Adopt/Reject UX rather than committing.
|
||||
// - Engine output (deterministic patterns) is never overwritten by
|
||||
// LLM output; the gap-review is a SUPPLEMENT, not a replacement.
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"sort"
	"strings"

	"github.com/gin-gonic/gin"
	"github.com/google/uuid"

	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
)
|
||||
|
||||
// GapSuggestion describes a single addition proposed by the gap review.
// It stays a proposal until the user adopts it in the frontend.
type GapSuggestion struct {
	// Kind is "hazard" or "mitigation".
	Kind string `json:"kind"`
	// Title and Description are the human-readable content of the proposal.
	Title       string `json:"title"`
	Description string `json:"description"`
	Category    string `json:"category,omitempty"`
	// HazardRef is used by mitigations: the name of an existing hazard.
	HazardRef string `json:"hazard_ref,omitempty"`
	// PatternRef is an HP-XXXX identifier from the engine library.
	PatternRef string `json:"pattern_ref,omitempty"`
	// NormRefs lists norm identifiers (EN ISO 12100 / DGUV / OSHA).
	NormRefs []string `json:"norm_refs,omitempty"`
	// Confidence is "high", "medium" or "low".
	Confidence string `json:"confidence,omitempty"`
	Rationale  string `json:"rationale,omitempty"`
}
|
||||
|
||||
// GapReviewResponse is the wire format for the frontend modal.
|
||||
type GapReviewResponse struct {
|
||||
ProjectID string `json:"project_id"`
|
||||
Source string `json:"source"` // "llm_gap_review" | "fallback_static"
|
||||
Model string `json:"model,omitempty"`
|
||||
Suggestions []GapSuggestion `json:"suggestions"`
|
||||
InputSummary struct {
|
||||
HazardCount int `json:"hazard_count"`
|
||||
MitigationCount int `json:"mitigation_count"`
|
||||
LimitsFormFields int `json:"limits_form_fields"`
|
||||
} `json:"input_summary"`
|
||||
}
|
||||
|
||||
// LLMGapReview handles POST /projects/:id/llm-gap-review.
|
||||
//
|
||||
// The endpoint is intentionally idempotent — repeated calls do not mutate
|
||||
// project state. The Adopt step (user-driven) is what changes data, via
|
||||
// the existing CreateHazard / CreateMitigation handlers.
|
||||
func (h *IACEHandler) LLMGapReview(c *gin.Context) {
|
||||
projectID, err := uuid.Parse(c.Param("id"))
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid project id"})
|
||||
return
|
||||
}
|
||||
|
||||
ctx := c.Request.Context()
|
||||
project, err := h.store.GetProject(ctx, projectID)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "project not found"})
|
||||
return
|
||||
}
|
||||
|
||||
hazards, err := h.store.ListHazards(ctx, projectID)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "list hazards: " + err.Error()})
|
||||
return
|
||||
}
|
||||
mitigations, err := h.store.ListMitigationsByProject(ctx, projectID)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "list mitigations: " + err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
limitsForm := extractLimitsForm(project)
|
||||
prompt := buildGapReviewPrompt(project, hazards, mitigations, limitsForm)
|
||||
|
||||
resp := GapReviewResponse{ProjectID: projectID.String()}
|
||||
resp.InputSummary.HazardCount = len(hazards)
|
||||
resp.InputSummary.MitigationCount = len(mitigations)
|
||||
resp.InputSummary.LimitsFormFields = countLimitsFields(limitsForm)
|
||||
|
||||
suggestions, model, err := callLLMForGapReview(ctx, h.llmRegistry, prompt)
|
||||
if err != nil {
|
||||
resp.Source = "fallback_static"
|
||||
resp.Suggestions = staticFallbackSuggestions(hazards)
|
||||
c.JSON(http.StatusOK, resp)
|
||||
return
|
||||
}
|
||||
|
||||
resp.Source = "llm_gap_review"
|
||||
resp.Model = model
|
||||
resp.Suggestions = filterAndProvenance(suggestions)
|
||||
c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
// extractLimitsForm pulls the structured limits-form out of project metadata.
|
||||
func extractLimitsForm(p *iace.Project) map[string]any {
|
||||
if len(p.Metadata) == 0 {
|
||||
return nil
|
||||
}
|
||||
var md map[string]any
|
||||
if err := json.Unmarshal(p.Metadata, &md); err != nil {
|
||||
return nil
|
||||
}
|
||||
lf, _ := md["limits_form"].(map[string]any)
|
||||
return lf
|
||||
}
|
||||
|
||||
// countLimitsFields counts the filled-in entries of a limits form: strings
// that are not blank and arrays that are not empty. Values of any other
// type are not counted.
func countLimitsFields(lf map[string]any) int {
	filled := 0
	for _, value := range lf {
		switch typed := value.(type) {
		case string:
			if strings.TrimSpace(typed) != "" {
				filled++
			}
		case []any:
			if len(typed) > 0 {
				filled++
			}
		}
	}
	return filled
}
|
||||
|
||||
// buildGapReviewPrompt assembles the LLM input. Kept compact — the LLM
|
||||
// only needs the limits-form context, the current hazard headlines, and
|
||||
// a reminder of the pattern-id naming so its suggestions can be linked
|
||||
// back to engine output later.
|
||||
func buildGapReviewPrompt(p *iace.Project, hz []iace.Hazard, mt []iace.Mitigation, lf map[string]any) string {
|
||||
var sb strings.Builder
|
||||
sb.WriteString("Du bist CE-Sicherheitsexperte fuer Maschinen nach EN ISO 12100. ")
|
||||
sb.WriteString("Analysiere die folgende Risikobeurteilung und identifiziere FEHLENDE ")
|
||||
sb.WriteString("Gefaehrdungen oder Schutzmassnahmen, die ein erfahrener Auditor ergaenzen wuerde.\n\n")
|
||||
|
||||
sb.WriteString(fmt.Sprintf("Maschine: %s (Typ: %s, Hersteller: %s)\n",
|
||||
p.MachineName, p.MachineType, p.Manufacturer))
|
||||
if p.CEMarkingTarget != "" {
|
||||
sb.WriteString(fmt.Sprintf("CE-Ziel: %s\n", p.CEMarkingTarget))
|
||||
}
|
||||
sb.WriteString("\nGrenzen-Form (Limits & Verwendung):\n")
|
||||
for k, v := range lf {
|
||||
sb.WriteString(fmt.Sprintf("- %s: %v\n", k, truncForPrompt(v, 200)))
|
||||
}
|
||||
|
||||
sb.WriteString(fmt.Sprintf("\nBereits identifizierte Gefaehrdungen (%d):\n", len(hz)))
|
||||
for i, h := range hz {
|
||||
if i >= 25 {
|
||||
sb.WriteString(fmt.Sprintf("... und %d weitere\n", len(hz)-25))
|
||||
break
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf("- [%s] %s\n", h.Category, h.Name))
|
||||
}
|
||||
|
||||
sb.WriteString(fmt.Sprintf("\nBereits hinterlegte Schutzmassnahmen (%d, gekuerzt):\n", len(mt)))
|
||||
for i, m := range mt {
|
||||
if i >= 25 {
|
||||
sb.WriteString(fmt.Sprintf("... und %d weitere\n", len(mt)-25))
|
||||
break
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf("- [%s] %s\n", m.ReductionType, m.Name))
|
||||
}
|
||||
|
||||
sb.WriteString("\nAufgabe: Liste max. 8 LUECKEN als JSON-Array. Jede Luecke MUSS einer der folgenden Kategorien entsprechen ")
|
||||
sb.WriteString("und SOLL eine Norm- oder Pattern-Referenz nennen (HP-XXXX, EN ISO 12100, EN 13849, EN 13855, DGUV-Info, OSHA 29 CFR).\n")
|
||||
sb.WriteString("Kategorien: mechanical_hazard, electrical_hazard, thermal_hazard, noise_vibration, ergonomic, ")
|
||||
sb.WriteString("material_environmental, pneumatic_hydraulic, radiation_hazard.\n\n")
|
||||
sb.WriteString(`Antworte NUR mit JSON, keine Erklaerung:
|
||||
[
|
||||
{"kind":"hazard","title":"...","description":"...","category":"...","norm_refs":["EN ISO 12100"],"confidence":"high","rationale":"..."},
|
||||
{"kind":"mitigation","title":"...","description":"...","hazard_ref":"Name der bestehenden Gefahr","norm_refs":["DGUV 209-072"],"confidence":"medium","rationale":"..."}
|
||||
]`)
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// truncForPrompt formats v with %v and limits the result to at most max
// bytes, appending "…" when something was cut off.
//
// The cut is moved back to the start of a UTF-8 sequence, so a multi-byte
// character (e.g. an umlaut) is never split in half. A negative max is
// treated as 0.
func truncForPrompt(v any, max int) string {
	s := fmt.Sprintf("%v", v)
	if len(s) <= max {
		return s
	}
	cut := max
	if cut < 0 {
		cut = 0
	}
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back
	// until the cut sits on the first byte of a character.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "…"
}
|
||||
|
||||
// callLLMForGapReview sends the prompt and parses the JSON suggestion list.
|
||||
func callLLMForGapReview(ctx context.Context, registry *llm.ProviderRegistry, prompt string) ([]GapSuggestion, string, error) {
|
||||
if registry == nil {
|
||||
return nil, "", fmt.Errorf("no LLM registry configured")
|
||||
}
|
||||
provider, err := registry.GetAvailable(ctx)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("no LLM provider available: %w", err)
|
||||
}
|
||||
resp, err := provider.Chat(ctx, &llm.ChatRequest{
|
||||
Messages: []llm.Message{{Role: "user", Content: prompt}},
|
||||
Temperature: 0.25,
|
||||
MaxTokens: 2000,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("llm chat: %w", err)
|
||||
}
|
||||
|
||||
body := strings.TrimSpace(resp.Message.Content)
|
||||
// LLMs occasionally wrap JSON in ```json … ``` fences; strip them.
|
||||
body = strings.TrimPrefix(body, "```json")
|
||||
body = strings.TrimPrefix(body, "```")
|
||||
body = strings.TrimSuffix(body, "```")
|
||||
body = strings.TrimSpace(body)
|
||||
|
||||
// Find first '[' so any leading prose is ignored.
|
||||
if i := strings.Index(body, "["); i > 0 {
|
||||
body = body[i:]
|
||||
}
|
||||
var out []GapSuggestion
|
||||
if err := json.Unmarshal([]byte(body), &out); err != nil {
|
||||
return nil, "", fmt.Errorf("parse llm response: %w (body=%.200s)", err, body)
|
||||
}
|
||||
return out, provider.Name(), nil
|
||||
}
|
||||
|
||||
// filterAndProvenance drops obviously malformed suggestions and stamps
|
||||
// every survivor with a `confidence` default. Pure-free-form suggestions
|
||||
// without any norm reference are demoted to "low".
|
||||
func filterAndProvenance(in []GapSuggestion) []GapSuggestion {
|
||||
out := make([]GapSuggestion, 0, len(in))
|
||||
for _, s := range in {
|
||||
if strings.TrimSpace(s.Title) == "" || s.Kind == "" {
|
||||
continue
|
||||
}
|
||||
if s.Confidence == "" {
|
||||
if len(s.NormRefs) == 0 && s.PatternRef == "" {
|
||||
s.Confidence = "low"
|
||||
} else {
|
||||
s.Confidence = "medium"
|
||||
}
|
||||
}
|
||||
out = append(out, s)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// staticFallbackSuggestions returns a generic checklist when no LLM is
|
||||
// available. Conservative, all confidence="low".
|
||||
func staticFallbackSuggestions(hz []iace.Hazard) []GapSuggestion {
|
||||
hasMechanical := false
|
||||
for _, h := range hz {
|
||||
if strings.Contains(h.Category, "mechanical") {
|
||||
hasMechanical = true
|
||||
break
|
||||
}
|
||||
}
|
||||
out := []GapSuggestion{
|
||||
{
|
||||
Kind: "hazard", Title: "Fuss-Quetschung unter absenkendem Werkstueck/Hubeinheit",
|
||||
Description: "Wenn die Maschine eine Hubbewegung ausfuehrt, pruefe ob Fuesse/Beine im Verfahrbereich gequetscht werden koennen.",
|
||||
Category: "mechanical_hazard", NormRefs: []string{"EN ISO 12100 6.3.5.5"},
|
||||
Confidence: "low", Rationale: "Static checklist fallback — LLM nicht verfuegbar.",
|
||||
},
|
||||
{
|
||||
Kind: "hazard", Title: "Hand-Quetschung gegen feste Strukturen beim Hochfahren",
|
||||
Description: "Pruefe Mindestabstand zu festen Strukturen oberhalb der hoechsten Hubposition.",
|
||||
Category: "mechanical_hazard", NormRefs: []string{"EN ISO 13854"},
|
||||
Confidence: "low",
|
||||
},
|
||||
{
|
||||
Kind: "mitigation", Title: "Kriechgeschwindigkeit am Endanschlag (Hubgeraete)",
|
||||
Description: "Hubgeschwindigkeit am Ende der Verfahrbewegung auf <=15 mm/s reduzieren.",
|
||||
NormRefs: []string{"OSHA 29 CFR 1910.217 (Hand-Speed-Konstante)"},
|
||||
Confidence: "low",
|
||||
},
|
||||
}
|
||||
if !hasMechanical {
|
||||
// Trim if not a mechanical context
|
||||
out = out[:1]
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -355,117 +355,6 @@ func registerWhistleblowerRoutes(v1 *gin.RouterGroup, h *handlers.WhistleblowerH
|
||||
}
|
||||
}
|
||||
|
||||
func registerIACERoutes(v1 *gin.RouterGroup, h *handlers.IACEHandler) {
|
||||
iaceRoutes := v1.Group("/iace")
|
||||
{
|
||||
iaceRoutes.GET("/hazard-library", h.ListHazardLibrary)
|
||||
iaceRoutes.GET("/controls-library", h.ListControlsLibrary)
|
||||
iaceRoutes.GET("/norms-library", h.ListNormsLibrary)
|
||||
iaceRoutes.GET("/lifecycle-phases", h.ListLifecyclePhases)
|
||||
iaceRoutes.GET("/roles", h.ListRoles)
|
||||
iaceRoutes.GET("/evidence-types", h.ListEvidenceTypes)
|
||||
iaceRoutes.GET("/protective-measures-library", h.ListProtectiveMeasures)
|
||||
iaceRoutes.GET("/failure-modes", h.ListFailureModes)
|
||||
iaceRoutes.GET("/operational-states", h.ListOperationalStates)
|
||||
iaceRoutes.GET("/component-library", h.ListComponentLibrary)
|
||||
iaceRoutes.GET("/energy-sources", h.ListEnergySources)
|
||||
iaceRoutes.GET("/tags", h.ListTags)
|
||||
iaceRoutes.GET("/hazard-patterns", h.ListHazardPatterns)
|
||||
iaceRoutes.POST("/projects", h.CreateProject)
|
||||
iaceRoutes.GET("/projects", h.ListProjects)
|
||||
iaceRoutes.GET("/projects/:id", h.GetProject)
|
||||
iaceRoutes.PUT("/projects/:id", h.UpdateProject)
|
||||
iaceRoutes.DELETE("/projects/:id", h.ArchiveProject)
|
||||
iaceRoutes.POST("/projects/:id/init-from-profile", h.InitFromProfile)
|
||||
iaceRoutes.POST("/projects/:id/variants", h.CreateVariant)
|
||||
iaceRoutes.GET("/projects/:id/variants", h.ListVariants)
|
||||
iaceRoutes.GET("/projects/:id/variant-gap", h.GetVariantGap)
|
||||
iaceRoutes.POST("/projects/:id/completeness-check", h.CheckCompleteness)
|
||||
iaceRoutes.POST("/projects/:id/components", h.CreateComponent)
|
||||
iaceRoutes.GET("/projects/:id/components", h.ListComponents)
|
||||
iaceRoutes.PUT("/projects/:id/components/:cid", h.UpdateComponent)
|
||||
iaceRoutes.DELETE("/projects/:id/components/:cid", h.DeleteComponent)
|
||||
iaceRoutes.POST("/projects/:id/classify", h.Classify)
|
||||
iaceRoutes.GET("/projects/:id/classifications", h.GetClassifications)
|
||||
iaceRoutes.POST("/projects/:id/classify/:regulation", h.ClassifySingle)
|
||||
iaceRoutes.POST("/projects/:id/hazards", h.CreateHazard)
|
||||
iaceRoutes.GET("/projects/:id/hazards", h.ListHazards)
|
||||
iaceRoutes.PUT("/projects/:id/hazards/:hid", h.UpdateHazard)
|
||||
iaceRoutes.POST("/projects/:id/hazards/suggest", h.SuggestHazards)
|
||||
iaceRoutes.POST("/projects/:id/match-patterns", h.MatchPatterns)
|
||||
iaceRoutes.POST("/projects/:id/parse-narrative", h.ParseNarrative)
|
||||
iaceRoutes.POST("/projects/:id/delta-analysis", h.DeltaAnalysis)
|
||||
iaceRoutes.GET("/projects/:id/fmea/export", h.ExportFMEA)
|
||||
iaceRoutes.POST("/projects/:id/components/:cid/suggest-fms", h.SuggestFailureModes)
|
||||
iaceRoutes.POST("/projects/:id/apply-patterns", h.ApplyPatternResults)
|
||||
iaceRoutes.POST("/projects/:id/hazards/:hid/suggest-measures", h.SuggestMeasuresForHazard)
|
||||
iaceRoutes.POST("/projects/:id/mitigations/:mid/suggest-evidence", h.SuggestEvidenceForMitigation)
|
||||
iaceRoutes.POST("/projects/:id/hazards/:hid/assess", h.AssessRisk)
|
||||
iaceRoutes.GET("/projects/:id/risk-summary", h.GetRiskSummary)
|
||||
iaceRoutes.GET("/projects/:id/suggested-norms", h.SuggestProjectNorms)
|
||||
iaceRoutes.POST("/projects/:id/hazards/:hid/reassess", h.ReassessRisk)
|
||||
iaceRoutes.GET("/projects/:id/mitigations", h.ListProjectMitigations)
|
||||
iaceRoutes.POST("/projects/:id/hazards/:hid/mitigations", h.CreateMitigation)
|
||||
iaceRoutes.DELETE("/projects/:id/mitigations/:mid", h.DeleteMitigation)
|
||||
iaceRoutes.PUT("/mitigations/:mid", h.UpdateMitigation)
|
||||
iaceRoutes.POST("/mitigations/:mid/verify", h.VerifyMitigation)
|
||||
iaceRoutes.POST("/projects/:id/validate-mitigation-hierarchy", h.ValidateMitigationHierarchy)
|
||||
iaceRoutes.POST("/projects/:id/evidence", h.UploadEvidence)
|
||||
iaceRoutes.GET("/projects/:id/evidence", h.ListEvidence)
|
||||
iaceRoutes.POST("/projects/:id/verification-plan", h.CreateVerificationPlan)
|
||||
iaceRoutes.PUT("/verification-plan/:vid", h.UpdateVerificationPlan)
|
||||
iaceRoutes.POST("/verification-plan/:vid/complete", h.CompleteVerification)
|
||||
iaceRoutes.GET("/projects/:id/verifications", h.ListVerificationPlans)
|
||||
iaceRoutes.POST("/projects/:id/verifications", h.CreateVerificationAlias)
|
||||
iaceRoutes.DELETE("/projects/:id/verifications/:vid", h.DeleteVerificationPlan)
|
||||
iaceRoutes.POST("/projects/:id/verifications/:vid/complete", h.CompleteVerificationAlias)
|
||||
iaceRoutes.POST("/projects/:id/tech-file/generate", h.GenerateTechFile)
|
||||
iaceRoutes.GET("/projects/:id/tech-file", h.ListTechFileSections)
|
||||
iaceRoutes.PUT("/projects/:id/tech-file/:section", h.UpdateTechFileSection)
|
||||
iaceRoutes.POST("/projects/:id/tech-file/:section/approve", h.ApproveTechFileSection)
|
||||
iaceRoutes.POST("/projects/:id/tech-file/:section/generate", h.GenerateSingleSection)
|
||||
iaceRoutes.GET("/projects/:id/tech-file/export", h.ExportTechFile)
|
||||
iaceRoutes.POST("/projects/:id/monitoring", h.CreateMonitoringEvent)
|
||||
iaceRoutes.GET("/projects/:id/monitoring", h.ListMonitoringEvents)
|
||||
iaceRoutes.PUT("/projects/:id/monitoring/:eid", h.UpdateMonitoringEvent)
|
||||
iaceRoutes.GET("/projects/:id/audit-trail", h.GetAuditTrail)
|
||||
iaceRoutes.POST("/library-search", h.SearchLibrary)
|
||||
iaceRoutes.GET("/ce-corpus-documents", h.ListCECorpusDocuments)
|
||||
iaceRoutes.POST("/projects/:id/initialize", h.InitializeProject)
|
||||
iaceRoutes.GET("/projects/:id/hazard-blocks", h.GetHazardBlocks)
|
||||
iaceRoutes.POST("/projects/:id/benchmark/import-gt", h.ImportGroundTruth)
|
||||
iaceRoutes.GET("/projects/:id/benchmark", h.RunBenchmark)
|
||||
iaceRoutes.GET("/projects/:id/benchmark/summary", h.GetBenchmarkSummary)
|
||||
iaceRoutes.GET("/projects/:id/hazards/:hid/regulatory-hints", h.EnrichHazardWithRegulations)
|
||||
iaceRoutes.GET("/projects/:id/mitigations/:mid/regulatory-hints", h.EnrichMitigationWithRegulations)
|
||||
iaceRoutes.GET("/projects/:id/regulatory-hints", h.EnrichProjectHazardsBatch)
|
||||
iaceRoutes.POST("/projects/:id/tech-file/:section/enrich", h.EnrichTechFileSection)
|
||||
|
||||
// Production Lines
|
||||
iaceRoutes.POST("/production-lines", h.CreateProductionLine)
|
||||
iaceRoutes.GET("/production-lines", h.ListProductionLines)
|
||||
iaceRoutes.GET("/production-lines/:lid/dashboard", h.GetProductionLineDashboard)
|
||||
iaceRoutes.POST("/production-lines/:lid/stations", h.AddStationToLine)
|
||||
iaceRoutes.DELETE("/production-lines/:lid/stations/:sid", h.RemoveStationFromLine)
|
||||
|
||||
// CE x Compliance Crossover
|
||||
iaceRoutes.GET("/projects/:id/compliance-triggers", h.GetComplianceTriggers)
|
||||
iaceRoutes.GET("/compliance-faq", h.GetComplianceFAQ)
|
||||
|
||||
// Clarifications — aggregated open questions per project
|
||||
iaceRoutes.GET("/projects/:id/clarifications", h.ListClarifications)
|
||||
iaceRoutes.GET("/projects/:id/clarifications.csv", h.ExportClarificationsCSV)
|
||||
iaceRoutes.GET("/projects/:id/clarifications.html", h.ExportClarificationsHTML)
|
||||
iaceRoutes.GET("/projects/:id/clarifications/:cid/detail", h.ListClarificationDetail)
|
||||
iaceRoutes.POST("/projects/:id/clarifications/:cid/answer", h.AnswerClarification)
|
||||
iaceRoutes.POST("/projects/:id/clarifications/:cid/comment", h.PostClarificationComment)
|
||||
|
||||
// Customer-Standard Reuse (migration 031): pull reusable mitigations
|
||||
// across prior projects of the same customer.
|
||||
iaceRoutes.GET("/projects/:id/customer-standards", h.ListCustomerStandardSuggestions)
|
||||
iaceRoutes.POST("/projects/:id/customer-standards/import", h.ImportCustomerStandardSuggestion)
|
||||
}
|
||||
}
|
||||
|
||||
func registerMaximizerRoutes(v1 *gin.RouterGroup, h *handlers.MaximizerHandlers) {
|
||||
m := v1.Group("/maximizer")
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
package app
|
||||
|
||||
// IACE route registration extracted from routes.go (2026-05-21) because
|
||||
// routes.go hit the 500-LOC hard cap when the LLM gap-review endpoint
|
||||
// (Task #7) was added. Splitting keeps every routes file under the cap
|
||||
// without changing behaviour — `registerRoutes` in routes.go still
|
||||
// invokes `registerIACERoutes` exactly once at the same point in the
|
||||
// startup sequence.
|
||||
|
||||
import (
|
||||
"github.com/breakpilot/ai-compliance-sdk/internal/api/handlers"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func registerIACERoutes(v1 *gin.RouterGroup, h *handlers.IACEHandler) {
|
||||
iaceRoutes := v1.Group("/iace")
|
||||
{
|
||||
// Library catalogues (read-only reference data).
|
||||
iaceRoutes.GET("/hazard-library", h.ListHazardLibrary)
|
||||
iaceRoutes.GET("/controls-library", h.ListControlsLibrary)
|
||||
iaceRoutes.GET("/norms-library", h.ListNormsLibrary)
|
||||
iaceRoutes.GET("/lifecycle-phases", h.ListLifecyclePhases)
|
||||
iaceRoutes.GET("/roles", h.ListRoles)
|
||||
iaceRoutes.GET("/evidence-types", h.ListEvidenceTypes)
|
||||
iaceRoutes.GET("/protective-measures-library", h.ListProtectiveMeasures)
|
||||
iaceRoutes.GET("/failure-modes", h.ListFailureModes)
|
||||
iaceRoutes.GET("/operational-states", h.ListOperationalStates)
|
||||
iaceRoutes.GET("/component-library", h.ListComponentLibrary)
|
||||
iaceRoutes.GET("/energy-sources", h.ListEnergySources)
|
||||
iaceRoutes.GET("/tags", h.ListTags)
|
||||
iaceRoutes.GET("/hazard-patterns", h.ListHazardPatterns)
|
||||
|
||||
// Project CRUD.
|
||||
iaceRoutes.POST("/projects", h.CreateProject)
|
||||
iaceRoutes.GET("/projects", h.ListProjects)
|
||||
iaceRoutes.GET("/projects/:id", h.GetProject)
|
||||
iaceRoutes.PUT("/projects/:id", h.UpdateProject)
|
||||
iaceRoutes.DELETE("/projects/:id", h.ArchiveProject)
|
||||
iaceRoutes.POST("/projects/:id/init-from-profile", h.InitFromProfile)
|
||||
iaceRoutes.POST("/projects/:id/variants", h.CreateVariant)
|
||||
iaceRoutes.GET("/projects/:id/variants", h.ListVariants)
|
||||
iaceRoutes.GET("/projects/:id/variant-gap", h.GetVariantGap)
|
||||
iaceRoutes.POST("/projects/:id/completeness-check", h.CheckCompleteness)
|
||||
|
||||
// Components.
|
||||
iaceRoutes.POST("/projects/:id/components", h.CreateComponent)
|
||||
iaceRoutes.GET("/projects/:id/components", h.ListComponents)
|
||||
iaceRoutes.PUT("/projects/:id/components/:cid", h.UpdateComponent)
|
||||
iaceRoutes.DELETE("/projects/:id/components/:cid", h.DeleteComponent)
|
||||
|
||||
// Classification + hazards.
|
||||
iaceRoutes.POST("/projects/:id/classify", h.Classify)
|
||||
iaceRoutes.GET("/projects/:id/classifications", h.GetClassifications)
|
||||
iaceRoutes.POST("/projects/:id/classify/:regulation", h.ClassifySingle)
|
||||
iaceRoutes.POST("/projects/:id/hazards", h.CreateHazard)
|
||||
iaceRoutes.GET("/projects/:id/hazards", h.ListHazards)
|
||||
iaceRoutes.PUT("/projects/:id/hazards/:hid", h.UpdateHazard)
|
||||
iaceRoutes.POST("/projects/:id/hazards/suggest", h.SuggestHazards)
|
||||
iaceRoutes.POST("/projects/:id/match-patterns", h.MatchPatterns)
|
||||
iaceRoutes.POST("/projects/:id/parse-narrative", h.ParseNarrative)
|
||||
iaceRoutes.POST("/projects/:id/delta-analysis", h.DeltaAnalysis)
|
||||
iaceRoutes.POST("/projects/:id/llm-gap-review", h.LLMGapReview)
|
||||
iaceRoutes.GET("/projects/:id/fmea/export", h.ExportFMEA)
|
||||
iaceRoutes.POST("/projects/:id/components/:cid/suggest-fms", h.SuggestFailureModes)
|
||||
iaceRoutes.POST("/projects/:id/apply-patterns", h.ApplyPatternResults)
|
||||
iaceRoutes.POST("/projects/:id/hazards/:hid/suggest-measures", h.SuggestMeasuresForHazard)
|
||||
iaceRoutes.POST("/projects/:id/mitigations/:mid/suggest-evidence", h.SuggestEvidenceForMitigation)
|
||||
iaceRoutes.POST("/projects/:id/hazards/:hid/assess", h.AssessRisk)
|
||||
iaceRoutes.GET("/projects/:id/risk-summary", h.GetRiskSummary)
|
||||
iaceRoutes.GET("/projects/:id/suggested-norms", h.SuggestProjectNorms)
|
||||
iaceRoutes.POST("/projects/:id/hazards/:hid/reassess", h.ReassessRisk)
|
||||
|
||||
// Mitigations + evidence + verification.
|
||||
iaceRoutes.GET("/projects/:id/mitigations", h.ListProjectMitigations)
|
||||
iaceRoutes.POST("/projects/:id/hazards/:hid/mitigations", h.CreateMitigation)
|
||||
iaceRoutes.DELETE("/projects/:id/mitigations/:mid", h.DeleteMitigation)
|
||||
iaceRoutes.PUT("/mitigations/:mid", h.UpdateMitigation)
|
||||
iaceRoutes.POST("/mitigations/:mid/verify", h.VerifyMitigation)
|
||||
iaceRoutes.POST("/projects/:id/validate-mitigation-hierarchy", h.ValidateMitigationHierarchy)
|
||||
iaceRoutes.POST("/projects/:id/evidence", h.UploadEvidence)
|
||||
iaceRoutes.GET("/projects/:id/evidence", h.ListEvidence)
|
||||
iaceRoutes.POST("/projects/:id/verification-plan", h.CreateVerificationPlan)
|
||||
iaceRoutes.PUT("/verification-plan/:vid", h.UpdateVerificationPlan)
|
||||
iaceRoutes.POST("/verification-plan/:vid/complete", h.CompleteVerification)
|
||||
iaceRoutes.GET("/projects/:id/verifications", h.ListVerificationPlans)
|
||||
iaceRoutes.POST("/projects/:id/verifications", h.CreateVerificationAlias)
|
||||
iaceRoutes.DELETE("/projects/:id/verifications/:vid", h.DeleteVerificationPlan)
|
||||
iaceRoutes.POST("/projects/:id/verifications/:vid/complete", h.CompleteVerificationAlias)
|
||||
|
||||
// Tech file + monitoring + audit.
|
||||
iaceRoutes.POST("/projects/:id/tech-file/generate", h.GenerateTechFile)
|
||||
iaceRoutes.GET("/projects/:id/tech-file", h.ListTechFileSections)
|
||||
iaceRoutes.PUT("/projects/:id/tech-file/:section", h.UpdateTechFileSection)
|
||||
iaceRoutes.POST("/projects/:id/tech-file/:section/approve", h.ApproveTechFileSection)
|
||||
iaceRoutes.POST("/projects/:id/tech-file/:section/generate", h.GenerateSingleSection)
|
||||
iaceRoutes.GET("/projects/:id/tech-file/export", h.ExportTechFile)
|
||||
iaceRoutes.POST("/projects/:id/monitoring", h.CreateMonitoringEvent)
|
||||
iaceRoutes.GET("/projects/:id/monitoring", h.ListMonitoringEvents)
|
||||
iaceRoutes.PUT("/projects/:id/monitoring/:eid", h.UpdateMonitoringEvent)
|
||||
iaceRoutes.GET("/projects/:id/audit-trail", h.GetAuditTrail)
|
||||
|
||||
// Library + corpus + benchmark.
|
||||
iaceRoutes.POST("/library-search", h.SearchLibrary)
|
||||
iaceRoutes.GET("/ce-corpus-documents", h.ListCECorpusDocuments)
|
||||
iaceRoutes.POST("/projects/:id/initialize", h.InitializeProject)
|
||||
iaceRoutes.GET("/projects/:id/hazard-blocks", h.GetHazardBlocks)
|
||||
iaceRoutes.POST("/projects/:id/benchmark/import-gt", h.ImportGroundTruth)
|
||||
iaceRoutes.GET("/projects/:id/benchmark", h.RunBenchmark)
|
||||
iaceRoutes.GET("/projects/:id/benchmark/summary", h.GetBenchmarkSummary)
|
||||
|
||||
// Regulatory enrichment.
|
||||
iaceRoutes.GET("/projects/:id/hazards/:hid/regulatory-hints", h.EnrichHazardWithRegulations)
|
||||
iaceRoutes.GET("/projects/:id/mitigations/:mid/regulatory-hints", h.EnrichMitigationWithRegulations)
|
||||
iaceRoutes.GET("/projects/:id/regulatory-hints", h.EnrichProjectHazardsBatch)
|
||||
iaceRoutes.POST("/projects/:id/tech-file/:section/enrich", h.EnrichTechFileSection)
|
||||
|
||||
// Production lines.
|
||||
iaceRoutes.POST("/production-lines", h.CreateProductionLine)
|
||||
iaceRoutes.GET("/production-lines", h.ListProductionLines)
|
||||
iaceRoutes.GET("/production-lines/:lid/dashboard", h.GetProductionLineDashboard)
|
||||
iaceRoutes.POST("/production-lines/:lid/stations", h.AddStationToLine)
|
||||
iaceRoutes.DELETE("/production-lines/:lid/stations/:sid", h.RemoveStationFromLine)
|
||||
|
||||
// CE x Compliance crossover + clarifications + customer standards.
|
||||
iaceRoutes.GET("/projects/:id/compliance-triggers", h.GetComplianceTriggers)
|
||||
iaceRoutes.GET("/compliance-faq", h.GetComplianceFAQ)
|
||||
iaceRoutes.GET("/projects/:id/clarifications", h.ListClarifications)
|
||||
iaceRoutes.GET("/projects/:id/clarifications.csv", h.ExportClarificationsCSV)
|
||||
iaceRoutes.GET("/projects/:id/clarifications.html", h.ExportClarificationsHTML)
|
||||
iaceRoutes.GET("/projects/:id/clarifications/:cid/detail", h.ListClarificationDetail)
|
||||
iaceRoutes.POST("/projects/:id/clarifications/:cid/answer", h.AnswerClarification)
|
||||
iaceRoutes.POST("/projects/:id/clarifications/:cid/comment", h.PostClarificationComment)
|
||||
iaceRoutes.GET("/projects/:id/customer-standards", h.ListCustomerStandardSuggestions)
|
||||
iaceRoutes.POST("/projects/:id/customer-standards/import", h.ImportCustomerStandardSuggestion)
|
||||
}
|
||||
}
|
||||
@@ -104,39 +104,14 @@ func GetProjectComplianceTriggers(hazards []Hazard, patterns []HazardPattern) *C
|
||||
}
|
||||
}
|
||||
|
||||
// AllPatterns returns every hazard pattern from all pattern sources.
|
||||
// This mirrors the aggregation in NewPatternEngine but returns just the slice.
|
||||
// AllPatterns returns every registered hazard pattern. Delegates to
|
||||
// collectAllPatterns() in pattern_registry.go so new pattern sources only
|
||||
// need to be added in one place. Pre-2026-05-21 this function maintained
|
||||
// a duplicate enumeration which silently drifted from the registry —
|
||||
// CRA, ISO12100-gap, robot-cell, CNC, VDMA, textile-agri, GT-bremse and
|
||||
// secondary-harm patterns were invisible to AllPatterns callers.
|
||||
func AllPatterns() []HazardPattern {
|
||||
p := GetBuiltinHazardPatterns()
|
||||
p = append(p, GetExtendedHazardPatterns()...)
|
||||
p = append(p, GetPressHazardPatterns()...)
|
||||
p = append(p, GetCobotHazardPatterns()...)
|
||||
p = append(p, GetOperationalHazardPatterns()...)
|
||||
p = append(p, GetDGUVExtendedPatterns()...)
|
||||
p = append(p, GetExtendedHazardPatterns2()...)
|
||||
p = append(p, GetElevatorPatterns()...)
|
||||
p = append(p, GetAGVAgriPatterns()...)
|
||||
p = append(p, GetFoodProcessingPatterns()...)
|
||||
p = append(p, GetPackagingPatterns()...)
|
||||
p = append(p, GetLaserPatterns()...)
|
||||
p = append(p, GetMedicalDevicePatterns()...)
|
||||
p = append(p, GetPressureEquipmentPatterns()...)
|
||||
p = append(p, GetConstructionPatterns()...)
|
||||
p = append(p, GetForestryConveyorPatterns()...)
|
||||
p = append(p, GetPlasticsMetalPatterns()...)
|
||||
p = append(p, GetWeldingGlassTextilePatterns()...)
|
||||
p = append(p, GetSpecificMachinePatterns()...)
|
||||
p = append(p, GetSpecificMachinePatterns2()...)
|
||||
p = append(p, GetCyberExtendedPatterns()...)
|
||||
p = append(p, GetCyberExtendedPatterns2()...)
|
||||
p = append(p, GetCyberExtendedPatterns3()...)
|
||||
p = append(p, GetWorkshopPatterns()...)
|
||||
p = append(p, GetMaintenanceExtPatterns()...)
|
||||
p = append(p, GetFinalPatternsA()...)
|
||||
p = append(p, GetFinalPatternsB()...)
|
||||
p = append(p, GetFinalPatternsC()...)
|
||||
p = append(p, GetFinalPatternsD()...)
|
||||
return p
|
||||
return collectAllPatterns()
|
||||
}
|
||||
|
||||
// extractPatternIDs scans a text for "HP" followed by digits and adds
|
||||
|
||||
@@ -81,6 +81,10 @@ func (e *DocumentExporter) ExportPDF(
|
||||
e.pdfClassifications(pdf, classifications)
|
||||
}
|
||||
|
||||
// --- Quellen & Lizenzen (Stufe 4 Attribution-Renderer, Task #29) ---
|
||||
pdf.AddPage()
|
||||
e.pdfSourcesAppendix(pdf, hazards, mitigations)
|
||||
|
||||
// --- Footer on every page ---
|
||||
pdf.SetFooterFunc(func() {
|
||||
pdf.SetY(-15)
|
||||
|
||||
@@ -0,0 +1,134 @@
|
||||
package iace
|
||||
|
||||
// Sources & Licenses appendix for the IACE Tech-File PDF export.
|
||||
// Stufe 4 of the Attribution Renderer (Task #29).
|
||||
//
|
||||
// The IACE engine generates hazards from BreakPilot Pattern-IDs that
|
||||
// themselves cite ISO 12100, EN 13849, EN ISO 13855 etc. Those norm
|
||||
// identifiers are R3 (DIN/EN copyright — identifier-only). The
|
||||
// pattern-engine output itself is R3 (BreakPilot own work). OSHA values
|
||||
// surfaced via the minimum-distance library are R1 (US Federal PD).
|
||||
//
|
||||
// This appendix aggregates what the Tech-File ACTUALLY cited and shows
// it grouped by license rule, together with the mandatory disclaimer that
// the per-export footer cannot be replaced by a blanket ("pauschal")
// notice in the Impressum.
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/jung-kurt/gofpdf"
|
||||
)
|
||||
|
||||
// pdfSourcesAppendix renders the "Quellen & Lizenzen" appendix page.
|
||||
// Called by ExportPDF after the regulatory classifications block.
|
||||
func (e *DocumentExporter) pdfSourcesAppendix(pdf *gofpdf.Fpdf, hazards []Hazard, mitigations []Mitigation) {
|
||||
pdf.SetFont("Helvetica", "B", 14)
|
||||
pdf.SetTextColor(124, 58, 237)
|
||||
pdf.CellFormat(0, 10, "Quellen und Lizenzen", "", 1, "L", false, 0, "")
|
||||
pdf.Ln(2)
|
||||
|
||||
pdf.SetFont("Helvetica", "", 9)
|
||||
pdf.SetTextColor(80, 80, 80)
|
||||
intro := "Diese Risikobeurteilung verwendet die deterministische BreakPilot IACE " +
|
||||
"Pattern-Engine sowie zitierte Sicherheitsnormen. Die folgende Aufstellung " +
|
||||
"listet die konkret in diesem Dokument zitierten Quellen mit ihrer Lizenzregel."
|
||||
pdf.MultiCell(0, 5, intro, "", "L", false)
|
||||
pdf.Ln(3)
|
||||
|
||||
pdf.SetFont("Helvetica", "B", 10)
|
||||
pdf.SetTextColor(0, 0, 0)
|
||||
pdf.CellFormat(0, 7, "R3 — BreakPilot Pattern-Engine (Eigenwerk, Identifier-Verweis)", "", 1, "L", false, 0, "")
|
||||
pdf.SetFont("Helvetica", "", 9)
|
||||
pdf.SetTextColor(60, 60, 60)
|
||||
pdf.MultiCell(0, 5,
|
||||
"Alle in diesem Dokument referenzierten HP-XXXX-Identifier stammen aus der "+
|
||||
"BreakPilot IACE Pattern-Library (Eigenwerk). Keine externe Lizenz-Attribution "+
|
||||
"erforderlich.", "", "L", false)
|
||||
pdf.Ln(3)
|
||||
|
||||
norms := extractCitedNorms(hazards, mitigations)
|
||||
if len(norms) > 0 {
|
||||
pdf.SetFont("Helvetica", "B", 10)
|
||||
pdf.SetTextColor(0, 0, 0)
|
||||
pdf.CellFormat(0, 7, "R3 — Sicherheitsnormen (DIN/EN/ISO/IEC, Identifier-Verweis)", "", 1, "L", false, 0, "")
|
||||
pdf.SetFont("Helvetica", "", 9)
|
||||
pdf.SetTextColor(60, 60, 60)
|
||||
pdf.MultiCell(0, 5,
|
||||
"DIN-/EN-/ISO-/IEC-Normen unterliegen dem Urheberrecht der jeweiligen "+
|
||||
"Normungsorganisation. In diesem Dokument werden Normen ausschliesslich "+
|
||||
"als Identifier (Norm-Nummer und Abschnitt) zitiert; kein Volltext aus "+
|
||||
"diesen Normen wurde reproduziert. Konkret zitiert:", "", "L", false)
|
||||
pdf.Ln(1)
|
||||
for _, n := range norms {
|
||||
pdf.CellFormat(0, 5, " • "+n, "", 1, "L", false, 0, "")
|
||||
}
|
||||
pdf.Ln(2)
|
||||
}
|
||||
|
||||
pdf.SetFont("Helvetica", "B", 10)
|
||||
pdf.SetTextColor(0, 0, 0)
|
||||
pdf.CellFormat(0, 7, "R1 — Hoheitsrecht / Public Domain (woertlich uebernehmbar)", "", 1, "L", false, 0, "")
|
||||
pdf.SetFont("Helvetica", "", 9)
|
||||
pdf.SetTextColor(60, 60, 60)
|
||||
pdf.MultiCell(0, 5,
|
||||
"Soweit Werte aus US Federal Code (OSHA 29 CFR Subpart O) oder EU-Recht "+
|
||||
"(Maschinenverordnung 2023/1230, AI Act 2024/1689) referenziert werden, "+
|
||||
"sind diese als R1 woertlich uebernehmbar. Keine Attribution-Pflicht.", "", "L", false)
|
||||
pdf.Ln(4)
|
||||
|
||||
pdf.SetFont("Helvetica", "I", 8)
|
||||
pdf.SetTextColor(120, 120, 120)
|
||||
pdf.MultiCell(0, 4,
|
||||
"Hinweis: Pauschalvermerke in AGB oder Impressum reichen rechtlich nicht — "+
|
||||
"die werknahe Attribution erfolgt durch diese Quellenseite. Vollstaendiges "+
|
||||
"Quellenverzeichnis aller im BreakPilot-System verwendeten Quellen siehe "+
|
||||
"/sdk/licenses im Web-Frontend.", "", "L", false)
|
||||
}
|
||||
|
||||
// extractCitedNorms scans hazard descriptions + scenario fields for
|
||||
// recognised norm identifiers. The detection is intentionally narrow:
|
||||
// only well-known prefixes (EN/ISO/IEC/DIN) and only when followed by
|
||||
// digits, so free-form prose is not turned into spurious citations.
|
||||
func extractCitedNorms(hz []Hazard, mt []Mitigation) []string {
|
||||
seen := make(map[string]bool)
|
||||
consider := func(s string) {
|
||||
fields := strings.FieldsFunc(s, func(r rune) bool {
|
||||
return r == ' ' || r == ',' || r == ';' || r == '\n' || r == ';' || r == '('
|
||||
})
|
||||
for i := 0; i < len(fields)-1; i++ {
|
||||
head := strings.ToUpper(strings.TrimSpace(fields[i]))
|
||||
next := strings.TrimSpace(fields[i+1])
|
||||
if !(head == "EN" || head == "ISO" || head == "IEC" || head == "DIN") {
|
||||
continue
|
||||
}
|
||||
if next == "" {
|
||||
continue
|
||||
}
|
||||
// Accept "ISO 12100", "EN 13849-1", "DIN EN 60204-1" etc.
|
||||
if next[0] >= '0' && next[0] <= '9' {
|
||||
seen[head+" "+next] = true
|
||||
} else if head == "DIN" && (strings.HasPrefix(strings.ToUpper(next), "EN") || strings.HasPrefix(strings.ToUpper(next), "ISO")) && i+2 < len(fields) {
|
||||
third := strings.TrimSpace(fields[i+2])
|
||||
if third != "" && third[0] >= '0' && third[0] <= '9' {
|
||||
seen[head+" "+next+" "+third] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, h := range hz {
|
||||
consider(h.Description)
|
||||
consider(h.Scenario)
|
||||
consider(h.PossibleHarm)
|
||||
}
|
||||
for _, m := range mt {
|
||||
consider(m.Description)
|
||||
consider(m.Name)
|
||||
}
|
||||
out := make([]string, 0, len(seen))
|
||||
for k := range seen {
|
||||
out = append(out, k)
|
||||
}
|
||||
sort.Strings(out)
|
||||
return out
|
||||
}
|
||||
@@ -83,6 +83,12 @@ type HazardPattern struct {
|
||||
// feeds into the PLr (required Performance Level) computation,
|
||||
// see ComputePLr.
|
||||
DefaultAvoidability int `json:"default_avoidability,omitempty"` // 1 or 2
|
||||
// SecondaryHarms describes consequential damage chains beyond the
|
||||
// classical IACE Hazard→Harm step: end-customer safety, product
|
||||
// liability, food safety, environmental, reputation, financial.
|
||||
// See secondary_harms.go and the strategy discussion (2026-05-20).
|
||||
// Empty for hazards with no downstream chain.
|
||||
SecondaryHarms []SecondaryHarm `json:"secondary_harms,omitempty"`
|
||||
}
|
||||
|
||||
// ComputePLr returns the required Performance Level (PLr) per EN ISO
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
package iace
|
||||
|
||||
// Body-part-specific crush hazards at lift / hoist / scissor-lift endstops.
|
||||
// Bridges the gap that the Kistenhubgeraet re-init exposed: the abstract
|
||||
// "Bremse versagt bei Absenkbewegung" pattern fires, but the concrete
|
||||
// "Fuss unter absenkender Hubplattform" body-part variant did not exist.
|
||||
//
|
||||
// Each pattern restricts to lift-family machine types via MachineTypes,
|
||||
// so a press / CNC / textile project does not pick them up. Mitigations
|
||||
// reference the new M600-M604 (lift endstop) library plus the existing
|
||||
// M001 (geometry), M002 (safety distance), M141 (warning sign).
|
||||
|
||||
func GetLiftEndstopPatterns() []HazardPattern {
|
||||
liftTypes := []string{"lift", "hoist", "elevator", "scissor_lift"}
|
||||
return []HazardPattern{
|
||||
{
|
||||
ID: "HP2100",
|
||||
NameDE: "Fuss-Quetschung unter absenkender Hubplattform am Bodenanschlag",
|
||||
NameEN: "Foot crush under descending lift platform at floor stop",
|
||||
RequiredComponentTags: []string{"crush_point", "gravity_risk", "person_under_load"},
|
||||
RequiredEnergyTags: []string{"gravitational"},
|
||||
MachineTypes: liftTypes,
|
||||
GeneratedHazardCats: []string{"mechanical_hazard"},
|
||||
SuggestedMeasureIDs: []string{"M600", "M601", "M604", "M141"},
|
||||
Priority: 92,
|
||||
ScenarioDE: "Fuss oder Bein des Bedieners gelangt waehrend des Absenkvorgangs unter die " +
|
||||
"Hubplattform. Bei Erreichen der unteren Endlage wird der Fuss zwischen Plattform " +
|
||||
"und Boden gequetscht.",
|
||||
TriggerDE: "Unsachgemaesse Position des Bedieners beim Be-/Entladen, fehlende Schaltleiste, fehlender Trittschutz",
|
||||
HarmDE: "Fussquetschung, Mittelfussfraktur, Zehenamputation",
|
||||
AffectedDE: "Bediener, Wartungspersonal",
|
||||
ZoneDE: "Bodenbereich unter Hubplattform, umlaufende Spalte",
|
||||
DefaultSeverity: 4,
|
||||
DefaultExposure: 3,
|
||||
DefaultAvoidability: 2,
|
||||
ISO12100Section: "6.3.5.5 Quetschen — Mindestabstaende",
|
||||
ClarificationQuestionsDE: []string{
|
||||
"Ist eine umlaufende Quetsch-Schaltleiste an der Plattformunterkante verbaut?",
|
||||
"Ist die Hubgeschwindigkeit am unteren Endanschlag auf <=15 mm/s reduziert (siehe M600)?",
|
||||
"Verhindert ein Trittblech / Unterfahrschutz das Hineinfahren von Fuessen?",
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: "HP2101",
|
||||
NameDE: "Hand- oder Koerper-Quetschung gegen feste Struktur beim Hochfahren der Hubeinheit",
|
||||
NameEN: "Hand or body crush against fixed structure during lift upward travel",
|
||||
RequiredComponentTags: []string{"crush_point", "gravity_risk"},
|
||||
RequiredEnergyTags: []string{"gravitational"},
|
||||
MachineTypes: liftTypes,
|
||||
GeneratedHazardCats: []string{"mechanical_hazard"},
|
||||
SuggestedMeasureIDs: []string{"M602", "M603", "M600", "M141"},
|
||||
Priority: 90,
|
||||
ScenarioDE: "Beim Hochfahren der Last gelangen Hand oder Koerperteile des Bedieners " +
|
||||
"zwischen die hoechste Position der Hubeinheit (z.B. mit beladener Palette) und " +
|
||||
"eine feste Struktur oberhalb (Decke, Vorbau, Querbalken einer umschliessenden Anlage).",
|
||||
TriggerDE: "Eingriff in den Verfahrweg waehrend Hubvorgang, fehlende konstruktive Begrenzung der Endlage",
|
||||
HarmDE: "Hand- oder Armquetschung, im Extremfall Brustkorbkompression",
|
||||
AffectedDE: "Bediener, Einrichter, Wartungspersonal",
|
||||
ZoneDE: "Oberhalb hoechster Hubposition, Vorbau/Decke der umschliessenden Anlage",
|
||||
DefaultSeverity: 4,
|
||||
DefaultExposure: 2,
|
||||
DefaultAvoidability: 2,
|
||||
ISO12100Section: "6.3.5.5 Quetschen — Mindestabstaende",
|
||||
ClarificationQuestionsDE: []string{
|
||||
"Welcher Mindestabstand zu festen Strukturen oberhalb der hoechsten Hubposition ist gegeben? (Empfehlung: 120 mm fuer Kopf, 100 mm fuer Hand)",
|
||||
"Ist der Tippbetrieb (Hold-to-run) durch ein Testprotokoll mit Stop-Zeit-Messung verifiziert?",
|
||||
"Existiert eine redundante Hardware-Endlage zusaetzlich zur Software-Begrenzung?",
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: "HP2102",
|
||||
NameDE: "Quetschung Bein/Koerper zwischen Hubeinheit und seitlicher Struktur",
|
||||
NameEN: "Leg/body crush between lift unit and lateral structure",
|
||||
RequiredComponentTags: []string{"crush_point", "gravity_risk", "moving_part"},
|
||||
RequiredEnergyTags: []string{"gravitational"},
|
||||
MachineTypes: liftTypes,
|
||||
GeneratedHazardCats: []string{"mechanical_hazard"},
|
||||
SuggestedMeasureIDs: []string{"M602", "M601", "M141"},
|
||||
Priority: 85,
|
||||
ScenarioDE: "Person befindet sich seitlich neben der Hubeinheit und wird waehrend " +
|
||||
"der Bewegung gegen eine feste Struktur (Regalwand, Stuetze, andere Anlage) gequetscht.",
|
||||
TriggerDE: "Aufenthalt in Quetschzone bei Bewegung, fehlende Absperrung",
|
||||
HarmDE: "Beinfraktur, Beckenquetschung",
|
||||
AffectedDE: "Bediener, vorbeigehende Personen",
|
||||
ZoneDE: "Seitlicher Bereich neben Hubeinheit, Lichte Weite zu festen Strukturen",
|
||||
DefaultSeverity: 4,
|
||||
DefaultExposure: 2,
|
||||
DefaultAvoidability: 2,
|
||||
ISO12100Section: "6.3.5.5 Quetschen — Mindestabstaende",
|
||||
ClarificationQuestionsDE: []string{
|
||||
"Welcher Sicherheitsabstand zu seitlichen festen Strukturen ist gegeben (Empfehlung 500 mm Koerperdurchgang)?",
|
||||
"Ist der Bereich seitlich der Hubeinheit als Gefahrenzone markiert oder abgeschrankt?",
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,127 @@
|
||||
package iace
|
||||
|
||||
// Demonstration patterns showing how the SecondaryHarms field carries
|
||||
// downstream-consequence information through the IACE engine.
|
||||
//
|
||||
// Two real-world scenarios are encoded:
|
||||
//
|
||||
// HP2000 — Glass-shard injection in carbonated-beverage bottling
|
||||
// (the "Cola splitter" example from the IACE strategy
|
||||
// discussion). Primary harm is the operator hit by flying
|
||||
// shards; the secondary chain is product-liability towards
|
||||
// supermarket end-customers.
|
||||
//
|
||||
// HP2001 — Cross-contamination in pharma fill-finish lines.
|
||||
// Primary harm is operator exposure; secondary chain is
|
||||
// patient harm + graduated-plan (Stufenplan) reporting under §63 AMG.
|
||||
//
|
||||
// These two patterns are sufficient as a contract test for the
|
||||
// SecondaryHarms field. Library coverage of more scenarios is a
|
||||
// follow-up task once the persistence layer (DB migration) lands.
|
||||
|
||||
func GetSecondaryHarmDemoPatterns() []HazardPattern {
|
||||
return []HazardPattern{
|
||||
{
|
||||
ID: "HP2000",
|
||||
NameDE: "Glasbruch in Karbonisierungs-Abfueller (Hochdruck)",
|
||||
NameEN: "Glass shatter in carbonated bottling line",
|
||||
RequiredComponentTags: []string{"crush_point", "high_pressure"},
|
||||
RequiredEnergyTags: []string{"pneumatic_pressure"},
|
||||
GeneratedHazardCats: []string{"mechanical_hazard"},
|
||||
Priority: 90,
|
||||
MachineTypes: []string{"bottling", "food_processing", "packaging"},
|
||||
ScenarioDE: "Glasflasche platzt unter CO2-Druck waehrend der Abfuellung. " +
|
||||
"Splitter erreichen den Bediener und koennen ferner in nachfolgende " +
|
||||
"Flaschen eingetragen werden.",
|
||||
TriggerDE: "Materialfehler, ueberhoehter Innendruck, Foerderstoss",
|
||||
HarmDE: "Schnittverletzung Auge/Hand des Bedieners",
|
||||
AffectedDE: "Abfueller, Mitarbeiter Linie",
|
||||
ZoneDE: "Karussell, Schutzkapsel, Foerderband-Auslauf",
|
||||
DefaultSeverity: 4,
|
||||
DefaultExposure: 3,
|
||||
ISO12100Section: "6.4.5.5 Schleudernde Teile",
|
||||
SecondaryHarms: []SecondaryHarm{
|
||||
{
|
||||
Type: SecondaryHarmConsumerSafety,
|
||||
Description: "Restsplitter in der Folgeflasche erreichen ueber den Handel " +
|
||||
"den Endkunden. Verletzungsrisiko Mund/Speiseroehre.",
|
||||
LegalBasis: "ProdHaftG §1, VO (EU) Nr. 178/2002 Art. 14",
|
||||
SuggestedMitigations: []string{
|
||||
"Spueltunnel nach Abfuellung",
|
||||
"Inline-Kamera mit Glasbrucherkennung",
|
||||
"Sperrzone fuer 2 Folgeflaschen bei Bruchereignis",
|
||||
"Glasbruchsensor an Karussell mit Linie-Stopp",
|
||||
},
|
||||
Owner: "product_safety",
|
||||
},
|
||||
{
|
||||
Type: SecondaryHarmFoodSafety,
|
||||
Description: "Rueckruf- und Meldepflicht bei Inverkehrbringen unsicherer " +
|
||||
"Lebensmittel; Rueckverfolgbarkeit Chargen-genau erforderlich.",
|
||||
LegalBasis: "VO (EU) 178/2002 Art. 18, 19; LFGB §40",
|
||||
SuggestedMitigations: []string{
|
||||
"Chargen-Tracking bis Endhaendler",
|
||||
"Schnellwarnsystem RASFF aktiviert halten",
|
||||
"Rueckruf-SOP getestet",
|
||||
},
|
||||
Owner: "qm",
|
||||
},
|
||||
{
|
||||
Type: SecondaryHarmReputation,
|
||||
Description: "Pressemitteilung und Aktienkurs-Reaktion bei Verbraucher-" +
|
||||
"verletzungen / behoerdlichem Rueckruf.",
|
||||
LegalBasis: "ISO 31000 Unternehmensrisiko",
|
||||
SuggestedMitigations: []string{
|
||||
"Krisenkommunikations-Plan",
|
||||
"PR-Bereitschaft 24/7",
|
||||
},
|
||||
Owner: "enterprise_risk",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: "HP2001",
|
||||
NameDE: "Kreuzkontamination Pharma Fill-Finish",
|
||||
NameEN: "Cross-contamination pharma fill-finish",
|
||||
RequiredComponentTags: []string{"chemical_risk"},
|
||||
RequiredEnergyTags: []string{"pneumatic_pressure"},
|
||||
GeneratedHazardCats: []string{"chemical_hazard"},
|
||||
Priority: 92,
|
||||
MachineTypes: []string{"pharmaceutical", "food_processing"},
|
||||
ScenarioDE: "Wirkstoff-Rueckstand aus Vorcharge im Linienzwischenraum kontaminiert " +
|
||||
"die Folgecharge.",
|
||||
TriggerDE: "Mangelhaftes CIP, Spuelvolumen unterhalb Validierung",
|
||||
HarmDE: "Bedienerexposition bei Probennahme",
|
||||
AffectedDE: "Anlagenbediener, Probenehmer",
|
||||
ZoneDE: "Abfuelllinie zwischen Vorlage und Filler",
|
||||
DefaultSeverity: 4,
|
||||
DefaultExposure: 2,
|
||||
ISO12100Section: "6.4.4 Chemische und biologische Gefaehrdungen",
|
||||
SecondaryHarms: []SecondaryHarm{
|
||||
{
|
||||
Type: SecondaryHarmConsumerSafety,
|
||||
Description: "Patient erhaelt Arzneimittel mit unzulaessiger Beimischung; " +
|
||||
"Wirkungsbeeintraechtigung oder unerwuenschte Wirkung moeglich.",
|
||||
LegalBasis: "AMG §5 (Verkehrsfaehigkeit), §74a (Stufenplan)",
|
||||
SuggestedMitigations: []string{
|
||||
"CIP-Validierung mit TOC- und Conductivity-Limits",
|
||||
"Dedizierte Linien fuer Hochpotente Wirkstoffe",
|
||||
"Stufenplan-Meldung bei Verdacht",
|
||||
},
|
||||
Owner: "qm",
|
||||
},
|
||||
{
|
||||
Type: SecondaryHarmProductLiability,
|
||||
Description: "Haftung des Inverkehrbringers nach AMG §84 (Gefaehrdungshaftung " +
|
||||
"bei Arzneimittelschaeden, verschuldensunabhaengig).",
|
||||
LegalBasis: "AMG §84",
|
||||
SuggestedMitigations: []string{
|
||||
"Deckung Produkthaftpflicht ueber gesetzliches Minimum",
|
||||
"Chargen-Rueckhaltemuster 12 Monate ueber MHD hinaus",
|
||||
},
|
||||
Owner: "legal",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -21,6 +21,7 @@ func GetProtectiveMeasureLibrary() []ProtectiveMeasureEntry {
|
||||
all = append(all, GetTextileAgriMeasures()...) // Textil + Landmaschinen (Phase 5)
|
||||
all = append(all, getGTBremseMeasures()...) // GT-Bremse-Coverage-Gaps (M483-M522)
|
||||
all = append(all, GetCRAMeasures()...) // CRA / DIN EN 40000-1-2 cyber-resilience (M540-M548)
|
||||
all = append(all, getLiftEndstopMeasures()...) // Lift/hoist endstop (M600-M604) — bridges OSHA MD library
|
||||
return all
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,134 @@
|
||||
package iace
|
||||
|
||||
// Lift / hoist / scissor-lift endstop mitigations — bridges the OSHA
|
||||
// minimum-distance library (minimum_distances.go, Task #18) into the
|
||||
// pattern-engine measure library. Each entry cites the concrete OSHA
|
||||
// value (where one applies) AND its EU-norm counterpart by identifier only.
|
||||
//
|
||||
// Engineering rounding values come from MD_OSHA_* IDs in
|
||||
// minimum_distances.go. We do not duplicate the source text here —
|
||||
// the Tech-File renderer can join MD_OSHA_* into the rendered text
|
||||
// at output time.
|
||||
|
||||
func getLiftEndstopMeasures() []ProtectiveMeasureEntry {
|
||||
return []ProtectiveMeasureEntry{
|
||||
// M600 — Cruise/creep speed at end of travel
|
||||
{
|
||||
ID: "M600",
|
||||
ReductionType: "protection",
|
||||
SubType: "speed_control",
|
||||
Name: "Kriechgeschwindigkeit am Endanschlag (Hubgeraete)",
|
||||
Description: "Hubgeschwindigkeit am Ende der Verfahrbewegung (oben und unten) auf maximal 15 mm/s " +
|
||||
"reduzieren. OSHA 29 CFR 1910.217 Hand-Speed-Konstante 63 in/s = 1.600 mm/s als Obergrenze " +
|
||||
"fuer Stopp-Reaktionszeit. Damit ist auch bei spaeter Auslosung der Quetsch-Schaltleiste " +
|
||||
"genug Bremsweg vorhanden.",
|
||||
HazardCategory: "mechanical",
|
||||
Examples: []string{
|
||||
"Hub-Endschalter mit Soft-Stop und Geschwindigkeitsstufe < 15 mm/s in den letzten 50 mm",
|
||||
"Servo-Antrieb mit Ramp-down-Profil ueber die letzten 100 mm Verfahrweg",
|
||||
"Drehzahl-Begrenzer im Frequenzumrichter mit Endlagen-Trigger",
|
||||
},
|
||||
NormReferences: []string{
|
||||
"OSHA 29 CFR 1910.217 (Ds = 63 in/s x Ts)",
|
||||
"EN ISO 13855 (Anordnung von Schutzeinrichtungen)",
|
||||
"EN 1570-1 (Hubtische — Bauanforderungen)",
|
||||
},
|
||||
RiskReduction: &RiskReduction{SeverityDelta: -1, ExposureDelta: -1, ProbabilityDelta: -1},
|
||||
Tags: []string{"crush_point", "gravity_risk", "speed_limit"},
|
||||
},
|
||||
// M601 — Trip-edge sensor under platform (safety bumper)
|
||||
{
|
||||
ID: "M601",
|
||||
ReductionType: "protection",
|
||||
SubType: "safety_device",
|
||||
Name: "Quetsch-Schaltleiste unterhalb der Hubplattform",
|
||||
Description: "Druckempfindliche Schaltleiste (gemaess EN ISO 13856-2) am unteren Rand der Hubplattform " +
|
||||
"loest bei Beruehrung den Hubantrieb sofort aus und kehrt die Bewegung um. Verhindert Quetschung " +
|
||||
"von Fuessen oder Beinen unter absenkender Last. PL c oder hoeher nach EN ISO 13849-1.",
|
||||
HazardCategory: "mechanical",
|
||||
Examples: []string{
|
||||
"Schaltleiste umlaufend an Bodenkante der Hubplattform",
|
||||
"Trittschutz mit redundanter Auswertung am Hubtisch",
|
||||
"Lichtgitter im Bodenbereich als Ergaenzung bei freistehenden Anlagen",
|
||||
},
|
||||
NormReferences: []string{
|
||||
"EN ISO 13856-2 (Schaltleisten)",
|
||||
"EN ISO 13849-1 (PL-Bestimmung)",
|
||||
"EN 1570-1",
|
||||
},
|
||||
RiskReduction: &RiskReduction{SeverityDelta: -2, ExposureDelta: -2, ProbabilityDelta: -2},
|
||||
Tags: []string{"crush_point", "gravity_risk", "safety_device"},
|
||||
},
|
||||
// M602 — Minimum clearance to fixed structure above max lift position
|
||||
{
|
||||
ID: "M602",
|
||||
ReductionType: "design",
|
||||
SubType: "geometry",
|
||||
Name: "Mindestabstand zu festen Strukturen oberhalb der Hubendlage",
|
||||
Description: "Zwischen hoechstem Punkt der Hubeinheit (mit beladenem Werkstueck) und festen Strukturen " +
|
||||
"oberhalb (Decke, Vorbau, Querbalken) muss ein Sicherheitsabstand verbleiben, der das Quetschen " +
|
||||
"von Haenden und Koerper verhindert. Empfehlung: 120 mm fuer Kopf, 100 mm fuer Hand, 25 mm fuer " +
|
||||
"Finger — abgeleitet aus EN 349 / EN ISO 13854 unabhaengig zu pruefen.",
|
||||
HazardCategory: "mechanical",
|
||||
Examples: []string{
|
||||
"Konstruktive Begrenzung der oberen Hubposition durch mechanischen Anschlag",
|
||||
"Software-Endlage mit redundantem Hardware-Sicherheitsschalter",
|
||||
"Auslegungs-Pruefung mit beladener Standard-Palette und Maximal-Hubhoehe",
|
||||
},
|
||||
NormReferences: []string{
|
||||
"EN 349 (Mindestabstaende gegen Quetschen von Koerperteilen)",
|
||||
"EN ISO 13854 (Mindestabstaende gegen Quetschen)",
|
||||
"OSHA 29 CFR 1910.212(a)(5) (Lueftergitter ≤ 1/2 in als Anker)",
|
||||
},
|
||||
RiskReduction: &RiskReduction{SeverityDelta: -2, ExposureDelta: -1},
|
||||
Tags: []string{"crush_point", "gravity_risk"},
|
||||
},
|
||||
// M603 — Hold-to-run with two-hand operation for manual descent
|
||||
{
|
||||
ID: "M603",
|
||||
ReductionType: "protection",
|
||||
SubType: "control_device",
|
||||
Name: "Tippbetrieb / Hold-to-run beim Absenken (mit Verifikations-Nachweis)",
|
||||
Description: "Absenken nur im Tippbetrieb (Hold-to-run): Bedientaster muss waehrend des gesamten " +
|
||||
"Absenkvorgangs gedrueckt gehalten werden. Bei Loslassen stoppt die Bewegung sofort. " +
|
||||
"Im Limits-Form als 'Tippbetrieb' deklariert — durch Tests verifizieren (Stop-Reaktionszeit " +
|
||||
"<= 0,3 s im voll beladenen Zustand).",
|
||||
HazardCategory: "mechanical",
|
||||
Examples: []string{
|
||||
"Tipptaster mit elektrischer Selbstrueckstellung",
|
||||
"Zweihand-Bedienung fuer kritische Absenk-Bereiche (Tipp + Zustimmtaster)",
|
||||
"Pruefprotokoll Stop-Zeit gemaess EN ISO 13849-1 PL c",
|
||||
},
|
||||
NormReferences: []string{
|
||||
"EN ISO 13849-1 (Sicherheitsbezogene Steuerungsteile)",
|
||||
"EN ISO 13851 (Zweihandschaltungen)",
|
||||
"BetrSichV § 4 (Schutzmassnahmen)",
|
||||
},
|
||||
RiskReduction: &RiskReduction{SeverityDelta: -1, ExposureDelta: -2, ProbabilityDelta: -1},
|
||||
Tags: []string{"crush_point", "gravity_risk", "control_device"},
|
||||
},
|
||||
// M604 — Underrun guard / kick plate at platform base
|
||||
{
|
||||
ID: "M604",
|
||||
ReductionType: "design",
|
||||
SubType: "geometry",
|
||||
Name: "Trittblech / Unterfahrschutz an der Hubplattform",
|
||||
Description: "Unter der Hubplattform befindet sich ein umlaufendes Trittblech oder Unterfahrschutz, " +
|
||||
"das das Hineinfahren von Fuessen unter die Plattform mechanisch verhindert. Hoehe ueber Boden " +
|
||||
"maximal 5 mm in unterster Stellung. Trittblech haelt die Last eines Schuhs (mind. 150 kg) " +
|
||||
"ohne Verformung.",
|
||||
HazardCategory: "mechanical",
|
||||
Examples: []string{
|
||||
"Umlaufendes Stahlblech 3 mm Wandstaerke mit Fasen-Kante",
|
||||
"Kombination mit M601 (Schaltleiste) als doppelte Sicherung",
|
||||
"Pruefung jaehrlich auf Verformung und Funktion der Auflage",
|
||||
},
|
||||
NormReferences: []string{
|
||||
"EN 1570-1 (Hubtische)",
|
||||
"EN ISO 13857 (Sicherheitsabstaende)",
|
||||
},
|
||||
RiskReduction: &RiskReduction{SeverityDelta: -2, ExposureDelta: -1},
|
||||
Tags: []string{"crush_point", "gravity_risk"},
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,172 @@
|
||||
package iace
|
||||
|
||||
// Minimum-distance library — Task #18.
|
||||
//
|
||||
// Anchor source: OSHA 29 CFR 1910 Subpart O (US Federal Public Domain,
|
||||
// 17 U.S.C. §105). The values below are reproduced verbatim from the
|
||||
// Federal Code; conversions to metric are mathematical and carry no
|
||||
// copyright. Engineering rounding to safe-side mm values is BreakPilot's
|
||||
// recommendation and labelled as such.
|
||||
//
|
||||
// EU norm equivalents (EN ISO 13857, EN 349, EN ISO 13855, EN 1010) are
|
||||
// referenced by identifier only — no values are reproduced, because
|
||||
// DIN/Beuth retain copyright on the wording. The DINComparisonNote
|
||||
// field carries a human-curated judgement on whether the EU norm is
|
||||
// stricter / looser / equivalent — this is a qualitative observation
|
||||
// about a publicly available document, not a copy of its text.
|
||||
//
|
||||
// See LICENSE_RULES.md and project_attribution_strategy.md for the
|
||||
// licensing logic. The OSHA values are R1 (verbatim public domain);
|
||||
// the recommended metric values are BreakPilot engineering output (R3
|
||||
// own-work). DIN references are R3 identifier-only.
|
||||
|
||||
// MinimumDistanceUnit names the unit system in which a source states its
// original (verbatim) value.
type MinimumDistanceUnit string

// Units a MinimumDistance source value may be expressed in.
const (
	UnitInch  = MinimumDistanceUnit("inch")
	UnitFoot  = MinimumDistanceUnit("foot")
	UnitMeter = MinimumDistanceUnit("meter")
	UnitMM    = MinimumDistanceUnit("mm")
)
|
||||
|
||||
// MinimumDistance is the data contract for a single safety-distance rule.
|
||||
// It can be (a) a fixed gap value, (b) a distance range, or (c) a formula
|
||||
// like OSHA's Ds = 63 in/s × Ts (hand-speed constant).
|
||||
type MinimumDistance struct {
|
||||
ID string `json:"id"` // MD_OSHA_001
|
||||
// Source identifier — full CFR citation or norm reference.
|
||||
SourceCFR string `json:"source_cfr,omitempty"` // "29 CFR §1910.217(c)(1)(i)"
|
||||
SourceTable string `json:"source_table,omitempty"` // "Table O-10"
|
||||
License string `json:"license"` // "US Federal Public Domain"
|
||||
LicenseRule int `json:"license_rule"` // 1 / 2 / 3 (see LICENSE_RULES.md)
|
||||
|
||||
// Original verbatim value in the source's own unit.
|
||||
OriginalUnit MinimumDistanceUnit `json:"original_unit"`
|
||||
OriginalValue float64 `json:"original_value,omitempty"`
|
||||
OriginalMin float64 `json:"original_min,omitempty"`
|
||||
OriginalMax float64 `json:"original_max,omitempty"`
|
||||
|
||||
// Exact conversion to mm — no engineering rounding.
|
||||
ExactMM float64 `json:"exact_mm,omitempty"`
|
||||
ExactMinMM float64 `json:"exact_min_mm,omitempty"`
|
||||
ExactMaxMM float64 `json:"exact_max_mm,omitempty"`
|
||||
|
||||
// Engineering-recommended metric value with safe-side rounding.
|
||||
// For minimum distances: rounded up. For maximum opening widths:
|
||||
// rounded down.
|
||||
RecommendedMM int `json:"recommended_mm,omitempty"`
|
||||
RecommendedMinMM int `json:"recommended_min_mm,omitempty"`
|
||||
RecommendedMaxMM int `json:"recommended_max_mm,omitempty"`
|
||||
RoundingNote string `json:"rounding_note,omitempty"`
|
||||
|
||||
// Optional formula constant (e.g. OSHA hand-speed 63 in/s).
|
||||
FormulaInchPerSecond float64 `json:"formula_inch_per_second,omitempty"`
|
||||
FormulaMMPerSecond float64 `json:"formula_mm_per_second,omitempty"`
|
||||
FormulaDescription string `json:"formula_description,omitempty"`
|
||||
|
||||
Context string `json:"context"` // "Point of Operation Guarding mechanical presses"
|
||||
BodyPart string `json:"body_part,omitempty"` // "finger" / "hand" / "head" / "foot" / "body"
|
||||
HazardTags []string `json:"hazard_tags,omitempty"` // [crush_point, cutting_part, ...]
|
||||
|
||||
// EU norm cross-reference — IDENTIFIER ONLY, no values reproduced.
|
||||
EUNormHints []EUNormHint `json:"eu_norm_hints,omitempty"`
|
||||
}
|
||||
|
||||
// EUNormHint points at an EU standard by identifier only; it carries no
// value or wording taken from that standard.
type EUNormHint struct {
	// Norm is the standard's identifier, e.g. "EN ISO 13857".
	Norm string `json:"norm"`
	// Section optionally narrows the reference,
	// e.g. "Tab. 4, Schutz gegen Hineingreifen".
	Section string `json:"section,omitempty"`
	// DINComparisonNote is a human-curated qualitative judgement
	// (stricter / equivalent / looser) — not a copy of the norm's text.
	DINComparisonNote string `json:"din_comparison_note,omitempty"`
}
|
||||
|
||||
// GetOSHAMinimumDistances returns the verbatim OSHA values for
|
||||
// machine-guarding distances. All values are US Federal Public Domain
|
||||
// (17 U.S.C. §105). Engineering rounding is BreakPilot's safe-side
|
||||
// recommendation; OSHA values themselves are unchanged.
|
||||
func GetOSHAMinimumDistances() []MinimumDistance {
|
||||
return []MinimumDistance{
|
||||
// OSHA Table O-10 row 1 — verbatim values, mathematical conversion,
|
||||
// safe-side rounded engineering recommendation.
|
||||
{
|
||||
ID: "MD_OSHA_O10_R1",
|
||||
SourceCFR: "29 CFR §1910.217(c)(1)(i)",
|
||||
SourceTable: "Table O-10 row 1",
|
||||
License: "US Federal Public Domain (17 U.S.C. §105)",
|
||||
LicenseRule: 1,
|
||||
OriginalUnit: UnitInch,
|
||||
OriginalMin: 0.5, OriginalMax: 1.5, OriginalValue: 0.25,
|
||||
ExactMinMM: 12.7, ExactMaxMM: 38.1, ExactMM: 6.35,
|
||||
RecommendedMinMM: 15, RecommendedMaxMM: 40, RecommendedMM: 6,
|
||||
RoundingNote: "Distance auf 5-mm-Raster aufgerundet, opening auf 1-mm-Raster abgerundet (konservativ in beide Richtungen).",
|
||||
Context: "Point-of-Operation Guarding bei mechanischen Pressen",
|
||||
BodyPart: "finger",
|
||||
HazardTags: []string{"crush_point", "cutting_part"},
|
||||
EUNormHints: []EUNormHint{
|
||||
{Norm: "EN ISO 13857", Section: "Tab. 4 (Hineingreifen)",
|
||||
DINComparisonNote: "Andere Methodik (Reichweitenmodell). Unabhaengig pruefen — Werte koennen abweichen."},
|
||||
},
|
||||
},
|
||||
// OSHA Table O-10 row 4 — used as a worked example in the strategy
|
||||
// discussion. Distance 3.5-5.5 in, opening max 5/8 in.
|
||||
{
|
||||
ID: "MD_OSHA_O10_R4",
|
||||
SourceCFR: "29 CFR §1910.217(c)(1)(i)",
|
||||
SourceTable: "Table O-10 row 4",
|
||||
License: "US Federal Public Domain (17 U.S.C. §105)",
|
||||
LicenseRule: 1,
|
||||
OriginalUnit: UnitInch,
|
||||
OriginalMin: 3.5, OriginalMax: 5.5, OriginalValue: 0.625,
|
||||
ExactMinMM: 88.9, ExactMaxMM: 139.7, ExactMM: 15.875,
|
||||
RecommendedMinMM: 90, RecommendedMaxMM: 140, RecommendedMM: 15,
|
||||
RoundingNote: "Distance 88.9→90 (+1.1 mm), 139.7→140 (+0.3 mm) aufgerundet; Opening 15.875→15 (-0.875 mm) abgerundet.",
|
||||
Context: "Point-of-Operation Guarding bei mechanischen Pressen",
|
||||
BodyPart: "finger",
|
||||
HazardTags: []string{"crush_point", "cutting_part"},
|
||||
EUNormHints: []EUNormHint{
|
||||
{Norm: "EN ISO 13857", Section: "Tab. 4 (Hineingreifen)",
|
||||
DINComparisonNote: "Andere Methodik (Reichweitenmodell). Compliance-Annotation pflegen."},
|
||||
},
|
||||
},
|
||||
// OSHA §1910.212(a)(5) — fan blade guards. Verbatim 1/2 inch.
|
||||
{
|
||||
ID: "MD_OSHA_212_FAN",
|
||||
SourceCFR: "29 CFR §1910.212(a)(5)",
|
||||
License: "US Federal Public Domain (17 U.S.C. §105)",
|
||||
LicenseRule: 1,
|
||||
OriginalUnit: UnitInch,
|
||||
OriginalValue: 0.5,
|
||||
ExactMM: 12.7,
|
||||
RecommendedMM: 12,
|
||||
RoundingNote: "Luefterblatt-Schutzgitter: max. Spaltoeffnung 1/2 in = 12.7 mm. Konservativ auf 12 mm abgerundet.",
|
||||
Context: "Lüfterblätter unter 7 ft (2.13 m) Höhe",
|
||||
BodyPart: "finger",
|
||||
HazardTags: []string{"rotating_part", "cutting_part"},
|
||||
EUNormHints: []EUNormHint{
|
||||
{Norm: "EN ISO 13857", Section: "Tab. 4",
|
||||
DINComparisonNote: "DIN-Wert pruefen."},
|
||||
},
|
||||
},
|
||||
// OSHA §1910.217 Hand-Speed Constant — formula Ds = 63 in/s × Ts
|
||||
{
|
||||
ID: "MD_OSHA_217_PSDI",
|
||||
SourceCFR: "29 CFR §1910.217 (Ds = 63 in/s × Ts)",
|
||||
License: "US Federal Public Domain (17 U.S.C. §105)",
|
||||
LicenseRule: 1,
|
||||
OriginalUnit: UnitInch,
|
||||
FormulaInchPerSecond: 63.0,
|
||||
FormulaMMPerSecond: 1600.2,
|
||||
FormulaDescription: "Hand-Speed-Konstante 63 in/s ≈ 1600 mm/s. " +
|
||||
"Ds (Mindestabstand) = 63 × Ts (Stoppzeit Presse in Sekunden).",
|
||||
Context: "PSDI Presence-Sensing Device Initiation und Two-Hand-Trip",
|
||||
BodyPart: "hand",
|
||||
HazardTags: []string{"crush_point", "high_speed"},
|
||||
EUNormHints: []EUNormHint{
|
||||
{Norm: "EN 13855", Section: "Sicherheitsabstaende",
|
||||
DINComparisonNote: "EN 13855 nutzt andere Konstante (1600 mm/s ≈ identisch); EU-Norm unabhaengig pruefen."},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
package iace
|
||||
|
||||
// Machine-type overrides for legacy patterns that lacked MachineTypes
|
||||
// filtering at authoring time. Applied as a post-load pass in
|
||||
// collectAllPatterns() so we do not need to touch the large pattern
|
||||
// source files (which would push them past the 500-LOC cap).
|
||||
//
|
||||
// Adding an entry here causes the listed pattern IDs to fire ONLY for
|
||||
// projects whose machine_type is in the value list. This eliminates
|
||||
// drift like "Punktschweisselektroden" firing for a Kistenhubgeraet
|
||||
// project just because tags incidentally aligned.
|
||||
|
||||
// legacyMachineTypeOverrides maps a legacy pattern ID to the machine types
// that pattern is restricted to. Entries are listed in ascending ID order.
// HP306 and HP1530 are deliberately absent because they already carry
// MachineTypes in their own definitions.
var legacyMachineTypeOverrides = map[string][]string{
	// Presses (press parts / press chamber / tool chamber).
	"HP045": {"press", "hydraulic_press", "mechanical_press", "stamping_press"},
	"HP049": {"press", "hydraulic_press", "mechanical_press", "stamping_press"},

	// Conveyor-belt patterns.
	"HP420": {"conveyor", "packaging", "food_processing"},
	"HP421": {"conveyor", "packaging", "food_processing"},
	"HP422": {"conveyor", "packaging", "food_processing"},

	// Welding.
	"HP539": {"welding", "spot_welding"},

	// Glass-handling tilters.
	"HP545": {"glass", "glass_processing"},
	"HP782": {"glass", "glass_processing"},

	// Escalators.
	"HP756": {"escalator"},
	"HP757": {"escalator"},
	"HP760": {"escalator"},

	// Roller hazards: printing, paper, textile, metalworking, rolling mills
	// and food processing.
	"HP1000": {"printing", "paper", "textile", "metalworking", "rolling_mill", "food_processing"},

	// CNC machine tools. These fired on a Kistenhubgeraet project because
	// they share the crush_point + moving_part tags, although they are
	// bench-mounted tools.
	"HP1400": {"cnc", "metalworking", "lathe", "milling"},
	"HP1401": {"cnc", "metalworking", "lathe", "milling"},
	"HP1402": {"cnc", "metalworking", "lathe", "milling"},
}
|
||||
|
||||
// applyMachineTypeOverrides mutates the passed slice in place, setting
|
||||
// MachineTypes on any pattern whose ID is in the override map. Patterns
|
||||
// that already have MachineTypes set are NOT overwritten — the override
|
||||
// only fills the gap.
|
||||
func applyMachineTypeOverrides(patterns []HazardPattern) []HazardPattern {
|
||||
for i := range patterns {
|
||||
if len(patterns[i].MachineTypes) > 0 {
|
||||
continue
|
||||
}
|
||||
if mt, ok := legacyMachineTypeOverrides[patterns[i].ID]; ok {
|
||||
patterns[i].MachineTypes = mt
|
||||
}
|
||||
}
|
||||
return patterns
|
||||
}
|
||||
@@ -42,5 +42,8 @@ func collectAllPatterns() []HazardPattern {
|
||||
patterns = append(patterns, GetGTBremseHazardPatterns()...) // HP1710-HP1729 GT Bremse coverage gaps
|
||||
patterns = append(patterns, GetISO12100GapPatterns()...) // HP1900-HP1909 ISO 12100 Annex B gaps (Vakuum, Federn, Rutsch, Hochdruckinjektion, Ersticken)
|
||||
patterns = append(patterns, GetCRAPatterns()...) // HP1910-HP1918 CRA / DIN EN 40000-1-2 cyber-resilience spur
|
||||
patterns = append(patterns, GetSecondaryHarmDemoPatterns()...) // HP2000-HP2001 secondary harm chain demos (Cola splitter, Pharma)
|
||||
patterns = append(patterns, GetLiftEndstopPatterns()...) // HP2100-HP2102 lift body-part crush at endstops
|
||||
patterns = applyMachineTypeOverrides(patterns) // Fill MachineTypes on legacy patterns to prevent drift
|
||||
return patterns
|
||||
}
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
package iace
|
||||
|
||||
// SecondaryHarm describes one link in the consequential-damage chain that a
// primary hazard can set off.
//
// The classical IACE / ISO-12100 model treats Hazard -> Harm as a single
// step ("operator gets crushed"). BreakPilot adds a follow-on chain so the
// risk assessment can also cover:
//
//   - consumer_safety:   end customer exposed to a defective product
//     (e.g. glass shards in a bottled drink that reaches a supermarket)
//   - product_liability: manufacturer liability under ProdHaftG / EU PLD
//   - food_safety:       traceability and recall obligations (VO 178/2002)
//   - environmental:     spill, contamination, waste-disposal consequence
//   - reputation:        brand damage escalating to investor / market level
//   - financial:         direct cost (lawsuit, recall, fine)
//
// This struct is only the data contract; persistence is deferred to a future
// migration. The pattern library can already attach SecondaryHarms to a
// HazardPattern, and the API layer surfaces them on hazard generation.
//
// See memory project_attribution_strategy.md plus the "Cola splitter" worked
// example from the IACE strategy discussion (2026-05-20).
type SecondaryHarm struct {
	// Type holds one of the SecondaryHarm* type constants.
	Type string `json:"type"`

	// Description states the secondary-harm scenario in one concrete
	// sentence ("Splitter in Folgeflasche bei Karussell-Abfueller ->
	// Endkunde verletzt").
	Description string `json:"description"`

	// LegalBasis names the legal framework that makes the secondary harm an
	// actionable obligation (e.g. "ProdHaftG §1" or "VO 178/2002 Art. 14"),
	// so auditors can trace where the obligation comes from.
	LegalBasis string `json:"legal_basis,omitempty"`

	// SuggestedMitigations lists free-text measures aimed at the secondary
	// chain (e.g. "Spueltunnel", "Inline-Kamera", "Glasbruchsensor"). They
	// are kept apart from the primary mitigations because they protect
	// downstream stakeholders rather than the operator.
	SuggestedMitigations []string `json:"suggested_mitigations,omitempty"`

	// Owner is the role in the customer organisation that handles this
	// secondary harm. Common values:
	//   "qm" / "product_safety" / "enterprise_risk" / "legal"
	// Left empty when responsibility is shared.
	Owner string `json:"owner,omitempty"`
}
|
||||
|
||||
// Values allowed in SecondaryHarm.Type. The strings are kept short and
// stable.
const (
	// SecondaryHarmConsumerSafety — end customer exposed to a defective product.
	SecondaryHarmConsumerSafety = "consumer_safety"
	// SecondaryHarmProductLiability — manufacturer liability.
	SecondaryHarmProductLiability = "product_liability"
	// SecondaryHarmFoodSafety — traceability and recall obligations.
	SecondaryHarmFoodSafety = "food_safety"
	// SecondaryHarmEnvironmental — spill, contamination, waste disposal.
	SecondaryHarmEnvironmental = "environmental"
	// SecondaryHarmReputation — brand damage.
	SecondaryHarmReputation = "reputation"
	// SecondaryHarmFinancial — direct cost (lawsuit, recall, fine).
	SecondaryHarmFinancial = "financial"
)
|
||||
|
||||
// AllSecondaryHarmTypes returns the canonical six categories in the order
|
||||
// they should appear in UI dropdowns.
|
||||
func AllSecondaryHarmTypes() []string {
|
||||
return []string{
|
||||
SecondaryHarmConsumerSafety,
|
||||
SecondaryHarmProductLiability,
|
||||
SecondaryHarmFoodSafety,
|
||||
SecondaryHarmEnvironmental,
|
||||
SecondaryHarmReputation,
|
||||
SecondaryHarmFinancial,
|
||||
}
|
||||
}
|
||||
|
||||
// SecondaryHarmLabelDE returns the human-readable German label.
|
||||
func SecondaryHarmLabelDE(t string) string {
|
||||
switch t {
|
||||
case SecondaryHarmConsumerSafety:
|
||||
return "Endkundensicherheit"
|
||||
case SecondaryHarmProductLiability:
|
||||
return "Produkthaftung"
|
||||
case SecondaryHarmFoodSafety:
|
||||
return "Lebensmittelsicherheit"
|
||||
case SecondaryHarmEnvironmental:
|
||||
return "Umweltschaden"
|
||||
case SecondaryHarmReputation:
|
||||
return "Reputation/Marke"
|
||||
case SecondaryHarmFinancial:
|
||||
return "Finanzieller Schaden"
|
||||
}
|
||||
return t
|
||||
}
|
||||
@@ -60,5 +60,9 @@ EXPOSE 8002
|
||||
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
||||
CMD curl -f http://127.0.0.1:8002/health || exit 1
|
||||
|
||||
# P83 — Build-SHA fuer check-rebuild-needed.sh
|
||||
ARG BUILD_SHA="unknown"
|
||||
ENV BUILD_SHA=${BUILD_SHA}
|
||||
|
||||
# Run the application
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8002"]
|
||||
|
||||
@@ -72,6 +72,7 @@ _ROUTER_MODULES = [
|
||||
"whistleblower_routes",
|
||||
"tcf_routes",
|
||||
"founding_wizard_routes",
|
||||
"licenses_routes",
|
||||
]
|
||||
|
||||
_loaded_count = 0
|
||||
|
||||
@@ -51,6 +51,10 @@ class ComplianceCheckRequest(BaseModel):
|
||||
# (z.B. "Auftragsbeziehung Safetykon GmbH, Email Hr. X 18.05.2026").
|
||||
tdm_override: bool = False
|
||||
tdm_override_reason: str = ""
|
||||
# P79: 8-Feld Pre-Scan-Wizard (Branche, B2B/B2C, Direkt-Vertrieb,
|
||||
# Rechtsform, Konzern, MA, Besondere Daten, Drittland). Wird im
|
||||
# Snapshot persistiert und filtert die MC-Auswertung (P72).
|
||||
scan_context: dict | None = None
|
||||
|
||||
|
||||
class ComplianceCheckStartResponse(BaseModel):
|
||||
@@ -203,6 +207,44 @@ async def get_snapshot(snapshot_id: str):
|
||||
db.close()
|
||||
|
||||
|
||||
@router.post("/admin/tcf-ingest")
async def tcf_ingest():
    """P105 — ingest / refresh the IAB TCF vendor list.

    Idempotent: fetches the current GVL and upserts it into
    compliance.cookie_library with source='iab_tcf_v2'. Calling it a few
    times per year is sufficient.
    """
    from database import SessionLocal
    from compliance.services.tcf_vendor_authority import (
        fetch_and_ingest_tcf_vendors,
    )

    session = SessionLocal()
    try:
        ingest_result = await fetch_and_ingest_tcf_vendors(session)
    finally:
        # Release the session whether or not the ingest succeeded.
        session.close()
    return ingest_result
|
||||
|
||||
|
||||
@router.get("/snapshots/{snapshot_id}/pdf")
async def export_snapshot_pdf(snapshot_id: str):
    """P88 — PDF export of the audit mail. Responds with application/pdf.

    Args:
        snapshot_id: Snapshot identifier taken from the URL path.

    Returns:
        A Response carrying the rendered PDF as a download attachment.

    Raises:
        HTTPException: 404 when render_snapshot_as_pdf returns a falsy
            value (snapshot missing or rendering failed).
    """
    import re

    from fastapi import HTTPException
    from fastapi.responses import Response
    from database import SessionLocal
    from compliance.services.mail_pdf_export import render_snapshot_as_pdf

    db = SessionLocal()
    try:
        pdf = render_snapshot_as_pdf(db, snapshot_id)
    finally:
        # Release the session before building the response.
        db.close()
    if not pdf:
        raise HTTPException(404, f"Snapshot {snapshot_id} nicht gefunden "
                                 "oder PDF-Render fehlgeschlagen.")
    # snapshot_id comes from the URL path. Restrict the part that ends up in
    # the quoted Content-Disposition filename to a conservative character set
    # so quotes, semicolons or control characters cannot break the header.
    safe_id = re.sub(r"[^A-Za-z0-9_-]", "", snapshot_id[:8]) or "snapshot"
    fname = f"breakpilot-audit-{safe_id}.pdf"
    return Response(
        content=pdf, media_type="application/pdf",
        headers={"Content-Disposition": f'attachment; filename="{fname}"'},
    )
|
||||
|
||||
|
||||
@router.post("/snapshots/{snapshot_id}/replay")
|
||||
async def replay_snapshot(
|
||||
snapshot_id: str,
|
||||
@@ -297,12 +339,25 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
url_text_cache: dict[str, str] = {}
|
||||
|
||||
n_docs = max(1, len(req.documents))
|
||||
# User-pasted-Tabellen-Vendors (kein LLM noetig) — werden weiter
|
||||
# unten in cmp_vendors gemerged.
|
||||
pasted_table_vendors: list[dict] = []
|
||||
for i, doc in enumerate(req.documents):
|
||||
pct = int(1 + (i / n_docs) * 29)
|
||||
_update(check_id, f"Texte laden {i+1}/{n_docs}: {doc.doc_type}...", pct)
|
||||
text = doc.text
|
||||
text = (doc.text or "").strip()
|
||||
input_source = "url"
|
||||
cmp_payloads: list[dict] = []
|
||||
if not text and doc.url:
|
||||
if text:
|
||||
input_source = "text"
|
||||
if doc.url:
|
||||
input_source = "text+url" # User hat beide gefuellt
|
||||
logger.info(
|
||||
"doc_type=%s: User hat URL UND Text geliefert — "
|
||||
"Text gewinnt, URL wird als Quellen-Referenz behalten",
|
||||
doc.doc_type,
|
||||
)
|
||||
elif doc.url:
|
||||
url_key = doc.url.strip().rstrip("/").lower()
|
||||
if url_key in url_text_cache:
|
||||
text = url_text_cache[url_key]
|
||||
@@ -310,16 +365,62 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
text, cmp_payloads = await _fetch_text(doc.url, doc_type=doc.doc_type)
|
||||
if text:
|
||||
url_text_cache[url_key] = text
|
||||
|
||||
# Auto-Reclassify-Check: wenn der user Text in das falsche
|
||||
# Doc-Type-Feld kopiert hat (z.B. Impressum-Text in DSE),
|
||||
# erkennen und ggf. umtaggen.
|
||||
actual_doc_type = doc.doc_type
|
||||
reclassify_hint: dict | None = None
|
||||
if input_source.startswith("text") and len(text) >= 500:
|
||||
try:
|
||||
from compliance.services.doc_type_classifier import (
|
||||
detect_mismatch,
|
||||
)
|
||||
reclassify_hint = detect_mismatch(doc.doc_type, text)
|
||||
if reclassify_hint and reclassify_hint["action"] == "reclassify":
|
||||
actual_doc_type = reclassify_hint["detected"]
|
||||
logger.info(
|
||||
"doc_type AUTO-RECLASSIFY: deklariert=%s "
|
||||
"erkannt=%s (score %d vs %d) — uebernehme erkannten Typ",
|
||||
doc.doc_type, actual_doc_type,
|
||||
reclassify_hint["detected_score"],
|
||||
reclassify_hint["declared_score"],
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("doc_type_classifier failed: %s", e)
|
||||
|
||||
# Cookie-Tabelle: wenn User Tabelle reinkopiert hat, deterministisch
|
||||
# parsen (kein LLM noetig) und Vendors gleich ableiten.
|
||||
if input_source.startswith("text") and actual_doc_type == "cookie":
|
||||
try:
|
||||
from compliance.services.cookies_table_parser import (
|
||||
parse_cookie_table,
|
||||
)
|
||||
tab_vendors = parse_cookie_table(text)
|
||||
if tab_vendors:
|
||||
pasted_table_vendors.extend(tab_vendors)
|
||||
logger.info(
|
||||
"Cookie-Tabelle erkannt im pasted Text — "
|
||||
"%d Vendors / %d Cookies deterministisch geparst",
|
||||
len(tab_vendors),
|
||||
sum(len(v.get("cookies", [])) for v in tab_vendors),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("cookies_table_parser failed: %s", e)
|
||||
|
||||
if text:
|
||||
doc_texts[doc.doc_type] = text
|
||||
doc_texts[actual_doc_type] = text
|
||||
doc_entries.append({
|
||||
"doc_type": doc.doc_type,
|
||||
"url": doc.url,
|
||||
"text": text,
|
||||
"word_count": len(text.split()) if text else 0,
|
||||
"auto_discovered": False,
|
||||
"doc_type": actual_doc_type,
|
||||
"declared_doc_type": doc.doc_type,
|
||||
"url": doc.url,
|
||||
"text": text,
|
||||
"word_count": len(text.split()) if text else 0,
|
||||
"auto_discovered": False,
|
||||
"discovery_attempted": False,
|
||||
"cmp_payloads": cmp_payloads,
|
||||
"cmp_payloads": cmp_payloads,
|
||||
"input_source": input_source,
|
||||
"reclassify_hint": reclassify_hint,
|
||||
})
|
||||
|
||||
# Step 1a-bis: AUTO-DISCOVERY. For each canonical doc_type the user
|
||||
@@ -661,24 +762,42 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
cmp_vendors = extract_vendors_from_payloads(
|
||||
cookie_payloads, owner_name=owner_name,
|
||||
)
|
||||
# V3 fallback: no named CMP captured but we have substantive
|
||||
# cookie text → ask Qwen/OVH to extract vendor list from the text.
|
||||
# Skip on very short text (likely navigation) to save LLM cost.
|
||||
if not cmp_vendors and cookie_text and len(cookie_text.split()) >= 500:
|
||||
# P52: LLM-Fallback nicht nur wenn 0 Vendors, sondern auch
|
||||
# wenn die strukturierten Quellen < 5 Vendors lieferten und
|
||||
# der Cookie-Text substantiell ist. So holt sich VW-typische
|
||||
# Setups (Generic CMP, 28 Cookies aber 0 cmp_payloads) noch
|
||||
# ihre echten Vendors aus dem Text.
|
||||
if (len(cmp_vendors) < 5
|
||||
and cookie_text and len(cookie_text.split()) >= 500):
|
||||
from compliance.services.vendor_llm_extractor import (
|
||||
extract_vendors_via_llm,
|
||||
)
|
||||
from compliance.services.vendor_classifier import classify
|
||||
_update(check_id, "Vendor-Liste per LLM extrahieren...", 94)
|
||||
cmp_vendors = await extract_vendors_via_llm(cookie_text)
|
||||
# LLM path doesn't run through extract_vendors_from_payloads,
|
||||
# so classify here.
|
||||
for v in cmp_vendors:
|
||||
llm_vendors = await extract_vendors_via_llm(cookie_text)
|
||||
# P52: classify die LLM-Vendors und MERGE mit existing
|
||||
# statt zu ueberschreiben.
|
||||
existing_names = {(v.get("name") or "").strip().lower()
|
||||
for v in cmp_vendors}
|
||||
added_llm = 0
|
||||
for v in llm_vendors:
|
||||
nm = (v.get("name") or "").strip()
|
||||
if not nm or nm.lower() in existing_names:
|
||||
continue
|
||||
v["recipient_type"] = classify(
|
||||
vendor_name=v.get("name", ""),
|
||||
vendor_name=nm,
|
||||
category=v.get("category", ""),
|
||||
owner_name=owner_name,
|
||||
)
|
||||
v.setdefault("source", "llm_cascade")
|
||||
cmp_vendors.append(v)
|
||||
existing_names.add(nm.lower())
|
||||
added_llm += 1
|
||||
if added_llm:
|
||||
logger.info(
|
||||
"P52 LLM-Cascade: +%d Vendors (total: %d)",
|
||||
added_llm, len(cmp_vendors),
|
||||
)
|
||||
# P57: Phase G vendor_details als zusätzliche Vendor-Quelle.
|
||||
# Wenn extract_vendors_from_payloads weniger findet als
|
||||
# Phase G's Info-Click-Through (z.B. Mercedes-Settings nicht
|
||||
@@ -723,6 +842,137 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
logger.info("P57: added %d new vendors from Phase G (total: %d)",
|
||||
added, len(cmp_vendors))
|
||||
|
||||
# D — HTML-Tabellen die der consent-tester aus dem DOM
|
||||
# extrahiert hat: direkt deterministisch parsen (hoechste
|
||||
# Genauigkeit, keine LLM-Halluzinationen).
|
||||
for pl in (cookie_payloads or []):
|
||||
if pl.get("kind") != "html_table":
|
||||
continue
|
||||
rows = pl.get("rows") or []
|
||||
if len(rows) < 3:
|
||||
continue
|
||||
try:
|
||||
from compliance.services.cookies_table_parser import (
|
||||
parse_cookie_table as _parse_ct_d,
|
||||
)
|
||||
table_text = "\n".join(rows)
|
||||
d_vendors = _parse_ct_d(table_text)
|
||||
if d_vendors:
|
||||
existing_d = {(v.get("name") or "").strip().lower()
|
||||
for v in cmp_vendors}
|
||||
added_d = 0
|
||||
for v in d_vendors:
|
||||
nm = (v.get("name") or "").strip()
|
||||
if not nm or nm.lower() in existing_d:
|
||||
continue
|
||||
v.setdefault("source", "html_table_dom")
|
||||
cmp_vendors.append(v)
|
||||
existing_d.add(nm.lower())
|
||||
added_d += 1
|
||||
if added_d:
|
||||
logger.info(
|
||||
"D HTML-Table-DOM-Parse: +%d Vendors aus "
|
||||
"%d-Zeilen-Tabelle (total: %d)",
|
||||
added_d, len(rows), len(cmp_vendors),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("html_table parse failed: %s", e)
|
||||
|
||||
# B — cookies_table_parser auch auf gecrawltem Cookie-Text.
|
||||
# Erst Standard-Parse (Tab/Pipe-getrennt). Wenn der nichts
|
||||
# findet (kein Separator), Flat-Pattern-Parse fuer Sites wie
|
||||
# VW die ihre Tabelle als flachen Text liefern.
|
||||
if cookie_text and len(cookie_text) >= 500:
|
||||
try:
|
||||
from compliance.services.cookies_table_parser import (
|
||||
parse_cookie_table as _parse_ct,
|
||||
parse_flat_cookie_text as _parse_flat,
|
||||
)
|
||||
crawled_table_vendors = _parse_ct(cookie_text)
|
||||
if not crawled_table_vendors:
|
||||
crawled_table_vendors = _parse_flat(cookie_text)
|
||||
if crawled_table_vendors:
|
||||
existing = {(v.get("name") or "").strip().lower()
|
||||
for v in cmp_vendors}
|
||||
added_c = 0
|
||||
for v in crawled_table_vendors:
|
||||
nm = (v.get("name") or "").strip()
|
||||
if not nm or nm.lower() in existing:
|
||||
continue
|
||||
v.setdefault("source", "table_crawled")
|
||||
cmp_vendors.append(v)
|
||||
existing.add(nm.lower())
|
||||
added_c += 1
|
||||
if added_c:
|
||||
logger.info(
|
||||
"B Crawled-Tabellen-Parse: +%d Vendors "
|
||||
"(total: %d)",
|
||||
added_c, len(cmp_vendors),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("crawled-table-parse failed: %s", e)
|
||||
|
||||
# User-pasted Cookie-Tabelle (deterministisch, kein LLM):
|
||||
# die hat IMMER Vorrang weil 100% genau.
|
||||
if pasted_table_vendors:
|
||||
existing = {(v.get("name") or "").strip().lower()
|
||||
for v in cmp_vendors}
|
||||
added_p = 0
|
||||
for v in pasted_table_vendors:
|
||||
nm = (v.get("name") or "").strip()
|
||||
if not nm or nm.lower() in existing:
|
||||
continue
|
||||
cmp_vendors.append(v)
|
||||
existing.add(nm.lower())
|
||||
added_p += 1
|
||||
if added_p:
|
||||
logger.info(
|
||||
"Pasted-Tabellen-Merge: +%d Vendors (total: %d)",
|
||||
added_p, len(cmp_vendors),
|
||||
)
|
||||
|
||||
# Cookie-Library-Fallback (P52 Lite): wenn weiterhin wenige
|
||||
# Vendors aber viele after_accept-Cookies, aus Library auflösen.
|
||||
# VW-Lehre: 6 LLM-Grob-Vendors reichen NICHT — die Library
|
||||
# holt 30+ weitere aus den Cookie-Namen + Cookie-Doc-Pattern.
|
||||
# Schwelle: immer probieren wenn < 20 Vendors.
|
||||
if banner_result and len(cmp_vendors) < 20:
|
||||
try:
|
||||
from compliance.services.cookie_to_vendor_fallback import (
|
||||
fallback_vendors_for_run,
|
||||
)
|
||||
from database import SessionLocal as _SLfb
|
||||
_fb_db = _SLfb()
|
||||
try:
|
||||
extra = fallback_vendors_for_run(
|
||||
_fb_db, banner_result, len(cmp_vendors),
|
||||
cookie_doc_text=cookie_text,
|
||||
)
|
||||
if extra:
|
||||
existing_names = {(v.get("name") or "").strip().lower()
|
||||
for v in cmp_vendors}
|
||||
for v in extra:
|
||||
if v["name"].lower() in existing_names:
|
||||
continue
|
||||
cmp_vendors.append(v)
|
||||
logger.info(
|
||||
"Cookie-Library-Fallback: cmp_vendors %d -> %d",
|
||||
len(cmp_vendors) - len(extra), len(cmp_vendors),
|
||||
)
|
||||
finally:
|
||||
_fb_db.close()
|
||||
except Exception as e:
|
||||
logger.warning("Cookie-Library-Fallback skipped: %s", e)
|
||||
|
||||
# Vendor-Normalizer: Dedup (Google-Familie etc) + Garbage-Filter
|
||||
try:
|
||||
from compliance.services.vendor_normalizer import (
|
||||
normalize_vendors as _norm_v,
|
||||
)
|
||||
cmp_vendors = _norm_v(cmp_vendors)
|
||||
except Exception as e:
|
||||
logger.warning("vendor_normalizer skipped: %s", e)
|
||||
|
||||
# P50: enrich vendors with per-vendor detail-modal-extracts
|
||||
# (description, opt-out URL, privacy URL, cookies). Detail
|
||||
# comes from Phase G Info-button-click-through in /scan.
|
||||
@@ -918,14 +1168,38 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
from compliance.services.mc_scorecard import build_scorecard
|
||||
from .agent_doc_check_scorecard import build_scorecard_html
|
||||
all_mc_checks: list[dict] = []
|
||||
# P73: pro-doc Fails sammeln um Solution-Generator pro Doc-Type
|
||||
# mit dem korrekten doc_text aufzurufen.
|
||||
fails_by_doc: dict[str, list[dict]] = {}
|
||||
for r in results:
|
||||
for c in r.checks:
|
||||
if c.id.startswith("mc-"):
|
||||
all_mc_checks.append({
|
||||
rec = {
|
||||
"id": c.id, "label": c.label, "passed": c.passed,
|
||||
"severity": c.severity, "skipped": c.skipped,
|
||||
"regulation": c.regulation,
|
||||
})
|
||||
"hint": getattr(c, "hint", "") or "",
|
||||
}
|
||||
all_mc_checks.append(rec)
|
||||
if (not c.passed and not c.skipped
|
||||
and (c.severity or "").upper() in ("CRITICAL", "HIGH")):
|
||||
fails_by_doc.setdefault(r.doc_type, []).append(rec)
|
||||
# P106 — Audit-Type-Klassifizierung pro MC. Interne Prozess-/
|
||||
# Doku-Checks werden NICHT als FAIL gewertet sondern als CHECK
|
||||
# (manuelle Pruefung beim DSB notwendig).
|
||||
try:
|
||||
from compliance.services.mc_audit_type import (
|
||||
annotate_mc_results, split_by_audit_type,
|
||||
)
|
||||
annotate_mc_results(all_mc_checks)
|
||||
mc_split = split_by_audit_type(all_mc_checks)
|
||||
# Fails-by-doc neu aufbauen: nur noch echte verifiable Fails
|
||||
fails_by_doc = {}
|
||||
for r in mc_split.get("verifiable_fails") or []:
|
||||
fails_by_doc.setdefault("dse", []).append(r)
|
||||
except Exception as e:
|
||||
logger.warning("P106 mc_audit_type skipped: %s", e)
|
||||
mc_split = {"internal_checks": [], "verifiable_fails": all_mc_checks}
|
||||
scorecard = build_scorecard(all_mc_checks) if all_mc_checks else {}
|
||||
# Trend: load previous scorecard for the same tenant + domain so the
|
||||
# email can show delta indicators (A6).
|
||||
@@ -1043,11 +1317,345 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
except Exception as e:
|
||||
logger.warning("Scope-disclaimer block skipped: %s", e)
|
||||
|
||||
# P103 + P104 — Cookie-Value-Entropy + Network-Tracing (Stufe 3 + 4)
|
||||
entropy_html = ""
|
||||
network_trace_html = ""
|
||||
try:
|
||||
from compliance.services.cookie_value_entropy import (
|
||||
check_cookies_for_entropy_mismatch, build_entropy_block_html,
|
||||
)
|
||||
from compliance.services.cookie_network_tracer import (
|
||||
trace_cookie_network, build_network_trace_block_html,
|
||||
)
|
||||
cookies_detailed = (banner_result or {}).get("cookies_detailed") or []
|
||||
entropy_findings = check_cookies_for_entropy_mismatch(cookies_detailed)
|
||||
if entropy_findings:
|
||||
entropy_html = build_entropy_block_html(entropy_findings)
|
||||
logger.info("P103 Entropy: %d Findings", len(entropy_findings))
|
||||
primary_url = ""
|
||||
for e_ in doc_entries:
|
||||
if e_.get("url"):
|
||||
primary_url = e_["url"]; break
|
||||
net_findings = trace_cookie_network(cookies_detailed, primary_url)
|
||||
if net_findings:
|
||||
network_trace_html = build_network_trace_block_html(net_findings)
|
||||
logger.info("P104 Network-Trace: %d Findings", len(net_findings))
|
||||
except Exception as e:
|
||||
logger.warning("P103/P104 entropy/network-trace skipped: %s", e)
|
||||
|
||||
# P105 — IAB TCF Authority-Cross-Reference (Stufe 5)
|
||||
tcf_authority_html = ""
|
||||
try:
|
||||
from compliance.services.tcf_vendor_authority import (
|
||||
cross_reference_with_tcf, build_tcf_authority_block_html,
|
||||
)
|
||||
from database import SessionLocal as _SLtcf
|
||||
_tcf_db = _SLtcf()
|
||||
try:
|
||||
tcf_findings = cross_reference_with_tcf(_tcf_db, cmp_vendors)
|
||||
if tcf_findings:
|
||||
tcf_authority_html = build_tcf_authority_block_html(tcf_findings)
|
||||
logger.info(
|
||||
"TCF-Authority: %d Vendor-Discrepancies gefunden",
|
||||
len(tcf_findings),
|
||||
)
|
||||
finally:
|
||||
_tcf_db.close()
|
||||
except Exception as e:
|
||||
logger.warning("TCF-Authority-Check skipped: %s", e)
|
||||
|
||||
# COOKIE-COMPLIANCE-AUDIT (3-Quellen-Vergleich) — das ist der
|
||||
# zentrale USP: deklariert in Richtlinie vs tatsaechlich im
|
||||
# Browser geladen vs Library-Match.
|
||||
cookie_audit = {}
|
||||
cookie_audit_html = ""
|
||||
try:
|
||||
from compliance.services.cookie_compliance_audit import (
|
||||
audit_cookie_compliance, build_cookie_audit_block_html,
|
||||
)
|
||||
from database import SessionLocal as _SLca
|
||||
_ca_db = _SLca()
|
||||
try:
|
||||
cookie_audit = audit_cookie_compliance(
|
||||
_ca_db, doc_texts.get("cookie") or doc_texts.get("dse"),
|
||||
banner_result,
|
||||
)
|
||||
if cookie_audit and (cookie_audit.get("declared_count") or
|
||||
cookie_audit.get("browser_count")):
|
||||
cookie_audit_html = build_cookie_audit_block_html(cookie_audit)
|
||||
logger.info(
|
||||
"Cookie-Audit: %d deklariert, %d im Browser, "
|
||||
"%d undokumentiert, %d compliant",
|
||||
cookie_audit.get("declared_count"),
|
||||
cookie_audit.get("browser_count"),
|
||||
len(cookie_audit.get("undeclared_in_browser") or []),
|
||||
len(cookie_audit.get("compliant") or []),
|
||||
)
|
||||
finally:
|
||||
_ca_db.close()
|
||||
except Exception as e:
|
||||
logger.warning("cookie-compliance-audit skipped: %s", e)
|
||||
|
||||
# P102: Cookie-Klassifikations-Pruefung (deklariert vs Library)
|
||||
library_mismatch_html = ""
|
||||
mismatches: list[dict] = []
|
||||
try:
|
||||
from compliance.services.cookie_library_mismatch import (
|
||||
detect_mismatches, build_mismatch_block_html,
|
||||
)
|
||||
from database import SessionLocal
|
||||
cookie_doc_for_check = doc_texts.get("cookie") or doc_texts.get("dse") or ""
|
||||
all_cookies_seen: list[str] = []
|
||||
if banner_result:
|
||||
for ph in (banner_result.get("phases") or {}).values():
|
||||
if isinstance(ph, dict):
|
||||
for ck in (ph.get("cookies") or []):
|
||||
if isinstance(ck, str):
|
||||
all_cookies_seen.append(ck)
|
||||
elif isinstance(ck, dict) and ck.get("name"):
|
||||
all_cookies_seen.append(ck["name"])
|
||||
if all_cookies_seen and cookie_doc_for_check:
|
||||
_mm_db = SessionLocal()
|
||||
try:
|
||||
mismatches = detect_mismatches(
|
||||
_mm_db, all_cookies_seen, cookie_doc_for_check,
|
||||
)
|
||||
if mismatches:
|
||||
library_mismatch_html = build_mismatch_block_html(mismatches)
|
||||
logger.info(
|
||||
"P102: %d Cookie-Mismatches gefunden", len(mismatches)
|
||||
)
|
||||
finally:
|
||||
_mm_db.close()
|
||||
except Exception as e:
|
||||
logger.warning("P102 mismatch detection failed: %s", e)
|
||||
|
||||
# P35 + P77 + P78: Textsignal-Checks (Save-Label, Cookies-in-DSE,
|
||||
# JC-Klausel im DSE)
|
||||
signals_html = ""
|
||||
try:
|
||||
from compliance.services.doc_text_signals import (
|
||||
run_all as run_signal_checks,
|
||||
build_signals_block_html,
|
||||
)
|
||||
cookie_doc_missing = not bool(doc_texts.get("cookie"))
|
||||
sig_findings = run_signal_checks(
|
||||
banner_result, doc_texts, cookie_doc_missing,
|
||||
)
|
||||
if sig_findings:
|
||||
signals_html = build_signals_block_html(sig_findings)
|
||||
except Exception as e:
|
||||
logger.warning("P35/P77/P78 signals-check failed: %s", e)
|
||||
|
||||
# P92 + P94: Banner-Konsistenz (CMP-Tool kaputt / Banner-vs-Doc-Diff)
|
||||
consistency_html = ""
|
||||
try:
|
||||
from compliance.services.banner_consistency_checks import (
|
||||
run_all as run_consistency_checks,
|
||||
build_consistency_block_html,
|
||||
)
|
||||
cookie_doc_for_check = (doc_texts.get("cookie")
|
||||
or doc_texts.get("dse") or "")
|
||||
cons_findings = run_consistency_checks(
|
||||
banner_result or {}, cookie_doc_for_check, cmp_vendors,
|
||||
doc_texts=doc_texts,
|
||||
)
|
||||
if cons_findings:
|
||||
consistency_html = build_consistency_block_html(cons_findings)
|
||||
logger.info("P92/P94: %d Konsistenz-Findings", len(cons_findings))
|
||||
except Exception as e:
|
||||
logger.warning("P92/P94 consistency-check failed: %s", e)
|
||||
|
||||
# P73: MC-Solution-Generator — LLM-Vorschlaege pro HIGH-Fail.
|
||||
# Max 3 Solutions pro Doc-Type (global max. 8) um Latenz < 60s zu halten.
|
||||
solutions_html = ""
|
||||
try:
|
||||
from compliance.services.mc_solution_generator import (
|
||||
generate_solutions_for_fails, build_solutions_block_html,
|
||||
)
|
||||
all_solutions: list[dict] = []
|
||||
for dt, fails in fails_by_doc.items():
|
||||
if not fails:
|
||||
continue
|
||||
doc_txt = doc_texts.get(dt) or doc_texts.get("dse") or ""
|
||||
if not doc_txt or len(doc_txt) < 500:
|
||||
continue
|
||||
sols = await generate_solutions_for_fails(
|
||||
fails, doc_txt, dt, limit=3,
|
||||
)
|
||||
all_solutions.extend(sols)
|
||||
if len(all_solutions) >= 8:
|
||||
break # global cap
|
||||
if all_solutions:
|
||||
solutions_html = build_solutions_block_html(all_solutions[:8])
|
||||
logger.info("P73: %d MC-Solutions generiert", len(all_solutions))
|
||||
except Exception as e:
|
||||
logger.warning("P73 MC-Solution-Generator skipped: %s", e)
|
||||
|
||||
# P71: JC-vs-AVV Entscheidungsbaum (nur wenn DSE ambig)
|
||||
jc_decision_html = ""
|
||||
try:
|
||||
from compliance.services.jc_avv_decision import (
|
||||
build_jc_avv_decision_html,
|
||||
)
|
||||
jc_decision_html = build_jc_avv_decision_html(doc_texts.get("dse"))
|
||||
except Exception as e:
|
||||
logger.warning("P71 jc_avv_decision skipped: %s", e)
|
||||
|
||||
# P6/P53/P55 — Branchen-Kontext + Site-History
|
||||
industry_ctx_html = ""
|
||||
try:
|
||||
from compliance.services.industry_library import (
|
||||
build_industry_context_block_html, load_site_profile,
|
||||
)
|
||||
from database import SessionLocal as _SLib
|
||||
_ind_db = _SLib()
|
||||
try:
|
||||
ind = (req.scan_context or {}).get("industry") if req.scan_context else None
|
||||
site_prof = load_site_profile(_ind_db, domain_for_exec or "")
|
||||
industry_ctx_html = build_industry_context_block_html(ind, site_prof)
|
||||
finally:
|
||||
_ind_db.close()
|
||||
except Exception as e:
|
||||
logger.warning("industry context skipped: %s", e)
|
||||
|
||||
# P106 — Internal-Checks-Block (interne Prozesse / Doku-Pflichten)
|
||||
internal_checks_html = ""
|
||||
try:
|
||||
from compliance.services.mc_audit_type import (
|
||||
build_internal_checks_block_html,
|
||||
)
|
||||
ic = (mc_split or {}).get("internal_checks") or []
|
||||
if ic:
|
||||
internal_checks_html = build_internal_checks_block_html(ic)
|
||||
logger.info(
|
||||
"P106: %d interne Checks (statt FAIL) im Block",
|
||||
len(ic),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("P106 internal_checks_html skipped: %s", e)
|
||||
|
||||
# P85 — Banner-Screenshot fuer visuellen Beweis (zwischen
|
||||
# GF-1-Pager und Detail-Bloecken)
|
||||
banner_shot_html = ""
|
||||
try:
|
||||
from compliance.services.banner_screenshot_block import (
|
||||
build_banner_screenshot_html,
|
||||
)
|
||||
banner_shot_html = build_banner_screenshot_html(banner_result)
|
||||
except Exception as e:
|
||||
logger.warning("P85 banner-screenshot skipped: %s", e)
|
||||
|
||||
# P82: GF-1-Pager ganz oben in der Mail — 5-Bullet-Zusammenfassung
|
||||
# damit die GF nicht 124k Char lesen muss.
|
||||
gf_one_pager_html = ""
|
||||
try:
|
||||
from compliance.services.gf_one_pager import build_gf_one_pager_html
|
||||
gf_one_pager_html = build_gf_one_pager_html(
|
||||
site_name=site_name_for_exec,
|
||||
scorecard=scorecard,
|
||||
previous_scorecard=prev_scorecard,
|
||||
banner_result=banner_result,
|
||||
library_mismatch_findings=mismatches,
|
||||
scan_context=req.scan_context,
|
||||
audit_quality_findings=audit_quality_findings,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("P82 GF-1-pager skipped: %s", e)
|
||||
|
||||
# A — Audit-Quality-Checks: Banner-Detect-Failure, Vendor-Extract
|
||||
# auffaellig duenn, URL-Fetch fehlgeschlagen → IMMER prominent zeigen.
|
||||
audit_quality_html = ""
|
||||
audit_quality_findings: list[dict] = []
|
||||
try:
|
||||
from compliance.services.audit_quality_checks import (
|
||||
run_all as run_audit_quality, build_audit_quality_block_html,
|
||||
)
|
||||
cookie_text_for_aq = doc_texts.get("cookie") or ""
|
||||
audit_quality_findings = run_audit_quality(
|
||||
banner_result, cookie_text_for_aq, cmp_vendors, doc_entries,
|
||||
)
|
||||
if audit_quality_findings:
|
||||
audit_quality_html = build_audit_quality_block_html(audit_quality_findings)
|
||||
logger.info(
|
||||
"audit-quality: %d Vorbehalte erkannt",
|
||||
len(audit_quality_findings),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("audit-quality-checks failed: %s", e)
|
||||
|
||||
# Doc-Input-Warnings — wenn User Text ins falsche Feld gepastet hat
|
||||
input_warn_html = ""
|
||||
try:
|
||||
from compliance.services.doc_input_warnings import (
|
||||
collect_warnings, build_warnings_block_html,
|
||||
)
|
||||
warns = collect_warnings(doc_entries)
|
||||
if warns:
|
||||
input_warn_html = build_warnings_block_html(warns)
|
||||
logger.info("doc-input-warnings: %d Mismatches gefunden", len(warns))
|
||||
except Exception as e:
|
||||
logger.warning("doc-input-warnings skipped: %s", e)
|
||||
|
||||
# P86: Branchen-Benchmark (nur wenn scan_context.industry gesetzt)
|
||||
bench_html = ""
|
||||
try:
|
||||
from database import SessionLocal as _SLb
|
||||
from compliance.services.industry_benchmark import (
|
||||
compute_benchmark, build_benchmark_html, _extract_score,
|
||||
)
|
||||
industry = (req.scan_context or {}).get("industry") if req.scan_context else None
|
||||
curr_score = _extract_score(banner_result)
|
||||
if industry and curr_score is not None:
|
||||
_b_db = _SLb()
|
||||
try:
|
||||
bench = compute_benchmark(
|
||||
_b_db, industry, curr_score, check_id,
|
||||
)
|
||||
if bench:
|
||||
bench_html = build_benchmark_html(bench)
|
||||
finally:
|
||||
_b_db.close()
|
||||
except Exception as e:
|
||||
logger.warning("P86 industry-benchmark skipped: %s", e)
|
||||
|
||||
# P84: Diff-Mode — "Seit letztem Lauf X Findings weg, Y neue".
|
||||
diff_html = ""
|
||||
try:
|
||||
from database import SessionLocal as _SL
|
||||
from compliance.services.run_diff import (
|
||||
compute_diff, build_diff_block_html,
|
||||
)
|
||||
_diff_db = _SL()
|
||||
try:
|
||||
diff = compute_diff(
|
||||
_diff_db, check_id, domain_for_exec or "",
|
||||
banner_result, scorecard,
|
||||
)
|
||||
if diff:
|
||||
diff_html = build_diff_block_html(diff)
|
||||
finally:
|
||||
_diff_db.close()
|
||||
except Exception as e:
|
||||
logger.warning("P84 diff-mode skipped: %s", e)
|
||||
|
||||
full_html = (
|
||||
critical_html + scope_disclaimer_html + exec_summary_html
|
||||
gf_one_pager_html + audit_quality_html + input_warn_html
|
||||
+ bench_html + diff_html
|
||||
+ critical_html + scope_disclaimer_html + exec_summary_html
|
||||
+ cookie_arch_html + summary_html + scanned_html + profile_html
|
||||
+ scorecard_html + redundancy_html
|
||||
+ providers_html + banner_deep_html + vvt_html + report_html
|
||||
+ scorecard_html + internal_checks_html + redundancy_html
|
||||
+ industry_ctx_html
|
||||
+ banner_shot_html
|
||||
+ providers_html + banner_deep_html
|
||||
+ cookie_audit_html
|
||||
+ tcf_authority_html
|
||||
+ entropy_html
|
||||
+ network_trace_html
|
||||
+ library_mismatch_html
|
||||
+ consistency_html + signals_html + solutions_html
|
||||
+ jc_decision_html
|
||||
+ vvt_html + report_html
|
||||
)
|
||||
|
||||
# Step 6: Send email — derive site name primarily from entered URL.
|
||||
@@ -1090,6 +1698,7 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
} if banner_result else None),
|
||||
"tcf_vendors": vvt_entries if tcf_vendors else [],
|
||||
"cmp_vendors": cmp_vendors,
|
||||
"cookie_audit": cookie_audit if cookie_audit else None,
|
||||
"total_documents": len(results),
|
||||
"total_findings": total_findings,
|
||||
"email_status": email_result.get("status", "failed"),
|
||||
@@ -1115,7 +1724,7 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
banner_result=banner_result,
|
||||
profile=profile,
|
||||
cmp_vendors=cmp_vendors,
|
||||
scan_context=None, # P79 will fill this
|
||||
scan_context=req.scan_context, # P79
|
||||
site_label=site_name,
|
||||
notes=f"recipient={req.recipient}",
|
||||
)
|
||||
@@ -1217,17 +1826,33 @@ async def _fetch_text(url: str, doc_type: str = "") -> tuple[str, list[dict]]:
|
||||
short_extract_types = {"cookie", "dse", "datenschutz", "privacy", "social_media"}
|
||||
max_docs = 1 if (doc_type or "") in short_extract_types else 3
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
# P90: 120s reicht nicht fuer BMW-Impressum (Auto-Discovery folgt
|
||||
# 3 Sub-Docs). 240s gibt Spielraum. Mercedes faellt aktuell mit
|
||||
# 120s auch oft an Akamai-Latenz.
|
||||
async with httpx.AsyncClient(timeout=240.0) as client:
|
||||
resp = await client.post(
|
||||
f"{CONSENT_TESTER_URL}/dsi-discovery",
|
||||
json={"url": url, "max_documents": max_docs},
|
||||
timeout=120.0,
|
||||
timeout=240.0,
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
payload = resp.json()
|
||||
docs = payload.get("documents", [])
|
||||
cmp_payloads = payload.get("cmp_payloads") or []
|
||||
cmp_cookie_text = payload.get("cmp_cookie_text") or ""
|
||||
# D — wenn der consent-tester HTML-Tabellen aus dem DOM
|
||||
# extrahiert hat, in die cmp_payloads als "html_table"
|
||||
# einschleusen damit das Backend sie via cookies_table_parser
|
||||
# verarbeiten kann.
|
||||
for doc in (docs or []):
|
||||
for tbl in (doc.get("tables") or []):
|
||||
if not tbl or len(tbl) < 3:
|
||||
continue
|
||||
cmp_payloads.append({
|
||||
"kind": "html_table",
|
||||
"url": doc.get("url", ""),
|
||||
"rows": tbl,
|
||||
})
|
||||
if docs:
|
||||
texts = []
|
||||
for doc in docs:
|
||||
@@ -1256,8 +1881,24 @@ async def _fetch_text(url: str, doc_type: str = "") -> tuple[str, list[dict]]:
|
||||
logger.info("Merged %d docs from %s (%d words)",
|
||||
len(texts), url, len(merged.split()))
|
||||
return merged, cmp_payloads
|
||||
# P90-Bug-Fix: auch wenn DSE-Text zu kurz fuer 100-Wort-
|
||||
# Schwelle ist, die captured CMP-Payloads NICHT verwerfen.
|
||||
# BMW-Bug: DSE liefert 10 Wort SPA-Shell, aber ePaaS-JSON
|
||||
# (393KB) wurde captured. Backend braucht die fuer
|
||||
# extract_vendors_from_payloads (VVT-Tabelle).
|
||||
if cmp_payloads:
|
||||
logger.info(
|
||||
"P90: keeping %d CMP payloads for %s despite "
|
||||
"short text (%d words) — HTTP fallback runs in parallel",
|
||||
len(cmp_payloads), url,
|
||||
len((merged or cmp_cookie_text).split()),
|
||||
)
|
||||
fallback_text = merged or cmp_cookie_text or ""
|
||||
return fallback_text, cmp_payloads
|
||||
except Exception as e:
|
||||
logger.warning("Consent-tester fetch failed for %s: %s", url, e)
|
||||
# P90: verbose exception fuer Diagnose (war vorher empty)
|
||||
logger.warning("Consent-tester fetch failed for %s: %s (%s)",
|
||||
url, str(e) or "(empty)", type(e).__name__)
|
||||
|
||||
# 2. Fallback: direct HTTP fetch (works for SSR pages like BMW).
|
||||
# P7: kenntlicher UA + per-Domain Rate-Limit.
|
||||
@@ -1304,11 +1945,31 @@ async def _autodiscover_missing(
|
||||
"""
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Submitted doc_types (those the user actually entered URL or text for).
|
||||
# VW-Fix: nur Doc-Types mit substantieller Text-Ausbeute zaehlen
|
||||
# als 'submitted'. Wenn der User eine URL eingegeben hat aber die
|
||||
# 404 liefert (VW cookie-richtlinie.html), oder der Crawler weniger
|
||||
# als 200 Zeichen extrahiert (SPA-Shell), als 'missing' behandeln
|
||||
# damit der Discovery-Pass alternative URLs probiert.
|
||||
_MIN_USEFUL_CHARS = 200
|
||||
submitted_types = {
|
||||
e["doc_type"] for e in doc_entries
|
||||
if e.get("text") or (e.get("url") or "").strip()
|
||||
if len((e.get("text") or "").strip()) >= _MIN_USEFUL_CHARS
|
||||
}
|
||||
# Markiere die fehlgeschlagenen URL-Submissions damit der Discovery
|
||||
# ihre URL nicht erneut probiert (waere sinnlos).
|
||||
failed_urls: set[str] = {
|
||||
(e.get("url") or "").strip()
|
||||
for e in doc_entries
|
||||
if (e.get("url") or "").strip()
|
||||
and len((e.get("text") or "").strip()) < _MIN_USEFUL_CHARS
|
||||
}
|
||||
if failed_urls:
|
||||
logger.info(
|
||||
"VW-Fix: %d eingegebene URLs lieferten <%d Zeichen — Discovery "
|
||||
"soll Alternativen probieren: %s",
|
||||
len(failed_urls), _MIN_USEFUL_CHARS,
|
||||
", ".join(list(failed_urls)[:3]),
|
||||
)
|
||||
# Map alias types to canonical
|
||||
submitted_canon = {
|
||||
"dse" if t in ("datenschutz", "privacy") else t for t in submitted_types
|
||||
@@ -1380,11 +2041,11 @@ async def _autodiscover_missing(
|
||||
disc_cookie_texts: list[str] = []
|
||||
for base in crawl_bases:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=180.0) as client:
|
||||
async with httpx.AsyncClient(timeout=300.0) as client: # P90: 180s -> 300s
|
||||
resp = await client.post(
|
||||
f"{CONSENT_TESTER_URL}/dsi-discovery",
|
||||
json={"url": base, "max_documents": 15},
|
||||
timeout=180.0,
|
||||
timeout=300.0, # P90: 180s -> 300s
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
logger.warning("auto-discovery: HTTP %d for %s",
|
||||
@@ -1402,7 +2063,9 @@ async def _autodiscover_missing(
|
||||
len(body.get("cmp_payloads") or []),
|
||||
len(cmp_text.split()))
|
||||
except Exception as e:
|
||||
logger.warning("auto-discovery failed for %s: %s", base, e)
|
||||
# P90: verbose exception fuer Diagnose
|
||||
logger.warning("auto-discovery failed for %s: %s (%s)",
|
||||
base, str(e) or "(empty)", type(e).__name__)
|
||||
|
||||
# Classify each discovered doc into a canonical doc_type
|
||||
by_type: dict[str, dict] = {}
|
||||
@@ -1416,16 +2079,21 @@ async def _autodiscover_missing(
|
||||
if canon and canon in missing and canon not in by_type:
|
||||
by_type[canon] = d
|
||||
|
||||
# Append a new entry for every missing canonical type. Auto-discovered
|
||||
# Append/Update entry for every missing canonical type. Auto-discovered
|
||||
# ones get the text/URL filled; unmatched ones stay empty so the
|
||||
# padding step renders them as 'Auf der Website nicht gefunden'.
|
||||
# VW-Fix: wenn schon ein leerer entry existiert (URL gesetzt, aber
|
||||
# fetch hat 0/Mini-Text geliefert), in-place updaten statt duplizieren.
|
||||
filled = 0
|
||||
for dt in missing:
|
||||
new_entry: dict = {
|
||||
existing = next((e for e in doc_entries
|
||||
if e.get("doc_type") == dt), None)
|
||||
new_entry: dict = existing if existing else {
|
||||
"doc_type": dt, "url": "", "text": "", "word_count": 0,
|
||||
"auto_discovered": False, "discovery_attempted": True,
|
||||
"cmp_payloads": [],
|
||||
}
|
||||
new_entry["discovery_attempted"] = True
|
||||
d = by_type.get(dt)
|
||||
if d:
|
||||
full = d.get("full_text") or d.get("text_preview") or ""
|
||||
@@ -1444,21 +2112,24 @@ async def _autodiscover_missing(
|
||||
full = cmp_merged
|
||||
if len(full.split()) >= 100:
|
||||
new_entry["text"] = full
|
||||
# Behalte die original URL als "rejected_url" damit Audit
|
||||
# zeigt 'X war 404, wir haben Y gefunden'.
|
||||
if existing and (existing.get("url") or "").strip() in failed_urls:
|
||||
new_entry["rejected_url"] = existing.get("url")
|
||||
new_entry["url"] = d.get("url", "")
|
||||
new_entry["word_count"] = len(full.split())
|
||||
new_entry["auto_discovered"] = True
|
||||
# Auto-discovery happens on the HOMEPAGE — any CMP payload
|
||||
# captured at that level likely belongs to the cookie page
|
||||
# (CMP widget loaded site-wide). Attach to 'cookie' entry.
|
||||
if dt == "cookie" and disc_payloads:
|
||||
new_entry["cmp_payloads"] = disc_payloads
|
||||
doc_texts[dt] = full
|
||||
filled += 1
|
||||
logger.info(
|
||||
"auto-discovered %s on %s: %s (%d words)",
|
||||
"auto-discovered %s on %s: %s (%d words)%s",
|
||||
dt, base, d.get("url", "")[:80], new_entry["word_count"],
|
||||
" [REPLACED failed URL]" if existing else "",
|
||||
)
|
||||
doc_entries.append(new_entry)
|
||||
if not existing:
|
||||
doc_entries.append(new_entry)
|
||||
|
||||
logger.info(
|
||||
"auto-discovery: filled %d/%d missing types from %s",
|
||||
@@ -1799,6 +2470,12 @@ _DOC_TYPE_LABELS = {
|
||||
"social_media": "Social Media Datenschutz",
|
||||
"nutzungsbedingungen": "Nutzungsbedingungen",
|
||||
"dsb": "DSB-Kontakt",
|
||||
# P74: Legal-Notice / Rechtliche Hinweise (IP, Forward-Looking, Risiko)
|
||||
"legal_notice": "Rechtliche Hinweise",
|
||||
# P96: Digital Services Act-Pflichtangaben (Art. 12+17 DSA)
|
||||
"dsa": "DSA-Pflichtangaben",
|
||||
# P97: Lizenzhinweise Dritter (OSS-Compliance)
|
||||
"lizenzhinweise": "Lizenzhinweise Dritter",
|
||||
}
|
||||
|
||||
# Canonical doc types in the same order as the frontend ComplianceCheckTab.
|
||||
|
||||
@@ -26,6 +26,47 @@ def _fmt_eur_range(low: int, high: int) -> str:
|
||||
return f"{low:,}–{high:,} €".replace(",", ".")
|
||||
|
||||
|
||||
def _build_score_band_block(pct: int, color: str) -> list[str]:
    """P34 - render the single score-classification line shown below the KPIs.

    Args:
        pct: Score percentage; passed to ``_score_band_explanation`` and
            printed next to the band label.
        color: CSS colour value used for the left border and the label.

    Returns:
        A one-element list holding the HTML fragment.
    """
    label, expectation = _score_band_explanation(pct)
    box_style = (
        "margin-top:10px;padding:10px 14px;"
        "background:rgba(255,255,255,0.04);"
        f"border-left:3px solid {color};"
        "border-radius:4px"
    )
    headline = f'<strong style="color:{color}">{label} ({pct}%)</strong>'
    fragment = (
        f'<div style="{box_style}">'
        '<div style="font-size:11px;color:#cbd5e1">'
        f'{headline} — {expectation}'
        '</div></div>'
    )
    return [fragment]
|
||||
|
||||
|
||||
def _score_band_explanation(pct: int) -> tuple[str, str]:
|
||||
"""P34 — Was bedeutet der Score: wo MUESSTE man stehen.
|
||||
|
||||
Returns (label, what_to_expect)."""
|
||||
if pct >= 85:
|
||||
return (
|
||||
"Sehr gut", "Praxis-uebliche DSGVO-Risikolage. "
|
||||
"Standard-Pflege reicht — jaehrliche Pruefung empfohlen.",
|
||||
)
|
||||
if pct >= 70:
|
||||
return (
|
||||
"Akzeptabel", "Branchen-Median. Verbleibende Findings sind "
|
||||
"meist Formalia — Empfehlung: einmaliges Aufraeumen, dann "
|
||||
"Halbjahres-Check.",
|
||||
)
|
||||
if pct >= 50:
|
||||
return (
|
||||
"Handlungsbedarf", "Mehrere wesentliche Themen offen. "
|
||||
"Empfehlung: priorisierte Abarbeitung der HIGH-Findings "
|
||||
"binnen 4-8 Wochen mit DSB + Web-Team.",
|
||||
)
|
||||
return (
|
||||
"Erhoehtes Risiko", "Mehrere Kern-Pflichten fehlen oder sind "
|
||||
"veraltet. Empfehlung: kurzfristiger Termin mit DSB / Rechtsabteilung "
|
||||
"und Web-Team zur Priorisierung.",
|
||||
)
|
||||
|
||||
|
||||
def build_exec_summary_html(
|
||||
scorecard: dict | None,
|
||||
previous_scorecard: dict | None,
|
||||
@@ -117,6 +158,9 @@ def build_exec_summary_html(
|
||||
|
||||
'</table>',
|
||||
|
||||
# P34 — Score-Einordnung "wer wo stehen muss"
|
||||
*(_build_score_band_block(pct, score_color) if scorecard else []),
|
||||
|
||||
# CTAs
|
||||
'<div style="margin-top:14px;padding-top:12px;border-top:1px solid '
|
||||
'rgba(255,255,255,0.1);text-align:center">',
|
||||
|
||||
@@ -98,19 +98,35 @@ def _load_template(db: Session, document_type: str) -> dict[str, Any] | None:
|
||||
}
|
||||
|
||||
|
||||
def _render_one(db: Session, doc_type: str, context: dict[str, Any]) -> DocumentResult | None:
|
||||
def _safe_slug(name: str) -> str:
|
||||
"""Erzeugt einen filename-tauglichen Slug aus einem Namen."""
|
||||
import re as _re
|
||||
s = _re.sub(r"[^a-zA-Z0-9_-]+", "_", name.strip())
|
||||
return s.strip("_") or "Person"
|
||||
|
||||
|
||||
def _render_one(
|
||||
db: Session,
|
||||
doc_type: str,
|
||||
context: dict[str, Any],
|
||||
name_suffix: str = "",
|
||||
) -> DocumentResult | None:
|
||||
template = _load_template(db, doc_type)
|
||||
if not template:
|
||||
logger.warning("No template found for document_type=%s", doc_type)
|
||||
return None
|
||||
rendered_md = render_template(template["content"], context)
|
||||
title = template.get("title") or DOC_TITLES.get(doc_type, doc_type)
|
||||
if name_suffix:
|
||||
title = f"{title} — {name_suffix}"
|
||||
docx_bytes = markdown_to_docx_bytes(rendered_md, title=None)
|
||||
from datetime import datetime
|
||||
suffix_slug = f"_{_safe_slug(name_suffix)}" if name_suffix else ""
|
||||
company_slug = _safe_slug(context.get("COMPANY_NAME", "Unternehmen"))
|
||||
return DocumentResult(
|
||||
document_type=doc_type,
|
||||
title=title,
|
||||
filename=f"{doc_type}_{context.get('COMPANY_NAME', 'Unternehmen')}.docx".replace(" ", "_"),
|
||||
filename=f"{doc_type}{suffix_slug}_{company_slug}.docx",
|
||||
content_base64=base64.b64encode(docx_bytes).decode("ascii"),
|
||||
size_bytes=len(docx_bytes),
|
||||
generated_at=datetime.utcnow().isoformat() + "Z",
|
||||
@@ -118,6 +134,56 @@ def _render_one(db: Session, doc_type: str, context: dict[str, Any]) -> Document
|
||||
)
|
||||
|
||||
|
||||
# Document types that are generated once PER person (founder / managing
# director) instead of once per request:
#   - ip_assignment_agreement: one per founder (individual IP)
#   - managing_director_employment_contract: one per managing director
PER_PERSON_DOCS = {"ip_assignment_agreement", "managing_director_employment_contract"}
|
||||
|
||||
|
||||
def _build_person_context(
|
||||
base_ctx: dict[str, Any],
|
||||
person: dict[str, Any],
|
||||
doc_type: str,
|
||||
gf_contract: dict[str, Any] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Erweitert base_context um person-spezifische Felder fuer Per-Person-Dokumente."""
|
||||
ctx = dict(base_ctx)
|
||||
name = person.get("name", "")
|
||||
ctx["ASSIGNOR_NAME"] = name
|
||||
ctx["ASSIGNOR_BIRTHDATE"] = person.get("geburtsdatum", "")
|
||||
ctx["ASSIGNOR_ADDRESS"] = person.get("adresse", "")
|
||||
ctx["ASSIGNOR_ROLE"] = person.get("internal_role") or "Gründer und Geschäftsführer"
|
||||
ctx["HAS_ACADEMIC_BACKGROUND"] = bool(person.get("has_academic_background"))
|
||||
# GF-Vertrag spezifisch
|
||||
ctx["GF_NAME"] = name
|
||||
ctx["GF_BIRTHDATE"] = person.get("geburtsdatum", "")
|
||||
ctx["GF_ADDRESS"] = person.get("adresse", "")
|
||||
ctx["GF_INTERNAL_TITLE"] = person.get("internal_role", "Geschäftsführer")
|
||||
# IP-Bereiche: Person-spezifisch wenn vorhanden
|
||||
ip_areas = person.get("ip_areas") or []
|
||||
if ip_areas:
|
||||
if isinstance(ip_areas, list):
|
||||
ctx["IP_LIST_DETAILS"] = "\n".join(
|
||||
f"- {area}" for area in ip_areas
|
||||
)
|
||||
else:
|
||||
ctx["IP_LIST_DETAILS"] = str(ip_areas)
|
||||
# GF-Contract Daten anwenden wenn vorhanden
|
||||
if gf_contract:
|
||||
if gf_contract.get("gross_annual_salary_eur"):
|
||||
ctx["GROSS_ANNUAL_SALARY_EUR"] = f"{gf_contract['gross_annual_salary_eur']:,}".replace(",", ".")
|
||||
ctx["HAS_BONUS"] = bool(gf_contract.get("has_bonus"))
|
||||
ctx["HAS_COMPANY_CAR"] = bool(gf_contract.get("has_company_car"))
|
||||
ctx["HAS_BAV"] = bool(gf_contract.get("has_bav"))
|
||||
ctx["VACATION_DAYS"] = gf_contract.get("vacation_days", 30)
|
||||
ctx["KUENDIGUNGSFRIST_GESELLSCHAFT_MONATE"] = gf_contract.get("kuendigungsfrist_gesellschaft_monate", 6)
|
||||
ctx["KUENDIGUNGSFRIST_GF_MONATE"] = gf_contract.get("kuendigungsfrist_gf_monate", 3)
|
||||
ctx["HAS_PARA_181_RELEASE"] = bool(gf_contract.get("para_181_release"))
|
||||
ctx["SV_STATUS"] = gf_contract.get("sv_status", "sozialversicherungsfrei")
|
||||
return ctx
|
||||
|
||||
|
||||
@router.post("/generate", response_model=GenerationResponse)
|
||||
def generate_documents(req: GenerationRequest, request: Request) -> GenerationResponse:
|
||||
"""Hauptendpunkt: nimmt Wizard-State entgegen, generiert DOCX fuer alle ausgewaehlten Dokumente."""
|
||||
@@ -130,12 +196,47 @@ def generate_documents(req: GenerationRequest, request: Request) -> GenerationRe
|
||||
results: list[DocumentResult] = []
|
||||
warnings: list[str] = []
|
||||
|
||||
# Gesellschafter + GF-Listen aus Request
|
||||
gesellschafter = req.gesellschafter
|
||||
gf_list = [g for g in gesellschafter if g.get("is_geschaeftsfuehrer")]
|
||||
gf_contracts_map = {
|
||||
c["gesellschafter_id"]: c
|
||||
for c in req.gf_contracts
|
||||
if c.get("gesellschafter_id")
|
||||
}
|
||||
|
||||
for doc_type in req.selected_documents:
|
||||
result = _render_one(db, doc_type, context)
|
||||
if result is None:
|
||||
warnings.append(f"Template '{doc_type}' nicht in Datenbank gefunden")
|
||||
continue
|
||||
results.append(result)
|
||||
if doc_type in PER_PERSON_DOCS:
|
||||
# Pro Person ein Dokument
|
||||
if doc_type == "ip_assignment_agreement":
|
||||
# IP-Assignment: pro Gründer (alle Gesellschafter, nicht nur GFs)
|
||||
persons = gesellschafter or [{}]
|
||||
elif doc_type == "managing_director_employment_contract":
|
||||
# GF-Vertrag: nur pro GF
|
||||
persons = gf_list or [{}]
|
||||
else:
|
||||
persons = [{}]
|
||||
if not persons:
|
||||
warnings.append(f"Keine Personen für '{doc_type}' vorhanden")
|
||||
continue
|
||||
for p in persons:
|
||||
contract = gf_contracts_map.get(p.get("id"))
|
||||
person_ctx = _build_person_context(context, p, doc_type, contract)
|
||||
result = _render_one(
|
||||
db, doc_type, person_ctx,
|
||||
name_suffix=p.get("name", "")
|
||||
)
|
||||
if result is None:
|
||||
warnings.append(f"Template '{doc_type}' nicht in Datenbank gefunden")
|
||||
break
|
||||
results.append(result)
|
||||
else:
|
||||
# Standard: ein Dokument pro Auswahl
|
||||
result = _render_one(db, doc_type, context)
|
||||
if result is None:
|
||||
warnings.append(f"Template '{doc_type}' nicht in Datenbank gefunden")
|
||||
continue
|
||||
results.append(result)
|
||||
|
||||
if not results:
|
||||
raise HTTPException(
|
||||
|
||||
@@ -0,0 +1,306 @@
|
||||
"""License attribution endpoints — Task #23 Stufe 1-4.
|
||||
|
||||
The audit (Task #22) classified all 314,811 canonical_controls into
|
||||
license_rule 1/2/3. The frontend, PDF renderer, and tech-file generator
|
||||
now need to surface that classification in the form of:
|
||||
|
||||
- Stufe 1: a global /licenses overview page
|
||||
- Stufe 2: an auto-footer in every exported PDF
|
||||
- Stufe 3: an inline source badge on every rendered hazard/measure
|
||||
- Stufe 4: a sources appendix in tech-file bundles
|
||||
|
||||
This module exposes three endpoints that all four stages consume:
|
||||
|
||||
GET /api/compliance/licenses/overview
|
||||
Global aggregation by rule + per-source counts. Drives Stufe 1.
|
||||
|
||||
POST /api/compliance/licenses/aggregate
|
||||
Body: {"control_uuids": ["uuid1", ...]}.
|
||||
Returns per-rule grouping with source breakdown. Used by PDF
|
||||
footer (Stufe 2) and tech-file appendix (Stufe 4) to build the
|
||||
"sources used in this document" list.
|
||||
|
||||
GET /api/compliance/licenses/source-info/{control_uuid}
|
||||
Single-control lookup for the inline source badge tooltip
|
||||
(Stufe 3). Returns rule, source regulation, attribution text.
|
||||
|
||||
Why a new module instead of extending canonical_control_routes:
|
||||
- canonical_control_routes serves the legacy SPDX-style license matrix
|
||||
(canonical_control_licenses + canonical_control_sources, ~10 rows).
|
||||
- This module is built on regulation_registry (252 rows) + the
|
||||
license_rule on each control. Both schemas coexist; this module
|
||||
doesn't disturb the legacy endpoints.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Optional
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
|
||||
router = APIRouter(prefix="/licenses", tags=["licenses"])
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Rule labels — used by frontend renderer
|
||||
# ============================================================================
|
||||
|
||||
# Per-rule presentation metadata, keyed by the integer license_rule stored on
# each control. The flags say whether the full text may be rendered and
# whether an attribution has to accompany it.
RULE_LABELS = {
    1: dict(
        code="R1",
        label_de="Wörtlich übernehmbar",
        label_en="Verbatim, no attribution required",
        render_full_text=True,
        attribution_required=False,
    ),
    2: dict(
        code="R2",
        label_de="Wörtlich mit Attribution",
        label_en="Verbatim with attribution",
        render_full_text=True,
        attribution_required=True,
    ),
    3: dict(
        code="R3",
        label_de="Nur Identifier zitieren",
        label_en="Identifier citation only",
        render_full_text=False,
        attribution_required=False,
    ),
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Response Schemas
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# One source regulation inside a rule bucket, together with the number of
# controls attributed to it. Kept as comments rather than a docstring so the
# generated schema stays unchanged.
class SourceCount(BaseModel):
    # Registry id; the endpoints fall back to the regulation name when the
    # name has no registry entry.
    regulation_id: str
    regulation_name_de: Optional[str]
    license_rule: int
    # Registry metadata below; None when the source has no registry entry.
    license_type: Optional[str]
    attribution: Optional[str]
    jurisdiction: Optional[str]
    source_type: Optional[str]
    # Number of controls counted for this source and rule.
    n_controls: int
|
||||
|
||||
|
||||
# All sources that share one license rule, plus the rule's label metadata.
class RuleBucket(BaseModel):
    rule: int
    # Label and flags, filled from RULE_LABELS by _bucket().
    label_de: str
    label_en: str
    attribution_required: bool
    render_full_text: bool
    # Sum of n_controls over `sources`, and len(sources).
    total_controls: int
    distinct_sources: int
    sources: list[SourceCount]
|
||||
|
||||
|
||||
# Response body of GET /overview.
class OverviewResponse(BaseModel):
    # Sum of all per-(source, rule) counts.
    total_controls: int
    # One bucket per license rule.
    buckets: list[RuleBucket]
|
||||
|
||||
|
||||
# Request body of POST /aggregate.
class AggregateRequest(BaseModel):
    # Controls whose sources should be aggregated.
    control_uuids: list[UUID]
|
||||
|
||||
|
||||
# Response body of POST /aggregate.
class AggregateResponse(BaseModel):
    # Number of UUIDs in the request.
    total_in_request: int
    # Sum of the per-(source, rule) counts returned by the query.
    matched: int
    # Only rules that have at least one source.
    buckets: list[RuleBucket]
|
||||
|
||||
|
||||
# Response body of GET /source-info/{control_uuid}: license rule and source
# metadata for a single control.
class SourceInfo(BaseModel):
    control_uuid: UUID
    # None when the control has no license_rule.
    license_rule: Optional[int]
    license_label_de: Optional[str]
    # Both flags are False when no rule metadata is available.
    attribution_required: bool
    render_full_text: bool
    # Source regulation and registry metadata; None when not joined.
    regulation_id: Optional[str]
    regulation_name_de: Optional[str]
    license_type: Optional[str]
    attribution: Optional[str]
    source_url: Optional[str]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Endpoints
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _bucket(rule: int, sources: list[SourceCount]) -> RuleBucket:
    # Wrap the sources of one license rule into a RuleBucket. Rules that are
    # not in RULE_LABELS are presented with the metadata of rule 3.
    labels = RULE_LABELS.get(rule, RULE_LABELS[3])
    control_total = 0
    for source in sources:
        control_total += source.n_controls
    return RuleBucket(
        rule=rule,
        label_de=labels["label_de"],
        label_en=labels["label_en"],
        attribution_required=labels["attribution_required"],
        render_full_text=labels["render_full_text"],
        total_controls=control_total,
        distinct_sources=len(sources),
        sources=sources,
    )
|
||||
|
||||
|
||||
@router.get("/overview", response_model=OverviewResponse)
def licenses_overview(db: Session = Depends(get_db)) -> OverviewResponse:
    """Global aggregation: total controls by rule, with per-source breakdown.

    Drives Stufe 1 (the /licenses page).
    """
    # Distinct controls per (source regulation, license rule). Controls
    # without a parent link are grouped under '(no source)'.
    count_query = text("""
        SELECT
            COALESCE(cpl.source_regulation, '(no source)') AS regulation_name,
            cc.license_rule,
            COUNT(DISTINCT cc.id) AS n
        FROM compliance.canonical_controls cc
        LEFT JOIN compliance.control_parent_links cpl ON cpl.control_uuid = cc.id
        WHERE cc.license_rule IS NOT NULL
        GROUP BY 1, 2
    """)
    count_rows = db.execute(count_query).fetchall()

    # Registry metadata, looked up by the German regulation name.
    registry_query = text("""
        SELECT regulation_name_de, regulation_id, license_type, attribution,
               jurisdiction, source_type
        FROM compliance.regulation_registry
    """)
    registry_by_name = {}
    for entry in db.execute(registry_query).fetchall():
        if entry.regulation_name_de:
            registry_by_name[entry.regulation_name_de] = entry

    # Multiple link rows per control are deduplicated by DISTINCT, but a
    # control with several source regulations is still counted once per
    # regulation. That is the intended design.
    counts: dict[tuple[int, str], int] = {}
    for record in count_rows:
        pair = (int(record.license_rule), record.regulation_name)
        counts[pair] = counts.get(pair, 0) + int(record.n)
    # NOTE(review): because of the per-regulation counting above, this grand
    # total can exceed the number of distinct controls. Confirm that is what
    # the /licenses page should display.
    grand_total = sum(counts.values())

    # Always report rules 1-3, even when a rule has no sources.
    grouped: dict[int, list[SourceCount]] = {1: [], 2: [], 3: []}
    for (rule, name), n_controls in counts.items():
        registry = registry_by_name.get(name)
        grouped.setdefault(rule, []).append(SourceCount(
            regulation_id=registry.regulation_id if registry else name,
            regulation_name_de=name,
            license_rule=rule,
            license_type=registry.license_type if registry else None,
            attribution=registry.attribution if registry else None,
            jurisdiction=registry.jurisdiction if registry else None,
            source_type=registry.source_type if registry else None,
            n_controls=n_controls,
        ))

    # Largest sources first inside each rule; the sort is stable.
    for source_list in grouped.values():
        source_list.sort(key=lambda entry: entry.n_controls, reverse=True)

    buckets = []
    for rule, source_list in sorted(grouped.items()):
        buckets.append(_bucket(rule, source_list))
    return OverviewResponse(total_controls=grand_total, buckets=buckets)
|
||||
|
||||
|
||||
@router.post("/aggregate", response_model=AggregateResponse)
def aggregate_for_controls(
    body: AggregateRequest,
    db: Session = Depends(get_db),
) -> AggregateResponse:
    """Per-control license aggregation for PDF footer (Stufe 2) and
    tech-file sources appendix (Stufe 4).

    Returns a per-rule breakdown of which sources contributed to the
    supplied control set. The frontend renderer turns this into the
    "Verwendete Quellen" footer.
    """
    requested = body.control_uuids
    if not requested:
        # Nothing requested, so skip the database.
        return AggregateResponse(total_in_request=0, matched=0, buckets=[])

    # Distinct controls per (source regulation, license rule), restricted to
    # the requested ids. The ids are bound as strings.
    count_query = text("""
        SELECT
            COALESCE(cpl.source_regulation, '(unknown)') AS regulation_name,
            cc.license_rule,
            COUNT(DISTINCT cc.id) AS n
        FROM compliance.canonical_controls cc
        LEFT JOIN compliance.control_parent_links cpl ON cpl.control_uuid = cc.id
        WHERE cc.id = ANY(:ids) AND cc.license_rule IS NOT NULL
        GROUP BY 1, 2
    """)
    id_params = {"ids": list(map(str, requested))}
    count_rows = db.execute(count_query, id_params).fetchall()

    # Registry metadata, looked up by the German regulation name.
    registry_query = text("""
        SELECT regulation_name_de, regulation_id, license_type, attribution,
               jurisdiction, source_type
        FROM compliance.regulation_registry
    """)
    registry_by_name = {}
    for entry in db.execute(registry_query).fetchall():
        if entry.regulation_name_de:
            registry_by_name[entry.regulation_name_de] = entry

    grouped: dict[int, list[SourceCount]] = {1: [], 2: [], 3: []}
    # NOTE(review): this sums the per-(regulation, rule) counts, so a control
    # linked to several source regulations is counted more than once and the
    # value can exceed total_in_request. Confirm that is intended.
    matched_sum = 0
    for record in count_rows:
        rule = int(record.license_rule)
        n_controls = int(record.n)
        matched_sum += n_controls
        name = record.regulation_name
        registry = registry_by_name.get(name)
        grouped.setdefault(rule, []).append(SourceCount(
            regulation_id=registry.regulation_id if registry else name,
            regulation_name_de=name,
            license_rule=rule,
            license_type=registry.license_type if registry else None,
            attribution=registry.attribution if registry else None,
            jurisdiction=registry.jurisdiction if registry else None,
            source_type=registry.source_type if registry else None,
            n_controls=n_controls,
        ))

    # Largest sources first inside each rule; the sort is stable.
    for source_list in grouped.values():
        source_list.sort(key=lambda entry: entry.n_controls, reverse=True)

    # Unlike the overview, rules without any source are left out.
    buckets = []
    for rule, source_list in sorted(grouped.items()):
        if source_list:
            buckets.append(_bucket(rule, source_list))

    return AggregateResponse(
        total_in_request=len(requested),
        matched=matched_sum,
        buckets=buckets,
    )
|
||||
|
||||
|
||||
@router.get("/source-info/{control_uuid}", response_model=SourceInfo)
def source_info_for_control(
    control_uuid: UUID,
    db: Session = Depends(get_db),
) -> SourceInfo:
    """Single-control source info for the inline source badge (Stufe 3).

    Used by the React `<SourceBadge>` component to populate its tooltip.
    """
    # NOTE(review): LIMIT 1 without ORDER BY means that for a control with
    # several parent links the database decides which link is returned.
    lookup = text("""
        SELECT cc.license_rule, cpl.source_regulation AS regulation_name,
               r.regulation_id, r.license_type, r.attribution, r.url AS source_url
        FROM compliance.canonical_controls cc
        LEFT JOIN compliance.control_parent_links cpl ON cpl.control_uuid = cc.id
        LEFT JOIN compliance.regulation_registry r ON r.regulation_name_de = cpl.source_regulation
        WHERE cc.id = :uuid
        LIMIT 1
    """)
    record = db.execute(lookup, {"uuid": str(control_uuid)}).fetchone()
    if record is None:
        raise HTTPException(status_code=404, detail="control not found")

    rule = None
    if record.license_rule is not None:
        rule = int(record.license_rule)

    # No rule, or a rule without metadata, leaves the label empty and both
    # flags False.
    labels = {}
    if rule:
        labels = RULE_LABELS.get(rule, {})

    return SourceInfo(
        control_uuid=control_uuid,
        license_rule=rule,
        license_label_de=labels.get("label_de"),
        attribution_required=labels.get("attribution_required", False),
        render_full_text=labels.get("render_full_text", False),
        regulation_id=record.regulation_id,
        regulation_name_de=record.regulation_name,
        license_type=record.license_type,
        attribution=record.attribution,
        source_url=record.source_url,
    )
|
||||
@@ -0,0 +1,50 @@
|
||||
{
|
||||
"source": "Verordnung (EU) 2015/758 - eCall",
|
||||
"official_url": "https://eur-lex.europa.eu/legal-content/DE/TXT/?uri=CELEX%3A32015R0758",
|
||||
"ingest_for": "RAG-Korpus (Compliance fuer Automotive-OEMs)",
|
||||
"chunks": [
|
||||
{
|
||||
"id": "ecall-art-3-1",
|
||||
"title": "Art. 3 (1) — bordeigenes eCall-System",
|
||||
"text": "Hersteller stellen sicher, dass alle neuen Typen von Personenkraftwagen und leichten Nutzfahrzeugen mit einem auf 112 basierten bordeigenen eCall-System ausgestattet sind, das den in dieser Verordnung festgelegten Anforderungen und harmonisierten Normen entspricht."
|
||||
},
|
||||
{
|
||||
"id": "ecall-art-6-1",
|
||||
"title": "Art. 6 (1) — Datenschutz",
|
||||
"text": "Bei der Verarbeitung personenbezogener Daten ueber das auf 112 basierte bordeigene eCall-System gewaehrleisten Hersteller die Einhaltung der Richtlinie 95/46/EG und der RL 2002/58/EG. Insbesondere muessen Fahrzeughalter darueber informiert werden, dass das System dauerhaft im Standby-Modus ist und im Falle eines schweren Unfalls automatisch ausgeloest wird."
|
||||
},
|
||||
{
|
||||
"id": "ecall-art-6-2",
|
||||
"title": "Art. 6 (2) — Datenverarbeitung",
|
||||
"text": "Die Verarbeitung personenbezogener Daten ueber das auf 112 basierte bordeigene eCall-System darf nur zum Zwecke der Bearbeitung von Notrufen erfolgen. Diese Daten sind unmittelbar nach Bearbeitung des Notrufs ohne automatisierte Speicherung zu loeschen, soweit nicht anders gesetzlich vorgesehen."
|
||||
},
|
||||
{
|
||||
"id": "ecall-art-6-3",
|
||||
"title": "Art. 6 (3) — Standortdaten",
|
||||
"text": "Die Standortdaten des Fahrzeugs werden zur Behandlung des Notrufes uebermittelt. Eine permanente Standortueberwachung ausserhalb von Notfaellen ist nicht zulaessig."
|
||||
},
|
||||
{
|
||||
"id": "ecall-art-6-4",
|
||||
"title": "Art. 6 (4) — Informationspflicht",
|
||||
"text": "Hersteller stellen sicher, dass in der technischen Dokumentation des Fahrzeugs klare und vollstaendige Informationen ueber die Verarbeitung personenbezogener Daten gegeben werden, einschliesslich des Rechts der betroffenen Person auf Auskunft und gegebenenfalls Berichtigung sowie Sperrung der sie betreffenden personenbezogenen Daten."
|
||||
},
|
||||
{
|
||||
"id": "ecall-art-6-5",
|
||||
"title": "Art. 6 (5) — Mehrwertdienste",
|
||||
"text": "Mehrwertdienste (z.B. private Pannenruf-Apps) duerfen nur mit ausdruecklicher Einwilligung des Fahrzeughalters in Anspruch genommen werden. Das auf 112 basierte bordeigene eCall-System darf nicht von diesen Mehrwertdiensten beeintraechtigt werden und muss kostenlos und fuer alle Fahrzeughalter verfuegbar sein."
|
||||
},
|
||||
{
|
||||
"id": "ecall-art-7",
|
||||
"title": "Art. 7 — Datenfluss",
|
||||
"text": "Der Mindestdatensatz (MSD) umfasst Fahrzeug-ID (VIN), Ausloesungsart, Zeitstempel, Standort, Fahrtrichtung, Antriebsenergie, Anzahl angeschnallter Insassen. Diese Daten gehen an die naechste oeffentliche Notrufabfragestelle (PSAP)."
|
||||
}
|
||||
],
|
||||
"compliance_implications": {
|
||||
"automotive_oem": [
|
||||
"Hersteller MUSS in der DSE den eCall-Datenfluss erklaeren (Art. 6 (4)).",
|
||||
"Standortdaten ausserhalb von Notfaellen sind UNZULAESSIG (Art. 6 (3)).",
|
||||
"Mehrwertdienste brauchen separate ausdrueckliche Einwilligung (Art. 6 (5)).",
|
||||
"Daten nach Notruf-Bearbeitung SOFORT zu loeschen (Art. 6 (2))."
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,213 @@
|
||||
"""
|
||||
A — Audit-Transparenz / Audit-Quality-Checks.
|
||||
|
||||
Wenn der Crawler nicht alles gefunden hat, MUSS die Mail das prominent
|
||||
zeigen — sonst denkt der User 'alles gut' obwohl die Datenlage Luecken
|
||||
hat.
|
||||
|
||||
Erkennt 4 Quality-Failures:
|
||||
1. banner_detected=False trotz vorhandenem Cookie-Doc → CMP-Tool ungeladen
|
||||
2. cookie_doc >= 3,000 words but fewer vendors than expected for its size (10-40) → vendor extract incomplete
|
||||
3. doc_text submitted aber 0 chars geladen → Crawler-Failure
|
||||
4. cmp_vendors > 0 aber alle aus llm_cascade ohne Library-Match → vermutl. unvollstaendig
|
||||
|
||||
Diese Findings landen IMMER im GF-1-Pager (auch wenn kein anderes
|
||||
HIGH-Finding da ist) — sie sagen "die Datenlage ist unvollstaendig,
|
||||
manuelle Pruefung empfohlen".
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _word_count(text: str | None) -> int:
|
||||
if not text:
|
||||
return 0
|
||||
return len(text.split())
|
||||
|
||||
|
||||
def check_banner_not_detected(
    banner_result: dict | None,
    cookie_doc_text: str | None,
) -> dict | None:
    """Check 1: banner explicitly not detected although a cookie document exists.

    Args:
        banner_result: Banner scan result; only ``banner_detected`` is read.
        cookie_doc_text: Text of the cookie document.

    Returns:
        A HIGH finding dict when ``banner_detected`` is set to something
        other than ``True`` and the cookie document has at least 5000
        characters; otherwise ``None`` (also when ``banner_result`` is not a
        dict or the flag is missing).
    """
    if not isinstance(banner_result, dict):
        return None

    flag = banner_result.get("banner_detected")
    banner_missing = not (flag is None or flag is True)
    if not banner_missing:
        return None

    # A short or missing cookie document is not enough evidence that a CMP
    # should have been there.
    if len(cookie_doc_text or "") < 5000:
        return None

    label = "Audit-Vorbehalt: Cookie-Banner konnte vom Crawler nicht geladen werden"
    detail = " ".join((
        "Unser Crawler konnte das CMP-Tool dieser Site nicht analysieren — "
        "weder Vendor-Liste noch Cookie-Verhalten konnten geprueft werden.",
        "Moegliche Ursachen: Anti-Bot-Schutz (Akamai/Cloudflare/DataDome) "
        "blockiert Playwright; das CMP-Skript laed nur fuer bestimmte "
        "Geo-Regionen; ein neues CMP-Tool das wir noch nicht unterstuetzen.",
        "Empfehlung: manuelle Pruefung des Banners durch DSB, alternativ "
        "Cookie-Tabelle im Audit-Tool direkt einfuegen (Copy-Paste-Modus).",
    ))
    legal_basis = (
        "Art. 5 (2) DSGVO Rechenschaftspflicht — der Audit-"
        "Befund muss transparent zwischen 'geprueft & OK' und "
        "'nicht pruefbar' unterscheiden."
    )
    finding = {
        "severity": "HIGH",
        "code": "audit_banner_not_detected",
        "label": label,
        "area": "Cookie-Banner",
        "owner": "DSB + Marketing/CMP-Admin",
        "detail": detail,
        "legal_basis": legal_basis,
    }
    return finding
|
||||
|
||||
|
||||
def check_vendor_extract_incomplete(
    cookie_doc_text: str | None,
    cmp_vendors: list | None,
) -> dict | None:
    """Check 2: large cookie document but few extracted vendors.

    The expected vendor count scales with the document size:
      * 3k-6k words   -> at least 10 vendors
      * 6k-10k words  -> at least 20 vendors
      * 10k-15k words -> at least 30 vendors
      * 15k+ words    -> at least 40 vendors

    Args:
        cookie_doc_text: Text of the cookie document.
        cmp_vendors: Extracted vendor list; ``None`` counts as empty.

    Returns:
        ``None`` when the document has fewer than 3000 words or the vendor
        count meets the expectation. Otherwise a finding dict: HIGH from
        8000 words upwards, MEDIUM below.
    """
    wc = _word_count(cookie_doc_text)
    n_vendors = len(cmp_vendors or [])
    if wc < 3000:
        return None

    # Heuristic expectation by document size.
    if wc >= 15000:
        expected = 40
    elif wc >= 10000:
        expected = 30
    elif wc >= 6000:
        expected = 20
    else:
        expected = 10
    if n_vendors >= expected:
        return None

    # Format only the number with "." as thousands separator. Applying
    # .replace(",", ".") to the finished sentence would also turn its
    # punctuation commas into periods.
    wc_display = f"{wc:,}".replace(",", ".")
    return {
        "severity": "HIGH" if wc >= 8000 else "MEDIUM",
        "code": "audit_vendor_extract_thin",
        "label": (
            f"Audit-Vorbehalt: Cookie-Richtlinie hat {wc_display} Wörter, "
            f"erwartet ~{expected} Vendors, extrahiert nur {n_vendors}"
        ),
        "area": "Vendor-Liste / VVT",
        "owner": "DSB + Marketing",
        "detail": (
            f"Bei einer Cookie-Richtlinie mit {wc_display} Woertern erwarten wir "
            f"typischerweise {expected}+ unique Vendors. Die extrahierte Zahl "
            f"({n_vendors}) ist auffaellig niedrig — entweder hat unser "
            "Parser/LLM die Tabelle nicht vollstaendig erfasst oder "
            "Vendors wurden zu konservativ erkannt. Empfehlung: Cookie-"
            "Tabelle im Copy-Paste-Modus einreichen (Frontend-Toggle "
            "'Text einfuegen' pro Cookie-Doc-Zeile) — dort parsen wir "
            "Spalten deterministisch."
        ),
        "legal_basis": "Art. 13(1)(e) DSGVO — die Empfaengerliste muss "
                       "vollstaendig sein; ein unvollstaendiger Audit darf "
                       "nicht als vollstaendig dargestellt werden.",
    }
|
||||
|
||||
|
||||
def check_url_fetch_failed(doc_entries: list | None) -> list[dict]:
    """Report submitted URLs whose fetch produced (almost) no text.

    An entry yields a finding when it is a dict with a non-empty ``url``,
    fewer than 200 characters of stripped ``text``, and no truthy
    ``auto_discovered`` flag. Non-dict entries are ignored.

    Args:
        doc_entries: Document entries; ``None`` is treated as empty.

    Returns:
        One MEDIUM finding dict per failed entry, in input order.
    """
    out: list[dict] = []
    for e in (doc_entries or []):
        if not isinstance(e, dict):
            continue
        url = (e.get("url") or "").strip()
        text = (e.get("text") or "").strip()
        if not url or len(text) >= 200 or e.get("auto_discovered"):
            continue
        # `or` instead of a .get() default: a key that is present but None or
        # empty would otherwise leak "None" into the code and the label.
        dt = e.get("doc_type") or "doc"
        out.append({
            "severity": "MEDIUM",
            "code": f"audit_url_fetch_failed_{dt}",
            "label": (
                f"Audit-Vorbehalt: {dt}-URL konnte nicht geladen werden "
                f"({len(text)} Zeichen extrahiert)"
            ),
            "area": dt,
            "owner": "DSB + Web-Team",
            "detail": (
                # The URL is cut to 120 characters to keep the text readable.
                f"Die eingegebene URL {url[:120]} lieferte weniger als 200 "
                "Zeichen. Moegliche Ursachen: 404, JS-only Render, Anti-Bot, "
                "Cookie-Wall. Auto-Discovery hat versucht eine Alternative "
                "auf der Homepage zu finden — ohne Erfolg. Empfehlung: "
                "korrekte URL pruefen oder den Text direkt einfuegen "
                "(Copy-Paste-Modus)."
            ),
            "legal_basis": "Art. 5 (2) DSGVO Rechenschaftspflicht.",
        })
    return out
|
||||
|
||||
|
||||
def run_all(
    banner_result: dict | None,
    cookie_doc_text: str | None,
    cmp_vendors: list | None,
    doc_entries: list | None,
) -> list[dict]:
    """Run every audit-quality check and collect the resulting findings.

    Each check runs in isolation: an exception raised by one check is logged
    as a warning and does not stop the remaining checks.

    Returns:
        Findings in check order (banner detection, vendor extract, URL
        fetch); empty when no check reports anything.
    """

    def _as_list(finding):
        # Single-result checks hand back one finding or a falsy value.
        return [finding] if finding else []

    # (log format used on failure, deferred call producing a list of findings)
    steps = (
        (
            "audit_banner_not_detected failed: %s",
            lambda: _as_list(check_banner_not_detected(banner_result, cookie_doc_text)),
        ),
        (
            "audit_vendor_extract_thin failed: %s",
            lambda: _as_list(check_vendor_extract_incomplete(cookie_doc_text, cmp_vendors)),
        ),
        (
            "audit_url_fetch_failed failed: %s",
            lambda: check_url_fetch_failed(doc_entries),
        ),
    )

    collected: list[dict] = []
    for failure_format, step in steps:
        try:
            collected.extend(step())
        except Exception as exc:
            logger.warning(failure_format, exc)
    return collected
|
||||
|
||||
|
||||
def build_audit_quality_block_html(findings: list[dict]) -> str:
    """Render audit-quality findings as a self-contained HTML block.

    Args:
        findings: Finding dicts; ``severity``, ``label``, ``detail`` and
            ``legal_basis`` are read, each with a default when missing.

    Returns:
        An inline-styled ``<div>`` with one ``<li>`` per finding, or ``""``
        when ``findings`` is empty.
    """
    # Function-scope import keeps this block independent of the module header.
    from html import escape

    if not findings:
        return ""

    items: list[str] = []
    for f in findings:
        sev = f.get("severity", "MEDIUM")
        sev_color = "#dc2626" if sev == "HIGH" else "#d97706"
        # Finding texts can carry externally supplied data (e.g. a submitted
        # URL), so they are escaped before being placed into the markup.
        sev_text = escape(str(sev))
        label = escape(str(f.get("label", "")))
        detail = escape(str(f.get("detail", "")))
        legal_basis = escape(str(f.get("legal_basis", "")))
        items.append(
            f'<li style="margin-bottom:10px;font-size:11px;line-height:1.5">'
            f'<strong style="color:{sev_color}">[{sev_text}] {label}</strong>'
            f'<div style="color:#475569;margin-top:3px">{detail}</div>'
            f'<div style="color:#94a3b8;margin-top:2px;font-style:italic">'
            f'{legal_basis}</div>'
            f'</li>'
        )

    count = len(findings)
    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:14px 18px;'
        'background:#fee2e2;border:1px solid #fecaca;border-radius:8px">'
        '<div style="font-size:11px;color:#991b1b;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Audit-Vorbehalt — Datenlage unvollstaendig</div>'
        f'<h3 style="margin:0 0 6px;font-size:14px;color:#1e293b">'
        f'{count} Punkt'
        f'{"e" if count != 1 else ""} bei denen der Audit selbst '
        f'an Grenzen gestossen ist</h3>'
        '<p style="margin:0 0 10px;font-size:11px;color:#475569;line-height:1.5">'
        'Die folgenden Punkte betreffen NICHT die Compliance Ihrer Website, '
        'sondern die Vollstaendigkeit unserer Pruefung. Bei diesen Bereichen '
        'sollten Sie den Audit nicht als "alles ok" werten, sondern manuell '
        'oder im Copy-Paste-Modus nachpruefen.'
        '</p>'
        '<ul style="margin:0 0 0 18px;padding:0">'
        + "".join(items) +
        '</ul></div>'
    )
|
||||
@@ -0,0 +1,458 @@
|
||||
"""
|
||||
P92 + P94 — Banner-Konsistenz-Checks (Post-hoc auf banner_result).
|
||||
|
||||
P92 — CMP-Tool-Verfuegbarkeit:
|
||||
Wenn "Anpassen"/"Einstellungen" angeklickt wurde und das Tool laedt
|
||||
nicht (Network-Error, Timeout, weisse Seite, fehlende
|
||||
consent-Elemente nach Klick), ist das ein HIGH-Verstoss — der
|
||||
Nutzer hat formal die Moeglichkeit zur granularen Wahl, aber sie
|
||||
funktioniert nicht.
|
||||
|
||||
P94 — Banner-Init-vs-Cookie-Footer-Konsistenz:
|
||||
Cookie-Liste im Initial-Banner-Settings darf nicht von der Liste
|
||||
im permanenten Cookie-Richtlinien-Dokument abweichen. Wenn Banner
|
||||
12 Cookies nennt, die Cookie-Doc aber 47, ist mindestens eine der
|
||||
beiden Quellen unvollstaendig → MEDIUM-Finding.
|
||||
|
||||
Beide liefern dict mit shape:
|
||||
{"severity": "HIGH"|"MEDIUM", "code": str, "label": str, "detail": str}
|
||||
oder None, wenn der Check nicht greift.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_ANPASSEN_KEYS = (
|
||||
"anpassen", "einstellungen", "customize", "preferences",
|
||||
"settings", "individuelle", "auswahl", "manage",
|
||||
)
|
||||
|
||||
|
||||
def _phases(banner_result: dict) -> dict:
|
||||
if not isinstance(banner_result, dict):
|
||||
return {}
|
||||
return banner_result.get("phases") or {}
|
||||
|
||||
|
||||
def check_cmp_tool_availability(banner_result: dict) -> dict | None:
    """P92 — a customize control is offered but the settings view is broken or empty.

    The check applies only when a ``settings`` (or ``after_settings_click``)
    phase was recorded and the initial banner text contains one of the
    ``_ANPASSEN_KEYS`` keywords.

    Returns:
        A HIGH finding whose ``detail`` joins all failure signals with
        ``" | "``, or ``None`` when the check does not apply or no signal
        fired.
    """
    phase_map = _phases(banner_result)
    settings_phase = phase_map.get("settings") or phase_map.get("after_settings_click")
    if not isinstance(settings_phase, dict):
        return None

    first_phase = phase_map.get("initial") or phase_map.get("before_accept") or {}
    first_text = (first_phase.get("banner_text") or "").lower()
    offers_customize = False
    for keyword in _ANPASSEN_KEYS:
        if keyword in first_text:
            offers_customize = True
            break
    if not offers_customize:
        # A missing customize button in the initial banner is P100's job;
        # do not report it a second time here.
        return None

    error = settings_phase.get("error") or settings_phase.get("status_error")
    settings_text = (settings_phase.get("banner_text") or "").strip()
    has_categories = bool(
        settings_phase.get("categories")
        or settings_phase.get("category_tests")
        or (settings_phase.get("structured_checks") or [])
    )
    settings_markup = settings_phase.get("banner_html") or ""
    has_toggles = re.search(
        r"checkbox|toggle|switch|aria-checked", settings_markup, re.I
    ) is not None
    timed_out = bool(settings_phase.get("timeout"))

    failure_signals: list[str] = []
    if error:
        failure_signals.append(f'Fehler: {str(error)[:120]}')
    if timed_out:
        failure_signals.append('Zeitueberschreitung beim Laden')
    if not has_categories:
        # Both remaining signals require that no categories were found.
        if len(settings_text) < 80:
            failure_signals.append(
                f'Settings-Bereich nur {len(settings_text)} Zeichen, '
                'keine Kategorien sichtbar'
            )
        if not has_toggles:
            failure_signals.append(
                'Keine Checkboxen / Toggles im Settings-Bereich'
            )

    if not failure_signals:
        return None

    return {
        "severity": "HIGH",
        "code": "cmp_tool_unavailable",
        "label": 'Cookie-Einstellungen ueber "Anpassen" formal vorhanden, '
                 'Tool laed aber nicht oder ist leer',
        "detail": " | ".join(failure_signals),
        "legal_basis": "Art. 7 (3) DSGVO + EDPB 03/2022 — die Moeglichkeit "
                       "zur granularen Auswahl muss tatsaechlich funktionieren.",
    }
|
||||
|
||||
|
||||
def _normalize_cookie_names(items) -> set[str]:
|
||||
out: set[str] = set()
|
||||
if not items:
|
||||
return out
|
||||
for it in items:
|
||||
if isinstance(it, str):
|
||||
name = it.strip()
|
||||
elif isinstance(it, dict):
|
||||
name = (it.get("name") or it.get("cookie") or it.get("id") or "").strip()
|
||||
else:
|
||||
continue
|
||||
if name and len(name) <= 120:
|
||||
out.add(name.lower())
|
||||
return out
|
||||
|
||||
|
||||
def check_init_banner_vs_cookie_doc(
    banner_result: dict,
    cookie_doc_text: str | None,
) -> dict | None:
    """P94 — compare the banner's cookie list with the cookie policy text.

    Banner cookies are the union of the ``cookies`` lists of the settings
    phase and the initial (or ``before_accept``) phase. Document cookies are
    tokens of the policy text that look like cookie names according to a
    marker/keyword heuristic.

    Returns:
        A MEDIUM finding (HIGH when at least 15 names appear only in the
        document and at least 5 only in the banner), or ``None`` when the
        text is shorter than 500 characters, the data is too thin, or the
        deviation is within tolerance.
    """
    if not cookie_doc_text or len(cookie_doc_text) < 500:
        return None

    phase_map = _phases(banner_result)
    settings_phase = phase_map.get("settings") or {}
    first_phase = phase_map.get("initial") or phase_map.get("before_accept") or {}
    banner_cookies = _normalize_cookie_names(settings_phase.get("cookies") or [])
    banner_cookies |= _normalize_cookie_names(first_phase.get("cookies") or [])

    # Cookie names in the document are typically camelCase or _underscored
    # tokens of 4-40 characters without spaces.
    tokens = set(re.findall(
        r"\b([A-Za-z_][A-Za-z0-9_\-\.]{3,40})\b", cookie_doc_text
    ))

    known_markers = (
        "_ga", "_gid", "_gcl", "_fbp", "uc_", "ot_",
        "cookieconsent", "sessionid", "csrf", "ajs_", "amp_",
        "datadome", "incap_", "_pk_", "wp-", "yt-",
    )
    topic_words = ("cookie", "consent", "track", "session")

    def _is_probable_cookie(token: str) -> bool:
        # Either a known marker appears anywhere in the token, or the token
        # is a plain identifier that contains one of the topic words.
        if any(marker in token for marker in known_markers):
            return True
        if not re.match(r"^[a-z][a-z0-9_]{3,30}$", token):
            return False
        return any(word in token for word in topic_words)

    doc_cookies: set[str] = {
        lowered
        for lowered in (token.lower() for token in tokens)
        if _is_probable_cookie(lowered)
    }

    if len(doc_cookies) < 5 or not banner_cookies:
        return None  # Too little data for a meaningful statement.

    only_in_doc = doc_cookies - banner_cookies
    only_in_banner = banner_cookies - doc_cookies

    if len(only_in_doc) < 5 and len(only_in_banner) < 3:
        return None  # Tolerable deviation.

    # HIGH when both sides deviate massively — the cross-reference is
    # clearly missing in that case.
    massive = len(only_in_doc) >= 15 and len(only_in_banner) >= 5
    severity = "HIGH" if massive else "MEDIUM"

    return {
        "severity": severity,
        "code": "banner_cookie_doc_mismatch",
        "label": (
            f"Cookie-Liste im Banner-Einstellungen ({len(banner_cookies)}) "
            f"weicht von Cookie-Richtlinie ({len(doc_cookies)}) ab"
        ),
        "detail": (
            f"Nur im Cookie-Dokument: {len(only_in_doc)} Cookies (Beispiele: "
            f"{', '.join(sorted(only_in_doc)[:5])}). "
            f"Nur im Banner: {len(only_in_banner)} Cookies. "
            "Empfehlung: eine der beiden Quellen als Single-Source-of-Truth "
            "definieren und die andere automatisch generieren."
        ),
        "legal_basis": (
            "Art. 13(1)(c) DSGVO + Art. 12 DSGVO — Informationen ueber die "
            "Verarbeitung muessen vollstaendig und konsistent sein."
        ),
    }
|
||||
|
||||
|
||||
_VENDOR_LIST_SIGNALS = (
|
||||
"google analytics", "google ads", "facebook pixel", "meta pixel",
|
||||
"hotjar", "matomo", "etracker", "salesforce", "hubspot",
|
||||
"linkedin insight", "twitter conversion", "tiktok pixel",
|
||||
"criteo", "the trade desk", "doubleclick",
|
||||
)
|
||||
|
||||
|
||||
def _vendors_mentioned_in_text(text: str) -> set[str]:
    """Return every ``_VENDOR_LIST_SIGNALS`` entry contained in ``text``.

    Matching is a case-insensitive substring test; falsy text yields an
    empty set.
    """
    found: set[str] = set()
    if text:
        haystack = text.lower()
        for signal in _VENDOR_LIST_SIGNALS:
            if signal in haystack:
                found.add(signal)
    return found
|
||||
|
||||
|
||||
def check_three_source_vendor_consistency(
    doc_texts: dict[str, str] | None,
    cmp_vendors: list | None,
) -> dict | None:
    """P33 — three-source consistency: privacy policy vs cookie doc vs banner vendors.

    A known vendor (see ``_VENDOR_LIST_SIGNALS``) that is named in some of
    the sources that contain vendor data, but not in all of them, is
    reported as inconsistent.

    Args:
        doc_texts: Document texts keyed by type; ``"dse"`` and ``"cookie"``
            are read.
        cmp_vendors: Banner vendor entries. Dict entries contribute their
            ``name``; plain strings are taken as the name itself; anything
            else is ignored.

    Returns:
        A MEDIUM finding listing up to eight inconsistent vendors, or
        ``None`` when ``doc_texts`` is empty, fewer than two sources contain
        vendor data, or everything is consistent.
    """
    if not doc_texts:
        return None
    dse_v = _vendors_mentioned_in_text(doc_texts.get("dse") or "")
    cookie_v = _vendors_mentioned_in_text(doc_texts.get("cookie") or "")

    banner_v: set[str] = set()
    for v in (cmp_vendors or []):
        if isinstance(v, dict):
            raw_name = v.get("name") or ""
        elif isinstance(v, str):
            raw_name = v
        else:
            # An unusable entry must not abort the whole check.
            continue
        name = str(raw_name).strip().lower()
        if not name:
            # "" (and, before stripping, " ") is a substring of the signals,
            # so a nameless vendor would mark every known vendor as present.
            continue
        for sig in _VENDOR_LIST_SIGNALS:
            # Match in both directions: "Google Analytics 4" contains the
            # signal, while a short name like "Hotjar" is contained in it.
            if sig in name or name in sig:
                banner_v.add(sig)

    sources_with_data = sum(1 for s in (dse_v, cookie_v, banner_v) if s)
    if sources_with_data < 2:
        return None

    # Vendors present in at least one source but not in every source that
    # has data. Sources without any vendor data are not counted as "missing".
    universe = dse_v | cookie_v | banner_v
    issues: list[str] = []
    for vendor in sorted(universe):
        missing_in = []
        if dse_v and vendor not in dse_v:
            missing_in.append("DSE")
        if cookie_v and vendor not in cookie_v:
            missing_in.append("Cookie-Doc")
        if banner_v and vendor not in banner_v:
            missing_in.append("Banner-Liste")
        if missing_in and len(missing_in) < sources_with_data:
            issues.append(f'{vendor} (fehlt in: {", ".join(missing_in)})')

    if not issues:
        return None

    return {
        "severity": "MEDIUM",
        "code": "three_source_vendor_inconsistency",
        "label": (
            f"{len(issues)} Vendor{'en' if len(issues) != 1 else ''} "
            "nicht konsistent zwischen DSE, Cookie-Richtlinie und Banner"
        ),
        "detail": (
            "Folgende Vendors sind nicht in allen Quellen genannt: "
            + "; ".join(issues[:8])
            + (" ..." if len(issues) > 8 else "")
            + ". Empfehlung: zentrale Vendor-Liste pflegen und in alle "
            "drei Dokumenttypen propagieren."
        ),
        "legal_basis": "Art. 13(1)(c)+(e) DSGVO + EDPB 5/2020 — die "
                       "Empfaenger / Drittlandtransfers muessen ueber alle "
                       "Touch-Points konsistent kommuniziert werden.",
    }
|
||||
|
||||
|
||||
def check_banner_vs_cmp_partner_count(
    banner_result: dict,
    cmp_vendors: list | None,
) -> dict | None:
    """P75 — the banner names N partners while the CMP payload lists far more.

    Only the first 5000 characters of the initial (or ``before_accept``)
    banner text are searched for a number of up to four digits directly
    followed by a partner/vendor word.

    Returns:
        A HIGH finding when the claimed number is below 60 % of the vendor
        count, otherwise ``None``. ``None`` is also returned when fewer than
        20 vendors are listed, the banner text is empty, or it contains no
        such number.
    """
    vendor_total = len(cmp_vendors or [])
    if vendor_total < 20:
        return None

    phase_map = _phases(banner_result)
    first_phase = phase_map.get("initial") or phase_map.get("before_accept") or {}
    banner_text = (first_phase.get("banner_text") or "")[:5000]
    if not banner_text:
        return None

    number_match = re.search(
        r"\b(\d{1,4})\s*(?:partner|drittanbieter|vendor|anbieter|dienstleister)",
        banner_text,
        re.I,
    )
    if number_match is None:
        return None

    claimed = int(number_match.group(1))
    if claimed >= vendor_total * 0.6:
        return None  # The number in the banner is plausible.

    return {
        "severity": "HIGH",
        "code": "banner_understates_vendor_count",
        "label": (
            f"Banner-Text nennt {claimed} Partner, CMP-Payload listet "
            f"{vendor_total} Vendors"
        ),
        "detail": (
            f"Die im Banner-Text genannte Zahl ({claimed}) unterschaetzt die "
            f"tatsaechliche Anzahl der Empfaenger ({vendor_total}) deutlich. "
            "Empfehlung: Banner-Text auf die echte Vendor-Zahl heben oder "
            "die Vendor-Liste reduzieren."
        ),
        "legal_basis": (
            "Art. 13(1)(e) DSGVO + EDPB 5/2020 — die Empfaenger / "
            "Empfaengerkategorien muessen vollstaendig und nicht "
            "verharmlosend angegeben sein."
        ),
    }
|
||||
|
||||
|
||||
def check_banner_copyability(banner_result: dict) -> dict | None:
    """P51a — the banner text must be copyable.

    Looks in the first 50000 characters of the initial (or
    ``before_accept``) banner HTML for CSS declarations or inline handlers
    that block selecting / copying (``user-select:none`` and vendor
    prefixes, ``pointer-events:none``, ``oncopy`` / ``onselectstart``
    returning false).

    Returns:
        A MEDIUM finding naming up to three matched signals, or ``None``
        when ``banner_result`` is not a dict, no HTML was captured, or no
        signal matched.
    """
    if not isinstance(banner_result, dict):
        return None
    phases = banner_result.get("phases") or {}
    initial = phases.get("initial") or phases.get("before_accept") or {}
    html = (initial.get("banner_html") or "")[:50000].lower()
    if not html:
        return None

    # Drop optional whitespace around ':' and '=' so that every spelling
    # ("pointer-events: none", "user-select : none", 'oncopy = "return ...')
    # is matched by one canonical signal instead of hand-listed variants.
    compact = re.sub(r"\s*([:=])\s*", r"\1", html)
    blocked_signals = [
        "user-select:none",
        "-webkit-user-select:none",
        "-moz-user-select:none",
        "pointer-events:none",
        "oncopy=\"return false",
        "onselectstart=\"return false",
    ]
    hits = [s for s in blocked_signals if s in compact]
    if not hits:
        return None
    return {
        "severity": "MEDIUM",
        "code": "banner_not_copyable",
        "label": "Banner-Text laesst sich nicht kopieren "
                 "(user-select:none / oncopy disabled)",
        "detail": (
            f'Im Banner-HTML gefunden: {", ".join(hits[:3])}. Der Nutzer '
            "kann den Banner-Text nicht in eine Mail / Doku einfuegen, was "
            "die spaetere Pruefung erschwert. Empfehlung: das CSS entfernen "
            "oder explizit auf 'auto' setzen."
        ),
        "legal_basis": "Art. 7 (1)+(2) DSGVO + EDPB 5/2020 — Einwilligungen "
                       "muessen in verstaendlicher und zugaenglicher Form "
                       "erteilt werden; eine spaetere Pruefung darf nicht "
                       "technisch erschwert werden.",
    }
|
||||
|
||||
|
||||
def check_consent_history(banner_result: dict) -> dict | None:
    """P51b — users need a way to review their own consent history.

    The banner text (first 5000 characters) and banner HTML (first 20000
    characters) of every recorded phase are searched, case-insensitively,
    for phrases that indicate a consent-history / "my consents" view.

    Returns:
        A MEDIUM finding when none of the phrases occurs, otherwise
        ``None``. ``None`` is also returned when ``banner_result`` is not a
        dict or the combined text is empty.
    """
    if not isinstance(banner_result, dict):
        return None

    phase_map = banner_result.get("phases") or {}
    fragments: list[str] = [
        (phase.get(field) or "")[:limit]
        for phase in phase_map.values()
        if isinstance(phase, dict)
        for field, limit in (("banner_text", 5000), ("banner_html", 20000))
    ]
    haystack = " ".join(fragments).lower()
    if not haystack:
        return None

    history_signals = (
        "meine einwilligung", "consent-historie", "consent history",
        "einwilligungshistorie", "einwilligungs-historie",
        "ihre einwilligungen", "datenschutz-cockpit",
        "privacy dashboard", "einwilligungs-protokoll",
        "consent record", "consent log",
    )
    for phrase in history_signals:
        if phrase in haystack:
            return None

    return {
        "severity": "MEDIUM",
        "code": "consent_history_missing",
        "label": "Keine sichtbare Consent-Historie / 'Meine Einwilligungen'-Ansicht",
        "detail": (
            "Im Banner und in den verlinkten Footer-Bereichen ist keine "
            "Moeglichkeit erkennbar, die eigene Einwilligungs-Historie "
            "einzusehen oder zu exportieren. Empfehlung: einen "
            "'Meine Einwilligungen'-Bereich verlinken (Borlabs / Cookiebot / "
            "Usercentrics bieten dafuer fertige Komponenten)."
        ),
        "legal_basis": "Art. 7 (3) DSGVO + EDPB 5/2020 — der Widerruf muss "
                       "ebenso einfach sein wie die Erteilung, was eine "
                       "Sichtbarmachung der eigenen Einwilligungen voraussetzt.",
    }
|
||||
|
||||
|
||||
def run_all(banner_result: dict, cookie_doc_text: str | None = None,
            cmp_vendors: list | None = None,
            doc_texts: dict[str, str] | None = None) -> list[dict]:
    """Run all banner-consistency checks and collect their findings.

    Checks run in a fixed order (P92, P94, P75, P33, P51a, P51b). Each one
    is isolated: an exception is logged as a warning and the remaining
    checks still run.

    Returns:
        The truthy results of the checks, in execution order.
    """
    # (log format used on failure, deferred call of the check)
    checks = (
        ("P92 cmp_tool_availability failed: %s",
         lambda: check_cmp_tool_availability(banner_result)),
        ("P94 init_vs_cookie_doc failed: %s",
         lambda: check_init_banner_vs_cookie_doc(banner_result, cookie_doc_text)),
        ("P75 banner_vs_cmp_count failed: %s",
         lambda: check_banner_vs_cmp_partner_count(banner_result, cmp_vendors)),
        ("P33 three_source_vendor failed: %s",
         lambda: check_three_source_vendor_consistency(doc_texts, cmp_vendors)),
        ("P51a copyability failed: %s",
         lambda: check_banner_copyability(banner_result)),
        ("P51b consent_history failed: %s",
         lambda: check_consent_history(banner_result)),
    )

    collected: list[dict] = []
    for failure_format, run_check in checks:
        try:
            finding = run_check()
            if finding:
                collected.append(finding)
        except Exception as exc:
            logger.warning(failure_format, exc)
    return collected
|
||||
|
||||
|
||||
def build_consistency_block_html(findings: list[dict]) -> str:
    """Render banner-consistency findings as a self-contained HTML block.

    Args:
        findings: Finding dicts; ``severity``, ``label``, ``detail`` and
            ``legal_basis`` are read, each with a default when missing.

    Returns:
        An inline-styled ``<div>`` with one ``<li>`` per finding, or ``""``
        when ``findings`` is empty.
    """
    # Function-scope import keeps this block independent of the module header.
    from html import escape

    if not findings:
        return ""

    items: list[str] = []
    for f in findings:
        sev = f.get("severity", "MEDIUM")
        sev_color = "#dc2626" if sev == "HIGH" else "#d97706"
        # Finding texts can contain data taken from the scanned site (error
        # messages, names), so they are escaped before entering the markup.
        sev_text = escape(str(sev))
        label = escape(str(f.get("label", "")))
        detail = escape(str(f.get("detail", "")))
        legal_basis = escape(str(f.get("legal_basis", "")))
        items.append(
            f'<li style="margin-bottom:10px;font-size:11px;line-height:1.5">'
            f'<strong style="color:{sev_color}">[{sev_text}] {label}</strong>'
            f'<div style="color:#475569;margin-top:3px">{detail}</div>'
            f'<div style="color:#94a3b8;margin-top:2px;font-style:italic">'
            f'{legal_basis}</div>'
            f'</li>'
        )

    count = len(findings)
    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:14px 18px;'
        'background:#fef3c7;border:1px solid #fcd34d;border-radius:8px">'
        '<div style="font-size:11px;color:#92400e;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Banner-Konsistenz-Pruefung</div>'
        f'<h3 style="margin:0 0 6px;font-size:14px;color:#1e293b">'
        f'{count} Konsistenz-Finding{"s" if count != 1 else ""} '
        'zwischen Banner-UI und Cookie-Richtlinie</h3>'
        '<ul style="margin:8px 0 0 18px;padding:0">'
        + "".join(items) +
        '</ul></div>'
    )
|
||||
@@ -0,0 +1,44 @@
|
||||
"""
|
||||
P85 — Banner-Screenshot-Block in der Mail.
|
||||
|
||||
Embedded den von consent-tester captured Screenshot des Banners
|
||||
(banner_result.banner_screenshot_b64) als data-URI <img> in die Mail.
|
||||
"so sah euer Banner zum Audit-Zeitpunkt aus" — visueller Beweis fuer
|
||||
Dispute mit Marketing-Team oder DSB.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def build_banner_screenshot_html(banner_result: dict | None) -> str:
    """Render the captured banner screenshot as an inline HTML block.

    The screenshot is read from ``banner_result["banner_screenshot_b64"]``
    and embedded as a ``data:image/png;base64`` URI.

    Returns:
        The HTML block, or ``""`` when ``banner_result`` is not a dict or
        the base64 payload is missing or shorter than 200 characters.
    """
    # Function-scope import keeps this block independent of the module header.
    from html import escape

    if not isinstance(banner_result, dict):
        return ""
    b64 = banner_result.get("banner_screenshot_b64") or ""
    if not b64 or len(b64) < 200:
        return ""
    provider = banner_result.get("banner_provider") or "Generic"
    detected = banner_result.get("banner_detected")

    # Both values come from the scan result; escape them so that neither can
    # break out of the surrounding element or the quoted src attribute.
    provider_text = escape(str(provider))
    b64_attr = escape(str(b64), quote=True)

    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:12px 16px;'
        'background:#f8fafc;border:1px solid #cbd5e1;border-radius:8px">'
        '<div style="font-size:11px;color:#475569;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Screenshot des Cookie-Banners zum Audit-Zeitpunkt</div>'
        f'<h3 style="margin:0 0 6px;font-size:13px;color:#1e293b">'
        f'Provider: <strong>{provider_text}</strong> · '
        f'erkannt: <strong>{"ja" if detected else "nein"}</strong></h3>'
        '<p style="margin:0 0 8px;font-size:11px;color:#64748b;line-height:1.5">'
        'Visueller Beweis wie das Banner zum Zeitpunkt des Audits angezeigt '
        'wurde. Bei spaeterer Aenderung des Banners bitte mit diesem '
        'Screenshot abgleichen.'
        '</p>'
        f'<img src="data:image/png;base64,{b64_attr}" alt="Cookie-Banner" '
        f'style="max-width:100%;height:auto;border:1px solid #cbd5e1;'
        f'border-radius:4px;display:block">'
        '</div>'
    )
|
||||
@@ -56,17 +56,16 @@ def replay_from_snapshot(
|
||||
cmp_vendors = snap.get("cmp_vendors") or []
|
||||
site_label = snap.get("site_label") or snap.get("site_domain")
|
||||
|
||||
# Reconstruct doc_texts mapping (was the input to mail-render)
|
||||
# Reconstruct doc_texts mapping (was the input to mail-render).
|
||||
# Snapshot-Schema speichert text unter "text" (nicht full_text).
|
||||
doc_texts: dict[str, str] = {}
|
||||
for e in doc_entries:
|
||||
dt = e.get("doc_type", "")
|
||||
txt = (e.get("full_text") or e.get("text_preview") or "").strip()
|
||||
txt = (e.get("text") or e.get("full_text") or e.get("text_preview") or "").strip()
|
||||
if dt and txt:
|
||||
doc_texts[dt] = txt
|
||||
|
||||
# Build results list mock (just enough for mail-render)
|
||||
from compliance.services.doc_checks.runner import DocCheckResult
|
||||
|
||||
def _dict_to_result(d: dict) -> Any:
|
||||
"""Best-effort reconstruction. Snapshot didn't persist DocCheckResult
|
||||
so we fake minimal fields. For real MC-replay (v2) we'd re-run the
|
||||
@@ -86,6 +85,97 @@ def replay_from_snapshot(
|
||||
section_sizes: dict[str, int] = {}
|
||||
parts: list[str] = []
|
||||
|
||||
# P80 v2 — Quality-Checks aus dem aktuellen Code auf Snapshot-Daten
|
||||
# anwenden. Vollstaendiger Replay aller post-fetch Findings-Generatoren.
|
||||
cookie_t = doc_texts.get("cookie") or doc_texts.get("dse") or ""
|
||||
|
||||
# Vendor-Normalize (Dedup + Garbage-Filter)
|
||||
try:
|
||||
from compliance.services.vendor_normalizer import normalize_vendors
|
||||
cmp_vendors = normalize_vendors(list(cmp_vendors))
|
||||
except Exception as e:
|
||||
logger.warning("Replay v2: normalizer failed: %s", e)
|
||||
|
||||
# Audit-Quality
|
||||
try:
|
||||
from compliance.services.audit_quality_checks import (
|
||||
run_all as run_aq, build_audit_quality_block_html,
|
||||
)
|
||||
aq = run_aq(banner_result, cookie_t, cmp_vendors, doc_entries)
|
||||
if aq:
|
||||
aq_html = build_audit_quality_block_html(aq)
|
||||
parts.append(aq_html)
|
||||
section_sizes["audit_quality_v2"] = len(aq_html)
|
||||
except Exception as e:
|
||||
logger.warning("Replay v2: audit_quality failed: %s", e)
|
||||
|
||||
# Cookie-Compliance-Audit
|
||||
try:
|
||||
from compliance.services.cookie_compliance_audit import (
|
||||
audit_cookie_compliance, build_cookie_audit_block_html,
|
||||
)
|
||||
ca = audit_cookie_compliance(db, cookie_t, banner_result)
|
||||
if ca and (ca.get("declared_count") or ca.get("browser_count")):
|
||||
ca_html = build_cookie_audit_block_html(ca)
|
||||
parts.append(ca_html)
|
||||
section_sizes["cookie_audit_v2"] = len(ca_html)
|
||||
except Exception as e:
|
||||
logger.warning("Replay v2: cookie_audit failed: %s", e)
|
||||
|
||||
# TCF Authority
|
||||
try:
|
||||
from compliance.services.tcf_vendor_authority import (
|
||||
cross_reference_with_tcf, build_tcf_authority_block_html,
|
||||
)
|
||||
tcf = cross_reference_with_tcf(db, cmp_vendors)
|
||||
if tcf:
|
||||
tcf_html = build_tcf_authority_block_html(tcf)
|
||||
parts.append(tcf_html)
|
||||
section_sizes["tcf_v2"] = len(tcf_html)
|
||||
except Exception as e:
|
||||
logger.warning("Replay v2: tcf failed: %s", e)
|
||||
|
||||
# Entropy + Network-Trace
|
||||
try:
|
||||
from compliance.services.cookie_value_entropy import (
|
||||
check_cookies_for_entropy_mismatch, build_entropy_block_html,
|
||||
)
|
||||
from compliance.services.cookie_network_tracer import (
|
||||
trace_cookie_network, build_network_trace_block_html,
|
||||
)
|
||||
cd = (banner_result or {}).get("cookies_detailed") or []
|
||||
e1 = check_cookies_for_entropy_mismatch(cd)
|
||||
if e1:
|
||||
ent_html = build_entropy_block_html(e1)
|
||||
parts.append(ent_html)
|
||||
section_sizes["entropy_v2"] = len(ent_html)
|
||||
site_url = ""
|
||||
for entry in (doc_entries or []):
|
||||
if entry.get("url"):
|
||||
site_url = entry["url"]; break
|
||||
net = trace_cookie_network(cd, site_url)
|
||||
if net:
|
||||
net_html = build_network_trace_block_html(net)
|
||||
parts.append(net_html)
|
||||
section_sizes["network_trace_v2"] = len(net_html)
|
||||
except Exception as e:
|
||||
logger.warning("Replay v2: entropy/network failed: %s", e)
|
||||
|
||||
# P82: GF-1-Pager zuerst (5-Bullet-Summary)
|
||||
try:
|
||||
from compliance.services.gf_one_pager import build_gf_one_pager_html
|
||||
gf_html = build_gf_one_pager_html(
|
||||
site_name=site_label or "",
|
||||
scorecard=None, # Snapshot enthaelt keine MC-Scorecard
|
||||
banner_result=banner_result,
|
||||
library_mismatch_findings=None, # wird unten gefuellt
|
||||
scan_context=snap.get("scan_context"),
|
||||
)
|
||||
parts.append(gf_html)
|
||||
section_sizes["gf_one_pager"] = len(gf_html)
|
||||
except Exception as e:
|
||||
logger.warning("Replay: GF-1-pager failed: %s", e)
|
||||
|
||||
try:
|
||||
from compliance.api.agent_doc_check_critical import build_critical_findings_html
|
||||
critical_html = build_critical_findings_html(banner_result, None, results) or ""
|
||||
@@ -118,6 +208,65 @@ def replay_from_snapshot(
|
||||
except Exception as e:
|
||||
logger.warning("Replay: vvt failed: %s", e)
|
||||
|
||||
# P35 + P77 + P78 + P36: Textsignale (Save-Label, Cookies-in-DSE,
|
||||
# JC-Klausel, Social-Embeds)
|
||||
try:
|
||||
from compliance.services.doc_text_signals import (
|
||||
run_all as run_signal_checks,
|
||||
build_signals_block_html,
|
||||
)
|
||||
cookie_doc_missing = not bool(doc_texts.get("cookie"))
|
||||
sig_findings = run_signal_checks(
|
||||
banner_result, doc_texts, cookie_doc_missing,
|
||||
)
|
||||
if sig_findings:
|
||||
sig_html = build_signals_block_html(sig_findings)
|
||||
parts.append(sig_html)
|
||||
section_sizes["signals"] = len(sig_html)
|
||||
except Exception as e:
|
||||
logger.warning("Replay: signals block failed: %s", e)
|
||||
|
||||
# P92 + P94: Banner-Konsistenz
|
||||
try:
|
||||
from compliance.services.banner_consistency_checks import (
|
||||
run_all as run_consistency_checks,
|
||||
build_consistency_block_html,
|
||||
)
|
||||
cookie_doc_for_check = doc_texts.get("cookie") or doc_texts.get("dse") or ""
|
||||
cons = run_consistency_checks(
|
||||
banner_result or {}, cookie_doc_for_check, cmp_vendors,
|
||||
doc_texts=doc_texts,
|
||||
)
|
||||
if cons:
|
||||
cons_html = build_consistency_block_html(cons)
|
||||
parts.append(cons_html)
|
||||
section_sizes["consistency"] = len(cons_html)
|
||||
except Exception as e:
|
||||
logger.warning("Replay: consistency block failed: %s", e)
|
||||
|
||||
# P102: Cookie-Klassifikations-Pruefung
|
||||
try:
|
||||
from compliance.services.cookie_library_mismatch import (
|
||||
detect_mismatches, build_mismatch_block_html,
|
||||
)
|
||||
cookies_seen: list[str] = []
|
||||
for ph in (banner_result.get("phases") or {}).values():
|
||||
if isinstance(ph, dict):
|
||||
for ck in (ph.get("cookies") or []):
|
||||
if isinstance(ck, str):
|
||||
cookies_seen.append(ck)
|
||||
elif isinstance(ck, dict) and ck.get("name"):
|
||||
cookies_seen.append(ck["name"])
|
||||
doc_for_check = doc_texts.get("cookie") or doc_texts.get("dse") or ""
|
||||
if cookies_seen and doc_for_check:
|
||||
mm = detect_mismatches(db, cookies_seen, doc_for_check)
|
||||
if mm:
|
||||
mm_html = build_mismatch_block_html(mm)
|
||||
parts.append(mm_html)
|
||||
section_sizes["library_mismatch"] = len(mm_html)
|
||||
except Exception as e:
|
||||
logger.warning("Replay: mismatch block failed: %s", e)
|
||||
|
||||
full_html = "".join(parts)
|
||||
|
||||
result = {
|
||||
@@ -128,11 +277,12 @@ def replay_from_snapshot(
|
||||
"sections": section_sizes,
|
||||
"mail_sent": False,
|
||||
"preview": full_html[:500] + "..." if len(full_html) > 500 else full_html,
|
||||
"full_html": full_html, # P88 PDF-Export braucht das volle HTML.
|
||||
}
|
||||
|
||||
if recipient and not dry_run:
|
||||
try:
|
||||
from compliance.services.email_sender import send_email
|
||||
from compliance.services.smtp_sender import send_email
|
||||
email_res = send_email(
|
||||
recipient=recipient,
|
||||
subject=f"[REPLAY] {site_label} (Snapshot {snapshot_id[:8]})",
|
||||
|
||||
@@ -82,6 +82,8 @@ class CompliancePDFGenerator:
|
||||
self._add_consent_section(story, ss, tenant_id)
|
||||
# Org Roles
|
||||
self._add_role_section(story, ss, tenant_id, project_id)
|
||||
# Stufe 2 — Quellen- und Lizenz-Footer (Attribution-Renderer Task #23)
|
||||
self._add_attribution_footer(story, ss)
|
||||
# Footer
|
||||
story.append(Spacer(1, 15 * mm))
|
||||
story.append(Paragraph("Erstellt mit BreakPilot Compliance SDK", ss["Small"]))
|
||||
@@ -214,3 +216,64 @@ class CompliancePDFGenerator:
|
||||
story.append(Paragraph("Keine Rollen zugewiesen.", ss["Body2"]))
|
||||
except Exception:
|
||||
story.append(Paragraph("Rollen-Tabelle nicht vorhanden.", ss["Small"]))
|
||||
|
||||
def _add_attribution_footer(self, story, ss) -> None:
    """Append the "Quellen & Lizenzen" provenance section to the PDF story.

    Queries the canonical controls grouped by ``license_rule`` and, per rule,
    appends a heading with the number of controls, up to eight source
    regulations, and (for rule 2) the mandatory attribution note.

    The section is best-effort: if the query fails the problem is logged and
    nothing is appended; if the query returns no rows nothing is appended.

    Args:
        story: List of flowables the section is appended to.
        ss: Style mapping; the keys ``"Section"``, ``"Body2"`` and ``"Small"``
            are read here.
    """
    # Local import keeps the block self-contained; stdlib only.
    from xml.sax.saxutils import escape

    max_sources = 8

    try:
        # COUNT(DISTINCT cc.id): the LEFT JOIN yields one row per parent link,
        # so COUNT(*) would count a control once per link instead of once.
        rows = self.db.execute(text("""
            SELECT cc.license_rule, COUNT(DISTINCT cc.id) AS n,
                   array_agg(DISTINCT cpl.source_regulation ORDER BY cpl.source_regulation)
                       FILTER (WHERE cpl.source_regulation IS NOT NULL) AS sources
            FROM compliance.canonical_controls cc
            LEFT JOIN compliance.control_parent_links cpl ON cpl.control_uuid = cc.id
            WHERE cc.license_rule IS NOT NULL
            GROUP BY cc.license_rule
            ORDER BY cc.license_rule
        """)).fetchall()
    except Exception as e:
        logger.warning("attribution footer skipped: %s", e)
        return
    if not rows:
        return

    rule_labels = {1: "Hoheitsrecht/Public Domain (woertlich)",
                   2: "Mit Attribution (CC-BY u.ae.)",
                   3: "Nur Identifier-Verweis"}

    story.append(Spacer(1, 8 * mm))
    story.append(Paragraph("Quellen & Lizenzen", ss["Section"]))
    story.append(Paragraph(
        "Dieser Bericht stuetzt sich auf klassifizierte Compliance-Controls "
        "aus den folgenden Quellen. Jede Quelle ist deterministisch in eine "
        "der drei Lizenzregeln (R1-R3) eingeordnet.", ss["Body2"]))

    for r in rows:
        rule = int(r.license_rule)
        all_sources = r.sources or []
        label = rule_labels.get(rule, f"Regel {rule}")
        head = f"<b>R{rule} — {label}</b> ({r.n} Controls)"
        story.append(Paragraph(head, ss["Body2"]))
        if all_sources:
            # Paragraph text is parsed as markup, so database-provided names
            # must not be able to open tags or entities.
            src_text = "; ".join(escape(str(s)) for s in all_sources[:max_sources])
            if len(all_sources) > max_sources:
                src_text += f" und {len(all_sources) - max_sources} weitere"
            story.append(Paragraph(src_text, ss["Small"]))
        if rule == 2:
            story.append(Paragraph(
                "Pflicht-Attribution: Inhalte aus den oben genannten Quellen sind "
                "unter den jeweiligen freien Lizenzen (z.B. CC-BY-SA, OECD-Public, "
                "Apache-2.0) wiedergegeben. Original-Urheber bleibt in jeder "
                "Weiterverwendung zu nennen.", ss["Small"]))
        story.append(Spacer(1, 2 * mm))
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
"""
|
||||
P54 — Diff-Banner fuer End-User (USP-Feature).
|
||||
|
||||
USP-Idee: bei wiederkehrenden Besuchern zeigt das Banner NICHT die
|
||||
Standard-Frage, sondern eine Diff-Mitteilung:
|
||||
"Seit deiner letzten Zustimmung haben wir hinzugefuegt:
|
||||
* Microsoft Bing (Werbung)
|
||||
* TikTok Pixel (Marketing)
|
||||
Bitte erneut zustimmen oder anpassen."
|
||||
|
||||
Backend-Seite (hier): liefert pro Snapshot eine 'diff_for_user'-Struktur
|
||||
die zum Embedden in eigenen Banner / Hinweistext genutzt werden kann.
|
||||
Frontend-Banner-Lib (separate consent-sdk) konsumiert das.
|
||||
|
||||
Vergleicht Vendor-Listen zwischen aktuellem Snapshot und dem letzten
|
||||
Snapshot mit gleicher site_domain.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Iterable
|
||||
|
||||
from sqlalchemy import text as sa_text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _norm_vendor_set(vendors: Iterable) -> set[str]:
|
||||
out: set[str] = set()
|
||||
for v in (vendors or []):
|
||||
if isinstance(v, dict):
|
||||
n = (v.get("name") or "").strip()
|
||||
elif isinstance(v, str):
|
||||
n = v.strip()
|
||||
else:
|
||||
continue
|
||||
if n:
|
||||
out.add(n)
|
||||
return out
|
||||
|
||||
|
||||
def compute_user_facing_diff(
    db: Session,
    site_domain: str,
    current_check_id: str,
    current_cmp_vendors: list,
) -> dict | None:
    """Diff the current CMP vendor list against the previous snapshot of the site.

    The most recent row of ``compliance.compliance_check_snapshots`` with the
    same ``site_domain`` and a different ``check_id`` is used as baseline.

    Args:
        db: Database session used for the snapshot lookup.
        site_domain: Domain whose previous snapshot is looked up.
        current_check_id: Check id of the current run; excluded from the lookup.
        current_cmp_vendors: Vendors of the current run (strings or dicts with
            ``name`` / ``category``). ``None`` is treated as an empty list.

    Returns:
        ``None`` if ``site_domain`` is empty, the lookup fails, no previous
        snapshot exists, the stored vendor list is unusable, or nothing
        changed. Otherwise a dict with the keys ``prev_at`` (ISO timestamp or
        ``None``), ``added_vendors``, ``removed_vendors``,
        ``new_marketing_vendors`` and ``requires_reconsent``.
    """
    if not site_domain:
        return None
    try:
        row = db.execute(sa_text(
            """
            SELECT cmp_vendors, created_at
            FROM compliance.compliance_check_snapshots
            WHERE site_domain = :dom AND check_id != :ex
            ORDER BY created_at DESC LIMIT 1
            """
        ), {"dom": site_domain, "ex": current_check_id}).fetchone()
    except Exception as e:
        logger.warning("diff lookup failed: %s", e)
        return None
    if not row:
        return None

    prev_vendors = row[0] or []
    prev_at = row[1]

    if isinstance(prev_vendors, str):
        # NOTE(review): assumes the column may come back as JSON text with
        # some drivers - confirm. Iterating a raw string would otherwise
        # produce one "vendor" per character.
        import json
        try:
            prev_vendors = json.loads(prev_vendors)
        except ValueError as e:
            logger.warning("diff lookup: stored cmp_vendors not parseable: %s", e)
            return None
    if not isinstance(prev_vendors, (list, tuple)):
        return None

    # Normalise once so the set computation and the category scan below see
    # the same (None-safe) input.
    current_vendors = current_cmp_vendors or []
    curr_set = _norm_vendor_set(current_vendors)
    prev_set = _norm_vendor_set(prev_vendors)

    added_set = curr_set - prev_set
    added = sorted(added_set)
    removed = sorted(prev_set - curr_set)
    if not added and not removed:
        return None

    # Newly added vendors whose category indicates marketing / tracking.
    # Each vendor is reported once, even if it appears several times.
    new_marketing: list[str] = []
    reported: set[str] = set()
    for v in current_vendors:
        if not isinstance(v, dict):
            continue
        n = (v.get("name") or "").strip()
        cat = (v.get("category") or "").lower()
        if (
            n in added_set
            and n not in reported
            and cat in ("marketing", "tracking", "advertising")
        ):
            reported.add(n)
            new_marketing.append(n)

    return {
        "prev_at": prev_at.isoformat() if prev_at else None,
        "added_vendors": added,
        "removed_vendors": removed,
        "new_marketing_vendors": new_marketing,
        "requires_reconsent": bool(new_marketing),
    }
|
||||
|
||||
|
||||
def build_diff_banner_snippet(diff: dict) -> str:
    """Render the vendor diff as an HTML snippet for a site's cookie banner.

    Args:
        diff: Diff dict with ``added_vendors`` and optionally
            ``new_marketing_vendors`` / ``requires_reconsent``.

    Returns:
        An empty string if ``diff`` is empty or has no added vendors,
        otherwise a ``<div class="breakpilot-consent-diff">`` block listing at
        most the first eight added vendors, plus a re-consent note when
        ``requires_reconsent`` is truthy.
    """
    # Local import keeps the block self-contained; stdlib only.
    import html

    if not diff or not diff.get("added_vendors"):
        return ""
    added = diff.get("added_vendors", [])
    n_marketing = len(diff.get("new_marketing_vendors") or [])
    # Vendor names originate from scanned third-party sites and end up in a
    # page shown to end users, so they must be HTML-escaped.
    items = "".join(f"<li>{html.escape(str(v))}</li>" for v in added[:8])
    reconsent_note = ""
    if diff.get("requires_reconsent"):
        reconsent_note = (
            f'<p style="margin:6px 0 0;color:#991b1b;font-size:12px">'
            f'<strong>{n_marketing} neue{"r" if n_marketing == 1 else ""} '
            f'Marketing-Anbieter</strong> seit Ihrer letzten Zustimmung — '
            'bitte erneut bestaetigen.'
            '</p>'
        )
    return (
        '<div class="breakpilot-consent-diff" '
        'style="font-family:-apple-system,sans-serif;font-size:12px;'
        'padding:8px 12px;background:#fef3c7;border:1px solid #fde68a;'
        'border-radius:6px;margin-bottom:8px">'
        '<strong>Seit Ihrer letzten Zustimmung haben wir hinzugefuegt:</strong>'
        f'<ul style="margin:4px 0 0 18px;padding:0">{items}</ul>'
        + reconsent_note +
        '</div>'
    )
|
||||
@@ -0,0 +1,221 @@
|
||||
"""
|
||||
Cookie-Compliance-Audit — 3-Quellen-Vergleich.
|
||||
|
||||
DAS ist der eigentliche Mehrwert des Tools:
|
||||
* A. Was in der Cookie-Richtlinie DEKLARIERT ist (Text-Parse)
|
||||
* B. Was im Browser TATSAECHLICH GELADEN wurde (after_accept)
|
||||
* C. Was unsere LIBRARY ueber den Cookie weiss (Vendor, Kategorie)
|
||||
|
||||
Daraus 4 Ergebnislisten:
|
||||
1. ✓ deklariert + geladen + library-bekannt → compliant
|
||||
2. ❌ geladen aber NICHT deklariert → HIGH-Verstoss (Art. 13(1)(c) DSGVO)
|
||||
3. ⚠️ deklariert aber NICHT geladen → Tabelle veraltet (LOW)
|
||||
4. 🔍 deklariert + Library-Kategorie weicht ab → Pruefanlass
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Iterable
|
||||
|
||||
from sqlalchemy import text as sa_text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _normalize_cookie_name(name: str) -> str:
|
||||
"""Wildcard-Cookies wie 'AMCV_*', 'pm_sess_NNN' werden auf Prefix
|
||||
reduziert damit '_ga' und '_ga_GTM-XXX' als ein Cookie zaehlen."""
|
||||
if not name:
|
||||
return ""
|
||||
s = name.strip()
|
||||
# AMCV_*, sc_v44, etc.
|
||||
s = re.sub(r"[<\[].*?[>\]]", "", s) # entferne <ID>, [...]
|
||||
s = s.rstrip("*").rstrip("_")
|
||||
s = re.sub(r"_NNN$|_\d+$", "", s)
|
||||
return s.lower()
|
||||
|
||||
|
||||
def _extract_declared_cookies(cookie_doc_text: str | None) -> set[str]:
    """Collect the normalized cookie names declared in a cookie-policy text.

    Runs ``parse_cookie_table`` first and ``parse_flat_cookie_text`` second
    over the same text and merges the cookie names of all returned vendor
    records. Any exception (including a failing import of the parsers) is
    logged as a warning; names gathered up to that point are still returned.

    Returns:
        Set of non-empty normalized names; empty for falsy input.
    """
    if not cookie_doc_text:
        return set()

    names: set[str] = set()
    try:
        from compliance.services.cookies_table_parser import (
            parse_cookie_table, parse_flat_cookie_text,
        )
        # Same order as before: table parser, then flat-text parser.
        for parser in (parse_cookie_table, parse_flat_cookie_text):
            for vendor_record in parser(cookie_doc_text):
                for cookie in vendor_record.get("cookies") or []:
                    if isinstance(cookie, dict) and cookie.get("name"):
                        names.add(_normalize_cookie_name(cookie["name"]))
    except Exception as e:
        logger.warning("declared-cookie-extract failed: %s", e)

    names.discard("")
    return names
|
||||
|
||||
|
||||
def _extract_browser_cookies(banner_result: dict | None) -> set[str]:
    """Collect the normalized cookie names observed in the browser.

    Reads ``banner_result["phases"]`` for the phases ``after_accept``,
    ``before_consent`` and ``after_reject`` and merges the names found in
    each phase's ``cookies`` list. Entries may be plain strings or dicts with
    a ``name`` key; everything else is ignored.

    Returns:
        Set of non-empty normalized names; empty if ``banner_result`` is not
        a dict.
    """
    if not isinstance(banner_result, dict):
        return set()

    phases = banner_result.get("phases") or {}
    found: set[str] = set()
    for phase_key in ("after_accept", "before_consent", "after_reject"):
        phase = phases.get(phase_key) or {}
        if not isinstance(phase, dict):
            continue
        for entry in phase.get("cookies") or []:
            if isinstance(entry, str):
                raw_name = entry
            elif isinstance(entry, dict) and entry.get("name"):
                raw_name = entry["name"]
            else:
                continue
            found.add(_normalize_cookie_name(raw_name))

    # Names that normalize to nothing carry no information.
    found.discard("")
    return found
|
||||
|
||||
|
||||
def _lookup_library(db: Session, names: Iterable[str]) -> dict[str, dict]:
    """Look up cookie names in ``compliance.cookie_library``.

    The match is case-insensitive: the query compares ``LOWER(cookie_name)``
    against the lowercased input names.

    Args:
        db: Database session used for the query.
        names: Cookie names to look up; falsy entries are skipped.

    Returns:
        ``{lowercased cookie_name: {"category": ..., "vendor": ...}}`` for
        every matching library row. Empty if there is nothing to look up or
        the query fails (the failure is logged as a warning).
    """
    # Lowercase (and de-duplicate) the input as well; the SQL side is already
    # lowercased, so a mixed-case name would otherwise never match.
    wanted = sorted({n.lower() for n in names if n})
    if not wanted:
        return {}
    try:
        rows = db.execute(sa_text(
            "SELECT cookie_name, actual_category, vendor_name "
            "FROM compliance.cookie_library "
            "WHERE LOWER(cookie_name) = ANY(:lc)"
        ), {"lc": wanted}).fetchall()
        return {r[0].lower(): {"category": r[1], "vendor": r[2]} for r in rows}
    except Exception as e:
        logger.warning("library lookup failed: %s", e)
        return {}
|
||||
|
||||
|
||||
def audit_cookie_compliance(
    db: Session | None,
    cookie_doc_text: str | None,
    banner_result: dict | None,
) -> dict:
    """Compare declared cookies with cookies seen in the browser.

    Args:
        db: Optional database session; when falsy the library lookup is
            skipped and ``library_metadata`` stays empty.
        cookie_doc_text: Text of the cookie policy.
        banner_result: Scan result containing the per-phase cookie lists.

    Returns:
        Dict with the counts ``declared_count``, ``browser_count`` and
        ``library_count``, the sorted lists ``compliant`` (declared and seen),
        ``undeclared_in_browser`` and ``declared_not_loaded``, the
        ``library_metadata`` mapping, and ``high_findings`` /
        ``low_findings`` (sizes of the undeclared / not-loaded lists).
    """
    declared = _extract_declared_cookies(cookie_doc_text)
    browser = _extract_browser_cookies(banner_result)

    library = _lookup_library(db, declared | browser) if db else {}

    undeclared = sorted(browser - declared)
    not_loaded = sorted(declared - browser)
    in_both = sorted(declared & browser)

    return {
        "declared_count": len(declared),
        "browser_count": len(browser),
        "library_count": len(library),
        "compliant": in_both,
        "undeclared_in_browser": undeclared,
        "declared_not_loaded": not_loaded,
        "library_metadata": library,
        "high_findings": len(undeclared),
        "low_findings": len(not_loaded),
    }
|
||||
|
||||
|
||||
def build_cookie_audit_block_html(audit: dict) -> str:
    """Render the cookie audit result as an HTML block for the report mail.

    Args:
        audit: Audit dict with ``declared_count``, ``browser_count`` and the
            lists ``undeclared_in_browser``, ``declared_not_loaded`` and
            ``compliant``. Missing keys are treated as zero / empty.

    Returns:
        An empty string for a falsy ``audit``; otherwise one ``<div>`` with a
        summary headline and one coloured sub-block per non-empty list. Each
        list shows at most 50 names followed by a "+N weitere" marker.
    """
    # Local import keeps the block self-contained; stdlib only.
    import html

    if not audit:
        return ""

    def _name_list(names: list, limit: int = 50) -> str:
        # Cookie names come from scanned sites and policy texts, i.e. they
        # are untrusted and must not be able to inject markup into the mail.
        shown = ", ".join(html.escape(str(n)) for n in names[:limit])
        hidden = len(names) - limit
        return shown + (f' ... +{hidden} weitere' if hidden > 0 else '')

    n_dec = audit.get("declared_count", 0)
    n_brw = audit.get("browser_count", 0)
    undeclared = audit.get("undeclared_in_browser") or []
    declared_only = audit.get("declared_not_loaded") or []
    compliant = audit.get("compliant") or []
    n_undecl = len(undeclared)
    n_dec_only = len(declared_only)
    n_both = len(compliant)

    # Headline turns red as soon as one undeclared cookie was seen.
    sev_color = "#dc2626" if n_undecl else "#16a34a"

    undecl_html = ""
    if undeclared:
        undecl_html = (
            '<div style="margin-top:10px;padding:10px 12px;background:#fee2e2;'
            'border:1px solid #fecaca;border-radius:6px">'
            f'<strong style="color:#991b1b">❌ {n_undecl} Cookie'
            f'{"s" if n_undecl != 1 else ""} im Browser geladen, '
            'aber NICHT in der Cookie-Richtlinie deklariert:</strong>'
            '<div style="font-family:monospace;font-size:10px;color:#7f1d1d;'
            'margin-top:6px;max-height:200px;overflow:auto">'
            + _name_list(undeclared) +
            '</div>'
            '<div style="font-size:10px;color:#7f1d1d;margin-top:4px;'
            'font-style:italic">Art. 13(1)(c) DSGVO + § 25 TDDDG — '
            'die Empfaengerliste muss vollstaendig sein. Diese Cookies '
            'sind potenziell ungenannte Verarbeitungen.</div>'
            '</div>'
        )

    dec_only_html = ""
    if declared_only:
        dec_only_html = (
            '<div style="margin-top:10px;padding:10px 12px;background:#fef3c7;'
            'border:1px solid #fde68a;border-radius:6px">'
            f'<strong style="color:#92400e">⚠️ {n_dec_only} Cookie'
            f'{"s" if n_dec_only != 1 else ""} in der Richtlinie '
            'deklariert, aber bei diesem Audit NICHT im Browser gesehen:</strong>'
            '<div style="font-family:monospace;font-size:10px;color:#78350f;'
            'margin-top:6px;max-height:200px;overflow:auto">'
            + _name_list(declared_only) +
            '</div>'
            '<div style="font-size:10px;color:#78350f;margin-top:4px;'
            'font-style:italic">Kein direkter Verstoss — die Cookies '
            'koennen nur in bestimmten User-Journeys / Geo-Regionen / '
            'eingeloggten Zustaenden geladen werden. Empfehlung: '
            'pruefen ob die Cookie-Richtlinie veraltet ist.</div>'
            '</div>'
        )

    compliant_html = ""
    if compliant:
        compliant_html = (
            '<div style="margin-top:10px;padding:10px 12px;background:#dcfce7;'
            'border:1px solid #bbf7d0;border-radius:6px">'
            f'<strong style="color:#166534">✓ {n_both} Cookie'
            f'{"s" if n_both != 1 else ""} sowohl deklariert als auch geladen '
            '(compliant):</strong>'
            '<div style="font-family:monospace;font-size:10px;color:#14532d;'
            'margin-top:6px;max-height:150px;overflow:auto">'
            + _name_list(compliant) +
            '</div>'
            '</div>'
        )

    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:14px 18px;'
        'background:#fff;border:1px solid #cbd5e1;border-radius:8px">'
        f'<div style="font-size:11px;color:{sev_color};text-transform:uppercase;'
        f'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Cookie-Compliance-Audit — 3-Quellen-Vergleich</div>'
        '<h3 style="margin:0 0 6px;font-size:14px;color:#1e293b">'
        f'{n_dec} in Richtlinie · {n_brw} im Browser · '
        f'{n_both} compliant · {n_undecl} undokumentiert · '
        f'{n_dec_only} nicht geladen</h3>'
        '<p style="margin:0 0 8px;font-size:11px;color:#475569;line-height:1.5">'
        'Wir vergleichen die in der Cookie-Richtlinie genannten Cookies '
        # Spelling fix in the user-facing text: "laed" -> "laedt".
        'mit dem was der Browser nach Akzeptieren tatsaechlich laedt. '
        'Undokumentierte Cookies im Browser sind ein direkter Verstoss '
        'gegen die DSGVO-Informationspflicht.'
        '</p>'
        + undecl_html + dec_only_html + compliant_html +
        '</div>'
    )
|
||||
@@ -0,0 +1,157 @@
|
||||
"""
|
||||
P102 — Cookie-Library-Mismatch-Detection pro Site.
|
||||
|
||||
Vergleicht die in einem Lauf erfassten Cookies (mit deklarierter
|
||||
Kategorie aus dem Cookie-Doc-Text) gegen die Library
|
||||
(compliance.cookie_library). Liefert Mismatches: deklariert ≠ Library.
|
||||
|
||||
Genutzt im Mail-Render als neuer Block "Cookie-Klassifikations-Pruefung".
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Ordered (pattern, category) pairs used to guess the category a cookie
# policy declares for a cookie. The list is scanned top to bottom and the
# first matching pattern decides, so the order is significant. All patterns
# are case-insensitive and match whole words (German and English terms).
_CATEGORY_PATTERNS = [
    # "funktional" is treated as "essential" here.
    (re.compile(r"\b(?:strictly[-\s]?)?(?:notwendig|essential|funktional|"
                r"funktionscookie|technisch[- ]?notwendig)\b", re.I),
     "essential"),
    (re.compile(r"\b(?:tracking|analytics|analyse|statistik|"
                r"measurement|performance)\b", re.I),
     "statistics"),
    (re.compile(r"\b(?:marketing|werbung|advertising|targeting|"
                r"drittanbieter[- ]?cookie)\b", re.I),
     "marketing"),
    # NOTE(review): "share" / "like" are common English words and may match
    # unrelated policy text - confirm this is intended.
    (re.compile(r"\b(?:social[-\s]?media|share|like)\b", re.I),
     "social_media"),
]
|
||||
|
||||
|
||||
def _category_for(name: str, doc_text: str) -> str | None:
    """Guess the category the policy text declares for a cookie.

    Locates the first (case-sensitive) occurrence of ``name`` in ``doc_text``
    and scans a window from 50 characters before to 400 characters after the
    start of that occurrence. The category of the first entry in
    ``_CATEGORY_PATTERNS`` that matches the window is returned.

    Returns:
        The category string, or ``None`` if either argument is empty, the
        name does not occur in the text, or no pattern matches.
    """
    if not (doc_text and name):
        return None
    position = doc_text.find(name)
    if position < 0:
        return None
    window_start = max(0, position - 50)
    snippet = doc_text[window_start:position + 400]
    return next(
        (category for pattern, category in _CATEGORY_PATTERNS
         if pattern.search(snippet)),
        None,
    )
|
||||
|
||||
|
||||
def _load_library(db: Session) -> dict[str, dict]:
    """Load the complete cookie library into a dict.

    Returns:
        ``{lowercased cookie_name: {"category": actual_category,
        "vendor": vendor_name}}`` for every row of
        ``compliance.cookie_library``. If two rows differ only in the case of
        the name, the later row wins. Database errors propagate to the caller.
    """
    query = text(
        "SELECT cookie_name, actual_category, vendor_name "
        "FROM compliance.cookie_library"
    )
    library: dict[str, dict] = {}
    for row in db.execute(query).fetchall():
        library[row[0].lower()] = {"category": row[1], "vendor": row[2]}
    return library
|
||||
|
||||
|
||||
def detect_mismatches(
    db: Session,
    cookie_names_seen: list[str],
    doc_text: str,
) -> list[dict]:
    """Find cookies whose declared category differs from the library category.

    Each distinct cookie name (compared case-insensitively, first spelling
    wins) is checked once. A cookie is skipped when no declared category can
    be derived from ``doc_text``, when the library has no entry for it, or
    when the library category is ``None`` / ``"unknown"`` or equals the
    declared one.

    Severity is ``"HIGH"`` when the library says ``"marketing"`` while the
    policy declares ``"essential"`` or ``"statistics"``; otherwise
    ``"MEDIUM"``.

    Returns:
        List of finding dicts with the keys ``cookie``, ``declared_category``,
        ``library_category``, ``library_vendor`` and ``severity``; empty if
        either input is empty.
    """
    if not (cookie_names_seen and doc_text):
        return []

    library = _load_library(db)
    handled: set[str] = set()
    results: list[dict] = []

    for raw_name in cookie_names_seen:
        cookie = (raw_name or "").strip()
        key = cookie.lower()
        if not cookie or key in handled:
            continue
        handled.add(key)

        declared_cat = _category_for(cookie, doc_text)
        if not declared_cat:
            continue
        entry = library.get(key)
        if not entry:
            continue
        library_cat = entry["category"]
        if library_cat in (None, "unknown") or library_cat == declared_cat:
            continue

        # Marketing purpose declared as technically necessary or purely
        # statistical is the serious case; every other deviation is MEDIUM.
        disguised_marketing = (
            library_cat == "marketing"
            and declared_cat in ("essential", "statistics")
        )

        results.append({
            "cookie": cookie,
            "declared_category": declared_cat,
            "library_category": library_cat,
            "library_vendor": entry["vendor"],
            "severity": "HIGH" if disguised_marketing else "MEDIUM",
        })

    return results
|
||||
|
||||
|
||||
def build_mismatch_block_html(findings: list[dict]) -> str:
    """Render category-mismatch findings as an HTML block for the report mail.

    Args:
        findings: Finding dicts with the keys ``cookie``,
            ``declared_category``, ``library_category``, ``library_vendor``
            and ``severity``.

    Returns:
        An empty string if there are no findings; otherwise one ``<div>``
        with a headline stating the total number of findings and a list of
        at most the first 25 findings.
    """
    # Local import keeps the block self-contained; stdlib only.
    import html

    if not findings:
        return ""

    n_high = sum(1 for f in findings if f["severity"] == "HIGH")
    items: list[str] = []
    for finding in findings[:25]:
        sev_color = "#dc2626" if finding["severity"] == "HIGH" else "#d97706"
        # Cookie names come from the scanned site and vendor/category values
        # from the database; escape all of them before embedding in HTML.
        cookie = html.escape(str(finding["cookie"]))
        declared = html.escape(str(finding["declared_category"]))
        library_cat = html.escape(str(finding["library_category"]))
        vendor = html.escape(str(finding["library_vendor"]))
        items.append(
            f'<li style="margin-bottom:6px;font-size:11px">'
            f'<code style="background:#f1f5f9;padding:1px 4px;border-radius:2px">'
            f'{cookie}</code> '
            f'<span style="color:#64748b">— deklariert als</span> '
            f'<strong>{declared}</strong>, '
            f'<span style="color:#64748b">unsere Bibliothek + verbreitete '
            f'Vendor-Doku sagen</span> <strong style="color:{sev_color}">'
            f'{library_cat}</strong> '
            f'(Vendor: {vendor})'
            f'</li>'
        )

    high_note = f" ({n_high} davon mit erhoehter Bedeutung)" if n_high else ""

    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:14px 18px;'
        'background:#fffbeb;border:1px solid #fde68a;border-radius:8px">'
        '<div style="font-size:11px;color:#92400e;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Cookie-Klassifikations-Pruefung</div>'
        f'<h3 style="margin:0 0 8px;font-size:14px;color:#1e293b">'
        f'{len(findings)} Cookie{"s" if len(findings) != 1 else ""}'
        f' mit abweichender Klassifikation gefunden'
        f'{high_note}'
        f'</h3>'
        '<p style="margin:0 0 10px;font-size:11px;color:#475569;line-height:1.5">'
        'Wir haben die in Ihrer Cookie-Richtlinie deklarierte Kategorie der '
        'Cookies mit unserer globalen Bibliothek (~2.300 Cookies aus Open-'
        'Cookie-Database + DACH-spezifischen Quellen) und der verbreiteten '
        'Vendor-Doku abgeglichen. Bei den folgenden Cookies stimmt die '
        'deklarierte Kategorie nicht mit dem typischerweise erwarteten '
        'Zweck ueberein. Das ist kein automatischer Verstoss — aber ein '
        'Pruefanlass: bei Marketing-Cookies braucht es Einwilligung, bei '
        'als "essential" deklarierten nicht. Empfehlung: mit DSB / '
        'Marketing-Agentur klaeren ob die Klassifikation korrigiert '
        'oder die Einwilligung anders eingeholt werden muss.</p>'
        '<ul style="margin:0 0 0 18px;padding:0">'
        + "".join(items) +
        '</ul>'
        '<p style="margin:8px 0 0;font-size:10px;color:#94a3b8;'
        'font-style:italic">Hintergrund: Art. 13(1)(c) DSGVO + EDPB 5/2020 '
        '— der angegebene Verarbeitungszweck muss dem tatsaechlichen '
        'entsprechen.</p>'
        '</div>'
    )
|
||||
@@ -0,0 +1,216 @@
|
||||
"""
|
||||
P104 — Cookie-Network-Tracing (Stufe 4).
|
||||
|
||||
cookies_detailed[i].domain zeigt welche Domain das Cookie via Set-Cookie
|
||||
gesetzt hat. Wir vergleichen:
|
||||
* Site-Hauptdomain vs Cookie-Domain → First-Party / Third-Party
|
||||
* Cookie-Domain vs bekannte Vendoren → wer ist der echte Empfaenger
|
||||
* Vendor-Land vs EU/Drittland → Drittland-Transfer-Hinweis
|
||||
|
||||
Defeat-Device-Pattern: "Funktional"-Cookie wird aber von doubleclick.net
|
||||
gesetzt → das ist physisch ein Third-Party-Tracking-Cookie, kein
|
||||
funktionales First-Party-Cookie.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from urllib.parse import urlparse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Vendor domain -> (known vendor name, two-letter country code).
# Keys carry a leading dot and are compared as domain suffixes by
# _lookup_vendor_by_domain; iteration order decides when several suffixes
# would match.
_DOMAIN_VENDORS: dict[str, tuple[str, str]] = {
    ".doubleclick.net": ("Google DoubleClick", "US"),
    ".google.com": ("Google", "US"),
    ".google-analytics.com": ("Google Analytics", "US"),
    ".googletagmanager.com": ("Google Tag Manager", "US"),
    ".googleadservices.com": ("Google Ads", "US"),
    ".gstatic.com": ("Google CDN", "US"),
    ".facebook.com": ("Meta / Facebook", "US"),
    ".facebook.net": ("Meta / Facebook", "US"),
    ".instagram.com": ("Meta / Instagram", "US"),
    ".linkedin.com": ("LinkedIn (Microsoft)", "US"),
    ".pinterest.com": ("Pinterest", "US"),
    ".pinimg.com": ("Pinterest", "US"),
    ".tiktok.com": ("TikTok (ByteDance)", "CN"),
    ".bing.com": ("Microsoft Bing", "US"),
    ".clarity.ms": ("Microsoft Clarity", "US"),
    ".criteo.com": ("Criteo", "FR"),
    ".adnxs.com": ("AppNexus / Xandr", "US"),
    ".rubiconproject.com": ("Rubicon Project", "US"),
    ".pubmatic.com": ("PubMatic", "US"),
    ".adobedtm.com": ("Adobe DTM", "US"),
    ".adobetarget.com": ("Adobe Target", "US"),
    ".demdex.net": ("Adobe Experience Cloud", "US"),
    ".omtrdc.net": ("Adobe Analytics", "US"),
    ".everesttech.net": ("Adobe Advertising Cloud", "US"),
    ".2o7.net": ("Adobe Analytics", "US"),
    ".adform.net": ("AdForm", "DK"),
    ".trade-desk.com": ("The Trade Desk", "US"),
    ".tradedesk.com": ("The Trade Desk", "US"),
    ".adsrvr.org": ("The Trade Desk", "US"),
    ".hotjar.com": ("Hotjar", "MT"),
    ".matomo.cloud": ("Matomo", "DE"),
    ".etracker.com": ("etracker", "DE"),
    ".etracker.de": ("etracker", "DE"),
    ".cloudflare.com": ("Cloudflare", "US"),
    ".cookielaw.org": ("OneTrust", "US"),
    ".cookiebot.com": ("Cookiebot (Cybot)", "DK"),
    ".usercentrics.eu": ("Usercentrics", "DE"),
    ".usercentrics.com": ("Usercentrics", "DE"),
    ".consensu.org": ("IAB Europe TCF", "BE"),
    ".datadoghq.eu": ("Datadog", "US"),
    ".datadoghq.com": ("Datadog", "US"),
    ".datadome.co": ("DataDome", "FR"),
    ".incapsula.com": ("Imperva Incapsula", "US"),
    ".imperva.com": ("Imperva", "US"),
    ".akamai.net": ("Akamai", "US"),
    ".akamaiedge.net": ("Akamai", "US"),
    ".salesforce.com": ("Salesforce", "US"),
    ".force.com": ("Salesforce", "US"),
}

# Country codes that trace_cookie_network flags as "third country"
# (third_country=True) when a matched vendor is located there.
_NON_EU_COUNTRIES = {"US", "CN", "RU", "IN", "JP", "BR", "AU"}
|
||||
|
||||
|
||||
def _registrable_domain(host: str) -> str:
|
||||
"""vw.de von www.vw.de oder bla.vw.de oder vw.de"""
|
||||
h = (host or "").lstrip(".").lower()
|
||||
parts = h.split(".")
|
||||
if len(parts) >= 2:
|
||||
return ".".join(parts[-2:])
|
||||
return h
|
||||
|
||||
|
||||
def _lookup_vendor_by_domain(cookie_domain: str) -> tuple[str, str] | None:
    """Resolve a cookie domain to a known ``(vendor, country)`` pair.

    The domain is lowercased and given a leading dot if it has none, then
    compared as a suffix against the keys of ``_DOMAIN_VENDORS``. The first
    matching entry in table order is returned.

    Returns:
        ``(vendor_name, country_code)``, or ``None`` for an empty domain or
        when no suffix matches.
    """
    if not cookie_domain:
        return None
    dotted = cookie_domain.lower()
    # The leading dot makes "x.google.com" match ".google.com" while
    # "notgoogle.com" does not.
    dotted = dotted if dotted.startswith(".") else "." + dotted
    return next(
        (
            (vendor, country)
            for suffix, (vendor, country) in _DOMAIN_VENDORS.items()
            if dotted.endswith(suffix)
        ),
        None,
    )
|
||||
|
||||
|
||||
def trace_cookie_network(
    cookies_detailed: list[dict] | None,
    site_url: str | None = None,
) -> list[dict]:
    """Flag cookies declared as essential/functional that belong to external parties.

    For every cookie dict with a ``name`` and ``domain`` whose
    ``declared_category`` is ``essential``, ``functional`` or ``necessary``:

    * if the cookie's registrable domain differs from the site's, a finding
      is emitted with severity ``HIGH`` when the domain belongs to a known
      vendor and ``MEDIUM`` otherwise;
    * otherwise, if the domain belongs to a known vendor, a ``MEDIUM``
      finding is emitted (e.g. a tracker served via CNAME as first party).

    Args:
        cookies_detailed: Cookie dicts; non-dict entries are ignored.
        site_url: URL (or bare host) of the scanned site. Without it no
            cookie can be classified as third party.

    Returns:
        List of finding dicts with the keys ``cookie``, ``declared``,
        ``cookie_domain``, ``site_domain``, ``vendor``, ``vendor_country``,
        ``third_country``, ``severity`` and ``label``. ``label`` is an HTML
        fragment; every interpolated value in it is HTML-escaped. All other
        fields hold the raw values.
    """
    # Local import keeps the block self-contained; stdlib only.
    import html

    if not cookies_detailed:
        return []

    site_host = ""
    if site_url:
        try:
            # Without a scheme urlparse puts the host into .path; prefixing
            # "//" makes it land in the network location. .hostname (unlike
            # .netloc) carries neither port nor credentials.
            target = site_url if "//" in site_url else "//" + site_url
            site_host = _registrable_domain(urlparse(target).hostname or "")
        except Exception:
            site_host = ""

    essential_labels = ("essential", "functional", "necessary")

    out: list[dict] = []
    for ck in cookies_detailed:
        if not isinstance(ck, dict):
            continue
        name = (ck.get("name") or "").strip()
        domain = (ck.get("domain") or "").strip()
        declared = (ck.get("declared_category") or "").lower().strip()
        if not name or not domain:
            continue
        # Only cookies declared as essential/functional can produce a finding.
        if declared not in essential_labels:
            continue

        cookie_reg = _registrable_domain(domain)
        is_third_party = bool(site_host and cookie_reg != site_host)
        vendor_match = _lookup_vendor_by_domain(domain)

        # Cookie attributes come from the scanned site; escape them before
        # they are embedded in the HTML label.
        safe_name = html.escape(name)
        safe_declared = html.escape(declared)
        safe_domain = html.escape(domain)

        if is_third_party:
            # Essential/functional by declaration, but set by another party.
            sev = "HIGH" if vendor_match else "MEDIUM"
            vendor_name = vendor_match[0] if vendor_match else cookie_reg
            country = vendor_match[1] if vendor_match else ""
            third_country = country in _NON_EU_COUNTRIES
            out.append({
                "cookie": name,
                "declared": declared,
                "cookie_domain": domain,
                "site_domain": site_host,
                "vendor": vendor_name,
                "vendor_country": country,
                "third_country": third_country,
                "severity": sev,
                "label": (
                    f"Cookie '{safe_name}' deklariert als '{safe_declared}', "
                    f"wird aber von externer Domain "
                    f"<strong>{html.escape(vendor_name)}</strong> "
                    f"({safe_domain}) gesetzt"
                    + (f" — Drittland: {country}" if third_country else "")
                ),
            })
        elif vendor_match:
            # First-party by domain comparison, but the domain still belongs
            # to a known vendor.
            out.append({
                "cookie": name,
                "declared": declared,
                "cookie_domain": domain,
                "site_domain": site_host,
                "vendor": vendor_match[0],
                "vendor_country": vendor_match[1],
                "third_country": vendor_match[1] in _NON_EU_COUNTRIES,
                "severity": "MEDIUM",
                "label": (
                    f"Cookie '{safe_name}' deklariert als '{safe_declared}', "
                    f"Domain {safe_domain} gehoert aber zu "
                    f"<strong>{html.escape(vendor_match[0])}</strong> "
                    f"({vendor_match[1]})"
                ),
            })
    return out
|
||||
|
||||
|
||||
def build_network_trace_block_html(findings: list[dict]) -> str:
    """Render network-trace findings as an HTML block for the report mail.

    Each finding's ``label`` is inserted as-is (it is expected to be an HTML
    fragment), coloured by ``severity``, and followed by a "DRITTLAND" badge
    when ``third_country`` is truthy. At most the first 30 findings are
    listed; the headline states the total number of findings and how many of
    them involve a third country.

    Returns:
        An empty string if there are no findings, otherwise the HTML block.
    """
    if not findings:
        return ""

    def _badge(finding: dict) -> str:
        if not finding.get("third_country"):
            return ""
        return (
            f' <span style="background:#fee2e2;color:#991b1b;'
            f'padding:1px 5px;border-radius:8px;font-size:9px;'
            f'font-weight:600">DRITTLAND {finding.get("vendor_country","")}</span>'
        )

    def _item(finding: dict) -> str:
        colour = "#dc2626" if finding["severity"] == "HIGH" else "#d97706"
        return (
            f'<li style="margin-bottom:6px;font-size:11px;line-height:1.5;'
            f'color:{colour}">{finding["label"]}{_badge(finding)}</li>'
        )

    third_country_total = len([x for x in findings if x.get("third_country")])
    list_html = "".join(_item(entry) for entry in findings[:30])
    plural = "s" if len(findings) != 1 else ""
    third_note = (
        f" — davon {third_country_total} mit Drittland-Transfer"
        if third_country_total else ""
    )

    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:14px 18px;'
        'background:#fff7ed;border:1px solid #fed7aa;border-radius:8px">'
        '<div style="font-size:11px;color:#9a3412;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Cookie-Netzwerk-Verhalten (Defeat-Device-Heuristik)</div>'
        f'<h3 style="margin:0 0 6px;font-size:14px;color:#1e293b">'
        f'{len(findings)} Cookie{plural} '
        f'mit Vendor-Domain-Diskrepanz'
        f'{third_note}'
        f'</h3>'
        '<p style="margin:0 0 10px;font-size:11px;color:#475569;line-height:1.5">'
        'Diese Cookies sind als "essential" oder "funktional" deklariert, '
        'werden aber von einer externen Domain gesetzt — typisch fuer '
        'getarnte Tracker. Drittland-Markierungen sind besonders kritisch: '
        'sie loesen Pflichten nach Art. 44-49 DSGVO aus (SCC / Angemessen-'
        'heitsbeschluss / Schrems II Folge-Massnahmen).'
        '</p>'
        '<ul style="margin:0 0 0 18px;padding:0">'
        + list_html +
        '</ul></div>'
    )
|
||||
@@ -0,0 +1,147 @@
|
||||
"""
|
||||
Cookie-zu-Vendor-Fallback (P52 Lite).
|
||||
|
||||
Wenn weder cmp_payloads noch vendor_llm_extract Vendors lieferten,
|
||||
matchen wir die im after_accept gesehenen Cookies gegen die
|
||||
compliance.cookie_library und bauen Vendor-Records aus den Library-
|
||||
Eintraegen (cookie_name → vendor_name, actual_category).
|
||||
|
||||
Typisches Szenario: VW nutzt ein Custom-CMP (cookiemgmt-Wrapper),
|
||||
kein bekanntes IAB-Tool. cmp_payloads = leer, aber after_accept.cookies
|
||||
hat 28 Eintraege. Diese 28 Cookies sind in der Library = ~15-20 Vendors.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Iterable
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _collect_cookie_names(banner_result: dict | None) -> set[str]:
|
||||
names: set[str] = set()
|
||||
if not isinstance(banner_result, dict):
|
||||
return names
|
||||
for ph in (banner_result.get("phases") or {}).values():
|
||||
if not isinstance(ph, dict):
|
||||
continue
|
||||
for ck in (ph.get("cookies") or []):
|
||||
if isinstance(ck, str):
|
||||
names.add(ck.strip())
|
||||
elif isinstance(ck, dict):
|
||||
n = (ck.get("name") or "").strip()
|
||||
if n:
|
||||
names.add(n)
|
||||
return {n for n in names if n and len(n) <= 120}
|
||||
|
||||
|
||||
def lookup_vendors_from_library(
    db: Session,
    cookie_names: Iterable[str],
) -> list[dict]:
    """Resolve cookie names to vendor records via ``compliance.cookie_library``.

    The lookup is case-insensitive on the cookie name. Rows without a vendor
    name are ignored. One record is produced per vendor; its ``category`` is
    taken from the first matching row and every matching cookie is appended
    to its ``cookies`` list.

    Returns:
        List of vendor dicts with ``source == "library_fallback"``; empty
        when no usable cookie name was supplied.
    """
    lowered = [n.lower() for n in cookie_names if n]
    if not lowered:
        return []

    query = text(
        """
        SELECT cookie_name, actual_category, vendor_name
        FROM compliance.cookie_library
        WHERE LOWER(cookie_name) = ANY(:lc)
        """
    )
    result_rows = db.execute(query, {"lc": lowered}).fetchall()

    vendors: dict[str, dict] = {}
    for cookie_name, category, vendor_name in result_rows:
        if not vendor_name:
            continue
        if vendor_name not in vendors:
            vendors[vendor_name] = {
                "name": vendor_name,
                "country": "",
                "purpose": "",
                "category": category or "",
                "opt_out_url": "",
                "privacy_policy_url": "",
                "persistence": "",
                "cookies": [],
                "source": "library_fallback",
            }
        vendors[vendor_name]["cookies"].append({
            "name": cookie_name, "purpose": "", "expiry": "",
            "is_third_party": True,
        })
    return list(vendors.values())
|
||||
|
||||
|
||||
def fallback_vendors_for_run(
    db: Session,
    banner_result: dict | None,
    existing_vendor_count: int,
    cookie_doc_text: str | None = None,
) -> list[dict]:
    """Return extra vendor records to merge with the run's cmp_vendors.

    Cookie names come from the banner result and, when ``cookie_doc_text``
    is given, additionally from table-like rows found in that text.

    The library lookup is skipped (``[]`` is returned) when there are no
    names at all, or when fewer than 5 names were found while at least 5
    vendors already exist. In every other case the lookup runs.
    """
    candidates = _collect_cookie_names(banner_result)
    if cookie_doc_text:
        # Add names that appear as table rows in the cookie document.
        candidates = candidates.union(_extract_cookie_names_from_doc(cookie_doc_text))

    enough_known_already = len(candidates) < 5 and existing_vendor_count >= 5
    if not candidates or enough_known_already:
        return []

    found = lookup_vendors_from_library(db, candidates)
    if found:
        logger.info(
            "Cookie-Library-Fallback: %d Vendors aus %d Cookies "
            "(existing cmp_vendors=%d)",
            len(found), len(candidates), existing_vendor_count,
        )
    return found
|
||||
|
||||
|
||||
_TABLE_ROW_RE = re.compile(
|
||||
r"\b([A-Za-z_][A-Za-z0-9_\-\.]{2,40})\s+"
|
||||
r"(?:Tracking Cookies|Session Cookies|Funktional|Marketing|"
|
||||
r"Analytics|Performance|Notwendig|Strictly\s+Necessary|"
|
||||
r"Statistik|Werbung|Targeting|Personalisierung)",
|
||||
re.I,
|
||||
)
|
||||
|
||||
|
||||
def _extract_cookie_names_from_doc(text: str) -> set[str]:
|
||||
"""Pattern-basiertes Erkennen von Cookie-Tabellen-Zeilen.
|
||||
|
||||
VW-Cookie-Tabelle hat Form:
|
||||
'IDE Tracking Cookies (Marketing) Dieser Cookie ... 13 Monate'
|
||||
Das fangen wir mit einem Cookie-Name-vor-Category-Pattern.
|
||||
"""
|
||||
out: set[str] = set()
|
||||
for m in _TABLE_ROW_RE.finditer(text):
|
||||
name = m.group(1).strip()
|
||||
# Filter offensichtliche Noise (Pronomen, Verben)
|
||||
nl = name.lower()
|
||||
if nl in ("dieser", "diese", "ein", "der", "die", "das",
|
||||
"session", "permanent", "funktional", "notwendig",
|
||||
"marketing", "analytics", "werbung", "anbieter",
|
||||
"google", "facebook", "tracking", "cookie", "cookies"):
|
||||
continue
|
||||
if len(name) >= 3:
|
||||
out.add(name)
|
||||
return out
|
||||
@@ -0,0 +1,148 @@
|
||||
"""
|
||||
P103 — Cookie-Value-Entropy-Check (Stufe 3).
|
||||
|
||||
Bewertet ob der Cookie-Wert zur deklarierten Kategorie passt:
|
||||
* "Funktional" + 2-char-Wert ('1', 'de') → konsistent (Flag)
|
||||
* "Funktional" + 64-char-Base64 → INKONSISTENT (Tracking-ID-Pattern)
|
||||
* "Marketing" + 32+ char Hash → konsistent
|
||||
* "Marketing" + 2-char-Wert → konsistent (Boolean-Opt-Out)
|
||||
|
||||
Defeat-Device-Pattern: Site deklariert "Funktional" um Consent zu
|
||||
umgehen, aber Wert sieht wie pseudonymisierte Tracking-ID aus.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _shannon_entropy(s: str) -> float:
|
||||
if not s:
|
||||
return 0.0
|
||||
from collections import Counter
|
||||
n = len(s)
|
||||
counts = Counter(s)
|
||||
return -sum((c / n) * math.log2(c / n) for c in counts.values())
|
||||
|
||||
|
||||
_BASE64_RE = re.compile(r"^[A-Za-z0-9+/=_-]{20,}$")
|
||||
_HEX_RE = re.compile(r"^[a-fA-F0-9]{16,}$")
|
||||
_UUID_RE = re.compile(
|
||||
r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-"
|
||||
r"[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
|
||||
)
|
||||
_FLAG_VALUES = {"0", "1", "true", "false", "yes", "no",
|
||||
"de", "en", "de-de", "en-us", "fr-fr",
|
||||
"accept", "deny", "essential", "on", "off"}
|
||||
|
||||
|
||||
def _classify_value_shape(value: str) -> str:
|
||||
"""Returns one of: 'flag', 'short_id', 'long_token', 'uuid', 'hash',
|
||||
'json_blob', 'unknown'."""
|
||||
if not value:
|
||||
return "flag"
|
||||
v = value.strip()
|
||||
if v.lower() in _FLAG_VALUES:
|
||||
return "flag"
|
||||
if len(v) <= 4:
|
||||
return "flag"
|
||||
if _UUID_RE.match(v):
|
||||
return "uuid"
|
||||
if _HEX_RE.match(v) and len(v) >= 32:
|
||||
return "hash"
|
||||
if _BASE64_RE.match(v) and len(v) >= 40:
|
||||
return "long_token"
|
||||
if v.startswith("{") or v.startswith("["):
|
||||
return "json_blob"
|
||||
if len(v) >= 16 and _shannon_entropy(v) > 3.5:
|
||||
return "long_token"
|
||||
if len(v) >= 6:
|
||||
return "short_id"
|
||||
return "flag"
|
||||
|
||||
|
||||
def check_cookies_for_entropy_mismatch(
    cookies_detailed: list[dict] | None,
) -> list[dict]:
    """Return findings for cookies whose value shape contradicts their category.

    A cookie is reported when its ``declared_category`` is ``essential``,
    ``functional`` or ``necessary`` while its ``value`` is classified as a
    UUID, hash or long token. Entries that are not dicts, or that lack a
    name or a declared category, are skipped. Every finding has severity
    ``MEDIUM``.
    """
    findings: list[dict] = []
    if not cookies_detailed:
        return findings

    low_risk_categories = ("essential", "functional", "necessary")
    identifier_shapes = ("uuid", "hash", "long_token")

    for cookie in cookies_detailed:
        if not isinstance(cookie, dict):
            continue
        name = (cookie.get("name") or "").strip()
        value = (cookie.get("value") or "").strip()
        declared = (cookie.get("declared_category") or "").lower().strip()
        if not (name and declared):
            continue

        shape = _classify_value_shape(value)
        # Only a low-risk declaration combined with an identifier-like
        # value is considered suspicious.
        if declared not in low_risk_categories or shape not in identifier_shapes:
            continue

        label = (
            f"Cookie '{name}' deklariert als '{declared}', "
            f"aber Wert ist ein {shape} ({len(value)} Zeichen) — "
            "typisches Tracking-ID-Pattern"
        )
        detail = (
            "Funktionale/notwendige Cookies speichern normalerweise "
            "kurze Flags (1, true, de-DE). Ein langer Hash/UUID-Wert "
            "in einem als 'essential' deklarierten Cookie ist ein "
            "Indikator fuer verstecktes Tracking — vergleichbar mit "
            "einem 'Defeat Device', das auf dem Pruefstand harmlos "
            "aussieht aber im Realbetrieb anderes tut."
        )
        findings.append({
            "cookie": name,
            "declared": declared,
            "value_shape": shape,
            "value_len": len(value),
            "severity": "MEDIUM",
            "label": label,
            "detail": detail,
        })
    return findings
|
||||
|
||||
|
||||
def build_entropy_block_html(findings: list[dict]) -> str:
    """Render entropy-mismatch findings as an inline-styled HTML block.

    Args:
        findings: Finding dicts with the keys ``cookie``, ``declared``,
            ``value_shape`` and ``value_len``.

    Returns:
        The HTML fragment, or ``""`` when ``findings`` is empty. Only the
        first 25 findings are listed; the headline counts all of them.
    """
    import html  # local import keeps the block self-contained

    if not findings:
        return ""

    def esc(raw: object) -> str:
        # Cookie names and categories come from scanned sites or user
        # input; escape them so they cannot inject markup.
        return html.escape(str(raw if raw is not None else ""))

    items: list[str] = []
    for f in findings[:25]:
        items.append(
            f'<li style="margin-bottom:6px;font-size:11px;line-height:1.5">'
            f'<strong style="color:#d97706">{esc(f.get("cookie"))}</strong> '
            f'<span style="color:#64748b">(deklariert: '
            f'<strong>{esc(f.get("declared"))}</strong>) — Wert-Shape:</span> '
            f'<code style="background:#fef3c7;padding:1px 4px;border-radius:2px">'
            f'{esc(f.get("value_shape"))}</code> '
            f'<span style="color:#64748b">({esc(f.get("value_len", 0))} Zeichen)</span>'
            f'</li>'
        )

    plural = "s" if len(findings) != 1 else ""
    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:14px 18px;'
        'background:#fffbeb;border:1px solid #fde68a;border-radius:8px">'
        '<div style="font-size:11px;color:#92400e;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Cookie-Werte-Plausibilitaet (Defeat-Device-Heuristik)</div>'
        f'<h3 style="margin:0 0 6px;font-size:14px;color:#1e293b">'
        f'{len(findings)} Cookie{plural} '
        'mit verdaechtigem Wert-Pattern</h3>'
        '<p style="margin:0 0 10px;font-size:11px;color:#475569;line-height:1.5">'
        'Diese Cookies sind als "essential" oder "funktional" deklariert, '
        'ihr tatsaechlicher Wert sieht aber wie eine Tracking-ID aus '
        '(UUID, Hash, langer Base64-Token). Empfehlung: pruefen ob diese '
        'Cookies wirklich nur technisch notwendig sind oder de facto '
        'pseudonymisierte User-Tracker.</p>'
        '<ul style="margin:0 0 0 18px;padding:0">'
        + "".join(items) +
        '</ul></div>'
    )
|
||||
@@ -0,0 +1,439 @@
|
||||
"""
|
||||
Parst Cookie-Tabellen die der User direkt ins Frontend kopiert.
|
||||
|
||||
Typische Quellen:
|
||||
* Browser-Copy aus VW/BMW/Mercedes Cookie-Richtlinie (Tab-getrennt)
|
||||
* Excel-Export aus Borlabs / OneTrust / Cookiebot Admin (CSV / Pipe)
|
||||
* Markdown-Tabelle aus interner Doku
|
||||
|
||||
Erkennt 4 Spalten-Layouts (heuristisch):
|
||||
1. [Name, Kategorie, Beschreibung, Speicherdauer, Provider]
|
||||
2. [Name, Provider, Zweck, Speicherdauer]
|
||||
3. [Name, Beschreibung, Speicherdauer]
|
||||
4. nur [Name, Speicherdauer]
|
||||
|
||||
Output: gleiche Vendor-Record-Struktur wie vendor_extractor / LLM —
|
||||
damit der Rest der Pipeline (VVT-Tabelle, Library-Mismatch-Check) ohne
|
||||
Aenderung weiterlaeuft.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_CATEGORY_LABELS = (
|
||||
"notwendig", "essential", "funktional", "tracking", "marketing",
|
||||
"statistik", "analyse", "analytics", "performance", "werbung",
|
||||
"advertising", "targeting", "preferences", "social_media",
|
||||
"strictly necessary", "personalisierung",
|
||||
)
|
||||
|
||||
|
||||
def _looks_like_separator(line: str) -> str | None:
|
||||
"""Detect the column-separator of a tabular line."""
|
||||
if "\t" in line and line.count("\t") >= 2:
|
||||
return "\t"
|
||||
if " | " in line and line.count(" | ") >= 2:
|
||||
return " | "
|
||||
if ";" in line and line.count(";") >= 2 and "," not in line[:20]:
|
||||
return ";"
|
||||
if "," in line and line.count(",") >= 3:
|
||||
return ","
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_category(s: str) -> str:
    """Map a free-text category cell to a canonical category name.

    The lower-cased input is scanned for the keywords in
    ``_CATEGORY_LABELS`` (in tuple order); the first keyword that is both
    contained in the text and has a canonical mapping decides the result
    (``essential``, ``marketing``, ``statistics``, ``functional`` or
    ``social_media``). Keywords without a mapping are skipped. When nothing
    maps, the first 30 characters of the lower-cased, stripped input are
    returned unchanged.
    """
    canonical = {
        "notwendig": "essential",
        "essential": "essential",
        "strictly necessary": "essential",
        "tracking": "marketing",
        "marketing": "marketing",
        "werbung": "marketing",
        "advertising": "marketing",
        "targeting": "marketing",
        "statistik": "statistics",
        "analyse": "statistics",
        "analytics": "statistics",
        "performance": "statistics",
        "funktional": "functional",
        "social_media": "social_media",
    }
    lowered = s.lower().strip()
    for keyword in _CATEGORY_LABELS:
        target = canonical.get(keyword)
        if target is not None and keyword in lowered:
            return target
    return lowered[:30]
|
||||
|
||||
|
||||
def _parse_persistence(s: str) -> str:
|
||||
"""Extracts 'Speicherdauer' notation."""
|
||||
m = re.search(
|
||||
r"(\d+\s*(sekunde|minute|stunde|tag|woche|monat|jahr|day|month|year)[^\s,;|]{0,5})",
|
||||
s, re.I,
|
||||
)
|
||||
if m:
|
||||
return m.group(1).strip()[:80]
|
||||
if re.search(r"\bsession\b", s, re.I):
|
||||
return "Session"
|
||||
if re.search(r"permanent", s, re.I):
|
||||
return "Permanent"
|
||||
return ""
|
||||
|
||||
|
||||
# Substrings that mark a line as a category / cookie-kind cell.
_CATEGORY_INDICATORS = (
    "funktionscookie", "tracking cookie", "trackingcookie",
    "marketing", "analytics", "necessary", "notwendig",
    "performance", "session cookie", "persistent cookie",
    "permanent cookie", "permanent/protokoll", "sitzungs-cookie",
)


def parse_block_format(text: str) -> list[dict]:
    """Parse a cookie table that was pasted as one cell per line.

    Expected layout per cookie (browser copy without tab separators):
    name / category / purpose / storage duration / cookie kind.

    Heuristic: a line is taken as a cookie name when the line after it
    contains a category indicator and the line itself passes the name
    checks. The following lines (a window of up to 10) are scanned for the
    cookie-kind line; the line right before it is taken as the storage
    duration and the lines in between as the purpose.

    Returns:
        Vendor records (``source == "block_paste"``) grouped by guessed
        vendor; ``[]`` for inputs shorter than 100 characters or with fewer
        than 10 non-empty lines.
    """
    if not text or len(text) < 100:
        return []
    lines = [stripped for stripped in (raw.strip() for raw in text.splitlines()) if stripped]
    total = len(lines)
    if total < 10:
        return []

    header_first_cells = ("name des cookies", "cookie name", "name")
    column_titles = ("name des cookies", "kategorie",
                     "verwendungszweck", "speicherdauer",
                     "art des cookies")
    kind_markers = (
        "permanent/protokoll", "session cookie",
        "persistent cookie", "permanent cookie",
        "sitzungs-cookie", "permanent/ protokoll",
    )
    unknown_internal = "Unbekannter Anbieter (VW-intern)"

    def mentions_category(lowered: str) -> bool:
        return any(token in lowered for token in _CATEGORY_INDICATORS)

    # Skip a header block (5 column titles) when the paste starts with one.
    pos = 0
    if lines[0].lower() in header_first_cells:
        pos = 5 if total > 5 else 1

    vendors: dict[str, dict] = {}
    seen: set[str] = set()
    while pos < total - 2:
        name_line = lines[pos]
        category_line = lines[pos + 1]
        category_lower = category_line.lower()
        # The line after a name must be a category cell; otherwise advance
        # by one line and retry.
        if not mentions_category(category_lower):
            pos += 1
            continue

        name_key = name_line.lower().strip()
        name_ok = (
            2 <= len(name_line) <= 80
            and not mentions_category(name_key)
            and name_key not in seen
            and name_key not in column_titles
        )
        if not name_ok:
            pos += 1
            continue

        # Scan ahead for the cookie-kind line that closes this record.
        purpose_parts: list[str] = []
        persistence = ""
        kind_line = ""
        for cursor in range(pos + 2, min(pos + 12, total)):
            candidate = lines[cursor]
            if any(marker in candidate.lower() for marker in kind_markers):
                kind_line = candidate
                if cursor > pos + 2:
                    persistence = lines[cursor - 1]
                break
            purpose_parts.append(candidate)

        # The last collected line is treated as the duration cell and is
        # left out of the purpose whenever more than one line was collected.
        if len(purpose_parts) > 1:
            purpose_parts = purpose_parts[:-1]
        purpose = " ".join(purpose_parts)[:500].strip()

        seen.add(name_key)
        is_marketing = "marketing" in category_lower or "tracking" in category_lower
        provider = _guess_vendor(name_line) or unknown_internal
        if is_marketing and provider == unknown_internal:
            # Marketing/tracking cookies are attributed to a third party.
            provider = "Unbekannter Drittanbieter (Marketing)"

        if provider not in vendors:
            vendors[provider] = {
                "name": provider, "country": "",
                "purpose": "", "category": _normalize_category(category_line),
                "opt_out_url": "", "privacy_policy_url": "",
                "persistence": "",
                "cookies": [],
                "source": "block_paste",
            }
        vendors[provider]["cookies"].append({
            "name": name_line,
            "purpose": purpose[:300],
            "expiry": persistence,
            "is_third_party": is_marketing,
        })
        # Continue after the kind line if found, else assume a 5-line record.
        pos = cursor + 1 if kind_line else pos + 5

    out = list(vendors.values())
    logger.info("parse_block_format: %d vendors / %d cookies",
                len(out), sum(len(v["cookies"]) for v in out))
    return out
|
||||
|
||||
|
||||
# Header keywords per logical column. Specific columns are listed before the
# generic "name"/"cookie" keywords so that cells such as "Art des Cookies",
# "Cookie type" or "Name des Anbieters" are not mistaken for the name column.
_HEADER_KEYWORDS = (
    ("provider", ("anbieter", "provider", "domain")),
    ("category", ("kategorie", "type", "art")),
    ("purpose", ("zweck", "purpose", "beschreib")),
    ("persistence", ("speicher", "dauer", "lebens", "expir", "laufzeit")),
    ("name", ("name", "cookie")),
)


def _map_header_columns(header_row: list[str]) -> dict[str, int]:
    """Map logical columns to indices of a lower-cased header row.

    Each column is assigned at most once (first matching cell wins); the
    name column defaults to 0 and every other column to -1 (absent).
    """
    col_idx = {"name": 0, "provider": -1, "category": -1,
               "purpose": -1, "persistence": -1}
    assigned: set[str] = set()
    for idx, cell in enumerate(header_row):
        for field, keywords in _HEADER_KEYWORDS:
            if field in assigned:
                continue
            if any(keyword in cell for keyword in keywords):
                col_idx[field] = idx
                assigned.add(field)
                break
    return col_idx


def parse_cookie_table(text: str) -> list[dict]:
    """Return vendor records parsed from a copy-pasted cookie table.

    Strategy:
      1. Detect a tab / pipe / semicolon / comma separator from the first 60
         non-empty lines (at least 3 lines must agree) and parse rows.
      2. If no separator is found, fall back to ``parse_block_format``.
      3. Otherwise return ``[]``.

    When the first row looks like a header, its cells decide which column
    holds name, provider, category, purpose and storage duration; without
    a header only the first column (name) is used. A missing provider is
    guessed from the cookie name.

    Returns:
        Vendor dicts with ``source == "table_paste"``; ``[]`` for inputs
        shorter than 100 characters or with fewer than 3 parsed rows.
    """
    if not text or len(text) < 100:
        return []
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    if not lines:
        return []

    # Vote on the separator using the first 60 non-empty lines.
    sep_counts: dict[str, int] = {}
    for ln in lines[:60]:
        detected = _looks_like_separator(ln)
        if detected:
            sep_counts[detected] = sep_counts.get(detected, 0) + 1
    if not sep_counts or max(sep_counts.values()) < 3:
        # No separator layout: try the one-cell-per-line block format.
        return parse_block_format(text) or []

    sep = max(sep_counts, key=sep_counts.get)
    logger.info("cookies_table_parser: detected separator '%s' (%d hits)",
                sep, sep_counts[sep])

    # Split every line that contains the separator into cells.
    rows: list[list[str]] = []
    for ln in lines:
        if sep in ln:
            parts = [p.strip().strip('"') for p in ln.split(sep)]
            if len(parts) >= 2 and parts[0]:
                rows.append(parts)
    if len(rows) < 3:
        return []

    # Treat the first row as a header if it contains a typical column title.
    header_row = [c.lower() for c in rows[0]]
    has_header = any(h in " ".join(header_row) for h in
                     ("cookie", "name", "anbieter", "provider", "zweck",
                      "kategorie", "speicherdauer", "dauer"))
    data_rows = rows[1:] if has_header else rows
    col_idx = _map_header_columns(header_row if has_header else [])

    def cell(row: list[str], field: str) -> str:
        """Return the cell of ``field`` in ``row``, or '' if absent."""
        idx = col_idx[field]
        return row[idx] if 0 <= idx < len(row) else ""

    # Aggregate cookies per vendor.
    by_vendor: dict[str, dict] = {}
    for r in data_rows:
        if len(r) < 2:
            continue
        name = r[col_idx["name"]] if col_idx["name"] < len(r) else r[0]
        name = (name or "").strip()
        if not name or len(name) > 120 or len(name) < 2:
            continue

        # No provider cell: guess the vendor from the cookie name.
        provider = (
            cell(r, "provider").strip()
            or _guess_vendor(name)
            or "Unbekannter Anbieter"
        )

        raw_category = cell(r, "category")
        category = _normalize_category(raw_category) if col_idx["category"] >= 0 and col_idx["category"] < len(r) else ""
        purpose = cell(r, "purpose")[:500]
        raw_persistence = cell(r, "persistence")
        persistence = _parse_persistence(raw_persistence) if raw_persistence else ""
        if not category:
            # Infer the category from the purpose text.
            category = _normalize_category(purpose)

        entry = by_vendor.setdefault(provider, {
            "name": provider, "country": "",
            "purpose": purpose[:300] if purpose else "",
            "category": category,
            "opt_out_url": "", "privacy_policy_url": "",
            "persistence": persistence,
            "cookies": [],
            "source": "table_paste",
        })
        entry["cookies"].append({
            "name": name, "purpose": purpose[:200],
            "expiry": persistence, "is_third_party": True,
        })

    out = list(by_vendor.values())
    logger.info("cookies_table_parser: %d vendors / %d cookies parsed",
                len(out), sum(len(v["cookies"]) for v in out))
    return out
|
||||
|
||||
|
||||
# The textContent of an HTML table concatenates cells without whitespace
# (e.g. "Permanent/Protokoll_fbcTracking Cookies (Marketing)..."). Cookie
# entries are located via two anchors:
#   - before: a typical end token of the previous row's last cell
#     (cookie kind / duration suffix such as Permanent/Protokoll,
#     Session Cookie, ...)
#   - after:  a category token (Tracking Cookies, Funktionscookie, ...)
# The cookie name is whatever sits between the two anchors.
_FLAT_ROW_RE = re.compile(
    r"(?:Permanent/Protokoll|Session Cookie|Persistent Cookie|"
    r"TagePersistent|TageSitzungs-Cookie|TageSession Cookie|"
    r"MinutenPersistent|MinutenSession Cookie|StundenPersistent|"
    r"MonatePersistent|JahrePersistent)"
    r"([A-Za-z_][A-Za-z0-9_\-\.]{1,40}?)"
    r"(?=Tracking Cookies|Session Cookies|Funktionscookie|Funktional|"
    r"Marketing|Analytics|Necessary)",
    re.I,
)


def parse_flat_cookie_text(text: str) -> list[dict]:
    """Extract cookies from a table delivered as flat, unseparated text.

    Cookie names are captured with ``_FLAT_ROW_RE`` (anchored on the end
    token of the previous row and the following category token), then
    de-duplicated case-insensitively, filtered against a stop-word list and
    a 3..60 length window, and grouped by guessed vendor.

    Returns:
        Vendor dicts with ``source == "flat_pattern"``; ``[]`` for inputs
        shorter than 500 characters or with fewer than 3 raw matches.
    """
    if not text or len(text) < 500:
        return []
    matches = _FLAT_ROW_RE.findall(text)
    if len(matches) < 3:
        return []

    stop_words = frozenset((
        "dieser", "diese", "ein", "der", "die", "das",
        "session", "permanent", "funktional", "notwendig",
        "marketing", "analytics", "werbung", "anbieter",
        "tracking", "cookie", "cookies", "und", "von",
        "einer", "ist", "alle", "noch", "auch", "name",
        "art", "zweck", "dauer", "test",
    ))

    vendors: dict[str, dict] = {}
    seen: set[str] = set()
    for match in matches:
        name = match.strip()
        key = name.lower()
        if key in seen or key in stop_words or not 3 <= len(name) <= 60:
            continue
        seen.add(key)

        vendor = _guess_vendor(name) or "Unbekannter Anbieter"
        if vendor not in vendors:
            vendors[vendor] = {
                "name": vendor, "country": "",
                "purpose": "", "category": "",
                "opt_out_url": "", "privacy_policy_url": "",
                "persistence": "",
                "cookies": [],
                "source": "flat_pattern",
            }
        vendors[vendor]["cookies"].append({
            "name": name, "purpose": "",
            "expiry": "", "is_third_party": True,
        })

    out = list(vendors.values())
    logger.info("parse_flat_cookie_text: %d vendors / %d cookies",
                len(out), sum(len(v["cookies"]) for v in out))
    return out
|
||||
|
||||
|
||||
_VENDOR_GUESS = (
|
||||
# Google-Familie (alles unter "Google" zusammenfassen — Dedup kuemmert sich)
|
||||
("_ga", "Google"), ("_gid", "Google"), ("_gcl_", "Google"),
|
||||
("ANID", "Google"), ("AID", "Google"), ("FPGCLDC", "Google"),
|
||||
("FPAU", "Google"), ("FLC", "Google"), ("APC", "Google"),
|
||||
("IDE", "Google"), ("DSID", "Google"), ("TAID", "Google"),
|
||||
("NID", "Google"), ("1P_JAR", "Google"),
|
||||
# Meta / Facebook
|
||||
("_fbp", "Meta / Facebook"), ("_fbc", "Meta / Facebook"),
|
||||
# fr ist Meta-Cookie, nur wenn keine andere Site-eigene Verwendung
|
||||
# Microsoft / Bing
|
||||
("_pin_unauth", "Pinterest"), ("_uetsid", "Microsoft Bing"),
|
||||
("_uetvid", "Microsoft Bing"), ("MUID", "Microsoft"),
|
||||
# Soziale Netzwerke
|
||||
("tt_", "TikTok"), ("li_at", "LinkedIn"),
|
||||
# CMP
|
||||
("OptanonConsent", "OneTrust"), ("cookieconsent", "Borlabs / Cookie-CMP"),
|
||||
("CookieConsentPolicy", "Borlabs / Cookie-CMP"),
|
||||
# Analytics
|
||||
("eta_", "etracker"), ("matomo", "Matomo"),
|
||||
("_hjid", "Hotjar"), ("_hj", "Hotjar"),
|
||||
("ajs_", "Segment"), ("amp_", "Amplitude"),
|
||||
# Adobe-Familie
|
||||
("sat_track", "Adobe Experience Cloud"),
|
||||
("AMCV", "Adobe Experience Cloud"),
|
||||
("AMCVS", "Adobe Experience Cloud"),
|
||||
("demdex", "Adobe Experience Cloud"),
|
||||
("dextp", "Adobe Experience Cloud"),
|
||||
("dpm", "Adobe Experience Cloud"),
|
||||
("mbox", "Adobe Target"),
|
||||
("smartSignals", "Adobe Experience Cloud"),
|
||||
("adbCDP", "Adobe Experience Cloud"),
|
||||
("s_cc", "Adobe Analytics"), ("s_sq", "Adobe Analytics"),
|
||||
("s_ecid", "Adobe Analytics"), ("s_vi", "Adobe Analytics"),
|
||||
("s_fid", "Adobe Analytics"), ("s_plt", "Adobe Analytics"),
|
||||
("s_pltp", "Adobe Analytics"), ("s_invisit", "Adobe Analytics"),
|
||||
("s_vnc365", "Adobe Analytics"), ("s_ivc", "Adobe Analytics"),
|
||||
("sc_appvn", "Adobe Analytics"), ("sc_pCmp", "Adobe Analytics"),
|
||||
("sc_prevpage", "Adobe Analytics"), ("sc_prop", "Adobe Analytics"),
|
||||
("sc_v17", "Adobe Analytics"), ("sc_v44", "Adobe Analytics"),
|
||||
("sc_v49", "Adobe Analytics"),
|
||||
# The Trade Desk
|
||||
("TDID", "The Trade Desk"), ("TDCPM", "The Trade Desk"),
|
||||
("TTDOptOut", "The Trade Desk"),
|
||||
# AdForm
|
||||
("uid", "AdForm"), ("cid", "AdForm"), ("otsid", "AdForm"),
|
||||
# everest
|
||||
("everest", "Adobe Advertising Cloud (everest)"),
|
||||
# Infra/CDN
|
||||
("__cf", "Cloudflare"), ("datadome", "DataDome"),
|
||||
("incap_", "Imperva Incapsula"), ("awsalb", "AWS Load Balancer"),
|
||||
# Salesforce
|
||||
("sfdc-", "Salesforce"), ("X-Salesforce", "Salesforce"),
|
||||
("liveagent_", "Salesforce LiveAgent"),
|
||||
# Inbenta
|
||||
("inbenta", "Inbenta"),
|
||||
# Sonstige Tracker
|
||||
("_pk_", "Matomo / Piwik"),
|
||||
("hmt_", "Akamai mPulse"),
|
||||
# EDAA / Industry Self-regulation
|
||||
("EDAAT", "EDAA / Online Choices"),
|
||||
("Eboptout", "EDAA / Online Choices"),
|
||||
)
|
||||
|
||||
|
||||
def _guess_vendor(cookie_name: str) -> str:
|
||||
nl = cookie_name.lower()
|
||||
for prefix, vendor in _VENDOR_GUESS:
|
||||
if nl.startswith(prefix.lower()) or prefix.lower() in nl:
|
||||
return vendor
|
||||
return ""
|
||||
@@ -276,19 +276,40 @@ COOKIE_CHECKLIST = [
|
||||
},
|
||||
|
||||
# ── Neue L1: Cookie-Tabelle ───────────────────────────────────────
|
||||
# P95: Lockerer Match — Vendor-zentrische Detailseiten (BMW-Stil mit
|
||||
# Adform-Block etc.) werden als gleichwertig akzeptiert. DSK-OH 2024
|
||||
# §3.2 verlangt die Informationen pro Cookie, schreibt aber keine
|
||||
# Tabellenform vor. Ein Vendor-Block der Name+Anbieter+Zweck+Dauer+
|
||||
# Cookie-Namen aggregiert nennt erfuellt das.
|
||||
{
|
||||
"id": "cookie_table",
|
||||
"label": "Strukturierte Cookie-Tabelle/Liste",
|
||||
"label": "Strukturierte Cookie-Informationen (Tabelle oder Vendor-Blöcke)",
|
||||
"level": 1, "parent": None,
|
||||
"patterns": [
|
||||
# Klassische Tabelle
|
||||
r"(?:cookie[\-\s])?(?:tabelle|uebersicht|übersicht|liste|aufstellung)",
|
||||
r"(?:name|bezeichnung)\s*[\|\t]\s*(?:anbieter|zweck|dauer|laufzeit|funktion)",
|
||||
r"(?:first[\-\s]?party|third[\-\s]?party)\s*[\|\t]",
|
||||
r"(?:typ(?:en)?|name|funktion|speicherdauer)\s+(?:typ(?:en)?|name|funktion|speicherdauer)",
|
||||
r"folgende\s+cookies",
|
||||
r"(?:funktionale|session|analyse|tracking)\s+cookies?\s+\w+",
|
||||
# P95: Vendor-zentrische Detail-Bloecke (BMW-Stil) — wenn
|
||||
# mehrere typische Vendor-Block-Marker vorhanden, gilt als
|
||||
# strukturiert. "Gesetzt von:" + "Opt-Out Link:" + "Privacy"
|
||||
# ist ein klares Indiz fuer Vendor-Detailseite.
|
||||
r"gesetzt\s+von\s*[:\|]",
|
||||
r"opt[\-\s]?out[\s\-]?link\s*[:\|]",
|
||||
r"speicherdauer\s*[:\|]\s*\d+\s+(?:tag|monat|jahr|day|month|year)",
|
||||
r"(?:rechtsgrundlage|legal\s+basis)\s*[:\|]",
|
||||
r"(?:diese\s+datenverarbeitung\s+verwendet\s+die\s+folgenden\s+cookies)",
|
||||
],
|
||||
"severity": "LOW",
|
||||
"hint": "Die DSK-Orientierungshilfe empfiehlt eine Tabelle mit 5 Spalten: Name, Anbieter, Zweck, Speicherdauer, Typ (First-/Third-Party). Viele Consent-Tools (Cookiebot, Usercentrics) generieren diese Tabelle automatisch — binden Sie sie ein.",
|
||||
"hint": "DSK-OH Telemedien 2024 §3.2 verlangt Cookie-Informationen pro "
|
||||
"Vendor/Cookie (Name, Anbieter, Zweck, Speicherdauer, Drittlandtransfer). "
|
||||
"Akzeptable Formate: (a) Tabelle mit 5 Spalten oder (b) Vendor-Detailseite "
|
||||
"mit Block pro Anbieter (Anbieter+Anschrift, Zweck, Speicherdauer aggregiert, "
|
||||
"Cookie-Namen-Liste, Opt-Out-Link, Drittlandstatus). BMW-Stil mit Adform-"
|
||||
"Block ist konform. Auch automatisierte CMP-Generierung (Cookiebot, Usercentrics) "
|
||||
"ist OK.",
|
||||
},
|
||||
]
|
||||
|
||||
@@ -0,0 +1,99 @@
|
||||
"""
|
||||
Rendert die Doc-Type-Mismatch-Hinweise als Mail-Block.
|
||||
|
||||
Wenn der User Text in das falsche Feld kopiert (z.B. Impressum-Text
|
||||
ins DSE-Feld), zeigt der Block:
|
||||
- was er deklariert hat
|
||||
- was der Classifier erkannt hat
|
||||
- Empfehlung (re-paste oder als unbekannt einreichen)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Iterable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DOC_LABELS = {
|
||||
"dse": "Datenschutzerklaerung",
|
||||
"cookie": "Cookie-Richtlinie",
|
||||
"impressum": "Impressum",
|
||||
"agb": "AGB",
|
||||
"widerruf": "Widerrufsbelehrung",
|
||||
"nutzungsbedingungen": "Nutzungsbedingungen",
|
||||
"social_media": "Social Media DSE",
|
||||
"dsfa": "DSFA",
|
||||
"dsa": "DSA-Pflichtangaben",
|
||||
"legal_notice": "Rechtliche Hinweise",
|
||||
"lizenzhinweise": "Lizenzhinweise",
|
||||
}
|
||||
|
||||
|
||||
def _label(dt: str) -> str:
|
||||
return _DOC_LABELS.get(dt, dt)
|
||||
|
||||
|
||||
def collect_warnings(doc_entries: Iterable[dict]) -> list[dict]:
    """Build one warning record per document entry that has a ``reclassify_hint``.

    Args:
        doc_entries: Document entries (dicts); ``None`` is treated as empty.
            Entries without a truthy ``reclassify_hint`` are skipped.

    Returns:
        A list of flat dicts with the keys ``input_source``, ``declared``,
        ``detected``, ``action``, ``declared_score``, ``detected_score``,
        ``all_scores`` and ``word_count``. Missing scores default to 0 and
        missing ``all_scores`` to an empty dict.
    """

    def _as_warning(entry: dict, hint: dict) -> dict:
        # Flatten the nested hint and the entry metadata into a single record.
        return {
            "input_source": entry.get("input_source"),
            "declared": hint.get("declared"),
            "detected": hint.get("detected"),
            "action": hint.get("action"),
            "declared_score": hint.get("declared_score", 0),
            "detected_score": hint.get("detected_score", 0),
            "all_scores": hint.get("all_scores") or {},
            "word_count": entry.get("word_count", 0),
        }

    hinted = (
        (entry, entry.get("reclassify_hint"))
        for entry in (doc_entries or [])
    )
    return [_as_warning(entry, hint) for entry, hint in hinted if hint]
|
||||
|
||||
|
||||
def build_warnings_block_html(warnings: list[dict]) -> str:
    """Render doc-type mismatch warnings as an HTML block for the mail body.

    Every warning becomes one ``<li>``. ``action == "reclassify"`` is shown
    with the "AUTO-RECLASSIFIZIERT" badge, any other action with the
    "MOEGLICHER MISMATCH" badge.

    Args:
        warnings: Warning dicts with the keys ``declared``, ``detected``,
            ``action``, ``declared_score`` and ``detected_score`` (the shape
            built by ``collect_warnings``). Missing keys are tolerated.

    Returns:
        The HTML snippet, or ``""`` when there is nothing to report.
    """
    import html  # function-scope import: only this renderer needs it

    if not warnings:
        return ""

    def _esc(value: object) -> str:
        # The declared doc type originates from user input, so everything
        # interpolated into the markup is HTML-escaped.
        return html.escape(str(value))

    items: list[str] = []
    for w in warnings:
        declared = _esc(_label(w.get("declared")))
        detected = _esc(_label(w.get("detected")))
        declared_score = _esc(w.get("declared_score", 0))
        detected_score = _esc(w.get("detected_score", 0))
        if w.get("action") == "reclassify":
            color = "#0e7490"
            badge = "AUTO-RECLASSIFIZIERT"
            body = (
                f'Sie haben den Text als <strong>{declared}</strong> '
                f'eingereicht, das System hat ihn aber automatisch als '
                f'<strong>{detected}</strong> erkannt und entsprechend '
                f'geprüft (Konfidenz-Score: {detected_score} vs '
                f'{declared_score} für die deklarierte Kategorie).'
            )
        else:
            color = "#d97706"
            badge = "MOEGLICHER MISMATCH"
            body = (
                f'Sie haben den Text als <strong>{declared}</strong> '
                f'eingereicht. Der Inhalt enthaelt aber Patterns die eher zu '
                f'<strong>{detected}</strong> passen '
                f'({detected_score} vs {declared_score}). '
                'Bitte pruefen Sie ob Sie den richtigen Doc-Typ ausgewaehlt haben.'
            )
        items.append(
            f'<li style="margin-bottom:8px;font-size:11px;line-height:1.5">'
            f'<strong style="color:{color}">[{badge}]</strong> {body}'
            f'</li>'
        )
    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 12px;padding:10px 14px;'
        'background:#ecfeff;border:1px solid #67e8f9;border-radius:6px">'
        '<div style="font-size:11px;color:#0e7490;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Hinweise zum eingefügten Text</div>'
        '<ul style="margin:4px 0 0 18px;padding:0">'
        + "".join(items) +
        '</ul></div>'
    )
|
||||
@@ -0,0 +1,297 @@
|
||||
"""
|
||||
P35 + P36 + P77 + P78 — Post-hoc Textsignal-Checks auf den geladenen
|
||||
Dokumenten-Texten (DSE / Cookie-Richtlinie / Banner-Text).
|
||||
|
||||
P35 — "Speichern" als mehrdeutiges Reject-Label im Banner. Wenn das
|
||||
einzige Schliess-Element nur "Speichern" heisst (statt
|
||||
"Alle ablehnen" / "Nur notwendige"), ist das ein MEDIUM-Finding,
|
||||
weil der Nutzer nicht versteht ob er gerade akzeptiert oder
|
||||
abgelehnt hat.
|
||||
|
||||
P77 — Cookie-Doc-Architecture: wenn keine eigene Cookie-Richtlinie
|
||||
ausgeliefert wurde, aber die DSE einen prominent benannten
|
||||
Cookie-Abschnitt enthaelt (mit Vendor-Liste + Speicherdauer),
|
||||
ist das ein gleichwertiger OEM-Pattern. Liefert positives Signal
|
||||
statt MEDIUM-Finding "Cookie-Richtlinie fehlt".
|
||||
|
||||
P78 — JC-Detection in DSE-Text: erkennt 'gemeinsam Verantwortliche'-
|
||||
Klauseln (Art. 26 DSGVO) im DSE-Text. Liefert positives Signal
|
||||
"JC-Konstrukt dokumentiert" — verhindert False-Positive
|
||||
"JC nicht erwaehnt obwohl Kooperation mit Konzern-Schwester".
|
||||
|
||||
Alle Checks liefern ein dict der Form {"severity": ...}; positive Signale tragen severity "INFO".
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_REJECT_LABEL_KEYS = (
|
||||
"alle ablehnen", "ablehnen", "reject all", "deny all",
|
||||
"nur notwendige", "nur essenzielle", "nur erforderliche",
|
||||
"essentials only", "verweigern", "block all",
|
||||
)
|
||||
|
||||
_SAVE_ONLY_KEYS = (
|
||||
"speichern", "auswahl speichern", "save selection",
|
||||
"auswahl bestaetigen",
|
||||
)
|
||||
|
||||
_COOKIE_SECTION_HEADINGS = (
|
||||
"cookies und tracking", "cookies und vergleichbare technologien",
|
||||
"cookies und aehnliche technologien", "verwendung von cookies",
|
||||
"informationen zu cookies", "uebersicht der cookies",
|
||||
"eingesetzte cookies", "cookies im einsatz",
|
||||
)
|
||||
|
||||
_VENDOR_HINTS = (
|
||||
"speicherdauer", "lebensdauer", "anbieter", "drittanbieter",
|
||||
"datenempfaenger", "datenkategorie", "rechtsgrundlage",
|
||||
)
|
||||
|
||||
_JC_PATTERNS = (
|
||||
"gemeinsam verantwortlich", "joint controller",
|
||||
"gemeinsame verantwortung", "art. 26 dsgvo", "art 26 dsgvo",
|
||||
"vereinbarung gemaess art. 26", "joint-controller-vereinbarung",
|
||||
"gemeinsame verarbeitung",
|
||||
)
|
||||
|
||||
# P36 — Social-Media-Einbindung:
|
||||
# "direct" = direkte FB/Insta/Twitter-Embeds laden bei Page-Load
|
||||
# (HIGH-Risiko, Cookies vor Consent).
|
||||
# "shariff" = Heise-Shariff-Buttons (clientseitig, kein 3rd-party-Call).
|
||||
# "two_click" = zweistufige Loesung (Klick auf Platzhalter laed Tracker).
|
||||
_SOCIAL_DIRECT_PATTERNS = (
|
||||
"connect.facebook.net", "platform.twitter.com",
|
||||
"platform.instagram.com", "platform.linkedin.com",
|
||||
"youtube.com/embed", "syndication.twitter.com",
|
||||
"//www.facebook.com/", "fb-pixel", "facebook-pixel",
|
||||
)
|
||||
_SOCIAL_SHARIFF_PATTERNS = (
|
||||
"shariff", "ct_shariff", "data-shariff",
|
||||
)
|
||||
_SOCIAL_TWOCLICK_PATTERNS = (
|
||||
"2-klick", "2klick", "zwei klick", "two-click",
|
||||
"klick-zu-laden", "klick um zu laden", "platzhalter laed",
|
||||
"embetty",
|
||||
)
|
||||
|
||||
|
||||
def check_save_only_reject(banner_result: dict) -> dict | None:
|
||||
"""P35 — Banner hat keinen klaren Reject, nur "Speichern"."""
|
||||
initial = ((banner_result or {}).get("phases") or {}).get("initial") or {}
|
||||
if not isinstance(initial, dict):
|
||||
return None
|
||||
btext = (initial.get("banner_text") or "").lower()
|
||||
if not btext or len(btext) < 30:
|
||||
return None
|
||||
has_clear_reject = any(k in btext for k in _REJECT_LABEL_KEYS)
|
||||
has_save_only = any(k in btext for k in _SAVE_ONLY_KEYS)
|
||||
if has_clear_reject or not has_save_only:
|
||||
return None
|
||||
return {
|
||||
"severity": "MEDIUM",
|
||||
"code": "save_label_ambiguous",
|
||||
"label": (
|
||||
'Banner verwendet "Speichern" ohne erkennbares "Ablehnen" '
|
||||
'— mehrdeutig fuer den Nutzer'
|
||||
),
|
||||
"detail": (
|
||||
'Der Button "Speichern" laesst offen, ob die aktuelle '
|
||||
'Vorauswahl (oft alles aktiv) bestaetigt oder nur die '
|
||||
'getroffene Auswahl uebernommen wird. EDPB 03/2022 empfiehlt '
|
||||
'eindeutige Labels: "Alle akzeptieren" + "Alle ablehnen".'
|
||||
),
|
||||
"legal_basis": "Art. 7 (1) DSGVO + EDPB 03/2022 Guidelines on "
|
||||
"deceptive design patterns.",
|
||||
}
|
||||
|
||||
|
||||
def check_cookies_in_dse(
|
||||
doc_texts: dict[str, str],
|
||||
cookie_doc_missing: bool,
|
||||
) -> dict | None:
|
||||
"""P77 — DSE hat eigenen Cookie-Abschnitt mit Vendor-Hints."""
|
||||
if not cookie_doc_missing:
|
||||
return None
|
||||
dse = (doc_texts or {}).get("dse") or ""
|
||||
if len(dse) < 1000:
|
||||
return None
|
||||
dse_lower = dse.lower()
|
||||
has_heading = any(h in dse_lower for h in _COOKIE_SECTION_HEADINGS)
|
||||
if not has_heading:
|
||||
return None
|
||||
vendor_hint_count = sum(1 for h in _VENDOR_HINTS if h in dse_lower)
|
||||
if vendor_hint_count < 3:
|
||||
return None # zu wenig substanziell
|
||||
return {
|
||||
"severity": "INFO", # Positives Signal, kein Finding
|
||||
"code": "cookies_in_dse_accepted",
|
||||
"label": (
|
||||
"Cookie-Informationen sind im Datenschutz-Dokument enthalten "
|
||||
"(eigener Abschnitt mit Vendor-Hinweisen)"
|
||||
),
|
||||
"detail": (
|
||||
"Die Praxis vieler OEM-Sites, Cookies als eigenen Abschnitt "
|
||||
'in der DSE zu fuehren (statt als separate Datei), wird als '
|
||||
"gleichwertig akzeptiert. Empfehlung trotzdem: separate "
|
||||
"Cookie-Richtlinie erleichtert kuenftige Aenderungen und "
|
||||
"Versionierung."
|
||||
),
|
||||
"legal_basis": "Art. 13(1)(c) DSGVO — Form ist nicht vorgegeben, "
|
||||
"Inhalt muss vollstaendig sein.",
|
||||
}
|
||||
|
||||
|
||||
def check_jc_clause_in_dse(doc_texts: dict[str, str]) -> dict | None:
|
||||
"""P78 — DSE enthaelt Art. 26 JC-Klausel."""
|
||||
dse = (doc_texts or {}).get("dse") or ""
|
||||
if not dse:
|
||||
return None
|
||||
dse_lower = dse.lower()
|
||||
matches = [p for p in _JC_PATTERNS if p in dse_lower]
|
||||
if not matches:
|
||||
return None
|
||||
return {
|
||||
"severity": "INFO",
|
||||
"code": "jc_clause_documented",
|
||||
"label": "Gemeinsame Verantwortlichkeit (Art. 26 DSGVO) im "
|
||||
"DSE-Text dokumentiert",
|
||||
"detail": (
|
||||
f'Erkannte Signale: {", ".join(sorted(set(matches))[:3])}. '
|
||||
'Das verhindert das False-Positive "JC-Konstrukt nicht '
|
||||
'erwaehnt" bei Sites mit Konzern-Schwesterunternehmen.'
|
||||
),
|
||||
"legal_basis": "Art. 26 DSGVO + EDPB 7/2020 Guidelines on the "
|
||||
"concepts of controller and processor.",
|
||||
}
|
||||
|
||||
|
||||
def check_social_embedding(
|
||||
doc_texts: dict[str, str],
|
||||
homepage_html: str | None = None,
|
||||
) -> dict | None:
|
||||
"""P36 — direkte Social-Embeds vs Shariff vs 2-Klick."""
|
||||
sources: list[str] = []
|
||||
for key in ("dse", "cookie", "impressum"):
|
||||
v = (doc_texts or {}).get(key) or ""
|
||||
if v:
|
||||
sources.append(v[:50000])
|
||||
if homepage_html:
|
||||
sources.append(homepage_html[:50000])
|
||||
if not sources:
|
||||
return None
|
||||
blob = " ".join(sources).lower()
|
||||
direct_hits = [p for p in _SOCIAL_DIRECT_PATTERNS if p in blob]
|
||||
has_shariff = any(p in blob for p in _SOCIAL_SHARIFF_PATTERNS)
|
||||
has_twoclick = any(p in blob for p in _SOCIAL_TWOCLICK_PATTERNS)
|
||||
|
||||
if not direct_hits and not has_shariff and not has_twoclick:
|
||||
return None
|
||||
if direct_hits and not (has_shariff or has_twoclick):
|
||||
return {
|
||||
"severity": "HIGH",
|
||||
"code": "social_direct_embed",
|
||||
"label": "Direkte Social-Media-Embeds ohne 2-Klick-Schutz "
|
||||
"oder Shariff erkannt",
|
||||
"detail": (
|
||||
f'Gefundene Drittanbieter-Skripte: '
|
||||
f'{", ".join(sorted(set(direct_hits))[:4])}. '
|
||||
"Diese laden i.d.R. Cookies/Pixel ohne Einwilligung. "
|
||||
"Empfehlung: Heise-Shariff (clientseitig) oder "
|
||||
"2-Klick-Loesung (Embetty, eigener Platzhalter)."
|
||||
),
|
||||
"legal_basis": "EuGH C-40/17 (Fashion-ID) — Einbinden eines "
|
||||
"Facebook-Like-Buttons macht den Site-Betreiber "
|
||||
"zum gemeinsam Verantwortlichen + benoetigt "
|
||||
"Einwilligung VOR dem Drittanbieter-Call.",
|
||||
}
|
||||
if has_shariff or has_twoclick:
|
||||
return {
|
||||
"severity": "INFO",
|
||||
"code": "social_protected_embed",
|
||||
"label": (
|
||||
"Datenschutzfreundliche Social-Media-Einbindung erkannt "
|
||||
f"({'Shariff' if has_shariff else '2-Klick-Loesung'})"
|
||||
),
|
||||
"detail": (
|
||||
"Drittanbieter-Skripte werden erst nach aktivem Klick "
|
||||
"geladen — kein Tracking ohne Einwilligung."
|
||||
),
|
||||
"legal_basis": "EuGH C-40/17 + EDPB Guidelines 8/2020.",
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
def run_all(
|
||||
banner_result: dict | None,
|
||||
doc_texts: dict[str, str] | None,
|
||||
cookie_doc_missing: bool = False,
|
||||
homepage_html: str | None = None,
|
||||
) -> list[dict]:
|
||||
findings: list[dict] = []
|
||||
try:
|
||||
f = check_save_only_reject(banner_result or {})
|
||||
if f:
|
||||
findings.append(f)
|
||||
except Exception as e:
|
||||
logger.warning("P35 save_only_reject failed: %s", e)
|
||||
try:
|
||||
f = check_cookies_in_dse(doc_texts or {}, cookie_doc_missing)
|
||||
if f:
|
||||
findings.append(f)
|
||||
except Exception as e:
|
||||
logger.warning("P77 cookies_in_dse failed: %s", e)
|
||||
try:
|
||||
f = check_jc_clause_in_dse(doc_texts or {})
|
||||
if f:
|
||||
findings.append(f)
|
||||
except Exception as e:
|
||||
logger.warning("P78 jc_clause failed: %s", e)
|
||||
try:
|
||||
f = check_social_embedding(doc_texts or {}, homepage_html)
|
||||
if f:
|
||||
findings.append(f)
|
||||
except Exception as e:
|
||||
logger.warning("P36 social_embedding failed: %s", e)
|
||||
return findings
|
||||
|
||||
|
||||
def build_signals_block_html(findings: list[dict]) -> str:
    """Render text-signal findings as the "Weitere Textsignale" HTML block.

    Findings whose severity is not ``"INFO"`` are listed first, followed by
    the ``"INFO"`` (positive) signals; the input order is kept inside each
    group. ``"HIGH"`` is tagged "HOCH", ``"INFO"`` "✓ POSITIV" and every
    other severity "MITTEL".

    Args:
        findings: Dicts with ``severity``, ``label``, ``detail`` and
            ``legal_basis``; missing text fields render as empty strings.

    Returns:
        The HTML snippet, or ``""`` for an empty/None input.
    """
    if not findings:
        return ""

    def _badge(severity) -> tuple[str, str]:
        # (text colour, tag shown in brackets)
        if severity == "HIGH":
            return "#dc2626", "HOCH"
        if severity == "INFO":
            return "#16a34a", "✓ POSITIV"
        return "#d97706", "MITTEL"

    def _render(finding: dict) -> str:
        color, tag = _badge(finding.get("severity", "MEDIUM"))
        return (
            '<li style="margin-bottom:8px;font-size:11px;line-height:1.5">'
            f'<strong style="color:{color}">[{tag}] {finding.get("label","")}</strong>'
            f'<div style="color:#475569;margin-top:2px">{finding.get("detail","")}</div>'
            '<div style="color:#94a3b8;margin-top:2px;font-style:italic">'
            f'{finding.get("legal_basis","")}</div></li>'
        )

    problems = [f for f in findings if f.get("severity") != "INFO"]
    positives = [f for f in findings if f.get("severity") == "INFO"]
    rendered = "".join(_render(f) for f in problems + positives)

    header = (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:12px 16px;'
        'background:#f8fafc;border:1px solid #e2e8f0;border-radius:6px">'
        '<div style="font-size:11px;color:#475569;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Weitere Textsignale</div>'
        '<ul style="margin:6px 0 0 18px;padding:0">'
    )
    return header + rendered + '</ul></div>'
|
||||
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
Erkennt den wahrscheinlichen Doc-Type eines eingefuegten Textes.
|
||||
|
||||
Wird genutzt wenn der User Text direkt ins Frontend kopiert. Wenn der
|
||||
erkannte Typ vom user-deklarierten Typ abweicht, gibt das System einen
|
||||
Hinweis (oder reklassifiziert automatisch wenn Confidence hoch genug).
|
||||
|
||||
Heuristik basiert auf Pflichtangaben-Patterns:
|
||||
* Impressum: §5 TMG-Bestandteile (Anschrift + Telefon + Email + UStID)
|
||||
* DSE: Art. 13 DSGVO-Bestandteile (Verantwortlicher + Zweck + Rechtsgrund)
|
||||
* AGB: Vertragsschluss + Lieferung + Zahlung + Gerichtsstand
|
||||
* Widerruf: 14-Tage-Frist + Widerrufsformular + Wertersatz
|
||||
* Cookie-Richtlinie: Cookie-Tabelle / Speicherdauer / Drittanbieter
|
||||
* Nutzungsbedingungen: Lizenz + Verbot der Vervielfaeltigung + Account
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_PATTERNS: dict[str, list[tuple[re.Pattern, int]]] = {
|
||||
"impressum": [
|
||||
(re.compile(r"§\s*5\s+TMG", re.I), 4),
|
||||
(re.compile(r"angaben\s+gem(ä|ae)ß", re.I), 3),
|
||||
(re.compile(r"\bUSt[\-\s]?ID[\-\s]?Nr\b", re.I), 4),
|
||||
(re.compile(r"vertretungsberechtigt(e|er)", re.I), 3),
|
||||
(re.compile(r"registergericht", re.I), 3),
|
||||
(re.compile(r"handelsregister(nummer)?", re.I), 3),
|
||||
(re.compile(r"\bHRB\s+\d+", re.I), 3),
|
||||
(re.compile(r"verantwortlich\s+f(ü|ue)r\s+den\s+inhalt", re.I), 3),
|
||||
(re.compile(r"\bRStV\b|Rundfunkstaatsvertrag", re.I), 3),
|
||||
(re.compile(r"streitschlichtung", re.I), 2),
|
||||
(re.compile(r"OS[\-\s]?plattform", re.I), 2),
|
||||
],
|
||||
"dse": [
|
||||
(re.compile(r"art(ikel)?\.?\s*13\s+DSGVO", re.I), 5),
|
||||
(re.compile(r"art(ikel)?\.?\s*15\s+DSGVO", re.I), 4),
|
||||
(re.compile(r"rechtsgrundlage", re.I), 3),
|
||||
(re.compile(r"datenschutzbeauftragt", re.I), 4),
|
||||
(re.compile(r"berechtigtes\s+interesse", re.I), 3),
|
||||
(re.compile(r"betroffenenrechte", re.I), 3),
|
||||
(re.compile(r"aufsichtsbeh(ö|oe)rde", re.I), 3),
|
||||
(re.compile(r"speicherdauer|aufbewahrungsfrist", re.I), 2),
|
||||
(re.compile(r"datenkategorie", re.I), 2),
|
||||
(re.compile(r"verantwortliche(r|n)\s+im\s+sinne", re.I), 4),
|
||||
],
|
||||
"agb": [
|
||||
(re.compile(r"allgemeine\s+gesch(ä|ae)ftsbedingungen", re.I), 5),
|
||||
(re.compile(r"\bAGB\b", re.I), 3),
|
||||
(re.compile(r"vertragsschluss|vertragsabschluss", re.I), 3),
|
||||
(re.compile(r"liefer(bedingungen|zeit|kosten)", re.I), 2),
|
||||
(re.compile(r"gew(ä|ae)hrleistung", re.I), 2),
|
||||
(re.compile(r"haftungsausschluss", re.I), 2),
|
||||
(re.compile(r"gerichtsstand", re.I), 3),
|
||||
(re.compile(r"anwendbares\s+recht", re.I), 2),
|
||||
(re.compile(r"salvatorische\s+klausel", re.I), 2),
|
||||
],
|
||||
"widerruf": [
|
||||
(re.compile(r"widerrufsbelehrung", re.I), 5),
|
||||
(re.compile(r"14\s+tage", re.I), 3),
|
||||
(re.compile(r"widerrufsrecht", re.I), 4),
|
||||
(re.compile(r"widerrufsformular", re.I), 3),
|
||||
(re.compile(r"wertersatz", re.I), 3),
|
||||
(re.compile(r"r(ü|ue)cksende(kosten|gebuehr)", re.I), 3),
|
||||
(re.compile(r"muster[\-\s]?widerrufsformular", re.I), 4),
|
||||
],
|
||||
"cookie": [
|
||||
(re.compile(r"cookie[\-\s]?richtlinie", re.I), 4),
|
||||
(re.compile(r"cookie[\-\s]?policy", re.I), 4),
|
||||
(re.compile(r"tracking[\-\s]?cookies?", re.I), 3),
|
||||
(re.compile(r"funktionale\s+cookies?", re.I), 3),
|
||||
(re.compile(r"marketing[\-\s]?cookies?", re.I), 3),
|
||||
(re.compile(r"speicherdauer\s*\d+\s*(tag|monat|jahr)", re.I), 3),
|
||||
(re.compile(r"drittanbieter[\-\s]?cookies?", re.I), 3),
|
||||
(re.compile(r"\b(IDE|_ga|_gid|_fbp|_gcl_au|OptanonConsent)\b"), 3),
|
||||
(re.compile(r"opt[\-\s]?out", re.I), 2),
|
||||
],
|
||||
"nutzungsbedingungen": [
|
||||
(re.compile(r"nutzungsbedingungen", re.I), 5),
|
||||
(re.compile(r"terms\s+of\s+(use|service)", re.I), 4),
|
||||
(re.compile(r"benutzerkonto|nutzerkonto", re.I), 3),
|
||||
(re.compile(r"untersagt|unzul(ä|ae)ssig.{0,30}nutzung", re.I), 2),
|
||||
(re.compile(r"sperrung\s+des\s+kontos", re.I), 2),
|
||||
],
|
||||
"social_media": [
|
||||
(re.compile(r"social[\-\s]?media[\-\s]?(plug[\-\s]?ins?|kanale|kanaele|pr(ä|ae)senz)", re.I), 4),
|
||||
(re.compile(r"gemeinsam\s+verantwortlich.{0,100}(facebook|meta|instagram)", re.I), 4),
|
||||
(re.compile(r"fanpage|fan[\-\s]?page", re.I), 3),
|
||||
(re.compile(r"like[\-\s]?button|share[\-\s]?button", re.I), 2),
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def classify(text: str, top_n: int = 3) -> list[tuple[str, int]]:
    """Score ``text`` against every doc type and return the best candidates.

    A doc type's score is the sum of the weights of all of its patterns in
    ``_PATTERNS`` that occur at least once in the text. Per the original
    guidance: score >= 6 is high confidence, 3-5 medium, below 3 low.

    Args:
        text: Document text; empty input or fewer than 200 characters
            yields no classification.
        top_n: Maximum number of candidates to return.

    Returns:
        ``(doc_type, score)`` pairs with a score above zero, highest score
        first; ties keep the order of ``_PATTERNS``.
    """
    if not text or len(text) < 200:
        return []

    totals = {
        doc_type: sum(weight for pattern, weight in rules if pattern.search(text))
        for doc_type, rules in _PATTERNS.items()
    }
    candidates = [(doc_type, total) for doc_type, total in totals.items() if total > 0]
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    return candidates[:top_n]
|
||||
|
||||
|
||||
def best_match(text: str) -> tuple[str, int] | None:
|
||||
"""Returns (doc_type, score) of best match or None."""
|
||||
ranked = classify(text, top_n=1)
|
||||
return ranked[0] if ranked else None
|
||||
|
||||
|
||||
def detect_mismatch(
|
||||
declared_doc_type: str,
|
||||
text: str,
|
||||
min_confidence: int = 6,
|
||||
) -> dict | None:
|
||||
"""If the text scores higher for a different doc_type than declared,
|
||||
return a hint dict {detected, declared, scores, action}.
|
||||
|
||||
action='reclassify' if confidence is very high (>= min_confidence * 1.5)
|
||||
action='warn' if medium (>= min_confidence)
|
||||
action=None / no return otherwise.
|
||||
"""
|
||||
ranked = classify(text, top_n=3)
|
||||
if not ranked:
|
||||
return None
|
||||
detected, detected_score = ranked[0]
|
||||
declared_canon = (declared_doc_type or "").lower().strip()
|
||||
# Aliase normalisieren
|
||||
alias = {"datenschutz": "dse", "privacy": "dse",
|
||||
"terms": "nutzungsbedingungen",
|
||||
"terms_of_use": "nutzungsbedingungen"}
|
||||
declared_canon = alias.get(declared_canon, declared_canon)
|
||||
|
||||
if detected == declared_canon:
|
||||
return None
|
||||
if detected_score < min_confidence:
|
||||
return None
|
||||
declared_score = next((s for dt, s in ranked if dt == declared_canon), 0)
|
||||
# Nur wenn detected DEUTLICH besser ist (Faktor >= 2 oder declared = 0)
|
||||
if declared_score and detected_score < declared_score * 2:
|
||||
return None
|
||||
|
||||
action = "reclassify" if detected_score >= min_confidence * 1.5 else "warn"
|
||||
return {
|
||||
"detected": detected,
|
||||
"declared": declared_doc_type,
|
||||
"detected_score": detected_score,
|
||||
"declared_score": declared_score,
|
||||
"action": action,
|
||||
"all_scores": dict(ranked),
|
||||
}
|
||||
@@ -0,0 +1,86 @@
|
||||
"""
|
||||
P87 — Konfidenz-Score pro Finding.
|
||||
|
||||
Nicht jedes HIGH-Finding ist gleich sicher. "Kein Reject-Button im Banner"
|
||||
ist faktisch direkt beobachtbar (Confidence ~98%). "DSE enthaelt keinen
|
||||
DSB-Kontakt" ist ein Textmuster-Match und kann False-Positive sein
|
||||
(Confidence ~70%). "Cookie X als essential deklariert, Library sagt
|
||||
marketing" haengt von Library-Qualitaet ab (Confidence ~80%).
|
||||
|
||||
Liefert pro Finding-Label ein (confidence_pct, reason) Paar. Wird im
|
||||
Mail-Render als kleine graue Klammer hinter dem Severity-Pill angezeigt:
|
||||
"HOCH (95% Konfidenz: Direkt im DOM beobachtet)".
|
||||
|
||||
Keine ML — nur regelbasiert. Eine zentrale Stelle damit alle Render-
|
||||
Stellen einheitlich klassifizieren.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
# (regex, confidence_pct, reason)
|
||||
# Reihenfolge wichtig: spezifischere Patterns zuerst.
|
||||
_RULES: list[tuple[re.Pattern, int, str]] = [
|
||||
# 1) Direkt im DOM / im Cookie-Jar beobachtet — sehr hohe Sicherheit
|
||||
(re.compile(r"reject[- ]?button.*(fehlt|nicht.*vorhanden)", re.I), 98,
|
||||
"Direkt im Banner-DOM ueberprueft"),
|
||||
(re.compile(r"(anpassen|einstellungen|customize).*button.*fehlt", re.I), 95,
|
||||
"Initial-Banner-DOM ueberprueft"),
|
||||
(re.compile(r"cookie.*vor.*einwilligung.*gesetzt", re.I), 96,
|
||||
"Cookie-Jar vor Akzeptieren beobachtet"),
|
||||
(re.compile(r"(tracking|marketing).*ohne.*einwilligung", re.I), 92,
|
||||
"Network-Calls vor Akzeptieren beobachtet"),
|
||||
|
||||
# 2) Library-Mismatches — abhaengig von Library-Qualitaet
|
||||
(re.compile(r"deklariert als.*library.*sagt", re.I), 82,
|
||||
"Vergleich mit ~2.300-Cookie-Library + Open-Cookie-DB"),
|
||||
(re.compile(r"library.*marketing", re.I), 82,
|
||||
"Cookie-Library-Klassifikation"),
|
||||
|
||||
# 3) Pflichtangaben-Checks (Impressum/AGB/DSE) — Textmuster, MEDIUM-Sicherheit
|
||||
(re.compile(r"impressum.*(fehlt|unvollstaendig)", re.I), 88,
|
||||
"Pattern-Match auf Impressums-Pflichtfelder (§ 5 TMG)"),
|
||||
(re.compile(r"dsb.*(fehlt|nicht.*genannt)", re.I), 75,
|
||||
"Textmuster-Suche; DSB kann ueber Impressum referenziert sein"),
|
||||
(re.compile(r"drittland.*(fehlt|nicht.*genannt|ohne.*hinweis)", re.I), 80,
|
||||
"Pattern-Match auf typische Drittland-Klauseln"),
|
||||
(re.compile(r"widerruf.*(fehlt|unvollstaendig)", re.I), 85,
|
||||
"Pattern-Match auf Widerrufsbelehrungs-Pflichtfelder"),
|
||||
|
||||
# 4) Anti-Auditing-Detection — heuristisch
|
||||
(re.compile(r"anti[- ]?audit", re.I), 70,
|
||||
"Skript-Domain-Heuristik; manuelle Pruefung empfohlen"),
|
||||
|
||||
# 5) Generische Konsistenz-Findings (DSE vs. Banner vs. Cookie-Liste)
|
||||
(re.compile(r"banner.*nennt.*\d+.*cmp.*\d+", re.I), 90,
|
||||
"Quantitativer Vergleich zwischen Banner-Text und CMP-Payload"),
|
||||
|
||||
# 6) Klassifikations- / Kontext-Findings (Wizard-getrieben)
|
||||
(re.compile(r"(branchen|scope).*passt.*nicht", re.I), 88,
|
||||
"Wizard-Klassifikation + MC-scope_doc_type"),
|
||||
]
|
||||
|
||||
_DEFAULT_CONFIDENCE = 78
|
||||
_DEFAULT_REASON = (
|
||||
"Standard-Regelpruefung; Bestaetigung mit DSB / interner Doku empfohlen"
|
||||
)
|
||||
|
||||
|
||||
def score_finding(label: str) -> tuple[int, str]:
    """Look up ``(confidence_pct, reason)`` for a finding label.

    The first rule in ``_RULES`` whose regex matches the label wins. An empty
    label or a label without any match gets the default confidence and reason.
    """
    fallback = (_DEFAULT_CONFIDENCE, _DEFAULT_REASON)
    if not label:
        return fallback
    first_hit = next(
        ((pct, why) for rx, pct, why in _RULES if rx.search(label)),
        None,
    )
    return fallback if first_hit is None else first_hit
|
||||
|
||||
|
||||
def confidence_pill_html(label: str) -> str:
    """Return the inline HTML snippet ' (NN% Konfidenz)' for a finding label.

    The confidence and reason come from ``score_finding``; the reason is
    placed in the ``title`` attribute of the span, not in the visible text.
    """
    percent, why = score_finding(label)
    opening = f' <span style="color:#94a3b8;font-size:10px" title="{why}">'
    visible = f'({percent}% Konfidenz)</span>'
    return opening + visible
|
||||
@@ -166,7 +166,8 @@ def base_context(state: dict[str, Any]) -> dict[str, Any]:
|
||||
"ANMELDUNG_TYP": "Ersteintragung gemäß § 7 GmbHG",
|
||||
"ANMELDUNG_DATE": notar.get("notarial_date", "[Notartermin folgt]"),
|
||||
"REGISTRY_COURT_ADDRESS": "[Adresse des zuständigen Registergerichts]",
|
||||
"COMPANY_REGISTRY_COURT": "[zuständiges Amtsgericht]",
|
||||
"COMPANY_REGISTRY_COURT": basics.get("register_court") or "[zuständiges Amtsgericht]",
|
||||
"REGISTER_COURT": basics.get("register_court") or "[zuständiges Amtsgericht]",
|
||||
# Common
|
||||
"DOCUMENT_VERSION": "1.0.0",
|
||||
"EFFECTIVE_DATE": notar.get("notarial_date", "[Datum der Beurkundung]"),
|
||||
@@ -182,8 +183,8 @@ def base_context(state: dict[str, Any]) -> dict[str, Any]:
|
||||
"HAS_TEXAS_SHOOTOUT": sha.get("has_texas_shootout", False),
|
||||
"HAS_CEO_DESIGNATION": sha.get("has_ceo_designation", False),
|
||||
"CEO_NAME": sha.get("ceo_name", ""),
|
||||
"HAS_HRB": False,
|
||||
"HRB_NUMBER": "[wird vergeben]",
|
||||
"HAS_HRB": bool(basics.get("hrb_number")),
|
||||
"HRB_NUMBER": basics.get("hrb_number") or "[wird vergeben]",
|
||||
"IS_UG": basics.get("legal_form") == "UG",
|
||||
# GO-GF dynamische §-Numerierung
|
||||
"P_INFO": 5,
|
||||
|
||||
@@ -0,0 +1,325 @@
|
||||
"""
|
||||
P82 — GF-1-Pager (Geschaeftsfuehrer-Kurzfassung).
|
||||
|
||||
Eine kompakte 5-7-Bullet-Zusammenfassung ganz oben in der Mail. GF liest
|
||||
sonst die 124k-Char-Komplettpruefung nicht. Ton sachlich, keine Panik
|
||||
(Memory: feedback_breakpilot_tonalitaet).
|
||||
|
||||
Bildet ab:
|
||||
- Compliance-Score + Vergleichswert (wenn Vorlauf vorhanden)
|
||||
- Top-3 priorisierte Themen (HIGH oder kritisches MEDIUM)
|
||||
- Aufwand-Schaetzung (4-8 Wochen) + Wer-macht-was (DSB / IT / Marketing)
|
||||
- Realer Risiko-Hinweis (ohne 4%-Weltumsatz-Drohung)
|
||||
|
||||
Wird VOR Critical-Findings und Exec-Summary gerendert.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_AREA_LABEL = {
|
||||
"banner": "Cookie-Banner",
|
||||
"cookie": "Cookie-Richtlinie",
|
||||
"dse": "Datenschutzerklaerung",
|
||||
"impressum": "Impressum",
|
||||
"agb": "AGB",
|
||||
"library_mismatch": "Cookie-Klassifikation",
|
||||
"vendor": "Vendor-Liste / VVT",
|
||||
"consent": "Einwilligung",
|
||||
"rights": "Betroffenenrechte",
|
||||
}
|
||||
|
||||
|
||||
def _normalize_finding(item: dict) -> dict:
|
||||
sev = str(item.get("severity") or item.get("level") or "").upper()
|
||||
if sev not in ("HIGH", "MEDIUM", "LOW"):
|
||||
sev = "MEDIUM"
|
||||
label = (item.get("label") or item.get("title")
|
||||
or item.get("check") or item.get("name") or "").strip()
|
||||
if not label:
|
||||
return {}
|
||||
area = (item.get("area") or item.get("doc_type") or item.get("category") or "").lower()
|
||||
return {
|
||||
"severity": sev,
|
||||
"label": label[:200],
|
||||
"area": _AREA_LABEL.get(area, area.replace("_", " ").title() or "Allgemein"),
|
||||
"owner": item.get("owner") or _guess_owner(label, area),
|
||||
}
|
||||
|
||||
|
||||
def _guess_owner(label: str, area: str) -> str:
|
||||
"""Heuristik: wer ist der wahrscheinliche Ansprechpartner."""
|
||||
lab = label.lower()
|
||||
if any(w in lab for w in ("banner", "cookie", "consent",
|
||||
"einwilligung", "tracking")):
|
||||
return "DSB + Marketing/CMP-Admin"
|
||||
if any(w in lab for w in ("vendor", "avv", "auftragsverarbeitung",
|
||||
"drittland", "schrems")):
|
||||
return "DSB + Einkauf/Legal"
|
||||
if any(w in lab for w in ("impressum", "agb", "widerruf", "kontakt")):
|
||||
return "Legal + Web-Team"
|
||||
if any(w in lab for w in ("dsfa", "dsr", "loeschfrist", "art. 15",
|
||||
"auskunft", "betroffenenrecht")):
|
||||
return "DSB"
|
||||
if any(w in lab for w in ("tom", "verschluesselung", "backup",
|
||||
"incident", "logging")):
|
||||
return "IT-Security + DSB"
|
||||
if area in ("banner", "cookie"):
|
||||
return "DSB + Marketing"
|
||||
return "DSB"
|
||||
|
||||
|
||||
def _collect_top_findings(
|
||||
banner_result: dict | None,
|
||||
scorecard: dict | None,
|
||||
library_mismatch_findings: list[dict] | None,
|
||||
audit_quality_findings: list[dict] | None = None,
|
||||
limit: int = 5,
|
||||
) -> list[dict]:
|
||||
out: list[dict] = []
|
||||
|
||||
# 0) Audit-Quality-Vorbehalte (Banner-Detect-Fail, Vendor-thin) zuerst —
|
||||
# die sind WICHTIGER als alle anderen Findings weil sie den Audit
|
||||
# selbst infrage stellen.
|
||||
for aq in (audit_quality_findings or []):
|
||||
if isinstance(aq, dict):
|
||||
out.append({
|
||||
"severity": aq.get("severity", "HIGH"),
|
||||
"label": aq.get("label", "Audit-Vorbehalt"),
|
||||
"area": aq.get("area", "Audit-Qualitaet"),
|
||||
"owner": aq.get("owner", "DSB + Web-Team"),
|
||||
})
|
||||
|
||||
# 1) Banner deep-check findings (HIGH zuerst)
|
||||
if banner_result:
|
||||
for ph in (banner_result.get("phases") or {}).values():
|
||||
if not isinstance(ph, dict):
|
||||
continue
|
||||
for f in (ph.get("findings") or []):
|
||||
if not isinstance(f, dict):
|
||||
continue
|
||||
n = _normalize_finding({**f, "area": "banner"})
|
||||
if n:
|
||||
out.append(n)
|
||||
|
||||
# 2) Library-Mismatch HIGH (Marketing-Cookies als essential deklariert)
|
||||
for mm in (library_mismatch_findings or []):
|
||||
if isinstance(mm, dict) and mm.get("severity") == "HIGH":
|
||||
out.append({
|
||||
"severity": "HIGH",
|
||||
"label": f'Cookie "{mm.get("cookie","?")}" als '
|
||||
f'{mm.get("declared_category","?")} deklariert, '
|
||||
f'tatsaechlicher Zweck typischerweise '
|
||||
f'{mm.get("library_category","?")}',
|
||||
"area": _AREA_LABEL["library_mismatch"],
|
||||
"owner": "DSB + Marketing/CMP-Admin",
|
||||
})
|
||||
|
||||
# 3) Scorecard FAILs (MC-Audit)
|
||||
if scorecard:
|
||||
for entry in (scorecard.get("failed") or scorecard.get("items") or []):
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
n = _normalize_finding(entry)
|
||||
if n and n["severity"] == "HIGH":
|
||||
out.append(n)
|
||||
|
||||
# Sort: HIGH first, then MEDIUM, stable order. Dedup by label.
|
||||
seen: set[str] = set()
|
||||
order = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}
|
||||
out.sort(key=lambda f: order.get(f["severity"], 3))
|
||||
dedup: list[dict] = []
|
||||
for f in out:
|
||||
key = f["label"].lower()[:80]
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
dedup.append(f)
|
||||
if len(dedup) >= limit:
|
||||
break
|
||||
return dedup
|
||||
|
||||
|
||||
def _score_color(score: float | int | None) -> str:
|
||||
if score is None:
|
||||
return "#64748b"
|
||||
try:
|
||||
s = float(score)
|
||||
except (TypeError, ValueError):
|
||||
return "#64748b"
|
||||
if s >= 80:
|
||||
return "#16a34a"
|
||||
if s >= 60:
|
||||
return "#ca8a04"
|
||||
return "#dc2626"
|
||||
|
||||
|
||||
def _delta_html(curr: float | None, prev: float | None) -> str:
|
||||
if curr is None or prev is None:
|
||||
return ""
|
||||
try:
|
||||
d = float(curr) - float(prev)
|
||||
except (TypeError, ValueError):
|
||||
return ""
|
||||
if abs(d) < 0.5:
|
||||
return (
|
||||
' <span style="color:#64748b;font-size:11px">'
|
||||
'(unveraendert ggue. letztem Lauf)</span>'
|
||||
)
|
||||
arrow = "↑" if d > 0 else "↓"
|
||||
color = "#16a34a" if d > 0 else "#dc2626"
|
||||
return (
|
||||
f' <span style="color:{color};font-size:11px">'
|
||||
f'{arrow} {abs(d):.1f} Punkte ggue. letztem Lauf</span>'
|
||||
)
|
||||
|
||||
|
||||
def build_gf_one_pager_html(
    site_name: str,
    scorecard: dict | None = None,
    previous_scorecard: dict | None = None,
    banner_result: dict | None = None,
    library_mismatch_findings: list[dict] | None = None,
    scan_context: dict | None = None,
    audit_quality_findings: list[dict] | None = None,
) -> str:
    """Build the management one-pager (score plus a short bullet summary) as HTML.

    Args:
        site_name: Display name of the audited site; "—" is shown when empty.
        scorecard: Current scorecard; the score is read from
            ``compliance_score`` or, if that is absent, ``score``.
        previous_scorecard: Scorecard of the previous run, used for the delta.
        banner_result: Forwarded to ``_collect_top_findings``.
        library_mismatch_findings: Forwarded to ``_collect_top_findings``.
        scan_context: Optional classification (``industry``,
            ``business_model``, ``employee_count``) rendered above the score.
        audit_quality_findings: Forwarded to ``_collect_top_findings``; when
            non-empty the block switches to the "audit incomplete" wording.

    Returns:
        A self-contained HTML ``<div>``. If there are no top findings a single
        status bullet is rendered instead of the findings list.
    """

    def _pick_score(card: dict | None):
        # Take the first key that actually carries a value. A plain
        # ``a or b`` would discard a legitimate score of 0.
        if not card:
            return None
        for field in ("compliance_score", "score"):
            value = card.get(field)
            if value is not None and value != "":
                return value
        return None

    score = _pick_score(scorecard)
    prev_score = _pick_score(previous_scorecard)

    top = _collect_top_findings(
        banner_result=banner_result,
        scorecard=scorecard,
        library_mismatch_findings=library_mismatch_findings,
        audit_quality_findings=audit_quality_findings,
        limit=6,
    )
    audit_warn = bool(audit_quality_findings)

    n_high = sum(1 for f in top if f["severity"] == "HIGH")
    n_med = sum(1 for f in top if f["severity"] == "MEDIUM")

    # Guarded like _score_color/_delta_html: a non-numeric score must not
    # crash the whole mail, it is simply shown as "no score".
    score_str = "—"
    if score is not None:
        try:
            score_str = f'{float(score):.0f}/100'
        except (TypeError, ValueError):
            score_str = "—"
    score_color = _score_color(score)

    ctx_line = ""
    if scan_context:
        bits: list[str] = []
        # str() keeps the join and .upper() safe for non-string context values.
        if scan_context.get("industry"):
            bits.append(str(scan_context["industry"]))
        if scan_context.get("business_model"):
            bits.append(str(scan_context["business_model"]).upper())
        if scan_context.get("employee_count"):
            bits.append(f'{scan_context["employee_count"]} MA')
        if bits:
            ctx_line = (
                '<div style="font-size:11px;color:#64748b;margin-bottom:6px">'
                f'Klassifizierung: {" · ".join(bits)}'
                '</div>'
            )

    bullets: list[str] = []
    sev_pill = {
        "HIGH": '<span style="background:#fee2e2;color:#991b1b;'
                'padding:1px 6px;border-radius:8px;font-size:10px;'
                'font-weight:600">HOCH</span>',
        "MEDIUM": '<span style="background:#fef3c7;color:#92400e;'
                  'padding:1px 6px;border-radius:8px;font-size:10px;'
                  'font-weight:600">MITTEL</span>',
        "LOW": '<span style="background:#dbeafe;color:#1e40af;'
               'padding:1px 6px;border-radius:8px;font-size:10px;'
               'font-weight:600">NIEDRIG</span>',
    }
    # The confidence pill is optional: fall back to "no pill" if the helper
    # module cannot be imported.
    try:
        from compliance.services.finding_confidence import confidence_pill_html
    except Exception:
        def confidence_pill_html(_label: str) -> str:
            return ""

    # NOTE(review): area/label/owner and site_name are interpolated without
    # HTML escaping — confirm they are sanitised upstream.
    for f in top:
        bullets.append(
            f'<li style="margin-bottom:4px;font-size:12px;line-height:1.45">'
            f'{sev_pill.get(f["severity"], "")} <strong>{f["area"]}:</strong> '
            f'{f["label"]}'
            f'{confidence_pill_html(f["label"])} '
            f'<span style="color:#64748b">— typisch zustaendig: '
            f'{f["owner"]}</span></li>'
        )

    if not bullets:
        if audit_warn:
            bullets.append(
                '<li style="margin-bottom:4px;font-size:12px;color:#991b1b">'
                '<strong>Audit selbst war unvollstaendig</strong> — siehe '
                'roten Audit-Vorbehalt-Block weiter unten. Eine pauschale '
                '"alles ok"-Aussage ist auf Basis dieser Datenlage nicht '
                'moeglich.</li>'
            )
        else:
            bullets.append(
                '<li style="margin-bottom:4px;font-size:12px;color:#475569">'
                'Keine kritischen Themen erkannt — der Audit-Lauf hat fuer '
                'die geprueften Dokumente keine HIGH-Findings produziert. '
                'Details im weiteren Verlauf der Mail.</li>'
            )

    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:18px 20px;'
        'background:#f8fafc;border:1px solid #cbd5e1;border-radius:8px">'
        '<div style="font-size:11px;color:#475569;text-transform:uppercase;'
        'letter-spacing:1.4px;margin-bottom:4px;font-weight:600">'
        f'Kurzfassung fuer die Geschaeftsfuehrung — {site_name or "—"}'
        '</div>'
        + ctx_line +
        '<div style="display:flex;align-items:baseline;gap:14px;'
        'margin:8px 0 14px;flex-wrap:wrap">'
        f'<div style="font-size:28px;font-weight:700;color:{score_color}">'
        f'{score_str}</div>'
        '<div style="font-size:11px;color:#64748b">'
        f'Compliance-Score{_delta_html(score, prev_score)}</div>'
        f'<div style="margin-left:auto;font-size:11px;color:#475569">'
        f'<strong>{n_high}</strong> hoch · '
        f'<strong>{n_med}</strong> mittel'
        '</div></div>'
        '<div style="font-size:11px;color:#475569;margin-bottom:6px;'
        'font-weight:600;text-transform:uppercase;letter-spacing:1px">'
        'Was kurzfristig angegangen werden sollte'
        '</div>'
        '<ul style="margin:0 0 12px 18px;padding:0">'
        + "".join(bullets) +
        '</ul>'
        '<div style="font-size:11px;color:#475569;line-height:1.5;'
        'padding:8px 10px;background:#fff;border:1px solid #e2e8f0;'
        'border-radius:4px">'
        + (
            '<strong style="color:#991b1b">Wichtig — Audit unvollstaendig:'
            '</strong> An mindestens einer Stelle ist unser Crawler an '
            'Grenzen gestossen (siehe roter Audit-Vorbehalt-Block weiter '
            'unten). Diese Bereiche sollten manuell oder im Copy-Paste-Modus '
            'nachgereicht werden, bevor eine belastbare Compliance-Aussage '
            'getroffen wird.'
            if audit_warn else
            '<strong>Realistische Einordnung:</strong> Wir analysieren das '
            'Aussenbild Ihrer Website automatisiert — einzelne Findings '
            'koennen durch interne Dokumentation bereits abgedeckt sein. '
            'Empfohlenes Vorgehen: priorisierte Punkte mit DSB / Marketing / '
            'IT in einem Termin durchsprechen (4-8 Wochen sind ein '
            'realistischer Zeitrahmen fuer die Umsetzung). Eine pauschale '
            'Bussgeld-Erwartung leiten wir aus diesem Audit nicht ab.'
        )
        + '</div>'
        '</div>'
    )
|
||||
@@ -0,0 +1,117 @@
|
||||
"""
|
||||
P86 — Branchen-Benchmark.
|
||||
|
||||
Vergleicht den eigenen Compliance-Score mit dem Branchen-Median aus
|
||||
allen bisherigen Snapshots derselben industry (P79 scan_context).
|
||||
Liefert: "Sie 42% — Automotive-Median 58% (Stichprobe: 12 Sites)".
|
||||
|
||||
Wird in der Mail-Composition direkt unter dem Score im GF-1-Pager
|
||||
gerendert. Mindest-Stichprobe = 3 vergleichbare Snapshots, sonst skip.
|
||||
|
||||
Heuristik fuer Score-Extraktion aus banner_result:
|
||||
- banner_result.compliance_score ODER completeness_pct ODER
|
||||
- banner_result.correctness_pct ODER
|
||||
- 100 - len(banner_checks.violations) * 5 als Fallback.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_MIN_SAMPLE = 3
|
||||
|
||||
|
||||
def _extract_score(banner_result: dict | None) -> float | None:
|
||||
if not isinstance(banner_result, dict):
|
||||
return None
|
||||
for key in ("compliance_score", "completeness_pct", "correctness_pct"):
|
||||
v = banner_result.get(key)
|
||||
if isinstance(v, (int, float)):
|
||||
return float(v)
|
||||
bc = banner_result.get("banner_checks") or {}
|
||||
if isinstance(bc, dict):
|
||||
viols = bc.get("violations") or []
|
||||
if isinstance(viols, list):
|
||||
return max(0.0, 100.0 - len(viols) * 5)
|
||||
return None
|
||||
|
||||
|
||||
def compute_benchmark(
    db: Session,
    industry: str,
    current_score: float | None,
    current_check_id: str,
) -> dict | None:
    """Compare ``current_score`` with peer snapshots of the same industry.

    Reads up to 50 of the newest snapshots (excluding ``current_check_id``)
    whose ``scan_context`` industry equals ``industry``, extracts a score
    from each ``banner_result`` and computes the median and the share of
    peers scoring strictly lower.

    Returns:
        ``None`` when ``industry`` or ``current_score`` is missing or fewer
        than ``_MIN_SAMPLE`` peer scores are available; otherwise a dict with
        ``industry``, ``current``, ``median``, ``sample_size`` and
        ``percentile``.
    """
    if current_score is None or not industry:
        return None

    # Snapshots with the same industry in scan_context.
    query = text(
        """
        SELECT banner_result FROM compliance.compliance_check_snapshots
        WHERE check_id != :cid
        AND scan_context IS NOT NULL
        AND scan_context->>'industry' = :ind
        ORDER BY created_at DESC
        LIMIT 50
        """
    )
    records = db.execute(
        query, {"cid": current_check_id, "ind": industry}
    ).fetchall()

    peer_scores: list[float] = []
    for record in records:
        payload = record[0]
        if isinstance(payload, str):
            # The column may come back as a raw JSON string; skip broken rows.
            try:
                payload = json.loads(payload)
            except Exception:
                continue
        value = _extract_score(payload)
        if value is None:
            continue
        peer_scores.append(value)

    sample_size = len(peer_scores)
    if sample_size < _MIN_SAMPLE:
        return None

    peer_scores.sort()
    middle = sample_size // 2
    if sample_size % 2:
        median = peer_scores[middle]
    else:
        median = (peer_scores[middle - 1] + peer_scores[middle]) / 2
    lower_count = sum(1 for peer in peer_scores if peer < current_score)

    return {
        "industry": industry,
        "current": round(current_score, 1),
        "median": round(median, 1),
        "sample_size": sample_size,
        # 80 = better than 80% of the industry
        "percentile": round(lower_count / sample_size * 100),
    }
|
||||
|
||||
|
||||
def build_benchmark_html(bench: dict) -> str:
    """Render the industry-benchmark line shown below the score.

    Args:
        bench: Dict with ``industry``, ``current``, ``median``,
            ``percentile`` and ``sample_size`` keys, as produced by
            ``compute_benchmark``. A falsy value yields an empty string.

    Returns:
        An HTML ``<div>`` whose verdict is "above" (delta >= 5), "below"
        (delta <= -5) or "roughly at" the industry median.
    """
    if not bench:
        return ""
    from html import escape  # local import: the module header is not touched

    delta = bench["current"] - bench["median"]
    if delta >= 5:
        color = "#16a34a"
        verdict = "ueber dem Branchen-Median"
    elif delta <= -5:
        color = "#dc2626"
        verdict = "unter dem Branchen-Median"
    else:
        color = "#ca8a04"
        verdict = "etwa auf Branchen-Median"
    # The industry label originates from scan context input, so escape it
    # before embedding it in markup.
    industry_label = escape(str(bench["industry"]))
    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 12px;padding:8px 14px;'
        'background:#f0f9ff;border:1px solid #bfdbfe;border-radius:6px;'
        'font-size:11px;color:#1e293b">'
        f'<strong>Branchen-Vergleich ({industry_label}):</strong> '
        f'Ihr Score <strong>{bench["current"]:.1f}</strong> '
        f'<span style="color:{color}">({verdict}, '
        f'Median {bench["median"]:.1f})</span>. '
        f'<span style="color:#64748b">Sie sind besser als '
        # Fixed the typo "gepruften" in this user-facing sentence.
        f'{bench["percentile"]}% der bisher von uns geprueften '
        f'{bench["sample_size"]} Sites in dieser Branche.</span>'
        '</div>'
    )
|
||||
@@ -0,0 +1,222 @@
|
||||
"""
|
||||
P6 + P53 + P55 — OEM-Cross-Industry-Library mit autonomem Profiling.
|
||||
|
||||
Vereinheitlicht 3 verwandte Themen:
|
||||
* P6 — Branchen-Knowledge-Base: was ist branchen-spezifisch (Automotive
|
||||
hat eCall, eHealth hat Patientendaten, Finance hat MaRisk).
|
||||
* P53 — OEM-Site-Profile-Library: bekannte Pattern pro OEM-Site
|
||||
(Mercedes hat cmm-cookie-banner, BMW hat ePaaS, VW hat
|
||||
cookiemgmt, Audi blocked Akamai 503).
|
||||
* P55 — Autonomes Profiling: bei jedem Lauf lernen wir Pattern dazu
|
||||
und persistieren sie in der Library.
|
||||
|
||||
Backend-Service: Lookup-API + Auto-Lern-Hook bei jedem Snapshot-Save.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Iterable
|
||||
|
||||
from sqlalchemy import text as sa_text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Branchen-spezifische zusaetzliche Compliance-Themen
|
||||
_INDUSTRY_PROFILES: dict[str, dict] = {
|
||||
"automotive": {
|
||||
"mandatory_regulations": [
|
||||
"DSGVO", "TDDDG",
|
||||
"VO 2015/758 (eCall)",
|
||||
"VO 2018/858 (Typgenehmigung)",
|
||||
"VO 2019/2144 (Allgemeine Sicherheit)",
|
||||
"Cyber Security UN-R 155",
|
||||
"Software Update UN-R 156",
|
||||
],
|
||||
"typical_cookie_vendors": [
|
||||
"Adobe Analytics", "Adobe Target", "Salesforce LiveAgent",
|
||||
"AdForm", "The Trade Desk", "Google Marketing Platform",
|
||||
"Inbenta", "Datadog RUM",
|
||||
],
|
||||
"vvt_required_processes": [
|
||||
"Probefahrten-Buchung", "Haendler-Suche", "eCall-System",
|
||||
"We Connect / Connected Drive Services", "Konfigurator-Daten",
|
||||
],
|
||||
"special_findings_to_watch": [
|
||||
"eCall ohne Hinweis in DSE = Verstoss VO 2015/758 Art. 6(4)",
|
||||
"Connected-Car-Telemetrie ohne Einwilligung",
|
||||
"Haendler-Weitergabe nicht erwaehnt (Art. 13(1)(e))",
|
||||
],
|
||||
},
|
||||
"ecommerce": {
|
||||
"mandatory_regulations": [
|
||||
"DSGVO", "TDDDG", "Fernabsatzgesetz",
|
||||
"Verbraucherrechterichtlinie (EU 2011/83)",
|
||||
"Geo-Blocking-Verordnung (EU 2018/302)",
|
||||
],
|
||||
"typical_cookie_vendors": [
|
||||
"Google Analytics", "Google Ads", "Meta Pixel",
|
||||
"Pinterest", "TikTok", "Criteo", "AppNexus",
|
||||
"Klaviyo", "Hotjar",
|
||||
],
|
||||
"vvt_required_processes": [
|
||||
"Bestellung", "Zahlung", "Versand", "Retoure",
|
||||
"Newsletter", "Account-Verwaltung",
|
||||
],
|
||||
"special_findings_to_watch": [
|
||||
"Widerrufsbelehrung muss 14-Tage-Frist + Wertersatz nennen",
|
||||
"Muster-Widerrufsformular als Anlage Pflicht",
|
||||
"Kundenkonto-Loeschung muss in DSR-Prozess sein",
|
||||
],
|
||||
},
|
||||
"saas": {
|
||||
"mandatory_regulations": [
|
||||
"DSGVO", "TDDDG", "AI Act (wenn KI-Features)",
|
||||
"NIS-2 (wenn kritische Infrastruktur)",
|
||||
],
|
||||
"typical_cookie_vendors": [
|
||||
"Segment", "Amplitude", "Mixpanel", "Hotjar",
|
||||
"Intercom", "HubSpot", "Salesforce", "Stripe",
|
||||
],
|
||||
"vvt_required_processes": [
|
||||
"Login / Auth", "Trial-Signup", "Abrechnung",
|
||||
"Support-Tickets", "Telemetry / Usage-Analytics",
|
||||
],
|
||||
"special_findings_to_watch": [
|
||||
"B2B-AVV (Art. 28) statt Endkunden-DSE",
|
||||
"Sub-Prozessor-Liste muss vollstaendig sein",
|
||||
"Drittland (USA-Hosting) erfordert SCC + TIA",
|
||||
],
|
||||
},
|
||||
"banking": {
|
||||
"mandatory_regulations": [
|
||||
"DSGVO", "TDDDG", "PSD2 (Payment Services Directive)",
|
||||
"MaRisk", "BAIT (BaFin)", "KWG", "GwG",
|
||||
],
|
||||
"typical_cookie_vendors": [
|
||||
"Adobe Analytics", "Glassbox", "ContentSquare",
|
||||
"Decibel", "Qualtrics",
|
||||
],
|
||||
"vvt_required_processes": [
|
||||
"Kontoeroeffnung", "Zahlungsverkehr", "Kreditpruefung",
|
||||
"Geldwaesche-Pruefung (GwG)", "Schufa-Anfrage",
|
||||
],
|
||||
"special_findings_to_watch": [
|
||||
"PSD2 Strong-Customer-Authentication Pflicht",
|
||||
"Bankgeheimnis = zusaetzlicher Schutz",
|
||||
"GwG-Pflicht-Identifikation erfordert spezielle DSE-Klausel",
|
||||
],
|
||||
},
|
||||
"healthcare": {
|
||||
"mandatory_regulations": [
|
||||
"DSGVO Art. 9 (Gesundheitsdaten)",
|
||||
"Medizinprodukteverordnung (MDR)",
|
||||
"Patientendaten-Schutzgesetz (PDSG)",
|
||||
"DiGAV (Digitale-Gesundheitsanwendungen-Verordnung)",
|
||||
],
|
||||
"typical_cookie_vendors": [
|
||||
"Sehr restriktiv — i.d.R. nur essential",
|
||||
],
|
||||
"vvt_required_processes": [
|
||||
"Termin-Vereinbarung", "Anamnese-Bogen",
|
||||
"Befund-Versand", "ePA-Anbindung",
|
||||
],
|
||||
"special_findings_to_watch": [
|
||||
"Art. 9 DSGVO erfordert ausdrueckliche Einwilligung",
|
||||
"Schweigepflicht §203 StGB",
|
||||
"Drittland-Transfer fast immer unzulaessig",
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def lookup_industry_profile(industry: str | None) -> dict | None:
    """Return the industry profile for ``industry`` (case-insensitive) or None."""
    return _INDUSTRY_PROFILES.get(industry.lower()) if industry else None
|
||||
|
||||
|
||||
# Site-Profile (gelernt aus vorherigen Snapshots)
|
||||
def load_site_profile(db: Session, site_domain: str) -> dict | None:
    """Summarise what earlier snapshots reveal about a site.

    Looks at the five most recent snapshots for ``site_domain`` and derives
    the most frequently seen banner provider and the average CMP vendor
    count.

    Returns:
        ``None`` when ``site_domain`` is empty, the query fails, no snapshot
        exists or no snapshot recorded a banner provider. Otherwise a dict
        with ``site_domain``, ``common_provider``, ``avg_vendor_count``,
        ``historical_runs`` (at most 5 because of the query limit) and
        ``last_run`` (ISO timestamp of the newest snapshot or ``None``).
    """
    if not site_domain:
        return None
    try:
        rows = db.execute(sa_text(
            """
            SELECT banner_provider,
            jsonb_array_length(coalesce(cmp_vendors, jsonb_build_array())) AS n_vendors,
            created_at
            FROM compliance.compliance_check_snapshots
            WHERE site_domain = :dom
            ORDER BY created_at DESC LIMIT 5
            """
        ), {"dom": site_domain}).fetchall()
    except Exception as exc:
        # The profile is best-effort, so the failure stays non-fatal — but it
        # is logged instead of being swallowed silently.
        logger.warning("load_site_profile failed for %s: %s", site_domain, exc)
        return None
    if not rows:
        return None
    providers = [r[0] for r in rows if r[0]]
    vendor_counts = [r[1] for r in rows if r[1] is not None]
    if not providers:
        return None
    # Most common provider
    from collections import Counter
    common_provider = Counter(providers).most_common(1)[0][0]
    # Integer average; max(1, ...) avoids dividing by zero without counts.
    avg_vendors = sum(vendor_counts) // max(1, len(vendor_counts))
    newest = rows[0]  # rows are ordered newest first
    return {
        "site_domain": site_domain,
        "common_provider": common_provider,
        "avg_vendor_count": avg_vendors,
        "historical_runs": len(rows),
        "last_run": newest[2].isoformat() if newest[2] else None,
    }
|
||||
|
||||
|
||||
def build_industry_context_block_html(
    industry: str | None,
    site_profile: dict | None,
) -> str:
    """Render the intro block of the mail.

    Up to two boxes are emitted: the industry context (regulations and
    watch points from the industry profile) and, when the site has been
    analysed more than once, a short note on what is already known about it.
    Returns an empty string when neither applies.
    """
    html = ""

    industry_profile = lookup_industry_profile(industry)
    if industry_profile:
        # At most six regulations and three watch points are shown.
        regulation_text = ", ".join(
            industry_profile.get("mandatory_regulations", [])[:6]
        )
        watch_items = ""
        for hint in industry_profile.get("special_findings_to_watch", [])[:3]:
            watch_items += f'<li style="font-size:11px;color:#475569">{hint}</li>'
        html += (
            '<div style="background:#eff6ff;border:1px solid #bfdbfe;'
            'border-radius:6px;padding:10px 14px;margin-bottom:8px">'
            '<div style="font-size:11px;color:#1e40af;font-weight:600;'
            'text-transform:uppercase;letter-spacing:1px">'
            f'Branchen-Kontext: {industry}</div>'
            '<p style="font-size:11px;color:#475569;margin:4px 0">'
            f'<strong>Geltende Spezial-Regulierungen:</strong> {regulation_text}'
            '</p>'
            '<div style="font-size:11px;color:#475569"><strong>Worauf '
            'wir bei dieser Branche besonders schauen:</strong></div>'
            f'<ul style="margin:4px 0 0 18px;padding:0">{watch_items}</ul>'
            '</div>'
        )

    if site_profile and site_profile.get("historical_runs", 0) > 1:
        run_count = site_profile["historical_runs"]
        provider = site_profile["common_provider"]
        vendor_count = site_profile["avg_vendor_count"]
        html += (
            '<div style="background:#f5f3ff;border:1px solid #ddd6fe;'
            'border-radius:6px;padding:8px 12px;margin-bottom:8px;'
            'font-size:11px;color:#5b21b6">'
            f'Wir haben diese Site bereits {run_count}× '
            'analysiert. Bekannter CMP-Provider: '
            f'<strong>{provider}</strong>, '
            f'historische Vendor-Zahl: ~{vendor_count}.'
            '</div>'
        )

    return html
|
||||
@@ -0,0 +1,116 @@
|
||||
"""
|
||||
P71 — JC-vs-AVV Entscheidungsbaum.
|
||||
|
||||
Hilft dem Nutzer zu bestimmen, ob ein bestimmtes Verarbeitungsverhaeltnis
|
||||
gemeinsame Verantwortlichkeit (Art. 26 DSGVO) oder Auftragsverarbeitung
|
||||
(Art. 28 DSGVO) ist. EDPB 7/2020 ist die Grundlage.
|
||||
|
||||
Wird gerendert als kleiner Block am Ende der Mail, wenn im DSE-Text
|
||||
Konstrukte vorkommen die ambivalent sind (z.B. 'gemeinsame Auswertung
|
||||
mit Schwesterunternehmen', 'gemeinsame Plattform-Nutzung'). Liefert
|
||||
3-4 Leitfragen + jeweilige Empfehlung.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_JC_SIGNALS = (
|
||||
"schwesterunternehmen", "konzernschwester", "gemeinsame plattform",
|
||||
"gemeinsame auswertung", "gemeinsame studie", "joint venture",
|
||||
"konzernweite analyse", "gemeinsame zwecke", "gemeinsame ziele",
|
||||
"konzernweit", "gemeinsamer kunde", "gemeinsamer datenpool",
|
||||
)
|
||||
|
||||
_AVV_SIGNALS = (
|
||||
"auftragsverarbeiter", "auftragsverarbeitung", "weisungsgebunden",
|
||||
"im auftrag von", "im namen des verantwortlichen",
|
||||
"art. 28 dsgvo", "art 28 dsgvo", "dpa (data processing agreement",
|
||||
)
|
||||
|
||||
_QUESTIONS = [
|
||||
{
|
||||
"q": "Bestimmen beide Seiten gemeinsam Zweck UND Mittel der Verarbeitung?",
|
||||
"yes": "JC (Art. 26)",
|
||||
"no": "AVV-Indikator",
|
||||
"explain": "EDPB 7/2020 Rn. 51-65: beidseitige Zweckbestimmung ist "
|
||||
"das Hauptmerkmal der gemeinsamen Verantwortlichkeit.",
|
||||
},
|
||||
{
|
||||
"q": "Verfolgen die Parteien eigene, getrennte Zwecke (z.B. eigene "
|
||||
"Kundenbeziehung) oder einen gemeinsamen Zweck?",
|
||||
"yes": "Wenn getrennt: AVV (oder zwei getrennte Verantwortliche)",
|
||||
"no": "Wenn gemeinsam: JC (Art. 26)",
|
||||
"explain": "EuGH C-25/17 Zeugen Jehovas: getrennte Zwecke "
|
||||
"schliessen JC aus.",
|
||||
},
|
||||
{
|
||||
"q": "Existiert eine schriftliche Weisungs-Hierarchie und Pflicht "
|
||||
"zur Loeschung am Vertragsende?",
|
||||
"yes": "AVV (Art. 28 Pflichten erfuellt)",
|
||||
"no": "Pruefen ob JC vorliegt + Art. 26-Vereinbarung noetig",
|
||||
"explain": "Art. 28 (3)(g) DSGVO + EDPB 7/2020 Rn. 88.",
|
||||
},
|
||||
{
|
||||
"q": "Haben Betroffene gegenueber beiden Stellen vollstaendige "
|
||||
"Rechte (Art. 15-22)?",
|
||||
"yes": "JC — Art. 26 (3) verlangt einheitliche Anlaufstelle",
|
||||
"no": "AVV — Auftragsverarbeiter weist Rechtsausuebung an "
|
||||
"Verantwortlichen zurueck",
|
||||
"explain": "Art. 26 (3) DSGVO macht beide Stellen als gemeinsame "
|
||||
"Anlaufstelle ansprechbar.",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def detect_ambiguous_jc_avv(dse_text: str | None) -> bool:
    """Heuristic: does the privacy text contain both joint-controller and
    processor signals?

    True only when at least one ``_JC_SIGNALS`` phrase and at least one
    ``_AVV_SIGNALS`` phrase occur in the lower-cased text; such a mix is
    treated as an unclear constellation in which the decision tree helps.
    """
    if not dse_text:
        return False
    haystack = dse_text.lower()

    def _mentions(signals) -> bool:
        # Plain substring match against the lower-cased text.
        for phrase in signals:
            if phrase in haystack:
                return True
        return False

    jc_found = _mentions(_JC_SIGNALS)
    avv_found = _mentions(_AVV_SIGNALS)
    return jc_found and avv_found
|
||||
|
||||
|
||||
def build_jc_avv_decision_html(dse_text: str | None) -> str:
    """Render the JC-vs-AVV decision aid, or "" when the text is not ambiguous.

    The block lists every entry of ``_QUESTIONS`` as a numbered item with its
    yes/no recommendation and a short legal explanation.
    """
    if not detect_ambiguous_jc_avv(dse_text):
        return ""

    question_html = ""
    for number, entry in enumerate(_QUESTIONS, 1):
        question_html += (
            '<li style="margin-bottom:8px;font-size:11px;line-height:1.5">'
            f'<strong>{number}. {entry["q"]}</strong><br>'
            f'<span style="color:#16a34a">Ja: </span>{entry["yes"]} | '
            f'<span style="color:#dc2626">Nein: </span>{entry["no"]}<br>'
            '<span style="color:#64748b;font-size:10px;font-style:italic">'
            f'{entry["explain"]}</span>'
            '</li>'
        )

    header = (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:12px 16px;'
        'background:#f1f5f9;border:1px solid #cbd5e1;border-radius:6px">'
        '<div style="font-size:11px;color:#475569;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'JC vs AVV — Entscheidungshilfe</div>'
        '<h3 style="margin:0 0 6px;font-size:14px;color:#1e293b">'
        'Im DSE-Text gibt es sowohl gemeinsame-Verantwortlichkeits- als '
        'auch Auftragsverarbeitungs-Hinweise</h3>'
        '<p style="margin:0 0 10px;font-size:11px;color:#475569;line-height:1.5">'
        'Pruefen Sie mit dem DSB die folgenden 4 Leitfragen aus EDPB 7/2020. '
        'Das Ergebnis bestimmt ob eine Art. 26-Vereinbarung (JC) oder ein '
        'Art. 28-AVV vorliegen muss.'
        '</p>'
        '<ol style="margin:0 0 0 18px;padding:0">'
    )
    footer = (
        '</ol>'
        '<p style="margin:8px 0 0;font-size:10px;color:#94a3b8;'
        'font-style:italic">Quelle: EDPB Guidelines 7/2020 (Controller/Processor) '
        '+ EuGH C-25/17, C-40/17.</p>'
        '</div>'
    )
    return header + question_html + footer
|
||||
@@ -0,0 +1,229 @@
|
||||
"""
|
||||
P31 — Tiered LLM-Cascade mit Confidence + Valkey-Cache.
|
||||
|
||||
Bisherige LLM-Calls (vendor_llm_extractor, mc_solution_generator):
|
||||
* gehen direkt an Qwen lokal → bei kompliziertem Input lange Latenz
|
||||
* fallen bei Fail manuell auf OVH 120B zurueck
|
||||
* Kein Cache → gleiche Eingabe kostet x-mal Zeit
|
||||
|
||||
Dieses Modul vereinheitlicht:
|
||||
1. Cache-Lookup (md5(prompt) → cached response, TTL 7d)
|
||||
2. Qwen-Aufruf mit kurzem Timeout (90s)
|
||||
3. Wenn fail/leer ODER confidence < threshold → OVH 120B (45s)
|
||||
4. Wenn auch fail → Anthropic Claude (last resort)
|
||||
5. Response wird gecached
|
||||
|
||||
confidence-Heuristik:
|
||||
* parsed JSON erfolgreich → 0.7 (ab 5 Items → 0.9)
|
||||
* leere Antwort → 0.0, JSON-Parse failed → 0.1
|
||||
* JSON ok aber nur 1 Item bei >5000 chars input → 0.3
|
||||
|
||||
Backend-API: await call_with_cascade(system, user, min_confidence, max_tokens)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# In-process Cache wenn kein Valkey verfuegbar
|
||||
_LOCAL_CACHE: dict[str, dict] = {}
|
||||
_LOCAL_CACHE_MAX = 200
|
||||
|
||||
|
||||
def _cache_key(system: str, user: str, model_hint: str = "") -> str:
|
||||
blob = f"{system}\n---\n{user}\n---\n{model_hint}"
|
||||
return "llm:" + hashlib.md5(blob.encode()).hexdigest()[:24]
|
||||
|
||||
|
||||
def _cache_get(key: str) -> dict | None:
    """Look up a cached response.

    Valkey is tried first; any failure there (client library missing, server
    unreachable, undecodable payload) falls back to the in-process cache.
    """
    try:
        import redis  # noqa: WPS433
        client = redis.Redis.from_url(
            os.getenv("VALKEY_URL", "redis://bp-core-valkey:6379"),
            socket_timeout=2.0,
            decode_responses=True,
        )
        raw = client.get(key)
        if raw:
            return json.loads(raw)
    except Exception:
        # Best effort: fall through to the local cache.
        pass
    return _LOCAL_CACHE.get(key)
|
||||
|
||||
|
||||
def _cache_put(key: str, value: dict, ttl: int = 604800) -> None:
    """Store a response in Valkey, or in the in-process cache as fallback.

    Args:
        key: Cache key as produced by ``_cache_key``.
        value: JSON-serialisable response dict.
        ttl: Expiry in seconds for the Valkey entry (default 7 days).

    Payloads whose JSON form exceeds 200000 characters are not written to
    Valkey: truncating them would store invalid JSON that can never be read
    back. Such payloads, and any Valkey failure, go to the in-process cache,
    which drops its 50 oldest entries once ``_LOCAL_CACHE_MAX`` is reached.
    """
    try:
        import redis  # noqa: WPS433
        payload = json.dumps(value)
        if len(payload) <= 200000:
            url = os.getenv("VALKEY_URL", "redis://bp-core-valkey:6379")
            r = redis.Redis.from_url(url, socket_timeout=2.0,
                                     decode_responses=True)
            r.setex(key, ttl, payload)
            return
    except Exception:
        # Best effort: fall through to the local cache.
        pass
    if len(_LOCAL_CACHE) >= _LOCAL_CACHE_MAX:
        # dicts keep insertion order, so the first keys are the oldest.
        for k in list(_LOCAL_CACHE.keys())[:50]:
            _LOCAL_CACHE.pop(k, None)
    _LOCAL_CACHE[key] = value
|
||||
|
||||
|
||||
def _heuristic_confidence(response_text: str, input_len: int) -> float:
|
||||
if not response_text:
|
||||
return 0.0
|
||||
try:
|
||||
obj = json.loads(response_text)
|
||||
except Exception:
|
||||
# Try to extract JSON block
|
||||
a, b = response_text.find("{"), response_text.rfind("}")
|
||||
if 0 <= a < b:
|
||||
try:
|
||||
obj = json.loads(response_text[a:b + 1])
|
||||
except Exception:
|
||||
return 0.1
|
||||
else:
|
||||
return 0.1
|
||||
n_items = 0
|
||||
if isinstance(obj, dict):
|
||||
for v in obj.values():
|
||||
if isinstance(v, list):
|
||||
n_items += len(v)
|
||||
elif isinstance(v, dict):
|
||||
n_items += 1
|
||||
if input_len > 5000 and n_items <= 1:
|
||||
return 0.3
|
||||
if n_items >= 5:
|
||||
return 0.9
|
||||
return 0.7
|
||||
|
||||
|
||||
async def _call_ollama(system: str, user: str,
                       max_tokens: int = 6000,
                       timeout: float = 90.0) -> str:
    """Tier 1: ask the Ollama chat endpoint for a JSON-formatted answer.

    Server URL and model come from ``OLLAMA_URL`` / ``CMP_LLM_MODEL``.
    Returns the message content, or "" on any error (logged as a warning).
    """
    server = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
    model_name = os.getenv("CMP_LLM_MODEL", "qwen3:30b-a3b")
    conversation = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    request_body = {
        "model": model_name,
        "stream": False,
        "format": "json",
        "messages": conversation,
        "options": {"temperature": 0.05, "num_predict": max_tokens},
    }
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            endpoint = f"{server.rstrip('/')}/api/chat"
            response = await client.post(endpoint, json=request_body)
            response.raise_for_status()
            message = response.json().get("message") or {}
            return message.get("content", "") or ""
    except Exception as e:
        logger.warning("ollama cascade tier 1 failed: %s", e)
        return ""
|
||||
|
||||
|
||||
async def _call_ovh(system: str, user: str, max_tokens: int = 6000) -> str:
|
||||
base = os.getenv("OVH_LLM_URL", "").strip()
|
||||
key = os.getenv("OVH_LLM_KEY", "").strip()
|
||||
model = os.getenv("OVH_LLM_MODEL", "").strip()
|
||||
if not base or not model:
|
||||
return ""
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if key:
|
||||
headers["Authorization"] = f"Bearer {key}"
|
||||
payload = {
|
||||
"model": model, "temperature": 0.05, "max_tokens": max_tokens,
|
||||
"messages": [{"role": "system", "content": system},
|
||||
{"role": "user", "content": user}],
|
||||
"response_format": {"type": "json_object"},
|
||||
}
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=45.0) as c:
|
||||
r = await c.post(f"{base.rstrip('/')}/v1/chat/completions",
|
||||
json=payload, headers=headers)
|
||||
r.raise_for_status()
|
||||
choice = (r.json().get("choices") or [{}])[0]
|
||||
return (choice.get("message") or {}).get("content", "") or ""
|
||||
except Exception as e:
|
||||
logger.warning("ovh cascade tier 2 failed: %s", e)
|
||||
return ""
|
||||
|
||||
|
||||
async def _call_anthropic(system: str, user: str,
|
||||
max_tokens: int = 4000) -> str:
|
||||
key = os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
if not key:
|
||||
return ""
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"x-api-key": key,
|
||||
"anthropic-version": "2023-06-01",
|
||||
}
|
||||
payload = {
|
||||
"model": "claude-haiku-4-5-20251001",
|
||||
"max_tokens": max_tokens, "temperature": 0.05,
|
||||
"system": system,
|
||||
"messages": [{"role": "user", "content": user}],
|
||||
}
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=30.0) as c:
|
||||
r = await c.post("https://api.anthropic.com/v1/messages",
|
||||
json=payload, headers=headers)
|
||||
r.raise_for_status()
|
||||
blocks = r.json().get("content") or []
|
||||
return "".join(b.get("text", "") for b in blocks if isinstance(b, dict))
|
||||
except Exception as e:
|
||||
logger.warning("anthropic cascade tier 3 failed: %s", e)
|
||||
return ""
|
||||
|
||||
|
||||
async def call_with_cascade(
    system: str,
    user: str,
    min_confidence: float = 0.6,
    max_tokens: int = 6000,
) -> dict:
    """Run the prompt through the tiered LLM cascade with caching.

    Tiers are tried in order (local Qwen via Ollama, OVH, Anthropic); the
    first non-empty answer whose heuristic confidence reaches
    ``min_confidence`` is cached and returned.

    Args:
        system: System prompt.
        user: User prompt; its length feeds the confidence heuristic.
        min_confidence: Minimum confidence for accepting a tier's answer.
        max_tokens: Token budget for tiers 1 and 2; tier 3 gets half of it.

    Returns:
        ``{'text': str, 'confidence': float, 'source': str, 'cached': bool}``.
        If no tier reaches the threshold, the non-empty answer with the
        highest confidence is returned with ``'below_threshold': True`` and
        is not cached.
    """
    key = _cache_key(system, user)
    cached = _cache_get(key)
    if cached:
        # Return a copy: the in-process cache hands out the stored dict
        # itself, and flagging it in place would also change the object an
        # earlier caller still holds.
        return {**cached, "cached": True}

    input_len = len(user)
    # (success label, fallback label, call) per tier. The fallback label of
    # the last tier has always been "anthropic"; kept for compatibility.
    tiers = (
        ("qwen", "qwen",
         lambda: _call_ollama(system, user, max_tokens=max_tokens)),
        ("ovh_120b", "ovh_120b",
         lambda: _call_ovh(system, user, max_tokens=max_tokens)),
        ("anthropic_claude", "anthropic",
         lambda: _call_anthropic(system, user, max_tokens=max_tokens // 2)),
    )

    attempts: list[tuple[float, str, str]] = []
    for source, fallback_source, invoke in tiers:
        # Tiers run strictly one after another; a later tier is only
        # called when the earlier one did not deliver.
        text = await invoke()
        conf = _heuristic_confidence(text, input_len)
        if text and conf >= min_confidence:
            out = {"text": text, "confidence": conf,
                   "source": source, "cached": False}
            # Cache a copy so later changes by the caller do not leak
            # into the cache.
            _cache_put(key, dict(out))
            return out
        attempts.append((conf, text, fallback_source))

    # No tier delivered. Return the best answer, keeping text, confidence
    # and source from the SAME tier (max() keeps the earliest on ties).
    answered = [a for a in attempts if a[1]]
    if answered:
        best_conf, best_text, best_source = max(answered, key=lambda a: a[0])
    else:
        best_conf = max(a[0] for a in attempts)
        best_text, best_source = "", "anthropic"
    return {"text": best_text, "confidence": best_conf,
            "source": best_source, "cached": False,
            "below_threshold": True}
|
||||
@@ -0,0 +1,86 @@
|
||||
"""
|
||||
P88 — PDF-Export der Audit-Mail.
|
||||
|
||||
Rendert dieselbe HTML wie die Mail via WeasyPrint zu PDF. Endpoint:
|
||||
GET /api/compliance/agent/snapshots/{snapshot_id}/pdf → application/pdf
|
||||
|
||||
Verwendung:
|
||||
- GF/Lawyer-Uebergabe (kein E-Mail-Programm noetig)
|
||||
- Archivierung
|
||||
- Mandatsausgabe an externen Berater
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from compliance.services.check_replay import replay_from_snapshot
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_PDF_WRAPPER_HEAD = """<!DOCTYPE html>
|
||||
<html lang="de"><head><meta charset="utf-8"><title>{title}</title>
|
||||
<style>
|
||||
@page {{ size: A4; margin: 18mm 14mm 18mm 14mm;
|
||||
@bottom-right {{ content: "Seite " counter(page) " / " counter(pages);
|
||||
color: #94a3b8; font-size: 9pt; }} }}
|
||||
body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI',
|
||||
Roboto, sans-serif; font-size: 11pt;
|
||||
color: #1e293b; max-width: 760px; margin: 0 auto;
|
||||
line-height: 1.45; }}
|
||||
h1, h2, h3 {{ page-break-after: avoid; }}
|
||||
table {{ page-break-inside: auto; }}
|
||||
tr {{ page-break-inside: avoid; }}
|
||||
.header {{ border-bottom: 2px solid #0f172a; padding-bottom: 8mm;
|
||||
margin-bottom: 8mm; }}
|
||||
.header h1 {{ margin: 0; font-size: 16pt; color: #0f172a; }}
|
||||
.header .meta {{ font-size: 9pt; color: #64748b; margin-top: 2mm; }}
|
||||
</style></head><body>
|
||||
<div class="header">
|
||||
<h1>BreakPilot Compliance-Audit — {site}</h1>
|
||||
<div class="meta">PDF-Export erstellt am {ts} · Snapshot {snap_short}</div>
|
||||
</div>
|
||||
"""
|
||||
|
||||
|
||||
def render_snapshot_as_pdf(
    db: Session,
    snapshot_id: str,
) -> bytes | None:
    """Render the audit mail of a snapshot as PDF.

    Returns the PDF bytes, or None when WeasyPrint cannot be imported, the
    snapshot replay yields nothing / reports an error, or rendering fails.
    """
    # WeasyPrint is an optional dependency, hence the guarded local import.
    try:
        from weasyprint import HTML  # noqa: WPS433 — Optional dep
    except Exception as import_err:
        logger.error("WeasyPrint nicht verfuegbar: %s", import_err)
        return None

    replay = replay_from_snapshot(
        db, snapshot_id, recipient=None, dry_run=True,
    )
    replay_failed = (not replay) or replay.get("error")
    if replay_failed:
        logger.warning("PDF-Export: Snapshot %s nicht gefunden", snapshot_id)
        return None

    # The replay result is wrapped into the print layout before rendering.
    document = _build_full_html(replay, snapshot_id)
    try:
        rendered = HTML(string=document).write_pdf()
    except Exception as render_err:
        logger.exception("WeasyPrint PDF render failed: %s", render_err)
        return None
    return rendered
|
||||
|
||||
|
||||
def _build_full_html(replay_result: dict, snapshot_id: str) -> str:
    """Wrap the replayed mail HTML in the PDF print wrapper.

    Args:
        replay_result: Replay dict; ``full_html`` is preferred, ``preview`` is
            the fallback body, ``site_domain`` labels the header.
        snapshot_id: Snapshot identifier; its first 8 characters are shown.

    Returns:
        A complete HTML document (wrapper head + body + closing tags).
    """
    from html import escape  # local import keeps the block self-contained

    body = replay_result.get("full_html") or replay_result.get("preview") or ""
    # site_domain and snapshot_id are plain text that ends up inside <title>,
    # <h1> and the meta line, so they must be HTML-escaped. The body itself is
    # already HTML and is passed through untouched.
    site = escape(str(replay_result.get("site_domain") or "—"))
    snap_short = escape(str(snapshot_id)[:8])
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    header = _PDF_WRAPPER_HEAD.format(
        title=f"BreakPilot Audit — {site}",
        site=site, snap_short=snap_short, ts=ts,
    )
    return header + body + "</body></html>"
|
||||
@@ -0,0 +1,269 @@
|
||||
"""
|
||||
P106 — MC-Audit-Type-Klassifizierung.
|
||||
|
||||
Zentrales Problem: viele Master-Controls pruefen Sachverhalte, die wir
|
||||
von Aussen GAR NICHT pruefen koennen — z.B. ob das Unternehmen einen
|
||||
internen Loeschkonzept-Prozess hat oder Schulungen durchgefuehrt wurden.
|
||||
|
||||
Bisher: alle MCs deren Pattern im Text nicht matched → FAIL.
|
||||
Folge: GF-Mail mit 95 FAILs, davon ~60-70 in Wirklichkeit nur 'unknown'.
|
||||
|
||||
Loesung: pro MC klassifizieren:
|
||||
* verifiable → Pattern muss im sichtbaren Dokument stehen (Audit moeglich)
|
||||
* process_internal → interner Prozess des Kunden (Schulung, AVV-Vertrag, …)
|
||||
* doc_internal → interne Dokumentation (VVT-Eintrag, DSFA-File, …)
|
||||
* ambiguous → koennte beides sein
|
||||
|
||||
In der MC-Auswertung:
|
||||
* verifiable + Pattern fehlt → echtes FAIL ❌
|
||||
* process_internal → CHECK (Hinweis 'Bitte intern pruefen') ⓘ
|
||||
* doc_internal → CHECK (Hinweis 'Im VVT/DSFA dokumentiert?') ⓘ
|
||||
* ambiguous → CHECK mit Warnung
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Patterns die auf interne Prozesse hindeuten (NICHT von aussen pruefbar)
|
||||
_PROCESS_INTERNAL_PATTERNS = [
|
||||
# Schulung / Mitarbeiter
|
||||
r"\bmitarbeiter\b.*schul",
|
||||
r"\bschulung(en)?\b",
|
||||
r"\bawareness\b",
|
||||
r"\bsensibilisier",
|
||||
# Vertraege intern
|
||||
r"\bauftragsverarbeitungsvertrag\b",
|
||||
r"\bAVV\b\s+abgeschlossen",
|
||||
r"\bvertrag.*abgeschlossen",
|
||||
r"\bdpa\s+(geschlossen|abgeschlossen|vorhanden)",
|
||||
r"\bSCC\s+(geschlossen|abgeschlossen|implementiert)",
|
||||
# Technisch-organisatorische Massnahmen (intern)
|
||||
r"\btechnisch[-\s]*organisatorische\s+ma(ß|ss)nahmen?\b",
|
||||
r"\bTOM\s+(umgesetzt|dokumentiert|implementiert)",
|
||||
r"\bverschluesselung\s+(implementiert|aktiv)",
|
||||
r"\bpseudonymisierung\s+(implementiert|aktiv)",
|
||||
r"\bbackup[s]?\s+(eingerichtet|vorhanden)",
|
||||
r"\bzugriffskontrolle",
|
||||
r"\b(rollen|berechtigungs)konzept",
|
||||
# Risikobewertung / DSFA (intern)
|
||||
r"\bdsfa\s+(durchgefuehrt|erstellt|dokumentiert)",
|
||||
r"\brisikobewertung\s+(durchgefuehrt|dokumentiert)",
|
||||
r"\brisikoanalyse",
|
||||
# Loeschkonzept / Aufbewahrung
|
||||
r"\bloeschkonzept\s+(umgesetzt|implementiert)",
|
||||
r"\baufbewahrungsfrist(en)?\s+(eingehalten|definiert)",
|
||||
r"\bloeschroutinen?\s+(aktiv|implementiert)",
|
||||
# Meldewege / Vorfallmanagement
|
||||
r"\bmeldepflicht\s+(eingehalten|umgesetzt)",
|
||||
r"\bvorfallmanagement",
|
||||
r"\bincident[\s-]?response",
|
||||
r"\b72[\s-]?stunden[\s-]?meldung",
|
||||
# Generische Prozess-Indikatoren
|
||||
r"\bdokumentiert\s+werden",
|
||||
r"\bbitte\s+(intern\s+)?dokumentieren",
|
||||
r"\bin\s+der\s+verfahrens",
|
||||
r"\bnach\s+innen\s+geh",
|
||||
r"\bausnahmen\s+(dokumentieren|protokollieren)",
|
||||
r"\bkostenfrei\s+(zur\s+verfuegung|gewaehren|ermoegli)",
|
||||
r"\bunentgeltlich\s+(zur\s+verfuegung)",
|
||||
# Vertragsleistung / Service-Level (intern)
|
||||
r"\bservice[\s-]?level",
|
||||
r"\breaktionszeit",
|
||||
# Auditierung / Aufsicht
|
||||
r"\binterne(s)?\s+audit",
|
||||
r"\baufsichtsbehoerde\s+gemeldet",
|
||||
r"\bbeauftragter\s+(intern|benannt)",
|
||||
# eCall + Branchen-spezifische interne Pflichten
|
||||
r"\babschaltung\s+der\s+\w+\s+kostenfrei",
|
||||
r"\bopt[\s-]?out\s+(intern|im\s+kundenportal)\s+ermoeglichen",
|
||||
]
|
||||
|
||||
# Patterns that indicate internal documentation (records of processing
# activities, DPIA file, ...). Matched by classify_mc_audit_type() to decide
# whether a control is 'doc_internal'.
_DOC_INTERNAL_PATTERNS = [
    r"\bverzeichnis\s+der\s+verarbeitungstaetigkeiten\b",
    r"\bvvt(\s+|\b)",
    r"\bdsfa[\s-]?dokument",
    r"\bauftragsverarbeitungsverzeichnis",
    r"\bsub[\s-]?prozessor[\s-]?liste",
    r"\bverarbeitungs[\s-]?register",
    r"\binternes\s+register",
    r"\baufbewahrungs[\s-]?konzept\b",
]
|
||||
|
||||
# Patterns that indicate external visibility (privacy policy, website, banner,
# footer). A control that matches one of these and none of the internal
# patterns is classified as 'verifiable'.
_VERIFIABLE_PATTERNS = [
    r"\bin\s+der\s+(datenschutzerklaerung|dse|cookie[\s-]?richtlinie|impressum|agb)\b",
    r"\bauf\s+der\s+website\s+(genannt|sichtbar|angegeben)",
    r"\bim\s+banner\s+(genannt|sichtbar)",
    r"\bim\s+cookie[\s-]?banner",
    r"\bauf\s+der\s+startseite",
    r"\bim\s+footer",
]
|
||||
|
||||
|
||||
def _matches_any(text: str, patterns: list[str]) -> bool:
|
||||
tl = text.lower()
|
||||
for pat in patterns:
|
||||
try:
|
||||
if re.search(pat, tl):
|
||||
return True
|
||||
except re.error:
|
||||
continue
|
||||
return False
|
||||
|
||||
|
||||
def classify_mc_audit_type(
|
||||
title: str | None,
|
||||
check_question: str | None = None,
|
||||
fail_criteria: dict | None = None,
|
||||
) -> str:
|
||||
"""Returns 'verifiable', 'process_internal', 'doc_internal',
|
||||
or 'ambiguous'."""
|
||||
blob = " ".join([title or "", check_question or "",
|
||||
str(fail_criteria or "")])
|
||||
if not blob.strip():
|
||||
return "ambiguous"
|
||||
|
||||
is_verifiable_hint = _matches_any(blob, _VERIFIABLE_PATTERNS)
|
||||
is_process = _matches_any(blob, _PROCESS_INTERNAL_PATTERNS)
|
||||
is_doc = _matches_any(blob, _DOC_INTERNAL_PATTERNS)
|
||||
|
||||
# Wenn explicit Verifiable-Indikator + kein Process → verifiable
|
||||
if is_verifiable_hint and not (is_process or is_doc):
|
||||
return "verifiable"
|
||||
# Wenn Process oder Doc UND nicht Verifiable → intern
|
||||
if is_process and not is_verifiable_hint:
|
||||
return "process_internal"
|
||||
if is_doc and not is_verifiable_hint:
|
||||
return "doc_internal"
|
||||
# Beides → ambiguous, im Zweifel CHECK markieren
|
||||
if is_process or is_doc:
|
||||
return "ambiguous"
|
||||
return "verifiable"
|
||||
|
||||
|
||||
def annotate_mc_results(check_results: list[dict]) -> list[dict]:
    """Annotate MC checks in place with audit type and audit status.

    Only dict entries whose ``id`` starts with ``mc-`` are touched. Each gets
    ``mc_audit_type`` (computed unless already present) and ``audit_status``:
    'pass', 'skip', 'check' (not passed, not skipped, audit type not
    verifiable) or 'fail'. The input list is returned.
    """
    if not check_results:
        return check_results

    internal_types = ("process_internal", "doc_internal", "ambiguous")
    reclassified = 0
    for entry in check_results:
        is_mc = isinstance(entry, dict) and (entry.get("id") or "").startswith("mc-")
        if not is_mc:
            continue

        if "mc_audit_type" not in entry:
            entry["mc_audit_type"] = classify_mc_audit_type(
                entry.get("label"), entry.get("hint"), entry.get("fail_criteria"),
            )

        if entry.get("passed"):
            status = "pass"
        elif entry.get("skipped"):
            status = "skip"
        elif entry["mc_audit_type"] in internal_types:
            # A miss on an internally-auditable control is a manual check,
            # NOT a failure.
            status = "check"
            reclassified += 1
        else:
            status = "fail"
        entry["audit_status"] = status

    if reclassified:
        logger.info(
            "MC-Audit-Type: %d/%d MCs reklassifiziert von FAIL → CHECK "
            "(interne Pruefung erforderlich)",
            reclassified, len(check_results),
        )
    return check_results
|
||||
|
||||
|
||||
def split_by_audit_type(check_results: list[dict]) -> dict[str, list[dict]]:
    """Group MC checks into verifiable_fails, internal_checks, passes, skips.

    Non-dict entries and entries whose ``id`` does not start with ``mc-`` are
    ignored. Entries are routed by ``audit_status``; an entry without a known
    status counts as a verifiable fail when it is neither passed nor skipped,
    and is dropped otherwise.
    """
    bucket_for_status = {
        "pass": "passes",
        "skip": "skips",
        "check": "internal_checks",
        "fail": "verifiable_fails",
    }
    grouped: dict[str, list[dict]] = {
        "verifiable_fails": [], "internal_checks": [],
        "passes": [], "skips": [],
    }
    for entry in check_results or []:
        if not (isinstance(entry, dict)
                and (entry.get("id") or "").startswith("mc-")):
            continue
        status = entry.get("audit_status")
        bucket = bucket_for_status.get(status) if isinstance(status, str) else None
        if bucket is None and not entry.get("passed") and not entry.get("skipped"):
            # Not annotated (or unknown status) but clearly not OK.
            bucket = "verifiable_fails"
        if bucket is not None:
            grouped[bucket].append(entry)
    return grouped
|
||||
|
||||
|
||||
def build_internal_checks_block_html(
    internal_checks: list[dict],
    limit: int = 30,
) -> str:
    """Render the mail block listing checks that need internal verification.

    Checks are grouped by ``mc_audit_type`` (process_internal, doc_internal,
    ambiguous; a missing type counts as ambiguous). At most ``limit`` items
    are listed per group. Label and regulation are HTML-escaped because they
    are plain text taken from the control data.

    Returns:
        The HTML block, or "" when ``internal_checks`` is empty.
    """
    from html import escape  # local import keeps the block self-contained

    if not internal_checks:
        return ""
    by_type: dict[str, list[dict]] = {}
    for c in internal_checks:
        by_type.setdefault(c.get("mc_audit_type", "ambiguous"), []).append(c)

    labels = {
        "process_internal": ("Interne Prozesse — bitte beim DSB pruefen",
                             "#1e40af"),
        "doc_internal": ("Interne Dokumentation — bitte im VVT/DSFA pruefen",
                         "#5b21b6"),
        "ambiguous": ("Unklar ob Audit-Befund oder interne Pruefung",
                      "#92400e"),
    }
    sections: list[str] = []
    for atype, (heading, color) in labels.items():
        items = by_type.get(atype) or []
        if not items:
            continue
        row_parts: list[str] = []
        for c in items[:limit]:
            # Truncate first, then escape, so an entity is never cut in half.
            label = escape(str(c.get("label") or "")[:160])
            regulation = c.get("regulation")
            reg_html = (
                f' <span style="color:#94a3b8">({escape(str(regulation))})</span>'
                if regulation else ''
            )
            row_parts.append(
                '<li style="margin-bottom:4px;font-size:11px;line-height:1.45">'
                f'<strong>{label}</strong>{reg_html}</li>'
            )
        rows = "".join(row_parts)
        sections.append(
            f'<div style="margin-bottom:10px">'
            f'<div style="font-size:11px;color:{color};text-transform:uppercase;'
            f'letter-spacing:1px;font-weight:600;margin-bottom:4px">'
            f'{heading} ({len(items)})</div>'
            f'<ul style="margin:0 0 0 18px;padding:0">{rows}</ul>'
            f'</div>'
        )
    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:12px 16px;'
        'background:#f0f9ff;border:1px solid #bfdbfe;border-radius:8px">'
        '<div style="font-size:11px;color:#1e40af;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Pruefungen die wir von aussen NICHT durchfuehren koennen</div>'
        f'<h3 style="margin:0 0 6px;font-size:14px;color:#1e293b">'
        f'{len(internal_checks)} Pruefpunkt'
        f'{"e" if len(internal_checks) != 1 else ""} sind '
        'NUR intern beim Kunden zu pruefen</h3>'
        '<p style="margin:0 0 10px;font-size:11px;color:#475569;'
        'line-height:1.5">'
        'Diese Anforderungen koennen wir per externem Website-Audit nicht '
        'als erfuellt oder nicht-erfuellt bewerten — sie betreffen interne '
        'Prozesse (Schulungen, AVV-Vertraege, TOM-Doku) oder interne '
        'Dokumentation (VVT, DSFA, Loeschkonzept). Sie sind also <strong>kein '
        'Verstoss</strong>, sondern Hinweis-Checks fuer Ihren DSB.</p>'
        + "".join(sections) +
        '</div>'
    )
|
||||
@@ -61,6 +61,12 @@ def build_scorecard(check_results: list[dict]) -> dict:
|
||||
b["skipped"] += 1
|
||||
elif r.get("passed"):
|
||||
b["passed"] += 1
|
||||
# P106 — interner Check ist KEIN Fail (zaehlt als skipped fuer
|
||||
# die Score-Berechnung damit der Score realistisch ist).
|
||||
elif r.get("audit_status") == "check":
|
||||
b["skipped"] += 1
|
||||
b.setdefault("internal_checks", 0)
|
||||
b["internal_checks"] += 1
|
||||
else:
|
||||
b["failed"] += 1
|
||||
sev = (r.get("severity") or "MEDIUM").upper()
|
||||
|
||||
@@ -0,0 +1,257 @@
|
||||
"""
|
||||
P73 — MC-Solution-Generator.
|
||||
|
||||
Generiert pro Fail-MC eine konkrete Einfuege-Empfehlung mit Anchor:
|
||||
"Bitte ergaenzen Sie nach Abschnitt 'Kontaktdaten DSB' folgenden
|
||||
Absatz: ...". LLM-Cascade Qwen (lokal) -> OVH 120B.
|
||||
|
||||
Cache: in-process LRU per (mc_id, doc_md5) damit Re-Runs derselben
|
||||
Site denselben Vorschlag liefern. Volle DB-Cache kommt spaeter (P31).
|
||||
|
||||
Integration: wird im build_critical_findings_html / mc-detail-rendering
|
||||
unter jedem HIGH-Fail als eingeklappbarer Block angezeigt.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from functools import lru_cache
|
||||
from typing import Iterable
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# System prompt (German) sent with every solution request. It asks the model
# for a pure-JSON answer with the keys solution_text, anchor_hint and
# effort_min — the same keys _parse() extracts from the reply.
_SYSTEM_PROMPT = (
    "Du bist Datenschutz-Redakteur. Du formulierst kurze, einfueg-bereite "
    "Absaetze fuer Datenschutz-Dokumente — sachlich, in deutscher "
    "Rechtssprache, ohne Marketing-Floskeln.\n\n"
    "Du bekommst:\n"
    "- den FAIL-MC (was geprueft wurde, warum es nicht erfuellt ist)\n"
    "- einen Auszug aus dem Ist-Dokument\n"
    "- den Dokument-Typ\n\n"
    "Du lieferst JSON:\n"
    '{\n'
    ' "solution_text": "<3-6 Saetze Vorschlags-Absatz fuer das Dokument>",\n'
    ' "anchor_hint": "<wo einfuegen, z.B. \\"nach Abschnitt Kontaktdaten\\">",\n'
    ' "effort_min": "<gering|mittel|hoch>"\n'
    '}\n\n'
    "Regeln:\n"
    "- KEINE Normtexte 1:1 zitieren — eigene Formulierung + Norm-Referenz.\n"
    "- KEINE Annahmen ueber Konkretes (z.B. Firmennamen, Adressen) — "
    "Platzhalter [Ihr Firmenname] / [Ihre Adresse] verwenden.\n"
    "- Wenn schon eine schwache Variante im Dokument steht, anchor_hint "
    "auf 'ersetzen' setzen statt einfuegen.\n"
    "- Nur reines JSON, keine Prosa, keine Code-Fences."
)
|
||||
|
||||
|
||||
def _doc_hash(doc_text: str) -> str:
|
||||
return hashlib.md5(doc_text.encode("utf-8")).hexdigest()[:12]
|
||||
|
||||
|
||||
_CACHE: dict[str, dict] = {}
|
||||
_CACHE_MAX = 500
|
||||
|
||||
|
||||
def _cache_get(key: str) -> dict | None:
|
||||
return _CACHE.get(key)
|
||||
|
||||
|
||||
def _cache_put(key: str, val: dict) -> None:
|
||||
if len(_CACHE) >= _CACHE_MAX:
|
||||
# Drop oldest 50 entries
|
||||
for k in list(_CACHE.keys())[:50]:
|
||||
_CACHE.pop(k, None)
|
||||
_CACHE[key] = val
|
||||
|
||||
|
||||
async def _call_ollama(prompt: str) -> str:
    """Ask the Ollama chat endpoint for a JSON-formatted solution.

    Base URL comes from ``OLLAMA_URL``; the model from ``MC_SOLUTION_MODEL``,
    falling back to ``CMP_LLM_MODEL`` and then ``qwen3:30b-a3b``.

    Returns:
        The message content, or "" on any error or when the response carries
        no content (never None, matching ``_call_ovh``).
    """
    base = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
    model = os.getenv("MC_SOLUTION_MODEL",
                      os.getenv("CMP_LLM_MODEL", "qwen3:30b-a3b"))
    payload = {
        "model": model, "stream": False, "format": "json",
        "messages": [
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
        "options": {"temperature": 0.1, "num_predict": 600},
    }
    try:
        async with httpx.AsyncClient(timeout=90.0) as client:
            resp = await client.post(f"{base.rstrip('/')}/api/chat", json=payload)
            resp.raise_for_status()
            # `or ""` also covers an explicit null content in the response.
            return (resp.json().get("message") or {}).get("content") or ""
    except Exception as e:
        # Best effort: the caller falls back to the next tier on "".
        logger.warning("Qwen MC-solution failed: %s", e)
        return ""
|
||||
|
||||
|
||||
async def _call_ovh(prompt: str) -> str:
|
||||
base = os.getenv("OVH_LLM_URL", "").strip()
|
||||
key = os.getenv("OVH_LLM_KEY", "").strip()
|
||||
model = os.getenv("OVH_LLM_MODEL", "").strip()
|
||||
if not base or not model:
|
||||
return ""
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if key:
|
||||
headers["Authorization"] = f"Bearer {key}"
|
||||
payload = {
|
||||
"model": model, "temperature": 0.1, "max_tokens": 600,
|
||||
"messages": [
|
||||
{"role": "system", "content": _SYSTEM_PROMPT},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
"response_format": {"type": "json_object"},
|
||||
}
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=45.0) as client:
|
||||
resp = await client.post(
|
||||
f"{base.rstrip('/')}/v1/chat/completions",
|
||||
json=payload, headers=headers,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
choice = (resp.json().get("choices") or [{}])[0]
|
||||
return (choice.get("message") or {}).get("content", "") or ""
|
||||
except Exception as e:
|
||||
logger.warning("OVH MC-solution failed: %s", e)
|
||||
return ""
|
||||
|
||||
|
||||
def _parse(content: str) -> dict | None:
|
||||
if not content:
|
||||
return None
|
||||
txt = content.strip()
|
||||
if txt.startswith("```"):
|
||||
txt = "\n".join(txt.split("\n")[1:-1])
|
||||
a, b = txt.find("{"), txt.rfind("}")
|
||||
if 0 <= a < b:
|
||||
try:
|
||||
obj = json.loads(txt[a:b + 1])
|
||||
if isinstance(obj, dict) and obj.get("solution_text"):
|
||||
return {
|
||||
"solution_text": str(obj["solution_text"])[:1200],
|
||||
"anchor_hint": str(obj.get("anchor_hint", ""))[:200],
|
||||
"effort_min": str(obj.get("effort_min", "mittel"))[:20],
|
||||
}
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
async def generate_solution(
|
||||
mc: dict,
|
||||
doc_text: str,
|
||||
doc_type: str,
|
||||
) -> dict | None:
|
||||
"""Generates a solution dict for a single FAIL-MC.
|
||||
|
||||
mc must contain: label, hint, severity. Returns
|
||||
{solution_text, anchor_hint, effort_min} or None.
|
||||
"""
|
||||
if not mc or not doc_text:
|
||||
return None
|
||||
mc_id = str(mc.get("id") or mc.get("label", ""))[:80]
|
||||
cache_key = f"{mc_id}:{doc_type}:{_doc_hash(doc_text)}"
|
||||
cached = _cache_get(cache_key)
|
||||
if cached:
|
||||
return cached
|
||||
|
||||
excerpt = doc_text[:3500]
|
||||
prompt = (
|
||||
f"FAIL-MC: {mc.get('label', '')}\n"
|
||||
f"Severity: {mc.get('severity', 'MEDIUM')}\n"
|
||||
f"Aktueller Hint: {mc.get('hint', '')[:300]}\n\n"
|
||||
f"Dokument-Typ: {doc_type}\n"
|
||||
f"Dokument-Auszug:\n---\n{excerpt}\n---\n\n"
|
||||
"Liefere die Loesung als JSON."
|
||||
)
|
||||
|
||||
content = await _call_ollama(prompt)
|
||||
parsed = _parse(content)
|
||||
if not parsed:
|
||||
content = await _call_ovh(prompt)
|
||||
parsed = _parse(content)
|
||||
if parsed:
|
||||
_cache_put(cache_key, parsed)
|
||||
return parsed
|
||||
|
||||
|
||||
async def generate_solutions_for_fails(
    failed_mcs: Iterable[dict],
    doc_text: str,
    doc_type: str,
    limit: int = 5,
) -> list[dict]:
    """Generate solutions for the top-N CRITICAL/HIGH fails.

    MEDIUM/LOW fails are skipped to keep latency bounded. CRITICAL fails are
    handled before HIGH ones; within a severity the input order is kept.
    Fails for which no solution could be generated are omitted.

    Returns:
        A list of ``{mc_label, severity, solution_text, anchor_hint,
        effort_min}`` dicts with at most ``limit`` entries.
    """
    sev_order = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}

    def _severity(mc: dict) -> str:
        return (mc.get("severity") or "").upper()

    high_fails = [m for m in (failed_mcs or [])
                  if _severity(m) in ("CRITICAL", "HIGH")]
    high_fails.sort(key=lambda m: sev_order.get(_severity(m), 3))
    high_fails = high_fails[:limit]

    out: list[dict] = []
    for mc in high_fails:
        sol = await generate_solution(mc, doc_text, doc_type)
        if not sol:
            continue
        out.append({
            # `or ""` so a label that is present but None does not crash the
            # slicing.
            "mc_label": str(mc.get("label") or "")[:200],
            "severity": mc.get("severity", "MEDIUM"),
            "solution_text": sol["solution_text"],
            "anchor_hint": sol["anchor_hint"],
            "effort_min": sol["effort_min"],
        })
    return out
|
||||
|
||||
|
||||
def build_solutions_block_html(solutions: list[dict]) -> str:
    """Render the LLM-generated solutions as a mail block.

    Every value taken from a solution dict (severity, label, solution text,
    anchor hint, effort) is HTML-escaped: the texts are produced by an LLM
    from document content and must not be able to inject markup into the mail.

    Args:
        solutions: Dicts with the keys ``severity``, ``mc_label``,
            ``solution_text``, ``anchor_hint`` and ``effort_min``.

    Returns:
        The HTML block, or "" when ``solutions`` is empty.
    """
    from html import escape  # local import keeps the block self-contained

    if not solutions:
        return ""
    items: list[str] = []
    for s in solutions:
        severity = str(s["severity"])
        # Red for CRITICAL, amber for everything else.
        sev_color = "#dc2626" if severity.upper() == "CRITICAL" else "#d97706"
        anchor = escape(str(s["anchor_hint"] or "—"))
        items.append(
            f'<li style="margin-bottom:12px;font-size:11px;line-height:1.5">'
            f'<div style="font-weight:600;color:{sev_color}">'
            f'[{escape(severity)}] {escape(str(s["mc_label"]))}</div>'
            f'<div style="background:#fff;padding:8px 10px;border:1px solid '
            f'#cbd5e1;border-radius:4px;margin-top:4px;color:#1e293b;'
            f'white-space:pre-wrap">{escape(str(s["solution_text"]))}</div>'
            f'<div style="font-size:10px;color:#64748b;margin-top:3px">'
            f'<strong>Anchor:</strong> {anchor} '
            f' · <strong>Aufwand:</strong> {escape(str(s["effort_min"]))}'
            f'</div></li>'
        )
    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:14px 18px;'
        'background:#f0f9ff;border:1px solid #bfdbfe;border-radius:8px">'
        '<div style="font-size:11px;color:#1e40af;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Loesungs-Vorschlaege (KI-generiert)</div>'
        f'<h3 style="margin:0 0 6px;font-size:14px;color:#1e293b">'
        f'{len(solutions)} konkrete Einfuege-Empfehlung'
        f'{"en" if len(solutions) != 1 else ""} '
        'fuer die kritischen Findings</h3>'
        '<p style="margin:0 0 10px;font-size:11px;color:#475569;line-height:1.5">'
        'Folgende Absaetze koennen Sie direkt uebernehmen — Platzhalter '
        '[Ihr Firmenname] / [Ihre Adresse] sind zu ersetzen. Inhaltliche '
        'Korrektheit ist mit DSB / Rechtsabteilung zu pruefen.</p>'
        '<ul style="margin:0 0 0 18px;padding:0">'
        + "".join(items) +
        '</ul>'
        '<p style="margin:8px 0 0;font-size:10px;color:#94a3b8;'
        'font-style:italic">Generiert via Qwen3-30b lokal (Fallback: '
        'OVH 120B). Vorschlaege sind kein Rechts-Beratung.</p>'
        '</div>'
    )
|
||||
@@ -293,6 +293,59 @@ _MC_ALIAS_FALLBACK = {
|
||||
}
|
||||
|
||||
|
||||
# P72 — compatible scope_doc_type values per operative doc_type.
# 'other' / NULL / 'process' always stay in (the backfill is heuristic v1 and
# not strong enough for hard filtering).
_SCOPE_COMPATIBLE: dict[str, set[str]] = {
    "dse": {"dse", "jc", "process", "tom", "accounting"},
    "cookie": {"cookie_richtlinie", "banner_implementation",
               "cmp_audit", "dse"},
    "cookie_policy": {"cookie_richtlinie", "banner_implementation",
                      "cmp_audit", "dse"},
    "impressum": {"impressum", "agb"},
    "agb": {"agb", "widerruf", "impressum"},
    "nutzungsbedingungen": {"agb", "widerruf", "impressum"},
    "widerruf": {"widerruf", "agb"},
    "avv": {"avv", "tom", "jc", "process"},
    "tom": {"tom", "avv", "process"},
    "loeschkonzept": {"process", "dse", "accounting"},
    "dsfa": {"process", "tom", "dse"},
    "social_media": {"jc", "dse"},
    "dsa": {"dse", "impressum"},
    "legal_notice": {"impressum", "agb"},
    "lizenzhinweise": {"agb", "impressum"},
}
# Scope values that never cause a control to be filtered out; None and ""
# cover a missing scope, "null" a scope stored as that literal string.
_PERMISSIVE_SCOPES = {"other", "process", None, "", "null"}
|
||||
|
||||
|
||||
def _filter_by_canonical_scope(
    controls: list[dict],
    doc_type: str,
) -> list[dict]:
    """P72 — drop MCs whose canonical scope explicitly targets an
    INCOMPATIBLE doc type.

    Controls with a permissive scope ('other', 'process', missing/empty) are
    always kept. When ``doc_type`` has no compatibility entry the input list
    is returned unchanged.
    """
    allowed = _SCOPE_COMPATIBLE.get(doc_type)
    if not allowed:
        return controls

    def _in_scope(control: dict) -> bool:
        raw_scope = control.get("canonical_scope")
        # Normalise: trim + lowercase; an empty result counts as "no scope".
        normalized = (raw_scope or "").strip().lower() or None
        return normalized in _PERMISSIVE_SCOPES or normalized in allowed

    kept = [control for control in controls if _in_scope(control)]
    n_dropped = len(controls) - len(kept)
    if n_dropped:
        logger.info(
            "P72 scope-filter: %d/%d MCs out-of-scope fuer doc_type=%s",
            n_dropped, len(controls), doc_type,
        )
    return kept
|
||||
|
||||
|
||||
def _load_text_only_ids(
|
||||
doc_type: str | None = None,
|
||||
business_scope: set[str] | None = None,
|
||||
@@ -372,11 +425,19 @@ async def _load_controls(doc_type: str, db_url: str, limit: int,
|
||||
return []
|
||||
|
||||
try:
|
||||
query = """SELECT id, control_id, title, regulation, article,
|
||||
check_question, pass_criteria, fail_criteria, severity
|
||||
FROM compliance.doc_check_controls
|
||||
WHERE doc_type = $1
|
||||
ORDER BY severity DESC, title"""
|
||||
# P72: LEFT JOIN canonical_controls.scope_doc_type um scope-Info
|
||||
# mitzuziehen. Wenn ein MC explizit fuer einen anderen Doc-Type
|
||||
# klassifiziert ist (z.B. 'tom' statt 'dse'), wird er unten
|
||||
# gefiltert. 'other' / NULL bleiben drin (Backfill noch nicht stark).
|
||||
query = """SELECT dc.id, dc.control_id, dc.title, dc.regulation,
|
||||
dc.article, dc.check_question, dc.pass_criteria,
|
||||
dc.fail_criteria, dc.severity,
|
||||
cc.scope_doc_type AS canonical_scope
|
||||
FROM compliance.doc_check_controls dc
|
||||
LEFT JOIN compliance.canonical_controls cc
|
||||
ON cc.id = dc.control_uuid
|
||||
WHERE dc.doc_type = $1
|
||||
ORDER BY dc.severity DESC, dc.title"""
|
||||
if limit > 0:
|
||||
query += f" LIMIT {limit}"
|
||||
|
||||
@@ -387,6 +448,12 @@ async def _load_controls(doc_type: str, db_url: str, limit: int,
|
||||
rows = await conn.fetch(query, fallback)
|
||||
|
||||
controls = [dict(r) for r in rows]
|
||||
|
||||
# P72: Scope-Filter — werfe MCs raus, deren canonical scope_doc_type
|
||||
# explizit auf einen anderen Doc-Type zeigt. Konservativ:
|
||||
# other/NULL/process bleiben drin (zu unsichere Klassifikation).
|
||||
controls = _filter_by_canonical_scope(controls, doc_type)
|
||||
|
||||
text_only = _load_text_only_ids(doc_type, business_scope)
|
||||
if text_only:
|
||||
before = len(controls)
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
P70 — RAG-Provenance-Marker.
|
||||
|
||||
Wenn ein Finding aus dem RAG-Korpus belegt ist (z.B. Art-Match auf
|
||||
einen konkreten Gesetzes-Paragrafen aus dem ingestierten DSGVO/TDDDG/
|
||||
TMG-Korpus), bekommt es einen ✓-Marker. Wenn es nur aus unserer
|
||||
Heuristik kommt (Pattern-Match ohne RAG-Belegung), bekommt es ein ⚠
|
||||
"Heuristik".
|
||||
|
||||
Dadurch sieht der Nutzer sofort welche Aussagen rechtlich verbindlich
|
||||
gestuetzt sind vs welche unsere Eigeninterpretation sind.
|
||||
|
||||
Generisch: dataclass-aehnliche Funktion die ein Finding-dict klassifiziert.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Pattern for an explicit legal-norm reference in a finding. Three
# alternatives, matched case-insensitively:
#   * "Art. N [Abs. N] [lit. x]" followed by one of the listed source
#     abbreviations (DSGVO, GDPR, TDDDG, ...),
#   * an "(EU|VO) YYYY/N" style regulation number,
#   * "§ N" followed by one of TMG/UWG/BDSG/TKG/TDDDG.
_NORM_RE = re.compile(
    r"(Art\.?\s*\d+(?:\s*Abs\.?\s*\d+)?(?:\s*lit\.?\s*[a-z])?\s*"
    r"(?:DSGVO|GDPR|TDDDG|TMG|BDSG|UWG|TKG|EuGH|EDPB)|"
    r"\(?(EU|VO)\s*\d{4}/\d+\)?|"
    r"§\s*\d+[a-z]?\s*(TMG|UWG|BDSG|TKG|TDDDG))",
    re.I,
)
|
||||
|
||||
|
||||
def classify_finding_provenance(finding: dict) -> str:
    """Classify how well a finding is backed: 'rag', 'heuristic' or 'mixed'.

    rag       — norm reference + source (RAG chunk id, RAG source URL, or
                an https:// link in legal basis / detail)
    heuristic — no norm reference found (or the input is not a dict)
    mixed     — norm reference but no source
    """
    if not isinstance(finding, dict):
        return "heuristic"

    legal_basis = (finding.get("legal_basis") or "").strip()
    detail_text = (finding.get("detail") or "").strip()

    cites_norm = _NORM_RE.search(" ".join([legal_basis, detail_text])) is not None
    if not cites_norm:
        return "heuristic"

    sourced = bool(
        finding.get("rag_chunk_id")
        or finding.get("rag_source_url")
        or "https://" in legal_basis
        or "https://" in detail_text
    )
    return "rag" if sourced else "mixed"
|
||||
|
||||
|
||||
def provenance_badge_html(provenance: str) -> str:
    """Return the inline HTML badge for a provenance value.

    'rag' yields the green "✓ RAG" badge, 'mixed' the blue "NORM" badge; any
    other value yields the grey "⚠ HEURISTIK" badge.
    """
    if provenance == "rag":
        background, foreground = "#dcfce7", "#166534"
        tooltip = "Aussage durch RAG-Korpus belegt (Gesetzestext + Quelle)"
        caption = "✓ RAG"
    elif provenance == "mixed":
        background, foreground = "#dbeafe", "#1e40af"
        tooltip = "Norm-Bezug ohne direkte Quellen-URL"
        caption = "NORM"
    else:
        background, foreground = "#f1f5f9", "#475569"
        tooltip = "Heuristik / Eigeninterpretation ohne Korpus-Beleg"
        caption = "⚠ HEURISTIK"

    # All three badges share the same pill layout; only colours, tooltip and
    # caption differ.
    return (
        f'<span style="background:{background};color:{foreground};'
        'padding:1px 5px;border-radius:8px;font-size:9px;'
        'font-weight:600;margin-left:4px" '
        f'title="{tooltip}">'
        f'{caption}</span>'
    )
|
||||
|
||||
|
||||
def annotate_findings(findings: list[dict]) -> list[dict]:
    """Set ``provenance`` in place on every dict finding that lacks it.

    Non-dict entries and findings that already carry a ``provenance`` key are
    left untouched. The input object itself is returned (also when it is
    None or empty).
    """
    unlabelled = (
        entry for entry in (findings or [])
        if isinstance(entry, dict) and "provenance" not in entry
    )
    for entry in unlabelled:
        entry["provenance"] = classify_finding_provenance(entry)
    return findings
|
||||
@@ -0,0 +1,173 @@
|
||||
"""
|
||||
P68 — Reverse-Audit: eigene Templates gegen alle MCs pruefen.
|
||||
|
||||
Statt 'gegeben einen Kunden-Text → welche MCs fail' machen wir den
|
||||
umgekehrten Test: 'gegeben unseren BreakPilot-Standard-Template-Pool
|
||||
(95 Templates) → welche MCs werden NICHT abgedeckt? Wo sind Luecken?'
|
||||
|
||||
Liefert einen Coverage-Report:
|
||||
- Total MCs in DB: ~1800
|
||||
- MCs abgedeckt durch min. 1 unserer Templates: X
|
||||
- MCs ohne Coverage: Y (Liste)
|
||||
- Templates ohne MC-Wirkung: Z (Liste)
|
||||
|
||||
Zweck: Audit unserer eigenen Code-Base. Wenn ein Customer einen Lauf
|
||||
macht und 50 Findings produziert sind, sollten 90%+ davon durch unsere
|
||||
Template-Bibliothek korrigierbar sein. Wenn nicht → Templates fehlen.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from sqlalchemy import text as sa_text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _first_matching_template(templates: list[dict], criteria: list[str]) -> dict | None:
    """Return the first template whose lower-cased body contains at least
    half of ``criteria`` (minimum one hit), or ``None`` if none qualifies."""
    needles = [c.lower() for c in criteria if c]
    threshold = max(1, len(criteria) // 2)
    for tpl in templates:
        hits = sum(1 for needle in needles if needle in tpl["body_lc"])
        if hits >= threshold:
            return tpl
    return None


def run_reverse_audit(db: Session) -> dict:
    """Audit the template pool against every master control (MC).

    Loads all rows of ``compliance.doc_check_controls`` and all active rows
    of ``compliance.doc_templates`` (falling back to
    ``compliance.legal_templates`` if the first query fails), then checks
    per MC whether any template of the same ``doc_type`` contains at least
    half of the MC's patterns.

    Returns:
        dict with ``total_mcs``, ``total_templates``, ``covered_mcs``,
        ``uncovered_mcs``, ``coverage_pct``, ``unused_templates`` (sorted
        template ids that were never the first match for any MC),
        ``top_uncovered_high`` (up to 30 uncovered MCs with severity
        ``"HIGH"``) and ``by_doctype``.
    """
    # 1) Load every MC.
    mc_rows = db.execute(sa_text(
        """
        SELECT id::text, control_id, doc_type, title, check_question,
               pass_criteria, severity
        FROM compliance.doc_check_controls
        ORDER BY doc_type, severity DESC
        """
    )).fetchall()

    # 2) Load templates. Each attempt runs inside a SAVEPOINT: on PostgreSQL
    #    (which the ``::text`` casts suggest) a failed statement aborts the
    #    surrounding transaction, so without the savepoint the fallback query
    #    could never succeed after the first one failed.
    try:
        with db.begin_nested():
            tpl_rows = db.execute(sa_text(
                """
                SELECT id::text, doc_type, title, body
                FROM compliance.doc_templates
                WHERE active = TRUE
                """
            )).fetchall()
    except Exception as primary_err:
        logger.info("doc_templates query failed, trying legal_templates: %s",
                    primary_err)
        try:
            with db.begin_nested():
                tpl_rows = db.execute(sa_text(
                    """
                    SELECT id::text, doc_type, name AS title, content AS body
                    FROM compliance.legal_templates
                    """
                )).fetchall()
        except Exception as e:
            logger.warning("template table not found: %s", e)
            tpl_rows = []

    # 3) Group templates by doc_type. The body is capped at 50k chars and
    #    lower-cased once here instead of once per (MC, template) pair.
    templates_by_doctype: dict[str, list[dict]] = {}
    for tid, dt, title, body in tpl_rows:
        templates_by_doctype.setdefault(dt or "other", []).append({
            "id": tid,
            "title": title,
            "body_lc": (body or "")[:50000].lower(),
        })

    # 4) Single pass: coverage per MC plus the template that provided it.
    covered_mc_ids: set[str] = set()
    used_template_ids: set[str] = set()
    uncovered: list[dict] = []
    for mc_id, ctrl_id, dt, title, _question, pc, sev in mc_rows:
        tpls = templates_by_doctype.get(dt or "other") or []
        if not tpls:
            uncovered.append({
                "mc_id": ctrl_id, "doc_type": dt, "title": title,
                "severity": sev, "reason": "no_template_for_doctype",
            })
            continue
        # pass_criteria are treated as patterns; without usable patterns the
        # title keywords serve as a fallback.
        criteria = _extract_patterns_from_pc(pc) or _title_keywords(title or "")
        match = _first_matching_template(tpls, criteria)
        if match is None:
            uncovered.append({
                "mc_id": ctrl_id, "doc_type": dt, "title": title,
                "severity": sev, "reason": "no_template_match",
                "criteria_sample": criteria[:5],
            })
        else:
            covered_mc_ids.add(mc_id)
            used_template_ids.add(match["id"])

    all_template_ids = {
        t["id"] for tpls in templates_by_doctype.values() for t in tpls
    }
    unused_templates = all_template_ids - used_template_ids

    return {
        "total_mcs": len(mc_rows),
        "total_templates": len(all_template_ids),
        "covered_mcs": len(covered_mc_ids),
        "uncovered_mcs": len(uncovered),
        "coverage_pct": round(len(covered_mc_ids) / max(1, len(mc_rows)) * 100, 1),
        "unused_templates": sorted(unused_templates),
        "top_uncovered_high": [u for u in uncovered if u.get("severity") == "HIGH"][:30],
        "by_doctype": _summarize_by_doctype(mc_rows, covered_mc_ids),
    }
|
||||
|
||||
|
||||
def _extract_patterns_from_pc(pc) -> list[str]:
|
||||
"""pc ist jsonb mit z.B. {required_phrases: [...]}, {keywords: [...]}"""
|
||||
if not pc:
|
||||
return []
|
||||
if isinstance(pc, str):
|
||||
try:
|
||||
import json as _j
|
||||
pc = _j.loads(pc)
|
||||
except Exception:
|
||||
return [pc[:50]]
|
||||
if isinstance(pc, dict):
|
||||
out: list[str] = []
|
||||
for k in ("required_phrases", "keywords", "must_contain",
|
||||
"patterns", "phrases"):
|
||||
v = pc.get(k)
|
||||
if isinstance(v, list):
|
||||
out.extend([str(x)[:80] for x in v if x])
|
||||
return out
|
||||
if isinstance(pc, list):
|
||||
return [str(x)[:80] for x in pc if x]
|
||||
return []
|
||||
|
||||
|
||||
def _title_keywords(title: str) -> list[str]:
|
||||
"""Fallback wenn pass_criteria leer: extrahiere Substantive aus Title."""
|
||||
if not title:
|
||||
return []
|
||||
# primitive: alle Worte > 4 Buchstaben
|
||||
return [w for w in re.findall(r"\b\w{5,}\b", title)][:5]
|
||||
|
||||
|
||||
def _summarize_by_doctype(mc_rows, covered_mc_ids: set[str]) -> dict:
|
||||
out: dict[str, dict] = {}
|
||||
for mc_id, ctrl_id, dt, title, q, pc, sev in mc_rows:
|
||||
dt = dt or "other"
|
||||
d = out.setdefault(dt, {"total": 0, "covered": 0})
|
||||
d["total"] += 1
|
||||
if mc_id in covered_mc_ids:
|
||||
d["covered"] += 1
|
||||
for dt, d in out.items():
|
||||
d["pct"] = round(d["covered"] / max(1, d["total"]) * 100, 1)
|
||||
return out
|
||||
@@ -0,0 +1,182 @@
|
||||
"""
|
||||
P84 — Diff-Mode pro Mail.
|
||||
|
||||
Vergleicht den aktuellen Lauf mit dem letzten Snapshot derselben Site:
|
||||
"Seit letztem Lauf 3 Findings weg, 1 neues." USP — keiner der grossen
|
||||
Anbieter (Borlabs, OneTrust, Cookiebot, Usercentrics) hat das.
|
||||
|
||||
Wird in der Mail-Composition nach dem GF-1-Pager gerendert (klein,
|
||||
neutral). Wenn kein vorheriger Lauf existiert: skip silently.
|
||||
|
||||
Heuristik: Extrahiert Finding-Labels aus banner_result.phases[].findings
|
||||
und (wenn vorhanden) scorecard.failed. Vergleicht set-basiert auf
|
||||
normalisiertem Label.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _norm_label(s: str) -> str:
|
||||
s = (s or "").lower().strip()
|
||||
s = re.sub(r"\s+", " ", s)
|
||||
s = re.sub(r"[^\w\s äöüß]", "", s)
|
||||
return s[:200]
|
||||
|
||||
|
||||
def _extract_finding_labels(
|
||||
banner_result: dict | None,
|
||||
scorecard: dict | None = None,
|
||||
) -> set[str]:
|
||||
out: set[str] = set()
|
||||
if isinstance(banner_result, dict):
|
||||
for ph in (banner_result.get("phases") or {}).values():
|
||||
if not isinstance(ph, dict):
|
||||
continue
|
||||
for f in (ph.get("findings") or []):
|
||||
if isinstance(f, dict):
|
||||
lbl = f.get("label") or f.get("title") or f.get("check") or ""
|
||||
if lbl:
|
||||
out.add(_norm_label(lbl))
|
||||
if isinstance(scorecard, dict):
|
||||
for ent in (scorecard.get("failed") or scorecard.get("items") or []):
|
||||
if isinstance(ent, dict):
|
||||
lbl = ent.get("label") or ent.get("title") or ""
|
||||
if lbl:
|
||||
out.add(_norm_label(lbl))
|
||||
return out
|
||||
|
||||
|
||||
def _previous_snapshot(db: Session, site_domain: str,
                       exclude_check_id: str) -> dict | None:
    """Fetch the newest snapshot of ``site_domain`` other than the current run.

    Returns ``{"check_id", "banner_result", "created_at"}`` (with an empty
    dict substituted for a missing ``banner_result``) or ``None`` when the
    site has no other snapshot.
    """
    query = text(
        """
        SELECT check_id, banner_result, created_at
        FROM compliance.compliance_check_snapshots
        WHERE site_domain = :dom AND check_id != :ex
        ORDER BY created_at DESC LIMIT 1
        """
    )
    params = {"dom": site_domain, "ex": exclude_check_id}
    latest = db.execute(query, params).fetchone()
    if not latest:
        return None
    snapshot = {"check_id": latest[0]}
    snapshot["banner_result"] = latest[1] or {}
    snapshot["created_at"] = latest[2]
    return snapshot
|
||||
|
||||
|
||||
def compute_diff(
    db: Session,
    current_check_id: str,
    site_domain: str,
    banner_result: dict | None,
    scorecard: dict | None = None,
) -> dict | None:
    """Compare the current run with the previous snapshot of the same site.

    Returns ``{prev_check_id, prev_at, added, removed, unchanged_count}``
    (``added`` / ``removed`` are sorted and capped at 20 labels each), or
    ``None`` if there is no previous snapshot or neither run produced any
    label.

    Scorecard labels take part in the comparison only when BOTH runs
    provide a scorecard. Previously the current set always included them
    while the previous set never did, so every scorecard finding was
    reported as "new" on every single run.
    """
    prev = _previous_snapshot(db, site_domain, current_check_id)
    if not prev:
        return None

    # NOTE(review): the snapshot lookup visible in this module returns no
    # "scorecard" key, so scorecard labels are currently left out of the
    # diff. Extend the snapshot query to make them comparable.
    prev_scorecard = prev.get("scorecard")
    if isinstance(scorecard, dict) and isinstance(prev_scorecard, dict):
        curr_sc, prev_sc = scorecard, prev_scorecard
    else:
        curr_sc = prev_sc = None

    curr_set = _extract_finding_labels(banner_result, curr_sc)
    prev_set = _extract_finding_labels(prev["banner_result"], prev_sc)
    if not curr_set and not prev_set:
        return None

    return {
        "prev_check_id": prev["check_id"],
        "prev_at": prev["created_at"],
        "added": sorted(curr_set - prev_set)[:20],
        "removed": sorted(prev_set - curr_set)[:20],
        "unchanged_count": len(curr_set & prev_set),
    }
|
||||
|
||||
|
||||
def _fmt_age(when: Any) -> str:
|
||||
if not isinstance(when, datetime):
|
||||
return "frueher"
|
||||
if when.tzinfo is None:
|
||||
when = when.replace(tzinfo=timezone.utc)
|
||||
delta = datetime.now(timezone.utc) - when
|
||||
days = delta.days
|
||||
if days <= 0:
|
||||
hours = delta.seconds // 3600
|
||||
return f"vor {hours}h" if hours else "soeben"
|
||||
if days == 1:
|
||||
return "vor 1 Tag"
|
||||
if days < 14:
|
||||
return f"vor {days} Tagen"
|
||||
weeks = days // 7
|
||||
return f"vor {weeks} Wochen"
|
||||
|
||||
|
||||
def build_diff_block_html(diff: dict) -> str:
    """Render the "what changed since the last run" block for the mail.

    An empty ``diff`` gives an empty string. Without added or removed
    findings a neutral grey one-liner is returned. Otherwise a yellow box
    lists up to six removed (green) and up to six added (red) labels and
    closes with the number of unchanged findings.
    """
    if not diff:
        return ""
    gone = diff.get("removed") or []
    new = diff.get("added") or []

    if not (new or gone):
        age = _fmt_age(diff.get("prev_at"))
        same = diff.get("unchanged_count", 0)
        return (
            '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
            'max-width:760px;margin:0 auto 12px;padding:10px 14px;'
            'background:#f1f5f9;border:1px solid #cbd5e1;border-radius:6px;'
            'font-size:11px;color:#475569">'
            f'<strong>Vergleich zum letzten Lauf ({age}):</strong> '
            f'keine Veraenderungen in den erkannten Findings ({same} '
            'identisch geblieben).'
            '</div>'
        )

    def _section(colour: str, heading: str, marker: str, labels: list) -> str:
        # Headline plus bullet list; only the first six labels are shown.
        bullets = "".join(
            f'<li style="font-size:11px;color:{colour};margin-bottom:2px">'
            f'{marker} {label}</li>'
            for label in labels[:6]
        )
        return (
            f'<div style="font-size:11px;color:{colour};margin-bottom:4px">'
            f'<strong>{heading}</strong></div>'
            '<ul style="margin:0 0 8px 18px;padding:0">'
            f'{bullets}</ul>'
        )

    sections: list[str] = []
    if gone:
        plural = "s" if len(gone) != 1 else ""
        sections.append(_section(
            "#166534",
            f'{len(gone)} Finding{plural} nicht mehr vorhanden:',
            "✓",
            gone,
        ))
    if new:
        adjective_suffix = "s" if len(new) == 1 else ""
        plural = "s" if len(new) != 1 else ""
        sections.append(_section(
            "#991b1b",
            f'{len(new)} neue{adjective_suffix} Finding{plural}:',
            "!",
            new,
        ))

    age = _fmt_age(diff.get("prev_at"))
    same = diff.get("unchanged_count", 0)
    header = (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 12px;padding:12px 16px;'
        'background:#fffbeb;border:1px solid #fde68a;border-radius:6px">'
        '<div style="font-size:11px;color:#92400e;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:6px;font-weight:600">'
        f'Was hat sich seit dem letzten Lauf veraendert ({age})'
        '</div>'
    )
    footer = (
        '<div style="font-size:10px;color:#94a3b8;margin-top:4px">'
        f'{same} weitere Findings unveraendert '
        '— vollstaendige Liste weiter unten.</div>'
        '</div>'
    )
    return header + "".join(sections) + footer
|
||||
@@ -0,0 +1,248 @@
|
||||
"""
|
||||
P105 — IAB TCF Vendor-Liste als externe Authority.
|
||||
|
||||
Die IAB TCF v2.2 Global Vendor List (https://vendor-list.consensu.org/v3/
|
||||
vendor-list.json) ist die DSGVO-Authoritaet fuer Werbe-Vendoren: jeder
|
||||
gelistete Vendor hat verbindliche IAB-Purposes:
|
||||
Purpose 1 — Speichern + Zugriff (essential)
|
||||
Purpose 2 — Auswahl Werbung (functional/marketing)
|
||||
Purpose 3 — Personalisierte Werbeprofile (marketing)
|
||||
Purpose 4 — Personalisierte Werbung (marketing)
|
||||
Purpose 5 — Personalisierte Inhaltsprofile (marketing/personalization)
|
||||
Purpose 6 — Personalisierte Inhalte (marketing/personalization)
|
||||
Purpose 7 — Werbe-Performance-Messung (statistics)
|
||||
Purpose 8 — Inhalts-Performance-Messung (statistics)
|
||||
Purpose 9 — Marktforschung (statistics)
|
||||
Purpose 10 — Produkt-Verbesserung (statistics)
|
||||
|
||||
Wenn ein Vendor in der TCF-Liste mit Purpose 3/4 registriert ist und die
|
||||
Site ihn als "Funktional" deklariert → eindeutiger Verstoss (eine externe
|
||||
Authority widerspricht der Deklaration).
|
||||
|
||||
Ingest-Mode: idempotenter Fetch + DELETE/INSERT in compliance.cookie_library (source_name='iab_tcf_v2').
|
||||
Lookup-Mode: by_vendor_name + by_cookie_owner.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Iterable
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import text as sa_text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_TCF_URL = "https://vendor-list.consensu.org/v3/vendor-list.json"
|
||||
|
||||
# IAB-Purpose → BreakPilot-Kategorie
|
||||
_PURPOSE_TO_CATEGORY = {
|
||||
1: "essential",
|
||||
2: "marketing",
|
||||
3: "marketing",
|
||||
4: "marketing",
|
||||
5: "personalization",
|
||||
6: "personalization",
|
||||
7: "statistics",
|
||||
8: "statistics",
|
||||
9: "statistics",
|
||||
10: "statistics",
|
||||
11: "marketing",
|
||||
}
|
||||
|
||||
|
||||
def _category_for_purposes(purposes: Iterable[int]) -> str:
|
||||
"""Aggregiert Purposes zu der STRENGSTEN Kategorie (Marketing > stats
|
||||
> personalization > essential). Wenn ein Vendor sowohl essential als
|
||||
auch marketing nutzt, ist die rechtlich verbindliche Kategorie
|
||||
Marketing (Einwilligungspflicht)."""
|
||||
cats = {_PURPOSE_TO_CATEGORY.get(p, "marketing") for p in purposes}
|
||||
if "marketing" in cats:
|
||||
return "marketing"
|
||||
if "statistics" in cats:
|
||||
return "statistics"
|
||||
if "personalization" in cats:
|
||||
return "personalization"
|
||||
return "essential"
|
||||
|
||||
|
||||
async def fetch_and_ingest_tcf_vendors(db: Session) -> dict:
    """Download the IAB Global Vendor List and mirror it into cookie_library.

    No schema migration is involved: the existing
    ``compliance.cookie_library`` table is reused. Every vendor becomes one
    row with ``source_name='iab_tcf_v2'``, cookie name ``_tcf_v<id>`` and
    vendor name ``[TCF-<id>] <name>``.

    Idempotence comes from deleting all previous ``iab_tcf_v2`` rows
    (committed immediately) before re-inserting. Each vendor is committed
    on its own so that one failing insert does not block the others.

    Returns ``{"n_vendors_in_gvl", "inserted", "skipped"}``, or an error
    dict when the response contains no vendors. HTTP errors propagate.
    """
    async with httpx.AsyncClient(timeout=60.0) as client:
        resp = await client.get(_TCF_URL)
        resp.raise_for_status()
        data = resp.json()

    vendors = data.get("vendors") or {}
    if not vendors:
        return {"error": "no vendors in TCF response", "n_vendors": 0}

    # cookie_name has no UNIQUE index, so ON CONFLICT is not available;
    # stale TCF rows are removed first and then re-inserted.
    purge_stmt = sa_text(
        "DELETE FROM compliance.cookie_library WHERE source_name='iab_tcf_v2'"
    )
    insert_stmt = sa_text(
        """
        INSERT INTO compliance.cookie_library
            (cookie_name, domain_pattern, vendor_name,
             vendor_privacy_url, actual_category,
             purpose_en, source_name, source_url, confidence)
        VALUES (:n, :dp, :v, :pu, :cat, :purp, 'iab_tcf_v2',
                'https://vendor-list.consensu.org/v3/vendor-list.json',
                0.99)
        """
    )
    db.execute(purge_stmt)
    db.commit()

    inserted = 0
    skipped = 0
    for vid, v in vendors.items():
        name = (v.get("name") or "").strip()
        if not name:
            continue
        consent_purposes = v.get("purposes") or []
        legit_interest_purposes = v.get("legIntPurposes") or []
        all_purposes = list(set(consent_purposes) | set(legit_interest_purposes))
        category = _category_for_purposes(all_purposes)
        privacy_url = (v.get("policyUrl") or "").strip()[:500] or None

        # The GVL does not list the cookies a vendor sets, so the row only
        # identifies the vendor itself via a synthetic cookie name.
        try:
            row = {
                "n": f"_tcf_v{vid}",
                "dp": "*",
                "v": f"[TCF-{vid}] {name}",
                "pu": privacy_url,
                "cat": category,
                "purp": f"IAB TCF v2 Purposes: {sorted(all_purposes)}",
            }
            db.execute(insert_stmt, row)
            db.commit()  # per-vendor commit
            inserted += 1
        except Exception as e:
            logger.warning("TCF vendor %s insert failed: %s", vid, e)
            skipped += 1
            db.rollback()  # fresh transaction for the next insert
    return {"n_vendors_in_gvl": len(vendors), "inserted": inserted,
            "skipped": skipped}
|
||||
|
||||
|
||||
def lookup_tcf_authority(
    db: Session,
    vendor_name: str | None,
) -> dict | None:
    """Look a vendor name up in the ingested IAB TCF rows.

    Matching is a case-insensitive substring match on ``vendor_name`` of
    the rows with ``source_name = 'iab_tcf_v2'``: 'Google' matches
    '[TCF-755] Google Advertising Products'. The first of at most five
    candidate rows that carries the ``[TCF-<id>]`` prefix wins.

    Returns ``{"tcf_id", "name", "category"}`` or ``None`` when nothing
    matches, the name is empty, or the query fails (failures are logged).
    """
    if not vendor_name:
        return None
    nl = vendor_name.lower().strip()
    if not nl:
        # A whitespace-only name would become the pattern '%%' and match
        # every TCF vendor.
        return None
    try:
        # The ingest writes and deletes via the ``source_name`` column; the
        # filter must use the same column to ever find those rows.
        rows = db.execute(sa_text(
            """
            SELECT cookie_name, actual_category, vendor_name
            FROM compliance.cookie_library
            WHERE source_name = 'iab_tcf_v2'
              AND LOWER(vendor_name) LIKE :pat
            LIMIT 5
            """
        ), {"pat": f"%{nl}%"}).fetchall()
        prefix = "[TCF-"
        for r in rows:
            tcf_name = r[2]  # '[TCF-755] Google ...'
            if tcf_name and "]" in tcf_name:
                tag, rest = tcf_name.split("]", 1)
                # Cut the literal prefix; str.lstrip would strip a character
                # set rather than a prefix.
                tcf_id = tag[len(prefix):] if tag.startswith(prefix) else tag.lstrip("[")
                return {"tcf_id": tcf_id, "name": rest.strip(),
                        "category": r[1]}
    except Exception as e:
        logger.warning("TCF lookup failed: %s", e)
    return None
|
||||
|
||||
|
||||
def cross_reference_with_tcf(
    db: Session,
    declared_vendors: list[dict],
) -> list[dict]:
    """Return one finding per vendor whose declared category contradicts TCF.

    Input:  ``[{name, category}, ...]`` as declared by the site's CMP.
    Output: ``[{vendor, tcf_id, tcf_name, declared_category, tcf_category,
    severity}, ...]``.

    Entries that are not dicts, lack a name or category, or are unknown to
    the TCF list are skipped. ``severity`` is ``"HIGH"`` when TCF says
    marketing while the site declares essential / functional / necessary,
    otherwise ``"MEDIUM"``.
    """
    # Declared spellings that mean the same as a TCF-side category. Without
    # this, "necessary" versus "essential" was reported as a contradiction.
    synonyms = {"necessary": "essential"}

    out: list[dict] = []
    for v in (declared_vendors or []):
        if not isinstance(v, dict):
            continue
        name = (v.get("name") or "").strip()
        declared_cat = (v.get("category") or "").lower().strip()
        if not name or not declared_cat:
            continue
        tcf = lookup_tcf_authority(db, name)
        if not tcf:
            continue
        if tcf["category"] == synonyms.get(declared_cat, declared_cat):
            continue
        # NOTE(review): the original comment called Marketing/Statistics vs
        # Functional/Essential the critical discrepancy, but only a TCF
        # category of "marketing" is escalated to HIGH. Confirm whether
        # "statistics" should be escalated as well.
        is_critical = (
            tcf["category"] == "marketing"
            and declared_cat in ("essential", "functional", "necessary")
        )
        out.append({
            "vendor": name,
            "tcf_id": tcf["tcf_id"],
            "tcf_name": tcf["name"],
            "declared_category": declared_cat,
            "tcf_category": tcf["category"],
            "severity": "HIGH" if is_critical else "MEDIUM",
        })
    return out
|
||||
|
||||
|
||||
def build_tcf_authority_block_html(findings: list[dict]) -> str:
    """Render the "IAB TCF authority check" block for the report mail.

    ``findings`` are dicts with ``vendor``, ``declared_category``,
    ``tcf_id``, ``tcf_category`` and ``severity``. At most 30 findings are
    listed, while the headline shows the total count. An empty list
    yields an empty string.

    Vendor names and declared categories originate from the scanned site
    and are therefore HTML-escaped before interpolation.
    """
    if not findings:
        return ""
    from html import escape

    items: list[str] = []
    for f in findings[:30]:
        sev_color = "#dc2626" if f["severity"] == "HIGH" else "#d97706"
        vendor = escape(str(f["vendor"]))
        declared = escape(str(f["declared_category"]))
        tcf_id = escape(str(f["tcf_id"]))
        tcf_category = escape(str(f["tcf_category"]))
        items.append(
            f'<li style="margin-bottom:6px;font-size:11px;line-height:1.5">'
            f'<strong style="color:{sev_color}">{vendor}</strong> '
            f'<span style="color:#64748b">— deklariert als</span> '
            f'<strong>{declared}</strong>, '
            f'<span style="color:#64748b">IAB TCF v2 (Vendor-ID '
            f'{tcf_id}) listet als</span> '
            f'<strong style="color:{sev_color}">'
            f'{tcf_category}</strong>'
            f'</li>'
        )
    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:14px 18px;'
        'background:#fef2f2;border:1px solid #fecaca;border-radius:8px">'
        '<div style="font-size:11px;color:#991b1b;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'IAB TCF v2 Authority-Check — Vendor-Kategorie-Diskrepanz</div>'
        f'<h3 style="margin:0 0 6px;font-size:14px;color:#1e293b">'
        f'{len(findings)} Vendor{"en" if len(findings) != 1 else ""} '
        'mit Kategorie-Widerspruch zur offiziellen IAB-Liste</h3>'
        '<p style="margin:0 0 10px;font-size:11px;color:#475569;'
        'line-height:1.5">'
        'Die IAB Transparency & Consent Framework v2 Global Vendor List '
        'ist die rechtliche Authoritaet fuer die Klassifizierung von '
        'Werbe-Vendoren in der EU. Wenn ein Vendor dort als "Marketing" '
        'gefuehrt ist, kann die Site ihn nicht als "Funktional" einstufen '
        '— das ist eine externe, durchgesetzte Klassifikation.</p>'
        '<ul style="margin:0 0 0 18px;padding:0">'
        + "".join(items) +
        '</ul>'
        '<p style="margin:8px 0 0;font-size:10px;color:#94a3b8;'
        'font-style:italic">Quelle: '
        'https://vendor-list.consensu.org/v3/vendor-list.json — '
        'die TCF-Liste ist verbindlich fuer alle CMP-Tools die IAB-TCF v2 '
        'implementieren (Cookiebot, OneTrust, Usercentrics, Sourcepoint, …).</p>'
        '</div>'
    )
|
||||
@@ -49,13 +49,19 @@ _SYSTEM_PROMPT = (
|
||||
|
||||
async def extract_vendors_via_llm(
|
||||
cookie_text: str,
|
||||
max_text_chars: int = 12000,
|
||||
max_text_chars: int = 50000,
|
||||
) -> list[dict]:
|
||||
"""Run the Qwen → OVH cascade. Returns vendor records (possibly empty)."""
|
||||
"""Run the Qwen → OVH cascade. Returns vendor records (possibly empty).
|
||||
|
||||
max_text_chars: VW-Cookie-Richtlinie hat ~60k chars mit ~100 Cookies in
|
||||
der Tabelle. Bei 12k waren wir auf die ersten ~5 Cookies begrenzt und
|
||||
haben nur 1 Vendor extrahiert. 50k deckt VW/BMW/Mercedes komplett ab
|
||||
und passt in Qwen3-30b-a3b (128k Context) sowie OVH 120B.
|
||||
"""
|
||||
if not cookie_text or len(cookie_text) < 500:
|
||||
return []
|
||||
excerpt = cookie_text[:max_text_chars]
|
||||
user_prompt = f"Cookie-Richtlinie-Text (gekuerzt):\n\n{excerpt}"
|
||||
user_prompt = f"Cookie-Richtlinie-Text:\n\n{excerpt}"
|
||||
|
||||
# Stage 1: local Qwen
|
||||
content = await _call_ollama(user_prompt)
|
||||
@@ -82,10 +88,13 @@ async def _call_ollama(user_prompt: str) -> str:
|
||||
{"role": "user", "content": user_prompt},
|
||||
],
|
||||
"stream": False, "format": "json",
|
||||
"options": {"temperature": 0.05, "num_predict": 6000},
|
||||
# 16k tokens fuer ~80 Vendors mit je 30 Cookies. War vorher 6k →
|
||||
# output wurde mittendrin abgeschnitten, JSON unparseable → 0 Vendors.
|
||||
"options": {"temperature": 0.05, "num_predict": 16000},
|
||||
}
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
# Qwen 30b braucht fuer 16k output ~4-6min auf M4 Pro.
|
||||
async with httpx.AsyncClient(timeout=420.0) as client:
|
||||
resp = await client.post(f"{base.rstrip('/')}/api/chat", json=payload)
|
||||
resp.raise_for_status()
|
||||
return (resp.json().get("message") or {}).get("content", "")
|
||||
@@ -109,7 +118,7 @@ async def _call_ovh(user_prompt: str) -> str:
|
||||
{"role": "system", "content": _SYSTEM_PROMPT},
|
||||
{"role": "user", "content": user_prompt},
|
||||
],
|
||||
"temperature": 0.05, "max_tokens": 6000,
|
||||
"temperature": 0.05, "max_tokens": 16000,
|
||||
"response_format": {"type": "json_object"},
|
||||
}
|
||||
try:
|
||||
|
||||
@@ -0,0 +1,167 @@
|
||||
"""
|
||||
Vendor-Deduplizierung und Garbage-Filter.
|
||||
|
||||
Normalisiert Vendor-Namen (Google + Google DoubleClick + DoubleClick/Google
|
||||
Marketing → eine Eintragung) und entfernt Garbage-Eintraege die fälschlich
|
||||
als Vendor erkannt wurden ('click to select a dealership', 'Mehrere OEMs',
|
||||
URL-Fragmente, etc.).
|
||||
|
||||
Wird nach allen Vendor-Sources (LLM, Library, Pattern, Phase-G) angewandt
|
||||
bevor die VVT-Tabelle gerendert wird.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Aliases: every known spelling (keys are lower-case) -> canonical vendor name.
_VENDOR_ALIASES: dict[str, str] = {
    # Google family
    "google": "Google",
    "google llc": "Google",
    "google inc": "Google",
    "google marketing platform": "Google",
    "google ads": "Google",
    "google adsense": "Google",
    "google analytics": "Google Analytics",
    "google tag manager": "Google Tag Manager",
    "google doubleclick": "Google",
    "doubleclick": "Google",
    "doubleclick/google marketing": "Google",
    "doubleclick by google": "Google",
    # Adobe family
    "adobe": "Adobe",
    "adobe inc": "Adobe",
    "adobe systems": "Adobe",
    "adobe analytics": "Adobe Analytics",
    "adobe audience manager": "Adobe Audience Manager",
    "adobe experience cloud": "Adobe Experience Cloud",
    "adobe target": "Adobe Target",
    "adobe advertising cloud (everest)": "Adobe Advertising Cloud",
    # Trade Desk
    "the trade desk": "The Trade Desk",
    "tradedesk": "The Trade Desk",
    "the tradedesk": "The Trade Desk",
    "trade desk": "The Trade Desk",
    # Meta
    "meta": "Meta / Facebook",
    "meta platforms": "Meta / Facebook",
    "facebook": "Meta / Facebook",
    "meta / facebook": "Meta / Facebook",
    # AdForm
    "adform": "AdForm",
    "adform dsp": "AdForm",
    # Microsoft
    "microsoft": "Microsoft",
    "microsoft bing": "Microsoft Bing",
    "linkedin": "LinkedIn (Microsoft)",
    "linkedin corporation": "LinkedIn (Microsoft)",
    # Consent management platforms (CMP)
    "onetrust": "OneTrust",
    "cookiebot": "Cookiebot",
    "usercentrics": "Usercentrics",
    "borlabs": "Borlabs",
    "borlabs / cookie-cmp": "Borlabs",
    # Salesforce
    "salesforce": "Salesforce",
    "salesforce liveagent": "Salesforce",
    "liveagent": "Salesforce",
    # Cloudflare
    "cloudflare": "Cloudflare",
}
|
||||
|
||||
|
||||
# Garbage-Patterns: wenn der Vendor-Name darauf matched → wegfiltern
|
||||
_GARBAGE_PATTERNS = (
|
||||
re.compile(r"^click to ", re.I),
|
||||
re.compile(r"^mehrere oems", re.I),
|
||||
re.compile(r"^breakpilot[-_ ]?snapshot", re.I),
|
||||
re.compile(r"^https?://", re.I), # URLs
|
||||
re.compile(r"^https?$", re.I),
|
||||
re.compile(r"^javascript:", re.I),
|
||||
re.compile(r"^undefined$|^null$|^none$", re.I),
|
||||
re.compile(r"^[\d\W]+$"), # nur Zahlen/Symbole
|
||||
re.compile(r"^.{1,2}$"), # Ein-/Zwei-Zeichen-"Namen"
|
||||
re.compile(r"^(ein|der|die|das|von|und|aber|oder)$", re.I),
|
||||
re.compile(r"^cookie$|^cookies$", re.I),
|
||||
)
|
||||
|
||||
|
||||
def _is_garbage(name: str) -> bool:
|
||||
if not name or len(name.strip()) < 2:
|
||||
return True
|
||||
if len(name) > 120:
|
||||
return True
|
||||
return any(p.search(name) for p in _GARBAGE_PATTERNS)
|
||||
|
||||
|
||||
def _canonical_name(name: str) -> str:
    """Map a raw vendor name onto its canonical spelling.

    An exact (case-insensitive) alias hit wins. Otherwise every alias of
    at least six characters that occurs as a substring of the name is a
    candidate and the LONGEST candidate decides, so the most specific
    alias wins: 'Google Analytics 4' maps to 'Google Analytics' instead of
    being swallowed by the shorter alias 'google'. Names without any alias
    hit are returned stripped but otherwise unchanged.
    """
    cleaned = name.strip()
    key = cleaned.lower()
    exact = _VENDOR_ALIASES.get(key)
    if exact is not None:
        return exact
    # Aliases shorter than six characters ('meta', 'adobe') are too likely
    # to occur inside unrelated words and are only matched exactly.
    candidates = [
        alias for alias in _VENDOR_ALIASES
        if len(alias) >= 6 and alias in key
    ]
    if candidates:
        # max() keeps the first of equally long aliases, so the result stays
        # deterministic in dict order.
        return _VENDOR_ALIASES[max(candidates, key=len)]
    return cleaned
|
||||
|
||||
|
||||
def normalize_vendors(vendors: list[dict]) -> list[dict]:
    """Drop garbage entries and merge duplicates by canonical vendor name.

    The FIRST entry seen for a canonical name is the base record. Later
    duplicates (e.g. from LLM + library + phase G) contribute

    * cookies whose name (stripped, case-insensitive) is not present yet,
    * their ``source`` tag, appended semicolon-separated,
    * metadata (``country``, ``opt_out_url``, ``privacy_policy_url``,
      ``purpose``, ``category``, ``persistence``) only where the base
      record has no value.

    The input dicts and their ``cookies`` lists are not modified; the
    result consists of shallow copies in first-seen order.
    """
    if not vendors:
        return []

    def _cookie_key(cookie: dict) -> str:
        return str(cookie.get("name") or "").strip().lower()

    by_canon: dict[str, dict] = {}
    dropped_garbage = 0
    merged = 0
    for v in vendors:
        if not isinstance(v, dict):
            continue
        raw_name = str(v.get("name") or "").strip()
        if _is_garbage(raw_name):
            dropped_garbage += 1
            continue
        canon = _canonical_name(raw_name)
        ex = by_canon.get(canon)
        if ex is None:
            # Copy so that renaming and merging never touch the caller's data.
            entry = dict(v)
            entry["name"] = canon
            if isinstance(entry.get("cookies"), list):
                entry["cookies"] = list(entry["cookies"])
            by_canon[canon] = entry
            continue

        # Merge cookies.
        ex_cookies = list(ex.get("cookies") or [])
        seen_ck = {_cookie_key(c) for c in ex_cookies if isinstance(c, dict)}
        for c in (v.get("cookies") or []):
            if isinstance(c, dict):
                nm = _cookie_key(c)
                if nm and nm not in seen_ck:
                    ex_cookies.append(c)
                    seen_ck.add(nm)
        ex["cookies"] = ex_cookies

        # Merge source tags (semicolon-separated).
        ex_src = (ex.get("source") or "").split(";")
        new_src = v.get("source") or ""
        if new_src and new_src not in ex_src:
            ex_src.append(new_src)
        ex["source"] = ";".join([s for s in ex_src if s])

        # Fill metadata gaps of the base record.
        for k in ("country", "opt_out_url", "privacy_policy_url",
                  "purpose", "category", "persistence"):
            if not ex.get(k) and v.get(k):
                ex[k] = v[k]
        merged += 1

    if dropped_garbage or merged:
        logger.info(
            "Vendor-Normalizer: %d garbage dropped, %d duplicate merges, "
            "%d unique vendors (input: %d)",
            dropped_garbage, merged, len(by_canon), len(vendors),
        )
    return list(by_canon.values())
|
||||
@@ -0,0 +1,237 @@
|
||||
#!/usr/bin/env python3
|
||||
"""P101 — Cookie-Library Auto-Enrich aus Snapshots.
|
||||
|
||||
Geht alle compliance_check_snapshots durch und:
|
||||
1. Extrahiert unique (cookie_name, vendor_hint) aus Phase-A/B/C-Cookies
|
||||
2. Sammelt deklarierte Kategorie + Speicherdauer pro Cookie pro Site
|
||||
3. Vergleicht mit cookie_library (Open-Cookie-Database + DACH)
|
||||
4. Reportet: new_cookies, kategorie_mismatches, multi_site_inconsistencies
|
||||
|
||||
Run im Container:
|
||||
docker exec bp-compliance-backend python /tmp/enrich.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from typing import Any
|
||||
|
||||
from database import engine
|
||||
from sqlalchemy import text
|
||||
|
||||
|
||||
def _category_from_text_context(cookie_name: str, doc_text: str) -> str | None:
|
||||
"""Lookup cookie_name in doc_text + extract deklarierte Kategorie aus
|
||||
der Tabellen-Zeile darum (innerhalb 200 Zeichen davor/danach)."""
|
||||
if not doc_text or not cookie_name:
|
||||
return None
|
||||
idx = doc_text.find(cookie_name)
|
||||
if idx < 0:
|
||||
return None
|
||||
window = doc_text[max(0, idx - 50):idx + 400].lower()
|
||||
category_patterns = [
|
||||
(r"(?:strictly[- ])?(?:notwendig|essential|funktional|funktionscookie|"
|
||||
r"funktional cookie|technisch notwendig)", "essential"),
|
||||
(r"(?:tracking|analytics|analyse|statistik|measurement|performance)",
|
||||
"statistics"),
|
||||
(r"(?:marketing|werbung|advertising|targeting|drittanbieter)",
|
||||
"marketing"),
|
||||
(r"(?:social[- ]?media|share|like|like[- ]?button)", "social_media"),
|
||||
]
|
||||
for pat, cat in category_patterns:
|
||||
if re.search(pat, window):
|
||||
return cat
|
||||
return None
|
||||
|
||||
|
||||
def _purpose_text(cookie_name: str, doc_text: str) -> str | None:
|
||||
"""Extract die Zweck-Beschreibung aus dem Doc-Text (Sätze um den Namen)."""
|
||||
if not doc_text or not cookie_name:
|
||||
return None
|
||||
idx = doc_text.find(cookie_name)
|
||||
if idx < 0:
|
||||
return None
|
||||
after = doc_text[idx + len(cookie_name):idx + len(cookie_name) + 400]
|
||||
sentences = re.split(r"[.\n]", after)
|
||||
text_lines = [s.strip() for s in sentences if 30 < len(s.strip()) < 300]
|
||||
return text_lines[0] if text_lines else None
|
||||
|
||||
|
||||
def main() -> int:
    """Audit cookies observed in stored snapshots against ``compliance.cookie_library``.

    Steps:
      1. Load every row of ``compliance.compliance_check_snapshots``.
      2. For each cookie found in the ``before_consent`` / ``after_reject`` /
         ``after_accept`` phases of ``banner_result``, record an observation
         (site, phase, cookie domain, lifetime, declared category and
         purpose inferred from the snapshot's document text).
      3. Compare against ``compliance.cookie_library`` and collect unknown
         cookies, declared-vs-library category mismatches and cookies that
         different observations declare under different categories.
      4. Print the report to stdout (the snapshot count goes to stderr).
      5. Insert unknown cookies whose confidence is >= 0.75 into
         ``compliance.cookie_library`` (``ON CONFLICT DO NOTHING``).

    Returns:
        Always 0 (process exit code).
    """
    with engine.connect() as c:
        rows = c.execute(text(
            "SELECT id, site_domain, doc_entries, banner_result "
            "FROM compliance.compliance_check_snapshots"
        )).fetchall()
    print(f"Loaded {len(rows)} snapshots", file=sys.stderr)

    # cookie_name -> list of observations
    observations: dict[str, list[dict]] = defaultdict(list)

    for _snap_id, domain, doc_entries, banner_result in rows:
        # Defensive: NULL or unexpectedly shaped JSON columns must not abort
        # the whole audit run.
        if not isinstance(doc_entries, list):
            doc_entries = []
        if not isinstance(banner_result, dict):
            banner_result = {}

        # Document text used for category inference: the LONGEST text among
        # the entries typed "cookie" or "dse".
        # NOTE(review): an earlier comment said the cookie document is
        # preferred with the DSE as fallback, but the selection has always
        # been purely by length — confirm which one is intended.
        doc_text = ""
        for e in doc_entries:
            if not isinstance(e, dict):
                continue
            if e.get("doc_type") in ("cookie", "dse"):
                t = e.get("text") or e.get("full_text") or e.get("text_preview") or ""
                if len(t) > len(doc_text):
                    doc_text = t

        phases = banner_result.get("phases") or {}
        if not isinstance(phases, dict):
            phases = {}

        # doc_text is fixed per snapshot, so the inferred category/purpose of
        # a cookie name is identical in every phase: compute it once.
        inferred: dict[str, tuple] = {}

        for phase_name in ("before_consent", "after_reject", "after_accept"):
            phase = phases.get(phase_name) or {}
            if not isinstance(phase, dict):
                continue
            for ck in phase.get("cookies") or []:
                # Snapshot cookies are mostly plain strings (cookie names),
                # occasionally dicts with name/domain/max_age.
                if isinstance(ck, dict):
                    cname = str(ck.get("name") or "").strip()
                    cdomain = str(ck.get("domain") or "").lstrip(".").lower()
                    max_age = ck.get("max_age") or ck.get("expires")
                else:
                    cname = str(ck).strip()
                    cdomain = ""
                    max_age = None
                if not cname or len(cname) > 80:
                    continue

                if cname not in inferred:
                    purpose = _purpose_text(cname, doc_text)
                    inferred[cname] = (
                        _category_from_text_context(cname, doc_text),
                        purpose[:150] if purpose else None,
                    )
                cat_declared, purpose = inferred[cname]

                observations[cname].append({
                    # A NULL site_domain would make sorted() fail later when
                    # mixed with string domains.
                    "site": domain or "unknown",
                    "phase": phase_name,
                    "cookie_domain": cdomain,
                    "max_age": max_age,
                    "declared_category": cat_declared,
                    "declared_purpose": purpose,
                })

    print(f"\nUnique cookies observed: {len(observations)}\n")

    # Lookup vs cookie_library
    with engine.connect() as c:
        lib_rows = c.execute(text(
            "SELECT cookie_name, actual_category, vendor_name "
            "FROM compliance.cookie_library"
        )).fetchall()
    # Rows with a NULL/empty cookie_name cannot be matched; skip them instead
    # of failing on .lower().
    lib_lookup = {r[0].lower(): {"category": r[1], "vendor": r[2]}
                  for r in lib_rows if r[0]}

    new_cookies: list[str] = []
    mismatches: list[dict] = []
    inconsistencies: list[dict] = []

    for cname, obs_list in observations.items():
        sites = sorted({o["site"] for o in obs_list})
        declared_cats = sorted({o["declared_category"] for o in obs_list
                                if o["declared_category"]})

        # 1) Same cookie declared under more than one category
        if len(declared_cats) > 1:
            inconsistencies.append({
                "cookie": cname,
                "sites": sites,
                "categories": declared_cats,
            })

        # 2) Library lookup
        lib_entry = lib_lookup.get(cname.lower())
        if not lib_entry:
            new_cookies.append(cname)
            continue

        # 3) Mismatch declared vs library. A missing (NULL) library category
        # is treated like "unknown": there is nothing to contradict.
        lib_cat = lib_entry["category"]
        if not lib_cat or lib_cat == "unknown":
            continue
        for dc in declared_cats:
            if dc != lib_cat:
                mismatches.append({
                    "cookie": cname,
                    "declared_by_site": dc,
                    "library_says": lib_cat,
                    "library_vendor": lib_entry["vendor"],
                    "sites": sites,
                })
                break  # one mismatch entry per cookie

    # === Report ===
    print("=" * 70)
    print("AUDIT-REPORT: P101 Cookie-Library Auto-Enrich")
    print(f"  Snapshots: {len(rows)}")
    print(f"  Unique cookies observed: {len(observations)}")
    print(f"  In Library (Open-Cookie-DB + DACH): {len(observations) - len(new_cookies)}")
    print(f"  NEW (unbekannt): {len(new_cookies)}")
    print(f"  Mismatches (declared != library): {len(mismatches)}")
    print(f"  Multi-Site Inkonsistenzen: {len(inconsistencies)}")
    print("=" * 70)

    print("\n--- TOP-20 NEW COOKIES (Kandidaten fuer Library-Enrich) ---")
    enriched_candidates: list[tuple[str, dict]] = []
    for cname in new_cookies:
        obs = observations[cname]
        cats = [o["declared_category"] for o in obs if o["declared_category"]]
        if not cats:
            continue  # cannot enrich without a declared category
        purpose = next((o["declared_purpose"] for o in obs
                        if o["declared_purpose"]), None)
        sites = sorted({o["site"] for o in obs})
        # 0.7 for one site, +0.1 per additional site, capped at 0.95.
        # Rounded so float noise (0.9000000000000001) is neither printed
        # nor stored.
        confidence = round(min(0.6 + 0.1 * len(sites), 0.95), 2)
        enriched_candidates.append((cname, {
            "category": cats[0],
            "purpose": purpose,
            "sites": sites,
            "confidence": confidence,
        }))
    # Make "TOP-20" meaningful: highest confidence first, then by name.
    enriched_candidates.sort(key=lambda item: (-item[1]["confidence"], item[0]))

    for cname, info in enriched_candidates[:20]:
        print(f"  {cname:30s} [{info['category']:12s}] conf={info['confidence']:.2f} "
              f"sites={info['sites']}")
        if info.get("purpose"):
            print(f"      purpose: {info['purpose'][:100]}")

    print(f"\n--- ALLE MISMATCHES ({len(mismatches)}) ---")
    for m in mismatches[:30]:
        print(f"  {m['cookie']:30s} declared={m['declared_by_site']:12s} "
              f"library={str(m['library_says']):12s} "
              f"sites={m['sites']}")

    print(f"\n--- ALLE INKONSISTENZEN ({len(inconsistencies)}) ---")
    for i in inconsistencies[:30]:
        print(f"  {i['cookie']:30s} cats={i['categories']} sites={i['sites']}")

    # Auto-insert the candidates with confidence >= 0.75
    print("\n--- AUTO-INSERTING in cookie_library (confidence>=0.75) ---")
    inserted = 0
    with engine.begin() as c:
        for cname, info in enriched_candidates:
            if info["confidence"] < 0.75:
                continue
            r = c.execute(text("""
                INSERT INTO compliance.cookie_library
                    (cookie_name, domain_pattern, vendor_name,
                     actual_category, purpose_de,
                     source_name, source_url, source_license, confidence)
                VALUES (:n, '*', 'Mehrere OEMs (BreakPilot-Snapshot)',
                        :cat, :pd,
                        'BreakPilot-Auto-Enrich', 'https://breakpilot.ai',
                        'CC-BY-eigene-Sammlung', :cf)
                ON CONFLICT DO NOTHING
            """), dict(
                n=cname[:200],
                cat=info["category"],
                pd=info.get("purpose") or f"Beobachtet bei {len(info['sites'])} OEMs",
                cf=info["confidence"],
            ))
            inserted += r.rowcount
    print(f"  inserted: {inserted}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -0,0 +1,55 @@
|
||||
Name des Cookies
|
||||
Kategorie
|
||||
Verwendungszweck
|
||||
Speicherdauer
|
||||
Art des Cookies
|
||||
VWD6_ENSIGHTEN_PRIVACY_MODAL_LOADED
|
||||
Funktionscookie
|
||||
Dieses Cookie speichert, ob für den User der Cookie Manager angezeigt wurde.
|
||||
1 Jahr
|
||||
Permanent/Protokoll
|
||||
VWD6_ENSIGHTEN_PRIVACY_MODAL_VIEWED
|
||||
Funktionscookie
|
||||
Dieses Cookie speichert, ob für der User Einstellung im Cookie Manager vorgenommen hat.
|
||||
1 Jahr
|
||||
Permanent/Protokoll
|
||||
VWD6_ENSIGHTEN_PRIVACY_<category name>
|
||||
Funktionscookie
|
||||
Dieses Cookie speichert, ob der User sein Einverständnis für die entsprechende Cookie Kategorie gegeben hat.
|
||||
1 Jahr
|
||||
Permanent/Protokoll
|
||||
UZ_TI_dc_value
|
||||
Funktionscookie
|
||||
Dieses Cookie verfolgt die Studien-ID oder die Segment-ID in Abhängigkeit vom Wert von UZ_TI_dc_value.
|
||||
20 Tage
|
||||
Persistent cookie
|
||||
awsalb
|
||||
Funktionscookie
|
||||
Der Cookie prüft, welcher Load Balancer für die aktuelle Session verwendet wird.
|
||||
7 Tage
|
||||
Persistent cookie
|
||||
UZ_TI_S_<ID>
|
||||
Funktionscookie
|
||||
Der Cookie erfasst, ob ein anderer Cookie für jedes Segment verwendet wird.
|
||||
20 Tage
|
||||
Persistent cookie
|
||||
smartSignals2UiD
|
||||
Trackingcookie (Analytics & Personalisierung)
|
||||
Dieses Cookie enthält eine eindeutige, zufällig generierte ID für einen Webseiten User.
|
||||
1 Jahr
|
||||
Permanent/Protokoll
|
||||
smartSignals2sUiD
|
||||
Trackingcookie (Analytics & Personalisierung)
|
||||
userId verbesserter Mechanismus zur Browser-Tracking-Einschraenkungen
|
||||
1 Jahr
|
||||
Permanent/Protokoll
|
||||
smartSignals2CP
|
||||
Trackingcookie (Analytics & Personalisierung)
|
||||
Personalisierte Inhalte angezeigt
|
||||
30 Minuten
|
||||
Session Cookie
|
||||
s_ecid
|
||||
Trackingcookie (Analytics & Personalisierung)
|
||||
First-Party-Cookie Besucherkennung
|
||||
13 Monate nach dem letzten Besuch
|
||||
Permanent/Protokoll
|
||||
@@ -0,0 +1,51 @@
|
||||
{
|
||||
"site": "Volkswagen Deutschland",
|
||||
"site_url": "https://www.volkswagen.de",
|
||||
"captured_at": "2026-05-22T00:00:00Z",
|
||||
"source": "User-Copy aus Cookie-Richtlinie (Browser Strg+A → Strg+C)",
|
||||
"cookie_richtlinie_url": "https://www.volkswagen.de/de/mehr/rechtliches/cookie-richtlinie.html",
|
||||
"expectations": {
|
||||
"min_declared_cookies": 90,
|
||||
"expected_unique_vendors_after_dedup": 18,
|
||||
"must_find_cookies": [
|
||||
"VWD6_ENSIGHTEN_PRIVACY_MODAL_LOADED",
|
||||
"VWD6_ENSIGHTEN_PRIVACY_MODAL_VIEWED",
|
||||
"smartSignals2UiD", "smartSignals2sUiD",
|
||||
"s_ecid", "s_cc", "s_sq",
|
||||
"AMCV_", "AMCVS_", "demdex", "dextp",
|
||||
"mbox", "mboxEdgeCluster",
|
||||
"TDID", "TDCPM", "TTDOptOut",
|
||||
"DSID", "ANID", "AID", "IDE", "TAID",
|
||||
"_gcl_au", "_gcl_dc", "_fbc", "_fbp", "fr",
|
||||
"_pk_uid",
|
||||
"OptanonConsent",
|
||||
"everest_g_v2", "everest_session_v2",
|
||||
"adbCDP",
|
||||
"liveagent_sid", "liveagent_chatted",
|
||||
"X-Salesforce-eLB", "sfdc-stream",
|
||||
"__cfduid", "__cflb",
|
||||
"FPAU", "FPGCLDC", "FLC", "APC",
|
||||
"wlfeDoLogin", "wlfeRefreshSessionId", "LBCOOKIE",
|
||||
"CookieConsentPolicy",
|
||||
"BrowserId", "BrowserId_sec",
|
||||
"inbenta-km-session-id"
|
||||
],
|
||||
"expected_vendors_present": [
|
||||
"Google",
|
||||
"Adobe Experience Cloud",
|
||||
"Adobe Analytics",
|
||||
"The Trade Desk",
|
||||
"AdForm",
|
||||
"Meta / Facebook",
|
||||
"Salesforce",
|
||||
"Cloudflare",
|
||||
"Borlabs"
|
||||
],
|
||||
"expected_high_findings_minimum": 1,
|
||||
"banner_must_be_detected": true,
|
||||
"expected_doc_types_with_text": [
|
||||
"dse", "cookie", "impressum", "nutzungsbedingungen"
|
||||
]
|
||||
},
|
||||
"raw_paste": "Name des Cookies\nKategorie\nVerwendungszweck\nSpeicherdauer\nArt des Cookies\nSee tests/fixtures/cookie_gt/vw_cookie_richtlinie.txt for the abbreviated raw form."
|
||||
}
|
||||
@@ -227,6 +227,70 @@ class TestMarkdownToDocx:
|
||||
assert result[:4] == b"PK\x03\x04"
|
||||
|
||||
|
||||
class TestPerPersonContext:
    """Tests for the per-person context building (IP assignment, managing-director contract)."""

    def test_build_person_context_ip_areas_as_list(self):
        """ip_areas given as a list end up in IP_LIST_DETAILS, individually per person."""
        from compliance.api.founding_wizard_routes import _build_person_context

        doc_key = "ip_assignment_agreement"
        base = {"COMPANY_NAME": "X GmbH"}
        person = dict(
            name="Benjamin Bönisch",
            geburtsdatum="1980-01-01",
            adresse="Test 1",
            internal_role="CEO",
            ip_areas=["Compliance-Engine", "RAG-Pipeline"],
        )

        ctx = _build_person_context(base, person, doc_key)
        assert ctx["ASSIGNOR_NAME"] == "Benjamin Bönisch"
        for area in ("Compliance-Engine", "RAG-Pipeline"):
            assert area in ctx["IP_LIST_DETAILS"]

        # Two distinct persons should yield distinct IP_LIST_DETAILS
        person2 = dict(person, name="Sharang", ip_areas=["Security", "Infrastruktur"])
        ctx2 = _build_person_context(base, person2, doc_key)
        assert ctx["IP_LIST_DETAILS"] != ctx2["IP_LIST_DETAILS"]
        assert "Security" in ctx2["IP_LIST_DETAILS"]

    def test_build_person_context_fallback_when_no_ip_areas(self):
        """Without ip_areas the context keeps the default taken from base."""
        from compliance.api.founding_wizard_routes import _build_person_context

        default_details = "- Default IP"
        base = {"COMPANY_NAME": "X GmbH", "IP_LIST_DETAILS": default_details}
        ctx = _build_person_context(
            base, {"name": "Foo", "ip_areas": []}, "ip_assignment_agreement"
        )
        assert ctx["IP_LIST_DETAILS"] == default_details

    def test_safe_slug_handles_special_chars(self):
        """Non-ASCII characters and blanks are replaced; empty input maps to 'Person'."""
        from compliance.api.founding_wizard_routes import _safe_slug

        expectations = [
            ("Benjamin Bönisch", "Benjamin_B_nisch"),
            ("Sharang Parnerkar", "Sharang_Parnerkar"),
            ("", "Person"),
            ("   ", "Person"),
        ]
        for raw, slug in expectations:
            assert _safe_slug(raw) == slug

    def test_per_person_docs_set_contains_expected(self):
        """Only the person-specific document types belong to PER_PERSON_DOCS."""
        from compliance.api.founding_wizard_routes import PER_PERSON_DOCS

        for doc_key in ("ip_assignment_agreement",
                        "managing_director_employment_contract"):
            assert doc_key in PER_PERSON_DOCS
        # Articles of association etc. must NOT be per-person:
        for doc_key in ("articles_of_association", "sha"):
            assert doc_key not in PER_PERSON_DOCS
|
||||
|
||||
|
||||
class TestBasicsRegisterCourt:
    """Checks how register court and HRB number from ``basics`` reach the base context."""

    def test_register_court_propagates(self):
        """Explicit register court / HRB number are copied into the context keys."""
        court = "Amtsgericht Stuttgart"
        hrb = "HRB 12345"

        state = TestWizardToContext()._basic_state()
        basics = state["basics"]
        basics["register_court"] = court
        basics["hrb_number"] = hrb

        ctx = base_context(state)
        assert ctx["REGISTER_COURT"] == court
        assert ctx["COMPANY_REGISTRY_COURT"] == court
        assert ctx["HRB_NUMBER"] == hrb
        assert ctx["HAS_HRB"] is True

    def test_register_court_default_when_missing(self):
        """Without register data a placeholder court is used and HAS_HRB is False."""
        untouched_state = TestWizardToContext()._basic_state()
        ctx = base_context(untouched_state)
        assert "[zuständiges Amtsgericht]" in ctx["REGISTER_COURT"]
        assert ctx["HAS_HRB"] is False
|
||||
|
||||
|
||||
class TestEndToEndRendering:
|
||||
"""Test mit echtem Template-aehnlichen Markdown + 2-Mann GmbH Daten."""
|
||||
|
||||
|
||||
@@ -28,4 +28,8 @@ USER appuser
|
||||
|
||||
EXPOSE 8094
|
||||
|
||||
# P83 — Build-SHA fuer check-rebuild-needed.sh
|
||||
ARG BUILD_SHA="unknown"
|
||||
ENV BUILD_SHA=${BUILD_SHA}
|
||||
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8094"]
|
||||
|
||||
@@ -53,6 +53,7 @@ class ScanResponse(BaseModel):
|
||||
cmp_payloads: list[dict] = [] # P48: raw CMP JSON-payloads (Usercentrics/OneTrust/...) captured during scan
|
||||
vendor_details: list[dict] = [] # P50: per-vendor detail-modal-extracts (Beschreibung/Cookies/Opt-Out/Privacy)
|
||||
cookies_detailed: list[dict] = [] # P59b: full cookie details for behavior-validation (name,value,domain,expires,phase,declared_category)
|
||||
banner_screenshot_b64: str = "" # P85: base64-PNG des Banners (initial-view)
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
@@ -133,6 +134,7 @@ async def scan_consent(req: ScanRequest):
|
||||
cmp_payloads=result.cmp_payloads, # P48
|
||||
vendor_details=result.vendor_details, # P50
|
||||
cookies_detailed=result.cookies_detailed, # P59b
|
||||
banner_screenshot_b64=result.banner_screenshot_b64, # P85
|
||||
)
|
||||
|
||||
|
||||
@@ -292,6 +294,10 @@ class DSIDocumentInfo(BaseModel):
|
||||
word_count: int = 0
|
||||
text_preview: str = ""
|
||||
full_text: str = ""
|
||||
# D — Tab-getrennte HTML-Tabellen aus dem DOM (z.B. Cookie-Tabellen).
|
||||
# Pro Tabelle ein Array von Zeilen, jede Zeile Tab-getrennt.
|
||||
# Backend nutzt das fuer deterministischen Cookie-Tabellen-Parse.
|
||||
tables: list[list[str]] = []
|
||||
|
||||
|
||||
class DSIDiscoveryResponse(BaseModel):
|
||||
@@ -347,6 +353,7 @@ async def dsi_discovery(req: DSIDiscoveryRequest):
|
||||
word_count=d.word_count,
|
||||
text_preview=d.text[:500] if d.text else "",
|
||||
full_text=d.text[:200000] if d.text else "",
|
||||
tables=getattr(d, "tables", []) or [],
|
||||
)
|
||||
for d in result.documents
|
||||
],
|
||||
|
||||
@@ -159,8 +159,18 @@ async def check_banner_text(page) -> dict:
|
||||
# (e.g. only "Nur technisch Notwendige" — semantically
|
||||
# a reject but EDPB 5/2020 + DSK-OH 2024 prefer explicit
|
||||
# labeling so users recognize it as the reject option)
|
||||
explicit_reject_texts = ["ablehnen", "reject", "alle ablehnen",
|
||||
"decline", "alles ablehnen"]
|
||||
# P93: EDPB 5/2020 schreibt kein bestimmtes Wort vor — Reject-
|
||||
# Mechanismus muss gleichwertig zur Annahme sein. BMW nutzt
|
||||
# "Cookies verbieten", andere Sites "Tracking ablehnen" o.ae. —
|
||||
# alle rechtlich gleichwertig.
|
||||
explicit_reject_texts = [
|
||||
"ablehnen", "reject", "alle ablehnen",
|
||||
"decline", "alles ablehnen",
|
||||
"cookies verbieten", "cookies blockieren",
|
||||
"tracking ablehnen", "tracking verbieten",
|
||||
"zurueckweisen", "block all", "deny all",
|
||||
"alle verweigern", "verweigern",
|
||||
]
|
||||
implicit_reject_texts = ["nur notwendige", "nur technisch", "nur essenzielle",
|
||||
"nur essentielle", "notwendige akzeptieren",
|
||||
"essential only", "only necessary",
|
||||
@@ -186,6 +196,33 @@ async def check_banner_text(page) -> dict:
|
||||
legal_ref="EDPB 5/2020 (Consent) + DSK-OH 2024 (Telemedien)",
|
||||
))
|
||||
|
||||
# P100: Granular-Wahl-Pruefung — "Anpassen"/"Einstellungen"-Button
|
||||
# im Initial-Banner. Wenn er FEHLT (VW-Pattern), ist die granulare
|
||||
# Cookie-Wahl erst nach Akzeptanz/Ablehnung moeglich — faktische
|
||||
# Manipulation Richtung "Alle akzeptieren". EDPB 5/2020 §82.
|
||||
granular_button_texts = [
|
||||
"anpassen", "einstellungen", "cookie-einstellungen",
|
||||
"cookies verwalten", "manage cookies", "customize",
|
||||
"weitere optionen", "more options", "settings",
|
||||
"individuell", "detaillierte einstellungen",
|
||||
"praeferenzen", "preferences",
|
||||
]
|
||||
has_granular_button = any(t in banner_lower for t in granular_button_texts)
|
||||
if not has_granular_button:
|
||||
violations.append(Violation(
|
||||
service="Cookie-Banner",
|
||||
severity="HIGH",
|
||||
text="Granulare Cookie-Auswahl im Initial-Banner nicht "
|
||||
"moeglich (kein 'Anpassen'/'Einstellungen'-Button). "
|
||||
"Nutzer koennen nur 'Alle akzeptieren' oder 'Nur "
|
||||
"technisch notwendige' waehlen — Detailwahl pro "
|
||||
"Kategorie erst nach Akzeptanz/Ablehnung. Das ist "
|
||||
"faktische Manipulation Richtung Pauschal-Akzeptanz.",
|
||||
legal_ref="EDPB Guidelines 5/2020 §82 (granular consent), "
|
||||
"§25 Abs. 1 TDDDG, Art. 4(11) DSGVO (informierte "
|
||||
"Einwilligung)",
|
||||
))
|
||||
|
||||
# Check 5: Pre-ticked checkboxes (EuGH Planet49)
|
||||
try:
|
||||
pre_checked = await page.evaluate("""
|
||||
|
||||
@@ -77,6 +77,10 @@ class ConsentTestResult:
|
||||
# for behavior-validation in backend. Implicit declared_category:
|
||||
# before/reject phase = essential (site claims), accept = any.
|
||||
cookies_detailed: list = field(default_factory=list)
|
||||
# P85: base64-PNG-Screenshot des Banners vor dem ersten Klick.
|
||||
# Backend embedded das als <img> in der Mail — visueller Beweis
|
||||
# "so sah das Banner zum Audit-Zeitpunkt aus".
|
||||
banner_screenshot_b64: str = ""
|
||||
|
||||
|
||||
async def run_consent_test(
|
||||
@@ -196,6 +200,17 @@ async def run_consent_test(
|
||||
result.banner_text_violations = banner_violations["violations"]
|
||||
result.banner_has_impressum_link = banner_violations["has_impressum"]
|
||||
result.banner_has_dse_link = banner_violations["has_dse"]
|
||||
# P85 — visueller Beweis fuer die Mail.
|
||||
try:
|
||||
import base64 as _b64
|
||||
png = await page_a.screenshot(
|
||||
full_page=False, type="png", timeout=10000,
|
||||
)
|
||||
if png and len(png) < 1_500_000: # < 1.5 MB
|
||||
result.banner_screenshot_b64 = _b64.b64encode(png).decode("ascii")
|
||||
logger.info("P85: banner screenshot captured (%d bytes)", len(png))
|
||||
except Exception as _se:
|
||||
logger.warning("P85: banner screenshot failed: %s", _se)
|
||||
|
||||
await ctx_a.close()
|
||||
|
||||
|
||||
@@ -159,6 +159,10 @@ class DiscoveredDSI:
|
||||
text: str = "" # Extracted full text
|
||||
sections: list[dict] = field(default_factory=list) # Parsed sections
|
||||
word_count: int = 0
|
||||
# D — Tab-getrennte HTML-Tabellen aus dem DOM. Pro Tabelle eine
|
||||
# Liste von Zeilen, jede Zeile ein Tab-getrennter String. Erlaubt
|
||||
# dem Backend deterministischen Cookie-Tabellen-Parse ohne LLM.
|
||||
tables: list[list[str]] = field(default_factory=list)
|
||||
|
||||
@dataclass
|
||||
class DSIDiscoveryResult:
|
||||
@@ -178,6 +182,35 @@ class DSIDiscoveryResult:
|
||||
# not the homepage navigation that DOM extraction returns.
|
||||
cmp_cookie_text: str = ""
|
||||
|
||||
async def _extract_dom_tables(page) -> list[list[str]]:
|
||||
"""D — extrahiert alle <table>-Elemente aus dem aktuellen DOM als
|
||||
list[list[str]] (jede Tabelle = Array von Tab-getrennten Zeilen).
|
||||
|
||||
Wird VOR der Navigation woandershin von jeder Document-Loading-
|
||||
Funktion aufgerufen damit jede DiscoveredDSI ihre Tabellen behaelt.
|
||||
"""
|
||||
try:
|
||||
return await page.evaluate("""
|
||||
() => {
|
||||
const out = [];
|
||||
document.querySelectorAll('table').forEach(t => {
|
||||
const rows = [];
|
||||
t.querySelectorAll('tr').forEach(tr => {
|
||||
const cells = [];
|
||||
tr.querySelectorAll('th, td').forEach(c => {
|
||||
cells.push((c.innerText || c.textContent || '').trim().replace(/\\s+/g, ' '));
|
||||
});
|
||||
if (cells.length >= 2) rows.push(cells.join('\\t'));
|
||||
});
|
||||
if (rows.length >= 3) out.push(rows);
|
||||
});
|
||||
return out.slice(0, 10);
|
||||
}
|
||||
""") or []
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
|
||||
def _matches_dsi_keyword(text: str) -> tuple[bool, str]:
|
||||
"""Check if text contains any DSI keyword. Returns (match, language)."""
|
||||
text_lower = text.lower().strip()
|
||||
@@ -506,14 +539,42 @@ async def discover_dsi_documents(
|
||||
];
|
||||
for (const sel of selectors) {
|
||||
const el = document.querySelector(sel);
|
||||
if (el && el.textContent.trim().length > 200) {
|
||||
return el.textContent.trim();
|
||||
if (el) {
|
||||
// P98: innerText statt textContent — innerText
|
||||
// respektiert Whitespace zwischen Block-Elementen.
|
||||
// textContent verkettet HTML-Tabellen-Zellen ohne
|
||||
// Spaces (VW-Cookie-Tabelle: ~100 Cookie-Namen
|
||||
// wurden zu einem Klumpen "smartSignals2UiDsmartSignals2sUiD...").
|
||||
const txt = (el.innerText || el.textContent || '').trim();
|
||||
if (txt.length > 200) return txt;
|
||||
}
|
||||
}
|
||||
// Fallback: full body minus nav/header/footer
|
||||
const body = document.body.cloneNode(true);
|
||||
body.querySelectorAll('nav, header, footer, script, style, [class*="nav"], [class*="sidebar"]').forEach(e => e.remove());
|
||||
return body.textContent?.trim() || '';
|
||||
// P98: innerText respektiert Whitespace (s.o.)
|
||||
return (body.innerText || body.textContent || '').trim();
|
||||
}
|
||||
""")
|
||||
# D — HTML-Tabellen separat extrahieren. Pro Tabelle ein
|
||||
# Array von Zeilen, jede Zeile ein Tab-getrennter String.
|
||||
# Das erlaubt dem Backend deterministischen Spalten-Parse
|
||||
# (cookies_table_parser) ohne LLM-Halluzinationen.
|
||||
tables = await page.evaluate("""
|
||||
() => {
|
||||
const out = [];
|
||||
document.querySelectorAll('table').forEach(t => {
|
||||
const rows = [];
|
||||
t.querySelectorAll('tr').forEach(tr => {
|
||||
const cells = [];
|
||||
tr.querySelectorAll('th, td').forEach(c => {
|
||||
cells.push((c.innerText || c.textContent || '').trim().replace(/\\s+/g, ' '));
|
||||
});
|
||||
if (cells.length >= 2) rows.push(cells.join('\\t'));
|
||||
});
|
||||
if (rows.length >= 3) out.push(rows);
|
||||
});
|
||||
return out;
|
||||
}
|
||||
""")
|
||||
if text and len(text) > 50:
|
||||
@@ -522,6 +583,7 @@ async def discover_dsi_documents(
|
||||
language=lang,
|
||||
doc_type="cross_domain" if not _is_allowed_domain(href, base_domain) else "html_page",
|
||||
text=text[:200000], word_count=len(text.split()),
|
||||
tables=(tables or [])[:10],
|
||||
))
|
||||
|
||||
# Recursive: search THIS page for more DSI links
|
||||
|
||||
Executable
+49
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env bash
# P83 — guards against the "stale code in the container" bug.
#
# Compares the git SHA baked into the running container (BUILD_SHA env)
# with the SHA of the local checkout (git rev-parse --short HEAD).
#
# Usage:
#   ./scripts/check-rebuild-needed.sh backend-compliance
#   ./scripts/check-rebuild-needed.sh admin-compliance
#   ./scripts/check-rebuild-needed.sh consent-tester
#
# Exit codes:
#   0  SHAs match, or the container has no BUILD_SHA (warning only)
#   1  SHAs differ -> rebuild/recreate required
#   2  container is not running / docker exec failed
#
# CI usage: after git push, before the first health check.
# Locally: can be wired into a pre-merge hook.
#
# Requirement: the container has a BUILD_SHA env (set in the Dockerfile via
# ARG BUILD_SHA + ENV BUILD_SHA=$BUILD_SHA). If missing -> warning.
#
# Environment:
#   DOCKER  docker command to use. Defaults to /usr/local/bin/docker when
#           that file is executable, otherwise to `docker` from PATH.

set -euo pipefail

SERVICE="${1:-backend-compliance}"

# Map the compose service name to the container name:
#   backend-compliance -> bp-compliance-backend
#   admin-compliance   -> bp-compliance-admin
# ${SERVICE%%-*} keeps the part BEFORE the first dash. The previous
# ${SERVICE#*-} kept the part AFTER it and produced
# "bp-compliance-compliance", a container that does not exist.
case "$SERVICE" in
  consent-tester) CONTAINER="bp-compliance-consent-tester" ;;
  *)              CONTAINER="bp-compliance-${SERVICE%%-*}" ;;
esac

if [[ -z "${DOCKER:-}" ]]; then
  if [[ -x /usr/local/bin/docker ]]; then
    DOCKER=/usr/local/bin/docker
  else
    DOCKER=docker
  fi
fi

# $DOCKER is intentionally unquoted so that overrides such as
# DOCKER="sudo docker" keep working.
# shellcheck disable=SC2086
deployed_sha=$($DOCKER exec "$CONTAINER" sh -c 'echo "${BUILD_SHA:-unknown}"' 2>/dev/null || echo "container-down")
local_sha=$(git rev-parse --short HEAD)

if [[ "$deployed_sha" == "container-down" ]]; then
  echo "❌ Container $CONTAINER is not running"
  exit 2
fi

if [[ "$deployed_sha" == "unknown" ]]; then
  echo "⚠️  $CONTAINER has no BUILD_SHA env — cannot verify."
  echo "   Add to Dockerfile: ARG BUILD_SHA / ENV BUILD_SHA=\$BUILD_SHA"
  exit 0
fi

# Prefix comparison in both directions: one side may be an abbreviated SHA
# and the other a longer (or full) one.
if [[ "$deployed_sha" != "$local_sha"* && "$local_sha" != "$deployed_sha"* ]]; then
  echo "❌ $CONTAINER is on commit $deployed_sha, local is $local_sha"
  echo "   REBUILD REQUIRED:"
  echo "     docker compose build $SERVICE && docker compose up -d --no-deps --force-recreate $SERVICE"
  exit 1
fi

echo "✓ $CONTAINER ($deployed_sha) matches local ($local_sha)"
|
||||
Reference in New Issue
Block a user