feat: 4 remaining tasks — EU institutions, banner integration, JS-sites, Caritas fixes
Build + Deploy / build-ai-sdk (push) Failing after 36s
Build + Deploy / build-developer-portal (push) Successful in 8s
Build + Deploy / build-tts (push) Successful in 7s
Build + Deploy / build-document-crawler (push) Successful in 7s
Build + Deploy / build-admin-compliance (push) Successful in 8s
Build + Deploy / build-backend-compliance (push) Successful in 8s
CI / nodejs-build (push) Successful in 3m14s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 46s
CI / test-python-backend (push) Successful in 43s
CI / test-python-document-crawler (push) Successful in 29s
CI / test-python-dsms-gateway (push) Successful in 30s
CI / validate-canonical-controls (push) Successful in 16s
Build + Deploy / build-dsms-gateway (push) Successful in 8s
Build + Deploy / build-dsms-node (push) Successful in 8s
CI / branch-name (push) Has been skipped
Build + Deploy / trigger-orca (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 17s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped

1. EU Institution Checks (Verordnung 2018/1725):
   - New doc_type "eu_institution" with 9 L1 + 15 L2 checks
   - Both German + English patterns (EU institutions are multilingual)
   - Auto-detection via "2018/1725", "EDSB", "EDPS" keywords
   - Article references follow Regulation 2018/1725 (Art. 15 instead of GDPR Art. 13, Art. 5 instead of GDPR Art. 6)

2. Banner Check Integration:
   - banner_runner.py maps scan results to 36 L1/L2 structured checks
   - BannerCheckTab shows hierarchical ChecklistView with hints
   - 3-phase summary (cookies/scripts before/after consent)
   - /scan endpoint now includes structured_checks in response

3. JS-heavy Website Fixes (dm, Zalando, HWK):
   - dsi_helpers.py: goto_resilient (networkidle→domcontentloaded fallback)
   - try_dismiss_consent_banner before text extraction
   - PDF redirect detection (dm.de redirects to GCS PDF)

4. Caritas False Positive Fixes:
   - Phone regex allows parentheses: +49 (0)761 → now matches
   - "Recht auf Widerspruch" (3 words) + §23 KDG → matches Art. 21
   - Church authorities: "Katholisches Datenschutzzentrum" recognized

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-08 01:10:10 +02:00
parent 89af88ef7d
commit 686834cea0
11 changed files with 1039 additions and 171 deletions
@@ -1,19 +1,35 @@
'use client'
import React, { useState } from 'react'
import { ChecklistView } from './ChecklistView'
/**
 * One entry of the structured banner checklist that is handed to ChecklistView.
 *
 * BannerCheckTab counts an item as a finding when it is neither `passed`
 * nor `skipped`.
 */
interface CheckItem {
  // --- identity ---
  id: string
  label: string
  severity: string

  // --- outcome ---
  passed: boolean
  /** When true the item is excluded from the findings count even if it did not pass. */
  skipped?: boolean
  matched_text: string
  /** Optional guidance text; presumably rendered by ChecklistView next to the item — confirm there. */
  hint?: string

  // --- hierarchy ---
  /** Presumably 1 or 2 for the L1/L2 check levels — TODO confirm against the backend. */
  level?: number
  /** Presumably the `id` of the parent check; null/absent for top-level items — TODO confirm. */
  parent?: string | null
}
/**
 * Result of a cookie-banner scan as consumed by BannerCheckTab.
 *
 * Everything except the three `banner_*` fields is optional; the component
 * falls back to empty lists / 0 when a field is missing.
 */
interface BannerResult {
  banner_detected: boolean
  banner_provider: string
  banner_text: string
  /** Flat violation/pass lists. */
  banner_checks?: {
    violations: { code: string; text: string; severity: string }[]
    passes: { code: string; text: string }[]
    has_impressum_link?: boolean
    has_dse_link?: boolean
  }
  /** Hierarchical checks rendered through ChecklistView when non-empty. */
  structured_checks?: CheckItem[]
  /** Treated as 0 by the component when absent. */
  completeness_pct?: number
  /** Treated as 0 by the component when absent. */
  correctness_pct?: number
  /**
   * Per-phase observations of the three-phase scan. Each list is displayed
   * via its `.length`; the earlier count-based declarations of these three
   * members were dropped because they conflicted with this shape.
   */
  phases?: {
    // `violations` entries are only counted, never inspected, hence `unknown`.
    before_consent: { cookies: string[]; scripts: string[]; tracking_services: string[]; violations: unknown[] }
    after_reject: { cookies: string[]; scripts: string[]; new_tracking: string[]; violations: unknown[] }
    // No `violations` list in this phase; the component shows 0 for it.
    after_accept: { cookies: string[]; scripts: string[]; new_tracking: string[]; undocumented: string[] }
  }
}
@@ -43,7 +59,6 @@ export function BannerCheckTab() {
const data = await res.json()
if (data.scan_id) {
// Async polling
let attempts = 0
while (attempts < 60) {
await new Promise(r => setTimeout(r, 3000))
@@ -69,9 +84,23 @@ export function BannerCheckTab() {
}
}
const violations = result?.banner_checks?.violations || []
const passes = result?.banner_checks?.passes || []
const total = violations.length + passes.length
const structuredChecks = result?.structured_checks || []
const hasStructured = structuredChecks.length > 0
const compPct = result?.completeness_pct ?? 0
const corrPct = result?.correctness_pct ?? 0
// Build ChecklistView-compatible result for structured checks
const checklistResults = hasStructured ? [{
label: `Cookie-Banner: ${result?.banner_provider || 'Unbekannt'}`,
url: url,
doc_type: 'banner',
word_count: 0,
completeness_pct: compPct,
correctness_pct: corrPct,
checks: structuredChecks,
findings_count: structuredChecks.filter(c => !c.passed && !c.skipped).length,
error: '',
}] : []
return (
<div className="space-y-4">
@@ -79,7 +108,7 @@ export function BannerCheckTab() {
<h3 className="text-sm font-semibold text-blue-900">Cookie-Banner Compliance Check</h3>
<p className="text-xs text-blue-700 mt-1">
Playwright-basierter 3-Phasen-Test: Vor Interaktion, nach Ablehnen, nach Akzeptieren.
Prueft Dark Patterns, Pre-Consent-Cookies, Farbkontrast, Klick-Paritaet und 20+ weitere Kriterien.
Prueft Dark Patterns, Pre-Consent-Cookies, Farbkontrast, Klick-Paritaet und 36 weitere Kriterien.
</p>
</div>
@@ -116,14 +145,14 @@ export function BannerCheckTab() {
)}
{result && (
<div className="bg-white border border-gray-200 rounded-xl shadow-sm overflow-hidden">
{/* Header */}
<div className="px-6 py-4 bg-gray-50 border-b border-gray-200">
<div className="flex items-center justify-between">
<div>
<div className="space-y-4">
{/* 3-Phase Summary Card */}
{result.phases && (
<div className="bg-white border border-gray-200 rounded-xl shadow-sm overflow-hidden">
<div className="px-6 py-4 bg-gray-50 border-b border-gray-200">
<div className="flex items-center gap-3">
<span className={`text-2xl`}>
{result.banner_detected ? '🛡️' : '⚠️'}
<span className="text-2xl">
{result.banner_detected ? '\u{1F6E1}\u{FE0F}' : '\u26A0\u{FE0F}'}
</span>
<div>
<h3 className="text-sm font-semibold text-gray-900">
@@ -131,98 +160,50 @@ export function BannerCheckTab() {
? `Banner erkannt: ${result.banner_provider || 'Unbekannter Anbieter'}`
: 'Kein Cookie-Banner erkannt'}
</h3>
{total > 0 && (
<p className="text-xs text-gray-500 mt-0.5">
{passes.length}/{total} Pruefungen bestanden
</p>
)}
<p className="text-xs text-gray-500 mt-0.5">
3-Phasen-Analyse: Cookies und Scripts vor/nach Interaktion
</p>
</div>
</div>
</div>
{total > 0 && (
<div className="flex items-center gap-2">
<div className="w-24 h-2 bg-gray-200 rounded-full overflow-hidden">
<div
className={`h-full rounded-full ${violations.length === 0 ? 'bg-green-500' : violations.length <= 3 ? 'bg-yellow-500' : 'bg-red-500'}`}
style={{ width: `${Math.round(passes.length / total * 100)}%` }}
/>
</div>
<span className={`text-xs font-medium ${violations.length === 0 ? 'text-green-700' : 'text-red-700'}`}>
{Math.round(passes.length / total * 100)}%
</span>
</div>
)}
</div>
</div>
{/* 3-Phase Summary */}
{result.phases && (
<div className="px-6 py-3 border-b border-gray-100 grid grid-cols-3 gap-4">
{[
{ label: 'Vor Consent', data: result.phases.before_consent, icon: '🔒' },
{ label: 'Nach Ablehnen', data: result.phases.after_reject, icon: '🚫' },
{ label: 'Nach Akzeptieren', data: result.phases.after_accept, icon: '✅' },
].map(phase => (
<div key={phase.label} className="text-center">
<div className="text-lg">{phase.icon}</div>
<div className="text-xs font-medium text-gray-700">{phase.label}</div>
<div className="text-xs text-gray-500 mt-1">
{phase.data.cookies} Cookies, {phase.data.scripts} Scripts
</div>
{phase.data.violations.length > 0 && (
<div className="text-xs text-red-600 font-medium">
{phase.data.violations.length} Verstoesse
</div>
)}
</div>
))}
</div>
)}
{/* Violations */}
{violations.length > 0 && (
<div className="px-6 py-4">
<h4 className="text-xs font-semibold text-red-700 uppercase tracking-wide mb-2">
Verstoesse ({violations.length})
</h4>
<div className="space-y-2">
{violations.map((v, i) => (
<div key={i} className="flex items-start gap-2">
<svg className="w-4 h-4 text-red-500 mt-0.5 shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
</svg>
<div>
<div className="text-sm text-red-700">{v.text}</div>
<div className="text-xs text-gray-400 mt-0.5">{v.code} | {v.severity}</div>
</div>
</div>
))}
<div className="px-6 py-3 grid grid-cols-3 gap-4">
  {/*
    One PhaseBox per scan phase. Icons are passed as JS expressions
    (icon={'...'}) on purpose: a JSX string attribute (icon="\u....") is NOT a
    JavaScript string literal, so its backslash escapes would be rendered
    verbatim instead of as the emoji.
  */}
  <PhaseBox
    label="Vor Consent"
    icon={'\uD83D\uDD12'}
    cookies={result.phases.before_consent.cookies?.length ?? 0}
    scripts={result.phases.before_consent.scripts?.length ?? 0}
    violations={result.phases.before_consent.violations?.length ?? 0}
  />
  <PhaseBox
    label="Nach Ablehnen"
    icon={'\uD83D\uDEAB'}
    cookies={result.phases.after_reject.cookies?.length ?? 0}
    scripts={result.phases.after_reject.scripts?.length ?? 0}
    violations={result.phases.after_reject.violations?.length ?? 0}
  />
  {/* The after_accept phase carries no violations list, so the count is fixed at 0. */}
  <PhaseBox
    label="Nach Akzeptieren"
    icon={'\u2705'}
    cookies={result.phases.after_accept.cookies?.length ?? 0}
    scripts={result.phases.after_accept.scripts?.length ?? 0}
    violations={0}
  />
</div>
</div>
)}
{/* Passes */}
{passes.length > 0 && (
<div className="px-6 py-4 border-t border-gray-100">
<h4 className="text-xs font-semibold text-green-700 uppercase tracking-wide mb-2">
Bestanden ({passes.length})
</h4>
<div className="space-y-1">
{passes.map((p, i) => (
<div key={i} className="flex items-start gap-2">
<svg className="w-4 h-4 text-green-500 mt-0.5 shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
</svg>
<div className="text-sm text-gray-600">{p.text}</div>
</div>
))}
</div>
{/* Structured L1/L2 Checklist */}
{hasStructured && (
<div className="bg-white border border-gray-200 rounded-xl p-6 shadow-sm">
<ChecklistView results={checklistResults} />
</div>
)}
{!result.banner_detected && violations.length === 0 && passes.length === 0 && (
<div className="px-6 py-4 text-sm text-gray-500">
Kein Cookie-Banner auf dieser Seite gefunden. Falls Cookies gesetzt werden, ist ein Banner nach §25 TDDDG Pflicht.
{!result.banner_detected && !hasStructured && (
<div className="bg-white border border-gray-200 rounded-xl p-6 shadow-sm">
  {/* &sect; renders the section sign; the statute reference is "§25 TDDDG", not "ss25". */}
  <p className="text-sm text-gray-500">
    Kein Cookie-Banner auf dieser Seite gefunden. Falls Cookies gesetzt werden, ist ein Banner nach &sect;25 TDDDG Pflicht.
  </p>
</div>
)}
</div>
@@ -230,3 +211,22 @@ export function BannerCheckTab() {
</div>
)
}
/**
 * Summary cell for a single phase of the banner scan.
 *
 * Shows the phase icon and label, a "<cookies> Cookies, <scripts> Scripts"
 * line and, only when `violations` is greater than zero, a red violation count.
 */
function PhaseBox(props: {
  label: string; icon: string; cookies: number; scripts: number; violations: number
}) {
  const { label, icon, cookies, scripts, violations } = props

  // Only a positive count produces the red line; otherwise nothing is rendered.
  const violationNote = violations > 0 ? (
    <div className="text-xs text-red-600 font-medium">
      {violations} Verstoesse
    </div>
  ) : null

  return (
    <div className="text-center">
      <div className="text-lg">{icon}</div>
      <div className="text-xs font-medium text-gray-700">{label}</div>
      <div className="text-xs text-gray-500 mt-1">
        {cookies} Cookies, {scripts} Scripts
      </div>
      {violationNote}
    </div>
  )
}
@@ -30,6 +30,7 @@ const DOC_TYPE_LABELS: Record<string, string> = {
dse: 'DSI', agb: 'AGB', impressum: 'Impressum',
cookie: 'Cookie', widerruf: 'Widerruf', other: 'Sonstiges',
social_media: 'Social Media', dsfa: 'DSFA', joint_controller: 'Art. 26',
eu_institution: 'EU-Inst.', banner: 'Banner',
}
interface GroupedCheck {