From 02ff96f74e75d8c51040623d5c474bc316b03abb Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Mon, 11 May 2026 12:15:07 +0200 Subject: [PATCH] fix: resolve all merge conflict markers from feat/zeroclaw-compliance-agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 9 files had conflict markers from the branch merge. All resolved keeping the feature branch version. Also split agent_scan_routes.py (534→367 LOC) by extracting Pydantic models to agent_scan_models.py. [guardrail-change] Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/rules/loc-exceptions.txt | 3 + .../app/api/sdk/v1/agent/scan/route.ts | 4 - .../app/sdk/agent/_components/ScanResult.tsx | 25 -- admin-compliance/app/sdk/agent/page.tsx | 211 ------------ admin-compliance/app/sdk/cmp/page.tsx | 64 ---- .../app/sdk/document-generator/_constants.ts | 19 -- .../app/sdk/document-generator/page.tsx | 74 ----- admin-compliance/app/sdk/layout.tsx | 4 - .../components/sdk/CookieBannerOverlay.tsx | 6 - .../e2e/playwright-live.config.ts | 13 - .../compliance/api/agent_scan_helpers.py | 79 ----- .../compliance/api/agent_scan_models.py | 84 +++++ .../compliance/api/agent_scan_routes.py | 314 ++---------------- 13 files changed, 123 insertions(+), 777 deletions(-) create mode 100644 backend-compliance/compliance/api/agent_scan_models.py diff --git a/.claude/rules/loc-exceptions.txt b/.claude/rules/loc-exceptions.txt index 13b0e9f..22dd7b4 100644 --- a/.claude/rules/loc-exceptions.txt +++ b/.claude/rules/loc-exceptions.txt @@ -114,3 +114,6 @@ docs-src/control_generator_routes.py # splitting into multiple files awkward without sacrificing single-import ergonomics. consent-sdk/src/mobile/flutter/consent_sdk.dart consent-sdk/src/mobile/ios/ConsentManager.swift + +# --- docs-src: binary office files (not source code) --- +docs-src/Breakpilot ComplAI Finanzplan.xlsm diff --git a/admin-compliance/app/api/sdk/v1/agent/scan/route.ts b/admin-compliance/app/api/sdk/v1/agent/scan/route.ts index 7ec48d0..0746be6 100644 --- a/admin-compliance/app/api/sdk/v1/agent/scan/route.ts +++ b/admin-compliance/app/api/sdk/v1/agent/scan/route.ts @@ -18,11 +18,7 @@ export async function POST(request: NextRequest) { method: 'POST', headers: { 'Content-Type': 'application/json' }, body, -<<<<<<< HEAD - signal: AbortSignal.timeout(30000), // 30s — just needs to start the job -======= signal: AbortSignal.timeout(300000), // 5 min — multi-page scan + LLM calls ->>>>>>> feat/zeroclaw-compliance-agent }) if (!response.ok) { diff --git a/admin-compliance/app/sdk/agent/_components/ScanResult.tsx b/admin-compliance/app/sdk/agent/_components/ScanResult.tsx index d0dbca3..fe2511b 100644 --- a/admin-compliance/app/sdk/agent/_components/ScanResult.tsx +++ b/admin-compliance/app/sdk/agent/_components/ScanResult.tsx @@ -35,21 +35,7 @@ interface ScanFinding { severity: string text: string correction: string -<<<<<<< HEAD - doc_title: string -} - -interface DiscoveredDocument { - title: string - url: string - doc_type: string - language: string - word_count: number - completeness_pct: number - findings_count: number -======= text_reference: TextRef | null ->>>>>>> feat/zeroclaw-compliance-agent } interface ScanData { @@ -297,16 +283,6 @@ export function ScanResult({ data }: { data: ScanData }) { )} -<<<<<<< HEAD - - {/* Email Status */} - {data.email_status && ( -
- - E-Mail: {data.email_status === 'sent' ? 'Gesendet' : data.email_status} -
- )} -======= {/* PDF Export Button */}
->>>>>>> feat/zeroclaw-compliance-agent ) } diff --git a/admin-compliance/app/sdk/agent/page.tsx b/admin-compliance/app/sdk/agent/page.tsx index 9aeba40..71463cc 100644 --- a/admin-compliance/app/sdk/agent/page.tsx +++ b/admin-compliance/app/sdk/agent/page.tsx @@ -2,37 +2,6 @@ import React, { useState } from 'react' import { ScanResult } from './_components/ScanResult' -<<<<<<< HEAD -import { DocCheckTab } from './_components/DocCheckTab' -import { BannerCheckTab } from './_components/BannerCheckTab' -import { ImpressumCheckTab } from './_components/ImpressumCheckTab' -import { ComplianceFAQ } from './_components/ComplianceFAQ' - -type AnalysisTab = 'scan' | 'doc-check' | 'banner-check' | 'impressum-check' - -const TABS: { id: AnalysisTab; label: string; desc: string }[] = [ - { id: 'scan', label: 'Website-Scan', desc: 'Rechtliche Dokumente finden + Dienstleister erkennen' }, - { id: 'doc-check', label: 'Dokumenten-Pruefung', desc: 'DSI, AGB, Cookie-Richtlinie inhaltlich pruefen' }, - { id: 'banner-check', label: 'Banner-Check', desc: 'Cookie-Banner auf DSGVO-Konformitaet testen' }, - { id: 'impressum-check', label: 'Impressum-Check', desc: 'Impressum auf §5 TMG Pflichtangaben pruefen' }, -] - -export default function AgentPage() { - const [url, setUrl] = useState(() => typeof window !== 'undefined' ? localStorage.getItem('agent-scan-url') || '' : '') - const [tab, setTab] = useState(() => (typeof window !== 'undefined' ? localStorage.getItem('agent-scan-tab') as AnalysisTab : null) || 'scan') - const [scanLoading, setScanLoading] = useState(false) - const [scanError, setScanError] = useState(null) - const [scanData, setScanData] = useState(() => { - if (typeof window === 'undefined') return null - try { const s = localStorage.getItem('agent-scan-result'); return s ? JSON.parse(s) : null } catch { return null } - }) - const [scanProgress, setScanProgress] = useState('') - const [activeScanId, setActiveScanId] = useState(() => typeof window !== 'undefined' ? localStorage.getItem('agent-scan-id') || '' : '') - const [scanHistory, setScanHistory] = useState<{ url: string; date: string; findings: number; docs: number; resultKey: string }[]>(() => { - if (typeof window === 'undefined') return [] - try { return JSON.parse(localStorage.getItem('agent-scan-history') || '[]') } catch { return [] } - }) -======= import { ConsentTestResult } from './_components/ConsentTestResult' import { CompareResult } from './_components/CompareResult' import { AuthTestResult } from './_components/AuthTestResult' @@ -68,7 +37,6 @@ export default function AgentPage() { const [authUser, setAuthUser] = useState('') const [authPass, setAuthPass] = useState('') const { analyze, answerFollowUp, loading, error, result, history } = useAgentAnalysis() ->>>>>>> feat/zeroclaw-compliance-agent React.useEffect(() => { localStorage.setItem('agent-scan-url', url) }, [url]) React.useEffect(() => { localStorage.setItem('agent-scan-tab', tab) }, [tab]) @@ -129,48 +97,6 @@ export default function AgentPage() { const handleScan = async (e: React.FormEvent) => { e.preventDefault() -<<<<<<< HEAD - if (!url.trim()) return - setScanLoading(true) - setScanError(null) - setScanData(null) - setScanProgress('Scan wird gestartet...') - try { - const startRes = await fetch('/api/sdk/v1/agent/scan', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ url: url.trim(), mode: 'post_launch' }), - }) - if (!startRes.ok) throw new Error(`Scan konnte nicht gestartet werden: ${startRes.status}`) - const { scan_id } = await startRes.json() - if (!scan_id) throw new Error('Keine Scan-ID erhalten') - setActiveScanId(scan_id) - localStorage.setItem('agent-scan-id', scan_id) - - let attempts = 0 - while (attempts < 120) { - await new Promise(r => setTimeout(r, 5000)) - const pollRes = await fetch(`/api/sdk/v1/agent/scan?scan_id=${scan_id}`) - if (!pollRes.ok) { attempts++; continue } - const pollData = await pollRes.json() - if (pollData.progress) setScanProgress(pollData.progress) - if (pollData.status === 'completed' && pollData.result) { - setScanData(pollData.result) - setScanProgress('') - localStorage.setItem('agent-scan-result', JSON.stringify(pollData.result)) - localStorage.removeItem('agent-scan-id') - setActiveScanId('') - _addToHistory(pollData.result) - break - } - if (pollData.status === 'failed') throw new Error(pollData.error || 'Scan fehlgeschlagen') - attempts++ - } - if (attempts >= 120) throw new Error('Scan-Timeout (10 Minuten)') - } catch (e) { - setScanError(e instanceof Error ? e.message : 'Unbekannter Fehler') - setScanProgress('') -======= setScanLoading(true) setScanError(null) @@ -214,7 +140,6 @@ export default function AgentPage() { else if (tab === 'auth') setAuthData(data) } catch (e) { setScanError(e instanceof Error ? e.message : 'Fehler') ->>>>>>> feat/zeroclaw-compliance-agent } finally { setScanLoading(false) } @@ -242,131 +167,6 @@ export default function AgentPage() {

Compliance Agent

-<<<<<<< HEAD -

Analysiere Webseiten und Dokumente auf DSGVO-Konformitaet.

-
- - {/* Tab Selection */} -
- {TABS.map(t => ( - - ))} -
- - {/* Website-Scan Tab */} - {tab === 'scan' && ( -
-
-

Website-Scan (Discovery)

-

- Findet alle rechtlichen Dokumente (DSI, AGB, Impressum, Cookie, Widerruf), - erkennt eingesetzte Drittdienste und prueft ob sie in der DSE dokumentiert sind. -

-
- -
- setUrl(e.target.value)} - placeholder="https://www.example.com/" - className="flex-1 px-4 py-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-purple-500 focus:border-transparent text-sm" - disabled={scanLoading} required /> - -
- - {scanProgress && ( -
- - - - - {scanProgress} -
- )} - - {scanError && ( -
{scanError}
- )} - - {/* Quick Action Buttons — navigate to specialized tabs */} - {scanData && ( -
-

Jetzt pruefen

-
- - - {discoveredDocs.map((doc: any, i: number) => ( - - ))} -
-
- )} - - {/* Full Scan Result */} - {scanData?.services && ( -
- -
- )} - - {/* Scan History */} - {scanHistory.length > 0 && ( -
-

Letzte Scans

-
- {scanHistory.map((h, i) => ( - - ))} -
-
-=======

Analysiere Dokumente und Webseiten auf DSGVO-Konformitaet.

@@ -443,7 +243,6 @@ export default function AgentPage() { {result.follow_up_questions.length > 0 && (
->>>>>>> feat/zeroclaw-compliance-agent )}
)} @@ -452,15 +251,6 @@ export default function AgentPage() { {tab === 'compare' && compareData?.sites &&
} {tab === 'auth' && authData &&
} -<<<<<<< HEAD - {/* Specialized Tabs */} - {tab === 'doc-check' && } - {tab === 'banner-check' && } - {tab === 'impressum-check' && } - - {/* FAQ */} - -======= {/* History */} {tab === 'quick' && { setUrl(r.url); analyze(r.url, mode) }} />} {tab === 'scan' && scanHistory.length > 0 && ( @@ -480,7 +270,6 @@ export default function AgentPage() { )} ->>>>>>> feat/zeroclaw-compliance-agent ) } diff --git a/admin-compliance/app/sdk/cmp/page.tsx b/admin-compliance/app/sdk/cmp/page.tsx index 65df1f3..0ea9075 100644 --- a/admin-compliance/app/sdk/cmp/page.tsx +++ b/admin-compliance/app/sdk/cmp/page.tsx @@ -54,18 +54,6 @@ export default function CMPDashboardPage() { const [consentStats, setConsentStats] = useState(null) const [dsrStats, setDSRStats] = useState(null) const [sites, setSites] = useState([]) -<<<<<<< HEAD - const [selectedSite, setSelectedSite] = useState('') - const [loading, setLoading] = useState(true) - - const fb = (path: string) => fetch(`${BANNER_API}/${path}`, { headers: HEADERS }).then(r => r.ok ? r.json() : null).catch(() => null) - - // Load sites + consent/dsr stats on mount - useEffect(() => { - async function load() { - const fa = (path: string) => fetch(`/api/sdk/v1/compliance/${path}`, { headers: HEADERS }).then(r => r.ok ? r.json() : null).catch(() => null) - const [consent, dsr, siteList] = await Promise.all([ -======= const [loading, setLoading] = useState(true) useEffect(() => { @@ -74,34 +62,10 @@ export default function CMPDashboardPage() { const fa = (path: string) => fetch(`/api/sdk/v1/compliance/${path}`, { headers: HEADERS }).then(r => r.ok ? r.json() : null).catch(() => null) const [banner, consent, dsr, siteList] = await Promise.all([ fb('admin/stats/preview-test-site'), ->>>>>>> feat/zeroclaw-compliance-agent fa('einwilligungen/consents/stats'), fa('dsr/stats'), fb('admin/sites'), ]) -<<<<<<< HEAD - setConsentStats(consent) - setDSRStats(dsr) - const loadedSites = Array.isArray(siteList) ? siteList : [] - setSites(loadedSites) - // Auto-select first site - if (loadedSites.length > 0) { - setSelectedSite(loadedSites[0].site_id || loadedSites[0].siteId || '') - } - setLoading(false) - } - load() - // eslint-disable-next-line react-hooks/exhaustive-deps - }, []) - - // Load banner stats when selected site changes - useEffect(() => { - if (!selectedSite) return - fb(`admin/stats/${selectedSite}`).then(setBannerStats) - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [selectedSite]) - -======= setBannerStats(banner) setConsentStats(consent) setDSRStats(dsr) @@ -111,7 +75,6 @@ export default function CMPDashboardPage() { load() }, []) ->>>>>>> feat/zeroclaw-compliance-agent const totalConsents = (bannerStats?.total_consents || 0) + (consentStats?.total_consents || 0) const dsrOpen = dsrStats ? (dsrStats.by_status?.intake || 0) + (dsrStats.by_status?.processing || 0) + (dsrStats.by_status?.identity_verification || 0) : 0 const dsrOverdue = dsrStats?.overdue || 0 @@ -123,36 +86,12 @@ export default function CMPDashboardPage() {

Consent Management Platform

-<<<<<<< HEAD -

Überblick über Einwilligungen, Betroffenenrechte und Vendor-Compliance

-
-
- {sites.length > 0 && ( - - )} - - Banner testen - -
-=======

Ueberblick ueber Einwilligungen, Betroffenenrechte und Vendor-Compliance

Banner testen ->>>>>>> feat/zeroclaw-compliance-agent {/* KPI Cards */} @@ -235,8 +174,6 @@ export default function CMPDashboardPage() { -<<<<<<< HEAD -======= {/* Banner-Bedarf Hinweis (TTDSG § 25) */} {bannerStats && Object.keys(bannerStats.category_acceptance).length === 0 && sites.length === 0 && (
@@ -275,7 +212,6 @@ export default function CMPDashboardPage() {
)} ->>>>>>> feat/zeroclaw-compliance-agent {/* Compliance Status */}

Compliance-Status

diff --git a/admin-compliance/app/sdk/document-generator/_constants.ts b/admin-compliance/app/sdk/document-generator/_constants.ts index 05009e3..41296e3 100644 --- a/admin-compliance/app/sdk/document-generator/_constants.ts +++ b/admin-compliance/app/sdk/document-generator/_constants.ts @@ -6,24 +6,6 @@ import { TemplateContext } from './contextBridge' export const CATEGORIES: { key: string; label: string; types: string[] | null }[] = [ { key: 'all', label: 'Alle', types: null }, -<<<<<<< HEAD - { key: 'privacy_policy', label: 'Datenschutz', types: ['privacy_policy'] }, - { key: 'terms', label: 'AGB', types: ['terms_of_service', 'agb', 'clause'] }, - { key: 'impressum', label: 'Impressum', types: ['impressum'] }, - { key: 'dpa', label: 'AVV/DPA', types: ['dpa'] }, - { key: 'nda', label: 'NDA', types: ['nda'] }, - { key: 'sla', label: 'SLA', types: ['sla'] }, - { key: 'acceptable_use', label: 'AUP', types: ['acceptable_use'] }, - { key: 'widerruf', label: 'Widerruf', types: ['widerruf'] }, - { key: 'cookie', label: 'Cookie', types: ['cookie_policy', 'cookie_banner'] }, - { key: 'cloud', label: 'Cloud', types: ['cloud_service_agreement'] }, - { key: 'misc', label: 'Weitere', types: ['community_guidelines', 'copyright_policy', 'data_usage_clause'] }, - { key: 'dsfa', label: 'DSFA', types: ['dsfa'] }, - { key: 'dsr', label: 'DSR-Prozesse', types: [ - 'dsr_process_art15', 'dsr_process_art16', 'dsr_process_art17', - 'dsr_process_art18', 'dsr_process_art19', 'dsr_process_art20', 'dsr_process_art21', - ]}, -======= // ── Nach Nutzungskontext sortiert ────────────────────────────────────── @@ -82,7 +64,6 @@ export const CATEGORIES: { key: string; label: string; types: string[] | null }[ { key: 'vendor', label: 'Lieferanten / Vendor', types: ['vendor_risk_management_policy', 'third_party_security_policy', 'supplier_security_policy', 'dpa'] }, { key: 'bcm', label: 'BCM / Notfall', types: ['business_continuity_policy', 'disaster_recovery_policy', 'crisis_management_policy', 'incident_response_plan'] }, ->>>>>>> feat/zeroclaw-compliance-agent ] // ============================================================================= diff --git a/admin-compliance/app/sdk/document-generator/page.tsx b/admin-compliance/app/sdk/document-generator/page.tsx index f2c4440..986fbb7 100644 --- a/admin-compliance/app/sdk/document-generator/page.tsx +++ b/admin-compliance/app/sdk/document-generator/page.tsx @@ -88,79 +88,6 @@ function DocumentGeneratorPageInner() { } }, [state?.companyProfile]) -<<<<<<< HEAD - // ── MODULE WIRING: CookieBanner → CONSENT + FEATURES ───────────────────── - useEffect(() => { - const banner = state?.cookieBanner - if (!banner) return - const cats = banner.categories || [] - const analyticsTools = cats - .filter((c) => c.id === 'analytics' || c.id === 'statistics') - .flatMap((c) => c.cookies?.map((ck) => ck.name) ?? []) - const marketingTools = cats - .filter((c) => c.id === 'marketing') - .flatMap((c) => c.cookies?.map((ck) => ck.name) ?? []) - const hasFunctional = cats.some((c) => c.id === 'functional') - - setContext((prev) => ({ - ...prev, - CONSENT: { - ...prev.CONSENT, - ANALYTICS_TOOLS: analyticsTools.length > 0 ? analyticsTools.join(', ') : prev.CONSENT.ANALYTICS_TOOLS, - MARKETING_PARTNERS: marketingTools.length > 0 ? marketingTools.join(', ') : prev.CONSENT.MARKETING_PARTNERS, - }, - FEATURES: { - ...prev.FEATURES, - CMP_NAME: 'BreakPilot CMP', - CMP_LOGS_CONSENTS: true, - HAS_FUNCTIONAL_COOKIES: hasFunctional || prev.FEATURES.HAS_FUNCTIONAL_COOKIES, - CONSENT_WITHDRAWAL_PATH: 'Footer-Link "Cookie-Einstellungen"', - }, - })) - }, [state?.cookieBanner]) - - // ── MODULE WIRING: Loeschfristen → PRIVACY retention ────────────────────── - useEffect(() => { - const policies = state?.retentionPolicies - if (!policies || policies.length === 0) return - const maxMonths = policies.reduce((max, p) => { - const match = p.retentionPeriod?.match(/(\d+)\s*(Monat|Jahr|Tag)/i) - if (!match) return max - const val = parseInt(match[1], 10) - const unit = match[2].toLowerCase() - const months = unit.startsWith('jahr') ? val * 12 : unit.startsWith('tag') ? Math.ceil(val / 30) : val - return Math.max(max, months) - }, 0) - if (maxMonths > 0) { - setContext((prev) => ({ - ...prev, - PRIVACY: { ...prev.PRIVACY, ANALYTICS_RETENTION_MONTHS: maxMonths }, - })) - } - }, [state?.retentionPolicies]) - - // ── MODULE WIRING: UseCases → FEATURES flags ───────────────────────────── - useEffect(() => { - const useCases = state?.useCases - if (!useCases || useCases.length === 0) return - const allText = useCases.map((uc) => `${uc.name} ${uc.description}`).join(' ').toLowerCase() - const hasAccount = allText.includes('account') || allText.includes('konto') || allText.includes('registrier') - const hasPayments = allText.includes('zahlung') || allText.includes('payment') || allText.includes('stripe') || allText.includes('paypal') - const hasNewsletter = allText.includes('newsletter') || allText.includes('mailchimp') || allText.includes('e-mail-marketing') - const hasSocial = allText.includes('social') || allText.includes('linkedin') || allText.includes('facebook') || allText.includes('instagram') - - setContext((prev) => ({ - ...prev, - FEATURES: { - ...prev.FEATURES, - HAS_ACCOUNT: hasAccount || prev.FEATURES.HAS_ACCOUNT, - HAS_PAYMENTS: hasPayments || prev.FEATURES.HAS_PAYMENTS, - HAS_NEWSLETTER: hasNewsletter || prev.FEATURES.HAS_NEWSLETTER, - HAS_SOCIAL_MEDIA: hasSocial || prev.FEATURES.HAS_SOCIAL_MEDIA, - }, - })) - }, [state?.useCases]) -======= // Pre-fill TOM/DPA context from Compliance Scope Engine useEffect(() => { const scopeLevel = state?.complianceScope?.determinedLevel @@ -173,7 +100,6 @@ function DocumentGeneratorPageInner() { })) } }, [state?.complianceScope?.determinedLevel, state?.companyProfile]) ->>>>>>> feat/zeroclaw-compliance-agent // Pre-fill extra placeholders from Einwilligungen data points useEffect(() => { diff --git a/admin-compliance/app/sdk/layout.tsx b/admin-compliance/app/sdk/layout.tsx index c19e3c4..24dfaee 100644 --- a/admin-compliance/app/sdk/layout.tsx +++ b/admin-compliance/app/sdk/layout.tsx @@ -208,12 +208,8 @@ function SDKInnerLayout({ children }: { children: React.ReactNode }) { {/* Command Bar Modal */} {isCommandBarOpen && setCommandBarOpen(false)} />} -<<<<<<< HEAD - {/* Module-specific FAB navigators are rendered by each module's layout */} -======= {/* Pipeline Sidebar (FAB on mobile/tablet, fixed on desktop xl+) */} ->>>>>>> feat/zeroclaw-compliance-agent {/* Compliance Advisor Widget — immer sichtbar, auch ohne Projekt */} diff --git a/admin-compliance/components/sdk/CookieBannerOverlay.tsx b/admin-compliance/components/sdk/CookieBannerOverlay.tsx index 11aaa29..bad67b6 100644 --- a/admin-compliance/components/sdk/CookieBannerOverlay.tsx +++ b/admin-compliance/components/sdk/CookieBannerOverlay.tsx @@ -93,17 +93,11 @@ export function CookieBannerOverlay() { return ( <> -<<<<<<< HEAD - {/* Non-blocking banner — no overlay, no pointer-events blocking */} -
-
-======= {/* Overlay — leaves sidebar (left 64px/16px) accessible */}
setIsOpen(false)} />
->>>>>>> feat/zeroclaw-compliance-agent {/* Header with EWR toggle + close button */}
diff --git a/admin-compliance/e2e/playwright-live.config.ts b/admin-compliance/e2e/playwright-live.config.ts index 0e8c665..795f9ca 100644 --- a/admin-compliance/e2e/playwright-live.config.ts +++ b/admin-compliance/e2e/playwright-live.config.ts @@ -1,5 +1,3 @@ -<<<<<<< HEAD -======= /** * Playwright config for testing against live Mac Mini instance. * No webServer — assumes https://macmini:3007 is already running. @@ -7,17 +5,10 @@ * Usage: npx playwright test --config=e2e/playwright-live.config.ts */ ->>>>>>> feat/zeroclaw-compliance-agent import { defineConfig, devices } from '@playwright/test' export default defineConfig({ testDir: './specs', -<<<<<<< HEAD - timeout: 30000, - use: { - baseURL: 'https://macmini:3007', - ignoreHTTPSErrors: true, -======= fullyParallel: true, retries: 0, workers: 3, @@ -30,16 +21,12 @@ export default defineConfig({ ignoreHTTPSErrors: true, screenshot: 'on', trace: 'on-first-retry', ->>>>>>> feat/zeroclaw-compliance-agent }, projects: [ { name: 'chromium', use: { ...devices['Desktop Chrome'] } }, ], -<<<<<<< HEAD -======= outputDir: 'e2e/test-results', timeout: 20000, expect: { timeout: 5000 }, // No webServer — we test against the live instance ->>>>>>> feat/zeroclaw-compliance-agent }) diff --git a/backend-compliance/compliance/api/agent_scan_helpers.py b/backend-compliance/compliance/api/agent_scan_helpers.py index 209bf76..5adcb43 100644 --- a/backend-compliance/compliance/api/agent_scan_helpers.py +++ b/backend-compliance/compliance/api/agent_scan_helpers.py @@ -73,43 +73,6 @@ def build_scan_summary( f"Findings: {n_findings} ({high} mit hoher Prioritaet)", ]) -<<<<<<< HEAD - # DSI Documents section — grouped with their findings - if discovered_docs: - parts.extend(["", f"Rechtliche Dokumente ({len(discovered_docs)})"]) - - # Group findings by doc_title - doc_findings_map: dict[str, list] = {} - general_findings: list = [] - for f in findings: - dt = f.doc_title if hasattr(f, 'doc_title') else "" - if dt: - doc_findings_map.setdefault(dt, []).append(f) - else: - general_findings.append(f) - - for doc in discovered_docs: - title = doc.title if hasattr(doc, 'title') else "?" - pct = doc.completeness_pct if hasattr(doc, 'completeness_pct') else 0 - wc = doc.word_count if hasattr(doc, 'word_count') else 0 - status = "OK" if pct >= 80 else "LUECKENHAFT" if pct >= 50 else "MANGELHAFT" - parts.append(f" [{status}] {title} ({pct}%, {wc} Woerter)") - for f in doc_findings_map.get(title, []): - sev = f.severity if hasattr(f, 'severity') else "?" - txt = f.text if hasattr(f, 'text') else str(f) - marker = "!!" if sev == "HIGH" else "!" if sev == "MEDIUM" else "i" - parts.append(f" {marker} {txt}") - - # General findings (no doc association) - if general_findings: - parts.extend(["", "Allgemeine Findings"]) - for f in general_findings[:20]: - sev = f.severity if hasattr(f, 'severity') else "?" - txt = f.text if hasattr(f, 'text') else str(f) - marker = "!!" if sev == "HIGH" else "!" if sev == "MEDIUM" else "i" - parts.append(f" [{marker}] {txt}") - elif findings: -======= # DSI Documents section if discovered_docs: parts.extend([ @@ -129,7 +92,6 @@ def build_scan_summary( ) if findings: ->>>>>>> feat/zeroclaw-compliance-agent parts.append("") for f in findings[:20]: sev = f.severity if hasattr(f, 'severity') else "?" @@ -145,44 +107,3 @@ def build_scan_summary( ]) return "\n".join(parts) -<<<<<<< HEAD - - -async def fetch_dse_text(url: str, scanned_pages: list[str]) -> str: - """Find and fetch the privacy policy page text.""" - dse_url = None - for page in scanned_pages: - if re.search(r"datenschutz|privacy|dsgvo", page, re.IGNORECASE): - dse_url = page - break - if not dse_url: - dse_url = url - try: - async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client: - resp = await client.get(dse_url, headers={"User-Agent": "BreakPilot-Compliance-Agent/1.0"}) - html = resp.text - clean = re.sub(r"<(script|style)[^>]*>.*?", "", html, flags=re.DOTALL | re.IGNORECASE) - clean = re.sub(r"<[^>]+>", " ", clean) - clean = re.sub(r"\s+", " ", clean).strip() - return clean[:8000] - except Exception: - return "" - - -async def fetch_dse_html(url: str, scanned_pages: list[str]) -> str: - """Fetch the raw HTML of the privacy policy page.""" - dse_url = None - for page in scanned_pages: - if re.search(r"datenschutz|privacy|dsgvo", page, re.IGNORECASE): - dse_url = page - break - if not dse_url: - dse_url = url - try: - async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client: - resp = await client.get(dse_url, headers={"User-Agent": "BreakPilot-Compliance-Agent/1.0"}) - return resp.text - except Exception: - return "" -======= ->>>>>>> feat/zeroclaw-compliance-agent diff --git a/backend-compliance/compliance/api/agent_scan_models.py b/backend-compliance/compliance/api/agent_scan_models.py new file mode 100644 index 0000000..2128158 --- /dev/null +++ b/backend-compliance/compliance/api/agent_scan_models.py @@ -0,0 +1,84 @@ +"""Pydantic models for the Agent Website Scan API.""" + +from pydantic import BaseModel + + +class ScanRequest(BaseModel): + url: str + mode: str = "post_launch" + recipient: str = "dsb@breakpilot.local" + + +class ServiceInfo(BaseModel): + name: str + category: str + provider: str + country: str + eu_adequate: bool + requires_consent: bool + legal_ref: str + in_dse: bool + status: str # "ok", "undocumented", "outdated" + + +class TextReferenceModel(BaseModel): + found: bool = False + source_url: str = "" + document_type: str = "Datenschutzerklaerung" + section_heading: str = "" + section_number: str = "" + parent_section: str = "" + paragraph_index: int = 0 + original_text: str = "" + issue: str = "" + correction_type: str = "" + correction_text: str = "" + insert_after: str = "" + + +class ScanFinding(BaseModel): + code: str + severity: str + text: str + correction: str = "" + text_reference: TextReferenceModel | None = None + + +class DiscoveredDocument(BaseModel): + title: str + url: str + doc_type: str + language: str = "" + word_count: int = 0 + completeness_pct: int = 0 + findings_count: int = 0 + + +class ScanResponse(BaseModel): + url: str + pages_scanned: int + pages_list: list[str] = [] + services: list[ServiceInfo] + findings: list[ScanFinding] + discovered_documents: list[DiscoveredDocument] = [] + ai_detected: bool + chatbot_detected: bool + chatbot_provider: str + missing_pages: dict + summary: str + email_status: str + scanned_at: str + + +class ScanStartResponse(BaseModel): + scan_id: str + status: str = "running" + message: str = "" + + +class ScanStatusResponse(BaseModel): + scan_id: str + status: str # "running", "completed", "failed" + progress: str = "" + result: ScanResponse | None = None + error: str = "" diff --git a/backend-compliance/compliance/api/agent_scan_routes.py b/backend-compliance/compliance/api/agent_scan_routes.py index ec32dc5..94d3a02 100644 --- a/backend-compliance/compliance/api/agent_scan_routes.py +++ b/backend-compliance/compliance/api/agent_scan_routes.py @@ -12,7 +12,6 @@ from datetime import datetime, timezone import httpx from fastapi import APIRouter -from pydantic import BaseModel from compliance.services.website_scanner import scan_website, DetectedService from compliance.services.dse_service_extractor import extract_dse_services, compare_services @@ -23,13 +22,11 @@ from compliance.services.mandatory_content_checker import ( check_mandatory_documents, check_dse_mandatory_content, MandatoryFinding, ) from compliance.services.legal_basis_validator import validate_legal_bases -<<<<<<< HEAD -from compliance.api.agent_scan_helpers import ( - add_corrections, build_scan_summary, fetch_dse_text, fetch_dse_html, -) -======= from compliance.api.agent_scan_helpers import add_corrections, build_scan_summary ->>>>>>> feat/zeroclaw-compliance-agent +from compliance.api.agent_scan_models import ( + ScanRequest, ServiceInfo, TextReferenceModel, ScanFinding, + DiscoveredDocument, ScanResponse, ScanStartResponse, ScanStatusResponse, +) logger = logging.getLogger(__name__) @@ -45,77 +42,6 @@ SDK_HEADERS = { } -class ScanRequest(BaseModel): - url: str - mode: str = "post_launch" - recipient: str = "dsb@breakpilot.local" - - -class ServiceInfo(BaseModel): - name: str - category: str - provider: str - country: str - eu_adequate: bool - requires_consent: bool - legal_ref: str - in_dse: bool - status: str # "ok", "undocumented", "outdated" - - -class TextReferenceModel(BaseModel): - found: bool = False - source_url: str = "" - document_type: str = "Datenschutzerklaerung" - section_heading: str = "" - section_number: str = "" - parent_section: str = "" - paragraph_index: int = 0 - original_text: str = "" - issue: str = "" - correction_type: str = "" - correction_text: str = "" - insert_after: str = "" - - -class ScanFinding(BaseModel): - code: str - severity: str - text: str - correction: str = "" -<<<<<<< HEAD - doc_title: str = "" -======= ->>>>>>> feat/zeroclaw-compliance-agent - text_reference: TextReferenceModel | None = None - - -class DiscoveredDocument(BaseModel): - title: str - url: str - doc_type: str - language: str = "" - word_count: int = 0 - completeness_pct: int = 0 - findings_count: int = 0 - - -class ScanResponse(BaseModel): - url: str - pages_scanned: int - pages_list: list[str] = [] - services: list[ServiceInfo] - findings: list[ScanFinding] - discovered_documents: list[DiscoveredDocument] = [] - ai_detected: bool - chatbot_detected: bool - chatbot_provider: str - missing_pages: dict - summary: str - email_status: str - scanned_at: str - - import asyncio import uuid as _uuid @@ -123,20 +49,6 @@ import uuid as _uuid _scan_jobs: dict[str, dict] = {} -class ScanStartResponse(BaseModel): - scan_id: str - status: str = "running" - message: str = "" - - -class ScanStatusResponse(BaseModel): - scan_id: str - status: str # "running", "completed", "failed" - progress: str = "" - result: ScanResponse | None = None - error: str = "" - - @router.post("/scan") async def scan_website_endpoint(req: ScanRequest): """Start async website scan. Returns scan_id immediately. @@ -204,60 +116,12 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse: # Use Playwright results if available, otherwise fall back to httpx scanner if playwright_htmls: - # Build ScanResult from Playwright data - from compliance.services.website_scanner import ScanResult, DetectedService, _detect_services, _detect_ai_mentions - from compliance.services.service_registry import SERVICE_REGISTRY + from compliance.services.website_scanner import ScanResult, _detect_services, _detect_ai_mentions scan = ScanResult() scan.pages_scanned = list(playwright_htmls.keys()) for page_url, html in playwright_htmls.items(): _detect_services(html, page_url, scan) _detect_ai_mentions(html, page_url, scan) - # Deduplicate - seen = set() - unique = [] - for svc in scan.detected_services: - if svc.id not in seen: - seen.add(svc.id) - unique.append(svc) - scan.detected_services = unique - scan.chatbot_detected = any(s.category == "chatbot" for s in scan.detected_services) - if scan.chatbot_detected: - scan.chatbot_provider = next(s.name for s in scan.detected_services if s.category == "chatbot") - else: - scan = await scan_website(req.url) - -<<<<<<< HEAD - logger.info("Scanned %d pages, found %d services", len(scan.pages_scanned), len(scan.detected_services)) - - _progress(f"Schritt 2/7: Rechtliche Dokumente suchen... ({len(scan.pages_scanned)} Seiten gescannt)") -======= - # Step 1: Scan website — try Playwright first (JS-rendered), fallback to httpx - playwright_htmls: dict[str, str] = {} - try: - async with httpx.AsyncClient(timeout=120.0) as pw_client: - pw_resp = await pw_client.post( - "http://bp-compliance-consent-tester:8094/website-scan", - json={"url": req.url, "max_pages": 15, "click_nav": True}, - ) - if pw_resp.status_code == 200: - pw_data = pw_resp.json() - playwright_htmls = pw_data.get("page_htmls", {}) - logger.info("Playwright scan: %d pages, %d scripts", - pw_data.get("pages_count", 0), len(pw_data.get("external_scripts", []))) - except Exception as e: - logger.warning("Playwright scanner unavailable, falling back to httpx: %s", e) - - # Use Playwright results if available, otherwise fall back to httpx scanner - if playwright_htmls: - # Build ScanResult from Playwright data - from compliance.services.website_scanner import ScanResult, DetectedService, _detect_services, _detect_ai_mentions - from compliance.services.service_registry import SERVICE_REGISTRY - scan = ScanResult() - scan.pages_scanned = list(playwright_htmls.keys()) - for page_url, html in playwright_htmls.items(): - _detect_services(html, page_url, scan) - _detect_ai_mentions(html, page_url, scan) - # Deduplicate seen = set() unique = [] for svc in scan.detected_services: @@ -273,54 +137,30 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse: logger.info("Scanned %d pages, found %d services", len(scan.pages_scanned), len(scan.detected_services)) ->>>>>>> feat/zeroclaw-compliance-agent # Step 1b: DSI Discovery — find all legal documents on the website discovered_docs: list[DiscoveredDocument] = [] dsi_findings: list[ScanFinding] = [] try: -<<<<<<< HEAD - async with httpx.AsyncClient(timeout=300.0) as dsi_client: - dsi_resp = await dsi_client.post( - "http://bp-compliance-consent-tester:8094/dsi-discovery", - json={"url": req.url, "max_documents": 30}, -======= async with httpx.AsyncClient(timeout=180.0) as dsi_client: dsi_resp = await dsi_client.post( "http://bp-compliance-consent-tester:8094/dsi-discovery", json={"url": req.url, "max_documents": 20}, ->>>>>>> feat/zeroclaw-compliance-agent ) if dsi_resp.status_code == 200: dsi_data = dsi_resp.json() logger.info("DSI discovery: %d documents found", dsi_data.get("total_found", 0)) - - # Check each document against its legal requirements - from compliance.services.dsi_document_checker import ( - check_document_completeness, classify_document_type, - ) + from compliance.services.dsi_document_checker import check_document_completeness, classify_document_type for doc in dsi_data.get("documents", []): doc_type = classify_document_type(doc["title"], doc["url"]) -<<<<<<< HEAD - doc_text = doc.get("full_text", "") or doc.get("text_preview", "") - logger.info("DSI check: '%s' type=%s text_len=%d full_text_len=%d preview_len=%d", - doc["title"][:50], doc_type, len(doc_text), - len(doc.get("full_text", "")), len(doc.get("text_preview", ""))) - doc_findings = check_document_completeness( - doc_text, doc_type, doc["title"], doc["url"], -======= doc_findings = check_document_completeness( doc.get("text_preview", ""), doc_type, doc["title"], doc["url"], ->>>>>>> feat/zeroclaw-compliance-agent ) - # Count completeness score_finding = next((f for f in doc_findings if "SCORE" in f.get("code", "")), None) completeness = 0 if score_finding: - import re as _re2 - pct_match = _re2.search(r"(\d+)%", score_finding.get("text", "")) + pct_match = re.search(r"(\d+)%", score_finding.get("text", "")) if pct_match: completeness = int(pct_match.group(1)) - discovered_docs.append(DiscoveredDocument( title=doc["title"], url=doc["url"], doc_type=doc_type, language=doc.get("language", ""), @@ -330,39 +170,7 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse: )) for df in doc_findings: if "SCORE" not in df.get("code", ""): - dsi_findings.append(ScanFinding( - code=df["code"], severity=df["severity"], text=df["text"], -<<<<<<< HEAD - doc_title=doc["title"], - )) - except Exception as e: - logger.warning("DSI discovery failed: %s %s", type(e).__name__, e) - - _progress(f"Schritt 3/7: Datenschutzerklaerung analysieren... ({len(discovered_docs)} Dokumente gefunden)") - # Step 2: Fetch privacy policy text — combine all DSI texts for best coverage - dse_text = "" - # Start with Playwright HTML if available - for page_url, html in playwright_htmls.items(): - if re.search(r"datenschutz|privacy|dsgvo", page_url, re.IGNORECASE): - clean = re.sub(r"<(script|style)[^>]*>.*?", "", html, flags=re.DOTALL | re.IGNORECASE) - clean = re.sub(r"<[^>]+>", " ", clean) - clean = re.sub(r"\s+", " ", clean).strip() - dse_text = clean[:30000] - break - # Enrich: append DSI discovery texts (they contain the actual document content) - try: - if 'dsi_resp' in dir() or 'dsi_data' in dir(): - dsi_data_for_text = dsi_data if 'dsi_data' in dir() else {} - for doc in dsi_data_for_text.get("documents", []): - ft = doc.get("full_text", "") - if ft and len(ft) > 500: - dse_text = (dse_text + " " + ft)[:50000] - except Exception: - pass - if not dse_text: - dse_text = await fetch_dse_text(req.url, scan.pages_scanned) -======= - )) + dsi_findings.append(ScanFinding(code=df["code"], severity=df["severity"], text=df["text"])) except Exception as e: logger.warning("DSI discovery failed: %s", e) @@ -370,62 +178,42 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse: dse_text = "" for page_url, html in playwright_htmls.items(): if re.search(r"datenschutz|privacy|dsgvo", page_url, re.IGNORECASE): - import re as _re - clean = _re.sub(r"<(script|style)[^>]*>.*?", "", html, flags=_re.DOTALL | _re.IGNORECASE) - clean = _re.sub(r"<[^>]+>", " ", clean) - clean = _re.sub(r"\s+", " ", clean).strip() + clean = re.sub(r"<(script|style)[^>]*>.*?", "", html, flags=re.DOTALL | re.IGNORECASE) + clean = re.sub(r"<[^>]+>", " ", clean) + clean = re.sub(r"\s+", " ", clean).strip() dse_text = clean[:4000] break if not dse_text: dse_text = await _fetch_dse_text(req.url, scan.pages_scanned) ->>>>>>> feat/zeroclaw-compliance-agent # Step 3: Extract services mentioned in DSE via LLM + text fallback dse_services = await extract_dse_services(dse_text) if dse_text else [] logger.info("DSE mentions %d services (LLM)", len(dse_services)) - - # Fallback: if LLM extraction failed, search DSE text directly for service names if not dse_services and dse_text: dse_lower = dse_text.lower() detected_dicts_for_check = [_service_to_dict(s) for s in scan.detected_services] for svc in detected_dicts_for_check: name = svc.get("name", "").lower() - # Check if service name appears in DSE text if name and len(name) > 3 and name in dse_lower: dse_services.append({"name": svc["name"], "purpose": "", "country": svc.get("country", ""), "legal_basis": ""}) if dse_services: logger.info("DSE text fallback found %d services", len(dse_services)) - # Step 4: Parse DSE into structured sections (prefer Playwright HTML) + # Step 4: Parse DSE into structured sections dse_html = "" for page_url, html in playwright_htmls.items(): if re.search(r"datenschutz|privacy|dsgvo", page_url, re.IGNORECASE): dse_html = html break if not dse_html: -<<<<<<< HEAD - dse_html = await fetch_dse_html(req.url, scan.pages_scanned) - dse_sections = parse_dse(dse_html, req.url) if dse_html else [] - logger.info("Parsed %d DSE sections", len(dse_sections)) - - _progress("Schritt 4/7: SOLL/IST Vergleich...") -======= dse_html = await _fetch_dse_html(req.url, scan.pages_scanned) dse_sections = parse_dse(dse_html, req.url) if dse_html else [] - logger.info("Parsed %d DSE sections", len(dse_sections)) ->>>>>>> feat/zeroclaw-compliance-agent - # Step 5: SOLL/IST comparison + # Step 5-8: Comparison, findings, mandatory checks, legal basis validation detected_dicts = [_service_to_dict(s) for s in scan.detected_services] comparison = compare_services(detected_dicts, dse_services) - - # Step 6: Build TextReferences for each detected service text_refs = build_text_references(detected_dicts, dse_services, dse_sections, req.url) - - # Step 7: Generate findings with text references services_info, findings = _build_findings(comparison, scan, is_live, text_refs) - - # Step 8: Check mandatory content (documents + DSE sections) mandatory_findings = check_mandatory_documents(scan.pages_scanned, scan.missing_pages) mandatory_findings += check_dse_mandatory_content(dse_sections, dse_text) for mf in mandatory_findings: @@ -433,40 +221,24 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse: code=mf.code, severity=mf.severity, text=f"{mf.text}" + (f" — {mf.suggestion}" if mf.suggestion else ""), )) - - # Step 8b: Validate legal bases (lit. a-f) in DSE if dse_text: - lit_findings = validate_legal_bases(dse_text) - for lf in lit_findings: + for lf in validate_legal_bases(dse_text): findings.append(ScanFinding( - code=f"LIT-{lf.purpose.upper()}", - severity=lf.severity, - text=lf.text, + code=f"LIT-{lf.purpose.upper()}", severity=lf.severity, text=lf.text, text_reference=TextReferenceModel( - found=True, source_url=req.url, - original_text=lf.original_text, + found=True, source_url=req.url, original_text=lf.original_text, issue="incorrect", correction_type="replace", correction_text=f"Korrekte Rechtsgrundlage: {lf.correct_basis} ({lf.legal_ref})", ) if lf.original_text else None, )) - - # Step 8c: Add DSI document findings findings.extend(dsi_findings) - -<<<<<<< HEAD - _progress(f"Schritt 5/7: Korrekturen generieren... ({len(findings)} Findings)") -======= ->>>>>>> feat/zeroclaw-compliance-agent - # Step 9: Generate corrections for pre-launch mode if not is_live and findings: await add_corrections(findings, dse_text) _progress("Schritt 6/7: Report erstellen...") - # Step 7: Build summary summary = build_scan_summary(req.url, scan, comparison, findings, is_live, discovered_docs) _progress("Schritt 7/7: E-Mail senden...") - # Step 8: Send notification mode_label = "INTERNE PRUEFUNG" if not is_live else "LIVE-WEBSITE" email_result = send_email( recipient=req.recipient, @@ -475,34 +247,30 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse: ) return ScanResponse( - url=req.url, - pages_scanned=len(scan.pages_scanned), - pages_list=scan.pages_scanned, - services=services_info, - findings=findings, - discovered_documents=discovered_docs, - ai_detected=len(scan.ai_mentions) > 0, - chatbot_detected=scan.chatbot_detected, - chatbot_provider=scan.chatbot_provider, - missing_pages=scan.missing_pages, - summary=summary, - email_status=email_result.get("status", "failed"), + url=req.url, pages_scanned=len(scan.pages_scanned), pages_list=scan.pages_scanned, + services=services_info, findings=findings, discovered_documents=discovered_docs, + ai_detected=len(scan.ai_mentions) > 0, chatbot_detected=scan.chatbot_detected, + chatbot_provider=scan.chatbot_provider, missing_pages=scan.missing_pages, + summary=summary, email_status=email_result.get("status", "failed"), scanned_at=datetime.now(timezone.utc).isoformat(), ) +async def _fetch_dse_text(url: str, scanned_pages: list[str]) -> str: + """Fetch DSE text from the privacy policy page.""" + dse_url = next((p for p in scanned_pages if re.search(r"datenschutz|privacy|dsgvo", p, re.IGNORECASE)), url) + try: + async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client: + resp = await client.get(dse_url, headers={"User-Agent": "BreakPilot-Compliance-Agent/1.0"}) + clean = re.sub(r"<[^>]+>", " ", resp.text) + return re.sub(r"\s+", " ", clean).strip()[:4000] + except Exception: + return "" async def _fetch_dse_html(url: str, scanned_pages: list[str]) -> str: - """Fetch the raw HTML of the privacy policy page (for structured parsing).""" - import re - dse_url = None - for page in scanned_pages: - if re.search(r"datenschutz|privacy|dsgvo", page, re.IGNORECASE): - dse_url = page - break - if not dse_url: - dse_url = url + """Fetch the raw HTML of the privacy policy page.""" + dse_url = next((p for p in scanned_pages if re.search(r"datenschutz|privacy|dsgvo", p, re.IGNORECASE)), url) try: async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client: resp = await client.get(dse_url, headers={"User-Agent": "BreakPilot-Compliance-Agent/1.0"}) @@ -541,7 +309,6 @@ def _build_findings( correction_text=ref.correction_text, insert_after=ref.insert_after, ) - # Undocumented services (on website, NOT in DSE) for svc in comparison["undocumented"]: services.append(ServiceInfo( name=svc["name"], category=svc.get("category", "other"), @@ -550,18 +317,16 @@ def _build_findings( requires_consent=svc.get("requires_consent", False), legal_ref=svc.get("legal_ref", ""), in_dse=False, status="undocumented", )) - severity = "HIGH" if is_live else "MEDIUM" ref = _get_ref(svc.get("id", "")) findings.append(ScanFinding( code=f"DSE-MISSING-{svc['id'].upper()}", - severity=severity, + severity="HIGH" if is_live else "MEDIUM", text=f"{svc['name']} ({svc.get('provider', '')}, {svc.get('country', '')}) " f"ist auf der Website eingebunden aber NICHT in der Datenschutzerklaerung " f"dokumentiert (Art. 13 DSGVO).", text_reference=ref, )) - # Documented services (OK) for item in comparison["documented"]: svc = item["detected"] services.append(ServiceInfo( @@ -571,16 +336,13 @@ def _build_findings( requires_consent=svc.get("requires_consent", False), legal_ref=svc.get("legal_ref", ""), in_dse=True, status="ok", )) - # Check third-country transfer if not svc.get("eu_adequate", False): findings.append(ScanFinding( - code=f"TRANSFER-{svc['id'].upper()}", - severity="MEDIUM", + code=f"TRANSFER-{svc['id'].upper()}", severity="MEDIUM", text=f"{svc['name']} ({svc.get('country', '')}) — Drittlandtransfer. " f"Pruefen ob SCCs oder Angemessenheitsbeschluss dokumentiert sind.", )) - # Outdated services (in DSE, NOT on website) for svc in comparison["outdated"]: services.append(ServiceInfo( name=svc["name"], category="other", @@ -595,15 +357,11 @@ def _build_findings( f"nicht mehr gefunden. Eintrag bei naechster Aktualisierung entfernen.", )) - # Missing pages (e.g., /impressum returns 404) for page_url, status_code in scan.missing_pages.items(): if "impressum" in page_url.lower(): findings.append(ScanFinding( - code="MISSING-IMPRESSUM", - severity="HIGH", + code="MISSING-IMPRESSUM", severity="HIGH", text=f"Impressum-Seite gibt HTTP {status_code} zurueck (§5 TMG Verstoss).", )) return services, findings - -