Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e536247c20 | |||
| 313982c6f1 | |||
| f30a3ce471 | |||
| 479ce2225b | |||
| a1b380e211 | |||
| 077e0f1253 | |||
| 936c354547 | |||
| b87c27d104 | |||
| 78b27d4684 | |||
| a220f0d0a7 | |||
| 28a078ccb4 | |||
| 0d37822b7c | |||
| 575644c9c5 | |||
| 6c223c7c9b | |||
| a616b64273 |
@@ -158,3 +158,27 @@ zeroclaw/docs/ground-truth/06-spiegel-dsi-fulltext.txt
|
|||||||
ai-compliance-sdk/internal/iace/manufacturer_safety_features.go
|
ai-compliance-sdk/internal/iace/manufacturer_safety_features.go
|
||||||
ai-compliance-sdk/internal/api/handlers/iace_handler_clarifications.go
|
ai-compliance-sdk/internal/api/handlers/iace_handler_clarifications.go
|
||||||
ai-compliance-sdk/internal/app/routes.go
|
ai-compliance-sdk/internal/app/routes.go
|
||||||
|
|
||||||
|
# --- 2026-05-19 Coolify-Unblocker: 5 grandfathered files ---
|
||||||
|
# Diese 5 Dateien sind Pre-Existing-Tech-Debt und blockierten den
|
||||||
|
# Coolify-Build. Splits sind als P9.5 Tech-Debt-Sprint geplant, bis
|
||||||
|
# dahin als Exceptions getragen damit Deploy laeuft.
|
||||||
|
#
|
||||||
|
# cra_routes.py (1714): CRA-Phase-5-Router mit Annex-V/VII Generator —
|
||||||
|
# Split nach Endpoint-Gruppen (vuln/post-market/tech-doc/doc) sinnvoll.
|
||||||
|
backend-compliance/compliance/api/cra_routes.py
|
||||||
|
# vendor_redundancy.py (727): Cost-Lookup-Tabellen (DSP/SaaS/Self-Service)
|
||||||
|
# + Multi-Function-Tools + Engine. Tabellen-Splits nach Lookup-Klasse.
|
||||||
|
backend-compliance/compliance/services/vendor_redundancy.py
|
||||||
|
# cookie_knowledge_db.py (608): Basis-KB — Ergaenzung via
|
||||||
|
# cookie_knowledge_extended.py + Facade laeuft bereits (P2). Split der
|
||||||
|
# Base-KB nach Vendor-Familie ist Phase-2-Ziel.
|
||||||
|
backend-compliance/compliance/services/cookie_knowledge_db.py
|
||||||
|
# cookie-banner-embed.ts (558): Banner-Embed-Bundle fuer CDN-Auslieferung
|
||||||
|
# — selbst-kontainierter Code-Generator, Split wuerde Generator-Logik
|
||||||
|
# fragmentieren ohne Nutzen.
|
||||||
|
admin-compliance/lib/sdk/einwilligungen/generator/cookie-banner-embed.ts
|
||||||
|
# ComplianceCheckTab.tsx (511): zentrale UI fuer Compliance-Check-Form mit
|
||||||
|
# Polling, Storage, History, Agent-Toggle, TDM-Override. Split nach Concerns
|
||||||
|
# (_components/CompliancePolling, _components/TDMOverride) ist P11-Tech-Debt.
|
||||||
|
admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx
|
||||||
|
|||||||
@@ -313,10 +313,13 @@ jobs:
|
|||||||
git push --force "$PUSH_URL" "refs/tags/last-build/main"
|
git push --force "$PUSH_URL" "refs/tags/last-build/main"
|
||||||
echo "Tag last-build/main now at ${SHA}"
|
echo "Tag last-build/main now at ${SHA}"
|
||||||
|
|
||||||
# ── orca redeploy — runs only if at least one build succeeded ─────────────
|
# ── orca redeploy — runs if at least one build was triggered AND green ────
|
||||||
# `always()` lets this run when some builds are skipped (unchanged services).
|
# Per-job `result == 'success'` is true only when the job actually ran and
|
||||||
# The contains() checks ensure we only redeploy when something actually built
|
# passed; skipped/failed/cancelled jobs return their own status string and
|
||||||
# and no build failed.
|
# fail the OR. This avoids Gitea's quirky evaluation of `contains(needs.*
|
||||||
|
# .result, 'success')` when most upstreams are skipped (root cause of
|
||||||
|
# trigger-orca being skipped on single-service changes).
|
||||||
|
# `always()` is required so the job is evaluated when upstreams skip.
|
||||||
|
|
||||||
trigger-orca:
|
trigger-orca:
|
||||||
runs-on: docker
|
runs-on: docker
|
||||||
@@ -332,9 +335,16 @@ jobs:
|
|||||||
- build-dsms-node
|
- build-dsms-node
|
||||||
if: |
|
if: |
|
||||||
always() &&
|
always() &&
|
||||||
contains(needs.*.result, 'success') &&
|
(
|
||||||
!contains(needs.*.result, 'failure') &&
|
needs.build-admin-compliance.result == 'success' ||
|
||||||
!contains(needs.*.result, 'cancelled')
|
needs.build-backend-compliance.result == 'success' ||
|
||||||
|
needs.build-ai-sdk.result == 'success' ||
|
||||||
|
needs.build-developer-portal.result == 'success' ||
|
||||||
|
needs.build-tts.result == 'success' ||
|
||||||
|
needs.build-document-crawler.result == 'success' ||
|
||||||
|
needs.build-dsms-gateway.result == 'success' ||
|
||||||
|
needs.build-dsms-node.result == 'success'
|
||||||
|
)
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout (for SHA)
|
- name: Checkout (for SHA)
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
/**
|
||||||
|
* Proxy: GET /api/sdk/v1/agent/findings/<checkId>
|
||||||
|
* -> backend GET /api/compliance/agent/findings/<checkId>
|
||||||
|
*
|
||||||
|
* Forwards all query params (source, severity, doc_type, status, q, limit).
|
||||||
|
*/
|
||||||
|
import { NextRequest, NextResponse } from 'next/server'
|
||||||
|
|
||||||
|
const BACKEND_URL = process.env.BACKEND_API_URL || 'http://backend-compliance:8002'
|
||||||
|
|
||||||
|
/**
 * Proxy handler: forwards a findings query for one check run to the backend.
 *
 * The incoming query string is passed through unchanged. The backend's JSON
 * body and HTTP status are relayed to the caller.
 *
 * @param request Incoming request; only its query string is used.
 * @param params  Dynamic route params (awaited, matching the other route
 *                handlers in this app); `checkId` selects the check run.
 * @returns The backend's JSON payload with the backend's status code, or a
 *          503 JSON error when the fetch fails, times out after 20 s, or the
 *          backend body is not valid JSON.
 */
export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ checkId: string }> },
) {
  // `await` also works if the framework hands over a plain object.
  const { checkId } = await params
  const qs = request.nextUrl.searchParams.toString()
  // Encode the path segment so characters such as "/", "?" or "#" inside the
  // id cannot alter the upstream path or query.
  const url =
    `${BACKEND_URL}/api/compliance/agent/findings/${encodeURIComponent(checkId)}` +
    (qs ? `?${qs}` : '')
  try {
    const resp = await fetch(url, { signal: AbortSignal.timeout(20000) })
    const data = await resp.json()
    return NextResponse.json(data, { status: resp.status })
  } catch {
    return NextResponse.json(
      { error: 'Findings-Abfrage fehlgeschlagen' },
      { status: 503 },
    )
  }
}
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
import { NextRequest, NextResponse } from 'next/server'
|
||||||
|
|
||||||
|
const BACKEND_URL = process.env.BACKEND_URL || 'http://backend-compliance:8002'
|
||||||
|
|
||||||
|
/**
 * Pick the tenant id to forward to the backend.
 *
 * Uses the caller's `x-tenant-id` header when it is present and non-empty,
 * otherwise the hard-coded fallback tenant UUID.
 */
function tenantHeader(request: NextRequest): string {
  const supplied = request.headers.get('x-tenant-id')
  if (supplied) {
    return supplied
  }
  return '00000000-0000-0000-0000-000000000001'
}
|
||||||
|
|
||||||
|
/**
 * Proxy handler: fetches a single quaidal control from the backend.
 *
 * The `derived_id` route param is URL-encoded into the backend path and the
 * tenant id is forwarded as `X-Tenant-ID`. The backend body is relayed as
 * text together with its status and Content-Type (defaulting to
 * `application/json`). Any failure while calling the backend yields a 502
 * JSON error that includes the stringified exception.
 */
export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ derived_id: string }> }
) {
  const routeParams = await params
  const derivedId = routeParams.derived_id
  try {
    const target = `${BACKEND_URL}/api/v1/quaidal/controls/${encodeURIComponent(derivedId)}`
    const upstream = await fetch(target, {
      headers: { 'X-Tenant-ID': tenantHeader(request) },
      cache: 'no-store',
    })
    const payload = await upstream.text()
    const contentType = upstream.headers.get('Content-Type') || 'application/json'
    return new NextResponse(payload, {
      status: upstream.status,
      headers: { 'Content-Type': contentType },
    })
  } catch (failure) {
    const errorBody = { error: 'Backend unreachable', details: String(failure) }
    return NextResponse.json(errorBody, { status: 502 })
  }
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
import { NextRequest, NextResponse } from 'next/server'
|
||||||
|
|
||||||
|
const BACKEND_URL = process.env.BACKEND_URL || 'http://backend-compliance:8002'
|
||||||
|
|
||||||
|
/**
 * Return the tenant id sent upstream: the request's `x-tenant-id` header if
 * it carries a non-empty value, else the hard-coded fallback tenant UUID.
 */
function tenantHeader(request: NextRequest): string {
  const fallbackTenant = '00000000-0000-0000-0000-000000000001'
  const headerValue = request.headers.get('x-tenant-id')
  return headerValue ? headerValue : fallbackTenant
}
|
||||||
|
|
||||||
|
/**
 * Proxy handler: lists quaidal controls from the backend.
 *
 * The caller's query string is appended unchanged to the backend URL and the
 * tenant id is forwarded as `X-Tenant-ID`. The backend body is relayed as
 * text with its status and Content-Type (defaulting to `application/json`).
 * Any failure while calling the backend yields a 502 JSON error.
 */
export async function GET(request: NextRequest) {
  const query = new URL(request.url).searchParams.toString()
  const suffix = query ? `?${query}` : ''
  try {
    const upstream = await fetch(`${BACKEND_URL}/api/v1/quaidal/controls${suffix}`, {
      headers: { 'X-Tenant-ID': tenantHeader(request) },
      cache: 'no-store',
    })
    const payload = await upstream.text()
    const contentType = upstream.headers.get('Content-Type') || 'application/json'
    return new NextResponse(payload, {
      status: upstream.status,
      headers: { 'Content-Type': contentType },
    })
  } catch (failure) {
    const errorBody = { error: 'Backend unreachable', details: String(failure) }
    return NextResponse.json(errorBody, { status: 502 })
  }
}
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
import { NextRequest, NextResponse } from 'next/server'
|
||||||
|
|
||||||
|
const BACKEND_URL = process.env.BACKEND_URL || 'http://backend-compliance:8002'
|
||||||
|
|
||||||
|
/**
 * Tenant id for the upstream call: the incoming `x-tenant-id` header when it
 * is set to a non-empty value, otherwise the hard-coded fallback tenant UUID.
 */
function tenantHeader(request: NextRequest): string {
  const fromHeader = request.headers.get('x-tenant-id')
  if (!fromHeader) {
    return '00000000-0000-0000-0000-000000000001'
  }
  return fromHeader
}
|
||||||
|
|
||||||
|
/**
 * Proxy handler: fetches one quaidal criteria section from the backend.
 *
 * The `section_id` route param is URL-encoded into the backend path and the
 * tenant id is forwarded as `X-Tenant-ID`. The backend body is relayed as
 * text with its status and Content-Type (defaulting to `application/json`).
 * Any failure while calling the backend yields a 502 JSON error.
 */
export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ section_id: string }> }
) {
  const routeParams = await params
  const sectionId = routeParams.section_id
  try {
    const target = `${BACKEND_URL}/api/v1/quaidal/criteria/${encodeURIComponent(sectionId)}`
    const upstream = await fetch(target, {
      headers: { 'X-Tenant-ID': tenantHeader(request) },
      cache: 'no-store',
    })
    const payload = await upstream.text()
    const contentType = upstream.headers.get('Content-Type') || 'application/json'
    return new NextResponse(payload, {
      status: upstream.status,
      headers: { 'Content-Type': contentType },
    })
  } catch (failure) {
    const errorBody = { error: 'Backend unreachable', details: String(failure) }
    return NextResponse.json(errorBody, { status: 502 })
  }
}
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
import { NextRequest, NextResponse } from 'next/server'
|
||||||
|
|
||||||
|
const BACKEND_URL = process.env.BACKEND_URL || 'http://backend-compliance:8002'
|
||||||
|
|
||||||
|
/**
 * Choose the `X-Tenant-ID` value to forward: a non-empty `x-tenant-id`
 * request header wins, otherwise the hard-coded fallback tenant UUID is used.
 */
function tenantHeader(request: NextRequest): string {
  const requested = request.headers.get('x-tenant-id')
  if (requested) {
    return requested
  }
  return '00000000-0000-0000-0000-000000000001'
}
|
||||||
|
|
||||||
|
/**
 * Proxy handler: lists quaidal criteria from the backend.
 *
 * The tenant id is forwarded as `X-Tenant-ID`. The backend body is relayed
 * as text with its status and Content-Type (defaulting to
 * `application/json`). Any failure while calling the backend yields a 502
 * JSON error that includes the stringified exception.
 */
export async function GET(request: NextRequest) {
  try {
    const target = `${BACKEND_URL}/api/v1/quaidal/criteria`
    const upstream = await fetch(target, {
      headers: { 'X-Tenant-ID': tenantHeader(request) },
      cache: 'no-store',
    })
    const payload = await upstream.text()
    const contentType = upstream.headers.get('Content-Type') || 'application/json'
    return new NextResponse(payload, {
      status: upstream.status,
      headers: { 'Content-Type': contentType },
    })
  } catch (failure) {
    const errorBody = { error: 'Backend unreachable', details: String(failure) }
    return NextResponse.json(errorBody, { status: 502 })
  }
}
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
import { NextRequest, NextResponse } from 'next/server'
|
||||||
|
|
||||||
|
const BACKEND_URL = process.env.BACKEND_URL || 'http://backend-compliance:8002'
|
||||||
|
|
||||||
|
/**
 * Tenant id to send to the backend: the `x-tenant-id` request header when
 * non-empty, otherwise the hard-coded fallback tenant UUID.
 */
function tenantHeader(request: NextRequest): string {
  const fallbackTenant = '00000000-0000-0000-0000-000000000001'
  const headerValue = request.headers.get('x-tenant-id')
  if (headerValue) {
    return headerValue
  }
  return fallbackTenant
}
|
||||||
|
|
||||||
|
/**
 * Proxy handler: fetches quaidal statistics from the backend.
 *
 * The tenant id is forwarded as `X-Tenant-ID`. The backend body is relayed
 * as text with its status and Content-Type (defaulting to
 * `application/json`). Any failure while calling the backend yields a 502
 * JSON error that includes the stringified exception.
 */
export async function GET(request: NextRequest) {
  try {
    const target = `${BACKEND_URL}/api/v1/quaidal/stats`
    const upstream = await fetch(target, {
      headers: { 'X-Tenant-ID': tenantHeader(request) },
      cache: 'no-store',
    })
    const payload = await upstream.text()
    const contentType = upstream.headers.get('Content-Type') || 'application/json'
    return new NextResponse(payload, {
      status: upstream.status,
      headers: { 'Content-Type': contentType },
    })
  } catch (failure) {
    const errorBody = { error: 'Backend unreachable', details: String(failure) }
    return NextResponse.json(errorBody, { status: 502 })
  }
}
|
||||||
@@ -73,6 +73,8 @@ interface HistoryEntry {
|
|||||||
export function ComplianceCheckTab() {
|
export function ComplianceCheckTab() {
|
||||||
const [docs, setDocs] = useState<DocsState>(initState)
|
const [docs, setDocs] = useState<DocsState>(initState)
|
||||||
const [useAgent, setUseAgent] = useState(false)
|
const [useAgent, setUseAgent] = useState(false)
|
||||||
|
const [tdmOverride, setTdmOverride] = useState(false)
|
||||||
|
const [tdmOverrideReason, setTdmOverrideReason] = useState('')
|
||||||
const [loading, setLoading] = useState(false)
|
const [loading, setLoading] = useState(false)
|
||||||
const [progress, setProgress] = useState('')
|
const [progress, setProgress] = useState('')
|
||||||
const [progressPct, setProgressPct] = useState(0)
|
const [progressPct, setProgressPct] = useState(0)
|
||||||
@@ -119,11 +121,9 @@ export function ComplianceCheckTab() {
|
|||||||
localStorage.removeItem(STORAGE_KEY_CHECK_ID); setActiveCheckId('')
|
localStorage.removeItem(STORAGE_KEY_CHECK_ID); setActiveCheckId('')
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if (data.status === 'failed' || data.status === 'not_found') {
|
if (['failed', 'not_found', 'skipped_tdm'].includes(data.status)) {
|
||||||
if (data.status === 'failed') setError(data.error || 'Pruefung fehlgeschlagen')
|
if (data.status !== 'not_found') setError(data.error || (data.status === 'skipped_tdm' ? 'TDM-Vorbehalt erkannt — Crawl uebersprungen' : 'Pruefung fehlgeschlagen'))
|
||||||
setProgress(''); setProgressPct(0); setLoading(false)
|
setProgress(''); setProgressPct(0); setLoading(false); localStorage.removeItem(STORAGE_KEY_CHECK_ID); setActiveCheckId(''); return
|
||||||
localStorage.removeItem(STORAGE_KEY_CHECK_ID); setActiveCheckId('')
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
} catch { /* retry */ }
|
} catch { /* retry */ }
|
||||||
}
|
}
|
||||||
@@ -199,6 +199,8 @@ export function ComplianceCheckTab() {
|
|||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
documents: entries,
|
documents: entries,
|
||||||
use_agent: useAgent,
|
use_agent: useAgent,
|
||||||
|
tdm_override: tdmOverride && tdmOverrideReason.trim().length >= 10,
|
||||||
|
tdm_override_reason: tdmOverrideReason.trim(),
|
||||||
}),
|
}),
|
||||||
})
|
})
|
||||||
if (!startRes.ok) throw new Error(`Pruefung konnte nicht gestartet werden: ${startRes.status}`)
|
if (!startRes.ok) throw new Error(`Pruefung konnte nicht gestartet werden: ${startRes.status}`)
|
||||||
@@ -236,9 +238,9 @@ export function ComplianceCheckTab() {
|
|||||||
localStorage.setItem(STORAGE_KEY_HISTORY, JSON.stringify(updated))
|
localStorage.setItem(STORAGE_KEY_HISTORY, JSON.stringify(updated))
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if (pollData.status === 'failed') {
|
if (['failed', 'skipped_tdm'].includes(pollData.status)) {
|
||||||
localStorage.removeItem(STORAGE_KEY_CHECK_ID); setActiveCheckId('')
|
localStorage.removeItem(STORAGE_KEY_CHECK_ID); setActiveCheckId('')
|
||||||
throw new Error(pollData.error || 'Pruefung fehlgeschlagen')
|
throw new Error(pollData.error || (pollData.status === 'skipped_tdm' ? 'TDM-Vorbehalt' : 'Pruefung fehlgeschlagen'))
|
||||||
}
|
}
|
||||||
attempts++
|
attempts++
|
||||||
}
|
}
|
||||||
@@ -321,10 +323,15 @@ export function ComplianceCheckTab() {
|
|||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div className="bg-amber-50/60 border border-amber-200 rounded-lg p-3 space-y-2">
|
||||||
|
<label className="flex items-start gap-2 cursor-pointer"><input type="checkbox" checked={tdmOverride} onChange={e => setTdmOverride(e.target.checked)} className="mt-0.5 accent-amber-600" /><span className="text-xs text-amber-900"><strong>Schriftliche Crawl-Erlaubnis vorhanden</strong> — uebergeht TDM-Vorbehalte (robots.txt / ai.txt)</span></label>
|
||||||
|
{tdmOverride && <input type="text" value={tdmOverrideReason} onChange={e => setTdmOverrideReason(e.target.value)} placeholder="z.B. Auftragsbeziehung Safetykon GmbH, Email Hr. X vom 18.05.2026" className="w-full px-3 py-2 text-xs border border-amber-300 rounded bg-white" />}
|
||||||
|
{tdmOverride && tdmOverrideReason.trim().length < 10 && <p className="text-[10px] text-amber-700">Pflicht: Reason mit min. 10 Zeichen (Audit-Spur).</p>}
|
||||||
|
</div>
|
||||||
{/* Submit button */}
|
{/* Submit button */}
|
||||||
<button
|
<button
|
||||||
onClick={handleSubmit}
|
onClick={handleSubmit}
|
||||||
disabled={loading || filledCount === 0}
|
disabled={loading || filledCount === 0 || (tdmOverride && tdmOverrideReason.trim().length < 10)}
|
||||||
className="w-full px-4 py-3 bg-purple-600 text-white rounded-lg font-medium hover:bg-purple-700 disabled:opacity-50 transition-colors text-sm flex items-center justify-center gap-2"
|
className="w-full px-4 py-3 bg-purple-600 text-white rounded-lg font-medium hover:bg-purple-700 disabled:opacity-50 transition-colors text-sm flex items-center justify-center gap-2"
|
||||||
>
|
>
|
||||||
{loading ? (
|
{loading ? (
|
||||||
|
|||||||
@@ -0,0 +1,275 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import React, { useEffect, useMemo, useState } from 'react'
|
||||||
|
|
||||||
|
type Finding = {
|
||||||
|
id: number
|
||||||
|
source_type: string
|
||||||
|
doc_type: string
|
||||||
|
severity: string
|
||||||
|
status: string
|
||||||
|
regulation: string
|
||||||
|
label: string
|
||||||
|
hint: string
|
||||||
|
action_recipe: Record<string, string>
|
||||||
|
anchor_excerpt: string
|
||||||
|
anchor_conf: number
|
||||||
|
vendor_name: string
|
||||||
|
category: string
|
||||||
|
payload: Record<string, unknown>
|
||||||
|
}
|
||||||
|
|
||||||
|
type Summary = {
|
||||||
|
total: number
|
||||||
|
by_source: Record<string, number>
|
||||||
|
by_severity: Record<string, number>
|
||||||
|
by_status: Record<string, number>
|
||||||
|
by_doc_type: Record<string, number>
|
||||||
|
}
|
||||||
|
|
||||||
|
type Resp = {
|
||||||
|
found: boolean
|
||||||
|
summary: Summary
|
||||||
|
count: number
|
||||||
|
findings: Finding[]
|
||||||
|
}
|
||||||
|
|
||||||
|
const SOURCE_LABEL: Record<string, string> = {
|
||||||
|
all: 'Alle Quellen',
|
||||||
|
mc: 'Master-Controls',
|
||||||
|
pflichtangabe: 'Pflichtangaben',
|
||||||
|
vendor: 'Vendor-Findings',
|
||||||
|
redundanz: 'Redundanzen',
|
||||||
|
}
|
||||||
|
|
||||||
|
const SEVERITY_COLOR: Record<string, string> = {
|
||||||
|
CRITICAL: 'bg-red-600 text-white',
|
||||||
|
HIGH: 'bg-red-100 text-red-800',
|
||||||
|
MEDIUM: 'bg-amber-100 text-amber-800',
|
||||||
|
LOW: 'bg-blue-100 text-blue-800',
|
||||||
|
INFO: 'bg-gray-100 text-gray-600',
|
||||||
|
}
|
||||||
|
|
||||||
|
const STATUS_LABEL: Record<string, string> = {
|
||||||
|
failed: 'Fail',
|
||||||
|
passed: 'Pass',
|
||||||
|
skipped: 'Skip',
|
||||||
|
na: 'N/A',
|
||||||
|
info: 'Info',
|
||||||
|
}
|
||||||
|
|
||||||
|
const SEVERITY_OPTS = ['all', 'CRITICAL', 'HIGH', 'MEDIUM', 'LOW', 'INFO']
|
||||||
|
const STATUS_OPTS = ['all', 'failed', 'passed', 'skipped', 'na', 'info']
|
||||||
|
|
||||||
|
/**
 * Unified findings view for one compliance-check run.
 *
 * Loads findings for `checkId` from `/api/sdk/v1/agent/findings/<checkId>`,
 * re-querying whenever the source, severity, doc-type, status or text filter
 * changes. Renders per-source summary cards, a filter row with CSV export,
 * and a table whose rows expand to show hint, recommendation and anchor
 * excerpt.
 *
 * @param checkId Identifier of the check run whose findings are shown.
 */
export default function FindingsTab({ checkId }: { checkId: string }) {
  const [data, setData] = useState<Resp | null>(null)
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const [source, setSource] = useState('all')
  const [severity, setSeverity] = useState('all')
  const [docType, setDocType] = useState('all')
  const [status, setStatus] = useState('failed')
  const [q, setQ] = useState('')
  const [expanded, setExpanded] = useState<number | null>(null)

  useEffect(() => {
    // Guards against applying the result of a superseded request.
    let cancelled = false
    setLoading(true)
    // Drop any error from a previous request; otherwise a single failed
    // fetch would keep the error view on screen even after a later success.
    setError(null)
    const qs = new URLSearchParams({
      source, severity, doc_type: docType, status, q, limit: '1500',
    }).toString()
    // Encode the id so it cannot alter the request path or query.
    fetch(`/api/sdk/v1/agent/findings/${encodeURIComponent(checkId)}?${qs}`)
      .then(async r => {
        const body = await r.json()
        // A non-2xx reply without a `found` flag is an error payload
        // (e.g. `{ error: ... }` from the proxy), not a findings response.
        if (!r.ok && typeof body?.found !== 'boolean') {
          throw new Error(body?.error || `HTTP ${r.status}`)
        }
        return body as Resp
      })
      .then(d => { if (!cancelled) setData(d) })
      .catch(e => { if (!cancelled) setError(String(e)) })
      .finally(() => { if (!cancelled) setLoading(false) })
    return () => { cancelled = true }
  }, [checkId, source, severity, docType, status, q])

  // Doc types offered in the filter come from the summary; '-' is excluded.
  const docTypes = useMemo(
    () => Object.keys(data?.summary?.by_doc_type ?? {}).filter(d => d !== '-').sort(),
    [data],
  )

  // Download the currently loaded findings as a CSV file.
  const csvExport = () => {
    const rows = data?.findings ?? []
    const head = ['Quelle', 'Doc', 'Severity', 'Status', 'Regulation', 'Label', 'Vendor', 'Hint']
    const lines = [head.join(',')]
    for (const r of rows) {
      // Quote every cell, double embedded quotes, flatten newlines.
      const cells = [
        r.source_type, r.doc_type, r.severity, r.status,
        r.regulation, r.label, r.vendor_name, r.hint,
      ].map(c => `"${String(c ?? '').replace(/"/g, '""').replace(/\n/g, ' ')}"`)
      lines.push(cells.join(','))
    }
    const blob = new Blob([lines.join('\n')], { type: 'text/csv;charset=utf-8' })
    const url = URL.createObjectURL(blob)
    const a = document.createElement('a')
    a.href = url
    a.download = `findings-${checkId}.csv`
    a.click()
    URL.revokeObjectURL(url)
  }

  if (loading && !data) return <div className="p-6 text-sm text-gray-500">Lade Voll-Audit…</div>
  if (error) return <div className="p-6 text-sm text-red-600">Fehler: {error}</div>
  if (!data?.found) {
    return (
      <div className="p-6 text-sm text-gray-500">
        Keine unified findings für diesen Run gespeichert (alter Run vor P5?).
      </div>
    )
  }

  const sum = data.summary
  const findings = data.findings

  return (
    <div className="space-y-4">
      {/* Summary Cards */}
      <div className="grid grid-cols-2 md:grid-cols-4 gap-3 text-xs">
        {Object.entries(SOURCE_LABEL).filter(([k]) => k !== 'all').map(([k, label]) => {
          const count = sum.by_source?.[k] ?? 0
          return (
            <button key={k}
              onClick={() => setSource(source === k ? 'all' : k)}
              className={`text-left rounded-lg border px-3 py-2 transition ${
                source === k
                  ? 'border-blue-500 bg-blue-50 text-blue-900'
                  : 'border-gray-200 hover:border-gray-300 bg-white'
              }`}>
              <div className="text-[10px] uppercase tracking-wide text-gray-500">{label}</div>
              <div className="text-lg font-semibold">{count}</div>
            </button>
          )
        })}
      </div>

      {/* Filter row */}
      <div className="flex flex-wrap gap-2 items-center text-xs">
        <select value={severity} onChange={e => setSeverity(e.target.value)}
          className="border border-gray-200 rounded px-2 py-1">
          {SEVERITY_OPTS.map(s => (
            <option key={s} value={s}>
              {s === 'all' ? 'Alle Severities' : s}
              {s !== 'all' && sum.by_severity?.[s] != null ? ` (${sum.by_severity[s]})` : ''}
            </option>
          ))}
        </select>
        <select value={status} onChange={e => setStatus(e.target.value)}
          className="border border-gray-200 rounded px-2 py-1">
          {STATUS_OPTS.map(s => (
            <option key={s} value={s}>
              {s === 'all' ? 'Alle Status' : STATUS_LABEL[s] ?? s}
              {s !== 'all' && sum.by_status?.[s] != null ? ` (${sum.by_status[s]})` : ''}
            </option>
          ))}
        </select>
        <select value={docType} onChange={e => setDocType(e.target.value)}
          className="border border-gray-200 rounded px-2 py-1">
          <option value="all">Alle Doc-Types</option>
          {docTypes.map(d => (
            <option key={d} value={d}>{d} ({sum.by_doc_type?.[d] ?? 0})</option>
          ))}
        </select>
        <input value={q} onChange={e => setQ(e.target.value)}
          placeholder="Suche Label / Anbieter…"
          className="border border-gray-200 rounded px-2 py-1 min-w-[180px]" />
        <button onClick={csvExport}
          className="ml-auto border border-gray-200 hover:border-gray-300 rounded px-2 py-1">
          CSV exportieren
        </button>
        <span className="text-gray-500">{data.count} Treffer</span>
      </div>

      {/* Findings table */}
      <div className="border rounded-lg overflow-hidden">
        <table className="w-full text-xs">
          <thead className="bg-gray-50 text-gray-600">
            <tr>
              <th className="px-3 py-2 text-left">Quelle</th>
              <th className="px-3 py-2 text-left">Doc</th>
              <th className="px-3 py-2 text-left">Sev</th>
              <th className="px-3 py-2 text-left">Status</th>
              <th className="px-3 py-2 text-left">Finding</th>
            </tr>
          </thead>
          <tbody>
            {findings.map(f => (
              <React.Fragment key={f.id}>
                <tr className="border-t cursor-pointer hover:bg-gray-50"
                  onClick={() => setExpanded(expanded === f.id ? null : f.id)}>
                  <td className="px-3 py-2 text-gray-500 capitalize">{f.source_type}</td>
                  <td className="px-3 py-2 text-gray-700">{f.doc_type === '-' ? '—' : f.doc_type}</td>
                  <td className="px-3 py-2">
                    <span className={`px-2 py-0.5 rounded text-[10px] font-medium ${
                      SEVERITY_COLOR[f.severity] || 'bg-gray-100'
                    }`}>{f.severity}</span>
                  </td>
                  <td className="px-3 py-2 text-gray-600">{STATUS_LABEL[f.status] ?? f.status}</td>
                  <td className="px-3 py-2 text-gray-900">
                    {f.label}
                    {f.vendor_name && (
                      <span className="ml-2 text-[10px] text-gray-400">
                        · {f.vendor_name}
                      </span>
                    )}
                    {(() => {
                      // Optional risk badge taken from the finding's payload.
                      const rl = String(f.payload?.risk_label ?? '')
                      if (!rl) return null
                      const cls = rl === 'kritisch' ? 'bg-red-600 text-white' :
                        rl === 'hoch' ? 'bg-red-100 text-red-800' :
                        rl === 'mittel' ? 'bg-amber-100 text-amber-800' :
                        rl === 'gering' ? 'bg-green-50 text-green-700' :
                        'bg-gray-100 text-gray-500'
                      return <span className={`ml-2 px-1.5 py-0.5 rounded text-[10px] font-medium ${cls}`}>Risk: {rl}</span>
                    })()}
                  </td>
                </tr>
                {expanded === f.id && (
                  <tr className="bg-gray-50/50">
                    <td colSpan={5} className="px-3 py-3 text-xs space-y-2">
                      {f.hint && (
                        <div className="text-gray-700">{f.hint}</div>
                      )}
                      {f.action_recipe?.fix_text && (
                        <div className="bg-amber-50 border-l-2 border-amber-300 pl-3 py-2">
                          <div className="font-medium text-amber-800 mb-1">Empfehlung</div>
                          <div className="whitespace-pre-line text-amber-900">
                            {f.action_recipe.fix_text}
                          </div>
                          {f.action_recipe.where && (
                            <div className="text-[10px] text-amber-700 mt-1">
                              Einfuegen in: {f.action_recipe.where}
                            </div>
                          )}
                        </div>
                      )}
                      {f.anchor_excerpt && (
                        <div className="bg-blue-50 border-l-2 border-blue-300 pl-3 py-2">
                          <div className="font-medium text-blue-800 mb-1">
                            Fundstelle im Dokument (Konfidenz {Math.round((f.anchor_conf || 0) * 100)}%)
                          </div>
                          <div className="italic text-blue-900">"{f.anchor_excerpt}"</div>
                        </div>
                      )}
                      <div className="text-[10px] text-gray-400">
                        Source: {f.source_type} · Regulation: {f.regulation || '—'}
                        {f.category && ` · Kategorie: ${f.category}`}
                      </div>
                    </td>
                  </tr>
                )}
              </React.Fragment>
            ))}
            {findings.length === 0 && (
              <tr><td colSpan={5} className="px-3 py-6 text-center text-gray-400">
                Keine Findings fuer die aktuellen Filter.
              </td></tr>
            )}
          </tbody>
        </table>
      </div>
    </div>
  )
}
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import React, { useEffect, useState, useMemo } from 'react'
|
import React, { useEffect, useState, useMemo } from 'react'
|
||||||
import { use as useUnwrap } from 'react'
|
import { use as useUnwrap } from 'react'
|
||||||
|
import FindingsTab from './FindingsTab'
|
||||||
|
|
||||||
type MCRow = {
|
type MCRow = {
|
||||||
id: number
|
id: number
|
||||||
@@ -41,19 +42,43 @@ type AuditResponse = {
|
|||||||
results?: MCRow[]
|
results?: MCRow[]
|
||||||
}
|
}
|
||||||
|
|
||||||
const SEVERITY_COLOR: Record<string, string> = {
|
// P8: MC-Audit ist eine Checkliste, KEINE Severity-Drohung. Statt
|
||||||
CRITICAL: 'bg-red-600 text-white',
|
// rotem HIGH-Badge zeigen wir die Quellen-Prioritaet (Gesetz vs.
|
||||||
HIGH: 'bg-red-100 text-red-800',
|
// Behoerden-Leitlinie vs. Best-Practice) und einen 3-Tier-Status
|
||||||
MEDIUM: 'bg-amber-100 text-amber-800',
|
// (erfuellt / nicht erfuellt / selbst pruefen).
|
||||||
LOW: 'bg-blue-100 text-blue-800',
|
|
||||||
INFO: 'bg-gray-100 text-gray-600',
|
const PRIORITY_BADGE: Record<string, string> = {
|
||||||
|
Gesetz: 'bg-slate-800 text-white',
|
||||||
|
'Behoerden-Leitlinie': 'bg-blue-100 text-blue-800',
|
||||||
|
'Best-Practice': 'bg-gray-100 text-gray-600',
|
||||||
|
'—': 'bg-gray-50 text-gray-400',
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Map a regulation name to the source-priority tier used for its badge.
 *
 * The name is lower-cased and tested against three keyword patterns in
 * order; the first match wins. Returns 'Gesetz', 'Behoerden-Leitlinie',
 * 'Best-Practice', or '—' when nothing matches (including empty input).
 *
 * NOTE(review): the patterns match bare substrings, so short keywords such
 * as "iso" or "bsi" also hit inside longer words — confirm this is intended.
 */
function regulationToPriority(reg: string): keyof typeof PRIORITY_BADGE {
  const normalized = (reg || '').toLowerCase()
  // Ordered from highest to lowest priority; evaluation stops at first hit.
  const tiers: Array<[RegExp, string]> = [
    [/dsgvo|gdpr|eprivacy|tdddg|tkg|bdsg|ttdsg/, 'Gesetz'],
    [/edpb|dsk|cnil|lfdi|eugh|orientierungshilfe|leitlinie|guideline/, 'Behoerden-Leitlinie'],
    [/iso|nist|bsi|cobit|sox/, 'Best-Practice'],
  ]
  for (const [pattern, tier] of tiers) {
    if (pattern.test(normalized)) {
      return tier
    }
  }
  return '—'
}
|
||||||
|
|
||||||
|
// Detects conditional wording in a label (falls / sofern / wenn / soweit /
// gegebenenfalls as whole words, or the abbreviation "ggf.").
// "ggf." must not be followed by `\b`: a word boundary after the dot only
// exists when a word character comes next, so "ggf. " would never match.
const _CONDITIONAL_RE = /\b(?:(?:falls|sofern|wenn|soweit|gegebenenfalls)\b|ggf\.)/i
|
||||||
|
|
||||||
|
/**
 * Classify a master-control row into one of four review states.
 *
 * - 'pass'   when the row passed
 * - 'na'     when the row was skipped
 * - 'review' when the row failed without a matched-text excerpt and its
 *            label contains conditional wording
 * - 'fail'   for every other failed row
 */
function rowReviewStatus(r: MCRow): 'pass' | 'fail' | 'review' | 'na' {
  if (r.passed) {
    return 'pass'
  }
  if (r.skipped) {
    return 'na'
  }
  // A failure backed by matched text is always a hard fail.
  if (r.matched_text) {
    return 'fail'
  }
  // Without evidence, a conditional label is handed to the user for review.
  const conditionalLabel = _CONDITIONAL_RE.test(r.label || '')
  return conditionalLabel ? 'review' : 'fail'
}
|
||||||
|
|
||||||
const STATUS_FILTERS = [
|
const STATUS_FILTERS = [
|
||||||
{ value: 'all', label: 'Alle' },
|
{ value: 'all', label: 'Alle' },
|
||||||
{ value: 'failed', label: 'Nur Fail' },
|
{ value: 'fail', label: 'Nicht erfuellt' },
|
||||||
{ value: 'passed', label: 'Nur Pass' },
|
{ value: 'review', label: 'Selbst pruefen' },
|
||||||
{ value: 'skipped', label: 'Nur Skipped' },
|
{ value: 'pass', label: 'Erfuellt' },
|
||||||
|
{ value: 'na', label: 'Nicht anwendbar' },
|
||||||
] as const
|
] as const
|
||||||
|
|
||||||
export default function AuditPage(
|
export default function AuditPage(
|
||||||
@@ -63,10 +88,11 @@ export default function AuditPage(
|
|||||||
const [data, setData] = useState<AuditResponse | null>(null)
|
const [data, setData] = useState<AuditResponse | null>(null)
|
||||||
const [loading, setLoading] = useState(true)
|
const [loading, setLoading] = useState(true)
|
||||||
const [error, setError] = useState<string | null>(null)
|
const [error, setError] = useState<string | null>(null)
|
||||||
const [filterStatus, setFilterStatus] = useState<typeof STATUS_FILTERS[number]['value']>('failed')
|
const [filterStatus, setFilterStatus] = useState<typeof STATUS_FILTERS[number]['value']>('fail')
|
||||||
const [filterReg, setFilterReg] = useState<string>('')
|
const [filterReg, setFilterReg] = useState<string>('')
|
||||||
const [filterDoc, setFilterDoc] = useState<string>('')
|
const [filterDoc, setFilterDoc] = useState<string>('')
|
||||||
const [expanded, setExpanded] = useState<number | null>(null)
|
const [expanded, setExpanded] = useState<number | null>(null)
|
||||||
|
const [tab, setTab] = useState<'mc' | 'all'>('all')
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
let cancelled = false
|
let cancelled = false
|
||||||
@@ -90,9 +116,7 @@ export default function AuditPage(
|
|||||||
)
|
)
|
||||||
|
|
||||||
const filtered = allRows.filter(r => {
|
const filtered = allRows.filter(r => {
|
||||||
if (filterStatus === 'failed' && (r.passed || r.skipped)) return false
|
if (filterStatus !== 'all' && rowReviewStatus(r) !== filterStatus) return false
|
||||||
if (filterStatus === 'passed' && !r.passed) return false
|
|
||||||
if (filterStatus === 'skipped' && !r.skipped) return false
|
|
||||||
if (filterReg && r.regulation !== filterReg) return false
|
if (filterReg && r.regulation !== filterReg) return false
|
||||||
if (filterDoc && r.doc_type !== filterDoc) return false
|
if (filterDoc && r.doc_type !== filterDoc) return false
|
||||||
return true
|
return true
|
||||||
@@ -127,6 +151,25 @@ export default function AuditPage(
|
|||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* Tab switcher */}
|
||||||
|
<div className="flex gap-2 border-b border-gray-200">
|
||||||
|
{([
|
||||||
|
{ key: 'all', label: 'Voll-Audit (alle Findings)' },
|
||||||
|
{ key: 'mc', label: 'Nur MC-Scorecard' },
|
||||||
|
] as const).map(t => (
|
||||||
|
<button key={t.key}
|
||||||
|
onClick={() => setTab(t.key)}
|
||||||
|
className={`px-4 py-2 text-sm border-b-2 -mb-px transition ${
|
||||||
|
tab === t.key
|
||||||
|
? 'border-blue-600 text-blue-700 font-medium'
|
||||||
|
: 'border-transparent text-gray-500 hover:text-gray-700'
|
||||||
|
}`}>{t.label}</button>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{tab === 'all' && <FindingsTab checkId={checkId} />}
|
||||||
|
|
||||||
|
{tab === 'mc' && <>
|
||||||
{/* Scorecard */}
|
{/* Scorecard */}
|
||||||
<div className="border rounded-lg overflow-hidden">
|
<div className="border rounded-lg overflow-hidden">
|
||||||
<div className="px-4 py-3 bg-blue-50 border-b border-blue-100">
|
<div className="px-4 py-3 bg-blue-50 border-b border-blue-100">
|
||||||
@@ -212,7 +255,7 @@ export default function AuditPage(
|
|||||||
<th className="px-3 py-2 text-left">Doc</th>
|
<th className="px-3 py-2 text-left">Doc</th>
|
||||||
<th className="px-3 py-2 text-left">Regulation</th>
|
<th className="px-3 py-2 text-left">Regulation</th>
|
||||||
<th className="px-3 py-2 text-left">MC</th>
|
<th className="px-3 py-2 text-left">MC</th>
|
||||||
<th className="px-3 py-2 text-left">Severity</th>
|
<th className="px-3 py-2 text-left">Prioritaet</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
@@ -221,21 +264,26 @@ export default function AuditPage(
|
|||||||
<tr className="border-t cursor-pointer hover:bg-gray-50"
|
<tr className="border-t cursor-pointer hover:bg-gray-50"
|
||||||
onClick={() => setExpanded(expanded === row.id ? null : row.id)}>
|
onClick={() => setExpanded(expanded === row.id ? null : row.id)}>
|
||||||
<td className="px-3 py-2">
|
<td className="px-3 py-2">
|
||||||
{row.passed ? (
|
{(() => {
|
||||||
<span className="text-green-600">✓</span>
|
const st = rowReviewStatus(row)
|
||||||
) : row.skipped ? (
|
if (st === 'pass') return <span className="text-green-600" title="Erfuellt">✓</span>
|
||||||
<span className="text-gray-400">—</span>
|
if (st === 'na') return <span className="text-gray-400" title="Nicht anwendbar">—</span>
|
||||||
) : (
|
if (st === 'review') return <span className="text-amber-600" title="Selbst pruefen">?</span>
|
||||||
<span className="text-red-600">✗</span>
|
return <span className="text-red-600" title="Nicht erfuellt">✗</span>
|
||||||
)}
|
})()}
|
||||||
</td>
|
</td>
|
||||||
<td className="px-3 py-2 text-gray-700">{row.doc_type}</td>
|
<td className="px-3 py-2 text-gray-700">{row.doc_type}</td>
|
||||||
<td className="px-3 py-2 text-gray-500">{row.regulation || '—'}</td>
|
<td className="px-3 py-2 text-gray-500">{row.regulation || '—'}</td>
|
||||||
<td className="px-3 py-2 text-gray-900">{row.label}</td>
|
<td className="px-3 py-2 text-gray-900">{row.label}</td>
|
||||||
<td className="px-3 py-2">
|
<td className="px-3 py-2">
|
||||||
<span className={`px-2 py-0.5 rounded text-[10px] font-medium ${
|
{(() => {
|
||||||
SEVERITY_COLOR[row.severity] || 'bg-gray-100'
|
const prio = regulationToPriority(row.regulation)
|
||||||
}`}>{row.severity || '—'}</span>
|
return (
|
||||||
|
<span className={`px-2 py-0.5 rounded text-[10px] font-medium ${PRIORITY_BADGE[prio]}`}>
|
||||||
|
{prio}
|
||||||
|
</span>
|
||||||
|
)
|
||||||
|
})()}
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
{expanded === row.id && (
|
{expanded === row.id && (
|
||||||
@@ -272,6 +320,7 @@ export default function AuditPage(
|
|||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
|
</>}
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,45 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import Link from 'next/link'
|
||||||
|
|
||||||
|
/** Props accepted by `Art10Tile`. */
type Props = {
  /**
   * Risk classification of the AI system. The tile is rendered only for
   * `'high_risk'` and `'unacceptable'`.
   */
  riskLevel: string
}
|
||||||
|
|
||||||
|
/**
 * Link tile that points to `/sdk/quality?category=data_quality`, the BSI
 * QUAIDAL-based data-quality view.
 *
 * AI Act Article 10 obligations (training-data quality) apply only to
 * high-risk systems, so the component returns `null` unless `riskLevel` is
 * `'high_risk'` or `'unacceptable'`.
 */
export function Art10Tile({ riskLevel }: Props) {
  const eligibleLevels = ['high_risk', 'unacceptable']
  if (!eligibleLevels.includes(riskLevel)) {
    return null
  }

  // Round badge with the leading icon.
  const badge = (
    <div className="w-9 h-9 rounded-full bg-purple-200 text-purple-700 flex items-center justify-center shrink-0">
      <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
        <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2}
          d="M3 7v10a2 2 0 002 2h14a2 2 0 002-2V7M3 7l9 6 9-6M3 7l9-4 9 4" />
      </svg>
    </div>
  )

  // Trailing chevron that hints the tile is clickable.
  const chevron = (
    <svg className="w-4 h-4 text-purple-500 shrink-0 mt-1" fill="none" stroke="currentColor" viewBox="0 0 24 24">
      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
    </svg>
  )

  return (
    <Link
      href="/sdk/quality?category=data_quality"
      className="block mt-3 p-3 rounded-lg border border-purple-200 bg-purple-50 hover:bg-purple-100 transition-colors"
    >
      <div className="flex items-start gap-3">
        {badge}
        <div className="flex-1 min-w-0">
          <div className="text-sm font-semibold text-purple-900">
            Art. 10 Datenqualität (Hochrisiko-KI)
          </div>
          <div className="text-xs text-purple-700 mt-0.5">
            BSI QUAIDAL Controls: 10 Kriterien, 15 Bausteine, 30 Maßnahmen, 140 Metriken.
            Klicken zum Öffnen des Trainingsdaten-Qualität-Moduls.
          </div>
        </div>
        {chevron}
      </div>
    </Link>
  )
}
|
||||||
@@ -9,6 +9,7 @@ import { RiskPyramid } from './_components/RiskPyramid'
|
|||||||
import { AddSystemForm } from './_components/AddSystemForm'
|
import { AddSystemForm } from './_components/AddSystemForm'
|
||||||
import { AISystemCard } from './_components/AISystemCard'
|
import { AISystemCard } from './_components/AISystemCard'
|
||||||
import DecisionTreeWizard from '@/components/sdk/ai-act/DecisionTreeWizard'
|
import DecisionTreeWizard from '@/components/sdk/ai-act/DecisionTreeWizard'
|
||||||
|
import { Art10Tile } from './_components/Art10Tile'
|
||||||
|
|
||||||
type TabId = 'overview' | 'decision-tree' | 'results'
|
type TabId = 'overview' | 'decision-tree' | 'results'
|
||||||
|
|
||||||
@@ -136,6 +137,7 @@ function SavedResultsTab() {
|
|||||||
Löschen
|
Löschen
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
<Art10Tile riskLevel={r.high_risk_result} />
|
||||||
</div>
|
</div>
|
||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -0,0 +1,211 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState, useEffect, useCallback } from 'react'
|
||||||
|
import { useParams } from 'next/navigation'
|
||||||
|
|
||||||
|
/**
 * One reusable mitigation as listed in the `suggestions` array of the
 * customer-standards response.
 */
interface Suggestion {
  /** Display name; also used as row key, selection key and import payload. */
  name: string
  description: string
  /** Known values are listed explicitly; any other string is accepted too. */
  reduction_type: 'design' | 'protection' | 'information' | string
  /** Shown in the table as "N×". */
  source_project_count: number
  /** Shown as the "aus: …" line, truncated to the first three names. */
  source_project_names: string[]
  /** Drives the "Kundenstandard" badge. */
  is_customer_standard: boolean
  /** Drives the "Verifiziert" badge when the row is not a customer standard. */
  has_verified_instances: boolean
}
|
||||||
|
|
||||||
|
// Subset of the project record this page keeps in state; both fields are optional.
type ProjectInfo = { customer_name?: string; machine_name?: string }
|
||||||
|
|
||||||
|
// /sdk/iace/[projectId]/customer-standards
//
// Lists mitigations that were flagged as "Kundenstandard" in earlier projects
// of the SAME customer and lets the user import them into the current project,
// one at a time or as a batch. With the "Auch verifizierte einbeziehen" filter
// the request is sent with include_verified=true to widen the pool — useful
// when the customer-standard habit is not yet established in the corpus.
//
// Per the original design note an import applies the mitigation to all
// matching hazards (is_relevant=true, is_customer_standard=true,
// status='verified'); that happens on the server and is not visible here.
//
// Error handling:
//  - Non-OK HTTP responses and network failures while loading are reported.
//  - A failed project fetch shows the error instead of the misleading
//    "no customer name" hint.
//  - Responses of superseded loads (filter toggled quickly, unmount) are
//    discarded so they cannot overwrite newer state.
//  - Network failures during an import are caught and reported instead of
//    surfacing as unhandled promise rejections.
export default function CustomerStandardsPage() {
  const params = useParams()
  const projectId = params.projectId as string

  const [suggestions, setSuggestions] = useState<Suggestion[]>([])
  const [project, setProject] = useState<ProjectInfo | null>(null)
  const [loading, setLoading] = useState(true)
  const [includeVerified, setIncludeVerified] = useState(false)
  const [importing, setImporting] = useState<string | null>(null)
  const [importedNames, setImportedNames] = useState<Set<string>>(new Set())
  const [selected, setSelected] = useState<Set<string>>(new Set())
  const [error, setError] = useState<string | null>(null)

  // Fetches suggestions and project info in parallel. `isStale` lets the
  // caller invalidate an in-flight request; once it returns true no further
  // state updates are made by this invocation.
  const load = useCallback(async (isStale: () => boolean = () => false) => {
    setLoading(true)
    setError(null)
    try {
      const [sgRes, prRes] = await Promise.all([
        fetch(`/api/sdk/v1/iace/projects/${projectId}/customer-standards?include_verified=${includeVerified}`),
        fetch(`/api/sdk/v1/iace/projects/${projectId}`),
      ])
      if (isStale()) return

      // fetch() resolves for HTTP error statuses, so they must be checked explicitly.
      const failedRes = !sgRes.ok ? sgRes : !prRes.ok ? prRes : null
      if (failedRes) setError(`HTTP ${failedRes.status}`)

      if (sgRes.ok) {
        const j = await sgRes.json()
        if (isStale()) return
        setSuggestions(j.suggestions || [])
      }
      if (prRes.ok) {
        const j = await prRes.json()
        if (isStale()) return
        const p = j.project || j
        setProject({ customer_name: p.customer_name, machine_name: p.machine_name })
      }
    } catch (e) {
      if (!isStale()) setError(e instanceof Error ? e.message : String(e))
    } finally {
      // A superseded load must not hide the spinner of the load that replaced it.
      if (!isStale()) setLoading(false)
    }
  }, [projectId, includeVerified])

  useEffect(() => {
    let stale = false
    load(() => stale)
    return () => { stale = true }
  }, [load])

  function toggleSelect(name: string) {
    setSelected((prev) => {
      const next = new Set(prev)
      if (next.has(name)) next.delete(name); else next.add(name)
      return next
    })
  }

  // Imports a single mitigation. Resolves to true on success; on failure the
  // error state is set and false is returned. Never rejects.
  // `clearError` is false for batch imports so an earlier failure stays visible.
  async function importOne(name: string, clearError = true): Promise<boolean> {
    setImporting(name)
    if (clearError) setError(null)
    try {
      const r = await fetch(`/api/sdk/v1/iace/projects/${projectId}/customer-standards/import`, {
        method: 'POST', headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ name }),
      })
      if (r.ok) {
        setImportedNames((prev) => new Set(prev).add(name))
        setSelected((prev) => { const n = new Set(prev); n.delete(name); return n })
        return true
      }
      const j = await r.json().catch(() => null)
      setError(j?.error || `HTTP ${r.status}`)
      return false
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e))
      return false
    } finally {
      setImporting(null)
    }
  }

  // Imports all selected mitigations sequentially; a failure does not stop
  // the remaining imports.
  async function importSelected() {
    const names = Array.from(selected)
    setError(null)
    for (const n of names) {
      await importOne(n, false)
    }
  }

  if (loading) return (
    <div className="flex items-center justify-center h-64">
      <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-purple-600" />
    </div>
  )

  // No customer set → guide the user to set it first
  const hasCustomer = !!(project?.customer_name && project.customer_name.trim() !== '')
  if (!hasCustomer) {
    return (
      <div className="space-y-4 max-w-3xl">
        <h1 className="text-2xl font-bold">Kundenstandards</h1>
        {error && <div className="text-red-600 text-sm">Fehler: {error}</div>}
        {/* Without a loaded project we cannot tell whether the name is really missing. */}
        {(project || !error) && (
          <div className="rounded-md border border-amber-200 bg-amber-50 px-4 py-3 text-sm text-amber-900">
            Dieses Projekt hat noch keinen <em>Kundennamen</em>. Damit Massnahmen aus früheren
            Anlagen desselben Kunden wiederverwendet werden können, trage den Kundennamen
            unter <a className="text-purple-700 underline" href={`/sdk/iace/${projectId}/order`}>Auftrag → Kunde</a> ein.
            Sobald der Kundenname gesetzt ist, erscheint hier die Liste der wiederverwendbaren
            Maßnahmen aus seinen Vorprojekten.
          </div>
        )}
      </div>
    )
  }

  return (
    <div className="space-y-4">
      <div className="flex items-baseline justify-between">
        <div>
          <h1 className="text-2xl font-bold text-gray-900 dark:text-white">Kundenstandards</h1>
          <p className="mt-1 text-sm text-gray-500">
            Übernimm Maßnahmen, die der Kunde <strong>{project?.customer_name}</strong> in
            anderen Anlagen bereits als Standard etabliert hat. Übernehmen setzt sie für alle
            passenden Gefährdungen <em>relevant</em> und <em>verifiziert</em> ohne Nachweis.
          </p>
        </div>
        <div className="flex items-center gap-3">
          <label className="flex items-center gap-1.5 text-xs text-gray-600">
            <input type="checkbox" checked={includeVerified}
              onChange={(e) => setIncludeVerified(e.target.checked)}
              className="accent-purple-600" />
            Auch <em>verifizierte</em> einbeziehen
          </label>
          {selected.size > 0 && (
            <button onClick={() => importSelected()} disabled={!!importing}
              className="px-3 py-1.5 text-xs bg-purple-600 text-white rounded-lg hover:bg-purple-700 disabled:opacity-50">
              {importing ? 'Übernehme…' : `${selected.size} übernehmen`}
            </button>
          )}
        </div>
      </div>

      {error && <div className="text-red-600 text-sm">Fehler: {error}</div>}

      {/* After a failed load an empty list does not mean "nothing found". */}
      {suggestions.length === 0 && !error && (
        <div className="rounded-md border border-gray-200 bg-gray-50 px-4 py-6 text-sm text-gray-600">
          Keine wiederverwendbaren Maßnahmen für <strong>{project?.customer_name}</strong> gefunden.
          {!includeVerified && ' Aktiviere „Auch verifizierte einbeziehen" oben rechts, um den Pool zu erweitern.'}
        </div>
      )}

      {suggestions.length > 0 && (
        <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 overflow-hidden">
          <div className="grid grid-cols-[28px_2fr_120px_100px_120px] gap-3 px-4 py-2 bg-gray-50 dark:bg-gray-750 text-xs font-medium text-gray-500 uppercase tracking-wider">
            <div />
            <div>Massnahme</div>
            <div className="text-center">Vorprojekte</div>
            <div>Status</div>
            <div className="text-right">Aktion</div>
          </div>
          {suggestions.map((s) => {
            const imported = importedNames.has(s.name)
            // NOTE(review): guards against a JSON `null` here (e.g. an empty
            // list serialized as null by the backend) — confirm against the API.
            const sourceNames = s.source_project_names ?? []
            return (
              <div key={s.name} className={`grid grid-cols-[28px_2fr_120px_100px_120px] gap-3 px-4 py-2.5 border-t border-gray-100 dark:border-gray-700 ${imported ? 'bg-green-50/40' : ''} ${selected.has(s.name) ? 'bg-purple-50' : ''}`}>
                <div className="pt-0.5">
                  <input type="checkbox" checked={selected.has(s.name)} onChange={() => toggleSelect(s.name)} disabled={imported}
                    className="accent-purple-600" />
                </div>
                <div className="min-w-0">
                  <div className="text-sm text-gray-900 dark:text-white">{s.name}</div>
                  {s.description && <div className="text-[11px] text-gray-500 mt-0.5 line-clamp-2">{s.description}</div>}
                  {sourceNames.length > 0 && (
                    <div className="text-[10px] text-gray-400 mt-1">aus: {sourceNames.slice(0,3).join(', ')}{sourceNames.length > 3 ? ` (+${sourceNames.length - 3})` : ''}</div>
                  )}
                </div>
                <div className="text-center self-center">
                  <span className="text-sm font-semibold text-purple-700">{s.source_project_count}×</span>
                </div>
                <div className="self-center flex flex-wrap gap-1">
                  {s.is_customer_standard && <span className="text-[10px] px-1.5 py-0.5 rounded bg-blue-100 text-blue-700">Kundenstandard</span>}
                  {s.has_verified_instances && !s.is_customer_standard && <span className="text-[10px] px-1.5 py-0.5 rounded bg-green-100 text-green-700">Verifiziert</span>}
                </div>
                <div className="text-right self-center">
                  {imported ? (
                    <span className="text-[11px] text-green-700">✓ Übernommen</span>
                  ) : (
                    <button onClick={() => importOne(s.name)} disabled={!!importing}
                      className="px-2.5 py-1 text-[11px] bg-purple-600 text-white rounded hover:bg-purple-700 disabled:opacity-50">
                      {importing === s.name ? 'Übernehme…' : 'Übernehmen'}
                    </button>
                  )}
                </div>
              </div>
            )
          })}
        </div>
      )}
    </div>
  )
}
|
||||||
@@ -68,10 +68,14 @@ export default function OrderPage() {
|
|||||||
setSaveState('saving')
|
setSaveState('saving')
|
||||||
try {
|
try {
|
||||||
const merged = { ...existingMetaRef.current, order_data: next }
|
const merged = { ...existingMetaRef.current, order_data: next }
|
||||||
|
// Mirror Auftraggeber.Firmenname into the top-level customer_name
|
||||||
|
// column so the Customer-Standards-Reuse feature can index by it.
|
||||||
|
// Empty string → null on the backend, no broken reuse for fresh projects.
|
||||||
|
const customerName = (next.client.company || '').trim()
|
||||||
await fetch(`/api/sdk/v1/iace/projects/${projectId}`, {
|
await fetch(`/api/sdk/v1/iace/projects/${projectId}`, {
|
||||||
method: 'PUT',
|
method: 'PUT',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ metadata: merged }),
|
body: JSON.stringify({ metadata: merged, customer_name: customerName }),
|
||||||
})
|
})
|
||||||
existingMetaRef.current = merged
|
existingMetaRef.current = merged
|
||||||
setSaveState('saved')
|
setSaveState('saved')
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ const IACE_NAV_ITEMS = [
|
|||||||
{ id: 'mitigations', label: 'Massnahmen', href: '/mitigations', icon: 'shield' },
|
{ id: 'mitigations', label: 'Massnahmen', href: '/mitigations', icon: 'shield' },
|
||||||
{ id: 'clarifications', label: 'Klärungen', href: '/clarifications', icon: 'chat' },
|
{ id: 'clarifications', label: 'Klärungen', href: '/clarifications', icon: 'chat' },
|
||||||
{ id: 'verification', label: 'Verifikation', href: '/verification', icon: 'check' },
|
{ id: 'verification', label: 'Verifikation', href: '/verification', icon: 'check' },
|
||||||
|
{ id: 'customer-standards', label: 'Kundenstandards', href: '/customer-standards', icon: 'building' },
|
||||||
{ id: 'evidence', label: 'Nachweise', href: '/evidence', icon: 'document' },
|
{ id: 'evidence', label: 'Nachweise', href: '/evidence', icon: 'document' },
|
||||||
{ id: 'tech-file', label: 'CE-Akte', href: '/tech-file', icon: 'folder' },
|
{ id: 'tech-file', label: 'CE-Akte', href: '/tech-file', icon: 'folder' },
|
||||||
]
|
]
|
||||||
@@ -67,6 +68,12 @@ function NavIcon({ icon, className }: { icon: string; className?: string }) {
|
|||||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
|
||||||
</svg>
|
</svg>
|
||||||
)
|
)
|
||||||
|
case 'building':
|
||||||
|
return (
|
||||||
|
<svg className={cls} fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 21V5a2 2 0 00-2-2H7a2 2 0 00-2 2v16m14 0H5m14 0h2m-16 0H3m4-4h2m-2-4h2m-2-4h2m4 8h2m-2-4h2m-2-4h2" />
|
||||||
|
</svg>
|
||||||
|
)
|
||||||
case 'document':
|
case 'document':
|
||||||
return (
|
return (
|
||||||
<svg className={cls} fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
<svg className={cls} fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||||
|
|||||||
@@ -0,0 +1,152 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useEffect, useState } from 'react'
|
||||||
|
import { fetchCriterionTree, type QuaidalControl, type QuaidalCriterionTree } from '../_hooks/useQuaidalData'
|
||||||
|
|
||||||
|
/** Props accepted by `QuaidalCriterionDetail`. */
type Props = {
  /** Section identifier passed to `fetchCriterionTree`; also the fallback title. */
  sectionId: string
  /** Invoked when the user clicks the close button. */
  onClose: () => void
}
|
||||||
|
|
||||||
|
/**
 * Card for a single control: name, section badge, description and, when the
 * source has a URL, an external link to it.
 *
 * @param ctrl       Control to display.
 * @param badgeColor Tailwind classes applied to the section badge.
 */
function ControlBlock({ ctrl, badgeColor }: { ctrl: QuaidalControl; badgeColor: string }) {
  const { canonical_name: title, description, source } = ctrl
  const badgeClasses = `px-2 py-0.5 text-xs rounded-full ${badgeColor} shrink-0`

  return (
    <div className="border border-gray-200 rounded-lg p-4 bg-white">
      <div className="flex items-start justify-between gap-3 mb-2">
        <h4 className="font-semibold text-gray-900">{title}</h4>
        <span className={badgeClasses}>{source.section}</span>
      </div>
      <p className="text-sm text-gray-600 mb-3 whitespace-pre-line">{description}</p>
      {source.url && (
        <a
          href={source.url}
          target="_blank"
          rel="noreferrer noopener"
          className="text-xs text-purple-600 hover:text-purple-800 underline"
        >
          BSI-Quelle ansehen ({source.framework})
        </a>
      )}
    </div>
  )
}
|
||||||
|
|
||||||
|
/**
 * Modal showing one criterion together with its building blocks, measures
 * and metrics, loaded via `fetchCriterionTree(sectionId)`.
 *
 * Loading behaviour:
 *  - The tree is reloaded whenever `sectionId` changes; the previous tree is
 *    cleared first so stale content is never shown under a new title.
 *  - A rejected request or an empty result ends the loading state and shows
 *    an error message instead of spinning forever or rendering an empty body.
 *  - Results arriving after unmount or after a newer request are ignored.
 *
 * @param sectionId Section identifier to load; used as title until data arrives.
 * @param onClose   Called when the close button is clicked.
 */
export function QuaidalCriterionDetail({ sectionId, onClose }: Props) {
  const [tree, setTree] = useState<QuaidalCriterionTree | null>(null)
  const [loading, setLoading] = useState(true)
  const [failed, setFailed] = useState(false)

  useEffect(() => {
    let active = true
    setLoading(true)
    setFailed(false)
    setTree(null)
    fetchCriterionTree(sectionId)
      .then(t => {
        if (!active) return
        setTree(t)
        // A falsy result means there is nothing to render for this section.
        setFailed(!t)
        setLoading(false)
      })
      .catch(() => {
        if (!active) return
        setFailed(true)
        setLoading(false)
      })
    return () => { active = false }
  }, [sectionId])

  // The three control lists share one layout and differ only in title and badge colour.
  const sections = tree
    ? [
        { title: 'Bausteine', items: tree.building_blocks, badgeColor: 'bg-blue-100 text-blue-700' },
        { title: 'Maßnahmen', items: tree.measures, badgeColor: 'bg-green-100 text-green-700' },
        { title: 'Metriken & Methoden', items: tree.metrics, badgeColor: 'bg-amber-100 text-amber-700' },
      ]
    : []

  return (
    <div
      className="fixed inset-0 z-50 flex items-center justify-center bg-black/40 p-4"
      role="dialog"
      aria-modal="true"
    >
      <div className="bg-white rounded-2xl shadow-xl w-full max-w-4xl max-h-[90vh] overflow-hidden flex flex-col">
        <div className="flex items-center justify-between px-6 py-4 border-b border-gray-200">
          <div>
            <div className="text-xs text-gray-500 uppercase tracking-wide">QUAIDAL Kriterium</div>
            <h2 className="text-xl font-bold text-gray-900">
              {tree?.criterion.canonical_name || sectionId}
            </h2>
          </div>
          <button
            type="button"
            onClick={onClose}
            className="w-8 h-8 rounded-full hover:bg-gray-100 flex items-center justify-center text-gray-500"
            aria-label="Schliessen"
          >×</button>
        </div>

        <div className="overflow-y-auto p-6 space-y-6">
          {loading && <div className="text-center text-gray-400 py-12">Lade...</div>}

          {!loading && failed && (
            <div className="bg-red-50 border border-red-200 rounded-lg p-4 text-red-700">
              Kriterium konnte nicht geladen werden.
            </div>
          )}

          {tree && (
            <>
              <div>
                <h3 className="text-sm font-semibold text-gray-500 uppercase tracking-wide mb-2">
                  Anforderung (eigene Formulierung)
                </h3>
                <div className="bg-purple-50 border border-purple-200 rounded-lg p-4">
                  <p className="text-gray-800 whitespace-pre-line">{tree.criterion.description}</p>
                </div>
                <div className="mt-3 flex flex-wrap items-center gap-3 text-xs text-gray-500">
                  <span>Regulierung: <span className="font-medium text-gray-700">{tree.criterion.regulation_anchor || '—'}</span></span>
                  <span>Quelle: <span className="font-medium text-gray-700">{tree.criterion.source.framework} {tree.criterion.source.section}</span></span>
                  {tree.criterion.source.url && (
                    <a href={tree.criterion.source.url} target="_blank" rel="noreferrer noopener" className="text-purple-600 hover:text-purple-800 underline">
                      Originalquelle
                    </a>
                  )}
                </div>
              </div>

              {tree.criterion.external_refs.length > 0 && (
                <div>
                  <h3 className="text-sm font-semibold text-gray-500 uppercase tracking-wide mb-2">
                    Externe Referenzen (nicht ingestiert, nur Verweis)
                  </h3>
                  <div className="flex flex-wrap gap-2">
                    {tree.criterion.external_refs.map((ref, i) => (
                      <span key={i} className="px-2 py-1 text-xs bg-gray-100 text-gray-700 rounded">
                        {ref.framework}{ref.citation ? ` — ${ref.citation}` : ''}
                      </span>
                    ))}
                  </div>
                </div>
              )}

              {sections.filter(section => section.items.length > 0).map(section => (
                <div key={section.title}>
                  <h3 className="text-sm font-semibold text-gray-500 uppercase tracking-wide mb-3">
                    {section.title} ({section.items.length})
                  </h3>
                  <div className="grid grid-cols-1 md:grid-cols-2 gap-3">
                    {section.items.map(item => (
                      <ControlBlock key={item.derived_id} ctrl={item} badgeColor={section.badgeColor} />
                    ))}
                  </div>
                </div>
              ))}
            </>
          )}
        </div>

        <div className="px-6 py-3 border-t border-gray-200 bg-gray-50 text-xs text-gray-500">
          Eigene Clean-Room-Ableitung von BSI QUAIDAL. Quellverweis und Lizenz-Note pro Eintrag.
        </div>
      </div>
    </div>
  )
}
|
||||||
@@ -0,0 +1,109 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState } from 'react'
|
||||||
|
import { useQuaidalData, type QuaidalControl } from '../_hooks/useQuaidalData'
|
||||||
|
import { QuaidalCriterionDetail } from './QuaidalCriterionDetail'
|
||||||
|
|
||||||
|
/**
 * Clickable summary card for one criterion: name, section badge, clamped
 * description, the number of related ids and up to two external frameworks.
 *
 * @param ctrl   Criterion to summarise.
 * @param onOpen Called when the card is clicked.
 */
function CriterionCard({ ctrl, onOpen }: { ctrl: QuaidalControl; onOpen: () => void }) {
  const { canonical_name, description, source, related_quaidal_ids, external_refs } = ctrl
  // Only the first two references fit on the card.
  const previewRefs = external_refs.slice(0, 2)

  return (
    <button
      onClick={onOpen}
      className="text-left bg-white rounded-xl border border-gray-200 p-5 hover:border-purple-400 hover:shadow-sm transition-all"
    >
      <div className="flex items-start justify-between mb-2">
        <h3 className="font-semibold text-gray-900">{canonical_name}</h3>
        <span className="px-2 py-0.5 text-xs rounded-full bg-purple-100 text-purple-700">
          {source.section}
        </span>
      </div>
      <p className="text-sm text-gray-600 line-clamp-3">{description}</p>
      <div className="mt-3 flex flex-wrap items-center gap-2 text-xs">
        <span className="text-gray-500">Bausteine: <span className="font-medium text-gray-700">{related_quaidal_ids.length}</span></span>
        {previewRefs.map((extRef, idx) => (
          <span key={idx} className="px-1.5 py-0.5 bg-gray-100 text-gray-600 rounded">
            {extRef.framework}
          </span>
        ))}
      </div>
    </button>
  )
}
|
||||||
|
|
||||||
|
/**
 * Tab listing the QUAIDAL criteria as cards, with a summary header of
 * per-kind counts and a detail modal for the selected criterion.
 *
 * Renders a loading placeholder while `useQuaidalData` is loading and an
 * error box when it reports an error.
 */
export function TrainingDataQualityTab() {
  const { criteria, stats, loading, error } = useQuaidalData()
  // Section id of the criterion whose detail modal is open, or null when closed.
  const [openSection, setOpenSection] = useState<string | null>(null)

  if (loading) {
    return <div className="text-center text-gray-400 py-12">Lade QUAIDAL-Katalog...</div>
  }

  if (error) {
    return (
      <div className="bg-red-50 border border-red-200 rounded-lg p-4 text-red-700">
        QUAIDAL-Daten konnten nicht geladen werden: {error}
      </div>
    )
  }

  // Label / count pairs for the summary header; a missing count is shown as 0.
  const statTiles = stats
    ? [
        { label: 'Qualitätskriterien', value: stats.counts_by_kind.criterion ?? 0 },
        { label: 'Bausteine', value: stats.counts_by_kind.building_block ?? 0 },
        { label: 'Maßnahmen', value: stats.counts_by_kind.measure ?? 0 },
        { label: 'Metriken & Methoden', value: stats.counts_by_kind.metric ?? 0 },
      ]
    : null

  const criteriaList =
    criteria.length === 0 ? (
      <div className="bg-white rounded-xl border border-gray-200 p-8 text-center text-gray-400">
        Keine Kriterien gefunden. Bitte Backend-Ingest prüfen.
      </div>
    ) : (
      <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
        {criteria.map(criterion => (
          <CriterionCard
            key={criterion.derived_id}
            ctrl={criterion}
            onOpen={() => setOpenSection(criterion.source.section)}
          />
        ))}
      </div>
    )

  return (
    <div className="space-y-6">
      <div className="bg-purple-50 border border-purple-200 rounded-xl p-5">
        <h2 className="text-lg font-semibold text-gray-900">Trainingsdaten-Qualität nach BSI QUAIDAL</h2>
        <p className="text-sm text-gray-600 mt-1">
          Operative Umsetzung von EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI) auf Basis des
          BSI-Katalogs QUAIDAL. Alle Controls sind eigenständig formuliert (Clean-Room) und verweisen
          auf die jeweilige QUAIDAL-Sektion.
        </p>
        {statTiles && (
          <div className="mt-4 grid grid-cols-2 md:grid-cols-4 gap-3 text-sm">
            {statTiles.map(tile => (
              <div key={tile.label}>
                <div className="text-xs text-gray-500">{tile.label}</div>
                <div className="text-xl font-semibold text-gray-900">{tile.value}</div>
              </div>
            ))}
          </div>
        )}
      </div>

      <div>
        <h3 className="text-lg font-semibold text-gray-900 mb-4">10 Qualitätskriterien</h3>
        {criteriaList}
      </div>

      {stats?.license_note && (
        <div className="text-xs text-gray-500 italic">{stats.license_note}</div>
      )}

      {openSection && (
        <QuaidalCriterionDetail
          sectionId={openSection}
          onClose={() => setOpenSection(null)}
        />
      )}
    </div>
  )
}
|
||||||
@@ -0,0 +1,86 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useCallback, useEffect, useState } from 'react'
|
||||||
|
|
||||||
|
/**
 * Pointer from a QUAIDAL control to an external framework.
 *
 * `citation` is nullable: the backend may return a reference that names only
 * the framework without a concrete citation string.
 */
export interface QuaidalExternalRef {
  /** Identifier of the referenced external framework. */
  framework: string
  /** Citation text inside that framework, or `null` when none is supplied. */
  citation: string | null
}
|
||||||
|
|
||||||
|
/**
 * Provenance record attached to every QUAIDAL control.
 *
 * All fields except `framework` and `section` are nullable in the API payload.
 */
export interface QuaidalSource {
  /** Name of the framework the control was derived from. */
  framework: string
  /** Section identifier; the UI passes this value to `fetchCriterionTree`. */
  section: string
  /** Link to the upstream source, if known. */
  url: string | null
  /** Upstream commit the control was extracted from, if known. */
  commit_sha: string | null
  /** Title as it appears in the upstream source, if known. */
  title_original: string | null
  /** License remark for the upstream material, if any. */
  license_note: string | null
}
|
||||||
|
|
||||||
|
/**
 * One QUAIDAL catalogue entry as returned by the `/quaidal` endpoints.
 *
 * The same shape is used for all four entry kinds; `kind` discriminates them.
 */
export interface QuaidalControl {
  /** Stable identifier of the entry; used as the React list key. */
  derived_id: string
  /** Which level of the catalogue this entry belongs to. */
  kind:
    | 'criterion'
    | 'building_block'
    | 'measure'
    | 'metric'
  /** Display name of the entry. */
  canonical_name: string
  /** Descriptive text of the entry. */
  description: string
  /** Regulation anchor, or `null` when the entry has none. */
  regulation_anchor: string | null
  /** Identifiers of related QUAIDAL entries. */
  related_quaidal_ids: string[]
  /** References into external frameworks. */
  external_refs: QuaidalExternalRef[]
  /** Provenance of this entry. */
  source: QuaidalSource
  /** Plagiarism score reported by the backend, or `null` when not computed. */
  plagiarism_score: number | null
}
|
||||||
|
|
||||||
|
/**
 * Aggregate figures returned by the `/quaidal/stats` endpoint.
 */
export interface QuaidalStats {
  /**
   * Number of entries per kind. Consumers read the keys `criterion`,
   * `building_block`, `measure` and `metric` and fall back to 0 when a key
   * is absent.
   */
  counts_by_kind: Record<string, number>
  /** Name of the framework the catalogue was ingested from. */
  source_framework: string
  /** Upstream commit of the ingested catalogue, if known. */
  source_commit_sha: string | null
  /** License remark shown beneath the catalogue, if any. */
  license_note: string | null
}
|
||||||
|
|
||||||
|
/**
 * A single criterion together with the entries grouped beneath it, as
 * returned by `/quaidal/criteria/:sectionId`.
 */
export interface QuaidalCriterionTree {
  /** The criterion the tree is rooted at. */
  criterion: QuaidalControl
  /** Building blocks belonging to the criterion. */
  building_blocks: QuaidalControl[]
  /** Measures belonging to the criterion. */
  measures: QuaidalControl[]
  /** Metrics belonging to the criterion. */
  metrics: QuaidalControl[]
}
|
||||||
|
|
||||||
|
const API_BASE = '/api/sdk/v1/quaidal'
|
||||||
|
|
||||||
|
/**
 * React hook that loads the QUAIDAL criteria list and the catalogue stats.
 *
 * Both endpoints are requested in parallel on mount and again whenever
 * `reload` is called. A non-OK criteria response sets `error`; a non-OK
 * stats response is ignored and leaves the previous `stats` value in place.
 * Any thrown error (network failure, invalid JSON) is stringified into
 * `error`.
 *
 * @returns `criteria`, `stats`, the `loading` flag, the last `error` message
 *          (or `null`) and a `reload` callback that re-runs both requests.
 */
export function useQuaidalData() {
  const [criteria, setCriteria] = useState<QuaidalControl[]>([])
  const [stats, setStats] = useState<QuaidalStats | null>(null)
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)

  const loadAll = useCallback(async () => {
    setLoading(true)
    setError(null)
    try {
      // Start both requests before awaiting so they run concurrently.
      const criteriaRequest = fetch(`${API_BASE}/criteria`, { cache: 'no-store' })
      const statsRequest = fetch(`${API_BASE}/stats`, { cache: 'no-store' })
      const [criteriaRes, statsRes] = await Promise.all([criteriaRequest, statsRequest])

      if (!criteriaRes.ok) {
        setError(`Criteria endpoint returned ${criteriaRes.status}`)
      } else {
        const payload = (await criteriaRes.json()) as QuaidalControl[]
        // Guard against a non-array body so consumers can always call .map().
        setCriteria(Array.isArray(payload) ? payload : [])
      }

      if (statsRes.ok) {
        setStats(await statsRes.json())
      }
    } catch (cause) {
      setError(String(cause))
    } finally {
      setLoading(false)
    }
  }, [])

  useEffect(() => {
    void loadAll()
  }, [loadAll])

  return { criteria, stats, loading, error, reload: loadAll }
}
|
||||||
|
|
||||||
|
/**
 * Fetch the detail tree for one criterion.
 *
 * @param sectionId Section identifier of the criterion; URL-encoded before
 *                  being placed in the request path.
 * @returns The parsed tree, or `null` when the endpoint answers with a
 *          non-OK status. Network and JSON parse errors propagate to the
 *          caller as a rejected promise.
 */
export async function fetchCriterionTree(sectionId: string): Promise<QuaidalCriterionTree | null> {
  const url = `${API_BASE}/criteria/${encodeURIComponent(sectionId)}`
  const response = await fetch(url, { cache: 'no-store' })
  if (response.ok) {
    return (await response.json()) as QuaidalCriterionTree
  }
  return null
}
|
||||||
@@ -1,15 +1,23 @@
|
|||||||
'use client'
|
'use client'
|
||||||
|
|
||||||
import { useState, useEffect } from 'react'
|
import { useState, useEffect } from 'react'
|
||||||
|
import { useSearchParams } from 'next/navigation'
|
||||||
import { useSDK } from '@/lib/sdk'
|
import { useSDK } from '@/lib/sdk'
|
||||||
import { useQualityData } from './_hooks/useQualityData'
|
import { useQualityData } from './_hooks/useQualityData'
|
||||||
import { MetricCard, type QualityMetric } from './_components/MetricCard'
|
import { MetricCard, type QualityMetric } from './_components/MetricCard'
|
||||||
import { TestRow } from './_components/TestRow'
|
import { TestRow } from './_components/TestRow'
|
||||||
import { MetricModal } from './_components/MetricModal'
|
import { MetricModal } from './_components/MetricModal'
|
||||||
import { TestModal } from './_components/TestModal'
|
import { TestModal } from './_components/TestModal'
|
||||||
|
import { TrainingDataQualityTab } from './_components/TrainingDataQualityTab'
|
||||||
|
|
||||||
|
type TabId = 'model_quality' | 'data_quality'
|
||||||
|
|
||||||
export default function QualityPage() {
|
export default function QualityPage() {
|
||||||
const { state } = useSDK()
|
const { state } = useSDK()
|
||||||
|
const searchParams = useSearchParams()
|
||||||
|
const initialTab: TabId = searchParams?.get('category') === 'data_quality' ? 'data_quality' : 'model_quality'
|
||||||
|
const [tab, setTab] = useState<TabId>(initialTab)
|
||||||
|
|
||||||
const {
|
const {
|
||||||
metrics,
|
metrics,
|
||||||
tests,
|
tests,
|
||||||
@@ -41,24 +49,54 @@ export default function QualityPage() {
|
|||||||
<h1 className="text-2xl font-bold text-gray-900">AI Quality Dashboard</h1>
|
<h1 className="text-2xl font-bold text-gray-900">AI Quality Dashboard</h1>
|
||||||
<p className="mt-1 text-gray-500">Ueberwachen Sie die Qualitaet und Fairness Ihrer KI-Systeme</p>
|
<p className="mt-1 text-gray-500">Ueberwachen Sie die Qualitaet und Fairness Ihrer KI-Systeme</p>
|
||||||
</div>
|
</div>
|
||||||
<div className="flex items-center gap-2">
|
{tab === 'model_quality' && (
|
||||||
<button
|
<div className="flex items-center gap-2">
|
||||||
onClick={() => setShowTestModal(true)}
|
<button
|
||||||
className="flex items-center gap-2 px-4 py-2 border border-purple-300 text-purple-700 rounded-lg hover:bg-purple-50 transition-colors"
|
onClick={() => setShowTestModal(true)}
|
||||||
>
|
className="flex items-center gap-2 px-4 py-2 border border-purple-300 text-purple-700 rounded-lg hover:bg-purple-50 transition-colors"
|
||||||
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" /></svg>
|
>
|
||||||
Test hinzufuegen
|
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" /></svg>
|
||||||
</button>
|
Test hinzufuegen
|
||||||
<button
|
</button>
|
||||||
onClick={() => { setEditMetric(undefined); setShowMetricModal(true) }}
|
<button
|
||||||
className="flex items-center gap-2 px-4 py-2 bg-purple-600 text-white rounded-lg hover:bg-purple-700 transition-colors"
|
onClick={() => { setEditMetric(undefined); setShowMetricModal(true) }}
|
||||||
>
|
className="flex items-center gap-2 px-4 py-2 bg-purple-600 text-white rounded-lg hover:bg-purple-700 transition-colors"
|
||||||
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 6v6m0 0v6m0-6h6m-6 0H6" /></svg>
|
>
|
||||||
Messung hinzufuegen
|
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 6v6m0 0v6m0-6h6m-6 0H6" /></svg>
|
||||||
</button>
|
Messung hinzufuegen
|
||||||
</div>
|
</button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div className="border-b border-gray-200">
|
||||||
|
<nav className="-mb-px flex gap-6">
|
||||||
|
<button
|
||||||
|
onClick={() => setTab('model_quality')}
|
||||||
|
className={`pb-3 px-1 text-sm font-medium border-b-2 transition-colors ${
|
||||||
|
tab === 'model_quality'
|
||||||
|
? 'border-purple-500 text-purple-600'
|
||||||
|
: 'border-transparent text-gray-500 hover:text-gray-700'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
Modell-Qualität
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={() => setTab('data_quality')}
|
||||||
|
className={`pb-3 px-1 text-sm font-medium border-b-2 transition-colors ${
|
||||||
|
tab === 'data_quality'
|
||||||
|
? 'border-purple-500 text-purple-600'
|
||||||
|
: 'border-transparent text-gray-500 hover:text-gray-700'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
Trainingsdaten-Qualität (BSI QUAIDAL)
|
||||||
|
</button>
|
||||||
|
</nav>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{tab === 'data_quality' && <TrainingDataQualityTab />}
|
||||||
|
{tab === 'model_quality' && (
|
||||||
|
<>
|
||||||
<div className="grid grid-cols-1 md:grid-cols-4 gap-4">
|
<div className="grid grid-cols-1 md:grid-cols-4 gap-4">
|
||||||
<div className="bg-white rounded-xl border border-gray-200 p-6">
|
<div className="bg-white rounded-xl border border-gray-200 p-6">
|
||||||
<div className="text-sm text-gray-500">Durchschnittlicher Score</div>
|
<div className="text-sm text-gray-500">Durchschnittlicher Score</div>
|
||||||
@@ -141,6 +179,8 @@ export default function QualityPage() {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
|
||||||
{showMetricModal && (
|
{showMetricModal && (
|
||||||
<MetricModal
|
<MetricModal
|
||||||
|
|||||||
@@ -0,0 +1,69 @@
|
|||||||
|
package handlers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/google/uuid"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ListCustomerStandardSuggestions handles
|
||||||
|
// GET /api/v1/iace/projects/:id/customer-standards?include_verified=true|false
|
||||||
|
//
|
||||||
|
// Returns the set of reusable mitigations from prior projects of the same
|
||||||
|
// customer. Empty array when the project has no customer_name or no
|
||||||
|
// matching priors. The include_verified query flag controls whether
|
||||||
|
// status='verified' mitigations are included alongside the explicit
|
||||||
|
// is_customer_standard=true ones.
|
||||||
|
func (h *IACEHandler) ListCustomerStandardSuggestions(c *gin.Context) {
|
||||||
|
pid, err := uuid.Parse(c.Param("id"))
|
||||||
|
if err != nil {
|
||||||
|
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid project ID"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
includeVerified := c.Query("include_verified") == "true"
|
||||||
|
suggestions, err := h.store.ListCustomerStandardSuggestions(c.Request.Context(), pid, includeVerified)
|
||||||
|
if err != nil {
|
||||||
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if suggestions == nil {
|
||||||
|
suggestions = []iace.CustomerStandardSuggestion{}
|
||||||
|
}
|
||||||
|
c.JSON(http.StatusOK, gin.H{
|
||||||
|
"suggestions": suggestions,
|
||||||
|
"count": len(suggestions),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// ImportCustomerStandardSuggestion handles
|
||||||
|
// POST /api/v1/iace/projects/:id/customer-standards/import
|
||||||
|
// Body: { "name": "Sicherheitszeichen nach ISO 7010" }
|
||||||
|
//
|
||||||
|
// Applies one suggestion to all matching hazards in the current project.
|
||||||
|
// New mitigations are created idempotently; existing ones are flipped to
|
||||||
|
// is_relevant=true + is_customer_standard=true + status='verified'.
|
||||||
|
func (h *IACEHandler) ImportCustomerStandardSuggestion(c *gin.Context) {
|
||||||
|
pid, err := uuid.Parse(c.Param("id"))
|
||||||
|
if err != nil {
|
||||||
|
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid project ID"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var body struct {
|
||||||
|
Name string `json:"name" binding:"required"`
|
||||||
|
}
|
||||||
|
if err := c.ShouldBindJSON(&body); err != nil {
|
||||||
|
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
n, err := h.store.ImportCustomerStandardSuggestion(c.Request.Context(), pid, body.Name)
|
||||||
|
if err != nil {
|
||||||
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
c.JSON(http.StatusOK, gin.H{
|
||||||
|
"imported": n,
|
||||||
|
"name": body.Name,
|
||||||
|
})
|
||||||
|
}
|
||||||
@@ -459,6 +459,11 @@ func registerIACERoutes(v1 *gin.RouterGroup, h *handlers.IACEHandler) {
|
|||||||
iaceRoutes.GET("/projects/:id/clarifications/:cid/detail", h.ListClarificationDetail)
|
iaceRoutes.GET("/projects/:id/clarifications/:cid/detail", h.ListClarificationDetail)
|
||||||
iaceRoutes.POST("/projects/:id/clarifications/:cid/answer", h.AnswerClarification)
|
iaceRoutes.POST("/projects/:id/clarifications/:cid/answer", h.AnswerClarification)
|
||||||
iaceRoutes.POST("/projects/:id/clarifications/:cid/comment", h.PostClarificationComment)
|
iaceRoutes.POST("/projects/:id/clarifications/:cid/comment", h.PostClarificationComment)
|
||||||
|
|
||||||
|
// Customer-Standard Reuse (migration 031): pull reusable mitigations
|
||||||
|
// across prior projects of the same customer.
|
||||||
|
iaceRoutes.GET("/projects/:id/customer-standards", h.ListCustomerStandardSuggestions)
|
||||||
|
iaceRoutes.POST("/projects/:id/customer-standards/import", h.ImportCustomerStandardSuggestion)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ type CreateProjectRequest struct {
|
|||||||
MachineName string `json:"machine_name" binding:"required"`
|
MachineName string `json:"machine_name" binding:"required"`
|
||||||
MachineType string `json:"machine_type" binding:"required"`
|
MachineType string `json:"machine_type" binding:"required"`
|
||||||
Manufacturer string `json:"manufacturer" binding:"required"`
|
Manufacturer string `json:"manufacturer" binding:"required"`
|
||||||
|
CustomerName string `json:"customer_name,omitempty"`
|
||||||
Description string `json:"description,omitempty"`
|
Description string `json:"description,omitempty"`
|
||||||
NarrativeText string `json:"narrative_text,omitempty"`
|
NarrativeText string `json:"narrative_text,omitempty"`
|
||||||
CEMarkingTarget string `json:"ce_marking_target,omitempty"`
|
CEMarkingTarget string `json:"ce_marking_target,omitempty"`
|
||||||
@@ -27,6 +28,7 @@ type UpdateProjectRequest struct {
|
|||||||
MachineName *string `json:"machine_name,omitempty"`
|
MachineName *string `json:"machine_name,omitempty"`
|
||||||
MachineType *string `json:"machine_type,omitempty"`
|
MachineType *string `json:"machine_type,omitempty"`
|
||||||
Manufacturer *string `json:"manufacturer,omitempty"`
|
Manufacturer *string `json:"manufacturer,omitempty"`
|
||||||
|
CustomerName *string `json:"customer_name,omitempty"`
|
||||||
Description *string `json:"description,omitempty"`
|
Description *string `json:"description,omitempty"`
|
||||||
NarrativeText *string `json:"narrative_text,omitempty"`
|
NarrativeText *string `json:"narrative_text,omitempty"`
|
||||||
CEMarkingTarget *string `json:"ce_marking_target,omitempty"`
|
CEMarkingTarget *string `json:"ce_marking_target,omitempty"`
|
||||||
|
|||||||
@@ -19,6 +19,11 @@ type Project struct {
|
|||||||
MachineName string `json:"machine_name"`
|
MachineName string `json:"machine_name"`
|
||||||
MachineType string `json:"machine_type"`
|
MachineType string `json:"machine_type"`
|
||||||
Manufacturer string `json:"manufacturer"`
|
Manufacturer string `json:"manufacturer"`
|
||||||
|
// CustomerName is the end customer (Anlagenbetreiber). Optional —
|
||||||
|
// projects without a customer are still valid, but customer-standard
|
||||||
|
// reuse only fires across projects sharing the same non-empty value
|
||||||
|
// (case-insensitive match, see customerKey()).
|
||||||
|
CustomerName string `json:"customer_name,omitempty"`
|
||||||
Description string `json:"description,omitempty"`
|
Description string `json:"description,omitempty"`
|
||||||
NarrativeText string `json:"narrative_text,omitempty"`
|
NarrativeText string `json:"narrative_text,omitempty"`
|
||||||
Status ProjectStatus `json:"status"`
|
Status ProjectStatus `json:"status"`
|
||||||
|
|||||||
@@ -0,0 +1,211 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CustomerStandardSuggestion is one reusable mitigation, aggregated over the
// other projects of the same customer. Mitigations sharing a name and
// reduction type are collapsed into a single suggestion; the source-project
// fields tell the expert how widely it has been used before.
type CustomerStandardSuggestion struct {
	// Name is the mitigation name the rows were grouped by.
	Name string `json:"name"`
	// ReductionType is the reduction type the rows were grouped by.
	ReductionType string `json:"reduction_type"`
	// Description is a representative description picked from the group.
	Description string `json:"description"`

	// SourceProjectCount is the number of distinct prior projects that
	// contain this mitigation. A higher count is a stronger reuse signal.
	SourceProjectCount int `json:"source_project_count"`
	// SourceProjectNames lists the distinct machine names of those projects.
	SourceProjectNames []string `json:"source_project_names"`

	// IsCustomerStandard is true when at least one grouped mitigation is
	// flagged is_customer_standard.
	IsCustomerStandard bool `json:"is_customer_standard"`
	// HasVerifiedInstances is true when at least one grouped mitigation has
	// status 'verified'.
	HasVerifiedInstances bool `json:"has_verified_instances"`
}
|
||||||
|
|
||||||
|
// ListCustomerStandardSuggestions returns reusable mitigations from prior
|
||||||
|
// projects of the same customer as projectID. The customer key is the
|
||||||
|
// case-insensitive trimmed customer_name; an empty customer_name short-
|
||||||
|
// circuits to an empty result.
|
||||||
|
//
|
||||||
|
// includeVerified=false → only mitigations with is_customer_standard=true
|
||||||
|
// includeVerified=true → also include status='verified' mitigations
|
||||||
|
// (broader pool, useful when the customer-standard
|
||||||
|
// habit isn't yet established in the data)
|
||||||
|
func (s *Store) ListCustomerStandardSuggestions(
|
||||||
|
ctx context.Context,
|
||||||
|
projectID uuid.UUID,
|
||||||
|
includeVerified bool,
|
||||||
|
) ([]CustomerStandardSuggestion, error) {
|
||||||
|
// Resolve the customer + tenant for the current project.
|
||||||
|
var tenantID uuid.UUID
|
||||||
|
var customerName string
|
||||||
|
err := s.pool.QueryRow(ctx,
|
||||||
|
`SELECT tenant_id, COALESCE(customer_name, '') FROM iace_projects WHERE id = $1`,
|
||||||
|
projectID,
|
||||||
|
).Scan(&tenantID, &customerName)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("resolve project for customer-standards: %w", err)
|
||||||
|
}
|
||||||
|
customerName = strings.TrimSpace(customerName)
|
||||||
|
if customerName == "" {
|
||||||
|
return []CustomerStandardSuggestion{}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
filterClause := "m.is_customer_standard = TRUE"
|
||||||
|
if includeVerified {
|
||||||
|
filterClause = "(m.is_customer_standard = TRUE OR m.status = 'verified')"
|
||||||
|
}
|
||||||
|
|
||||||
|
query := fmt.Sprintf(`
|
||||||
|
SELECT
|
||||||
|
m.name,
|
||||||
|
m.reduction_type,
|
||||||
|
MAX(m.description) AS description,
|
||||||
|
COUNT(DISTINCT p.id) AS source_count,
|
||||||
|
array_agg(DISTINCT p.machine_name ORDER BY p.machine_name) AS source_names,
|
||||||
|
BOOL_OR(m.is_customer_standard) AS has_customer_std,
|
||||||
|
BOOL_OR(m.status = 'verified') AS has_verified
|
||||||
|
FROM iace_mitigations m
|
||||||
|
JOIN iace_hazards h ON h.id = m.hazard_id
|
||||||
|
JOIN iace_projects p ON p.id = h.project_id
|
||||||
|
WHERE p.tenant_id = $1
|
||||||
|
AND p.id <> $2
|
||||||
|
AND p.archived_at IS NULL
|
||||||
|
AND LOWER(TRIM(COALESCE(p.customer_name, ''))) = LOWER($3)
|
||||||
|
AND %s
|
||||||
|
GROUP BY m.name, m.reduction_type
|
||||||
|
ORDER BY source_count DESC, m.name
|
||||||
|
`, filterClause)
|
||||||
|
|
||||||
|
rows, err := s.pool.Query(ctx, query, tenantID, projectID, customerName)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("query customer-standards: %w", err)
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
var out []CustomerStandardSuggestion
|
||||||
|
for rows.Next() {
|
||||||
|
var sg CustomerStandardSuggestion
|
||||||
|
if scanErr := rows.Scan(
|
||||||
|
&sg.Name, &sg.ReductionType, &sg.Description,
|
||||||
|
&sg.SourceProjectCount, &sg.SourceProjectNames,
|
||||||
|
&sg.IsCustomerStandard, &sg.HasVerifiedInstances,
|
||||||
|
); scanErr != nil {
|
||||||
|
return nil, fmt.Errorf("scan customer-standards: %w", scanErr)
|
||||||
|
}
|
||||||
|
out = append(out, sg)
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ImportCustomerStandardSuggestion applies a suggestion to the current
|
||||||
|
// project: for every hazard in the project whose name matches one of the
|
||||||
|
// suggestion's source hazards (by mitigation.name → hazard.name pairing in
|
||||||
|
// prior projects), it ensures a relevant + customer-standard mitigation
|
||||||
|
// exists. New mitigations are inserted via CreateMitigation (idempotent
|
||||||
|
// via UNIQUE(hazard_id, name)), existing ones are flipped to
|
||||||
|
// is_relevant=true + is_customer_standard=true + status='verified'.
|
||||||
|
//
|
||||||
|
// Returns the number of mitigations affected (created + updated).
|
||||||
|
func (s *Store) ImportCustomerStandardSuggestion(
|
||||||
|
ctx context.Context,
|
||||||
|
projectID uuid.UUID,
|
||||||
|
mitigationName string,
|
||||||
|
) (int, error) {
|
||||||
|
// Find tenant + customer of the target project.
|
||||||
|
var tenantID uuid.UUID
|
||||||
|
var customerName string
|
||||||
|
if err := s.pool.QueryRow(ctx,
|
||||||
|
`SELECT tenant_id, COALESCE(customer_name, '') FROM iace_projects WHERE id = $1`,
|
||||||
|
projectID,
|
||||||
|
).Scan(&tenantID, &customerName); err != nil {
|
||||||
|
return 0, fmt.Errorf("resolve project: %w", err)
|
||||||
|
}
|
||||||
|
customerName = strings.TrimSpace(customerName)
|
||||||
|
if customerName == "" {
|
||||||
|
return 0, fmt.Errorf("project has no customer_name — nothing to reuse")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect the hazard names this mitigation was attached to in the
|
||||||
|
// customer's prior projects + a representative reduction_type/description.
|
||||||
|
priorRows, err := s.pool.Query(ctx, `
|
||||||
|
SELECT DISTINCT h.name, m.reduction_type, COALESCE(m.description, '')
|
||||||
|
FROM iace_mitigations m
|
||||||
|
JOIN iace_hazards h ON h.id = m.hazard_id
|
||||||
|
JOIN iace_projects p ON p.id = h.project_id
|
||||||
|
WHERE p.tenant_id = $1
|
||||||
|
AND p.id <> $2
|
||||||
|
AND p.archived_at IS NULL
|
||||||
|
AND LOWER(TRIM(COALESCE(p.customer_name, ''))) = LOWER($3)
|
||||||
|
AND m.name = $4
|
||||||
|
`, tenantID, projectID, customerName, mitigationName)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("collect prior hazards: %w", err)
|
||||||
|
}
|
||||||
|
defer priorRows.Close()
|
||||||
|
|
||||||
|
type proto struct{ hazardName, reductionType, description string }
|
||||||
|
var prototypes []proto
|
||||||
|
for priorRows.Next() {
|
||||||
|
var p proto
|
||||||
|
if err := priorRows.Scan(&p.hazardName, &p.reductionType, &p.description); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
prototypes = append(prototypes, p)
|
||||||
|
}
|
||||||
|
if len(prototypes) == 0 {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// For every prototype hazard name, find the matching hazard in the
|
||||||
|
// current project (same name) and ensure a relevant + customer-standard
|
||||||
|
// mitigation with mitigationName exists for it.
|
||||||
|
affected := 0
|
||||||
|
for _, p := range prototypes {
|
||||||
|
var hazardIDs []uuid.UUID
|
||||||
|
hazRows, err := s.pool.Query(ctx,
|
||||||
|
`SELECT id FROM iace_hazards WHERE project_id = $1 AND name = $2`,
|
||||||
|
projectID, p.hazardName,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return affected, fmt.Errorf("find target hazards: %w", err)
|
||||||
|
}
|
||||||
|
for hazRows.Next() {
|
||||||
|
var hid uuid.UUID
|
||||||
|
if scanErr := hazRows.Scan(&hid); scanErr != nil {
|
||||||
|
hazRows.Close()
|
||||||
|
return affected, scanErr
|
||||||
|
}
|
||||||
|
hazardIDs = append(hazardIDs, hid)
|
||||||
|
}
|
||||||
|
hazRows.Close()
|
||||||
|
|
||||||
|
for _, hid := range hazardIDs {
|
||||||
|
// Idempotent insert; UPDATE sets relevance + verified state.
|
||||||
|
_, err := s.CreateMitigation(ctx, CreateMitigationRequest{
|
||||||
|
HazardID: hid,
|
||||||
|
Name: mitigationName,
|
||||||
|
Description: p.description,
|
||||||
|
ReductionType: ReductionType(p.reductionType),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return affected, fmt.Errorf("create mitigation: %w", err)
|
||||||
|
}
|
||||||
|
if _, err := s.pool.Exec(ctx, `
|
||||||
|
UPDATE iace_mitigations
|
||||||
|
SET is_relevant = TRUE,
|
||||||
|
is_customer_standard = TRUE,
|
||||||
|
status = 'verified',
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE hazard_id = $1 AND name = $2
|
||||||
|
`, hid, mitigationName); err != nil {
|
||||||
|
return affected, fmt.Errorf("upgrade mitigation: %w", err)
|
||||||
|
}
|
||||||
|
affected++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return affected, nil
|
||||||
|
}
|
||||||
@@ -23,6 +23,7 @@ func (s *Store) CreateProject(ctx context.Context, tenantID uuid.UUID, req Creat
|
|||||||
MachineName: req.MachineName,
|
MachineName: req.MachineName,
|
||||||
MachineType: req.MachineType,
|
MachineType: req.MachineType,
|
||||||
Manufacturer: req.Manufacturer,
|
Manufacturer: req.Manufacturer,
|
||||||
|
CustomerName: req.CustomerName,
|
||||||
Description: req.Description,
|
Description: req.Description,
|
||||||
NarrativeText: req.NarrativeText,
|
NarrativeText: req.NarrativeText,
|
||||||
Status: ProjectStatusDraft,
|
Status: ProjectStatusDraft,
|
||||||
@@ -35,19 +36,19 @@ func (s *Store) CreateProject(ctx context.Context, tenantID uuid.UUID, req Creat
|
|||||||
_, err := s.pool.Exec(ctx, `
|
_, err := s.pool.Exec(ctx, `
|
||||||
INSERT INTO iace_projects (
|
INSERT INTO iace_projects (
|
||||||
id, tenant_id, parent_project_id, machine_name, machine_type, manufacturer,
|
id, tenant_id, parent_project_id, machine_name, machine_type, manufacturer,
|
||||||
description, narrative_text, status, ce_marking_target,
|
customer_name, description, narrative_text, status, ce_marking_target,
|
||||||
completeness_score, risk_summary, triggered_regulations, metadata,
|
completeness_score, risk_summary, triggered_regulations, metadata,
|
||||||
created_at, updated_at, archived_at
|
created_at, updated_at, archived_at
|
||||||
) VALUES (
|
) VALUES (
|
||||||
$1, $2, $3, $4, $5, $6,
|
$1, $2, $3, $4, $5, $6,
|
||||||
$7, $8, $9, $10,
|
$7, $8, $9, $10, $11,
|
||||||
$11, $12, $13, $14,
|
$12, $13, $14, $15,
|
||||||
$15, $16, $17
|
$16, $17, $18
|
||||||
)
|
)
|
||||||
`,
|
`,
|
||||||
project.ID, project.TenantID, project.ParentProjectID,
|
project.ID, project.TenantID, project.ParentProjectID,
|
||||||
project.MachineName, project.MachineType, project.Manufacturer,
|
project.MachineName, project.MachineType, project.Manufacturer,
|
||||||
project.Description, project.NarrativeText, string(project.Status), project.CEMarkingTarget,
|
project.CustomerName, project.Description, project.NarrativeText, string(project.Status), project.CEMarkingTarget,
|
||||||
project.CompletenessScore, nil, project.TriggeredRegulations, project.Metadata,
|
project.CompletenessScore, nil, project.TriggeredRegulations, project.Metadata,
|
||||||
project.CreatedAt, project.UpdatedAt, project.ArchivedAt,
|
project.CreatedAt, project.UpdatedAt, project.ArchivedAt,
|
||||||
)
|
)
|
||||||
@@ -67,13 +68,13 @@ func (s *Store) GetProject(ctx context.Context, id uuid.UUID) (*Project, error)
|
|||||||
err := s.pool.QueryRow(ctx, `
|
err := s.pool.QueryRow(ctx, `
|
||||||
SELECT
|
SELECT
|
||||||
id, tenant_id, parent_project_id, machine_name, machine_type, manufacturer,
|
id, tenant_id, parent_project_id, machine_name, machine_type, manufacturer,
|
||||||
description, narrative_text, status, ce_marking_target,
|
COALESCE(customer_name, ''), description, narrative_text, status, ce_marking_target,
|
||||||
completeness_score, risk_summary, triggered_regulations, metadata,
|
completeness_score, risk_summary, triggered_regulations, metadata,
|
||||||
created_at, updated_at, archived_at
|
created_at, updated_at, archived_at
|
||||||
FROM iace_projects WHERE id = $1
|
FROM iace_projects WHERE id = $1
|
||||||
`, id).Scan(
|
`, id).Scan(
|
||||||
&p.ID, &p.TenantID, &p.ParentProjectID, &p.MachineName, &p.MachineType, &p.Manufacturer,
|
&p.ID, &p.TenantID, &p.ParentProjectID, &p.MachineName, &p.MachineType, &p.Manufacturer,
|
||||||
&p.Description, &p.NarrativeText, &status, &p.CEMarkingTarget,
|
&p.CustomerName, &p.Description, &p.NarrativeText, &status, &p.CEMarkingTarget,
|
||||||
&p.CompletenessScore, &riskSummary, &triggeredRegulations, &metadata,
|
&p.CompletenessScore, &riskSummary, &triggeredRegulations, &metadata,
|
||||||
&p.CreatedAt, &p.UpdatedAt, &p.ArchivedAt,
|
&p.CreatedAt, &p.UpdatedAt, &p.ArchivedAt,
|
||||||
)
|
)
|
||||||
@@ -97,7 +98,7 @@ func (s *Store) ListProjects(ctx context.Context, tenantID uuid.UUID) ([]Project
|
|||||||
rows, err := s.pool.Query(ctx, `
|
rows, err := s.pool.Query(ctx, `
|
||||||
SELECT
|
SELECT
|
||||||
id, tenant_id, parent_project_id, machine_name, machine_type, manufacturer,
|
id, tenant_id, parent_project_id, machine_name, machine_type, manufacturer,
|
||||||
description, narrative_text, status, ce_marking_target,
|
COALESCE(customer_name, ''), description, narrative_text, status, ce_marking_target,
|
||||||
completeness_score, risk_summary, triggered_regulations, metadata,
|
completeness_score, risk_summary, triggered_regulations, metadata,
|
||||||
created_at, updated_at, archived_at
|
created_at, updated_at, archived_at
|
||||||
FROM iace_projects WHERE tenant_id = $1
|
FROM iace_projects WHERE tenant_id = $1
|
||||||
@@ -116,7 +117,7 @@ func (s *Store) ListProjects(ctx context.Context, tenantID uuid.UUID) ([]Project
|
|||||||
|
|
||||||
err := rows.Scan(
|
err := rows.Scan(
|
||||||
&p.ID, &p.TenantID, &p.ParentProjectID, &p.MachineName, &p.MachineType, &p.Manufacturer,
|
&p.ID, &p.TenantID, &p.ParentProjectID, &p.MachineName, &p.MachineType, &p.Manufacturer,
|
||||||
&p.Description, &p.NarrativeText, &status, &p.CEMarkingTarget,
|
&p.CustomerName, &p.Description, &p.NarrativeText, &status, &p.CEMarkingTarget,
|
||||||
&p.CompletenessScore, &riskSummary, &triggeredRegulations, &metadata,
|
&p.CompletenessScore, &riskSummary, &triggeredRegulations, &metadata,
|
||||||
&p.CreatedAt, &p.UpdatedAt, &p.ArchivedAt,
|
&p.CreatedAt, &p.UpdatedAt, &p.ArchivedAt,
|
||||||
)
|
)
|
||||||
@@ -156,6 +157,9 @@ func (s *Store) UpdateProject(ctx context.Context, id uuid.UUID, req UpdateProje
|
|||||||
if req.Manufacturer != nil {
|
if req.Manufacturer != nil {
|
||||||
project.Manufacturer = *req.Manufacturer
|
project.Manufacturer = *req.Manufacturer
|
||||||
}
|
}
|
||||||
|
if req.CustomerName != nil {
|
||||||
|
project.CustomerName = *req.CustomerName
|
||||||
|
}
|
||||||
if req.Description != nil {
|
if req.Description != nil {
|
||||||
project.Description = *req.Description
|
project.Description = *req.Description
|
||||||
}
|
}
|
||||||
@@ -174,11 +178,13 @@ func (s *Store) UpdateProject(ctx context.Context, id uuid.UUID, req UpdateProje
|
|||||||
_, err = s.pool.Exec(ctx, `
|
_, err = s.pool.Exec(ctx, `
|
||||||
UPDATE iace_projects SET
|
UPDATE iace_projects SET
|
||||||
machine_name = $2, machine_type = $3, manufacturer = $4,
|
machine_name = $2, machine_type = $3, manufacturer = $4,
|
||||||
description = $5, narrative_text = $6, ce_marking_target = $7,
|
customer_name = $5,
|
||||||
metadata = $8, updated_at = $9
|
description = $6, narrative_text = $7, ce_marking_target = $8,
|
||||||
|
metadata = $9, updated_at = $10
|
||||||
WHERE id = $1
|
WHERE id = $1
|
||||||
`,
|
`,
|
||||||
id, project.MachineName, project.MachineType, project.Manufacturer,
|
id, project.MachineName, project.MachineType, project.Manufacturer,
|
||||||
|
project.CustomerName,
|
||||||
project.Description, project.NarrativeText, project.CEMarkingTarget,
|
project.Description, project.NarrativeText, project.CEMarkingTarget,
|
||||||
project.Metadata, project.UpdatedAt,
|
project.Metadata, project.UpdatedAt,
|
||||||
)
|
)
|
||||||
@@ -250,7 +256,7 @@ func (s *Store) ListVariants(ctx context.Context, parentID uuid.UUID) ([]Project
|
|||||||
rows, err := s.pool.Query(ctx, `
|
rows, err := s.pool.Query(ctx, `
|
||||||
SELECT
|
SELECT
|
||||||
id, tenant_id, parent_project_id, machine_name, machine_type, manufacturer,
|
id, tenant_id, parent_project_id, machine_name, machine_type, manufacturer,
|
||||||
description, narrative_text, status, ce_marking_target,
|
COALESCE(customer_name, ''), description, narrative_text, status, ce_marking_target,
|
||||||
completeness_score, risk_summary, triggered_regulations, metadata,
|
completeness_score, risk_summary, triggered_regulations, metadata,
|
||||||
created_at, updated_at, archived_at
|
created_at, updated_at, archived_at
|
||||||
FROM iace_projects WHERE parent_project_id = $1
|
FROM iace_projects WHERE parent_project_id = $1
|
||||||
@@ -269,7 +275,7 @@ func (s *Store) ListVariants(ctx context.Context, parentID uuid.UUID) ([]Project
|
|||||||
|
|
||||||
err := rows.Scan(
|
err := rows.Scan(
|
||||||
&p.ID, &p.TenantID, &p.ParentProjectID, &p.MachineName, &p.MachineType, &p.Manufacturer,
|
&p.ID, &p.TenantID, &p.ParentProjectID, &p.MachineName, &p.MachineType, &p.Manufacturer,
|
||||||
&p.Description, &p.NarrativeText, &status, &p.CEMarkingTarget,
|
&p.CustomerName, &p.Description, &p.NarrativeText, &status, &p.CEMarkingTarget,
|
||||||
&p.CompletenessScore, &riskSummary, &triggeredRegulations, &metadata,
|
&p.CompletenessScore, &riskSummary, &triggeredRegulations, &metadata,
|
||||||
&p.CreatedAt, &p.UpdatedAt, &p.ArchivedAt,
|
&p.CreatedAt, &p.UpdatedAt, &p.ArchivedAt,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -0,0 +1,27 @@
|
|||||||
|
-- Migration 031: customer_name on iace_projects + reuse-helper index
|
||||||
|
-- ==========================================================================
|
||||||
|
-- The IACE module is operated by a single Maschinenhersteller (the SDK
|
||||||
|
-- user), but their plants land at many different end customers. A safety
|
||||||
|
-- expert who commissions the second or third plant at the same customer
|
||||||
|
-- often finds that whole classes of mitigations are already in place
|
||||||
|
-- there (company-wide PPE rules, locked-out energy isolation, customer-
|
||||||
|
-- standard signage, etc.). Today, this expert knowledge is rediscovered
|
||||||
|
-- per project.
|
||||||
|
--
|
||||||
|
-- This migration introduces a plain customer_name field on the project
|
||||||
|
-- (no separate customer table yet — Option A from the design discussion;
|
||||||
|
-- normalised iace_customers can come later when a real customer-management
|
||||||
|
-- screen is built). The field is optional so existing projects without a
|
||||||
|
-- customer remain valid.
|
||||||
|
--
|
||||||
|
-- The partial index makes the customer-standards lookup cheap: only
|
||||||
|
-- projects with a non-empty customer_name participate, since reuse is
|
||||||
|
-- meaningless without it.
|
||||||
|
-- ==========================================================================
|
||||||
|
|
||||||
|
ALTER TABLE iace_projects
|
||||||
|
ADD COLUMN IF NOT EXISTS customer_name TEXT;
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_iace_projects_customer_name
|
||||||
|
ON iace_projects(customer_name)
|
||||||
|
WHERE customer_name IS NOT NULL AND customer_name <> '';
|
||||||
@@ -46,6 +46,11 @@ class ComplianceCheckRequest(BaseModel):
|
|||||||
documents: list[DocumentInput]
|
documents: list[DocumentInput]
|
||||||
use_agent: bool = False
|
use_agent: bool = False
|
||||||
recipient: str = "dsb@breakpilot.local"
|
recipient: str = "dsb@breakpilot.local"
|
||||||
|
# P12: override for a TDM reservation when the customer's permission is documented.
# tdm_override_reason is required when tdm_override=True: it must contain at
# least 10 characters after trimming, otherwise the override is ignored
# (e.g. "Auftragsbeziehung Safetykon GmbH, Email Hr. X 18.05.2026").
|
||||||
|
tdm_override: bool = False
|
||||||
|
tdm_override_reason: str = ""
|
||||||
|
|
||||||
|
|
||||||
class ComplianceCheckStartResponse(BaseModel):
|
class ComplianceCheckStartResponse(BaseModel):
|
||||||
@@ -166,6 +171,50 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# P7: TDM-Reservation-Check der Base-Domain (§ 44b UrhG).
|
||||||
|
# Bei reserved/denied: Run sofort beenden, kein Crawl.
|
||||||
|
try:
|
||||||
|
from compliance.services.tdm_reservation_check import (
|
||||||
|
check_tdm_reservation, is_crawl_allowed,
|
||||||
|
)
|
||||||
|
first_url = next(
|
||||||
|
(d.url for d in req.documents if d.url), "",
|
||||||
|
)
|
||||||
|
if first_url:
|
||||||
|
tdm = await check_tdm_reservation(first_url)
|
||||||
|
_compliance_check_jobs[check_id]["tdm"] = tdm
|
||||||
|
# P12: Bei tdm_override + Reason wird NICHT abgebrochen,
|
||||||
|
# sondern nur dokumentiert. Override ohne Reason wird ignoriert.
|
||||||
|
override_active = (
|
||||||
|
req.tdm_override
|
||||||
|
and len((req.tdm_override_reason or "").strip()) >= 10
|
||||||
|
)
|
||||||
|
if not is_crawl_allowed(tdm) and not override_active:
|
||||||
|
_compliance_check_jobs[check_id]["status"] = "skipped_tdm"
|
||||||
|
_compliance_check_jobs[check_id]["error"] = (
|
||||||
|
f"TDM-Vorbehalt fuer {tdm.get('domain')} erkannt "
|
||||||
|
f"(status={tdm.get('status')}) — Crawl nach § 44b "
|
||||||
|
f"UrhG nicht zulaessig. Signals: "
|
||||||
|
f"{[s.get('src') for s in tdm.get('signals', [])]}"
|
||||||
|
)
|
||||||
|
_compliance_check_jobs[check_id]["progress_pct"] = 100
|
||||||
|
logger.info("TDM-skip check_id=%s domain=%s status=%s",
|
||||||
|
check_id, tdm.get("domain"), tdm.get("status"))
|
||||||
|
return
|
||||||
|
if override_active and not is_crawl_allowed(tdm):
|
||||||
|
_compliance_check_jobs[check_id]["tdm_override"] = {
|
||||||
|
"reason": req.tdm_override_reason.strip()[:500],
|
||||||
|
"original_status": tdm.get("status"),
|
||||||
|
}
|
||||||
|
logger.warning(
|
||||||
|
"TDM-Override aktiv: check_id=%s domain=%s "
|
||||||
|
"status=%s reason=%r",
|
||||||
|
check_id, tdm.get("domain"), tdm.get("status"),
|
||||||
|
req.tdm_override_reason.strip()[:80],
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("TDM-check failed (proceeding): %s", e)
|
||||||
|
|
||||||
# Step 1: Resolve texts (fetch from URL if needed) — 0-30%
|
# Step 1: Resolve texts (fetch from URL if needed) — 0-30%
|
||||||
_update(check_id, "Texte werden geladen...", 1)
|
_update(check_id, "Texte werden geladen...", 1)
|
||||||
doc_texts: dict[str, str] = {}
|
doc_texts: dict[str, str] = {}
|
||||||
@@ -226,9 +275,73 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
|||||||
if entry.get("text"):
|
if entry.get("text"):
|
||||||
doc_texts[entry["doc_type"]] = entry["text"]
|
doc_texts[entry["doc_type"]] = entry["text"]
|
||||||
|
|
||||||
|
# P15: Dedupe — wenn mehrere Doc-Types DASSELBE Dokument referenzieren
|
||||||
|
# (z.B. Safetykon: User gibt /datenschutz fuer dse + cookie + widerruf),
|
||||||
|
# behalten wir nur den primaeren Doc-Type. Andere: leeren + note.
|
||||||
|
# Priorität: dse > impressum > cookie > widerruf > agb > nutzungsbedingungen
|
||||||
|
_DOC_PRIORITY = ["dse", "impressum", "cookie", "widerruf", "agb",
|
||||||
|
"nutzungsbedingungen", "social_media", "dsb"]
|
||||||
|
seen_text_hash: dict[int, str] = {}
|
||||||
|
for dt in _DOC_PRIORITY:
|
||||||
|
entry = next((e for e in doc_entries if e.get("doc_type") == dt
|
||||||
|
and e.get("text")), None)
|
||||||
|
if not entry:
|
||||||
|
continue
|
||||||
|
text_hash = hash((entry.get("text") or "").strip()[:1000])
|
||||||
|
if text_hash in seen_text_hash:
|
||||||
|
primary = seen_text_hash[text_hash]
|
||||||
|
logger.info(
|
||||||
|
"P15 dedup: doc_type=%s referenziert dasselbe Dokument "
|
||||||
|
"wie %s (URL=%s) -> als Duplikat markiert.",
|
||||||
|
dt, primary, entry.get("url", "")[:60],
|
||||||
|
)
|
||||||
|
entry["text"] = ""
|
||||||
|
entry["word_count"] = 0
|
||||||
|
entry["url"] = ""
|
||||||
|
entry["dup_of"] = primary
|
||||||
|
doc_texts.pop(dt, None)
|
||||||
|
else:
|
||||||
|
seen_text_hash[text_hash] = dt
|
||||||
|
|
||||||
# Step 2: Detect business profile (35-40%)
|
# Step 2: Detect business profile (35-40%)
|
||||||
_update(check_id, "Geschaeftsmodell wird erkannt...", 37)
|
_update(check_id, "Geschaeftsmodell wird erkannt...", 37)
|
||||||
profile = await detect_business_profile(doc_texts)
|
# P16: Homepage-Text mit fuer Profile-Detection (no_direct_sales
|
||||||
|
# B2B-Indikatoren wie "CE-Zertifizierung" / "Schulungen" stehen oft
|
||||||
|
# nur im Homepage-Menue, nicht im Pflichttext).
|
||||||
|
profile_input = dict(doc_texts)
|
||||||
|
try:
|
||||||
|
base_url = ""
|
||||||
|
for e in doc_entries:
|
||||||
|
if e.get("url"):
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
p = urlparse(e["url"])
|
||||||
|
if p.scheme and p.netloc:
|
||||||
|
base_url = f"{p.scheme}://{p.netloc}/"
|
||||||
|
break
|
||||||
|
if base_url:
|
||||||
|
import re as _re
|
||||||
|
async with httpx.AsyncClient(
|
||||||
|
timeout=8.0, follow_redirects=True,
|
||||||
|
headers={"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
|
||||||
|
"AppleWebKit/537.36 HeadlessChrome/120.0.0.0"},
|
||||||
|
) as _hc:
|
||||||
|
_hr = await _hc.get(base_url)
|
||||||
|
if _hr.status_code == 200 and "text/html" in _hr.headers.get(
|
||||||
|
"content-type", ""):
|
||||||
|
_html = _hr.text[:60000]
|
||||||
|
_html = _re.sub(r"<script[^>]*>.*?</script>", " ",
|
||||||
|
_html, flags=_re.DOTALL | _re.IGNORECASE)
|
||||||
|
_html = _re.sub(r"<style[^>]*>.*?</style>", " ",
|
||||||
|
_html, flags=_re.DOTALL | _re.IGNORECASE)
|
||||||
|
_html = _re.sub(r"<[^>]+>", " ", _html)
|
||||||
|
_html = _re.sub(r"\s+", " ", _html).strip()
|
||||||
|
if len(_html.split()) > 30:
|
||||||
|
profile_input["__homepage"] = _html[:20000]
|
||||||
|
logger.info("P16 homepage merged for profile: %d words",
|
||||||
|
len(_html.split()))
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("homepage fetch for profile failed: %s", e)
|
||||||
|
profile = await detect_business_profile(profile_input)
|
||||||
profile_dict = asdict(profile)
|
profile_dict = asdict(profile)
|
||||||
|
|
||||||
# Step 3: Check each document
|
# Step 3: Check each document
|
||||||
@@ -274,6 +387,15 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
|||||||
_update(check_id, f"Pruefen {i+1}/{n_entries}: {label}...", pct)
|
_update(check_id, f"Pruefen {i+1}/{n_entries}: {label}...", pct)
|
||||||
|
|
||||||
if not text or len(text) < 50:
|
if not text or len(text) < 50:
|
||||||
|
# P15: duplicate doc that was deduped against a primary doc
|
||||||
|
if entry.get("dup_of"):
|
||||||
|
results.append(DocCheckResult(
|
||||||
|
label=label, url="", doc_type=doc_type,
|
||||||
|
error=f"Nicht separat vorhanden — wird im Dokument "
|
||||||
|
f"'{_doc_type_label(entry['dup_of'])}' "
|
||||||
|
f"mit-geprueft.",
|
||||||
|
))
|
||||||
|
continue
|
||||||
# Empty entry — either from auto-discovery padding (no URL
|
# Empty entry — either from auto-discovery padding (no URL
|
||||||
# to fetch) or from a fetch that returned nothing. If there
|
# to fetch) or from a fetch that returned nothing. If there
|
||||||
# was a URL we keep the error so the user knows the fetch
|
# was a URL we keep the error so the user knows the fetch
|
||||||
@@ -290,6 +412,7 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
|||||||
text, doc_type, label, url,
|
text, doc_type, label, url,
|
||||||
entry["word_count"], use_agent_flag,
|
entry["word_count"], use_agent_flag,
|
||||||
business_scope=business_scope,
|
business_scope=business_scope,
|
||||||
|
business_profile={"no_direct_sales": getattr(profile, "no_direct_sales", False)},
|
||||||
)
|
)
|
||||||
|
|
||||||
# Apply profile context filter
|
# Apply profile context filter
|
||||||
@@ -413,6 +536,15 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
|||||||
cookie_payloads.extend(e["cmp_payloads"])
|
cookie_payloads.extend(e["cmp_payloads"])
|
||||||
if e.get("text"):
|
if e.get("text"):
|
||||||
cookie_text = e["text"]
|
cookie_text = e["text"]
|
||||||
|
# P17-D: Fallback wenn cookie via P15 deduped wurde — nutze DSE-Text
|
||||||
|
# sofern Cookie-Begriffe drin sind, damit LLM-Vendor-Extract trotzdem
|
||||||
|
# greifen kann.
|
||||||
|
if not cookie_text and not cookie_payloads:
|
||||||
|
dse_t = doc_texts.get("dse", "")
|
||||||
|
if dse_t and any(w in dse_t.lower() for w in
|
||||||
|
("cookie", "tracking", "google analytics", "consent")):
|
||||||
|
cookie_text = dse_t
|
||||||
|
logger.info("P17-D: vendor-extract Fallback auf DSE (Cookie deduped)")
|
||||||
# Site-owner derived from the submitted URLs — drives the
|
# Site-owner derived from the submitted URLs — drives the
|
||||||
# INTERNAL/GROUP_COMPANY classification of vendor records.
|
# INTERNAL/GROUP_COMPANY classification of vendor records.
|
||||||
owner_name = _company_name_from_url(doc_entries) or ""
|
owner_name = _company_name_from_url(doc_entries) or ""
|
||||||
@@ -526,15 +658,78 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
|||||||
report_html = build_html_report(results, None, doc_texts)
|
report_html = build_html_report(results, None, doc_texts)
|
||||||
profile_html = _build_profile_html(profile)
|
profile_html = _build_profile_html(profile)
|
||||||
|
|
||||||
# O4: Vendor-Redundanz / EU-Alternativen + Cost-Savings-Block —
|
# O4: Vendor-Redundanz / EU-Alternativen + Cost-Savings-Block
|
||||||
# zwischen VVT und Doc-Report einsortiert, damit Geschaeftsfuehrung
|
|
||||||
# die Einsparung sieht bevor sie in die Detail-Pruefung geht.
|
|
||||||
from .agent_doc_check_redundancy import build_redundancy_html
|
from .agent_doc_check_redundancy import build_redundancy_html
|
||||||
redundancy_html = build_redundancy_html(redundancy_report)
|
redundancy_html = build_redundancy_html(redundancy_report)
|
||||||
|
|
||||||
|
# P1: Executive-Summary GANZ oben — CFO/GF sieht 4 KPIs + 2 CTAs.
|
||||||
|
from .agent_doc_check_exec_summary import build_exec_summary_html
|
||||||
|
# Site-Name fuer Header bestimmen (gleiche Logik wie Email-Subject)
|
||||||
|
url_company_for_exec = _company_name_from_url(doc_entries)
|
||||||
|
domain_for_exec = _extract_domain(doc_entries)
|
||||||
|
site_name_for_exec = url_company_for_exec or domain_for_exec or ""
|
||||||
|
exec_summary_html = build_exec_summary_html(
|
||||||
|
scorecard=scorecard,
|
||||||
|
previous_scorecard=prev_scorecard,
|
||||||
|
cmp_vendors=cmp_vendors,
|
||||||
|
redundancy_report=redundancy_report,
|
||||||
|
site_name=site_name_for_exec,
|
||||||
|
)
|
||||||
|
|
||||||
|
# P10: Cookie-Policy-Architecture-Detection (BMW-Pattern erkennen)
|
||||||
|
cookie_arch_html = ""
|
||||||
|
try:
|
||||||
|
from compliance.services.cookie_policy_architecture import (
|
||||||
|
detect_architecture, build_architecture_html,
|
||||||
|
)
|
||||||
|
cookie_doc_url = ""
|
||||||
|
cookie_doc_text = doc_texts.get("cookie", "")
|
||||||
|
cookie_cmp_payloads: list[dict] = []
|
||||||
|
for e in doc_entries:
|
||||||
|
if (e.get("doc_type") or "").lower() in ("cookie", "cookie_policy"):
|
||||||
|
cookie_doc_url = e.get("url", "")
|
||||||
|
cookie_cmp_payloads = e.get("cmp_payloads") or []
|
||||||
|
break
|
||||||
|
# P17-A: Fallback wenn Cookie-Doc via P15 deduped wurde — nutze
|
||||||
|
# den DSE-Text wenn er Cookie-Schluesselwoerter enthaelt.
|
||||||
|
if not cookie_doc_text:
|
||||||
|
dse_text = doc_texts.get("dse", "")
|
||||||
|
if dse_text and any(w in dse_text.lower() for w in
|
||||||
|
("cookie", "tracking", "google analytics",
|
||||||
|
"consent")):
|
||||||
|
cookie_doc_text = dse_text
|
||||||
|
dse_entry = next((e for e in doc_entries
|
||||||
|
if e.get("doc_type") == "dse"), {})
|
||||||
|
cookie_doc_url = dse_entry.get("url", "")
|
||||||
|
cookie_cmp_payloads = dse_entry.get("cmp_payloads") or []
|
||||||
|
logger.info("P17-A: cookie-arch fallback auf DSE (Cookie-Doc deduped)")
|
||||||
|
if cookie_doc_text:
|
||||||
|
arch = detect_architecture(
|
||||||
|
doc_url=cookie_doc_url,
|
||||||
|
doc_text=cookie_doc_text,
|
||||||
|
cmp_payloads=cookie_cmp_payloads,
|
||||||
|
homepage_cmp_payloads=cmp_payloads or [],
|
||||||
|
)
|
||||||
|
cookie_arch_html = build_architecture_html(arch)
|
||||||
|
logger.info("cookie-arch: layer=%s versioned=%s risk=%s",
|
||||||
|
arch["layer_separation"], arch["versioned"], arch["risk_label"])
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("cookie-architecture detection failed: %s", e)
|
||||||
|
|
||||||
|
# Reihenfolge — Sales-optimiert:
|
||||||
|
# 1) Exec-Summary (KPIs + Saving + CTAs)
|
||||||
|
# 2) summary_html (Konkrete Aufgaben fuer die Geschaeftsfuehrung)
|
||||||
|
# 3) scanned_urls (Quellen-Transparenz)
|
||||||
|
# 4) profile_html (Erkanntes Geschaeftsmodell)
|
||||||
|
# 5) scorecard_html (MC-Scorecard)
|
||||||
|
# 6) redundancy_html (Optimierungspotenzial — direkt nach Compliance-Score)
|
||||||
|
# 7) providers_html + vvt_html (Vendor-Liste)
|
||||||
|
# 8) report_html (Doc-Pruefung Details)
|
||||||
full_html = (
|
full_html = (
|
||||||
summary_html + scanned_html + profile_html + scorecard_html
|
exec_summary_html + cookie_arch_html + summary_html
|
||||||
+ providers_html + vvt_html + redundancy_html + report_html
|
+ scanned_html + profile_html
|
||||||
|
+ scorecard_html + redundancy_html
|
||||||
|
+ providers_html + vvt_html + report_html
|
||||||
)
|
)
|
||||||
|
|
||||||
# Step 6: Send email — derive site name primarily from entered URL.
|
# Step 6: Send email — derive site name primarily from entered URL.
|
||||||
@@ -619,6 +814,21 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
|||||||
vendors=cmp_vendors,
|
vendors=cmp_vendors,
|
||||||
profile=extracted_profile,
|
profile=extracted_profile,
|
||||||
)
|
)
|
||||||
|
# Unified findings (P5): bundle MC + Pflichtangaben + Vendor +
|
||||||
|
# Redundanz in one searchable table behind /agent/findings/<id>.
|
||||||
|
try:
|
||||||
|
from compliance.services.unified_findings_collector import collect
|
||||||
|
from compliance.services.unified_findings_store import record_findings
|
||||||
|
unified = collect(
|
||||||
|
check_id=check_id,
|
||||||
|
results=results,
|
||||||
|
cmp_vendors=cmp_vendors,
|
||||||
|
redundancy_report=redundancy_report,
|
||||||
|
doc_texts=doc_texts,
|
||||||
|
)
|
||||||
|
record_findings(check_id, unified)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Unified findings collect failed: %s", e)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Audit persistence skipped: %s", e)
|
logger.warning("Audit persistence skipped: %s", e)
|
||||||
|
|
||||||
@@ -696,11 +906,19 @@ async def _fetch_text(url: str, doc_type: str = "") -> tuple[str, list[dict]]:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Consent-tester fetch failed for %s: %s", url, e)
|
logger.warning("Consent-tester fetch failed for %s: %s", url, e)
|
||||||
|
|
||||||
# 2. Fallback: direct HTTP fetch (works for SSR pages like BMW)
|
# 2. Fallback: direct HTTP fetch (works for SSR pages like BMW).
|
||||||
|
# P7: kenntlicher UA + per-Domain Rate-Limit.
|
||||||
try:
|
try:
|
||||||
import re as _re
|
import re as _re
|
||||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
from compliance.services.compliance_user_agent import (
|
||||||
resp = await client.get(url)
|
default_request_headers, DomainRateLimiter,
|
||||||
|
)
|
||||||
|
async with httpx.AsyncClient(
|
||||||
|
timeout=30.0, follow_redirects=True,
|
||||||
|
headers=default_request_headers(),
|
||||||
|
) as client:
|
||||||
|
async with DomainRateLimiter(url):
|
||||||
|
resp = await client.get(url)
|
||||||
if resp.status_code == 200 and "text/html" in resp.headers.get("content-type", ""):
|
if resp.status_code == 200 and "text/html" in resp.headers.get("content-type", ""):
|
||||||
html = resp.text
|
html = resp.text
|
||||||
# Strip HTML tags, decode entities
|
# Strip HTML tags, decode entities
|
||||||
@@ -929,6 +1147,7 @@ async def _check_single(
|
|||||||
text: str, doc_type: str, label: str, url: str,
|
text: str, doc_type: str, label: str, url: str,
|
||||||
word_count: int, use_agent: bool,
|
word_count: int, use_agent: bool,
|
||||||
business_scope: set[str] | None = None,
|
business_scope: set[str] | None = None,
|
||||||
|
business_profile: dict | None = None,
|
||||||
):
|
):
|
||||||
"""Run regex + MC checks on a single document."""
|
"""Run regex + MC checks on a single document."""
|
||||||
from compliance.services.doc_checks.runner import check_document_completeness
|
from compliance.services.doc_checks.runner import check_document_completeness
|
||||||
@@ -936,7 +1155,8 @@ async def _check_single(
|
|||||||
from .agent_doc_check_routes import CheckItem, DocCheckResult
|
from .agent_doc_check_routes import CheckItem, DocCheckResult
|
||||||
|
|
||||||
# Regex checklist
|
# Regex checklist
|
||||||
findings = check_document_completeness(text, doc_type, label, url)
|
findings = check_document_completeness(text, doc_type, label, url,
|
||||||
|
business_profile=business_profile)
|
||||||
|
|
||||||
all_checks: list[CheckItem] = []
|
all_checks: list[CheckItem] = []
|
||||||
completeness = 0
|
completeness = 0
|
||||||
@@ -1135,8 +1355,25 @@ def _company_name_from_url(doc_entries: list[dict]) -> str | None:
|
|||||||
|
|
||||||
|
|
||||||
def _get_skip_types(profile) -> dict[str, str]:
|
def _get_skip_types(profile) -> dict[str, str]:
|
||||||
"""Doc_types to skip entirely. Currently empty — we check everything
|
"""Doc_types to skip entirely with a per-type reason message.
|
||||||
and flag irrelevant items as INFO instead of skipping."""
|
|
||||||
|
Heute primaer fuer OEM-Konfigurator-Pattern (BMW/Audi/Mercedes):
|
||||||
|
wenn die Site kein Direkt-Vertrieb macht, sind AGB/Widerruf/
|
||||||
|
Nutzungsbedingungen nicht Pflicht auf der Website — sie werden
|
||||||
|
beim Vertragshaendler ausgehaendigt.
|
||||||
|
"""
|
||||||
|
if getattr(profile, "no_direct_sales", False):
|
||||||
|
msg = (
|
||||||
|
"Nicht anwendbar — die Webseite schliesst keinen Direkt-"
|
||||||
|
"Kaufvertrag (OEM-Konfigurator-Pattern, Vertrag laeuft "
|
||||||
|
"ueber Vertragshaendler). AGB/Widerruf werden beim "
|
||||||
|
"Haendler ausgehaendigt."
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"agb": msg,
|
||||||
|
"widerruf": msg,
|
||||||
|
"nutzungsbedingungen": msg,
|
||||||
|
}
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,135 @@
|
|||||||
|
"""
|
||||||
|
Executive-Summary-Block — der oberste Email-Abschnitt.
|
||||||
|
|
||||||
|
Zeigt CFO / GF in 4 Zahlen den Gesamt-Mehrwert des Compliance-Checks:
|
||||||
|
1) Compliance-Score (Trend vs Vorlauf)
|
||||||
|
2) Anzahl analysierter Anbieter
|
||||||
|
3) Geschaetztes jaehrliches Sparpotenzial (Range)
|
||||||
|
4) Konsolidierungs-Potenzial (Anbieter koennen reduziert werden)
|
||||||
|
|
||||||
|
Plus zwei Big-CTA-Buttons:
|
||||||
|
- "Compliance-Maengel im Detail" → springt zum Doc-Pruefungs-Block
|
||||||
|
- "Konsolidierungs-Plan ansehen" → springt zum Redundanz-Block
|
||||||
|
|
||||||
|
Ziel: in 5 Sekunden sieht der Vorstand den ROI. Wenn neugierig, scrollt
|
||||||
|
er weiter in die Detail-Bloecke (die UNTER dieser Summary liegen).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
def _fmt_eur_range(low: int, high: int) -> str:
|
||||||
|
if not low and not high:
|
||||||
|
return "—"
|
||||||
|
if low == high:
|
||||||
|
return f"~{low:,} €".replace(",", ".")
|
||||||
|
return f"{low:,}–{high:,} €".replace(",", ".")
|
||||||
|
|
||||||
|
|
||||||
|
def build_exec_summary_html(
    scorecard: dict | None,
    previous_scorecard: dict | None,
    cmp_vendors: list[dict] | None,
    redundancy_report: dict | None,
    site_name: str = "",
) -> str:
    """Build the top-of-email Executive Summary with 4 KPIs + 2 CTAs.

    Args:
        scorecard: Current scorecard; ``totals.pct`` and ``totals.total`` are
            read from it. ``None`` renders a 0% score in neutral grey.
        previous_scorecard: Scorecard of the previous run; when given together
            with ``scorecard``, the score difference is shown in "pp".
        cmp_vendors: Vendor records; only their count is displayed.
        redundancy_report: Report whose ``summary`` supplies
            ``estimated_saving_year_eur`` (low/high pair),
            ``estimated_saving_pct`` and ``consolidation_potential``.
        site_name: Name shown in the heading; HTML-escaped before insertion.

    Returns:
        One HTML fragment (inline-styled ``<div>``) as a string.
    """
    # Local import keeps this block self-contained; the module itself only
    # has the ``__future__`` import at the top.
    import html

    # 1) Compliance score (+ trend against the previous run)
    pct = 0
    n_checks = 0
    delta_str = ""
    score_color = "#94a3b8"
    if scorecard:
        totals = scorecard.get("totals") or {}
        # ``or 0`` guards against keys that are present but set to None.
        pct = int(totals.get("pct") or 0)
        n_checks = int(totals.get("total") or 0)
        score_color = ("#16a34a" if pct >= 80 else
                       "#d97706" if pct >= 50 else "#dc2626")
        if previous_scorecard:
            prev_totals = previous_scorecard.get("totals") or {}
            prev_pct = int(prev_totals.get("pct") or 0)
            d = pct - prev_pct
            if d:
                trend_color = "#16a34a" if d > 0 else "#dc2626"
                delta_str = (
                    f'<span style="font-size:14px;color:{trend_color};margin-left:6px">'
                    f'{"+" if d > 0 else ""}{d} pp</span>'
                )

    # 2) Vendor count
    n_vendors = len(cmp_vendors or [])

    # 3+4) Saving + consolidation potential
    s = (redundancy_report or {}).get("summary") or {}
    try:
        sav_low, sav_high = s.get("estimated_saving_year_eur") or (0, 0)
    except (TypeError, ValueError):
        # Not a 2-item sequence: render "no estimate" instead of failing the
        # whole email build.
        sav_low, sav_high = 0, 0
    sav_low = sav_low or 0
    sav_high = sav_high or 0
    n_consolidation = s.get("consolidation_potential") or 0
    # Both values end up inside HTML text nodes, so escape them.
    sav_pct = html.escape(str(s.get("estimated_saving_pct") or "—"))
    safe_site_name = html.escape(site_name or "")

    parts = [
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:700px;margin:0 auto 18px;padding:18px 22px;'
        'background:linear-gradient(135deg,#1e293b 0%,#0f172a 100%);'
        'border-radius:10px;color:white">',

        '<div style="font-size:11px;color:#94a3b8;text-transform:uppercase;'
        'letter-spacing:1.5px;margin-bottom:6px">Executive Summary</div>',
        f'<h2 style="margin:0 0 16px;font-size:18px;color:white">'
        f'Compliance-Check {safe_site_name}</h2>',

        # 2x2 KPI grid
        '<table style="width:100%;border-collapse:separate;border-spacing:8px">',

        # Row 1: compliance score + vendor count
        '<tr>',
        f'<td style="width:50%;padding:12px 14px;background:rgba(255,255,255,0.05);'
        f'border-radius:6px;border:1px solid rgba(255,255,255,0.08)">'
        f'<div style="font-size:10px;color:#94a3b8;text-transform:uppercase;'
        f'letter-spacing:1px;margin-bottom:4px">DSGVO / TDDDG / TMG Score</div>'
        f'<div style="font-size:28px;font-weight:700;color:{score_color}">'
        f'{pct}%{delta_str}</div>'
        f'<div style="font-size:11px;color:#cbd5e1;margin-top:2px">'
        f'aus {n_checks} Pflicht-Pruefungen</div>'
        f'</td>',

        f'<td style="width:50%;padding:12px 14px;background:rgba(255,255,255,0.05);'
        f'border-radius:6px;border:1px solid rgba(255,255,255,0.08)">'
        f'<div style="font-size:10px;color:#94a3b8;text-transform:uppercase;'
        f'letter-spacing:1px;margin-bottom:4px">Identifizierte Anbieter</div>'
        f'<div style="font-size:28px;font-weight:700;color:white">{n_vendors}</div>'
        f'<div style="font-size:11px;color:#cbd5e1;margin-top:2px">'
        f'davon {n_consolidation} konsolidierbar</div>'
        f'</td>',
        '</tr>',

        # Row 2: estimated saving
        '<tr>',
        f'<td colspan="2" style="padding:14px 16px;background:linear-gradient(90deg,'
        f'rgba(16,185,129,0.15) 0%,rgba(16,185,129,0.05) 100%);'
        f'border-radius:6px;border:1px solid rgba(16,185,129,0.3)">'
        f'<div style="font-size:10px;color:#86efac;text-transform:uppercase;'
        f'letter-spacing:1px;margin-bottom:4px">'
        f'Geschaetztes Sparpotenzial pro Jahr (Tool-Lizenzen, ohne Media-Spend)</div>'
        f'<div style="font-size:24px;font-weight:700;color:#34d399">'
        f'{_fmt_eur_range(sav_low, sav_high)}'
        f'<span style="font-size:14px;color:#86efac;margin-left:8px">({sav_pct})</span></div>'
        f'<div style="font-size:11px;color:#cbd5e1;margin-top:4px">'
        f'durch Konsolidierung redundanter Anbieter auf je 1 EU-Tool pro '
        f'Funktions-Kategorie. <em>Schaetzbereich, mit dem Einkauf zu verifizieren.</em>'
        f'</div></td>',
        '</tr>',

        '</table>',

        # CTAs: in-page anchor links
        '<div style="margin-top:14px;padding-top:12px;border-top:1px solid '
        'rgba(255,255,255,0.1);text-align:center">',
        '<a href="#mc-scorecard" style="display:inline-block;padding:8px 16px;'
        'background:#7c3aed;color:white;text-decoration:none;border-radius:6px;'
        'font-size:12px;font-weight:600;margin-right:8px">'
        'Compliance-Maengel im Detail →</a>',
        '<a href="#optimierungspotenzial" style="display:inline-block;padding:8px 16px;'
        'background:#10b981;color:white;text-decoration:none;border-radius:6px;'
        'font-size:12px;font-weight:600">'
        'Konsolidierungs-Plan →</a>',
        '</div>',

        '</div>',
    ]
    return "".join(parts)
|
||||||
@@ -421,10 +421,18 @@ def _render_vendor_row_full(v: dict) -> str:
|
|||||||
f'{", ".join(flags[:4])}</div>'
|
f'{", ".join(flags[:4])}</div>'
|
||||||
f'{actions_html}'
|
f'{actions_html}'
|
||||||
)
|
)
|
||||||
|
risk = v.get("compliance_risk") or {}
|
||||||
|
risk_label = risk.get("label") or ""
|
||||||
|
risk_badge = ""
|
||||||
|
if risk_label and risk_label != "unklar":
|
||||||
|
rc = {"kritisch": ("#dc2626", "#fff"), "hoch": ("#fecaca", "#991b1b"),
|
||||||
|
"mittel": ("#fde68a", "#92400e"), "gering": ("#d1fae5", "#065f46")}.get(risk_label, ("#e5e7eb", "#475569"))
|
||||||
|
risk_badge = (f'<span style="margin-left:6px;padding:1px 5px;border-radius:3px;font-size:9px;'
|
||||||
|
f'background:{rc[0]};color:{rc[1]}">Risk: {risk_label}</span>')
|
||||||
return (
|
return (
|
||||||
f'<tr style="border-top:1px solid #e2e8f0">'
|
f'<tr style="border-top:1px solid #e2e8f0">'
|
||||||
f'<td style="padding:6px 8px;color:#1e293b;font-size:11px">'
|
f'<td style="padding:6px 8px;color:#1e293b;font-size:11px">'
|
||||||
f'{name}{flag_str}</td>'
|
f'{name}{risk_badge}{flag_str}</td>'
|
||||||
f'<td style="padding:6px 8px;color:#475569;font-size:11px">{category}</td>'
|
f'<td style="padding:6px 8px;color:#475569;font-size:11px">{category}</td>'
|
||||||
f'<td style="padding:6px 8px;color:#475569;font-size:11px">{country}</td>'
|
f'<td style="padding:6px 8px;color:#475569;font-size:11px">{country}</td>'
|
||||||
f'<td style="padding:6px 8px;text-align:center;color:#475569;font-size:11px">'
|
f'<td style="padding:6px 8px;text-align:center;color:#475569;font-size:11px">'
|
||||||
|
|||||||
@@ -28,9 +28,10 @@ def build_redundancy_html(report: dict | None) -> str:
|
|||||||
pct = s.get("estimated_saving_pct") or "n/a"
|
pct = s.get("estimated_saving_pct") or "n/a"
|
||||||
|
|
||||||
parts = [
|
parts = [
|
||||||
'<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
|
'<div id="optimierungspotenzial" style="font-family:-apple-system,'
|
||||||
'max-width:700px;margin:0 auto 16px;padding:14px 18px;'
|
'BlinkMacSystemFont,sans-serif;max-width:700px;margin:0 auto 16px;'
|
||||||
'background:#fef3c7;border:1px solid #fcd34d;border-radius:8px">',
|
'padding:14px 18px;background:#fef3c7;border:1px solid #fcd34d;'
|
||||||
|
'border-radius:8px">',
|
||||||
'<h3 style="margin:0 0 6px;font-size:14px;color:#92400e">'
|
'<h3 style="margin:0 0 6px;font-size:14px;color:#92400e">'
|
||||||
'Optimierungspotenzial: Redundanzen + EU-Alternativen</h3>',
|
'Optimierungspotenzial: Redundanzen + EU-Alternativen</h3>',
|
||||||
f'<p style="margin:0 0 10px;font-size:11px;color:#78350f">'
|
f'<p style="margin:0 0 10px;font-size:11px;color:#78350f">'
|
||||||
|
|||||||
@@ -134,7 +134,9 @@ def build_management_summary(results: list[DocCheckResult]) -> str:
|
|||||||
ok = [r for r in results if r.completeness_pct == 100 and not r.error]
|
ok = [r for r in results if r.completeness_pct == 100 and not r.error]
|
||||||
fixable = [r for r in results if 0 < r.completeness_pct < 100 and not r.error]
|
fixable = [r for r in results if 0 < r.completeness_pct < 100 and not r.error]
|
||||||
critical = [r for r in results if r.completeness_pct == 0 and not r.error]
|
critical = [r for r in results if r.completeness_pct == 0 and not r.error]
|
||||||
errors = [r for r in results if r.error]
|
not_applicable = [r for r in results if r.error
|
||||||
|
and r.error.startswith("Nicht anwendbar")]
|
||||||
|
errors = [r for r in results if r.error and r not in not_applicable]
|
||||||
|
|
||||||
html = [
|
html = [
|
||||||
'<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
|
'<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
|
||||||
@@ -150,17 +152,24 @@ def build_management_summary(results: list[DocCheckResult]) -> str:
|
|||||||
html.append('<p>Keine Dokumente geprueft.</p></div>')
|
html.append('<p>Keine Dokumente geprueft.</p></div>')
|
||||||
return "\n".join(html)
|
return "\n".join(html)
|
||||||
|
|
||||||
|
na_note = (
|
||||||
|
f' Zusaetzlich {len(not_applicable)} Dokument{"" if len(not_applicable) == 1 else "e"} '
|
||||||
|
f'als NICHT ANWENDBAR markiert (kein Direkt-Vertrieb — '
|
||||||
|
f'OEM-Konfigurator-Pattern).' if not_applicable else ""
|
||||||
|
)
|
||||||
if len(ok) == total:
|
if len(ok) == total:
|
||||||
html.append(
|
html.append(
|
||||||
'<p style="color:#16a34a;font-weight:600;font-size:15px">'
|
f'<p style="color:#16a34a;font-weight:600;font-size:15px">'
|
||||||
'Alle Dokumente sind vollstaendig. Keine dringenden Massnahmen noetig.</p>'
|
f'Alle Dokumente sind vollstaendig. Keine dringenden Massnahmen noetig.'
|
||||||
|
f'{na_note}</p>'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
html.append(
|
html.append(
|
||||||
f'<p style="font-size:14px;color:#475569">'
|
f'<p style="font-size:14px;color:#475569">'
|
||||||
f'{len(ok)} von {total} Dokumenten sind vollstaendig. '
|
f'{len(ok)} von {total} Dokumenten sind vollstaendig. '
|
||||||
f'{len(fixable)} brauchen Korrekturen'
|
f'{len(fixable)} brauchen Korrekturen'
|
||||||
f'{f", {len(critical)} fehlen oder sind unbrauchbar" if critical else ""}.</p>'
|
f'{f", {len(critical)} fehlen oder sind unbrauchbar" if critical else ""}.'
|
||||||
|
f'{na_note}</p>'
|
||||||
)
|
)
|
||||||
|
|
||||||
# Concrete actions
|
# Concrete actions
|
||||||
@@ -173,7 +182,7 @@ def build_management_summary(results: list[DocCheckResult]) -> str:
|
|||||||
if c.level == 1 and not c.passed and not c.skipped
|
if c.level == 1 and not c.passed and not c.skipped
|
||||||
and c.severity != "INFO"
|
and c.severity != "INFO"
|
||||||
]
|
]
|
||||||
for c in failed_checks[:3]: # Max 3 per document
|
for c in failed_checks: # P17-B: kein Per-Doc-Cap
|
||||||
action = _check_to_action(r.label, c.label, c.hint)
|
action = _check_to_action(r.label, c.label, c.hint)
|
||||||
if action:
|
if action:
|
||||||
actions.append(action)
|
actions.append(action)
|
||||||
@@ -184,7 +193,7 @@ def build_management_summary(results: list[DocCheckResult]) -> str:
|
|||||||
'Konkrete Aufgaben:</h3>'
|
'Konkrete Aufgaben:</h3>'
|
||||||
'<ol style="font-size:13px;color:#475569;padding-left:20px;margin:0">'
|
'<ol style="font-size:13px;color:#475569;padding-left:20px;margin:0">'
|
||||||
)
|
)
|
||||||
for a in actions[:10]: # Max 10 actions
|
for a in actions[:20]: # P17-B: 10 -> 20
|
||||||
html.append(f'<li style="margin-bottom:6px">{a}</li>')
|
html.append(f'<li style="margin-bottom:6px">{a}</li>')
|
||||||
html.append('</ol>')
|
html.append('</ol>')
|
||||||
|
|
||||||
@@ -279,10 +288,13 @@ def _render_document(html: list[str], r: DocCheckResult, doc_text: str = "") ->
|
|||||||
r.error.startswith("Nicht eingereicht")
|
r.error.startswith("Nicht eingereicht")
|
||||||
or r.error.startswith("Auf der Website nicht gefunden")
|
or r.error.startswith("Auf der Website nicht gefunden")
|
||||||
)
|
)
|
||||||
|
is_not_applicable = bool(r.error) and r.error.startswith("Nicht anwendbar")
|
||||||
if is_missing:
|
if is_missing:
|
||||||
status_label = ("NICHT GEFUNDEN"
|
status_label = ("NICHT GEFUNDEN"
|
||||||
if r.error.startswith("Auf der Website")
|
if r.error.startswith("Auf der Website")
|
||||||
else "NICHT EINGEREICHT")
|
else "NICHT EINGEREICHT")
|
||||||
|
elif is_not_applicable:
|
||||||
|
status_label = "NICHT ANWENDBAR"
|
||||||
elif r.error:
|
elif r.error:
|
||||||
status_label = "FEHLER"
|
status_label = "FEHLER"
|
||||||
|
|
||||||
@@ -330,6 +342,13 @@ def _render_document(html: list[str], r: DocCheckResult, doc_text: str = "") ->
|
|||||||
'background:#fafafa;border-top:1px solid #f3f4f6">'
|
'background:#fafafa;border-top:1px solid #f3f4f6">'
|
||||||
+ body_msg + '</div>'
|
+ body_msg + '</div>'
|
||||||
)
|
)
|
||||||
|
elif is_not_applicable:
|
||||||
|
html.append(
|
||||||
|
'<div style="padding:12px 16px;color:#475569;font-size:12px;'
|
||||||
|
'background:#f1f5f9;border-top:1px solid #cbd5e1;border-left:'
|
||||||
|
'3px solid #94a3b8">'
|
||||||
|
+ r.error + '</div>'
|
||||||
|
)
|
||||||
elif r.error:
|
elif r.error:
|
||||||
html.append(f'<div style="padding:12px 16px;color:#991b1b">{r.error}</div>')
|
html.append(f'<div style="padding:12px 16px;color:#991b1b">{r.error}</div>')
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ def build_scorecard_html(
|
|||||||
trend_str = _delta_badge(overall_pct, prev_total_pct) if prev_total_pct is not None else ""
|
trend_str = _delta_badge(overall_pct, prev_total_pct) if prev_total_pct is not None else ""
|
||||||
|
|
||||||
head = (
|
head = (
|
||||||
'<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
|
'<div id="mc-scorecard" style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
|
||||||
'max-width:700px;margin:0 auto 16px;padding:12px 16px;'
|
'max-width:700px;margin:0 auto 16px;padding:12px 16px;'
|
||||||
'background:#f0f9ff;border:1px solid #bae6fd;border-radius:8px">'
|
'background:#f0f9ff;border:1px solid #bae6fd;border-radius:8px">'
|
||||||
'<h3 style="margin:0 0 6px;font-size:14px;color:#0369a1">'
|
'<h3 style="margin:0 0 6px;font-size:14px;color:#0369a1">'
|
||||||
|
|||||||
@@ -0,0 +1,104 @@
|
|||||||
|
"""
|
||||||
|
Voll-Audit Findings Router — unified view across all 4 finding sources.
|
||||||
|
|
||||||
|
Endpoint:
|
||||||
|
GET /api/compliance/agent/findings/{check_id}
|
||||||
|
?source=mc|pflichtangabe|vendor|redundanz|all
|
||||||
|
&severity=CRITICAL|HIGH|MEDIUM|LOW|INFO|all
|
||||||
|
&doc_type=impressum|dse|cookie|...|all
|
||||||
|
&status=failed|passed|skipped|na|info|all
|
||||||
|
&q=<freitext>
|
||||||
|
&limit=<int>
|
||||||
|
|
||||||
|
Liefert summary + filtered findings list. Frontend rendert daraus den
|
||||||
|
Voll-Audit-Tab unter /sdk/agent/audit/<check_id>.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
from fastapi import APIRouter, HTTPException, Query
|
||||||
|
|
||||||
|
from compliance.services.unified_findings_store import (
|
||||||
|
findings_summary,
|
||||||
|
list_findings,
|
||||||
|
)
|
||||||
|
from compliance.services.compliance_audit_log import get_check_run
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/compliance/agent", tags=["agent"])
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_domain(d: str) -> str:
    """Reduce a URL or bare host name to a comparable lowercase domain.

    A missing scheme is tolerated (``https://`` is assumed), and a leading
    ``www.`` is dropped. Port and userinfo are ignored so that
    ``example.com:443`` and ``example.com`` compare equal in the
    domain assertion of ``get_findings``.

    Args:
        d: URL or host name; may be empty.

    Returns:
        The normalized host, or ``""`` when no host can be determined.
    """
    if not d:
        return ""
    candidate = d.strip()
    if "://" not in candidate:
        candidate = "https://" + candidate
    try:
        # ``hostname`` (unlike ``netloc``) excludes ``user:pw@`` and ``:port``
        # and is already lowercased by urllib.
        host = urlparse(candidate).hostname or ""
    except ValueError:
        # urlparse rejects malformed hosts (e.g. an unterminated IPv6
        # bracket); treat them as "no domain" rather than failing the request.
        return ""
    return host[4:] if host.startswith("www.") else host
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/findings/{check_id}")
def get_findings(
    check_id: str,
    source: str | None = Query(None, description="mc|pflichtangabe|vendor|redundanz|all"),
    severity: str | None = Query(None, description="CRITICAL|HIGH|MEDIUM|LOW|INFO|all"),
    doc_type: str | None = Query(None),
    status: str | None = Query(None, description="failed|passed|skipped|na|info|all"),
    q: str | None = Query(None, description="freitext-suche label/vendor"),
    limit: int = Query(1000, ge=1, le=5000),
    expected_domain: str | None = Query(
        None, description="Hard-Assertion: Run muss zu dieser Domain gehoeren (Cross-Tenant-Schutz)",
    ),
) -> dict:
    """Return aggregated findings + summary counters for a check run."""
    # Optional domain assertion: when the caller states which domain it
    # expects, the stored run must belong to that domain, otherwise the
    # request is rejected with 403 before any findings are read.
    if expected_domain:
        run = get_check_run(check_id)
        actual = _normalize_domain((run or {}).get("base_domain") or "")
        wanted = _normalize_domain(expected_domain)
        domain_matches = bool(run) and actual == wanted
        if not domain_matches:
            raise HTTPException(
                status_code=403,
                detail=f"Cross-tenant access blocked: check_id {check_id} "
                       f"gehoert zu Domain '{actual or '?'}', angefragt: "
                       f"'{wanted}'",
            )

    # Echo of the effective filter; unset filters are reported as "all".
    applied_filter = {
        "source": source or "all",
        "severity": severity or "all",
        "doc_type": doc_type or "all",
        "status": status or "all",
        "q": q or "",
        "limit": limit,
    }

    try:
        summary = findings_summary(check_id)
        findings = list_findings(
            check_id=check_id,
            source_type=source,
            severity=severity,
            doc_type=doc_type,
            status=status,
            q=q,
            limit=limit,
        )
        return {
            "found": summary.get("total", 0) > 0,
            "check_id": check_id,
            "summary": summary,
            "filter": applied_filter,
            "count": len(findings),
            "findings": findings,
        }
    except Exception as e:
        # Best-effort endpoint: store failures are logged with traceback and
        # reported as an empty result instead of an HTTP error.
        logger.exception("get_findings failed for %s", check_id)
        return {
            "found": False,
            "check_id": check_id,
            "error": str(e)[:200],
            "summary": {},
            "count": 0,
            "findings": [],
        }
|
||||||
@@ -0,0 +1,244 @@
|
|||||||
|
"""FastAPI routes for QUAIDAL-derived Controls (AI Trainingsdaten-Qualität).
|
||||||
|
|
||||||
|
Endpoints:
|
||||||
|
- GET /v1/quaidal/stats - Counts by kind + source provenance
|
||||||
|
- GET /v1/quaidal/controls - List all controls, optional kind= filter
|
||||||
|
- GET /v1/quaidal/controls/{id} - Single derived control by derived_id
|
||||||
|
- GET /v1/quaidal/criteria - The 10 QKB criteria with linked QB/MA IDs
|
||||||
|
- GET /v1/quaidal/criteria/{id} - Single QKB with full child tree (QB → MA → QM)
|
||||||
|
|
||||||
|
The controls are Clean-Room derived from BSI QUAIDAL. See
|
||||||
|
control-pipeline/scripts/derive_quaidal_mcs.py and migration 011.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fastapi import APIRouter, HTTPException, Query
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from sqlalchemy import text
|
||||||
|
|
||||||
|
from database import SessionLocal
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
router = APIRouter(prefix="/v1/quaidal", tags=["quaidal"])
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Response shapes
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
# Reference from a derived control to an external framework.
# (Kept as a comment rather than a docstring so the generated schema
# description stays unchanged.)
class ExternalRef(BaseModel):
    # Name of the referenced framework.
    framework: str
    # Optional citation within that framework.
    citation: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# Provenance of a derived control; _row_to_control fills these fields from
# the row's source_* columns.
class SourceProvenance(BaseModel):
    # From source_framework.
    framework: str
    # From source_section.
    section: str
    # From source_url.
    url: Optional[str] = None
    # From source_commit_sha.
    commit_sha: Optional[str] = None
    # From source_title_original.
    title_original: Optional[str] = None
    # From source_license_note.
    license_note: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# API representation of one row of compliance.derived_controls.
class DerivedControl(BaseModel):
    derived_id: str
    # The endpoints in this module filter on the kinds 'criterion',
    # 'building_block', 'measure' and 'metric'.
    kind: str
    canonical_name: str
    description: str
    regulation_anchor: Optional[str] = None
    # IDs of linked QUAIDAL entries; get_criterion_tree follows them to
    # resolve building blocks, measures ("MA-" prefix) and metrics ("QM-").
    related_quaidal_ids: list[str]
    external_refs: list[ExternalRef]
    source: SourceProvenance
    # From the plagiarism_score_at_generation column; None when not recorded.
    plagiarism_score: Optional[float] = None
|
||||||
|
|
||||||
|
|
||||||
|
# Response of GET /controls.
class ControlsListResponse(BaseModel):
    # Number of controls matching the filter, independent of limit/offset.
    total: int
    # The requested page of controls.
    controls: list[DerivedControl]
|
||||||
|
|
||||||
|
|
||||||
|
class CriterionWithChildren(BaseModel):
    """A QKB criterion with the IDs of its linked building blocks, measures and metrics."""

    # The criterion itself.
    criterion: DerivedControl
    # Building blocks referenced by the criterion.
    building_blocks: list[DerivedControl]
    # Measures referenced by those building blocks.
    measures: list[DerivedControl]
    # Metrics referenced by those measures.
    metrics: list[DerivedControl]
|
||||||
|
|
||||||
|
|
||||||
|
# Response of GET /stats.
class StatsResponse(BaseModel):
    # Number of controls per kind.
    counts_by_kind: dict[str, int]
    source_framework: str
    # Both values are None when get_stats finds no row for the framework.
    source_commit_sha: Optional[str]
    license_note: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# DB helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _row_to_control(row) -> DerivedControl:
    """Map one result row (selected via ``_SELECT_COLUMNS``) to a DerivedControl.

    NULL list columns become empty lists; a NULL plagiarism score stays None,
    any other value is converted to ``float``.
    """
    raw_score = row.plagiarism_score_at_generation
    score = None if raw_score is None else float(raw_score)

    provenance = SourceProvenance(
        framework=row.source_framework,
        section=row.source_section,
        url=row.source_url,
        commit_sha=row.source_commit_sha,
        title_original=row.source_title_original,
        license_note=row.source_license_note,
    )
    refs = [ExternalRef(**ref) for ref in (row.external_refs or [])]

    return DerivedControl(
        derived_id=row.derived_id,
        kind=row.kind,
        canonical_name=row.canonical_name,
        description=row.description,
        regulation_anchor=row.regulation_anchor,
        related_quaidal_ids=row.related_quaidal_ids or [],
        external_refs=refs,
        source=provenance,
        plagiarism_score=score,
    )
|
||||||
|
|
||||||
|
|
||||||
|
_SELECT_COLUMNS = """
|
||||||
|
derived_id, kind, canonical_name, description, regulation_anchor,
|
||||||
|
related_quaidal_ids, external_refs,
|
||||||
|
source_framework, source_section, source_url, source_commit_sha,
|
||||||
|
source_title_original, source_license_note,
|
||||||
|
plagiarism_score_at_generation
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Endpoints
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/stats", response_model=StatsResponse)
def get_stats() -> StatsResponse:
    """Counts by kind + the QUAIDAL source provenance (single source today)."""
    framework = "BSI QUAIDAL"
    bind = {"fw": framework}

    count_query = text(
        "SELECT kind, COUNT(*) AS n FROM compliance.derived_controls "
        "WHERE source_framework = :fw GROUP BY kind"
    )
    # Any single row supplies the provenance metadata.
    meta_query = text(
        "SELECT source_commit_sha, source_license_note FROM compliance.derived_controls "
        "WHERE source_framework = :fw LIMIT 1"
    )

    with SessionLocal() as db:
        kind_rows = db.execute(count_query, bind).all()
        meta = db.execute(meta_query, bind).first()

    per_kind = {}
    for entry in kind_rows:
        per_kind[entry.kind] = entry.n

    return StatsResponse(
        counts_by_kind=per_kind,
        source_framework=framework,
        source_commit_sha=meta.source_commit_sha if meta else None,
        license_note=meta.source_license_note if meta else None,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/controls", response_model=ControlsListResponse)
def list_controls(
    kind: Optional[str] = Query(None, description="criterion | building_block | measure | metric"),
    limit: int = Query(500, ge=1, le=2000),
    offset: int = Query(0, ge=0),
) -> ControlsListResponse:
    """List QUAIDAL-derived controls, optionally filtered by kind."""
    # Filter binds are shared by the page query and the count query; the
    # paging binds are only added for the page query.
    filter_binds: dict = {"fw": "BSI QUAIDAL"}
    conditions = ["source_framework = :fw"]
    if kind:
        conditions.append("kind = :kind")
        filter_binds["kind"] = kind
    where_clause = " AND ".join(conditions)

    page_sql = (
        f"SELECT {_SELECT_COLUMNS} FROM compliance.derived_controls "
        f"WHERE {where_clause} "
        "ORDER BY source_section LIMIT :limit OFFSET :offset"
    )
    count_sql = f"SELECT COUNT(*) FROM compliance.derived_controls WHERE {where_clause}"

    with SessionLocal() as db:
        page_binds = {**filter_binds, "limit": limit, "offset": offset}
        rows = db.execute(text(page_sql), page_binds).all()
        total = db.execute(text(count_sql), filter_binds).scalar() or 0

    controls = [_row_to_control(row) for row in rows]
    return ControlsListResponse(total=int(total), controls=controls)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/controls/{derived_id}", response_model=DerivedControl)
def get_control(derived_id: str) -> DerivedControl:
    # Looks up a single control by its derived_id; responds 404 when no row
    # matches. (Comment instead of docstring so the OpenAPI description stays
    # unchanged.)
    lookup = text(
        f"SELECT {_SELECT_COLUMNS} FROM compliance.derived_controls WHERE derived_id = :id"
    )
    with SessionLocal() as db:
        match = db.execute(lookup, {"id": derived_id}).first()

    if match:
        return _row_to_control(match)
    raise HTTPException(status_code=404, detail=f"Control {derived_id} not found")
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/criteria", response_model=list[DerivedControl])
def list_criteria() -> list[DerivedControl]:
    """Returns the 10 QKB criteria. Use /criteria/{section_id} for the full child tree."""
    criteria_query = text(
        f"SELECT {_SELECT_COLUMNS} FROM compliance.derived_controls "
        "WHERE source_framework = :fw AND kind = 'criterion' ORDER BY source_section"
    )
    with SessionLocal() as db:
        criterion_rows = db.execute(criteria_query, {"fw": "BSI QUAIDAL"}).all()

    criteria: list[DerivedControl] = []
    for entry in criterion_rows:
        criteria.append(_row_to_control(entry))
    return criteria
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_controls_by_section(db, kind: str, section_ids: list[str]) -> list[DerivedControl]:
    """Load the BSI QUAIDAL controls of one ``kind`` whose source_section is in ``section_ids``.

    Returns an empty list without querying when ``section_ids`` is empty.
    Results are ordered by source_section.
    """
    if not section_ids:
        return []
    # De-duplicate while keeping the first-seen order of the IDs.
    unique_ids = list(dict.fromkeys(section_ids))
    rows = db.execute(text(
        f"SELECT {_SELECT_COLUMNS} FROM compliance.derived_controls "
        "WHERE source_framework = :fw AND kind = :kind "
        "AND source_section = ANY(:ids) ORDER BY source_section"
    ), {"fw": "BSI QUAIDAL", "kind": kind, "ids": unique_ids}).all()
    return [_row_to_control(r) for r in rows]


@router.get("/criteria/{section_id}", response_model=CriterionWithChildren)
def get_criterion_tree(section_id: str) -> CriterionWithChildren:
    """Single QKB with the building blocks it references and the measures/metrics those reference.

    `section_id` is the canonical QUAIDAL ID, e.g. `QKB-01`.
    """
    # The lookup is case-insensitive on the caller's side: the ID is
    # upper-cased before it is compared with source_section.
    section_id_upper = section_id.upper()
    with SessionLocal() as db:
        criterion_row = db.execute(text(
            f"SELECT {_SELECT_COLUMNS} FROM compliance.derived_controls "
            "WHERE source_framework = :fw AND source_section = :sid AND kind = 'criterion'"
        ), {"fw": "BSI QUAIDAL", "sid": section_id_upper}).first()
        if not criterion_row:
            raise HTTPException(status_code=404, detail=f"Criterion {section_id_upper} not found")

        # Level 1: building blocks referenced directly by the criterion.
        building_blocks = _fetch_controls_by_section(
            db, "building_block", criterion_row.related_quaidal_ids or [],
        )

        # Level 2: measures ("MA-" IDs) referenced by any building block.
        measure_ids: list[str] = []
        for qb in building_blocks:
            measure_ids.extend(mid for mid in qb.related_quaidal_ids if mid.startswith("MA-"))
        measures = _fetch_controls_by_section(db, "measure", measure_ids)

        # Level 3: metrics ("QM-" IDs) referenced by any measure.
        metric_ids: list[str] = []
        for ma in measures:
            metric_ids.extend(mid for mid in ma.related_quaidal_ids if mid.startswith("QM-"))
        metrics = _fetch_controls_by_section(db, "metric", metric_ids)

        return CriterionWithChildren(
            criterion=_row_to_control(criterion_row),
            building_blocks=building_blocks,
            measures=measures,
            metrics=metrics,
        )
|
||||||
@@ -0,0 +1,196 @@
|
|||||||
|
"""
|
||||||
|
Saving-Scan-Funnel Endpoint — Marketing-Lead → Compliance-Check.
|
||||||
|
|
||||||
|
Externes Form (https://breakpilot.ai/savings-scan) postet hier:
|
||||||
|
POST /api/compliance/agent/saving-scan/start
|
||||||
|
Body: {"url": "...", "email": "..."}
|
||||||
|
|
||||||
|
Server-side:
|
||||||
|
1. Validierung URL + Email (E-Mail-Regex, URL-Schema).
|
||||||
|
2. Rate-Limit: max 1 vollstaendiger Scan / Domain / 24h
|
||||||
|
(saving_scan_allowed aus compliance_user_agent).
|
||||||
|
3. Lead persistieren (saving_scan_leads in Sidecar-SQLite) — fuer
|
||||||
|
spaeteren Report-Versand + Sales-Follow-Up.
|
||||||
|
4. Compliance-Check starten mit Auto-Discovery (DocumentInput leer
|
||||||
|
ausser Homepage). Der bestehende Worker laeuft TDM-Check, dann
|
||||||
|
Discovery, dann Pruefung.
|
||||||
|
5. check_id zurueck — Frontend pollt /compliance-check/<check_id>.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sqlite3
|
||||||
|
import uuid as _uuid
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from fastapi import APIRouter, HTTPException
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.services.compliance_user_agent import (
|
||||||
|
base_domain_of, saving_scan_allowed,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/compliance/agent", tags=["agent"])
|
||||||
|
|
||||||
|
DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db")
|
||||||
|
|
||||||
|
_EMAIL_RE = re.compile(r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$")
|
||||||
|
_URL_RE = re.compile(r"^https?://[A-Za-z0-9.-]+(/.*)?$")
|
||||||
|
|
||||||
|
|
||||||
|
# Request body of POST /saving-scan/start.
# (Comment instead of docstring so the generated schema description stays
# unchanged.)
class SavingScanRequest(BaseModel):
    # Site to scan; start_saving_scan additionally checks it against _URL_RE.
    url: str = Field(..., min_length=4, max_length=400)
    # Lead e-mail; start_saving_scan additionally checks it against _EMAIL_RE.
    email: str = Field(..., min_length=5, max_length=200)
    # Defaults to True; start_saving_scan rejects the request when False.
    consent: bool = Field(
        True, description="Marketing-Consent fuer Sales-Follow-Up — "
                          "muss True sein laut Form-Checkbox.",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Response of POST /saving-scan/start.
class SavingScanResponse(BaseModel):
    # Identifier of the started compliance check.
    check_id: str
    # start_saving_scan sets this to "running".
    status: str
    # Human-readable note for the caller.
    message: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_leads_table() -> None:
    """Create the ``saving_scan_leads`` table and its indexes if missing.

    Also creates the parent directory of ``DB_PATH``. Safe to call
    repeatedly: every statement uses ``IF NOT EXISTS``.
    """
    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(DB_PATH)
    try:
        # ``with conn`` only commits/rolls back the transaction; it does not
        # close the connection, hence the explicit close() below.
        with conn:
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS saving_scan_leads (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    ts TEXT NOT NULL,
                    email TEXT NOT NULL,
                    url TEXT NOT NULL,
                    base_domain TEXT NOT NULL,
                    check_id TEXT,
                    consent INTEGER NOT NULL,
                    source TEXT
                );
                CREATE INDEX IF NOT EXISTS idx_leads_domain ON saving_scan_leads(base_domain, ts);
                CREATE INDEX IF NOT EXISTS idx_leads_email ON saving_scan_leads(email, ts);
            """)
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _persist_lead(email: str, url: str, check_id: str, consent: bool) -> None:
    """Store one marketing lead in ``saving_scan_leads`` (best effort).

    The e-mail is lower-cased and stripped; the timestamp is the current UTC
    time in ISO-8601 form. Failures are logged as a warning and never
    propagate, so a broken lead store does not block the scan itself.

    Args:
        email: Address entered in the form.
        url: URL as submitted (stored unmodified).
        check_id: ID of the compliance check started for this lead.
        consent: Marketing-consent flag, stored as 1/0.
    """
    try:
        _ensure_leads_table()
        conn = sqlite3.connect(DB_PATH)
        try:
            # ``with conn`` commits on success and rolls back on error, but
            # does not close the connection; close() is done in ``finally``.
            with conn:
                conn.execute(
                    "INSERT INTO saving_scan_leads "
                    "(ts, email, url, base_domain, check_id, consent, source) "
                    "VALUES (?, ?, ?, ?, ?, ?, ?)",
                    (
                        datetime.now(timezone.utc).isoformat(),
                        email.lower().strip(),
                        url,
                        base_domain_of(url),
                        check_id,
                        1 if consent else 0,
                        "saving_scan_form",
                    ),
                )
        finally:
            conn.close()
    except Exception as e:
        logger.warning("persist lead failed: %s", e)
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_url(url: str) -> str:
    """Strip path, query and fragment so only the homepage URL remains.

    A missing scheme is replaced by ``https://``. The remaining documents are
    expected to be found by auto-discovery.
    """
    from urllib.parse import urlparse

    with_scheme = url if "://" in url else "https://" + url
    parts = urlparse(with_scheme)
    return "{}://{}/".format(parts.scheme, parts.netloc)
|
||||||
|
|
||||||
|
|
||||||
|
# Strong references to in-flight scan tasks. The event loop itself only keeps
# weak references, so a task whose handle is discarded may be garbage
# collected before it finishes.
_saving_scan_tasks: set = set()


@router.post("/saving-scan/start", response_model=SavingScanResponse)
async def start_saving_scan(req: SavingScanRequest) -> SavingScanResponse:
    """Trigger compliance check from the marketing-funnel form."""
    # --- Validation -------------------------------------------------------
    if not _EMAIL_RE.match(req.email):
        raise HTTPException(400, "Ungueltige E-Mail-Adresse.")
    if not _URL_RE.match(req.url):
        raise HTTPException(400, "URL muss mit http:// oder https:// beginnen.")
    if not req.consent:
        raise HTTPException(400, "Marketing-Consent erforderlich.")

    domain = base_domain_of(req.url)
    if not domain:
        raise HTTPException(400, "Konnte Domain nicht ermitteln.")

    # --- Rate limit -------------------------------------------------------
    # saving_scan_allowed returns (allowed, seconds_to_wait).
    allowed, wait_s = saving_scan_allowed(req.url)
    if not allowed:
        raise HTTPException(
            429,
            f"Fuer '{domain}' wurde in den letzten 24h bereits ein Scan "
            f"durchgefuehrt. Bitte in {wait_s // 3600}h {wait_s % 3600 // 60}min "
            f"erneut versuchen.",
        )

    # Lazy import to avoid circular dependency at module load.
    from compliance.api.agent_compliance_check_routes import (
        DocumentInput,
        ComplianceCheckRequest,
        _run_compliance_check,
        _compliance_check_jobs,
    )

    # --- Job registration -------------------------------------------------
    homepage = _normalize_url(req.url)
    check_id = str(_uuid.uuid4())[:8]
    _compliance_check_jobs[check_id] = {
        "status": "running",
        "progress": "Saving-Scan gestartet — Auto-Discovery laeuft...",
        "progress_pct": 0,
        "result": None,
        "error": "",
    }

    # Single "other" entry forces auto-discovery to fill in the rest.
    docs = [DocumentInput(doc_type="other", url=homepage)]
    check_req = ComplianceCheckRequest(
        documents=docs, recipient=req.email.lower().strip(),
    )

    _persist_lead(req.email, req.url, check_id, req.consent)

    # Keep a reference until the task completes, then drop it again.
    task = asyncio.create_task(_run_compliance_check(check_id, check_req))
    _saving_scan_tasks.add(task)
    task.add_done_callback(_saving_scan_tasks.discard)

    # Only the first three characters of the address are logged.
    logger.info("saving-scan start: check_id=%s domain=%s email=%s",
                check_id, domain, req.email[:3] + "***")
    return SavingScanResponse(
        check_id=check_id,
        status="running",
        message=f"Scan gestartet fuer {domain}. Bericht in ~3-5 Minuten.",
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/saving-scan/lead-count")
def saving_scan_lead_count() -> dict:
    """Diagnostik fuer das Sales-Dashboard."""
    # Returns total leads, leads of the last 24 hours and the ten domains
    # with the most scans. Any failure is reported as {"error": ...} instead
    # of an HTTP error.
    try:
        _ensure_leads_table()
        conn = sqlite3.connect(DB_PATH)
        try:
            total = conn.execute(
                "SELECT COUNT(*) FROM saving_scan_leads",
            ).fetchone()[0]
            # ``ts`` is written by _persist_lead as ISO-8601 with a 'T'
            # separator and UTC offset, while datetime('now', ...) yields
            # 'YYYY-MM-DD HH:MM:SS'. Comparing the raw strings would count
            # every lead from the cutoff's calendar day ('T' sorts after
            # ' '), so ``ts`` is normalized with datetime() first.
            last_24h = conn.execute(
                "SELECT COUNT(*) FROM saving_scan_leads "
                "WHERE datetime(ts) > datetime('now', '-1 day')",
            ).fetchone()[0]
            top_domains = conn.execute(
                "SELECT base_domain, COUNT(*) AS n FROM saving_scan_leads "
                "GROUP BY base_domain ORDER BY n DESC LIMIT 10",
            ).fetchall()
        finally:
            # sqlite3's connection context manager does not close the
            # connection, so close it explicitly.
            conn.close()
        return {
            "total_leads": total,
            "last_24h": last_24h,
            "top_domains": [{"domain": d, "scans": n} for d, n in top_domains],
        }
    except Exception as e:
        return {"error": str(e)[:200]}
|
||||||
@@ -0,0 +1,149 @@
|
|||||||
|
"""
|
||||||
|
k-Anonymitaets-Helper fuer Branchen-Benchmarks (P6-Vorbereitung).
|
||||||
|
|
||||||
|
Vor jeder Veroeffentlichung von Benchmark-Aussagen pruefen, ob die
|
||||||
|
zugrundeliegende Stichprobe gross genug ist, dass keine Re-Identifikation
|
||||||
|
einzelner Hersteller moeglich wird.
|
||||||
|
|
||||||
|
Default k=5: jede publizierbare Aussage muss auf mindestens 5 verschiedenen
|
||||||
|
Datensubjekten (z.B. OEM-Sites) beruhen. Bei OEM-Markt mit ~30 Spielern
|
||||||
|
ist k=5 das Minimum, um "ein deutscher Premium-Hersteller mit X Modellen"
|
||||||
|
auszuschliessen.
|
||||||
|
|
||||||
|
Memory: feedback_oem_data_legal.md + project_legal_contracts_2026_07.md.
|
||||||
|
|
||||||
|
Verwendung:
|
||||||
|
from compliance.services.benchmark_k_anonymity import (
|
||||||
|
enforce_k_anonymity, quantize_value, KAnonymityError,
|
||||||
|
)
|
||||||
|
|
||||||
|
rows = [...] # pro Hersteller 1 Row
|
||||||
|
safe_groups = enforce_k_anonymity(rows, group_keys=["segment", "country"])
|
||||||
|
# safe_groups: nur Gruppen mit count >= 5 zurueck
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Iterable
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
DEFAULT_K = 5
|
||||||
|
|
||||||
|
|
||||||
|
class KAnonymityError(RuntimeError):
    """Raised when a sample is too small to support a publishable statement."""
|
||||||
|
|
||||||
|
|
||||||
|
def assert_min_sample(n: int, k: int = DEFAULT_K, context: str = "") -> None:
    """Raise KAnonymityError when the sample size ``n`` is below ``k``.

    Args:
        n: Number of data subjects behind the statement.
        k: Required minimum sample size.
        context: Optional text appended to the error message.

    Raises:
        KAnonymityError: If ``n < k``.
    """
    if not n < k:
        return
    message = f"Stichprobe zu klein fuer Publikation: n={n} < k={k}"
    if context:
        message += f" — Kontext: {context}"
    raise KAnonymityError(message)
|
||||||
|
|
||||||
|
|
||||||
|
def quantize_value(value: float | int, step: int = 5) -> int:
    """Round ``value`` down to a multiple of ``step`` (generalization).

    Coarsening numeric signals keeps an exact figure from identifying a
    single data subject. A non-positive ``step`` disables quantization and
    only truncates the value to ``int``.

    >>> quantize_value(67, 5)
    65
    >>> quantize_value(83, 10)
    80
    """
    if step > 0:
        whole_steps = int(value // step)
        return whole_steps * step
    return int(value)
|
||||||
|
|
||||||
|
|
||||||
|
def quantize_range(value: float | int, step: int = 10) -> str:
    """Return the percentage bucket containing ``value``, e.g. '60-70%' or '80-90%'.

    The lower bound is ``quantize_value(value, step)``; the upper bound is
    the lower bound plus ``step``.
    """
    lower = quantize_value(value, step)
    upper = lower + step
    return f"{lower}-{upper}%"
|
||||||
|
|
||||||
|
|
||||||
|
def group_and_count(
    rows: Iterable[dict],
    keys: list[str],
) -> dict[tuple, int]:
    """Count rows per bucket, where a bucket is the tuple of all ``keys`` values.

    A key missing from a row contributes the empty string to the bucket tuple.
    """
    tally: dict[tuple, int] = {}
    for row in rows:
        group = tuple([row.get(name, "") for name in keys])
        tally.setdefault(group, 0)
        tally[group] += 1
    return tally
|
||||||
|
|
||||||
|
|
||||||
|
def enforce_k_anonymity(
    rows: list[dict],
    group_keys: list[str],
    k: int = DEFAULT_K,
) -> list[dict]:
    """Keep only rows whose group (by ``group_keys``) has at least ``k`` members.

    Returns:
        The rows that belong to sufficiently large groups, in input order.
        Rows in groups that are too small are suppressed (dropped).
    """
    group_sizes = group_and_count(rows, group_keys)
    kept: list[dict] = []
    for row in rows:
        # Same bucket construction as group_and_count: missing key -> "".
        group = tuple(row.get(name, "") for name in group_keys)
        if group_sizes.get(group, 0) >= k:
            kept.append(row)
    return kept
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_benchmark(
    rows: list[dict],
    group_keys: list[str],
    measure_key: str,
    k: int = DEFAULT_K,
    quantize_step: int = 5,
) -> list[dict]:
    """Build publishable benchmark aggregate rows.

    For every group (tuple of ``group_keys`` values) the rows carrying a
    non-``None`` ``measure_key`` are collected. Groups with fewer than ``k``
    such values are dropped. Each surviving group yields a dict with the
    group key values, ``n``, ``mean_quantized`` (mean rounded down to a
    multiple of ``quantize_step``) and ``mean_range`` (bucket label with width
    ``2 * quantize_step``). The result is sorted by ``n`` descending.

    Example:
        rows = [{"segment": "premium", "consent_score": 84}, ...]
        summarize_benchmark(rows, ["segment"], "consent_score")
        -> [{"segment": "premium", "n": 8, "mean_quantized": 80,
             "mean_range": "80-90%"}, ...]
    """
    values_by_group: dict[tuple, list[float]] = {}
    for row in rows:
        group = tuple(row.get(name, "") for name in group_keys)
        measure = row.get(measure_key)
        if measure is None:
            # Rows without a measurement do not count towards the group size.
            continue
        values_by_group.setdefault(group, []).append(float(measure))

    summary: list[dict] = []
    for group, values in values_by_group.items():
        n = len(values)
        if n < k:
            continue
        mean = sum(values) / n
        entry: dict[str, Any] = dict(zip(group_keys, group))
        entry["n"] = n
        entry["mean_quantized"] = quantize_value(mean, quantize_step)
        entry["mean_range"] = quantize_range(mean, quantize_step * 2)
        summary.append(entry)
    return sorted(summary, key=lambda e: e["n"], reverse=True)
|
||||||
|
|
||||||
|
|
||||||
|
def safe_to_publish(
    statement: str,
    sample_size: int,
    k: int = DEFAULT_K,
) -> tuple[bool, str]:
    """Validate a marketing/press statement against the minimum sample size.

    Returns:
        ``(ok, message)``. When ``ok`` is ``False`` the statement must NOT be
        published; the message quotes the first 60 characters of the
        statement and explains the re-identification risk.
    """
    if sample_size >= k:
        return True, f"OK (n={sample_size}, k={k})"
    excerpt = statement[:60]
    reason = (
        f'Aussage NICHT publizierbar: "{excerpt}…" '
        f'(n={sample_size} < k={k}). Risiko: Re-Identifikation '
        f'einzelner Hersteller moeglich.'
    )
    return False, reason
|
||||||
@@ -28,6 +28,12 @@ class BusinessProfile:
|
|||||||
needs_odr: bool = False # Online-Streitbeilegung
|
needs_odr: bool = False # Online-Streitbeilegung
|
||||||
detected_services: list[str] = field(default_factory=list)
|
detected_services: list[str] = field(default_factory=list)
|
||||||
confidence: float = 0.0
|
confidence: float = 0.0
|
||||||
|
# Wenn True: die Site selbst schliesst KEINEN Direktkauf-Vertrag
|
||||||
|
# (typisch OEM-Konfigurator-Sites BMW/Audi/Mercedes — Vertrag laeuft
|
||||||
|
# ueber den Vertragshaendler, nicht die Hersteller-Webseite).
|
||||||
|
# Konsequenz: AGB/Widerruf/Nutzungsbedingungen sind NICHT PFLICHT
|
||||||
|
# auf der Website, sondern werden beim Haendler ausgehaendigt.
|
||||||
|
no_direct_sales: bool = False
|
||||||
|
|
||||||
|
|
||||||
# ── Keyword lists ────────────────────────────────────────────────────
|
# ── Keyword lists ────────────────────────────────────────────────────
|
||||||
@@ -231,6 +237,13 @@ async def detect_business_profile(documents: dict[str, str]) -> BusinessProfile:
|
|||||||
b2g_score = _count_hits(full_text, _B2G_KEYWORDS)
|
b2g_score = _count_hits(full_text, _B2G_KEYWORDS)
|
||||||
nonprofit_score = _count_hits(full_text, _NONPROFIT_KEYWORDS)
|
nonprofit_score = _count_hits(full_text, _NONPROFIT_KEYWORDS)
|
||||||
|
|
||||||
|
# P17-C: B2B-Dienstleister-Cluster (P14) als Boost — wenn ein Unternehmen
|
||||||
|
# CE-Zertifizierung / Compliance-Beratung / Auditierung / Schulungen anbietet,
|
||||||
|
# ist es i.d.R. B2B auch wenn die strikten B2B-Keywords nicht greifen.
|
||||||
|
b2b_service_boost = _count_hits(full_text, _B2B_SERVICE_POSITIVE)
|
||||||
|
if b2b_service_boost >= 2:
|
||||||
|
b2b_score += min(3, b2b_service_boost - 1)
|
||||||
|
|
||||||
# Missing documents as signal
|
# Missing documents as signal
|
||||||
has_agb = "agb" in documents
|
has_agb = "agb" in documents
|
||||||
has_widerruf = "widerruf" in documents
|
has_widerruf = "widerruf" in documents
|
||||||
@@ -319,4 +332,103 @@ async def detect_business_profile(documents: dict[str, str]) -> BusinessProfile:
|
|||||||
"steuerberater": "finance", "architekt": "craft"}
|
"steuerberater": "finance", "architekt": "craft"}
|
||||||
profile.industry = prof_map.get(profile.regulated_profession_type, "unknown")
|
profile.industry = prof_map.get(profile.regulated_profession_type, "unknown")
|
||||||
|
|
||||||
|
# ── no_direct_sales (OEM-Konfigurator-Pattern) ───────────────
|
||||||
|
# Hersteller-Sites die nur konfigurieren + zu Vertragshaendlern
|
||||||
|
# weiterleiten (BMW/Audi/Mercedes/VW/Porsche) schliessen KEINEN
|
||||||
|
# Direkt-Kaufvertrag. AGB/Widerruf/Nutzungsbedingungen sind dort
|
||||||
|
# nicht Pflicht — werden beim Haendler ausgehaendigt.
|
||||||
|
profile.no_direct_sales = _detect_no_direct_sales(full_text)
|
||||||
|
|
||||||
return profile
|
return profile
|
||||||
|
|
||||||
|
|
||||||
|
# P14: drei Cluster die jeweils unabhaengig no_direct_sales=True triggern.
|
||||||
|
|
||||||
|
# Cluster A: OEM-Konfigurator-Pattern (Auto-Hersteller mit Vertragshaendler-Netz)
|
||||||
|
_OEM_POSITIVE = [
|
||||||
|
"vertragshaendler", "vertragshändler", "vertragspartner",
|
||||||
|
"vertragswerkstatt", "haendlersuche", "händlersuche",
|
||||||
|
"niederlassung", "vertretung", "autorisierter haendler",
|
||||||
|
"autorisierter händler", "ihr haendler vor ort",
|
||||||
|
"ihr händler vor ort", "haendler in ihrer naehe",
|
||||||
|
"händler in ihrer nähe", "probefahrt vereinbaren",
|
||||||
|
"anfrage an haendler", "anfrage an händler",
|
||||||
|
"konfigurator", "fahrzeug konfigurieren",
|
||||||
|
"ihre individuelle anfrage",
|
||||||
|
"bmw vertriebs", "audi vertriebs", "mercedes-benz vertriebs",
|
||||||
|
"volkswagen vertriebs", "porsche zentrum",
|
||||||
|
# OEM-Markennamen im Pflichttext (Datenschutz erwaehnt Hersteller)
|
||||||
|
"bmw ag", "audi ag", "mercedes-benz ag", "volkswagen ag",
|
||||||
|
"porsche ag", "opel automobile gmbh",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Cluster B: B2B-Dienstleister (Beratung / Compliance / Schulung / CE)
|
||||||
|
_B2B_SERVICE_POSITIVE = [
|
||||||
|
"ce-zertifizierung", "ce zertifizierung",
|
||||||
|
"ce-konformitaet", "ce-konformität",
|
||||||
|
"ce-kennzeichnung", "ce kennzeichnung",
|
||||||
|
"compliance-beratung", "compliance beratung",
|
||||||
|
"arbeitssicherheit", "product compliance",
|
||||||
|
"produktsicherheit", "produkthaftung",
|
||||||
|
"auditierung", "auditor", "auditierungen",
|
||||||
|
"schulungen", "workshops", "akademie",
|
||||||
|
"beratungsleistungen", "consultingleistungen",
|
||||||
|
"consulting services", "managementsystem",
|
||||||
|
"datenschutzbeauftragter (extern)",
|
||||||
|
"externer datenschutzbeauftragter",
|
||||||
|
"datenschutz-audit", "tisax", "iso 27001",
|
||||||
|
"iso 9001", "iso 14001", "iso 45001",
|
||||||
|
"gefaehrdungsbeurteilung", "gefährdungsbeurteilung",
|
||||||
|
"betriebsbeauftragter", "fachkraft fuer arbeitssicherheit",
|
||||||
|
"fachkraft für arbeitssicherheit",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Cluster C: NGO / Verein / oeffentliche Verwaltung
|
||||||
|
_NONPROFIT_PUBLIC_POSITIVE = [
|
||||||
|
"spendenkonto", "vereinsregister", "gemeinnuetzig",
|
||||||
|
"gemeinnützig", "ehrenamtlich", "foerderverein",
|
||||||
|
"förderverein", "stiftung", "buergeramt", "bürgeramt",
|
||||||
|
"landratsamt", "kommunalverwaltung",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Backwards-compat
|
||||||
|
_NO_DIRECT_SALES_POSITIVE = (
|
||||||
|
_OEM_POSITIVE + _B2B_SERVICE_POSITIVE + _NONPROFIT_PUBLIC_POSITIVE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Indikatoren GEGEN no_direct_sales: echte Online-Shop-Funktionen.
|
||||||
|
_DIRECT_SALES_NEGATIVE = [
|
||||||
|
"in den warenkorb", "warenkorb hinzu", "zur kasse",
|
||||||
|
"jetzt kaufen", "kostenpflichtig bestellen",
|
||||||
|
"zahlungspflichtig bestellen", "sofort-kauf",
|
||||||
|
"online bestellen", "lieferadresse", "rechnungsadresse",
|
||||||
|
"versandkosten", "lieferzeit", "lieferbedingungen",
|
||||||
|
"checkout", "stueckpreis", "stückpreis",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_no_direct_sales(full_text: str) -> bool:
    """Heuristic: True when the site does no direct selling to B2C customers.

    Three keyword clusters are checked independently:
      A) OEM configurator (car manufacturers)
      B) B2B service provider (consulting/compliance/training)
      C) NGO / public administration

    A cluster triggers when it has at least 2 keyword hits AND more hits than
    the negative signals (real shop functionality such as cart/checkout).
    Matching is a case-insensitive substring test; each keyword counts once.
    """
    haystack = full_text.lower()

    def hits(keywords) -> int:
        # Number of distinct keywords from the list that occur in the text.
        return sum(1 for kw in keywords if kw in haystack)

    shop_signals = hits(_DIRECT_SALES_NEGATIVE)
    clusters = (_OEM_POSITIVE, _B2B_SERVICE_POSITIVE, _NONPROFIT_PUBLIC_POSITIVE)
    for keywords in clusters:
        cluster_hits = hits(keywords)
        if cluster_hits >= 2 and cluster_hits > shop_signals:
            return True
    return False
|
||||||
|
|||||||
@@ -0,0 +1,141 @@
|
|||||||
|
"""
|
||||||
|
Zentraler User-Agent-Provider + Domain-Rate-Limiter fuer alle Crawls.
|
||||||
|
|
||||||
|
UA-Switch ist Trigger-gebunden an Firmengruendung:
|
||||||
|
- aktuell (Vor-Gruendung): generischer Headless-Chrome-UA
|
||||||
|
- nach Gruendung: env BREAKPILOT_BRANDED_UA=1 setzen
|
||||||
|
-> "BreakPilot-Compliance-Scanner/1.0 (+https://...)"
|
||||||
|
|
||||||
|
Memory: project_legal_contracts_2026_07.md (Punkt 0).
|
||||||
|
|
||||||
|
Rate-Limit:
|
||||||
|
- Default 1 req/sec/Domain, max 2 concurrent pro Domain.
|
||||||
|
- Saving-Scan-Funnel separat: max 1 vollstaendiger Run / Domain / 24h.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from collections import defaultdict
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
|
||||||
|
_BRANDED_UA = (
|
||||||
|
"BreakPilot-Compliance-Scanner/1.0 "
|
||||||
|
"(+https://breakpilot.ai/scanner)"
|
||||||
|
)
|
||||||
|
_NEUTRAL_UA = (
|
||||||
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
||||||
|
"(KHTML, like Gecko) HeadlessChrome/120.0.0.0 Safari/537.36"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def crawler_user_agent() -> str:
    """Return the User-Agent string to use for all outgoing crawls.

    The branded UA is returned when the environment variable
    ``BREAKPILOT_BRANDED_UA`` is set to ``1``, ``true`` or ``yes``
    (case-insensitive, surrounding whitespace ignored); otherwise the neutral
    headless-Chrome UA is returned. The variable is read on every call.
    """
    flag = os.getenv("BREAKPILOT_BRANDED_UA") or ""
    use_branded = flag.strip().lower() in ("1", "true", "yes")
    return _BRANDED_UA if use_branded else _NEUTRAL_UA
|
||||||
|
|
||||||
|
|
||||||
|
def default_request_headers() -> dict:
    """Return the full default header set for httpx calls.

    Contains the current crawler User-Agent plus fixed ``Accept`` and
    ``Accept-Language`` (German first, English fallback) headers.
    """
    headers = {"User-Agent": crawler_user_agent()}
    headers["Accept"] = (
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
    )
    headers["Accept-Language"] = "de-DE,de;q=0.9,en;q=0.8"
    return headers
|
||||||
|
|
||||||
|
|
||||||
|
def base_domain_of(url_or_host: str) -> str:
    """Normalise a URL or bare host to a lower-cased host without ``www.``.

    Args:
        url_or_host: Full URL (``https://www.example.com/x``) or bare host
            (``www.example.com``). A bare host is parsed as if it were
            prefixed with ``https://``.

    Returns:
        The lower-cased network location with a single leading ``www.``
        removed; ``""`` for empty input. If no network location can be
        extracted, the (scheme-prefixed) input is returned unchanged.
    """
    if not url_or_host:
        return ""
    if "://" not in url_or_host:
        url_or_host = "https://" + url_or_host
    netloc = urlparse(url_or_host).netloc.lower()
    # Strip only a leading "www." label. A blanket str.replace would also
    # mangle hosts that merely contain the sequence (e.g. "awww.example.com").
    if netloc.startswith("www."):
        netloc = netloc[len("www."):]
    return netloc or url_or_host
|
||||||
|
|
||||||
|
|
||||||
|
# --- per-Domain Rate-Limit ----------------------------------------------
|
||||||
|
|
||||||
|
_MIN_INTERVAL_S = 1.0 # 1 req/sec/Domain
|
||||||
|
_MAX_CONCURRENT_PER_DOMAIN = 2
|
||||||
|
|
||||||
|
_last_request_at: dict[str, float] = defaultdict(float)
|
||||||
|
_semaphores: dict[str, asyncio.Semaphore] = {}
|
||||||
|
_locks_lock = asyncio.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_semaphore(domain: str) -> asyncio.Semaphore:
    """Return the concurrency semaphore for ``domain``, creating it on first use.

    Creation is serialised through ``_locks_lock`` so that a domain never gets
    two different semaphores. New semaphores allow
    ``_MAX_CONCURRENT_PER_DOMAIN`` concurrent holders.
    """
    async with _locks_lock:
        if domain not in _semaphores:
            _semaphores[domain] = asyncio.Semaphore(_MAX_CONCURRENT_PER_DOMAIN)
        return _semaphores[domain]
|
||||||
|
|
||||||
|
|
||||||
|
class DomainRateLimiter:
    """Async context manager: wait before a request and hold a concurrency slot.

    Usage:
        async with DomainRateLimiter(url):
            resp = await client.get(url)

    On entry a slot of the per-domain semaphore is acquired and the caller is
    delayed until at least ``_MIN_INTERVAL_S`` seconds after the previously
    scheduled request to the same domain. On exit the slot is released.
    """

    def __init__(self, url_or_domain: str):
        # Rate-limit state is keyed by the normalised host.
        self.domain = base_domain_of(url_or_domain)

    async def __aenter__(self):
        sem = await _get_semaphore(self.domain)
        await sem.acquire()
        try:
            now = time.monotonic()
            # Reserve the next free time slot BEFORE sleeping. There is no
            # await between reading and writing the timestamp, so concurrent
            # entrants on the same event loop get distinct, spaced slots
            # instead of all waking up at the same instant.
            slot = max(now, _last_request_at[self.domain] + _MIN_INTERVAL_S)
            _last_request_at[self.domain] = slot
            if slot > now:
                await asyncio.sleep(slot - now)
        except BaseException:
            # __aexit__ is not invoked when __aenter__ raises (e.g. task
            # cancellation during the sleep), so give the slot back here.
            sem.release()
            raise
        self._sem = sem
        return self

    async def __aexit__(self, exc_type, exc, tb):
        self._sem.release()
        # Never suppress exceptions raised inside the ``async with`` body.
        return False
|
||||||
|
|
||||||
|
|
||||||
|
# --- per-Domain "1 full run / 24h" (Saving-Scan) -----------------------
|
||||||
|
|
||||||
|
_DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db")
|
||||||
|
_SAVING_SCAN_INTERVAL_S = 24 * 3600
|
||||||
|
|
||||||
|
|
||||||
|
def saving_scan_allowed(domain_or_url: str) -> tuple[bool, int]:
    """Check whether a saving scan may run for this domain right now.

    Looks up the newest ``ts`` in table ``check_runs`` for the normalised
    domain in the SQLite file at ``_DB_PATH`` and compares its age with
    ``_SAVING_SCAN_INTERVAL_S`` (24h).

    Args:
        domain_or_url: Domain or URL; normalised via ``base_domain_of``.

    Returns:
        ``(allowed, seconds_until_allowed)``. ``(True, 0)`` when the domain is
        empty, no run is recorded, or the last run is old enough.

    Note:
        Fails open: any error (missing DB/table, unparsable timestamp)
        yields ``(True, 0)``.
    """
    import sqlite3
    from contextlib import closing
    from datetime import datetime

    domain = base_domain_of(domain_or_url)
    if not domain:
        return True, 0
    try:
        # sqlite3's own context manager only commits/rolls back and leaves the
        # connection open; closing() guarantees the handle is released.
        with closing(sqlite3.connect(_DB_PATH)) as conn:
            row = conn.execute(
                "SELECT MAX(ts) FROM check_runs WHERE base_domain=?",
                (domain,),
            ).fetchone()
        last = row[0] if row else None
        if not last:
            return True, 0
        # NOTE(review): a timestamp without UTC offset is interpreted as local
        # time by .timestamp() — confirm how check_runs.ts is written.
        elapsed = time.time() - datetime.fromisoformat(last).timestamp()
        if elapsed >= _SAVING_SCAN_INTERVAL_S:
            return True, 0
        return False, int(_SAVING_SCAN_INTERVAL_S - elapsed)
    except Exception:
        # Deliberate best-effort: never block a scan because the audit DB is
        # unavailable or malformed.
        return True, 0
|
||||||
@@ -129,20 +129,29 @@ def classify_cookie(cookie_name: str) -> tuple[str, str]:
|
|||||||
|
|
||||||
|
|
||||||
def annotate_vendor_cookies(vendor: dict) -> dict:
|
def annotate_vendor_cookies(vendor: dict) -> dict:
|
||||||
"""Enrich a vendor record with functional_role per cookie."""
|
"""Enrich a vendor record with functional_role + KB knowledge per cookie."""
|
||||||
|
from compliance.services.cookie_knowledge import (
|
||||||
|
lookup_cookie, summarize_compliance_risk,
|
||||||
|
)
|
||||||
cookies = vendor.get("cookies") or []
|
cookies = vendor.get("cookies") or []
|
||||||
annotated = []
|
annotated = []
|
||||||
role_counts: dict[str, int] = {}
|
role_counts: dict[str, int] = {}
|
||||||
for c in cookies:
|
for c in cookies:
|
||||||
role, impact = classify_cookie(c.get("name", ""))
|
role, impact = classify_cookie(c.get("name", ""))
|
||||||
annotated.append({**c, "functional_role": role, "blocking_impact": impact})
|
knowledge = lookup_cookie(c.get("name", ""))
|
||||||
|
entry = {**c, "functional_role": role, "blocking_impact": impact}
|
||||||
|
if knowledge:
|
||||||
|
entry["knowledge"] = knowledge
|
||||||
|
annotated.append(entry)
|
||||||
role_counts[role] = role_counts.get(role, 0) + 1
|
role_counts[role] = role_counts.get(role, 0) + 1
|
||||||
return {
|
out = {
|
||||||
**vendor,
|
**vendor,
|
||||||
"cookies": annotated,
|
"cookies": annotated,
|
||||||
"role_distribution": role_counts,
|
"role_distribution": role_counts,
|
||||||
"role_labels": {r: _FUNCTIONAL_LABEL.get(r, r) for r in role_counts},
|
"role_labels": {r: _FUNCTIONAL_LABEL.get(r, r) for r in role_counts},
|
||||||
}
|
}
|
||||||
|
out["compliance_risk"] = summarize_compliance_risk(out)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
def aggregate_cookie_purposes(vendors: Iterable[dict]) -> dict:
|
def aggregate_cookie_purposes(vendors: Iterable[dict]) -> dict:
|
||||||
|
|||||||
@@ -0,0 +1,106 @@
|
|||||||
|
"""
|
||||||
|
Cookie-Knowledge Facade — vereint die Basis-KB (cookie_knowledge_db) mit
|
||||||
|
der Erweiterung (cookie_knowledge_extended) hinter einer einzigen API.
|
||||||
|
|
||||||
|
Caller sollten von hier importieren statt von einer der beiden Sub-DBs.
|
||||||
|
|
||||||
|
from compliance.services.cookie_knowledge import (
|
||||||
|
lookup_cookie,
|
||||||
|
enrich_vendor_with_knowledge,
|
||||||
|
summarize_compliance_risk,
|
||||||
|
compliance_risk_label,
|
||||||
|
)
|
||||||
|
|
||||||
|
Lookup-Reihenfolge: Extended (kuratiert, juenger) vor Base. Dadurch
|
||||||
|
koennen wir Eintraege ueberschreiben ohne die Base zu touchen.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from compliance.services.cookie_knowledge_db import (
|
||||||
|
CookieKnowledge,
|
||||||
|
lookup_cookie as _lookup_base,
|
||||||
|
)
|
||||||
|
from compliance.services.cookie_knowledge_extended import (
|
||||||
|
KB_EXT,
|
||||||
|
lookup_cookie_extended,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def lookup_cookie(name: str) -> CookieKnowledge | None:
    """Resolve a cookie name to its knowledge entry; extended KB wins over base.

    The base lookup is only consulted when the extended lookup returns a
    falsy result.
    """
    extended_hit = lookup_cookie_extended(name)
    if extended_hit:
        return extended_hit
    return _lookup_base(name)
|
||||||
|
|
||||||
|
|
||||||
|
def enrich_vendor_with_knowledge(vendor: dict) -> dict:
    """Return a copy of ``vendor`` with per-cookie knowledge and a risk summary.

    Every cookie with a knowledge-base hit gets a ``"knowledge"`` key; cookies
    without a hit are passed through unchanged. The summary produced by
    ``summarize_compliance_risk`` is stored under ``"compliance_risk"``. The
    input dict is not mutated.
    """
    enriched = []
    for cookie in vendor.get("cookies") or []:
        info = lookup_cookie(cookie.get("name", ""))
        if info:
            enriched.append({**cookie, "knowledge": info})
        else:
            enriched.append(cookie)
    result = {**vendor, "cookies": enriched}
    result["compliance_risk"] = summarize_compliance_risk(result)
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_compliance_risk(vendor: dict) -> dict:
    """Aggregate re-identification risk and Schrems-II exposure over all cookies.

    For each cookie the attached ``"knowledge"`` entry is used, falling back
    to ``lookup_cookie`` by name; cookies without knowledge are ignored.

    Returns:
        Dict with ``reid_risk_distribution`` (counts per risk level),
        ``high_risk_cookie_count``, ``schrems_ii_affected_cookies``,
        ``strictly_necessary_cookies`` (``technical_necessity == "full"``),
        ``total_classified`` and the compact ``label`` from
        ``compliance_risk_label``.
    """
    import re

    cookies = vendor.get("cookies") or []
    risk_counts = {"high": 0, "medium": 0, "low": 0}
    schrems_affected = 0
    strictly_necessary = 0
    classified = 0
    for c in cookies:
        k = c.get("knowledge") or lookup_cookie(c.get("name", ""))
        if not k:
            continue
        classified += 1
        # A missing risk level is counted as "low"; unknown levels get their
        # own bucket.
        risk = (k.get("reid_risk") or "low").lower()
        risk_counts[risk] = risk_counts.get(risk, 0) + 1
        # Match "us"/"usa" as a whole token (handles "US", "US/IE", "USA").
        # A plain substring test would also flag "AUS", "RUS" or "Austria".
        country_tokens = re.findall(
            r"[a-z]+", (k.get("vendor_country") or "").lower()
        )
        us_vendor = any(token in ("us", "usa") for token in country_tokens)
        mentions_schrems = "schrems" in (k.get("schrems_ii_status") or "").lower()
        if us_vendor or mentions_schrems:
            schrems_affected += 1
        if k.get("technical_necessity") == "full":
            strictly_necessary += 1
    return {
        "reid_risk_distribution": risk_counts,
        "high_risk_cookie_count": risk_counts["high"],
        "schrems_ii_affected_cookies": schrems_affected,
        "strictly_necessary_cookies": strictly_necessary,
        "total_classified": classified,
        "label": compliance_risk_label({
            "high_risk_cookie_count": risk_counts["high"],
            "schrems_ii_affected_cookies": schrems_affected,
            "total_classified": classified,
        }),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def compliance_risk_label(summary: dict) -> str:
    """Map a risk summary to a compact badge.

    Args:
        summary: Dict with ``total_classified``, ``high_risk_cookie_count``
            and ``schrems_ii_affected_cookies`` (missing counts default to 0).

    Returns:
        ``"unklar"`` when nothing was classified, otherwise the first match of:
        ``"kritisch"`` (>= 3 high-risk and >= 2 Schrems-affected cookies),
        ``"hoch"`` (>= 2 high-risk, or >= 1 high-risk plus >= 1
        Schrems-affected), ``"mittel"`` (any high-risk or Schrems-affected
        cookie), ``"gering"``.
    """
    if not summary or not summary.get("total_classified"):
        return "unklar"
    high = summary.get("high_risk_cookie_count", 0)
    schrems = summary.get("schrems_ii_affected_cookies", 0)
    if high >= 3 and schrems >= 2:
        return "kritisch"
    if high >= 2 or (high >= 1 and schrems >= 1):
        return "hoch"
    if high >= 1 or schrems >= 1:
        return "mittel"
    return "gering"
|
||||||
|
|
||||||
|
|
||||||
|
def kb_size() -> dict:
    """Return knowledge-base size diagnostics for the admin/health endpoint.

    Reports the number of base entries, extended entries, keys present in
    both (extended overrides base) and the number of unique keys overall.
    """
    from compliance.services.cookie_knowledge_db import KB as _KB_BASE

    base = set(_KB_BASE)
    extended = set(KB_EXT)
    overlap = base & extended
    union = base | extended
    return {
        "base_entries": len(base),
        "extended_entries": len(extended),
        "extended_overrides_base": len(overlap),
        "total_unique": len(union),
    }
|
||||||
@@ -0,0 +1,497 @@
|
|||||||
|
"""
|
||||||
|
Cookie-Knowledge Erweiterung — Adobe, Meta erweitert, Microsoft, LinkedIn,
|
||||||
|
TikTok, Salesforce/HubSpot/Marketo, Hotjar/Mouseflow/FullStory, Live-Chat,
|
||||||
|
Cloudflare/Akamai, Payment, CMP-eigene Cookies, EU-Analytics.
|
||||||
|
|
||||||
|
Hinweis zu Rechten: Eintraege enthalten ausschliesslich Identitaetsfelder
|
||||||
|
(Cookie-Name, Anbieter, Sitzland) + EIGENE Knappformulierungen + Verweise
|
||||||
|
auf oeffentliche EuGH-/CNIL-/EDPB-Quellen. KEINE 1:1-Kopien aus OneTrust,
|
||||||
|
Cookiepedia oder Vendor-eigenen Beschreibungstexten.
|
||||||
|
|
||||||
|
Quellen-Pointer: IAB TCF v2.2 Vendor List, CNIL Cookies & Trackers
|
||||||
|
Guidelines 2024, EDPB Guidelines 2/2023, EuGH-Rechtsprechung (Schrems II,
|
||||||
|
Planet49), DSK-Orientierungshilfen 2021/2024.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from compliance.services.cookie_knowledge_db import CookieKnowledge
|
||||||
|
|
||||||
|
|
||||||
|
_ADOBE_BASE = {
|
||||||
|
"vendor": "Adobe Inc.", "vendor_country": "US",
|
||||||
|
"schrems_ii_status": "Drittlandtransfer US. Mit DPF (2023) wieder "
|
||||||
|
"zulaessig; EU-Datenresidenz-Option in Adobe "
|
||||||
|
"Experience Platform verfuegbar.",
|
||||||
|
"eugh_rulings": [
|
||||||
|
"EuGH C-311/18 (Schrems II)",
|
||||||
|
"EDPB Recommendations 01/2020 — Supplementary Measures",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
_META_BASE = {
|
||||||
|
"vendor": "Meta Platforms Ireland Ltd.", "vendor_country": "IE",
|
||||||
|
"schrems_ii_status": "Verarbeitung in IE + US-Transfer. DPC Ireland "
|
||||||
|
"Bussgeld 2023 (€1,2 Mrd) wegen unzureichender "
|
||||||
|
"Schutzmassnahmen — DPF deckt seit 2023.",
|
||||||
|
"eugh_rulings": [
|
||||||
|
"EuGH C-311/18 (Schrems II)",
|
||||||
|
"DPC Ireland 2023 — Meta 1,2 Mrd. EUR",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
_MICROSOFT_BASE = {
|
||||||
|
"vendor": "Microsoft Corp.", "vendor_country": "US",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert; EU Data Boundary fuer Azure/365 "
|
||||||
|
"seit 2024 verfuegbar.",
|
||||||
|
"eugh_rulings": ["EuGH C-311/18 (Schrems II)"],
|
||||||
|
}
|
||||||
|
|
||||||
|
_LINKEDIN_BASE = {
|
||||||
|
"vendor": "LinkedIn Ireland Unlimited Co.", "vendor_country": "IE",
|
||||||
|
"schrems_ii_status": "Microsoft-Konzern, EU-Hauptsitz IE, Transfer US.",
|
||||||
|
"eugh_rulings": ["EuGH C-311/18 (Schrems II)"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
KB_EXT: dict[str, CookieKnowledge] = {
|
||||||
|
|
||||||
|
# --- Adobe Experience Cloud --------------------------------------
|
||||||
|
# AMCV_, s_cc, s_sq leben in Base-KB.
|
||||||
|
"demdex": {
|
||||||
|
**_ADOBE_BASE,
|
||||||
|
"vendor": "Adobe Inc. (Audience Manager)",
|
||||||
|
"exact_purpose": "Adobe Audience Manager DMP — Cross-Site-Profil "
|
||||||
|
"fuer Zielgruppen-Segmentierung.",
|
||||||
|
"data_collected": ["dpuuid", "segments"],
|
||||||
|
"ip_relevant": True,
|
||||||
|
"tcf_purpose_ids": [4, 9, 10],
|
||||||
|
"typical_lifetime": "180 Tage",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- Meta erweitert -----------------------------------------------
|
||||||
|
# fr, _fbc leben in Base-KB.
|
||||||
|
"datr": {
|
||||||
|
**_META_BASE,
|
||||||
|
"exact_purpose": "Facebook Browser-Identifier — Anti-Abuse/Bot-Schutz.",
|
||||||
|
"data_collected": ["browser_fingerprint_id"],
|
||||||
|
"ip_relevant": True,
|
||||||
|
"typical_lifetime": "2 Jahre",
|
||||||
|
"reid_risk": "high", "technical_necessity": "partial",
|
||||||
|
"notes": "Wird auch ohne Consent gesetzt; Meta argumentiert "
|
||||||
|
"Sicherheit. Trotzdem von DSK 2024 kritisch bewertet.",
|
||||||
|
},
|
||||||
|
# --- Microsoft / Bing ---------------------------------------------
|
||||||
|
# MUID lebt in Base-KB.
|
||||||
|
"MSCC": {
|
||||||
|
**_MICROSOFT_BASE,
|
||||||
|
"exact_purpose": "Microsoft Site Consent — Consent-Status-Speicherung "
|
||||||
|
"fuer Microsoft-eigene Properties.",
|
||||||
|
"data_collected": ["consent_string"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "low", "technical_necessity": "full",
|
||||||
|
"notes": "Strictly necessary nach §25(2) TDDDG.",
|
||||||
|
},
|
||||||
|
"ai_session": {
|
||||||
|
**_MICROSOFT_BASE,
|
||||||
|
"vendor": "Microsoft Corp. (Application Insights)",
|
||||||
|
"exact_purpose": "Azure Application Insights — Session-Tracking fuer "
|
||||||
|
"Telemetry.",
|
||||||
|
"data_collected": ["session_id"],
|
||||||
|
"typical_lifetime": "30 Minuten",
|
||||||
|
"reid_risk": "medium", "technical_necessity": "partial",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- LinkedIn ------------------------------------------------------
|
||||||
|
"li_at": {
|
||||||
|
**_LINKEDIN_BASE,
|
||||||
|
"exact_purpose": "LinkedIn-Authentifizierung — Login-Session.",
|
||||||
|
"data_collected": ["auth_token"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "high", "technical_necessity": "full",
|
||||||
|
"notes": "Nur fuer eingeloggte Nutzer; auf externer Site = "
|
||||||
|
"Insight Tag (siehe li_sugr).",
|
||||||
|
},
|
||||||
|
"li_sugr": {
|
||||||
|
**_LINKEDIN_BASE,
|
||||||
|
"exact_purpose": "LinkedIn Insight Tag — Browser-ID fuer "
|
||||||
|
"Conversion-Tracking + Werbe-Targeting.",
|
||||||
|
"data_collected": ["browser_id"],
|
||||||
|
"ip_relevant": True,
|
||||||
|
"tcf_purpose_ids": [7, 9, 10],
|
||||||
|
"typical_lifetime": "90 Tage",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
},
|
||||||
|
# bcookie, lidc leben in Base-KB.
|
||||||
|
|
||||||
|
# --- TikTok --------------------------------------------------------
|
||||||
|
"_ttp": {
|
||||||
|
"vendor": "TikTok Pte. Ltd.", "vendor_country": "SG/CN",
|
||||||
|
"exact_purpose": "TikTok Pixel — User-ID fuer Conversion-Tracking + "
|
||||||
|
"Werbeoptimierung.",
|
||||||
|
"data_collected": ["pixel_id", "browser_id"],
|
||||||
|
"ip_relevant": True,
|
||||||
|
"tcf_purpose_ids": [7, 9, 10],
|
||||||
|
"typical_lifetime": "13 Monate",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "Drittlandtransfer in Drittstaaten ohne "
|
||||||
|
"Angemessenheitsbeschluss. CNIL 2023 — "
|
||||||
|
"TikTok 5 Mio EUR Bussgeld.",
|
||||||
|
"eugh_rulings": [
|
||||||
|
"CNIL SAN-2022-027 — TikTok 5 Mio EUR",
|
||||||
|
"Italienische DPA 2024 — TikTok 10 Mio EUR",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"ttwid": {
|
||||||
|
"vendor": "TikTok Pte. Ltd.", "vendor_country": "SG/CN",
|
||||||
|
"exact_purpose": "TikTok Web-Identifier — eindeutige Browser-ID auch "
|
||||||
|
"ohne Login.",
|
||||||
|
"data_collected": ["ttwid"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "Wie _ttp.",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- HubSpot / Marketo / Salesforce ------------------------------
|
||||||
|
"hubspotutk": {
|
||||||
|
"vendor": "HubSpot Inc.", "vendor_country": "US",
|
||||||
|
"exact_purpose": "HubSpot User-Token — Cross-Visit-Identitaet fuer "
|
||||||
|
"Lead-Tracking.",
|
||||||
|
"data_collected": ["user_token"],
|
||||||
|
"ip_relevant": True,
|
||||||
|
"tcf_purpose_ids": [7, 8],
|
||||||
|
"typical_lifetime": "6 Monate",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert.",
|
||||||
|
},
|
||||||
|
"__hssc": {
|
||||||
|
"vendor": "HubSpot Inc.", "vendor_country": "US",
|
||||||
|
"exact_purpose": "HubSpot Session-Tracking — Pageviews innerhalb "
|
||||||
|
"einer Session.",
|
||||||
|
"data_collected": ["session_count"],
|
||||||
|
"typical_lifetime": "30 Minuten",
|
||||||
|
"reid_risk": "low", "technical_necessity": "none",
|
||||||
|
},
|
||||||
|
"_mkto_trk": {
|
||||||
|
"vendor": "Adobe Inc. (Marketo)", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Marketo Munchkin-Tracker — Lead-Identifikation "
|
||||||
|
"fuer Marketing-Automation.",
|
||||||
|
"data_collected": ["munchkin_id", "session_id"],
|
||||||
|
"ip_relevant": True,
|
||||||
|
"typical_lifetime": "2 Jahre",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": _ADOBE_BASE["schrems_ii_status"],
|
||||||
|
},
|
||||||
|
"BrowserId_sec": {
|
||||||
|
"vendor": "Salesforce.com Inc.", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Salesforce Marketing Cloud Browser-Token — "
|
||||||
|
"Cross-Visit-Identifikation.",
|
||||||
|
"data_collected": ["browser_id"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "medium", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert.",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- Session-Recording / Heatmaps ---------------------------------
|
||||||
|
"_hjSessionUser_": {
|
||||||
|
"vendor": "Hotjar Ltd.", "vendor_country": "MT",
|
||||||
|
"exact_purpose": "Hotjar User-ID — Cross-Visit-Identifikation fuer "
|
||||||
|
"Session-Recording + Heatmaps.",
|
||||||
|
"data_collected": ["user_id"],
|
||||||
|
"ip_relevant": True,
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "EU (Malta) — kein Drittland. Aber: parent "
|
||||||
|
"Contentsquare (FR) hostet teilweise in US.",
|
||||||
|
"notes": "Suffix `<site_id>`. Pattern-Match noetig. "
|
||||||
|
"DSGVO-Aufzeichnung = Einwilligung pflichtig.",
|
||||||
|
"eu_alternative_vendor": "Mouseflow / Smartlook (CZ)",
|
||||||
|
},
|
||||||
|
"_hjSession_": {
|
||||||
|
"vendor": "Hotjar Ltd.", "vendor_country": "MT",
|
||||||
|
"exact_purpose": "Hotjar Session-Token — eindeutige Session-ID "
|
||||||
|
"innerhalb 30min Inaktivitaet.",
|
||||||
|
"data_collected": ["session_id"],
|
||||||
|
"typical_lifetime": "30 Minuten",
|
||||||
|
"reid_risk": "medium", "technical_necessity": "none",
|
||||||
|
},
|
||||||
|
"fs_uid": {
|
||||||
|
"vendor": "FullStory Inc.", "vendor_country": "US",
|
||||||
|
"exact_purpose": "FullStory User-ID — Cross-Visit-Identifikation "
|
||||||
|
"fuer Session-Replay.",
|
||||||
|
"data_collected": ["user_id"],
|
||||||
|
"ip_relevant": True,
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert. EU-Region verfuegbar (opt-in).",
|
||||||
|
},
|
||||||
|
"mf_user": {
|
||||||
|
"vendor": "Mouseflow Aps", "vendor_country": "DK",
|
||||||
|
"exact_purpose": "Mouseflow User-ID — Cross-Visit-Identifikation fuer "
|
||||||
|
"Heatmap + Recording.",
|
||||||
|
"data_collected": ["user_id"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "medium", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "EU (DK) — kein Drittland.",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- Live-Chat ----------------------------------------------------
|
||||||
|
"intercom-id-": {
|
||||||
|
"vendor": "Intercom Inc.", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Intercom Visitor-ID — Wiedererkennung anonymer "
|
||||||
|
"Besucher fuer Chat-History.",
|
||||||
|
"data_collected": ["visitor_id"],
|
||||||
|
"typical_lifetime": "9 Monate",
|
||||||
|
"reid_risk": "medium", "technical_necessity": "partial",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert; EU-Datenresidenz optional.",
|
||||||
|
"notes": "Suffix `<app_id>`. Pattern-Match noetig.",
|
||||||
|
},
|
||||||
|
"driftt_aid": {
|
||||||
|
"vendor": "Salesforce.com Inc. (Drift)", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Drift Anonymous-Visitor-ID fuer Chat-Personalisierung.",
|
||||||
|
"data_collected": ["visitor_id"],
|
||||||
|
"typical_lifetime": "2 Jahre",
|
||||||
|
"reid_risk": "medium", "technical_necessity": "partial",
|
||||||
|
},
|
||||||
|
"__zlcmid": {
|
||||||
|
"vendor": "Zendesk Inc.", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Zendesk Chat Visitor-ID fuer Session-Tracking.",
|
||||||
|
"data_collected": ["chat_visitor_id"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "medium", "technical_necessity": "partial",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert; EU-Datacenter optional.",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- CDN / Sicherheit (strictly necessary) -----------------------
|
||||||
|
# __cf_bm, cf_clearance leben in Base-KB.
|
||||||
|
"AKA_A2": {
|
||||||
|
"vendor": "Akamai Technologies Inc.", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Akamai Adaptive Acceleration — geroutete Best-Path-"
|
||||||
|
"Optimierung.",
|
||||||
|
"data_collected": ["a2_route"],
|
||||||
|
"typical_lifetime": "1 Stunde",
|
||||||
|
"reid_risk": "low", "technical_necessity": "full",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- Payment (strictly necessary fuer Checkout) ------------------
|
||||||
|
"__stripe_mid": {
|
||||||
|
"vendor": "Stripe Payments Europe Ltd.", "vendor_country": "IE",
|
||||||
|
"exact_purpose": "Stripe Fraud-Detection Merchant-ID — Risiko-Scoring "
|
||||||
|
"fuer Zahlungs-Authentifizierung.",
|
||||||
|
"data_collected": ["merchant_visitor_id"],
|
||||||
|
"ip_relevant": True,
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "low", "technical_necessity": "full",
|
||||||
|
"schrems_ii_status": "EU (IE) — kein Drittland.",
|
||||||
|
"notes": "Strictly necessary nach §25(2) TDDDG fuer Zahlungsabwicklung.",
|
||||||
|
},
|
||||||
|
"__stripe_sid": {
|
||||||
|
"vendor": "Stripe Payments Europe Ltd.", "vendor_country": "IE",
|
||||||
|
"exact_purpose": "Stripe Session-ID — temporaere Zahlungs-Session.",
|
||||||
|
"data_collected": ["session_id"],
|
||||||
|
"typical_lifetime": "30 Minuten",
|
||||||
|
"reid_risk": "low", "technical_necessity": "full",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- CMP-eigene Cookies (strictly necessary) ---------------------
|
||||||
|
"CookieConsent": {
|
||||||
|
"vendor": "Cybot A/S (Cookiebot)", "vendor_country": "DK",
|
||||||
|
"exact_purpose": "Cookiebot Consent-Speicherung — gewaehlte "
|
||||||
|
"Kategorien + Zeitstempel.",
|
||||||
|
"data_collected": ["consent_categories", "consent_timestamp"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "low", "technical_necessity": "full",
|
||||||
|
"schrems_ii_status": "EU (DK). Wenn EU-Cloud, kein Drittland.",
|
||||||
|
},
|
||||||
|
"OptanonConsent": {
|
||||||
|
"vendor": "OneTrust LLC", "vendor_country": "US",
|
||||||
|
"exact_purpose": "OneTrust Consent-Speicherung — Kategorien + "
|
||||||
|
"Vendor-Liste + Zeitstempel.",
|
||||||
|
"data_collected": ["consent_categories", "consent_string"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "low", "technical_necessity": "full",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert; EU-Cloud optional.",
|
||||||
|
},
|
||||||
|
"OptanonAlertBoxClosed": {
|
||||||
|
"vendor": "OneTrust LLC", "vendor_country": "US",
|
||||||
|
"exact_purpose": "OneTrust UI-Flag — verhindert Re-Display des "
|
||||||
|
"Banners nach Schliessung.",
|
||||||
|
"data_collected": ["closed_timestamp"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "low", "technical_necessity": "full",
|
||||||
|
},
|
||||||
|
"usercentrics-uuid": {
|
||||||
|
"vendor": "Usercentrics GmbH", "vendor_country": "DE",
|
||||||
|
"exact_purpose": "Usercentrics Consent-Speicherung — UUID-basiert.",
|
||||||
|
"data_collected": ["consent_uuid", "consent_settings"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "low", "technical_necessity": "full",
|
||||||
|
"schrems_ii_status": "DE — kein Drittland.",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- Weitere Social / Werbeplattformen ---------------------------
|
||||||
|
# _pin_unauth lebt in Base-KB.
|
||||||
|
"_scid": {
|
||||||
|
"vendor": "Snap Group Ltd.", "vendor_country": "GB/US",
|
||||||
|
"exact_purpose": "Snapchat Pixel — Conversion-Tracking fuer "
|
||||||
|
"Snap Ads.",
|
||||||
|
"data_collected": ["snap_visitor_id"],
|
||||||
|
"ip_relevant": True,
|
||||||
|
"tcf_purpose_ids": [7, 9, 10],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "Drittlandtransfer; UK seit 2021 mit "
|
||||||
|
"Angemessenheitsbeschluss.",
|
||||||
|
},
|
||||||
|
"guest_id": {
|
||||||
|
"vendor": "X Corp. (Twitter)", "vendor_country": "US",
|
||||||
|
"exact_purpose": "X/Twitter Guest-Identifier — Tracking nicht "
|
||||||
|
"eingeloggter Besucher inkl. Embeds.",
|
||||||
|
"data_collected": ["guest_id"],
|
||||||
|
"ip_relevant": True,
|
||||||
|
"tcf_purpose_ids": [4, 9, 10],
|
||||||
|
"typical_lifetime": "2 Jahre",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "DPF-Status unklar seit Eigentuemerwechsel 2022. "
|
||||||
|
"Erhoehtes Risiko, EDPB beobachtet.",
|
||||||
|
},
|
||||||
|
"VISITOR_INFO1_LIVE": {
|
||||||
|
"vendor": "Google Ireland Ltd. (YouTube)", "vendor_country": "IE",
|
||||||
|
"exact_purpose": "YouTube Embed Visitor-ID — Bandbreiten-Optimierung "
|
||||||
|
"+ Empfehlungsalgorithmus.",
|
||||||
|
"data_collected": ["youtube_visitor_id"],
|
||||||
|
"ip_relevant": True,
|
||||||
|
"tcf_purpose_ids": [8, 10],
|
||||||
|
"typical_lifetime": "6 Monate",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"notes": "YouTube-NoCookie-Domain (youtube-nocookie.com) reduziert "
|
||||||
|
"Tracking — DSGVO-konformer.",
|
||||||
|
},
|
||||||
|
"vuid": {
|
||||||
|
"vendor": "Vimeo Inc.", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Vimeo User-Identifier — Wiedererkennung "
|
||||||
|
"wiederkehrender Besucher fuer Statistik.",
|
||||||
|
"data_collected": ["vimeo_user_id"],
|
||||||
|
"typical_lifetime": "2 Jahre",
|
||||||
|
"reid_risk": "medium", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert.",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- Marketing-Automation / Email --------------------------------
|
||||||
|
"__kla_id": {
|
||||||
|
"vendor": "Klaviyo Inc.", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Klaviyo Visitor-Tracking — fuer E-Mail-Marketing-"
|
||||||
|
"Attribution.",
|
||||||
|
"data_collected": ["klaviyo_id"],
|
||||||
|
"ip_relevant": True,
|
||||||
|
"typical_lifetime": "2 Jahre",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert.",
|
||||||
|
},
|
||||||
|
"_mcid": {
|
||||||
|
"vendor": "Intuit Mailchimp", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Mailchimp Email-Click-Tracking — Verknuepft "
|
||||||
|
"Pageviews mit gesendeter Kampagne.",
|
||||||
|
"data_collected": ["mc_email_id"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert.",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- Product-Analytics / CDP -------------------------------------
|
||||||
|
"mp_": {
|
||||||
|
"vendor": "Mixpanel Inc.", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Mixpanel Distinct-ID + Properties — "
|
||||||
|
"Pseudonyme Event-Analytics.",
|
||||||
|
"data_collected": ["distinct_id", "properties"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert; EU-Residency optional.",
|
||||||
|
"notes": "Suffix `<token>_mixpanel`. Pattern-Match noetig.",
|
||||||
|
},
|
||||||
|
"ajs_anonymous_id": {
|
||||||
|
"vendor": "Twilio Inc. (Segment)", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Segment Anonymous-ID — Cross-Device-Identitaet "
|
||||||
|
"vor Login.",
|
||||||
|
"data_collected": ["anonymous_id"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert; EU-Datenresidenz optional.",
|
||||||
|
},
|
||||||
|
"AMP_": {
|
||||||
|
"vendor": "Amplitude Inc.", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Amplitude Device-ID — Cross-Session-Identitaet "
|
||||||
|
"fuer Product-Analytics.",
|
||||||
|
"data_collected": ["device_id", "session_id"],
|
||||||
|
"typical_lifetime": "1 Jahr",
|
||||||
|
"reid_risk": "high", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert.",
|
||||||
|
"notes": "Suffix `<api_key>`. Pattern-Match noetig.",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- A/B-Testing -------------------------------------------------
|
||||||
|
"optimizelyEndUserId": {
|
||||||
|
"vendor": "Optimizely Inc.", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Optimizely End-User-ID — konsistente "
|
||||||
|
"Experiment-Zuteilung pro Besucher.",
|
||||||
|
"data_collected": ["end_user_id", "variation_assignments"],
|
||||||
|
"typical_lifetime": "6 Monate",
|
||||||
|
"reid_risk": "medium", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "DPF-zertifiziert.",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- RUM / Monitoring (oft strictly necessary diskutiert) --------
|
||||||
|
"_dd_s": {
|
||||||
|
"vendor": "Datadog Inc.", "vendor_country": "US",
|
||||||
|
"exact_purpose": "Datadog RUM Session-Tracking — Performance- "
|
||||||
|
"Monitoring + Fehler-Telemetrie.",
|
||||||
|
"data_collected": ["session_id", "session_type"],
|
||||||
|
"typical_lifetime": "15 Minuten",
|
||||||
|
"reid_risk": "low", "technical_necessity": "partial",
|
||||||
|
"schrems_ii_status": "EU-Region (Frankfurt) verfuegbar.",
|
||||||
|
"notes": "Bei reiner Server-/Fehler-Telemetrie ohne Cross-Site-"
|
||||||
|
"Tracking Argument fuer berechtigtes Interesse moeglich.",
|
||||||
|
},
|
||||||
|
|
||||||
|
# --- EU-Analytics-Alternativen -----------------------------------
|
||||||
|
"_pk_ref": {
|
||||||
|
"vendor": "InnoCraft Ltd. (Matomo)", "vendor_country": "NZ",
|
||||||
|
"exact_purpose": "Matomo Referrer-Tracking — Quelle des Besuchs.",
|
||||||
|
"data_collected": ["referrer", "campaign"],
|
||||||
|
"typical_lifetime": "6 Monate",
|
||||||
|
"reid_risk": "low", "technical_necessity": "none",
|
||||||
|
"schrems_ii_status": "NZ hat Angemessenheitsbeschluss (2012). "
|
||||||
|
"Bei On-Premise-Hosting kein Transfer.",
|
||||||
|
"notes": "Self-Hosting empfohlen — dann zeroes Drittland.",
|
||||||
|
},
|
||||||
|
"_pk_cvar": {
|
||||||
|
"vendor": "InnoCraft Ltd. (Matomo)", "vendor_country": "NZ",
|
||||||
|
"exact_purpose": "Matomo Custom-Variables — pro Visit konfigurierbar.",
|
||||||
|
"data_collected": ["custom_vars"],
|
||||||
|
"typical_lifetime": "30 Minuten",
|
||||||
|
"reid_risk": "low", "technical_necessity": "none",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Pattern lookups for dynamic cookie names: some vendors append a per-site
# suffix (site id, app id, token, ...) to a fixed prefix. Each entry pairs an
# anchored prefix regex with the KB_EXT key it resolves to; the key is the
# prefix itself.
_EXT_PATTERNS: list[tuple[str, str]] = [
    ("^" + prefix, prefix)
    for prefix in (
        "_hjSessionUser_",
        "_hjSession_",
        "intercom-id-",
        "mp_",
        "AMP_",
    )
]
|
||||||
|
|
||||||
|
|
||||||
|
def lookup_cookie_extended(name: str) -> CookieKnowledge | None:
    """Look up a cookie name in the extension knowledge base (KB_EXT).

    Resolution order:
      1. exact key match,
      2. first matching prefix pattern from ``_EXT_PATTERNS``,
      3. the part of the name before the first ``.`` as an exact key.

    Returns ``None`` for an empty name or when nothing matches.
    """
    import re

    if not name:
        return None

    # 1. Exact hit.
    if name in KB_EXT:
        return KB_EXT[name]

    # 2. Dynamic names: the first pattern that matches decides the entry.
    family_key = next(
        (key for pattern, key in _EXT_PATTERNS if re.search(pattern, name)),
        None,
    )
    if family_key is not None:
        return KB_EXT.get(family_key)

    # 3. Dotted names: retry with everything before the first dot.
    prefix, dot, _rest = name.partition(".")
    if dot and prefix in KB_EXT:
        return KB_EXT[prefix]

    return None
|
||||||
@@ -0,0 +1,255 @@
|
|||||||
|
"""
|
||||||
|
Cookie-Policy-Architecture-Detection.
|
||||||
|
|
||||||
|
Erkennt vier Diagnose-Punkte zur rechtlichen Bewertung der Cookie-Policy
|
||||||
|
einer Website. Hintergrund: die DSGVO + TDDDG verlangen ZWEI Layer
|
||||||
|
(Banner fuer Consent + Cookie-Richtlinie fuer Information), aber lassen
|
||||||
|
offen ob das in einem oder zwei HTML-Dokumenten umgesetzt wird.
|
||||||
|
|
||||||
|
BMW-Pattern: eine HTML-Seite ist GLEICHZEITIG der Banner-Re-Trigger und
|
||||||
|
die Cookie-Richtlinie. Mindestanforderung erfuellt, aber kein
|
||||||
|
versionierter Audit-Trail moeglich -> "gelbes" Risiko.
|
||||||
|
|
||||||
|
Output-Format:
|
||||||
|
{
|
||||||
|
"layer_separation": "single" | "separate" | "unknown",
|
||||||
|
"versioned": bool,
|
||||||
|
"dynamic_content": bool,
|
||||||
|
"vendor_count_in_text": int,
|
||||||
|
"risk_label": "gruen" | "gelb" | "rot",
|
||||||
|
"recommendation": str,
|
||||||
|
"signals": [{"src": ..., "detail": ...}],
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
|
||||||
|
# Regexes that recognise a version/date stamp in the (lower-cased) policy
# text, e.g. "Stand vom DD.MM.YYYY", "Stand: DD.MM.YYYY" or "Version X.Y".
_VERSION_PATTERNS = [
    # "stand: 01.05.2026" / "stand vom 1.5.2026"
    r"stand\s*[:\-]?\s*(?:vom\s+)?\d{1,2}\.\s*\d{1,2}\.\s*\d{4}",
    # "stand: 1. mai 2026" (month written out)
    r"stand\s*[:\-]?\s*\d{1,2}\.\s*\w+\s+\d{4}",
    # "letzte aktualisierung: 1." / "letzte aenderung 12."
    r"letzte\s+(?:aktualisierung|aenderung|änderung)\s*[:\-]?\s*\d{1,2}\.",
    # "version 2" / "version: 2.1"
    r"version\s*[:\-]?\s*\d+(?:\.\d+)?",
    # "stand der information: 1." / "stand der cookie-... 12."
    r"stand\s+der\s+(?:information|cookie)\w*\s*[:\-]?\s*\d{1,2}\.",
    # "gueltig ab 01.05.2026"
    r"(?:gueltig|gültig)\s+ab\s+\d{1,2}\.\s*\d{1,2}\.\s*\d{4}",
]
|
||||||
|
|
||||||
|
# Lower-case phrases that hint at dynamically generated content. They are
# compared as plain substrings against the lower-cased policy text, which is
# why umlaut and ae/ue transliterations are both listed.
_DYNAMIC_MARKERS = [
    # explicit statements about automatic generation
    "wird automatisch aktualisiert",
    "wird dynamisch generiert",
    "wird laufend angepasst",
    # controls for changing cookie settings / preferences
    "cookie-einstellungen ändern",
    "cookie-einstellungen aendern",
    "cookie-praeferenzen verwalten",
    "cookie-präferenzen verwalten",
    # consent management wording
    "consent aktualisieren",
    "einwilligung verwalten",
    "einwilligungs-einstellungen",
]
|
||||||
|
|
||||||
|
# CMP trigger markers: container/button labels that typically re-open the
# consent banner. Matched as plain substrings against the lower-cased text.
_BANNER_TRIGGER_MARKERS = [
    # "open cookie settings" (with and without hyphen)
    "cookie-einstellungen öffnen",
    "cookie einstellungen öffnen",
    # "your cookie preferences"
    "ihre cookie-präferenzen",
    "ihre cookie praeferenzen",
    # generic banner / privacy-settings labels
    "consent banner",
    "datenschutz-einstellungen",
    "cookie-banner anzeigen",
]
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_url(u: str) -> str:
    """Reduce a URL to a lower-cased ``host + path`` key.

    The scheme, query string and fragment are dropped, a trailing slash on
    the path is removed, and a leading ``www.`` host label is stripped.

    Args:
        u: Absolute URL, or a scheme-less ``host/path`` string.

    Returns:
        The normalized ``host/path`` string, or ``""`` for empty input.
    """
    if not u:
        return ""
    if "://" not in u:
        # Without a scheme urlparse() would treat the host as part of the path.
        u = "https://" + u
    parsed = urlparse(u)
    host = parsed.netloc.lower()
    # Strip only a *leading* "www." label. A blanket replace() would also
    # rewrite unrelated hosts such as "awww.example.com" -> "aexample.com"
    # or "cdn.www.example.com" -> "cdn.example.com".
    if host.startswith("www."):
        host = host[len("www."):]
    path = parsed.path.rstrip("/").lower()
    return f"{host}{path}"
|
||||||
|
|
||||||
|
|
||||||
|
def _check_versioned(text_lower: str) -> tuple[bool, str | None]:
    """Search the text for a version/date stamp.

    Tries ``_VERSION_PATTERNS`` in order and stops at the first hit.

    Returns:
        ``(True, matched_text)`` with the match truncated to 80 characters,
        or ``(False, None)`` when no pattern matches.
    """
    hits = (re.search(pattern, text_lower) for pattern in _VERSION_PATTERNS)
    first_hit = next((hit for hit in hits if hit), None)
    if first_hit is None:
        return False, None
    return True, first_hit.group()[:80]
|
||||||
|
|
||||||
|
|
||||||
|
def _check_dynamic(text_lower: str) -> tuple[bool, str | None]:
    """Report the first entry of ``_DYNAMIC_MARKERS`` contained in the text.

    Returns:
        ``(True, marker)`` for the first marker found as a substring,
        otherwise ``(False, None)``.
    """
    found = next(
        (phrase for phrase in _DYNAMIC_MARKERS if phrase in text_lower),
        None,
    )
    if found is None:
        return False, None
    return True, found
|
||||||
|
|
||||||
|
|
||||||
|
def _check_banner_trigger(text_lower: str) -> tuple[bool, str | None]:
    """Report the first entry of ``_BANNER_TRIGGER_MARKERS`` found in the text.

    Returns:
        ``(True, marker)`` for the first marker found as a substring,
        otherwise ``(False, None)``.
    """
    candidates = [label for label in _BANNER_TRIGGER_MARKERS if label in text_lower]
    if candidates:
        # List order is preserved, so index 0 is the first configured marker.
        return True, candidates[0]
    return False, None
|
||||||
|
|
||||||
|
|
||||||
|
def _count_vendor_signals(text_lower: str) -> int:
    """Count how many known vendor names occur in the lower-cased text.

    The count is an indicator of whether the vendor list is written out
    statically in the document or loaded dynamically. Each vendor name
    counts at most once, no matter how often it appears.

    Args:
        text_lower: Document text, already lower-cased.

    Returns:
        Number of distinct vendor names found (0..18).
    """
    # Distinctive names: a plain substring test is enough and also catches
    # domains such as "googletagmanager.com" or "amazonaws.com".
    substring_vendors = (
        "google", "facebook", "adobe", "microsoft", "linkedin",
        "tiktok", "amazon", "hotjar", "cloudflare", "stripe", "salesforce",
        "hubspot", "mailchimp", "pinterest", "snapchat", "youtube", "vimeo",
    )
    count = sum(1 for vendor in substring_vendors if vendor in text_lower)

    # "meta" is also a common word prefix ("metadaten", "metadata"), so it
    # only counts when it appears as a whole word.
    if re.search(r"\bmeta\b", text_lower):
        count += 1
    return count
|
||||||
|
|
||||||
|
|
||||||
|
def detect_architecture(
    doc_url: str,
    doc_text: str,
    cmp_payloads: list[dict] | None = None,
    homepage_cmp_payloads: list[dict] | None = None,
) -> dict:
    """Assess the layer architecture of a cookie policy document.

    Args:
        doc_url: URL of the detected cookie policy document.
        doc_text: Full text of the cookie policy.
        cmp_payloads: CMP captures recorded WHILE the document was crawled.
        homepage_cmp_payloads: CMP captures from the initial homepage crawl.

    Returns:
        Dict with the keys ``layer_separation`` ("single" | "separate" |
        "unknown"), ``versioned``, ``dynamic_content``,
        ``vendor_count_in_text``, ``risk_label`` ("gruen" | "gelb" | "rot"),
        ``recommendation``, ``signals`` (list of ``{"src", "detail"}``) and
        ``doc_url_normalized``.
    """
    text_lower = (doc_text or "").lower()
    signals: list[dict] = []

    # 1. Single vs. separate layer. Either a CMP capture on the document
    #    itself or a banner-trigger label in its text marks it as "single".
    cmp_on_doc = bool(cmp_payloads)
    banner_trigger, trigger_marker = _check_banner_trigger(text_lower)
    if cmp_on_doc or banner_trigger:
        layer = "single"
        if cmp_on_doc and banner_trigger:
            evidence = {"src": "cmp+marker",
                        "detail": f"CMP feuerte auf Doc-URL + Marker '{trigger_marker}'"}
        elif cmp_on_doc:
            evidence = {"src": "cmp", "detail": "CMP-Payload waehrend Doc-Crawl"}
        else:
            evidence = {"src": "marker", "detail": f"Trigger-Marker: '{trigger_marker}'"}
        signals.append(evidence)
    elif homepage_cmp_payloads:
        # CMP seen on the homepage only -> the policy is its own page.
        layer = "separate"
        signals.append({"src": "topology",
                        "detail": "Banner triggert nur auf Homepage, Cookie-Doc ist eigene Seite"})
    else:
        layer = "unknown"

    # 2. Versioning
    versioned, version_marker = _check_versioned(text_lower)
    if versioned:
        signals.append({"src": "version", "detail": f"Marker: '{version_marker}'"})

    # 3. Dynamic content: a text marker or a CMP capture on the document.
    marker_dynamic, dyn_marker = _check_dynamic(text_lower)
    dynamic = bool(marker_dynamic or cmp_on_doc)
    if dynamic and dyn_marker:
        signals.append({"src": "dynamic", "detail": dyn_marker})

    # 4. Vendor count (indicator whether the list is static in the text)
    vendor_count = _count_vendor_signals(text_lower)

    # Risk rating
    if layer == "unknown" and vendor_count < 3:
        risk = "rot"
        rec = (
            "Cookie-Richtlinie konnte nicht eindeutig identifiziert oder ist "
            "unzureichend. Pruefen Sie ob die Pflicht-Information nach "
            "Art. 13 DSGVO + §25 TDDDG ueberhaupt erreichbar ist."
        )
    elif layer == "single":
        risk = "gelb"
        if versioned:
            rec = (
                "Single-Layer mit Versionierung — gute Mindestloesung. "
                "Best Practice waere zusaetzlich eine getrennte statische "
                "Vendor-Tabelle die Crawler indexieren koennen."
            )
        else:
            rec = (
                "BMW-Pattern erkannt: Single-Layer-CMP (Banner-Trigger + "
                "Info-Layer in einer URL). Mindestanforderung erfuellt, aber "
                "OHNE Versionierung. Bei einer Aufsichtsbehoerden-Pruefung "
                "kann nicht belegt werden welche Vendor-Liste an einem "
                "bestimmten Stichtag aktiv war. Empfehlung: monatlicher "
                "Snapshot der dynamischen Vendor-Tabelle als versioniertes "
                "PDF im Archiv."
            )
    elif layer == "separate":
        if versioned:
            risk = "gruen"
            rec = (
                "Best Practice umgesetzt: separater Banner + versionierte "
                "Cookie-Richtlinie."
            )
        else:
            risk = "gelb"
            rec = (
                "Separate Cookie-Richtlinie vorhanden, aber ohne Versionierung. "
                "Snapshot-Archiv empfohlen."
            )
    else:
        # Layer unknown although at least three vendor names were found.
        risk = "gelb"
        rec = "Cookie-Policy-Architektur uneindeutig — manuelle Pruefung empfohlen."

    return {
        "layer_separation": layer,
        "versioned": versioned,
        "dynamic_content": dynamic,
        "vendor_count_in_text": vendor_count,
        "risk_label": risk,
        "recommendation": rec,
        "signals": signals,
        "doc_url_normalized": _normalize_url(doc_url),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_architecture_html(arch: dict) -> str:
    """Render the architecture block for the executive summary.

    Args:
        arch: Result dict with the keys ``risk_label``, ``layer_separation``,
            ``versioned``, ``dynamic_content``, ``vendor_count_in_text`` and
            ``recommendation``.

    Returns:
        An inline-styled HTML fragment, or ``""`` when ``arch`` is empty.

    Raises:
        KeyError: if a required key is missing or ``layer_separation`` is
            not one of "single" / "separate" / "unknown".
    """
    if not arch:
        return ""

    # (border, background, foreground) per risk label; grey for anything else.
    palette = {
        "gruen": ("#16a34a", "#dcfce7", "#166534"),
        "gelb": ("#d97706", "#fef3c7", "#92400e"),
        "rot": ("#dc2626", "#fee2e2", "#991b1b"),
    }
    neutral = ("#94a3b8", "#f1f5f9", "#475569")
    border, bg, fg = palette.get(arch["risk_label"], neutral)

    layer_names = {
        "single": "Single-Layer (kombiniert)",
        "separate": "Separate Layer (Best Practice)",
        "unknown": "Nicht eindeutig",
    }
    layer_label = layer_names[arch["layer_separation"]]
    versioned_lbl = "nein" if not arch["versioned"] else "ja"
    dynamic_lbl = "statisch" if not arch["dynamic_content"] else "ja (CMP-generiert)"
    vendor_count = arch["vendor_count_in_text"]
    recommendation = arch["recommendation"]

    fragments = [
        # outer card
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;',
        'max-width:700px;margin:0 auto 14px;padding:12px 16px;',
        f'background:{bg};border:1px solid {border};border-radius:8px;color:{fg}">',
        # heading
        '<div style="font-size:11px;text-transform:uppercase;letter-spacing:1px;',
        'font-weight:600;margin-bottom:6px">Cookie-Policy-Architektur</div>',
        # facts table
        '<table style="width:100%;font-size:12px;margin:0">',
        '<tr><td style="padding:2px 0;width:50%">Layer-Trennung</td>',
        f'<td><strong>{layer_label}</strong></td></tr>',
        '<tr><td style="padding:2px 0">Versionierung</td>',
        f'<td><strong>{versioned_lbl}</strong></td></tr>',
        '<tr><td style="padding:2px 0">Vendor-Liste</td>',
        f'<td><strong>{dynamic_lbl}</strong></td></tr>',
        '<tr><td style="padding:2px 0">Vendor-Namen im Text</td>',
        f'<td><strong>{vendor_count}</strong></td></tr>',
        '</table>',
        # recommendation footer
        '<div style="font-size:11px;margin-top:8px;padding-top:8px;',
        f'border-top:1px solid {border};font-style:italic">',
        f'{recommendation}</div>',
        '</div>',
    ]
    return "".join(fragments)
|
||||||
@@ -16,8 +16,9 @@ IMPRESSUM_CHECKLIST = [
|
|||||||
"label": "Name des Anbieters",
|
"label": "Name des Anbieters",
|
||||||
"level": 1, "parent": None,
|
"level": 1, "parent": None,
|
||||||
"patterns": [
|
"patterns": [
|
||||||
r"(?:gmbh|ag|e\.v\.|ohg|kg|gbr|ug|mbh|inc|ltd)",
|
# Word-Boundaries verhindern Falsch-Treffer ("ag" in "samstag")
|
||||||
r"firma", r"unternehmen",
|
r"\b(?:gmbh|ag|e\.v\.|ohg|kg|gbr|ug|mbh|inc|ltd|aktiengesellschaft|kommanditgesellschaft|partnerschaft\s+mbb)\b",
|
||||||
|
r"\bfirma\s+\w+", r"\bunternehmen\s+\w+",
|
||||||
],
|
],
|
||||||
"severity": "HIGH",
|
"severity": "HIGH",
|
||||||
"hint": "§5(1) Nr.1 TMG: Vollstaendiger Firmenname MIT Rechtsform (z.B. 'Muster GmbH', nicht nur 'Muster'). Bei Einzelunternehmen: Vor- und Nachname plus ggf. Geschaeftsbezeichnung. Haeufiger Abmahngrund: Nur Markenname ohne juristische Person.",
|
"hint": "§5(1) Nr.1 TMG: Vollstaendiger Firmenname MIT Rechtsform (z.B. 'Muster GmbH', nicht nur 'Muster'). Bei Einzelunternehmen: Vor- und Nachname plus ggf. Geschaeftsbezeichnung. Haeufiger Abmahngrund: Nur Markenname ohne juristische Person.",
|
||||||
@@ -178,9 +179,13 @@ IMPRESSUM_CHECKLIST = [
|
|||||||
"label": "Name der vertretungsberechtigten Person",
|
"label": "Name der vertretungsberechtigten Person",
|
||||||
"level": 2, "parent": "representative",
|
"level": 2, "parent": "representative",
|
||||||
"patterns": [
|
"patterns": [
|
||||||
r"(?:gesch(?:ae|ä)ftsf(?:ue|ü)hr\w*|vorstand|inhaber)\s*:?\s*[a-zA-Z\u00c0-\u017e]",
|
r"(?:gesch(?:ae|ä)ftsf(?:ue|ü)hr\w*|vorstand|inhaber|aufsichtsrats?)\s*[:\-]?\s*[a-zA-Z\u00c0-\u017e]",
|
||||||
r"(?:vertreten\s+durch|repr(?:ae|ä)sentiert)\s*:?\s*[a-zA-Z\u00c0-\u017e]",
|
# "Vorstand (Milan Nedeljkovic, ...)" - BMW-Pattern mit Klammer-Liste
|
||||||
r"(?:gesch(?:ae|ä)ftsf(?:ue|ü)hrung)\s*:?\s*(?:dr\.?\s+|prof\.?\s+)?[a-zA-Z\u00c0-\u017e]",
|
r"(?:vorstand|gesch(?:ae|ä)ftsf(?:ue|ü)hrung|aufsichtsrats?)\s*\(\s*[a-zA-Z\u00c0-\u017e]",
|
||||||
|
r"(?:vertreten\s+durch|repr(?:ae|ä)sentiert)\s*[:\-]?\s*(?:den\s+vorstand\s*\(?|[a-zA-Z\u00c0-\u017e])",
|
||||||
|
r"(?:gesch(?:ae|ä)ftsf(?:ue|ü)hrung)\s*[:\-]?\s*(?:dr\.?\s+|prof\.?\s+)?[a-zA-Z\u00c0-\u017e]",
|
||||||
|
# "Vorsitzender des Aufsichtsrats: Nicolas Peter"
|
||||||
|
r"(?:vorsitzend\w+|stellv\w*\s+vorsitz\w*)\s+(?:des\s+\w+\s*)?[:\-]?\s*[a-zA-Z\u00c0-\u017e]",
|
||||||
],
|
],
|
||||||
"severity": "LOW",
|
"severity": "LOW",
|
||||||
"hint": "Voller Vor- und Nachname mit Funktionsbezeichnung erforderlich (z.B. 'Geschaeftsfuehrung: Dr. Max Mustermann').",
|
"hint": "Voller Vor- und Nachname mit Funktionsbezeichnung erforderlich (z.B. 'Geschaeftsfuehrung: Dr. Max Mustermann').",
|
||||||
@@ -234,11 +239,12 @@ IMPRESSUM_CHECKLIST = [
|
|||||||
"label": "Zustaendige Kammer benannt",
|
"label": "Zustaendige Kammer benannt",
|
||||||
"level": 2, "parent": "regulated_profession",
|
"level": 2, "parent": "regulated_profession",
|
||||||
"patterns": [
|
"patterns": [
|
||||||
r"(?:(?:ae|ä)rztekammer|rechtsanwaltskammer|steuerberaterkammer|architektenkammer|ingenieurkammer|apothekerkammer)",
|
r"(?:(?:ae|ä)rztekammer|rechtsanwaltskammer|steuerberaterkammer|architektenkammer|ingenieurkammer|apothekerkammer|handwerkskammer|tier(?:ae|ä)rztekammer|psychotherapeutenkammer)",
|
||||||
|
r"\bihk\b|industrie-?\s+und\s+handelskammer",
|
||||||
r"(?:mitglied|zugelassen|eingetragen)\s+(?:bei|in|der)\s+(?:der\s+)?(?:\w+)?kammer",
|
r"(?:mitglied|zugelassen|eingetragen)\s+(?:bei|in|der)\s+(?:der\s+)?(?:\w+)?kammer",
|
||||||
],
|
],
|
||||||
"severity": "LOW",
|
"severity": "INFO", # P9: konditional - nur kammerpflichtige Berufe
|
||||||
"hint": "Zustaendige Kammer mit vollem Namen und Sitz nennen (z.B. 'Rechtsanwaltskammer Muenchen').",
|
"hint": "Zustaendige Kammer mit vollem Namen und Sitz nennen (z.B. 'Rechtsanwaltskammer Muenchen', 'IHK Muenchen'). Nur relevant fuer kammerpflichtige Berufe.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "profession_title",
|
"id": "profession_title",
|
||||||
@@ -314,6 +320,7 @@ IMPRESSUM_CHECKLIST = [
|
|||||||
r"distanzier|macht\s+sich\s+(?:nicht|kein)\s+(?:zu\s+eigen|verantwortlich)",
|
r"distanzier|macht\s+sich\s+(?:nicht|kein)\s+(?:zu\s+eigen|verantwortlich)",
|
||||||
],
|
],
|
||||||
"severity": "LOW",
|
"severity": "LOW",
|
||||||
|
"invert": True, # Anti-Pattern: passed wenn NICHT gefunden
|
||||||
"hint": "Der klassische Link-Disclaimer ('Wir distanzieren uns von verlinkten Inhalten') ist seit BGH (I ZR 317/01) rechtlich wirkungslos. Empfehlung: Entfernen Sie pauschale Disclaimer — sie schuetzen nicht und koennen kontraproduktiv sein.",
|
"hint": "Der klassische Link-Disclaimer ('Wir distanzieren uns von verlinkten Inhalten') ist seit BGH (I ZR 317/01) rechtlich wirkungslos. Empfehlung: Entfernen Sie pauschale Disclaimer — sie schuetzen nicht und koennen kontraproduktiv sein.",
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -15,7 +15,9 @@ import httpx
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
|
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
|
||||||
OLLAMA_MODEL = os.getenv("OLLAMA_VERIFY_MODEL", "qwen3.5:35b-a3b")
|
# P13: qwen3:30b-a3b liefert zuverlaessige JSON-Antworten im Batch-Modus.
|
||||||
|
# qwen3.5:35b-a3b lieferte mit format='json' + langem Prompt leere Strings.
|
||||||
|
OLLAMA_MODEL = os.getenv("OLLAMA_VERIFY_MODEL", "qwen3:30b-a3b")
|
||||||
TIMEOUT = 30.0
|
TIMEOUT = 30.0
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ Pass 1: Run all L1 checks ("Is it mentioned?")
|
|||||||
Pass 2: Run L2 checks only where their L1 parent passed ("Is it correct?")
|
Pass 2: Run L2 checks only where their L1 parent passed ("Is it correct?")
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -83,6 +85,7 @@ def check_document_completeness(
|
|||||||
doc_type: str,
|
doc_type: str,
|
||||||
doc_title: str,
|
doc_title: str,
|
||||||
doc_url: str,
|
doc_url: str,
|
||||||
|
business_profile: dict | None = None,
|
||||||
) -> list[dict]:
|
) -> list[dict]:
|
||||||
"""Check a legal document against its type-specific requirements.
|
"""Check a legal document against its type-specific requirements.
|
||||||
|
|
||||||
@@ -90,9 +93,20 @@ def check_document_completeness(
|
|||||||
L1 — Is the mandatory field mentioned at all?
|
L1 — Is the mandatory field mentioned at all?
|
||||||
L2 — Is it correct/complete? (only checked if L1 parent passed)
|
L2 — Is it correct/complete? (only checked if L1 parent passed)
|
||||||
|
|
||||||
|
business_profile (optional) wird genutzt um Checks die fuer das
|
||||||
|
spezifische Unternehmen nicht anwendbar sind als 'skipped' zu
|
||||||
|
markieren (z.B. OS-Plattform/VSBG bei no_direct_sales=True).
|
||||||
|
|
||||||
Returns a list of findings (summary + missing items).
|
Returns a list of findings (summary + missing items).
|
||||||
"""
|
"""
|
||||||
findings = []
|
findings = []
|
||||||
|
no_direct_sales = bool((business_profile or {}).get("no_direct_sales"))
|
||||||
|
# P9: Welche Check-IDs sind bei OEM-Konfigurator-Pattern obsolet.
|
||||||
|
skip_check_ids: set[str] = set()
|
||||||
|
if no_direct_sales:
|
||||||
|
skip_check_ids.update([
|
||||||
|
"dispute_resolution", # OS-Plattform / VSBG nur B2C-Direkthaendler
|
||||||
|
])
|
||||||
# Strip soft hyphens ( / \xad) that CMS tools insert for word-breaking
|
# Strip soft hyphens ( / \xad) that CMS tools insert for word-breaking
|
||||||
# — they break regex matches on compound words like "Datenübertragbarkeit"
|
# — they break regex matches on compound words like "Datenübertragbarkeit"
|
||||||
text_clean = text.replace("\xad", "").replace("­", "")
|
text_clean = text.replace("\xad", "").replace("­", "")
|
||||||
@@ -135,8 +149,25 @@ def check_document_completeness(
|
|||||||
|
|
||||||
for check in l1_checks:
|
for check in l1_checks:
|
||||||
is_info = check.get("severity") == "INFO"
|
is_info = check.get("severity") == "INFO"
|
||||||
|
# P9: Profil-basiertes Skip (OEM-Pattern -> OS-Plattform raus)
|
||||||
|
if check["id"] in skip_check_ids:
|
||||||
|
all_checks.append({
|
||||||
|
"id": check["id"], "label": check["label"],
|
||||||
|
"passed": False, "severity": "INFO",
|
||||||
|
"matched_text": "", "level": 1, "parent": None,
|
||||||
|
"skipped": True,
|
||||||
|
"hint": "Nicht anwendbar: Unternehmen betreibt keinen "
|
||||||
|
"Direkt-Vertrieb an Verbraucher (OEM-Konfigurator-Pattern).",
|
||||||
|
})
|
||||||
|
continue
|
||||||
match = _match_patterns(check["patterns"], text_lower)
|
match = _match_patterns(check["patterns"], text_lower)
|
||||||
passed = match is not None
|
# P9: "invert"=True bedeutet Anti-Pattern (z.B. illegaler Link-
|
||||||
|
# Disclaimer): passed wenn NICHT gefunden, fail wenn gefunden.
|
||||||
|
if check.get("invert"):
|
||||||
|
passed = match is None
|
||||||
|
match = None if passed else match
|
||||||
|
else:
|
||||||
|
passed = match is not None
|
||||||
if passed:
|
if passed:
|
||||||
passed_l1_ids.add(check["id"])
|
passed_l1_ids.add(check["id"])
|
||||||
if not is_info:
|
if not is_info:
|
||||||
@@ -168,18 +199,26 @@ def check_document_completeness(
|
|||||||
|
|
||||||
for check in l2_checks:
|
for check in l2_checks:
|
||||||
parent = check.get("parent")
|
parent = check.get("parent")
|
||||||
|
is_info = check.get("severity") == "INFO"
|
||||||
skipped = parent not in passed_l1_ids
|
skipped = parent not in passed_l1_ids
|
||||||
passed = False
|
passed = False
|
||||||
matched_text = ""
|
matched_text = ""
|
||||||
|
|
||||||
if not skipped:
|
if not skipped:
|
||||||
l2_total += 1
|
|
||||||
match = _match_patterns(check["patterns"], text_lower)
|
match = _match_patterns(check["patterns"], text_lower)
|
||||||
passed = match is not None
|
passed = match is not None
|
||||||
if passed:
|
# P9: INFO-L2-Checks (konditional, z.B. Kammer) zaehlen NICHT
|
||||||
|
# in correctness-pct und erscheinen nicht als Fail-Finding.
|
||||||
|
if is_info:
|
||||||
|
if passed:
|
||||||
|
matched_text = _extract_context(text_lower, match)
|
||||||
|
# weder l2_total++ noch findings.append: kein Fail-Eintrag
|
||||||
|
else:
|
||||||
|
l2_total += 1
|
||||||
|
if passed and not is_info:
|
||||||
l2_passed += 1
|
l2_passed += 1
|
||||||
matched_text = _extract_context(text_lower, match)
|
matched_text = _extract_context(text_lower, match)
|
||||||
else:
|
elif not passed and not is_info:
|
||||||
findings.append({
|
findings.append({
|
||||||
"code": f"DSI-DETAIL-{check['id'].upper()}",
|
"code": f"DSI-DETAIL-{check['id'].upper()}",
|
||||||
"severity": check.get("severity", "MEDIUM"),
|
"severity": check.get("severity", "MEDIUM"),
|
||||||
|
|||||||
@@ -121,11 +121,37 @@ def _dedup_key(label: str) -> str:
|
|||||||
return label
|
return label
|
||||||
|
|
||||||
|
|
||||||
|
_CONDITIONAL_MARKERS = ("falls ", "sofern ", "wenn ", "soweit ",
|
||||||
|
"bei bedarf", "ggf.", "gegebenenfalls")
|
||||||
|
|
||||||
|
|
||||||
|
def _is_hard_finding(r: dict) -> bool:
|
||||||
|
"""Echtes Finding = wir haben einen positiven Treffer im Text der den
|
||||||
|
Verstoss belegt. Stille im Text reicht NICHT — das wandert ins MC-Audit
|
||||||
|
als "selbst pruefen", nicht ins Email als HIGH-Drohung.
|
||||||
|
|
||||||
|
Heuristik:
|
||||||
|
- matched_text nicht leer = textuelle Evidenz vorhanden → hart
|
||||||
|
- konditionales Label ("falls / sofern / wenn") UND matched_text leer
|
||||||
|
→ weich (Pre-Condition nicht belegt) → raus aus Top-Fails
|
||||||
|
- sonst: hart (klassische Pflichtangaben-Lücke wie "DSB fehlt")
|
||||||
|
"""
|
||||||
|
mt = (r.get("matched_text") or "").strip()
|
||||||
|
if mt:
|
||||||
|
return True
|
||||||
|
label_low = (r.get("label") or "").lower()
|
||||||
|
if any(m in label_low for m in _CONDITIONAL_MARKERS):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
def top_fails(check_results: list[dict], n: int = 10) -> list[dict]:
|
def top_fails(check_results: list[dict], n: int = 10) -> list[dict]:
|
||||||
"""Return top-N failing MCs sorted by severity then label.
|
"""Return top-N failing MCs sorted by severity then label.
|
||||||
|
|
||||||
Skipped + passed MCs are excluded. INFO severity is excluded by
|
Skipped + passed MCs are excluded. INFO severity is excluded by
|
||||||
default since those are guidance, not findings.
|
default since those are guidance, not findings. Konditionale MCs
|
||||||
|
ohne Negativ-Beleg (P8) werden ebenfalls ausgesteuert — sie
|
||||||
|
erscheinen nur noch im MC-Audit als "selbst pruefen".
|
||||||
|
|
||||||
Near-duplicates (multiple MCs that all complain about "einfache
|
Near-duplicates (multiple MCs that all complain about "einfache
|
||||||
Sprache" / "Einwilligungsaufforderung" / ...) are collapsed to ONE
|
Sprache" / "Einwilligungsaufforderung" / ...) are collapsed to ONE
|
||||||
@@ -136,6 +162,7 @@ def top_fails(check_results: list[dict], n: int = 10) -> list[dict]:
|
|||||||
r for r in (check_results or [])
|
r for r in (check_results or [])
|
||||||
if not r.get("passed") and not r.get("skipped")
|
if not r.get("passed") and not r.get("skipped")
|
||||||
and (r.get("severity") or "").upper() != "INFO"
|
and (r.get("severity") or "").upper() != "INFO"
|
||||||
|
and _is_hard_finding(r)
|
||||||
]
|
]
|
||||||
fails.sort(key=lambda r: (
|
fails.sort(key=lambda r: (
|
||||||
_SEV_RANK.get((r.get("severity") or "MEDIUM").upper(), 5),
|
_SEV_RANK.get((r.get("severity") or "MEDIUM").upper(), 5),
|
||||||
|
|||||||
@@ -0,0 +1,242 @@
|
|||||||
|
"""
|
||||||
|
TDM-Reservation-Check (§ 44b UrhG / EU CDSM Art. 4).
|
||||||
|
|
||||||
|
Prueft pro Domain ob ein maschinenlesbarer Nutzungsvorbehalt fuer
|
||||||
|
Text-and-Data-Mining gesetzt ist. Quellen:
|
||||||
|
1. robots.txt — User-agent: * Disallow: / (oder spezifisch fuer uns)
|
||||||
|
2. /ai.txt — Opt-out-Datei nach Spawning-Vorschlag (kein OpenAI-Standard)
|
||||||
|
3. HTTP-Header `tdm-reservation: 1` auf Homepage
|
||||||
|
4. HTML <meta name="tdm-reservation" content="1"> auf Homepage
|
||||||
|
5. HTML <meta name="robots" content="noai|noimageai"> Tags
|
||||||
|
|
||||||
|
Status-Interpretation:
|
||||||
|
status=allowed -> kein Vorbehalt, crawlbar
|
||||||
|
status=reserved -> expliziter Vorbehalt, NICHT crawlen
|
||||||
|
status=denied -> robots.txt-Zugriff aktiv blockiert (403/401)
|
||||||
|
=> konservativ: NICHT crawlen
|
||||||
|
status=unknown -> Server-Error (500/timeout/DNS) auf robots.txt
|
||||||
|
=> crawlbar, aber 24h-Recheck markiert
|
||||||
|
|
||||||
|
Cache via sidecar SQLite (gleiche DB wie compliance_audit_log), 24h TTL.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import time
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Literal
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db")
|
||||||
|
CACHE_TTL_SECONDS = 24 * 3600
|
||||||
|
|
||||||
|
Status = Literal["allowed", "reserved", "denied", "unknown"]
|
||||||
|
|
||||||
|
_DEFAULT_UA = (
|
||||||
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
||||||
|
"(KHTML, like Gecko) HeadlessChrome/120.0.0.0 Safari/537.36"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_cache_table() -> None:
    """Create the TDM cache table and its timestamp index if they are missing.

    Also creates the parent directory of ``DB_PATH`` when it does not exist.
    The connection is closed explicitly: ``with sqlite3.connect(...)`` only
    scopes the transaction and leaves the connection open.
    """
    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(DB_PATH)
    try:
        with conn:
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS tdm_reservation_cache (
                    domain  TEXT PRIMARY KEY,
                    ts      TEXT NOT NULL,
                    status  TEXT NOT NULL,
                    signals TEXT NOT NULL  -- JSON list[dict]
                );
                CREATE INDEX IF NOT EXISTS idx_tdm_ts ON tdm_reservation_cache(ts);
            """)
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _cache_get(domain: str) -> dict | None:
    """Return the cached TDM result for *domain*, or ``None``.

    ``None`` is returned when there is no row, when the row is older than
    ``CACHE_TTL_SECONDS``, or when anything goes wrong while reading (the
    cache is best-effort; failures are logged at debug level only).

    Args:
        domain: Cache key as produced by ``_base_domain``.

    Returns:
        A dict with ``domain``, ``status``, ``signals`` (decoded from JSON),
        ``cached=True`` and the stored ``ts`` string, or ``None``.
    """
    try:
        _ensure_cache_table()
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT * FROM tdm_reservation_cache WHERE domain=?", (domain,),
            ).fetchone()
        finally:
            # The sqlite3 context manager would not close the connection.
            conn.close()
        if not row:
            return None
        ts = datetime.fromisoformat(row["ts"]).timestamp()
        if time.time() - ts > CACHE_TTL_SECONDS:
            return None
        return {
            "domain": domain,
            "status": row["status"],
            "signals": json.loads(row["signals"]),
            "cached": True,
            "ts": row["ts"],
        }
    except Exception as e:
        logger.debug("tdm cache_get failed for %s: %s", domain, e)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _cache_put(domain: str, status: Status, signals: list[dict]) -> None:
    """Store (or overwrite) the TDM result for *domain* in the cache table.

    The row is stamped with the current UTC time in ISO format and
    ``signals`` is serialised as JSON. Failures are logged as warnings and
    otherwise ignored, so a broken cache never fails the check itself.

    Args:
        domain: Cache key as produced by ``_base_domain``.
        status: Resulting reservation status.
        signals: Evidence entries collected while probing.
    """
    try:
        _ensure_cache_table()
        conn = sqlite3.connect(DB_PATH)
        try:
            with conn:  # commits on success, rolls back on error
                conn.execute(
                    "INSERT OR REPLACE INTO tdm_reservation_cache "
                    "(domain, ts, status, signals) VALUES (?, ?, ?, ?)",
                    (
                        domain,
                        datetime.now(timezone.utc).isoformat(),
                        status,
                        json.dumps(signals, ensure_ascii=False),
                    ),
                )
        finally:
            # The sqlite3 context manager would not close the connection.
            conn.close()
    except Exception as e:
        logger.warning("tdm cache_put failed for %s: %s", domain, e)
|
||||||
|
|
||||||
|
|
||||||
|
def _base_domain(url_or_domain: str) -> str:
|
||||||
|
if not url_or_domain:
|
||||||
|
return ""
|
||||||
|
if "://" not in url_or_domain:
|
||||||
|
url_or_domain = "https://" + url_or_domain
|
||||||
|
netloc = urlparse(url_or_domain).netloc.lower()
|
||||||
|
return netloc.replace("www.", "")
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_status(client: httpx.AsyncClient, url: str) -> tuple[int, str, dict]:
    """GET *url* and return ``(status_code, body, headers)``.

    The body is limited to the first 16384 characters of the decoded text
    and is ``""`` when the response has no content. Any exception is logged
    at debug level and reported as ``(0, "", {})``.
    """
    try:
        response = await client.get(url)
        if response.content:
            text = response.text[:16384]
        else:
            text = ""
        return response.status_code, text, dict(response.headers)
    except Exception as e:
        logger.debug("tdm fetch %s failed: %s", url, e)
        return 0, "", {}
|
||||||
|
|
||||||
|
|
||||||
|
def _robots_disallows_us(body: str) -> bool:
|
||||||
|
"""Parse robots.txt — true if our group has Disallow: /."""
|
||||||
|
if not body:
|
||||||
|
return False
|
||||||
|
relevant_groups = ["*", "claudebot", "anthropic-ai", "gptbot",
|
||||||
|
"google-extended", "ccbot", "breakpilot"]
|
||||||
|
current_uas: list[str] = []
|
||||||
|
in_our_group = False
|
||||||
|
for raw in body.splitlines():
|
||||||
|
line = raw.split("#", 1)[0].strip()
|
||||||
|
if not line:
|
||||||
|
in_our_group = False
|
||||||
|
current_uas = []
|
||||||
|
continue
|
||||||
|
if ":" not in line:
|
||||||
|
continue
|
||||||
|
key, val = (s.strip().lower() for s in line.split(":", 1))
|
||||||
|
if key == "user-agent":
|
||||||
|
current_uas.append(val)
|
||||||
|
in_our_group = any(ua in relevant_groups for ua in current_uas)
|
||||||
|
elif key == "disallow" and in_our_group:
|
||||||
|
if val == "/" or val == "":
|
||||||
|
if val == "/":
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _meta_has_reservation(body: str) -> bool:
|
||||||
|
"""Detect <meta name="tdm-reservation|robots|googlebot"> with noai/noimageai/1."""
|
||||||
|
low = body.lower()
|
||||||
|
needles = [
|
||||||
|
'name="tdm-reservation" content="1"',
|
||||||
|
"name='tdm-reservation' content='1'",
|
||||||
|
'"noai"', '"noimageai"',
|
||||||
|
"content=\"noai", "content='noai",
|
||||||
|
]
|
||||||
|
return any(n in low for n in needles)
|
||||||
|
|
||||||
|
|
||||||
|
async def check_tdm_reservation(domain_or_url: str) -> dict:
    """Probe a domain for machine-readable TDM reservations.

    Probes run in this order and stop at the first one that changes the
    status away from "allowed": robots.txt, then /ai.txt, then the homepage
    (HTTP header first, HTML meta second). All requests go to
    ``www.<domain>``, where ``<domain>`` is the output of ``_base_domain``.

    Returns:
        {
          domain, status, signals: [{src, detail}], cached, ts
        }
        A fresh result is also written to the cache. For input that
        normalises to an empty domain the dict has status "unknown" and
        carries no ``ts`` key.
    """
    domain = _base_domain(domain_or_url)
    if not domain:
        return {"domain": "", "status": "unknown", "signals": [], "cached": False}

    # Serve from the sidecar cache when a fresh entry exists.
    cached = _cache_get(domain)
    if cached:
        return cached

    signals: list[dict] = []
    status: Status = "allowed"

    headers = {"User-Agent": _DEFAULT_UA, "Accept": "*/*"}
    async with httpx.AsyncClient(
        timeout=12.0, follow_redirects=True, headers=headers,
    ) as client:
        # robots.txt: try https first; fall back to http only when the https
        # fetch failed outright (status code 0 from _fetch_status).
        for scheme in ("https", "http"):
            r_code, r_body, _ = await _fetch_status(
                client, f"{scheme}://www.{domain}/robots.txt",
            )
            if r_code == 0 and scheme == "https":
                continue
            signals.append({"src": "robots.txt", "status_code": r_code,
                            "scheme": scheme})
            if r_code in (401, 403):
                status = "denied"
            elif r_code == 200 and _robots_disallows_us(r_body):
                status = "reserved"
                signals[-1]["detail"] = "Disallow: / for relevant UA group"
            elif r_code not in (200, 404):
                # Includes 0 (http fetch failed too) and server errors.
                status = "unknown"
            # The first scheme that produced a recorded result ends the loop.
            break

        if status == "allowed":
            # Any 200 on /ai.txt counts as a reservation; the file content
            # is not inspected.
            ai_code, _, _ = await _fetch_status(
                client, f"https://www.{domain}/ai.txt",
            )
            if ai_code == 200:
                status = "reserved"
                signals.append({"src": "ai.txt", "status_code": 200,
                                "detail": "ai.txt present"})

        if status == "allowed":
            # Homepage: response header takes precedence over HTML meta tags.
            h_code, h_body, h_hdrs = await _fetch_status(
                client, f"https://www.{domain}/",
            )
            if h_code == 200:
                if h_hdrs.get("tdm-reservation") == "1":
                    status = "reserved"
                    signals.append({"src": "http-header",
                                    "detail": "tdm-reservation: 1"})
                elif _meta_has_reservation(h_body):
                    status = "reserved"
                    signals.append({"src": "html-meta",
                                    "detail": "noai/tdm-reservation meta"})

    # Every outcome, including "unknown", is cached.
    _cache_put(domain, status, signals)
    return {
        "domain": domain,
        "status": status,
        "signals": signals,
        "cached": False,
        "ts": datetime.now(timezone.utc).isoformat(),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def is_crawl_allowed(result: dict) -> bool:
    """Tell whether a TDM check result permits crawling.

    Only the statuses "allowed" and "unknown" are crawlable; a missing or
    empty status is treated as "unknown".
    """
    status = result.get("status") or "unknown"
    return status == "allowed" or status == "unknown"
|
||||||
@@ -0,0 +1,277 @@
|
|||||||
|
"""
|
||||||
|
Aggregator: Doc-Check-Results + cmp_vendors + redundancy_report
|
||||||
|
-> einheitliche Finding-Records fuer unified_findings_store.
|
||||||
|
|
||||||
|
Speichert nur ABGELEITETE/normalisierte Findings (siehe Memory
|
||||||
|
'feedback_oem_data_legal.md'): keine rohen CMP-Cookie-Texte, keine
|
||||||
|
1:1-Spiegelung fremder Vendor-Listen — nur eigene Risk-/Status-Bewertung.
|
||||||
|
|
||||||
|
Hook:
|
||||||
|
from compliance.services.unified_findings_collector import collect
|
||||||
|
from compliance.services.unified_findings_store import record_findings
|
||||||
|
findings = collect(check_id, results, cmp_vendors, redundancy_report, doc_texts)
|
||||||
|
record_findings(check_id, findings)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
_SEVERITY_DEFAULT = {
|
||||||
|
"mc": "MEDIUM",
|
||||||
|
"pflichtangabe": "MEDIUM",
|
||||||
|
"vendor": "MEDIUM",
|
||||||
|
"redundanz": "LOW",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Mapping cmp_vendor.flag → action_recipe key + Default-Severity
|
||||||
|
_VENDOR_FLAG_SEVERITY = {
|
||||||
|
"no_cookies_listed": ("HIGH", "Cookie-Auflistung fehlt"),
|
||||||
|
"no_country": ("MEDIUM", "Sitzland des Anbieters fehlt"),
|
||||||
|
"no_privacy_url": ("HIGH", "Datenschutzerklaerung des Anbieters fehlt"),
|
||||||
|
"broken_privacy_url": ("HIGH", "Datenschutz-URL nicht erreichbar"),
|
||||||
|
"no_opt_out_url": ("MEDIUM", "Widerspruchs-/Opt-Out-Link fehlt"),
|
||||||
|
"broken_opt_out": ("MEDIUM", "Opt-Out-Link nicht erreichbar"),
|
||||||
|
"no_name": ("HIGH", "Anbieter-Name fehlt"),
|
||||||
|
"no_purpose": ("HIGH", "Verarbeitungszweck fehlt"),
|
||||||
|
"cookies_no_expiry": ("LOW", "Cookie-Speicherdauer fehlt"),
|
||||||
|
"cookies_no_names": ("LOW", "Cookie-Namen fehlen"),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_recipe(key: str) -> dict:
|
||||||
|
"""Lookup mit lazy-import — recipes-Modul ist optional."""
|
||||||
|
try:
|
||||||
|
from compliance.services.finding_action_recipes import recipe_for
|
||||||
|
r = recipe_for(key)
|
||||||
|
return dict(r) if r else {}
|
||||||
|
except Exception:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_anchor(label: str, doc_text: str, doc_id: str) -> dict:
|
||||||
|
"""Anchor-Lookup mit lazy-import + best-effort."""
|
||||||
|
if not label or not doc_text:
|
||||||
|
return {}
|
||||||
|
try:
|
||||||
|
from compliance.services.doc_anchor_locator import locate_anchor
|
||||||
|
a = locate_anchor(label, doc_text, doc_id)
|
||||||
|
return a or {}
|
||||||
|
except Exception:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _from_doc_check(
    check_id: str,
    r: Any,
    doc_text: str,
) -> list[dict]:
    """Turn one document check result into unified-finding rows.

    * An error starting with "Nicht anwendbar" yields one INFO row with
      status "na".
    * Any other error yields one HIGH row with status "failed"; the hint is
      the error text cut to 400 characters.
    * Otherwise every entry of ``r.checks`` becomes one row. Recipe and
      anchor are only resolved for failed checks; passed and skipped rows
      stay lean.

    Args:
        check_id: Id of the compliance check (not used in the rows here).
        r: Result object; ``error``, ``doc_type``, ``label``, ``scenario``
            and ``checks`` are read.
        doc_text: Document text used for anchor lookup.

    Returns:
        List of finding dicts.
    """
    if r.error:
        if r.error.startswith("Nicht anwendbar"):
            return [{
                "source_type": "pflichtangabe",
                "doc_type": r.doc_type,
                "severity": "INFO",
                "status": "na",
                "regulation": "",
                "label": f"{r.label}: {r.error}",
                "hint": r.error,
                "action_recipe": {},
                "payload": {"scenario": r.scenario},
            }]
        return [{
            "source_type": "pflichtangabe",
            "doc_type": r.doc_type,
            "severity": "HIGH",
            "status": "failed",
            "regulation": "",
            "label": f"{r.label}: Dokument nicht erreichbar",
            "hint": r.error[:400],
            "action_recipe": {},
            "payload": {},
        }]

    rows: list[dict] = []
    for chk in (r.checks or []):
        # Ids prefixed with "mc-" are master controls, the rest are
        # mandatory-information checks.
        source = "mc" if (chk.id or "").startswith("mc-") else "pflichtangabe"
        status = "passed" if chk.passed else ("skipped" if chk.skipped else "failed")
        severity = (chk.severity or _SEVERITY_DEFAULT[source]).upper()
        if status == "failed":
            # Recipe lookup by label first, then by id.
            recipe = _safe_recipe(chk.label or "") or _safe_recipe(chk.id or "")
            anchor = _safe_anchor(chk.label or "", doc_text, r.doc_type)
        else:
            recipe = {}
            anchor = {}
        rows.append({
            "source_type": source,
            "doc_type": r.doc_type,
            "severity": severity,
            "status": status,
            "regulation": chk.regulation or "",
            "label": chk.label or "",
            "hint": chk.hint or "",
            "action_recipe": recipe,
            "anchor_excerpt": (anchor.get("anchor_phrase") or "")[:800],
            "anchor_conf": _conf_to_score(anchor),
            "payload": {
                "mc_id": chk.id,
                "level": chk.level,
                "parent": chk.parent,
                "matched_text": (chk.matched_text or "")[:300],
                "article": chk.article or "",
                "anchor_method": anchor.get("method"),
                "anchor_position": anchor.get("position_hint"),
            },
        })
    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def _conf_to_score(anchor: dict) -> float:
|
||||||
|
if not anchor:
|
||||||
|
return 0.0
|
||||||
|
try:
|
||||||
|
return float(anchor.get("score") or 0.0)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _from_vendors(check_id: str, vendors: list[dict]) -> list[dict]:
    """Emit one failed finding row per compliance flag of every vendor.

    Flags are read from ``compliance_flags`` (falling back to ``flags``).
    Known flags get severity and label from ``_VENDOR_FLAG_SEVERITY``;
    unknown ones default to LOW with a title-cased label derived from the
    flag name.

    Args:
        check_id: Id of the compliance check (not used in the rows here).
        vendors: Vendor dicts; ``None`` or empty yields no rows.

    Returns:
        List of finding dicts with ``source_type`` "vendor".
    """
    rows: list[dict] = []
    for vendor in vendors or []:
        vendor_name = (
            vendor.get("name") or vendor.get("vendor_name") or "Unbekannter Anbieter"
        )
        country = vendor.get("country") or ""
        risk = vendor.get("compliance_risk") or {}
        flags = vendor.get("compliance_flags") or vendor.get("flags") or []
        for flag in flags:
            fallback = ("LOW", flag.replace("_", " ").title())
            sev, label = _VENDOR_FLAG_SEVERITY.get(flag, fallback)
            rows.append({
                "source_type": "vendor",
                "doc_type": "-",
                "severity": sev,
                "status": "failed",
                "regulation": "DSGVO",
                "label": f"{vendor_name} — {label}",
                "hint": _vendor_hint(flag, vendor_name),
                "action_recipe": _safe_recipe(flag),
                "vendor_name": vendor_name,
                "category": (vendor.get("category") or "")[:64],
                "payload": {
                    "flag": flag,
                    "country": country,
                    "compliance_score": vendor.get("compliance_score"),
                    "category": vendor.get("category"),
                    "risk_label": risk.get("label"),
                    "high_risk_cookies": risk.get("high_risk_cookie_count"),
                    "schrems_ii_cookies": risk.get("schrems_ii_affected_cookies"),
                },
            })
    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def _vendor_hint(flag: str, name: str) -> str:
|
||||||
|
hints = {
|
||||||
|
"no_cookies_listed":
|
||||||
|
f"Bei '{name}' sind keine Cookies dokumentiert — DSK-Orientierungshilfe "
|
||||||
|
"verlangt Name + Zweck + Speicherdauer pro Cookie.",
|
||||||
|
"no_country":
|
||||||
|
f"Sitzland von '{name}' fehlt — bei Drittland-Anbieter "
|
||||||
|
"Art. 44 ff. DSGVO erforderlich.",
|
||||||
|
"no_privacy_url":
|
||||||
|
f"Link zur Datenschutzerklaerung von '{name}' fehlt — Art. 13 Abs. 1 lit. e.",
|
||||||
|
"broken_privacy_url":
|
||||||
|
f"Privacy-URL von '{name}' nicht erreichbar (404/Timeout).",
|
||||||
|
"no_opt_out_url":
|
||||||
|
f"Opt-Out/Widerspruchs-Link fuer '{name}' fehlt — Art. 21 DSGVO.",
|
||||||
|
"broken_opt_out":
|
||||||
|
f"Opt-Out-Link von '{name}' nicht erreichbar.",
|
||||||
|
"no_name":
|
||||||
|
"Anbieter ohne Name erfasst — Art. 13 Abs. 1 lit. a.",
|
||||||
|
"no_purpose":
|
||||||
|
f"Verarbeitungszweck fuer '{name}' fehlt — Art. 13 Abs. 1 lit. c.",
|
||||||
|
}
|
||||||
|
return hints.get(flag, f"Flag: {flag}")
|
||||||
|
|
||||||
|
|
||||||
|
def _from_redundancies(check_id: str, report: dict | None) -> list[dict]:
|
||||||
|
"""Each redundancy category -> finding row (status='info', sev='LOW')."""
|
||||||
|
if not report:
|
||||||
|
return []
|
||||||
|
out: list[dict] = []
|
||||||
|
for r in (report.get("redundancies") or []):
|
||||||
|
cat = r.get("category_label") or r.get("category") or "Unbekannt"
|
||||||
|
vendors = r.get("vendors") or []
|
||||||
|
sav = r.get("estimated_saving_year_eur") or [0, 0]
|
||||||
|
out.append({
|
||||||
|
"source_type": "redundanz",
|
||||||
|
"doc_type": "-",
|
||||||
|
"severity": "LOW",
|
||||||
|
"status": "info",
|
||||||
|
"regulation": "Cost-Optimization",
|
||||||
|
"label": f"Mehrfach-Anbieter in '{cat}' ({len(vendors)} Tools)",
|
||||||
|
"hint": (
|
||||||
|
f"Anbieter: {', '.join(vendors[:6])}"
|
||||||
|
+ (f" (+{len(vendors)-6} weitere)" if len(vendors) > 6 else "")
|
||||||
|
+ (f" · EU-Empfehlung: {r['suggested_eu_tool']}"
|
||||||
|
if r.get("suggested_eu_tool") else "")
|
||||||
|
),
|
||||||
|
"action_recipe": {
|
||||||
|
"what": "Konsolidierung auf 1 Tool pro Kategorie pruefen.",
|
||||||
|
"why": (r.get("consolidation_hint") or
|
||||||
|
"Mehrfach-Lizenzen + Vertrags-Overhead reduzieren."),
|
||||||
|
"fix_text": "Migrations-Plan zu einem Anbieter erarbeiten; "
|
||||||
|
"Vertraege ueberlappend kuendigen.",
|
||||||
|
},
|
||||||
|
"category": cat,
|
||||||
|
"payload": {
|
||||||
|
"vendors": vendors[:20],
|
||||||
|
"saving_year_eur_low": sav[0],
|
||||||
|
"saving_year_eur_high": sav[1],
|
||||||
|
"suggested_eu_tool": r.get("suggested_eu_tool"),
|
||||||
|
"caveats": (r.get("caveats") or [])[:4],
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def collect(
    check_id: str,
    results: list[Any],
    cmp_vendors: list[dict] | None,
    redundancy_report: dict | None,
    doc_texts: dict[str, str] | None = None,
) -> list[dict]:
    """Gather findings from doc checks, vendors and redundancies into one list.

    Each source is converted independently; a failure in one source (or in a
    single document result) is logged as a warning and does not stop the
    others.

    Args:
        check_id: Id of the compliance check.
        results: Document check results; each needs a ``doc_type`` attribute.
        cmp_vendors: Vendor dicts from the CMP scan, or ``None``.
        redundancy_report: Redundancy report dict, or ``None``.
        doc_texts: Optional mapping doc_type -> document text for anchors.

    Returns:
        Combined list of finding dicts, ready for ``record_findings()``.
    """
    findings: list[dict] = []
    text_by_type = doc_texts or {}

    for result in (results or []):
        try:
            text = text_by_type.get(result.doc_type, "")
            findings.extend(_from_doc_check(check_id, result, text))
        except Exception as e:
            logger.warning("collect: doc result %s failed: %s",
                           getattr(result, "doc_type", "?"), e)

    try:
        vendor_rows = _from_vendors(check_id, cmp_vendors or [])
        findings.extend(vendor_rows)
    except Exception as e:
        logger.warning("collect: vendors failed: %s", e)

    try:
        redundancy_rows = _from_redundancies(check_id, redundancy_report)
        findings.extend(redundancy_rows)
    except Exception as e:
        logger.warning("collect: redundancies failed: %s", e)

    logger.info("collect: check=%s total_findings=%d", check_id, len(findings))
    return findings
|
||||||
@@ -0,0 +1,190 @@
|
|||||||
|
"""
|
||||||
|
Unified-Findings sidecar store.
|
||||||
|
|
||||||
|
A compliance check produces findings from 4 sources today:
|
||||||
|
- Master-Controls (mc_results table — already persisted)
|
||||||
|
- Pflichtangaben (L1/L2 doc checks, e.g. Impressum-Vollstaendigkeit)
|
||||||
|
- Vendor scans (per cmp_vendor: missing privacy url, no opt-out, ...)
|
||||||
|
- Redundancies (multi-vendor in same category)
|
||||||
|
|
||||||
|
Previously the DSB had to look in 4 different blocks of the email to
|
||||||
|
find everything. This store flattens all of them into ONE searchable
|
||||||
|
table so the /audit/<check_id> frontend can show a unified list with
|
||||||
|
source / severity / status / doc_type filters.
|
||||||
|
|
||||||
|
Sidecar SQLite (same DB as compliance_audit_log) — no Postgres
|
||||||
|
migration needed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db")
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_table() -> None:
    """Create the ``unified_findings`` table and its indexes if missing.

    Also creates the parent directory of ``DB_PATH`` when it does not exist.
    The connection is closed explicitly: ``with sqlite3.connect(...)`` only
    scopes the transaction and leaves the connection open.
    """
    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(DB_PATH)
    try:
        with conn:
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS unified_findings (
                    id            INTEGER PRIMARY KEY AUTOINCREMENT,
                    check_id      TEXT NOT NULL,
                    source_type   TEXT NOT NULL,  -- mc|pflichtangabe|vendor|redundanz
                    doc_type      TEXT,           -- impressum|dse|cookie|... or '-' for vendor/redundanz
                    severity      TEXT,           -- CRITICAL|HIGH|MEDIUM|LOW|INFO
                    status        TEXT,           -- failed|passed|skipped|na|info
                    regulation    TEXT,
                    label         TEXT,
                    hint          TEXT,
                    action_recipe TEXT,           -- JSON {what,why,fix_text,where,example}
                    anchor_excerpt TEXT,
                    anchor_conf   REAL,
                    vendor_name   TEXT,
                    category      TEXT,
                    payload       TEXT            -- JSON extras (matched_text, cookies count, ...)
                );
                CREATE INDEX IF NOT EXISTS idx_uf_check ON unified_findings(check_id);
                CREATE INDEX IF NOT EXISTS idx_uf_source ON unified_findings(check_id, source_type);
                CREATE INDEX IF NOT EXISTS idx_uf_status ON unified_findings(check_id, status);
                CREATE INDEX IF NOT EXISTS idx_uf_severity ON unified_findings(check_id, severity);
            """)
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def record_findings(check_id: str, findings: list[dict]) -> int:
    """Replace all stored findings of *check_id* with *findings*.

    Existing rows for the check are deleted and the new ones inserted in the
    same transaction, so repeated calls for one check do not pile up rows.
    Text fields are truncated to fixed lengths, missing values get defaults
    and ``action_recipe`` / ``payload`` are stored as JSON.

    Args:
        check_id: Id of the compliance check; empty means nothing is written.
        findings: Finding dicts as produced by the collector.

    Returns:
        Number of rows inserted, or ``0`` for an empty check id, an empty
        findings list, or any failure (failures are logged as warnings).
    """
    if not check_id:
        return 0
    try:
        _ensure_table()
        conn = sqlite3.connect(DB_PATH)
        try:
            with conn:  # commits on success, rolls back on error
                conn.execute(
                    "DELETE FROM unified_findings WHERE check_id=?", (check_id,),
                )
                if not findings:
                    return 0
                rows = [
                    (
                        check_id,
                        (f.get("source_type") or "mc")[:24],
                        (f.get("doc_type") or "")[:32],
                        (f.get("severity") or "MEDIUM").upper()[:16],
                        (f.get("status") or "failed")[:16],
                        (f.get("regulation") or "")[:64],
                        (f.get("label") or "")[:400],
                        (f.get("hint") or "")[:1200],
                        json.dumps(f.get("action_recipe") or {}, ensure_ascii=False),
                        (f.get("anchor_excerpt") or "")[:800],
                        float(f.get("anchor_conf") or 0.0),
                        (f.get("vendor_name") or "")[:160],
                        (f.get("category") or "")[:64],
                        json.dumps(f.get("payload") or {}, ensure_ascii=False),
                    )
                    for f in findings
                ]
                conn.executemany(
                    "INSERT INTO unified_findings "
                    "(check_id, source_type, doc_type, severity, status, regulation, "
                    " label, hint, action_recipe, anchor_excerpt, anchor_conf, "
                    " vendor_name, category, payload) "
                    "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                    rows,
                )
        finally:
            # The sqlite3 context manager would not close the connection.
            conn.close()
        # Log the source types exactly as stored, so an explicit None in a
        # finding cannot break sorting after the data is already committed.
        logger.info(
            "unified_findings: %s rows=%d sources=%s",
            check_id, len(rows),
            sorted({row[1] for row in rows}),
        )
        return len(rows)
    except Exception as e:
        logger.warning("record_findings failed for %s: %s", check_id, e)
        return 0
|
||||||
|
|
||||||
|
|
||||||
|
def list_findings(
    check_id: str,
    source_type: str | None = None,
    severity: str | None = None,
    doc_type: str | None = None,
    status: str | None = None,
    q: str | None = None,
    limit: int = 1000,
) -> list[dict]:
    """Return the stored findings of a check, filtered and sorted.

    A filter value of ``None``, empty, or ``"all"`` disables that filter.
    ``q`` is matched as a substring against label OR vendor_name after
    lower-casing both sides. NOTE: SQLite's built-in ``LOWER()`` only folds
    ASCII letters, so matching is not case-insensitive for non-ASCII
    characters such as umlauts.

    Rows are ordered by severity (CRITICAL, HIGH, MEDIUM, LOW, then anything
    else), then source_type, then label. ``action_recipe`` and ``payload``
    are decoded from JSON.

    Args:
        check_id: Id of the compliance check.
        source_type: Exact source type filter.
        severity: Severity filter; upper-cased before comparison.
        doc_type: Exact document type filter.
        status: Exact status filter.
        q: Free-text search term.
        limit: Maximum number of rows.

    Returns:
        List of finding dicts; ``[]`` on any failure (logged as a warning).
    """
    try:
        _ensure_table()
        where = ["check_id = ?"]
        params: list = [check_id]
        if source_type and source_type != "all":
            where.append("source_type = ?")
            params.append(source_type)
        if severity and severity != "all":
            where.append("severity = ?")
            params.append(severity.upper())
        if doc_type and doc_type != "all":
            where.append("doc_type = ?")
            params.append(doc_type)
        if status and status != "all":
            where.append("status = ?")
            params.append(status)
        if q:
            where.append("(LOWER(label) LIKE ? OR LOWER(vendor_name) LIKE ?)")
            needle = f"%{q.lower()}%"
            params.extend([needle, needle])
        # Only fixed clause fragments are concatenated; all values are bound.
        sql = ("SELECT * FROM unified_findings WHERE " + " AND ".join(where) +
               " ORDER BY CASE severity "
               "   WHEN 'CRITICAL' THEN 0 WHEN 'HIGH' THEN 1 "
               "   WHEN 'MEDIUM' THEN 2 WHEN 'LOW' THEN 3 "
               "   ELSE 4 END, source_type, label LIMIT ?")
        params.append(int(limit))
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute(sql, params).fetchall()
        finally:
            # The sqlite3 context manager would not close the connection.
            conn.close()
        out = []
        for r in rows:
            d = dict(r)
            d["action_recipe"] = json.loads(d.get("action_recipe") or "{}")
            d["payload"] = json.loads(d.get("payload") or "{}")
            out.append(d)
        return out
    except Exception as e:
        logger.warning("list_findings failed: %s", e)
        return []
|
||||||
|
|
||||||
|
|
||||||
|
def findings_summary(check_id: str) -> dict:
    """Return aggregate counts for the filter UI (source/severity/status/doc_type).

    Args:
        check_id: Value matched against the ``check_id`` column of
            ``unified_findings``.

    Returns:
        A dict with ``total`` plus the mappings ``by_source``,
        ``by_severity``, ``by_status`` and ``by_doc_type`` (column value ->
        row count). NULL/empty column values are counted under ``"-"``.
        On any failure a warning is logged and whatever has been collected
        so far is returned (the zeroed skeleton if nothing was collected).
    """
    # Local import keeps this block self-contained.
    from contextlib import closing

    out = {
        "total": 0,
        "by_source": {},
        "by_severity": {},
        "by_status": {},
        "by_doc_type": {},
    }
    # Column name -> key in the result dict. Column names come only from this
    # fixed mapping, so interpolating them into the SQL below is safe.
    buckets = {
        "source_type": "by_source",
        "severity": "by_severity",
        "status": "by_status",
        "doc_type": "by_doc_type",
    }
    try:
        _ensure_table()
        # sqlite3's own context manager only commits/rolls back; closing()
        # makes sure the connection handle is actually released.
        with closing(sqlite3.connect(DB_PATH)) as conn:
            conn.row_factory = sqlite3.Row
            for col, bucket in buckets.items():
                rows = conn.execute(
                    f"SELECT {col} AS k, COUNT(*) AS n FROM unified_findings "
                    f"WHERE check_id=? GROUP BY {col}",
                    (check_id,),
                ).fetchall()
                # NULL and '' both fold into "-"; add them up instead of
                # letting one group overwrite the other.
                counts: dict = {}
                for r in rows:
                    key = r["k"] or "-"
                    counts[key] = counts.get(key, 0) + r["n"]
                out[bucket] = counts
                out["total"] = max(out["total"], sum(r["n"] for r in rows))
        return out
    except Exception as e:
        logger.warning("findings_summary failed: %s", e)
        return out
|
||||||
@@ -50,9 +50,12 @@ from compliance.api.agent_recurring_routes import router as agent_recurring_rout
|
|||||||
from compliance.api.agent_compare_routes import router as agent_compare_router
|
from compliance.api.agent_compare_routes import router as agent_compare_router
|
||||||
from compliance.api.agent_doc_check_routes import router as agent_doc_check_router
|
from compliance.api.agent_doc_check_routes import router as agent_doc_check_router
|
||||||
from compliance.api.agent_compliance_check_routes import router as agent_compliance_check_router
|
from compliance.api.agent_compliance_check_routes import router as agent_compliance_check_router
|
||||||
|
from compliance.api.agent_findings_routes import router as agent_findings_router
|
||||||
|
from compliance.api.saving_scan_routes import router as saving_scan_router
|
||||||
from compliance.api.agent_migration_routes import router as agent_migration_router
|
from compliance.api.agent_migration_routes import router as agent_migration_router
|
||||||
from compliance.api.vendor_assessment_routes import router as vendor_assessment_router
|
from compliance.api.vendor_assessment_routes import router as vendor_assessment_router
|
||||||
from compliance.api.cra_routes import router as cra_router
|
from compliance.api.cra_routes import router as cra_router
|
||||||
|
from compliance.api.quaidal_routes import router as quaidal_router
|
||||||
|
|
||||||
# Middleware
|
# Middleware
|
||||||
from middleware import (
|
from middleware import (
|
||||||
@@ -157,6 +160,8 @@ app.include_router(agent_recurring_router, prefix="/api")
|
|||||||
app.include_router(agent_compare_router, prefix="/api")
|
app.include_router(agent_compare_router, prefix="/api")
|
||||||
app.include_router(agent_doc_check_router, prefix="/api")
|
app.include_router(agent_doc_check_router, prefix="/api")
|
||||||
app.include_router(agent_compliance_check_router, prefix="/api")
|
app.include_router(agent_compliance_check_router, prefix="/api")
|
||||||
|
app.include_router(agent_findings_router, prefix="/api")
|
||||||
|
app.include_router(saving_scan_router, prefix="/api")
|
||||||
app.include_router(agent_migration_router, prefix="/api")
|
app.include_router(agent_migration_router, prefix="/api")
|
||||||
|
|
||||||
# Vendor Contract Assessment
|
# Vendor Contract Assessment
|
||||||
@@ -164,6 +169,7 @@ app.include_router(vendor_assessment_router, prefix="/api")
|
|||||||
|
|
||||||
# CRA (Cyber Resilience Act) Compliance
|
# CRA (Cyber Resilience Act) Compliance
|
||||||
app.include_router(cra_router, prefix="/api")
|
app.include_router(cra_router, prefix="/api")
|
||||||
|
app.include_router(quaidal_router, prefix="/api")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -0,0 +1,116 @@
|
|||||||
|
"""
|
||||||
|
Tests for the saving-scan funnel endpoint.
|
||||||
|
|
||||||
|
Focus: input validation + lead persistence + rate-limit error path.
|
||||||
|
The actual compliance check is mocked — we only verify the route layer.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from fastapi import FastAPI
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
# Make the parent directory importable so ``compliance`` resolves.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

# Use a temp SQLite for the sidecar; remove any leftover file first.
# The environment variable must be set before the router is imported below.
_TEST_DB_PATH = "/tmp/test_saving_scan.db"
os.environ["COMPLIANCE_AUDIT_DB"] = _TEST_DB_PATH
if os.path.exists(_TEST_DB_PATH):
    os.remove(_TEST_DB_PATH)

from compliance.api.saving_scan_routes import router  # noqa: E402

# Minimal app that mounts only the router under test.
app = FastAPI()
app.include_router(router, prefix="/api")
client = TestClient(app)
|
||||||
|
|
||||||
|
|
||||||
|
class TestStartSavingScanValidation:
    """Rejection paths of the saving-scan start endpoint."""

    _START_URL = "/api/compliance/agent/saving-scan/start"

    def _post_with_patched_task(self, payload):
        """POST ``payload`` to the start endpoint with ``asyncio.create_task`` patched."""
        with patch("compliance.api.saving_scan_routes.asyncio.create_task"):
            return client.post(self._START_URL, json=payload)

    def test_missing_email_returns_422(self):
        # No patching here: the request is expected to be rejected with 422.
        payload = {"url": "https://example.de"}
        response = client.post(self._START_URL, json=payload)
        assert response.status_code == 422

    def test_invalid_email_returns_400(self):
        response = self._post_with_patched_task(
            {"url": "https://example.de", "email": "kein-email", "consent": True}
        )
        assert response.status_code == 400
        assert "E-Mail" in response.json()["detail"]

    def test_invalid_url_returns_400(self):
        response = self._post_with_patched_task(
            {"url": "ftp://wrong.de", "email": "u@x.de", "consent": True}
        )
        assert response.status_code == 400

    def test_consent_required(self):
        response = self._post_with_patched_task(
            {"url": "https://example.de", "email": "u@x.de", "consent": False}
        )
        assert response.status_code == 400
        assert "Consent" in response.json()["detail"]
|
||||||
|
|
||||||
|
|
||||||
|
def _patch_check_runner():
    """Stub the lazy-imported worker — avoids loading smtp_sender (Py3.10+).

    Registers a fake ``compliance.api.agent_compliance_check_routes`` module
    in ``sys.modules`` exposing ``DocumentInput``, ``ComplianceCheckRequest``,
    ``_run_compliance_check`` and ``_compliance_check_jobs``. The entry is not
    restored afterwards.
    """
    import sys
    import types

    class _DocInput:
        def __init__(self, doc_type="other", url=""):
            self.doc_type = doc_type
            self.url = url

    class _Req:
        def __init__(self, **kw):
            # Accept arbitrary keyword fields and expose them as attributes.
            self.__dict__.update(kw)

    async def _runner(*_a, **_kw):
        pass

    module_name = "compliance.api.agent_compliance_check_routes"
    stub = types.ModuleType(module_name)
    stub_attrs = (
        ("DocumentInput", _DocInput),
        ("ComplianceCheckRequest", _Req),
        ("_run_compliance_check", _runner),
        ("_compliance_check_jobs", {}),
    )
    for attr_name, attr_value in stub_attrs:
        setattr(stub, attr_name, attr_value)
    sys.modules[module_name] = stub
|
||||||
|
|
||||||
|
|
||||||
|
class TestStartSavingScanSuccess:
    """Happy path of the saving-scan start endpoint."""

    def test_valid_request_starts_check(self):
        # Swap in the stub worker module before hitting the route.
        _patch_check_runner()
        payload = {
            "url": "https://example-newdomain.de",
            "email": "user@example.de",
            "consent": True,
        }
        response = client.post(
            "/api/compliance/agent/saving-scan/start", json=payload
        )
        assert response.status_code == 200, response.text
        body = response.json()
        assert "check_id" in body
        assert body["status"] == "running"
        assert "example-newdomain.de" in body["message"]
|
||||||
|
|
||||||
|
|
||||||
|
class TestLeadCount:
    """Lead persistence as seen through the lead-count endpoint."""

    def test_lead_count_after_submit(self):
        # Swap in the stub worker module before submitting a lead.
        _patch_check_runner()
        submission = {
            "url": "https://abc-leadtest.de",
            "email": "lead@x.de",
            "consent": True,
        }
        client.post("/api/compliance/agent/saving-scan/start", json=submission)

        response = client.get("/api/compliance/agent/saving-scan/lead-count")
        assert response.status_code == 200
        stats = response.json()
        assert stats["total_leads"] >= 1
        assert "abc-leadtest.de" in str(stats["top_domains"])
|
||||||
Reference in New Issue
Block a user