Compare commits

...

2 Commits

Author SHA1 Message Date
Benjamin Admin a3287cd5e6 feat: HTML email report with hints + fix duplicate Social Media sections
Build + Deploy / build-admin-compliance (push) Successful in 1m45s
Build + Deploy / build-backend-compliance (push) Successful in 9s
Build + Deploy / build-ai-sdk (push) Successful in 36s
Build + Deploy / build-developer-portal (push) Successful in 7s
Build + Deploy / build-tts (push) Successful in 7s
Build + Deploy / build-document-crawler (push) Successful in 8s
Build + Deploy / build-dsms-gateway (push) Successful in 7s
Build + Deploy / build-dsms-node (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 15s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m47s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 44s
CI / test-python-backend (push) Successful in 41s
CI / test-python-document-crawler (push) Successful in 26s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 2m23s
1. Email report now renders as styled HTML (matching frontend design):
   - Progress bars (green=completeness, blue=correctness)
   - Hierarchical L1→L2 check display
   - Red hint boxes under failed checks explaining what to fix
   - Matched text evidence for passed checks

2. Section splitter deduplicates: two "Social Media" headings on the
   same page are merged into one section instead of creating duplicates.

3. Extracted report builder to agent_doc_check_report.py (175 LOC)
   to keep routes file under 500 LOC (386 LOC).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-07 15:13:00 +02:00
Benjamin Admin 56892cf7dc feat: CE × Compliance Crossover Engine
Automatische Erkennung von DSGVO/AI Act/CRA/NIS2/Data Act
Implikationen bei CE-Gefaehrdungen. 50 Trigger-Mappings auf
Hazard-Patterns → Compliance-Module mit Modul-Links.

- compliance_triggers.go: 50 Pattern→Regulation Mappings
- compliance_crossover.go: Engine die Projekt-Hazards gegen Trigger prueft
- iace_handler_compliance.go: GET /compliance-triggers API
- ComplianceAlerts.tsx: Frontend Alert-Panel auf Projekt-Uebersicht

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-07 15:07:22 +02:00
8 changed files with 1215 additions and 67 deletions
@@ -0,0 +1,218 @@
'use client'
import { useState, useEffect } from 'react'
import Link from 'next/link'
/**
 * One compliance hint as rendered by the alert panel (flat shape).
 *
 * NOTE(review): the Go handler added in the same change serialises a nested
 * `TriggerResult` (`hazard_id`, `hazard_name`, `pattern_id`, `trigger: {...}`)
 * rather than these flat fields — confirm whether a proxy reshapes the payload.
 */
interface ComplianceTrigger {
/** Used as the React list key and as the key in the expanded-row set. */
id: string
/** Regulation name; compared against REGULATION_BADGES keys and shown in the row header. */
regulation: string
/** Shown in the row header between regulation and title. */
article: string
/** Shown in the row header after the article. */
title: string
/** Selects the colour scheme (SEVERITY_CONFIG) and the badge text (SEVERITY_LABELS). */
severity: 'high' | 'medium' | 'low'
/** Rendered after the "Grund:" label in the expanded detail. */
reason: string
/** Rendered in the expanded detail only when present and greater than zero. */
affected_hazard_count?: number
/** Link target of the "oeffnen" link in the expanded detail. */
module_path: string
/** Link text prefix of the "oeffnen" link in the expanded detail. */
module_label: string
}
/**
 * Shape stored in component state after a successful fetch.
 * The component also accepts a bare array response and wraps it into this shape.
 */
interface TriggersResponse {
/** Hints to render; an empty list hides the whole panel. */
triggers: ComplianceTrigger[]
/** Count reported with the list; the visible component renders `triggers.length` instead. */
total: number
}
/**
 * Tailwind class sets per severity level (light and dark variants).
 * Looked up by `trigger.severity`; the component falls back to the `low`
 * entry when the severity is not one of the keys below.
 * `border`/`bg` style the row container, `badge` the severity pill and
 * `text` the module link. `icon` is not referenced by the component code
 * visible in this file.
 */
const SEVERITY_CONFIG: Record<string, { border: string; bg: string; text: string; badge: string; icon: string }> = {
// Red palette.
high: {
border: 'border-red-200 dark:border-red-800',
bg: 'bg-red-50 dark:bg-red-900/20',
text: 'text-red-700 dark:text-red-400',
badge: 'bg-red-100 text-red-800 dark:bg-red-900/50 dark:text-red-300',
icon: 'text-red-500',
},
// Yellow palette.
medium: {
border: 'border-yellow-200 dark:border-yellow-800',
bg: 'bg-yellow-50 dark:bg-yellow-900/20',
text: 'text-yellow-700 dark:text-yellow-400',
badge: 'bg-yellow-100 text-yellow-800 dark:bg-yellow-900/50 dark:text-yellow-300',
icon: 'text-yellow-500',
},
// Blue palette; also the fallback for unknown severities.
low: {
border: 'border-blue-200 dark:border-blue-800',
bg: 'bg-blue-50 dark:bg-blue-900/20',
text: 'text-blue-700 dark:text-blue-400',
badge: 'bg-blue-100 text-blue-800 dark:bg-blue-900/50 dark:text-blue-300',
icon: 'text-blue-500',
},
}
/**
 * German badge text per severity. When a severity has no entry here the
 * component renders the raw severity string instead.
 */
const SEVERITY_LABELS: Record<string, string> = {
high: 'HOCH',
medium: 'MITTEL',
low: 'NIEDRIG',
}
/**
 * Summary pills shown above the trigger list, one per regulation family.
 * A pill uses `activeColor` when at least one trigger belongs to `key`,
 * otherwise a neutral grey style.
 *
 * NOTE(review): the Go trigger table in this change emits regulation strings
 * such as "DSGVO Art. 35" and "EU Data Act Art. 3", which are not equal to
 * these keys — confirm how regulations are matched against `key`.
 */
const REGULATION_BADGES: { key: string; label: string; activeColor: string }[] = [
{ key: 'DSGVO', label: 'DSGVO', activeColor: 'bg-red-100 text-red-800 border-red-300' },
{ key: 'AI Act', label: 'AI Act', activeColor: 'bg-orange-100 text-orange-800 border-orange-300' },
{ key: 'CRA', label: 'CRA', activeColor: 'bg-yellow-100 text-yellow-800 border-yellow-300' },
{ key: 'NIS2', label: 'NIS2', activeColor: 'bg-indigo-100 text-indigo-800 border-indigo-300' },
{ key: 'Data Act', label: 'Data Act', activeColor: 'bg-amber-100 text-amber-800 border-amber-300' },
]
/**
 * Outline warning icon drawn with the current text colour.
 *
 * @param props.className Optional classes forwarded to the `<svg>` element
 *   (size and colour are controlled by the caller).
 */
function WarningIcon(props: { className?: string }) {
  const { className } = props
  return (
    <svg className={className} fill="none" viewBox="0 0 24 24" stroke="currentColor">
      <path
        strokeLinecap="round"
        strokeLinejoin="round"
        strokeWidth={2}
        d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-2.5L13.732 4.5c-.77-.833-2.694-.833-3.464 0L3.34 16.5c-.77.833.192 2.5 1.732 2.5z"
      />
    </svg>
  )
}
/**
 * Small grey chevron that points down when closed and is rotated by
 * 180 degrees (with a transition) when `open` is true.
 *
 * @param props.open Whether the section this chevron belongs to is expanded.
 */
function ChevronIcon(props: { open: boolean }) {
  const rotation = props.open ? 'rotate-180' : ''
  const svgClasses = `w-4 h-4 text-gray-400 transition-transform ${rotation}`
  return (
    <svg className={svgClasses} fill="none" viewBox="0 0 24 24" stroke="currentColor">
      <path
        strokeLinecap="round"
        strokeLinejoin="round"
        strokeWidth={2}
        d="M19 9l-7 7-7-7"
      />
    </svg>
  )
}
/** Narrows an unknown JSON value to a plain (non-array) object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value)
}

/** Returns the first candidate that is a non-empty string, or '' when none is. */
function firstString(...candidates: unknown[]): string {
  for (const candidate of candidates) {
    if (typeof candidate === 'string' && candidate !== '') return candidate
  }
  return ''
}

/**
 * Converts one wire entry into the flat `ComplianceTrigger` the panel renders.
 *
 * Two shapes are accepted:
 *  - the flat shape declared by `ComplianceTrigger`, and
 *  - the nested shape serialised by the Go crossover engine:
 *    `{ hazard_id, hazard_name, pattern_id, trigger: { regulation,
 *    trigger_cond_de, severity, module, module_link, action_de, ... } }`.
 * Flat fields win when both are present.
 *
 * @param raw   One element of the `triggers` array as received.
 * @param index Position in the array; used to build a unique fallback id.
 * @returns The normalised trigger, or `null` when `raw` is not an object.
 */
function normalizeTrigger(raw: unknown, index: number): ComplianceTrigger | null {
  if (!isRecord(raw)) return null
  const nested = isRecord(raw.trigger) ? raw.trigger : {}

  const regulation = firstString(raw.regulation, nested.regulation)
  const moduleLabel = firstString(raw.module_label, nested.module)
  const count = raw.affected_hazard_count

  // The nested shape carries no id; derive a stable, unique one so React keys
  // and the expanded-row set do not collapse onto `undefined`.
  const fallbackId = [firstString(raw.pattern_id), regulation, moduleLabel, String(index)].join('|')

  return {
    id: firstString(raw.id) || fallbackId,
    regulation,
    article: firstString(raw.article),
    title: firstString(raw.title, nested.action_de),
    // The wire value is not validated here: an unknown severity keeps its raw
    // text so the badge can show it, and styling falls back to `low`.
    severity: firstString(raw.severity, nested.severity) as ComplianceTrigger['severity'],
    reason: firstString(raw.reason, nested.trigger_cond_de),
    affected_hazard_count: typeof count === 'number' ? count : undefined,
    module_path: firstString(raw.module_path, nested.module_link),
    module_label: moduleLabel,
  }
}

/**
 * Collapsible panel listing the compliance hints derived for a project.
 *
 * Fetches `/api/sdk/v1/iace/projects/{projectId}/compliance-triggers`,
 * accepts either `{ triggers: [...] }` or a bare array, and renders nothing
 * while loading, on failure, or when there are no hints.
 *
 * @param props.projectId Project whose compliance triggers are shown.
 */
export function ComplianceAlerts({ projectId }: { projectId: string }) {
  const [data, setData] = useState<TriggersResponse | null>(null)
  const [loading, setLoading] = useState(true)
  const [collapsed, setCollapsed] = useState(false)
  const [expandedIds, setExpandedIds] = useState<Set<string>>(new Set())

  useEffect(() => {
    // Abort the request when projectId changes or the component unmounts so a
    // stale response can never overwrite the state of the current project.
    const controller = new AbortController()
    setLoading(true)
    setData(null)
    setExpandedIds(new Set())

    fetch(`/api/sdk/v1/iace/projects/${projectId}/compliance-triggers`, { signal: controller.signal })
      .then((r) => (r.ok ? r.json() : null))
      .then((json: unknown) => {
        let rawList: unknown[] | null = null
        if (Array.isArray(json)) rawList = json
        else if (isRecord(json) && Array.isArray(json.triggers)) rawList = json.triggers
        if (rawList === null) return

        const normalized = rawList
          .map(normalizeTrigger)
          .filter((t): t is ComplianceTrigger => t !== null)
        setData({ triggers: normalized, total: normalized.length })
      })
      .catch((err: unknown) => {
        // Aborts are expected; anything else is reported but stays non-fatal
        // because the panel is purely informational.
        if (controller.signal.aborted) return
        console.warn('ComplianceAlerts: failed to load compliance triggers', err)
      })
      .finally(() => {
        if (!controller.signal.aborted) setLoading(false)
      })

    return () => controller.abort()
  }, [projectId])

  if (loading) return null
  if (!data || data.triggers.length === 0) return null

  const triggers = data.triggers

  // Regulation strings may carry an article suffix or an "EU " prefix
  // (e.g. "DSGVO Art. 35", "EU Data Act Art. 3"), so match by containment.
  const isRegulationActive = (key: string): boolean =>
    triggers.some((t) => t.regulation.includes(key))

  function toggleExpanded(id: string) {
    setExpandedIds((prev) => {
      const next = new Set(prev)
      if (next.has(id)) next.delete(id)
      else next.add(id)
      return next
    })
  }

  return (
    <div className="bg-white dark:bg-gray-800 rounded-xl border border-red-200 dark:border-red-800">
      {/* Header */}
      <button
        type="button"
        aria-expanded={!collapsed}
        onClick={() => setCollapsed(!collapsed)}
        className="w-full flex items-center justify-between p-6 text-left"
      >
        <div className="flex items-center gap-3">
          <div className="w-10 h-10 bg-red-50 dark:bg-red-900/30 rounded-lg flex items-center justify-center">
            <WarningIcon className="w-5 h-5 text-red-600" />
          </div>
          <div>
            <h2 className="text-sm font-semibold text-gray-900 dark:text-white">
              {triggers.length} Compliance-Hinweise erkannt
            </h2>
            <p className="text-xs text-gray-500">
              Basierend auf den identifizierten Gefaehrdungen bestehen rechtliche Implikationen
            </p>
          </div>
        </div>
        <div className="w-8 h-8 flex items-center justify-center rounded-full hover:bg-gray-100 dark:hover:bg-gray-700 transition-colors flex-shrink-0">
          <ChevronIcon open={!collapsed} />
        </div>
      </button>
      {!collapsed && (
        <div className="px-6 pb-6 space-y-4">
          {/* Regulation summary badges */}
          <div className="flex flex-wrap gap-2">
            {REGULATION_BADGES.map((reg) => {
              const active = isRegulationActive(reg.key)
              return (
                <span
                  key={reg.key}
                  className={`px-2.5 py-1 text-xs font-medium rounded-full border ${
                    active
                      ? reg.activeColor
                      : 'bg-gray-50 text-gray-400 border-gray-200 dark:bg-gray-700 dark:text-gray-500 dark:border-gray-600'
                  }`}
                >
                  {reg.label}
                </span>
              )
            })}
          </div>
          {/* Trigger list */}
          <div className="space-y-2">
            {triggers.map((trigger) => {
              const sev = SEVERITY_CONFIG[trigger.severity] || SEVERITY_CONFIG.low
              const isOpen = expandedIds.has(trigger.id)
              const heading = [trigger.regulation, trigger.article, trigger.title]
                .filter((part) => part !== '')
                .join(' ')
              return (
                <div key={trigger.id} className={`rounded-lg border ${sev.border} ${sev.bg} overflow-hidden`}>
                  {/* Trigger header row */}
                  <button
                    type="button"
                    aria-expanded={isOpen}
                    onClick={() => toggleExpanded(trigger.id)}
                    className="w-full flex items-center gap-3 px-4 py-3 text-left"
                  >
                    <ChevronIcon open={isOpen} />
                    <div className="flex-1 min-w-0">
                      <span className="text-sm font-medium text-gray-900 dark:text-white">
                        {heading}
                      </span>
                    </div>
                    <span className={`px-2 py-0.5 text-xs font-bold rounded ${sev.badge}`}>
                      {SEVERITY_LABELS[trigger.severity] || trigger.severity}
                    </span>
                  </button>
                  {/* Expanded detail */}
                  {isOpen && (
                    <div className="px-4 pb-4 pt-0 ml-7 space-y-2">
                      <p className="text-xs text-gray-700 dark:text-gray-300">
                        <span className="font-medium">Grund:</span> {trigger.reason}
                      </p>
                      {trigger.affected_hazard_count != null && trigger.affected_hazard_count > 0 && (
                        <p className="text-xs text-gray-500">
                          Betroffene Gefaehrdungen: {trigger.affected_hazard_count}
                        </p>
                      )}
                      {/* Link requires a target; skip it when the payload has none. */}
                      {trigger.module_path !== '' && (
                        <Link
                          href={trigger.module_path}
                          className={`inline-flex items-center gap-1.5 text-xs font-medium ${sev.text} hover:underline`}
                        >
                          {trigger.module_label} oeffnen
                          <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 7l5 5m0 0l-5 5m5-5H6" />
                          </svg>
                        </Link>
                      )}
                    </div>
                  )}
                </div>
              )
            })}
          </div>
          {/* Disclaimer */}
          <div className="p-3 rounded-lg bg-amber-50 dark:bg-amber-900/20 border border-amber-200 dark:border-amber-800 text-xs text-amber-800 dark:text-amber-300">
            <strong>Hinweis:</strong> Diese Compliance-Hinweise werden automatisch aus den
            Gefaehrdungen und Klassifikationen abgeleitet. Der CE-Fachmann muss die
            regulatorischen Anforderungen im jeweiligen Modul verifizieren.
          </div>
        </div>
      )}
    </div>
  )
}
@@ -4,6 +4,7 @@ import React, { useState, useEffect } from 'react'
import Link from 'next/link'
import { useParams } from 'next/navigation'
import { SuggestedNorms } from './_components/SuggestedNorms'
import { ComplianceAlerts } from './_components/ComplianceAlerts'
interface ProjectOverview {
id: string
@@ -349,6 +350,9 @@ export default function ProjectOverviewPage() {
</div>
</div>
{/* Compliance Alerts */}
<ComplianceAlerts projectId={projectId} />
{/* Suggested Norms */}
<SuggestedNorms projectId={projectId} />
@@ -0,0 +1,86 @@
package handlers
import (
"net/http"
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// ============================================================================
// CE x Compliance Crossover Engine
// ============================================================================
// GetComplianceTriggers serves GET /projects/:id/compliance-triggers.
//
// Steps:
//  1. parse the :id path parameter as a UUID (400 on failure),
//  2. load the project (500 on store error, 404 when the store returns nil),
//  3. load the project's hazards (500 on store error),
//  4. match the project's component tags against the pattern engine and add
//     one lightweight pseudo-hazard per matched pattern, with the pattern ID
//     embedded in Description as "Pattern <ID>",
//  5. hand the combined hazard list and the full pattern library to
//     iace.GetProjectComplianceTriggers and return its result as JSON (200).
func (h *IACEHandler) GetComplianceTriggers(c *gin.Context) {
	projectID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid project ID"})
		return
	}

	ctx := c.Request.Context()

	// The project must exist before any further work is done.
	project, projErr := h.store.GetProject(ctx, projectID)
	switch {
	case projErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{"error": projErr.Error()})
		return
	case project == nil:
		c.JSON(http.StatusNotFound, gin.H{"error": "project not found"})
		return
	}

	stored, listErr := h.store.ListHazards(ctx, projectID)
	if listErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to load hazards: " + listErr.Error()})
		return
	}

	// Component tags feed the pattern engine so that patterns which have not
	// been applied as hazards yet still contribute their compliance triggers.
	tags := collectComponentTags(h, c, projectID)
	library := iace.AllPatterns()
	matched := iace.NewPatternEngine().Match(iace.MatchInput{CustomTags: tags})

	// Stored hazards first, then one pseudo-hazard per matched pattern.
	combined := make([]iace.Hazard, 0, len(stored)+len(matched.MatchedPatterns))
	combined = append(combined, stored...)
	for _, pm := range matched.MatchedPatterns {
		pseudo := iace.Hazard{
			Name:        pm.PatternName,
			Description: "Pattern " + pm.PatternID,
			Category:    firstOrEmpty(pm.HazardCats),
		}
		combined = append(combined, pseudo)
	}

	c.JSON(http.StatusOK, iace.GetProjectComplianceTriggers(combined, library))
}
// firstOrEmpty yields ss[0], or the empty string when ss is nil or empty.
func firstOrEmpty(ss []string) string {
	if len(ss) == 0 {
		return ""
	}
	return ss[0]
}
+3
View File
@@ -422,6 +422,9 @@ func registerIACERoutes(v1 *gin.RouterGroup, h *handlers.IACEHandler) {
iaceRoutes.GET("/production-lines/:lid/dashboard", h.GetProductionLineDashboard)
iaceRoutes.POST("/production-lines/:lid/stations", h.AddStationToLine)
iaceRoutes.DELETE("/production-lines/:lid/stations/:sid", h.RemoveStationFromLine)
// CE x Compliance Crossover
iaceRoutes.GET("/projects/:id/compliance-triggers", h.GetComplianceTriggers)
}
}
@@ -0,0 +1,254 @@
package iace
import "sort"
// GetProjectComplianceTriggers derives the regulatory triggers for a project
// from its hazards and the pattern library.
//
// A pattern counts as "fired" when
//   - its ID ("HP" + digits) appears in a hazard's Description, Name or
//     Scenario, or
//   - one of its GeneratedHazardCats equals the Category of any hazard.
//
// Triggers of fired patterns are looked up in GetComplianceTriggerMap, then
// tag-based triggers (GetTagBasedTriggers over collectHazardTags) are added.
// Results are deduplicated on Regulation+"|"+Module. When several candidates
// share a key the one with the highest severity is kept; on equal severity
// the first candidate in processing order (pattern IDs ascending, then
// tag-based triggers) wins. This makes the output independent of Go's
// randomised map iteration order.
//
// The returned summary holds the triggers sorted high > medium > low (stable
// within a severity), their count, and the boolean flags computed by
// buildSummaryFlags. Triggers is always a non-nil slice so it serialises as
// a JSON array even when empty.
func GetProjectComplianceTriggers(hazards []Hazard, patterns []HazardPattern) *ComplianceTriggerSummary {
	triggerMap := GetComplianceTriggerMap()

	// Pattern IDs mentioned in hazard text, plus the set of hazard categories.
	firedPatterns := make(map[string]bool)
	hazardCats := make(map[string]bool)
	for _, h := range hazards {
		extractPatternIDs(h.Description, firedPatterns)
		extractPatternIDs(h.Name, firedPatterns)
		extractPatternIDs(h.Scenario, firedPatterns)
		if h.Category != "" {
			hazardCats[h.Category] = true
		}
	}

	// Patterns that generate a category present among the hazards also fire.
	for _, p := range patterns {
		for _, cat := range p.GeneratedHazardCats {
			if hazardCats[cat] {
				firedPatterns[p.ID] = true
				break
			}
		}
	}

	// Fix the processing order: ranging over the map directly would make the
	// surviving duplicate (and therefore the response) vary between calls.
	firedIDs := make([]string, 0, len(firedPatterns))
	for pid := range firedPatterns {
		firedIDs = append(firedIDs, pid)
	}
	sort.Strings(firedIDs)

	results := make([]TriggerResult, 0, len(firedIDs))
	indexByKey := make(map[string]int)

	// record adds a candidate, or upgrades the stored entry for the same
	// Regulation|Module key when the candidate is strictly more severe.
	record := func(candidate TriggerResult) {
		key := candidate.Trigger.Regulation + "|" + candidate.Trigger.Module
		if at, seen := indexByKey[key]; seen {
			if severityRank(candidate.Trigger.Severity) > severityRank(results[at].Trigger.Severity) {
				results[at] = candidate
			}
			return
		}
		indexByKey[key] = len(results)
		results = append(results, candidate)
	}

	for _, pid := range firedIDs {
		triggers, ok := triggerMap[pid]
		if !ok {
			continue
		}
		// Hazard name used as context for every trigger of this pattern.
		hName := findHazardNameForPattern(pid, hazards)
		for _, t := range triggers {
			record(TriggerResult{
				HazardID:   "",
				HazardName: hName,
				PatternID:  pid,
				Trigger:    t,
			})
		}
	}

	// Tag-based triggers inferred from hazard categories.
	for _, t := range GetTagBasedTriggers(collectHazardTags(hazards)) {
		record(TriggerResult{
			HazardID:   "",
			HazardName: "Tag-basiert",
			PatternID:  "",
			Trigger:    t,
		})
	}

	// Stable sort keeps the deterministic insertion order within a severity.
	sort.SliceStable(results, func(i, j int) bool {
		return severityRank(results[i].Trigger.Severity) > severityRank(results[j].Trigger.Severity)
	})

	return &ComplianceTriggerSummary{
		Triggers: results,
		Total:    len(results),
		Summary:  buildSummaryFlags(results),
	}
}
// AllPatterns concatenates the hazard patterns of every pattern source
// referenced below, starting with the built-in set and preserving the
// listed order.
// Per the original note this is meant to mirror the aggregation done in
// NewPatternEngine — that function is not visible here, so keep both lists
// in sync manually.
func AllPatterns() []HazardPattern {
	all := GetBuiltinHazardPatterns()

	// Remaining sources, in the order they are appended.
	batches := [][]HazardPattern{
		GetExtendedHazardPatterns(),
		GetPressHazardPatterns(),
		GetCobotHazardPatterns(),
		GetOperationalHazardPatterns(),
		GetDGUVExtendedPatterns(),
		GetExtendedHazardPatterns2(),
		GetElevatorPatterns(),
		GetAGVAgriPatterns(),
		GetFoodProcessingPatterns(),
		GetPackagingPatterns(),
		GetLaserPatterns(),
		GetMedicalDevicePatterns(),
		GetPressureEquipmentPatterns(),
		GetConstructionPatterns(),
		GetForestryConveyorPatterns(),
		GetPlasticsMetalPatterns(),
		GetWeldingGlassTextilePatterns(),
		GetSpecificMachinePatterns(),
		GetSpecificMachinePatterns2(),
		GetCyberExtendedPatterns(),
		GetCyberExtendedPatterns2(),
		GetCyberExtendedPatterns3(),
		GetWorkshopPatterns(),
		GetMaintenanceExtPatterns(),
		GetFinalPatternsA(),
		GetFinalPatternsB(),
		GetFinalPatternsC(),
		GetFinalPatternsD(),
	}
	for _, batch := range batches {
		all = append(all, batch...)
	}
	return all
}
// extractPatternIDs scans text for pattern IDs of the form "HP" followed by
// one or more ASCII digits (e.g. "HP059") and marks each one found in set.
//
// An ID is only recognised when it starts a token, i.e. the byte before the
// 'H' is not an ASCII letter or digit. This prevents words such as "PHP7" or
// "XHP123" from firing unrelated patterns. The digit run is taken greedily,
// so "HP0591" yields "HP0591", never "HP059".
//
// set must be non-nil; it is modified in place.
func extractPatternIDs(text string, set map[string]bool) {
	isDigit := func(b byte) bool { return b >= '0' && b <= '9' }
	isAlnum := func(b byte) bool {
		return isDigit(b) || (b >= 'A' && b <= 'Z') || (b >= 'a' && b <= 'z')
	}

	for i := 0; i+2 < len(text); i++ {
		if text[i] != 'H' || text[i+1] != 'P' || !isDigit(text[i+2]) {
			continue
		}
		// Reject matches embedded in a longer alphanumeric token.
		if i > 0 && isAlnum(text[i-1]) {
			continue
		}
		end := i + 3
		for end < len(text) && isDigit(text[end]) {
			end++
		}
		set[text[i:end]] = true
		// Everything up to end is consumed; resume right after the ID.
		i = end - 1
	}
}
// findHazardNameForPattern looks for the first hazard whose Description,
// Name or Scenario mentions pid (as judged by containsPatternID) and returns
// that hazard's Name.
//
// When no hazard mentions pid, the Name of the first hazard in the slice is
// returned as a generic context label; with an empty slice the result is "".
// NOTE(review): the fallback can attribute a trigger to an unrelated hazard —
// confirm that this is intended.
func findHazardNameForPattern(pid string, hazards []Hazard) string {
	for i := range hazards {
		hz := &hazards[i]
		for _, field := range [...]string{hz.Description, hz.Name, hz.Scenario} {
			if containsPatternID(field, pid) {
				return hz.Name
			}
		}
	}
	if len(hazards) == 0 {
		return ""
	}
	return hazards[0].Name
}
// containsPatternID reports whether text contains pid as a complete pattern
// ID token.
//
// An occurrence counts only when
//   - it is not followed by an ASCII digit (so "HP059" does not match inside
//     "HP0591"), and
//   - it is not preceded by an ASCII letter or digit (so "HP059" does not
//     match inside "PHP059").
//
// An empty pid never matches.
func containsPatternID(text, pid string) bool {
	if pid == "" {
		return false
	}
	isDigit := func(b byte) bool { return b >= '0' && b <= '9' }
	isAlnum := func(b byte) bool {
		return isDigit(b) || (b >= 'A' && b <= 'Z') || (b >= 'a' && b <= 'z')
	}

	for start := 0; start+len(pid) <= len(text); start++ {
		end := start + len(pid)
		if text[start:end] != pid {
			continue
		}
		// Part of a longer ID such as HP0591.
		if end < len(text) && isDigit(text[end]) {
			continue
		}
		// Embedded in a longer word such as PHP059.
		if start > 0 && isAlnum(text[start-1]) {
			continue
		}
		return true
	}
	return false
}
// collectHazardTags infers component-style tags from hazard categories.
//
// Category groups and the tags they imply:
//   - software / steuerung / steuerungsfehler  -> has_software, programmable
//   - cyber / cybersicherheit / netzwerk       -> is_networked, has_software
//   - ki / kuenstliche_intelligenz / ai_ml     -> has_ai, has_software
//   - sensorik / sensor                        -> sensor_part
//
// Every other category contributes nothing. Each tag appears at most once;
// the order of the returned slice is unspecified (it comes from a map).
func collectHazardTags(hazards []Hazard) []string {
	softwareTags := []string{"has_software", "programmable"}
	networkTags := []string{"is_networked", "has_software"}
	aiTags := []string{"has_ai", "has_software"}
	sensorTags := []string{"sensor_part"}

	impliedBy := map[string][]string{
		"software":                softwareTags,
		"steuerung":               softwareTags,
		"steuerungsfehler":        softwareTags,
		"cyber":                   networkTags,
		"cybersicherheit":         networkTags,
		"netzwerk":                networkTags,
		"ki":                      aiTags,
		"kuenstliche_intelligenz": aiTags,
		"ai_ml":                   aiTags,
		"sensorik":                sensorTags,
		"sensor":                  sensorTags,
	}

	seen := make(map[string]bool)
	for i := range hazards {
		for _, tag := range impliedBy[hazards[i].Category] {
			seen[tag] = true
		}
	}

	out := make([]string, 0, len(seen))
	for tag := range seen {
		out = append(out, tag)
	}
	return out
}
// severityRank converts a severity label into a sortable weight:
// "high" = 3, "medium" = 2, "low" = 1, anything else = 0.
// The comparison is exact and case-sensitive.
func severityRank(s string) int {
	if s == "high" {
		return 3
	}
	if s == "medium" {
		return 2
	}
	if s == "low" {
		return 1
	}
	return 0
}
// buildSummaryFlags computes one boolean per regulation family from the
// Regulation string of every result. All five keys are always present in
// the returned map; a key becomes true when at least one result's
// Regulation starts with the family prefix:
//
//	"DSGV"        -> dsfa_required
//	"AI Act"      -> ai_act_relevant
//	"CRA"         -> cra_relevant
//	"NIS2"        -> nis2_relevant
//	"EU Data Act" -> data_act_relevant
//
// NOTE(review): dsfa_required is raised by any regulation with the "DSGV"
// prefix (e.g. "DSGVO Art. 6"), not only Art. 35 — confirm that is intended.
func buildSummaryFlags(results []TriggerResult) map[string]bool {
	families := []struct {
		prefix string
		flag   string
	}{
		{"DSGV", "dsfa_required"},
		{"AI Act", "ai_act_relevant"},
		{"CRA", "cra_relevant"},
		{"NIS2", "nis2_relevant"},
		{"EU Data Act", "data_act_relevant"},
	}

	flags := make(map[string]bool, len(families))
	for _, fam := range families {
		flags[fam.flag] = false
	}

	for _, res := range results {
		regulation := res.Trigger.Regulation
		for _, fam := range families {
			n := len(fam.prefix)
			if len(regulation) >= n && regulation[:n] == fam.prefix {
				flags[fam.flag] = true
			}
		}
	}
	return flags
}
@@ -0,0 +1,439 @@
package iace
// ComplianceTrigger maps a CE hazard pattern to a regulatory requirement.
// When a pattern fires for a project, the corresponding triggers tell
// the user which DSGVO/AI Act/CRA/NIS2/Data Act obligations apply and
// which SDK module they should visit.
// Regulation together with Module forms the deduplication key used by the
// crossover engine, and the prefix of Regulation drives the summary flags.
type ComplianceTrigger struct {
Regulation string `json:"regulation"` // Regulation plus article/annex, e.g. "DSGVO Art. 35"
TriggerCondDE string `json:"trigger_cond_de"` // Why this triggers (German)
Severity string `json:"severity"` // "high", "medium", "low"
Module string `json:"module"` // SDK module key, e.g. "dsfa", "ai-act", "cyber"
ModuleLink string `json:"module_link"` // Frontend route, e.g. "/sdk/dsfa"
ActionDE string `json:"action_de"` // Recommended action (German)
RAGQuery string `json:"rag_query"` // Search query for RAG enrichment
}
// TriggerResult pairs a fired pattern with one of its compliance triggers.
// This is the element type of the "triggers" array in the API response.
type TriggerResult struct {
// HazardID is left empty by GetProjectComplianceTriggers as written in this change.
HazardID string `json:"hazard_id"`
// HazardName is a hazard name used as context, or "Tag-basiert" for tag-based triggers.
HazardName string `json:"hazard_name"`
// PatternID is the fired pattern (e.g. "HP059"); empty for tag-based triggers.
PatternID string `json:"pattern_id"`
// Trigger is the regulatory obligation implied by the pattern or tag combination.
Trigger ComplianceTrigger `json:"trigger"`
}
// ComplianceTriggerSummary is the top-level response for the crossover engine.
type ComplianceTriggerSummary struct {
// Triggers holds the deduplicated results, sorted by severity (high first).
Triggers []TriggerResult `json:"triggers"`
// Total is the number of entries in Triggers.
Total int `json:"total"`
// Summary holds one flag per regulation family: dsfa_required,
// ai_act_relevant, cra_relevant, nis2_relevant, data_act_relevant.
Summary map[string]bool `json:"summary"`
}
// GetComplianceTriggerMap returns pattern-ID-keyed compliance triggers.
// Each entry lists the regulatory obligations that a fired pattern implies.
//
// A fresh map is built on every call. Individually listed patterns come
// first; the contiguous ID ranges further down (HP199-HP213, HP800-HP814,
// HP815-HP829, HP830-HP844, HP845-HP864) each assign the same trigger set to
// every ID in the range.
func GetComplianceTriggerMap() map[string][]ComplianceTrigger {
m := make(map[string][]ComplianceTrigger)
// --- Cobot / camera / biometric patterns ---
m["HP059"] = []ComplianceTrigger{
{
Regulation: "DSGVO Art. 35",
TriggerCondDE: "Kamera-Personenerkennung verarbeitet biometrische Daten",
Severity: "high",
Module: "dsfa",
ModuleLink: "/sdk/dsfa",
ActionDE: "Datenschutz-Folgenabschaetzung fuer Kamera-System durchfuehren",
RAGQuery: "DSFA biometrische Daten Kameraerkennung",
},
{
Regulation: "AI Act Art. 6",
TriggerCondDE: "Autonome Sicherheitsentscheidung durch KI-System",
Severity: "high",
Module: "ai-act",
ModuleLink: "/sdk/ai-act",
ActionDE: "Hochrisiko-KI-Einstufung pruefen und dokumentieren",
RAGQuery: "AI Act Hochrisiko autonome Sicherheitsentscheidung",
},
}
m["HP060"] = []ComplianceTrigger{
{
Regulation: "DSGVO Art. 35",
TriggerCondDE: "Werkzeug-Tracking erfordert Personenerkennung",
Severity: "high",
Module: "dsfa",
ModuleLink: "/sdk/dsfa",
ActionDE: "DSFA fuer Werkzeug-Tracking mit Personenerkennung erstellen",
RAGQuery: "DSFA Personenerkennung Werkzeug-Tracking",
},
}
// --- AI/ML safety-critical patterns ---
m["HP040"] = []ComplianceTrigger{
{
Regulation: "AI Act Art. 6",
TriggerCondDE: "KI trifft sicherheitsrelevante Entscheidung",
Severity: "high",
Module: "ai-act",
ModuleLink: "/sdk/ai-act",
ActionDE: "Hochrisiko-Klassifizierung und Konformitaetsbewertung einleiten",
RAGQuery: "AI Act Art 6 Hochrisiko Sicherheitsentscheidung",
},
{
Regulation: "AI Act Art. 9",
TriggerCondDE: "Risikomanagement fuer Hochrisiko-KI erforderlich",
Severity: "high",
Module: "ai-act",
ModuleLink: "/sdk/ai-act",
ActionDE: "Risikomanagementsystem nach Art. 9 AI Act aufsetzen",
RAGQuery: "AI Act Art 9 Risikomanagementsystem Hochrisiko",
},
}
m["HP041"] = []ComplianceTrigger{
{
Regulation: "AI Act Art. 14",
TriggerCondDE: "Menschliche Aufsicht ueber KI-System erforderlich",
Severity: "high",
Module: "ai-act",
ModuleLink: "/sdk/ai-act",
ActionDE: "Human-Oversight-Mechanismus implementieren und dokumentieren",
RAGQuery: "AI Act Art 14 menschliche Aufsicht Human Oversight",
},
}
m["HP042"] = []ComplianceTrigger{
{
Regulation: "AI Act Art. 6",
TriggerCondDE: "Bias in sicherheitsrelevanter KI moeglich",
Severity: "high",
Module: "ai-act",
ModuleLink: "/sdk/ai-act",
ActionDE: "Bias-Analyse und Datenqualitaetspruefung durchfuehren",
RAGQuery: "AI Act Bias Diskriminierung Sicherheits-KI",
},
}
m["HP043"] = []ComplianceTrigger{
{
Regulation: "AI Act Art. 11",
TriggerCondDE: "Technische Dokumentation fuer KI-System erforderlich",
Severity: "medium",
Module: "ai-act",
ModuleLink: "/sdk/ai-act",
ActionDE: "Technische Dokumentation nach Anhang IV AI Act erstellen",
RAGQuery: "AI Act Art 11 technische Dokumentation Anhang IV",
},
}
m["HP044"] = []ComplianceTrigger{
{
Regulation: "AI Act Art. 13",
TriggerCondDE: "Transparenz-Anforderungen fuer KI-System",
Severity: "medium",
Module: "ai-act",
ModuleLink: "/sdk/ai-act",
ActionDE: "Transparenzhinweise und Nutzerdokumentation bereitstellen",
RAGQuery: "AI Act Art 13 Transparenz KI Nutzerinformation",
},
}
// --- Cyber Resilience Act (software/firmware) ---
m["HP033"] = []ComplianceTrigger{
{
Regulation: "CRA Art. 10",
TriggerCondDE: "Schwachstellenmanagement fuer Software-Komponente",
Severity: "high",
Module: "cyber",
ModuleLink: "/sdk/security-backlog",
ActionDE: "Vulnerability-Management-Prozess nach CRA einrichten",
RAGQuery: "CRA Art 10 Schwachstellenmanagement Software",
},
{
Regulation: "CRA Art. 13",
TriggerCondDE: "Sicherheitsupdates muessen bereitgestellt werden",
Severity: "medium",
Module: "cyber",
ModuleLink: "/sdk/security-backlog",
ActionDE: "Update-Strategie und Patch-Management dokumentieren",
RAGQuery: "CRA Art 13 Sicherheitsupdates Patch-Management",
},
}
m["HP158"] = []ComplianceTrigger{
{
Regulation: "CRA Art. 10",
TriggerCondDE: "Schwachstelle in Firmware erfordert Vulnerability-Handling",
Severity: "high",
Module: "cyber",
ModuleLink: "/sdk/security-backlog",
ActionDE: "Schwachstellenmeldung und Patch-Prozess nach CRA etablieren",
RAGQuery: "CRA Art 10 Firmware Schwachstelle Meldepflicht",
},
{
Regulation: "CRA Art. 11",
TriggerCondDE: "Meldepflicht bei bekannter Schwachstelle",
Severity: "high",
Module: "cyber",
ModuleLink: "/sdk/security-backlog",
ActionDE: "Meldeprozess an ENISA/BSI fuer Schwachstellen einrichten",
RAGQuery: "CRA Art 11 Meldepflicht ENISA Schwachstelle",
},
}
m["HP159"] = []ComplianceTrigger{
{
Regulation: "CRA Art. 10",
TriggerCondDE: "Datenintegritaet der Software muss sichergestellt sein",
Severity: "medium",
Module: "cyber",
ModuleLink: "/sdk/security-backlog",
ActionDE: "Integritaetsschutz fuer Software-Artefakte implementieren",
RAGQuery: "CRA Art 10 Datenintegritaet Software Signierung",
},
}
m["HP160"] = []ComplianceTrigger{
{
Regulation: "NIS2 Art. 21",
TriggerCondDE: "Cybersicherheits-Risikomanagement erforderlich",
Severity: "high",
Module: "cyber",
ModuleLink: "/sdk/security-backlog",
ActionDE: "Cybersicherheits-Risikomanagement nach NIS2 Art. 21 aufsetzen",
RAGQuery: "NIS2 Art 21 Cybersicherheit Risikomanagement",
},
{
Regulation: "CRA Anhang I",
TriggerCondDE: "Wesentliche Cybersicherheits-Anforderungen nach CRA",
Severity: "high",
Module: "cyber",
ModuleLink: "/sdk/security-backlog",
ActionDE: "CRA Anhang I Checkliste fuer Produkt-Cybersicherheit abarbeiten",
RAGQuery: "CRA Anhang I wesentliche Anforderungen Cybersicherheit",
},
}
// --- Logging/monitoring patterns ---
m["HP131"] = []ComplianceTrigger{
{
Regulation: "DSGVO Art. 6",
TriggerCondDE: "Rechtsgrundlage fuer Protokollierung personenbez. Daten",
Severity: "medium",
Module: "dsfa",
ModuleLink: "/sdk/dsfa",
ActionDE: "Rechtsgrundlage fuer Protokollierung pruefen und dokumentieren",
RAGQuery: "DSGVO Art 6 Rechtsgrundlage Protokollierung Logging",
},
}
// --- AGV / movement profile patterns (HP199-HP213) ---
// Every ID in the range receives the same two triggers.
agvIDs := genPatternRange("HP", 199, 213)
for _, pid := range agvIDs {
m[pid] = []ComplianceTrigger{
{
Regulation: "DSGVO Art. 35",
TriggerCondDE: "AGV-Bewegungsprofile koennen Rueckschluesse auf Personen erlauben",
Severity: "high",
Module: "dsfa",
ModuleLink: "/sdk/dsfa",
ActionDE: "DSFA fuer AGV-Bewegungsdaten erstellen",
RAGQuery: "DSFA Bewegungsprofile AGV Personenbezug",
},
{
Regulation: "EU Data Act Art. 3",
TriggerCondDE: "Maschinendaten-Zugangsrecht fuer Nutzer nach Data Act",
Severity: "medium",
Module: "vendor-compliance",
ModuleLink: "/sdk/vendor-compliance",
ActionDE: "Datenzugangsrechte nach EU Data Act fuer Maschinendaten pruefen",
RAGQuery: "EU Data Act Art 3 Maschinendaten Zugangsrecht",
},
}
}
// --- Cyber-security patterns HP800-HP814 ---
cyberIDs1 := genPatternRange("HP", 800, 814)
for _, pid := range cyberIDs1 {
m[pid] = []ComplianceTrigger{
{
Regulation: "NIS2 Art. 21",
TriggerCondDE: "Cybersicherheits-Risikomanagement fuer vernetzte Komponente",
Severity: "high",
Module: "cyber",
ModuleLink: "/sdk/security-backlog",
ActionDE: "NIS2 Cybersicherheits-Massnahmen pruefen und dokumentieren",
RAGQuery: "NIS2 Art 21 Cybersicherheit vernetzte Maschine",
},
{
Regulation: "CRA Art. 10",
TriggerCondDE: "Schwachstellenmanagement fuer vernetzte Komponente",
Severity: "high",
Module: "cyber",
ModuleLink: "/sdk/security-backlog",
ActionDE: "CRA-konforme Schwachstellenbehandlung einrichten",
RAGQuery: "CRA Art 10 Schwachstellenmanagement vernetzte Maschine",
},
}
}
// --- Cyber-security patterns HP815-HP829 ---
// Note: the CRA Art. 10 trigger here is "medium", unlike the HP800-HP814 range.
cyberIDs2 := genPatternRange("HP", 815, 829)
for _, pid := range cyberIDs2 {
m[pid] = []ComplianceTrigger{
{
Regulation: "NIS2 Art. 21",
TriggerCondDE: "Netzwerk-Sicherheitsmassnahmen nach NIS2",
Severity: "high",
Module: "cyber",
ModuleLink: "/sdk/security-backlog",
ActionDE: "NIS2-Sicherheitskonzept fuer Netzwerkkomponenten erstellen",
RAGQuery: "NIS2 Art 21 Netzwerk Sicherheit Massnahmen",
},
{
Regulation: "CRA Art. 10",
TriggerCondDE: "CRA-Anforderungen fuer Software mit Netzwerkzugang",
Severity: "medium",
Module: "cyber",
ModuleLink: "/sdk/security-backlog",
ActionDE: "CRA-Konformitaet fuer Netzwerk-Software sicherstellen",
RAGQuery: "CRA Software Netzwerkzugang Sicherheitsanforderungen",
},
}
}
// --- AI/ML-specific cyber patterns HP830-HP844 ---
aiCyberIDs := genPatternRange("HP", 830, 844)
for _, pid := range aiCyberIDs {
m[pid] = []ComplianceTrigger{
{
Regulation: "AI Act Art. 6",
TriggerCondDE: "KI/ML-System in sicherheitsrelevantem Kontext",
Severity: "high",
Module: "ai-act",
ModuleLink: "/sdk/ai-act",
ActionDE: "Hochrisiko-Einstufung und AI-Act-Konformitaet pruefen",
RAGQuery: "AI Act Hochrisiko KI ML sicherheitsrelevant",
},
{
Regulation: "DSGVO Art. 22",
TriggerCondDE: "Automatisierte Entscheidungsfindung durch KI moeglich",
Severity: "high",
Module: "dsfa",
ModuleLink: "/sdk/dsfa",
ActionDE: "Automatisierte Einzelentscheidung nach Art. 22 DSGVO pruefen",
RAGQuery: "DSGVO Art 22 automatisierte Entscheidung KI Profiling",
},
}
}
// --- NIS2 network/HMI patterns HP845-HP864 ---
nis2IDs := genPatternRange("HP", 845, 864)
for _, pid := range nis2IDs {
m[pid] = []ComplianceTrigger{
{
Regulation: "NIS2 Art. 21",
TriggerCondDE: "Netzwerk-/HMI-Komponente erfordert NIS2-Massnahmen",
Severity: "high",
Module: "cyber",
ModuleLink: "/sdk/security-backlog",
ActionDE: "NIS2-Sicherheitsanforderungen fuer HMI/Netzwerk umsetzen",
RAGQuery: "NIS2 Art 21 HMI Netzwerk Sicherheit",
},
}
}
return m
}
// GetTagBasedTriggers evaluates a fixed rule table against the given tags
// and returns the trigger of every rule whose required tags are all present.
// Rules are checked, and their triggers returned, in this order:
//
//	has_software + programmable -> CRA Art. 10
//	sensor_part + has_software  -> EU Data Act Art. 3
//	has_ai                      -> AI Act Art. 6 (generic)
//	is_networked                -> NIS2 Art. 21
//
// The result is nil when no rule applies. Duplicate or unknown tags are
// harmless.
func GetTagBasedTriggers(tags []string) []ComplianceTrigger {
	present := make(map[string]bool, len(tags))
	for _, tag := range tags {
		present[tag] = true
	}

	rules := []struct {
		requires []string
		trigger  ComplianceTrigger
	}{
		{
			requires: []string{"has_software", "programmable"},
			trigger: ComplianceTrigger{
				Regulation:    "CRA Art. 10",
				TriggerCondDE: "Programmierbare Software-Komponente erfordert CRA-Konformitaet",
				Severity:      "medium",
				Module:        "cyber",
				ModuleLink:    "/sdk/security-backlog",
				ActionDE:      "CRA-Anforderungen fuer programmierbare Software pruefen",
				RAGQuery:      "CRA Art 10 programmierbare Software Sicherheit",
			},
		},
		{
			requires: []string{"sensor_part", "has_software"},
			trigger: ComplianceTrigger{
				Regulation:    "EU Data Act Art. 3",
				TriggerCondDE: "Sensor mit Software erzeugt Maschinendaten — Zugangsrecht nach Data Act",
				Severity:      "medium",
				Module:        "vendor-compliance",
				ModuleLink:    "/sdk/vendor-compliance",
				ActionDE:      "Datenzugangsrechte fuer Sensor-/Maschinendaten nach Data Act pruefen",
				RAGQuery:      "EU Data Act Art 3 Sensordaten Maschinendaten Zugang",
			},
		},
		{
			requires: []string{"has_ai"},
			trigger: ComplianceTrigger{
				Regulation:    "AI Act Art. 6",
				TriggerCondDE: "KI-Komponente erkannt — Hochrisiko-Einstufung pruefen",
				Severity:      "high",
				Module:        "ai-act",
				ModuleLink:    "/sdk/ai-act",
				ActionDE:      "AI-Act-Klassifizierung fuer KI-Komponente durchfuehren",
				RAGQuery:      "AI Act Art 6 Klassifizierung KI-System Hochrisiko",
			},
		},
		{
			requires: []string{"is_networked"},
			trigger: ComplianceTrigger{
				Regulation:    "NIS2 Art. 21",
				TriggerCondDE: "Vernetzte Komponente unterliegt NIS2-Sicherheitspflichten",
				Severity:      "medium",
				Module:        "cyber",
				ModuleLink:    "/sdk/security-backlog",
				ActionDE:      "NIS2-Anforderungen fuer vernetzte Infrastruktur bewerten",
				RAGQuery:      "NIS2 Art 21 vernetzte Infrastruktur Pflichten",
			},
		},
	}

	var fired []ComplianceTrigger
	for _, rule := range rules {
		satisfied := true
		for _, needed := range rule.requires {
			if !present[needed] {
				satisfied = false
				break
			}
		}
		if satisfied {
			fired = append(fired, rule.trigger)
		}
	}
	return fired
}
// genPatternRange generates pattern IDs like "HP800", "HP801", ..., "HP814"
// for the inclusive range [from, to]. Each number is formatted by padInt
// (at least three digits) and appended to prefix.
//
// An empty range (to < from) yields an empty, non-nil slice. Without the guard
// a range with to < from-1 would pass a negative capacity to make, which
// panics at run time.
func genPatternRange(prefix string, from, to int) []string {
	count := to - from + 1
	if count <= 0 {
		return []string{}
	}
	ids := make([]string, 0, count)
	for i := from; i <= to; i++ {
		ids = append(ids, prefix+padInt(i))
	}
	return ids
}
// padInt renders n via triggerItoa and left-pads it with zeros so that values
// below 10 get two leading zeros and values below 100 get one. Values of 100
// and above are returned unpadded.
func padInt(n int) string {
	digits := triggerItoa(n)
	switch {
	case n < 10:
		return "00" + digits
	case n < 100:
		return "0" + digits
	default:
		return digits
	}
}
// triggerItoa returns the decimal representation of a non-negative integer.
// It is hand-rolled so that this file does not need to import strconv.
// Negative input is outside the contract and produces an empty string.
func triggerItoa(n int) string {
	if n == 0 {
		return "0"
	}
	// Collect digits least-significant first, then reverse in place.
	digits := make([]byte, 0, 20)
	for rest := n; rest > 0; rest /= 10 {
		digits = append(digits, byte('0'+rest%10))
	}
	for lo, hi := 0, len(digits)-1; lo < hi; lo, hi = lo+1, hi-1 {
		digits[lo], digits[hi] = digits[hi], digits[lo]
	}
	return string(digits)
}
@@ -0,0 +1,175 @@
"""
HTML email report builder for document checks.
Generates a styled HTML report similar to the frontend ChecklistView,
including L1/L2 check hierarchy, progress bars, and actionable hints.
"""
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from .agent_doc_check_routes import CheckItem, DocCheckResult
def _bar(pct: int, color: str) -> str:
bg = {"green": "#22c55e", "yellow": "#eab308", "red": "#ef4444", "blue": "#60a5fa"}
c = bg.get(color, "#60a5fa")
return (
f'<div style="display:inline-block;width:120px;height:8px;background:#e5e7eb;'
f'border-radius:4px;overflow:hidden;vertical-align:middle;margin-right:8px">'
f'<div style="width:{pct}%;height:100%;background:{c};border-radius:4px"></div>'
f'</div><span style="font-size:13px;font-weight:600;color:{c}">{pct}%</span>'
)
def _icon(passed: bool, skipped: bool = False) -> str:
if skipped:
return '<span style="color:#d1d5db">&mdash;</span>'
if passed:
return '<span style="color:#22c55e;font-weight:bold">&#10003;</span>'
return '<span style="color:#ef4444;font-weight:bold">&#10007;</span>'
def _hint_box(hint: str) -> str:
return (
f'<div style="font-size:11px;color:#dc2626;margin:2px 0 4px 20px;'
f'padding:4px 8px;background:#fef2f2;border-radius:4px;'
f'border-left:3px solid #fca5a5">{hint}</div>'
)
def build_html_report(
    results: list[DocCheckResult],
    cookie_result: dict | None,
) -> str:
    """Assemble the complete HTML email report.

    The report consists of a wrapper ``<div>`` with a heading and a summary
    line (number of documents and how many have 100 % completeness), one
    rendered card per entry in ``results`` and, if ``cookie_result`` is truthy,
    a cookie-banner card. The fragments are joined with newlines.
    """
    total = len(results)
    complete = len([doc for doc in results if doc.completeness_pct == 100])

    parts: list[str] = []
    parts.append(
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:700px;margin:0 auto">'
    )
    parts.append('<h2 style="margin-bottom:4px">Dokumenten-Pruefung</h2>')
    parts.append(
        f'<p style="color:#6b7280;margin-top:0">'
        f'{total} Dokumente, {complete} vollstaendig</p>'
    )

    for doc in results:
        _render_document(parts, doc)

    if cookie_result:
        _render_cookie_banner(parts, cookie_result)

    parts.append('</div>')
    return "\n".join(parts)
def _render_document(html: list[str], r: DocCheckResult) -> None:
    """Append the HTML card for one checked document to ``html``.

    The card has a header (status badge, document label, passed/total counts
    for level-1 checks and for non-skipped level-2 checks, completeness bar and
    optionally a correctness bar) and a body. The body shows the error message
    when ``r.error`` is set; otherwise it lists every level-1 check with its
    level-2 children and, if available, the analysed word count.

    ``r.label`` and ``r.error`` are HTML-escaped before being embedded so that
    markup characters in them are displayed as text rather than interpreted.
    """
    # Function-scope import: only ``escape`` is bound, so it does not clash
    # with the ``html`` parameter that shadows the stdlib module name.
    from html import escape

    pct = r.completeness_pct
    cpct = r.correctness_pct
    bar_color = "green" if pct >= 80 else "yellow" if pct >= 50 else "red"
    status_label = "OK" if pct == 100 else "LUECKENHAFT" if pct >= 50 else "MANGELHAFT"
    if r.error:
        # An error overrides whatever the percentages would suggest.
        status_label = "FEHLER"

    l1_checks = [c for c in r.checks if c.level == 1]
    # Group level-2 checks under the id of their parent level-1 check.
    l2_by_parent: dict[str, list[CheckItem]] = {}
    for c in r.checks:
        if c.level == 2 and c.parent:
            l2_by_parent.setdefault(c.parent, []).append(c)

    l1_passed = sum(1 for c in l1_checks if c.passed)
    l2_active = [c for c in r.checks if c.level == 2 and not c.skipped]
    l2_passed = sum(1 for c in l2_active if c.passed)

    # Header
    html.append(
        f'<div style="border:1px solid #e5e7eb;border-radius:8px;margin-bottom:12px;overflow:hidden">'
        f'<div style="padding:12px 16px;background:#f9fafb">'
        f'<div style="display:flex;justify-content:space-between;align-items:center"><div>'
        f'<span style="font-size:11px;background:#f3f4f6;padding:2px 8px;border-radius:4px;'
        f'color:#4b5563;font-weight:500;margin-right:8px">{status_label}</span>'
        f'<strong style="font-size:14px">{escape(str(r.label))}</strong>'
        f'<div style="font-size:12px;color:#6b7280;margin-top:2px">'
        f'{l1_passed}/{len(l1_checks)} Pflichtangaben'
    )
    if l2_active:
        html.append(f', {l2_passed}/{len(l2_active)} Detailpruefungen')
    html.append(f'</div></div><div style="text-align:right">{_bar(pct, bar_color)}')
    # The correctness bar is only shown when there is a non-zero value and
    # at least one level-2 check actually ran.
    if cpct and l2_active:
        html.append(f'<br>{_bar(cpct, "blue")}')
    html.append('</div></div></div>')

    # Body
    if r.error:
        html.append(
            f'<div style="padding:12px 16px;color:#991b1b">{escape(str(r.error))}</div>'
        )
    else:
        html.append('<div style="padding:8px 16px 12px">')
        for c in l1_checks:
            _render_l1_check(html, c, l2_by_parent.get(c.id, []))
        if r.word_count:
            html.append(
                f'<div style="font-size:11px;color:#9ca3af;margin-top:8px;'
                f'padding-top:8px;border-top:1px solid #e5e7eb">'
                f'{r.word_count} Woerter analysiert</div>'
            )
        html.append('</div>')
    html.append('</div>')
def _render_l1_check(
    html: list[str], c: CheckItem, children: list[CheckItem],
) -> None:
    """Append one level-1 check row, followed by its level-2 children, to ``html``.

    The row shows the pass/fail icon, the check label (HTML-escaped) and, when
    the check has non-skipped children, a "(passed/total)" counter. A failed
    check with a hint gets a hint box inside the row. Skipped children are
    neither counted nor rendered.
    """
    # Function-scope import: only ``escape`` is bound, so it does not clash
    # with the ``html`` parameter that shadows the stdlib module name.
    from html import escape

    l2_sub = [ch for ch in children if not ch.skipped]
    l2_passed = sum(1 for ch in l2_sub if ch.passed)
    style = "color:#991b1b;font-weight:600" if not c.passed else "color:#374151"
    html.append(
        f'<div style="padding:3px 0">{_icon(c.passed)} '
        f'<span style="font-size:13px;{style}">{escape(str(c.label))}</span>'
    )
    if l2_sub:
        html.append(f' <span style="color:#9ca3af;font-size:11px">({l2_passed}/{len(l2_sub)})</span>')
    if not c.passed and c.hint:
        html.append(_hint_box(c.hint))
    html.append('</div>')
    # l2_sub already excludes skipped children, so it can be rendered directly.
    for ch in l2_sub:
        _render_l2_check(html, ch)
def _render_l2_check(html: list[str], ch: CheckItem) -> None:
    """Append one indented level-2 check row to ``html``.

    A passed check with ``matched_text`` shows the first 80 characters of that
    text as evidence; a failed check with a hint shows a hint box. The label
    and the evidence text are HTML-escaped before being embedded so that markup
    characters in them cannot alter the email.
    """
    # Function-scope import: only ``escape`` is bound, so it does not clash
    # with the ``html`` parameter that shadows the stdlib module name.
    from html import escape

    style = "color:#dc2626;font-weight:500" if not ch.passed else "color:#6b7280"
    html.append(
        f'<div style="padding:2px 0 2px 24px;border-left:2px solid #e5e7eb;margin-left:8px">'
        f'{_icon(ch.passed)} '
        f'<span style="font-size:12px;{style}">{escape(str(ch.label))}</span>'
    )
    if ch.passed and ch.matched_text:
        # Truncate first, then escape, so an entity is never cut in half.
        evidence = escape(str(ch.matched_text)[:80])
        html.append(
            f'<div style="font-size:10px;color:#9ca3af;font-family:monospace;'
            f'margin-left:20px;overflow:hidden;text-overflow:ellipsis;'
            f'white-space:nowrap">"...{evidence}..."</div>'
        )
    if not ch.passed and ch.hint:
        html.append(_hint_box(ch.hint))
    html.append('</div>')
def _render_cookie_banner(html: list[str], cookie_result: dict) -> None:
    """Append the cookie-banner card to ``html``.

    Reads ``banner_detected``, ``banner_provider`` and
    ``banner_checks["violations"]`` from ``cookie_result``. At most the first
    ten violations are listed, each cut to 80 characters; if there are none a
    green "no violations" line is shown instead.

    The provider name and violation texts are HTML-escaped before being
    embedded. Missing or ``None`` values for ``banner_provider``,
    ``banner_checks``, ``violations`` or a violation's ``text`` are tolerated
    instead of raising.
    """
    # Function-scope import: only ``escape`` is bound, so it does not clash
    # with the ``html`` parameter that shadows the stdlib module name.
    from html import escape

    provider = cookie_result.get("banner_provider") or "unbekannt"
    html.append(
        '<div style="border:1px solid #e5e7eb;border-radius:8px;'
        'padding:12px 16px;margin-bottom:12px">'
        '<strong>Cookie-Banner Pruefung</strong><br>'
        f'Banner erkannt: {cookie_result.get("banner_detected", False)}<br>'
        f'Anbieter: {escape(str(provider))}'
    )
    # ``or`` fallbacks also cover keys that are present but explicitly None.
    banner_checks = cookie_result.get("banner_checks") or {}
    violations = banner_checks.get("violations") or []
    if violations:
        for v in violations[:10]:
            # Truncate first, then escape, so an entity is never cut in half.
            text = escape(str(v.get("text") or "")[:80])
            html.append(f'<br>{_icon(False)} {text}')
    else:
        html.append('<br><span style="color:#22c55e">Keine Verstoesse erkannt.</span>')
    html.append('</div>')
@@ -141,7 +141,7 @@ async def _run_doc_check(check_id: str, req: DocCheckRequest):
email_result = send_email(
recipient=req.recipient,
subject=f"[DOKUMENTEN-PRUEFUNG] {len(results)} Dokumente geprueft",
body_html=f"<pre>{summary}</pre>",
body_html=summary,
)
response = DocCheckResponse(
@@ -284,40 +284,49 @@ def _split_into_sections(text: str, parent_label: str, url: str) -> list[dict]:
Detects sections like 'Cookies', 'Social Media', 'Dienste von Drittanbietern'
and classifies each by document type for separate checking.
Deduplicates: if the same doc_type appears twice, texts are merged.
"""
import re as _re
sections = []
sections: list[dict] = []
seen_types: dict[str, int] = {} # doc_type -> index in sections
# Split by lines that look like headings (short, followed by longer content)
lines = text.split("\n")
current_heading = ""
current_text = []
current_text: list[str] = []
def _save_section(heading: str, text_lines: list[str]) -> None:
sec_text = "\n".join(text_lines)
if len(sec_text.split()) < 100:
return
sec_type = _classify_section(heading)
if not sec_type:
return
# Merge duplicate doc_types (e.g. two "Social Media" headings)
if sec_type in seen_types:
idx = seen_types[sec_type]
sections[idx]["text"] += "\n\n" + sec_text
sections[idx]["word_count"] = len(sections[idx]["text"].split())
else:
seen_types[sec_type] = len(sections)
sections.append({
"title": f"{parent_label} > {heading}",
"text": sec_text,
"doc_type": sec_type,
"word_count": len(sec_text.split()),
})
for line in lines:
stripped = line.strip()
# Detect heading: short line (< 80 chars), not empty, followed by content
is_heading = (
5 < len(stripped) < 80
and not stripped.endswith(".")
and not stripped.endswith(",")
and stripped[0].isupper()
)
# Skip-headings should NOT start a new section — their text
# belongs to the previous section (e.g. "Risikoabwägung" inside DSFA)
is_skip = is_heading and stripped.lower().strip() in SKIP_HEADINGS
if is_heading and not is_skip and current_heading and len("\n".join(current_text)) > 200:
# Save previous section
sec_text = "\n".join(current_text)
sec_type = _classify_section(current_heading)
if sec_type and sec_type != "skip":
sections.append({
"title": f"{parent_label} > {current_heading}",
"text": sec_text,
"doc_type": sec_type,
"word_count": len(sec_text.split()),
})
if is_heading and not is_skip and current_heading:
_save_section(current_heading, current_text)
if is_heading and not is_skip:
current_heading = stripped
@@ -326,16 +335,8 @@ def _split_into_sections(text: str, parent_label: str, url: str) -> list[dict]:
current_text.append(line)
# Last section
if current_heading and len("\n".join(current_text)) > 200:
sec_text = "\n".join(current_text)
sec_type = _classify_section(current_heading)
if sec_type and sec_type != "skip":
sections.append({
"title": f"{parent_label} > {current_heading}",
"text": sec_text,
"doc_type": sec_type,
"word_count": len(sec_text.split()),
})
if current_heading:
_save_section(current_heading, current_text)
return sections
@@ -347,6 +348,10 @@ SKIP_HEADINGS = {
"risikoabwaegung und datenschutzfolgenabschaetzung",
}
# Track already-seen section types to avoid duplicate sub-documents
# (e.g. two "Social Media" headings on the same page)
_DEDUP_TYPES = {"social_media", "cookie", "dsfa", "widerruf", "impressum"}
def _classify_section(heading: str) -> str | None:
"""Classify a section heading into a document type."""
@@ -377,41 +382,5 @@ async def _check_cookie_banner(url: str) -> dict | None:
def _build_report(results: list[DocCheckResult], cookie_result: dict | None) -> str:
"""Build email report."""
parts = [
"DOKUMENTEN-PRUEFUNG",
f"Dokumente geprueft: {len(results)}",
"",
]
for r in results:
status = "OK" if r.completeness_pct == 100 else "LUECKENHAFT" if r.completeness_pct >= 50 else "MANGELHAFT"
if r.error:
status = "FEHLER"
detail = f", Korrektheit {r.correctness_pct}%" if r.correctness_pct else ""
parts.append(f"[{status}] {r.label} ({r.completeness_pct}%{detail}, {r.word_count} Woerter)")
for check in r.checks:
if check.skipped:
continue
icon = "+" if check.passed else "!!"
indent = " " if check.level == 2 else " "
parts.append(f"{indent}[{icon}] {check.label}")
if r.error:
parts.append(f" FEHLER: {r.error}")
parts.append("")
if cookie_result:
parts.extend([
"Cookie-Banner Pruefung:",
f" Banner erkannt: {cookie_result.get('banner_detected', False)}",
f" Anbieter: {cookie_result.get('banner_provider', 'unbekannt')}",
])
violations = cookie_result.get("banner_checks", {}).get("violations", [])
if violations:
for v in violations[:10]:
parts.append(f" [!!] {v.get('text', '')[:80]}")
else:
parts.append(" Keine Verstoesse erkannt.")
return "\n".join(parts)
from .agent_doc_check_report import build_html_report
return build_html_report(results, cookie_result)