Compare commits

...

2 Commits

Author SHA1 Message Date
Benjamin Admin a3287cd5e6 feat: HTML email report with hints + fix duplicate Social Media sections
Build + Deploy / build-admin-compliance (push) Successful in 1m45s
Build + Deploy / build-backend-compliance (push) Successful in 9s
Build + Deploy / build-ai-sdk (push) Successful in 36s
Build + Deploy / build-developer-portal (push) Successful in 7s
Build + Deploy / build-tts (push) Successful in 7s
Build + Deploy / build-document-crawler (push) Successful in 8s
Build + Deploy / build-dsms-gateway (push) Successful in 7s
Build + Deploy / build-dsms-node (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 15s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m47s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 44s
CI / test-python-backend (push) Successful in 41s
CI / test-python-document-crawler (push) Successful in 26s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 2m23s
1. Email report now renders as styled HTML (matching frontend design):
   - Progress bars (green=completeness, blue=correctness)
   - Hierarchical L1→L2 check display
   - Red hint boxes under failed checks explaining what to fix
   - Matched text evidence for passed checks

2. Section splitter deduplicates: two "Social Media" headings on the
   same page are merged into one section instead of creating duplicates.

3. Extracted report builder to agent_doc_check_report.py (175 LOC)
   to keep routes file under 500 LOC (386 LOC).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-07 15:13:00 +02:00
Benjamin Admin 56892cf7dc feat: CE × Compliance Crossover Engine
Automatische Erkennung von DSGVO/AI Act/CRA/NIS2/Data Act
Implikationen bei CE-Gefaehrdungen. 50 Trigger-Mappings auf
Hazard-Patterns → Compliance-Module mit Modul-Links.

- compliance_triggers.go: 50 Pattern→Regulation Mappings
- compliance_crossover.go: Engine die Projekt-Hazards gegen Trigger prueft
- iace_handler_compliance.go: GET /compliance-triggers API
- ComplianceAlerts.tsx: Frontend Alert-Panel auf Projekt-Uebersicht

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-07 15:07:22 +02:00
8 changed files with 1215 additions and 67 deletions
@@ -0,0 +1,218 @@
'use client'
import { useState, useEffect } from 'react'
import Link from 'next/link'
/**
 * One compliance hint as rendered by the ComplianceAlerts panel.
 * Field names are snake_case because they mirror the JSON payload.
 */
interface ComplianceTrigger {
  /** Identifier used as React key and as the key for the expanded/collapsed state. */
  id: string
  /** Regulation the hint refers to; shown in the row heading and used for the summary badges. */
  regulation: string
  /** Article reference, shown after the regulation in the row heading. */
  article: string
  /** Short title, shown after the article in the row heading. */
  title: string
  /** Severity level; selects the colour scheme and the badge label. */
  severity: 'high' | 'medium' | 'low'
  /** Explanation shown after "Grund:" in the expanded row. */
  reason: string
  /** Optional hazard count; only rendered when present and greater than zero. */
  affected_hazard_count?: number
  /** Link target of the "… oeffnen" link in the expanded row. */
  module_path: string
  /** Label of the "… oeffnen" link in the expanded row. */
  module_label: string
}
/** Payload held in component state after loading the compliance-triggers endpoint. */
interface TriggersResponse {
  /** The hints to render, in display order. */
  triggers: ComplianceTrigger[]
  /** Number of hints as reported alongside the list. */
  total: number
}
/**
 * CSS utility class sets per severity level ('high' | 'medium' | 'low').
 * Each entry provides classes for the row border, row background, link text,
 * severity badge and icon, including their dark-mode variants.
 */
const SEVERITY_CONFIG: Record<string, { border: string; bg: string; text: string; badge: string; icon: string }> = {
  // Red scheme
  high: {
    border: 'border-red-200 dark:border-red-800',
    bg: 'bg-red-50 dark:bg-red-900/20',
    text: 'text-red-700 dark:text-red-400',
    badge: 'bg-red-100 text-red-800 dark:bg-red-900/50 dark:text-red-300',
    icon: 'text-red-500',
  },
  // Yellow scheme
  medium: {
    border: 'border-yellow-200 dark:border-yellow-800',
    bg: 'bg-yellow-50 dark:bg-yellow-900/20',
    text: 'text-yellow-700 dark:text-yellow-400',
    badge: 'bg-yellow-100 text-yellow-800 dark:bg-yellow-900/50 dark:text-yellow-300',
    icon: 'text-yellow-500',
  },
  // Blue scheme
  low: {
    border: 'border-blue-200 dark:border-blue-800',
    bg: 'bg-blue-50 dark:bg-blue-900/20',
    text: 'text-blue-700 dark:text-blue-400',
    badge: 'bg-blue-100 text-blue-800 dark:bg-blue-900/50 dark:text-blue-300',
    icon: 'text-blue-500',
  },
}
/** German badge text per severity level (HOCH = high, MITTEL = medium, NIEDRIG = low). */
const SEVERITY_LABELS: Record<string, string> = {
  high: 'HOCH',
  medium: 'MITTEL',
  low: 'NIEDRIG',
}
/**
 * Regulation families shown as summary badges at the top of the panel.
 * `key` is compared with the regulation of the loaded triggers to decide
 * whether the badge is highlighted; `label` is the visible text and
 * `activeColor` the class set applied when the badge is highlighted.
 */
const REGULATION_BADGES: { key: string; label: string; activeColor: string }[] = [
  { key: 'DSGVO', label: 'DSGVO', activeColor: 'bg-red-100 text-red-800 border-red-300' },
  { key: 'AI Act', label: 'AI Act', activeColor: 'bg-orange-100 text-orange-800 border-orange-300' },
  { key: 'CRA', label: 'CRA', activeColor: 'bg-yellow-100 text-yellow-800 border-yellow-300' },
  { key: 'NIS2', label: 'NIS2', activeColor: 'bg-indigo-100 text-indigo-800 border-indigo-300' },
  { key: 'Data Act', label: 'Data Act', activeColor: 'bg-amber-100 text-amber-800 border-amber-300' },
]
/** Outline warning-triangle icon; `className` is forwarded to the `<svg>` element. */
function WarningIcon(props: { className?: string }) {
  const { className } = props
  const trianglePath =
    'M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-2.5L13.732 4.5c-.77-.833-2.694-.833-3.464 0L3.34 16.5c-.77.833.192 2.5 1.732 2.5z'
  return (
    <svg className={className} fill="none" viewBox="0 0 24 24" stroke="currentColor">
      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d={trianglePath} />
    </svg>
  )
}
/** Downward chevron that is rotated by 180 degrees while `open` is true. */
function ChevronIcon(props: { open: boolean }) {
  const rotation = props.open ? 'rotate-180' : ''
  const svgClass = `w-4 h-4 text-gray-400 transition-transform ${rotation}`
  return (
    <svg className={svgClass} fill="none" viewBox="0 0 24 24" stroke="currentColor">
      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
    </svg>
  )
}
/** Returns `value` when it is a string, otherwise an empty string. */
function readString(value: unknown): string {
  return typeof value === 'string' ? value : ''
}

/**
 * Converts one raw list entry of the compliance-triggers response into the
 * flat ComplianceTrigger shape rendered by the panel.
 *
 * Two payload shapes are accepted:
 *  - the flat shape described by the ComplianceTrigger interface, and
 *  - the nested shape produced by the crossover engine:
 *    `{ hazard_id, hazard_name, pattern_id, trigger: { regulation,
 *    trigger_cond_de, severity, module, module_link, action_de } }`.
 *
 * @param raw   Untrusted list entry from the JSON response.
 * @param index Position in the list; used to build a unique fallback id.
 * @returns The normalized trigger, or null when the entry has no regulation.
 */
function normalizeTrigger(raw: unknown, index: number): ComplianceTrigger | null {
  if (typeof raw !== 'object' || raw === null) return null
  const outer = raw as Record<string, unknown>
  const nested =
    typeof outer.trigger === 'object' && outer.trigger !== null
      ? (outer.trigger as Record<string, unknown>)
      : null
  // Nested payloads carry the regulation fields under `trigger`.
  const src = nested ?? outer

  const regulation = readString(src.regulation)
  if (regulation === '') return null

  const moduleLabel = readString(src.module_label) || readString(src.module)
  const rawSeverity = src.severity
  const severity: ComplianceTrigger['severity'] =
    rawSeverity === 'high' || rawSeverity === 'medium' ? rawSeverity : 'low'
  const count = outer.affected_hazard_count

  return {
    // The index keeps ids unique even if two entries share pattern/regulation/module.
    id:
      readString(outer.id) ||
      [readString(outer.pattern_id), regulation, moduleLabel, String(index)].join('|'),
    regulation,
    article: readString(src.article),
    title: readString(src.title) || readString(src.action_de),
    severity,
    reason: readString(src.reason) || readString(src.trigger_cond_de),
    ...(typeof count === 'number' ? { affected_hazard_count: count } : {}),
    module_path: readString(src.module_path) || readString(src.module_link),
    module_label: moduleLabel,
  }
}

/**
 * Collapsible panel listing the compliance hints (DSGVO, AI Act, CRA, NIS2,
 * Data Act) derived for a project.
 *
 * Loads `/api/sdk/v1/iace/projects/{projectId}/compliance-triggers` whenever
 * `projectId` changes. Renders nothing while loading, when the request fails
 * or when no hints are returned.
 *
 * @param projectId Project whose compliance triggers are displayed.
 */
export function ComplianceAlerts({ projectId }: { projectId: string }) {
  const [data, setData] = useState<TriggersResponse | null>(null)
  const [loading, setLoading] = useState(true)
  const [collapsed, setCollapsed] = useState(false)
  const [expandedIds, setExpandedIds] = useState<Set<string>>(new Set())

  useEffect(() => {
    // Abort the in-flight request on unmount / projectId change so a slow
    // response for a previous project can never overwrite the current one.
    const controller = new AbortController()
    setLoading(true)
    setData(null)

    fetch(`/api/sdk/v1/iace/projects/${projectId}/compliance-triggers`, { signal: controller.signal })
      .then((r) => (r.ok ? r.json() : null))
      .then((json: unknown) => {
        // The endpoint may answer with `{ triggers: [...] }` or a bare array.
        const list = Array.isArray(json)
          ? json
          : typeof json === 'object' && json !== null
            ? (json as { triggers?: unknown }).triggers
            : null
        if (!Array.isArray(list)) return
        const triggers = list
          .map(normalizeTrigger)
          .filter((t): t is ComplianceTrigger => t !== null)
        setData({ triggers, total: triggers.length })
      })
      .catch((err: unknown) => {
        // The panel is best-effort: a failed load simply hides it.
        if (!controller.signal.aborted) {
          console.warn('ComplianceAlerts: failed to load compliance triggers', err)
        }
      })
      .finally(() => {
        if (!controller.signal.aborted) setLoading(false)
      })

    return () => controller.abort()
  }, [projectId])

  if (loading) return null
  if (!data || data.triggers.length === 0) return null

  const triggers = data.triggers

  function toggleExpanded(id: string) {
    setExpandedIds((prev) => {
      const next = new Set(prev)
      if (next.has(id)) next.delete(id)
      else next.add(id)
      return next
    })
  }

  return (
    <div className="bg-white dark:bg-gray-800 rounded-xl border border-red-200 dark:border-red-800">
      {/* Header */}
      <button
        type="button"
        aria-expanded={!collapsed}
        onClick={() => setCollapsed(!collapsed)}
        className="w-full flex items-center justify-between p-6 text-left"
      >
        <div className="flex items-center gap-3">
          <div className="w-10 h-10 bg-red-50 dark:bg-red-900/30 rounded-lg flex items-center justify-center">
            <WarningIcon className="w-5 h-5 text-red-600" />
          </div>
          <div>
            <h2 className="text-sm font-semibold text-gray-900 dark:text-white">
              {triggers.length} Compliance-Hinweise erkannt
            </h2>
            <p className="text-xs text-gray-500">
              Basierend auf den identifizierten Gefaehrdungen bestehen rechtliche Implikationen
            </p>
          </div>
        </div>
        <div className="w-8 h-8 flex items-center justify-center rounded-full hover:bg-gray-100 dark:hover:bg-gray-700 transition-colors flex-shrink-0">
          <ChevronIcon open={!collapsed} />
        </div>
      </button>
      {!collapsed && (
        <div className="px-6 pb-6 space-y-4">
          {/* Regulation summary badges */}
          <div className="flex flex-wrap gap-2">
            {REGULATION_BADGES.map((reg) => {
              // Regulations arrive with article suffixes and prefixes
              // ("DSGVO Art. 35", "EU Data Act Art. 3"), so match by
              // substring instead of exact equality.
              const active = triggers.some((t) => t.regulation.includes(reg.key))
              return (
                <span
                  key={reg.key}
                  className={`px-2.5 py-1 text-xs font-medium rounded-full border ${
                    active
                      ? reg.activeColor
                      : 'bg-gray-50 text-gray-400 border-gray-200 dark:bg-gray-700 dark:text-gray-500 dark:border-gray-600'
                  }`}
                >
                  {reg.label}
                </span>
              )
            })}
          </div>
          {/* Trigger list */}
          <div className="space-y-2">
            {triggers.map((trigger) => {
              const sev = SEVERITY_CONFIG[trigger.severity] || SEVERITY_CONFIG.low
              const isOpen = expandedIds.has(trigger.id)
              // Skip empty parts so missing article/title leave no stray gaps.
              const heading = [trigger.regulation, trigger.article, trigger.title]
                .filter(Boolean)
                .join(' ')
              return (
                <div key={trigger.id} className={`rounded-lg border ${sev.border} ${sev.bg} overflow-hidden`}>
                  {/* Trigger header row */}
                  <button
                    type="button"
                    aria-expanded={isOpen}
                    onClick={() => toggleExpanded(trigger.id)}
                    className="w-full flex items-center gap-3 px-4 py-3 text-left"
                  >
                    <ChevronIcon open={isOpen} />
                    <div className="flex-1 min-w-0">
                      <span className="text-sm font-medium text-gray-900 dark:text-white">
                        {heading}
                      </span>
                    </div>
                    <span className={`px-2 py-0.5 text-xs font-bold rounded ${sev.badge}`}>
                      {SEVERITY_LABELS[trigger.severity] || trigger.severity}
                    </span>
                  </button>
                  {/* Expanded detail */}
                  {isOpen && (
                    <div className="px-4 pb-4 pt-0 ml-7 space-y-2">
                      <p className="text-xs text-gray-700 dark:text-gray-300">
                        <span className="font-medium">Grund:</span> {trigger.reason}
                      </p>
                      {trigger.affected_hazard_count != null && trigger.affected_hazard_count > 0 && (
                        <p className="text-xs text-gray-500">
                          Betroffene Gefaehrdungen: {trigger.affected_hazard_count}
                        </p>
                      )}
                      {/* next/link requires a defined href, so only render the link when a target exists. */}
                      {trigger.module_path ? (
                        <Link
                          href={trigger.module_path}
                          className={`inline-flex items-center gap-1.5 text-xs font-medium ${sev.text} hover:underline`}
                        >
                          {trigger.module_label} oeffnen
                          <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 7l5 5m0 0l-5 5m5-5H6" />
                          </svg>
                        </Link>
                      ) : null}
                    </div>
                  )}
                </div>
              )
            })}
          </div>
          {/* Disclaimer */}
          <div className="p-3 rounded-lg bg-amber-50 dark:bg-amber-900/20 border border-amber-200 dark:border-amber-800 text-xs text-amber-800 dark:text-amber-300">
            <strong>Hinweis:</strong> Diese Compliance-Hinweise werden automatisch aus den
            Gefaehrdungen und Klassifikationen abgeleitet. Der CE-Fachmann muss die
            regulatorischen Anforderungen im jeweiligen Modul verifizieren.
          </div>
        </div>
      )}
    </div>
  )
}
@@ -4,6 +4,7 @@ import React, { useState, useEffect } from 'react'
import Link from 'next/link' import Link from 'next/link'
import { useParams } from 'next/navigation' import { useParams } from 'next/navigation'
import { SuggestedNorms } from './_components/SuggestedNorms' import { SuggestedNorms } from './_components/SuggestedNorms'
import { ComplianceAlerts } from './_components/ComplianceAlerts'
interface ProjectOverview { interface ProjectOverview {
id: string id: string
@@ -349,6 +350,9 @@ export default function ProjectOverviewPage() {
</div> </div>
</div> </div>
{/* Compliance Alerts */}
<ComplianceAlerts projectId={projectId} />
{/* Suggested Norms */} {/* Suggested Norms */}
<SuggestedNorms projectId={projectId} /> <SuggestedNorms projectId={projectId} />
@@ -0,0 +1,86 @@
package handlers
import (
"net/http"
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// ============================================================================
// CE x Compliance Crossover Engine
// ============================================================================
// GetComplianceTriggers serves GET /projects/:id/compliance-triggers.
//
// It loads the project's hazards, additionally matches the project's
// component tags against the pattern engine, and feeds both into the
// crossover engine. The JSON response is the ComplianceTriggerSummary
// returned by iace.GetProjectComplianceTriggers.
//
// Responses: 400 for a malformed project ID, 404 when the store returns no
// project, 500 when the store reports an error, 200 otherwise.
func (h *IACEHandler) GetComplianceTriggers(c *gin.Context) {
	ctx := c.Request.Context()

	projectID, parseErr := uuid.Parse(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid project ID"})
		return
	}

	// The project must exist before any analysis is attempted.
	project, loadErr := h.store.GetProject(ctx, projectID)
	switch {
	case loadErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{"error": loadErr.Error()})
		return
	case project == nil:
		c.JSON(http.StatusNotFound, gin.H{"error": "project not found"})
		return
	}

	hazards, listErr := h.store.ListHazards(ctx, projectID)
	if listErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to load hazards: " + listErr.Error()})
		return
	}

	// Component tags let patterns contribute triggers even when they have
	// not (yet) been applied to the project as hazards.
	componentTags := collectComponentTags(h, c, projectID)
	allPatterns := iace.AllPatterns()

	engine := iace.NewPatternEngine()
	matchResult := engine.Match(iace.MatchInput{CustomTags: componentTags})

	// Real hazards first, followed by one lightweight pseudo-hazard per
	// matched pattern. The pattern ID is embedded in Description so the
	// crossover engine can extract it again.
	merged := make([]iace.Hazard, 0, len(hazards)+len(matchResult.MatchedPatterns))
	merged = append(merged, hazards...)
	for _, matched := range matchResult.MatchedPatterns {
		pseudo := iace.Hazard{
			Name:        matched.PatternName,
			Description: "Pattern " + matched.PatternID,
			Category:    firstOrEmpty(matched.HazardCats),
		}
		merged = append(merged, pseudo)
	}

	c.JSON(http.StatusOK, iace.GetProjectComplianceTriggers(merged, allPatterns))
}
// firstOrEmpty yields ss[0], or the empty string when ss has no elements
// (nil included).
func firstOrEmpty(ss []string) string {
	if len(ss) == 0 {
		return ""
	}
	return ss[0]
}
+3
View File
@@ -422,6 +422,9 @@ func registerIACERoutes(v1 *gin.RouterGroup, h *handlers.IACEHandler) {
iaceRoutes.GET("/production-lines/:lid/dashboard", h.GetProductionLineDashboard) iaceRoutes.GET("/production-lines/:lid/dashboard", h.GetProductionLineDashboard)
iaceRoutes.POST("/production-lines/:lid/stations", h.AddStationToLine) iaceRoutes.POST("/production-lines/:lid/stations", h.AddStationToLine)
iaceRoutes.DELETE("/production-lines/:lid/stations/:sid", h.RemoveStationFromLine) iaceRoutes.DELETE("/production-lines/:lid/stations/:sid", h.RemoveStationFromLine)
// CE x Compliance Crossover
iaceRoutes.GET("/projects/:id/compliance-triggers", h.GetComplianceTriggers)
} }
} }
@@ -0,0 +1,254 @@
package iace
import "sort"
// GetProjectComplianceTriggers analyses a project's hazards and the given
// pattern library to determine which DSGVO/AI Act/CRA/NIS2/Data Act
// obligations are triggered.
//
// A pattern counts as fired when
//   - its ID ("HP" followed by digits) appears in a hazard's Description,
//     Name or Scenario, or
//   - one of its GeneratedHazardCats equals the Category of any hazard.
//
// Triggers of fired patterns and tag-based triggers are deduplicated on
// Regulation+Module. When several triggers share that key the one with the
// highest severity is kept; among equal severities the first one wins, with
// patterns visited in ascending ID order and tag-based triggers last. The
// output is therefore deterministic for a given input.
//
// The returned summary lists the triggers sorted by severity (high first,
// stable within one severity), their count, and one boolean flag per
// regulation family. Triggers is never nil, so it serialises as a JSON array.
func GetProjectComplianceTriggers(hazards []Hazard, patterns []HazardPattern) *ComplianceTriggerSummary {
	triggerMap := GetComplianceTriggerMap()

	// Collect pattern IDs mentioned in the free-text fields of the hazards.
	firedPatterns := make(map[string]bool)
	for _, h := range hazards {
		extractPatternIDs(h.Description, firedPatterns)
		extractPatternIDs(h.Name, firedPatterns)
		extractPatternIDs(h.Scenario, firedPatterns)
	}

	// Additionally fire every pattern that generates a hazard category
	// present in the project.
	hazardCats := make(map[string]bool)
	for _, h := range hazards {
		if h.Category != "" {
			hazardCats[h.Category] = true
		}
	}
	for _, p := range patterns {
		for _, cat := range p.GeneratedHazardCats {
			if hazardCats[cat] {
				firedPatterns[p.ID] = true
				break
			}
		}
	}

	tagTriggers := GetTagBasedTriggers(collectHazardTags(hazards))

	// Map iteration order is random in Go; visit fired patterns in sorted
	// order so deduplication always picks the same representative.
	firedIDs := make([]string, 0, len(firedPatterns))
	for pid := range firedPatterns {
		firedIDs = append(firedIDs, pid)
	}
	sort.Strings(firedIDs)

	results := make([]TriggerResult, 0, len(tagTriggers))
	indexByKey := make(map[string]int)
	// addResult appends r, or upgrades the existing entry for the same
	// Regulation+Module when r is strictly more severe.
	addResult := func(r TriggerResult) {
		key := r.Trigger.Regulation + "|" + r.Trigger.Module
		if idx, seen := indexByKey[key]; seen {
			if severityRank(r.Trigger.Severity) > severityRank(results[idx].Trigger.Severity) {
				results[idx] = r
			}
			return
		}
		indexByKey[key] = len(results)
		results = append(results, r)
	}

	for _, pid := range firedIDs {
		triggers, ok := triggerMap[pid]
		if !ok {
			continue
		}
		// Hazard name for context; identical for all triggers of one pattern.
		hName := findHazardNameForPattern(pid, hazards)
		for _, t := range triggers {
			addResult(TriggerResult{
				HazardID:   "",
				HazardName: hName,
				PatternID:  pid,
				Trigger:    t,
			})
		}
	}

	// Tag-based triggers come last so pattern-based ones win ties.
	for _, t := range tagTriggers {
		addResult(TriggerResult{
			HazardID:   "",
			HazardName: "Tag-basiert",
			PatternID:  "",
			Trigger:    t,
		})
	}

	// Sort by severity: high > medium > low. Stable to keep the
	// deterministic insertion order within one severity.
	sort.SliceStable(results, func(i, j int) bool {
		return severityRank(results[i].Trigger.Severity) > severityRank(results[j].Trigger.Severity)
	})

	return &ComplianceTriggerSummary{
		Triggers: results,
		Total:    len(results),
		Summary:  buildSummaryFlags(results),
	}
}
// AllPatterns concatenates the hazard patterns of every pattern source,
// starting with the builtin patterns and continuing in the order listed
// below.
func AllPatterns() []HazardPattern {
	// Sources appended after the builtin patterns, in order.
	additional := []func() []HazardPattern{
		GetExtendedHazardPatterns,
		GetPressHazardPatterns,
		GetCobotHazardPatterns,
		GetOperationalHazardPatterns,
		GetDGUVExtendedPatterns,
		GetExtendedHazardPatterns2,
		GetElevatorPatterns,
		GetAGVAgriPatterns,
		GetFoodProcessingPatterns,
		GetPackagingPatterns,
		GetLaserPatterns,
		GetMedicalDevicePatterns,
		GetPressureEquipmentPatterns,
		GetConstructionPatterns,
		GetForestryConveyorPatterns,
		GetPlasticsMetalPatterns,
		GetWeldingGlassTextilePatterns,
		GetSpecificMachinePatterns,
		GetSpecificMachinePatterns2,
		GetCyberExtendedPatterns,
		GetCyberExtendedPatterns2,
		GetCyberExtendedPatterns3,
		GetWorkshopPatterns,
		GetMaintenanceExtPatterns,
		GetFinalPatternsA,
		GetFinalPatternsB,
		GetFinalPatternsC,
		GetFinalPatternsD,
	}

	all := GetBuiltinHazardPatterns()
	for _, source := range additional {
		all = append(all, source()...)
	}
	return all
}
// extractPatternIDs records every occurrence of "HP" immediately followed by
// one or more ASCII digits in text. Each match (e.g. "HP059") is stored as a
// true entry in set; existing entries are left untouched.
func extractPatternIDs(text string, set map[string]bool) {
	isDigit := func(b byte) bool { return b >= '0' && b <= '9' }

	for start := 0; start+2 < len(text); start++ {
		if text[start] != 'H' || text[start+1] != 'P' || !isDigit(text[start+2]) {
			continue
		}
		stop := start + 3
		for stop < len(text) && isDigit(text[stop]) {
			stop++
		}
		set[text[start:stop]] = true
		// The matched span holds only 'P' and digits after its first byte,
		// so no further ID can begin inside it.
		start = stop - 1
	}
}
// findHazardNameForPattern looks for the first hazard whose Description,
// Name or Scenario mentions pid and returns that hazard's Name.
//
// When no hazard mentions pid, the Name of the first hazard in the slice is
// returned as a fallback; an empty slice yields "".
func findHazardNameForPattern(pid string, hazards []Hazard) string {
	if len(hazards) == 0 {
		return ""
	}
	for i := range hazards {
		candidate := hazards[i]
		mentioned := containsPatternID(candidate.Description, pid) ||
			containsPatternID(candidate.Name, pid) ||
			containsPatternID(candidate.Scenario, pid)
		if mentioned {
			return candidate.Name
		}
	}
	// NOTE(review): the fallback attributes the pattern to an unrelated
	// hazard — confirm this is intended for category-fired patterns.
	return hazards[0].Name
}
// containsPatternID reports whether pid occurs in text without being
// directly followed by another ASCII digit, so "HP05" is not found inside
// "HP059". The character preceding the match is not inspected.
func containsPatternID(text, pid string) bool {
	width := len(pid)
	for start := 0; start+width <= len(text); start++ {
		if text[start:start+width] != pid {
			continue
		}
		next := start + width
		if next >= len(text) {
			return true // match reaches the end of text
		}
		if following := text[next]; following < '0' || following > '9' {
			return true // match is not the prefix of a longer ID
		}
	}
	return false
}
// collectHazardTags derives component-style tags from the Category of each
// hazard. Categories without a mapping contribute nothing. The result holds
// each tag once; its order is unspecified.
func collectHazardTags(hazards []Hazard) []string {
	softwareTags := []string{"has_software", "programmable"}
	networkTags := []string{"is_networked", "has_software"}
	aiTags := []string{"has_ai", "has_software"}
	sensorTags := []string{"sensor_part"}

	// Category name → tags implied by a hazard of that category.
	tagsByCategory := map[string][]string{
		"software":                softwareTags,
		"steuerung":               softwareTags,
		"steuerungsfehler":        softwareTags,
		"cyber":                   networkTags,
		"cybersicherheit":         networkTags,
		"netzwerk":                networkTags,
		"ki":                      aiTags,
		"kuenstliche_intelligenz": aiTags,
		"ai_ml":                   aiTags,
		"sensorik":                sensorTags,
		"sensor":                  sensorTags,
	}

	unique := make(map[string]bool)
	for i := range hazards {
		for _, tag := range tagsByCategory[hazards[i].Category] {
			unique[tag] = true
		}
	}

	out := make([]string, 0, len(unique))
	for tag := range unique {
		out = append(out, tag)
	}
	return out
}
// severityRank converts a severity string into a sortable rank:
// "high" = 3, "medium" = 2, "low" = 1, anything else = 0.
func severityRank(s string) int {
	if s == "high" {
		return 3
	}
	if s == "medium" {
		return 2
	}
	if s == "low" {
		return 1
	}
	return 0
}
// buildSummaryFlags computes one boolean flag per regulation family. A flag
// becomes true when at least one result's Trigger.Regulation starts with the
// family's prefix. All five keys are always present in the returned map.
func buildSummaryFlags(results []TriggerResult) map[string]bool {
	// Regulation prefix → summary flag it raises.
	// NOTE(review): "dsfa_required" is raised by every regulation starting
	// with "DSGV", not only by "DSGVO Art. 35" — confirm this is intended.
	families := []struct {
		prefix string
		flag   string
	}{
		{"DSGV", "dsfa_required"},
		{"AI Act", "ai_act_relevant"},
		{"CRA", "cra_relevant"},
		{"NIS2", "nis2_relevant"},
		{"EU Data Act", "data_act_relevant"},
	}

	flags := make(map[string]bool, len(families))
	for _, fam := range families {
		flags[fam.flag] = false
	}

	for i := range results {
		regulation := results[i].Trigger.Regulation
		for _, fam := range families {
			if len(regulation) >= len(fam.prefix) && regulation[:len(fam.prefix)] == fam.prefix {
				flags[fam.flag] = true
			}
		}
	}
	return flags
}
@@ -0,0 +1,439 @@
package iace
// ComplianceTrigger describes one regulatory obligation implied by a CE
// hazard pattern or by a combination of component tags. It tells the user
// which DSGVO/AI Act/CRA/NIS2/Data Act requirement applies, why, and which
// SDK module to visit. All user-facing texts are German.
type ComplianceTrigger struct {
	Regulation string `json:"regulation"` // Regulation incl. article, e.g. "DSGVO Art. 35"
	TriggerCondDE string `json:"trigger_cond_de"` // Reason the obligation is triggered (German)
	Severity string `json:"severity"` // One of "high", "medium", "low"
	Module string `json:"module"` // SDK module key, e.g. "dsfa"
	ModuleLink string `json:"module_link"` // Frontend route of that module, e.g. "/sdk/dsfa"
	ActionDE string `json:"action_de"` // Recommended action (German)
	RAGQuery string `json:"rag_query"` // Search query for RAG enrichment
}
// TriggerResult pairs one compliance trigger with the context it was
// derived from. For tag-based triggers PatternID is empty and HazardName is
// "Tag-basiert".
type TriggerResult struct {
	HazardID string `json:"hazard_id"` // Left empty by GetProjectComplianceTriggers
	HazardName string `json:"hazard_name"` // Name of a hazard giving context for the trigger
	PatternID string `json:"pattern_id"` // ID of the fired pattern, e.g. "HP059"
	Trigger ComplianceTrigger `json:"trigger"` // The triggered obligation
}
// ComplianceTriggerSummary is the top-level response of the crossover
// engine and the JSON body of the compliance-triggers endpoint.
type ComplianceTriggerSummary struct {
	Triggers []TriggerResult `json:"triggers"` // Deduplicated triggers, most severe first
	Total int `json:"total"` // Number of entries in Triggers
	Summary map[string]bool `json:"summary"` // Flags: dsfa_required, ai_act_relevant, cra_relevant, nis2_relevant, data_act_relevant
}
// GetComplianceTriggerMap returns the compliance triggers keyed by hazard
// pattern ID ("HPnnn"). Each entry lists the regulatory obligations that a
// fired pattern implies.
//
// The map is built from scratch on every call, so callers receive their own
// copy. Single patterns are listed explicitly; contiguous ID ranges share
// one trigger set and are filled in via genPatternRange.
func GetComplianceTriggerMap() map[string][]ComplianceTrigger {
	m := make(map[string][]ComplianceTrigger)
	// --- Cobot / camera / biometric patterns ---
	m["HP059"] = []ComplianceTrigger{
		{
			Regulation: "DSGVO Art. 35",
			TriggerCondDE: "Kamera-Personenerkennung verarbeitet biometrische Daten",
			Severity: "high",
			Module: "dsfa",
			ModuleLink: "/sdk/dsfa",
			ActionDE: "Datenschutz-Folgenabschaetzung fuer Kamera-System durchfuehren",
			RAGQuery: "DSFA biometrische Daten Kameraerkennung",
		},
		{
			Regulation: "AI Act Art. 6",
			TriggerCondDE: "Autonome Sicherheitsentscheidung durch KI-System",
			Severity: "high",
			Module: "ai-act",
			ModuleLink: "/sdk/ai-act",
			ActionDE: "Hochrisiko-KI-Einstufung pruefen und dokumentieren",
			RAGQuery: "AI Act Hochrisiko autonome Sicherheitsentscheidung",
		},
	}
	m["HP060"] = []ComplianceTrigger{
		{
			Regulation: "DSGVO Art. 35",
			TriggerCondDE: "Werkzeug-Tracking erfordert Personenerkennung",
			Severity: "high",
			Module: "dsfa",
			ModuleLink: "/sdk/dsfa",
			ActionDE: "DSFA fuer Werkzeug-Tracking mit Personenerkennung erstellen",
			RAGQuery: "DSFA Personenerkennung Werkzeug-Tracking",
		},
	}
	// --- AI/ML safety-critical patterns ---
	m["HP040"] = []ComplianceTrigger{
		{
			Regulation: "AI Act Art. 6",
			TriggerCondDE: "KI trifft sicherheitsrelevante Entscheidung",
			Severity: "high",
			Module: "ai-act",
			ModuleLink: "/sdk/ai-act",
			ActionDE: "Hochrisiko-Klassifizierung und Konformitaetsbewertung einleiten",
			RAGQuery: "AI Act Art 6 Hochrisiko Sicherheitsentscheidung",
		},
		{
			Regulation: "AI Act Art. 9",
			TriggerCondDE: "Risikomanagement fuer Hochrisiko-KI erforderlich",
			Severity: "high",
			Module: "ai-act",
			ModuleLink: "/sdk/ai-act",
			ActionDE: "Risikomanagementsystem nach Art. 9 AI Act aufsetzen",
			RAGQuery: "AI Act Art 9 Risikomanagementsystem Hochrisiko",
		},
	}
	m["HP041"] = []ComplianceTrigger{
		{
			Regulation: "AI Act Art. 14",
			TriggerCondDE: "Menschliche Aufsicht ueber KI-System erforderlich",
			Severity: "high",
			Module: "ai-act",
			ModuleLink: "/sdk/ai-act",
			ActionDE: "Human-Oversight-Mechanismus implementieren und dokumentieren",
			RAGQuery: "AI Act Art 14 menschliche Aufsicht Human Oversight",
		},
	}
	m["HP042"] = []ComplianceTrigger{
		{
			Regulation: "AI Act Art. 6",
			TriggerCondDE: "Bias in sicherheitsrelevanter KI moeglich",
			Severity: "high",
			Module: "ai-act",
			ModuleLink: "/sdk/ai-act",
			ActionDE: "Bias-Analyse und Datenqualitaetspruefung durchfuehren",
			RAGQuery: "AI Act Bias Diskriminierung Sicherheits-KI",
		},
	}
	m["HP043"] = []ComplianceTrigger{
		{
			Regulation: "AI Act Art. 11",
			TriggerCondDE: "Technische Dokumentation fuer KI-System erforderlich",
			Severity: "medium",
			Module: "ai-act",
			ModuleLink: "/sdk/ai-act",
			ActionDE: "Technische Dokumentation nach Anhang IV AI Act erstellen",
			RAGQuery: "AI Act Art 11 technische Dokumentation Anhang IV",
		},
	}
	m["HP044"] = []ComplianceTrigger{
		{
			Regulation: "AI Act Art. 13",
			TriggerCondDE: "Transparenz-Anforderungen fuer KI-System",
			Severity: "medium",
			Module: "ai-act",
			ModuleLink: "/sdk/ai-act",
			ActionDE: "Transparenzhinweise und Nutzerdokumentation bereitstellen",
			RAGQuery: "AI Act Art 13 Transparenz KI Nutzerinformation",
		},
	}
	// --- Cyber Resilience Act (software/firmware) ---
	m["HP033"] = []ComplianceTrigger{
		{
			Regulation: "CRA Art. 10",
			TriggerCondDE: "Schwachstellenmanagement fuer Software-Komponente",
			Severity: "high",
			Module: "cyber",
			ModuleLink: "/sdk/security-backlog",
			ActionDE: "Vulnerability-Management-Prozess nach CRA einrichten",
			RAGQuery: "CRA Art 10 Schwachstellenmanagement Software",
		},
		{
			Regulation: "CRA Art. 13",
			TriggerCondDE: "Sicherheitsupdates muessen bereitgestellt werden",
			Severity: "medium",
			Module: "cyber",
			ModuleLink: "/sdk/security-backlog",
			ActionDE: "Update-Strategie und Patch-Management dokumentieren",
			RAGQuery: "CRA Art 13 Sicherheitsupdates Patch-Management",
		},
	}
	m["HP158"] = []ComplianceTrigger{
		{
			Regulation: "CRA Art. 10",
			TriggerCondDE: "Schwachstelle in Firmware erfordert Vulnerability-Handling",
			Severity: "high",
			Module: "cyber",
			ModuleLink: "/sdk/security-backlog",
			ActionDE: "Schwachstellenmeldung und Patch-Prozess nach CRA etablieren",
			RAGQuery: "CRA Art 10 Firmware Schwachstelle Meldepflicht",
		},
		{
			Regulation: "CRA Art. 11",
			TriggerCondDE: "Meldepflicht bei bekannter Schwachstelle",
			Severity: "high",
			Module: "cyber",
			ModuleLink: "/sdk/security-backlog",
			ActionDE: "Meldeprozess an ENISA/BSI fuer Schwachstellen einrichten",
			RAGQuery: "CRA Art 11 Meldepflicht ENISA Schwachstelle",
		},
	}
	m["HP159"] = []ComplianceTrigger{
		{
			Regulation: "CRA Art. 10",
			TriggerCondDE: "Datenintegritaet der Software muss sichergestellt sein",
			Severity: "medium",
			Module: "cyber",
			ModuleLink: "/sdk/security-backlog",
			ActionDE: "Integritaetsschutz fuer Software-Artefakte implementieren",
			RAGQuery: "CRA Art 10 Datenintegritaet Software Signierung",
		},
	}
	m["HP160"] = []ComplianceTrigger{
		{
			Regulation: "NIS2 Art. 21",
			TriggerCondDE: "Cybersicherheits-Risikomanagement erforderlich",
			Severity: "high",
			Module: "cyber",
			ModuleLink: "/sdk/security-backlog",
			ActionDE: "Cybersicherheits-Risikomanagement nach NIS2 Art. 21 aufsetzen",
			RAGQuery: "NIS2 Art 21 Cybersicherheit Risikomanagement",
		},
		{
			Regulation: "CRA Anhang I",
			TriggerCondDE: "Wesentliche Cybersicherheits-Anforderungen nach CRA",
			Severity: "high",
			Module: "cyber",
			ModuleLink: "/sdk/security-backlog",
			ActionDE: "CRA Anhang I Checkliste fuer Produkt-Cybersicherheit abarbeiten",
			RAGQuery: "CRA Anhang I wesentliche Anforderungen Cybersicherheit",
		},
	}
	// --- Logging/monitoring patterns ---
	m["HP131"] = []ComplianceTrigger{
		{
			Regulation: "DSGVO Art. 6",
			TriggerCondDE: "Rechtsgrundlage fuer Protokollierung personenbez. Daten",
			Severity: "medium",
			Module: "dsfa",
			ModuleLink: "/sdk/dsfa",
			ActionDE: "Rechtsgrundlage fuer Protokollierung pruefen und dokumentieren",
			RAGQuery: "DSGVO Art 6 Rechtsgrundlage Protokollierung Logging",
		},
	}
	// --- AGV / movement profile patterns (HP199-HP213) ---
	// Every ID in the range receives its own slice with the same two triggers.
	agvIDs := genPatternRange("HP", 199, 213)
	for _, pid := range agvIDs {
		m[pid] = []ComplianceTrigger{
			{
				Regulation: "DSGVO Art. 35",
				TriggerCondDE: "AGV-Bewegungsprofile koennen Rueckschluesse auf Personen erlauben",
				Severity: "high",
				Module: "dsfa",
				ModuleLink: "/sdk/dsfa",
				ActionDE: "DSFA fuer AGV-Bewegungsdaten erstellen",
				RAGQuery: "DSFA Bewegungsprofile AGV Personenbezug",
			},
			{
				Regulation: "EU Data Act Art. 3",
				TriggerCondDE: "Maschinendaten-Zugangsrecht fuer Nutzer nach Data Act",
				Severity: "medium",
				Module: "vendor-compliance",
				ModuleLink: "/sdk/vendor-compliance",
				ActionDE: "Datenzugangsrechte nach EU Data Act fuer Maschinendaten pruefen",
				RAGQuery: "EU Data Act Art 3 Maschinendaten Zugangsrecht",
			},
		}
	}
	// --- Cyber-security patterns HP800-HP814 ---
	cyberIDs1 := genPatternRange("HP", 800, 814)
	for _, pid := range cyberIDs1 {
		m[pid] = []ComplianceTrigger{
			{
				Regulation: "NIS2 Art. 21",
				TriggerCondDE: "Cybersicherheits-Risikomanagement fuer vernetzte Komponente",
				Severity: "high",
				Module: "cyber",
				ModuleLink: "/sdk/security-backlog",
				ActionDE: "NIS2 Cybersicherheits-Massnahmen pruefen und dokumentieren",
				RAGQuery: "NIS2 Art 21 Cybersicherheit vernetzte Maschine",
			},
			{
				Regulation: "CRA Art. 10",
				TriggerCondDE: "Schwachstellenmanagement fuer vernetzte Komponente",
				Severity: "high",
				Module: "cyber",
				ModuleLink: "/sdk/security-backlog",
				ActionDE: "CRA-konforme Schwachstellenbehandlung einrichten",
				RAGQuery: "CRA Art 10 Schwachstellenmanagement vernetzte Maschine",
			},
		}
	}
	// --- Cyber-security patterns HP815-HP829 ---
	// Unlike HP800-HP814, the CRA Art. 10 trigger here is only "medium".
	cyberIDs2 := genPatternRange("HP", 815, 829)
	for _, pid := range cyberIDs2 {
		m[pid] = []ComplianceTrigger{
			{
				Regulation: "NIS2 Art. 21",
				TriggerCondDE: "Netzwerk-Sicherheitsmassnahmen nach NIS2",
				Severity: "high",
				Module: "cyber",
				ModuleLink: "/sdk/security-backlog",
				ActionDE: "NIS2-Sicherheitskonzept fuer Netzwerkkomponenten erstellen",
				RAGQuery: "NIS2 Art 21 Netzwerk Sicherheit Massnahmen",
			},
			{
				Regulation: "CRA Art. 10",
				TriggerCondDE: "CRA-Anforderungen fuer Software mit Netzwerkzugang",
				Severity: "medium",
				Module: "cyber",
				ModuleLink: "/sdk/security-backlog",
				ActionDE: "CRA-Konformitaet fuer Netzwerk-Software sicherstellen",
				RAGQuery: "CRA Software Netzwerkzugang Sicherheitsanforderungen",
			},
		}
	}
	// --- AI/ML-specific cyber patterns HP830-HP844 ---
	aiCyberIDs := genPatternRange("HP", 830, 844)
	for _, pid := range aiCyberIDs {
		m[pid] = []ComplianceTrigger{
			{
				Regulation: "AI Act Art. 6",
				TriggerCondDE: "KI/ML-System in sicherheitsrelevantem Kontext",
				Severity: "high",
				Module: "ai-act",
				ModuleLink: "/sdk/ai-act",
				ActionDE: "Hochrisiko-Einstufung und AI-Act-Konformitaet pruefen",
				RAGQuery: "AI Act Hochrisiko KI ML sicherheitsrelevant",
			},
			{
				Regulation: "DSGVO Art. 22",
				TriggerCondDE: "Automatisierte Entscheidungsfindung durch KI moeglich",
				Severity: "high",
				Module: "dsfa",
				ModuleLink: "/sdk/dsfa",
				ActionDE: "Automatisierte Einzelentscheidung nach Art. 22 DSGVO pruefen",
				RAGQuery: "DSGVO Art 22 automatisierte Entscheidung KI Profiling",
			},
		}
	}
	// --- NIS2 network/HMI patterns HP845-HP864 ---
	nis2IDs := genPatternRange("HP", 845, 864)
	for _, pid := range nis2IDs {
		m[pid] = []ComplianceTrigger{
			{
				Regulation: "NIS2 Art. 21",
				TriggerCondDE: "Netzwerk-/HMI-Komponente erfordert NIS2-Massnahmen",
				Severity: "high",
				Module: "cyber",
				ModuleLink: "/sdk/security-backlog",
				ActionDE: "NIS2-Sicherheitsanforderungen fuer HMI/Netzwerk umsetzen",
				RAGQuery: "NIS2 Art 21 HMI Netzwerk Sicherheit",
			},
		}
	}
	return m
}
// GetTagBasedTriggers derives compliance triggers from component tags
// instead of pattern IDs. A rule fires when every tag it requires is present
// in tags. Triggers are returned in rule order; the result is nil when no
// rule fires.
func GetTagBasedTriggers(tags []string) []ComplianceTrigger {
	present := make(map[string]bool, len(tags))
	for i := range tags {
		present[tags[i]] = true
	}

	rules := []struct {
		requires []string
		trigger  ComplianceTrigger
	}{
		{
			// has_software + programmable → CRA Art. 10
			requires: []string{"has_software", "programmable"},
			trigger: ComplianceTrigger{
				Regulation:    "CRA Art. 10",
				TriggerCondDE: "Programmierbare Software-Komponente erfordert CRA-Konformitaet",
				Severity:      "medium",
				Module:        "cyber",
				ModuleLink:    "/sdk/security-backlog",
				ActionDE:      "CRA-Anforderungen fuer programmierbare Software pruefen",
				RAGQuery:      "CRA Art 10 programmierbare Software Sicherheit",
			},
		},
		{
			// sensor_part + has_software → EU Data Act Art. 3
			requires: []string{"sensor_part", "has_software"},
			trigger: ComplianceTrigger{
				Regulation:    "EU Data Act Art. 3",
				TriggerCondDE: "Sensor mit Software erzeugt Maschinendaten — Zugangsrecht nach Data Act",
				Severity:      "medium",
				Module:        "vendor-compliance",
				ModuleLink:    "/sdk/vendor-compliance",
				ActionDE:      "Datenzugangsrechte fuer Sensor-/Maschinendaten nach Data Act pruefen",
				RAGQuery:      "EU Data Act Art 3 Sensordaten Maschinendaten Zugang",
			},
		},
		{
			// has_ai → AI Act Art. 6 (generic)
			requires: []string{"has_ai"},
			trigger: ComplianceTrigger{
				Regulation:    "AI Act Art. 6",
				TriggerCondDE: "KI-Komponente erkannt — Hochrisiko-Einstufung pruefen",
				Severity:      "high",
				Module:        "ai-act",
				ModuleLink:    "/sdk/ai-act",
				ActionDE:      "AI-Act-Klassifizierung fuer KI-Komponente durchfuehren",
				RAGQuery:      "AI Act Art 6 Klassifizierung KI-System Hochrisiko",
			},
		},
		{
			// is_networked → NIS2 Art. 21
			requires: []string{"is_networked"},
			trigger: ComplianceTrigger{
				Regulation:    "NIS2 Art. 21",
				TriggerCondDE: "Vernetzte Komponente unterliegt NIS2-Sicherheitspflichten",
				Severity:      "medium",
				Module:        "cyber",
				ModuleLink:    "/sdk/security-backlog",
				ActionDE:      "NIS2-Anforderungen fuer vernetzte Infrastruktur bewerten",
				RAGQuery:      "NIS2 Art 21 vernetzte Infrastruktur Pflichten",
			},
		},
	}

	var fired []ComplianceTrigger
	for _, rule := range rules {
		satisfied := true
		for _, tag := range rule.requires {
			if !present[tag] {
				satisfied = false
				break
			}
		}
		if satisfied {
			fired = append(fired, rule.trigger)
		}
	}
	return fired
}
// genPatternRange generates pattern IDs like "HP800", "HP801", ..., "HP814":
// one ID per integer in the inclusive range [from, to], built as
// prefix + padInt(i).
//
// An inverted range (to < from) yields an empty, non-nil slice. Without the
// guard, make would be called with a negative capacity and panic at runtime.
func genPatternRange(prefix string, from, to int) []string {
	if to < from {
		return []string{}
	}
	ids := make([]string, 0, to-from+1)
	for i := from; i <= to; i++ {
		ids = append(ids, prefix+padInt(i))
	}
	return ids
}
// padInt renders n in decimal, left-padded with zeros to at least three
// characters for values below 100; larger values are returned unpadded.
func padInt(n int) string {
	digits := triggerItoa(n)
	switch {
	case n < 10:
		return "00" + digits
	case n < 100:
		return "0" + digits
	default:
		return digits
	}
}
// triggerItoa returns the decimal representation of a non-negative integer
// without pulling in strconv. Negative input produces an empty string because
// no digits are emitted for it.
func triggerItoa(n int) string {
	if n == 0 {
		return "0"
	}
	// Collect digits least-significant first, then reverse in place.
	digits := make([]byte, 0, 20)
	for rest := n; rest > 0; rest /= 10 {
		digits = append(digits, byte('0'+rest%10))
	}
	for lo, hi := 0, len(digits)-1; lo < hi; lo, hi = lo+1, hi-1 {
		digits[lo], digits[hi] = digits[hi], digits[lo]
	}
	return string(digits)
}
@@ -0,0 +1,175 @@
"""
HTML email report builder for document checks.
Generates a styled HTML report similar to the frontend ChecklistView,
including L1/L2 check hierarchy, progress bars, and actionable hints.
"""
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from .agent_doc_check_routes import CheckItem, DocCheckResult
def _bar(pct: int, color: str) -> str:
bg = {"green": "#22c55e", "yellow": "#eab308", "red": "#ef4444", "blue": "#60a5fa"}
c = bg.get(color, "#60a5fa")
return (
f'<div style="display:inline-block;width:120px;height:8px;background:#e5e7eb;'
f'border-radius:4px;overflow:hidden;vertical-align:middle;margin-right:8px">'
f'<div style="width:{pct}%;height:100%;background:{c};border-radius:4px"></div>'
f'</div><span style="font-size:13px;font-weight:600;color:{c}">{pct}%</span>'
)
def _icon(passed: bool, skipped: bool = False) -> str:
if skipped:
return '<span style="color:#d1d5db">&mdash;</span>'
if passed:
return '<span style="color:#22c55e;font-weight:bold">&#10003;</span>'
return '<span style="color:#ef4444;font-weight:bold">&#10007;</span>'
def _hint_box(hint: str) -> str:
return (
f'<div style="font-size:11px;color:#dc2626;margin:2px 0 4px 20px;'
f'padding:4px 8px;background:#fef2f2;border-radius:4px;'
f'border-left:3px solid #fca5a5">{hint}</div>'
)
def build_html_report(
    results: list[DocCheckResult],
    cookie_result: dict | None,
) -> str:
    """Assemble the complete HTML email report.

    The report consists of a heading with a summary line (number of documents
    and how many of them reached 100 % completeness), one card per document
    and, when a non-empty cookie result is given, a cookie banner card.

    Args:
        results: Per-document check results to render, in order.
        cookie_result: Cookie banner check result; skipped when falsy.

    Returns:
        The HTML fragments joined by newlines.
    """
    complete_docs = len([doc for doc in results if doc.completeness_pct == 100])

    parts = [
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:700px;margin:0 auto">',
        '<h2 style="margin-bottom:4px">Dokumenten-Pruefung</h2>',
        f'<p style="color:#6b7280;margin-top:0">'
        f'{len(results)} Dokumente, {complete_docs} vollstaendig</p>',
    ]

    for doc in results:
        _render_document(parts, doc)

    if cookie_result:
        _render_cookie_banner(parts, cookie_result)

    parts.append('</div>')
    return "\n".join(parts)
def _render_document(html: list[str], r: DocCheckResult) -> None:
    """Append the HTML card for one checked document to ``html``.

    The card has a header (status badge, document label, pass counts and
    progress bars) and a body that shows either the error message or the
    level-1 checks with their level-2 children and the analysed word count.

    The document label and the error message are HTML-escaped before they are
    interpolated, so markup-like characters in them are rendered literally
    instead of breaking or injecting markup into the email.

    Args:
        html: Output list of HTML fragments; extended in place.
        r: Result of one document check.
    """
    # Local import: the ``html`` parameter shadows the stdlib module name.
    from html import escape as _esc

    pct = r.completeness_pct
    cpct = r.correctness_pct
    bar_color = "green" if pct >= 80 else "yellow" if pct >= 50 else "red"
    status_label = "OK" if pct == 100 else "LUECKENHAFT" if pct >= 50 else "MANGELHAFT"
    if r.error:
        # An error overrides whatever the percentages would suggest.
        status_label = "FEHLER"

    l1_checks = [c for c in r.checks if c.level == 1]
    # Group level-2 checks under the id of their level-1 parent.
    l2_by_parent: dict[str, list[CheckItem]] = {}
    for c in r.checks:
        if c.level == 2 and c.parent:
            l2_by_parent.setdefault(c.parent, []).append(c)

    l1_passed = sum(1 for c in l1_checks if c.passed)
    l2_active = [c for c in r.checks if c.level == 2 and not c.skipped]
    l2_passed = sum(1 for c in l2_active if c.passed)

    # Header
    html.append(
        f'<div style="border:1px solid #e5e7eb;border-radius:8px;margin-bottom:12px;overflow:hidden">'
        f'<div style="padding:12px 16px;background:#f9fafb">'
        f'<div style="display:flex;justify-content:space-between;align-items:center"><div>'
        f'<span style="font-size:11px;background:#f3f4f6;padding:2px 8px;border-radius:4px;'
        f'color:#4b5563;font-weight:500;margin-right:8px">{status_label}</span>'
        f'<strong style="font-size:14px">{_esc(str(r.label))}</strong>'
        f'<div style="font-size:12px;color:#6b7280;margin-top:2px">'
        f'{l1_passed}/{len(l1_checks)} Pflichtangaben'
    )
    if l2_active:
        html.append(f', {l2_passed}/{len(l2_active)} Detailpruefungen')
    html.append(f'</div></div><div style="text-align:right">{_bar(pct, bar_color)}')
    # The correctness bar is only shown when there is a non-zero value and
    # at least one non-skipped level-2 check.
    if cpct and l2_active:
        html.append(f'<br>{_bar(cpct, "blue")}')
    html.append('</div></div></div>')

    # Body
    if r.error:
        html.append(
            f'<div style="padding:12px 16px;color:#991b1b">{_esc(str(r.error))}</div>'
        )
    else:
        html.append('<div style="padding:8px 16px 12px">')
        for c in l1_checks:
            _render_l1_check(html, c, l2_by_parent.get(c.id, []))
        if r.word_count:
            html.append(
                f'<div style="font-size:11px;color:#9ca3af;margin-top:8px;'
                f'padding-top:8px;border-top:1px solid #e5e7eb">'
                f'{r.word_count} Woerter analysiert</div>'
            )
        html.append('</div>')

    # Close the outer card container.
    html.append('</div>')
def _render_l1_check(
    html: list[str], c: CheckItem, children: list[CheckItem],
) -> None:
    """Append one level-1 check row followed by its level-2 children.

    The row shows the pass/fail icon, the check label, a ``(passed/total)``
    counter when non-skipped children exist, and a hint box when the check
    failed and carries a hint. Skipped children are neither counted nor
    rendered.

    The label is HTML-escaped before interpolation so markup-like characters
    are shown literally.

    Args:
        html: Output list of HTML fragments; extended in place.
        c: The level-1 check to render.
        children: Level-2 checks whose parent is ``c``.
    """
    # Local import: the ``html`` parameter shadows the stdlib module name.
    from html import escape as _esc

    l2_sub = [ch for ch in children if not ch.skipped]
    l2_passed = sum(1 for ch in l2_sub if ch.passed)
    style = "color:#991b1b;font-weight:600" if not c.passed else "color:#374151"

    html.append(
        f'<div style="padding:3px 0">{_icon(c.passed)} '
        f'<span style="font-size:13px;{style}">{_esc(str(c.label))}</span>'
    )
    if l2_sub:
        html.append(f' <span style="color:#9ca3af;font-size:11px">({l2_passed}/{len(l2_sub)})</span>')
    if not c.passed and c.hint:
        html.append(_hint_box(c.hint))
    html.append('</div>')

    # l2_sub already excludes skipped children.
    for ch in l2_sub:
        _render_l2_check(html, ch)
def _render_l2_check(html: list[str], ch: CheckItem) -> None:
    """Append one indented level-2 check row to ``html``.

    A passed check with matched text shows the first 80 characters of that
    text as evidence; a failed check with a hint shows the hint box.

    The label and the matched text are HTML-escaped before interpolation.
    The matched text is truncated first and escaped afterwards so that an
    escape sequence is never cut in half.

    Args:
        html: Output list of HTML fragments; extended in place.
        ch: The level-2 check to render.
    """
    # Local import: the ``html`` parameter shadows the stdlib module name.
    from html import escape as _esc

    style = "color:#dc2626;font-weight:500" if not ch.passed else "color:#6b7280"
    html.append(
        f'<div style="padding:2px 0 2px 24px;border-left:2px solid #e5e7eb;margin-left:8px">'
        f'{_icon(ch.passed)} '
        f'<span style="font-size:12px;{style}">{_esc(str(ch.label))}</span>'
    )
    if ch.passed and ch.matched_text:
        evidence = _esc(str(ch.matched_text)[:80])
        html.append(
            f'<div style="font-size:10px;color:#9ca3af;font-family:monospace;'
            f'margin-left:20px;overflow:hidden;text-overflow:ellipsis;'
            f'white-space:nowrap">"...{evidence}..."</div>'
        )
    if not ch.passed and ch.hint:
        html.append(_hint_box(ch.hint))
    html.append('</div>')
def _render_cookie_banner(html: list[str], cookie_result: dict) -> None:
html.append(
'<div style="border:1px solid #e5e7eb;border-radius:8px;'
'padding:12px 16px;margin-bottom:12px">'
'<strong>Cookie-Banner Pruefung</strong><br>'
f'Banner erkannt: {cookie_result.get("banner_detected", False)}<br>'
f'Anbieter: {cookie_result.get("banner_provider", "unbekannt")}'
)
violations = cookie_result.get("banner_checks", {}).get("violations", [])
if violations:
for v in violations[:10]:
html.append(f'<br>{_icon(False)} {v.get("text", "")[:80]}')
else:
html.append('<br><span style="color:#22c55e">Keine Verstoesse erkannt.</span>')
html.append('</div>')
@@ -141,7 +141,7 @@ async def _run_doc_check(check_id: str, req: DocCheckRequest):
email_result = send_email( email_result = send_email(
recipient=req.recipient, recipient=req.recipient,
subject=f"[DOKUMENTEN-PRUEFUNG] {len(results)} Dokumente geprueft", subject=f"[DOKUMENTEN-PRUEFUNG] {len(results)} Dokumente geprueft",
body_html=f"<pre>{summary}</pre>", body_html=summary,
) )
response = DocCheckResponse( response = DocCheckResponse(
@@ -284,40 +284,49 @@ def _split_into_sections(text: str, parent_label: str, url: str) -> list[dict]:
Detects sections like 'Cookies', 'Social Media', 'Dienste von Drittanbietern' Detects sections like 'Cookies', 'Social Media', 'Dienste von Drittanbietern'
and classifies each by document type for separate checking. and classifies each by document type for separate checking.
Deduplicates: if the same doc_type appears twice, texts are merged.
""" """
import re as _re import re as _re
sections = [] sections: list[dict] = []
seen_types: dict[str, int] = {} # doc_type -> index in sections
# Split by lines that look like headings (short, followed by longer content)
lines = text.split("\n") lines = text.split("\n")
current_heading = "" current_heading = ""
current_text = [] current_text: list[str] = []
def _save_section(heading: str, text_lines: list[str]) -> None:
sec_text = "\n".join(text_lines)
if len(sec_text.split()) < 100:
return
sec_type = _classify_section(heading)
if not sec_type:
return
# Merge duplicate doc_types (e.g. two "Social Media" headings)
if sec_type in seen_types:
idx = seen_types[sec_type]
sections[idx]["text"] += "\n\n" + sec_text
sections[idx]["word_count"] = len(sections[idx]["text"].split())
else:
seen_types[sec_type] = len(sections)
sections.append({
"title": f"{parent_label} > {heading}",
"text": sec_text,
"doc_type": sec_type,
"word_count": len(sec_text.split()),
})
for line in lines: for line in lines:
stripped = line.strip() stripped = line.strip()
# Detect heading: short line (< 80 chars), not empty, followed by content
is_heading = ( is_heading = (
5 < len(stripped) < 80 5 < len(stripped) < 80
and not stripped.endswith(".") and not stripped.endswith(".")
and not stripped.endswith(",") and not stripped.endswith(",")
and stripped[0].isupper() and stripped[0].isupper()
) )
# Skip-headings should NOT start a new section — their text
# belongs to the previous section (e.g. "Risikoabwägung" inside DSFA)
is_skip = is_heading and stripped.lower().strip() in SKIP_HEADINGS is_skip = is_heading and stripped.lower().strip() in SKIP_HEADINGS
if is_heading and not is_skip and current_heading and len("\n".join(current_text)) > 200: if is_heading and not is_skip and current_heading:
# Save previous section _save_section(current_heading, current_text)
sec_text = "\n".join(current_text)
sec_type = _classify_section(current_heading)
if sec_type and sec_type != "skip":
sections.append({
"title": f"{parent_label} > {current_heading}",
"text": sec_text,
"doc_type": sec_type,
"word_count": len(sec_text.split()),
})
if is_heading and not is_skip: if is_heading and not is_skip:
current_heading = stripped current_heading = stripped
@@ -326,16 +335,8 @@ def _split_into_sections(text: str, parent_label: str, url: str) -> list[dict]:
current_text.append(line) current_text.append(line)
# Last section # Last section
if current_heading and len("\n".join(current_text)) > 200: if current_heading:
sec_text = "\n".join(current_text) _save_section(current_heading, current_text)
sec_type = _classify_section(current_heading)
if sec_type and sec_type != "skip":
sections.append({
"title": f"{parent_label} > {current_heading}",
"text": sec_text,
"doc_type": sec_type,
"word_count": len(sec_text.split()),
})
return sections return sections
@@ -347,6 +348,10 @@ SKIP_HEADINGS = {
"risikoabwaegung und datenschutzfolgenabschaetzung", "risikoabwaegung und datenschutzfolgenabschaetzung",
} }
# Track already-seen section types to avoid duplicate sub-documents
# (e.g. two "Social Media" headings on the same page)
_DEDUP_TYPES = {"social_media", "cookie", "dsfa", "widerruf", "impressum"}
def _classify_section(heading: str) -> str | None: def _classify_section(heading: str) -> str | None:
"""Classify a section heading into a document type.""" """Classify a section heading into a document type."""
@@ -377,41 +382,5 @@ async def _check_cookie_banner(url: str) -> dict | None:
def _build_report(results: list[DocCheckResult], cookie_result: dict | None) -> str: def _build_report(results: list[DocCheckResult], cookie_result: dict | None) -> str:
"""Build email report.""" from .agent_doc_check_report import build_html_report
parts = [ return build_html_report(results, cookie_result)
"DOKUMENTEN-PRUEFUNG",
f"Dokumente geprueft: {len(results)}",
"",
]
for r in results:
status = "OK" if r.completeness_pct == 100 else "LUECKENHAFT" if r.completeness_pct >= 50 else "MANGELHAFT"
if r.error:
status = "FEHLER"
detail = f", Korrektheit {r.correctness_pct}%" if r.correctness_pct else ""
parts.append(f"[{status}] {r.label} ({r.completeness_pct}%{detail}, {r.word_count} Woerter)")
for check in r.checks:
if check.skipped:
continue
icon = "+" if check.passed else "!!"
indent = " " if check.level == 2 else " "
parts.append(f"{indent}[{icon}] {check.label}")
if r.error:
parts.append(f" FEHLER: {r.error}")
parts.append("")
if cookie_result:
parts.extend([
"Cookie-Banner Pruefung:",
f" Banner erkannt: {cookie_result.get('banner_detected', False)}",
f" Anbieter: {cookie_result.get('banner_provider', 'unbekannt')}",
])
violations = cookie_result.get("banner_checks", {}).get("violations", [])
if violations:
for v in violations[:10]:
parts.append(f" [!!] {v.get('text', '')[:80]}")
else:
parts.append(" Keine Verstoesse erkannt.")
return "\n".join(parts)