fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
17
backend/compliance/services/__init__.py
Normal file
17
backend/compliance/services/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""
|
||||
Compliance Services Module.
|
||||
|
||||
Contains business logic services for the compliance module:
|
||||
- PDF extraction from BSI-TR and EU regulations
|
||||
- LLM-based requirement interpretation
|
||||
- Export generation
|
||||
"""
|
||||
|
||||
from .pdf_extractor import BSIPDFExtractor, BSIAspect, EURegulationExtractor, EUArticle
|
||||
|
||||
__all__ = [
|
||||
"BSIPDFExtractor",
|
||||
"BSIAspect",
|
||||
"EURegulationExtractor",
|
||||
"EUArticle",
|
||||
]
|
||||
500
backend/compliance/services/ai_compliance_assistant.py
Normal file
500
backend/compliance/services/ai_compliance_assistant.py
Normal file
@@ -0,0 +1,500 @@
|
||||
"""
|
||||
AI Compliance Assistant for Breakpilot.
|
||||
|
||||
Provides AI-powered features for:
|
||||
- Requirement interpretation (translating legal text to technical guidance)
|
||||
- Control suggestions (recommending controls for requirements)
|
||||
- Risk assessment (evaluating compliance risks)
|
||||
- Gap analysis (identifying missing controls)
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional, Dict, Any
|
||||
from enum import Enum
|
||||
|
||||
from .llm_provider import LLMProvider, get_shared_provider, LLMResponse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class InterpretationSection(str, Enum):
    """Sections in a requirement interpretation.

    Subclasses ``str`` so members compare equal to their plain string
    values and serialize directly to JSON.
    """
    SUMMARY = "summary"                            # 2-3 sentence recap
    APPLICABILITY = "applicability"                # how the rule applies here
    TECHNICAL_MEASURES = "technical_measures"      # concrete technical measures
    AFFECTED_MODULES = "affected_modules"          # impacted service modules
    RISK_LEVEL = "risk_level"                      # low / medium / high / critical
    IMPLEMENTATION_HINTS = "implementation_hints"  # practical implementation advice
|
||||
|
||||
|
||||
@dataclass
class RequirementInterpretation:
    """AI-generated interpretation of a regulatory requirement.

    On failure paths the text fields are empty, ``confidence_score`` is 0.0
    and ``error`` carries the exception message.
    """
    requirement_id: str
    summary: str
    applicability: str
    technical_measures: List[str]
    affected_modules: List[str]
    risk_level: str  # low, medium, high, critical
    implementation_hints: List[str]
    confidence_score: float  # 0.0 - 1.0
    # Unparsed LLM output, kept for debugging/auditing.
    raw_response: Optional[str] = None
    # Error message when the interpretation failed; None on success.
    error: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class ControlSuggestion:
    """AI-suggested control for a requirement."""
    control_id: str  # Suggested ID like "PRIV-XXX"
    domain: str  # Control domain (priv, sdlc, iam, etc.)
    title: str
    description: str
    pass_criteria: str  # measurable success criteria
    implementation_guidance: str
    is_automated: bool  # whether the control can be verified automatically
    automation_tool: Optional[str] = None  # tool name when is_automated is True
    priority: str = "medium"  # low, medium, high, critical
    confidence_score: float = 0.0  # 0.0 - 1.0
|
||||
|
||||
|
||||
@dataclass
class RiskAssessment:
    """AI-generated risk assessment for a module."""
    module_name: str
    overall_risk: str  # low, medium, high, critical ("unknown" on failure)
    # Each entry: {"factor": ..., "severity": ..., "likelihood": ...}
    risk_factors: List[Dict[str, Any]]
    recommendations: List[str]
    compliance_gaps: List[str]
    confidence_score: float = 0.0  # 0.0 when the assessment failed
|
||||
|
||||
|
||||
@dataclass
class GapAnalysis:
    """Gap analysis result for requirement-control mapping."""
    requirement_id: str
    requirement_title: str
    coverage_level: str  # full, partial, none ("unknown" on failure)
    existing_controls: List[str]  # IDs of controls already mapped
    missing_coverage: List[str]  # aspects not covered by any control
    suggested_actions: List[str]
|
||||
|
||||
|
||||
class AIComplianceAssistant:
    """
    AI-powered compliance assistant using LLM providers.

    Supports both Claude API and self-hosted LLMs through the
    abstracted LLMProvider interface.
    """

    # System prompts for different tasks. All prompts are German because the
    # compliance workflows (DSGVO, BSI-TR) target German-speaking reviewers.
    # They instruct the model to answer with bare JSON, which
    # _parse_json_response() then extracts defensively.
    SYSTEM_PROMPT_BASE = """Du bist ein Compliance-Experte für die Breakpilot Bildungsplattform.
Breakpilot ist ein EdTech SaaS-System mit folgenden Eigenschaften:
- KI-gestützte Klausurkorrektur und Feedback
- Videokonferenzen (Jitsi) und Chat (Matrix)
- Schulverwaltung mit Noten und Zeugnissen
- Consent-Management und DSGVO-Compliance
- Self-Hosted in Deutschland

Du analysierst regulatorische Anforderungen und gibst konkrete technische Empfehlungen."""

    # Template for interpret_requirement(); double braces escape literal JSON.
    INTERPRETATION_PROMPT = """Analysiere folgende regulatorische Anforderung für Breakpilot:

Verordnung: {regulation_name} ({regulation_code})
Artikel: {article}
Titel: {title}
Originaltext: {requirement_text}

Erstelle eine strukturierte Analyse im JSON-Format:
{{
    "summary": "Kurze Zusammenfassung in 2-3 Sätzen",
    "applicability": "Erklärung wie dies auf Breakpilot anwendbar ist",
    "technical_measures": ["Liste konkreter technischer Maßnahmen"],
    "affected_modules": ["Liste betroffener Breakpilot-Module (z.B. consent-service, klausur-service, matrix-synapse)"],
    "risk_level": "low|medium|high|critical",
    "implementation_hints": ["Konkrete Implementierungshinweise"]
}}

Gib NUR das JSON zurück, keine zusätzlichen Erklärungen."""

    # Template for suggest_controls(); the domain list must stay in sync with
    # the control taxonomy used elsewhere in the compliance module.
    CONTROL_SUGGESTION_PROMPT = """Basierend auf folgender Anforderung, schlage passende Controls vor:

Verordnung: {regulation_name}
Anforderung: {requirement_title}
Beschreibung: {requirement_text}
Betroffene Module: {affected_modules}

Schlage 1-3 Controls im JSON-Format vor:
{{
    "controls": [
        {{
            "control_id": "DOMAIN-XXX",
            "domain": "priv|iam|sdlc|crypto|ops|ai|cra|gov|aud",
            "title": "Kurzer Titel",
            "description": "Beschreibung des Controls",
            "pass_criteria": "Messbare Erfolgskriterien",
            "implementation_guidance": "Wie implementieren",
            "is_automated": true|false,
            "automation_tool": "Tool-Name oder null",
            "priority": "low|medium|high|critical"
        }}
    ]
}}

Domains:
- priv: Datenschutz & Privacy (DSGVO)
- iam: Identity & Access Management
- sdlc: Secure Development Lifecycle
- crypto: Kryptografie
- ops: Betrieb & Monitoring
- ai: KI-spezifisch (AI Act)
- cra: Cyber Resilience Act
- gov: Governance
- aud: Audit & Nachvollziehbarkeit

Gib NUR das JSON zurück."""

    # Template for assess_module_risk().
    RISK_ASSESSMENT_PROMPT = """Bewerte das Compliance-Risiko für folgendes Breakpilot-Modul:

Modul: {module_name}
Typ: {service_type}
Beschreibung: {description}
Verarbeitet PII: {processes_pii}
KI-Komponenten: {ai_components}
Kritikalität: {criticality}
Daten-Kategorien: {data_categories}
Zugeordnete Verordnungen: {regulations}

Erstelle eine Risikobewertung im JSON-Format:
{{
    "overall_risk": "low|medium|high|critical",
    "risk_factors": [
        {{"factor": "Beschreibung", "severity": "low|medium|high", "likelihood": "low|medium|high"}}
    ],
    "recommendations": ["Empfehlungen zur Risikominderung"],
    "compliance_gaps": ["Identifizierte Compliance-Lücken"]
}}

Gib NUR das JSON zurück."""

    # Template for analyze_gap().
    GAP_ANALYSIS_PROMPT = """Analysiere die Control-Abdeckung für folgende Anforderung:

Anforderung: {requirement_title}
Verordnung: {regulation_code}
Beschreibung: {requirement_text}

Existierende Controls:
{existing_controls}

Bewerte die Abdeckung und identifiziere Lücken im JSON-Format:
{{
    "coverage_level": "full|partial|none",
    "covered_aspects": ["Was ist bereits abgedeckt"],
    "missing_coverage": ["Was fehlt noch"],
    "suggested_actions": ["Empfohlene Maßnahmen"]
}}

Gib NUR das JSON zurück."""

    def __init__(self, llm_provider: Optional[LLMProvider] = None):
        """Initialize the assistant with an LLM provider.

        Args:
            llm_provider: Provider to use; falls back to the process-wide
                shared provider when None.
        """
        self.llm = llm_provider or get_shared_provider()
|
||||
|
||||
    async def interpret_requirement(
        self,
        requirement_id: str,
        article: str,
        title: str,
        requirement_text: str,
        regulation_code: str,
        regulation_name: str
    ) -> RequirementInterpretation:
        """
        Generate an interpretation for a regulatory requirement.

        Translates legal text into practical technical guidance
        for the Breakpilot development team.

        Args:
            requirement_id: Identifier echoed back in the result.
            article: Article reference within the regulation.
            title: Requirement title.
            requirement_text: Original legal text (may be empty).
            regulation_code: Short code of the regulation (e.g. "DSGVO").
            regulation_name: Human-readable regulation name.

        Returns:
            A populated RequirementInterpretation; on any failure a
            placeholder with empty fields, confidence_score=0.0 and the
            error message set (never raises).
        """
        prompt = self.INTERPRETATION_PROMPT.format(
            regulation_name=regulation_name,
            regulation_code=regulation_code,
            article=article,
            title=title,
            requirement_text=requirement_text or "Kein Text verfügbar"
        )

        try:
            # Low temperature: we want reproducible, factual analysis.
            response = await self.llm.complete(
                prompt=prompt,
                system_prompt=self.SYSTEM_PROMPT_BASE,
                max_tokens=2000,
                temperature=0.3
            )

            # Parse JSON response (tolerates fences/prose; {} on failure).
            data = self._parse_json_response(response.content)

            return RequirementInterpretation(
                requirement_id=requirement_id,
                summary=data.get("summary", ""),
                applicability=data.get("applicability", ""),
                technical_measures=data.get("technical_measures", []),
                affected_modules=data.get("affected_modules", []),
                risk_level=data.get("risk_level", "medium"),
                implementation_hints=data.get("implementation_hints", []),
                confidence_score=0.85,  # Based on model quality
                raw_response=response.content
            )

        except Exception as e:
            # Degrade gracefully: callers get an empty interpretation with
            # the error recorded instead of an exception.
            logger.error(f"Failed to interpret requirement {requirement_id}: {e}")
            return RequirementInterpretation(
                requirement_id=requirement_id,
                summary="",
                applicability="",
                technical_measures=[],
                affected_modules=[],
                risk_level="medium",
                implementation_hints=[],
                confidence_score=0.0,
                error=str(e)
            )
|
||||
|
||||
    async def suggest_controls(
        self,
        requirement_title: str,
        requirement_text: str,
        regulation_name: str,
        affected_modules: List[str]
    ) -> List[ControlSuggestion]:
        """
        Suggest controls for a given requirement.

        Returns a list of control suggestions with implementation guidance.

        Args:
            requirement_title: Title of the requirement.
            requirement_text: Description text (may be empty).
            regulation_name: Human-readable regulation name.
            affected_modules: Module names to scope the suggestions to;
                an empty list means all modules.

        Returns:
            Up to three ControlSuggestion objects; an empty list on any
            failure (never raises).
        """
        prompt = self.CONTROL_SUGGESTION_PROMPT.format(
            regulation_name=regulation_name,
            requirement_title=requirement_title,
            requirement_text=requirement_text or "Keine Beschreibung",
            affected_modules=", ".join(affected_modules) if affected_modules else "Alle Module"
        )

        try:
            # Slightly higher temperature than interpretation: some
            # creativity is desirable when proposing controls.
            response = await self.llm.complete(
                prompt=prompt,
                system_prompt=self.SYSTEM_PROMPT_BASE,
                max_tokens=2000,
                temperature=0.4
            )

            data = self._parse_json_response(response.content)
            controls = data.get("controls", [])

            # Defensive .get() on every key: the LLM may omit fields.
            return [
                ControlSuggestion(
                    control_id=c.get("control_id", "NEW-001"),
                    domain=c.get("domain", "gov"),
                    title=c.get("title", ""),
                    description=c.get("description", ""),
                    pass_criteria=c.get("pass_criteria", ""),
                    implementation_guidance=c.get("implementation_guidance", ""),
                    is_automated=c.get("is_automated", False),
                    automation_tool=c.get("automation_tool"),
                    priority=c.get("priority", "medium"),
                    confidence_score=0.75
                )
                for c in controls
            ]

        except Exception as e:
            logger.error(f"Failed to suggest controls: {e}")
            return []
|
||||
|
||||
async def assess_module_risk(
|
||||
self,
|
||||
module_name: str,
|
||||
service_type: str,
|
||||
description: str,
|
||||
processes_pii: bool,
|
||||
ai_components: bool,
|
||||
criticality: str,
|
||||
data_categories: List[str],
|
||||
regulations: List[Dict[str, str]]
|
||||
) -> RiskAssessment:
|
||||
"""
|
||||
Assess the compliance risk for a service module.
|
||||
"""
|
||||
prompt = self.RISK_ASSESSMENT_PROMPT.format(
|
||||
module_name=module_name,
|
||||
service_type=service_type,
|
||||
description=description or "Keine Beschreibung",
|
||||
processes_pii="Ja" if processes_pii else "Nein",
|
||||
ai_components="Ja" if ai_components else "Nein",
|
||||
criticality=criticality,
|
||||
data_categories=", ".join(data_categories) if data_categories else "Keine",
|
||||
regulations=", ".join([f"{r['code']} ({r.get('relevance', 'medium')})" for r in regulations]) if regulations else "Keine"
|
||||
)
|
||||
|
||||
try:
|
||||
response = await self.llm.complete(
|
||||
prompt=prompt,
|
||||
system_prompt=self.SYSTEM_PROMPT_BASE,
|
||||
max_tokens=1500,
|
||||
temperature=0.3
|
||||
)
|
||||
|
||||
data = self._parse_json_response(response.content)
|
||||
|
||||
return RiskAssessment(
|
||||
module_name=module_name,
|
||||
overall_risk=data.get("overall_risk", "medium"),
|
||||
risk_factors=data.get("risk_factors", []),
|
||||
recommendations=data.get("recommendations", []),
|
||||
compliance_gaps=data.get("compliance_gaps", []),
|
||||
confidence_score=0.8
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to assess risk for {module_name}: {e}")
|
||||
return RiskAssessment(
|
||||
module_name=module_name,
|
||||
overall_risk="unknown",
|
||||
risk_factors=[],
|
||||
recommendations=[],
|
||||
compliance_gaps=[],
|
||||
confidence_score=0.0
|
||||
)
|
||||
|
||||
    async def analyze_gap(
        self,
        requirement_id: str,
        requirement_title: str,
        requirement_text: str,
        regulation_code: str,
        existing_controls: List[Dict[str, str]]
    ) -> GapAnalysis:
        """
        Analyze gaps between requirements and existing controls.

        Args:
            requirement_id: Identifier echoed back in the result.
            requirement_title: Requirement title.
            requirement_text: Description text (may be empty).
            regulation_code: Short regulation code.
            existing_controls: Mapped controls; dicts with 'control_id',
                'title' and 'status' keys (all optional).

        Returns:
            A GapAnalysis; on any failure a placeholder with
            coverage_level="unknown" and empty lists (never raises).
        """
        # Render the mapped controls as a bullet list for the prompt.
        controls_text = "\n".join([
            f"- {c.get('control_id', 'N/A')}: {c.get('title', 'N/A')} - {c.get('status', 'N/A')}"
            for c in existing_controls
        ]) if existing_controls else "Keine Controls zugeordnet"

        prompt = self.GAP_ANALYSIS_PROMPT.format(
            requirement_title=requirement_title,
            regulation_code=regulation_code,
            requirement_text=requirement_text or "Keine Beschreibung",
            existing_controls=controls_text
        )

        try:
            response = await self.llm.complete(
                prompt=prompt,
                system_prompt=self.SYSTEM_PROMPT_BASE,
                max_tokens=1500,
                temperature=0.3
            )

            data = self._parse_json_response(response.content)

            return GapAnalysis(
                requirement_id=requirement_id,
                requirement_title=requirement_title,
                coverage_level=data.get("coverage_level", "none"),
                # Echo the control IDs that were considered in the analysis.
                existing_controls=[c.get("control_id", "") for c in existing_controls],
                missing_coverage=data.get("missing_coverage", []),
                suggested_actions=data.get("suggested_actions", [])
            )

        except Exception as e:
            logger.error(f"Failed to analyze gap for {requirement_id}: {e}")
            return GapAnalysis(
                requirement_id=requirement_id,
                requirement_title=requirement_title,
                coverage_level="unknown",
                existing_controls=[],
                missing_coverage=[],
                suggested_actions=[]
            )
|
||||
|
||||
async def batch_interpret_requirements(
|
||||
self,
|
||||
requirements: List[Dict[str, Any]],
|
||||
rate_limit: float = 1.0
|
||||
) -> List[RequirementInterpretation]:
|
||||
"""
|
||||
Process multiple requirements with rate limiting.
|
||||
|
||||
Useful for bulk processing of regulations.
|
||||
"""
|
||||
results = []
|
||||
|
||||
for i, req in enumerate(requirements):
|
||||
if i > 0:
|
||||
import asyncio
|
||||
await asyncio.sleep(rate_limit)
|
||||
|
||||
result = await self.interpret_requirement(
|
||||
requirement_id=req.get("id", str(i)),
|
||||
article=req.get("article", ""),
|
||||
title=req.get("title", ""),
|
||||
requirement_text=req.get("requirement_text", ""),
|
||||
regulation_code=req.get("regulation_code", ""),
|
||||
regulation_name=req.get("regulation_name", "")
|
||||
)
|
||||
results.append(result)
|
||||
|
||||
logger.info(f"Processed requirement {i+1}/{len(requirements)}: {req.get('title', 'N/A')}")
|
||||
|
||||
return results
|
||||
|
||||
def _parse_json_response(self, content: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Parse JSON from LLM response, handling common formatting issues.
|
||||
"""
|
||||
# Try to extract JSON from the response
|
||||
content = content.strip()
|
||||
|
||||
# Remove markdown code blocks if present
|
||||
if content.startswith("```json"):
|
||||
content = content[7:]
|
||||
elif content.startswith("```"):
|
||||
content = content[3:]
|
||||
if content.endswith("```"):
|
||||
content = content[:-3]
|
||||
|
||||
content = content.strip()
|
||||
|
||||
# Find JSON object in the response
|
||||
json_match = re.search(r'\{[\s\S]*\}', content)
|
||||
if json_match:
|
||||
content = json_match.group(0)
|
||||
|
||||
try:
|
||||
return json.loads(content)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Failed to parse JSON response: {e}")
|
||||
logger.debug(f"Raw content: {content[:500]}")
|
||||
return {}
|
||||
|
||||
|
||||
# Singleton instance: module-level cache for the lazily created assistant.
_assistant_instance: Optional[AIComplianceAssistant] = None


def get_ai_assistant() -> AIComplianceAssistant:
    """Get the shared AI compliance assistant instance.

    Lazily creates the singleton on first call.
    """
    global _assistant_instance
    if _assistant_instance is None:
        # NOTE(review): creation is not lock-guarded — confirm this is only
        # called from a single event loop / thread during startup.
        _assistant_instance = AIComplianceAssistant()
    return _assistant_instance


def reset_ai_assistant():
    """Reset the shared assistant instance (useful for testing)."""
    global _assistant_instance
    _assistant_instance = None
|
||||
880
backend/compliance/services/audit_pdf_generator.py
Normal file
880
backend/compliance/services/audit_pdf_generator.py
Normal file
@@ -0,0 +1,880 @@
|
||||
"""
|
||||
Audit Session PDF Report Generator.
|
||||
|
||||
Sprint 3 Phase 4: Generates PDF reports for completed audit sessions.
|
||||
|
||||
Features:
|
||||
- Cover page with audit session metadata
|
||||
- Executive summary with traffic light status
|
||||
- Statistics pie chart (compliant/non-compliant/pending)
|
||||
- Detailed checklist with sign-off status
|
||||
- Digital signature verification
|
||||
- Appendix with non-compliant items
|
||||
|
||||
Uses reportlab for PDF generation (lightweight, no external dependencies).
|
||||
"""
|
||||
|
||||
import io
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
from uuid import uuid4
|
||||
import hashlib
|
||||
|
||||
from sqlalchemy.orm import Session, selectinload
|
||||
|
||||
from reportlab.lib import colors
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||
from reportlab.lib.units import mm, cm
|
||||
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT, TA_JUSTIFY
|
||||
from reportlab.platypus import (
|
||||
SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
|
||||
PageBreak, Image, ListFlowable, ListItem, KeepTogether,
|
||||
HRFlowable
|
||||
)
|
||||
from reportlab.graphics.shapes import Drawing, Rect, String
|
||||
from reportlab.graphics.charts.piecharts import Pie
|
||||
|
||||
from ..db.models import (
|
||||
AuditSessionDB, AuditSignOffDB, AuditResultEnum, AuditSessionStatusEnum,
|
||||
RequirementDB, RegulationDB
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
# Color Definitions
# =============================================================================

# Report palette (reportlab color objects); referenced by styles and tables.
COLORS = {
    'primary': colors.HexColor('#1a365d'),    # Dark blue
    'secondary': colors.HexColor('#2c5282'),  # Medium blue
    'accent': colors.HexColor('#3182ce'),     # Light blue
    'success': colors.HexColor('#38a169'),    # Green
    'warning': colors.HexColor('#d69e2e'),    # Yellow/Orange
    'danger': colors.HexColor('#e53e3e'),     # Red
    'muted': colors.HexColor('#718096'),      # Gray
    'light': colors.HexColor('#f7fafc'),      # Light gray
    'white': colors.white,
    'black': colors.black,
}

# Maps audit result values to the color used in charts and checklists.
RESULT_COLORS = {
    'compliant': COLORS['success'],
    'compliant_notes': colors.HexColor('#68d391'),  # Light green
    'non_compliant': COLORS['danger'],
    'not_applicable': COLORS['muted'],
    'pending': COLORS['warning'],
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Custom Styles
|
||||
# =============================================================================
|
||||
|
||||
def get_custom_styles() -> Dict[str, ParagraphStyle]:
    """Create custom paragraph styles for the audit report.

    Returns:
        Mapping of logical style names ('Title', 'Heading1', ...) to
        ParagraphStyle objects derived from reportlab's sample stylesheet,
        recolored with the report palette.
    """
    styles = getSampleStyleSheet()

    custom = {
        'Title': ParagraphStyle(
            'AuditTitle',
            parent=styles['Title'],
            fontSize=24,
            textColor=COLORS['primary'],
            spaceAfter=12*mm,
            alignment=TA_CENTER,
        ),
        'Subtitle': ParagraphStyle(
            'AuditSubtitle',
            parent=styles['Normal'],
            fontSize=14,
            textColor=COLORS['secondary'],
            spaceAfter=6*mm,
            alignment=TA_CENTER,
        ),
        'Heading1': ParagraphStyle(
            'AuditH1',
            parent=styles['Heading1'],
            fontSize=18,
            textColor=COLORS['primary'],
            spaceBefore=12*mm,
            spaceAfter=6*mm,
            borderPadding=3*mm,
        ),
        'Heading2': ParagraphStyle(
            'AuditH2',
            parent=styles['Heading2'],
            fontSize=14,
            textColor=COLORS['secondary'],
            spaceBefore=8*mm,
            spaceAfter=4*mm,
        ),
        'Heading3': ParagraphStyle(
            'AuditH3',
            parent=styles['Heading3'],
            fontSize=12,
            textColor=COLORS['accent'],
            spaceBefore=6*mm,
            spaceAfter=3*mm,
        ),
        'Normal': ParagraphStyle(
            'AuditNormal',
            parent=styles['Normal'],
            fontSize=10,
            textColor=COLORS['black'],
            spaceAfter=3*mm,
            alignment=TA_JUSTIFY,
        ),
        'Small': ParagraphStyle(
            'AuditSmall',
            parent=styles['Normal'],
            fontSize=8,
            textColor=COLORS['muted'],
            spaceAfter=2*mm,
        ),
        'Footer': ParagraphStyle(
            'AuditFooter',
            parent=styles['Normal'],
            fontSize=8,
            textColor=COLORS['muted'],
            alignment=TA_CENTER,
        ),
        # Colored variants of Normal for inline status text.
        'Success': ParagraphStyle(
            'AuditSuccess',
            parent=styles['Normal'],
            fontSize=10,
            textColor=COLORS['success'],
        ),
        'Warning': ParagraphStyle(
            'AuditWarning',
            parent=styles['Normal'],
            fontSize=10,
            textColor=COLORS['warning'],
        ),
        'Danger': ParagraphStyle(
            'AuditDanger',
            parent=styles['Normal'],
            fontSize=10,
            textColor=COLORS['danger'],
        ),
    }

    return custom
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# PDF Generator Class
|
||||
# =============================================================================
|
||||
|
||||
class AuditPDFGenerator:
|
||||
"""Generates PDF reports for audit sessions."""
|
||||
|
||||
    def __init__(self, db: Session):
        """Create a generator bound to a database session.

        Args:
            db: SQLAlchemy session used for all report queries.
        """
        self.db = db
        self.styles = get_custom_styles()
        # A4 geometry with a uniform 20 mm page margin.
        self.page_width, self.page_height = A4
        self.margin = 20 * mm
|
||||
|
||||
    def generate(
        self,
        session_id: str,
        language: str = 'de',
        include_signatures: bool = True,
    ) -> Tuple[bytes, str]:
        """
        Generate a PDF report for an audit session.

        Args:
            session_id: The audit session ID
            language: Report language ('de' or 'en')
            include_signatures: Whether to include digital signature info

        Returns:
            Tuple of (PDF bytes, filename)

        Raises:
            ValueError: If no audit session exists for ``session_id``.
        """
        # Load session with all related data
        session = self._load_session(session_id)
        if not session:
            raise ValueError(f"Audit session {session_id} not found")

        # Load all sign-offs and index them by requirement for O(1) lookup.
        signoffs = self._load_signoffs(session_id)
        signoff_map = {s.requirement_id: s for s in signoffs}

        # Load requirements for this session
        requirements = self._load_requirements(session)

        # Calculate statistics
        stats = self._calculate_statistics(session, signoffs)

        # Generate PDF into an in-memory buffer (no temp files).
        buffer = io.BytesIO()
        doc = SimpleDocTemplate(
            buffer,
            pagesize=A4,
            leftMargin=self.margin,
            rightMargin=self.margin,
            topMargin=self.margin,
            bottomMargin=self.margin,
        )

        # Build story (content) — reportlab flowables in page order.
        story = []

        # 1. Cover page
        story.extend(self._build_cover_page(session, language))
        story.append(PageBreak())

        # 2. Executive summary
        story.extend(self._build_executive_summary(session, stats, language))
        story.append(PageBreak())

        # 3. Statistics overview
        story.extend(self._build_statistics_section(stats, language))

        # 4. Detailed checklist
        story.extend(self._build_checklist_section(
            session, requirements, signoff_map, language
        ))

        # 5. Non-compliant items appendix (if any)
        non_compliant = [s for s in signoffs if s.result == AuditResultEnum.NON_COMPLIANT]
        if non_compliant:
            story.append(PageBreak())
            story.extend(self._build_non_compliant_appendix(
                non_compliant, requirements, language
            ))

        # 6. Signature verification (if requested)
        if include_signatures:
            # Only items that actually carry a signature hash are listed.
            signed_items = [s for s in signoffs if s.signature_hash]
            if signed_items:
                story.append(PageBreak())
                story.extend(self._build_signature_section(signed_items, language))

        # Build the PDF
        doc.build(story)

        # Generate filename
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12 —
        # consider datetime.now(timezone.utc) when touching this next.
        date_str = datetime.utcnow().strftime('%Y%m%d')
        filename = f"audit_report_{session.name.replace(' ', '_')}_{date_str}.pdf"

        return buffer.getvalue(), filename
|
||||
|
||||
    def _load_session(self, session_id: str) -> Optional[AuditSessionDB]:
        """Load an audit session by ID; returns None when not found."""
        return self.db.query(AuditSessionDB).filter(
            AuditSessionDB.id == session_id
        ).first()
|
||||
|
||||
    def _load_signoffs(self, session_id: str) -> List[AuditSignOffDB]:
        """Load all sign-offs for a session (no particular order)."""
        return (
            self.db.query(AuditSignOffDB)
            .filter(AuditSignOffDB.session_id == session_id)
            .all()
        )
|
||||
|
||||
    def _load_requirements(self, session: AuditSessionDB) -> List[RequirementDB]:
        """Load requirements for a session based on filters.

        When the session restricts regulations (``regulation_ids``), only
        requirements of those regulation codes are returned; otherwise all.
        Ordered by regulation code, then article.
        """
        query = self.db.query(RequirementDB).join(RegulationDB)

        if session.regulation_ids:
            query = query.filter(RegulationDB.code.in_(session.regulation_ids))

        return query.order_by(RegulationDB.code, RequirementDB.article).all()
|
||||
|
||||
def _calculate_statistics(
|
||||
self,
|
||||
session: AuditSessionDB,
|
||||
signoffs: List[AuditSignOffDB],
|
||||
) -> Dict[str, Any]:
|
||||
"""Calculate audit statistics."""
|
||||
total = session.total_items
|
||||
completed = len(signoffs)
|
||||
|
||||
compliant = sum(1 for s in signoffs if s.result == AuditResultEnum.COMPLIANT)
|
||||
compliant_notes = sum(1 for s in signoffs if s.result == AuditResultEnum.COMPLIANT_WITH_NOTES)
|
||||
non_compliant = sum(1 for s in signoffs if s.result == AuditResultEnum.NON_COMPLIANT)
|
||||
not_applicable = sum(1 for s in signoffs if s.result == AuditResultEnum.NOT_APPLICABLE)
|
||||
pending = total - completed
|
||||
|
||||
# Calculate compliance rate (excluding N/A and pending)
|
||||
applicable = compliant + compliant_notes + non_compliant
|
||||
compliance_rate = ((compliant + compliant_notes) / applicable * 100) if applicable > 0 else 0
|
||||
|
||||
return {
|
||||
'total': total,
|
||||
'completed': completed,
|
||||
'pending': pending,
|
||||
'compliant': compliant,
|
||||
'compliant_notes': compliant_notes,
|
||||
'non_compliant': non_compliant,
|
||||
'not_applicable': not_applicable,
|
||||
'completion_percentage': round((completed / total * 100) if total > 0 else 0, 1),
|
||||
'compliance_rate': round(compliance_rate, 1),
|
||||
'traffic_light': self._determine_traffic_light(compliance_rate, pending, total),
|
||||
}
|
||||
|
||||
def _determine_traffic_light(
|
||||
self,
|
||||
compliance_rate: float,
|
||||
pending: int,
|
||||
total: int,
|
||||
) -> str:
|
||||
"""Determine traffic light status."""
|
||||
pending_ratio = pending / total if total > 0 else 0
|
||||
|
||||
if pending_ratio > 0.3:
|
||||
return 'yellow' # Too many pending items
|
||||
elif compliance_rate >= 90:
|
||||
return 'green'
|
||||
elif compliance_rate >= 70:
|
||||
return 'yellow'
|
||||
else:
|
||||
return 'red'
|
||||
|
||||
# =========================================================================
|
||||
# Build Page Sections
|
||||
# =========================================================================
|
||||
|
||||
    def _build_cover_page(
        self,
        session: AuditSessionDB,
        language: str,
    ) -> List:
        """Build the cover page.

        Args:
            session: Audit session providing name, auditor and timestamps.
            language: 'de' or 'en'; unknown values fall back to German.

        Returns:
            List of reportlab flowables for the cover page.
        """
        story = []

        # Title
        title = 'AUDIT-BERICHT' if language == 'de' else 'AUDIT REPORT'
        story.append(Spacer(1, 30*mm))
        story.append(Paragraph(title, self.styles['Title']))

        # Session name
        story.append(Paragraph(session.name, self.styles['Subtitle']))
        story.append(Spacer(1, 15*mm))

        # Horizontal rule
        story.append(HRFlowable(
            width="80%",
            thickness=1,
            color=COLORS['accent'],
            spaceAfter=15*mm,
        ))

        # Metadata table: localized label strings, keyed per language.
        labels = {
            'de': {
                'auditor': 'Auditor',
                'organization': 'Organisation',
                'status': 'Status',
                'created': 'Erstellt am',
                'started': 'Gestartet am',
                'completed': 'Abgeschlossen am',
                'regulations': 'Verordnungen',
            },
            'en': {
                'auditor': 'Auditor',
                'organization': 'Organization',
                'status': 'Status',
                'created': 'Created',
                'started': 'Started',
                'completed': 'Completed',
                'regulations': 'Regulations',
            },
        }
        l = labels.get(language, labels['de'])

        # Localized display names for the session status enum values.
        status_map = {
            'draft': 'Entwurf' if language == 'de' else 'Draft',
            'in_progress': 'In Bearbeitung' if language == 'de' else 'In Progress',
            'completed': 'Abgeschlossen' if language == 'de' else 'Completed',
            'archived': 'Archiviert' if language == 'de' else 'Archived',
        }

        # One row per metadata field; '-' where optional values are unset.
        data = [
            [l['auditor'], session.auditor_name],
            [l['organization'], session.auditor_organization or '-'],
            [l['status'], status_map.get(session.status.value, session.status.value)],
            [l['created'], session.created_at.strftime('%d.%m.%Y %H:%M') if session.created_at else '-'],
            [l['started'], session.started_at.strftime('%d.%m.%Y %H:%M') if session.started_at else '-'],
            [l['completed'], session.completed_at.strftime('%d.%m.%Y %H:%M') if session.completed_at else '-'],
            [l['regulations'], ', '.join(session.regulation_ids) if session.regulation_ids else 'Alle'],
        ]

        table = Table(data, colWidths=[50*mm, 100*mm])
        table.setStyle(TableStyle([
            ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
            ('FONTNAME', (1, 0), (1, -1), 'Helvetica'),
            ('FONTSIZE', (0, 0), (-1, -1), 11),
            ('TEXTCOLOR', (0, 0), (0, -1), COLORS['secondary']),
            ('TEXTCOLOR', (1, 0), (1, -1), COLORS['black']),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
            ('TOPPADDING', (0, 0), (-1, -1), 8),
            ('ALIGN', (0, 0), (0, -1), 'RIGHT'),
            ('ALIGN', (1, 0), (1, -1), 'LEFT'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ]))

        story.append(table)
        story.append(Spacer(1, 20*mm))

        # Description if available
        if session.description:
            desc_label = 'Beschreibung' if language == 'de' else 'Description'
            story.append(Paragraph(f"<b>{desc_label}:</b>", self.styles['Normal']))
            story.append(Paragraph(session.description, self.styles['Normal']))

        # Generation timestamp
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12.
        story.append(Spacer(1, 30*mm))
        gen_label = 'Generiert am' if language == 'de' else 'Generated on'
        story.append(Paragraph(
            f"{gen_label}: {datetime.utcnow().strftime('%d.%m.%Y %H:%M')} UTC",
            self.styles['Footer']
        ))

        return story
|
||||
|
||||
def _build_executive_summary(
    self,
    session: AuditSessionDB,
    stats: Dict[str, Any],
    language: str,
) -> List:
    """Build the executive summary section.

    Renders a coloured traffic-light banner for the overall audit state,
    a key-metrics table (completion/compliance rates and counts) and a
    bullet list of findings produced by ``_generate_findings``.

    Args:
        session: The audit session being reported (kept for signature
            parity with the sibling section builders).
        stats: Aggregated audit statistics; must contain
            ``traffic_light`` ('green'/'yellow'/'red'), the rate fields
            and the per-status counts read below.
        language: Report language code ('de' or 'en'); anything else
            falls back to German.

    Returns:
        List of reportlab flowables for this section.
    """
    story = []

    title = 'ZUSAMMENFASSUNG' if language == 'de' else 'EXECUTIVE SUMMARY'
    story.append(Paragraph(title, self.styles['Heading1']))

    # Traffic light status banner
    traffic_light = stats['traffic_light']
    tl_colors = {
        'green': COLORS['success'],
        'yellow': COLORS['warning'],
        'red': COLORS['danger'],
    }
    tl_labels = {
        'de': {'green': 'GUT', 'yellow': 'AUFMERKSAMKEIT', 'red': 'KRITISCH'},
        'en': {'green': 'GOOD', 'yellow': 'ATTENTION', 'red': 'CRITICAL'},
    }

    # Fall back to German for unknown language codes instead of raising
    # KeyError -- consistent with the labels.get(...) lookup below.
    tl_text = tl_labels.get(language, tl_labels['de'])[traffic_light]

    # Create traffic light indicator
    tl_table = Table(
        [[tl_text]],
        colWidths=[60*mm],
        rowHeights=[15*mm],
    )
    tl_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (0, 0), tl_colors[traffic_light]),
        ('TEXTCOLOR', (0, 0), (0, 0), COLORS['white']),
        ('FONTNAME', (0, 0), (0, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (0, 0), 16),
        ('ALIGN', (0, 0), (0, 0), 'CENTER'),
        ('VALIGN', (0, 0), (0, 0), 'MIDDLE'),
        ('ROUNDEDCORNERS', [3, 3, 3, 3]),
    ]))

    story.append(tl_table)
    story.append(Spacer(1, 10*mm))

    # Key metrics
    labels = {
        'de': {
            'completion': 'Abschlussrate',
            'compliance': 'Konformitaetsrate',
            'total': 'Gesamtanforderungen',
            'non_compliant': 'Nicht konform',
            'pending': 'Ausstehend',
        },
        'en': {
            'completion': 'Completion Rate',
            'compliance': 'Compliance Rate',
            'total': 'Total Requirements',
            'non_compliant': 'Non-Compliant',
            'pending': 'Pending',
        },
    }
    lbl = labels.get(language, labels['de'])

    metrics_data = [
        [lbl['completion'], f"{stats['completion_percentage']}%"],
        [lbl['compliance'], f"{stats['compliance_rate']}%"],
        [lbl['total'], str(stats['total'])],
        [lbl['non_compliant'], str(stats['non_compliant'])],
        [lbl['pending'], str(stats['pending'])],
    ]

    metrics_table = Table(metrics_data, colWidths=[60*mm, 40*mm])
    metrics_table.setStyle(TableStyle([
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
        ('FONTNAME', (1, 0), (1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 0), (-1, -1), 12),
        ('TEXTCOLOR', (0, 0), (0, -1), COLORS['secondary']),
        ('TEXTCOLOR', (1, 0), (1, -1), COLORS['black']),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ('TOPPADDING', (0, 0), (-1, -1), 6),
        ('ALIGN', (0, 0), (0, -1), 'LEFT'),
        ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
        ('LINEABOVE', (0, 0), (-1, 0), 1, COLORS['light']),
        ('LINEBELOW', (0, -1), (-1, -1), 1, COLORS['light']),
    ]))

    story.append(metrics_table)
    story.append(Spacer(1, 10*mm))

    # Key findings
    findings_title = 'Wichtige Erkenntnisse' if language == 'de' else 'Key Findings'
    story.append(Paragraph(f"<b>{findings_title}:</b>", self.styles['Heading3']))

    findings = self._generate_findings(stats, language)
    for finding in findings:
        story.append(Paragraph(f"• {finding}", self.styles['Normal']))

    return story
def _generate_findings(self, stats: Dict[str, Any], language: str) -> List[str]:
|
||||
"""Generate key findings based on statistics."""
|
||||
findings = []
|
||||
|
||||
if language == 'de':
|
||||
if stats['non_compliant'] > 0:
|
||||
findings.append(
|
||||
f"{stats['non_compliant']} Anforderungen sind nicht konform und "
|
||||
f"erfordern Massnahmen."
|
||||
)
|
||||
if stats['pending'] > 0:
|
||||
findings.append(
|
||||
f"{stats['pending']} Anforderungen wurden noch nicht geprueft."
|
||||
)
|
||||
if stats['compliance_rate'] >= 90:
|
||||
findings.append(
|
||||
"Hohe Konformitaetsrate erreicht. Weiter so!"
|
||||
)
|
||||
elif stats['compliance_rate'] < 70:
|
||||
findings.append(
|
||||
"Konformitaetsrate unter 70%. Priorisierte Massnahmen erforderlich."
|
||||
)
|
||||
if stats['compliant_notes'] > 0:
|
||||
findings.append(
|
||||
f"{stats['compliant_notes']} Anforderungen sind konform mit Anmerkungen. "
|
||||
f"Verbesserungspotenzial identifiziert."
|
||||
)
|
||||
if not findings:
|
||||
findings.append("Audit vollstaendig abgeschlossen ohne kritische Befunde.")
|
||||
else:
|
||||
if stats['non_compliant'] > 0:
|
||||
findings.append(
|
||||
f"{stats['non_compliant']} requirements are non-compliant and "
|
||||
f"require action."
|
||||
)
|
||||
if stats['pending'] > 0:
|
||||
findings.append(
|
||||
f"{stats['pending']} requirements have not been reviewed yet."
|
||||
)
|
||||
if stats['compliance_rate'] >= 90:
|
||||
findings.append(
|
||||
"High compliance rate achieved. Keep up the good work!"
|
||||
)
|
||||
elif stats['compliance_rate'] < 70:
|
||||
findings.append(
|
||||
"Compliance rate below 70%. Prioritized actions required."
|
||||
)
|
||||
if stats['compliant_notes'] > 0:
|
||||
findings.append(
|
||||
f"{stats['compliant_notes']} requirements are compliant with notes. "
|
||||
f"Improvement potential identified."
|
||||
)
|
||||
if not findings:
|
||||
findings.append("Audit completed without critical findings.")
|
||||
|
||||
return findings
|
||||
|
||||
def _build_statistics_section(
    self,
    stats: Dict[str, Any],
    language: str,
) -> List:
    """Build the statistics overview section with pie chart.

    Draws a pie chart of the sign-off result distribution (zero-count
    statuses are omitted) followed by a legend table with absolute
    counts and percentages of the total.
    """
    story = []

    heading = 'STATISTIK-UEBERSICHT' if language == 'de' else 'STATISTICS OVERVIEW'
    story.append(Paragraph(heading, self.styles['Heading1']))

    # Pie chart scaffold
    drawing = Drawing(200, 200)
    pie = Pie()
    pie.x = 50
    pie.y = 25
    pie.width = 100
    pie.height = 100

    # Counts in a fixed order matching the label/colour lists below.
    data = [
        stats['compliant'],
        stats['compliant_notes'],
        stats['non_compliant'],
        stats['not_applicable'],
        stats['pending'],
    ]
    labels_de = ['Konform', 'Konform (Anm.)', 'Nicht konform', 'N/A', 'Ausstehend']
    labels_en = ['Compliant', 'Compliant (Notes)', 'Non-Compliant', 'N/A', 'Pending']
    labels = labels_de if language == 'de' else labels_en

    pie_colors = [
        COLORS['success'],
        colors.HexColor('#68d391'),
        COLORS['danger'],
        COLORS['muted'],
        COLORS['warning'],
    ]

    # Keep only the non-zero slices so the chart stays readable.
    visible = [
        (label, count, slice_color)
        for label, count, slice_color in zip(labels, data, pie_colors)
        if count > 0
    ]

    if visible:
        pie.data = [count for _, count, _ in visible]
        pie.labels = [label for label, _, _ in visible]
        pie.slices.strokeWidth = 0.5
        for idx, (_, _, slice_color) in enumerate(visible):
            pie.slices[idx].fillColor = slice_color
        drawing.add(pie)
        story.append(drawing)
    else:
        no_data = 'Keine Daten verfuegbar' if language == 'de' else 'No data available'
        story.append(Paragraph(no_data, self.styles['Normal']))

    story.append(Spacer(1, 10*mm))

    # Legend table mirrors the visible pie slices.
    legend_rows = []
    for label, count, _ in visible:
        pct = round(count / stats['total'] * 100, 1) if stats['total'] > 0 else 0
        legend_rows.append([label, str(count), f"{pct}%"])

    if legend_rows:
        header = ['Status', 'Anzahl', '%'] if language == 'de' else ['Status', 'Count', '%']
        legend_table = Table([header] + legend_rows, colWidths=[50*mm, 25*mm, 25*mm])
        legend_table.setStyle(TableStyle([
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
            ('FONTSIZE', (0, 0), (-1, -1), 10),
            ('BACKGROUND', (0, 0), (-1, 0), COLORS['light']),
            ('ALIGN', (1, 0), (-1, -1), 'RIGHT'),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 5),
            ('TOPPADDING', (0, 0), (-1, -1), 5),
            ('GRID', (0, 0), (-1, -1), 0.5, COLORS['muted']),
        ]))
        story.append(legend_table)

    return story
def _build_checklist_section(
    self,
    session: AuditSessionDB,
    requirements: List[RequirementDB],
    signoff_map: Dict[str, AuditSignOffDB],
    language: str,
) -> List:
    """Build the detailed checklist section.

    Emits one table per regulation code listing each requirement with a
    colour-coded sign-off result and whether it was digitally signed.
    Requirements without a sign-off render as 'pending'.
    """
    story = [PageBreak()]

    heading = 'PRUEFUNGSCHECKLISTE' if language == 'de' else 'AUDIT CHECKLIST'
    story.append(Paragraph(heading, self.styles['Heading1']))

    # Group requirements by regulation code.
    grouped = {}
    for req in requirements:
        code = req.regulation.code if req.regulation else 'OTHER'
        grouped.setdefault(code, []).append(req)

    result_labels = {
        'de': {
            'compliant': 'Konform',
            'compliant_notes': 'Konform (Anm.)',
            'non_compliant': 'Nicht konform',
            'not_applicable': 'N/A',
            'pending': 'Ausstehend',
        },
        'en': {
            'compliant': 'Compliant',
            'compliant_notes': 'Compliant (Notes)',
            'non_compliant': 'Non-Compliant',
            'not_applicable': 'N/A',
            'pending': 'Pending',
        },
    }
    labels = result_labels.get(language, result_labels['de'])

    # Colour per sign-off result; any other signed result gets "warning".
    result_colors = {
        'compliant': COLORS['success'],
        'compliant_notes': colors.HexColor('#2f855a'),
        'non_compliant': COLORS['danger'],
    }

    yes_word = 'Yes' if language == 'en' else 'Ja'

    for code in sorted(grouped):
        reqs = grouped[code]
        story.append(Paragraph(code, self.styles['Heading2']))

        if language == 'de':
            header = ['Art.', 'Titel', 'Ergebnis', 'Signiert']
        else:
            header = ['Art.', 'Title', 'Result', 'Signed']
        rows = [header]

        for req in reqs:
            signoff = signoff_map.get(req.id)
            result = signoff.result.value if signoff else 'pending'
            signed = yes_word if (signoff and signoff.signature_hash) else '-'

            # Truncate long titles so the row stays on one line.
            title_text = req.title[:50] + '...' if len(req.title) > 50 else req.title

            rows.append([
                req.article or '-',
                title_text,
                labels.get(result, result),
                signed,
            ])

        table = Table(rows, colWidths=[20*mm, 80*mm, 35*mm, 20*mm])

        style_commands = [
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
            ('FONTSIZE', (0, 0), (-1, -1), 9),
            ('BACKGROUND', (0, 0), (-1, 0), COLORS['light']),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('ALIGN', (2, 0), (3, -1), 'CENTER'),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 4),
            ('TOPPADDING', (0, 0), (-1, -1), 4),
            ('GRID', (0, 0), (-1, -1), 0.5, COLORS['muted']),
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ]

        # Colour-code the result column for rows that have a sign-off.
        for row_idx, req in enumerate(reqs, start=1):
            signoff = signoff_map.get(req.id)
            if signoff:
                cell_color = result_colors.get(signoff.result.value, COLORS['warning'])
                style_commands.append(('TEXTCOLOR', (2, row_idx), (2, row_idx), cell_color))

        table.setStyle(TableStyle(style_commands))
        story.append(table)
        story.append(Spacer(1, 5*mm))

    return story
def _build_non_compliant_appendix(
    self,
    non_compliant: List[AuditSignOffDB],
    requirements: List[RequirementDB],
    language: str,
) -> List:
    """Build appendix detailing every non-compliant requirement.

    For each non-compliant sign-off, renders the requirement reference,
    its title, a (truncated) description and any auditor notes.
    Sign-offs whose requirement cannot be resolved are skipped.
    """
    story = []

    if language == 'de':
        heading = 'ANHANG: NICHT KONFORME ANFORDERUNGEN'
    else:
        heading = 'APPENDIX: NON-COMPLIANT REQUIREMENTS'
    story.append(Paragraph(heading, self.styles['Heading1']))

    # Index requirements by id for O(1) lookup per sign-off.
    req_by_id = {req.id: req for req in requirements}

    notes_label = 'Auditor-Anmerkungen' if language == 'de' else 'Auditor Notes'

    for idx, signoff in enumerate(non_compliant, start=1):
        req = req_by_id.get(signoff.requirement_id)
        if req is None:
            # Orphaned sign-off (requirement not in this report) - skip.
            continue

        # Requirement header: "<n>. <regulation code> <article>"
        reg_code = req.regulation.code if req.regulation else ''
        story.append(Paragraph(
            f"<b>{idx}. {reg_code} {req.article}</b>",
            self.styles['Heading3']
        ))
        story.append(Paragraph(f"<b>{req.title}</b>", self.styles['Normal']))

        if req.description:
            text = req.description
            if len(text) > 500:
                text = text[:500] + '...'
            story.append(Paragraph(text, self.styles['Small']))

        # Notes from auditor, if any were recorded on the sign-off.
        if signoff.notes:
            story.append(Paragraph(f"<b>{notes_label}:</b>", self.styles['Normal']))
            story.append(Paragraph(signoff.notes, self.styles['Normal']))

        story.append(Spacer(1, 5*mm))

    return story
def _build_signature_section(
    self,
    signed_items: List[AuditSignOffDB],
    language: str,
) -> List:
    """Build section with digital signature verification.

    Lists the first 50 signed sign-offs with truncated requirement id,
    signer, date, and a truncated SHA-256 signature hash.
    """
    story = []

    heading = 'DIGITALE SIGNATUREN' if language == 'de' else 'DIGITAL SIGNATURES'
    story.append(Paragraph(heading, self.styles['Heading1']))

    if language == 'de':
        explanation = (
            'Die folgenden Pruefpunkte wurden digital signiert. '
            'Die SHA-256 Hashes dienen als unveraenderlicher Nachweis des Pruefergebnisses.'
        )
        header = ['Anforderung', 'Signiert von', 'Datum', 'SHA-256 (gekuerzt)']
    else:
        explanation = (
            'The following audit items have been digitally signed. '
            'The SHA-256 hashes serve as immutable proof of the audit result.'
        )
        header = ['Requirement', 'Signed by', 'Date', 'SHA-256 (truncated)']

    story.append(Paragraph(explanation, self.styles['Normal']))
    story.append(Spacer(1, 5*mm))

    # Cap the table at 50 entries to keep the section bounded.
    entries = [
        [
            so.requirement_id[:8] + '...',
            so.signed_by or '-',
            so.signed_at.strftime('%d.%m.%Y') if so.signed_at else '-',
            so.signature_hash[:16] + '...' if so.signature_hash else '-',
        ]
        for so in signed_items[:50]
    ]

    table = Table([header] + entries, colWidths=[35*mm, 40*mm, 30*mm, 50*mm])
    table.setStyle(TableStyle([
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTNAME', (0, 1), (-1, -1), 'Courier'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('BACKGROUND', (0, 0), (-1, 0), COLORS['light']),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 3),
        ('TOPPADDING', (0, 0), (-1, -1), 3),
        ('GRID', (0, 0), (-1, -1), 0.5, COLORS['muted']),
    ]))

    story.append(table)

    return story
383
backend/compliance/services/auto_risk_updater.py
Normal file
383
backend/compliance/services/auto_risk_updater.py
Normal file
@@ -0,0 +1,383 @@
|
||||
"""
|
||||
Automatic Risk Update Service for Compliance Framework.
|
||||
|
||||
This service processes CI/CD security scan results and automatically:
|
||||
1. Updates Control status based on scan findings
|
||||
2. Adjusts Risk levels when critical CVEs are found
|
||||
3. Creates Evidence records from scan reports
|
||||
4. Generates alerts for significant findings
|
||||
|
||||
Sprint 6: CI/CD Evidence Collection (2026-01-18)
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Any
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..db.models import (
|
||||
ControlDB, ControlStatusEnum,
|
||||
EvidenceDB, EvidenceStatusEnum,
|
||||
RiskDB, RiskLevelEnum,
|
||||
)
|
||||
from ..db.repository import ControlRepository, EvidenceRepository, RiskRepository
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ScanType(str, Enum):
    """Types of CI/CD security scans.

    Inherits ``str`` so members compare equal to their plain string
    values and serialize directly in JSON payloads.
    """
    SAST = "sast"  # Static Application Security Testing
    DEPENDENCY = "dependency"  # Dependency/CVE scanning
    SECRET = "secret"  # Secret detection
    CONTAINER = "container"  # Container image scanning
    SBOM = "sbom"  # Software Bill of Materials
class FindingSeverity(str, Enum):
    """Severity levels for security findings.

    Ordered from most to least severe; the lowercase values match the
    keys used in ``ScanResult.findings`` counters.
    """
    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    INFO = "info"
@dataclass
class ScanResult:
    """Represents a CI/CD scan result.

    Input record for ``AutoRiskUpdater.process_scan_result``.
    """
    scan_type: ScanType  # which kind of security scan produced this result
    tool: str  # name of the scanning tool (used in status notes/evidence titles)
    timestamp: datetime  # when the scan ran
    commit_sha: str  # commit the pipeline scanned
    branch: str  # branch the pipeline ran on
    control_id: str  # Mapped Control ID (e.g., SDLC-001)
    findings: Dict[str, int]  # {"critical": 0, "high": 2, ...}
    raw_report: Optional[Dict] = None  # full tool report, if retained
    ci_job_id: Optional[str] = None  # CI job id, stored on the Evidence record
@dataclass
class RiskUpdateResult:
    """Result of an automatic risk update.

    Returned by ``AutoRiskUpdater.process_scan_result`` to summarize
    every change made while processing one scan result.
    """
    control_id: str  # the Control targeted by the scan
    control_updated: bool  # True if the Control status actually changed
    old_status: Optional[str]  # status before processing (None if Control missing)
    new_status: Optional[str]  # status after processing (None if Control missing)
    evidence_created: bool  # whether an Evidence record was written
    evidence_id: Optional[str]  # id of the created Evidence record, if any
    risks_affected: List[str]  # risk_ids of Risks whose status/level changed
    alerts_generated: List[str]  # human-readable alert messages
    message: str  # summary of the outcome
# Mapping from Control IDs to scan types.
# Used by AutoRiskUpdater.process_evidence_collect_request to infer the
# scan type for a CI/CD evidence submission; unmapped controls default
# to SAST there.
CONTROL_SCAN_MAPPING = {
    "SDLC-001": ScanType.SAST,  # SAST Scanning
    "SDLC-002": ScanType.DEPENDENCY,  # Dependency Scanning
    "SDLC-003": ScanType.SECRET,  # Secret Detection
    "SDLC-006": ScanType.CONTAINER,  # Container Scanning
    "CRA-001": ScanType.SBOM,  # SBOM Generation
}
class AutoRiskUpdater:
    """
    Automatically updates Controls and Risks based on CI/CD scan results.

    Flow:
    1. Receive scan result from CI/CD pipeline
    2. Determine Control status based on findings
    3. Create Evidence record
    4. Update linked Risks if necessary
    5. Generate alerts for critical findings

    All changes made while processing one scan are committed in a single
    ``db.commit()`` at the end of ``process_scan_result``.
    """

    def __init__(self, db: Session):
        # All repositories share the same session so that one commit at
        # the end of process_scan_result persists every change together.
        self.db = db
        self.control_repo = ControlRepository(db)
        self.evidence_repo = EvidenceRepository(db)
        self.risk_repo = RiskRepository(db)

    def process_scan_result(self, scan_result: ScanResult) -> RiskUpdateResult:
        """
        Process a CI/CD scan result and update Compliance status.

        Args:
            scan_result: The scan result from CI/CD pipeline

        Returns:
            RiskUpdateResult with details of all updates made
        """
        logger.info(f"Processing {scan_result.scan_type.value} scan for control {scan_result.control_id}")

        # Find the Control; if it does not exist, report failure without
        # touching the database at all.
        control = self.control_repo.get_by_control_id(scan_result.control_id)
        if not control:
            logger.warning(f"Control {scan_result.control_id} not found")
            return RiskUpdateResult(
                control_id=scan_result.control_id,
                control_updated=False,
                old_status=None,
                new_status=None,
                evidence_created=False,
                evidence_id=None,
                risks_affected=[],
                alerts_generated=[],
                message=f"Control {scan_result.control_id} not found"
            )

        old_status = control.status.value if control.status else "unknown"

        # Determine new Control status based on findings
        new_status = self._determine_control_status(scan_result.findings)

        # Update Control status only when it actually changed.
        control_updated = False
        if new_status != old_status:
            control.status = ControlStatusEnum(new_status)
            control.status_notes = self._generate_status_notes(scan_result)
            control.updated_at = datetime.utcnow()
            control_updated = True
            logger.info(f"Control {scan_result.control_id} status changed: {old_status} -> {new_status}")

        # Create Evidence record (always, even for unchanged status).
        evidence = self._create_evidence(control, scan_result)

        # Update linked Risks
        risks_affected = self._update_linked_risks(control, new_status, scan_result.findings)

        # Generate alerts for critical findings
        alerts = self._generate_alerts(scan_result, new_status)

        # Commit all changes
        self.db.commit()

        return RiskUpdateResult(
            control_id=scan_result.control_id,
            control_updated=control_updated,
            old_status=old_status,
            new_status=new_status,
            evidence_created=True,
            evidence_id=evidence.id,
            risks_affected=risks_affected,
            alerts_generated=alerts,
            message=f"Processed {scan_result.scan_type.value} scan successfully"
        )

    def _determine_control_status(self, findings: Dict[str, int]) -> str:
        """
        Determine Control status based on security findings.

        Rules:
        - Any CRITICAL findings -> fail
        - >5 HIGH findings -> fail
        - 1-5 HIGH findings -> partial
        - >10 MEDIUM findings -> partial
        - Otherwise (few MEDIUM, only LOW, or no findings) -> pass
        """
        critical = findings.get("critical", 0)
        high = findings.get("high", 0)
        medium = findings.get("medium", 0)

        if critical > 0:
            return ControlStatusEnum.FAIL.value
        elif high > 5:
            return ControlStatusEnum.FAIL.value
        elif high > 0:
            return ControlStatusEnum.PARTIAL.value
        elif medium > 10:
            return ControlStatusEnum.PARTIAL.value
        else:
            return ControlStatusEnum.PASS.value

    def _generate_status_notes(self, scan_result: ScanResult) -> str:
        """Generate human-readable status notes from scan result.

        Only CRITICAL/HIGH/MEDIUM counts are mentioned; LOW/INFO are
        considered not significant for the note.
        """
        findings = scan_result.findings
        parts = []

        if findings.get("critical", 0) > 0:
            parts.append(f"{findings['critical']} CRITICAL")
        if findings.get("high", 0) > 0:
            parts.append(f"{findings['high']} HIGH")
        if findings.get("medium", 0) > 0:
            parts.append(f"{findings['medium']} MEDIUM")

        if parts:
            findings_str = ", ".join(parts)
            return f"Auto-updated from {scan_result.tool} scan ({scan_result.timestamp.strftime('%Y-%m-%d %H:%M')}): {findings_str} findings"
        else:
            return f"Auto-updated from {scan_result.tool} scan ({scan_result.timestamp.strftime('%Y-%m-%d %H:%M')}): No significant findings"

    def _create_evidence(self, control: ControlDB, scan_result: ScanResult) -> EvidenceDB:
        """Create an Evidence record from the scan result.

        The record is added to the session but not committed here; the
        caller (process_scan_result) commits.
        """
        from uuid import uuid4

        evidence = EvidenceDB(
            id=str(uuid4()),
            control_id=control.id,
            evidence_type=f"{scan_result.scan_type.value}_report",
            title=f"{scan_result.tool} Scan - {scan_result.timestamp.strftime('%Y-%m-%d')}",
            description=self._generate_status_notes(scan_result),
            source="ci_pipeline",
            ci_job_id=scan_result.ci_job_id,
            status=EvidenceStatusEnum.VALID,
            valid_from=datetime.utcnow(),
            collected_at=scan_result.timestamp,
        )

        self.db.add(evidence)
        logger.info(f"Created evidence {evidence.id} for control {control.control_id}")

        return evidence

    def _update_linked_risks(
        self,
        control: ControlDB,
        new_status: str,
        findings: Dict[str, int]
    ) -> List[str]:
        """
        Update Risks that are mitigated by this Control.

        When a Control fails:
        - Increase residual risk of linked Risks
        - Update risk status to "open" if was "mitigated"

        When a Control passes:
        - Decrease residual risk if appropriate

        Returns the risk_ids of all Risks that were modified.
        """
        affected_risks = []

        # Find all Risks that list this Control as a mitigating control.
        # NOTE(review): scans every risk in the register on each call.
        all_risks = self.risk_repo.get_all()

        for risk in all_risks:
            if not risk.mitigating_controls:
                continue

            mitigating_ids = risk.mitigating_controls
            if control.control_id not in mitigating_ids:
                continue

            # This Risk is linked to the affected Control
            risk_updated = False

            if new_status == ControlStatusEnum.FAIL.value:
                # Control failed - increase risk
                if risk.status == "mitigated":
                    risk.status = "open"
                    risk_updated = True

                # Increase residual likelihood (capped at 5) if critical
                # findings were reported.
                if findings.get("critical", 0) > 0:
                    old_likelihood = risk.residual_likelihood or risk.likelihood
                    risk.residual_likelihood = min(5, old_likelihood + 1)
                    risk.residual_risk = RiskDB.calculate_risk_level(
                        risk.residual_likelihood,
                        risk.residual_impact or risk.impact
                    )
                    risk_updated = True

            elif new_status == ControlStatusEnum.PASS.value:
                # Control passed - potentially reduce risk
                if risk.status == "open":
                    # Only mark mitigated when ALL mitigating controls pass.
                    all_passing = True
                    for ctrl_id in mitigating_ids:
                        other_ctrl = self.control_repo.get_by_control_id(ctrl_id)
                        if other_ctrl and other_ctrl.status != ControlStatusEnum.PASS:
                            all_passing = False
                            break

                    if all_passing:
                        risk.status = "mitigated"
                        risk_updated = True

            if risk_updated:
                risk.last_assessed_at = datetime.utcnow()
                risk.updated_at = datetime.utcnow()
                affected_risks.append(risk.risk_id)
                logger.info(f"Updated risk {risk.risk_id} due to control {control.control_id} status change")

        return affected_risks

    def _generate_alerts(self, scan_result: ScanResult, new_status: str) -> List[str]:
        """
        Generate alerts for significant findings.

        Alert conditions:
        - Any CRITICAL findings
        - Control status changed to FAIL
        - >10 HIGH findings in one scan
        """
        alerts = []
        findings = scan_result.findings

        if findings.get("critical", 0) > 0:
            alert_msg = f"CRITICAL: {findings['critical']} critical vulnerabilities found in {scan_result.tool} scan"
            alerts.append(alert_msg)
            logger.warning(alert_msg)

        if new_status == ControlStatusEnum.FAIL.value:
            alert_msg = f"Control {scan_result.control_id} status changed to FAIL"
            alerts.append(alert_msg)
            logger.warning(alert_msg)

        if findings.get("high", 0) > 10:
            alert_msg = f"HIGH: {findings['high']} high-severity findings in {scan_result.tool} scan"
            alerts.append(alert_msg)
            logger.warning(alert_msg)

        return alerts

    def process_evidence_collect_request(
        self,
        tool: str,
        control_id: str,
        evidence_type: str,
        timestamp: str,
        commit_sha: str,
        ci_job_id: Optional[str] = None,
        findings: Optional[Dict[str, int]] = None,
        **kwargs
    ) -> RiskUpdateResult:
        """
        Process an evidence collection request from CI/CD.

        This is the main entry point for the /evidence/collect API endpoint.

        NOTE(review): evidence_type is currently unused here; the scan
        type is derived from control_id via CONTROL_SCAN_MAPPING.
        """
        # Parse timestamp (ISO-8601; a trailing 'Z' is normalized to an
        # explicit UTC offset). Malformed input falls back to "now".
        try:
            ts = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
        except (ValueError, AttributeError):
            ts = datetime.utcnow()

        # Determine scan type from the control id (defaults to SAST for
        # controls not present in CONTROL_SCAN_MAPPING).
        scan_type = ScanType.SAST  # Default
        for ctrl_id, stype in CONTROL_SCAN_MAPPING.items():
            if ctrl_id == control_id:
                scan_type = stype
                break

        # Create ScanResult
        scan_result = ScanResult(
            scan_type=scan_type,
            tool=tool,
            timestamp=ts,
            commit_sha=commit_sha,
            branch=kwargs.get("branch", "unknown"),
            control_id=control_id,
            findings=findings or {"critical": 0, "high": 0, "medium": 0, "low": 0},
            ci_job_id=ci_job_id,
        )

        return self.process_scan_result(scan_result)
def create_auto_risk_updater(db: Session) -> AutoRiskUpdater:
    """Build and return an AutoRiskUpdater bound to the given session."""
    updater = AutoRiskUpdater(db)
    return updater
616
backend/compliance/services/export_generator.py
Normal file
616
backend/compliance/services/export_generator.py
Normal file
@@ -0,0 +1,616 @@
|
||||
"""
|
||||
Audit Export Generator.
|
||||
|
||||
Generates ZIP packages for external auditors containing:
|
||||
- Regulations & Requirements
|
||||
- Control Catalogue with status
|
||||
- Evidence artifacts
|
||||
- Risk register
|
||||
- Summary reports
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import zipfile
|
||||
from datetime import datetime, date
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..db.models import (
|
||||
RegulationDB,
|
||||
RequirementDB,
|
||||
ControlDB,
|
||||
ControlMappingDB,
|
||||
EvidenceDB,
|
||||
RiskDB,
|
||||
AuditExportDB,
|
||||
ExportStatusEnum,
|
||||
ControlStatusEnum,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AuditExportGenerator:
|
||||
"""Generates audit export packages."""
|
||||
|
||||
def __init__(self, db: Session, export_dir: str = "/tmp/compliance_exports"):
    """Store the DB session and ensure the export directory exists."""
    self.db = db
    target = Path(export_dir)
    target.mkdir(parents=True, exist_ok=True)
    self.export_dir = target
def create_export(
    self,
    requested_by: str,
    export_type: str = "full",
    included_regulations: Optional[List[str]] = None,
    included_domains: Optional[List[str]] = None,
    date_range_start: Optional[date] = None,
    date_range_end: Optional[date] = None,
) -> AuditExportDB:
    """
    Create a new audit export.

    Creates a tracking record in GENERATING state, builds the ZIP
    package, then marks the record COMPLETED (with hash/size and
    summary statistics) or FAILED (with the error message). The record
    is committed in both the success and failure paths before this
    method returns or re-raises.

    Args:
        requested_by: User requesting the export
        export_type: "full", "controls_only", "evidence_only"
        included_regulations: Filter by regulation codes
        included_domains: Filter by control domains
        date_range_start: Evidence collected after this date
        date_range_end: Evidence collected before this date

    Returns:
        AuditExportDB record

    Raises:
        Exception: re-raised from ZIP generation after the record has
            been marked FAILED and committed.
    """
    # Create export record in GENERATING state before doing any work,
    # so a crash still leaves a traceable record.
    export_record = AuditExportDB(
        export_type=export_type,
        export_name=f"Breakpilot Compliance Export {datetime.now().strftime('%Y-%m-%d %H:%M')}",
        included_regulations=included_regulations,
        included_domains=included_domains,
        date_range_start=date_range_start,
        date_range_end=date_range_end,
        requested_by=requested_by,
        status=ExportStatusEnum.GENERATING,
    )
    self.db.add(export_record)
    # flush() assigns the record id, which _generate_zip receives.
    self.db.flush()

    try:
        # Generate the export
        file_path, file_hash, file_size = self._generate_zip(
            export_record.id,
            export_type,
            included_regulations,
            included_domains,
            date_range_start,
            date_range_end,
        )

        # Update record with results
        export_record.file_path = str(file_path)
        export_record.file_hash = file_hash
        export_record.file_size_bytes = file_size
        export_record.status = ExportStatusEnum.COMPLETED
        export_record.completed_at = datetime.utcnow()

        # Calculate statistics
        stats = self._calculate_statistics(
            included_regulations, included_domains
        )
        export_record.total_controls = stats["total_controls"]
        export_record.total_evidence = stats["total_evidence"]
        export_record.compliance_score = stats["compliance_score"]

        self.db.commit()
        logger.info(f"Export completed: {file_path}")
        return export_record

    except Exception as e:
        # Persist the FAILED state before re-raising so callers and
        # later queries can see why the export did not complete.
        export_record.status = ExportStatusEnum.FAILED
        export_record.error_message = str(e)
        self.db.commit()
        logger.error(f"Export failed: {e}")
        raise
def _generate_zip(
    self,
    export_id: str,
    export_type: str,
    included_regulations: Optional[List[str]],
    included_domains: Optional[List[str]],
    date_range_start: Optional[date],
    date_range_end: Optional[date],
) -> tuple:
    """Assemble the export ZIP and return (path, sha256 hex digest, size in bytes).

    All content is staged in a temporary directory that is discarded once
    the archive has been written to ``self.export_dir``.
    """
    stamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    zip_path = self.export_dir / f"audit_export_{stamp}.zip"

    with tempfile.TemporaryDirectory() as staging_dir:
        staging = Path(staging_dir)

        # Fixed top-level layout expected by auditors (documented in README).
        for section in ("regulations", "controls", "evidence", "risks"):
            (staging / section).mkdir()

        # Populate sections according to the requested export type.
        if export_type in ["full", "controls_only"]:
            self._export_regulations(staging / "regulations", included_regulations)
            self._export_controls(staging / "controls", included_domains)

        if export_type in ["full", "evidence_only"]:
            self._export_evidence(
                staging / "evidence",
                included_domains,
                date_range_start,
                date_range_end,
            )

        if export_type == "full":
            self._export_risks(staging / "risks")

        # Cross-cutting artifacts are always included.
        self._export_summary(staging, export_type, included_regulations, included_domains)
        self._export_readme(staging)
        self._export_index_html(staging)

        # Pack every staged file, preserving paths relative to the staging root.
        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as archive:
            for staged_file in staging.rglob("*"):
                if staged_file.is_file():
                    archive.write(staged_file, staged_file.relative_to(staging))

        # Integrity hash so recipients can verify the download.
        digest = self._calculate_file_hash(zip_path)
        size = zip_path.stat().st_size

    return zip_path, digest, size
|
||||
|
||||
def _export_regulations(
    self, output_dir: Path, included_regulations: Optional[List[str]]
) -> None:
    """Dump each active regulation (with its requirements) to ``<code>.json``."""
    reg_query = self.db.query(RegulationDB).filter(RegulationDB.is_active == True)
    if included_regulations:
        reg_query = reg_query.filter(RegulationDB.code.in_(included_regulations))

    for regulation in reg_query.all():
        # Requirements attached to this regulation become a nested list.
        requirements = (
            self.db.query(RequirementDB)
            .filter(RequirementDB.regulation_id == regulation.id)
            .all()
        )

        payload = {
            "code": regulation.code,
            "name": regulation.name,
            "full_name": regulation.full_name,
            "type": regulation.regulation_type.value if regulation.regulation_type else None,
            "source_url": regulation.source_url,
            "effective_date": regulation.effective_date.isoformat() if regulation.effective_date else None,
            "description": regulation.description,
            "requirements": [
                {
                    "article": req.article,
                    "paragraph": req.paragraph,
                    "title": req.title,
                    "description": req.description,
                    "is_applicable": req.is_applicable,
                    "breakpilot_interpretation": req.breakpilot_interpretation,
                }
                for req in requirements
            ],
        }

        # One file per regulation, keyed by its lower-cased code.
        target = output_dir / f"{regulation.code.lower()}.json"
        with open(target, "w", encoding="utf-8") as fh:
            json.dump(payload, fh, indent=2, ensure_ascii=False)
|
||||
|
||||
def _export_controls(
    self, output_dir: Path, included_domains: Optional[List[str]]
) -> None:
    """Write the full control catalogue plus a per-domain status summary."""
    ctrl_query = self.db.query(ControlDB)
    if included_domains:
        from ..db.models import ControlDomainEnum
        wanted_domains = [ControlDomainEnum(d) for d in included_domains]
        ctrl_query = ctrl_query.filter(ControlDB.domain.in_(wanted_domains))

    catalogue = []
    for control in ctrl_query.order_by(ControlDB.control_id).all():
        # Resolve every regulation/article this control is mapped to.
        mappings = self.db.query(ControlMappingDB).filter(
            ControlMappingDB.control_id == control.id
        ).all()

        requirement_refs = []
        for mapping in mappings:
            requirement = self.db.query(RequirementDB).get(mapping.requirement_id)
            if not requirement:
                continue
            regulation = self.db.query(RegulationDB).get(requirement.regulation_id)
            requirement_refs.append({
                "regulation": regulation.code if regulation else None,
                "article": requirement.article,
                "paragraph": requirement.paragraph,
                "coverage": mapping.coverage_level,
            })

        catalogue.append({
            "control_id": control.control_id,
            "domain": control.domain.value if control.domain else None,
            "type": control.control_type.value if control.control_type else None,
            "title": control.title,
            "description": control.description,
            "pass_criteria": control.pass_criteria,
            "status": control.status.value if control.status else None,
            "is_automated": control.is_automated,
            "automation_tool": control.automation_tool,
            "owner": control.owner,
            "last_reviewed": control.last_reviewed_at.isoformat() if control.last_reviewed_at else None,
            "code_reference": control.code_reference,
            "mapped_requirements": requirement_refs,
        })

    # Full machine-readable catalogue.
    with open(output_dir / "control_catalogue.json", "w", encoding="utf-8") as fh:
        json.dump(catalogue, fh, indent=2, ensure_ascii=False)

    # Aggregate pass/partial/fail counts per domain; any other status
    # still counts toward "total" only.
    domain_summary = {}
    for entry in catalogue:
        row = domain_summary.setdefault(
            entry["domain"], {"total": 0, "pass": 0, "partial": 0, "fail": 0}
        )
        row["total"] += 1
        if entry["status"] in row:
            row[entry["status"]] += 1

    with open(output_dir / "domain_summary.json", "w", encoding="utf-8") as fh:
        json.dump(domain_summary, fh, indent=2, ensure_ascii=False)
|
||||
|
||||
def _export_evidence(
    self,
    output_dir: Path,
    included_domains: Optional[List[str]],
    date_range_start: Optional[date],
    date_range_end: Optional[date],
) -> None:
    """Export evidence metadata (``evidence_index.json``) and copy artifact files.

    Args:
        output_dir: Target directory inside the staging area.
        included_domains: Restrict to evidence whose control is in these domains.
        date_range_start: Keep evidence collected on/after this date.
        date_range_end: Keep evidence collected on/before this date.
    """
    query = self.db.query(EvidenceDB)

    # Date filters are inclusive at both ends (min/max time of day).
    if date_range_start:
        query = query.filter(
            EvidenceDB.collected_at >= datetime.combine(date_range_start, datetime.min.time())
        )
    if date_range_end:
        query = query.filter(
            EvidenceDB.collected_at <= datetime.combine(date_range_end, datetime.max.time())
        )

    if included_domains:
        from ..db.models import ControlDomainEnum
        domain_enums = [ControlDomainEnum(d) for d in included_domains]
        query = query.join(ControlDB).filter(ControlDB.domain.in_(domain_enums))

    evidence_data = []
    for ev in query.all():
        ctrl = self.db.query(ControlDB).get(ev.control_id)

        evidence_data.append({
            "id": ev.id,
            "control_id": ctrl.control_id if ctrl else None,
            "evidence_type": ev.evidence_type,
            "title": ev.title,
            "description": ev.description,
            "artifact_path": ev.artifact_path,
            "artifact_url": ev.artifact_url,
            "artifact_hash": ev.artifact_hash,
            "status": ev.status.value if ev.status else None,
            "valid_from": ev.valid_from.isoformat() if ev.valid_from else None,
            "valid_until": ev.valid_until.isoformat() if ev.valid_until else None,
            "collected_at": ev.collected_at.isoformat() if ev.collected_at else None,
            "source": ev.source,
        })

        # Copy the artifact file (if present on disk) into the export,
        # grouped by evidence type.
        if ev.artifact_path and os.path.exists(ev.artifact_path):
            evidence_subdir = output_dir / ev.evidence_type
            evidence_subdir.mkdir(exist_ok=True)
            target = evidence_subdir / os.path.basename(ev.artifact_path)
            # BUGFIX: two artifacts can share a basename (e.g. "report.pdf");
            # previously the later copy silently overwrote the earlier one,
            # losing evidence from the export. Disambiguate with the
            # evidence record id on collision.
            if target.exists():
                target = evidence_subdir / f"{ev.id}_{target.name}"
            shutil.copy2(ev.artifact_path, target)

    with open(output_dir / "evidence_index.json", "w", encoding="utf-8") as f:
        json.dump(evidence_data, f, indent=2, ensure_ascii=False)
|
||||
|
||||
def _export_risks(self, output_dir: Path) -> None:
    """Write the complete risk register to ``risk_register.json``."""
    register = [
        {
            "risk_id": entry.risk_id,
            "title": entry.title,
            "description": entry.description,
            "category": entry.category,
            "likelihood": entry.likelihood,
            "impact": entry.impact,
            "inherent_risk": entry.inherent_risk.value if entry.inherent_risk else None,
            "mitigating_controls": entry.mitigating_controls,
            "residual_likelihood": entry.residual_likelihood,
            "residual_impact": entry.residual_impact,
            "residual_risk": entry.residual_risk.value if entry.residual_risk else None,
            "owner": entry.owner,
            "status": entry.status,
            "treatment_plan": entry.treatment_plan,
        }
        for entry in self.db.query(RiskDB).order_by(RiskDB.risk_id).all()
    ]

    with open(output_dir / "risk_register.json", "w", encoding="utf-8") as fh:
        json.dump(register, fh, indent=2, ensure_ascii=False)
|
||||
|
||||
def _export_summary(
    self,
    output_dir: Path,
    export_type: str,
    included_regulations: Optional[List[str]],
    included_domains: Optional[List[str]],
) -> None:
    """Write ``summary.json``: export metadata plus headline statistics."""
    summary_payload = {
        "export_date": datetime.now().isoformat(),
        "export_type": export_type,
        # Record the filters so the package is self-describing.
        "filters": {
            "regulations": included_regulations,
            "domains": included_domains,
        },
        "statistics": self._calculate_statistics(included_regulations, included_domains),
        "organization": "Breakpilot",
        "version": "1.0.0",
    }

    with open(output_dir / "summary.json", "w", encoding="utf-8") as fh:
        json.dump(summary_payload, fh, indent=2, ensure_ascii=False)
|
||||
|
||||
def _export_readme(self, output_dir: Path) -> None:
    """Generate README.md for auditors.

    The file is deliberately written in German (the intended audience is
    German-speaking auditors) and documents the package layout. A
    generation timestamp is appended at the end.
    """
    # NOTE: the Markdown below is user-facing output — keep it verbatim,
    # do not translate or reformat.
    readme = """# Breakpilot Compliance Export

Dieses Paket enthält die Compliance-Dokumentation von Breakpilot.

## Struktur

```
├── summary.json          # Zusammenfassung und Statistiken
├── index.html            # HTML-Navigation (im Browser öffnen)
├── regulations/          # Verordnungen und Anforderungen
│   ├── gdpr.json
│   ├── aiact.json
│   └── ...
├── controls/             # Control Catalogue
│   ├── control_catalogue.json
│   └── domain_summary.json
├── evidence/             # Nachweise
│   ├── evidence_index.json
│   └── [evidence_type]/
└── risks/                # Risikoregister
    └── risk_register.json
```

## Verwendung

1. **HTML-Navigation**: Öffnen Sie `index.html` im Browser für eine visuelle Übersicht.
2. **JSON-Dateien**: Maschinenlesbare Daten für Import in GRC-Tools.
3. **Nachweis-Dateien**: Originale Scan-Reports und Konfigurationen.

## Kontakt

Bei Fragen wenden Sie sich an das Breakpilot Security Team.

---
Generiert am: """ + datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    with open(output_dir / "README.md", "w", encoding="utf-8") as f:
        f.write(readme)
|
||||
|
||||
def _export_index_html(self, output_dir: Path) -> None:
    """Generate index.html for browser navigation.

    Produces a self-contained page (inline CSS + JS) that loads the
    exported JSON files via relative paths, so it works when opened
    directly from the unpacked ZIP.
    """
    # NOTE: the HTML/JS below is user-facing output — keep it verbatim.
    # NOTE(review): the regulations list is hard-coded (GDPR/AI Act/CRA)
    # rather than derived from summary.json, and `regsDir` is unused —
    # presumably leftovers from a dynamic listing; confirm before relying
    # on the list matching the actual export filters.
    html = """<!DOCTYPE html>
<html lang="de">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Breakpilot Compliance Export</title>
    <style>
        body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; max-width: 1200px; margin: 0 auto; padding: 2rem; background: #f5f5f5; }
        h1 { color: #1a1a1a; border-bottom: 3px solid #0066cc; padding-bottom: 1rem; }
        h2 { color: #333; margin-top: 2rem; }
        .card { background: white; border-radius: 8px; padding: 1.5rem; margin: 1rem 0; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }
        .stats { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; }
        .stat { background: linear-gradient(135deg, #0066cc, #004499); color: white; padding: 1.5rem; border-radius: 8px; text-align: center; }
        .stat-value { font-size: 2.5rem; font-weight: bold; }
        .stat-label { opacity: 0.9; margin-top: 0.5rem; }
        ul { list-style: none; padding: 0; }
        li { padding: 0.75rem; border-bottom: 1px solid #eee; }
        li:last-child { border-bottom: none; }
        a { color: #0066cc; text-decoration: none; }
        a:hover { text-decoration: underline; }
        .footer { margin-top: 3rem; padding-top: 1rem; border-top: 1px solid #ddd; color: #666; font-size: 0.9rem; }
    </style>
</head>
<body>
    <h1>Breakpilot Compliance Export</h1>

    <div class="stats">
        <div class="stat">
            <div class="stat-value" id="score">--%</div>
            <div class="stat-label">Compliance Score</div>
        </div>
        <div class="stat">
            <div class="stat-value" id="controls">--</div>
            <div class="stat-label">Controls</div>
        </div>
        <div class="stat">
            <div class="stat-value" id="evidence">--</div>
            <div class="stat-label">Evidence Items</div>
        </div>
        <div class="stat">
            <div class="stat-value" id="regulations">--</div>
            <div class="stat-label">Regulations</div>
        </div>
    </div>

    <div class="card">
        <h2>Regulations &amp; Requirements</h2>
        <ul id="regulations-list">
            <li>Loading...</li>
        </ul>
    </div>

    <div class="card">
        <h2>Controls by Domain</h2>
        <ul id="domains-list">
            <li>Loading...</li>
        </ul>
    </div>

    <div class="card">
        <h2>Export Contents</h2>
        <ul>
            <li><a href="summary.json">summary.json</a> - Export metadata and statistics</li>
            <li><a href="controls/control_catalogue.json">controls/control_catalogue.json</a> - Full control catalogue</li>
            <li><a href="evidence/evidence_index.json">evidence/evidence_index.json</a> - Evidence index</li>
            <li><a href="risks/risk_register.json">risks/risk_register.json</a> - Risk register</li>
        </ul>
    </div>

    <div class="footer">
        <p>Generated by Breakpilot Compliance Framework</p>
    </div>

    <script>
        // Load summary and populate stats
        fetch('summary.json')
            .then(r => r.json())
            .then(data => {
                document.getElementById('score').textContent = (data.statistics.compliance_score || 0).toFixed(0) + '%';
                document.getElementById('controls').textContent = data.statistics.total_controls || 0;
                document.getElementById('evidence').textContent = data.statistics.total_evidence || 0;
                document.getElementById('regulations').textContent = data.statistics.total_regulations || 0;
            })
            .catch(() => console.log('Could not load summary'));

        // Load regulations list
        const regsDir = 'regulations/';
        document.getElementById('regulations-list').innerHTML =
            '<li><a href="regulations/gdpr.json">GDPR</a> - Datenschutz-Grundverordnung</li>' +
            '<li><a href="regulations/aiact.json">AI Act</a> - KI-Verordnung</li>' +
            '<li><a href="regulations/cra.json">CRA</a> - Cyber Resilience Act</li>';

        // Load domain summary
        fetch('controls/domain_summary.json')
            .then(r => r.json())
            .then(data => {
                const list = document.getElementById('domains-list');
                list.innerHTML = Object.entries(data).map(([domain, stats]) =>
                    `<li><strong>${domain.toUpperCase()}</strong>: ${stats.pass || 0}/${stats.total} controls passing</li>`
                ).join('');
            })
            .catch(() => console.log('Could not load domain summary'));
    </script>
</body>
</html>"""

    with open(output_dir / "index.html", "w", encoding="utf-8") as f:
        f.write(html)
|
||||
|
||||
def _calculate_statistics(
    self,
    included_regulations: Optional[List[str]],
    included_domains: Optional[List[str]],
) -> Dict[str, Any]:
    """Compute headline compliance numbers for the summary and export record.

    The compliance score gives full credit for PASS controls and half
    credit for PARTIAL controls, expressed as a percentage.
    """
    # Active regulations, optionally narrowed by code.
    regulations = self.db.query(RegulationDB).filter(RegulationDB.is_active == True)
    if included_regulations:
        regulations = regulations.filter(RegulationDB.code.in_(included_regulations))

    # Controls, optionally narrowed by domain.
    controls = self.db.query(ControlDB)
    if included_domains:
        from ..db.models import ControlDomainEnum
        controls = controls.filter(
            ControlDB.domain.in_([ControlDomainEnum(d) for d in included_domains])
        )

    total_controls = controls.count()
    passing = controls.filter(ControlDB.status == ControlStatusEnum.PASS).count()
    partial = controls.filter(ControlDB.status == ControlStatusEnum.PARTIAL).count()

    # Weighted score; guard against division by zero when no controls match.
    score = ((passing + partial * 0.5) / total_controls) * 100 if total_controls > 0 else 0

    return {
        "total_regulations": regulations.count(),
        "total_controls": total_controls,
        "passing_controls": passing,
        "partial_controls": partial,
        "total_evidence": self.db.query(EvidenceDB).count(),
        "compliance_score": round(score, 1),
    }
|
||||
|
||||
def _calculate_file_hash(self, file_path: Path) -> str:
    """Return the SHA-256 hex digest of the file at *file_path*.

    Reads in 8 KiB chunks so arbitrarily large archives hash without
    loading the whole file into memory.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as fh:
        while True:
            chunk = fh.read(8192)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
|
||||
|
||||
def get_export_status(self, export_id: str) -> Optional[AuditExportDB]:
    """Look up an export record by primary key; ``None`` if it does not exist."""
    return self.db.query(AuditExportDB).get(export_id)
|
||||
|
||||
def list_exports(
    self, limit: int = 20, offset: int = 0
) -> List[AuditExportDB]:
    """Return recent export records, newest first, paginated via offset/limit."""
    recent_first = self.db.query(AuditExportDB).order_by(
        AuditExportDB.requested_at.desc()
    )
    return recent_first.offset(offset).limit(limit).all()
|
||||
622
backend/compliance/services/llm_provider.py
Normal file
622
backend/compliance/services/llm_provider.py
Normal file
@@ -0,0 +1,622 @@
|
||||
"""
|
||||
LLM Provider Abstraction for Compliance AI Features.
|
||||
|
||||
Supports:
|
||||
- Anthropic Claude API (default)
|
||||
- Self-Hosted LLMs (Ollama, vLLM, LocalAI, etc.)
|
||||
- HashiCorp Vault integration for secure API key storage
|
||||
|
||||
Configuration via environment variables:
|
||||
- COMPLIANCE_LLM_PROVIDER: "anthropic" or "self_hosted"
|
||||
- ANTHROPIC_API_KEY: API key for Claude (or loaded from Vault)
|
||||
- ANTHROPIC_MODEL: Model name (default: claude-sonnet-4-20250514)
|
||||
- SELF_HOSTED_LLM_URL: Base URL for self-hosted LLM
|
||||
- SELF_HOSTED_LLM_MODEL: Model name for self-hosted
|
||||
- SELF_HOSTED_LLM_KEY: Optional API key for self-hosted
|
||||
|
||||
Vault Configuration:
|
||||
- VAULT_ADDR: Vault server address (e.g., http://vault:8200)
|
||||
- VAULT_TOKEN: Vault authentication token
|
||||
- USE_VAULT_SECRETS: Set to "true" to enable Vault integration
|
||||
- VAULT_SECRET_PATH: Path to secrets (default: secret/breakpilot/api_keys)
|
||||
"""
|
||||
|
||||
import asyncio
import logging
import os
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional

import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Vault Integration
|
||||
# =============================================================================
|
||||
|
||||
class VaultClient:
    """
    HashiCorp Vault client for retrieving secrets.

    Supports the KV v2 secrets engine. Retrieved secrets are cached
    in-process for ``_cache_ttl`` seconds to avoid hammering Vault.
    """

    def __init__(
        self,
        addr: Optional[str] = None,
        token: Optional[str] = None
    ):
        self.addr = addr or os.getenv("VAULT_ADDR", "http://localhost:8200")
        self.token = token or os.getenv("VAULT_TOKEN")
        # Maps "path:key" -> (secret value, time.monotonic() when stored).
        self._cache: Dict[str, Any] = {}
        self._cache_ttl = 300  # seconds (5 minutes)

    def _get_headers(self) -> Dict[str, str]:
        """Get request headers with Vault token."""
        headers = {"Content-Type": "application/json"}
        if self.token:
            headers["X-Vault-Token"] = self.token
        return headers

    def get_secret(self, path: str, key: str = "value") -> Optional[str]:
        """
        Get a secret from Vault KV v2.

        Args:
            path: Secret path (e.g., "breakpilot/api_keys/anthropic")
            key: Key within the secret data (default: "value")

        Returns:
            Secret value or None if not found
        """
        cache_key = f"{path}:{key}"

        # Serve from cache while the entry is still fresh.
        # BUGFIX: _cache_ttl was previously declared but never enforced,
        # so rotated/revoked secrets were served stale forever.
        cached = self._cache.get(cache_key)
        if cached is not None:
            value, stored_at = cached
            if time.monotonic() - stored_at < self._cache_ttl:
                return value
            del self._cache[cache_key]

        try:
            # KV v2 uses /data/ in the path
            full_path = f"{self.addr}/v1/secret/data/{path}"

            response = httpx.get(
                full_path,
                headers=self._get_headers(),
                timeout=10.0
            )

            if response.status_code == 200:
                data = response.json()
                # KV v2 nests the payload one level deeper: data.data
                secret_data = data.get("data", {}).get("data", {})
                secret_value = secret_data.get(key)

                if secret_value:
                    self._cache[cache_key] = (secret_value, time.monotonic())
                    logger.info(f"Successfully loaded secret from Vault: {path}")
                    return secret_value

            elif response.status_code == 404:
                logger.warning(f"Secret not found in Vault: {path}")
            else:
                logger.error(f"Vault error {response.status_code}: {response.text}")

        except httpx.RequestError as e:
            logger.error(f"Failed to connect to Vault at {self.addr}: {e}")
        except Exception as e:
            logger.error(f"Error retrieving secret from Vault: {e}")

        return None

    def get_anthropic_key(self) -> Optional[str]:
        """Get Anthropic API key from Vault."""
        path = os.getenv("VAULT_ANTHROPIC_PATH", "breakpilot/api_keys/anthropic")
        return self.get_secret(path, "value")

    def is_available(self) -> bool:
        """Check if Vault is available and authenticated.

        Any of the documented health status codes (sealed, standby, DR,
        performance standby, uninitialized) still means the server answered.
        """
        try:
            response = httpx.get(
                f"{self.addr}/v1/sys/health",
                headers=self._get_headers(),
                timeout=5.0
            )
            return response.status_code in (200, 429, 472, 473, 501, 503)
        except Exception:
            return False
|
||||
|
||||
|
||||
# Singleton Vault client — lazily created on first use so importing this
# module never touches the network or environment.
_vault_client: Optional[VaultClient] = None


def get_vault_client() -> VaultClient:
    """Get shared Vault client instance (process-wide singleton)."""
    global _vault_client
    if _vault_client is None:
        _vault_client = VaultClient()
    return _vault_client
|
||||
|
||||
|
||||
def get_secret_from_vault_or_env(
    vault_path: str,
    env_var: str,
    vault_key: str = "value"
) -> Optional[str]:
    """
    Get a secret, trying Vault first, then falling back to environment variable.

    Vault is only consulted when USE_VAULT_SECRETS is truthy
    ("true"/"1"/"yes", case-insensitive).

    Args:
        vault_path: Path in Vault (e.g., "breakpilot/api_keys/anthropic")
        env_var: Environment variable name as fallback
        vault_key: Key within Vault secret data

    Returns:
        Secret value or None
    """
    if os.getenv("USE_VAULT_SECRETS", "").lower() in ("true", "1", "yes"):
        secret = get_vault_client().get_secret(vault_path, vault_key)
        if secret:
            return secret
        logger.info(f"Vault secret not found, falling back to env: {env_var}")

    return os.getenv(env_var)
|
||||
|
||||
|
||||
class LLMProviderType(str, Enum):
    """Supported LLM provider types."""

    ANTHROPIC = "anthropic"  # Anthropic Claude API (the default)
    SELF_HOSTED = "self_hosted"  # Ollama / vLLM / LocalAI-style endpoint
    MOCK = "mock"  # For testing
|
||||
|
||||
|
||||
@dataclass
class LLMResponse:
    """Standard response from LLM.

    Normalized across providers so callers never have to parse
    provider-specific payloads.
    """
    content: str  # generated text ("" if the provider returned none)
    model: str  # model identifier that produced the completion
    provider: str  # provider name, e.g. "anthropic" or "self_hosted"
    usage: Optional[Dict[str, int]] = None  # token accounting, if reported
    raw_response: Optional[Dict[str, Any]] = None  # untouched provider payload (debugging)
|
||||
|
||||
|
||||
@dataclass
class LLMConfig:
    """Configuration for LLM provider."""
    provider_type: LLMProviderType  # which backend to construct
    api_key: Optional[str] = None  # required for Anthropic; optional for self-hosted
    model: str = "claude-sonnet-4-20250514"  # default model name
    base_url: Optional[str] = None  # required for self-hosted providers
    max_tokens: int = 4096  # default completion length cap
    temperature: float = 0.3  # low temperature: compliance answers should be stable
    timeout: float = 60.0  # HTTP request timeout in seconds
|
||||
|
||||
|
||||
class LLMProvider(ABC):
    """Abstract base class for LLM providers.

    Concrete providers (Anthropic, self-hosted, mock) implement the same
    async interface so compliance features stay provider-agnostic.
    """

    def __init__(self, config: LLMConfig):
        # Shared configuration (model, limits, credentials) for subclasses.
        self.config = config

    @abstractmethod
    async def complete(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None
    ) -> LLMResponse:
        """Generate a completion for the given prompt.

        Args:
            prompt: User prompt text.
            system_prompt: Optional system instruction.
            max_tokens: Per-call override of the configured token limit.
            temperature: Per-call override of the configured temperature.

        Returns:
            Normalized LLMResponse.
        """
        pass

    @abstractmethod
    async def batch_complete(
        self,
        prompts: List[str],
        system_prompt: Optional[str] = None,
        max_tokens: Optional[int] = None,
        rate_limit: float = 1.0
    ) -> List[LLMResponse]:
        """Generate completions for multiple prompts with rate limiting.

        ``rate_limit`` is the pause in seconds between consecutive calls.
        """
        pass

    @property
    @abstractmethod
    def provider_name(self) -> str:
        """Return the provider name."""
        pass
|
||||
|
||||
|
||||
class AnthropicProvider(LLMProvider):
    """LLM provider backed by Anthropic's official Messages API."""

    ANTHROPIC_API_URL = "https://api.anthropic.com/v1/messages"

    def __init__(self, config: LLMConfig):
        super().__init__(config)
        if not config.api_key:
            raise ValueError("Anthropic API key is required")
        self.api_key = config.api_key
        self.model = config.model or "claude-sonnet-4-20250514"

    @property
    def provider_name(self) -> str:
        return "anthropic"

    async def complete(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None
    ) -> LLMResponse:
        """Send a single prompt to the Messages API and normalize the reply."""
        request_headers = {
            "x-api-key": self.api_key,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json"
        }

        body: Dict[str, Any] = {
            "model": self.model,
            "max_tokens": max_tokens or self.config.max_tokens,
            "messages": [{"role": "user", "content": prompt}],
        }
        if system_prompt:
            body["system"] = system_prompt

        # Per-call temperature wins; otherwise fall back to the configured one.
        effective_temperature = (
            temperature if temperature is not None else self.config.temperature
        )
        if effective_temperature is not None:
            body["temperature"] = effective_temperature

        async with httpx.AsyncClient(timeout=self.config.timeout) as client:
            try:
                response = await client.post(
                    self.ANTHROPIC_API_URL,
                    headers=request_headers,
                    json=body
                )
                response.raise_for_status()
                data = response.json()

                # The API returns a list of content blocks; the generated
                # text lives in the first block.
                blocks = data.get("content")
                text = blocks[0].get("text", "") if blocks else ""

                return LLMResponse(
                    content=text,
                    model=self.model,
                    provider=self.provider_name,
                    usage=data.get("usage"),
                    raw_response=data
                )

            except httpx.HTTPStatusError as e:
                logger.error(f"Anthropic API error: {e.response.status_code} - {e.response.text}")
                raise
            except Exception as e:
                logger.error(f"Anthropic API request failed: {e}")
                raise

    async def batch_complete(
        self,
        prompts: List[str],
        system_prompt: Optional[str] = None,
        max_tokens: Optional[int] = None,
        rate_limit: float = 1.0
    ) -> List[LLMResponse]:
        """Run prompts sequentially, pausing ``rate_limit`` seconds between calls.

        A failing prompt yields a placeholder error response instead of
        aborting the batch, so results stay aligned with the inputs.
        """
        responses: List[LLMResponse] = []

        for index, single_prompt in enumerate(prompts):
            if index > 0:
                await asyncio.sleep(rate_limit)

            try:
                responses.append(await self.complete(
                    prompt=single_prompt,
                    system_prompt=system_prompt,
                    max_tokens=max_tokens
                ))
            except Exception as e:
                logger.error(f"Failed to process prompt {index}: {e}")
                # Keep batch alignment: placeholder carries the error text.
                responses.append(LLMResponse(
                    content=f"Error: {str(e)}",
                    model=self.model,
                    provider=self.provider_name
                ))

        return responses
|
||||
|
||||
|
||||
class SelfHostedProvider(LLMProvider):
    """Self-Hosted LLM Provider supporting Ollama, vLLM, LocalAI, etc.

    The wire format is auto-detected from the base URL: either Ollama's
    native ``/api/generate`` endpoint or the OpenAI-compatible
    ``/v1/chat/completions`` endpoint used by vLLM, LocalAI and similar.
    """

    def __init__(self, config: LLMConfig):
        super().__init__(config)
        if not config.base_url:
            raise ValueError("Base URL is required for self-hosted provider")
        self.base_url = config.base_url.rstrip("/")
        self.model = config.model
        self.api_key = config.api_key  # optional; some gateways require one

    @property
    def provider_name(self) -> str:
        return "self_hosted"

    def _detect_api_format(self) -> str:
        """Detect the API format based on URL patterns.

        Port 11434 is Ollama's default; URLs mentioning "openai" or "v1"
        are assumed OpenAI-compatible. Falls back to Ollama.
        """
        if "11434" in self.base_url or "ollama" in self.base_url.lower():
            return "ollama"
        elif "openai" in self.base_url.lower() or "v1" in self.base_url:
            return "openai"
        else:
            return "ollama"  # Default to Ollama format

    async def complete(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None
    ) -> LLMResponse:
        """Generate completion using self-hosted LLM."""

        api_format = self._detect_api_format()

        headers = {"content-type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        # Resolve per-call overrides against configured defaults once, so
        # BOTH API formats honor the same effective settings.
        # BUGFIX: the Ollama branch previously ignored config.max_tokens
        # and config.temperature (only explicit per-call values applied),
        # while the OpenAI-compatible branch fell back to the config —
        # the same LLMConfig produced different sampling per backend.
        effective_max_tokens = max_tokens or self.config.max_tokens
        effective_temperature = (
            temperature if temperature is not None else self.config.temperature
        )

        if api_format == "ollama":
            # Ollama API format
            endpoint = f"{self.base_url}/api/generate"
            # Ollama's generate endpoint has no separate system field, so
            # the system prompt is prepended to the user prompt.
            full_prompt = prompt
            if system_prompt:
                full_prompt = f"{system_prompt}\n\n{prompt}"

            options: Dict[str, Any] = {}
            if effective_max_tokens:
                options["num_predict"] = effective_max_tokens
            if effective_temperature is not None:
                options["temperature"] = effective_temperature

            payload = {
                "model": self.model,
                "prompt": full_prompt,
                "stream": False,
                "options": options
            }

        else:
            # OpenAI-compatible format (vLLM, LocalAI, etc.)
            endpoint = f"{self.base_url}/v1/chat/completions"

            messages = []
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})
            messages.append({"role": "user", "content": prompt})

            payload = {
                "model": self.model,
                "messages": messages,
                "max_tokens": effective_max_tokens,
                "temperature": effective_temperature
            }

        async with httpx.AsyncClient(timeout=self.config.timeout) as client:
            try:
                response = await client.post(endpoint, headers=headers, json=payload)
                response.raise_for_status()
                data = response.json()

                # Parse response based on format
                if api_format == "ollama":
                    content = data.get("response", "")
                else:
                    # OpenAI format
                    content = data.get("choices", [{}])[0].get("message", {}).get("content", "")

                return LLMResponse(
                    content=content,
                    model=self.model,
                    provider=self.provider_name,
                    usage=data.get("usage"),
                    raw_response=data
                )

            except httpx.HTTPStatusError as e:
                logger.error(f"Self-hosted LLM error: {e.response.status_code} - {e.response.text}")
                raise
            except Exception as e:
                logger.error(f"Self-hosted LLM request failed: {e}")
                raise

    async def batch_complete(
        self,
        prompts: List[str],
        system_prompt: Optional[str] = None,
        max_tokens: Optional[int] = None,
        rate_limit: float = 0.5  # Self-hosted can be faster
    ) -> List[LLMResponse]:
        """Process multiple prompts with rate limiting.

        A failing prompt yields a placeholder error response so results
        stay aligned with the inputs.
        """
        results = []

        for i, prompt in enumerate(prompts):
            if i > 0:
                await asyncio.sleep(rate_limit)

            try:
                result = await self.complete(
                    prompt=prompt,
                    system_prompt=system_prompt,
                    max_tokens=max_tokens
                )
                results.append(result)
            except Exception as e:
                logger.error(f"Failed to process prompt {i}: {e}")
                results.append(LLMResponse(
                    content=f"Error: {str(e)}",
                    model=self.model,
                    provider=self.provider_name
                ))

        return results
|
||||
|
||||
|
||||
class MockProvider(LLMProvider):
    """Deterministic stand-in provider for tests; performs no network I/O.

    Canned replies registered via set_responses() are cycled through on each
    complete() call; without any, a placeholder derived from the prompt is
    returned.
    """

    def __init__(self, config: LLMConfig):
        super().__init__(config)
        # Queue of canned replies and a counter of complete() invocations.
        self.responses: List[str] = []
        self.call_count = 0

    @property
    def provider_name(self) -> str:
        return "mock"

    def set_responses(self, responses: List[str]):
        """Register canned responses and reset the call counter."""
        self.responses = responses
        self.call_count = 0

    async def complete(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None
    ) -> LLMResponse:
        """Return the next canned response, or a synthesized placeholder."""
        canned = self.responses
        if canned:
            content = canned[self.call_count % len(canned)]
        else:
            content = f"Mock response for: {prompt[:50]}..."

        self.call_count += 1

        # Token usage is faked as plain character counts.
        return LLMResponse(
            content=content,
            model="mock-model",
            provider=self.provider_name,
            usage={"input_tokens": len(prompt), "output_tokens": len(content)}
        )

    async def batch_complete(
        self,
        prompts: List[str],
        system_prompt: Optional[str] = None,
        max_tokens: Optional[int] = None,
        rate_limit: float = 0.0
    ) -> List[LLMResponse]:
        """Return one mock response per prompt, in order."""
        replies: List[LLMResponse] = []
        for entry in prompts:
            replies.append(await self.complete(entry, system_prompt, max_tokens))
        return replies
|
||||
|
||||
|
||||
def get_llm_config() -> LLMConfig:
    """
    Create LLM config from environment variables or Vault.

    Priority for API key:
    1. Vault (if USE_VAULT_SECRETS=true and Vault is available)
    2. Environment variable (ANTHROPIC_API_KEY)
    """
    raw_provider = os.getenv("COMPLIANCE_LLM_PROVIDER", "anthropic")

    try:
        provider_type = LLMProviderType(raw_provider)
    except ValueError:
        logger.warning(f"Unknown LLM provider: {raw_provider}, falling back to mock")
        provider_type = LLMProviderType.MOCK

    # Providers that keep their credential in Vault, with the env fallback.
    vault_sources = {
        LLMProviderType.ANTHROPIC: ("breakpilot/api_keys/anthropic", "ANTHROPIC_API_KEY"),
        LLMProviderType.SELF_HOSTED: ("breakpilot/api_keys/self_hosted_llm", "SELF_HOSTED_LLM_KEY"),
    }

    api_key = None
    if provider_type in vault_sources:
        vault_path, env_var = vault_sources[provider_type]
        api_key = get_secret_from_vault_or_env(vault_path=vault_path, env_var=env_var)

    # Per-provider model selection (env-overridable defaults).
    if provider_type == LLMProviderType.ANTHROPIC:
        model = os.getenv("ANTHROPIC_MODEL", "claude-sonnet-4-20250514")
    elif provider_type == LLMProviderType.SELF_HOSTED:
        model = os.getenv("SELF_HOSTED_LLM_MODEL", "qwen2.5:14b")
    else:
        model = "mock-model"

    return LLMConfig(
        provider_type=provider_type,
        api_key=api_key,
        model=model,
        base_url=os.getenv("SELF_HOSTED_LLM_URL"),
        max_tokens=int(os.getenv("COMPLIANCE_LLM_MAX_TOKENS", "4096")),
        temperature=float(os.getenv("COMPLIANCE_LLM_TEMPERATURE", "0.3")),
        timeout=float(os.getenv("COMPLIANCE_LLM_TIMEOUT", "60.0"))
    )
|
||||
|
||||
|
||||
def get_llm_provider(config: Optional[LLMConfig] = None) -> LLMProvider:
    """
    Factory: build the LLM provider matching the given (or environment)
    configuration.

    Degrades gracefully to MockProvider when a required credential or URL
    is missing, so callers never have to handle a half-configured provider.

    Usage:
        provider = get_llm_provider()
        response = await provider.complete("Analyze this requirement...")
    """
    cfg = config if config is not None else get_llm_config()
    kind = cfg.provider_type

    if kind == LLMProviderType.MOCK:
        return MockProvider(cfg)

    if kind == LLMProviderType.ANTHROPIC:
        if not cfg.api_key:
            logger.warning("No Anthropic API key found, using mock provider")
            return MockProvider(cfg)
        return AnthropicProvider(cfg)

    if kind == LLMProviderType.SELF_HOSTED:
        if not cfg.base_url:
            logger.warning("No self-hosted LLM URL found, using mock provider")
            return MockProvider(cfg)
        return SelfHostedProvider(cfg)

    raise ValueError(f"Unsupported LLM provider type: {cfg.provider_type}")
|
||||
|
||||
|
||||
# Singleton instance for reuse
_provider_instance: Optional[LLMProvider] = None  # lazily created by get_shared_provider()
|
||||
|
||||
|
||||
def get_shared_provider() -> LLMProvider:
    """Return the process-wide LLM provider, creating it on first use."""
    global _provider_instance
    if _provider_instance is not None:
        return _provider_instance
    _provider_instance = get_llm_provider()
    return _provider_instance
|
||||
|
||||
|
||||
def reset_shared_provider():
    """Discard the cached provider so the next access rebuilds it (testing aid)."""
    global _provider_instance
    _provider_instance = None
|
||||
602
backend/compliance/services/pdf_extractor.py
Normal file
602
backend/compliance/services/pdf_extractor.py
Normal file
@@ -0,0 +1,602 @@
|
||||
"""
|
||||
PDF Extractor for BSI-TR-03161 and EU Regulation Documents.
|
||||
|
||||
This module extracts Pruefaspekte (test aspects) from BSI Technical Guidelines
|
||||
and Articles from EU regulations in PDF format.
|
||||
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional, Dict, Any
|
||||
from pathlib import Path
|
||||
from enum import Enum
|
||||
|
||||
try:
|
||||
import fitz # PyMuPDF
|
||||
except ImportError:
|
||||
fitz = None
|
||||
logging.warning("PyMuPDF not installed. PDF extraction will not work.")
|
||||
|
||||
|
||||
class RequirementLevel(str, Enum):
    """BSI requirement levels (German: Anforderungsstufen).

    Modal requirement keywords used in BSI technical guidelines; the str
    mixin keeps values directly serialisable.
    """
    MUSS = "MUSS"  # MUST - mandatory
    SOLL = "SOLL"  # SHOULD - recommended
    KANN = "KANN"  # MAY - optional
    DARF_NICHT = "DARF NICHT"  # MUST NOT - prohibited
|
||||
|
||||
|
||||
class AspectCategory(str, Enum):
    """Categories for BSI-TR Pruefaspekte.

    Used by BSIPDFExtractor to classify each extracted aspect, either by
    aspect-ID prefix (CATEGORY_MAP) or by keyword scoring (CATEGORY_KEYWORDS).
    """
    AUTHENTICATION = "authentication"
    SESSION_MANAGEMENT = "session_management"
    CRYPTOGRAPHY = "cryptography"
    INPUT_VALIDATION = "input_validation"
    SQL_INJECTION = "sql_injection"
    XSS_PREVENTION = "xss_prevention"
    CSRF_PROTECTION = "csrf_protection"
    LOGGING_AUDIT = "logging_audit"
    ERROR_HANDLING = "error_handling"
    NETWORK_SECURITY = "network_security"
    SECURE_STORAGE = "secure_storage"
    PRIVACY = "privacy"
    ACCESS_CONTROL = "access_control"
    DATA_PROTECTION = "data_protection"
    KEY_MANAGEMENT = "key_management"
    SECURE_COMMUNICATION = "secure_communication"
    UPDATE_MECHANISM = "update_mechanism"
    GENERAL = "general"  # fallback when no keyword/prefix matches
    TEST_ASPECT = "test_aspect"  # generic bucket for T.* aspect IDs
|
||||
|
||||
|
||||
@dataclass
class BSIAspect:
    """A single extracted BSI-TR Pruefaspekt (test aspect)."""
    aspect_id: str  # e.g., "O.Auth_1", "T.Sess_2"
    title: str  # Short title
    full_text: str  # Complete requirement text
    category: AspectCategory  # Categorization
    page_number: int  # PDF page where found (1-based)
    section: str  # Chapter/section number
    requirement_level: RequirementLevel  # MUSS/SOLL/KANN
    source_document: str  # e.g., "BSI-TR-03161-2"
    context_before: str = ""  # Text preceding the aspect on the page
    context_after: str = ""  # Text following the aspect on the page
    related_aspects: List[str] = field(default_factory=list)  # Related aspect IDs (filled by _enrich_aspects)
    keywords: List[str] = field(default_factory=list)  # Extracted keywords (filled by _enrich_aspects)
|
||||
|
||||
|
||||
@dataclass
class EUArticle:
    """A single extracted EU regulation article."""
    article_number: str  # e.g., "Art. 32", "Artikel 5"
    title: str  # Article title
    full_text: str  # Complete article text
    paragraphs: List[str]  # Individual numbered paragraphs
    page_number: int  # PDF page (1-based)
    regulation_name: str  # e.g., "DSGVO", "AI Act"
    recitals: List[str] = field(default_factory=list)  # Related recitals (not populated by the extractor here)
    keywords: List[str] = field(default_factory=list)  # Extracted keywords (not populated by the extractor here)
|
||||
|
||||
|
||||
class BSIPDFExtractor:
    """
    Extracts Pruefaspekte from BSI-TR-03161 PDF documents.

    The BSI-TR-03161 series contains security requirements for mobile applications:
    - Part 1: General security requirements
    - Part 2: Web application security (OAuth, Sessions, Input validation, etc.)
    - Part 3: Backend/server security

    Each document contains hundreds of Pruefaspekte (test aspects) that need to
    be extracted, categorized, and stored for compliance tracking.

    Extraction is purely regex-heuristic over PyMuPDF's plain-text page dumps,
    so the patterns below are tightly coupled to the guidelines' layout.
    """

    # Regex patterns for BSI-TR aspect identification
    PATTERNS = {
        # Primary aspect ID patterns (e.g., O.Auth_1, T.Network_2)
        'aspect_id': r'(O\.[A-Za-z]+_\d+|T\.[A-Za-z]+_\d+)',

        # Alternative section-based pattern (e.g., "Pruefaspekt 4.2.1")
        'section_aspect': r'(?:Prüfaspekt|Pruefaspekt|Anforderung)\s+(\d+\.\d+(?:\.\d+)?)',

        # Section number pattern
        'section': r'(\d+\.\d+(?:\.\d+)?)',

        # Requirement level pattern
        'requirement': r'\b(MUSS|SOLL|KANN|DARF\s+NICHT|muss|soll|kann|darf\s+nicht)\b',

        # Table header pattern for Pruefaspekte tables
        'table_header': r'(?:Prüfaspekt|Bezeichnung|ID|Anforderung)',
    }

    # Category mapping based on aspect ID prefix.
    # NOTE: insertion order matters — _determine_category returns the first
    # matching prefix, so the generic 'T.' catch-all must stay last.
    CATEGORY_MAP = {
        'O.Auth': AspectCategory.AUTHENTICATION,
        'O.Sess': AspectCategory.SESSION_MANAGEMENT,
        'O.Cryp': AspectCategory.CRYPTOGRAPHY,
        'O.Crypto': AspectCategory.CRYPTOGRAPHY,
        'O.Input': AspectCategory.INPUT_VALIDATION,
        'O.SQL': AspectCategory.SQL_INJECTION,
        'O.XSS': AspectCategory.XSS_PREVENTION,
        'O.CSRF': AspectCategory.CSRF_PROTECTION,
        'O.Log': AspectCategory.LOGGING_AUDIT,
        'O.Audit': AspectCategory.LOGGING_AUDIT,
        'O.Err': AspectCategory.ERROR_HANDLING,
        'O.Error': AspectCategory.ERROR_HANDLING,
        'O.Net': AspectCategory.NETWORK_SECURITY,
        'O.Network': AspectCategory.NETWORK_SECURITY,
        'O.Store': AspectCategory.SECURE_STORAGE,
        'O.Storage': AspectCategory.SECURE_STORAGE,
        'O.Priv': AspectCategory.PRIVACY,
        'O.Privacy': AspectCategory.PRIVACY,
        'O.Data': AspectCategory.DATA_PROTECTION,
        'O.Access': AspectCategory.ACCESS_CONTROL,
        'O.Key': AspectCategory.KEY_MANAGEMENT,
        'O.Comm': AspectCategory.SECURE_COMMUNICATION,
        'O.TLS': AspectCategory.SECURE_COMMUNICATION,
        'O.Update': AspectCategory.UPDATE_MECHANISM,
        'T.': AspectCategory.TEST_ASPECT,
    }

    # Keywords for category detection when aspect ID is ambiguous
    # (mixed German/English terms, matched case-insensitively against
    # lower-cased text).
    CATEGORY_KEYWORDS = {
        AspectCategory.AUTHENTICATION: [
            'authentifizierung', 'authentication', 'login', 'anmeldung',
            'passwort', 'password', 'credential', 'oauth', 'oidc', 'token',
            'bearer', 'jwt', 'session', 'multi-faktor', 'mfa', '2fa'
        ],
        AspectCategory.SESSION_MANAGEMENT: [
            'session', 'sitzung', 'cookie', 'timeout', 'ablauf',
            'session-id', 'sessionid', 'logout', 'abmeldung'
        ],
        AspectCategory.CRYPTOGRAPHY: [
            'verschlüsselung', 'encryption', 'kryptograph', 'cryptograph',
            'aes', 'rsa', 'hash', 'signatur', 'signature', 'zertifikat',
            'certificate', 'tls', 'ssl', 'hmac', 'pbkdf', 'argon'
        ],
        AspectCategory.INPUT_VALIDATION: [
            'eingabevalidierung', 'input validation', 'validierung',
            'eingabeprüfung', 'sanitiz', 'whitelist', 'blacklist',
            'filter', 'escape', 'encoding'
        ],
        AspectCategory.SQL_INJECTION: [
            'sql injection', 'sql-injection', 'prepared statement',
            'parameterisiert', 'parameterized', 'orm', 'database'
        ],
        AspectCategory.XSS_PREVENTION: [
            'xss', 'cross-site scripting', 'script injection',
            'html encoding', 'output encoding', 'csp', 'content-security'
        ],
        AspectCategory.CSRF_PROTECTION: [
            'csrf', 'cross-site request', 'token', 'anti-csrf',
            'state parameter', 'same-site', 'samesite'
        ],
        AspectCategory.LOGGING_AUDIT: [
            'logging', 'protokollierung', 'audit', 'nachvollziehbar',
            'traceability', 'log', 'event', 'monitoring'
        ],
        AspectCategory.ERROR_HANDLING: [
            'fehlerbehandlung', 'error handling', 'exception',
            'fehlermeldung', 'error message', 'stack trace'
        ],
    }

    def __init__(self, logger: Optional[logging.Logger] = None):
        """Initialize the PDF extractor.

        Args:
            logger: Optional custom logger; defaults to this module's logger.

        Raises:
            ImportError: If PyMuPDF (fitz) is not installed.
        """
        self.logger = logger or logging.getLogger(__name__)

        if fitz is None:
            raise ImportError(
                "PyMuPDF is required for PDF extraction. "
                "Install it with: pip install PyMuPDF"
            )

    def extract_from_file(self, pdf_path: str, source_name: Optional[str] = None) -> List[BSIAspect]:
        """
        Extract all Pruefaspekte from a BSI-TR PDF file.

        Args:
            pdf_path: Path to the PDF file
            source_name: Optional source document name (auto-detected if not provided)

        Returns:
            List of extracted BSIAspect objects (deduplicated by aspect_id,
            enriched with related-aspect links and keywords)

        Raises:
            FileNotFoundError: If pdf_path does not exist.
        """
        path = Path(pdf_path)
        if not path.exists():
            raise FileNotFoundError(f"PDF file not found: {pdf_path}")

        # Default source name is the file name without extension.
        source = source_name or path.stem
        self.logger.info(f"Extracting aspects from: {source}")

        doc = fitz.open(pdf_path)
        aspects = []

        for page_num in range(len(doc)):
            page = doc[page_num]
            text = page.get_text()

            # Extract aspects from this page (PyMuPDF pages are 0-indexed;
            # stored page numbers are 1-based).
            page_aspects = self._extract_aspects_from_text(
                text=text,
                page_num=page_num + 1,
                source_document=source
            )
            aspects.extend(page_aspects)

        doc.close()

        # Post-process: deduplicate and enrich
        aspects = self._deduplicate_aspects(aspects)
        aspects = self._enrich_aspects(aspects)

        self.logger.info(f"Extracted {len(aspects)} unique aspects from {source}")
        return aspects

    def extract_all_documents(self, docs_dir: str) -> Dict[str, List[BSIAspect]]:
        """
        Extract aspects from all BSI-TR PDFs in a directory.

        Args:
            docs_dir: Directory containing BSI-TR PDF files

        Returns:
            Dictionary mapping document names (file stems) to their extracted
            aspects. Per-file failures are logged and skipped, not raised.
        """
        docs_path = Path(docs_dir)
        results = {}

        # Look for BSI-TR PDFs
        # NOTE(review): on case-insensitive filesystems both patterns can
        # match the same file; the second pass then just overwrites the same
        # stem key — confirm if re-parsing cost matters.
        patterns = ["BSI-TR-03161*.pdf", "bsi-tr-03161*.pdf"]

        for pattern in patterns:
            for pdf_file in docs_path.glob(pattern):
                try:
                    aspects = self.extract_from_file(str(pdf_file))
                    results[pdf_file.stem] = aspects
                except Exception as e:
                    self.logger.error(f"Failed to extract from {pdf_file}: {e}")

        return results

    def _extract_aspects_from_text(
        self,
        text: str,
        page_num: int,
        source_document: str
    ) -> List[BSIAspect]:
        """Extract all Pruefaspekte from a page's text.

        Two passes: first explicit aspect IDs (O.*/T.*), then section-based
        headings ("Prüfaspekt 4.2.1") that were not already covered.
        """
        aspects = []

        # Find all aspect IDs on this page
        for match in re.finditer(self.PATTERNS['aspect_id'], text, re.IGNORECASE):
            aspect_id = match.group(1).upper()

            # Extract context around the match (200 chars before, 1000 after)
            start = max(0, match.start() - 200)
            end = min(len(text), match.end() + 1000)
            context = text[start:end]

            # Determine category from aspect ID
            category = self._determine_category(aspect_id, context)

            # Extract requirement level
            req_level = self._extract_requirement_level(context)

            # Extract title (text immediately after aspect ID)
            title = self._extract_title(context, aspect_id)

            # Extract section number
            section = self._extract_section(context)

            # Extract full requirement text
            full_text = self._extract_full_text(context, aspect_id)

            aspects.append(BSIAspect(
                aspect_id=aspect_id,
                title=title,
                full_text=full_text,
                category=category,
                page_number=page_num,
                section=section,
                requirement_level=req_level,
                source_document=source_document,
                context_before=text[start:match.start()].strip()[-100:],
                context_after=text[match.end():end].strip()[:200],
            ))

        # Also look for section-based aspects
        for match in re.finditer(self.PATTERNS['section_aspect'], text, re.IGNORECASE):
            section_id = match.group(1)
            # Synthetic ID, e.g. "SEC_4_2_1" for section 4.2.1
            aspect_id = f"SEC_{section_id.replace('.', '_')}"

            # Check if we already have this as an O.* aspect
            if any(a.section == section_id for a in aspects):
                continue

            start = max(0, match.start() - 100)
            end = min(len(text), match.end() + 800)
            context = text[start:end]

            # No ID prefix available here, so classify by keywords only.
            category = self._determine_category_from_keywords(context)
            req_level = self._extract_requirement_level(context)

            aspects.append(BSIAspect(
                aspect_id=aspect_id,
                title=f"Prüfaspekt {section_id}",
                full_text=context.strip(),
                category=category,
                page_number=page_num,
                section=section_id,
                requirement_level=req_level,
                source_document=source_document,
            ))

        return aspects

    def _determine_category(self, aspect_id: str, context: str) -> AspectCategory:
        """Determine the category of an aspect based on its ID and context."""
        # First try to match by aspect ID prefix (first match in insertion
        # order wins — see CATEGORY_MAP note).
        for prefix, category in self.CATEGORY_MAP.items():
            if aspect_id.upper().startswith(prefix.upper()):
                return category

        # Fall back to keyword-based detection
        return self._determine_category_from_keywords(context)

    def _determine_category_from_keywords(self, text: str) -> AspectCategory:
        """Determine category based on keywords in the text.

        Scores each category by how many of its keywords occur in the text
        and returns the highest-scoring one; GENERAL when nothing matches.
        """
        text_lower = text.lower()

        category_scores = {}
        for category, keywords in self.CATEGORY_KEYWORDS.items():
            score = sum(1 for kw in keywords if kw in text_lower)
            if score > 0:
                category_scores[category] = score

        if category_scores:
            return max(category_scores, key=category_scores.get)

        return AspectCategory.GENERAL

    def _extract_requirement_level(self, text: str) -> RequirementLevel:
        """Extract the requirement level from text.

        Uses the FIRST modal keyword found; defaults to SOLL when none is
        present.
        """
        match = re.search(self.PATTERNS['requirement'], text, re.IGNORECASE)
        if match:
            level = match.group(1).upper()
            # Substring check tolerates variable whitespace in "DARF NICHT".
            if 'DARF' in level and 'NICHT' in level:
                return RequirementLevel.DARF_NICHT
            elif level == 'MUSS':
                return RequirementLevel.MUSS
            elif level == 'SOLL':
                return RequirementLevel.SOLL
            elif level == 'KANN':
                return RequirementLevel.KANN

        return RequirementLevel.SOLL  # Default

    def _extract_title(self, context: str, aspect_id: str) -> str:
        """Extract the title/short description of an aspect.

        Falls back to the aspect ID itself when no title line is found.
        """
        # Look for text immediately after the aspect ID (optionally separated
        # by ":", "-" or an en dash)
        pattern = rf'{re.escape(aspect_id)}\s*[:\-–]?\s*([^\n]+)'
        match = re.search(pattern, context, re.IGNORECASE)

        if match:
            title = match.group(1).strip()
            # Clean up the title: collapse runs of whitespace
            title = re.sub(r'\s+', ' ', title)
            # Truncate if too long
            if len(title) > 200:
                title = title[:197] + "..."
            return title

        return aspect_id

    def _extract_section(self, context: str) -> str:
        """Extract the first section-like number (e.g. "4.2.1") from context."""
        match = re.search(self.PATTERNS['section'], context)
        return match.group(1) if match else ""

    def _extract_full_text(self, context: str, aspect_id: str) -> str:
        """Extract the complete requirement text (including the heading line)."""
        # Find the aspect ID and get text until the next aspect or section
        pattern = rf'{re.escape(aspect_id)}[^\n]*\n(.*?)(?=\n\s*(?:O\.[A-Z]|T\.[A-Z]|\d+\.\d+\s|\Z))'
        match = re.search(pattern, context, re.IGNORECASE | re.DOTALL)

        if match:
            # group(0): whole match, i.e. heading line plus body.
            full_text = match.group(0).strip()
        else:
            # Fall back to context
            full_text = context.strip()

        # Clean up: collapse all whitespace (including newlines) to spaces
        full_text = re.sub(r'\s+', ' ', full_text)
        return full_text

    def _deduplicate_aspects(self, aspects: List[BSIAspect]) -> List[BSIAspect]:
        """Remove duplicate aspects, keeping the one with more context."""
        seen = {}

        for aspect in aspects:
            key = aspect.aspect_id
            if key not in seen:
                seen[key] = aspect
            else:
                # Keep the one with longer full_text
                if len(aspect.full_text) > len(seen[key].full_text):
                    seen[key] = aspect

        return list(seen.values())

    def _enrich_aspects(self, aspects: List[BSIAspect]) -> List[BSIAspect]:
        """Enrich aspects in place with cross-references and keywords."""
        aspect_ids = {a.aspect_id for a in aspects}

        for aspect in aspects:
            # Find related aspects mentioned in the full text
            for other_id in aspect_ids:
                if other_id != aspect.aspect_id and other_id in aspect.full_text:
                    aspect.related_aspects.append(other_id)

            # Extract keywords based on category
            aspect.keywords = self._extract_keywords(aspect)

        return aspects

    def _extract_keywords(self, aspect: BSIAspect) -> List[str]:
        """Extract up to 10 category keywords that occur in the aspect text."""
        keywords = []
        text_lower = aspect.full_text.lower()

        # Add keywords based on category
        if aspect.category in self.CATEGORY_KEYWORDS:
            for kw in self.CATEGORY_KEYWORDS[aspect.category]:
                if kw in text_lower:
                    keywords.append(kw)

        # set() deduplicates; note this makes keyword order unspecified.
        return list(set(keywords))[:10]  # Limit to 10 keywords

    def get_statistics(self, aspects: List[BSIAspect]) -> Dict[str, Any]:
        """Get statistics about extracted aspects.

        Returns counts by category, requirement level and source document,
        plus the number of distinct section identifiers.
        """
        stats = {
            "total_aspects": len(aspects),
            "by_category": {},
            "by_requirement_level": {},
            "by_source": {},
            "unique_sections": set(),
        }

        for aspect in aspects:
            # By category
            cat = aspect.category.value
            stats["by_category"][cat] = stats["by_category"].get(cat, 0) + 1

            # By requirement level
            level = aspect.requirement_level.value
            stats["by_requirement_level"][level] = stats["by_requirement_level"].get(level, 0) + 1

            # By source
            src = aspect.source_document
            stats["by_source"][src] = stats["by_source"].get(src, 0) + 1

            # Unique sections
            if aspect.section:
                stats["unique_sections"].add(aspect.section)

        # Replace the set with its cardinality so the result is serialisable.
        stats["unique_sections"] = len(stats["unique_sections"])
        return stats
|
||||
|
||||
|
||||
class EURegulationExtractor:
    """
    Extracts Articles from EU Regulation PDF documents.

    Handles documents like GDPR, AI Act, CRA, etc. in their official formats,
    using regex heuristics over PyMuPDF's plain-text page dumps.

    Limitation: articles are extracted per page, so an article spanning a
    page break is captured as separate fragments; deduplication keeps the
    longer fragment.
    """

    # Regex patterns for locating articles, paragraphs and recitals.
    PATTERNS = {
        'article_de': r'Artikel\s+(\d+)',
        'article_en': r'Article\s+(\d+)',
        'paragraph': r'\((\d+)\)',
        'recital': r'Erwägungsgrund\s+(\d+)|Recital\s+(\d+)',
    }

    def __init__(self, logger: Optional[logging.Logger] = None):
        """Initialize the extractor.

        Args:
            logger: Optional custom logger; defaults to this module's logger.
        """
        self.logger = logger or logging.getLogger(__name__)

    def extract_from_file(
        self,
        pdf_path: str,
        regulation_name: str,
        language: str = "de"
    ) -> List[EUArticle]:
        """Extract all articles from an EU regulation PDF.

        Args:
            pdf_path: Path to the PDF file.
            regulation_name: Name stored on each resulting EUArticle
                (e.g. "DSGVO", "AI Act").
            language: "de" to match "Artikel N" headings, anything else
                matches "Article N".

        Returns:
            Deduplicated list of EUArticle objects.

        Raises:
            ImportError: If PyMuPDF (fitz) is not installed.
            FileNotFoundError: If pdf_path does not exist.
        """
        if fitz is None:
            raise ImportError("PyMuPDF is required for PDF extraction.")

        path = Path(pdf_path)
        if not path.exists():
            raise FileNotFoundError(f"PDF file not found: {pdf_path}")

        doc = fitz.open(pdf_path)
        articles = []

        article_pattern = (
            self.PATTERNS['article_de'] if language == "de"
            else self.PATTERNS['article_en']
        )

        for page_num in range(len(doc)):
            page = doc[page_num]
            text = page.get_text()

            # Find article starts
            for match in re.finditer(article_pattern, text):
                article_num = match.group(1)

                # Extract article content
                start = match.start()
                # Find next article heading (searched relative to match.end(),
                # hence the offset when converting back to absolute position)
                # or fall back to end of page.
                next_match = re.search(article_pattern, text[match.end():])
                end = match.end() + next_match.start() if next_match else len(text)

                article_text = text[start:end].strip()

                # Extract paragraphs
                paragraphs = self._extract_paragraphs(article_text)

                # Extract title
                title = self._extract_article_title(article_text, article_num)

                articles.append(EUArticle(
                    article_number=f"Art. {article_num}",
                    title=title,
                    full_text=article_text,
                    paragraphs=paragraphs,
                    page_number=page_num + 1,
                    regulation_name=regulation_name,
                ))

        doc.close()
        return self._deduplicate_articles(articles)

    def _extract_paragraphs(self, text: str) -> List[str]:
        """Extract numbered paragraphs ("(1)", "(2)", ...) from article text.

        Each returned entry starts at its "(n)" marker and runs to the next
        marker (or end of text).
        """
        paragraphs = []
        matches = list(re.finditer(self.PATTERNS['paragraph'], text))

        for i, match in enumerate(matches):
            start = match.start()
            end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
            para_text = text[start:end].strip()
            if para_text:
                paragraphs.append(para_text)

        return paragraphs

    def _extract_article_title(self, text: str, article_num: str) -> str:
        """Extract the title of an article (line following the heading).

        Bug fix: previously only the German heading "Artikel" was matched, so
        English-language documents (language="en" in extract_from_file) never
        yielded a real title and always fell back to the placeholder. Both
        "Artikel" and "Article" headings are now recognised.
        """
        pattern = rf'(?:Artikel|Article)\s+{article_num}\s*\n\s*([^\n]+)'
        match = re.search(pattern, text)

        if match:
            return match.group(1).strip()

        # Placeholder kept in its original (German) form for backward
        # compatibility with existing stored data.
        return f"Artikel {article_num}"

    def _deduplicate_articles(self, articles: List[EUArticle]) -> List[EUArticle]:
        """Remove duplicate articles, keeping the fragment with more text."""
        seen = {}

        for article in articles:
            key = article.article_number
            if key not in seen:
                seen[key] = article
            else:
                if len(article.full_text) > len(seen[key].full_text):
                    seen[key] = article

        return list(seen.values())
|
||||
876
backend/compliance/services/regulation_scraper.py
Normal file
876
backend/compliance/services/regulation_scraper.py
Normal file
@@ -0,0 +1,876 @@
|
||||
"""
|
||||
Compliance Regulation Scraper Service.
|
||||
|
||||
Extracts requirements and audit aspects from:
|
||||
- EU-Lex regulations (GDPR, AI Act, CRA, NIS2, etc.)
|
||||
- BSI Technical Guidelines (TR-03161)
|
||||
- German laws (TDDDG, etc.)
|
||||
|
||||
Similar pattern to edu-search and zeugnisse-crawler.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Any, Optional
|
||||
from enum import Enum
|
||||
import hashlib
|
||||
|
||||
import httpx
|
||||
from bs4 import BeautifulSoup
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..db.models import (
|
||||
RegulationDB,
|
||||
RequirementDB,
|
||||
RegulationTypeEnum,
|
||||
)
|
||||
from ..db.repository import (
|
||||
RegulationRepository,
|
||||
RequirementRepository,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SourceType(str, Enum):
    """Origin of a regulation document; selects the scraping strategy."""
    EUR_LEX = "eur_lex"  # EUR-Lex HTML pages (see KNOWN_SOURCES URLs)
    BSI_PDF = "bsi_pdf"  # locally parsed BSI technical guideline PDFs
    GESETZE_IM_INTERNET = "gesetze_im_internet"  # German law portal
    MANUAL = "manual"  # presumably hand-entered entries; not used in KNOWN_SOURCES — confirm
|
||||
|
||||
|
||||
class ScraperStatus(str, Enum):
    """Lifecycle states reported for a scraping run."""
    IDLE = "idle"
    RUNNING = "running"
    COMPLETED = "completed"
    ERROR = "error"
|
||||
|
||||
|
||||
class RegulationScraperService:
    """
    Scrapes and extracts requirements from regulatory sources.

    Supported sources:
    - EUR-Lex: https://eur-lex.europa.eu/eli/reg/{year}/{number}/oj/eng
    - BSI: Local PDF parsing
    - Gesetze-im-Internet: German law portal
    """

    # EUR-Lex patterns for article extraction: captures the article number
    # ("12" or "12a") and the body text up to the next "Article N" heading
    # (or end of input).
    ARTICLE_PATTERN = re.compile(
        r'Article\s+(\d+[a-z]?)\s*\n\s*(.+?)(?=\nArticle\s+\d|$)',
        re.DOTALL | re.IGNORECASE
    )

    # BSI TR pattern for test aspects: captures aspect IDs of the form
    # "O.Name" / "O.Name_1" plus the following text up to the next "O." marker.
    BSI_ASPECT_PATTERN = re.compile(
        r'(O\.[A-Za-z_]+[\d]*)\s+(.+?)(?=\nO\.|$)',
        re.DOTALL
    )

    # Known regulation URLs - All 19 regulations from seed data.
    # Each entry maps a short regulation code to its source URL (a local PDF
    # path for BSI guidelines), the scraping strategy, and the regulation
    # category stored on the RegulationDB row.
    KNOWN_SOURCES = {
        # A. Data protection & data transfer
        "GDPR": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32016R0679",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_REGULATION,
        },
        "EPRIVACY": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32002L0058",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_DIRECTIVE,
        },
        "SCC": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32021D0914",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_REGULATION,
        },
        "DPF": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32023D1795",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_REGULATION,
        },
        # B. AI regulation
        "AIACT": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=OJ:L_202401689",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_REGULATION,
        },
        # C. Cybersecurity & product safety
        "CRA": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=OJ:L_202402847",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_REGULATION,
        },
        "NIS2": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32022L2555",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_DIRECTIVE,
        },
        "EUCSA": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32019R0881",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_REGULATION,
        },
        # D. Data economy & interoperability
        "DATAACT": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32023R2854",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_REGULATION,
        },
        "DGA": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32022R0868",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_REGULATION,
        },
        # E. Platform obligations
        "DSA": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32022R2065",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_REGULATION,
        },
        # F. Accessibility
        "EAA": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32019L0882",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_DIRECTIVE,
        },
        # G. IP & copyright
        "DSM": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32019L0790",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_DIRECTIVE,
        },
        # H. Product liability
        "PLD": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32024L2853",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_DIRECTIVE,
        },
        "GPSR": {
            "url": "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32023R0988",
            "type": SourceType.EUR_LEX,
            "regulation_type": RegulationTypeEnum.EU_REGULATION,
        },
        # I. BSI standards (Germany)
        "BSI-TR-03161-1": {
            "url": "/docs/BSI-TR-03161-1.pdf",
            "type": SourceType.BSI_PDF,
            "regulation_type": RegulationTypeEnum.BSI_STANDARD,
        },
        "BSI-TR-03161-2": {
            "url": "/docs/BSI-TR-03161-2.pdf",
            "type": SourceType.BSI_PDF,
            "regulation_type": RegulationTypeEnum.BSI_STANDARD,
        },
        "BSI-TR-03161-3": {
            "url": "/docs/BSI-TR-03161-3.pdf",
            "type": SourceType.BSI_PDF,
            "regulation_type": RegulationTypeEnum.BSI_STANDARD,
        },
    }
|
||||
|
||||
def __init__(self, db: Session):
|
||||
self.db = db
|
||||
self.reg_repo = RegulationRepository(db)
|
||||
self.req_repo = RequirementRepository(db)
|
||||
self.status = ScraperStatus.IDLE
|
||||
self.current_source: Optional[str] = None
|
||||
self.last_error: Optional[str] = None
|
||||
self.stats = {
|
||||
"sources_processed": 0,
|
||||
"requirements_extracted": 0,
|
||||
"errors": 0,
|
||||
"last_run": None,
|
||||
}
|
||||
|
||||
async def get_status(self) -> Dict[str, Any]:
|
||||
"""Get current scraper status."""
|
||||
return {
|
||||
"status": self.status.value,
|
||||
"current_source": self.current_source,
|
||||
"last_error": self.last_error,
|
||||
"stats": self.stats,
|
||||
"known_sources": list(self.KNOWN_SOURCES.keys()),
|
||||
}
|
||||
|
||||
    async def scrape_all(self) -> Dict[str, Any]:
        """Scrape all known regulation sources.

        Iterates over ``KNOWN_SOURCES`` and dispatches each entry to the
        matching scraper (EUR-Lex HTML or BSI PDF).  Sources that already
        have requirements in the database are skipped; per-source failures
        are recorded and do not abort the run.

        Returns:
            Dict with ``success``, ``failed`` and ``skipped`` lists, one
            entry per source code.
        """
        self.status = ScraperStatus.RUNNING
        # NOTE(review): naive UTC timestamp (datetime.utcnow()); the stored
        # ISO string carries no timezone offset.
        self.stats["last_run"] = datetime.utcnow().isoformat()

        results = {
            "success": [],
            "failed": [],
            "skipped": [],
        }

        for code, source_info in self.KNOWN_SOURCES.items():
            try:
                self.current_source = code

                # Check if already scraped recently: existing requirements
                # mean the source was processed before, so skip it.
                existing = self.reg_repo.get_by_code(code)
                if existing and existing.requirements:
                    results["skipped"].append({
                        "code": code,
                        "reason": "already_has_requirements",
                        "requirement_count": len(existing.requirements),
                    })
                    continue

                # Scrape based on source type
                if source_info["type"] == SourceType.EUR_LEX:
                    count = await self._scrape_eurlex(code, source_info)
                elif source_info["type"] == SourceType.BSI_PDF:
                    count = await self._scrape_bsi_pdf(code, source_info)
                else:
                    results["skipped"].append({
                        "code": code,
                        "reason": "unknown_source_type",
                    })
                    continue

                results["success"].append({
                    "code": code,
                    "requirements_extracted": count,
                })
                self.stats["sources_processed"] += 1
                self.stats["requirements_extracted"] += count

            except Exception as e:
                # One failing source must not stop the others: record and go on.
                logger.error(f"Error scraping {code}: {e}")
                results["failed"].append({
                    "code": code,
                    "error": str(e),
                })
                self.stats["errors"] += 1
                self.last_error = str(e)

        # Status is COMPLETED even when individual sources failed; failures
        # are reported through the results dict instead.
        self.status = ScraperStatus.COMPLETED
        self.current_source = None
        return results
|
||||
|
||||
    async def scrape_single(self, code: str, force: bool = False) -> Dict[str, Any]:
        """Scrape a single regulation source.

        Args:
            code: Key into ``KNOWN_SOURCES``.
            force: When True, delete the regulation's existing requirements
                and re-scrape instead of skipping.

        Returns:
            Dict with ``code``, ``status`` and either the number of
            requirements extracted or the reason for skipping.

        Raises:
            ValueError: If *code* is unknown or the source type is
                unsupported.  Any scraper error is re-raised after the
                status has been set to ERROR.
        """
        if code not in self.KNOWN_SOURCES:
            raise ValueError(f"Unknown regulation code: {code}")

        source_info = self.KNOWN_SOURCES[code]
        self.status = ScraperStatus.RUNNING
        self.current_source = code

        try:
            # Check existing: without force, previous results are kept.
            existing = self.reg_repo.get_by_code(code)
            if existing and existing.requirements and not force:
                self.status = ScraperStatus.COMPLETED
                return {
                    "code": code,
                    "status": "skipped",
                    "reason": "already_has_requirements",
                    "requirement_count": len(existing.requirements),
                }

            # Delete existing requirements if force
            if existing and force:
                for req in existing.requirements:
                    self.db.delete(req)
                self.db.commit()

            # Scrape
            if source_info["type"] == SourceType.EUR_LEX:
                count = await self._scrape_eurlex(code, source_info)
            elif source_info["type"] == SourceType.BSI_PDF:
                count = await self._scrape_bsi_pdf(code, source_info)
            else:
                raise ValueError(f"Unknown source type: {source_info['type']}")

            self.status = ScraperStatus.COMPLETED
            return {
                "code": code,
                "status": "success",
                "requirements_extracted": count,
            }

        except Exception as e:
            self.status = ScraperStatus.ERROR
            self.last_error = str(e)
            raise
        finally:
            # Always clear the in-progress marker, success or failure.
            self.current_source = None
|
||||
|
||||
    async def _scrape_eurlex(self, code: str, source_info: Dict) -> int:
        """Scrape EUR-Lex regulation page.

        Three extraction strategies are tried in order until one yields
        requirements: (1) structured DOM elements, (2) a regex pass over the
        raw page text, (3) hard-coded seed requirements (e.g. when the page
        is WAF-protected).

        Returns:
            Number of newly created RequirementDB rows.
        """
        url = source_info["url"]
        logger.info(f"Scraping EUR-Lex: {code} from {url}")

        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(url, follow_redirects=True)
            response.raise_for_status()

        # httpx reads the body eagerly, so .text stays valid after the
        # client context has closed.
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')

        # Get or create regulation
        regulation = self.reg_repo.get_by_code(code)
        if not regulation:
            regulation = RegulationDB(
                code=code,
                name=code,
                regulation_type=source_info["regulation_type"],
                source_url=url,
                is_active=True,
            )
            self.db.add(regulation)
            self.db.commit()
            self.db.refresh(regulation)

        # Extract articles
        requirements_created = 0

        # Find all article elements (EUR-Lex structure varies between
        # publications; fall back to the OJ title class when the ELI
        # subdivision markup is absent).
        articles = soup.find_all('div', class_='eli-subdivision')
        if not articles:
            articles = soup.find_all('p', class_='oj-ti-art')

        for article_elem in articles:
            try:
                # Extract article number and title
                article_id = article_elem.get('id', '')
                if not article_id:
                    title_elem = article_elem.find(['span', 'p'], class_=['oj-ti-art', 'eli-title'])
                    if title_elem:
                        text = title_elem.get_text(strip=True)
                        match = re.search(r'Article\s+(\d+[a-z]?)', text, re.IGNORECASE)
                        if match:
                            article_id = f"art_{match.group(1)}"

                if not article_id:
                    continue

                # Extract article text
                article_text = article_elem.get_text(separator='\n', strip=True)

                # Parse article number and title from the first few lines:
                # first the "Article N" heading, then the next non-trivial
                # line is taken as the title.
                lines = article_text.split('\n')
                article_num = None
                title = None

                for line in lines[:3]:
                    art_match = re.search(r'Article\s+(\d+[a-z]?)', line, re.IGNORECASE)
                    if art_match:
                        article_num = f"Art. {art_match.group(1)}"
                    elif not article_num:
                        continue
                    elif not title and len(line) > 3 and not line.startswith('Article'):
                        title = line[:200]
                        break

                if not article_num:
                    continue

                # Check if requirement already exists
                existing = self.db.query(RequirementDB).filter(
                    RequirementDB.regulation_id == regulation.id,
                    RequirementDB.article == article_num
                ).first()

                if existing:
                    continue

                # Create requirement
                requirement = RequirementDB(
                    regulation_id=regulation.id,
                    article=article_num,
                    title=title or f"{code} {article_num}",
                    requirement_text=article_text[:5000],  # Limit length
                    source_section=article_id,
                    is_applicable=True,
                    priority=2,
                )
                self.db.add(requirement)
                requirements_created += 1

            except Exception as e:
                # A single malformed article must not abort the whole page.
                logger.warning(f"Error parsing article in {code}: {e}")
                continue

        # Alternative: extract from raw text with regex
        if requirements_created == 0:
            text = soup.get_text()
            matches = self.ARTICLE_PATTERN.findall(text)

            for art_num, art_text in matches[:50]:  # Limit to 50 articles
                article_num = f"Art. {art_num}"

                existing = self.db.query(RequirementDB).filter(
                    RequirementDB.regulation_id == regulation.id,
                    RequirementDB.article == article_num
                ).first()

                if existing:
                    continue

                # Extract first line as title
                lines = art_text.strip().split('\n')
                title = lines[0][:200] if lines else f"{code} {article_num}"

                requirement = RequirementDB(
                    regulation_id=regulation.id,
                    article=article_num,
                    title=title,
                    requirement_text=art_text[:5000],
                    is_applicable=True,
                    priority=2,
                )
                self.db.add(requirement)
                requirements_created += 1

        # Fallback: If scraping failed (e.g., WAF protection), use seed requirements
        if requirements_created == 0:
            logger.info(f"Scraping returned 0 results for {code}, using seed requirements")
            seed_reqs = self._get_eurlex_seed_requirements(code)
            for seed in seed_reqs:
                existing = self.db.query(RequirementDB).filter(
                    RequirementDB.regulation_id == regulation.id,
                    RequirementDB.article == seed["article"]
                ).first()

                if existing:
                    continue

                requirement = RequirementDB(
                    regulation_id=regulation.id,
                    article=seed["article"],
                    title=seed["title"],
                    description=seed.get("description"),
                    requirement_text=seed.get("requirement_text"),
                    is_applicable=True,
                    priority=seed.get("priority", 2),
                )
                self.db.add(requirement)
                requirements_created += 1

        self.db.commit()
        logger.info(f"Extracted {requirements_created} requirements from {code}")
        return requirements_created
|
||||
|
||||
    def _get_eurlex_seed_requirements(self, code: str) -> List[Dict[str, Any]]:
        """
        Returns seed requirements for EUR-Lex regulations when scraping fails.

        These are the key articles relevant for Breakpilot compliance.

        Args:
            code: Regulation code as used in ``KNOWN_SOURCES``.

        Returns:
            A list of dicts with keys ``article``, ``title``,
            ``description``, ``requirement_text`` and ``priority``;
            an empty list for codes without seed data.
        """
        # NOTE: the German strings below are verbatim seed content that ends
        # up in the database; do not translate or reformat them.
        if code == "NIS2":
            return [
                {"article": "Art. 6", "title": "Risikobewertung", "description": "Risikobewertung fuer Cybersicherheit", "requirement_text": "Einrichtungen muessen eine Risikobewertung fuer Cybersicherheit durchfuehren.", "priority": 1},
                {"article": "Art. 7", "title": "Nationale Cybersicherheitsstrategie", "description": "Umsetzung nationaler Vorgaben", "requirement_text": "Einhaltung der nationalen Cybersicherheitsstrategie.", "priority": 2},
                {"article": "Art. 20", "title": "Governance", "description": "Leitungsorgane muessen Cybersicherheit beaufsichtigen", "requirement_text": "Leitungsorgane muessen Cybersicherheitsmassnahmen genehmigen und deren Umsetzung beaufsichtigen.", "priority": 1},
                {"article": "Art. 21", "title": "Risikomanagementmassnahmen", "description": "Technische und organisatorische Massnahmen", "requirement_text": "Geeignete und verhaeltnismaessige technische, operative und organisatorische Massnahmen zur Beherrschung von Cybersicherheitsrisiken.", "priority": 1},
                {"article": "Art. 21(2)(a)", "title": "Risikoanalyse und Sicherheitskonzepte", "description": "Konzepte fuer Risikoanalyse", "requirement_text": "Konzepte fuer die Risikoanalyse und Sicherheit von Informationssystemen.", "priority": 1},
                {"article": "Art. 21(2)(b)", "title": "Bewertung von Sicherheitsvorfaellen", "description": "Incident Handling", "requirement_text": "Bewertung der Wirksamkeit von Risikomanagementmassnahmen.", "priority": 1},
                {"article": "Art. 21(2)(c)", "title": "Business Continuity", "description": "Betriebskontinuitaet sicherstellen", "requirement_text": "Aufrechterhaltung des Betriebs, Backup-Management und Krisenmanagement.", "priority": 1},
                {"article": "Art. 21(2)(d)", "title": "Lieferkettensicherheit", "description": "Sicherheit in der Lieferkette", "requirement_text": "Sicherheit der Lieferkette einschliesslich Beziehungen zu Lieferanten.", "priority": 1},
                {"article": "Art. 21(2)(e)", "title": "Sicherheit bei Entwicklung", "description": "Sichere Entwicklung", "requirement_text": "Sicherheit bei Erwerb, Entwicklung und Wartung von Systemen.", "priority": 1},
                {"article": "Art. 21(2)(f)", "title": "Schwachstellenmanagement", "description": "Umgang mit Schwachstellen", "requirement_text": "Konzepte zur Bewertung der Wirksamkeit von Massnahmen.", "priority": 1},
                {"article": "Art. 21(2)(g)", "title": "Cyberhygiene und Schulungen", "description": "Grundlegende Cyberhygiene-Praktiken", "requirement_text": "Grundlegende Cyberhygiene-Praktiken und Schulungen.", "priority": 1},
                {"article": "Art. 21(2)(h)", "title": "Kryptografie", "description": "Einsatz von Verschluesselung", "requirement_text": "Konzepte und Verfahren fuer Kryptografie und Verschluesselung.", "priority": 1},
                {"article": "Art. 21(2)(i)", "title": "Personalsicherheit", "description": "HR-Security", "requirement_text": "Sicherheit des Personals, Zugangskontrollen und Asset-Management.", "priority": 1},
                {"article": "Art. 21(2)(j)", "title": "MFA und sichere Authentifizierung", "description": "Multi-Faktor-Authentifizierung", "requirement_text": "Multi-Faktor-Authentifizierung und sichere Kommunikation.", "priority": 1},
                {"article": "Art. 23", "title": "Meldepflichten", "description": "Meldung von Sicherheitsvorfaellen", "requirement_text": "Erhebliche Sicherheitsvorfaelle muessen den zustaendigen Behoerden gemeldet werden.", "priority": 1},
                {"article": "Art. 24", "title": "Europaeische Schwachstellendatenbank", "description": "CVE-Datenbank nutzen", "requirement_text": "Nutzung der europaeischen Schwachstellendatenbank.", "priority": 2},
            ]

        elif code == "DATAACT":
            return [
                {"article": "Art. 3", "title": "Datenzugang fuer Nutzer", "description": "Nutzer koennen auf ihre Daten zugreifen", "requirement_text": "Daten, die durch Nutzung vernetzter Produkte generiert werden, muessen dem Nutzer zugaenglich gemacht werden.", "priority": 1},
                {"article": "Art. 4", "title": "Recht auf Datenzugang", "description": "Unentgeltlicher Zugang", "requirement_text": "Nutzer haben das Recht auf unentgeltlichen Zugang zu ihren Daten.", "priority": 1},
                {"article": "Art. 5", "title": "Recht auf Datenweitergabe", "description": "Daten an Dritte weitergeben", "requirement_text": "Nutzer koennen verlangen, dass Daten an Dritte weitergegeben werden.", "priority": 1},
                {"article": "Art. 6", "title": "Pflichten des Dateninhabers", "description": "Daten zeitnah bereitstellen", "requirement_text": "Dateninhaber muessen Daten unverzueglich und in geeignetem Format bereitstellen.", "priority": 1},
                {"article": "Art. 8", "title": "Faire Vertragsbedingungen", "description": "Keine unfairen Klauseln", "requirement_text": "Vertragsbedingungen fuer Datenzugang muessen fair und nicht-diskriminierend sein.", "priority": 2},
                {"article": "Art. 14", "title": "Cloud-Switching", "description": "Wechsel zwischen Cloud-Anbietern", "requirement_text": "Unterstuetzung beim Wechsel zwischen Cloud-Diensten und Datenportabilitaet.", "priority": 1},
                {"article": "Art. 23", "title": "Technische Schutzmassnahmen", "description": "Schutz nicht-personenbezogener Daten", "requirement_text": "Angemessene technische Schutzmassnahmen fuer nicht-personenbezogene Daten.", "priority": 1},
                {"article": "Art. 25", "title": "Geschaeftsgeheimnisse", "description": "Schutz von Geschaeftsgeheimnissen", "requirement_text": "Massnahmen zum Schutz von Geschaeftsgeheimnissen bei Datenzugang.", "priority": 2},
            ]

        elif code == "DGA":
            return [
                {"article": "Art. 5", "title": "Bedingungen fuer Weiterverwendung", "description": "Weiterverwendung oeffentlicher Daten", "requirement_text": "Bedingungen fuer die Weiterverwendung geschuetzter Daten oeffentlicher Stellen.", "priority": 2},
                {"article": "Art. 7", "title": "Technische Anforderungen", "description": "Sichere Verarbeitungsumgebungen", "requirement_text": "Sichere Verarbeitungsumgebungen fuer Zugang zu geschuetzten Daten.", "priority": 1},
                {"article": "Art. 10", "title": "Datenvermittlungsdienste", "description": "Registrierung von Vermittlungsdiensten", "requirement_text": "Datenvermittlungsdienste muessen registriert und reguliert werden.", "priority": 2},
                {"article": "Art. 12", "title": "Bedingungen fuer Datenvermittlung", "description": "Neutralitaet wahren", "requirement_text": "Datenvermittler muessen neutral handeln und duerfen Daten nicht fuer eigene Zwecke nutzen.", "priority": 1},
                {"article": "Art. 16", "title": "Datenaltruismus", "description": "Freiwillige Datenspende", "requirement_text": "Registrierung als Organisation fuer Datenaltruismus moeglich.", "priority": 3},
                {"article": "Art. 21", "title": "Einwilligungsformular", "description": "Europaeisches Einwilligungsformular", "requirement_text": "Verwendung des europaeischen Einwilligungsformulars fuer Datenaltruismus.", "priority": 3},
            ]

        elif code == "DSA":
            return [
                {"article": "Art. 6", "title": "Haftungsausschluss Hosting", "description": "Bedingungen fuer Haftungsausschluss", "requirement_text": "Hosting-Dienste haften nicht, wenn sie keine Kenntnis von rechtswidrigen Inhalten haben.", "priority": 1},
                {"article": "Art. 11", "title": "Kontaktstelle", "description": "Behoerdenkontakt", "requirement_text": "Anbieter muessen eine Kontaktstelle fuer Behoerden benennen.", "priority": 2},
                {"article": "Art. 12", "title": "Rechtsvertreter", "description": "Vertreter in der EU", "requirement_text": "Nicht-EU-Anbieter muessen einen Rechtsvertreter in der EU benennen.", "priority": 2},
                {"article": "Art. 13", "title": "AGB-Transparenz", "description": "Transparente Nutzungsbedingungen", "requirement_text": "AGB muessen klar, verstaendlich und leicht zugaenglich sein.", "priority": 1},
                {"article": "Art. 14", "title": "Transparenzberichte", "description": "Jaehrliche Berichte", "requirement_text": "Jaehrliche Transparenzberichte ueber Content-Moderation veroeffentlichen.", "priority": 2},
                {"article": "Art. 16", "title": "Melde- und Abhilfeverfahren", "description": "Notice and Action", "requirement_text": "Leicht zugaengliches System fuer Meldung rechtswidriger Inhalte.", "priority": 1},
                {"article": "Art. 17", "title": "Begruendungspflicht", "description": "Entscheidungen begruenden", "requirement_text": "Nutzer muessen ueber Content-Moderation-Entscheidungen informiert werden.", "priority": 1},
                {"article": "Art. 20", "title": "Internes Beschwerdemanagement", "description": "Beschwerden bearbeiten", "requirement_text": "Internes System zur Bearbeitung von Beschwerden ueber Content-Moderation.", "priority": 1},
                {"article": "Art. 26", "title": "Werbetransparenz", "description": "Werbung kennzeichnen", "requirement_text": "Online-Werbung muss klar als solche erkennbar sein.", "priority": 1},
                {"article": "Art. 27", "title": "Empfehlungssysteme", "description": "Algorithmen erklaeren", "requirement_text": "Transparenz ueber Parameter von Empfehlungsalgorithmen.", "priority": 2},
            ]

        elif code == "EUCSA":
            return [
                {"article": "Art. 46", "title": "Cybersicherheitszertifizierung", "description": "EU-Zertifizierungsrahmen", "requirement_text": "Freiwillige europaeische Zertifizierung fuer IKT-Produkte und -Dienste.", "priority": 2},
                {"article": "Art. 51", "title": "Sicherheitsziele", "description": "Ziele der Zertifizierung", "requirement_text": "Schutz von Daten vor unbefugtem Zugriff, Manipulation und Zerstoerung.", "priority": 1},
                {"article": "Art. 52", "title": "Vertrauenswuerdigkeitsstufen", "description": "Basic, Substantial, High", "requirement_text": "Drei Stufen: Basic, Substantial, High - je nach Risiko.", "priority": 1},
                {"article": "Art. 54", "title": "Konformitaetsbewertung", "description": "Selbstbewertung oder Drittbewertung", "requirement_text": "Je nach Stufe Selbstbewertung oder unabhaengige Bewertung.", "priority": 2},
                {"article": "Art. 56", "title": "Zertifizierungsstellen", "description": "Akkreditierte Stellen", "requirement_text": "Zertifizierung durch akkreditierte Konformitaetsbewertungsstellen.", "priority": 2},
            ]

        elif code == "EAA":
            return [
                {"article": "Art. 3", "title": "Barrierefreiheitsanforderungen", "description": "Produkte barrierefrei gestalten", "requirement_text": "Produkte und Dienstleistungen muessen die Barrierefreiheitsanforderungen erfuellen.", "priority": 1},
                {"article": "Art. 4", "title": "Bestehende Rechtsvorschriften", "description": "Verhaeltnis zu anderen Vorschriften", "requirement_text": "Ergaenzung zu bestehenden Barrierefreiheitsvorschriften.", "priority": 3},
                {"article": "Art. 13", "title": "Konformitaetsvermutung", "description": "Harmonisierte Normen", "requirement_text": "Konformitaet bei Einhaltung harmonisierter Normen vermutet.", "priority": 2},
                {"article": "Art. 14", "title": "Gemeinsame technische Spezifikationen", "description": "Falls keine Normen existieren", "requirement_text": "EU-Kommission kann technische Spezifikationen festlegen.", "priority": 3},
                {"article": "Anhang I", "title": "Barrierefreiheitsanforderungen fuer Produkte", "description": "WCAG-konforme Webseiten", "requirement_text": "Webseiten, Apps und E-Books muessen WCAG 2.1 Level AA erfuellen.", "priority": 1},
            ]

        elif code == "DSM":
            return [
                {"article": "Art. 3", "title": "Text and Data Mining (Forschung)", "description": "TDM fuer Forschung erlaubt", "requirement_text": "Text- und Data-Mining fuer wissenschaftliche Forschung ist erlaubt.", "priority": 2},
                {"article": "Art. 4", "title": "Text and Data Mining (Allgemein)", "description": "TDM-Ausnahme", "requirement_text": "TDM erlaubt, wenn Rechteinhaber nicht widersprochen haben.", "priority": 1},
                {"article": "Art. 15", "title": "Leistungsschutzrecht Presse", "description": "Verguetung fuer Presseverleger", "requirement_text": "Online-Nutzung von Presseveroeffentlichungen erfordert Lizenz.", "priority": 2},
                {"article": "Art. 17", "title": "Upload-Filter", "description": "Plattformhaftung fuer Uploads", "requirement_text": "Plattformen haften fuer urheberrechtsverletzende Uploads ihrer Nutzer.", "priority": 1},
                {"article": "Art. 17(7)", "title": "Overblocking verhindern", "description": "Legitime Nutzung schuetzen", "requirement_text": "Massnahmen duerfen nicht zu ungerechtfertigter Sperrung fuehren.", "priority": 1},
            ]

        elif code == "PLD":
            return [
                {"article": "Art. 4", "title": "Produktbegriff", "description": "Software als Produkt", "requirement_text": "Software gilt als Produkt im Sinne der Produkthaftung.", "priority": 1},
                {"article": "Art. 6", "title": "Fehlerhaftes Produkt", "description": "Definition Produktfehler", "requirement_text": "Ein Produkt ist fehlerhaft, wenn es nicht die erwartete Sicherheit bietet.", "priority": 1},
                {"article": "Art. 7", "title": "KI-Systeme", "description": "Haftung fuer KI", "requirement_text": "Haftung gilt auch fuer durch KI verursachte Schaeden.", "priority": 1},
                {"article": "Art. 9", "title": "Haftung des Herstellers", "description": "Verschuldensunabhaengige Haftung", "requirement_text": "Hersteller haften verschuldensunabhaengig fuer Produktfehler.", "priority": 1},
                {"article": "Art. 10", "title": "Softwareaktualisierungen", "description": "Pflicht zu Updates", "requirement_text": "Fehlende Sicherheitsupdates koennen Haftung begruenden.", "priority": 1},
            ]

        elif code == "GPSR":
            return [
                {"article": "Art. 5", "title": "Allgemeine Sicherheitsanforderung", "description": "Produkte muessen sicher sein", "requirement_text": "Nur sichere Produkte duerfen in Verkehr gebracht werden.", "priority": 1},
                {"article": "Art. 8", "title": "Pflichten der Hersteller", "description": "Sicherheitsbewertung durchfuehren", "requirement_text": "Hersteller muessen Risikoanalyse und Sicherheitsbewertung durchfuehren.", "priority": 1},
                {"article": "Art. 9", "title": "Technische Dokumentation", "description": "Dokumentationspflicht", "requirement_text": "Technische Dokumentation zur Konformitaet erstellen und aufbewahren.", "priority": 1},
                {"article": "Art. 10", "title": "EU-Konformitaetserklaerung", "description": "CE-Kennzeichnung", "requirement_text": "Konformitaetserklaerung und CE-Kennzeichnung erforderlich.", "priority": 1},
                {"article": "Art. 14", "title": "Produktrueckrufe", "description": "Rueckrufverfahren", "requirement_text": "Bei Sicherheitsrisiken muessen Produkte zurueckgerufen werden.", "priority": 1},
            ]

        elif code == "CRA":
            return [
                {"article": "Art. 5", "title": "Wesentliche Anforderungen", "description": "Cybersicherheit bei Entwurf", "requirement_text": "Produkte muessen so entworfen werden, dass sie ein angemessenes Cybersicherheitsniveau gewaehrleisten.", "priority": 1},
                {"article": "Art. 6", "title": "Sicherheitsupdates", "description": "Updates bereitstellen", "requirement_text": "Hersteller muessen Sicherheitsupdates fuer die erwartete Produktlebensdauer bereitstellen.", "priority": 1},
                {"article": "Art. 10", "title": "Schwachstellenbehandlung", "description": "Vulnerability Handling", "requirement_text": "Hersteller muessen ein koordiniertes Schwachstellenmanagement implementieren.", "priority": 1},
                {"article": "Art. 11", "title": "Meldepflicht", "description": "Schwachstellen melden", "requirement_text": "Aktiv ausgenutzte Schwachstellen muessen innerhalb von 24 Stunden gemeldet werden.", "priority": 1},
                {"article": "Art. 13", "title": "SBOM", "description": "Software Bill of Materials", "requirement_text": "Eine SBOM muss fuer das Produkt erstellt und gepflegt werden.", "priority": 1},
                {"article": "Art. 15", "title": "Support-Zeitraum", "description": "Mindest-Support-Dauer", "requirement_text": "Mindestens 5 Jahre Support oder erwartete Produktlebensdauer.", "priority": 1},
                {"article": "Anhang I.1", "title": "Sichere Standardkonfiguration", "description": "Secure by Default", "requirement_text": "Produkte muessen mit sicheren Standardeinstellungen ausgeliefert werden.", "priority": 1},
                {"article": "Anhang I.2", "title": "Schutz vor unbefugtem Zugriff", "description": "Access Control", "requirement_text": "Mechanismen zum Schutz vor unbefugtem Zugriff implementieren.", "priority": 1},
                {"article": "Anhang I.3", "title": "Datenintegritaet", "description": "Integritaetsschutz", "requirement_text": "Schutz der Integritaet von Daten und Konfiguration.", "priority": 1},
                {"article": "Anhang I.4", "title": "Verfuegbarkeit", "description": "Resilienz", "requirement_text": "Schutz vor DoS-Angriffen und Sicherstellung der Verfuegbarkeit.", "priority": 1},
            ]

        elif code == "EPRIVACY":
            return [
                {"article": "Art. 5", "title": "Vertraulichkeit der Kommunikation", "description": "Kommunikation schuetzen", "requirement_text": "Vertraulichkeit der Kommunikation und Verkehrsdaten gewaehrleisten.", "priority": 1},
                {"article": "Art. 6", "title": "Verkehrsdaten", "description": "Umgang mit Verkehrsdaten", "requirement_text": "Verkehrsdaten muessen nach Abschluss geloescht oder anonymisiert werden.", "priority": 1},
                {"article": "Art. 9", "title": "Standortdaten", "description": "Nur mit Einwilligung", "requirement_text": "Standortdaten nur mit ausdruecklicher Einwilligung verarbeiten.", "priority": 1},
                {"article": "Art. 13", "title": "Unerbetene Nachrichten", "description": "Opt-in fuer Marketing", "requirement_text": "Direktwerbung per E-Mail nur mit vorheriger Einwilligung.", "priority": 1},
            ]

        elif code == "SCC":
            return [
                {"article": "Klausel 8", "title": "Datenschutzgarantien", "description": "Garantien dokumentieren", "requirement_text": "Datenimporteur muss angemessene Datenschutzgarantien gewaehrleisten.", "priority": 1},
                {"article": "Klausel 10", "title": "Betroffenenrechte", "description": "Rechte durchsetzen", "requirement_text": "Betroffene koennen ihre Rechte auch gegenueber Datenimporteur geltend machen.", "priority": 1},
                {"article": "Klausel 14", "title": "Lokale Rechtsvorschriften", "description": "Rechtslage pruefen", "requirement_text": "Parteien muessen pruefen, ob lokale Gesetze die Einhaltung verhindern.", "priority": 1},
                {"article": "Klausel 15", "title": "Behoerdenzugriff", "description": "Transparenz bei Anfragen", "requirement_text": "Datenimporteur muss ueber Behoerdenanfragen informieren.", "priority": 1},
            ]

        elif code == "DPF":
            return [
                {"article": "Prinzip 1", "title": "Notice", "description": "Informationspflicht", "requirement_text": "Betroffene muessen ueber Datenverarbeitung informiert werden.", "priority": 1},
                {"article": "Prinzip 2", "title": "Choice", "description": "Wahlmoeglichkeit", "requirement_text": "Betroffene muessen der Weitergabe widersprechen koennen.", "priority": 1},
                {"article": "Prinzip 4", "title": "Security", "description": "Sicherheitsmassnahmen", "requirement_text": "Angemessene Sicherheitsmassnahmen zum Schutz der Daten.", "priority": 1},
                {"article": "Prinzip 5", "title": "Data Integrity", "description": "Datenintegritaet", "requirement_text": "Daten muessen richtig, vollstaendig und aktuell sein.", "priority": 1},
                {"article": "Prinzip 6", "title": "Access", "description": "Auskunftsrecht", "requirement_text": "Betroffene haben Recht auf Zugang zu ihren Daten.", "priority": 1},
            ]

        # Codes without seed data (e.g. GDPR, AIACT, BSI-*) get no fallback.
        return []
|
||||
|
||||
async def _scrape_bsi_pdf(self, code: str, source_info: Dict) -> int:
|
||||
"""
|
||||
Scrape BSI Technical Guideline PDF.
|
||||
|
||||
Note: Full PDF parsing requires PyMuPDF or pdfplumber.
|
||||
This is a placeholder that creates seed requirements.
|
||||
"""
|
||||
logger.info(f"Processing BSI TR: {code}")
|
||||
|
||||
# Get or create regulation
|
||||
regulation = self.reg_repo.get_by_code(code)
|
||||
if not regulation:
|
||||
regulation = RegulationDB(
|
||||
code=code,
|
||||
name=f"BSI {code}",
|
||||
full_name=f"BSI Technical Guideline {code}",
|
||||
regulation_type=source_info["regulation_type"],
|
||||
local_pdf_path=source_info["url"],
|
||||
is_active=True,
|
||||
)
|
||||
self.db.add(regulation)
|
||||
self.db.commit()
|
||||
self.db.refresh(regulation)
|
||||
|
||||
# Known BSI TR-03161 test aspects (Pruefaspekte)
|
||||
# These are the key security requirements from the TR
|
||||
bsi_aspects = self._get_bsi_aspects(code)
|
||||
|
||||
requirements_created = 0
|
||||
for aspect in bsi_aspects:
|
||||
existing = self.db.query(RequirementDB).filter(
|
||||
RequirementDB.regulation_id == regulation.id,
|
||||
RequirementDB.article == aspect["id"]
|
||||
).first()
|
||||
|
||||
if existing:
|
||||
continue
|
||||
|
||||
requirement = RequirementDB(
|
||||
regulation_id=regulation.id,
|
||||
article=aspect["id"],
|
||||
title=aspect["title"],
|
||||
description=aspect.get("description"),
|
||||
requirement_text=aspect.get("requirement_text"),
|
||||
breakpilot_interpretation=aspect.get("interpretation"),
|
||||
is_applicable=aspect.get("is_applicable", True),
|
||||
priority=aspect.get("priority", 2),
|
||||
source_page=aspect.get("page"),
|
||||
source_section=aspect.get("section"),
|
||||
)
|
||||
self.db.add(requirement)
|
||||
requirements_created += 1
|
||||
|
||||
self.db.commit()
|
||||
logger.info(f"Created {requirements_created} BSI requirements from {code}")
|
||||
return requirements_created
|
||||
|
||||
    def _get_bsi_aspects(self, code: str) -> List[Dict[str, Any]]:
        """
        Returns comprehensive BSI TR-03161 test aspects (Pruefaspekte).

        These are the actual test aspects from BSI TR-03161:
        - Part 1: Allgemeine Anforderungen (~45 Aspekte)
        - Part 2: Web-Anwendungen (~40 Aspekte)
        - Part 3: Hintergrundsysteme (~35 Aspekte)

        Total: ~120 Pruefaspekte

        Args:
            code: Regulation code; one of "BSI-TR-03161-1", "BSI-TR-03161-2",
                "BSI-TR-03161-3". Any other code yields an empty list.

        Returns:
            Seed dicts consumed by ``_scrape_bsi_pdf``; each carries the keys
            "id", "title", "description", "requirement_text", "priority",
            and "section". Titles/texts are German (ASCII-transliterated).
        """
        if code == "BSI-TR-03161-1":
            # Part 1: General requirements (Allgemeine Anforderungen)
            return [
                # Purpose limitation & data minimisation
                {"id": "O.Purp_1", "title": "Zweckbindung", "description": "Anwendungszweck klar definiert", "requirement_text": "Die Anwendung muss einen klar definierten und dokumentierten Zweck haben.", "priority": 1, "section": "4.1"},
                {"id": "O.Purp_2", "title": "Zweckdokumentation", "description": "Zweck fuer Nutzer einsehbar", "requirement_text": "Der Zweck muss fuer Nutzer transparent und einsehbar dokumentiert sein.", "priority": 2, "section": "4.1"},
                {"id": "O.Data_1", "title": "Datenminimierung", "description": "Nur notwendige Daten erheben", "requirement_text": "Es duerfen nur die fuer den definierten Zweck erforderlichen Daten erhoben werden.", "priority": 1, "section": "4.2"},
                {"id": "O.Data_2", "title": "Datenerforderlichkeit", "description": "Erforderlichkeit pruefen", "requirement_text": "Vor jeder Datenerhebung muss die Erforderlichkeit geprueft und dokumentiert werden.", "priority": 1, "section": "4.2"},
                {"id": "O.Data_3", "title": "Datenkategorien", "description": "Datenkategorien klassifizieren", "requirement_text": "Alle verarbeiteten Datenkategorien muessen klassifiziert und dokumentiert sein.", "priority": 2, "section": "4.2"},
                {"id": "O.Data_4", "title": "Besondere Kategorien", "description": "Art. 9 DSGVO Daten identifizieren", "requirement_text": "Besondere Kategorien personenbezogener Daten (Art. 9 DSGVO) muessen identifiziert und besonders geschuetzt werden.", "priority": 1, "section": "4.2"},

                # Authentication
                {"id": "O.Auth_1", "title": "Authentifizierungsmechanismus", "description": "Sichere Authentifizierung", "requirement_text": "Die Anwendung muss sichere Authentifizierungsmechanismen implementieren.", "priority": 1, "section": "4.3"},
                {"id": "O.Auth_2", "title": "Passwortrichtlinie", "description": "Starke Passwoerter erzwingen", "requirement_text": "Passwortrichtlinien muessen Mindestlaenge (12 Zeichen), Komplexitaet und Historie durchsetzen.", "priority": 1, "section": "4.3"},
                {"id": "O.Auth_3", "title": "Passwort-Hashing", "description": "Sichere Hash-Algorithmen", "requirement_text": "Passwoerter muessen mit aktuellen Algorithmen (bcrypt, Argon2) gehasht werden.", "priority": 1, "section": "4.3"},
                {"id": "O.Auth_4", "title": "Multi-Faktor-Authentifizierung", "description": "MFA fuer sensitive Bereiche", "requirement_text": "Fuer administrative und sensitive Funktionen muss MFA verfuegbar sein.", "priority": 1, "section": "4.3"},
                {"id": "O.Auth_5", "title": "Brute-Force-Schutz", "description": "Rate Limiting bei Login", "requirement_text": "Nach mehreren fehlgeschlagenen Anmeldeversuchen muss Account-Lockout oder Rate-Limiting greifen.", "priority": 1, "section": "4.3"},
                {"id": "O.Auth_6", "title": "Sichere Passwort-Wiederherstellung", "description": "Reset-Prozess absichern", "requirement_text": "Der Passwort-Reset-Prozess muss gegen Enumeration und Manipulation geschuetzt sein.", "priority": 1, "section": "4.3"},

                # Authorization
                {"id": "O.Authz_1", "title": "Zugriffskontrolle", "description": "Rollenbasierte Zugriffskontrolle", "requirement_text": "Ein rollenbasiertes Zugriffskonzept (RBAC) muss implementiert sein.", "priority": 1, "section": "4.4"},
                {"id": "O.Authz_2", "title": "Least Privilege", "description": "Minimale Rechte", "requirement_text": "Benutzer sollen nur die minimal notwendigen Berechtigungen erhalten.", "priority": 1, "section": "4.4"},
                {"id": "O.Authz_3", "title": "Rechtetrennung", "description": "Funktionale Trennung", "requirement_text": "Administrative und operative Rollen muessen getrennt sein.", "priority": 1, "section": "4.4"},
                {"id": "O.Authz_4", "title": "Autorisierungspruefung", "description": "Serverseitige Pruefung", "requirement_text": "Jede Ressource muss serverseitig auf Zugriffsberechtigung geprueft werden.", "priority": 1, "section": "4.4"},

                # Cryptography
                {"id": "O.Cryp_1", "title": "TLS-Verschluesselung", "description": "TLS 1.2+ fuer Transport", "requirement_text": "Alle Daten muessen bei der Uebertragung mit TLS 1.2 oder hoeher verschluesselt werden.", "priority": 1, "section": "4.5"},
                {"id": "O.Cryp_2", "title": "Verschluesselung at Rest", "description": "Sensible Daten verschluesseln", "requirement_text": "Sensible Daten muessen bei der Speicherung verschluesselt werden (AES-256 oder vergleichbar).", "priority": 1, "section": "4.5"},
                {"id": "O.Cryp_3", "title": "HSTS", "description": "HTTP Strict Transport Security", "requirement_text": "HSTS-Header muessen gesetzt sein um HTTPS zu erzwingen.", "priority": 1, "section": "4.5"},
                {"id": "O.Cryp_4", "title": "Zertifikatvalidierung", "description": "Zertifikate pruefen", "requirement_text": "TLS-Zertifikate muessen vollstaendig validiert werden (Chain, Revocation, Hostname).", "priority": 1, "section": "4.5"},
                {"id": "O.Cryp_5", "title": "Key Management", "description": "Sichere Schluesselverwaltung", "requirement_text": "Kryptographische Schluessel muessen sicher generiert, gespeichert und rotiert werden.", "priority": 1, "section": "4.5"},
                {"id": "O.Cryp_6", "title": "Aktuelle Algorithmen", "description": "Keine veralteten Algorithmen", "requirement_text": "Es duerfen nur aktuelle, als sicher geltende kryptographische Algorithmen verwendet werden.", "priority": 1, "section": "4.5"},

                # Data protection / privacy
                {"id": "O.Priv_1", "title": "Datenschutzerklaerung", "description": "Transparente Information", "requirement_text": "Eine vollstaendige Datenschutzerklaerung muss vor Nutzung einsehbar sein.", "priority": 1, "section": "4.6"},
                {"id": "O.Priv_2", "title": "Einwilligung", "description": "Wirksame Einwilligung", "requirement_text": "Einwilligungen muessen freiwillig, informiert, spezifisch und dokumentiert sein.", "priority": 1, "section": "4.6"},
                {"id": "O.Priv_3", "title": "Betroffenenrechte", "description": "Auskunft, Loeschung, etc.", "requirement_text": "Technische Prozesse fuer Betroffenenrechte (Art. 15-21 DSGVO) muessen implementiert sein.", "priority": 1, "section": "4.6"},
                {"id": "O.Priv_4", "title": "Loeschkonzept", "description": "Aufbewahrungsfristen", "requirement_text": "Ein dokumentiertes Loeschkonzept mit definierten Aufbewahrungsfristen muss umgesetzt sein.", "priority": 1, "section": "4.6"},
                {"id": "O.Priv_5", "title": "Datenschutz durch Technik", "description": "Privacy by Design", "requirement_text": "Datenschutz muss bereits bei der Entwicklung beruecksichtigt werden (Art. 25 DSGVO).", "priority": 1, "section": "4.6"},

                # Logging & audit
                {"id": "O.Log_1", "title": "Security Logging", "description": "Sicherheitsereignisse protokollieren", "requirement_text": "Sicherheitsrelevante Ereignisse (Login, Fehler, Zugriffsverletzungen) muessen protokolliert werden.", "priority": 1, "section": "4.7"},
                {"id": "O.Log_2", "title": "Audit Trail", "description": "Nachvollziehbarkeit", "requirement_text": "Aenderungen an personenbezogenen Daten muessen nachvollziehbar protokolliert werden.", "priority": 1, "section": "4.7"},
                {"id": "O.Log_3", "title": "Log-Integritaet", "description": "Logs vor Manipulation schuetzen", "requirement_text": "Logs muessen vor unbefugter Aenderung oder Loeschung geschuetzt sein.", "priority": 2, "section": "4.7"},
                {"id": "O.Log_4", "title": "Keine PII in Logs", "description": "Keine personenbezogenen Daten loggen", "requirement_text": "Logs duerfen keine personenbezogenen Daten im Klartext enthalten.", "priority": 1, "section": "4.7"},

                # Software development
                {"id": "O.Dev_1", "title": "Secure SDLC", "description": "Sicherer Entwicklungsprozess", "requirement_text": "Ein dokumentierter sicherer Entwicklungsprozess (Secure SDLC) muss etabliert sein.", "priority": 1, "section": "4.8"},
                {"id": "O.Dev_2", "title": "Code Review", "description": "Sicherheits-Review von Code", "requirement_text": "Sicherheitsrelevanter Code muss vor Release einem Review unterzogen werden.", "priority": 2, "section": "4.8"},
                {"id": "O.Dev_3", "title": "Dependency Management", "description": "Abhaengigkeiten pruefen", "requirement_text": "Externe Abhaengigkeiten muessen auf bekannte Schwachstellen geprueft werden.", "priority": 1, "section": "4.8"},
                {"id": "O.Dev_4", "title": "Penetration Testing", "description": "Regelmaessige Sicherheitstests", "requirement_text": "Regelmaessige Penetrationstests oder Schwachstellenscans muessen durchgefuehrt werden.", "priority": 2, "section": "4.8"},

                # Operations
                {"id": "O.Ops_1", "title": "Patch Management", "description": "Zeitnahes Patchen", "requirement_text": "Sicherheitspatches muessen zeitnah (kritisch: 24-72h) eingespielt werden.", "priority": 1, "section": "4.9"},
                {"id": "O.Ops_2", "title": "Backup", "description": "Regelmaessige Datensicherung", "requirement_text": "Regelmaessige, getestete Backups muessen vorhanden sein.", "priority": 1, "section": "4.9"},
                {"id": "O.Ops_3", "title": "Incident Response", "description": "Vorfallsmanagement", "requirement_text": "Ein dokumentierter Incident-Response-Prozess muss etabliert sein.", "priority": 1, "section": "4.9"},
                {"id": "O.Ops_4", "title": "Monitoring", "description": "Systemueberwachung", "requirement_text": "Kritische Systeme und Dienste muessen kontinuierlich ueberwacht werden.", "priority": 2, "section": "4.9"},

                # Documentation
                {"id": "O.Doc_1", "title": "Technische Dokumentation", "description": "Systemarchitektur dokumentiert", "requirement_text": "Die Systemarchitektur und Datenflüsse muessen dokumentiert sein.", "priority": 2, "section": "4.10"},
                {"id": "O.Doc_2", "title": "Verarbeitungsverzeichnis", "description": "Art. 30 DSGVO", "requirement_text": "Ein vollstaendiges Verzeichnis von Verarbeitungstaetigkeiten muss gefuehrt werden.", "priority": 1, "section": "4.10"},
                {"id": "O.Doc_3", "title": "TOMs", "description": "Technisch-organisatorische Massnahmen", "requirement_text": "Technisch-organisatorische Massnahmen (Art. 32 DSGVO) muessen dokumentiert sein.", "priority": 1, "section": "4.10"},
            ]

        elif code == "BSI-TR-03161-2":
            # Part 2: Web applications (Web-Anwendungen)
            return [
                # Session management
                {"id": "O.Sess_1", "title": "Session-Timeout", "description": "Automatische Sitzungsbeendigung", "requirement_text": "Sessions muessen nach Inaktivitaet automatisch beendet werden (max. 30 Min).", "priority": 1, "section": "5.1"},
                {"id": "O.Sess_2", "title": "Session-ID Sicherheit", "description": "Sichere Session-IDs", "requirement_text": "Session-IDs muessen kryptographisch sicher generiert werden (min. 128 Bit Entropie).", "priority": 1, "section": "5.1"},
                {"id": "O.Sess_3", "title": "Session-Regeneration", "description": "ID nach Login erneuern", "requirement_text": "Nach erfolgreicher Authentifizierung muss eine neue Session-ID generiert werden.", "priority": 1, "section": "5.1"},
                {"id": "O.Sess_4", "title": "Secure Cookie Flags", "description": "HttpOnly, Secure, SameSite", "requirement_text": "Session-Cookies muessen mit Secure, HttpOnly und SameSite-Flags gesetzt werden.", "priority": 1, "section": "5.1"},
                {"id": "O.Sess_5", "title": "Session-Binding", "description": "Session an Client binden", "requirement_text": "Sessions sollten an Client-Eigenschaften (User-Agent, IP) gebunden werden.", "priority": 2, "section": "5.1"},
                {"id": "O.Sess_6", "title": "Logout-Funktionalitaet", "description": "Vollstaendiges Logout", "requirement_text": "Beim Logout muss die Session vollstaendig invalidiert werden.", "priority": 1, "section": "5.1"},

                # Input validation
                {"id": "O.Input_1", "title": "Serverseitige Validierung", "description": "Alle Eingaben serverseitig pruefen", "requirement_text": "Alle Benutzereingaben muessen serverseitig validiert werden.", "priority": 1, "section": "5.2"},
                {"id": "O.Input_2", "title": "Whitelist-Validierung", "description": "Erlaubte Zeichen definieren", "requirement_text": "Eingabevalidierung sollte auf Whitelist-Basis erfolgen.", "priority": 1, "section": "5.2"},
                {"id": "O.Input_3", "title": "Encoding", "description": "Korrekte Zeichenkodierung", "requirement_text": "Einheitliche Zeichenkodierung (UTF-8) muss durchgesetzt werden.", "priority": 2, "section": "5.2"},
                {"id": "O.Input_4", "title": "Datei-Upload Validierung", "description": "Uploads pruefen", "requirement_text": "Datei-Uploads muessen auf Typ, Groesse und Inhalt validiert werden.", "priority": 1, "section": "5.2"},

                # Injection protection
                {"id": "O.SQL_1", "title": "SQL-Injection Schutz", "description": "Prepared Statements", "requirement_text": "SQL-Anfragen muessen parametrisiert sein (Prepared Statements).", "priority": 1, "section": "5.3"},
                {"id": "O.SQL_2", "title": "ORM Nutzung", "description": "Abstraktionsschicht nutzen", "requirement_text": "Es sollte ein ORM oder Query Builder verwendet werden.", "priority": 2, "section": "5.3"},
                {"id": "O.Cmd_1", "title": "Command Injection Schutz", "description": "Keine Shell-Befehle mit Eingaben", "requirement_text": "Benutzereingaben duerfen nicht in Shell-Befehlen verwendet werden.", "priority": 1, "section": "5.3"},
                {"id": "O.LDAP_1", "title": "LDAP Injection Schutz", "description": "LDAP-Queries absichern", "requirement_text": "LDAP-Queries muessen gegen Injection geschuetzt sein.", "priority": 1, "section": "5.3"},
                {"id": "O.XML_1", "title": "XML Injection Schutz", "description": "XXE verhindern", "requirement_text": "XML-Parser muessen gegen XXE-Angriffe konfiguriert sein.", "priority": 1, "section": "5.3"},

                # XSS protection
                {"id": "O.XSS_1", "title": "Output Encoding", "description": "Kontextabhaengiges Escaping", "requirement_text": "Ausgaben muessen kontextabhaengig (HTML, JS, CSS, URL) escaped werden.", "priority": 1, "section": "5.4"},
                {"id": "O.XSS_2", "title": "Content Security Policy", "description": "CSP-Header setzen", "requirement_text": "Ein restriktiver Content-Security-Policy-Header muss gesetzt sein.", "priority": 1, "section": "5.4"},
                {"id": "O.XSS_3", "title": "DOM-basiertes XSS", "description": "DOM-Manipulation absichern", "requirement_text": "JavaScript-DOM-Manipulationen muessen sicher implementiert sein.", "priority": 1, "section": "5.4"},
                {"id": "O.XSS_4", "title": "Template-Engine Escaping", "description": "Auto-Escaping aktivieren", "requirement_text": "Template-Engines muessen mit aktiviertem Auto-Escaping verwendet werden.", "priority": 1, "section": "5.4"},

                # CSRF protection
                {"id": "O.CSRF_1", "title": "Anti-CSRF Token", "description": "Token bei State-Changes", "requirement_text": "Zustandsaendernde Anfragen muessen mit Anti-CSRF-Token geschuetzt sein.", "priority": 1, "section": "5.5"},
                {"id": "O.CSRF_2", "title": "SameSite Cookie", "description": "SameSite-Attribut setzen", "requirement_text": "Cookies sollten das SameSite-Attribut (Strict oder Lax) haben.", "priority": 1, "section": "5.5"},
                {"id": "O.CSRF_3", "title": "Referer-Pruefung", "description": "Origin validieren", "requirement_text": "Bei kritischen Aktionen sollte der Origin/Referer-Header geprueft werden.", "priority": 2, "section": "5.5"},

                # Security headers
                {"id": "O.Head_1", "title": "X-Content-Type-Options", "description": "nosniff setzen", "requirement_text": "Der X-Content-Type-Options: nosniff Header muss gesetzt sein.", "priority": 1, "section": "5.6"},
                {"id": "O.Head_2", "title": "X-Frame-Options", "description": "Clickjacking-Schutz", "requirement_text": "X-Frame-Options oder CSP frame-ancestors muss Clickjacking verhindern.", "priority": 1, "section": "5.6"},
                {"id": "O.Head_3", "title": "X-XSS-Protection", "description": "Browser XSS-Filter", "requirement_text": "X-XSS-Protection sollte aktiviert sein (oder CSP nutzen).", "priority": 2, "section": "5.6"},
                {"id": "O.Head_4", "title": "Referrer-Policy", "description": "Referrer einschraenken", "requirement_text": "Eine restriktive Referrer-Policy sollte gesetzt sein.", "priority": 2, "section": "5.6"},
                {"id": "O.Head_5", "title": "Permissions-Policy", "description": "Browser-Features einschraenken", "requirement_text": "Nicht benoetigte Browser-APIs sollten per Permissions-Policy deaktiviert werden.", "priority": 3, "section": "5.6"},

                # Error handling
                {"id": "O.Err_1", "title": "Generische Fehlermeldungen", "description": "Keine technischen Details", "requirement_text": "Fehlermeldungen an Benutzer duerfen keine technischen Details enthalten.", "priority": 1, "section": "5.7"},
                {"id": "O.Err_2", "title": "Custom Error Pages", "description": "Eigene Fehlerseiten", "requirement_text": "Standard-Fehlerseiten des Servers muessen durch eigene ersetzt werden.", "priority": 2, "section": "5.7"},
                {"id": "O.Err_3", "title": "Exception Handling", "description": "Alle Exceptions abfangen", "requirement_text": "Unbehandelte Exceptions muessen abgefangen und geloggt werden.", "priority": 1, "section": "5.7"},

                # API security
                {"id": "O.API_1", "title": "API-Authentifizierung", "description": "API-Keys oder OAuth", "requirement_text": "APIs muessen authentifiziert werden (API-Keys, OAuth, JWT).", "priority": 1, "section": "5.8"},
                {"id": "O.API_2", "title": "Rate Limiting", "description": "Anfragen begrenzen", "requirement_text": "APIs muessen Rate-Limiting implementieren.", "priority": 1, "section": "5.8"},
                {"id": "O.API_3", "title": "Input-Validierung API", "description": "Request-Body validieren", "requirement_text": "API-Request-Bodies muessen gegen ein Schema validiert werden.", "priority": 1, "section": "5.8"},
                {"id": "O.API_4", "title": "Versionierung", "description": "API-Versionen", "requirement_text": "APIs sollten versioniert sein um Breaking Changes zu vermeiden.", "priority": 3, "section": "5.8"},

                # Client-side security
                {"id": "O.JS_1", "title": "JavaScript Sicherheit", "description": "Sichere JS-Praktiken", "requirement_text": "JavaScript muss sicher implementiert sein (kein eval, innerHTML mit Vorsicht).", "priority": 1, "section": "5.9"},
                {"id": "O.JS_2", "title": "Third-Party Scripts", "description": "Externe Scripts absichern", "requirement_text": "Third-Party Scripts muessen mit SRI oder CSP abgesichert werden.", "priority": 1, "section": "5.9"},
                {"id": "O.Store_1", "title": "Lokale Speicherung", "description": "LocalStorage sicher nutzen", "requirement_text": "Sensible Daten duerfen nicht im LocalStorage/SessionStorage gespeichert werden.", "priority": 1, "section": "5.9"},
            ]

        elif code == "BSI-TR-03161-3":
            # Part 3: Background systems / backend (Hintergrundsysteme)
            return [
                # System architecture
                {"id": "O.Arch_1", "title": "Defense in Depth", "description": "Mehrschichtige Sicherheit", "requirement_text": "Eine mehrschichtige Sicherheitsarchitektur (Defense in Depth) muss implementiert sein.", "priority": 1, "section": "6.1"},
                {"id": "O.Arch_2", "title": "Segmentierung", "description": "Netzwerksegmentierung", "requirement_text": "Das Netzwerk muss segmentiert sein (DMZ, interne Zonen).", "priority": 1, "section": "6.1"},
                {"id": "O.Arch_3", "title": "Microservices Isolation", "description": "Services isolieren", "requirement_text": "Microservices sollten minimal gekoppelt und isoliert sein.", "priority": 2, "section": "6.1"},
                {"id": "O.Arch_4", "title": "Zero Trust", "description": "Kein implizites Vertrauen", "requirement_text": "Interne Kommunikation sollte nach Zero-Trust-Prinzipien abgesichert sein.", "priority": 2, "section": "6.1"},

                # Data storage
                {"id": "O.DB_1", "title": "Datenbank-Sicherheit", "description": "DB abhaerten", "requirement_text": "Datenbanken muessen gehaertet sein (keine Default-Credentials, minimale Rechte).", "priority": 1, "section": "6.2"},
                {"id": "O.DB_2", "title": "Verschluesselung in DB", "description": "Sensible Felder verschluesseln", "requirement_text": "Sensible Daten sollten in der Datenbank verschluesselt gespeichert werden.", "priority": 1, "section": "6.2"},
                {"id": "O.DB_3", "title": "Backup-Verschluesselung", "description": "Backups verschluesseln", "requirement_text": "Datenbank-Backups muessen verschluesselt sein.", "priority": 1, "section": "6.2"},
                {"id": "O.DB_4", "title": "Zugriffskontrolle DB", "description": "DB-Zugriffe beschraenken", "requirement_text": "Der Datenbankzugriff muss auf notwendige Dienste beschraenkt sein.", "priority": 1, "section": "6.2"},
                {"id": "O.Store_2", "title": "Dateispeicher-Sicherheit", "description": "Uploads isolieren", "requirement_text": "Hochgeladene Dateien muessen isoliert und mit Malware-Scanning verarbeitet werden.", "priority": 1, "section": "6.2"},

                # Containers & infrastructure
                {"id": "O.Cont_1", "title": "Container-Sicherheit", "description": "Images scannen", "requirement_text": "Container-Images muessen auf Schwachstellen gescannt werden.", "priority": 1, "section": "6.3"},
                {"id": "O.Cont_2", "title": "Rootless Container", "description": "Nicht als Root laufen", "requirement_text": "Container sollten nicht als Root-User ausgefuehrt werden.", "priority": 1, "section": "6.3"},
                {"id": "O.Cont_3", "title": "Image-Herkunft", "description": "Vertrauenswuerdige Images", "requirement_text": "Es duerfen nur Images aus vertrauenswuerdigen Quellen verwendet werden.", "priority": 1, "section": "6.3"},
                {"id": "O.Cont_4", "title": "Read-Only Filesystem", "description": "Unveraenderliches Dateisystem", "requirement_text": "Container sollten mit Read-Only Root-Filesystem laufen wo moeglich.", "priority": 2, "section": "6.3"},
                {"id": "O.Cont_5", "title": "Resource Limits", "description": "CPU/Memory begrenzen", "requirement_text": "Container muessen Resource-Limits (CPU, Memory) konfiguriert haben.", "priority": 2, "section": "6.3"},

                # Secrets management
                {"id": "O.Sec_1", "title": "Secrets Management", "description": "Zentrale Secrets-Verwaltung", "requirement_text": "Sensible Konfiguration (Passwoerter, Keys) muss zentral und sicher verwaltet werden.", "priority": 1, "section": "6.4"},
                {"id": "O.Sec_2", "title": "Keine Hardcoded Secrets", "description": "Secrets nicht im Code", "requirement_text": "Secrets duerfen nicht im Quellcode oder in Git-Repositories stehen.", "priority": 1, "section": "6.4"},
                {"id": "O.Sec_3", "title": "Secret Rotation", "description": "Regelmaessige Rotation", "requirement_text": "Secrets und API-Keys sollten regelmaessig rotiert werden.", "priority": 2, "section": "6.4"},
                {"id": "O.Sec_4", "title": "Vault Integration", "description": "Secrets-Vault nutzen", "requirement_text": "Ein Secrets-Management-System (HashiCorp Vault o.ae.) sollte verwendet werden.", "priority": 2, "section": "6.4"},

                # Communication
                {"id": "O.Comm_1", "title": "Service-to-Service TLS", "description": "Interne Verschluesselung", "requirement_text": "Auch interne Service-Kommunikation sollte verschluesselt sein (mTLS).", "priority": 2, "section": "6.5"},
                {"id": "O.Comm_2", "title": "Message Queue Sicherheit", "description": "Queue-Zugriff absichern", "requirement_text": "Message Queues muessen authentifiziert und autorisiert werden.", "priority": 1, "section": "6.5"},
                {"id": "O.Comm_3", "title": "API Gateway", "description": "Zentraler Zugangspunkt", "requirement_text": "Ein API Gateway sollte als zentraler Zugangspunkt dienen.", "priority": 2, "section": "6.5"},

                # Monitoring & logging
                {"id": "O.Mon_1", "title": "Zentrale Logs", "description": "Log-Aggregation", "requirement_text": "Logs aller Services muessen zentral aggregiert werden.", "priority": 1, "section": "6.6"},
                {"id": "O.Mon_2", "title": "Security Monitoring", "description": "Anomalie-Erkennung", "requirement_text": "Sicherheitsrelevante Ereignisse muessen ueberwacht und alarmiert werden.", "priority": 1, "section": "6.6"},
                {"id": "O.Mon_3", "title": "Metriken", "description": "Performance-Monitoring", "requirement_text": "System-Metriken (CPU, Memory, Latenz) sollten erfasst und ueberwacht werden.", "priority": 2, "section": "6.6"},
                {"id": "O.Mon_4", "title": "Alerting", "description": "Alarmierung konfigurieren", "requirement_text": "Kritische Schwellwerte muessen definiert und alarmiert werden.", "priority": 1, "section": "6.6"},

                # CI/CD security
                {"id": "O.CI_1", "title": "Pipeline-Sicherheit", "description": "CI/CD absichern", "requirement_text": "CI/CD-Pipelines muessen abgesichert sein (Secrets, Zugriffsrechte).", "priority": 1, "section": "6.7"},
                {"id": "O.CI_2", "title": "SAST/DAST", "description": "Automatisierte Security-Tests", "requirement_text": "Statische und dynamische Sicherheitsanalysen sollten in die Pipeline integriert sein.", "priority": 2, "section": "6.7"},
                {"id": "O.CI_3", "title": "Dependency Scanning", "description": "Abhaengigkeiten pruefen", "requirement_text": "Abhaengigkeiten muessen automatisiert auf Schwachstellen geprueft werden.", "priority": 1, "section": "6.7"},
                {"id": "O.CI_4", "title": "SBOM", "description": "Software Bill of Materials", "requirement_text": "Ein SBOM (Software Bill of Materials) sollte generiert und gepflegt werden.", "priority": 2, "section": "6.7"},

                # Disaster recovery
                {"id": "O.DR_1", "title": "Backup-Strategie", "description": "3-2-1 Backup-Regel", "requirement_text": "Backups sollten der 3-2-1-Regel folgen (3 Kopien, 2 Medien, 1 offsite).", "priority": 1, "section": "6.8"},
                {"id": "O.DR_2", "title": "Recovery-Tests", "description": "Restore regelmaessig testen", "requirement_text": "Die Wiederherstellung aus Backups muss regelmaessig getestet werden.", "priority": 1, "section": "6.8"},
                {"id": "O.DR_3", "title": "RTO/RPO", "description": "Recovery-Ziele definieren", "requirement_text": "Recovery Time Objective (RTO) und Recovery Point Objective (RPO) muessen definiert sein.", "priority": 2, "section": "6.8"},
            ]

        # Unknown code: no seed data available.
        return []
|
||||
|
||||
def get_known_sources(self) -> List[Dict[str, Any]]:
|
||||
"""Get list of known regulation sources with metadata."""
|
||||
sources = []
|
||||
for code, info in self.KNOWN_SOURCES.items():
|
||||
# Check database for existing data
|
||||
regulation = self.reg_repo.get_by_code(code)
|
||||
requirement_count = 0
|
||||
if regulation:
|
||||
requirement_count = self.db.query(RequirementDB).filter(
|
||||
RequirementDB.regulation_id == regulation.id
|
||||
).count()
|
||||
|
||||
sources.append({
|
||||
"code": code,
|
||||
"url": info["url"],
|
||||
"source_type": info["type"].value,
|
||||
"regulation_type": info["regulation_type"].value,
|
||||
"has_data": regulation is not None,
|
||||
"requirement_count": requirement_count,
|
||||
})
|
||||
|
||||
return sources
|
||||
442
backend/compliance/services/report_generator.py
Normal file
442
backend/compliance/services/report_generator.py
Normal file
@@ -0,0 +1,442 @@
|
||||
"""
|
||||
Compliance Report Generator Service.
|
||||
|
||||
Generates periodic compliance reports (weekly, monthly, quarterly, yearly).
|
||||
Reports include:
|
||||
- Compliance score trends
|
||||
- Control status summary
|
||||
- Risk assessment summary
|
||||
- Evidence coverage
|
||||
- Action items / recommendations
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime, date, timedelta
|
||||
from typing import Dict, List, Any, Optional
|
||||
from enum import Enum
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func
|
||||
|
||||
from ..db.models import (
|
||||
RegulationDB,
|
||||
RequirementDB,
|
||||
ControlDB,
|
||||
ControlMappingDB,
|
||||
EvidenceDB,
|
||||
RiskDB,
|
||||
AuditExportDB,
|
||||
ControlStatusEnum,
|
||||
RiskLevelEnum,
|
||||
EvidenceStatusEnum,
|
||||
)
|
||||
from ..db.repository import (
|
||||
RegulationRepository,
|
||||
ControlRepository,
|
||||
EvidenceRepository,
|
||||
RiskRepository,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ReportPeriod(str, Enum):
    """Reporting interval for a compliance report.

    Subclasses ``str`` so values serialize directly (e.g. in JSON payloads
    and the report metadata's ``period`` field).
    """

    WEEKLY = "weekly"        # rolling 7-day window
    MONTHLY = "monthly"      # rolling 30-day window
    QUARTERLY = "quarterly"  # rolling 90-day window
    YEARLY = "yearly"
|
||||
|
||||
|
||||
class ComplianceReportGenerator:
    """Generates compliance reports for different time periods.

    Aggregates data from the regulation, control, evidence, and risk
    repositories into a single nested dictionary. Use
    :meth:`generate_report` for the full periodic report and
    :meth:`generate_summary_report` for a compact dashboard summary.
    """

    def __init__(self, db: Session):
        """Bind the generator to a database session.

        The raw session is kept for ad-hoc aggregate queries
        (requirement/mapping counts) that the repositories do not expose.
        """
        self.db = db
        self.reg_repo = RegulationRepository(db)
        self.ctrl_repo = ControlRepository(db)
        self.evidence_repo = EvidenceRepository(db)
        self.risk_repo = RiskRepository(db)

    def generate_report(
        self,
        period: ReportPeriod,
        as_of_date: Optional[date] = None,
    ) -> Dict[str, Any]:
        """
        Generate a compliance report for the specified period.

        Args:
            period: Report period (weekly, monthly, quarterly, yearly)
            as_of_date: Report date (defaults to today)

        Returns:
            Complete report dictionary with metadata, executive summary,
            score breakdown, coverage, control/risk/evidence summaries,
            action items, and a (placeholder) trends section.
        """
        if as_of_date is None:
            as_of_date = date.today()

        # Calculate the rolling date window ending at as_of_date.
        date_range = self._get_date_range(period, as_of_date)

        report = {
            "report_metadata": {
                # TODO: datetime.utcnow() is deprecated (Python 3.12+);
                # kept for now so the timestamp format stays unchanged.
                "generated_at": datetime.utcnow().isoformat(),
                "period": period.value,
                "as_of_date": as_of_date.isoformat(),
                "date_range_start": date_range["start"].isoformat(),
                "date_range_end": date_range["end"].isoformat(),
                "report_title": self._get_report_title(period, as_of_date),
            },
            "executive_summary": self._generate_executive_summary(),
            "compliance_score": self._generate_compliance_score_section(),
            "regulations_coverage": self._generate_regulations_coverage(),
            "controls_summary": self._generate_controls_summary(),
            "risks_summary": self._generate_risks_summary(),
            "evidence_summary": self._generate_evidence_summary(),
            "action_items": self._generate_action_items(),
            "trends": self._generate_trends_placeholder(period),
        }

        return report

    def _get_date_range(self, period: ReportPeriod, as_of: date) -> Dict[str, date]:
        """Calculate the rolling date range for the reporting period.

        Returns a dict with ``start`` and ``end`` dates; unknown periods
        fall back to the monthly (30-day) window.
        """
        if period == ReportPeriod.WEEKLY:
            start = as_of - timedelta(days=7)
        elif period == ReportPeriod.MONTHLY:
            start = as_of - timedelta(days=30)
        elif period == ReportPeriod.QUARTERLY:
            start = as_of - timedelta(days=90)
        elif period == ReportPeriod.YEARLY:
            start = as_of - timedelta(days=365)
        else:
            # Defensive default: treat anything unexpected as monthly.
            start = as_of - timedelta(days=30)

        return {"start": start, "end": as_of}

    def _get_report_title(self, period: ReportPeriod, as_of: date) -> str:
        """Generate a German report title based on the period.

        NOTE(review): ``strftime('%B %Y')`` is locale-dependent for the
        month name — confirm the deployment locale is acceptable.
        """
        titles = {
            ReportPeriod.WEEKLY: f"Woechentlicher Compliance-Report KW{as_of.isocalendar()[1]} {as_of.year}",
            ReportPeriod.MONTHLY: f"Monatlicher Compliance-Report {as_of.strftime('%B %Y')}",
            ReportPeriod.QUARTERLY: f"Quartals-Compliance-Report Q{(as_of.month - 1) // 3 + 1}/{as_of.year}",
            ReportPeriod.YEARLY: f"Jaehrlicher Compliance-Report {as_of.year}",
        }
        return titles.get(period, f"Compliance Report {as_of.isoformat()}")

    def _generate_executive_summary(self) -> Dict[str, Any]:
        """Generate the executive summary section.

        Maps the overall compliance score onto a traffic-light status
        (>=80 GREEN, >=60 YELLOW, else RED) and counts critical/high risks.
        """
        stats = self.ctrl_repo.get_statistics()
        risk_matrix = self.risk_repo.get_matrix_data()

        total_controls = stats.get("total", 0)
        score = stats.get("compliance_score", 0)

        # Determine overall status from the score thresholds.
        if score >= 80:
            status = "GREEN"
            status_text = "Guter Compliance-Stand"
        elif score >= 60:
            status = "YELLOW"
            status_text = "Verbesserungsbedarf"
        else:
            status = "RED"
            status_text = "Kritischer Handlungsbedarf"

        high_critical_risks = (
            risk_matrix["by_level"].get("critical", 0) +
            risk_matrix["by_level"].get("high", 0)
        )

        return {
            "overall_status": status,
            "status_text": status_text,
            "compliance_score": score,
            "total_controls": total_controls,
            "high_critical_risks": high_critical_risks,
            "key_findings": self._generate_key_findings(stats, risk_matrix),
        }

    def _generate_key_findings(
        self,
        ctrl_stats: Dict[str, Any],
        risk_matrix: Dict[str, Any],
    ) -> List[str]:
        """Generate key findings (German text) for the executive summary.

        Emits one finding per triggered threshold; when nothing triggers,
        a single "all stable" finding is returned so the list is never empty.
        """
        findings = []

        # Control status findings.
        by_status = ctrl_stats.get("by_status", {})
        failed = by_status.get("fail", 0)
        planned = by_status.get("planned", 0)

        if failed > 0:
            findings.append(f"{failed} Controls im Status 'Fail' - sofortige Massnahmen erforderlich")

        if planned > 5:
            findings.append(f"{planned} Controls noch nicht implementiert")

        # Risk findings.
        critical = risk_matrix["by_level"].get("critical", 0)
        high = risk_matrix["by_level"].get("high", 0)

        if critical > 0:
            findings.append(f"{critical} kritische Risiken identifiziert - Eskalation empfohlen")

        if high > 3:
            findings.append(f"{high} hohe Risiken - priorisierte Behandlung erforderlich")

        if not findings:
            findings.append("Keine kritischen Befunde - Compliance-Status stabil")

        return findings

    def _generate_compliance_score_section(self) -> Dict[str, Any]:
        """Generate the compliance score section with a per-domain breakdown.

        A domain's score counts PASS controls fully and PARTIAL controls
        at half weight: ``(pass + 0.5 * partial) / total * 100``.
        """
        stats = self.ctrl_repo.get_statistics()

        domain_scores = {}

        controls = self.ctrl_repo.get_all()
        domain_stats = {}

        # Tally pass/partial counts per control domain.
        for ctrl in controls:
            domain = ctrl.domain.value if ctrl.domain else "unknown"
            if domain not in domain_stats:
                domain_stats[domain] = {"total": 0, "pass": 0, "partial": 0}

            domain_stats[domain]["total"] += 1
            if ctrl.status == ControlStatusEnum.PASS:
                domain_stats[domain]["pass"] += 1
            elif ctrl.status == ControlStatusEnum.PARTIAL:
                domain_stats[domain]["partial"] += 1

        for domain, ds in domain_stats.items():
            if ds["total"] > 0:
                score = ((ds["pass"] + ds["partial"] * 0.5) / ds["total"]) * 100
                domain_scores[domain] = round(score, 1)
            else:
                domain_scores[domain] = 0

        return {
            "overall_score": stats.get("compliance_score", 0),
            "by_domain": domain_scores,
            # Human-readable labels for the domain codes above.
            "domain_labels": {
                "gov": "Governance",
                "priv": "Datenschutz",
                "iam": "Identity & Access",
                "crypto": "Kryptografie",
                "sdlc": "Secure Development",
                "ops": "Operations",
                "ai": "KI-spezifisch",
                "cra": "Supply Chain",
                "aud": "Audit",
            },
        }

    def _generate_regulations_coverage(self) -> Dict[str, Any]:
        """Generate the regulations coverage section.

        A regulation counts as "covered" when at least one control is
        mapped to any of its requirements.
        """
        regulations = self.reg_repo.get_all()
        coverage = []

        for reg in regulations:
            # Count requirements for this regulation.
            req_count = self.db.query(func.count(RequirementDB.id)).filter(
                RequirementDB.regulation_id == reg.id
            ).scalar() or 0

            # Count control mappings attached to those requirements.
            mapped_controls = self.db.query(func.count(ControlMappingDB.id)).join(
                RequirementDB
            ).filter(
                RequirementDB.regulation_id == reg.id
            ).scalar() or 0

            coverage.append({
                "code": reg.code,
                "name": reg.name,
                "requirements": req_count,
                "mapped_controls": mapped_controls,
                "coverage_status": "covered" if mapped_controls > 0 else "pending",
            })

        return {
            "total_regulations": len(regulations),
            "covered_regulations": len([c for c in coverage if c["coverage_status"] == "covered"]),
            "details": coverage,
        }

    def _generate_controls_summary(self) -> Dict[str, Any]:
        """Generate the controls summary section.

        Includes aggregate counts plus up to 10 controls that are due
        for review (as reported by the control repository).
        """
        stats = self.ctrl_repo.get_statistics()
        due_for_review = self.ctrl_repo.get_due_for_review()

        return {
            "total": stats.get("total", 0),
            "by_status": stats.get("by_status", {}),
            "by_domain": stats.get("by_domain", {}),
            "due_for_review": len(due_for_review),
            "review_items": [
                {
                    "control_id": c.control_id,
                    "title": c.title,
                    "last_reviewed": c.last_reviewed_at.isoformat() if c.last_reviewed_at else None,
                }
                for c in due_for_review[:10]  # Top 10
            ],
        }

    def _generate_risks_summary(self) -> Dict[str, Any]:
        """Generate the risks summary section.

        Groups risks by category and lists every risk whose inherent
        level is CRITICAL or HIGH as a high-priority item.
        """
        matrix = self.risk_repo.get_matrix_data()
        risks = self.risk_repo.get_all()

        # Group by category (risks without a category land in "other").
        by_category = {}
        for risk in risks:
            cat = risk.category or "other"
            if cat not in by_category:
                by_category[cat] = 0
            by_category[cat] += 1

        # High priority risks.
        high_priority = [
            {
                "risk_id": r.risk_id,
                "title": r.title,
                "inherent_risk": r.inherent_risk.value if r.inherent_risk else None,
                "owner": r.owner,
                "status": r.status,
            }
            for r in risks
            if r.inherent_risk in [RiskLevelEnum.CRITICAL, RiskLevelEnum.HIGH]
        ]

        return {
            "total_risks": matrix["total_risks"],
            "by_level": matrix["by_level"],
            "by_category": by_category,
            "high_priority_risks": high_priority,
            "risk_matrix": matrix["matrix"],
        }

    def _generate_evidence_summary(self) -> Dict[str, Any]:
        """Generate the evidence summary section.

        NOTE(review): only the first 100 evidence records are inspected
        (``get_all(limit=100)``), so "controls_without_evidence" may
        over-report once evidence volume exceeds that limit — confirm.
        """
        stats = self.evidence_repo.get_statistics()
        all_evidence = self.evidence_repo.get_all(limit=100)

        # Find controls without evidence by resolving each evidence row's
        # control FK back to its public control_id.
        controls = self.ctrl_repo.get_all()
        controls_with_evidence = set()

        for evidence in all_evidence:
            control = self.db.query(ControlDB).filter(
                ControlDB.id == evidence.control_id
            ).first()
            if control:
                controls_with_evidence.add(control.control_id)

        controls_without_evidence = [
            c.control_id for c in controls
            if c.control_id not in controls_with_evidence
        ]

        return {
            "total_evidence": stats.get("total", 0),
            "by_type": stats.get("by_type", {}),
            "by_status": stats.get("by_status", {}),
            "coverage_percent": stats.get("coverage_percent", 0),
            "controls_without_evidence": controls_without_evidence[:20],  # Top 20
        }

    def _generate_action_items(self) -> List[Dict[str, Any]]:
        """Generate action items based on the current compliance status.

        Produces up to 5 items each for failed controls and open
        high/critical risks, plus one aggregate item when more than 5
        control reviews are overdue. Due dates are relative to today
        (7 / 14 / 30 days respectively).
        """
        action_items = []

        # Failed controls -> high-priority remediation items.
        failed_controls = self.ctrl_repo.get_all(status=ControlStatusEnum.FAIL)
        for ctrl in failed_controls[:5]:
            action_items.append({
                "priority": "high",
                "category": "control_remediation",
                "title": f"Control {ctrl.control_id} beheben",
                "description": f"Control '{ctrl.title}' ist im Status 'Fail'. Sofortige Massnahmen erforderlich.",
                "owner": ctrl.owner,
                "due_date": (date.today() + timedelta(days=7)).isoformat(),
            })

        # Open critical/high risks -> treatment items.
        critical_risks = self.risk_repo.get_all(min_risk_level=RiskLevelEnum.HIGH)
        for risk in critical_risks[:5]:
            if risk.status == "open":
                action_items.append({
                    "priority": "high" if risk.inherent_risk == RiskLevelEnum.CRITICAL else "medium",
                    "category": "risk_treatment",
                    "title": f"Risiko {risk.risk_id} behandeln",
                    "description": f"Risiko '{risk.title}' hat Status 'open' und Level '{risk.inherent_risk.value}'.",
                    "owner": risk.owner,
                    "due_date": (date.today() + timedelta(days=14)).isoformat(),
                })

        # Many overdue reviews -> one aggregate reminder item.
        due_for_review = self.ctrl_repo.get_due_for_review()
        if len(due_for_review) > 5:
            action_items.append({
                "priority": "medium",
                "category": "review",
                "title": f"{len(due_for_review)} Control-Reviews ueberfaellig",
                "description": "Mehrere Controls muessen reviewed werden.",
                "owner": "Compliance Officer",
                "due_date": (date.today() + timedelta(days=30)).isoformat(),
            })

        return action_items

    def _generate_trends_placeholder(self, period: ReportPeriod) -> Dict[str, Any]:
        """
        Generate trends section.

        Note: Full trend analysis requires historical data storage.
        This is a placeholder for future implementation.
        """
        return {
            "note": "Trend-Analyse erfordert historische Daten. Feature in Entwicklung.",
            "period": period.value,
            "compliance_score_trend": "stable",  # Placeholder
            "risk_trend": "stable",  # Placeholder
            "recommendations": [
                "Historische Score-Snapshots aktivieren fuer Trend-Analyse",
                "Regelmaessige Report-Generierung einrichten",
            ],
        }

    def generate_summary_report(self) -> Dict[str, Any]:
        """Generate a quick summary report (for dashboard widgets).

        A lightweight alternative to :meth:`generate_report`: only
        headline numbers for controls, risks, and evidence coverage.
        """
        stats = self.ctrl_repo.get_statistics()
        risk_matrix = self.risk_repo.get_matrix_data()
        evidence_stats = self.evidence_repo.get_statistics()

        return {
            # TODO: datetime.utcnow() is deprecated (Python 3.12+);
            # kept for now so the timestamp format stays unchanged.
            "generated_at": datetime.utcnow().isoformat(),
            "compliance_score": stats.get("compliance_score", 0),
            "controls": {
                "total": stats.get("total", 0),
                "passing": stats.get("by_status", {}).get("pass", 0),
                "failing": stats.get("by_status", {}).get("fail", 0),
            },
            "risks": {
                "total": risk_matrix["total_risks"],
                "critical": risk_matrix["by_level"].get("critical", 0),
                "high": risk_matrix["by_level"].get("high", 0),
            },
            "evidence": {
                "total": evidence_stats.get("total", 0),
                "coverage": evidence_stats.get("coverage_percent", 0),
            },
        }
|
||||
488
backend/compliance/services/seeder.py
Normal file
488
backend/compliance/services/seeder.py
Normal file
@@ -0,0 +1,488 @@
|
||||
"""
|
||||
Compliance Seeder Service.
|
||||
|
||||
Seeds the database with initial regulations, controls, and requirements.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..db.models import (
|
||||
RegulationDB,
|
||||
RequirementDB,
|
||||
ControlDB,
|
||||
ControlMappingDB,
|
||||
RiskDB,
|
||||
ServiceModuleDB,
|
||||
ModuleRegulationMappingDB,
|
||||
StatementOfApplicabilityDB,
|
||||
RegulationTypeEnum,
|
||||
ControlTypeEnum,
|
||||
ControlDomainEnum,
|
||||
ControlStatusEnum,
|
||||
RiskLevelEnum,
|
||||
ServiceTypeEnum,
|
||||
RelevanceLevelEnum,
|
||||
)
|
||||
from ..data.regulations import REGULATIONS_SEED
|
||||
from ..data.controls import CONTROLS_SEED
|
||||
from ..data.requirements import REQUIREMENTS_SEED
|
||||
from ..data.risks import RISKS_SEED
|
||||
from ..data.service_modules import BREAKPILOT_SERVICES
|
||||
from ..data.iso27001_annex_a import ISO27001_ANNEX_A_CONTROLS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ComplianceSeeder:
    """Seeds the compliance database with initial data.

    All seed methods are idempotent: every insert is preceded by an
    existence check, so re-running the seeder only adds missing rows.
    ``seed_all`` is the main entry point; the ``*_only`` variants support
    incremental updates and commit on their own.
    """

    def __init__(self, db: Session):
        # Caches mapping natural keys to primary-key IDs, filled as rows
        # are created or found, so later seed steps can resolve FKs
        # without repeated lookups.
        self.db = db
        self._regulation_map: Dict[str, str] = {}  # code -> id
        self._module_map: Dict[str, str] = {}  # name -> id

    def seed_all(self, force: bool = False) -> Dict[str, int]:
        """
        Seed all compliance data.

        Seeding order matters: regulations first (requirements and module
        mappings reference them), then controls, requirements, mappings,
        risks, service modules, module-regulation mappings, and SoA entries.
        The whole run is one transaction: committed on success, rolled back
        on any failure.

        Args:
            force: If True, re-seed even if data exists

        Returns:
            Dictionary with counts of seeded items
        """
        results = {
            "regulations": 0,
            "controls": 0,
            "requirements": 0,
            "mappings": 0,
            "risks": 0,
            "service_modules": 0,
            "module_regulation_mappings": 0,
            "soa_entries": 0,
        }

        # Check if already seeded (any regulation row counts as "seeded").
        existing_regulations = self.db.query(RegulationDB).count()
        if existing_regulations > 0 and not force:
            logger.info(f"Database already has {existing_regulations} regulations, skipping seed")
            return results

        try:
            # Seed in order (regulations first, then controls, then requirements, then risks, then service modules)
            results["regulations"] = self._seed_regulations()
            results["controls"] = self._seed_controls()
            results["requirements"] = self._seed_requirements()
            results["mappings"] = self._seed_default_mappings()
            results["risks"] = self._seed_risks()
            results["service_modules"] = self._seed_service_modules()
            results["module_regulation_mappings"] = self._seed_module_regulation_mappings()
            results["soa_entries"] = self._seed_soa()

            self.db.commit()
            logger.info(f"Seeding completed: {results}")
            return results

        except Exception as e:
            self.db.rollback()
            logger.error(f"Seeding failed: {e}")
            raise

    def _seed_regulations(self) -> int:
        """Seed regulations from REGULATIONS_SEED.

        Populates ``self._regulation_map`` (code -> id) for both existing
        and newly created rows. Returns the number of rows created.
        """
        count = 0
        for reg_data in REGULATIONS_SEED:
            # Check if regulation already exists
            existing = self.db.query(RegulationDB).filter(
                RegulationDB.code == reg_data["code"]
            ).first()

            if existing:
                self._regulation_map[reg_data["code"]] = existing.id
                continue

            regulation = RegulationDB(
                code=reg_data["code"],
                name=reg_data["name"],
                full_name=reg_data.get("full_name"),
                regulation_type=RegulationTypeEnum(reg_data["regulation_type"]),
                source_url=reg_data.get("source_url"),
                local_pdf_path=reg_data.get("local_pdf_path"),
                effective_date=reg_data.get("effective_date"),
                description=reg_data.get("description"),
                is_active=reg_data.get("is_active", True),
            )
            self.db.add(regulation)
            self.db.flush()  # Get the ID (needed for the code -> id cache)
            self._regulation_map[reg_data["code"]] = regulation.id
            count += 1

        return count

    def _seed_controls(self) -> int:
        """Seed controls from CONTROLS_SEED.

        Every new control starts in PLANNED status regardless of the seed
        data. Returns the number of rows created.
        """
        count = 0
        for ctrl_data in CONTROLS_SEED:
            # Check if control already exists
            existing = self.db.query(ControlDB).filter(
                ControlDB.control_id == ctrl_data["control_id"]
            ).first()

            if existing:
                continue

            control = ControlDB(
                control_id=ctrl_data["control_id"],
                domain=ControlDomainEnum(ctrl_data["domain"]),
                control_type=ControlTypeEnum(ctrl_data["control_type"]),
                title=ctrl_data["title"],
                description=ctrl_data.get("description"),
                pass_criteria=ctrl_data["pass_criteria"],
                implementation_guidance=ctrl_data.get("implementation_guidance"),
                code_reference=ctrl_data.get("code_reference"),
                is_automated=ctrl_data.get("is_automated", False),
                automation_tool=ctrl_data.get("automation_tool"),
                owner=ctrl_data.get("owner"),
                review_frequency_days=ctrl_data.get("review_frequency_days", 90),
                status=ControlStatusEnum.PLANNED,  # All start as planned
            )
            self.db.add(control)
            count += 1

        return count

    def _seed_requirements(self) -> int:
        """Seed requirements from REQUIREMENTS_SEED.

        Each requirement references its regulation via ``regulation_code``;
        the regulation is resolved from the cache or the database, and the
        requirement is skipped (with a warning) if it cannot be found.
        Uniqueness is checked on (regulation, article, paragraph).
        Returns the number of rows created.
        """
        count = 0
        for req_data in REQUIREMENTS_SEED:
            # Get regulation ID (cache first, then database fallback).
            regulation_code = req_data["regulation_code"]
            regulation_id = self._regulation_map.get(regulation_code)

            if not regulation_id:
                # Try to find in database
                regulation = self.db.query(RegulationDB).filter(
                    RegulationDB.code == regulation_code
                ).first()
                if regulation:
                    regulation_id = regulation.id
                    self._regulation_map[regulation_code] = regulation_id
                else:
                    logger.warning(f"Regulation {regulation_code} not found, skipping requirement")
                    continue

            # Check if requirement already exists
            # (a None paragraph compares as IS NULL in SQLAlchemy).
            existing = self.db.query(RequirementDB).filter(
                RequirementDB.regulation_id == regulation_id,
                RequirementDB.article == req_data["article"],
                RequirementDB.paragraph == req_data.get("paragraph"),
            ).first()

            if existing:
                continue

            requirement = RequirementDB(
                regulation_id=regulation_id,
                article=req_data["article"],
                paragraph=req_data.get("paragraph"),
                title=req_data["title"],
                description=req_data.get("description"),
                requirement_text=req_data.get("requirement_text"),
                breakpilot_interpretation=req_data.get("breakpilot_interpretation"),
                is_applicable=req_data.get("is_applicable", True),
                applicability_reason=req_data.get("applicability_reason"),
                priority=req_data.get("priority", 2),
            )
            self.db.add(requirement)
            count += 1

        return count

    def _seed_default_mappings(self) -> int:
        """Create default mappings between requirements and controls.

        ``mapping_rules`` is a static table of
        (regulation code, article prefix, control IDs); requirements are
        matched by a LIKE on the article prefix, and each matched
        requirement is mapped to every listed control with coverage
        level "full". Returns the number of mappings created.
        """
        # Define default mappings based on domain/regulation relationships
        mapping_rules = [
            # GDPR Privacy mappings
            ("GDPR", "Art. 5", ["PRIV-001", "PRIV-003", "PRIV-006", "PRIV-007"]),
            ("GDPR", "Art. 25", ["PRIV-003", "PRIV-007"]),
            ("GDPR", "Art. 28", ["PRIV-005"]),
            ("GDPR", "Art. 30", ["PRIV-001"]),
            ("GDPR", "Art. 32", ["CRYPTO-001", "CRYPTO-002", "CRYPTO-003", "IAM-001", "OPS-002"]),
            ("GDPR", "Art. 35", ["PRIV-002", "AI-005"]),
            # AI Act mappings
            ("AIACT", "Art. 9", ["AI-001", "AI-004", "AI-005"]),
            ("AIACT", "Art. 13", ["AI-002", "AI-003"]),
            ("AIACT", "Art. 14", ["AI-003"]),
            ("AIACT", "Art. 15", ["AI-004", "SDLC-001", "SDLC-002"]),
            ("AIACT", "Art. 50", ["AI-002"]),
            # CRA mappings
            ("CRA", "Art. 10", ["SDLC-001", "SDLC-002", "SDLC-006"]),
            ("CRA", "Art. 11", ["GOV-005", "OPS-003"]),
            ("CRA", "Art. 13", ["CRA-001", "SDLC-005"]),
            ("CRA", "Art. 14", ["CRA-003", "OPS-004"]),
            ("CRA", "Art. 15", ["CRA-004"]),
            # BSI-TR mappings
            ("BSI-TR-03161-1", "O.Arch_1", ["GOV-001", "GOV-002", "GOV-004"]),
            ("BSI-TR-03161-1", "O.Auth_1", ["IAM-001", "IAM-002", "IAM-004"]),
            ("BSI-TR-03161-1", "O.Cryp_1", ["CRYPTO-001", "CRYPTO-002", "CRYPTO-003", "CRYPTO-004"]),
            ("BSI-TR-03161-1", "O.Data_1", ["CRYPTO-001", "CRYPTO-002", "PRIV-007"]),
            ("BSI-TR-03161-2", "O.Auth_2", ["IAM-004"]),
            ("BSI-TR-03161-2", "O.Source_1", ["SDLC-001", "SDLC-004"]),
            ("BSI-TR-03161-3", "O.Back_1", ["CRYPTO-002"]),
            ("BSI-TR-03161-3", "O.Ops_1", ["OPS-001", "OPS-002", "OPS-005"]),
        ]

        count = 0
        for reg_code, article_prefix, control_ids in mapping_rules:
            # Find requirements matching this regulation and article
            requirements = self.db.query(RequirementDB).join(RegulationDB).filter(
                RegulationDB.code == reg_code,
                RequirementDB.article.like(f"{article_prefix}%"),
            ).all()

            for req in requirements:
                for control_id in control_ids:
                    # Find control
                    control = self.db.query(ControlDB).filter(
                        ControlDB.control_id == control_id
                    ).first()

                    if not control:
                        continue

                    # Check if mapping exists
                    existing = self.db.query(ControlMappingDB).filter(
                        ControlMappingDB.requirement_id == req.id,
                        ControlMappingDB.control_id == control.id,
                    ).first()

                    if existing:
                        continue

                    mapping = ControlMappingDB(
                        requirement_id=req.id,
                        control_id=control.id,
                        coverage_level="full",
                    )
                    self.db.add(mapping)
                    count += 1

        return count

    def seed_regulations_only(self) -> int:
        """Seed only regulations (useful for incremental updates).

        Commits immediately; returns the number of rows created.
        """
        count = self._seed_regulations()
        self.db.commit()
        return count

    def seed_controls_only(self) -> int:
        """Seed only controls (useful for incremental updates).

        Commits immediately; returns the number of rows created.
        """
        count = self._seed_controls()
        self.db.commit()
        return count

    def _seed_risks(self) -> int:
        """Seed risks from RISKS_SEED.

        The inherent risk level is derived from likelihood x impact via
        ``RiskDB.calculate_risk_level``; every new risk starts with
        status "open". Returns the number of rows created.
        """
        count = 0
        for risk_data in RISKS_SEED:
            # Check if risk already exists
            existing = self.db.query(RiskDB).filter(
                RiskDB.risk_id == risk_data["risk_id"]
            ).first()

            if existing:
                continue

            # Calculate inherent risk level
            inherent_risk = RiskDB.calculate_risk_level(
                risk_data["likelihood"],
                risk_data["impact"]
            )

            risk = RiskDB(
                risk_id=risk_data["risk_id"],
                title=risk_data["title"],
                description=risk_data.get("description"),
                category=risk_data["category"],
                likelihood=risk_data["likelihood"],
                impact=risk_data["impact"],
                inherent_risk=inherent_risk,
                mitigating_controls=risk_data.get("mitigating_controls", []),
                owner=risk_data.get("owner"),
                treatment_plan=risk_data.get("treatment_plan"),
                status="open",
            )
            self.db.add(risk)
            count += 1

        return count

    def seed_risks_only(self) -> int:
        """Seed only risks (useful for incremental updates).

        Commits immediately; returns the number of rows created.
        """
        count = self._seed_risks()
        self.db.commit()
        return count

    def _seed_service_modules(self) -> int:
        """Seed service modules from BREAKPILOT_SERVICES.

        Populates ``self._module_map`` (name -> id) for both existing and
        newly created rows. Returns the number of rows created.
        """
        count = 0
        for service_data in BREAKPILOT_SERVICES:
            # Check if service already exists
            existing = self.db.query(ServiceModuleDB).filter(
                ServiceModuleDB.name == service_data["name"]
            ).first()

            if existing:
                self._module_map[service_data["name"]] = existing.id
                continue

            module = ServiceModuleDB(
                name=service_data["name"],
                display_name=service_data["display_name"],
                description=service_data.get("description"),
                service_type=ServiceTypeEnum(service_data["service_type"]),
                port=service_data.get("port"),
                technology_stack=service_data.get("technology_stack", []),
                repository_path=service_data.get("repository_path"),
                docker_image=service_data.get("docker_image"),
                data_categories=service_data.get("data_categories", []),
                processes_pii=service_data.get("processes_pii", False),
                processes_health_data=service_data.get("processes_health_data", False),
                ai_components=service_data.get("ai_components", False),
                is_active=True,
                criticality=service_data.get("criticality", "medium"),
                owner_team=service_data.get("owner_team"),
            )
            self.db.add(module)
            self.db.flush()  # Get the ID (needed for the name -> id cache)
            self._module_map[service_data["name"]] = module.id
            count += 1

        return count

    def _seed_module_regulation_mappings(self) -> int:
        """Create mappings between service modules and regulations.

        For each service's declared ``regulations`` list, resolves both
        sides (cache first, database fallback), skips missing entities
        with a warning, and creates one mapping per (module, regulation)
        pair with the declared relevance level. Returns the number of
        mappings created.
        """
        count = 0
        for service_data in BREAKPILOT_SERVICES:
            # Get module ID (cache first, then database fallback).
            module_id = self._module_map.get(service_data["name"])
            if not module_id:
                # Try to find in database
                module = self.db.query(ServiceModuleDB).filter(
                    ServiceModuleDB.name == service_data["name"]
                ).first()
                if module:
                    module_id = module.id
                    self._module_map[service_data["name"]] = module_id
                else:
                    logger.warning(f"Module {service_data['name']} not found, skipping regulation mappings")
                    continue

            # Process regulation mappings
            regulations = service_data.get("regulations", [])
            for reg_mapping in regulations:
                # Find regulation by code
                regulation_code = reg_mapping["code"]
                regulation_id = self._regulation_map.get(regulation_code)

                if not regulation_id:
                    regulation = self.db.query(RegulationDB).filter(
                        RegulationDB.code == regulation_code
                    ).first()
                    if regulation:
                        regulation_id = regulation.id
                        self._regulation_map[regulation_code] = regulation_id
                    else:
                        logger.warning(f"Regulation {regulation_code} not found, skipping mapping for {service_data['name']}")
                        continue

                # Check if mapping exists
                existing = self.db.query(ModuleRegulationMappingDB).filter(
                    ModuleRegulationMappingDB.module_id == module_id,
                    ModuleRegulationMappingDB.regulation_id == regulation_id,
                ).first()

                if existing:
                    continue

                mapping = ModuleRegulationMappingDB(
                    module_id=module_id,
                    regulation_id=regulation_id,
                    relevance_level=RelevanceLevelEnum(reg_mapping["relevance"]),
                    notes=reg_mapping.get("notes"),
                )
                self.db.add(mapping)
                count += 1

        return count

    def seed_service_modules_only(self) -> int:
        """Seed only service modules (useful for incremental updates).

        Ensures the regulation cache is populated first (module mappings
        need regulation IDs), then seeds modules and their regulation
        mappings, commits, and returns the combined count of created rows.
        """
        results = {
            "service_modules": 0,
            "module_regulation_mappings": 0,
        }

        # Ensure regulations are loaded first
        if not self._regulation_map:
            self._seed_regulations()

        results["service_modules"] = self._seed_service_modules()
        results["module_regulation_mappings"] = self._seed_module_regulation_mappings()

        self.db.commit()
        logger.info(f"Service modules seeding completed: {results}")
        return results["service_modules"] + results["module_regulation_mappings"]

    def _seed_soa(self) -> int:
        """
        Seed Statement of Applicability (SoA) entries from ISO 27001:2022 Annex A.

        Creates SoA entries for all 93 Annex A controls, each starting in
        implementation status "planned". This is MANDATORY for ISO 27001
        certification. Returns the number of entries created.
        """
        count = 0
        for annex_control in ISO27001_ANNEX_A_CONTROLS:
            control_id = annex_control["control_id"]

            # Check if SoA entry already exists
            existing = self.db.query(StatementOfApplicabilityDB).filter(
                StatementOfApplicabilityDB.annex_a_control == control_id
            ).first()

            if existing:
                continue

            # Create SoA entry
            soa_entry = StatementOfApplicabilityDB(
                annex_a_control=control_id,
                annex_a_title=annex_control["title"],
                annex_a_category=annex_control["category"],
                is_applicable=annex_control.get("default_applicable", True),
                applicability_justification=annex_control.get("description", ""),
                implementation_status="planned",
                implementation_notes=annex_control.get("implementation_guidance", ""),
                breakpilot_control_ids=annex_control.get("breakpilot_controls", []),
                evidence_description="",
                risk_assessment_notes="",
            )
            self.db.add(soa_entry)
            count += 1

        logger.info(f"Seeded {count} SoA entries from ISO 27001:2022 Annex A")
        return count

    def seed_soa_only(self) -> int:
        """
        Seed only SoA entries (useful for incremental updates).

        Creates all 93 ISO 27001:2022 Annex A control entries in the SoA,
        commits immediately, and returns the number of entries created.
        """
        count = self._seed_soa()
        self.db.commit()
        logger.info(f"SoA seeding completed: {count} entries")
        return count
|
||||
Reference in New Issue
Block a user