""" Audit Export Generator. Generates ZIP packages for external auditors containing: - Regulations & Requirements - Control Catalogue with status - Evidence artifacts - Risk register - Summary reports """ import hashlib import json import logging import os import shutil import tempfile import zipfile from datetime import datetime, date, timezone from pathlib import Path from typing import Dict, List, Optional, Any from sqlalchemy.orm import Session from ..db.models import ( RegulationDB, RequirementDB, ControlDB, ControlMappingDB, EvidenceDB, RiskDB, AuditExportDB, ExportStatusEnum, ControlStatusEnum, ) logger = logging.getLogger(__name__) class AuditExportGenerator: """Generates audit export packages.""" def __init__(self, db: Session, export_dir: str = "/tmp/compliance_exports"): self.db = db self.export_dir = Path(export_dir) self.export_dir.mkdir(parents=True, exist_ok=True) def create_export( self, requested_by: str, export_type: str = "full", included_regulations: Optional[List[str]] = None, included_domains: Optional[List[str]] = None, date_range_start: Optional[date] = None, date_range_end: Optional[date] = None, ) -> AuditExportDB: """ Create a new audit export. Args: requested_by: User requesting the export export_type: "full", "controls_only", "evidence_only" included_regulations: Filter by regulation codes included_domains: Filter by control domains date_range_start: Evidence collected after this date date_range_end: Evidence collected before this date Returns: AuditExportDB record """ # Create export record export_record = AuditExportDB( export_type=export_type, export_name=f"Breakpilot Compliance Export {datetime.now().strftime('%Y-%m-%d %H:%M')}", included_regulations=included_regulations, included_domains=included_domains, date_range_start=date_range_start, date_range_end=date_range_end, requested_by=requested_by, status=ExportStatusEnum.GENERATING, ) self.db.add(export_record) self.db.flush() try: # Generate the export file_path, file_hash, file_size = self._generate_zip( export_record.id, export_type, included_regulations, included_domains, date_range_start, date_range_end, ) # Update record with results export_record.file_path = str(file_path) export_record.file_hash = file_hash export_record.file_size_bytes = file_size export_record.status = ExportStatusEnum.COMPLETED export_record.completed_at = datetime.now(timezone.utc) # Calculate statistics stats = self._calculate_statistics( included_regulations, included_domains ) export_record.total_controls = stats["total_controls"] export_record.total_evidence = stats["total_evidence"] export_record.compliance_score = stats["compliance_score"] self.db.commit() logger.info(f"Export completed: {file_path}") return export_record except Exception as e: export_record.status = ExportStatusEnum.FAILED export_record.error_message = str(e) self.db.commit() logger.error(f"Export failed: {e}") raise def _generate_zip( self, export_id: str, export_type: str, included_regulations: Optional[List[str]], included_domains: Optional[List[str]], date_range_start: Optional[date], date_range_end: Optional[date], ) -> tuple: """Generate the actual ZIP file.""" timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S") zip_filename = f"audit_export_{timestamp}.zip" zip_path = self.export_dir / zip_filename with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) # Create directory structure (temp_path / "regulations").mkdir() (temp_path / "controls").mkdir() (temp_path / "evidence").mkdir() (temp_path / "risks").mkdir() # Generate content based on export type if export_type in ["full", "controls_only"]: self._export_regulations(temp_path / "regulations", included_regulations) self._export_controls(temp_path / "controls", included_domains) if export_type in ["full", "evidence_only"]: self._export_evidence( temp_path / "evidence", included_domains, date_range_start, date_range_end, ) if export_type == "full": self._export_risks(temp_path / "risks") # Generate summary self._export_summary( temp_path, export_type, included_regulations, included_domains, ) # Generate README self._export_readme(temp_path) # Generate index.html for navigation self._export_index_html(temp_path) # Create ZIP with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: for file_path in temp_path.rglob("*"): if file_path.is_file(): arcname = file_path.relative_to(temp_path) zf.write(file_path, arcname) # Calculate hash file_hash = self._calculate_file_hash(zip_path) file_size = zip_path.stat().st_size return zip_path, file_hash, file_size def _export_regulations( self, output_dir: Path, included_regulations: Optional[List[str]] ) -> None: """Export regulations to JSON files.""" query = self.db.query(RegulationDB).filter(RegulationDB.is_active) if included_regulations: query = query.filter(RegulationDB.code.in_(included_regulations)) regulations = query.all() for reg in regulations: # Get requirements for this regulation requirements = self.db.query(RequirementDB).filter( RequirementDB.regulation_id == reg.id ).all() data = { "code": reg.code, "name": reg.name, "full_name": reg.full_name, "type": reg.regulation_type.value if reg.regulation_type else None, "source_url": reg.source_url, "effective_date": reg.effective_date.isoformat() if reg.effective_date else None, "description": reg.description, "requirements": [ { "article": r.article, "paragraph": r.paragraph, "title": r.title, "description": r.description, "is_applicable": r.is_applicable, "breakpilot_interpretation": r.breakpilot_interpretation, } for r in requirements ], } file_path = output_dir / f"{reg.code.lower()}.json" with open(file_path, "w", encoding="utf-8") as f: json.dump(data, f, indent=2, ensure_ascii=False) def _export_controls( self, output_dir: Path, included_domains: Optional[List[str]] ) -> None: """Export controls to JSON and generate summary.""" query = self.db.query(ControlDB) if included_domains: from ..db.models import ControlDomainEnum domain_enums = [ControlDomainEnum(d) for d in included_domains] query = query.filter(ControlDB.domain.in_(domain_enums)) controls = query.order_by(ControlDB.control_id).all() controls_data = [] for ctrl in controls: # Get mappings mappings = self.db.query(ControlMappingDB).filter( ControlMappingDB.control_id == ctrl.id ).all() # Get requirement references requirement_refs = [] for m in mappings: req = self.db.query(RequirementDB).get(m.requirement_id) if req: reg = self.db.query(RegulationDB).get(req.regulation_id) requirement_refs.append({ "regulation": reg.code if reg else None, "article": req.article, "paragraph": req.paragraph, "coverage": m.coverage_level, }) ctrl_data = { "control_id": ctrl.control_id, "domain": ctrl.domain.value if ctrl.domain else None, "type": ctrl.control_type.value if ctrl.control_type else None, "title": ctrl.title, "description": ctrl.description, "pass_criteria": ctrl.pass_criteria, "status": ctrl.status.value if ctrl.status else None, "is_automated": ctrl.is_automated, "automation_tool": ctrl.automation_tool, "owner": ctrl.owner, "last_reviewed": ctrl.last_reviewed_at.isoformat() if ctrl.last_reviewed_at else None, "code_reference": ctrl.code_reference, "mapped_requirements": requirement_refs, } controls_data.append(ctrl_data) # Write full catalogue with open(output_dir / "control_catalogue.json", "w", encoding="utf-8") as f: json.dump(controls_data, f, indent=2, ensure_ascii=False) # Write summary by domain domain_summary = {} for ctrl in controls_data: domain = ctrl["domain"] if domain not in domain_summary: domain_summary[domain] = {"total": 0, "pass": 0, "partial": 0, "fail": 0} domain_summary[domain]["total"] += 1 status = ctrl["status"] if status in domain_summary[domain]: domain_summary[domain][status] += 1 with open(output_dir / "domain_summary.json", "w", encoding="utf-8") as f: json.dump(domain_summary, f, indent=2, ensure_ascii=False) def _export_evidence( self, output_dir: Path, included_domains: Optional[List[str]], date_range_start: Optional[date], date_range_end: Optional[date], ) -> None: """Export evidence metadata and files.""" query = self.db.query(EvidenceDB) if date_range_start: query = query.filter(EvidenceDB.collected_at >= datetime.combine(date_range_start, datetime.min.time())) if date_range_end: query = query.filter(EvidenceDB.collected_at <= datetime.combine(date_range_end, datetime.max.time())) if included_domains: from ..db.models import ControlDomainEnum domain_enums = [ControlDomainEnum(d) for d in included_domains] query = query.join(ControlDB).filter(ControlDB.domain.in_(domain_enums)) evidence_list = query.all() evidence_data = [] for ev in evidence_list: ctrl = self.db.query(ControlDB).get(ev.control_id) ev_data = { "id": ev.id, "control_id": ctrl.control_id if ctrl else None, "evidence_type": ev.evidence_type, "title": ev.title, "description": ev.description, "artifact_path": ev.artifact_path, "artifact_url": ev.artifact_url, "artifact_hash": ev.artifact_hash, "status": ev.status.value if ev.status else None, "valid_from": ev.valid_from.isoformat() if ev.valid_from else None, "valid_until": ev.valid_until.isoformat() if ev.valid_until else None, "collected_at": ev.collected_at.isoformat() if ev.collected_at else None, "source": ev.source, } evidence_data.append(ev_data) # Copy evidence files if they exist if ev.artifact_path and os.path.exists(ev.artifact_path): evidence_subdir = output_dir / ev.evidence_type evidence_subdir.mkdir(exist_ok=True) filename = os.path.basename(ev.artifact_path) shutil.copy2(ev.artifact_path, evidence_subdir / filename) with open(output_dir / "evidence_index.json", "w", encoding="utf-8") as f: json.dump(evidence_data, f, indent=2, ensure_ascii=False) def _export_risks(self, output_dir: Path) -> None: """Export risk register.""" risks = self.db.query(RiskDB).order_by(RiskDB.risk_id).all() risks_data = [] for risk in risks: risk_data = { "risk_id": risk.risk_id, "title": risk.title, "description": risk.description, "category": risk.category, "likelihood": risk.likelihood, "impact": risk.impact, "inherent_risk": risk.inherent_risk.value if risk.inherent_risk else None, "mitigating_controls": risk.mitigating_controls, "residual_likelihood": risk.residual_likelihood, "residual_impact": risk.residual_impact, "residual_risk": risk.residual_risk.value if risk.residual_risk else None, "owner": risk.owner, "status": risk.status, "treatment_plan": risk.treatment_plan, } risks_data.append(risk_data) with open(output_dir / "risk_register.json", "w", encoding="utf-8") as f: json.dump(risks_data, f, indent=2, ensure_ascii=False) def _export_summary( self, output_dir: Path, export_type: str, included_regulations: Optional[List[str]], included_domains: Optional[List[str]], ) -> None: """Generate summary.json with overall statistics.""" stats = self._calculate_statistics(included_regulations, included_domains) summary = { "export_date": datetime.now().isoformat(), "export_type": export_type, "filters": { "regulations": included_regulations, "domains": included_domains, }, "statistics": stats, "organization": "Breakpilot", "version": "1.0.0", } with open(output_dir / "summary.json", "w", encoding="utf-8") as f: json.dump(summary, f, indent=2, ensure_ascii=False) def _export_readme(self, output_dir: Path) -> None: """Generate README.md for auditors.""" readme = """# Breakpilot Compliance Export Dieses Paket enthält die Compliance-Dokumentation von Breakpilot. ## Struktur ``` ├── summary.json # Zusammenfassung und Statistiken ├── index.html # HTML-Navigation (im Browser öffnen) ├── regulations/ # Verordnungen und Anforderungen │ ├── gdpr.json │ ├── aiact.json │ └── ... ├── controls/ # Control Catalogue │ ├── control_catalogue.json │ └── domain_summary.json ├── evidence/ # Nachweise │ ├── evidence_index.json │ └── [evidence_type]/ └── risks/ # Risikoregister └── risk_register.json ``` ## Verwendung 1. **HTML-Navigation**: Öffnen Sie `index.html` im Browser für eine visuelle Übersicht. 2. **JSON-Dateien**: Maschinenlesbare Daten für Import in GRC-Tools. 3. **Nachweis-Dateien**: Originale Scan-Reports und Konfigurationen. ## Kontakt Bei Fragen wenden Sie sich an das Breakpilot Security Team. --- Generiert am: """ + datetime.now().strftime("%Y-%m-%d %H:%M:%S") with open(output_dir / "README.md", "w", encoding="utf-8") as f: f.write(readme) def _export_index_html(self, output_dir: Path) -> None: """Generate index.html for browser navigation.""" html = """