Files
breakpilot-compliance/backend-compliance/compliance/services/seeder.py
Benjamin Admin 95fcba34cd
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Failing after 30s
CI / test-python-backend-compliance (push) Successful in 30s
CI / test-python-document-crawler (push) Successful in 21s
CI / test-python-dsms-gateway (push) Successful in 17s
fix(quality): Ruff/CVE/TS-Fixes, 104 neue Tests, Complexity-Refactoring
- Ruff: 144 auto-fixes (unused imports, == None → is None), F821/F811/F841 manuell
- CVEs: python-multipart>=0.0.22, weasyprint>=68.0, pillow>=12.1.1, npm audit fix (0 vulns)
- TS: 5 tote Drafting-Engine-Dateien entfernt, allowed-facts/sanitizer/StepHeader/context fixes
- Tests: +104 (ISMS 58, Evidence 18, VVT 14, Generation 14) → 1449 passed
- Refactoring: collect_ci_evidence (F→A), row_to_response (E→A), extract_requirements (E→A)
- Dead Code: pca-platform, 7 Go-Handler, dsr_api.py, duplicate Schemas entfernt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 19:00:33 +01:00

487 lines
19 KiB
Python

"""
Compliance Seeder Service.
Seeds the database with initial regulations, controls, and requirements.
"""
import logging
from typing import Dict
from sqlalchemy.orm import Session
from ..db.models import (
RegulationDB,
RequirementDB,
ControlDB,
ControlMappingDB,
RiskDB,
ServiceModuleDB,
ModuleRegulationMappingDB,
StatementOfApplicabilityDB,
RegulationTypeEnum,
ControlTypeEnum,
ControlDomainEnum,
ControlStatusEnum,
ServiceTypeEnum,
RelevanceLevelEnum,
)
from ..data.regulations import REGULATIONS_SEED
from ..data.controls import CONTROLS_SEED
from ..data.requirements import REQUIREMENTS_SEED
from ..data.risks import RISKS_SEED
from ..data.service_modules import BREAKPILOT_SERVICES
from ..data.iso27001_annex_a import ISO27001_ANNEX_A_CONTROLS
# Logger for this module, named after the module's import path.
logger = logging.getLogger(__name__)
class ComplianceSeeder:
    """Seeds the compliance database with initial data.

    Every ``_seed_*`` step is idempotent: a row whose natural key already
    exists is skipped, never updated. The public ``seed_*`` methods commit on
    success and roll the session back (then re-raise) on failure.
    """

    # (regulation code, article prefix, control IDs) triples used to create
    # the default requirement -> control mappings.
    _DEFAULT_MAPPING_RULES = (
        # GDPR Privacy mappings
        ("GDPR", "Art. 5", ("PRIV-001", "PRIV-003", "PRIV-006", "PRIV-007")),
        ("GDPR", "Art. 25", ("PRIV-003", "PRIV-007")),
        ("GDPR", "Art. 28", ("PRIV-005",)),
        ("GDPR", "Art. 30", ("PRIV-001",)),
        ("GDPR", "Art. 32", ("CRYPTO-001", "CRYPTO-002", "CRYPTO-003", "IAM-001", "OPS-002")),
        ("GDPR", "Art. 35", ("PRIV-002", "AI-005")),
        # AI Act mappings
        ("AIACT", "Art. 9", ("AI-001", "AI-004", "AI-005")),
        ("AIACT", "Art. 13", ("AI-002", "AI-003")),
        ("AIACT", "Art. 14", ("AI-003",)),
        ("AIACT", "Art. 15", ("AI-004", "SDLC-001", "SDLC-002")),
        ("AIACT", "Art. 50", ("AI-002",)),
        # CRA mappings
        ("CRA", "Art. 10", ("SDLC-001", "SDLC-002", "SDLC-006")),
        ("CRA", "Art. 11", ("GOV-005", "OPS-003")),
        ("CRA", "Art. 13", ("CRA-001", "SDLC-005")),
        ("CRA", "Art. 14", ("CRA-003", "OPS-004")),
        ("CRA", "Art. 15", ("CRA-004",)),
        # BSI-TR mappings
        ("BSI-TR-03161-1", "O.Arch_1", ("GOV-001", "GOV-002", "GOV-004")),
        ("BSI-TR-03161-1", "O.Auth_1", ("IAM-001", "IAM-002", "IAM-004")),
        ("BSI-TR-03161-1", "O.Cryp_1", ("CRYPTO-001", "CRYPTO-002", "CRYPTO-003", "CRYPTO-004")),
        ("BSI-TR-03161-1", "O.Data_1", ("CRYPTO-001", "CRYPTO-002", "PRIV-007")),
        ("BSI-TR-03161-2", "O.Auth_2", ("IAM-004",)),
        ("BSI-TR-03161-2", "O.Source_1", ("SDLC-001", "SDLC-004")),
        ("BSI-TR-03161-3", "O.Back_1", ("CRYPTO-002",)),
        ("BSI-TR-03161-3", "O.Ops_1", ("OPS-001", "OPS-002", "OPS-005")),
    )

    def __init__(self, db: "Session"):
        """Store the session and initialise the lookup caches.

        Args:
            db: SQLAlchemy session used for all queries and inserts.
        """
        self.db = db
        self._regulation_map: Dict[str, str] = {}  # code -> id
        self._module_map: Dict[str, str] = {}  # name -> id

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _article_matches(article, prefix: str) -> bool:
        """Return True if ``article`` is ``prefix`` or a sub-part of it.

        A plain prefix match is not enough: "Art. 5" must match "Art. 5" and
        "Art. 5 Abs. 1" but not "Art. 50". The match is therefore rejected
        when the character following the prefix is a digit.
        """
        if not article or not article.startswith(prefix):
            return False
        return not article[len(prefix):len(prefix) + 1].isdigit()

    def _resolve_regulation_id(self, code: str):
        """Return the ID of the regulation with ``code``, or None if unknown.

        The in-memory cache is consulted first; on a miss the database is
        queried and the cache is updated.
        """
        regulation_id = self._regulation_map.get(code)
        if regulation_id:
            return regulation_id
        regulation = self.db.query(RegulationDB).filter(
            RegulationDB.code == code
        ).first()
        if regulation is None:
            return None
        self._regulation_map[code] = regulation.id
        return regulation.id

    def _resolve_module_id(self, name: str):
        """Return the ID of the service module ``name``, or None if unknown.

        The in-memory cache is consulted first; on a miss the database is
        queried and the cache is updated.
        """
        module_id = self._module_map.get(name)
        if module_id:
            return module_id
        module = self.db.query(ServiceModuleDB).filter(
            ServiceModuleDB.name == name
        ).first()
        if module is None:
            return None
        self._module_map[name] = module.id
        return module.id

    def _run_committed(self, step, label: str) -> int:
        """Run one seeding step and commit; roll back and re-raise on error.

        Args:
            step: Zero-argument callable returning the number of rows added.
            label: Short name of the step, used only in the error log.
        """
        try:
            count = step()
            self.db.commit()
        except Exception as exc:
            # Leave the session usable for the caller, as seed_all() does.
            self.db.rollback()
            logger.exception("Seeding %s failed: %s", label, exc)
            raise
        return count

    # ------------------------------------------------------------------
    # Public entry points
    # ------------------------------------------------------------------

    def seed_all(self, force: bool = False) -> Dict[str, int]:
        """
        Seed all compliance data.

        Args:
            force: If True, run the seeding steps even if regulations already
                exist. Existing rows are still skipped, not overwritten, so
                this only adds entries that are missing.

        Returns:
            Dictionary with counts of newly seeded items per category.

        Raises:
            Exception: Any error raised by a seeding step or the commit; the
                session is rolled back before the error is re-raised.
        """
        results = {
            "regulations": 0,
            "controls": 0,
            "requirements": 0,
            "mappings": 0,
            "risks": 0,
            "service_modules": 0,
            "module_regulation_mappings": 0,
            "soa_entries": 0,
        }

        # Check if already seeded
        existing_regulations = self.db.query(RegulationDB).count()
        if existing_regulations > 0 and not force:
            logger.info(
                "Database already has %s regulations, skipping seed",
                existing_regulations,
            )
            return results

        try:
            # Order matters: requirements need regulations, mappings need
            # requirements and controls, module mappings need modules.
            results["regulations"] = self._seed_regulations()
            results["controls"] = self._seed_controls()
            results["requirements"] = self._seed_requirements()
            results["mappings"] = self._seed_default_mappings()
            results["risks"] = self._seed_risks()
            results["service_modules"] = self._seed_service_modules()
            results["module_regulation_mappings"] = self._seed_module_regulation_mappings()
            results["soa_entries"] = self._seed_soa()
            self.db.commit()
        except Exception as e:
            self.db.rollback()
            # logger.exception keeps the traceback alongside the message.
            logger.exception("Seeding failed: %s", e)
            raise

        logger.info("Seeding completed: %s", results)
        return results

    def _seed_regulations(self) -> int:
        """Seed regulations from REGULATIONS_SEED and fill the code -> id cache."""
        count = 0
        for reg_data in REGULATIONS_SEED:
            code = reg_data["code"]

            # Check if regulation already exists
            existing = self.db.query(RegulationDB).filter(
                RegulationDB.code == code
            ).first()
            if existing:
                self._regulation_map[code] = existing.id
                continue

            regulation = RegulationDB(
                code=code,
                name=reg_data["name"],
                full_name=reg_data.get("full_name"),
                regulation_type=RegulationTypeEnum(reg_data["regulation_type"]),
                source_url=reg_data.get("source_url"),
                local_pdf_path=reg_data.get("local_pdf_path"),
                effective_date=reg_data.get("effective_date"),
                description=reg_data.get("description"),
                is_active=reg_data.get("is_active", True),
            )
            self.db.add(regulation)
            self.db.flush()  # Get the ID
            self._regulation_map[code] = regulation.id
            count += 1
        return count

    def _seed_controls(self) -> int:
        """Seed controls from CONTROLS_SEED; every new control starts as PLANNED."""
        count = 0
        for ctrl_data in CONTROLS_SEED:
            # Check if control already exists
            existing = self.db.query(ControlDB).filter(
                ControlDB.control_id == ctrl_data["control_id"]
            ).first()
            if existing:
                continue

            control = ControlDB(
                control_id=ctrl_data["control_id"],
                domain=ControlDomainEnum(ctrl_data["domain"]),
                control_type=ControlTypeEnum(ctrl_data["control_type"]),
                title=ctrl_data["title"],
                description=ctrl_data.get("description"),
                pass_criteria=ctrl_data["pass_criteria"],
                implementation_guidance=ctrl_data.get("implementation_guidance"),
                code_reference=ctrl_data.get("code_reference"),
                is_automated=ctrl_data.get("is_automated", False),
                automation_tool=ctrl_data.get("automation_tool"),
                owner=ctrl_data.get("owner"),
                review_frequency_days=ctrl_data.get("review_frequency_days", 90),
                status=ControlStatusEnum.PLANNED,  # All start as planned
            )
            self.db.add(control)
            count += 1
        return count

    def _seed_requirements(self) -> int:
        """Seed requirements from REQUIREMENTS_SEED.

        Requirements whose regulation cannot be resolved are skipped with a
        warning. A requirement is considered existing when regulation,
        article and paragraph all match.
        """
        count = 0
        for req_data in REQUIREMENTS_SEED:
            regulation_code = req_data["regulation_code"]
            regulation_id = self._resolve_regulation_id(regulation_code)
            if not regulation_id:
                logger.warning(
                    "Regulation %s not found, skipping requirement",
                    regulation_code,
                )
                continue

            # Check if requirement already exists
            existing = self.db.query(RequirementDB).filter(
                RequirementDB.regulation_id == regulation_id,
                RequirementDB.article == req_data["article"],
                RequirementDB.paragraph == req_data.get("paragraph"),
            ).first()
            if existing:
                continue

            requirement = RequirementDB(
                regulation_id=regulation_id,
                article=req_data["article"],
                paragraph=req_data.get("paragraph"),
                title=req_data["title"],
                description=req_data.get("description"),
                requirement_text=req_data.get("requirement_text"),
                breakpilot_interpretation=req_data.get("breakpilot_interpretation"),
                is_applicable=req_data.get("is_applicable", True),
                applicability_reason=req_data.get("applicability_reason"),
                priority=req_data.get("priority", 2),
            )
            self.db.add(requirement)
            count += 1
        return count

    def _seed_default_mappings(self) -> int:
        """Create default mappings between requirements and controls.

        For every rule in ``_DEFAULT_MAPPING_RULES`` each requirement of the
        given regulation whose article matches the rule's article prefix is
        linked to each listed control with coverage level "full". Controls
        that do not exist and mappings that already exist are skipped.
        """
        # Requirements and controls added earlier in this run may still be
        # pending. Flush explicitly so they have IDs and are visible to the
        # queries below even when the session was created with autoflush off.
        self.db.flush()

        # Controls are not added during this step, so each lookup is done once.
        controls_by_code = {}
        count = 0
        for reg_code, article_prefix, control_codes in self._DEFAULT_MAPPING_RULES:
            # LIKE is only a coarse pre-filter: it would also accept
            # "Art. 50" for the prefix "Art. 5" and treats "_" as a wildcard.
            candidates = self.db.query(RequirementDB).join(RegulationDB).filter(
                RegulationDB.code == reg_code,
                RequirementDB.article.like(f"{article_prefix}%"),
            ).all()
            requirements = [
                req for req in candidates
                if self._article_matches(req.article, article_prefix)
            ]
            if not requirements:
                continue

            controls = []
            for code in control_codes:
                if code not in controls_by_code:
                    controls_by_code[code] = self.db.query(ControlDB).filter(
                        ControlDB.control_id == code
                    ).first()
                control = controls_by_code[code]
                if control is not None:
                    controls.append(control)

            for req in requirements:
                for control in controls:
                    # Check if mapping exists
                    existing = self.db.query(ControlMappingDB).filter(
                        ControlMappingDB.requirement_id == req.id,
                        ControlMappingDB.control_id == control.id,
                    ).first()
                    if existing:
                        continue

                    mapping = ControlMappingDB(
                        requirement_id=req.id,
                        control_id=control.id,
                        coverage_level="full",
                    )
                    self.db.add(mapping)
                    count += 1
        return count

    def seed_regulations_only(self) -> int:
        """Seed only regulations (useful for incremental updates)."""
        return self._run_committed(self._seed_regulations, "regulations")

    def seed_controls_only(self) -> int:
        """Seed only controls (useful for incremental updates)."""
        return self._run_committed(self._seed_controls, "controls")

    def _seed_risks(self) -> int:
        """Seed risks from RISKS_SEED; every new risk starts with status "open"."""
        count = 0
        for risk_data in RISKS_SEED:
            # Check if risk already exists
            existing = self.db.query(RiskDB).filter(
                RiskDB.risk_id == risk_data["risk_id"]
            ).first()
            if existing:
                continue

            # Calculate inherent risk level
            inherent_risk = RiskDB.calculate_risk_level(
                risk_data["likelihood"],
                risk_data["impact"],
            )

            risk = RiskDB(
                risk_id=risk_data["risk_id"],
                title=risk_data["title"],
                description=risk_data.get("description"),
                category=risk_data["category"],
                likelihood=risk_data["likelihood"],
                impact=risk_data["impact"],
                inherent_risk=inherent_risk,
                mitigating_controls=risk_data.get("mitigating_controls", []),
                owner=risk_data.get("owner"),
                treatment_plan=risk_data.get("treatment_plan"),
                status="open",
            )
            self.db.add(risk)
            count += 1
        return count

    def seed_risks_only(self) -> int:
        """Seed only risks (useful for incremental updates)."""
        return self._run_committed(self._seed_risks, "risks")

    def _seed_service_modules(self) -> int:
        """Seed service modules from BREAKPILOT_SERVICES and fill the name -> id cache."""
        count = 0
        for service_data in BREAKPILOT_SERVICES:
            name = service_data["name"]

            # Check if service already exists
            existing = self.db.query(ServiceModuleDB).filter(
                ServiceModuleDB.name == name
            ).first()
            if existing:
                self._module_map[name] = existing.id
                continue

            module = ServiceModuleDB(
                name=name,
                display_name=service_data["display_name"],
                description=service_data.get("description"),
                service_type=ServiceTypeEnum(service_data["service_type"]),
                port=service_data.get("port"),
                technology_stack=service_data.get("technology_stack", []),
                repository_path=service_data.get("repository_path"),
                docker_image=service_data.get("docker_image"),
                data_categories=service_data.get("data_categories", []),
                processes_pii=service_data.get("processes_pii", False),
                processes_health_data=service_data.get("processes_health_data", False),
                ai_components=service_data.get("ai_components", False),
                is_active=True,
                criticality=service_data.get("criticality", "medium"),
                owner_team=service_data.get("owner_team"),
            )
            self.db.add(module)
            self.db.flush()  # Get the ID
            self._module_map[name] = module.id
            count += 1
        return count

    def _seed_module_regulation_mappings(self) -> int:
        """Create mappings between service modules and regulations.

        Reads the optional "regulations" list of every entry in
        BREAKPILOT_SERVICES. Modules or regulations that cannot be resolved
        are skipped with a warning; existing mappings are left untouched.
        """
        count = 0
        for service_data in BREAKPILOT_SERVICES:
            module_name = service_data["name"]
            module_id = self._resolve_module_id(module_name)
            if not module_id:
                logger.warning(
                    "Module %s not found, skipping regulation mappings",
                    module_name,
                )
                continue

            # Process regulation mappings
            for reg_mapping in service_data.get("regulations", []):
                regulation_code = reg_mapping["code"]
                regulation_id = self._resolve_regulation_id(regulation_code)
                if not regulation_id:
                    logger.warning(
                        "Regulation %s not found, skipping mapping for %s",
                        regulation_code,
                        module_name,
                    )
                    continue

                # Check if mapping exists
                existing = self.db.query(ModuleRegulationMappingDB).filter(
                    ModuleRegulationMappingDB.module_id == module_id,
                    ModuleRegulationMappingDB.regulation_id == regulation_id,
                ).first()
                if existing:
                    continue

                mapping = ModuleRegulationMappingDB(
                    module_id=module_id,
                    regulation_id=regulation_id,
                    relevance_level=RelevanceLevelEnum(reg_mapping["relevance"]),
                    notes=reg_mapping.get("notes"),
                )
                self.db.add(mapping)
                count += 1
        return count

    def seed_service_modules_only(self) -> int:
        """Seed only service modules (useful for incremental updates).

        Returns:
            Number of service modules plus module-regulation mappings added.
        """
        results = {
            "service_modules": 0,
            "module_regulation_mappings": 0,
        }

        def _seed_modules_and_mappings() -> int:
            # Ensure regulations are loaded first
            if not self._regulation_map:
                self._seed_regulations()
            results["service_modules"] = self._seed_service_modules()
            results["module_regulation_mappings"] = self._seed_module_regulation_mappings()
            return results["service_modules"] + results["module_regulation_mappings"]

        total = self._run_committed(_seed_modules_and_mappings, "service modules")
        logger.info("Service modules seeding completed: %s", results)
        return total

    def _seed_soa(self) -> int:
        """
        Seed Statement of Applicability (SoA) entries from ISO 27001:2022 Annex A.

        Creates one SoA entry, with implementation status "planned", for
        every control in ISO27001_ANNEX_A_CONTROLS that has no entry yet.
        """
        count = 0
        for annex_control in ISO27001_ANNEX_A_CONTROLS:
            control_id = annex_control["control_id"]

            # Check if SoA entry already exists
            existing = self.db.query(StatementOfApplicabilityDB).filter(
                StatementOfApplicabilityDB.annex_a_control == control_id
            ).first()
            if existing:
                continue

            # Create SoA entry
            soa_entry = StatementOfApplicabilityDB(
                annex_a_control=control_id,
                annex_a_title=annex_control["title"],
                annex_a_category=annex_control["category"],
                is_applicable=annex_control.get("default_applicable", True),
                applicability_justification=annex_control.get("description", ""),
                implementation_status="planned",
                implementation_notes=annex_control.get("implementation_guidance", ""),
                breakpilot_control_ids=annex_control.get("breakpilot_controls", []),
                evidence_description="",
                risk_assessment_notes="",
            )
            self.db.add(soa_entry)
            count += 1

        logger.info("Seeded %s SoA entries from ISO 27001:2022 Annex A", count)
        return count

    def seed_soa_only(self) -> int:
        """
        Seed only SoA entries (useful for incremental updates).

        Returns:
            Number of SoA entries added.
        """
        count = self._run_committed(self._seed_soa, "SoA")
        logger.info("SoA seeding completed: %s entries", count)
        return count