Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Failing after 30s
CI / test-python-backend-compliance (push) Successful in 30s
CI / test-python-document-crawler (push) Successful in 21s
CI / test-python-dsms-gateway (push) Successful in 17s
- Ruff: 144 auto-fixes (unused imports, == None → is None), F821/F811/F841 manuell - CVEs: python-multipart>=0.0.22, weasyprint>=68.0, pillow>=12.1.1, npm audit fix (0 vulns) - TS: 5 tote Drafting-Engine-Dateien entfernt, allowed-facts/sanitizer/StepHeader/context fixes - Tests: +104 (ISMS 58, Evidence 18, VVT 14, Generation 14) → 1449 passed - Refactoring: collect_ci_evidence (F→A), row_to_response (E→A), extract_requirements (E→A) - Dead Code: pca-platform, 7 Go-Handler, dsr_api.py, duplicate Schemas entfernt Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
487 lines
19 KiB
Python
487 lines
19 KiB
Python
"""
|
|
Compliance Seeder Service.
|
|
|
|
Seeds the database with initial regulations, controls, and requirements.
|
|
"""
|
|
|
|
import logging
|
|
from typing import Dict
|
|
|
|
from sqlalchemy.orm import Session
|
|
|
|
from ..db.models import (
|
|
RegulationDB,
|
|
RequirementDB,
|
|
ControlDB,
|
|
ControlMappingDB,
|
|
RiskDB,
|
|
ServiceModuleDB,
|
|
ModuleRegulationMappingDB,
|
|
StatementOfApplicabilityDB,
|
|
RegulationTypeEnum,
|
|
ControlTypeEnum,
|
|
ControlDomainEnum,
|
|
ControlStatusEnum,
|
|
ServiceTypeEnum,
|
|
RelevanceLevelEnum,
|
|
)
|
|
from ..data.regulations import REGULATIONS_SEED
|
|
from ..data.controls import CONTROLS_SEED
|
|
from ..data.requirements import REQUIREMENTS_SEED
|
|
from ..data.risks import RISKS_SEED
|
|
from ..data.service_modules import BREAKPILOT_SERVICES
|
|
from ..data.iso27001_annex_a import ISO27001_ANNEX_A_CONTROLS
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ComplianceSeeder:
    """Seeds the compliance database with initial data.

    Every ``_seed_*`` helper is insert-only: a row whose lookup key already
    exists is skipped, never updated or deleted. The private helpers only add
    objects to the session (flushing where a generated ID is needed); the
    public ``seed_*`` entry points are the ones that commit.
    """

    def __init__(self, db: Session):
        """Bind the seeder to a session and start with empty lookup caches.

        Args:
            db: Session used for every query and insert performed here.
        """
        self.db = db
        self._regulation_map: Dict[str, str] = {}  # code -> id
        self._module_map: Dict[str, str] = {}  # name -> id

    def seed_all(self, force: bool = False) -> Dict[str, int]:
        """
        Seed all compliance data.

        Args:
            force: If True, run the seed even when regulations already exist.
                Existing rows are still left untouched; only missing rows
                are inserted.

        Returns:
            Dictionary with the number of newly inserted items per category
            (all zeros when seeding was skipped).

        Raises:
            Exception: Any error raised while seeding is re-raised after the
                session has been rolled back.
        """
        results = {
            "regulations": 0,
            "controls": 0,
            "requirements": 0,
            "mappings": 0,
            "risks": 0,
            "service_modules": 0,
            "module_regulation_mappings": 0,
            "soa_entries": 0,
        }

        # Check if already seeded
        existing_regulations = self.db.query(RegulationDB).count()
        if existing_regulations > 0 and not force:
            logger.info(f"Database already has {existing_regulations} regulations, skipping seed")
            return results

        try:
            # Order matters: later steps look up rows created by earlier ones
            # (requirements need regulations, mappings need both, ...).
            results["regulations"] = self._seed_regulations()
            results["controls"] = self._seed_controls()
            results["requirements"] = self._seed_requirements()
            results["mappings"] = self._seed_default_mappings()
            results["risks"] = self._seed_risks()
            results["service_modules"] = self._seed_service_modules()
            results["module_regulation_mappings"] = self._seed_module_regulation_mappings()
            results["soa_entries"] = self._seed_soa()

            self.db.commit()
            logger.info(f"Seeding completed: {results}")
            return results

        except Exception as e:
            self.db.rollback()
            # logger.exception keeps the traceback, which a plain error() drops.
            logger.exception(f"Seeding failed: {e}")
            raise

    def _resolve_regulation_id(self, regulation_code: str):
        """Return the ID of the regulation with *regulation_code*, or None.

        Looks in the in-memory cache first, then falls back to a database
        query; a database hit is written back to the cache.
        """
        cached_id = self._regulation_map.get(regulation_code)
        if cached_id:
            return cached_id

        regulation = self.db.query(RegulationDB).filter(
            RegulationDB.code == regulation_code
        ).first()
        if not regulation:
            return None

        self._regulation_map[regulation_code] = regulation.id
        return regulation.id

    def _seed_regulations(self) -> int:
        """Seed regulations from REGULATIONS_SEED.

        Also fills ``self._regulation_map`` (code -> id) for both existing
        and newly created regulations.

        Returns:
            Number of regulations newly added to the session.
        """
        count = 0
        for reg_data in REGULATIONS_SEED:
            # Check if regulation already exists
            existing = self.db.query(RegulationDB).filter(
                RegulationDB.code == reg_data["code"]
            ).first()

            if existing:
                self._regulation_map[reg_data["code"]] = existing.id
                continue

            regulation = RegulationDB(
                code=reg_data["code"],
                name=reg_data["name"],
                full_name=reg_data.get("full_name"),
                regulation_type=RegulationTypeEnum(reg_data["regulation_type"]),
                source_url=reg_data.get("source_url"),
                local_pdf_path=reg_data.get("local_pdf_path"),
                effective_date=reg_data.get("effective_date"),
                description=reg_data.get("description"),
                is_active=reg_data.get("is_active", True),
            )
            self.db.add(regulation)
            self.db.flush()  # Get the ID
            self._regulation_map[reg_data["code"]] = regulation.id
            count += 1

        return count

    def _seed_controls(self) -> int:
        """Seed controls from CONTROLS_SEED.

        Returns:
            Number of controls newly added to the session.
        """
        count = 0
        for ctrl_data in CONTROLS_SEED:
            # Check if control already exists
            existing = self.db.query(ControlDB).filter(
                ControlDB.control_id == ctrl_data["control_id"]
            ).first()

            if existing:
                continue

            control = ControlDB(
                control_id=ctrl_data["control_id"],
                domain=ControlDomainEnum(ctrl_data["domain"]),
                control_type=ControlTypeEnum(ctrl_data["control_type"]),
                title=ctrl_data["title"],
                description=ctrl_data.get("description"),
                pass_criteria=ctrl_data["pass_criteria"],
                implementation_guidance=ctrl_data.get("implementation_guidance"),
                code_reference=ctrl_data.get("code_reference"),
                is_automated=ctrl_data.get("is_automated", False),
                automation_tool=ctrl_data.get("automation_tool"),
                owner=ctrl_data.get("owner"),
                review_frequency_days=ctrl_data.get("review_frequency_days", 90),
                status=ControlStatusEnum.PLANNED,  # All start as planned
            )
            self.db.add(control)
            count += 1

        return count

    def _seed_requirements(self) -> int:
        """Seed requirements from REQUIREMENTS_SEED.

        A requirement whose regulation code cannot be resolved is skipped
        with a warning. An existing requirement is identified by
        (regulation, article, paragraph).

        Returns:
            Number of requirements newly added to the session.
        """
        count = 0
        for req_data in REQUIREMENTS_SEED:
            # Get regulation ID
            regulation_code = req_data["regulation_code"]
            regulation_id = self._resolve_regulation_id(regulation_code)
            if regulation_id is None:
                logger.warning(f"Regulation {regulation_code} not found, skipping requirement")
                continue

            # Check if requirement already exists
            existing = self.db.query(RequirementDB).filter(
                RequirementDB.regulation_id == regulation_id,
                RequirementDB.article == req_data["article"],
                RequirementDB.paragraph == req_data.get("paragraph"),
            ).first()

            if existing:
                continue

            requirement = RequirementDB(
                regulation_id=regulation_id,
                article=req_data["article"],
                paragraph=req_data.get("paragraph"),
                title=req_data["title"],
                description=req_data.get("description"),
                requirement_text=req_data.get("requirement_text"),
                breakpilot_interpretation=req_data.get("breakpilot_interpretation"),
                is_applicable=req_data.get("is_applicable", True),
                applicability_reason=req_data.get("applicability_reason"),
                priority=req_data.get("priority", 2),
            )
            self.db.add(requirement)
            count += 1

        return count

    @staticmethod
    def _article_matches(article, article_prefix: str) -> bool:
        """Return True if *article* belongs to the rule for *article_prefix*.

        The article must start with the prefix (exact, case-sensitive), and
        the character right after the prefix must not be a digit. This keeps
        "Art. 5 Abs. 1" under "Art. 5" while rejecting "Art. 50", and keeps
        "O.Arch_1" from also claiming "O.Arch_10".
        """
        if not article or not article.startswith(article_prefix):
            return False
        next_char = article[len(article_prefix):][:1]
        return not next_char.isdigit()

    def _seed_default_mappings(self) -> int:
        """Create default mappings between requirements and controls.

        For every (regulation code, article prefix, control codes) rule, each
        matching requirement is linked to each listed control that exists,
        unless that link is already stored. New links get coverage "full".

        Returns:
            Number of mappings newly added to the session.
        """
        # Define default mappings based on domain/regulation relationships
        mapping_rules = [
            # GDPR Privacy mappings
            ("GDPR", "Art. 5", ["PRIV-001", "PRIV-003", "PRIV-006", "PRIV-007"]),
            ("GDPR", "Art. 25", ["PRIV-003", "PRIV-007"]),
            ("GDPR", "Art. 28", ["PRIV-005"]),
            ("GDPR", "Art. 30", ["PRIV-001"]),
            ("GDPR", "Art. 32", ["CRYPTO-001", "CRYPTO-002", "CRYPTO-003", "IAM-001", "OPS-002"]),
            ("GDPR", "Art. 35", ["PRIV-002", "AI-005"]),
            # AI Act mappings
            ("AIACT", "Art. 9", ["AI-001", "AI-004", "AI-005"]),
            ("AIACT", "Art. 13", ["AI-002", "AI-003"]),
            ("AIACT", "Art. 14", ["AI-003"]),
            ("AIACT", "Art. 15", ["AI-004", "SDLC-001", "SDLC-002"]),
            ("AIACT", "Art. 50", ["AI-002"]),
            # CRA mappings
            ("CRA", "Art. 10", ["SDLC-001", "SDLC-002", "SDLC-006"]),
            ("CRA", "Art. 11", ["GOV-005", "OPS-003"]),
            ("CRA", "Art. 13", ["CRA-001", "SDLC-005"]),
            ("CRA", "Art. 14", ["CRA-003", "OPS-004"]),
            ("CRA", "Art. 15", ["CRA-004"]),
            # BSI-TR mappings
            ("BSI-TR-03161-1", "O.Arch_1", ["GOV-001", "GOV-002", "GOV-004"]),
            ("BSI-TR-03161-1", "O.Auth_1", ["IAM-001", "IAM-002", "IAM-004"]),
            ("BSI-TR-03161-1", "O.Cryp_1", ["CRYPTO-001", "CRYPTO-002", "CRYPTO-003", "CRYPTO-004"]),
            ("BSI-TR-03161-1", "O.Data_1", ["CRYPTO-001", "CRYPTO-002", "PRIV-007"]),
            ("BSI-TR-03161-2", "O.Auth_2", ["IAM-004"]),
            ("BSI-TR-03161-2", "O.Source_1", ["SDLC-001", "SDLC-004"]),
            ("BSI-TR-03161-3", "O.Back_1", ["CRYPTO-002"]),
            ("BSI-TR-03161-3", "O.Ops_1", ["OPS-001", "OPS-002", "OPS-005"]),
        ]

        # req.id / control.id are read below; flush so objects added earlier
        # in this session have their IDs even if autoflush is disabled.
        self.db.flush()

        # control code -> ControlDB (or None); avoids re-querying the same
        # control for every requirement it is mapped to.
        controls_by_code = {}

        count = 0
        for reg_code, article_prefix, control_ids in mapping_rules:
            # LIKE is only a coarse pre-filter ("Art. 5%" also returns
            # "Art. 50", and "_" is itself a wildcard); the exact match is
            # decided in Python.
            candidates = self.db.query(RequirementDB).join(RegulationDB).filter(
                RegulationDB.code == reg_code,
                RequirementDB.article.like(f"{article_prefix}%"),
            ).all()
            requirements = [
                req for req in candidates
                if self._article_matches(req.article, article_prefix)
            ]

            for req in requirements:
                for control_id in control_ids:
                    # Find control
                    if control_id not in controls_by_code:
                        controls_by_code[control_id] = self.db.query(ControlDB).filter(
                            ControlDB.control_id == control_id
                        ).first()
                    control = controls_by_code[control_id]

                    if not control:
                        continue

                    # Check if mapping exists
                    existing = self.db.query(ControlMappingDB).filter(
                        ControlMappingDB.requirement_id == req.id,
                        ControlMappingDB.control_id == control.id,
                    ).first()

                    if existing:
                        continue

                    mapping = ControlMappingDB(
                        requirement_id=req.id,
                        control_id=control.id,
                        coverage_level="full",
                    )
                    self.db.add(mapping)
                    count += 1

        return count

    def seed_regulations_only(self) -> int:
        """Seed only regulations (useful for incremental updates) and commit."""
        count = self._seed_regulations()
        self.db.commit()
        return count

    def seed_controls_only(self) -> int:
        """Seed only controls (useful for incremental updates) and commit."""
        count = self._seed_controls()
        self.db.commit()
        return count

    def _seed_risks(self) -> int:
        """Seed risks from RISKS_SEED.

        The inherent risk level is obtained from
        ``RiskDB.calculate_risk_level(likelihood, impact)``; every new risk
        starts with status "open".

        Returns:
            Number of risks newly added to the session.
        """
        count = 0
        for risk_data in RISKS_SEED:
            # Check if risk already exists
            existing = self.db.query(RiskDB).filter(
                RiskDB.risk_id == risk_data["risk_id"]
            ).first()

            if existing:
                continue

            # Calculate inherent risk level
            inherent_risk = RiskDB.calculate_risk_level(
                risk_data["likelihood"],
                risk_data["impact"],
            )

            risk = RiskDB(
                risk_id=risk_data["risk_id"],
                title=risk_data["title"],
                description=risk_data.get("description"),
                category=risk_data["category"],
                likelihood=risk_data["likelihood"],
                impact=risk_data["impact"],
                inherent_risk=inherent_risk,
                mitigating_controls=risk_data.get("mitigating_controls", []),
                owner=risk_data.get("owner"),
                treatment_plan=risk_data.get("treatment_plan"),
                status="open",
            )
            self.db.add(risk)
            count += 1

        return count

    def seed_risks_only(self) -> int:
        """Seed only risks (useful for incremental updates) and commit."""
        count = self._seed_risks()
        self.db.commit()
        return count

    def _seed_service_modules(self) -> int:
        """Seed service modules from BREAKPILOT_SERVICES.

        Also fills ``self._module_map`` (name -> id) for both existing and
        newly created modules.

        Returns:
            Number of service modules newly added to the session.
        """
        count = 0
        for service_data in BREAKPILOT_SERVICES:
            # Check if service already exists
            existing = self.db.query(ServiceModuleDB).filter(
                ServiceModuleDB.name == service_data["name"]
            ).first()

            if existing:
                self._module_map[service_data["name"]] = existing.id
                continue

            module = ServiceModuleDB(
                name=service_data["name"],
                display_name=service_data["display_name"],
                description=service_data.get("description"),
                service_type=ServiceTypeEnum(service_data["service_type"]),
                port=service_data.get("port"),
                technology_stack=service_data.get("technology_stack", []),
                repository_path=service_data.get("repository_path"),
                docker_image=service_data.get("docker_image"),
                data_categories=service_data.get("data_categories", []),
                processes_pii=service_data.get("processes_pii", False),
                processes_health_data=service_data.get("processes_health_data", False),
                ai_components=service_data.get("ai_components", False),
                is_active=True,
                criticality=service_data.get("criticality", "medium"),
                owner_team=service_data.get("owner_team"),
            )
            self.db.add(module)
            self.db.flush()  # Get the ID
            self._module_map[service_data["name"]] = module.id
            count += 1

        return count

    def _seed_module_regulation_mappings(self) -> int:
        """Create mappings between service modules and regulations.

        Reads the optional "regulations" list of each entry in
        BREAKPILOT_SERVICES. Modules or regulations that cannot be resolved
        are skipped with a warning.

        Returns:
            Number of module/regulation mappings newly added to the session.
        """
        count = 0
        for service_data in BREAKPILOT_SERVICES:
            # Get module ID
            module_id = self._module_map.get(service_data["name"])
            if not module_id:
                # Try to find in database
                module = self.db.query(ServiceModuleDB).filter(
                    ServiceModuleDB.name == service_data["name"]
                ).first()
                if module:
                    module_id = module.id
                    self._module_map[service_data["name"]] = module_id
                else:
                    logger.warning(f"Module {service_data['name']} not found, skipping regulation mappings")
                    continue

            # Process regulation mappings
            regulations = service_data.get("regulations", [])
            for reg_mapping in regulations:
                # Find regulation by code
                regulation_code = reg_mapping["code"]
                regulation_id = self._resolve_regulation_id(regulation_code)
                if regulation_id is None:
                    logger.warning(f"Regulation {regulation_code} not found, skipping mapping for {service_data['name']}")
                    continue

                # Check if mapping exists
                existing = self.db.query(ModuleRegulationMappingDB).filter(
                    ModuleRegulationMappingDB.module_id == module_id,
                    ModuleRegulationMappingDB.regulation_id == regulation_id,
                ).first()

                if existing:
                    continue

                mapping = ModuleRegulationMappingDB(
                    module_id=module_id,
                    regulation_id=regulation_id,
                    relevance_level=RelevanceLevelEnum(reg_mapping["relevance"]),
                    notes=reg_mapping.get("notes"),
                )
                self.db.add(mapping)
                count += 1

        return count

    def seed_service_modules_only(self) -> int:
        """Seed only service modules (useful for incremental updates).

        Regulations are seeded first when the regulation cache is empty, so
        that module/regulation mappings can be resolved.

        Returns:
            Sum of newly added service modules and newly added
            module/regulation mappings.
        """
        results = {
            "service_modules": 0,
            "module_regulation_mappings": 0,
        }

        # Ensure regulations are loaded first
        if not self._regulation_map:
            self._seed_regulations()

        results["service_modules"] = self._seed_service_modules()
        results["module_regulation_mappings"] = self._seed_module_regulation_mappings()

        self.db.commit()
        logger.info(f"Service modules seeding completed: {results}")
        return results["service_modules"] + results["module_regulation_mappings"]

    def _seed_soa(self) -> int:
        """
        Seed Statement of Applicability (SoA) entries from ISO 27001:2022 Annex A.

        Creates one SoA entry per item in ISO27001_ANNEX_A_CONTROLS that is
        not stored yet (the original note states this list holds all 93
        Annex A controls and that the SoA is mandatory for ISO 27001
        certification). New entries start with implementation status
        "planned".

        Returns:
            Number of SoA entries newly added to the session.
        """
        count = 0
        for annex_control in ISO27001_ANNEX_A_CONTROLS:
            control_id = annex_control["control_id"]

            # Check if SoA entry already exists
            existing = self.db.query(StatementOfApplicabilityDB).filter(
                StatementOfApplicabilityDB.annex_a_control == control_id
            ).first()

            if existing:
                continue

            # Create SoA entry
            soa_entry = StatementOfApplicabilityDB(
                annex_a_control=control_id,
                annex_a_title=annex_control["title"],
                annex_a_category=annex_control["category"],
                is_applicable=annex_control.get("default_applicable", True),
                applicability_justification=annex_control.get("description", ""),
                implementation_status="planned",
                implementation_notes=annex_control.get("implementation_guidance", ""),
                breakpilot_control_ids=annex_control.get("breakpilot_controls", []),
                evidence_description="",
                risk_assessment_notes="",
            )
            self.db.add(soa_entry)
            count += 1

        logger.info(f"Seeded {count} SoA entries from ISO 27001:2022 Annex A")
        return count

    def seed_soa_only(self) -> int:
        """
        Seed only SoA entries (useful for incremental updates) and commit.

        Returns:
            Number of SoA entries newly added.
        """
        count = self._seed_soa()
        self.db.commit()
        logger.info(f"SoA seeding completed: {count} entries")
        return count
|