"""
Compliance Seeder Service.

Seeds the database with initial regulations, controls, and requirements.
"""

import logging
from typing import Dict, List, Optional
from datetime import datetime

from sqlalchemy.orm import Session

from ..db.models import (
    RegulationDB,
    RequirementDB,
    ControlDB,
    ControlMappingDB,
    RiskDB,
    ServiceModuleDB,
    ModuleRegulationMappingDB,
    StatementOfApplicabilityDB,
    RegulationTypeEnum,
    ControlTypeEnum,
    ControlDomainEnum,
    ControlStatusEnum,
    RiskLevelEnum,
    ServiceTypeEnum,
    RelevanceLevelEnum,
)
from ..data.regulations import REGULATIONS_SEED
from ..data.controls import CONTROLS_SEED
from ..data.requirements import REQUIREMENTS_SEED
from ..data.risks import RISKS_SEED
from ..data.service_modules import BREAKPILOT_SERVICES
from ..data.iso27001_annex_a import ISO27001_ANNEX_A_CONTROLS

logger = logging.getLogger(__name__)


class ComplianceSeeder:
    """Seeds the compliance database with initial data.

    Every ``_seed_*`` step only inserts rows that are not present yet (looked
    up by their natural key), so running the seeder repeatedly never creates
    duplicates. The private steps add objects to the session without
    committing; the public ``seed_*`` entry points own the transaction.
    """

    # (regulation code, article prefix, control IDs) rules used by
    # _seed_default_mappings to link requirements to controls.
    _DEFAULT_MAPPING_RULES = (
        # GDPR Privacy mappings
        ("GDPR", "Art. 5", ("PRIV-001", "PRIV-003", "PRIV-006", "PRIV-007")),
        ("GDPR", "Art. 25", ("PRIV-003", "PRIV-007")),
        ("GDPR", "Art. 28", ("PRIV-005",)),
        ("GDPR", "Art. 30", ("PRIV-001",)),
        ("GDPR", "Art. 32", ("CRYPTO-001", "CRYPTO-002", "CRYPTO-003", "IAM-001", "OPS-002")),
        ("GDPR", "Art. 35", ("PRIV-002", "AI-005")),
        # AI Act mappings
        ("AIACT", "Art. 9", ("AI-001", "AI-004", "AI-005")),
        ("AIACT", "Art. 13", ("AI-002", "AI-003")),
        ("AIACT", "Art. 14", ("AI-003",)),
        ("AIACT", "Art. 15", ("AI-004", "SDLC-001", "SDLC-002")),
        ("AIACT", "Art. 50", ("AI-002",)),
        # CRA mappings
        ("CRA", "Art. 10", ("SDLC-001", "SDLC-002", "SDLC-006")),
        ("CRA", "Art. 11", ("GOV-005", "OPS-003")),
        ("CRA", "Art. 13", ("CRA-001", "SDLC-005")),
        ("CRA", "Art. 14", ("CRA-003", "OPS-004")),
        ("CRA", "Art. 15", ("CRA-004",)),
        # BSI-TR mappings
        ("BSI-TR-03161-1", "O.Arch_1", ("GOV-001", "GOV-002", "GOV-004")),
        ("BSI-TR-03161-1", "O.Auth_1", ("IAM-001", "IAM-002", "IAM-004")),
        ("BSI-TR-03161-1", "O.Cryp_1", ("CRYPTO-001", "CRYPTO-002", "CRYPTO-003", "CRYPTO-004")),
        ("BSI-TR-03161-1", "O.Data_1", ("CRYPTO-001", "CRYPTO-002", "PRIV-007")),
        ("BSI-TR-03161-2", "O.Auth_2", ("IAM-004",)),
        ("BSI-TR-03161-2", "O.Source_1", ("SDLC-001", "SDLC-004")),
        ("BSI-TR-03161-3", "O.Back_1", ("CRYPTO-002",)),
        ("BSI-TR-03161-3", "O.Ops_1", ("OPS-001", "OPS-002", "OPS-005")),
    )

    def __init__(self, db: Session):
        """Bind the seeder to *db* and start with empty ID lookup caches."""
        self.db = db
        self._regulation_map: Dict[str, str] = {}  # code -> id
        self._module_map: Dict[str, str] = {}  # name -> id

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _run_and_commit(self, seed_step):
        """Run *seed_step*, commit on success, roll back and re-raise on failure.

        Args:
            seed_step: Zero-argument callable performing the seeding work.

        Returns:
            Whatever *seed_step* returned.
        """
        try:
            outcome = seed_step()
            self.db.commit()
        except Exception as exc:
            # Leave the session usable for the caller and keep the traceback.
            self.db.rollback()
            logger.exception("Seeding failed: %s", exc)
            raise
        return outcome

    def _resolve_regulation_id(self, regulation_code: str) -> Optional[str]:
        """Return the ID of the regulation with *regulation_code*, or None.

        Consults the in-memory cache first and falls back to a database
        lookup, caching a successful hit.
        """
        regulation_id = self._regulation_map.get(regulation_code)
        if regulation_id:
            return regulation_id

        regulation = self.db.query(RegulationDB).filter(
            RegulationDB.code == regulation_code
        ).first()
        if regulation is None:
            return None

        self._regulation_map[regulation_code] = regulation.id
        return regulation.id

    def _resolve_module_id(self, module_name: str) -> Optional[str]:
        """Return the ID of the service module named *module_name*, or None.

        Consults the in-memory cache first and falls back to a database
        lookup, caching a successful hit.
        """
        module_id = self._module_map.get(module_name)
        if module_id:
            return module_id

        module = self.db.query(ServiceModuleDB).filter(
            ServiceModuleDB.name == module_name
        ).first()
        if module is None:
            return None

        self._module_map[module_name] = module.id
        return module.id

    @staticmethod
    def _continues_article_number(article: str, prefix: str) -> bool:
        """Return True when the character right after *prefix* is a digit.

        Used to reject over-matches of a prefix search: "Art. 50" starts with
        "Art. 5" but is a different article, whereas "Art. 5" itself or
        "Art. 5(1)" belong to the rule. The caller guarantees that *article*
        already starts with *prefix*.
        """
        return article[len(prefix):len(prefix) + 1].isdigit()

    def _seed_all_steps(self) -> Dict[str, int]:
        """Run every seeding step in dependency order and return the counts."""
        # Dict literals evaluate top to bottom, which gives the required
        # order: regulations, controls, requirements, mappings, risks,
        # service modules, module/regulation mappings, SoA.
        return {
            "regulations": self._seed_regulations(),
            "controls": self._seed_controls(),
            "requirements": self._seed_requirements(),
            "mappings": self._seed_default_mappings(),
            "risks": self._seed_risks(),
            "service_modules": self._seed_service_modules(),
            "module_regulation_mappings": self._seed_module_regulation_mappings(),
            "soa_entries": self._seed_soa(),
        }

    # ------------------------------------------------------------------
    # Public entry points
    # ------------------------------------------------------------------

    def seed_all(self, force: bool = False) -> Dict[str, int]:
        """
        Seed all compliance data.

        Args:
            force: If True, run the seeding steps even if regulations already
                exist. Existing rows are never overwritten; only missing ones
                are inserted.

        Returns:
            Dictionary with counts of seeded items (all zero when seeding was
            skipped).

        Raises:
            Exception: Any error raised by a seeding step or the commit is
                re-raised after the session has been rolled back.
        """
        # Check if already seeded
        existing_regulations = self.db.query(RegulationDB).count()
        if existing_regulations > 0 and not force:
            logger.info(
                "Database already has %s regulations, skipping seed",
                existing_regulations,
            )
            return {
                "regulations": 0,
                "controls": 0,
                "requirements": 0,
                "mappings": 0,
                "risks": 0,
                "service_modules": 0,
                "module_regulation_mappings": 0,
                "soa_entries": 0,
            }

        results = self._run_and_commit(self._seed_all_steps)
        logger.info("Seeding completed: %s", results)
        return results

    def seed_regulations_only(self) -> int:
        """Seed only regulations (useful for incremental updates)."""
        return self._run_and_commit(self._seed_regulations)

    def seed_controls_only(self) -> int:
        """Seed only controls (useful for incremental updates)."""
        return self._run_and_commit(self._seed_controls)

    def seed_risks_only(self) -> int:
        """Seed only risks (useful for incremental updates)."""
        return self._run_and_commit(self._seed_risks)

    def seed_service_modules_only(self) -> int:
        """Seed only service modules and their regulation mappings.

        Returns:
            Number of service modules plus number of module/regulation
            mappings that were created.
        """

        def seed_modules_and_mappings() -> Dict[str, int]:
            # Ensure regulations are loaded first
            if not self._regulation_map:
                self._seed_regulations()
            return {
                "service_modules": self._seed_service_modules(),
                "module_regulation_mappings": self._seed_module_regulation_mappings(),
            }

        results = self._run_and_commit(seed_modules_and_mappings)
        logger.info("Service modules seeding completed: %s", results)
        return results["service_modules"] + results["module_regulation_mappings"]

    def seed_soa_only(self) -> int:
        """
        Seed only SoA entries (useful for incremental updates).

        Creates one SoA entry per control listed in
        ISO27001_ANNEX_A_CONTROLS (ISO 27001:2022 Annex A).
        """
        count = self._run_and_commit(self._seed_soa)
        logger.info("SoA seeding completed: %s entries", count)
        return count

    # ------------------------------------------------------------------
    # Seeding steps (add to the session, never commit)
    # ------------------------------------------------------------------

    def _seed_regulations(self) -> int:
        """Seed regulations from REGULATIONS_SEED.

        Also fills the code -> id cache for every regulation in the seed
        data, whether it was just created or already present.

        Returns:
            Number of regulations created.
        """
        count = 0
        for reg_data in REGULATIONS_SEED:
            code = reg_data["code"]

            # Check if regulation already exists
            existing = self.db.query(RegulationDB).filter(
                RegulationDB.code == code
            ).first()
            if existing:
                self._regulation_map[code] = existing.id
                continue

            regulation = RegulationDB(
                code=code,
                name=reg_data["name"],
                full_name=reg_data.get("full_name"),
                regulation_type=RegulationTypeEnum(reg_data["regulation_type"]),
                source_url=reg_data.get("source_url"),
                local_pdf_path=reg_data.get("local_pdf_path"),
                effective_date=reg_data.get("effective_date"),
                description=reg_data.get("description"),
                is_active=reg_data.get("is_active", True),
            )
            self.db.add(regulation)
            self.db.flush()  # Get the ID
            self._regulation_map[code] = regulation.id
            count += 1
        return count

    def _seed_controls(self) -> int:
        """Seed controls from CONTROLS_SEED.

        Returns:
            Number of controls created.
        """
        count = 0
        for ctrl_data in CONTROLS_SEED:
            # Check if control already exists
            existing = self.db.query(ControlDB).filter(
                ControlDB.control_id == ctrl_data["control_id"]
            ).first()
            if existing:
                continue

            control = ControlDB(
                control_id=ctrl_data["control_id"],
                domain=ControlDomainEnum(ctrl_data["domain"]),
                control_type=ControlTypeEnum(ctrl_data["control_type"]),
                title=ctrl_data["title"],
                description=ctrl_data.get("description"),
                pass_criteria=ctrl_data["pass_criteria"],
                implementation_guidance=ctrl_data.get("implementation_guidance"),
                code_reference=ctrl_data.get("code_reference"),
                is_automated=ctrl_data.get("is_automated", False),
                automation_tool=ctrl_data.get("automation_tool"),
                owner=ctrl_data.get("owner"),
                review_frequency_days=ctrl_data.get("review_frequency_days", 90),
                status=ControlStatusEnum.PLANNED,  # All start as planned
            )
            self.db.add(control)
            count += 1
        return count

    def _seed_requirements(self) -> int:
        """Seed requirements from REQUIREMENTS_SEED.

        Requirements whose regulation cannot be resolved are skipped with a
        warning. A requirement counts as existing when regulation, article
        and paragraph all match.

        Returns:
            Number of requirements created.
        """
        count = 0
        for req_data in REQUIREMENTS_SEED:
            # Get regulation ID
            regulation_code = req_data["regulation_code"]
            regulation_id = self._resolve_regulation_id(regulation_code)
            if not regulation_id:
                logger.warning(
                    "Regulation %s not found, skipping requirement",
                    regulation_code,
                )
                continue

            # Check if requirement already exists
            existing = self.db.query(RequirementDB).filter(
                RequirementDB.regulation_id == regulation_id,
                RequirementDB.article == req_data["article"],
                RequirementDB.paragraph == req_data.get("paragraph"),
            ).first()
            if existing:
                continue

            requirement = RequirementDB(
                regulation_id=regulation_id,
                article=req_data["article"],
                paragraph=req_data.get("paragraph"),
                title=req_data["title"],
                description=req_data.get("description"),
                requirement_text=req_data.get("requirement_text"),
                breakpilot_interpretation=req_data.get("breakpilot_interpretation"),
                is_applicable=req_data.get("is_applicable", True),
                applicability_reason=req_data.get("applicability_reason"),
                priority=req_data.get("priority", 2),
            )
            self.db.add(requirement)
            count += 1
        return count

    def _seed_default_mappings(self) -> int:
        """Create default mappings between requirements and controls.

        For every rule in ``_DEFAULT_MAPPING_RULES`` each requirement of the
        given regulation whose article is the rule's article (or a
        sub-reference of it, e.g. "Art. 5(1)") is linked to each listed
        control with coverage level "full". Unknown controls and already
        existing mappings are skipped.

        Returns:
            Number of mappings created.
        """
        count = 0
        # control code -> ControlDB (or None if unknown); the lookup does not
        # depend on the requirement, so resolve each code at most once.
        controls_by_code: Dict[str, Optional[ControlDB]] = {}

        for reg_code, article_prefix, control_codes in self._DEFAULT_MAPPING_RULES:
            # Find requirements matching this regulation and article.
            # autoescape keeps "_" / "%" in the prefix (e.g. "O.Arch_1")
            # literal instead of acting as LIKE wildcards.
            candidates = (
                self.db.query(RequirementDB)
                .join(RegulationDB)
                .filter(
                    RegulationDB.code == reg_code,
                    RequirementDB.article.startswith(article_prefix, autoescape=True),
                )
                .all()
            )

            for req in candidates:
                # "Art. 5" must not pull in "Art. 50"; only sub-references
                # of the same article number qualify.
                if self._continues_article_number(req.article, article_prefix):
                    continue

                for control_code in control_codes:
                    # Find control
                    if control_code not in controls_by_code:
                        controls_by_code[control_code] = self.db.query(ControlDB).filter(
                            ControlDB.control_id == control_code
                        ).first()
                    control = controls_by_code[control_code]
                    if control is None:
                        continue

                    # Check if mapping exists
                    existing = self.db.query(ControlMappingDB).filter(
                        ControlMappingDB.requirement_id == req.id,
                        ControlMappingDB.control_id == control.id,
                    ).first()
                    if existing:
                        continue

                    mapping = ControlMappingDB(
                        requirement_id=req.id,
                        control_id=control.id,
                        coverage_level="full",
                    )
                    self.db.add(mapping)
                    count += 1
        return count

    def _seed_risks(self) -> int:
        """Seed risks from RISKS_SEED.

        The inherent risk is derived from likelihood and impact via
        ``RiskDB.calculate_risk_level``; every new risk starts with status
        "open".

        Returns:
            Number of risks created.
        """
        count = 0
        for risk_data in RISKS_SEED:
            # Check if risk already exists
            existing = self.db.query(RiskDB).filter(
                RiskDB.risk_id == risk_data["risk_id"]
            ).first()
            if existing:
                continue

            # Calculate inherent risk level
            inherent_risk = RiskDB.calculate_risk_level(
                risk_data["likelihood"],
                risk_data["impact"],
            )

            risk = RiskDB(
                risk_id=risk_data["risk_id"],
                title=risk_data["title"],
                description=risk_data.get("description"),
                category=risk_data["category"],
                likelihood=risk_data["likelihood"],
                impact=risk_data["impact"],
                inherent_risk=inherent_risk,
                mitigating_controls=risk_data.get("mitigating_controls", []),
                owner=risk_data.get("owner"),
                treatment_plan=risk_data.get("treatment_plan"),
                status="open",
            )
            self.db.add(risk)
            count += 1
        return count

    def _seed_service_modules(self) -> int:
        """Seed service modules from BREAKPILOT_SERVICES.

        Also fills the name -> id cache for every module in the seed data,
        whether it was just created or already present.

        Returns:
            Number of service modules created.
        """
        count = 0
        for service_data in BREAKPILOT_SERVICES:
            name = service_data["name"]

            # Check if service already exists
            existing = self.db.query(ServiceModuleDB).filter(
                ServiceModuleDB.name == name
            ).first()
            if existing:
                self._module_map[name] = existing.id
                continue

            module = ServiceModuleDB(
                name=name,
                display_name=service_data["display_name"],
                description=service_data.get("description"),
                service_type=ServiceTypeEnum(service_data["service_type"]),
                port=service_data.get("port"),
                technology_stack=service_data.get("technology_stack", []),
                repository_path=service_data.get("repository_path"),
                docker_image=service_data.get("docker_image"),
                data_categories=service_data.get("data_categories", []),
                processes_pii=service_data.get("processes_pii", False),
                processes_health_data=service_data.get("processes_health_data", False),
                ai_components=service_data.get("ai_components", False),
                is_active=True,
                criticality=service_data.get("criticality", "medium"),
                owner_team=service_data.get("owner_team"),
            )
            self.db.add(module)
            self.db.flush()  # Get the ID
            self._module_map[name] = module.id
            count += 1
        return count

    def _seed_module_regulation_mappings(self) -> int:
        """Create mappings between service modules and regulations.

        Reads the ``regulations`` list of every entry in BREAKPILOT_SERVICES.
        Modules or regulations that cannot be resolved are skipped with a
        warning; existing mappings are left untouched.

        Returns:
            Number of mappings created.
        """
        count = 0
        for service_data in BREAKPILOT_SERVICES:
            module_name = service_data["name"]

            # Get module ID
            module_id = self._resolve_module_id(module_name)
            if not module_id:
                logger.warning(
                    "Module %s not found, skipping regulation mappings",
                    module_name,
                )
                continue

            # Process regulation mappings
            for reg_mapping in service_data.get("regulations", []):
                # Find regulation by code
                regulation_code = reg_mapping["code"]
                regulation_id = self._resolve_regulation_id(regulation_code)
                if not regulation_id:
                    logger.warning(
                        "Regulation %s not found, skipping mapping for %s",
                        regulation_code,
                        module_name,
                    )
                    continue

                # Check if mapping exists
                existing = self.db.query(ModuleRegulationMappingDB).filter(
                    ModuleRegulationMappingDB.module_id == module_id,
                    ModuleRegulationMappingDB.regulation_id == regulation_id,
                ).first()
                if existing:
                    continue

                mapping = ModuleRegulationMappingDB(
                    module_id=module_id,
                    regulation_id=regulation_id,
                    relevance_level=RelevanceLevelEnum(reg_mapping["relevance"]),
                    notes=reg_mapping.get("notes"),
                )
                self.db.add(mapping)
                count += 1
        return count

    def _seed_soa(self) -> int:
        """
        Seed Statement of Applicability (SoA) entries from ISO 27001:2022 Annex A.

        Creates one SoA entry per control in ISO27001_ANNEX_A_CONTROLS that
        does not have an entry yet. New entries start with implementation
        status "planned" and empty evidence / risk assessment notes.

        Returns:
            Number of SoA entries created.
        """
        count = 0
        for annex_control in ISO27001_ANNEX_A_CONTROLS:
            control_id = annex_control["control_id"]

            # Check if SoA entry already exists
            existing = self.db.query(StatementOfApplicabilityDB).filter(
                StatementOfApplicabilityDB.annex_a_control == control_id
            ).first()
            if existing:
                continue

            # Create SoA entry
            soa_entry = StatementOfApplicabilityDB(
                annex_a_control=control_id,
                annex_a_title=annex_control["title"],
                annex_a_category=annex_control["category"],
                is_applicable=annex_control.get("default_applicable", True),
                applicability_justification=annex_control.get("description", ""),
                implementation_status="planned",
                implementation_notes=annex_control.get("implementation_guidance", ""),
                breakpilot_control_ids=annex_control.get("breakpilot_controls", []),
                evidence_description="",
                risk_assessment_notes="",
            )
            self.db.add(soa_entry)
            count += 1

        logger.info("Seeded %s SoA entries from ISO 27001:2022 Annex A", count)
        return count