"""
Compliance Seeder Service.

Seeds the database with initial regulations, controls, and requirements.
"""

import logging
from typing import Callable, Dict, Optional, TypeVar

from sqlalchemy.orm import Session

from ..db.models import (
    RegulationDB,
    RequirementDB,
    ControlDB,
    ControlMappingDB,
    RiskDB,
    ServiceModuleDB,
    ModuleRegulationMappingDB,
    StatementOfApplicabilityDB,
    RegulationTypeEnum,
    ControlTypeEnum,
    ControlDomainEnum,
    ControlStatusEnum,
    ServiceTypeEnum,
    RelevanceLevelEnum,
)
from ..data.regulations import REGULATIONS_SEED
from ..data.controls import CONTROLS_SEED
from ..data.requirements import REQUIREMENTS_SEED
from ..data.risks import RISKS_SEED
from ..data.service_modules import BREAKPILOT_SERVICES
from ..data.iso27001_annex_a import ISO27001_ANNEX_A_CONTROLS

logger = logging.getLogger(__name__)

_T = TypeVar("_T")


class ComplianceSeeder:
    """Seeds the compliance database with initial data.

    Every ``_seed_*`` step is insert-only: a row whose natural key (code,
    control_id, risk_id, name, ...) is already present is skipped, never
    updated. The public ``seed_*`` entry points commit on success and roll
    the session back on failure; the session itself is owned by the caller
    and is never closed here.
    """

    def __init__(self, db: Session):
        """Store the session and initialise the ID lookup caches."""
        self.db = db
        self._regulation_map: Dict[str, str] = {}  # code -> id
        self._module_map: Dict[str, str] = {}  # name -> id

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _commit_or_rollback(self, seed_step: Callable[[], _T]) -> _T:
        """Run ``seed_step`` and commit; roll back and re-raise on any error.

        Shared by all public entry points so that a failed seed never leaves
        the session with a half-applied, uncommitted transaction.
        """
        try:
            outcome = seed_step()
            self.db.commit()
        except Exception as exc:
            self.db.rollback()
            # logger.exception keeps the traceback alongside the message.
            logger.exception("Seeding failed: %s", exc)
            raise
        return outcome

    def _resolve_regulation_id(self, regulation_code: str) -> Optional[str]:
        """Return the ID for a regulation code, or None if it does not exist.

        Looks in the in-memory cache first and falls back to the database,
        caching a database hit for later calls.
        """
        cached_id = self._regulation_map.get(regulation_code)
        if cached_id:
            return cached_id

        regulation = self.db.query(RegulationDB).filter(
            RegulationDB.code == regulation_code
        ).first()
        if regulation is None:
            return None

        self._regulation_map[regulation_code] = regulation.id
        return regulation.id

    def _resolve_module_id(self, module_name: str) -> Optional[str]:
        """Return the ID for a service module name, or None if it does not exist.

        Looks in the in-memory cache first and falls back to the database,
        caching a database hit for later calls.
        """
        cached_id = self._module_map.get(module_name)
        if cached_id:
            return cached_id

        module = self.db.query(ServiceModuleDB).filter(
            ServiceModuleDB.name == module_name
        ).first()
        if module is None:
            return None

        self._module_map[module_name] = module.id
        return module.id

    @staticmethod
    def _article_matches(article: Optional[str], article_prefix: str) -> bool:
        """Return True if ``article`` is ``article_prefix`` or a sub-part of it.

        The comparison is literal and case-sensitive. A match requires that
        the character following the prefix (if any) is not a digit, so
        "Art. 5" matches "Art. 5" and "Art. 5(1)" but not "Art. 50", and
        "O.Auth_1" does not match "O.Auth_10".
        """
        if not article or not article.startswith(article_prefix):
            return False
        continuation = article[len(article_prefix):]
        return not continuation[:1].isdigit()

    # ------------------------------------------------------------------
    # Full seed
    # ------------------------------------------------------------------

    def seed_all(self, force: bool = False) -> Dict[str, int]:
        """
        Seed all compliance data.

        Args:
            force: If True, run the seed steps even if regulations already
                exist. Existing rows are still skipped, not overwritten;
                only missing rows are inserted.

        Returns:
            Dictionary with counts of newly inserted items per category.
            All counts are zero when seeding was skipped.

        Raises:
            Exception: Any error raised by a seed step or the commit is
                re-raised after the session has been rolled back.
        """
        results = {
            "regulations": 0,
            "controls": 0,
            "requirements": 0,
            "mappings": 0,
            "risks": 0,
            "service_modules": 0,
            "module_regulation_mappings": 0,
            "soa_entries": 0,
        }

        # Check if already seeded
        existing_regulations = self.db.query(RegulationDB).count()
        if existing_regulations > 0 and not force:
            logger.info(f"Database already has {existing_regulations} regulations, skipping seed")
            return results

        def run_all_steps() -> None:
            # Order matters: later steps look up rows created by earlier ones
            # (regulations -> controls -> requirements -> mappings -> risks
            # -> service modules -> module/regulation mappings -> SoA).
            results["regulations"] = self._seed_regulations()
            results["controls"] = self._seed_controls()
            results["requirements"] = self._seed_requirements()
            results["mappings"] = self._seed_default_mappings()
            results["risks"] = self._seed_risks()
            results["service_modules"] = self._seed_service_modules()
            results["module_regulation_mappings"] = self._seed_module_regulation_mappings()
            results["soa_entries"] = self._seed_soa()

        self._commit_or_rollback(run_all_steps)
        logger.info(f"Seeding completed: {results}")
        return results

    # ------------------------------------------------------------------
    # Regulations, controls, requirements
    # ------------------------------------------------------------------

    def _seed_regulations(self) -> int:
        """Seed regulations from REGULATIONS_SEED.

        Also fills ``self._regulation_map`` (code -> id) for both existing
        and newly inserted regulations.

        Returns:
            Number of regulations inserted.
        """
        count = 0
        for reg_data in REGULATIONS_SEED:
            # Check if regulation already exists
            existing = self.db.query(RegulationDB).filter(
                RegulationDB.code == reg_data["code"]
            ).first()
            if existing:
                self._regulation_map[reg_data["code"]] = existing.id
                continue

            regulation = RegulationDB(
                code=reg_data["code"],
                name=reg_data["name"],
                full_name=reg_data.get("full_name"),
                regulation_type=RegulationTypeEnum(reg_data["regulation_type"]),
                source_url=reg_data.get("source_url"),
                local_pdf_path=reg_data.get("local_pdf_path"),
                effective_date=reg_data.get("effective_date"),
                description=reg_data.get("description"),
                is_active=reg_data.get("is_active", True),
            )
            self.db.add(regulation)
            self.db.flush()  # Get the ID
            self._regulation_map[reg_data["code"]] = regulation.id
            count += 1

        return count

    def _seed_controls(self) -> int:
        """Seed controls from CONTROLS_SEED.

        Returns:
            Number of controls inserted.
        """
        count = 0
        for ctrl_data in CONTROLS_SEED:
            # Check if control already exists
            existing = self.db.query(ControlDB).filter(
                ControlDB.control_id == ctrl_data["control_id"]
            ).first()
            if existing:
                continue

            control = ControlDB(
                control_id=ctrl_data["control_id"],
                domain=ControlDomainEnum(ctrl_data["domain"]),
                control_type=ControlTypeEnum(ctrl_data["control_type"]),
                title=ctrl_data["title"],
                description=ctrl_data.get("description"),
                pass_criteria=ctrl_data["pass_criteria"],
                implementation_guidance=ctrl_data.get("implementation_guidance"),
                code_reference=ctrl_data.get("code_reference"),
                is_automated=ctrl_data.get("is_automated", False),
                automation_tool=ctrl_data.get("automation_tool"),
                owner=ctrl_data.get("owner"),
                review_frequency_days=ctrl_data.get("review_frequency_days", 90),
                status=ControlStatusEnum.PLANNED,  # All start as planned
            )
            self.db.add(control)
            count += 1

        return count

    def _seed_requirements(self) -> int:
        """Seed requirements from REQUIREMENTS_SEED.

        A requirement whose regulation code cannot be resolved is skipped
        with a warning. A requirement is considered existing when the same
        (regulation, article, paragraph) triple is already present.

        Returns:
            Number of requirements inserted.
        """
        count = 0
        for req_data in REQUIREMENTS_SEED:
            # Get regulation ID (cache first, then database)
            regulation_code = req_data["regulation_code"]
            regulation_id = self._resolve_regulation_id(regulation_code)
            if regulation_id is None:
                logger.warning(f"Regulation {regulation_code} not found, skipping requirement")
                continue

            # Check if requirement already exists
            existing = self.db.query(RequirementDB).filter(
                RequirementDB.regulation_id == regulation_id,
                RequirementDB.article == req_data["article"],
                RequirementDB.paragraph == req_data.get("paragraph"),
            ).first()
            if existing:
                continue

            requirement = RequirementDB(
                regulation_id=regulation_id,
                article=req_data["article"],
                paragraph=req_data.get("paragraph"),
                title=req_data["title"],
                description=req_data.get("description"),
                requirement_text=req_data.get("requirement_text"),
                breakpilot_interpretation=req_data.get("breakpilot_interpretation"),
                is_applicable=req_data.get("is_applicable", True),
                applicability_reason=req_data.get("applicability_reason"),
                priority=req_data.get("priority", 2),
            )
            self.db.add(requirement)
            count += 1

        return count

    def _seed_default_mappings(self) -> int:
        """Create default mappings between requirements and controls.

        Each rule is ``(regulation code, article prefix, control IDs)``.
        Every requirement of that regulation whose article equals the prefix
        or is a sub-part of it (see ``_article_matches``) is mapped to each
        listed control that exists. Existing mappings are left untouched.

        Returns:
            Number of mappings inserted.
        """
        # Define default mappings based on domain/regulation relationships
        mapping_rules = [
            # GDPR Privacy mappings
            ("GDPR", "Art. 5", ["PRIV-001", "PRIV-003", "PRIV-006", "PRIV-007"]),
            ("GDPR", "Art. 25", ["PRIV-003", "PRIV-007"]),
            ("GDPR", "Art. 28", ["PRIV-005"]),
            ("GDPR", "Art. 30", ["PRIV-001"]),
            ("GDPR", "Art. 32", ["CRYPTO-001", "CRYPTO-002", "CRYPTO-003", "IAM-001", "OPS-002"]),
            ("GDPR", "Art. 35", ["PRIV-002", "AI-005"]),
            # AI Act mappings
            ("AIACT", "Art. 9", ["AI-001", "AI-004", "AI-005"]),
            ("AIACT", "Art. 13", ["AI-002", "AI-003"]),
            ("AIACT", "Art. 14", ["AI-003"]),
            ("AIACT", "Art. 15", ["AI-004", "SDLC-001", "SDLC-002"]),
            ("AIACT", "Art. 50", ["AI-002"]),
            # CRA mappings
            ("CRA", "Art. 10", ["SDLC-001", "SDLC-002", "SDLC-006"]),
            ("CRA", "Art. 11", ["GOV-005", "OPS-003"]),
            ("CRA", "Art. 13", ["CRA-001", "SDLC-005"]),
            ("CRA", "Art. 14", ["CRA-003", "OPS-004"]),
            ("CRA", "Art. 15", ["CRA-004"]),
            # BSI-TR mappings
            ("BSI-TR-03161-1", "O.Arch_1", ["GOV-001", "GOV-002", "GOV-004"]),
            ("BSI-TR-03161-1", "O.Auth_1", ["IAM-001", "IAM-002", "IAM-004"]),
            ("BSI-TR-03161-1", "O.Cryp_1", ["CRYPTO-001", "CRYPTO-002", "CRYPTO-003", "CRYPTO-004"]),
            ("BSI-TR-03161-1", "O.Data_1", ["CRYPTO-001", "CRYPTO-002", "PRIV-007"]),
            ("BSI-TR-03161-2", "O.Auth_2", ["IAM-004"]),
            ("BSI-TR-03161-2", "O.Source_1", ["SDLC-001", "SDLC-004"]),
            ("BSI-TR-03161-3", "O.Back_1", ["CRYPTO-002"]),
            ("BSI-TR-03161-3", "O.Ops_1", ["OPS-001", "OPS-002", "OPS-005"]),
        ]

        # Controls and requirements added earlier in this transaction must be
        # visible to the queries below and must have their IDs assigned; do
        # not depend on the session having autoflush enabled.
        self.db.flush()

        # control_id -> ControlDB (or None when missing), so each control is
        # looked up once instead of once per requirement.
        control_cache: Dict[str, Optional[ControlDB]] = {}

        count = 0
        for reg_code, article_prefix, control_ids in mapping_rules:
            # Coarse SQL filter; LIKE treats "_" as a wildcard and a bare
            # prefix also matches longer article numbers, so the result is
            # narrowed with a literal check afterwards.
            candidates = self.db.query(RequirementDB).join(RegulationDB).filter(
                RegulationDB.code == reg_code,
                RequirementDB.article.like(f"{article_prefix}%"),
            ).all()
            requirements = [
                req for req in candidates
                if self._article_matches(req.article, article_prefix)
            ]

            for req in requirements:
                for control_id in control_ids:
                    # Find control (cached per seeding run)
                    if control_id not in control_cache:
                        control_cache[control_id] = self.db.query(ControlDB).filter(
                            ControlDB.control_id == control_id
                        ).first()
                    control = control_cache[control_id]
                    if control is None:
                        continue

                    # Check if mapping exists
                    existing = self.db.query(ControlMappingDB).filter(
                        ControlMappingDB.requirement_id == req.id,
                        ControlMappingDB.control_id == control.id,
                    ).first()
                    if existing:
                        continue

                    mapping = ControlMappingDB(
                        requirement_id=req.id,
                        control_id=control.id,
                        coverage_level="full",
                    )
                    self.db.add(mapping)
                    count += 1

        return count

    def seed_regulations_only(self) -> int:
        """Seed only regulations (useful for incremental updates).

        Commits on success; rolls back and re-raises on failure.

        Returns:
            Number of regulations inserted.
        """
        return self._commit_or_rollback(self._seed_regulations)

    def seed_controls_only(self) -> int:
        """Seed only controls (useful for incremental updates).

        Commits on success; rolls back and re-raises on failure.

        Returns:
            Number of controls inserted.
        """
        return self._commit_or_rollback(self._seed_controls)

    # ------------------------------------------------------------------
    # Risks
    # ------------------------------------------------------------------

    def _seed_risks(self) -> int:
        """Seed risks from RISKS_SEED.

        The inherent risk level is derived from likelihood and impact via
        ``RiskDB.calculate_risk_level``; every new risk starts as "open".

        Returns:
            Number of risks inserted.
        """
        count = 0
        for risk_data in RISKS_SEED:
            # Check if risk already exists
            existing = self.db.query(RiskDB).filter(
                RiskDB.risk_id == risk_data["risk_id"]
            ).first()
            if existing:
                continue

            # Calculate inherent risk level
            inherent_risk = RiskDB.calculate_risk_level(
                risk_data["likelihood"],
                risk_data["impact"]
            )

            risk = RiskDB(
                risk_id=risk_data["risk_id"],
                title=risk_data["title"],
                description=risk_data.get("description"),
                category=risk_data["category"],
                likelihood=risk_data["likelihood"],
                impact=risk_data["impact"],
                inherent_risk=inherent_risk,
                mitigating_controls=risk_data.get("mitigating_controls", []),
                owner=risk_data.get("owner"),
                treatment_plan=risk_data.get("treatment_plan"),
                status="open",
            )
            self.db.add(risk)
            count += 1

        return count

    def seed_risks_only(self) -> int:
        """Seed only risks (useful for incremental updates).

        Commits on success; rolls back and re-raises on failure.

        Returns:
            Number of risks inserted.
        """
        return self._commit_or_rollback(self._seed_risks)

    # ------------------------------------------------------------------
    # Service modules
    # ------------------------------------------------------------------

    def _seed_service_modules(self) -> int:
        """Seed service modules from BREAKPILOT_SERVICES.

        Also fills ``self._module_map`` (name -> id) for both existing and
        newly inserted modules.

        Returns:
            Number of service modules inserted.
        """
        count = 0
        for service_data in BREAKPILOT_SERVICES:
            # Check if service already exists
            existing = self.db.query(ServiceModuleDB).filter(
                ServiceModuleDB.name == service_data["name"]
            ).first()
            if existing:
                self._module_map[service_data["name"]] = existing.id
                continue

            module = ServiceModuleDB(
                name=service_data["name"],
                display_name=service_data["display_name"],
                description=service_data.get("description"),
                service_type=ServiceTypeEnum(service_data["service_type"]),
                port=service_data.get("port"),
                technology_stack=service_data.get("technology_stack", []),
                repository_path=service_data.get("repository_path"),
                docker_image=service_data.get("docker_image"),
                data_categories=service_data.get("data_categories", []),
                processes_pii=service_data.get("processes_pii", False),
                processes_health_data=service_data.get("processes_health_data", False),
                ai_components=service_data.get("ai_components", False),
                is_active=True,
                criticality=service_data.get("criticality", "medium"),
                owner_team=service_data.get("owner_team"),
            )
            self.db.add(module)
            self.db.flush()  # Get the ID
            self._module_map[service_data["name"]] = module.id
            count += 1

        return count

    def _seed_module_regulation_mappings(self) -> int:
        """Create mappings between service modules and regulations.

        Reads the optional ``"regulations"`` list of each service entry.
        Entries whose module or regulation cannot be resolved are skipped
        with a warning; existing (module, regulation) pairs are left as is.

        Returns:
            Number of module/regulation mappings inserted.
        """
        count = 0
        for service_data in BREAKPILOT_SERVICES:
            # Get module ID (cache first, then database)
            module_id = self._resolve_module_id(service_data["name"])
            if module_id is None:
                logger.warning(f"Module {service_data['name']} not found, skipping regulation mappings")
                continue

            # Process regulation mappings
            regulations = service_data.get("regulations", [])
            for reg_mapping in regulations:
                # Find regulation by code
                regulation_code = reg_mapping["code"]
                regulation_id = self._resolve_regulation_id(regulation_code)
                if regulation_id is None:
                    logger.warning(f"Regulation {regulation_code} not found, skipping mapping for {service_data['name']}")
                    continue

                # Check if mapping exists
                existing = self.db.query(ModuleRegulationMappingDB).filter(
                    ModuleRegulationMappingDB.module_id == module_id,
                    ModuleRegulationMappingDB.regulation_id == regulation_id,
                ).first()
                if existing:
                    continue

                mapping = ModuleRegulationMappingDB(
                    module_id=module_id,
                    regulation_id=regulation_id,
                    relevance_level=RelevanceLevelEnum(reg_mapping["relevance"]),
                    notes=reg_mapping.get("notes"),
                )
                self.db.add(mapping)
                count += 1

        return count

    def seed_service_modules_only(self) -> int:
        """Seed only service modules (useful for incremental updates).

        If the regulation cache is empty, regulations are seeded first so
        that module/regulation mappings can be resolved. Commits on success;
        rolls back and re-raises on failure.

        Returns:
            Inserted service modules plus inserted module/regulation
            mappings, as a single total.
        """
        results = {
            "service_modules": 0,
            "module_regulation_mappings": 0,
        }

        def run_steps() -> None:
            # Ensure regulations are loaded first
            if not self._regulation_map:
                self._seed_regulations()

            results["service_modules"] = self._seed_service_modules()
            results["module_regulation_mappings"] = self._seed_module_regulation_mappings()

        self._commit_or_rollback(run_steps)
        logger.info(f"Service modules seeding completed: {results}")
        return results["service_modules"] + results["module_regulation_mappings"]

    # ------------------------------------------------------------------
    # Statement of Applicability
    # ------------------------------------------------------------------

    def _seed_soa(self) -> int:
        """
        Seed Statement of Applicability (SoA) entries from ISO 27001:2022 Annex A.

        Creates one SoA entry per item in ISO27001_ANNEX_A_CONTROLS that does
        not yet have one (the original notes state this covers all 93 Annex A
        controls and is mandatory for ISO 27001 certification). New entries
        start with implementation status "planned".

        Returns:
            Number of SoA entries inserted.
        """
        count = 0
        for annex_control in ISO27001_ANNEX_A_CONTROLS:
            control_id = annex_control["control_id"]

            # Check if SoA entry already exists
            existing = self.db.query(StatementOfApplicabilityDB).filter(
                StatementOfApplicabilityDB.annex_a_control == control_id
            ).first()
            if existing:
                continue

            # Create SoA entry
            soa_entry = StatementOfApplicabilityDB(
                annex_a_control=control_id,
                annex_a_title=annex_control["title"],
                annex_a_category=annex_control["category"],
                is_applicable=annex_control.get("default_applicable", True),
                applicability_justification=annex_control.get("description", ""),
                implementation_status="planned",
                implementation_notes=annex_control.get("implementation_guidance", ""),
                breakpilot_control_ids=annex_control.get("breakpilot_controls", []),
                evidence_description="",
                risk_assessment_notes="",
            )
            self.db.add(soa_entry)
            count += 1

        logger.info(f"Seeded {count} SoA entries from ISO 27001:2022 Annex A")
        return count

    def seed_soa_only(self) -> int:
        """
        Seed only SoA entries (useful for incremental updates).

        Inserts any missing ISO 27001:2022 Annex A entries into the SoA.
        Commits on success; rolls back and re-raises on failure.

        Returns:
            Number of SoA entries inserted.
        """
        count = self._commit_or_rollback(self._seed_soa)
        logger.info(f"SoA seeding completed: {count} entries")
        return count