fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
488
backend/compliance/services/seeder.py
Normal file
488
backend/compliance/services/seeder.py
Normal file
@@ -0,0 +1,488 @@
|
||||
"""
|
||||
Compliance Seeder Service.
|
||||
|
||||
Seeds the database with initial regulations, controls, and requirements.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..db.models import (
|
||||
RegulationDB,
|
||||
RequirementDB,
|
||||
ControlDB,
|
||||
ControlMappingDB,
|
||||
RiskDB,
|
||||
ServiceModuleDB,
|
||||
ModuleRegulationMappingDB,
|
||||
StatementOfApplicabilityDB,
|
||||
RegulationTypeEnum,
|
||||
ControlTypeEnum,
|
||||
ControlDomainEnum,
|
||||
ControlStatusEnum,
|
||||
RiskLevelEnum,
|
||||
ServiceTypeEnum,
|
||||
RelevanceLevelEnum,
|
||||
)
|
||||
from ..data.regulations import REGULATIONS_SEED
|
||||
from ..data.controls import CONTROLS_SEED
|
||||
from ..data.requirements import REQUIREMENTS_SEED
|
||||
from ..data.risks import RISKS_SEED
|
||||
from ..data.service_modules import BREAKPILOT_SERVICES
|
||||
from ..data.iso27001_annex_a import ISO27001_ANNEX_A_CONTROLS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ComplianceSeeder:
    """Seeds the compliance database with initial data.

    Every seeding step is additive: a row whose lookup key (regulation code,
    control ID, risk ID, module name, ...) already exists is skipped and is
    never updated or deleted.
    """

    # (regulation code, article prefix, control IDs) triples used to create
    # the default requirement -> control mappings.
    _DEFAULT_MAPPING_RULES = (
        # GDPR Privacy mappings
        ("GDPR", "Art. 5", ("PRIV-001", "PRIV-003", "PRIV-006", "PRIV-007")),
        ("GDPR", "Art. 25", ("PRIV-003", "PRIV-007")),
        ("GDPR", "Art. 28", ("PRIV-005",)),
        ("GDPR", "Art. 30", ("PRIV-001",)),
        ("GDPR", "Art. 32", ("CRYPTO-001", "CRYPTO-002", "CRYPTO-003", "IAM-001", "OPS-002")),
        ("GDPR", "Art. 35", ("PRIV-002", "AI-005")),
        # AI Act mappings
        ("AIACT", "Art. 9", ("AI-001", "AI-004", "AI-005")),
        ("AIACT", "Art. 13", ("AI-002", "AI-003")),
        ("AIACT", "Art. 14", ("AI-003",)),
        ("AIACT", "Art. 15", ("AI-004", "SDLC-001", "SDLC-002")),
        ("AIACT", "Art. 50", ("AI-002",)),
        # CRA mappings
        ("CRA", "Art. 10", ("SDLC-001", "SDLC-002", "SDLC-006")),
        ("CRA", "Art. 11", ("GOV-005", "OPS-003")),
        ("CRA", "Art. 13", ("CRA-001", "SDLC-005")),
        ("CRA", "Art. 14", ("CRA-003", "OPS-004")),
        ("CRA", "Art. 15", ("CRA-004",)),
        # BSI-TR mappings
        ("BSI-TR-03161-1", "O.Arch_1", ("GOV-001", "GOV-002", "GOV-004")),
        ("BSI-TR-03161-1", "O.Auth_1", ("IAM-001", "IAM-002", "IAM-004")),
        ("BSI-TR-03161-1", "O.Cryp_1", ("CRYPTO-001", "CRYPTO-002", "CRYPTO-003", "CRYPTO-004")),
        ("BSI-TR-03161-1", "O.Data_1", ("CRYPTO-001", "CRYPTO-002", "PRIV-007")),
        ("BSI-TR-03161-2", "O.Auth_2", ("IAM-004",)),
        ("BSI-TR-03161-2", "O.Source_1", ("SDLC-001", "SDLC-004")),
        ("BSI-TR-03161-3", "O.Back_1", ("CRYPTO-002",)),
        ("BSI-TR-03161-3", "O.Ops_1", ("OPS-001", "OPS-002", "OPS-005")),
    )

    def __init__(self, db: "Session"):
        """Bind the seeder to a database session.

        Args:
            db: SQLAlchemy session used for all queries, inserts, commits and
                rollbacks. (The annotation is quoted so it is not evaluated
                when the class is defined.)
        """
        self.db = db
        self._regulation_map: Dict[str, str] = {}  # code -> id
        self._module_map: Dict[str, str] = {}  # name -> id

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _article_matches(article: Optional[str], article_prefix: str) -> bool:
        """Return True if *article* is *article_prefix* itself or a sub-item of it.

        A plain prefix test would let "Art. 5" match "Art. 50" and "O.Arch_1"
        match "O.Arch_10". The prefix therefore only counts when it is not
        immediately followed by another digit, so "Art. 5", "Art. 5 Abs. 1"
        and "Art. 5(1)" match while "Art. 50" does not. The comparison is
        literal and case-sensitive.
        """
        if not article or not article.startswith(article_prefix):
            return False
        following = article[len(article_prefix):len(article_prefix) + 1]
        return not following.isdigit()

    def _resolve_regulation_id(self, regulation_code: str) -> Optional[str]:
        """Return the ID for *regulation_code*, or None if it is unknown.

        Looks in the in-memory cache first and falls back to the database,
        caching a database hit for later calls.
        """
        cached_id = self._regulation_map.get(regulation_code)
        if cached_id:
            return cached_id

        regulation = self.db.query(RegulationDB).filter(
            RegulationDB.code == regulation_code
        ).first()
        if not regulation:
            return None

        self._regulation_map[regulation_code] = regulation.id
        return regulation.id

    def _seed_and_commit(self, seed_step):
        """Run *seed_step*, commit, and return the step's result.

        On any failure the session is rolled back (mirroring ``seed_all``)
        and the exception is re-raised, so the session is never left in a
        failed transaction.
        """
        try:
            outcome = seed_step()
            self.db.commit()
        except Exception as e:
            self.db.rollback()
            logger.exception(f"Seeding failed: {e}")
            raise
        return outcome

    # ------------------------------------------------------------------
    # Public entry points
    # ------------------------------------------------------------------

    def seed_all(self, force: bool = False) -> Dict[str, int]:
        """
        Seed all compliance data.

        Args:
            force: If True, run the seeding steps even when regulations
                already exist. Existing rows are still skipped by each step;
                ``force`` only bypasses the early exit.

        Returns:
            Dictionary with counts of newly inserted items per category
            (all zeros when seeding was skipped).

        Raises:
            Exception: Any error from a seeding step or the commit is
                re-raised after the session has been rolled back.
        """
        # Order matters: regulations first, then controls, requirements,
        # mappings, risks, service modules and their mappings, then the SoA.
        steps = (
            ("regulations", self._seed_regulations),
            ("controls", self._seed_controls),
            ("requirements", self._seed_requirements),
            ("mappings", self._seed_default_mappings),
            ("risks", self._seed_risks),
            ("service_modules", self._seed_service_modules),
            ("module_regulation_mappings", self._seed_module_regulation_mappings),
            ("soa_entries", self._seed_soa),
        )
        results = {name: 0 for name, _ in steps}

        # Check if already seeded
        existing_regulations = self.db.query(RegulationDB).count()
        if existing_regulations > 0 and not force:
            logger.info(f"Database already has {existing_regulations} regulations, skipping seed")
            return results

        try:
            for name, step in steps:
                results[name] = step()
            self.db.commit()
        except Exception as e:
            self.db.rollback()
            # logger.exception keeps the traceback alongside the message.
            logger.exception(f"Seeding failed: {e}")
            raise

        logger.info(f"Seeding completed: {results}")
        return results

    def _seed_regulations(self) -> int:
        """Seed regulations from REGULATIONS_SEED.

        Returns:
            Number of regulations inserted. Existing and new regulation IDs
            are both recorded in the code -> id cache.
        """
        count = 0
        for reg_data in REGULATIONS_SEED:
            code = reg_data["code"]

            # Check if regulation already exists
            existing = self.db.query(RegulationDB).filter(
                RegulationDB.code == code
            ).first()
            if existing:
                self._regulation_map[code] = existing.id
                continue

            regulation = RegulationDB(
                code=code,
                name=reg_data["name"],
                full_name=reg_data.get("full_name"),
                regulation_type=RegulationTypeEnum(reg_data["regulation_type"]),
                source_url=reg_data.get("source_url"),
                local_pdf_path=reg_data.get("local_pdf_path"),
                effective_date=reg_data.get("effective_date"),
                description=reg_data.get("description"),
                is_active=reg_data.get("is_active", True),
            )
            self.db.add(regulation)
            self.db.flush()  # Get the ID
            self._regulation_map[code] = regulation.id
            count += 1

        return count

    def _seed_controls(self) -> int:
        """Seed controls from CONTROLS_SEED.

        Returns:
            Number of controls inserted; controls whose ``control_id``
            already exists are skipped.
        """
        count = 0
        for ctrl_data in CONTROLS_SEED:
            # Check if control already exists
            existing = self.db.query(ControlDB).filter(
                ControlDB.control_id == ctrl_data["control_id"]
            ).first()
            if existing:
                continue

            control = ControlDB(
                control_id=ctrl_data["control_id"],
                domain=ControlDomainEnum(ctrl_data["domain"]),
                control_type=ControlTypeEnum(ctrl_data["control_type"]),
                title=ctrl_data["title"],
                description=ctrl_data.get("description"),
                pass_criteria=ctrl_data["pass_criteria"],
                implementation_guidance=ctrl_data.get("implementation_guidance"),
                code_reference=ctrl_data.get("code_reference"),
                is_automated=ctrl_data.get("is_automated", False),
                automation_tool=ctrl_data.get("automation_tool"),
                owner=ctrl_data.get("owner"),
                review_frequency_days=ctrl_data.get("review_frequency_days", 90),
                status=ControlStatusEnum.PLANNED,  # All start as planned
            )
            self.db.add(control)
            count += 1

        return count

    def _seed_requirements(self) -> int:
        """Seed requirements from REQUIREMENTS_SEED.

        Requirements whose regulation cannot be resolved are skipped with a
        warning; requirements that already exist for the same regulation,
        article and paragraph are skipped silently.

        Returns:
            Number of requirements inserted.
        """
        count = 0
        for req_data in REQUIREMENTS_SEED:
            regulation_code = req_data["regulation_code"]
            regulation_id = self._resolve_regulation_id(regulation_code)
            if not regulation_id:
                logger.warning(f"Regulation {regulation_code} not found, skipping requirement")
                continue

            # Check if requirement already exists
            existing = self.db.query(RequirementDB).filter(
                RequirementDB.regulation_id == regulation_id,
                RequirementDB.article == req_data["article"],
                RequirementDB.paragraph == req_data.get("paragraph"),
            ).first()
            if existing:
                continue

            requirement = RequirementDB(
                regulation_id=regulation_id,
                article=req_data["article"],
                paragraph=req_data.get("paragraph"),
                title=req_data["title"],
                description=req_data.get("description"),
                requirement_text=req_data.get("requirement_text"),
                breakpilot_interpretation=req_data.get("breakpilot_interpretation"),
                is_applicable=req_data.get("is_applicable", True),
                applicability_reason=req_data.get("applicability_reason"),
                priority=req_data.get("priority", 2),
            )
            self.db.add(requirement)
            count += 1

        return count

    def _seed_default_mappings(self) -> int:
        """Create default mappings between requirements and controls.

        For every rule in ``_DEFAULT_MAPPING_RULES`` each matching
        requirement is linked to each listed control that exists, with
        ``coverage_level="full"``. Existing mappings are left untouched.

        Returns:
            Number of mappings inserted.
        """
        # Make pending controls/requirements visible to the queries below
        # even when the session was created with autoflush disabled.
        self.db.flush()

        count = 0
        for reg_code, article_prefix, control_ids in self._DEFAULT_MAPPING_RULES:
            # LIKE is only a coarse pre-filter: it would also select e.g.
            # "Art. 50" for "Art. 5" and treats "_" as a wildcard, so the
            # candidates are narrowed with a literal boundary check.
            candidates = self.db.query(RequirementDB).join(RegulationDB).filter(
                RegulationDB.code == reg_code,
                RequirementDB.article.like(f"{article_prefix}%"),
            ).all()
            requirements = [
                req for req in candidates
                if self._article_matches(req.article, article_prefix)
            ]
            if not requirements:
                continue

            # Resolve each control once per rule instead of once per requirement.
            controls = []
            for control_id in control_ids:
                control = self.db.query(ControlDB).filter(
                    ControlDB.control_id == control_id
                ).first()
                if control:
                    controls.append(control)

            for req in requirements:
                for control in controls:
                    # Check if mapping exists
                    existing = self.db.query(ControlMappingDB).filter(
                        ControlMappingDB.requirement_id == req.id,
                        ControlMappingDB.control_id == control.id,
                    ).first()
                    if existing:
                        continue

                    self.db.add(ControlMappingDB(
                        requirement_id=req.id,
                        control_id=control.id,
                        coverage_level="full",
                    ))
                    count += 1

        return count

    def seed_regulations_only(self) -> int:
        """Seed only regulations (useful for incremental updates).

        Returns:
            Number of regulations inserted. Rolls back and re-raises on error.
        """
        return self._seed_and_commit(self._seed_regulations)

    def seed_controls_only(self) -> int:
        """Seed only controls (useful for incremental updates).

        Returns:
            Number of controls inserted. Rolls back and re-raises on error.
        """
        return self._seed_and_commit(self._seed_controls)

    def _seed_risks(self) -> int:
        """Seed risks from RISKS_SEED.

        The inherent risk level is derived from likelihood and impact via
        ``RiskDB.calculate_risk_level``; new risks start with status "open".

        Returns:
            Number of risks inserted; risks whose ``risk_id`` already exists
            are skipped.
        """
        count = 0
        for risk_data in RISKS_SEED:
            # Check if risk already exists
            existing = self.db.query(RiskDB).filter(
                RiskDB.risk_id == risk_data["risk_id"]
            ).first()
            if existing:
                continue

            likelihood = risk_data["likelihood"]
            impact = risk_data["impact"]

            risk = RiskDB(
                risk_id=risk_data["risk_id"],
                title=risk_data["title"],
                description=risk_data.get("description"),
                category=risk_data["category"],
                likelihood=likelihood,
                impact=impact,
                inherent_risk=RiskDB.calculate_risk_level(likelihood, impact),
                mitigating_controls=risk_data.get("mitigating_controls", []),
                owner=risk_data.get("owner"),
                treatment_plan=risk_data.get("treatment_plan"),
                status="open",
            )
            self.db.add(risk)
            count += 1

        return count

    def seed_risks_only(self) -> int:
        """Seed only risks (useful for incremental updates).

        Returns:
            Number of risks inserted. Rolls back and re-raises on error.
        """
        return self._seed_and_commit(self._seed_risks)

    def _seed_service_modules(self) -> int:
        """Seed service modules from BREAKPILOT_SERVICES.

        Returns:
            Number of modules inserted. Existing and new module IDs are both
            recorded in the name -> id cache.
        """
        count = 0
        for service_data in BREAKPILOT_SERVICES:
            name = service_data["name"]

            # Check if service already exists
            existing = self.db.query(ServiceModuleDB).filter(
                ServiceModuleDB.name == name
            ).first()
            if existing:
                self._module_map[name] = existing.id
                continue

            module = ServiceModuleDB(
                name=name,
                display_name=service_data["display_name"],
                description=service_data.get("description"),
                service_type=ServiceTypeEnum(service_data["service_type"]),
                port=service_data.get("port"),
                technology_stack=service_data.get("technology_stack", []),
                repository_path=service_data.get("repository_path"),
                docker_image=service_data.get("docker_image"),
                data_categories=service_data.get("data_categories", []),
                processes_pii=service_data.get("processes_pii", False),
                processes_health_data=service_data.get("processes_health_data", False),
                ai_components=service_data.get("ai_components", False),
                is_active=True,
                criticality=service_data.get("criticality", "medium"),
                owner_team=service_data.get("owner_team"),
            )
            self.db.add(module)
            self.db.flush()  # Get the ID
            self._module_map[name] = module.id
            count += 1

        return count

    def _seed_module_regulation_mappings(self) -> int:
        """Create mappings between service modules and regulations.

        Uses the ``regulations`` list of each BREAKPILOT_SERVICES entry.
        Unknown modules or regulations are skipped with a warning; existing
        mappings are left untouched.

        Returns:
            Number of mappings inserted.
        """
        count = 0
        for service_data in BREAKPILOT_SERVICES:
            module_name = service_data["name"]

            # Get module ID (cache first, then database)
            module_id = self._module_map.get(module_name)
            if not module_id:
                module = self.db.query(ServiceModuleDB).filter(
                    ServiceModuleDB.name == module_name
                ).first()
                if not module:
                    logger.warning(f"Module {service_data['name']} not found, skipping regulation mappings")
                    continue
                module_id = module.id
                self._module_map[module_name] = module_id

            # Process regulation mappings
            for reg_mapping in service_data.get("regulations", []):
                regulation_code = reg_mapping["code"]
                regulation_id = self._resolve_regulation_id(regulation_code)
                if not regulation_id:
                    logger.warning(f"Regulation {regulation_code} not found, skipping mapping for {service_data['name']}")
                    continue

                # Check if mapping exists
                existing = self.db.query(ModuleRegulationMappingDB).filter(
                    ModuleRegulationMappingDB.module_id == module_id,
                    ModuleRegulationMappingDB.regulation_id == regulation_id,
                ).first()
                if existing:
                    continue

                self.db.add(ModuleRegulationMappingDB(
                    module_id=module_id,
                    regulation_id=regulation_id,
                    relevance_level=RelevanceLevelEnum(reg_mapping["relevance"]),
                    notes=reg_mapping.get("notes"),
                ))
                count += 1

        return count

    def seed_service_modules_only(self) -> int:
        """Seed only service modules (useful for incremental updates).

        If no regulation has been cached yet, the regulation seeding step is
        run first so that module -> regulation mappings can be resolved.

        Returns:
            Sum of inserted service modules and inserted module-regulation
            mappings. Rolls back and re-raises on error.
        """
        results = {
            "service_modules": 0,
            "module_regulation_mappings": 0,
        }

        def _seed_modules_and_mappings() -> None:
            # Ensure regulations are loaded first
            if not self._regulation_map:
                self._seed_regulations()
            results["service_modules"] = self._seed_service_modules()
            results["module_regulation_mappings"] = self._seed_module_regulation_mappings()

        self._seed_and_commit(_seed_modules_and_mappings)
        logger.info(f"Service modules seeding completed: {results}")
        return results["service_modules"] + results["module_regulation_mappings"]

    def _seed_soa(self) -> int:
        """
        Seed Statement of Applicability (SoA) entries from ISO 27001:2022 Annex A.

        Creates one SoA entry per item in ISO27001_ANNEX_A_CONTROLS (expected
        to hold the 93 Annex A controls) unless an entry for that control
        already exists. New entries start with implementation status
        "planned". This is MANDATORY for ISO 27001 certification.

        Returns:
            Number of SoA entries inserted.
        """
        count = 0
        for annex_control in ISO27001_ANNEX_A_CONTROLS:
            control_id = annex_control["control_id"]

            # Check if SoA entry already exists
            existing = self.db.query(StatementOfApplicabilityDB).filter(
                StatementOfApplicabilityDB.annex_a_control == control_id
            ).first()
            if existing:
                continue

            # Create SoA entry
            soa_entry = StatementOfApplicabilityDB(
                annex_a_control=control_id,
                annex_a_title=annex_control["title"],
                annex_a_category=annex_control["category"],
                is_applicable=annex_control.get("default_applicable", True),
                applicability_justification=annex_control.get("description", ""),
                implementation_status="planned",
                implementation_notes=annex_control.get("implementation_guidance", ""),
                breakpilot_control_ids=annex_control.get("breakpilot_controls", []),
                evidence_description="",
                risk_assessment_notes="",
            )
            self.db.add(soa_entry)
            count += 1

        logger.info(f"Seeded {count} SoA entries from ISO 27001:2022 Annex A")
        return count

    def seed_soa_only(self) -> int:
        """
        Seed only SoA entries (useful for incremental updates).

        Creates the missing ISO 27001:2022 Annex A control entries in the SoA.

        Returns:
            Number of SoA entries inserted. Rolls back and re-raises on error.
        """
        count = self._seed_and_commit(self._seed_soa)
        logger.info(f"SoA seeding completed: {count} entries")
        return count
|
||||
Reference in New Issue
Block a user