Files
breakpilot-compliance/backend-compliance/compliance/db/models.py
Sharang Parnerkar 3702f70754
Some checks failed
CI / branch-name (pull_request) Successful in 3s
CI / guardrail-integrity (pull_request) Failing after 5s
CI / loc-budget (pull_request) Failing after 6s
CI / secret-scan (pull_request) Failing after 4s
CI / go-lint (pull_request) Failing after 2s
CI / python-lint (pull_request) Failing after 12s
CI / nodejs-lint (pull_request) Failing after 4s
CI / nodejs-build (pull_request) Failing after 3s
CI / dep-audit (pull_request) Failing after 10s
CI / sbom-scan (pull_request) Failing after 2s
CI / test-go (pull_request) Failing after 2s
CI / test-python-backend (pull_request) Failing after 10s
CI / test-python-document-crawler (pull_request) Failing after 9s
CI / test-python-dsms-gateway (pull_request) Failing after 10s
CI / validate-canonical-controls (pull_request) Failing after 10s
fix: strip duplicate inline class definitions from db shim files
models.py and repository.py are backwards-compat re-export shims from
Phase 1. Both files still contained the original 1466/1547 line class
definitions below the re-export block. These inline definitions shadowed
the correctly-imported sub-module versions and failed at import time
because Column, AuditResultEnum, etc. were no longer in scope.

Fix:
- models.py: remove all duplicate Base-subclass definitions (lines 209-
  1581). Retain EvidenceConfidenceEnum and EvidenceTruthStatusEnum (unique
  to this shim, not yet extracted to a sub-module) and the two models that
  have no sub-module yet: LLMGenerationAuditDB and AssertionDB. Add back
  the SQLAlchemy column-type imports those two models need.
- repository.py: remove all duplicate Repository class definitions (lines
  40-1692). All classes are now fully provided by the sub-repositories.

Result: 172 pytest tests pass, import OK.
2026-04-20 07:50:29 +02:00

287 lines
10 KiB
Python

"""
compliance.db.models — backwards-compatibility re-export shim.
Phase 1 refactor split the monolithic 1466-line models module into per-aggregate
sibling modules. Every public symbol is re-exported here so existing imports
(``from compliance.db.models import RegulationDB, ...``) continue to work
unchanged.

New code SHOULD import directly from the aggregate module, for example:

    from compliance.db.regulation_models import RegulationDB, RequirementDB
    from compliance.db.control_models import ControlDB, RiskDB
    from compliance.db.ai_system_models import AISystemDB
    from compliance.db.service_module_models import ServiceModuleDB
    from compliance.db.audit_session_models import AuditSessionDB
    from compliance.db.isms_governance_models import ISMSScopeDB
    from compliance.db.isms_audit_models import AuditFindingDB

Import order here also matters for SQLAlchemy mapper configuration: aggregates
that are referenced by name-string relationships must be imported before their
referrers. Regulation/Control/Risk come first, then Service Module, then the
audit sessions and ISMS layers.
DO NOT add new classes to this file. Add them to the appropriate aggregate
module and re-export here.
"""
import enum # noqa: F401 — used by inline enum classes below
import uuid
from datetime import datetime
from sqlalchemy import ( # noqa: F401 — used by inline model classes below
Boolean, Column, Date, DateTime, Enum, Float,
ForeignKey, Index, Integer, JSON, String, Text,
)
from sqlalchemy.orm import backref # noqa: F401
# Order matters: later modules reference classes defined in earlier ones via
# SQLAlchemy string relationships. Keep foundational aggregates first.
from compliance.db.regulation_models import ( # noqa: F401
RegulationTypeEnum,
RegulationDB,
RequirementDB,
)
from compliance.db.control_models import ( # noqa: F401
ControlTypeEnum,
ControlDomainEnum,
ControlStatusEnum,
RiskLevelEnum,
EvidenceStatusEnum,
ControlDB,
ControlMappingDB,
EvidenceDB,
RiskDB,
)
from compliance.db.ai_system_models import ( # noqa: F401
AIClassificationEnum,
AISystemStatusEnum,
ExportStatusEnum,
AISystemDB,
AuditExportDB,
)
from compliance.db.service_module_models import ( # noqa: F401
ServiceTypeEnum,
RelevanceLevelEnum,
ServiceModuleDB,
ModuleRegulationMappingDB,
ModuleRiskDB,
)
from compliance.db.audit_session_models import ( # noqa: F401
AuditResultEnum,
AuditSessionStatusEnum,
AuditSessionDB,
AuditSignOffDB,
)
from compliance.db.isms_governance_models import ( # noqa: F401
ApprovalStatusEnum,
ISMSScopeDB,
ISMSContextDB,
ISMSPolicyDB,
SecurityObjectiveDB,
StatementOfApplicabilityDB,
)
from compliance.db.isms_audit_models import ( # noqa: F401
FindingTypeEnum,
FindingStatusEnum,
CAPATypeEnum,
AuditFindingDB,
CorrectiveActionDB,
ManagementReviewDB,
InternalAuditDB,
AuditTrailDB,
ISMSReadinessCheckDB,
)
from sqlalchemy.orm import relationship
# Import shared Base from classroom_engine
from classroom_engine.database import Base
# ============================================================================
# ENUMS
# ============================================================================
class RegulationTypeEnum(str, enum.Enum):
    """Kinds of regulation or standard.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    # NOTE(review): this class statement rebinds the ``RegulationTypeEnum``
    # name imported earlier in this module from
    # ``compliance.db.regulation_models`` — confirm the duplicate is intended.

    # Directly applicable EU law.
    EU_REGULATION = "eu_regulation"
    # Requires national implementation.
    EU_DIRECTIVE = "eu_directive"
    # German national law.
    DE_LAW = "de_law"
    # BSI technical guidelines.
    BSI_STANDARD = "bsi_standard"
    # ISO, OWASP, etc.
    INDUSTRY_STANDARD = "industry_standard"
class ControlTypeEnum(str, enum.Enum):
    """Kinds of security control.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    # NOTE(review): this class statement rebinds the ``ControlTypeEnum`` name
    # imported earlier in this module from ``compliance.db.control_models`` —
    # confirm the duplicate is intended.

    # Prevents incidents.
    PREVENTIVE = "preventive"
    # Detects incidents.
    DETECTIVE = "detective"
    # Corrects after incidents.
    CORRECTIVE = "corrective"
class ControlDomainEnum(str, enum.Enum):
    """Domain (category) a control belongs to.

    Values are short codes; members are also ``str`` instances and compare
    equal to those codes.
    """

    # NOTE(review): this class statement rebinds the ``ControlDomainEnum``
    # name imported earlier in this module from
    # ``compliance.db.control_models`` — confirm the duplicate is intended.

    # Governance & Organization.
    GOVERNANCE = "gov"
    # Privacy & Data Protection.
    PRIVACY = "priv"
    # Identity & Access Management.
    IAM = "iam"
    # Cryptography & Key Management.
    CRYPTO = "crypto"
    # Secure Development Lifecycle.
    SDLC = "sdlc"
    # Operations & Monitoring.
    OPS = "ops"
    # AI-specific controls.
    AI = "ai"
    # CRA & Supply Chain.
    CRA = "cra"
    # Audit & Traceability.
    AUDIT = "aud"
class ControlStatusEnum(str, enum.Enum):
    """Implementation state of a control.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    # NOTE(review): this class statement rebinds the ``ControlStatusEnum``
    # name imported earlier in this module from
    # ``compliance.db.control_models`` — confirm the duplicate is intended.

    # Fully implemented & passing.
    PASS = "pass"
    # Partially implemented.
    PARTIAL = "partial"
    # Not passing.
    FAIL = "fail"
    # Not applicable.
    NOT_APPLICABLE = "n/a"
    # Planned for implementation.
    PLANNED = "planned"
    # Implementation in progress.
    IN_PROGRESS = "in_progress"
class RiskLevelEnum(str, enum.Enum):
    """Severity level of a risk, declared from lowest to highest.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    # NOTE(review): this class statement rebinds the ``RiskLevelEnum`` name
    # imported earlier in this module from ``compliance.db.control_models`` —
    # confirm the duplicate is intended.

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
class EvidenceStatusEnum(str, enum.Enum):
    """Validity state of an evidence artifact.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    # NOTE(review): this class statement rebinds the ``EvidenceStatusEnum``
    # name imported earlier in this module from
    # ``compliance.db.control_models`` — confirm the duplicate is intended.

    # Currently valid.
    VALID = "valid"
    # Past validity date.
    EXPIRED = "expired"
    # Awaiting validation.
    PENDING = "pending"
    # Failed validation.
    FAILED = "failed"
class EvidenceConfidenceEnum(str, enum.Enum):
    """Confidence grade assigned to evidence (Anti-Fake-Evidence).

    Grades run from ``E0`` to ``E4``; each member's value is identical to its
    name, and members are also ``str`` instances.
    """

    # Generated / no real evidence (LLM output, placeholder).
    E0 = "E0"
    # Uploaded but unreviewed (manual upload, no hash, no reviewer).
    E1 = "E1"
    # Reviewed internally (human reviewed, hash verified).
    E2 = "E2"
    # Observed by system (CI/CD pipeline, API with hash).
    E3 = "E3"
    # Validated by external auditor.
    E4 = "E4"
class EvidenceTruthStatusEnum(str, enum.Enum):
    """Lifecycle states describing the truth status of evidence
    (Anti-Fake-Evidence).

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    GENERATED = "generated"
    UPLOADED = "uploaded"
    OBSERVED = "observed"
    VALIDATED_INTERNAL = "validated_internal"
    REJECTED = "rejected"
    PROVIDED_TO_AUDITOR = "provided_to_auditor"
    ACCEPTED_BY_AUDITOR = "accepted_by_auditor"
class ExportStatusEnum(str, enum.Enum):
    """Processing state of an audit export.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    # NOTE(review): this class statement rebinds the ``ExportStatusEnum`` name
    # imported earlier in this module from
    # ``compliance.db.ai_system_models`` — confirm the duplicate is intended.

    PENDING = "pending"
    GENERATING = "generating"
    COMPLETED = "completed"
    FAILED = "failed"
class ServiceTypeEnum(str, enum.Enum):
    """Category of a Breakpilot service/module.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    # NOTE(review): this class statement rebinds the ``ServiceTypeEnum`` name
    # imported earlier in this module from
    # ``compliance.db.service_module_models`` — confirm the duplicate is
    # intended.

    # API/Backend services.
    BACKEND = "backend"
    # Data storage.
    DATABASE = "database"
    # AI/ML services.
    AI = "ai"
    # Chat/Video/Messaging.
    COMMUNICATION = "communication"
    # File/Object storage.
    STORAGE = "storage"
    # Load balancer, reverse proxy.
    INFRASTRUCTURE = "infrastructure"
    # Logging, metrics.
    MONITORING = "monitoring"
    # Auth, encryption, secrets.
    SECURITY = "security"
class RelevanceLevelEnum(str, enum.Enum):
    """How relevant a regulation is to a service, declared from most to
    least severe.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    # NOTE(review): this class statement rebinds the ``RelevanceLevelEnum``
    # name imported earlier in this module from
    # ``compliance.db.service_module_models`` — confirm the duplicate is
    # intended.

    # Non-compliance = shutdown.
    CRITICAL = "critical"
    # Major risk.
    HIGH = "high"
    # Moderate risk.
    MEDIUM = "medium"
    # Minor risk.
    LOW = "low"
# ============================================================================
# MODELS — unique to this shim (not yet extracted to a sub-module)
# ============================================================================
class LLMGenerationAuditDB(Base):
    """
    Persistent log of content produced by an LLM.

    Each row describes one generated item together with its ``truth_status``
    and ``may_be_used_as_evidence`` flag, so that generated assistance stays
    distinguishable from real evidence.
    """

    __tablename__ = 'compliance_llm_generation_audit'

    # Primary key: a random UUID4 rendered as a string, generated client-side.
    id = Column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    tenant_id = Column(String(36), index=True)

    # Kind of generated entity: 'evidence', 'control', 'document'.
    entity_type = Column(String(50), nullable=False)
    # Identifier of the generated entity; no ForeignKey constraint is declared
    # on this column.
    entity_id = Column(String(36))
    # How the content was produced: 'draft_assistance', 'auto_generation'.
    generation_mode = Column(String(100), nullable=False)

    # New rows default to GENERATED and to not being usable as evidence.
    truth_status = Column(
        Enum(EvidenceTruthStatusEnum),
        nullable=False,
        default=EvidenceTruthStatusEnum.GENERATED,
    )
    may_be_used_as_evidence = Column(Boolean, nullable=False, default=False)

    llm_model = Column(String(100))
    # Provider name, e.g. 'ollama', 'anthropic'.
    llm_provider = Column(String(50))
    # SHA-256 of the prompt.
    prompt_hash = Column(String(64))
    input_summary = Column(Text)
    output_summary = Column(Text)
    # Stored in the DB column "metadata" but exposed under a different
    # attribute name — presumably because ``metadata`` is reserved on
    # declarative classes; TODO confirm.
    extra_metadata = Column("metadata", JSON, default=dict)

    # NOTE(review): ``datetime.utcnow`` yields naive datetimes and is
    # deprecated as of Python 3.12 — kept unchanged here to preserve the
    # stored values.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Composite index over (entity_type, entity_id).
    __table_args__ = (
        Index('ix_llm_audit_entity', 'entity_type', 'entity_id'),
    )

    def __repr__(self):
        """Return a short debug string naming the entity and generation mode."""
        target = f"{self.entity_type}:{self.entity_id}"
        return f"<LLMGenerationAudit {target} mode={self.generation_mode}>"
class AssertionDB(Base):
    """
    Assertion tracking — separates claims from verified facts.

    Each sentence from a control/evidence/document is stored as one row with
    its classification (assertion vs. fact vs. rationale) and optional
    evidence linkage.
    """

    __tablename__ = 'compliance_assertions'

    # Primary key: a random UUID4 rendered as a string, generated client-side.
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    tenant_id = Column(String(36), index=True)

    # Kind of source entity: 'control', 'evidence', 'document', 'obligation'.
    entity_type = Column(String(50), nullable=False)
    entity_id = Column(String(36), nullable=False)

    sentence_text = Column(Text, nullable=False)
    sentence_index = Column(Integer, nullable=False, default=0)
    # Classification: 'assertion' | 'fact' | 'rationale'.
    assertion_type = Column(String(20), nullable=False, default='assertion')

    # JSON column defaulting to an empty list.
    evidence_ids = Column(JSON, default=list)
    confidence = Column(Float, default=0.0)
    # 'pflicht' | 'empfehlung' | 'kann'
    # (German for mandatory | recommendation | optional).
    normative_tier = Column(String(20))

    verified_by = Column(String(200))
    verified_at = Column(DateTime)

    # NOTE(review): ``datetime.utcnow`` yields naive datetimes and is
    # deprecated as of Python 3.12 — kept unchanged here to preserve the
    # stored values.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    __table_args__ = (
        Index('ix_assertion_entity', 'entity_type', 'entity_id'),
        Index('ix_assertion_type', 'assertion_type'),
    )

    def __repr__(self):
        """Return a short debug string with the type and a 50-character preview.

        ``sentence_text`` can still be ``None`` on an instance that was
        constructed without it; slicing ``None`` would raise ``TypeError``
        and break debugging/logging, so an empty preview is used instead.
        """
        preview = (self.sentence_text or "")[:50]
        return f"<Assertion {self.assertion_type}: {preview}>"