# breakpilot-compliance/backend-compliance/compliance/db/models.py

"""
compliance.db.models — backwards-compatibility re-export shim.

Phase 1 refactor split the monolithic 1466-line models module into per-aggregate
sibling modules. Every public symbol is re-exported here so existing imports
(``from compliance.db.models import RegulationDB, ...``) continue to work
unchanged.

New code SHOULD import directly from the aggregate module:

    from compliance.db.regulation_models import RegulationDB, RequirementDB
    from compliance.db.control_models    import ControlDB, RiskDB
    from compliance.db.ai_system_models  import AISystemDB
    from compliance.db.service_module_models import ServiceModuleDB
    from compliance.db.audit_session_models   import AuditSessionDB
    from compliance.db.isms_governance_models import ISMSScopeDB
    from compliance.db.isms_audit_models      import AuditFindingDB

Import order here also matters for SQLAlchemy mapper configuration: aggregates
that are referenced by name-string relationships must be imported before their
referrers. Regulation/Control/Risk come first, then AI System, then Service
Module, then the audit sessions and ISMS layers.

DO NOT add new classes to this file. Add them to the appropriate aggregate
module and re-export here. The enums and models still defined inline below
have not yet been extracted to a sub-module.
"""

import enum  # noqa: F401  — used by inline enum classes below
import uuid
from datetime import datetime

from sqlalchemy import (  # noqa: F401  — used by inline model classes below
    Boolean, Column, Date, DateTime, Enum, Float,
    ForeignKey, Index, Integer, JSON, String, Text,
)
from sqlalchemy.orm import backref  # noqa: F401

# Order matters: later modules reference classes defined in earlier ones via
# SQLAlchemy string relationships. Keep foundational aggregates first.

from compliance.db.regulation_models import (  # noqa: F401
    RegulationTypeEnum,
    RegulationDB,
    RequirementDB,
)
from compliance.db.control_models import (  # noqa: F401
    ControlTypeEnum,
    ControlDomainEnum,
    ControlStatusEnum,
    RiskLevelEnum,
    EvidenceStatusEnum,
    ControlDB,
    ControlMappingDB,
    EvidenceDB,
    RiskDB,
)
from compliance.db.ai_system_models import (  # noqa: F401
    AIClassificationEnum,
    AISystemStatusEnum,
    ExportStatusEnum,
    AISystemDB,
    AuditExportDB,
)
from compliance.db.service_module_models import (  # noqa: F401
    ServiceTypeEnum,
    RelevanceLevelEnum,
    ServiceModuleDB,
    ModuleRegulationMappingDB,
    ModuleRiskDB,
)
from compliance.db.audit_session_models import (  # noqa: F401
    AuditResultEnum,
    AuditSessionStatusEnum,
    AuditSessionDB,
    AuditSignOffDB,
)
from compliance.db.isms_governance_models import (  # noqa: F401
    ApprovalStatusEnum,
    ISMSScopeDB,
    ISMSContextDB,
    ISMSPolicyDB,
    SecurityObjectiveDB,
    StatementOfApplicabilityDB,
)
from compliance.db.isms_audit_models import (  # noqa: F401
    FindingTypeEnum,
    FindingStatusEnum,
    CAPATypeEnum,
    AuditFindingDB,
    CorrectiveActionDB,
    ManagementReviewDB,
    InternalAuditDB,
    AuditTrailDB,
    ISMSReadinessCheckDB,
)
from sqlalchemy.orm import relationship

# Import shared Base from classroom_engine
from classroom_engine.database import Base


# ============================================================================
# ENUMS
# ============================================================================

class RegulationTypeEnum(str, enum.Enum):
    """Classifies a regulation or standard by its origin.

    Members are ``str`` subclasses, so each one compares equal to its raw
    string value.

    NOTE(review): this module also imports ``RegulationTypeEnum`` from
    ``compliance.db.regulation_models``; this later definition rebinds the
    name. Confirm which of the two is meant to be canonical.
    """

    # EU law that is directly applicable.
    EU_REGULATION = "eu_regulation"
    # EU law that requires national implementation.
    EU_DIRECTIVE = "eu_directive"
    # German national law.
    DE_LAW = "de_law"
    # BSI technical guidelines.
    BSI_STANDARD = "bsi_standard"
    # Industry standards such as ISO or OWASP.
    INDUSTRY_STANDARD = "industry_standard"


class ControlTypeEnum(str, enum.Enum):
    """Kind of security control, by when it acts relative to an incident.

    NOTE(review): this module also imports ``ControlTypeEnum`` from
    ``compliance.db.control_models``; this later definition rebinds the
    name. Confirm which of the two is meant to be canonical.
    """

    # Stops incidents from happening.
    PREVENTIVE = "preventive"
    # Notices incidents when they happen.
    DETECTIVE = "detective"
    # Repairs things after an incident.
    CORRECTIVE = "corrective"


class ControlDomainEnum(str, enum.Enum):
    """Category a control belongs to; values are short domain codes.

    NOTE(review): this module also imports ``ControlDomainEnum`` from
    ``compliance.db.control_models``; this later definition rebinds the
    name. Confirm which of the two is meant to be canonical.
    """

    # Governance & Organization
    GOVERNANCE = "gov"
    # Privacy & Data Protection
    PRIVACY = "priv"
    # Identity & Access Management
    IAM = "iam"
    # Cryptography & Key Management
    CRYPTO = "crypto"
    # Secure Development Lifecycle
    SDLC = "sdlc"
    # Operations & Monitoring
    OPS = "ops"
    # AI-specific controls
    AI = "ai"
    # CRA & Supply Chain
    CRA = "cra"
    # Audit & Traceability
    AUDIT = "aud"


class ControlStatusEnum(str, enum.Enum):
    """How far along the implementation of a control is.

    NOTE(review): this module also imports ``ControlStatusEnum`` from
    ``compliance.db.control_models``; this later definition rebinds the
    name. Confirm which of the two is meant to be canonical.
    """

    # Fully implemented and passing.
    PASS = "pass"
    # Only partially implemented.
    PARTIAL = "partial"
    # Not passing.
    FAIL = "fail"
    # Does not apply.
    NOT_APPLICABLE = "n/a"
    # Implementation is planned.
    PLANNED = "planned"
    # Implementation is under way.
    IN_PROGRESS = "in_progress"


class RiskLevelEnum(str, enum.Enum):
    """Severity of a risk, declared from lowest to highest.

    NOTE(review): this module also imports ``RiskLevelEnum`` from
    ``compliance.db.control_models``; this later definition rebinds the
    name. Confirm which of the two is meant to be canonical.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"


class EvidenceStatusEnum(str, enum.Enum):
    """Validation state of an evidence artifact.

    NOTE(review): this module also imports ``EvidenceStatusEnum`` from
    ``compliance.db.control_models``; this later definition rebinds the
    name. Confirm which of the two is meant to be canonical.
    """

    # Valid right now.
    VALID = "valid"
    # Validity date has passed.
    EXPIRED = "expired"
    # Waiting to be validated.
    PENDING = "pending"
    # Validation did not succeed.
    FAILED = "failed"


class EvidenceConfidenceEnum(str, enum.Enum):
    """Anti-Fake-Evidence confidence grade, from E0 (weakest) to E4.

    Each member's value is identical to its name.
    """

    # Generated / no real evidence (LLM output, placeholder).
    E0 = "E0"
    # Uploaded but unreviewed (manual upload, no hash, no reviewer).
    E1 = "E1"
    # Reviewed internally (human reviewed, hash verified).
    E2 = "E2"
    # Observed by system (CI/CD pipeline, API with hash).
    E3 = "E3"
    # Validated by an external auditor.
    E4 = "E4"


class EvidenceTruthStatusEnum(str, enum.Enum):
    """Anti-Fake-Evidence lifecycle states describing how far evidence is trusted.

    Used in this module as the type and default (``GENERATED``) of
    ``LLMGenerationAuditDB.truth_status``.
    """

    GENERATED = "generated"
    UPLOADED = "uploaded"
    OBSERVED = "observed"
    VALIDATED_INTERNAL = "validated_internal"
    REJECTED = "rejected"
    PROVIDED_TO_AUDITOR = "provided_to_auditor"
    ACCEPTED_BY_AUDITOR = "accepted_by_auditor"


class ExportStatusEnum(str, enum.Enum):
    """Progress state of an audit export.

    NOTE(review): this module also imports ``ExportStatusEnum`` from
    ``compliance.db.ai_system_models``; this later definition rebinds the
    name. Confirm which of the two is meant to be canonical.
    """

    PENDING = "pending"
    GENERATING = "generating"
    COMPLETED = "completed"
    FAILED = "failed"


class ServiceTypeEnum(str, enum.Enum):
    """Category of a Breakpilot service or module.

    NOTE(review): this module also imports ``ServiceTypeEnum`` from
    ``compliance.db.service_module_models``; this later definition rebinds
    the name. Confirm which of the two is meant to be canonical.
    """

    # API/Backend services
    BACKEND = "backend"
    # Data storage
    DATABASE = "database"
    # AI/ML services
    AI = "ai"
    # Chat/Video/Messaging
    COMMUNICATION = "communication"
    # File/Object storage
    STORAGE = "storage"
    # Load balancer, reverse proxy
    INFRASTRUCTURE = "infrastructure"
    # Logging, metrics
    MONITORING = "monitoring"
    # Auth, encryption, secrets
    SECURITY = "security"


class RelevanceLevelEnum(str, enum.Enum):
    """How strongly a regulation bears on a service, declared highest first.

    NOTE(review): this module also imports ``RelevanceLevelEnum`` from
    ``compliance.db.service_module_models``; this later definition rebinds
    the name. Confirm which of the two is meant to be canonical.
    """

    # Non-compliance = shutdown.
    CRITICAL = "critical"
    # Major risk.
    HIGH = "high"
    # Moderate risk.
    MEDIUM = "medium"
    # Minor risk.
    LOW = "low"


# ============================================================================
# MODELS — unique to this shim (not yet extracted to a sub-module)
# ============================================================================

class LLMGenerationAuditDB(Base):
    """
    One row per piece of LLM-generated content.

    The row carries a ``truth_status`` and a ``may_be_used_as_evidence`` flag
    so that generated assistance stays distinguishable from real evidence.
    Both default to the most conservative setting: ``GENERATED`` and
    ``False``.
    """
    __tablename__ = 'compliance_llm_generation_audit'
    __table_args__ = (
        # Composite index over the (entity_type, entity_id) pair.
        Index('ix_llm_audit_entity', 'entity_type', 'entity_id'),
    )

    # --- identity ---------------------------------------------------------
    # Primary key: a fresh UUID4 rendered as a 36-character string.
    id = Column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    tenant_id = Column(String(36), index=True)

    # --- what was generated, and how far it may be trusted ----------------
    # Kind of generated entity, e.g. 'evidence', 'control', 'document'.
    entity_type = Column(String(50), nullable=False)
    # Id of the generated entity; no ForeignKey constraint is declared here.
    entity_id = Column(String(36))
    # How the content was produced, e.g. 'draft_assistance', 'auto_generation'.
    generation_mode = Column(String(100), nullable=False)
    truth_status = Column(
        Enum(EvidenceTruthStatusEnum),
        nullable=False,
        default=EvidenceTruthStatusEnum.GENERATED,
    )
    may_be_used_as_evidence = Column(Boolean, nullable=False, default=False)

    # --- provenance of the generation -------------------------------------
    llm_model = Column(String(100))
    # Provider name, e.g. 'ollama', 'anthropic'.
    llm_provider = Column(String(50))
    # SHA-256 of the prompt (64 characters).
    prompt_hash = Column(String(64))
    input_summary = Column(Text)
    output_summary = Column(Text)
    # Stored in the database column named "metadata"; the Python attribute
    # uses a different name, presumably because ``metadata`` is reserved on
    # SQLAlchemy declarative classes.
    extra_metadata = Column("metadata", JSON, default=dict)

    # --- bookkeeping timestamps (naive UTC via datetime.utcnow) -----------
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    def __repr__(self):
        """Return a short debug label: entity reference plus generation mode."""
        subject = f"{self.entity_type}:{self.entity_id}"
        return f"<LLMGenerationAudit {subject} mode={self.generation_mode}>"


class AssertionDB(Base):
    """
    Assertion tracking — separates claims from verified facts.

    Each sentence from a control/evidence/document is stored here with its
    classification (assertion vs. fact vs. rationale) and optional evidence
    linkage.
    """
    __tablename__ = 'compliance_assertions'

    # Primary key: a fresh UUID4 rendered as a 36-character string.
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    tenant_id = Column(String(36), index=True)

    # Source entity the sentence was taken from:
    # 'control', 'evidence', 'document', 'obligation'.
    entity_type = Column(String(50), nullable=False)
    entity_id = Column(String(36), nullable=False)
    sentence_text = Column(Text, nullable=False)
    # Position of the sentence; defaults to 0.
    sentence_index = Column(Integer, nullable=False, default=0)

    # Classification: 'assertion' | 'fact' | 'rationale'.
    assertion_type = Column(String(20), nullable=False, default='assertion')
    # JSON list of linked evidence ids; a new empty list per row by default.
    evidence_ids = Column(JSON, default=list)
    confidence = Column(Float, default=0.0)
    # Normative tier: 'pflicht' | 'empfehlung' | 'kann'.
    normative_tier = Column(String(20))

    verified_by = Column(String(200))
    verified_at = Column(DateTime)

    # Bookkeeping timestamps (naive UTC via datetime.utcnow).
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    __table_args__ = (
        Index('ix_assertion_entity', 'entity_type', 'entity_id'),
        Index('ix_assertion_type', 'assertion_type'),
    )

    def __repr__(self):
        """Return a debug label with the type and the first 50 characters of text."""
        # sentence_text may still be None (e.g. on an instance whose attribute
        # has not been assigned yet); slicing None would raise TypeError, so
        # fall back to an empty preview and keep repr() safe for logging.
        preview = (self.sentence_text or "")[:50]
        return f"<Assertion {self.assertion_type}: {preview}>"