feat: Anti-Fake-Evidence System (Phase 1-4b)

Implement full evidence integrity pipeline to prevent compliance theater:
- Confidence levels (E0-E4), truth status tracking, assertion engine
- Four-Eyes approval workflow, audit trail, reject endpoint
- Evidence distribution dashboard, LLM audit routes
- Traceability matrix (backend endpoint + Compliance Hub UI tab)
- Anti-fake badges, control status machine, normative patterns
- 2 migrations, 4 test suites, MkDocs documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-23 17:15:45 +01:00
parent 48ca0a6bef
commit e6201d5239
36 changed files with 5627 additions and 189 deletions

View File

@@ -8,12 +8,16 @@ from .models import (
EvidenceDB,
RiskDB,
AuditExportDB,
LLMGenerationAuditDB,
AssertionDB,
RegulationTypeEnum,
ControlTypeEnum,
ControlDomainEnum,
RiskLevelEnum,
EvidenceStatusEnum,
ControlStatusEnum,
EvidenceConfidenceEnum,
EvidenceTruthStatusEnum,
)
from .repository import (
RegulationRepository,
@@ -33,6 +37,8 @@ __all__ = [
"EvidenceDB",
"RiskDB",
"AuditExportDB",
"LLMGenerationAuditDB",
"AssertionDB",
# Enums
"RegulationTypeEnum",
"ControlTypeEnum",
@@ -40,6 +46,8 @@ __all__ = [
"RiskLevelEnum",
"EvidenceStatusEnum",
"ControlStatusEnum",
"EvidenceConfidenceEnum",
"EvidenceTruthStatusEnum",
# Repositories
"RegulationRepository",
"RequirementRepository",

View File

@@ -65,6 +65,7 @@ class ControlStatusEnum(str, enum.Enum):
FAIL = "fail" # Not passing
NOT_APPLICABLE = "n/a" # Not applicable
PLANNED = "planned" # Planned for implementation
IN_PROGRESS = "in_progress" # Implementation in progress
class RiskLevelEnum(str, enum.Enum):
@@ -83,6 +84,26 @@ class EvidenceStatusEnum(str, enum.Enum):
FAILED = "failed" # Failed validation
class EvidenceConfidenceEnum(str, enum.Enum):
    """Confidence level of evidence (Anti-Fake-Evidence).

    Ordered from weakest (E0, purely generated content) to strongest
    (E4, externally validated). The level is used as a weight when
    scoring evidence strength (E0=0.0 .. E4=1.0 in the repository's
    multi-dimensional score).
    """
    E0 = "E0"  # Generated / no real evidence (LLM output, placeholder)
    E1 = "E1"  # Uploaded but unreviewed (manual upload, no hash, no reviewer)
    E2 = "E2"  # Reviewed internally (human reviewed, hash verified)
    E3 = "E3"  # Observed by system (CI/CD pipeline, API with hash)
    E4 = "E4"  # Validated by external auditor
class EvidenceTruthStatusEnum(str, enum.Enum):
    """Truth status lifecycle for evidence (Anti-Fake-Evidence).

    Tracks how a piece of evidence originated and how far it has moved
    through review. VALIDATED_INTERNAL, PROVIDED_TO_AUDITOR and
    ACCEPTED_BY_AUDITOR are the statuses counted as "validated" in the
    repository's validation-quality score.
    """
    GENERATED = "generated"                      # produced by LLM/tooling, not yet real evidence
    UPLOADED = "uploaded"                        # manually uploaded, unverified (default for EvidenceDB)
    OBSERVED = "observed"                        # captured automatically by a system
    VALIDATED_INTERNAL = "validated_internal"    # confirmed by internal review
    REJECTED = "rejected"                        # review failed; must not be relied upon
    PROVIDED_TO_AUDITOR = "provided_to_auditor"  # handed over for external audit
    ACCEPTED_BY_AUDITOR = "accepted_by_auditor"  # external auditor accepted it
class ExportStatusEnum(str, enum.Enum):
"""Status of audit export."""
PENDING = "pending"
@@ -239,6 +260,7 @@ class ControlDB(Base):
# Status
status = Column(Enum(ControlStatusEnum), default=ControlStatusEnum.PLANNED)
status_notes = Column(Text)
status_justification = Column(Text) # Required for n/a transitions
# Ownership & Review
owner = Column(String(100)) # Responsible person/team
@@ -321,6 +343,22 @@ class EvidenceDB(Base):
ci_job_id = Column(String(100)) # CI/CD job reference
uploaded_by = Column(String(100)) # User who uploaded
# Anti-Fake-Evidence: Confidence & Truth tracking
confidence_level = Column(Enum(EvidenceConfidenceEnum), default=EvidenceConfidenceEnum.E1)
truth_status = Column(Enum(EvidenceTruthStatusEnum), default=EvidenceTruthStatusEnum.UPLOADED)
generation_mode = Column(String(100)) # e.g. "draft_assistance", "auto_generation"
may_be_used_as_evidence = Column(Boolean, default=True)
reviewed_by = Column(String(200))
reviewed_at = Column(DateTime)
# Anti-Fake-Evidence Phase 2: Four-Eyes review
approval_status = Column(String(30), default="none")
first_reviewer = Column(String(200))
first_reviewed_at = Column(DateTime)
second_reviewer = Column(String(200))
second_reviewed_at = Column(DateTime)
requires_four_eyes = Column(Boolean, default=False)
# Timestamps
collected_at = Column(DateTime, default=datetime.utcnow)
created_at = Column(DateTime, default=datetime.utcnow)
@@ -332,6 +370,7 @@ class EvidenceDB(Base):
__table_args__ = (
Index('ix_evidence_control_type', 'control_id', 'evidence_type'),
Index('ix_evidence_status', 'status'),
Index('ix_evidence_approval_status', 'approval_status'),
)
def __repr__(self):
@@ -1464,3 +1503,77 @@ class ISMSReadinessCheckDB(Base):
def __repr__(self):
return f"<ISMSReadiness {self.check_date}: {self.overall_status}>"
class LLMGenerationAuditDB(Base):
    """
    Audit trail for LLM-generated content.

    Every piece of content generated by an LLM is recorded here with its
    truth_status and may_be_used_as_evidence flag, ensuring transparency
    about what is real evidence vs. generated assistance.
    """
    __tablename__ = 'compliance_llm_generation_audit'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    tenant_id = Column(String(36), index=True)

    # What was generated and where it lives
    entity_type = Column(String(50), nullable=False)  # 'evidence', 'control', 'document'
    entity_id = Column(String(36))  # FK to generated entity
    generation_mode = Column(String(100), nullable=False)  # 'draft_assistance', 'auto_generation'

    # Trust classification: LLM output starts as GENERATED and is explicitly
    # NOT usable as evidence until upgraded elsewhere.
    truth_status = Column(Enum(EvidenceTruthStatusEnum), nullable=False, default=EvidenceTruthStatusEnum.GENERATED)
    may_be_used_as_evidence = Column(Boolean, nullable=False, default=False)

    # Generation provenance
    llm_model = Column(String(100))
    llm_provider = Column(String(50))  # 'ollama', 'anthropic'
    prompt_hash = Column(String(64))  # SHA-256 of prompt
    input_summary = Column(Text)
    output_summary = Column(Text)

    # DB column is named "metadata" but the attribute is extra_metadata --
    # presumably to avoid clashing with SQLAlchemy's reserved Base.metadata.
    extra_metadata = Column("metadata", JSON, default=dict)

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    __table_args__ = (
        # Supports lookups by the entity a generation belongs to
        Index('ix_llm_audit_entity', 'entity_type', 'entity_id'),
    )

    def __repr__(self):
        return f"<LLMGenerationAudit {self.entity_type}:{self.entity_id} mode={self.generation_mode}>"
class AssertionDB(Base):
    """
    Assertion tracking — separates claims from verified facts.

    Each sentence from a control/evidence/document is stored here with its
    classification (assertion vs. fact vs. rationale) and optional evidence linkage.
    """
    __tablename__ = 'compliance_assertions'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    tenant_id = Column(String(36), index=True)

    # Source entity the sentence was extracted from
    entity_type = Column(String(50), nullable=False)  # 'control', 'evidence', 'document', 'obligation'
    entity_id = Column(String(36), nullable=False)

    # The sentence itself and its position within the source text
    sentence_text = Column(Text, nullable=False)
    sentence_index = Column(Integer, nullable=False, default=0)

    # Classification: every sentence starts as an unverified 'assertion'
    assertion_type = Column(String(20), nullable=False, default='assertion')  # 'assertion' | 'fact' | 'rationale'
    evidence_ids = Column(JSON, default=list)  # IDs of evidence records backing this sentence
    confidence = Column(Float, default=0.0)
    normative_tier = Column(String(20))  # 'pflicht' | 'empfehlung' | 'kann'

    # Human verification trail
    verified_by = Column(String(200))
    verified_at = Column(DateTime)

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    __table_args__ = (
        Index('ix_assertion_entity', 'entity_type', 'entity_id'),
        Index('ix_assertion_type', 'assertion_type'),
    )

    def __repr__(self):
        # Truncate long sentences to keep the repr readable
        return f"<Assertion {self.assertion_type}: {self.sentence_text[:50]}>"

View File

@@ -487,6 +487,137 @@ class ControlRepository:
"compliance_score": round(score, 1),
}
def get_multi_dimensional_score(self) -> Dict[str, Any]:
"""
Calculate multi-dimensional compliance score (Anti-Fake-Evidence).
Returns 6 dimensions + hard_blocks + overall_readiness.
"""
from .models import (
EvidenceDB, RequirementDB, ControlMappingDB,
EvidenceConfidenceEnum, EvidenceTruthStatusEnum,
)
# Weight map for confidence levels
conf_weights = {"E0": 0.0, "E1": 0.25, "E2": 0.5, "E3": 0.75, "E4": 1.0}
validated_statuses = {"validated_internal", "accepted_by_auditor", "provided_to_auditor"}
controls = self.get_all()
total_controls = len(controls)
if total_controls == 0:
return {
"requirement_coverage": 0.0,
"evidence_strength": 0.0,
"validation_quality": 0.0,
"evidence_freshness": 0.0,
"control_effectiveness": 0.0,
"overall_readiness": 0.0,
"hard_blocks": ["Keine Controls vorhanden"],
}
# 1. requirement_coverage: % requirements linked to at least one control
total_reqs = self.db.query(func.count(RequirementDB.id)).scalar() or 0
linked_reqs = (
self.db.query(func.count(func.distinct(ControlMappingDB.requirement_id)))
.scalar() or 0
)
requirement_coverage = (linked_reqs / total_reqs * 100) if total_reqs > 0 else 0.0
# 2. evidence_strength: weighted average of evidence confidence
all_evidence = self.db.query(EvidenceDB).all()
if all_evidence:
total_weight = 0.0
for e in all_evidence:
conf_val = e.confidence_level.value if e.confidence_level else "E1"
total_weight += conf_weights.get(conf_val, 0.25)
evidence_strength = (total_weight / len(all_evidence)) * 100
else:
evidence_strength = 0.0
# 3. validation_quality: % evidence with truth_status >= validated_internal
if all_evidence:
validated_count = sum(
1 for e in all_evidence
if (e.truth_status.value if e.truth_status else "uploaded") in validated_statuses
)
validation_quality = (validated_count / len(all_evidence)) * 100
else:
validation_quality = 0.0
# 4. evidence_freshness: % evidence not expired and reviewed < 90 days
now = datetime.now()
if all_evidence:
fresh_count = 0
for e in all_evidence:
is_expired = e.valid_until and e.valid_until < now
is_stale = e.reviewed_at and (now - e.reviewed_at).days > 90 if hasattr(e, 'reviewed_at') and e.reviewed_at else False
if not is_expired and not is_stale:
fresh_count += 1
evidence_freshness = (fresh_count / len(all_evidence)) * 100
else:
evidence_freshness = 0.0
# 5. control_effectiveness: existing formula
passed = sum(1 for c in controls if c.status == ControlStatusEnum.PASS)
partial = sum(1 for c in controls if c.status == ControlStatusEnum.PARTIAL)
control_effectiveness = ((passed + partial * 0.5) / total_controls) * 100
# 6. overall_readiness: weighted composite
overall_readiness = (
0.20 * requirement_coverage +
0.25 * evidence_strength +
0.20 * validation_quality +
0.10 * evidence_freshness +
0.25 * control_effectiveness
)
# Hard blocks
hard_blocks = []
# Critical controls without any evidence
critical_no_evidence = []
for c in controls:
if c.status in (ControlStatusEnum.PASS, ControlStatusEnum.PARTIAL):
evidence_for_ctrl = [e for e in all_evidence if e.control_id == c.id]
if not evidence_for_ctrl:
critical_no_evidence.append(c.control_id)
if critical_no_evidence:
hard_blocks.append(
f"{len(critical_no_evidence)} Controls mit Status pass/partial haben keine Evidence: "
f"{', '.join(critical_no_evidence[:5])}"
)
# Controls with only E0/E1 evidence claiming pass
weak_evidence_pass = []
for c in controls:
if c.status == ControlStatusEnum.PASS:
evidence_for_ctrl = [e for e in all_evidence if e.control_id == c.id]
if evidence_for_ctrl:
max_conf = max(
conf_weights.get(
e.confidence_level.value if e.confidence_level else "E1", 0.25
)
for e in evidence_for_ctrl
)
if max_conf < 0.5: # Only E0 or E1
weak_evidence_pass.append(c.control_id)
if weak_evidence_pass:
hard_blocks.append(
f"{len(weak_evidence_pass)} Controls auf 'pass' haben nur E0/E1-Evidence: "
f"{', '.join(weak_evidence_pass[:5])}"
)
return {
"requirement_coverage": round(requirement_coverage, 1),
"evidence_strength": round(evidence_strength, 1),
"validation_quality": round(validation_quality, 1),
"evidence_freshness": round(evidence_freshness, 1),
"control_effectiveness": round(control_effectiveness, 1),
"overall_readiness": round(overall_readiness, 1),
"hard_blocks": hard_blocks,
}
class ControlMappingRepository:
"""Repository for requirement-control mappings."""