feat: Anti-Fake-Evidence System (Phase 1-4b)
Implement the full evidence integrity pipeline to prevent compliance theater:

- Confidence levels (E0-E4), truth status tracking, assertion engine
- Four-Eyes approval workflow, audit trail, reject endpoint
- Evidence distribution dashboard, LLM audit routes
- Traceability matrix (backend endpoint + Compliance Hub UI tab)
- Anti-fake badges, control status machine, normative patterns
- 2 migrations, 4 test suites, MkDocs documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,12 +8,16 @@ from .models import (
|
||||
EvidenceDB,
|
||||
RiskDB,
|
||||
AuditExportDB,
|
||||
LLMGenerationAuditDB,
|
||||
AssertionDB,
|
||||
RegulationTypeEnum,
|
||||
ControlTypeEnum,
|
||||
ControlDomainEnum,
|
||||
RiskLevelEnum,
|
||||
EvidenceStatusEnum,
|
||||
ControlStatusEnum,
|
||||
EvidenceConfidenceEnum,
|
||||
EvidenceTruthStatusEnum,
|
||||
)
|
||||
from .repository import (
|
||||
RegulationRepository,
|
||||
@@ -33,6 +37,8 @@ __all__ = [
|
||||
"EvidenceDB",
|
||||
"RiskDB",
|
||||
"AuditExportDB",
|
||||
"LLMGenerationAuditDB",
|
||||
"AssertionDB",
|
||||
# Enums
|
||||
"RegulationTypeEnum",
|
||||
"ControlTypeEnum",
|
||||
@@ -40,6 +46,8 @@ __all__ = [
|
||||
"RiskLevelEnum",
|
||||
"EvidenceStatusEnum",
|
||||
"ControlStatusEnum",
|
||||
"EvidenceConfidenceEnum",
|
||||
"EvidenceTruthStatusEnum",
|
||||
# Repositories
|
||||
"RegulationRepository",
|
||||
"RequirementRepository",
|
||||
|
||||
@@ -65,6 +65,7 @@ class ControlStatusEnum(str, enum.Enum):
|
||||
FAIL = "fail" # Not passing
|
||||
NOT_APPLICABLE = "n/a" # Not applicable
|
||||
PLANNED = "planned" # Planned for implementation
|
||||
IN_PROGRESS = "in_progress" # Implementation in progress
|
||||
|
||||
|
||||
class RiskLevelEnum(str, enum.Enum):
|
||||
@@ -83,6 +84,26 @@ class EvidenceStatusEnum(str, enum.Enum):
|
||||
FAILED = "failed" # Failed validation
|
||||
|
||||
|
||||
class EvidenceConfidenceEnum(str, enum.Enum):
    """Confidence level of evidence (Anti-Fake-Evidence).

    Ordered ladder from weakest (E0, generated) to strongest (E4,
    externally validated). The repository's multi-dimensional score maps
    these levels to weights 0.0/0.25/0.5/0.75/1.0 when computing
    evidence_strength.
    """
    E0 = "E0"  # Generated / no real evidence (LLM output, placeholder)
    E1 = "E1"  # Uploaded but unreviewed (manual upload, no hash, no reviewer)
    E2 = "E2"  # Reviewed internally (human reviewed, hash verified)
    E3 = "E3"  # Observed by system (CI/CD pipeline, API with hash)
    E4 = "E4"  # Validated by external auditor
|
||||
|
||||
|
||||
class EvidenceTruthStatusEnum(str, enum.Enum):
    """Truth status lifecycle for evidence (Anti-Fake-Evidence).

    Records how a piece of evidence came into existence and how far it has
    progressed through review. The score calculation treats
    VALIDATED_INTERNAL, PROVIDED_TO_AUDITOR and ACCEPTED_BY_AUDITOR as
    "validated" statuses.
    """
    GENERATED = "generated"                      # produced by an LLM/tool, not real evidence yet
    UPLOADED = "uploaded"                        # manually uploaded, not yet reviewed
    OBSERVED = "observed"                        # captured by an automated system
    VALIDATED_INTERNAL = "validated_internal"    # passed internal review
    REJECTED = "rejected"                        # rejected during review
    PROVIDED_TO_AUDITOR = "provided_to_auditor"  # handed over for external audit
    ACCEPTED_BY_AUDITOR = "accepted_by_auditor"  # accepted by the external auditor
|
||||
|
||||
|
||||
class ExportStatusEnum(str, enum.Enum):
|
||||
"""Status of audit export."""
|
||||
PENDING = "pending"
|
||||
@@ -239,6 +260,7 @@ class ControlDB(Base):
|
||||
# Status
|
||||
status = Column(Enum(ControlStatusEnum), default=ControlStatusEnum.PLANNED)
|
||||
status_notes = Column(Text)
|
||||
status_justification = Column(Text) # Required for n/a transitions
|
||||
|
||||
# Ownership & Review
|
||||
owner = Column(String(100)) # Responsible person/team
|
||||
@@ -321,6 +343,22 @@ class EvidenceDB(Base):
|
||||
ci_job_id = Column(String(100)) # CI/CD job reference
|
||||
uploaded_by = Column(String(100)) # User who uploaded
|
||||
|
||||
# Anti-Fake-Evidence: Confidence & Truth tracking
|
||||
confidence_level = Column(Enum(EvidenceConfidenceEnum), default=EvidenceConfidenceEnum.E1)
|
||||
truth_status = Column(Enum(EvidenceTruthStatusEnum), default=EvidenceTruthStatusEnum.UPLOADED)
|
||||
generation_mode = Column(String(100)) # e.g. "draft_assistance", "auto_generation"
|
||||
may_be_used_as_evidence = Column(Boolean, default=True)
|
||||
reviewed_by = Column(String(200))
|
||||
reviewed_at = Column(DateTime)
|
||||
|
||||
# Anti-Fake-Evidence Phase 2: Four-Eyes review
|
||||
approval_status = Column(String(30), default="none")
|
||||
first_reviewer = Column(String(200))
|
||||
first_reviewed_at = Column(DateTime)
|
||||
second_reviewer = Column(String(200))
|
||||
second_reviewed_at = Column(DateTime)
|
||||
requires_four_eyes = Column(Boolean, default=False)
|
||||
|
||||
# Timestamps
|
||||
collected_at = Column(DateTime, default=datetime.utcnow)
|
||||
created_at = Column(DateTime, default=datetime.utcnow)
|
||||
@@ -332,6 +370,7 @@ class EvidenceDB(Base):
|
||||
__table_args__ = (
|
||||
Index('ix_evidence_control_type', 'control_id', 'evidence_type'),
|
||||
Index('ix_evidence_status', 'status'),
|
||||
Index('ix_evidence_approval_status', 'approval_status'),
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
@@ -1464,3 +1503,77 @@ class ISMSReadinessCheckDB(Base):
|
||||
|
||||
    def __repr__(self):
        # Compact debug representation: check date plus aggregate status.
        return f"<ISMSReadiness {self.check_date}: {self.overall_status}>"
|
||||
|
||||
|
||||
class LLMGenerationAuditDB(Base):
    """
    Audit trail for LLM-generated content.

    Every piece of content generated by an LLM is recorded here with its
    truth_status and may_be_used_as_evidence flag, ensuring transparency
    about what is real evidence vs. generated assistance.
    """
    __tablename__ = 'compliance_llm_generation_audit'

    # Surrogate key + multi-tenant scoping.
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    tenant_id = Column(String(36), index=True)

    # What was generated and for which entity.
    entity_type = Column(String(50), nullable=False)  # 'evidence', 'control', 'document'
    entity_id = Column(String(36))  # FK to generated entity
    generation_mode = Column(String(100), nullable=False)  # 'draft_assistance', 'auto_generation'
    # Generated content starts as GENERATED and is NOT usable as evidence by default.
    truth_status = Column(Enum(EvidenceTruthStatusEnum), nullable=False, default=EvidenceTruthStatusEnum.GENERATED)
    may_be_used_as_evidence = Column(Boolean, nullable=False, default=False)

    # Provenance of the generation itself.
    llm_model = Column(String(100))
    llm_provider = Column(String(50))  # 'ollama', 'anthropic'
    prompt_hash = Column(String(64))  # SHA-256 of prompt
    input_summary = Column(Text)
    output_summary = Column(Text)
    # Physical DB column is named "metadata"; the attribute is renamed because
    # SQLAlchemy reserves `metadata` on declarative classes.
    extra_metadata = Column("metadata", JSON, default=dict)

    # NOTE(review): timestamps use naive datetime.utcnow, consistent with the
    # rest of this module.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    __table_args__ = (
        Index('ix_llm_audit_entity', 'entity_type', 'entity_id'),
    )

    def __repr__(self):
        return f"<LLMGenerationAudit {self.entity_type}:{self.entity_id} mode={self.generation_mode}>"
|
||||
|
||||
|
||||
class AssertionDB(Base):
    """
    Assertion tracking — separates claims from verified facts.

    Each sentence from a control/evidence/document is stored here with its
    classification (assertion vs. fact vs. rationale) and optional evidence linkage.
    """
    __tablename__ = 'compliance_assertions'

    # Surrogate key + multi-tenant scoping.
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    tenant_id = Column(String(36), index=True)

    # Source entity the sentence was extracted from.
    entity_type = Column(String(50), nullable=False)  # 'control', 'evidence', 'document', 'obligation'
    entity_id = Column(String(36), nullable=False)
    sentence_text = Column(Text, nullable=False)
    sentence_index = Column(Integer, nullable=False, default=0)  # position within the source text

    # Classification and supporting-evidence linkage.
    assertion_type = Column(String(20), nullable=False, default='assertion')  # 'assertion' | 'fact' | 'rationale'
    evidence_ids = Column(JSON, default=list)  # list of EvidenceDB ids backing this sentence
    confidence = Column(Float, default=0.0)
    # German normative tiers: 'pflicht' (must) | 'empfehlung' (should) | 'kann' (may).
    normative_tier = Column(String(20))

    # Human verification trail.
    verified_by = Column(String(200))
    verified_at = Column(DateTime)

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    __table_args__ = (
        Index('ix_assertion_entity', 'entity_type', 'entity_id'),
        Index('ix_assertion_type', 'assertion_type'),
    )

    def __repr__(self):
        return f"<Assertion {self.assertion_type}: {self.sentence_text[:50]}>"
|
||||
|
||||
@@ -487,6 +487,137 @@ class ControlRepository:
|
||||
"compliance_score": round(score, 1),
|
||||
}
|
||||
|
||||
def get_multi_dimensional_score(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Calculate multi-dimensional compliance score (Anti-Fake-Evidence).
|
||||
|
||||
Returns 6 dimensions + hard_blocks + overall_readiness.
|
||||
"""
|
||||
from .models import (
|
||||
EvidenceDB, RequirementDB, ControlMappingDB,
|
||||
EvidenceConfidenceEnum, EvidenceTruthStatusEnum,
|
||||
)
|
||||
|
||||
# Weight map for confidence levels
|
||||
conf_weights = {"E0": 0.0, "E1": 0.25, "E2": 0.5, "E3": 0.75, "E4": 1.0}
|
||||
validated_statuses = {"validated_internal", "accepted_by_auditor", "provided_to_auditor"}
|
||||
|
||||
controls = self.get_all()
|
||||
total_controls = len(controls)
|
||||
|
||||
if total_controls == 0:
|
||||
return {
|
||||
"requirement_coverage": 0.0,
|
||||
"evidence_strength": 0.0,
|
||||
"validation_quality": 0.0,
|
||||
"evidence_freshness": 0.0,
|
||||
"control_effectiveness": 0.0,
|
||||
"overall_readiness": 0.0,
|
||||
"hard_blocks": ["Keine Controls vorhanden"],
|
||||
}
|
||||
|
||||
# 1. requirement_coverage: % requirements linked to at least one control
|
||||
total_reqs = self.db.query(func.count(RequirementDB.id)).scalar() or 0
|
||||
linked_reqs = (
|
||||
self.db.query(func.count(func.distinct(ControlMappingDB.requirement_id)))
|
||||
.scalar() or 0
|
||||
)
|
||||
requirement_coverage = (linked_reqs / total_reqs * 100) if total_reqs > 0 else 0.0
|
||||
|
||||
# 2. evidence_strength: weighted average of evidence confidence
|
||||
all_evidence = self.db.query(EvidenceDB).all()
|
||||
if all_evidence:
|
||||
total_weight = 0.0
|
||||
for e in all_evidence:
|
||||
conf_val = e.confidence_level.value if e.confidence_level else "E1"
|
||||
total_weight += conf_weights.get(conf_val, 0.25)
|
||||
evidence_strength = (total_weight / len(all_evidence)) * 100
|
||||
else:
|
||||
evidence_strength = 0.0
|
||||
|
||||
# 3. validation_quality: % evidence with truth_status >= validated_internal
|
||||
if all_evidence:
|
||||
validated_count = sum(
|
||||
1 for e in all_evidence
|
||||
if (e.truth_status.value if e.truth_status else "uploaded") in validated_statuses
|
||||
)
|
||||
validation_quality = (validated_count / len(all_evidence)) * 100
|
||||
else:
|
||||
validation_quality = 0.0
|
||||
|
||||
# 4. evidence_freshness: % evidence not expired and reviewed < 90 days
|
||||
now = datetime.now()
|
||||
if all_evidence:
|
||||
fresh_count = 0
|
||||
for e in all_evidence:
|
||||
is_expired = e.valid_until and e.valid_until < now
|
||||
is_stale = e.reviewed_at and (now - e.reviewed_at).days > 90 if hasattr(e, 'reviewed_at') and e.reviewed_at else False
|
||||
if not is_expired and not is_stale:
|
||||
fresh_count += 1
|
||||
evidence_freshness = (fresh_count / len(all_evidence)) * 100
|
||||
else:
|
||||
evidence_freshness = 0.0
|
||||
|
||||
# 5. control_effectiveness: existing formula
|
||||
passed = sum(1 for c in controls if c.status == ControlStatusEnum.PASS)
|
||||
partial = sum(1 for c in controls if c.status == ControlStatusEnum.PARTIAL)
|
||||
control_effectiveness = ((passed + partial * 0.5) / total_controls) * 100
|
||||
|
||||
# 6. overall_readiness: weighted composite
|
||||
overall_readiness = (
|
||||
0.20 * requirement_coverage +
|
||||
0.25 * evidence_strength +
|
||||
0.20 * validation_quality +
|
||||
0.10 * evidence_freshness +
|
||||
0.25 * control_effectiveness
|
||||
)
|
||||
|
||||
# Hard blocks
|
||||
hard_blocks = []
|
||||
|
||||
# Critical controls without any evidence
|
||||
critical_no_evidence = []
|
||||
for c in controls:
|
||||
if c.status in (ControlStatusEnum.PASS, ControlStatusEnum.PARTIAL):
|
||||
evidence_for_ctrl = [e for e in all_evidence if e.control_id == c.id]
|
||||
if not evidence_for_ctrl:
|
||||
critical_no_evidence.append(c.control_id)
|
||||
if critical_no_evidence:
|
||||
hard_blocks.append(
|
||||
f"{len(critical_no_evidence)} Controls mit Status pass/partial haben keine Evidence: "
|
||||
f"{', '.join(critical_no_evidence[:5])}"
|
||||
)
|
||||
|
||||
# Controls with only E0/E1 evidence claiming pass
|
||||
weak_evidence_pass = []
|
||||
for c in controls:
|
||||
if c.status == ControlStatusEnum.PASS:
|
||||
evidence_for_ctrl = [e for e in all_evidence if e.control_id == c.id]
|
||||
if evidence_for_ctrl:
|
||||
max_conf = max(
|
||||
conf_weights.get(
|
||||
e.confidence_level.value if e.confidence_level else "E1", 0.25
|
||||
)
|
||||
for e in evidence_for_ctrl
|
||||
)
|
||||
if max_conf < 0.5: # Only E0 or E1
|
||||
weak_evidence_pass.append(c.control_id)
|
||||
if weak_evidence_pass:
|
||||
hard_blocks.append(
|
||||
f"{len(weak_evidence_pass)} Controls auf 'pass' haben nur E0/E1-Evidence: "
|
||||
f"{', '.join(weak_evidence_pass[:5])}"
|
||||
)
|
||||
|
||||
return {
|
||||
"requirement_coverage": round(requirement_coverage, 1),
|
||||
"evidence_strength": round(evidence_strength, 1),
|
||||
"validation_quality": round(validation_quality, 1),
|
||||
"evidence_freshness": round(evidence_freshness, 1),
|
||||
"control_effectiveness": round(control_effectiveness, 1),
|
||||
"overall_readiness": round(overall_readiness, 1),
|
||||
"hard_blocks": hard_blocks,
|
||||
}
|
||||
|
||||
|
||||
class ControlMappingRepository:
|
||||
"""Repository for requirement-control mappings."""
|
||||
|
||||
Reference in New Issue
Block a user