feat: Anti-Fake-Evidence System (Phase 1-4b)

Implement full evidence integrity pipeline to prevent compliance theater:
- Confidence levels (E0-E4), truth status tracking, assertion engine
- Four-Eyes approval workflow, audit trail, reject endpoint
- Evidence distribution dashboard, LLM audit routes
- Traceability matrix (backend endpoint + Compliance Hub UI tab)
- Anti-fake badges, control status machine, normative patterns
- 2 migrations, 4 test suites, MkDocs documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-23 17:15:45 +01:00
parent 48ca0a6bef
commit e6201d5239
36 changed files with 5627 additions and 189 deletions

View File

@@ -61,6 +61,8 @@ _ROUTER_MODULES = [
"evidence_check_routes",
"vvt_library_routes",
"tom_mapping_routes",
"llm_audit_routes",
"assertion_routes",
]
_loaded_count = 0

View File

@@ -0,0 +1,227 @@
"""
API routes for Assertion Engine (Anti-Fake-Evidence Phase 2).
Endpoints:
- /assertions: CRUD for assertions
- /assertions/extract: Automatic extraction from entity text
- /assertions/summary: Stats (total assertions, facts, unverified)
"""
import logging
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from ..db.models import AssertionDB
from ..services.assertion_engine import extract_assertions
from .schemas import (
AssertionCreate,
AssertionUpdate,
AssertionResponse,
AssertionListResponse,
AssertionSummaryResponse,
AssertionExtractRequest,
)
from .audit_trail_utils import log_audit_trail, generate_id
logger = logging.getLogger(__name__)
router = APIRouter(tags=["compliance-assertions"])
def _build_assertion_response(a: AssertionDB) -> AssertionResponse:
    """Map an AssertionDB row onto the public API response schema.

    Nullable DB columns get safe defaults: an empty evidence list and a
    0.0 confidence score.
    """
    payload = dict(
        id=a.id,
        tenant_id=a.tenant_id,
        entity_type=a.entity_type,
        entity_id=a.entity_id,
        sentence_text=a.sentence_text,
        sentence_index=a.sentence_index,
        assertion_type=a.assertion_type,
        evidence_ids=a.evidence_ids or [],
        confidence=a.confidence or 0.0,
        normative_tier=a.normative_tier,
        verified_by=a.verified_by,
        verified_at=a.verified_at,
        created_at=a.created_at,
        updated_at=a.updated_at,
    )
    return AssertionResponse(**payload)
@router.post("/assertions", response_model=AssertionResponse)
async def create_assertion(
    data: AssertionCreate,
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Create a single assertion manually.

    Defaults the assertion_type to "assertion" and the evidence list to
    empty. Writes an audit-trail entry so manual creation is traceable,
    consistent with the evidence routes (log_audit_trail was imported in
    this module but never called — an audit gap for an integrity feature).
    """
    a = AssertionDB(
        id=generate_id(),
        tenant_id=tenant_id,
        entity_type=data.entity_type,
        entity_id=data.entity_id,
        sentence_text=data.sentence_text,
        assertion_type=data.assertion_type or "assertion",
        evidence_ids=data.evidence_ids or [],
        normative_tier=data.normative_tier,
    )
    db.add(a)
    # Audit trail: record the manual creation in the same transaction.
    log_audit_trail(
        db, "assertion", a.id, a.sentence_text[:80], "create",
        performed_by="api",
        change_summary=f"Assertion created (type={a.assertion_type})",
    )
    db.commit()
    db.refresh(a)
    return _build_assertion_response(a)
@router.get("/assertions", response_model=AssertionListResponse)
async def list_assertions(
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    assertion_type: Optional[str] = Query(None),
    tenant_id: Optional[str] = Query(None),
    limit: int = Query(100, ge=1, le=500),
    db: Session = Depends(get_db),
):
    """List assertions, optionally narrowed by entity, type, and tenant.

    Results are ordered by sentence_index so callers see assertions in
    document order; `total` reflects the unpaginated match count.
    """
    conditions = []
    if entity_type:
        conditions.append(AssertionDB.entity_type == entity_type)
    if entity_id:
        conditions.append(AssertionDB.entity_id == entity_id)
    if assertion_type:
        conditions.append(AssertionDB.assertion_type == assertion_type)
    if tenant_id:
        conditions.append(AssertionDB.tenant_id == tenant_id)

    query = db.query(AssertionDB)
    for cond in conditions:
        query = query.filter(cond)

    total = query.count()
    rows = query.order_by(AssertionDB.sentence_index.asc()).limit(limit).all()
    return AssertionListResponse(
        assertions=[_build_assertion_response(row) for row in rows],
        total=total,
    )
@router.get("/assertions/summary", response_model=AssertionSummaryResponse)
async def assertion_summary(
    tenant_id: Optional[str] = Query(None),
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Summary stats: total assertions, facts, rationale, unverified.

    Counts are computed with SQL COUNT queries instead of materializing
    every matching row in Python (the original loaded all rows just to
    count them, which scales linearly in memory with table size).
    """
    query = db.query(AssertionDB)
    if tenant_id:
        query = query.filter(AssertionDB.tenant_id == tenant_id)
    if entity_type:
        query = query.filter(AssertionDB.entity_type == entity_type)
    if entity_id:
        query = query.filter(AssertionDB.entity_id == entity_id)
    total = query.count()
    facts = query.filter(AssertionDB.assertion_type == "fact").count()
    rationale = query.filter(AssertionDB.assertion_type == "rationale").count()
    # The original's `not a.verified_by` treated both NULL and "" as
    # unverified; preserve that exactly.
    unverified = query.filter(
        AssertionDB.assertion_type == "assertion",
        (AssertionDB.verified_by.is_(None)) | (AssertionDB.verified_by == ""),
    ).count()
    return AssertionSummaryResponse(
        total_assertions=total,
        total_facts=facts,
        total_rationale=rationale,
        unverified_count=unverified,
    )
@router.get("/assertions/{assertion_id}", response_model=AssertionResponse)
async def get_assertion(
    assertion_id: str,
    db: Session = Depends(get_db),
):
    """Fetch one assertion by primary key; 404 if it does not exist."""
    record = (
        db.query(AssertionDB)
        .filter(AssertionDB.id == assertion_id)
        .first()
    )
    if record is None:
        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")
    return _build_assertion_response(record)
@router.put("/assertions/{assertion_id}", response_model=AssertionResponse)
async def update_assertion(
    assertion_id: str,
    data: AssertionUpdate,
    db: Session = Depends(get_db),
):
    """Update an assertion (e.g. link evidence, change type).

    Only fields explicitly present in the request body are applied
    (exclude_unset), so a partial payload never clears other columns.
    """
    record = (
        db.query(AssertionDB)
        .filter(AssertionDB.id == assertion_id)
        .first()
    )
    if record is None:
        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")

    changes = data.model_dump(exclude_unset=True)
    for field_name, new_value in changes.items():
        setattr(record, field_name, new_value)
    record.updated_at = datetime.utcnow()

    db.commit()
    db.refresh(record)
    return _build_assertion_response(record)
@router.post("/assertions/{assertion_id}/verify", response_model=AssertionResponse)
async def verify_assertion(
    assertion_id: str,
    verified_by: str = Query(...),
    db: Session = Depends(get_db),
):
    """Mark an assertion as verified fact.

    Promotes assertion_type to "fact", stamps the reviewer and timestamps,
    and writes an audit-trail entry. Verification is the central integrity
    event of the anti-fake-evidence pipeline, yet the original recorded
    nothing in the audit trail (log_audit_trail was imported but unused).
    """
    a = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
    if not a:
        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")
    old_type = a.assertion_type
    now = datetime.utcnow()
    a.assertion_type = "fact"
    a.verified_by = verified_by
    a.verified_at = now
    a.updated_at = now
    # Audit trail: record who promoted the assertion, in the same commit.
    log_audit_trail(
        db, "assertion", assertion_id, a.sentence_text[:80], "verify",
        performed_by=verified_by,
        field_changed="assertion_type",
        old_value=old_type,
        new_value="fact",
    )
    db.commit()
    db.refresh(a)
    return _build_assertion_response(a)
@router.post("/assertions/extract", response_model=AssertionListResponse)
async def extract_assertions_endpoint(
    data: AssertionExtractRequest,
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Extract assertions from free text and persist them.

    Delegates sentence splitting/classification to the assertion engine,
    then stores one AssertionDB row per extracted item and returns the
    persisted records.
    """
    extracted = extract_assertions(
        text=data.text,
        entity_type=data.entity_type,
        entity_id=data.entity_id,
        tenant_id=tenant_id,
    )
    rows = [
        AssertionDB(
            id=generate_id(),
            tenant_id=item["tenant_id"],
            entity_type=item["entity_type"],
            entity_id=item["entity_id"],
            sentence_text=item["sentence_text"],
            sentence_index=item["sentence_index"],
            assertion_type=item["assertion_type"],
            evidence_ids=item["evidence_ids"],
            normative_tier=item.get("normative_tier"),
            confidence=item.get("confidence", 0.0),
        )
        for item in extracted
    ]
    db.add_all(rows)
    db.commit()
    # Refresh so DB-generated columns (e.g. timestamps) are populated.
    for row in rows:
        db.refresh(row)
    return AssertionListResponse(
        assertions=[_build_assertion_response(row) for row in rows],
        total=len(rows),
    )

View File

@@ -0,0 +1,53 @@
"""Shared audit trail utilities.
Extracted from isms_routes.py for reuse across evidence, control,
and assertion routes.
"""
import hashlib
import uuid
from datetime import datetime
from typing import Optional

from sqlalchemy.orm import Session

from ..db.models import AuditTrailDB
def generate_id() -> str:
    """Produce a fresh random identifier (UUID4, canonical dashed form)."""
    return f"{uuid.uuid4()}"
def create_signature(data: str) -> str:
    """Return the hex SHA-256 digest of *data* (UTF-8 encoded)."""
    digest = hashlib.sha256()
    digest.update(data.encode())
    return digest.hexdigest()
def log_audit_trail(
    db: Session,
    entity_type: str,
    entity_id: str,
    entity_name: str,
    action: str,
    performed_by: str,
    field_changed: Optional[str] = None,
    old_value: Optional[str] = None,
    new_value: Optional[str] = None,
    change_summary: Optional[str] = None,
) -> None:
    """Add an audit-trail row to the session.

    The row is only added, not committed — the caller owns the transaction,
    so the audit entry commits (or rolls back) together with the change it
    describes. The checksum is a SHA-256 over the identifying fields to make
    tampering with stored entries detectable.

    Note: optional parameters were annotated as plain ``str`` with a ``None``
    default; corrected to ``Optional[str]`` (no runtime behavior change).

    Args:
        db: Active SQLAlchemy session.
        entity_type: Kind of entity ("evidence", "assertion", ...).
        entity_id: Primary key of the affected entity.
        entity_name: Human-readable label for display in the trail.
        action: What happened ("create", "review", "reject", ...).
        performed_by: Actor identity.
        field_changed / old_value / new_value: Optional field-level diff.
        change_summary: Optional free-text description.
    """
    trail = AuditTrailDB(
        id=generate_id(),
        entity_type=entity_type,
        entity_id=entity_id,
        entity_name=entity_name,
        action=action,
        field_changed=field_changed,
        old_value=old_value,
        new_value=new_value,
        change_summary=change_summary,
        performed_by=performed_by,
        performed_at=datetime.utcnow(),
        checksum=create_signature(f"{entity_type}|{entity_id}|{action}|{performed_by}"),
    )
    db.add(trail)

View File

@@ -32,14 +32,21 @@ from ..db import (
ControlRepository,
EvidenceRepository,
RiskRepository,
AssertionDB,
)
from .schemas import (
DashboardResponse,
MultiDimensionalScore,
ExecutiveDashboardResponse,
TrendDataPoint,
RiskSummary,
DeadlineItem,
TeamWorkloadItem,
TraceabilityAssertion,
TraceabilityEvidence,
TraceabilityCoverage,
TraceabilityControl,
TraceabilityMatrixResponse,
)
from .tenant_utils import get_tenant_id as _get_tenant_id
from .db_utils import row_to_dict as _row_to_dict
@@ -95,6 +102,14 @@ async def get_dashboard(db: Session = Depends(get_db)):
# or compute from by_status dict
score = ctrl_stats.get("compliance_score", 0.0)
# Multi-dimensional score (Anti-Fake-Evidence)
try:
ms = ctrl_repo.get_multi_dimensional_score()
multi_score = MultiDimensionalScore(**ms)
except Exception as e:
logger.warning(f"Failed to compute multi-dimensional score: {e}")
multi_score = None
return DashboardResponse(
compliance_score=round(score, 1),
total_regulations=len(regulations),
@@ -107,6 +122,7 @@ async def get_dashboard(db: Session = Depends(get_db)):
total_risks=len(risks),
risks_by_level=risks_by_level,
recent_activity=[],
multi_score=multi_score,
)
@@ -125,11 +141,18 @@ async def get_compliance_score(db: Session = Depends(get_db)):
else:
score = 0
# Multi-dimensional score (Anti-Fake-Evidence)
try:
multi_score = ctrl_repo.get_multi_dimensional_score()
except Exception:
multi_score = None
return {
"score": round(score, 1),
"total_controls": total,
"passing_controls": passing,
"partial_controls": partial,
"multi_score": multi_score,
}
@@ -597,6 +620,158 @@ async def get_score_history(
}
# ============================================================================
# Evidence Distribution (Anti-Fake-Evidence Phase 3)
# ============================================================================
@router.get("/dashboard/evidence-distribution")
async def get_evidence_distribution(
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Evidence counts by confidence level and four-eyes status.

    Missing confidence levels are bucketed as "E1" (the manual/upload
    default); unknown levels are ignored rather than miscounted.
    NOTE(review): tenant_id is resolved but not applied as a filter here —
    presumably the repository is already tenant-scoped; confirm.
    """
    repo = EvidenceRepository(db)
    rows = repo.get_all()

    buckets = {name: 0 for name in ("E0", "E1", "E2", "E3", "E4")}
    pending = 0
    for row in rows:
        level = row.confidence_level.value if row.confidence_level else "E1"
        if level in buckets:
            buckets[level] += 1
        awaiting_approval = row.approval_status not in ("approved", "rejected")
        if row.requires_four_eyes and awaiting_approval:
            pending += 1

    return {
        "by_confidence": buckets,
        "four_eyes_pending": pending,
        "total": len(rows),
    }
# ============================================================================
# Traceability Matrix (Anti-Fake-Evidence Phase 4a)
# ============================================================================
@router.get("/dashboard/traceability-matrix", response_model=TraceabilityMatrixResponse)
async def get_traceability_matrix(
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """
    Full traceability chain: Control → Evidence → Assertions.
    Loads each entity set once, builds in-memory indices, and nests
    the result so the frontend can render a matrix view.

    Summary semantics:
      - covered_controls: controls with at least one evidence record.
      - fully_verified: controls with evidence, at least one assertion,
        and every assertion verified.
    NOTE(review): tenant_id is resolved but not used as an explicit filter
    here — presumably the repositories are tenant-scoped; confirm.
    """
    ctrl_repo = ControlRepository(db)
    evidence_repo = EvidenceRepository(db)
    # 1. Load all three entity sets (three queries total; avoids N+1 lookups
    #    when nesting below).
    controls = ctrl_repo.get_all()
    all_evidence = evidence_repo.get_all()
    all_assertions = db.query(AssertionDB).filter(
        AssertionDB.entity_type == "evidence",
    ).all()
    # 2. Index assertions by evidence_id (entity_id)
    assertions_by_evidence: Dict[str, list] = {}
    for a in all_assertions:
        assertions_by_evidence.setdefault(a.entity_id, []).append(a)
    # 3. Index evidence by control_id (stringified to match ctrl_id below)
    evidence_by_control: Dict[str, list] = {}
    for e in all_evidence:
        evidence_by_control.setdefault(str(e.control_id), []).append(e)
    # 4. Build nested response
    result_controls: list = []
    total_controls = 0
    covered_controls = 0
    fully_verified = 0
    for ctrl in controls:
        total_controls += 1
        ctrl_id = str(ctrl.id)
        ctrl_evidence = evidence_by_control.get(ctrl_id, [])
        nested_evidence: list = []
        has_evidence = len(ctrl_evidence) > 0
        has_assertions = False
        # Starts True and is falsified by any unverified assertion; a control
        # with no assertions at all is also forced to False further down.
        all_verified = True
        # Weakest confidence level seen across this control's evidence.
        min_conf: Optional[str] = None
        conf_order = {"E0": 0, "E1": 1, "E2": 2, "E3": 3, "E4": 4}
        for e in ctrl_evidence:
            ev_id = str(e.id)
            ev_assertions = assertions_by_evidence.get(ev_id, [])
            nested_assertions = [
                TraceabilityAssertion(
                    id=str(a.id),
                    sentence_text=a.sentence_text,
                    assertion_type=a.assertion_type or "assertion",
                    confidence=a.confidence or 0.0,
                    verified=a.verified_by is not None,
                )
                for a in ev_assertions
            ]
            if nested_assertions:
                has_assertions = True
                for na in nested_assertions:
                    if not na.verified:
                        all_verified = False
            # Missing confidence defaults to "E1"; unknown levels rank as E1
            # via conf_order.get(..., 1).
            conf = e.confidence_level.value if e.confidence_level else "E1"
            if min_conf is None or conf_order.get(conf, 1) < conf_order.get(min_conf, 1):
                min_conf = conf
            nested_evidence.append(TraceabilityEvidence(
                id=ev_id,
                title=e.title,
                evidence_type=e.evidence_type,
                confidence_level=conf,
                status=e.status.value if e.status else "valid",
                assertions=nested_assertions,
            ))
        # A control without any assertions can never count as fully verified.
        if not has_assertions:
            all_verified = False
        if has_evidence:
            covered_controls += 1
        if has_evidence and has_assertions and all_verified:
            fully_verified += 1
        coverage = TraceabilityCoverage(
            has_evidence=has_evidence,
            has_assertions=has_assertions,
            all_assertions_verified=all_verified,
            min_confidence_level=min_conf,
        )
        result_controls.append(TraceabilityControl(
            id=ctrl_id,
            control_id=ctrl.control_id,
            title=ctrl.title,
            status=ctrl.status.value if ctrl.status else "planned",
            domain=ctrl.domain.value if ctrl.domain else "unknown",
            evidence=nested_evidence,
            coverage=coverage,
        ))
    summary = {
        "total_controls": total_controls,
        "covered_controls": covered_controls,
        "fully_verified": fully_verified,
        "uncovered_controls": total_controls - covered_controls,
    }
    return TraceabilityMatrixResponse(controls=result_controls, summary=summary)
# ============================================================================
# Reports
# ============================================================================

View File

@@ -26,17 +26,102 @@ from ..db import (
ControlRepository,
EvidenceRepository,
EvidenceStatusEnum,
EvidenceConfidenceEnum,
EvidenceTruthStatusEnum,
)
from ..db.models import EvidenceDB, ControlDB
from ..db.models import EvidenceDB, ControlDB, AuditTrailDB
from ..services.auto_risk_updater import AutoRiskUpdater
from .schemas import (
EvidenceCreate, EvidenceResponse, EvidenceListResponse,
EvidenceRejectRequest,
)
from .audit_trail_utils import log_audit_trail
logger = logging.getLogger(__name__)
router = APIRouter(tags=["compliance-evidence"])
# ============================================================================
# Anti-Fake-Evidence: Four-Eyes Domain Check
# ============================================================================
FOUR_EYES_DOMAINS = {"gov", "priv"}
def _requires_four_eyes(control_domain: str) -> bool:
"""Controls in governance/privacy domains require two independent reviewers."""
return control_domain in FOUR_EYES_DOMAINS
# ============================================================================
# Anti-Fake-Evidence: Auto-Classification Helpers
# ============================================================================
def _classify_confidence(
    source: Optional[str],
    evidence_type: Optional[str] = None,
    artifact_hash: Optional[str] = None,
) -> EvidenceConfidenceEnum:
    """Classify evidence confidence level based on its source.

    Mapping:
      - "ci_pipeline" / "api" -> E3 (system-observed)
      - "manual" / "upload"   -> E1 (human-provided)
      - "generated"           -> E0 (tool/LLM output, lowest trust)
      - anything else / None  -> E1 (conservative default)

    ``evidence_type`` and ``artifact_hash`` are currently unused; they are
    kept for interface stability. The original special-cased "api" with an
    artifact hash, but that branch returned the same E3 as plain "api" —
    dead code, so the two branches are merged here. If hash-verified API
    evidence should rank higher than unverified API evidence, that is a
    behavior change to make deliberately, not silently.
    """
    if source == "ci_pipeline":
        return EvidenceConfidenceEnum.E3
    if source == "api":
        return EvidenceConfidenceEnum.E3
    if source in ("manual", "upload"):
        return EvidenceConfidenceEnum.E1
    if source == "generated":
        return EvidenceConfidenceEnum.E0
    # Default for unknown sources
    return EvidenceConfidenceEnum.E1
def _classify_truth_status(source: Optional[str]) -> EvidenceTruthStatusEnum:
    """Map an evidence source to its truth status.

    ci_pipeline/api evidence is system-observed, manual/upload is
    human-uploaded, "generated" marks tool output; unknown sources fall
    back to UPLOADED.
    """
    status_by_source = {
        "ci_pipeline": EvidenceTruthStatusEnum.OBSERVED,
        "api": EvidenceTruthStatusEnum.OBSERVED,
        "manual": EvidenceTruthStatusEnum.UPLOADED,
        "upload": EvidenceTruthStatusEnum.UPLOADED,
        "generated": EvidenceTruthStatusEnum.GENERATED,
    }
    return status_by_source.get(source, EvidenceTruthStatusEnum.UPLOADED)
def _build_evidence_response(e: EvidenceDB) -> EvidenceResponse:
    """Build an EvidenceResponse from an EvidenceDB, including anti-fake fields.

    Pure field mapping; enum-valued columns are unwrapped to their string
    values (None-safe) so the response schema carries plain strings.
    """
    return EvidenceResponse(
        # Core identity and artifact metadata
        id=e.id,
        control_id=e.control_id,
        evidence_type=e.evidence_type,
        title=e.title,
        description=e.description,
        artifact_path=e.artifact_path,
        artifact_url=e.artifact_url,
        artifact_hash=e.artifact_hash,
        file_size_bytes=e.file_size_bytes,
        mime_type=e.mime_type,
        # Validity window and lifecycle status
        valid_from=e.valid_from,
        valid_until=e.valid_until,
        status=e.status.value if e.status else None,
        source=e.source,
        ci_job_id=e.ci_job_id,
        uploaded_by=e.uploaded_by,
        collected_at=e.collected_at,
        created_at=e.created_at,
        # Anti-fake-evidence classification
        confidence_level=e.confidence_level.value if e.confidence_level else None,
        truth_status=e.truth_status.value if e.truth_status else None,
        generation_mode=e.generation_mode,
        may_be_used_as_evidence=e.may_be_used_as_evidence,
        # Review / Four-Eyes workflow state
        reviewed_by=e.reviewed_by,
        reviewed_at=e.reviewed_at,
        approval_status=e.approval_status,
        first_reviewer=e.first_reviewer,
        first_reviewed_at=e.first_reviewed_at,
        second_reviewer=e.second_reviewer,
        second_reviewed_at=e.second_reviewed_at,
        requires_four_eyes=e.requires_four_eyes,
    )
# ============================================================================
# Evidence
# ============================================================================
@@ -80,29 +165,7 @@ async def list_evidence(
offset = (page - 1) * limit
evidence = evidence[offset:offset + limit]
results = [
EvidenceResponse(
id=e.id,
control_id=e.control_id,
evidence_type=e.evidence_type,
title=e.title,
description=e.description,
artifact_path=e.artifact_path,
artifact_url=e.artifact_url,
artifact_hash=e.artifact_hash,
file_size_bytes=e.file_size_bytes,
mime_type=e.mime_type,
valid_from=e.valid_from,
valid_until=e.valid_until,
status=e.status.value if e.status else None,
source=e.source,
ci_job_id=e.ci_job_id,
uploaded_by=e.uploaded_by,
collected_at=e.collected_at,
created_at=e.created_at,
)
for e in evidence
]
results = [_build_evidence_response(e) for e in evidence]
return EvidenceListResponse(evidence=results, total=total)
@@ -121,6 +184,22 @@ async def create_evidence(
if not control:
raise HTTPException(status_code=404, detail=f"Control {evidence_data.control_id} not found")
source = evidence_data.source or "api"
confidence = _classify_confidence(source, evidence_data.evidence_type)
truth = _classify_truth_status(source)
# Allow explicit override from request
if evidence_data.confidence_level:
try:
confidence = EvidenceConfidenceEnum(evidence_data.confidence_level)
except ValueError:
pass
if evidence_data.truth_status:
try:
truth = EvidenceTruthStatusEnum(evidence_data.truth_status)
except ValueError:
pass
evidence = repo.create(
control_id=control.id,
evidence_type=evidence_data.evidence_type,
@@ -129,31 +208,34 @@ async def create_evidence(
artifact_url=evidence_data.artifact_url,
valid_from=evidence_data.valid_from,
valid_until=evidence_data.valid_until,
source=evidence_data.source or "api",
source=source,
ci_job_id=evidence_data.ci_job_id,
)
# Set anti-fake-evidence fields
evidence.confidence_level = confidence
evidence.truth_status = truth
# Generated evidence should not be used as evidence by default
if truth == EvidenceTruthStatusEnum.GENERATED:
evidence.may_be_used_as_evidence = False
# Four-Eyes: check if the linked control's domain requires it
control_domain = control.domain.value if control.domain else ""
if _requires_four_eyes(control_domain):
evidence.requires_four_eyes = True
evidence.approval_status = "pending_first"
db.commit()
# Audit trail
log_audit_trail(
db, "evidence", evidence.id, evidence.title, "create",
performed_by=evidence_data.source or "api",
change_summary=f"Evidence created with confidence={confidence.value}, truth={truth.value}",
)
db.commit()
return EvidenceResponse(
id=evidence.id,
control_id=evidence.control_id,
evidence_type=evidence.evidence_type,
title=evidence.title,
description=evidence.description,
artifact_path=evidence.artifact_path,
artifact_url=evidence.artifact_url,
artifact_hash=evidence.artifact_hash,
file_size_bytes=evidence.file_size_bytes,
mime_type=evidence.mime_type,
valid_from=evidence.valid_from,
valid_until=evidence.valid_until,
status=evidence.status.value if evidence.status else None,
source=evidence.source,
ci_job_id=evidence.ci_job_id,
uploaded_by=evidence.uploaded_by,
collected_at=evidence.collected_at,
created_at=evidence.created_at,
)
return _build_evidence_response(evidence)
@router.delete("/evidence/{evidence_id}")
@@ -223,28 +305,20 @@ async def upload_evidence(
mime_type=file.content_type,
source="upload",
)
# Upload evidence → E1 + uploaded
evidence.confidence_level = EvidenceConfidenceEnum.E1
evidence.truth_status = EvidenceTruthStatusEnum.UPLOADED
# Four-Eyes: check if the linked control's domain requires it
control_domain = control.domain.value if control.domain else ""
if _requires_four_eyes(control_domain):
evidence.requires_four_eyes = True
evidence.approval_status = "pending_first"
db.commit()
return EvidenceResponse(
id=evidence.id,
control_id=evidence.control_id,
evidence_type=evidence.evidence_type,
title=evidence.title,
description=evidence.description,
artifact_path=evidence.artifact_path,
artifact_url=evidence.artifact_url,
artifact_hash=evidence.artifact_hash,
file_size_bytes=evidence.file_size_bytes,
mime_type=evidence.mime_type,
valid_from=evidence.valid_from,
valid_until=evidence.valid_until,
status=evidence.status.value if evidence.status else None,
source=evidence.source,
ci_job_id=evidence.ci_job_id,
uploaded_by=evidence.uploaded_by,
collected_at=evidence.collected_at,
created_at=evidence.created_at,
)
return _build_evidence_response(evidence)
# ============================================================================
@@ -357,7 +431,7 @@ def _store_evidence(
with open(file_path, "w") as f:
json.dump(report_data or {}, f, indent=2)
# Create evidence record
# Create evidence record with anti-fake-evidence classification
evidence = EvidenceDB(
id=str(uuid_module.uuid4()),
control_id=control_db_id,
@@ -373,6 +447,10 @@ def _store_evidence(
valid_from=datetime.utcnow(),
valid_until=datetime.utcnow() + timedelta(days=90),
status=EvidenceStatusEnum(parsed["evidence_status"]),
# CI pipeline evidence → E3 observed (system-observed, hash-verified)
confidence_level=EvidenceConfidenceEnum.E3,
truth_status=EvidenceTruthStatusEnum.OBSERVED,
may_be_used_as_evidence=True,
)
db.add(evidence)
db.commit()
@@ -639,3 +717,169 @@ async def get_ci_evidence_status(
"total_evidence": len(evidence_list),
"controls": result,
}
# ============================================================================
# Evidence Review (Anti-Fake-Evidence)
# ============================================================================
# Local alias keeps this late-added schema self-contained in the review
# section; presumably BaseModel is not imported at the top of this module —
# confirm and hoist if it is.
from pydantic import BaseModel as _BaseModel
class _EvidenceReviewRequest(_BaseModel):
    """Request body for PATCH /evidence/{evidence_id}/review."""
    # Target confidence level (e.g. "E2"); validated against EvidenceConfidenceEnum.
    confidence_level: Optional[str] = None
    # Target truth status; validated against EvidenceTruthStatusEnum.
    truth_status: Optional[str] = None
    # Reviewer identity — drives the Four-Eyes approval state machine.
    reviewed_by: str
@router.patch("/evidence/{evidence_id}/review", response_model=EvidenceResponse)
async def review_evidence(
    evidence_id: str,
    review: _EvidenceReviewRequest,
    db: Session = Depends(get_db),
):
    """
    Review evidence: upgrade confidence level and/or change truth status.
    For Four-Eyes evidence, the first reviewer sets first_reviewer and
    approval_status='first_approved'. A second (different) reviewer then
    sets second_reviewer and approval_status='approved'.

    Raises:
        404 if the evidence does not exist; 400 for invalid enum values,
        a duplicate second reviewer, or review of already-approved /
        rejected Four-Eyes evidence.
    """
    evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
    if not evidence:
        raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")
    # Snapshot pre-change values for the field-level audit diff below.
    old_confidence = evidence.confidence_level.value if evidence.confidence_level else None
    old_truth = evidence.truth_status.value if evidence.truth_status else None
    if review.confidence_level:
        try:
            evidence.confidence_level = EvidenceConfidenceEnum(review.confidence_level)
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid confidence_level: {review.confidence_level}")
    if review.truth_status:
        try:
            evidence.truth_status = EvidenceTruthStatusEnum(review.truth_status)
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid truth_status: {review.truth_status}")
    # Four-Eyes branching: approval_status advances
    # none/pending_first -> first_approved -> approved; terminal states
    # (approved, rejected) refuse further review.
    if evidence.requires_four_eyes:
        status = evidence.approval_status or "none"
        if status in ("none", "pending_first"):
            evidence.first_reviewer = review.reviewed_by
            evidence.first_reviewed_at = datetime.utcnow()
            evidence.approval_status = "first_approved"
        elif status == "first_approved":
            # Independence check: the two approvals must come from
            # different people.
            if review.reviewed_by == evidence.first_reviewer:
                raise HTTPException(
                    status_code=400,
                    detail="Four-Eyes: second reviewer must be different from first reviewer",
                )
            evidence.second_reviewer = review.reviewed_by
            evidence.second_reviewed_at = datetime.utcnow()
            evidence.approval_status = "approved"
        elif status == "approved":
            raise HTTPException(status_code=400, detail="Evidence already approved")
        elif status == "rejected":
            raise HTTPException(status_code=400, detail="Evidence was rejected — create new evidence instead")
    evidence.reviewed_by = review.reviewed_by
    evidence.reviewed_at = datetime.utcnow()
    # First commit persists the review itself before audit entries are
    # written; a second commit below covers the audit rows.
    db.commit()
    # Audit trail: one entry per field that actually changed.
    new_confidence = evidence.confidence_level.value if evidence.confidence_level else None
    if old_confidence != new_confidence:
        log_audit_trail(
            db, "evidence", evidence_id, evidence.title, "review",
            performed_by=review.reviewed_by,
            field_changed="confidence_level",
            old_value=old_confidence,
            new_value=new_confidence,
        )
    new_truth = evidence.truth_status.value if evidence.truth_status else None
    if old_truth != new_truth:
        log_audit_trail(
            db, "evidence", evidence_id, evidence.title, "review",
            performed_by=review.reviewed_by,
            field_changed="truth_status",
            old_value=old_truth,
            new_value=new_truth,
        )
    db.commit()
    db.refresh(evidence)
    return _build_evidence_response(evidence)
@router.patch("/evidence/{evidence_id}/reject", response_model=EvidenceResponse)
async def reject_evidence(
    evidence_id: str,
    body: EvidenceRejectRequest,
    db: Session = Depends(get_db),
):
    """Reject evidence (sets approval_status='rejected')."""
    record = (
        db.query(EvidenceDB)
        .filter(EvidenceDB.id == evidence_id)
        .first()
    )
    if record is None:
        raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")

    record.approval_status = "rejected"
    record.reviewed_by = body.reviewed_by
    record.reviewed_at = datetime.utcnow()
    db.commit()

    # Audit entry is written after the state change is durable, then
    # committed on its own.
    log_audit_trail(
        db, "evidence", evidence_id, record.title, "reject",
        performed_by=body.reviewed_by,
        change_summary=body.rejection_reason or "Evidence rejected",
    )
    db.commit()
    db.refresh(record)
    return _build_evidence_response(record)
# ============================================================================
# Audit Trail Query
# ============================================================================
@router.get("/audit-trail")
async def get_audit_trail(
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    action: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
):
    """Query audit trail entries, newest first, with optional filters."""
    conditions = []
    if entity_type:
        conditions.append(AuditTrailDB.entity_type == entity_type)
    if entity_id:
        conditions.append(AuditTrailDB.entity_id == entity_id)
    if action:
        conditions.append(AuditTrailDB.action == action)

    query = db.query(AuditTrailDB)
    for cond in conditions:
        query = query.filter(cond)
    rows = query.order_by(AuditTrailDB.performed_at.desc()).limit(limit).all()

    def _serialize(r):
        # Flatten one trail row to a plain JSON-safe dict.
        return {
            "id": r.id,
            "entity_type": r.entity_type,
            "entity_id": r.entity_id,
            "entity_name": r.entity_name,
            "action": r.action,
            "field_changed": r.field_changed,
            "old_value": r.old_value,
            "new_value": r.new_value,
            "change_summary": r.change_summary,
            "performed_by": r.performed_by,
            "performed_at": r.performed_at.isoformat() if r.performed_at else None,
            "checksum": r.checksum,
        }

    return {
        "entries": [_serialize(r) for r in rows],
        "total": len(rows),
    }

View File

@@ -73,39 +73,8 @@ def generate_id() -> str:
return str(uuid.uuid4())
def create_signature(data: str) -> str:
"""Create SHA-256 signature."""
return hashlib.sha256(data.encode()).hexdigest()
def log_audit_trail(
db: Session,
entity_type: str,
entity_id: str,
entity_name: str,
action: str,
performed_by: str,
field_changed: str = None,
old_value: str = None,
new_value: str = None,
change_summary: str = None
):
"""Log an entry to the audit trail."""
trail = AuditTrailDB(
id=generate_id(),
entity_type=entity_type,
entity_id=entity_id,
entity_name=entity_name,
action=action,
field_changed=field_changed,
old_value=old_value,
new_value=new_value,
change_summary=change_summary,
performed_by=performed_by,
performed_at=datetime.utcnow(),
checksum=create_signature(f"{entity_type}|{entity_id}|{action}|{performed_by}")
)
db.add(trail)
# Shared audit trail utilities — canonical implementation in audit_trail_utils.py
from .audit_trail_utils import log_audit_trail, create_signature # noqa: E402
# =============================================================================

View File

@@ -0,0 +1,162 @@
"""
FastAPI routes for LLM Generation Audit Trail.
Endpoints:
- POST /llm-audit: Record an LLM generation event
- GET /llm-audit: List audit records with filters
"""
import logging
import uuid as uuid_module
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, Query
from pydantic import BaseModel
from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from ..db.models import LLMGenerationAuditDB
logger = logging.getLogger(__name__)
router = APIRouter(tags=["compliance-llm-audit"])
# ============================================================================
# Schemas
# ============================================================================
class LLMAuditCreate(BaseModel):
    """Request body for recording one LLM generation event."""
    # What was generated ("evidence", "control", ...) and, if known, its id.
    entity_type: str
    entity_id: Optional[str] = None
    # How the content was produced (free-form mode label).
    generation_mode: str
    # Defaults to the most conservative classification; invalid values are
    # also coerced to "generated" by the route handler.
    truth_status: str = "generated"
    # Generated content must be explicitly opted in as usable evidence.
    may_be_used_as_evidence: bool = False
    llm_model: Optional[str] = None
    llm_provider: Optional[str] = None
    # Hash of the prompt (not the prompt itself) for privacy-safe tracing.
    prompt_hash: Optional[str] = None
    # Free-text summaries; the route truncates each to 500 chars on write.
    input_summary: Optional[str] = None
    output_summary: Optional[str] = None
    metadata: Optional[dict] = None
    tenant_id: Optional[str] = None
class LLMAuditResponse(BaseModel):
    """One stored LLM generation audit record as returned by the API.

    Mirrors LLMAuditCreate plus server-assigned id and created_at; the
    DB column extra_metadata is exposed here as `metadata`.
    """
    id: str
    tenant_id: Optional[str] = None
    entity_type: str
    entity_id: Optional[str] = None
    generation_mode: str
    truth_status: str
    may_be_used_as_evidence: bool
    llm_model: Optional[str] = None
    llm_provider: Optional[str] = None
    prompt_hash: Optional[str] = None
    input_summary: Optional[str] = None
    output_summary: Optional[str] = None
    metadata: Optional[dict] = None
    created_at: datetime

    class Config:
        # Allow construction directly from ORM objects (pydantic v2 flag).
        from_attributes = True
# ============================================================================
# Routes
# ============================================================================
@router.post("/llm-audit", response_model=LLMAuditResponse)
async def create_llm_audit(
    data: LLMAuditCreate,
    db: Session = Depends(get_db),
):
    """Record an LLM generation event for audit trail."""
    from ..db.models import EvidenceTruthStatusEnum

    # Coerce the free-text truth_status onto the enum; unknown values fall
    # back to GENERATED, the most conservative classification.
    try:
        truth_enum = EvidenceTruthStatusEnum(data.truth_status)
    except ValueError:
        truth_enum = EvidenceTruthStatusEnum.GENERATED

    def _clip(text: Optional[str]) -> Optional[str]:
        # Summaries are capped at 500 chars to bound row size.
        return text[:500] if text else None

    record = LLMGenerationAuditDB(
        id=str(uuid_module.uuid4()),
        tenant_id=data.tenant_id,
        entity_type=data.entity_type,
        entity_id=data.entity_id,
        generation_mode=data.generation_mode,
        truth_status=truth_enum,
        may_be_used_as_evidence=data.may_be_used_as_evidence,
        llm_model=data.llm_model,
        llm_provider=data.llm_provider,
        prompt_hash=data.prompt_hash,
        input_summary=_clip(data.input_summary),
        output_summary=_clip(data.output_summary),
        extra_metadata=data.metadata or {},
    )
    db.add(record)
    db.commit()
    db.refresh(record)

    stored_truth = record.truth_status.value if record.truth_status else "generated"
    return LLMAuditResponse(
        id=record.id,
        tenant_id=record.tenant_id,
        entity_type=record.entity_type,
        entity_id=record.entity_id,
        generation_mode=record.generation_mode,
        truth_status=stored_truth,
        may_be_used_as_evidence=record.may_be_used_as_evidence,
        llm_model=record.llm_model,
        llm_provider=record.llm_provider,
        prompt_hash=record.prompt_hash,
        input_summary=record.input_summary,
        output_summary=record.output_summary,
        metadata=record.extra_metadata,
        created_at=record.created_at,
    )
@router.get("/llm-audit")
async def list_llm_audit(
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    page: int = Query(1, ge=1),
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
):
    """List LLM generation audit records with optional filters."""
    # Collect the requested filters, then apply them in one pass.
    conditions = []
    if entity_type:
        conditions.append(LLMGenerationAuditDB.entity_type == entity_type)
    if entity_id:
        conditions.append(LLMGenerationAuditDB.entity_id == entity_id)

    q = db.query(LLMGenerationAuditDB)
    for cond in conditions:
        q = q.filter(cond)

    total = q.count()
    rows = (
        q.order_by(LLMGenerationAuditDB.created_at.desc())
        .offset((page - 1) * limit)
        .limit(limit)
        .all()
    )

    def _as_response(r):
        # Map an ORM row into the API schema; enum serialized to its string value.
        return LLMAuditResponse(
            id=r.id,
            tenant_id=r.tenant_id,
            entity_type=r.entity_type,
            entity_id=r.entity_id,
            generation_mode=r.generation_mode,
            truth_status=r.truth_status.value if r.truth_status else "generated",
            may_be_used_as_evidence=r.may_be_used_as_evidence,
            llm_model=r.llm_model,
            llm_provider=r.llm_provider,
            prompt_hash=r.prompt_hash,
            input_summary=r.input_summary,
            output_summary=r.output_summary,
            metadata=r.extra_metadata,
            created_at=r.created_at,
        )

    return {
        "records": [_as_response(r) for r in rows],
        "total": total,
        "page": page,
        "limit": limit,
    }

View File

@@ -25,6 +25,7 @@ from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from .audit_trail_utils import log_audit_trail
from ..db import (
RegulationRepository,
RequirementRepository,
@@ -595,6 +596,7 @@ async def get_control(control_id: str, db: Session = Depends(get_db)):
review_frequency_days=control.review_frequency_days,
status=control.status.value if control.status else None,
status_notes=control.status_notes,
status_justification=control.status_justification,
last_reviewed_at=control.last_reviewed_at,
next_review_at=control.next_review_at,
created_at=control.created_at,
@@ -617,16 +619,52 @@ async def update_control(
update_data = update.model_dump(exclude_unset=True)
# Convert status string to enum
# Convert status string to enum and validate transition
if "status" in update_data:
try:
update_data["status"] = ControlStatusEnum(update_data["status"])
new_status_enum = ControlStatusEnum(update_data["status"])
except ValueError:
raise HTTPException(status_code=400, detail=f"Invalid status: {update_data['status']}")
# Validate status transition (Anti-Fake-Evidence)
from ..services.control_status_machine import validate_transition
current_status = control.status.value if control.status else "planned"
evidence_list = db.query(EvidenceDB).filter(EvidenceDB.control_id == control.id).all()
allowed, violations = validate_transition(
current_status=current_status,
new_status=update_data["status"],
evidence_list=evidence_list,
status_justification=update_data.get("status_justification") or update_data.get("status_notes"),
)
if not allowed:
raise HTTPException(
status_code=409,
detail={
"error": "Status transition not allowed",
"current_status": current_status,
"requested_status": update_data["status"],
"violations": violations,
}
)
update_data["status"] = new_status_enum
updated = repo.update(control.id, **update_data)
db.commit()
# Audit trail for status changes
new_status = updated.status.value if updated.status else None
if "status" in update.model_dump(exclude_unset=True) and current_status != new_status:
log_audit_trail(
db, "control", control.id, updated.control_id or updated.title,
"status_change",
performed_by=update.owner or "system",
field_changed="status",
old_value=current_status,
new_value=new_status,
)
db.commit()
return ControlResponse(
id=updated.id,
control_id=updated.control_id,
@@ -645,6 +683,7 @@ async def update_control(
review_frequency_days=updated.review_frequency_days,
status=updated.status.value if updated.status else None,
status_notes=updated.status_notes,
status_justification=updated.status_justification,
last_reviewed_at=updated.last_reviewed_at,
next_review_at=updated.next_review_at,
created_at=updated.created_at,
@@ -690,6 +729,7 @@ async def review_control(
review_frequency_days=updated.review_frequency_days,
status=updated.status.value if updated.status else None,
status_notes=updated.status_notes,
status_justification=updated.status_justification,
last_reviewed_at=updated.last_reviewed_at,
next_review_at=updated.next_review_at,
created_at=updated.created_at,

View File

@@ -43,6 +43,7 @@ class ControlStatus(str):
FAIL = "fail"
NOT_APPLICABLE = "n/a"
PLANNED = "planned"
IN_PROGRESS = "in_progress"
class RiskLevel(str):
@@ -209,12 +210,14 @@ class ControlUpdate(BaseModel):
owner: Optional[str] = None
status: Optional[str] = None
status_notes: Optional[str] = None
status_justification: Optional[str] = None
class ControlResponse(ControlBase):
id: str
status: str
status_notes: Optional[str] = None
status_justification: Optional[str] = None
last_reviewed_at: Optional[datetime] = None
next_review_at: Optional[datetime] = None
created_at: datetime
@@ -291,7 +294,8 @@ class EvidenceBase(BaseModel):
class EvidenceCreate(EvidenceBase):
pass
confidence_level: Optional[str] = None
truth_status: Optional[str] = None
class EvidenceResponse(EvidenceBase):
@@ -304,6 +308,20 @@ class EvidenceResponse(EvidenceBase):
uploaded_by: Optional[str] = None
collected_at: datetime
created_at: datetime
# Anti-Fake-Evidence fields
confidence_level: Optional[str] = None
truth_status: Optional[str] = None
generation_mode: Optional[str] = None
may_be_used_as_evidence: Optional[bool] = None
reviewed_by: Optional[str] = None
reviewed_at: Optional[datetime] = None
# Anti-Fake-Evidence Phase 2: Four-Eyes
approval_status: Optional[str] = None
first_reviewer: Optional[str] = None
first_reviewed_at: Optional[datetime] = None
second_reviewer: Optional[str] = None
second_reviewed_at: Optional[datetime] = None
requires_four_eyes: Optional[bool] = None
class Config:
from_attributes = True
@@ -435,6 +453,25 @@ class AISystemListResponse(BaseModel):
# Dashboard & Export Schemas
# ============================================================================
class MultiDimensionalScore(BaseModel):
    """Multi-dimensional compliance score (Anti-Fake-Evidence).

    Each dimension is expressed as a numeric score — the inline comments say
    percentages, but the exact scale (0–1 vs 0–100) is set by the computing
    service; TODO confirm.
    """
    requirement_coverage: float = 0.0  # % requirements with linked control
    evidence_strength: float = 0.0  # Weighted avg of evidence confidence (E0=0..E4=1)
    validation_quality: float = 0.0  # % evidence with truth_status >= validated_internal
    evidence_freshness: float = 0.0  # % evidence not expired + reviewed < 90 days
    control_effectiveness: float = 0.0  # Existing formula (pass + partial*0.5)
    overall_readiness: float = 0.0  # Weighted composite
    hard_blocks: List[str] = []  # Blocking issues preventing audit-readiness (pydantic copies this default per instance)
class StatusTransitionError(BaseModel):
    """Error detail for forbidden control status transitions.

    Mirrors the 409 detail payload returned when a status change is blocked.
    """
    allowed: bool = False
    current_status: str
    requested_status: str
    violations: List[str] = []  # human-readable reasons the transition was refused
class DashboardResponse(BaseModel):
compliance_score: float
total_regulations: int
@@ -447,6 +484,7 @@ class DashboardResponse(BaseModel):
total_risks: int
risks_by_level: Dict[str, int]
recent_activity: List[Dict[str, Any]]
multi_score: Optional[MultiDimensionalScore] = None
class ExportRequest(BaseModel):
@@ -1939,3 +1977,111 @@ class TOMStatsResponse(BaseModel):
implemented: int = 0
partial: int = 0
not_implemented: int = 0
# ============================================================================
# Assertion Schemas (Anti-Fake-Evidence Phase 2)
# ============================================================================
class AssertionCreate(BaseModel):
    """Payload for creating a single assertion on an entity."""

    entity_type: str  # owning entity kind — TODO confirm allowed values
    entity_id: str
    sentence_text: str  # the sentence being asserted
    assertion_type: Optional[str] = "assertion"
    evidence_ids: Optional[List[str]] = []  # evidence items backing this assertion
    normative_tier: Optional[str] = None
class AssertionUpdate(BaseModel):
    """Partial update for an assertion; all fields optional (PATCH semantics)."""

    sentence_text: Optional[str] = None
    assertion_type: Optional[str] = None
    evidence_ids: Optional[List[str]] = None
    normative_tier: Optional[str] = None
    confidence: Optional[float] = None
class AssertionResponse(BaseModel):
    """API representation of a stored assertion."""

    id: str
    tenant_id: Optional[str] = None
    entity_type: str
    entity_id: str
    sentence_text: str
    sentence_index: int = 0  # position of the sentence within the source text
    assertion_type: str = "assertion"
    evidence_ids: Optional[List[str]] = []
    confidence: float = 0.0
    normative_tier: Optional[str] = None
    verified_by: Optional[str] = None  # set when a reviewer has verified the assertion
    verified_at: Optional[datetime] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    class Config:
        # Allow construction from ORM objects (pydantic v2 ORM mode).
        from_attributes = True
class AssertionListResponse(BaseModel):
    """Paginated/aggregated list of assertions."""

    assertions: List[AssertionResponse]
    total: int  # total matching records, not just this page
class AssertionSummaryResponse(BaseModel):
    """Aggregate assertion statistics for the summary endpoint."""

    total_assertions: int = 0
    total_facts: int = 0
    total_rationale: int = 0
    unverified_count: int = 0  # assertions still awaiting verification
class AssertionExtractRequest(BaseModel):
    """Request to automatically extract assertions from free text."""

    entity_type: str
    entity_id: str
    text: str  # raw text to split into assertion sentences
class EvidenceRejectRequest(BaseModel):
    """Payload for rejecting an evidence item during review."""

    reviewed_by: str  # identity of the rejecting reviewer
    rejection_reason: Optional[str] = None
# ============================================================================
# Traceability Matrix (Anti-Fake-Evidence Phase 4a)
# ============================================================================
class TraceabilityAssertion(BaseModel):
    """Single assertion linked to an evidence item."""

    id: str
    sentence_text: str
    assertion_type: str = "assertion"
    confidence: float = 0.0
    verified: bool = False  # True once a reviewer has verified this assertion
class TraceabilityEvidence(BaseModel):
    """Evidence item with nested assertions."""

    id: str
    title: str
    evidence_type: str
    confidence_level: str = "E1"  # E0-E4 confidence scale; E1 default — TODO confirm default choice
    status: str = "valid"
    assertions: List[TraceabilityAssertion] = []
class TraceabilityCoverage(BaseModel):
    """Coverage flags for a single control."""

    has_evidence: bool = False
    has_assertions: bool = False
    all_assertions_verified: bool = False
    min_confidence_level: Optional[str] = None  # weakest confidence level among the control's evidence
class TraceabilityControl(BaseModel):
    """Control with nested evidence and coverage info."""

    id: str
    control_id: str  # human-readable control identifier (distinct from the db id)
    title: str
    status: str = "planned"
    domain: str = "unknown"
    evidence: List[TraceabilityEvidence] = []
    coverage: TraceabilityCoverage = TraceabilityCoverage()  # pydantic copies this default per instance
class TraceabilityMatrixResponse(BaseModel):
    """Full traceability matrix: Controls → Evidence → Assertions."""

    controls: List[TraceabilityControl]
    summary: Dict[str, int]  # aggregate counts — keys set by the backend endpoint; verify against caller