feat: Anti-Fake-Evidence System (Phase 1-4b)

Implement full evidence integrity pipeline to prevent compliance theater:
- Confidence levels (E0-E4), truth status tracking, assertion engine
- Four-Eyes approval workflow, audit trail, reject endpoint
- Evidence distribution dashboard, LLM audit routes
- Traceability matrix (backend endpoint + Compliance Hub UI tab)
- Anti-fake badges, control status machine, normative patterns
- 2 migrations, 4 test suites, MkDocs documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-23 17:15:45 +01:00
parent 48ca0a6bef
commit e6201d5239
36 changed files with 5627 additions and 189 deletions

View File

@@ -61,6 +61,8 @@ _ROUTER_MODULES = [
"evidence_check_routes",
"vvt_library_routes",
"tom_mapping_routes",
"llm_audit_routes",
"assertion_routes",
]
_loaded_count = 0

View File

@@ -0,0 +1,227 @@
"""
API routes for Assertion Engine (Anti-Fake-Evidence Phase 2).
Endpoints:
- /assertions: CRUD for assertions
- /assertions/extract: Automatic extraction from entity text
- /assertions/summary: Stats (total assertions, facts, unverified)
"""
import logging
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from ..db.models import AssertionDB
from ..services.assertion_engine import extract_assertions
from .schemas import (
AssertionCreate,
AssertionUpdate,
AssertionResponse,
AssertionListResponse,
AssertionSummaryResponse,
AssertionExtractRequest,
)
from .audit_trail_utils import log_audit_trail, generate_id
logger = logging.getLogger(__name__)
router = APIRouter(tags=["compliance-assertions"])
def _build_assertion_response(a: AssertionDB) -> AssertionResponse:
    """Map an AssertionDB ORM row onto the public API response schema."""
    payload = {
        "id": a.id,
        "tenant_id": a.tenant_id,
        "entity_type": a.entity_type,
        "entity_id": a.entity_id,
        "sentence_text": a.sentence_text,
        "sentence_index": a.sentence_index,
        "assertion_type": a.assertion_type,
        # NULL-able columns are normalized to safe defaults for the API.
        "evidence_ids": a.evidence_ids or [],
        "confidence": a.confidence or 0.0,
        "normative_tier": a.normative_tier,
        "verified_by": a.verified_by,
        "verified_at": a.verified_at,
        "created_at": a.created_at,
        "updated_at": a.updated_at,
    }
    return AssertionResponse(**payload)
@router.post("/assertions", response_model=AssertionResponse)
async def create_assertion(
    data: AssertionCreate,
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Create a single assertion manually."""
    record = AssertionDB(
        id=generate_id(),
        tenant_id=tenant_id,
        entity_type=data.entity_type,
        entity_id=data.entity_id,
        sentence_text=data.sentence_text,
        # Unclassified sentences default to the generic "assertion" type.
        assertion_type=data.assertion_type or "assertion",
        evidence_ids=data.evidence_ids or [],
        normative_tier=data.normative_tier,
    )
    db.add(record)
    db.commit()
    db.refresh(record)
    return _build_assertion_response(record)
@router.get("/assertions", response_model=AssertionListResponse)
async def list_assertions(
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    assertion_type: Optional[str] = Query(None),
    tenant_id: Optional[str] = Query(None),
    limit: int = Query(100, ge=1, le=500),
    db: Session = Depends(get_db),
):
    """List assertions with optional filters."""
    # Table-driven filtering: apply each (column, value) pair only when the
    # client actually supplied a value for it.
    conditions = (
        (AssertionDB.entity_type, entity_type),
        (AssertionDB.entity_id, entity_id),
        (AssertionDB.assertion_type, assertion_type),
        (AssertionDB.tenant_id, tenant_id),
    )
    query = db.query(AssertionDB)
    for column, value in conditions:
        if value:
            query = query.filter(column == value)
    total = query.count()
    rows = query.order_by(AssertionDB.sentence_index.asc()).limit(limit).all()
    return AssertionListResponse(
        assertions=[_build_assertion_response(row) for row in rows],
        total=total,
    )
@router.get("/assertions/summary", response_model=AssertionSummaryResponse)
async def assertion_summary(
    tenant_id: Optional[str] = Query(None),
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Summary stats: total assertions, facts, rationale, unverified."""
    query = db.query(AssertionDB)
    if tenant_id:
        query = query.filter(AssertionDB.tenant_id == tenant_id)
    if entity_type:
        query = query.filter(AssertionDB.entity_type == entity_type)
    if entity_id:
        query = query.filter(AssertionDB.entity_id == entity_id)
    # Single pass over the matching rows (instead of one scan per metric).
    total = facts = rationale = unverified = 0
    for row in query.all():
        total += 1
        kind = row.assertion_type
        if kind == "fact":
            facts += 1
        elif kind == "rationale":
            rationale += 1
        # "Unverified" = still a plain assertion with nobody signed off.
        if kind == "assertion" and not row.verified_by:
            unverified += 1
    return AssertionSummaryResponse(
        total_assertions=total,
        total_facts=facts,
        total_rationale=rationale,
        unverified_count=unverified,
    )
@router.get("/assertions/{assertion_id}", response_model=AssertionResponse)
async def get_assertion(
    assertion_id: str,
    db: Session = Depends(get_db),
):
    """Get a single assertion by ID."""
    record = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")
    return _build_assertion_response(record)
@router.put("/assertions/{assertion_id}", response_model=AssertionResponse)
async def update_assertion(
    assertion_id: str,
    data: AssertionUpdate,
    db: Session = Depends(get_db),
):
    """Update an assertion (e.g. link evidence, change type)."""
    record = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")
    # Patch semantics: only fields the client explicitly sent are written.
    for field_name, field_value in data.model_dump(exclude_unset=True).items():
        setattr(record, field_name, field_value)
    record.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(record)
    return _build_assertion_response(record)
@router.post("/assertions/{assertion_id}/verify", response_model=AssertionResponse)
async def verify_assertion(
    assertion_id: str,
    verified_by: str = Query(...),
    db: Session = Depends(get_db),
):
    """Mark an assertion as verified fact."""
    record = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")
    # Verification promotes the sentence to a "fact" and stamps the reviewer.
    stamp = datetime.utcnow()
    record.assertion_type = "fact"
    record.verified_by = verified_by
    record.verified_at = stamp
    record.updated_at = stamp
    db.commit()
    db.refresh(record)
    return _build_assertion_response(record)
@router.post("/assertions/extract", response_model=AssertionListResponse)
async def extract_assertions_endpoint(
    data: AssertionExtractRequest,
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Extract assertions from free text and persist them."""
    extracted = extract_assertions(
        text=data.text,
        entity_type=data.entity_type,
        entity_id=data.entity_id,
        tenant_id=tenant_id,
    )
    # Persist every extracted sentence as its own assertion row.
    rows = [
        AssertionDB(
            id=generate_id(),
            tenant_id=item["tenant_id"],
            entity_type=item["entity_type"],
            entity_id=item["entity_id"],
            sentence_text=item["sentence_text"],
            sentence_index=item["sentence_index"],
            assertion_type=item["assertion_type"],
            evidence_ids=item["evidence_ids"],
            normative_tier=item.get("normative_tier"),
            confidence=item.get("confidence", 0.0),
        )
        for item in extracted
    ]
    db.add_all(rows)
    db.commit()
    for row in rows:
        db.refresh(row)
    return AssertionListResponse(
        assertions=[_build_assertion_response(row) for row in rows],
        total=len(rows),
    )

View File

@@ -0,0 +1,53 @@
"""Shared audit trail utilities.
Extracted from isms_routes.py for reuse across evidence, control,
and assertion routes.
"""
import hashlib
import uuid
from datetime import datetime
from sqlalchemy.orm import Session
from ..db.models import AuditTrailDB
def generate_id() -> str:
    """Return a fresh random (version 4) UUID in its canonical string form."""
    return f"{uuid.uuid4()}"
def create_signature(data: str) -> str:
    """Return the hex SHA-256 digest of *data* (UTF-8 encoded)."""
    digest = hashlib.sha256()
    digest.update(data.encode())
    return digest.hexdigest()
def log_audit_trail(
    db: Session,
    entity_type: str,
    entity_id: str,
    entity_name: str,
    action: str,
    performed_by: str,
    field_changed: str = None,
    old_value: str = None,
    new_value: str = None,
    change_summary: str = None,
):
    """Append one audit-trail row to the session; the caller commits."""
    # The checksum binds the who/what/where of the entry; note it does not
    # cover old/new values.
    signature_input = f"{entity_type}|{entity_id}|{action}|{performed_by}"
    entry = AuditTrailDB(
        id=generate_id(),
        entity_type=entity_type,
        entity_id=entity_id,
        entity_name=entity_name,
        action=action,
        field_changed=field_changed,
        old_value=old_value,
        new_value=new_value,
        change_summary=change_summary,
        performed_by=performed_by,
        performed_at=datetime.utcnow(),
        checksum=create_signature(signature_input),
    )
    db.add(entry)

View File

@@ -32,14 +32,21 @@ from ..db import (
ControlRepository,
EvidenceRepository,
RiskRepository,
AssertionDB,
)
from .schemas import (
DashboardResponse,
MultiDimensionalScore,
ExecutiveDashboardResponse,
TrendDataPoint,
RiskSummary,
DeadlineItem,
TeamWorkloadItem,
TraceabilityAssertion,
TraceabilityEvidence,
TraceabilityCoverage,
TraceabilityControl,
TraceabilityMatrixResponse,
)
from .tenant_utils import get_tenant_id as _get_tenant_id
from .db_utils import row_to_dict as _row_to_dict
@@ -95,6 +102,14 @@ async def get_dashboard(db: Session = Depends(get_db)):
# or compute from by_status dict
score = ctrl_stats.get("compliance_score", 0.0)
# Multi-dimensional score (Anti-Fake-Evidence)
try:
ms = ctrl_repo.get_multi_dimensional_score()
multi_score = MultiDimensionalScore(**ms)
except Exception as e:
logger.warning(f"Failed to compute multi-dimensional score: {e}")
multi_score = None
return DashboardResponse(
compliance_score=round(score, 1),
total_regulations=len(regulations),
@@ -107,6 +122,7 @@ async def get_dashboard(db: Session = Depends(get_db)):
total_risks=len(risks),
risks_by_level=risks_by_level,
recent_activity=[],
multi_score=multi_score,
)
@@ -125,11 +141,18 @@ async def get_compliance_score(db: Session = Depends(get_db)):
else:
score = 0
# Multi-dimensional score (Anti-Fake-Evidence)
try:
multi_score = ctrl_repo.get_multi_dimensional_score()
except Exception:
multi_score = None
return {
"score": round(score, 1),
"total_controls": total,
"passing_controls": passing,
"partial_controls": partial,
"multi_score": multi_score,
}
@@ -597,6 +620,158 @@ async def get_score_history(
}
# ============================================================================
# Evidence Distribution (Anti-Fake-Evidence Phase 3)
# ============================================================================
@router.get("/dashboard/evidence-distribution")
async def get_evidence_distribution(
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Evidence counts by confidence level and four-eyes status."""
    # NOTE(review): tenant_id is resolved but get_all() is not visibly
    # tenant-filtered here — confirm whether the repository scopes by tenant.
    records = EvidenceRepository(db).get_all()
    by_confidence = dict.fromkeys(("E0", "E1", "E2", "E3", "E4"), 0)
    pending = 0
    for rec in records:
        # Missing confidence is treated as the E1 baseline.
        level = rec.confidence_level.value if rec.confidence_level else "E1"
        if level in by_confidence:
            by_confidence[level] += 1
        # Pending = four-eyes required but not yet finally approved/rejected.
        if rec.requires_four_eyes and rec.approval_status not in ("approved", "rejected"):
            pending += 1
    return {
        "by_confidence": by_confidence,
        "four_eyes_pending": pending,
        "total": len(records),
    }
# ============================================================================
# Traceability Matrix (Anti-Fake-Evidence Phase 4a)
# ============================================================================
@router.get("/dashboard/traceability-matrix", response_model=TraceabilityMatrixResponse)
async def get_traceability_matrix(
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """
    Full traceability chain: Control → Evidence → Assertions.

    Loads each entity set once, builds in-memory indices, and nests
    the result so the frontend can render a matrix view.

    Returns:
        TraceabilityMatrixResponse with one TraceabilityControl per control
        (nested evidence + assertions plus coverage flags) and a summary
        dict of aggregate counts.
    """
    ctrl_repo = ControlRepository(db)
    evidence_repo = EvidenceRepository(db)
    # 1. Load all three entity sets (only evidence-scoped assertions matter)
    controls = ctrl_repo.get_all()
    all_evidence = evidence_repo.get_all()
    all_assertions = db.query(AssertionDB).filter(
        AssertionDB.entity_type == "evidence",
    ).all()
    # 2. Index assertions by evidence_id (entity_id)
    assertions_by_evidence: Dict[str, list] = {}
    for a in all_assertions:
        assertions_by_evidence.setdefault(a.entity_id, []).append(a)
    # 3. Index evidence by control_id
    evidence_by_control: Dict[str, list] = {}
    for e in all_evidence:
        evidence_by_control.setdefault(str(e.control_id), []).append(e)
    # 4. Build nested response
    # Confidence ranking is loop-invariant — hoisted out of the per-control
    # loop (it was previously rebuilt on every iteration).
    conf_order = {"E0": 0, "E1": 1, "E2": 2, "E3": 3, "E4": 4}
    result_controls: list = []
    covered_controls = 0
    fully_verified = 0
    for ctrl in controls:
        ctrl_id = str(ctrl.id)
        ctrl_evidence = evidence_by_control.get(ctrl_id, [])
        nested_evidence: list = []
        has_evidence = len(ctrl_evidence) > 0
        has_assertions = False
        all_verified = True
        min_conf: Optional[str] = None
        for e in ctrl_evidence:
            ev_id = str(e.id)
            ev_assertions = assertions_by_evidence.get(ev_id, [])
            nested_assertions = [
                TraceabilityAssertion(
                    id=str(a.id),
                    sentence_text=a.sentence_text,
                    assertion_type=a.assertion_type or "assertion",
                    confidence=a.confidence or 0.0,
                    verified=a.verified_by is not None,
                )
                for a in ev_assertions
            ]
            if nested_assertions:
                has_assertions = True
                for na in nested_assertions:
                    if not na.verified:
                        all_verified = False
            # Track the weakest confidence level across this control's evidence.
            conf = e.confidence_level.value if e.confidence_level else "E1"
            if min_conf is None or conf_order.get(conf, 1) < conf_order.get(min_conf, 1):
                min_conf = conf
            nested_evidence.append(TraceabilityEvidence(
                id=ev_id,
                title=e.title,
                evidence_type=e.evidence_type,
                confidence_level=conf,
                status=e.status.value if e.status else "valid",
                assertions=nested_assertions,
            ))
        # A control with no assertions at all can never be fully verified.
        if not has_assertions:
            all_verified = False
        if has_evidence:
            covered_controls += 1
        if has_evidence and has_assertions and all_verified:
            fully_verified += 1
        coverage = TraceabilityCoverage(
            has_evidence=has_evidence,
            has_assertions=has_assertions,
            all_assertions_verified=all_verified,
            min_confidence_level=min_conf,
        )
        result_controls.append(TraceabilityControl(
            id=ctrl_id,
            control_id=ctrl.control_id,
            title=ctrl.title,
            status=ctrl.status.value if ctrl.status else "planned",
            domain=ctrl.domain.value if ctrl.domain else "unknown",
            evidence=nested_evidence,
            coverage=coverage,
        ))
    # Derived from the loaded set (replaces the redundant manual counter).
    total_controls = len(controls)
    summary = {
        "total_controls": total_controls,
        "covered_controls": covered_controls,
        "fully_verified": fully_verified,
        "uncovered_controls": total_controls - covered_controls,
    }
    return TraceabilityMatrixResponse(controls=result_controls, summary=summary)
# ============================================================================
# Reports
# ============================================================================

View File

@@ -26,17 +26,102 @@ from ..db import (
ControlRepository,
EvidenceRepository,
EvidenceStatusEnum,
EvidenceConfidenceEnum,
EvidenceTruthStatusEnum,
)
from ..db.models import EvidenceDB, ControlDB
from ..db.models import EvidenceDB, ControlDB, AuditTrailDB
from ..services.auto_risk_updater import AutoRiskUpdater
from .schemas import (
EvidenceCreate, EvidenceResponse, EvidenceListResponse,
EvidenceRejectRequest,
)
from .audit_trail_utils import log_audit_trail
logger = logging.getLogger(__name__)
router = APIRouter(tags=["compliance-evidence"])
# ============================================================================
# Anti-Fake-Evidence: Four-Eyes Domain Check
# ============================================================================
FOUR_EYES_DOMAINS = {"gov", "priv"}
def _requires_four_eyes(control_domain: str) -> bool:
"""Controls in governance/privacy domains require two independent reviewers."""
return control_domain in FOUR_EYES_DOMAINS
# ============================================================================
# Anti-Fake-Evidence: Auto-Classification Helpers
# ============================================================================
def _classify_confidence(source: Optional[str], evidence_type: Optional[str] = None, artifact_hash: Optional[str] = None) -> EvidenceConfidenceEnum:
    """Classify evidence confidence level based on source and metadata.

    Current policy:
        ci_pipeline       -> E3
        api               -> E3 (with or without an artifact hash)
        manual / upload   -> E1
        generated         -> E0
        anything else     -> E1 (conservative default)

    ``evidence_type`` and ``artifact_hash`` are accepted for interface
    compatibility but do not currently influence the result.
    """
    # NOTE(review): the original code had separate branches for
    # "api with artifact_hash" and plain "api" that both returned E3 — the
    # duplicate branch is collapsed here (behavior unchanged). If hash-less
    # API evidence was intended to rank lower (e.g. E2), that is a policy
    # change to make explicitly.
    if source in ("ci_pipeline", "api"):
        return EvidenceConfidenceEnum.E3
    if source in ("manual", "upload"):
        return EvidenceConfidenceEnum.E1
    if source == "generated":
        return EvidenceConfidenceEnum.E0
    # Default for unknown sources
    return EvidenceConfidenceEnum.E1
def _classify_truth_status(source: Optional[str]) -> EvidenceTruthStatusEnum:
    """Classify evidence truth status based on source.

    Unknown (or missing) sources default to UPLOADED.
    """
    source_to_status = {
        "ci_pipeline": EvidenceTruthStatusEnum.OBSERVED,
        "api": EvidenceTruthStatusEnum.OBSERVED,
        "manual": EvidenceTruthStatusEnum.UPLOADED,
        "upload": EvidenceTruthStatusEnum.UPLOADED,
        "generated": EvidenceTruthStatusEnum.GENERATED,
    }
    return source_to_status.get(source, EvidenceTruthStatusEnum.UPLOADED)
def _build_evidence_response(e: EvidenceDB) -> EvidenceResponse:
    """Build an EvidenceResponse from an EvidenceDB, including anti-fake fields."""
    payload = dict(
        id=e.id,
        control_id=e.control_id,
        evidence_type=e.evidence_type,
        title=e.title,
        description=e.description,
        artifact_path=e.artifact_path,
        artifact_url=e.artifact_url,
        artifact_hash=e.artifact_hash,
        file_size_bytes=e.file_size_bytes,
        mime_type=e.mime_type,
        valid_from=e.valid_from,
        valid_until=e.valid_until,
        # Enum columns are exposed as their raw string values (or None).
        status=e.status.value if e.status else None,
        source=e.source,
        ci_job_id=e.ci_job_id,
        uploaded_by=e.uploaded_by,
        collected_at=e.collected_at,
        created_at=e.created_at,
        confidence_level=e.confidence_level.value if e.confidence_level else None,
        truth_status=e.truth_status.value if e.truth_status else None,
        generation_mode=e.generation_mode,
        may_be_used_as_evidence=e.may_be_used_as_evidence,
        reviewed_by=e.reviewed_by,
        reviewed_at=e.reviewed_at,
        approval_status=e.approval_status,
        first_reviewer=e.first_reviewer,
        first_reviewed_at=e.first_reviewed_at,
        second_reviewer=e.second_reviewer,
        second_reviewed_at=e.second_reviewed_at,
        requires_four_eyes=e.requires_four_eyes,
    )
    return EvidenceResponse(**payload)
# ============================================================================
# Evidence
# ============================================================================
@@ -80,29 +165,7 @@ async def list_evidence(
offset = (page - 1) * limit
evidence = evidence[offset:offset + limit]
results = [
EvidenceResponse(
id=e.id,
control_id=e.control_id,
evidence_type=e.evidence_type,
title=e.title,
description=e.description,
artifact_path=e.artifact_path,
artifact_url=e.artifact_url,
artifact_hash=e.artifact_hash,
file_size_bytes=e.file_size_bytes,
mime_type=e.mime_type,
valid_from=e.valid_from,
valid_until=e.valid_until,
status=e.status.value if e.status else None,
source=e.source,
ci_job_id=e.ci_job_id,
uploaded_by=e.uploaded_by,
collected_at=e.collected_at,
created_at=e.created_at,
)
for e in evidence
]
results = [_build_evidence_response(e) for e in evidence]
return EvidenceListResponse(evidence=results, total=total)
@@ -121,6 +184,22 @@ async def create_evidence(
if not control:
raise HTTPException(status_code=404, detail=f"Control {evidence_data.control_id} not found")
source = evidence_data.source or "api"
confidence = _classify_confidence(source, evidence_data.evidence_type)
truth = _classify_truth_status(source)
# Allow explicit override from request
if evidence_data.confidence_level:
try:
confidence = EvidenceConfidenceEnum(evidence_data.confidence_level)
except ValueError:
pass
if evidence_data.truth_status:
try:
truth = EvidenceTruthStatusEnum(evidence_data.truth_status)
except ValueError:
pass
evidence = repo.create(
control_id=control.id,
evidence_type=evidence_data.evidence_type,
@@ -129,31 +208,34 @@ async def create_evidence(
artifact_url=evidence_data.artifact_url,
valid_from=evidence_data.valid_from,
valid_until=evidence_data.valid_until,
source=evidence_data.source or "api",
source=source,
ci_job_id=evidence_data.ci_job_id,
)
# Set anti-fake-evidence fields
evidence.confidence_level = confidence
evidence.truth_status = truth
# Generated evidence should not be used as evidence by default
if truth == EvidenceTruthStatusEnum.GENERATED:
evidence.may_be_used_as_evidence = False
# Four-Eyes: check if the linked control's domain requires it
control_domain = control.domain.value if control.domain else ""
if _requires_four_eyes(control_domain):
evidence.requires_four_eyes = True
evidence.approval_status = "pending_first"
db.commit()
# Audit trail
log_audit_trail(
db, "evidence", evidence.id, evidence.title, "create",
performed_by=evidence_data.source or "api",
change_summary=f"Evidence created with confidence={confidence.value}, truth={truth.value}",
)
db.commit()
return EvidenceResponse(
id=evidence.id,
control_id=evidence.control_id,
evidence_type=evidence.evidence_type,
title=evidence.title,
description=evidence.description,
artifact_path=evidence.artifact_path,
artifact_url=evidence.artifact_url,
artifact_hash=evidence.artifact_hash,
file_size_bytes=evidence.file_size_bytes,
mime_type=evidence.mime_type,
valid_from=evidence.valid_from,
valid_until=evidence.valid_until,
status=evidence.status.value if evidence.status else None,
source=evidence.source,
ci_job_id=evidence.ci_job_id,
uploaded_by=evidence.uploaded_by,
collected_at=evidence.collected_at,
created_at=evidence.created_at,
)
return _build_evidence_response(evidence)
@router.delete("/evidence/{evidence_id}")
@@ -223,28 +305,20 @@ async def upload_evidence(
mime_type=file.content_type,
source="upload",
)
# Upload evidence → E1 + uploaded
evidence.confidence_level = EvidenceConfidenceEnum.E1
evidence.truth_status = EvidenceTruthStatusEnum.UPLOADED
# Four-Eyes: check if the linked control's domain requires it
control_domain = control.domain.value if control.domain else ""
if _requires_four_eyes(control_domain):
evidence.requires_four_eyes = True
evidence.approval_status = "pending_first"
db.commit()
return EvidenceResponse(
id=evidence.id,
control_id=evidence.control_id,
evidence_type=evidence.evidence_type,
title=evidence.title,
description=evidence.description,
artifact_path=evidence.artifact_path,
artifact_url=evidence.artifact_url,
artifact_hash=evidence.artifact_hash,
file_size_bytes=evidence.file_size_bytes,
mime_type=evidence.mime_type,
valid_from=evidence.valid_from,
valid_until=evidence.valid_until,
status=evidence.status.value if evidence.status else None,
source=evidence.source,
ci_job_id=evidence.ci_job_id,
uploaded_by=evidence.uploaded_by,
collected_at=evidence.collected_at,
created_at=evidence.created_at,
)
return _build_evidence_response(evidence)
# ============================================================================
@@ -357,7 +431,7 @@ def _store_evidence(
with open(file_path, "w") as f:
json.dump(report_data or {}, f, indent=2)
# Create evidence record
# Create evidence record with anti-fake-evidence classification
evidence = EvidenceDB(
id=str(uuid_module.uuid4()),
control_id=control_db_id,
@@ -373,6 +447,10 @@ def _store_evidence(
valid_from=datetime.utcnow(),
valid_until=datetime.utcnow() + timedelta(days=90),
status=EvidenceStatusEnum(parsed["evidence_status"]),
# CI pipeline evidence → E3 observed (system-observed, hash-verified)
confidence_level=EvidenceConfidenceEnum.E3,
truth_status=EvidenceTruthStatusEnum.OBSERVED,
may_be_used_as_evidence=True,
)
db.add(evidence)
db.commit()
@@ -639,3 +717,169 @@ async def get_ci_evidence_status(
"total_evidence": len(evidence_list),
"controls": result,
}
# ============================================================================
# Evidence Review (Anti-Fake-Evidence)
# ============================================================================
from pydantic import BaseModel as _BaseModel
class _EvidenceReviewRequest(_BaseModel):
    """Request body for PATCH /evidence/{id}/review."""
    # Optional new confidence level string; validated against
    # EvidenceConfidenceEnum in the handler (400 on invalid values).
    confidence_level: Optional[str] = None
    # Optional new truth status string; validated against
    # EvidenceTruthStatusEnum in the handler (400 on invalid values).
    truth_status: Optional[str] = None
    # Required identifier of the reviewer performing this review.
    reviewed_by: str
@router.patch("/evidence/{evidence_id}/review", response_model=EvidenceResponse)
async def review_evidence(
    evidence_id: str,
    review: _EvidenceReviewRequest,
    db: Session = Depends(get_db),
):
    """
    Review evidence: upgrade confidence level and/or change truth status.
    For Four-Eyes evidence, the first reviewer sets first_reviewer and
    approval_status='first_approved'. A second (different) reviewer then
    sets second_reviewer and approval_status='approved'.

    Raises:
        HTTPException 404 if the evidence does not exist.
        HTTPException 400 on invalid enum values, a repeated reviewer,
        or an already approved/rejected record.
    """
    evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
    if not evidence:
        raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")
    # Capture pre-change values so the audit trail can record the delta.
    old_confidence = evidence.confidence_level.value if evidence.confidence_level else None
    old_truth = evidence.truth_status.value if evidence.truth_status else None
    if review.confidence_level:
        try:
            evidence.confidence_level = EvidenceConfidenceEnum(review.confidence_level)
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid confidence_level: {review.confidence_level}")
    if review.truth_status:
        try:
            evidence.truth_status = EvidenceTruthStatusEnum(review.truth_status)
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid truth_status: {review.truth_status}")
    # Four-Eyes branching: approval_status acts as a small state machine
    # (none/pending_first -> first_approved -> approved | rejected).
    if evidence.requires_four_eyes:
        status = evidence.approval_status or "none"
        if status in ("none", "pending_first"):
            # First sign-off.
            evidence.first_reviewer = review.reviewed_by
            evidence.first_reviewed_at = datetime.utcnow()
            evidence.approval_status = "first_approved"
        elif status == "first_approved":
            # Second sign-off must come from a DIFFERENT reviewer.
            if review.reviewed_by == evidence.first_reviewer:
                raise HTTPException(
                    status_code=400,
                    detail="Four-Eyes: second reviewer must be different from first reviewer",
                )
            evidence.second_reviewer = review.reviewed_by
            evidence.second_reviewed_at = datetime.utcnow()
            evidence.approval_status = "approved"
        elif status == "approved":
            raise HTTPException(status_code=400, detail="Evidence already approved")
        elif status == "rejected":
            raise HTTPException(status_code=400, detail="Evidence was rejected — create new evidence instead")
    evidence.reviewed_by = review.reviewed_by
    evidence.reviewed_at = datetime.utcnow()
    # First commit persists the review itself; audit entries are added and
    # committed afterwards (so audit rows reflect the committed change).
    db.commit()
    # Audit trail
    new_confidence = evidence.confidence_level.value if evidence.confidence_level else None
    if old_confidence != new_confidence:
        log_audit_trail(
            db, "evidence", evidence_id, evidence.title, "review",
            performed_by=review.reviewed_by,
            field_changed="confidence_level",
            old_value=old_confidence,
            new_value=new_confidence,
        )
    new_truth = evidence.truth_status.value if evidence.truth_status else None
    if old_truth != new_truth:
        log_audit_trail(
            db, "evidence", evidence_id, evidence.title, "review",
            performed_by=review.reviewed_by,
            field_changed="truth_status",
            old_value=old_truth,
            new_value=new_truth,
        )
    db.commit()
    db.refresh(evidence)
    return _build_evidence_response(evidence)
@router.patch("/evidence/{evidence_id}/reject", response_model=EvidenceResponse)
async def reject_evidence(
    evidence_id: str,
    body: EvidenceRejectRequest,
    db: Session = Depends(get_db),
):
    """Reject evidence (sets approval_status='rejected').

    Args:
        evidence_id: Primary key of the evidence record to reject.
        body: Carries ``reviewed_by`` and an optional ``rejection_reason``.

    Raises:
        HTTPException: 404 if the evidence does not exist.
    """
    evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
    if not evidence:
        raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")
    evidence.approval_status = "rejected"
    evidence.reviewed_by = body.reviewed_by
    evidence.reviewed_at = datetime.utcnow()
    # Queue the audit entry BEFORE committing so the rejection and its audit
    # record land in one transaction (previously the status change was
    # committed first, so a failure could persist a rejection with no audit
    # trail). log_audit_trail only stages the row; commit() persists both.
    log_audit_trail(
        db, "evidence", evidence_id, evidence.title, "reject",
        performed_by=body.reviewed_by,
        change_summary=body.rejection_reason or "Evidence rejected",
    )
    db.commit()
    db.refresh(evidence)
    return _build_evidence_response(evidence)
# ============================================================================
# Audit Trail Query
# ============================================================================
@router.get("/audit-trail")
async def get_audit_trail(
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    action: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
):
    """Query audit trail entries for an entity."""
    # Apply only the filters the caller actually provided.
    criteria = (
        (AuditTrailDB.entity_type, entity_type),
        (AuditTrailDB.entity_id, entity_id),
        (AuditTrailDB.action, action),
    )
    query = db.query(AuditTrailDB)
    for column, value in criteria:
        if value:
            query = query.filter(column == value)
    rows = query.order_by(AuditTrailDB.performed_at.desc()).limit(limit).all()

    def _serialize(r):
        # Flatten one ORM row to a plain dict for the JSON response.
        return {
            "id": r.id,
            "entity_type": r.entity_type,
            "entity_id": r.entity_id,
            "entity_name": r.entity_name,
            "action": r.action,
            "field_changed": r.field_changed,
            "old_value": r.old_value,
            "new_value": r.new_value,
            "change_summary": r.change_summary,
            "performed_by": r.performed_by,
            "performed_at": r.performed_at.isoformat() if r.performed_at else None,
            "checksum": r.checksum,
        }

    return {
        "entries": [_serialize(r) for r in rows],
        # NOTE: "total" is the size of the returned (limit-capped) page,
        # not the count of all matching rows.
        "total": len(rows),
    }

View File

@@ -73,39 +73,8 @@ def generate_id() -> str:
return str(uuid.uuid4())
def create_signature(data: str) -> str:
    """Create SHA-256 signature."""
    # Hex digest of the UTF-8 encoded input; same algorithm as the canonical
    # copy in audit_trail_utils.create_signature.
    return hashlib.sha256(data.encode()).hexdigest()
def log_audit_trail(
    db: Session,
    entity_type: str,
    entity_id: str,
    entity_name: str,
    action: str,
    performed_by: str,
    field_changed: Optional[str] = None,
    old_value: Optional[str] = None,
    new_value: Optional[str] = None,
    change_summary: Optional[str] = None
):
    """Log an entry to the audit trail.

    Stages one AuditTrailDB row in the session; the caller is responsible
    for committing. Duplicate of the canonical implementation in
    audit_trail_utils.py.
    """
    trail = AuditTrailDB(
        id=generate_id(),
        entity_type=entity_type,
        entity_id=entity_id,
        entity_name=entity_name,
        action=action,
        field_changed=field_changed,
        old_value=old_value,
        new_value=new_value,
        change_summary=change_summary,
        performed_by=performed_by,
        performed_at=datetime.utcnow(),
        # Checksum binds entity/action/actor; it does not cover the values.
        checksum=create_signature(f"{entity_type}|{entity_id}|{action}|{performed_by}")
    )
    db.add(trail)
# Shared audit trail utilities — canonical implementation in audit_trail_utils.py
from .audit_trail_utils import log_audit_trail, create_signature # noqa: E402
# =============================================================================

View File

@@ -0,0 +1,162 @@
"""
FastAPI routes for LLM Generation Audit Trail.
Endpoints:
- POST /llm-audit: Record an LLM generation event
- GET /llm-audit: List audit records with filters
"""
import logging
import uuid as uuid_module
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, Query
from pydantic import BaseModel
from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from ..db.models import LLMGenerationAuditDB
logger = logging.getLogger(__name__)
router = APIRouter(tags=["compliance-llm-audit"])
# ============================================================================
# Schemas
# ============================================================================
class LLMAuditCreate(BaseModel):
    """Request payload for recording one LLM generation event."""
    # Type of the entity the generation relates to (free-form string).
    entity_type: str
    entity_id: Optional[str] = None
    # How the content was produced (project-defined mode identifier).
    generation_mode: str
    # Invalid values are coerced to GENERATED by the handler.
    truth_status: str = "generated"
    # Generated content is not usable as evidence unless explicitly flagged.
    may_be_used_as_evidence: bool = False
    llm_model: Optional[str] = None
    llm_provider: Optional[str] = None
    # Hash of the prompt — allows correlation without storing the prompt itself.
    prompt_hash: Optional[str] = None
    # Both summaries are stored truncated to 500 characters by the handler.
    input_summary: Optional[str] = None
    output_summary: Optional[str] = None
    # Persisted to the DB column ``extra_metadata``.
    metadata: Optional[dict] = None
    tenant_id: Optional[str] = None
class LLMAuditResponse(BaseModel):
    """API representation of a persisted LLM generation audit record."""
    id: str
    tenant_id: Optional[str] = None
    entity_type: str
    entity_id: Optional[str] = None
    generation_mode: str
    # Raw enum string value; handler falls back to "generated" when NULL.
    truth_status: str
    may_be_used_as_evidence: bool
    llm_model: Optional[str] = None
    llm_provider: Optional[str] = None
    prompt_hash: Optional[str] = None
    input_summary: Optional[str] = None
    output_summary: Optional[str] = None
    # Mapped from the DB column ``extra_metadata``.
    metadata: Optional[dict] = None
    created_at: datetime

    class Config:
        # Allow construction straight from ORM objects.
        from_attributes = True
# ============================================================================
# Routes
# ============================================================================
@router.post("/llm-audit", response_model=LLMAuditResponse)
async def create_llm_audit(
    data: LLMAuditCreate,
    db: Session = Depends(get_db),
):
    """Record an LLM generation event for audit trail."""
    from ..db.models import EvidenceTruthStatusEnum

    # Coerce the free-form truth_status onto the enum; unknown values
    # deliberately fall back to GENERATED rather than failing the request.
    try:
        truth_enum = EvidenceTruthStatusEnum(data.truth_status)
    except ValueError:
        truth_enum = EvidenceTruthStatusEnum.GENERATED

    def _clip(text):
        # Summaries are persisted truncated to 500 characters.
        return text[:500] if text else None

    row = LLMGenerationAuditDB(
        id=str(uuid_module.uuid4()),
        tenant_id=data.tenant_id,
        entity_type=data.entity_type,
        entity_id=data.entity_id,
        generation_mode=data.generation_mode,
        truth_status=truth_enum,
        may_be_used_as_evidence=data.may_be_used_as_evidence,
        llm_model=data.llm_model,
        llm_provider=data.llm_provider,
        prompt_hash=data.prompt_hash,
        input_summary=_clip(data.input_summary),
        output_summary=_clip(data.output_summary),
        extra_metadata=data.metadata or {},
    )
    db.add(row)
    db.commit()
    db.refresh(row)
    return LLMAuditResponse(
        id=row.id,
        tenant_id=row.tenant_id,
        entity_type=row.entity_type,
        entity_id=row.entity_id,
        generation_mode=row.generation_mode,
        truth_status=row.truth_status.value if row.truth_status else "generated",
        may_be_used_as_evidence=row.may_be_used_as_evidence,
        llm_model=row.llm_model,
        llm_provider=row.llm_provider,
        prompt_hash=row.prompt_hash,
        input_summary=row.input_summary,
        output_summary=row.output_summary,
        metadata=row.extra_metadata,
        created_at=row.created_at,
    )
@router.get("/llm-audit")
async def list_llm_audit(
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    page: int = Query(1, ge=1),
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
):
    """List LLM generation audit records with optional filters."""

    def _serialize(row: LLMGenerationAuditDB) -> LLMAuditResponse:
        # Flatten one ORM row into the response schema.
        return LLMAuditResponse(
            id=row.id,
            tenant_id=row.tenant_id,
            entity_type=row.entity_type,
            entity_id=row.entity_id,
            generation_mode=row.generation_mode,
            truth_status=row.truth_status.value if row.truth_status else "generated",
            may_be_used_as_evidence=row.may_be_used_as_evidence,
            llm_model=row.llm_model,
            llm_provider=row.llm_provider,
            prompt_hash=row.prompt_hash,
            input_summary=row.input_summary,
            output_summary=row.output_summary,
            metadata=row.extra_metadata,
            created_at=row.created_at,
        )

    query = db.query(LLMGenerationAuditDB)
    if entity_type:
        query = query.filter(LLMGenerationAuditDB.entity_type == entity_type)
    if entity_id:
        query = query.filter(LLMGenerationAuditDB.entity_id == entity_id)

    total_count = query.count()
    page_rows = (
        query.order_by(LLMGenerationAuditDB.created_at.desc())
        .offset((page - 1) * limit)
        .limit(limit)
        .all()
    )

    return {
        "records": [_serialize(r) for r in page_rows],
        "total": total_count,
        "page": page,
        "limit": limit,
    }

View File

@@ -25,6 +25,7 @@ from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from .audit_trail_utils import log_audit_trail
from ..db import (
RegulationRepository,
RequirementRepository,
@@ -595,6 +596,7 @@ async def get_control(control_id: str, db: Session = Depends(get_db)):
review_frequency_days=control.review_frequency_days,
status=control.status.value if control.status else None,
status_notes=control.status_notes,
status_justification=control.status_justification,
last_reviewed_at=control.last_reviewed_at,
next_review_at=control.next_review_at,
created_at=control.created_at,
@@ -617,16 +619,52 @@ async def update_control(
update_data = update.model_dump(exclude_unset=True)
# Convert status string to enum
# Convert status string to enum and validate transition
if "status" in update_data:
try:
update_data["status"] = ControlStatusEnum(update_data["status"])
new_status_enum = ControlStatusEnum(update_data["status"])
except ValueError:
raise HTTPException(status_code=400, detail=f"Invalid status: {update_data['status']}")
# Validate status transition (Anti-Fake-Evidence)
from ..services.control_status_machine import validate_transition
current_status = control.status.value if control.status else "planned"
evidence_list = db.query(EvidenceDB).filter(EvidenceDB.control_id == control.id).all()
allowed, violations = validate_transition(
current_status=current_status,
new_status=update_data["status"],
evidence_list=evidence_list,
status_justification=update_data.get("status_justification") or update_data.get("status_notes"),
)
if not allowed:
raise HTTPException(
status_code=409,
detail={
"error": "Status transition not allowed",
"current_status": current_status,
"requested_status": update_data["status"],
"violations": violations,
}
)
update_data["status"] = new_status_enum
updated = repo.update(control.id, **update_data)
db.commit()
# Audit trail for status changes
new_status = updated.status.value if updated.status else None
if "status" in update.model_dump(exclude_unset=True) and current_status != new_status:
log_audit_trail(
db, "control", control.id, updated.control_id or updated.title,
"status_change",
performed_by=update.owner or "system",
field_changed="status",
old_value=current_status,
new_value=new_status,
)
db.commit()
return ControlResponse(
id=updated.id,
control_id=updated.control_id,
@@ -645,6 +683,7 @@ async def update_control(
review_frequency_days=updated.review_frequency_days,
status=updated.status.value if updated.status else None,
status_notes=updated.status_notes,
status_justification=updated.status_justification,
last_reviewed_at=updated.last_reviewed_at,
next_review_at=updated.next_review_at,
created_at=updated.created_at,
@@ -690,6 +729,7 @@ async def review_control(
review_frequency_days=updated.review_frequency_days,
status=updated.status.value if updated.status else None,
status_notes=updated.status_notes,
status_justification=updated.status_justification,
last_reviewed_at=updated.last_reviewed_at,
next_review_at=updated.next_review_at,
created_at=updated.created_at,

View File

@@ -43,6 +43,7 @@ class ControlStatus(str):
FAIL = "fail"
NOT_APPLICABLE = "n/a"
PLANNED = "planned"
IN_PROGRESS = "in_progress"
class RiskLevel(str):
@@ -209,12 +210,14 @@ class ControlUpdate(BaseModel):
owner: Optional[str] = None
status: Optional[str] = None
status_notes: Optional[str] = None
status_justification: Optional[str] = None
class ControlResponse(ControlBase):
id: str
status: str
status_notes: Optional[str] = None
status_justification: Optional[str] = None
last_reviewed_at: Optional[datetime] = None
next_review_at: Optional[datetime] = None
created_at: datetime
@@ -291,7 +294,8 @@ class EvidenceBase(BaseModel):
class EvidenceCreate(EvidenceBase):
pass
confidence_level: Optional[str] = None
truth_status: Optional[str] = None
class EvidenceResponse(EvidenceBase):
@@ -304,6 +308,20 @@ class EvidenceResponse(EvidenceBase):
uploaded_by: Optional[str] = None
collected_at: datetime
created_at: datetime
# Anti-Fake-Evidence fields
confidence_level: Optional[str] = None
truth_status: Optional[str] = None
generation_mode: Optional[str] = None
may_be_used_as_evidence: Optional[bool] = None
reviewed_by: Optional[str] = None
reviewed_at: Optional[datetime] = None
# Anti-Fake-Evidence Phase 2: Four-Eyes
approval_status: Optional[str] = None
first_reviewer: Optional[str] = None
first_reviewed_at: Optional[datetime] = None
second_reviewer: Optional[str] = None
second_reviewed_at: Optional[datetime] = None
requires_four_eyes: Optional[bool] = None
class Config:
from_attributes = True
@@ -435,6 +453,25 @@ class AISystemListResponse(BaseModel):
# Dashboard & Export Schemas
# ============================================================================
class MultiDimensionalScore(BaseModel):
    """Multi-dimensional compliance score (Anti-Fake-Evidence).

    All dimension values are percentages in the 0-100 range.
    """

    requirement_coverage: float = 0.0   # % requirements with linked control
    evidence_strength: float = 0.0      # Weighted avg of evidence confidence (E0=0..E4=1)
    validation_quality: float = 0.0     # % evidence with truth_status >= validated_internal
    evidence_freshness: float = 0.0     # % evidence not expired + reviewed < 90 days
    control_effectiveness: float = 0.0  # Existing formula (pass + partial*0.5)
    overall_readiness: float = 0.0      # Weighted composite of the five dimensions above
    # NOTE: pydantic deep-copies field defaults, so the shared [] default is safe here.
    hard_blocks: List[str] = []         # Blocking issues preventing audit-readiness


class StatusTransitionError(BaseModel):
    """Error detail for forbidden control status transitions (409 response payload)."""

    allowed: bool = False
    current_status: str
    requested_status: str
    violations: List[str] = []          # Human-readable reasons the transition was rejected
class DashboardResponse(BaseModel):
compliance_score: float
total_regulations: int
@@ -447,6 +484,7 @@ class DashboardResponse(BaseModel):
total_risks: int
risks_by_level: Dict[str, int]
recent_activity: List[Dict[str, Any]]
multi_score: Optional[MultiDimensionalScore] = None
class ExportRequest(BaseModel):
@@ -1939,3 +1977,111 @@ class TOMStatsResponse(BaseModel):
implemented: int = 0
partial: int = 0
not_implemented: int = 0
# ============================================================================
# Assertion Schemas (Anti-Fake-Evidence Phase 2)
# ============================================================================

class AssertionCreate(BaseModel):
    """Payload for creating a single assertion attached to an entity."""

    entity_type: str                             # 'control' | 'evidence' | 'document' | 'obligation'
    entity_id: str
    sentence_text: str
    assertion_type: Optional[str] = "assertion"  # 'assertion' | 'fact' | 'rationale'
    evidence_ids: Optional[List[str]] = []       # Linked evidence IDs (pydantic copies list defaults)
    normative_tier: Optional[str] = None         # 'pflicht' | 'empfehlung' | 'kann'


class AssertionUpdate(BaseModel):
    """Partial update for an existing assertion; omitted fields are untouched."""

    sentence_text: Optional[str] = None
    assertion_type: Optional[str] = None
    evidence_ids: Optional[List[str]] = None
    normative_tier: Optional[str] = None
    confidence: Optional[float] = None


class AssertionResponse(BaseModel):
    """Serialized assertion as returned by the API."""

    id: str
    tenant_id: Optional[str] = None
    entity_type: str
    entity_id: str
    sentence_text: str
    sentence_index: int = 0                 # Position of the sentence within the source text
    assertion_type: str = "assertion"       # 'assertion' | 'fact' | 'rationale'
    evidence_ids: Optional[List[str]] = []
    confidence: float = 0.0
    normative_tier: Optional[str] = None    # 'pflicht' | 'empfehlung' | 'kann'
    verified_by: Optional[str] = None       # Reviewer who verified this assertion, if any
    verified_at: Optional[datetime] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    class Config:
        # Allow construction directly from ORM rows.
        from_attributes = True


class AssertionListResponse(BaseModel):
    """List wrapper for assertions plus total count."""

    assertions: List[AssertionResponse]
    total: int


class AssertionSummaryResponse(BaseModel):
    """Aggregate assertion counts (see /assertions/summary)."""

    total_assertions: int = 0
    total_facts: int = 0
    total_rationale: int = 0
    unverified_count: int = 0


class AssertionExtractRequest(BaseModel):
    """Request to automatically extract assertions from free text."""

    entity_type: str
    entity_id: str
    text: str


class EvidenceRejectRequest(BaseModel):
    """Request body for rejecting an evidence item during review."""

    reviewed_by: str
    rejection_reason: Optional[str] = None
# ============================================================================
# Traceability Matrix (Anti-Fake-Evidence Phase 4a)
# ============================================================================

class TraceabilityAssertion(BaseModel):
    """Single assertion linked to an evidence item."""

    id: str
    sentence_text: str
    assertion_type: str = "assertion"  # 'assertion' | 'fact' | 'rationale'
    confidence: float = 0.0
    verified: bool = False


class TraceabilityEvidence(BaseModel):
    """Evidence item with nested assertions."""

    id: str
    title: str
    evidence_type: str
    confidence_level: str = "E1"       # E0 (generated) .. E4 (externally validated)
    status: str = "valid"
    assertions: List[TraceabilityAssertion] = []


class TraceabilityCoverage(BaseModel):
    """Coverage flags for a single control."""

    has_evidence: bool = False
    has_assertions: bool = False
    all_assertions_verified: bool = False
    # presumably the weakest confidence level among linked evidence — confirm against the endpoint
    min_confidence_level: Optional[str] = None


class TraceabilityControl(BaseModel):
    """Control with nested evidence and coverage info."""

    id: str
    control_id: str
    title: str
    status: str = "planned"
    domain: str = "unknown"
    evidence: List[TraceabilityEvidence] = []
    # NOTE: pydantic deep-copies model defaults, so this shared instance is safe.
    coverage: TraceabilityCoverage = TraceabilityCoverage()


class TraceabilityMatrixResponse(BaseModel):
    """Full traceability matrix: Controls → Evidence → Assertions."""

    controls: List[TraceabilityControl]
    summary: Dict[str, int]  # aggregate counts; exact keys defined by the traceability endpoint

View File

@@ -8,12 +8,16 @@ from .models import (
EvidenceDB,
RiskDB,
AuditExportDB,
LLMGenerationAuditDB,
AssertionDB,
RegulationTypeEnum,
ControlTypeEnum,
ControlDomainEnum,
RiskLevelEnum,
EvidenceStatusEnum,
ControlStatusEnum,
EvidenceConfidenceEnum,
EvidenceTruthStatusEnum,
)
from .repository import (
RegulationRepository,
@@ -33,6 +37,8 @@ __all__ = [
"EvidenceDB",
"RiskDB",
"AuditExportDB",
"LLMGenerationAuditDB",
"AssertionDB",
# Enums
"RegulationTypeEnum",
"ControlTypeEnum",
@@ -40,6 +46,8 @@ __all__ = [
"RiskLevelEnum",
"EvidenceStatusEnum",
"ControlStatusEnum",
"EvidenceConfidenceEnum",
"EvidenceTruthStatusEnum",
# Repositories
"RegulationRepository",
"RequirementRepository",

View File

@@ -65,6 +65,7 @@ class ControlStatusEnum(str, enum.Enum):
FAIL = "fail" # Not passing
NOT_APPLICABLE = "n/a" # Not applicable
PLANNED = "planned" # Planned for implementation
IN_PROGRESS = "in_progress" # Implementation in progress
class RiskLevelEnum(str, enum.Enum):
@@ -83,6 +84,26 @@ class EvidenceStatusEnum(str, enum.Enum):
FAILED = "failed" # Failed validation
class EvidenceConfidenceEnum(str, enum.Enum):
    """Confidence level of evidence (Anti-Fake-Evidence).

    Ordered from weakest (E0) to strongest (E4); the control status machine
    requires at least E2 before a control may transition to 'pass'.
    """

    E0 = "E0"  # Generated / no real evidence (LLM output, placeholder)
    E1 = "E1"  # Uploaded but unreviewed (manual upload, no hash, no reviewer)
    E2 = "E2"  # Reviewed internally (human reviewed, hash verified)
    E3 = "E3"  # Observed by system (CI/CD pipeline, API with hash)
    E4 = "E4"  # Validated by external auditor


class EvidenceTruthStatusEnum(str, enum.Enum):
    """Truth status lifecycle for evidence (Anti-Fake-Evidence)."""

    GENERATED = "generated"                      # LLM/draft output — not real evidence
    UPLOADED = "uploaded"                        # Manually uploaded, not yet validated
    OBSERVED = "observed"                        # Captured by an automated system
    VALIDATED_INTERNAL = "validated_internal"    # Reviewed and accepted internally
    REJECTED = "rejected"                        # Rejected during internal review
    PROVIDED_TO_AUDITOR = "provided_to_auditor"  # Handed over for external audit
    ACCEPTED_BY_AUDITOR = "accepted_by_auditor"  # Confirmed by external auditor
class ExportStatusEnum(str, enum.Enum):
"""Status of audit export."""
PENDING = "pending"
@@ -239,6 +260,7 @@ class ControlDB(Base):
# Status
status = Column(Enum(ControlStatusEnum), default=ControlStatusEnum.PLANNED)
status_notes = Column(Text)
status_justification = Column(Text) # Required for n/a transitions
# Ownership & Review
owner = Column(String(100)) # Responsible person/team
@@ -321,6 +343,22 @@ class EvidenceDB(Base):
ci_job_id = Column(String(100)) # CI/CD job reference
uploaded_by = Column(String(100)) # User who uploaded
# Anti-Fake-Evidence: Confidence & Truth tracking
confidence_level = Column(Enum(EvidenceConfidenceEnum), default=EvidenceConfidenceEnum.E1)
truth_status = Column(Enum(EvidenceTruthStatusEnum), default=EvidenceTruthStatusEnum.UPLOADED)
generation_mode = Column(String(100)) # e.g. "draft_assistance", "auto_generation"
may_be_used_as_evidence = Column(Boolean, default=True)
reviewed_by = Column(String(200))
reviewed_at = Column(DateTime)
# Anti-Fake-Evidence Phase 2: Four-Eyes review
approval_status = Column(String(30), default="none")
first_reviewer = Column(String(200))
first_reviewed_at = Column(DateTime)
second_reviewer = Column(String(200))
second_reviewed_at = Column(DateTime)
requires_four_eyes = Column(Boolean, default=False)
# Timestamps
collected_at = Column(DateTime, default=datetime.utcnow)
created_at = Column(DateTime, default=datetime.utcnow)
@@ -332,6 +370,7 @@ class EvidenceDB(Base):
__table_args__ = (
Index('ix_evidence_control_type', 'control_id', 'evidence_type'),
Index('ix_evidence_status', 'status'),
Index('ix_evidence_approval_status', 'approval_status'),
)
def __repr__(self):
@@ -1464,3 +1503,77 @@ class ISMSReadinessCheckDB(Base):
def __repr__(self):
return f"<ISMSReadiness {self.check_date}: {self.overall_status}>"
class LLMGenerationAuditDB(Base):
    """
    Audit trail for LLM-generated content.

    Every piece of content generated by an LLM is recorded here with its
    truth_status and may_be_used_as_evidence flag, ensuring transparency
    about what is real evidence vs. generated assistance.
    """
    __tablename__ = 'compliance_llm_generation_audit'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    tenant_id = Column(String(36), index=True)

    entity_type = Column(String(50), nullable=False)  # 'evidence', 'control', 'document'
    entity_id = Column(String(36))  # FK to generated entity
    generation_mode = Column(String(100), nullable=False)  # 'draft_assistance', 'auto_generation'

    # Generated content defaults to GENERATED / not usable as evidence.
    truth_status = Column(Enum(EvidenceTruthStatusEnum), nullable=False, default=EvidenceTruthStatusEnum.GENERATED)
    may_be_used_as_evidence = Column(Boolean, nullable=False, default=False)

    llm_model = Column(String(100))
    llm_provider = Column(String(50))  # 'ollama', 'anthropic'
    prompt_hash = Column(String(64))  # SHA-256 of prompt
    input_summary = Column(Text)   # capped at 500 chars by the API layer
    output_summary = Column(Text)  # capped at 500 chars by the API layer

    # Attribute named extra_metadata because 'metadata' is reserved on SQLAlchemy
    # declarative classes; the underlying DB column is still called "metadata".
    extra_metadata = Column("metadata", JSON, default=dict)

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    __table_args__ = (
        Index('ix_llm_audit_entity', 'entity_type', 'entity_id'),
    )

    def __repr__(self):
        return f"<LLMGenerationAudit {self.entity_type}:{self.entity_id} mode={self.generation_mode}>"
class AssertionDB(Base):
    """
    Assertion tracking — separates claims from verified facts.

    Each sentence from a control/evidence/document is stored here with its
    classification (assertion vs. fact vs. rationale) and optional evidence linkage.
    """
    __tablename__ = 'compliance_assertions'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    tenant_id = Column(String(36), index=True)

    entity_type = Column(String(50), nullable=False)  # 'control', 'evidence', 'document', 'obligation'
    entity_id = Column(String(36), nullable=False)

    sentence_text = Column(Text, nullable=False)
    sentence_index = Column(Integer, nullable=False, default=0)  # Position within the source text
    assertion_type = Column(String(20), nullable=False, default='assertion')  # 'assertion' | 'fact' | 'rationale'

    evidence_ids = Column(JSON, default=list)  # IDs of evidence records backing this assertion
    confidence = Column(Float, default=0.0)
    normative_tier = Column(String(20))  # 'pflicht' | 'empfehlung' | 'kann'

    # Verification metadata — set when a human confirms the assertion.
    verified_by = Column(String(200))
    verified_at = Column(DateTime)

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    __table_args__ = (
        Index('ix_assertion_entity', 'entity_type', 'entity_id'),
        Index('ix_assertion_type', 'assertion_type'),
    )

    def __repr__(self):
        return f"<Assertion {self.assertion_type}: {self.sentence_text[:50]}>"

View File

@@ -487,6 +487,137 @@ class ControlRepository:
"compliance_score": round(score, 1),
}
def get_multi_dimensional_score(self) -> Dict[str, Any]:
    """
    Calculate multi-dimensional compliance score (Anti-Fake-Evidence).

    Returns:
        Dict with six 0-100 score dimensions, ``overall_readiness``
        (weighted composite) and ``hard_blocks`` (list of human-readable
        blocking issues preventing audit-readiness).
    """
    from .models import EvidenceDB, RequirementDB, ControlMappingDB

    # Weight map for confidence levels (E0 = no real evidence .. E4 = external audit)
    conf_weights = {"E0": 0.0, "E1": 0.25, "E2": 0.5, "E3": 0.75, "E4": 1.0}
    validated_statuses = {"validated_internal", "accepted_by_auditor", "provided_to_auditor"}

    controls = self.get_all()
    total_controls = len(controls)
    if total_controls == 0:
        return {
            "requirement_coverage": 0.0,
            "evidence_strength": 0.0,
            "validation_quality": 0.0,
            "evidence_freshness": 0.0,
            "control_effectiveness": 0.0,
            "overall_readiness": 0.0,
            "hard_blocks": ["Keine Controls vorhanden"],
        }

    # 1. requirement_coverage: % requirements linked to at least one control
    total_reqs = self.db.query(func.count(RequirementDB.id)).scalar() or 0
    linked_reqs = (
        self.db.query(func.count(func.distinct(ControlMappingDB.requirement_id)))
        .scalar() or 0
    )
    requirement_coverage = (linked_reqs / total_reqs * 100) if total_reqs > 0 else 0.0

    # Load evidence once and group by control so the hard-block checks below
    # are O(C + E) instead of rescanning all evidence per control (O(C * E)).
    all_evidence = self.db.query(EvidenceDB).all()
    evidence_by_control: Dict[str, list] = {}
    for e in all_evidence:
        evidence_by_control.setdefault(e.control_id, []).append(e)

    def _conf_weight(e) -> float:
        # Missing confidence_level is treated as E1 (uploaded, unreviewed).
        return conf_weights.get(e.confidence_level.value if e.confidence_level else "E1", 0.25)

    # 2. evidence_strength: weighted average of evidence confidence
    if all_evidence:
        evidence_strength = (sum(_conf_weight(e) for e in all_evidence) / len(all_evidence)) * 100
    else:
        evidence_strength = 0.0

    # 3. validation_quality: % evidence with truth_status >= validated_internal
    if all_evidence:
        validated_count = sum(
            1 for e in all_evidence
            if (e.truth_status.value if e.truth_status else "uploaded") in validated_statuses
        )
        validation_quality = (validated_count / len(all_evidence)) * 100
    else:
        validation_quality = 0.0

    # 4. evidence_freshness: % evidence not expired and reviewed < 90 days ago.
    # Model timestamps default to datetime.utcnow, so compare against utcnow()
    # as well — a naive local now() would skew freshness by the TZ offset.
    now = datetime.utcnow()
    if all_evidence:
        fresh_count = 0
        for e in all_evidence:
            is_expired = bool(e.valid_until and e.valid_until < now)
            reviewed_at = getattr(e, "reviewed_at", None)
            is_stale = bool(reviewed_at and (now - reviewed_at).days > 90)
            if not is_expired and not is_stale:
                fresh_count += 1
        evidence_freshness = (fresh_count / len(all_evidence)) * 100
    else:
        evidence_freshness = 0.0

    # 5. control_effectiveness: existing formula (pass + partial * 0.5)
    passed = sum(1 for c in controls if c.status == ControlStatusEnum.PASS)
    partial = sum(1 for c in controls if c.status == ControlStatusEnum.PARTIAL)
    control_effectiveness = ((passed + partial * 0.5) / total_controls) * 100

    # 6. overall_readiness: weighted composite
    overall_readiness = (
        0.20 * requirement_coverage +
        0.25 * evidence_strength +
        0.20 * validation_quality +
        0.10 * evidence_freshness +
        0.25 * control_effectiveness
    )

    # Hard blocks
    hard_blocks = []

    # Controls claiming pass/partial without any evidence at all
    critical_no_evidence = [
        c.control_id for c in controls
        if c.status in (ControlStatusEnum.PASS, ControlStatusEnum.PARTIAL)
        and not evidence_by_control.get(c.id)
    ]
    if critical_no_evidence:
        hard_blocks.append(
            f"{len(critical_no_evidence)} Controls mit Status pass/partial haben keine Evidence: "
            f"{', '.join(critical_no_evidence[:5])}"
        )

    # Controls on 'pass' backed only by weak (E0/E1) evidence
    weak_evidence_pass = []
    for c in controls:
        if c.status != ControlStatusEnum.PASS:
            continue
        ctrl_evidence = evidence_by_control.get(c.id)
        if ctrl_evidence and max(_conf_weight(e) for e in ctrl_evidence) < 0.5:  # Only E0 or E1
            weak_evidence_pass.append(c.control_id)
    if weak_evidence_pass:
        hard_blocks.append(
            f"{len(weak_evidence_pass)} Controls auf 'pass' haben nur E0/E1-Evidence: "
            f"{', '.join(weak_evidence_pass[:5])}"
        )

    return {
        "requirement_coverage": round(requirement_coverage, 1),
        "evidence_strength": round(evidence_strength, 1),
        "validation_quality": round(validation_quality, 1),
        "evidence_freshness": round(evidence_freshness, 1),
        "control_effectiveness": round(control_effectiveness, 1),
        "overall_readiness": round(overall_readiness, 1),
        "hard_blocks": hard_blocks,
    }
class ControlMappingRepository:
"""Repository for requirement-control mappings."""

View File

@@ -0,0 +1,80 @@
"""Assertion Engine — splits text into sentences and classifies each.
Each sentence is tagged as:
- assertion: normative statement (pflicht / empfehlung / kann)
- fact: references concrete evidence artifacts
- rationale: explains why something is required
"""
import re
from typing import Optional
from .normative_patterns import (
PFLICHT_RE, EMPFEHLUNG_RE, KANN_RE, RATIONALE_RE, EVIDENCE_RE,
)
# Sentence splitter: period/excl/question followed by space+uppercase, or newlines
_SENTENCE_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ])|(?:\n\s*\n)')
def extract_assertions(
    text: str,
    entity_type: str,
    entity_id: str,
    tenant_id: Optional[str] = None,
) -> list[dict]:
    """Split *text* into sentences and classify each one.

    Returns a list of dicts ready for AssertionDB creation. Fragments that
    strip down to fewer than 5 characters are skipped, but surviving
    sentences keep their original split position in ``sentence_index``.
    """
    if not text or not text.strip():
        return []

    extracted: list[dict] = []
    for position, fragment in enumerate(_SENTENCE_SPLIT.split(text.strip())):
        cleaned = fragment.strip()
        if not cleaned or len(cleaned) < 5:
            continue
        kind, tier = _classify_sentence(cleaned)
        extracted.append({
            "tenant_id": tenant_id,
            "entity_type": entity_type,
            "entity_id": entity_id,
            "sentence_text": cleaned,
            "sentence_index": position,
            "assertion_type": kind,
            "normative_tier": tier,
            "evidence_ids": [],
            "confidence": 0.0,
        })
    return extracted
def _classify_sentence(sentence: str) -> tuple[str, Optional[str]]:
    """Return (assertion_type, normative_tier) for a single sentence."""
    # Evidence references win outright: the sentence states a fact.
    if EVIDENCE_RE.search(sentence):
        return "fact", None

    # Rationale wins when it is at least as prominent as normative wording.
    normative_hits = (
        len(PFLICHT_RE.findall(sentence))
        + len(EMPFEHLUNG_RE.findall(sentence))
        + len(KANN_RE.findall(sentence))
    )
    rationale_hits = len(RATIONALE_RE.findall(sentence))
    if rationale_hits and rationale_hits >= normative_hits:
        return "rationale", None

    # Normative tiers, strongest obligation first.
    for pattern, tier in (
        (PFLICHT_RE, "pflicht"),
        (EMPFEHLUNG_RE, "empfehlung"),
        (KANN_RE, "kann"),
    ):
        if pattern.search(sentence):
            return "assertion", tier

    # Nothing matched: keep it as an unclassified assertion.
    return "assertion", None

View File

@@ -493,6 +493,9 @@ class GeneratedControl:
applicable_industries: Optional[list] = None # e.g. ["all"] or ["Telekommunikation", "Energie"]
applicable_company_size: Optional[list] = None # e.g. ["all"] or ["medium", "large", "enterprise"]
scope_conditions: Optional[dict] = None # e.g. {"requires_any": ["uses_ai"], "description": "..."}
# Anti-Fake-Evidence: truth tracking for generated controls
truth_status: str = "generated"
may_be_used_as_evidence: bool = False
@dataclass
@@ -781,10 +784,23 @@ REFORM_SYSTEM_PROMPT = """Du bist ein Security-Compliance-Experte. Deine Aufgabe
Security Controls zu formulieren. Du formulierst IMMER in eigenen Worten.
KOPIERE KEINE Sätze aus dem Quelltext. Verwende eigene Begriffe und Struktur.
NENNE NICHT die Quelle. Keine proprietären Bezeichner.
WICHTIG — Truthfulness-Guardrail:
Deine Ausgabe ist ein ENTWURF. Formuliere NIEMALS Behauptungen über bereits erfolgte Umsetzung.
Verwende NICHT: "ist compliant", "erfüllt vollständig", "wurde geprüft", "wurde umgesetzt",
"ist auditiert", "vollständig implementiert", "nachweislich konform".
Verwende stattdessen: "soll umsetzen", "ist vorgesehen", "muss implementiert werden".
Antworte NUR mit validem JSON. Bei mehreren Controls antworte mit einem JSON-Array."""
STRUCTURE_SYSTEM_PROMPT = """Du bist ein Security-Compliance-Experte. Strukturiere den gegebenen Text
als praxisorientiertes Security Control. Erstelle eine verständliche, umsetzbare Formulierung.
WICHTIG — Truthfulness-Guardrail:
Deine Ausgabe ist ein ENTWURF. Formuliere NIEMALS Behauptungen über bereits erfolgte Umsetzung.
Verwende NICHT: "ist compliant", "erfüllt vollständig", "wurde geprüft", "wurde umgesetzt".
Verwende stattdessen: "soll umsetzen", "ist vorgesehen", "muss implementiert werden".
Antworte NUR mit validem JSON. Bei mehreren Controls antworte mit einem JSON-Array."""
# Shared applicability prompt block — appended to all generation prompts (v3)
@@ -1877,7 +1893,38 @@ Kategorien: {CATEGORY_LIST_STR}"""
)
self.db.commit()
row = result.fetchone()
return str(row[0]) if row else None
control_uuid = str(row[0]) if row else None
# Anti-Fake-Evidence: Record LLM audit trail for generated control
if control_uuid:
try:
self.db.execute(
text("""
INSERT INTO compliance_llm_generation_audit (
entity_type, entity_id, generation_mode,
truth_status, may_be_used_as_evidence,
llm_model, llm_provider,
input_summary, output_summary
) VALUES (
'control', :entity_id, 'auto_generation',
'generated', FALSE,
:llm_model, :llm_provider,
:input_summary, :output_summary
)
"""),
{
"entity_id": control_uuid,
"llm_model": ANTHROPIC_MODEL if ANTHROPIC_API_KEY else OLLAMA_MODEL,
"llm_provider": "anthropic" if ANTHROPIC_API_KEY else "ollama",
"input_summary": f"Control generation for {control.control_id}",
"output_summary": control.title[:500] if control.title else None,
},
)
self.db.commit()
except Exception as audit_err:
logger.warning("Failed to create LLM audit record: %s", audit_err)
return control_uuid
except Exception as e:
logger.error("Failed to store control %s: %s", control.control_id, e)
self.db.rollback()

View File

@@ -0,0 +1,152 @@
"""
Control Status Transition State Machine.
Enforces that controls cannot be set to "pass" without sufficient evidence.
Prevents Compliance-Theater where controls claim compliance without real proof.
Transition rules:
planned → in_progress : always allowed
in_progress → pass : requires ≥1 evidence with confidence ≥ E2 and
truth_status in (uploaded, observed, validated_internal)
in_progress → partial : requires ≥1 evidence (any level)
pass → fail : always allowed (degradation)
any → n/a : requires status_justification
any → planned : always allowed (reset)
"""
from typing import List, Optional, Tuple
from ..db.models import EvidenceDB
# Confidence level ordering for comparisons
CONFIDENCE_ORDER = {"E0": 0, "E1": 1, "E2": 2, "E3": 3, "E4": 4}

# Truth statuses that qualify as "real" evidence for pass transitions
VALID_TRUTH_STATUSES = {"uploaded", "observed", "validated_internal", "accepted_by_auditor", "provided_to_auditor"}


def _has_qualifying_evidence(evidence_list: List[EvidenceDB]) -> bool:
    """True if at least one evidence has confidence >= E2 and a real truth_status."""
    for e in evidence_list:
        conf = getattr(e, "confidence_level", None)
        truth = getattr(e, "truth_status", None)
        # Accept enum members or plain strings; missing values default to
        # E1/uploaded (the model defaults).
        conf_val = conf.value if hasattr(conf, "value") else str(conf) if conf else "E1"
        truth_val = truth.value if hasattr(truth, "value") else str(truth) if truth else "uploaded"
        if CONFIDENCE_ORDER.get(conf_val, 1) >= CONFIDENCE_ORDER["E2"] and truth_val in VALID_TRUTH_STATUSES:
            return True
    return False


def validate_transition(
    current_status: str,
    new_status: str,
    evidence_list: Optional[List[EvidenceDB]] = None,
    status_justification: Optional[str] = None,
    bypass_for_auto_updater: bool = False,
) -> Tuple[bool, List[str]]:
    """
    Validate whether a control status transition is allowed.

    Args:
        current_status: Current control status value (e.g. "planned", "pass")
        new_status: Requested new status
        evidence_list: List of EvidenceDB objects linked to this control
        status_justification: Text justification (required for n/a transitions)
        bypass_for_auto_updater: If True, skip evidence checks (used by CI/CD auto-updater
            which creates evidence atomically with status change)

    Returns:
        Tuple of (allowed: bool, violations: list[str])
    """
    evidence_list = evidence_list or []

    # Same status → no-op, always allowed
    if current_status == new_status:
        return True, []

    # Reset to planned is always allowed
    if new_status == "planned":
        return True, []

    # n/a requires a written justification
    if new_status == "n/a":
        if status_justification and status_justification.strip():
            return True, []
        return False, [
            "Transition to 'n/a' requires a status_justification explaining why this control is not applicable."
        ]

    if new_status == "pass":
        if bypass_for_auto_updater:
            return True, []
        # BUGFIX: the generic pass-branch previously ran before the
        # planned/fail guard, making that guard unreachable — direct
        # planned/fail → pass was silently allowed. Check the source
        # status first, per the documented transition rules.
        if current_status in ("planned", "fail"):
            return False, [
                f"Direct transition from '{current_status}' to 'pass' is not allowed. "
                f"Move to 'in_progress' first, then to 'pass' with qualifying evidence."
            ]
        if not evidence_list:
            return False, ["Transition to 'pass' requires at least 1 evidence record."]
        if not _has_qualifying_evidence(evidence_list):
            return False, [
                "Transition to 'pass' requires at least 1 evidence with confidence >= E2 "
                "and truth_status in (uploaded, observed, validated_internal, accepted_by_auditor). "
                "Current evidence does not meet this threshold."
            ]
        return True, []

    # partial: needs at least 1 evidence record (any confidence level)
    if new_status == "partial":
        if not bypass_for_auto_updater and len(evidence_list) == 0:
            return False, ["Transition to 'partial' requires at least 1 evidence record."]
        return True, []

    # fail (degradation from any status), in_progress, and all remaining
    # transitions are allowed.
    return True, []

View File

@@ -52,64 +52,18 @@ ANTHROPIC_API_URL = "https://api.anthropic.com/v1"
# Tier 2: Empfehlung (recommendation) — weaker normative signals
# Tier 3: Kann (optional/permissive) — permissive signals
# Nothing is rejected — everything is classified.
#
# Patterns are defined in normative_patterns.py and imported here
# with local aliases for backward compatibility.
_PFLICHT_SIGNALS = [
# Deutsche modale Pflichtformulierungen
r"\bmüssen\b", r"\bmuss\b", r"\bhat\s+sicherzustellen\b",
r"\bhaben\s+sicherzustellen\b", r"\bsind\s+verpflichtet\b",
r"\bist\s+verpflichtet\b",
# "ist zu prüfen", "sind zu dokumentieren" (direkt)
r"\bist\s+zu\s+\w+en\b", r"\bsind\s+zu\s+\w+en\b",
r"\bhat\s+zu\s+\w+en\b", r"\bhaben\s+zu\s+\w+en\b",
# "ist festzustellen", "sind vorzunehmen" (Compound-Verben, eingebettetes zu)
r"\bist\s+\w+zu\w+en\b", r"\bsind\s+\w+zu\w+en\b",
# "ist zusätzlich zu prüfen", "sind regelmäßig zu überwachen" (Adverb dazwischen)
r"\bist\s+\w+\s+zu\s+\w+en\b", r"\bsind\s+\w+\s+zu\s+\w+en\b",
r"\bhat\s+\w+\s+zu\s+\w+en\b", r"\bhaben\s+\w+\s+zu\s+\w+en\b",
# Englische Pflicht-Signale
r"\bshall\b", r"\bmust\b", r"\brequired\b",
# Compound-Infinitive (Gerundivum): mitzuteilen, anzuwenden, bereitzustellen
r"\b\w+zuteilen\b", r"\b\w+zuwenden\b", r"\b\w+zustellen\b", r"\b\w+zulegen\b",
r"\b\w+zunehmen\b", r"\b\w+zuführen\b", r"\b\w+zuhalten\b", r"\b\w+zusetzen\b",
r"\b\w+zuweisen\b", r"\b\w+zuordnen\b", r"\b\w+zufügen\b", r"\b\w+zugeben\b",
# Breites Pattern: "ist ... [bis 80 Zeichen] ... zu + Infinitiv"
r"\bist\b.{1,80}\bzu\s+\w+en\b", r"\bsind\b.{1,80}\bzu\s+\w+en\b",
]
_PFLICHT_RE = re.compile("|".join(_PFLICHT_SIGNALS), re.IGNORECASE)
_EMPFEHLUNG_SIGNALS = [
# Modale Verben (schwaecher als "muss")
r"\bsoll\b", r"\bsollen\b", r"\bsollte\b", r"\bsollten\b",
r"\bgewährleisten\b", r"\bsicherstellen\b",
# Englische Empfehlungs-Signale
r"\bshould\b", r"\bensure\b", r"\brecommend\w*\b",
# Haeufige normative Infinitive (ohne Hilfsverb, als Empfehlung)
r"\bnachweisen\b", r"\beinhalten\b", r"\bunterlassen\b", r"\bwahren\b",
r"\bdokumentieren\b", r"\bimplementieren\b", r"\büberprüfen\b", r"\büberwachen\b",
# Pruefanweisungen als normative Aussage
r"\bprüfen,\s+ob\b", r"\bkontrollieren,\s+ob\b",
]
_EMPFEHLUNG_RE = re.compile("|".join(_EMPFEHLUNG_SIGNALS), re.IGNORECASE)
_KANN_SIGNALS = [
r"\bkann\b", r"\bkönnen\b", r"\bdarf\b", r"\bdürfen\b",
r"\bmay\b", r"\boptional\b",
]
_KANN_RE = re.compile("|".join(_KANN_SIGNALS), re.IGNORECASE)
# Union of all normative signals (for backward-compatible has_normative_signal flag)
_NORMATIVE_RE = re.compile(
"|".join(_PFLICHT_SIGNALS + _EMPFEHLUNG_SIGNALS + _KANN_SIGNALS),
re.IGNORECASE,
from .normative_patterns import (
PFLICHT_RE as _PFLICHT_RE,
EMPFEHLUNG_RE as _EMPFEHLUNG_RE,
KANN_RE as _KANN_RE,
NORMATIVE_RE as _NORMATIVE_RE,
RATIONALE_RE as _RATIONALE_RE,
)
_RATIONALE_SIGNALS = [
r"\bda\s+", r"\bweil\b", r"\bgrund\b", r"\berwägung",
r"\bbecause\b", r"\breason\b", r"\brationale\b",
r"\bkönnen\s+.*\s+verursachen\b", r"\bführt\s+zu\b",
]
_RATIONALE_RE = re.compile("|".join(_RATIONALE_SIGNALS), re.IGNORECASE)
_TEST_SIGNALS = [
r"\btesten\b", r"\btest\b", r"\bprüfung\b", r"\bprüfen\b",
r"\bgetestet\b", r"\bwirksamkeit\b", r"\baudit\b",

View File

@@ -0,0 +1,59 @@
"""Shared normative language patterns for assertion classification.
Extracted from decomposition_pass.py for reuse in the assertion engine.
"""
import re
# Tier 1 ("Pflicht"): strong obligation signals -> mandatory requirement.
_PFLICHT_SIGNALS = [
    # German modal obligation phrasings ("muss", "ist verpflichtet", ...)
    r"\bmüssen\b", r"\bmuss\b", r"\bhat\s+sicherzustellen\b",
    r"\bhaben\s+sicherzustellen\b", r"\bsind\s+verpflichtet\b",
    r"\bist\s+verpflichtet\b",
    # Direct "ist/sind zu + infinitive": "ist zu prüfen", "sind zu dokumentieren"
    r"\bist\s+zu\s+\w+en\b", r"\bsind\s+zu\s+\w+en\b",
    r"\bhat\s+zu\s+\w+en\b", r"\bhaben\s+zu\s+\w+en\b",
    # Compound verbs with embedded "zu": "ist festzustellen", "sind vorzunehmen"
    r"\bist\s+\w+zu\w+en\b", r"\bsind\s+\w+zu\w+en\b",
    # One intervening adverb: "ist zusätzlich zu prüfen", "sind regelmäßig zu überwachen"
    r"\bist\s+\w+\s+zu\s+\w+en\b", r"\bsind\s+\w+\s+zu\s+\w+en\b",
    r"\bhat\s+\w+\s+zu\s+\w+en\b", r"\bhaben\s+\w+\s+zu\s+\w+en\b",
    # English obligation signals
    r"\bshall\b", r"\bmust\b", r"\brequired\b",
    # Compound "zu"-infinitives (Gerundivum): mitzuteilen, anzuwenden, bereitzustellen, ...
    r"\b\w+zuteilen\b", r"\b\w+zuwenden\b", r"\b\w+zustellen\b", r"\b\w+zulegen\b",
    r"\b\w+zunehmen\b", r"\b\w+zuführen\b", r"\b\w+zuhalten\b", r"\b\w+zusetzen\b",
    r"\b\w+zuweisen\b", r"\b\w+zuordnen\b", r"\b\w+zufügen\b", r"\b\w+zugeben\b",
    # Broad catch-all: "ist ... [up to 80 chars] ... zu + infinitive"
    r"\bist\b.{1,80}\bzu\s+\w+en\b", r"\bsind\b.{1,80}\bzu\s+\w+en\b",
]
# NOTE: alternation order in the joined pattern determines which alternative
# matches first; keep list order stable.
PFLICHT_RE = re.compile("|".join(_PFLICHT_SIGNALS), re.IGNORECASE)
# Tier 2 ("Empfehlung"): weaker normative signals -> recommendation.
_EMPFEHLUNG_SIGNALS = [
    # German modal verbs weaker than "muss"
    r"\bsoll\b", r"\bsollen\b", r"\bsollte\b", r"\bsollten\b",
    r"\bgewährleisten\b", r"\bsicherstellen\b",
    # English recommendation signals
    r"\bshould\b", r"\bensure\b", r"\brecommend\w*\b",
    # Common normative infinitives (without auxiliary verb, read as recommendation)
    r"\bnachweisen\b", r"\beinhalten\b", r"\bunterlassen\b", r"\bwahren\b",
    r"\bdokumentieren\b", r"\bimplementieren\b", r"\büberprüfen\b", r"\büberwachen\b",
    # Audit/check instructions phrased as a normative statement
    r"\bprüfen,\s+ob\b", r"\bkontrollieren,\s+ob\b",
]
EMPFEHLUNG_RE = re.compile("|".join(_EMPFEHLUNG_SIGNALS), re.IGNORECASE)
# Tier 3 ("Kann"): permissive/optional signals.
_KANN_SIGNALS = [
    r"\bkann\b", r"\bkönnen\b", r"\bdarf\b", r"\bdürfen\b",
    r"\bmay\b", r"\boptional\b",
]
KANN_RE = re.compile("|".join(_KANN_SIGNALS), re.IGNORECASE)
# Union of all three tiers; used for a single "has any normative signal" check.
NORMATIVE_RE = re.compile(
    "|".join(_PFLICHT_SIGNALS + _EMPFEHLUNG_SIGNALS + _KANN_SIGNALS),
    re.IGNORECASE,
)
# Signals that a sentence gives a reason/justification rather than a requirement
# ("weil", "because", "führt zu", ...).
_RATIONALE_SIGNALS = [
    r"\bda\s+", r"\bweil\b", r"\bgrund\b", r"\berwägung",
    r"\bbecause\b", r"\breason\b", r"\brationale\b",
    r"\bkönnen\s+.*\s+verursachen\b", r"\bführt\s+zu\b",
]
RATIONALE_RE = re.compile("|".join(_RATIONALE_SIGNALS), re.IGNORECASE)
# Evidence-related keywords (for fact detection)
_EVIDENCE_KEYWORDS = [
    r"\bnachweis\b", r"\bzertifikat\b", r"\baudit.report\b",
    r"\bprotokoll\b", r"\bdokumentation\b", r"\bbericht\b",
    r"\bcertificate\b", r"\bevidence\b", r"\bproof\b",
]
EVIDENCE_RE = re.compile("|".join(_EVIDENCE_KEYWORDS), re.IGNORECASE)