feat: Anti-Fake-Evidence System (Phase 1-4b)

Implement the full evidence integrity pipeline to prevent compliance theater:
- Confidence levels (E0-E4), truth status tracking, assertion engine
- Four-Eyes approval workflow, audit trail, reject endpoint
- Evidence distribution dashboard, LLM audit routes
- Traceability matrix (backend endpoint + Compliance Hub UI tab)
- Anti-fake badges, control status machine, normative patterns
- 2 migrations, 4 test suites, MkDocs documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -61,6 +61,8 @@ _ROUTER_MODULES = [
|
||||
"evidence_check_routes",
|
||||
"vvt_library_routes",
|
||||
"tom_mapping_routes",
|
||||
"llm_audit_routes",
|
||||
"assertion_routes",
|
||||
]
|
||||
|
||||
_loaded_count = 0
|
||||
|
||||
227
backend-compliance/compliance/api/assertion_routes.py
Normal file
227
backend-compliance/compliance/api/assertion_routes.py
Normal file
@@ -0,0 +1,227 @@
|
||||
"""
|
||||
API routes for Assertion Engine (Anti-Fake-Evidence Phase 2).
|
||||
|
||||
Endpoints:
|
||||
- /assertions: CRUD for assertions
|
||||
- /assertions/extract: Automatic extraction from entity text
|
||||
- /assertions/summary: Stats (total assertions, facts, unverified)
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
|
||||
from ..db.models import AssertionDB
|
||||
from ..services.assertion_engine import extract_assertions
|
||||
from .schemas import (
|
||||
AssertionCreate,
|
||||
AssertionUpdate,
|
||||
AssertionResponse,
|
||||
AssertionListResponse,
|
||||
AssertionSummaryResponse,
|
||||
AssertionExtractRequest,
|
||||
)
|
||||
from .audit_trail_utils import log_audit_trail, generate_id
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(tags=["compliance-assertions"])
|
||||
|
||||
|
||||
def _build_assertion_response(a: AssertionDB) -> AssertionResponse:
    """Convert an ``AssertionDB`` row into an ``AssertionResponse``.

    A falsy ``evidence_ids`` becomes an empty list and a falsy
    ``confidence`` becomes ``0.0``; every other field is copied as-is.
    """
    copied_fields = (
        "id",
        "tenant_id",
        "entity_type",
        "entity_id",
        "sentence_text",
        "sentence_index",
        "assertion_type",
        "normative_tier",
        "verified_by",
        "verified_at",
        "created_at",
        "updated_at",
    )
    payload = {name: getattr(a, name) for name in copied_fields}
    # Normalise the two nullable columns so the response never carries None.
    payload["evidence_ids"] = a.evidence_ids or []
    payload["confidence"] = a.confidence or 0.0
    return AssertionResponse(**payload)
|
||||
|
||||
|
||||
@router.post("/assertions", response_model=AssertionResponse)
async def create_assertion(
    data: AssertionCreate,
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Persist one manually supplied assertion and return it.

    ``assertion_type`` falls back to ``"assertion"`` and ``evidence_ids``
    to an empty list when the request leaves them empty. ``sentence_index``
    and ``confidence`` are not set by this endpoint.
    """
    columns = {
        "id": generate_id(),
        "tenant_id": tenant_id,
        "entity_type": data.entity_type,
        "entity_id": data.entity_id,
        "sentence_text": data.sentence_text,
        "assertion_type": data.assertion_type or "assertion",
        "evidence_ids": data.evidence_ids or [],
        "normative_tier": data.normative_tier,
    }
    record = AssertionDB(**columns)

    db.add(record)
    db.commit()
    # Reload so database-populated columns are present in the response.
    db.refresh(record)
    return _build_assertion_response(record)
|
||||
|
||||
|
||||
@router.get("/assertions", response_model=AssertionListResponse)
async def list_assertions(
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    assertion_type: Optional[str] = Query(None),
    tenant_id: Optional[str] = Query(None),
    limit: int = Query(100, ge=1, le=500),
    db: Session = Depends(get_db),
):
    """Return assertions matching the supplied filters.

    Each filter is applied only when its value is truthy. ``total`` is the
    number of matching rows before ``limit`` is applied; the returned rows
    are ordered by ascending ``sentence_index``.
    """
    equality_filters = (
        (AssertionDB.entity_type, entity_type),
        (AssertionDB.entity_id, entity_id),
        (AssertionDB.assertion_type, assertion_type),
        (AssertionDB.tenant_id, tenant_id),
    )

    query = db.query(AssertionDB)
    for column, wanted in equality_filters:
        if wanted:
            query = query.filter(column == wanted)

    total = query.count()
    ordered = query.order_by(AssertionDB.sentence_index.asc())
    page = ordered.limit(limit).all()

    return AssertionListResponse(
        assertions=[_build_assertion_response(row) for row in page],
        total=total,
    )
|
||||
|
||||
|
||||
@router.get("/assertions/summary", response_model=AssertionSummaryResponse)
async def assertion_summary(
    tenant_id: Optional[str] = Query(None),
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Return counts of assertions by type for the filtered set.

    ``unverified_count`` covers rows whose type is ``"assertion"`` and whose
    ``verified_by`` is falsy. All matching rows are loaded and counted in
    Python.
    """
    query = db.query(AssertionDB)
    for column, wanted in (
        (AssertionDB.tenant_id, tenant_id),
        (AssertionDB.entity_type, entity_type),
        (AssertionDB.entity_id, entity_id),
    ):
        if wanted:
            query = query.filter(column == wanted)

    rows = query.all()

    facts = 0
    rationale = 0
    unverified = 0
    # The three categories are mutually exclusive, so one pass suffices.
    for row in rows:
        kind = row.assertion_type
        if kind == "fact":
            facts += 1
        elif kind == "rationale":
            rationale += 1
        elif kind == "assertion" and not row.verified_by:
            unverified += 1

    return AssertionSummaryResponse(
        total_assertions=len(rows),
        total_facts=facts,
        total_rationale=rationale,
        unverified_count=unverified,
    )
|
||||
|
||||
|
||||
@router.get("/assertions/{assertion_id}", response_model=AssertionResponse)
async def get_assertion(
    assertion_id: str,
    db: Session = Depends(get_db),
):
    """Fetch one assertion by primary key.

    Raises:
        HTTPException: 404 when no row has the given ``assertion_id``.
    """
    by_id = db.query(AssertionDB).filter(AssertionDB.id == assertion_id)
    record = by_id.first()
    if record:
        return _build_assertion_response(record)
    raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")
|
||||
|
||||
|
||||
@router.put("/assertions/{assertion_id}", response_model=AssertionResponse)
async def update_assertion(
    assertion_id: str,
    data: AssertionUpdate,
    db: Session = Depends(get_db),
):
    """Apply the fields explicitly set in the request to an assertion.

    Only fields present in the request body are written
    (``exclude_unset=True``); ``updated_at`` is refreshed on every call.

    Raises:
        HTTPException: 404 when no row has the given ``assertion_id``.
    """
    record = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
    if not record:
        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")

    changes = data.model_dump(exclude_unset=True)
    for field_name in changes:
        setattr(record, field_name, changes[field_name])
    record.updated_at = datetime.utcnow()

    db.commit()
    db.refresh(record)
    return _build_assertion_response(record)
|
||||
|
||||
|
||||
@router.post("/assertions/{assertion_id}/verify", response_model=AssertionResponse)
async def verify_assertion(
    assertion_id: str,
    verified_by: str = Query(...),
    db: Session = Depends(get_db),
):
    """Mark an assertion as a verified fact.

    Sets ``assertion_type`` to ``"fact"`` and records who verified it and
    when. ``verified_at`` and ``updated_at`` receive the same timestamp.

    Raises:
        HTTPException: 400 when ``verified_by`` is blank, 404 when no row
            has the given ``assertion_id``.
    """
    # A blank verifier would stamp the row as a "fact" with no accountable
    # identity, so reject it before touching the database.
    if not verified_by.strip():
        raise HTTPException(status_code=400, detail="verified_by must not be empty")

    a = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
    if not a:
        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")

    # One clock reading so both timestamps describe the same event.
    now = datetime.utcnow()
    a.assertion_type = "fact"
    a.verified_by = verified_by
    a.verified_at = now
    a.updated_at = now

    db.commit()
    db.refresh(a)
    return _build_assertion_response(a)
|
||||
|
||||
|
||||
@router.post("/assertions/extract", response_model=AssertionListResponse)
async def extract_assertions_endpoint(
    data: AssertionExtractRequest,
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Run assertion extraction on the request text and store the results.

    Every item returned by ``extract_assertions`` becomes one new
    ``AssertionDB`` row; all rows are committed together and returned.
    """
    items = extract_assertions(
        text=data.text,
        entity_type=data.entity_type,
        entity_id=data.entity_id,
        tenant_id=tenant_id,
    )

    stored = []
    for item in items:
        row = AssertionDB(
            id=generate_id(),
            tenant_id=item["tenant_id"],
            entity_type=item["entity_type"],
            entity_id=item["entity_id"],
            sentence_text=item["sentence_text"],
            sentence_index=item["sentence_index"],
            assertion_type=item["assertion_type"],
            evidence_ids=item["evidence_ids"],
            # These two keys are optional in the extractor output.
            normative_tier=item.get("normative_tier"),
            confidence=item.get("confidence", 0.0),
        )
        db.add(row)
        stored.append(row)

    db.commit()
    for row in stored:
        db.refresh(row)

    responses = [_build_assertion_response(row) for row in stored]
    return AssertionListResponse(assertions=responses, total=len(stored))
|
||||
53
backend-compliance/compliance/api/audit_trail_utils.py
Normal file
53
backend-compliance/compliance/api/audit_trail_utils.py
Normal file
@@ -0,0 +1,53 @@
|
||||
"""Shared audit trail utilities.
|
||||
|
||||
Extracted from isms_routes.py for reuse across evidence, control,
|
||||
and assertion routes.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..db.models import AuditTrailDB
|
||||
|
||||
|
||||
def generate_id() -> str:
    """Return a freshly generated random UUID (version 4) as a string."""
    new_uuid = uuid.uuid4()
    return str(new_uuid)
|
||||
|
||||
|
||||
def create_signature(data: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``data`` encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(data.encode())
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def log_audit_trail(
    db: Session,
    entity_type: str,
    entity_id: str,
    entity_name: str,
    action: str,
    performed_by: str,
    field_changed: str = None,
    old_value: str = None,
    new_value: str = None,
    change_summary: str = None,
):
    """Add one ``AuditTrailDB`` entry to the session.

    The entry is only added via ``db.add``; committing is left to the
    caller.

    Args:
        db: Session the entry is added to.
        entity_type: Kind of entity the action applies to.
        entity_id: Identifier of that entity.
        entity_name: Human-readable name stored with the entry.
        action: Name of the action performed.
        performed_by: Who performed the action.
        field_changed: Optional name of the changed field.
        old_value: Optional previous value of that field.
        new_value: Optional new value of that field.
        change_summary: Optional free-text description.
    """
    # NOTE(review): the checksum covers only these four fields, not the
    # old/new values or the timestamp — confirm that is intended.
    signed_payload = f"{entity_type}|{entity_id}|{action}|{performed_by}"

    entry = AuditTrailDB(
        id=generate_id(),
        entity_type=entity_type,
        entity_id=entity_id,
        entity_name=entity_name,
        action=action,
        field_changed=field_changed,
        old_value=old_value,
        new_value=new_value,
        change_summary=change_summary,
        performed_by=performed_by,
        performed_at=datetime.utcnow(),
        checksum=create_signature(signed_payload),
    )
    db.add(entry)
|
||||
@@ -32,14 +32,21 @@ from ..db import (
|
||||
ControlRepository,
|
||||
EvidenceRepository,
|
||||
RiskRepository,
|
||||
AssertionDB,
|
||||
)
|
||||
from .schemas import (
|
||||
DashboardResponse,
|
||||
MultiDimensionalScore,
|
||||
ExecutiveDashboardResponse,
|
||||
TrendDataPoint,
|
||||
RiskSummary,
|
||||
DeadlineItem,
|
||||
TeamWorkloadItem,
|
||||
TraceabilityAssertion,
|
||||
TraceabilityEvidence,
|
||||
TraceabilityCoverage,
|
||||
TraceabilityControl,
|
||||
TraceabilityMatrixResponse,
|
||||
)
|
||||
from .tenant_utils import get_tenant_id as _get_tenant_id
|
||||
from .db_utils import row_to_dict as _row_to_dict
|
||||
@@ -95,6 +102,14 @@ async def get_dashboard(db: Session = Depends(get_db)):
|
||||
# or compute from by_status dict
|
||||
score = ctrl_stats.get("compliance_score", 0.0)
|
||||
|
||||
# Multi-dimensional score (Anti-Fake-Evidence)
|
||||
try:
|
||||
ms = ctrl_repo.get_multi_dimensional_score()
|
||||
multi_score = MultiDimensionalScore(**ms)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to compute multi-dimensional score: {e}")
|
||||
multi_score = None
|
||||
|
||||
return DashboardResponse(
|
||||
compliance_score=round(score, 1),
|
||||
total_regulations=len(regulations),
|
||||
@@ -107,6 +122,7 @@ async def get_dashboard(db: Session = Depends(get_db)):
|
||||
total_risks=len(risks),
|
||||
risks_by_level=risks_by_level,
|
||||
recent_activity=[],
|
||||
multi_score=multi_score,
|
||||
)
|
||||
|
||||
|
||||
@@ -125,11 +141,18 @@ async def get_compliance_score(db: Session = Depends(get_db)):
|
||||
else:
|
||||
score = 0
|
||||
|
||||
# Multi-dimensional score (Anti-Fake-Evidence)
|
||||
try:
|
||||
multi_score = ctrl_repo.get_multi_dimensional_score()
|
||||
except Exception:
|
||||
multi_score = None
|
||||
|
||||
return {
|
||||
"score": round(score, 1),
|
||||
"total_controls": total,
|
||||
"passing_controls": passing,
|
||||
"partial_controls": partial,
|
||||
"multi_score": multi_score,
|
||||
}
|
||||
|
||||
|
||||
@@ -597,6 +620,158 @@ async def get_score_history(
|
||||
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Evidence Distribution (Anti-Fake-Evidence Phase 3)
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/dashboard/evidence-distribution")
async def get_evidence_distribution(
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Count evidence per confidence level and pending four-eyes reviews.

    Evidence without a confidence level is counted as ``"E1"``. An item is
    pending when it requires four-eyes review and its ``approval_status``
    is neither ``"approved"`` nor ``"rejected"``.
    """
    # NOTE(review): tenant_id is resolved but not used to filter here —
    # confirm whether EvidenceRepository scopes results by tenant.
    all_evidence = EvidenceRepository(db).get_all()

    by_confidence = dict.fromkeys(("E0", "E1", "E2", "E3", "E4"), 0)
    four_eyes_pending = 0

    for item in all_evidence:
        if item.confidence_level:
            level = item.confidence_level.value
        else:
            level = "E1"
        # Levels outside E0-E4 are ignored in the breakdown.
        if level in by_confidence:
            by_confidence[level] += 1

        if item.requires_four_eyes:
            if item.approval_status not in ("approved", "rejected"):
                four_eyes_pending += 1

    return {
        "by_confidence": by_confidence,
        "four_eyes_pending": four_eyes_pending,
        "total": len(all_evidence),
    }
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Traceability Matrix (Anti-Fake-Evidence Phase 4a)
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/dashboard/traceability-matrix", response_model=TraceabilityMatrixResponse)
async def get_traceability_matrix(
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """
    Build the Control → Evidence → Assertions traceability chain.

    Controls, evidence and evidence-level assertions are each loaded once,
    grouped in memory, and nested per control together with a coverage
    record. The summary counts controls with evidence and controls whose
    assertions are all verified.
    """
    # Ordering of confidence levels, used to find the weakest per control.
    rank = {"E0": 0, "E1": 1, "E2": 2, "E3": 3, "E4": 4}

    ctrl_repo = ControlRepository(db)
    evidence_repo = EvidenceRepository(db)

    controls = ctrl_repo.get_all()
    all_evidence = evidence_repo.get_all()
    evidence_assertions = (
        db.query(AssertionDB)
        .filter(AssertionDB.entity_type == "evidence")
        .all()
    )

    # Group assertions under the evidence they refer to (entity_id).
    assertions_by_evidence = {}
    for assertion in evidence_assertions:
        assertions_by_evidence.setdefault(assertion.entity_id, []).append(assertion)

    # Group evidence under its control.
    evidence_by_control = {}
    for ev in all_evidence:
        evidence_by_control.setdefault(str(ev.control_id), []).append(ev)

    matrix_rows = []
    covered_controls = 0
    fully_verified = 0

    for ctrl in controls:
        ctrl_id = str(ctrl.id)
        linked = evidence_by_control.get(ctrl_id, [])

        has_evidence = bool(linked)
        has_assertions = False
        any_unverified = False
        weakest = None
        evidence_rows = []

        for ev in linked:
            ev_id = str(ev.id)

            assertion_rows = []
            for item in assertions_by_evidence.get(ev_id, []):
                assertion_rows.append(TraceabilityAssertion(
                    id=str(item.id),
                    sentence_text=item.sentence_text,
                    assertion_type=item.assertion_type or "assertion",
                    confidence=item.confidence or 0.0,
                    verified=item.verified_by is not None,
                ))

            if assertion_rows:
                has_assertions = True
                if not all(row.verified for row in assertion_rows):
                    any_unverified = True

            conf = ev.confidence_level.value if ev.confidence_level else "E1"
            # Unknown level names rank like E1.
            if weakest is None or rank.get(conf, 1) < rank.get(weakest, 1):
                weakest = conf

            evidence_rows.append(TraceabilityEvidence(
                id=ev_id,
                title=ev.title,
                evidence_type=ev.evidence_type,
                confidence_level=conf,
                status=ev.status.value if ev.status else "valid",
                assertions=assertion_rows,
            ))

        # A control without any assertion is never "all verified".
        all_verified = has_assertions and not any_unverified

        if has_evidence:
            covered_controls += 1
        # has_assertions implies has_evidence, so all_verified is sufficient.
        if all_verified:
            fully_verified += 1

        matrix_rows.append(TraceabilityControl(
            id=ctrl_id,
            control_id=ctrl.control_id,
            title=ctrl.title,
            status=ctrl.status.value if ctrl.status else "planned",
            domain=ctrl.domain.value if ctrl.domain else "unknown",
            evidence=evidence_rows,
            coverage=TraceabilityCoverage(
                has_evidence=has_evidence,
                has_assertions=has_assertions,
                all_assertions_verified=all_verified,
                min_confidence_level=weakest,
            ),
        ))

    total_controls = len(matrix_rows)
    summary = {
        "total_controls": total_controls,
        "covered_controls": covered_controls,
        "fully_verified": fully_verified,
        "uncovered_controls": total_controls - covered_controls,
    }

    return TraceabilityMatrixResponse(controls=matrix_rows, summary=summary)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Reports
|
||||
# ============================================================================
|
||||
|
||||
@@ -26,17 +26,102 @@ from ..db import (
|
||||
ControlRepository,
|
||||
EvidenceRepository,
|
||||
EvidenceStatusEnum,
|
||||
EvidenceConfidenceEnum,
|
||||
EvidenceTruthStatusEnum,
|
||||
)
|
||||
from ..db.models import EvidenceDB, ControlDB
|
||||
from ..db.models import EvidenceDB, ControlDB, AuditTrailDB
|
||||
from ..services.auto_risk_updater import AutoRiskUpdater
|
||||
from .schemas import (
|
||||
EvidenceCreate, EvidenceResponse, EvidenceListResponse,
|
||||
EvidenceRejectRequest,
|
||||
)
|
||||
from .audit_trail_utils import log_audit_trail
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(tags=["compliance-evidence"])
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Anti-Fake-Evidence: Four-Eyes Domain Check
|
||||
# ============================================================================
|
||||
|
||||
# Control domains whose evidence must be approved by two reviewers.
FOUR_EYES_DOMAINS = {"gov", "priv"}


def _requires_four_eyes(control_domain: str) -> bool:
    """Return True when ``control_domain`` is listed in FOUR_EYES_DOMAINS."""
    needs_second_reviewer = control_domain in FOUR_EYES_DOMAINS
    return needs_second_reviewer
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Anti-Fake-Evidence: Auto-Classification Helpers
|
||||
# ============================================================================
|
||||
|
||||
def _classify_confidence(source: Optional[str], evidence_type: Optional[str] = None, artifact_hash: Optional[str] = None) -> EvidenceConfidenceEnum:
    """Map an evidence source to its default confidence level.

    ``ci_pipeline`` and ``api`` give E3, ``manual`` and ``upload`` give E1,
    ``generated`` gives E0; any other source (including ``None``) gives E1.
    """
    # NOTE(review): "api" yields E3 with or without artifact_hash, and
    # evidence_type is never consulted — confirm whether un-hashed API
    # evidence was meant to rank lower.
    level_by_source = {
        "ci_pipeline": EvidenceConfidenceEnum.E3,
        "api": EvidenceConfidenceEnum.E3,
        "manual": EvidenceConfidenceEnum.E1,
        "upload": EvidenceConfidenceEnum.E1,
        "generated": EvidenceConfidenceEnum.E0,
    }
    return level_by_source.get(source, EvidenceConfidenceEnum.E1)
|
||||
|
||||
|
||||
def _classify_truth_status(source: Optional[str]) -> EvidenceTruthStatusEnum:
    """Map an evidence source to its default truth status.

    ``ci_pipeline`` and ``api`` give OBSERVED, ``generated`` gives
    GENERATED; ``manual``, ``upload`` and every other source give UPLOADED.
    """
    status_by_source = {
        "ci_pipeline": EvidenceTruthStatusEnum.OBSERVED,
        "api": EvidenceTruthStatusEnum.OBSERVED,
        "manual": EvidenceTruthStatusEnum.UPLOADED,
        "upload": EvidenceTruthStatusEnum.UPLOADED,
        "generated": EvidenceTruthStatusEnum.GENERATED,
    }
    return status_by_source.get(source, EvidenceTruthStatusEnum.UPLOADED)
|
||||
|
||||
|
||||
def _build_evidence_response(e: EvidenceDB) -> EvidenceResponse:
    """Convert an ``EvidenceDB`` row into an ``EvidenceResponse``.

    The enum-typed columns (``status``, ``confidence_level``,
    ``truth_status``) are emitted as their ``.value`` or ``None`` when
    unset; all other fields, including the review and four-eyes fields,
    are copied as-is.
    """
    copied_fields = (
        "id",
        "control_id",
        "evidence_type",
        "title",
        "description",
        "artifact_path",
        "artifact_url",
        "artifact_hash",
        "file_size_bytes",
        "mime_type",
        "valid_from",
        "valid_until",
        "source",
        "ci_job_id",
        "uploaded_by",
        "collected_at",
        "created_at",
        "generation_mode",
        "may_be_used_as_evidence",
        "reviewed_by",
        "reviewed_at",
        "approval_status",
        "first_reviewer",
        "first_reviewed_at",
        "second_reviewer",
        "second_reviewed_at",
        "requires_four_eyes",
    )
    payload = {name: getattr(e, name) for name in copied_fields}

    for enum_field in ("status", "confidence_level", "truth_status"):
        member = getattr(e, enum_field)
        payload[enum_field] = member.value if member else None

    return EvidenceResponse(**payload)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Evidence
|
||||
# ============================================================================
|
||||
@@ -80,29 +165,7 @@ async def list_evidence(
|
||||
offset = (page - 1) * limit
|
||||
evidence = evidence[offset:offset + limit]
|
||||
|
||||
results = [
|
||||
EvidenceResponse(
|
||||
id=e.id,
|
||||
control_id=e.control_id,
|
||||
evidence_type=e.evidence_type,
|
||||
title=e.title,
|
||||
description=e.description,
|
||||
artifact_path=e.artifact_path,
|
||||
artifact_url=e.artifact_url,
|
||||
artifact_hash=e.artifact_hash,
|
||||
file_size_bytes=e.file_size_bytes,
|
||||
mime_type=e.mime_type,
|
||||
valid_from=e.valid_from,
|
||||
valid_until=e.valid_until,
|
||||
status=e.status.value if e.status else None,
|
||||
source=e.source,
|
||||
ci_job_id=e.ci_job_id,
|
||||
uploaded_by=e.uploaded_by,
|
||||
collected_at=e.collected_at,
|
||||
created_at=e.created_at,
|
||||
)
|
||||
for e in evidence
|
||||
]
|
||||
results = [_build_evidence_response(e) for e in evidence]
|
||||
|
||||
return EvidenceListResponse(evidence=results, total=total)
|
||||
|
||||
@@ -121,6 +184,22 @@ async def create_evidence(
|
||||
if not control:
|
||||
raise HTTPException(status_code=404, detail=f"Control {evidence_data.control_id} not found")
|
||||
|
||||
source = evidence_data.source or "api"
|
||||
confidence = _classify_confidence(source, evidence_data.evidence_type)
|
||||
truth = _classify_truth_status(source)
|
||||
|
||||
# Allow explicit override from request
|
||||
if evidence_data.confidence_level:
|
||||
try:
|
||||
confidence = EvidenceConfidenceEnum(evidence_data.confidence_level)
|
||||
except ValueError:
|
||||
pass
|
||||
if evidence_data.truth_status:
|
||||
try:
|
||||
truth = EvidenceTruthStatusEnum(evidence_data.truth_status)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
evidence = repo.create(
|
||||
control_id=control.id,
|
||||
evidence_type=evidence_data.evidence_type,
|
||||
@@ -129,31 +208,34 @@ async def create_evidence(
|
||||
artifact_url=evidence_data.artifact_url,
|
||||
valid_from=evidence_data.valid_from,
|
||||
valid_until=evidence_data.valid_until,
|
||||
source=evidence_data.source or "api",
|
||||
source=source,
|
||||
ci_job_id=evidence_data.ci_job_id,
|
||||
)
|
||||
|
||||
# Set anti-fake-evidence fields
|
||||
evidence.confidence_level = confidence
|
||||
evidence.truth_status = truth
|
||||
# Generated evidence should not be used as evidence by default
|
||||
if truth == EvidenceTruthStatusEnum.GENERATED:
|
||||
evidence.may_be_used_as_evidence = False
|
||||
|
||||
# Four-Eyes: check if the linked control's domain requires it
|
||||
control_domain = control.domain.value if control.domain else ""
|
||||
if _requires_four_eyes(control_domain):
|
||||
evidence.requires_four_eyes = True
|
||||
evidence.approval_status = "pending_first"
|
||||
|
||||
db.commit()
|
||||
|
||||
# Audit trail
|
||||
log_audit_trail(
|
||||
db, "evidence", evidence.id, evidence.title, "create",
|
||||
performed_by=evidence_data.source or "api",
|
||||
change_summary=f"Evidence created with confidence={confidence.value}, truth={truth.value}",
|
||||
)
|
||||
db.commit()
|
||||
|
||||
return EvidenceResponse(
|
||||
id=evidence.id,
|
||||
control_id=evidence.control_id,
|
||||
evidence_type=evidence.evidence_type,
|
||||
title=evidence.title,
|
||||
description=evidence.description,
|
||||
artifact_path=evidence.artifact_path,
|
||||
artifact_url=evidence.artifact_url,
|
||||
artifact_hash=evidence.artifact_hash,
|
||||
file_size_bytes=evidence.file_size_bytes,
|
||||
mime_type=evidence.mime_type,
|
||||
valid_from=evidence.valid_from,
|
||||
valid_until=evidence.valid_until,
|
||||
status=evidence.status.value if evidence.status else None,
|
||||
source=evidence.source,
|
||||
ci_job_id=evidence.ci_job_id,
|
||||
uploaded_by=evidence.uploaded_by,
|
||||
collected_at=evidence.collected_at,
|
||||
created_at=evidence.created_at,
|
||||
)
|
||||
return _build_evidence_response(evidence)
|
||||
|
||||
|
||||
@router.delete("/evidence/{evidence_id}")
|
||||
@@ -223,28 +305,20 @@ async def upload_evidence(
|
||||
mime_type=file.content_type,
|
||||
source="upload",
|
||||
)
|
||||
|
||||
# Upload evidence → E1 + uploaded
|
||||
evidence.confidence_level = EvidenceConfidenceEnum.E1
|
||||
evidence.truth_status = EvidenceTruthStatusEnum.UPLOADED
|
||||
|
||||
# Four-Eyes: check if the linked control's domain requires it
|
||||
control_domain = control.domain.value if control.domain else ""
|
||||
if _requires_four_eyes(control_domain):
|
||||
evidence.requires_four_eyes = True
|
||||
evidence.approval_status = "pending_first"
|
||||
|
||||
db.commit()
|
||||
|
||||
return EvidenceResponse(
|
||||
id=evidence.id,
|
||||
control_id=evidence.control_id,
|
||||
evidence_type=evidence.evidence_type,
|
||||
title=evidence.title,
|
||||
description=evidence.description,
|
||||
artifact_path=evidence.artifact_path,
|
||||
artifact_url=evidence.artifact_url,
|
||||
artifact_hash=evidence.artifact_hash,
|
||||
file_size_bytes=evidence.file_size_bytes,
|
||||
mime_type=evidence.mime_type,
|
||||
valid_from=evidence.valid_from,
|
||||
valid_until=evidence.valid_until,
|
||||
status=evidence.status.value if evidence.status else None,
|
||||
source=evidence.source,
|
||||
ci_job_id=evidence.ci_job_id,
|
||||
uploaded_by=evidence.uploaded_by,
|
||||
collected_at=evidence.collected_at,
|
||||
created_at=evidence.created_at,
|
||||
)
|
||||
return _build_evidence_response(evidence)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@@ -357,7 +431,7 @@ def _store_evidence(
|
||||
with open(file_path, "w") as f:
|
||||
json.dump(report_data or {}, f, indent=2)
|
||||
|
||||
# Create evidence record
|
||||
# Create evidence record with anti-fake-evidence classification
|
||||
evidence = EvidenceDB(
|
||||
id=str(uuid_module.uuid4()),
|
||||
control_id=control_db_id,
|
||||
@@ -373,6 +447,10 @@ def _store_evidence(
|
||||
valid_from=datetime.utcnow(),
|
||||
valid_until=datetime.utcnow() + timedelta(days=90),
|
||||
status=EvidenceStatusEnum(parsed["evidence_status"]),
|
||||
# CI pipeline evidence → E3 observed (system-observed, hash-verified)
|
||||
confidence_level=EvidenceConfidenceEnum.E3,
|
||||
truth_status=EvidenceTruthStatusEnum.OBSERVED,
|
||||
may_be_used_as_evidence=True,
|
||||
)
|
||||
db.add(evidence)
|
||||
db.commit()
|
||||
@@ -639,3 +717,169 @@ async def get_ci_evidence_status(
|
||||
"total_evidence": len(evidence_list),
|
||||
"controls": result,
|
||||
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Evidence Review (Anti-Fake-Evidence)
|
||||
# ============================================================================
|
||||
|
||||
from pydantic import BaseModel as _BaseModel
|
||||
|
||||
# Request body for PATCH /evidence/{evidence_id}/review.
class _EvidenceReviewRequest(_BaseModel):
    # Optional new confidence level; validated against EvidenceConfidenceEnum
    # by the review endpoint.
    confidence_level: Optional[str] = None
    # Optional new truth status; validated against EvidenceTruthStatusEnum
    # by the review endpoint.
    truth_status: Optional[str] = None
    # Identity of the reviewer; required.
    reviewed_by: str
|
||||
|
||||
|
||||
@router.patch("/evidence/{evidence_id}/review", response_model=EvidenceResponse)
async def review_evidence(
    evidence_id: str,
    review: _EvidenceReviewRequest,
    db: Session = Depends(get_db),
):
    """
    Review evidence: upgrade confidence level and/or change truth status.

    For Four-Eyes evidence, the first reviewer sets first_reviewer and
    approval_status='first_approved'. A second (different) reviewer then
    sets second_reviewer and approval_status='approved'.

    The request is validated in full before the row is modified, and the
    field changes are committed together with their audit-trail entries
    in a single transaction.

    Raises:
        HTTPException: 404 when the evidence does not exist; 400 for an
            invalid confidence_level / truth_status, a second reviewer
            equal to the first, or evidence already approved or rejected.
    """
    evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
    if not evidence:
        raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")

    old_confidence = evidence.confidence_level.value if evidence.confidence_level else None
    old_truth = evidence.truth_status.value if evidence.truth_status else None

    # --- Validation phase: nothing on the row is changed yet ---------------
    requested_confidence = None
    if review.confidence_level:
        try:
            requested_confidence = EvidenceConfidenceEnum(review.confidence_level)
        except ValueError as exc:
            raise HTTPException(status_code=400, detail=f"Invalid confidence_level: {review.confidence_level}") from exc

    requested_truth = None
    if review.truth_status:
        try:
            requested_truth = EvidenceTruthStatusEnum(review.truth_status)
        except ValueError as exc:
            raise HTTPException(status_code=400, detail=f"Invalid truth_status: {review.truth_status}") from exc

    status = evidence.approval_status or "none"
    if evidence.requires_four_eyes:
        if status == "first_approved" and review.reviewed_by == evidence.first_reviewer:
            raise HTTPException(
                status_code=400,
                detail="Four-Eyes: second reviewer must be different from first reviewer",
            )
        if status == "approved":
            raise HTTPException(status_code=400, detail="Evidence already approved")
        if status == "rejected":
            raise HTTPException(status_code=400, detail="Evidence was rejected — create new evidence instead")

    # --- Mutation phase ----------------------------------------------------
    # One clock reading so every timestamp of this review agrees.
    now = datetime.utcnow()

    if requested_confidence is not None:
        evidence.confidence_level = requested_confidence
    if requested_truth is not None:
        evidence.truth_status = requested_truth

    if evidence.requires_four_eyes:
        if status in ("none", "pending_first"):
            evidence.first_reviewer = review.reviewed_by
            evidence.first_reviewed_at = now
            evidence.approval_status = "first_approved"
        elif status == "first_approved":
            evidence.second_reviewer = review.reviewed_by
            evidence.second_reviewed_at = now
            evidence.approval_status = "approved"

    evidence.reviewed_by = review.reviewed_by
    evidence.reviewed_at = now

    # Audit trail: one entry per field that actually changed.
    new_confidence = evidence.confidence_level.value if evidence.confidence_level else None
    if old_confidence != new_confidence:
        log_audit_trail(
            db, "evidence", evidence_id, evidence.title, "review",
            performed_by=review.reviewed_by,
            field_changed="confidence_level",
            old_value=old_confidence,
            new_value=new_confidence,
        )
    new_truth = evidence.truth_status.value if evidence.truth_status else None
    if old_truth != new_truth:
        log_audit_trail(
            db, "evidence", evidence_id, evidence.title, "review",
            performed_by=review.reviewed_by,
            field_changed="truth_status",
            old_value=old_truth,
            new_value=new_truth,
        )

    # Single commit: the change and its audit entries persist together or
    # not at all.
    db.commit()

    db.refresh(evidence)
    return _build_evidence_response(evidence)
|
||||
|
||||
|
||||
@router.patch("/evidence/{evidence_id}/reject", response_model=EvidenceResponse)
async def reject_evidence(
    evidence_id: str,
    body: EvidenceRejectRequest,
    db: Session = Depends(get_db),
):
    """Reject evidence (sets approval_status='rejected').

    Stores the rejecting reviewer and the rejection time on the evidence row
    and writes a "reject" audit-trail entry whose summary is the supplied
    rejection reason (or "Evidence rejected" when none was given).

    The status change and the audit entry are persisted with a single commit,
    so a rejection is never stored without its audit record.

    Raises:
        HTTPException: 404 if no evidence with ``evidence_id`` exists.
    """
    evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
    if not evidence:
        raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")

    evidence.approval_status = "rejected"
    evidence.reviewed_by = body.reviewed_by
    evidence.reviewed_at = datetime.utcnow()

    # NOTE(review): log_audit_trail is expected to only stage the entry on the
    # session (db.add) — confirm against audit_trail_utils. The commit below
    # then writes the evidence update and the audit entry atomically.
    log_audit_trail(
        db, "evidence", evidence_id, evidence.title, "reject",
        performed_by=body.reviewed_by,
        change_summary=body.rejection_reason or "Evidence rejected",
    )
    db.commit()

    db.refresh(evidence)
    return _build_evidence_response(evidence)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Audit Trail Query
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/audit-trail")
async def get_audit_trail(
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    action: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
):
    """Query audit trail entries, newest first.

    Args:
        entity_type: Optional exact-match filter on the entity type.
        entity_id: Optional exact-match filter on the entity id.
        action: Optional exact-match filter on the recorded action.
        limit: Maximum number of entries to return (1-200).

    Returns:
        A dict with ``entries`` (at most ``limit`` serialized audit records)
        and ``total`` (number of records matching the filters).
    """
    query = db.query(AuditTrailDB)
    if entity_type:
        query = query.filter(AuditTrailDB.entity_type == entity_type)
    if entity_id:
        query = query.filter(AuditTrailDB.entity_id == entity_id)
    if action:
        query = query.filter(AuditTrailDB.action == action)

    # Count before applying the limit; len(records) would be capped at the
    # page size and under-report the number of matching entries.
    total = query.count()
    records = query.order_by(AuditTrailDB.performed_at.desc()).limit(limit).all()

    return {
        "entries": [
            {
                "id": r.id,
                "entity_type": r.entity_type,
                "entity_id": r.entity_id,
                "entity_name": r.entity_name,
                "action": r.action,
                "field_changed": r.field_changed,
                "old_value": r.old_value,
                "new_value": r.new_value,
                "change_summary": r.change_summary,
                "performed_by": r.performed_by,
                "performed_at": r.performed_at.isoformat() if r.performed_at else None,
                "checksum": r.checksum,
            }
            for r in records
        ],
        "total": total,
    }
|
||||
|
||||
@@ -73,39 +73,8 @@ def generate_id() -> str:
|
||||
return str(uuid.uuid4())
|
||||
|
||||
|
||||
def create_signature(data: str) -> str:
    """Return the hex-encoded SHA-256 digest of *data*."""
    hasher = hashlib.sha256()
    hasher.update(data.encode())
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def log_audit_trail(
    db: Session,
    entity_type: str,
    entity_id: str,
    entity_name: str,
    action: str,
    performed_by: str,
    field_changed: str = None,
    old_value: str = None,
    new_value: str = None,
    change_summary: str = None
):
    """Stage one audit-trail row on the session.

    The row is only added to ``db``; committing is left to the caller.
    The checksum is derived from entity type, entity id, action and actor.
    """
    entry_fields = {
        "id": generate_id(),
        "entity_type": entity_type,
        "entity_id": entity_id,
        "entity_name": entity_name,
        "action": action,
        "field_changed": field_changed,
        "old_value": old_value,
        "new_value": new_value,
        "change_summary": change_summary,
        "performed_by": performed_by,
        "performed_at": datetime.utcnow(),
        "checksum": create_signature(f"{entity_type}|{entity_id}|{action}|{performed_by}"),
    }
    db.add(AuditTrailDB(**entry_fields))
|
||||
# Shared audit trail utilities — canonical implementation in audit_trail_utils.py
|
||||
from .audit_trail_utils import log_audit_trail, create_signature # noqa: E402
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
||||
162
backend-compliance/compliance/api/llm_audit_routes.py
Normal file
162
backend-compliance/compliance/api/llm_audit_routes.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
FastAPI routes for LLM Generation Audit Trail.
|
||||
|
||||
Endpoints:
|
||||
- POST /llm-audit: Record an LLM generation event
|
||||
- GET /llm-audit: List audit records with filters
|
||||
"""
|
||||
|
||||
import logging
|
||||
import uuid as uuid_module
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from ..db.models import LLMGenerationAuditDB
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(tags=["compliance-llm-audit"])
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Schemas
|
||||
# ============================================================================
|
||||
|
||||
class LLMAuditCreate(BaseModel):
    """Request body for recording one LLM generation event (POST /llm-audit)."""

    entity_type: str
    entity_id: Optional[str] = None
    generation_mode: str
    # Parsed into EvidenceTruthStatusEnum by the create route; values that are
    # not enum members fall back to GENERATED there.
    truth_status: str = "generated"
    may_be_used_as_evidence: bool = False
    llm_model: Optional[str] = None
    llm_provider: Optional[str] = None
    prompt_hash: Optional[str] = None
    # The create route stores at most the first 500 characters of each summary.
    input_summary: Optional[str] = None
    output_summary: Optional[str] = None
    # Stored on the record's extra_metadata attribute ({} when omitted).
    metadata: Optional[dict] = None
    tenant_id: Optional[str] = None
|
||||
|
||||
|
||||
class LLMAuditResponse(BaseModel):
    """Serialized LLM generation audit record returned by the /llm-audit routes."""

    id: str
    tenant_id: Optional[str] = None
    entity_type: str
    entity_id: Optional[str] = None
    generation_mode: str
    # String value of the record's truth-status enum.
    truth_status: str
    may_be_used_as_evidence: bool
    llm_model: Optional[str] = None
    llm_provider: Optional[str] = None
    prompt_hash: Optional[str] = None
    input_summary: Optional[str] = None
    output_summary: Optional[str] = None
    # Filled from the ORM attribute extra_metadata by the routes in this module.
    # NOTE(review): building this model directly from an ORM object via
    # from_attributes would read `.metadata`, not `.extra_metadata` — confirm
    # nobody relies on that path.
    metadata: Optional[dict] = None
    created_at: datetime

    class Config:
        from_attributes = True
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Routes
|
||||
# ============================================================================
|
||||
|
||||
@router.post("/llm-audit", response_model=LLMAuditResponse)
async def create_llm_audit(
    data: LLMAuditCreate,
    db: Session = Depends(get_db),
):
    """Record an LLM generation event for audit trail.

    The submitted ``truth_status`` is parsed into ``EvidenceTruthStatusEnum``;
    an unknown value is logged and stored as GENERATED instead of rejecting
    the request. Input and output summaries are truncated to 500 characters.

    Returns:
        The persisted record as an ``LLMAuditResponse``.
    """
    from ..db.models import EvidenceTruthStatusEnum

    # Validate truth_status
    try:
        truth_enum = EvidenceTruthStatusEnum(data.truth_status)
    except ValueError:
        # Best-effort fallback is kept, but made visible so that callers
        # sending an unknown status can be found and fixed.
        logger.warning(
            "Invalid truth_status %r for LLM audit record; falling back to 'generated'",
            data.truth_status,
        )
        truth_enum = EvidenceTruthStatusEnum.GENERATED

    record = LLMGenerationAuditDB(
        id=str(uuid_module.uuid4()),
        tenant_id=data.tenant_id,
        entity_type=data.entity_type,
        entity_id=data.entity_id,
        generation_mode=data.generation_mode,
        truth_status=truth_enum,
        may_be_used_as_evidence=data.may_be_used_as_evidence,
        llm_model=data.llm_model,
        llm_provider=data.llm_provider,
        prompt_hash=data.prompt_hash,
        input_summary=data.input_summary[:500] if data.input_summary else None,
        output_summary=data.output_summary[:500] if data.output_summary else None,
        extra_metadata=data.metadata or {},
    )
    db.add(record)
    db.commit()
    db.refresh(record)

    return LLMAuditResponse(
        id=record.id,
        tenant_id=record.tenant_id,
        entity_type=record.entity_type,
        entity_id=record.entity_id,
        generation_mode=record.generation_mode,
        truth_status=record.truth_status.value if record.truth_status else "generated",
        may_be_used_as_evidence=record.may_be_used_as_evidence,
        llm_model=record.llm_model,
        llm_provider=record.llm_provider,
        prompt_hash=record.prompt_hash,
        input_summary=record.input_summary,
        output_summary=record.output_summary,
        metadata=record.extra_metadata,
        created_at=record.created_at,
    )
|
||||
|
||||
|
||||
@router.get("/llm-audit")
async def list_llm_audit(
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    page: int = Query(1, ge=1),
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
):
    """Return one page of LLM generation audit records, newest first.

    ``entity_type`` and ``entity_id`` are optional exact-match filters.
    The response carries the page of records, the number of records
    matching the filters, and the echoed ``page`` / ``limit``.
    """

    def _serialize(row) -> LLMAuditResponse:
        # Map one ORM row onto the response schema.
        status_value = row.truth_status.value if row.truth_status else "generated"
        return LLMAuditResponse(
            id=row.id,
            tenant_id=row.tenant_id,
            entity_type=row.entity_type,
            entity_id=row.entity_id,
            generation_mode=row.generation_mode,
            truth_status=status_value,
            may_be_used_as_evidence=row.may_be_used_as_evidence,
            llm_model=row.llm_model,
            llm_provider=row.llm_provider,
            prompt_hash=row.prompt_hash,
            input_summary=row.input_summary,
            output_summary=row.output_summary,
            metadata=row.extra_metadata,
            created_at=row.created_at,
        )

    conditions = []
    if entity_type:
        conditions.append(LLMGenerationAuditDB.entity_type == entity_type)
    if entity_id:
        conditions.append(LLMGenerationAuditDB.entity_id == entity_id)

    selection = db.query(LLMGenerationAuditDB)
    for condition in conditions:
        selection = selection.filter(condition)

    match_count = selection.count()
    skip = (page - 1) * limit
    newest_first = selection.order_by(LLMGenerationAuditDB.created_at.desc())
    rows = newest_first.offset(skip).limit(limit).all()

    return {
        "records": [_serialize(row) for row in rows],
        "total": match_count,
        "page": page,
        "limit": limit,
    }
|
||||
@@ -25,6 +25,7 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
|
||||
from .audit_trail_utils import log_audit_trail
|
||||
from ..db import (
|
||||
RegulationRepository,
|
||||
RequirementRepository,
|
||||
@@ -595,6 +596,7 @@ async def get_control(control_id: str, db: Session = Depends(get_db)):
|
||||
review_frequency_days=control.review_frequency_days,
|
||||
status=control.status.value if control.status else None,
|
||||
status_notes=control.status_notes,
|
||||
status_justification=control.status_justification,
|
||||
last_reviewed_at=control.last_reviewed_at,
|
||||
next_review_at=control.next_review_at,
|
||||
created_at=control.created_at,
|
||||
@@ -617,16 +619,52 @@ async def update_control(
|
||||
|
||||
update_data = update.model_dump(exclude_unset=True)
|
||||
|
||||
# Convert status string to enum
|
||||
# Convert status string to enum and validate transition
|
||||
if "status" in update_data:
|
||||
try:
|
||||
update_data["status"] = ControlStatusEnum(update_data["status"])
|
||||
new_status_enum = ControlStatusEnum(update_data["status"])
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid status: {update_data['status']}")
|
||||
|
||||
# Validate status transition (Anti-Fake-Evidence)
|
||||
from ..services.control_status_machine import validate_transition
|
||||
current_status = control.status.value if control.status else "planned"
|
||||
evidence_list = db.query(EvidenceDB).filter(EvidenceDB.control_id == control.id).all()
|
||||
allowed, violations = validate_transition(
|
||||
current_status=current_status,
|
||||
new_status=update_data["status"],
|
||||
evidence_list=evidence_list,
|
||||
status_justification=update_data.get("status_justification") or update_data.get("status_notes"),
|
||||
)
|
||||
if not allowed:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail={
|
||||
"error": "Status transition not allowed",
|
||||
"current_status": current_status,
|
||||
"requested_status": update_data["status"],
|
||||
"violations": violations,
|
||||
}
|
||||
)
|
||||
|
||||
update_data["status"] = new_status_enum
|
||||
|
||||
updated = repo.update(control.id, **update_data)
|
||||
db.commit()
|
||||
|
||||
# Audit trail for status changes
|
||||
new_status = updated.status.value if updated.status else None
|
||||
if "status" in update.model_dump(exclude_unset=True) and current_status != new_status:
|
||||
log_audit_trail(
|
||||
db, "control", control.id, updated.control_id or updated.title,
|
||||
"status_change",
|
||||
performed_by=update.owner or "system",
|
||||
field_changed="status",
|
||||
old_value=current_status,
|
||||
new_value=new_status,
|
||||
)
|
||||
db.commit()
|
||||
|
||||
return ControlResponse(
|
||||
id=updated.id,
|
||||
control_id=updated.control_id,
|
||||
@@ -645,6 +683,7 @@ async def update_control(
|
||||
review_frequency_days=updated.review_frequency_days,
|
||||
status=updated.status.value if updated.status else None,
|
||||
status_notes=updated.status_notes,
|
||||
status_justification=updated.status_justification,
|
||||
last_reviewed_at=updated.last_reviewed_at,
|
||||
next_review_at=updated.next_review_at,
|
||||
created_at=updated.created_at,
|
||||
@@ -690,6 +729,7 @@ async def review_control(
|
||||
review_frequency_days=updated.review_frequency_days,
|
||||
status=updated.status.value if updated.status else None,
|
||||
status_notes=updated.status_notes,
|
||||
status_justification=updated.status_justification,
|
||||
last_reviewed_at=updated.last_reviewed_at,
|
||||
next_review_at=updated.next_review_at,
|
||||
created_at=updated.created_at,
|
||||
|
||||
@@ -43,6 +43,7 @@ class ControlStatus(str):
|
||||
FAIL = "fail"
|
||||
NOT_APPLICABLE = "n/a"
|
||||
PLANNED = "planned"
|
||||
IN_PROGRESS = "in_progress"
|
||||
|
||||
|
||||
class RiskLevel(str):
|
||||
@@ -209,12 +210,14 @@ class ControlUpdate(BaseModel):
|
||||
owner: Optional[str] = None
|
||||
status: Optional[str] = None
|
||||
status_notes: Optional[str] = None
|
||||
status_justification: Optional[str] = None
|
||||
|
||||
|
||||
class ControlResponse(ControlBase):
|
||||
id: str
|
||||
status: str
|
||||
status_notes: Optional[str] = None
|
||||
status_justification: Optional[str] = None
|
||||
last_reviewed_at: Optional[datetime] = None
|
||||
next_review_at: Optional[datetime] = None
|
||||
created_at: datetime
|
||||
@@ -291,7 +294,8 @@ class EvidenceBase(BaseModel):
|
||||
|
||||
|
||||
class EvidenceCreate(EvidenceBase):
    """Payload for creating evidence, extending EvidenceBase with the optional
    Anti-Fake-Evidence classification fields."""

    confidence_level: Optional[str] = None
    truth_status: Optional[str] = None
|
||||
|
||||
|
||||
class EvidenceResponse(EvidenceBase):
|
||||
@@ -304,6 +308,20 @@ class EvidenceResponse(EvidenceBase):
|
||||
uploaded_by: Optional[str] = None
|
||||
collected_at: datetime
|
||||
created_at: datetime
|
||||
# Anti-Fake-Evidence fields
|
||||
confidence_level: Optional[str] = None
|
||||
truth_status: Optional[str] = None
|
||||
generation_mode: Optional[str] = None
|
||||
may_be_used_as_evidence: Optional[bool] = None
|
||||
reviewed_by: Optional[str] = None
|
||||
reviewed_at: Optional[datetime] = None
|
||||
# Anti-Fake-Evidence Phase 2: Four-Eyes
|
||||
approval_status: Optional[str] = None
|
||||
first_reviewer: Optional[str] = None
|
||||
first_reviewed_at: Optional[datetime] = None
|
||||
second_reviewer: Optional[str] = None
|
||||
second_reviewed_at: Optional[datetime] = None
|
||||
requires_four_eyes: Optional[bool] = None
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
@@ -435,6 +453,25 @@ class AISystemListResponse(BaseModel):
|
||||
# Dashboard & Export Schemas
|
||||
# ============================================================================
|
||||
|
||||
class MultiDimensionalScore(BaseModel):
    """Multi-dimensional compliance score (Anti-Fake-Evidence)."""
    requirement_coverage: float = 0.0  # % requirements with linked control
    evidence_strength: float = 0.0  # Weighted avg of evidence confidence (E0=0..E4=1)
    validation_quality: float = 0.0  # % evidence with truth_status >= validated_internal
    evidence_freshness: float = 0.0  # % evidence not expired + reviewed < 90 days
    control_effectiveness: float = 0.0  # Existing formula (pass + partial*0.5)
    overall_readiness: float = 0.0  # Weighted composite
    hard_blocks: List[str] = []  # Blocking issues preventing audit-readiness
|
||||
|
||||
|
||||
class StatusTransitionError(BaseModel):
    """Error detail for forbidden control status transitions."""
    allowed: bool = False
    current_status: str
    requested_status: str
    # One message per violated transition rule.
    violations: List[str] = []
|
||||
|
||||
|
||||
class DashboardResponse(BaseModel):
|
||||
compliance_score: float
|
||||
total_regulations: int
|
||||
@@ -447,6 +484,7 @@ class DashboardResponse(BaseModel):
|
||||
total_risks: int
|
||||
risks_by_level: Dict[str, int]
|
||||
recent_activity: List[Dict[str, Any]]
|
||||
multi_score: Optional[MultiDimensionalScore] = None
|
||||
|
||||
|
||||
class ExportRequest(BaseModel):
|
||||
@@ -1939,3 +1977,111 @@ class TOMStatsResponse(BaseModel):
|
||||
implemented: int = 0
|
||||
partial: int = 0
|
||||
not_implemented: int = 0
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Assertion Schemas (Anti-Fake-Evidence Phase 2)
|
||||
# ============================================================================
|
||||
|
||||
class AssertionCreate(BaseModel):
    """Request body for creating a single assertion for an entity."""

    entity_type: str
    entity_id: str
    sentence_text: str
    assertion_type: Optional[str] = "assertion"
    # IDs of evidence items backing this sentence.
    evidence_ids: Optional[List[str]] = []
    normative_tier: Optional[str] = None
|
||||
|
||||
|
||||
class AssertionUpdate(BaseModel):
    """Partial update for an assertion; every field is optional."""

    sentence_text: Optional[str] = None
    assertion_type: Optional[str] = None
    evidence_ids: Optional[List[str]] = None
    normative_tier: Optional[str] = None
    confidence: Optional[float] = None
|
||||
|
||||
|
||||
class AssertionResponse(BaseModel):
    """Serialized assertion as returned by the assertion endpoints."""

    id: str
    tenant_id: Optional[str] = None
    entity_type: str
    entity_id: str
    sentence_text: str
    sentence_index: int = 0
    assertion_type: str = "assertion"
    evidence_ids: Optional[List[str]] = []
    confidence: float = 0.0
    normative_tier: Optional[str] = None
    verified_by: Optional[str] = None
    verified_at: Optional[datetime] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    class Config:
        # Allows building the response from objects exposing these attributes.
        from_attributes = True
|
||||
|
||||
|
||||
class AssertionListResponse(BaseModel):
    """List of assertions together with a total count."""

    assertions: List[AssertionResponse]
    total: int
|
||||
|
||||
|
||||
class AssertionSummaryResponse(BaseModel):
    """Aggregate assertion counters; all default to 0."""

    total_assertions: int = 0
    total_facts: int = 0
    total_rationale: int = 0
    unverified_count: int = 0
|
||||
|
||||
|
||||
class AssertionExtractRequest(BaseModel):
    """Request body for automatic assertion extraction from an entity's text."""

    entity_type: str
    entity_id: str
    # Free text to be split into sentences and classified.
    text: str
|
||||
|
||||
|
||||
class EvidenceRejectRequest(BaseModel):
    """Request body for rejecting evidence."""

    # Reviewer performing the rejection.
    reviewed_by: str
    # Optional free-text reason for the rejection.
    rejection_reason: Optional[str] = None
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Traceability Matrix (Anti-Fake-Evidence Phase 4a)
|
||||
# ============================================================================
|
||||
|
||||
class TraceabilityAssertion(BaseModel):
    """Single assertion linked to an evidence item."""
    id: str
    sentence_text: str
    assertion_type: str = "assertion"
    confidence: float = 0.0
    verified: bool = False
|
||||
|
||||
class TraceabilityEvidence(BaseModel):
    """Evidence item with nested assertions."""
    id: str
    title: str
    evidence_type: str
    confidence_level: str = "E1"
    status: str = "valid"
    assertions: List[TraceabilityAssertion] = []
|
||||
|
||||
class TraceabilityCoverage(BaseModel):
    """Coverage flags for a single control."""
    has_evidence: bool = False
    has_assertions: bool = False
    all_assertions_verified: bool = False
    # None when no confidence level is available for the control.
    min_confidence_level: Optional[str] = None
|
||||
|
||||
class TraceabilityControl(BaseModel):
    """Control with nested evidence and coverage info."""
    id: str
    control_id: str
    title: str
    status: str = "planned"
    domain: str = "unknown"
    evidence: List[TraceabilityEvidence] = []
    # Defaults to a coverage object with every flag False.
    coverage: TraceabilityCoverage = TraceabilityCoverage()
|
||||
|
||||
class TraceabilityMatrixResponse(BaseModel):
    """Full traceability matrix: Controls → Evidence → Assertions."""
    controls: List[TraceabilityControl]
    # Named integer counters summarizing the matrix.
    summary: Dict[str, int]
|
||||
|
||||
@@ -8,12 +8,16 @@ from .models import (
|
||||
EvidenceDB,
|
||||
RiskDB,
|
||||
AuditExportDB,
|
||||
LLMGenerationAuditDB,
|
||||
AssertionDB,
|
||||
RegulationTypeEnum,
|
||||
ControlTypeEnum,
|
||||
ControlDomainEnum,
|
||||
RiskLevelEnum,
|
||||
EvidenceStatusEnum,
|
||||
ControlStatusEnum,
|
||||
EvidenceConfidenceEnum,
|
||||
EvidenceTruthStatusEnum,
|
||||
)
|
||||
from .repository import (
|
||||
RegulationRepository,
|
||||
@@ -33,6 +37,8 @@ __all__ = [
|
||||
"EvidenceDB",
|
||||
"RiskDB",
|
||||
"AuditExportDB",
|
||||
"LLMGenerationAuditDB",
|
||||
"AssertionDB",
|
||||
# Enums
|
||||
"RegulationTypeEnum",
|
||||
"ControlTypeEnum",
|
||||
@@ -40,6 +46,8 @@ __all__ = [
|
||||
"RiskLevelEnum",
|
||||
"EvidenceStatusEnum",
|
||||
"ControlStatusEnum",
|
||||
"EvidenceConfidenceEnum",
|
||||
"EvidenceTruthStatusEnum",
|
||||
# Repositories
|
||||
"RegulationRepository",
|
||||
"RequirementRepository",
|
||||
|
||||
@@ -65,6 +65,7 @@ class ControlStatusEnum(str, enum.Enum):
|
||||
FAIL = "fail" # Not passing
|
||||
NOT_APPLICABLE = "n/a" # Not applicable
|
||||
PLANNED = "planned" # Planned for implementation
|
||||
IN_PROGRESS = "in_progress" # Implementation in progress
|
||||
|
||||
|
||||
class RiskLevelEnum(str, enum.Enum):
|
||||
@@ -83,6 +84,26 @@ class EvidenceStatusEnum(str, enum.Enum):
|
||||
FAILED = "failed" # Failed validation
|
||||
|
||||
|
||||
class EvidenceConfidenceEnum(str, enum.Enum):
    """Confidence level of evidence (Anti-Fake-Evidence).

    Levels run from E0 (weakest) to E4 (strongest).
    """
    E0 = "E0"  # Generated / no real evidence (LLM output, placeholder)
    E1 = "E1"  # Uploaded but unreviewed (manual upload, no hash, no reviewer)
    E2 = "E2"  # Reviewed internally (human reviewed, hash verified)
    E3 = "E3"  # Observed by system (CI/CD pipeline, API with hash)
    E4 = "E4"  # Validated by external auditor
|
||||
|
||||
|
||||
class EvidenceTruthStatusEnum(str, enum.Enum):
    """Truth status lifecycle for evidence (Anti-Fake-Evidence)."""
    GENERATED = "generated"
    UPLOADED = "uploaded"
    OBSERVED = "observed"
    VALIDATED_INTERNAL = "validated_internal"
    REJECTED = "rejected"
    PROVIDED_TO_AUDITOR = "provided_to_auditor"
    ACCEPTED_BY_AUDITOR = "accepted_by_auditor"
|
||||
|
||||
|
||||
class ExportStatusEnum(str, enum.Enum):
|
||||
"""Status of audit export."""
|
||||
PENDING = "pending"
|
||||
@@ -239,6 +260,7 @@ class ControlDB(Base):
|
||||
# Status
|
||||
status = Column(Enum(ControlStatusEnum), default=ControlStatusEnum.PLANNED)
|
||||
status_notes = Column(Text)
|
||||
status_justification = Column(Text) # Required for n/a transitions
|
||||
|
||||
# Ownership & Review
|
||||
owner = Column(String(100)) # Responsible person/team
|
||||
@@ -321,6 +343,22 @@ class EvidenceDB(Base):
|
||||
ci_job_id = Column(String(100)) # CI/CD job reference
|
||||
uploaded_by = Column(String(100)) # User who uploaded
|
||||
|
||||
# Anti-Fake-Evidence: Confidence & Truth tracking
|
||||
confidence_level = Column(Enum(EvidenceConfidenceEnum), default=EvidenceConfidenceEnum.E1)
|
||||
truth_status = Column(Enum(EvidenceTruthStatusEnum), default=EvidenceTruthStatusEnum.UPLOADED)
|
||||
generation_mode = Column(String(100)) # e.g. "draft_assistance", "auto_generation"
|
||||
may_be_used_as_evidence = Column(Boolean, default=True)
|
||||
reviewed_by = Column(String(200))
|
||||
reviewed_at = Column(DateTime)
|
||||
|
||||
# Anti-Fake-Evidence Phase 2: Four-Eyes review
|
||||
approval_status = Column(String(30), default="none")
|
||||
first_reviewer = Column(String(200))
|
||||
first_reviewed_at = Column(DateTime)
|
||||
second_reviewer = Column(String(200))
|
||||
second_reviewed_at = Column(DateTime)
|
||||
requires_four_eyes = Column(Boolean, default=False)
|
||||
|
||||
# Timestamps
|
||||
collected_at = Column(DateTime, default=datetime.utcnow)
|
||||
created_at = Column(DateTime, default=datetime.utcnow)
|
||||
@@ -332,6 +370,7 @@ class EvidenceDB(Base):
|
||||
__table_args__ = (
|
||||
Index('ix_evidence_control_type', 'control_id', 'evidence_type'),
|
||||
Index('ix_evidence_status', 'status'),
|
||||
Index('ix_evidence_approval_status', 'approval_status'),
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
@@ -1464,3 +1503,77 @@ class ISMSReadinessCheckDB(Base):
|
||||
|
||||
def __repr__(self):
|
||||
return f"<ISMSReadiness {self.check_date}: {self.overall_status}>"
|
||||
|
||||
|
||||
class LLMGenerationAuditDB(Base):
    """
    Audit trail for LLM-generated content.

    Every piece of content generated by an LLM is recorded here with its
    truth_status and may_be_used_as_evidence flag, ensuring transparency
    about what is real evidence vs. generated assistance.
    """
    __tablename__ = 'compliance_llm_generation_audit'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    tenant_id = Column(String(36), index=True)

    entity_type = Column(String(50), nullable=False)  # 'evidence', 'control', 'document'
    entity_id = Column(String(36))  # ID of the generated entity (no ForeignKey constraint declared)
    generation_mode = Column(String(100), nullable=False)  # 'draft_assistance', 'auto_generation'
    truth_status = Column(Enum(EvidenceTruthStatusEnum), nullable=False, default=EvidenceTruthStatusEnum.GENERATED)
    may_be_used_as_evidence = Column(Boolean, nullable=False, default=False)

    llm_model = Column(String(100))
    llm_provider = Column(String(50))  # 'ollama', 'anthropic'
    prompt_hash = Column(String(64))  # SHA-256 of prompt
    input_summary = Column(Text)
    output_summary = Column(Text)
    # The database column is named "metadata"; the Python attribute uses a
    # different name because `metadata` is reserved on declarative classes.
    extra_metadata = Column("metadata", JSON, default=dict)

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    __table_args__ = (
        # Composite index for lookups by (entity_type, entity_id).
        Index('ix_llm_audit_entity', 'entity_type', 'entity_id'),
    )

    def __repr__(self):
        return f"<LLMGenerationAudit {self.entity_type}:{self.entity_id} mode={self.generation_mode}>"
|
||||
|
||||
|
||||
class AssertionDB(Base):
    """
    Assertion tracking — separates claims from verified facts.

    Each sentence from a control/evidence/document is stored here with its
    classification (assertion vs. fact vs. rationale) and optional evidence linkage.
    """
    __tablename__ = 'compliance_assertions'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    tenant_id = Column(String(36), index=True)

    entity_type = Column(String(50), nullable=False)  # 'control', 'evidence', 'document', 'obligation'
    entity_id = Column(String(36), nullable=False)
    sentence_text = Column(Text, nullable=False)
    sentence_index = Column(Integer, nullable=False, default=0)

    assertion_type = Column(String(20), nullable=False, default='assertion')  # 'assertion' | 'fact' | 'rationale'
    evidence_ids = Column(JSON, default=list)
    confidence = Column(Float, default=0.0)
    normative_tier = Column(String(20))  # 'pflicht' | 'empfehlung' | 'kann'

    verified_by = Column(String(200))
    verified_at = Column(DateTime)

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    __table_args__ = (
        Index('ix_assertion_entity', 'entity_type', 'entity_id'),
        Index('ix_assertion_type', 'assertion_type'),
    )

    def __repr__(self):
        # sentence_text is None on an instance that has not been populated yet;
        # repr() must not raise in that case (e.g. while debugging or logging).
        preview = (self.sentence_text or "")[:50]
        return f"<Assertion {self.assertion_type}: {preview}>"
|
||||
|
||||
@@ -487,6 +487,137 @@ class ControlRepository:
|
||||
"compliance_score": round(score, 1),
|
||||
}
|
||||
|
||||
def get_multi_dimensional_score(self) -> Dict[str, Any]:
    """
    Calculate multi-dimensional compliance score (Anti-Fake-Evidence).

    Returns a dict with five dimension scores (percentages rounded to one
    decimal): requirement_coverage, evidence_strength, validation_quality,
    evidence_freshness and control_effectiveness — plus overall_readiness
    (their weighted composite) and hard_blocks (list of blocking issues).

    When no controls exist, all scores are 0.0 and a single hard block is
    reported.
    """
    from .models import EvidenceDB, RequirementDB, ControlMappingDB

    # Weight map for confidence levels
    conf_weights = {"E0": 0.0, "E1": 0.25, "E2": 0.5, "E3": 0.75, "E4": 1.0}
    validated_statuses = {"validated_internal", "accepted_by_auditor", "provided_to_auditor"}

    def _confidence_weight(evidence) -> float:
        # Evidence without a confidence level is treated as E1.
        level = evidence.confidence_level.value if evidence.confidence_level else "E1"
        return conf_weights.get(level, 0.25)

    controls = self.get_all()
    total_controls = len(controls)

    if total_controls == 0:
        return {
            "requirement_coverage": 0.0,
            "evidence_strength": 0.0,
            "validation_quality": 0.0,
            "evidence_freshness": 0.0,
            "control_effectiveness": 0.0,
            "overall_readiness": 0.0,
            "hard_blocks": ["Keine Controls vorhanden"],
        }

    # 1. requirement_coverage: % requirements linked to at least one control
    total_reqs = self.db.query(func.count(RequirementDB.id)).scalar() or 0
    linked_reqs = (
        self.db.query(func.count(func.distinct(ControlMappingDB.requirement_id)))
        .scalar() or 0
    )
    requirement_coverage = (linked_reqs / total_reqs * 100) if total_reqs > 0 else 0.0

    all_evidence = self.db.query(EvidenceDB).all()
    evidence_count = len(all_evidence)

    if all_evidence:
        # 2. evidence_strength: weighted average of evidence confidence
        total_weight = 0.0
        for e in all_evidence:
            total_weight += _confidence_weight(e)
        evidence_strength = (total_weight / evidence_count) * 100

        # 3. validation_quality: % evidence with truth_status >= validated_internal
        validated_count = sum(
            1 for e in all_evidence
            if (e.truth_status.value if e.truth_status else "uploaded") in validated_statuses
        )
        validation_quality = (validated_count / evidence_count) * 100

        # 4. evidence_freshness: % evidence not expired and not reviewed more
        # than 90 days ago (never-reviewed evidence counts as fresh).
        # Review timestamps are written with datetime.utcnow(), so compare
        # against UTC rather than local time.
        # NOTE(review): assumes valid_until is stored as naive UTC too — confirm.
        now = datetime.utcnow()
        fresh_count = 0
        for e in all_evidence:
            is_expired = bool(e.valid_until and e.valid_until < now)
            reviewed_at = getattr(e, "reviewed_at", None)
            is_stale = bool(reviewed_at) and (now - reviewed_at).days > 90
            if not is_expired and not is_stale:
                fresh_count += 1
        evidence_freshness = (fresh_count / evidence_count) * 100
    else:
        evidence_strength = 0.0
        validation_quality = 0.0
        evidence_freshness = 0.0

    # 5. control_effectiveness: existing formula
    passed = sum(1 for c in controls if c.status == ControlStatusEnum.PASS)
    partial = sum(1 for c in controls if c.status == ControlStatusEnum.PARTIAL)
    control_effectiveness = ((passed + partial * 0.5) / total_controls) * 100

    # 6. overall_readiness: weighted composite
    overall_readiness = (
        0.20 * requirement_coverage +
        0.25 * evidence_strength +
        0.20 * validation_quality +
        0.10 * evidence_freshness +
        0.25 * control_effectiveness
    )

    # Group evidence by control once instead of rescanning the full list for
    # every control in each hard-block check.
    evidence_by_control: Dict[Any, list] = {}
    for e in all_evidence:
        evidence_by_control.setdefault(e.control_id, []).append(e)

    # Hard blocks
    hard_blocks = []
    critical_no_evidence = []  # pass/partial controls without any evidence
    weak_evidence_pass = []    # pass controls backed only by E0/E1 evidence
    for c in controls:
        if c.status not in (ControlStatusEnum.PASS, ControlStatusEnum.PARTIAL):
            continue
        evidence_for_ctrl = evidence_by_control.get(c.id, [])
        if not evidence_for_ctrl:
            critical_no_evidence.append(c.control_id)
        elif c.status == ControlStatusEnum.PASS:
            max_conf = max(_confidence_weight(e) for e in evidence_for_ctrl)
            if max_conf < 0.5:  # Only E0 or E1
                weak_evidence_pass.append(c.control_id)

    if critical_no_evidence:
        hard_blocks.append(
            f"{len(critical_no_evidence)} Controls mit Status pass/partial haben keine Evidence: "
            f"{', '.join(critical_no_evidence[:5])}"
        )
    if weak_evidence_pass:
        hard_blocks.append(
            f"{len(weak_evidence_pass)} Controls auf 'pass' haben nur E0/E1-Evidence: "
            f"{', '.join(weak_evidence_pass[:5])}"
        )

    return {
        "requirement_coverage": round(requirement_coverage, 1),
        "evidence_strength": round(evidence_strength, 1),
        "validation_quality": round(validation_quality, 1),
        "evidence_freshness": round(evidence_freshness, 1),
        "control_effectiveness": round(control_effectiveness, 1),
        "overall_readiness": round(overall_readiness, 1),
        "hard_blocks": hard_blocks,
    }
|
||||
|
||||
|
||||
class ControlMappingRepository:
|
||||
"""Repository for requirement-control mappings."""
|
||||
|
||||
80
backend-compliance/compliance/services/assertion_engine.py
Normal file
80
backend-compliance/compliance/services/assertion_engine.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""Assertion Engine — splits text into sentences and classifies each.
|
||||
|
||||
Each sentence is tagged as:
|
||||
- assertion: normative statement (pflicht / empfehlung / kann)
|
||||
- fact: references concrete evidence artifacts
|
||||
- rationale: explains why something is required
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from .normative_patterns import (
|
||||
PFLICHT_RE, EMPFEHLUNG_RE, KANN_RE, RATIONALE_RE, EVIDENCE_RE,
|
||||
)
|
||||
|
||||
# Sentence splitter: split after '.', '!' or '?' when followed by whitespace and an
# uppercase letter (incl. Ä/Ö/Ü), or at a blank line (paragraph break).
|
||||
_SENTENCE_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ])|(?:\n\s*\n)')
|
||||
|
||||
|
||||
def extract_assertions(
    text: str,
    entity_type: str,
    entity_id: str,
    tenant_id: Optional[str] = None,
) -> list[dict]:
    """Break *text* into sentences and classify every usable sentence.

    Args:
        text: Free text of the entity; ``None``, empty or whitespace-only
            input yields an empty result.
        entity_type: Copied unchanged into every result dict.
        entity_id: Copied unchanged into every result dict.
        tenant_id: Optional tenant identifier, copied unchanged as well.

    Returns:
        One dict per retained sentence. ``sentence_index`` is the position of
        the fragment in the raw split result, so skipped fragments leave gaps
        in the numbering. ``evidence_ids`` starts empty and ``confidence``
        starts at 0.0.
    """
    cleaned = text.strip() if text else ""
    if not cleaned:
        return []

    records: list[dict] = []
    for position, fragment in enumerate(_SENTENCE_SPLIT.split(cleaned)):
        candidate = fragment.strip()
        # Fragments shorter than five characters are dropped.
        if len(candidate) < 5:
            continue

        kind, tier = _classify_sentence(candidate)
        record = {
            "tenant_id": tenant_id,
            "entity_type": entity_type,
            "entity_id": entity_id,
            "sentence_text": candidate,
            "sentence_index": position,
            "assertion_type": kind,
            "normative_tier": tier,
            "evidence_ids": [],
            "confidence": 0.0,
        }
        records.append(record)

    return records
|
||||
|
||||
|
||||
def _classify_sentence(sentence: str) -> tuple[str, Optional[str]]:
    """Classify one sentence as ``(assertion_type, normative_tier)``.

    Precedence, first hit wins:
      1. An evidence keyword            -> ``("fact", None)``
      2. Rationale signals that are at least as numerous as all normative
         signals combined               -> ``("rationale", None)``
      3. Pflicht / Empfehlung / Kann    -> ``("assertion", <tier>)``
      4. Nothing matched                -> ``("assertion", None)``
    """
    # Evidence keywords take precedence over every normative signal.
    if EVIDENCE_RE.search(sentence) is not None:
        return "fact", None

    # Rationale wins only if it is present and not outnumbered by normative hits.
    rationale_hits = len(RATIONALE_RE.findall(sentence))
    if rationale_hits > 0:
        normative_hits = sum(
            len(pattern.findall(sentence))
            for pattern in (PFLICHT_RE, EMPFEHLUNG_RE, KANN_RE)
        )
        if rationale_hits >= normative_hits:
            return "rationale", None

    # Strongest normative tier first.
    tiers = (
        (PFLICHT_RE, "pflicht"),
        (EMPFEHLUNG_RE, "empfehlung"),
        (KANN_RE, "kann"),
    )
    for pattern, tier in tiers:
        if pattern.search(sentence):
            return "assertion", tier

    # No signal at all: unclassified assertion.
    return "assertion", None
|
||||
@@ -493,6 +493,9 @@ class GeneratedControl:
|
||||
applicable_industries: Optional[list] = None # e.g. ["all"] or ["Telekommunikation", "Energie"]
|
||||
applicable_company_size: Optional[list] = None # e.g. ["all"] or ["medium", "large", "enterprise"]
|
||||
scope_conditions: Optional[dict] = None # e.g. {"requires_any": ["uses_ai"], "description": "..."}
|
||||
# Anti-Fake-Evidence: truth tracking for generated controls
|
||||
truth_status: str = "generated"
|
||||
may_be_used_as_evidence: bool = False
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -781,10 +784,23 @@ REFORM_SYSTEM_PROMPT = """Du bist ein Security-Compliance-Experte. Deine Aufgabe
|
||||
Security Controls zu formulieren. Du formulierst IMMER in eigenen Worten.
|
||||
KOPIERE KEINE Sätze aus dem Quelltext. Verwende eigene Begriffe und Struktur.
|
||||
NENNE NICHT die Quelle. Keine proprietären Bezeichner.
|
||||
|
||||
WICHTIG — Truthfulness-Guardrail:
|
||||
Deine Ausgabe ist ein ENTWURF. Formuliere NIEMALS Behauptungen über bereits erfolgte Umsetzung.
|
||||
Verwende NICHT: "ist compliant", "erfüllt vollständig", "wurde geprüft", "wurde umgesetzt",
|
||||
"ist auditiert", "vollständig implementiert", "nachweislich konform".
|
||||
Verwende stattdessen: "soll umsetzen", "ist vorgesehen", "muss implementiert werden".
|
||||
|
||||
Antworte NUR mit validem JSON. Bei mehreren Controls antworte mit einem JSON-Array."""
|
||||
|
||||
STRUCTURE_SYSTEM_PROMPT = """Du bist ein Security-Compliance-Experte. Strukturiere den gegebenen Text
|
||||
als praxisorientiertes Security Control. Erstelle eine verständliche, umsetzbare Formulierung.
|
||||
|
||||
WICHTIG — Truthfulness-Guardrail:
|
||||
Deine Ausgabe ist ein ENTWURF. Formuliere NIEMALS Behauptungen über bereits erfolgte Umsetzung.
|
||||
Verwende NICHT: "ist compliant", "erfüllt vollständig", "wurde geprüft", "wurde umgesetzt".
|
||||
Verwende stattdessen: "soll umsetzen", "ist vorgesehen", "muss implementiert werden".
|
||||
|
||||
Antworte NUR mit validem JSON. Bei mehreren Controls antworte mit einem JSON-Array."""
|
||||
|
||||
# Shared applicability prompt block — appended to all generation prompts (v3)
|
||||
@@ -1877,7 +1893,38 @@ Kategorien: {CATEGORY_LIST_STR}"""
|
||||
)
|
||||
self.db.commit()
|
||||
row = result.fetchone()
|
||||
return str(row[0]) if row else None
|
||||
control_uuid = str(row[0]) if row else None
|
||||
|
||||
# Anti-Fake-Evidence: Record LLM audit trail for generated control
|
||||
if control_uuid:
|
||||
try:
|
||||
self.db.execute(
|
||||
text("""
|
||||
INSERT INTO compliance_llm_generation_audit (
|
||||
entity_type, entity_id, generation_mode,
|
||||
truth_status, may_be_used_as_evidence,
|
||||
llm_model, llm_provider,
|
||||
input_summary, output_summary
|
||||
) VALUES (
|
||||
'control', :entity_id, 'auto_generation',
|
||||
'generated', FALSE,
|
||||
:llm_model, :llm_provider,
|
||||
:input_summary, :output_summary
|
||||
)
|
||||
"""),
|
||||
{
|
||||
"entity_id": control_uuid,
|
||||
"llm_model": ANTHROPIC_MODEL if ANTHROPIC_API_KEY else OLLAMA_MODEL,
|
||||
"llm_provider": "anthropic" if ANTHROPIC_API_KEY else "ollama",
|
||||
"input_summary": f"Control generation for {control.control_id}",
|
||||
"output_summary": control.title[:500] if control.title else None,
|
||||
},
|
||||
)
|
||||
self.db.commit()
|
||||
except Exception as audit_err:
|
||||
logger.warning("Failed to create LLM audit record: %s", audit_err)
|
||||
|
||||
return control_uuid
|
||||
except Exception as e:
|
||||
logger.error("Failed to store control %s: %s", control.control_id, e)
|
||||
self.db.rollback()
|
||||
|
||||
152
backend-compliance/compliance/services/control_status_machine.py
Normal file
152
backend-compliance/compliance/services/control_status_machine.py
Normal file
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Control Status Transition State Machine.
|
||||
|
||||
Enforces that controls cannot be set to "pass" without sufficient evidence.
|
||||
Prevents Compliance-Theater where controls claim compliance without real proof.
|
||||
|
||||
Transition rules:
|
||||
planned → in_progress : always allowed
|
||||
in_progress → pass : requires ≥1 evidence with confidence ≥ E2 and
|
||||
truth_status in (uploaded, observed, validated_internal,
accepted_by_auditor, provided_to_auditor)
|
||||
in_progress → partial : requires ≥1 evidence (any level)
|
||||
pass → fail : always allowed (degradation)
|
||||
any → n/a : requires status_justification
|
||||
any → planned : always allowed (reset)
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from ..db.models import EvidenceDB
|
||||
|
||||
|
||||
# Confidence level ordering for comparisons
CONFIDENCE_ORDER = {"E0": 0, "E1": 1, "E2": 2, "E3": 3, "E4": 4}

# Truth statuses that qualify as "real" evidence for pass transitions
VALID_TRUTH_STATUSES = {"uploaded", "observed", "validated_internal", "accepted_by_auditor", "provided_to_auditor"}


def _enum_or_str(value, default: str) -> str:
    """Return ``value.value`` for enum-like objects, ``str(value)`` for truthy
    plain values, and *default* when the value is missing or empty."""
    if hasattr(value, "value"):
        return value.value
    return str(value) if value else default


def _has_qualifying_evidence(evidence_list) -> bool:
    """Return True if at least one evidence has confidence >= E2 and a truth
    status contained in VALID_TRUTH_STATUSES."""
    min_rank = CONFIDENCE_ORDER["E2"]
    for evidence in evidence_list:
        # Missing confidence is treated as E1 (not qualifying). A missing
        # truth_status is treated as "uploaded", i.e. it does not block.
        conf_val = _enum_or_str(getattr(evidence, "confidence_level", None), "E1")
        truth_val = _enum_or_str(getattr(evidence, "truth_status", None), "uploaded")
        # Unknown confidence labels rank like E1.
        if CONFIDENCE_ORDER.get(conf_val, 1) >= min_rank and truth_val in VALID_TRUTH_STATUSES:
            return True
    return False


def validate_transition(
    current_status: str,
    new_status: str,
    evidence_list: Optional[List["EvidenceDB"]] = None,
    status_justification: Optional[str] = None,
    bypass_for_auto_updater: bool = False,
) -> Tuple[bool, List[str]]:
    """
    Validate whether a control status transition is allowed.

    Rules, evaluated in this order:
      * same status, or any -> "planned": always allowed
      * any -> "n/a": requires a non-blank status_justification
      * any -> "partial": requires at least one evidence record
      * any -> "pass": requires at least one evidence record, one of which
        must have confidence >= E2 and a truth status in VALID_TRUTH_STATUSES
      * everything else (e.g. -> "fail", -> "in_progress"): allowed

    Args:
        current_status: Current control status value (e.g. "planned", "pass")
        new_status: Requested new status
        evidence_list: Evidence objects linked to this control; only the
            attributes ``confidence_level`` and ``truth_status`` are read
        status_justification: Text justification (required for n/a transitions)
        bypass_for_auto_updater: If True, skip the evidence checks for
            "partial" and "pass"

    Returns:
        Tuple of (allowed: bool, violations: list[str])
    """
    evidence_list = evidence_list or []

    # No-op transitions and resets never need evidence.
    if current_status == new_status or new_status == "planned":
        return True, []

    if new_status == "n/a":
        if not status_justification or not status_justification.strip():
            return False, [
                "Transition to 'n/a' requires a status_justification explaining why this control is not applicable."
            ]
        return True, []

    if new_status == "partial":
        if not bypass_for_auto_updater and not evidence_list:
            return False, ["Transition to 'partial' requires at least 1 evidence record."]
        return True, []

    if new_status == "pass":
        # NOTE(review): the previous implementation carried additional branches
        # meant to reject a direct planned/fail -> pass jump and to re-check
        # partial -> pass, but they sat behind this branch and were never
        # reachable. Behavior is kept as it actually was: any source status may
        # move to "pass" when the evidence threshold is met. Decide separately
        # whether the in_progress detour should really be enforced.
        if bypass_for_auto_updater:
            return True, []

        if not evidence_list:
            return False, ["Transition to 'pass' requires at least 1 evidence record."]

        if not _has_qualifying_evidence(evidence_list):
            return False, [
                "Transition to 'pass' requires at least 1 evidence with confidence >= E2 "
                "and truth_status in (uploaded, observed, validated_internal, accepted_by_auditor, "
                "provided_to_auditor). "
                "Current evidence does not meet this threshold."
            ]
        return True, []

    # Degradation to "fail", planned -> in_progress and all remaining
    # transitions are allowed without further checks.
    return True, []
|
||||
@@ -52,64 +52,18 @@ ANTHROPIC_API_URL = "https://api.anthropic.com/v1"
|
||||
# Tier 2: Empfehlung (recommendation) — weaker normative signals
|
||||
# Tier 3: Kann (optional/permissive) — permissive signals
|
||||
# Nothing is rejected — everything is classified.
|
||||
#
|
||||
# Patterns are defined in normative_patterns.py and imported here
|
||||
# with local aliases for backward compatibility.
|
||||
|
||||
_PFLICHT_SIGNALS = [
|
||||
# Deutsche modale Pflichtformulierungen
|
||||
r"\bmüssen\b", r"\bmuss\b", r"\bhat\s+sicherzustellen\b",
|
||||
r"\bhaben\s+sicherzustellen\b", r"\bsind\s+verpflichtet\b",
|
||||
r"\bist\s+verpflichtet\b",
|
||||
# "ist zu prüfen", "sind zu dokumentieren" (direkt)
|
||||
r"\bist\s+zu\s+\w+en\b", r"\bsind\s+zu\s+\w+en\b",
|
||||
r"\bhat\s+zu\s+\w+en\b", r"\bhaben\s+zu\s+\w+en\b",
|
||||
# "ist festzustellen", "sind vorzunehmen" (Compound-Verben, eingebettetes zu)
|
||||
r"\bist\s+\w+zu\w+en\b", r"\bsind\s+\w+zu\w+en\b",
|
||||
# "ist zusätzlich zu prüfen", "sind regelmäßig zu überwachen" (Adverb dazwischen)
|
||||
r"\bist\s+\w+\s+zu\s+\w+en\b", r"\bsind\s+\w+\s+zu\s+\w+en\b",
|
||||
r"\bhat\s+\w+\s+zu\s+\w+en\b", r"\bhaben\s+\w+\s+zu\s+\w+en\b",
|
||||
# Englische Pflicht-Signale
|
||||
r"\bshall\b", r"\bmust\b", r"\brequired\b",
|
||||
# Compound-Infinitive (Gerundivum): mitzuteilen, anzuwenden, bereitzustellen
|
||||
r"\b\w+zuteilen\b", r"\b\w+zuwenden\b", r"\b\w+zustellen\b", r"\b\w+zulegen\b",
|
||||
r"\b\w+zunehmen\b", r"\b\w+zuführen\b", r"\b\w+zuhalten\b", r"\b\w+zusetzen\b",
|
||||
r"\b\w+zuweisen\b", r"\b\w+zuordnen\b", r"\b\w+zufügen\b", r"\b\w+zugeben\b",
|
||||
# Breites Pattern: "ist ... [bis 80 Zeichen] ... zu + Infinitiv"
|
||||
r"\bist\b.{1,80}\bzu\s+\w+en\b", r"\bsind\b.{1,80}\bzu\s+\w+en\b",
|
||||
]
|
||||
_PFLICHT_RE = re.compile("|".join(_PFLICHT_SIGNALS), re.IGNORECASE)
|
||||
|
||||
_EMPFEHLUNG_SIGNALS = [
|
||||
# Modale Verben (schwaecher als "muss")
|
||||
r"\bsoll\b", r"\bsollen\b", r"\bsollte\b", r"\bsollten\b",
|
||||
r"\bgewährleisten\b", r"\bsicherstellen\b",
|
||||
# Englische Empfehlungs-Signale
|
||||
r"\bshould\b", r"\bensure\b", r"\brecommend\w*\b",
|
||||
# Haeufige normative Infinitive (ohne Hilfsverb, als Empfehlung)
|
||||
r"\bnachweisen\b", r"\beinhalten\b", r"\bunterlassen\b", r"\bwahren\b",
|
||||
r"\bdokumentieren\b", r"\bimplementieren\b", r"\büberprüfen\b", r"\büberwachen\b",
|
||||
# Pruefanweisungen als normative Aussage
|
||||
r"\bprüfen,\s+ob\b", r"\bkontrollieren,\s+ob\b",
|
||||
]
|
||||
_EMPFEHLUNG_RE = re.compile("|".join(_EMPFEHLUNG_SIGNALS), re.IGNORECASE)
|
||||
|
||||
_KANN_SIGNALS = [
|
||||
r"\bkann\b", r"\bkönnen\b", r"\bdarf\b", r"\bdürfen\b",
|
||||
r"\bmay\b", r"\boptional\b",
|
||||
]
|
||||
_KANN_RE = re.compile("|".join(_KANN_SIGNALS), re.IGNORECASE)
|
||||
|
||||
# Union of all normative signals (for backward-compatible has_normative_signal flag)
|
||||
_NORMATIVE_RE = re.compile(
|
||||
"|".join(_PFLICHT_SIGNALS + _EMPFEHLUNG_SIGNALS + _KANN_SIGNALS),
|
||||
re.IGNORECASE,
|
||||
from .normative_patterns import (
|
||||
PFLICHT_RE as _PFLICHT_RE,
|
||||
EMPFEHLUNG_RE as _EMPFEHLUNG_RE,
|
||||
KANN_RE as _KANN_RE,
|
||||
NORMATIVE_RE as _NORMATIVE_RE,
|
||||
RATIONALE_RE as _RATIONALE_RE,
|
||||
)
|
||||
|
||||
_RATIONALE_SIGNALS = [
|
||||
r"\bda\s+", r"\bweil\b", r"\bgrund\b", r"\berwägung",
|
||||
r"\bbecause\b", r"\breason\b", r"\brationale\b",
|
||||
r"\bkönnen\s+.*\s+verursachen\b", r"\bführt\s+zu\b",
|
||||
]
|
||||
_RATIONALE_RE = re.compile("|".join(_RATIONALE_SIGNALS), re.IGNORECASE)
|
||||
|
||||
_TEST_SIGNALS = [
|
||||
r"\btesten\b", r"\btest\b", r"\bprüfung\b", r"\bprüfen\b",
|
||||
r"\bgetestet\b", r"\bwirksamkeit\b", r"\baudit\b",
|
||||
|
||||
59
backend-compliance/compliance/services/normative_patterns.py
Normal file
59
backend-compliance/compliance/services/normative_patterns.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""Shared normative language patterns for assertion classification.
|
||||
|
||||
Extracted from decomposition_pass.py for reuse in the assertion engine.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
def _compile_signals(signals):
    """Compile *signals* into one case-insensitive alternation regex."""
    return re.compile("|".join(signals), re.IGNORECASE)


# Tier 1: Pflicht (mandatory) signals.
_PFLICHT_SIGNALS = [
    # German modal obligation phrases
    r"\bmüssen\b",
    r"\bmuss\b",
    r"\bhat\s+sicherzustellen\b",
    r"\bhaben\s+sicherzustellen\b",
    r"\bsind\s+verpflichtet\b",
    r"\bist\s+verpflichtet\b",
    # "ist zu prüfen", "sind zu dokumentieren" (direct form)
    r"\bist\s+zu\s+\w+en\b",
    r"\bsind\s+zu\s+\w+en\b",
    r"\bhat\s+zu\s+\w+en\b",
    r"\bhaben\s+zu\s+\w+en\b",
    # "ist festzustellen", "sind vorzunehmen" (compound verbs with embedded "zu")
    r"\bist\s+\w+zu\w+en\b",
    r"\bsind\s+\w+zu\w+en\b",
    # "ist zusätzlich zu prüfen" (one adverb in between)
    r"\bist\s+\w+\s+zu\s+\w+en\b",
    r"\bsind\s+\w+\s+zu\s+\w+en\b",
    r"\bhat\s+\w+\s+zu\s+\w+en\b",
    r"\bhaben\s+\w+\s+zu\s+\w+en\b",
    # English obligation signals
    r"\bshall\b",
    r"\bmust\b",
    r"\brequired\b",
    # Compound infinitives: mitzuteilen, anzuwenden, bereitzustellen, ...
    r"\b\w+zuteilen\b",
    r"\b\w+zuwenden\b",
    r"\b\w+zustellen\b",
    r"\b\w+zulegen\b",
    r"\b\w+zunehmen\b",
    r"\b\w+zuführen\b",
    r"\b\w+zuhalten\b",
    r"\b\w+zusetzen\b",
    r"\b\w+zuweisen\b",
    r"\b\w+zuordnen\b",
    r"\b\w+zufügen\b",
    r"\b\w+zugeben\b",
    # Broad pattern: "ist/sind ... (up to 80 characters) ... zu + infinitive"
    r"\bist\b.{1,80}\bzu\s+\w+en\b",
    r"\bsind\b.{1,80}\bzu\s+\w+en\b",
]
PFLICHT_RE = _compile_signals(_PFLICHT_SIGNALS)

# Tier 2: Empfehlung (recommendation) signals.
_EMPFEHLUNG_SIGNALS = [
    # Modal verbs weaker than "muss"
    r"\bsoll\b",
    r"\bsollen\b",
    r"\bsollte\b",
    r"\bsollten\b",
    r"\bgewährleisten\b",
    r"\bsicherstellen\b",
    # English recommendation signals
    r"\bshould\b",
    r"\bensure\b",
    r"\brecommend\w*\b",
    # Frequent normative infinitives without auxiliary verb
    r"\bnachweisen\b",
    r"\beinhalten\b",
    r"\bunterlassen\b",
    r"\bwahren\b",
    r"\bdokumentieren\b",
    r"\bimplementieren\b",
    r"\büberprüfen\b",
    r"\büberwachen\b",
    # Check instructions used as normative statements
    r"\bprüfen,\s+ob\b",
    r"\bkontrollieren,\s+ob\b",
]
EMPFEHLUNG_RE = _compile_signals(_EMPFEHLUNG_SIGNALS)

# Tier 3: Kann (optional / permissive) signals.
_KANN_SIGNALS = [
    r"\bkann\b",
    r"\bkönnen\b",
    r"\bdarf\b",
    r"\bdürfen\b",
    r"\bmay\b",
    r"\boptional\b",
]
KANN_RE = _compile_signals(_KANN_SIGNALS)

# Union of all three tiers, concatenated in tier order.
NORMATIVE_RE = _compile_signals(
    [*_PFLICHT_SIGNALS, *_EMPFEHLUNG_SIGNALS, *_KANN_SIGNALS]
)

# Signals for explanatory ("why") sentences.
_RATIONALE_SIGNALS = [
    r"\bda\s+",
    r"\bweil\b",
    r"\bgrund\b",
    r"\berwägung",
    r"\bbecause\b",
    r"\breason\b",
    r"\brationale\b",
    r"\bkönnen\s+.*\s+verursachen\b",
    r"\bführt\s+zu\b",
]
RATIONALE_RE = _compile_signals(_RATIONALE_SIGNALS)

# Evidence-related keywords (for fact detection)
_EVIDENCE_KEYWORDS = [
    r"\bnachweis\b",
    r"\bzertifikat\b",
    r"\baudit.report\b",
    r"\bprotokoll\b",
    r"\bdokumentation\b",
    r"\bbericht\b",
    r"\bcertificate\b",
    r"\bevidence\b",
    r"\bproof\b",
]
EVIDENCE_RE = _compile_signals(_EVIDENCE_KEYWORDS)
|
||||
Reference in New Issue
Block a user