feat: Anti-Fake-Evidence System (Phase 1-4b)
Implement the full evidence-integrity pipeline to prevent compliance theater:

- Confidence levels (E0-E4), truth-status tracking, assertion engine
- Four-Eyes approval workflow, audit trail, reject endpoint
- Evidence distribution dashboard, LLM audit routes
- Traceability matrix (backend endpoint + Compliance Hub UI tab)
- Anti-fake badges, control status machine, normative patterns
- 2 migrations, 4 test suites, MkDocs documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
80
backend-compliance/compliance/services/assertion_engine.py
Normal file
80
backend-compliance/compliance/services/assertion_engine.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""Assertion Engine — splits text into sentences and classifies each.
|
||||
|
||||
Each sentence is tagged as:
|
||||
- assertion: normative statement (pflicht / empfehlung / kann)
|
||||
- fact: references concrete evidence artifacts
|
||||
- rationale: explains why something is required
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from .normative_patterns import (
|
||||
PFLICHT_RE, EMPFEHLUNG_RE, KANN_RE, RATIONALE_RE, EVIDENCE_RE,
|
||||
)
|
||||
|
||||
# Sentence splitter: period/excl/question followed by space+uppercase, or newlines
|
||||
_SENTENCE_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ])|(?:\n\s*\n)')
|
||||
|
||||
|
||||
def extract_assertions(
    text: str,
    entity_type: str,
    entity_id: str,
    tenant_id: Optional[str] = None,
) -> list[dict]:
    """Split *text* into sentences and classify each one.

    Returns a list of dicts ready for AssertionDB creation.
    """
    stripped = (text or "").strip()
    if not stripped:
        return []

    records: list[dict] = []
    for position, fragment in enumerate(_SENTENCE_SPLIT.split(stripped)):
        candidate = fragment.strip()
        # Drop empty fragments and tiny scraps (< 5 chars) the splitter leaves.
        if len(candidate) < 5:
            continue

        a_type, tier = _classify_sentence(candidate)

        records.append({
            "tenant_id": tenant_id,
            "entity_type": entity_type,
            "entity_id": entity_id,
            "sentence_text": candidate,
            "sentence_index": position,
            "assertion_type": a_type,
            "normative_tier": tier,
            "evidence_ids": [],
            "confidence": 0.0,
        })

    return records
|
||||
|
||||
|
||||
def _classify_sentence(sentence: str) -> tuple[str, Optional[str]]:
    """Return (assertion_type, normative_tier) for a single sentence."""

    # Evidence references win outright: the sentence states a fact.
    if EVIDENCE_RE.search(sentence):
        return ("fact", None)

    # Rationale wins only when rationale markers occur at least as often
    # as normative markers in the same sentence.
    normative_hits = sum(
        len(pattern.findall(sentence))
        for pattern in (PFLICHT_RE, EMPFEHLUNG_RE, KANN_RE)
    )
    rationale_hits = len(RATIONALE_RE.findall(sentence))
    if rationale_hits and rationale_hits >= normative_hits:
        return ("rationale", None)

    # Normative tiers, checked strongest-first.
    for pattern, tier in (
        (PFLICHT_RE, "pflicht"),
        (EMPFEHLUNG_RE, "empfehlung"),
        (KANN_RE, "kann"),
    ):
        if pattern.search(sentence):
            return ("assertion", tier)

    # Nothing matched: an unclassified assertion.
    return ("assertion", None)
|
||||
Reference in New Issue
Block a user