feat: Anti-Fake-Evidence System (Phase 1-4b)
Implement the full evidence-integrity pipeline to prevent compliance theater:

- Confidence levels (E0-E4), truth-status tracking, assertion engine
- Four-Eyes approval workflow, audit trail, reject endpoint
- Evidence distribution dashboard, LLM audit routes
- Traceability matrix (backend endpoint + Compliance Hub UI tab)
- Anti-fake badges, control status machine, normative patterns
- 2 migrations, 4 test suites, MkDocs documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
80
backend-compliance/compliance/services/assertion_engine.py
Normal file
80
backend-compliance/compliance/services/assertion_engine.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""Assertion Engine — splits text into sentences and classifies each.
|
||||
|
||||
Each sentence is tagged as:
|
||||
- assertion: normative statement (pflicht / empfehlung / kann)
|
||||
- fact: references concrete evidence artifacts
|
||||
- rationale: explains why something is required
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from .normative_patterns import (
|
||||
PFLICHT_RE, EMPFEHLUNG_RE, KANN_RE, RATIONALE_RE, EVIDENCE_RE,
|
||||
)
|
||||
|
||||
# Sentence splitter: period/excl/question followed by space+uppercase, or newlines
|
||||
_SENTENCE_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ])|(?:\n\s*\n)')
|
||||
|
||||
|
||||
def extract_assertions(
    text: str,
    entity_type: str,
    entity_id: str,
    tenant_id: Optional[str] = None,
) -> list[dict]:
    """Split *text* into sentences and classify each one.

    Returns a list of dicts ready for AssertionDB creation.
    """
    stripped = (text or "").strip()
    if not stripped:
        return []

    records: list[dict] = []
    for position, fragment in enumerate(_SENTENCE_SPLIT.split(stripped)):
        candidate = fragment.strip()
        # Drop empty fragments and tiny scraps (< 5 chars) the splitter leaves.
        if len(candidate) < 5:
            continue

        a_type, tier = _classify_sentence(candidate)

        records.append({
            "tenant_id": tenant_id,
            "entity_type": entity_type,
            "entity_id": entity_id,
            "sentence_text": candidate,
            "sentence_index": position,
            "assertion_type": a_type,
            "normative_tier": tier,
            "evidence_ids": [],
            "confidence": 0.0,
        })

    return records
|
||||
|
||||
|
||||
def _classify_sentence(sentence: str) -> tuple[str, Optional[str]]:
    """Return (assertion_type, normative_tier) for a single sentence."""

    # Evidence references win outright: the sentence states a fact.
    if EVIDENCE_RE.search(sentence):
        return ("fact", None)

    # Rationale wins only when rationale markers occur at least as often
    # as normative markers in the same sentence.
    normative_hits = sum(
        len(pattern.findall(sentence))
        for pattern in (PFLICHT_RE, EMPFEHLUNG_RE, KANN_RE)
    )
    rationale_hits = len(RATIONALE_RE.findall(sentence))
    if rationale_hits and rationale_hits >= normative_hits:
        return ("rationale", None)

    # Normative tiers, checked strongest-first.
    for pattern, tier in (
        (PFLICHT_RE, "pflicht"),
        (EMPFEHLUNG_RE, "empfehlung"),
        (KANN_RE, "kann"),
    ):
        if pattern.search(sentence):
            return ("assertion", tier)

    # Nothing matched: an unclassified assertion.
    return ("assertion", None)
|
||||
Reference in New Issue
Block a user