Files
Benjamin Admin e6201d5239 feat: Anti-Fake-Evidence System (Phase 1-4b)
Implement full evidence integrity pipeline to prevent compliance theater:
- Confidence levels (E0-E4), truth status tracking, assertion engine
- Four-Eyes approval workflow, audit trail, reject endpoint
- Evidence distribution dashboard, LLM audit routes
- Traceability matrix (backend endpoint + Compliance Hub UI tab)
- Anti-fake badges, control status machine, normative patterns
- 2 migrations, 4 test suites, MkDocs documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 17:15:45 +01:00

81 lines
2.4 KiB
Python

"""Assertion Engine — splits text into sentences and classifies each.
Each sentence is tagged as:
- assertion: normative statement (pflicht / empfehlung / kann)
- fact: references concrete evidence artifacts
- rationale: explains why something is required
"""
import re
from typing import Optional
from .normative_patterns import (
PFLICHT_RE, EMPFEHLUNG_RE, KANN_RE, RATIONALE_RE, EVIDENCE_RE,
)
# Sentence boundary pattern. A split happens at either of:
#   1. a run of whitespace that directly follows '.', '!' or '?' and is
#      directly followed by an uppercase letter (A-Z or the umlauts Ä, Ö, Ü);
#   2. a blank-line gap: a newline, optional whitespace, then another newline.
# The lookbehind/lookahead in (1) are zero-width, so the punctuation stays with
# the preceding sentence and the capital letter with the following one.
# The pattern has no capturing groups, so re.split() returns only the text
# fragments (no separators, no None entries).
_SENTENCE_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ])|(?:\n\s*\n)')
def extract_assertions(
    text: str,
    entity_type: str,
    entity_id: str,
    tenant_id: Optional[str] = None,
) -> list[dict]:
    """Break *text* into sentences and build one classified record per sentence.

    The text is stripped, split with ``_SENTENCE_SPLIT`` and every fragment is
    stripped again. Fragments shorter than five characters are dropped; each
    remaining one is classified via ``_classify_sentence``.

    Args:
        text: Free text to analyse. ``None``, empty or whitespace-only input
            yields an empty list.
        entity_type: Copied unchanged into every record.
        entity_id: Copied unchanged into every record.
        tenant_id: Copied unchanged into every record; defaults to ``None``.

    Returns:
        A list of dicts (meant for AssertionDB creation) with the keys
        ``tenant_id``, ``entity_type``, ``entity_id``, ``sentence_text``,
        ``sentence_index``, ``assertion_type``, ``normative_tier``,
        ``evidence_ids`` (always a fresh empty list) and ``confidence``
        (always ``0.0``).
    """
    normalized = text.strip() if text else ""
    if not normalized:
        return []

    records: list[dict] = []
    for position, fragment in enumerate(_SENTENCE_SPLIT.split(normalized)):
        candidate = fragment.strip()
        # Very short fragments (under five characters) are not kept.
        # ``position`` is the index in the raw split result, so a dropped
        # fragment leaves a gap in the emitted ``sentence_index`` values.
        if len(candidate) >= 5:
            kind, tier = _classify_sentence(candidate)
            record = {
                "tenant_id": tenant_id,
                "entity_type": entity_type,
                "entity_id": entity_id,
                "sentence_text": candidate,
                "sentence_index": position,
                "assertion_type": kind,
                "normative_tier": tier,
                "evidence_ids": [],
                "confidence": 0.0,
            }
            records.append(record)
    return records
def _classify_sentence(sentence: str) -> tuple[str, Optional[str]]:
    """Classify one sentence and return ``(assertion_type, normative_tier)``.

    Rules are applied in this order; the first one that applies wins:

    1. ``EVIDENCE_RE`` matches -> ``("fact", None)``.
    2. ``RATIONALE_RE`` matches at least once and at least as often as all
       normative patterns combined -> ``("rationale", None)``.
    3. The first matching normative pattern, checked in the order
       pflicht, empfehlung, kann -> ``("assertion", <tier>)``.
    4. Nothing matched -> ``("assertion", None)``.
    """
    # Rule 1: an evidence keyword takes precedence over everything else.
    if EVIDENCE_RE.search(sentence) is not None:
        return ("fact", None)

    # Normative tiers in priority order, each paired with its pattern.
    tier_patterns = (
        ("pflicht", PFLICHT_RE),
        ("empfehlung", EMPFEHLUNG_RE),
        ("kann", KANN_RE),
    )
    tier_hits = [
        (tier, len(pattern.findall(sentence)))
        for tier, pattern in tier_patterns
    ]

    # Rule 2: rationale wins only when its markers are not outnumbered by
    # normative markers.
    normative_total = sum(hits for _, hits in tier_hits)
    rationale_total = len(RATIONALE_RE.findall(sentence))
    if rationale_total > 0 and rationale_total >= normative_total:
        return ("rationale", None)

    # Rule 3: highest-priority tier with at least one match.
    for tier, hits in tier_hits:
        if hits:
            return ("assertion", tier)

    # Rule 4: no normative keyword found; keep it as an untiered assertion.
    return ("assertion", None)