Implement full evidence integrity pipeline to prevent compliance theater: - Confidence levels (E0-E4), truth status tracking, assertion engine - Four-Eyes approval workflow, audit trail, reject endpoint - Evidence distribution dashboard, LLM audit routes - Traceability matrix (backend endpoint + Compliance Hub UI tab) - Anti-fake badges, control status machine, normative patterns - 2 migrations, 4 test suites, MkDocs documentation Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
81 lines
2.4 KiB
Python
81 lines
2.4 KiB
Python
"""Assertion Engine — splits text into sentences and classifies each.
|
|
|
|
Each sentence is tagged as:
|
|
- assertion: normative statement (pflicht / empfehlung / kann)
|
|
- fact: references concrete evidence artifacts
|
|
- rationale: explains why something is required
|
|
"""
|
|
|
|
import re
|
|
from typing import Optional
|
|
|
|
from .normative_patterns import (
|
|
PFLICHT_RE, EMPFEHLUNG_RE, KANN_RE, RATIONALE_RE, EVIDENCE_RE,
|
|
)
|
|
|
|
# Sentence splitter: whitespace after . ! or ? that is followed by an uppercase letter (A-Z, Ä, Ö, Ü), or a blank line
|
|
# First alternative: the lookbehind keeps the closing . ! or ? attached to the
# preceding sentence, and the lookahead only permits a split before A-Z or
# Ä/Ö/Ü, so text continuing with a lowercase letter or a digit is not split.
# Second alternative: a newline, optional whitespace, and another newline
# (i.e. a blank line); a single newline on its own does not split.
_SENTENCE_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ])|(?:\n\s*\n)')
|
|
|
|
|
|
def extract_assertions(
    text: str,
    entity_type: str,
    entity_id: str,
    tenant_id: Optional[str] = None,
) -> list[dict]:
    """Break *text* into sentences and build one classified record per sentence.

    Args:
        text: Free text to analyse. Falsy or whitespace-only input yields ``[]``.
        entity_type: Copied unchanged into every record.
        entity_id: Copied unchanged into every record.
        tenant_id: Copied unchanged into every record; may be ``None``.

    Returns:
        A list of dicts (described by the original author as ready for
        AssertionDB creation) with the keys ``tenant_id``, ``entity_type``,
        ``entity_id``, ``sentence_text``, ``sentence_index``,
        ``assertion_type``, ``normative_tier``, ``evidence_ids`` (a fresh
        empty list) and ``confidence`` (``0.0``).
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return []

    records: list[dict] = []
    for position, fragment in enumerate(_SENTENCE_SPLIT.split(stripped)):
        candidate = fragment.strip()
        # Fragments shorter than five characters (including empty ones) are
        # dropped. ``position`` still counts them, so ``sentence_index`` can
        # have gaps relative to the returned list.
        if len(candidate) < 5:
            continue

        kind, tier = _classify_sentence(candidate)
        records.append({
            "tenant_id": tenant_id,
            "entity_type": entity_type,
            "entity_id": entity_id,
            "sentence_text": candidate,
            "sentence_index": position,
            "assertion_type": kind,
            "normative_tier": tier,
            "evidence_ids": [],
            "confidence": 0.0,
        })

    return records
|
|
|
|
|
|
def _classify_sentence(sentence: str) -> tuple[str, Optional[str]]:
    """Classify one sentence as fact, rationale or (tiered) assertion.

    Precedence, first match wins:

    1. Any ``EVIDENCE_RE`` match -> ``("fact", None)``.
    2. At least one ``RATIONALE_RE`` match and at least as many rationale
       matches as normative matches (pflicht + empfehlung + kann combined)
       -> ``("rationale", None)``.
    3. The first normative tier with a match, checked in the order
       pflicht, empfehlung, kann -> ``("assertion", <tier>)``.
    4. Otherwise -> ``("assertion", None)``.

    Returns:
        A ``(assertion_type, normative_tier)`` tuple.
    """
    if EVIDENCE_RE.search(sentence) is not None:
        return "fact", None

    # Count each normative tier once; insertion order doubles as the
    # precedence order used for the tier decision below.
    tier_hits = {
        "pflicht": len(PFLICHT_RE.findall(sentence)),
        "empfehlung": len(EMPFEHLUNG_RE.findall(sentence)),
        "kann": len(KANN_RE.findall(sentence)),
    }
    rationale_hits = len(RATIONALE_RE.findall(sentence))

    # Rationale wins only when its markers are present and not outnumbered
    # by normative markers.
    if rationale_hits and rationale_hits >= sum(tier_hits.values()):
        return "rationale", None

    for tier, hits in tier_hits.items():
        if hits:
            return "assertion", tier

    # No normative marker at all: unclassified assertion.
    return "assertion", None
|