feat(qa): recital detection, review split, duplicate comparison
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 42s
CI/CD / test-python-backend-compliance (push) Successful in 34s
CI/CD / test-python-document-crawler (push) Successful in 21s
CI/CD / test-python-dsms-gateway (push) Successful in 20s
CI/CD / validate-canonical-controls (push) Successful in 12s
CI/CD / Deploy (push) Has been skipped
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 42s
CI/CD / test-python-backend-compliance (push) Successful in 34s
CI/CD / test-python-document-crawler (push) Successful in 21s
CI/CD / test-python-dsms-gateway (push) Successful in 20s
CI/CD / validate-canonical-controls (push) Successful in 12s
CI/CD / Deploy (push) Has been skipped
Add _detect_recital() to the QA pipeline. It flags controls whose source_original_text contains Erwägungsgrund (recital) markers instead of article text (28% of controls with source text are affected).

- Recital detection via regex + phrase matching in QA validation
- 10 new tests (TestRecitalDetection), 81 total
- ReviewCompare component for side-by-side duplicate comparison
- Review mode split: Duplikat-Verdacht vs Rule-3-ohne-Anchor tabs
- MkDocs: recital detection documentation
- Detection script for bulk analysis (scripts/find_recital_controls.py)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -321,6 +321,62 @@ VALID_CATEGORIES = set(CATEGORY_KEYWORDS.keys())
|
||||
VALID_DOMAINS = {"AUTH", "CRYP", "NET", "DATA", "LOG", "ACC", "SEC", "INC",
|
||||
"AI", "COMP", "GOV", "LAB", "FIN", "TRD", "ENV", "HLT"}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Recital (Erwägungsgrund) detection in source text
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Pattern: a parenthesised 1-3 digit number followed only by optional
# whitespace and a line break, e.g. "(125)\n". An inline reference such as
# "Absatz (3) des Artikels" has no line break after it and does not match.
# NOTE(review): the pattern is not anchored to the start of a line, so a
# reference that happens to sit at the end of a line also matches — confirm
# whether that is acceptable for the crawled source texts.
_RECITAL_RE = re.compile(r'\((\d{1,3})\)\s*\n')

# Recital-typical phrasing (German EU law Erwägungsgründe). Entries are
# lower-case because they are compared against the lower-cased source text.
_RECITAL_PHRASES = [
    "in erwägung nachstehender gründe",
    "erwägungsgrund",
    "in anbetracht",
    "daher sollte",
    "aus diesem grund",
    "es ist daher",
    "folglich sollte",
    "es sollte daher",
    "in diesem zusammenhang",
]


def _detect_recital(text: Optional[str]) -> Optional[dict]:
    """Detect whether source text looks like a recital (Erwägungsgrund).

    Two signals are evaluated:

    1. Recital number markers such as ``(126)`` followed by a line break
       (``_RECITAL_RE``).
    2. Recital-typical phrases from ``_RECITAL_PHRASES``, matched as
       case-insensitive substrings.

    The text is reported as a suspect when at least one number marker is
    found, or when at least two different phrases occur.

    Args:
        text: Source text to inspect. ``None`` and the empty string are
            accepted and treated as "nothing to detect".

    Returns:
        ``None`` if the text is not a suspect. Otherwise a dict with the keys
        ``recital_suspect`` (always ``True``), ``recital_numbers`` (up to 10
        matched numbers as strings, in order of appearance),
        ``recital_phrases`` (up to 5 matched phrases) and ``detection_method``
        (``"regex"``, ``"phrases"`` or ``"regex+phrases"``).
    """
    if not text:
        return None

    # Signal 1: recital number markers.
    recital_numbers = _RECITAL_RE.findall(text)

    # Signal 2: recital phrasing, compared case-insensitively.
    text_lower = text.lower()
    phrase_hits = [p for p in _RECITAL_PHRASES if p in text_lower]

    # Without a number marker, a single phrase is too weak a signal; this
    # check also covers the case where nothing matched at all.
    if not recital_numbers and len(phrase_hits) < 2:
        return None

    if recital_numbers and phrase_hits:
        detection_method = "regex+phrases"
    elif recital_numbers:
        detection_method = "regex"
    else:
        detection_method = "phrases"

    return {
        "recital_suspect": True,
        # Truncated so the stored metadata stays small.
        "recital_numbers": recital_numbers[:10],
        "recital_phrases": phrase_hits[:5],
        "detection_method": detection_method,
    }
|
||||
|
||||
CATEGORY_LIST_STR = ", ".join(sorted(VALID_CATEGORIES))
|
||||
|
||||
VERIFICATION_KEYWORDS = {
|
||||
@@ -1520,9 +1576,23 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Aspekte ohne
|
||||
) -> tuple[GeneratedControl, bool]:
|
||||
"""Cross-validate category/domain using keyword detection + local LLM.
|
||||
|
||||
Also checks for recital (Erwägungsgrund) contamination in source text.
|
||||
Returns (control, was_fixed). Only triggers Ollama QA when the LLM
|
||||
classification disagrees with keyword detection — keeps it fast.
|
||||
"""
|
||||
# ── Recital detection ──────────────────────────────────────────
|
||||
source_text = control.source_original_text or ""
|
||||
recital_info = _detect_recital(source_text)
|
||||
if recital_info:
|
||||
control.generation_metadata["recital_suspect"] = True
|
||||
control.generation_metadata["recital_detection"] = recital_info
|
||||
control.release_state = "needs_review"
|
||||
logger.warning(
|
||||
"Recital suspect: '%s' — recitals %s detected in source text",
|
||||
control.title[:40],
|
||||
recital_info.get("recital_numbers", []),
|
||||
)
|
||||
|
||||
kw_category = _detect_category(chunk_text) or _detect_category(control.objective)
|
||||
kw_domain = _detect_domain(chunk_text)
|
||||
llm_domain = control.generation_metadata.get("_effective_domain", "")
|
||||
|
||||
@@ -7,6 +7,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from compliance.services.control_generator import (
|
||||
_classify_regulation,
|
||||
_detect_domain,
|
||||
_detect_recital,
|
||||
_parse_llm_json,
|
||||
_parse_llm_json_array,
|
||||
GeneratorConfig,
|
||||
@@ -1306,3 +1307,92 @@ class TestPipelineVersion:
|
||||
assert controls[0] is not None
|
||||
assert controls[1] is None # Null entry from LLM
|
||||
assert controls[2] is not None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Recital (Erwägungsgrund) Detection Tests
|
||||
# =============================================================================
|
||||
|
||||
class TestRecitalDetection:
    """Exercise _detect_recital on recital-like and article-like source text."""

    def test_recital_number_detected(self):
        """A single (126) marker followed by a line break makes the text a suspect."""
        detection = _detect_recital(
            "Daher ist es wichtig...\n(126)\nDie Konformitätsbewertung sollte..."
        )
        assert detection is not None
        assert detection["recital_suspect"] is True
        assert "126" in detection["recital_numbers"]

    def test_multiple_recital_numbers(self):
        """Every marker in the text shows up in the reported numbers."""
        source = "(124)\nErster Punkt.\n(125)\nZweiter Punkt.\n(126)\nDritter Punkt."
        detection = _detect_recital(source)
        assert detection is not None
        for number in ("124", "125", "126"):
            assert number in detection["recital_numbers"]

    def test_article_text_not_flagged(self):
        """Plain article wording without markers or phrases yields None."""
        source = ("Der Anbieter eines Hochrisiko-KI-Systems muss sicherstellen, "
                  "dass die technische Dokumentation erstellt wird.")
        assert _detect_recital(source) is None

    def test_empty_text_returns_none(self):
        """The empty string yields None."""
        assert _detect_recital("") is None

    def test_none_text_returns_none(self):
        """None as input yields None."""
        assert _detect_recital(None) is None

    def test_recital_phrases_detected(self):
        """Several recital-typical phrases without any marker are enough."""
        source = ("In Erwägung nachstehender Gründe wurde beschlossen, "
                  "daher sollte der Anbieter folgende Maßnahmen ergreifen. "
                  "Es ist daher notwendig, die Konformität sicherzustellen.")
        detection = _detect_recital(source)
        assert detection is not None
        assert detection["detection_method"] == "phrases"

    def test_single_phrase_not_enough(self):
        """One recital-typical phrase on its own does not trigger detection."""
        source = "Daher sollte das System regelmäßig geprüft werden."
        assert _detect_recital(source) is None

    def test_combined_regex_and_phrases(self):
        """A marker plus a phrase is reported with method 'regex+phrases'."""
        source = "(42)\nIn Erwägung nachstehender Gründe wurde entschieden..."
        detection = _detect_recital(source)
        assert detection is not None
        assert detection["detection_method"] == "regex+phrases"
        assert "42" in detection["recital_numbers"]

    def test_parenthesized_number_without_newline_ignored(self):
        """An inline parenthesised number, as in 'Absatz (3) des Artikels',
        is not followed by a line break and must not be flagged."""
        source = "Gemäß Absatz (3) des Artikels 52 muss der Anbieter sicherstellen..."
        assert _detect_recital(source) is None

    def test_real_world_recital_text(self):
        """Excerpt styled after AI Act recitals (126) and (127) on conformity assessment."""
        source = "".join((
            "(126)\n",
            "Um den Verwaltungsaufwand zu verringern und die Konformitätsbewertung ",
            "zu vereinfachen, sollten bestimmte Hochrisiko-KI-Systeme, die von ",
            "Anbietern zertifiziert oder für die eine Konformitätserklärung ",
            "ausgestellt wurde, automatisch als konform mit den Anforderungen ",
            "dieser Verordnung gelten, sofern sie den harmonisierten Normen oder ",
            "gemeinsamen Spezifikationen entsprechen.\n",
            "(127)\n",
            "Es ist daher angezeigt, dass der Anbieter das entsprechende ",
            "Konformitätsbewertungsverfahren anwendet.",
        ))
        detection = _detect_recital(source)
        assert detection is not None
        for number in ("126", "127"):
            assert number in detection["recital_numbers"]
|
||||
|
||||
Reference in New Issue
Block a user