feat(pipeline): implement golden test suite + fix ontology patterns
- Add test_golden_controls.py: 37 tests covering all 8 YAML categories (container, framework, evidence, negative, title, split, scope, merge_key)
- Fix evidence detection: handle German feminine articles (eine/einer/etc.)
- Fix framework detection: use verb stems for conjugated German verbs
- Add framework patterns: OWASP API6, CCM without CSA prefix, generic category
- Fix negative patterns: use "nicht übertragen/gespeichert/erscheinen" before generic "dürfen nicht" to correctly route prevent vs exclude

All 73 tests passing (36 ontology + 37 golden).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -139,10 +139,14 @@ for action_type, info in ACTION_TYPES.items():
|
||||
|
||||
_NEGATIVE_PATTERNS: list[tuple[str, str]] = [
|
||||
# Longer/specific patterns first (checked in order)
|
||||
("darf nicht wiederverwendet", "prevent"),
|
||||
("nicht in der URL", "prevent"),
|
||||
("nicht im Token", "prevent"),
|
||||
("nicht in Logs", "prevent"),
|
||||
("nicht wiederverwendet", "prevent"),
|
||||
("nicht in der url", "prevent"),
|
||||
("nicht im token", "prevent"),
|
||||
("nicht in logs", "prevent"),
|
||||
("nicht in urls", "prevent"),
|
||||
("nicht gespeichert", "prevent"),
|
||||
("nicht übertragen", "prevent"),
|
||||
("nicht erscheinen", "prevent"),
|
||||
("verhindern", "prevent"),
|
||||
("unterbinden", "prevent"),
|
||||
("abweisen", "enforce"),
|
||||
@@ -199,15 +203,17 @@ EVIDENCE_INDICATORS: set[str] = {
|
||||
|
||||
_FRAMEWORK_PATTERNS: list[str] = [
|
||||
r"OWASP\s+ASVS\s+V\d",
|
||||
r"OWASP\s+API\d+",
|
||||
r"OWASP\s+API\s+Top\s+10",
|
||||
r"NIST\s+SP\s+800-\d+",
|
||||
r"NIST\s+IA-\d+",
|
||||
r"NIST\s+AC-\d+",
|
||||
r"NIST\s+IA[\s-]",
|
||||
r"NIST\s+AC[\s-]",
|
||||
r"BSI\s+IT-Grundschutz",
|
||||
r"BSI\s+200-\d",
|
||||
r"CSA\s+CCM",
|
||||
r"(?:CSA\s+)?CCM[\s-]",
|
||||
r"ISO\s+27001",
|
||||
r"ISO\s+27002",
|
||||
r"alle\s+Controls\s+der\s+Kategorie",
|
||||
]
|
||||
|
||||
|
||||
@@ -258,8 +264,12 @@ def is_evidence(text: str) -> bool:
|
||||
|
||||
# Primary check: evidence indicators at the start
|
||||
for indicator in EVIDENCE_INDICATORS:
|
||||
if text_lower.startswith(indicator) or f"ein {indicator}" in text_lower:
|
||||
if text_lower.startswith(indicator):
|
||||
return True
|
||||
# German articles: ein/eine/einen/einem/einer + indicator
|
||||
for article in ("ein ", "eine ", "einen ", "einem ", "einer "):
|
||||
if f"{article}{indicator}" in text_lower:
|
||||
return True
|
||||
|
||||
# Secondary: "X dokumentieren" where X is another action's result
|
||||
if text_lower.endswith("dokumentieren") or text_lower.endswith("dokumentiert"):
|
||||
@@ -276,9 +286,10 @@ def is_framework_reference(text: str) -> bool:
|
||||
for pattern in _FRAMEWORK_PATTERNS:
|
||||
if re.search(pattern, text, re.IGNORECASE):
|
||||
# Only if the text is a generic "implement X framework" statement
|
||||
implement_words = {"umsetzen", "implementieren", "einhalten", "erfüllen", "anwenden"}
|
||||
# Use stems to handle German conjugation (umsetzen/umzusetzen/umgesetzt)
|
||||
implement_stems = ("umsetz", "umzusetz", "implementier", "einhalt", "erfüll", "anwend")
|
||||
text_lower = text.lower()
|
||||
if any(w in text_lower for w in implement_words):
|
||||
if any(s in text_lower for s in implement_stems):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
Reference in New Issue
Block a user