fix: Filter controls by test_procedure content — eliminates governance false positives
Only use controls whose test_procedure mentions at least one document-type-specific term:

- DSI: test_procedure must contain 'datenschutzerkl', 'informationspflicht', 'art. 13' or 'art. 14'
- Cookie: must contain 'cookie', 'einwilligung', 'consent' or 'banner'
- Impressum: must contain 'impressum' or 'anbieterkennzeichnung'

This filters out internal governance controls (Datenmodelle, Infrastruktur) that are irrelevant for public document checks.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -30,24 +30,28 @@ OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:35b-a3b")
|
||||
DOC_TYPE_FILTERS = {
|
||||
"dse": {
|
||||
"category": "data_protection",
|
||||
"keywords": ["informationspflicht", "datenschutzerkl", "art. 13", "art. 14",
|
||||
"betroffenenrecht", "verantwortlich", "datenschutzbeauftrag"],
|
||||
"keywords": ["informationspflicht"],
|
||||
"test_proc_must_contain": ["datenschutzerkl", "informationspflicht", "art. 13", "art. 14"],
|
||||
},
|
||||
"cookie": {
|
||||
"category": "data_protection",
|
||||
"keywords": ["cookie", "einwilligung", "tracking", "consent"],
|
||||
"keywords": ["cookie", "einwilligung"],
|
||||
"test_proc_must_contain": ["cookie", "einwilligung", "consent", "banner"],
|
||||
},
|
||||
"impressum": {
|
||||
"category": "compliance",
|
||||
"keywords": ["impressum", "anbieterkennzeichnung", "telemedien", "tmg"],
|
||||
"keywords": ["impressum", "anbieterkennzeichnung"],
|
||||
"test_proc_must_contain": ["impressum", "anbieterkennzeichnung"],
|
||||
},
|
||||
"widerruf": {
|
||||
"category": "compliance",
|
||||
"keywords": ["widerruf", "verbraucher", "fernabsatz"],
|
||||
"keywords": ["widerruf", "verbraucher"],
|
||||
"test_proc_must_contain": ["widerruf", "fernabsatz"],
|
||||
},
|
||||
"agb": {
|
||||
"category": "compliance",
|
||||
"keywords": ["geschäftsbedingung", "agb", "vertragsklausel"],
|
||||
"keywords": ["geschäftsbedingung", "agb"],
|
||||
"test_proc_must_contain": ["geschäftsbedingung", "agb", "vertragsbedingung"],
|
||||
},
|
||||
}
|
||||
|
||||
@@ -68,7 +72,8 @@ async def check_document_with_controls(
|
||||
keywords = filters.get("keywords", [])
|
||||
|
||||
# Query relevant controls from DB
|
||||
controls = _query_controls(db_session, category, keywords, max_controls)
|
||||
test_proc_kw = filters.get("test_proc_must_contain")
|
||||
controls = _query_controls(db_session, category, keywords, max_controls, test_proc_kw)
|
||||
if not controls:
|
||||
logger.info("No canonical controls found for '%s' (%s)", doc_title, doc_type)
|
||||
return []
|
||||
@@ -85,13 +90,23 @@ async def check_document_with_controls(
|
||||
return results
|
||||
|
||||
|
||||
def _query_controls(db_session, category: str, keywords: list[str], limit: int) -> list[dict]:
|
||||
"""Query canonical_controls by category + title keywords."""
|
||||
def _query_controls(db_session, category: str, keywords: list[str], limit: int,
|
||||
test_proc_keywords: list[str] | None = None) -> list[dict]:
|
||||
"""Query canonical_controls by category + title + test_procedure keywords."""
|
||||
from sqlalchemy import text
|
||||
|
||||
# Build keyword filter
|
||||
# Build keyword filter for title
|
||||
keyword_clauses = " OR ".join([f"title ILIKE :kw{i}" for i in range(len(keywords))])
|
||||
params = {f"kw{i}": f"%{kw}%" for i, kw in enumerate(keywords)}
|
||||
|
||||
# Build test_procedure filter (ensures controls are relevant to document type)
|
||||
proc_filter = ""
|
||||
if test_proc_keywords:
|
||||
proc_clauses = " OR ".join([f"test_procedure::text ILIKE :tp{i}" for i in range(len(test_proc_keywords))])
|
||||
for i, tp in enumerate(test_proc_keywords):
|
||||
params[f"tp{i}"] = f"%{tp}%"
|
||||
proc_filter = f"AND ({proc_clauses})"
|
||||
|
||||
params["cat"] = category
|
||||
params["limit"] = limit
|
||||
|
||||
@@ -101,6 +116,8 @@ def _query_controls(db_session, category: str, keywords: list[str], limit: int)
|
||||
WHERE category = :cat
|
||||
AND release_state != 'deleted'
|
||||
AND ({keyword_clauses})
|
||||
{proc_filter}
|
||||
AND test_procedure::text != '[]'
|
||||
ORDER BY risk_score DESC NULLS LAST
|
||||
LIMIT :limit
|
||||
""")
|
||||
|
||||
Reference in New Issue
Block a user