diff --git a/control-pipeline/api/control_generator_routes.py b/control-pipeline/api/control_generator_routes.py index 1012d85..0fa23a8 100644 --- a/control-pipeline/api/control_generator_routes.py +++ b/control-pipeline/api/control_generator_routes.py @@ -2392,9 +2392,13 @@ async def get_quality_metrics( duplicate_rate = round(dup_rows / max(total_with_mk, 1) * 100, 1) - # Evidence leak rate: controls that look like evidence - evidence_keywords = ("nachweis", "screenshot", "export", "zertifizierung", - "auditbericht", "prüfbericht", "protokoll") + # Evidence leak rate: controls whose TITLE starts with evidence-type nouns + # "nachweisen" (verb) is a valid action; "Nachweis vorliegen" is evidence + evidence_start_patterns = ( + "nachweis vorli", "nachweis muss vorl", "screenshot", + "export der", "auditbericht", "prüfbericht", + "log-auszug", "jira-ticket", "sbom-nachweis", + ) evidence_count = 0 rows = db.execute(text(f""" SELECT title FROM compliance.canonical_controls cc @@ -2404,17 +2408,23 @@ async def get_quality_metrics( """)).fetchall() for row in rows: title_lower = (row[0] or "").lower() - if any(kw in title_lower for kw in evidence_keywords): + if any(title_lower.startswith(p) for p in evidence_start_patterns): evidence_count += 1 evidence_leak_rate = round(evidence_count / max(total, 1) * 100, 1) - # Truncated title rate: titles ending mid-word (heuristic) + # Truncated title rate: titles cut off mid-word + # True truncation: ends with incomplete word fragments (no space before last char) + # NOT truncation: German titles ending with complete verbs (infinitives etc.) truncated_count = 0 for row in rows: title = (row[0] or "").strip() - if title and len(title) >= 75: - # Likely truncated if it's close to max and doesn't end with a word boundary - if not title[-1] in ".!?)\"'": + if title and len(title) >= 195: + # Near the 200-char limit — likely truncated + truncated_count += 1 + elif title and len(title) >= 75: + # Check for mid-word cutoff: last "word" has <3 chars and title is long + last_word = title.split()[-1] if title.split() else "" + if len(last_word) <= 2 and not last_word.endswith((".", "!", "?")): truncated_count += 1 truncated_title_rate = round(truncated_count / max(total, 1) * 100, 1)