From c1ea9458a7efb006482b4cd7bdd5a8ca5ef484ff Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 24 Jun 2026 20:15:45 +0200 Subject: [PATCH] Add met_count and recall_limited_obligations to shadow telemetry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reichert die Obligation-Shadow-Telemetrie um zwei Felder an für die Cross-Firmen- Auswertung: met_count (abgedeckte Obligations) + recall_limited_obligations (welche Obligations recall-limitiert sind) — erlaubt die Konzentrations-Analyse über Firmen. 7-Firmen-Shadow: 136 Control-Findings → 29 Obligation-Findings (4,7×); recall_limited nur 6/29, konzentriert auf third_country/safeguards in 2/7 Firmen → LLM-Fix bounded. Co-Authored-By: Claude Opus 4.7 --- .../services/specialist_agents/dse/_obligation_shadow.py | 8 +++++++- backend-compliance/tests/test_obligation_shadow.py | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/backend-compliance/compliance/services/specialist_agents/dse/_obligation_shadow.py b/backend-compliance/compliance/services/specialist_agents/dse/_obligation_shadow.py index 5f5b1caf..92801e7e 100644 --- a/backend-compliance/compliance/services/specialist_agents/dse/_obligation_shadow.py +++ b/backend-compliance/compliance/services/specialist_agents/dse/_obligation_shadow.py @@ -56,7 +56,7 @@ def compute_obligation_shadow(results: list[dict], text: str, """Reiner Shadow-Vergleich (keine DB, keine Seiteneffekte). `markers`: {control_id: {obl:[...], cond:str|None}}. `met` = Legacy-`passed`.""" from compliance.services.obligation_aggregation import ( - FAILED, LM, NA, PARTIAL, CriterionEval, aggregate_obligations, + FAILED, LM, MET, NA, PARTIAL, CriterionEval, aggregate_obligations, ) from compliance.services.obligation_applicability import applicable from compliance.services.obligation_taxonomy import requires_llm @@ -101,16 +101,22 @@ def compute_obligation_shadow(results: list[dict], text: str, "recall_limited": bool(requires_llm(o.obligation_id) and o.status in (FAILED, PARTIAL))}) top.sort(key=lambda x: -x["fehlt"]) + met_count = sum(1 for o in obls if o.status == MET) + recall_limited_obls = sorted({o.obligation_id for o in obls + if o.status in (FAILED, PARTIAL) + and requires_llm(o.obligation_id)}) return { "legacy_control_findings": legacy, "obligation_shadow_results": len(obls), "obligation_findings": findings, "failed_by_current_checker": failed_current, "recall_limited": recall_limited, + "met_count": met_count, "collapse_factor": round(legacy / findings, 2) if findings else None, "na_count": na, "met_failed_delta": legacy - findings, "top_collapsed_obligations": top[:10], + "recall_limited_obligations": recall_limited_obls, } diff --git a/backend-compliance/tests/test_obligation_shadow.py b/backend-compliance/tests/test_obligation_shadow.py index 0a1bdb9b..608efab7 100644 --- a/backend-compliance/tests/test_obligation_shadow.py +++ b/backend-compliance/tests/test_obligation_shadow.py @@ -21,6 +21,7 @@ class TestComputeShadow: assert s["recall_limited"] == 0 assert s["collapse_factor"] == 5.0 assert s["met_failed_delta"] == 4 + assert s["met_count"] == 0 top = s["top_collapsed_obligations"][0] assert top["obligation"] == NON_LLM and top["fehlt"] == 5 assert top["recall_limited"] is False @@ -60,6 +61,7 @@ class TestRecallSegregation: assert s["obligation_findings"] == 1 assert s["recall_limited"] == 1 assert s["failed_by_current_checker"] == 0 + assert s["recall_limited_obligations"] == [LLM_REQ] assert s["top_collapsed_obligations"][0]["recall_limited"] is True def test_mixed_real_gap_and_recall_limited(self):