Add met_count and recall_limited_obligations to shadow telemetry

Reichert die Obligation-Shadow-Telemetrie für die Cross-Firmen-Auswertung um zwei Felder an: met_count (Anzahl abgedeckter Obligations) und recall_limited_obligations (welche Obligations recall-limitiert sind) — das erlaubt die Konzentrations-Analyse über Firmen hinweg. 7-Firmen-Shadow: 136 Control-Findings → 29 Obligation-Findings (4,7×); recall_limited nur 6/29, konzentriert auf third_country/safeguards in 2/7 Firmen → LLM-Fix bounded.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-24 20:15:45 +02:00
parent 0631a98bdd
commit c1ea9458a7
2 changed files with 9 additions and 1 deletions
@@ -56,7 +56,7 @@ def compute_obligation_shadow(results: list[dict], text: str,
    """Reiner Shadow-Vergleich (keine DB, keine Seiteneffekte). `markers`:
    {control_id: {obl:[...], cond:str|None}}. `met` = Legacy-`passed`."""
    from compliance.services.obligation_aggregation import (
-        FAILED, LM, NA, PARTIAL, CriterionEval, aggregate_obligations,
+        FAILED, LM, MET, NA, PARTIAL, CriterionEval, aggregate_obligations,
    )
    from compliance.services.obligation_applicability import applicable
    from compliance.services.obligation_taxonomy import requires_llm
@@ -101,16 +101,22 @@ def compute_obligation_shadow(results: list[dict], text: str,
                        "recall_limited": bool(requires_llm(o.obligation_id)
                                               and o.status in (FAILED, PARTIAL))})
    top.sort(key=lambda x: -x["fehlt"])
    met_count = sum(1 for o in obls if o.status == MET)
    recall_limited_obls = sorted({o.obligation_id for o in obls
                                  if o.status in (FAILED, PARTIAL)
                                  and requires_llm(o.obligation_id)})
    return {
        "legacy_control_findings": legacy,
        "obligation_shadow_results": len(obls),
        "obligation_findings": findings,
        "failed_by_current_checker": failed_current,
        "recall_limited": recall_limited,
        "met_count": met_count,
        "collapse_factor": round(legacy / findings, 2) if findings else None,
        "na_count": na,
        "met_failed_delta": legacy - findings,
        "top_collapsed_obligations": top[:10],
        "recall_limited_obligations": recall_limited_obls,
    }
@@ -21,6 +21,7 @@ class TestComputeShadow:
        assert s["recall_limited"] == 0
        assert s["collapse_factor"] == 5.0
        assert s["met_failed_delta"] == 4
        assert s["met_count"] == 0
        top = s["top_collapsed_obligations"][0]
        assert top["obligation"] == NON_LLM and top["fehlt"] == 5
        assert top["recall_limited"] is False
@@ -60,6 +61,7 @@ class TestRecallSegregation:
        assert s["obligation_findings"] == 1
        assert s["recall_limited"] == 1
        assert s["failed_by_current_checker"] == 0
        assert s["recall_limited_obligations"] == [LLM_REQ]
        assert s["top_collapsed_obligations"][0]["recall_limited"] is True
    def test_mixed_real_gap_and_recall_limited(self):