From 9dbb4cc5d2d38238bec89ad3a99877ef7b4a017b Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 25 Mar 2026 08:25:32 +0100 Subject: [PATCH] fix: Backfill nutzt source_citation statt control_parent_links Die Obligation kennt ihren Parent-Rich-Control direkt. Dessen source_citation->>'source' gibt die Quell-Regulierung zuverlaessiger als der Umweg ueber control_parent_links (M:N-Inflation). Co-Authored-By: Claude Opus 4.6 --- .../api/canonical_control_routes.py | 41 +++++-------------- 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/backend-compliance/compliance/api/canonical_control_routes.py b/backend-compliance/compliance/api/canonical_control_routes.py index bda8595..a87dac1 100644 --- a/backend-compliance/compliance/api/canonical_control_routes.py +++ b/backend-compliance/compliance/api/canonical_control_routes.py @@ -937,52 +937,32 @@ async def backfill_normative_strength( """ from compliance.data.source_type_classification import ( classify_source_regulation, - get_highest_source_type, cap_normative_strength, ) with SessionLocal() as db: - # 1. Alle Obligations mit ihren Parent-Control-Links laden + # 1. Alle Obligations mit source_citation des Parent Controls laden obligations = db.execute(text(""" SELECT oc.id, oc.candidate_id, oc.normative_strength, - oc.parent_control_uuid + cc.source_citation->>'source' AS parent_source FROM obligation_candidates oc + JOIN canonical_controls cc ON cc.id = oc.parent_control_uuid WHERE oc.release_state NOT IN ('rejected', 'merged') AND oc.normative_strength IS NOT NULL ORDER BY oc.candidate_id """)).fetchall() - # 2. Fuer jeden Parent Control die source_regulations sammeln - parent_uuids = list({str(o.parent_control_uuid) for o in obligations if o.parent_control_uuid}) - source_types_by_parent: dict[str, list[str]] = {} - - if parent_uuids: - # Batch-Query fuer alle Parent-Links - links = db.execute(text(""" - SELECT control_uuid::text, source_regulation - FROM control_parent_links - WHERE control_uuid::text = ANY(:uuids) - """), {"uuids": parent_uuids}).fetchall() - - for link in links: - uid = link.control_uuid - src_type = classify_source_regulation(link.source_regulation or "") - source_types_by_parent.setdefault(uid, []).append(src_type) - - # 3. Normative strength korrigieren + # 2. Normative strength korrigieren basierend auf source_type changes = [] - stats = {"total": len(obligations), "unchanged": 0, "capped_to_should": 0, "capped_to_can": 0, "no_parent_links": 0} + stats = {"total": len(obligations), "unchanged": 0, "capped_to_should": 0, "capped_to_can": 0, "no_source": 0} for obl in obligations: - parent_uid = str(obl.parent_control_uuid) if obl.parent_control_uuid else None - source_types = source_types_by_parent.get(parent_uid, []) if parent_uid else [] - - if not source_types: - stats["no_parent_links"] += 1 + if not obl.parent_source: + stats["no_source"] += 1 continue - highest_type = get_highest_source_type(source_types) - new_strength = cap_normative_strength(obl.normative_strength, highest_type) + source_type = classify_source_regulation(obl.parent_source) + new_strength = cap_normative_strength(obl.normative_strength, source_type) if new_strength != obl.normative_strength: changes.append({ @@ -990,7 +970,8 @@ async def backfill_normative_strength( "candidate_id": obl.candidate_id, "old_strength": obl.normative_strength, "new_strength": new_strength, - "source_type": highest_type, + "source_type": source_type, + "source_regulation": obl.parent_source, }) if new_strength == "should": stats["capped_to_should"] += 1