From f398088fbb309cdf006ca09e14659c2987d8905b Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Sun, 21 Jun 2026 22:44:38 +0200
Subject: [PATCH] feat(controls): atom-inheritance schema-aware (text + jsonb source_citation)

Prod canonical_controls.source_citation ist text-mit-JSON (DB-Swap-Anomalie),
macmini ist jsonb. _art()-Helper nutzt pg_input_is_valid(col::text,'jsonb') +
(col::text)::jsonb->>'article' (PG16+) -> ein Skript fuer beide Schemata.

Prod-Apply 2026-06-21 verifiziert: Zitierfaehigkeit 6,8%->60,8% (+169.755),
Stichprobe 8/8 korrekt. macmini-Dry-Run 0 (idempotent, kein Regress).

Co-Authored-By: Claude Opus 4.7
---
 .../scripts/atom_citation_inheritance.py | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/control-pipeline/scripts/atom_citation_inheritance.py b/control-pipeline/scripts/atom_citation_inheritance.py
index f9620ba..38d8d0e 100644
--- a/control-pipeline/scripts/atom_citation_inheritance.py
+++ b/control-pipeline/scripts/atom_citation_inheritance.py
@@ -33,18 +33,21 @@ import sys
 
 DB_URL = os.getenv("DATABASE_URL", "postgresql://breakpilot:breakpilot@localhost:5432/breakpilot_db")
 
+def _art(alias: str) -> str:
+    """SQL for source_citation->>'article' that works whether the column is jsonb
+    (macmini) or text-containing-JSON (prod schema anomaly from the DB swap).
+    pg_input_is_valid (PG16+) guards rows with invalid JSON so the cast never errors."""
+    col = f"{alias}.source_citation"
+    return (
+        f"(CASE WHEN {col} IS NOT NULL AND pg_input_is_valid({col}::text, 'jsonb') "
+        f"THEN ({col}::text)::jsonb->>'article' ELSE NULL END)"
+    )
+
+
 # A row "needs" a citation when it has no article yet.
-_NEEDS = (
-    "(cc.source_citation IS NULL "
-    " OR cc.source_citation->>'article' IS NULL "
-    " OR cc.source_citation->>'article' = '')"
-)
+_NEEDS = f"({_art('cc')} IS NULL OR {_art('cc')} = '')"
 # A parent can supply one when it carries a real article.
-_PARENT_HAS = (
-    "p.source_citation IS NOT NULL "
-    "AND p.source_citation->>'article' IS NOT NULL "
-    "AND p.source_citation->>'article' <> ''"
-)
+_PARENT_HAS = f"({_art('p')} IS NOT NULL AND {_art('p')} <> '')"
 
 SQL_REPORT = f"""
 SET search_path TO compliance, public;