diff --git a/control-pipeline/scripts/atom_citation_inheritance.py b/control-pipeline/scripts/atom_citation_inheritance.py
index f9620ba..38d8d0e 100644
--- a/control-pipeline/scripts/atom_citation_inheritance.py
+++ b/control-pipeline/scripts/atom_citation_inheritance.py
@@ -33,18 +33,21 @@ import sys
 DB_URL = os.getenv("DATABASE_URL", "postgresql://breakpilot:breakpilot@localhost:5432/breakpilot_db")
 
 
+def _art(alias: str) -> str:
+    """SQL for source_citation->>'article' that works whether the column is jsonb
+    (macmini) or text-containing-JSON (prod schema anomaly from the DB swap).
+    pg_input_is_valid (PG16+) guards rows with invalid JSON so the cast never errors."""
+    col = f"{alias}.source_citation"
+    return (
+        f"(CASE WHEN {col} IS NOT NULL AND pg_input_is_valid({col}::text, 'jsonb') "
+        f"THEN ({col}::text)::jsonb->>'article' ELSE NULL END)"
+    )
+
+
 # A row "needs" a citation when it has no article yet.
-_NEEDS = (
-    "(cc.source_citation IS NULL "
-    " OR cc.source_citation->>'article' IS NULL "
-    " OR cc.source_citation->>'article' = '')"
-)
+_NEEDS = f"({_art('cc')} IS NULL OR {_art('cc')} = '')"
 # A parent can supply one when it carries a real article.
-_PARENT_HAS = (
-    "p.source_citation IS NOT NULL "
-    "AND p.source_citation->>'article' IS NOT NULL "
-    "AND p.source_citation->>'article' <> ''"
-)
+_PARENT_HAS = f"({_art('p')} IS NOT NULL AND {_art('p')} <> '')"
 
 SQL_REPORT = f"""
 SET search_path TO compliance, public;