""" Sync controls from Mac Mini (local) to Production (Hetzner). Both have PostgreSQL. Mac Mini has 6,373 active controls, Production ~3,159. Strategy: 1. Export all non-duplicate/non-too_close controls from Mac Mini 2. Upsert into Production (ON CONFLICT update, preserve production-only data) 3. Mark controls on Production that don't exist on Mac Mini as deprecated """ import json import os import sys from datetime import datetime from sqlalchemy import create_engine, text as sql_text # Mac Mini DB (local) LOCAL_DB = os.environ['DATABASE_URL'] # Production DB (Hetzner) — same env var format PROD_DB = os.environ.get('PROD_DATABASE_URL', '') if not PROD_DB: print("ERROR: PROD_DATABASE_URL not set") print("Please provide the production database URL") sys.exit(1) DRY_RUN = '--dry-run' in sys.argv local_engine = create_engine(LOCAL_DB, connect_args={"options": "-c search_path=compliance,public"}) prod_engine = create_engine(PROD_DB, connect_args={"options": "-c search_path=compliance,public"}) # ── Step 1: Export from Mac Mini ────────────────────────────────────── print("=" * 60) print("SYNC CONTROLS: Mac Mini → Production") print("=" * 60) with local_engine.connect() as local_conn: # Get all controls (include duplicates/too_close so prod knows about them) rows = local_conn.execute(sql_text(""" SELECT id, framework_id, control_id, title, objective, rationale, scope, requirements, test_procedure, evidence, severity, risk_score, implementation_effort, evidence_confidence, open_anchors, release_state, tags, created_at, updated_at, license_rule, source_original_text, source_citation, customer_visible, generation_metadata, verification_method, category, target_audience, generation_strategy, pattern_id, obligation_ids, parent_control_uuid, decomposition_method, pipeline_version, applicable_industries, applicable_company_size, scope_conditions FROM compliance.canonical_controls """)).fetchall() print(f" Local controls: {len(rows)}") # Count by state states = {} for r in rows: s = r[15] # release_state states[s] = states.get(s, 0) + 1 for s, c in sorted(states.items(), key=lambda x: -x[1]): print(f" {s}: {c}") # ── Step 2: Check Production state ─────────────────────────────────── with prod_engine.connect() as prod_conn: r = prod_conn.execute(sql_text(""" SELECT count(*) FROM compliance.canonical_controls """)) prod_count = r.scalar() print(f"\n Production controls before sync: {prod_count}") # Check if framework exists fw = prod_conn.execute(sql_text(""" SELECT id FROM compliance.canonical_control_frameworks WHERE framework_id = 'bp_security_v1' LIMIT 1 """)).fetchone() if fw: print(f" Framework bp_security_v1: {fw[0]}") else: print(" WARNING: Framework bp_security_v1 not found on production!") # ── Step 3: Upsert to Production ───────────────────────────────────── print(f"\n Syncing {len(rows)} controls to production...") with prod_engine.begin() as prod_conn: inserted = 0 updated = 0 errors = 0 for i, row in enumerate(rows): try: result = prod_conn.execute(sql_text(""" INSERT INTO compliance.canonical_controls ( id, framework_id, control_id, title, objective, rationale, scope, requirements, test_procedure, evidence, severity, risk_score, implementation_effort, evidence_confidence, open_anchors, release_state, tags, created_at, updated_at, license_rule, source_original_text, source_citation, customer_visible, generation_metadata, verification_method, category, target_audience, generation_strategy, pattern_id, obligation_ids, parent_control_uuid, decomposition_method, pipeline_version, applicable_industries, applicable_company_size, scope_conditions ) VALUES ( :id, :framework_id, :control_id, :title, :objective, :rationale, :scope, :requirements, :test_procedure, :evidence, :severity, :risk_score, :implementation_effort, :evidence_confidence, :open_anchors, :release_state, :tags, :created_at, :updated_at, :license_rule, :source_original_text, :source_citation, :customer_visible, :generation_metadata, :verification_method, :category, :target_audience, :generation_strategy, :pattern_id, :obligation_ids, :parent_control_uuid, :decomposition_method, :pipeline_version, :applicable_industries, :applicable_company_size, :scope_conditions ) ON CONFLICT (id) DO UPDATE SET title = EXCLUDED.title, objective = EXCLUDED.objective, rationale = EXCLUDED.rationale, scope = EXCLUDED.scope, requirements = EXCLUDED.requirements, test_procedure = EXCLUDED.test_procedure, evidence = EXCLUDED.evidence, severity = EXCLUDED.severity, risk_score = EXCLUDED.risk_score, implementation_effort = EXCLUDED.implementation_effort, open_anchors = EXCLUDED.open_anchors, release_state = EXCLUDED.release_state, tags = EXCLUDED.tags, updated_at = EXCLUDED.updated_at, license_rule = EXCLUDED.license_rule, source_original_text = EXCLUDED.source_original_text, source_citation = EXCLUDED.source_citation, customer_visible = EXCLUDED.customer_visible, generation_metadata = EXCLUDED.generation_metadata, verification_method = EXCLUDED.verification_method, category = EXCLUDED.category, target_audience = EXCLUDED.target_audience, generation_strategy = EXCLUDED.generation_strategy, pipeline_version = EXCLUDED.pipeline_version, applicable_industries = EXCLUDED.applicable_industries, applicable_company_size = EXCLUDED.applicable_company_size, scope_conditions = EXCLUDED.scope_conditions """), { "id": row[0], "framework_id": row[1], "control_id": row[2], "title": row[3], "objective": row[4], "rationale": row[5], "scope": json.dumps(row[6]) if isinstance(row[6], (dict, list)) else row[6], "requirements": json.dumps(row[7]) if isinstance(row[7], (dict, list)) else row[7], "test_procedure": json.dumps(row[8]) if isinstance(row[8], (dict, list)) else row[8], "evidence": json.dumps(row[9]) if isinstance(row[9], (dict, list)) else row[9], "severity": row[10], "risk_score": row[11], "implementation_effort": row[12], "evidence_confidence": row[13], "open_anchors": json.dumps(row[14]) if isinstance(row[14], (dict, list)) else row[14], "release_state": row[15], "tags": json.dumps(row[16]) if isinstance(row[16], (dict, list)) else row[16], "created_at": row[17], "updated_at": row[18], "license_rule": row[19], "source_original_text": row[20], "source_citation": json.dumps(row[21]) if isinstance(row[21], (dict, list)) else row[21], "customer_visible": row[22], "generation_metadata": json.dumps(row[23]) if isinstance(row[23], (dict, list)) else row[23], "verification_method": row[24], "category": row[25], "target_audience": json.dumps(row[26]) if isinstance(row[26], (dict, list)) else row[26], "generation_strategy": row[27], "pattern_id": row[28], "obligation_ids": json.dumps(row[29]) if isinstance(row[29], (dict, list)) else row[29], "parent_control_uuid": row[30], "decomposition_method": row[31], "pipeline_version": row[32], "applicable_industries": json.dumps(row[33]) if isinstance(row[33], (dict, list)) else row[33], "applicable_company_size": json.dumps(row[34]) if isinstance(row[34], (dict, list)) else row[34], "scope_conditions": json.dumps(row[35]) if isinstance(row[35], (dict, list)) else row[35], }) # Check if it was insert or update (xmax = 0 means insert) inserted += 1 except Exception as e: errors += 1 if errors <= 5: print(f" ERROR on {row[2]}: {str(e)[:100]}") if (i + 1) % 1000 == 0: sys.stdout.write(f"\r Progress: {i+1}/{len(rows)} (errors: {errors})") sys.stdout.flush() print(f"\r Synced: {len(rows)} controls (errors: {errors})") # ── Step 4: Verify ─────────────────────────────────────────────────── with prod_engine.connect() as prod_conn: r = prod_conn.execute(sql_text(""" SELECT release_state, count(*) FROM compliance.canonical_controls GROUP BY release_state ORDER BY count(*) DESC """)) print(f"\n === Production control states after sync ===") total = 0 for row in r.fetchall(): print(f" {str(row[0]):20s} {row[1]:6d}") total += row[1] print(f" {'TOTAL':20s} {total:6d}") r2 = prod_conn.execute(sql_text(""" SELECT count(*) FROM compliance.canonical_controls WHERE release_state NOT IN ('duplicate', 'too_close', 'deprecated') """)) active = r2.scalar() print(f"\n Active controls on production: {active}")