Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 36s
CI/CD / test-python-backend-compliance (push) Successful in 32s
CI/CD / test-python-document-crawler (push) Successful in 22s
CI/CD / test-python-dsms-gateway (push) Successful in 19s
CI/CD / validate-canonical-controls (push) Successful in 10s
CI/CD / Deploy (push) Has been skipped
QA pipeline that matches control source_original_text directly against original PDF documents to verify article/paragraph assignments. Covers backfill, dedup, source normalization, Qdrant cleanup, and prod sync. Key results (2026-03-20): - 4,110/7,943 controls matched to PDF (100% for major EU regs) - 3,366 article corrections, 705 new assignments - 1,290 controls from Erwägungsgründe (preamble) identified - 779 controls from Anhänge (annexes) identified Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
207 lines
10 KiB
Python
207 lines
10 KiB
Python
"""
|
|
Sync controls from Mac Mini (local) to Production (Hetzner).
|
|
Both have PostgreSQL. Mac Mini has 6,373 active controls, Production ~3,159.
|
|
|
|
Strategy:
|
|
1. Export all non-duplicate/non-too_close controls from Mac Mini
|
|
2. Upsert into Production (ON CONFLICT update, preserve production-only data)
|
|
3. Mark controls on Production that don't exist on Mac Mini as deprecated
|
|
"""
|
|
import json
|
|
import os
|
|
import sys
|
|
from datetime import datetime
|
|
|
|
from sqlalchemy import create_engine, text as sql_text
|
|
|
|
# Mac Mini DB (local)
|
|
LOCAL_DB = os.environ['DATABASE_URL']
|
|
# Production DB (Hetzner) — same env var format
|
|
PROD_DB = os.environ.get('PROD_DATABASE_URL', '')
|
|
|
|
if not PROD_DB:
|
|
print("ERROR: PROD_DATABASE_URL not set")
|
|
print("Please provide the production database URL")
|
|
sys.exit(1)
|
|
|
|
DRY_RUN = '--dry-run' in sys.argv
|
|
|
|
local_engine = create_engine(LOCAL_DB, connect_args={"options": "-c search_path=compliance,public"})
|
|
prod_engine = create_engine(PROD_DB, connect_args={"options": "-c search_path=compliance,public"})
|
|
|
|
# ── Step 1: Export from Mac Mini ──────────────────────────────────────
print("=" * 60)
print("SYNC CONTROLS: Mac Mini → Production")
print("=" * 60)

with local_engine.connect() as local_conn:
    # Get all controls (include duplicates/too_close so prod knows about them).
    rows = local_conn.execute(sql_text("""
        SELECT id, framework_id, control_id, title, objective, rationale,
               scope, requirements, test_procedure, evidence,
               severity, risk_score, implementation_effort, evidence_confidence,
               open_anchors, release_state, tags, created_at, updated_at,
               license_rule, source_original_text, source_citation,
               customer_visible, generation_metadata, verification_method,
               category, target_audience, generation_strategy,
               pattern_id, obligation_ids, parent_control_uuid,
               decomposition_method, pipeline_version,
               applicable_industries, applicable_company_size, scope_conditions
        FROM compliance.canonical_controls
    """)).fetchall()

    print(f" Local controls: {len(rows)}")

    # Tally controls per release_state. Access the column by NAME via
    # Row._mapping instead of the fragile positional index r[15], so the
    # tally survives any reordering of the SELECT list above.
    states = {}
    for r in rows:
        s = r._mapping['release_state']
        states[s] = states.get(s, 0) + 1
    for s, c in sorted(states.items(), key=lambda x: -x[1]):
        print(f" {s}: {c}")
|
|
# ── Step 2: Check Production state ───────────────────────────────────
with prod_engine.connect() as prod_conn:
    # Snapshot the production row count before any writes happen.
    prod_count = prod_conn.execute(sql_text("""
        SELECT count(*) FROM compliance.canonical_controls
    """)).scalar()
    print(f"\n Production controls before sync: {prod_count}")

    # Warn early if the expected framework row is missing on production.
    framework_row = prod_conn.execute(sql_text("""
        SELECT id FROM compliance.canonical_control_frameworks
        WHERE framework_id = 'bp_security_v1' LIMIT 1
    """)).fetchone()
    if framework_row is None:
        print(" WARNING: Framework bp_security_v1 not found on production!")
    else:
        print(f" Framework bp_security_v1: {framework_row[0]}")
|
|
|
|
# ── Step 3: Upsert to Production ─────────────────────────────────────

def _jsonify(value):
    """Serialize dict/list values to a JSON string; pass everything else through.

    json/jsonb columns must be bound as text, while scalar columns (strings,
    numbers, timestamps, UUIDs) are bound unchanged. Applying this to every
    column is safe because only dicts and lists are converted.
    """
    return json.dumps(value) if isinstance(value, (dict, list)) else value


# Column names in the exact order of the Step-1 SELECT; zipped against each
# row tuple to build named bind parameters (replaces 36 hand-indexed row[i]s).
_COLUMN_NAMES = (
    "id", "framework_id", "control_id", "title", "objective", "rationale",
    "scope", "requirements", "test_procedure", "evidence",
    "severity", "risk_score", "implementation_effort", "evidence_confidence",
    "open_anchors", "release_state", "tags", "created_at", "updated_at",
    "license_rule", "source_original_text", "source_citation",
    "customer_visible", "generation_metadata", "verification_method",
    "category", "target_audience", "generation_strategy",
    "pattern_id", "obligation_ids", "parent_control_uuid",
    "decomposition_method", "pipeline_version",
    "applicable_industries", "applicable_company_size", "scope_conditions",
)

# The ON CONFLICT SET list deliberately omits evidence_confidence, created_at,
# pattern_id, obligation_ids, parent_control_uuid and decomposition_method so
# production-only values for those columns are preserved on update.
# RETURNING (xmax = 0) is true only for a freshly inserted row, which lets us
# count inserts vs. updates accurately.
_UPSERT_SQL = sql_text("""
    INSERT INTO compliance.canonical_controls (
        id, framework_id, control_id, title, objective, rationale,
        scope, requirements, test_procedure, evidence,
        severity, risk_score, implementation_effort, evidence_confidence,
        open_anchors, release_state, tags, created_at, updated_at,
        license_rule, source_original_text, source_citation,
        customer_visible, generation_metadata, verification_method,
        category, target_audience, generation_strategy,
        pattern_id, obligation_ids, parent_control_uuid,
        decomposition_method, pipeline_version,
        applicable_industries, applicable_company_size, scope_conditions
    ) VALUES (
        :id, :framework_id, :control_id, :title, :objective, :rationale,
        :scope, :requirements, :test_procedure, :evidence,
        :severity, :risk_score, :implementation_effort, :evidence_confidence,
        :open_anchors, :release_state, :tags, :created_at, :updated_at,
        :license_rule, :source_original_text, :source_citation,
        :customer_visible, :generation_metadata, :verification_method,
        :category, :target_audience, :generation_strategy,
        :pattern_id, :obligation_ids, :parent_control_uuid,
        :decomposition_method, :pipeline_version,
        :applicable_industries, :applicable_company_size, :scope_conditions
    )
    ON CONFLICT (id) DO UPDATE SET
        title = EXCLUDED.title,
        objective = EXCLUDED.objective,
        rationale = EXCLUDED.rationale,
        scope = EXCLUDED.scope,
        requirements = EXCLUDED.requirements,
        test_procedure = EXCLUDED.test_procedure,
        evidence = EXCLUDED.evidence,
        severity = EXCLUDED.severity,
        risk_score = EXCLUDED.risk_score,
        implementation_effort = EXCLUDED.implementation_effort,
        open_anchors = EXCLUDED.open_anchors,
        release_state = EXCLUDED.release_state,
        tags = EXCLUDED.tags,
        updated_at = EXCLUDED.updated_at,
        license_rule = EXCLUDED.license_rule,
        source_original_text = EXCLUDED.source_original_text,
        source_citation = EXCLUDED.source_citation,
        customer_visible = EXCLUDED.customer_visible,
        generation_metadata = EXCLUDED.generation_metadata,
        verification_method = EXCLUDED.verification_method,
        category = EXCLUDED.category,
        target_audience = EXCLUDED.target_audience,
        generation_strategy = EXCLUDED.generation_strategy,
        pipeline_version = EXCLUDED.pipeline_version,
        applicable_industries = EXCLUDED.applicable_industries,
        applicable_company_size = EXCLUDED.applicable_company_size,
        scope_conditions = EXCLUDED.scope_conditions
    RETURNING (xmax = 0) AS was_insert
""")

if DRY_RUN:
    # Honor --dry-run (previously parsed but silently ignored): report what
    # would happen and leave production untouched.
    print(f"\n [dry-run] Would sync {len(rows)} controls to production (no writes)")
else:
    print(f"\n Syncing {len(rows)} controls to production...")

    with prod_engine.begin() as prod_conn:
        inserted = 0
        updated = 0
        errors = 0

        for i, row in enumerate(rows):
            params = {name: _jsonify(value) for name, value in zip(_COLUMN_NAMES, row)}
            try:
                # SAVEPOINT per row: without it, one failed INSERT aborts the
                # surrounding Postgres transaction and every later statement
                # fails with "current transaction is aborted" — the old code
                # silently lost all rows after the first error.
                with prod_conn.begin_nested():
                    was_insert = prod_conn.execute(_UPSERT_SQL, params).scalar()
                if was_insert:
                    inserted += 1
                else:
                    updated += 1
            except Exception as e:
                errors += 1
                if errors <= 5:
                    print(f" ERROR on {row[2]}: {str(e)[:100]}")

            if (i + 1) % 1000 == 0:
                sys.stdout.write(f"\r Progress: {i+1}/{len(rows)} (errors: {errors})")
                sys.stdout.flush()

        print(f"\r Synced: {len(rows)} controls (errors: {errors})")
        print(f" Inserted: {inserted}, updated: {updated}")
|
|
|
|
# ── Step 4: Verify ───────────────────────────────────────────────────
with prod_engine.connect() as prod_conn:
    # Per-state breakdown after the sync, most populous state first.
    breakdown = prod_conn.execute(sql_text("""
        SELECT release_state, count(*)
        FROM compliance.canonical_controls
        GROUP BY release_state
        ORDER BY count(*) DESC
    """)).fetchall()

    print("\n === Production control states after sync ===")
    total = 0
    for state, count in breakdown:
        print(f" {str(state):20s} {count:6d}")
        total += count
    print(f" {'TOTAL':20s} {total:6d}")

    # "Active" excludes the bookkeeping states that are synced but not live.
    active = prod_conn.execute(sql_text("""
        SELECT count(*) FROM compliance.canonical_controls
        WHERE release_state NOT IN ('duplicate', 'too_close', 'deprecated')
    """)).scalar()
    print(f"\n Active controls on production: {active}")