Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-consent (push) Successful in 35s
CI / test-python-voice (push) Successful in 33s
CI / test-bqas (push) Successful in 37s
CI / Deploy (push) Failing after 2s
- Applicability Engine (deterministisch, kein LLM): filtert Controls nach Branche, Unternehmensgroesse, Scope-Signalen - API-Filter auf GET /controls, /controls-count, /controls-meta - POST /controls/applicable Endpoint fuer Company-Profile-Matching - 35 Unit-Tests fuer Engine - Port-8098-Konflikt mit Nginx gefixt (nur expose, kein Host-Port) - CLAUDE.md: control-pipeline Dokumentation ergaenzt - 6 internationale Gesetze geloescht (ES/FR/HU/NL/SE/CZ — nur DACH) - DB-Backup-Import-Script (import_backup.py) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
220 lines
6.2 KiB
Python
220 lines
6.2 KiB
Python
"""
|
|
Import compliance backup into local PostgreSQL.
|
|
Fixes Python-style lists/dicts in JSONB fields to valid JSON.
|
|
"""
|
|
import ast
import gzip
import json
import os
import re
import sys

import psycopg2
DB_URL = "postgresql://breakpilot:breakpilot123@localhost:5432/breakpilot_db"
|
|
BACKUP_PATH = "/tmp/compliance-db-2026-03-28_16-25-19.sql.gz"
|
|
|
|
# Tables with JSONB columns that need Python→JSON conversion
|
|
JSONB_TABLES = {
|
|
"canonical_controls",
|
|
"canonical_controls_pre_dedup",
|
|
"obligation_candidates",
|
|
"control_dedup_reviews",
|
|
"canonical_generation_jobs",
|
|
"canonical_processed_chunks",
|
|
}
|
|
|
|
|
|
def fix_python_value(val: str) -> "str | None":
    """Convert a raw SQL literal to a JSON string for JSONB fields.

    Args:
        val: One value token from an INSERT statement, e.g. "NULL",
            "42", or a single-quoted SQL string ("'...'", where ''
            is the escaped single quote).

    Returns:
        None for SQL NULL. For quoted strings, the unquoted content:
        converted from a Python literal (list/dict/...) to JSON where
        it parses as one, otherwise unchanged. Any other token is
        returned as-is.
    """
    if val == "NULL":
        return None

    # Only quoted strings need unescaping/conversion; pass everything
    # else (numbers, keywords) through untouched.
    if not (val.startswith("'") and val.endswith("'")):
        return val

    # Strip outer SQL quotes and unescape doubled single quotes.
    inner = val[1:-1].replace("''", "'")

    # A value dumped as a Python repr (e.g. "['a', 'b']") parses as a
    # Python literal; re-serialize it as JSON so PostgreSQL accepts it.
    try:
        obj = ast.literal_eval(inner)
        return json.dumps(obj, ensure_ascii=False)
    except (ValueError, SyntaxError):
        # Already valid JSON or a plain string.
        return inner
def _split_sql_values(vals_str):
    """Split the VALUES(...) payload of an INSERT into raw value tokens.

    Commas act as separators only outside single-quoted strings and at
    parenthesis depth 0. Doubled single quotes ('') inside a quoted
    string are kept verbatim so the caller can unescape them later.
    """
    values = []
    current = ""
    in_quote = False
    depth = 0
    i = 0
    n = len(vals_str)
    while i < n:
        c = vals_str[i]
        if in_quote:
            if c == "'" and i + 1 < n and vals_str[i + 1] == "'":
                # Escaped quote — keep both characters verbatim.
                current += "''"
                i += 2
                continue
            if c == "'":
                current += "'"
                in_quote = False
            else:
                current += c
        else:
            if c == "'":
                current += "'"
                in_quote = True
            elif c == "(":
                depth += 1
                current += c
            elif c == ")":
                depth -= 1
                current += c
            elif c == "," and depth == 0:
                values.append(current.strip())
                current = ""
            else:
                current += c
        i += 1
    values.append(current.strip())
    return values


def _convert_sql_value(val):
    """Convert one raw SQL value token into a psycopg2 parameter.

    NULL/TRUE/FALSE map to None/True/False. Quoted strings are
    unescaped and, when they look like Python list/dict reprs,
    re-serialized as JSON text. Bare tokens are tried as int/float
    and otherwise passed through unchanged.
    """
    if val == "NULL":
        return None
    if val in ("TRUE", "true"):
        return True
    if val in ("FALSE", "false"):
        return False
    if val.startswith("'") and val.endswith("'"):
        inner = val[1:-1].replace("''", "'")
        stripped = inner.strip()
        # Python-repr list/dict (but not the empty containers, which
        # are already valid JSON) -> convert to JSON text.
        if stripped and stripped[0] in ("[", "{") and stripped not in ("[]", "{}"):
            try:
                return json.dumps(ast.literal_eval(inner), ensure_ascii=False)
            except (ValueError, SyntaxError):
                return inner
        return inner
    # Numeric or other bare token.
    try:
        return float(val) if "." in val else int(val)
    except ValueError:
        return val


def _execute_verbatim(line, conn, table):
    """Execute an INSERT statement as-is; roll back and report on failure.

    Duplicate-key errors are expected on re-import and stay silent;
    anything else is reported to stderr.
    """
    try:
        with conn.cursor() as cur:
            cur.execute(line)
        return True
    except Exception as e:
        conn.rollback()
        if "duplicate key" not in str(e):
            print(f" ERROR [{table}]: {str(e)[:120]}", file=sys.stderr)
        return False


def process_line(line: str, conn) -> bool:
    """Process a single SQL statement from the dump.

    Non-INSERT statements are ignored. INSERTs into tables without
    JSONB columns are executed verbatim. INSERTs into JSONB_TABLES are
    re-parsed and executed as parameterized queries so Python-repr
    values can be converted to valid JSON first.

    Args:
        line: One complete SQL statement (may carry surrounding whitespace).
        conn: An open psycopg2 connection (autocommit expected by main()).

    Returns:
        True if an INSERT was executed successfully, False otherwise.
    """
    line = line.strip()
    if not line.startswith("INSERT INTO"):
        return False

    table_match = re.match(r'INSERT INTO "(\w+)"', line)
    if not table_match:
        return False
    table = table_match.group(1)

    # Tables without JSONB columns need no value rewriting.
    if table not in JSONB_TABLES:
        return _execute_verbatim(line, conn, table)

    # Extract column names from the INSERT header.
    cols_match = re.match(r'INSERT INTO "\w+" \(([^)]+)\) VALUES \(', line)
    if not cols_match:
        return False
    col_names = [c.strip().strip('"') for c in cols_match.group(1).split(",")]

    # Extract the VALUES payload (drop the trailing ");").
    vals_start = line.index("VALUES (") + 8
    values = _split_sql_values(line[vals_start:-2])

    if len(values) != len(col_names):
        # Parsing failed; fall back to executing the raw statement.
        return _execute_verbatim(line, conn, table)

    params = [_convert_sql_value(v) for v in values]
    col_list = ", ".join(f'"{c}"' for c in col_names)
    ph_list = ", ".join(["%s"] * len(params))
    sql = f'INSERT INTO "{table}" ({col_list}) VALUES ({ph_list})'

    try:
        with conn.cursor() as cur:
            cur.execute(sql, params)
        return True
    except Exception as e:
        conn.rollback()
        # Duplicate keys are expected on re-import; report anything else.
        if "duplicate key" not in str(e):
            print(f" ERROR [{table}]: {str(e)[:120]}", file=sys.stderr)
        return False
def main():
    """Stream the gzipped SQL dump and replay its INSERT statements.

    Connects to DB_URL with autocommit, sets the search path, then
    reads BACKUP_PATH line by line, accumulating multi-line statements
    until a terminating ";". Progress is printed every 10k INSERTs.
    """
    conn = psycopg2.connect(DB_URL)
    conn.autocommit = True

    try:
        with conn.cursor() as cur:
            cur.execute("SET search_path TO compliance, public")

        total = 0
        ok = 0
        errors = 0

        print(f"Reading {BACKUP_PATH}...")
        with gzip.open(BACKUP_PATH, "rt", encoding="utf-8") as f:
            buffer = ""
            for line in f:
                buffer += line
                # Statements may span lines; accumulate until ";".
                if not buffer.rstrip().endswith(";"):
                    continue
                # Complete SQL statement assembled.
                stmt = buffer.strip()
                buffer = ""

                if not stmt.startswith("INSERT"):
                    continue

                total += 1
                if process_line(stmt, conn):
                    ok += 1
                else:
                    errors += 1

                if total % 10000 == 0:
                    print(f" {total:>8} processed, {ok} ok, {errors} errors")

        print(f"\nDONE: {total} total, {ok} ok, {errors} errors")
    finally:
        # Release the connection even if the import aborts mid-stream.
        conn.close()


if __name__ == "__main__":
    main()