feat: auto-run SQL migrations on backend startup
All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 35s
CI/CD / test-python-backend-compliance (push) Successful in 33s
CI/CD / test-python-document-crawler (push) Successful in 26s
CI/CD / test-python-dsms-gateway (push) Successful in 19s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / deploy-hetzner (push) Successful in 2m35s

Adds migration_runner.py that executes pending migrations from
migrations/ directory when backend-compliance starts. Tracks applied
migrations in _migration_history table.

Handles existing databases: detects if tables from migrations 001-045
already exist and seeds the history table accordingly, so only new
migrations (046+) are applied.

Skippable via SKIP_MIGRATIONS=true env var.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-13 09:14:18 +01:00
parent de19ef0684
commit cdafc4d9f4
2 changed files with 161 additions and 0 deletions

View File

@@ -7,10 +7,15 @@ Provides: Compliance Framework, Consent Admin, DSR, GDPR Export.
Runs on port 8002 with DB search_path=compliance,core,public.
"""
import logging
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
logger = logging.getLogger(__name__)
# Compliance-specific API routers
from consent_api import router as consent_router
from consent_admin_api import router as consent_admin_router
@@ -36,10 +41,23 @@ from middleware import (
SecurityHeadersMiddleware,
)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Apply pending database migrations before the app begins serving.

    Deliberately best-effort: a migration failure is logged but does not
    abort startup, so the backend can still come up (e.g. against an
    already-migrated database).
    """
    from migration_runner import run_migrations

    try:
        run_migrations()
        logger.info("Database migrations completed")
    except Exception as exc:
        logger.error("Migration failed: %s — backend starting anyway", exc)
    yield
# Application instance; `lifespan` runs the auto-migrations once at startup.
app = FastAPI(
    title="BreakPilot Compliance Backend",
    description="GDPR/DSGVO Compliance, Consent Management, Data Subject Requests, and Regulatory Compliance Framework",
    version="1.0.0",
    lifespan=lifespan,
)
# --- CORS ---

View File

@@ -0,0 +1,143 @@
"""
Auto-Migration Runner for BreakPilot Compliance Backend.
Runs all SQL migrations from the migrations/ directory on startup.
Tracks which migrations have already been applied in a _migration_history table.
Migrations are executed in filename order (001_, 002_, ...).
Safe for repeated runs: already-applied migrations are skipped.
Uses raw DBAPI connections to handle SQL files with explicit BEGIN/COMMIT.
"""
import logging
import os
import re
from pathlib import Path
from sqlalchemy import text
from database import engine, SCHEMA_SEARCH_PATH
logger = logging.getLogger(__name__)

# Migrations live next to this module in a migrations/ subdirectory.
MIGRATIONS_DIR = Path(__file__).parent / "migrations"
# Accepted filenames: a 3-digit ordering prefix, underscore, name, .sql
# (e.g. "046_add_consent_audit.sql"). Anything else is ignored.
MIGRATION_PATTERN = re.compile(r"^(\d{3})_.+\.sql$")

# Migrations that existed before the auto-runner was introduced.
# These are assumed to have been applied manually on all environments.
_PRE_RUNNER_CUTOFF = 45
def run_migrations():
    """Run all pending SQL migrations in filename order (001_, 002_, ...).

    Idempotent: applied migrations are tracked in the _migration_history
    table and skipped on subsequent runs. On a first run against an
    existing database, history is seeded for pre-runner migrations via
    _seed_existing_migrations so they are not re-executed.

    Set SKIP_MIGRATIONS=1/true/yes to disable entirely.

    Raises:
        RuntimeError: if a migration file fails to execute. The failed
            migration is rolled back and no later migration is attempted.
    """
    if os.getenv("SKIP_MIGRATIONS", "").lower() in ("1", "true", "yes"):
        logger.info("SKIP_MIGRATIONS is set — skipping auto-migration")
        return

    if not MIGRATIONS_DIR.is_dir():
        logger.warning("Migrations directory not found: %s", MIGRATIONS_DIR)
        return

    # Raw DBAPI connection so SQL files with explicit BEGIN/COMMIT run as-is.
    raw_conn = engine.raw_connection()
    try:
        cursor = raw_conn.cursor()
        try:
            # SCHEMA_SEARCH_PATH is a trusted config constant (not user input),
            # so interpolating it here is safe.
            cursor.execute(f"SET search_path TO {SCHEMA_SEARCH_PATH}")

            # Ensure the tracking table exists before anything else.
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS _migration_history (
                    id SERIAL PRIMARY KEY,
                    filename VARCHAR(255) NOT NULL UNIQUE,
                    applied_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
                )
            """)
            raw_conn.commit()

            # Already-applied migrations, keyed by filename.
            cursor.execute("SELECT filename FROM _migration_history")
            applied = {row[0] for row in cursor.fetchall()}

            # Discover migration files; sorted() gives numeric order because
            # of the fixed-width 3-digit prefix.
            migration_files = sorted(
                f for f in MIGRATIONS_DIR.iterdir()
                if f.is_file() and MIGRATION_PATTERN.match(f.name)
            )

            # First run: if history is empty but the DB already has tables
            # from manually applied migrations, seed history so we don't
            # re-run them.
            if not applied:
                _seed_existing_migrations(cursor, raw_conn, migration_files)
                cursor.execute("SELECT filename FROM _migration_history")
                applied = {row[0] for row in cursor.fetchall()}

            pending = [f for f in migration_files if f.name not in applied]
            if not pending:
                logger.info("All %d migrations already applied", len(applied))
                return

            logger.info("%d pending migrations (of %d total)", len(pending), len(migration_files))

            for migration_file in pending:
                logger.info("Applying migration: %s", migration_file.name)
                try:
                    sql = migration_file.read_text(encoding="utf-8")
                    # Execute the full SQL file as-is (supports BEGIN/COMMIT).
                    cursor.execute(sql)
                    raw_conn.commit()
                    # Record successful application in its own commit so the
                    # history row survives even if a later migration fails.
                    cursor.execute(
                        "INSERT INTO _migration_history (filename) VALUES (%s)",
                        (migration_file.name,),
                    )
                    raw_conn.commit()
                    logger.info(" OK: %s", migration_file.name)
                except Exception as e:
                    raw_conn.rollback()
                    # Fix: original format string was "%s%s", which glued the
                    # filename and the exception text together with no separator.
                    logger.error(" FAILED: %s — %s", migration_file.name, e)
                    raise RuntimeError(
                        f"Migration {migration_file.name} failed: {e}"
                    ) from e

            logger.info("All migrations applied successfully")
        finally:
            # Fix: the cursor was previously never closed, leaking it on
            # every startup; only the connection had a finally.
            cursor.close()
    finally:
        raw_conn.close()
def _seed_existing_migrations(cursor, conn, migration_files: list[Path]):
    """Seed _migration_history with pre-existing migrations.

    On first run the history table is empty. We check whether the DB already
    has tables from earlier migrations (canonical_controls, created by 044,
    serves as the sentinel). If yes, all migration files numbered up to
    _PRE_RUNNER_CUTOFF are marked as applied so the runner doesn't re-run
    them; a fresh database is left untouched so every migration runs.

    Args:
        cursor: open DBAPI cursor (PostgreSQL dialect — %s placeholders).
        conn: the cursor's connection, used to commit the seed inserts.
        migration_files: discovered migration files, in filename order.
    """
    # Sentinel probe: does a table from the pre-runner era already exist?
    cursor.execute("""
        SELECT EXISTS (
            SELECT 1 FROM information_schema.tables
            WHERE table_name = 'canonical_controls'
        )
    """)
    db_has_tables = cursor.fetchone()[0]

    if not db_has_tables:
        logger.info("Fresh database — no seeding needed, will run all migrations")
        return

    logger.info(
        "Existing database detected — seeding migration history for pre-runner migrations (001-%03d)",
        _PRE_RUNNER_CUTOFF,
    )

    seeded = 0
    for f in migration_files:
        match = MIGRATION_PATTERN.match(f.name)
        if not match:
            continue
        if int(match.group(1)) <= _PRE_RUNNER_CUTOFF:
            # ON CONFLICT keeps the seeding idempotent across partial runs.
            cursor.execute(
                "INSERT INTO _migration_history (filename) VALUES (%s) ON CONFLICT (filename) DO NOTHING",
                (f.name,),
            )
            seeded += 1
    conn.commit()

    # Fix: previously this logged the cutoff constant (always 45) instead of
    # how many migration files were actually present and seeded.
    logger.info("Seeded %d pre-existing migrations", seeded)