#!/usr/bin/env python3
"""
Migration 022: Insert [BLOCK:ID]...[/BLOCK:ID] markers into DB templates.

Targets:
  - NDA DE + NDA EN: wraps Vertragsstrafe section → NDA_PENALTY_BLOCK
  - Cookie Banner DE: wraps Analyse section → COOKIE_ANALYTICS_BLOCK
                      wraps Marketing section → COOKIE_MARKETING_BLOCK

The migration is idempotent: a section that already carries its markers is
left untouched, so running the script again does not nest markers.

Usage:
  python3 apply_block_markers_022.py

Or via Docker:
  docker cp apply_block_markers_022.py bp-compliance-backend:/tmp/
  docker exec bp-compliance-backend python3 /tmp/apply_block_markers_022.py
"""

import os
import re
import sys

# Allow running inside container where the app is at /app
sys.path.insert(0, '/app')

from sqlalchemy import create_engine, text  # noqa: E402  (must follow the sys.path tweak)

# ─────────────────────────────────────────────────────────────────────────────
# DB connection
# ─────────────────────────────────────────────────────────────────────────────

# NOTE(review): the fallback URL embeds credentials; prefer always supplying
# DATABASE_URL via the environment.
DATABASE_URL = os.environ.get(
    'DATABASE_URL',
    'postgresql://compliance_user:compliance_pass@bp-core-postgres:5432/breakpilot_db'
)

engine = create_engine(DATABASE_URL)


# ─────────────────────────────────────────────────────────────────────────────
# Marker helpers
# ─────────────────────────────────────────────────────────────────────────────

def wrap_block(content: str, block_id: str, pattern: str,
               flags: int = re.MULTILINE | re.DOTALL) -> tuple[str, int]:
    """
    Wraps every match of `pattern` in `content` with
    [BLOCK:block_id]...[/BLOCK:block_id].

    A match that is already wrapped is returned unchanged, which makes the
    operation safe to apply repeatedly to the same content.

    Args:
        content: Template text to transform.
        block_id: Identifier placed inside the opening and closing markers.
        pattern: Regular expression selecting the section(s) to wrap.
        flags: Regex flags passed to `re.sub`.

    Returns:
        (new_content, wrapped_count), where wrapped_count is the number of
        sections that were newly wrapped by this call.
    """
    opening = f'[BLOCK:{block_id}]'
    closing = f'[/BLOCK:{block_id}]'
    wrapped_count = 0

    def replacer(m: re.Match) -> str:
        nonlocal wrapped_count
        matched = m.group(0)

        # Avoid double-wrapping. The opening marker is written *before* the
        # matched section, so on a re-run it sits directly ahead of the match
        # instead of inside it; the closing marker, written right after the
        # section, ends up inside the re-matched text. Check both positions.
        preceded_by_opening = m.string[:m.start()].rstrip().endswith(opening)
        if preceded_by_opening or closing in matched or opening in matched:
            return matched

        wrapped_count += 1
        # Keep the closing marker on its own line even when the section is the
        # last thing in the document and lacks a trailing newline.
        if matched and not matched.endswith('\n'):
            matched += '\n'
        return f'{opening}\n{matched}{closing}\n'

    new_content = re.sub(pattern, replacer, content, flags=flags)
    return new_content, wrapped_count


# ─────────────────────────────────────────────────────────────────────────────
# Template-specific transformations
# ─────────────────────────────────────────────────────────────────────────────

def apply_nda_penalty_block(content: str) -> tuple[str, int]:
    """Wraps the Vertragsstrafe section in NDA templates.

    Returns the (new_content, wrapped_count) pair produced by `wrap_block`.
    """
    # Match: section header containing "Vertragsstrafe" up to the next
    # numbered "## N" section header or the end of the content.
    pattern = r'(^## \d+[\.:]?\s+[^\n]*[Vv]ertragsstrafe[^\n]*\n)(.*?)(?=^## \d+|\Z)'
    return wrap_block(content, 'NDA_PENALTY_BLOCK', pattern)


def apply_cookie_analytics_block(content: str) -> tuple[str, int]:
    """Wraps the ### Analyse section in Cookie Banner templates.

    Returns the (new_content, wrapped_count) pair produced by `wrap_block`.
    """
    # Match: "### Analyse" header line up to the next "###" header or the end.
    pattern = r'(^### Analyse\b[^\n]*\n)(.*?)(?=^###|\Z)'
    return wrap_block(content, 'COOKIE_ANALYTICS_BLOCK', pattern)


def apply_cookie_marketing_block(content: str) -> tuple[str, int]:
    """Wraps the ### Marketing section in Cookie Banner templates.

    Returns the (new_content, wrapped_count) pair produced by `wrap_block`.
    """
    # Match: "### Marketing" header line up to the next "###" header or the end.
    pattern = r'(^### Marketing\b[^\n]*\n)(.*?)(?=^###|\Z)'
    return wrap_block(content, 'COOKIE_MARKETING_BLOCK', pattern)


# ─────────────────────────────────────────────────────────────────────────────
# Main
# ─────────────────────────────────────────────────────────────────────────────

TARGETS = [
    # (document_type_filter, language_filter, list of transform functions)
    ('nda', 'de', [apply_nda_penalty_block]),
    ('nda', 'en', [apply_nda_penalty_block]),
    ('cookie_banner', 'de', [apply_cookie_analytics_block, apply_cookie_marketing_block]),
]


def main() -> None:
    """Apply the block-marker transforms to every targeted template row.

    All updates run inside a single `engine.begin()` transaction block.
    Templates whose content is NULL or is not changed by the transforms are
    reported and skipped.
    """
    print('=== Migration 022: Block Markers ===\n')

    with engine.begin() as conn:
        for doc_type, lang, transforms in TARGETS:
            rows = conn.execute(
                text(
                    'SELECT id, title, content '
                    'FROM public.compliance_legal_templates '
                    'WHERE document_type = :doc_type AND language = :lang'
                ),
                {'doc_type': doc_type, 'lang': lang}
            ).fetchall()

            if not rows:
                print(f'[SKIP] No templates found for {doc_type}/{lang}')
                continue

            for row in rows:
                tid, title, content = row.id, row.title, row.content
                if content is None:
                    print(f'[SKIP] {title} (id={tid}) — content is NULL')
                    continue

                original_len = len(content)
                new_content = content
                total_matches = 0

                # Transforms are chained: each one sees the previous output.
                for transform in transforms:
                    new_content, match_count = transform(new_content)
                    total_matches += match_count

                if new_content == content:
                    print(f'[NOOP] {title} ({doc_type}/{lang}) — no changes')
                    continue

                conn.execute(
                    text(
                        'UPDATE public.compliance_legal_templates '
                        'SET content = :content, updated_at = NOW() '
                        'WHERE id = :id'
                    ),
                    {'content': new_content, 'id': tid}
                )
                print(
                    f'[OK] {title} ({doc_type}/{lang})'
                    f' | {original_len} → {len(new_content)} chars'
                    f' | {total_matches} block(s) wrapped'
                )

    print('\n=== Done ===')


if __name__ == '__main__':
    main()