Files
breakpilot-compliance/scripts/apply_block_markers_022.py
Benjamin Admin 94b6b2b05b
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 36s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 19s
fix: Migration 022 — Regex an echte Template-Struktur angepasst (bold-headings)
2026-03-04 13:42:30 +01:00

164 lines
6.8 KiB
Python

#!/usr/bin/env python3
"""
Migration 022: Insert [BLOCK:ID]...[/BLOCK:ID] markers into DB templates.
Targets:
- NDA DE + NDA EN: wraps Vertragsstrafe section → NDA_PENALTY_BLOCK
- Cookie Banner DE: wraps Analyse section → COOKIE_ANALYTICS_BLOCK
wraps Marketing section → COOKIE_MARKETING_BLOCK
Usage:
python3 apply_block_markers_022.py
Or via Docker:
docker cp apply_block_markers_022.py bp-compliance-backend:/tmp/
docker exec bp-compliance-backend python3 /tmp/apply_block_markers_022.py
"""
import os
import re
import sys
# Allow running inside container where the app is at /app
sys.path.insert(0, '/app')
from sqlalchemy import create_engine, text
# ─────────────────────────────────────────────────────────────────────────────
# DB connection
# ─────────────────────────────────────────────────────────────────────────────
# Connection URL: read from the DATABASE_URL environment variable, falling
# back to the literal below when the variable is not set.
# NOTE(review): the fallback embeds a username and password in source —
# confirm these are non-production defaults.
DATABASE_URL = os.environ.get(
    'DATABASE_URL',
    'postgresql://compliance_user:compliance_pass@bp-core-postgres:5432/breakpilot_db'
)
# Module-level engine object built from DATABASE_URL; main() opens its
# transaction on it via engine.begin().
engine = create_engine(DATABASE_URL)
# ─────────────────────────────────────────────────────────────────────────────
# Marker helpers
# ─────────────────────────────────────────────────────────────────────────────
def wrap_block(content: str, block_id: str, pattern: str, flags: int = re.MULTILINE | re.DOTALL) -> tuple[str, int]:
    """
    Wrap every match of `pattern` in `content` with
    [BLOCK:block_id]...[/BLOCK:block_id] markers.

    The operation is idempotent: a match that is already enclosed in the
    markers for `block_id` is left untouched, so running the migration a
    second time does not nest markers.

    Args:
        content: Template text to transform.
        block_id: Identifier placed inside the opening and closing markers.
        pattern: Regular expression describing one section to wrap.
        flags: Flags passed to ``re.sub`` (default: MULTILINE | DOTALL).

    Returns:
        (new_content, match_count), where match_count is the number of
        pattern matches found, including matches that were skipped because
        they were already wrapped.
    """
    open_tag = f'[BLOCK:{block_id}]'
    close_tag = f'[/BLOCK:{block_id}]'
    match_count = 0

    def replacer(m: re.Match) -> str:
        nonlocal match_count
        match_count += 1
        matched = m.group(0)
        # Avoid double-wrapping. On a re-run the match begins at the section
        # heading, so the opening marker sits just *before* the match and the
        # closing marker ends up *inside* it. Checking only for the opening
        # marker inside the match (as before) therefore never triggered.
        already_wrapped = (
            open_tag in matched
            or close_tag in matched
            or m.string[:m.start()].rstrip().endswith(open_tag)
        )
        if already_wrapped:
            return matched
        return f'{open_tag}\n{matched}{close_tag}\n'

    new_content = re.sub(pattern, replacer, content, flags=flags)
    return new_content, match_count
# ─────────────────────────────────────────────────────────────────────────────
# Template-specific transformations
# ─────────────────────────────────────────────────────────────────────────────
def apply_nda_penalty_block(content: str) -> tuple[str, int]:
    """Mark the contractual-penalty section of an NDA template.

    A section starts at a bold numbered heading on a single line that
    contains "Vertragsstrafe" or "vertragsstrafe" (``**N. ... **``) and runs
    until the next ``**N.`` occurrence or the end of the text. Each such
    section is enclosed in NDA_PENALTY_BLOCK markers.

    Returns the (new_content, match_count) pair produced by wrap_block.
    """
    return wrap_block(
        content,
        'NDA_PENALTY_BLOCK',
        r'(\*\*\d+\.\s+[^\n]*[Vv]ertragsstrafe[^\n]*\*\*\n)(.*?)(?=\*\*\d+\.|\Z)',
    )
def apply_cookie_analytics_block(content: str) -> tuple[str, int]:
    """Mark the "Analyse" section of a cookie banner template.

    A section starts at a bold ``**Abschnitt ... Analyse ...**`` heading line
    and runs until the ``**Abschnitt ... Marketing`` heading or the end of
    the text. It is enclosed in COOKIE_ANALYTICS_BLOCK markers.

    Returns the (new_content, match_count) pair produced by wrap_block.
    """
    return wrap_block(
        content,
        'COOKIE_ANALYTICS_BLOCK',
        r'(\*\*Abschnitt\s+[^\n]*Analyse[^\n]*\*\*[^\n]*\n)(.*?)(?=\*\*Abschnitt\s+[^\n]*Marketing|\Z)',
    )
def apply_cookie_marketing_block(content: str) -> tuple[str, int]:
    """Mark the "Marketing" section of a cookie banner template.

    A section starts at a bold ``**Abschnitt ... Marketing ...**`` heading
    line and runs until a line beginning with ``---``, a line beginning with
    a bold lettered heading (``**A)``, ``**B)``, ...), or the end of the
    text. It is enclosed in COOKIE_MARKETING_BLOCK markers.

    Returns the (new_content, match_count) pair produced by wrap_block.
    """
    return wrap_block(
        content,
        'COOKIE_MARKETING_BLOCK',
        r'(\*\*Abschnitt\s+[^\n]*Marketing[^\n]*\*\*[^\n]*\n)(.*?)(?=\n---|\n\*\*[A-Z]\)|\Z)',
    )
# ─────────────────────────────────────────────────────────────────────────────
# Main
# ─────────────────────────────────────────────────────────────────────────────
# Work list for main(): each entry selects templates by document_type and
# language and names the transforms applied to their content, in list order.
TARGETS = [
    # (document_type_filter, language_filter, list of transform functions)
    ('nda', 'de', [apply_nda_penalty_block]),
    ('nda', 'en', [apply_nda_penalty_block]),
    # Analytics is wrapped before marketing.
    ('cookie_banner', 'de', [apply_cookie_analytics_block, apply_cookie_marketing_block]),
]
def main() -> None:
    """Apply the block-marker transforms to every targeted template.

    For each (document_type, language, transforms) entry in TARGETS, the
    matching rows of public.compliance_legal_templates are loaded, the
    transforms are applied to the content in order, and the row is updated
    only when the content actually changed. All statements run on the
    connection obtained from a single ``engine.begin()`` block.

    Progress is reported on stdout with one [SKIP], [NOOP] or [OK] line per
    template (or per target when no template matches).
    """
    print('=== Migration 022: Block Markers ===\n')
    with engine.begin() as conn:
        for doc_type, lang, transforms in TARGETS:
            rows = conn.execute(
                text(
                    'SELECT id, title, content FROM public.compliance_legal_templates '
                    'WHERE document_type = :doc_type AND language = :lang'
                ),
                {'doc_type': doc_type, 'lang': lang}
            ).fetchall()
            if not rows:
                print(f'[SKIP] No templates found for {doc_type}/{lang}')
                continue
            for row in rows:
                tid, title, content = row.id, row.title, row.content
                if content is None:
                    print(f'[SKIP] {title} (id={tid}) — content is NULL')
                    continue
                original_len = len(content)
                new_content = content
                total_matches = 0
                # Each transform sees the output of the previous one.
                for transform in transforms:
                    new_content, match_count = transform(new_content)
                    total_matches += match_count
                # Skip the UPDATE (and the updated_at bump) when nothing changed.
                if new_content == content:
                    print(f'[NOOP] {title} ({doc_type}/{lang}) — no changes')
                    continue
                conn.execute(
                    text(
                        'UPDATE public.compliance_legal_templates '
                        'SET content = :content, updated_at = NOW() '
                        'WHERE id = :id'
                    ),
                    {'content': new_content, 'id': tid}
                )
                # Separator added: the old and new lengths used to be printed
                # back to back and read as a single number.
                print(
                    f'[OK] {title} ({doc_type}/{lang})'
                    f' | {original_len} → {len(new_content)} chars'
                    f' | {total_matches} block(s) wrapped'
                )
    print('\n=== Done ===')
if __name__ == '__main__':
main()