All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 36s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 19s
164 lines
6.8 KiB
Python
164 lines
6.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Migration 022: Insert [BLOCK:ID]...[/BLOCK:ID] markers into DB templates.
|
|
|
|
Targets:
|
|
- NDA DE + NDA EN: wraps Vertragsstrafe section → NDA_PENALTY_BLOCK
|
|
- Cookie Banner DE: wraps Analyse section → COOKIE_ANALYTICS_BLOCK
|
|
wraps Marketing section → COOKIE_MARKETING_BLOCK
|
|
|
|
Usage:
|
|
python3 apply_block_markers_022.py
|
|
|
|
Or via Docker:
|
|
docker cp apply_block_markers_022.py bp-compliance-backend:/tmp/
|
|
docker exec bp-compliance-backend python3 /tmp/apply_block_markers_022.py
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
# Allow running inside container where the app is at /app
|
|
sys.path.insert(0, '/app')
|
|
|
|
from sqlalchemy import create_engine, text
|
|
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
# DB connection
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
# Connection string: taken from the DATABASE_URL environment variable when it
# is set, otherwise the hard-coded fallback below is used.
DATABASE_URL = os.getenv(
    'DATABASE_URL',
    'postgresql://compliance_user:compliance_pass@bp-core-postgres:5432/breakpilot_db',
)

# Module-level engine shared by main().
engine = create_engine(DATABASE_URL)
|
|
|
|
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
# Marker helpers
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
def wrap_block(content: str, block_id: str, pattern: str, flags: int = re.MULTILINE | re.DOTALL) -> tuple[str, int]:
    """
    Wrap every match of `pattern` in `content` with
    [BLOCK:block_id]...[/BLOCK:block_id] markers.

    A match that is already wrapped is left untouched, so applying the same
    transformation to its own output does not nest markers. A match counts as
    already wrapped when it contains the opening marker, or when the opening
    marker directly precedes it (only whitespace in between).

    Args:
        content: Template text to transform.
        block_id: Identifier placed inside the opening and closing markers.
        pattern: Regular expression selecting the section(s) to wrap.
        flags: `re` flags for the substitution; defaults to MULTILINE | DOTALL.

    Returns:
        (new_content, match_count), where match_count is the number of
        matches that were newly wrapped by this call.
    """
    opening = f'[BLOCK:{block_id}]'
    closing = f'[/BLOCK:{block_id}]'
    match_count = 0

    def replacer(m: re.Match) -> str:
        nonlocal match_count
        matched = m.group(0)
        # When a pattern starts at the section heading, a marker inserted by an
        # earlier run sits just *before* the match rather than inside it, so
        # the text preceding the match has to be inspected as well.
        preceding = m.string[:m.start()].rstrip()
        if opening in matched or preceding.endswith(opening):
            return matched
        match_count += 1
        return f'{opening}\n{matched}{closing}\n'

    new_content = re.sub(pattern, replacer, content, flags=flags)
    return new_content, match_count
|
|
|
|
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
# Template-specific transformations
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
def apply_nda_penalty_block(content: str) -> tuple[str, int]:
    """Mark the contractual-penalty (Vertragsstrafe) section of an NDA template.

    The section starts at a bold numbered heading line whose text contains
    "Vertragsstrafe" or "vertragsstrafe" (for example **7. Vertragsstrafe**)
    and runs up to the next "**<number>." occurrence or the end of the text.

    NOTE(review): the heading is matched by its German wording only, while
    TARGETS also runs this transform for the 'en' NDA. Confirm the English
    template uses that word; otherwise it will simply not match.

    Returns the (new_content, match_count) pair produced by wrap_block.
    """
    return wrap_block(
        content=content,
        block_id='NDA_PENALTY_BLOCK',
        pattern=r'(\*\*\d+\.\s+[^\n]*[Vv]ertragsstrafe[^\n]*\*\*\n)(.*?)(?=\*\*\d+\.|\Z)',
    )
|
|
|
|
|
|
def apply_cookie_analytics_block(content: str) -> tuple[str, int]:
    """Mark the "Analyse" section of a cookie banner template.

    The section starts at a line containing a bold "**Abschnitt ...Analyse...**"
    heading and runs up to the "**Abschnitt ...Marketing" heading, or to the
    end of the text when no such heading follows.

    NOTE(review): only the Marketing heading ends the section; any other
    "**Abschnitt" heading between the two would be included in the block.
    Confirm that matches the template layout.

    Returns the (new_content, match_count) pair produced by wrap_block.
    """
    return wrap_block(
        content=content,
        block_id='COOKIE_ANALYTICS_BLOCK',
        pattern=r'(\*\*Abschnitt\s+[^\n]*Analyse[^\n]*\*\*[^\n]*\n)(.*?)(?=\*\*Abschnitt\s+[^\n]*Marketing|\Z)',
    )
|
|
|
|
|
|
def apply_cookie_marketing_block(content: str) -> tuple[str, int]:
    """Mark the "Marketing" section of a cookie banner template.

    The section starts at a line containing a bold "**Abschnitt ...Marketing...**"
    heading and runs up to whichever comes first: a line starting with "---",
    a line starting with "**" followed by an uppercase letter and ")" (such as
    "**B)"), or the end of the text.

    Returns the (new_content, match_count) pair produced by wrap_block.
    """
    return wrap_block(
        content=content,
        block_id='COOKIE_MARKETING_BLOCK',
        pattern=r'(\*\*Abschnitt\s+[^\n]*Marketing[^\n]*\*\*[^\n]*\n)(.*?)(?=\n---|\n\*\*[A-Z]\)|\Z)',
    )
|
|
|
|
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
# Main
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
# One entry per template group to migrate:
#   (document_type, language, transforms applied to the content in list order)
TARGETS = [
    ('nda', 'de', [apply_nda_penalty_block]),
    ('nda', 'en', [apply_nda_penalty_block]),
    (
        'cookie_banner',
        'de',
        [apply_cookie_analytics_block, apply_cookie_marketing_block],
    ),
]
|
|
|
|
def main() -> None:
    """Run migration 022 over every template group listed in TARGETS.

    For each (document_type, language) pair the matching rows of
    public.compliance_legal_templates are loaded, the configured transforms
    are applied to the content in order, and rows whose content changed are
    written back with updated_at set to NOW(). A status line is printed per
    row: [SKIP] for missing templates or NULL content, [NOOP] when nothing
    changed, [OK] when the row was updated.
    """
    print('=== Migration 022: Block Markers ===\n')

    select_templates = text(
        'SELECT id, title, content FROM public.compliance_legal_templates '
        'WHERE document_type = :doc_type AND language = :lang'
    )
    update_template = text(
        'UPDATE public.compliance_legal_templates '
        'SET content = :content, updated_at = NOW() '
        'WHERE id = :id'
    )

    # All reads and writes share one engine.begin() block (SQLAlchemy's
    # transactional context manager).
    with engine.begin() as conn:
        for doc_type, lang, transforms in TARGETS:
            selection = {'doc_type': doc_type, 'lang': lang}
            templates = conn.execute(select_templates, selection).fetchall()

            if not templates:
                print(f'[SKIP] No templates found for {doc_type}/{lang}')
                continue

            for template in templates:
                tid, title, content = template.id, template.title, template.content
                if content is None:
                    print(f'[SKIP] {title} (id={tid}) — content is NULL')
                    continue

                # Feed each transform the output of the previous one.
                updated = content
                wrapped_total = 0
                for transform in transforms:
                    updated, wrapped = transform(updated)
                    wrapped_total += wrapped

                if updated == content:
                    print(f'[NOOP] {title} ({doc_type}/{lang}) — no changes')
                    continue

                conn.execute(update_template, {'content': updated, 'id': tid})
                summary = (
                    f'[OK] {title} ({doc_type}/{lang})'
                    f' | {len(content)} → {len(updated)} chars'
                    f' | {wrapped_total} block(s) wrapped'
                )
                print(summary)

    print('\n=== Done ===')
|
|
|
|
|
|
# Run the migration only when this file is executed as a script.
if __name__ == '__main__':
    main()
|