#!/usr/bin/env python3 """Clean up Finanzplan Excel files: Strip 'category — ' prefix from column-A labels in 'Betriebliche Aufwendungen' and 'Liquidität' sheets. Runs against ALL Finanzplan-*.xlsx files in pitch-deck/exports/ (except BACKUPs). Creates a single timestamped backup per file before modification. Usage: python3 pitch-deck/scripts/cleanup-finanzplan-labels.py python3 pitch-deck/scripts/cleanup-finanzplan-labels.py --dry-run python3 pitch-deck/scripts/cleanup-finanzplan-labels.py --only Finanzplan-Wandeldarlehen-400k.xlsx """ from __future__ import annotations import argparse import shutil import sys from datetime import datetime from pathlib import Path from openpyxl import load_workbook EXPORTS = Path(__file__).resolve().parent.parent / "exports" def strip_prefix(value): """Drop a lowercase 'kategorie — ' prefix from a label.""" if not value: return value s = str(value) if " — " not in s: return s prefix, rest = s.split(" — ", 1) # Only strip if prefix looks like a single-word lowercase category tag if prefix.islower() and prefix.replace("/", "").replace("-", "").isalpha(): return rest return s def clean_labels(ws, start_row: int = 4) -> int: n = 0 for r in range(start_row, ws.max_row + 1): before = ws.cell(row=r, column=1).value after = strip_prefix(before) if after != before: ws.cell(row=r, column=1).value = after n += 1 return n def process_file(path: Path, dry_run: bool = False) -> dict: wb = load_workbook(path) stats = {"liq_labels": 0, "ba_labels": 0} if "Liquidität" in wb.sheetnames: stats["liq_labels"] = clean_labels(wb["Liquidität"], start_row=4) if "Betriebliche Aufwendungen" in wb.sheetnames: stats["ba_labels"] = clean_labels(wb["Betriebliche Aufwendungen"], start_row=4) if not dry_run: wb.save(path) return stats def backup(path: Path, tag: str) -> Path: """Make a single timestamped backup. Returns the backup path.""" ts = datetime.now().strftime("%Y%m%d-%H%M%S") bk = path.with_name(f"{path.stem}.BACKUP-{tag}-{ts}{path.suffix}") shutil.copy2(path, bk) return bk def main() -> int: ap = argparse.ArgumentParser(description=__doc__) ap.add_argument("--dry-run", action="store_true", help="Show what would change, do not write") ap.add_argument("--only", help="Process only a specific filename") ap.add_argument("--no-backup", action="store_true", help="Skip backup (default: backup once)") args = ap.parse_args() files = sorted(EXPORTS.glob("Finanzplan-*.xlsx")) files = [f for f in files if "BACKUP" not in f.name] if args.only: files = [f for f in files if f.name == args.only] if not files: print("No files matched", file=sys.stderr) return 1 print(f"{'DRY-RUN — ' if args.dry_run else ''}{len(files)} file(s) to process") for path in files: if not args.dry_run and not args.no_backup: bk = backup(path, "pre-label-cleanup") print(f" ✓ backup: {bk.name}") stats = process_file(path, dry_run=args.dry_run) print( f" {path.name}: " f"Liq labels={stats['liq_labels']} BA labels={stats['ba_labels']}" ) return 0 if __name__ == "__main__": sys.exit(main())