#!/usr/bin/env python3
"""Apply Euro / Count / Percent number formatting to Finanzplan Excel files.

Per-sheet defaults + per-row classification based on column-A labels:

- Sheets with mostly EUR values: Dashboard, Umsatzerlöse, Personalkosten,
  Investitionen, Materialaufwand, Betriebliche Aufwendungen, Liquidität, GuV.
- Kunden sheet: counts by default, with rate/helper rows as percent.
- Treiber sheet: row-by-row classification by label (no sheet default).
- Skip: Formelübersicht (docs).

Label patterns drive the classification (matched on the lower-cased label,
checked in this order, first hit wins):

1. euro    - contains 'eur/', 'eur ', ' eur', 'startpreis', 'preis/monat'
             or 'preis ('
2. skip    - contains 'faktor' (it's a multiplier, leave the existing format)
3. percent - starts with 'inflation', or contains '-rate', 'rate ', 'rate(',
             '-satz', 'steuersatz', 'quote', 'erhöhung', '% vom' or 'anteil'
4. count   - contains 'headcount', 'anzahl', 'mitarbeiter je',
             '/monat starter', '/monat professional' or '/monat enterprise',
             or starts with 'neukunden', 'churn ' or 'bestandskunden'
5. otherwise the sheet default; rows without a label or without any
   applicable rule are skipped.

Inputs sections (Personalkosten rows 5-24, Investitionen 5-29) are skipped
via the per-sheet start row because they contain mixed text/dates/numbers
per row that would mis-format under a single classification.

Usage:
    python3 pitch-deck/scripts/apply-number-formatting.py --dry-run
    python3 pitch-deck/scripts/apply-number-formatting.py
"""
from __future__ import annotations

import argparse
import shutil
import sys
from datetime import datetime
from pathlib import Path

try:
    from openpyxl import load_workbook
except ImportError:
    # Keep the module importable (label classifiers, --help) without openpyxl;
    # main() and process_file() report the missing dependency explicitly.
    load_workbook = None

EXPORTS = Path(__file__).resolve().parent.parent / "exports"

EURO_FORMAT = '#,##0 "€";-#,##0 "€"'
COUNT_FORMAT = '#,##0'
PERCENT_FORMAT = '0.0%'

# Per sheet: the fallback classification for rows no label rule matches
# (None = no fallback) and the first row that is formatted.
SHEET_CONFIG: dict[str, dict] = {
    "Dashboard": {"default": "euro", "start_row": 4},
    "Umsatzerlöse": {"default": "euro", "start_row": 4},
    "Personalkosten": {"default": "euro", "start_row": 27},
    "Investitionen": {"default": "euro", "start_row": 31},
    "Materialaufwand": {"default": "euro", "start_row": 4},
    "Betriebliche Aufwendungen": {"default": "euro", "start_row": 4},
    "Liquidität": {"default": "euro", "start_row": 4},
    "GuV": {"default": "euro", "start_row": 2},
    "Kunden": {"default": "count", "start_row": 4},
    "Treiber": {"default": None, "start_row": 1},
}

SKIP_SHEETS = {"Formelübersicht"}

FORMAT_MAP = {"euro": EURO_FORMAT, "count": COUNT_FORMAT, "percent": PERCENT_FORMAT}

# Substring tokens for classify_label(), matched against the lower-cased label.
# 'eur ' also covers labels containing ' eur ', so that token is not listed.
_EURO_MARKERS = ("eur/", "eur ", " eur", "startpreis", "preis/monat", "preis (")
# Precise tokens to avoid substring traps: 'satz' alone matches 'Umsatz', so
# '-satz' / 'steuersatz' are used instead. 'provision' alone is too broad - it
# matches the BA channel-partner provision row whose value is in EUR - so
# 'anteil' (matches Treiber's 'Channel-Provision (Anteil vom Umsatz)') is used.
_PERCENT_MARKERS = (
    "-rate", "rate ", "rate(", "-satz", "steuersatz",
    "quote", "erhöhung", "% vom", "anteil",
)
_COUNT_MARKERS = (
    "headcount", "anzahl", "mitarbeiter je",
    "/monat starter", "/monat professional", "/monat enterprise",
)
# Customer-tracking rows, recognised by how the label starts.
_COUNT_PREFIXES = ("neukunden", "churn ", "bestandskunden")


def _contains_any(text: str, tokens: tuple[str, ...]) -> bool:
    """Return True if any of *tokens* occurs as a substring of *text*."""
    return any(token in text for token in tokens)


def _new_stats() -> dict[str, int]:
    """Return a zeroed per-sheet counter dict."""
    return {"euro": 0, "count": 0, "percent": 0, "skipped_rows": 0}


def classify_kunden_row(label: str | None) -> str:
    """Classify a row of the Kunden sheet.

    Kunden sheet is always counts/rates regardless of stray 'EUR' substrings.

    Returns "skip" for a missing/empty label, "percent" when the lower-cased
    label contains 'rate' or 'helper', and "count" otherwise.
    """
    if not label:
        return "skip"
    lowered = str(label).lower()
    if "rate" in lowered or "helper" in lowered:
        return "percent"
    return "count"


def classify_label(label: str | None, sheet_default: str | None) -> str:
    """Classify a row by its column-A label.

    Rules are checked in a fixed order and the first match wins; see the
    module docstring for the token lists.

    Returns one of "euro", "percent", "count" or "skip". When no rule matches,
    *sheet_default* is returned, or "skip" if it is None/empty.
    """
    if not label:
        return "skip"
    lowered = str(label).lower()

    # 1. Explicit Euro markers
    if _contains_any(lowered, _EURO_MARKERS):
        return "euro"

    # 2. Multipliers - skip (preserve existing format)
    if "faktor" in lowered:
        return "skip"

    # 3. Percent patterns. 'inflation' as a substring would match the
    #    'Büromiete (+Inflation)' annotation, so the label has to START with
    #    it (covers 'Inflation 2027' driver rows).
    if lowered.startswith("inflation") or _contains_any(lowered, _PERCENT_MARKERS):
        return "percent"

    # 4. Count patterns and 5. customer-tracking rows
    if _contains_any(lowered, _COUNT_MARKERS) or lowered.startswith(_COUNT_PREFIXES):
        return "count"

    return sheet_default if sheet_default else "skip"


def cell_is_numeric_or_formula(value: object) -> bool:
    """Return True if a cell holding *value* may receive a number format.

    Accepted: None (empty cell), int/float, and strings starting with '='
    (formulas). Everything else - plain text, dates, booleans - is rejected.
    """
    if value is None:
        return True
    # bool is a subclass of int, but TRUE/FALSE cells are not amounts.
    if isinstance(value, bool):
        return False
    if isinstance(value, (int, float)):
        return True
    return isinstance(value, str) and value.startswith("=")


def format_sheet(ws, sheet_name: str) -> dict[str, int]:
    """Apply number formats to one worksheet, in place.

    *ws* is the worksheet object; *sheet_name* selects its SHEET_CONFIG entry.
    Sheets without an entry are left untouched.

    Returns a dict with the number of cells formatted per kind ("euro",
    "count", "percent") and the number of rows skipped ("skipped_rows").
    Outside the Treiber sheet, empty cells are formatted and counted as well.
    """
    stats = _new_stats()
    config = SHEET_CONFIG.get(sheet_name)
    if config is None:
        return stats

    sheet_default = config["default"]
    is_kunden = sheet_name == "Kunden"
    is_treiber = sheet_name == "Treiber"
    # Loop-invariant (only cells inside the current bounds are touched), so
    # read it once instead of once per row.
    last_column = ws.max_column

    for row in range(config["start_row"], ws.max_row + 1):
        label = ws.cell(row=row, column=1).value
        if is_kunden:
            kind = classify_kunden_row(label)
        else:
            kind = classify_label(label, sheet_default)

        if kind == "skip":
            stats["skipped_rows"] += 1
            continue

        fmt = FORMAT_MAP[kind]

        # Treiber: value lives in col B only, and empty cells stay untouched.
        if is_treiber:
            cell = ws.cell(row=row, column=2)
            if cell.value is not None and cell_is_numeric_or_formula(cell.value):
                cell.number_format = fmt
                stats[kind] += 1
            continue

        # Other sheets: apply across all value columns.
        for column in range(2, last_column + 1):
            cell = ws.cell(row=row, column=column)
            if cell_is_numeric_or_formula(cell.value):
                cell.number_format = fmt
                stats[kind] += 1

    return stats


def process_file(path: Path, dry_run: bool) -> dict:
    """Format every configured sheet of the workbook at *path*.

    The workbook is written back to *path* unless *dry_run* is true.

    Returns a dict mapping sheet name to the stats dict of format_sheet().
    Raises ImportError if openpyxl is not installed.
    """
    if load_workbook is None:
        raise ImportError("openpyxl is required to process workbooks")

    wb = load_workbook(path)
    sheet_stats: dict[str, dict] = {
        name: format_sheet(wb[name], name)
        for name in wb.sheetnames
        if name not in SKIP_SHEETS and name in SHEET_CONFIG
    }
    if not dry_run:
        wb.save(path)
    return sheet_stats


def backup(path: Path) -> Path:
    """Copy *path* to a timestamped sibling file and return the copy's path."""
    ts = datetime.now().strftime("%Y%m%d-%H%M%S")
    bk = path.with_name(f"{path.stem}.BACKUP-pre-formatting-{ts}{path.suffix}")
    shutil.copy2(path, bk)
    return bk


def _summary_line(stats: dict[str, int]) -> str:
    """Render the non-zero counters of one sheet, e.g. '€:12 %:3 skip:2'."""
    tagged = (
        ("€", stats["euro"]),
        ("#", stats["count"]),
        ("%", stats["percent"]),
        ("skip", stats["skipped_rows"]),
    )
    return " ".join(f"{tag}:{amount}" for tag, amount in tagged if amount)


def main() -> int:
    """Command-line entry point; returns the process exit code.

    Processes every 'Finanzplan-*.xlsx' in EXPORTS (backup files excluded),
    optionally restricted to one filename via --only. A timestamped backup is
    written before each file is modified unless --dry-run or --no-backup is
    given.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # Keep the docstring's lists and usage lines intact in --help.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--dry-run", action="store_true")
    ap.add_argument("--only", help="Process only this filename")
    ap.add_argument("--no-backup", action="store_true")
    args = ap.parse_args()

    # Checked before touching any file so that no stray backups are created.
    if load_workbook is None:
        print("error: openpyxl is required (pip install openpyxl)", file=sys.stderr)
        return 1

    files = [
        f for f in sorted(EXPORTS.glob("Finanzplan-*.xlsx"))
        if "BACKUP" not in f.name
    ]
    if args.only:
        files = [f for f in files if f.name == args.only]
    if not files:
        print(f"No matching Finanzplan-*.xlsx files in {EXPORTS}", file=sys.stderr)

    for path in files:
        if not args.dry_run and not args.no_backup:
            bk = backup(path)
            print(f" ✓ backup: {bk.name}")
        stats = process_file(path, dry_run=args.dry_run)
        print(f"\n === {path.name} ===")
        for sheet, sheet_stats in stats.items():
            summary = _summary_line(sheet_stats)
            # Sheets where nothing was formatted or skipped are not listed.
            if summary:
                print(f" {sheet}: {summary}")
    return 0


if __name__ == "__main__":
    sys.exit(main())