""" Konvertiert gerendertes Markdown in eine .docx-Datei mittels python-docx. Unterstuetzte Markdown-Elemente: - # / ## / ### / #### / ##### Headings - **bold** und _italic_ inline - Tabellen (Pipe-Syntax) - Listen mit - oder * oder Ziffer.) - Horizontale Linien --- - Code-Inline `code` Bewusst minimal — fuer rechtliche Dokumente brauchen wir keine Bilder/Embeds. """ from __future__ import annotations import io import re from typing import Any, Optional from docx import Document from docx.shared import Pt from docx.enum.text import WD_ALIGN_PARAGRAPH HEADING_RE = re.compile(r"^(#{1,5})\s+(.+)$") HR_RE = re.compile(r"^[-_*]{3,}\s*$") LIST_BULLET_RE = re.compile(r"^(\s*)([-*+])\s+(.+)$") LIST_NUMBER_RE = re.compile(r"^(\s*)(\d+)[\.\)]\s+(.+)$") TABLE_ROW_RE = re.compile(r"^\|(.+)\|\s*$") TABLE_SEP_RE = re.compile(r"^\|[\s\-:|]+\|\s*$") INLINE_BOLD = re.compile(r"\*\*([^*]+)\*\*") # Italic: nur _wort_ wenn von Whitespace/Satzzeichen umgeben — verhindert dass # snake_case-Variablen wie ESKALATION_TAGE_INTERN als Italic interpretiert werden. INLINE_ITALIC = re.compile( r"(? 
# NOTE(review): the original signature line of _add_runs was not visible in the
# reviewed excerpt. Parameter names and order are taken from the function body
# and its call sites; the parameter annotations are an assumption - confirm.
def _add_runs(paragraph: Any, text: str) -> None:
    """Parse inline formatting in ``text`` and append runs to ``paragraph``.

    The text is scanned left to right with ``INLINE_BOLD``, ``INLINE_CODE``
    and ``INLINE_ITALIC``. At each position the match that starts earliest
    wins; text between matches is emitted as plain runs. Spans are not
    nested.

    Args:
        paragraph: Object providing ``add_run(text)`` (a python-docx
            paragraph at every call site in this module).
        text: Raw Markdown text of a single line or table cell.
    """
    pos = 0
    tokens: list[tuple[str, str]] = []
    while pos < len(text):
        m_bold = INLINE_BOLD.search(text, pos)
        m_code = INLINE_CODE.search(text, pos)
        m_italic = INLINE_ITALIC.search(text, pos)
        candidates = [m for m in (m_bold, m_code, m_italic) if m]
        if not candidates:
            # No markup left: the remainder is plain text.
            tokens.append(("plain", text[pos:]))
            break
        # min() returns the first of several equal keys, so when two patterns
        # match at the same offset the order bold, code, italic decides.
        first = min(candidates, key=lambda m: m.start())
        if first.start() > pos:
            tokens.append(("plain", text[pos:first.start()]))
        if first is m_bold:
            tokens.append(("bold", first.group(1)))
        elif first is m_code:
            tokens.append(("code", first.group(1)))
        elif m_italic is not None:
            # The italic pattern has two capture groups; use whichever one
            # took part in the match.
            content = m_italic.group(1) or m_italic.group(2)
            tokens.append(("italic", content))
        pos = first.end()

    for kind, content in tokens:
        run = paragraph.add_run(content)
        if kind == "bold":
            run.bold = True
        elif kind == "italic":
            run.italic = True
        elif kind == "code":
            # Inline code is rendered in a monospaced font.
            run.font.name = "Courier New"
            run.font.size = Pt(10)


def _parse_table(lines: list[str], start: int) -> tuple[list[list[str]], int]:
    """Parse the Markdown pipe table that begins at ``lines[start]``.

    Consecutive lines matching ``TABLE_ROW_RE`` or ``TABLE_SEP_RE`` are
    consumed. Lines matching ``TABLE_SEP_RE`` (delimiter rows such as
    ``|---|:--:|``, but also rows made up only of whitespace, ``-``, ``:``
    and ``|``) are skipped and produce no row.

    Args:
        lines: All lines of the Markdown document.
        start: Index of the first table line.

    Returns:
        ``(rows, next_line_index)`` where ``rows`` holds the stripped cell
        texts of each data row and ``next_line_index`` is the index of the
        first line that is not part of the table.
    """
    rows: list[list[str]] = []
    i = start
    while i < len(lines):
        line = lines[i].rstrip()
        if not TABLE_ROW_RE.match(line) and not TABLE_SEP_RE.match(line):
            break
        if TABLE_SEP_RE.match(line):
            i += 1
            continue
        # Both patterns guarantee that the right-stripped line starts and ends
        # with "|". Remove exactly one delimiter per side: str.strip("|")
        # would swallow every edge pipe and thereby drop empty first/last
        # cells ("||a|"), shifting the remaining cells into wrong columns.
        cells = [c.strip() for c in line[1:-1].split("|")]
        rows.append(cells)
        i += 1
    return rows, i


def _add_table(doc: Any, rows: list[list[str]]) -> None:
    """Append ``rows`` to ``doc`` as a table styled "Light Grid".

    The table is as wide as the longest row; shorter rows leave their
    trailing cells empty. Cell texts go through ``_add_runs`` so inline
    markup is honoured, and every run of the first row is set to bold.
    Nothing is added when ``rows`` is empty.

    Args:
        doc: Object providing ``add_table(rows=, cols=)`` (a python-docx
            document at the call site in this module).
        rows: Cell texts, one inner list per table row.
    """
    if not rows:
        return
    ncols = max(len(r) for r in rows)
    table = doc.add_table(rows=len(rows), cols=ncols)
    table.style = "Light Grid"
    for r_idx, (table_row, row) in enumerate(zip(table.rows, rows)):
        # Fetch the cell sequence once per row instead of once per cell.
        row_cells = table_row.cells
        # len(row) <= ncols by construction, so zip() visits every source cell.
        for cell, cell_text in zip(row_cells, row):
            cell.text = ""
            p = cell.paragraphs[0]
            _add_runs(p, cell_text)
            if r_idx == 0:
                for run in p.runs:
                    run.bold = True


def markdown_to_docx_bytes(markdown_text: str, title: Optional[str] = None) -> bytes:
    """Convert Markdown to a DOCX document and return its bytes.

    Handled line by line, in this order of precedence: headings, horizontal
    rules, pipe tables, bullet lists, numbered lists; any other non-blank
    line becomes a normal paragraph. Blank lines are skipped.

    Args:
        markdown_text: The Markdown source.
        title: Optional document title, added as a level-0 heading before
            the converted content.

    Returns:
        The serialized .docx file.
    """
    doc = Document()

    # Base style
    style = doc.styles["Normal"]
    style.font.name = "Calibri"
    style.font.size = Pt(11)

    if title:
        h = doc.add_heading(title, level=0)
        h.alignment = WD_ALIGN_PARAGRAPH.LEFT

    lines = markdown_text.split("\n")
    i = 0
    while i < len(lines):
        line = lines[i].rstrip()
        if not line.strip():
            i += 1
            continue

        # Heading
        h_match = HEADING_RE.match(line)
        if h_match:
            level = len(h_match.group(1))
            text = h_match.group(2)
            # NOTE(review): HEADING_RE accepts up to five "#", but levels
            # above 4 are rendered as level 4 - confirm this is intended.
            heading = doc.add_heading(level=min(level, 4))
            _add_runs(heading, text)
            i += 1
            continue

        # Horizontal rule, rendered as a line of box-drawing characters
        if HR_RE.match(line):
            doc.add_paragraph("─" * 60)
            i += 1
            continue

        # Table: _parse_table consumes all table lines and returns the index
        # of the first line after the table.
        if TABLE_ROW_RE.match(line):
            rows, i = _parse_table(lines, i)
            _add_table(doc, rows)
            doc.add_paragraph()  # empty paragraph as spacing after the table
            continue

        # Bullet list item
        b_match = LIST_BULLET_RE.match(line)
        if b_match:
            p = doc.add_paragraph(style="List Bullet")
            _add_runs(p, b_match.group(3))
            i += 1
            continue

        # Numbered list item
        n_match = LIST_NUMBER_RE.match(line)
        if n_match:
            p = doc.add_paragraph(style="List Number")
            _add_runs(p, n_match.group(3))
            i += 1
            continue

        # Otherwise: normal paragraph
        p = doc.add_paragraph()
        _add_runs(p, line)
        i += 1

    buf = io.BytesIO()
    doc.save(buf)
    return buf.getvalue()