Initial commit: breakpilot-core - Shared Infrastructure

Docker Compose with 24+ services: - PostgreSQL (PostGIS), Valkey, MinIO, Qdrant - Vault (PKI/TLS), Nginx (Reverse Proxy) - Backend Core API, Consent Service, Billing Service - RAG Service, Embedding Service - Gitea, Woodpecker CI/CD - Night Scheduler, Health Aggregator - Jitsi (Web/XMPP/JVB/Jicofo), Mailpit Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:13 +01:00
commit ad111d5e69
244 changed files with 84288 additions and 0 deletions
@@ -0,0 +1,22 @@
+# Backend Services Module
+# Shared services for PDF generation, file processing, and more
+
+import logging
+
+_logger = logging.getLogger(__name__)
+
+# PDFService requires WeasyPrint which needs system libraries (libgobject, etc.)
+# Make import optional for environments without these dependencies (e.g., CI)
+try:
+    from .pdf_service import PDFService
+    _pdf_available = True
+except (ImportError, OSError) as exc:
+    # Keep the reason visible: otherwise a later "PDFService is None" failure
+    # cannot be traced back to the dependency that was missing at import time.
+    _logger.info("PDFService unavailable: %s", exc)
+    PDFService = None  # type: ignore
+    _pdf_available = False
+
+# FileProcessor requires OpenCV which needs libGL.so.1
+# Make import optional for CI environments
+try:
+    from .file_processor import FileProcessor
+    _file_processor_available = True
+except (ImportError, OSError) as exc:
+    # Same best-effort handling as above, with the cause recorded.
+    _logger.info("FileProcessor unavailable: %s", exc)
+    FileProcessor = None  # type: ignore
+    _file_processor_available = False
+
+__all__ = ["PDFService", "FileProcessor"]
@@ -0,0 +1,563 @@
+"""
+File Processor Service - Dokumentenverarbeitung für BreakPilot.
+
+Shared Service für:
+- OCR (Optical Character Recognition) für Handschrift und gedruckten Text
+- PDF-Parsing und Textextraktion
+- Bildverarbeitung und -optimierung
+- DOCX/DOC Textextraktion
+
+Verwendet:
+- PaddleOCR für deutsche Handschrift
+- PyMuPDF für PDF-Verarbeitung
+- python-docx für DOCX-Dateien
+- OpenCV für Bildvorverarbeitung
+"""
+
+import logging
+import os
+import io
+import base64
+from pathlib import Path
+from typing import Optional, List, Dict, Any, Tuple, Union
+from dataclasses import dataclass
+from enum import Enum
+
+import cv2
+import numpy as np
+from PIL import Image
+
+logger = logging.getLogger(__name__)
+
+
+class FileType(str, Enum):
+    """Supported file types; members are str-valued so they compare equal to their string value."""
+    PDF = "pdf"
+    IMAGE = "image"
+    DOCX = "docx"
+    DOC = "doc"
+    TXT = "txt"
+    UNKNOWN = "unknown"
+
+
+class ProcessingMode(str, Enum):
+    """Processing modes."""
+    OCR_HANDWRITING = "ocr_handwriting"  # Handwriting recognition
+    OCR_PRINTED = "ocr_printed"          # Printed text
+    TEXT_EXTRACT = "text_extract"        # Text extraction (PDF/DOCX)
+    MIXED = "mixed"                       # Combines OCR and text extraction
+
+
+@dataclass
+class ProcessedRegion:
+    """A recognized text region."""
+    text: str
+    confidence: float
+    bbox: Tuple[int, int, int, int]  # x1, y1, x2, y2
+    page: int = 1
+
+
+@dataclass
+class ProcessingResult:
+    """Result of processing a document."""
+    text: str
+    confidence: float
+    regions: List[ProcessedRegion]
+    page_count: int
+    file_type: FileType
+    processing_mode: ProcessingMode
+    metadata: Dict[str, Any]
+
+
+class FileProcessor:
+    """
+    Zentrale Dokumentenverarbeitung für BreakPilot.
+
+    Unterstützt:
+    - Handschrifterkennung (OCR) für Klausuren
+    - Textextraktion aus PDFs
+    - DOCX/DOC Verarbeitung
+    - Bildvorverarbeitung für bessere OCR-Ergebnisse
+    """
+
+    def __init__(self, ocr_lang: str = "de", use_gpu: bool = False):
+        """
+        Store the OCR settings; the OCR engine itself is not created here.
+
+        Args:
+            ocr_lang: OCR language (default: "de" for German)
+            use_gpu: Whether OCR should run on the GPU
+        """
+        # Engine slot starts empty and is filled on demand.
+        self._ocr_engine = None
+        self.use_gpu = use_gpu
+        self.ocr_lang = ocr_lang
+
+        logger.info(f"FileProcessor initialized (lang={ocr_lang}, gpu={use_gpu})")
+
+    @property
+    def ocr_engine(self):
+        """
+        Return the OCR engine, initializing it on first access.
+
+        Returns:
+            The engine produced by ``_init_ocr_engine``, or None when that
+            initialization yielded no engine.
+        """
+        # Remember that initialization already ran. Without this, a missing
+        # OCR backend (engine stays None) would be re-imported and re-logged
+        # on every single access, i.e. once per page or region.
+        if self._ocr_engine is None and not getattr(self, "_ocr_init_attempted", False):
+            self._ocr_engine = self._init_ocr_engine()
+            # Set only after the call returned, so an unexpected exception
+            # during initialization is retried on the next access.
+            self._ocr_init_attempted = True
+        return self._ocr_engine
+
+    def _init_ocr_engine(self):
+        """
+        Create the PaddleOCR engine for the configured OCR language.
+
+        Returns:
+            A PaddleOCR instance, or None when an ImportError occurs
+            (paddleocr not installed).
+        """
+        # The original default ("de") was always run with PaddleOCR's "german"
+        # model; keep that mapping and pass any other value through so that
+        # the ocr_lang constructor argument is honoured instead of ignored.
+        lang_aliases = {"de": "german", "deu": "german", "ger": "german"}
+        configured_lang = getattr(self, "ocr_lang", None) or "de"
+        paddle_lang = lang_aliases.get(str(configured_lang).lower(), configured_lang)
+
+        try:
+            from paddleocr import PaddleOCR
+            return PaddleOCR(
+                use_angle_cls=True,
+                lang=paddle_lang,
+                use_gpu=self.use_gpu,
+                show_log=False
+            )
+        except ImportError:
+            logger.warning("PaddleOCR nicht installiert - verwende Fallback")
+            return None
+
+    def detect_file_type(self, file_path: Optional[str] = None, file_bytes: Optional[bytes] = None) -> FileType:
+        """
+        Detect the file type from the extension and, failing that, the content.
+
+        The extension of ``file_path`` is checked first; if it is missing or
+        not recognized, the leading bytes of ``file_bytes`` are compared
+        against known magic numbers.
+
+        Args:
+            file_path: Path to the file
+            file_bytes: File content as bytes
+
+        Returns:
+            FileType enum member (FileType.UNKNOWN when nothing matches)
+        """
+        if file_path:
+            ext = Path(file_path).suffix.lower()
+            if ext == ".pdf":
+                return FileType.PDF
+            if ext in (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif"):
+                return FileType.IMAGE
+            if ext == ".docx":
+                return FileType.DOCX
+            if ext == ".doc":
+                return FileType.DOC
+            if ext == ".txt":
+                return FileType.TXT
+
+        if file_bytes:
+            # Only the first few bytes are needed; bytes() also makes
+            # bytearray/memoryview inputs usable with startswith().
+            head = bytes(file_bytes[:8])
+
+            if head.startswith(b'%PDF'):
+                return FileType.PDF
+
+            # Keep content detection in step with the accepted extensions:
+            # PNG, JPEG, BMP, GIF (87a/89a) and TIFF (little/big endian).
+            image_signatures = (
+                b'\x89PNG\r\n\x1a\n',
+                b'\xff\xd8',
+                b'BM',
+                b'GIF87a',
+                b'GIF89a',
+                b'II*\x00',
+                b'MM\x00*',
+            )
+            if head.startswith(image_signatures):
+                return FileType.IMAGE
+
+            # ZIP container: treated as DOCX (other ZIP-based formats share
+            # this signature and are not distinguished here).
+            if head.startswith(b'PK\x03\x04'):
+                return FileType.DOCX
+
+        return FileType.UNKNOWN
+
+    def process(
+        self,
+        file_path: str = None,
+        file_bytes: bytes = None,
+        mode: ProcessingMode = ProcessingMode.MIXED
+    ) -> ProcessingResult:
+        """
+        Process a document by dispatching on its detected file type.
+
+        Args:
+            file_path: Path to the file
+            file_bytes: File content as bytes
+            mode: Processing mode
+
+        Returns:
+            ProcessingResult with the extracted text and metadata
+
+        Raises:
+            ValueError: If neither input is given or the type is unsupported
+        """
+        if not (file_path or file_bytes):
+            raise ValueError("Entweder file_path oder file_bytes muss angegeben werden")
+
+        file_type = self.detect_file_type(file_path, file_bytes)
+        logger.info(f"Processing file of type: {file_type}")
+
+        # PDF and image handlers take the mode; DOCX and TXT handlers do not.
+        if file_type == FileType.PDF:
+            return self._process_pdf(file_path, file_bytes, mode)
+        if file_type == FileType.IMAGE:
+            return self._process_image(file_path, file_bytes, mode)
+        if file_type == FileType.DOCX:
+            return self._process_docx(file_path, file_bytes)
+        if file_type == FileType.TXT:
+            return self._process_txt(file_path, file_bytes)
+
+        raise ValueError(f"Nicht unterstützter Dateityp: {file_type}")
+
+    def _process_pdf(
+        self,
+        file_path: str = None,
+        file_bytes: bytes = None,
+        mode: ProcessingMode = ProcessingMode.MIXED
+    ) -> ProcessingResult:
+        """
+        Process a PDF, using embedded text where present and OCR otherwise.
+
+        Args:
+            file_path: Path to the PDF (used when file_bytes is empty)
+            file_bytes: PDF content as bytes
+            mode: Processing mode; OCR_HANDWRITING forces OCR on every page
+
+        Returns:
+            ProcessingResult with all regions and the mean region confidence
+        """
+        try:
+            import fitz  # PyMuPDF
+        except ImportError:
+            logger.warning("PyMuPDF nicht installiert - versuche Fallback")
+            # Fallback: hand the input to the image pipeline
+            return self._process_image(file_path, file_bytes, mode)
+
+        if file_bytes:
+            doc = fitz.open(stream=file_bytes, filetype="pdf")
+        else:
+            doc = fitz.open(file_path)
+
+        all_text = []
+        all_regions = []
+        total_confidence = 0.0
+
+        try:
+            # The page count must be read while the document is still open;
+            # asking a closed PyMuPDF document for its length raises.
+            page_count = len(doc) if hasattr(doc, '__len__') else 1
+
+            for page_num, page in enumerate(doc, start=1):
+                # Try direct text extraction first
+                page_text = page.get_text()
+
+                if page_text.strip() and mode != ProcessingMode.OCR_HANDWRITING:
+                    # Page carries real text: one full-page region, confidence 1.0
+                    all_text.append(page_text)
+                    all_regions.append(ProcessedRegion(
+                        text=page_text,
+                        confidence=1.0,
+                        bbox=(0, 0, int(page.rect.width), int(page.rect.height)),
+                        page=page_num
+                    ))
+                    total_confidence += 1.0
+                else:
+                    # Render the page as an image (2x scale) and run OCR on it
+                    pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
+                    img = Image.open(io.BytesIO(pix.tobytes("png")))
+
+                    ocr_result = self._ocr_image(img)
+                    all_text.append(ocr_result["text"])
+
+                    for region in ocr_result["regions"]:
+                        region.page = page_num
+                        all_regions.append(region)
+                        total_confidence += region.confidence
+        finally:
+            # Release the document even when OCR or rendering fails mid-way.
+            doc.close()
+
+        avg_confidence = total_confidence / len(all_regions) if all_regions else 0.0
+
+        return ProcessingResult(
+            text="\n\n".join(all_text),
+            confidence=avg_confidence,
+            regions=all_regions,
+            page_count=page_count,
+            file_type=FileType.PDF,
+            processing_mode=mode,
+            metadata={"source": file_path or "bytes"}
+        )
+
+    def _process_image(
+        self,
+        file_path: str = None,
+        file_bytes: bytes = None,
+        mode: ProcessingMode = ProcessingMode.MIXED
+    ) -> ProcessingResult:
+        """
+        Process an image file: preprocess it, then run OCR on the result.
+
+        Bytes take precedence over the path when both are given. The reported
+        image size is that of the image as opened, before preprocessing.
+        """
+        source = io.BytesIO(file_bytes) if file_bytes else file_path
+        img = Image.open(source)
+
+        # Preprocess, then OCR the preprocessed image
+        ocr_result = self._ocr_image(self._preprocess_image(img))
+
+        result_metadata = {
+            "source": file_path or "bytes",
+            "image_size": img.size
+        }
+        return ProcessingResult(
+            text=ocr_result["text"],
+            confidence=ocr_result["confidence"],
+            regions=ocr_result["regions"],
+            page_count=1,
+            file_type=FileType.IMAGE,
+            processing_mode=mode,
+            metadata=result_metadata
+        )
+
+    def _process_docx(
+        self,
+        file_path: str = None,
+        file_bytes: bytes = None
+    ) -> ProcessingResult:
+        """
+        Extract text from a DOCX file (paragraphs first, then table rows).
+
+        Args:
+            file_path: Path to the DOCX file (used when file_bytes is empty)
+            file_bytes: DOCX content as bytes
+
+        Returns:
+            ProcessingResult with a single region holding the whole text
+
+        Raises:
+            ImportError: If python-docx is not installed
+        """
+        try:
+            from docx import Document
+        except ImportError as err:
+            # Chain the cause so the original import failure stays visible.
+            raise ImportError("python-docx ist nicht installiert") from err
+
+        if file_bytes:
+            doc = Document(io.BytesIO(file_bytes))
+        else:
+            doc = Document(file_path)
+
+        paragraphs = [para.text for para in doc.paragraphs if para.text.strip()]
+
+        # Also extract tables, one " | "-joined line per row
+        for table in doc.tables:
+            for row in table.rows:
+                cell_texts = [cell.text for cell in row.cells]
+                # Test the cells, not the joined string: the separators alone
+                # would make an all-empty row look non-empty.
+                if any(cell_text.strip() for cell_text in cell_texts):
+                    paragraphs.append(" | ".join(cell_texts))
+
+        text = "\n\n".join(paragraphs)
+
+        return ProcessingResult(
+            text=text,
+            confidence=1.0,  # Direct text extraction
+            regions=[ProcessedRegion(
+                text=text,
+                confidence=1.0,
+                bbox=(0, 0, 0, 0),
+                page=1
+            )],
+            page_count=1,
+            file_type=FileType.DOCX,
+            processing_mode=ProcessingMode.TEXT_EXTRACT,
+            metadata={"source": file_path or "bytes"}
+        )
+
+    def _process_txt(
+        self,
+        file_path: str = None,
+        file_bytes: bytes = None
+    ) -> ProcessingResult:
+        """
+        Read a plain-text file as UTF-8, dropping undecodable bytes.
+
+        Bytes take precedence over the path when both are given.
+        """
+        if not file_bytes:
+            with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
+                text = handle.read()
+        else:
+            text = file_bytes.decode('utf-8', errors='ignore')
+
+        # The whole text forms one region without a meaningful bounding box.
+        whole_text_region = ProcessedRegion(
+            text=text,
+            confidence=1.0,
+            bbox=(0, 0, 0, 0),
+            page=1
+        )
+
+        return ProcessingResult(
+            text=text,
+            confidence=1.0,
+            regions=[whole_text_region],
+            page_count=1,
+            file_type=FileType.TXT,
+            processing_mode=ProcessingMode.TEXT_EXTRACT,
+            metadata={"source": file_path or "bytes"}
+        )
+
+    def _preprocess_image(self, img: Image.Image) -> Image.Image:
+        """
+        Preprocess an image for better OCR results.
+
+        Steps: grayscale conversion, denoising, contrast enhancement (CLAHE)
+        and adaptive binarization.
+
+        Args:
+            img: Input image in any PIL mode
+
+        Returns:
+            The binarized image as a PIL image
+        """
+        # Normalize the pixel layout first. Grayscale, palette or bilevel
+        # images yield a 2-D array, which the RGB->BGR conversion below
+        # rejects; converting to RGB gives a 3-channel array in every case.
+        rgb_pixels = np.array(img.convert("RGB"))
+
+        # PIL (RGB) to OpenCV (BGR), then to grayscale
+        cv_img = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
+        gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
+
+        # Denoising
+        denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
+
+        # Contrast enhancement (CLAHE)
+        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+        enhanced = clahe.apply(denoised)
+
+        # Adaptive binarization
+        binary = cv2.adaptiveThreshold(
+            enhanced,
+            255,
+            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+            cv2.THRESH_BINARY,
+            11,
+            2
+        )
+
+        # Back to PIL
+        return Image.fromarray(binary)
+
+    def _ocr_image(self, img: Image.Image) -> Dict[str, Any]:
+        """
+        Run OCR on an image.
+
+        Returns:
+            Dict with "text" (recognized lines joined by newlines),
+            "confidence" (mean line confidence) and "regions"
+            (one ProcessedRegion per recognized line)
+        """
+        engine = self.ocr_engine
+        if engine is None:
+            # No OCR engine available: return a placeholder result
+            return {
+                "text": "[OCR nicht verfügbar - bitte PaddleOCR installieren]",
+                "confidence": 0.0,
+                "regions": []
+            }
+
+        pixels = np.array(img)
+
+        # A 2-D array is a single-channel image; expand it to three channels
+        if len(pixels.shape) == 2:
+            pixels = cv2.cvtColor(pixels, cv2.COLOR_GRAY2RGB)
+
+        raw_result = engine.ocr(pixels, cls=True)
+
+        if not raw_result or not raw_result[0]:
+            return {"text": "", "confidence": 0.0, "regions": []}
+
+        line_texts = []
+        line_regions = []
+        confidence_sum = 0.0
+
+        for detection in raw_result[0]:
+            corner_points = detection[0]  # [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
+            text, confidence = detection[1]
+
+            # Collapse the four corner points into an axis-aligned box
+            left = int(min(point[0] for point in corner_points))
+            top = int(min(point[1] for point in corner_points))
+            right = int(max(point[0] for point in corner_points))
+            bottom = int(max(point[1] for point in corner_points))
+
+            line_texts.append(text)
+            line_regions.append(ProcessedRegion(
+                text=text,
+                confidence=confidence,
+                bbox=(left, top, right, bottom)
+            ))
+            confidence_sum += confidence
+
+        if line_regions:
+            mean_confidence = confidence_sum / len(line_regions)
+        else:
+            mean_confidence = 0.0
+
+        return {
+            "text": "\n".join(line_texts),
+            "confidence": mean_confidence,
+            "regions": line_regions
+        }
+
+    def extract_handwriting_regions(
+        self,
+        img: Image.Image,
+        min_area: int = 500
+    ) -> List[Dict[str, Any]]:
+        """
+        Find text-like regions in an image and run OCR on each of them.
+
+        Regions are found via edge detection, dilation and external contours;
+        contours smaller than ``min_area`` are discarded.
+
+        Args:
+            img: Input image in any PIL mode
+            min_area: Minimum contour area for a region to be kept
+
+        Returns:
+            List of dicts with "bbox" (x1, y1, x2, y2), "area", "text" and
+            "confidence", sorted top to bottom by the box's y1
+        """
+        # Normalize to 3-channel RGB first: grayscale, palette or bilevel
+        # inputs give a 2-D array that the RGB->BGR conversion rejects.
+        cv_img = cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2BGR)
+        gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
+
+        # Detect edges
+        edges = cv2.Canny(gray, 50, 150)
+
+        # Dilate with a wide kernel so neighbouring strokes merge into blobs
+        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 5))
+        dilated = cv2.dilate(edges, kernel, iterations=2)
+
+        # Find outer contours
+        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+        regions = []
+        for contour in contours:
+            area = cv2.contourArea(contour)
+            if area < min_area:
+                continue
+
+            x, y, w, h = cv2.boundingRect(contour)
+            bbox = (x, y, x + w, y + h)
+
+            # Crop from the original image and OCR the crop
+            ocr_result = self._ocr_image(img.crop(bbox))
+
+            regions.append({
+                "bbox": bbox,
+                "area": area,
+                "text": ocr_result["text"],
+                "confidence": ocr_result["confidence"]
+            })
+
+        # Sort by y position (top to bottom)
+        regions.sort(key=lambda r: r["bbox"][1])
+
+        return regions
+
+
+# Module-level singleton instance
+_file_processor: Optional[FileProcessor] = None
+
+
+def get_file_processor() -> FileProcessor:
+    """Return the shared FileProcessor, creating it on the first call."""
+    global _file_processor
+    if _file_processor is not None:
+        return _file_processor
+    _file_processor = FileProcessor()
+    return _file_processor
+
+
+# Convenience functions
+def process_file(
+    file_path: str = None,
+    file_bytes: bytes = None,
+    mode: ProcessingMode = ProcessingMode.MIXED
+) -> ProcessingResult:
+    """
+    Process a file with the shared FileProcessor instance.
+
+    Args:
+        file_path: Path to the file
+        file_bytes: File content as bytes
+        mode: Processing mode
+
+    Returns:
+        ProcessingResult
+    """
+    return get_file_processor().process(file_path, file_bytes, mode)
+
+
+def extract_text_from_pdf(file_path: str = None, file_bytes: bytes = None) -> str:
+    """Return the text of a file processed in TEXT_EXTRACT mode."""
+    return process_file(file_path, file_bytes, ProcessingMode.TEXT_EXTRACT).text
+
+
+def ocr_image(file_path: str = None, file_bytes: bytes = None) -> str:
+    """Return the text of a file processed in OCR_PRINTED mode."""
+    return process_file(file_path, file_bytes, ProcessingMode.OCR_PRINTED).text
+
+
+def ocr_handwriting(file_path: str = None, file_bytes: bytes = None) -> str:
+    """Return the text of a file processed in OCR_HANDWRITING mode."""
+    return process_file(file_path, file_bytes, ProcessingMode.OCR_HANDWRITING).text
@@ -0,0 +1,916 @@
+"""
+PDF Service - Zentrale PDF-Generierung für BreakPilot.
+
+Shared Service für:
+- Letters (Elternbriefe)
+- Zeugnisse (Schulzeugnisse)
+- Correction (Korrektur-Übersichten)
+
+Verwendet WeasyPrint für PDF-Rendering und Jinja2 für Templates.
+"""
+
+import logging
+import os
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, Optional, List
+from dataclasses import dataclass
+
+from jinja2 import Environment, FileSystemLoader, select_autoescape
+from weasyprint import HTML, CSS
+from weasyprint.text.fonts import FontConfiguration
+
+logger = logging.getLogger(__name__)
+
+# Template directory
+TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "pdf"
+
+
+@dataclass
+class SchoolInfo:
+    """School information for the document header."""
+    name: str
+    address: str
+    phone: str
+    email: str
+    logo_path: Optional[str] = None
+    website: Optional[str] = None
+    principal: Optional[str] = None
+
+
+@dataclass
+class LetterData:
+    """Data for a parent-letter PDF."""
+    recipient_name: str
+    recipient_address: str
+    student_name: str
+    student_class: str
+    subject: str
+    content: str
+    date: str
+    teacher_name: str
+    teacher_title: Optional[str] = None
+    school_info: Optional[SchoolInfo] = None
+    letter_type: str = "general"  # general, halbjahr, fehlzeiten, elternabend, lob
+    tone: str = "professional"
+    legal_references: Optional[List[Dict[str, str]]] = None
+    gfk_principles_applied: Optional[List[str]] = None
+
+
+@dataclass
+class CertificateData:
+    """Data for a school-certificate PDF."""
+    student_name: str
+    student_birthdate: str
+    student_class: str
+    school_year: str
+    certificate_type: str  # halbjahr, jahres, abschluss
+    subjects: List[Dict[str, Any]]  # [{name, grade, note}]
+    attendance: Dict[str, int]  # {days_absent, days_excused, days_unexcused}
+    remarks: Optional[str] = None
+    class_teacher: str = ""
+    principal: str = ""
+    school_info: Optional[SchoolInfo] = None
+    issue_date: str = ""
+    social_behavior: Optional[str] = None  # A, B, C, D
+    work_behavior: Optional[str] = None  # A, B, C, D
+
+
+@dataclass
+class StudentInfo:
+    """Student information for correction PDFs."""
+    student_id: str
+    name: str
+    class_name: str
+
+
+@dataclass
+class CorrectionData:
+    """Data for a correction-overview PDF."""
+    student: StudentInfo
+    exam_title: str
+    subject: str
+    date: str
+    max_points: int
+    achieved_points: int
+    grade: str
+    percentage: float
+    corrections: List[Dict[str, Any]]  # [{question, answer, points, feedback}]
+    teacher_notes: str = ""
+    ai_feedback: str = ""
+    grade_distribution: Optional[Dict[str, int]] = None  # {grade: count}
+    class_average: Optional[float] = None
+
+
+class PDFService:
+    """
+    Zentrale PDF-Generierung für BreakPilot.
+
+    Unterstützt:
+    - Elternbriefe mit GFK-Prinzipien und rechtlichen Referenzen
+    - Schulzeugnisse (Halbjahr, Jahres, Abschluss)
+    - Korrektur-Übersichten für Klausuren
+    """
+
+    def __init__(self, templates_dir: Optional[Path] = None):
+        """
+        Initialize the PDF service.
+
+        Args:
+            templates_dir: Optional path to the templates; a plain string is
+                accepted as well. Defaults to TEMPLATES_DIR.
+        """
+        # Coerce to Path so a string argument does not fail on .mkdir() below.
+        self.templates_dir = Path(templates_dir) if templates_dir else TEMPLATES_DIR
+
+        # Try to make sure the templates directory exists. A failure here
+        # (e.g. read-only filesystem) must not prevent construction, because
+        # the template getters fall back to inline templates when no file
+        # is found.
+        try:
+            self.templates_dir.mkdir(parents=True, exist_ok=True)
+        except OSError as exc:
+            logger.warning(
+                "Could not create templates directory %s: %s",
+                self.templates_dir,
+                exc,
+            )
+
+        # Initialize Jinja2 environment
+        self.jinja_env = Environment(
+            loader=FileSystemLoader(str(self.templates_dir)),
+            autoescape=select_autoescape(['html', 'xml']),
+            trim_blocks=True,
+            lstrip_blocks=True
+        )
+
+        # Add custom filters
+        self.jinja_env.filters['date_format'] = self._date_format
+        self.jinja_env.filters['grade_color'] = self._grade_color
+
+        # Font configuration for WeasyPrint
+        self.font_config = FontConfiguration()
+
+        logger.info(f"PDFService initialized with templates from {self.templates_dir}")
+
+    @staticmethod
+    def _date_format(value: str, format_str: str = "%d.%m.%Y") -> str:
+        """
+        Format a date for German-style display (Jinja2 filter).
+
+        Args:
+            value: ISO-8601 date/datetime string (a trailing "Z" is accepted);
+                date/datetime objects are formatted directly
+            format_str: strftime pattern for the output
+
+        Returns:
+            The formatted date, "" for empty input, or the input unchanged
+            when it cannot be parsed.
+        """
+        if not value:
+            return ""
+        # Templates may hand over real date/datetime objects; calling
+        # .replace("Z", ...) on those would raise TypeError, so format them
+        # directly instead.
+        if hasattr(value, "strftime"):
+            return value.strftime(format_str)
+        try:
+            # fromisoformat() on older Python versions rejects the "Z" suffix.
+            dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
+            return dt.strftime(format_str)
+        except (ValueError, AttributeError, TypeError):
+            return value
+
+    @staticmethod
+    def _grade_color(grade: str) -> str:
+        """Return the hex color for a grade ("1"-"6" or "A"-"D"); dark gray otherwise."""
+        fallback_color = "#333333"
+        palette = {
+            # Numeric grades
+            "1": "#27ae60",  # green
+            "2": "#2ecc71",  # light green
+            "3": "#f1c40f",  # yellow
+            "4": "#e67e22",  # orange
+            "5": "#e74c3c",  # red
+            "6": "#c0392b",  # dark red
+            # Letter grades
+            "A": "#27ae60",
+            "B": "#2ecc71",
+            "C": "#f1c40f",
+            "D": "#e74c3c",
+        }
+        key = str(grade)
+        if key in palette:
+            return palette[key]
+        return fallback_color
+
+    def _get_base_css(self) -> str:
+        """Return the base CSS shared by all PDFs (page setup, letter, certificate and correction styles)."""
+        return """
+        @page {
+            size: A4;
+            margin: 2cm 2.5cm;
+            @top-right {
+                content: counter(page) " / " counter(pages);
+                font-size: 9pt;
+                color: #666;
+            }
+        }
+
+        body {
+            font-family: 'DejaVu Sans', 'Liberation Sans', Arial, sans-serif;
+            font-size: 11pt;
+            line-height: 1.5;
+            color: #333;
+        }
+
+        h1, h2, h3 {
+            font-weight: bold;
+            margin-top: 1em;
+            margin-bottom: 0.5em;
+        }
+
+        h1 { font-size: 16pt; }
+        h2 { font-size: 14pt; }
+        h3 { font-size: 12pt; }
+
+        .header {
+            border-bottom: 2px solid #2c3e50;
+            padding-bottom: 15px;
+            margin-bottom: 20px;
+        }
+
+        .school-name {
+            font-size: 18pt;
+            font-weight: bold;
+            color: #2c3e50;
+        }
+
+        .school-info {
+            font-size: 9pt;
+            color: #666;
+        }
+
+        .letter-date {
+            text-align: right;
+            margin-bottom: 20px;
+        }
+
+        .recipient {
+            margin-bottom: 30px;
+        }
+
+        .subject {
+            font-weight: bold;
+            margin-bottom: 20px;
+        }
+
+        .content {
+            text-align: justify;
+            margin-bottom: 30px;
+        }
+
+        .signature {
+            margin-top: 40px;
+        }
+
+        .legal-references {
+            font-size: 9pt;
+            color: #666;
+            border-top: 1px solid #ddd;
+            margin-top: 30px;
+            padding-top: 10px;
+        }
+
+        .gfk-badge {
+            display: inline-block;
+            background: #e8f5e9;
+            color: #27ae60;
+            font-size: 8pt;
+            padding: 2px 8px;
+            border-radius: 10px;
+            margin-right: 5px;
+        }
+
+        /* Zeugnis-Styles */
+        .certificate-header {
+            text-align: center;
+            margin-bottom: 30px;
+        }
+
+        .certificate-title {
+            font-size: 20pt;
+            font-weight: bold;
+            margin-bottom: 10px;
+        }
+
+        .student-info {
+            margin-bottom: 20px;
+            padding: 15px;
+            background: #f9f9f9;
+            border-radius: 5px;
+        }
+
+        .grades-table {
+            width: 100%;
+            border-collapse: collapse;
+            margin-bottom: 20px;
+        }
+
+        .grades-table th,
+        .grades-table td {
+            border: 1px solid #ddd;
+            padding: 8px 12px;
+            text-align: left;
+        }
+
+        .grades-table th {
+            background: #2c3e50;
+            color: white;
+        }
+
+        .grades-table tr:nth-child(even) {
+            background: #f9f9f9;
+        }
+
+        .grade-cell {
+            text-align: center;
+            font-weight: bold;
+            font-size: 12pt;
+        }
+
+        .attendance-box {
+            background: #fff3cd;
+            padding: 15px;
+            border-radius: 5px;
+            margin-bottom: 20px;
+        }
+
+        .signatures-row {
+            display: flex;
+            justify-content: space-between;
+            margin-top: 50px;
+        }
+
+        .signature-block {
+            text-align: center;
+            width: 40%;
+        }
+
+        .signature-line {
+            border-top: 1px solid #333;
+            margin-top: 40px;
+            padding-top: 5px;
+        }
+
+        /* Korrektur-Styles */
+        .exam-header {
+            background: #2c3e50;
+            color: white;
+            padding: 15px;
+            margin-bottom: 20px;
+        }
+
+        .result-box {
+            background: #e8f5e9;
+            padding: 20px;
+            text-align: center;
+            margin-bottom: 20px;
+            border-radius: 5px;
+        }
+
+        .result-grade {
+            font-size: 36pt;
+            font-weight: bold;
+        }
+
+        .result-points {
+            font-size: 14pt;
+            color: #666;
+        }
+
+        .corrections-list {
+            margin-bottom: 20px;
+        }
+
+        .correction-item {
+            border: 1px solid #ddd;
+            padding: 15px;
+            margin-bottom: 10px;
+            border-radius: 5px;
+        }
+
+        .correction-question {
+            font-weight: bold;
+            margin-bottom: 5px;
+        }
+
+        .correction-feedback {
+            background: #fff8e1;
+            padding: 10px;
+            margin-top: 10px;
+            border-left: 3px solid #ffc107;
+            font-size: 10pt;
+        }
+
+        .stats-table {
+            width: 100%;
+            margin-top: 20px;
+        }
+
+        .stats-table td {
+            padding: 5px 10px;
+        }
+        """
+
+    def _render_template_to_pdf(self, template, data) -> bytes:
+        """Render ``template`` with ``data`` and convert the HTML to PDF bytes using the base CSS."""
+        html_content = template.render(
+            data=data,
+            generated_at=datetime.now().strftime("%d.%m.%Y %H:%M")
+        )
+
+        css = CSS(string=self._get_base_css(), font_config=self.font_config)
+        return HTML(string=html_content).write_pdf(
+            stylesheets=[css],
+            font_config=self.font_config
+        )
+
+    def generate_letter_pdf(self, data: LetterData) -> bytes:
+        """
+        Generate the PDF for a parent letter.
+
+        Args:
+            data: LetterData with all letter information
+
+        Returns:
+            PDF as bytes
+        """
+        logger.info(f"Generating letter PDF for student: {data.student_name}")
+
+        pdf_bytes = self._render_template_to_pdf(self._get_letter_template(), data)
+
+        logger.info(f"Letter PDF generated: {len(pdf_bytes)} bytes")
+        return pdf_bytes
+
+    def generate_certificate_pdf(self, data: CertificateData) -> bytes:
+        """
+        Generate the PDF for a school certificate.
+
+        Args:
+            data: CertificateData with all certificate information
+
+        Returns:
+            PDF as bytes
+        """
+        logger.info(f"Generating certificate PDF for: {data.student_name}")
+
+        pdf_bytes = self._render_template_to_pdf(self._get_certificate_template(), data)
+
+        logger.info(f"Certificate PDF generated: {len(pdf_bytes)} bytes")
+        return pdf_bytes
+
+    def generate_correction_pdf(self, data: CorrectionData) -> bytes:
+        """
+        Generate the PDF for a correction overview.
+
+        Args:
+            data: CorrectionData with all correction information
+
+        Returns:
+            PDF as bytes
+        """
+        logger.info(f"Generating correction PDF for: {data.student.name}")
+
+        pdf_bytes = self._render_template_to_pdf(self._get_correction_template(), data)
+
+        logger.info(f"Correction PDF generated: {len(pdf_bytes)} bytes")
+        return pdf_bytes
+
+    def _get_letter_template(self):
+        """Return the letter template: the file from templates_dir if present, else the inline one."""
+        template_name = "letter.html"
+        if not (self.templates_dir / template_name).exists():
+            # No template file on disk: fall back to the inline template
+            return self.jinja_env.from_string(self._get_letter_template_html())
+
+        return self.jinja_env.get_template(template_name)
+
+    def _get_certificate_template(self):
+        """Gibt Certificate-Template zurück."""
+        template_path = self.templates_dir / "certificate.html"
+        if template_path.exists():
+            return self.jinja_env.get_template("certificate.html")
+
+        return self.jinja_env.from_string(self._get_certificate_template_html())
+
+    def _get_correction_template(self):
+        """Gibt Correction-Template zurück."""
+        template_path = self.templates_dir / "correction.html"
+        if template_path.exists():
+            return self.jinja_env.get_template("correction.html")
+
+        return self.jinja_env.from_string(self._get_correction_template_html())
+
+    @staticmethod
+    def _get_letter_template_html() -> str:
+        """Inline HTML-Template für Elternbriefe."""
+        return """
+<!DOCTYPE html>
+<html lang="de">
+<head>
+    <meta charset="UTF-8">
+    <title>{{ data.subject }}</title>
+</head>
+<body>
+    <div class="header">
+        {% if data.school_info %}
+        <div class="school-name">{{ data.school_info.name }}</div>
+        <div class="school-info">
+            {{ data.school_info.address }}<br>
+            Tel: {{ data.school_info.phone }} | E-Mail: {{ data.school_info.email }}
+            {% if data.school_info.website %} | {{ data.school_info.website }}{% endif %}
+        </div>
+        {% else %}
+        <div class="school-name">Schule</div>
+        {% endif %}
+    </div>
+
+    <div class="letter-date">
+        {{ data.date }}
+    </div>
+
+    <div class="recipient">
+        {{ data.recipient_name }}<br>
+        {{ data.recipient_address | replace('\\n', '<br>') | safe }}
+    </div>
+
+    <div class="subject">
+        Betreff: {{ data.subject }}
+    </div>
+
+    <div class="meta-info" style="font-size: 10pt; color: #666; margin-bottom: 20px;">
+        Schüler/in: {{ data.student_name }} | Klasse: {{ data.student_class }}
+    </div>
+
+    <div class="content">
+        {{ data.content | replace('\\n', '<br>') | safe }}
+    </div>
+
+    {% if data.gfk_principles_applied %}
+    <div style="margin-bottom: 20px;">
+        {% for principle in data.gfk_principles_applied %}
+        <span class="gfk-badge">✓ {{ principle }}</span>
+        {% endfor %}
+    </div>
+    {% endif %}
+
+    <div class="signature">
+        <p>Mit freundlichen Grüßen</p>
+        <p style="margin-top: 30px;">
+            {{ data.teacher_name }}
+            {% if data.teacher_title %}<br><span style="font-size: 10pt;">{{ data.teacher_title }}</span>{% endif %}
+        </p>
+    </div>
+
+    {% if data.legal_references %}
+    <div class="legal-references">
+        <strong>Rechtliche Grundlagen:</strong><br>
+        {% for ref in data.legal_references %}
+        • {{ ref.law }} {{ ref.paragraph }}: {{ ref.title }}<br>
+        {% endfor %}
+    </div>
+    {% endif %}
+
+    <div style="font-size: 8pt; color: #999; margin-top: 30px; text-align: center;">
+        Erstellt mit BreakPilot | {{ generated_at }}
+    </div>
+</body>
+</html>
+"""
+
+    @staticmethod
+    def _get_certificate_template_html() -> str:
+        """Inline HTML-Template für Zeugnisse."""
+        return """
+<!DOCTYPE html>
+<html lang="de">
+<head>
+    <meta charset="UTF-8">
+    <title>Zeugnis - {{ data.student_name }}</title>
+</head>
+<body>
+    <div class="certificate-header">
+        {% if data.school_info %}
+        <div class="school-name" style="font-size: 14pt;">{{ data.school_info.name }}</div>
+        {% endif %}
+        <div class="certificate-title">
+            {% if data.certificate_type == 'halbjahr' %}
+            Halbjahreszeugnis
+            {% elif data.certificate_type == 'jahres' %}
+            Jahreszeugnis
+            {% else %}
+            Abschlusszeugnis
+            {% endif %}
+        </div>
+        <div>Schuljahr {{ data.school_year }}</div>
+    </div>
+
+    <div class="student-info">
+        <table style="width: 100%;">
+            <tr>
+                <td><strong>Name:</strong> {{ data.student_name }}</td>
+                <td><strong>Geburtsdatum:</strong> {{ data.student_birthdate }}</td>
+            </tr>
+            <tr>
+                <td><strong>Klasse:</strong> {{ data.student_class }}</td>
+                <td>&nbsp;</td>
+            </tr>
+        </table>
+    </div>
+
+    <h3>Leistungen</h3>
+    <table class="grades-table">
+        <thead>
+            <tr>
+                <th style="width: 70%;">Fach</th>
+                <th style="width: 15%;">Note</th>
+                <th style="width: 15%;">Punkte</th>
+            </tr>
+        </thead>
+        <tbody>
+            {% for subject in data.subjects %}
+            <tr>
+                <td>{{ subject.name }}</td>
+                <td class="grade-cell" style="color: {{ subject.grade | grade_color }};">
+                    {{ subject.grade }}
+                </td>
+                <td class="grade-cell">{{ subject.points | default('-') }}</td>
+            </tr>
+            {% endfor %}
+        </tbody>
+    </table>
+
+    {% if data.social_behavior or data.work_behavior %}
+    <h3>Verhalten</h3>
+    <table class="grades-table" style="width: 50%;">
+        {% if data.social_behavior %}
+        <tr>
+            <td>Sozialverhalten</td>
+            <td class="grade-cell">{{ data.social_behavior }}</td>
+        </tr>
+        {% endif %}
+        {% if data.work_behavior %}
+        <tr>
+            <td>Arbeitsverhalten</td>
+            <td class="grade-cell">{{ data.work_behavior }}</td>
+        </tr>
+        {% endif %}
+    </table>
+    {% endif %}
+
+    <div class="attendance-box">
+        <strong>Versäumte Tage:</strong> {{ data.attendance.days_absent | default(0) }}
+        (davon entschuldigt: {{ data.attendance.days_excused | default(0) }},
+        unentschuldigt: {{ data.attendance.days_unexcused | default(0) }})
+    </div>
+
+    {% if data.remarks %}
+    <div style="margin-bottom: 20px;">
+        <strong>Bemerkungen:</strong><br>
+        {{ data.remarks }}
+    </div>
+    {% endif %}
+
+    <div style="margin-top: 30px;">
+        <strong>Ausgestellt am:</strong> {{ data.issue_date }}
+    </div>
+
+    <div class="signatures-row">
+        <div class="signature-block">
+            <div class="signature-line">{{ data.class_teacher }}</div>
+            <div style="font-size: 9pt;">Klassenlehrer/in</div>
+        </div>
+        <div class="signature-block">
+            <div class="signature-line">{{ data.principal }}</div>
+            <div style="font-size: 9pt;">Schulleiter/in</div>
+        </div>
+    </div>
+
+    <div style="text-align: center; margin-top: 40px;">
+        <div style="font-size: 9pt; color: #666;">Siegel der Schule</div>
+    </div>
+</body>
+</html>
+"""
+
+    @staticmethod
+    def _get_correction_template_html() -> str:
+        """Inline HTML-Template für Korrektur-Übersichten."""
+        return """
+<!DOCTYPE html>
+<html lang="de">
+<head>
+    <meta charset="UTF-8">
+    <title>Korrektur - {{ data.exam_title }}</title>
+</head>
+<body>
+    <div class="exam-header">
+        <h1 style="margin: 0; color: white;">{{ data.exam_title }}</h1>
+        <div>{{ data.subject }} | {{ data.date }}</div>
+    </div>
+
+    <div class="student-info">
+        <strong>{{ data.student.name }}</strong> | Klasse {{ data.student.class_name }}
+    </div>
+
+    <div class="result-box">
+        <div class="result-grade" style="color: {{ data.grade | grade_color }};">
+            Note: {{ data.grade }}
+        </div>
+        <div class="result-points">
+            {{ data.achieved_points }} von {{ data.max_points }} Punkten
+            ({{ data.percentage | round(1) }}%)
+        </div>
+    </div>
+
+    <h3>Detaillierte Auswertung</h3>
+    <div class="corrections-list">
+        {% for item in data.corrections %}
+        <div class="correction-item">
+            <div class="correction-question">
+                {{ item.question }}
+            </div>
+            {% if item.answer %}
+            <div style="margin: 5px 0; font-style: italic; color: #555;">
+                <strong>Antwort:</strong> {{ item.answer }}
+            </div>
+            {% endif %}
+            <div>
+                <strong>Punkte:</strong> {{ item.points }}
+            </div>
+            {% if item.feedback %}
+            <div class="correction-feedback">
+                {{ item.feedback }}
+            </div>
+            {% endif %}
+        </div>
+        {% endfor %}
+    </div>
+
+    {% if data.teacher_notes %}
+    <div style="background: #e3f2fd; padding: 15px; border-radius: 5px; margin-bottom: 20px;">
+        <strong>Lehrerkommentar:</strong><br>
+        {{ data.teacher_notes }}
+    </div>
+    {% endif %}
+
+    {% if data.ai_feedback %}
+    <div style="background: #f3e5f5; padding: 15px; border-radius: 5px; margin-bottom: 20px;">
+        <strong>KI-Feedback:</strong><br>
+        {{ data.ai_feedback }}
+    </div>
+    {% endif %}
+
+    {% if data.class_average or data.grade_distribution %}
+    <h3>Klassenstatistik</h3>
+    <table class="stats-table">
+        {% if data.class_average %}
+        <tr>
+            <td><strong>Klassendurchschnitt:</strong></td>
+            <td>{{ data.class_average }}</td>
+        </tr>
+        {% endif %}
+        {% if data.grade_distribution %}
+        <tr>
+            <td><strong>Notenverteilung:</strong></td>
+            <td>
+                {% for grade, count in data.grade_distribution.items() %}
+                Note {{ grade }}: {{ count }}x{% if not loop.last %}, {% endif %}
+                {% endfor %}
+            </td>
+        </tr>
+        {% endif %}
+    </table>
+    {% endif %}
+
+    <div class="signature" style="margin-top: 40px;">
+        <p style="font-size: 9pt; color: #666;">Datum: {{ data.date }}</p>
+    </div>
+
+    <div style="font-size: 8pt; color: #999; margin-top: 30px; text-align: center;">
+        Erstellt mit BreakPilot | {{ generated_at }}
+    </div>
+</body>
+</html>
+"""
+
+
+# Convenience functions for direct usage
+_pdf_service: Optional[PDFService] = None
+
+
+def get_pdf_service() -> PDFService:
+    """Gibt Singleton-Instanz des PDF-Service zurück."""
+    global _pdf_service
+    if _pdf_service is None:
+        _pdf_service = PDFService()
+    return _pdf_service
+
+
+def generate_letter_pdf(data: Dict[str, Any]) -> bytes:
+    """
+    Convenience function zum Generieren eines Elternbrief-PDFs.
+
+    Args:
+        data: Dict mit allen Briefdaten
+
+    Returns:
+        PDF als bytes
+    """
+    service = get_pdf_service()
+
+    # Convert dict to LetterData
+    school_info = None
+    if data.get("school_info"):
+        school_info = SchoolInfo(**data["school_info"])
+
+    letter_data = LetterData(
+        recipient_name=data.get("recipient_name", ""),
+        recipient_address=data.get("recipient_address", ""),
+        student_name=data.get("student_name", ""),
+        student_class=data.get("student_class", ""),
+        subject=data.get("subject", ""),
+        content=data.get("content", ""),
+        date=data.get("date", datetime.now().strftime("%d.%m.%Y")),
+        teacher_name=data.get("teacher_name", ""),
+        teacher_title=data.get("teacher_title"),
+        school_info=school_info,
+        letter_type=data.get("letter_type", "general"),
+        tone=data.get("tone", "professional"),
+        legal_references=data.get("legal_references"),
+        gfk_principles_applied=data.get("gfk_principles_applied")
+    )
+
+    return service.generate_letter_pdf(letter_data)
+
+
+def generate_certificate_pdf(data: Dict[str, Any]) -> bytes:
+    """
+    Convenience function zum Generieren eines Zeugnis-PDFs.
+
+    Args:
+        data: Dict mit allen Zeugnisdaten
+
+    Returns:
+        PDF als bytes
+    """
+    service = get_pdf_service()
+
+    school_info = None
+    if data.get("school_info"):
+        school_info = SchoolInfo(**data["school_info"])
+
+    cert_data = CertificateData(
+        student_name=data.get("student_name", ""),
+        student_birthdate=data.get("student_birthdate", ""),
+        student_class=data.get("student_class", ""),
+        school_year=data.get("school_year", ""),
+        certificate_type=data.get("certificate_type", "halbjahr"),
+        subjects=data.get("subjects", []),
+        attendance=data.get("attendance", {"days_absent": 0, "days_excused": 0, "days_unexcused": 0}),
+        remarks=data.get("remarks"),
+        class_teacher=data.get("class_teacher", ""),
+        principal=data.get("principal", ""),
+        school_info=school_info,
+        issue_date=data.get("issue_date", datetime.now().strftime("%d.%m.%Y")),
+        social_behavior=data.get("social_behavior"),
+        work_behavior=data.get("work_behavior")
+    )
+
+    return service.generate_certificate_pdf(cert_data)
+
+
+def generate_correction_pdf(data: Dict[str, Any]) -> bytes:
+    """
+    Convenience function zum Generieren eines Korrektur-PDFs.
+
+    Args:
+        data: Dict mit allen Korrekturdaten
+
+    Returns:
+        PDF als bytes
+    """
+    service = get_pdf_service()
+
+    # Create StudentInfo from dict
+    student = StudentInfo(
+        student_id=data.get("student_id", "unknown"),
+        name=data.get("student_name", data.get("name", "")),
+        class_name=data.get("student_class", data.get("class_name", ""))
+    )
+
+    # Calculate percentage if not provided
+    max_points = data.get("max_points", data.get("total_points", 0))
+    achieved_points = data.get("achieved_points", 0)
+    percentage = data.get("percentage", (achieved_points / max_points * 100) if max_points > 0 else 0.0)
+
+    correction_data = CorrectionData(
+        student=student,
+        exam_title=data.get("exam_title", ""),
+        subject=data.get("subject", ""),
+        date=data.get("date", data.get("exam_date", "")),
+        max_points=max_points,
+        achieved_points=achieved_points,
+        grade=data.get("grade", ""),
+        percentage=percentage,
+        corrections=data.get("corrections", []),
+        teacher_notes=data.get("teacher_notes", data.get("teacher_comment", "")),
+        ai_feedback=data.get("ai_feedback", ""),
+        grade_distribution=data.get("grade_distribution"),
+        class_average=data.get("class_average")
+    )
+
+    return service.generate_correction_pdf(correction_data)