Initial commit: breakpilot-core - Shared Infrastructure

Docker Compose with 24+ services:
- PostgreSQL (PostGIS), Valkey, MinIO, Qdrant
- Vault (PKI/TLS), Nginx (Reverse Proxy)
- Backend Core API, Consent Service, Billing Service
- RAG Service, Embedding Service
- Gitea, Woodpecker CI/CD
- Night Scheduler, Health Aggregator
- Jitsi (Web/XMPP/JVB/Jicofo), Mailpit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Boenisch
2026-02-11 23:47:13 +01:00
commit ad111d5e69
244 changed files with 84288 additions and 0 deletions

View File

@@ -0,0 +1,22 @@
# Backend Services Module
# Shared services for PDF generation, file processing, and more
# PDFService requires WeasyPrint which needs system libraries (libgobject, etc.)
# Make import optional for environments without these dependencies (e.g., CI)
try:
from .pdf_service import PDFService
_pdf_available = True
except (ImportError, OSError) as e:
PDFService = None # type: ignore
_pdf_available = False
# FileProcessor requires OpenCV which needs libGL.so.1
# Make import optional for CI environments
try:
from .file_processor import FileProcessor
_file_processor_available = True
except (ImportError, OSError) as e:
FileProcessor = None # type: ignore
_file_processor_available = False
__all__ = ["PDFService", "FileProcessor"]

View File

@@ -0,0 +1,563 @@
"""
File Processor Service - Dokumentenverarbeitung für BreakPilot.
Shared Service für:
- OCR (Optical Character Recognition) für Handschrift und gedruckten Text
- PDF-Parsing und Textextraktion
- Bildverarbeitung und -optimierung
- DOCX/DOC Textextraktion
Verwendet:
- PaddleOCR für deutsche Handschrift
- PyMuPDF für PDF-Verarbeitung
- python-docx für DOCX-Dateien
- OpenCV für Bildvorverarbeitung
"""
import logging
import os
import io
import base64
from pathlib import Path
from typing import Optional, List, Dict, Any, Tuple, Union
from dataclasses import dataclass
from enum import Enum
import cv2
import numpy as np
from PIL import Image
logger = logging.getLogger(__name__)
class FileType(str, Enum):
"""Unterstützte Dateitypen."""
PDF = "pdf"
IMAGE = "image"
DOCX = "docx"
DOC = "doc"
TXT = "txt"
UNKNOWN = "unknown"
class ProcessingMode(str, Enum):
"""Verarbeitungsmodi."""
OCR_HANDWRITING = "ocr_handwriting" # Handschrifterkennung
OCR_PRINTED = "ocr_printed" # Gedruckter Text
TEXT_EXTRACT = "text_extract" # Textextraktion (PDF/DOCX)
MIXED = "mixed" # Kombiniert OCR + Textextraktion
@dataclass
class ProcessedRegion:
"""Ein erkannter Textbereich."""
text: str
confidence: float
bbox: Tuple[int, int, int, int] # x1, y1, x2, y2
page: int = 1
@dataclass
class ProcessingResult:
"""Ergebnis der Dokumentenverarbeitung."""
text: str
confidence: float
regions: List[ProcessedRegion]
page_count: int
file_type: FileType
processing_mode: ProcessingMode
metadata: Dict[str, Any]
class FileProcessor:
"""
Zentrale Dokumentenverarbeitung für BreakPilot.
Unterstützt:
- Handschrifterkennung (OCR) für Klausuren
- Textextraktion aus PDFs
- DOCX/DOC Verarbeitung
- Bildvorverarbeitung für bessere OCR-Ergebnisse
"""
def __init__(self, ocr_lang: str = "de", use_gpu: bool = False):
"""
Initialisiert den File Processor.
Args:
ocr_lang: Sprache für OCR (default: "de" für Deutsch)
use_gpu: GPU für OCR nutzen (beschleunigt Verarbeitung)
"""
self.ocr_lang = ocr_lang
self.use_gpu = use_gpu
self._ocr_engine = None
logger.info(f"FileProcessor initialized (lang={ocr_lang}, gpu={use_gpu})")
@property
def ocr_engine(self):
"""Lazy-Loading des OCR-Engines."""
if self._ocr_engine is None:
self._ocr_engine = self._init_ocr_engine()
return self._ocr_engine
def _init_ocr_engine(self):
"""Initialisiert PaddleOCR oder Fallback."""
try:
from paddleocr import PaddleOCR
return PaddleOCR(
use_angle_cls=True,
lang='german', # Deutsch
use_gpu=self.use_gpu,
show_log=False
)
except ImportError:
logger.warning("PaddleOCR nicht installiert - verwende Fallback")
return None
def detect_file_type(self, file_path: str = None, file_bytes: bytes = None) -> FileType:
"""
Erkennt den Dateityp.
Args:
file_path: Pfad zur Datei
file_bytes: Dateiinhalt als Bytes
Returns:
FileType enum
"""
if file_path:
ext = Path(file_path).suffix.lower()
if ext == ".pdf":
return FileType.PDF
elif ext in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif"]:
return FileType.IMAGE
elif ext == ".docx":
return FileType.DOCX
elif ext == ".doc":
return FileType.DOC
elif ext == ".txt":
return FileType.TXT
if file_bytes:
# Magic number detection
if file_bytes[:4] == b'%PDF':
return FileType.PDF
elif file_bytes[:8] == b'\x89PNG\r\n\x1a\n':
return FileType.IMAGE
elif file_bytes[:2] in [b'\xff\xd8', b'BM']: # JPEG, BMP
return FileType.IMAGE
elif file_bytes[:4] == b'PK\x03\x04': # ZIP (DOCX)
return FileType.DOCX
return FileType.UNKNOWN
def process(
self,
file_path: str = None,
file_bytes: bytes = None,
mode: ProcessingMode = ProcessingMode.MIXED
) -> ProcessingResult:
"""
Verarbeitet ein Dokument.
Args:
file_path: Pfad zur Datei
file_bytes: Dateiinhalt als Bytes
mode: Verarbeitungsmodus
Returns:
ProcessingResult mit extrahiertem Text und Metadaten
"""
if not file_path and not file_bytes:
raise ValueError("Entweder file_path oder file_bytes muss angegeben werden")
file_type = self.detect_file_type(file_path, file_bytes)
logger.info(f"Processing file of type: {file_type}")
if file_type == FileType.PDF:
return self._process_pdf(file_path, file_bytes, mode)
elif file_type == FileType.IMAGE:
return self._process_image(file_path, file_bytes, mode)
elif file_type == FileType.DOCX:
return self._process_docx(file_path, file_bytes)
elif file_type == FileType.TXT:
return self._process_txt(file_path, file_bytes)
else:
raise ValueError(f"Nicht unterstützter Dateityp: {file_type}")
def _process_pdf(
self,
file_path: str = None,
file_bytes: bytes = None,
mode: ProcessingMode = ProcessingMode.MIXED
) -> ProcessingResult:
"""Verarbeitet PDF-Dateien."""
try:
import fitz # PyMuPDF
except ImportError:
logger.warning("PyMuPDF nicht installiert - versuche Fallback")
# Fallback: PDF als Bild behandeln
return self._process_image(file_path, file_bytes, mode)
if file_bytes:
doc = fitz.open(stream=file_bytes, filetype="pdf")
else:
doc = fitz.open(file_path)
all_text = []
all_regions = []
total_confidence = 0.0
region_count = 0
for page_num, page in enumerate(doc, start=1):
# Erst versuchen Text direkt zu extrahieren
page_text = page.get_text()
if page_text.strip() and mode != ProcessingMode.OCR_HANDWRITING:
# PDF enthält Text (nicht nur Bilder)
all_text.append(page_text)
all_regions.append(ProcessedRegion(
text=page_text,
confidence=1.0,
bbox=(0, 0, int(page.rect.width), int(page.rect.height)),
page=page_num
))
total_confidence += 1.0
region_count += 1
else:
# Seite als Bild rendern und OCR anwenden
pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) # 2x Auflösung
img_bytes = pix.tobytes("png")
img = Image.open(io.BytesIO(img_bytes))
ocr_result = self._ocr_image(img)
all_text.append(ocr_result["text"])
for region in ocr_result["regions"]:
region.page = page_num
all_regions.append(region)
total_confidence += region.confidence
region_count += 1
doc.close()
avg_confidence = total_confidence / region_count if region_count > 0 else 0.0
return ProcessingResult(
text="\n\n".join(all_text),
confidence=avg_confidence,
regions=all_regions,
page_count=len(doc) if hasattr(doc, '__len__') else 1,
file_type=FileType.PDF,
processing_mode=mode,
metadata={"source": file_path or "bytes"}
)
def _process_image(
self,
file_path: str = None,
file_bytes: bytes = None,
mode: ProcessingMode = ProcessingMode.MIXED
) -> ProcessingResult:
"""Verarbeitet Bilddateien."""
if file_bytes:
img = Image.open(io.BytesIO(file_bytes))
else:
img = Image.open(file_path)
# Bildvorverarbeitung
processed_img = self._preprocess_image(img)
# OCR
ocr_result = self._ocr_image(processed_img)
return ProcessingResult(
text=ocr_result["text"],
confidence=ocr_result["confidence"],
regions=ocr_result["regions"],
page_count=1,
file_type=FileType.IMAGE,
processing_mode=mode,
metadata={
"source": file_path or "bytes",
"image_size": img.size
}
)
def _process_docx(
self,
file_path: str = None,
file_bytes: bytes = None
) -> ProcessingResult:
"""Verarbeitet DOCX-Dateien."""
try:
from docx import Document
except ImportError:
raise ImportError("python-docx ist nicht installiert")
if file_bytes:
doc = Document(io.BytesIO(file_bytes))
else:
doc = Document(file_path)
paragraphs = []
for para in doc.paragraphs:
if para.text.strip():
paragraphs.append(para.text)
# Auch Tabellen extrahieren
for table in doc.tables:
for row in table.rows:
row_text = " | ".join(cell.text for cell in row.cells)
if row_text.strip():
paragraphs.append(row_text)
text = "\n\n".join(paragraphs)
return ProcessingResult(
text=text,
confidence=1.0, # Direkte Textextraktion
regions=[ProcessedRegion(
text=text,
confidence=1.0,
bbox=(0, 0, 0, 0),
page=1
)],
page_count=1,
file_type=FileType.DOCX,
processing_mode=ProcessingMode.TEXT_EXTRACT,
metadata={"source": file_path or "bytes"}
)
def _process_txt(
self,
file_path: str = None,
file_bytes: bytes = None
) -> ProcessingResult:
"""Verarbeitet Textdateien."""
if file_bytes:
text = file_bytes.decode('utf-8', errors='ignore')
else:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
text = f.read()
return ProcessingResult(
text=text,
confidence=1.0,
regions=[ProcessedRegion(
text=text,
confidence=1.0,
bbox=(0, 0, 0, 0),
page=1
)],
page_count=1,
file_type=FileType.TXT,
processing_mode=ProcessingMode.TEXT_EXTRACT,
metadata={"source": file_path or "bytes"}
)
def _preprocess_image(self, img: Image.Image) -> Image.Image:
"""
Vorverarbeitung des Bildes für bessere OCR-Ergebnisse.
- Konvertierung zu Graustufen
- Kontrastverstärkung
- Rauschunterdrückung
- Binarisierung
"""
# PIL zu OpenCV
cv_img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
# Zu Graustufen konvertieren
gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
# Rauschunterdrückung
denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
# Kontrastverstärkung (CLAHE)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced = clahe.apply(denoised)
# Adaptive Binarisierung
binary = cv2.adaptiveThreshold(
enhanced,
255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY,
11,
2
)
# Zurück zu PIL
return Image.fromarray(binary)
def _ocr_image(self, img: Image.Image) -> Dict[str, Any]:
"""
Führt OCR auf einem Bild aus.
Returns:
Dict mit text, confidence und regions
"""
if self.ocr_engine is None:
# Fallback wenn kein OCR-Engine verfügbar
return {
"text": "[OCR nicht verfügbar - bitte PaddleOCR installieren]",
"confidence": 0.0,
"regions": []
}
# PIL zu numpy array
img_array = np.array(img)
# Wenn Graustufen, zu RGB konvertieren (PaddleOCR erwartet RGB)
if len(img_array.shape) == 2:
img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
# OCR ausführen
result = self.ocr_engine.ocr(img_array, cls=True)
if not result or not result[0]:
return {"text": "", "confidence": 0.0, "regions": []}
all_text = []
all_regions = []
total_confidence = 0.0
for line in result[0]:
bbox_points = line[0] # [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
text, confidence = line[1]
# Bounding Box zu x1, y1, x2, y2 konvertieren
x_coords = [p[0] for p in bbox_points]
y_coords = [p[1] for p in bbox_points]
bbox = (
int(min(x_coords)),
int(min(y_coords)),
int(max(x_coords)),
int(max(y_coords))
)
all_text.append(text)
all_regions.append(ProcessedRegion(
text=text,
confidence=confidence,
bbox=bbox
))
total_confidence += confidence
avg_confidence = total_confidence / len(all_regions) if all_regions else 0.0
return {
"text": "\n".join(all_text),
"confidence": avg_confidence,
"regions": all_regions
}
def extract_handwriting_regions(
self,
img: Image.Image,
min_area: int = 500
) -> List[Dict[str, Any]]:
"""
Erkennt und extrahiert handschriftliche Bereiche aus einem Bild.
Nützlich für Klausuren mit gedruckten Fragen und handschriftlichen Antworten.
Args:
img: Eingabebild
min_area: Minimale Fläche für erkannte Regionen
Returns:
Liste von Regionen mit Koordinaten und erkanntem Text
"""
# Bildvorverarbeitung
cv_img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
# Kanten erkennen
edges = cv2.Canny(gray, 50, 150)
# Morphologische Operationen zum Verbinden
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 5))
dilated = cv2.dilate(edges, kernel, iterations=2)
# Konturen finden
contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
regions = []
for contour in contours:
area = cv2.contourArea(contour)
if area < min_area:
continue
x, y, w, h = cv2.boundingRect(contour)
# Region ausschneiden
region_img = img.crop((x, y, x + w, y + h))
# OCR auf Region anwenden
ocr_result = self._ocr_image(region_img)
regions.append({
"bbox": (x, y, x + w, y + h),
"area": area,
"text": ocr_result["text"],
"confidence": ocr_result["confidence"]
})
# Nach Y-Position sortieren (oben nach unten)
regions.sort(key=lambda r: r["bbox"][1])
return regions
# Singleton-Instanz
_file_processor: Optional[FileProcessor] = None
def get_file_processor() -> FileProcessor:
"""Gibt Singleton-Instanz des File Processors zurück."""
global _file_processor
if _file_processor is None:
_file_processor = FileProcessor()
return _file_processor
# Convenience functions
def process_file(
file_path: str = None,
file_bytes: bytes = None,
mode: ProcessingMode = ProcessingMode.MIXED
) -> ProcessingResult:
"""
Convenience function zum Verarbeiten einer Datei.
Args:
file_path: Pfad zur Datei
file_bytes: Dateiinhalt als Bytes
mode: Verarbeitungsmodus
Returns:
ProcessingResult
"""
processor = get_file_processor()
return processor.process(file_path, file_bytes, mode)
def extract_text_from_pdf(file_path: str = None, file_bytes: bytes = None) -> str:
"""Extrahiert Text aus einer PDF-Datei."""
result = process_file(file_path, file_bytes, ProcessingMode.TEXT_EXTRACT)
return result.text
def ocr_image(file_path: str = None, file_bytes: bytes = None) -> str:
"""Führt OCR auf einem Bild aus."""
result = process_file(file_path, file_bytes, ProcessingMode.OCR_PRINTED)
return result.text
def ocr_handwriting(file_path: str = None, file_bytes: bytes = None) -> str:
"""Führt Handschrift-OCR auf einem Bild aus."""
result = process_file(file_path, file_bytes, ProcessingMode.OCR_HANDWRITING)
return result.text

View File

@@ -0,0 +1,916 @@
"""
PDF Service - Zentrale PDF-Generierung für BreakPilot.
Shared Service für:
- Letters (Elternbriefe)
- Zeugnisse (Schulzeugnisse)
- Correction (Korrektur-Übersichten)
Verwendet WeasyPrint für PDF-Rendering und Jinja2 für Templates.
"""
import logging
import os
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional, List
from dataclasses import dataclass
from jinja2 import Environment, FileSystemLoader, select_autoescape
from weasyprint import HTML, CSS
from weasyprint.text.fonts import FontConfiguration
logger = logging.getLogger(__name__)
# Template directory
TEMPLATES_DIR = Path(__file__).parent.parent / "templates" / "pdf"
@dataclass
class SchoolInfo:
"""Schulinformationen für Header."""
name: str
address: str
phone: str
email: str
logo_path: Optional[str] = None
website: Optional[str] = None
principal: Optional[str] = None
@dataclass
class LetterData:
"""Daten für Elternbrief-PDF."""
recipient_name: str
recipient_address: str
student_name: str
student_class: str
subject: str
content: str
date: str
teacher_name: str
teacher_title: Optional[str] = None
school_info: Optional[SchoolInfo] = None
letter_type: str = "general" # general, halbjahr, fehlzeiten, elternabend, lob
tone: str = "professional"
legal_references: Optional[List[Dict[str, str]]] = None
gfk_principles_applied: Optional[List[str]] = None
@dataclass
class CertificateData:
"""Daten für Zeugnis-PDF."""
student_name: str
student_birthdate: str
student_class: str
school_year: str
certificate_type: str # halbjahr, jahres, abschluss
subjects: List[Dict[str, Any]] # [{name, grade, note}]
attendance: Dict[str, int] # {days_absent, days_excused, days_unexcused}
remarks: Optional[str] = None
class_teacher: str = ""
principal: str = ""
school_info: Optional[SchoolInfo] = None
issue_date: str = ""
social_behavior: Optional[str] = None # A, B, C, D
work_behavior: Optional[str] = None # A, B, C, D
@dataclass
class StudentInfo:
"""Schülerinformationen für Korrektur-PDFs."""
student_id: str
name: str
class_name: str
@dataclass
class CorrectionData:
"""Daten für Korrektur-Übersicht PDF."""
student: StudentInfo
exam_title: str
subject: str
date: str
max_points: int
achieved_points: int
grade: str
percentage: float
corrections: List[Dict[str, Any]] # [{question, answer, points, feedback}]
teacher_notes: str = ""
ai_feedback: str = ""
grade_distribution: Optional[Dict[str, int]] = None # {note: anzahl}
class_average: Optional[float] = None
class PDFService:
"""
Zentrale PDF-Generierung für BreakPilot.
Unterstützt:
- Elternbriefe mit GFK-Prinzipien und rechtlichen Referenzen
- Schulzeugnisse (Halbjahr, Jahres, Abschluss)
- Korrektur-Übersichten für Klausuren
"""
def __init__(self, templates_dir: Optional[Path] = None):
"""
Initialisiert den PDF-Service.
Args:
templates_dir: Optionaler Pfad zu Templates (Standard: backend/templates/pdf)
"""
self.templates_dir = templates_dir or TEMPLATES_DIR
# Ensure templates directory exists
self.templates_dir.mkdir(parents=True, exist_ok=True)
# Initialize Jinja2 environment
self.jinja_env = Environment(
loader=FileSystemLoader(str(self.templates_dir)),
autoescape=select_autoescape(['html', 'xml']),
trim_blocks=True,
lstrip_blocks=True
)
# Add custom filters
self.jinja_env.filters['date_format'] = self._date_format
self.jinja_env.filters['grade_color'] = self._grade_color
# Font configuration for WeasyPrint
self.font_config = FontConfiguration()
logger.info(f"PDFService initialized with templates from {self.templates_dir}")
@staticmethod
def _date_format(value: str, format_str: str = "%d.%m.%Y") -> str:
"""Formatiert Datum für deutsche Darstellung."""
if not value:
return ""
try:
dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
return dt.strftime(format_str)
except (ValueError, AttributeError):
return value
@staticmethod
def _grade_color(grade: str) -> str:
"""Gibt Farbe basierend auf Note zurück."""
grade_colors = {
"1": "#27ae60", # Grün
"2": "#2ecc71", # Hellgrün
"3": "#f1c40f", # Gelb
"4": "#e67e22", # Orange
"5": "#e74c3c", # Rot
"6": "#c0392b", # Dunkelrot
"A": "#27ae60",
"B": "#2ecc71",
"C": "#f1c40f",
"D": "#e74c3c",
}
return grade_colors.get(str(grade), "#333333")
def _get_base_css(self) -> str:
"""Gibt Basis-CSS für alle PDFs zurück."""
return """
@page {
size: A4;
margin: 2cm 2.5cm;
@top-right {
content: counter(page) " / " counter(pages);
font-size: 9pt;
color: #666;
}
}
body {
font-family: 'DejaVu Sans', 'Liberation Sans', Arial, sans-serif;
font-size: 11pt;
line-height: 1.5;
color: #333;
}
h1, h2, h3 {
font-weight: bold;
margin-top: 1em;
margin-bottom: 0.5em;
}
h1 { font-size: 16pt; }
h2 { font-size: 14pt; }
h3 { font-size: 12pt; }
.header {
border-bottom: 2px solid #2c3e50;
padding-bottom: 15px;
margin-bottom: 20px;
}
.school-name {
font-size: 18pt;
font-weight: bold;
color: #2c3e50;
}
.school-info {
font-size: 9pt;
color: #666;
}
.letter-date {
text-align: right;
margin-bottom: 20px;
}
.recipient {
margin-bottom: 30px;
}
.subject {
font-weight: bold;
margin-bottom: 20px;
}
.content {
text-align: justify;
margin-bottom: 30px;
}
.signature {
margin-top: 40px;
}
.legal-references {
font-size: 9pt;
color: #666;
border-top: 1px solid #ddd;
margin-top: 30px;
padding-top: 10px;
}
.gfk-badge {
display: inline-block;
background: #e8f5e9;
color: #27ae60;
font-size: 8pt;
padding: 2px 8px;
border-radius: 10px;
margin-right: 5px;
}
/* Zeugnis-Styles */
.certificate-header {
text-align: center;
margin-bottom: 30px;
}
.certificate-title {
font-size: 20pt;
font-weight: bold;
margin-bottom: 10px;
}
.student-info {
margin-bottom: 20px;
padding: 15px;
background: #f9f9f9;
border-radius: 5px;
}
.grades-table {
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
}
.grades-table th,
.grades-table td {
border: 1px solid #ddd;
padding: 8px 12px;
text-align: left;
}
.grades-table th {
background: #2c3e50;
color: white;
}
.grades-table tr:nth-child(even) {
background: #f9f9f9;
}
.grade-cell {
text-align: center;
font-weight: bold;
font-size: 12pt;
}
.attendance-box {
background: #fff3cd;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}
.signatures-row {
display: flex;
justify-content: space-between;
margin-top: 50px;
}
.signature-block {
text-align: center;
width: 40%;
}
.signature-line {
border-top: 1px solid #333;
margin-top: 40px;
padding-top: 5px;
}
/* Korrektur-Styles */
.exam-header {
background: #2c3e50;
color: white;
padding: 15px;
margin-bottom: 20px;
}
.result-box {
background: #e8f5e9;
padding: 20px;
text-align: center;
margin-bottom: 20px;
border-radius: 5px;
}
.result-grade {
font-size: 36pt;
font-weight: bold;
}
.result-points {
font-size: 14pt;
color: #666;
}
.corrections-list {
margin-bottom: 20px;
}
.correction-item {
border: 1px solid #ddd;
padding: 15px;
margin-bottom: 10px;
border-radius: 5px;
}
.correction-question {
font-weight: bold;
margin-bottom: 5px;
}
.correction-feedback {
background: #fff8e1;
padding: 10px;
margin-top: 10px;
border-left: 3px solid #ffc107;
font-size: 10pt;
}
.stats-table {
width: 100%;
margin-top: 20px;
}
.stats-table td {
padding: 5px 10px;
}
"""
def generate_letter_pdf(self, data: LetterData) -> bytes:
"""
Generiert PDF für Elternbrief.
Args:
data: LetterData mit allen Briefinformationen
Returns:
PDF als bytes
"""
logger.info(f"Generating letter PDF for student: {data.student_name}")
template = self._get_letter_template()
html_content = template.render(
data=data,
generated_at=datetime.now().strftime("%d.%m.%Y %H:%M")
)
css = CSS(string=self._get_base_css(), font_config=self.font_config)
pdf_bytes = HTML(string=html_content).write_pdf(
stylesheets=[css],
font_config=self.font_config
)
logger.info(f"Letter PDF generated: {len(pdf_bytes)} bytes")
return pdf_bytes
def generate_certificate_pdf(self, data: CertificateData) -> bytes:
"""
Generiert PDF für Schulzeugnis.
Args:
data: CertificateData mit allen Zeugnisinformationen
Returns:
PDF als bytes
"""
logger.info(f"Generating certificate PDF for: {data.student_name}")
template = self._get_certificate_template()
html_content = template.render(
data=data,
generated_at=datetime.now().strftime("%d.%m.%Y %H:%M")
)
css = CSS(string=self._get_base_css(), font_config=self.font_config)
pdf_bytes = HTML(string=html_content).write_pdf(
stylesheets=[css],
font_config=self.font_config
)
logger.info(f"Certificate PDF generated: {len(pdf_bytes)} bytes")
return pdf_bytes
def generate_correction_pdf(self, data: CorrectionData) -> bytes:
"""
Generiert PDF für Korrektur-Übersicht.
Args:
data: CorrectionData mit allen Korrekturinformationen
Returns:
PDF als bytes
"""
logger.info(f"Generating correction PDF for: {data.student.name}")
template = self._get_correction_template()
html_content = template.render(
data=data,
generated_at=datetime.now().strftime("%d.%m.%Y %H:%M")
)
css = CSS(string=self._get_base_css(), font_config=self.font_config)
pdf_bytes = HTML(string=html_content).write_pdf(
stylesheets=[css],
font_config=self.font_config
)
logger.info(f"Correction PDF generated: {len(pdf_bytes)} bytes")
return pdf_bytes
def _get_letter_template(self):
"""Gibt Letter-Template zurück (inline falls Datei nicht existiert)."""
template_path = self.templates_dir / "letter.html"
if template_path.exists():
return self.jinja_env.get_template("letter.html")
# Inline-Template als Fallback
return self.jinja_env.from_string(self._get_letter_template_html())
def _get_certificate_template(self):
"""Gibt Certificate-Template zurück."""
template_path = self.templates_dir / "certificate.html"
if template_path.exists():
return self.jinja_env.get_template("certificate.html")
return self.jinja_env.from_string(self._get_certificate_template_html())
def _get_correction_template(self):
"""Gibt Correction-Template zurück."""
template_path = self.templates_dir / "correction.html"
if template_path.exists():
return self.jinja_env.get_template("correction.html")
return self.jinja_env.from_string(self._get_correction_template_html())
@staticmethod
def _get_letter_template_html() -> str:
"""Inline HTML-Template für Elternbriefe."""
return """
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>{{ data.subject }}</title>
</head>
<body>
<div class="header">
{% if data.school_info %}
<div class="school-name">{{ data.school_info.name }}</div>
<div class="school-info">
{{ data.school_info.address }}<br>
Tel: {{ data.school_info.phone }} | E-Mail: {{ data.school_info.email }}
{% if data.school_info.website %} | {{ data.school_info.website }}{% endif %}
</div>
{% else %}
<div class="school-name">Schule</div>
{% endif %}
</div>
<div class="letter-date">
{{ data.date }}
</div>
<div class="recipient">
{{ data.recipient_name }}<br>
{{ data.recipient_address | replace('\\n', '<br>') | safe }}
</div>
<div class="subject">
Betreff: {{ data.subject }}
</div>
<div class="meta-info" style="font-size: 10pt; color: #666; margin-bottom: 20px;">
Schüler/in: {{ data.student_name }} | Klasse: {{ data.student_class }}
</div>
<div class="content">
{{ data.content | replace('\\n', '<br>') | safe }}
</div>
{% if data.gfk_principles_applied %}
<div style="margin-bottom: 20px;">
{% for principle in data.gfk_principles_applied %}
<span class="gfk-badge">✓ {{ principle }}</span>
{% endfor %}
</div>
{% endif %}
<div class="signature">
<p>Mit freundlichen Grüßen</p>
<p style="margin-top: 30px;">
{{ data.teacher_name }}
{% if data.teacher_title %}<br><span style="font-size: 10pt;">{{ data.teacher_title }}</span>{% endif %}
</p>
</div>
{% if data.legal_references %}
<div class="legal-references">
<strong>Rechtliche Grundlagen:</strong><br>
{% for ref in data.legal_references %}
{{ ref.law }} {{ ref.paragraph }}: {{ ref.title }}<br>
{% endfor %}
</div>
{% endif %}
<div style="font-size: 8pt; color: #999; margin-top: 30px; text-align: center;">
Erstellt mit BreakPilot | {{ generated_at }}
</div>
</body>
</html>
"""
@staticmethod
def _get_certificate_template_html() -> str:
"""Inline HTML-Template für Zeugnisse."""
return """
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Zeugnis - {{ data.student_name }}</title>
</head>
<body>
<div class="certificate-header">
{% if data.school_info %}
<div class="school-name" style="font-size: 14pt;">{{ data.school_info.name }}</div>
{% endif %}
<div class="certificate-title">
{% if data.certificate_type == 'halbjahr' %}
Halbjahreszeugnis
{% elif data.certificate_type == 'jahres' %}
Jahreszeugnis
{% else %}
Abschlusszeugnis
{% endif %}
</div>
<div>Schuljahr {{ data.school_year }}</div>
</div>
<div class="student-info">
<table style="width: 100%;">
<tr>
<td><strong>Name:</strong> {{ data.student_name }}</td>
<td><strong>Geburtsdatum:</strong> {{ data.student_birthdate }}</td>
</tr>
<tr>
<td><strong>Klasse:</strong> {{ data.student_class }}</td>
<td>&nbsp;</td>
</tr>
</table>
</div>
<h3>Leistungen</h3>
<table class="grades-table">
<thead>
<tr>
<th style="width: 70%;">Fach</th>
<th style="width: 15%;">Note</th>
<th style="width: 15%;">Punkte</th>
</tr>
</thead>
<tbody>
{% for subject in data.subjects %}
<tr>
<td>{{ subject.name }}</td>
<td class="grade-cell" style="color: {{ subject.grade | grade_color }};">
{{ subject.grade }}
</td>
<td class="grade-cell">{{ subject.points | default('-') }}</td>
</tr>
{% endfor %}
</tbody>
</table>
{% if data.social_behavior or data.work_behavior %}
<h3>Verhalten</h3>
<table class="grades-table" style="width: 50%;">
{% if data.social_behavior %}
<tr>
<td>Sozialverhalten</td>
<td class="grade-cell">{{ data.social_behavior }}</td>
</tr>
{% endif %}
{% if data.work_behavior %}
<tr>
<td>Arbeitsverhalten</td>
<td class="grade-cell">{{ data.work_behavior }}</td>
</tr>
{% endif %}
</table>
{% endif %}
<div class="attendance-box">
<strong>Versäumte Tage:</strong> {{ data.attendance.days_absent | default(0) }}
(davon entschuldigt: {{ data.attendance.days_excused | default(0) }},
unentschuldigt: {{ data.attendance.days_unexcused | default(0) }})
</div>
{% if data.remarks %}
<div style="margin-bottom: 20px;">
<strong>Bemerkungen:</strong><br>
{{ data.remarks }}
</div>
{% endif %}
<div style="margin-top: 30px;">
<strong>Ausgestellt am:</strong> {{ data.issue_date }}
</div>
<div class="signatures-row">
<div class="signature-block">
<div class="signature-line">{{ data.class_teacher }}</div>
<div style="font-size: 9pt;">Klassenlehrer/in</div>
</div>
<div class="signature-block">
<div class="signature-line">{{ data.principal }}</div>
<div style="font-size: 9pt;">Schulleiter/in</div>
</div>
</div>
<div style="text-align: center; margin-top: 40px;">
<div style="font-size: 9pt; color: #666;">Siegel der Schule</div>
</div>
</body>
</html>
"""
@staticmethod
def _get_correction_template_html() -> str:
"""Inline HTML-Template für Korrektur-Übersichten."""
return """
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Korrektur - {{ data.exam_title }}</title>
</head>
<body>
<div class="exam-header">
<h1 style="margin: 0; color: white;">{{ data.exam_title }}</h1>
<div>{{ data.subject }} | {{ data.date }}</div>
</div>
<div class="student-info">
<strong>{{ data.student.name }}</strong> | Klasse {{ data.student.class_name }}
</div>
<div class="result-box">
<div class="result-grade" style="color: {{ data.grade | grade_color }};">
Note: {{ data.grade }}
</div>
<div class="result-points">
{{ data.achieved_points }} von {{ data.max_points }} Punkten
({{ data.percentage | round(1) }}%)
</div>
</div>
<h3>Detaillierte Auswertung</h3>
<div class="corrections-list">
{% for item in data.corrections %}
<div class="correction-item">
<div class="correction-question">
{{ item.question }}
</div>
{% if item.answer %}
<div style="margin: 5px 0; font-style: italic; color: #555;">
<strong>Antwort:</strong> {{ item.answer }}
</div>
{% endif %}
<div>
<strong>Punkte:</strong> {{ item.points }}
</div>
{% if item.feedback %}
<div class="correction-feedback">
{{ item.feedback }}
</div>
{% endif %}
</div>
{% endfor %}
</div>
{% if data.teacher_notes %}
<div style="background: #e3f2fd; padding: 15px; border-radius: 5px; margin-bottom: 20px;">
<strong>Lehrerkommentar:</strong><br>
{{ data.teacher_notes }}
</div>
{% endif %}
{% if data.ai_feedback %}
<div style="background: #f3e5f5; padding: 15px; border-radius: 5px; margin-bottom: 20px;">
<strong>KI-Feedback:</strong><br>
{{ data.ai_feedback }}
</div>
{% endif %}
{% if data.class_average or data.grade_distribution %}
<h3>Klassenstatistik</h3>
<table class="stats-table">
{% if data.class_average %}
<tr>
<td><strong>Klassendurchschnitt:</strong></td>
<td>{{ data.class_average }}</td>
</tr>
{% endif %}
{% if data.grade_distribution %}
<tr>
<td><strong>Notenverteilung:</strong></td>
<td>
{% for grade, count in data.grade_distribution.items() %}
Note {{ grade }}: {{ count }}x{% if not loop.last %}, {% endif %}
{% endfor %}
</td>
</tr>
{% endif %}
</table>
{% endif %}
<div class="signature" style="margin-top: 40px;">
<p style="font-size: 9pt; color: #666;">Datum: {{ data.date }}</p>
</div>
<div style="font-size: 8pt; color: #999; margin-top: 30px; text-align: center;">
Erstellt mit BreakPilot | {{ generated_at }}
</div>
</body>
</html>
"""
# Convenience functions for direct usage
_pdf_service: Optional[PDFService] = None
def get_pdf_service() -> PDFService:
"""Gibt Singleton-Instanz des PDF-Service zurück."""
global _pdf_service
if _pdf_service is None:
_pdf_service = PDFService()
return _pdf_service
def generate_letter_pdf(data: Dict[str, Any]) -> bytes:
"""
Convenience function zum Generieren eines Elternbrief-PDFs.
Args:
data: Dict mit allen Briefdaten
Returns:
PDF als bytes
"""
service = get_pdf_service()
# Convert dict to LetterData
school_info = None
if data.get("school_info"):
school_info = SchoolInfo(**data["school_info"])
letter_data = LetterData(
recipient_name=data.get("recipient_name", ""),
recipient_address=data.get("recipient_address", ""),
student_name=data.get("student_name", ""),
student_class=data.get("student_class", ""),
subject=data.get("subject", ""),
content=data.get("content", ""),
date=data.get("date", datetime.now().strftime("%d.%m.%Y")),
teacher_name=data.get("teacher_name", ""),
teacher_title=data.get("teacher_title"),
school_info=school_info,
letter_type=data.get("letter_type", "general"),
tone=data.get("tone", "professional"),
legal_references=data.get("legal_references"),
gfk_principles_applied=data.get("gfk_principles_applied")
)
return service.generate_letter_pdf(letter_data)
def generate_certificate_pdf(data: Dict[str, Any]) -> bytes:
"""
Convenience function zum Generieren eines Zeugnis-PDFs.
Args:
data: Dict mit allen Zeugnisdaten
Returns:
PDF als bytes
"""
service = get_pdf_service()
school_info = None
if data.get("school_info"):
school_info = SchoolInfo(**data["school_info"])
cert_data = CertificateData(
student_name=data.get("student_name", ""),
student_birthdate=data.get("student_birthdate", ""),
student_class=data.get("student_class", ""),
school_year=data.get("school_year", ""),
certificate_type=data.get("certificate_type", "halbjahr"),
subjects=data.get("subjects", []),
attendance=data.get("attendance", {"days_absent": 0, "days_excused": 0, "days_unexcused": 0}),
remarks=data.get("remarks"),
class_teacher=data.get("class_teacher", ""),
principal=data.get("principal", ""),
school_info=school_info,
issue_date=data.get("issue_date", datetime.now().strftime("%d.%m.%Y")),
social_behavior=data.get("social_behavior"),
work_behavior=data.get("work_behavior")
)
return service.generate_certificate_pdf(cert_data)
def generate_correction_pdf(data: Dict[str, Any]) -> bytes:
"""
Convenience function zum Generieren eines Korrektur-PDFs.
Args:
data: Dict mit allen Korrekturdaten
Returns:
PDF als bytes
"""
service = get_pdf_service()
# Create StudentInfo from dict
student = StudentInfo(
student_id=data.get("student_id", "unknown"),
name=data.get("student_name", data.get("name", "")),
class_name=data.get("student_class", data.get("class_name", ""))
)
# Calculate percentage if not provided
max_points = data.get("max_points", data.get("total_points", 0))
achieved_points = data.get("achieved_points", 0)
percentage = data.get("percentage", (achieved_points / max_points * 100) if max_points > 0 else 0.0)
correction_data = CorrectionData(
student=student,
exam_title=data.get("exam_title", ""),
subject=data.get("subject", ""),
date=data.get("date", data.get("exam_date", "")),
max_points=max_points,
achieved_points=achieved_points,
grade=data.get("grade", ""),
percentage=percentage,
corrections=data.get("corrections", []),
teacher_notes=data.get("teacher_notes", data.get("teacher_comment", "")),
ai_feedback=data.get("ai_feedback", ""),
grade_distribution=data.get("grade_distribution"),
class_average=data.get("class_average")
)
return service.generate_correction_pdf(correction_data)