fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions

70
backend/.dockerignore Normal file
View File

@@ -0,0 +1,70 @@
# NOTE: .dockerignore patterns are matched relative to the build-context root.
# A bare `*.log` or `__pycache__/` therefore only excludes top-level entries.
# Artifacts that can appear at any depth use the `**/` prefix (which also
# matches zero directories, i.e. the root itself).
# Virtual Environment
venv/
.venv/
env/
.env/
# Python cache (created next to every package, so match at any depth)
**/__pycache__/
**/*.py[cod]
**/*$py.class
*.so
.Python
# Distribution
dist/
build/
*.egg-info/
.eggs/
# IDE
.idea/
.vscode/
**/*.swp
**/*.swo
**/*~
# Testing
**/.pytest_cache/
.coverage
htmlcov/
.tox/
# Local config
.env
.env.local
*.local
# Logs
**/*.log
logs/
# Temp files
**/*.tmp
**/*.temp
**/.DS_Store
# Documentation builds
docs/_build/
# Backup files
*.bak
*.save
*_backup.py
*_before_*.py
# Large files (root level only; nested assets may be needed at runtime)
*.docx
*.zip
*.pdf
# Git
.git/
.gitignore
# Docker
Dockerfile
docker-compose*.yml
# Secrets directory (conflicts with Python stdlib secrets module)
secrets/

135
backend/.env.example Normal file
View File

@@ -0,0 +1,135 @@
# BreakPilot Backend Environment Variables
# Kopiere diese Datei nach .env und passe die Werte an
# =============================================================================
# LLM Gateway Konfiguration
# =============================================================================
# Server-Einstellungen
LLM_GATEWAY_HOST=0.0.0.0
LLM_GATEWAY_PORT=8002
LLM_GATEWAY_DEBUG=false
# API Keys für Gateway-Zugriff (komma-separiert)
# Generiere mit: openssl rand -hex 32
LLM_API_KEYS=your-api-key-1,your-api-key-2
# JWT Secret (gleicher Wert wie Consent Service für SSO)
JWT_SECRET=your-jwt-secret
# Rate Limiting
LLM_RATE_LIMIT_RPM=60
LLM_RATE_LIMIT_TPM=100000
# Logging
LLM_LOG_LEVEL=INFO
LLM_AUDIT_LOGGING=true
# Backend Priorität (Reihenfolge für Fallback)
LLM_BACKEND_PRIORITY=ollama,vllm,anthropic
# =============================================================================
# Ollama Backend (lokal)
# =============================================================================
OLLAMA_BASE_URL=http://localhost:11434
OLLAMA_DEFAULT_MODEL=llama3.1:8b
OLLAMA_TIMEOUT=120
OLLAMA_ENABLED=true
# =============================================================================
# vLLM Backend (remote, z.B. vast.ai)
# =============================================================================
# VLLM_BASE_URL=http://gpu-server:8000
# VLLM_API_KEY=your-vllm-api-key
# VLLM_DEFAULT_MODEL=meta-llama/Meta-Llama-3.1-8B-Instruct
# VLLM_TIMEOUT=120
# VLLM_ENABLED=false
# =============================================================================
# Anthropic Claude API (Fallback)
# =============================================================================
ANTHROPIC_API_KEY=your-anthropic-api-key
ANTHROPIC_DEFAULT_MODEL=claude-3-5-sonnet-20241022
ANTHROPIC_TIMEOUT=120
ANTHROPIC_ENABLED=true
# =============================================================================
# Tool Gateway (Web Search mit PII-Schutz)
# =============================================================================
# Tavily API für Web-Suche
TAVILY_API_KEY=your-tavily-api-key
TAVILY_BASE_URL=https://api.tavily.com
TAVILY_TIMEOUT=30
TAVILY_MAX_RESULTS=5
TAVILY_SEARCH_DEPTH=basic
TAVILY_INCLUDE_ANSWER=true
TAVILY_INCLUDE_IMAGES=false
# PII Redaktion (automatische Entfernung personenbezogener Daten)
PII_REDACTION_ENABLED=true
# =============================================================================
# Alerts Agent (Google Alerts Relevanz-Filterung)
# =============================================================================
# Agent aktivieren
ALERTS_AGENT_ENABLED=false
# LLM-basiertes Scoring aktivieren (sonst Keyword-basiert)
ALERTS_USE_LLM=false
# LLM Gateway URL für Scoring (Standard: lokaler Server)
LLM_GATEWAY_URL=http://localhost:8000/llm
# Google Alerts RSS Feed URLs (komma-separiert)
# Erstelle Feeds unter: https://www.google.com/alerts
# Wähle "Zustellung an: RSS-Feed"
# ALERTS_RSS_FEEDS=https://google.com/alerts/feeds/.../...,https://google.com/alerts/feeds/.../...
# Feed Labels (komma-separiert, gleiche Reihenfolge wie FEEDS)
# ALERTS_RSS_LABELS=Inklusion Bayern,Datenschutz Schule,Schulrecht
# Fetch-Intervall in Minuten
ALERTS_FETCH_INTERVAL=60
# =============================================================================
# vast.ai GPU Infrastructure
# =============================================================================
# vast.ai API Key (von https://cloud.vast.ai/cli/)
# Wenn gesetzt, werden GPU-Kontrollen im Admin Panel aktiviert
# VAST_API_KEY=your-vast-api-key
# Instance ID (numerische ID deiner vast.ai Instanz)
# VAST_INSTANCE_ID=123456
# Control API Key (Admin-Schutz für Start/Stop Endpoints)
# Generiere mit: openssl rand -hex 32
# CONTROL_API_KEY=your-control-api-key
# Health Check Konfiguration
VAST_HEALTH_PORT=8001
VAST_HEALTH_PATH=/health
VAST_WAIT_TIMEOUT_S=600
# Auto-Shutdown (stoppt Instanz bei Inaktivität)
VAST_AUTO_SHUTDOWN=true
VAST_AUTO_SHUTDOWN_MINUTES=30
# State Persistence (Pfade für Status und Audit Log)
# VAST_STATE_PATH=./vast_state.json
# VAST_AUDIT_PATH=./vast_audit.log
# =============================================================================
# Bestehendes Backend
# =============================================================================
# Consent Service URL
CONSENT_SERVICE_URL=http://localhost:8081
# OpenAI API Key (für andere Features wie ai_processor.py)
OPENAI_API_KEY=your-openai-api-key

BIN
backend/BPAI Brandbook.docx Executable file

Binary file not shown.

63
backend/Dockerfile Normal file
View File

@@ -0,0 +1,63 @@
# Build stage: installs the Python dependencies into a virtual environment.
# Only /opt/venv is carried over to the runtime stage, so the compiler
# toolchain installed here never reaches the final image.
FROM python:3.12-slim-bookworm AS builder
WORKDIR /app
# Install build dependencies (compiler toolchain and PostgreSQL client headers)
# NOTE(review): libpq-dev exists only in this stage. If requirements.txt builds
# a libpq-linked driver from source, the runtime stage would also need the
# shared library (libpq5) - confirm against requirements.txt.
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
libpq-dev \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements first so the dependency layers below stay cached until
# requirements.txt itself changes
COPY requirements.txt .
# Create virtual environment and install dependencies
RUN python -m venv /opt/venv
# Putting the venv first on PATH makes the following `pip` the venv's pip
ENV PATH="/opt/venv/bin:$PATH"
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r requirements.txt
# Runtime stage: fresh base image plus shared libraries, venv and app code
FROM python:3.12-slim-bookworm
WORKDIR /app
# Install runtime dependencies for WeasyPrint and OpenCV.
# curl is required by the HEALTHCHECK below.
# NOTE(review): libffi-dev is a development package; a runtime image normally
# only needs the shared library - confirm whether it can be replaced.
RUN apt-get update && apt-get install -y --no-install-recommends \
libpango-1.0-0 \
libpangocairo-1.0-0 \
libgdk-pixbuf-2.0-0 \
libffi-dev \
shared-mime-info \
libgl1 \
libglib2.0-0 \
curl \
&& rm -rf /var/lib/apt/lists/*
# Copy virtual environment from builder
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# Create non-root user
RUN useradd --create-home --shell /bin/bash appuser
# Copy application code (filtered by .dockerignore), owned by the app user
COPY --chown=appuser:appuser . .
# Switch to non-root user
USER appuser
# Environment variables: unbuffered stdout/stderr, no .pyc files written
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
# Expose port (must match the uvicorn --port below)
EXPOSE 8000
# Health check: the container is healthy while the consent health endpoint
# answers with a success status within 3 seconds
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8000/api/consent/health || exit 1
# Run the application: serves the `app` object from main.py on all interfaces
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

49
backend/Dockerfile.worker Normal file
View File

@@ -0,0 +1,49 @@
# BreakPilot Transcription Worker
# GPU-optimized container for faster-whisper + pyannote.audio
#
# Build: docker build -f Dockerfile.worker -t breakpilot-transcription-worker .
# Run: docker run --gpus all breakpilot-transcription-worker
#
# NOTE(review): the base image below is a plain Python image without CUDA
# libraries; GPU support presumably comes from the pip wheels plus the NVIDIA
# container runtime - confirm against requirements-worker.txt.
FROM python:3.11-slim
# Set environment variables: no .pyc files, unbuffered output, and pip without
# download cache or version-check chatter
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PIP_NO_CACHE_DIR=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1
# Install system dependencies
# (presumably ffmpeg/libsndfile1 for audio decoding and git for VCS-based pip
# requirements - verify; curl is not used elsewhere in this file)
RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg \
libsndfile1 \
git \
curl \
&& rm -rf /var/lib/apt/lists/*
# Create app user (non-root)
RUN useradd --create-home --shell /bin/bash worker
WORKDIR /app
# Install Python dependencies for transcription.
# Copied before the source so this layer is reused until the requirements change.
# --no-cache-dir repeats what PIP_NO_CACHE_DIR=1 above already enforces.
COPY requirements-worker.txt .
RUN pip install --no-cache-dir -r requirements-worker.txt
# Copy worker package
COPY transcription_worker/ ./transcription_worker/
# Change ownership of /app (including the directory itself) to the worker user
RUN chown -R worker:worker /app
# Switch to non-root user
USER worker
# Create cache directories (run as `worker`, so they are owned by that user)
RUN mkdir -p /home/worker/.cache/huggingface \
&& mkdir -p /tmp/transcriptions
# Health check: only verifies that the transcriber module can still be
# imported; it does not contact a queue or run a transcription
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
CMD python -c "from transcription_worker.transcriber import WhisperTranscriber; print('OK')" || exit 1
# Entry point - RQ worker
CMD ["python", "-m", "transcription_worker.worker"]

View File

@@ -0,0 +1,202 @@
# Klausurkorrektur - Privacy-by-Design Implementation Plan
## Übersicht
Implementierung eines DSGVO-konformen Klausurkorrektur-Moduls mit QR-Code-basierter Pseudonymisierung. Keine personenbezogenen Daten werden an das LLM gesendet.
## Architektur-Prinzipien
1. **Pseudonymisierung (Art. 4 Nr. 5 DSGVO)**: `doc_token` (zufällige UUID4: 128 Bit lang, davon 122 Bit Zufall) als einziger Join-Key
2. **Teacher Namespace Isolation**: Jeder Lehrer hat isolierten Workspace
3. **Privacy by Architecture**: Backend kann Schüler nicht identifizieren
4. **Self-hosted LLM**: SysEleven, kein OpenAI/externe Datenübertragung
---
## Phase 1: Backend-Infrastruktur
### 1.1 Datenmodelle erstellen
**Datei**: `/backend/klausur/db_models.py` (NEU)
```python
# Modelle:
- ExamSession: teacher_id, session_id, created_at, status
- PseudonymizedDocument: doc_token (UUID), session_id, ocr_text, ai_feedback, created_at
- TeacherVault: teacher_id, encrypted_identity_map (lokal verschlüsselt)
```
### 1.2 Repository-Klasse
**Datei**: `/backend/klausur/repository.py` (NEU)
- CRUD für ExamSession, PseudonymizedDocument
- Teacher-Isolation durch `teacher_id` Filter
- Keine Speicherung von Klarnamen im Backend
### 1.3 Pseudonymisierung-Service
**Datei**: `/backend/klausur/services/pseudonymizer.py` (NEU)
```python
class PseudonymizationService:
def generate_doc_token() -> str # 128-bit UUID
def generate_qr_overlay(doc_token: str) -> bytes # QR-Code PNG
def redact_header(image: bytes) -> bytes # Entfernt Kopfzeile mit Namen
def extract_doc_token_from_qr(image: bytes) -> str # QR-Code lesen
```
### 1.4 OCR-Integration erweitern
**Datei**: `/backend/services/file_processor.py` (ERWEITERN)
- Neue Methode: `process_exam_page(image, redact_header=True)`
- Integration mit PaddleOCR für Handschrift
- Header-Redaction vor OCR
### 1.5 LLM-Korrektur-Service
**Datei**: `/backend/klausur/services/correction_service.py` (NEU)
```python
class ExamCorrectionService:
def correct_answer(doc_token: str, question: str, student_answer: str, rubric: str) -> CorrectionResult
def batch_correct(session_id: str) -> List[CorrectionResult]
```
- Nutzt bestehenden LLM-Gateway (Ollama/vLLM)
- Nur pseudonymisierte Texte werden gesendet
- `doc_token` als einzige Referenz
---
## Phase 2: API-Endpunkte
### 2.1 Klausur-API
**Datei**: `/backend/klausur/routes.py` (NEU)
```
POST /api/klausur/session # Neue Korrektur-Session erstellen
GET /api/klausur/session/{session_id} # Session-Status abrufen
POST /api/klausur/session/{session_id}/upload # Gescannte Seiten hochladen
GET /api/klausur/session/{session_id}/qr-sheet # QR-Overlay-Bogen generieren
POST /api/klausur/session/{session_id}/process # OCR + KI-Korrektur starten
GET /api/klausur/session/{session_id}/results # Pseudonymisierte Ergebnisse
DELETE /api/klausur/session/{session_id} # Session löschen
```
### 2.2 Teacher Vault API (Client-seitig)
**Datei**: `/backend/klausur/routes.py`
```
POST /api/klausur/vault/encrypt # Identity-Map clientseitig verschlüsseln
POST /api/klausur/vault/decrypt # Identity-Map clientseitig entschlüsseln
```
- Server speichert nur verschlüsselte Blobs
- Schlüssel bleibt beim Lehrer (Browser/LocalStorage)
---
## Phase 3: PWA Frontend-Modul
### 3.1 Klausurkorrektur-Modul ersetzen
**Datei**: `/backend/frontend/modules/klausur_korrektur.py` (ÜBERSCHREIBEN)
**Tabs im Modul**:
1. **Neue Session**: QR-Bogen generieren, Schülerliste eingeben
2. **Upload**: Gescannte Klausuren hochladen (Drag & Drop)
3. **Verarbeitung**: OCR + KI-Korrektur Status
4. **Ergebnisse**: Pseudonymisierte Bewertungen → Re-Join mit lokaler Identity-Map
5. **Export**: Excel/PDF Export der Ergebnisse
### 3.2 Identity-Map (Teacher Vault)
**Client-seitig in JavaScript**:
```javascript
class TeacherVault {
constructor(teacherId)
generateIdentityMap(studentNames) // Erstellt doc_token → Name Mapping
encrypt(password) // AES-256 Verschlüsselung im Browser
decrypt(password) // Entschlüsselung im Browser
rejoinResults(pseudonymizedResults) // Verknüpft Ergebnisse mit Namen
exportToLocalStorage()
importFromLocalStorage()
}
```
### 3.3 QR-Code Workflow
**UI-Flow**:
1. Lehrer gibt Schülerliste ein (Namen)
2. System generiert für jeden Schüler einen `doc_token`
3. Identity-Map wird lokal im Browser verschlüsselt gespeichert
4. QR-Bogen wird zum Ausdrucken generiert (jeder QR = ein doc_token)
5. Schüler kleben QR auf ihre Klausur
6. Nach Scan: QR wird erkannt, Header redacted, nur Text an LLM
---
## Phase 4: Admin-Panel Integration
### 4.1 GPU/LLM-Tab erweitern
**Datei**: `/backend/frontend/components/admin_gpu.py` (ERWEITERN)
Neue Abschnitte:
- **Klausur-Queue**: Warteschlange der Korrektur-Jobs
- **LLM-Status**: Verbindung zu SysEleven vLLM
- **OCR-Status**: PaddleOCR Verfügbarkeit
- **Session-Übersicht**: Aktive Korrektur-Sessions (nur Anzahl, keine Namen)
---
## Phase 5: Sicherheits-Features
### 5.1 Zero-Knowledge Design
- Backend speichert niemals Klarnamen
- Identity-Map nur verschlüsselt (Client-Key)
- doc_token ist kryptografisch zufällig (UUID4)
- Keine Korrelation zwischen Sessions möglich
### 5.2 Daten-Retention
- Pseudonymisierte Daten: 30 Tage, dann auto-delete
- Verschlüsselte Vault-Daten: Lehrer kann jederzeit löschen
- Audit-Log ohne PII
### 5.3 Teacher-Isolation
- Alle Queries gefiltert nach `teacher_id`
- Kein Cross-Teacher-Zugriff möglich
- Row-Level Security in PostgreSQL
---
## Implementierungs-Reihenfolge
1. **Backend-Modelle** (`/backend/klausur/db_models.py`)
2. **Repository** (`/backend/klausur/repository.py`)
3. **Pseudonymizer-Service** (`/backend/klausur/services/pseudonymizer.py`)
4. **Correction-Service** (`/backend/klausur/services/correction_service.py`)
5. **API-Routes** (`/backend/klausur/routes.py`)
6. **PWA-Modul** (`/backend/frontend/modules/klausur_korrektur.py`)
7. **TeacherVault JS** (eingebettet in Modul)
8. **Admin-Panel** (`/backend/frontend/components/admin_gpu.py`)
9. **Tests** (`/backend/klausur/tests/`)
10. **Migration** (Alembic für neue Tabellen)
---
## Technologie-Stack
| Komponente | Technologie |
|------------|-------------|
| QR-Code | `qrcode` + `Pillow` |
| OCR | PaddleOCR (bereits vorhanden) |
| Verschlüsselung (Client) | Web Crypto API (AES-256-GCM) |
| Verschlüsselung (Server) | `cryptography` (Fernet) |
| LLM | vLLM @ SysEleven (via llm_gateway) |
| DB | PostgreSQL + SQLAlchemy |
---
## Datenschutz-Garantien
✅ Keine Schülernamen im Backend gespeichert
✅ Keine Schülernamen an LLM gesendet
✅ Identity-Map nur verschlüsselt (Client-Schlüssel)
✅ Teacher-Namespace-Isolation
✅ Automatische Daten-Löschung nach 30 Tagen
✅ Vergleichbar mit lokaler PC-Verarbeitung (Art. 4 Nr. 5 DSGVO)

View File

@@ -0,0 +1,47 @@
# BreakPilot Projektstruktur (Stand: lokal, Dezember 2025)
## Root-Verzeichnis (~/Projekte/arbeitsblatt-app/backend)
- `main.py`
FastAPI-Backend mit den API-Routen (Upload, Analyse, Pipeline etc.).
Dieses Backend wird unter `/api` in die Frontend-App eingehängt.
- `ai_processor.py`
Logik für die Kommunikation mit der KI (OpenAI), Verarbeitung der Arbeitsblätter usw.
- `learning_units.py`
Interne Datenstrukturen / Logik für Lernbausteine.
- `learning_units_api.py`
(Optional) API-Endpunkte rund um Lernbausteine; kann später ebenfalls als Router eingebunden werden.
- `original_service.py`
Historische/ursprüngliche Service-Logik; dient als Referenz und Backup.
- `frontend/`
Paket mit allen Frontend-Routen und der HTML-Oberfläche (siehe unten).
- `tools/`
Hilfsskripte, z. B. zum Extrahieren der alten /app-HTML in das neue Frontend-Modul.
- `venv/`
Virtuelle Python-Umgebung für dieses Projekt.
---
## Ordner `frontend/`
- `__init__.py`
Markiert den Ordner als Python-Paket.
- `app.py`
Zentrale FastAPI-App für **Frontend + Backend-Kombination**.
- Mountet das Backend aus `main.py` unter `/api`
- Include der Frontend-Router (`home`, `preview`, `studio`)
Startbefehl für die gesamte App:
```bash
cd ~/Projekte/arbeitsblatt-app/backend
source venv/bin/activate
uvicorn frontend.app:app --reload
```

956
backend/abitur_docs_api.py Normal file
View File

@@ -0,0 +1,956 @@
"""
Abitur Document Store API - Verwaltung von Abitur-Aufgaben und Erwartungshorizonten.
Unterstützt:
- Bundesland-spezifische Dokumente
- Fach, Jahr, Niveau (eA/gA), Aufgabennummer
- KI-basierte Dokumentenerkennung
- RAG-Integration mit Vector Store
Dateinamen-Schema (NiBiS Niedersachsen):
- 2025_Deutsch_eA_I.pdf - Aufgabe
- 2025_Deutsch_eA_I_EWH.pdf - Erwartungshorizont
"""
import logging
import uuid
import os
import re
import zipfile
import tempfile
from datetime import datetime
from typing import List, Dict, Any, Optional
from enum import Enum
from pathlib import Path
from dataclasses import dataclass
from fastapi import APIRouter, HTTPException, UploadFile, File, Form, BackgroundTasks
from fastapi.responses import FileResponse
from pydantic import BaseModel, Field
# Module logger and the router that carries every endpoint of this module
# under the /abitur-docs prefix.
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/abitur-docs", tags=["abitur-docs"])
# Storage directory for uploaded and imported files.
# Defaults to the historical /tmp location. Set ABITUR_DOCS_DIR to point at a
# persistent volume when the files must survive a restart. An empty value
# falls back to the default as well.
DOCS_DIR = Path(os.environ.get("ABITUR_DOCS_DIR") or "/tmp/abitur-docs")
# Created at import time so the endpoints can write without further checks.
DOCS_DIR.mkdir(parents=True, exist_ok=True)
# ============================================================================
# Enums
# ============================================================================
class Bundesland(str, Enum):
    """German federal states with a centralised Abitur (Zentralabitur).

    Members are plain strings, so they serialise directly in API payloads.
    """

    NIEDERSACHSEN = "niedersachsen"
    BAYERN = "bayern"
    BADEN_WUERTTEMBERG = "baden_wuerttemberg"
    NORDRHEIN_WESTFALEN = "nordrhein_westfalen"
    HESSEN = "hessen"
    SACHSEN = "sachsen"
    THUERINGEN = "thueringen"
    BERLIN = "berlin"
    HAMBURG = "hamburg"
    SCHLESWIG_HOLSTEIN = "schleswig_holstein"
    BREMEN = "bremen"
    BRANDENBURG = "brandenburg"
    MECKLENBURG_VORPOMMERN = "mecklenburg_vorpommern"
    SACHSEN_ANHALT = "sachsen_anhalt"
    RHEINLAND_PFALZ = "rheinland_pfalz"
    SAARLAND = "saarland"
class Fach(str, Enum):
    """Abitur subjects known to the document store."""

    # General-education subjects
    DEUTSCH = "deutsch"
    ENGLISCH = "englisch"
    MATHEMATIK = "mathematik"
    BIOLOGIE = "biologie"
    CHEMIE = "chemie"
    PHYSIK = "physik"
    GESCHICHTE = "geschichte"
    ERDKUNDE = "erdkunde"
    POLITIK_WIRTSCHAFT = "politik_wirtschaft"
    FRANZOESISCH = "franzoesisch"
    SPANISCH = "spanisch"
    LATEIN = "latein"
    GRIECHISCH = "griechisch"
    KUNST = "kunst"
    MUSIK = "musik"
    SPORT = "sport"
    INFORMATIK = "informatik"
    EV_RELIGION = "ev_religion"
    KATH_RELIGION = "kath_religion"
    WERTE_NORMEN = "werte_normen"
    # Further subjects identified by short codes in file names
    BRC = "brc"  # business administration with accounting
    BVW = "bvw"  # economics
    ERNAEHRUNG = "ernaehrung"
    MECHATRONIK = "mechatronik"
    GESUNDHEIT_PFLEGE = "gesundheit_pflege"
    PAEDAGOGIK_PSYCHOLOGIE = "paedagogik_psychologie"
class Niveau(str, Enum):
    """Requirement level of an exam paper."""

    # Advanced requirement level (Leistungskurs)
    EA = "eA"
    # Basic requirement level (Grundkurs)
    GA = "gA"
class DokumentTyp(str, Enum):
    """Kind of document within an exam set."""

    AUFGABE = "aufgabe"
    ERWARTUNGSHORIZONT = "erwartungshorizont"
    DECKBLATT = "deckblatt"
    MATERIAL = "material"
    # The next two only occur for language subjects
    HOERVERSTEHEN = "hoerverstehen"
    SPRACHMITTLUNG = "sprachmittlung"
    BEWERTUNGSBOGEN = "bewertungsbogen"
class VerarbeitungsStatus(str, Enum):
    """Processing state of a stored document."""

    PENDING = "pending"
    PROCESSING = "processing"
    # Metadata was recognised automatically
    RECOGNIZED = "recognized"
    # A developer confirmed the metadata
    CONFIRMED = "confirmed"
    # Document is present in the vector store
    INDEXED = "indexed"
    ERROR = "error"
# ============================================================================
# Fach-Mapping für Dateinamen
# ============================================================================
# Maps file-name fragments to subjects.
# parse_nibis_filename lower-cases the file stem, strips "_" and "-" and then
# takes the FIRST key (in insertion order) that occurs as a substring, so:
#   - keys must be lower-case and contain no separators,
#   - the order of entries is significant; aliases of one subject are grouped.
FACH_NAME_MAPPING = {
    "deutsch": Fach.DEUTSCH,
    "englisch": Fach.ENGLISCH,
    "mathe": Fach.MATHEMATIK,
    "mathematik": Fach.MATHEMATIK,
    "biologie": Fach.BIOLOGIE,
    "bio": Fach.BIOLOGIE,
    "chemie": Fach.CHEMIE,
    "physik": Fach.PHYSIK,
    "geschichte": Fach.GESCHICHTE,
    "erdkunde": Fach.ERDKUNDE,
    "geographie": Fach.ERDKUNDE,
    "politikwirtschaft": Fach.POLITIK_WIRTSCHAFT,
    "politik": Fach.POLITIK_WIRTSCHAFT,
    "franzoesisch": Fach.FRANZOESISCH,
    "franz": Fach.FRANZOESISCH,
    "spanisch": Fach.SPANISCH,
    "latein": Fach.LATEIN,
    "griechisch": Fach.GRIECHISCH,
    "kunst": Fach.KUNST,
    "musik": Fach.MUSIK,
    "sport": Fach.SPORT,
    "informatik": Fach.INFORMATIK,
    "evreligion": Fach.EV_RELIGION,
    "kathreligion": Fach.KATH_RELIGION,
    "wertenormen": Fach.WERTE_NORMEN,
    "brc": Fach.BRC,
    "bvw": Fach.BVW,
    "ernaehrung": Fach.ERNAEHRUNG,
    "mecha": Fach.MECHATRONIK,
    "mechatronik": Fach.MECHATRONIK,
    "technikmecha": Fach.MECHATRONIK,
    "gespfl": Fach.GESUNDHEIT_PFLEGE,
    "paedpsych": Fach.PAEDAGOGIK_PSYCHOLOGIE,
}
# ============================================================================
# Pydantic Models
# ============================================================================
class DokumentCreate(BaseModel):
    """Payload for creating a document with manually supplied metadata."""

    bundesland: Bundesland
    fach: Fach
    # Exam year, restricted to 2000..2100 inclusive
    jahr: int = Field(ge=2000, le=2100)
    niveau: Niveau
    typ: DokumentTyp
    # Task label such as I, II, III, 1, 2, ...
    aufgaben_nummer: Optional[str] = None
class DokumentUpdate(BaseModel):
    """Partial update of a document's metadata.

    Every field is optional; ``None`` means "leave the stored value as is".
    """

    bundesland: Optional[Bundesland] = None
    fach: Optional[Fach] = None
    # NOTE(review): unlike DokumentCreate.jahr, no 2000..2100 bound is applied
    # here - confirm whether that is intentional.
    jahr: Optional[int] = None
    niveau: Optional[Niveau] = None
    typ: Optional[DokumentTyp] = None
    aufgaben_nummer: Optional[str] = None
    status: Optional[VerarbeitungsStatus] = None
class DokumentResponse(BaseModel):
    """API representation of a stored document."""

    # Identity and naming
    id: str
    dateiname: str
    original_dateiname: str
    # Metadata
    bundesland: Bundesland
    fach: Fach
    jahr: int
    niveau: Niveau
    typ: DokumentTyp
    aufgaben_nummer: Optional[str]
    # Processing state; `confidence` is the recognition confidence
    status: VerarbeitungsStatus
    confidence: float
    # Storage
    file_path: str
    file_size: int
    # Vector-store bookkeeping
    indexed: bool
    vector_ids: List[str]
    # Timestamps
    created_at: datetime
    updated_at: datetime
class ImportResult(BaseModel):
    """Summary returned after a ZIP import."""

    # Number of archive entries that were considered
    total_files: int
    # Entries that were stored as documents
    recognized: int
    # Entries that were skipped or failed
    errors: int
    # The documents created by this import
    documents: List[DokumentResponse]
class RecognitionResult(BaseModel):
    """Outcome of recognising metadata for one file name."""

    success: bool
    bundesland: Optional[Bundesland]
    fach: Optional[Fach]
    jahr: Optional[int]
    niveau: Optional[Niveau]
    typ: Optional[DokumentTyp]
    aufgaben_nummer: Optional[str]
    confidence: float
    raw_filename: str
    suggestions: List[Dict[str, Any]]

    @property
    def extracted(self) -> Dict[str, Any]:
        """Backwards-compatible view: the recognised values as a plain dict.

        Only truthy fields are included; enum fields appear as their string
        value. Keys keep the order of the field declarations above.
        """
        pairs = (
            ("bundesland", self.bundesland and self.bundesland.value),
            ("fach", self.fach and self.fach.value),
            ("jahr", self.jahr),
            ("niveau", self.niveau and self.niveau.value),
            ("typ", self.typ and self.typ.value),
            ("aufgaben_nummer", self.aufgaben_nummer),
        )
        return {key: value for key, value in pairs if value}

    @property
    def method(self) -> str:
        """Backwards-compatible name of the recognition method (constant)."""
        return "filename_pattern"
# ============================================================================
# Internal Data Classes
# ============================================================================
@dataclass
class AbiturDokument:
    """Internal record for one stored document.

    Carries the same fields as DokumentResponse, in the same order.
    """

    # Identity and naming
    id: str
    dateiname: str
    original_dateiname: str
    # Metadata
    bundesland: Bundesland
    fach: Fach
    jahr: int
    niveau: Niveau
    typ: DokumentTyp
    aufgaben_nummer: Optional[str]
    # Processing state
    status: VerarbeitungsStatus
    confidence: float
    # Storage
    file_path: str
    file_size: int
    # Vector-store bookkeeping
    indexed: bool
    vector_ids: List[str]
    # Timestamps
    created_at: datetime
    updated_at: datetime
# ============================================================================
# In-Memory Storage
# ============================================================================
# Process-local registry of all documents, keyed by document id.
# It lives only in memory, so its content is lost when the process exits.
_dokumente: Dict[str, AbiturDokument] = {}
# ============================================================================
# Helper Functions - Dokumentenerkennung
# ============================================================================
def parse_nibis_filename(filename: str) -> RecognitionResult:
    """
    Derive document metadata from a NiBiS-style file name.

    Examples:
    - 2025_Deutsch_eA_I.pdf
    - 2025_Deutsch_eA_I_EWH.pdf
    - 2025_Biologie_gA_1.pdf
    - 2025_Englisch_eA_HV.pdf (listening comprehension)

    Args:
        filename: File name, with or without directory part and extension.

    Returns:
        A RecognitionResult. ``success`` is True when both subject and year
        were found; ``confidence`` is the sum of the per-field scores, capped
        at 1.0. ``bundesland`` is always NIEDERSACHSEN and ``typ`` falls back
        to AUFGABE when no type marker is present.
    """
    result = RecognitionResult(
        success=False,
        bundesland=Bundesland.NIEDERSACHSEN,  # NiBiS = Lower Saxony
        fach=None,
        jahr=None,
        niveau=None,
        typ=None,
        aufgaben_nummer=None,
        confidence=0.0,
        raw_filename=filename,
        suggestions=[]
    )

    # Match against the lower-cased stem: case-insensitive, extension ignored.
    name = Path(filename).stem.lower()

    # Year: four digits at the very beginning.
    jahr_match = re.match(r'^(\d{4})', name)
    if jahr_match:
        result.jahr = int(jahr_match.group(1))
        result.confidence += 0.2

    # Subject: first mapping key contained in the separator-free name.
    # The normalised name is loop-invariant, so it is computed once.
    compact_name = name.replace("_", "").replace("-", "")
    for fach_key, fach_enum in FACH_NAME_MAPPING.items():
        if fach_key in compact_name:
            result.fach = fach_enum
            result.confidence += 0.3
            break

    # Requirement level (eA/gA).
    if "_ea" in name or "ea_" in name:
        result.niveau = Niveau.EA
        result.confidence += 0.2
    elif "_ga" in name or "ga_" in name:
        result.niveau = Niveau.GA
        result.confidence += 0.2

    # Document type.
    if "_ewh" in name:
        result.typ = DokumentTyp.ERWARTUNGSHORIZONT
        result.confidence += 0.2
    elif "_hv" in name or "hoerverstehen" in name:
        result.typ = DokumentTyp.HOERVERSTEHEN
        result.confidence += 0.15
    elif re.search(r'_(?:sm|me)(?![a-z])', name) or "sprachmittlung" in name:
        # The short codes must not be followed by a letter. A plain "_me"
        # substring test also hit "_mechatronik" and mislabelled every
        # Mechatronik paper as Sprachmittlung.
        result.typ = DokumentTyp.SPRACHMITTLUNG
        result.confidence += 0.15
    elif "deckblatt" in name:
        result.typ = DokumentTyp.DECKBLATT
        result.confidence += 0.15
    elif "material" in name:
        result.typ = DokumentTyp.MATERIAL
        result.confidence += 0.15
    elif "bewertung" in name:
        result.typ = DokumentTyp.BEWERTUNGSBOGEN
        result.confidence += 0.15
    else:
        result.typ = DokumentTyp.AUFGABE
        result.confidence += 0.1

    # Task number: Roman numeral or 1-4 with optional a/b/c suffix,
    # delimited by "_" or the end of the name.
    aufgabe_match = re.search(r'_([ivx]+|[1-4][abc]?)(?:_|\.pdf|$)', name, re.IGNORECASE)
    if aufgabe_match:
        result.aufgaben_nummer = aufgabe_match.group(1).upper()
        result.confidence += 0.1

    # Recognition counts as successful once subject and year are known.
    if result.fach and result.jahr:
        result.success = True

    # Cap the accumulated confidence at 1.0 (float sums may overshoot).
    result.confidence = min(result.confidence, 1.0)
    return result
def _to_dokument_response(doc: AbiturDokument) -> DokumentResponse:
    """Build the API response model from an internal document record.

    Every field is copied over one-to-one under the same name.
    """
    field_names = (
        "id",
        "dateiname",
        "original_dateiname",
        "bundesland",
        "fach",
        "jahr",
        "niveau",
        "typ",
        "aufgaben_nummer",
        "status",
        "confidence",
        "file_path",
        "file_size",
        "indexed",
        "vector_ids",
        "created_at",
        "updated_at",
    )
    return DokumentResponse(**{name: getattr(doc, name) for name in field_names})
# ============================================================================
# API Endpoints - Dokumente
# ============================================================================
@router.post("/upload", response_model=DokumentResponse)
async def upload_dokument(
    file: UploadFile = File(...),
    bundesland: Optional[Bundesland] = Form(None),
    fach: Optional[Fach] = Form(None),
    jahr: Optional[int] = Form(None),
    niveau: Optional[Niveau] = Form(None),
    typ: Optional[DokumentTyp] = Form(None),
    aufgaben_nummer: Optional[str] = Form(None)
):
    """
    Upload a single document.

    Metadata may be given explicitly as form fields; anything omitted is taken
    from the file name via parse_nibis_filename, then from fixed defaults
    (Niedersachsen, current year, eA, Aufgabe).

    Raises:
        HTTPException 400: no file name was sent, or the subject is neither
            given nor recognisable from the file name.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="Kein Dateiname")

    detected = parse_nibis_filename(file.filename)

    # Explicit form values win over recognised ones, which win over defaults.
    resolved_bundesland = bundesland or detected.bundesland or Bundesland.NIEDERSACHSEN
    resolved_fach = fach or detected.fach
    resolved_jahr = jahr or detected.jahr or datetime.now().year
    resolved_niveau = niveau or detected.niveau or Niveau.EA
    resolved_typ = typ or detected.typ or DokumentTyp.AUFGABE
    resolved_aufgabe = aufgaben_nummer or detected.aufgaben_nummer

    if not resolved_fach:
        raise HTTPException(status_code=400, detail="Fach konnte nicht erkannt werden")

    # Store under a generated name; only the extension of the upload is kept.
    doc_id = str(uuid.uuid4())
    stored_name = f"{doc_id}{Path(file.filename).suffix}"
    target = DOCS_DIR / stored_name

    payload = await file.read()
    target.write_bytes(payload)

    if detected.success:
        initial_status = VerarbeitungsStatus.RECOGNIZED
    else:
        initial_status = VerarbeitungsStatus.PENDING

    timestamp = datetime.utcnow()
    dokument = AbiturDokument(
        id=doc_id,
        dateiname=stored_name,
        original_dateiname=file.filename,
        bundesland=resolved_bundesland,
        fach=resolved_fach,
        jahr=resolved_jahr,
        niveau=resolved_niveau,
        typ=resolved_typ,
        aufgaben_nummer=resolved_aufgabe,
        status=initial_status,
        confidence=detected.confidence,
        file_path=str(target),
        file_size=len(payload),
        indexed=False,
        vector_ids=[],
        created_at=timestamp,
        updated_at=timestamp
    )
    _dokumente[doc_id] = dokument

    logger.info(f"Uploaded document {doc_id}: {file.filename}")
    return _to_dokument_response(dokument)
@router.post("/import-zip", response_model=ImportResult)
async def import_zip(
    file: UploadFile = File(...),
    bundesland: Bundesland = Form(Bundesland.NIEDERSACHSEN),
    background_tasks: BackgroundTasks = None
):
    """
    Import every PDF contained in an uploaded ZIP archive.

    Metadata is recognised from each member's base name. Members whose subject
    cannot be recognised are counted as errors and not stored.

    Args:
        file: The uploaded archive; its name must end in ".zip" (any case).
        bundesland: Federal state assigned to every imported document.
        background_tasks: Accepted for interface compatibility; not used.

    Returns:
        ImportResult with the number of considered PDFs, stored documents,
        errors, and the created documents.

    Raises:
        HTTPException 400: the upload is not named *.zip or is not a readable
            ZIP archive.
    """
    # Case-insensitive, consistent with the ".pdf" check on the members.
    if not file.filename or not file.filename.lower().endswith(".zip"):
        raise HTTPException(status_code=400, detail="ZIP-Datei erforderlich")

    # Read first, so a failing read cannot leave a temp file behind.
    content = await file.read()

    documents = []
    total = 0
    recognized = 0
    errors = 0
    tmp_path = None
    try:
        # Store the archive temporarily; removed in `finally` in every case.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp:
            tmp_path = tmp.name
            tmp.write(content)

        with zipfile.ZipFile(tmp_path, 'r') as zip_ref:
            for zip_info in zip_ref.infolist():
                entry_name = zip_info.filename
                # PDFs only
                if not entry_name.lower().endswith(".pdf"):
                    continue
                # Skip macOS metadata and hidden entries at ANY depth, e.g.
                # "dir/._file.pdf" resource forks. Checking only the start of
                # the full path missed those and wrongly dropped "./file.pdf".
                if "__MACOSX" in entry_name or any(
                    part.startswith(".") for part in Path(entry_name).parts
                ):
                    continue
                # Skip Thumbs.db artefacts
                if "thumbs.db" in entry_name.lower():
                    continue

                total += 1
                try:
                    # Recognise metadata from the base name only
                    basename = Path(entry_name).name
                    recognition = parse_nibis_filename(basename)
                    if not recognition.fach:
                        errors += 1
                        logger.warning(f"Konnte Fach nicht erkennen: {basename}")
                        continue

                    # Extract under a generated name
                    doc_id = str(uuid.uuid4())
                    file_ext = Path(basename).suffix
                    safe_filename = f"{doc_id}{file_ext}"
                    file_path = DOCS_DIR / safe_filename

                    # Open via the ZipInfo so duplicate member names resolve to
                    # this exact entry rather than the last one of that name.
                    # NOTE(review): the member is read fully into memory with
                    # no size limit; consider capping zip_info.file_size for
                    # untrusted uploads.
                    with zip_ref.open(zip_info) as source:
                        file_content = source.read()
                    with open(file_path, "wb") as target:
                        target.write(file_content)

                    now = datetime.utcnow()
                    dokument = AbiturDokument(
                        id=doc_id,
                        dateiname=safe_filename,
                        original_dateiname=basename,
                        bundesland=bundesland,
                        fach=recognition.fach,
                        jahr=recognition.jahr or datetime.now().year,
                        niveau=recognition.niveau or Niveau.EA,
                        typ=recognition.typ or DokumentTyp.AUFGABE,
                        aufgaben_nummer=recognition.aufgaben_nummer,
                        status=VerarbeitungsStatus.RECOGNIZED,
                        confidence=recognition.confidence,
                        file_path=str(file_path),
                        file_size=len(file_content),
                        indexed=False,
                        vector_ids=[],
                        created_at=now,
                        updated_at=now
                    )
                    _dokumente[doc_id] = dokument
                    documents.append(_to_dokument_response(dokument))
                    recognized += 1
                except Exception as e:
                    # Best effort: one bad member must not abort the import.
                    errors += 1
                    logger.error(f"Fehler bei {zip_info.filename}: {e}")
    except zipfile.BadZipFile:
        # A corrupt or non-ZIP upload is a client error, not a server fault.
        raise HTTPException(status_code=400, detail="Ungültige ZIP-Datei")
    finally:
        # Remove the temporary archive
        if tmp_path is not None:
            os.unlink(tmp_path)

    logger.info(f"ZIP-Import: {recognized}/{total} erkannt, {errors} Fehler")
    return ImportResult(
        total_files=total,
        recognized=recognized,
        errors=errors,
        documents=documents
    )
@router.get("/", response_model=List[DokumentResponse])
async def list_dokumente(
    bundesland: Optional[Bundesland] = None,
    fach: Optional[Fach] = None,
    jahr: Optional[int] = None,
    niveau: Optional[Niveau] = None,
    typ: Optional[DokumentTyp] = None,
    status: Optional[VerarbeitungsStatus] = None,
    indexed: Optional[bool] = None
):
    """
    List stored documents, optionally narrowed by any combination of filters.

    All filters are combined with AND. Every filter except ``indexed`` is
    skipped when its argument is falsy; ``indexed`` is only skipped when it is
    ``None`` so that ``indexed=False`` can be requested explicitly.

    Returns:
        Response models sorted descending by (jahr, fach value, niveau value).
    """
    # Truthiness-gated criteria, keyed by the document attribute they compare against.
    kandidaten = {
        "bundesland": bundesland,
        "fach": fach,
        "jahr": jahr,
        "niveau": niveau,
        "typ": typ,
        "status": status,
    }
    aktive_filter = {attr: wert for attr, wert in kandidaten.items() if wert}
    # `indexed` is tri-state: None means "do not filter", False is a real criterion.
    if indexed is not None:
        aktive_filter["indexed"] = indexed

    treffer = [
        dokument
        for dokument in _dokumente.values()
        if all(getattr(dokument, attr) == wert for attr, wert in aktive_filter.items())
    ]
    treffer.sort(
        key=lambda dokument: (dokument.jahr, dokument.fach.value, dokument.niveau.value),
        reverse=True,
    )
    return [_to_dokument_response(dokument) for dokument in treffer]
@router.get("/{doc_id}", response_model=DokumentResponse)
async def get_dokument(doc_id: str):
    """
    Fetch a single document by its id.

    Raises:
        HTTPException: 404 when no document with ``doc_id`` is stored.
    """
    dokument = _dokumente.get(doc_id)
    if dokument:
        return _to_dokument_response(dokument)
    raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
@router.put("/{doc_id}", response_model=DokumentResponse)
async def update_dokument(doc_id: str, data: DokumentUpdate):
    """
    Update a document's metadata (manual correction after automatic recognition).

    Only fields of ``data`` that are not ``None`` are written to the stored
    document; ``updated_at`` is refreshed on every call.

    Raises:
        HTTPException: 404 when no document with ``doc_id`` is stored.
    """
    dokument = _dokumente.get(doc_id)
    if not dokument:
        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")

    # Same field order as the update payload; None means "leave unchanged".
    aenderbare_felder = (
        "bundesland",
        "fach",
        "jahr",
        "niveau",
        "typ",
        "aufgaben_nummer",
        "status",
    )
    for feld in aenderbare_felder:
        neuer_wert = getattr(data, feld)
        if neuer_wert is not None:
            setattr(dokument, feld, neuer_wert)

    dokument.updated_at = datetime.utcnow()
    return _to_dokument_response(dokument)
@router.post("/{doc_id}/confirm", response_model=DokumentResponse)
async def confirm_dokument(doc_id: str):
    """
    Mark a document's recognized metadata as confirmed.

    Sets the status to CONFIRMED regardless of the previous status and
    refreshes ``updated_at``.

    Raises:
        HTTPException: 404 when no document with ``doc_id`` is stored.
    """
    dokument = _dokumente.get(doc_id)
    if dokument:
        dokument.status = VerarbeitungsStatus.CONFIRMED
        dokument.updated_at = datetime.utcnow()
        return _to_dokument_response(dokument)
    raise HTTPException(status_code=404, detail="Dokument nicht gefunden")
@router.post("/{doc_id}/index", response_model=DokumentResponse)
async def index_dokument(doc_id: str):
    """
    Index a document in the vector store (currently a demo stub).

    No real indexing happens yet: the document is flagged as indexed and
    receives three placeholder vector ids.

    Raises:
        HTTPException: 404 when the document is unknown, 400 when its status
            is neither CONFIRMED nor RECOGNIZED.
    """
    dokument = _dokumente.get(doc_id)
    if not dokument:
        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")

    indexierbar = (VerarbeitungsStatus.CONFIRMED, VerarbeitungsStatus.RECOGNIZED)
    if dokument.status not in indexierbar:
        raise HTTPException(status_code=400, detail="Dokument muss erst bestätigt werden")

    # TODO: vector store integration
    # 1. Read the PDF and extract its text
    # 2. Split the text into chunks
    # 3. Generate embeddings
    # 4. Store them with metadata in the vector store

    # Demo: simulate indexing with placeholder ids.
    platzhalter_ids = []
    for nummer in range(3):
        platzhalter_ids.append(f"vec_{doc_id}_{nummer}")

    dokument.indexed = True
    dokument.vector_ids = platzhalter_ids
    dokument.status = VerarbeitungsStatus.INDEXED
    dokument.updated_at = datetime.utcnow()

    logger.info(f"Document {doc_id} indexed (demo)")
    return _to_dokument_response(dokument)
@router.delete("/{doc_id}")
async def delete_dokument(doc_id: str):
    """
    Delete a document: remove its file from disk (if present) and drop it
    from the in-memory registry.

    Returns:
        ``{"status": "deleted", "id": doc_id}``.

    Raises:
        HTTPException: 404 when no document with ``doc_id`` is stored.
    """
    dokument = _dokumente.get(doc_id)
    if not dokument:
        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")

    # Remove the stored file; a file that is already gone is not an error.
    pfad = dokument.file_path
    if os.path.exists(pfad):
        os.remove(pfad)

    # TODO: remove from the vector store as well
    del _dokumente[doc_id]

    antwort = {"status": "deleted", "id": doc_id}
    return antwort
@router.get("/{doc_id}/download")
async def download_dokument(doc_id: str):
    """
    Send a document's stored file as a PDF download.

    The download is offered under the document's original file name.

    Raises:
        HTTPException: 404 when the document is unknown or its file is
            missing on disk.
    """
    dokument = _dokumente.get(doc_id)
    if not dokument:
        raise HTTPException(status_code=404, detail="Dokument nicht gefunden")

    pfad = dokument.file_path
    if os.path.exists(pfad):
        return FileResponse(
            pfad,
            filename=dokument.original_dateiname,
            media_type="application/pdf"
        )
    raise HTTPException(status_code=404, detail="Datei nicht gefunden")
# ============================================================================
# API Endpoints - Erkennung
# ============================================================================
@router.post("/recognize", response_model=RecognitionResult)
async def recognize_filename(filename: str):
    """Run the NiBiS file-name parser on ``filename`` and return its recognition result."""
    erkennung = parse_nibis_filename(filename)
    return erkennung
@router.post("/bulk-confirm")
async def bulk_confirm(doc_ids: List[str]):
    """
    Confirm several documents in one request.

    Only documents that exist and currently have status RECOGNIZED are
    confirmed; all other ids are silently skipped.

    Returns:
        ``{"confirmed": <number confirmed>, "total": <number of ids given>}``.
    """
    anzahl_bestaetigt = 0
    for kennung in doc_ids:
        dokument = _dokumente.get(kennung)
        if not dokument or dokument.status != VerarbeitungsStatus.RECOGNIZED:
            continue
        dokument.status = VerarbeitungsStatus.CONFIRMED
        dokument.updated_at = datetime.utcnow()
        anzahl_bestaetigt += 1
    return {"confirmed": anzahl_bestaetigt, "total": len(doc_ids)}
@router.post("/bulk-index")
async def bulk_index(doc_ids: List[str]):
    """
    Index several documents in one request (demo indexing).

    Only documents that exist and have status CONFIRMED or RECOGNIZED are
    indexed; all other ids are silently skipped.

    Returns:
        ``{"indexed": <number indexed>, "total": <number of ids given>}``.
    """
    indexierbar = (VerarbeitungsStatus.CONFIRMED, VerarbeitungsStatus.RECOGNIZED)
    anzahl_indexiert = 0
    for kennung in doc_ids:
        dokument = _dokumente.get(kennung)
        if not dokument or dokument.status not in indexierbar:
            continue
        # Demo indexing: placeholder vector ids instead of real embeddings.
        dokument.indexed = True
        dokument.vector_ids = [f"vec_{kennung}_{nummer}" for nummer in range(3)]
        dokument.status = VerarbeitungsStatus.INDEXED
        dokument.updated_at = datetime.utcnow()
        anzahl_indexiert += 1
    return {"indexed": anzahl_indexiert, "total": len(doc_ids)}
# ============================================================================
# API Endpoints - Statistiken
# ============================================================================
@router.get("/stats/overview")
async def get_stats_overview():
    """
    Return aggregate counts over all stored documents.

    The result holds the total, the number of indexed and of PENDING
    documents, and per-key histograms by Bundesland, Fach, Jahr and status.
    """
    alle = list(_dokumente.values())

    def _haeufigkeit(schluessel_von):
        # Histogram of schluessel_von(dokument) in document order.
        zaehler = {}
        for eintrag in alle:
            schluessel = schluessel_von(eintrag)
            zaehler[schluessel] = zaehler.get(schluessel, 0) + 1
        return zaehler

    nach_bundesland = _haeufigkeit(lambda d: d.bundesland.value)
    nach_fach = _haeufigkeit(lambda d: d.fach.value)
    nach_jahr = _haeufigkeit(lambda d: d.jahr)
    nach_status = _haeufigkeit(lambda d: d.status.value)

    return {
        "total": len(alle),
        "indexed": len([d for d in alle if d.indexed]),
        "pending": len([d for d in alle if d.status == VerarbeitungsStatus.PENDING]),
        "by_bundesland": nach_bundesland,
        "by_fach": nach_fach,
        "by_jahr": nach_jahr,
        "by_status": nach_status
    }
# ============================================================================
# API Endpoints - Suche (für Klausur-Korrektur)
# ============================================================================
# NOTE(review): GET "/{doc_id}" is registered earlier in this router. FastAPI
# matches path operations in declaration order, so a request to GET "/search"
# is presumably captured by get_dokument (doc_id="search") and answered with
# 404 — confirm, and if so move this route above the "/{doc_id}" routes.
@router.get("/search", response_model=List[DokumentResponse])
async def search_dokumente(
    bundesland: Bundesland,
    fach: Fach,
    jahr: Optional[int] = None,
    niveau: Optional[Niveau] = None,
    nur_indexed: bool = True
):
    """
    Search documents for exam correction (Klausur-Korrektur).

    ``bundesland`` and ``fach`` are mandatory filters; ``jahr`` and ``niveau``
    are applied only when truthy. By default only indexed documents are
    returned (``nur_indexed=True``).

    Result order: every AUFGABE is immediately followed by the first
    ERWARTUNGSHORIZONT with the same jahr, niveau and aufgaben_nummer (if any);
    then the ERWARTUNGSHORIZONT documents that were not paired; then all
    documents of any other type.
    """
    # Mandatory filters
    docs = [
        d for d in _dokumente.values()
        if d.bundesland == bundesland and d.fach == fach
    ]

    # Optional filters
    if jahr:
        docs = [d for d in docs if d.jahr == jahr]
    if niveau:
        docs = [d for d in docs if d.niveau == niveau]
    if nur_indexed:
        docs = [d for d in docs if d.indexed]

    # Partition by type: tasks first, each followed by its expectation sheet.
    aufgaben = [d for d in docs if d.typ == DokumentTyp.AUFGABE]
    ewh = [d for d in docs if d.typ == DokumentTyp.ERWARTUNGSHORIZONT]
    andere = [d for d in docs if d.typ not in [DokumentTyp.AUFGABE, DokumentTyp.ERWARTUNGSHORIZONT]]

    result = []
    # Ids of expectation sheets already emitted next to their task. Tracking
    # ids avoids rebuilding a response model per document and searching the
    # whole result list by model equality.
    paired_ewh_ids = set()

    for aufgabe in aufgaben:
        result.append(_to_dokument_response(aufgabe))
        matching_ewh = next(
            (e for e in ewh
             if e.jahr == aufgabe.jahr
             and e.niveau == aufgabe.niveau
             and e.aufgaben_nummer == aufgabe.aufgaben_nummer),
            None
        )
        if matching_ewh:
            result.append(_to_dokument_response(matching_ewh))
            paired_ewh_ids.add(matching_ewh.id)

    # Remaining expectation sheets, then everything else.
    for e in ewh:
        if e.id not in paired_ewh_ids:
            result.append(_to_dokument_response(e))
    for a in andere:
        result.append(_to_dokument_response(a))

    return result
# ============================================================================
# Enums Endpoint (für Frontend)
# ============================================================================
@router.get("/enums/bundeslaender")
async def get_bundeslaender():
    """
    Return every Bundesland as a ``{"value", "label"}`` pair.

    The label is derived from the enum value: underscores become spaces and
    the result is title-cased.
    """
    eintraege = []
    for land in Bundesland:
        anzeige = land.value.replace("_", " ").title()
        eintraege.append({"value": land.value, "label": anzeige})
    return eintraege
@router.get("/enums/faecher")
async def get_faecher():
    """
    Return every subject (Fach) as a ``{"value", "label"}`` pair.

    Subjects without an entry in the label table fall back to their raw
    enum value as the label.
    """
    # Human-readable display names, one (member, label) pair per subject.
    anzeigenamen = dict([
        (Fach.DEUTSCH, "Deutsch"),
        (Fach.ENGLISCH, "Englisch"),
        (Fach.MATHEMATIK, "Mathematik"),
        (Fach.BIOLOGIE, "Biologie"),
        (Fach.CHEMIE, "Chemie"),
        (Fach.PHYSIK, "Physik"),
        (Fach.GESCHICHTE, "Geschichte"),
        (Fach.ERDKUNDE, "Erdkunde"),
        (Fach.POLITIK_WIRTSCHAFT, "Politik-Wirtschaft"),
        (Fach.FRANZOESISCH, "Französisch"),
        (Fach.SPANISCH, "Spanisch"),
        (Fach.LATEIN, "Latein"),
        (Fach.GRIECHISCH, "Griechisch"),
        (Fach.KUNST, "Kunst"),
        (Fach.MUSIK, "Musik"),
        (Fach.SPORT, "Sport"),
        (Fach.INFORMATIK, "Informatik"),
        (Fach.EV_RELIGION, "Ev. Religion"),
        (Fach.KATH_RELIGION, "Kath. Religion"),
        (Fach.WERTE_NORMEN, "Werte und Normen"),
        (Fach.BRC, "BRC (Betriebswirtschaft)"),
        (Fach.BVW, "BVW (Volkswirtschaft)"),
        (Fach.ERNAEHRUNG, "Ernährung"),
        (Fach.MECHATRONIK, "Mechatronik"),
        (Fach.GESUNDHEIT_PFLEGE, "Gesundheit-Pflege"),
        (Fach.PAEDAGOGIK_PSYCHOLOGIE, "Pädagogik-Psychologie"),
    ])

    eintraege = []
    for fach in Fach:
        eintraege.append({"value": fach.value, "label": anzeigenamen.get(fach, fach.value)})
    return eintraege
@router.get("/enums/niveaus")
async def get_niveaus():
    """Return the two requirement levels (eA / gA) as ``{"value", "label"}`` pairs."""
    beschriftungen = (
        ("eA", "eA (erhöhtes Anforderungsniveau)"),
        ("gA", "gA (grundlegendes Anforderungsniveau)"),
    )
    return [{"value": wert, "label": text} for wert, text in beschriftungen]
@router.get("/enums/typen")
async def get_typen():
    """
    Return every document type as a ``{"value", "label"}`` pair.

    Types without an entry in the label table fall back to their raw enum
    value as the label.
    """
    anzeigenamen = dict([
        (DokumentTyp.AUFGABE, "Aufgabe"),
        (DokumentTyp.ERWARTUNGSHORIZONT, "Erwartungshorizont"),
        (DokumentTyp.DECKBLATT, "Deckblatt"),
        (DokumentTyp.MATERIAL, "Material"),
        (DokumentTyp.HOERVERSTEHEN, "Hörverstehen"),
        (DokumentTyp.SPRACHMITTLUNG, "Sprachmittlung"),
        (DokumentTyp.BEWERTUNGSBOGEN, "Bewertungsbogen"),
    ])

    eintraege = []
    for dokument_typ in DokumentTyp:
        eintraege.append(
            {"value": dokument_typ.value, "label": anzeigenamen.get(dokument_typ, dokument_typ.value)}
        )
    return eintraege
# ============================================================================
# Backwards-compatibility aliases (used by tests)
# ============================================================================
# Old public names kept as plain aliases of the current objects.
AbiturFach = Fach
Anforderungsniveau = Niveau
# Same dict object as _dokumente (not a copy): changes made through either name
# are visible through the other.
documents_db = _dokumente
class DocumentMetadata(BaseModel):
    """Backwards-compatible metadata model for tests."""
    # Every field is optional and defaults to None. Unlike the internal document
    # record, the categorical fields are typed as plain strings here, not enums.
    jahr: Optional[int] = None
    bundesland: Optional[str] = None
    fach: Optional[str] = None
    niveau: Optional[str] = None
    dokument_typ: Optional[str] = None
    aufgaben_nummer: Optional[str] = None
# Backwards-compatible AbiturDokument for tests (different from internal dataclass)
class AbiturDokumentCompat(BaseModel):
    """Backwards-compatible AbiturDokument model for tests."""
    id: str
    filename: str
    file_path: str
    # Descriptive fields are nested in a DocumentMetadata object rather than
    # stored flat on the document.
    metadata: DocumentMetadata
    status: VerarbeitungsStatus
    # Optional parser output; None when not supplied.
    recognition_result: Optional[RecognitionResult] = None
    created_at: datetime
    updated_at: datetime
    class Config:
        # Lets pydantic accept field types it has no built-in validator for.
        arbitrary_types_allowed = True

View File

@@ -0,0 +1,126 @@
"""
AI Processing - Modul für KI-gestützte Arbeitsblatt-Verarbeitung.
Dieses Modul bietet:
- Bildanalyse mit Vision APIs (OpenAI/Claude)
- Handschrift-Entfernung aus Scans
- Generierung von Übungsmaterialien (MC, Lückentext, Q&A)
- Leitner-System für Spaced Repetition
- Druck-Versionen für alle Materialtypen
- Mindmap-Generierung für Lernposter
Verwendung:
from ai_processing import analyze_scan_structure_with_ai, generate_mc_from_analysis
# Oder alle Funktionen:
from ai_processing import *
"""
# Core utilities
from .core import (
BASE_DIR,
EINGANG_DIR,
BEREINIGT_DIR,
get_openai_api_key,
get_anthropic_api_key,
encode_image_to_data_url,
encode_image_to_base64,
ensure_directories,
dummy_process_scan,
get_vision_api,
)
# Analysis functions
from .analysis import (
describe_scan_with_ai,
analyze_scan_structure_with_ai,
)
# HTML generation
from .html_generator import (
build_clean_html_from_analysis,
)
# Image processing
from .image_processor import (
remove_handwriting_from_scan,
)
# Multiple Choice generator
from .mc_generator import (
generate_mc_from_analysis,
)
# Cloze/Lückentext generator
from .cloze_generator import (
generate_cloze_from_analysis,
)
# Q&A generator
from .qa_generator import (
generate_qa_from_analysis,
)
# Leitner system
from .leitner import (
update_leitner_progress,
get_next_review_items,
)
# Print version generators
from .print_generator import (
generate_print_version_qa,
generate_print_version_cloze,
generate_print_version_mc,
generate_print_version_worksheet,
)
# Mindmap generator
from .mindmap import (
generate_mindmap_data,
generate_mindmap_html,
save_mindmap_for_worksheet,
)
# Legacy aliases for backwards compatibility
# Both names are underscore-prefixed and not listed in __all__, so
# `from ai_processing import *` does not export them; they are reachable only
# via explicit attribute access or explicit import.
_get_api_key = get_openai_api_key
_encode_image_to_data_url = encode_image_to_data_url
# Public API of the package: exactly the names re-exported from the submodules
# above, grouped by the submodule they come from.
__all__ = [
    # Core
    "BASE_DIR",
    "EINGANG_DIR",
    "BEREINIGT_DIR",
    "get_openai_api_key",
    "get_anthropic_api_key",
    "encode_image_to_data_url",
    "encode_image_to_base64",
    "ensure_directories",
    "dummy_process_scan",
    "get_vision_api",
    # Analysis
    "describe_scan_with_ai",
    "analyze_scan_structure_with_ai",
    # HTML
    "build_clean_html_from_analysis",
    # Image
    "remove_handwriting_from_scan",
    # MC
    "generate_mc_from_analysis",
    # Cloze
    "generate_cloze_from_analysis",
    # Q&A
    "generate_qa_from_analysis",
    # Leitner
    "update_leitner_progress",
    "get_next_review_items",
    # Print
    "generate_print_version_qa",
    "generate_print_version_cloze",
    "generate_print_version_mc",
    "generate_print_version_worksheet",
    # Mindmap
    "generate_mindmap_data",
    "generate_mindmap_html",
    "save_mindmap_for_worksheet",
]

View File

@@ -0,0 +1,209 @@
"""
AI Processing - Worksheet Analysis.
Strukturierte Analyse von Arbeitsblättern mit OpenAI oder Claude.
"""
from pathlib import Path
import json
import requests
import logging
from .core import (
get_openai_api_key,
encode_image_to_data_url,
BEREINIGT_DIR,
get_vision_api,
)
logger = logging.getLogger(__name__)
def describe_scan_with_ai(input_path: Path, timeout: float = 60.0) -> Path:
    """
    Ask the OpenAI vision model for a short description of a worksheet scan.

    The description is written to ``<stem>_beschreibung.txt`` in BEREINIGT_DIR.

    Args:
        input_path: Path of the scanned worksheet image.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` waits indefinitely on a stalled
            connection.

    Returns:
        Path of the written description file.

    Raises:
        FileNotFoundError: ``input_path`` does not exist.
        RuntimeError: The API response does not have the expected structure.
        requests.RequestException: HTTP error status, connection failure or
            timeout.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    api_key = get_openai_api_key()
    image_data_url = encode_image_to_data_url(input_path)

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "system",
                "content": "Du bist ein hilfreicher Assistent, der Schul-Arbeitsblätter knapp beschreibt.",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Beschreibe dieses Arbeitsblatt knapp: Thema, Art der Aufgaben "
                            "(z.B. Lückentext, Multiple Choice, Rechenaufgaben) und groben Inhalt."
                        ),
                    },
                    {"type": "image_url", "image_url": {"url": image_data_url}},
                ],
            },
        ],
        "max_tokens": 400,
    }

    # Bounded wait: a hung connection must not block the caller forever.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    try:
        description = data["choices"][0]["message"]["content"]
    except Exception as e:
        raise RuntimeError(f"Unerwartete Antwortstruktur von der KI: {e}\nAntwort: {data}") from e

    out_name = input_path.stem + "_beschreibung.txt"
    out_path = BEREINIGT_DIR / out_name
    # Make sure the output directory exists so the paid API result is not lost.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(description, encoding="utf-8")
    return out_path
def _get_analysis_system_prompt() -> str:
"""Gibt den System-Prompt für die Arbeitsblatt-Analyse zurück."""
return (
"Du bist ein Experte für die Analyse von Schul-Arbeitsblättern.\n\n"
"HAUPTAUFGABEN:\n"
"1. Erkenne ALLE gedruckten Elemente: Text, Überschriften, Tabellen, Linien, Kästchen, Diagramme, Illustrationen\n"
"2. Identifiziere ALLE handschriftlichen Ergänzungen: Antworten, Zahlen, Buchstaben, Notizen, Zeichnungen\n"
"3. Bestimme präzise Positionen (Bounding Boxes in Pixeln) für JEDES Element\n\n"
"KRITISCH - DIAGRAMME & ILLUSTRATIONEN:\n"
"- Suche aktiv nach: anatomischen Zeichnungen, beschrifteten Diagrammen, Grafiken, Tabellen, Skizzen\n"
"- Wenn du irgendeine bildliche Darstellung siehst (z.B. Auge, Pflanze, Karte, Schaubild), setze 'has_diagram: true'\n"
"- Für JEDES visuelle Element: Erstelle einen Eintrag in 'diagram_elements' mit genauer Position\n"
"- Beschrifte-Linien (von Beschriftung zu Bildteil) gehören zum Diagramm!\n\n"
"HANDSCHRIFT ERKENNUNG:\n"
"- Unterscheide gedruckt vs. handgeschrieben anhand der Schriftart\n"
"- Klassifiziere Farbe: blau/schwarz/rot/pencil (Bleistift)\n"
"- Durchgestrichene Wörter separat auflisten\n\n"
"AUSGABE: Gib deine Antwort AUSSCHLIESSLICH als gültiges JSON zurück (kein Markdown, keine Code-Blöcke)."
)
def _analyze_with_openai(input_path: Path) -> Path:
"""Strukturierte JSON-Analyse des Arbeitsblatts mit OpenAI."""
if not input_path.exists():
raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")
api_key = get_openai_api_key()
image_data_url = encode_image_to_data_url(input_path)
url = "https://api.openai.com/v1/chat/completions"
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
system_prompt = _get_analysis_system_prompt()
user_text = '''Analysiere dieses Arbeitsblatt und gib ein JSON mit folgendem Aufbau zurück:
{
"title": string | null,
"subject": string | null,
"grade_level": string | null,
"instructions": string | null,
"canonical_text": string,
"printed_blocks": [...],
"handwritten_annotations": [...],
"struck_through_words": [...],
"tasks": [...],
"has_diagram": boolean,
"diagram_elements": [...]
}'''
payload = {
"model": "gpt-4o-mini",
"messages": [
{"role": "system", "content": system_prompt},
{
"role": "user",
"content": [
{"type": "text", "text": user_text},
{"type": "image_url", "image_url": {"url": image_data_url}},
],
},
],
"max_tokens": 2500,
}
response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
data = response.json()
try:
raw_content = data["choices"][0]["message"]["content"]
# JSON-Block extrahieren falls in Markdown eingebettet
if "```json" in raw_content:
raw_content = raw_content.split("```json")[1].split("```")[0].strip()
elif "```" in raw_content:
raw_content = raw_content.split("```")[1].split("```")[0].strip()
obj = json.loads(raw_content)
except json.JSONDecodeError as e:
raise RuntimeError(f"KI hat kein valides JSON zurückgegeben: {e}\nAntwort: {raw_content}") from e
except Exception as e:
raise RuntimeError(f"Unerwartete Antwortstruktur: {e}\nAntwort: {data}") from e
out_name = input_path.stem + "_analyse.json"
out_path = BEREINIGT_DIR / out_name
out_path.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")
return out_path
def _analyze_with_claude(input_path: Path) -> Path:
    """
    Run the structured JSON analysis of a worksheet scan with the Claude Vision API.

    The analysis is written to ``<stem>_analyse.json`` in BEREINIGT_DIR.
    Any failure during analysis or writing is logged and re-raised unchanged.

    Raises:
        FileNotFoundError: ``input_path`` does not exist.
    """
    # Imported lazily, on first use of this function.
    from claude_vision import analyze_worksheet_with_claude

    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    logger.info(f"Analyzing with Claude Vision: {input_path.name}")

    try:
        ergebnis = analyze_worksheet_with_claude(input_path, max_tokens=2500)
        ziel = BEREINIGT_DIR / f"{input_path.stem}_analyse.json"
        serialisiert = json.dumps(ergebnis, ensure_ascii=False, indent=2)
        ziel.write_text(serialisiert, encoding="utf-8")
        logger.info(f"Claude analysis saved: {ziel.name}")
        return ziel
    except Exception as fehler:
        logger.error(f"Claude analysis failed: {fehler}")
        raise
def analyze_scan_structure_with_ai(input_path: Path) -> Path:
    """
    Run the structured JSON analysis of a worksheet with the configured vision API.

    Dispatch on the value returned by ``get_vision_api()``:
    - "claude": use Claude; on any exception fall back to OpenAI.
    - "openai": use OpenAI directly.
    - anything else: log a warning and use Claude, without the OpenAI fallback.

    Returns:
        Path of the written ``*_analyse.json`` file.
    """
    gewaehlte_api = get_vision_api()
    logger.info(f"Using Vision API: {gewaehlte_api}")

    if gewaehlte_api == "openai":
        return _analyze_with_openai(input_path)

    if gewaehlte_api != "claude":
        logger.warning(f"Unknown VISION_API '{gewaehlte_api}', using Claude as default")
        return _analyze_with_claude(input_path)

    try:
        return _analyze_with_claude(input_path)
    except Exception as fehler:
        logger.warning(f"Claude failed, falling back to OpenAI: {fehler}")
        return _analyze_with_openai(input_path)

View File

@@ -0,0 +1,328 @@
"""
AI Processing - Cloze/Lückentext Generator.
Generiert Lückentexte mit Übersetzungen aus Arbeitsblatt-Analysen.
"""
from pathlib import Path
import json
import os
import requests
import logging
from .core import (
get_openai_api_key,
get_vision_api,
BEREINIGT_DIR,
)
logger = logging.getLogger(__name__)
# Maps language codes to the German language name that is interpolated into
# the translation prompts.
LANGUAGE_NAMES = {
    "tr": "Türkisch",
    "ar": "Arabisch",
    "ru": "Russisch",
    "en": "Englisch",
    "fr": "Französisch",
    "es": "Spanisch",
    "pl": "Polnisch",
    "uk": "Ukrainisch",
}
def _generate_cloze_with_openai(analysis_data: dict, target_language: str = "tr", timeout: float = 120.0) -> dict:
    """
    Generate cloze (fill-in-the-gap) exercises from a worksheet analysis via OpenAI.

    Didactic requirements encoded in the prompt:
    - several meaningful gaps per sentence (not just one)
    - difficulty matches the original worksheet
    - a translation carrying the same gaps

    Args:
        analysis_data: The worksheet analysis JSON as a dict.
        target_language: Language code for the translation (default "tr").
            A code missing from LANGUAGE_NAMES falls back to Turkish for both
            the prompt and the reported language code, so the metadata never
            claims a language the translation is not in.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        Dict with ``cloze_items`` and ``metadata``. When the analysis holds no
        text, ``cloze_items`` is empty and ``metadata`` carries an ``error``.

    Raises:
        RuntimeError: The response has an unexpected structure or is not
            valid JSON.
        requests.RequestException: HTTP error status, connection failure or
            timeout.
    """
    api_key = get_openai_api_key()

    # Extract the relevant content
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    # Assemble the text content, skipping duplicates
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        # `"text": null` in the analysis JSON arrives as None; treat it as empty.
        text = (block.get("text") or "").strip()
        if text and text not in content_parts:
            content_parts.append(text)

    worksheet_content = "\n\n".join(content_parts)
    if not worksheet_content.strip():
        logger.warning("Kein Textinhalt für Lückentext-Generierung gefunden")
        return {"cloze_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    target_lang_name = LANGUAGE_NAMES.get(target_language)
    if target_lang_name is None:
        # Keep the language code and the language name in sync on fallback.
        logger.warning(f"Unbekannter Sprachcode '{target_language}', verwende Türkisch (tr)")
        target_language, target_lang_name = "tr", "Türkisch"

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    system_prompt = f"""Du bist ein erfahrener Pädagoge, der Lückentexte für Schüler erstellt.
WICHTIGE REGELN FÜR LÜCKENTEXTE:
1. MEHRERE LÜCKEN PRO SATZ:
- Erstelle IMMER mehrere sinnvolle Lücken pro Satz
- Beispiel: "Ich habe gestern meine Hausaufgaben gemacht."
→ Lücken: "habe" UND "gemacht" (nicht nur eine!)
- Wähle Wörter, die für das Verständnis wichtig sind
2. SCHWIERIGKEITSGRAD:
- Niveau muss exakt "{grade_level}" entsprechen
- Nicht zu leicht, nicht zu schwer
- Altersgerechte Lücken wählen
3. SINNVOLLE LÜCKENWÖRTER:
- Verben (konjugiert)
- Wichtige Nomen
- Adjektive
- KEINE Artikel oder Präpositionen allein
4. ÜBERSETZUNG:
- Übersetze den VOLLSTÄNDIGEN Satz auf {target_lang_name}
- Die GLEICHEN Wörter müssen als Lücken markiert sein
- Die Übersetzung dient als Hilfe für Eltern
5. AUSGABE: Nur gültiges JSON, kein Markdown."""

    user_prompt = f"""Erstelle Lückentexte aus diesem Arbeitsblatt:
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
TEXT:
{worksheet_content}
Erstelle 5-8 Sätze mit Lücken. Gib das Ergebnis als JSON zurück:
{{
"cloze_items": [
{{
"id": "c1",
"original_sentence": "Der vollständige Originalsatz ohne Lücken",
"sentence_with_gaps": "Der Satz mit ___ für jede Lücke",
"gaps": [
{{
"id": "g1",
"word": "das fehlende Wort",
"position": 0,
"hint": "optionaler Hinweis"
}}
],
"translation": {{
"language": "{target_language}",
"language_name": "{target_lang_name}",
"full_sentence": "Vollständige Übersetzung",
"sentence_with_gaps": "Übersetzung mit ___ an gleichen Stellen"
}}
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"target_language": "{target_language}",
"total_gaps": 0
}}
}}
WICHTIG:
- Jeder Satz MUSS mindestens 2 Lücken haben!
- Die Lücken in der Übersetzung müssen den deutschen Lücken entsprechen
- Position ist der Index des Wortes im Satz (0-basiert)"""

    payload = {
        "model": "gpt-4o-mini",
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 3000,
        "temperature": 0.7,
    }

    # Bounded wait: a hung connection must not block the caller forever.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    try:
        content = data["choices"][0]["message"]["content"]
        cloze_data = json.loads(content)
    except (KeyError, IndexError, TypeError, json.JSONDecodeError) as e:
        # IndexError: empty "choices"; TypeError: content is None or the
        # response has an unexpected shape.
        raise RuntimeError(f"Fehler bei Lückentext-Generierung: {e}") from e

    # Recompute the total gap count instead of trusting the model's number.
    total_gaps = sum(len(item.get("gaps", [])) for item in cloze_data.get("cloze_items", []))
    if "metadata" in cloze_data:
        cloze_data["metadata"]["total_gaps"] = total_gaps

    return cloze_data
def _generate_cloze_with_claude(analysis_data: dict, target_language: str = "tr") -> dict:
    """
    Generate cloze (fill-in-the-gap) exercises from a worksheet analysis via the Claude API.

    Args:
        analysis_data: The worksheet analysis JSON as a dict.
        target_language: Language code for the translation (default "tr").
            A code missing from LANGUAGE_NAMES falls back to Turkish for both
            the prompt and the reported language code, so the metadata never
            claims a language the translation is not in.

    Returns:
        Dict with ``cloze_items`` and ``metadata``. When the analysis holds no
        text, ``cloze_items`` is empty and ``metadata`` carries an ``error``.

    Raises:
        RuntimeError: ANTHROPIC_API_KEY is not set, or Claude returned
            invalid JSON.
    """
    import anthropic

    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")

    client = anthropic.Anthropic(api_key=api_key)

    # Extract the relevant content
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        # `"text": null` in the analysis JSON arrives as None; treat it as empty.
        text = (block.get("text") or "").strip()
        if text and text not in content_parts:
            content_parts.append(text)

    worksheet_content = "\n\n".join(content_parts)
    if not worksheet_content.strip():
        return {"cloze_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    target_lang_name = LANGUAGE_NAMES.get(target_language)
    if target_lang_name is None:
        # Keep the language code and the language name in sync on fallback.
        logger.warning(f"Unbekannter Sprachcode '{target_language}', verwende Türkisch (tr)")
        target_language, target_lang_name = "tr", "Türkisch"

    prompt = f"""Erstelle Lückentexte aus diesem Arbeitsblatt.
WICHTIGE REGELN:
1. MEHRERE LÜCKEN PRO SATZ (mindestens 2!)
Beispiel: "Ich habe gestern Hausaufgaben gemacht" → Lücken: "habe" UND "gemacht"
2. Schwierigkeitsgrad: exakt "{grade_level}"
3. Übersetzung auf {target_lang_name} mit gleichen Lücken
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
TEXT:
{worksheet_content}
Antworte NUR mit diesem JSON (5-8 Sätze):
{{
"cloze_items": [
{{
"id": "c1",
"original_sentence": "Vollständiger Satz",
"sentence_with_gaps": "Satz mit ___ für Lücken",
"gaps": [
{{"id": "g1", "word": "Lückenwort", "position": 0, "hint": "Hinweis"}}
],
"translation": {{
"language": "{target_language}",
"language_name": "{target_lang_name}",
"full_sentence": "Übersetzung",
"sentence_with_gaps": "Übersetzung mit ___"
}}
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"target_language": "{target_language}",
"total_gaps": 0
}}
}}"""

    # NOTE(review): pinned model snapshot — verify it is still offered by the API.
    message = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=3000,
        messages=[{"role": "user", "content": prompt}]
    )

    content = message.content[0].text

    try:
        # Strip a Markdown code fence if the model wrapped the JSON in one.
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]
        cloze_data = json.loads(content.strip())
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Claude hat ungültiges JSON geliefert: {e}") from e

    # Recompute the total gap count instead of trusting the model's number.
    total_gaps = sum(len(item.get("gaps", [])) for item in cloze_data.get("cloze_items", []))
    if "metadata" in cloze_data:
        cloze_data["metadata"]["total_gaps"] = total_gaps

    return cloze_data
def generate_cloze_from_analysis(analysis_path: Path, target_language: str = "tr") -> Path:
    """
    Generate cloze exercises from an analysis JSON file and save them.

    With VISION_API "claude", Claude is tried first and OpenAI is used as a
    fallback on any exception; every other setting uses OpenAI directly.

    Args:
        analysis_path: Path to the ``*_analyse.json`` file.
        target_language: Language code for the translation (default "tr").

    Returns:
        Path of the written ``*_cloze.json`` file in BEREINIGT_DIR.

    Raises:
        FileNotFoundError: ``analysis_path`` does not exist.
        RuntimeError: The analysis file is not valid JSON (the original
            ``JSONDecodeError`` is attached as ``__cause__``).
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        analysis_data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Ungültige Analyse-JSON: {e}") from e

    logger.info(f"Generiere Lückentexte für: {analysis_path.name}")

    vision_api = get_vision_api()

    # Generate with the configured API
    if vision_api == "claude":
        try:
            cloze_data = _generate_cloze_with_claude(analysis_data, target_language)
        except Exception as e:
            logger.warning(f"Claude Lückentext-Generierung fehlgeschlagen, nutze OpenAI: {e}")
            cloze_data = _generate_cloze_with_openai(analysis_data, target_language)
    else:
        cloze_data = _generate_cloze_with_openai(analysis_data, target_language)

    # Save the cloze data.
    # NOTE(review): replace() drops every "_analyse" occurrence in the stem,
    # not only a trailing one — confirm whether that is intended.
    out_name = analysis_path.stem.replace("_analyse", "") + "_cloze.json"
    out_path = BEREINIGT_DIR / out_name
    # Make sure the output directory exists so the paid API result is not lost.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(cloze_data, ensure_ascii=False, indent=2), encoding="utf-8")

    logger.info(f"Lückentexte gespeichert: {out_path.name}")
    return out_path

View File

@@ -0,0 +1,71 @@
"""
AI Processing - Core Utilities.
Basis-Funktionen für API-Zugriff, Encoding und Verzeichnisse.
"""
from pathlib import Path
import shutil
import os
import base64
import logging
logger = logging.getLogger(__name__)
# Directories: all worksheet data lives under ~/Arbeitsblaetter.
BASE_DIR = Path.home() / "Arbeitsblaetter"
EINGANG_DIR = BASE_DIR / "Eingang"
BEREINIGT_DIR = BASE_DIR / "Bereinigt"
# Vision API configuration: read once at import time, lower-cased, defaulting
# to "claude" when the VISION_API environment variable is unset.
VISION_API = os.getenv("VISION_API", "claude").lower()
def get_openai_api_key() -> str:
    """
    Return the OpenAI API key from the ``OPENAI_API_KEY`` environment variable.

    Raises:
        RuntimeError: The variable is unset or empty.
    """
    schluessel = os.environ.get("OPENAI_API_KEY")
    if schluessel:
        return schluessel
    raise RuntimeError("OPENAI_API_KEY ist nicht gesetzt. Bitte API-Schlüssel als Umgebungsvariable setzen.")
def get_anthropic_api_key() -> str:
    """
    Return the Anthropic API key from the ``ANTHROPIC_API_KEY`` environment variable.

    Raises:
        RuntimeError: The variable is unset or empty.
    """
    schluessel = os.environ.get("ANTHROPIC_API_KEY")
    if schluessel:
        return schluessel
    raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")
def encode_image_to_data_url(input_path: Path) -> str:
    """
    Encode an image file as a base64 ``data:`` URL for vision APIs.

    The MIME type is guessed from the file extension, so a PNG scan is no
    longer labelled as JPEG. When the extension is unknown or does not map
    to an ``image/*`` type, ``image/jpeg`` is used as before.

    Args:
        input_path: Path of the image file to encode.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.
    """
    # Local import: mimetypes is not among this module's top-level imports.
    import mimetypes

    guessed_type, _ = mimetypes.guess_type(input_path.name)
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        # Previous behaviour, kept as the fallback.
        mime_type = "image/jpeg"

    image_bytes = input_path.read_bytes()
    image_b64 = base64.b64encode(image_bytes).decode("utf-8")
    return f"data:{mime_type};base64,{image_b64}"
def encode_image_to_base64(input_path: Path) -> str:
    """Read the file at ``input_path`` and return its bytes as a base64 text string."""
    with input_path.open("rb") as datei:
        rohdaten = datei.read()
    kodiert = base64.b64encode(rohdaten)
    return str(kodiert, "utf-8")
def ensure_directories():
    """Create the input and cleaned-output directories (with parents) if they are missing."""
    for verzeichnis in (EINGANG_DIR, BEREINIGT_DIR):
        verzeichnis.mkdir(parents=True, exist_ok=True)
def dummy_process_scan(input_path: Path) -> Path:
    """
    Fallback "processing": copy the scan unchanged into BEREINIGT_DIR.

    The copy is named ``<stem>_bereinigt<suffix>``.

    Returns:
        Path of the copied file.

    Raises:
        FileNotFoundError: ``input_path`` does not exist.
    """
    if input_path.exists():
        ziel = BEREINIGT_DIR / f"{input_path.stem}_bereinigt{input_path.suffix}"
        shutil.copy2(input_path, ziel)
        return ziel
    raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")
def get_vision_api() -> str:
    """
    Return the configured vision API name.

    This is the module-level ``VISION_API`` value, which was read from the
    environment when the module was imported.
    """
    konfigurierte_api = VISION_API
    return konfigurierte_api

View File

@@ -0,0 +1,211 @@
"""
AI Processing - HTML Generator.
Baut saubere HTML-Arbeitsblätter aus Analyse-JSON.
"""
from pathlib import Path
import json
import logging
from .core import BEREINIGT_DIR
logger = logging.getLogger(__name__)
def _clean_worksheet_style() -> str:
    """Return the static ``<style>`` element embedded in every clean worksheet page."""
    return """
<style>
:root {
  --font-main: "Inter", "Noto Sans", system-ui, -apple-system, BlinkMacSystemFont, sans-serif;
}
* { box-sizing: border-box; }
body {
  font-family: var(--font-main);
  margin: 32px;
  line-height: 1.5;
  font-size: 14px;
  color: #111827;
}
.page {
  max-width: 800px;
  margin: 0 auto;
}
h1 {
  font-size: 24px;
  margin-bottom: 4px;
}
h2 {
  font-size: 18px;
  margin-top: 24px;
}
.meta {
  font-size: 12px;
  color: #6b7280;
  margin-bottom: 16px;
}
.instructions {
  margin-bottom: 20px;
  padding: 8px 10px;
  border-radius: 8px;
  background: #eff6ff;
  border: 1px solid #bfdbfe;
  font-size: 13px;
}
.text-blocks {
  margin-bottom: 24px;
}
.text-block {
  margin-bottom: 8px;
}
.text-block-title {
  font-weight: 600;
  margin-bottom: 4px;
}
.task-list {
  margin-top: 8px;
}
.task {
  margin-bottom: 14px;
  padding-bottom: 8px;
  border-bottom: 1px dashed #e5e7eb;
}
.task-title {
  font-weight: 600;
  margin-bottom: 4px;
}
.gap-line {
  display: inline-block;
  border-bottom: 1px solid #000;
  min-width: 80px;
  margin: 0 4px;
}
.footnote {
  margin-top: 24px;
  font-size: 11px;
  color: #9ca3af;
}
</style>
"""


def build_clean_html_from_analysis(analysis_path: Path) -> Path:
    """
    Build a clean HTML worksheet from an ``*_analyse.json`` file.

    Only printed text is rendered: ``printed_blocks`` when present, otherwise
    ``canonical_text`` split into paragraphs on blank lines. Tasks are listed
    below the text, and every ``___`` in ``text_with_gaps`` becomes a gap line.
    A footnote states whether struck-through words were detected.

    All text taken from the analysis is HTML-escaped before it is inserted,
    so characters such as ``<`` or ``&`` in the recognised text cannot break
    the page or inject markup.

    Args:
        analysis_path: Path to the analysis JSON file.

    Returns:
        Path of the written ``<name>_clean.html`` file inside BEREINIGT_DIR.

    Raises:
        FileNotFoundError: if ``analysis_path`` does not exist.
        RuntimeError: if the file does not contain valid JSON.
    """
    from html import escape  # stdlib; imported locally so the module import block stays untouched

    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")
    try:
        data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Analyse-Datei enthält kein gültiges JSON: {analysis_path}\n{e}") from e

    def esc(value) -> str:
        # str() first: fields such as grade_level may be numbers in the JSON.
        return escape(str(value))

    title = esc(data.get("title") or "Arbeitsblatt")
    subject = esc(data.get("subject") or "")
    grade_level = esc(data.get("grade_level") or "")
    instructions = esc(data.get("instructions") or "")
    tasks = data.get("tasks", []) or []
    canonical_text = data.get("canonical_text") or ""
    printed_blocks = data.get("printed_blocks") or []
    struck = data.get("struck_through_words") or []

    html_parts = [
        "<!DOCTYPE html>",
        "<html lang='de'>",
        "<head>",
        "<meta charset='UTF-8'>",
        f"<title>{title}</title>",
        _clean_worksheet_style(),
        "</head>",
        "<body>",
        "<div class='page'>",
    ]

    # Header area
    html_parts.append(f"<h1>{title}</h1>")
    meta_bits = []
    if subject:
        meta_bits.append(f"Fach: {subject}")
    if grade_level:
        meta_bits.append(f"Klassenstufe: {grade_level}")
    if meta_bits:
        html_parts.append(f"<div class='meta'>{' | '.join(meta_bits)}</div>")
    if instructions:
        html_parts.append(
            f"<div class='instructions'><strong>Arbeitsanweisung:</strong> {instructions}</div>"
        )

    # Main text / printed blocks
    html_parts.append("<section class='text-blocks'>")
    if printed_blocks:
        for block in printed_blocks:
            role = (block.get("role") or "body").lower()
            text = (block.get("text") or "").strip()
            if not text:
                continue
            text = esc(text)
            html_parts.append("<div class='text-block'>")
            if role == "title":
                html_parts.append(f"<div class='text-block-title'>{text}</div>")
            else:
                html_parts.append(f"<div>{text}</div>")
            html_parts.append("</div>")
    elif canonical_text:
        # Fallback: split canonical_text into paragraphs on blank lines
        paragraphs = [
            p.strip()
            for p in canonical_text.replace("\r\n", "\n").split("\n\n")
            if p.strip()
        ]
        for p in paragraphs:
            html_parts.append(f"<div class='text-block'>{esc(p)}</div>")
    html_parts.append("</section>")

    # Task area
    if tasks:
        html_parts.append("<h2>Aufgaben</h2>")
        html_parts.append("<div class='task-list'>")
        for idx, task in enumerate(tasks, start=1):
            t_type = esc(task.get("type") or "other")
            desc = esc(task.get("description") or "")
            text_with_gaps = task.get("text_with_gaps")
            html_parts.append("<div class='task'>")
            html_parts.append(
                f"<div class='task-title'>Aufgabe {idx} ({t_type}): {desc}</div>"
            )
            if text_with_gaps:
                # Escape first, then turn the "___" gap markers into line spans;
                # escaping never touches underscores, so the markers survive.
                rendered = esc(text_with_gaps).replace(
                    "___", "<span class='gap-line'>&nbsp;</span>"
                )
                html_parts.append(f"<div>{rendered}</div>")
            html_parts.append("</div>")
        html_parts.append("</div>")  # .task-list

    # Small footnote
    if struck:
        html_parts.append(
            "<div class='footnote'>Hinweis: Einige im Original durchgestrichene Wörter wurden "
            "von der KI erkannt und NICHT in dieses saubere Arbeitsblatt übernommen.</div>"
        )
    else:
        html_parts.append(
            "<div class='footnote'>Dieses Arbeitsblatt wurde automatisch aus einem Scan rekonstruiert "
            "und von handschriftlichen Eintragungen bereinigt.</div>"
        )
    html_parts.append("</div>")  # .page
    html_parts.append("</body></html>")

    html_content = "\n".join(html_parts)
    out_name = analysis_path.stem.replace("_analyse", "") + "_clean.html"
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(html_content, encoding="utf-8")
    return out_path

View File

@@ -0,0 +1,78 @@
"""
AI Processing - Image Processor.
Entfernt Handschrift aus Arbeitsblatt-Scans.
"""
from pathlib import Path
import shutil
import json
import logging
from .core import BEREINIGT_DIR
from .analysis import analyze_scan_structure_with_ai
logger = logging.getLogger(__name__)
def remove_handwriting_from_scan(input_path: Path) -> Path:
    """
    Produce a handwriting-free copy of a worksheet scan (stage 2 of the pipeline).

    Steps:
    1. Locate ``<stem>_analyse.json`` in BEREINIGT_DIR; when it is missing, the
       stage-1 analysis is run first via ``analyze_scan_structure_with_ai``.
    2. Load the analysis. Invalid JSON is logged and replaced by a minimal
       empty structure.
    3. Hand the scan, the analysis and the output path to
       ``WorksheetCleaner.clean_worksheet``.
    4. If cleaning raises, log the error and copy the original scan to the
       output path instead.

    Args:
        input_path: Path of the scan to clean.

    Returns:
        Path returned by the cleaner, or the path of the copied original
        (``<stem>_clean<suffix>`` in BEREINIGT_DIR) when cleaning failed.

    Raises:
        FileNotFoundError: if ``input_path`` does not exist.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    # Deferred import, performed only after the input has been validated.
    from image_cleaner import WorksheetCleaner

    # Analysis JSON produced by stage 1
    analysis_file = BEREINIGT_DIR / (input_path.stem + "_analyse.json")
    if not analysis_file.exists():
        logger.info(f"Analysis not found for {input_path.name}, running analysis first")
        analysis_file = analyze_scan_structure_with_ai(input_path)

    try:
        analysis = json.loads(analysis_file.read_text(encoding='utf-8'))
    except json.JSONDecodeError as e:
        logger.error(f"Invalid analysis JSON: {analysis_file}\n{e}")
        # Minimal structure so the cleaner still receives the expected keys
        analysis = {
            "layout": {"text_regions": [], "diagram_elements": []},
            "handwriting_regions": []
        }

    target = BEREINIGT_DIR / (input_path.stem + "_clean" + input_path.suffix)
    cleaner = WorksheetCleaner(debug_mode=False)
    try:
        result = cleaner.clean_worksheet(input_path, analysis, target)
        logger.info(f"Successfully cleaned {input_path.name}")
        return result
    except Exception as e:
        # Best effort: keep the pipeline going with an unmodified copy
        logger.error(f"Cleaning failed for {input_path.name}, using original: {e}")
        shutil.copy2(input_path, target)
        return target

View File

@@ -0,0 +1,155 @@
"""
AI Processing - Leitner System.
Spaced Repetition System für Q&A-Paare.
"""
from pathlib import Path
from datetime import datetime, timedelta
import json
import logging
logger = logging.getLogger(__name__)
def update_leitner_progress(qa_path: Path, item_id: str, correct: bool) -> dict:
    """
    Update the learning progress of one Q&A item using a three-box Leitner scheme.

    Boxes:
    - Box 0: "Neu" (not learned yet)
    - Box 1: "Gelernt" (a wrong answer moves it back to box 0)
    - Box 2: "Gefestigt" (a wrong answer moves it back to box 1)

    A correct answer raises the box by one (max 2) and schedules the next
    review after 3 days when the item is now in box 1, or 7 days when it is
    in box 2. A wrong answer lowers the box by one (min 0) and schedules the
    next review in 4 hours.

    Missing bookkeeping fields (``leitner_box``, ``correct_count``,
    ``incorrect_count``) are initialised to 0, so items that have never been
    answered can be updated without a ``KeyError``. A stored box outside
    0..2 is clamped into that range.

    Args:
        qa_path: Path to the ``*_qa.json`` file; it is rewritten in place.
        item_id: ID of the Q&A item.
        correct: True when the item was answered correctly.

    Returns:
        ``{"status": "NOT_FOUND", ...}`` when no item has this ID, otherwise a
        dict with status "OK", the new box, its name, both counters and the
        next review timestamp (ISO format).

    Raises:
        FileNotFoundError: if ``qa_path`` does not exist.
    """
    if not qa_path.exists():
        raise FileNotFoundError(f"Q&A-Datei nicht gefunden: {qa_path}")
    qa_data = json.loads(qa_path.read_text(encoding="utf-8"))

    # Find the item
    item = next(
        (entry for entry in qa_data.get("qa_items", []) if entry.get("id") == item_id),
        None,
    )
    if not item:
        return {"status": "NOT_FOUND", "message": f"Item {item_id} nicht gefunden"}

    max_box = 2
    review_days_by_box = [1, 3, 7]  # indexed by the box the item is in after the answer

    # Normalise the bookkeeping fields first so every later access is safe.
    item["correct_count"] = item.get("correct_count") or 0
    item["incorrect_count"] = item.get("incorrect_count") or 0
    box = min(max(item.get("leitner_box") or 0, 0), max_box)

    now = datetime.now()
    item["last_seen"] = now.isoformat()
    if correct:
        item["correct_count"] += 1
        box = min(box + 1, max_box)
        delay = timedelta(days=review_days_by_box[box])
    else:
        item["incorrect_count"] += 1
        box = max(box - 1, 0)
        # After a mistake the item comes back soon
        delay = timedelta(hours=4)
    item["leitner_box"] = box
    item["next_review"] = (now + delay).isoformat()

    # Persist the updated data
    qa_path.write_text(json.dumps(qa_data, ensure_ascii=False, indent=2), encoding="utf-8")

    box_names = ["Neu", "Gelernt", "Gefestigt"]
    return {
        "status": "OK",
        "item_id": item_id,
        "correct": correct,
        "new_box": box,
        "box_name": box_names[box],
        "correct_count": item["correct_count"],
        "incorrect_count": item["incorrect_count"],
        "next_review": item["next_review"]
    }
def get_next_review_items(qa_path: Path, limit: int = 5) -> list:
    """
    Return the items that should be reviewed next, most urgent first.

    Ranking (a lower score means more urgent):
    - base score is ``leitner_box * 10``, so box 0 comes first
    - every recorded wrong answer lowers the score by 2
    - a due item's score is lowered further by the number of hours it is overdue

    Items in box 0 are always included; other items only when their
    ``next_review`` time has passed, is missing or cannot be parsed.

    Args:
        qa_path: Path to the ``*_qa.json`` file.
        limit: Maximum number of items to return.

    Returns:
        List of item dicts without keys starting with ``_``; an empty list
        when the file does not exist.
    """
    if not qa_path.exists():
        return []
    qa_data = json.loads(qa_path.read_text(encoding="utf-8"))
    now = datetime.now()

    ranked = []
    for entry in qa_data.get("qa_items", []):
        box = entry.get("leitner_box", 0)
        score = box * 10 - entry.get("incorrect_count", 0) * 2

        due = True
        scheduled = entry.get("next_review")
        if scheduled:
            try:
                review_time = datetime.fromisoformat(scheduled)
                due = now >= review_time
                if due:
                    # Overdue items move further up the list
                    score -= (now - review_time).total_seconds() / 3600
            except (ValueError, TypeError):
                due = True

        if box == 0 or due:
            ranked.append((score, entry))

    # Stable sort on the score only, so ties keep their file order
    ranked.sort(key=lambda pair: pair[0])
    return [
        {key: value for key, value in entry.items() if not key.startswith("_")}
        for _, entry in ranked[:limit]
    ]

View File

@@ -0,0 +1,316 @@
"""
AI Processing - Multiple Choice Generator.
Generiert Multiple-Choice-Fragen aus Arbeitsblatt-Analysen.
"""
from pathlib import Path
import json
import random
import os
import requests
import logging
from .core import (
get_openai_api_key,
get_vision_api,
BEREINIGT_DIR,
)
logger = logging.getLogger(__name__)
def _generate_mc_with_openai(analysis_data: dict, num_questions: int = 5) -> dict:
    """
    Generate multiple-choice questions from a worksheet analysis via OpenAI.

    The worksheet text is assembled from ``canonical_text`` plus every
    non-empty ``printed_blocks`` text not already included, and sent to the
    chat completions endpoint (model ``gpt-4o-mini``, JSON response format).
    The prompt asks for questions at the worksheet's ``grade_level``.

    Args:
        analysis_data: Parsed content of an ``*_analyse.json`` file.
        num_questions: Number of questions to request.

    Returns:
        The parsed JSON answer of the model, or a dict with an empty
        ``questions`` list and an error note when the analysis has no text.

    Raises:
        RuntimeError: if the API key is missing or the answer cannot be parsed.
        requests.RequestException: on HTTP/network errors, including timeouts.
    """
    api_key = get_openai_api_key()

    # Extract the relevant content from the analysis
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    # Assemble the text content
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        # "text" may be present but null in the analysis JSON
        text = (block.get("text") or "").strip()
        if text and text not in content_parts:
            content_parts.append(text)
    worksheet_content = "\n\n".join(content_parts)
    if not worksheet_content.strip():
        logger.warning("Kein Textinhalt für MC-Generierung gefunden")
        return {"questions": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    system_prompt = f"""Du bist ein erfahrener Pädagoge, der Multiple-Choice-Fragen für Schüler erstellt.
WICHTIGE REGELN:
1. SCHWIERIGKEITSGRAD: Die Fragen müssen exakt dem Niveau "{grade_level}" entsprechen.
- Nicht zu leicht, nicht zu schwer
- Passend für das angegebene Klassenniveau
2. INHALTSTREUE: Alle Fragen müssen sich direkt auf den gegebenen Text beziehen.
- Keine Fragen zu Themen, die nicht im Text vorkommen
- Die richtige Antwort muss aus dem Text ableitbar sein
3. QUALITÄT DER DISTRAKTOREN (falsche Antworten):
- Müssen plausibel klingen
- Dürfen nicht offensichtlich falsch sein
- Sollten typische Schüler-Missverständnisse widerspiegeln
4. AUSGABEFORMAT: Gib deine Antwort AUSSCHLIESSLICH als gültiges JSON zurück."""
    user_prompt = f"""Erstelle {num_questions} Multiple-Choice-Fragen basierend auf diesem Arbeitsblatt:
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
INHALT DES ARBEITSBLATTS:
{worksheet_content}
Gib die Fragen als JSON zurück:
{{
"questions": [
{{
"id": "q1",
"question": "Die Fragestellung hier",
"options": [
{{"id": "a", "text": "Antwort A"}},
{{"id": "b", "text": "Antwort B"}},
{{"id": "c", "text": "Antwort C"}},
{{"id": "d", "text": "Antwort D"}}
],
"correct_answer": "a",
"explanation": "Kurze Erklärung warum diese Antwort richtig ist"
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"num_questions": {num_questions}
}}
}}"""
    payload = {
        "model": "gpt-4o-mini",
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 2000,
        "temperature": 0.7,
    }
    # Without a timeout a stalled connection would block the caller forever;
    # 60 s matches the OpenAI call in the mindmap generator.
    response = requests.post(url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()
    data = response.json()
    try:
        content = data["choices"][0]["message"]["content"]
        mc_data = json.loads(content)
    except (KeyError, IndexError, TypeError, json.JSONDecodeError) as e:
        # IndexError: empty "choices"; TypeError: content is null / wrong shape
        raise RuntimeError(f"Fehler bei MC-Generierung: {e}") from e
    return mc_data
def _generate_mc_with_claude(analysis_data: dict, num_questions: int = 5) -> dict:
    """
    Generate multiple-choice questions from a worksheet analysis via the Claude API.

    The worksheet text is assembled from ``canonical_text`` plus every
    non-empty ``printed_blocks`` text not already included. The model answer
    is expected to be JSON, optionally wrapped in a Markdown code fence.

    Args:
        analysis_data: Parsed content of an ``*_analyse.json`` file.
        num_questions: Number of questions to request.

    Returns:
        The parsed JSON answer of the model, or a dict with an empty
        ``questions`` list and an error note when the analysis has no text.

    Raises:
        RuntimeError: if ANTHROPIC_API_KEY is not set or the answer is not
            valid JSON.
    """
    import anthropic

    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")
    client = anthropic.Anthropic(api_key=api_key)

    # Extract the relevant content
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        # "text" may be present but null in the analysis JSON
        text = (block.get("text") or "").strip()
        if text and text not in content_parts:
            content_parts.append(text)
    worksheet_content = "\n\n".join(content_parts)
    if not worksheet_content.strip():
        return {"questions": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    prompt = f"""Erstelle {num_questions} Multiple-Choice-Fragen basierend auf diesem Arbeitsblatt.
WICHTIGE REGELN:
1. SCHWIERIGKEITSGRAD: Exakt Niveau "{grade_level}" - nicht leichter, nicht schwerer
2. INHALTSTREUE: Nur Fragen zum gegebenen Text
3. QUALITÄT: Plausible Distraktoren (falsche Antworten)
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
INHALT:
{worksheet_content}
Antworte NUR mit diesem JSON-Format:
{{
"questions": [
{{
"id": "q1",
"question": "Fragestellung",
"options": [
{{"id": "a", "text": "Antwort A"}},
{{"id": "b", "text": "Antwort B"}},
{{"id": "c", "text": "Antwort C"}},
{{"id": "d", "text": "Antwort D"}}
],
"correct_answer": "a",
"explanation": "Erklärung"
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"num_questions": {num_questions}
}}
}}"""
    message = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=2000,
        messages=[{"role": "user", "content": prompt}]
    )
    content = message.content[0].text

    # Try to extract the JSON, unwrapping a Markdown code fence if present
    try:
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]
        mc_data = json.loads(content.strip())
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Claude hat ungültiges JSON geliefert: {e}") from e
    return mc_data
def _shuffle_mc_options(mc_data: dict) -> dict:
"""
Mischt die Antwort-Optionen jeder Frage zufällig durch.
Aktualisiert auch correct_answer entsprechend.
Dies stellt sicher, dass die richtige Antwort nicht immer an der gleichen Position steht.
"""
if "questions" not in mc_data:
return mc_data
for question in mc_data["questions"]:
options = question.get("options", [])
correct_id = question.get("correct_answer")
if not options or not correct_id:
continue
# Finde den Text der richtigen Antwort
correct_text = None
for opt in options:
if opt.get("id") == correct_id:
correct_text = opt.get("text")
break
# Mische die Optionen
random.shuffle(options)
# Vergebe neue IDs (a, b, c, d) und finde neue Position der richtigen Antwort
new_ids = ["a", "b", "c", "d"]
new_correct = None
for i, opt in enumerate(options):
if i < len(new_ids):
if opt.get("text") == correct_text:
new_correct = new_ids[i]
opt["id"] = new_ids[i]
if new_correct:
question["correct_answer"] = new_correct
question["options"] = options
return mc_data
def generate_mc_from_analysis(analysis_path: Path, num_questions: int = 5) -> Path:
    """
    Create a ``*_mc.json`` file with multiple-choice questions for an analysis file.

    The questions are generated with the configured vision API: Claude when
    ``get_vision_api()`` returns "claude" (falling back to OpenAI on any
    error), otherwise OpenAI. The answer options are shuffled afterwards.

    Args:
        analysis_path: Path to the ``*_analyse.json`` file.
        num_questions: Number of questions to generate (default: 5).

    Returns:
        Path of the written ``*_mc.json`` file inside BEREINIGT_DIR.

    Raises:
        FileNotFoundError: if ``analysis_path`` does not exist.
        RuntimeError: if the analysis file is not valid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")
    try:
        analysis = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Ungültige Analyse-JSON: {e}")
    logger.info(f"Generiere MC-Fragen für: {analysis_path.name}")

    # Pick the generator according to the configured API
    if get_vision_api() != "claude":
        questions = _generate_mc_with_openai(analysis, num_questions)
    else:
        try:
            questions = _generate_mc_with_claude(analysis, num_questions)
        except Exception as e:
            logger.warning(f"Claude MC-Generierung fehlgeschlagen, nutze OpenAI: {e}")
            questions = _generate_mc_with_openai(analysis, num_questions)

    # Randomise the answer positions
    questions = _shuffle_mc_options(questions)

    # Store the result next to the other generated artefacts
    target = BEREINIGT_DIR / (analysis_path.stem.replace("_analyse", "") + "_mc.json")
    serialized = json.dumps(questions, ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8")
    logger.info(f"MC-Fragen gespeichert: {target.name}")
    return target

View File

@@ -0,0 +1,472 @@
"""
AI Processing - Mindmap Generator.
Generiert kindgerechte Lernposter-Mindmaps aus Arbeitsblatt-Analysen.
"""
from pathlib import Path
import math
import json
import os
import requests
import logging
from .core import get_openai_api_key, BEREINIGT_DIR
logger = logging.getLogger(__name__)
def generate_mindmap_data(analysis_path: Path) -> dict:
    """
    Extract technical terms from an analysis file and group them for a mindmap.

    The text (``canonical_text`` plus task descriptions and gap texts, cut to
    3000 characters in the prompt) is sent to Claude when ANTHROPIC_API_KEY is
    set, otherwise to OpenAI. The OpenAI key is only requested when OpenAI is
    actually used, so a Claude-only setup works without OPENAI_API_KEY.

    Args:
        analysis_path: Path to the ``*_analyse.json`` file.

    Returns:
        Dictionary with the mindmap structure:
        {
            "topic": "Main topic",
            "subject": "Subject",
            "categories": [
                {
                    "name": "Category",
                    "color": "#hexcolor",
                    "emoji": "🔬",
                    "terms": [
                        {"term": "Term", "explanation": "Short explanation"}
                    ]
                }
            ]
        }
        ``categories`` is empty when the analysis contains no text or when
        the generation fails (the error is logged).

    Raises:
        FileNotFoundError: if ``analysis_path`` does not exist.
        RuntimeError: if the file is not valid JSON, or if neither
            ANTHROPIC_API_KEY nor OPENAI_API_KEY is configured.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")
    try:
        data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Analyse-Datei enthält kein gültiges JSON: {analysis_path}\n{e}") from e

    title = data.get("title") or "Arbeitsblatt"
    subject = data.get("subject") or ""
    canonical_text = data.get("canonical_text") or ""
    tasks = data.get("tasks", []) or []

    # Collect all text for the analysis
    all_text = canonical_text
    for task in tasks:
        if task.get("description"):
            all_text += "\n" + task.get("description")
        if task.get("text_with_gaps"):
            all_text += "\n" + task.get("text_with_gaps")
    if not all_text.strip():
        return {
            "topic": title,
            "subject": subject,
            "categories": []
        }

    # Choose the provider up front. The OpenAI key is needed only for the
    # fallback; with no provider configured at all this still raises
    # RuntimeError, as before.
    claude_key = os.environ.get("ANTHROPIC_API_KEY")
    api_key = None if claude_key else get_openai_api_key()

    prompt = f"""Analysiere diesen Schultext und extrahiere alle Fachbegriffe für eine kindgerechte Lern-Mindmap.
THEMA: {title}
FACH: {subject}
TEXT:
{all_text[:3000]}
AUFGABE:
1. Identifiziere das Hauptthema (ein einzelnes Wort oder kurzer Begriff)
2. Finde ALLE Fachbegriffe und gruppiere sie in 3-6 sinnvolle Kategorien
3. Gib für jeden Begriff eine kurze, kindgerechte Erklärung (max 10 Wörter)
4. Wähle für jede Kategorie ein passendes Emoji und eine Farbe
Antworte NUR mit diesem JSON-Format:
{{
"topic": "Hauptthema (z.B. 'Das Auge')",
"categories": [
{{
"name": "Kategoriename",
"emoji": "passendes Emoji",
"color": "#Hexfarbe (bunt, kindgerecht)",
"terms": [
{{"term": "Fachbegriff", "explanation": "Kurze Erklärung"}}
]
}}
]
}}
WICHTIG:
- Verwende kindgerechte, einfache Sprache
- Bunte, fröhliche Farben: #FF6B6B, #4ECDC4, #45B7D1, #96CEB4, #FFEAA7, #DDA0DD, #98D8C8
- Passende Emojis für jede Kategorie
- Mindestens 3 Begriffe pro Kategorie wenn möglich
- Maximal 6 Kategorien"""
    try:
        if claude_key:
            import anthropic
            client = anthropic.Anthropic(api_key=claude_key)
            response = client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=2000,
                messages=[{"role": "user", "content": prompt}]
            )
            result_text = response.content[0].text
        else:
            # Fallback to OpenAI
            logger.info("Claude Mindmap-Generierung fehlgeschlagen, nutze OpenAI: ANTHROPIC_API_KEY ist nicht gesetzt.")
            url = "https://api.openai.com/v1/chat/completions"
            headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
            payload = {
                "model": "gpt-4o-mini",
                "messages": [
                    {"role": "system", "content": "Du bist ein Experte für kindgerechte Lernmaterialien."},
                    {"role": "user", "content": prompt}
                ],
                "max_tokens": 2000,
                "temperature": 0.7
            }
            resp = requests.post(url, headers=headers, json=payload, timeout=60)
            resp.raise_for_status()
            result_text = resp.json()["choices"][0]["message"]["content"]

        # Extract the JSON, unwrapping a leading Markdown code fence if present
        result_text = result_text.strip()
        if result_text.startswith("```"):
            result_text = result_text.split("```")[1]
            if result_text.startswith("json"):
                result_text = result_text[4:]
            result_text = result_text.strip()
        mindmap_data = json.loads(result_text)
        mindmap_data["subject"] = subject
        return mindmap_data
    except Exception as e:
        logger.error(f"Mindmap-Generierung fehlgeschlagen: {e}")
        # Fallback: return a simple structure without categories
        return {
            "topic": title,
            "subject": subject,
            "categories": []
        }
def generate_mindmap_html(mindmap_data: dict, format: str = "a3") -> str:
    """
    Render mindmap data as a self-contained HTML page with an inline SVG poster.

    Layout: the topic sits in the centre, the categories are placed on a
    circle around it (starting at the top), and up to 8 terms per category
    fan out on the side facing away from the centre. A legend at the bottom
    lists the first three terms of each category.

    All text and colour values taken from ``mindmap_data`` are HTML-escaped
    before they are inserted into the markup.

    Args:
        mindmap_data: Dictionary as returned by generate_mindmap_data().
        format: "a4" for an A4 landscape page; any other value gives the
            A3 landscape poster (default).

    Returns:
        HTML string. When ``mindmap_data`` has no categories, a short
        placeholder page is returned instead of the poster.
    """
    from html import escape  # stdlib; imported locally so the module import block stays untouched

    def esc(value) -> str:
        # quote=True makes the result safe inside attribute values as well
        return escape(str(value), quote=True)

    topic = esc(mindmap_data.get("topic", "Thema"))
    subject = esc(mindmap_data.get("subject", ""))
    categories = mindmap_data.get("categories") or []

    # Format-specific settings
    if format.lower() == "a4":
        paper = "A4"
        svg_width, svg_height, radius = 1100, 780, 250
    else:  # a3 (default)
        paper = "A3"
        svg_width, svg_height, radius = 1400, 990, 320
    page_size = f"{paper} landscape"

    # Without categories show a placeholder page
    if not categories:
        return f"""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Mindmap - {topic}</title>
<style>
body {{ font-family: 'Comic Sans MS', cursive, sans-serif; text-align: center; padding: 50px; }}
h1 {{ color: #FF6B6B; }}
</style>
</head>
<body>
<h1>🧠 Mindmap: {topic}</h1>
<p>Noch keine Daten vorhanden. Bitte zuerst das Arbeitsblatt analysieren.</p>
</body>
</html>"""

    num_categories = len(categories)
    center_x = svg_width // 2
    center_y = svg_height // 2

    # Positions of the categories on a circle, starting at the top
    category_positions = []
    for i, cat in enumerate(categories):
        angle = (2 * math.pi * i / num_categories) - (math.pi / 2)
        category_positions.append({
            "x": center_x + radius * math.cos(angle),
            "y": center_y + radius * math.sin(angle),
            "angle": angle,
            "data": cat
        })

    parts = [f"""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Lernposter - {topic}</title>
<style>
@page {{
size: {page_size};
margin: 10mm;
}}
@media print {{
body {{ -webkit-print-color-adjust: exact; print-color-adjust: exact; }}
.no-print {{ display: none !important; }}
}}
* {{ box-sizing: border-box; margin: 0; padding: 0; }}
body {{
font-family: 'Comic Sans MS', 'Chalkboard SE', 'Comic Neue', cursive, sans-serif;
background: linear-gradient(135deg, #f5f7fa 0%, #e4e8f0 100%);
min-height: 100vh;
padding: 20px;
}}
.poster-container {{
width: 100%;
max-width: 1400px;
margin: 0 auto;
background: white;
border-radius: 20px;
box-shadow: 0 10px 40px rgba(0,0,0,0.1);
overflow: hidden;
}}
.poster-header {{
background: linear-gradient(90deg, #FF6B6B, #4ECDC4);
padding: 15px 30px;
display: flex;
justify-content: space-between;
align-items: center;
}}
.poster-title {{
color: white;
font-size: 24px;
text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
}}
.poster-subject {{
color: white;
font-size: 16px;
opacity: 0.9;
}}
.mindmap-svg {{
width: 100%;
height: auto;
}}
.print-btn {{
position: fixed;
top: 20px;
right: 20px;
padding: 12px 24px;
background: #4ECDC4;
color: white;
border: none;
border-radius: 25px;
font-size: 16px;
cursor: pointer;
box-shadow: 0 4px 15px rgba(78, 205, 196, 0.4);
font-family: inherit;
}}
.print-btn:hover {{
transform: scale(1.05);
background: #45B7D1;
}}
/* Animationen für interaktive Version */
.category-group:hover {{
transform: scale(1.02);
cursor: pointer;
}}
.term-bubble:hover {{
transform: scale(1.1);
filter: brightness(1.1);
}}
</style>
</head>
<body>
<button class="print-btn no-print" onclick="window.print()">🖨️ Als {paper} drucken</button>
<div class="poster-container">
<div class="poster-header">
<div class="poster-title">🧠 Lernposter: {topic}</div>
<div class="poster-subject">{subject}</div>
</div>
<svg class="mindmap-svg" viewBox="0 0 {svg_width} {svg_height}" xmlns="http://www.w3.org/2000/svg">
<defs>
<!-- Schatten für Bubbles -->
<filter id="shadow" x="-20%" y="-20%" width="140%" height="140%">
<feDropShadow dx="2" dy="4" stdDeviation="4" flood-opacity="0.2"/>
</filter>
<!-- Glow-Effekt für Zentrum -->
<filter id="glow">
<feGaussianBlur stdDeviation="8" result="coloredBlur"/>
<feMerge>
<feMergeNode in="coloredBlur"/>
<feMergeNode in="SourceGraphic"/>
</feMerge>
</filter>
</defs>
<!-- Hintergrund-Muster (dezente Punkte) -->
<pattern id="dots" x="0" y="0" width="30" height="30" patternUnits="userSpaceOnUse">
<circle cx="15" cy="15" r="1.5" fill="#e0e0e0"/>
</pattern>
<rect width="100%" height="100%" fill="url(#dots)"/>
<!-- Verbindungslinien vom Zentrum zu Kategorien -->
"""]

    # Curved connection lines from the centre to every category
    for pos in category_positions:
        color = esc(pos["data"].get("color", "#4ECDC4"))
        ctrl_x = (center_x + pos["x"]) / 2 + 30
        ctrl_y = (center_y + pos["y"]) / 2 - 30
        parts.append(f""" <path d="M {center_x} {center_y} Q {ctrl_x} {ctrl_y} {pos['x']} {pos['y']}"
stroke="{color}" stroke-width="4" fill="none" stroke-linecap="round" opacity="0.6"/>
""")

    # Centre (main topic)
    parts.append(f"""
<!-- Zentrum: Hauptthema -->
<g filter="url(#glow)">
<circle cx="{center_x}" cy="{center_y}" r="85" fill="url(#centerGradient)"/>
<defs>
<radialGradient id="centerGradient" cx="30%" cy="30%">
<stop offset="0%" stop-color="#FFD93D"/>
<stop offset="100%" stop-color="#FF6B6B"/>
</radialGradient>
</defs>
<text x="{center_x}" y="{center_y - 10}" text-anchor="middle" font-size="28" font-weight="bold" fill="white">🌟</text>
<text x="{center_x}" y="{center_y + 25}" text-anchor="middle" font-size="22" font-weight="bold" fill="white">{topic}</text>
</g>
""")

    term_radius = 110
    spread = math.pi * 0.8  # 80% of a half circle
    max_terms = 8

    # Categories with their terms
    for pos in category_positions:
        cat = pos["data"]
        color = esc(cat.get("color", "#4ECDC4"))
        emoji = esc(cat.get("emoji", "📚"))
        name = esc(cat.get("name", "Kategorie"))
        base_angle = pos["angle"]
        # Only the terms that are drawn take part in the angular spacing
        visible_terms = (cat.get("terms") or [])[:max_terms]
        num_terms = len(visible_terms)

        # The category name is kept out of the SVG comment: "--" inside a
        # comment would be invalid markup.
        parts.append(f"""
<!-- Kategorie -->
<g class="category-group" transform="translate({pos['x']}, {pos['y']})">
<ellipse cx="0" cy="0" rx="75" ry="45" fill="{color}" filter="url(#shadow)"/>
<text x="0" y="-8" text-anchor="middle" font-size="20">{emoji}</text>
<text x="0" y="18" text-anchor="middle" font-size="14" font-weight="bold" fill="white">{name}</text>
""")
        for j, term_data in enumerate(visible_terms):
            raw_term = str(term_data.get("term", ""))
            term = esc(raw_term)
            # Fan the terms out around the direction pointing away from the
            # poster centre. The group is only translated, not rotated, so
            # the absolute angle has to be used for the offset.
            if num_terms > 1:
                term_angle = base_angle - spread / 2 + (spread * j / (num_terms - 1))
            else:
                term_angle = base_angle
            term_x = term_radius * math.cos(term_angle)
            term_y = term_radius * math.sin(term_angle)

            # Short connection line
            parts.append(f""" <line x1="0" y1="0" x2="{term_x * 0.6}" y2="{term_y * 0.6}" stroke="{color}" stroke-width="2" opacity="0.5"/>
""")
            # Term bubble, sized by the unescaped text length
            bubble_width = max(70, len(raw_term) * 8 + 20)
            parts.append(f""" <g class="term-bubble" transform="translate({term_x}, {term_y})">
<rect x="{-bubble_width/2}" y="-22" width="{bubble_width}" height="44" rx="22" fill="white" stroke="{color}" stroke-width="2" filter="url(#shadow)"/>
<text x="0" y="5" text-anchor="middle" font-size="12" font-weight="bold" fill="#333">{term}</text>
</g>
""")
        parts.append(" </g>\n")

    # Legend with the first terms of every category (bottom)
    parts.append(f"""
<!-- Legende -->
<g transform="translate(50, {svg_height - 80})">
<text x="0" y="0" font-size="14" font-weight="bold" fill="#666">📖 Begriffe zum Lernen:</text>
""")
    legend_x = 0
    for pos in category_positions:
        cat = pos["data"]
        color = esc(cat.get("color", "#4ECDC4"))
        emoji = esc(cat.get("emoji", "📚"))
        name = esc(cat.get("name", ""))
        terms = cat.get("terms") or []
        # Show the category with its first 3 terms
        terms_text = ", ".join([str(t.get("term", "")) for t in terms[:3]])
        if len(terms) > 3:
            terms_text += "..."
        terms_text = esc(terms_text)
        parts.append(f""" <g transform="translate({legend_x}, 25)">
<circle cx="8" cy="0" r="8" fill="{color}"/>
<text x="22" y="4" font-size="11" fill="#444"><tspan font-weight="bold">{emoji} {name}:</tspan> {terms_text}</text>
</g>
""")
        legend_x += 220

    parts.append(""" </g>
</svg>
</div>
</body>
</html>""")
    return "".join(parts)
def save_mindmap_for_worksheet(analysis_path: Path, mindmap_data: dict = None) -> Path:
    """
    Persist the mindmap data of a worksheet as a JSON file.

    Args:
        analysis_path: Path to the *_analyse.json file.
        mindmap_data: Optional, already generated mindmap data. When it is
            None, the data is generated via generate_mindmap_data(analysis_path).

    Returns:
        Path to the written *_mindmap.json file inside BEREINIGT_DIR.
    """
    payload = generate_mindmap_data(analysis_path) if mindmap_data is None else mindmap_data

    # Output name: analysis stem with "_analyse" removed, plus the mindmap suffix.
    base_name = analysis_path.stem.replace("_analyse", "")
    out_path = BEREINIGT_DIR / (base_name + "_mindmap.json")

    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    out_path.write_text(serialized, encoding="utf-8")

    logger.info(f"Mindmap-Daten gespeichert: {out_path.name}")
    return out_path

View File

@@ -0,0 +1,824 @@
"""
AI Processing - Print Version Generator.
Generiert druckbare HTML-Versionen für verschiedene Arbeitsblatt-Typen.
"""
from pathlib import Path
import json
import random
import logging
from .core import BEREINIGT_DIR
logger = logging.getLogger(__name__)
def generate_print_version_qa(qa_path: Path, include_answers: bool = False) -> Path:
    """
    Generate a printable HTML version of the question/answer pairs.

    Every piece of text taken from the JSON file (title, subject, grade,
    questions, answers, key terms) is HTML-escaped before it is embedded, so
    characters such as "<", ">" or "&" in the worksheet content cannot break
    or inject markup.

    Args:
        qa_path: Path to the *_qa.json file.
        include_answers: True produces the solution sheet (answers and key
            terms are shown); False produces the question sheet with three
            blank answer lines per question.

    Returns:
        Path to the HTML file written into BEREINIGT_DIR
        (*_qa_solutions.html or *_qa_print.html).

    Raises:
        FileNotFoundError: If qa_path does not exist.
    """
    from html import escape  # function-scope import keeps this block self-contained

    if not qa_path.exists():
        raise FileNotFoundError(f"Q&A-Datei nicht gefunden: {qa_path}")

    def esc(value) -> str:
        # str() first: JSON values may be numbers (e.g. a numeric grade level).
        return escape(str(value))

    qa_data = json.loads(qa_path.read_text(encoding="utf-8"))
    # "or" instead of .get() defaults: explicit JSON nulls must fall back too.
    items = qa_data.get("qa_items") or []
    metadata = qa_data.get("metadata") or {}
    title = esc(metadata.get("source_title") or "Arbeitsblatt")
    subject = esc(metadata.get("subject") or "")
    grade = esc(metadata.get("grade_level") or "")

    html_parts = []
    html_parts.append("""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>""" + title + """ - Fragen</title>
<style>
@media print {
.no-print { display: none; }
.page-break { page-break-before: always; }
}
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 40px auto;
padding: 20px;
line-height: 1.6;
}
h1 { font-size: 24px; margin-bottom: 8px; }
.meta { color: #666; margin-bottom: 24px; }
.question-block {
margin-bottom: 32px;
padding-bottom: 16px;
border-bottom: 1px dashed #ccc;
}
.question-number {
font-weight: bold;
color: #333;
}
.question-text {
font-size: 16px;
margin: 8px 0;
}
.answer-space {
border: 1px solid #ddd;
min-height: 60px;
margin-top: 12px;
background: #fafafa;
}
.answer-lines {
margin-top: 12px;
}
.answer-line {
border-bottom: 1px solid #999;
height: 28px;
}
.answer {
margin-top: 8px;
padding: 8px;
background: #e8f5e9;
border-left: 3px solid #4caf50;
}
.key-terms {
font-size: 12px;
color: #666;
margin-top: 8px;
}
.key-terms span {
background: #fff3e0;
padding: 2px 6px;
border-radius: 3px;
margin-right: 4px;
}
</style>
</head>
<body>
""")

    # Header
    version_text = "Lösungsblatt" if include_answers else "Fragenblatt"
    html_parts.append(f"<h1>{title} - {version_text}</h1>")
    meta_parts = []
    if subject:
        meta_parts.append(f"Fach: {subject}")
    if grade:
        meta_parts.append(f"Klasse: {grade}")
    meta_parts.append(f"Anzahl Fragen: {len(items)}")
    html_parts.append(f"<div class='meta'>{' | '.join(meta_parts)}</div>")

    # Questions
    for idx, item in enumerate(items, 1):
        question = esc(item.get("question") or "")
        html_parts.append("<div class='question-block'>")
        html_parts.append(f"<div class='question-number'>Frage {idx}</div>")
        html_parts.append(f"<div class='question-text'>{question}</div>")
        if include_answers:
            # Solution sheet: show the answer and the key terms.
            answer = esc(item.get("answer") or "")
            html_parts.append(f"<div class='answer'><strong>Antwort:</strong> {answer}</div>")
            key_terms = item.get("key_terms") or []
            if key_terms:
                terms_html = " ".join(f"<span>{esc(term)}</span>" for term in key_terms)
                html_parts.append(f"<div class='key-terms'>Wichtige Begriffe: {terms_html}</div>")
        else:
            # Question sheet: three blank lines to write the answer on.
            html_parts.append("<div class='answer-lines'>")
            for _ in range(3):
                html_parts.append("<div class='answer-line'></div>")
            html_parts.append("</div>")
        html_parts.append("</div>")
    html_parts.append("</body></html>")

    # Save next to the other cleaned worksheet artifacts.
    suffix = "_qa_solutions.html" if include_answers else "_qa_print.html"
    out_name = qa_path.stem.replace("_qa", "") + suffix
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text("\n".join(html_parts), encoding="utf-8")
    logger.info(f"Print-Version gespeichert: {out_path.name}")
    return out_path
def generate_print_version_cloze(cloze_path: Path, include_answers: bool = False) -> Path:
    """
    Generate a printable HTML version of the cloze (fill-in-the-gap) texts.

    Every piece of text taken from the JSON file (title, subject, grade,
    sentences, gap words, translations) is HTML-escaped before it is
    embedded. Sentences are escaped first and only then are the "___"
    placeholders replaced by the gap markup, so the generated <span> tags
    stay intact.

    Args:
        cloze_path: Path to the *_cloze.json file.
        include_answers: True produces the solution sheet (gaps filled with
            the expected words); False produces the exercise sheet with empty
            gaps and a shuffled word bank at the end.

    Returns:
        Path to the HTML file written into BEREINIGT_DIR
        (*_cloze_solutions.html or *_cloze_print.html).

    Raises:
        FileNotFoundError: If cloze_path does not exist.
    """
    from html import escape  # function-scope import keeps this block self-contained

    if not cloze_path.exists():
        raise FileNotFoundError(f"Cloze-Datei nicht gefunden: {cloze_path}")

    def esc(value) -> str:
        # str() first: JSON values may be numbers (e.g. total_gaps).
        return escape(str(value))

    cloze_data = json.loads(cloze_path.read_text(encoding="utf-8"))
    # "or" instead of .get() defaults: explicit JSON nulls must fall back too.
    items = cloze_data.get("cloze_items") or []
    metadata = cloze_data.get("metadata") or {}
    title = esc(metadata.get("source_title") or "Arbeitsblatt")
    subject = esc(metadata.get("subject") or "")
    grade = esc(metadata.get("grade_level") or "")
    total_gaps = esc(metadata.get("total_gaps") or 0)

    html_parts = []
    html_parts.append("""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>""" + title + """ - Lückentext</title>
<style>
@media print {
.no-print { display: none; }
.page-break { page-break-before: always; }
}
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 40px auto;
padding: 20px;
line-height: 1.8;
}
h1 { font-size: 24px; margin-bottom: 8px; }
.meta { color: #666; margin-bottom: 24px; }
.cloze-item {
margin-bottom: 24px;
padding: 16px;
background: #f9f9f9;
border-radius: 8px;
}
.cloze-number {
font-weight: bold;
color: #333;
margin-bottom: 8px;
}
.cloze-sentence {
font-size: 16px;
line-height: 2;
}
.gap {
display: inline-block;
min-width: 80px;
border-bottom: 2px solid #333;
margin: 0 4px;
text-align: center;
}
.gap-filled {
display: inline-block;
padding: 2px 8px;
background: #e8f5e9;
border: 1px solid #4caf50;
border-radius: 4px;
font-weight: bold;
}
.translation {
margin-top: 12px;
padding: 8px;
background: #e3f2fd;
border-left: 3px solid #2196f3;
font-size: 14px;
color: #555;
}
.translation-label {
font-size: 12px;
color: #777;
margin-bottom: 4px;
}
.word-bank {
margin-top: 32px;
padding: 16px;
background: #fff3e0;
border-radius: 8px;
}
.word-bank-title {
font-weight: bold;
margin-bottom: 12px;
}
.word {
display: inline-block;
padding: 4px 12px;
margin: 4px;
background: white;
border: 1px solid #ddd;
border-radius: 4px;
}
</style>
</head>
<body>
""")

    # Header
    version_text = "Lösungsblatt" if include_answers else "Lückentext"
    html_parts.append(f"<h1>{title} - {version_text}</h1>")
    meta_parts = []
    if subject:
        meta_parts.append(f"Fach: {subject}")
    if grade:
        meta_parts.append(f"Klasse: {grade}")
    meta_parts.append(f"Lücken gesamt: {total_gaps}")
    html_parts.append(f"<div class='meta'>{' | '.join(meta_parts)}</div>")

    # Gap words collected for the word bank (exercise sheet only).
    all_words = []

    # Cloze items
    for idx, item in enumerate(items, 1):
        html_parts.append("<div class='cloze-item'>")
        html_parts.append(f"<div class='cloze-number'>{idx}.</div>")
        gaps = item.get("gaps") or []
        # Escape before inserting markup; "___" is not affected by escaping.
        sentence = esc(item.get("sentence_with_gaps") or "")
        if include_answers:
            # Solution sheet: fill the gaps one by one, in order.
            for gap in gaps:
                word = esc(gap.get("word") or "")
                sentence = sentence.replace("___", f"<span class='gap-filled'>{word}</span>", 1)
        else:
            # Exercise sheet: render every gap as an empty line.
            sentence = sentence.replace("___", "<span class='gap'>&nbsp;</span>")
            for gap in gaps:
                all_words.append(gap.get("word") or "")
        html_parts.append(f"<div class='cloze-sentence'>{sentence}</div>")

        # Optional translation of the full sentence.
        translation = item.get("translation") or {}
        if translation:
            lang_name = esc(translation.get("language_name") or "Übersetzung")
            full_sentence = esc(translation.get("full_sentence") or "")
            if full_sentence:
                html_parts.append("<div class='translation'>")
                html_parts.append(f"<div class='translation-label'>{lang_name}:</div>")
                html_parts.append(full_sentence)
                html_parts.append("</div>")
        html_parts.append("</div>")

    # Word bank (exercise sheet only)
    if not include_answers and all_words:
        random.shuffle(all_words)  # shuffled so the order does not give away the solution
        html_parts.append("<div class='word-bank'>")
        html_parts.append("<div class='word-bank-title'>Wortbank (diese Wörter fehlen):</div>")
        for word in all_words:
            html_parts.append(f"<span class='word'>{esc(word)}</span>")
        html_parts.append("</div>")
    html_parts.append("</body></html>")

    # Save next to the other cleaned worksheet artifacts.
    suffix = "_cloze_solutions.html" if include_answers else "_cloze_print.html"
    out_name = cloze_path.stem.replace("_cloze", "") + suffix
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text("\n".join(html_parts), encoding="utf-8")
    logger.info(f"Cloze Print-Version gespeichert: {out_path.name}")
    return out_path
def generate_print_version_mc(mc_path: Path, include_answers: bool = False) -> str:
    """
    Generate a printable HTML version of the multiple-choice questions.

    Every piece of text taken from the JSON file (title, subject, grade,
    questions, option ids/texts, explanations, answer key) is HTML-escaped
    before it is embedded, so characters such as "<", ">" or "&" cannot break
    or inject markup.

    Args:
        mc_path: Path to the *_mc.json file.
        include_answers: True produces the solution sheet: the correct option
            is highlighted, explanations are shown and a compact answer key
            is appended. False produces the test sheet with instructions.

    Returns:
        The complete HTML document as a string (nothing is written to disk).

    Raises:
        FileNotFoundError: If mc_path does not exist.
    """
    from html import escape  # function-scope import keeps this block self-contained

    if not mc_path.exists():
        raise FileNotFoundError(f"MC-Datei nicht gefunden: {mc_path}")

    def esc(value) -> str:
        # str() first: JSON values may be numbers (e.g. numeric option ids).
        return escape(str(value))

    mc_data = json.loads(mc_path.read_text(encoding="utf-8"))
    # "or" instead of .get() defaults: explicit JSON nulls must fall back too.
    questions = mc_data.get("questions") or []
    metadata = mc_data.get("metadata") or {}
    title = esc(metadata.get("source_title") or "Arbeitsblatt")
    subject = esc(metadata.get("subject") or "")
    grade = esc(metadata.get("grade_level") or "")

    html_parts = []
    # NOTE: ".option-checkbox.checked::after" needs a visible glyph as content;
    # with an empty string the solution marker would render nothing.
    html_parts.append("""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>""" + title + """ - Multiple Choice</title>
<style>
@media print {
.no-print { display: none; }
.page-break { page-break-before: always; }
body { font-size: 14pt; }
}
body {
font-family: Arial, Helvetica, sans-serif;
max-width: 800px;
margin: 40px auto;
padding: 20px;
line-height: 1.6;
color: #000;
}
h1 {
font-size: 28px;
margin-bottom: 8px;
border-bottom: 2px solid #000;
padding-bottom: 8px;
}
.meta {
color: #333;
margin-bottom: 32px;
font-size: 14px;
}
.instructions {
background: #f5f5f5;
padding: 12px 16px;
border-radius: 4px;
margin-bottom: 24px;
font-size: 14px;
}
.question-block {
margin-bottom: 28px;
padding-bottom: 16px;
border-bottom: 1px solid #ddd;
}
.question-number {
font-weight: bold;
font-size: 18px;
color: #000;
margin-bottom: 8px;
}
.question-text {
font-size: 16px;
margin: 8px 0 16px 0;
line-height: 1.5;
}
.options {
margin-left: 20px;
}
.option {
display: flex;
align-items: flex-start;
margin-bottom: 12px;
padding: 8px 12px;
border: 1px solid #ccc;
border-radius: 4px;
background: #fff;
}
.option-correct {
background: #e8f5e9;
border-color: #4caf50;
border-width: 2px;
}
.option-checkbox {
width: 20px;
height: 20px;
border: 2px solid #333;
border-radius: 50%;
margin-right: 12px;
flex-shrink: 0;
display: flex;
align-items: center;
justify-content: center;
}
.option-checkbox.checked::after {
content: "✓";
font-weight: bold;
color: #4caf50;
}
.option-label {
font-weight: bold;
margin-right: 8px;
min-width: 24px;
}
.option-text {
flex: 1;
}
.explanation {
margin-top: 8px;
padding: 8px 12px;
background: #e3f2fd;
border-left: 3px solid #2196f3;
font-size: 13px;
color: #333;
}
.answer-key {
margin-top: 40px;
padding: 16px;
background: #f5f5f5;
border-radius: 8px;
}
.answer-key-title {
font-weight: bold;
font-size: 18px;
margin-bottom: 12px;
border-bottom: 1px solid #999;
padding-bottom: 8px;
}
.answer-key-grid {
display: grid;
grid-template-columns: repeat(5, 1fr);
gap: 8px;
}
.answer-key-item {
padding: 8px;
text-align: center;
background: white;
border: 1px solid #ddd;
border-radius: 4px;
}
.answer-key-q {
font-weight: bold;
}
.answer-key-a {
color: #4caf50;
font-weight: bold;
}
</style>
</head>
<body>
""")

    # Header
    version_text = "Lösungsblatt" if include_answers else "Multiple Choice Test"
    html_parts.append(f"<h1>{title}</h1>")
    html_parts.append(f"<div class='meta'><strong>{version_text}</strong>")
    if subject:
        html_parts.append(f" | Fach: {subject}")
    if grade:
        html_parts.append(f" | Klasse: {grade}")
    html_parts.append(f" | Anzahl Fragen: {len(questions)}</div>")

    if not include_answers:
        html_parts.append("<div class='instructions'>")
        html_parts.append("<strong>Anleitung:</strong> Kreuze bei jeder Frage die richtige Antwort an. ")
        html_parts.append("Es ist immer nur eine Antwort richtig.")
        html_parts.append("</div>")

    # Questions
    for idx, q in enumerate(questions, 1):
        html_parts.append("<div class='question-block'>")
        html_parts.append(f"<div class='question-number'>Frage {idx}</div>")
        html_parts.append(f"<div class='question-text'>{esc(q.get('question') or '')}</div>")
        html_parts.append("<div class='options'>")
        correct_answer = q.get("correct_answer", "")
        for opt in q.get("options") or []:
            opt_id = opt.get("id", "")
            # Compare the raw ids; escaping is applied only when rendering.
            is_correct = opt_id == correct_answer
            opt_class = "option"
            checkbox_class = "option-checkbox"
            if include_answers and is_correct:
                opt_class += " option-correct"
                checkbox_class += " checked"
            html_parts.append(f"<div class='{opt_class}'>")
            html_parts.append(f"<div class='{checkbox_class}'></div>")
            html_parts.append(f"<span class='option-label'>{esc(opt_id)})</span>")
            html_parts.append(f"<span class='option-text'>{esc(opt.get('text') or '')}</span>")
            html_parts.append("</div>")
        html_parts.append("</div>")
        # Explanation only on the solution sheet
        if include_answers and q.get("explanation"):
            html_parts.append(
                f"<div class='explanation'><strong>Erklärung:</strong> {esc(q.get('explanation'))}</div>"
            )
        html_parts.append("</div>")

    # Compact answer key - solution sheet only
    if include_answers:
        html_parts.append("<div class='answer-key'>")
        html_parts.append("<div class='answer-key-title'>Lösungsschlüssel</div>")
        html_parts.append("<div class='answer-key-grid'>")
        for idx, q in enumerate(questions, 1):
            html_parts.append("<div class='answer-key-item'>")
            html_parts.append(f"<span class='answer-key-q'>{idx}.</span> ")
            html_parts.append(f"<span class='answer-key-a'>{esc(q.get('correct_answer') or '')}</span>")
            html_parts.append("</div>")
        html_parts.append("</div>")
        html_parts.append("</div>")
    html_parts.append("</body></html>")
    return "\n".join(html_parts)
def generate_print_version_worksheet(analysis_path: Path) -> str:
    """
    Generate a print-optimized HTML version of a worksheet.

    Properties of the output:
    - large, easily readable font
    - black-and-white / grayscale friendly
    - clear structure for printing
    - no interactive elements apart from a print button that is hidden
      when printing

    Every piece of text taken from the analysis JSON (title, subject, grade,
    instructions, text blocks, task descriptions, gap texts) is HTML-escaped
    before it is embedded. Gap texts are escaped first and only then are the
    "___" placeholders replaced by the gap markup.

    Args:
        analysis_path: Path to the *_analyse.json file.

    Returns:
        The complete HTML document as a string (nothing is written to disk).

    Raises:
        FileNotFoundError: If analysis_path does not exist.
        RuntimeError: If the file does not contain valid JSON.
    """
    from html import escape  # function-scope import keeps this block self-contained

    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")
    try:
        data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Analyse-Datei enthält kein gültiges JSON: {analysis_path}\n{e}") from e

    def esc(value) -> str:
        # str() first: JSON values may be numbers (e.g. a numeric grade level).
        return escape(str(value))

    title = esc(data.get("title") or "Arbeitsblatt")
    subject = esc(data.get("subject") or "")
    grade_level = esc(data.get("grade_level") or "")
    instructions = esc(data.get("instructions") or "")
    tasks = data.get("tasks", []) or []
    canonical_text = data.get("canonical_text") or ""
    printed_blocks = data.get("printed_blocks") or []

    html_parts = []
    html_parts.append("""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>""" + title + """</title>
<style>
@page {
size: A4;
margin: 20mm;
}
@media print {
body {
font-size: 14pt !important;
-webkit-print-color-adjust: exact;
print-color-adjust: exact;
}
.no-print { display: none !important; }
.page-break { page-break-before: always; }
}
* { box-sizing: border-box; }
body {
font-family: Arial, "Helvetica Neue", sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 30px;
line-height: 1.7;
font-size: 16px;
color: #000;
background: #fff;
}
h1 {
font-size: 28px;
margin: 0 0 8px 0;
padding-bottom: 8px;
border-bottom: 3px solid #000;
}
h2 {
font-size: 20px;
margin: 28px 0 12px 0;
padding-bottom: 4px;
border-bottom: 1px solid #666;
}
.meta {
font-size: 14px;
color: #333;
margin-bottom: 20px;
padding: 8px 0;
}
.meta span {
margin-right: 20px;
}
.instructions {
margin: 20px 0;
padding: 16px;
border: 2px solid #333;
background: #f5f5f5;
font-size: 15px;
}
.instructions-label {
font-weight: bold;
margin-bottom: 8px;
}
.text-section {
margin: 24px 0;
}
.text-block {
margin-bottom: 16px;
text-align: justify;
}
.text-block-title {
font-weight: bold;
font-size: 17px;
margin-bottom: 8px;
}
.task-section {
margin-top: 32px;
}
.task {
margin-bottom: 24px;
padding: 16px;
border: 1px solid #999;
background: #fafafa;
}
.task-header {
font-weight: bold;
font-size: 16px;
margin-bottom: 12px;
padding-bottom: 8px;
border-bottom: 1px dashed #666;
}
.task-content {
font-size: 15px;
}
.gap-line {
display: inline-block;
border-bottom: 2px solid #000;
min-width: 100px;
margin: 0 6px;
}
.answer-lines {
margin-top: 16px;
}
.answer-line {
border-bottom: 1px solid #333;
height: 36px;
margin-bottom: 4px;
}
.footer {
margin-top: 40px;
padding-top: 16px;
border-top: 1px solid #ccc;
font-size: 11px;
color: #666;
text-align: center;
}
/* Print Button - versteckt beim Drucken */
.print-button {
position: fixed;
top: 20px;
right: 20px;
padding: 12px 24px;
background: #333;
color: #fff;
border: none;
border-radius: 6px;
cursor: pointer;
font-size: 14px;
}
.print-button:hover {
background: #555;
}
</style>
</head>
<body>
<button class="print-button no-print" onclick="window.print()">🖨️ Drucken</button>
""")

    # Title
    html_parts.append(f"<h1>{title}</h1>")

    # Meta information
    meta_parts = []
    if subject:
        meta_parts.append(f"<span><strong>Fach:</strong> {subject}</span>")
    if grade_level:
        meta_parts.append(f"<span><strong>Klasse:</strong> {grade_level}</span>")
    if meta_parts:
        html_parts.append(f"<div class='meta'>{''.join(meta_parts)}</div>")

    # Work instructions
    if instructions:
        html_parts.append("<div class='instructions'>")
        html_parts.append("<div class='instructions-label'>Arbeitsanweisung:</div>")
        html_parts.append(f"<div>{instructions}</div>")
        html_parts.append("</div>")

    # Main text: printed blocks take precedence, canonical_text is the fallback.
    if printed_blocks:
        html_parts.append("<section class='text-section'>")
        for block in printed_blocks:
            role = (block.get("role") or "body").lower()
            text = (block.get("text") or "").strip()
            if not text:
                continue
            if role == "title":
                html_parts.append(
                    f"<div class='text-block'><div class='text-block-title'>{esc(text)}</div></div>"
                )
            else:
                html_parts.append(f"<div class='text-block'>{esc(text)}</div>")
        html_parts.append("</section>")
    elif canonical_text:
        html_parts.append("<section class='text-section'>")
        # Paragraphs are separated by blank lines; normalize Windows newlines first.
        paragraphs = [
            p.strip()
            for p in canonical_text.replace("\r\n", "\n").split("\n\n")
            if p.strip()
        ]
        for p in paragraphs:
            html_parts.append(f"<div class='text-block'>{esc(p)}</div>")
        html_parts.append("</section>")

    # Tasks
    if tasks:
        html_parts.append("<section class='task-section'>")
        html_parts.append("<h2>Aufgaben</h2>")
        for idx, task in enumerate(tasks, start=1):
            t_type = task.get("type") or "Aufgabe"
            desc = task.get("description") or ""
            text_with_gaps = task.get("text_with_gaps")
            html_parts.append("<div class='task'>")
            # Task header: known type ids get a German label, unknown ones are shown as-is.
            type_label = {
                "fill_in_blank": "Lückentext",
                "multiple_choice": "Multiple Choice",
                "free_text": "Freitext",
                "matching": "Zuordnung",
                "labeling": "Beschriftung",
                "calculation": "Rechnung",
                "other": "Aufgabe"
            }.get(t_type, t_type)
            html_parts.append(f"<div class='task-header'>Aufgabe {idx}: {esc(type_label)}</div>")
            if desc:
                html_parts.append(f"<div class='task-content'>{esc(desc)}</div>")
            if text_with_gaps:
                # Escape before inserting markup; "___" is not affected by escaping.
                rendered = esc(text_with_gaps).replace("___", "<span class='gap-line'>&nbsp;</span>")
                html_parts.append(f"<div class='task-content' style='margin-top:12px;'>{rendered}</div>")
            # Answer lines for free-text tasks and for tasks without any content
            if t_type in ["free_text", "other"] or (not text_with_gaps and not desc):
                html_parts.append("<div class='answer-lines'>")
                for _ in range(3):
                    html_parts.append("<div class='answer-line'></div>")
                html_parts.append("</div>")
            html_parts.append("</div>")
        html_parts.append("</section>")

    # Footer
    html_parts.append("<div class='footer'>")
    html_parts.append("Dieses Arbeitsblatt wurde automatisch aus einem Scan rekonstruiert.")
    html_parts.append("</div>")
    html_parts.append("</body></html>")
    return "\n".join(html_parts)

View File

@@ -0,0 +1,333 @@
"""
AI Processing - Q&A Generator.
Generiert Frage-Antwort-Paare mit Leitner-System-Vorbereitung.
"""
from pathlib import Path
import json
import os
import requests
import logging
from .core import (
get_openai_api_key,
get_vision_api,
BEREINIGT_DIR,
)
logger = logging.getLogger(__name__)
def _generate_qa_with_openai(analysis_data: dict, num_questions: int = 8) -> dict:
    """
    Generate question/answer pairs from a worksheet analysis via the OpenAI
    chat completions API.

    Didactic requirements encoded in the prompt:
    - questions are based almost verbatim on the existing material
    - only minimal rephrasing is allowed
    - key words / technical terms are marked as important
    - difficulty matches the original (grade_level)

    Args:
        analysis_data: The analysis JSON of the worksheet (already parsed).
        num_questions: Number of questions to generate (default: 8).

    Returns:
        Dict with "qa_items" and "metadata". Every item is guaranteed to carry
        the Leitner bookkeeping fields (leitner_box, correct_count,
        incorrect_count, last_seen, next_review). If the analysis contains no
        text, an empty item list with an error note in the metadata is
        returned and no request is sent.

    Raises:
        requests.exceptions.RequestException: On HTTP errors or when the
            request times out.
        RuntimeError: If the response cannot be parsed into a JSON object.
    """
    api_key = get_openai_api_key()

    def _text(value) -> str:
        # Analysis fields may be missing OR explicitly null; both become "".
        return str(value or "").strip()

    # Extract the relevant content
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []
    tasks = analysis_data.get("tasks") or []

    # Assemble the text content
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = _text(block.get("text"))
        if text and text not in content_parts:
            content_parts.append(text)
    # Add the task texts
    for task in tasks:
        desc = _text(task.get("description"))
        text = _text(task.get("text_with_gaps"))
        if desc:
            content_parts.append(f"Aufgabe: {desc}")
        if text:
            content_parts.append(text)
    worksheet_content = "\n\n".join(content_parts)
    if not worksheet_content.strip():
        logger.warning("Kein Textinhalt für Q&A-Generierung gefunden")
        return {"qa_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    system_prompt = f"""Du bist ein erfahrener Pädagoge, der Frage-Antwort-Paare für Schüler erstellt.
WICHTIGE REGELN:
1. INHALTE NUR AUS DEM TEXT:
- Verwende FAST WÖRTLICH den vorhandenen Stoff
- Du darfst nur minimal umformulieren (z.B. "Beschreibe..." → "Erkläre in eigenen Worten...")
- KEINE neuen Fakten oder Inhalte einführen!
- Alles muss aus dem gegebenen Text ableitbar sein
2. SCHWIERIGKEITSGRAD:
- Niveau muss exakt "{grade_level}" entsprechen
- Fragen altersgerecht formulieren
3. SCHLÜSSELWÖRTER MARKIEREN:
- Identifiziere wichtige Fachbegriffe als "key_terms"
- Diese Begriffe sind besonders wichtig für die Wiederholung
- Beispiele: Netzhaut, Linse, Pupille (beim Thema Auge)
4. FRAGETYPEN:
- Wissensfragen: "Was ist...?", "Nenne..."
- Verständnisfragen: "Erkläre...", "Beschreibe..."
- Anwendungsfragen: "Warum...?", "Was passiert, wenn...?"
5. ANTWORT-FORMAT:
- Kurze, präzise Antworten (1-3 Sätze)
- Die Antwort muss direkt aus dem Text stammen
6. AUSGABE: Nur gültiges JSON, kein Markdown."""
    user_prompt = f"""Erstelle {num_questions} Frage-Antwort-Paare aus diesem Arbeitsblatt:
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
TEXT:
{worksheet_content}
Gib das Ergebnis als JSON zurück:
{{
"qa_items": [
{{
"id": "qa1",
"question": "Die Frage hier (fast wörtlich aus dem Text)",
"answer": "Die korrekte Antwort (direkt aus dem Text)",
"question_type": "knowledge" | "understanding" | "application",
"key_terms": ["wichtiger Begriff 1", "wichtiger Begriff 2"],
"difficulty": 1-3,
"source_hint": "Kurzer Hinweis, wo im Text die Antwort steht",
"leitner_box": 0
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"total_questions": {num_questions},
"key_terms_summary": ["alle", "wichtigen", "Fachbegriffe", "gesammelt"]
}}
}}
WICHTIG:
- Alle Antworten müssen aus dem Text ableitbar sein!
- "leitner_box": 0 bedeutet "neu" (noch nicht gelernt)
- "difficulty": 1=leicht, 2=mittel, 3=schwer (passend zu {grade_level})
- "key_terms" sind die wichtigsten Wörter, die der Schüler lernen soll"""
    payload = {
        "model": "gpt-4o-mini",
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 3000,
        "temperature": 0.5,
    }

    # Without a timeout, requests waits indefinitely if the API stalls.
    response = requests.post(url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()
    try:
        content = data["choices"][0]["message"]["content"]
        qa_data = json.loads(content)
    except (KeyError, IndexError, TypeError, json.JSONDecodeError) as e:
        # IndexError: empty "choices"; TypeError: content is null / unexpected shape.
        raise RuntimeError(f"Fehler bei Q&A-Generierung: {e}") from e
    if not isinstance(qa_data, dict):
        raise RuntimeError("Fehler bei Q&A-Generierung: Antwort ist kein JSON-Objekt")

    # Initialize the Leitner fields on every item without overwriting
    # values the model already supplied.
    for item in qa_data.get("qa_items", []):
        item.setdefault("leitner_box", 0)  # 0=new, 1=learned, 2=consolidated
        item.setdefault("correct_count", 0)
        item.setdefault("incorrect_count", 0)
        item.setdefault("last_seen", None)
        item.setdefault("next_review", None)
    return qa_data
def _generate_qa_with_claude(analysis_data: dict, num_questions: int = 8) -> dict:
    """
    Generate question/answer pairs from a worksheet analysis via the Claude API.

    Args:
        analysis_data: The analysis JSON of the worksheet (already parsed).
        num_questions: Number of questions to generate (default: 8).

    Returns:
        Dict with "qa_items" and "metadata". Every item is guaranteed to carry
        the Leitner bookkeeping fields (leitner_box, correct_count,
        incorrect_count, last_seen, next_review). If the analysis contains no
        text, an empty item list with an error note in the metadata is
        returned and no request is sent.

    Raises:
        RuntimeError: If ANTHROPIC_API_KEY is not set, the response is empty,
            or the response is not a valid JSON object.
    """
    import anthropic

    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")
    client = anthropic.Anthropic(api_key=api_key)

    def _text(value) -> str:
        # Analysis fields may be missing OR explicitly null; both become "".
        return str(value or "").strip()

    # Extract the relevant content
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []
    tasks = analysis_data.get("tasks") or []

    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = _text(block.get("text"))
        if text and text not in content_parts:
            content_parts.append(text)
    for task in tasks:
        desc = _text(task.get("description"))
        if desc:
            content_parts.append(f"Aufgabe: {desc}")
    worksheet_content = "\n\n".join(content_parts)
    if not worksheet_content.strip():
        return {"qa_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    prompt = f"""Erstelle {num_questions} Frage-Antwort-Paare aus diesem Arbeitsblatt.
WICHTIGE REGELN:
1. Verwende FAST WÖRTLICH den vorhandenen Stoff - KEINE neuen Fakten!
2. Schwierigkeitsgrad: exakt "{grade_level}"
3. Markiere wichtige Fachbegriffe als "key_terms"
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
TEXT:
{worksheet_content}
Antworte NUR mit diesem JSON:
{{
"qa_items": [
{{
"id": "qa1",
"question": "Frage (fast wörtlich aus Text)",
"answer": "Antwort (direkt aus Text)",
"question_type": "knowledge",
"key_terms": ["Begriff1", "Begriff2"],
"difficulty": 1,
"source_hint": "Wo im Text",
"leitner_box": 0
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"total_questions": {num_questions},
"key_terms_summary": ["alle", "Fachbegriffe"]
}}
}}"""
    message = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=3000,
        messages=[{"role": "user", "content": prompt}]
    )
    if not message.content:
        # Guard against an empty content list instead of failing with IndexError.
        raise RuntimeError("Claude hat keine Antwort geliefert.")
    content = message.content[0].text

    try:
        # The model may wrap the JSON in a Markdown code fence; unwrap it.
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]
        qa_data = json.loads(content.strip())
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Claude hat ungültiges JSON geliefert: {e}") from e
    if not isinstance(qa_data, dict):
        raise RuntimeError("Claude hat ungültiges JSON geliefert: kein JSON-Objekt")

    # Initialize the Leitner fields on every item without overwriting
    # values the model already supplied.
    for item in qa_data.get("qa_items", []):
        item.setdefault("leitner_box", 0)
        item.setdefault("correct_count", 0)
        item.setdefault("incorrect_count", 0)
        item.setdefault("last_seen", None)
        item.setdefault("next_review", None)
    return qa_data
def generate_qa_from_analysis(analysis_path: Path, num_questions: int = 8) -> Path:
    """
    Build question/answer pairs from an analysis JSON file and store them.

    The generated pairs are:
    - derived almost verbatim from the original text
    - prepared for repetition with the Leitner box system
    - annotated with key terms for consolidation

    The generator is chosen via get_vision_api(): "claude" tries Claude first
    and falls back to OpenAI on any error; every other value uses OpenAI.

    Args:
        analysis_path: Path to the *_analyse.json file.
        num_questions: Number of questions to generate.

    Returns:
        Path to the written *_qa.json file inside BEREINIGT_DIR.

    Raises:
        FileNotFoundError: If analysis_path does not exist.
        RuntimeError: If the analysis file is not valid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    raw_json = analysis_path.read_text(encoding="utf-8")
    try:
        analysis_data = json.loads(raw_json)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Ungültige Analyse-JSON: {e}")

    logger.info(f"Generiere Q&A-Paare für: {analysis_path.name}")

    # Pick the generator according to the configured API.
    backend = get_vision_api()
    if backend != "claude":
        qa_data = _generate_qa_with_openai(analysis_data, num_questions)
    else:
        try:
            qa_data = _generate_qa_with_claude(analysis_data, num_questions)
        except Exception as e:
            # Best-effort: any Claude failure falls back to OpenAI.
            logger.warning(f"Claude Q&A-Generierung fehlgeschlagen, nutze OpenAI: {e}")
            qa_data = _generate_qa_with_openai(analysis_data, num_questions)

    # Store the Q&A data next to the other cleaned worksheet artifacts.
    base_name = analysis_path.stem.replace("_analyse", "")
    out_path = BEREINIGT_DIR / (base_name + "_qa.json")
    serialized = json.dumps(qa_data, ensure_ascii=False, indent=2)
    out_path.write_text(serialized, encoding="utf-8")
    logger.info(f"Q&A-Paare gespeichert: {out_path.name}")
    return out_path

81
backend/ai_processor.py Normal file
View File

@@ -0,0 +1,81 @@
"""
AI Processor - Legacy Import Wrapper
This file provides backward compatibility for code that imports from ai_processor.
All functionality has been moved to the ai_processor/ module.
Usage (new):
from ai_processor import analyze_scan_structure_with_ai
Usage (legacy, still works):
from ai_processor import analyze_scan_structure_with_ai
"""
# Re-export everything from the new modular structure
from ai_processor import (
# Configuration
BASE_DIR,
EINGANG_DIR,
BEREINIGT_DIR,
VISION_API,
# Utilities (with legacy aliases)
encode_image_to_data_url as _encode_image_to_data_url,
dummy_process_scan,
# Vision - Scan Analysis
analyze_scan_structure_with_ai,
describe_scan_with_ai,
remove_handwriting_from_scan,
build_clean_html_from_analysis,
# Generators - Multiple Choice
generate_mc_from_analysis,
# Generators - Cloze
generate_cloze_from_analysis,
# Generators - Q&A with Leitner
generate_qa_from_analysis,
update_leitner_progress,
get_next_review_items,
# Export - Print Versions
generate_print_version_qa,
generate_print_version_cloze,
generate_print_version_mc,
generate_print_version_worksheet,
# Visualization - Mindmap
generate_mindmap_data,
generate_mindmap_html,
save_mindmap_for_worksheet,
)
# Legacy function alias
from ai_processor import get_openai_api_key as _get_api_key
# Names exported by this legacy wrapper. The underscore-prefixed aliases are
# listed explicitly because `from <module> import *` only exports
# underscore-prefixed names when they appear in __all__.
__all__ = [
    # Configuration
    "BASE_DIR",
    "EINGANG_DIR",
    "BEREINIGT_DIR",
    "VISION_API",
    # Legacy private functions
    "_get_api_key",
    "_encode_image_to_data_url",
    # Vision
    "analyze_scan_structure_with_ai",
    "describe_scan_with_ai",
    "remove_handwriting_from_scan",
    "build_clean_html_from_analysis",
    "dummy_process_scan",
    # Generators
    "generate_mc_from_analysis",
    "generate_cloze_from_analysis",
    "generate_qa_from_analysis",
    "update_leitner_progress",
    "get_next_review_items",
    # Export
    "generate_print_version_qa",
    "generate_print_version_cloze",
    "generate_print_version_mc",
    "generate_print_version_worksheet",
    # Visualization
    "generate_mindmap_data",
    "generate_mindmap_html",
    "save_mindmap_for_worksheet",
]

View File

@@ -0,0 +1,106 @@
"""
AI Processor Module
A modular AI-powered worksheet processing system for:
- Vision-based analysis
- Content generation (MC, Cloze, Q&A)
- Print version export
- Mindmap visualization
Usage:
from ai_processor import analyze_scan_structure_with_ai, generate_mc_from_analysis
"""
# Configuration
from .config import (
BASE_DIR,
EINGANG_DIR,
BEREINIGT_DIR,
VISION_API,
get_openai_api_key,
get_anthropic_api_key,
ensure_directories,
)
# Utilities
from .utils import (
encode_image_to_data_url,
encode_image_to_base64,
dummy_process_scan,
get_media_type,
)
# Vision - Scan Analysis
from .vision import (
analyze_scan_structure_with_ai,
describe_scan_with_ai,
remove_handwriting_from_scan,
build_clean_html_from_analysis,
)
# Generators
from .generators import (
generate_mc_from_analysis,
generate_cloze_from_analysis,
generate_qa_from_analysis,
update_leitner_progress,
get_next_review_items,
)
# Export - Print Versions
from .export import (
generate_print_version_qa,
generate_print_version_cloze,
generate_print_version_mc,
generate_print_version_worksheet,
)
# Visualization - Mindmap
from .visualization import (
generate_mindmap_data,
generate_mindmap_html,
save_mindmap_for_worksheet,
)
# Legacy aliases for backward compatibility
_get_api_key = get_openai_api_key
_encode_image_to_data_url = encode_image_to_data_url
__all__ = [
# Config
"BASE_DIR",
"EINGANG_DIR",
"BEREINIGT_DIR",
"VISION_API",
"get_openai_api_key",
"get_anthropic_api_key",
"ensure_directories",
# Utils
"encode_image_to_data_url",
"encode_image_to_base64",
"dummy_process_scan",
"get_media_type",
# Vision
"analyze_scan_structure_with_ai",
"describe_scan_with_ai",
"remove_handwriting_from_scan",
"build_clean_html_from_analysis",
# Generators
"generate_mc_from_analysis",
"generate_cloze_from_analysis",
"generate_qa_from_analysis",
"update_leitner_progress",
"get_next_review_items",
# Export
"generate_print_version_qa",
"generate_print_version_cloze",
"generate_print_version_mc",
"generate_print_version_worksheet",
# Visualization
"generate_mindmap_data",
"generate_mindmap_html",
"save_mindmap_for_worksheet",
# Legacy aliases
"_get_api_key",
"_encode_image_to_data_url",
]

View File

@@ -0,0 +1,43 @@
"""
AI Processor - Configuration
API keys, constants, and directory paths.
"""
from pathlib import Path
import os
import logging
logger = logging.getLogger(__name__)
# Directory Configuration
BASE_DIR = Path.home() / "Arbeitsblaetter"
EINGANG_DIR = BASE_DIR / "Eingang"
BEREINIGT_DIR = BASE_DIR / "Bereinigt"
# Vision API Configuration
# Set VISION_API environment variable to "openai" or "claude" (default: claude)
VISION_API = os.getenv("VISION_API", "claude").lower()
def get_openai_api_key() -> str:
    """
    Return the OpenAI API key read from the ``OPENAI_API_KEY`` environment variable.

    Returns:
        The non-empty value of ``OPENAI_API_KEY``.

    Raises:
        RuntimeError: if the variable is unset or empty.
    """
    key = os.getenv("OPENAI_API_KEY")
    if key:
        return key
    raise RuntimeError("OPENAI_API_KEY ist nicht gesetzt. Bitte API-Schluessel als Umgebungsvariable setzen.")
def get_anthropic_api_key() -> str:
    """
    Return the Anthropic API key read from the ``ANTHROPIC_API_KEY`` environment variable.

    Returns:
        The non-empty value of ``ANTHROPIC_API_KEY``.

    Raises:
        RuntimeError: if the variable is unset or empty.
    """
    key = os.getenv("ANTHROPIC_API_KEY")
    if key:
        return key
    raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")
# Ensure directories exist
def ensure_directories():
    """Create the input (EINGANG_DIR) and output (BEREINIGT_DIR) directories, including parents, if missing."""
    for directory in (EINGANG_DIR, BEREINIGT_DIR):
        directory.mkdir(parents=True, exist_ok=True)

View File

@@ -0,0 +1,19 @@
"""
AI Processor - Export Module
Print version generation and worksheet export.
"""
from .print_versions import (
generate_print_version_qa,
generate_print_version_cloze,
generate_print_version_mc,
)
from .worksheet import generate_print_version_worksheet
__all__ = [
"generate_print_version_qa",
"generate_print_version_cloze",
"generate_print_version_mc",
"generate_print_version_worksheet",
]

View File

@@ -0,0 +1,508 @@
"""
AI Processor - Print Version Generators
Generate printable HTML versions for Q&A, Cloze, and Multiple Choice.
"""
from pathlib import Path
import json
import logging
import random
from ..config import BEREINIGT_DIR
logger = logging.getLogger(__name__)
def generate_print_version_qa(qa_path: Path, include_answers: bool = False) -> Path:
    """
    Generate a printable HTML version of the Q&A pairs.

    Every value taken from the JSON file (title, subject, grade, questions,
    answers, key terms) is HTML-escaped before it is embedded, so markup
    characters in the generated content cannot break or inject into the page.

    Args:
        qa_path: Path to *_qa.json file
        include_answers: True for solution sheet (for parents)

    Returns:
        Path to generated HTML file (written into BEREINIGT_DIR)

    Raises:
        FileNotFoundError: if ``qa_path`` does not exist.
    """
    import html  # function-scope import keeps this block self-contained

    def esc(value) -> str:
        """Escape an arbitrary JSON value for use as HTML text."""
        return html.escape(str(value))

    if not qa_path.exists():
        raise FileNotFoundError(f"Q&A-Datei nicht gefunden: {qa_path}")

    qa_data = json.loads(qa_path.read_text(encoding="utf-8"))
    items = qa_data.get("qa_items", [])
    metadata = qa_data.get("metadata", {})
    title = metadata.get("source_title", "Arbeitsblatt")
    subject = metadata.get("subject", "")
    grade = metadata.get("grade_level", "")

    # The raw title is handed on unchanged; this function only escapes what it embeds itself.
    html_parts = [_get_qa_html_header(title)]

    # Header
    version_text = "Loesungsblatt" if include_answers else "Fragenblatt"
    html_parts.append(f"<h1>{esc(title)} - {version_text}</h1>")

    meta_parts = []
    if subject:
        meta_parts.append(f"Fach: {esc(subject)}")
    if grade:
        meta_parts.append(f"Klasse: {esc(grade)}")
    meta_parts.append(f"Anzahl Fragen: {len(items)}")
    html_parts.append(f"<div class='meta'>{' | '.join(meta_parts)}</div>")

    # Questions
    for idx, item in enumerate(items, 1):
        html_parts.append("<div class='question-block'>")
        html_parts.append(f"<div class='question-number'>Frage {idx}</div>")
        html_parts.append(f"<div class='question-text'>{esc(item.get('question', ''))}</div>")

        if include_answers:
            html_parts.append(
                f"<div class='answer'><strong>Antwort:</strong> {esc(item.get('answer', ''))}</div>"
            )
            key_terms = item.get("key_terms", [])
            if key_terms:
                terms_html = " ".join(f"<span>{esc(term)}</span>" for term in key_terms)
                html_parts.append(f"<div class='key-terms'>Wichtige Begriffe: {terms_html}</div>")
        else:
            # Question sheet: three empty lines to write the answer on.
            html_parts.append("<div class='answer-lines'>")
            for _ in range(3):
                html_parts.append("<div class='answer-line'></div>")
            html_parts.append("</div>")

        html_parts.append("</div>")

    html_parts.append("</body></html>")

    # Save
    suffix = "_qa_solutions.html" if include_answers else "_qa_print.html"
    out_name = qa_path.stem.replace("_qa", "") + suffix
    # The output directory may not exist yet on a fresh installation.
    BEREINIGT_DIR.mkdir(parents=True, exist_ok=True)
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text("\n".join(html_parts), encoding="utf-8")
    logger.info(f"Print-Version gespeichert: {out_path.name}")
    return out_path
def generate_print_version_cloze(cloze_path: Path, include_answers: bool = False) -> Path:
    """
    Generate a printable HTML version of the cloze texts.

    Every value taken from the JSON file (title, sentences, gap words,
    translations) is HTML-escaped before it is embedded. The ``___`` gap
    markers are not affected by escaping and are replaced afterwards.

    Args:
        cloze_path: Path to *_cloze.json file
        include_answers: True for solution sheet (for parents)

    Returns:
        Path to generated HTML file (written into BEREINIGT_DIR)

    Raises:
        FileNotFoundError: if ``cloze_path`` does not exist.
    """
    import html  # function-scope import keeps this block self-contained

    def esc(value) -> str:
        """Escape an arbitrary JSON value for use as HTML text."""
        return html.escape(str(value))

    if not cloze_path.exists():
        raise FileNotFoundError(f"Cloze-Datei nicht gefunden: {cloze_path}")

    cloze_data = json.loads(cloze_path.read_text(encoding="utf-8"))
    items = cloze_data.get("cloze_items", [])
    metadata = cloze_data.get("metadata", {})
    title = metadata.get("source_title", "Arbeitsblatt")
    subject = metadata.get("subject", "")
    grade = metadata.get("grade_level", "")
    total_gaps = metadata.get("total_gaps", 0)

    # The raw title is handed on unchanged; this function only escapes what it embeds itself.
    html_parts = [_get_cloze_html_header(title)]

    # Header
    version_text = "Loesungsblatt" if include_answers else "Lueckentext"
    html_parts.append(f"<h1>{esc(title)} - {version_text}</h1>")

    meta_parts = []
    if subject:
        meta_parts.append(f"Fach: {esc(subject)}")
    if grade:
        meta_parts.append(f"Klasse: {esc(grade)}")
    meta_parts.append(f"Luecken gesamt: {esc(total_gaps)}")
    html_parts.append(f"<div class='meta'>{' | '.join(meta_parts)}</div>")

    # Collect all gap words for word bank
    all_words = []

    # Cloze texts
    for idx, item in enumerate(items, 1):
        html_parts.append("<div class='cloze-item'>")
        html_parts.append(f"<div class='cloze-number'>{idx}.</div>")

        gaps = item.get("gaps", [])
        # Escape first; the markup inserted below must stay unescaped.
        sentence = esc(item.get("sentence_with_gaps", ""))

        if include_answers:
            # Solution sheet: fill gaps with answers, one marker per gap, left to right
            for gap in gaps:
                word = esc(gap.get("word", ""))
                sentence = sentence.replace("___", f"<span class='gap-filled'>{word}</span>", 1)
        else:
            # Question sheet: gaps as lines
            sentence = sentence.replace("___", "<span class='gap'>&nbsp;</span>")
            for gap in gaps:
                all_words.append(gap.get("word", ""))

        html_parts.append(f"<div class='cloze-sentence'>{sentence}</div>")

        # Show translation
        translation = item.get("translation", {})
        if translation:
            lang_name = translation.get("language_name", "Uebersetzung")
            full_sentence = translation.get("full_sentence", "")
            if full_sentence:
                html_parts.append("<div class='translation'>")
                html_parts.append(f"<div class='translation-label'>{esc(lang_name)}:</div>")
                html_parts.append(esc(full_sentence))
                html_parts.append("</div>")

        html_parts.append("</div>")

    # Word bank (only for question sheet); shuffled so the order gives no hint
    if not include_answers and all_words:
        random.shuffle(all_words)
        html_parts.append("<div class='word-bank'>")
        html_parts.append("<div class='word-bank-title'>Wortbank (diese Woerter fehlen):</div>")
        for word in all_words:
            html_parts.append(f"<span class='word'>{esc(word)}</span>")
        html_parts.append("</div>")

    html_parts.append("</body></html>")

    # Save
    suffix = "_cloze_solutions.html" if include_answers else "_cloze_print.html"
    out_name = cloze_path.stem.replace("_cloze", "") + suffix
    # The output directory may not exist yet on a fresh installation.
    BEREINIGT_DIR.mkdir(parents=True, exist_ok=True)
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text("\n".join(html_parts), encoding="utf-8")
    logger.info(f"Cloze Print-Version gespeichert: {out_path.name}")
    return out_path
def generate_print_version_mc(mc_path: Path, include_answers: bool = False) -> str:
    """
    Generate a printable HTML version of the multiple choice questions.

    Every value taken from the JSON file (title, questions, option ids and
    texts, explanations) is HTML-escaped before it is embedded. Unlike the
    Q&A and cloze variants, nothing is written to disk.

    Args:
        mc_path: Path to *_mc.json file
        include_answers: True for solution sheet with marked correct answers

    Returns:
        HTML string (for direct delivery)

    Raises:
        FileNotFoundError: if ``mc_path`` does not exist.
    """
    import html  # function-scope import keeps this block self-contained

    def esc(value) -> str:
        """Escape an arbitrary JSON value for use as HTML text."""
        return html.escape(str(value))

    if not mc_path.exists():
        raise FileNotFoundError(f"MC-Datei nicht gefunden: {mc_path}")

    mc_data = json.loads(mc_path.read_text(encoding="utf-8"))
    questions = mc_data.get("questions", [])
    metadata = mc_data.get("metadata", {})
    title = metadata.get("source_title", "Arbeitsblatt")
    subject = metadata.get("subject", "")
    grade = metadata.get("grade_level", "")

    # The raw title is handed on unchanged; this function only escapes what it embeds itself.
    html_parts = [_get_mc_html_header(title)]

    # Header
    version_text = "Loesungsblatt" if include_answers else "Multiple Choice Test"
    html_parts.append(f"<h1>{esc(title)}</h1>")
    html_parts.append(f"<div class='meta'><strong>{version_text}</strong>")
    if subject:
        html_parts.append(f" | Fach: {esc(subject)}")
    if grade:
        html_parts.append(f" | Klasse: {esc(grade)}")
    html_parts.append(f" | Anzahl Fragen: {len(questions)}</div>")

    if not include_answers:
        html_parts.append("<div class='instructions'>")
        html_parts.append("<strong>Anleitung:</strong> Kreuze bei jeder Frage die richtige Antwort an. ")
        html_parts.append("Es ist immer nur eine Antwort richtig.")
        html_parts.append("</div>")

    # Questions
    for idx, q in enumerate(questions, 1):
        html_parts.append("<div class='question-block'>")
        html_parts.append(f"<div class='question-number'>Frage {idx}</div>")
        html_parts.append(f"<div class='question-text'>{esc(q.get('question', ''))}</div>")
        html_parts.append("<div class='options'>")

        correct_answer = q.get("correct_answer", "")
        for opt in q.get("options", []):
            opt_id = opt.get("id", "")
            # Compare the raw ids; escaping is only applied to what is printed.
            is_correct = opt_id == correct_answer
            opt_class = "option"
            checkbox_class = "option-checkbox"
            if include_answers and is_correct:
                opt_class += " option-correct"
                checkbox_class += " checked"
            html_parts.append(f"<div class='{opt_class}'>")
            html_parts.append(f"<div class='{checkbox_class}'></div>")
            html_parts.append(f"<span class='option-label'>{esc(opt_id)})</span>")
            html_parts.append(f"<span class='option-text'>{esc(opt.get('text', ''))}</span>")
            html_parts.append("</div>")

        html_parts.append("</div>")

        # Explanation only for solution sheet
        if include_answers and q.get("explanation"):
            html_parts.append(
                f"<div class='explanation'><strong>Erklaerung:</strong> {esc(q.get('explanation'))}</div>"
            )

        html_parts.append("</div>")

    # Answer key (compact) - only for solution sheet
    if include_answers:
        html_parts.append("<div class='answer-key'>")
        html_parts.append("<div class='answer-key-title'>Loesungsschluessel</div>")
        html_parts.append("<div class='answer-key-grid'>")
        for idx, q in enumerate(questions, 1):
            html_parts.append("<div class='answer-key-item'>")
            html_parts.append(f"<span class='answer-key-q'>{idx}.</span> ")
            html_parts.append(f"<span class='answer-key-a'>{esc(q.get('correct_answer', ''))}</span>")
            html_parts.append("</div>")
        html_parts.append("</div>")
        html_parts.append("</div>")

    html_parts.append("</body></html>")
    return "\n".join(html_parts)
def _get_qa_html_header(title: str) -> str:
    """
    Get HTML header for Q&A print version.

    Args:
        title: Worksheet title; it is HTML-escaped before being placed in
            the ``<title>`` element.

    Returns:
        The document prologue from ``<!DOCTYPE html>`` up to and including
        the opening ``<body>`` tag, with the print stylesheet inlined.
    """
    import html  # function-scope import keeps this block self-contained

    # The title originates from generated JSON, so it must not be trusted as markup.
    safe_title = html.escape(str(title))
    return f"""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>{safe_title} - Fragen</title>
<style>
@media print {{
.no-print {{ display: none; }}
.page-break {{ page-break-before: always; }}
}}
body {{
font-family: Arial, sans-serif;
max-width: 800px;
margin: 40px auto;
padding: 20px;
line-height: 1.6;
}}
h1 {{ font-size: 24px; margin-bottom: 8px; }}
.meta {{ color: #666; margin-bottom: 24px; }}
.question-block {{
margin-bottom: 32px;
padding-bottom: 16px;
border-bottom: 1px dashed #ccc;
}}
.question-number {{ font-weight: bold; color: #333; }}
.question-text {{ font-size: 16px; margin: 8px 0; }}
.answer-lines {{ margin-top: 12px; }}
.answer-line {{ border-bottom: 1px solid #999; height: 28px; }}
.answer {{
margin-top: 8px;
padding: 8px;
background: #e8f5e9;
border-left: 3px solid #4caf50;
}}
.key-terms {{ font-size: 12px; color: #666; margin-top: 8px; }}
.key-terms span {{
background: #fff3e0;
padding: 2px 6px;
border-radius: 3px;
margin-right: 4px;
}}
</style>
</head>
<body>
"""
def _get_cloze_html_header(title: str) -> str:
    """
    Get HTML header for cloze print version.

    Args:
        title: Worksheet title; it is HTML-escaped before being placed in
            the ``<title>`` element.

    Returns:
        The document prologue from ``<!DOCTYPE html>`` up to and including
        the opening ``<body>`` tag, with the print stylesheet inlined.
    """
    import html  # function-scope import keeps this block self-contained

    # The title originates from generated JSON, so it must not be trusted as markup.
    safe_title = html.escape(str(title))
    return f"""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>{safe_title} - Lueckentext</title>
<style>
@media print {{
.no-print {{ display: none; }}
.page-break {{ page-break-before: always; }}
}}
body {{
font-family: Arial, sans-serif;
max-width: 800px;
margin: 40px auto;
padding: 20px;
line-height: 1.8;
}}
h1 {{ font-size: 24px; margin-bottom: 8px; }}
.meta {{ color: #666; margin-bottom: 24px; }}
.cloze-item {{
margin-bottom: 24px;
padding: 16px;
background: #f9f9f9;
border-radius: 8px;
}}
.cloze-number {{ font-weight: bold; color: #333; margin-bottom: 8px; }}
.cloze-sentence {{ font-size: 16px; line-height: 2; }}
.gap {{
display: inline-block;
min-width: 80px;
border-bottom: 2px solid #333;
margin: 0 4px;
text-align: center;
}}
.gap-filled {{
display: inline-block;
padding: 2px 8px;
background: #e8f5e9;
border: 1px solid #4caf50;
border-radius: 4px;
font-weight: bold;
}}
.translation {{
margin-top: 12px;
padding: 8px;
background: #e3f2fd;
border-left: 3px solid #2196f3;
font-size: 14px;
color: #555;
}}
.translation-label {{ font-size: 12px; color: #777; margin-bottom: 4px; }}
.word-bank {{
margin-top: 32px;
padding: 16px;
background: #fff3e0;
border-radius: 8px;
}}
.word-bank-title {{ font-weight: bold; margin-bottom: 12px; }}
.word {{
display: inline-block;
padding: 4px 12px;
margin: 4px;
background: white;
border: 1px solid #ddd;
border-radius: 4px;
}}
</style>
</head>
<body>
"""
def _get_mc_html_header(title: str) -> str:
    """
    Get HTML header for MC print version.

    Args:
        title: Worksheet title; it is HTML-escaped before being placed in
            the ``<title>`` element.

    Returns:
        The document prologue from ``<!DOCTYPE html>`` up to and including
        the opening ``<body>`` tag, with the print stylesheet inlined.
    """
    import html  # function-scope import keeps this block self-contained

    # The title originates from generated JSON, so it must not be trusted as markup.
    safe_title = html.escape(str(title))
    # NOTE: the ``.checked::after`` rule previously had ``content: ""``, which
    # renders nothing, so the correct option's checkbox was never visibly
    # ticked on solution sheets. ``\2713`` is the CSS escape for a check mark.
    return f"""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>{safe_title} - Multiple Choice</title>
<style>
@media print {{
.no-print {{ display: none; }}
.page-break {{ page-break-before: always; }}
body {{ font-size: 14pt; }}
}}
body {{
font-family: Arial, Helvetica, sans-serif;
max-width: 800px;
margin: 40px auto;
padding: 20px;
line-height: 1.6;
color: #000;
}}
h1 {{
font-size: 28px;
margin-bottom: 8px;
border-bottom: 2px solid #000;
padding-bottom: 8px;
}}
.meta {{ color: #333; margin-bottom: 32px; font-size: 14px; }}
.instructions {{
background: #f5f5f5;
padding: 12px 16px;
border-radius: 4px;
margin-bottom: 24px;
font-size: 14px;
}}
.question-block {{
margin-bottom: 28px;
padding-bottom: 16px;
border-bottom: 1px solid #ddd;
}}
.question-number {{ font-weight: bold; font-size: 18px; color: #000; margin-bottom: 8px; }}
.question-text {{ font-size: 16px; margin: 8px 0 16px 0; line-height: 1.5; }}
.options {{ margin-left: 20px; }}
.option {{
display: flex;
align-items: flex-start;
margin-bottom: 12px;
padding: 8px 12px;
border: 1px solid #ccc;
border-radius: 4px;
background: #fff;
}}
.option-correct {{
background: #e8f5e9;
border-color: #4caf50;
border-width: 2px;
}}
.option-checkbox {{
width: 20px;
height: 20px;
border: 2px solid #333;
border-radius: 50%;
margin-right: 12px;
flex-shrink: 0;
display: flex;
align-items: center;
justify-content: center;
}}
.option-checkbox.checked::after {{
content: "\\2713";
font-weight: bold;
color: #4caf50;
}}
.option-label {{ font-weight: bold; margin-right: 8px; min-width: 24px; }}
.option-text {{ flex: 1; }}
.explanation {{
margin-top: 8px;
padding: 8px 12px;
background: #e3f2fd;
border-left: 3px solid #2196f3;
font-size: 13px;
color: #333;
}}
.answer-key {{
margin-top: 40px;
padding: 16px;
background: #f5f5f5;
border-radius: 8px;
}}
.answer-key-title {{
font-weight: bold;
font-size: 18px;
margin-bottom: 12px;
border-bottom: 1px solid #999;
padding-bottom: 8px;
}}
.answer-key-grid {{
display: grid;
grid-template-columns: repeat(5, 1fr);
gap: 8px;
}}
.answer-key-item {{
padding: 8px;
text-align: center;
background: white;
border: 1px solid #ddd;
border-radius: 4px;
}}
.answer-key-q {{ font-weight: bold; }}
.answer-key-a {{ color: #4caf50; font-weight: bold; }}
</style>
</head>
<body>
"""

View File

@@ -0,0 +1,286 @@
"""
AI Processor - Worksheet Export
Generate printable worksheet versions.
"""
from pathlib import Path
import json
import logging
logger = logging.getLogger(__name__)
def generate_print_version_worksheet(analysis_path: Path) -> str:
    """
    Generate a print-optimized HTML version of the worksheet.

    Features:
    - Large, readable font (16pt)
    - Black and white / grayscale compatible
    - Clear structure for printing
    - No interactive elements (apart from a print button hidden when printing)

    Every value taken from the analysis JSON (title, instructions, text
    blocks, task descriptions) is HTML-escaped before it is embedded. The
    ``___`` gap markers are not affected by escaping and are replaced afterwards.

    Args:
        analysis_path: Path to *_analyse.json file

    Returns:
        HTML string for direct delivery

    Raises:
        FileNotFoundError: if ``analysis_path`` does not exist.
        RuntimeError: if the file does not contain valid JSON.
    """
    import html  # function-scope import keeps this block self-contained

    def esc(value) -> str:
        """Escape an arbitrary JSON value for use as HTML text."""
        return html.escape(str(value))

    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Analyse-Datei enthaelt kein gueltiges JSON: {analysis_path}\n{e}") from e

    # ``or`` fallbacks also cover keys that are present but null.
    title = data.get("title") or "Arbeitsblatt"
    subject = data.get("subject") or ""
    grade_level = data.get("grade_level") or ""
    instructions = data.get("instructions") or ""
    tasks = data.get("tasks", []) or []
    canonical_text = data.get("canonical_text") or ""
    printed_blocks = data.get("printed_blocks") or []

    # The raw title is handed on unchanged; this function only escapes what it embeds itself.
    html_parts = [_get_worksheet_html_header(title)]

    # Print button
    html_parts.append('<button class="print-button no-print" onclick="window.print()">🖨️ Drucken</button>')

    # Title
    html_parts.append(f"<h1>{esc(title)}</h1>")

    # Meta information
    meta_parts = []
    if subject:
        meta_parts.append(f"<span><strong>Fach:</strong> {esc(subject)}</span>")
    if grade_level:
        meta_parts.append(f"<span><strong>Klasse:</strong> {esc(grade_level)}</span>")
    if meta_parts:
        html_parts.append(f"<div class='meta'>{''.join(meta_parts)}</div>")

    # Instructions
    if instructions:
        html_parts.append("<div class='instructions'>")
        html_parts.append("<div class='instructions-label'>Arbeitsanweisung:</div>")
        html_parts.append(f"<div>{esc(instructions)}</div>")
        html_parts.append("</div>")

    # Main text / printed blocks (printed_blocks take precedence over canonical_text)
    if printed_blocks:
        html_parts.append("<section class='text-section'>")
        for block in printed_blocks:
            role = (block.get("role") or "body").lower()
            text = (block.get("text") or "").strip()
            if not text:
                continue
            if role == "title":
                html_parts.append(
                    f"<div class='text-block'><div class='text-block-title'>{esc(text)}</div></div>"
                )
            else:
                html_parts.append(f"<div class='text-block'>{esc(text)}</div>")
        html_parts.append("</section>")
    elif canonical_text:
        html_parts.append("<section class='text-section'>")
        # Paragraphs are separated by blank lines; normalise Windows line endings first.
        paragraphs = [
            p.strip()
            for p in canonical_text.replace("\r\n", "\n").split("\n\n")
            if p.strip()
        ]
        for p in paragraphs:
            html_parts.append(f"<div class='text-block'>{esc(p)}</div>")
        html_parts.append("</section>")

    # Tasks
    if tasks:
        html_parts.append("<section class='task-section'>")
        html_parts.append("<h2>Aufgaben</h2>")
        for idx, task in enumerate(tasks, start=1):
            t_type = task.get("type") or "Aufgabe"
            desc = task.get("description") or ""
            text_with_gaps = task.get("text_with_gaps")

            html_parts.append("<div class='task'>")

            # Task header; unknown types are shown as-is
            type_label = {
                "fill_in_blank": "Lueckentext",
                "multiple_choice": "Multiple Choice",
                "free_text": "Freitext",
                "matching": "Zuordnung",
                "labeling": "Beschriftung",
                "calculation": "Rechnung",
                "other": "Aufgabe"
            }.get(t_type, t_type)
            html_parts.append(f"<div class='task-header'>Aufgabe {idx}: {esc(type_label)}</div>")

            if desc:
                html_parts.append(f"<div class='task-content'>{esc(desc)}</div>")

            if text_with_gaps:
                # Escape first; the gap markup inserted afterwards must stay unescaped.
                rendered = esc(text_with_gaps).replace("___", "<span class='gap-line'>&nbsp;</span>")
                html_parts.append(f"<div class='task-content' style='margin-top:12px;'>{rendered}</div>")

            # Answer lines for free text tasks and for tasks with no content at all
            if t_type in ["free_text", "other"] or (not text_with_gaps and not desc):
                html_parts.append("<div class='answer-lines'>")
                for _ in range(3):
                    html_parts.append("<div class='answer-line'></div>")
                html_parts.append("</div>")

            html_parts.append("</div>")
        html_parts.append("</section>")

    # Footer
    html_parts.append("<div class='footer'>")
    html_parts.append("Dieses Arbeitsblatt wurde automatisch aus einem Scan rekonstruiert.")
    html_parts.append("</div>")

    html_parts.append("</body></html>")
    return "\n".join(html_parts)
def _get_worksheet_html_header(title: str) -> str:
    """
    Get HTML header for worksheet print version.

    Args:
        title: Worksheet title; it is HTML-escaped before being placed in
            the ``<title>`` element.

    Returns:
        The document prologue from ``<!DOCTYPE html>`` up to and including
        the opening ``<body>`` tag, with the A4 print stylesheet inlined.
    """
    import html  # function-scope import keeps this block self-contained

    # The title originates from generated JSON, so it must not be trusted as markup.
    safe_title = html.escape(str(title))
    return f"""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>{safe_title}</title>
<style>
@page {{
size: A4;
margin: 20mm;
}}
@media print {{
body {{
font-size: 14pt !important;
-webkit-print-color-adjust: exact;
print-color-adjust: exact;
}}
.no-print {{ display: none !important; }}
.page-break {{ page-break-before: always; }}
}}
* {{ box-sizing: border-box; }}
body {{
font-family: Arial, "Helvetica Neue", sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 30px;
line-height: 1.7;
font-size: 16px;
color: #000;
background: #fff;
}}
h1 {{
font-size: 28px;
margin: 0 0 8px 0;
padding-bottom: 8px;
border-bottom: 3px solid #000;
}}
h2 {{
font-size: 20px;
margin: 28px 0 12px 0;
padding-bottom: 4px;
border-bottom: 1px solid #666;
}}
.meta {{
font-size: 14px;
color: #333;
margin-bottom: 20px;
padding: 8px 0;
}}
.meta span {{
margin-right: 20px;
}}
.instructions {{
margin: 20px 0;
padding: 16px;
border: 2px solid #333;
background: #f5f5f5;
font-size: 15px;
}}
.instructions-label {{
font-weight: bold;
margin-bottom: 8px;
}}
.text-section {{
margin: 24px 0;
}}
.text-block {{
margin-bottom: 16px;
text-align: justify;
}}
.text-block-title {{
font-weight: bold;
font-size: 17px;
margin-bottom: 8px;
}}
.task-section {{
margin-top: 32px;
}}
.task {{
margin-bottom: 24px;
padding: 16px;
border: 1px solid #999;
background: #fafafa;
}}
.task-header {{
font-weight: bold;
font-size: 16px;
margin-bottom: 12px;
padding-bottom: 8px;
border-bottom: 1px dashed #666;
}}
.task-content {{
font-size: 15px;
}}
.gap-line {{
display: inline-block;
border-bottom: 2px solid #000;
min-width: 100px;
margin: 0 6px;
}}
.answer-lines {{
margin-top: 16px;
}}
.answer-line {{
border-bottom: 1px solid #333;
height: 36px;
margin-bottom: 4px;
}}
.footer {{
margin-top: 40px;
padding-top: 16px;
border-top: 1px solid #ccc;
font-size: 11px;
color: #666;
text-align: center;
}}
.print-button {{
position: fixed;
top: 20px;
right: 20px;
padding: 12px 24px;
background: #333;
color: #fff;
border: none;
border-radius: 6px;
cursor: pointer;
font-size: 14px;
}}
.print-button:hover {{
background: #555;
}}
</style>
</head>
<body>
"""

View File

@@ -0,0 +1,21 @@
"""
AI Processor - Generators Module
Content generation for multiple choice, cloze, and Q&A.
"""
from .multiple_choice import generate_mc_from_analysis
from .cloze import generate_cloze_from_analysis
from .qa import (
generate_qa_from_analysis,
update_leitner_progress,
get_next_review_items,
)
__all__ = [
"generate_mc_from_analysis",
"generate_cloze_from_analysis",
"generate_qa_from_analysis",
"update_leitner_progress",
"get_next_review_items",
]

View File

@@ -0,0 +1,312 @@
"""
AI Processor - Cloze Text Generator
Generate cloze (fill-in-the-blank) texts from worksheet analysis.
"""
from pathlib import Path
import json
import logging
import os
import requests
from ..config import VISION_API, BEREINIGT_DIR, get_openai_api_key
logger = logging.getLogger(__name__)
# Maps the language codes accepted as ``target_language`` to the German
# language names that are interpolated into the generation prompts and
# written into the ``language_name`` field of the requested JSON.
LANGUAGE_NAMES = {
    "tr": "Tuerkisch",
    "ar": "Arabisch",
    "ru": "Russisch",
    "en": "Englisch",
    "fr": "Franzoesisch",
    "es": "Spanisch",
    "pl": "Polnisch",
    "uk": "Ukrainisch",
}
def _generate_cloze_with_openai(analysis_data: dict, target_language: str = "tr") -> dict:
    """
    Generate cloze texts based on worksheet analysis via the OpenAI chat API.

    Important didactic requirements:
    - Multiple meaningful gaps per sentence (not just one!)
    - Difficulty level matches the original
    - Translation with the same gaps

    Args:
        analysis_data: The analysis JSON of the worksheet
        target_language: Target language for translation (default: "tr" for Turkish)

    Returns:
        Dict with cloze_items and metadata. ``metadata["total_gaps"]`` is
        always recomputed from the returned items. If the analysis contains no
        text, an empty result with ``metadata["error"]`` is returned and no
        request is made.

    Raises:
        RuntimeError: if the API key is missing or the response cannot be parsed.
        requests.RequestException: on HTTP errors or timeouts.
    """
    api_key = get_openai_api_key()

    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    # Collect the worksheet text once, skipping blocks that merely repeat it.
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        # ``or ""`` also covers blocks whose "text" key is present but null.
        text = (block.get("text") or "").strip()
        if text and text not in content_parts:
            content_parts.append(text)

    worksheet_content = "\n\n".join(content_parts)
    if not worksheet_content.strip():
        logger.warning("Kein Textinhalt fuer Lueckentext-Generierung gefunden")
        return {"cloze_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    # Unknown codes are passed through as-is instead of being labelled
    # "Tuerkisch", which would contradict the requested language code.
    target_lang_name = LANGUAGE_NAMES.get(target_language, target_language)

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    system_prompt = f"""Du bist ein erfahrener Paedagoge, der Lueckentexte fuer Schueler erstellt.
WICHTIGE REGELN FUER LUECKENTEXTE:
1. MEHRERE LUECKEN PRO SATZ:
- Erstelle IMMER mehrere sinnvolle Luecken pro Satz
- Beispiel: "Ich habe gestern meine Hausaufgaben gemacht."
→ Luecken: "habe" UND "gemacht" (nicht nur eine!)
2. SCHWIERIGKEITSGRAD:
- Niveau muss exakt "{grade_level}" entsprechen
3. SINNVOLLE LUECKENWOERTER:
- Verben (konjugiert)
- Wichtige Nomen
- Adjektive
- KEINE Artikel oder Praepositionen allein
4. UEBERSETZUNG:
- Uebersetze den VOLLSTAENDIGEN Satz auf {target_lang_name}
- Die GLEICHEN Woerter muessen als Luecken markiert sein
5. AUSGABE: Nur gueltiges JSON, kein Markdown."""

    user_prompt = f"""Erstelle Lueckentexte aus diesem Arbeitsblatt:
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
TEXT:
{worksheet_content}
Erstelle 5-8 Saetze mit Luecken. Gib das Ergebnis als JSON zurueck:
{{
"cloze_items": [
{{
"id": "c1",
"original_sentence": "Der vollstaendige Originalsatz ohne Luecken",
"sentence_with_gaps": "Der Satz mit ___ fuer jede Luecke",
"gaps": [
{{
"id": "g1",
"word": "das fehlende Wort",
"position": 0,
"hint": "optionaler Hinweis"
}}
],
"translation": {{
"language": "{target_language}",
"language_name": "{target_lang_name}",
"full_sentence": "Vollstaendige Uebersetzung",
"sentence_with_gaps": "Uebersetzung mit ___ an gleichen Stellen"
}}
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"target_language": "{target_language}",
"total_gaps": 0
}}
}}
WICHTIG:
- Jeder Satz MUSS mindestens 2 Luecken haben!
- Position ist der Index des Wortes im Satz (0-basiert)"""

    payload = {
        "model": "gpt-4o-mini",
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 3000,
        "temperature": 0.7,
    }

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()

    try:
        content = data["choices"][0]["message"]["content"]
        cloze_data = json.loads(content)
    except (KeyError, IndexError, TypeError, json.JSONDecodeError) as e:
        raise RuntimeError(f"Fehler bei Lueckentext-Generierung: {e}") from e

    # Calculate total number of gaps; never trust the count reported by the model.
    total_gaps = sum(len(item.get("gaps") or []) for item in cloze_data.get("cloze_items", []))
    cloze_data.setdefault("metadata", {})["total_gaps"] = total_gaps

    return cloze_data
def _generate_cloze_with_claude(analysis_data: dict, target_language: str = "tr") -> dict:
    """
    Generate cloze texts with Claude API.

    Args:
        analysis_data: The analysis JSON of the worksheet
        target_language: Target language for translation (default: "tr" for Turkish)

    Returns:
        Dict with cloze_items and metadata. ``metadata["total_gaps"]`` is
        always recomputed from the returned items. If the analysis contains no
        text, an empty result with ``metadata["error"]`` is returned and no
        request is made.

    Raises:
        RuntimeError: if ANTHROPIC_API_KEY is unset, or the reply is empty or
            not valid JSON.
    """
    # Imported lazily so the module can be loaded without the SDK installed.
    import anthropic

    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")

    client = anthropic.Anthropic(api_key=api_key)

    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []

    # Collect the worksheet text once, skipping blocks that merely repeat it.
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        # ``or ""`` also covers blocks whose "text" key is present but null.
        text = (block.get("text") or "").strip()
        if text and text not in content_parts:
            content_parts.append(text)

    worksheet_content = "\n\n".join(content_parts)
    if not worksheet_content.strip():
        return {"cloze_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    # Unknown codes are passed through as-is instead of being labelled
    # "Tuerkisch", which would contradict the requested language code.
    target_lang_name = LANGUAGE_NAMES.get(target_language, target_language)

    prompt = f"""Erstelle Lueckentexte aus diesem Arbeitsblatt.
WICHTIGE REGELN:
1. MEHRERE LUECKEN PRO SATZ (mindestens 2!)
Beispiel: "Ich habe gestern Hausaufgaben gemacht" → Luecken: "habe" UND "gemacht"
2. Schwierigkeitsgrad: exakt "{grade_level}"
3. Uebersetzung auf {target_lang_name} mit gleichen Luecken
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
TEXT:
{worksheet_content}
Antworte NUR mit diesem JSON (5-8 Saetze):
{{
"cloze_items": [
{{
"id": "c1",
"original_sentence": "Vollstaendiger Satz",
"sentence_with_gaps": "Satz mit ___ fuer Luecken",
"gaps": [
{{"id": "g1", "word": "Lueckenwort", "position": 0, "hint": "Hinweis"}}
],
"translation": {{
"language": "{target_language}",
"language_name": "{target_lang_name}",
"full_sentence": "Uebersetzung",
"sentence_with_gaps": "Uebersetzung mit ___"
}}
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"target_language": "{target_language}",
"total_gaps": 0
}}
}}"""

    message = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=3000,
        messages=[{"role": "user", "content": prompt}]
    )

    if not message.content:
        raise RuntimeError("Claude hat eine leere Antwort geliefert.")
    content = message.content[0].text

    try:
        # The model sometimes wraps its JSON in a Markdown code fence; unwrap it.
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1].split("```")[0]
        cloze_data = json.loads(content.strip())
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Claude hat ungueltiges JSON geliefert: {e}") from e

    # Calculate total number of gaps; never trust the count reported by the model.
    total_gaps = sum(len(item.get("gaps") or []) for item in cloze_data.get("cloze_items", []))
    cloze_data.setdefault("metadata", {})["total_gaps"] = total_gaps

    return cloze_data
def generate_cloze_from_analysis(analysis_path: Path, target_language: str = "tr") -> Path:
    """
    Generate cloze texts from an analysis JSON file.

    The cloze texts will:
    - Have multiple meaningful gaps per sentence
    - Match the difficulty level of the original
    - Include translation to target language

    When VISION_API is "claude", Claude is tried first and OpenAI is used as
    a fallback if that attempt raises; otherwise OpenAI is used directly.

    Args:
        analysis_path: Path to *_analyse.json file
        target_language: Language code for translation (default: "tr" for Turkish)

    Returns:
        Path to generated *_cloze.json file (written into BEREINIGT_DIR)

    Raises:
        FileNotFoundError: if ``analysis_path`` does not exist.
        RuntimeError: if the analysis file is not valid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        analysis_data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        # Chain the original error so the failing position stays visible in tracebacks.
        raise RuntimeError(f"Ungueltige Analyse-JSON: {e}") from e

    logger.info(f"Generiere Lueckentexte fuer: {analysis_path.name}")

    # Generate cloze texts (use configured API)
    if VISION_API == "claude":
        try:
            cloze_data = _generate_cloze_with_claude(analysis_data, target_language)
        except Exception as e:
            # Deliberate best-effort: any Claude failure falls back to OpenAI.
            logger.warning(f"Claude Lueckentext-Generierung fehlgeschlagen, nutze OpenAI: {e}")
            cloze_data = _generate_cloze_with_openai(analysis_data, target_language)
    else:
        cloze_data = _generate_cloze_with_openai(analysis_data, target_language)

    # Save cloze data
    out_name = analysis_path.stem.replace("_analyse", "") + "_cloze.json"
    # The output directory may not exist yet on a fresh installation.
    BEREINIGT_DIR.mkdir(parents=True, exist_ok=True)
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(json.dumps(cloze_data, ensure_ascii=False, indent=2), encoding="utf-8")

    logger.info(f"Lueckentexte gespeichert: {out_path.name}")
    return out_path

View File

@@ -0,0 +1,291 @@
"""
AI Processor - Multiple Choice Generator
Generate multiple choice questions from worksheet analysis.
"""
from pathlib import Path
import json
import logging
import random
import os
import requests
from ..config import VISION_API, BEREINIGT_DIR, get_openai_api_key
logger = logging.getLogger(__name__)
def _generate_mc_with_openai(analysis_data: dict, num_questions: int = 5) -> dict:
"""
Generate multiple choice questions based on worksheet analysis.
Uses OpenAI GPT-4o-mini for generation.
Difficulty level matches the original (grade_level from analysis).
"""
api_key = get_openai_api_key()
title = analysis_data.get("title") or "Arbeitsblatt"
subject = analysis_data.get("subject") or "Allgemein"
grade_level = analysis_data.get("grade_level") or "unbekannt"
canonical_text = analysis_data.get("canonical_text") or ""
printed_blocks = analysis_data.get("printed_blocks") or []
content_parts = []
if canonical_text:
content_parts.append(canonical_text)
for block in printed_blocks:
text = block.get("text", "").strip()
if text and text not in content_parts:
content_parts.append(text)
worksheet_content = "\n\n".join(content_parts)
if not worksheet_content.strip():
logger.warning("Kein Textinhalt fuer MC-Generierung gefunden")
return {"questions": [], "metadata": {"error": "Kein Textinhalt gefunden"}}
url = "https://api.openai.com/v1/chat/completions"
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
system_prompt = f"""Du bist ein erfahrener Paedagoge, der Multiple-Choice-Fragen fuer Schueler erstellt.
WICHTIGE REGELN:
1. SCHWIERIGKEITSGRAD: Die Fragen muessen exakt dem Niveau "{grade_level}" entsprechen.
2. INHALTSTREUE: Alle Fragen muessen sich direkt auf den gegebenen Text beziehen.
3. QUALITAET DER DISTRAKTOREN: Muessen plausibel klingen, nicht offensichtlich falsch.
4. AUSGABEFORMAT: Gib deine Antwort AUSSCHLIESSLICH als gueltiges JSON zurueck."""
user_prompt = f"""Erstelle {num_questions} Multiple-Choice-Fragen basierend auf diesem Arbeitsblatt:
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
INHALT DES ARBEITSBLATTS:
{worksheet_content}
Gib die Fragen als JSON zurueck:
{{
"questions": [
{{
"id": "q1",
"question": "Die Fragestellung hier",
"options": [
{{"id": "a", "text": "Antwort A"}},
{{"id": "b", "text": "Antwort B"}},
{{"id": "c", "text": "Antwort C"}},
{{"id": "d", "text": "Antwort D"}}
],
"correct_answer": "a",
"explanation": "Kurze Erklaerung warum diese Antwort richtig ist"
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"num_questions": {num_questions}
}}
}}"""
payload = {
"model": "gpt-4o-mini",
"response_format": {"type": "json_object"},
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
],
"max_tokens": 2000,
"temperature": 0.7,
}
response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
data = response.json()
try:
content = data["choices"][0]["message"]["content"]
mc_data = json.loads(content)
except (KeyError, json.JSONDecodeError) as e:
raise RuntimeError(f"Fehler bei MC-Generierung: {e}")
return mc_data
def _generate_mc_with_claude(analysis_data: dict, num_questions: int = 5) -> dict:
"""Generate multiple choice questions with Claude API."""
import anthropic
api_key = os.getenv("ANTHROPIC_API_KEY")
if not api_key:
raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")
client = anthropic.Anthropic(api_key=api_key)
title = analysis_data.get("title") or "Arbeitsblatt"
subject = analysis_data.get("subject") or "Allgemein"
grade_level = analysis_data.get("grade_level") or "unbekannt"
canonical_text = analysis_data.get("canonical_text") or ""
printed_blocks = analysis_data.get("printed_blocks") or []
content_parts = []
if canonical_text:
content_parts.append(canonical_text)
for block in printed_blocks:
text = block.get("text", "").strip()
if text and text not in content_parts:
content_parts.append(text)
worksheet_content = "\n\n".join(content_parts)
if not worksheet_content.strip():
return {"questions": [], "metadata": {"error": "Kein Textinhalt gefunden"}}
prompt = f"""Erstelle {num_questions} Multiple-Choice-Fragen basierend auf diesem Arbeitsblatt.
WICHTIGE REGELN:
1. SCHWIERIGKEITSGRAD: Exakt Niveau "{grade_level}" - nicht leichter, nicht schwerer
2. INHALTSTREUE: Nur Fragen zum gegebenen Text
3. QUALITAET: Plausible Distraktoren (falsche Antworten)
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
INHALT:
{worksheet_content}
Antworte NUR mit diesem JSON-Format:
{{
"questions": [
{{
"id": "q1",
"question": "Fragestellung",
"options": [
{{"id": "a", "text": "Antwort A"}},
{{"id": "b", "text": "Antwort B"}},
{{"id": "c", "text": "Antwort C"}},
{{"id": "d", "text": "Antwort D"}}
],
"correct_answer": "a",
"explanation": "Erklaerung"
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"num_questions": {num_questions}
}}
}}"""
message = client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=2000,
messages=[{"role": "user", "content": prompt}]
)
content = message.content[0].text
try:
if "```json" in content:
content = content.split("```json")[1].split("```")[0]
elif "```" in content:
content = content.split("```")[1].split("```")[0]
mc_data = json.loads(content.strip())
except json.JSONDecodeError as e:
raise RuntimeError(f"Claude hat ungueltiges JSON geliefert: {e}")
return mc_data
def _shuffle_mc_options(mc_data: dict) -> dict:
"""
Shuffle the answer options for each question randomly.
Also updates correct_answer accordingly.
"""
if "questions" not in mc_data:
return mc_data
for question in mc_data["questions"]:
options = question.get("options", [])
correct_id = question.get("correct_answer")
if not options or not correct_id:
continue
# Find the text of the correct answer
correct_text = None
for opt in options:
if opt.get("id") == correct_id:
correct_text = opt.get("text")
break
# Shuffle the options
random.shuffle(options)
# Assign new IDs and find new position of correct answer
new_ids = ["a", "b", "c", "d"]
new_correct = None
for i, opt in enumerate(options):
if i < len(new_ids):
if opt.get("text") == correct_text:
new_correct = new_ids[i]
opt["id"] = new_ids[i]
if new_correct:
question["correct_answer"] = new_correct
question["options"] = options
return mc_data
def generate_mc_from_analysis(analysis_path: Path, num_questions: int = 5) -> Path:
    """
    Create a multiple choice exercise file from a worksheet analysis.

    The analysis JSON is loaded, the configured generator produces the
    questions, the answer options are shuffled, and the result is stored as
    ``<name>_mc.json`` in BEREINIGT_DIR. When VISION_API is "claude" and the
    Claude generator raises, the OpenAI generator is used instead.

    Args:
        analysis_path: Path to *_analyse.json file
        num_questions: Number of questions to generate (default: 5)

    Returns:
        Path to generated *_mc.json file

    Raises:
        FileNotFoundError: analysis_path does not exist
        RuntimeError: the analysis file does not contain valid JSON
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    raw_json = analysis_path.read_text(encoding="utf-8")
    try:
        analysis = json.loads(raw_json)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Ungueltige Analyse-JSON: {e}")

    logger.info(f"Generiere MC-Fragen fuer: {analysis_path.name}")

    # Pick the generator according to the configured API
    if VISION_API != "claude":
        questions = _generate_mc_with_openai(analysis, num_questions)
    else:
        try:
            questions = _generate_mc_with_claude(analysis, num_questions)
        except Exception as e:
            logger.warning(f"Claude MC-Generierung fehlgeschlagen, nutze OpenAI: {e}")
            questions = _generate_mc_with_openai(analysis, num_questions)

    # Randomise the answer positions before saving
    questions = _shuffle_mc_options(questions)

    target = BEREINIGT_DIR / (analysis_path.stem.replace("_analyse", "") + "_mc.json")
    serialized = json.dumps(questions, ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8")
    logger.info(f"MC-Fragen gespeichert: {target.name}")
    return target

View File

@@ -0,0 +1,458 @@
"""
AI Processor - Q&A Generator
Generate question-answer pairs with Leitner system for spaced repetition.
"""
from pathlib import Path
from datetime import datetime, timedelta
import json
import logging
import os
import requests
from ..config import VISION_API, BEREINIGT_DIR, get_openai_api_key
logger = logging.getLogger(__name__)
def _generate_qa_with_openai(analysis_data: dict, num_questions: int = 8) -> dict:
"""
Generate question-answer pairs based on worksheet analysis.
Important didactic requirements:
- Questions based almost verbatim on the existing material
- Only minimal rephrasing allowed
- Key terms/technical terms marked as important
- Difficulty level matches the original (grade_level)
Args:
analysis_data: The analysis JSON of the worksheet
num_questions: Number of questions to generate (default: 8)
Returns:
Dict with qa_items and metadata
"""
api_key = get_openai_api_key()
title = analysis_data.get("title") or "Arbeitsblatt"
subject = analysis_data.get("subject") or "Allgemein"
grade_level = analysis_data.get("grade_level") or "unbekannt"
canonical_text = analysis_data.get("canonical_text") or ""
printed_blocks = analysis_data.get("printed_blocks") or []
tasks = analysis_data.get("tasks") or []
content_parts = []
if canonical_text:
content_parts.append(canonical_text)
for block in printed_blocks:
text = block.get("text", "").strip()
if text and text not in content_parts:
content_parts.append(text)
for task in tasks:
desc = task.get("description", "").strip()
text = task.get("text_with_gaps", "").strip()
if desc:
content_parts.append(f"Aufgabe: {desc}")
if text:
content_parts.append(text)
worksheet_content = "\n\n".join(content_parts)
if not worksheet_content.strip():
logger.warning("Kein Textinhalt fuer Q&A-Generierung gefunden")
return {"qa_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}
url = "https://api.openai.com/v1/chat/completions"
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
system_prompt = f"""Du bist ein erfahrener Paedagoge, der Frage-Antwort-Paare fuer Schueler erstellt.
WICHTIGE REGELN:
1. INHALTE NUR AUS DEM TEXT:
- Verwende FAST WOERTLICH den vorhandenen Stoff
- KEINE neuen Fakten oder Inhalte einfuehren!
- Alles muss aus dem gegebenen Text ableitbar sein
2. SCHWIERIGKEITSGRAD:
- Niveau muss exakt "{grade_level}" entsprechen
3. SCHLUESSELWOERTER MARKIEREN:
- Identifiziere wichtige Fachbegriffe als "key_terms"
4. FRAGETYPEN:
- Wissensfragen: "Was ist...?", "Nenne..."
- Verstaendnisfragen: "Erklaere...", "Beschreibe..."
- Anwendungsfragen: "Warum...?", "Was passiert, wenn...?"
5. ANTWORT-FORMAT:
- Kurze, praezise Antworten (1-3 Saetze)
6. AUSGABE: Nur gueltiges JSON, kein Markdown."""
user_prompt = f"""Erstelle {num_questions} Frage-Antwort-Paare aus diesem Arbeitsblatt:
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
TEXT:
{worksheet_content}
Gib das Ergebnis als JSON zurueck:
{{
"qa_items": [
{{
"id": "qa1",
"question": "Die Frage hier (fast woertlich aus dem Text)",
"answer": "Die korrekte Antwort (direkt aus dem Text)",
"question_type": "knowledge" | "understanding" | "application",
"key_terms": ["wichtiger Begriff 1", "wichtiger Begriff 2"],
"difficulty": 1-3,
"source_hint": "Kurzer Hinweis, wo im Text die Antwort steht",
"leitner_box": 0
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"total_questions": {num_questions},
"key_terms_summary": ["alle", "wichtigen", "Fachbegriffe", "gesammelt"]
}}
}}
WICHTIG:
- Alle Antworten muessen aus dem Text ableitbar sein!
- "leitner_box": 0 bedeutet "neu" (noch nicht gelernt)
- "difficulty": 1=leicht, 2=mittel, 3=schwer"""
payload = {
"model": "gpt-4o-mini",
"response_format": {"type": "json_object"},
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
],
"max_tokens": 3000,
"temperature": 0.5,
}
response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
data = response.json()
try:
content = data["choices"][0]["message"]["content"]
qa_data = json.loads(content)
except (KeyError, json.JSONDecodeError) as e:
raise RuntimeError(f"Fehler bei Q&A-Generierung: {e}")
# Initialize Leitner-Box fields for all items
_initialize_leitner_fields(qa_data)
return qa_data
def _generate_qa_with_claude(analysis_data: dict, num_questions: int = 8) -> dict:
"""Generate question-answer pairs with Claude API."""
import anthropic
api_key = os.getenv("ANTHROPIC_API_KEY")
if not api_key:
raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")
client = anthropic.Anthropic(api_key=api_key)
title = analysis_data.get("title") or "Arbeitsblatt"
subject = analysis_data.get("subject") or "Allgemein"
grade_level = analysis_data.get("grade_level") or "unbekannt"
canonical_text = analysis_data.get("canonical_text") or ""
printed_blocks = analysis_data.get("printed_blocks") or []
tasks = analysis_data.get("tasks") or []
content_parts = []
if canonical_text:
content_parts.append(canonical_text)
for block in printed_blocks:
text = block.get("text", "").strip()
if text and text not in content_parts:
content_parts.append(text)
for task in tasks:
desc = task.get("description", "").strip()
if desc:
content_parts.append(f"Aufgabe: {desc}")
worksheet_content = "\n\n".join(content_parts)
if not worksheet_content.strip():
return {"qa_items": [], "metadata": {"error": "Kein Textinhalt gefunden"}}
prompt = f"""Erstelle {num_questions} Frage-Antwort-Paare aus diesem Arbeitsblatt.
WICHTIGE REGELN:
1. Verwende FAST WOERTLICH den vorhandenen Stoff - KEINE neuen Fakten!
2. Schwierigkeitsgrad: exakt "{grade_level}"
3. Markiere wichtige Fachbegriffe als "key_terms"
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
TEXT:
{worksheet_content}
Antworte NUR mit diesem JSON:
{{
"qa_items": [
{{
"id": "qa1",
"question": "Frage (fast woertlich aus Text)",
"answer": "Antwort (direkt aus Text)",
"question_type": "knowledge",
"key_terms": ["Begriff1", "Begriff2"],
"difficulty": 1,
"source_hint": "Wo im Text",
"leitner_box": 0
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"total_questions": {num_questions},
"key_terms_summary": ["alle", "Fachbegriffe"]
}}
}}"""
message = client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=3000,
messages=[{"role": "user", "content": prompt}]
)
content = message.content[0].text
try:
if "```json" in content:
content = content.split("```json")[1].split("```")[0]
elif "```" in content:
content = content.split("```")[1].split("```")[0]
qa_data = json.loads(content.strip())
except json.JSONDecodeError as e:
raise RuntimeError(f"Claude hat ungueltiges JSON geliefert: {e}")
# Initialize Leitner-Box fields
_initialize_leitner_fields(qa_data)
return qa_data
def _initialize_leitner_fields(qa_data: dict) -> None:
"""Initialize Leitner-Box fields for all Q&A items."""
for item in qa_data.get("qa_items", []):
if "leitner_box" not in item:
item["leitner_box"] = 0
if "correct_count" not in item:
item["correct_count"] = 0
if "incorrect_count" not in item:
item["incorrect_count"] = 0
if "last_seen" not in item:
item["last_seen"] = None
if "next_review" not in item:
item["next_review"] = None
def generate_qa_from_analysis(analysis_path: Path, num_questions: int = 8) -> Path:
    """
    Create a question-answer exercise file from a worksheet analysis.

    The analysis JSON is loaded, the configured generator produces the Q&A
    pairs, and the result is stored as ``<name>_qa.json`` in BEREINIGT_DIR.
    When VISION_API is "claude" and the Claude generator raises, the OpenAI
    generator is used instead.

    Args:
        analysis_path: Path to *_analyse.json file
        num_questions: Number of questions to generate

    Returns:
        Path to generated *_qa.json file

    Raises:
        FileNotFoundError: analysis_path does not exist
        RuntimeError: the analysis file does not contain valid JSON
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    raw_json = analysis_path.read_text(encoding="utf-8")
    try:
        analysis = json.loads(raw_json)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Ungueltige Analyse-JSON: {e}")

    logger.info(f"Generiere Q&A-Paare fuer: {analysis_path.name}")

    # Pick the generator according to the configured API
    if VISION_API != "claude":
        pairs = _generate_qa_with_openai(analysis, num_questions)
    else:
        try:
            pairs = _generate_qa_with_claude(analysis, num_questions)
        except Exception as e:
            logger.warning(f"Claude Q&A-Generierung fehlgeschlagen, nutze OpenAI: {e}")
            pairs = _generate_qa_with_openai(analysis, num_questions)

    target = BEREINIGT_DIR / (analysis_path.stem.replace("_analyse", "") + "_qa.json")
    serialized = json.dumps(pairs, ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8")
    logger.info(f"Q&A-Paare gespeichert: {target.name}")
    return target
# ---------------------------------------------------------------------------
# Leitner-Box System for Spaced Repetition
# ---------------------------------------------------------------------------
def update_leitner_progress(qa_path: Path, item_id: str, correct: bool) -> dict:
    """
    Record one answer for a Q&A item and move it between Leitner boxes.

    Leitner boxes:
    - Box 0: New (not yet learned)
    - Box 1: Learned (on error -> back to Box 0)
    - Box 2: Consolidated (on error -> back to Box 1)

    On a correct answer the box is increased (max 2) and the next review is
    scheduled according to the box the item was in when it was answered:
    Box 0 -> after 1 day, Box 1 -> after 3 days, Box 2 -> after 7 days.
    On a wrong answer the box is decreased (min 0) and the item is scheduled
    again after 4 hours. The updated data is written back to ``qa_path``.

    Items that lack the bookkeeping fields, or whose stored box is outside
    0..2 or not an integer, are treated as if the fields had their initial
    value / the nearest valid box.

    Args:
        qa_path: Path to *_qa.json file
        item_id: ID of the Q&A item
        correct: True if answered correctly

    Returns:
        Dict with status "OK" and the updated box, counters and next review
        time, or status "NOT_FOUND" if no item has the given id.

    Raises:
        FileNotFoundError: qa_path does not exist
    """
    if not qa_path.exists():
        raise FileNotFoundError(f"Q&A-Datei nicht gefunden: {qa_path}")

    qa_data = json.loads(qa_path.read_text(encoding="utf-8"))

    # Find the item
    item = None
    for qa_item in qa_data.get("qa_items", []):
        if qa_item.get("id") == item_id:
            item = qa_item
            break
    if not item:
        return {"status": "NOT_FOUND", "message": f"Item {item_id} nicht gefunden"}

    box_names = ["Neu", "Gelernt", "Gefestigt"]
    # Days until the next review, indexed by the box the item was answered in
    review_days = [1, 3, 7]
    max_box = len(box_names) - 1

    # One timestamp for both fields so last_seen and next_review are consistent
    now = datetime.now()
    item["last_seen"] = now.isoformat()

    # Normalise bookkeeping fields: files written by other tools may lack them
    stored_box = item.get("leitner_box")
    current_box = stored_box if isinstance(stored_box, int) else 0
    current_box = max(0, min(current_box, max_box))
    item["correct_count"] = item.get("correct_count") or 0
    item["incorrect_count"] = item.get("incorrect_count") or 0

    if correct:
        item["correct_count"] += 1
        item["leitner_box"] = min(current_box + 1, max_box)
        item["next_review"] = (now + timedelta(days=review_days[current_box])).isoformat()
    else:
        item["incorrect_count"] += 1
        item["leitner_box"] = max(current_box - 1, 0)
        # On error: review soon
        item["next_review"] = (now + timedelta(hours=4)).isoformat()

    # Save updated data
    qa_path.write_text(json.dumps(qa_data, ensure_ascii=False, indent=2), encoding="utf-8")

    return {
        "status": "OK",
        "item_id": item_id,
        "correct": correct,
        "new_box": item["leitner_box"],
        "box_name": box_names[item["leitner_box"]],
        "correct_count": item["correct_count"],
        "incorrect_count": item["incorrect_count"],
        "next_review": item["next_review"]
    }
def get_next_review_items(qa_path: Path, limit: int = 5) -> list:
    """
    Select the Q&A items that should be reviewed next.

    Every item gets a score (lower = reviewed earlier): ten points per Leitner
    box, minus two per recorded wrong answer, minus the number of hours the
    review is overdue. Box 0 items are always candidates; items in other
    boxes only when their next_review time has passed, is missing, or cannot
    be parsed.

    Args:
        qa_path: Path to *_qa.json file
        limit: Maximum number of items

    Returns:
        Copies of the selected items, sorted by score, without any keys that
        start with an underscore. An empty list if qa_path does not exist.
    """
    if not qa_path.exists():
        return []

    stored = json.loads(qa_path.read_text(encoding="utf-8"))
    candidates = stored.get("qa_items", [])
    reference_time = datetime.now()

    ranked = []
    for entry in candidates:
        level = entry.get("leitner_box", 0)
        due_at = entry.get("next_review")
        mistakes = entry.get("incorrect_count", 0)

        # Lower score = more urgent; frequent mistakes pull an item forward
        score = level * 10 - mistakes * 2

        due = True
        if due_at:
            try:
                scheduled = datetime.fromisoformat(due_at)
                due = reference_time >= scheduled
                if due:
                    # The longer an item is overdue, the earlier it is shown
                    score -= (reference_time - scheduled).total_seconds() / 3600
            except (ValueError, TypeError):
                due = True

        # Items outside box 0 are skipped until their review is due
        if level != 0 and not due:
            continue
        ranked.append((score, entry))

    ranked.sort(key=lambda pair: pair[0])

    return [
        {key: value for key, value in entry.items() if not key.startswith("_")}
        for _, entry in ranked[:limit]
    ]

View File

@@ -0,0 +1,83 @@
"""
AI Processor - Utility Functions
Image encoding and helper functions.
"""
from pathlib import Path
import base64
import shutil
import logging
from .config import BEREINIGT_DIR
logger = logging.getLogger(__name__)
def encode_image_to_data_url(input_path: Path) -> str:
    """
    Encode an image file to a base64 data URL for API requests.

    The MIME type is guessed from the file extension so that PNG, GIF or
    WebP files are not mislabelled as JPEG. Files whose extension is not
    recognised as an image type keep the "image/jpeg" label.

    Args:
        input_path: Path to the image file

    Returns:
        Data URL string (data:<mime type>;base64,...)
    """
    import mimetypes  # local import: only this helper needs it

    guessed, _ = mimetypes.guess_type(input_path.name)
    media_type = guessed if guessed and guessed.startswith("image/") else "image/jpeg"

    image_bytes = input_path.read_bytes()
    image_b64 = base64.b64encode(image_bytes).decode("utf-8")
    return f"data:{media_type};base64,{image_b64}"
def encode_image_to_base64(input_path: Path) -> str:
    """
    Read a file and return its content as base64 text.

    Args:
        input_path: Path to the image file

    Returns:
        Base64 encoded string
    """
    with input_path.open("rb") as handle:
        raw = handle.read()
    encoded = base64.b64encode(raw)
    return encoded.decode("utf-8")
def dummy_process_scan(input_path: Path) -> Path:
    """
    Fallback "processing": copy the scan unchanged into BEREINIGT_DIR.

    The copy is named ``<stem>_bereinigt<suffix>``.

    Args:
        input_path: Path to input file

    Returns:
        Path to copied file

    Raises:
        FileNotFoundError: input_path does not exist
    """
    if input_path.exists():
        destination = BEREINIGT_DIR / (input_path.stem + "_bereinigt" + input_path.suffix)
        shutil.copy2(input_path, destination)
        return destination
    raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")
def get_media_type(input_path: Path) -> str:
    """
    Map a file extension to its MIME type.

    Args:
        input_path: Path to the file

    Returns:
        MIME type string; "image/jpeg" for unrecognised extensions
    """
    extension = input_path.suffix.lower()
    if extension == ".pdf":
        return "application/pdf"
    if extension in (".png", ".gif", ".webp"):
        # For these formats the MIME subtype equals the extension without the dot
        return "image/" + extension[1:]
    # .jpg / .jpeg and every unrecognised extension
    return "image/jpeg"

View File

@@ -0,0 +1,19 @@
"""
AI Processor - Vision Module
Scan analysis and HTML generation.
"""
from .scan_analyzer import (
analyze_scan_structure_with_ai,
describe_scan_with_ai,
remove_handwriting_from_scan,
)
from .html_builder import build_clean_html_from_analysis
__all__ = [
"analyze_scan_structure_with_ai",
"describe_scan_with_ai",
"remove_handwriting_from_scan",
"build_clean_html_from_analysis",
]

View File

@@ -0,0 +1,218 @@
"""
AI Processor - HTML Builder
Build clean HTML worksheets from analysis data.
"""
from pathlib import Path
import json
import logging
from ..config import BEREINIGT_DIR
logger = logging.getLogger(__name__)
def build_clean_html_from_analysis(analysis_path: Path) -> Path:
    """
    Build a clean HTML worksheet from an analysis JSON file.

    Features:
    - Uses printed_blocks if present, otherwise canonical_text split into paragraphs
    - Handwritten annotations and struck-through words are not rendered; a
      footnote mentions struck-through words when the analysis lists any
    - Gaps ("___") in task texts are rendered as underlined spans
    - All text taken from the analysis is HTML-escaped before it is inserted,
      because it originates from OCR / model output and may contain
      characters such as "<" or "&"

    Args:
        analysis_path: Path to *_analyse.json file

    Returns:
        Path to the generated HTML file (``<name>_clean.html`` in BEREINIGT_DIR)

    Raises:
        FileNotFoundError: analysis_path does not exist
        RuntimeError: the analysis file does not contain valid JSON
    """
    from html import escape  # local import: keeps this block self-contained

    def esc(value) -> str:
        # Values come from model-generated JSON and may be non-strings
        # (e.g. a numeric grade level), so coerce before escaping.
        return escape(str(value), quote=True)

    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Analyse-Datei enthaelt kein gueltiges JSON: {analysis_path}\n{e}") from e

    title = data.get("title") or "Arbeitsblatt"
    subject = data.get("subject") or ""
    grade_level = data.get("grade_level") or ""
    instructions = data.get("instructions") or ""
    tasks = data.get("tasks", []) or []
    canonical_text = data.get("canonical_text") or ""
    printed_blocks = data.get("printed_blocks") or []
    struck = data.get("struck_through_words") or []

    html_parts = []
    html_parts.append("<!DOCTYPE html>")
    html_parts.append("<html lang='de'>")
    html_parts.append("<head>")
    html_parts.append("<meta charset='UTF-8'>")
    html_parts.append(f"<title>{esc(title)}</title>")
    html_parts.append(_get_html_styles())
    html_parts.append("</head>")
    html_parts.append("<body>")
    html_parts.append("<div class='page'>")

    # Header section
    html_parts.append(f"<h1>{esc(title)}</h1>")
    meta_bits = []
    if subject:
        meta_bits.append(f"Fach: {esc(subject)}")
    if grade_level:
        meta_bits.append(f"Klassenstufe: {esc(grade_level)}")
    if meta_bits:
        html_parts.append(f"<div class='meta'>{' | '.join(meta_bits)}</div>")
    if instructions:
        html_parts.append(
            f"<div class='instructions'><strong>Arbeitsanweisung:</strong> {esc(instructions)}</div>"
        )

    # Main text / printed blocks
    html_parts.append("<section class='text-blocks'>")
    if printed_blocks:
        for block in printed_blocks:
            role = (block.get("role") or "body").lower()
            text = (block.get("text") or "").strip()
            if not text:
                continue
            html_parts.append("<div class='text-block'>")
            if role == "title":
                html_parts.append(f"<div class='text-block-title'>{esc(text)}</div>")
            else:
                html_parts.append(f"<div>{esc(text)}</div>")
            html_parts.append("</div>")
    elif canonical_text:
        # Fallback: split canonical_text into paragraphs
        paragraphs = [
            p.strip()
            for p in canonical_text.replace("\r\n", "\n").split("\n\n")
            if p.strip()
        ]
        for p in paragraphs:
            html_parts.append(f"<div class='text-block'>{esc(p)}</div>")
    html_parts.append("</section>")

    # Tasks section
    if tasks:
        html_parts.append("<h2>Aufgaben</h2>")
        html_parts.append("<div class='task-list'>")
        for idx, task in enumerate(tasks, start=1):
            t_type = task.get("type") or "other"
            desc = task.get("description") or ""
            text_with_gaps = task.get("text_with_gaps")
            html_parts.append("<div class='task'>")
            html_parts.append(
                f"<div class='task-title'>Aufgabe {idx} ({esc(t_type)}): {esc(desc)}</div>"
            )
            if text_with_gaps:
                # Escape first, then insert the gap markup; escaping leaves "___" untouched
                rendered = esc(text_with_gaps).replace("___", "<span class='gap-line'>&nbsp;</span>")
                html_parts.append(f"<div>{rendered}</div>")
            html_parts.append("</div>")
        html_parts.append("</div>")

    # Footer note
    if struck:
        html_parts.append(
            "<div class='footnote'>Hinweis: Einige im Original durchgestrichene Woerter wurden "
            "von der KI erkannt und NICHT in dieses saubere Arbeitsblatt uebernommen.</div>"
        )
    else:
        html_parts.append(
            "<div class='footnote'>Dieses Arbeitsblatt wurde automatisch aus einem Scan rekonstruiert "
            "und von handschriftlichen Eintragungen bereinigt.</div>"
        )

    html_parts.append("</div>")  # .page
    html_parts.append("</body></html>")

    html_content = "\n".join(html_parts)
    out_name = analysis_path.stem.replace("_analyse", "") + "_clean.html"
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(html_content, encoding="utf-8")
    return out_path
def _get_html_styles() -> str:
    """
    Return the inline <style> element used by the clean HTML worksheet.

    The stylesheet is a constant string. It defines the font stack
    (--font-main) and the classes emitted by build_clean_html_from_analysis():
    page, meta, instructions, text-blocks, text-block, text-block-title,
    task-list, task, task-title, gap-line and footnote.

    Returns:
        The complete ``<style>...</style>`` markup, with a leading and a
        trailing newline.
    """
    return """
<style>
:root {
--font-main: "Inter", "Noto Sans", system-ui, -apple-system, BlinkMacSystemFont, sans-serif;
}
* { box-sizing: border-box; }
body {
font-family: var(--font-main);
margin: 32px;
line-height: 1.5;
font-size: 14px;
color: #111827;
}
.page {
max-width: 800px;
margin: 0 auto;
}
h1 {
font-size: 24px;
margin-bottom: 4px;
}
h2 {
font-size: 18px;
margin-top: 24px;
}
.meta {
font-size: 12px;
color: #6b7280;
margin-bottom: 16px;
}
.instructions {
margin-bottom: 20px;
padding: 8px 10px;
border-radius: 8px;
background: #eff6ff;
border: 1px solid #bfdbfe;
font-size: 13px;
}
.text-blocks {
margin-bottom: 24px;
}
.text-block {
margin-bottom: 8px;
}
.text-block-title {
font-weight: 600;
margin-bottom: 4px;
}
.task-list {
margin-top: 8px;
}
.task {
margin-bottom: 14px;
padding-bottom: 8px;
border-bottom: 1px dashed #e5e7eb;
}
.task-title {
font-weight: 600;
margin-bottom: 4px;
}
.gap-line {
display: inline-block;
border-bottom: 1px solid #000;
min-width: 80px;
margin: 0 4px;
}
.footnote {
margin-top: 24px;
font-size: 11px;
color: #9ca3af;
}
</style>
"""

View File

@@ -0,0 +1,307 @@
"""
AI Processor - Scan Analyzer
Vision-based analysis of worksheets using OpenAI and Claude APIs.
"""
from pathlib import Path
import json
import logging
import shutil
import requests
from ..config import (
VISION_API,
BEREINIGT_DIR,
get_openai_api_key,
)
from ..utils import encode_image_to_data_url
logger = logging.getLogger(__name__)
def describe_scan_with_ai(input_path: Path) -> Path:
    """
    Ask the OpenAI vision model for a short description of the worksheet.

    The image is sent as a data URL to the chat completions endpoint
    ("gpt-4o-mini") and the returned text is stored as
    ``<stem>_beschreibung.txt`` in BEREINIGT_DIR.

    Args:
        input_path: Path to the input image

    Returns:
        Path to the description text file

    Raises:
        FileNotFoundError: input_path does not exist
        RuntimeError: the response does not have the expected structure
        requests.RequestException: the HTTP request failed, timed out or returned an error status
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    api_key = get_openai_api_key()
    image_data_url = encode_image_to_data_url(input_path)

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "system",
                "content": "Du bist ein hilfreicher Assistent, der Schul-Arbeitsblaetter knapp beschreibt.",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Beschreibe dieses Arbeitsblatt knapp: Thema, Art der Aufgaben "
                            "(z.B. Lueckentext, Multiple Choice, Rechenaufgaben) und groben Inhalt."
                        ),
                    },
                    {"type": "image_url", "image_url": {"url": image_data_url}},
                ],
            },
        ],
        "max_tokens": 400,
    }

    # requests has no default timeout; without one a stalled connection blocks forever.
    # (connect timeout, read timeout) in seconds.
    response = requests.post(url, headers=headers, json=payload, timeout=(10, 120))
    response.raise_for_status()
    data = response.json()

    try:
        description = data["choices"][0]["message"]["content"]
    except Exception as e:
        raise RuntimeError(f"Unerwartete Antwortstruktur von der KI: {e}\nAntwort: {data}") from e

    out_name = input_path.stem + "_beschreibung.txt"
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(description, encoding="utf-8")
    return out_path
def _analyze_with_openai(input_path: Path) -> Path:
"""
Structured JSON analysis of the worksheet using OpenAI.
Features:
- canonical_text: complete corrected text without handwriting
- printed_blocks: structured blocks of printed text
- handwritten_annotations: student handwritten notes
- struck_through_words: crossed out words
"""
if not input_path.exists():
raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")
api_key = get_openai_api_key()
image_data_url = encode_image_to_data_url(input_path)
url = "https://api.openai.com/v1/chat/completions"
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
system_prompt = (
"Du bist ein Experte fuer die Analyse von Schul-Arbeitsblaettern.\n\n"
"HAUPTAUFGABEN:\n"
"1. Erkenne ALLE gedruckten Elemente: Text, Ueberschriften, Tabellen, Linien, Kaestchen, Diagramme, Illustrationen\n"
"2. Identifiziere ALLE handschriftlichen Ergaenzungen: Antworten, Zahlen, Buchstaben, Notizen, Zeichnungen\n"
"3. Bestimme praezise Positionen (Bounding Boxes in Pixeln) fuer JEDES Element\n\n"
"KRITISCH - DIAGRAMME & ILLUSTRATIONEN:\n"
"- Suche aktiv nach: anatomischen Zeichnungen, beschrifteten Diagrammen, Grafiken, Tabellen, Skizzen\n"
"- Wenn du irgendeine bildliche Darstellung siehst (z.B. Auge, Pflanze, Karte, Schaubild), setze 'has_diagram: true'\n"
"- Fuer JEDES visuelle Element: Erstelle einen Eintrag in 'diagram_elements' mit genauer Position\n"
"- Beschrifte-Linien (von Beschriftung zu Bildteil) gehoeren zum Diagramm!\n\n"
"HANDSCHRIFT ERKENNUNG:\n"
"- Unterscheide gedruckt vs. handgeschrieben anhand der Schriftart\n"
"- Klassifiziere Farbe: blau/schwarz/rot/pencil (Bleistift)\n"
"- Durchgestrichene Woerter separat auflisten\n\n"
"AUSGABE: Gib deine Antwort AUSSCHLIESSLICH als gueltiges JSON zurueck (kein Markdown, keine Code-Bloecke)."
)
user_text = _get_analysis_user_prompt()
payload = {
"model": "gpt-4o-mini",
"response_format": {"type": "json_object"},
"messages": [
{"role": "system", "content": system_prompt},
{
"role": "user",
"content": [
{"type": "text", "text": user_text},
{"type": "image_url", "image_url": {"url": image_data_url}},
],
},
],
"max_tokens": 2500,
"temperature": 0.15,
}
response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
data = response.json()
try:
content = data["choices"][0]["message"]["content"]
except Exception as e:
raise RuntimeError(f"Unerwartete Antwortstruktur von der KI: {e}\nAntwort: {data}") from e
try:
obj = json.loads(content)
except json.JSONDecodeError as e:
raise RuntimeError(f"Modell hat ungueltiges JSON geliefert: {e}\nInhalt: {content}") from e
out_name = input_path.stem + "_analyse.json"
out_path = BEREINIGT_DIR / out_name
out_path.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")
return out_path
def _analyze_with_claude(input_path: Path) -> Path:
    """
    Run the structured worksheet analysis through the Claude Vision helper.

    The scan is handed to ``analyze_worksheet_with_claude`` (imported lazily
    from ``claude_vision``) and the returned data is written as
    ``<stem>_analyse.json`` into ``BEREINIGT_DIR``.

    Args:
        input_path: Path to the worksheet scan.

    Returns:
        Path to the written analysis JSON file.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        Exception: Any error raised during analysis or writing is logged and re-raised.
    """
    from claude_vision import analyze_worksheet_with_claude

    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    logger.info(f"Analyzing with Claude Vision: {input_path.name}")

    try:
        result = analyze_worksheet_with_claude(input_path, max_tokens=2500)

        # Same naming scheme as the other analysis backends: <stem>_analyse.json
        target = BEREINIGT_DIR / (input_path.stem + "_analyse.json")
        serialized = json.dumps(result, ensure_ascii=False, indent=2)
        target.write_text(serialized, encoding="utf-8")

        logger.info(f"Claude analysis saved: {target.name}")
    except Exception as e:
        # Log here, but leave the fallback decision to the caller
        logger.error(f"Claude analysis failed: {e}")
        raise

    return target
def analyze_scan_structure_with_ai(input_path: Path) -> Path:
    """
    Structured JSON analysis of the worksheet, dispatched on ``VISION_API``.

    Behaviour per configured value:
    - "claude": try the Claude backend; on any exception fall back to OpenAI.
    - "openai": use the OpenAI backend directly.
    - anything else: log a warning and use the Claude backend
      (no OpenAI fallback in this branch).

    Args:
        input_path: Path to the worksheet scan.

    Returns:
        Path to the analysis JSON file.
    """
    logger.info(f"Using Vision API: {VISION_API}")

    if VISION_API == "openai":
        return _analyze_with_openai(input_path)

    if VISION_API != "claude":
        logger.warning(f"Unknown VISION_API '{VISION_API}', using Claude as default")
        return _analyze_with_claude(input_path)

    # Configured for Claude: OpenAI acts as a safety net
    try:
        return _analyze_with_claude(input_path)
    except Exception as e:
        logger.warning(f"Claude failed, falling back to OpenAI: {e}")
        return _analyze_with_openai(input_path)
def remove_handwriting_from_scan(input_path: Path) -> Path:
    """
    Remove handwriting from a worksheet scan using the stored analysis.

    Steps:
    1. Locate ``<stem>_analyse.json`` in ``BEREINIGT_DIR``; run the analysis
       first if that file does not exist yet.
    2. Load the analysis; if it is not valid JSON, continue with an empty
       layout / handwriting structure.
    3. Let ``WorksheetCleaner`` produce the cleaned image.
    4. If cleaning raises, copy the original scan to the output path instead.

    Args:
        input_path: Path to the worksheet scan.

    Returns:
        Path to the cleaned image, named ``<stem>_clean<original suffix>``.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    from image_cleaner import WorksheetCleaner

    stem = input_path.stem

    # Analysis JSON produced by stage 1; create it on demand
    analysis_path = BEREINIGT_DIR / (stem + "_analyse.json")
    if not analysis_path.exists():
        logger.info(f"Analysis not found for {input_path.name}, running analysis first")
        analysis_path = analyze_scan_structure_with_ai(input_path)

    try:
        raw_analysis = analysis_path.read_text(encoding='utf-8')
        analysis_data = json.loads(raw_analysis)
    except json.JSONDecodeError as e:
        logger.error(f"Invalid analysis JSON: {analysis_path}\n{e}")
        # Minimal structure so the cleaner still receives the expected keys
        analysis_data = {
            "layout": {"text_regions": [], "diagram_elements": []},
            "handwriting_regions": []
        }

    output_path = BEREINIGT_DIR / (stem + "_clean" + input_path.suffix)

    worksheet_cleaner = WorksheetCleaner(debug_mode=False)
    try:
        result_path = worksheet_cleaner.clean_worksheet(input_path, analysis_data, output_path)
        logger.info(f"Successfully cleaned {input_path.name}")
        return result_path
    except Exception as e:
        # Best effort: hand back an unmodified copy rather than failing the pipeline
        logger.error(f"Cleaning failed for {input_path.name}, using original: {e}")
        shutil.copy2(input_path, output_path)
        return output_path
def _get_analysis_user_prompt() -> str:
"""Get the user prompt for worksheet analysis."""
return (
"Analysiere dieses Arbeitsblatt und gib ein JSON mit folgendem Aufbau zurueck:\n\n"
"{\n"
' "title": string | null,\n'
' "subject": string | null,\n'
' "grade_level": string | null,\n'
' "instructions": string | null,\n'
' "canonical_text": string | null,\n'
' "printed_blocks": [\n'
" {\n"
' "id": string,\n'
' "role": "title" | "instructions" | "body" | "other",\n'
' "text": string\n'
" }\n"
" ],\n"
' "layout": {\n'
' "page_structure": {\n'
' "has_diagram": boolean,\n'
' "orientation": "portrait" | "landscape"\n'
" },\n"
' "text_regions": [...],\n'
' "diagram_elements": [...]\n'
" },\n"
' "handwriting_regions": [...],\n'
' "handwritten_annotations": [...],\n'
' "struck_through_words": [...],\n'
' "tasks": [...]\n'
"}\n\n"
"WICHTIG - BITTE GENAU BEACHTEN:\n"
"1. CANONICAL TEXT: Nur gedruckter Text, OHNE Handschrift\n"
"2. DIAGRAMME: Bei JEDER Zeichnung/Grafik has_diagram: true setzen\n"
"3. HANDSCHRIFT: Mit Farb-Klassifizierung und Bounding Boxes\n"
"4. Bei Unsicherheit: null oder leeres Array"
)

View File

@@ -0,0 +1,17 @@
"""
AI Processor - Visualization Module
Mindmap generation for learning posters.
"""
from .mindmap import (
generate_mindmap_data,
generate_mindmap_html,
save_mindmap_for_worksheet,
)
__all__ = [
"generate_mindmap_data",
"generate_mindmap_html",
"save_mindmap_for_worksheet",
]

View File

@@ -0,0 +1,471 @@
"""
AI Processor - Mindmap Generator
Generate mindmaps for learning posters.
"""
from pathlib import Path
import json
import logging
import math
import os
import requests
from ..config import BEREINIGT_DIR, get_openai_api_key
logger = logging.getLogger(__name__)
def generate_mindmap_data(analysis_path: Path) -> dict:
    """
    Extract technical terms from an analysis file and group them for a mindmap.

    The printed text and task texts of the analysis are sent to an LLM
    (Claude if ``ANTHROPIC_API_KEY`` is set, otherwise OpenAI), which is asked
    to answer with the mindmap JSON. If the analysis contains no text, or the
    LLM call / response parsing fails, an empty mindmap (no categories) is
    returned instead of raising.

    Args:
        analysis_path: Path to *_analyse.json file

    Returns:
        Dictionary with mindmap structure:
        {
            "topic": "Main topic",
            "subject": "Subject",
            "categories": [
                {
                    "name": "Category",
                    "color": "#hexcolor",
                    "emoji": "🔬",
                    "terms": [
                        {"term": "Term", "explanation": "Short explanation"}
                    ]
                }
            ]
        }

    Raises:
        FileNotFoundError: If ``analysis_path`` does not exist.
        RuntimeError: If the analysis file is not valid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")

    try:
        data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Analyse-Datei enthaelt kein gueltiges JSON: {analysis_path}\n{e}") from e

    title = data.get("title") or "Arbeitsblatt"
    subject = data.get("subject") or ""
    canonical_text = data.get("canonical_text") or ""
    tasks = data.get("tasks", []) or []

    # Collect all text for analysis. The analysis JSON is model-generated, so
    # entries that are not dicts (or fields that are not strings) are skipped
    # instead of crashing the whole mindmap generation.
    all_text = canonical_text
    for task in tasks:
        if not isinstance(task, dict):
            continue
        for field in ("description", "text_with_gaps"):
            value = task.get(field)
            if value and isinstance(value, str):
                all_text += "\n" + value

    if not all_text.strip():
        return {
            "topic": title,
            "subject": subject,
            "categories": []
        }

    # Decide on the backend first. The OpenAI key is only resolved when it is
    # actually needed, so a Claude-only setup does not depend on OpenAI config.
    claude_key = os.environ.get("ANTHROPIC_API_KEY")
    api_key = None if claude_key else get_openai_api_key()

    prompt = f"""Analysiere diesen Schultext und extrahiere alle Fachbegriffe fuer eine kindgerechte Lern-Mindmap.
THEMA: {title}
FACH: {subject}
TEXT:
{all_text[:3000]}
AUFGABE:
1. Identifiziere das Hauptthema (ein einzelnes Wort oder kurzer Begriff)
2. Finde ALLE Fachbegriffe und gruppiere sie in 3-6 sinnvolle Kategorien
3. Gib fuer jeden Begriff eine kurze, kindgerechte Erklaerung (max 10 Woerter)
4. Waehle fuer jede Kategorie ein passendes Emoji und eine Farbe
Antworte NUR mit diesem JSON-Format:
{{
"topic": "Hauptthema (z.B. 'Das Auge')",
"categories": [
{{
"name": "Kategoriename",
"emoji": "passendes Emoji",
"color": "#Hexfarbe (bunt, kindgerecht)",
"terms": [
{{"term": "Fachbegriff", "explanation": "Kurze Erklaerung"}}
]
}}
]
}}
WICHTIG:
- Verwende kindgerechte, einfache Sprache
- Bunte, froehliche Farben: #FF6B6B, #4ECDC4, #45B7D1, #96CEB4, #FFEAA7, #DDA0DD, #98D8C8
- Passende Emojis fuer jede Kategorie
- Mindestens 3 Begriffe pro Kategorie wenn moeglich
- Maximal 6 Kategorien"""

    try:
        if claude_key:
            # Preferred backend: Claude
            import anthropic
            client = anthropic.Anthropic(api_key=claude_key)
            response = client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=2000,
                messages=[{"role": "user", "content": prompt}]
            )
            result_text = response.content[0].text
        else:
            # Fallback to OpenAI
            logger.info("Claude Mindmap-Generierung fehlgeschlagen, nutze OpenAI: ANTHROPIC_API_KEY ist nicht gesetzt.")
            url = "https://api.openai.com/v1/chat/completions"
            headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
            payload = {
                "model": "gpt-4o-mini",
                "messages": [
                    {"role": "system", "content": "Du bist ein Experte fuer kindgerechte Lernmaterialien."},
                    {"role": "user", "content": prompt}
                ],
                "max_tokens": 2000,
                "temperature": 0.7
            }
            resp = requests.post(url, headers=headers, json=payload, timeout=60)
            resp.raise_for_status()
            result_text = resp.json()["choices"][0]["message"]["content"]

        # Extract JSON: strip an optional Markdown code fence (``` or ```json)
        result_text = result_text.strip()
        if result_text.startswith("```"):
            result_text = result_text.split("```")[1]
            if result_text.startswith("json"):
                result_text = result_text[4:]
            result_text = result_text.strip()

        mindmap_data = json.loads(result_text)
        if not isinstance(mindmap_data, dict):
            # Handled by the except below: fall back to the empty mindmap
            raise ValueError("Mindmap response is not a JSON object")
        mindmap_data["subject"] = subject
        return mindmap_data
    except Exception as e:
        # Deliberate best effort: the caller always gets a renderable structure
        logger.error(f"Mindmap-Generierung fehlgeschlagen: {e}")
        return {
            "topic": title,
            "subject": subject,
            "categories": []
        }
def generate_mindmap_html(mindmap_data: dict, format: str = "a3") -> str:
    """
    Generate a child-friendly HTML/SVG mindmap poster.

    All text taken from ``mindmap_data`` (topic, subject, category names,
    emojis, colors, terms) is HTML-escaped before it is inserted into the
    markup, because it originates from model output / scanned worksheets.
    The header helper receives the already-escaped topic and subject.

    Args:
        mindmap_data: Dictionary from generate_mindmap_data()
        format: "a3" for A3 poster (default) or "a4" for A4 view

    Returns:
        HTML string with SVG mindmap. If there are no categories, a short
        placeholder page is returned instead.
    """
    # Local import: only `escape` is needed, and the bare module name `html`
    # is easy to shadow with a local variable.
    from html import escape

    def _text(value) -> str:
        """Escape an arbitrary value for use in HTML text and attribute values."""
        return escape(str(value))

    max_terms_per_category = 8
    max_legend_terms = 3

    topic = _text(mindmap_data.get("topic", "Thema"))
    subject = _text(mindmap_data.get("subject", ""))
    categories = mindmap_data.get("categories", [])

    # Format-specific settings
    if format.lower() == "a4":
        page_size = "A4 landscape"
        svg_width = 1100
        svg_height = 780
        radius = 250
    else:  # a3 (default)
        page_size = "A3 landscape"
        svg_width = 1400
        svg_height = 990
        radius = 320

    # If no categories, show placeholder
    if not categories:
        return f"""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Mindmap - {topic}</title>
<style>
body {{ font-family: 'Comic Sans MS', cursive, sans-serif; text-align: center; padding: 50px; }}
h1 {{ color: #FF6B6B; }}
</style>
</head>
<body>
<h1>🧠 Mindmap: {topic}</h1>
<p>Noch keine Daten vorhanden. Bitte zuerst das Arbeitsblatt analysieren.</p>
</body>
</html>"""

    num_categories = len(categories)
    center_x = svg_width // 2
    center_y = svg_height // 2

    # Calculate positions of categories in a circle
    category_positions = []
    for i, cat in enumerate(categories):
        angle = (2 * math.pi * i / num_categories) - (math.pi / 2)  # Start at top
        category_positions.append({
            "x": center_x + radius * math.cos(angle),
            "y": center_y + radius * math.sin(angle),
            "angle": angle,
            "data": cat
        })

    parts = [_get_mindmap_html_header(topic, subject, page_size, svg_width, svg_height)]

    # Draw connection lines (quadratic curves from the center to each category)
    for pos in category_positions:
        color = _text(pos["data"].get("color", "#4ECDC4"))
        ctrl_x = (center_x + pos["x"]) / 2 + 30
        ctrl_y = (center_y + pos["y"]) / 2 - 30
        parts.append(f""" <path d="M {center_x} {center_y} Q {ctrl_x} {ctrl_y} {pos['x']} {pos['y']}"
stroke="{color}" stroke-width="4" fill="none" stroke-linecap="round" opacity="0.6"/>
""")

    # Center (main topic)
    parts.append(f"""
<!-- Center: Main Topic -->
<g filter="url(#glow)">
<circle cx="{center_x}" cy="{center_y}" r="85" fill="url(#centerGradient)"/>
<defs>
<radialGradient id="centerGradient" cx="30%" cy="30%">
<stop offset="0%" stop-color="#FFD93D"/>
<stop offset="100%" stop-color="#FF6B6B"/>
</radialGradient>
</defs>
<text x="{center_x}" y="{center_y - 10}" text-anchor="middle" font-size="28" font-weight="bold" fill="white">🌟</text>
<text x="{center_x}" y="{center_y + 25}" text-anchor="middle" font-size="22" font-weight="bold" fill="white">{topic}</text>
</g>
""")

    # Draw categories with their terms
    for pos in category_positions:
        cat = pos["data"]
        cat_x = pos["x"]
        cat_y = pos["y"]
        color = _text(cat.get("color", "#4ECDC4"))
        emoji = _text(cat.get("emoji", "📚"))
        name = _text(cat.get("name", "Kategorie"))
        terms = cat.get("terms", []) or []

        # Category bubble
        parts.append(f"""
<!-- Category: {name} -->
<g class="category-group" transform="translate({cat_x}, {cat_y})">
<ellipse cx="0" cy="0" rx="75" ry="45" fill="{color}" filter="url(#shadow)"/>
<text x="0" y="-8" text-anchor="middle" font-size="20">{emoji}</text>
<text x="0" y="18" text-anchor="middle" font-size="14" font-weight="bold" fill="white">{name}</text>
""")

        # Terms around the category. The fan is laid out over the terms that
        # are actually drawn, so a category with more than the maximum still
        # uses the full spread instead of only a slice of it.
        term_radius = 110
        shown_terms = terms[:max_terms_per_category]
        num_terms = len(shown_terms)
        base_angle = pos["angle"]
        spread = math.pi * 0.8  # 80% of a half circle
        for j, term_data in enumerate(shown_terms):
            raw_term = str(term_data.get("term", ""))

            # Calculate position relative to category
            if num_terms > 1:
                term_angle = base_angle - spread/2 + (spread * j / (num_terms - 1))
            else:
                term_angle = base_angle
            term_x = term_radius * math.cos(term_angle - base_angle)
            term_y = term_radius * math.sin(term_angle - base_angle)

            # Small connection line
            parts.append(f""" <line x1="0" y1="0" x2="{term_x * 0.6}" y2="{term_y * 0.6}" stroke="{color}" stroke-width="2" opacity="0.5"/>
""")

            # Term bubble; width is derived from the unescaped term length
            bubble_width = max(70, len(raw_term) * 8 + 20)
            parts.append(f""" <g class="term-bubble" transform="translate({term_x}, {term_y})">
<rect x="{-bubble_width/2}" y="-22" width="{bubble_width}" height="44" rx="22" fill="white" stroke="{color}" stroke-width="2" filter="url(#shadow)"/>
<text x="0" y="5" text-anchor="middle" font-size="12" font-weight="bold" fill="#333">{_text(raw_term)}</text>
</g>
""")

        parts.append(" </g>\n")

    # Legend with explanations (bottom)
    parts.append(f"""
<!-- Legend -->
<g transform="translate(50, {svg_height - 80})">
<text x="0" y="0" font-size="14" font-weight="bold" fill="#666">📖 Begriffe zum Lernen:</text>
""")

    legend_x = 0
    for pos in category_positions:
        cat = pos["data"]
        color = _text(cat.get("color", "#4ECDC4"))
        emoji = _text(cat.get("emoji", "📚"))
        name = _text(cat.get("name", ""))
        terms = cat.get("terms", []) or []

        terms_text = ", ".join(_text(t.get("term", "")) for t in terms[:max_legend_terms])
        if len(terms) > max_legend_terms:
            terms_text += "..."

        parts.append(f""" <g transform="translate({legend_x}, 25)">
<circle cx="8" cy="0" r="8" fill="{color}"/>
<text x="22" y="4" font-size="11" fill="#444"><tspan font-weight="bold">{emoji} {name}:</tspan> {terms_text}</text>
</g>
""")
        legend_x += 220

    parts.append(""" </g>
</svg>
</div>
</body>
</html>""")

    return "".join(parts)
def save_mindmap_for_worksheet(analysis_path: Path, mindmap_data: dict = None) -> Path:
    """
    Persist the mindmap data belonging to a worksheet analysis.

    The output file name is the analysis stem with "_analyse" removed and
    "_mindmap.json" appended; the file is written into ``BEREINIGT_DIR``.

    Args:
        analysis_path: Path to *_analyse.json file
        mindmap_data: Optional, already generated mindmap data. When omitted
            (``None``), it is generated from ``analysis_path``.

    Returns:
        Path to saved *_mindmap.json file
    """
    payload = generate_mindmap_data(analysis_path) if mindmap_data is None else mindmap_data

    base_name = analysis_path.stem.replace("_analyse", "")
    target = BEREINIGT_DIR / f"{base_name}_mindmap.json"

    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8")

    logger.info(f"Mindmap-Daten gespeichert: {target.name}")
    return target
def _get_mindmap_html_header(topic: str, subject: str, page_size: str, svg_width: int, svg_height: int) -> str:
"""Get HTML header for mindmap."""
return f"""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Lernposter - {topic}</title>
<style>
@page {{
size: {page_size};
margin: 10mm;
}}
@media print {{
body {{ -webkit-print-color-adjust: exact; print-color-adjust: exact; }}
.no-print {{ display: none !important; }}
}}
* {{ box-sizing: border-box; margin: 0; padding: 0; }}
body {{
font-family: 'Comic Sans MS', 'Chalkboard SE', 'Comic Neue', cursive, sans-serif;
background: linear-gradient(135deg, #f5f7fa 0%, #e4e8f0 100%);
min-height: 100vh;
padding: 20px;
}}
.poster-container {{
width: 100%;
max-width: 1400px;
margin: 0 auto;
background: white;
border-radius: 20px;
box-shadow: 0 10px 40px rgba(0,0,0,0.1);
overflow: hidden;
}}
.poster-header {{
background: linear-gradient(90deg, #FF6B6B, #4ECDC4);
padding: 15px 30px;
display: flex;
justify-content: space-between;
align-items: center;
}}
.poster-title {{
color: white;
font-size: 24px;
text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
}}
.poster-subject {{
color: white;
font-size: 16px;
opacity: 0.9;
}}
.mindmap-svg {{
width: 100%;
height: auto;
}}
.print-btn {{
position: fixed;
top: 20px;
right: 20px;
padding: 12px 24px;
background: #4ECDC4;
color: white;
border: none;
border-radius: 25px;
font-size: 16px;
cursor: pointer;
box-shadow: 0 4px 15px rgba(78, 205, 196, 0.4);
font-family: inherit;
}}
.print-btn:hover {{
transform: scale(1.05);
background: #45B7D1;
}}
.category-group:hover {{
transform: scale(1.02);
cursor: pointer;
}}
.term-bubble:hover {{
transform: scale(1.1);
filter: brightness(1.1);
}}
</style>
</head>
<body>
<button class="print-btn no-print" onclick="window.print()">🖨️ Als A3 drucken</button>
<div class="poster-container">
<div class="poster-header">
<div class="poster-title">🧠 Lernposter: {topic}</div>
<div class="poster-subject">{subject}</div>
</div>
<svg class="mindmap-svg" viewBox="0 0 {svg_width} {svg_height}" xmlns="http://www.w3.org/2000/svg">
<defs>
<!-- Shadow for bubbles -->
<filter id="shadow" x="-20%" y="-20%" width="140%" height="140%">
<feDropShadow dx="2" dy="4" stdDeviation="4" flood-opacity="0.2"/>
</filter>
<!-- Glow effect for center -->
<filter id="glow">
<feGaussianBlur stdDeviation="8" result="coloredBlur"/>
<feMerge>
<feMergeNode in="coloredBlur"/>
<feMergeNode in="SourceGraphic"/>
</feMerge>
</filter>
</defs>
<!-- Background pattern (subtle dots) -->
<pattern id="dots" x="0" y="0" width="30" height="30" patternUnits="userSpaceOnUse">
<circle cx="15" cy="15" r="1.5" fill="#e0e0e0"/>
</pattern>
<rect width="100%" height="100%" fill="url(#dots)"/>
<!-- Connection lines from center to categories -->
"""

72
backend/alembic.ini Normal file
View File

@@ -0,0 +1,72 @@
# Alembic Configuration fuer Classroom Engine (Feature f15)
#
# Migration-Tool fuer PostgreSQL Schema-Aenderungen.
[alembic]
# Pfad zum migrations-Verzeichnis
script_location = alembic
# Template fuer neue Migrationen
file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d_%%(rev)s_%%(slug)s
# Prepend sys.path
prepend_sys_path = .
# Timezone fuer Revisions
timezone = UTC
# Truncate slug
truncate_slug_length = 40
# Version locations
version_locations = %(here)s/alembic/versions
# Output encoding
output_encoding = utf-8
# Database URL (wird von env.py ueberschrieben)
sqlalchemy.url = postgresql://breakpilot:breakpilot123@localhost:5432/breakpilot
[post_write_hooks]
# Formatierung nach Migration-Erstellung
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -q
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

View File

107
backend/alembic/env.py Normal file
View File

@@ -0,0 +1,107 @@
"""
Alembic Environment Configuration fuer Classroom Engine.
Laedt die DB-Konfiguration und Models fuer Migrationen.
"""
import os
from logging.config import fileConfig
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from alembic import context
# Alembic Config
config = context.config
# Logging Setup
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# Import unserer Models fuer autogenerate
from classroom_engine.database import Base
from classroom_engine.db_models import (
LessonSessionDB,
PhaseHistoryDB,
TeacherSettingsDB,
)
# Alerts Agent Models (nutzt gleiche Base)
from alerts_agent.db.models import (
AlertTopicDB,
AlertItemDB,
AlertRuleDB,
AlertProfileDB,
)
# Test Registry Models
try:
from api.tests.db_models import (
TestRunDB,
TestResultDB,
FailedTestBacklogDB,
TestFixHistoryDB,
TestServiceStatsDB,
)
except ImportError:
# Models noch nicht vorhanden - wird bei Migration erstellt
pass
target_metadata = Base.metadata
# Database URL: the DATABASE_URL environment variable takes precedence over
# the sqlalchemy.url entry of the Alembic config.
_raw_url = os.getenv("DATABASE_URL", config.get_main_option("sqlalchemy.url"))

# SQLAlchemy expects the "postgresql://" scheme rather than the legacy
# "postgres://" form; only a leading scheme is rewritten.
if _raw_url and _raw_url.startswith("postgres://"):
    DATABASE_URL = _raw_url.replace("postgres://", "postgresql://", 1)
else:
    DATABASE_URL = _raw_url
def run_migrations_offline() -> None:
    """
    Run migrations in 'offline' mode.

    Configures the Alembic context with the database URL only (no engine or
    connection is created here) and emits the migrations with literal binds.
    """
    offline_options = {
        "url": DATABASE_URL,
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """
    Run migrations in 'online' mode.

    Builds an engine from the Alembic ini section (with ``sqlalchemy.url``
    overridden by ``DATABASE_URL``), connects and runs the migrations
    directly against the database.
    """
    ini_section = config.get_section(config.config_ini_section)
    # The resolved URL replaces whatever the ini file contains
    ini_section["sqlalchemy.url"] = DATABASE_URL

    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
# Run the migration routine that matches the mode Alembic was invoked in
(run_migrations_offline if context.is_offline_mode() else run_migrations_online)()

View File

@@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,123 @@
"""Initial Classroom Engine Tables (Feature f13/f15)
Erstellt die Tabellen fuer:
- lesson_sessions: Unterrichtsstunden
- lesson_phase_history: Phasen-Verlauf
- teacher_settings: Lehrer-Einstellungen
Revision ID: 001
Revises: None
Create Date: 2026-01-15 12:00:00
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision: str = '001'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
# Enum-Typ fuer Unterrichtsphasen
lesson_phase_enum = postgresql.ENUM(
'not_started',
'einstieg',
'erarbeitung',
'sicherung',
'transfer',
'reflexion',
'ended',
name='lessonphaseenum',
create_type=False
)
def upgrade() -> None:
    """Create the lesson phase enum and the lesson_sessions, lesson_phase_history and teacher_settings tables."""
    # Create the enum type explicitly; the columns below reuse the module-level
    # type, which is declared with create_type=False.
    bind = op.get_bind()
    lesson_phase_enum.create(bind, checkfirst=True)

    # lesson_sessions table
    session_columns = [
        sa.Column('id', sa.String(36), primary_key=True),
        sa.Column('teacher_id', sa.String(100), nullable=False, index=True),
        sa.Column('class_id', sa.String(50), nullable=False, index=True),
        sa.Column('subject', sa.String(100), nullable=False),
        sa.Column('topic', sa.String(500), nullable=True),
        sa.Column('current_phase', lesson_phase_enum, nullable=False, server_default='not_started'),
        sa.Column('is_paused', sa.Boolean(), default=False),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.Column('lesson_started_at', sa.DateTime(), nullable=True),
        sa.Column('lesson_ended_at', sa.DateTime(), nullable=True),
        sa.Column('phase_started_at', sa.DateTime(), nullable=True),
        sa.Column('pause_started_at', sa.DateTime(), nullable=True),
        sa.Column('total_paused_seconds', sa.Integer(), default=0),
        sa.Column('phase_durations', sa.JSON(), default=dict),
        sa.Column('phase_history', sa.JSON(), default=list),
        sa.Column('notes', sa.Text(), default=''),
        sa.Column('homework', sa.Text(), default=''),
    ]
    op.create_table('lesson_sessions', *session_columns)

    # lesson_phase_history table; rows are removed together with their session
    session_fk = sa.ForeignKey('lesson_sessions.id', ondelete='CASCADE')
    history_columns = [
        sa.Column('id', sa.String(36), primary_key=True),
        sa.Column('session_id', sa.String(36), session_fk, nullable=False, index=True),
        sa.Column('phase', lesson_phase_enum, nullable=False),
        sa.Column('started_at', sa.DateTime(), nullable=False),
        sa.Column('ended_at', sa.DateTime(), nullable=True),
        sa.Column('duration_seconds', sa.Integer(), nullable=True),
        sa.Column('was_extended', sa.Boolean(), default=False),
        sa.Column('extension_minutes', sa.Integer(), default=0),
        sa.Column('pause_count', sa.Integer(), default=0),
        sa.Column('total_pause_seconds', sa.Integer(), default=0),
    ]
    op.create_table('lesson_phase_history', *history_columns)

    # teacher_settings table; one row per teacher (unique teacher_id)
    settings_columns = [
        sa.Column('id', sa.String(36), primary_key=True),
        sa.Column('teacher_id', sa.String(100), unique=True, nullable=False, index=True),
        sa.Column('default_phase_durations', sa.JSON(), default=dict),
        sa.Column('audio_enabled', sa.Boolean(), default=True),
        sa.Column('high_contrast', sa.Boolean(), default=False),
        sa.Column('show_statistics', sa.Boolean(), default=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.func.now(), onupdate=sa.func.now()),
    ]
    op.create_table('teacher_settings', *settings_columns)
def downgrade() -> None:
    """Drop the three tables in reverse creation order, then remove the enum type."""
    for table_name in ('teacher_settings', 'lesson_phase_history', 'lesson_sessions'):
        op.drop_table(table_name)

    # Remove the enum type once no table references it any more
    lesson_phase_enum.drop(op.get_bind(), checkfirst=True)

View File

@@ -0,0 +1,52 @@
"""Add Lesson Templates Table (Feature f37)
Erstellt die lesson_templates Tabelle fuer wiederverwendbare
Stundenkonfigurationen.
Revision ID: 002
Revises: 001
Create Date: 2026-01-15 14:00:00
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '002'
down_revision: Union[str, None] = '001'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the lesson_templates table and a partial index for public templates."""
    template_columns = [
        sa.Column('id', sa.String(36), primary_key=True),
        sa.Column('teacher_id', sa.String(100), nullable=False, index=True),
        sa.Column('name', sa.String(200), nullable=False),
        sa.Column('description', sa.Text(), default=''),
        sa.Column('subject', sa.String(100), default=''),
        sa.Column('grade_level', sa.String(50), default=''),
        sa.Column('phase_durations', sa.JSON(), default=dict),
        sa.Column('default_topic', sa.String(500), default=''),
        sa.Column('default_notes', sa.Text(), default=''),
        sa.Column('is_public', sa.Boolean(), default=False),
        sa.Column('usage_count', sa.Integer(), default=0),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.func.now(), onupdate=sa.func.now()),
    ]
    op.create_table('lesson_templates', *template_columns)

    # Index for public templates; restricted to rows with is_public = true
    only_public = sa.text('is_public = true')
    op.create_index(
        'ix_lesson_templates_public',
        'lesson_templates',
        ['is_public', 'usage_count'],
        postgresql_where=only_public,
    )
def downgrade() -> None:
    """Drop the public-templates index, then the lesson_templates table."""
    table_name = 'lesson_templates'
    op.drop_index('ix_lesson_templates_public', table_name=table_name)
    op.drop_table(table_name)

View File

@@ -0,0 +1,56 @@
"""Add Homework Assignments Table (Feature f20)
Erstellt die homework_assignments Tabelle fuer das
Hausaufgaben-Tracking.
Revision ID: 003
Revises: 002
Create Date: 2026-01-15 16:00:00
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '003'
down_revision: Union[str, None] = '002'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the homework_assignments table and an index for pending homework."""
    status_type = sa.Enum('assigned', 'in_progress', 'completed', 'overdue', name='homeworkstatusenum')
    session_fk = sa.ForeignKey('lesson_sessions.id')

    homework_columns = [
        sa.Column('id', sa.String(36), primary_key=True),
        sa.Column('teacher_id', sa.String(100), nullable=False, index=True),
        sa.Column('class_id', sa.String(50), nullable=False, index=True),
        sa.Column('subject', sa.String(100), nullable=False),
        sa.Column('title', sa.String(300), nullable=False),
        sa.Column('description', sa.Text(), default=''),
        # Optional link to the lesson in which the homework was assigned
        sa.Column('session_id', sa.String(36), session_fk, nullable=True, index=True),
        sa.Column('due_date', sa.DateTime(), nullable=True, index=True),
        sa.Column('status', status_type, default='assigned', nullable=False),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.func.now(), onupdate=sa.func.now()),
    ]
    op.create_table('homework_assignments', *homework_columns)

    # Index for upcoming homework
    pending_key = ['teacher_id', 'status', 'due_date']
    op.create_index('ix_homework_pending', 'homework_assignments', pending_key)
def downgrade() -> None:
    """Drop the pending-homework index, the homework_assignments table and its status enum type."""
    table_name = 'homework_assignments'
    op.drop_index('ix_homework_pending', table_name=table_name)
    op.drop_table(table_name)

    # Remove the enum type
    op.execute("DROP TYPE IF EXISTS homeworkstatusenum")

View File

@@ -0,0 +1,69 @@
"""Add Phase Materials Table (Feature f19)
Erstellt die phase_materials Tabelle fuer die
Material-Verknuepfung an Unterrichtsphasen.
Revision ID: 004
Revises: 003
Create Date: 2026-01-15 17:00:00
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '004'
down_revision: Union[str, None] = '003'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the phase_materials table plus its search index and partial public index."""
    material_type = sa.Enum(
        'document', 'link', 'video', 'image', 'worksheet', 'presentation', 'other',
        name='materialtypeenum',
    )
    session_fk = sa.ForeignKey('lesson_sessions.id')

    material_columns = [
        sa.Column('id', sa.String(36), primary_key=True),
        sa.Column('teacher_id', sa.String(100), nullable=False, index=True),
        sa.Column('title', sa.String(300), nullable=False),
        sa.Column('material_type', material_type, default='document', nullable=False),
        sa.Column('url', sa.String(2000), nullable=True),
        sa.Column('description', sa.Text(), default=''),
        sa.Column('phase', sa.String(50), nullable=True, index=True),
        sa.Column('subject', sa.String(100), default=''),
        sa.Column('grade_level', sa.String(50), default=''),
        sa.Column('tags', sa.JSON(), default=list),
        sa.Column('is_public', sa.Boolean(), default=False),
        sa.Column('usage_count', sa.Integer(), default=0),
        sa.Column('session_id', sa.String(36), session_fk, nullable=True, index=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.func.now(), onupdate=sa.func.now()),
    ]
    op.create_table('phase_materials', *material_columns)

    # Index for phase search
    search_key = ['teacher_id', 'phase', 'subject']
    op.create_index('ix_phase_materials_search', 'phase_materials', search_key)

    # Index for public materials; restricted to rows with is_public = true
    only_public = sa.text('is_public = true')
    op.create_index(
        'ix_phase_materials_public',
        'phase_materials',
        ['is_public', 'usage_count'],
        postgresql_where=only_public,
    )
def downgrade() -> None:
    """Drop both phase_materials indexes, the table and the material type enum."""
    table_name = 'phase_materials'
    for index_name in ('ix_phase_materials_public', 'ix_phase_materials_search'):
        op.drop_index(index_name, table_name=table_name)
    op.drop_table(table_name)

    # Remove the enum type
    op.execute("DROP TYPE IF EXISTS materialtypeenum")

View File

@@ -0,0 +1,40 @@
"""Add Lesson Reflections Table (Phase 5: Analytics)
Erstellt die lesson_reflections Tabelle fuer
Post-Lesson Reflexionen.
Revision ID: 005
Revises: 004
Create Date: 2026-01-15 18:00:00
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '005'
down_revision: Union[str, None] = '004'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the lesson_reflections table (at most one reflection per lesson session)."""
    session_fk = sa.ForeignKey('lesson_sessions.id')

    reflection_columns = [
        sa.Column('id', sa.String(36), primary_key=True),
        # unique=True limits each session to a single reflection row
        sa.Column('session_id', sa.String(36), session_fk, nullable=False, unique=True, index=True),
        sa.Column('teacher_id', sa.String(100), nullable=False, index=True),
        sa.Column('notes', sa.Text(), default=''),
        sa.Column('overall_rating', sa.Integer(), nullable=True),
        sa.Column('what_worked', sa.JSON(), default=list),
        sa.Column('improvements', sa.JSON(), default=list),
        sa.Column('notes_for_next_lesson', sa.Text(), default=''),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.func.now(), onupdate=sa.func.now()),
    ]
    op.create_table('lesson_reflections', *reflection_columns)
def downgrade() -> None:
    """Drop the lesson_reflections table."""
    table_name = 'lesson_reflections'
    op.drop_table(table_name)

View File

@@ -0,0 +1,45 @@
"""Add teacher_feedback table for Phase 7.
Revision ID: 006_teacher_feedback
Revises: 005_lesson_reflections
Create Date: 2026-01-15 19:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# Revision identifiers, used by Alembic.
# `down_revision` must equal the parent migration's `revision` value exactly.
# The lesson_reflections migration declares revision = '005', so that is the
# ID referenced here; a non-existent parent ID breaks the revision chain.
revision = '006_teacher_feedback'
down_revision = '005'
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create teacher_feedback table.

    NOTE(review): every `default=` / `onupdate=` below (including the
    timestamps) is a client-side SQLAlchemy setting and is not part of the
    table DDL; presumably the ORM model provides these values -- confirm.
    """
    op.create_table(
        'teacher_feedback',
        sa.Column('id', sa.String(36), primary_key=True),
        # Who submitted the feedback
        sa.Column('teacher_id', sa.String(100), nullable=False, index=True),
        sa.Column('teacher_name', sa.String(200), default=''),
        sa.Column('teacher_email', sa.String(200), default=''),
        # Feedback content
        sa.Column('title', sa.String(500), nullable=False),
        sa.Column('description', sa.Text, nullable=False),
        # Type, priority and processing status
        sa.Column('feedback_type', sa.String(50), nullable=False, default='improvement'),
        sa.Column('priority', sa.String(50), nullable=False, default='medium'),
        sa.Column('status', sa.String(50), nullable=False, default='new', index=True),
        # Context captured together with the feedback
        sa.Column('related_feature', sa.String(50), nullable=True),
        sa.Column('context_url', sa.String(500), default=''),
        sa.Column('context_phase', sa.String(50), default=''),
        sa.Column('context_session_id', sa.String(36), nullable=True),
        sa.Column('user_agent', sa.String(500), default=''),
        # Response to the feedback
        sa.Column('response', sa.Text, default=''),
        sa.Column('responded_at', sa.DateTime, nullable=True),
        sa.Column('responded_by', sa.String(100), nullable=True),
        # Timestamps
        sa.Column('created_at', sa.DateTime, default=sa.func.now()),
        sa.Column('updated_at', sa.DateTime, default=sa.func.now(), onupdate=sa.func.now()),
    )
def downgrade() -> None:
    """Revert this migration by removing the teacher_feedback table."""
    table_name = 'teacher_feedback'
    op.drop_table(table_name)

View File

@@ -0,0 +1,111 @@
"""Add teacher_contexts, schoolyear_events, recurring_routines tables for Phase 8.
Schuljahres-Begleiter Erweiterung mit 2-Schichten-Modell:
- Makro-Ebene: 7 Schuljahres-Phasen
- Mikro-Ebene: Events, Routinen, Arbeitsmodi
Revision ID: 007_teacher_context
Revises: 006_teacher_feedback
Create Date: 2026-01-15 20:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# Revision identifiers, used by Alembic.
# `revision` is this migration's ID; `down_revision` is the ID of the
# migration this one is applied on top of.
revision = '007_teacher_context'
down_revision = '006_teacher_feedback'
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create teacher_contexts, schoolyear_events, recurring_routines tables.

    NOTE(review): every `default=` / `onupdate=` below is a client-side
    SQLAlchemy setting and is not part of the table DDL; presumably the ORM
    models provide these values -- confirm.
    """
    # Teacher context - macro state per teacher (teacher_id is unique)
    op.create_table(
        'teacher_contexts',
        sa.Column('id', sa.String(36), primary_key=True),
        sa.Column('teacher_id', sa.String(100), unique=True, nullable=False, index=True),
        # School context
        sa.Column('federal_state', sa.String(10), default='BY'),
        sa.Column('school_type', sa.String(50), default='gymnasium'),
        # School year
        sa.Column('schoolyear', sa.String(20), default='2024-2025'),
        sa.Column('schoolyear_start', sa.DateTime, nullable=True),
        # Macro phase
        sa.Column('macro_phase', sa.String(50), nullable=False, default='onboarding'),
        sa.Column('current_week', sa.Integer, default=1),
        # Computed flags
        sa.Column('is_exam_period', sa.Boolean, default=False),
        sa.Column('is_before_holidays', sa.Boolean, default=False),
        # Onboarding status
        sa.Column('onboarding_completed', sa.Boolean, default=False),
        sa.Column('has_classes', sa.Boolean, default=False),
        sa.Column('has_schedule', sa.Boolean, default=False),
        # Metadata
        sa.Column('created_at', sa.DateTime, default=sa.func.now()),
        sa.Column('updated_at', sa.DateTime, default=sa.func.now(), onupdate=sa.func.now()),
    )
    # Schoolyear events - one-off events (exams, parent evenings, etc.)
    op.create_table(
        'schoolyear_events',
        sa.Column('id', sa.String(36), primary_key=True),
        sa.Column('teacher_id', sa.String(100), nullable=False, index=True),
        # Event details
        sa.Column('event_type', sa.String(50), nullable=False, default='other'),
        sa.Column('title', sa.String(300), nullable=False),
        sa.Column('description', sa.Text, default=''),
        # Time span
        sa.Column('start_date', sa.DateTime, nullable=False, index=True),
        sa.Column('end_date', sa.DateTime, nullable=True),
        # Assignment to a class / subject
        sa.Column('class_id', sa.String(100), nullable=True, index=True),
        sa.Column('subject', sa.String(100), nullable=True),
        # Status
        sa.Column('status', sa.String(50), nullable=False, default='planned', index=True),
        # Anticipation flags
        sa.Column('needs_preparation', sa.Boolean, default=True),
        sa.Column('preparation_done', sa.Boolean, default=False),
        sa.Column('reminder_days_before', sa.Integer, default=7),
        # Flexible metadata
        sa.Column('extra_data', sa.JSON, default=dict),
        # Timestamps
        sa.Column('created_at', sa.DateTime, default=sa.func.now()),
        sa.Column('updated_at', sa.DateTime, default=sa.func.now(), onupdate=sa.func.now()),
    )
    # Recurring routines - repeating activities
    op.create_table(
        'recurring_routines',
        sa.Column('id', sa.String(36), primary_key=True),
        sa.Column('teacher_id', sa.String(100), nullable=False, index=True),
        # Routine details
        sa.Column('routine_type', sa.String(50), nullable=False, default='other'),
        sa.Column('title', sa.String(300), nullable=False),
        sa.Column('description', sa.Text, default=''),
        # Recurrence
        sa.Column('recurrence_pattern', sa.String(50), nullable=False, default='weekly'),
        sa.Column('day_of_week', sa.Integer, nullable=True),  # 0=Mon, 6=Sun
        sa.Column('day_of_month', sa.Integer, nullable=True),  # 1-31
        sa.Column('time_of_day', sa.Time, nullable=True),
        # Duration
        sa.Column('duration_minutes', sa.Integer, default=60),
        # Active?
        sa.Column('is_active', sa.Boolean, default=True),
        # Validity period
        sa.Column('valid_from', sa.DateTime, nullable=True),
        sa.Column('valid_until', sa.DateTime, nullable=True),
        # Metadata
        sa.Column('extra_data', sa.JSON, default=dict),
        # Timestamps
        sa.Column('created_at', sa.DateTime, default=sa.func.now()),
        sa.Column('updated_at', sa.DateTime, default=sa.func.now(), onupdate=sa.func.now()),
    )
def downgrade() -> None:
    """Revert this migration by dropping the three tables it created."""
    # Same order as before: last created table is dropped first.
    for table_name in ('recurring_routines', 'schoolyear_events', 'teacher_contexts'):
        op.drop_table(table_name)

View File

@@ -0,0 +1,255 @@
"""Alerts Agent Tables
Erstellt die Tabellen für:
- alert_topics: Feed-Quellen (Google Alerts, RSS)
- alert_items: Einzelne Alerts/Artikel
- alert_rules: Filterregeln
- alert_profiles: Nutzer-Profile für Relevanz-Scoring
Revision ID: 008
Revises: 007_teacher_context
Create Date: 2026-01-15 21:00:00
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# Revision identifiers, used by Alembic.
# `down_revision` must equal the parent migration's `revision` value exactly.
# The teacher-context migration declares revision = '007_teacher_context';
# a non-existent parent ID breaks the revision chain.
revision: str = '008'
down_revision: Union[str, None] = '007_teacher_context'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
# Enum types.
# They are declared with create_type=False and created / dropped explicitly
# (with checkfirst=True) in upgrade() and downgrade().

# Origin of an alert item
alert_source_enum = postgresql.ENUM(
    'google_alerts_rss',
    'google_alerts_email',
    'rss_feed',
    'webhook',
    'manual',
    name='alertsourceenum',
    create_type=False
)
# Processing status of an alert item
alert_status_enum = postgresql.ENUM(
    'new',
    'processed',
    'duplicate',
    'scored',
    'reviewed',
    'archived',
    name='alertstatusenum',
    create_type=False
)
# Outcome of relevance scoring
relevance_decision_enum = postgresql.ENUM(
    'KEEP',
    'DROP',
    'REVIEW',
    name='relevancedecisionenum',
    create_type=False
)
# Feed type of an alert topic
feed_type_enum = postgresql.ENUM(
    'rss',
    'email',
    'webhook',
    name='feedtypeenum',
    create_type=False
)
# Action of an alert rule
rule_action_enum = postgresql.ENUM(
    'keep',
    'drop',
    'tag',
    'email',
    'webhook',
    'slack',
    name='ruleactionenum',
    create_type=False
)
def upgrade() -> None:
    """Create the alerts-agent enum types, tables and composite indexes.

    Creates, in order: the five enum types, alert_topics, alert_items
    (plus two composite indexes), alert_rules and alert_profiles.

    NOTE(review): `default=` and `onupdate=` are client-side SQLAlchemy
    settings; only `server_default=` becomes part of the table DDL.
    Presumably the ORM models supply the client-side values -- confirm.
    """
    # Create the enum types (checkfirst=True skips types that already exist)
    alert_source_enum.create(op.get_bind(), checkfirst=True)
    alert_status_enum.create(op.get_bind(), checkfirst=True)
    relevance_decision_enum.create(op.get_bind(), checkfirst=True)
    feed_type_enum.create(op.get_bind(), checkfirst=True)
    rule_action_enum.create(op.get_bind(), checkfirst=True)
    # alert_topics table
    op.create_table(
        'alert_topics',
        sa.Column('id', sa.String(36), primary_key=True),
        sa.Column('user_id', sa.String(36), nullable=True, index=True),
        sa.Column('name', sa.String(255), nullable=False),
        sa.Column('description', sa.Text(), default=''),
        sa.Column('feed_url', sa.String(2000), nullable=True),
        sa.Column(
            'feed_type',
            feed_type_enum,
            nullable=False,
            server_default='rss'
        ),
        sa.Column('is_active', sa.Boolean(), default=True, index=True),
        sa.Column('fetch_interval_minutes', sa.Integer(), default=60),
        sa.Column('last_fetched_at', sa.DateTime(), nullable=True),
        sa.Column('last_fetch_error', sa.Text(), nullable=True),
        sa.Column('total_items_fetched', sa.Integer(), default=0),
        sa.Column('items_kept', sa.Integer(), default=0),
        sa.Column('items_dropped', sa.Integer(), default=0),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.Column(
            'updated_at',
            sa.DateTime(),
            server_default=sa.func.now(),
            onupdate=sa.func.now()
        ),
    )
    # alert_items table
    op.create_table(
        'alert_items',
        sa.Column('id', sa.String(36), primary_key=True),
        # Items are deleted together with their topic (ondelete='CASCADE')
        sa.Column(
            'topic_id',
            sa.String(36),
            sa.ForeignKey('alert_topics.id', ondelete='CASCADE'),
            nullable=False,
            index=True
        ),
        sa.Column('title', sa.Text(), nullable=False),
        sa.Column('url', sa.String(2000), nullable=False),
        sa.Column('snippet', sa.Text(), default=''),
        sa.Column('article_text', sa.Text(), nullable=True),
        sa.Column('lang', sa.String(10), default='de'),
        sa.Column('published_at', sa.DateTime(), nullable=True, index=True),
        sa.Column('fetched_at', sa.DateTime(), server_default=sa.func.now(), index=True),
        sa.Column('processed_at', sa.DateTime(), nullable=True),
        sa.Column(
            'source',
            alert_source_enum,
            nullable=False,
            server_default='google_alerts_rss'
        ),
        # url_hash is unique across all items
        sa.Column('url_hash', sa.String(64), unique=True, nullable=False, index=True),
        sa.Column('content_hash', sa.String(64), nullable=True),
        sa.Column('canonical_url', sa.String(2000), nullable=True),
        sa.Column(
            'status',
            alert_status_enum,
            nullable=False,
            server_default='new',
            index=True
        ),
        sa.Column('cluster_id', sa.String(36), nullable=True),
        sa.Column('relevance_score', sa.Float(), nullable=True),
        sa.Column('relevance_decision', relevance_decision_enum, nullable=True, index=True),
        sa.Column('relevance_reasons', sa.JSON(), default=list),
        sa.Column('relevance_summary', sa.Text(), nullable=True),
        sa.Column('scored_by_model', sa.String(100), nullable=True),
        sa.Column('scored_at', sa.DateTime(), nullable=True),
        sa.Column('user_marked_relevant', sa.Boolean(), nullable=True),
        sa.Column('user_tags', sa.JSON(), default=list),
        sa.Column('user_notes', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.Column(
            'updated_at',
            sa.DateTime(),
            server_default=sa.func.now(),
            onupdate=sa.func.now()
        ),
    )
    # Composite indexes for alert_items
    op.create_index(
        'ix_alert_items_topic_status',
        'alert_items',
        ['topic_id', 'status']
    )
    op.create_index(
        'ix_alert_items_topic_decision',
        'alert_items',
        ['topic_id', 'relevance_decision']
    )
    # alert_rules table
    op.create_table(
        'alert_rules',
        sa.Column('id', sa.String(36), primary_key=True),
        # topic_id is nullable here, unlike alert_items.topic_id
        sa.Column(
            'topic_id',
            sa.String(36),
            sa.ForeignKey('alert_topics.id', ondelete='CASCADE'),
            nullable=True,
            index=True
        ),
        sa.Column('user_id', sa.String(36), nullable=True, index=True),
        sa.Column('name', sa.String(255), nullable=False),
        sa.Column('description', sa.Text(), default=''),
        sa.Column('conditions', sa.JSON(), nullable=False, default=list),
        sa.Column(
            'action_type',
            rule_action_enum,
            nullable=False,
            server_default='keep'
        ),
        sa.Column('action_config', sa.JSON(), default=dict),
        sa.Column('priority', sa.Integer(), default=0, index=True),
        sa.Column('is_active', sa.Boolean(), default=True, index=True),
        sa.Column('match_count', sa.Integer(), default=0),
        sa.Column('last_matched_at', sa.DateTime(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.Column(
            'updated_at',
            sa.DateTime(),
            server_default=sa.func.now(),
            onupdate=sa.func.now()
        ),
    )
    # alert_profiles table
    op.create_table(
        'alert_profiles',
        sa.Column('id', sa.String(36), primary_key=True),
        sa.Column('user_id', sa.String(36), unique=True, nullable=True, index=True),
        sa.Column('name', sa.String(255), default='Default'),
        sa.Column('priorities', sa.JSON(), default=list),
        sa.Column('exclusions', sa.JSON(), default=list),
        sa.Column('positive_examples', sa.JSON(), default=list),
        sa.Column('negative_examples', sa.JSON(), default=list),
        sa.Column('policies', sa.JSON(), default=dict),
        sa.Column('total_scored', sa.Integer(), default=0),
        sa.Column('total_kept', sa.Integer(), default=0),
        sa.Column('total_dropped', sa.Integer(), default=0),
        sa.Column('accuracy_estimate', sa.Float(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.Column(
            'updated_at',
            sa.DateTime(),
            server_default=sa.func.now(),
            onupdate=sa.func.now()
        ),
    )
def downgrade() -> None:
    """Revert this migration: drop the alert tables, their indexes and the enum types."""
    # Tables go in reverse creation order because of the foreign keys;
    # the composite indexes are dropped right before alert_items.
    for table_name in ('alert_profiles', 'alert_rules'):
        op.drop_table(table_name)
    for index_name in ('ix_alert_items_topic_decision', 'ix_alert_items_topic_status'):
        op.drop_index(index_name, 'alert_items')
    for table_name in ('alert_items', 'alert_topics'):
        op.drop_table(table_name)

    # Enum types last, once no table uses them any more.
    enum_types = (
        rule_action_enum,
        feed_type_enum,
        relevance_decision_enum,
        alert_status_enum,
        alert_source_enum,
    )
    for enum_type in enum_types:
        enum_type.drop(op.get_bind(), checkfirst=True)

View File

@@ -0,0 +1,143 @@
"""Test Registry Tables - Persistente Test-Speicherung
Revision ID: 009
Revises: 008
Create Date: 2026-02-02 10:00:00.000000
Erstellt Tabellen fuer:
- test_runs: Historie aller Test-Durchlaeufe
- test_results: Einzelne Test-Ergebnisse pro Run
- failed_tests_backlog: Persistenter Backlog fuer zu fixende Tests
- test_fixes_history: Historie aller Fix-Versuche
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# Revision identifiers, used by Alembic.
# `revision` is this migration's ID; `down_revision` is the ID of the
# migration this one is applied on top of.
revision = '009'
down_revision = '008'
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the test-registry tables and their indexes.

    Tables: test_runs, test_results, failed_tests_backlog,
    test_fixes_history and test_service_stats.

    NOTE(review): `default=` and `onupdate=` are client-side SQLAlchemy
    settings; only `server_default=` becomes part of the table DDL.
    Presumably the writing code supplies the client-side values -- confirm.
    """
    # test_runs: every test run is stored
    op.create_table(
        'test_runs',
        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
        sa.Column('run_id', sa.String(50), unique=True, nullable=False),
        sa.Column('service', sa.String(100), nullable=False),
        sa.Column('framework', sa.String(50), nullable=False),
        sa.Column('started_at', sa.DateTime(), nullable=False),
        sa.Column('completed_at', sa.DateTime(), nullable=True),
        sa.Column('status', sa.String(20), nullable=False),  # queued, running, completed, failed
        sa.Column('total_tests', sa.Integer(), default=0),
        sa.Column('passed_tests', sa.Integer(), default=0),
        sa.Column('failed_tests', sa.Integer(), default=0),
        sa.Column('skipped_tests', sa.Integer(), default=0),
        sa.Column('duration_seconds', sa.Float(), default=0),
        sa.Column('git_commit', sa.String(40), nullable=True),
        sa.Column('git_branch', sa.String(100), nullable=True),
        sa.Column('triggered_by', sa.String(50), nullable=True),  # manual, ci, schedule
        sa.Column('output', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.PrimaryKeyConstraint('id')
    )
    # Indexes for test_runs
    op.create_index('idx_test_runs_service', 'test_runs', ['service'])
    op.create_index('idx_test_runs_started_at', 'test_runs', ['started_at'])
    op.create_index('idx_test_runs_run_id', 'test_runs', ['run_id'])
    # test_results: individual test results per run
    op.create_table(
        'test_results',
        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
        # References the unique test_runs.run_id string, not the integer primary key
        sa.Column('run_id', sa.String(50), sa.ForeignKey('test_runs.run_id', ondelete='CASCADE'), nullable=False),
        sa.Column('test_name', sa.String(500), nullable=False),
        sa.Column('test_file', sa.String(500), nullable=True),
        sa.Column('line_number', sa.Integer(), nullable=True),
        sa.Column('status', sa.String(20), nullable=False),  # passed, failed, skipped, error
        sa.Column('duration_ms', sa.Float(), nullable=True),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('error_type', sa.String(100), nullable=True),
        sa.Column('output', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.PrimaryKeyConstraint('id')
    )
    # Indexes for test_results
    op.create_index('idx_test_results_run_id', 'test_results', ['run_id'])
    op.create_index('idx_test_results_status', 'test_results', ['status'])
    op.create_index('idx_test_results_test_name', 'test_results', ['test_name'])
    # failed_tests_backlog: persistent backlog of tests to fix
    op.create_table(
        'failed_tests_backlog',
        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
        sa.Column('test_name', sa.String(500), nullable=False),
        sa.Column('test_file', sa.String(500), nullable=True),
        sa.Column('service', sa.String(100), nullable=False),
        sa.Column('framework', sa.String(50), nullable=True),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('error_type', sa.String(100), nullable=True),
        sa.Column('first_failed_at', sa.DateTime(), nullable=False),
        sa.Column('last_failed_at', sa.DateTime(), nullable=False),
        sa.Column('failure_count', sa.Integer(), default=1),
        sa.Column('status', sa.String(30), default='open'),  # open, in_progress, fixed, wont_fix, flaky
        sa.Column('priority', sa.String(20), default='medium'),  # critical, high, medium, low
        sa.Column('assigned_to', sa.String(100), nullable=True),
        sa.Column('fix_suggestion', sa.Text(), nullable=True),
        sa.Column('notes', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.func.now(), onupdate=sa.func.now()),
        sa.PrimaryKeyConstraint('id'),
        # One backlog row per (test_name, service) pair
        sa.UniqueConstraint('test_name', 'service', name='uq_backlog_test_service')
    )
    # Indexes for failed_tests_backlog
    op.create_index('idx_backlog_status', 'failed_tests_backlog', ['status'])
    op.create_index('idx_backlog_service', 'failed_tests_backlog', ['service'])
    op.create_index('idx_backlog_priority', 'failed_tests_backlog', ['priority'])
    # test_fixes_history: history of all fix attempts
    op.create_table(
        'test_fixes_history',
        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
        sa.Column('backlog_id', sa.Integer(), sa.ForeignKey('failed_tests_backlog.id', ondelete='CASCADE'), nullable=False),
        sa.Column('fix_type', sa.String(50), nullable=True),  # manual, auto_claude, auto_script
        sa.Column('fix_description', sa.Text(), nullable=True),
        sa.Column('commit_hash', sa.String(40), nullable=True),
        sa.Column('success', sa.Boolean(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.PrimaryKeyConstraint('id')
    )
    # Index for test_fixes_history
    op.create_index('idx_fixes_backlog_id', 'test_fixes_history', ['backlog_id'])
    # Aggregated test stats table (for fast queries); service is unique
    op.create_table(
        'test_service_stats',
        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
        sa.Column('service', sa.String(100), unique=True, nullable=False),
        sa.Column('total_tests', sa.Integer(), default=0),
        sa.Column('passed_tests', sa.Integer(), default=0),
        sa.Column('failed_tests', sa.Integer(), default=0),
        sa.Column('skipped_tests', sa.Integer(), default=0),
        sa.Column('pass_rate', sa.Float(), default=0.0),
        sa.Column('last_run_id', sa.String(50), nullable=True),
        sa.Column('last_run_at', sa.DateTime(), nullable=True),
        sa.Column('last_status', sa.String(20), nullable=True),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.func.now(), onupdate=sa.func.now()),
        sa.PrimaryKeyConstraint('id')
    )
def downgrade() -> None:
    """Revert this migration by dropping all test-registry tables."""
    # Referencing tables are dropped before the tables they point to.
    tables_in_drop_order = (
        'test_service_stats',
        'test_fixes_history',
        'failed_tests_backlog',
        'test_results',
        'test_runs',
    )
    for table_name in tables_in_drop_order:
        op.drop_table(table_name)

View File

View File

@@ -0,0 +1,14 @@
"""
BreakPilot Alerts Agent.
Automatisierte Relevanz-Filterung für Google Alerts.
Reduziert ~900 Alerts/Monat auf <120 Review/Keep.
Komponenten:
- ingestion: RSS Fetcher, Email Fetcher
- processing: Deduplication, Clustering, Relevance Scoring
- models: AlertItem, RelevanceProfile
- api: Inbox, Feedback Endpoints
"""
# Version string of the alerts agent package.
__version__ = "0.1.0"

View File

@@ -0,0 +1,20 @@
"""
Actions Module für Alerts Agent.
Führt Aktionen aus, die durch Regeln oder Scoring ausgelöst werden.
"""
from .base import ActionHandler, ActionResult, ActionType
from .email_action import EmailAction
from .webhook_action import WebhookAction
from .slack_action import SlackAction
from .dispatcher import ActionDispatcher
# Names exported by `from <package> import *`: the handler base types,
# the three concrete handlers and the dispatcher imported above.
__all__ = [
    "ActionHandler",
    "ActionResult",
    "ActionType",
    "EmailAction",
    "WebhookAction",
    "SlackAction",
    "ActionDispatcher",
]

View File

@@ -0,0 +1,123 @@
"""
Base Classes für Alert Actions.
Definiert das Interface für alle Action-Handler.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, Any, Optional, List
from enum import Enum
class ActionType(str, Enum):
    """Available action types.

    Members are also `str` instances, so `ActionType("email")` looks a
    member up by its string value.
    """
    EMAIL = "email"
    WEBHOOK = "webhook"
    SLACK = "slack"
    TEAMS = "teams"
    TAG = "tag"
    ARCHIVE = "archive"
@dataclass
class ActionResult:
    """Outcome of a single executed action."""

    success: bool
    action_type: ActionType
    message: str
    # Set when the result object is created (naive UTC via datetime.utcnow)
    timestamp: datetime = field(default_factory=datetime.utcnow)
    details: Dict[str, Any] = field(default_factory=dict)
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain dict for logging/storage.

        The action type is emitted as its string value and the timestamp
        as an ISO-8601 string.
        """
        return dict(
            success=self.success,
            action_type=self.action_type.value,
            message=self.message,
            timestamp=self.timestamp.isoformat(),
            details=self.details,
            error=self.error,
        )
@dataclass
class AlertContext:
    """Alert information handed to an action handler."""

    alert_id: str
    title: str
    url: str
    snippet: str
    topic_name: str
    relevance_score: Optional[float] = None
    relevance_decision: Optional[str] = None
    matched_rule: Optional[str] = None
    tags: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a dict (e.g. for templates).

        Values are passed through as-is; the ``tags`` list is not copied.
        """
        exported_fields = (
            "alert_id",
            "title",
            "url",
            "snippet",
            "topic_name",
            "relevance_score",
            "relevance_decision",
            "matched_rule",
            "tags",
        )
        return {name: getattr(self, name) for name in exported_fields}
class ActionHandler(ABC):
    """
    Abstract base class for action handlers.

    Every concrete action (email, webhook, Slack) implements this interface.
    """

    @property
    @abstractmethod
    def action_type(self) -> ActionType:
        """The action type this handler is responsible for."""

    @abstractmethod
    async def execute(
        self,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> ActionResult:
        """
        Run the action.

        Args:
            context: Alert context with all relevant information
            config: Action-specific configuration

        Returns:
            ActionResult carrying the success flag and details
        """

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """
        Check the action configuration.

        The base implementation accepts every configuration.

        Args:
            config: Configuration to validate

        Returns:
            True if the configuration is valid
        """
        return True

    def get_required_config_fields(self) -> List[str]:
        """
        Name the configuration fields the handler requires.

        The base implementation requires none.

        Returns:
            List of field names
        """
        return []

View File

@@ -0,0 +1,232 @@
"""
Action Dispatcher für Alerts Agent.
Verteilt Aktionen an die entsprechenden Handler.
"""
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
from .base import ActionHandler, ActionResult, ActionType, AlertContext
from .email_action import EmailAction
from .webhook_action import WebhookAction
from .slack_action import SlackAction
logger = logging.getLogger(__name__)
class ActionDispatcher:
    """
    Central routing of actions to their handlers.

    Keeps one handler per action type and runs actions based on
    rule configurations.
    """

    def __init__(self):
        """Create the dispatcher and register the default handlers."""
        self._handlers: Dict[ActionType, ActionHandler] = {}

        # Default handlers
        self.register_handler(EmailAction())
        self.register_handler(WebhookAction())
        self.register_handler(SlackAction())

    def register_handler(self, handler: ActionHandler) -> None:
        """
        Register an action handler.

        A handler registered for an already known action type replaces
        the previous one.

        Args:
            handler: Handler instance
        """
        self._handlers[handler.action_type] = handler
        logger.debug(f"Registered action handler: {handler.action_type.value}")

    def get_handler(self, action_type: ActionType) -> Optional[ActionHandler]:
        """
        Return the handler for an action type.

        Args:
            action_type: Action type

        Returns:
            Handler, or None when none is registered
        """
        return self._handlers.get(action_type)

    def list_handlers(self) -> List[str]:
        """Return the string values of all action types that have a handler."""
        return [registered_type.value for registered_type in self._handlers]

    async def dispatch(
        self,
        action_type: str,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> ActionResult:
        """
        Run a single action.

        Unknown action types, missing handlers and invalid configurations
        are reported as failed ActionResults. Exceptions raised by the
        handler's execute() are not caught here.

        Args:
            action_type: Action type as string (email, webhook, slack);
                matched case-insensitively
            context: Alert context
            config: Action-specific configuration

        Returns:
            ActionResult
        """
        try:
            # Resolve the ActionType from its string value
            at = ActionType(action_type.lower())
        except (ValueError, AttributeError):
            # ValueError: no such action type.
            # AttributeError: action_type is not a string (e.g. None coming
            # from a rule entry such as {"type": None}); report it the same
            # way instead of letting the exception escape.
            return ActionResult(
                success=False,
                action_type=ActionType.WEBHOOK,  # Fallback
                message=f"Unbekannter Aktionstyp: {action_type}",
                error="Unknown action type",
            )

        handler = self.get_handler(at)
        if not handler:
            return ActionResult(
                success=False,
                action_type=at,
                message=f"Kein Handler für {action_type} registriert",
                error="No handler registered",
            )

        # Validate the configuration before running anything
        if not handler.validate_config(config):
            required = handler.get_required_config_fields()
            return ActionResult(
                success=False,
                action_type=at,
                message=f"Ungültige Konfiguration für {action_type}",
                error=f"Required fields: {required}",
            )

        # Run the action
        logger.info(f"Dispatching {action_type} action for alert {context.alert_id[:8]}")
        result = await handler.execute(context, config)
        return result

    async def dispatch_multiple(
        self,
        actions: List[Dict[str, Any]],
        context: AlertContext,
    ) -> List[ActionResult]:
        """
        Run several actions one after another.

        Each entry may use the keys ``type``/``config`` or
        ``action_type``/``action_config``.

        Args:
            actions: List of actions [{type, config}, ...]
            context: Alert context

        Returns:
            List of ActionResults, in the order of ``actions``
        """
        results = []
        for action in actions:
            action_type = action.get("type", action.get("action_type", ""))
            config = action.get("config", action.get("action_config", {}))
            result = await self.dispatch(action_type, context, config)
            results.append(result)
        return results
# Module-level singleton, created on first use by get_dispatcher()
_dispatcher: Optional[ActionDispatcher] = None


def get_dispatcher() -> ActionDispatcher:
    """Return the shared ActionDispatcher, creating it on the first call."""
    global _dispatcher
    if _dispatcher is not None:
        return _dispatcher
    _dispatcher = ActionDispatcher()
    return _dispatcher
async def execute_action(
    action_type: str,
    alert_id: str,
    title: str,
    url: str,
    snippet: str,
    topic_name: str,
    config: Dict[str, Any],
    relevance_score: Optional[float] = None,
    relevance_decision: Optional[str] = None,
    matched_rule: Optional[str] = None,
    tags: Optional[List[str]] = None,
) -> ActionResult:
    """
    Convenience wrapper for running one action.

    Builds an AlertContext from the individual alert fields and hands it
    to the shared dispatcher. A missing or empty ``tags`` becomes ``[]``.

    Returns:
        The ActionResult produced by the dispatcher
    """
    alert_context = AlertContext(
        alert_id=alert_id,
        title=title,
        url=url,
        snippet=snippet,
        topic_name=topic_name,
        relevance_score=relevance_score,
        relevance_decision=relevance_decision,
        matched_rule=matched_rule,
        tags=tags if tags else [],
    )
    return await get_dispatcher().dispatch(action_type, alert_context, config)
async def execute_rule_actions(
    alert_id: str,
    title: str,
    url: str,
    snippet: str,
    topic_name: str,
    rule_action: str,
    rule_config: Dict[str, Any],
    rule_name: str,
) -> ActionResult:
    """
    Run the action of a matched rule.

    Only the external actions (email, webhook, slack) are dispatched from
    here. Any other value (keep/drop/tag are handled by the rule engine)
    yields a successful placeholder result without doing anything.

    Args:
        alert_id: Alert ID
        title: Alert title
        url: Alert URL
        snippet: Alert snippet
        topic_name: Topic name
        rule_action: Action type of the rule
        rule_config: Action configuration
        rule_name: Name of the rule

    Returns:
        ActionResult
    """
    external_actions = ("email", "webhook", "slack")
    if rule_action in external_actions:
        return await execute_action(
            action_type=rule_action,
            alert_id=alert_id,
            title=title,
            url=url,
            snippet=snippet,
            topic_name=topic_name,
            config=rule_config,
            matched_rule=rule_name,
        )

    # Internal action: nothing to dispatch, report success
    return ActionResult(
        success=True,
        action_type=ActionType.TAG,  # Dummy
        message=f"Interne Aktion {rule_action} von Rule Engine behandelt",
    )

View File

@@ -0,0 +1,251 @@
"""
Email Action für Alerts Agent.
Sendet E-Mail-Benachrichtigungen für Alerts.
"""
import logging
from typing import Dict, Any, List
from datetime import datetime
from .base import ActionHandler, ActionResult, ActionType, AlertContext
logger = logging.getLogger(__name__)
# HTML template for alert e-mails.
# It is filled in with str.format(): the doubled braces in the CSS are
# literal braces, single-brace names ({topic_name}, {url}, {title},
# {snippet}, {decision_badge}, {score_display}, {rule_display},
# {dashboard_url}) are placeholders.
EMAIL_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style>
body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; line-height: 1.6; color: #333; }}
.container {{ max-width: 600px; margin: 0 auto; padding: 20px; }}
.header {{ background: #4A90E2; color: white; padding: 20px; border-radius: 8px 8px 0 0; }}
.content {{ background: #f9f9f9; padding: 20px; border: 1px solid #ddd; border-top: none; }}
.alert-card {{ background: white; padding: 15px; margin: 10px 0; border-radius: 4px; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }}
.alert-title {{ font-size: 16px; font-weight: 600; color: #1a1a1a; margin-bottom: 8px; }}
.alert-title a {{ color: #4A90E2; text-decoration: none; }}
.alert-snippet {{ font-size: 14px; color: #666; margin-bottom: 8px; }}
.alert-meta {{ font-size: 12px; color: #999; }}
.badge {{ display: inline-block; padding: 2px 8px; border-radius: 12px; font-size: 11px; font-weight: 500; }}
.badge-keep {{ background: #d4edda; color: #155724; }}
.badge-review {{ background: #fff3cd; color: #856404; }}
.footer {{ padding: 15px; text-align: center; font-size: 12px; color: #999; }}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h2 style="margin: 0;">BreakPilot Alert</h2>
<p style="margin: 5px 0 0 0; opacity: 0.9;">{topic_name}</p>
</div>
<div class="content">
<div class="alert-card">
<div class="alert-title">
<a href="{url}">{title}</a>
</div>
<div class="alert-snippet">{snippet}</div>
<div class="alert-meta">
{decision_badge}
{score_display}
{rule_display}
</div>
</div>
</div>
<div class="footer">
Gesendet von BreakPilot Alerts Agent<br>
<a href="{dashboard_url}" style="color: #4A90E2;">Zur Inbox</a>
</div>
</div>
</body>
</html>
"""
class EmailAction(ActionHandler):
"""
E-Mail-Benachrichtigungen für Alerts.
Konfiguration:
- to: E-Mail-Adresse(n) des Empfängers
- subject_prefix: Optionaler Betreff-Prefix
- include_snippet: Snippet einbinden (default: true)
"""
@property
def action_type(self) -> ActionType:
    """Action type handled by this handler: ActionType.EMAIL."""
    return ActionType.EMAIL
def get_required_config_fields(self) -> List[str]:
    """Return the required configuration keys; only ``to`` is mandatory."""
    return ["to"]
def validate_config(self, config: Dict[str, Any]) -> bool:
    """
    Check that the configuration names at least one plausible recipient.

    ``to`` must be a non-empty string containing "@" or a non-empty list
    whose entries are all strings containing "@".

    Args:
        config: E-mail configuration to validate

    Returns:
        True if ``to`` is acceptable, False otherwise (never raises for
        malformed list entries)
    """
    to = config.get("to")
    if not to:
        return False
    if isinstance(to, str):
        return "@" in to
    if isinstance(to, list):
        # A non-string entry (None, int, ...) makes the config invalid;
        # without the isinstance check `"@" in entry` would raise TypeError.
        return all(isinstance(email, str) and "@" in email for email in to)
    return False
async def execute(
    self,
    context: AlertContext,
    config: Dict[str, Any],
) -> ActionResult:
    """
    Send an e-mail notification for an alert.

    The subject is the prefix (default "[BreakPilot Alert]") followed by
    the first 50 characters of the alert title. Any exception is logged
    and turned into a failed ActionResult.

    Args:
        context: Alert context
        config: E-mail configuration (to, subject_prefix, etc.)

    Returns:
        ActionResult
    """
    try:
        # Recipients: a single address is wrapped into a list
        target = config.get("to")
        recipients = [target] if isinstance(target, str) else target

        # Subject line
        prefix = config.get("subject_prefix", "[BreakPilot Alert]")
        subject = f"{prefix} {context.title[:50]}"

        # Render the body and hand everything to the SMTP helper
        delivered = await self._send_email(
            recipients=recipients,
            subject=subject,
            html_body=self._render_email(context, config),
        )

        if not delivered:
            return ActionResult(
                success=False,
                action_type=self.action_type,
                message="E-Mail konnte nicht gesendet werden",
                error="SMTP-Fehler",
            )

        return ActionResult(
            success=True,
            action_type=self.action_type,
            message=f"E-Mail an {len(recipients)} Empfänger gesendet",
            details={"recipients": recipients, "subject": subject},
        )
    except Exception as e:
        logger.error(f"Email action error: {e}")
        return ActionResult(
            success=False,
            action_type=self.action_type,
            message="E-Mail-Fehler",
            error=str(e),
        )
def _render_email(
self,
context: AlertContext,
config: Dict[str, Any],
) -> str:
"""Rendert das E-Mail-Template."""
# Decision Badge
decision_badge = ""
if context.relevance_decision:
badge_class = "badge-keep" if context.relevance_decision == "KEEP" else "badge-review"
decision_badge = f'<span class="badge {badge_class}">{context.relevance_decision}</span>'
# Score
score_display = ""
if context.relevance_score is not None:
score_display = f' | Score: {context.relevance_score:.0%}'
# Matched Rule
rule_display = ""
if context.matched_rule:
rule_display = f' | Regel: {context.matched_rule}'
# Snippet
snippet = context.snippet[:200] if context.snippet else ""
if config.get("include_snippet", True) is False:
snippet = ""
# Dashboard URL
dashboard_url = config.get("dashboard_url", "http://localhost:8000/studio#alerts")
return EMAIL_TEMPLATE.format(
topic_name=context.topic_name,
title=context.title,
url=context.url,
snippet=snippet,
decision_badge=decision_badge,
score_display=score_display,
rule_display=rule_display,
dashboard_url=dashboard_url,
)
async def _send_email(
self,
recipients: List[str],
subject: str,
html_body: str,
) -> bool:
"""
Sendet die E-Mail über SMTP.
Verwendet aiosmtplib für async SMTP.
"""
import os
smtp_host = os.getenv("SMTP_HOST", "localhost")
smtp_port = int(os.getenv("SMTP_PORT", "587"))
smtp_user = os.getenv("SMTP_USER", "")
smtp_pass = os.getenv("SMTP_PASS", "")
smtp_from = os.getenv("SMTP_FROM", "alerts@breakpilot.de")
try:
import aiosmtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
# E-Mail erstellen
msg = MIMEMultipart("alternative")
msg["Subject"] = subject
msg["From"] = smtp_from
msg["To"] = ", ".join(recipients)
# HTML-Teil
html_part = MIMEText(html_body, "html", "utf-8")
msg.attach(html_part)
# Senden
await aiosmtplib.send(
msg,
hostname=smtp_host,
port=smtp_port,
username=smtp_user if smtp_user else None,
password=smtp_pass if smtp_pass else None,
start_tls=True if smtp_port == 587 else False,
)
logger.info(f"Email sent to {recipients}")
return True
except ImportError:
logger.warning("aiosmtplib not installed. Email not sent.")
# Im Dev-Modus: Erfolg simulieren
logger.info(f"[DEV] Would send email to {recipients}: {subject}")
return True
except Exception as e:
logger.error(f"SMTP error: {e}")
return False

View File

@@ -0,0 +1,198 @@
"""
Slack Action für Alerts Agent.
Sendet Slack-Nachrichten für Alerts via Incoming Webhooks.
"""
import logging
from typing import Dict, Any, List
import httpx
from .base import ActionHandler, ActionResult, ActionType, AlertContext
logger = logging.getLogger(__name__)
class SlackAction(ActionHandler):
    """
    Slack notifications for alerts via Incoming Webhooks.

    Configuration keys:
    - webhook_url: Slack Incoming Webhook URL
    - channel: optional channel override
    - username: optional bot username (default: BreakPilot Alerts)
    - icon_emoji: optional bot icon (default: :bell:)
    - dashboard_url: optional target of the inbox link
    """

    # Slack rejects header blocks whose text exceeds this length.
    _HEADER_MAX_CHARS = 150

    @property
    def action_type(self) -> ActionType:
        """Identify this handler as the Slack action."""
        return ActionType.SLACK

    def get_required_config_fields(self) -> List[str]:
        """Return the config keys that must be present."""
        return ["webhook_url"]

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """
        Accept Slack webhook hosts or any https URL.

        Non-string values (e.g. ``None``) are rejected instead of raising.
        """
        url = config.get("webhook_url", "")
        if not isinstance(url, str):
            return False
        return "hooks.slack.com" in url or url.startswith("https://")

    @staticmethod
    def _escape_mrkdwn(text: Any) -> str:
        """Escape the three control characters of Slack mrkdwn (&, <, >)."""
        return (
            str(text)
            .replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
        )

    @staticmethod
    def _escape_link_url(url: Any) -> str:
        """Percent-encode characters that would terminate a ``<url|text>`` link."""
        return (
            str(url)
            .replace("<", "%3C")
            .replace(">", "%3E")
            .replace("|", "%7C")
        )

    async def execute(
        self,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> ActionResult:
        """
        Post one alert to Slack.

        Args:
            context: alert context
            config: Slack configuration (webhook_url, channel, ...)

        Returns:
            ActionResult; success requires HTTP 200 and the literal body "ok".
            Exceptions are caught, logged and converted into a failed result.
        """
        try:
            webhook_url = config.get("webhook_url")
            payload = self._build_slack_payload(context, config)

            async with httpx.AsyncClient(timeout=30) as client:
                response = await client.post(
                    webhook_url,
                    json=payload,
                    headers={"Content-Type": "application/json"},
                )

            # Slack answers with the plain text "ok" on success
            success = response.status_code == 200 and response.text == "ok"
            return ActionResult(
                success=success,
                action_type=self.action_type,
                message="Slack-Nachricht gesendet" if success else "Slack-Fehler",
                details={
                    "status_code": response.status_code,
                    "response": response.text[:100],
                },
                error=None if success else response.text,
            )
        except Exception as e:
            logger.error(f"Slack action error: {e}")
            return ActionResult(
                success=False,
                action_type=self.action_type,
                message="Slack-Fehler",
                error=str(e),
            )

    def _build_slack_payload(
        self,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> Dict[str, Any]:
        """
        Build the Block Kit payload for one alert.

        Alert-derived text placed in mrkdwn fields is escaped so that
        characters such as ``<``, ``>`` and ``&`` in titles or tags cannot
        break the link or inject formatting.
        """
        esc = self._escape_mrkdwn

        payload: Dict[str, Any] = {
            "username": config.get("username", "BreakPilot Alerts"),
            "icon_emoji": config.get("icon_emoji", ":bell:"),
        }
        # Override the channel only when configured
        if config.get("channel"):
            payload["channel"] = config["channel"]

        blocks: List[Dict[str, Any]] = [
            # Header (plain_text, so no mrkdwn escaping; capped at Slack's limit)
            {
                "type": "header",
                "text": {
                    "type": "plain_text",
                    "text": f"📰 {context.topic_name}"[: self._HEADER_MAX_CHARS],
                    "emoji": True,
                }
            },
            # Alert title rendered as a link
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"*<{self._escape_link_url(context.url)}|{esc(context.title)}>*",
                }
            },
        ]

        # Snippet, truncated to 200 characters
        if context.snippet:
            snippet = context.snippet[:200]
            if len(context.snippet) > 200:
                snippet += "..."
            blocks.append({
                "type": "section",
                "text": {
                    "type": "plain_text",
                    "text": snippet,
                    "emoji": False,
                }
            })

        # Context fields (score, decision, rule, tags)
        fields = []
        if context.relevance_score is not None:
            score_emoji = "🟢" if context.relevance_score >= 0.7 else "🟡" if context.relevance_score >= 0.4 else "🔴"
            fields.append({
                "type": "mrkdwn",
                "text": f"*Score:* {score_emoji} {context.relevance_score:.0%}",
            })
        if context.relevance_decision:
            # NOTE(review): the KEEP/DROP entries are empty strings here —
            # possibly emoji lost to an encoding problem; confirm.
            decision_emoji = {"KEEP": "", "DROP": "", "REVIEW": "👀"}.get(context.relevance_decision, "")
            fields.append({
                "type": "mrkdwn",
                "text": f"*Decision:* {decision_emoji} {esc(context.relevance_decision)}",
            })
        if context.matched_rule:
            fields.append({
                "type": "mrkdwn",
                "text": f"*Regel:* {esc(context.matched_rule)}",
            })
        if context.tags:
            fields.append({
                "type": "mrkdwn",
                "text": f"*Tags:* {esc(', '.join(context.tags))}",
            })
        if fields:
            blocks.append({
                "type": "section",
                "fields": fields[:10],  # at most 10 fields
            })

        blocks.append({"type": "divider"})

        # Footer with a link to the inbox
        blocks.append({
            "type": "context",
            "elements": [
                {
                    "type": "mrkdwn",
                    "text": f"<{config.get('dashboard_url', 'http://localhost:8000/studio#alerts')}|Zur Alerts Inbox> | Gesendet von BreakPilot",
                }
            ]
        })

        payload["blocks"] = blocks
        # Fallback text shown in notifications
        payload["text"] = f"Neuer Alert: {context.title}"
        return payload

View File

@@ -0,0 +1,135 @@
"""
Webhook Action für Alerts Agent.
Sendet HTTP-Webhooks für Alerts.
"""
import logging
from typing import Dict, Any, List
import httpx
from .base import ActionHandler, ActionResult, ActionType, AlertContext
logger = logging.getLogger(__name__)
class WebhookAction(ActionHandler):
    """
    Webhook notifications for alerts.

    Configuration keys:
    - url: webhook URL
    - method: HTTP method, POST or PUT (default: POST); anything else is sent as GET
    - headers: additional headers
    - timeout: request timeout passed to httpx (default: 30)
    - include_full_context: send the full alert context (default: true)
    """

    @property
    def action_type(self) -> ActionType:
        """Identify this handler as the webhook action."""
        return ActionType.WEBHOOK

    def get_required_config_fields(self) -> List[str]:
        """Return the config keys that must be present."""
        return ["url"]

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Require an http:// or https:// URL; non-string values are rejected."""
        url = config.get("url", "")
        if not isinstance(url, str):
            return False
        return url.startswith("http://") or url.startswith("https://")

    async def execute(
        self,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> ActionResult:
        """
        Send one webhook request for an alert.

        Args:
            context: alert context
            config: webhook configuration (url, method, headers, timeout)

        Returns:
            ActionResult; success means a 2xx response. Timeouts and other
            exceptions are caught, logged and converted into failed results.
        """
        try:
            url = config.get("url")
            method = config.get("method", "POST").upper()
            # Copy so the defaults below never leak into the caller's config.
            headers = dict(config.get("headers") or {})
            timeout = config.get("timeout", 30)

            payload = self._build_payload(context, config)

            headers.setdefault("Content-Type", "application/json")
            headers.setdefault("User-Agent", "BreakPilot-AlertsAgent/1.0")

            async with httpx.AsyncClient(timeout=timeout) as client:
                if method == "POST":
                    response = await client.post(url, json=payload, headers=headers)
                elif method == "PUT":
                    response = await client.put(url, json=payload, headers=headers)
                else:
                    # Every other method falls back to GET; record the method
                    # that is really used so the result is not misleading.
                    method = "GET"
                    response = await client.get(url, params=payload, headers=headers)

            success = 200 <= response.status_code < 300
            return ActionResult(
                success=success,
                action_type=self.action_type,
                message=f"Webhook {method} {url} - Status {response.status_code}",
                details={
                    "url": url,
                    "method": method,
                    "status_code": response.status_code,
                    "response_length": len(response.text),
                },
                error=None if success else f"HTTP {response.status_code}",
            )
        except httpx.TimeoutException:
            logger.error(f"Webhook timeout: {config.get('url')}")
            return ActionResult(
                success=False,
                action_type=self.action_type,
                message="Webhook Timeout",
                error="Request timed out",
            )
        except Exception as e:
            logger.error(f"Webhook error: {e}")
            return ActionResult(
                success=False,
                action_type=self.action_type,
                message="Webhook-Fehler",
                error=str(e),
            )

    def _build_payload(
        self,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Build the webhook payload: full context by default, otherwise a minimal subset."""
        if config.get("include_full_context", True):
            return {
                "event": "alert.matched",
                "alert": context.to_dict(),
                "timestamp": self._get_timestamp(),
            }
        return {
            "event": "alert.matched",
            "alert_id": context.alert_id,
            "title": context.title,
            "url": context.url,
            "timestamp": self._get_timestamp(),
        }

    def _get_timestamp(self) -> str:
        """Return the current UTC time as an ISO-8601 string with a ``Z`` suffix."""
        from datetime import datetime, timezone

        # Strip tzinfo so the text matches the former utcnow().isoformat() + "Z"
        # format without using the deprecated utcnow().
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

View File

@@ -0,0 +1,17 @@
"""Alert Agent API."""
from fastapi import APIRouter
from .routes import router as main_router
from .topics import router as topics_router
from .rules import router as rules_router
# Build one combined router for the package.
# NOTE(review): this router already carries prefix="/alerts". If a sub-router
# declares its own "/alerts" prefix, its paths end up as /alerts/alerts/... —
# confirm against the sub-router definitions and how the app mounts this router.
router = APIRouter(prefix="/alerts", tags=["Alerts Agent"])
# Include all sub-routers
router.include_router(main_router)
router.include_router(topics_router)
router.include_router(rules_router)
__all__ = ["router"]

View File

@@ -0,0 +1,551 @@
"""
API Routes fuer Alert Digests (Wochenzusammenfassungen).
Endpoints:
- GET /digests - Liste aller Digests fuer den User
- GET /digests/{id} - Digest-Details
- GET /digests/{id}/pdf - PDF-Download
- POST /digests/generate - Digest manuell generieren
- POST /digests/{id}/send-email - Digest per E-Mail versenden
"""
import uuid
import io
from typing import Optional, List
from datetime import datetime, timedelta
from fastapi import APIRouter, Depends, HTTPException, Query, Response
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session as DBSession
from ..db.database import get_db
from ..db.models import (
AlertDigestDB, UserAlertSubscriptionDB, DigestStatusEnum
)
from ..processing.digest_generator import DigestGenerator
# Router for all digest endpoints; every path below is relative to /digests.
router = APIRouter(prefix="/digests", tags=["digests"])
# ============================================================================
# Request/Response Models
# ============================================================================
class DigestListItem(BaseModel):
    """Compact digest info for list responses."""
    id: str
    period_start: datetime
    period_end: datetime
    total_alerts: int
    critical_count: int
    urgent_count: int
    status: str
    created_at: datetime
class DigestDetail(BaseModel):
    """Full digest details, including the rendered HTML summary and per-level counts."""
    id: str
    subscription_id: Optional[str]
    user_id: str
    period_start: datetime
    period_end: datetime
    summary_html: str
    summary_pdf_url: Optional[str]
    total_alerts: int
    critical_count: int
    urgent_count: int
    important_count: int
    review_count: int
    info_count: int
    status: str
    sent_at: Optional[datetime]
    created_at: datetime
class DigestListResponse(BaseModel):
    """Response for the digest list: one page of items plus the total count."""
    digests: List[DigestListItem]
    total: int
class GenerateDigestRequest(BaseModel):
    """Request for manual digest generation."""
    # Number of weeks to look back; validated to the range 1..4.
    weeks_back: int = Field(default=1, ge=1, le=4, description="Wochen zurueck")
    # When true, generation runs even if a digest for the period already exists.
    force_regenerate: bool = Field(default=False, description="Vorhandenen Digest ueberschreiben")
class GenerateDigestResponse(BaseModel):
    """Response for digest generation."""
    # One of "exists", "success" or "empty" as set by the generate endpoint.
    status: str
    digest_id: Optional[str]
    message: str
class SendEmailRequest(BaseModel):
    """Request for sending a digest by email."""
    # Optional explicit recipient; when omitted the endpoint falls back to the
    # subscription's notification address.
    email: Optional[str] = Field(default=None, description="E-Mail-Adresse (optional, sonst aus Subscription)")
class SendEmailResponse(BaseModel):
    """Response for sending a digest by email."""
    status: str
    sent_to: str
    message: str
# ============================================================================
# Helper Functions
# ============================================================================
def get_user_id_from_request() -> str:
    """
    Return the ID of the requesting user.

    TODO: evaluate the JWT token; for now a fixed dummy user is returned.
    """
    placeholder_user = "demo-user"
    return placeholder_user
def _digest_to_list_item(digest: AlertDigestDB) -> DigestListItem:
    """Map a digest DB row to its list representation; missing counts become 0."""
    status_text = digest.status.value if digest.status else "pending"
    fields = {
        "id": digest.id,
        "period_start": digest.period_start,
        "period_end": digest.period_end,
        "total_alerts": digest.total_alerts or 0,
        "critical_count": digest.critical_count or 0,
        "urgent_count": digest.urgent_count or 0,
        "status": status_text,
        "created_at": digest.created_at,
    }
    return DigestListItem(**fields)
def _digest_to_detail(digest: AlertDigestDB) -> DigestDetail:
    """Map a digest DB row to its detail representation; missing counts become 0."""
    status_text = digest.status.value if digest.status else "pending"
    fields = {
        "id": digest.id,
        "subscription_id": digest.subscription_id,
        "user_id": digest.user_id,
        "period_start": digest.period_start,
        "period_end": digest.period_end,
        "summary_html": digest.summary_html or "",
        "summary_pdf_url": digest.summary_pdf_url,
        "total_alerts": digest.total_alerts or 0,
        "critical_count": digest.critical_count or 0,
        "urgent_count": digest.urgent_count or 0,
        "important_count": digest.important_count or 0,
        "review_count": digest.review_count or 0,
        "info_count": digest.info_count or 0,
        "status": status_text,
        "sent_at": digest.sent_at,
        "created_at": digest.created_at,
    }
    return DigestDetail(**fields)
# ============================================================================
# Endpoints
# ============================================================================
@router.get("", response_model=DigestListResponse)
async def list_digests(
    limit: int = Query(10, ge=1, le=50),
    offset: int = Query(0, ge=0),
    db: DBSession = Depends(get_db)
):
    """
    List the current user's digests, newest first.

    `total` is the number of matching digests before pagination.
    """
    owner_id = get_user_id_from_request()
    base_query = (
        db.query(AlertDigestDB)
        .filter(AlertDigestDB.user_id == owner_id)
        .order_by(AlertDigestDB.created_at.desc())
    )
    match_count = base_query.count()
    page_rows = base_query.offset(offset).limit(limit).all()
    items = [_digest_to_list_item(row) for row in page_rows]
    return DigestListResponse(digests=items, total=match_count)
@router.get("/latest", response_model=DigestDetail)
async def get_latest_digest(
    db: DBSession = Depends(get_db)
):
    """
    Return the user's most recently created digest, or 404 if there is none.
    """
    owner_id = get_user_id_from_request()
    newest = (
        db.query(AlertDigestDB)
        .filter(AlertDigestDB.user_id == owner_id)
        .order_by(AlertDigestDB.created_at.desc())
        .first()
    )
    if newest:
        return _digest_to_detail(newest)
    raise HTTPException(status_code=404, detail="Kein Digest vorhanden")
@router.get("/{digest_id}", response_model=DigestDetail)
async def get_digest(
    digest_id: str,
    db: DBSession = Depends(get_db)
):
    """
    Return the details of one digest owned by the current user, or 404.
    """
    owner_id = get_user_id_from_request()
    match = (
        db.query(AlertDigestDB)
        .filter(
            AlertDigestDB.id == digest_id,
            AlertDigestDB.user_id == owner_id,
        )
        .first()
    )
    if match:
        return _digest_to_detail(match)
    raise HTTPException(status_code=404, detail="Digest nicht gefunden")
async def _digest_pdf_response(digest: AlertDigestDB) -> StreamingResponse:
    """Render a digest's HTML summary to PDF and wrap it as a download response."""
    if not digest.summary_html:
        raise HTTPException(status_code=400, detail="Digest hat keinen Inhalt")

    try:
        pdf_bytes = await generate_pdf_from_html(digest.summary_html)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"PDF-Generierung fehlgeschlagen: {str(e)}")

    filename = f"wochenbericht_{digest.period_start.strftime('%Y%m%d')}_{digest.period_end.strftime('%Y%m%d')}.pdf"
    return StreamingResponse(
        io.BytesIO(pdf_bytes),
        media_type="application/pdf",
        headers={
            "Content-Disposition": f"attachment; filename={filename}"
        }
    )


# NOTE: this static route must be registered BEFORE "/{digest_id}/pdf".
# Routes are matched in declaration order, so the parameterised route would
# otherwise capture "/latest/pdf" with digest_id="latest" and answer 404.
@router.get("/latest/pdf")
async def get_latest_digest_pdf(
    db: DBSession = Depends(get_db)
):
    """
    Download the PDF of the user's most recently created digest.

    Raises 404 if the user has no digest, 400 if the digest has no HTML
    content and 500 if PDF generation fails.
    """
    user_id = get_user_id_from_request()
    digest = db.query(AlertDigestDB).filter(
        AlertDigestDB.user_id == user_id
    ).order_by(AlertDigestDB.created_at.desc()).first()
    if not digest:
        raise HTTPException(status_code=404, detail="Kein Digest vorhanden")
    return await _digest_pdf_response(digest)


@router.get("/{digest_id}/pdf")
async def get_digest_pdf(
    digest_id: str,
    db: DBSession = Depends(get_db)
):
    """
    Generate and download the PDF version of one digest.

    Raises 404 if the digest does not exist for the current user, 400 if it
    has no HTML content and 500 if PDF generation fails.
    """
    user_id = get_user_id_from_request()
    digest = db.query(AlertDigestDB).filter(
        AlertDigestDB.id == digest_id,
        AlertDigestDB.user_id == user_id
    ).first()
    if not digest:
        raise HTTPException(status_code=404, detail="Digest nicht gefunden")
    return await _digest_pdf_response(digest)
@router.post("/generate", response_model=GenerateDigestResponse)
async def generate_digest(
    request: Optional[GenerateDigestRequest] = None,
    db: DBSession = Depends(get_db)
):
    """
    Generate a new digest manually.

    Digests are normally generated automatically every week; this route allows
    manual generation for tests or on demand.

    Returns status "exists" (with the existing ID) when a digest for the same
    period is already stored and `force_regenerate` is not set, "success" with
    the new ID, or "empty" when the generator produced no digest. Generator
    errors are reported as HTTP 500.
    """
    user_id = get_user_id_from_request()
    weeks_back = request.weeks_back if request else 1
    force = bool(request and request.force_regenerate)

    # Requested period: from `weeks_back` weeks before this week's Monday up to
    # this week's Monday (time of day is carried over from "now").
    now = datetime.utcnow()
    period_end = now - timedelta(days=now.weekday())
    period_start = period_end - timedelta(weeks=weeks_back)

    # Match only digests whose start AND end both lie within one day of the
    # requested period. Bounding each side in one direction only would treat a
    # shorter digest inside the window (e.g. a 1-week digest when 4 weeks are
    # requested) as "already exists".
    tolerance = timedelta(days=1)
    existing = db.query(AlertDigestDB).filter(
        AlertDigestDB.user_id == user_id,
        AlertDigestDB.period_start >= period_start - tolerance,
        AlertDigestDB.period_start <= period_start + tolerance,
        AlertDigestDB.period_end >= period_end - tolerance,
        AlertDigestDB.period_end <= period_end + tolerance,
    ).first()
    if existing and not force:
        return GenerateDigestResponse(
            status="exists",
            digest_id=existing.id,
            message="Digest fuer diesen Zeitraum existiert bereits"
        )

    # Generate a new digest
    generator = DigestGenerator(db)
    try:
        digest = await generator.generate_weekly_digest(user_id, weeks_back)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Fehler bei Digest-Generierung: {str(e)}")

    if digest:
        return GenerateDigestResponse(
            status="success",
            digest_id=digest.id,
            message="Digest erfolgreich generiert"
        )
    return GenerateDigestResponse(
        status="empty",
        digest_id=None,
        message="Keine Alerts fuer diesen Zeitraum vorhanden"
    )
@router.post("/{digest_id}/send-email", response_model=SendEmailResponse)
async def send_digest_email(
    digest_id: str,
    request: Optional[SendEmailRequest] = None,
    db: DBSession = Depends(get_db)
):
    """
    Send a digest by email.

    The recipient is taken from the request body or, if absent, from the
    digest's subscription. Raises 404 if the digest does not exist for the
    current user, 400 if no recipient can be determined and 500 if sending
    fails (the digest is then marked FAILED).
    """
    user_id = get_user_id_from_request()
    digest = db.query(AlertDigestDB).filter(
        AlertDigestDB.id == digest_id,
        AlertDigestDB.user_id == user_id
    ).first()
    if not digest:
        raise HTTPException(status_code=404, detail="Digest nicht gefunden")

    # Determine the recipient address
    email = None
    if request and request.email:
        email = request.email
    elif digest.subscription_id is not None:
        # Fall back to the subscription; skipped when the digest has none.
        subscription = db.query(UserAlertSubscriptionDB).filter(
            UserAlertSubscriptionDB.id == digest.subscription_id
        ).first()
        if subscription:
            email = subscription.notification_email
    if not email:
        raise HTTPException(status_code=400, detail="Keine E-Mail-Adresse angegeben")

    # Only a failure of the send itself marks the digest as FAILED. The status
    # commit below is kept outside this try so that a DB error after a
    # successful send is not misreported (and recorded) as a mail failure.
    try:
        await send_digest_by_email(digest, email)
    except Exception as e:
        digest.status = DigestStatusEnum.FAILED
        db.commit()
        raise HTTPException(status_code=500, detail=f"E-Mail-Versand fehlgeschlagen: {str(e)}")

    digest.status = DigestStatusEnum.SENT
    digest.sent_at = datetime.utcnow()
    db.commit()
    return SendEmailResponse(
        status="success",
        sent_to=email,
        message="E-Mail erfolgreich versendet"
    )
# ============================================================================
# PDF Generation
# ============================================================================
async def generate_pdf_from_html(html_content: str) -> bytes:
    """
    Render HTML to PDF bytes.

    Tries WeasyPrint first, then pdfkit (wkhtmltopdf), then xhtml2pdf; the
    first library that can be imported is used.

    Raises:
        RuntimeError: xhtml2pdf reported a conversion error.
        ImportError: none of the three libraries is installed.
    """
    try:
        # Preferred: WeasyPrint
        from weasyprint import HTML
        return HTML(string=html_content).write_pdf()
    except ImportError:
        pass

    try:
        # Fallback: wkhtmltopdf via pdfkit
        import pdfkit
        return pdfkit.from_string(html_content, False)
    except ImportError:
        pass

    try:
        # Fallback: xhtml2pdf
        from xhtml2pdf import pisa
    except ImportError:
        pisa = None

    if pisa is not None:
        result = io.BytesIO()
        status = pisa.CreatePDF(io.StringIO(html_content), dest=result)
        # CreatePDF signals failure via the status object rather than raising;
        # without this check a broken or empty PDF would be returned.
        if getattr(status, "err", 0):
            raise RuntimeError("PDF-Generierung mit xhtml2pdf fehlgeschlagen")
        return result.getvalue()

    # No PDF backend available
    raise ImportError(
        "Keine PDF-Bibliothek verfuegbar. "
        "Installieren Sie: pip install weasyprint oder pip install pdfkit oder pip install xhtml2pdf"
    )
# ============================================================================
# Email Sending
# ============================================================================
async def send_digest_by_email(digest: AlertDigestDB, recipient_email: str):
    """
    Send a digest by email.

    Delivery goes through SMTP (host/port/credentials from SMTP_HOST,
    SMTP_PORT, SMTP_USER, SMTP_PASS; port 465 uses SSL, port 587 STARTTLS).
    If SMTP delivery raises and SENDGRID_API_KEY is set, the SendGrid API is
    used as a fallback; otherwise the SMTP error is re-raised.

    The message carries a plain-text summary, the HTML summary when present,
    and a best-effort PDF attachment.
    """
    import asyncio
    import os
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart
    from email.mime.application import MIMEApplication

    # Assemble the message.
    # NOTE(review): the PDF is attached inside a multipart/alternative
    # container; a multipart/mixed wrapper would be the conventional layout,
    # but send_via_sendgrid reads the parts of this message — change both
    # together.
    msg = MIMEMultipart('alternative')
    msg['Subject'] = f"Wochenbericht: {digest.period_start.strftime('%d.%m.%Y')} - {digest.period_end.strftime('%d.%m.%Y')}"
    msg['From'] = os.getenv('SMTP_FROM', 'alerts@breakpilot.app')
    msg['To'] = recipient_email

    # Plain-text version
    text_content = f"""
BreakPilot Alerts - Wochenbericht
Zeitraum: {digest.period_start.strftime('%d.%m.%Y')} - {digest.period_end.strftime('%d.%m.%Y')}
Gesamt: {digest.total_alerts} Meldungen
Kritisch: {digest.critical_count}
Dringend: {digest.urgent_count}
Oeffnen Sie die HTML-Version fuer die vollstaendige Uebersicht.
---
Diese E-Mail wurde automatisch von BreakPilot Alerts generiert.
"""
    msg.attach(MIMEText(text_content, 'plain', 'utf-8'))

    if digest.summary_html:
        # HTML version
        msg.attach(MIMEText(digest.summary_html, 'html', 'utf-8'))

        # PDF attachment is optional: any failure just means no attachment.
        try:
            pdf_bytes = await generate_pdf_from_html(digest.summary_html)
            pdf_attachment = MIMEApplication(pdf_bytes, _subtype='pdf')
            pdf_attachment.add_header(
                'Content-Disposition', 'attachment',
                filename=f"wochenbericht_{digest.period_start.strftime('%Y%m%d')}.pdf"
            )
            msg.attach(pdf_attachment)
        except Exception:
            pass

    smtp_host = os.getenv('SMTP_HOST', 'localhost')
    smtp_port = int(os.getenv('SMTP_PORT', '25'))
    smtp_user = os.getenv('SMTP_USER', '')
    smtp_pass = os.getenv('SMTP_PASS', '')

    def _deliver_via_smtp() -> None:
        """Blocking SMTP delivery; the context manager always closes the connection."""
        smtp_cls = smtplib.SMTP_SSL if smtp_port == 465 else smtplib.SMTP
        with smtp_cls(smtp_host, smtp_port, timeout=30) as server:
            if smtp_port == 587:
                server.starttls()
            if smtp_user and smtp_pass:
                server.login(smtp_user, smtp_pass)
            server.send_message(msg)

    try:
        # smtplib is synchronous; run it in a worker thread so the event loop
        # is not blocked while the mail server is slow or unreachable.
        await asyncio.to_thread(_deliver_via_smtp)
    except Exception:
        # Fallback: SendGrid API
        sendgrid_key = os.getenv('SENDGRID_API_KEY')
        if not sendgrid_key:
            raise
        await send_via_sendgrid(msg, sendgrid_key)
async def send_via_sendgrid(msg, api_key: str):
    """
    Fallback delivery through the SendGrid v3 mail API.

    Args:
        msg: an ``email.message.Message`` with To/From/Subject headers and
            text/plain and optionally text/html parts.
        api_key: SendGrid API key used as bearer token.

    Raises:
        Exception: SendGrid answered with a status code >= 400.
    """
    import httpx

    def _decoded_text(part) -> str:
        """Return the part's text with its transfer encoding (e.g. base64) undone."""
        raw = part.get_payload(decode=True)
        if raw is None:
            return ""
        return raw.decode(part.get_content_charset() or "utf-8", errors="replace")

    # Pick the first text/plain and text/html parts wherever they sit in the
    # MIME tree. get_payload() without decode=True would hand back the
    # base64-encoded body of UTF-8 parts.
    text_body = ""
    html_body = ""
    for part in msg.walk():
        content_type = part.get_content_type()
        if content_type == "text/plain" and not text_body:
            text_body = _decoded_text(part)
        elif content_type == "text/html" and not html_body:
            html_body = _decoded_text(part)

    # Empty content values are not sent, so absent parts are left out.
    content = []
    if text_body:
        content.append({"type": "text/plain", "value": text_body})
    if html_body:
        content.append({"type": "text/html", "value": html_body})

    async with httpx.AsyncClient() as client:
        response = await client.post(
            "https://api.sendgrid.com/v3/mail/send",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json"
            },
            json={
                "personalizations": [{"to": [{"email": msg['To']}]}],
                "from": {"email": msg['From']},
                "subject": msg['Subject'],
                "content": content,
            }
        )
    if response.status_code >= 400:
        raise Exception(f"SendGrid error: {response.status_code}")

View File

@@ -0,0 +1,510 @@
"""
API Routes für Alerts Agent.
Endpoints:
- POST /alerts/ingest - Manuell Alerts importieren
- POST /alerts/run - Scoring Pipeline starten
- GET /alerts/inbox - Inbox Items abrufen
- POST /alerts/feedback - Relevanz-Feedback geben
- GET /alerts/profile - User Relevance Profile
- PUT /alerts/profile - Profile aktualisieren
"""
import os
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from ..models.alert_item import AlertItem, AlertStatus
from ..models.relevance_profile import RelevanceProfile, PriorityItem
from ..processing.relevance_scorer import RelevanceDecision, RelevanceScorer
router = APIRouter(prefix="/alerts", tags=["alerts"])

# LLM scorer configuration from environment variables
LLM_GATEWAY_URL = os.getenv("LLM_GATEWAY_URL", "http://localhost:8000/llm")
# First entry of the comma-separated key list; empty when the variable is unset or empty.
LLM_API_KEY = os.getenv("LLM_API_KEYS", "").split(",")[0]
ALERTS_USE_LLM = os.getenv("ALERTS_USE_LLM", "false").lower() == "true"

# ============================================================================
# In-memory storage (to be replaced by a database later)
# ============================================================================
_alerts_store: dict[str, AlertItem] = {}
_profile_store: dict[str, RelevanceProfile] = {}
# ============================================================================
# Request/Response Models
# ============================================================================
class AlertIngestRequest(BaseModel):
    """Request for manually importing an alert."""
    title: str = Field(..., min_length=1, max_length=500)
    url: str = Field(..., min_length=1)
    snippet: Optional[str] = Field(default=None, max_length=2000)
    topic_label: str = Field(default="Manual Import")
    published_at: Optional[datetime] = None
class AlertIngestResponse(BaseModel):
    """Response for an alert import."""
    id: str
    status: str
    message: str
class AlertRunRequest(BaseModel):
    """Request for running the scoring pipeline."""
    # Maximum number of alerts processed in one run (1..200).
    limit: int = Field(default=50, ge=1, le=200)
    # When false, alerts that are already in status SCORED are scored again.
    skip_scored: bool = Field(default=True)
class AlertRunResponse(BaseModel):
    """Response for a scoring pipeline run: per-decision counters and duration."""
    processed: int
    keep: int
    drop: int
    review: int
    errors: int
    duration_ms: int
class InboxItem(BaseModel):
    """A single item in the inbox."""
    id: str
    title: str
    url: str
    snippet: Optional[str]
    topic_label: str
    published_at: Optional[datetime]
    relevance_score: Optional[float]
    relevance_decision: Optional[str]
    relevance_summary: Optional[str]
    status: str
class InboxResponse(BaseModel):
    """Response for an inbox query: one page of items plus paging info."""
    items: list[InboxItem]
    total: int
    page: int
    page_size: int
class FeedbackRequest(BaseModel):
    """Request for relevance feedback on one alert."""
    alert_id: str
    is_relevant: bool
    reason: Optional[str] = None
    tags: list[str] = Field(default_factory=list)
class FeedbackResponse(BaseModel):
    """Response for submitted feedback."""
    success: bool
    message: str
    profile_updated: bool
class ProfilePriorityRequest(BaseModel):
    """A single priority entry within a profile update."""
    label: str
    # Weight validated to the range 0.0..1.0.
    weight: float = Field(default=0.5, ge=0.0, le=1.0)
    keywords: list[str] = Field(default_factory=list)
    description: Optional[str] = None
class ProfileUpdateRequest(BaseModel):
    """Request for a profile update; every field is optional."""
    priorities: Optional[list[ProfilePriorityRequest]] = None
    exclusions: Optional[list[str]] = None
    policies: Optional[dict] = None
class ProfileResponse(BaseModel):
    """Response carrying a relevance profile and its scoring statistics."""
    id: str
    priorities: list[dict]
    exclusions: list[str]
    policies: dict
    total_scored: int
    total_kept: int
    total_dropped: int
    accuracy_estimate: Optional[float]
# ============================================================================
# Endpoints
# ============================================================================
@router.post("/ingest", response_model=AlertIngestResponse)
async def ingest_alert(request: AlertIngestRequest):
    """
    Import a single alert manually.

    Useful for tests or for adding articles by hand. The alert is kept in the
    in-memory store under its generated ID.
    """
    new_alert = AlertItem(
        title=request.title,
        url=request.url,
        snippet=request.snippet or "",
        topic_label=request.topic_label,
        published_at=request.published_at,
    )
    _alerts_store[new_alert.id] = new_alert

    short_title = new_alert.title[:50]
    return AlertIngestResponse(
        id=new_alert.id,
        status="created",
        message=f"Alert '{short_title}...' importiert"
    )
@router.post("/run", response_model=AlertRunResponse)
async def run_scoring_pipeline(request: AlertRunRequest):
    """
    Run the scoring pipeline for new alerts.

    Scores alerts in status NEW (and SCORED ones too when `skip_scored` is
    false), up to `limit`, and classifies them as KEEP, DROP or REVIEW.

    If ALERTS_USE_LLM=true and an API key is configured, the LLM gateway is
    used for scoring. Otherwise a fast keyword-based scoring runs against the
    default profile's exclusions and priorities.
    """
    import time

    def _priority_terms(priority) -> list:
        """Lower-cased label and keywords of a priority given as object or dict."""
        if isinstance(priority, dict):
            label = priority.get("label") or ""
            keywords = priority.get("keywords") or []
        else:
            label = getattr(priority, "label", "") or ""
            keywords = getattr(priority, "keywords", None) or []
        # Empty terms are dropped: "" is a substring of every text and would
        # make the priority match every alert.
        return [term.lower() for term in (label, *keywords) if term]

    # Monotonic clock: the duration must not be affected by wall-clock changes.
    start = time.perf_counter()

    def _elapsed_ms() -> int:
        return int((time.perf_counter() - start) * 1000)

    # Collect the alerts that still need scoring
    alerts_to_score = [
        a for a in _alerts_store.values()
        if a.status == AlertStatus.NEW or (not request.skip_scored and a.status == AlertStatus.SCORED)
    ][:request.limit]
    if not alerts_to_score:
        return AlertRunResponse(
            processed=0, keep=0, drop=0, review=0, errors=0,
            duration_ms=_elapsed_ms()
        )

    keep = drop = review = errors = 0

    # Load (or lazily create) the default profile used for scoring
    profile = _profile_store.get("default")
    if not profile:
        profile = RelevanceProfile.create_default_education_profile()
        profile.id = "default"
        _profile_store["default"] = profile

    if ALERTS_USE_LLM and LLM_API_KEY:
        # LLM-based scoring through the gateway
        scorer = RelevanceScorer(
            gateway_url=LLM_GATEWAY_URL,
            api_key=LLM_API_KEY,
            model="breakpilot-teacher-8b",
        )
        try:
            results = await scorer.score_batch(alerts_to_score, profile=profile)
            for result in results:
                if result.error:
                    errors += 1
                elif result.decision == RelevanceDecision.KEEP:
                    keep += 1
                elif result.decision == RelevanceDecision.DROP:
                    drop += 1
                else:
                    review += 1
        finally:
            await scorer.close()
    else:
        # Fallback: keyword-based scoring (fast, no LLM). The terms are
        # computed once instead of once per alert.
        exclusion_terms = [excl.lower() for excl in profile.exclusions if excl]
        priority_terms = [
            term for p in profile.priorities for term in _priority_terms(p)
        ]
        for alert in alerts_to_score:
            combined = alert.title.lower() + " " + (alert.snippet or "").lower()
            if any(term in combined for term in exclusion_terms):
                # An exclusion wins over any priority match
                alert.relevance_score = 0.15
                alert.relevance_decision = RelevanceDecision.DROP.value
                drop += 1
            elif any(term in combined for term in priority_terms):
                alert.relevance_score = 0.85
                alert.relevance_decision = RelevanceDecision.KEEP.value
                keep += 1
            else:
                alert.relevance_score = 0.55
                alert.relevance_decision = RelevanceDecision.REVIEW.value
                review += 1
            alert.status = AlertStatus.SCORED

    return AlertRunResponse(
        processed=len(alerts_to_score),
        keep=keep,
        drop=drop,
        review=review,
        errors=errors,
        duration_ms=_elapsed_ms(),
    )
@router.get("/inbox", response_model=InboxResponse)
async def get_inbox(
    decision: Optional[str] = Query(default=None, description="Filter: KEEP, DROP, REVIEW"),
    page: int = Query(default=1, ge=1),
    page_size: int = Query(default=20, ge=1, le=100),
):
    """
    Return one page of inbox items.

    Filters by relevance decision; without a filter, KEEP and REVIEW items are
    shown. Items are ordered by relevance score, highest first.
    """
    # Decide which decisions are visible
    if decision:
        wanted = decision.upper()
        visible = [a for a in _alerts_store.values() if a.relevance_decision == wanted]
    else:
        visible = [
            a for a in _alerts_store.values()
            if a.relevance_decision in ["KEEP", "REVIEW"]
        ]

    # Highest score first; alerts without a score count as 0
    ranked = sorted(visible, key=lambda a: a.relevance_score or 0, reverse=True)

    # Pagination
    first = (page - 1) * page_size
    window = ranked[first:first + page_size]

    items = []
    for entry in window:
        items.append(
            InboxItem(
                id=entry.id,
                title=entry.title,
                url=entry.url,
                snippet=entry.snippet,
                topic_label=entry.topic_label,
                published_at=entry.published_at,
                relevance_score=entry.relevance_score,
                relevance_decision=entry.relevance_decision,
                relevance_summary=entry.relevance_summary,
                status=entry.status.value,
            )
        )

    return InboxResponse(
        items=items,
        total=len(ranked),
        page=page,
        page_size=page_size,
    )
@router.post("/feedback", response_model=FeedbackResponse)
async def submit_feedback(request: FeedbackRequest):
    """
    Record user feedback for a single alert.

    Marks the alert as REVIEWED and forwards the feedback to the "default"
    relevance profile, creating that profile first when it does not exist.

    Raises:
        HTTPException: 404 when no alert with ``request.alert_id`` is stored.
    """
    target = _alerts_store.get(request.alert_id)
    if not target:
        raise HTTPException(status_code=404, detail="Alert nicht gefunden")

    target.status = AlertStatus.REVIEWED

    # Demo behaviour: all feedback is applied to the shared "default" profile.
    default_profile = _profile_store.get("default")
    if not default_profile:
        default_profile = RelevanceProfile.create_default_education_profile()
        default_profile.id = "default"
        _profile_store["default"] = default_profile

    feedback_reason = request.reason or ""
    default_profile.update_from_feedback(
        alert_title=target.title,
        alert_url=target.url,
        is_relevant=request.is_relevant,
        reason=feedback_reason,
    )

    return FeedbackResponse(
        success=True,
        message="Feedback gespeichert",
        profile_updated=True,
    )
@router.get("/profile", response_model=ProfileResponse)
async def get_profile(user_id: Optional[str] = Query(default=None)):
    """
    Return the relevance profile for a user.

    Without ``user_id`` the profile stored under "default" is used. A missing
    profile is created from the default education profile and stored under
    the requested ID before it is returned.
    """
    key = user_id or "default"

    stored = _profile_store.get(key)
    if not stored:
        stored = RelevanceProfile.create_default_education_profile()
        stored.id = key
        _profile_store[key] = stored

    # Priorities may be PriorityItem objects or already-serialised values.
    serialised_priorities = []
    for entry in stored.priorities:
        if isinstance(entry, PriorityItem):
            serialised_priorities.append(entry.to_dict())
        else:
            serialised_priorities.append(entry)

    return ProfileResponse(
        id=stored.id,
        priorities=serialised_priorities,
        exclusions=stored.exclusions,
        policies=stored.policies,
        total_scored=stored.total_scored,
        total_kept=stored.total_kept,
        total_dropped=stored.total_dropped,
        accuracy_estimate=stored.accuracy_estimate,
    )
@router.put("/profile", response_model=ProfileResponse)
async def update_profile(
    request: ProfileUpdateRequest,
    user_id: Optional[str] = Query(default=None),
):
    """
    Update priorities, exclusions and/or policies of a relevance profile.

    Only fields that are not ``None`` in the request are applied. Without
    ``user_id`` the "default" profile is updated. A missing profile is created
    as an empty ``RelevanceProfile`` before the updates are applied.
    """
    key = user_id or "default"

    target = _profile_store.get(key)
    if not target:
        target = RelevanceProfile()
        target.id = key

    if request.priorities is not None:
        rebuilt = []
        for incoming in request.priorities:
            rebuilt.append(
                PriorityItem(
                    label=incoming.label,
                    weight=incoming.weight,
                    keywords=incoming.keywords,
                    description=incoming.description,
                )
            )
        target.priorities = rebuilt

    if request.exclusions is not None:
        target.exclusions = request.exclusions

    if request.policies is not None:
        target.policies = request.policies

    target.updated_at = datetime.utcnow()
    _profile_store[key] = target

    # Priorities may be PriorityItem objects or already-serialised values.
    serialised_priorities = [
        entry.to_dict() if isinstance(entry, PriorityItem) else entry
        for entry in target.priorities
    ]

    return ProfileResponse(
        id=target.id,
        priorities=serialised_priorities,
        exclusions=target.exclusions,
        policies=target.policies,
        total_scored=target.total_scored,
        total_kept=target.total_kept,
        total_dropped=target.total_dropped,
        accuracy_estimate=target.accuracy_estimate,
    )
@router.get("/stats")
async def get_stats():
    """
    Return statistics about stored alerts and their scoring.

    Alert counts come from the in-memory alert store. Topic and rule counts
    are loaded from the database on a best-effort basis and stay 0 when the
    database layer cannot be imported or queried.

    Returns:
        A dict with flat keys (``total_alerts``, ``new_alerts``,
        ``kept_alerts``, ``review_alerts``, ``dropped_alerts``,
        ``total_topics``, ``active_topics``, ``total_rules``, ``avg_score``)
        plus the nested ``by_status`` and ``by_decision`` breakdowns that are
        kept for backward compatibility.
    """
    alerts = list(_alerts_store.values())
    total = len(alerts)

    # Counts by status and by relevance decision.
    new_alerts = sum(1 for a in alerts if a.status == AlertStatus.NEW)
    kept_alerts = sum(1 for a in alerts if a.relevance_decision == "KEEP")
    review_alerts = sum(1 for a in alerts if a.relevance_decision == "REVIEW")
    dropped_alerts = sum(1 for a in alerts if a.relevance_decision == "DROP")

    # The in-memory store has no topics or rules; default to 0.
    total_topics = 0
    active_topics = 0
    total_rules = 0

    # Best effort: enrich with database statistics when the DB layer is usable.
    try:
        from alerts_agent.db import get_db
        from alerts_agent.db.repository import TopicRepository, RuleRepository

        db_gen = get_db()
        db = next(db_gen, None)
        if db:
            try:
                all_topics = TopicRepository(db).get_all()
                total_topics = len(all_topics)
                active_topics = sum(1 for t in all_topics if t.is_active)
                total_rules = len(RuleRepository(db).get_all())
            finally:
                # Resume the generator once more so its own cleanup code runs.
                # next() with a default never raises StopIteration, so no
                # extra handler is needed here.
                next(db_gen, None)
    except Exception:
        # Deliberate best effort: without a usable database the in-memory
        # defaults above are reported instead of failing the endpoint.
        pass

    # Average score over alerts that have been scored.
    scored_alerts = [a for a in alerts if a.relevance_score is not None]
    if scored_alerts:
        avg_score = sum(a.relevance_score for a in scored_alerts) / len(scored_alerts)
    else:
        avg_score = 0.0

    return {
        # Flat format
        "total_alerts": total,
        "new_alerts": new_alerts,
        "kept_alerts": kept_alerts,
        "review_alerts": review_alerts,
        "dropped_alerts": dropped_alerts,
        "total_topics": total_topics,
        "active_topics": active_topics,
        "total_rules": total_rules,
        "avg_score": avg_score,
        # Additional details (backward compatibility)
        "by_status": {
            "new": new_alerts,
            "scored": sum(1 for a in alerts if a.status == AlertStatus.SCORED),
            "reviewed": sum(1 for a in alerts if a.status == AlertStatus.REVIEWED),
        },
        "by_decision": {
            "KEEP": kept_alerts,
            "REVIEW": review_alerts,
            "DROP": dropped_alerts,
        },
    }

View File

@@ -0,0 +1,473 @@
"""
Rules API Routes für Alerts Agent.
CRUD-Operationen für Alert-Regeln.
"""
from typing import List, Optional, Dict, Any
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session as DBSession
from alerts_agent.db import get_db
from alerts_agent.db.repository import RuleRepository
from alerts_agent.db.models import RuleActionEnum
# Router for the rule endpoints; every path declared below is relative to "/rules".
router = APIRouter(prefix="/rules", tags=["alerts"])
# =============================================================================
# PYDANTIC MODELS
# =============================================================================
class RuleConditionModel(BaseModel):
    """A single rule condition: the field to inspect, an operator and a comparison value."""
    field: str = Field(..., description="Feld zum Prüfen (title, snippet, url, source, relevance_score)")
    # Declared with the alias "op"; the endpoints below also store it under the key "op".
    operator: str = Field(..., alias="op", description="Operator (contains, not_contains, equals, regex, gt, lt, in)")
    value: Any = Field(..., description="Vergleichswert (String, Zahl, oder Liste)")

    class Config:
        # Lets the field be populated by its name ("operator") as well as its alias.
        populate_by_name = True
class RuleCreate(BaseModel):
    """Request model for creating a rule."""
    name: str = Field(..., min_length=1, max_length=255)
    description: str = Field(default="", max_length=2000)
    conditions: List[RuleConditionModel] = Field(default_factory=list)
    action_type: str = Field(default="keep", description="Aktion: keep, drop, tag, email, webhook, slack")
    action_config: Dict[str, Any] = Field(default_factory=dict)
    # Optional restriction of the rule to a single topic.
    topic_id: Optional[str] = Field(default=None, description="Optional: Nur für bestimmtes Topic")
    priority: int = Field(default=0, ge=0, le=1000, description="Priorität (höher = wird zuerst evaluiert)")
    is_active: bool = Field(default=True)
class RuleUpdate(BaseModel):
    """Request model for a partial rule update; every field defaults to ``None``."""
    name: Optional[str] = Field(default=None, min_length=1, max_length=255)
    description: Optional[str] = Field(default=None, max_length=2000)
    conditions: Optional[List[RuleConditionModel]] = None
    action_type: Optional[str] = None
    action_config: Optional[Dict[str, Any]] = None
    topic_id: Optional[str] = None
    priority: Optional[int] = Field(default=None, ge=0, le=1000)
    is_active: Optional[bool] = None
class RuleResponse(BaseModel):
    """Response model describing a single rule."""
    id: str
    name: str
    description: str
    conditions: List[Dict[str, Any]]
    action_type: str
    action_config: Dict[str, Any]
    topic_id: Optional[str]
    priority: int
    is_active: bool
    match_count: int
    last_matched_at: Optional[datetime]
    created_at: datetime
    updated_at: datetime

    class Config:
        # Allows the model to be built from attribute-bearing objects.
        from_attributes = True
class RuleListResponse(BaseModel):
    """Response model for a list of rules plus the number of returned rules."""
    rules: List[RuleResponse]
    total: int
class RuleTestRequest(BaseModel):
    """Request model with sample alert data used to test rules; all fields have defaults."""
    title: str = Field(default="Test Title")
    snippet: str = Field(default="Test snippet content")
    url: str = Field(default="https://example.com/test")
    # Converted to AlertSourceEnum by the test endpoints.
    source: str = Field(default="rss_feed")
    relevance_score: Optional[float] = Field(default=None)
class RuleTestResponse(BaseModel):
    """Response model for the result of testing one rule against sample data."""
    rule_id: str
    rule_name: str
    matched: bool
    action: str
    conditions_met: List[str]
# =============================================================================
# API ENDPOINTS
# =============================================================================
@router.post("", response_model=RuleResponse, status_code=201)
async def create_rule(
    rule: RuleCreate,
    db: DBSession = Depends(get_db),
) -> RuleResponse:
    """
    Create a new rule.

    Conditions are stored as plain dicts with the keys ``field``, ``op`` and
    ``value``. When the request asks for an inactive rule, the rule is created
    first and then switched to inactive in a second repository call.
    """
    repo = RuleRepository(db)

    condition_dicts = []
    for cond in rule.conditions:
        condition_dicts.append({"field": cond.field, "op": cond.operator, "value": cond.value})

    created = repo.create(
        name=rule.name,
        description=rule.description,
        conditions=condition_dicts,
        action_type=rule.action_type,
        action_config=rule.action_config,
        topic_id=rule.topic_id,
        priority=rule.priority,
    )

    if rule.is_active:
        return _to_rule_response(created)

    # repo.create() takes no is_active argument here, so deactivate afterwards
    # and re-read the rule to return its stored state.
    repo.update(created.id, is_active=False)
    return _to_rule_response(repo.get_by_id(created.id))
@router.get("", response_model=RuleListResponse)
async def list_rules(
    is_active: Optional[bool] = None,
    topic_id: Optional[str] = None,
    db: DBSession = Depends(get_db),
) -> RuleListResponse:
    """
    List rules, optionally filtered by active state and topic.

    Args:
        is_active: ``True`` returns only active rules, ``False`` only inactive
            rules, ``None`` (default) returns all rules.
        topic_id: When given, keeps rules bound to this topic as well as
            rules that are not bound to any topic.
        db: Database session.

    Returns:
        RuleListResponse with the matching rules and their count.
    """
    repo = RuleRepository(db)

    if is_active is True:
        rules = repo.get_active()
    else:
        rules = repo.get_all()
        if is_active is False:
            # Previously an explicit is_active=false was ignored and active
            # rules were returned as well.
            rules = [r for r in rules if not r.is_active]

    # Topic filter: rules without a topic are kept as well.
    if topic_id:
        rules = [r for r in rules if r.topic_id == topic_id or r.topic_id is None]

    return RuleListResponse(
        rules=[_to_rule_response(r) for r in rules],
        total=len(rules),
    )
@router.get("/{rule_id}", response_model=RuleResponse)
async def get_rule(
    rule_id: str,
    db: DBSession = Depends(get_db),
) -> RuleResponse:
    """
    Fetch a single rule by its ID.

    Raises:
        HTTPException: 404 when the repository returns no rule for ``rule_id``.
    """
    found = RuleRepository(db).get_by_id(rule_id)
    if found:
        return _to_rule_response(found)
    raise HTTPException(status_code=404, detail="Regel nicht gefunden")
@router.put("/{rule_id}", response_model=RuleResponse)
async def update_rule(
    rule_id: str,
    updates: RuleUpdate,
    db: DBSession = Depends(get_db),
) -> RuleResponse:
    """
    Apply a partial update to a rule.

    Only request fields that are not ``None`` are passed on to the repository.

    Raises:
        HTTPException: 400 when the request contains no updates, 404 when the
            repository returns no rule for ``rule_id``.
    """
    repo = RuleRepository(db)
    changes = {}

    def _collect(key, value):
        # A value of None means "leave this field untouched".
        if value is not None:
            changes[key] = value

    _collect("name", updates.name)
    _collect("description", updates.description)

    if updates.conditions is not None:
        # Conditions are stored as plain dicts keyed by field/op/value.
        changes["conditions"] = [
            {"field": cond.field, "op": cond.operator, "value": cond.value}
            for cond in updates.conditions
        ]

    for key in ("action_type", "action_config", "topic_id", "priority", "is_active"):
        _collect(key, getattr(updates, key))

    if not changes:
        raise HTTPException(status_code=400, detail="Keine Updates angegeben")

    result = repo.update(rule_id, **changes)
    if not result:
        raise HTTPException(status_code=404, detail="Regel nicht gefunden")

    return _to_rule_response(result)
@router.delete("/{rule_id}", status_code=204)
async def delete_rule(
    rule_id: str,
    db: DBSession = Depends(get_db),
):
    """
    Delete a rule.

    Raises:
        HTTPException: 404 when the repository reports that nothing was deleted.
    """
    deleted = RuleRepository(db).delete(rule_id)
    if deleted:
        return None
    raise HTTPException(status_code=404, detail="Regel nicht gefunden")
@router.post("/{rule_id}/activate", response_model=RuleResponse)
async def activate_rule(
    rule_id: str,
    db: DBSession = Depends(get_db),
) -> RuleResponse:
    """
    Set a rule to active.

    Raises:
        HTTPException: 404 when the repository returns no rule for ``rule_id``.
    """
    result = RuleRepository(db).update(rule_id, is_active=True)
    if result:
        return _to_rule_response(result)
    raise HTTPException(status_code=404, detail="Regel nicht gefunden")
@router.post("/{rule_id}/deactivate", response_model=RuleResponse)
async def deactivate_rule(
    rule_id: str,
    db: DBSession = Depends(get_db),
) -> RuleResponse:
    """
    Set a rule to inactive.

    Raises:
        HTTPException: 404 when the repository returns no rule for ``rule_id``.
    """
    result = RuleRepository(db).update(rule_id, is_active=False)
    if result:
        return _to_rule_response(result)
    raise HTTPException(status_code=404, detail="Regel nicht gefunden")
@router.post("/{rule_id}/test", response_model=RuleTestResponse)
async def test_rule(
    rule_id: str,
    test_data: RuleTestRequest,
    db: DBSession = Depends(get_db),
) -> RuleTestResponse:
    """
    Evaluate one rule against sample alert data.

    A transient ``AlertItemDB`` is built from ``test_data`` and passed to the
    rule engine together with the stored rule. The alert object is not added
    to the database session in this function.

    Raises:
        HTTPException: 404 when the rule does not exist, 400 when
            ``test_data.source`` is not a valid ``AlertSourceEnum`` value.
    """
    from alerts_agent.processing.rule_engine import evaluate_rule
    from alerts_agent.db.models import AlertItemDB, AlertSourceEnum, AlertStatusEnum

    repo = RuleRepository(db)
    rule = repo.get_by_id(rule_id)
    if not rule:
        raise HTTPException(status_code=404, detail="Regel nicht gefunden")

    # Map the client-supplied source string to the enum. An unknown value is a
    # client error, not a server error.
    if test_data.source:
        try:
            source = AlertSourceEnum(test_data.source)
        except ValueError:
            raise HTTPException(
                status_code=400,
                detail=f"Ungültige Quelle: {test_data.source}",
            )
    else:
        source = AlertSourceEnum.RSS_FEED

    # Mock alert used only for this evaluation.
    mock_alert = AlertItemDB(
        id="test-alert",
        topic_id="test-topic",
        title=test_data.title,
        snippet=test_data.snippet,
        url=test_data.url,
        url_hash="test-hash",
        source=source,
        status=AlertStatusEnum.NEW,
        relevance_score=test_data.relevance_score,
    )

    match = evaluate_rule(mock_alert, rule)

    return RuleTestResponse(
        rule_id=match.rule_id,
        rule_name=match.rule_name,
        matched=match.matched,
        action=match.action.value,
        conditions_met=match.conditions_met,
    )
@router.post("/test-all", response_model=List[RuleTestResponse])
async def test_all_rules(
    test_data: RuleTestRequest,
    db: DBSession = Depends(get_db),
) -> List[RuleTestResponse]:
    """
    Evaluate every active rule against sample alert data.

    A transient ``AlertItemDB`` is built from ``test_data`` and each rule
    returned by ``RuleRepository.get_active()`` is evaluated against it. One
    result is returned per rule, whether it matched or not.

    Raises:
        HTTPException: 400 when ``test_data.source`` is not a valid
            ``AlertSourceEnum`` value.
    """
    from alerts_agent.processing.rule_engine import evaluate_rule
    from alerts_agent.db.models import AlertItemDB, AlertSourceEnum, AlertStatusEnum

    repo = RuleRepository(db)
    rules = repo.get_active()

    # Map the client-supplied source string to the enum. An unknown value is a
    # client error, not a server error.
    if test_data.source:
        try:
            source = AlertSourceEnum(test_data.source)
        except ValueError:
            raise HTTPException(
                status_code=400,
                detail=f"Ungültige Quelle: {test_data.source}",
            )
    else:
        source = AlertSourceEnum.RSS_FEED

    # Mock alert used only for this evaluation.
    mock_alert = AlertItemDB(
        id="test-alert",
        topic_id="test-topic",
        title=test_data.title,
        snippet=test_data.snippet,
        url=test_data.url,
        url_hash="test-hash",
        source=source,
        status=AlertStatusEnum.NEW,
        relevance_score=test_data.relevance_score,
    )

    results = []
    for rule in rules:
        match = evaluate_rule(mock_alert, rule)
        results.append(RuleTestResponse(
            rule_id=match.rule_id,
            rule_name=match.rule_name,
            matched=match.matched,
            action=match.action.value,
            conditions_met=match.conditions_met,
        ))

    return results
# =============================================================================
# HELPER FUNCTIONS
# =============================================================================
def _to_rule_response(rule) -> RuleResponse:
    """Build a RuleResponse from a rule object, substituting defaults for empty optional values."""
    # A rule without an action type is reported as "keep".
    if rule.action_type:
        action_name = rule.action_type.value
    else:
        action_name = "keep"

    payload = {
        "id": rule.id,
        "name": rule.name,
        "description": rule.description or "",
        "conditions": rule.conditions or [],
        "action_type": action_name,
        "action_config": rule.action_config or {},
        "topic_id": rule.topic_id,
        "priority": rule.priority,
        "is_active": rule.is_active,
        "match_count": rule.match_count,
        "last_matched_at": rule.last_matched_at,
        "created_at": rule.created_at,
        "updated_at": rule.updated_at,
    }
    return RuleResponse(**payload)
# =============================================================================
# PRESET RULES
# =============================================================================
# Built-in rule templates, keyed by preset ID. Each entry carries the values
# that apply_preset_rule() passes to RuleRepository.create().
# NOTE(review): every preset uses the "in" operator with a list of keywords on
# the title; confirm in the rule engine that "in" matches keywords inside the
# title rather than requiring the whole title to equal one list entry.
PRESET_RULES = {
    # Drops job advertisements and job postings.
    "exclude_jobs": {
        "name": "Stellenanzeigen ausschließen",
        "description": "Filtert Stellenanzeigen und Job-Postings",
        "conditions": [
            {"field": "title", "op": "in", "value": ["Stellenanzeige", "Job", "Karriere", "Praktikum", "Werkstudent", "Ausbildung", "Referendariat"]}
        ],
        "action_type": "drop",
        "priority": 100,
    },
    # Drops advertising and press releases.
    "exclude_ads": {
        "name": "Werbung ausschließen",
        "description": "Filtert Werbung und Pressemitteilungen",
        "conditions": [
            {"field": "title", "op": "in", "value": ["Werbung", "Anzeige", "Pressemitteilung", "PR:", "Sponsored"]}
        ],
        "action_type": "drop",
        "priority": 100,
    },
    # Keeps articles about inclusion.
    "keep_inklusion": {
        "name": "Inklusion behalten",
        "description": "Behält Artikel zum Thema Inklusion",
        "conditions": [
            {"field": "title", "op": "in", "value": ["Inklusion", "inklusiv", "Förderbedarf", "Förderschule", "Nachteilsausgleich"]}
        ],
        "action_type": "keep",
        "priority": 50,
    },
    # Keeps articles about data protection in schools.
    "keep_datenschutz": {
        "name": "Datenschutz behalten",
        "description": "Behält Artikel zum Thema Datenschutz in Schulen",
        "conditions": [
            {"field": "title", "op": "in", "value": ["DSGVO", "Datenschutz", "Schülerfotos", "personenbezogen"]}
        ],
        "action_type": "keep",
        "priority": 50,
    },
}
@router.get("/presets/list")
async def list_preset_rules() -> Dict[str, Any]:
    """
    List the available rule presets.

    Returns:
        A dict with a single key ``presets`` holding one entry per preset,
        each consisting of the preset ID under ``id`` plus the preset's values.
    """
    presets = []
    for preset_id, definition in PRESET_RULES.items():
        presets.append({"id": preset_id, **definition})
    return {"presets": presets}
@router.post("/presets/{preset_id}/apply", response_model=RuleResponse)
async def apply_preset_rule(
    preset_id: str,
    db: DBSession = Depends(get_db),
) -> RuleResponse:
    """
    Create a rule from one of the presets in ``PRESET_RULES``.

    Raises:
        HTTPException: 404 when ``preset_id`` is not a known preset.
    """
    definition = PRESET_RULES.get(preset_id)
    if definition is None:
        raise HTTPException(status_code=404, detail="Preset nicht gefunden")

    new_rule = RuleRepository(db).create(
        name=definition["name"],
        description=definition.get("description", ""),
        conditions=definition["conditions"],
        action_type=definition["action_type"],
        priority=definition.get("priority", 0),
    )
    return _to_rule_response(new_rule)

View File

@@ -0,0 +1,421 @@
"""
API Routes für User Alert Subscriptions.
Verwaltet Nutzer-Abonnements für Templates und Digest-Einstellungen.
Endpoints:
- POST /subscriptions - Neue Subscription erstellen
- GET /subscriptions - User-Subscriptions auflisten
- GET /subscriptions/{id} - Subscription-Details
- PUT /subscriptions/{id} - Subscription aktualisieren
- DELETE /subscriptions/{id} - Subscription deaktivieren
- POST /subscriptions/{id}/activate-template - Template aktivieren
"""
import uuid
from typing import Optional, List
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session as DBSession
from ..db.database import get_db
from ..db.models import (
UserAlertSubscriptionDB, AlertTemplateDB, AlertProfileDB,
AlertTopicDB, AlertRuleDB, AlertModeEnum, UserRoleEnum
)
# Router for the subscription endpoints; every path declared below is relative to "/subscriptions".
router = APIRouter(prefix="/subscriptions", tags=["subscriptions"])
# ============================================================================
# Request/Response Models
# ============================================================================
class SubscriptionCreate(BaseModel):
    """Request body for creating a subscription."""
    # Validated against AlertModeEnum in create_subscription().
    mode: str = Field(default="guided", description="'guided' oder 'expert'")
    # Validated against UserRoleEnum in create_subscription().
    user_role: Optional[str] = Field(default=None, description="lehrkraft, schulleitung, it_beauftragte")
    # create_subscription() rejects more than 3 entries.
    template_ids: List[str] = Field(default=[], description="Ausgewählte Template-IDs (max. 3)")
    notification_email: Optional[str] = Field(default=None)
    digest_enabled: bool = Field(default=True)
    digest_frequency: str = Field(default="weekly")
class SubscriptionUpdate(BaseModel):
    """Request body for a partial subscription update; every field defaults to ``None``."""
    template_ids: Optional[List[str]] = None
    notification_email: Optional[str] = None
    digest_enabled: Optional[bool] = None
    digest_frequency: Optional[str] = None
    is_active: Optional[bool] = None
class SubscriptionResponse(BaseModel):
    """Response describing a single subscription."""
    id: str
    user_id: str
    mode: str
    user_role: Optional[str]
    selected_template_ids: List[str]
    # Names looked up from the template table for selected_template_ids.
    template_names: List[str]
    notification_email: Optional[str]
    digest_enabled: bool
    digest_frequency: str
    wizard_completed: bool
    is_active: bool
    created_at: datetime
    updated_at: datetime
class SubscriptionListResponse(BaseModel):
    """Response for a list of subscriptions plus the number of returned subscriptions."""
    subscriptions: List[SubscriptionResponse]
    total: int
class ActivateTemplateRequest(BaseModel):
    """Request body for template activation; both switches default to ``True``."""
    create_topics: bool = Field(default=True, description="Topics aus Template-Config erstellen")
    create_rules: bool = Field(default=True, description="Rules aus Template-Config erstellen")
class ActivateTemplateResponse(BaseModel):
    """Response summarising what a template activation created or updated."""
    status: str
    topics_created: int
    rules_created: int
    profile_updated: bool
    message: str
# ============================================================================
# Helper Functions
# ============================================================================
def get_user_id_from_request() -> str:
    """
    Return the ID of the current user.

    TODO: evaluate the JWT token; currently a fixed dummy ID is returned.
    """
    placeholder_user = "demo-user"
    return placeholder_user
def _get_template_names(db: DBSession, template_ids: List[str]) -> List[str]:
    """Return the names of the templates whose IDs are in ``template_ids`` (empty input: empty list)."""
    if not template_ids:
        # Nothing selected: skip the query entirely.
        return []

    id_filter = AlertTemplateDB.id.in_(template_ids)
    rows = db.query(AlertTemplateDB).filter(id_filter).all()

    names = []
    for row in rows:
        names.append(row.name)
    return names
def _subscription_to_response(sub: UserAlertSubscriptionDB, db: DBSession) -> SubscriptionResponse:
    """Convert a subscription row into a SubscriptionResponse, filling defaults for unset columns."""
    selected_ids = sub.selected_template_ids or []

    mode_value = sub.mode.value if sub.mode else "guided"
    role_value = sub.user_role.value if sub.user_role else None

    # Unset boolean columns fall back to the same defaults used on creation.
    digest_on = True if sub.digest_enabled is None else sub.digest_enabled
    wizard_done = False if sub.wizard_completed is None else sub.wizard_completed
    active = True if sub.is_active is None else sub.is_active

    return SubscriptionResponse(
        id=sub.id,
        user_id=sub.user_id,
        mode=mode_value,
        user_role=role_value,
        selected_template_ids=selected_ids,
        template_names=_get_template_names(db, selected_ids),
        notification_email=sub.notification_email,
        digest_enabled=digest_on,
        digest_frequency=sub.digest_frequency or "weekly",
        wizard_completed=wizard_done,
        is_active=active,
        created_at=sub.created_at,
        updated_at=sub.updated_at,
    )
# ============================================================================
# Endpoints
# ============================================================================
@router.post("", response_model=SubscriptionResponse)
async def create_subscription(
    request: SubscriptionCreate,
    db: DBSession = Depends(get_db)
):
    """
    Create a new alert subscription for the current user.

    The mode and (optional) role are validated against their enums. Template
    IDs are de-duplicated (order preserved), limited to 3 and checked for
    existence. The subscription counts as wizard-completed when at least one
    template was selected.

    Raises:
        HTTPException: 400 for an invalid mode or role, more than 3 distinct
            templates, or template IDs that do not exist.
    """
    user_id = get_user_id_from_request()

    # Validate mode
    try:
        mode = AlertModeEnum(request.mode)
    except ValueError:
        raise HTTPException(status_code=400, detail="Ungültiger Modus. Erlaubt: 'guided', 'expert'")

    # Validate role
    user_role = None
    if request.user_role:
        try:
            user_role = UserRoleEnum(request.user_role)
        except ValueError:
            raise HTTPException(
                status_code=400,
                detail="Ungültige Rolle. Erlaubt: 'lehrkraft', 'schulleitung', 'it_beauftragte'"
            )

    # De-duplicate first: the existence check below compares a row count with
    # the number of requested IDs, so a repeated (valid) ID would otherwise be
    # reported as invalid.
    template_ids = list(dict.fromkeys(request.template_ids))

    if template_ids:
        if len(template_ids) > 3:
            raise HTTPException(status_code=400, detail="Maximal 3 Templates erlaubt")

        # Check that every requested template exists
        existing = db.query(AlertTemplateDB).filter(
            AlertTemplateDB.id.in_(template_ids)
        ).count()

        if existing != len(template_ids):
            raise HTTPException(status_code=400, detail="Eine oder mehrere Template-IDs sind ungültig")

    subscription = UserAlertSubscriptionDB(
        id=str(uuid.uuid4()),
        user_id=user_id,
        mode=mode,
        user_role=user_role,
        selected_template_ids=template_ids,
        notification_email=request.notification_email,
        digest_enabled=request.digest_enabled,
        digest_frequency=request.digest_frequency,
        wizard_completed=len(template_ids) > 0,  # completed once templates are chosen
        is_active=True,
    )

    db.add(subscription)
    db.commit()
    db.refresh(subscription)

    return _subscription_to_response(subscription, db)
@router.get("", response_model=SubscriptionListResponse)
async def list_subscriptions(
    active_only: bool = Query(True, description="Nur aktive Subscriptions"),
    db: DBSession = Depends(get_db)
):
    """
    List the current user's subscriptions, newest first.

    With ``active_only`` (default) only subscriptions whose ``is_active``
    column is true are returned.
    """
    owner = get_user_id_from_request()

    criteria = [UserAlertSubscriptionDB.user_id == owner]
    if active_only:
        # SQL expression, not a Python comparison: "== True" is intentional.
        criteria.append(UserAlertSubscriptionDB.is_active == True)  # noqa: E712

    rows = (
        db.query(UserAlertSubscriptionDB)
        .filter(*criteria)
        .order_by(UserAlertSubscriptionDB.created_at.desc())
        .all()
    )

    converted = []
    for row in rows:
        converted.append(_subscription_to_response(row, db))

    return SubscriptionListResponse(
        subscriptions=converted,
        total=len(rows)
    )
@router.get("/{subscription_id}", response_model=SubscriptionResponse)
async def get_subscription(
    subscription_id: str,
    db: DBSession = Depends(get_db)
):
    """
    Return one subscription of the current user.

    Raises:
        HTTPException: 404 when no subscription with this ID belongs to the user.
    """
    owner = get_user_id_from_request()

    lookup = db.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.id == subscription_id,
        UserAlertSubscriptionDB.user_id == owner
    )
    found = lookup.first()

    if found:
        return _subscription_to_response(found, db)
    raise HTTPException(status_code=404, detail="Subscription nicht gefunden")
@router.put("/{subscription_id}", response_model=SubscriptionResponse)
async def update_subscription(
    subscription_id: str,
    request: SubscriptionUpdate,
    db: DBSession = Depends(get_db)
):
    """
    Apply a partial update to one of the current user's subscriptions.

    Only request fields that are not ``None`` are applied. Template IDs are
    validated the same way as on creation: de-duplicated (order preserved),
    limited to 3 and checked for existence.

    Raises:
        HTTPException: 404 when the subscription does not exist for this user,
            400 for more than 3 distinct templates or unknown template IDs.
    """
    user_id = get_user_id_from_request()

    subscription = db.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.id == subscription_id,
        UserAlertSubscriptionDB.user_id == user_id
    ).first()

    if not subscription:
        raise HTTPException(status_code=404, detail="Subscription nicht gefunden")

    if request.template_ids is not None:
        template_ids = list(dict.fromkeys(request.template_ids))

        if len(template_ids) > 3:
            raise HTTPException(status_code=400, detail="Maximal 3 Templates erlaubt")

        # Same existence check as in create_subscription, so that unknown IDs
        # cannot be stored through an update.
        if template_ids:
            existing = db.query(AlertTemplateDB).filter(
                AlertTemplateDB.id.in_(template_ids)
            ).count()
            if existing != len(template_ids):
                raise HTTPException(status_code=400, detail="Eine oder mehrere Template-IDs sind ungültig")

        subscription.selected_template_ids = template_ids

    if request.notification_email is not None:
        subscription.notification_email = request.notification_email

    if request.digest_enabled is not None:
        subscription.digest_enabled = request.digest_enabled

    if request.digest_frequency is not None:
        subscription.digest_frequency = request.digest_frequency

    if request.is_active is not None:
        subscription.is_active = request.is_active

    subscription.updated_at = datetime.utcnow()

    db.commit()
    db.refresh(subscription)

    return _subscription_to_response(subscription, db)
@router.delete("/{subscription_id}")
async def deactivate_subscription(
    subscription_id: str,
    db: DBSession = Depends(get_db)
):
    """
    Deactivate one of the current user's subscriptions (soft delete).

    The row is kept; only ``is_active`` is set to False and ``updated_at`` is
    refreshed.

    Raises:
        HTTPException: 404 when no subscription with this ID belongs to the user.
    """
    owner = get_user_id_from_request()

    lookup = db.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.id == subscription_id,
        UserAlertSubscriptionDB.user_id == owner
    )
    target = lookup.first()

    if not target:
        raise HTTPException(status_code=404, detail="Subscription nicht gefunden")

    target.is_active = False
    target.updated_at = datetime.utcnow()
    db.commit()

    return {"status": "success", "message": "Subscription deaktiviert"}
def _merge_unique(existing, additions):
    """Return a NEW list: ``existing`` followed by every item of ``additions`` not already present."""
    merged = list(existing or [])
    for item in additions or []:
        if item not in merged:
            merged.append(item)
    return merged


@router.post("/{subscription_id}/activate-template", response_model=ActivateTemplateResponse)
async def activate_template(
    subscription_id: str,
    request: ActivateTemplateRequest = None,
    db: DBSession = Depends(get_db)
):
    """
    Activate the templates selected on a subscription.

    For every selected template this:
    - creates one topic per entry of ``template.topics_config`` (unless
      ``request.create_topics`` is false),
    - creates one rule per entry of ``template.rules_config`` (unless
      ``request.create_rules`` is false),
    - merges the ``priorities`` and ``exclusions`` of
      ``template.profile_config`` into the user's profile, creating the
      profile when the user has none.

    Afterwards the subscription is marked as wizard-completed and everything
    is committed in one transaction.

    Raises:
        HTTPException: 404 when the subscription does not exist for this user,
            400 when it has no selected templates.
    """
    user_id = get_user_id_from_request()

    subscription = db.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.id == subscription_id,
        UserAlertSubscriptionDB.user_id == user_id
    ).first()

    if not subscription:
        raise HTTPException(status_code=404, detail="Subscription nicht gefunden")

    if not subscription.selected_template_ids:
        raise HTTPException(status_code=400, detail="Keine Templates ausgewählt")

    templates = db.query(AlertTemplateDB).filter(
        AlertTemplateDB.id.in_(subscription.selected_template_ids)
    ).all()

    # A missing request body means "create everything".
    create_topics = request is None or request.create_topics
    create_rules = request is None or request.create_rules

    topics_created = 0
    rules_created = 0
    profile_updated = False
    # Loaded on first use and reused for all templates, so a freshly created
    # profile is not looked up (or created) a second time.
    profile = None

    for template in templates:
        if create_topics:
            for topic_config in (template.topics_config or []):
                topic = AlertTopicDB(
                    id=str(uuid.uuid4()),
                    user_id=user_id,
                    name=topic_config.get("name", f"Topic from {template.name}"),
                    description=f"Automatisch erstellt aus Template: {template.name}",
                    is_active=True,
                    fetch_interval_minutes=60,
                )
                db.add(topic)
                topics_created += 1

        if create_rules:
            for rule_config in (template.rules_config or []):
                rule = AlertRuleDB(
                    id=str(uuid.uuid4()),
                    user_id=user_id,
                    name=rule_config.get("name", f"Rule from {template.name}"),
                    description=f"Automatisch erstellt aus Template: {template.name}",
                    conditions=rule_config.get("conditions", []),
                    action_type=rule_config.get("action_type", "keep"),
                    action_config=rule_config.get("action_config", {}),
                    priority=rule_config.get("priority", 50),
                    is_active=True,
                )
                db.add(rule)
                rules_created += 1

        if template.profile_config:
            if profile is None:
                profile = db.query(AlertProfileDB).filter(
                    AlertProfileDB.user_id == user_id
                ).first()

            if profile is None:
                profile = AlertProfileDB(
                    id=str(uuid.uuid4()),
                    user_id=user_id,
                    name=f"Profil für {user_id}",
                )
                db.add(profile)

            # Assign NEW list objects instead of mutating the loaded ones in
            # place: if these columns are plain JSON without mutation tracking
            # (model not visible here), reassigning the same mutated list is
            # not detected as a change and would not be written on commit.
            profile.priorities = _merge_unique(
                profile.priorities, template.profile_config.get("priorities", [])
            )
            profile.exclusions = _merge_unique(
                profile.exclusions, template.profile_config.get("exclusions", [])
            )

            profile_updated = True

    subscription.wizard_completed = True
    subscription.updated_at = datetime.utcnow()

    db.commit()

    return ActivateTemplateResponse(
        status="success",
        topics_created=topics_created,
        rules_created=rules_created,
        profile_updated=profile_updated,
        message=f"Templates aktiviert: {topics_created} Topics, {rules_created} Rules erstellt."
    )

View File

@@ -0,0 +1,410 @@
"""
API Routes für Alert-Templates (Playbooks).
Endpoints für Guided Mode:
- GET /templates - Liste aller verfügbaren Templates
- GET /templates/{template_id} - Template-Details
- POST /templates/{template_id}/preview - Vorschau generieren
- GET /templates/by-role/{role} - Templates für eine Rolle
"""
from typing import Optional, List
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session as DBSession
from ..db.database import get_db
from ..db.models import AlertTemplateDB, UserRoleEnum
# Router for the template endpoints; every path declared below is relative to "/templates".
router = APIRouter(prefix="/templates", tags=["templates"])
# ============================================================================
# Request/Response Models
# ============================================================================
class TemplateListItem(BaseModel):
    """Short summary of a template, used in template lists."""
    id: str
    slug: str
    name: str
    description: str
    icon: str
    category: str
    target_roles: List[str]
    is_premium: bool
    max_cards_per_day: int
    sort_order: int

    class Config:
        # Allows the model to be built from attribute-bearing objects.
        from_attributes = True
class TemplateDetail(BaseModel):
    """Full details of a template, including its topic, rule, profile and importance configuration."""
    id: str
    slug: str
    name: str
    description: str
    icon: str
    category: str
    target_roles: List[str]
    topics_config: List[dict]
    rules_config: List[dict]
    profile_config: dict
    importance_config: dict
    max_cards_per_day: int
    digest_enabled: bool
    digest_day: str
    is_premium: bool
    is_active: bool

    class Config:
        # Allows the model to be built from attribute-bearing objects.
        from_attributes = True
class TemplateListResponse(BaseModel):
    """Response for a list of templates plus the number of returned templates."""
    templates: List[TemplateListItem]
    total: int
class PreviewRequest(BaseModel):
    """Request body for a template preview; ``sample_count`` must be between 1 and 5 (default 3)."""
    sample_count: int = Field(default=3, ge=1, le=5)
class PreviewItem(BaseModel):
    """A single sample item shown in a template preview."""
    title: str
    snippet: str
    importance_level: str
    why_relevant: str
    source_name: str
class PreviewResponse(BaseModel):
    """Response for a template preview: sample items plus an estimated daily volume."""
    template_name: str
    sample_items: List[PreviewItem]
    estimated_daily_count: str
    message: str
# ============================================================================
# Endpoints
# ============================================================================
@router.get("", response_model=TemplateListResponse)
async def list_templates(
    category: Optional[str] = Query(None, description="Filter nach Kategorie"),
    role: Optional[str] = Query(None, description="Filter nach Zielrolle"),
    include_premium: bool = Query(True, description="Premium-Templates einschließen"),
    db: DBSession = Depends(get_db)
):
    """
    List all active alert templates.

    Templates are pre-configured playbooks for particular subject areas.
    The result can be narrowed by category, by target role and by premium
    status, and is ordered by ``sort_order``.
    """
    conditions = [AlertTemplateDB.is_active == True]
    if category:
        conditions.append(AlertTemplateDB.category == category)
    if not include_premium:
        conditions.append(AlertTemplateDB.is_premium == False)

    rows = (
        db.query(AlertTemplateDB)
        .filter(*conditions)
        .order_by(AlertTemplateDB.sort_order)
        .all()
    )

    # The role filter is applied in Python after loading, not in SQL
    if role:
        rows = [row for row in rows if role in (row.target_roles or [])]

    items = []
    for row in rows:
        items.append(
            TemplateListItem(
                id=row.id,
                slug=row.slug,
                name=row.name,
                description=row.description,
                icon=row.icon or "",
                category=row.category or "",
                target_roles=row.target_roles or [],
                is_premium=row.is_premium or False,
                max_cards_per_day=row.max_cards_per_day or 10,
                sort_order=row.sort_order or 0,
            )
        )

    return TemplateListResponse(templates=items, total=len(rows))
@router.get("/by-role/{role}", response_model=TemplateListResponse)
async def get_templates_by_role(
    role: str,
    db: DBSession = Depends(get_db)
):
    """
    Return the recommended templates for one role.

    Accepted roles:
    - lehrkraft: teaching, professional development, competitions
    - schulleitung: administration, funding, legal matters
    - it_beauftragte: IT security, data protection

    Only active, non-premium templates are considered. Raises HTTP 400 for
    any other role value.
    """
    valid_roles = ["lehrkraft", "schulleitung", "it_beauftragte"]
    if role not in valid_roles:
        allowed = ', '.join(valid_roles)
        raise HTTPException(
            status_code=400,
            detail=f"Ungültige Rolle. Erlaubt: {allowed}"
        )

    # Recommendations are limited to free templates
    candidates = (
        db.query(AlertTemplateDB)
        .filter(
            AlertTemplateDB.is_active == True,
            AlertTemplateDB.is_premium == False,
        )
        .order_by(AlertTemplateDB.sort_order)
        .all()
    )

    matching = [row for row in candidates if role in (row.target_roles or [])]

    items = []
    for row in matching:
        items.append(
            TemplateListItem(
                id=row.id,
                slug=row.slug,
                name=row.name,
                description=row.description,
                icon=row.icon or "",
                category=row.category or "",
                target_roles=row.target_roles or [],
                is_premium=row.is_premium or False,
                max_cards_per_day=row.max_cards_per_day or 10,
                sort_order=row.sort_order or 0,
            )
        )

    return TemplateListResponse(templates=items, total=len(matching))
@router.get("/{template_id}", response_model=TemplateDetail)
async def get_template(
    template_id: str,
    db: DBSession = Depends(get_db)
):
    """
    Fetch the full details of one template.

    ``template_id`` is matched against the primary id first and, if that
    finds nothing, against the slug. Raises HTTP 404 when neither matches.
    """
    found = (
        db.query(AlertTemplateDB)
        .filter(AlertTemplateDB.id == template_id)
        .first()
    )
    if not found:
        # Fall back to a slug lookup
        found = (
            db.query(AlertTemplateDB)
            .filter(AlertTemplateDB.slug == template_id)
            .first()
        )
    if not found:
        raise HTTPException(status_code=404, detail="Template nicht gefunden")

    # NULL flags default to True; other NULL columns get neutral defaults
    digest_enabled = True if found.digest_enabled is None else found.digest_enabled
    is_active = True if found.is_active is None else found.is_active

    return TemplateDetail(
        id=found.id,
        slug=found.slug,
        name=found.name,
        description=found.description,
        icon=found.icon or "",
        category=found.category or "",
        target_roles=found.target_roles or [],
        topics_config=found.topics_config or [],
        rules_config=found.rules_config or [],
        profile_config=found.profile_config or {},
        importance_config=found.importance_config or {},
        max_cards_per_day=found.max_cards_per_day or 10,
        digest_enabled=digest_enabled,
        digest_day=found.digest_day or "monday",
        is_premium=found.is_premium or False,
        is_active=is_active,
    )
@router.post("/{template_id}/preview", response_model=PreviewResponse)
async def preview_template(
    template_id: str,
    request: Optional[PreviewRequest] = None,
    db: DBSession = Depends(get_db)
):
    """
    Generate a preview of what alerts for this template would look like.

    The template is looked up by id and then by slug. The response contains
    static sample alerts with importance levels and a "why relevant"
    explanation each.

    Args:
        template_id: Template id or slug.
        request: Optional body; when omitted, 3 sample items are requested.
        db: Database session.

    Raises:
        HTTPException: 404 if no template matches ``template_id``.
    """
    template = db.query(AlertTemplateDB).filter(
        AlertTemplateDB.id == template_id
    ).first()
    if not template:
        template = db.query(AlertTemplateDB).filter(
            AlertTemplateDB.slug == template_id
        ).first()
    if not template:
        raise HTTPException(status_code=404, detail="Template nicht gefunden")

    # Sample alerts derived from the template (static examples)
    sample_items = _generate_preview_items(template)
    sample_count = request.sample_count if request else 3

    # Use the same fallback as the list/detail endpoints so a NULL column
    # is not rendered as the literal text "None"
    daily_cards = template.max_cards_per_day or 10

    return PreviewResponse(
        template_name=template.name,
        sample_items=sample_items[:sample_count],
        estimated_daily_count=f"Ca. {daily_cards} Meldungen pro Tag",
        message=f"Diese Vorschau zeigt, wie Alerts für '{template.name}' aussehen würden."
    )
@router.post("/seed")
async def seed_templates(
    force_update: bool = Query(False, description="Bestehende Templates aktualisieren"),
    db: DBSession = Depends(get_db)
):
    """
    Insert the predefined templates into the database.

    Intended for development and setup only.
    """
    # Imported locally under an alias because this endpoint has the same name
    from ..data.templates import seed_templates as do_seed

    seeded = do_seed(db, force_update=force_update)
    response = {
        "status": "success",
        "templates_created": seeded,
        "message": f"{seeded} Templates wurden eingefügt/aktualisiert.",
    }
    return response
# ============================================================================
# Helper Functions
# ============================================================================
def _generate_preview_items(template: AlertTemplateDB) -> List[PreviewItem]:
    """
    Build the sample alerts shown in a template preview.

    The samples are static illustrations keyed by template slug, not real
    data. A template whose slug has no dedicated samples gets one generic
    item built from its name.
    """

    def _item(title, snippet, level, why, source):
        # Positional shorthand for constructing a PreviewItem
        return PreviewItem(
            title=title,
            snippet=snippet,
            importance_level=level,
            why_relevant=why,
            source_name=source,
        )

    # Slug-specific samples
    samples_by_slug = {
        "foerderprogramme": [
            _item(
                "DigitalPakt 2.0: Neue Antragsphase startet am 1. April",
                "Das BMBF hat die zweite Phase des DigitalPakt Schule angekündigt...",
                "DRINGEND",
                "Frist endet in 45 Tagen. Betrifft alle Schulen mit Förderbedarf.",
                "Bundesministerium für Bildung",
            ),
            _item(
                "Landesförderung: 50.000€ für innovative Schulprojekte",
                "Das Kultusministerium fördert Schulen, die digitale Lernkonzepte...",
                "WICHTIG",
                "Passende Förderung für Ihr Bundesland. Keine Eigenbeteiligung erforderlich.",
                "Kultusministerium",
            ),
            _item(
                "Erasmus+ Schulpartnerschaften: Jetzt bewerben",
                "Für das Schuljahr 2026/27 können Schulen EU-Förderung beantragen...",
                "PRUEFEN",
                "EU-Programm mit hoher Fördersumme. Bewerbungsfrist in 3 Monaten.",
                "EU-Kommission",
            ),
        ],
        "abitur-updates": [
            _item(
                "Neue Operatoren für Abitur Deutsch ab 2027",
                "Die KMK hat überarbeitete Operatoren für das Fach Deutsch beschlossen...",
                "WICHTIG",
                "Betrifft die Oberstufenplanung. Anpassung der Klausuren erforderlich.",
                "KMK",
            ),
            _item(
                "Abiturtermine 2026: Prüfungsplan veröffentlicht",
                "Das Kultusministerium hat die Termine für das Abitur 2026 bekannt gegeben...",
                "INFO",
                "Planungsgrundlage für Schuljahreskalender.",
                "Kultusministerium",
            ),
        ],
        "datenschutz-recht": [
            _item(
                "LfDI: Neue Handreichung zu Schülerfotos",
                "Der Landesbeauftragte für Datenschutz hat eine aktualisierte...",
                "DRINGEND",
                "Handlungsbedarf: Bestehende Einwilligungen müssen geprüft werden.",
                "Datenschutzbeauftragter",
            ),
            _item(
                "Microsoft 365 an Schulen: Neue Bewertung",
                "Die Datenschutzkonferenz hat ihre Position zu Microsoft 365 aktualisiert...",
                "WICHTIG",
                "Betrifft Schulen mit Microsoft-Lizenzen. Dokumentationspflicht.",
                "DSK",
            ),
        ],
        "it-security": [
            _item(
                "CVE-2026-1234: Kritische Lücke in Moodle",
                "Eine schwerwiegende Sicherheitslücke wurde in Moodle 4.x gefunden...",
                "KRITISCH",
                "Sofortiges Update erforderlich. Exploit bereits aktiv.",
                "BSI CERT-Bund",
            ),
            _item(
                "Phishing-Welle: Gefälschte Schulportal-Mails",
                "Aktuell werden vermehrt Phishing-Mails an Lehrkräfte versendet...",
                "DRINGEND",
                "Warnung an Kollegium empfohlen. Erkennungsmerkmale beachten.",
                "BSI",
            ),
        ],
        "fortbildungen": [
            _item(
                "Kostenlose Fortbildung: KI im Unterricht",
                "Das Landesinstitut bietet eine Online-Fortbildung zu KI-Tools...",
                "PRUEFEN",
                "Passt zu Ihrem Interessenprofil. Online-Format, 4 Stunden.",
                "Landesinstitut",
            ),
        ],
        "wettbewerbe-projekte": [
            _item(
                "Jugend forscht: Anmeldung bis 30. November",
                "Der größte deutsche MINT-Wettbewerb sucht wieder junge Forscher...",
                "WICHTIG",
                "Frist in 60 Tagen. Für Schüler ab Klasse 4.",
                "Jugend forscht e.V.",
            ),
        ],
    }

    # Prefer the slug-specific samples when they exist
    matched = samples_by_slug.get(template.slug)
    if matched is not None:
        return matched

    # Generic fallback built from the template name
    return [
        _item(
            f"Beispiel-Meldung für {template.name}",
            f"Dies ist eine Vorschau, wie Alerts für das Thema '{template.name}' aussehen würden.",
            "INFO",
            "Passend zu Ihren ausgewählten Themen.",
            "Beispielquelle",
        )
    ]

View File

@@ -0,0 +1,405 @@
"""
Topic API Routes für Alerts Agent.
CRUD-Operationen für Alert-Topics (Feed-Quellen).
"""
from typing import List, Optional
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
from pydantic import BaseModel, Field, HttpUrl
from sqlalchemy.orm import Session as DBSession
from alerts_agent.db import get_db
from alerts_agent.db.repository import TopicRepository, AlertItemRepository
from alerts_agent.db.models import FeedTypeEnum
router = APIRouter(prefix="/topics", tags=["alerts"])
# =============================================================================
# PYDANTIC MODELS
# =============================================================================
class TopicCreate(BaseModel):
    """Request body for creating a topic."""

    name: str = Field(..., min_length=1, max_length=255)
    description: str = Field("", max_length=2000)
    feed_url: Optional[str] = Field(None, max_length=2000)
    # One of: rss, email, webhook
    feed_type: str = "rss"
    # Allowed range: 5 minutes to 1440 minutes
    fetch_interval_minutes: int = Field(60, ge=5, le=1440)
    is_active: bool = True
class TopicUpdate(BaseModel):
    """Request body for a partial topic update; every field is optional."""

    name: Optional[str] = Field(None, min_length=1, max_length=255)
    description: Optional[str] = Field(None, max_length=2000)
    feed_url: Optional[str] = Field(None, max_length=2000)
    feed_type: Optional[str] = None
    # Allowed range when given: 5 minutes to 1440 minutes
    fetch_interval_minutes: Optional[int] = Field(None, ge=5, le=1440)
    is_active: Optional[bool] = None
class TopicResponse(BaseModel):
    """Response representation of a topic."""

    # Identity and description
    id: str
    name: str
    description: str

    # Feed configuration
    feed_url: Optional[str]
    feed_type: str
    is_active: bool
    fetch_interval_minutes: int

    # Outcome of the most recent fetch
    last_fetched_at: Optional[datetime]
    last_fetch_error: Optional[str]

    # Item counters
    total_items_fetched: int
    items_kept: int
    items_dropped: int

    # Timestamps
    created_at: datetime
    updated_at: datetime

    class Config:
        # Permit building the model from attribute-bearing objects
        from_attributes = True
class TopicListResponse(BaseModel):
    """Envelope for a topic listing: the topics plus their count."""

    topics: List[TopicResponse]
    # Number of entries in ``topics``
    total: int
class TopicStatsResponse(BaseModel):
    """Response body with alert statistics for one topic."""

    topic_id: str
    name: str
    total_alerts: int
    # Alert counts grouped by status and by decision
    by_status: dict
    by_decision: dict
    # None when the topic has no alerts
    keep_rate: Optional[float]
class FetchResultResponse(BaseModel):
    """Response body for a manually triggered fetch."""

    success: bool
    topic_id: str
    new_items: int
    duplicates_skipped: int
    # Error text when the fetch failed, otherwise None
    error: Optional[str] = None
# =============================================================================
# API ENDPOINTS
# =============================================================================
@router.post("", response_model=TopicResponse, status_code=201)
async def create_topic(
    topic: TopicCreate,
    db: DBSession = Depends(get_db),
) -> TopicResponse:
    """
    Create a new topic (feed source).

    Topics represent Google Alerts RSS feeds or other feed sources. The
    stored record is returned.
    """
    record = TopicRepository(db).create(
        name=topic.name,
        description=topic.description,
        feed_url=topic.feed_url,
        feed_type=topic.feed_type,
        fetch_interval_minutes=topic.fetch_interval_minutes,
        is_active=topic.is_active,
    )

    # A missing feed type is reported as "rss"
    feed_type = record.feed_type.value if record.feed_type else "rss"

    return TopicResponse(
        id=record.id,
        name=record.name,
        description=record.description or "",
        feed_url=record.feed_url,
        feed_type=feed_type,
        is_active=record.is_active,
        fetch_interval_minutes=record.fetch_interval_minutes,
        last_fetched_at=record.last_fetched_at,
        last_fetch_error=record.last_fetch_error,
        total_items_fetched=record.total_items_fetched,
        items_kept=record.items_kept,
        items_dropped=record.items_dropped,
        created_at=record.created_at,
        updated_at=record.updated_at,
    )
@router.get("", response_model=TopicListResponse)
async def list_topics(
    is_active: Optional[bool] = None,
    db: DBSession = Depends(get_db),
) -> TopicListResponse:
    """
    List all topics, optionally filtered by active status.
    """
    records = TopicRepository(db).get_all(is_active=is_active)

    entries = []
    for record in records:
        entries.append(
            TopicResponse(
                id=record.id,
                name=record.name,
                description=record.description or "",
                feed_url=record.feed_url,
                # A missing feed type is reported as "rss"
                feed_type=record.feed_type.value if record.feed_type else "rss",
                is_active=record.is_active,
                fetch_interval_minutes=record.fetch_interval_minutes,
                last_fetched_at=record.last_fetched_at,
                last_fetch_error=record.last_fetch_error,
                total_items_fetched=record.total_items_fetched,
                items_kept=record.items_kept,
                items_dropped=record.items_dropped,
                created_at=record.created_at,
                updated_at=record.updated_at,
            )
        )

    return TopicListResponse(topics=entries, total=len(records))
@router.get("/{topic_id}", response_model=TopicResponse)
async def get_topic(
    topic_id: str,
    db: DBSession = Depends(get_db),
) -> TopicResponse:
    """
    Fetch one topic by id. Raises HTTP 404 when it does not exist.
    """
    record = TopicRepository(db).get_by_id(topic_id)
    if not record:
        raise HTTPException(status_code=404, detail="Topic nicht gefunden")

    # A missing feed type is reported as "rss"
    feed_type = record.feed_type.value if record.feed_type else "rss"

    return TopicResponse(
        id=record.id,
        name=record.name,
        description=record.description or "",
        feed_url=record.feed_url,
        feed_type=feed_type,
        is_active=record.is_active,
        fetch_interval_minutes=record.fetch_interval_minutes,
        last_fetched_at=record.last_fetched_at,
        last_fetch_error=record.last_fetch_error,
        total_items_fetched=record.total_items_fetched,
        items_kept=record.items_kept,
        items_dropped=record.items_dropped,
        created_at=record.created_at,
        updated_at=record.updated_at,
    )
@router.put("/{topic_id}", response_model=TopicResponse)
async def update_topic(
    topic_id: str,
    updates: TopicUpdate,
    db: DBSession = Depends(get_db),
) -> TopicResponse:
    """
    Update a topic with the fields supplied in the request.

    Fields whose value is None are ignored. Raises HTTP 400 when nothing
    remains to update and HTTP 404 when the topic does not exist.
    """
    # Keep only the values that were actually provided
    changes = {}
    for field_name, value in updates.model_dump().items():
        if value is not None:
            changes[field_name] = value

    if not changes:
        raise HTTPException(status_code=400, detail="Keine Updates angegeben")

    record = TopicRepository(db).update(topic_id, **changes)
    if not record:
        raise HTTPException(status_code=404, detail="Topic nicht gefunden")

    # A missing feed type is reported as "rss"
    feed_type = record.feed_type.value if record.feed_type else "rss"

    return TopicResponse(
        id=record.id,
        name=record.name,
        description=record.description or "",
        feed_url=record.feed_url,
        feed_type=feed_type,
        is_active=record.is_active,
        fetch_interval_minutes=record.fetch_interval_minutes,
        last_fetched_at=record.last_fetched_at,
        last_fetch_error=record.last_fetch_error,
        total_items_fetched=record.total_items_fetched,
        items_kept=record.items_kept,
        items_dropped=record.items_dropped,
        created_at=record.created_at,
        updated_at=record.updated_at,
    )
@router.delete("/{topic_id}", status_code=204)
async def delete_topic(
    topic_id: str,
    db: DBSession = Depends(get_db),
):
    """
    Delete a topic. Raises HTTP 404 when it does not exist.

    NOTE(review): the original docstring states that the topic's alerts are
    removed as well (CASCADE); that is not visible here - confirm in the
    repository/model.
    """
    deleted = TopicRepository(db).delete(topic_id)
    if deleted:
        return None
    raise HTTPException(status_code=404, detail="Topic nicht gefunden")
@router.get("/{topic_id}/stats", response_model=TopicStatsResponse)
async def get_topic_stats(
    topic_id: str,
    db: DBSession = Depends(get_db),
) -> TopicStatsResponse:
    """
    Return alert statistics for one topic.

    ``keep_rate`` is the share of alerts with decision "KEEP" among all
    alerts counted by status, or None when there are no alerts. Raises
    HTTP 404 when the topic does not exist.
    """
    topics = TopicRepository(db)
    alerts = AlertItemRepository(db)

    record = topics.get_by_id(topic_id)
    if not record:
        raise HTTPException(status_code=404, detail="Topic nicht gefunden")

    status_counts = alerts.count_by_status(topic_id)
    decision_counts = alerts.count_by_decision(topic_id)

    alert_total = sum(status_counts.values())
    kept = decision_counts.get("KEEP", 0)

    if alert_total > 0:
        keep_rate = kept / alert_total
    else:
        keep_rate = None

    return TopicStatsResponse(
        topic_id=topic_id,
        name=record.name,
        total_alerts=alert_total,
        by_status=status_counts,
        by_decision=decision_counts,
        keep_rate=keep_rate,
    )
@router.post("/{topic_id}/fetch", response_model=FetchResultResponse)
async def fetch_topic(
    topic_id: str,
    background_tasks: BackgroundTasks,
    db: DBSession = Depends(get_db),
) -> FetchResultResponse:
    """
    Trigger a manual fetch for a topic.

    The fetch is awaited inside this request (``background_tasks`` is
    accepted for interface compatibility but not used), so the response
    already carries the number of new items and skipped duplicates.

    A failure during the fetch does not produce an error status: it is
    stored on the topic as ``last_fetch_error`` and reported in the body
    with ``success=False``.

    Raises:
        HTTPException: 404 if the topic does not exist, 400 if it has no
            feed URL configured.
    """
    topic_repo = TopicRepository(db)
    topic = topic_repo.get_by_id(topic_id)
    if not topic:
        raise HTTPException(status_code=404, detail="Topic nicht gefunden")
    if not topic.feed_url:
        raise HTTPException(
            status_code=400,
            detail="Topic hat keine Feed-URL konfiguriert"
        )

    # Imported here to avoid circular imports
    from alerts_agent.ingestion.rss_fetcher import fetch_and_store_feed

    try:
        result = await fetch_and_store_feed(
            topic_id=topic_id,
            feed_url=topic.feed_url,
            db=db,
        )
        return FetchResultResponse(
            success=True,
            topic_id=topic_id,
            new_items=result.get("new_items", 0),
            duplicates_skipped=result.get("duplicates_skipped", 0),
        )
    except Exception as e:
        # Reset the session first: after a failed flush/commit it cannot be
        # used again until rolled back, and half-written items from the
        # failed fetch should not be committed together with the error.
        db.rollback()
        # Record the error on the topic
        topic_repo.update(topic_id, last_fetch_error=str(e))
        return FetchResultResponse(
            success=False,
            topic_id=topic_id,
            new_items=0,
            duplicates_skipped=0,
            error=str(e),
        )
@router.post("/{topic_id}/activate", response_model=TopicResponse)
async def activate_topic(
    topic_id: str,
    db: DBSession = Depends(get_db),
) -> TopicResponse:
    """
    Mark a topic as active. Raises HTTP 404 when it does not exist.
    """
    record = TopicRepository(db).update(topic_id, is_active=True)
    if not record:
        raise HTTPException(status_code=404, detail="Topic nicht gefunden")

    # A missing feed type is reported as "rss"
    feed_type = record.feed_type.value if record.feed_type else "rss"

    return TopicResponse(
        id=record.id,
        name=record.name,
        description=record.description or "",
        feed_url=record.feed_url,
        feed_type=feed_type,
        is_active=record.is_active,
        fetch_interval_minutes=record.fetch_interval_minutes,
        last_fetched_at=record.last_fetched_at,
        last_fetch_error=record.last_fetch_error,
        total_items_fetched=record.total_items_fetched,
        items_kept=record.items_kept,
        items_dropped=record.items_dropped,
        created_at=record.created_at,
        updated_at=record.updated_at,
    )
@router.post("/{topic_id}/deactivate", response_model=TopicResponse)
async def deactivate_topic(
    topic_id: str,
    db: DBSession = Depends(get_db),
) -> TopicResponse:
    """
    Mark a topic as inactive. Raises HTTP 404 when it does not exist.
    """
    record = TopicRepository(db).update(topic_id, is_active=False)
    if not record:
        raise HTTPException(status_code=404, detail="Topic nicht gefunden")

    # A missing feed type is reported as "rss"
    feed_type = record.feed_type.value if record.feed_type else "rss"

    return TopicResponse(
        id=record.id,
        name=record.name,
        description=record.description or "",
        feed_url=record.feed_url,
        feed_type=feed_type,
        is_active=record.is_active,
        fetch_interval_minutes=record.fetch_interval_minutes,
        last_fetched_at=record.last_fetched_at,
        last_fetch_error=record.last_fetch_error,
        total_items_fetched=record.total_items_fetched,
        items_kept=record.items_kept,
        items_dropped=record.items_dropped,
        created_at=record.created_at,
        updated_at=record.updated_at,
    )

View File

@@ -0,0 +1,554 @@
"""
API Routes für den Guided Mode Wizard.
Verwaltet den 3-Schritt Setup-Wizard:
1. Rolle wählen (Lehrkraft, Schulleitung, IT-Beauftragte)
2. Templates auswählen (max. 3)
3. Bestätigung und Aktivierung
Zusätzlich: Migration-Wizard für bestehende Google Alerts.
Endpoints:
- GET /wizard/state - Aktuellen Wizard-Status abrufen
- PUT /wizard/step/{step} - Schritt speichern
- POST /wizard/complete - Wizard abschließen
- POST /wizard/reset - Wizard zurücksetzen
- POST /wizard/migrate/email - E-Mail-Migration starten
- POST /wizard/migrate/rss - RSS-Import
"""
import uuid
from typing import Optional, List, Dict, Any
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session as DBSession
from ..db.database import get_db
from ..db.models import (
UserAlertSubscriptionDB, AlertTemplateDB, AlertSourceDB,
AlertModeEnum, UserRoleEnum, MigrationModeEnum, FeedTypeEnum
)
router = APIRouter(prefix="/wizard", tags=["wizard"])
# ============================================================================
# Request/Response Models
# ============================================================================
class WizardState(BaseModel):
    """Snapshot of the setup wizard's progress."""

    subscription_id: Optional[str] = None
    # 0 = not started, 1-3 = wizard steps, 4 = completed
    current_step: int = 0
    is_completed: bool = False
    # Data saved by the individual steps
    step_data: Dict[str, Any] = {}
    recommended_templates: List[Dict[str, Any]] = []
class Step1Data(BaseModel):
    """Payload for wizard step 1: choosing a role."""

    # Required role identifier
    role: str = Field(..., description="lehrkraft, schulleitung, it_beauftragte")
class Step2Data(BaseModel):
    """Payload for wizard step 2: choosing templates."""

    # Between one and three template ids
    template_ids: List[str] = Field(..., min_length=1, max_length=3)
class Step3Data(BaseModel):
    """Payload for wizard step 3: notification and digest settings."""

    notification_email: Optional[str] = None
    digest_enabled: bool = True
    # Free-form string; not validated here
    digest_frequency: str = "weekly"
class StepResponse(BaseModel):
    """Response returned after a wizard step has been saved."""

    status: str
    current_step: int
    next_step: int
    message: str
    # Populated after step 1, empty otherwise
    recommended_templates: List[Dict[str, Any]] = []
class MigrateEmailRequest(BaseModel):
    """Request body for starting an e-mail migration."""

    # Optional label describing the migrated alert
    original_label: Optional[str] = Field(None, description="Beschreibung des Alerts")
class MigrateEmailResponse(BaseModel):
    """Response body for a started e-mail migration."""

    status: str
    # Address the user should forward their alerts to
    inbound_address: str
    # Step-by-step instructions shown to the user
    instructions: List[str]
    source_id: str
class MigrateRssRequest(BaseModel):
    """Request body for importing RSS feeds."""

    # Between 1 and 20 feed URLs
    rss_urls: List[str] = Field(..., min_length=1, max_length=20)
    # Optional labels, matched to ``rss_urls`` by position
    labels: Optional[List[str]] = None
class MigrateRssResponse(BaseModel):
    """Response body for an RSS import."""

    status: str
    sources_created: int
    topics_created: int
    message: str
# ============================================================================
# Helper Functions
# ============================================================================
def get_user_id_from_request() -> str:
    """Return the id of the current user.

    Currently a fixed placeholder; nothing is read from any request.
    """
    placeholder_user_id = "demo-user"
    return placeholder_user_id
def _get_or_create_subscription(db: DBSession, user_id: str) -> UserAlertSubscriptionDB:
    """Return the user's unfinished wizard subscription, creating one if needed.

    A newly created subscription is in guided mode at step 0 with an empty
    wizard state; it is committed and refreshed before being returned.
    """
    existing = (
        db.query(UserAlertSubscriptionDB)
        .filter(
            UserAlertSubscriptionDB.user_id == user_id,
            UserAlertSubscriptionDB.wizard_completed == False,
        )
        .first()
    )
    if existing:
        return existing

    fresh = UserAlertSubscriptionDB(
        id=str(uuid.uuid4()),
        user_id=user_id,
        mode=AlertModeEnum.GUIDED,
        wizard_step=0,
        wizard_completed=False,
        wizard_state={},
        is_active=True,
    )
    db.add(fresh)
    db.commit()
    db.refresh(fresh)
    return fresh
def _get_recommended_templates(db: DBSession, role: str) -> List[Dict[str, Any]]:
    """Return the recommended templates for a role as plain dicts.

    Considers active, non-premium templates ordered by ``sort_order`` and
    keeps those whose ``target_roles`` contain ``role``.
    """
    candidates = (
        db.query(AlertTemplateDB)
        .filter(
            AlertTemplateDB.is_active == True,
            AlertTemplateDB.is_premium == False,
        )
        .order_by(AlertTemplateDB.sort_order)
        .all()
    )
    return [
        {
            "id": tpl.id,
            "slug": tpl.slug,
            "name": tpl.name,
            "description": tpl.description,
            "icon": tpl.icon,
            "category": tpl.category,
            "recommended": True,
        }
        for tpl in candidates
        if role in (tpl.target_roles or [])
    ]
def _generate_inbound_address(user_id: str, source_id: str) -> str:
"""Generiere eindeutige Inbound-E-Mail-Adresse."""
short_id = source_id[:8]
return f"alerts+{short_id}@breakpilot.app"
# ============================================================================
# Wizard Endpoints
# ============================================================================
@router.get("/state", response_model=WizardState)
async def get_wizard_state(
    db: DBSession = Depends(get_db)
):
    """
    Return the current wizard state.

    Uses the user's most recently created subscription and reports its
    step, saved step data and the templates recommended for its role.
    Without any subscription an empty "not started" state is returned.
    """
    user_id = get_user_id_from_request()

    latest = (
        db.query(UserAlertSubscriptionDB)
        .filter(UserAlertSubscriptionDB.user_id == user_id)
        .order_by(UserAlertSubscriptionDB.created_at.desc())
        .first()
    )
    if not latest:
        return WizardState(
            subscription_id=None,
            current_step=0,
            is_completed=False,
            step_data={},
            recommended_templates=[],
        )

    # Recommendations depend on the role chosen in step 1
    role = latest.user_role.value if latest.user_role else None
    if role:
        recommended = _get_recommended_templates(db, role)
    else:
        recommended = []

    return WizardState(
        subscription_id=latest.id,
        current_step=latest.wizard_step or 0,
        is_completed=latest.wizard_completed or False,
        step_data=latest.wizard_state or {},
        recommended_templates=recommended,
    )
@router.put("/step/1", response_model=StepResponse)
async def save_step_1(
    data: Step1Data,
    db: DBSession = Depends(get_db)
):
    """
    Step 1: store the user's role.

    Validates the role, stores it on the user's unfinished wizard
    subscription (creating one if needed) and returns the templates
    recommended for that role.

    Raises:
        HTTPException: 400 if ``data.role`` is not a valid ``UserRoleEnum``
            value.
    """
    user_id = get_user_id_from_request()

    # Validate the role
    try:
        role = UserRoleEnum(data.role)
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail="Ungültige Rolle. Erlaubt: 'lehrkraft', 'schulleitung', 'it_beauftragte'"
        )

    subscription = _get_or_create_subscription(db, user_id)

    subscription.user_role = role
    subscription.wizard_step = 1

    # Work on a copy: mutating the loaded dict in place and assigning the
    # same object back may not be detected as a change on a JSON column
    # (unless the column uses mutation tracking), so the edit could be lost.
    wizard_state = dict(subscription.wizard_state or {})
    wizard_state["step1"] = {"role": data.role}
    subscription.wizard_state = wizard_state

    subscription.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(subscription)

    # Recommended templates for the chosen role
    recommended = _get_recommended_templates(db, data.role)

    return StepResponse(
        status="success",
        current_step=1,
        next_step=2,
        message=f"Rolle '{data.role}' gespeichert. Bitte wählen Sie jetzt Ihre Themen.",
        recommended_templates=recommended,
    )
@router.put("/step/2", response_model=StepResponse)
async def save_step_2(
    data: Step2Data,
    db: DBSession = Depends(get_db)
):
    """
    Step 2: store the selected templates (1-3).

    Raises:
        HTTPException: 400 if the user has no unfinished wizard
            subscription, or if the number of templates found differs from
            the number of ids supplied (unknown ids, and also duplicate
            ids).
    """
    user_id = get_user_id_from_request()

    subscription = db.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.user_id == user_id,
        UserAlertSubscriptionDB.wizard_completed == False
    ).first()
    if not subscription:
        raise HTTPException(status_code=400, detail="Bitte zuerst Schritt 1 abschließen")

    # Validate the template ids
    templates = db.query(AlertTemplateDB).filter(
        AlertTemplateDB.id.in_(data.template_ids)
    ).all()
    if len(templates) != len(data.template_ids):
        raise HTTPException(status_code=400, detail="Eine oder mehrere Template-IDs sind ungültig")

    subscription.selected_template_ids = data.template_ids
    subscription.wizard_step = 2

    # Work on a copy: mutating the loaded dict in place and assigning the
    # same object back may not be detected as a change on a JSON column
    # (unless the column uses mutation tracking), so the edit could be lost.
    wizard_state = dict(subscription.wizard_state or {})
    wizard_state["step2"] = {
        "template_ids": data.template_ids,
        "template_names": [t.name for t in templates],
    }
    subscription.wizard_state = wizard_state

    subscription.updated_at = datetime.utcnow()
    db.commit()

    return StepResponse(
        status="success",
        current_step=2,
        next_step=3,
        message=f"{len(templates)} Themen ausgewählt. Bitte bestätigen Sie Ihre Auswahl.",
        recommended_templates=[],
    )
@router.put("/step/3", response_model=StepResponse)
async def save_step_3(
    data: Step3Data,
    db: DBSession = Depends(get_db)
):
    """
    Step 3: store notification e-mail and digest preferences.

    Raises:
        HTTPException: 400 if the user has no unfinished wizard
            subscription or has not selected templates yet.
    """
    user_id = get_user_id_from_request()

    subscription = db.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.user_id == user_id,
        UserAlertSubscriptionDB.wizard_completed == False
    ).first()
    if not subscription:
        raise HTTPException(status_code=400, detail="Bitte zuerst Schritte 1 und 2 abschließen")
    if not subscription.selected_template_ids:
        raise HTTPException(status_code=400, detail="Bitte zuerst Templates auswählen (Schritt 2)")

    subscription.notification_email = data.notification_email
    subscription.digest_enabled = data.digest_enabled
    subscription.digest_frequency = data.digest_frequency
    subscription.wizard_step = 3

    # Work on a copy: mutating the loaded dict in place and assigning the
    # same object back may not be detected as a change on a JSON column
    # (unless the column uses mutation tracking), so the edit could be lost.
    wizard_state = dict(subscription.wizard_state or {})
    wizard_state["step3"] = {
        "notification_email": data.notification_email,
        "digest_enabled": data.digest_enabled,
        "digest_frequency": data.digest_frequency,
    }
    subscription.wizard_state = wizard_state

    subscription.updated_at = datetime.utcnow()
    db.commit()

    return StepResponse(
        status="success",
        current_step=3,
        next_step=4,
        message="Einstellungen gespeichert. Klicken Sie auf 'Jetzt starten' um den Wizard abzuschließen.",
        recommended_templates=[],
    )
@router.post("/complete")
async def complete_wizard(
    db: DBSession = Depends(get_db)
):
    """
    Finish the wizard.

    Marks the user's unfinished wizard subscription as completed (step 4)
    and returns a summary including the selected template ids.

    NOTE(review): no topics, rules or profiles are created here. The former
    local import of ``activate_template`` / ``ActivateTemplateRequest`` was
    never used and has been removed; wire up template activation explicitly
    if that is the intended behaviour.

    Raises:
        HTTPException: 400 if there is no unfinished wizard subscription or
            no templates have been selected.
    """
    user_id = get_user_id_from_request()

    subscription = db.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.user_id == user_id,
        UserAlertSubscriptionDB.wizard_completed == False
    ).first()
    if not subscription:
        raise HTTPException(status_code=400, detail="Kein aktiver Wizard gefunden")
    if not subscription.selected_template_ids:
        raise HTTPException(status_code=400, detail="Bitte zuerst Templates auswählen")

    # Mark as completed
    subscription.wizard_completed = True
    subscription.wizard_step = 4
    subscription.updated_at = datetime.utcnow()
    db.commit()

    return {
        "status": "success",
        "message": "Wizard abgeschlossen! Ihre Alerts werden ab jetzt gesammelt.",
        "subscription_id": subscription.id,
        "selected_templates": subscription.selected_template_ids,
        "next_action": "Besuchen Sie die Inbox, um Ihre ersten Alerts zu sehen.",
    }
@router.post("/reset")
async def reset_wizard(
    db: DBSession = Depends(get_db)
):
    """Reset the wizard so the user can start over.

    Deletes the user's unfinished wizard subscription if one exists;
    completed subscriptions are not touched. Always reports success.
    """
    user_id = get_user_id_from_request()

    unfinished = (
        db.query(UserAlertSubscriptionDB)
        .filter(
            UserAlertSubscriptionDB.user_id == user_id,
            UserAlertSubscriptionDB.wizard_completed == False,
        )
        .first()
    )
    if unfinished:
        db.delete(unfinished)
        db.commit()

    outcome = {
        "status": "success",
        "message": "Wizard zurückgesetzt. Sie können neu beginnen.",
    }
    return outcome
# ============================================================================
# Migration Endpoints (für bestehende Google Alerts)
# ============================================================================
@router.post("/migrate/email", response_model=MigrateEmailResponse)
async def start_email_migration(
    request: Optional[MigrateEmailRequest] = None,
    db: DBSession = Depends(get_db)
):
    """
    Start an e-mail migration for existing Google Alerts.

    Creates an e-mail alert source and generates an inbound address to
    which the user can redirect their Google Alerts. The response includes
    step-by-step instructions.

    Args:
        request: Optional body. When it is omitted or carries no
            ``original_label``, the default label "Google Alert Migration"
            is used.
        db: Database session.
    """
    user_id = get_user_id_from_request()

    # Fall back to the default label both when no body was sent and when
    # the body leaves the label empty
    supplied_label = request.original_label if request else None
    label = supplied_label or "Google Alert Migration"

    # Create the alert source
    source = AlertSourceDB(
        id=str(uuid.uuid4()),
        user_id=user_id,
        source_type=FeedTypeEnum.EMAIL,
        original_label=label,
        migration_mode=MigrationModeEnum.FORWARD,
        is_active=True,
    )

    # Generate the inbound address from the new source id
    source.inbound_address = _generate_inbound_address(user_id, source.id)

    db.add(source)
    db.commit()
    db.refresh(source)

    return MigrateEmailResponse(
        status="success",
        inbound_address=source.inbound_address,
        source_id=source.id,
        instructions=[
            "1. Öffnen Sie Google Alerts (google.com/alerts)",
            "2. Klicken Sie auf das Bearbeiten-Symbol bei Ihrem Alert",
            f"3. Ändern Sie die E-Mail-Adresse zu: {source.inbound_address}",
            "4. Speichern Sie die Änderung",
            "5. Ihre Alerts werden automatisch importiert und gefiltert",
        ],
    )
@router.post("/migrate/rss", response_model=MigrateRssResponse)
async def import_rss_feeds(
    request: MigrateRssRequest,
    db: DBSession = Depends(get_db)
):
    """
    Import existing Google Alert RSS feeds.

    For every RSS URL in the request one AlertSource and one AlertTopic are
    created and linked; everything is committed in a single transaction.
    """
    user_id = get_user_id_from_request()

    from ..db.models import AlertTopicDB

    provided_labels = request.labels or []
    source_count = 0
    topic_count = 0

    for index, feed_url in enumerate(request.rss_urls):
        # Take the label from the request when one exists at this position,
        # otherwise generate a numbered placeholder.
        candidate = provided_labels[index] if index < len(provided_labels) else None
        feed_label = candidate if candidate else f"RSS Feed {index + 1}"

        # Create the alert source
        source = AlertSourceDB(
            id=str(uuid.uuid4()),
            user_id=user_id,
            source_type=FeedTypeEnum.RSS,
            original_label=feed_label,
            rss_url=feed_url,
            migration_mode=MigrationModeEnum.IMPORT,
            is_active=True,
        )
        db.add(source)
        source_count += 1

        # Create the matching topic
        topic = AlertTopicDB(
            id=str(uuid.uuid4()),
            user_id=user_id,
            name=feed_label,
            description=f"Importiert aus RSS: {feed_url[:50]}...",
            feed_url=feed_url,
            feed_type=FeedTypeEnum.RSS,
            is_active=True,
            fetch_interval_minutes=60,
        )
        db.add(topic)

        # Link the source to its topic
        source.topic_id = topic.id
        topic_count += 1

    db.commit()

    summary = f"{source_count} RSS-Feeds importiert. Die Alerts werden automatisch abgerufen."
    return MigrateRssResponse(
        status="success",
        sources_created=source_count,
        topics_created=topic_count,
        message=summary,
    )
@router.get("/migrate/sources")
async def list_migration_sources(
    db: DBSession = Depends(get_db)
):
    """List all migration sources of the current user, newest first."""
    user_id = get_user_id_from_request()

    def enum_value(member):
        # Enum columns may be unset; report "unknown" in that case.
        return member.value if member else "unknown"

    query = db.query(AlertSourceDB).filter(AlertSourceDB.user_id == user_id)
    sources = query.order_by(AlertSourceDB.created_at.desc()).all()

    entries = []
    for src in sources:
        entries.append({
            "id": src.id,
            "type": enum_value(src.source_type),
            "label": src.original_label,
            "inbound_address": src.inbound_address,
            "rss_url": src.rss_url,
            "migration_mode": enum_value(src.migration_mode),
            "items_received": src.items_received,
            "is_active": src.is_active,
            "created_at": src.created_at.isoformat() if src.created_at else None,
        })

    return {
        "sources": entries,
        "total": len(sources),
    }

View File

@@ -0,0 +1,8 @@
"""
Alert Template Seed Data.
Enthält vorkonfigurierte Templates (Playbooks) für den Guided Mode.
"""
from .templates import ALERT_TEMPLATES, seed_templates
# Public API of this package: the template definitions and the seeding helper
# imported from .templates above.
__all__ = ["ALERT_TEMPLATES", "seed_templates"]

View File

@@ -0,0 +1,492 @@
"""
Vorkonfigurierte Alert-Templates (Playbooks) für den Guided Mode.
Diese Templates ermöglichen Lehrern und Schulleitungen einen schnellen Einstieg
ohne RSS-Feeds oder Keywords manuell konfigurieren zu müssen.
Alle Texte in B1/B2 Deutsch, keine IT-Fachbegriffe.
"""
from typing import List, Dict, Any
from sqlalchemy.orm import Session
import uuid
# Default importance mapping: minimum relevance score for each of the four
# upper importance levels (score -> 5 levels).
DEFAULT_IMPORTANCE_CONFIG = {
    "kritisch": 0.90,  # from 90%  -> critical
    "dringend": 0.75,  # 75-90%    -> urgent
    "wichtig": 0.60,   # 60-75%    -> important
    "pruefen": 0.40,   # 40-60%    -> to be checked
    # everything below 40% -> info
}

# Importance mapping for time-critical templates (e.g. deadlines)
DEADLINE_IMPORTANCE_CONFIG = {
    "kritisch": 0.85,
    "dringend": 0.70,
    "wichtig": 0.55,
    "pruefen": 0.35,
}

# Importance mapping for IT security (higher thresholds)
SECURITY_IMPORTANCE_CONFIG = {
    "kritisch": 0.95,
    "dringend": 0.85,
    "wichtig": 0.70,
    "pruefen": 0.50,
}

# Seed definitions of the preconfigured templates (playbooks). Each entry
# carries the template metadata plus its topic, rule, profile and importance
# configuration; entries flagged with "is_premium" are the optional/premium ones.
ALERT_TEMPLATES: List[Dict[str, Any]] = [
    # =========================================================================
    # 1. Funding programmes & deadlines
    # =========================================================================
    {
        "slug": "foerderprogramme",
        "name": "Förderprogramme & Fristen",
        "description": "Bleiben Sie informiert über Förderanträge, Deadlines und neue Programme für Schulen. Verpassen Sie keine Fristen mehr.",
        "icon": "💰",
        "category": "administration",
        "target_roles": ["schulleitung"],
        "topics_config": [
            {
                "name": "DigitalPakt & Bundes-Förderprogramme",
                "keywords": ["DigitalPakt", "Förderprogramm Schule", "Bundesförderung Bildung", "BMBF Schule"],
            },
            {
                "name": "Landesförderung Bildung",
                "keywords": ["Landesförderung Schule", "Kultusministerium Förderung", "Schulträger Fördermittel"],
            },
            {
                "name": "EU & Stiftungen",
                "keywords": ["Erasmus+ Schule", "EU-Förderung Bildung", "Stiftung Schule", "ESF Bildung"],
            },
        ],
        "rules_config": [
            {
                "name": "Fristen priorisieren",
                "conditions": [{"field": "title", "op": "in", "value": ["Frist", "Deadline", "bis zum", "Antragsfrist", "endet am"]}],
                "action_type": "tag",
                "action_config": {"tags": ["frist"]},
                "priority": 100,
            },
            {
                "name": "Ausschluss Stellenanzeigen",
                "conditions": [{"field": "title", "op": "in", "value": ["Stellenanzeige", "Wir suchen", "Job"]}],
                "action_type": "drop",
                "priority": 90,
            },
        ],
        "profile_config": {
            "priorities": [
                {"label": "Fördermittel", "weight": 0.95, "keywords": ["Fördermittel", "Zuschuss", "Antrag", "Förderung"], "description": "Finanzielle Förderung für Schulen"},
                {"label": "Fristen", "weight": 0.90, "keywords": ["Frist", "Deadline", "Stichtag", "Bewerbungsschluss"], "description": "Zeitkritische Informationen"},
                {"label": "Digitalisierung", "weight": 0.80, "keywords": ["DigitalPakt", "Tablet", "WLAN", "digitale Ausstattung"], "description": "IT-Ausstattung und Infrastruktur"},
            ],
            "exclusions": ["Stellenanzeige", "Werbung", "Seminar buchen", "Anzeige"],
        },
        "importance_config": DEADLINE_IMPORTANCE_CONFIG,
        "max_cards_per_day": 8,
        "sort_order": 1,
    },
    # =========================================================================
    # 2. Abitur & exam updates
    # =========================================================================
    {
        "slug": "abitur-updates",
        "name": "Abitur & Prüfungs-Updates",
        "description": "Aktuelle Informationen zu Abitur-Regelungen, Prüfungsformaten, EPA und KMK-Beschlüssen. Wichtig für alle Oberstufenlehrkräfte.",
        "icon": "📝",
        "category": "teaching",
        "target_roles": ["lehrkraft", "schulleitung"],
        "topics_config": [
            {
                "name": "Abitur-Vorgaben",
                "keywords": ["Abitur Vorgaben", "Prüfungsaufgaben Abitur", "Abiturprüfung Änderung"],
            },
            {
                "name": "KMK & EPA",
                "keywords": ["KMK Beschluss", "EPA Abitur", "Bildungsstandards Abitur", "Operatoren Abitur"],
            },
            {
                "name": "Prüfungsformate",
                "keywords": ["Prüfungsformat Schule", "Klausur Oberstufe", "mündliche Prüfung Abitur"],
            },
        ],
        "rules_config": [
            {
                "name": "Kernfächer priorisieren",
                "conditions": [{"field": "title", "op": "in", "value": ["Deutsch", "Mathematik", "Englisch", "Leistungskurs"]}],
                "action_type": "tag",
                "action_config": {"tags": ["kernfach"]},
                "priority": 80,
            },
        ],
        "profile_config": {
            "priorities": [
                {"label": "Abitur-Änderungen", "weight": 0.95, "keywords": ["Abitur", "Prüfung", "Zentralabitur"], "description": "Änderungen an Prüfungsregelungen"},
                {"label": "KMK-Beschlüsse", "weight": 0.85, "keywords": ["KMK", "Kultusministerkonferenz", "Bildungsstandards"], "description": "Bundesweite Regelungen"},
                {"label": "Bewertung", "weight": 0.75, "keywords": ["Bewertung", "Notenschlüssel", "Erwartungshorizont"], "description": "Bewertungskriterien"},
            ],
            "exclusions": ["Nachhilfe", "Abiturtraining", "Lernhilfe kaufen"],
        },
        "importance_config": DEFAULT_IMPORTANCE_CONFIG,
        "max_cards_per_day": 6,
        "sort_order": 2,
    },
    # =========================================================================
    # 3. Further training for teachers
    # =========================================================================
    {
        "slug": "fortbildungen",
        "name": "Fortbildungen für Lehrkräfte",
        "description": "Relevante Fortbildungsangebote in Ihrer Region. Filtern Sie nach Fach, Format und Anbieter.",
        "icon": "🎓",
        "category": "teaching",
        "target_roles": ["lehrkraft"],
        "topics_config": [
            {
                "name": "Landesinstitut Fortbildungen",
                "keywords": ["Fortbildung Lehrer", "Landesinstitut Lehrerfortbildung", "Pädagogische Fortbildung"],
            },
            {
                "name": "Digitale Kompetenzen",
                "keywords": ["Fortbildung digital", "Medienkompetenz Lehrer", "digitale Bildung Fortbildung"],
            },
            {
                "name": "Fachfortbildungen",
                "keywords": ["Fachfortbildung", "Unterrichtsentwicklung", "Didaktik Fortbildung"],
            },
        ],
        "rules_config": [
            {
                "name": "Online-Formate taggen",
                "conditions": [{"field": "title", "op": "in", "value": ["Online", "Webinar", "digital", "virtuell"]}],
                "action_type": "tag",
                "action_config": {"tags": ["online"]},
                "priority": 70,
            },
            {
                # NOTE: despite its name this rule only tags matches
                # (action_type "tag"); it does not drop them.
                "name": "Kostenpflichtige ausschließen",
                # The match list must not contain an empty string: assuming
                # "in" is a contains-any check (as the phrase lists of the
                # other rules suggest), "" would match every snippet and tag
                # every alert as "kostenpflichtig".
                "conditions": [{"field": "snippet", "op": "in", "value": ["kostenpflichtig", "Teilnahmegebühr"]}],
                "action_type": "tag",
                "action_config": {"tags": ["kostenpflichtig"]},
                "priority": 60,
            },
        ],
        "profile_config": {
            "priorities": [
                {"label": "Kostenlose Fortbildungen", "weight": 0.90, "keywords": ["kostenlos", "kostenfrei", "Landesinstitut"], "description": "Staatliche Angebote"},
                {"label": "Digitale Medien", "weight": 0.80, "keywords": ["digital", "Tablet", "Medienkompetenz"], "description": "Digitale Bildung"},
                {"label": "Inklusion", "weight": 0.75, "keywords": ["Inklusion", "Förderbedarf", "Differenzierung"], "description": "Inklusiver Unterricht"},
            ],
            "exclusions": ["Studium", "Bachelor", "Master", "Referendariat"],
        },
        "importance_config": DEFAULT_IMPORTANCE_CONFIG,
        "max_cards_per_day": 10,
        "sort_order": 3,
    },
    # =========================================================================
    # 4. Data protection & legal updates
    # =========================================================================
    {
        "slug": "datenschutz-recht",
        "name": "Datenschutz & Rechtsupdates",
        "description": "DSGVO-relevante Änderungen, Schulrecht und rechtliche Entwicklungen. Wichtig für Datenschutzbeauftragte und Schulleitungen.",
        "icon": "⚖️",
        "category": "administration",
        "target_roles": ["schulleitung", "it_beauftragte"],
        "topics_config": [
            {
                "name": "DSGVO Schule",
                "keywords": ["DSGVO Schule", "Datenschutz Schüler", "Einwilligung Eltern", "personenbezogene Daten Schule"],
            },
            {
                "name": "Schulrecht",
                "keywords": ["Schulgesetz Änderung", "Schulordnung neu", "Schulrecht Urteil"],
            },
            {
                "name": "Cloud & Software",
                "keywords": ["Cloud Schule DSGVO", "Microsoft 365 Schule", "Videokonferenz Datenschutz"],
            },
        ],
        "rules_config": [
            {
                "name": "Urteile priorisieren",
                "conditions": [{"field": "title", "op": "in", "value": ["Urteil", "Gericht", "Beschluss", "Aufsichtsbehörde"]}],
                "action_type": "tag",
                "action_config": {"tags": ["urteil"]},
                "priority": 90,
            },
            {
                "name": "Handlungsbedarf markieren",
                "conditions": [{"field": "title", "op": "in", "value": ["ab sofort", "verpflichtend", "muss", "Frist"]}],
                "action_type": "tag",
                "action_config": {"tags": ["handlungsbedarf"]},
                "priority": 85,
            },
        ],
        "profile_config": {
            "priorities": [
                {"label": "DSGVO-Compliance", "weight": 0.95, "keywords": ["DSGVO", "Datenschutz", "Aufsichtsbehörde", "Bußgeld"], "description": "Datenschutzrechtliche Vorgaben"},
                {"label": "Schulrecht", "weight": 0.90, "keywords": ["Schulgesetz", "Verordnung", "Erlass"], "description": "Rechtliche Änderungen"},
                {"label": "Cloud-Dienste", "weight": 0.80, "keywords": ["Cloud", "Microsoft", "Google", "Zoom"], "description": "Software und Dienste"},
            ],
            "exclusions": ["Werbung", "Seminar buchen", "Beratung anfragen"],
        },
        "importance_config": DEADLINE_IMPORTANCE_CONFIG,
        "max_cards_per_day": 8,
        "sort_order": 4,
    },
    # =========================================================================
    # 5. IT security warnings
    # =========================================================================
    {
        "slug": "it-security",
        "name": "IT-Security Warnungen",
        "description": "Sicherheitswarnungen und Patches für Schul-IT-Systeme. Kritisch für IT-Beauftragte und Administratoren.",
        "icon": "🔒",
        "category": "it",
        "target_roles": ["it_beauftragte"],
        "topics_config": [
            {
                "name": "BSI & CERT Warnungen",
                "keywords": ["BSI Warnung", "CERT-Bund", "Sicherheitslücke", "CVE Schule"],
            },
            {
                "name": "Schul-Software Security",
                "keywords": ["Moodle Sicherheit", "IServ Update", "WebUntis Sicherheit", "Nextcloud Patch"],
            },
            {
                "name": "Phishing & Malware",
                "keywords": ["Phishing Schule", "Ransomware Bildung", "Malware Warnung"],
            },
        ],
        "rules_config": [
            {
                "name": "CVE-Meldungen priorisieren",
                "conditions": [{"field": "title", "op": "regex", "value": "CVE-\\d{4}-\\d+"}],
                "action_type": "tag",
                "action_config": {"tags": ["cve"]},
                "priority": 100,
            },
            {
                "name": "Kritische Patches",
                "conditions": [{"field": "title", "op": "in", "value": ["kritisch", "Notfall-Patch", "sofort", "0-day"]}],
                "action_type": "tag",
                "action_config": {"tags": ["kritisch"]},
                "priority": 95,
            },
        ],
        "profile_config": {
            "priorities": [
                {"label": "CVE-Warnungen", "weight": 0.98, "keywords": ["CVE", "Sicherheitslücke", "Schwachstelle", "Exploit"], "description": "Bekannte Sicherheitslücken"},
                {"label": "Schul-Software", "weight": 0.90, "keywords": ["Moodle", "IServ", "WebUntis", "Nextcloud", "Schulportal"], "description": "Häufig genutzte Schulsoftware"},
                {"label": "Patches", "weight": 0.85, "keywords": ["Patch", "Update", "Sicherheitsupdate", "Hotfix"], "description": "Sicherheitsupdates"},
            ],
            "exclusions": ["Werbung", "Schulung kaufen", "Penetrationstest Angebot"],
        },
        "importance_config": SECURITY_IMPORTANCE_CONFIG,
        "max_cards_per_day": 5,
        "sort_order": 5,
    },
    # =========================================================================
    # 6. Competitions & projects
    # =========================================================================
    {
        "slug": "wettbewerbe-projekte",
        "name": "Wettbewerbe & Projekte",
        "description": "MINT-Wettbewerbe, Erasmus-Projekte, Schülerwettbewerbe und Schulpartnerschaften. Entdecken Sie Chancen für Ihre Schüler.",
        "icon": "🏆",
        "category": "teaching",
        "target_roles": ["lehrkraft", "schulleitung"],
        "topics_config": [
            {
                "name": "MINT-Wettbewerbe",
                "keywords": ["MINT Wettbewerb Schule", "Jugend forscht", "Mathematik Olympiade", "Informatik Biber"],
            },
            {
                "name": "Erasmus & Austausch",
                "keywords": ["Erasmus+ Schule", "Schüleraustausch", "Schulpartnerschaft Europa"],
            },
            {
                "name": "Kreativ & Sozial",
                "keywords": ["Schülerwettbewerb Kunst", "Vorlesewettbewerb", "Umweltpreis Schule", "Sozialer Tag"],
            },
        ],
        "rules_config": [
            {
                "name": "Anmeldefristen",
                "conditions": [{"field": "title", "op": "in", "value": ["Anmeldung", "Bewerbung", "Frist", "bis zum"]}],
                "action_type": "tag",
                "action_config": {"tags": ["frist"]},
                "priority": 85,
            },
        ],
        "profile_config": {
            "priorities": [
                {"label": "Wettbewerbe", "weight": 0.90, "keywords": ["Wettbewerb", "Preis", "Auszeichnung", "Gewinner"], "description": "Schülerwettbewerbe"},
                {"label": "Erasmus+", "weight": 0.85, "keywords": ["Erasmus", "EU-Programm", "Mobilität", "Austausch"], "description": "Europäische Programme"},
                {"label": "MINT", "weight": 0.80, "keywords": ["MINT", "Naturwissenschaft", "Technik", "Informatik"], "description": "MINT-Bereich"},
            ],
            "exclusions": ["Stellenanzeige", "Praktikum", "Ausbildung"],
        },
        "importance_config": DEADLINE_IMPORTANCE_CONFIG,
        "max_cards_per_day": 8,
        "sort_order": 6,
    },
    # =========================================================================
    # 7. Staffing market (optional/premium)
    # =========================================================================
    {
        "slug": "personalmarkt",
        "name": "Personalmarkt & Stellen",
        "description": "Stellenangebote für Lehrkräfte und Vertretungsstellen in Ihrer Region. Ideal für Schulleitungen mit Personalbedarf.",
        "icon": "👥",
        "category": "administration",
        "target_roles": ["schulleitung"],
        "is_premium": True,
        "topics_config": [
            {
                "name": "Lehrerstellen",
                "keywords": ["Lehrerstelle", "Lehrkraft gesucht", "Einstellung Lehrer"],
            },
            {
                "name": "Vertretungslehrkräfte",
                "keywords": ["Vertretungslehrkraft", "befristete Stelle Lehrer", "Krankheitsvertretung Schule"],
            },
            {
                "name": "Schulsozialarbeit",
                "keywords": ["Schulsozialarbeiter", "Sozialpädagoge Schule", "Schulpsychologe"],
            },
        ],
        "rules_config": [],
        "profile_config": {
            "priorities": [
                {"label": "Festanstellung", "weight": 0.90, "keywords": ["unbefristet", "Festanstellung", "Planstelle"], "description": "Feste Stellen"},
                {"label": "Region", "weight": 0.85, "keywords": [], "description": "Stellen in der Region"},
            ],
            "exclusions": ["Nachhilfe", "Privatlehrer", "freiberuflich"],
        },
        "importance_config": DEFAULT_IMPORTANCE_CONFIG,
        "max_cards_per_day": 10,
        "sort_order": 7,
    },
    # =========================================================================
    # 8. Crisis communication (optional/premium)
    # =========================================================================
    {
        "slug": "krisenkommunikation",
        "name": "Krisenkommunikation",
        "description": "Wichtige Meldungen für Schulen in Krisensituationen: Unwetter, Streiks, Verkehrsstörungen, Gesundheitswarnungen.",
        "icon": "⚠️",
        "category": "administration",
        "target_roles": ["schulleitung"],
        "is_premium": True,
        "topics_config": [
            {
                "name": "Wetter & Naturereignisse",
                "keywords": ["Unwetterwarnung Schule", "Schulausfall Wetter", "Hitzefrei"],
            },
            {
                "name": "Verkehr & ÖPNV",
                "keywords": ["Streik ÖPNV", "Verkehrsstörung", "Busausfall Schule"],
            },
            {
                "name": "Gesundheit",
                "keywords": ["Gesundheitswarnung Schule", "Infektionsgefahr", "Hygienemaßnahme"],
            },
        ],
        "rules_config": [
            {
                "name": "Sofortmeldungen",
                "conditions": [{"field": "title", "op": "in", "value": ["Warnung", "Achtung", "Sofort", "Gefahr", "Ausfall"]}],
                "action_type": "tag",
                "action_config": {"tags": ["sofort"]},
                "priority": 100,
            },
        ],
        "profile_config": {
            "priorities": [
                {"label": "Schulausfall", "weight": 0.98, "keywords": ["Schulausfall", "unterrichtsfrei", "Schule geschlossen"], "description": "Schulschließungen"},
                {"label": "Warnungen", "weight": 0.95, "keywords": ["Warnung", "Gefahr", "Achtung"], "description": "Wichtige Warnungen"},
            ],
            "exclusions": ["Werbung", "Versicherung"],
        },
        "importance_config": SECURITY_IMPORTANCE_CONFIG,  # fast escalation
        "max_cards_per_day": 5,
        "sort_order": 8,
    },
]
def seed_templates(db: Session, force_update: bool = False) -> int:
    """
    Insert the predefined templates into the database.

    Templates are matched against existing rows by their slug. Missing ones
    are created; existing ones are skipped unless ``force_update`` is set, in
    which case every template key that is also an attribute of the row is
    overwritten. A single commit is issued at the end.

    Args:
        db: SQLAlchemy session
        force_update: If True, existing templates are updated

    Returns:
        Number of inserted/updated templates
    """
    from alerts_agent.db.models import AlertTemplateDB

    touched = 0
    for spec in ALERT_TEMPLATES:
        row = db.query(AlertTemplateDB).filter_by(slug=spec["slug"]).first()

        if not row:
            # Unknown slug: create a fresh template row
            db.add(AlertTemplateDB(id=str(uuid.uuid4()), **spec))
            touched += 1
        elif force_update:
            # Known slug and update requested: overwrite matching attributes
            for field, new_value in spec.items():
                if hasattr(row, field):
                    setattr(row, field, new_value)
            touched += 1
        # Known slug without force_update: leave the row untouched

    db.commit()
    return touched
def get_templates_for_role(role: str) -> List[Dict[str, Any]]:
    """
    Return the recommended templates for a given role.

    A template qualifies when ``role`` is listed in its "target_roles" and it
    is not flagged "is_premium". The order of ALERT_TEMPLATES is preserved.

    Args:
        role: "lehrkraft", "schulleitung", or "it_beauftragte"

    Returns:
        List of matching templates
    """
    matches = []
    for template in ALERT_TEMPLATES:
        if template.get("is_premium", False):
            continue
        if role in template.get("target_roles", []):
            matches.append(template)
    return matches
def get_template_by_slug(slug: str) -> Dict[str, Any] | None:
"""
Gibt ein Template anhand seines Slugs zurück.
"""
for t in ALERT_TEMPLATES:
if t["slug"] == slug:
return t
return None

View File

@@ -0,0 +1,34 @@
"""
Database Module für Alerts Agent.
Stellt PostgreSQL-Anbindung für Alert-Persistenz bereit.
Nutzt die gleiche Base wie classroom_engine für konsistente Migrationen.
"""
from .database import Base, SessionLocal, get_db, engine
from .models import (
AlertTopicDB,
AlertItemDB,
AlertRuleDB,
AlertProfileDB,
AlertSourceEnum,
AlertStatusEnum,
RelevanceDecisionEnum,
FeedTypeEnum,
RuleActionEnum,
)
# Public API of the package: the database helpers re-exported from .database
# and the models/enums imported from .models above.
__all__ = [
"Base",
"SessionLocal",
"get_db",
"engine",
"AlertTopicDB",
"AlertItemDB",
"AlertRuleDB",
"AlertProfileDB",
"AlertSourceEnum",
"AlertStatusEnum",
"RelevanceDecisionEnum",
"FeedTypeEnum",
"RuleActionEnum",
]

View File

@@ -0,0 +1,19 @@
"""
Database Configuration für Alerts Agent.
Nutzt die gleiche PostgreSQL-Instanz und Base wie Classroom Engine.
"""
# Re-export from classroom_engine for consistency
from classroom_engine.database import (
Base,
SessionLocal,
get_db,
engine,
DATABASE_URL,
)
def init_db():
    """Create all tables (intended for development)."""
    # The models module is imported only for its side effect of registering
    # the model classes before create_all() runs.
    from . import models
    metadata = Base.metadata
    metadata.create_all(bind=engine)

View File

@@ -0,0 +1,636 @@
"""
SQLAlchemy Database Models für Alerts Agent.
Persistiert Topics, Alerts, Rules und Profile in PostgreSQL.
Nutzt die gleiche Base wie classroom_engine für konsistente Migrationen.
"""
from datetime import datetime
from sqlalchemy import (
Column, String, Integer, Float, DateTime, JSON,
Boolean, Text, Enum as SQLEnum, ForeignKey, Index
)
from sqlalchemy.orm import relationship
import enum
import uuid
# Import Base from classroom_engine for shared metadata
from classroom_engine.database import Base
class AlertSourceEnum(str, enum.Enum):
    """Source of an alert."""

    GOOGLE_ALERTS_RSS = "google_alerts_rss"
    GOOGLE_ALERTS_EMAIL = "google_alerts_email"
    RSS_FEED = "rss_feed"
    WEBHOOK = "webhook"
    MANUAL = "manual"
class AlertStatusEnum(str, enum.Enum):
    """Processing status of an alert."""

    NEW = "new"
    PROCESSED = "processed"
    DUPLICATE = "duplicate"
    SCORED = "scored"
    REVIEWED = "reviewed"
    ARCHIVED = "archived"
class RelevanceDecisionEnum(str, enum.Enum):
    """Relevance decision for an alert."""

    KEEP = "KEEP"
    DROP = "DROP"
    REVIEW = "REVIEW"
class FeedTypeEnum(str, enum.Enum):
    """Type of a feed source."""

    RSS = "rss"
    EMAIL = "email"
    WEBHOOK = "webhook"
class RuleActionEnum(str, enum.Enum):
    """Actions a rule can trigger."""

    KEEP = "keep"
    DROP = "drop"
    TAG = "tag"
    EMAIL = "email"
    WEBHOOK = "webhook"
    SLACK = "slack"
class ImportanceLevelEnum(str, enum.Enum):
    """Five-level importance scale for Guided Mode."""

    # Score ranges per level:
    INFO = "info"          # 0.0-0.4  - informational
    PRUEFEN = "pruefen"    # 0.4-0.6  - to be checked
    WICHTIG = "wichtig"    # 0.6-0.75 - important
    DRINGEND = "dringend"  # 0.75-0.9 - urgent
    KRITISCH = "kritisch"  # 0.9-1.0  - critical
class AlertModeEnum(str, enum.Enum):
    """Mode in which alerts are used."""

    GUIDED = "guided"  # guided mode for teachers / school management
    EXPERT = "expert"  # expert mode for IT-savvy users
class MigrationModeEnum(str, enum.Enum):
    """How the alerts were migrated."""

    FORWARD = "forward"              # e-mail forwarding
    IMPORT = "import"                # RSS import
    RECONSTRUCTED = "reconstructed"  # reconstructed automatically
class DigestStatusEnum(str, enum.Enum):
    """Status of digest generation."""

    PENDING = "pending"
    GENERATING = "generating"
    SENT = "sent"
    FAILED = "failed"
class UserRoleEnum(str, enum.Enum):
    """Role of the user, used for template recommendations."""

    LEHRKRAFT = "lehrkraft"
    SCHULLEITUNG = "schulleitung"
    IT_BEAUFTRAGTE = "it_beauftragte"
class AlertTopicDB(Base):
    """
    Alert topic / feed source.

    Represents one Google Alert configuration or one RSS feed.
    """
    __tablename__ = 'alert_topics'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String(36), nullable=True, index=True)  # optional: multi-user

    # Topic details
    name = Column(String(255), nullable=False)
    description = Column(Text, default="")

    # Feed configuration
    feed_url = Column(String(2000), nullable=True)
    feed_type = Column(
        SQLEnum(FeedTypeEnum),
        default=FeedTypeEnum.RSS,
        nullable=False
    )

    # Scheduling
    is_active = Column(Boolean, default=True, index=True)
    fetch_interval_minutes = Column(Integer, default=60)
    last_fetched_at = Column(DateTime, nullable=True)
    last_fetch_error = Column(Text, nullable=True)

    # Statistics
    total_items_fetched = Column(Integer, default=0)
    items_kept = Column(Integer, default=0)
    items_dropped = Column(Integer, default=0)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    alerts = relationship("AlertItemDB", back_populates="topic", cascade="all, delete-orphan")
    rules = relationship("AlertRuleDB", back_populates="topic", cascade="all, delete-orphan")

    def __repr__(self):
        # Column defaults are only applied at INSERT time, so feed_type can
        # still be None on an instance that has not been flushed yet; repr()
        # must not raise in that case.
        feed_type = self.feed_type.value if self.feed_type else "unknown"
        return f"<AlertTopic {self.name} ({feed_type})>"
class AlertItemDB(Base):
    """
    Single alert entry.

    Corresponds to one article/link from Google Alerts or an RSS feed.
    """
    __tablename__ = 'alert_items'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    topic_id = Column(String(36), ForeignKey('alert_topics.id', ondelete='CASCADE'), nullable=False, index=True)

    # Content
    title = Column(Text, nullable=False)
    url = Column(String(2000), nullable=False)
    snippet = Column(Text, default="")
    article_text = Column(Text, nullable=True)  # full text (optional)

    # Metadata
    lang = Column(String(10), default="de")
    published_at = Column(DateTime, nullable=True, index=True)
    fetched_at = Column(DateTime, default=datetime.utcnow, index=True)
    processed_at = Column(DateTime, nullable=True)

    # Source
    source = Column(
        SQLEnum(AlertSourceEnum),
        default=AlertSourceEnum.GOOGLE_ALERTS_RSS,
        nullable=False
    )

    # Deduplication
    url_hash = Column(String(64), unique=True, nullable=False, index=True)
    content_hash = Column(String(64), nullable=True)  # SimHash for fuzzy matching
    canonical_url = Column(String(2000), nullable=True)

    # Status
    status = Column(
        SQLEnum(AlertStatusEnum),
        default=AlertStatusEnum.NEW,
        nullable=False,
        index=True
    )
    cluster_id = Column(String(36), nullable=True)  # grouping of similar alerts

    # Relevance scoring
    relevance_score = Column(Float, nullable=True)
    relevance_decision = Column(
        SQLEnum(RelevanceDecisionEnum),
        nullable=True,
        index=True
    )
    relevance_reasons = Column(JSON, default=list)  # ["matches_priority", ...]
    relevance_summary = Column(Text, nullable=True)
    scored_by_model = Column(String(100), nullable=True)  # "llama3.1:8b"
    scored_at = Column(DateTime, nullable=True)

    # User actions
    user_marked_relevant = Column(Boolean, nullable=True)  # explicit feedback
    user_tags = Column(JSON, default=list)  # ["wichtig", "später lesen"]
    user_notes = Column(Text, nullable=True)

    # Guided Mode fields (new)
    importance_level = Column(
        SQLEnum(ImportanceLevelEnum),
        nullable=True,
        index=True
    )
    why_relevant = Column(Text, nullable=True)  # "why relevant?" explanation
    next_steps = Column(JSON, default=list)  # ["Schulleitung informieren", "Frist beachten"]
    action_deadline = Column(DateTime, nullable=True)  # set if there is a deadline
    source_name = Column(String(255), nullable=True)  # "Kultusministerium NRW"
    source_credibility = Column(String(50), default="official")  # official, news, blog

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationship
    topic = relationship("AlertTopicDB", back_populates="alerts")

    # Composite indexes for frequent queries
    __table_args__ = (
        Index('ix_alert_items_topic_status', 'topic_id', 'status'),
        Index('ix_alert_items_topic_decision', 'topic_id', 'relevance_decision'),
    )

    def __repr__(self):
        # id and status receive their column defaults only at INSERT time and
        # title may not be assigned yet, so guard against None to keep repr()
        # usable on instances that have not been flushed.
        short_id = self.id[:8] if self.id else "unsaved"
        short_title = self.title[:50] if self.title else ""
        status = self.status.value if self.status else "unknown"
        return f"<AlertItem {short_id}: {short_title}... ({status})>"
class AlertRuleDB(Base):
    """
    Filter rule for alerts.

    Defines conditions and an action for automatic processing.
    """
    __tablename__ = 'alert_rules'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    topic_id = Column(String(36), ForeignKey('alert_topics.id', ondelete='CASCADE'), nullable=True, index=True)
    user_id = Column(String(36), nullable=True, index=True)

    # Rule details
    name = Column(String(255), nullable=False)
    description = Column(Text, default="")

    # Conditions (stored as JSON)
    # Format: [{"field": "title", "op": "contains", "value": "..."}]
    conditions = Column(JSON, nullable=False, default=list)

    # Action
    action_type = Column(
        SQLEnum(RuleActionEnum),
        default=RuleActionEnum.KEEP,
        nullable=False
    )
    action_config = Column(JSON, default=dict)  # {"email": "x@y.z", "tags": [...]}

    # Prioritisation (higher = executed first)
    priority = Column(Integer, default=0, index=True)
    is_active = Column(Boolean, default=True, index=True)

    # Statistics
    match_count = Column(Integer, default=0)
    last_matched_at = Column(DateTime, nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationship
    topic = relationship("AlertTopicDB", back_populates="rules")

    def __repr__(self):
        # The column default for action_type is only applied at INSERT time,
        # so it can still be None on an unflushed instance; repr() must not
        # raise in that case.
        action = self.action_type.value if self.action_type else "unknown"
        return f"<AlertRule {self.name} ({action})>"
class AlertProfileDB(Base):
    """
    User profile for relevance scoring.

    Stores priorities, exclusions and learning examples.
    """
    __tablename__ = 'alert_profiles'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String(36), unique=True, nullable=True, index=True)

    # Display name (in case of several profiles per user)
    # NOTE(review): user_id above is declared unique - confirm whether
    # several profiles per user are actually intended.
    name = Column(String(255), default="Default")

    # Relevance criteria
    # Format: [{"label": "Inklusion", "weight": 0.9, "keywords": [...], "description": "..."}]
    priorities = Column(JSON, default=list)

    # Exclusion keywords
    exclusions = Column(JSON, default=list)  # ["Stellenanzeige", "Werbung"]

    # Few-shot examples for the LLM
    # Format: [{"title": "...", "url": "...", "reason": "...", "added_at": "..."}]
    positive_examples = Column(JSON, default=list)
    negative_examples = Column(JSON, default=list)

    # Policies
    # Format: {"prefer_german_sources": true, "max_age_days": 30}
    policies = Column(JSON, default=dict)

    # Statistics
    total_scored = Column(Integer, default=0)
    total_kept = Column(Integer, default=0)
    total_dropped = Column(Integer, default=0)
    accuracy_estimate = Column(Float, nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    def __repr__(self):
        return f"<AlertProfile {self.name} (user={self.user_id})>"

    def get_prompt_context(self) -> str:
        """
        Build the context text for the LLM prompt.

        The returned text is meant to be inserted into the system prompt of
        the relevance scorer. Sections whose underlying data is empty are
        left out.
        """
        out = ["## Relevanzprofil des Nutzers\n"]

        def add_examples(heading, examples):
            # Shared renderer for the positive and negative example
            # sections; only the last five examples are listed.
            if not examples:
                return
            out.append(heading)
            for example in examples[-5:]:
                out.append(f"- \"{example.get('title', '')}\"")
                if example.get("reason"):
                    out.append(f" Grund: {example['reason']}")
            out.append("")

        # Priorities
        if self.priorities:
            out.append("### Prioritäten (Themen von Interesse):")
            for priority in self.priorities:
                weight = priority.get("weight", 0.5)
                if weight > 0.7:
                    weight_label = "Sehr wichtig"
                elif weight > 0.4:
                    weight_label = "Wichtig"
                else:
                    weight_label = "Interessant"
                out.append(f"- **{priority.get('label', 'Unbenannt')}** ({weight_label})")
                if priority.get("description"):
                    out.append(f" {priority['description']}")
                if priority.get("keywords"):
                    out.append(f" Keywords: {', '.join(priority['keywords'])}")
            out.append("")

        # Exclusions
        if self.exclusions:
            out.append("### Ausschlüsse (ignorieren):")
            out.append(f"Themen mit diesen Keywords: {', '.join(self.exclusions)}")
            out.append("")

        # Positive, then negative examples
        add_examples("### Beispiele für relevante Alerts:", self.positive_examples)
        add_examples("### Beispiele für irrelevante Alerts:", self.negative_examples)

        # Policies
        if self.policies:
            out.append("### Zusätzliche Regeln:")
            out.extend(f"- {key}: {value}" for key, value in self.policies.items())

        return "\n".join(out)

    @classmethod
    def create_default_education_profile(cls) -> "AlertProfileDB":
        """
        Create a default profile for education topics.

        The instance is only constructed, not added to any session.
        """
        default_priorities = [
            {
                "label": "Inklusion",
                "weight": 0.9,
                "keywords": ["inklusiv", "Förderbedarf", "Behinderung", "Barrierefreiheit"],
                "description": "Inklusive Bildung, Förderschulen, Nachteilsausgleich"
            },
            {
                "label": "Datenschutz Schule",
                "weight": 0.85,
                "keywords": ["DSGVO", "Schülerfotos", "Einwilligung", "personenbezogene Daten"],
                "description": "DSGVO in Schulen, Datenschutz bei Klassenfotos"
            },
            {
                "label": "Schulrecht Bayern",
                "weight": 0.8,
                "keywords": ["BayEUG", "Schulordnung", "Kultusministerium", "Bayern"],
                "description": "Bayerisches Schulrecht, Verordnungen"
            },
            {
                "label": "Digitalisierung Schule",
                "weight": 0.7,
                "keywords": ["DigitalPakt", "Tablet-Klasse", "Lernplattform"],
                "description": "Digitale Medien im Unterricht"
            },
        ]
        default_exclusions = ["Stellenanzeige", "Praktikum gesucht", "Werbung", "Pressemitteilung"]
        default_policies = {
            "prefer_german_sources": True,
            "max_age_days": 30,
            "min_content_length": 100,
        }
        return cls(
            name="Bildung Default",
            priorities=default_priorities,
            exclusions=default_exclusions,
            policies=default_policies,
        )
# ============================================================================
# DUAL-MODE SYSTEM: Templates, Subscriptions, Sources, Digests
# ============================================================================
class AlertTemplateDB(Base):
    """
    Pre-configured alert template ("playbook").

    Used by guided mode: teachers pick one to three templates instead of
    configuring RSS feeds themselves.
    """

    __tablename__ = 'alert_templates'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))

    # --- Template identity ---
    slug = Column(String(100), unique=True, nullable=False)  # e.g. "foerderprogramme", "abitur-updates"
    name = Column(String(255), nullable=False)  # e.g. "Förderprogramme & Fristen"
    description = Column(Text, default="")  # one or two sentences in B1/B2-level German
    icon = Column(String(50), default="")  # emoji, e.g. "💰", "📝", "⚖️"
    category = Column(String(100), default="")  # "administration", "teaching", "it"

    # --- Target audience (roles that benefit from the template) ---
    target_roles = Column(JSON, default=list)  # e.g. ["schulleitung", "lehrkraft"]

    # --- Template configuration ---
    topics_config = Column(JSON, default=list)  # pre-configured RSS feeds
    rules_config = Column(JSON, default=list)  # pre-configured rules
    profile_config = Column(JSON, default=dict)  # priorities / exclusions

    # --- Importance mapping (score -> five levels) ---
    importance_config = Column(JSON, default=dict)  # e.g. {"critical": 0.90, "urgent": 0.75, ...}

    # --- Output settings ---
    max_cards_per_day = Column(Integer, default=10)
    digest_enabled = Column(Boolean, default=True)
    digest_day = Column(String(20), default="monday")  # weekday of the weekly digest

    # --- Localisation ---
    language = Column(String(10), default="de")

    # --- Metadata ---
    is_active = Column(Boolean, default=True)
    is_premium = Column(Boolean, default=False)  # marks paid templates
    sort_order = Column(Integer, default=0)

    # --- Timestamps ---
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # --- Relationships ---
    subscriptions = relationship("UserAlertSubscriptionDB", back_populates="template")

    def __repr__(self):
        """Short debug representation showing slug and name."""
        return f"<AlertTemplate {self.slug}: {self.name}>"
class AlertSourceDB(Base):
    """
    Alert source used to migrate a customer's existing alerts.

    Supports e-mail forwarding, RSS import and reconstruction.
    """

    __tablename__ = 'alert_sources'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    tenant_id = Column(String(36), nullable=True, index=True)  # for multi-tenant setups
    user_id = Column(String(36), nullable=True, index=True)

    # --- Source type ---
    source_type = Column(
        SQLEnum(FeedTypeEnum),
        default=FeedTypeEnum.RSS,
        nullable=False
    )

    # --- Original label as given by the customer ---
    original_label = Column(String(255), nullable=True)  # e.g. "EU IT Ausschreibungen"

    # --- E-mail forwarding ---
    inbound_address = Column(String(255), nullable=True, unique=True)  # e.g. alerts+tenant123@breakpilot.app

    # --- RSS import ---
    rss_url = Column(String(2000), nullable=True)

    # --- Migration mode ---
    migration_mode = Column(
        SQLEnum(MigrationModeEnum),
        default=MigrationModeEnum.IMPORT,
        nullable=False
    )

    # --- Link to the topic created for this source ---
    topic_id = Column(String(36), ForeignKey('alert_topics.id', ondelete='SET NULL'), nullable=True)

    # --- Status ---
    is_active = Column(Boolean, default=True)
    items_received = Column(Integer, default=0)
    last_item_at = Column(DateTime, nullable=True)

    # --- Timestamps ---
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    def __repr__(self):
        """Short debug representation showing source type and original label."""
        # Column defaults are applied on INSERT, so source_type can still be
        # None on an instance that has not been flushed yet; repr() must not
        # raise in that state.
        source_type = self.source_type.value if self.source_type is not None else None
        return f"<AlertSource {source_type}: {self.original_label}>"
class UserAlertSubscriptionDB(Base):
    """
    A user's subscription to alert templates or to an expert profile.

    Stores the chosen mode, the template/profile links and the setup-wizard
    state.
    """

    __tablename__ = 'user_alert_subscriptions'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String(36), nullable=False, index=True)
    school_id = Column(String(36), nullable=True, index=True)  # optional school context

    # --- Mode selection ---
    mode = Column(
        SQLEnum(AlertModeEnum),
        default=AlertModeEnum.GUIDED,
        nullable=False
    )

    # --- User role (for guided mode) ---
    user_role = Column(
        SQLEnum(UserRoleEnum),
        nullable=True
    )

    # --- Template link (guided mode); several templates may be selected ---
    template_id = Column(String(36), ForeignKey('alert_templates.id', ondelete='SET NULL'), nullable=True)
    selected_template_ids = Column(JSON, default=list)  # up to 3 templates

    # --- Profile link (expert mode) ---
    profile_id = Column(String(36), ForeignKey('alert_profiles.id', ondelete='SET NULL'), nullable=True)

    # --- Subscription settings ---
    is_active = Column(Boolean, default=True)
    notification_email = Column(String(255), nullable=True)

    # --- Digest preferences ---
    digest_enabled = Column(Boolean, default=True)
    digest_frequency = Column(String(20), default="weekly")  # weekly, daily
    digest_day = Column(String(20), default="monday")
    last_digest_sent_at = Column(DateTime, nullable=True)

    # --- Wizard state (for incomplete setups) ---
    wizard_step = Column(Integer, default=0)
    wizard_completed = Column(Boolean, default=False)
    wizard_state = Column(JSON, default=dict)  # scratch storage for wizard data

    # --- Timestamps ---
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # --- Relationships ---
    template = relationship("AlertTemplateDB", back_populates="subscriptions")
    profile = relationship("AlertProfileDB")
    digests = relationship("AlertDigestDB", back_populates="subscription", cascade="all, delete-orphan")

    def __repr__(self):
        """Short debug representation showing user id and mode."""
        # Column defaults are applied on INSERT, so mode can still be None on
        # an instance that has not been flushed yet; repr() must not raise.
        mode = self.mode.value if self.mode is not None else None
        return f"<UserAlertSubscription {self.user_id} ({mode})>"
class AlertDigestDB(Base):
    """
    Weekly digest summary.

    Holds the rendered summary plus per-level statistics.
    """

    __tablename__ = 'alert_digests'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    subscription_id = Column(String(36), ForeignKey('user_alert_subscriptions.id', ondelete='CASCADE'), nullable=False, index=True)
    user_id = Column(String(36), nullable=False, index=True)

    # --- Covered period ---
    period_start = Column(DateTime, nullable=False)
    period_end = Column(DateTime, nullable=False)

    # --- Content ---
    title = Column(String(255), default="")  # e.g. "KW 3/2026 - Ihre Bildungs-Alerts"
    summary_html = Column(Text, default="")  # rendered HTML summary
    summary_pdf_url = Column(String(500), nullable=True)  # link to the PDF export

    # --- Statistics (one counter per importance level) ---
    total_alerts = Column(Integer, default=0)
    kritisch_count = Column(Integer, default=0)
    dringend_count = Column(Integer, default=0)
    wichtig_count = Column(Integer, default=0)
    pruefen_count = Column(Integer, default=0)
    info_count = Column(Integer, default=0)

    # --- IDs of the alerts contained in the digest ---
    alert_ids = Column(JSON, default=list)

    # --- Status ---
    status = Column(
        SQLEnum(DigestStatusEnum),
        default=DigestStatusEnum.PENDING,
        nullable=False
    )
    sent_at = Column(DateTime, nullable=True)
    error_message = Column(Text, nullable=True)

    # --- Timestamps ---
    created_at = Column(DateTime, default=datetime.utcnow)

    # --- Relationships ---
    subscription = relationship("UserAlertSubscriptionDB", back_populates="digests")

    def __repr__(self):
        """Short debug representation showing title and status."""
        # Column defaults are applied on INSERT, so status can still be None
        # on an instance that has not been flushed yet; repr() must not raise.
        status = self.status.value if self.status is not None else None
        return f"<AlertDigest {self.title} ({status})>"

View File

@@ -0,0 +1,992 @@
"""
Repository für Alerts Agent - CRUD Operationen für Topics, Items, Rules und Profile.
Abstraktion der Datenbank-Operationen.
"""
import hashlib
from datetime import datetime
from typing import Optional, List, Dict, Any
from sqlalchemy.orm import Session as DBSession
from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy import or_, and_, func
from .models import (
AlertTopicDB, AlertItemDB, AlertRuleDB, AlertProfileDB,
AlertSourceEnum, AlertStatusEnum, RelevanceDecisionEnum,
FeedTypeEnum, RuleActionEnum
)
# =============================================================================
# TOPIC REPOSITORY
# =============================================================================
class TopicRepository:
"""Repository für Alert Topics (Feed-Quellen)."""
def __init__(self, db: DBSession):
self.db = db
# ==================== CREATE ====================
def create(
    self,
    name: str,
    feed_url: str = None,
    feed_type: str = "rss",
    user_id: str = None,
    description: str = "",
    fetch_interval_minutes: int = 60,
    is_active: bool = True,
) -> AlertTopicDB:
    """
    Insert a new topic and return it.

    A fresh UUID4 string is generated as ``id``; ``feed_type`` is converted
    through ``FeedTypeEnum(feed_type)``. The row is committed immediately
    and refreshed before it is returned.
    """
    import uuid

    column_values = dict(
        id=str(uuid.uuid4()),
        user_id=user_id,
        name=name,
        description=description,
        feed_url=feed_url,
        feed_type=FeedTypeEnum(feed_type),
        fetch_interval_minutes=fetch_interval_minutes,
        is_active=is_active,
    )
    new_topic = AlertTopicDB(**column_values)

    session = self.db
    session.add(new_topic)
    session.commit()
    session.refresh(new_topic)
    return new_topic
# ==================== READ ====================
def get_by_id(self, topic_id: str) -> Optional[AlertTopicDB]:
    """Return the topic whose ``id`` equals ``topic_id``, or ``None`` if there is none."""
    matching = self.db.query(AlertTopicDB).filter(AlertTopicDB.id == topic_id)
    return matching.first()
def get_all(
    self,
    user_id: str = None,
    is_active: bool = None,
    limit: int = 100,
    offset: int = 0,
) -> List[AlertTopicDB]:
    """
    Return topics, newest ``created_at`` first, with optional filters.

    A falsy ``user_id`` disables the owner filter; ``is_active`` filters
    only when it is not ``None``. ``offset``/``limit`` page the result.
    """
    criteria = []
    if user_id:
        criteria.append(AlertTopicDB.user_id == user_id)
    if is_active is not None:
        criteria.append(AlertTopicDB.is_active == is_active)

    newest_first = (
        self.db.query(AlertTopicDB)
        .filter(*criteria)
        .order_by(AlertTopicDB.created_at.desc())
    )
    return newest_first.offset(offset).limit(limit).all()
def get_active_for_fetch(self) -> List[AlertTopicDB]:
    """
    Return the active topics with a feed URL that are due for a fetch.

    A topic is due when it has never been fetched (``last_fetched_at`` is
    unset) or when at least ``fetch_interval_minutes`` have passed since
    ``last_fetched_at``. Timestamps are compared as naive UTC, matching
    ``update_fetch_status`` which stores ``datetime.utcnow()``.

    Returns:
        List of due topics; empty when nothing needs fetching.
    """
    from datetime import timedelta

    now = datetime.utcnow()
    candidates = self.db.query(AlertTopicDB).filter(
        AlertTopicDB.is_active == True,  # noqa: E712 - builds a SQL expression
        AlertTopicDB.feed_url.isnot(None),
    ).all()

    due = []
    for topic in candidates:
        if topic.last_fetched_at is None:
            # Never fetched before: always due.
            due.append(topic)
            continue
        # A missing interval is treated as "no waiting time".
        interval = timedelta(minutes=topic.fetch_interval_minutes or 0)
        if topic.last_fetched_at + interval <= now:
            due.append(topic)
    return due
# ==================== UPDATE ====================
def update(
    self,
    topic_id: str,
    name: str = None,
    description: str = None,
    feed_url: str = None,
    feed_type: str = None,
    is_active: bool = None,
    fetch_interval_minutes: int = None,
) -> Optional[AlertTopicDB]:
    """
    Apply the given field changes to a topic.

    Only arguments that are not ``None`` are written; ``feed_type`` is
    converted through ``FeedTypeEnum``. Returns the refreshed topic, or
    ``None`` when ``topic_id`` does not exist.
    """
    topic = self.get_by_id(topic_id)
    if not topic:
        return None

    # (attribute, new value, optional converter) in assignment order.
    pending = (
        ("name", name, None),
        ("description", description, None),
        ("feed_url", feed_url, None),
        ("feed_type", feed_type, lambda raw: FeedTypeEnum(raw)),
        ("is_active", is_active, None),
        ("fetch_interval_minutes", fetch_interval_minutes, None),
    )
    for attribute, value, convert in pending:
        if value is None:
            continue
        setattr(topic, attribute, convert(value) if convert else value)

    self.db.commit()
    self.db.refresh(topic)
    return topic
def update_fetch_status(
    self,
    topic_id: str,
    last_fetch_error: str = None,
    items_fetched: int = 0,
) -> Optional[AlertTopicDB]:
    """
    Record the outcome of a fetch run on a topic.

    Sets ``last_fetched_at`` to the current UTC time, overwrites
    ``last_fetch_error`` (``None`` clears it) and adds ``items_fetched`` to
    ``total_items_fetched``. Returns ``None`` when the topic does not exist.
    """
    record = self.get_by_id(topic_id)
    if not record:
        return None

    record.last_fetched_at = datetime.utcnow()
    record.last_fetch_error = last_fetch_error
    record.total_items_fetched = record.total_items_fetched + items_fetched

    session = self.db
    session.commit()
    session.refresh(record)
    return record
def increment_stats(
    self,
    topic_id: str,
    kept: int = 0,
    dropped: int = 0,
) -> Optional[AlertTopicDB]:
    """
    Add ``kept`` / ``dropped`` to the topic's ``items_kept`` / ``items_dropped``.

    Returns the refreshed topic, or ``None`` when ``topic_id`` is unknown.
    """
    record = self.get_by_id(topic_id)
    if not record:
        return None

    record.items_kept = record.items_kept + kept
    record.items_dropped = record.items_dropped + dropped

    session = self.db
    session.commit()
    session.refresh(record)
    return record
# ==================== DELETE ====================
def delete(self, topic_id: str) -> bool:
    """
    Delete a topic and commit.

    Returns ``True`` when a topic was deleted, ``False`` when ``topic_id``
    does not exist.
    NOTE(review): the original docstring says the topic's items are removed
    via CASCADE; that depends on the model/DB definition, which is not
    visible here - confirm.
    """
    record = self.get_by_id(topic_id)
    if record:
        self.db.delete(record)
        self.db.commit()
        return True
    return False
# ==================== CONVERSION ====================
def to_dict(self, topic: AlertTopicDB) -> Dict[str, Any]:
    """
    Convert a topic model into a plain dictionary.

    Datetimes become ISO-8601 strings (or ``None`` when unset), the feed
    type is emitted as its enum value, counters are grouped under "stats".
    """
    def iso(moment):
        return moment.isoformat() if moment else None

    counters = {
        "total_items_fetched": topic.total_items_fetched,
        "items_kept": topic.items_kept,
        "items_dropped": topic.items_dropped,
    }
    payload = {
        "id": topic.id,
        "user_id": topic.user_id,
        "name": topic.name,
        "description": topic.description,
        "feed_url": topic.feed_url,
        "feed_type": topic.feed_type.value,
        "is_active": topic.is_active,
        "fetch_interval_minutes": topic.fetch_interval_minutes,
        "last_fetched_at": iso(topic.last_fetched_at),
        "last_fetch_error": topic.last_fetch_error,
        "stats": counters,
    }
    payload["created_at"] = iso(topic.created_at)
    payload["updated_at"] = iso(topic.updated_at)
    return payload
# =============================================================================
# ALERT ITEM REPOSITORY
# =============================================================================
class AlertItemRepository:
"""Repository für Alert Items (einzelne Alerts/Artikel)."""
def __init__(self, db: DBSession):
self.db = db
# ==================== CREATE ====================
def create(
    self,
    topic_id: str,
    title: str,
    url: str,
    snippet: str = "",
    source: str = "google_alerts_rss",
    published_at: datetime = None,
    lang: str = "de",
) -> AlertItemDB:
    """
    Insert a new alert item and return it.

    Besides the given fields the row stores ``url_hash`` (see
    ``_compute_url_hash``) and ``canonical_url`` (see ``_normalize_url``).
    ``source`` is converted through ``AlertSourceEnum``. The row is
    committed immediately and refreshed before it is returned.
    """
    import uuid

    # Hash of the normalised URL, used for de-duplication.
    hashed_url = self._compute_url_hash(url)

    column_values = dict(
        id=str(uuid.uuid4()),
        topic_id=topic_id,
        title=title,
        url=url,
        snippet=snippet,
        source=AlertSourceEnum(source),
        published_at=published_at,
        lang=lang,
        url_hash=hashed_url,
        canonical_url=self._normalize_url(url),
    )
    new_alert = AlertItemDB(**column_values)

    session = self.db
    session.add(new_alert)
    session.commit()
    session.refresh(new_alert)
    return new_alert
def create_if_not_exists(
    self,
    topic_id: str,
    title: str,
    url: str,
    snippet: str = "",
    source: str = "google_alerts_rss",
    published_at: datetime = None,
    lang: str = "de",
) -> Optional[AlertItemDB]:
    """
    Create an alert unless one with the same normalised URL already exists.

    The duplicate check uses the URL hash and is not restricted to
    ``topic_id``: an item with the same hash in any topic counts as a
    duplicate.

    Args:
        topic_id: Topic the new item belongs to.
        title: Item title.
        url: Item URL; its normalised form is hashed for the check.
        snippet: Optional text excerpt.
        source: Source identifier, passed on to ``create``.
        published_at: Optional publication time.
        lang: Language code passed on to ``create`` (default "de").

    Returns:
        The created item, or ``None`` when a duplicate was found.
    """
    # Reuse the shared lookup instead of repeating the query here.
    if self.get_by_url_hash(self._compute_url_hash(url)) is not None:
        return None  # duplicate

    return self.create(
        topic_id=topic_id,
        title=title,
        url=url,
        snippet=snippet,
        source=source,
        published_at=published_at,
        lang=lang,
    )
# ==================== READ ====================
def get_by_id(self, alert_id: str) -> Optional[AlertItemDB]:
    """Return the alert whose ``id`` equals ``alert_id``, or ``None`` if there is none."""
    matching = self.db.query(AlertItemDB).filter(AlertItemDB.id == alert_id)
    return matching.first()
def get_by_url_hash(self, url_hash: str) -> Optional[AlertItemDB]:
    """Return the first alert stored with the given ``url_hash``, or ``None``."""
    matching = self.db.query(AlertItemDB).filter(AlertItemDB.url_hash == url_hash)
    return matching.first()
def get_inbox(
    self,
    user_id: str = None,
    topic_id: str = None,
    decision: str = None,
    status: str = None,
    limit: int = 50,
    offset: int = 0,
) -> List[AlertItemDB]:
    """
    Return inbox items, best relevance score first.

    Args:
        user_id: When given, only items whose topic belongs to this user
            are returned (matched through ``AlertTopicDB.user_id``).
        topic_id: When given, only items of this topic.
        decision: Relevance decision value to filter on. Without it, items
            decided KEEP or REVIEW and items with no decision yet are
            returned.
        status: Status value to filter on.
        limit: Maximum number of items.
        offset: Number of items to skip.

    Returns:
        Items ordered by ``relevance_score`` descending (unscored last),
        then by ``fetched_at`` descending.
    """
    query = self.db.query(AlertItemDB)

    if user_id:
        # Items carry no owner of their own; ownership comes from the topic.
        query = query.join(
            AlertTopicDB, AlertItemDB.topic_id == AlertTopicDB.id
        ).filter(AlertTopicDB.user_id == user_id)

    if topic_id:
        query = query.filter(AlertItemDB.topic_id == topic_id)

    if decision:
        query = query.filter(
            AlertItemDB.relevance_decision == RelevanceDecisionEnum(decision)
        )
    else:
        # Default view: KEEP, REVIEW and not-yet-decided items.
        query = query.filter(
            or_(
                AlertItemDB.relevance_decision == RelevanceDecisionEnum.KEEP,
                AlertItemDB.relevance_decision == RelevanceDecisionEnum.REVIEW,
                AlertItemDB.relevance_decision.is_(None)
            )
        )

    if status:
        query = query.filter(AlertItemDB.status == AlertStatusEnum(status))

    return query.order_by(
        AlertItemDB.relevance_score.desc().nullslast(),
        AlertItemDB.fetched_at.desc()
    ).offset(offset).limit(limit).all()
def get_unscored(
    self,
    topic_id: str = None,
    limit: int = 100,
) -> List[AlertItemDB]:
    """
    Return alerts whose status is ``AlertStatusEnum.NEW``, newest fetch first.

    ``topic_id`` optionally restricts the result to one topic; at most
    ``limit`` rows are returned.
    """
    criteria = [AlertItemDB.status == AlertStatusEnum.NEW]
    if topic_id:
        criteria.append(AlertItemDB.topic_id == topic_id)

    pending = self.db.query(AlertItemDB).filter(*criteria)
    newest_first = pending.order_by(AlertItemDB.fetched_at.desc())
    return newest_first.limit(limit).all()
def get_by_topic(
    self,
    topic_id: str,
    limit: int = 100,
    offset: int = 0,
) -> List[AlertItemDB]:
    """Return one page of a topic's alerts, newest ``fetched_at`` first."""
    of_topic = self.db.query(AlertItemDB).filter(AlertItemDB.topic_id == topic_id)
    newest_first = of_topic.order_by(AlertItemDB.fetched_at.desc())
    return newest_first.offset(offset).limit(limit).all()
def count_by_status(self, topic_id: str = None) -> Dict[str, int]:
    """
    Count alerts per status.

    Returns a mapping from status enum value to row count, optionally
    limited to one topic.
    """
    grouped = self.db.query(
        AlertItemDB.status,
        func.count(AlertItemDB.id).label('count')
    )
    if topic_id:
        grouped = grouped.filter(AlertItemDB.topic_id == topic_id)

    tally = {}
    for row in grouped.group_by(AlertItemDB.status).all():
        tally[row[0].value] = row[1]
    return tally
def count_by_decision(self, topic_id: str = None) -> Dict[str, int]:
    """
    Count alerts per relevance decision.

    Returns a mapping from decision enum value to row count; rows without a
    decision are reported under the key "unscored". ``topic_id`` optionally
    limits the count to one topic.
    """
    grouped = self.db.query(
        AlertItemDB.relevance_decision,
        func.count(AlertItemDB.id).label('count')
    )
    if topic_id:
        grouped = grouped.filter(AlertItemDB.topic_id == topic_id)

    tally = {}
    for row in grouped.group_by(AlertItemDB.relevance_decision).all():
        label = row[0].value if row[0] else "unscored"
        tally[label] = row[1]
    return tally
# ==================== UPDATE ====================
def update_scoring(
    self,
    alert_id: str,
    score: float,
    decision: str,
    reasons: List[str] = None,
    summary: str = None,
    model: str = None,
) -> Optional[AlertItemDB]:
    """
    Store a relevance scoring result on an alert.

    Writes score, decision (converted through ``RelevanceDecisionEnum``),
    reasons (``[]`` when omitted), summary and model name, stamps
    ``scored_at`` and ``processed_at`` with the current UTC time and sets the
    status to ``SCORED``. Returns ``None`` when ``alert_id`` is unknown.
    """
    item = self.get_by_id(alert_id)
    if not item:
        return None

    # Relevance fields.
    item.relevance_score = score
    item.relevance_decision = RelevanceDecisionEnum(decision)
    item.relevance_reasons = reasons if reasons else []
    item.relevance_summary = summary

    # Provenance and lifecycle fields.
    item.scored_by_model = model
    item.scored_at = datetime.utcnow()
    item.status = AlertStatusEnum.SCORED
    item.processed_at = datetime.utcnow()

    session = self.db
    session.commit()
    session.refresh(item)
    return item
def update_status(
    self,
    alert_id: str,
    status: str,
) -> Optional[AlertItemDB]:
    """
    Set an alert's status to ``AlertStatusEnum(status)`` and commit.

    Returns the refreshed alert, or ``None`` when ``alert_id`` is unknown.
    """
    item = self.get_by_id(alert_id)
    if not item:
        return None

    item.status = AlertStatusEnum(status)

    session = self.db
    session.commit()
    session.refresh(item)
    return item
def mark_reviewed(
    self,
    alert_id: str,
    is_relevant: bool,
    notes: str = None,
    tags: List[str] = None,
) -> Optional[AlertItemDB]:
    """
    Mark an alert as reviewed and store the user's feedback.

    Sets the status to ``REVIEWED`` and ``user_marked_relevant`` to
    ``is_relevant``. ``notes`` and ``tags`` are written only when truthy, so
    an empty string or empty list leaves the stored value untouched.
    Returns ``None`` when ``alert_id`` is unknown.
    """
    item = self.get_by_id(alert_id)
    if not item:
        return None

    item.status = AlertStatusEnum.REVIEWED
    item.user_marked_relevant = is_relevant
    for attribute, value in (("user_notes", notes), ("user_tags", tags)):
        if value:
            setattr(item, attribute, value)

    session = self.db
    session.commit()
    session.refresh(item)
    return item
def archive(self, alert_id: str) -> Optional[AlertItemDB]:
    """Set the alert's status to "archived" via ``update_status`` and return its result."""
    archived_status = "archived"
    return self.update_status(alert_id, archived_status)
# ==================== DELETE ====================
def delete(self, alert_id: str) -> bool:
    """
    Delete an alert and commit.

    Returns ``True`` when an alert was deleted, ``False`` when ``alert_id``
    does not exist.
    """
    item = self.get_by_id(alert_id)
    if item:
        self.db.delete(item)
        self.db.commit()
        return True
    return False
def delete_old(self, days: int = 90, topic_id: str = None) -> int:
    """
    Bulk-delete archived alerts fetched more than ``days`` days ago.

    Only rows with status ``ARCHIVED`` are affected; ``topic_id`` optionally
    limits the deletion to one topic. Returns the number of deleted rows.
    """
    from datetime import timedelta

    threshold = datetime.utcnow() - timedelta(days=days)

    stale = self.db.query(AlertItemDB).filter(
        AlertItemDB.status == AlertStatusEnum.ARCHIVED,
        AlertItemDB.fetched_at < threshold,
    )
    if topic_id:
        stale = stale.filter(AlertItemDB.topic_id == topic_id)

    removed = stale.delete()
    self.db.commit()
    return removed
# ==================== FOR RSS FETCHER ====================
def get_existing_urls(self, topic_id: str) -> set:
    """
    Return the set of non-empty URL hashes stored for a topic.

    Despite the name, the values are ``url_hash`` strings, not URLs.
    According to the original docstring the RSS fetcher uses this to avoid
    duplicates.
    """
    rows = self.db.query(AlertItemDB.url_hash).filter(
        AlertItemDB.topic_id == topic_id
    ).all()

    known_hashes = set()
    for row in rows:
        if row[0]:
            known_hashes.add(row[0])
    return known_hashes
def create_from_alert_item(self, alert_item, topic_id: str) -> AlertItemDB:
    """
    Persist an item object produced by the RSS fetcher.

    Args:
        alert_item: Object exposing ``title``, ``url``, ``snippet``,
            ``source`` and ``published_at``. ``source`` may be an enum-like
            object with ``.value`` or anything convertible with ``str``.
        topic_id: Topic the new row is attached to.

    Returns:
        Whatever ``create`` returns for the new row.
    """
    raw_source = alert_item.source
    if hasattr(raw_source, 'value'):
        source_name = raw_source.value
    else:
        source_name = str(raw_source)

    return self.create(
        topic_id=topic_id,
        title=alert_item.title,
        url=alert_item.url,
        snippet=alert_item.snippet or "",
        source=source_name,
        published_at=alert_item.published_at,
    )
# ==================== HELPER ====================
def _compute_url_hash(self, url: str) -> str:
"""Berechnet SHA256 Hash der normalisierten URL."""
normalized = self._normalize_url(url)
return hashlib.sha256(normalized.encode()).hexdigest()[:16]
def _normalize_url(self, url: str) -> str:
"""Normalisiert URL für Deduplizierung."""
import urllib.parse
parsed = urllib.parse.urlparse(url)
# Tracking-Parameter entfernen
tracking_params = {
"utm_source", "utm_medium", "utm_campaign", "utm_content", "utm_term",
"fbclid", "gclid", "ref", "source"
}
query_params = urllib.parse.parse_qs(parsed.query)
cleaned_params = {k: v for k, v in query_params.items()
if k.lower() not in tracking_params}
cleaned_query = urllib.parse.urlencode(cleaned_params, doseq=True)
# Rekonstruiere URL ohne Fragment
normalized = urllib.parse.urlunparse((
parsed.scheme,
parsed.netloc.lower(),
parsed.path.rstrip("/"),
parsed.params,
cleaned_query,
"" # No fragment
))
return normalized
# ==================== CONVERSION ====================
def to_dict(self, alert: AlertItemDB) -> Dict[str, Any]:
    """
    Convert an alert model into a plain dictionary.

    Datetimes become ISO-8601 strings (or ``None``), enums are emitted as
    their values; scoring data is grouped under "relevance" and user input
    under "user_feedback".
    """
    def iso(moment):
        return moment.isoformat() if moment else None

    decision = alert.relevance_decision
    relevance = {
        "score": alert.relevance_score,
        "decision": decision.value if decision else None,
        "reasons": alert.relevance_reasons,
        "summary": alert.relevance_summary,
        "model": alert.scored_by_model,
        "scored_at": iso(alert.scored_at),
    }
    feedback = {
        "marked_relevant": alert.user_marked_relevant,
        "tags": alert.user_tags,
        "notes": alert.user_notes,
    }
    return {
        "id": alert.id,
        "topic_id": alert.topic_id,
        "title": alert.title,
        "url": alert.url,
        "snippet": alert.snippet,
        "source": alert.source.value,
        "lang": alert.lang,
        "published_at": iso(alert.published_at),
        "fetched_at": iso(alert.fetched_at),
        "status": alert.status.value,
        "relevance": relevance,
        "user_feedback": feedback,
    }
# =============================================================================
# ALERT RULE REPOSITORY
# =============================================================================
class RuleRepository:
"""Repository für Alert Rules (Filterregeln)."""
def __init__(self, db: DBSession):
self.db = db
# ==================== CREATE ====================
def create(
    self,
    name: str,
    conditions: List[Dict],
    action_type: str = "keep",
    action_config: Dict = None,
    topic_id: str = None,
    user_id: str = None,
    description: str = "",
    priority: int = 0,
) -> AlertRuleDB:
    """
    Insert a new rule and return it.

    A fresh UUID4 string is generated as ``id``; ``action_type`` is
    converted through ``RuleActionEnum`` and a falsy ``action_config`` is
    stored as ``{}``. The row is committed immediately and refreshed.
    """
    import uuid

    column_values = dict(
        id=str(uuid.uuid4()),
        topic_id=topic_id,
        user_id=user_id,
        name=name,
        description=description,
        conditions=conditions,
        action_type=RuleActionEnum(action_type),
        action_config=action_config if action_config else {},
        priority=priority,
    )
    new_rule = AlertRuleDB(**column_values)

    session = self.db
    session.add(new_rule)
    session.commit()
    session.refresh(new_rule)
    return new_rule
# ==================== READ ====================
def get_by_id(self, rule_id: str) -> Optional[AlertRuleDB]:
    """Return the rule whose ``id`` equals ``rule_id``, or ``None`` if there is none."""
    matching = self.db.query(AlertRuleDB).filter(AlertRuleDB.id == rule_id)
    return matching.first()
def get_active(
    self,
    topic_id: str = None,
    user_id: str = None,
) -> List[AlertRuleDB]:
    """
    Return all active rules, highest ``priority`` first.

    When ``topic_id`` is given, rules bound to that topic and rules without
    a topic are both included; ``user_id`` works the same way for the rule
    owner.
    """
    criteria = [AlertRuleDB.is_active == True]

    if topic_id:
        # Topic-specific rules plus global ones.
        criteria.append(
            or_(
                AlertRuleDB.topic_id == topic_id,
                AlertRuleDB.topic_id.is_(None)
            )
        )
    if user_id:
        # User-specific rules plus ownerless ones.
        criteria.append(
            or_(
                AlertRuleDB.user_id == user_id,
                AlertRuleDB.user_id.is_(None)
            )
        )

    active = self.db.query(AlertRuleDB).filter(*criteria)
    return active.order_by(AlertRuleDB.priority.desc()).all()
def get_all(
    self,
    user_id: str = None,
    topic_id: str = None,
    is_active: bool = None,
) -> List[AlertRuleDB]:
    """
    Return rules, highest ``priority`` first, with optional exact filters.

    Unlike ``get_active``, the ``user_id`` and ``topic_id`` filters match
    exactly and do not include rules where the column is NULL.
    """
    criteria = []
    if user_id:
        criteria.append(AlertRuleDB.user_id == user_id)
    if topic_id:
        criteria.append(AlertRuleDB.topic_id == topic_id)
    if is_active is not None:
        criteria.append(AlertRuleDB.is_active == is_active)

    selected = self.db.query(AlertRuleDB).filter(*criteria)
    return selected.order_by(AlertRuleDB.priority.desc()).all()
# ==================== UPDATE ====================
def update(
    self,
    rule_id: str,
    name: str = None,
    description: str = None,
    conditions: List[Dict] = None,
    action_type: str = None,
    action_config: Dict = None,
    priority: int = None,
    is_active: bool = None,
) -> Optional[AlertRuleDB]:
    """
    Apply the given field changes to a rule.

    Only arguments that are not ``None`` are written; ``action_type`` is
    converted through ``RuleActionEnum``. Returns the refreshed rule, or
    ``None`` when ``rule_id`` does not exist.
    """
    rule = self.get_by_id(rule_id)
    if not rule:
        return None

    # (attribute, new value, optional converter) in assignment order.
    pending = (
        ("name", name, None),
        ("description", description, None),
        ("conditions", conditions, None),
        ("action_type", action_type, lambda raw: RuleActionEnum(raw)),
        ("action_config", action_config, None),
        ("priority", priority, None),
        ("is_active", is_active, None),
    )
    for attribute, value, convert in pending:
        if value is None:
            continue
        setattr(rule, attribute, convert(value) if convert else value)

    self.db.commit()
    self.db.refresh(rule)
    return rule
def increment_match_count(self, rule_id: str) -> Optional[AlertRuleDB]:
    """
    Record one more match for a rule.

    Adds 1 to ``match_count`` and sets ``last_matched_at`` to the current
    UTC time. Returns ``None`` when ``rule_id`` is unknown.
    """
    matched_rule = self.get_by_id(rule_id)
    if not matched_rule:
        return None

    matched_rule.match_count = matched_rule.match_count + 1
    matched_rule.last_matched_at = datetime.utcnow()

    session = self.db
    session.commit()
    session.refresh(matched_rule)
    return matched_rule
# ==================== DELETE ====================
def delete(self, rule_id: str) -> bool:
    """
    Delete a rule and commit.

    Returns ``True`` when a rule was deleted, ``False`` when ``rule_id``
    does not exist.
    """
    target = self.get_by_id(rule_id)
    if target:
        self.db.delete(target)
        self.db.commit()
        return True
    return False
# ==================== CONVERSION ====================
def to_dict(self, rule: AlertRuleDB) -> Dict[str, Any]:
    """
    Convert a rule model into a plain dictionary.

    Datetimes become ISO-8601 strings (or ``None``), the action type is
    emitted as its enum value, match statistics are grouped under "stats".
    """
    def iso(moment):
        return moment.isoformat() if moment else None

    match_stats = {
        "match_count": rule.match_count,
        "last_matched_at": iso(rule.last_matched_at),
    }
    payload = {
        "id": rule.id,
        "topic_id": rule.topic_id,
        "user_id": rule.user_id,
        "name": rule.name,
        "description": rule.description,
        "conditions": rule.conditions,
        "action_type": rule.action_type.value,
        "action_config": rule.action_config,
        "priority": rule.priority,
        "is_active": rule.is_active,
        "stats": match_stats,
    }
    payload["created_at"] = iso(rule.created_at)
    payload["updated_at"] = iso(rule.updated_at)
    return payload
# =============================================================================
# ALERT PROFILE REPOSITORY
# =============================================================================
class ProfileRepository:
"""Repository für Alert Profiles (Nutzer-Profile für Relevanz-Scoring)."""
def __init__(self, db: DBSession):
self.db = db
# ==================== CREATE / GET-OR-CREATE ====================
def get_or_create(self, user_id: str = None) -> AlertProfileDB:
    """
    Return the profile for ``user_id``, creating an empty one if missing.

    Without a ``user_id`` the user-less default profile is looked up and,
    if absent, created with the name "Default". Otherwise the new profile is
    named "Profile " followed by the first eight characters of the user id.
    """
    existing = self.get_by_user_id(user_id)
    if existing:
        return existing

    import uuid

    if user_id:
        profile_name = f"Profile {user_id[:8]}"
    else:
        profile_name = "Default"

    created = AlertProfileDB(
        id=str(uuid.uuid4()),
        user_id=user_id,
        name=profile_name,
    )
    session = self.db
    session.add(created)
    session.commit()
    session.refresh(created)
    return created
def create_default_education_profile(self, user_id: str = None) -> AlertProfileDB:
    """
    Create and persist the standard profile for education topics.

    The profile content (name, priorities, exclusions, policies) comes from
    ``AlertProfileDB.create_default_education_profile()`` so the defaults
    are defined in one place only; this method adds the id and owner and
    stores the row.

    Args:
        user_id: Owner of the profile; ``None`` creates a user-less profile.

    Returns:
        The committed and refreshed profile.
    """
    import uuid

    profile = AlertProfileDB.create_default_education_profile()
    profile.id = str(uuid.uuid4())
    profile.user_id = user_id

    self.db.add(profile)
    self.db.commit()
    self.db.refresh(profile)
    return profile
# ==================== READ ====================
def get_by_id(self, profile_id: str) -> Optional[AlertProfileDB]:
    """Return the profile whose ``id`` equals ``profile_id``, or ``None`` if there is none."""
    matching = self.db.query(AlertProfileDB).filter(AlertProfileDB.id == profile_id)
    return matching.first()
def get_by_user_id(self, user_id: str) -> Optional[AlertProfileDB]:
    """
    Return the first profile belonging to ``user_id``.

    A falsy ``user_id`` selects the default profile, i.e. the first row
    whose ``user_id`` column is NULL. Returns ``None`` when nothing matches.
    """
    if user_id:
        owner_matches = AlertProfileDB.user_id == user_id
    else:
        # Default profile without an owner.
        owner_matches = AlertProfileDB.user_id.is_(None)
    return self.db.query(AlertProfileDB).filter(owner_matches).first()
# ==================== UPDATE ====================
def update_priorities(
    self,
    profile_id: str,
    priorities: List[Dict],
) -> Optional[AlertProfileDB]:
    """
    Replace a profile's ``priorities`` list and commit.

    Returns the refreshed profile, or ``None`` when ``profile_id`` is
    unknown.
    """
    target = self.get_by_id(profile_id)
    if not target:
        return None

    target.priorities = priorities

    session = self.db
    session.commit()
    session.refresh(target)
    return target
def update_exclusions(
    self,
    profile_id: str,
    exclusions: List[str],
) -> Optional[AlertProfileDB]:
    """
    Replace a profile's ``exclusions`` list and commit.

    Returns the refreshed profile, or ``None`` when ``profile_id`` is
    unknown.
    """
    target = self.get_by_id(profile_id)
    if not target:
        return None

    target.exclusions = exclusions

    session = self.db
    session.commit()
    session.refresh(target)
    return target
def add_feedback(
    self,
    profile_id: str,
    title: str,
    url: str,
    is_relevant: bool,
    reason: str = "",
) -> Optional[AlertProfileDB]:
    """
    Store a piece of user feedback as a positive or negative example.

    The example (title, url, reason, UTC timestamp) is appended to
    ``positive_examples`` or ``negative_examples``; only the 20 most recent
    entries are kept. ``total_kept`` or ``total_dropped`` and
    ``total_scored`` are each increased by one. Returns ``None`` when
    ``profile_id`` is unknown.
    """
    target = self.get_by_id(profile_id)
    if not target:
        return None

    entry = {
        "title": title,
        "url": url,
        "reason": reason,
        "added_at": datetime.utcnow().isoformat(),
    }

    if is_relevant:
        list_attr, counter_attr = "positive_examples", "total_kept"
    else:
        list_attr, counter_attr = "negative_examples", "total_dropped"

    history = list(getattr(target, list_attr) or [])
    history.append(entry)
    setattr(target, list_attr, history[-20:])  # keep at most 20
    setattr(target, counter_attr, getattr(target, counter_attr) + 1)
    # Explicitly mark the JSON column as changed for the ORM.
    flag_modified(target, list_attr)

    target.total_scored = target.total_scored + 1

    session = self.db
    session.commit()
    session.refresh(target)
    return target
def update_stats(
    self,
    profile_id: str,
    kept: int = 0,
    dropped: int = 0,
) -> Optional[AlertProfileDB]:
    """
    Add scoring results to a profile's counters.

    ``total_scored`` grows by ``kept + dropped``, ``total_kept`` by ``kept``
    and ``total_dropped`` by ``dropped``. Returns ``None`` when
    ``profile_id`` is unknown.
    """
    target = self.get_by_id(profile_id)
    if not target:
        return None

    target.total_scored = target.total_scored + (kept + dropped)
    target.total_kept = target.total_kept + kept
    target.total_dropped = target.total_dropped + dropped

    session = self.db
    session.commit()
    session.refresh(target)
    return target
# ==================== DELETE ====================
def delete(self, profile_id: str) -> bool:
    """
    Delete a profile and commit.

    Returns ``True`` when a profile was deleted, ``False`` when
    ``profile_id`` does not exist.
    """
    target = self.get_by_id(profile_id)
    if target:
        self.db.delete(target)
        self.db.commit()
        return True
    return False
# ==================== CONVERSION ====================
def to_dict(self, profile: AlertProfileDB) -> Dict[str, Any]:
    """
    Convert a profile model into a plain dictionary.

    The stored examples are not exported; "examples" carries only the number
    of positive and negative entries. Datetimes become ISO-8601 strings (or
    ``None``).
    """
    def iso(moment):
        return moment.isoformat() if moment else None

    example_counts = {
        "positive": len(profile.positive_examples or []),
        "negative": len(profile.negative_examples or []),
    }
    scoring_stats = {
        "total_scored": profile.total_scored,
        "total_kept": profile.total_kept,
        "total_dropped": profile.total_dropped,
        "accuracy_estimate": profile.accuracy_estimate,
    }
    return {
        "id": profile.id,
        "user_id": profile.user_id,
        "name": profile.name,
        "priorities": profile.priorities,
        "exclusions": profile.exclusions,
        "policies": profile.policies,
        "examples": example_counts,
        "stats": scoring_stats,
        "created_at": iso(profile.created_at),
        "updated_at": iso(profile.updated_at),
    }

View File

@@ -0,0 +1,8 @@
"""Alert Ingestion Modules."""
from .rss_fetcher import RSSFetcher, FeedConfig
__all__ = [
"RSSFetcher",
"FeedConfig",
]

View File

@@ -0,0 +1,356 @@
"""
Email Parser für Google Alerts.
Parst Google Alert E-Mails und extrahiert Alert-Items.
Google Alert E-Mail Format:
- Subject: Google Alert - <Suchbegriff>
- Body enthält HTML mit Links zu Artikeln
- Jeder Artikel hat: Titel, URL, Snippet, Quelle
"""
import re
import logging
from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional, Dict, Any
from html import unescape
from urllib.parse import urlparse, parse_qs, unquote
from email import message_from_bytes, message_from_string
from email.message import EmailMessage
from bs4 import BeautifulSoup
logger = logging.getLogger(__name__)
@dataclass
class ParsedAlertEmail:
    """Result of parsing one Google Alert e-mail."""

    # Search term of the alert; presumably taken from the subject
    # "Google Alert - <term>" - confirm in the parsing code.
    search_term: str
    # Extracted alert items, one dictionary per article.
    items: List[Dict[str, Any]]
    # Time associated with the e-mail's reception.
    received_at: datetime
    # Message identifier of the e-mail, when available.
    message_id: Optional[str] = None
def extract_real_url(google_redirect_url: str) -> str:
    """
    Resolve a Google redirect link to the URL it points to.

    Google Alert links have the form:
        https://www.google.com/url?rct=j&sa=t&url=<ENCODED_URL>&...

    Args:
        google_redirect_url: Link as found in the alert.

    Returns:
        The value of the ``url`` query parameter for Google redirect links.
        Any other input (including empty input, or a redirect link without a
        ``url`` parameter) is returned unchanged.
    """
    if not google_redirect_url:
        return google_redirect_url

    if "google.com/url" in google_redirect_url:
        query = urlparse(google_redirect_url).query
        targets = parse_qs(query).get("url")
        if targets:
            # parse_qs has already percent-decoded the value exactly once.
            # Decoding it a second time would corrupt escapes that belong to
            # the target URL itself (e.g. "%2520" -> "%20" -> " ", or an
            # escaped "&" inside the target's own query string).
            return targets[0]

    return google_redirect_url
def clean_text(text: str) -> str:
    """Decode HTML entities and collapse every whitespace run to one space.

    Returns an empty string for falsy input; the result is stripped of
    leading and trailing whitespace.
    """
    if text:
        decoded = unescape(text)
        collapsed = re.sub(r'\s+', ' ', decoded)
        return collapsed.strip()
    return ""
def _collect_alert_links(containers, items, seen_urls, with_snippet):
    """Append one item per not-yet-seen Google redirect link found in ``containers``."""
    for container in containers:
        for link in container.find_all('a', href=True):
            href = link.get('href', '')

            # Only Google redirect links are real alert links
            if 'google.com/url' not in href:
                continue

            real_url = extract_real_url(href)

            # The link text is the article title; very short texts are skipped
            title = clean_text(link.get_text())
            if not title or len(title) < 5:
                continue

            # find_all() also returns nested containers, so the same link is
            # reached once per ancestor container - emit it only once.
            if real_url in seen_urls:
                continue

            snippet = ""
            if with_snippet:
                # Snippet: remaining text of the cell/div that holds the link
                parent = link.find_parent('td') or link.find_parent('div')
                if parent:
                    full_text = clean_text(parent.get_text())
                    if title in full_text:
                        snippet = full_text.replace(title, '').strip()
                        # Keep the first 300 characters as snippet
                        snippet = snippet[:300]

            seen_urls.add(real_url)
            items.append({
                "title": title,
                "url": real_url,
                "snippet": snippet,
                # Source is the domain of the resolved URL
                "source": urlparse(real_url).netloc,
            })


def parse_google_alert_html(html_content: str) -> List[Dict[str, Any]]:
    """
    Parse the HTML body of a Google Alert e-mail.

    Two layouts are tried: first links inside ``<table>`` elements (with a
    snippet taken from the surrounding cell), and - only if that yields
    nothing - links inside ``<div>`` elements that carry a class attribute
    (without snippet). Each target URL is reported at most once.

    Args:
        html_content: HTML content of the e-mail.

    Returns:
        List of alert items, each a dict with the keys ``title``, ``url``,
        ``snippet`` and ``source``. Parsing errors are logged and whatever
        was collected up to that point is returned.
    """
    items: List[Dict[str, Any]] = []
    seen_urls = set()

    try:
        soup = BeautifulSoup(html_content, 'html.parser')

        # Format 1: table based (older format)
        _collect_alert_links(soup.find_all('table'), items, seen_urls, with_snippet=True)

        # Format 2: div based (newer format)
        if not items:
            _collect_alert_links(
                soup.find_all('div', class_=re.compile(r'.*')),
                items,
                seen_urls,
                with_snippet=False,
            )

    except Exception as e:
        logger.error(f"Error parsing Google Alert HTML: {e}")

    return items
def _extract_html_body(msg) -> str:
    """Return the decoded text of the first non-empty text/html part, or ""."""
    # walk() yields the message itself for non-multipart mails, so a single
    # loop covers both the multipart and the single-part case.
    for part in msg.walk():
        if part.get_content_type() != 'text/html':
            continue
        payload = part.get_payload(decode=True)
        if not payload:
            continue
        charset = part.get_content_charset() or 'utf-8'
        try:
            return payload.decode(charset, errors='replace')
        except LookupError:
            # Unknown charset label in the mail: fall back to UTF-8
            return payload.decode('utf-8', errors='replace')
    return ""


def parse_email_message(
    email_bytes: Optional[bytes] = None,
    email_string: Optional[str] = None,
) -> Optional[ParsedAlertEmail]:
    """
    Parse an e-mail message in Google Alert format.

    Exactly one of the two arguments is used; ``email_bytes`` wins when both
    are given.

    Args:
        email_bytes: Raw e-mail as bytes.
        email_string: E-mail as string.

    Returns:
        ParsedAlertEmail, or None if no input was given, the subject does
        not contain "Google Alert", the mail has no HTML part, or parsing
        failed (the error is logged).
    """
    # Standard-library helpers that the module does not import at top level
    from datetime import timezone
    from email.header import decode_header, make_header
    from email.utils import parsedate_to_datetime

    try:
        if email_bytes:
            msg = message_from_bytes(email_bytes)
        elif email_string:
            msg = message_from_string(email_string)
        else:
            return None

        # Subjects with non-ASCII characters arrive RFC 2047 encoded
        # ("=?UTF-8?B?...?="); decode before inspecting them.
        raw_subject = msg.get('Subject', '')
        try:
            subject = str(make_header(decode_header(raw_subject)))
        except Exception:
            subject = str(raw_subject)

        # Check whether this is a Google Alert e-mail
        if 'Google Alert' not in subject:
            logger.debug(f"Not a Google Alert email: {subject}")
            return None

        # Extract search term from subject
        # Format: "Google Alert - <search term>"
        search_term = ""
        if ' - ' in subject:
            search_term = subject.split(' - ', 1)[1].strip()

        # Message-ID
        message_id = msg.get('Message-ID', '')

        # Received date: taken from the Date header, normalised to naive UTC
        # so it matches the datetime.utcnow() fallback.
        received_at = datetime.utcnow()
        date_str = msg.get('Date', '')
        if date_str:
            try:
                parsed_date = parsedate_to_datetime(date_str)
                if parsed_date.tzinfo is not None:
                    parsed_date = parsed_date.astimezone(timezone.utc).replace(tzinfo=None)
                received_at = parsed_date
            except (TypeError, ValueError):
                logger.debug(f"Unparseable Date header in Google Alert email: {date_str}")

        # Extract HTML body
        html_content = _extract_html_body(msg)

        if not html_content:
            logger.warning(f"No HTML content in Google Alert email: {subject}")
            return None

        # Parse HTML
        items = parse_google_alert_html(html_content)

        return ParsedAlertEmail(
            search_term=search_term,
            items=items,
            received_at=received_at,
            message_id=message_id,
        )

    except Exception as e:
        logger.error(f"Error parsing email message: {e}")
        return None
async def process_alert_emails(
    emails: List[bytes],
    topic_id: str,
    db,
) -> Dict[str, int]:
    """
    Parse a batch of Google Alert e-mails and store their items in the DB.

    E-mails that cannot be parsed are skipped silently.

    Args:
        emails: Raw e-mails as bytes.
        topic_id: ID of the topic the items belong to.
        db: Session object handed to AlertItemRepository.

    Returns:
        Dict with the counters ``new_items`` and ``duplicates_skipped``.
    """
    from alerts_agent.db.repository import AlertItemRepository
    from alerts_agent.db.models import AlertSourceEnum

    repository = AlertItemRepository(db)
    created = 0
    skipped = 0

    for raw_message in emails:
        parsed_mail = parse_email_message(email_bytes=raw_message)
        if parsed_mail:
            for entry in parsed_mail.items:
                stored = repository.create_if_not_exists(
                    topic_id=topic_id,
                    title=entry["title"],
                    url=entry["url"],
                    snippet=entry.get("snippet", ""),
                    source=AlertSourceEnum.GOOGLE_ALERTS_EMAIL,
                )
                # A falsy result is counted as an already known item
                if stored:
                    created += 1
                else:
                    skipped += 1

    return {
        "new_items": created,
        "duplicates_skipped": skipped,
    }
# IMAP-Integration für automatisches E-Mail-Fetching
async def fetch_emails_from_imap(
    host: str,
    username: str,
    password: str,
    folder: str = "INBOX",
    search_criteria: str = 'FROM "googlealerts-noreply@google.com" UNSEEN',
    limit: int = 100,
) -> List[bytes]:
    """
    Fetch e-mails from an IMAP server.

    Args:
        host: IMAP server hostname.
        username: IMAP user name.
        password: IMAP password.
        folder: IMAP folder (default: INBOX).
        search_criteria: IMAP search criteria.
        limit: Maximum number of e-mails; the last ``limit`` matches are
            fetched. Values <= 0 yield an empty list.

    Returns:
        List of raw e-mails as bytes. Errors are logged and whatever was
        fetched up to that point is returned; an empty list is returned if
        aioimaplib is not installed or the search fails.
    """
    try:
        import aioimaplib
    except ImportError:
        logger.error("aioimaplib not installed. Run: pip install aioimaplib")
        return []

    emails: List[bytes] = []

    # A slice of [-0:] would select *all* messages, so handle this up front.
    if limit <= 0:
        return emails

    client = None
    logged_in = False

    try:
        # IMAP connection
        client = aioimaplib.IMAP4_SSL(host)
        await client.wait_hello_from_server()

        # Login
        await client.login(username, password)
        logged_in = True

        # Select folder
        await client.select(folder)

        # Search e-mails
        result, data = await client.search(search_criteria)
        if result != 'OK':
            logger.error(f"IMAP search failed: {result}")
            return []

        # Extract message ids: last N matches
        message_ids = data[0].split()[-limit:] if data and data[0] else []

        # Fetch e-mails
        for msg_id in message_ids:
            # The ids come back as bytes; the FETCH command is built from text
            if isinstance(msg_id, (bytes, bytearray)):
                msg_id = msg_id.decode('ascii')

            result, data = await client.fetch(msg_id, '(RFC822)')
            if result != 'OK' or not data:
                continue

            for item in data:
                if isinstance(item, tuple) and len(item) >= 2:
                    # imaplib-style (envelope, body) tuple
                    emails.append(item[1])
                elif isinstance(item, bytearray):
                    # NOTE(review): aioimaplib appears to deliver the message
                    # literal as a bytearray line between plain bytes status
                    # lines - confirm against the installed version.
                    emails.append(bytes(item))

    except Exception as e:
        logger.error(f"IMAP fetch error: {e}")

    finally:
        # Always end the session, also on errors and early returns
        if logged_in:
            try:
                await client.logout()
            except Exception as e:
                logger.warning(f"IMAP logout failed: {e}")

    return emails

View File

@@ -0,0 +1,383 @@
"""
RSS Fetcher für Google Alerts.
Liest Google Alerts RSS Feeds und konvertiert sie in AlertItems.
Google Alerts RSS Feed Format:
- Feed URL: https://google.com/alerts/feeds/<user_id>/<alert_id>
- Entries enthalten: title, link, published, content
"""
import asyncio
import logging
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
from html import unescape
import re
import httpx
try:
import feedparser
FEEDPARSER_AVAILABLE = True
except ImportError:
FEEDPARSER_AVAILABLE = False
from ..models.alert_item import AlertItem, AlertSource, AlertStatus
logger = logging.getLogger(__name__)
@dataclass
class FeedConfig:
    """Configuration for a single RSS feed."""
    url: str
    topic_label: str # e.g. "Inklusion Bayern"
    enabled: bool = True
    fetch_interval_minutes: int = 60
    last_fetched: Optional[datetime] = None
    last_entry_id: Optional[str] = None # for duplicate detection
@dataclass
class FetchResult:
    """Outcome of fetching one feed."""
    feed_url: str
    success: bool
    items: list = field(default_factory=list) # List[AlertItem]
    error: Optional[str] = None
    fetched_at: datetime = field(default_factory=datetime.utcnow)
    new_items_count: int = 0
    skipped_count: int = 0 # items that were already known
class RSSFetcher:
    """
    Fetcher for Google Alerts RSS feeds.

    Usage:
        fetcher = RSSFetcher()
        fetcher.add_feed("https://google.com/alerts/feeds/...", "Inklusion")
        results = await fetcher.fetch_all()
    """

    def __init__(self, timeout: int = 30, user_agent: str = "BreakPilot-AlertAgent/0.1"):
        """
        Initialise the fetcher.

        Args:
            timeout: HTTP timeout in seconds.
            user_agent: Value of the User-Agent header.

        Raises:
            ImportError: If the optional ``feedparser`` package is missing.
        """
        if not FEEDPARSER_AVAILABLE:
            raise ImportError(
                "feedparser ist nicht installiert. "
                "Installiere mit: pip install feedparser"
            )

        self.feeds: list[FeedConfig] = []
        self.timeout = timeout
        self.user_agent = user_agent
        # Created lazily by _get_client()
        self._client: Optional[httpx.AsyncClient] = None

    def add_feed(self, url: str, topic_label: str, **kwargs) -> None:
        """Register a feed; extra keyword arguments are passed to FeedConfig."""
        config = FeedConfig(url=url, topic_label=topic_label, **kwargs)
        self.feeds.append(config)
        logger.info(f"Feed hinzugefügt: {topic_label} ({url[:50]}...)")

    def remove_feed(self, url: str) -> bool:
        """Remove all feeds with the given URL; True if at least one was removed."""
        before = len(self.feeds)
        self.feeds = [f for f in self.feeds if f.url != url]
        return len(self.feeds) < before

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, creating it if missing or closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=self.timeout,
                headers={"User-Agent": self.user_agent},
                follow_redirects=True,
            )
        return self._client

    async def close(self) -> None:
        """Close the HTTP client, if one was created."""
        if self._client:
            await self._client.aclose()
            self._client = None

    async def fetch_feed(self, config: FeedConfig,
                         known_entry_ids: Optional[set] = None) -> FetchResult:
        """
        Fetch a single feed.

        An entry counts as already known when either its entry ID (``id``,
        falling back to ``link``, then ``title``) or its link is contained
        in ``known_entry_ids``. Checking the link as well allows callers to
        pass previously stored URLs.

        Args:
            config: Feed configuration.
            known_entry_ids: Optional collection of known entry IDs / URLs.

        Returns:
            FetchResult with the new AlertItems. HTTP and parsing errors are
            not raised; they are logged and reported via ``result.error``
            with ``success=False``.
        """
        result = FetchResult(feed_url=config.url, success=False)
        known_ids = known_entry_ids or set()

        try:
            client = await self._get_client()
            response = await client.get(config.url)
            response.raise_for_status()

            # Parse feed
            feed = feedparser.parse(response.text)

            if feed.bozo and feed.bozo_exception:
                # Feed had parsing problems but may still be usable
                logger.warning(f"Feed {config.topic_label}: Parsing-Warnung: {feed.bozo_exception}")

            if not feed.entries:
                logger.info(f"Feed {config.topic_label}: Keine Einträge")
                result.success = True
                return result

            items = []
            for entry in feed.entries:
                # Duplicate check on entry ID and on the link
                entry_id = entry.get("id") or entry.get("link") or entry.get("title")
                entry_link = entry.get("link")

                if entry_id in known_ids or (entry_link and entry_link in known_ids):
                    result.skipped_count += 1
                    continue

                # Convert to AlertItem
                alert = self._entry_to_alert(entry, config)
                if alert:
                    items.append(alert)
                    result.new_items_count += 1

            result.items = items
            result.success = True
            config.last_fetched = datetime.utcnow()

            logger.info(
                f"Feed {config.topic_label}: {result.new_items_count} neue, "
                f"{result.skipped_count} übersprungen"
            )

        except httpx.HTTPStatusError as e:
            result.error = f"HTTP {e.response.status_code}: {e.response.reason_phrase}"
            logger.error(f"Feed {config.topic_label}: {result.error}")

        except httpx.RequestError as e:
            result.error = f"Request failed: {str(e)}"
            logger.error(f"Feed {config.topic_label}: {result.error}")

        except Exception as e:
            result.error = f"Unexpected error: {str(e)}"
            logger.exception(f"Feed {config.topic_label}: Unerwarteter Fehler")

        return result

    def _entry_to_alert(self, entry: dict, config: FeedConfig) -> Optional[AlertItem]:
        """
        Convert a feedparser entry to an AlertItem.

        Fields read from the entry:
        - title: title, may contain HTML entities/tags
        - link: URL of the article (entries without link are dropped)
        - published_parsed: date as struct_time
        - content[0].value or summary: HTML content used as snippet

        Returns:
            The AlertItem, or None if the entry has no link or conversion
            failed (the failure is logged).
        """
        try:
            # Clean title
            title = unescape(entry.get("title", ""))
            title = self._clean_html(title)

            # Extract URL
            url = entry.get("link", "")
            if not url:
                return None

            # Extract snippet from content, falling back to summary
            snippet = ""
            if "content" in entry and entry["content"]:
                content_html = entry["content"][0].get("value", "")
                snippet = self._clean_html(content_html)
            elif "summary" in entry:
                snippet = self._clean_html(entry["summary"])

            # Parse date
            published_at = None
            if "published_parsed" in entry and entry["published_parsed"]:
                try:
                    published_at = datetime(*entry["published_parsed"][:6])
                except (TypeError, ValueError):
                    pass

            # Create AlertItem
            alert = AlertItem(
                source=AlertSource.GOOGLE_ALERTS_RSS,
                topic_label=config.topic_label,
                feed_url=config.url,
                title=title,
                url=url,
                snippet=snippet[:2000],  # Limit snippet length
                published_at=published_at,
                status=AlertStatus.NEW,
            )

            return alert

        except Exception as e:
            logger.warning(f"Entry konnte nicht konvertiert werden: {e}")
            return None

    def _clean_html(self, html: str) -> str:
        """Strip HTML tags, decode entities and normalise whitespace."""
        if not html:
            return ""

        # Decode HTML entities
        text = unescape(html)

        # Remove HTML tags
        text = re.sub(r"<[^>]+>", " ", text)

        # Normalise whitespace
        text = re.sub(r"\s+", " ", text)

        return text.strip()

    async def fetch_all(self, known_entry_ids: Optional[set] = None,
                        parallel: bool = True) -> list[FetchResult]:
        """
        Fetch all enabled feeds.

        Args:
            known_entry_ids: Collection of known entry IDs / URLs, shared by
                all feeds.
            parallel: If True, fetch the feeds concurrently.

        Returns:
            One FetchResult per enabled feed, in registration order; an
            empty list if no feed is enabled.
        """
        active_feeds = [f for f in self.feeds if f.enabled]

        if not active_feeds:
            logger.warning("Keine aktiven Feeds konfiguriert")
            return []

        logger.info(f"Fetche {len(active_feeds)} Feeds...")

        if parallel:
            tasks = [
                self.fetch_feed(config, known_entry_ids)
                for config in active_feeds
            ]
            results = await asyncio.gather(*tasks, return_exceptions=True)

            # Convert exceptions into failed FetchResults
            processed = []
            for i, result in enumerate(results):
                if isinstance(result, Exception):
                    processed.append(FetchResult(
                        feed_url=active_feeds[i].url,
                        success=False,
                        error=str(result)
                    ))
                else:
                    processed.append(result)
            return processed
        else:
            results = []
            for config in active_feeds:
                result = await self.fetch_feed(config, known_entry_ids)
                results.append(result)
            return results

    def get_all_items(self, results: list[FetchResult]) -> list[AlertItem]:
        """Collect the AlertItems of all successful FetchResults."""
        items = []
        for result in results:
            if result.success:
                items.extend(result.items)
        return items

    def get_stats(self, results: list[FetchResult]) -> dict:
        """Summarise fetch results: feed counts, item counts and error messages."""
        total_new = sum(r.new_items_count for r in results)
        total_skipped = sum(r.skipped_count for r in results)
        successful = sum(1 for r in results if r.success)
        failed = sum(1 for r in results if not r.success)

        return {
            "feeds_total": len(results),
            "feeds_successful": successful,
            "feeds_failed": failed,
            "items_new": total_new,
            "items_skipped": total_skipped,
            "errors": [r.error for r in results if r.error],
        }
async def fetch_and_store_feed(
    topic_id: str,
    feed_url: str,
    db,
) -> dict:
    """
    Convenience function to fetch a single feed and store results.

    This is the function used by the API to trigger manual fetches.

    Args:
        topic_id: The topic ID to associate with fetched items
        feed_url: The RSS feed URL to fetch
        db: Database session for storing results

    Returns:
        dict with new_items and duplicates_skipped counts

    Raises:
        ImportError: If feedparser is not installed.
        Exception: If the feed could not be fetched; the message carries the
            fetch error.
    """
    from ..db.repository import AlertItemRepository, TopicRepository

    if not FEEDPARSER_AVAILABLE:
        raise ImportError("feedparser ist nicht installiert")

    fetcher = RSSFetcher()
    try:
        # The topic ID doubles as topic label for this one-off fetcher
        fetcher.add_feed(feed_url, topic_label=topic_id)

        # Get known URLs to skip duplicates
        alert_repo = AlertItemRepository(db)
        existing_urls = alert_repo.get_existing_urls(topic_id)

        # Fetch the feed
        results = await fetcher.fetch_all(known_entry_ids=existing_urls)
    finally:
        # Release the HTTP client even if the lookup or the fetch raised
        await fetcher.close()

    if not results:
        return {"new_items": 0, "duplicates_skipped": 0}

    result = results[0]

    if not result.success:
        raise Exception(result.error or "Feed fetch failed")

    # Store new items
    new_count = 0
    for item in result.items:
        alert_repo.create_from_alert_item(item, topic_id)
        new_count += 1

    # Update topic stats
    topic_repo = TopicRepository(db)
    topic_repo.update_fetch_status(
        topic_id,
        last_fetch_error=None,
        items_fetched=new_count,
    )

    return {
        "new_items": new_count,
        "duplicates_skipped": result.skipped_count,
    }

View File

@@ -0,0 +1,279 @@
"""
Scheduler für automatisches Feed-Fetching.
Verwendet APScheduler für periodische Jobs basierend auf Topic-Konfiguration.
"""
import logging
from datetime import datetime
from typing import Optional
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from apscheduler.jobstores.memory import MemoryJobStore
from sqlalchemy.orm import Session
from alerts_agent.db.database import SessionLocal
from alerts_agent.db.repository import TopicRepository
from alerts_agent.ingestion.rss_fetcher import fetch_and_store_feed
logger = logging.getLogger(__name__)
# Globaler Scheduler (Singleton)
_scheduler: Optional[AsyncIOScheduler] = None
def get_scheduler() -> AsyncIOScheduler:
    """Return the module-wide scheduler, building it on first use."""
    global _scheduler

    if _scheduler is not None:
        return _scheduler

    defaults = {
        "coalesce": True,  # merge missed runs into one
        "max_instances": 1,  # only one instance per job
        "misfire_grace_time": 60,  # 60s tolerance for missed runs
    }
    _scheduler = AsyncIOScheduler(
        jobstores={"default": MemoryJobStore()},
        job_defaults=defaults,
    )
    return _scheduler
async def fetch_topic_job(topic_id: str, feed_url: str) -> None:
    """
    Job function that fetches a single topic.

    Called by the scheduler. Opens its own DB session, fetches and stores
    the feed and logs the result. On failure the error text is written to
    the topic; a failure while doing so is logged and not raised, so the job
    itself never raises because of a fetch or bookkeeping error.

    Args:
        topic_id: ID of the topic to fetch.
        feed_url: URL of the topic's RSS feed.
    """
    db = SessionLocal()

    try:
        logger.info(f"Scheduler: Fetching topic {topic_id}")

        result = await fetch_and_store_feed(
            topic_id=topic_id,
            feed_url=feed_url,
            db=db,
        )

        logger.info(
            f"Scheduler: Topic {topic_id} - {result['new_items']} new, "
            f"{result['duplicates_skipped']} skipped"
        )

    except Exception as e:
        logger.error(f"Scheduler: Error fetching topic {topic_id}: {e}")

        # Store the error in the topic
        try:
            # NOTE(review): assumes SessionLocal yields a SQLAlchemy Session.
            # After a failed flush/commit the session must be rolled back
            # before it can be used again.
            db.rollback()
            repo = TopicRepository(db)
            repo.update(topic_id, last_fetch_error=str(e))
        except Exception as persist_error:
            logger.error(
                f"Scheduler: Could not store fetch error for topic {topic_id}: {persist_error}"
            )

    finally:
        db.close()
def schedule_topic(
    topic_id: str,
    feed_url: str,
    interval_minutes: int = 60,
) -> str:
    """
    Schedule a periodic fetch job for a topic.

    An existing job for the same topic is replaced.

    Args:
        topic_id: ID of the topic.
        feed_url: URL of the RSS feed.
        interval_minutes: Interval in minutes. Missing (None) or
            non-positive values fall back to 60 minutes.

    Returns:
        Job ID for later reference.
    """
    # Callers pass values straight from the DB; guard against None / 0 so
    # that building the trigger cannot fail or degenerate.
    if not interval_minutes or interval_minutes <= 0:
        logger.warning(
            f"Invalid fetch interval {interval_minutes!r} for topic {topic_id}, using 60 minutes"
        )
        interval_minutes = 60

    scheduler = get_scheduler()
    job_id = f"fetch_topic_{topic_id}"

    # Remove an existing job first, if present
    if scheduler.get_job(job_id):
        scheduler.remove_job(job_id)

    # Add the new job
    scheduler.add_job(
        fetch_topic_job,
        trigger=IntervalTrigger(minutes=interval_minutes),
        id=job_id,
        name=f"Fetch Topic {topic_id}",
        kwargs={"topic_id": topic_id, "feed_url": feed_url},
        replace_existing=True,
    )

    logger.info(f"Scheduled topic {topic_id} every {interval_minutes} minutes")
    return job_id
def unschedule_topic(topic_id: str) -> bool:
    """
    Remove the fetch job that belongs to a topic.

    Args:
        topic_id: ID of the topic.

    Returns:
        True if a job was removed, False if there was none.
    """
    scheduler = get_scheduler()
    job_id = f"fetch_topic_{topic_id}"

    if not scheduler.get_job(job_id):
        return False

    scheduler.remove_job(job_id)
    logger.info(f"Unscheduled topic {topic_id}")
    return True
def reschedule_topic(
    topic_id: str,
    feed_url: str,
    interval_minutes: int,
) -> str:
    """
    Update the fetch job of a topic.

    Thin wrapper around schedule_topic, which replaces an existing job.

    Args:
        topic_id: ID of the topic
        feed_url: URL of the RSS feed (in case it changed)
        interval_minutes: New interval in minutes

    Returns:
        Job ID
    """
    return schedule_topic(topic_id, feed_url, interval_minutes)
def sync_scheduler_with_db() -> dict:
    """
    Bring the scheduler in line with the topics stored in the database.

    Active topics with a feed URL get (re)scheduled, all other topics get
    their job removed, and fetch jobs without a matching active topic are
    dropped as orphans.

    Returns:
        Dict with the counters ``scheduled`` and ``unscheduled``.
    """
    db = SessionLocal()
    scheduler = get_scheduler()

    try:
        topics = TopicRepository(db).get_all()

        scheduled_count = 0
        removed_count = 0
        wanted_job_ids = set()

        for topic in topics:
            job_id = f"fetch_topic_{topic.id}"

            if not (topic.is_active and topic.feed_url):
                # Topic must not be scheduled
                if scheduler.get_job(job_id):
                    scheduler.remove_job(job_id)
                    removed_count += 1
                continue

            # Topic must be scheduled
            wanted_job_ids.add(job_id)
            schedule_topic(
                topic_id=topic.id,
                feed_url=topic.feed_url,
                interval_minutes=topic.fetch_interval_minutes,
            )
            scheduled_count += 1

        # Drop orphan jobs (topics that were deleted)
        orphans = [
            job for job in scheduler.get_jobs()
            if job.id.startswith("fetch_topic_") and job.id not in wanted_job_ids
        ]
        for job in orphans:
            scheduler.remove_job(job.id)
            removed_count += 1
            logger.info(f"Removed orphan job: {job.id}")

        return {"scheduled": scheduled_count, "unscheduled": removed_count}

    finally:
        db.close()
def start_scheduler() -> None:
    """
    Start the scheduler and load the jobs from the database.

    Meant to be called on application start; does nothing if the scheduler
    is already running.
    """
    scheduler = get_scheduler()

    if scheduler.running:
        return

    scheduler.start()
    logger.info("Alert scheduler started")

    # Initial synchronisation with the DB
    sync_result = sync_scheduler_with_db()
    logger.info(
        f"Scheduler synced: {sync_result['scheduled']} topics scheduled, "
        f"{sync_result['unscheduled']} removed"
    )
def stop_scheduler() -> None:
    """
    Shut the scheduler down without waiting for running jobs.

    Meant to be called on application shutdown; does nothing if the
    scheduler is not running.
    """
    scheduler = get_scheduler()

    if not scheduler.running:
        return

    scheduler.shutdown(wait=False)
    logger.info("Alert scheduler stopped")
def get_scheduler_status() -> dict:
    """
    Report the state of the scheduler.

    Returns:
        Dict with ``running``, ``jobs_count`` and ``jobs``; each job entry
        holds ``id``, ``name``, ``next_run`` (ISO string or None) and
        ``trigger``.
    """
    scheduler = get_scheduler()

    jobs = []
    for job in scheduler.get_jobs():
        # Jobs that were added before the scheduler was started are still
        # pending and may not have a next_run_time attribute yet, so read
        # it defensively instead of raising AttributeError.
        next_run = getattr(job, "next_run_time", None)
        jobs.append({
            "id": job.id,
            "name": job.name,
            "next_run": next_run.isoformat() if next_run else None,
            "trigger": str(job.trigger),
        })

    return {
        "running": scheduler.running,
        "jobs_count": len(jobs),
        "jobs": jobs,
    }
# Convenience-Funktion für Topic-Aktivierung
async def on_topic_activated(topic_id: str, feed_url: str, interval_minutes: int) -> None:
    """Hook for topic activation - schedules the fetch job."""
    schedule_topic(topic_id, feed_url, interval_minutes)
async def on_topic_deactivated(topic_id: str) -> None:
    """Hook for topic deactivation - removes the fetch job."""
    unschedule_topic(topic_id)
async def on_topic_updated(
    topic_id: str,
    feed_url: str,
    interval_minutes: int,
    is_active: bool,
) -> None:
    """Hook for topic updates - reschedules the fetch job or removes it.

    The job is kept (and rescheduled) only for active topics that have a
    feed URL; in every other case it is removed.
    """
    if not (is_active and feed_url):
        unschedule_topic(topic_id)
        return
    reschedule_topic(topic_id, feed_url, interval_minutes)
async def on_topic_deleted(topic_id: str) -> None:
    """Hook for topic deletion - removes the fetch job."""
    unschedule_topic(topic_id)

View File

@@ -0,0 +1,12 @@
"""Alert Agent Models."""
from .alert_item import AlertItem, AlertSource, AlertStatus
from .relevance_profile import RelevanceProfile, PriorityItem
__all__ = [
"AlertItem",
"AlertSource",
"AlertStatus",
"RelevanceProfile",
"PriorityItem",
]

View File

@@ -0,0 +1,174 @@
"""
AlertItem Model.
Repräsentiert einen einzelnen Alert aus Google Alerts (RSS oder Email).
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Optional
import hashlib
import uuid
class AlertSource(str, Enum):
    """Origin of an alert."""
    GOOGLE_ALERTS_RSS = "google_alerts_rss"
    GOOGLE_ALERTS_EMAIL = "google_alerts_email"
    MANUAL = "manual"
class AlertStatus(str, Enum):
    """Processing status of an alert."""
    NEW = "new"
    PROCESSED = "processed"
    DUPLICATE = "duplicate"
    SCORED = "scored"
    REVIEWED = "reviewed"
    ARCHIVED = "archived"
@dataclass
class AlertItem:
    """A single alert entry."""

    # Identification
    id: str = field(default_factory=lambda: str(uuid.uuid4()))

    # Source
    source: AlertSource = AlertSource.GOOGLE_ALERTS_RSS
    topic_label: str = ""  # e.g. "Schulrecht Bayern"
    feed_url: Optional[str] = None

    # Content
    title: str = ""
    url: str = ""
    snippet: str = ""
    article_text: Optional[str] = None

    # Metadata
    lang: str = "de"
    published_at: Optional[datetime] = None
    fetched_at: datetime = field(default_factory=datetime.utcnow)

    # Deduplication
    canonical_url: Optional[str] = None
    url_hash: Optional[str] = None
    content_hash: Optional[str] = None  # SimHash for fuzzy matching

    # Processing
    status: AlertStatus = AlertStatus.NEW
    cluster_id: Optional[str] = None

    # Relevance (after scoring)
    relevance_score: Optional[float] = None  # 0.0 - 1.0
    relevance_decision: Optional[str] = None  # KEEP, DROP, REVIEW
    relevance_reasons: list = field(default_factory=list)
    relevance_summary: Optional[str] = None

    def __post_init__(self):
        """Fill url_hash and canonical_url from url unless already set."""
        if not self.url_hash and self.url:
            self.url_hash = self._compute_url_hash()

        if not self.canonical_url and self.url:
            self.canonical_url = self._normalize_url(self.url)

    def _compute_url_hash(self) -> str:
        """Return the first 16 hex digits of the SHA256 of the normalised URL."""
        normalized = self._normalize_url(self.url)
        return hashlib.sha256(normalized.encode()).hexdigest()[:16]

    def _normalize_url(self, url: str) -> str:
        """Normalise a URL for deduplication.

        Drops tracking query parameters and the fragment, lower-cases the
        host and strips trailing slashes from the path.
        """
        import urllib.parse

        parsed = urllib.parse.urlparse(url)

        # NOTE: news.google.com "/articles/" redirect URLs are not resolved
        # here; they are normalised like any other URL.

        # Remove tracking parameters
        tracking_params = {
            "utm_source", "utm_medium", "utm_campaign", "utm_content", "utm_term",
            "fbclid", "gclid", "ref", "source"
        }

        query_params = urllib.parse.parse_qs(parsed.query)
        cleaned_params = {k: v for k, v in query_params.items()
                          if k.lower() not in tracking_params}

        cleaned_query = urllib.parse.urlencode(cleaned_params, doseq=True)

        # Rebuild the URL without fragment
        normalized = urllib.parse.urlunparse((
            parsed.scheme,
            parsed.netloc.lower(),
            parsed.path.rstrip("/"),
            parsed.params,
            cleaned_query,
            ""  # No fragment
        ))

        return normalized

    def compute_content_hash(self, text: Optional[str] = None) -> str:
        """
        Compute and store the SimHash of the content for fuzzy matching.

        The hash is computed over the first non-empty value of ``text``,
        ``article_text``, ``snippet`` and ``title``.

        Returns:
            The stored content hash, or "" if there is none.
        """
        from ..processing.dedup import compute_simhash

        content = text or self.article_text or self.snippet or self.title
        if content:
            self.content_hash = compute_simhash(content)
        return self.content_hash or ""

    def to_dict(self) -> dict:
        """Convert to a dictionary for JSON/DB; enums and datetimes become strings."""
        return {
            "id": self.id,
            "source": self.source.value,
            "topic_label": self.topic_label,
            "feed_url": self.feed_url,
            "title": self.title,
            "url": self.url,
            "snippet": self.snippet,
            "article_text": self.article_text,
            "lang": self.lang,
            "published_at": self.published_at.isoformat() if self.published_at else None,
            "fetched_at": self.fetched_at.isoformat() if self.fetched_at else None,
            "canonical_url": self.canonical_url,
            "url_hash": self.url_hash,
            "content_hash": self.content_hash,
            "status": self.status.value,
            "cluster_id": self.cluster_id,
            "relevance_score": self.relevance_score,
            "relevance_decision": self.relevance_decision,
            "relevance_reasons": self.relevance_reasons,
            "relevance_summary": self.relevance_summary,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "AlertItem":
        """Create an AlertItem from a dictionary as produced by to_dict().

        String values of ``source``/``status`` are converted to the enums
        and ISO strings of ``published_at``/``fetched_at`` to datetimes.
        The passed dictionary is not modified.
        """
        # Work on a shallow copy so the caller's dict keeps its raw values
        values = dict(data)

        # Parse enums
        if "source" in values and isinstance(values["source"], str):
            values["source"] = AlertSource(values["source"])
        if "status" in values and isinstance(values["status"], str):
            values["status"] = AlertStatus(values["status"])

        # Parse timestamps
        for field_name in ["published_at", "fetched_at"]:
            if field_name in values and isinstance(values[field_name], str):
                values[field_name] = datetime.fromisoformat(values[field_name])

        return cls(**values)

    def __repr__(self) -> str:
        return f"AlertItem(id={self.id[:8]}, title='{self.title[:50]}...', status={self.status.value})"

View File

@@ -0,0 +1,288 @@
"""
RelevanceProfile Model.
Definiert das Relevanzprofil eines Nutzers für die Alerts-Filterung.
Lernt über Zeit durch Feedback.
"""
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
import uuid
@dataclass
class PriorityItem:
    """A weighted topic of interest inside a relevance profile."""
    label: str  # e.g. "Inklusion", "Datenschutz Schule"
    weight: float = 0.5  # 0.0 - 1.0, higher = more important
    keywords: list = field(default_factory=list)  # additional keywords
    description: Optional[str] = None  # context for the LLM

    def to_dict(self) -> dict:
        """Return the four fields as a plain dictionary."""
        serialized = {}
        for attr in ("label", "weight", "keywords", "description"):
            serialized[attr] = getattr(self, attr)
        return serialized

    @classmethod
    def from_dict(cls, data: dict) -> "PriorityItem":
        """Build an instance using the entries of ``data`` as keyword arguments."""
        instance = cls(**data)
        return instance
@dataclass
class RelevanceProfile:
"""
Nutzerprofil für Relevanz-Scoring.
Das Profil wird verwendet, um Alerts auf Relevanz zu prüfen.
Es enthält:
- Prioritäten: Themen die wichtig sind (mit Gewichtung)
- Ausschlüsse: Themen die ignoriert werden sollen
- Positive Beispiele: URLs/Titel die relevant waren
- Negative Beispiele: URLs/Titel die irrelevant waren
- Policies: Zusätzliche Regeln (z.B. nur deutsche Quellen)
"""
# Identifikation
id: str = field(default_factory=lambda: str(uuid.uuid4()))
user_id: Optional[str] = None # Falls benutzerspezifisch
# Relevanz-Kriterien
priorities: list = field(default_factory=list) # List[PriorityItem]
exclusions: list = field(default_factory=list) # Keywords zum Ausschließen
# Beispiele für Few-Shot Learning
positive_examples: list = field(default_factory=list) # Relevante Alerts
negative_examples: list = field(default_factory=list) # Irrelevante Alerts
# Policies
policies: dict = field(default_factory=dict)
# Metadaten
created_at: datetime = field(default_factory=datetime.utcnow)
updated_at: datetime = field(default_factory=datetime.utcnow)
# Statistiken
total_scored: int = 0
total_kept: int = 0
total_dropped: int = 0
accuracy_estimate: Optional[float] = None # Geschätzte Genauigkeit
def add_priority(self, label: str, weight: float = 0.5, **kwargs) -> None:
    """Append a new priority topic and refresh ``updated_at``.

    Extra keyword arguments are passed on to PriorityItem.
    """
    item = PriorityItem(label=label, weight=weight, **kwargs)
    self.priorities.append(item)
    self.updated_at = datetime.utcnow()
def add_exclusion(self, keyword: str) -> None:
    """Add an exclusion keyword unless it is already present."""
    if keyword in self.exclusions:
        return
    self.exclusions.append(keyword)
    self.updated_at = datetime.utcnow()
def add_positive_example(self, title: str, url: str, reason: str = "") -> None:
    """Record a relevant alert as positive example (for few-shot learning).

    Only the 20 most recent examples are kept.
    """
    entry = {
        "title": title,
        "url": url,
        "reason": reason,
        "added_at": datetime.utcnow().isoformat(),
    }
    examples = self.positive_examples
    examples.append(entry)
    # Cap at the last 20 examples
    self.positive_examples = examples[-20:]
    self.updated_at = datetime.utcnow()
def add_negative_example(self, title: str, url: str, reason: str = "") -> None:
    """Record an irrelevant alert as negative example.

    Only the 20 most recent examples are kept.
    """
    entry = {
        "title": title,
        "url": url,
        "reason": reason,
        "added_at": datetime.utcnow().isoformat(),
    }
    examples = self.negative_examples
    examples.append(entry)
    # Cap at the last 20 examples
    self.negative_examples = examples[-20:]
    self.updated_at = datetime.utcnow()
def update_from_feedback(self, alert_title: str, alert_url: str,
                         is_relevant: bool, reason: str = "") -> None:
    """
    Update the profile from a piece of user feedback.

    The alert is stored as positive or negative example and the matching
    counter plus ``total_scored`` are incremented.

    Args:
        alert_title: Title of the alert
        alert_url: URL of the alert
        is_relevant: True if the user marked the alert as relevant
        reason: Optional reason for the decision
    """
    if is_relevant:
        record_example, counter_name = self.add_positive_example, "total_kept"
    else:
        record_example, counter_name = self.add_negative_example, "total_dropped"

    record_example(alert_title, alert_url, reason)
    setattr(self, counter_name, getattr(self, counter_name) + 1)
    self.total_scored += 1

    # Accuracy estimate (simplified): not computed yet. Once more than 10
    # alerts are scored, prediction vs. actual feedback could be compared
    # here.
def get_prompt_context(self) -> str:
    """
    Build the profile context for the LLM prompt.

    The text is meant to be inserted into the system prompt of the
    relevance scorer. Empty parts of the profile are left out.

    Returns:
        Markdown-like text, one entry per line
    """
    def _weight_label(weight):
        # Map the numeric weight to one of three verbal levels
        if weight > 0.7:
            return "Sehr wichtig"
        if weight > 0.4:
            return "Wichtig"
        return "Interessant"

    def _example_block(heading, examples):
        # Only the five most recent examples are shown
        block = [heading]
        for example in examples[-5:]:
            block.append(f"- \"{example['title']}\"")
            if example.get("reason"):
                block.append(f" Grund: {example['reason']}")
        block.append("")
        return block

    out = ["## Relevanzprofil des Nutzers\n"]

    # Priorities
    if self.priorities:
        out.append("### Prioritäten (Themen von Interesse):")
        for entry in self.priorities:
            # Priorities may be stored as plain dicts
            prio = PriorityItem.from_dict(entry) if isinstance(entry, dict) else entry
            out.append(f"- **{prio.label}** ({_weight_label(prio.weight)})")
            if prio.description:
                out.append(f" {prio.description}")
            if prio.keywords:
                out.append(f" Keywords: {', '.join(prio.keywords)}")
        out.append("")

    # Exclusions
    if self.exclusions:
        out.extend([
            "### Ausschlüsse (ignorieren):",
            f"Themen mit diesen Keywords: {', '.join(self.exclusions)}",
            "",
        ])

    # Positive and negative examples
    if self.positive_examples:
        out.extend(_example_block("### Beispiele für relevante Alerts:", self.positive_examples))
    if self.negative_examples:
        out.extend(_example_block("### Beispiele für irrelevante Alerts:", self.negative_examples))

    # Policies
    if self.policies:
        out.append("### Zusätzliche Regeln:")
        out.extend(f"- {key}: {value}" for key, value in self.policies.items())

    return "\n".join(out)
def to_dict(self) -> dict:
    """
    Convert the profile to a dictionary.

    Returns:
        Dict with all profile fields; timestamps are ISO-formatted strings
        and PriorityItem entries are converted via their own to_dict()
    """
    serialized_priorities = []
    for entry in self.priorities:
        # Entries that are not PriorityItem instances are passed through as-is
        serialized_priorities.append(
            entry.to_dict() if isinstance(entry, PriorityItem) else entry
        )

    result = {"id": self.id, "user_id": self.user_id}
    result["priorities"] = serialized_priorities
    result["exclusions"] = self.exclusions
    result["positive_examples"] = self.positive_examples
    result["negative_examples"] = self.negative_examples
    result["policies"] = self.policies
    result["created_at"] = self.created_at.isoformat()
    result["updated_at"] = self.updated_at.isoformat()
    result["total_scored"] = self.total_scored
    result["total_kept"] = self.total_kept
    result["total_dropped"] = self.total_dropped
    result["accuracy_estimate"] = self.accuracy_estimate
    return result
@classmethod
def from_dict(cls, data: dict) -> "RelevanceProfile":
    """
    Create a RelevanceProfile from a dictionary.

    The input dictionary is not modified: parsing happens on a shallow copy,
    so callers can keep using (or re-serializing) the dict they passed in.

    Args:
        data: Dict of profile fields. "created_at" / "updated_at" may be ISO
            strings; "priorities" may contain dicts or PriorityItem objects.

    Returns:
        New instance built via cls(**fields)

    Raises:
        ValueError: If a timestamp string is not valid ISO format
    """
    # Shallow copy: only top-level keys are replaced below
    fields = dict(data)

    # Parse timestamps
    for field_name in ("created_at", "updated_at"):
        raw = fields.get(field_name)
        if isinstance(raw, str):
            fields[field_name] = datetime.fromisoformat(raw)

    # Parse priorities
    if "priorities" in fields:
        fields["priorities"] = [
            PriorityItem.from_dict(p) if isinstance(p, dict) else p
            for p in fields["priorities"]
        ]

    return cls(**fields)
@classmethod
def create_default_education_profile(cls) -> "RelevanceProfile":
    """
    Create a default profile for education topics.

    The profile is tuned for teachers / school staff: five weighted
    priorities, a list of default exclusions and a few policies.

    Returns:
        Newly created profile
    """
    # (label, weight, keywords, description), added in this order
    default_priorities = (
        ("Inklusion", 0.9,
         ["inklusiv", "Förderbedarf", "Behinderung", "Barrierefreiheit"],
         "Inklusive Bildung, Förderschulen, Nachteilsausgleich"),
        ("Datenschutz Schule", 0.85,
         ["DSGVO", "Schülerfotos", "Einwilligung", "personenbezogene Daten"],
         "DSGVO in Schulen, Datenschutz bei Klassenfotos"),
        ("Schulrecht Bayern", 0.8,
         ["BayEUG", "Schulordnung", "Kultusministerium", "Bayern"],
         "Bayerisches Schulrecht, Verordnungen"),
        ("Digitalisierung Schule", 0.7,
         ["DigitalPakt", "Tablet-Klasse", "Lernplattform"],
         "Digitale Medien im Unterricht"),
        ("Elternarbeit", 0.6,
         ["Elternbeirat", "Elternabend", "Kommunikation"],
         "Zusammenarbeit mit Eltern"),
    )

    profile = cls()
    for label, weight, keywords, description in default_priorities:
        profile.add_priority(
            label,
            weight=weight,
            keywords=keywords,
            description=description,
        )

    # Default exclusions ("Pressemitteilung" is often generic)
    profile.exclusions = [
        "Stellenanzeige",
        "Praktikum gesucht",
        "Werbung",
        "Pressemitteilung",
    ]

    # Policies
    profile.policies = {
        "prefer_german_sources": True,
        "max_age_days": 30,  # ignore older alerts
        "min_content_length": 100,  # ignore very short snippets
    }
    return profile
def __repr__(self) -> str:
return f"RelevanceProfile(id={self.id[:8]}, priorities={len(self.priorities)}, examples={len(self.positive_examples) + len(self.negative_examples)})"

View File

@@ -0,0 +1,12 @@
"""Alert Processing Modules."""
from .dedup import compute_simhash, hamming_distance, find_duplicates
from .relevance_scorer import RelevanceScorer, ScoringResult
# Names re-exported from the dedup and relevance_scorer submodules
__all__ = [
    "compute_simhash",
    "hamming_distance",
    "find_duplicates",
    "RelevanceScorer",
    "ScoringResult",
]

View File

@@ -0,0 +1,239 @@
"""
Deduplizierung für Alerts.
Nutzt SimHash für Fuzzy-Matching von ähnlichen Texten.
SimHash ist ein Locality-Sensitive Hash, bei dem ähnliche Texte
ähnliche Hashes produzieren.
"""
import hashlib
import re
from typing import Optional
from collections import Counter
# SimHash parameters
SIMHASH_BITS = 64  # width of the SimHash in bits
SHINGLE_SIZE = 3  # number of consecutive words per shingle
def _tokenize(text: str) -> list:
"""
Tokenisiere Text in normalisierte Wörter.
- Lowercase
- Nur alphanumerische Zeichen
- Stoppwörter entfernen (deutsche)
"""
# Deutsche Stoppwörter (häufige Wörter ohne semantischen Wert)
STOPWORDS = {
"der", "die", "das", "den", "dem", "des", "ein", "eine", "einer", "eines",
"und", "oder", "aber", "doch", "wenn", "weil", "dass", "als", "auch",
"ist", "sind", "war", "waren", "wird", "werden", "wurde", "wurden",
"hat", "haben", "hatte", "hatten", "kann", "können", "konnte", "konnten",
"für", "von", "mit", "bei", "nach", "aus", "über", "unter", "vor", "hinter",
"auf", "an", "in", "im", "am", "um", "bis", "durch", "ohne", "gegen",
"nicht", "noch", "nur", "schon", "sehr", "mehr", "sich", "es", "sie", "er",
"wir", "ihr", "ich", "du", "man", "so", "wie", "was", "wer", "wo", "wann",
}
# Normalisiere
text = text.lower()
# Nur Buchstaben, Zahlen und Umlaute
text = re.sub(r"[^a-zäöüß0-9\s]", " ", text)
# Tokenisiere
words = text.split()
# Filtere Stoppwörter und kurze Wörter
words = [w for w in words if w not in STOPWORDS and len(w) > 2]
return words
def _create_shingles(words: list, size: int = SHINGLE_SIZE) -> list:
    """
    Build shingles (word n-grams) from a word list.

    Shingles are overlapping sequences of words,
    e.g. ["a", "b", "c", "d"] with size=2 -> ["a b", "b c", "c d"].

    Args:
        words: List of words
        size: Number of words per shingle

    Returns:
        List of space-joined shingles. If there are fewer words than
        ``size``, a single shingle with all words (or an empty list for
        no words).
    """
    word_count = len(words)
    if word_count >= size:
        return [
            " ".join(words[start:start + size])
            for start in range(word_count - size + 1)
        ]
    if words:
        return [" ".join(words)]
    return []
def _hash_shingle(shingle: str) -> int:
"""Hash ein Shingle zu einer 64-bit Zahl."""
# Nutze MD5 und nimm erste 8 Bytes (64 bit)
h = hashlib.md5(shingle.encode()).digest()[:8]
return int.from_bytes(h, byteorder="big")
def compute_simhash(text: str) -> str:
    """
    Compute the SimHash of a text.

    SimHash works as follows:
    1. Split the text into shingles (word n-grams)
    2. Hash every shingle
    3. For every hash: bit=1 -> +1, otherwise -1 (per bit position)
    4. Sum over all hashes
    5. Sum > 0 -> result bit=1, otherwise 0

    Args:
        text: Text to hash

    Returns:
        16-character hex string (64 bit); all zeros if the text is empty
        or yields no words/shingles
    """
    empty_hash = "0" * 16
    if not text:
        return empty_hash

    words = _tokenize(text)
    if not words:
        return empty_hash

    shingles = _create_shingles(words)
    if not shingles:
        return empty_hash

    # One signed counter per bit position
    votes = [0] * SIMHASH_BITS
    for shingle in shingles:
        shingle_hash = _hash_shingle(shingle)
        for position in range(SIMHASH_BITS):
            votes[position] += 1 if (shingle_hash >> position) & 1 else -1

    # Positions with a positive vote become set bits of the final hash
    fingerprint = 0
    for position, vote in enumerate(votes):
        if vote > 0:
            fingerprint |= 1 << position

    return format(fingerprint, "016x")
def hamming_distance(hash1: str, hash2: str) -> int:
    """
    Compute the Hamming distance between two SimHashes.

    The Hamming distance is the number of differing bits. The smaller it
    is, the more similar the texts are.

    Typical thresholds:
    - 0-3: very similar (probably a duplicate)
    - 4-7: similar (same topic)
    - 8+: different

    Args:
        hash1: First SimHash as hex string
        hash2: Second SimHash as hex string

    Returns:
        Number of differing bits; SIMHASH_BITS if a hash is empty or not
        valid hex
    """
    if not (hash1 and hash2):
        return SIMHASH_BITS

    try:
        differing_bits = int(hash1, 16) ^ int(hash2, 16)
    except ValueError:
        # Unparseable hashes count as maximally different
        return SIMHASH_BITS

    return format(differing_bits, "b").count("1")
def are_similar(hash1: str, hash2: str, threshold: int = 5) -> bool:
    """
    Check whether two hashes indicate similar texts.

    Args:
        hash1: First SimHash
        hash2: Second SimHash
        threshold: Maximum Hamming distance that still counts as similar

    Returns:
        True if the texts are probably similar
    """
    distance = hamming_distance(hash1, hash2)
    return distance <= threshold
def find_duplicates(items: list, hash_field: str = "content_hash",
                    threshold: int = 3) -> dict:
    """
    Find duplicates/clusters in a list of items.

    Each item joins the first existing cluster that contains at least one
    member within ``threshold``; otherwise it starts a new cluster. Items
    without a hash are skipped.

    Args:
        items: List of objects with a ``hash_field`` attribute
        hash_field: Name of the attribute holding the SimHash
        threshold: Maximum Hamming distance for duplicate detection

    Returns:
        Dict {item_id: cluster_id} for items in clusters with more than
        one member
    """
    members_by_cluster = {}  # cluster_id -> list of items
    cluster_of_item = {}  # item_id -> cluster_id
    next_cluster_number = 0

    def _is_close(candidate_hash, member):
        member_hash = getattr(member, hash_field, None)
        return bool(member_hash) and hamming_distance(candidate_hash, member_hash) <= threshold

    for candidate in items:
        # Items without an "id" attribute fall back to their object identity
        candidate_id = getattr(candidate, "id", str(id(candidate)))
        candidate_hash = getattr(candidate, hash_field, None)
        if not candidate_hash:
            continue

        # Look for the first existing cluster with a close member
        matching_cluster = next(
            (
                cluster_id
                for cluster_id, members in members_by_cluster.items()
                if any(_is_close(candidate_hash, member) for member in members)
            ),
            None,
        )

        if matching_cluster is None:
            # Start a new cluster
            matching_cluster = f"cluster_{next_cluster_number}"
            next_cluster_number += 1
            members_by_cluster[matching_cluster] = [candidate]
        else:
            members_by_cluster[matching_cluster].append(candidate)
        cluster_of_item[candidate_id] = matching_cluster

    # Drop single-item clusters (no real duplicates)
    return {
        item_id: cluster_id
        for item_id, cluster_id in cluster_of_item.items()
        if len(members_by_cluster[cluster_id]) > 1
    }
def exact_url_duplicates(items: list, url_field: str = "canonical_url") -> set:
    """
    Find exact URL duplicates.

    Args:
        items: List of objects with a ``url_field`` attribute
        url_field: Name of the attribute holding the URL

    Returns:
        Set of item ids that are duplicates (the first item seen for each
        URL is treated as the original and is not included)
    """
    first_owner = {}  # url -> id of the first item with that URL
    repeated = set()

    for entry in items:
        # Items without an "id" attribute fall back to their object identity
        entry_id = getattr(entry, "id", str(id(entry)))
        entry_url = getattr(entry, url_field, None)
        if not entry_url:
            continue
        if entry_url not in first_owner:
            first_owner[entry_url] = entry_id
            continue
        repeated.add(entry_id)

    return repeated

View File

@@ -0,0 +1,458 @@
"""
Digest Generator fuer Wochenzusammenfassungen.
Generiert LLM-basierte Zusammenfassungen der wichtigsten Alerts:
- Gruppierung nach Wichtigkeit (Kritisch, Dringend, Wichtig, etc.)
- Kurze Zusammenfassung pro Kategorie
- HTML-Ausgabe fuer E-Mail und UI
- PDF-Export
Verwendung:
generator = DigestGenerator(db_session, llm_client)
digest = await generator.generate_weekly_digest(user_id)
"""
import uuid
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from dataclasses import dataclass
import json
import os
from ..db.models import (
AlertItemDB, AlertDigestDB, UserAlertSubscriptionDB,
ImportanceLevelEnum, DigestStatusEnum
)
@dataclass
class DigestSection:
    """One section of the digest (e.g. critical, urgent)."""
    importance_level: ImportanceLevelEnum  # importance level the section stands for
    label_de: str  # German display label of the section
    color: str  # colour value; rendered as a CSS background in the digest HTML
    items: List[AlertItemDB]  # alerts listed in this section
    summary: str = ""  # summary text of the section; empty until generated
@dataclass
class DigestContent:
    """Complete digest content."""
    user_id: str  # user the digest is generated for
    period_start: datetime  # start of the reporting period
    period_end: datetime  # end of the reporting period
    sections: List[DigestSection]  # one section per importance level that has alerts
    total_alerts: int  # number of alerts in the period
    critical_count: int  # number of alerts with importance KRITISCH
    urgent_count: int  # number of alerts with importance DRINGEND
    introduction: str = ""  # introductory text; empty until generated
    html: str = ""  # rendered HTML of the digest; empty until generated
class DigestGenerator:
    """
    Generates weekly summaries (digests) of alerts.

    Summaries are produced by one of these LLM backends, selected via
    ``llm_provider``:
    - local Ollama models
    - OpenAI API
    - Anthropic API

    An unknown provider, a missing API key, a non-200 response or any
    request error falls back to a fixed German text, so digest generation
    never fails because of the LLM.
    """

    def __init__(
        self,
        db_session,
        llm_provider: str = "ollama",
        llm_model: str = "llama3.2:3b"
    ):
        """
        Initialize the digest generator.

        Args:
            db_session: SQLAlchemy session
            llm_provider: "ollama", "openai" or "anthropic"
            llm_model: Model name
        """
        # NOTE(review): the default model name is an Ollama model; callers that
        # select "openai" or "anthropic" should pass a matching llm_model.
        self.db = db_session
        self.llm_provider = llm_provider
        self.llm_model = llm_model

    async def generate_weekly_digest(
        self,
        user_id: str,
        weeks_back: int = 1
    ) -> Optional[AlertDigestDB]:
        """
        Generate a weekly digest for one user.

        The reporting period ends at Monday 00:00 (UTC) of the current week
        and starts ``weeks_back`` weeks earlier, independent of the time of
        day at which this method runs.

        Args:
            user_id: User id
            weeks_back: How many weeks to go back (default: last week)

        Returns:
            The stored AlertDigestDB, or None if there are no alerts in the
            period
        """
        # Compute the period. Truncate to midnight so that consecutive runs
        # produce adjacent, non-drifting windows.
        now = datetime.utcnow()
        period_end = (now - timedelta(days=now.weekday())).replace(
            hour=0, minute=0, second=0, microsecond=0
        )  # Monday of this week, 00:00
        period_start = period_end - timedelta(weeks=weeks_back)

        # Load alerts
        alerts = self._load_alerts_for_period(user_id, period_start, period_end)
        if not alerts:
            return None

        # Group by importance (sections are truncated for display)
        sections = self._group_by_importance(alerts)

        # Count per level over ALL alerts, not over the truncated sections
        level_counts = {
            level: sum(1 for a in alerts if a.importance_level == level)
            for level in (
                ImportanceLevelEnum.KRITISCH,
                ImportanceLevelEnum.DRINGEND,
                ImportanceLevelEnum.WICHTIG,
                ImportanceLevelEnum.PRUEFEN,
                ImportanceLevelEnum.INFO,
            )
        }

        # Build digest content
        content = DigestContent(
            user_id=user_id,
            period_start=period_start,
            period_end=period_end,
            sections=sections,
            total_alerts=len(alerts),
            critical_count=level_counts[ImportanceLevelEnum.KRITISCH],
            urgent_count=level_counts[ImportanceLevelEnum.DRINGEND]
        )

        # Generate LLM summaries
        await self._generate_summaries(content)

        # Generate HTML
        content.html = self._generate_html(content)

        # Store in DB
        return self._save_digest(content, level_counts)

    def _load_alerts_for_period(
        self,
        user_id: str,
        start: datetime,
        end: datetime
    ) -> List[AlertItemDB]:
        """
        Load all alerts of a user for a period.

        Selects alerts with start <= fetched_at < end whose status is not
        "dropped", newest first.
        """
        return self.db.query(AlertItemDB).filter(
            AlertItemDB.user_id == user_id,
            AlertItemDB.fetched_at >= start,
            AlertItemDB.fetched_at < end,
            AlertItemDB.status != "dropped"
        ).order_by(AlertItemDB.fetched_at.desc()).all()

    def _group_by_importance(
        self,
        alerts: List[AlertItemDB]
    ) -> List[DigestSection]:
        """
        Group alerts by importance.

        Returns:
            One section per importance level that has alerts, ordered from
            most to least important; each section lists at most 5 alerts
        """
        importance_config = [
            (ImportanceLevelEnum.KRITISCH, "Kritisch", "#dc2626"),
            (ImportanceLevelEnum.DRINGEND, "Dringend", "#ea580c"),
            (ImportanceLevelEnum.WICHTIG, "Wichtig", "#d97706"),
            (ImportanceLevelEnum.PRUEFEN, "Zu pruefen", "#2563eb"),
            (ImportanceLevelEnum.INFO, "Info", "#64748b"),
        ]

        sections = []
        for level, label, color in importance_config:
            items = [a for a in alerts if a.importance_level == level]
            if items:
                sections.append(DigestSection(
                    importance_level=level,
                    label_de=label,
                    color=color,
                    items=items[:5]  # at most 5 per category
                ))
        return sections

    async def _generate_summaries(self, content: DigestContent):
        """Generate the LLM-based introduction and per-section summaries in place."""
        # Introduction
        content.introduction = await self._generate_introduction(content)

        # Summary per section
        for section in content.sections:
            section.summary = await self._generate_section_summary(section)

    async def _generate_introduction(self, content: DigestContent) -> str:
        """Generate an introductory summary for the whole digest."""
        prompt = f"""Du bist ein Assistent fuer Schulleitungen und Lehrkraefte in Deutschland.
Schreibe eine kurze Einleitung (2-3 Saetze) fuer einen Wochenbericht.
Zeitraum: {content.period_start.strftime('%d.%m.%Y')} - {content.period_end.strftime('%d.%m.%Y')}
Gesamt: {content.total_alerts} Meldungen
Kritisch: {content.critical_count}
Dringend: {content.urgent_count}
Schreibe auf Deutsch in einfacher Sprache (B1/B2 Niveau).
Beginne mit "Diese Woche..." oder "In der vergangenen Woche..."."""
        return await self._call_llm(prompt, max_tokens=150)

    async def _generate_section_summary(self, section: DigestSection) -> str:
        """Generate the summary for one section; empty string for an empty section."""
        if not section.items:
            return ""

        titles = "\n".join([f"- {item.title}" for item in section.items[:5]])
        prompt = f"""Fasse diese {len(section.items)} Meldungen der Kategorie "{section.label_de}" in 1-2 Saetzen zusammen:
{titles}
Schreibe auf Deutsch in einfacher Sprache. Nenne die wichtigsten Handlungsbedarfe."""
        return await self._call_llm(prompt, max_tokens=100)

    async def _call_llm(self, prompt: str, max_tokens: int = 200) -> str:
        """
        Call the configured LLM.

        Dispatches on ``self.llm_provider``; unknown providers and any
        exception result in the fallback text.
        """
        try:
            if self.llm_provider == "ollama":
                return await self._call_ollama(prompt, max_tokens)
            elif self.llm_provider == "openai":
                return await self._call_openai(prompt, max_tokens)
            elif self.llm_provider == "anthropic":
                return await self._call_anthropic(prompt, max_tokens)
            else:
                return self._generate_fallback_summary(prompt)
        except Exception as e:
            print(f"LLM call failed: {e}")
            return self._generate_fallback_summary(prompt)

    async def _call_ollama(self, prompt: str, max_tokens: int) -> str:
        """Call a local Ollama model; falls back on errors or non-200 responses."""
        import httpx

        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    "http://localhost:11434/api/generate",
                    json={
                        "model": self.llm_model,
                        "prompt": prompt,
                        "stream": False,
                        "options": {
                            "num_predict": max_tokens,
                            "temperature": 0.7
                        }
                    }
                )
                if response.status_code == 200:
                    data = response.json()
                    return data.get("response", "").strip()
        except Exception as e:
            print(f"Ollama error: {e}")

        return self._generate_fallback_summary(prompt)

    async def _call_openai(self, prompt: str, max_tokens: int) -> str:
        """
        Call the OpenAI API.

        Reads the key from OPENAI_API_KEY; falls back if it is missing, on
        errors, or on non-200 responses.
        """
        import httpx

        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            return self._generate_fallback_summary(prompt)

        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    "https://api.openai.com/v1/chat/completions",
                    headers={
                        "Authorization": f"Bearer {api_key}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": self.llm_model or "gpt-4o-mini",
                        "messages": [{"role": "user", "content": prompt}],
                        "max_tokens": max_tokens,
                        "temperature": 0.7
                    }
                )
                if response.status_code == 200:
                    data = response.json()
                    return data["choices"][0]["message"]["content"].strip()
        except Exception as e:
            print(f"OpenAI error: {e}")

        return self._generate_fallback_summary(prompt)

    async def _call_anthropic(self, prompt: str, max_tokens: int) -> str:
        """
        Call the Anthropic API.

        Reads the key from ANTHROPIC_API_KEY; falls back if it is missing,
        on errors, or on non-200 responses.
        """
        import httpx

        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            return self._generate_fallback_summary(prompt)

        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    "https://api.anthropic.com/v1/messages",
                    headers={
                        "x-api-key": api_key,
                        "anthropic-version": "2023-06-01",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": self.llm_model or "claude-3-5-sonnet-latest",
                        "max_tokens": max_tokens,
                        "messages": [{"role": "user", "content": prompt}]
                    }
                )
                if response.status_code == 200:
                    data = response.json()
                    return data["content"][0]["text"].strip()
        except Exception as e:
            print(f"Anthropic error: {e}")

        return self._generate_fallback_summary(prompt)

    def _generate_fallback_summary(self, prompt: str) -> str:
        """
        Fallback text used when no LLM answer is available.

        The introduction prompt is recognised by the words "Einleitung" /
        "Wochenbericht"; every other prompt gets the generic section text.
        """
        if "Einleitung" in prompt or "Wochenbericht" in prompt:
            return "Diese Woche haben Sie neue relevante Meldungen erhalten. Hier ist Ihre Zusammenfassung."
        return "Mehrere relevante Meldungen zu diesem Thema."

    def _generate_html(self, content: DigestContent) -> str:
        """
        Generate the HTML document for the digest.

        Alert titles, source names, URLs and LLM-generated texts are
        HTML-escaped before interpolation, because they originate outside
        this module and end up in an e-mail / UI view.
        """
        from html import escape

        sections_html = ""
        for section in content.sections:
            items_html = ""
            for item in section.items:
                title = escape(str(item.title))
                source = escape(str(item.source_name or 'Unbekannt'))
                # quote=True (the default) also escapes quotes inside the href attribute
                href = escape(str(item.url or '#'), quote=True)
                items_html += f"""
<tr>
<td style="padding: 12px; border-bottom: 1px solid #e2e8f0;">
<div style="font-weight: 500; color: #1e293b; margin-bottom: 4px;">{title}</div>
<div style="font-size: 12px; color: #64748b;">{source}</div>
</td>
<td style="padding: 12px; border-bottom: 1px solid #e2e8f0; text-align: right;">
<a href="{href}" style="color: #3b82f6; text-decoration: none;">Oeffnen</a>
</td>
</tr>
"""

            summary_html = ''
            if section.summary:
                summary_html = (
                    '<p style="font-size: 14px; color: #475569; margin-bottom: 12px;">'
                    f'{escape(str(section.summary))}</p>'
                )

            sections_html += f"""
<div style="margin-bottom: 24px;">
<div style="display: flex; align-items: center; margin-bottom: 12px;">
<span style="display: inline-block; width: 12px; height: 12px; background: {section.color}; border-radius: 50%; margin-right: 8px;"></span>
<h3 style="margin: 0; font-size: 18px; color: #1e293b;">{section.label_de}</h3>
<span style="margin-left: 8px; font-size: 14px; color: #64748b;">({len(section.items)} Meldungen)</span>
</div>
{summary_html}
<table style="width: 100%; border-collapse: collapse;">
{items_html}
</table>
</div>
"""

        introduction_html = ''
        if content.introduction:
            introduction_html = (
                '<p style="font-size: 15px; color: #334155; line-height: 1.6; margin-bottom: 24px;">'
                f'{escape(str(content.introduction))}</p>'
            )

        return f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Wochenbericht - BreakPilot Alerts</title>
</head>
<body style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; max-width: 700px; margin: 0 auto; padding: 20px; background: #f8fafc;">
<div style="background: white; border-radius: 12px; padding: 32px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
<!-- Header -->
<div style="text-align: center; margin-bottom: 32px; padding-bottom: 24px; border-bottom: 1px solid #e2e8f0;">
<h1 style="margin: 0 0 8px 0; font-size: 24px; color: #1e293b;">Wochenbericht</h1>
<p style="margin: 0; color: #64748b; font-size: 14px;">
{content.period_start.strftime('%d.%m.%Y')} - {content.period_end.strftime('%d.%m.%Y')}
</p>
</div>
<!-- Stats -->
<div style="display: flex; justify-content: center; gap: 32px; margin-bottom: 32px; padding: 16px; background: #f1f5f9; border-radius: 8px;">
<div style="text-align: center;">
<div style="font-size: 28px; font-weight: 700; color: #1e293b;">{content.total_alerts}</div>
<div style="font-size: 12px; color: #64748b;">Gesamt</div>
</div>
<div style="text-align: center;">
<div style="font-size: 28px; font-weight: 700; color: #dc2626;">{content.critical_count}</div>
<div style="font-size: 12px; color: #64748b;">Kritisch</div>
</div>
<div style="text-align: center;">
<div style="font-size: 28px; font-weight: 700; color: #ea580c;">{content.urgent_count}</div>
<div style="font-size: 12px; color: #64748b;">Dringend</div>
</div>
</div>
<!-- Introduction -->
{introduction_html}
<!-- Sections -->
{sections_html}
<!-- Footer -->
<div style="margin-top: 32px; padding-top: 24px; border-top: 1px solid #e2e8f0; text-align: center; font-size: 12px; color: #94a3b8;">
<p>Dieser Bericht wurde automatisch von BreakPilot Alerts erstellt.</p>
<p><a href="#" style="color: #3b82f6; text-decoration: none;">Einstellungen anpassen</a> | <a href="#" style="color: #3b82f6; text-decoration: none;">Abmelden</a></p>
</div>
</div>
</body>
</html>
"""

    def _save_digest(self, content: DigestContent, level_counts: Optional[Dict[Any, int]] = None) -> AlertDigestDB:
        """
        Store the digest in the database.

        Args:
            content: Digest content to persist
            level_counts: Optional mapping importance level -> number of
                alerts in the period. When given, it is used for the
                important/review/info counts; otherwise these counts are
                derived from the (display-truncated) sections.

        Returns:
            The committed and refreshed AlertDigestDB row
        """
        def _count(level) -> int:
            if level_counts is not None and level in level_counts:
                return level_counts[level]
            return sum(len(s.items) for s in content.sections if s.importance_level == level)

        # Find the active subscription of the user (may be absent)
        subscription = self.db.query(UserAlertSubscriptionDB).filter(
            UserAlertSubscriptionDB.user_id == content.user_id,
            UserAlertSubscriptionDB.is_active == True
        ).first()

        digest = AlertDigestDB(
            id=str(uuid.uuid4()),
            subscription_id=subscription.id if subscription else None,
            user_id=content.user_id,
            period_start=content.period_start,
            period_end=content.period_end,
            summary_html=content.html,
            total_alerts=content.total_alerts,
            critical_count=content.critical_count,
            urgent_count=content.urgent_count,
            important_count=_count(ImportanceLevelEnum.WICHTIG),
            review_count=_count(ImportanceLevelEnum.PRUEFEN),
            info_count=_count(ImportanceLevelEnum.INFO),
            status=DigestStatusEnum.PENDING
        )

        self.db.add(digest)
        self.db.commit()
        self.db.refresh(digest)
        return digest
async def generate_digest_for_all_users(db_session) -> int:
    """
    Generate digests for all active subscriptions.

    Intended to be called by a scheduler (e.g. Celery, APScheduler). A
    failure for one user is printed and does not stop the remaining users.

    Args:
        db_session: Database session used for the query and the generator

    Returns:
        Number of digests generated
    """
    # All active subscriptions that have the digest enabled
    active_query = db_session.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.is_active == True,
        UserAlertSubscriptionDB.digest_enabled == True
    )
    subscriptions = active_query.all()

    generator = DigestGenerator(db_session)
    generated = 0
    for subscription in subscriptions:
        try:
            result = await generator.generate_weekly_digest(subscription.user_id)
        except Exception as e:
            print(f"Error generating digest for user {subscription.user_id}: {e}")
            continue
        if result:
            generated += 1
    return generated

View File

@@ -0,0 +1,341 @@
"""
Importance Mapping für Guided Mode.
Konvertiert Relevanz-Scores (0.0-1.0) in 5-stufige Wichtigkeitsstufen:
- KRITISCH (90-100%): Sofortiges Handeln erforderlich
- DRINGEND (75-90%): Wichtig, bald handeln
- WICHTIG (60-75%): Beachtenswert
- PRÜFEN (40-60%): Eventuell relevant
- INFO (0-40%): Zur Kenntnisnahme
Zusätzlich: Generierung von "Warum relevant?"-Erklärungen und nächsten Schritten.
"""
from typing import Optional, List, Dict, Any
from datetime import datetime, timedelta
import re
from ..db.models import ImportanceLevelEnum, AlertItemDB
# Re-export for easier importing
__all__ = [
    'ImportanceLevelEnum',
    'score_to_importance',
    'importance_to_label_de',
    'importance_to_color',
    'extract_deadline',
    'generate_why_relevant',
    'generate_next_steps',
    'enrich_alert_for_guided_mode',
    'batch_enrich_alerts',
    'filter_by_importance',
]
# Default thresholds for the importance mapping: the minimum relevance score
# (inclusive) required for each level; scores below "pruefen" map to INFO
DEFAULT_THRESHOLDS = {
    "kritisch": 0.90,
    "dringend": 0.75,
    "wichtig": 0.60,
    "pruefen": 0.40,
}
def score_to_importance(
    score: float,
    thresholds: Dict[str, float] = None
) -> ImportanceLevelEnum:
    """
    Convert a relevance score to an importance level.

    Args:
        score: Relevance score (0.0 - 1.0); None maps to INFO
        thresholds: Optional custom thresholds; missing keys fall back to
            the built-in values

    Returns:
        ImportanceLevelEnum
    """
    if score is None:
        return ImportanceLevelEnum.INFO

    active = thresholds if thresholds else DEFAULT_THRESHOLDS

    # Checked from the highest level down; the first threshold reached wins
    ladder = (
        ("kritisch", 0.90, ImportanceLevelEnum.KRITISCH),
        ("dringend", 0.75, ImportanceLevelEnum.DRINGEND),
        ("wichtig", 0.60, ImportanceLevelEnum.WICHTIG),
        ("pruefen", 0.40, ImportanceLevelEnum.PRUEFEN),
    )
    for key, fallback, level in ladder:
        if score >= active.get(key, fallback):
            return level
    return ImportanceLevelEnum.INFO
def importance_to_label_de(importance: ImportanceLevelEnum) -> str:
    """German label for an importance level; unknown values yield "Info"."""
    label_by_level = {
        ImportanceLevelEnum.KRITISCH: "Kritisch",
        ImportanceLevelEnum.DRINGEND: "Dringend",
        ImportanceLevelEnum.WICHTIG: "Wichtig",
        ImportanceLevelEnum.PRUEFEN: "Zu prüfen",
        ImportanceLevelEnum.INFO: "Info",
    }
    if importance in label_by_level:
        return label_by_level[importance]
    return "Info"
def importance_to_color(importance: ImportanceLevelEnum) -> str:
    """CSS colour name for an importance level (Tailwind-compatible); unknown values yield "slate"."""
    color_by_level = {
        ImportanceLevelEnum.KRITISCH: "red",
        ImportanceLevelEnum.DRINGEND: "orange",
        ImportanceLevelEnum.WICHTIG: "amber",
        ImportanceLevelEnum.PRUEFEN: "blue",
        ImportanceLevelEnum.INFO: "slate",
    }
    if importance in color_by_level:
        return color_by_level[importance]
    return "slate"
def extract_deadline(text: str) -> Optional[datetime]:
    """
    Extract a deadline from text.

    Looks for patterns such as:
    - "bis zum 15.03.2026"
    - "Frist: 1. April" / "Frist: 1. April 2026"
    - "Anmeldeschluss: 30.11."

    Numeric dates are tried first (in the order of the patterns below),
    then dates with a German month name. If no year is given, the current
    year is assumed.

    Args:
        text: Text to search (case-insensitive)

    Returns:
        The first valid date found (time 00:00), or None if nothing
        matches or every match is an impossible date (e.g. 31.02.)
    """
    if not text:
        return None

    def _build(day: str, month: int, year: Optional[str]) -> Optional[datetime]:
        # Returns None for impossible dates so the caller can try the next pattern
        try:
            return datetime(int(year) if year else datetime.now().year, month, int(day))
        except ValueError:
            return None

    # German numeric date formats
    numeric_patterns = [
        r"bis\s+(?:zum\s+)?(\d{1,2})\.(\d{1,2})\.(\d{4})",
        r"Frist[:\s]+(\d{1,2})\.(\d{1,2})\.(\d{4})",
        r"(?:Anmelde|Bewerbungs)schluss[:\s]+(\d{1,2})\.(\d{1,2})\.?(?:(\d{4}))?",
        r"endet\s+am\s+(\d{1,2})\.(\d{1,2})\.(\d{4})?",
    ]
    for pattern in numeric_patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            found = _build(match.group(1), int(match.group(2)), match.group(3))
            if found is not None:
                return found

    # Dates with a written-out German month name, e.g. "Frist: 1. April".
    # Tried last so that every text matched by the numeric patterns keeps
    # its previous result.
    month_numbers = {
        "januar": 1, "februar": 2, "märz": 3, "maerz": 3, "april": 4,
        "mai": 5, "juni": 6, "juli": 7, "august": 8, "september": 9,
        "oktober": 10, "november": 11, "dezember": 12,
    }
    named_pattern = (
        r"(?:bis\s+(?:zum\s+)?|Frist[:\s]+|(?:Anmelde|Bewerbungs)schluss[:\s]+|endet\s+am\s+)"
        r"(\d{1,2})\.\s*(" + "|".join(month_numbers) + r")\b(?:\s+(\d{4}))?"
    )
    match = re.search(named_pattern, text, re.IGNORECASE)
    if match:
        return _build(match.group(1), month_numbers[match.group(2).lower()], match.group(3))

    return None
def generate_why_relevant(
    alert: AlertItemDB,
    profile_priorities: List[Dict[str, Any]] = None,
    matched_keywords: List[str] = None
) -> str:
    """
    Generate the "why relevant?" explanation for an alert.

    Reasons are collected in this order: deadline, matched keywords,
    matching profile priorities, high relevance score. At most the first
    two are returned.

    Args:
        alert: The alert
        profile_priorities: Priorities from the user profile (dicts with
            "label" and "keywords"); only the first two are considered
        matched_keywords: Keywords that matched

    Returns:
        German explanation; a single reason is returned as-is, two reasons
        are joined with " • "
    """
    reasons = []
    # Missing title/snippet must not show up as the literal text "None"
    alert_text = f"{alert.title or ''} {alert.snippet or ''}"

    # Deadline-based relevance
    deadline = extract_deadline(alert_text)
    if deadline:
        # Compare calendar dates: a deadline tomorrow is 1 day away regardless
        # of the current time of day (timedelta.days would floor it to 0)
        days_until = (deadline.date() - datetime.now().date()).days
        if days_until <= 0:
            reasons.append("Frist abgelaufen oder heute!")
        elif days_until <= 7:
            reasons.append(f"Frist endet in {days_until} Tagen")
        elif days_until <= 30:
            reasons.append(f"Frist in ca. {days_until} Tagen")

    # Keyword-based relevance
    if matched_keywords:
        keywords_str = ", ".join(matched_keywords[:3])
        reasons.append(f"Enthält relevante Begriffe: {keywords_str}")

    # Priority-based relevance
    if profile_priorities:
        text_lower = alert_text.lower()
        for priority in profile_priorities[:2]:
            label = priority.get("label", "")
            keywords = priority.get("keywords", [])
            # One reason per priority, as soon as any of its keywords occurs
            if any(kw.lower() in text_lower for kw in keywords):
                reasons.append(f"Passt zu Ihrem Interesse: {label}")

    # Score-based relevance
    if alert.relevance_score and alert.relevance_score >= 0.8:
        reasons.append("Hohe Übereinstimmung mit Ihrem Profil")

    # Fallback
    if not reasons:
        reasons.append("Passt zu Ihren ausgewählten Themen")

    # Separate the (at most two) reasons visibly instead of gluing them together
    return " • ".join(reasons[:2])
def generate_next_steps(
    alert: AlertItemDB,
    template_slug: str = None
) -> List[str]:
    """
    Generate recommended next steps.

    Based on the template type and the alert content (title and snippet,
    lowercased, searched for plain substrings).

    Args:
        alert: The alert
        template_slug: Slug of the active template; unknown or missing
            slugs only get the general fallback steps

    Returns:
        At most 3 German step descriptions
    """
    steps = []
    text = f"{alert.title} {alert.snippet}".lower()
    # Template-specific steps
    if template_slug == "foerderprogramme":
        if "antrag" in text or "förder" in text:
            steps.append("Schulträger über Fördermöglichkeit informieren")
            steps.append("Antragsunterlagen prüfen")
        if "frist" in text or "deadline" in text:
            steps.append("Termin in Kalender eintragen")
    elif template_slug == "datenschutz-recht":
        if "dsgvo" in text or "datenschutz" in text:
            steps.append("Datenschutzbeauftragten informieren")
            steps.append("Prüfen, ob Handlungsbedarf besteht")
        if "urteil" in text or "gericht" in text:
            steps.append("Rechtsfolgen für die Schule prüfen")
    elif template_slug == "it-security":
        if "cve" in text or "sicherheitslücke" in text:
            steps.append("Betroffene Systeme prüfen")
            steps.append("Update/Patch einspielen")
        if "phishing" in text:
            steps.append("Kollegium warnen")
            steps.append("Erkennungsmerkmale kommunizieren")
    elif template_slug == "abitur-updates":
        if "abitur" in text or "prüfung" in text:
            steps.append("Fachschaften informieren")
            steps.append("Anpassung der Kursplanung prüfen")
    elif template_slug == "fortbildungen":
        steps.append("Termin und Ort prüfen")
        steps.append("Bei Interesse: Anmeldung vornehmen")
    elif template_slug == "wettbewerbe-projekte":
        steps.append("Passende Schülergruppe identifizieren")
        steps.append("Anmeldefrist beachten")
    # General steps as fallback
    if not steps:
        steps.append("Quelle öffnen und Details lesen")
        # NOTE(review): substring match, so "bis" also hits words that merely
        # contain it (e.g. "bisher") - confirm this is intended
        if "frist" in text or "bis" in text:
            steps.append("Termin notieren")
    return steps[:3]  # at most 3 steps
def enrich_alert_for_guided_mode(
    alert: AlertItemDB,
    profile_priorities: List[Dict[str, Any]] = None,
    template_slug: str = None,
    importance_thresholds: Dict[str, float] = None
) -> AlertItemDB:
    """
    Enrich an alert with the guided-mode specific fields.

    Sets on the alert:
    - importance_level
    - why_relevant
    - next_steps
    - action_deadline (only if a deadline is found in title/snippet)

    Args:
        alert: The alert (modified in place)
        profile_priorities: Priorities from the user profile
        template_slug: Slug of the active template
        importance_thresholds: Optional thresholds

    Returns:
        The same alert object, enriched
    """
    # Importance level from the relevance score
    level = score_to_importance(alert.relevance_score, importance_thresholds)
    alert.importance_level = level

    # Explanation and recommended steps
    alert.why_relevant = generate_why_relevant(alert, profile_priorities)
    alert.next_steps = generate_next_steps(alert, template_slug)

    # Action deadline: left untouched when no deadline can be extracted
    searchable_text = f"{alert.title} {alert.snippet}"
    found_deadline = extract_deadline(searchable_text)
    if found_deadline:
        alert.action_deadline = found_deadline

    return alert
def batch_enrich_alerts(
    alerts: List[AlertItemDB],
    profile_priorities: List[Dict[str, Any]] = None,
    template_slug: str = None,
    importance_thresholds: Dict[str, float] = None
) -> List[AlertItemDB]:
    """
    Enrich several alerts for guided mode.

    Args:
        alerts: Alerts to enrich (each is modified in place)
        profile_priorities: Priorities from the user profile
        template_slug: Slug of the active template
        importance_thresholds: Optional thresholds

    Returns:
        List of the enriched alerts in input order
    """
    enriched = []
    for single_alert in alerts:
        enriched.append(
            enrich_alert_for_guided_mode(
                single_alert,
                profile_priorities,
                template_slug,
                importance_thresholds
            )
        )
    return enriched
def filter_by_importance(
    alerts: List[AlertItemDB],
    min_level: ImportanceLevelEnum = ImportanceLevelEnum.INFO,
    max_count: int = 10
) -> List[AlertItemDB]:
    """
    Filter alerts by minimum importance and limit their number.

    The result is sorted by importance (highest first); alerts of equal
    importance keep their input order.

    Args:
        alerts: Alerts to filter
        min_level: Lowest importance level to keep
        max_count: Maximum number of alerts returned

    Returns:
        New list with at most ``max_count`` alerts
    """
    # Importance ranking (higher = more important)
    rank_of = {
        ImportanceLevelEnum.KRITISCH: 5,
        ImportanceLevelEnum.DRINGEND: 4,
        ImportanceLevelEnum.WICHTIG: 3,
        ImportanceLevelEnum.PRUEFEN: 2,
        ImportanceLevelEnum.INFO: 1,
    }

    def _rank(alert):
        # Unknown or missing levels are ranked like INFO
        return rank_of.get(alert.importance_level, 1)

    lowest_rank = rank_of.get(min_level, 1)
    kept = [alert for alert in alerts if _rank(alert) >= lowest_rank]

    # Sort by importance (descending)
    ordered = sorted(kept, key=_rank, reverse=True)
    return ordered[:max_count]

View File

@@ -0,0 +1,390 @@
"""
Relevance Scorer für Alerts.
Nutzt das LLM Gateway um Alerts auf Relevanz zu prüfen.
Berücksichtigt das Nutzerprofil für personalisierte Filterung.
"""
import json
import logging
import re
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Optional
import httpx
from ..models.alert_item import AlertItem, AlertStatus
from ..models.relevance_profile import RelevanceProfile
logger = logging.getLogger(__name__)
class RelevanceDecision(str, Enum):
    """Relevance decision for an alert (string-valued enum)."""
    KEEP = "KEEP"      # Relevant, show in inbox
    DROP = "DROP"      # Irrelevant, archive automatically
    REVIEW = "REVIEW"  # Uncertain, the user should decide
@dataclass
class ScoringResult:
    """Outcome of scoring one alert for relevance."""
    alert_id: str
    score: float  # 0.0 - 1.0
    decision: RelevanceDecision
    reason_codes: list = field(default_factory=list)
    summary: Optional[str] = None
    entities: dict = field(default_factory=dict)  # extracted entities
    model_version: str = ""
    prompt_version: str = "1.0"
    scored_at: datetime = field(default_factory=datetime.utcnow)
    error: Optional[str] = None

    def to_dict(self) -> dict:
        """Return the result as a plain dict (enum -> value, datetime -> ISO string)."""
        exported = (
            "alert_id",
            "score",
            "decision",
            "reason_codes",
            "summary",
            "entities",
            "model_version",
            "prompt_version",
            "scored_at",
            "error",
        )
        payload = {name: getattr(self, name) for name in exported}
        # Overwrite in place so the key order stays as listed above.
        payload["decision"] = self.decision.value
        payload["scored_at"] = self.scored_at.isoformat()
        return payload
# System prompt for relevance scoring.
# The text is sent to the model verbatim (see RelevanceScorer._build_system_prompt),
# so any edit to it changes scoring behaviour.
RELEVANCE_SYSTEM_PROMPT = """Du bist ein Relevanz-Filter für News-Alerts. Deine Aufgabe ist es, zu bewerten, ob ein Alert für den Nutzer relevant ist.
## Deine Aufgaben:
1. Analysiere den Alert-Titel und Snippet
2. Berücksichtige das Nutzerprofil (Prioritäten, Ausschlüsse, Beispiele)
3. Gib eine Relevanz-Bewertung ab
## Bewertungskriterien:
- **KEEP** (Score 0.7-1.0): Alert ist klar relevant für die Prioritäten des Nutzers
- **REVIEW** (Score 0.4-0.7): Möglicherweise relevant, Nutzer sollte entscheiden
- **DROP** (Score 0.0-0.4): Nicht relevant, kann ignoriert werden
## Ausschluss-Gründe (automatisch DROP):
- Stellenanzeigen, Werbung, Pressemitteilungen (außer hochrelevant)
- Duplicate/sehr ähnliche Meldung zu kürzlichem Alert
- Thema in Ausschlussliste des Nutzers
## Output-Format:
Du MUSST mit einem JSON-Objekt antworten (keine Markdown-Codeblöcke, nur das JSON):
{
"score": 0.85,
"decision": "KEEP",
"reason_codes": ["matches_priority_inklusion", "recent_news"],
"summary": "Kurze Zusammenfassung des Alerts (1-2 Sätze)",
"entities": {
"topics": ["Inklusion", "Bayern"],
"organizations": ["Kultusministerium"],
"date_context": "aktuell"
}
}
Wichtig:
- score ist eine Zahl zwischen 0.0 und 1.0
- decision ist entweder "KEEP", "DROP" oder "REVIEW"
- reason_codes sind kurze, maschinenlesbare Codes
- summary ist auf Deutsch
"""
class RelevanceScorer:
    """
    Scorer for alert relevance.

    Sends each alert to the LLM gateway (chat-completions endpoint) and
    converts the JSON answer into a ``ScoringResult``.
    """

    def __init__(
        self,
        gateway_url: str = "http://localhost:8000/llm",
        api_key: str = "",
        model: str = "breakpilot-teacher-8b",
        timeout: int = 30,
    ):
        """
        Initialise the scorer.

        Args:
            gateway_url: URL of the LLM gateway (a trailing "/" is removed)
            api_key: API key sent as ``X-API-Key`` header
            model: Model name used for scoring
            timeout: HTTP timeout passed to the HTTP client
        """
        self.gateway_url = gateway_url.rstrip("/")
        self.api_key = api_key
        self.model = model
        self.timeout = timeout
        self._client: Optional[httpx.AsyncClient] = None

        # Thresholds used when the model's decision cannot be used directly
        self.keep_threshold = 0.7
        self.drop_threshold = 0.4

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the HTTP client, creating it if missing or already closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=self.timeout,
                headers={
                    "X-API-Key": self.api_key,
                    "Content-Type": "application/json",
                },
            )
        return self._client

    async def close(self) -> None:
        """Close the HTTP client, if one was created."""
        if self._client:
            await self._client.aclose()
            self._client = None

    def _build_user_prompt(self, alert: AlertItem) -> str:
        """Build the user prompt describing one alert."""
        parts = [
            "## Alert zu bewerten\n",
            f"**Thema-Label:** {alert.topic_label}",
            f"**Titel:** {alert.title}",
        ]

        if alert.snippet:
            # Limit the snippet to 500 characters
            snippet = alert.snippet[:500]
            if len(alert.snippet) > 500:
                snippet += "..."
            parts.append(f"**Snippet:** {snippet}")

        if alert.url:
            parts.append(f"**URL:** {alert.url}")

        if alert.published_at:
            parts.append(f"**Veröffentlicht:** {alert.published_at.strftime('%Y-%m-%d')}")

        parts.append("\nBewerte diesen Alert und antworte NUR mit dem JSON-Objekt.")
        return "\n".join(parts)

    def _build_system_prompt(self, profile: Optional[RelevanceProfile] = None) -> str:
        """Build the system prompt, appending the profile context when a profile is given."""
        system = RELEVANCE_SYSTEM_PROMPT
        if profile:
            system += "\n\n" + profile.get_prompt_context()
        return system

    def _decision_from_score(self, score: float) -> RelevanceDecision:
        """Derive a decision from the score using the keep/drop thresholds."""
        if score >= self.keep_threshold:
            return RelevanceDecision.KEEP
        if score <= self.drop_threshold:
            return RelevanceDecision.DROP
        return RelevanceDecision.REVIEW

    def _parse_response(self, text: str, alert_id: str) -> ScoringResult:
        """
        Parse the LLM response text into a ``ScoringResult``.

        The first "{" up to the last "}" of the text is parsed as JSON, which
        tolerates answers wrapped in Markdown code fences. Parsing problems
        never raise: they yield a neutral REVIEW result (score 0.5) with
        ``error`` set.

        Args:
            text: Raw message content returned by the model
            alert_id: ID of the alert the response belongs to

        Returns:
            ScoringResult with the parsed or fallback values
        """
        try:
            # The LLM sometimes wraps the JSON in Markdown code blocks
            json_match = re.search(r"\{[\s\S]*\}", text)
            if not json_match:
                raise ValueError("Kein JSON in Response gefunden")

            data = json.loads(json_match.group())

            score = float(data.get("score", 0.5))
            if score != score:
                # NaN (json.loads accepts the literal): clamping via min/max
                # would silently turn it into 1.0, so use the neutral score.
                score = 0.5
            score = max(0.0, min(1.0, score))  # Clamp to 0-1

            # str() so a null/numeric "decision" falls through to the
            # score-based fallback instead of failing the whole parse.
            decision_str = str(data.get("decision", "REVIEW")).strip().upper()
            try:
                decision = RelevanceDecision(decision_str)
            except ValueError:
                decision = self._decision_from_score(score)

            return ScoringResult(
                alert_id=alert_id,
                score=score,
                decision=decision,
                reason_codes=data.get("reason_codes", []),
                summary=data.get("summary"),
                entities=data.get("entities", {}),
                model_version=self.model,
            )

        except json.JSONDecodeError as e:
            logger.warning(f"JSON Parse Error für Alert {alert_id}: {e}")
            return ScoringResult(
                alert_id=alert_id,
                score=0.5,
                decision=RelevanceDecision.REVIEW,
                reason_codes=["parse_error"],
                error=f"JSON parse error: {str(e)}",
                model_version=self.model,
            )
        except Exception as e:
            logger.error(f"Unexpected error parsing response: {e}")
            return ScoringResult(
                alert_id=alert_id,
                score=0.5,
                decision=RelevanceDecision.REVIEW,
                reason_codes=["error"],
                error=str(e),
                model_version=self.model,
            )

    async def score_alert(
        self,
        alert: AlertItem,
        profile: Optional[RelevanceProfile] = None,
    ) -> ScoringResult:
        """
        Score a single alert.

        On a successful gateway call the alert object itself is updated
        (score, decision, reasons, summary, status SCORED). On HTTP or other
        errors the alert is left untouched and a REVIEW result with ``error``
        set is returned.

        Args:
            alert: The alert to score
            profile: Optional user profile for personalised scoring

        Returns:
            ScoringResult with the evaluation
        """
        try:
            client = await self._get_client()

            # Request body
            payload = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": self._build_system_prompt(profile)},
                    {"role": "user", "content": self._build_user_prompt(alert)},
                ],
                "temperature": 0.3,  # low, for more consistent results
                "max_tokens": 500,
            }

            response = await client.post(
                f"{self.gateway_url}/v1/chat/completions",
                json=payload,
            )
            response.raise_for_status()

            data = response.json()
            content = data["choices"][0]["message"]["content"]
            result = self._parse_response(content, alert.id)

            # Update the alert.
            # NOTE(review): this also runs when _parse_response returned a
            # fallback result (result.error set), so such alerts count as
            # SCORED and are skipped by score_batch — confirm this is intended.
            alert.relevance_score = result.score
            alert.relevance_decision = result.decision.value
            alert.relevance_reasons = result.reason_codes
            alert.relevance_summary = result.summary
            alert.status = AlertStatus.SCORED

            return result

        except httpx.HTTPStatusError as e:
            error_msg = f"HTTP {e.response.status_code}: {e.response.text[:200]}"
            logger.error(f"Gateway Error für Alert {alert.id}: {error_msg}")
            return ScoringResult(
                alert_id=alert.id,
                score=0.5,
                decision=RelevanceDecision.REVIEW,
                reason_codes=["gateway_error"],
                error=error_msg,
                model_version=self.model,
            )
        except Exception as e:
            logger.exception(f"Scoring Error für Alert {alert.id}")
            return ScoringResult(
                alert_id=alert.id,
                score=0.5,
                decision=RelevanceDecision.REVIEW,
                reason_codes=["error"],
                error=str(e),
                model_version=self.model,
            )

    async def score_batch(
        self,
        alerts: list[AlertItem],
        profile: Optional[RelevanceProfile] = None,
        skip_scored: bool = True,
    ) -> list[ScoringResult]:
        """
        Score several alerts, one request after another.

        Args:
            alerts: List of alerts
            profile: User profile
            skip_scored: Skip alerts whose status is already SCORED

        Returns:
            List of ScoringResults (skipped alerts produce no entry)
        """
        results = []

        for alert in alerts:
            if skip_scored and alert.status == AlertStatus.SCORED:
                logger.debug(f"Alert {alert.id} bereits bewertet, überspringe")
                continue

            result = await self.score_alert(alert, profile)
            results.append(result)

            # Optional short pause between requests to avoid rate limits
            # (disabled): await asyncio.sleep(0.1)

        return results

    def get_stats(self, results: list[ScoringResult]) -> dict:
        """
        Compute statistics over scoring results.

        The returned dict always has the same keys; for an empty input all
        counts are 0 and all rates/averages are 0.0, so callers can read any
        key without checking ``total`` first.

        Args:
            results: Scoring results to summarise

        Returns:
            Dict with total, keep, drop, review, errors, keep_rate,
            drop_rate, review_rate and avg_score
        """
        total = len(results)
        if total == 0:
            return {
                "total": 0,
                "keep": 0,
                "drop": 0,
                "review": 0,
                "errors": 0,
                "keep_rate": 0.0,
                "drop_rate": 0.0,
                "review_rate": 0.0,
                "avg_score": 0.0,
            }

        keep = sum(1 for r in results if r.decision == RelevanceDecision.KEEP)
        drop = sum(1 for r in results if r.decision == RelevanceDecision.DROP)
        review = sum(1 for r in results if r.decision == RelevanceDecision.REVIEW)
        errors = sum(1 for r in results if r.error)

        avg_score = sum(r.score for r in results) / total

        return {
            "total": total,
            "keep": keep,
            "drop": drop,
            "review": review,
            "errors": errors,
            "keep_rate": keep / total,
            "drop_rate": drop / total,
            "review_rate": review / total,
            "avg_score": avg_score,
        }
# Singleton Instance
_scorer_instance: Optional[RelevanceScorer] = None
def get_relevance_scorer(
    gateway_url: str = "http://localhost:8000/llm",
    api_key: str = "",
    model: str = "breakpilot-teacher-8b",
) -> RelevanceScorer:
    """
    Return the module-wide RelevanceScorer singleton.

    The instance is created on the first call from the given arguments;
    later calls return that same instance and ignore their arguments.
    """
    global _scorer_instance

    if _scorer_instance is not None:
        return _scorer_instance

    _scorer_instance = RelevanceScorer(
        gateway_url=gateway_url,
        api_key=api_key,
        model=model,
    )
    return _scorer_instance

View File

@@ -0,0 +1,512 @@
"""
Rule Engine für Alerts Agent.
Evaluiert Regeln gegen Alert-Items und führt Aktionen aus.
Regel-Struktur:
- Bedingungen: [{field, operator, value}, ...] (AND-verknüpft)
- Aktion: keep, drop, tag, email, webhook, slack
- Priorität: Höhere Priorität wird zuerst evaluiert
"""
import re
import logging
from dataclasses import dataclass
from typing import List, Dict, Any, Optional, Callable
from enum import Enum
from alerts_agent.db.models import AlertItemDB, AlertRuleDB, RuleActionEnum
logger = logging.getLogger(__name__)
class ConditionOperator(str, Enum):
    """Operators for rule conditions (values are the strings stored in rule definitions)."""
    CONTAINS = "contains"
    NOT_CONTAINS = "not_contains"
    EQUALS = "equals"
    NOT_EQUALS = "not_equals"
    STARTS_WITH = "starts_with"
    ENDS_WITH = "ends_with"
    REGEX = "regex"
    GREATER_THAN = "gt"
    LESS_THAN = "lt"
    GREATER_EQUAL = "gte"
    LESS_EQUAL = "lte"
    IN_LIST = "in"
    NOT_IN_LIST = "not_in"
@dataclass
class RuleCondition:
    """A single rule condition: ``field <operator> value``."""
    field: str  # "title", "snippet", "url", "source", "relevance_score"
    operator: ConditionOperator
    value: Any  # str, float, list

    @classmethod
    def from_dict(cls, data: Dict) -> "RuleCondition":
        """
        Build a condition from its dict form.

        The operator is read from "operator", falling back to the short key
        "op" and finally to "contains". Raises ValueError (from the enum
        constructor) for an unknown operator string.
        """
        if "operator" in data:
            raw_operator = data["operator"]
        else:
            raw_operator = data.get("op", "contains")

        field_name = data.get("field", "")
        expected = data.get("value", "")
        return cls(
            field=field_name,
            operator=ConditionOperator(raw_operator),
            value=expected,
        )
@dataclass
class RuleMatch:
    """Result of evaluating one rule against one alert."""
    rule_id: str
    rule_name: str
    matched: bool
    action: RuleActionEnum
    action_config: Dict[str, Any]
    conditions_met: List[str]  # Descriptions of the conditions that matched
def get_field_value(alert: AlertItemDB, field: str) -> Any:
    """
    Extract a field value from an alert.

    Only the requested attribute is read, so a problem with an unrelated
    attribute cannot break the lookup. Enum-like attributes (source, status,
    relevance_decision) are returned as their ``.value``; a value without a
    ``.value`` attribute (e.g. a plain string) is returned unchanged, and a
    falsy one yields "".

    Args:
        alert: Alert item
        field: Field name

    Returns:
        Field value, or None for an unknown field name
    """
    plain_fields = ("title", "snippet", "url", "relevance_score", "lang", "topic_id")
    enum_fields = ("source", "status", "relevance_decision")

    if field in plain_fields:
        return getattr(alert, field)

    if field in enum_fields:
        raw = getattr(alert, field)
        if not raw:
            return ""
        return getattr(raw, "value", raw)

    return None
def evaluate_condition(
    alert: AlertItemDB,
    condition: RuleCondition,
) -> bool:
    """
    Evaluate a single condition against an alert.

    String fields are compared case-insensitively. A numeric target used
    with a string field is compared by its text form. IN_LIST / NOT_IN_LIST
    on a string field test whether any string in the target list occurs as
    a substring of the field. Numeric fields support the equality and
    ordering operators only. Any exception during evaluation is logged and
    treated as "not met".

    Args:
        alert: Alert item
        condition: Condition to evaluate

    Returns:
        True if the condition is met
    """
    field_value = get_field_value(alert, condition.field)

    # NOTE(review): a missing value fails every operator, including the
    # negated ones (not_contains, not_equals, not_in) — confirm intended.
    if field_value is None:
        return False

    op = condition.operator
    target = condition.value

    try:
        # String operations (case-insensitive)
        if isinstance(field_value, str):
            field_lower = field_value.lower()
            if isinstance(target, str):
                target_lower = target.lower()
            elif isinstance(target, (int, float)):
                # Compare numbers by their text form instead of raising
                # TypeError on e.g. `5 in "abc"`.
                target_lower = str(target).lower()
            else:
                target_lower = target

            if op == ConditionOperator.CONTAINS:
                return target_lower in field_lower
            elif op == ConditionOperator.NOT_CONTAINS:
                return target_lower not in field_lower
            elif op == ConditionOperator.EQUALS:
                return field_lower == target_lower
            elif op == ConditionOperator.NOT_EQUALS:
                return field_lower != target_lower
            elif op == ConditionOperator.STARTS_WITH:
                return field_lower.startswith(target_lower)
            elif op == ConditionOperator.ENDS_WITH:
                return field_lower.endswith(target_lower)
            elif op == ConditionOperator.REGEX:
                # NOTE(review): the pattern comes from the rule definition;
                # if rules can be authored by untrusted users, an expensive
                # pattern can stall evaluation.
                try:
                    return bool(re.search(str(target), field_value, re.IGNORECASE))
                except re.error:
                    logger.warning(f"Invalid regex pattern: {target}")
                    return False
            elif op == ConditionOperator.IN_LIST:
                if isinstance(target, list):
                    return any(t.lower() in field_lower for t in target if isinstance(t, str))
                return False
            elif op == ConditionOperator.NOT_IN_LIST:
                if isinstance(target, list):
                    return not any(t.lower() in field_lower for t in target if isinstance(t, str))
                return True

        # Numeric operations
        elif isinstance(field_value, (int, float)):
            target_num = float(target) if target else 0

            if op == ConditionOperator.EQUALS:
                return field_value == target_num
            elif op == ConditionOperator.NOT_EQUALS:
                return field_value != target_num
            elif op == ConditionOperator.GREATER_THAN:
                return field_value > target_num
            elif op == ConditionOperator.LESS_THAN:
                return field_value < target_num
            elif op == ConditionOperator.GREATER_EQUAL:
                return field_value >= target_num
            elif op == ConditionOperator.LESS_EQUAL:
                return field_value <= target_num

    except Exception as e:
        logger.error(f"Error evaluating condition: {e}")
        return False

    # Operator not applicable to this field type
    return False
def evaluate_rule(
    alert: AlertItemDB,
    rule: AlertRuleDB,
) -> RuleMatch:
    """
    Evaluate a rule against an alert.

    All conditions must be met (AND). A rule without conditions always
    matches. A malformed condition (unknown operator, entry that is not a
    dict) is logged and counted as not met, so one broken stored rule cannot
    abort evaluation of the remaining rules.

    Args:
        alert: Alert item
        rule: Rule to evaluate

    Returns:
        RuleMatch result
    """
    conditions = rule.conditions or []
    conditions_met = []
    all_matched = True  # stays True when there are no conditions

    for cond_dict in conditions:
        try:
            condition = RuleCondition.from_dict(cond_dict)
        except (ValueError, AttributeError, TypeError) as e:
            logger.warning(f"Invalid condition in rule '{rule.name}': {cond_dict!r} ({e})")
            all_matched = False
            continue

        if evaluate_condition(alert, condition):
            conditions_met.append(f"{condition.field} {condition.operator.value} {condition.value}")
        else:
            all_matched = False

    return RuleMatch(
        rule_id=rule.id,
        rule_name=rule.name,
        matched=all_matched,
        action=rule.action_type,
        action_config=rule.action_config or {},
        conditions_met=conditions_met,
    )
def evaluate_rules_for_alert(
    alert: AlertItemDB,
    rules: List[AlertRuleDB],
) -> Optional[RuleMatch]:
    """
    Evaluate rules against an alert and return the first match.

    Rules are tried in the order given; this function does not sort them,
    so the caller is expected to pass them ordered by priority. Inactive
    rules and rules bound to a different topic are skipped.

    Args:
        alert: Alert item
        rules: List of rules (should already be sorted by priority)

    Returns:
        First RuleMatch, or None if no rule matched
    """
    for candidate in rules:
        # Skip inactive rules and rules restricted to another topic
        if not candidate.is_active or (
            candidate.topic_id and candidate.topic_id != alert.topic_id
        ):
            continue

        outcome = evaluate_rule(alert, candidate)
        if not outcome.matched:
            continue

        logger.debug(
            f"Rule '{candidate.name}' matched alert '{alert.id[:8]}': "
            f"{outcome.conditions_met}"
        )
        return outcome

    return None
class RuleEngine:
    """
    Rule engine for batch processing of alerts.

    Loads the active rules once per instance (until ``clear_cache``) and
    evaluates them against one or many alerts.
    """

    def __init__(self, db_session):
        """
        Initialise the rule engine.

        Args:
            db_session: SQLAlchemy session
        """
        self.db = db_session
        self._rules_cache: Optional[List[AlertRuleDB]] = None

    def _get_active_rules(self) -> List[AlertRuleDB]:
        """Load the active rules from the database (cached on the instance)."""
        if self._rules_cache is None:
            from alerts_agent.db.repository import RuleRepository
            repo = RuleRepository(self.db)
            self._rules_cache = repo.get_active()

        return self._rules_cache

    def clear_cache(self) -> None:
        """Drop the cached rules so the next call reloads them."""
        self._rules_cache = None

    def process_alert(
        self,
        alert: AlertItemDB,
    ) -> Optional[RuleMatch]:
        """
        Run one alert through all active rules.

        Args:
            alert: Alert item

        Returns:
            RuleMatch if a rule matched, otherwise None
        """
        rules = self._get_active_rules()
        return evaluate_rules_for_alert(alert, rules)

    def process_alerts(
        self,
        alerts: List[AlertItemDB],
    ) -> Dict[str, RuleMatch]:
        """
        Run several alerts through all active rules.

        Args:
            alerts: List of alert items

        Returns:
            Dict of alert_id -> RuleMatch (only for alerts that matched)
        """
        rules = self._get_active_rules()
        results = {}

        for alert in alerts:
            match = evaluate_rules_for_alert(alert, rules)
            if match:
                results[alert.id] = match

        return results

    @staticmethod
    def _merge_tags(existing, new) -> List[Any]:
        """Return existing + new tags without duplicates, keeping first-seen order."""
        if isinstance(new, str):
            # A single tag given as a bare string, not a list of characters
            new = [new]
        return list(dict.fromkeys([*existing, *new]))

    def apply_rule_actions(
        self,
        alert: AlertItemDB,
        match: RuleMatch,
    ) -> Dict[str, Any]:
        """
        Apply the action of a matched rule to an alert.

        KEEP/DROP write a scoring decision, TAG merges the configured tags
        into the alert's user tags, EMAIL/WEBHOOK/SLACK only return their
        config marked as deferred. Exceptions are logged and reported in the
        result under "error" instead of being raised.

        Args:
            alert: Alert item
            match: RuleMatch carrying the action information

        Returns:
            Dict with the outcome of the action ("action", "success", ...)
        """
        from alerts_agent.db.repository import AlertItemRepository, RuleRepository

        alert_repo = AlertItemRepository(self.db)
        rule_repo = RuleRepository(self.db)

        action = match.action
        config = match.action_config

        result = {"action": action.value, "success": False}

        try:
            if action == RuleActionEnum.KEEP:
                # Mark the alert as KEEP
                alert_repo.update_scoring(
                    alert_id=alert.id,
                    score=1.0,
                    decision="KEEP",
                    reasons=["rule_match"],
                    summary=f"Matched rule: {match.rule_name}",
                    model="rule_engine",
                )
                result["success"] = True

            elif action == RuleActionEnum.DROP:
                # Mark the alert as DROP
                alert_repo.update_scoring(
                    alert_id=alert.id,
                    score=0.0,
                    decision="DROP",
                    reasons=["rule_match"],
                    summary=f"Dropped by rule: {match.rule_name}",
                    model="rule_engine",
                )
                result["success"] = True

            elif action == RuleActionEnum.TAG:
                # Add tags.
                # NOTE(review): the source view lost its indentation; success
                # is set only when tags are configured — confirm against the
                # original file.
                tags = config.get("tags", [])
                if tags:
                    existing_tags = alert.user_tags or []
                    # Order-preserving merge so the stored tag order is stable
                    new_tags = self._merge_tags(existing_tags, tags)
                    alert_repo.update(alert.id, user_tags=new_tags)
                    result["tags_added"] = tags
                    result["success"] = True

            elif action == RuleActionEnum.EMAIL:
                # E-mail notification; handled later by the actions module
                result["email_config"] = config
                result["success"] = True
                result["deferred"] = True  # sent later

            elif action == RuleActionEnum.WEBHOOK:
                # Webhook call; handled later by the actions module
                result["webhook_config"] = config
                result["success"] = True
                result["deferred"] = True

            elif action == RuleActionEnum.SLACK:
                # Slack message; handled later by the actions module
                result["slack_config"] = config
                result["success"] = True
                result["deferred"] = True

            # Increase the rule's match count
            rule_repo.increment_match_count(match.rule_id)

        except Exception as e:
            logger.error(f"Error applying rule action: {e}")
            result["error"] = str(e)

        return result
# Convenience-Funktionen für einfache Nutzung
def create_keyword_rule(
    name: str,
    keywords: List[str],
    action: str = "keep",
    field: str = "title",
) -> Dict:
    """
    Build a keyword-based rule definition.

    Args:
        name: Rule name
        keywords: List of keywords (OR-combined via the "in" operator)
        action: Action (keep, drop, tag)
        field: Field to check (title, snippet, url)

    Returns:
        Rule definition as a dict
    """
    keyword_condition = {
        "field": field,
        "operator": "in",
        "value": keywords,
    }
    rule_definition: Dict = {"name": name}
    rule_definition["conditions"] = [keyword_condition]
    rule_definition["action_type"] = action
    rule_definition["action_config"] = {}
    return rule_definition
def create_exclusion_rule(
    name: str,
    excluded_terms: List[str],
    field: str = "title",
) -> Dict:
    """
    Build an exclusion rule definition (action "drop").

    Args:
        name: Rule name
        excluded_terms: List of terms to exclude
        field: Field to check

    Returns:
        Rule definition as a dict
    """
    exclusion_condition = dict(field=field, operator="in", value=excluded_terms)
    return dict(
        name=name,
        conditions=[exclusion_condition],
        action_type="drop",
        action_config={},
    )
def create_score_threshold_rule(
    name: str,
    min_score: float,
    action: str = "keep",
) -> Dict:
    """
    Build a score-based rule definition.

    The single condition is ``relevance_score >= min_score`` ("gte").

    Args:
        name: Rule name
        min_score: Minimum score
        action: Action applied when the score is reached

    Returns:
        Rule definition as a dict
    """
    threshold_condition = {"field": "relevance_score", "operator": "gte"}
    threshold_condition["value"] = min_score

    rule_definition = {
        "name": name,
        "conditions": [threshold_condition],
        "action_type": action,
        "action_config": {},
    }
    return rule_definition

9
backend/api/__init__.py Normal file
View File

@@ -0,0 +1,9 @@
"""
Backend API Module.
Sammelt alle API-Router für die FastAPI-Anwendung.
"""
from . import classroom
__all__ = ["classroom"]

View File

@@ -0,0 +1,70 @@
"""
Classroom API - Modularer Router.
Dieser Router sammelt alle Classroom-bezogenen Endpoints aus den Submodulen.
Für Rückwärtskompatibilität kann der alte classroom_api.py Pfad weiterhin
verwendet werden.
Struktur:
- sessions.py: Session CRUD, Timer, Phasen, History
- templates.py: Stunden-Vorlagen
- homework.py: Hausaufgaben-Tracking
- materials.py: Unterrichtsmaterialien
- analytics.py: Analytics & Reflexionen
- feedback.py: Lehrer-Feedback
- settings.py: Lehrer-Einstellungen
- utility.py: Health, Phases, Export
- context.py: Teacher Context (v1 API)
"""
from fastapi import APIRouter
from .sessions import router as sessions_router
from .templates import router as templates_router
from .homework import router as homework_router
from .materials import router as materials_router
from .analytics import router as analytics_router
from .feedback import router as feedback_router
from .settings import router as settings_router
from .utility import router as utility_router
from .context import router as context_router
# Main router; every sub-router below is mounted under this prefix
router = APIRouter(prefix="/api/classroom", tags=["Classroom"])
# Mount the sub-routers
router.include_router(sessions_router)
router.include_router(templates_router)
router.include_router(homework_router)
router.include_router(materials_router)
router.include_router(analytics_router)
router.include_router(feedback_router)
router.include_router(settings_router)
router.include_router(utility_router)
router.include_router(context_router)
# Re-exports für einfachen Import
from .models import (
CreateSessionRequest,
SessionResponse,
TimerStatus,
SuggestionsResponse,
)
from .shared import (
ws_manager,
get_session_or_404,
start_timer_broadcast,
stop_timer_broadcast,
)
__all__ = [
"router",
"ws_manager",
"get_session_or_404",
"start_timer_broadcast",
"stop_timer_broadcast",
"CreateSessionRequest",
"SessionResponse",
"TimerStatus",
"SuggestionsResponse",
]

View File

@@ -0,0 +1,343 @@
"""
Classroom API - Analytics & Reflections Endpoints.
Endpoints fuer Session-Analytics und Post-Lesson Reflections (Phase 5).
"""
from uuid import uuid4
from typing import Dict, List, Optional, Any
from datetime import datetime, timedelta
import logging
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
from .shared import init_db_if_needed, DB_ENABLED, logger
try:
from classroom_engine.database import SessionLocal
from classroom_engine.repository import AnalyticsRepository, ReflectionRepository
from classroom_engine.analytics import LessonReflection
except ImportError:
pass
router = APIRouter(tags=["Analytics"])
# === Pydantic Models ===
class SessionSummaryResponse(BaseModel):
    """Response model for a session summary.

    Built in get_session_summary from the dict form of the repository's
    summary object.
    """
    session_id: str
    teacher_id: str
    class_id: str
    subject: str
    topic: Optional[str]
    date: Optional[str]
    date_formatted: str
    total_duration_seconds: int
    total_duration_formatted: str
    planned_duration_seconds: int
    planned_duration_formatted: str
    phases_completed: int
    total_phases: int
    completion_percentage: int
    phase_statistics: List[Dict[str, Any]]
    total_overtime_seconds: int
    total_overtime_formatted: str
    phases_with_overtime: int
    total_pause_count: int
    total_pause_seconds: int
    reflection_notes: str = ""
    reflection_rating: Optional[int] = None
    key_learnings: List[str] = []
class TeacherAnalyticsResponse(BaseModel):
    """Response model for aggregated teacher analytics.

    Built in get_teacher_analytics from the dict form of the repository's
    analytics object.
    """
    teacher_id: str
    period_start: Optional[str]
    period_end: Optional[str]
    total_sessions: int
    completed_sessions: int
    total_teaching_minutes: int
    total_teaching_hours: float
    avg_phase_durations: Dict[str, int]
    sessions_with_overtime: int
    overtime_percentage: int
    avg_overtime_seconds: int
    avg_overtime_formatted: str
    most_overtime_phase: Optional[str]
    avg_pause_count: float
    avg_pause_duration_seconds: int
    subjects_taught: Dict[str, int]
    classes_taught: Dict[str, int]
class ReflectionCreate(BaseModel):
    """Request body for creating a reflection."""
    session_id: str
    teacher_id: str
    notes: str = ""
    overall_rating: Optional[int] = Field(None, ge=1, le=5)  # 1..5 when given
    what_worked: List[str] = []
    improvements: List[str] = []
    notes_for_next_lesson: str = ""
class ReflectionUpdate(BaseModel):
    """Request body for updating a reflection.

    update_reflection applies only the fields that are not None, so a field
    cannot be reset to None through this model.
    """
    notes: Optional[str] = None
    overall_rating: Optional[int] = Field(None, ge=1, le=5)  # 1..5 when given
    what_worked: Optional[List[str]] = None
    improvements: Optional[List[str]] = None
    notes_for_next_lesson: Optional[str] = None
class ReflectionResponse(BaseModel):
    """Response model for a single reflection."""
    reflection_id: str
    session_id: str
    teacher_id: str
    notes: str
    overall_rating: Optional[int]
    what_worked: List[str]
    improvements: List[str]
    notes_for_next_lesson: str
    created_at: Optional[str]
    updated_at: Optional[str]
class ReflectionListResponse(BaseModel):
    """Response model for a list of reflections."""
    reflections: List[ReflectionResponse]
    # Filled from count_by_teacher in get_reflections_by_teacher, i.e. not
    # the length of the (paginated) `reflections` list.
    total: int
# === Analytics Endpoints ===
@router.get("/analytics/session/{session_id}")
async def get_session_summary(session_id: str) -> SessionSummaryResponse:
    """
    Return the analytics summary of a session.

    Raises:
        HTTPException 503: database not available
        HTTPException 404: no summary found for the session
    """
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Database not available")

    init_db_if_needed()

    with SessionLocal() as db:
        summary = AnalyticsRepository(db).get_session_summary(session_id)
        if summary:
            return SessionSummaryResponse(**summary.to_dict())

        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
@router.get("/analytics/teacher/{teacher_id}")
async def get_teacher_analytics(
    teacher_id: str,
    days: int = Query(30, ge=1, le=365)
) -> TeacherAnalyticsResponse:
    """
    Return aggregated analytics for a teacher.

    The analysed period ends now (UTC) and starts ``days`` days earlier.

    Raises:
        HTTPException 503: database not available
    """
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Database not available")

    init_db_if_needed()

    now = datetime.utcnow()
    window_start = now - timedelta(days=days)

    with SessionLocal() as db:
        analytics = AnalyticsRepository(db).get_teacher_analytics(
            teacher_id, window_start, now
        )
        return TeacherAnalyticsResponse(**analytics.to_dict())
@router.get("/analytics/phase-trends/{teacher_id}/{phase}")
async def get_phase_trends(
    teacher_id: str,
    phase: str,
    limit: int = Query(20, ge=1, le=100)
) -> Dict[str, Any]:
    """
    Return the duration trend of one lesson phase for a teacher.

    The phase name is validated before the database availability check.

    Raises:
        HTTPException 400: unknown phase name
        HTTPException 503: database not available
    """
    known_phases = ("einstieg", "erarbeitung", "sicherung", "transfer", "reflexion")
    if phase not in known_phases:
        raise HTTPException(status_code=400, detail=f"Invalid phase: {phase}")

    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Database not available")

    init_db_if_needed()

    with SessionLocal() as db:
        data_points = AnalyticsRepository(db).get_phase_duration_trends(
            teacher_id, phase, limit
        )
        response: Dict[str, Any] = {
            "teacher_id": teacher_id,
            "phase": phase,
            "data_points": data_points,
            "count": len(data_points),
        }
        return response
@router.get("/analytics/overtime/{teacher_id}")
async def get_overtime_analysis(
    teacher_id: str,
    limit: int = Query(30, ge=1, le=100)
) -> Dict[str, Any]:
    """
    Analyse overtime patterns per phase for a teacher.

    Note: "sessions_analyzed" echoes the requested ``limit``, not a count
    taken from the analysis result.

    Raises:
        HTTPException 503: database not available
    """
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Database not available")

    init_db_if_needed()

    with SessionLocal() as db:
        per_phase = AnalyticsRepository(db).get_overtime_analysis(teacher_id, limit)
        response: Dict[str, Any] = {
            "teacher_id": teacher_id,
            "sessions_analyzed": limit,
            "phases": per_phase,
        }
        return response
# === Reflection Endpoints ===
@router.post("/reflections", status_code=201)
async def create_reflection(data: ReflectionCreate) -> ReflectionResponse:
    """
    Create a post-lesson reflection.

    A session can have at most one reflection; a new reflection_id (UUID4)
    is generated and created_at is set to the current UTC time.

    Raises:
        HTTPException 503: database not available
        HTTPException 409: a reflection for the session already exists
    """
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Database not available")

    init_db_if_needed()

    with SessionLocal() as db:
        repo = ReflectionRepository(db)

        if repo.get_by_session(data.session_id):
            raise HTTPException(
                status_code=409,
                detail=f"Reflection for session {data.session_id} already exists"
            )

        new_reflection = LessonReflection(
            reflection_id=str(uuid4()),
            session_id=data.session_id,
            teacher_id=data.teacher_id,
            notes=data.notes,
            overall_rating=data.overall_rating,
            what_worked=data.what_worked,
            improvements=data.improvements,
            notes_for_next_lesson=data.notes_for_next_lesson,
            created_at=datetime.utcnow(),
        )

        stored = repo.create(new_reflection)
        return ReflectionResponse(**repo.to_dataclass(stored).to_dict())
@router.get("/reflections/session/{session_id}")
async def get_reflection_by_session(session_id: str) -> ReflectionResponse:
    """
    Fetch the reflection of a session.

    Raises:
        HTTPException 503: database not available
        HTTPException 404: the session has no reflection
    """
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Database not available")

    init_db_if_needed()

    with SessionLocal() as db:
        repo = ReflectionRepository(db)
        stored = repo.get_by_session(session_id)
        if stored:
            return ReflectionResponse(**repo.to_dataclass(stored).to_dict())

        raise HTTPException(status_code=404, detail=f"No reflection for session {session_id}")
@router.get("/reflections/teacher/{teacher_id}")
async def get_reflections_by_teacher(
    teacher_id: str,
    limit: int = Query(20, ge=1, le=100),
    offset: int = Query(0, ge=0)
) -> ReflectionListResponse:
    """
    Fetch a teacher's reflections (paginated via limit/offset).

    ``total`` in the response comes from a separate count query for the
    teacher, independent of limit/offset.

    Raises:
        HTTPException 503: database not available
    """
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Database not available")

    init_db_if_needed()

    with SessionLocal() as db:
        repo = ReflectionRepository(db)
        rows = repo.get_by_teacher(teacher_id, limit, offset)

        items = [
            ReflectionResponse(**repo.to_dataclass(row).to_dict())
            for row in rows
        ]
        teacher_total = repo.count_by_teacher(teacher_id)

        return ReflectionListResponse(reflections=items, total=teacher_total)
@router.put("/reflections/{reflection_id}")
async def update_reflection(reflection_id: str, data: ReflectionUpdate) -> ReflectionResponse:
    """
    Update a reflection.

    Only fields of the request body that are not None are applied;
    updated_at is always set to the current UTC time.

    Raises:
        HTTPException 503: database not available
        HTTPException 404: reflection not found
    """
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Database not available")

    init_db_if_needed()

    with SessionLocal() as db:
        repo = ReflectionRepository(db)
        stored = repo.get_by_id(reflection_id)
        if not stored:
            raise HTTPException(status_code=404, detail=f"Reflection {reflection_id} not found")

        reflection = repo.to_dataclass(stored)

        # Same order as the fields are declared on ReflectionUpdate
        updatable = (
            "notes",
            "overall_rating",
            "what_worked",
            "improvements",
            "notes_for_next_lesson",
        )
        for attr in updatable:
            new_value = getattr(data, attr)
            if new_value is not None:
                setattr(reflection, attr, new_value)

        reflection.updated_at = datetime.utcnow()

        stored = repo.update(reflection)
        return ReflectionResponse(**repo.to_dataclass(stored).to_dict())
@router.delete("/reflections/{reflection_id}")
async def delete_reflection(reflection_id: str) -> Dict[str, str]:
    """
    Delete a reflection.

    Raises:
        HTTPException 503: database not available
        HTTPException 404: reflection not found
    """
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Database not available")

    init_db_if_needed()

    with SessionLocal() as db:
        repo = ReflectionRepository(db)
        if not repo.get_by_id(reflection_id):
            raise HTTPException(status_code=404, detail=f"Reflection {reflection_id} not found")

        repo.delete(reflection_id)
        confirmation = {"status": "deleted", "reflection_id": reflection_id}
        return confirmation

View File

@@ -0,0 +1,687 @@
"""
Classroom API - Teacher Context Endpoints (v1 API).
Endpoints fuer Teacher Context, Events, Routines und Antizipations-Engine.
"""
from typing import Dict, List, Optional, Any
from datetime import datetime
import logging
from fastapi import APIRouter, HTTPException, Query, Depends
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session as DBSession
from .shared import init_db_if_needed, DB_ENABLED, logger
try:
from classroom_engine.database import get_db, SessionLocal
from classroom_engine.repository import (
TeacherContextRepository, SchoolyearEventRepository, RecurringRoutineRepository
)
from classroom_engine.context_models import (
MacroPhaseEnum, EventTypeEnum, EventStatusEnum,
RoutineTypeEnum, RecurrencePatternEnum,
FEDERAL_STATES, SCHOOL_TYPES
)
from classroom_engine.antizipation import SuggestionGenerator
except ImportError:
FEDERAL_STATES = {}
SCHOOL_TYPES = {}
# Router for the teacher-context API; every route registered on it is served under the "/v1" prefix.
router = APIRouter(prefix="/v1", tags=["Teacher Context"])
# === Pydantic Models ===
class SchoolInfo(BaseModel):
    """School section of the teacher context: federal state and school type, each as an id plus a display name."""
    federal_state: str
    # Display name looked up from FEDERAL_STATES by the context endpoint.
    federal_state_name: str
    school_type: str
    # Display name looked up from SCHOOL_TYPES by the context endpoint.
    school_type_name: str
class SchoolYearInfo(BaseModel):
    """School-year section of the teacher context."""
    # School-year identifier, e.g. "2024-2025" (the default used in this module).
    id: str
    # ISO-8601 string of the school-year start, or None when unknown.
    start: Optional[str]
    current_week: int
class MacroPhaseInfo(BaseModel):
    """Current macro phase of the school year: phase id, display label and a confidence value."""
    id: str
    label: str
    # The endpoints in this module always report 1.0.
    confidence: float
class CoreCounts(BaseModel):
    """Counters shown in the teacher context; every counter defaults to 0."""
    classes: int = 0
    exams_scheduled: int = 0
    corrections_pending: int = 0
class ContextFlags(BaseModel):
    """Boolean flags of the teacher context; every flag defaults to False."""
    onboarding_completed: bool = False
    has_classes: bool = False
    has_schedule: bool = False
    is_exam_period: bool = False
    is_before_holidays: bool = False
class TeacherContextResponse(BaseModel):
    """Response body of the context endpoints: school, school year, macro phase, counters and flags for one teacher."""
    # Version tag of this response layout.
    schema_version: str = "1.0"
    teacher_id: str
    school: SchoolInfo
    school_year: SchoolYearInfo
    macro_phase: MacroPhaseInfo
    core_counts: CoreCounts
    flags: ContextFlags
class UpdateContextRequest(BaseModel):
    """Request body for updating a teacher context; every field is optional and defaults to None."""
    # Checked against FEDERAL_STATES by the update endpoint.
    federal_state: Optional[str] = None
    # Checked against SCHOOL_TYPES by the update endpoint.
    school_type: Optional[str] = None
    schoolyear: Optional[str] = None
    # ISO-8601 date/time string; a trailing "Z" is accepted by the update endpoint.
    schoolyear_start: Optional[str] = None
    macro_phase: Optional[str] = None
    current_week: Optional[int] = None
class CreateEventRequest(BaseModel):
    """Request body for creating a school-year event."""
    title: str = Field(..., max_length=300)
    event_type: str = "other"
    # ISO-8601 date/time strings; parsed with datetime.fromisoformat by the create endpoint.
    start_date: str
    end_date: Optional[str] = None
    class_id: Optional[str] = None
    subject: Optional[str] = None
    description: str = ""
    needs_preparation: bool = False
    reminder_days_before: int = 3
class EventResponse(BaseModel):
    """Response body describing one school-year event."""
    id: str
    teacher_id: str
    event_type: str
    title: str
    description: str
    # ISO-8601 strings; end_date is None when the event has no end date.
    start_date: str
    end_date: Optional[str]
    class_id: Optional[str]
    subject: Optional[str]
    status: str
    needs_preparation: bool
    preparation_done: bool
    reminder_days_before: int
class CreateRoutineRequest(BaseModel):
    """Request body for creating a recurring routine."""
    title: str
    routine_type: str = "other"
    recurrence_pattern: str = "weekly"
    # NOTE(review): numbering convention for day_of_week is not visible in this file - confirm against the repository.
    day_of_week: Optional[int] = None
    day_of_month: Optional[int] = None
    time_of_day: Optional[str] = None
    duration_minutes: int = 60
    description: str = ""
class RoutineResponse(BaseModel):
    """Response body describing one recurring routine."""
    id: str
    teacher_id: str
    routine_type: str
    title: str
    description: str
    recurrence_pattern: str
    day_of_week: Optional[int]
    day_of_month: Optional[int]
    # ISO-formatted time string, or None when the routine has no time of day.
    time_of_day: Optional[str]
    duration_minutes: int
    is_active: bool
# === Helper Functions ===
def get_macro_phase_label(phase) -> str:
    """Return the display name of a macro phase.

    Args:
        phase: Either an object exposing ``.value`` (e.g. an enum member) or any
            other object, which is converted with ``str()``.

    Returns:
        The German display label for a known phase id; for an unknown id the
        id itself is returned unchanged.
    """
    if hasattr(phase, 'value'):
        phase_id = phase.value
    else:
        phase_id = str(phase)

    display_names = dict(
        onboarding="Einrichtung",
        schuljahresstart="Schuljahresstart",
        unterrichtsaufbau="Unterrichtsaufbau",
        leistungsphase_1="Leistungsphase 1",
        halbjahresabschluss="Halbjahresabschluss",
        leistungsphase_2="Leistungsphase 2",
        jahresabschluss="Jahresabschluss",
    )
    if phase_id in display_names:
        return display_names[phase_id]
    # Unknown phases fall back to their raw id.
    return phase_id
def get_default_context_response(teacher_id: str) -> TeacherContextResponse:
    """Build the fallback context for ``teacher_id``.

    The fallback describes a Bavarian Gymnasium in school year 2024-2025,
    week 1, in the onboarding phase, with all counters and flags at their
    defaults.
    """
    default_school = SchoolInfo(
        federal_state="BY",
        federal_state_name="Bayern",
        school_type="gymnasium",
        school_type_name="Gymnasium",
    )
    default_year = SchoolYearInfo(id="2024-2025", start=None, current_week=1)
    onboarding_phase = MacroPhaseInfo(id="onboarding", label="Einrichtung", confidence=1.0)
    empty_counts = CoreCounts()
    default_flags = ContextFlags()
    return TeacherContextResponse(
        teacher_id=teacher_id,
        school=default_school,
        school_year=default_year,
        macro_phase=onboarding_phase,
        core_counts=empty_counts,
        flags=default_flags,
    )
# === Context Endpoints ===
@router.get("/context", response_model=TeacherContextResponse)
async def get_teacher_context(teacher_id: str = Query(...)):
    """Return the current macro context of a teacher.

    Without a database the default context is returned. Otherwise the context
    row is loaded (or created) and combined with the number of exam events in
    the next 30 days.

    Raises:
        HTTPException: 500 if loading the context fails.
    """
    if not DB_ENABLED:
        return get_default_context_response(teacher_id)
    try:
        db = SessionLocal()
        try:
            repo = TeacherContextRepository(db)
            context = repo.get_or_create(teacher_id)
            event_repo = SchoolyearEventRepository(db)
            upcoming_exams = event_repo.get_upcoming(teacher_id, days=30)
            exams_count = sum(1 for e in upcoming_exams if e.event_type.value == "exam")
            # Resolve the defaults once so id and display name always describe the same entry.
            federal_state = context.federal_state or "BY"
            school_type = context.school_type or "gymnasium"
            # The response is built while the session is still open, so ORM
            # attributes are read before the session is closed.
            return TeacherContextResponse(
                teacher_id=teacher_id,
                school=SchoolInfo(
                    federal_state=federal_state,
                    federal_state_name=FEDERAL_STATES.get(federal_state, ""),
                    school_type=school_type,
                    school_type_name=SCHOOL_TYPES.get(school_type, ""),
                ),
                school_year=SchoolYearInfo(
                    id=context.schoolyear or "2024-2025",
                    start=context.schoolyear_start.isoformat() if context.schoolyear_start else None,
                    current_week=context.current_week or 1,
                ),
                macro_phase=MacroPhaseInfo(
                    id=context.macro_phase.value,
                    label=get_macro_phase_label(context.macro_phase),
                    confidence=1.0,
                ),
                core_counts=CoreCounts(
                    classes=1 if context.has_classes else 0,
                    exams_scheduled=exams_count,
                ),
                flags=ContextFlags(
                    onboarding_completed=context.onboarding_completed,
                    has_classes=context.has_classes,
                    has_schedule=context.has_schedule,
                    is_exam_period=context.is_exam_period,
                    is_before_holidays=context.is_before_holidays,
                ),
            )
        finally:
            # Always release the session, also when building the response fails.
            db.close()
    except Exception as e:
        logger.error(f"Failed to get teacher context: {e}")
        raise HTTPException(status_code=500, detail=f"Fehler beim Laden des Kontexts: {e}")
@router.put("/context", response_model=TeacherContextResponse)
async def update_teacher_context(teacher_id: str, request: UpdateContextRequest):
    """Update the context of a teacher and return the refreshed context.

    Raises:
        HTTPException: 503 without a database; 400 for an unknown federal
            state, an unknown school type or a malformed ``schoolyear_start``;
            500 if the update fails.
    """
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Datenbank nicht verfuegbar")

    # Validate the request before a session is opened, so a rejected request
    # never holds (or leaks) a database connection.
    if request.federal_state and request.federal_state not in FEDERAL_STATES:
        raise HTTPException(status_code=400, detail=f"Ungueltiges Bundesland: {request.federal_state}")
    if request.school_type and request.school_type not in SCHOOL_TYPES:
        raise HTTPException(status_code=400, detail=f"Ungueltige Schulart: {request.school_type}")
    schoolyear_start = None
    if request.schoolyear_start:
        try:
            # fromisoformat() on older Python versions rejects a trailing "Z".
            schoolyear_start = datetime.fromisoformat(request.schoolyear_start.replace('Z', '+00:00'))
        except ValueError:
            # A malformed date is a client error, not a server error.
            raise HTTPException(status_code=400, detail="Ungueltiges Datumsformat")

    try:
        db = SessionLocal()
        try:
            repo = TeacherContextRepository(db)
            repo.update_context(
                teacher_id=teacher_id,
                federal_state=request.federal_state,
                school_type=request.school_type,
                schoolyear=request.schoolyear,
                schoolyear_start=schoolyear_start,
                macro_phase=request.macro_phase,
                current_week=request.current_week,
            )
        finally:
            db.close()
        return await get_teacher_context(teacher_id)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to update teacher context: {e}")
        raise HTTPException(status_code=500, detail=f"Fehler beim Aktualisieren: {e}")
@router.post("/context/complete-onboarding")
async def complete_onboarding(teacher_id: str = Query(...)):
    """Mark the onboarding of a teacher as completed.

    Returns a dict with ``success`` and the resulting ``macro_phase``; without
    a database a static success response is returned.

    Raises:
        HTTPException: 500 if the update fails.
    """
    if not DB_ENABLED:
        return {"success": True, "macro_phase": "schuljahresstart", "note": "DB not available"}
    try:
        db = SessionLocal()
        try:
            repo = TeacherContextRepository(db)
            context = repo.complete_onboarding(teacher_id)
            # Read the phase while the session is still open; the attribute may
            # need a reload after the repository has committed.
            macro_phase = context.macro_phase.value
        finally:
            db.close()
        return {"success": True, "macro_phase": macro_phase, "teacher_id": teacher_id}
    except Exception as e:
        logger.error(f"Failed to complete onboarding: {e}")
        raise HTTPException(status_code=500, detail=f"Fehler: {e}")
@router.post("/context/reset-onboarding")
async def reset_onboarding(teacher_id: str = Query(...)):
    """Reset the onboarding of a teacher (intended for tests).

    Sets ``onboarding_completed`` to False and the macro phase back to
    ONBOARDING, then commits.

    Raises:
        HTTPException: 500 if the reset fails.
    """
    if not DB_ENABLED:
        return {"success": True, "macro_phase": "onboarding", "note": "DB not available"}
    try:
        db = SessionLocal()
        try:
            repo = TeacherContextRepository(db)
            context = repo.get_or_create(teacher_id)
            context.onboarding_completed = False
            context.macro_phase = MacroPhaseEnum.ONBOARDING
            db.commit()
        finally:
            # Closing also discards an uncommitted transaction if the commit failed.
            db.close()
        return {"success": True, "macro_phase": "onboarding", "teacher_id": teacher_id}
    except Exception as e:
        logger.error(f"Failed to reset onboarding: {e}")
        raise HTTPException(status_code=500, detail=f"Fehler: {e}")
# === Events Endpoints ===
@router.get("/events")
async def get_events(
    teacher_id: str = Query(...),
    status: Optional[str] = None,
    event_type: Optional[str] = None,
    limit: int = 50
):
    """Return the events of a teacher, optionally filtered by status and event type.

    Returns a dict with ``events`` (list of dicts) and ``count``; without a
    database the list is empty.

    Raises:
        HTTPException: 500 if the query fails.
    """
    if not DB_ENABLED:
        return {"events": [], "count": 0}
    try:
        db = SessionLocal()
        try:
            repo = SchoolyearEventRepository(db)
            events = repo.get_by_teacher(teacher_id, status=status, event_type=event_type, limit=limit)
            # Serialise while the session is open, then release it in every case.
            return {"events": [repo.to_dict(e) for e in events], "count": len(events)}
        finally:
            db.close()
    except Exception as e:
        logger.error(f"Failed to get events: {e}")
        raise HTTPException(status_code=500, detail=f"Fehler: {e}")
@router.get("/events/upcoming")
async def get_upcoming_events(teacher_id: str = Query(...), days: int = 30, limit: int = 10):
    """Return the upcoming events of a teacher within the next ``days`` days.

    Returns a dict with ``events`` (list of dicts) and ``count``; without a
    database the list is empty.

    Raises:
        HTTPException: 500 if the query fails.
    """
    if not DB_ENABLED:
        return {"events": [], "count": 0}
    try:
        db = SessionLocal()
        try:
            repo = SchoolyearEventRepository(db)
            events = repo.get_upcoming(teacher_id, days=days, limit=limit)
            return {"events": [repo.to_dict(e) for e in events], "count": len(events)}
        finally:
            # Release the session even when the query or serialisation fails.
            db.close()
    except Exception as e:
        logger.error(f"Failed to get upcoming events: {e}")
        raise HTTPException(status_code=500, detail=f"Fehler: {e}")
@router.post("/events", response_model=EventResponse)
async def create_event(teacher_id: str, request: CreateEventRequest):
    """Create a new school-year event for a teacher.

    Raises:
        HTTPException: 503 without a database, 400 if ``start_date`` or
            ``end_date`` is not a valid ISO date, 500 if creation fails.
    """
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Datenbank nicht verfuegbar")

    # Parse the dates before a session is opened: a malformed date is a client
    # error (400) and must not be reported as a server error.
    try:
        start_date = datetime.fromisoformat(request.start_date.replace('Z', '+00:00'))
        end_date = datetime.fromisoformat(request.end_date.replace('Z', '+00:00')) if request.end_date else None
    except ValueError:
        raise HTTPException(status_code=400, detail="Ungueltiges Datumsformat")

    try:
        db = SessionLocal()
        try:
            repo = SchoolyearEventRepository(db)
            event = repo.create(
                teacher_id=teacher_id,
                title=request.title,
                event_type=request.event_type,
                start_date=start_date,
                end_date=end_date,
                class_id=request.class_id,
                subject=request.subject,
                description=request.description,
                needs_preparation=request.needs_preparation,
                reminder_days_before=request.reminder_days_before,
            )
            # Build the response while the session is open, then release it in every case.
            return EventResponse(
                id=event.id,
                teacher_id=event.teacher_id,
                event_type=event.event_type.value,
                title=event.title,
                description=event.description,
                start_date=event.start_date.isoformat(),
                end_date=event.end_date.isoformat() if event.end_date else None,
                class_id=event.class_id,
                subject=event.subject,
                status=event.status.value,
                needs_preparation=event.needs_preparation,
                preparation_done=event.preparation_done,
                reminder_days_before=event.reminder_days_before,
            )
        finally:
            db.close()
    except Exception as e:
        logger.error(f"Failed to create event: {e}")
        raise HTTPException(status_code=500, detail=f"Fehler: {e}")
@router.delete("/events/{event_id}")
async def delete_event(event_id: str):
    """Delete an event.

    Raises:
        HTTPException: 503 without a database, 404 if the repository reports
            that nothing was deleted, 500 if the deletion fails.
    """
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Datenbank nicht verfuegbar")
    try:
        db = SessionLocal()
        try:
            deleted = SchoolyearEventRepository(db).delete(event_id)
        finally:
            # Single release point; covers the success, not-found and error paths.
            db.close()
        if deleted:
            return {"success": True, "deleted_id": event_id}
        raise HTTPException(status_code=404, detail="Event nicht gefunden")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to delete event: {e}")
        raise HTTPException(status_code=500, detail=f"Fehler: {e}")
# === Routines Endpoints ===
@router.get("/routines")
async def get_routines(
    teacher_id: str = Query(...),
    is_active: bool = True,
    routine_type: Optional[str] = None
):
    """Return the routines of a teacher, filtered by active flag and optional routine type.

    Returns a dict with ``routines`` (list of dicts) and ``count``; without a
    database the list is empty.

    Raises:
        HTTPException: 500 if the query fails.
    """
    if not DB_ENABLED:
        return {"routines": [], "count": 0}
    try:
        db = SessionLocal()
        try:
            repo = RecurringRoutineRepository(db)
            routines = repo.get_by_teacher(teacher_id, is_active=is_active, routine_type=routine_type)
            return {"routines": [repo.to_dict(r) for r in routines], "count": len(routines)}
        finally:
            # Release the session even when the query or serialisation fails.
            db.close()
    except Exception as e:
        logger.error(f"Failed to get routines: {e}")
        raise HTTPException(status_code=500, detail=f"Fehler: {e}")
@router.get("/routines/today")
async def get_today_routines(teacher_id: str = Query(...)):
    """Return the routines of a teacher that take place today.

    Returns a dict with ``routines`` (list of dicts) and ``count``; without a
    database the list is empty.

    Raises:
        HTTPException: 500 if the query fails.
    """
    if not DB_ENABLED:
        return {"routines": [], "count": 0}
    try:
        db = SessionLocal()
        try:
            repo = RecurringRoutineRepository(db)
            routines = repo.get_today(teacher_id)
            return {"routines": [repo.to_dict(r) for r in routines], "count": len(routines)}
        finally:
            # Release the session even when the query or serialisation fails.
            db.close()
    except Exception as e:
        logger.error(f"Failed to get today's routines: {e}")
        raise HTTPException(status_code=500, detail=f"Fehler: {e}")
@router.post("/routines", response_model=RoutineResponse)
async def create_routine(teacher_id: str, request: CreateRoutineRequest):
    """Create a new recurring routine for a teacher.

    Raises:
        HTTPException: 503 without a database, 500 if creation fails.
    """
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Datenbank nicht verfuegbar")
    try:
        db = SessionLocal()
        try:
            repo = RecurringRoutineRepository(db)
            routine = repo.create(
                teacher_id=teacher_id,
                title=request.title,
                routine_type=request.routine_type,
                recurrence_pattern=request.recurrence_pattern,
                day_of_week=request.day_of_week,
                day_of_month=request.day_of_month,
                time_of_day=request.time_of_day,
                duration_minutes=request.duration_minutes,
                description=request.description,
            )
            # Build the response while the session is open, then release it in every case.
            return RoutineResponse(
                id=routine.id,
                teacher_id=routine.teacher_id,
                routine_type=routine.routine_type.value,
                title=routine.title,
                description=routine.description,
                recurrence_pattern=routine.recurrence_pattern.value,
                day_of_week=routine.day_of_week,
                day_of_month=routine.day_of_month,
                time_of_day=routine.time_of_day.isoformat() if routine.time_of_day else None,
                duration_minutes=routine.duration_minutes,
                is_active=routine.is_active,
            )
        finally:
            db.close()
    except Exception as e:
        logger.error(f"Failed to create routine: {e}")
        raise HTTPException(status_code=500, detail=f"Fehler: {e}")
@router.delete("/routines/{routine_id}")
async def delete_routine(routine_id: str):
    """Delete a routine.

    Raises:
        HTTPException: 503 without a database, 404 if the repository reports
            that nothing was deleted, 500 if the deletion fails.
    """
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Datenbank nicht verfuegbar")
    try:
        db = SessionLocal()
        try:
            deleted = RecurringRoutineRepository(db).delete(routine_id)
        finally:
            # Single release point; covers the success, not-found and error paths.
            db.close()
        if deleted:
            return {"success": True, "deleted_id": routine_id}
        raise HTTPException(status_code=404, detail="Routine nicht gefunden")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to delete routine: {e}")
        raise HTTPException(status_code=500, detail=f"Fehler: {e}")
# === Static Data Endpoints ===
@router.get("/federal-states")
async def get_federal_states_list():
    """Return all federal states as a list of ``{"id", "name"}`` entries."""
    entries = []
    for state_id, state_name in FEDERAL_STATES.items():
        entries.append({"id": state_id, "name": state_name})
    return {"federal_states": entries}
@router.get("/school-types")
async def get_school_types_list():
    """Return all school types as a list of ``{"id", "name"}`` entries."""
    entries = []
    for type_id, type_name in SCHOOL_TYPES.items():
        entries.append({"id": type_id, "name": type_name})
    return {"school_types": entries}
@router.get("/macro-phases")
async def get_macro_phases_list():
    """Return all macro phases with id, display label and 1-based order."""
    # Listed in chronological order; "order" is derived from the position.
    ordered_phases = (
        ("onboarding", "Einrichtung"),
        ("schuljahresstart", "Schuljahresstart"),
        ("unterrichtsaufbau", "Unterrichtsaufbau"),
        ("leistungsphase_1", "Leistungsphase 1"),
        ("halbjahresabschluss", "Halbjahresabschluss"),
        ("leistungsphase_2", "Leistungsphase 2"),
        ("jahresabschluss", "Jahresabschluss"),
    )
    phase_entries = [
        {"id": phase_id, "label": phase_label, "order": position}
        for position, (phase_id, phase_label) in enumerate(ordered_phases, start=1)
    ]
    return {"macro_phases": phase_entries}
@router.get("/event-types")
async def get_event_types_list():
    """Return all event types as a list of ``{"id", "label"}`` entries."""
    known_types = (
        ("exam", "Klassenarbeit/Klausur"),
        ("parent_evening", "Elternabend"),
        ("trip", "Klassenfahrt/Ausflug"),
        ("project", "Projektwoche"),
        ("other", "Sonstiges"),
    )
    type_entries = [{"id": type_id, "label": type_label} for type_id, type_label in known_types]
    return {"event_types": type_entries}
@router.get("/routine-types")
async def get_routine_types_list():
    """Return all routine types as a list of ``{"id", "label"}`` entries."""
    known_types = (
        ("teacher_conference", "Lehrerkonferenz"),
        ("subject_conference", "Fachkonferenz"),
        ("office_hours", "Sprechstunde"),
        ("correction_time", "Korrekturzeit"),
        ("other", "Sonstiges"),
    )
    type_entries = [{"id": type_id, "label": type_label} for type_id, type_label in known_types]
    return {"routine_types": type_entries}
# === Antizipations-Engine ===
@router.get("/suggestions")
async def get_suggestions(teacher_id: str = Query(...), limit: int = Query(5, ge=1, le=20)):
    """Generate context-based suggestions for a teacher.

    Without a database an empty suggestion set for the onboarding phase is
    returned; otherwise the result of ``SuggestionGenerator.generate`` is
    passed through.

    Raises:
        HTTPException: 500 if generation fails.
    """
    if not DB_ENABLED:
        return {
            "active_contexts": [],
            "suggestions": [],
            "signals_summary": {"macro_phase": "onboarding"},
            "total_suggestions": 0,
        }
    try:
        db = SessionLocal()
        try:
            generator = SuggestionGenerator(db)
            return generator.generate(teacher_id, limit=limit)
        finally:
            # Release the session even when generation raises.
            db.close()
    except Exception as e:
        logger.error(f"Failed to generate suggestions: {e}")
        raise HTTPException(status_code=500, detail=f"Fehler: {e}")
# === Sidebar ===
@router.get("/sidebar")
async def get_sidebar(teacher_id: str = Query(...), mode: str = Query("companion")):
    """Build the sidebar model for the requested mode.

    ``mode == "companion"`` yields the companion layout (search bar, "now
    relevant" list, collapsed module folder); every other value yields the
    classic navigation tree. ``teacher_id`` is required but not used here.
    """
    if mode != "companion":
        classic_navigation = {
            "id": "NAVIGATION",
            "type": "tree",
            "items": [
                {"id": "dashboard", "label": "Dashboard", "icon": "dashboard"},
                {"id": "lesson", "label": "Stundenmodus", "icon": "timer"},
                {"id": "classes", "label": "Klassen", "icon": "groups"},
            ],
        }
        return {"mode": "classic", "sections": [classic_navigation]}

    search_section = {"id": "SEARCH", "type": "search_bar", "placeholder": "Suchen..."}
    relevant_section = {"id": "NOW_RELEVANT", "type": "list", "title": "Jetzt relevant", "items": []}
    modules_section = {
        "id": "ALL_MODULES",
        "type": "folder",
        "label": "Alle Module",
        "collapsed": True,
        "items": [
            {"id": "lesson", "label": "Stundenmodus", "icon": "timer"},
            {"id": "classes", "label": "Klassen", "icon": "groups"},
            {"id": "exams", "label": "Klausuren", "icon": "quiz"},
        ],
    }
    return {
        "mode": "companion",
        "sections": [search_section, relevant_section, modules_section],
    }
# === Schuljahres-Pfad ===
@router.get("/path")
async def get_schoolyear_path(teacher_id: str = Query(...)):
    """Build the school-year path with one milestone per macro phase.

    Each milestone is marked ``done``, ``current`` or ``upcoming`` relative to
    the teacher's current macro phase. If the database is disabled or the
    lookup fails, the phase falls back to ``onboarding``.

    Returns:
        A dict with ``milestones``, ``current_milestone_id`` and
        ``progress_percent`` (0-100).
    """
    current_phase = "onboarding"
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = TeacherContextRepository(db)
                context = repo.get_or_create(teacher_id)
                current_phase = context.macro_phase.value
            finally:
                # Release the session even when the lookup fails.
                db.close()
        except Exception as e:
            # Best effort: the path is still rendered, starting at onboarding.
            logger.warning(f"Failed to get context for path: {e}")
    phase_order = [
        "onboarding", "schuljahresstart", "unterrichtsaufbau",
        "leistungsphase_1", "halbjahresabschluss", "leistungsphase_2", "jahresabschluss",
    ]
    # An unknown phase id is treated like the first phase.
    current_index = phase_order.index(current_phase) if current_phase in phase_order else 0
    milestones = [
        {"id": "MS_START", "label": "Start", "phase": "onboarding"},
        {"id": "MS_SETUP", "label": "Einrichtung", "phase": "schuljahresstart"},
        {"id": "MS_ROUTINE", "label": "Routinen", "phase": "unterrichtsaufbau"},
        {"id": "MS_EXAM_1", "label": "Klausuren", "phase": "leistungsphase_1"},
        {"id": "MS_HALFYEAR", "label": "Halbjahr", "phase": "halbjahresabschluss"},
        {"id": "MS_EXAM_2", "label": "Pruefungen", "phase": "leistungsphase_2"},
        {"id": "MS_END", "label": "Abschluss", "phase": "jahresabschluss"},
    ]
    for milestone in milestones:
        phase_index = phase_order.index(milestone["phase"])
        if phase_index < current_index:
            milestone["status"] = "done"
        elif phase_index == current_index:
            milestone["status"] = "current"
        else:
            milestone["status"] = "upcoming"
    return {
        "milestones": milestones,
        # Milestones are listed in phase order, so the phase index selects the milestone.
        "current_milestone_id": milestones[current_index]["id"],
        "progress_percent": int((current_index / (len(phase_order) - 1)) * 100),
    }

View File

@@ -0,0 +1,271 @@
"""
Classroom API - Feedback Endpoints.
Endpoints fuer Lehrer-Feedback (Feature Request Tracking).
"""
from uuid import uuid4
from typing import Dict, List, Optional, Any
from datetime import datetime
import logging
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
from .shared import init_db_if_needed, DB_ENABLED, logger
try:
from classroom_engine.database import SessionLocal
from classroom_engine.repository import TeacherFeedbackRepository
except ImportError:
pass
# Router for the teacher-feedback endpoints; no path prefix is set here.
router = APIRouter(tags=["Feedback"])
# In-memory storage (fallback), keyed by feedback_id.
# Process-local: content is lost on restart. The create and status-update
# endpoints below also write here, next to their best-effort DB write.
_feedback: Dict[str, dict] = {}
# === Pydantic Models ===
class CreateFeedbackRequest(BaseModel):
    """Request body for creating feedback."""
    teacher_id: str
    session_id: Optional[str] = None
    # Allowed values are enforced by the create endpoint, not by this model.
    category: str = Field(..., description="bug, feature, usability, content, other")
    title: str = Field(..., min_length=1, max_length=200)
    description: str = Field(..., min_length=10, max_length=5000)
    priority: str = Field("medium", description="low, medium, high, critical")
    context_data: Optional[Dict[str, Any]] = None
class FeedbackResponse(BaseModel):
    """Response body for a single feedback entry."""
    feedback_id: str
    teacher_id: str
    session_id: Optional[str]
    category: str
    title: str
    description: str
    priority: str
    # New feedback starts as "open"; changed through the status endpoint.
    status: str
    context_data: Optional[Dict[str, Any]]
    admin_notes: Optional[str]
    # ISO-8601 strings; updated_at is None until the first status change.
    created_at: str
    updated_at: Optional[str]
class FeedbackListResponse(BaseModel):
    """Response body for a list of feedback entries."""
    # The returned page of entries.
    feedback: List[FeedbackResponse]
    # Number of matching entries as reported by the list endpoint.
    total_count: int
class FeedbackStatsResponse(BaseModel):
    """Response body for feedback statistics: total plus counts per category, status and priority."""
    total: int
    by_category: Dict[str, int]
    by_status: Dict[str, int]
    by_priority: Dict[str, int]
# === Endpoints ===
@router.post("/feedback", response_model=FeedbackResponse, status_code=201)
async def create_feedback(request: CreateFeedbackRequest) -> FeedbackResponse:
    """Create a new feedback entry.

    The entry is always kept in the in-memory store; persisting it to the
    database is best effort and a failure there is only logged.

    Raises:
        HTTPException: 400 for an unknown category or priority.
    """
    init_db_if_needed()
    valid_categories = ["bug", "feature", "usability", "content", "other"]
    if request.category not in valid_categories:
        raise HTTPException(status_code=400, detail=f"Invalid category. Must be one of: {valid_categories}")
    valid_priorities = ["low", "medium", "high", "critical"]
    if request.priority not in valid_priorities:
        raise HTTPException(status_code=400, detail=f"Invalid priority. Must be one of: {valid_priorities}")
    feedback_id = str(uuid4())
    now = datetime.utcnow()
    feedback_data = {
        "feedback_id": feedback_id,
        "teacher_id": request.teacher_id,
        "session_id": request.session_id,
        "category": request.category,
        "title": request.title,
        "description": request.description,
        "priority": request.priority,
        "status": "open",
        "context_data": request.context_data,
        "admin_notes": None,
        "created_at": now.isoformat(),
        "updated_at": None,
    }
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = TeacherFeedbackRepository(db)
                repo.create(feedback_data)
            finally:
                # Release the session even when the insert fails.
                db.close()
        except Exception as e:
            # Deliberately best effort: the entry is still served from memory.
            logger.warning(f"DB persist failed for feedback: {e}")
    _feedback[feedback_id] = feedback_data
    return FeedbackResponse(**feedback_data)
@router.get("/feedback", response_model=FeedbackListResponse)
async def list_feedback(
    teacher_id: Optional[str] = Query(None),
    category: Optional[str] = Query(None),
    status: Optional[str] = Query(None),
    priority: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=100),
    offset: int = Query(0, ge=0)
) -> FeedbackListResponse:
    """List feedback, optionally filtered and paginated.

    The database is used when available; if it is disabled or the read fails,
    the in-memory store is filtered and paginated instead.
    """
    init_db_if_needed()
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = TeacherFeedbackRepository(db)
                db_feedback = repo.get_all(
                    teacher_id=teacher_id,
                    category=category,
                    status=status,
                    priority=priority,
                    limit=limit,
                    offset=offset
                )
                # Collected in a separate list so that a failure halfway through
                # cannot leave DB rows mixed into the memory fallback below.
                db_items = [FeedbackResponse(**fb) for fb in db_feedback]
                # NOTE(review): the priority filter is not passed to count(), so
                # total_count ignores it - confirm whether count() accepts it.
                total = repo.count(teacher_id=teacher_id, category=category, status=status)
            finally:
                db.close()
            return FeedbackListResponse(feedback=db_items, total_count=total)
        except Exception as e:
            logger.warning(f"DB read failed for feedback: {e}")
    # Fallback to memory
    feedback_list = []
    for fb in _feedback.values():
        if teacher_id and fb["teacher_id"] != teacher_id:
            continue
        if category and fb["category"] != category:
            continue
        if status and fb["status"] != status:
            continue
        if priority and fb["priority"] != priority:
            continue
        feedback_list.append(FeedbackResponse(**fb))
    total = len(feedback_list)
    feedback_list = feedback_list[offset:offset + limit]
    return FeedbackListResponse(feedback=feedback_list, total_count=total)
@router.get("/feedback/stats", response_model=FeedbackStatsResponse)
async def get_feedback_stats() -> FeedbackStatsResponse:
    """Return feedback statistics.

    Uses the repository statistics when the database is available; otherwise
    (or if that read fails) the in-memory store is counted.
    """
    init_db_if_needed()
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = TeacherFeedbackRepository(db)
                stats = repo.get_stats()
            finally:
                # Release the session even when the query fails.
                db.close()
            return FeedbackStatsResponse(**stats)
        except Exception as e:
            logger.warning(f"DB read failed for feedback stats: {e}")
    # Fallback to memory
    by_category: Dict[str, int] = {}
    by_status: Dict[str, int] = {}
    by_priority: Dict[str, int] = {}
    for fb in _feedback.values():
        cat = fb["category"]
        by_category[cat] = by_category.get(cat, 0) + 1
        st = fb["status"]
        by_status[st] = by_status.get(st, 0) + 1
        pr = fb["priority"]
        by_priority[pr] = by_priority.get(pr, 0) + 1
    return FeedbackStatsResponse(
        total=len(_feedback),
        by_category=by_category,
        by_status=by_status,
        by_priority=by_priority,
    )
@router.get("/feedback/{feedback_id}")
async def get_feedback(feedback_id: str) -> FeedbackResponse:
    """Return a single feedback entry.

    The in-memory store is checked first, then the database.

    Raises:
        HTTPException: 404 if the entry is found in neither place (a failed
            database read is logged and also ends in 404).
    """
    init_db_if_needed()
    if feedback_id in _feedback:
        return FeedbackResponse(**_feedback[feedback_id])
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = TeacherFeedbackRepository(db)
                fb = repo.get_by_id(feedback_id)
            finally:
                # Release the session even when the lookup fails.
                db.close()
            if fb:
                return FeedbackResponse(**fb)
        except Exception as e:
            logger.warning(f"DB read failed: {e}")
    raise HTTPException(status_code=404, detail="Feedback nicht gefunden")
@router.put("/feedback/{feedback_id}/status")
async def update_feedback_status(
    feedback_id: str,
    status: str = Query(..., description="open, in_progress, resolved, closed, wont_fix")
) -> FeedbackResponse:
    """Update the status of a feedback entry.

    The entry is taken from memory or, failing that, from the database. The
    new status is written to memory and, best effort, to the database.

    Raises:
        HTTPException: 400 for an unknown status, 404 if the entry does not exist.
    """
    init_db_if_needed()
    valid_statuses = ["open", "in_progress", "resolved", "closed", "wont_fix"]
    if status not in valid_statuses:
        raise HTTPException(status_code=400, detail=f"Invalid status. Must be one of: {valid_statuses}")
    feedback_data = _feedback.get(feedback_id)
    if not feedback_data and DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = TeacherFeedbackRepository(db)
                feedback_data = repo.get_by_id(feedback_id)
            finally:
                # Release the session even when the lookup fails.
                db.close()
        except Exception as e:
            logger.warning(f"DB read failed: {e}")
    if not feedback_data:
        raise HTTPException(status_code=404, detail="Feedback nicht gefunden")
    feedback_data["status"] = status
    feedback_data["updated_at"] = datetime.utcnow().isoformat()
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = TeacherFeedbackRepository(db)
                repo.update_status(feedback_id, status)
            finally:
                db.close()
        except Exception as e:
            # Deliberately best effort: the in-memory copy still carries the new status.
            logger.warning(f"DB update failed: {e}")
    _feedback[feedback_id] = feedback_data
    return FeedbackResponse(**feedback_data)

View File

@@ -0,0 +1,281 @@
"""
Classroom API - Homework Endpoints.
Endpoints fuer Hausaufgaben-Tracking (Feature f20).
"""
from uuid import uuid4
from typing import Dict, List, Optional
from datetime import datetime
import logging
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
from classroom_engine import Homework, HomeworkStatus
from .shared import init_db_if_needed, DB_ENABLED, logger
try:
from classroom_engine.database import SessionLocal
from classroom_engine.repository import HomeworkRepository
except ImportError:
pass
# Router for the homework endpoints; no path prefix is set here.
router = APIRouter(tags=["Homework"])
# In-memory storage (fallback), keyed by homework_id.
# Process-local: content is lost on restart. The endpoints below also use it
# as a cache for rows they read from the database.
_homework: Dict[str, Homework] = {}
# === Pydantic Models ===
class CreateHomeworkRequest(BaseModel):
    """Request body for creating a homework assignment."""
    teacher_id: str
    class_id: str
    subject: str
    title: str = Field(..., max_length=300)
    description: str = ""
    session_id: Optional[str] = None
    # Parsed with datetime.fromisoformat by the create endpoint; a trailing "Z" is accepted.
    due_date: Optional[str] = Field(None, description="ISO-Format Datum")
class UpdateHomeworkRequest(BaseModel):
    """Request body for updating a homework assignment; fields left as None are not changed."""
    title: Optional[str] = Field(None, max_length=300)
    description: Optional[str] = None
    # ISO-8601 date/time string.
    due_date: Optional[str] = None
    # Must be a valid HomeworkStatus value; checked by the update endpoint.
    status: Optional[str] = None
class HomeworkResponse(BaseModel):
    """Response body for a single homework assignment."""
    homework_id: str
    teacher_id: str
    class_id: str
    subject: str
    title: str
    description: str
    session_id: Optional[str]
    # ISO-8601 string, or None when no due date is set.
    due_date: Optional[str]
    status: str
    # Copied from the Homework object's is_overdue attribute.
    is_overdue: bool
    created_at: Optional[str]
    updated_at: Optional[str]
class HomeworkListResponse(BaseModel):
    """Response body for a list of homework assignments."""
    homework: List[HomeworkResponse]
    # Number of matching assignments as reported by the list endpoint.
    total: int
# === Helper Functions ===
def build_homework_response(hw: Homework) -> HomeworkResponse:
    """Convert a Homework object into its API response model.

    Date/time attributes are rendered as ISO strings (None stays None) and the
    status enum is reduced to its value.
    """
    def _iso_or_none(moment):
        # Falsy timestamps (i.e. unset ones) are passed on as None.
        return moment.isoformat() if moment else None

    response_fields = {
        "homework_id": hw.homework_id,
        "teacher_id": hw.teacher_id,
        "class_id": hw.class_id,
        "subject": hw.subject,
        "title": hw.title,
        "description": hw.description,
        "session_id": hw.session_id,
        "due_date": _iso_or_none(hw.due_date),
        "status": hw.status.value,
        "is_overdue": hw.is_overdue,
        "created_at": _iso_or_none(hw.created_at),
        "updated_at": _iso_or_none(hw.updated_at),
    }
    return HomeworkResponse(**response_fields)
# === Endpoints ===
@router.post("/homework", response_model=HomeworkResponse, status_code=201)
async def create_homework(request: CreateHomeworkRequest) -> HomeworkResponse:
    """Create a new homework assignment (feature f20).

    The assignment is always kept in the in-memory store; persisting it to the
    database is best effort and a failure there is only logged.

    Raises:
        HTTPException: 400 if ``due_date`` is not a valid ISO date.
    """
    init_db_if_needed()
    due_date = None
    if request.due_date:
        try:
            due_date = datetime.fromisoformat(request.due_date.replace('Z', '+00:00'))
        except ValueError:
            raise HTTPException(status_code=400, detail="Ungueltiges Datumsformat")
    homework = Homework(
        homework_id=str(uuid4()),
        teacher_id=request.teacher_id,
        class_id=request.class_id,
        subject=request.subject,
        title=request.title,
        description=request.description,
        session_id=request.session_id,
        due_date=due_date,
        status=HomeworkStatus.ASSIGNED,
        created_at=datetime.utcnow(),
    )
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = HomeworkRepository(db)
                repo.create(homework)
            finally:
                # Release the session even when the insert fails.
                db.close()
        except Exception as e:
            # Deliberately best effort: the assignment is still served from memory.
            logger.warning(f"DB persist failed for homework: {e}")
    _homework[homework.homework_id] = homework
    return build_homework_response(homework)
@router.get("/homework", response_model=HomeworkListResponse)
async def list_homework(
    teacher_id: str = Query(...),
    class_id: Optional[str] = Query(None),
    status: Optional[str] = Query(None),
    include_completed: bool = Query(False),
    limit: int = Query(50, ge=1, le=100)
) -> HomeworkListResponse:
    """List the homework of a teacher (feature f20).

    With a database, rows are read per class (if ``class_id`` is given) or per
    teacher and cached in memory. If the database is disabled or the read
    fails, the in-memory store is filtered instead.
    """
    init_db_if_needed()
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = HomeworkRepository(db)
                if class_id:
                    db_homework = repo.get_by_class(class_id, teacher_id, include_completed, limit)
                else:
                    db_homework = repo.get_by_teacher(teacher_id, status, limit)
                # Collected in a separate list so that a failure halfway through
                # cannot leave DB rows mixed into the memory fallback below.
                db_items = []
                for db_hw in db_homework:
                    hw = repo.to_dataclass(db_hw)
                    _homework[hw.homework_id] = hw
                    db_items.append(build_homework_response(hw))
            finally:
                db.close()
            return HomeworkListResponse(homework=db_items, total=len(db_items))
        except Exception as e:
            logger.warning(f"DB read failed for homework: {e}")
    homework_list = []
    for hw in _homework.values():
        if hw.teacher_id != teacher_id:
            continue
        if class_id and hw.class_id != class_id:
            continue
        if status and hw.status.value != status:
            continue
        if not include_completed and hw.status == HomeworkStatus.COMPLETED:
            continue
        homework_list.append(build_homework_response(hw))
    # total counts all matches; only the first `limit` entries are returned.
    return HomeworkListResponse(homework=homework_list[:limit], total=len(homework_list))
@router.get("/homework/{homework_id}", response_model=HomeworkResponse)
async def get_homework(homework_id: str) -> HomeworkResponse:
    """Return a single homework assignment (feature f20).

    The in-memory store is checked first, then the database; a row found in
    the database is cached in memory.

    Raises:
        HTTPException: 404 if the assignment is found in neither place (a
            failed database read is logged and also ends in 404).
    """
    init_db_if_needed()
    if homework_id in _homework:
        return build_homework_response(_homework[homework_id])
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = HomeworkRepository(db)
                db_hw = repo.get_by_id(homework_id)
                # Convert while the session is open; the ORM row is detached after close().
                hw = repo.to_dataclass(db_hw) if db_hw else None
            finally:
                db.close()
            if hw is not None:
                _homework[hw.homework_id] = hw
                return build_homework_response(hw)
        except Exception as e:
            logger.warning(f"DB read failed: {e}")
    raise HTTPException(status_code=404, detail="Hausaufgabe nicht gefunden")
@router.put("/homework/{homework_id}", response_model=HomeworkResponse)
async def update_homework(homework_id: str, request: UpdateHomeworkRequest) -> HomeworkResponse:
    """Update a homework assignment (feature f20).

    Only request fields that are not ``None`` are applied. The change is
    written to memory and, best effort, to the database.

    Raises:
        HTTPException: 404 if the assignment does not exist, 400 for a
            malformed ``due_date`` or an unknown ``status``.
    """
    init_db_if_needed()
    homework = _homework.get(homework_id)
    if not homework and DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = HomeworkRepository(db)
                db_hw = repo.get_by_id(homework_id)
                # Convert while the session is open; the ORM row is detached after close().
                if db_hw:
                    homework = repo.to_dataclass(db_hw)
            finally:
                db.close()
            if homework:
                _homework[homework.homework_id] = homework
        except Exception as e:
            logger.warning(f"DB read failed: {e}")
    if not homework:
        raise HTTPException(status_code=404, detail="Hausaufgabe nicht gefunden")

    # Validate everything before touching the (cached) object, so a rejected
    # request cannot leave it half-updated.
    new_due_date = None
    if request.due_date is not None:
        try:
            new_due_date = datetime.fromisoformat(request.due_date.replace('Z', '+00:00'))
        except ValueError:
            raise HTTPException(status_code=400, detail="Ungueltiges Datumsformat")
    new_status = None
    if request.status is not None:
        try:
            new_status = HomeworkStatus(request.status)
        except ValueError:
            raise HTTPException(status_code=400, detail="Ungueltiger Status")

    if request.title is not None:
        homework.title = request.title
    if request.description is not None:
        homework.description = request.description
    if request.due_date is not None:
        homework.due_date = new_due_date
    if request.status is not None:
        homework.status = new_status
    homework.updated_at = datetime.utcnow()
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = HomeworkRepository(db)
                repo.update(homework)
            finally:
                db.close()
        except Exception as e:
            # Deliberately best effort: the in-memory copy still carries the change.
            logger.warning(f"DB update failed: {e}")
    _homework[homework_id] = homework
    return build_homework_response(homework)
@router.patch("/homework/{homework_id}/status")
async def update_homework_status(
    homework_id: str,
    status: str = Query(...)
) -> HomeworkResponse:
    """Update only the status of a homework entry (feature f20).

    Thin wrapper that delegates to ``update_homework`` with a request
    carrying nothing but the new status.
    """
    status_only_request = UpdateHomeworkRequest(status=status)
    updated = await update_homework(homework_id, status_only_request)
    return updated
@router.delete("/homework/{homework_id}")
async def delete_homework(homework_id: str) -> Dict[str, str]:
    """Delete a homework entry (feature f20).

    Removes the entry from the in-memory store and, if enabled, from the
    database. The call reports ``"deleted"`` even when the id was unknown
    or the DB delete failed (the failure is only logged).
    """
    init_db_if_needed()
    _homework.pop(homework_id, None)
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                HomeworkRepository(db).delete(homework_id)
            finally:
                # Release the session even if the delete raises.
                db.close()
        except Exception as e:
            logger.warning(f"DB delete failed: {e}")
    return {"status": "deleted", "homework_id": homework_id}

View File

@@ -0,0 +1,343 @@
"""
Classroom API - Materials Endpoints.
Endpoints fuer Unterrichtsmaterialien (Feature f19).
"""
from uuid import uuid4
from typing import Dict, List, Optional
from datetime import datetime
import logging
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
from classroom_engine import PhaseMaterial, MaterialType
from .shared import init_db_if_needed, DB_ENABLED, logger
try:
from classroom_engine.database import SessionLocal
from classroom_engine.repository import MaterialRepository
except ImportError:
pass
# Router that collects all material endpoints defined in this module.
router = APIRouter(tags=["Materials"])
# In-memory storage (fallback when the DB is unavailable), keyed by material_id.
_materials: Dict[str, PhaseMaterial] = {}
# === Pydantic Models ===
class CreateMaterialRequest(BaseModel):
    """Request body for creating a material."""

    # Owner of the new material (required).
    teacher_id: str
    title: str = Field(..., max_length=300)
    # Parsed into MaterialType by the create endpoint.
    material_type: str = "document"
    url: Optional[str] = Field(default=None, max_length=2000)
    description: str = ""
    phase: Optional[str] = None
    subject: str = ""
    grade_level: str = ""
    tags: List[str] = []
    is_public: bool = False
    session_id: Optional[str] = None
class UpdateMaterialRequest(BaseModel):
    """Request body for updating a material.

    Every field defaults to ``None``; the update endpoint only applies
    fields that are not ``None``.
    """

    title: Optional[str] = Field(default=None, max_length=300)
    material_type: Optional[str] = None
    url: Optional[str] = Field(default=None, max_length=2000)
    description: Optional[str] = None
    phase: Optional[str] = None
    subject: Optional[str] = None
    grade_level: Optional[str] = None
    tags: Optional[List[str]] = None
    is_public: Optional[bool] = None
class MaterialResponse(BaseModel):
    """Response payload describing a single material."""

    material_id: str
    teacher_id: str
    title: str
    # String value of the MaterialType enum.
    material_type: str
    url: Optional[str]
    description: str
    phase: Optional[str]
    subject: str
    grade_level: str
    tags: List[str]
    is_public: bool
    usage_count: int
    session_id: Optional[str]
    # Timestamps are ISO-8601 strings, or None when not set.
    created_at: Optional[str]
    updated_at: Optional[str]
class MaterialListResponse(BaseModel):
    """Response payload for a list of materials."""

    materials: List[MaterialResponse]
    # Number of matching materials as computed by the list endpoint.
    total: int
# === Helper Functions ===
def build_material_response(mat: PhaseMaterial) -> MaterialResponse:
    """Convert a PhaseMaterial object into its MaterialResponse API model.

    Timestamps are serialized as ISO-8601 strings (``None`` when unset) and
    the material type is emitted as the enum's value.
    """
    created_iso = mat.created_at.isoformat() if mat.created_at else None
    updated_iso = mat.updated_at.isoformat() if mat.updated_at else None
    payload = dict(
        material_id=mat.material_id,
        teacher_id=mat.teacher_id,
        title=mat.title,
        material_type=mat.material_type.value,
        url=mat.url,
        description=mat.description,
        phase=mat.phase,
        subject=mat.subject,
        grade_level=mat.grade_level,
        tags=mat.tags,
        is_public=mat.is_public,
        usage_count=mat.usage_count,
        session_id=mat.session_id,
        created_at=created_iso,
        updated_at=updated_iso,
    )
    return MaterialResponse(**payload)
# === Endpoints ===
@router.post("/materials", response_model=MaterialResponse, status_code=201)
async def create_material(request: CreateMaterialRequest) -> MaterialResponse:
    """Create a new material (feature f19).

    The material is persisted to the database when enabled (best effort;
    failures are logged) and always stored in the in-memory store.

    Returns:
        The created material as MaterialResponse.
    """
    init_db_if_needed()
    try:
        mat_type = MaterialType(request.material_type)
    except ValueError:
        # Unknown type strings silently fall back to DOCUMENT.
        mat_type = MaterialType.DOCUMENT
    material = PhaseMaterial(
        material_id=str(uuid4()),
        teacher_id=request.teacher_id,
        title=request.title,
        material_type=mat_type,
        url=request.url,
        description=request.description,
        phase=request.phase,
        subject=request.subject,
        grade_level=request.grade_level,
        tags=request.tags,
        is_public=request.is_public,
        usage_count=0,
        session_id=request.session_id,
        created_at=datetime.utcnow(),
    )
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                MaterialRepository(db).create(material)
            finally:
                # Release the session even if the insert raises.
                db.close()
        except Exception as e:
            logger.warning(f"DB persist failed for material: {e}")
    _materials[material.material_id] = material
    return build_material_response(material)
@router.get("/materials", response_model=MaterialListResponse)
async def list_materials(
    teacher_id: str = Query(...),
    phase: Optional[str] = Query(None),
    subject: Optional[str] = Query(None),
    include_public: bool = Query(True),
    limit: int = Query(50, ge=1, le=100)
) -> MaterialListResponse:
    """List the materials of a teacher (feature f19).

    Reads from the database when enabled and falls back to the in-memory
    store when the database is disabled or the read fails.

    Args:
        teacher_id: Owner of the materials (required).
        phase: Optional phase filter; switches the DB lookup to per-phase.
        subject: Optional subject filter.
        include_public: Whether public materials of other teachers count.
        limit: Maximum number of entries returned (1-100).

    Returns:
        MaterialListResponse with the matching materials and their count.
    """
    init_db_if_needed()
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = MaterialRepository(db)
                if phase:
                    db_materials = repo.get_by_phase(phase, teacher_id, include_public)
                else:
                    db_materials = repo.get_by_teacher(teacher_id, phase, subject, limit)
                # Separate list: a failure halfway through must not leak
                # partial results into the in-memory fallback below.
                db_results = []
                for db_mat in db_materials:
                    mat = repo.to_dataclass(db_mat)
                    _materials[mat.material_id] = mat  # refresh the in-memory cache
                    # The per-phase lookup takes no subject argument, so apply
                    # the subject filter here like the in-memory fallback does.
                    if phase and subject and mat.subject != subject:
                        continue
                    db_results.append(build_material_response(mat))
            finally:
                # Always release the DB session, even on errors.
                db.close()
            # The per-phase lookup takes no limit either; enforce it here.
            return MaterialListResponse(materials=db_results[:limit], total=len(db_results))
        except Exception as e:
            logger.warning(f"DB read failed for materials: {e}")
    # Fallback: filter the in-memory store.
    materials_list = []
    for mat in _materials.values():
        if mat.teacher_id != teacher_id and not (include_public and mat.is_public):
            continue
        if phase and mat.phase != phase:
            continue
        if subject and mat.subject != subject:
            continue
        materials_list.append(build_material_response(mat))
    return MaterialListResponse(materials=materials_list[:limit], total=len(materials_list))
@router.get("/materials/by-phase/{phase}", response_model=MaterialListResponse)
async def get_materials_by_phase(
    phase: str,
    teacher_id: str = Query(...),
    subject: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=100)
) -> MaterialListResponse:
    """Fetch the materials for a specific phase (feature f19).

    Delegates to ``list_materials`` with the phase filter set.
    """
    # list_materials is called as a plain function here, so FastAPI does not
    # resolve its Query(...) defaults. Pass include_public explicitly; otherwise
    # the callee would receive the Query default object instead of a bool.
    return await list_materials(
        teacher_id=teacher_id,
        phase=phase,
        subject=subject,
        include_public=True,
        limit=limit,
    )
@router.get("/materials/{material_id}", response_model=MaterialResponse)
async def get_material(material_id: str) -> MaterialResponse:
    """Fetch a single material (feature f19).

    Looks in the in-memory store first and then, if enabled, in the database.

    Raises:
        HTTPException: 404 when the material is found in neither place
            (also when the DB lookup itself fails).
    """
    init_db_if_needed()
    if material_id in _materials:
        return build_material_response(_materials[material_id])
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = MaterialRepository(db)
                db_mat = repo.get_by_id(material_id)
                # Convert while the session is still open so the ORM object
                # is not accessed in a detached state.
                mat = repo.to_dataclass(db_mat) if db_mat else None
            finally:
                db.close()
            if mat is not None:
                _materials[mat.material_id] = mat
                return build_material_response(mat)
        except Exception as e:
            logger.warning(f"DB read failed: {e}")
    raise HTTPException(status_code=404, detail="Material nicht gefunden")
@router.put("/materials/{material_id}", response_model=MaterialResponse)
async def update_material(material_id: str, request: UpdateMaterialRequest) -> MaterialResponse:
    """Update a material (feature f19).

    Only the fields of ``request`` that are not ``None`` are applied.

    Raises:
        HTTPException: 404 if the material does not exist, 400 if the
            material type is not a valid MaterialType value.
    """
    init_db_if_needed()
    material = _materials.get(material_id)
    if not material and DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = MaterialRepository(db)
                db_mat = repo.get_by_id(material_id)
                # Convert before closing so the ORM object is still attached.
                if db_mat:
                    material = repo.to_dataclass(db_mat)
            finally:
                db.close()
            if material:
                _materials[material.material_id] = material
        except Exception as e:
            logger.warning(f"DB read failed: {e}")
    if not material:
        raise HTTPException(status_code=404, detail="Material nicht gefunden")
    if request.title is not None:
        material.title = request.title
    if request.material_type is not None:
        try:
            material.material_type = MaterialType(request.material_type)
        except ValueError:
            raise HTTPException(status_code=400, detail="Ungueltiger Material-Typ")
    if request.url is not None:
        material.url = request.url
    if request.description is not None:
        material.description = request.description
    if request.phase is not None:
        material.phase = request.phase
    if request.subject is not None:
        material.subject = request.subject
    if request.grade_level is not None:
        material.grade_level = request.grade_level
    if request.tags is not None:
        material.tags = request.tags
    if request.is_public is not None:
        material.is_public = request.is_public
    material.updated_at = datetime.utcnow()
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                MaterialRepository(db).update(material)
            finally:
                # Release the session even if the update raises.
                db.close()
        except Exception as e:
            # Best effort: the in-memory copy is still updated below.
            logger.warning(f"DB update failed: {e}")
    _materials[material_id] = material
    return build_material_response(material)
@router.post("/materials/{material_id}/attach/{session_id}")
async def attach_material_to_session(material_id: str, session_id: str) -> MaterialResponse:
    """Attach a material to a session (feature f19).

    Sets the material's session_id, increments its usage counter and
    updates the timestamp; the change is mirrored to the database when
    enabled (best effort, failures are logged).

    Raises:
        HTTPException: 404 if the material does not exist.
    """
    init_db_if_needed()
    material = _materials.get(material_id)
    if not material and DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = MaterialRepository(db)
                db_mat = repo.get_by_id(material_id)
                if db_mat:
                    material = repo.to_dataclass(db_mat)
            finally:
                # Release the session even if the lookup/conversion raises.
                db.close()
        except Exception as e:
            logger.warning(f"DB read failed: {e}")
    if not material:
        raise HTTPException(status_code=404, detail="Material nicht gefunden")
    material.session_id = session_id
    material.usage_count += 1
    material.updated_at = datetime.utcnow()
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                MaterialRepository(db).attach_to_session(material_id, session_id)
            finally:
                db.close()
        except Exception as e:
            logger.warning(f"DB update failed: {e}")
    _materials[material_id] = material
    return build_material_response(material)
@router.delete("/materials/{material_id}")
async def delete_material(material_id: str) -> Dict[str, str]:
    """Delete a material (feature f19).

    Removes the material from the in-memory store and, if enabled, from the
    database. The call reports ``"deleted"`` even when the id was unknown
    or the DB delete failed (the failure is only logged).
    """
    init_db_if_needed()
    _materials.pop(material_id, None)
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                MaterialRepository(db).delete(material_id)
            finally:
                # Release the session even if the delete raises.
                db.close()
        except Exception as e:
            logger.warning(f"DB delete failed: {e}")
    return {"status": "deleted", "material_id": material_id}

View File

@@ -0,0 +1,489 @@
"""
Classroom API - Pydantic Models.
Alle Request/Response Models fuer die Classroom API.
"""
from typing import Dict, List, Optional, Any
from pydantic import BaseModel, Field
# === Request Models ===
class CreateSessionRequest(BaseModel):
    """Request body for creating a new session."""

    teacher_id: str = Field(..., description="ID des Lehrers")
    class_id: str = Field(..., description="ID der Klasse")
    subject: str = Field(..., description="Unterrichtsfach")
    topic: Optional[str] = Field(default=None, description="Thema der Stunde")
    # Optional per-phase overrides, phase name -> minutes.
    phase_durations: Optional[Dict[str, int]] = Field(
        default=None,
        description="Optionale individuelle Phasendauern in Minuten",
    )
class NotesRequest(BaseModel):
    """Request body for updating lesson notes and homework text."""

    notes: str = Field(default="", description="Stundennotizen")
    homework: str = Field(default="", description="Hausaufgaben")
class ExtendTimeRequest(BaseModel):
    """Request body for extending the current phase (feature f28)."""

    # Extra minutes to add; validated to the range 1-30, default 5.
    minutes: int = Field(default=5, ge=1, le=30, description="Zusaetzliche Minuten (1-30)")
# === Response Models ===
class PhaseInfo(BaseModel):
    """Information about a single lesson phase."""

    phase: str
    display_name: str
    icon: str
    duration_minutes: int
    # Position of the phase relative to the running lesson.
    is_completed: bool
    is_current: bool
    is_future: bool
class TimerStatus(BaseModel):
    """Timer status of a phase."""

    remaining_seconds: int
    remaining_formatted: str
    total_seconds: int
    total_formatted: str
    elapsed_seconds: int
    elapsed_formatted: str
    percentage_remaining: int
    percentage_elapsed: int
    # No default given, so this field must be supplied.
    percentage: int = Field(description="Alias fuer percentage_remaining (Visual Timer)")
    warning: bool
    overtime: bool
    overtime_seconds: int
    overtime_formatted: Optional[str]
    is_paused: bool = Field(default=False, description="Ist der Timer pausiert?")
class SuggestionItem(BaseModel):
    """A single activity suggestion."""

    id: str
    title: str
    description: str
    activity_type: str
    estimated_minutes: int
    icon: str
    content_url: Optional[str]
class SessionResponse(BaseModel):
    """Complete session response."""

    session_id: str
    teacher_id: str
    class_id: str
    subject: str
    topic: Optional[str]
    current_phase: str
    phase_display_name: str
    # Timestamps are carried as strings (None when not set).
    phase_started_at: Optional[str]
    lesson_started_at: Optional[str]
    lesson_ended_at: Optional[str]
    timer: TimerStatus
    phases: List[PhaseInfo]
    phase_history: List[Dict[str, Any]]
    notes: str
    homework: str
    is_active: bool
    is_ended: bool
    is_paused: bool = Field(default=False, description="Ist die Stunde pausiert?")
class SuggestionsResponse(BaseModel):
    """Response payload for activity suggestions."""

    suggestions: List[SuggestionItem]
    current_phase: str
    phase_display_name: str
    total_available: int
class PhasesListResponse(BaseModel):
    """List of all available phases."""

    # Each phase is a free-form dict; its keys are not fixed by this model.
    phases: List[Dict[str, Any]]
class ActiveSessionsResponse(BaseModel):
    """List of active sessions."""

    # Each session is a free-form dict; its keys are not fixed by this model.
    sessions: List[Dict[str, Any]]
    count: int
# === Session History Models (Feature f17) ===
class SessionHistoryItem(BaseModel):
    """One entry in the session history."""

    session_id: str
    teacher_id: str
    class_id: str
    subject: str
    topic: Optional[str]
    lesson_started_at: Optional[str]
    lesson_ended_at: Optional[str]
    # Declared as a plain int (not Optional).
    total_duration_minutes: int
    phases_completed: int
    notes: str
    homework: str
class SessionHistoryResponse(BaseModel):
    """Response payload for the session history (paginated)."""

    sessions: List[SessionHistoryItem]
    total_count: int
    page: int
    page_size: int
# === Template Models ===
class TemplatePhaseConfig(BaseModel):
    """Configuration of one phase inside a template."""

    phase: str
    duration_minutes: int
    # A fresh empty list is created per instance.
    activities: List[str] = Field(default_factory=list)
    notes: str = ""
class CreateTemplateRequest(BaseModel):
    """Request body for creating a template."""

    name: str = Field(..., min_length=1, max_length=100)
    description: str = Field(default="", max_length=500)
    subject: str = Field(..., min_length=1)
    grade_level: Optional[str] = None
    phase_configs: Optional[List[TemplatePhaseConfig]] = None
    tags: List[str] = Field(default_factory=list)
    is_public: bool = False
class UpdateTemplateRequest(BaseModel):
    """Request body for updating a template; every field defaults to None."""

    name: Optional[str] = Field(default=None, min_length=1, max_length=100)
    description: Optional[str] = Field(default=None, max_length=500)
    subject: Optional[str] = None
    grade_level: Optional[str] = None
    phase_configs: Optional[List[TemplatePhaseConfig]] = None
    tags: Optional[List[str]] = None
    is_public: Optional[bool] = None
class TemplateResponse(BaseModel):
    """Response payload for a single template."""

    template_id: str
    name: str
    description: str
    subject: str
    grade_level: Optional[str]
    phase_configs: List[TemplatePhaseConfig]
    tags: List[str]
    is_public: bool
    is_system: bool
    created_by: str
    # Timestamps are carried as strings.
    created_at: str
    updated_at: Optional[str]
    usage_count: int
class TemplateListResponse(BaseModel):
    """Response payload for a list of templates."""

    templates: List[TemplateResponse]
    total_count: int
class CreateFromTemplateRequest(BaseModel):
    """Request body for creating a session from a template."""

    template_id: str
    class_id: str
    topic: Optional[str] = None
    # Optional mapping of str -> int; presumably phase name -> minutes (TODO confirm).
    phase_duration_overrides: Optional[Dict[str, int]] = None
# === Homework Models ===
class CreateHomeworkRequest(BaseModel):
    """Request body for creating a homework entry."""

    session_id: Optional[str] = None
    teacher_id: str
    class_id: str
    subject: str
    title: str = Field(..., min_length=1, max_length=200)
    description: str = Field(default="", max_length=2000)
    # Passed as a string; the expected format is not fixed by this model.
    due_date: Optional[str] = None
    estimated_minutes: Optional[int] = Field(default=None, ge=5, le=180)
    materials: List[str] = Field(default_factory=list)
    tags: List[str] = Field(default_factory=list)
class UpdateHomeworkRequest(BaseModel):
    """Request body for updating a homework entry; every field defaults to None."""

    title: Optional[str] = Field(default=None, min_length=1, max_length=200)
    description: Optional[str] = Field(default=None, max_length=2000)
    due_date: Optional[str] = None
    estimated_minutes: Optional[int] = Field(default=None, ge=5, le=180)
    status: Optional[str] = None
    materials: Optional[List[str]] = None
    tags: Optional[List[str]] = None
class HomeworkResponse(BaseModel):
    """Response payload for a homework entry."""

    homework_id: str
    session_id: Optional[str]
    teacher_id: str
    class_id: str
    subject: str
    title: str
    description: str
    due_date: Optional[str]
    estimated_minutes: Optional[int]
    status: str
    materials: List[str]
    tags: List[str]
    # Timestamps are carried as strings.
    created_at: str
    updated_at: Optional[str]
class HomeworkListResponse(BaseModel):
    """Response payload for a list of homework entries."""

    homework: List[HomeworkResponse]
    total_count: int
# === Material Models ===
class CreateMaterialRequest(BaseModel):
    """Request body for creating a material."""

    teacher_id: str
    title: str = Field(..., min_length=1, max_length=200)
    description: str = Field(default="", max_length=1000)
    material_type: str = Field(..., description="Type: link, document, video, interactive, image")
    content_url: Optional[str] = None
    # Free-form payload; its keys are not fixed by this model.
    content_data: Optional[Dict[str, Any]] = None
    phase: Optional[str] = None
    subject: Optional[str] = None
    tags: List[str] = Field(default_factory=list)
    is_public: bool = False
class UpdateMaterialRequest(BaseModel):
    """Request body for updating a material; every field defaults to None."""

    title: Optional[str] = Field(default=None, min_length=1, max_length=200)
    description: Optional[str] = Field(default=None, max_length=1000)
    material_type: Optional[str] = None
    content_url: Optional[str] = None
    content_data: Optional[Dict[str, Any]] = None
    phase: Optional[str] = None
    subject: Optional[str] = None
    tags: Optional[List[str]] = None
    is_public: Optional[bool] = None
class MaterialResponse(BaseModel):
    """Response payload for a material."""

    material_id: str
    teacher_id: str
    title: str
    description: str
    material_type: str
    content_url: Optional[str]
    content_data: Optional[Dict[str, Any]]
    phase: Optional[str]
    subject: Optional[str]
    tags: List[str]
    is_public: bool
    usage_count: int
    # Timestamps are carried as strings.
    created_at: str
    updated_at: Optional[str]
class MaterialListResponse(BaseModel):
    """Response payload for a list of materials."""

    materials: List[MaterialResponse]
    total_count: int
# === Feedback Models ===
class CreateFeedbackRequest(BaseModel):
    """Request body for submitting feedback."""

    teacher_id: str
    session_id: Optional[str] = None
    # category/priority are plain strings; the allowed values are only
    # listed in the field descriptions, not enforced by this model.
    category: str = Field(..., description="bug, feature, usability, content, other")
    title: str = Field(..., min_length=1, max_length=200)
    description: str = Field(..., min_length=10, max_length=5000)
    priority: str = Field(default="medium", description="low, medium, high, critical")
    context_data: Optional[Dict[str, Any]] = None
class FeedbackResponse(BaseModel):
    """Response payload for a feedback entry."""

    feedback_id: str
    teacher_id: str
    session_id: Optional[str]
    category: str
    title: str
    description: str
    priority: str
    status: str
    context_data: Optional[Dict[str, Any]]
    admin_notes: Optional[str]
    # Timestamps are carried as strings.
    created_at: str
    updated_at: Optional[str]
class FeedbackListResponse(BaseModel):
    """Response payload for a list of feedback entries."""

    feedback: List[FeedbackResponse]
    total_count: int
class FeedbackStatsResponse(BaseModel):
    """Response payload for feedback statistics."""

    total: int
    # Counters keyed by category / status / priority name.
    by_category: Dict[str, int]
    by_status: Dict[str, int]
    by_priority: Dict[str, int]
# === Settings Models ===
class PhaseDurationsUpdate(BaseModel):
    """Update of phase durations; each phase has its own allowed range."""

    einstieg: Optional[int] = Field(default=None, ge=1, le=30)
    erarbeitung: Optional[int] = Field(default=None, ge=5, le=45)
    sicherung: Optional[int] = Field(default=None, ge=3, le=20)
    transfer: Optional[int] = Field(default=None, ge=3, le=20)
    reflexion: Optional[int] = Field(default=None, ge=2, le=15)
class PreferencesUpdate(BaseModel):
    """Update of teacher preferences; every field defaults to None."""

    auto_advance: Optional[bool] = None
    sound_enabled: Optional[bool] = None
    notification_enabled: Optional[bool] = None
    theme: Optional[str] = None
    language: Optional[str] = None
class TeacherSettingsResponse(BaseModel):
    """Response payload for teacher settings."""

    teacher_id: str
    phase_durations: Dict[str, int]
    preferences: Dict[str, Any]
    # Timestamps are carried as strings.
    created_at: str
    updated_at: Optional[str]
# === Analytics Models ===
class ReflectionRequest(BaseModel):
    """Request body for creating or updating a reflection."""

    session_id: str
    teacher_id: str
    # All ratings are required and validated to the range 1-5.
    overall_rating: int = Field(..., ge=1, le=5)
    time_management_rating: int = Field(..., ge=1, le=5)
    student_engagement_rating: int = Field(..., ge=1, le=5)
    goals_achieved_rating: int = Field(..., ge=1, le=5)
    what_worked_well: str = Field(default="", max_length=2000)
    what_to_improve: str = Field(default="", max_length=2000)
    notes_for_next_time: str = Field(default="", max_length=2000)
    tags: List[str] = Field(default_factory=list)
class ReflectionResponse(BaseModel):
    """Response payload for a reflection."""

    reflection_id: str
    session_id: str
    teacher_id: str
    overall_rating: int
    time_management_rating: int
    student_engagement_rating: int
    goals_achieved_rating: int
    what_worked_well: str
    what_to_improve: str
    notes_for_next_time: str
    tags: List[str]
    # Timestamps are carried as strings.
    created_at: str
    updated_at: Optional[str]
# === Teacher Context Models (v1 API) ===
class TeacherContextResponse(BaseModel):
    """Response payload for the teacher context."""

    teacher_id: str
    federal_state: Optional[str]
    school_type: Optional[str]
    subjects: List[str]
    class_levels: List[str]
    current_macro_phase: Optional[str]
    onboarding_completed: bool
    preferences: Dict[str, Any]
    # Timestamps are carried as strings.
    created_at: str
    updated_at: Optional[str]
class UpdateTeacherContextRequest(BaseModel):
    """Request body for updating the teacher context; every field defaults to None."""

    federal_state: Optional[str] = None
    school_type: Optional[str] = None
    subjects: Optional[List[str]] = None
    class_levels: Optional[List[str]] = None
    current_macro_phase: Optional[str] = None
    preferences: Optional[Dict[str, Any]] = None
class EventResponse(BaseModel):
    """Response payload for a school-year event."""

    event_id: str
    teacher_id: str
    title: str
    event_type: str
    # Dates and timestamps are carried as strings.
    start_date: str
    end_date: Optional[str]
    description: Optional[str]
    status: str
    metadata: Optional[Dict[str, Any]]
    created_at: str
class CreateEventRequest(BaseModel):
    """Request body for creating an event."""

    title: str = Field(..., min_length=1, max_length=200)
    event_type: str
    start_date: str
    end_date: Optional[str] = None
    description: Optional[str] = Field(default=None, max_length=1000)
    metadata: Optional[Dict[str, Any]] = None
class RoutineResponse(BaseModel):
    """Response payload for a recurring routine."""

    routine_id: str
    teacher_id: str
    title: str
    routine_type: str
    recurrence_pattern: str
    day_of_week: Optional[int]
    time_of_day: Optional[str]
    description: Optional[str]
    is_active: bool
    metadata: Optional[Dict[str, Any]]
    created_at: str
class CreateRoutineRequest(BaseModel):
    """Request body for creating a routine."""

    title: str = Field(..., min_length=1, max_length=200)
    routine_type: str
    recurrence_pattern: str
    # Validated to 0-6; which weekday 0 stands for is not defined here.
    day_of_week: Optional[int] = Field(default=None, ge=0, le=6)
    time_of_day: Optional[str] = None
    description: Optional[str] = Field(default=None, max_length=500)
    metadata: Optional[Dict[str, Any]] = None

View File

@@ -0,0 +1,434 @@
"""
Classroom API - Session Endpoints.
Endpoints fuer Session-Management, Timer, Phasen-Kontrolle und History.
"""
from uuid import uuid4
from typing import Dict, List, Optional
from datetime import datetime
import logging
from fastapi import APIRouter, HTTPException, Query
from classroom_engine import (
LessonPhase,
LessonSession,
LessonStateMachine,
PhaseTimer,
SuggestionEngine,
)
from .models import (
CreateSessionRequest,
NotesRequest,
ExtendTimeRequest,
SessionResponse,
TimerStatus,
SuggestionItem,
SuggestionsResponse,
PhaseInfo,
SessionHistoryItem,
SessionHistoryResponse,
)
from .shared import (
init_db_if_needed,
get_session_or_404,
persist_session,
get_sessions,
add_session,
ws_manager,
DB_ENABLED,
logger,
)
# Database imports
try:
from classroom_engine.database import SessionLocal
from classroom_engine.repository import SessionRepository
except ImportError:
pass
# Router that collects all session endpoints defined in this module.
router = APIRouter(tags=["Sessions"])
def build_session_response(session: LessonSession) -> SessionResponse:
    """Assemble the complete SessionResponse for a lesson session.

    Combines the session's own attributes with the timer status from
    PhaseTimer and the phase overview / activity flags from
    LessonStateMachine.
    """
    def _iso(moment):
        # Serialize an optional datetime; unset values stay None.
        return moment.isoformat() if moment else None

    state_machine = LessonStateMachine()
    timer_payload = PhaseTimer().get_phase_status(session)
    phase_entries = state_machine.get_phases_info(session)
    return SessionResponse(
        session_id=session.session_id,
        teacher_id=session.teacher_id,
        class_id=session.class_id,
        subject=session.subject,
        topic=session.topic,
        current_phase=session.current_phase.value,
        phase_display_name=session.get_phase_display_name(),
        phase_started_at=_iso(session.phase_started_at),
        lesson_started_at=_iso(session.lesson_started_at),
        lesson_ended_at=_iso(session.lesson_ended_at),
        timer=TimerStatus(**timer_payload),
        phases=[PhaseInfo(**entry) for entry in phase_entries],
        phase_history=session.phase_history,
        notes=session.notes,
        homework=session.homework,
        is_active=state_machine.is_lesson_active(session),
        is_ended=state_machine.is_lesson_ended(session),
        is_paused=session.is_paused,
    )
async def notify_phase_change(session_id: str, phase: str, extra_data: dict = None):
    """Notify WebSocket clients about a phase change.

    The broadcast payload always contains ``phase``; entries of
    ``extra_data`` (if any) are merged on top of it.
    """
    payload = {"phase": phase, **(extra_data or {})}
    await ws_manager.broadcast_phase_change(session_id, payload)
async def notify_session_ended(session_id: str):
    """Notify WebSocket clients that the given session has ended."""
    broadcast = ws_manager.broadcast_session_ended
    await broadcast(session_id)
# === Session CRUD Endpoints ===
@router.post("/sessions", response_model=SessionResponse)
async def create_session(request: CreateSessionRequest) -> SessionResponse:
    """
    Create a new lesson (session).

    After creation the lesson is in state NOT_STARTED.
    Call /sessions/{id}/start to start it.
    """
    init_db_if_needed()
    # Default minutes per phase; request values override individual entries.
    default_durations = {
        "einstieg": 8,
        "erarbeitung": 20,
        "sicherung": 10,
        "transfer": 7,
        "reflexion": 5,
    }
    overrides = request.phase_durations if request.phase_durations else {}
    merged_durations = {**default_durations, **overrides}
    new_session = LessonSession(
        session_id=str(uuid4()),
        teacher_id=request.teacher_id,
        class_id=request.class_id,
        subject=request.subject,
        topic=request.topic,
        phase_durations=merged_durations,
    )
    add_session(new_session)
    return build_session_response(new_session)
@router.get("/sessions/{session_id}", response_model=SessionResponse)
async def get_session(session_id: str) -> SessionResponse:
    """
    Fetch the current status of a session.

    Contains all information including timer status and phase timeline.
    """
    return build_session_response(get_session_or_404(session_id))
@router.post("/sessions/{session_id}/start", response_model=SessionResponse)
async def start_lesson(session_id: str) -> SessionResponse:
    """
    Start the lesson.

    Moves from NOT_STARTED to the first phase (EINSTIEG).
    Responds with 400 if the lesson was already started.
    """
    session = get_session_or_404(session_id)
    already_started = session.current_phase != LessonPhase.NOT_STARTED
    if already_started:
        message = f"Stunde bereits gestartet (aktuelle Phase: {session.current_phase.value})"
        raise HTTPException(status_code=400, detail=message)
    session = LessonStateMachine().transition(session, LessonPhase.EINSTIEG)
    persist_session(session)
    return build_session_response(session)
@router.post("/sessions/{session_id}/next-phase", response_model=SessionResponse)
async def next_phase(session_id: str) -> SessionResponse:
    """
    Advance to the next phase.

    Responds with 400 when no next phase is available (e.g. at ENDED).
    WebSocket clients are notified about the new phase.
    """
    session = get_session_or_404(session_id)
    state_machine = LessonStateMachine()
    upcoming = state_machine.next_phase(session.current_phase)
    if not upcoming:
        message = f"Keine naechste Phase verfuegbar (aktuelle Phase: {session.current_phase.value})"
        raise HTTPException(status_code=400, detail=message)
    session = state_machine.transition(session, upcoming)
    persist_session(session)
    # Build the response before broadcasting, as the original does.
    response = build_session_response(session)
    broadcast_extras = {
        "phase_display_name": session.get_phase_display_name(),
        "is_ended": session.current_phase == LessonPhase.ENDED
    }
    await notify_phase_change(session_id, session.current_phase.value, broadcast_extras)
    return response
@router.post("/sessions/{session_id}/end", response_model=SessionResponse)
async def end_lesson(session_id: str) -> SessionResponse:
    """
    End the lesson immediately.

    Can be called from any active phase; responds with 400 when the
    lesson has already ended or has not been started yet.
    """
    session = get_session_or_404(session_id)
    phase_now = session.current_phase
    if phase_now == LessonPhase.ENDED:
        raise HTTPException(status_code=400, detail="Stunde bereits beendet")
    if phase_now == LessonPhase.NOT_STARTED:
        raise HTTPException(status_code=400, detail="Stunde noch nicht gestartet")
    state_machine = LessonStateMachine()
    # Step through the remaining phases one transition at a time until ENDED
    # (or until the state machine offers no further phase).
    while session.current_phase != LessonPhase.ENDED:
        upcoming = state_machine.next_phase(session.current_phase)
        if not upcoming:
            break
        session = state_machine.transition(session, upcoming)
    persist_session(session)
    await notify_session_ended(session_id)
    return build_session_response(session)
# === Quick Actions (Feature f26/f27/f28) ===
@router.post("/sessions/{session_id}/pause", response_model=SessionResponse)
async def toggle_pause(session_id: str) -> SessionResponse:
    """
    Pause or resume the running lesson (feature f27).

    Toggle: if paused -> resume, if running -> pause.
    Responds with 400 when the lesson is not active.
    """
    session = get_session_or_404(session_id)
    if session.current_phase in (LessonPhase.NOT_STARTED, LessonPhase.ENDED):
        raise HTTPException(status_code=400, detail="Stunde ist nicht aktiv")
    if not session.is_paused:
        # Running -> pause: remember when the pause began.
        session.is_paused = True
        session.pause_started_at = datetime.utcnow()
    else:
        # Paused -> resume: add the finished pause to the accumulated total.
        paused_since = session.pause_started_at
        if paused_since:
            paused_for = (datetime.utcnow() - paused_since).total_seconds()
            session.total_paused_seconds += int(paused_for)
        session.is_paused = False
        session.pause_started_at = None
    persist_session(session)
    return build_session_response(session)
@router.post("/sessions/{session_id}/extend", response_model=SessionResponse)
async def extend_phase(session_id: str, request: ExtendTimeRequest) -> SessionResponse:
    """
    Extend the current phase by additional minutes (feature f28).

    Responds with 400 when the lesson is not active.
    """
    session = get_session_or_404(session_id)
    if session.current_phase in (LessonPhase.NOT_STARTED, LessonPhase.ENDED):
        raise HTTPException(status_code=400, detail="Stunde ist nicht aktiv")
    phase_key = session.current_phase.value
    # A phase without a configured duration is treated as 10 minutes.
    session.phase_durations[phase_key] = session.phase_durations.get(phase_key, 10) + request.minutes
    persist_session(session)
    return build_session_response(session)
@router.get("/sessions/{session_id}/timer", response_model=TimerStatus)
async def get_timer(session_id: str) -> TimerStatus:
    """
    Fetch the timer status of the current phase.
    """
    session = get_session_or_404(session_id)
    phase_status = PhaseTimer().get_phase_status(session)
    return TimerStatus(**phase_status)
@router.get("/sessions/{session_id}/suggestions", response_model=SuggestionsResponse)
async def get_suggestions(
    session_id: str,
    limit: int = Query(3, ge=1, le=10, description="Anzahl Vorschlaege")
) -> SuggestionsResponse:
    """
    Fetch phase-specific activity suggestions.
    """
    session = get_session_or_404(session_id)
    raw = SuggestionEngine().get_suggestions_response(session, limit)
    items = [SuggestionItem(**entry) for entry in raw["suggestions"]]
    return SuggestionsResponse(
        suggestions=items,
        current_phase=raw["current_phase"],
        phase_display_name=raw["phase_display_name"],
        total_available=raw["total_available"],
    )
@router.put("/sessions/{session_id}/notes", response_model=SessionResponse)
async def update_notes(session_id: str, request: NotesRequest) -> SessionResponse:
    """Replace the notes and homework text of a lesson session and persist it."""
    lesson = get_session_or_404(session_id)
    lesson.notes, lesson.homework = request.notes, request.homework
    persist_session(lesson)
    return build_session_response(lesson)
@router.delete("/sessions/{session_id}")
async def delete_session(session_id: str) -> Dict[str, str]:
    """
    Delete a session from the in-memory cache and, best effort, from the DB.

    Only sessions currently present in the in-memory cache can be deleted;
    any other id yields a 404. A failing DB delete is logged but does not
    fail the request.

    Raises:
        HTTPException: 404 when the session is not in the in-memory cache.
    """
    sessions = get_sessions()
    if session_id not in sessions:
        raise HTTPException(status_code=404, detail="Session nicht gefunden")
    del sessions[session_id]

    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                SessionRepository(db).delete(session_id)
            finally:
                # Always release the DB session, even when the delete raised.
                db.close()
        except Exception as e:
            logger.error(f"Failed to delete session {session_id} from DB: {e}")

    return {"status": "deleted", "session_id": session_id}
# === Session History (Feature f17) ===
@router.get("/history/{teacher_id}", response_model=SessionHistoryResponse)
async def get_session_history(
    teacher_id: str,
    limit: int = Query(20, ge=1, le=100, description="Max. Anzahl Eintraege"),
    offset: int = Query(0, ge=0, description="Offset fuer Pagination")
) -> SessionHistoryResponse:
    """
    Return the history of ended sessions for a teacher (feature f17).

    Without a database the history is built from the in-memory session cache,
    newest ended lesson first. With a database it is read through the
    repository and the total is counted with a separate query.

    Args:
        teacher_id: Teacher whose ended sessions are listed.
        limit: Page size (1-100).
        offset: Number of entries to skip.

    Raises:
        HTTPException: 500 when reading the history from the DB fails.
    """
    init_db_if_needed()
    page = offset // limit + 1

    if not DB_ENABLED:
        ended_sessions = [
            s for s in get_sessions().values()
            if s.teacher_id == teacher_id and s.current_phase == LessonPhase.ENDED
        ]
        # Sessions without an end timestamp sort last.
        ended_sessions.sort(
            key=lambda x: x.lesson_ended_at or datetime.min,
            reverse=True
        )
        items = []
        for s in ended_sessions[offset:offset + limit]:
            duration = None
            if s.lesson_started_at and s.lesson_ended_at:
                duration = int((s.lesson_ended_at - s.lesson_started_at).total_seconds() / 60)
            items.append(SessionHistoryItem(
                session_id=s.session_id,
                teacher_id=s.teacher_id,
                class_id=s.class_id,
                subject=s.subject,
                topic=s.topic,
                lesson_started_at=s.lesson_started_at.isoformat() if s.lesson_started_at else None,
                lesson_ended_at=s.lesson_ended_at.isoformat() if s.lesson_ended_at else None,
                total_duration_minutes=duration,
                phases_completed=len(s.phase_history),
                notes=s.notes,
                homework=s.homework,
            ))
        return SessionHistoryResponse(
            sessions=items,
            total_count=len(ended_sessions),
            page=page,
            page_size=limit,
        )

    try:
        db = SessionLocal()
        try:
            repo = SessionRepository(db)
            db_sessions = repo.get_history_by_teacher(teacher_id, limit, offset)

            from classroom_engine.db_models import LessonSessionDB, LessonPhaseEnum
            total_count = db.query(LessonSessionDB).filter(
                LessonSessionDB.teacher_id == teacher_id,
                LessonSessionDB.current_phase == LessonPhaseEnum.ENDED
            ).count()

            items = []
            for db_session in db_sessions:
                duration = None
                if db_session.lesson_started_at and db_session.lesson_ended_at:
                    duration = int((db_session.lesson_ended_at - db_session.lesson_started_at).total_seconds() / 60)
                phase_history = db_session.phase_history or []
                items.append(SessionHistoryItem(
                    session_id=db_session.id,
                    teacher_id=db_session.teacher_id,
                    class_id=db_session.class_id,
                    subject=db_session.subject,
                    topic=db_session.topic,
                    lesson_started_at=db_session.lesson_started_at.isoformat() if db_session.lesson_started_at else None,
                    lesson_ended_at=db_session.lesson_ended_at.isoformat() if db_session.lesson_ended_at else None,
                    total_duration_minutes=duration,
                    phases_completed=len(phase_history),
                    notes=db_session.notes or "",
                    homework=db_session.homework or "",
                ))
        finally:
            # Release the DB session on success and on every error path.
            db.close()

        return SessionHistoryResponse(
            sessions=items,
            total_count=total_count,
            page=page,
            page_size=limit,
        )
    except Exception as e:
        logger.error(f"Failed to get session history: {e}")
        raise HTTPException(status_code=500, detail="Fehler beim Laden der History")

View File

@@ -0,0 +1,201 @@
"""
Classroom API - Settings Endpoints.
Endpoints fuer Lehrer-Einstellungen.
"""
from typing import Dict, Optional, Any
from datetime import datetime
import logging
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
from classroom_engine import get_default_durations
from .shared import init_db_if_needed, DB_ENABLED, logger
try:
from classroom_engine.database import SessionLocal
from classroom_engine.repository import TeacherSettingsRepository
except ImportError:
pass
# Router that collects the teacher-settings endpoints defined in this module.
router = APIRouter(tags=["Settings"])
# In-memory storage (fallback): settings dicts keyed by teacher_id.
_settings: Dict[str, dict] = {}
# === Pydantic Models ===
class PhaseDurationsUpdate(BaseModel):
    """Partial update of the phase durations; a field left as None is not changed."""

    einstieg: Optional[int] = Field(default=None, ge=1, le=30)
    erarbeitung: Optional[int] = Field(default=None, ge=5, le=45)
    sicherung: Optional[int] = Field(default=None, ge=3, le=20)
    transfer: Optional[int] = Field(default=None, ge=3, le=20)
    reflexion: Optional[int] = Field(default=None, ge=2, le=15)
class PreferencesUpdate(BaseModel):
    """Partial update of teacher preferences; a field left as None is not changed."""
    # Every field is optional so a request can carry any subset of preferences.
    auto_advance: Optional[bool] = None
    sound_enabled: Optional[bool] = None
    notification_enabled: Optional[bool] = None
    theme: Optional[str] = None
    language: Optional[str] = None
class TeacherSettingsResponse(BaseModel):
    """Response model for a teacher's settings."""
    teacher_id: str
    # Duration per phase, keyed by phase name.
    phase_durations: Dict[str, int]
    # Preference values keyed by preference name.
    preferences: Dict[str, Any]
    # Timestamps are transported as strings; the defaults use ISO-8601.
    created_at: str
    updated_at: Optional[str]
# === Helper Functions ===
def get_default_settings(teacher_id: str) -> dict:
    """
    Build a fresh default settings record for the given teacher.

    The record uses the default phase durations, a fixed set of preference
    defaults, the current UTC time as creation timestamp and no update
    timestamp.
    """
    default_preferences = {
        "auto_advance": False,
        "sound_enabled": True,
        "notification_enabled": True,
        "theme": "light",
        "language": "de",
    }
    record = {
        "teacher_id": teacher_id,
        "phase_durations": get_default_durations(),
        "preferences": default_preferences,
        "created_at": datetime.utcnow().isoformat(),
        "updated_at": None,
    }
    return record
# === Endpoints ===
@router.get("/settings/{teacher_id}", response_model=TeacherSettingsResponse)
async def get_teacher_settings(teacher_id: str) -> TeacherSettingsResponse:
    """
    Return the settings of a teacher, creating defaults on first access.

    Lookup order: in-memory cache, then the database (when enabled), then
    freshly created defaults. Defaults are cached and, best effort, stored
    in the database. DB failures are logged and never fail the request.
    """
    init_db_if_needed()

    # 1) In-memory cache.
    if teacher_id in _settings:
        return TeacherSettingsResponse(**_settings[teacher_id])

    # 2) Database.
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = TeacherSettingsRepository(db)
                db_settings = repo.get_by_teacher(teacher_id)
                # Convert while the DB session is still open, so the ORM
                # object is not read after it has been detached.
                settings_data = repo.to_dict(db_settings) if db_settings else None
            finally:
                db.close()
            if settings_data is not None:
                _settings[teacher_id] = settings_data
                return TeacherSettingsResponse(**settings_data)
        except Exception as e:
            logger.warning(f"DB read failed for settings: {e}")

    # 3) Create defaults, cache them and try to persist them.
    settings_data = get_default_settings(teacher_id)
    _settings[teacher_id] = settings_data

    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                TeacherSettingsRepository(db).create(settings_data)
            finally:
                db.close()
        except Exception as e:
            logger.warning(f"DB persist failed for settings: {e}")

    return TeacherSettingsResponse(**settings_data)
@router.put("/settings/{teacher_id}/durations", response_model=TeacherSettingsResponse)
async def update_phase_durations(
    teacher_id: str,
    request: PhaseDurationsUpdate
) -> TeacherSettingsResponse:
    """
    Update the phase durations of a teacher.

    Only the durations present in the request (not None) are changed. The
    result is cached in memory and, best effort, written to the database;
    a failing DB write is logged and does not fail the request.
    """
    init_db_if_needed()

    # Called for its side effect: loads (or creates) the teacher's settings
    # and places them in the in-memory cache.
    await get_teacher_settings(teacher_id)
    settings_data = _settings.get(teacher_id)
    if settings_data is None:
        settings_data = get_default_settings(teacher_id)

    # Work on a copy so a dict that might be shared with the defaults is
    # never modified in place.
    durations = dict(settings_data["phase_durations"])
    for phase in ("einstieg", "erarbeitung", "sicherung", "transfer", "reflexion"):
        value = getattr(request, phase)
        if value is not None:
            durations[phase] = value

    settings_data["phase_durations"] = durations
    settings_data["updated_at"] = datetime.utcnow().isoformat()

    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                TeacherSettingsRepository(db).update_durations(teacher_id, durations)
            finally:
                # Release the DB session even when the update raised.
                db.close()
        except Exception as e:
            logger.warning(f"DB update failed for durations: {e}")

    _settings[teacher_id] = settings_data
    return TeacherSettingsResponse(**settings_data)
@router.put("/settings/{teacher_id}/preferences", response_model=TeacherSettingsResponse)
async def update_preferences(
    teacher_id: str,
    request: PreferencesUpdate
) -> TeacherSettingsResponse:
    """
    Update the preferences of a teacher.

    Only the preferences present in the request (not None) are changed. The
    result is cached in memory and, best effort, written to the database;
    a failing DB write is logged and does not fail the request.
    """
    init_db_if_needed()

    # Called for its side effect: loads (or creates) the teacher's settings
    # and places them in the in-memory cache.
    await get_teacher_settings(teacher_id)
    settings_data = _settings.get(teacher_id)
    if settings_data is None:
        settings_data = get_default_settings(teacher_id)

    # Work on a copy so the previously stored dict is never modified in place.
    prefs = dict(settings_data["preferences"])
    for key in ("auto_advance", "sound_enabled", "notification_enabled", "theme", "language"):
        value = getattr(request, key)
        if value is not None:
            prefs[key] = value

    settings_data["preferences"] = prefs
    settings_data["updated_at"] = datetime.utcnow().isoformat()

    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                TeacherSettingsRepository(db).update_preferences(teacher_id, prefs)
            finally:
                # Release the DB session even when the update raised.
                db.close()
        except Exception as e:
            logger.warning(f"DB update failed for preferences: {e}")

    _settings[teacher_id] = settings_data
    return TeacherSettingsResponse(**settings_data)

View File

@@ -0,0 +1,341 @@
"""
Classroom API - Shared State und Helper Functions.
Zentrale Komponenten die von allen Classroom-Modulen verwendet werden.
"""
from typing import Dict, List, Optional, Any
from datetime import datetime
import os
import logging
import asyncio
import json
from fastapi import HTTPException, WebSocket, Request
# Auth imports (Phase 7: Keycloak Integration)
try:
from auth import get_current_user
AUTH_ENABLED = True
except ImportError:
AUTH_ENABLED = False
logging.warning("Auth module not available, using demo user fallback")
from classroom_engine import (
LessonPhase,
LessonSession,
LessonStateMachine,
PhaseTimer,
)
# Database imports (Feature f22)
try:
from classroom_engine.database import get_db, init_db, SessionLocal
from classroom_engine.repository import SessionRepository
DB_ENABLED = True
except ImportError:
DB_ENABLED = False
logging.warning("Classroom DB not available, using in-memory storage only")
logger = logging.getLogger(__name__)
# === WebSocket Connection Manager (Phase 6: Real-time) ===
class ConnectionManager:
    """
    Keeps track of WebSocket connections used for real-time timer updates.

    Connections are grouped by session id so a message can be broadcast to
    every client attached to one session (several devices may watch the same
    session). A reverse map from socket to session id is used on disconnect.
    Sockets that fail during a broadcast are removed automatically.
    """

    def __init__(self):
        # session_id -> set of WebSockets attached to that session
        self._connections: Dict[str, set] = {}
        # WebSocket -> session_id (reverse lookup)
        self._websocket_sessions: Dict[WebSocket, str] = {}
        # Guards both maps against interleaved coroutine access.
        self._lock = asyncio.Lock()

    async def connect(self, websocket: WebSocket, session_id: str):
        """Accept the socket and register it under the given session."""
        await websocket.accept()
        async with self._lock:
            clients = self._connections.setdefault(session_id, set())
            clients.add(websocket)
            self._websocket_sessions[websocket] = session_id
        logger.info(f"WebSocket connected to session {session_id}, total clients: {len(self._connections[session_id])}")

    async def disconnect(self, websocket: WebSocket):
        """Unregister a socket; a session entry without sockets is dropped."""
        async with self._lock:
            session_id = self._websocket_sessions.pop(websocket, None)
            clients = self._connections.get(session_id) if session_id else None
            if clients is not None:
                clients.discard(websocket)
                if not clients:
                    del self._connections[session_id]
        logger.info(f"WebSocket disconnected from session {session_id}")

    async def broadcast_to_session(self, session_id: str, message: dict):
        """Send a JSON-encoded message to every client of a session."""
        # Snapshot under the lock; sending happens outside of it because
        # disconnect() needs the same lock for cleanup.
        async with self._lock:
            targets = set(self._connections.get(session_id, ()))
        if not targets:
            return

        encoded = json.dumps(message)
        failed = []
        for client in targets:
            try:
                await client.send_text(encoded)
            except Exception as e:
                logger.warning(f"Failed to send to websocket: {e}")
                failed.append(client)

        # Drop every socket that could not be written to.
        for client in failed:
            await self.disconnect(client)

    async def broadcast_timer_update(self, session_id: str, timer_data: dict):
        """Broadcast a "timer_update" event carrying the timer data."""
        event = {
            "type": "timer_update",
            "data": timer_data
        }
        await self.broadcast_to_session(session_id, event)

    async def broadcast_phase_change(self, session_id: str, phase_data: dict):
        """Broadcast a "phase_change" event carrying the phase data."""
        event = {
            "type": "phase_change",
            "data": phase_data
        }
        await self.broadcast_to_session(session_id, event)

    async def broadcast_session_ended(self, session_id: str):
        """Broadcast a "session_ended" event for the session."""
        event = {
            "type": "session_ended",
            "data": {"session_id": session_id}
        }
        await self.broadcast_to_session(session_id, event)

    def get_client_count(self, session_id: str) -> int:
        """Return how many clients are connected to the session (0 if none)."""
        clients = self._connections.get(session_id)
        return len(clients) if clients is not None else 0

    def get_active_sessions(self) -> List[str]:
        """Return the ids of all sessions that have at least one registered socket."""
        return list(self._connections)
# Global instances
# Process-wide WebSocket connection manager.
ws_manager = ConnectionManager()
# In-memory session cache, keyed by session_id.
_sessions: Dict[str, LessonSession] = {}
# Set to True by init_db_if_needed() once initialization has succeeded.
_db_initialized = False
# Handle of the background timer-broadcast task; None until start_timer_broadcast() creates one.
_timer_broadcast_task: Optional[asyncio.Task] = None
# === Demo User ===
# Identity returned by get_optional_current_user() when the auth module is
# missing, or when authentication is skipped or fails in development.
DEMO_USER = {
"user_id": "demo-teacher",
"email": "demo@breakpilot.app",
"name": "Demo Lehrer",
"given_name": "Demo",
"family_name": "Lehrer",
"role": "teacher",
"is_demo": True
}
# === Timer Broadcast Functions ===
async def _timer_broadcast_loop():
    """
    Background task that pushes timer updates to connected clients.

    Once per second, every session with at least one WebSocket client that is
    present in the in-memory cache and not ended receives its current timer
    status. The loop ends on cancellation; after any other error it waits
    five seconds and continues.
    """
    logger.info("Timer broadcast loop started")
    while True:
        try:
            await asyncio.sleep(1)
            for session_id in ws_manager.get_active_sessions():
                session = _sessions.get(session_id)
                if session and not session.is_ended:
                    timer_status = build_timer_status(session)
                    await ws_manager.broadcast_timer_update(session_id, timer_status)
        except asyncio.CancelledError:
            logger.info("Timer broadcast loop cancelled")
            break
        except Exception as e:
            logger.error(f"Error in timer broadcast loop: {e}")
            # Back off before the next attempt.
            await asyncio.sleep(5)
def start_timer_broadcast():
    """Create the timer broadcast task unless one is already running."""
    global _timer_broadcast_task
    existing = _timer_broadcast_task
    if existing is not None and not existing.done():
        return
    _timer_broadcast_task = asyncio.create_task(_timer_broadcast_loop())
    logger.info("Timer broadcast task created")
def stop_timer_broadcast():
    """Cancel the timer broadcast task if it exists and is still running."""
    task = _timer_broadcast_task
    if not task or task.done():
        return
    task.cancel()
    logger.info("Timer broadcast task cancelled")
# === Database Functions ===
def init_db_if_needed():
    """
    Initialize the database once and load active sessions into the cache.

    Does nothing when the DB layer is unavailable or initialization already
    succeeded. A failure is logged and leaves the flag unset, so the next
    call tries again.
    """
    global _db_initialized
    if not DB_ENABLED or _db_initialized:
        return
    try:
        init_db()
        _load_active_sessions_from_db()
        _db_initialized = True
        logger.info("Classroom DB initialized, loaded active sessions")
    except Exception as e:
        logger.error(f"Failed to initialize Classroom DB: {e}")
def _load_active_sessions_from_db():
    """
    Load every session that has not ended from the DB into the memory cache.

    Errors are logged and swallowed; sessions loaded before the error stay in
    the cache. The DB session is always closed.
    """
    if not DB_ENABLED:
        return
    try:
        db = SessionLocal()
        try:
            repo = SessionRepository(db)
            from classroom_engine.db_models import LessonSessionDB, LessonPhaseEnum
            active_db_sessions = db.query(LessonSessionDB).filter(
                LessonSessionDB.current_phase != LessonPhaseEnum.ENDED
            ).all()
            for db_session in active_db_sessions:
                session = repo.to_dataclass(db_session)
                _sessions[session.session_id] = session
                logger.info(f"Loaded session {session.session_id} from DB")
        finally:
            # Release the DB session even when the query or conversion raised.
            db.close()
    except Exception as e:
        logger.error(f"Failed to load sessions from DB: {e}")
def persist_session(session: LessonSession):
    """
    Create or update the session's row in the database (best effort).

    Does nothing when the DB layer is unavailable. Errors are logged and
    swallowed so a DB outage never breaks the in-memory workflow. The DB
    session is always closed.
    """
    if not DB_ENABLED:
        return
    try:
        db = SessionLocal()
        try:
            repo = SessionRepository(db)
            if repo.get_by_id(session.session_id):
                repo.update(session)
            else:
                repo.create(session)
        finally:
            # Release the DB session even when the write raised.
            db.close()
    except Exception as e:
        logger.error(f"Failed to persist session {session.session_id}: {e}")
# === Auth Functions ===
async def get_optional_current_user(request: Request) -> Dict[str, Any]:
    """
    Resolve the current user, falling back to the demo user where allowed.

    The demo user is returned when the auth module is unavailable, and - only
    if ENVIRONMENT is "development" - when no bearer token is sent or token
    validation fails.

    Raises:
        HTTPException: 401 outside development when the token is missing or
            cannot be validated.
    """
    if not AUTH_ENABLED:
        return DEMO_USER

    def _in_development() -> bool:
        # NOTE(review): an unset ENVIRONMENT counts as "development", so the
        # demo fallback is active by default.
        return os.environ.get("ENVIRONMENT", "development") == "development"

    auth_header = request.headers.get("Authorization", "")
    has_bearer_token = bool(auth_header) and auth_header.startswith("Bearer ")
    if not has_bearer_token:
        if _in_development():
            return DEMO_USER
        raise HTTPException(status_code=401, detail="Nicht authentifiziert")

    try:
        return await get_current_user(request)
    except Exception as e:
        logger.warning(f"Auth failed: {e}")
        if _in_development():
            return DEMO_USER
        raise HTTPException(status_code=401, detail="Authentifizierung fehlgeschlagen")
# === Session Helpers ===
def get_session_or_404(session_id: str) -> LessonSession:
    """
    Return the session with the given id or raise a 404.

    The in-memory cache is checked first; on a miss the database is consulted
    (when enabled) and a found session is added to the cache. A DB error is
    logged and treated like "not found".

    Raises:
        HTTPException: 404 when the session is in neither cache nor database.
    """
    init_db_if_needed()

    session = _sessions.get(session_id)
    if session:
        return session

    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = SessionRepository(db)
                db_session = repo.get_by_id(session_id)
                if db_session:
                    session = repo.to_dataclass(db_session)
                    _sessions[session.session_id] = session
                    return session
            finally:
                # Runs on return and on error, so the DB session never leaks.
                db.close()
        except Exception as e:
            logger.error(f"Failed to load session {session_id} from DB: {e}")

    raise HTTPException(status_code=404, detail="Session nicht gefunden")
def build_timer_status(session: LessonSession) -> dict:
    """
    Build the timer status dict that is broadcast over WebSocket.

    The phase status from PhaseTimer is extended with the session id, the
    current phase value, the pause flag and a UTC timestamp.
    """
    status = PhaseTimer().get_phase_status(session)
    status.update(
        session_id=session.session_id,
        current_phase=session.current_phase.value,
        is_paused=session.is_paused,
        timestamp=datetime.utcnow().isoformat(),
    )
    return status
def get_sessions() -> Dict[str, LessonSession]:
    """Return the in-memory session cache itself (not a copy), keyed by session id."""
    cache = _sessions
    return cache
def add_session(session: LessonSession):
    """Store the session in the in-memory cache, then persist it."""
    session_key = session.session_id
    _sessions[session_key] = session
    persist_session(session)
def remove_session(session_id: str):
    """Drop a session from the in-memory cache; unknown ids are ignored."""
    if session_id in _sessions:
        del _sessions[session_id]

View File

@@ -0,0 +1,392 @@
"""
Classroom API - Template Endpoints.
Endpoints fuer Stunden-Vorlagen (Feature f37).
"""
from uuid import uuid4
from typing import Dict, List, Optional
from datetime import datetime
import logging
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
from classroom_engine import (
LessonSession,
LessonTemplate,
SYSTEM_TEMPLATES,
get_default_durations,
)
from .models import SessionResponse
from .shared import (
init_db_if_needed,
get_sessions,
persist_session,
DB_ENABLED,
logger,
)
from .sessions import build_session_response
try:
from classroom_engine.database import SessionLocal
from classroom_engine.repository import TemplateRepository
except ImportError:
pass
# Router that collects the lesson-template endpoints defined in this module.
router = APIRouter(tags=["Templates"])
# === Pydantic Models ===
class TemplateCreate(BaseModel):
    """Request body for creating a lesson template; only the name is required."""

    name: str = Field(..., min_length=1, max_length=200)
    description: str = Field(default="", max_length=1000)
    subject: str = Field(default="", max_length=100)
    grade_level: str = Field(default="", max_length=50)
    # Falls back to the default durations when the client sends none.
    phase_durations: Dict[str, int] = Field(default_factory=get_default_durations)
    default_topic: str = Field(default="", max_length=500)
    default_notes: str = Field(default="")
    is_public: bool = Field(default=False)
class TemplateUpdate(BaseModel):
    """Request body for updating a lesson template; a field left as None is not changed."""

    name: Optional[str] = Field(default=None, min_length=1, max_length=200)
    description: Optional[str] = Field(default=None, max_length=1000)
    subject: Optional[str] = Field(default=None, max_length=100)
    grade_level: Optional[str] = Field(default=None, max_length=50)
    phase_durations: Optional[Dict[str, int]] = None
    default_topic: Optional[str] = Field(default=None, max_length=500)
    default_notes: Optional[str] = None
    is_public: Optional[bool] = None
class TemplateResponse(BaseModel):
    """Response model for a single lesson template."""
    template_id: str
    # "system" for the built-in templates, otherwise the owning teacher's id.
    teacher_id: str
    name: str
    description: str
    subject: str
    grade_level: str
    # Duration per phase, keyed by phase name.
    phase_durations: Dict[str, int]
    default_topic: str
    default_notes: str
    is_public: bool
    usage_count: int
    # Sum of all values in phase_durations.
    total_duration_minutes: int
    # ISO-formatted timestamps, or None when the template has none.
    created_at: Optional[str]
    updated_at: Optional[str]
    # True only for the predefined system templates.
    is_system_template: bool = False
class TemplateListResponse(BaseModel):
    """Response model for a list of lesson templates."""
    templates: List[TemplateResponse]
    # Number of templates contained in this response.
    total_count: int
# === Helper Functions ===
def build_template_response(template: LessonTemplate, is_system: bool = False) -> TemplateResponse:
    """
    Convert a LessonTemplate into its API response model.

    Args:
        template: The template to convert.
        is_system: Marks the response as a predefined system template.

    Returns:
        The response with timestamps ISO-formatted (or None) and the total
        duration computed as the sum of all phase durations.
    """
    def _iso_or_none(moment):
        return moment.isoformat() if moment else None

    durations = template.phase_durations
    return TemplateResponse(
        template_id=template.template_id,
        teacher_id=template.teacher_id,
        name=template.name,
        description=template.description,
        subject=template.subject,
        grade_level=template.grade_level,
        phase_durations=durations,
        default_topic=template.default_topic,
        default_notes=template.default_notes,
        is_public=template.is_public,
        usage_count=template.usage_count,
        total_duration_minutes=sum(durations.values()),
        created_at=_iso_or_none(template.created_at),
        updated_at=_iso_or_none(template.updated_at),
        is_system_template=is_system,
    )
def get_system_templates() -> List[TemplateResponse]:
    """Return the predefined system templates as public responses owned by "system"."""
    return [
        build_template_response(
            LessonTemplate(
                template_id=entry["template_id"],
                teacher_id="system",
                name=entry["name"],
                description=entry.get("description", ""),
                phase_durations=entry["phase_durations"],
                is_public=True,
                usage_count=0,
            ),
            is_system=True,
        )
        for entry in SYSTEM_TEMPLATES
    ]
# === Endpoints ===
@router.get("/templates", response_model=TemplateListResponse)
async def list_templates(
    teacher_id: Optional[str] = Query(None),
    subject: Optional[str] = Query(None),
    include_system: bool = Query(True)
) -> TemplateListResponse:
    """
    List the available lesson templates (feature f37).

    System templates come first when requested. DB templates are selected by
    subject if given, else by teacher (including public ones), else all
    public templates. A DB failure is logged and the templates collected so
    far are still returned.
    """
    init_db_if_needed()

    templates: List[TemplateResponse] = []
    if include_system:
        templates.extend(get_system_templates())

    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = TemplateRepository(db)
                if subject:
                    db_templates = repo.get_by_subject(subject, teacher_id)
                elif teacher_id:
                    db_templates = repo.get_by_teacher(teacher_id, include_public=True)
                else:
                    db_templates = repo.get_public_templates()
                for db_t in db_templates:
                    templates.append(build_template_response(repo.to_dataclass(db_t)))
            finally:
                # Release the DB session even when loading or converting raised.
                db.close()
        except Exception as e:
            logger.error(f"Failed to load templates from DB: {e}")

    return TemplateListResponse(templates=templates, total_count=len(templates))
@router.get("/templates/{template_id}", response_model=TemplateResponse)
async def get_template(template_id: str) -> TemplateResponse:
    """
    Return a single template, checking system templates before the database.

    Raises:
        HTTPException: 404 when no template with this id is found; a DB
            error is logged and also ends in a 404.
    """
    init_db_if_needed()

    for t in SYSTEM_TEMPLATES:
        if t["template_id"] == template_id:
            template = LessonTemplate(
                template_id=t["template_id"],
                teacher_id="system",
                name=t["name"],
                description=t.get("description", ""),
                phase_durations=t["phase_durations"],
                is_public=True,
            )
            return build_template_response(template, is_system=True)

    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = TemplateRepository(db)
                db_template = repo.get_by_id(template_id)
                if db_template:
                    return build_template_response(repo.to_dataclass(db_template))
            finally:
                # Runs on return and on error, so the DB session never leaks.
                db.close()
        except Exception as e:
            logger.error(f"Failed to get template {template_id}: {e}")

    raise HTTPException(status_code=404, detail="Vorlage nicht gefunden")
@router.post("/templates", response_model=TemplateResponse, status_code=201)
async def create_template(
    request: TemplateCreate,
    teacher_id: str = Query(...)
) -> TemplateResponse:
    """
    Create a new lesson template owned by the given teacher.

    Raises:
        HTTPException: 503 when the database is unavailable, 500 when
            storing the template fails.
    """
    init_db_if_needed()
    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Datenbank nicht verfuegbar")

    template = LessonTemplate(
        template_id=str(uuid4()),
        teacher_id=teacher_id,
        name=request.name,
        description=request.description,
        subject=request.subject,
        grade_level=request.grade_level,
        phase_durations=request.phase_durations,
        default_topic=request.default_topic,
        default_notes=request.default_notes,
        is_public=request.is_public,
        created_at=datetime.utcnow(),
    )

    try:
        db = SessionLocal()
        try:
            repo = TemplateRepository(db)
            db_template = repo.create(template)
            # Read the stored row back while the DB session is still open.
            template = repo.to_dataclass(db_template)
        finally:
            db.close()
        return build_template_response(template)
    except Exception as e:
        logger.error(f"Failed to create template: {e}")
        raise HTTPException(status_code=500, detail="Fehler beim Erstellen der Vorlage")
@router.put("/templates/{template_id}", response_model=TemplateResponse)
async def update_template(
    template_id: str,
    request: TemplateUpdate,
    teacher_id: str = Query(...)
) -> TemplateResponse:
    """
    Update a lesson template; only fields present in the request are changed.

    Raises:
        HTTPException: 403 for system templates or when the template belongs
            to another teacher, 503 when the database is unavailable, 404
            when the template does not exist, 500 on any other failure.
    """
    init_db_if_needed()

    for t in SYSTEM_TEMPLATES:
        if t["template_id"] == template_id:
            raise HTTPException(status_code=403, detail="System-Vorlagen koennen nicht bearbeitet werden")

    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Datenbank nicht verfuegbar")

    updatable_fields = (
        "name", "description", "subject", "grade_level",
        "phase_durations", "default_topic", "default_notes", "is_public",
    )

    try:
        db = SessionLocal()
        try:
            repo = TemplateRepository(db)
            db_template = repo.get_by_id(template_id)
            if not db_template:
                raise HTTPException(status_code=404, detail="Vorlage nicht gefunden")
            if db_template.teacher_id != teacher_id:
                raise HTTPException(status_code=403, detail="Keine Berechtigung")

            template = repo.to_dataclass(db_template)
            # Request and template share field names; None means "leave unchanged".
            for field_name in updatable_fields:
                value = getattr(request, field_name)
                if value is not None:
                    setattr(template, field_name, value)

            db_template = repo.update(template)
            template = repo.to_dataclass(db_template)
        finally:
            # Single place that releases the DB session for every exit path.
            db.close()
        return build_template_response(template)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to update template {template_id}: {e}")
        raise HTTPException(status_code=500, detail="Fehler beim Aktualisieren der Vorlage")
@router.delete("/templates/{template_id}")
async def delete_template(
    template_id: str,
    teacher_id: str = Query(...)
) -> Dict[str, str]:
    """
    Delete a lesson template owned by the given teacher.

    Raises:
        HTTPException: 403 for system templates or when the template belongs
            to another teacher, 503 when the database is unavailable, 404
            when the template does not exist, 500 on any other failure.
    """
    init_db_if_needed()

    for t in SYSTEM_TEMPLATES:
        if t["template_id"] == template_id:
            raise HTTPException(status_code=403, detail="System-Vorlagen koennen nicht geloescht werden")

    if not DB_ENABLED:
        raise HTTPException(status_code=503, detail="Datenbank nicht verfuegbar")

    try:
        db = SessionLocal()
        try:
            repo = TemplateRepository(db)
            db_template = repo.get_by_id(template_id)
            if not db_template:
                raise HTTPException(status_code=404, detail="Vorlage nicht gefunden")
            if db_template.teacher_id != teacher_id:
                raise HTTPException(status_code=403, detail="Keine Berechtigung")
            repo.delete(template_id)
        finally:
            # Single place that releases the DB session for every exit path.
            db.close()
        return {"status": "deleted", "template_id": template_id}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to delete template {template_id}: {e}")
        raise HTTPException(status_code=500, detail="Fehler beim Loeschen der Vorlage")
@router.post("/sessions/from-template", response_model=SessionResponse)
async def create_session_from_template(
    template_id: str = Query(...),
    teacher_id: str = Query(...),
    class_id: str = Query(...),
    topic: Optional[str] = Query(None)
) -> SessionResponse:
    """
    Create a new session based on a system or database template.

    System templates are checked first. For a database template the usage
    counter is incremented. An explicit topic overrides the template's
    default topic.

    Raises:
        HTTPException: 404 when the template is not found; a DB error while
            loading it is logged and also ends in a 404.
    """
    init_db_if_needed()

    template_data = None
    for t in SYSTEM_TEMPLATES:
        if t["template_id"] == template_id:
            template_data = t
            break

    if not template_data and DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                repo = TemplateRepository(db)
                db_template = repo.get_by_id(template_id)
                if db_template:
                    template_data = {
                        "phase_durations": db_template.phase_durations or get_default_durations(),
                        "subject": db_template.subject or "",
                        "default_topic": db_template.default_topic or "",
                        "default_notes": db_template.default_notes or "",
                    }
                    repo.increment_usage(template_id)
            finally:
                # Release the DB session even when loading raised.
                db.close()
        except Exception as e:
            logger.error(f"Failed to load template {template_id}: {e}")

    if not template_data:
        raise HTTPException(status_code=404, detail="Vorlage nicht gefunden")

    session = LessonSession(
        session_id=str(uuid4()),
        teacher_id=teacher_id,
        class_id=class_id,
        subject=template_data.get("subject", ""),
        topic=topic or template_data.get("default_topic", ""),
        # Copy the durations: the session must own its dict, otherwise a later
        # in-place change on the session would also alter the template
        # definition it was created from.
        phase_durations=dict(template_data["phase_durations"]),
        notes=template_data.get("default_notes", ""),
    )

    sessions = get_sessions()
    sessions[session.session_id] = session
    persist_session(session)

    return build_session_response(session)

View File

@@ -0,0 +1,185 @@
"""
Classroom API - Utility Endpoints.
Health-Check, Phasen-Liste und andere Utility-Endpoints.
"""
from typing import Dict, List, Optional, Any
from datetime import datetime
import logging
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import HTMLResponse
from sqlalchemy import text
from pydantic import BaseModel
from classroom_engine import LESSON_PHASES, LessonStateMachine
from .shared import (
init_db_if_needed,
get_sessions,
get_session_or_404,
ws_manager,
DB_ENABLED,
logger,
)
try:
from classroom_engine.database import SessionLocal
except ImportError:
pass
# Router that collects the utility endpoints defined in this module.
router = APIRouter(tags=["Utility"])
# === Pydantic Models ===
class PhasesListResponse(BaseModel):
    """Response model listing all available lesson phases."""
    # One free-form metadata dict per phase.
    phases: List[Dict[str, Any]]
class ActiveSessionsResponse(BaseModel):
    """Response model for a list of sessions."""
    # One summary dict per session.
    sessions: List[Dict[str, Any]]
    # Number of entries in `sessions`.
    count: int
# === Endpoints ===
@router.get("/phases", response_model=PhasesListResponse)
async def list_phases() -> PhasesListResponse:
    """List all available lesson phases together with their metadata."""
    phase_entries = [
        {
            "phase": phase_id,
            "display_name": config["display_name"],
            "default_duration_minutes": config["default_duration_minutes"],
            "activities": config["activities"],
            "icon": config["icon"],
            "description": config.get("description", ""),
        }
        for phase_id, config in LESSON_PHASES.items()
    ]
    return PhasesListResponse(phases=phase_entries)
@router.get("/sessions", response_model=ActiveSessionsResponse)
async def list_active_sessions(
    teacher_id: Optional[str] = Query(None)
) -> ActiveSessionsResponse:
    """
    List all sessions in the in-memory cache, optionally filtered by teacher.

    Every entry carries an "is_active" flag, so ended or not yet started
    sessions are listed as well.
    """
    def _summarize(session) -> Dict[str, Any]:
        started_at = session.lesson_started_at
        return {
            "session_id": session.session_id,
            "teacher_id": session.teacher_id,
            "class_id": session.class_id,
            "subject": session.subject,
            "current_phase": session.current_phase.value,
            "is_active": LessonStateMachine().is_lesson_active(session),
            "lesson_started_at": started_at.isoformat() if started_at else None,
        }

    summaries = [
        _summarize(session)
        for session in get_sessions().values()
        if not teacher_id or session.teacher_id == teacher_id
    ]
    return ActiveSessionsResponse(sessions=summaries, count=len(summaries))
@router.get("/health")
async def health_check() -> Dict[str, Any]:
    """
    Health check for the classroom service.

    Reports the number of cached sessions, the database state ("disabled",
    "connected" or "error: ...") and the number of open WebSocket
    connections. A failing database probe does not fail the endpoint.
    """
    db_status = "disabled"
    if DB_ENABLED:
        try:
            db = SessionLocal()
            try:
                db.execute(text("SELECT 1"))
            finally:
                # Release the DB session even when the probe query failed.
                db.close()
            db_status = "connected"
        except Exception as e:
            # NOTE(review): the raw exception text is exposed to the caller.
            db_status = f"error: {str(e)}"

    sessions = get_sessions()
    return {
        "status": "healthy",
        "service": "classroom-engine",
        "active_sessions": len(sessions),
        "db_enabled": DB_ENABLED,
        "db_status": db_status,
        "websocket_connections": sum(
            ws_manager.get_client_count(sid) for sid in ws_manager.get_active_sessions()
        ),
        "timestamp": datetime.utcnow().isoformat(),
    }
@router.get("/ws/status")
async def websocket_status() -> Dict[str, Any]:
    """Report the state of the WebSocket connections.

    Returns:
        A dict with the number of sessions that have connections, the client
        count per session id, the total client count and a UTC timestamp.
    """
    session_ids = ws_manager.get_active_sessions()

    clients_per_session = {}
    for sid in session_ids:
        clients_per_session[sid] = ws_manager.get_client_count(sid)

    total_clients = sum(clients_per_session.values())
    return {
        "active_sessions": len(session_ids),
        "session_connections": clients_per_session,
        "total_connections": total_clients,
        "timestamp": datetime.utcnow().isoformat(),
    }
@router.get("/export/session/{session_id}", response_class=HTMLResponse)
async def export_session_html(session_id: str) -> HTMLResponse:
    """Export a session as a standalone HTML document.

    All session-supplied text (subject, topic, class, phase names, notes,
    homework) is HTML-escaped before it is placed into the page, so markup
    typed into those fields is shown as text instead of being interpreted
    by the browser.

    Args:
        session_id: Id of the session to export.

    Returns:
        An ``HTMLResponse`` containing the rendered document.

    Raises:
        Whatever ``get_session_or_404`` raises when the session is unknown.
    """
    # Imported locally under an alias because the name ``html`` is used for
    # the rendered document below.
    from html import escape as _escape

    def _safe(value) -> str:
        """Return *value* as text that is safe to embed in HTML."""
        return _escape(str(value))

    session = get_session_or_404(session_id)

    if session.lesson_started_at:
        started_label = session.lesson_started_at.strftime('%d.%m.%Y %H:%M')
    else:
        started_label = 'Nicht gestartet'

    # One row per recorded phase; durations are shown in whole minutes.
    phase_rows = "".join(
        f'<div class="phase"><span>{_safe(p.get("phase", ""))}</span>'
        f'<span>{p.get("duration_seconds", 0) // 60} min</span></div>'
        for p in session.phase_history
    )

    # Simple HTML export
    html = f"""
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Session Export - {_safe(session.subject)}</title>
<style>
body {{ font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }}
h1 {{ color: #333; }}
.meta {{ color: #666; margin-bottom: 20px; }}
.section {{ margin: 20px 0; padding: 15px; background: #f5f5f5; border-radius: 8px; }}
.phase {{ display: flex; justify-content: space-between; padding: 10px 0; border-bottom: 1px solid #ddd; }}
</style>
</head>
<body>
<h1>{_safe(session.subject)}: {_safe(session.topic or 'Ohne Thema')}</h1>
<div class="meta">
<p>Klasse: {_safe(session.class_id)}</p>
<p>Datum: {started_label}</p>
<p>Status: {_safe(session.current_phase.value)}</p>
</div>
<div class="section">
<h2>Phasen</h2>
{phase_rows}
</div>
<div class="section">
<h2>Notizen</h2>
<p>{_safe(session.notes or 'Keine Notizen')}</p>
</div>
<div class="section">
<h2>Hausaufgaben</h2>
<p>{_safe(session.homework or 'Keine Hausaufgaben')}</p>
</div>
<footer style="margin-top: 40px; color: #999; font-size: 12px;">
Exportiert am {datetime.utcnow().strftime('%d.%m.%Y %H:%M')} UTC - BreakPilot Classroom
</footer>
</body>
</html>
"""
    return HTMLResponse(content=html)

View File

@@ -0,0 +1,35 @@
"""
Test Registry API
Zentrales Dashboard fuer alle Tests im Breakpilot-System.
Aggregiert Tests aus allen Services.
Phase 1 Update (2026-02-02):
- PostgreSQL-Integration fuer persistente Speicherung
- Backlog-Management mit Status-Workflow
- Historie und Trends ueber Zeit
"""
from .registry import router
from .database import get_db, get_db_session, init_db
from .repository import TestRepository
from .db_models import (
TestRunDB,
TestResultDB,
FailedTestBacklogDB,
TestFixHistoryDB,
TestServiceStatsDB
)
# Public API of the package: the API router, the database helpers, the
# repository class and the SQLAlchemy model classes imported above.
__all__ = [
    "router",
    "get_db",
    "get_db_session",
    "init_db",
    "TestRepository",
    "TestRunDB",
    "TestResultDB",
    "FailedTestBacklogDB",
    "TestFixHistoryDB",
    "TestServiceStatsDB"
]

View File

@@ -0,0 +1,91 @@
"""
Database Configuration fuer Test Registry.
PostgreSQL-Anbindung fuer persistente Test-Speicherung.
Ersetzt die bisherige JSON-basierte Speicherung.
"""
import os
from contextlib import contextmanager
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, Session, declarative_base
# Dedicated declarative Base for the Test Registry, independent of other modules.
# This avoids import problems in CI/CD environments.
Base = declarative_base()
# Database URL from the environment (uses the same database as the backend).
# NOTE(review): the fallback URL embeds a default user and password; confirm it
# is only meant for local development setups.
_raw_url = os.getenv(
    "DATABASE_URL",
    "postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db"
)
# SQLAlchemy expects the "postgresql://" scheme rather than "postgres://",
# so a leading "postgres://" is rewritten (first occurrence only).
DATABASE_URL = _raw_url.replace("postgres://", "postgresql://", 1) if _raw_url.startswith("postgres://") else _raw_url
# Engine configuration with a connection pool
engine = create_engine(
    DATABASE_URL,
    pool_pre_ping=True,  # Checks connections before they are used
    pool_size=5,  # Default pool size
    max_overflow=10,  # Additional connections when needed
    pool_recycle=3600,  # Recycle connections after 1 hour
    echo=os.getenv("SQL_ECHO", "false").lower() == "true"
)
# Session factory
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
def get_db():
    """
    FastAPI dependency that provides a database session.

    Yields:
        A session created by ``SessionLocal``. The session is closed once the
        consumer is finished with it, whether or not an error occurred.
    """
    # Leaving the ``with`` block closes the session.
    with SessionLocal() as session:
        yield session
@contextmanager
def get_db_session():
    """
    Transactional session scope for background tasks and non-FastAPI code.

    Commits when the managed block finishes normally, rolls back and
    re-raises when it (or the commit) fails, and always closes the session.

    Example:
        with get_db_session() as db:
            db.query(TestRun).all()
    """
    # Leaving the ``with`` block closes the session in every case.
    with SessionLocal() as session:
        try:
            yield session
            session.commit()
        except Exception:
            session.rollback()
            raise
def init_db():
    """
    Create the Test Registry tables if they do not exist yet.

    The model classes in ``db_models`` are declared on the ``Base`` that
    module imports, which is not this module's ``Base``. Calling
    ``create_all`` on this module's metadata alone therefore does not create
    them, so the registry tables are also created explicitly through the
    metadata they are registered on. Only the five registry tables are
    passed, so unrelated tables sharing that metadata are left alone.

    In production, Alembic should be used instead.
    """
    from . import db_models  # Import models to register them

    # Tables declared directly on this module's Base (original behaviour).
    Base.metadata.create_all(bind=engine)

    registry_models = (
        db_models.TestRunDB,
        db_models.TestResultDB,
        db_models.FailedTestBacklogDB,
        db_models.TestFixHistoryDB,
        db_models.TestServiceStatsDB,
    )
    # create_all checks for existing tables first, so this is a no-op for
    # tables that are already present.
    db_models.Base.metadata.create_all(
        bind=engine,
        tables=[model.__table__ for model in registry_models],
    )
def check_db_connection() -> bool:
    """
    Check whether the database connection works.

    Useful for health checks.

    Returns:
        True when ``SELECT 1`` runs successfully inside a session scope,
        False when anything along the way raises.
    """
    try:
        from sqlalchemy import text
        with get_db_session() as session:
            session.execute(text("SELECT 1"))
    except Exception:
        return False
    return True

View File

@@ -0,0 +1,227 @@
"""
SQLAlchemy Models fuer Test Registry.
Definiert die Datenbank-Tabellen fuer persistente Test-Speicherung:
- TestRunDB: Jeder Test-Durchlauf
- TestResultDB: Einzelne Test-Ergebnisse
- FailedTestBacklogDB: Persistenter Backlog fuer zu fixende Tests
- TestFixHistoryDB: Historie aller Fix-Versuche
- TestServiceStatsDB: Aggregierte Statistiken pro Service
"""
from datetime import datetime
from sqlalchemy import (
Column, Integer, String, Float, Text, DateTime, Boolean,
ForeignKey, UniqueConstraint, Index
)
from sqlalchemy.orm import relationship
# Nutze die gleiche Base wie Classroom Engine fuer konsistente Migrations
from classroom_engine.database import Base
class TestRunDB(Base):
    """
    One row per test run.

    Holds the run's metadata and its aggregate statistics.
    """
    __tablename__ = 'test_runs'

    id = Column(Integer, primary_key=True, autoincrement=True)
    run_id = Column(String(50), unique=True, nullable=False, index=True)
    service = Column(String(100), nullable=False, index=True)
    framework = Column(String(50), nullable=False)
    started_at = Column(DateTime, nullable=False, index=True)
    completed_at = Column(DateTime, nullable=True)
    status = Column(String(20), nullable=False)  # queued, running, completed, failed
    total_tests = Column(Integer, default=0)
    passed_tests = Column(Integer, default=0)
    failed_tests = Column(Integer, default=0)
    skipped_tests = Column(Integer, default=0)
    duration_seconds = Column(Float, default=0)
    git_commit = Column(String(40), nullable=True)
    git_branch = Column(String(100), nullable=True)
    triggered_by = Column(String(50), nullable=True)  # manual, ci, schedule
    output = Column(Text, nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow)

    # Individual test results belonging to this run
    results = relationship("TestResultDB", back_populates="run", cascade="all, delete-orphan")

    def to_dict(self):
        """Return the run as a plain dict.

        Both ``"id"`` and ``"run_id"`` carry ``run_id``; timestamps are ISO
        strings or ``None``. ``output`` and ``created_at`` are not included.
        """
        started = self.started_at.isoformat() if self.started_at else None
        completed = self.completed_at.isoformat() if self.completed_at else None
        return dict(
            id=self.run_id,
            run_id=self.run_id,
            service=self.service,
            framework=self.framework,
            started_at=started,
            completed_at=completed,
            status=self.status,
            total_tests=self.total_tests,
            passed_tests=self.passed_tests,
            failed_tests=self.failed_tests,
            skipped_tests=self.skipped_tests,
            duration_seconds=self.duration_seconds,
            git_commit=self.git_commit,
            git_branch=self.git_branch,
            triggered_by=self.triggered_by,
        )
class TestResultDB(Base):
    """
    One row per individual test result within a run.

    Allows detailed analysis of failed tests.
    """
    __tablename__ = 'test_results'

    id = Column(Integer, primary_key=True, autoincrement=True)
    run_id = Column(String(50), ForeignKey('test_runs.run_id', ondelete='CASCADE'), nullable=False, index=True)
    test_name = Column(String(500), nullable=False, index=True)
    test_file = Column(String(500), nullable=True)
    line_number = Column(Integer, nullable=True)
    status = Column(String(20), nullable=False, index=True)  # passed, failed, skipped, error
    duration_ms = Column(Float, nullable=True)
    error_message = Column(Text, nullable=True)
    error_type = Column(String(100), nullable=True)
    output = Column(Text, nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow)

    # The run this result belongs to
    run = relationship("TestRunDB", back_populates="results")

    def to_dict(self):
        """Return the result as a plain dict (``output`` and ``created_at`` are not included)."""
        return dict(
            id=self.id,
            run_id=self.run_id,
            test_name=self.test_name,
            test_file=self.test_file,
            line_number=self.line_number,
            status=self.status,
            duration_ms=self.duration_ms,
            error_message=self.error_message,
            error_type=self.error_type,
        )
class FailedTestBacklogDB(Base):
    """
    Persistent backlog of failed tests.

    Aggregates failures across several runs; a (test_name, service) pair is
    unique within the table.
    """
    __tablename__ = 'failed_tests_backlog'
    __table_args__ = (
        UniqueConstraint('test_name', 'service', name='uq_backlog_test_service'),
    )

    id = Column(Integer, primary_key=True, autoincrement=True)
    test_name = Column(String(500), nullable=False)
    test_file = Column(String(500), nullable=True)
    service = Column(String(100), nullable=False, index=True)
    framework = Column(String(50), nullable=True)
    error_message = Column(Text, nullable=True)
    error_type = Column(String(100), nullable=True)
    first_failed_at = Column(DateTime, nullable=False)
    last_failed_at = Column(DateTime, nullable=False)
    failure_count = Column(Integer, default=1)
    status = Column(String(30), default='open', index=True)  # open, in_progress, fixed, wont_fix, flaky
    priority = Column(String(20), default='medium', index=True)  # critical, high, medium, low
    assigned_to = Column(String(100), nullable=True)
    fix_suggestion = Column(Text, nullable=True)
    notes = Column(Text, nullable=True)

    # Resolution fields (auto-close when the tests pass)
    resolved_at = Column(DateTime, nullable=True)
    resolution_commit = Column(String(50), nullable=True)
    resolution_notes = Column(Text, nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Fix attempts recorded for this backlog item
    fixes = relationship("TestFixHistoryDB", back_populates="backlog_item", cascade="all, delete-orphan")

    def to_dict(self):
        """Return the backlog item as a plain dict; timestamps are ISO strings or ``None``."""

        def iso(moment):
            # Unset (falsy) timestamps are reported as None.
            return moment.isoformat() if moment else None

        return dict(
            id=self.id,
            test_name=self.test_name,
            test_file=self.test_file,
            service=self.service,
            framework=self.framework,
            error_message=self.error_message,
            error_type=self.error_type,
            first_failed_at=iso(self.first_failed_at),
            last_failed_at=iso(self.last_failed_at),
            failure_count=self.failure_count,
            status=self.status,
            priority=self.priority,
            assigned_to=self.assigned_to,
            fix_suggestion=self.fix_suggestion,
            notes=self.notes,
            resolved_at=iso(self.resolved_at),
            resolution_commit=self.resolution_commit,
            resolution_notes=self.resolution_notes,
            created_at=iso(self.created_at),
            updated_at=iso(self.updated_at),
        )
class TestFixHistoryDB(Base):
    """
    History of all fix attempts for a backlog entry.

    Allows tracking of automatic and manual fixes.
    """
    __tablename__ = 'test_fixes_history'

    id = Column(Integer, primary_key=True, autoincrement=True)
    backlog_id = Column(Integer, ForeignKey('failed_tests_backlog.id', ondelete='CASCADE'), nullable=False, index=True)
    fix_type = Column(String(50), nullable=True)  # manual, auto_claude, auto_script
    fix_description = Column(Text, nullable=True)
    commit_hash = Column(String(40), nullable=True)
    success = Column(Boolean, nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow)

    # The backlog item this fix attempt belongs to
    backlog_item = relationship("FailedTestBacklogDB", back_populates="fixes")

    def to_dict(self):
        """Return the fix attempt as a plain dict; ``created_at`` is an ISO string or ``None``."""
        created = self.created_at
        created_text = created.isoformat() if created else None
        return dict(
            id=self.id,
            backlog_id=self.backlog_id,
            fix_type=self.fix_type,
            fix_description=self.fix_description,
            commit_hash=self.commit_hash,
            success=self.success,
            created_at=created_text,
        )
class TestServiceStatsDB(Base):
    """
    Aggregated statistics per service.

    Intended to be refreshed after each test run so that queries stay fast.
    """
    __tablename__ = 'test_service_stats'

    id = Column(Integer, primary_key=True, autoincrement=True)
    service = Column(String(100), unique=True, nullable=False)
    total_tests = Column(Integer, default=0)
    passed_tests = Column(Integer, default=0)
    failed_tests = Column(Integer, default=0)
    skipped_tests = Column(Integer, default=0)
    pass_rate = Column(Float, default=0.0)
    last_run_id = Column(String(50), nullable=True)
    last_run_at = Column(DateTime, nullable=True)
    last_status = Column(String(20), nullable=True)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    def to_dict(self):
        """Return the statistics as a plain dict (the integer ``id`` is not included)."""
        # A missing or zero pass rate is reported as 0.0, otherwise one decimal place.
        if self.pass_rate:
            rate = round(self.pass_rate, 1)
        else:
            rate = 0.0
        last_run = self.last_run_at.isoformat() if self.last_run_at else None
        updated = self.updated_at.isoformat() if self.updated_at else None
        return dict(
            service=self.service,
            total_tests=self.total_tests,
            passed_tests=self.passed_tests,
            failed_tests=self.failed_tests,
            skipped_tests=self.skipped_tests,
            pass_rate=rate,
            last_run_id=self.last_run_id,
            last_run_at=last_run,
            last_status=self.last_status,
            updated_at=updated,
        )

277
backend/api/tests/models.py Normal file
View File

@@ -0,0 +1,277 @@
"""
Test Registry Data Models
Definiert die Datenstrukturen fuer das zentrale Test-Dashboard.
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Optional, List, Dict, Any
class TestFramework(str, Enum):
    """Test frameworks known to the registry.

    Mixes in ``str``, so each member is also a string equal to its value.
    """
    GO_TEST = "go_test"
    PYTEST = "pytest"
    JEST = "jest"
    PLAYWRIGHT = "playwright"
    BQAS_GOLDEN = "bqas_golden"
    BQAS_RAG = "bqas_rag"
    BQAS_SYNTHETIC = "bqas_synthetic"
class TestCategory(str, Enum):
    """Categories a test or suite can belong to.

    Mixes in ``str``, so each member is also a string equal to its value.
    """
    UNIT = "unit"
    INTEGRATION = "integration"
    E2E = "e2e"
    BQAS = "bqas"
    SECURITY = "security"
    PERFORMANCE = "performance"
class TestStatus(str, Enum):
    """Status values used for test cases, suites and services.

    Mixes in ``str``, so each member is also a string equal to its value.
    """
    PENDING = "pending"
    RUNNING = "running"
    PASSED = "passed"
    FAILED = "failed"
    SKIPPED = "skipped"
    ERROR = "error"
class RunStatus(str, Enum):
    """Status values of a test run.

    Mixes in ``str``, so each member is also a string equal to its value.
    """
    QUEUED = "queued"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
@dataclass
class TestCase:
    """A single test case."""
    # Required fields
    id: str
    name: str
    file_path: str
    # Optional details; a new instance defaults to GO_TEST / UNIT / PENDING.
    line_number: Optional[int] = None
    framework: TestFramework = TestFramework.GO_TEST
    category: TestCategory = TestCategory.UNIT
    duration_ms: Optional[float] = None
    status: TestStatus = TestStatus.PENDING
    error_message: Optional[str] = None
    output: Optional[str] = None
@dataclass
class TestSuite:
    """Test suite of a service."""
    # Required fields
    id: str
    service: str
    name: str
    framework: TestFramework
    category: TestCategory
    base_path: str
    pattern: str  # e.g. "*_test.go" or "test_*.py"
    # Each instance gets its own empty list of test cases.
    tests: List[TestCase] = field(default_factory=list)
    # Aggregate counters, all starting at zero
    total_tests: int = 0
    passed_tests: int = 0
    failed_tests: int = 0
    skipped_tests: int = 0
    duration_ms: float = 0.0
    coverage_percent: Optional[float] = None
    last_run: Optional[datetime] = None
    status: TestStatus = TestStatus.PENDING
@dataclass
class TestRun:
    """A single test run."""
    # Required fields
    id: str
    suite_id: str
    service: str
    started_at: datetime
    # None until a completion time is set
    completed_at: Optional[datetime] = None
    status: RunStatus = RunStatus.QUEUED
    # Aggregate counters, all starting at zero
    total_tests: int = 0
    passed_tests: int = 0
    failed_tests: int = 0
    skipped_tests: int = 0
    duration_seconds: float = 0.0
    git_commit: Optional[str] = None
    git_branch: Optional[str] = None
    coverage_percent: Optional[float] = None
    triggered_by: str = "manual"
    output: Optional[str] = None
    # Each instance gets its own empty list.
    failed_test_ids: List[str] = field(default_factory=list)
@dataclass
class CoverageReport:
    """Coverage report for a service."""
    # Required fields
    service: str
    framework: TestFramework
    line_coverage: float
    # Optional coverage metrics
    branch_coverage: Optional[float] = None
    function_coverage: Optional[float] = None
    statement_coverage: Optional[float] = None
    # Each instance gets its own empty list.
    uncovered_files: List[str] = field(default_factory=list)
    # Set to datetime.now() when the instance is created.
    # NOTE(review): this is naive local time, whereas the SQLAlchemy models of
    # the test registry default to datetime.utcnow — confirm this is intended.
    timestamp: datetime = field(default_factory=datetime.now)
@dataclass
class ServiceTestInfo:
    """Aggregated test information for a service."""
    # Required fields
    service: str
    display_name: str
    port: Optional[int] = None
    language: str = "unknown"
    # Aggregate counters, all starting at zero
    total_tests: int = 0
    passed_tests: int = 0
    failed_tests: int = 0
    skipped_tests: int = 0
    pass_rate: float = 0.0
    coverage_percent: Optional[float] = None
    last_run: Optional[datetime] = None
    status: TestStatus = TestStatus.PENDING
    # Each instance gets its own empty list of suites.
    suites: List[TestSuite] = field(default_factory=list)
@dataclass
class TestRegistryStats:
    """Overall statistics of the test registry."""
    # Totals, all starting at zero
    total_tests: int = 0
    total_passed: int = 0
    total_failed: int = 0
    total_skipped: int = 0
    overall_pass_rate: float = 0.0
    average_coverage: Optional[float] = None
    services_count: int = 0
    last_full_run: Optional[datetime] = None
    # Integer values keyed by string; each instance gets its own empty dicts.
    by_category: Dict[str, int] = field(default_factory=dict)
    by_framework: Dict[str, int] = field(default_factory=dict)
# Service definitions with test information.
# Every entry has "service", "display_name", "port", "language", "base_path",
# "test_pattern" and "framework"; some entries carry additional optional keys
# ("container_name", "run_in_container", "pytest_args", "disabled",
# "disabled_reason", "requires_setup", "setup_note").
SERVICE_DEFINITIONS = [
    {
        "service": "consent-service",
        "display_name": "Consent Service",
        "port": 8081,
        "language": "go",
        "base_path": "/consent-service",
        "test_pattern": "*_test.go",
        "framework": TestFramework.GO_TEST,
    },
    {
        "service": "backend",
        "display_name": "Python Backend",
        "port": 8000,
        "language": "python",
        "base_path": "/backend/tests",
        "test_pattern": "test_*.py",
        "framework": TestFramework.PYTEST,
    },
    {
        "service": "voice-service",
        "display_name": "Voice Service",
        "port": 8091,
        "language": "python",
        "base_path": "/app/tests",
        "test_pattern": "test_*.py",
        "framework": TestFramework.PYTEST,
        "container_name": "breakpilot-pwa-voice-service",
        "run_in_container": True,
        "pytest_args": "--ignore=/app/tests/bqas",  # Exclude BQAS tests - run separately
    },
    {
        "service": "klausur-service",
        "display_name": "Klausur Service",
        "port": 8086,
        "language": "python",
        "base_path": "/app/tests",
        "test_pattern": "test_*.py",
        "framework": TestFramework.PYTEST,
        "container_name": "breakpilot-pwa-klausur-service",
        "run_in_container": True,
    },
    {
        "service": "billing-service",
        "display_name": "Billing Service",
        "port": 8082,
        "language": "go",
        "base_path": "/billing-service",
        "test_pattern": "*_test.go",
        "framework": TestFramework.GO_TEST,
    },
    {
        "service": "school-service",
        "display_name": "School Service",
        "port": 8084,
        "language": "go",
        "base_path": "/school-service",
        "test_pattern": "*_test.go",
        "framework": TestFramework.GO_TEST,
    },
    {
        "service": "edu-search-service",
        "display_name": "Edu Search Service",
        "port": 8088,
        "language": "go",
        "base_path": "/edu-search-service",
        "test_pattern": "*_test.go",
        "framework": TestFramework.GO_TEST,
    },
    {
        "service": "ai-compliance-sdk",
        "display_name": "AI Compliance SDK",
        "port": None,
        "language": "go",
        "base_path": "/ai-compliance-sdk",
        "test_pattern": "*_test.go",
        "framework": TestFramework.GO_TEST,
    },
    {
        "service": "geo-service",
        "display_name": "Geo Service",
        "port": 8089,
        "language": "mixed",
        "base_path": "/geo-service",
        "test_pattern": "*_test.go",
        "framework": TestFramework.GO_TEST,
        "disabled": True,  # No tests available - the directory is empty
        "disabled_reason": "Keine Test-Dateien vorhanden",
    },
    {
        "service": "website",
        "display_name": "Website (Jest)",
        "port": 3000,
        "language": "typescript",
        "base_path": "/website",
        "test_pattern": "*.test.{ts,tsx}",
        "framework": TestFramework.JEST,
        "requires_setup": True,  # Requires npm install in the website directory
        "setup_note": "Fuehren Sie 'npm install' im website-Verzeichnis aus, um Tests lokal auszufuehren",
    },
    # Website E2E removed - no Playwright tests available
    # NOTE(review): the two BQAS entries below use TestFramework.PYTEST although
    # the enum also defines BQAS_GOLDEN / BQAS_RAG — confirm this is intended.
    {
        "service": "bqas-golden",
        "display_name": "BQAS Golden Suite",
        "port": 8091,
        "language": "python",
        "base_path": "/app/tests/bqas/test_golden.py",
        "test_pattern": "test_*.py",
        "framework": TestFramework.PYTEST,
        "container_name": "breakpilot-pwa-voice-service",
        "run_in_container": True,
    },
    {
        "service": "bqas-rag",
        "display_name": "BQAS RAG Tests",
        "port": 8091,
        "language": "python",
        "base_path": "/app/tests/bqas/test_rag.py",
        "test_pattern": "test_*.py",
        "framework": TestFramework.PYTEST,
        "container_name": "breakpilot-pwa-voice-service",
        "run_in_container": True,
    },
]

Some files were not shown because too many files have changed in this diff Show More