[split-required] Split final 43 files (500-668 LOC) to complete refactoring

klausur-service (11 files):
- cv_gutter_repair, ocr_pipeline_regression, upload_api
- ocr_pipeline_sessions, smart_spell, nru_worksheet_generator
- ocr_pipeline_overlays, mail/aggregator, zeugnis_api
- cv_syllable_detect, self_rag

backend-lehrer (17 files):
- classroom_engine/suggestions, generators/quiz_generator
- worksheets_api, llm_gateway/comparison, state_engine_api
- classroom/models (→ 4 submodules), services/file_processor
- alerts_agent/api/wizard+digests+routes, content_generators/pdf
- classroom/routes/sessions, llm_gateway/inference
- classroom_engine/analytics, auth/keycloak_auth
- alerts_agent/processing/rule_engine, ai_processor/print_versions

agent-core (5 files):
- brain/memory_store, brain/knowledge_graph, brain/context_manager
- orchestrator/supervisor, sessions/session_manager

admin-lehrer (5 components):
- GridOverlay, StepGridReview, DevOpsPipelineSidebar
- DataFlowDiagram, sbom/wizard/page

website (2 files):
- DependencyMap, lehrer/abitur-archiv

Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 09:41:42 +02:00
parent 451365a312
commit bd4b956e3c
113 changed files with 13790 additions and 14148 deletions

View File

@@ -10,66 +10,27 @@ Generiert:
import logging
import json
import re
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass
from enum import Enum
from typing import List, Dict, Any, Optional
from .quiz_models import (
QuizType,
TrueFalseQuestion,
MatchingPair,
SortingItem,
OpenQuestion,
Quiz,
)
from .quiz_helpers import (
extract_factual_sentences,
negate_sentence,
extract_definitions,
extract_sequence,
extract_keywords,
)
logger = logging.getLogger(__name__)
class QuizType(str, Enum):
"""Typen von Quiz-Aufgaben."""
TRUE_FALSE = "true_false"
MATCHING = "matching"
SORTING = "sorting"
OPEN_ENDED = "open_ended"
@dataclass
class TrueFalseQuestion:
"""Eine Wahr/Falsch-Frage."""
statement: str
is_true: bool
explanation: str
source_reference: Optional[str] = None
@dataclass
class MatchingPair:
"""Ein Zuordnungspaar."""
left: str
right: str
hint: Optional[str] = None
@dataclass
class SortingItem:
"""Ein Element zum Sortieren."""
text: str
correct_position: int
category: Optional[str] = None
@dataclass
class OpenQuestion:
"""Eine offene Frage."""
question: str
model_answer: str
keywords: List[str]
points: int = 1
@dataclass
class Quiz:
"""Ein komplettes Quiz."""
quiz_type: QuizType
title: str
questions: List[Any] # Je nach Typ unterschiedlich
topic: Optional[str] = None
difficulty: str = "medium"
class QuizGenerator:
"""
Generiert verschiedene Quiz-Typen aus Quelltexten.
@@ -146,13 +107,12 @@ class QuizGenerator:
return self._generate_true_false_llm(source_text, num_questions, difficulty)
# Automatische Generierung
sentences = self._extract_factual_sentences(source_text)
sentences = extract_factual_sentences(source_text)
questions = []
for i, sentence in enumerate(sentences[:num_questions]):
# Abwechselnd wahre und falsche Aussagen
if i % 2 == 0:
# Wahre Aussage
questions.append(TrueFalseQuestion(
statement=sentence,
is_true=True,
@@ -160,8 +120,7 @@ class QuizGenerator:
source_reference=sentence[:50]
))
else:
# Falsche Aussage (Negation)
false_statement = self._negate_sentence(sentence)
false_statement = negate_sentence(sentence)
questions.append(TrueFalseQuestion(
statement=false_statement,
is_true=False,
@@ -222,9 +181,8 @@ Antworte im JSON-Format:
if self.llm_client:
return self._generate_matching_llm(source_text, num_pairs, difficulty)
# Automatische Generierung: Begriff -> Definition
pairs = []
definitions = self._extract_definitions(source_text)
definitions = extract_definitions(source_text)
for term, definition in definitions[:num_pairs]:
pairs.append(MatchingPair(
@@ -286,9 +244,8 @@ Antworte im JSON-Format:
if self.llm_client:
return self._generate_sorting_llm(source_text, num_items, difficulty)
# Automatische Generierung: Chronologische Reihenfolge
items = []
steps = self._extract_sequence(source_text)
steps = extract_sequence(source_text)
for i, step in enumerate(steps[:num_items]):
items.append(SortingItem(
@@ -349,9 +306,8 @@ Antworte im JSON-Format:
if self.llm_client:
return self._generate_open_ended_llm(source_text, num_questions, difficulty)
# Automatische Generierung
questions = []
sentences = self._extract_factual_sentences(source_text)
sentences = extract_factual_sentences(source_text)
question_starters = [
"Was bedeutet",
@@ -362,8 +318,7 @@ Antworte im JSON-Format:
]
for i, sentence in enumerate(sentences[:num_questions]):
# Extrahiere Schlüsselwort
keywords = self._extract_keywords(sentence)
keywords = extract_keywords(sentence)
if keywords:
keyword = keywords[0]
starter = question_starters[i % len(question_starters)]
@@ -421,76 +376,6 @@ Antworte im JSON-Format:
logger.error(f"LLM error: {e}")
return self._generate_open_ended(source_text, num_questions, difficulty)
# Hilfsmethoden
def _extract_factual_sentences(self, text: str) -> List[str]:
"""Extrahiert Fakten-Sätze aus dem Text."""
sentences = re.split(r'[.!?]+', text)
factual = []
for sentence in sentences:
sentence = sentence.strip()
# Filtere zu kurze oder fragende Sätze
if len(sentence) > 20 and '?' not in sentence:
factual.append(sentence)
return factual
def _negate_sentence(self, sentence: str) -> str:
"""Negiert eine Aussage einfach."""
# Einfache Negation durch Einfügen von "nicht"
words = sentence.split()
if len(words) > 2:
# Nach erstem Verb "nicht" einfügen
for i, word in enumerate(words):
if word.endswith(('t', 'en', 'st')) and i > 0:
words.insert(i + 1, 'nicht')
break
return ' '.join(words)
def _extract_definitions(self, text: str) -> List[Tuple[str, str]]:
"""Extrahiert Begriff-Definition-Paare."""
definitions = []
# Suche nach Mustern wie "X ist Y" oder "X bezeichnet Y"
patterns = [
r'(\w+)\s+ist\s+(.+?)[.]',
r'(\w+)\s+bezeichnet\s+(.+?)[.]',
r'(\w+)\s+bedeutet\s+(.+?)[.]',
r'(\w+):\s+(.+?)[.]',
]
for pattern in patterns:
matches = re.findall(pattern, text)
for term, definition in matches:
if len(definition) > 10:
definitions.append((term, definition.strip()))
return definitions
def _extract_sequence(self, text: str) -> List[str]:
"""Extrahiert eine Sequenz von Schritten."""
steps = []
# Suche nach nummerierten Schritten
numbered = re.findall(r'\d+[.)]\s*([^.]+)', text)
steps.extend(numbered)
# Suche nach Signalwörtern
signal_words = ['zuerst', 'dann', 'danach', 'anschließend', 'schließlich']
for word in signal_words:
pattern = rf'{word}\s+([^.]+)'
matches = re.findall(pattern, text, re.IGNORECASE)
steps.extend(matches)
return steps
def _extract_keywords(self, text: str) -> List[str]:
"""Extrahiert Schlüsselwörter."""
# Längere Wörter mit Großbuchstaben (meist Substantive)
words = re.findall(r'\b[A-ZÄÖÜ][a-zäöüß]+\b', text)
return list(set(words))[:5]
def _empty_quiz(self, quiz_type: QuizType, title: str) -> Quiz:
"""Erstellt leeres Quiz bei Fehler."""
return Quiz(
@@ -549,7 +434,6 @@ Antworte im JSON-Format:
return self._true_false_to_h5p(quiz)
elif quiz.quiz_type == QuizType.MATCHING:
return self._matching_to_h5p(quiz)
# Weitere Typen...
return {}
def _true_false_to_h5p(self, quiz: Quiz) -> Dict[str, Any]:

View File

@@ -0,0 +1,70 @@
"""
Quiz Helpers - Text-Verarbeitungs-Hilfsfunktionen fuer Quiz-Generierung.
"""
import re
from typing import List, Tuple
def extract_factual_sentences(text: str) -> List[str]:
    """Extract declarative sentences from ``text`` for use as quiz facts.

    The text is cut into sentences at runs of ``.``, ``!`` and ``?``. A
    sentence is kept when its stripped body is longer than 20 characters and
    it is not a question.

    Args:
        text: Source text to scan.

    Returns:
        The kept sentences in order of appearance, stripped of surrounding
        whitespace and without their terminating punctuation.
    """
    factual = []
    # Capture the terminator run together with the sentence body. Splitting on
    # the punctuation would throw the "?" away, which makes it impossible to
    # recognise (and drop) questions afterwards.
    for match in re.finditer(r'([^.!?]+)([.!?]*)', text):
        body = match.group(1).strip()
        terminator = match.group(2)
        if len(body) > 20 and '?' not in terminator:
            factual.append(body)
    return factual
def negate_sentence(sentence: str) -> str:
    """Build a naively negated variant of a statement.

    The sentence is split on whitespace. If it has more than two words,
    ``nicht`` is inserted right after the first word (other than the very
    first one) that ends in ``t``, ``en`` or ``st``. The words are then
    re-joined with single spaces.

    Args:
        sentence: The statement to negate.

    Returns:
        The re-joined sentence; identical in wording to the input when it has
        at most two words or no word matches the suffix check.
    """
    tokens = sentence.split()
    if len(tokens) <= 2:
        return ' '.join(tokens)

    # Position of the first non-leading word that passes the suffix check.
    hit = next(
        (
            pos
            for pos, token in enumerate(tokens)
            if pos > 0 and token.endswith(('t', 'en', 'st'))
        ),
        None,
    )
    if hit is not None:
        tokens.insert(hit + 1, 'nicht')
    return ' '.join(tokens)
def extract_definitions(text: str) -> List[Tuple[str, str]]:
    """Collect ``(term, definition)`` pairs from simple definition phrases.

    Four patterns are tried one after another: ``X ist Y.``,
    ``X bezeichnet Y.``, ``X bedeutet Y.`` and ``X: Y.``. The term is a single
    word and the definition runs up to the next period.

    Args:
        text: Source text to scan.

    Returns:
        Pairs grouped by pattern (in the order listed above), each group in
        order of appearance. Definitions of 10 characters or fewer are
        skipped; kept definitions are stripped of surrounding whitespace.
    """
    definition_patterns = (
        r'(\w+)\s+ist\s+(.+?)[.]',
        r'(\w+)\s+bezeichnet\s+(.+?)[.]',
        r'(\w+)\s+bedeutet\s+(.+?)[.]',
        r'(\w+):\s+(.+?)[.]',
    )
    # The length check is applied to the raw capture, before stripping.
    return [
        (term, meaning.strip())
        for regex in definition_patterns
        for term, meaning in re.findall(regex, text)
        if len(meaning) > 10
    ]
def extract_sequence(text: str) -> List[str]:
    """Collect step descriptions from numbered lists and signal words.

    Numbered steps (``1.`` / ``1)`` followed by text up to the next period)
    come first. After that, for each signal word in turn, the text following
    that word up to the next period is appended; signal words are matched
    case-insensitively.

    Args:
        text: Source text to scan.

    Returns:
        All captured step texts: numbered steps first, then the matches of
        each signal word in the order the signal words are listed.
    """
    collected = list(re.findall(r'\d+[.)]\s*([^.]+)', text))
    for marker in ('zuerst', 'dann', 'danach', 'anschließend', 'schließlich'):
        collected += re.findall(rf'{marker}\s+([^.]+)', text, re.IGNORECASE)
    return collected
def extract_keywords(text: str) -> List[str]:
    """Extract up to five distinct capitalized words from ``text``.

    A keyword is a word that starts with an uppercase letter (including
    ``Ä``, ``Ö``, ``Ü``) followed by at least one lowercase letter.

    Args:
        text: Text to scan.

    Returns:
        At most five unique keywords in order of first appearance.
    """
    capitalized = re.findall(r'\b[A-ZÄÖÜ][a-zäöüß]+\b', text)
    # dict.fromkeys removes duplicates while keeping first-occurrence order.
    # De-duplicating through a set made the selection depend on string-hash
    # ordering, so the five returned keywords could differ between runs.
    return list(dict.fromkeys(capitalized))[:5]

View File

@@ -0,0 +1,65 @@
"""
Quiz Models - Datenmodelle fuer Quiz-Generierung.
Enthaelt alle Dataclasses und Enums fuer Quiz-Typen:
- True/False Fragen
- Zuordnungsaufgaben (Matching)
- Sortieraufgaben
- Offene Fragen
"""
from typing import List, Any, Optional
from dataclasses import dataclass
from enum import Enum
class QuizType(str, Enum):
    """Kinds of quiz exercises.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    # True/false statements.
    TRUE_FALSE = "true_false"
    # Pairs that have to be matched.
    MATCHING = "matching"
    # Items that have to be put in order.
    SORTING = "sorting"
    # Free-text questions.
    OPEN_ENDED = "open_ended"
@dataclass
class TrueFalseQuestion:
    """A single true/false question.

    Attributes:
        statement: The statement to be judged.
        is_true: Whether the statement is true.
        explanation: Explanation text for the answer.
        source_reference: Optional reference to the source; defaults to
            ``None``.
    """

    statement: str
    is_true: bool
    explanation: str
    source_reference: Optional[str] = None
@dataclass
class MatchingPair:
    """One pair of a matching exercise.

    Attributes:
        left: Left-hand entry of the pair.
        right: Right-hand entry of the pair.
        hint: Optional hint text; defaults to ``None``.
    """

    left: str
    right: str
    hint: Optional[str] = None
@dataclass
class SortingItem:
    """One element of a sorting exercise.

    Attributes:
        text: Text of the element.
        correct_position: Position of the element in the correct order.
        category: Optional category label; defaults to ``None``.
    """

    text: str
    correct_position: int
    category: Optional[str] = None
@dataclass
class OpenQuestion:
    """A single open-ended question.

    Attributes:
        question: The question text.
        model_answer: A model answer for the question.
        keywords: Keywords associated with the question.
        points: Points for the question; defaults to 1.
    """

    question: str
    model_answer: str
    keywords: List[str]
    points: int = 1
@dataclass
class Quiz:
    """A complete quiz.

    Attributes:
        quiz_type: The kind of exercise this quiz contains.
        title: Title of the quiz.
        questions: The quiz entries; the element type differs by quiz type.
        topic: Optional topic; defaults to ``None``.
        difficulty: Difficulty label; defaults to ``"medium"``.
    """

    quiz_type: QuizType
    title: str
    questions: List[Any]  # Element type differs depending on the quiz type
    topic: Optional[str] = None
    difficulty: str = "medium"