klausur-service (11 files): - cv_gutter_repair, ocr_pipeline_regression, upload_api - ocr_pipeline_sessions, smart_spell, nru_worksheet_generator - ocr_pipeline_overlays, mail/aggregator, zeugnis_api - cv_syllable_detect, self_rag backend-lehrer (17 files): - classroom_engine/suggestions, generators/quiz_generator - worksheets_api, llm_gateway/comparison, state_engine_api - classroom/models (→ 4 submodules), services/file_processor - alerts_agent/api/wizard+digests+routes, content_generators/pdf - classroom/routes/sessions, llm_gateway/inference - classroom_engine/analytics, auth/keycloak_auth - alerts_agent/processing/rule_engine, ai_processor/print_versions agent-core (5 files): - brain/memory_store, brain/knowledge_graph, brain/context_manager - orchestrator/supervisor, sessions/session_manager admin-lehrer (5 components): - GridOverlay, StepGridReview, DevOpsPipelineSidebar - DataFlowDiagram, sbom/wizard/page website (2 files): - DependencyMap, lehrer/abitur-archiv Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
71 lines
1.9 KiB
Python
71 lines
1.9 KiB
Python
"""
|
|
Quiz Helpers - Text-Verarbeitungs-Hilfsfunktionen fuer Quiz-Generierung.
|
|
"""
|
|
|
|
import re
|
|
from typing import List, Tuple
|
|
|
|
|
|
def extract_factual_sentences(text: str) -> List[str]:
    """Extract declarative (non-question) sentences from a text.

    The text is cut into sentences at runs of ``.``, ``!`` and ``?``.
    A sentence is kept when, after trimming surrounding whitespace, it is
    longer than 20 characters and is not terminated by a question mark.

    Args:
        text: Source text to scan.

    Returns:
        The kept sentences in order of appearance, without their
        terminating punctuation.
    """
    factual = []
    # Capture each sentence body together with its terminator run. Splitting
    # on the punctuation alone throws the '?' away, which would make it
    # impossible to tell questions apart from statements afterwards.
    for body, terminator in re.findall(r'([^.!?]+)([.!?]*)', text):
        candidate = body.strip()
        if len(candidate) > 20 and '?' not in terminator:
            factual.append(candidate)
    return factual
|
|
|
|
|
|
def negate_sentence(sentence: str) -> str:
    """Negate a statement with a simple word-ending heuristic.

    The sentence is split on whitespace. If it has more than two words,
    the word 'nicht' is inserted directly after the first word (other than
    the very first one) that ends in 't', 'en' or 'st'. Sentences with two
    or fewer words, or without such a word, are left unnegated.

    Args:
        sentence: The statement to negate.

    Returns:
        The words joined by single spaces, with 'nicht' inserted when a
        matching word was found.
    """
    tokens = sentence.split()
    if len(tokens) <= 2:
        # Too short for the heuristic; only whitespace gets normalised.
        return ' '.join(tokens)

    # Index of the first non-leading word with one of the target endings.
    hit = next(
        (
            position
            for position, token in enumerate(tokens)
            if position > 0 and token.endswith(('t', 'en', 'st'))
        ),
        None,
    )
    if hit is not None:
        tokens.insert(hit + 1, 'nicht')
    return ' '.join(tokens)
|
|
|
|
|
|
def extract_definitions(text: str) -> List[Tuple[str, str]]:
    """Extract (term, definition) pairs from a text.

    Four patterns are tried one after another: "<term> ist ...",
    "<term> bezeichnet ...", "<term> bedeutet ..." and "<term>: ...",
    each running up to the next full stop. A pair is kept only when the
    captured definition is longer than 10 characters.

    Args:
        text: Source text to scan.

    Returns:
        A list of (term, definition) tuples, grouped by pattern in the
        order above; definitions have surrounding whitespace removed.
    """
    definition_regexes = (
        r'(\w+)\s+ist\s+(.+?)[.]',
        r'(\w+)\s+bezeichnet\s+(.+?)[.]',
        r'(\w+)\s+bedeutet\s+(.+?)[.]',
        r'(\w+):\s+(.+?)[.]',
    )
    # The length filter is applied to the raw capture, before stripping.
    return [
        (term, explanation.strip())
        for regex in definition_regexes
        for term, explanation in re.findall(regex, text)
        if len(explanation) > 10
    ]
|
|
|
|
|
|
def extract_sequence(text: str) -> List[str]:
    """Extract a sequence of steps from a text.

    Two kinds of steps are collected:

    1. Numbered items such as "1. ..." or "2) ...".
    2. Clauses following one of the signal words 'zuerst', 'dann',
       'danach', 'anschließend' or 'schließlich' (case-insensitive).

    Each step ends at the next full stop or line break, so lists written
    one item per line are split into separate steps.

    Args:
        text: Source text to scan.

    Returns:
        All numbered steps first, then the signal-word steps grouped by
        signal word in the order listed above. Steps are stripped of
        surrounding whitespace and empty captures are dropped.
    """
    # Stop at '\n' as well as '.': otherwise the "2" of the next list line
    # is swallowed into the previous step and that next step is lost.
    # Only spaces/tabs may follow the marker so it cannot skip onto the
    # following line.
    captured = re.findall(r'\d+[.)][ \t]*([^.\n]+)', text)

    signal_words = ['zuerst', 'dann', 'danach', 'anschließend', 'schließlich']
    for word in signal_words:
        pattern = rf'{word}\s+([^.\n]+)'
        captured.extend(re.findall(pattern, text, re.IGNORECASE))

    return [step.strip() for step in captured if step.strip()]
|
|
|
|
|
|
def extract_keywords(text: str) -> List[str]:
    """Extract up to five distinct capitalised words as keywords.

    A keyword candidate is a word consisting of one uppercase letter
    (A-Z, Ä, Ö, Ü) followed by one or more lowercase letters
    (a-z, ä, ö, ü, ß).

    Args:
        text: Source text to scan.

    Returns:
        The first five distinct candidates in order of first appearance.
    """
    capitalised = re.findall(r'\b[A-ZÄÖÜ][a-zäöüß]+\b', text)
    # dict.fromkeys de-duplicates while keeping first-occurrence order.
    # Going through a set would make the selected five words vary between
    # runs because string hashing is randomised per process.
    return list(dict.fromkeys(capitalised))[:5]
|