Files
breakpilot-lehrer/klausur-service/backend/vocab_learn_bridge.py
Benjamin Admin 20a0585eb1
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 44s
CI / test-go-edu-search (push) Successful in 51s
CI / test-python-klausur (push) Failing after 2m44s
CI / test-python-agent-core (push) Successful in 33s
CI / test-nodejs-website (push) Successful in 34s
Add interactive learning modules MVP (Phases 1-3.1)
New feature: After OCR vocabulary extraction, users can generate interactive
learning modules (flashcards, quiz, type trainer) with one click.

Frontend (studio-v2):
- Fortune Sheet spreadsheet editor tab in vocab-worksheet
- "Lernmodule generieren" button in ExportTab
- /learn page with unit overview and exercise type cards
- /learn/[unitId]/flashcards — Flip-card trainer with Leitner spaced repetition
- /learn/[unitId]/quiz — Multiple choice quiz with explanations
- /learn/[unitId]/type — Type-in trainer with Levenshtein distance feedback
- AudioButton component using Web Speech API for EN+DE TTS

Backend (klausur-service):
- vocab_learn_bridge.py: Converts VocabularyEntry[] to analysis_data format
- POST /sessions/{id}/generate-learning-unit endpoint

Backend (backend-lehrer):
- generate-qa, generate-mc, generate-cloze endpoints on learning units
- get-qa/mc/cloze data retrieval endpoints
- Leitner progress update + next review items endpoints

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 07:13:23 +02:00

197 lines
6.7 KiB
Python

"""
Vocab Learn Bridge — Converts vocabulary session data into Learning Units.
Bridges klausur-service (vocab extraction) with backend-lehrer (learning units + generators).
Creates a Learning Unit in backend-lehrer, then triggers MC/Cloze/QA generation.
DATENSCHUTZ: All communication stays within Docker network (breakpilot-network).
"""
import os
import json
import logging
import httpx
from typing import List, Dict, Any, Optional
logger = logging.getLogger(__name__)
BACKEND_LEHRER_URL = os.getenv("BACKEND_LEHRER_URL", "http://backend-lehrer:8001")
def vocab_to_analysis_data(session_name: str, vocabulary: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Convert vocabulary entries from a vocab session into the analysis_data format
expected by backend-lehrer generators (MC, Cloze, QA).
The generators consume:
- title: Display name
- subject: Subject area
- grade_level: Target grade
- canonical_text: Full text representation
- printed_blocks: Individual text blocks
- vocabulary: Original vocab data (for vocab-specific modules)
"""
canonical_lines = []
printed_blocks = []
for v in vocabulary:
en = v.get("english", "").strip()
de = v.get("german", "").strip()
example = v.get("example_sentence", "").strip()
if not en and not de:
continue
line = f"{en} = {de}"
if example:
line += f" ({example})"
canonical_lines.append(line)
block_text = f"{en}{de}"
if example:
block_text += f" | {example}"
printed_blocks.append({"text": block_text})
return {
"title": session_name,
"subject": "English Vocabulary",
"grade_level": "5-8",
"canonical_text": "\n".join(canonical_lines),
"printed_blocks": printed_blocks,
"vocabulary": vocabulary,
}
async def create_learning_unit(
session_name: str,
vocabulary: List[Dict[str, Any]],
grade: Optional[str] = None,
) -> Dict[str, Any]:
"""
Create a Learning Unit in backend-lehrer from vocabulary data.
Steps:
1. Create unit via POST /api/learning-units/
2. Return the created unit info
Returns dict with unit_id, status, vocabulary_count.
"""
if not vocabulary:
raise ValueError("No vocabulary entries provided")
analysis_data = vocab_to_analysis_data(session_name, vocabulary)
async with httpx.AsyncClient(timeout=30.0) as client:
# 1. Create Learning Unit
create_payload = {
"title": session_name,
"subject": "Englisch",
"grade": grade or "5-8",
}
try:
resp = await client.post(
f"{BACKEND_LEHRER_URL}/api/learning-units/",
json=create_payload,
)
resp.raise_for_status()
unit = resp.json()
except httpx.HTTPError as e:
logger.error(f"Failed to create learning unit: {e}")
raise RuntimeError(f"Backend-Lehrer nicht erreichbar: {e}")
unit_id = unit.get("id")
if not unit_id:
raise RuntimeError("Learning Unit created but no ID returned")
logger.info(f"Created learning unit {unit_id} with {len(vocabulary)} vocabulary entries")
# 2. Save analysis_data as JSON file for generators
analysis_dir = os.path.expanduser("~/Arbeitsblaetter/Lerneinheiten")
os.makedirs(analysis_dir, exist_ok=True)
analysis_path = os.path.join(analysis_dir, f"{unit_id}_analyse.json")
with open(analysis_path, "w", encoding="utf-8") as f:
json.dump(analysis_data, f, ensure_ascii=False, indent=2)
logger.info(f"Saved analysis data to {analysis_path}")
return {
"unit_id": unit_id,
"unit": unit,
"analysis_path": analysis_path,
"vocabulary_count": len(vocabulary),
"status": "created",
}
async def generate_learning_modules(
unit_id: str,
analysis_path: str,
) -> Dict[str, Any]:
"""
Trigger MC, Cloze, and QA generation from analysis data.
Imports generators directly (they run in-process for klausur-service)
or calls backend-lehrer API if generators aren't available locally.
Returns dict with generation results.
"""
results = {
"unit_id": unit_id,
"mc": {"status": "pending"},
"cloze": {"status": "pending"},
"qa": {"status": "pending"},
}
# Load analysis data
with open(analysis_path, "r", encoding="utf-8") as f:
analysis_data = json.load(f)
# Try to generate via backend-lehrer API
async with httpx.AsyncClient(timeout=120.0) as client:
# Generate QA (includes Leitner fields)
try:
resp = await client.post(
f"{BACKEND_LEHRER_URL}/api/learning-units/{unit_id}/generate-qa",
json={"analysis_data": analysis_data, "num_questions": min(len(analysis_data.get("vocabulary", [])), 20)},
)
if resp.status_code == 200:
results["qa"] = {"status": "generated", "data": resp.json()}
else:
logger.warning(f"QA generation returned {resp.status_code}")
results["qa"] = {"status": "skipped", "reason": f"HTTP {resp.status_code}"}
except Exception as e:
logger.warning(f"QA generation failed: {e}")
results["qa"] = {"status": "error", "reason": str(e)}
# Generate MC
try:
resp = await client.post(
f"{BACKEND_LEHRER_URL}/api/learning-units/{unit_id}/generate-mc",
json={"analysis_data": analysis_data, "num_questions": min(len(analysis_data.get("vocabulary", [])), 10)},
)
if resp.status_code == 200:
results["mc"] = {"status": "generated", "data": resp.json()}
else:
results["mc"] = {"status": "skipped", "reason": f"HTTP {resp.status_code}"}
except Exception as e:
logger.warning(f"MC generation failed: {e}")
results["mc"] = {"status": "error", "reason": str(e)}
# Generate Cloze
try:
resp = await client.post(
f"{BACKEND_LEHRER_URL}/api/learning-units/{unit_id}/generate-cloze",
json={"analysis_data": analysis_data},
)
if resp.status_code == 200:
results["cloze"] = {"status": "generated", "data": resp.json()}
else:
results["cloze"] = {"status": "skipped", "reason": f"HTTP {resp.status_code}"}
except Exception as e:
logger.warning(f"Cloze generation failed: {e}")
results["cloze"] = {"status": "error", "reason": str(e)}
return results