""" Vocab Learn Bridge — Converts vocabulary session data into Learning Units. Bridges klausur-service (vocab extraction) with backend-lehrer (learning units + generators). Creates a Learning Unit in backend-lehrer, then triggers MC/Cloze/QA generation. DATENSCHUTZ: All communication stays within Docker network (breakpilot-network). """ import os import json import logging import httpx from typing import List, Dict, Any, Optional logger = logging.getLogger(__name__) BACKEND_LEHRER_URL = os.getenv("BACKEND_LEHRER_URL", "http://backend-lehrer:8001") def vocab_to_analysis_data(session_name: str, vocabulary: List[Dict[str, Any]]) -> Dict[str, Any]: """ Convert vocabulary entries from a vocab session into the analysis_data format expected by backend-lehrer generators (MC, Cloze, QA). The generators consume: - title: Display name - subject: Subject area - grade_level: Target grade - canonical_text: Full text representation - printed_blocks: Individual text blocks - vocabulary: Original vocab data (for vocab-specific modules) """ canonical_lines = [] printed_blocks = [] for v in vocabulary: en = v.get("english", "").strip() de = v.get("german", "").strip() example = v.get("example_sentence", "").strip() if not en and not de: continue line = f"{en} = {de}" if example: line += f" ({example})" canonical_lines.append(line) block_text = f"{en} — {de}" if example: block_text += f" | {example}" printed_blocks.append({"text": block_text}) return { "title": session_name, "subject": "English Vocabulary", "grade_level": "5-8", "canonical_text": "\n".join(canonical_lines), "printed_blocks": printed_blocks, "vocabulary": vocabulary, } async def create_learning_unit( session_name: str, vocabulary: List[Dict[str, Any]], grade: Optional[str] = None, ) -> Dict[str, Any]: """ Create a Learning Unit in backend-lehrer from vocabulary data. Steps: 1. Create unit via POST /api/learning-units/ 2. Return the created unit info Returns dict with unit_id, status, vocabulary_count. """ if not vocabulary: raise ValueError("No vocabulary entries provided") analysis_data = vocab_to_analysis_data(session_name, vocabulary) async with httpx.AsyncClient(timeout=30.0) as client: # 1. Create Learning Unit create_payload = { "title": session_name, "subject": "Englisch", "grade": grade or "5-8", } try: resp = await client.post( f"{BACKEND_LEHRER_URL}/api/learning-units/", json=create_payload, ) resp.raise_for_status() unit = resp.json() except httpx.HTTPError as e: logger.error(f"Failed to create learning unit: {e}") raise RuntimeError(f"Backend-Lehrer nicht erreichbar: {e}") unit_id = unit.get("id") if not unit_id: raise RuntimeError("Learning Unit created but no ID returned") logger.info(f"Created learning unit {unit_id} with {len(vocabulary)} vocabulary entries") # 2. Save analysis_data as JSON file for generators analysis_dir = os.path.expanduser("~/Arbeitsblaetter/Lerneinheiten") os.makedirs(analysis_dir, exist_ok=True) analysis_path = os.path.join(analysis_dir, f"{unit_id}_analyse.json") with open(analysis_path, "w", encoding="utf-8") as f: json.dump(analysis_data, f, ensure_ascii=False, indent=2) logger.info(f"Saved analysis data to {analysis_path}") return { "unit_id": unit_id, "unit": unit, "analysis_path": analysis_path, "vocabulary_count": len(vocabulary), "status": "created", } async def generate_learning_modules( unit_id: str, analysis_path: str, ) -> Dict[str, Any]: """ Trigger MC, Cloze, and QA generation from analysis data. Imports generators directly (they run in-process for klausur-service) or calls backend-lehrer API if generators aren't available locally. Returns dict with generation results. """ results = { "unit_id": unit_id, "mc": {"status": "pending"}, "cloze": {"status": "pending"}, "qa": {"status": "pending"}, } # Load analysis data with open(analysis_path, "r", encoding="utf-8") as f: analysis_data = json.load(f) # Try to generate via backend-lehrer API async with httpx.AsyncClient(timeout=120.0) as client: # Generate QA (includes Leitner fields) try: resp = await client.post( f"{BACKEND_LEHRER_URL}/api/learning-units/{unit_id}/generate-qa", json={"analysis_data": analysis_data, "num_questions": min(len(analysis_data.get("vocabulary", [])), 20)}, ) if resp.status_code == 200: results["qa"] = {"status": "generated", "data": resp.json()} else: logger.warning(f"QA generation returned {resp.status_code}") results["qa"] = {"status": "skipped", "reason": f"HTTP {resp.status_code}"} except Exception as e: logger.warning(f"QA generation failed: {e}") results["qa"] = {"status": "error", "reason": str(e)} # Generate MC try: resp = await client.post( f"{BACKEND_LEHRER_URL}/api/learning-units/{unit_id}/generate-mc", json={"analysis_data": analysis_data, "num_questions": min(len(analysis_data.get("vocabulary", [])), 10)}, ) if resp.status_code == 200: results["mc"] = {"status": "generated", "data": resp.json()} else: results["mc"] = {"status": "skipped", "reason": f"HTTP {resp.status_code}"} except Exception as e: logger.warning(f"MC generation failed: {e}") results["mc"] = {"status": "error", "reason": str(e)} # Generate Cloze try: resp = await client.post( f"{BACKEND_LEHRER_URL}/api/learning-units/{unit_id}/generate-cloze", json={"analysis_data": analysis_data}, ) if resp.status_code == 200: results["cloze"] = {"status": "generated", "data": resp.json()} else: results["cloze"] = {"status": "skipped", "reason": f"HTTP {resp.status_code}"} except Exception as e: logger.warning(f"Cloze generation failed: {e}") results["cloze"] = {"status": "error", "reason": str(e)} return results