This repository has been archived on 2026-02-15. You can view files and clone it, but you cannot open issues or pull requests, or push commits.
Files
breakpilot-pwa/backend/ai_processing/mc_generator.py
BreakPilot Dev 19855efacc
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
feat: BreakPilot PWA - Full codebase (clean push without large binaries)
All services: admin-v2, studio-v2, website, ai-compliance-sdk,
consent-service, klausur-service, voice-service, and infrastructure.
Large PDFs and compiled binaries excluded via .gitignore.
2026-02-11 13:25:58 +01:00

317 lines
9.6 KiB
Python

"""
AI Processing - Multiple Choice Generator.
Generiert Multiple-Choice-Fragen aus Arbeitsblatt-Analysen.
"""
from pathlib import Path
import json
import random
import os
import requests
import logging
from .core import (
get_openai_api_key,
get_vision_api,
BEREINIGT_DIR,
)
logger = logging.getLogger(__name__)
def _generate_mc_with_openai(analysis_data: dict, num_questions: int = 5) -> dict:
    """
    Generate multiple-choice questions from a worksheet analysis via OpenAI.

    Uses the gpt-4o-mini chat-completions endpoint with JSON response
    format. The prompt pins the difficulty to the analysis' grade_level.

    Args:
        analysis_data: Parsed ``*_analyse.json`` content; the fields
            title, subject, grade_level, canonical_text and printed_blocks
            are read (all optional, with fallbacks).
        num_questions: Number of questions to request (default: 5).

    Returns:
        Parsed MC dict (``{"questions": [...], "metadata": {...}}``); an
        empty result with an error marker when the analysis has no text.

    Raises:
        RuntimeError: When the API reply lacks the expected fields or is
            not valid JSON.
        requests.HTTPError: On a non-2xx response from the API.
    """
    api_key = get_openai_api_key()
    # Extract the relevant fields from the analysis, with safe fallbacks.
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"
    canonical_text = analysis_data.get("canonical_text") or ""
    printed_blocks = analysis_data.get("printed_blocks") or []
    # Assemble the worksheet text: canonical text first, then any printed
    # blocks whose text is not already present verbatim.
    content_parts = []
    if canonical_text:
        content_parts.append(canonical_text)
    for block in printed_blocks:
        text = block.get("text", "").strip()
        if text and text not in content_parts:
            content_parts.append(text)
    worksheet_content = "\n\n".join(content_parts)
    if not worksheet_content.strip():
        logger.warning("Kein Textinhalt für MC-Generierung gefunden")
        return {"questions": [], "metadata": {"error": "Kein Textinhalt gefunden"}}
    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    system_prompt = f"""Du bist ein erfahrener Pädagoge, der Multiple-Choice-Fragen für Schüler erstellt.
WICHTIGE REGELN:
1. SCHWIERIGKEITSGRAD: Die Fragen müssen exakt dem Niveau "{grade_level}" entsprechen.
- Nicht zu leicht, nicht zu schwer
- Passend für das angegebene Klassenniveau
2. INHALTSTREUE: Alle Fragen müssen sich direkt auf den gegebenen Text beziehen.
- Keine Fragen zu Themen, die nicht im Text vorkommen
- Die richtige Antwort muss aus dem Text ableitbar sein
3. QUALITÄT DER DISTRAKTOREN (falsche Antworten):
- Müssen plausibel klingen
- Dürfen nicht offensichtlich falsch sein
- Sollten typische Schüler-Missverständnisse widerspiegeln
4. AUSGABEFORMAT: Gib deine Antwort AUSSCHLIESSLICH als gültiges JSON zurück."""
    user_prompt = f"""Erstelle {num_questions} Multiple-Choice-Fragen basierend auf diesem Arbeitsblatt:
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
INHALT DES ARBEITSBLATTS:
{worksheet_content}
Gib die Fragen als JSON zurück:
{{
"questions": [
{{
"id": "q1",
"question": "Die Fragestellung hier",
"options": [
{{"id": "a", "text": "Antwort A"}},
{{"id": "b", "text": "Antwort B"}},
{{"id": "c", "text": "Antwort C"}},
{{"id": "d", "text": "Antwort D"}}
],
"correct_answer": "a",
"explanation": "Kurze Erklärung warum diese Antwort richtig ist"
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"num_questions": {num_questions}
}}
}}"""
    payload = {
        "model": "gpt-4o-mini",
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 2000,
        "temperature": 0.7,
    }
    # Explicit timeout: without it a stalled connection would hang the
    # caller indefinitely. Generation can take a while, hence 120s.
    response = requests.post(url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()
    try:
        content = data["choices"][0]["message"]["content"]
        mc_data = json.loads(content)
    except (KeyError, json.JSONDecodeError) as e:
        raise RuntimeError(f"Fehler bei MC-Generierung: {e}") from e
    return mc_data
def _generate_mc_with_claude(analysis_data: dict, num_questions: int = 5) -> dict:
    """
    Generate multiple-choice questions with the Anthropic Claude API.

    Reads the same analysis fields as the OpenAI variant and returns the
    parsed MC dict. Raises RuntimeError when the API key is missing or
    when Claude's reply cannot be parsed as JSON.
    """
    import anthropic

    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY ist nicht gesetzt.")
    client = anthropic.Anthropic(api_key=api_key)

    # Pull the relevant fields out of the analysis, with safe fallbacks.
    title = analysis_data.get("title") or "Arbeitsblatt"
    subject = analysis_data.get("subject") or "Allgemein"
    grade_level = analysis_data.get("grade_level") or "unbekannt"

    # Assemble the worksheet text: canonical text first, then every
    # printed block whose text is not already contained verbatim.
    segments = []
    canonical = analysis_data.get("canonical_text") or ""
    if canonical:
        segments.append(canonical)
    for printed in analysis_data.get("printed_blocks") or []:
        block_text = printed.get("text", "").strip()
        if block_text and block_text not in segments:
            segments.append(block_text)
    worksheet_text = "\n\n".join(segments)
    if not worksheet_text.strip():
        return {"questions": [], "metadata": {"error": "Kein Textinhalt gefunden"}}

    prompt = f"""Erstelle {num_questions} Multiple-Choice-Fragen basierend auf diesem Arbeitsblatt.
WICHTIGE REGELN:
1. SCHWIERIGKEITSGRAD: Exakt Niveau "{grade_level}" - nicht leichter, nicht schwerer
2. INHALTSTREUE: Nur Fragen zum gegebenen Text
3. QUALITÄT: Plausible Distraktoren (falsche Antworten)
TITEL: {title}
FACH: {subject}
KLASSENSTUFE: {grade_level}
INHALT:
{worksheet_text}
Antworte NUR mit diesem JSON-Format:
{{
"questions": [
{{
"id": "q1",
"question": "Fragestellung",
"options": [
{{"id": "a", "text": "Antwort A"}},
{{"id": "b", "text": "Antwort B"}},
{{"id": "c", "text": "Antwort C"}},
{{"id": "d", "text": "Antwort D"}}
],
"correct_answer": "a",
"explanation": "Erklärung"
}}
],
"metadata": {{
"subject": "{subject}",
"grade_level": "{grade_level}",
"source_title": "{title}",
"num_questions": {num_questions}
}}
}}"""

    reply = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=2000,
        messages=[{"role": "user", "content": prompt}],
    )
    raw = reply.content[0].text

    # Claude sometimes wraps its answer in a fenced code block - unwrap it
    # before parsing. (str.split cannot raise JSONDecodeError, so only the
    # actual parse needs to sit inside the try.)
    if "```json" in raw:
        raw = raw.split("```json")[1].split("```")[0]
    elif "```" in raw:
        raw = raw.split("```")[1].split("```")[0]
    try:
        return json.loads(raw.strip())
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Claude hat ungültiges JSON geliefert: {e}")
def _shuffle_mc_options(mc_data: dict) -> dict:
"""
Mischt die Antwort-Optionen jeder Frage zufällig durch.
Aktualisiert auch correct_answer entsprechend.
Dies stellt sicher, dass die richtige Antwort nicht immer an der gleichen Position steht.
"""
if "questions" not in mc_data:
return mc_data
for question in mc_data["questions"]:
options = question.get("options", [])
correct_id = question.get("correct_answer")
if not options or not correct_id:
continue
# Finde den Text der richtigen Antwort
correct_text = None
for opt in options:
if opt.get("id") == correct_id:
correct_text = opt.get("text")
break
# Mische die Optionen
random.shuffle(options)
# Vergebe neue IDs (a, b, c, d) und finde neue Position der richtigen Antwort
new_ids = ["a", "b", "c", "d"]
new_correct = None
for i, opt in enumerate(options):
if i < len(new_ids):
if opt.get("text") == correct_text:
new_correct = new_ids[i]
opt["id"] = new_ids[i]
if new_correct:
question["correct_answer"] = new_correct
question["options"] = options
return mc_data
def generate_mc_from_analysis(analysis_path: Path, num_questions: int = 5) -> Path:
    """
    Generate multiple-choice questions from an analysis JSON file.

    The questions are:
    - created from the extracted worksheet text,
    - kept at the difficulty level of the original worksheet,
    - stored with randomly shuffled answer positions.

    Uses the configured vision API ("claude" with OpenAI fallback,
    otherwise OpenAI directly) and writes the result next to the other
    cleaned artifacts in BEREINIGT_DIR.

    Args:
        analysis_path: Path to the *_analyse.json file.
        num_questions: Number of questions to generate (default: 5).

    Returns:
        Path to the generated *_mc.json file.

    Raises:
        FileNotFoundError: When the analysis file does not exist.
        RuntimeError: When the analysis file is not valid JSON.
    """
    if not analysis_path.exists():
        raise FileNotFoundError(f"Analysedatei nicht gefunden: {analysis_path}")
    try:
        analysis_data = json.loads(analysis_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        # Chain the cause so the original parse error stays visible.
        raise RuntimeError(f"Ungültige Analyse-JSON: {e}") from e
    # Lazy %-style args: the message is only formatted if the level is on.
    logger.info("Generiere MC-Fragen für: %s", analysis_path.name)
    vision_api = get_vision_api()
    # Generate the MC questions with the configured API; fall back to
    # OpenAI when the Claude path fails for any reason.
    if vision_api == "claude":
        try:
            mc_data = _generate_mc_with_claude(analysis_data, num_questions)
        except Exception as e:
            logger.warning("Claude MC-Generierung fehlgeschlagen, nutze OpenAI: %s", e)
            mc_data = _generate_mc_with_openai(analysis_data, num_questions)
    else:
        mc_data = _generate_mc_with_openai(analysis_data, num_questions)
    # Shuffle the answer positions so "a" is not always correct.
    mc_data = _shuffle_mc_options(mc_data)
    # Persist the MC data next to the other artifacts.
    out_name = analysis_path.stem.replace("_analyse", "") + "_mc.json"
    out_path = BEREINIGT_DIR / out_name
    out_path.write_text(json.dumps(mc_data, ensure_ascii=False, indent=2), encoding="utf-8")
    logger.info("MC-Fragen gespeichert: %s", out_path.name)
    return out_path