fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
209
backend/ai_processing/analysis.py
Normal file
209
backend/ai_processing/analysis.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
AI Processing - Worksheet Analysis.
|
||||
|
||||
Strukturierte Analyse von Arbeitsblättern mit OpenAI oder Claude.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import json
|
||||
import requests
|
||||
import logging
|
||||
|
||||
from .core import (
|
||||
get_openai_api_key,
|
||||
encode_image_to_data_url,
|
||||
BEREINIGT_DIR,
|
||||
get_vision_api,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def describe_scan_with_ai(input_path: Path, *, timeout: float = 120.0) -> Path:
    """Ask the OpenAI vision model for a short description of a worksheet scan.

    The image is embedded as a data URL in a chat-completions request and the
    returned text is written to ``BEREINIGT_DIR/<stem>_beschreibung.txt``.

    Args:
        input_path: Path of the scanned worksheet image.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        Path of the text file that holds the description.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        requests.RequestException: On connection problems, timeouts or an
            error status returned by the API.
        RuntimeError: If the response does not have the expected structure.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    api_key = get_openai_api_key()
    image_data_url = encode_image_to_data_url(input_path)

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "system",
                "content": "Du bist ein hilfreicher Assistent, der Schul-Arbeitsblätter knapp beschreibt.",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Beschreibe dieses Arbeitsblatt knapp: Thema, Art der Aufgaben "
                            "(z.B. Lückentext, Multiple Choice, Rechenaufgaben) und groben Inhalt."
                        ),
                    },
                    {"type": "image_url", "image_url": {"url": image_data_url}},
                ],
            },
        ],
        "max_tokens": 400,
    }

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection and would block the caller forever.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    try:
        description = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        raise RuntimeError(f"Unerwartete Antwortstruktur von der KI: {e}\nAntwort: {data}") from e
    if not isinstance(description, str):
        # A null/non-text content would otherwise fail later inside write_text
        # with an unrelated-looking TypeError.
        raise RuntimeError(
            f"Unerwartete Antwortstruktur von der KI: content ist kein Text\nAntwort: {data}"
        )

    out_path = BEREINIGT_DIR / f"{input_path.stem}_beschreibung.txt"
    out_path.write_text(description, encoding="utf-8")
    return out_path
|
||||
|
||||
|
||||
def _get_analysis_system_prompt() -> str:
|
||||
"""Gibt den System-Prompt für die Arbeitsblatt-Analyse zurück."""
|
||||
return (
|
||||
"Du bist ein Experte für die Analyse von Schul-Arbeitsblättern.\n\n"
|
||||
"HAUPTAUFGABEN:\n"
|
||||
"1. Erkenne ALLE gedruckten Elemente: Text, Überschriften, Tabellen, Linien, Kästchen, Diagramme, Illustrationen\n"
|
||||
"2. Identifiziere ALLE handschriftlichen Ergänzungen: Antworten, Zahlen, Buchstaben, Notizen, Zeichnungen\n"
|
||||
"3. Bestimme präzise Positionen (Bounding Boxes in Pixeln) für JEDES Element\n\n"
|
||||
"KRITISCH - DIAGRAMME & ILLUSTRATIONEN:\n"
|
||||
"- Suche aktiv nach: anatomischen Zeichnungen, beschrifteten Diagrammen, Grafiken, Tabellen, Skizzen\n"
|
||||
"- Wenn du irgendeine bildliche Darstellung siehst (z.B. Auge, Pflanze, Karte, Schaubild), setze 'has_diagram: true'\n"
|
||||
"- Für JEDES visuelle Element: Erstelle einen Eintrag in 'diagram_elements' mit genauer Position\n"
|
||||
"- Beschrifte-Linien (von Beschriftung zu Bildteil) gehören zum Diagramm!\n\n"
|
||||
"HANDSCHRIFT ERKENNUNG:\n"
|
||||
"- Unterscheide gedruckt vs. handgeschrieben anhand der Schriftart\n"
|
||||
"- Klassifiziere Farbe: blau/schwarz/rot/pencil (Bleistift)\n"
|
||||
"- Durchgestrichene Wörter separat auflisten\n\n"
|
||||
"AUSGABE: Gib deine Antwort AUSSCHLIESSLICH als gültiges JSON zurück (kein Markdown, keine Code-Blöcke)."
|
||||
)
|
||||
|
||||
|
||||
def _analyze_with_openai(input_path: Path) -> Path:
|
||||
"""Strukturierte JSON-Analyse des Arbeitsblatts mit OpenAI."""
|
||||
if not input_path.exists():
|
||||
raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")
|
||||
|
||||
api_key = get_openai_api_key()
|
||||
image_data_url = encode_image_to_data_url(input_path)
|
||||
|
||||
url = "https://api.openai.com/v1/chat/completions"
|
||||
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
|
||||
|
||||
system_prompt = _get_analysis_system_prompt()
|
||||
|
||||
user_text = '''Analysiere dieses Arbeitsblatt und gib ein JSON mit folgendem Aufbau zurück:
|
||||
|
||||
{
|
||||
"title": string | null,
|
||||
"subject": string | null,
|
||||
"grade_level": string | null,
|
||||
"instructions": string | null,
|
||||
"canonical_text": string,
|
||||
"printed_blocks": [...],
|
||||
"handwritten_annotations": [...],
|
||||
"struck_through_words": [...],
|
||||
"tasks": [...],
|
||||
"has_diagram": boolean,
|
||||
"diagram_elements": [...]
|
||||
}'''
|
||||
|
||||
payload = {
|
||||
"model": "gpt-4o-mini",
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": user_text},
|
||||
{"type": "image_url", "image_url": {"url": image_data_url}},
|
||||
],
|
||||
},
|
||||
],
|
||||
"max_tokens": 2500,
|
||||
}
|
||||
|
||||
response = requests.post(url, headers=headers, json=payload)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
try:
|
||||
raw_content = data["choices"][0]["message"]["content"]
|
||||
# JSON-Block extrahieren falls in Markdown eingebettet
|
||||
if "```json" in raw_content:
|
||||
raw_content = raw_content.split("```json")[1].split("```")[0].strip()
|
||||
elif "```" in raw_content:
|
||||
raw_content = raw_content.split("```")[1].split("```")[0].strip()
|
||||
obj = json.loads(raw_content)
|
||||
except json.JSONDecodeError as e:
|
||||
raise RuntimeError(f"KI hat kein valides JSON zurückgegeben: {e}\nAntwort: {raw_content}") from e
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Unerwartete Antwortstruktur: {e}\nAntwort: {data}") from e
|
||||
|
||||
out_name = input_path.stem + "_analyse.json"
|
||||
out_path = BEREINIGT_DIR / out_name
|
||||
out_path.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
return out_path
|
||||
|
||||
|
||||
def _analyze_with_claude(input_path: Path) -> Path:
|
||||
"""Strukturierte JSON-Analyse mit Claude Vision API."""
|
||||
from claude_vision import analyze_worksheet_with_claude
|
||||
|
||||
if not input_path.exists():
|
||||
raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")
|
||||
|
||||
logger.info(f"Analyzing with Claude Vision: {input_path.name}")
|
||||
|
||||
try:
|
||||
analysis_data = analyze_worksheet_with_claude(input_path, max_tokens=2500)
|
||||
|
||||
out_name = input_path.stem + "_analyse.json"
|
||||
out_path = BEREINIGT_DIR / out_name
|
||||
out_path.write_text(
|
||||
json.dumps(analysis_data, ensure_ascii=False, indent=2),
|
||||
encoding="utf-8"
|
||||
)
|
||||
|
||||
logger.info(f"Claude analysis saved: {out_path.name}")
|
||||
return out_path
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Claude analysis failed: {e}")
|
||||
raise
|
||||
|
||||
|
||||
def analyze_scan_structure_with_ai(input_path: Path) -> Path:
    """Run the structured JSON worksheet analysis (hybrid mode).

    The backend is chosen by the value returned from ``get_vision_api()``:

    - ``"openai"``: use OpenAI only.
    - ``"claude"``: use Claude and fall back to OpenAI if Claude fails.
    - anything else: log a warning and behave like ``"claude"``, including
      the OpenAI fallback.

    Args:
        input_path: Path of the scanned worksheet image.

    Returns:
        Path of the JSON file written by the selected backend.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        Exception: Whatever the OpenAI backend raises when it is the selected
            backend or the fallback.
    """
    # Checked once up front: otherwise a missing file would first be logged as
    # a misleading "Claude failed" before the same error surfaces from the
    # fallback.
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    vision_api = get_vision_api()
    logger.info("Using Vision API: %s", vision_api)

    if vision_api == "openai":
        return _analyze_with_openai(input_path)

    if vision_api != "claude":
        logger.warning("Unknown VISION_API '%s', using Claude as default", vision_api)

    try:
        return _analyze_with_claude(input_path)
    except Exception as e:
        # Deliberately broad: any Claude-side failure (missing module, API
        # error, bad response) should trigger the OpenAI fallback.
        logger.warning("Claude failed, falling back to OpenAI: %s", e)
        return _analyze_with_openai(input_path)
|
||||
Reference in New Issue
Block a user