This repository was archived on 2026-02-15. You can view its files and clone it, but you cannot open issues or pull requests or push commits.
Files
breakpilot-pwa/backend/claude_vision.py
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

300 lines
9.0 KiB
Python

"""
Claude Vision API Integration for Worksheet Analysis
Uses Anthropic's Claude 3.5 Sonnet for superior OCR and layout understanding.
"""
import os
import base64
import json
from pathlib import Path
from typing import Dict, Optional
import logging
# Module-wide logger; also used right below to report a missing SDK.
logger = logging.getLogger(__name__)

# The Anthropic SDK is an optional dependency. Record whether the import
# worked so the public functions can fail with a clear message later on.
try:
    from anthropic import Anthropic
except ImportError:
    ANTHROPIC_AVAILABLE = False
    logger.warning("Anthropic SDK not installed. Run: pip install anthropic")
else:
    ANTHROPIC_AVAILABLE = True
def _get_anthropic_api_key() -> str:
    """Return the Anthropic API key from the ``ANTHROPIC_API_KEY`` variable.

    Surrounding whitespace is removed, so a key exported with a trailing
    newline (for example when read from a secrets file) is still usable.

    Returns:
        The non-empty, stripped API key.

    Raises:
        RuntimeError: If the variable is unset, empty, or whitespace-only.
    """
    api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
    if not api_key:
        raise RuntimeError(
            "ANTHROPIC_API_KEY ist nicht gesetzt. "
            "Bitte API-Schlüssel als Umgebungsvariable setzen:\n"
            "export ANTHROPIC_API_KEY='sk-ant-api03-...'"
        )
    return api_key
# Extension -> MIME type, used only when the file content is not recognised.
_MEDIA_TYPE_BY_EXTENSION = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".gif": "image/gif",
    ".webp": "image/webp",
}


def _sniff_image_media_type(header: bytes) -> Optional[str]:
    """Return the MIME type implied by an image's leading bytes, or None."""
    if header.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if header.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if header.startswith((b"GIF87a", b"GIF89a")):
        return "image/gif"
    # WebP is a RIFF container whose form type (bytes 8-11) is "WEBP".
    if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
        return "image/webp"
    return None


def _encode_image_to_base64(image_path: Path) -> tuple[str, str]:
    """Read an image file and encode it as base64 for the Claude API.

    The media type is taken from the file's magic bytes when they identify
    a JPEG, PNG, GIF or WebP image, so a mislabeled file (e.g. a PNG saved
    as ``.jpg``) is not sent with a media type that contradicts its
    content. If the content is not recognised, the file extension decides,
    and ``image/jpeg`` remains the last-resort default.

    Args:
        image_path: Path of the image file to read.

    Returns:
        ``(base64_string, media_type)``.
    """
    image_bytes = image_path.read_bytes()
    image_b64 = base64.standard_b64encode(image_bytes).decode("utf-8")

    media_type = _sniff_image_media_type(image_bytes[:12])
    if media_type is None:
        media_type = _MEDIA_TYPE_BY_EXTENSION.get(
            image_path.suffix.lower(), "image/jpeg"
        )
    return image_b64, media_type
# System prompt: role and general extraction instructions for the model.
_WORKSHEET_SYSTEM_PROMPT = """Du bist ein Experte für die Analyse von Schul-Arbeitsblättern.
Deine Aufgabe ist es, das Arbeitsblatt detailliert zu analysieren und strukturierte Informationen zu extrahieren:
1. **Gedruckter Text**: Erkenne den VOLLSTÄNDIGEN gedruckten Text inklusive durchgestrichener Wörter
2. **Handschrift**: Identifiziere alle handschriftlichen Eintragungen (Schülerantworten, Korrekturen, Notizen)
3. **Layout**: Bestimme räumliche Positionen aller Elemente (Bounding Boxes in Pixeln)
4. **Diagramme**: Erkenne gedruckte Illustrationen, Grafiken, Diagramme
5. **Farben**: Klassifiziere Handschrift nach Farbe (blau/schwarz/rot/Bleistift)
WICHTIG: Gib deine Antwort als gültiges JSON zurück, nicht als Markdown Code Block!"""

# User prompt: the JSON schema the reply is expected to follow.
_WORKSHEET_USER_PROMPT = """Analysiere dieses Arbeitsblatt und gib ein JSON mit folgendem Aufbau zurück:
{
"title": string | null,
"subject": string | null,
"grade_level": string | null,
"instructions": string | null,
"canonical_text": string | null,
"printed_blocks": [
{
"id": string,
"role": "title" | "instructions" | "body" | "other",
"text": string
}
],
"layout": {
"page_structure": {
"has_diagram": boolean,
"orientation": "portrait" | "landscape"
},
"text_regions": [
{
"id": string,
"type": "title" | "paragraph" | "list" | "instruction",
"text": string,
"bounding_box": {"x": int, "y": int, "width": int, "height": int},
"font_characteristics": {
"is_bold": boolean,
"approximate_size": "large" | "medium" | "small"
}
}
],
"diagram_elements": [
{
"id": string,
"type": "illustration" | "chart" | "graph" | "shape",
"description": string,
"bounding_box": {"x": int, "y": int, "width": int, "height": int},
"preserve": boolean
}
]
},
"handwriting_regions": [
{
"id": string,
"text": string,
"type": "student_answer" | "correction" | "note" | "drawing",
"bounding_box": {"x": int, "y": int, "width": int, "height": int},
"color_hint": "blue" | "black" | "red" | "pencil" | "unknown"
}
],
"handwritten_annotations": [
{
"text": string,
"approx_location": string
}
],
"struck_through_words": [
{
"text": string,
"context": string
}
],
"tasks": [
{
"id": string,
"type": "cloze" | "mcq" | "short_answer" | "math" | "other",
"description": string,
"text_with_gaps": string | null,
"gaps": [
{
"id": string,
"solution": string,
"position_hint": string
}
]
}
]
}
WICHTIGE HINWEISE:
- "canonical_text" enthält den KORRIGIERTEN gedruckten Text OHNE Handschrift und OHNE durchgestrichene Wörter
- "struck_through_words" enthält alle durchgestrichenen Wörter mit Kontext
- Bounding Boxes sind ungefähre Pixel-Positionen (x, y von oben links, width/height in Pixeln)
- "layout.text_regions" sollte alle gedruckten Textbereiche mit genauen Positionen enthalten
- "handwriting_regions" sollte alle handschriftlichen Bereiche mit Farb-Hinweisen enthalten
- Setze "preserve": true für Diagramm-Elemente die erhalten bleiben sollen
- Durchgestrichene Wörter NUR in "struck_through_words", NICHT in "canonical_text"
Gib NUR das JSON zurück, ohne Code-Block-Marker!"""


def _strip_code_fences(text: str) -> str:
    """Remove a leading ```json / ``` marker and a trailing ``` marker."""
    text = text.strip()
    text = text.removeprefix("```json").removeprefix("```")
    text = text.removesuffix("```")
    return text.strip()


def _loads_json_lenient(text: str):
    """Parse *text* as JSON, tolerating prose around a single JSON object.

    The text is parsed as-is first. Only if that fails is the span from the
    first ``{`` to the last ``}`` tried, which recovers replies such as
    "Here is the JSON: ```json {...} ``` Done.".

    Raises:
        json.JSONDecodeError: The error of the first (strict) attempt, if
            neither attempt yields valid JSON.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError as strict_error:
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end <= start:
            raise
        try:
            return json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            # Report the error for the full text; it matches what is logged.
            raise strict_error from None


def analyze_worksheet_with_claude(
    image_path: Path,
    max_tokens: int = 2500,
    model: str = "claude-3-5-sonnet-20241022"
) -> Dict:
    """Analyze a worksheet image with the Claude Vision API.

    The image is sent base64-encoded together with a prompt describing the
    expected JSON schema; the model's first text block is parsed as JSON.

    Args:
        image_path: Path to the worksheet image.
        max_tokens: Maximum number of tokens in the response (default 2500).
        model: Claude model identifier to use.

    Returns:
        The parsed JSON value from the model's reply, expected to follow the
        schema given in the user prompt.

    Raises:
        RuntimeError: If the SDK is not installed, the API key is not set,
            the response has no text content, or the reply is not valid JSON.
        FileNotFoundError: If ``image_path`` does not exist.
        Exception: Any error raised by the API call is logged and re-raised.
    """
    if not ANTHROPIC_AVAILABLE:
        raise RuntimeError("Anthropic SDK nicht installiert. Run: pip install anthropic")
    if not image_path.exists():
        raise FileNotFoundError(f"Image not found: {image_path}")

    api_key = _get_anthropic_api_key()
    client = Anthropic(api_key=api_key)
    image_b64, media_type = _encode_image_to_base64(image_path)

    logger.info("Calling Claude API for analysis of %s", image_path.name)
    # Only the network call sits in the try block, so failures in the
    # post-processing below are not mislabeled as API failures.
    try:
        response = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            system=_WORKSHEET_SYSTEM_PROMPT,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": media_type,
                                "data": image_b64,
                            },
                        },
                        {
                            "type": "text",
                            "text": _WORKSHEET_USER_PROMPT,
                        },
                    ],
                }
            ],
        )
    except Exception as e:
        logger.error(f"Claude API call failed: {e}")
        raise

    if not response.content:
        logger.error("Empty response from Claude API")
        raise RuntimeError("Empty response from Claude API")

    # Use the first text block of the reply.
    text_content = next(
        (block.text for block in response.content if block.type == "text"),
        None,
    )
    if not text_content:
        logger.error("No text content in Claude response")
        raise RuntimeError("No text content in Claude response")
    logger.info("Received response from Claude (%d chars)", len(text_content))

    # A reply cut off at the token limit is almost certainly incomplete JSON;
    # flag it so the parse error below is easy to diagnose.
    truncated = getattr(response, "stop_reason", None) == "max_tokens"
    if truncated:
        logger.warning(
            "Claude response was truncated at max_tokens=%d; "
            "the JSON is probably incomplete",
            max_tokens,
        )

    # Claude might wrap the JSON in ```json ... ``` despite the instructions.
    text_content = _strip_code_fences(text_content)
    try:
        analysis_data = _loads_json_lenient(text_content)
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse Claude JSON response: {e}")
        logger.error(f"Response text: {text_content[:500]}...")
        hint = (
            f"\nHint: response was truncated at max_tokens={max_tokens}"
            if truncated
            else ""
        )
        raise RuntimeError(
            f"Invalid JSON from Claude: {e}\nContent: {text_content[:200]}...{hint}"
        ) from e

    logger.info("Successfully parsed Claude analysis")
    return analysis_data
def test_claude_connection(model: str = "claude-3-5-sonnet-20241022") -> bool:
    """Check whether the Claude API is reachable with the current credentials.

    Sends a minimal one-message request and reports whether it succeeded.
    Every failure (missing SDK, missing API key, API error) is logged and
    turned into ``False`` rather than raised.

    Args:
        model: Claude model identifier used for the probe request. Defaults
            to the model this function previously hard-coded.

    Returns:
        True if the request succeeded, False otherwise.
    """
    if not ANTHROPIC_AVAILABLE:
        logger.error("Anthropic SDK not installed")
        return False
    try:
        api_key = _get_anthropic_api_key()
        client = Anthropic(api_key=api_key)
        # The reply itself is irrelevant; only success or failure matters.
        client.messages.create(
            model=model,
            max_tokens=10,
            messages=[{"role": "user", "content": "Test"}],
        )
    except Exception as e:
        # Deliberately broad: this is a yes/no health check.
        logger.error(f"❌ Claude API connection failed: {e}")
        return False
    logger.info("✅ Claude API connection successful")
    return True