""" AI Processor - Scan Analyzer Vision-based analysis of worksheets using OpenAI and Claude APIs. """ from pathlib import Path import json import logging import shutil import requests from ..config import ( VISION_API, BEREINIGT_DIR, get_openai_api_key, ) from ..utils import encode_image_to_data_url logger = logging.getLogger(__name__) def describe_scan_with_ai(input_path: Path) -> Path: """ Vision model gives a short description of the worksheet. Args: input_path: Path to the input image Returns: Path to the description text file """ if not input_path.exists(): raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}") api_key = get_openai_api_key() image_data_url = encode_image_to_data_url(input_path) url = "https://api.openai.com/v1/chat/completions" headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} payload = { "model": "gpt-4o-mini", "messages": [ { "role": "system", "content": "Du bist ein hilfreicher Assistent, der Schul-Arbeitsblaetter knapp beschreibt.", }, { "role": "user", "content": [ { "type": "text", "text": ( "Beschreibe dieses Arbeitsblatt knapp: Thema, Art der Aufgaben " "(z.B. Lueckentext, Multiple Choice, Rechenaufgaben) und groben Inhalt." ), }, {"type": "image_url", "image_url": {"url": image_data_url}}, ], }, ], "max_tokens": 400, } response = requests.post(url, headers=headers, json=payload) response.raise_for_status() data = response.json() try: description = data["choices"][0]["message"]["content"] except Exception as e: raise RuntimeError(f"Unerwartete Antwortstruktur von der KI: {e}\nAntwort: {data}") from e out_name = input_path.stem + "_beschreibung.txt" out_path = BEREINIGT_DIR / out_name out_path.write_text(description, encoding="utf-8") return out_path def _analyze_with_openai(input_path: Path) -> Path: """ Structured JSON analysis of the worksheet using OpenAI. Features: - canonical_text: complete corrected text without handwriting - printed_blocks: structured blocks of printed text - handwritten_annotations: student handwritten notes - struck_through_words: crossed out words """ if not input_path.exists(): raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}") api_key = get_openai_api_key() image_data_url = encode_image_to_data_url(input_path) url = "https://api.openai.com/v1/chat/completions" headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} system_prompt = ( "Du bist ein Experte fuer die Analyse von Schul-Arbeitsblaettern.\n\n" "HAUPTAUFGABEN:\n" "1. Erkenne ALLE gedruckten Elemente: Text, Ueberschriften, Tabellen, Linien, Kaestchen, Diagramme, Illustrationen\n" "2. Identifiziere ALLE handschriftlichen Ergaenzungen: Antworten, Zahlen, Buchstaben, Notizen, Zeichnungen\n" "3. Bestimme praezise Positionen (Bounding Boxes in Pixeln) fuer JEDES Element\n\n" "KRITISCH - DIAGRAMME & ILLUSTRATIONEN:\n" "- Suche aktiv nach: anatomischen Zeichnungen, beschrifteten Diagrammen, Grafiken, Tabellen, Skizzen\n" "- Wenn du irgendeine bildliche Darstellung siehst (z.B. Auge, Pflanze, Karte, Schaubild), setze 'has_diagram: true'\n" "- Fuer JEDES visuelle Element: Erstelle einen Eintrag in 'diagram_elements' mit genauer Position\n" "- Beschrifte-Linien (von Beschriftung zu Bildteil) gehoeren zum Diagramm!\n\n" "HANDSCHRIFT ERKENNUNG:\n" "- Unterscheide gedruckt vs. handgeschrieben anhand der Schriftart\n" "- Klassifiziere Farbe: blau/schwarz/rot/pencil (Bleistift)\n" "- Durchgestrichene Woerter separat auflisten\n\n" "AUSGABE: Gib deine Antwort AUSSCHLIESSLICH als gueltiges JSON zurueck (kein Markdown, keine Code-Bloecke)." ) user_text = _get_analysis_user_prompt() payload = { "model": "gpt-4o-mini", "response_format": {"type": "json_object"}, "messages": [ {"role": "system", "content": system_prompt}, { "role": "user", "content": [ {"type": "text", "text": user_text}, {"type": "image_url", "image_url": {"url": image_data_url}}, ], }, ], "max_tokens": 2500, "temperature": 0.15, } response = requests.post(url, headers=headers, json=payload) response.raise_for_status() data = response.json() try: content = data["choices"][0]["message"]["content"] except Exception as e: raise RuntimeError(f"Unerwartete Antwortstruktur von der KI: {e}\nAntwort: {data}") from e try: obj = json.loads(content) except json.JSONDecodeError as e: raise RuntimeError(f"Modell hat ungueltiges JSON geliefert: {e}\nInhalt: {content}") from e out_name = input_path.stem + "_analyse.json" out_path = BEREINIGT_DIR / out_name out_path.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8") return out_path def _analyze_with_claude(input_path: Path) -> Path: """ Structured JSON analysis with Claude Vision API. Uses Claude 3.5 Sonnet for better OCR and layout detection. """ from claude_vision import analyze_worksheet_with_claude if not input_path.exists(): raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}") logger.info(f"Analyzing with Claude Vision: {input_path.name}") try: analysis_data = analyze_worksheet_with_claude( input_path, max_tokens=2500 ) out_name = input_path.stem + "_analyse.json" out_path = BEREINIGT_DIR / out_name out_path.write_text( json.dumps(analysis_data, ensure_ascii=False, indent=2), encoding="utf-8" ) logger.info(f"Claude analysis saved: {out_path.name}") return out_path except Exception as e: logger.error(f"Claude analysis failed: {e}") raise def analyze_scan_structure_with_ai(input_path: Path) -> Path: """ Structured JSON analysis of the worksheet (Hybrid mode). Uses the API configured in VISION_API: - "claude" (default): Claude 3.5 Sonnet - better OCR, layout detection - "openai": OpenAI GPT-4o-mini - cheaper, faster Switch via environment variable: export VISION_API="claude" # or "openai" Returns: Path to analysis JSON file """ logger.info(f"Using Vision API: {VISION_API}") if VISION_API == "claude": try: return _analyze_with_claude(input_path) except Exception as e: logger.warning(f"Claude failed, falling back to OpenAI: {e}") return _analyze_with_openai(input_path) elif VISION_API == "openai": return _analyze_with_openai(input_path) else: logger.warning(f"Unknown VISION_API '{VISION_API}', using Claude as default") return _analyze_with_claude(input_path) def remove_handwriting_from_scan(input_path: Path) -> Path: """ Remove handwriting from worksheet scan using AI-guided image processing. Process: 1. Load corresponding analysis JSON (from Stage 1) 2. Apply multi-strategy cleaning using WorksheetCleaner 3. Preserve diagrams and printed content 4. Save cleaned image Returns: Path to cleaned image (*_clean.jpg) """ if not input_path.exists(): raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}") from image_cleaner import WorksheetCleaner # Load analysis JSON (from Stage 1) analysis_name = input_path.stem + "_analyse.json" analysis_path = BEREINIGT_DIR / analysis_name # If analysis doesn't exist, run it first if not analysis_path.exists(): logger.info(f"Analysis not found for {input_path.name}, running analysis first") analysis_path = analyze_scan_structure_with_ai(input_path) # Load analysis data try: analysis_data = json.loads(analysis_path.read_text(encoding='utf-8')) except json.JSONDecodeError as e: logger.error(f"Invalid analysis JSON: {analysis_path}\n{e}") analysis_data = { "layout": {"text_regions": [], "diagram_elements": []}, "handwriting_regions": [] } # Prepare output path output_name = input_path.stem + "_clean" + input_path.suffix output_path = BEREINIGT_DIR / output_name # Clean the image using WorksheetCleaner cleaner = WorksheetCleaner(debug_mode=False) try: cleaned_path = cleaner.clean_worksheet(input_path, analysis_data, output_path) logger.info(f"Successfully cleaned {input_path.name}") return cleaned_path except Exception as e: logger.error(f"Cleaning failed for {input_path.name}, using original: {e}") shutil.copy2(input_path, output_path) return output_path def _get_analysis_user_prompt() -> str: """Get the user prompt for worksheet analysis.""" return ( "Analysiere dieses Arbeitsblatt und gib ein JSON mit folgendem Aufbau zurueck:\n\n" "{\n" ' "title": string | null,\n' ' "subject": string | null,\n' ' "grade_level": string | null,\n' ' "instructions": string | null,\n' ' "canonical_text": string | null,\n' ' "printed_blocks": [\n' " {\n" ' "id": string,\n' ' "role": "title" | "instructions" | "body" | "other",\n' ' "text": string\n' " }\n" " ],\n" ' "layout": {\n' ' "page_structure": {\n' ' "has_diagram": boolean,\n' ' "orientation": "portrait" | "landscape"\n' " },\n" ' "text_regions": [...],\n' ' "diagram_elements": [...]\n' " },\n" ' "handwriting_regions": [...],\n' ' "handwritten_annotations": [...],\n' ' "struck_through_words": [...],\n' ' "tasks": [...]\n' "}\n\n" "WICHTIG - BITTE GENAU BEACHTEN:\n" "1. CANONICAL TEXT: Nur gedruckter Text, OHNE Handschrift\n" "2. DIAGRAMME: Bei JEDER Zeichnung/Grafik has_diagram: true setzen\n" "3. HANDSCHRIFT: Mit Farb-Klassifizierung und Bounding Boxes\n" "4. Bei Unsicherheit: null oder leeres Array" )