This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/ai_processor/vision/scan_analyzer.py
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

308 lines
11 KiB
Python

"""
AI Processor - Scan Analyzer
Vision-based analysis of worksheets using OpenAI and Claude APIs.
"""
from pathlib import Path
import json
import logging
import shutil
import requests
from ..config import (
VISION_API,
BEREINIGT_DIR,
get_openai_api_key,
)
from ..utils import encode_image_to_data_url
logger = logging.getLogger(__name__)
def describe_scan_with_ai(input_path: Path) -> Path:
    """
    Ask the OpenAI vision model for a short description of a worksheet scan.

    Args:
        input_path: Path to the input image.

    Returns:
        Path to the description text file
        (``<stem>_beschreibung.txt`` inside ``BEREINIGT_DIR``).

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        requests.HTTPError: On a non-2xx API response.
        RuntimeError: If the API response lacks the expected structure.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    api_key = get_openai_api_key()
    image_data_url = encode_image_to_data_url(input_path)

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "system",
                "content": "Du bist ein hilfreicher Assistent, der Schul-Arbeitsblaetter knapp beschreibt.",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Beschreibe dieses Arbeitsblatt knapp: Thema, Art der Aufgaben "
                            "(z.B. Lueckentext, Multiple Choice, Rechenaufgaben) und groben Inhalt."
                        ),
                    },
                    {"type": "image_url", "image_url": {"url": image_data_url}},
                ],
            },
        ],
        "max_tokens": 400,
    }

    # BUG FIX: a request without a timeout can block this worker forever if the
    # API stalls; 120 s is generous for a single vision completion.
    response = requests.post(url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()

    try:
        description = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        # Narrowed from a bare `except Exception`: only structural lookup
        # failures mean "unexpected response shape".
        raise RuntimeError(f"Unerwartete Antwortstruktur von der KI: {e}\nAntwort: {data}") from e

    out_path = BEREINIGT_DIR / (input_path.stem + "_beschreibung.txt")
    out_path.write_text(description, encoding="utf-8")
    return out_path
def _analyze_with_openai(input_path: Path) -> Path:
    """
    Produce a structured JSON analysis of a worksheet via OpenAI.

    The model is asked for:
    - canonical_text: complete corrected text without handwriting
    - printed_blocks: structured blocks of printed text
    - handwritten_annotations: student handwritten notes
    - struck_through_words: crossed-out words

    Args:
        input_path: Path to the input image.

    Returns:
        Path to the analysis JSON file
        (``<stem>_analyse.json`` inside ``BEREINIGT_DIR``).

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        requests.HTTPError: On a non-2xx API response.
        RuntimeError: If the response structure is unexpected or the model
            returned invalid JSON.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    api_key = get_openai_api_key()
    image_data_url = encode_image_to_data_url(input_path)

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    system_prompt = (
        "Du bist ein Experte fuer die Analyse von Schul-Arbeitsblaettern.\n\n"
        "HAUPTAUFGABEN:\n"
        "1. Erkenne ALLE gedruckten Elemente: Text, Ueberschriften, Tabellen, Linien, Kaestchen, Diagramme, Illustrationen\n"
        "2. Identifiziere ALLE handschriftlichen Ergaenzungen: Antworten, Zahlen, Buchstaben, Notizen, Zeichnungen\n"
        "3. Bestimme praezise Positionen (Bounding Boxes in Pixeln) fuer JEDES Element\n\n"
        "KRITISCH - DIAGRAMME & ILLUSTRATIONEN:\n"
        "- Suche aktiv nach: anatomischen Zeichnungen, beschrifteten Diagrammen, Grafiken, Tabellen, Skizzen\n"
        "- Wenn du irgendeine bildliche Darstellung siehst (z.B. Auge, Pflanze, Karte, Schaubild), setze 'has_diagram: true'\n"
        "- Fuer JEDES visuelle Element: Erstelle einen Eintrag in 'diagram_elements' mit genauer Position\n"
        "- Beschrifte-Linien (von Beschriftung zu Bildteil) gehoeren zum Diagramm!\n\n"
        "HANDSCHRIFT ERKENNUNG:\n"
        "- Unterscheide gedruckt vs. handgeschrieben anhand der Schriftart\n"
        "- Klassifiziere Farbe: blau/schwarz/rot/pencil (Bleistift)\n"
        "- Durchgestrichene Woerter separat auflisten\n\n"
        "AUSGABE: Gib deine Antwort AUSSCHLIESSLICH als gueltiges JSON zurueck (kein Markdown, keine Code-Bloecke)."
    )
    user_text = _get_analysis_user_prompt()
    payload = {
        "model": "gpt-4o-mini",
        # Forces the model into JSON mode so the content parses reliably.
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": user_text},
                    {"type": "image_url", "image_url": {"url": image_data_url}},
                ],
            },
        ],
        "max_tokens": 2500,
        # Low temperature for deterministic, extraction-style output.
        "temperature": 0.15,
    }

    # BUG FIX: a request without a timeout can block this worker forever if the
    # API stalls; 120 s is generous for a single vision completion.
    response = requests.post(url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()

    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        # Narrowed from a bare `except Exception`: only structural lookup
        # failures mean "unexpected response shape".
        raise RuntimeError(f"Unerwartete Antwortstruktur von der KI: {e}\nAntwort: {data}") from e

    try:
        obj = json.loads(content)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Modell hat ungueltiges JSON geliefert: {e}\nInhalt: {content}") from e

    out_path = BEREINIGT_DIR / (input_path.stem + "_analyse.json")
    out_path.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")
    return out_path
def _analyze_with_claude(input_path: Path) -> Path:
    """
    Run the structured worksheet analysis through the Claude Vision helper.

    Delegates the actual API call to ``claude_vision.analyze_worksheet_with_claude``
    and persists its result as ``<stem>_analyse.json`` in ``BEREINIGT_DIR``.

    Args:
        input_path: Path to the input image.

    Returns:
        Path to the written analysis JSON file.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        Exception: Whatever the Claude helper or the file write raises is
            logged and re-raised unchanged.
    """
    from claude_vision import analyze_worksheet_with_claude

    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    logger.info(f"Analyzing with Claude Vision: {input_path.name}")
    try:
        result = analyze_worksheet_with_claude(input_path, max_tokens=2500)
        target = BEREINIGT_DIR / (input_path.stem + "_analyse.json")
        target.write_text(
            json.dumps(result, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
    except Exception as exc:
        logger.error(f"Claude analysis failed: {exc}")
        raise
    logger.info(f"Claude analysis saved: {target.name}")
    return target
def analyze_scan_structure_with_ai(input_path: Path) -> Path:
    """
    Structured JSON analysis of the worksheet (hybrid dispatcher).

    Routes to the backend selected by the ``VISION_API`` config value:
    - ``"claude"`` (default): Claude Vision, with automatic fallback to
      OpenAI if the Claude path raises.
    - ``"openai"``: OpenAI GPT-4o-mini only.
    Any other value logs a warning and uses Claude without fallback.

    Switch via environment variable::

        export VISION_API="claude"  # or "openai"

    Args:
        input_path: Path to the input image.

    Returns:
        Path to the analysis JSON file.
    """
    logger.info(f"Using Vision API: {VISION_API}")

    # Guard clauses first; the Claude-with-fallback path is the default.
    if VISION_API == "openai":
        return _analyze_with_openai(input_path)

    if VISION_API != "claude":
        logger.warning(f"Unknown VISION_API '{VISION_API}', using Claude as default")
        return _analyze_with_claude(input_path)

    try:
        return _analyze_with_claude(input_path)
    except Exception as e:
        logger.warning(f"Claude failed, falling back to OpenAI: {e}")
        return _analyze_with_openai(input_path)
def remove_handwriting_from_scan(input_path: Path) -> Path:
    """
    Remove handwriting from a worksheet scan using AI-guided image processing.

    Process:
    1. Load the Stage-1 analysis JSON (running the analysis first if missing).
    2. Clean the image with ``WorksheetCleaner`` using that analysis.
    3. On cleaning failure, fall back to a plain copy of the original.

    Args:
        input_path: Path to the scanned worksheet image.

    Returns:
        Path to the cleaned image (``<stem>_clean<suffix>`` in
        ``BEREINIGT_DIR``); on cleaner failure this is an untouched copy.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    from image_cleaner import WorksheetCleaner

    # Locate the Stage-1 analysis; produce it on demand if absent.
    analysis_path = BEREINIGT_DIR / (input_path.stem + "_analyse.json")
    if not analysis_path.exists():
        logger.info(f"Analysis not found for {input_path.name}, running analysis first")
        analysis_path = analyze_scan_structure_with_ai(input_path)

    # Corrupt analysis JSON degrades to an empty-region skeleton instead of
    # aborting the cleaning step entirely.
    try:
        analysis_data = json.loads(analysis_path.read_text(encoding='utf-8'))
    except json.JSONDecodeError as err:
        logger.error(f"Invalid analysis JSON: {analysis_path}\n{err}")
        analysis_data = {
            "layout": {"text_regions": [], "diagram_elements": []},
            "handwriting_regions": [],
        }

    output_path = BEREINIGT_DIR / f"{input_path.stem}_clean{input_path.suffix}"

    cleaner = WorksheetCleaner(debug_mode=False)
    try:
        cleaned = cleaner.clean_worksheet(input_path, analysis_data, output_path)
        logger.info(f"Successfully cleaned {input_path.name}")
        return cleaned
    except Exception as err:
        # Best-effort fallback: ship the original image under the clean name.
        logger.error(f"Cleaning failed for {input_path.name}, using original: {err}")
        shutil.copy2(input_path, output_path)
        return output_path
def _get_analysis_user_prompt() -> str:
"""Get the user prompt for worksheet analysis."""
return (
"Analysiere dieses Arbeitsblatt und gib ein JSON mit folgendem Aufbau zurueck:\n\n"
"{\n"
' "title": string | null,\n'
' "subject": string | null,\n'
' "grade_level": string | null,\n'
' "instructions": string | null,\n'
' "canonical_text": string | null,\n'
' "printed_blocks": [\n'
" {\n"
' "id": string,\n'
' "role": "title" | "instructions" | "body" | "other",\n'
' "text": string\n'
" }\n"
" ],\n"
' "layout": {\n'
' "page_structure": {\n'
' "has_diagram": boolean,\n'
' "orientation": "portrait" | "landscape"\n'
" },\n"
' "text_regions": [...],\n'
' "diagram_elements": [...]\n'
" },\n"
' "handwriting_regions": [...],\n'
' "handwritten_annotations": [...],\n'
' "struck_through_words": [...],\n'
' "tasks": [...]\n'
"}\n\n"
"WICHTIG - BITTE GENAU BEACHTEN:\n"
"1. CANONICAL TEXT: Nur gedruckter Text, OHNE Handschrift\n"
"2. DIAGRAMME: Bei JEDER Zeichnung/Grafik has_diagram: true setzen\n"
"3. HANDSCHRIFT: Mit Farb-Klassifizierung und Bounding Boxes\n"
"4. Bei Unsicherheit: null oder leeres Array"
)