"""
OCR Labeling - Helper Functions and OCR Wrappers

Extracted from ocr_labeling_api.py to keep files under 500 LOC.

DATENSCHUTZ/PRIVACY:
- Alle Verarbeitung erfolgt lokal (Mac Mini mit Ollama)
- Keine Daten werden an externe Server gesendet
"""

import os
import hashlib

from ocr_labeling_models import LOCAL_STORAGE_PATH

# Optional dependencies.
#
# Each backend below is imported inside its own try/except ImportError. On
# success the matching *_AVAILABLE flag is set to True; on ImportError the flag
# is set to False and a warning is printed at import time. Only ImportError is
# caught - any other exception raised while importing one of these modules
# propagates and aborts the import of this module.

# Try to import Vision OCR service
try:
    import sys
    # Prepend <this file's dir>/../../backend/klausur/services to sys.path so
    # that `vision_ocr_service` can be resolved from there. The entry stays on
    # sys.path for the rest of the process, so it can also influence how the
    # imports further down are resolved.
    sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..', 'backend', 'klausur', 'services'))
    # NOTE(review): VisionOCRService is not referenced in this section of the
    # file - presumably kept for importers of this module; confirm.
    from vision_ocr_service import get_vision_ocr_service, VisionOCRService
    VISION_OCR_AVAILABLE = True
except ImportError:
    VISION_OCR_AVAILABLE = False
    print("Warning: Vision OCR service not available")

# Try to import PaddleOCR from hybrid_vocab_extractor
# (run_paddle_ocr is called by run_paddleocr_wrapper below)
try:
    from hybrid_vocab_extractor import run_paddle_ocr
    PADDLEOCR_AVAILABLE = True
except ImportError:
    PADDLEOCR_AVAILABLE = False
    print("Warning: PaddleOCR not available")

# Try to import TrOCR service
# (run_trocr_ocr is awaited by run_trocr_wrapper below)
try:
    from services.trocr_service import run_trocr_ocr
    TROCR_AVAILABLE = True
except ImportError:
    TROCR_AVAILABLE = False
    print("Warning: TrOCR service not available")

# Try to import Donut service
# (run_donut_ocr is awaited by run_donut_wrapper below)
try:
    from services.donut_ocr_service import run_donut_ocr
    DONUT_AVAILABLE = True
except ImportError:
    DONUT_AVAILABLE = False
    print("Warning: Donut OCR service not available")

# Try to import MinIO storage
# NOTE(review): upload_ocr_image, get_ocr_image and MINIO_BUCKET are not used
# in this section of the file - presumably consumed by modules importing from
# here; confirm.
try:
    from minio_storage import upload_ocr_image, get_ocr_image, MINIO_BUCKET
    MINIO_AVAILABLE = True
except ImportError:
    MINIO_AVAILABLE = False
    print("Warning: MinIO storage not available, using local storage")

# Try to import Training Export Service
# NOTE(review): the imported names are not used in this section of the file -
# presumably consumed by modules importing from here; confirm.
try:
    from training_export_service import (
        TrainingExportService,
        TrainingSample,
        get_training_export_service,
    )
    TRAINING_EXPORT_AVAILABLE = True
except ImportError:
    TRAINING_EXPORT_AVAILABLE = False
    print("Warning: Training export service not available")


# =============================================================================
# Helper Functions
# =============================================================================

def compute_image_hash(image_data: bytes) -> str:
    """
    Return the SHA-256 digest of the given image bytes.

    Args:
        image_data: Raw image content.

    Returns:
        The digest as a hexadecimal string.
    """
    hasher = hashlib.sha256()
    hasher.update(image_data)
    return hasher.hexdigest()


async def run_ocr_on_image(image_data: bytes, filename: str, model: str = "llama3.2-vision:11b") -> tuple:
    """
    Dispatch an image to the OCR wrapper selected by ``model``.

    Routing:
        - "paddleocr" -> run_paddleocr_wrapper
        - "donut"     -> run_donut_wrapper
        - "trocr"     -> run_trocr_wrapper
        - any other value (including the default "llama3.2-vision:11b")
          -> run_vision_ocr_wrapper

    Note that ``model`` is only used for routing and logging here; the value
    itself is not forwarded to the chosen wrapper.

    Args:
        image_data: Raw image bytes to run OCR on.
        filename: Name passed through to the chosen wrapper.
        model: Identifier of the OCR backend to use.

    Returns:
        Whatever the chosen wrapper returns: a tuple of (ocr_text, confidence).
    """
    print(f"Running OCR with model: {model}")

    # Pick the wrapper first, then make a single call at the end.
    if model == "paddleocr":
        handler = run_paddleocr_wrapper
    elif model == "donut":
        handler = run_donut_wrapper
    elif model == "trocr":
        handler = run_trocr_wrapper
    else:
        # Everything else is treated as a Vision LLM model name.
        handler = run_vision_ocr_wrapper

    return await handler(image_data, filename)


async def run_vision_ocr_wrapper(image_data: bytes, filename: str) -> tuple:
    """
    Run OCR through the Vision OCR service.

    The service is asked to extract text with ``is_handwriting=True``.

    Args:
        image_data: Raw image bytes.
        filename: Forwarded to the service's ``extract_text`` call.

    Returns:
        (text, confidence) taken from the service result, or (None, 0.0) when
        the service could not be imported, reports itself unavailable, or any
        exception is raised while calling it (the error is printed, not
        re-raised).
    """
    if VISION_OCR_AVAILABLE:
        try:
            ocr_service = get_vision_ocr_service()
            service_ready = await ocr_service.is_available()
            if service_ready:
                extraction = await ocr_service.extract_text(
                    image_data,
                    filename=filename,
                    is_handwriting=True
                )
                return extraction.text, extraction.confidence
            print("Vision OCR service not available (is_available check failed)")
        except Exception as e:
            # Best effort: report the failure and fall through to the empty result.
            print(f"Vision OCR failed: {e}")
    else:
        print("Vision OCR service not available")

    # Single exit for every failure path.
    return None, 0.0


async def run_paddleocr_wrapper(image_data: bytes, filename: str) -> tuple:
    """
    Run OCR through PaddleOCR (``run_paddle_ocr`` from hybrid_vocab_extractor).

    Args:
        image_data: Raw image bytes handed to ``run_paddle_ocr``.
        filename: Only used when falling back to the Vision OCR wrapper.

    Returns:
        (raw_text, confidence), where confidence is the mean of the regions'
        ``confidence`` values, or 0.5 when no regions were returned.
        (None, 0.0) when PaddleOCR yields empty text (no fallback in that case).
        If PaddleOCR is not importable or raises, the result of
        run_vision_ocr_wrapper is returned instead.
    """
    if PADDLEOCR_AVAILABLE:
        try:
            # run_paddle_ocr yields a (regions, raw_text) pair
            detected_regions, full_text = run_paddle_ocr(image_data)

            if full_text:
                # Mean region confidence; 0.5 stands in when there are no regions.
                mean_confidence = (
                    sum(region.confidence for region in detected_regions) / len(detected_regions)
                    if detected_regions
                    else 0.5
                )
                return full_text, mean_confidence

            print("PaddleOCR returned empty text")
            return None, 0.0
        except Exception as e:
            print(f"PaddleOCR failed: {e}, falling back to Vision OCR")
            return await run_vision_ocr_wrapper(image_data, filename)

    print("PaddleOCR not available, falling back to Vision OCR")
    return await run_vision_ocr_wrapper(image_data, filename)


async def run_trocr_wrapper(image_data: bytes, filename: str) -> tuple:
    """
    Run OCR through the TrOCR service.

    Args:
        image_data: Raw image bytes handed to ``run_trocr_ocr``.
        filename: Only used when falling back to the Vision OCR wrapper.

    Returns:
        The (text, confidence) pair produced by ``run_trocr_ocr``. If TrOCR is
        not importable or the call raises, the result of
        run_vision_ocr_wrapper is returned instead.
    """
    if TROCR_AVAILABLE:
        try:
            # Unpacking also guards against a result that is not a 2-item pair.
            recognized_text, score = await run_trocr_ocr(image_data)
        except Exception as e:
            print(f"TrOCR failed: {e}, falling back to Vision OCR")
            return await run_vision_ocr_wrapper(image_data, filename)
        else:
            return recognized_text, score

    print("TrOCR not available, falling back to Vision OCR")
    return await run_vision_ocr_wrapper(image_data, filename)


async def run_donut_wrapper(image_data: bytes, filename: str) -> tuple:
    """
    Run OCR through the Donut OCR service.

    Args:
        image_data: Raw image bytes handed to ``run_donut_ocr``.
        filename: Only used when falling back to the Vision OCR wrapper.

    Returns:
        The (text, confidence) pair produced by ``run_donut_ocr``. If Donut is
        not importable or the call raises, the result of
        run_vision_ocr_wrapper is returned instead.
    """
    if DONUT_AVAILABLE:
        try:
            # Unpacking also guards against a result that is not a 2-item pair.
            recognized_text, score = await run_donut_ocr(image_data)
        except Exception as e:
            print(f"Donut OCR failed: {e}, falling back to Vision OCR")
            return await run_vision_ocr_wrapper(image_data, filename)
        else:
            return recognized_text, score

    print("Donut not available, falling back to Vision OCR")
    return await run_vision_ocr_wrapper(image_data, filename)


def save_image_locally(session_id: str, item_id: str, image_data: bytes, extension: str = "png") -> str:
    """
    Write image bytes to ``LOCAL_STORAGE_PATH/<session_id>/<item_id>.<extension>``.

    The session directory is created if it does not exist yet; an existing
    file at the target path is overwritten.

    Args:
        session_id: Name of the per-session subdirectory.
        item_id: Base name of the file (without extension).
        image_data: Raw bytes to write.
        extension: File extension without the leading dot.

    Returns:
        The path of the written file.
    """
    # NOTE(review): session_id and item_id are joined into the path as-is -
    # confirm callers only pass trusted identifiers.
    target_dir = os.path.join(LOCAL_STORAGE_PATH, session_id)
    os.makedirs(target_dir, exist_ok=True)

    destination = os.path.join(target_dir, f"{item_id}.{extension}")
    with open(destination, 'wb') as image_file:
        image_file.write(image_data)

    return destination


def get_image_url(image_path: str) -> str:
    """
    Translate a stored image path into the value handed to the frontend.

    Paths located under ``LOCAL_STORAGE_PATH`` are mapped to
    ``/api/v1/ocr-label/images/<path relative to the storage root>``.
    Any other value is returned unchanged, on the assumption that it is
    already a URL or object key (e.g. for MinIO).

    The storage root must match on a path boundary: a sibling such as
    ``<root>-backup/x`` merely shares the string prefix and is therefore
    NOT treated as a local image.

    Args:
        image_path: Path or key as stored for the image.

    Returns:
        The relative API URL for local images, otherwise ``image_path`` as-is.
    """
    storage_root = LOCAL_STORAGE_PATH

    if image_path.startswith(storage_root):
        remainder = image_path[len(storage_root):]
        # A plain startswith() is only a string-prefix test. Accept the match
        # only if it ends on a path boundary: the root is empty or already
        # ends with '/', the path equals the root, or the rest begins with '/'.
        on_path_boundary = (
            not storage_root
            or storage_root.endswith('/')
            or not remainder
            or remainder.startswith('/')
        )
        if on_path_boundary:
            relative_path = remainder.lstrip('/')
            return f"/api/v1/ocr-label/images/{relative_path}"

    # For MinIO images, the path is already a URL or key
    return image_path