Files
breakpilot-lehrer/backend-lehrer/ai_processing/image_processor.py
Benjamin Boenisch 5a31f52310 Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00

79 lines
2.5 KiB
Python

"""
AI Processing - Image Processor.
Entfernt Handschrift aus Arbeitsblatt-Scans.
"""
from pathlib import Path
import shutil
import json
import logging
from .core import BEREINIGT_DIR
from .analysis import analyze_scan_structure_with_ai
# Module-level logger, named after this module so log records can be filtered per module.
logger = logging.getLogger(__name__)
def _load_analysis_data(analysis_path: Path) -> dict:
    """Read the Stage 1 analysis JSON, or return a minimal empty structure if unusable."""
    minimal_analysis = {
        "layout": {"text_regions": [], "diagram_elements": []},
        "handwriting_regions": []
    }
    try:
        analysis_data = json.loads(analysis_path.read_text(encoding='utf-8'))
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError;
        # OSError covers a missing or unreadable file. All of them mean "no
        # usable analysis", so they share the same best-effort fallback.
        logger.error("Invalid analysis JSON: %s\n%s", analysis_path, e)
        return minimal_analysis

    if not isinstance(analysis_data, dict):
        # The fallback structure is a JSON object, so anything else (null,
        # list, number, ...) is treated as an invalid analysis as well.
        logger.error(
            "Invalid analysis JSON: %s\nexpected a JSON object, got %s",
            analysis_path,
            type(analysis_data).__name__,
        )
        return minimal_analysis

    return analysis_data


def remove_handwriting_from_scan(input_path: Path) -> Path:
    """
    Remove handwriting from a worksheet scan using AI-guided image processing (Stage 2).

    Process:
        1. Locate the analysis JSON (``<stem>_analyse.json`` in ``BEREINIGT_DIR``)
           produced by Stage 1; if it does not exist, run the analysis first.
        2. Load the analysis; if it is unreadable or invalid, continue with a
           minimal empty analysis structure.
        3. Hand the scan and the analysis to ``WorksheetCleaner.clean_worksheet``.
        4. If cleaning raises, copy the original scan to the output path instead,
           so that callers always get a file back.

    Args:
        input_path: Path to the scanned worksheet image.

    Returns:
        Path to the cleaned image. On success this is whatever
        ``clean_worksheet`` returns; on fallback it is
        ``BEREINIGT_DIR / "<stem>_clean<suffix>"`` (the input's own suffix is
        kept) containing an unmodified copy of the input.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.

    Note:
        Cleaning failures are NOT raised; they are logged and the original
        image is copied. Errors from importing ``image_cleaner``, from
        constructing the cleaner, from the Stage 1 analysis, or from the
        fallback copy itself are not caught here and propagate to the caller.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    # Imported at call time rather than at module import time (kept from the
    # original code; presumably so this module can be imported without the
    # image-processing dependencies -- confirm before moving it).
    from image_cleaner import WorksheetCleaner

    # Locate the analysis JSON (from Stage 1); run the analysis if it is missing.
    analysis_path = BEREINIGT_DIR / (input_path.stem + "_analyse.json")
    if not analysis_path.exists():
        logger.info("Analysis not found for %s, running analysis first", input_path.name)
        analysis_path = analyze_scan_structure_with_ai(input_path)

    analysis_data = _load_analysis_data(analysis_path)

    # The output keeps the input's file extension.
    output_path = BEREINIGT_DIR / (input_path.stem + "_clean" + input_path.suffix)

    cleaner = WorksheetCleaner(debug_mode=False)
    try:
        cleaned_path = cleaner.clean_worksheet(input_path, analysis_data, output_path)
    except Exception as e:
        # Deliberate best-effort boundary: any cleaner failure degrades to
        # "return the original image". logger.exception keeps the traceback so
        # the real cause is not lost.
        logger.exception("Cleaning failed for %s, using original: %s", input_path.name, e)
        shutil.copy2(input_path, output_path)
        return output_path

    logger.info("Successfully cleaned %s", input_path.name)
    return cleaned_path