Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website, Klausur-Service, School-Service, Voice-Service, Geo-Service, BreakPilot Drive, Agent-Core Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
79 lines
2.5 KiB
Python
79 lines
2.5 KiB
Python
"""
AI Processing - Image Processor.

Removes handwriting from worksheet scans.
"""
|
|
|
|
from pathlib import Path
|
|
import shutil
|
|
import json
|
|
import logging
|
|
|
|
from .core import BEREINIGT_DIR
|
|
from .analysis import analyze_scan_structure_with_ai
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _minimal_analysis() -> dict:
    """Return a fresh, empty analysis structure for when no usable analysis is available."""
    return {
        "layout": {"text_regions": [], "diagram_elements": []},
        "handwriting_regions": []
    }


def remove_handwriting_from_scan(input_path: Path) -> Path:
    """
    Remove handwriting from a worksheet scan using AI-guided image processing (Stage 2).

    Process:
    1. Load the corresponding analysis JSON (from Stage 1); if it does not
       exist yet, the analysis is run first.
    2. Apply multi-strategy cleaning using WorksheetCleaner:
       - Color-based filtering (blue ink)
       - AI-guided region masking
       - Stroke thickness analysis
    3. Preserve diagrams and printed content
    4. Save cleaned image

    This stage is best-effort: if the analysis file cannot be read or parsed,
    an empty analysis structure is used instead; if the cleaner itself fails,
    the original scan is copied unchanged to the output location.

    Args:
        input_path: Path to the scanned worksheet image.

    Returns:
        The path returned by the cleaner on success, otherwise the path of the
        copied original: BEREINIGT_DIR / "<stem>_clean<original suffix>".

    Raises:
        FileNotFoundError: If input file not found.

    Note:
        Cleaning errors are logged and trigger the copy fallback; they are not
        re-raised. Errors from importing ``image_cleaner`` or from running the
        Stage 1 analysis are not handled here and propagate to the caller.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    # Imported lazily inside the function, as in the original implementation.
    from image_cleaner import WorksheetCleaner

    # The Stage 1 result lives next to the cleaned images, keyed by the scan's stem.
    analysis_name = input_path.stem + "_analyse.json"
    analysis_path = BEREINIGT_DIR / analysis_name

    # If analysis doesn't exist, run it first
    if not analysis_path.exists():
        logger.info(f"Analysis not found for {input_path.name}, running analysis first")
        analysis_path = analyze_scan_structure_with_ai(input_path)

    # Load analysis data; any failure to read or parse degrades to an empty
    # analysis instead of aborting the whole stage.
    try:
        analysis_data = json.loads(analysis_path.read_text(encoding='utf-8'))
    except json.JSONDecodeError as e:
        logger.error(f"Invalid analysis JSON: {analysis_path}\n{e}")
        analysis_data = _minimal_analysis()
    except (OSError, UnicodeDecodeError) as e:
        # Missing/unreadable file or non-UTF-8 content: same fallback as invalid JSON.
        logger.error(f"Could not read analysis JSON: {analysis_path}\n{e}")
        analysis_data = _minimal_analysis()

    # Prepare output path (keeps the input's file extension)
    output_name = input_path.stem + "_clean" + input_path.suffix
    output_path = BEREINIGT_DIR / output_name

    # Clean the image using WorksheetCleaner
    cleaner = WorksheetCleaner(debug_mode=False)
    try:
        cleaned_path = cleaner.clean_worksheet(input_path, analysis_data, output_path)
    except Exception as e:
        # Deliberate catch-all: whatever goes wrong inside the cleaner, the
        # pipeline still gets an output file (the untouched original).
        logger.error(f"Cleaning failed for {input_path.name}, using original: {e}")
        shutil.copy2(input_path, output_path)
        return output_path

    logger.info(f"Successfully cleaned {input_path.name}")
    return cleaned_path
|