This repository was archived on 2026-02-15. You can view its files and clone it, but you cannot open issues or pull requests or push commits.
Files
breakpilot-pwa/backend/ai_processing/image_processor.py
Benjamin Admin bfdaf63ba9 fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

79 lines
2.5 KiB
Python

"""
AI Processing - Image Processor.
Entfernt Handschrift aus Arbeitsblatt-Scans.
"""
from pathlib import Path
import shutil
import json
import logging
from .core import BEREINIGT_DIR
from .analysis import analyze_scan_structure_with_ai
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def remove_handwriting_from_scan(input_path: Path) -> Path:
    """
    Remove handwriting from a worksheet scan using AI-guided image processing (Stage 2).

    Process:
        1. Locate the Stage 1 analysis JSON (``<stem>_analyse.json`` in
           BEREINIGT_DIR); if it does not exist, run
           ``analyze_scan_structure_with_ai`` first and use the path it returns.
        2. Load the analysis data. If the file cannot be read or parsed, fall
           back to a minimal, empty analysis structure instead of failing.
        3. Pass scan, analysis data and output path to
           ``WorksheetCleaner.clean_worksheet``.
        4. If cleaning raises, copy the original scan to the output path so the
           caller still gets a usable file.

    NOTE(review): per the original notes, WorksheetCleaner combines color-based
    filtering (blue ink), AI-guided region masking and stroke thickness
    analysis while preserving diagrams and printed content. That behavior lives
    in ``image_cleaner`` and cannot be confirmed from this module.

    Args:
        input_path: Path to the scanned worksheet image.

    Returns:
        The path returned by ``clean_worksheet`` on success, otherwise
        ``BEREINIGT_DIR / "<stem>_clean<suffix>"`` holding an unmodified copy
        of the input. The suffix is the input's own suffix, not always ``.jpg``.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.

    Errors raised by ``analyze_scan_structure_with_ai`` (when the analysis has
    to be generated) are not handled here and propagate to the caller.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")

    # Imported inside the function, after the existence check, exactly as in
    # the original. Presumably this keeps the image-processing dependency out
    # of module import time -- TODO confirm.
    from image_cleaner import WorksheetCleaner

    # Stage 1 analysis lives next to the cleaned output in BEREINIGT_DIR.
    analysis_path = BEREINIGT_DIR / (input_path.stem + "_analyse.json")
    if not analysis_path.exists():
        logger.info(f"Analysis not found for {input_path.name}, running analysis first")
        analysis_path = analyze_scan_structure_with_ai(input_path)

    try:
        analysis_data = json.loads(analysis_path.read_text(encoding='utf-8'))
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError;
        # OSError covers a file that vanished or is unreadable. In all of these
        # cases the analysis is unusable, so continue with an empty structure
        # rather than aborting the whole cleaning step.
        logger.error(f"Invalid analysis JSON: {analysis_path}\n{e}")
        analysis_data = {
            "layout": {"text_regions": [], "diagram_elements": []},
            "handwriting_regions": []
        }

    output_path = BEREINIGT_DIR / (input_path.stem + "_clean" + input_path.suffix)

    cleaner = WorksheetCleaner(debug_mode=False)
    try:
        cleaned_path = cleaner.clean_worksheet(input_path, analysis_data, output_path)
    except Exception as e:
        # Deliberate best-effort boundary: any cleaning failure degrades to
        # delivering the untouched original under the "_clean" name.
        logger.error(f"Cleaning failed for {input_path.name}, using original: {e}")
        shutil.copy2(input_path, output_path)
        return output_path

    logger.info(f"Successfully cleaned {input_path.name}")
    return cleaned_path