This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/ai_processing/image_processor.py
BreakPilot Dev 19855efacc
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
feat: BreakPilot PWA - Full codebase (clean push without large binaries)
All services: admin-v2, studio-v2, website, ai-compliance-sdk,
consent-service, klausur-service, voice-service, and infrastructure.
Large PDFs and compiled binaries excluded via .gitignore.
2026-02-11 13:25:58 +01:00

79 lines
2.5 KiB
Python

"""
AI Processing - Image Processor.
Entfernt Handschrift aus Arbeitsblatt-Scans.
"""
from pathlib import Path
import shutil
import json
import logging
from .core import BEREINIGT_DIR
from .analysis import analyze_scan_structure_with_ai
logger = logging.getLogger(__name__)
def remove_handwriting_from_scan(input_path: Path) -> Path:
"""
Remove handwriting from worksheet scan using AI-guided image processing (Stage 2).
Process:
1. Load corresponding analysis JSON (from Stage 1)
2. Apply multi-strategy cleaning using WorksheetCleaner:
- Color-based filtering (blue ink)
- AI-guided region masking
- Stroke thickness analysis
3. Preserve diagrams and printed content
4. Save cleaned image
Returns:
Path to cleaned image (*_clean.jpg)
Raises:
FileNotFoundError: If input file not found
RuntimeError: If cleaning fails (falls back to copy)
"""
if not input_path.exists():
raise FileNotFoundError(f"Eingabedatei nicht gefunden: {input_path}")
# Import WorksheetCleaner
from image_cleaner import WorksheetCleaner
# Load analysis JSON (from Stage 1)
analysis_name = input_path.stem + "_analyse.json"
analysis_path = BEREINIGT_DIR / analysis_name
# If analysis doesn't exist, run it first
if not analysis_path.exists():
logger.info(f"Analysis not found for {input_path.name}, running analysis first")
analysis_path = analyze_scan_structure_with_ai(input_path)
# Load analysis data
try:
analysis_data = json.loads(analysis_path.read_text(encoding='utf-8'))
except json.JSONDecodeError as e:
logger.error(f"Invalid analysis JSON: {analysis_path}\n{e}")
# Fallback: create minimal analysis structure
analysis_data = {
"layout": {"text_regions": [], "diagram_elements": []},
"handwriting_regions": []
}
# Prepare output path
output_name = input_path.stem + "_clean" + input_path.suffix
output_path = BEREINIGT_DIR / output_name
# Clean the image using WorksheetCleaner
cleaner = WorksheetCleaner(debug_mode=False)
try:
cleaned_path = cleaner.clean_worksheet(input_path, analysis_data, output_path)
logger.info(f"Successfully cleaned {input_path.name}")
return cleaned_path
except Exception as e:
# Fallback: if cleaning fails, copy original
logger.error(f"Cleaning failed for {input_path.name}, using original: {e}")
shutil.copy2(input_path, output_path)
return output_path