This repository was archived on 2026-02-15. You can view and clone its files, but you cannot open issues, create pull requests, or push commits.
Files
breakpilot-pwa/backend/test_cleaning.py
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

110 lines
3.4 KiB
Python

"""
Test script for worksheet cleaning pipeline
"""
from pathlib import Path
import json
import sys
# Import functions from ai_processor
from ai_processor import analyze_scan_structure_with_ai, remove_handwriting_from_scan
def test_worksheet_cleaning(filename: str) -> bool:
    """Run the two-stage worksheet cleaning pipeline on a single scan.

    Stage 1 runs AI analysis (layout, text regions, diagram elements,
    handwriting detection) and prints a summary of the resulting JSON.
    Stage 2 removes handwriting from the scan (OpenCV + AI) and reports
    the original vs. cleaned file sizes.

    Args:
        filename: Name of the scan file inside ``~/Arbeitsblaetter/Eingang``.

    Returns:
        True if both stages completed, False if the input is missing or
        either stage raised an exception.
    """
    # Input scans live in "Eingang"; cleaned outputs land in "Bereinigt".
    eingang_dir = Path.home() / "Arbeitsblaetter" / "Eingang"
    bereinigt_dir = Path.home() / "Arbeitsblaetter" / "Bereinigt"
    input_path = eingang_dir / filename

    if not input_path.exists():
        print(f"❌ Error: File not found: {input_path}")
        return False

    print(f"\n{'='*60}")
    print(f"🧪 TESTING WORKSHEET CLEANING PIPELINE")
    print(f"{'='*60}")
    # Bug fix: this line previously printed the literal text "(unknown)"
    # instead of interpolating the actual input file.
    print(f"Input file: {input_path.name}")
    print(f"{'='*60}\n")

    # Stage 1: AI Analysis
    print("📊 Stage 1: AI Analysis (Enhanced)")
    print("-" * 60)
    try:
        analysis_path = analyze_scan_structure_with_ai(input_path)
        print(f"✅ Analysis completed: {analysis_path.name}")

        # Load and display analysis
        analysis_data = json.loads(analysis_path.read_text(encoding='utf-8'))
        print(f"\n📋 Analysis Results:")
        print(f" - Title: {analysis_data.get('title')}")
        print(f" - Subject: {analysis_data.get('subject')}")
        print(f" - Grade Level: {analysis_data.get('grade_level')}")

        # Layout info — assumes the analysis JSON nests region lists
        # under a "layout" key; missing keys degrade to empty lists.
        layout = analysis_data.get('layout', {})
        text_regions = layout.get('text_regions', [])
        diagram_elements = layout.get('diagram_elements', [])
        print(f" - Text regions: {len(text_regions)}")
        print(f" - Diagram elements: {len(diagram_elements)}")

        # Handwriting info
        hw_regions = analysis_data.get('handwriting_regions', [])
        print(f" - Handwriting regions: {len(hw_regions)}")
        if hw_regions:
            print(f"\n 🖊️ Handwriting detected:")
            for i, hw in enumerate(hw_regions[:3], 1):  # Show first 3
                print(f" {i}. Type: {hw.get('type')}, Color: {hw.get('color_hint')}")
                print(f" Text: '{hw.get('text', '')[:50]}...'")
        print()
    except Exception as e:
        print(f"❌ Analysis failed: {e}")
        import traceback
        traceback.print_exc()
        return False

    # Stage 2: Image Cleaning
    print("🧹 Stage 2: Image Cleaning (OpenCV + AI)")
    print("-" * 60)
    try:
        cleaned_path = remove_handwriting_from_scan(input_path)
        print(f"✅ Cleaning completed: {cleaned_path.name}")

        # Check file size (KB) to give a quick sanity signal on output.
        original_size = input_path.stat().st_size / 1024
        cleaned_size = cleaned_path.stat().st_size / 1024
        print(f" - Original size: {original_size:.1f} KB")
        print(f" - Cleaned size: {cleaned_size:.1f} KB")
    except Exception as e:
        print(f"❌ Cleaning failed: {e}")
        import traceback
        traceback.print_exc()
        return False

    # Summary — output filenames mirror ai_processor's naming convention:
    # "<stem>_analyse.json" and "<stem>_clean.jpg" (assumed; confirm there).
    print(f"\n{'='*60}")
    print("✅ TEST COMPLETED SUCCESSFULLY")
    print(f"{'='*60}")
    print(f"\n📂 Output files in: {bereinigt_dir}")
    print(f" - {input_path.stem}_analyse.json")
    print(f" - {input_path.stem}_clean.jpg")
    print()
    return True
if __name__ == "__main__":
    # Default scan to exercise; an explicit CLI argument overrides it.
    target = sys.argv[1] if len(sys.argv) > 1 else "2025-12-10_Handschrift.JPG"
    ok = test_worksheet_cleaning(target)
    sys.exit(0 if ok else 1)