""" Test script for worksheet cleaning pipeline """ from pathlib import Path import json import sys # Import functions from ai_processor from ai_processor import analyze_scan_structure_with_ai, remove_handwriting_from_scan def test_worksheet_cleaning(filename: str): """Test the complete cleaning pipeline""" # Paths eingang_dir = Path.home() / "Arbeitsblaetter" / "Eingang" bereinigt_dir = Path.home() / "Arbeitsblaetter" / "Bereinigt" input_path = eingang_dir / filename if not input_path.exists(): print(f"โŒ Error: File not found: {input_path}") return False print(f"\n{'='*60}") print(f"๐Ÿงช TESTING WORKSHEET CLEANING PIPELINE") print(f"{'='*60}") print(f"Input file: {filename}") print(f"{'='*60}\n") # Stage 1: AI Analysis print("๐Ÿ“Š Stage 1: AI Analysis (Enhanced)") print("-" * 60) try: analysis_path = analyze_scan_structure_with_ai(input_path) print(f"โœ… Analysis completed: {analysis_path.name}") # Load and display analysis analysis_data = json.loads(analysis_path.read_text(encoding='utf-8')) print(f"\n๐Ÿ“‹ Analysis Results:") print(f" - Title: {analysis_data.get('title')}") print(f" - Subject: {analysis_data.get('subject')}") print(f" - Grade Level: {analysis_data.get('grade_level')}") # Layout info layout = analysis_data.get('layout', {}) text_regions = layout.get('text_regions', []) diagram_elements = layout.get('diagram_elements', []) print(f" - Text regions: {len(text_regions)}") print(f" - Diagram elements: {len(diagram_elements)}") # Handwriting info hw_regions = analysis_data.get('handwriting_regions', []) print(f" - Handwriting regions: {len(hw_regions)}") if hw_regions: print(f"\n ๐Ÿ–Š๏ธ Handwriting detected:") for i, hw in enumerate(hw_regions[:3], 1): # Show first 3 print(f" {i}. Type: {hw.get('type')}, Color: {hw.get('color_hint')}") print(f" Text: '{hw.get('text', '')[:50]}...'") print() except Exception as e: print(f"โŒ Analysis failed: {e}") import traceback traceback.print_exc() return False # Stage 2: Image Cleaning print("๐Ÿงน Stage 2: Image Cleaning (OpenCV + AI)") print("-" * 60) try: cleaned_path = remove_handwriting_from_scan(input_path) print(f"โœ… Cleaning completed: {cleaned_path.name}") # Check file size original_size = input_path.stat().st_size / 1024 cleaned_size = cleaned_path.stat().st_size / 1024 print(f" - Original size: {original_size:.1f} KB") print(f" - Cleaned size: {cleaned_size:.1f} KB") except Exception as e: print(f"โŒ Cleaning failed: {e}") import traceback traceback.print_exc() return False # Summary print(f"\n{'='*60}") print("โœ… TEST COMPLETED SUCCESSFULLY") print(f"{'='*60}") print(f"\n๐Ÿ“‚ Output files in: {bereinigt_dir}") print(f" - {input_path.stem}_analyse.json") print(f" - {input_path.stem}_clean.jpg") print() return True if __name__ == "__main__": # Test with Handschrift.JPG filename = "2025-12-10_Handschrift.JPG" if len(sys.argv) > 1: filename = sys.argv[1] success = test_worksheet_cleaning(filename) sys.exit(0 if success else 1)