This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/test_cleaning.py
BreakPilot Dev 19855efacc
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
feat: BreakPilot PWA - Full codebase (clean push without large binaries)
All services: admin-v2, studio-v2, website, ai-compliance-sdk,
consent-service, klausur-service, voice-service, and infrastructure.
Large PDFs and compiled binaries excluded via .gitignore.
2026-02-11 13:25:58 +01:00

110 lines
3.4 KiB
Python

"""
Test script for worksheet cleaning pipeline
"""
from pathlib import Path
import json
import sys
# Import functions from ai_processor
from ai_processor import analyze_scan_structure_with_ai, remove_handwriting_from_scan
def _preview(text, limit=50):
    """Return *text* cut to *limit* characters, adding '...' only if it was cut."""
    text = text or ""  # JSON null -> empty preview instead of a TypeError
    return f"{text[:limit]}..." if len(text) > limit else text


def _print_analysis_summary(analysis_data):
    """Print the headline fields, layout counts and handwriting hits of an analysis."""
    print("\n📋 Analysis Results:")
    print(f" - Title: {analysis_data.get('title')}")
    print(f" - Subject: {analysis_data.get('subject')}")
    print(f" - Grade Level: {analysis_data.get('grade_level')}")

    # `dict.get(key, default)` only falls back when the key is missing, not
    # when it is present with a JSON null, hence the explicit `or` fallbacks.
    layout = analysis_data.get('layout') or {}
    text_regions = layout.get('text_regions') or []
    diagram_elements = layout.get('diagram_elements') or []
    print(f" - Text regions: {len(text_regions)}")
    print(f" - Diagram elements: {len(diagram_elements)}")

    hw_regions = analysis_data.get('handwriting_regions') or []
    print(f" - Handwriting regions: {len(hw_regions)}")
    if hw_regions:
        print("\n 🖊️ Handwriting detected:")
        # Only the first three regions are listed to keep the output short.
        for i, hw in enumerate(hw_regions[:3], 1):
            print(f" {i}. Type: {hw.get('type')}, Color: {hw.get('color_hint')}")
            print(f" Text: '{_preview(hw.get('text'))}'")
    print()


def test_worksheet_cleaning(filename: str) -> bool:
    """Run the worksheet cleaning pipeline on one scan and print a report.

    The scan is looked up as ``~/Arbeitsblaetter/Eingang/<filename>``.
    Stage 1 calls ``analyze_scan_structure_with_ai`` and reads the returned
    path as UTF-8 JSON to print a summary; stage 2 calls
    ``remove_handwriting_from_scan`` and prints the input and output file
    sizes in KB.

    Args:
        filename: Name of the scan file inside the ``Eingang`` directory.

    Returns:
        ``True`` if both stages finished without raising, ``False`` if the
        input file does not exist or either stage raised (the traceback is
        printed in that case).
    """
    import traceback  # only used on the failure paths below

    eingang_dir = Path.home() / "Arbeitsblaetter" / "Eingang"
    bereinigt_dir = Path.home() / "Arbeitsblaetter" / "Bereinigt"
    input_path = eingang_dir / filename

    if not input_path.exists():
        print(f"❌ Error: File not found: {input_path}")
        return False

    banner = "=" * 60
    rule = "-" * 60

    print(f"\n{banner}")
    print("🧪 TESTING WORKSHEET CLEANING PIPELINE")
    print(banner)
    print(f"Input file: {filename}")
    print(f"{banner}\n")

    # Stage 1: AI analysis
    print("📊 Stage 1: AI Analysis (Enhanced)")
    print(rule)
    try:
        analysis_path = analyze_scan_structure_with_ai(input_path)
        print(f"✅ Analysis completed: {analysis_path.name}")
        analysis_data = json.loads(analysis_path.read_text(encoding='utf-8'))
        _print_analysis_summary(analysis_data)
    except Exception as e:
        # Script-level boundary: report any failure and signal it via the
        # return value instead of crashing.
        print(f"❌ Analysis failed: {e}")
        traceback.print_exc()
        return False

    # Stage 2: image cleaning
    print("🧹 Stage 2: Image Cleaning (OpenCV + AI)")
    print(rule)
    try:
        cleaned_path = remove_handwriting_from_scan(input_path)
        print(f"✅ Cleaning completed: {cleaned_path.name}")
        original_size = input_path.stat().st_size / 1024  # bytes -> KB
        cleaned_size = cleaned_path.stat().st_size / 1024
        print(f" - Original size: {original_size:.1f} KB")
        print(f" - Cleaned size: {cleaned_size:.1f} KB")
    except Exception as e:
        print(f"❌ Cleaning failed: {e}")
        traceback.print_exc()
        return False

    # Summary
    print(f"\n{banner}")
    print("✅ TEST COMPLETED SUCCESSFULLY")
    print(banner)
    # NOTE(review): directory and file names below are what this script
    # assumes the pipeline writes; confirm against ai_processor.
    print(f"\n📂 Output files in: {bereinigt_dir}")
    print(f" - {input_path.stem}_analyse.json")
    print(f" - {input_path.stem}_clean.jpg")
    print()
    return True


# This is a manual script entry point, not a pytest test. Its `test_` prefix
# (inside a `test_*.py` file) matches pytest's default discovery, which would
# then fail looking for a `filename` fixture; opt out without renaming.
test_worksheet_cleaning.__test__ = False
if __name__ == "__main__":
    # An optional first command-line argument names the scan to process;
    # without it the bundled handwriting sample is used.
    cli_args = sys.argv[1:]
    filename = cli_args[0] if cli_args else "2025-12-10_Handschrift.JPG"
    passed = test_worksheet_cleaning(filename)
    # Exit status 0 on success, 1 on failure.
    sys.exit(int(not passed))