Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
All services: admin-v2, studio-v2, website, ai-compliance-sdk, consent-service, klausur-service, voice-service, and infrastructure. Large PDFs and compiled binaries excluded via .gitignore.
110 lines
3.4 KiB
Python
110 lines
3.4 KiB
Python
"""
|
|
Test script for worksheet cleaning pipeline
|
|
"""
|
|
|
|
from pathlib import Path
|
|
import json
|
|
import sys
|
|
|
|
# Import functions from ai_processor
|
|
from ai_processor import analyze_scan_structure_with_ai, remove_handwriting_from_scan
|
|
|
|
def _print_analysis_results(analysis_data: dict) -> None:
    """Print headline fields, layout counts and a handwriting preview.

    Args:
        analysis_data: The decoded analysis JSON object.
    """
    print("\n📋 Analysis Results:")
    print(f" - Title: {analysis_data.get('title')}")
    print(f" - Subject: {analysis_data.get('subject')}")
    print(f" - Grade Level: {analysis_data.get('grade_level')}")

    # `or` fallbacks instead of .get() defaults: a key that is present but
    # JSON null yields None, which would break the .get()/len() calls below.
    layout = analysis_data.get('layout') or {}
    text_regions = layout.get('text_regions') or []
    diagram_elements = layout.get('diagram_elements') or []
    print(f" - Text regions: {len(text_regions)}")
    print(f" - Diagram elements: {len(diagram_elements)}")

    hw_regions = analysis_data.get('handwriting_regions') or []
    print(f" - Handwriting regions: {len(hw_regions)}")

    if hw_regions:
        print("\n 🖊️ Handwriting detected:")
        for i, hw in enumerate(hw_regions[:3], 1):  # Show first 3
            print(f" {i}. Type: {hw.get('type')}, Color: {hw.get('color_hint')}")
            # A null or non-string text must not abort the whole report.
            preview = str(hw.get('text') or '')[:50]
            print(f" Text: '{preview}...'")

    print()


def test_worksheet_cleaning(filename: str) -> bool:
    """Run the worksheet cleaning pipeline on one scan and report progress.

    The scan is looked up in ``~/Arbeitsblaetter/Eingang``. Stage 1 calls
    ``analyze_scan_structure_with_ai`` and prints a summary of the JSON it
    produced; stage 2 calls ``remove_handwriting_from_scan`` and prints the
    file sizes before and after. The closing summary lists the expected
    output names under ``~/Arbeitsblaetter/Bereinigt``; this function does
    not check that those files exist.

    Args:
        filename: Name of the scan inside the ``Eingang`` directory.

    Returns:
        True when both stages finish without raising; False when the input
        file is missing or either stage raises (the traceback is printed).
    """
    import traceback  # only used for failure diagnostics

    # Paths
    eingang_dir = Path.home() / "Arbeitsblaetter" / "Eingang"
    bereinigt_dir = Path.home() / "Arbeitsblaetter" / "Bereinigt"
    input_path = eingang_dir / filename

    # is_file() rather than exists(): an empty or directory name must be
    # rejected here instead of failing later inside the analysis stage.
    if not input_path.is_file():
        print(f"❌ Error: File not found: {input_path}")
        return False

    print(f"\n{'='*60}")
    print("🧪 TESTING WORKSHEET CLEANING PIPELINE")
    print(f"{'='*60}")
    print(f"Input file: {filename}")
    print(f"{'='*60}\n")

    # Stage 1: AI Analysis
    print("📊 Stage 1: AI Analysis (Enhanced)")
    print("-" * 60)
    try:
        analysis_path = analyze_scan_structure_with_ai(input_path)
        print(f"✅ Analysis completed: {analysis_path.name}")

        # Load and display analysis
        analysis_data = json.loads(analysis_path.read_text(encoding='utf-8'))
        _print_analysis_results(analysis_data)
    except Exception as e:
        # Script-level boundary: report the failure and signal it via the
        # return value instead of propagating.
        print(f"❌ Analysis failed: {e}")
        traceback.print_exc()
        return False

    # Stage 2: Image Cleaning
    print("🧹 Stage 2: Image Cleaning (OpenCV + AI)")
    print("-" * 60)
    try:
        cleaned_path = remove_handwriting_from_scan(input_path)
        print(f"✅ Cleaning completed: {cleaned_path.name}")

        # Check file size
        original_size = input_path.stat().st_size / 1024
        cleaned_size = cleaned_path.stat().st_size / 1024
        print(f" - Original size: {original_size:.1f} KB")
        print(f" - Cleaned size: {cleaned_size:.1f} KB")
    except Exception as e:
        print(f"❌ Cleaning failed: {e}")
        traceback.print_exc()
        return False

    # Summary
    print(f"\n{'='*60}")
    print("✅ TEST COMPLETED SUCCESSFULLY")
    print(f"{'='*60}")
    print(f"\n📂 Output files in: {bereinigt_dir}")
    print(f" - {input_path.stem}_analyse.json")
    print(f" - {input_path.stem}_clean.jpg")
    print()

    return True


# Not a pytest test despite its name: it needs a real scan file name as an
# argument. Opting out keeps pytest from collecting it and failing on a
# missing "filename" fixture.
test_worksheet_cleaning.__test__ = False
|
|
|
|
if __name__ == "__main__":
    # Use the bundled sample scan unless a file name is given as the first
    # command-line argument.
    cli_args = sys.argv[1:]
    filename = cli_args[0] if cli_args else "2025-12-10_Handschrift.JPG"

    # Translate the boolean pipeline result into the process exit status
    # (0 on success, 1 on failure).
    success = test_worksheet_cleaning(filename)
    raise SystemExit(0 if success else 1)
|