This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/test_api_comparison.py
Benjamin Admin bfdaf63ba9 fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

181 lines
6.1 KiB
Python

"""
A/B Test: Claude vs OpenAI Vision APIs
Compares both APIs side-by-side on the same worksheet.
"""
from pathlib import Path
import json
import time
from ai_processor import _analyze_with_claude, _analyze_with_openai
def _print_analysis_summary(data: dict) -> None:
    """
    Print the headline fields and element counts of one analysis result.
    Args:
        data: Parsed analysis JSON of a single provider.
    """
    print(f" Title: {data.get('title')}")
    print(f" Subject: {data.get('subject')}")
    print(f" Grade: {data.get('grade_level')}")

    # Count elements. `or` (instead of a .get() default) also covers keys that
    # are present but explicitly null in the JSON.
    layout = data.get('layout') or {}
    text_regions = layout.get('text_regions') or []
    diagrams = layout.get('diagram_elements') or []
    handwriting = data.get('handwriting_regions') or []
    print(f" Text regions: {len(text_regions)}")
    print(f" Diagrams: {len(diagrams)}")
    print(f" Handwriting regions: {len(handwriting)}")

    if handwriting:
        print("\n 🖊️ Handwriting detected:")
        # Only preview the first two regions to keep the report short.
        for i, hw in enumerate(handwriting[:2], 1):
            snippet = (hw.get('text') or '')[:60]
            print(f" {i}. {hw.get('type')} ({hw.get('color_hint')})")
            print(f" Text: '{snippet}...'")


def _run_provider(label: str, banner: str, analyze, input_path: Path,
                  output_path: Path) -> dict:
    """
    Run one vision API on the worksheet, report on it and store its JSON.
    Args:
        label: Provider name used in the status messages.
        banner: Headline printed before the provider is called.
        analyze: Callable taking the input path; it is expected to return the
            Path of the JSON file it produced.
        input_path: Worksheet file to analyze.
        output_path: Where the provider's JSON file is moved to.
    Returns:
        {'time', 'data', 'file'} on success, {'error'} on any failure.
    """
    print(banner)
    print("-" * 70)
    try:
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start = time.perf_counter()
        result_path = analyze(input_path)
        elapsed = time.perf_counter() - start

        data = json.loads(result_path.read_text(encoding='utf-8'))
        print(f"✅ {label} completed in {elapsed:.2f}s")
        _print_analysis_summary(data)

        # Make sure the target directory exists and overwrite the output of a
        # previous run, so a successful analysis is not reported as failed
        # just because the file could not be moved.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        result_path.replace(output_path)
    except Exception as e:
        # Script-level boundary: the analyzers' exception types are not known
        # here, and one provider failing must not abort the other's run.
        print(f"❌ {label} failed: {e}")
        return {'error': str(e)}
    return {'time': elapsed, 'data': data, 'file': output_path}


def compare_apis(filename: str):
    """
    Test both APIs and compare results.

    Runs the Claude and the OpenAI analysis on the same worksheet, prints a
    summary for each, stores the results as <stem>_analyse_claude.json and
    <stem>_analyse_openai.json in ~/Arbeitsblaetter/Bereinigt/ and, if both
    runs succeeded, prints a side-by-side comparison.
    Args:
        filename: Name of file in ~/Arbeitsblaetter/Eingang/
    """
    eingang_dir = Path.home() / "Arbeitsblaetter" / "Eingang"
    bereinigt_dir = Path.home() / "Arbeitsblaetter" / "Bereinigt"
    input_path = eingang_dir / filename
    if not input_path.exists():
        print(f"❌ File not found: {input_path}")
        return

    print("\n" + "=" * 70)
    print("🔬 A/B TEST: Claude vs OpenAI Vision APIs")
    print("=" * 70)
    print(f"File: {filename}\n")

    results = {}

    # Test Claude
    results['claude'] = _run_provider(
        "Claude",
        "🟣 Testing Claude 3.5 Sonnet...",
        _analyze_with_claude,
        input_path,
        bereinigt_dir / f"{input_path.stem}_analyse_claude.json",
    )
    print()

    # Test OpenAI
    results['openai'] = _run_provider(
        "OpenAI",
        "🔵 Testing OpenAI GPT-4o-mini...",
        _analyze_with_openai,
        input_path,
        bereinigt_dir / f"{input_path.stem}_analyse_openai.json",
    )

    # Comparison
    print("\n" + "=" * 70)
    print("📊 COMPARISON")
    print("=" * 70)

    claude, openai = results['claude'], results['openai']
    # A side-by-side comparison only makes sense if both runs succeeded.
    if 'error' not in claude and 'error' not in openai:
        claude_time = claude['time']
        openai_time = openai['time']
        print("⏱️ Speed:")
        print(f" Claude: {claude_time:.2f}s")
        print(f" OpenAI: {openai_time:.2f}s")
        if claude_time < openai_time:
            winner, fast, slow = "Claude", claude_time, openai_time
        else:
            winner, fast, slow = "OpenAI", openai_time, claude_time
        # Skip the ratio when the faster run took no measurable time, to
        # avoid a ZeroDivisionError.
        if fast > 0:
            print(f" → {winner} is {slow / fast:.1f}x faster")

        # Compare canonical text length (treat a JSON null as empty text)
        claude_text = claude['data'].get('canonical_text') or ''
        openai_text = openai['data'].get('canonical_text') or ''
        print("\n📝 Canonical Text Length:")
        print(f" Claude: {len(claude_text)} chars")
        print(f" OpenAI: {len(openai_text)} chars")

        # Compare handwriting detection
        claude_hw = claude['data'].get('handwriting_regions') or []
        openai_hw = openai['data'].get('handwriting_regions') or []
        print("\n🖊️ Handwriting Regions:")
        print(f" Claude: {len(claude_hw)} regions")
        print(f" OpenAI: {len(openai_hw)} regions")

        print("\n📁 Comparison files saved:")
        print(f" {claude['file'].name}")
        print(f" {openai['file'].name}")

    print("\n" + "=" * 70)
    print("✅ A/B TEST COMPLETE")
    print("=" * 70)
    print("\nReview the JSON files to compare quality and accuracy.\n")
if __name__ == "__main__":
    import sys

    # The first command-line argument selects the worksheet; without one the
    # sample file below is used.
    filename = sys.argv[1] if len(sys.argv) > 1 else "2025-12-10_Handschrift.JPG"
    compare_apis(filename)