""" A/B Test: Claude vs OpenAI Vision APIs Compares both APIs side-by-side on the same worksheet. """ from pathlib import Path import json import time from ai_processor import _analyze_with_claude, _analyze_with_openai def compare_apis(filename: str): """ Test both APIs and compare results. Args: filename: Name of file in ~/Arbeitsblaetter/Eingang/ """ eingang_dir = Path.home() / "Arbeitsblaetter" / "Eingang" bereinigt_dir = Path.home() / "Arbeitsblaetter" / "Bereinigt" input_path = eingang_dir / filename if not input_path.exists(): print(f"āŒ File not found: {input_path}") return print("\n" + "=" * 70) print("šŸ”¬ A/B TEST: Claude vs OpenAI Vision APIs") print("=" * 70) print(f"File: {filename}\n") results = {} # Test Claude print("🟣 Testing Claude 3.5 Sonnet...") print("-" * 70) try: start = time.time() claude_result = _analyze_with_claude(input_path) claude_time = time.time() - start claude_data = json.loads(claude_result.read_text(encoding='utf-8')) print(f"āœ… Claude completed in {claude_time:.2f}s") print(f" Title: {claude_data.get('title')}") print(f" Subject: {claude_data.get('subject')}") print(f" Grade: {claude_data.get('grade_level')}") # Count elements layout = claude_data.get('layout', {}) text_regions = layout.get('text_regions', []) diagrams = layout.get('diagram_elements', []) handwriting = claude_data.get('handwriting_regions', []) print(f" Text regions: {len(text_regions)}") print(f" Diagrams: {len(diagrams)}") print(f" Handwriting regions: {len(handwriting)}") if handwriting: print(f"\n šŸ–Šļø Handwriting detected:") for i, hw in enumerate(handwriting[:2], 1): print(f" {i}. {hw.get('type')} ({hw.get('color_hint')})") print(f" Text: '{hw.get('text', '')[:60]}...'") results['claude'] = { 'time': claude_time, 'data': claude_data, 'file': claude_result } # Rename to _analyse_claude.json claude_comparison_path = bereinigt_dir / f"{input_path.stem}_analyse_claude.json" claude_result.rename(claude_comparison_path) results['claude']['file'] = claude_comparison_path except Exception as e: print(f"āŒ Claude failed: {e}") results['claude'] = {'error': str(e)} print() # Test OpenAI print("šŸ”µ Testing OpenAI GPT-4o-mini...") print("-" * 70) try: start = time.time() openai_result = _analyze_with_openai(input_path) openai_time = time.time() - start openai_data = json.loads(openai_result.read_text(encoding='utf-8')) print(f"āœ… OpenAI completed in {openai_time:.2f}s") print(f" Title: {openai_data.get('title')}") print(f" Subject: {openai_data.get('subject')}") print(f" Grade: {openai_data.get('grade_level')}") # Count elements layout = openai_data.get('layout', {}) text_regions = layout.get('text_regions', []) diagrams = layout.get('diagram_elements', []) handwriting = openai_data.get('handwriting_regions', []) print(f" Text regions: {len(text_regions)}") print(f" Diagrams: {len(diagrams)}") print(f" Handwriting regions: {len(handwriting)}") if handwriting: print(f"\n šŸ–Šļø Handwriting detected:") for i, hw in enumerate(handwriting[:2], 1): print(f" {i}. {hw.get('type')} ({hw.get('color_hint')})") print(f" Text: '{hw.get('text', '')[:60]}...'") results['openai'] = { 'time': openai_time, 'data': openai_data, 'file': openai_result } # Rename to _analyse_openai.json openai_comparison_path = bereinigt_dir / f"{input_path.stem}_analyse_openai.json" openai_result.rename(openai_comparison_path) results['openai']['file'] = openai_comparison_path except Exception as e: print(f"āŒ OpenAI failed: {e}") results['openai'] = {'error': str(e)} # Comparison print("\n" + "=" * 70) print("šŸ“Š COMPARISON") print("=" * 70) if 'claude' in results and 'openai' in results: if 'error' not in results['claude'] and 'error' not in results['openai']: claude_time = results['claude']['time'] openai_time = results['openai']['time'] print(f"ā±ļø Speed:") print(f" Claude: {claude_time:.2f}s") print(f" OpenAI: {openai_time:.2f}s") if claude_time < openai_time: print(f" → Claude is {openai_time/claude_time:.1f}x faster") else: print(f" → OpenAI is {claude_time/openai_time:.1f}x faster") # Compare canonical text length claude_text = results['claude']['data'].get('canonical_text', '') openai_text = results['openai']['data'].get('canonical_text', '') print(f"\nšŸ“ Canonical Text Length:") print(f" Claude: {len(claude_text)} chars") print(f" OpenAI: {len(openai_text)} chars") # Compare handwriting detection claude_hw = results['claude']['data'].get('handwriting_regions', []) openai_hw = results['openai']['data'].get('handwriting_regions', []) print(f"\nšŸ–Šļø Handwriting Regions:") print(f" Claude: {len(claude_hw)} regions") print(f" OpenAI: {len(openai_hw)} regions") print(f"\nšŸ“ Comparison files saved:") print(f" {results['claude']['file'].name}") print(f" {results['openai']['file'].name}") print("\n" + "=" * 70) print("āœ… A/B TEST COMPLETE") print("=" * 70) print("\nReview the JSON files to compare quality and accuracy.\n") if __name__ == "__main__": import sys filename = "2025-12-10_Handschrift.JPG" if len(sys.argv) > 1: filename = sys.argv[1] compare_apis(filename)