[split-required] Split 500-1000 LOC files across all services

backend-lehrer (5 files):
- alerts_agent/db/repository.py (992 → 5), abitur_docs_api.py (956 → 3)
- teacher_dashboard_api.py (951 → 3), services/pdf_service.py (916 → 3)
- mail/mail_db.py (987 → 6)

klausur-service (5 files):
- legal_templates_ingestion.py (942 → 3), ocr_pipeline_postprocess.py (929 → 4)
- ocr_pipeline_words.py (876 → 3), ocr_pipeline_ocr_merge.py (616 → 2)
- KorrekturPage.tsx (956 → 6)

website (5 pages):
- mail (985 → 9), edu-search (958 → 8), mac-mini (950 → 7)
- ocr-labeling (946 → 7), audit-workspace (871 → 4)

studio-v2 (5 files + 1 deleted):
- page.tsx (946 → 5), MessagesContext.tsx (925 → 4)
- korrektur (914 → 6), worksheet-cleanup (899 → 6)
- useVocabWorksheet.ts (888 → 3)
- Deleted dead page-original.tsx (934 LOC)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-24 23:35:37 +02:00
parent 6811264756
commit b6983ab1dc
99 changed files with 13484 additions and 16106 deletions
--- a/studio-v2/app/vocab-worksheet/usePageProcessing.ts
+++ b/studio-v2/app/vocab-worksheet/usePageProcessing.ts
@@ -0,0 +1,97 @@
+import type { VocabularyEntry, OcrPrompts, IpaMode, SyllableMode } from './types'
+import { getApiBase } from './constants'
+
+/**
+ * Process a single page of a vocab session and return its vocabulary plus
+ * optional scan-quality info.
+ *
+ * Issues a POST to
+ * `{API_BASE}/api/v1/vocab/sessions/{sessionId}/process-single-page/{pageIndex}`
+ * with the OCR settings as query parameters and `ocrPrompts` as the JSON body
+ * (`{ ocr_prompts }`).
+ *
+ * Failures of the request itself (non-OK HTTP status, a payload without a truthy
+ * `success` flag, network or JSON-parse exceptions) are reported through the
+ * returned object (`success: false` plus `error`) instead of being thrown.
+ *
+ * @param sessionId - Session identifier inserted into the request path.
+ * @param pageIndex - Page index sent to the API; user-facing messages show `pageIndex + 1`.
+ * @param ipa - Sent as the `ipa_mode` query parameter.
+ * @param syllable - Sent as the `syllable_mode` query parameter.
+ * @param ocrPrompts - Sent in the request body as `ocr_prompts`.
+ * @param ocrEnhance - Sent as the `enhance` query parameter.
+ * @param ocrMaxCols - Sent as the `max_cols` query parameter.
+ * @param ocrMinConf - Sent as the `min_conf` query parameter.
+ * @returns On success the page vocabulary (empty array if the payload has none) and the
+ *          payload's `scan_quality`; on failure an empty vocabulary and an error message.
+ */
+export async function processSinglePage(
+  sessionId: string,
+  pageIndex: number,
+  ipa: IpaMode,
+  syllable: SyllableMode,
+  ocrPrompts: OcrPrompts,
+  ocrEnhance: boolean,
+  ocrMaxCols: number,
+  ocrMinConf: number,
+): Promise<{ success: boolean; vocabulary: VocabularyEntry[]; error?: string; scanQuality?: any }> {
+  const API_BASE = getApiBase()
+  const pageLabel = `Seite ${pageIndex + 1}`
+
+  try {
+    const params = new URLSearchParams({
+      ipa_mode: ipa,
+      syllable_mode: syllable,
+      enhance: String(ocrEnhance),
+      max_cols: String(ocrMaxCols),
+      min_conf: String(ocrMinConf),
+    })
+    const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionId}/process-single-page/${pageIndex}?${params}`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ ocr_prompts: ocrPrompts }),
+    })
+
+    if (!res.ok) {
+      // The error body may be missing, not JSON, literally `null`, or carry a
+      // structured (non-string) `detail`; none of these may mask the HTTP status.
+      const errBody: unknown = await res.json().catch(() => null)
+      const rawDetail =
+        errBody !== null && typeof errBody === 'object'
+          ? (errBody as { detail?: unknown }).detail
+          : undefined
+      let detail = `HTTP ${res.status}`
+      if (typeof rawDetail === 'string') {
+        if (rawDetail) detail = rawDetail
+      } else if (rawDetail) {
+        // Structured details would otherwise be rendered as "[object Object]".
+        detail = JSON.stringify(rawDetail)
+      }
+      return { success: false, vocabulary: [], error: `${pageLabel}: ${detail}` }
+    }
+
+    const data = await res.json()
+    // `data` can be null for a literal JSON `null` payload; treat that as an unknown failure.
+    if (!data || !data.success) {
+      return { success: false, vocabulary: [], error: data?.error || `${pageLabel}: Unbekannter Fehler` }
+    }
+
+    // Only hand back a real array so callers can rely on the declared return type.
+    const vocabulary: VocabularyEntry[] = Array.isArray(data.vocabulary) ? data.vocabulary : []
+    return { success: true, vocabulary, scanQuality: data.scan_quality }
+  } catch (e) {
+    return { success: false, vocabulary: [], error: `${pageLabel}: ${e instanceof Error ? e.message : 'Netzwerkfehler'}` }
+  }
+}
+
+/**
+ * Re-run page processing for the given pages with the current IPA/syllable and OCR settings.
+ *
+ * Pages are requested one after another. Before each request `setExtractionStatus`
+ * receives a progress message for that page. A page whose request returns a non-OK
+ * status or throws is skipped, so the result only contains vocabulary from pages
+ * that were processed successfully.
+ *
+ * @param sessionId - Session identifier inserted into the request path.
+ * @param pagesToReprocess - Page indices to request, in order.
+ * @param ipa - Sent as the `ipa_mode` query parameter.
+ * @param syllable - Sent as the `syllable_mode` query parameter.
+ * @param ocrPrompts - Sent in the request body as `ocr_prompts`.
+ * @param ocrEnhance - Sent as the `enhance` query parameter.
+ * @param ocrMaxCols - Sent as the `max_cols` query parameter.
+ * @param ocrMinConf - Sent as the `min_conf` query parameter.
+ * @param setExtractionStatus - Callback invoked with a status text before each page request.
+ * @returns The concatenated vocabulary of all successful pages and a preformatted quality
+ *          summary built from the last `scan_quality` received (empty string if there was none).
+ */
+export async function reprocessPagesFlow(
+  sessionId: string,
+  pagesToReprocess: number[],
+  ipa: IpaMode,
+  syllable: SyllableMode,
+  ocrPrompts: OcrPrompts,
+  ocrEnhance: boolean,
+  ocrMaxCols: number,
+  ocrMinConf: number,
+  setExtractionStatus: (s: string) => void,
+): Promise<{ vocabulary: VocabularyEntry[]; qualityInfo: string }> {
+  const API_BASE = getApiBase()
+  const collected: VocabularyEntry[] = []
+  let latestQuality: any = null
+
+  for (const pageIndex of pagesToReprocess) {
+    setExtractionStatus(`Verarbeite Seite ${pageIndex + 1}...`)
+    try {
+      const query = new URLSearchParams({
+        ipa_mode: ipa,
+        syllable_mode: syllable,
+        enhance: String(ocrEnhance),
+        max_cols: String(ocrMaxCols),
+        min_conf: String(ocrMinConf),
+      }).toString()
+      const endpoint = `${API_BASE}/api/v1/vocab/sessions/${sessionId}/process-single-page/${pageIndex}?${query}`
+      const response = await fetch(endpoint, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ ocr_prompts: ocrPrompts }),
+      })
+      // Non-OK responses contribute nothing; move on to the next page.
+      if (!response.ok) continue
+
+      const payload = await response.json()
+      if (payload.vocabulary) collected.push(...payload.vocabulary)
+      if (payload.scan_quality) latestQuality = payload.scan_quality
+    } catch {
+      // Best effort: a failing page must not abort the remaining pages.
+    }
+  }
+
+  // Without any scan-quality data there is nothing to summarize.
+  if (!latestQuality) {
+    return { vocabulary: collected, qualityInfo: '' }
+  }
+
+  const degradedNote = latestQuality.is_degraded ? ' (degradiert!)' : ''
+  const qualityInfo = ` | Qualitaet: ${latestQuality.quality_pct}%${degradedNote} | Blur: ${latestQuality.blur_score} | Kontrast: ${latestQuality.contrast_score}`
+
+  return { vocabulary: collected, qualityInfo }
+}