[split-required] Split 500-1000 LOC files across all services
backend-lehrer (5 files): - alerts_agent/db/repository.py (992 → 5), abitur_docs_api.py (956 → 3) - teacher_dashboard_api.py (951 → 3), services/pdf_service.py (916 → 3) - mail/mail_db.py (987 → 6) klausur-service (5 files): - legal_templates_ingestion.py (942 → 3), ocr_pipeline_postprocess.py (929 → 4) - ocr_pipeline_words.py (876 → 3), ocr_pipeline_ocr_merge.py (616 → 2) - KorrekturPage.tsx (956 → 6) website (5 pages): - mail (985 → 9), edu-search (958 → 8), mac-mini (950 → 7) - ocr-labeling (946 → 7), audit-workspace (871 → 4) studio-v2 (5 files + 1 deleted): - page.tsx (946 → 5), MessagesContext.tsx (925 → 4) - korrektur (914 → 6), worksheet-cleanup (899 → 6) - useVocabWorksheet.ts (888 → 3) - Deleted dead page-original.tsx (934 LOC) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
97
studio-v2/app/vocab-worksheet/usePageProcessing.ts
Normal file
97
studio-v2/app/vocab-worksheet/usePageProcessing.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
import type { VocabularyEntry, OcrPrompts, IpaMode, SyllableMode } from './types'
|
||||
import { getApiBase } from './constants'
|
||||
|
||||
/**
 * Process a single page and return vocabulary + optional scan quality info.
 *
 * Sends a POST to
 * `/api/v1/vocab/sessions/{sessionId}/process-single-page/{pageIndex}` with the
 * OCR tuning options as query parameters and `{ ocr_prompts }` as the JSON body.
 *
 * Request failures (network error, non-OK status, `success: false` in the
 * response) are not thrown; they are reported through the returned object with
 * `success: false`, an empty `vocabulary` and a human-readable `error`.
 *
 * @param sessionId   Session identifier inserted into the request path.
 * @param pageIndex   Page index inserted into the request path; shown to the
 *                    user as `pageIndex + 1` in error messages.
 * @param ipa         Sent as the `ipa_mode` query parameter.
 * @param syllable    Sent as the `syllable_mode` query parameter.
 * @param ocrPrompts  Sent as `ocr_prompts` in the JSON request body.
 * @param ocrEnhance  Sent as the `enhance` query parameter.
 * @param ocrMaxCols  Sent as the `max_cols` query parameter.
 * @param ocrMinConf  Sent as the `min_conf` query parameter.
 * @returns On success the response's `vocabulary` (or `[]` when missing) and
 *          its `scan_quality` as `scanQuality`; otherwise an error result.
 */
export async function processSinglePage(
  sessionId: string,
  pageIndex: number,
  ipa: IpaMode,
  syllable: SyllableMode,
  ocrPrompts: OcrPrompts,
  ocrEnhance: boolean,
  ocrMaxCols: number,
  ocrMinConf: number,
): Promise<{ success: boolean; vocabulary: VocabularyEntry[]; error?: string; scanQuality?: any }> {
  const API_BASE = getApiBase()
  const pageLabel = `Seite ${pageIndex + 1}`

  try {
    const params = new URLSearchParams({
      ipa_mode: ipa,
      syllable_mode: syllable,
      enhance: String(ocrEnhance),
      max_cols: String(ocrMaxCols),
      min_conf: String(ocrMinConf),
    })
    const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionId}/process-single-page/${pageIndex}?${params}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ ocr_prompts: ocrPrompts }),
    })

    if (!res.ok) {
      // The error body may be missing or not JSON at all; fall back to the status code.
      const errBody: unknown = await res.json().catch(() => ({}))
      const detail = describeErrorDetail(errBody) || `HTTP ${res.status}`
      return { success: false, vocabulary: [], error: `${pageLabel}: ${detail}` }
    }

    const data = await res.json()
    if (!data.success) {
      return { success: false, vocabulary: [], error: data.error || `${pageLabel}: Unbekannter Fehler` }
    }

    return { success: true, vocabulary: data.vocabulary || [], scanQuality: data.scan_quality }
  } catch (e) {
    return { success: false, vocabulary: [], error: `${pageLabel}: ${e instanceof Error ? e.message : 'Netzwerkfehler'}` }
  }
}

/**
 * Turn the `detail` field of an error response body into readable text.
 *
 * `detail` is not guaranteed to be a string: some backends (e.g. FastAPI
 * validation errors) send a list of objects carrying a `msg` field. Plain
 * interpolation of such values would render as "[object Object]".
 *
 * @returns The readable detail, or `''` when the body carries no usable detail.
 */
function describeErrorDetail(errBody: unknown): string {
  if (typeof errBody !== 'object' || errBody === null) return ''

  const detail = (errBody as { detail?: unknown }).detail
  if (!detail) return ''
  if (typeof detail === 'string') return detail

  if (Array.isArray(detail)) {
    return detail
      .map(item => {
        if (typeof item === 'string') return item
        if (item && typeof item.msg === 'string') return item.msg
        return JSON.stringify(item)
      })
      .join('; ')
  }

  return typeof detail === 'object' ? JSON.stringify(detail) : String(detail)
}
|
||||
|
||||
/**
 * Reprocess pages with updated IPA/syllable settings.
 * Returns the new vocabulary array.
 *
 * Pages are requested one after another, in the order given. Before each
 * request `setExtractionStatus` is called with a progress message. A page whose
 * request fails (non-OK status or thrown error) is skipped so the remaining
 * pages are still processed; the failure is logged via `console.warn`.
 *
 * @param sessionId           Session identifier inserted into the request path.
 * @param pagesToReprocess    Page indices to request, in order.
 * @param ipa                 Sent as the `ipa_mode` query parameter.
 * @param syllable            Sent as the `syllable_mode` query parameter.
 * @param ocrPrompts          Sent as `ocr_prompts` in the JSON request body.
 * @param ocrEnhance          Sent as the `enhance` query parameter.
 * @param ocrMaxCols          Sent as the `max_cols` query parameter.
 * @param ocrMinConf          Sent as the `min_conf` query parameter.
 * @param setExtractionStatus Callback receiving the per-page progress text.
 * @returns The concatenated vocabulary of all pages that succeeded, plus
 *          `qualityInfo`: a display string built from the last `scan_quality`
 *          received, or `''` when no page reported one.
 */
export async function reprocessPagesFlow(
  sessionId: string,
  pagesToReprocess: number[],
  ipa: IpaMode,
  syllable: SyllableMode,
  ocrPrompts: OcrPrompts,
  ocrEnhance: boolean,
  ocrMaxCols: number,
  ocrMinConf: number,
  setExtractionStatus: (s: string) => void,
): Promise<{ vocabulary: VocabularyEntry[]; qualityInfo: string }> {
  // Only the fields read below; the full response shape is not visible in this file.
  type ScanQualitySummary = {
    quality_pct?: unknown
    is_degraded?: unknown
    blur_score?: unknown
    contrast_score?: unknown
  }

  const API_BASE = getApiBase()
  const allVocab: VocabularyEntry[] = []
  let lastQuality: ScanQualitySummary | null = null

  // The query string is the same for every page, so build it once.
  const query = new URLSearchParams({
    ipa_mode: ipa,
    syllable_mode: syllable,
    enhance: String(ocrEnhance),
    max_cols: String(ocrMaxCols),
    min_conf: String(ocrMinConf),
  }).toString()

  for (const pageIndex of pagesToReprocess) {
    setExtractionStatus(`Verarbeite Seite ${pageIndex + 1}...`)
    try {
      const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionId}/process-single-page/${pageIndex}?${query}`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ ocr_prompts: ocrPrompts }),
      })

      if (!res.ok) {
        // Best effort: skip this page, but leave a trace for debugging.
        console.warn(`reprocessPagesFlow: page ${pageIndex + 1} skipped (HTTP ${res.status})`)
        continue
      }

      const data = await res.json()
      // Guard against a non-array payload before spreading it.
      if (Array.isArray(data.vocabulary)) allVocab.push(...data.vocabulary)
      if (data.scan_quality) lastQuality = data.scan_quality
    } catch (e) {
      // Individual page failures must not abort the remaining pages.
      console.warn(`reprocessPagesFlow: page ${pageIndex + 1} skipped`, e)
    }
  }

  const qualityInfo = lastQuality
    ? ` | Qualitaet: ${lastQuality.quality_pct}%${lastQuality.is_degraded ? ' (degradiert!)' : ''} | Blur: ${lastQuality.blur_score} | Kontrast: ${lastQuality.contrast_score}`
    : ''

  return { vocabulary: allVocab, qualityInfo }
}
|
||||
Reference in New Issue
Block a user