Add SmartSpellChecker + refactor vocab-worksheet page.tsx
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 45s
CI / test-go-edu-search (push) Successful in 43s
CI / test-python-klausur (push) Failing after 2m51s
CI / test-python-agent-core (push) Successful in 36s
CI / test-nodejs-website (push) Successful in 37s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 45s
CI / test-go-edu-search (push) Successful in 43s
CI / test-python-klausur (push) Failing after 2m51s
CI / test-python-agent-core (push) Successful in 36s
CI / test-nodejs-website (push) Successful in 37s
SmartSpellChecker (klausur-service): - Language-aware OCR post-correction without LLMs - Dual-dictionary heuristic for EN/DE language detection - Context-based a/I disambiguation via bigram lookup - Multi-digit substitution (sch00l→school) - Cross-language guard (don't false-correct DE words in EN column) - Umlaut correction (Schuler→Schüler, uber→über) - Integrated into spell_review_entries_sync() pipeline - 31 tests, 9ms/100 corrections Vocab-worksheet refactoring (studio-v2): - Split 2337-line page.tsx into 14 files - Custom hook useVocabWorksheet.ts (all state + logic) - 9 components in components/ directory - types.ts, constants.ts for shared definitions Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -881,10 +881,25 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
|
||||
"""Rule-based OCR correction: spell-checker + structural heuristics.
|
||||
|
||||
Deterministic — never translates, never touches IPA, never hallucinates.
|
||||
Uses SmartSpellChecker for language-aware corrections with context-based
|
||||
disambiguation (a/I), multi-digit substitution, and cross-language guard.
|
||||
"""
|
||||
t0 = time.time()
|
||||
changes: List[Dict] = []
|
||||
all_corrected: List[Dict] = []
|
||||
|
||||
# Use SmartSpellChecker if available, fall back to legacy _spell_fix_field
|
||||
_smart = None
|
||||
try:
|
||||
from smart_spell import SmartSpellChecker
|
||||
_smart = SmartSpellChecker()
|
||||
logger.debug("spell_review: using SmartSpellChecker")
|
||||
except Exception:
|
||||
logger.debug("spell_review: SmartSpellChecker not available, using legacy")
|
||||
|
||||
# Map field names → language codes for SmartSpellChecker
|
||||
_LANG_MAP = {"english": "en", "german": "de", "example": "auto"}
|
||||
|
||||
for i, entry in enumerate(entries):
|
||||
e = dict(entry)
|
||||
# Page-ref normalization (always, regardless of review status)
|
||||
@@ -907,9 +922,18 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
|
||||
old_val = (e.get(field_name) or "").strip()
|
||||
if not old_val:
|
||||
continue
|
||||
# example field is mixed-language — try German first (for umlauts)
|
||||
lang = "german" if field_name in ("german", "example") else "english"
|
||||
new_val, was_changed = _spell_fix_field(old_val, field=lang)
|
||||
|
||||
if _smart:
|
||||
# SmartSpellChecker path — language-aware, context-based
|
||||
lang_code = _LANG_MAP.get(field_name, "en")
|
||||
result = _smart.correct_text(old_val, lang=lang_code)
|
||||
new_val = result.corrected
|
||||
was_changed = result.changed
|
||||
else:
|
||||
# Legacy path
|
||||
lang = "german" if field_name in ("german", "example") else "english"
|
||||
new_val, was_changed = _spell_fix_field(old_val, field=lang)
|
||||
|
||||
if was_changed and new_val != old_val:
|
||||
changes.append({
|
||||
"row_index": e.get("row_index", i),
|
||||
@@ -921,12 +945,13 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
|
||||
e["llm_corrected"] = True
|
||||
all_corrected.append(e)
|
||||
duration_ms = int((time.time() - t0) * 1000)
|
||||
model_name = "smart-spell-checker" if _smart else "spell-checker"
|
||||
return {
|
||||
"entries_original": entries,
|
||||
"entries_corrected": all_corrected,
|
||||
"changes": changes,
|
||||
"skipped_count": 0,
|
||||
"model_used": "spell-checker",
|
||||
"model_used": model_name,
|
||||
"duration_ms": duration_ms,
|
||||
}
|
||||
|
||||
|
||||
369
klausur-service/backend/smart_spell.py
Normal file
369
klausur-service/backend/smart_spell.py
Normal file
@@ -0,0 +1,369 @@
|
||||
"""
|
||||
SmartSpellChecker — Language-aware OCR post-correction without LLMs.
|
||||
|
||||
Uses pyspellchecker (MIT) with dual EN+DE dictionaries for:
|
||||
- Automatic language detection per word (dual-dictionary heuristic)
|
||||
- OCR error correction (digit↔letter, umlauts, transpositions)
|
||||
- Context-based disambiguation (a/I, l/I) via bigram lookup
|
||||
- Mixed-language support for example sentences
|
||||
|
||||
License: Apache 2.0 (commercial use permitted)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Literal, Optional, Set, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Init
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Build the shared English and German dictionaries once, at import time.
# When pyspellchecker cannot be imported, _AVAILABLE is False and
# _en_spell / _de_spell are never bound; SmartSpellChecker.__init__ checks
# _AVAILABLE before touching them.
try:
    from spellchecker import SpellChecker as _SpellChecker
    # NOTE(review): distance=1 presumably limits candidates to edit
    # distance 1 — confirm against the pyspellchecker documentation.
    _en_spell = _SpellChecker(language='en', distance=1)
    _de_spell = _SpellChecker(language='de', distance=1)
    _AVAILABLE = True
except ImportError:
    _AVAILABLE = False
    logger.warning("pyspellchecker not installed — SmartSpellChecker disabled")

# Possible outcomes of language detection: a single language, a word/text
# found in both dictionaries ("both"), or in neither ("unknown").
Lang = Literal["en", "de", "both", "unknown"]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bigram context for a/I disambiguation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Words that commonly follow "I" (subject pronoun → verb/modal)
|
||||
_I_FOLLOWERS: frozenset = frozenset({
|
||||
"am", "was", "have", "had", "do", "did", "will", "would", "can",
|
||||
"could", "should", "shall", "may", "might", "must",
|
||||
"think", "know", "see", "want", "need", "like", "love", "hate",
|
||||
"go", "went", "come", "came", "say", "said", "get", "got",
|
||||
"make", "made", "take", "took", "give", "gave", "tell", "told",
|
||||
"feel", "felt", "find", "found", "believe", "hope", "wish",
|
||||
"remember", "forget", "understand", "mean", "meant",
|
||||
"don't", "didn't", "can't", "won't", "couldn't", "wouldn't",
|
||||
"shouldn't", "haven't", "hadn't", "isn't", "wasn't",
|
||||
"really", "just", "also", "always", "never", "often", "sometimes",
|
||||
})
|
||||
|
||||
# Words that commonly follow "a" (article → noun/adjective)
|
||||
_A_FOLLOWERS: frozenset = frozenset({
|
||||
"lot", "few", "little", "bit", "good", "bad", "great", "new", "old",
|
||||
"long", "short", "big", "small", "large", "huge", "tiny",
|
||||
"nice", "beautiful", "wonderful", "terrible", "horrible",
|
||||
"man", "woman", "boy", "girl", "child", "dog", "cat", "bird",
|
||||
"book", "car", "house", "room", "school", "teacher", "student",
|
||||
"day", "week", "month", "year", "time", "place", "way",
|
||||
"friend", "family", "person", "problem", "question", "story",
|
||||
"very", "really", "quite", "rather", "pretty", "single",
|
||||
})
|
||||
|
||||
# Digit→letter substitutions (OCR confusion)
|
||||
_DIGIT_SUBS: Dict[str, List[str]] = {
|
||||
'0': ['o', 'O'],
|
||||
'1': ['l', 'I'],
|
||||
'5': ['s', 'S'],
|
||||
'6': ['g', 'G'],
|
||||
'8': ['b', 'B'],
|
||||
'|': ['I', 'l'],
|
||||
}
|
||||
_SUSPICIOUS_CHARS = frozenset(_DIGIT_SUBS.keys())
|
||||
|
||||
# Umlaut confusion: OCR drops dots (ü→u, ä→a, ö→o)
|
||||
_UMLAUT_MAP = {
|
||||
'a': 'ä', 'o': 'ö', 'u': 'ü', 'i': 'ü',
|
||||
'A': 'Ä', 'O': 'Ö', 'U': 'Ü', 'I': 'Ü',
|
||||
}
|
||||
|
||||
# Tokenizer
|
||||
_TOKEN_RE = re.compile(r"([A-Za-zÄÖÜäöüß'|]+)([^A-Za-zÄÖÜäöüß'|]*)")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data types
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
class CorrectionResult:
    """Outcome of running a text through SmartSpellChecker.correct_text()."""

    # Input text exactly as it was passed in.
    original: str
    # Text after corrections; equal to `original` when nothing was fixed.
    corrected: str
    # Language the correction ran under (the caller's value, or the
    # detected one when "auto" was requested).
    lang_detected: Lang
    # True when `corrected` differs from `original`.
    changed: bool
    # One "old→new" entry per corrected token, in text order.
    changes: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core class
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class SmartSpellChecker:
    """Language-aware OCR spell checker using pyspellchecker (no LLM).

    All lookups go through the two module-level dictionaries (English and
    German). Corrections are deterministic: a token is only replaced when
    the replacement is a known dictionary word.
    """

    def __init__(self):
        """Bind the shared EN/DE dictionaries.

        Raises:
            RuntimeError: If pyspellchecker could not be imported.
        """
        if not _AVAILABLE:
            raise RuntimeError("pyspellchecker not installed")
        self.en = _en_spell
        self.de = _de_spell

    # --- Language detection ---

    def detect_word_lang(self, word: str) -> Lang:
        """Detect language of a single word using dual-dict heuristic.

        Returns "both" when the word is in both dictionaries and "unknown"
        when it is in neither (or empty after stripping punctuation).
        """
        w = word.lower().strip(".,;:!?\"'()")
        if not w:
            return "unknown"
        in_en = bool(self.en.known([w]))
        in_de = bool(self.de.known([w]))
        if in_en and in_de:
            return "both"
        if in_en:
            return "en"
        if in_de:
            return "de"
        return "unknown"

    def detect_text_lang(self, text: str) -> Lang:
        """Detect dominant language of a text string (sentence/phrase).

        Counts words that belong to exactly one dictionary; the language
        with more such words wins. A non-zero tie yields "both"; no
        single-language words at all yields "unknown".
        """
        words = re.findall(r"[A-Za-zÄÖÜäöüß]+", text)
        if not words:
            return "unknown"

        en_count = 0
        de_count = 0
        for w in words:
            lang = self.detect_word_lang(w)
            if lang == "en":
                en_count += 1
            elif lang == "de":
                de_count += 1
            # "both" doesn't count for either

        if en_count > de_count:
            return "en"
        if de_count > en_count:
            return "de"
        if en_count == de_count and en_count > 0:
            return "both"
        return "unknown"

    # --- Single-word correction ---

    def _known(self, word: str) -> bool:
        """True if word is known in EN or DE dictionary."""
        w = word.lower()
        return bool(self.en.known([w])) or bool(self.de.known([w]))

    def _known_in(self, word: str, lang: str) -> bool:
        """True if word is known in a specific language dictionary.

        Any `lang` other than "en" is looked up in the German dictionary.
        """
        w = word.lower()
        spell = self.en if lang == "en" else self.de
        return bool(spell.known([w]))

    def correct_word(self, word: str, lang: str = "en",
                     prev_word: str = "", next_word: str = "") -> Optional[str]:
        """Correct a single word for the given language.

        Returns None if no correction needed, or the corrected string.

        Args:
            word: The word to check/correct
            lang: Expected language ("en" or "de")
            prev_word: Previous word (for context; currently unused)
            next_word: Next word (for context)
        """
        if not word or not word.strip():
            return None

        # Skip numbers and abbreviations with dots
        if word.isdigit() or '.' in word:
            return None

        has_suspicious = any(ch in _SUSPICIOUS_CHARS for ch in word)

        # 0. A lone "l" is a classic OCR misread of "I" (or "a"). Decide from
        #    the following word alone, so the outcome does not depend on
        #    whether a dictionary happens to list the letter "l".
        if word.lower() == 'l' and next_word:
            return self._disambiguate_a_I(word, next_word)

        # 1. Already known → no fix
        if self._known(word):
            return None

        # 2. Digit/pipe substitution
        if has_suspicious:
            if word == '|':
                return 'I'
            # Try single-char substitutions
            for i, ch in enumerate(word):
                if ch not in _DIGIT_SUBS:
                    continue
                for replacement in _DIGIT_SUBS[ch]:
                    candidate = word[:i] + replacement + word[i + 1:]
                    if self._known(candidate):
                        return candidate
            # Try multi-char substitution (e.g., "sch00l" → "school")
            multi = self._try_multi_digit_sub(word)
            if multi:
                return multi

        # 3. Umlaut correction (German)
        if lang == "de" and len(word) >= 3 and word.isalpha():
            umlaut_fix = self._try_umlaut_fix(word)
            if umlaut_fix:
                return umlaut_fix

        # 4. General spell correction
        if not has_suspicious and len(word) >= 3 and word.isalpha():
            # Safety: don't correct if the word is valid in the OTHER language
            # (either directly or via umlaut fix)
            other_lang = "de" if lang == "en" else "en"
            if self._known_in(word, other_lang):
                return None
            if other_lang == "de" and self._try_umlaut_fix(word):
                return None  # has a valid DE umlaut variant → don't touch

            spell = self.en if lang == "en" else self.de
            correction = spell.correction(word.lower())
            if correction and correction != word.lower():
                # The lookup is lowercase; restore a leading capital.
                if word[0].isupper():
                    correction = correction[0].upper() + correction[1:]
                if self._known(correction):
                    return correction

        return None

    # --- Multi-digit substitution ---

    def _try_multi_digit_sub(self, word: str) -> Optional[str]:
        """Try replacing several suspicious characters at once.

        Every suspicious position may keep its character or take one of its
        substitutes, so up to 3^4 = 81 variants are built for the maximum of
        four positions. Returns the first variant that differs from `word`
        and is a known word, or None.
        """
        positions = [(i, ch) for i, ch in enumerate(word) if ch in _DIGIT_SUBS]
        if not positions or len(positions) > 4:
            return None

        # Breadth-first expansion, one suspicious position per round
        queue = [list(word)]
        for pos, ch in positions:
            next_queue = []
            for current in queue:
                # Keep original
                next_queue.append(current[:])
                # Try each substitution
                for repl in _DIGIT_SUBS[ch]:
                    variant = current[:]
                    variant[pos] = repl
                    next_queue.append(variant)
            queue = next_queue

        # Check which combinations produce known words
        for combo in queue:
            candidate = "".join(combo)
            if candidate != word and self._known(candidate):
                return candidate

        return None

    # --- Umlaut fix ---

    def _try_umlaut_fix(self, word: str) -> Optional[str]:
        """Try single-char umlaut substitutions for German words.

        Returns the first variant (scanning left to right) that is a known
        word, or None.
        """
        for i, ch in enumerate(word):
            if ch in _UMLAUT_MAP:
                candidate = word[:i] + _UMLAUT_MAP[ch] + word[i + 1:]
                if self._known(candidate):
                    return candidate
        return None

    # --- a/I disambiguation ---

    def _disambiguate_a_I(self, token: str, next_word: str) -> Optional[str]:
        """Disambiguate 'a' vs 'I' (and OCR variants like 'l', '|').

        Looks only at the following word: "I" if it is a typical follower of
        the pronoun, "a" if it is a typical follower of the article, None
        when the context gives no signal.
        """
        nw = next_word.lower().strip(".,;:!?")
        if nw in _I_FOLLOWERS:
            return "I"
        if nw in _A_FOLLOWERS:
            return "a"
        return None  # uncertain, don't change

    # --- Full text correction ---

    def correct_text(self, text: str, lang: str = "en") -> CorrectionResult:
        """Correct a full text string (field value).

        Everything that is not a word token (punctuation, digits, spacing,
        including anything before the first word) is copied through as is.

        Args:
            text: The text to correct
            lang: Expected language ("en" or "de"), or "auto" to detect it
                from the text itself
        """
        if not text or not text.strip():
            return CorrectionResult(text, text, "unknown", False)

        detected = self.detect_text_lang(text) if lang == "auto" else lang
        # correct_word needs one concrete language; fall back to English
        # when detection was inconclusive ("both" / "unknown").
        word_lang = detected if detected in ("en", "de") else "en"

        tokens = list(_TOKEN_RE.finditer(text))
        if not tokens:
            # No word characters at all → nothing to correct.
            return CorrectionResult(text, text, detected, False)

        # Matching starts at the first word character, so leading text such
        # as "(" or "1. " belongs to no token and must be kept explicitly.
        # Each separator group runs up to the next word character or the end
        # of the text, so nothing else can fall between or after the matches.
        parts: List[str] = [text[:tokens[0].start()]]
        changes: List[str] = []

        # NOTE(review): digits are separators for the tokenizer, so words
        # like "sch00l" reach correct_word() only as letter fragments here.
        for idx, m in enumerate(tokens):
            token, sep = m.group(1), m.group(2)
            next_word = tokens[idx + 1].group(1) if idx + 1 < len(tokens) else ""
            prev_word = tokens[idx - 1].group(1) if idx > 0 else ""

            correction = self.correct_word(
                token, lang=word_lang,
                prev_word=prev_word, next_word=next_word,
            )
            if correction and correction != token:
                changes.append(f"{token}→{correction}")
                parts.append(correction)
            else:
                parts.append(token)
            parts.append(sep)

        corrected = "".join(parts)
        return CorrectionResult(
            original=text,
            corrected=corrected,
            lang_detected=detected,
            changed=corrected != text,
            changes=changes,
        )

    # --- Vocabulary entry correction ---

    def correct_vocab_entry(self, english: str, german: str,
                            example: str = "") -> Dict[str, CorrectionResult]:
        """Correct a full vocabulary entry (EN + DE + example).

        Uses column position to determine language — the most reliable signal.
        The example sentence, if given, is language-detected instead.

        Returns:
            Results keyed by "english", "german" and, only when an example
            was given, "example".
        """
        results = {}
        results["english"] = self.correct_text(english, lang="en")
        results["german"] = self.correct_text(german, lang="de")
        if example:
            # For examples, auto-detect language
            results["example"] = self.correct_text(example, lang="auto")
        return results
|
||||
210
klausur-service/backend/tests/test_smart_spell.py
Normal file
210
klausur-service/backend/tests/test_smart_spell.py
Normal file
@@ -0,0 +1,210 @@
|
||||
"""Tests for SmartSpellChecker — language-aware OCR post-correction."""
|
||||
|
||||
import pytest
|
||||
import sys, os
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from smart_spell import SmartSpellChecker, CorrectionResult
|
||||
|
||||
|
||||
@pytest.fixture
def sc():
    """Hand each test its own SmartSpellChecker instance."""
    checker = SmartSpellChecker()
    return checker
|
||||
|
||||
|
||||
# ─── Language Detection ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestLanguageDetection:
    """Word-level and sentence-level language detection."""

    def test_clear_english_words(self, sc):
        english_words = ("school", "beautiful", "homework", "yesterday", "because")
        for word in english_words:
            detected = sc.detect_word_lang(word)
            assert detected in ("en", "both"), f"{word} should be EN"

    def test_clear_german_words(self, sc):
        german_words = ("Schule", "Hausaufgaben", "Freundschaft", "Straße", "Entschuldigung")
        for word in german_words:
            detected = sc.detect_word_lang(word)
            assert detected in ("de", "both"), f"{word} should be DE"

    def test_ambiguous_words(self, sc):
        """Words listed in both dictionaries are reported as 'both'."""
        shared_words = ("Hand", "Finger", "Arm", "Name", "Ball")
        for word in shared_words:
            detected = sc.detect_word_lang(word)
            assert detected == "both", f"{word} should be 'both'"

    def test_unknown_words(self, sc):
        for unknown in ("xyzqwk", ""):
            assert sc.detect_word_lang(unknown) == "unknown"

    def test_english_sentence(self, sc):
        detected = sc.detect_text_lang("I go to school every day")
        assert detected == "en"

    def test_german_sentence(self, sc):
        detected = sc.detect_text_lang("Ich gehe jeden Tag zur Schule")
        assert detected == "de"

    def test_mixed_sentence(self, sc):
        # The language with more unambiguous words should win
        sentence = "I like to play Fußball with my Freunde"
        assert sc.detect_text_lang(sentence) in ("en", "both")
|
||||
|
||||
|
||||
# ─── Single Word Correction ────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSingleWordCorrection:
    """correct_word(): one token, explicit language."""

    def test_known_word_not_changed(self, sc):
        for word, lang in (("school", "en"), ("Freund", "de")):
            assert sc.correct_word(word, lang) is None

    def test_digit_letter_single(self, sc):
        for ocr, expected in (("g0od", "good"), ("he1lo", "hello")):
            assert sc.correct_word(ocr, "en") == expected

    def test_digit_letter_multi(self, sc):
        """Several digits replaced in one go (e.g., sch00l)."""
        result = sc.correct_word("sch00l", "en")
        assert result == "school", f"Expected 'school', got '{result}'"

    def test_pipe_to_I(self, sc):
        fixed = sc.correct_word("|", "en")
        assert fixed == "I"

    def test_umlaut_schuler(self, sc):
        fixed = sc.correct_word("Schuler", "de")
        assert fixed == "Schüler"

    def test_umlaut_uber(self, sc):
        fixed = sc.correct_word("uber", "de")
        assert fixed == "über"

    def test_umlaut_bucher(self, sc):
        fixed = sc.correct_word("Bucher", "de")
        assert fixed == "Bücher"

    def test_umlaut_turkei(self, sc):
        fixed = sc.correct_word("Turkei", "de")
        assert fixed == "Türkei"

    def test_missing_char(self, sc):
        fixed = sc.correct_word("beautful", "en")
        assert fixed == "beautiful"

    def test_transposition(self, sc):
        fixed = sc.correct_word("teh", "en")
        assert fixed == "the"

    def test_swap(self, sc):
        fixed = sc.correct_word("freind", "en")
        assert fixed == "friend"

    def test_no_false_correction_cross_lang(self, sc):
        """A word valid in the other language must not be 'corrected'.

        'Schuler' in the EN column should NOT become 'Schuyler': its umlaut
        variant 'Schüler' is valid German, so it is most likely a German
        word that landed in the wrong column (or a surname).
        """
        result = sc.correct_word("Schuler", "en")
        # Either None (left alone) or anything other than "Schuyler"
        assert result != "Schuyler", "Should not false-correct German word in EN column"
|
||||
|
||||
|
||||
# ─── a/I Disambiguation ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestAIDisambiguation:
    """_disambiguate_a_I(): decide between 'a' and 'I' from the next word."""

    def test_I_before_verb(self, sc):
        for follower in ("am", "was", "think", "have", "don't"):
            assert sc._disambiguate_a_I("l", follower) == "I"

    def test_a_before_noun_adj(self, sc):
        for follower in ("book", "cat", "big", "lot"):
            assert sc._disambiguate_a_I("a", follower) == "a"

    def test_uncertain_returns_none(self, sc):
        """No contextual signal → None, i.e. leave the token alone."""
        verdict = sc._disambiguate_a_I("l", "xyzqwk")
        assert verdict is None
|
||||
|
||||
|
||||
# ─── Full Text Correction ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestFullTextCorrection:
    """correct_text(): whole field values."""

    def test_english_sentence(self, sc):
        result = sc.correct_text("teh cat is beautful", "en")
        assert result.changed
        for expected in ("the", "beautiful"):
            assert expected in result.corrected

    def test_german_sentence_no_change(self, sc):
        outcome = sc.correct_text("Ich gehe zur Schule", "de")
        assert not outcome.changed

    def test_german_umlaut_fix(self, sc):
        result = sc.correct_text("Der Schuler liest Bucher", "de")
        for expected in ("Schüler", "Bücher"):
            assert expected in result.corrected

    def test_preserves_punctuation(self, sc):
        result = sc.correct_text("teh cat, beautful!", "en")
        for mark in (",", "!"):
            assert mark in result.corrected

    def test_empty_text(self, sc):
        outcome = sc.correct_text("", "en")
        assert not outcome.changed
        assert outcome.corrected == ""
|
||||
|
||||
|
||||
# ─── Vocab Entry Correction ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestVocabEntryCorrection:
    """correct_vocab_entry(): EN column, DE column and example sentence."""

    def test_basic_entry(self, sc):
        results = sc.correct_vocab_entry(english="beautful", german="schön")
        english_result = results["english"]
        german_result = results["german"]
        assert english_result.corrected == "beautiful"
        assert german_result.changed is False

    def test_umlaut_in_german(self, sc):
        results = sc.correct_vocab_entry(english="school", german="Schuler")
        english_result = results["english"]
        german_result = results["german"]
        assert english_result.changed is False
        assert german_result.corrected == "Schüler"

    def test_example_auto_detect(self, sc):
        entry = {
            "english": "friend",
            "german": "Freund",
            "example": "My best freind lives in Berlin",
        }
        results = sc.correct_vocab_entry(**entry)
        assert "friend" in results["example"].corrected
|
||||
|
||||
|
||||
# ─── Speed ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSpeed:
    """Coarse performance guard for single-word correction."""

    def test_100_corrections_under_500ms(self, sc):
        """100 word corrections should complete in under 500ms."""
        import time
        words = [
            ("beautful", "en"), ("teh", "en"), ("freind", "en"),
            ("homwork", "en"), ("yesturday", "en"),
            ("Schuler", "de"), ("Bucher", "de"), ("Turkei", "de"),
            ("uber", "de"), ("Ubung", "de"),
        ] * 10

        # perf_counter() is monotonic and high-resolution; the wall clock
        # returned by time.time() can jump (NTP, DST) and skew the result.
        t0 = time.perf_counter()
        for word, lang in words:
            sc.correct_word(word, lang)
        dt = time.perf_counter() - t0

        print(f"\n  100 corrections in {dt*1000:.0f}ms")
        assert dt < 0.5, f"Too slow: {dt*1000:.0f}ms"
|
||||
494
klausur-service/backend/tests/test_spell_benchmark.py
Normal file
494
klausur-service/backend/tests/test_spell_benchmark.py
Normal file
@@ -0,0 +1,494 @@
|
||||
"""
|
||||
Benchmark: Spell-checking & language detection approaches for OCR post-correction.
|
||||
|
||||
Tests pyspellchecker (already used), symspellpy (candidate), and
|
||||
dual-dictionary language detection heuristic on real vocabulary OCR data.
|
||||
|
||||
Run: pytest tests/test_spell_benchmark.py -v -s
|
||||
"""
|
||||
|
||||
import time
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _load_pyspellchecker():
    """Return an ``(english, german)`` pair of pyspellchecker instances."""
    from spellchecker import SpellChecker

    english, german = (
        SpellChecker(language=code, distance=1) for code in ('en', 'de')
    )
    return english, german
|
||||
|
||||
|
||||
def _load_symspellpy():
    """Load symspellpy with English frequency dict (bundled).

    Returns:
        ``(sym, Verbosity)``: the loaded SymSpell instance and symspellpy's
        Verbosity enum.

    Raises:
        ImportError: If symspellpy is not installed.
        FileNotFoundError: If the bundled dictionary could not be loaded.
    """
    from symspellpy import SymSpell, Verbosity

    dict_name = "frequency_dictionary_en_82_765.txt"
    try:
        # Preferred lookup; pkg_resources is deprecated.
        from importlib.resources import files
        dict_path = str(files("symspellpy") / dict_name)
    except ImportError:
        # importlib.resources.files needs Python 3.9+
        import pkg_resources
        dict_path = pkg_resources.resource_filename("symspellpy", dict_name)

    sym = SymSpell(max_dictionary_edit_distance=2)
    # load_dictionary() reports a missing file through its return value; fail
    # loudly instead of benchmarking against an empty dictionary.
    if not sym.load_dictionary(dict_path, term_index=0, count_index=1):
        raise FileNotFoundError(f"symspellpy dictionary not found: {dict_path}")
    return sym, Verbosity
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test data: (ocr_output, expected_correction, language, category)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Each row: (ocr_output, expected_correction, language, category).
# The category string lets tests filter rows (e.g. skip multi-word cases).
OCR_TEST_CASES = [
    # --- Single-char ambiguity ---
    ("l am a student", "I am a student", "en", "a_vs_I"),
    ("a book", "a book", "en", "a_vs_I"),  # should NOT change
    ("I like cats", "I like cats", "en", "a_vs_I"),  # should NOT change
    ("lt is raining", "It is raining", "en", "a_vs_I"),  # l→I at start

    # --- Digit-letter confusion ---
    ("g0od", "good", "en", "digit_letter"),
    ("sch00l", "school", "en", "digit_letter"),
    ("he1lo", "hello", "en", "digit_letter"),
    ("Sch0n", "Schon", "de", "digit_letter"),  # German

    # --- Umlaut drops ---
    ("schon", "schön", "de", "umlaut"),  # context: "schon" is also valid DE!
    ("Schuler", "Schüler", "de", "umlaut"),
    ("uber", "über", "de", "umlaut"),
    ("Bucher", "Bücher", "de", "umlaut"),
    ("Turkei", "Türkei", "de", "umlaut"),

    # --- Common OCR errors ---
    ("beautful", "beautiful", "en", "missing_char"),
    ("teh", "the", "en", "transposition"),
    ("becasue", "because", "en", "transposition"),
    ("freind", "friend", "en", "swap"),
    ("Freund", "Freund", "de", "correct"),  # already correct

    # --- Merged words ---
    ("atmyschool", "at my school", "en", "merged"),
    ("goodidea", "good idea", "en", "merged"),

    # --- Mixed language example sentences ---
    ("I go to teh school", "I go to the school", "en", "sentence"),
    ("Ich gehe zur Schule", "Ich gehe zur Schule", "de", "sentence_correct"),
]

# Language detection test: (word, expected_language).
# expected_language is "en", "de" or "both" (word expected in both dictionaries).
LANG_DETECT_CASES = [
    # Clear English
    ("school", "en"),
    ("beautiful", "en"),
    ("homework", "en"),
    ("yesterday", "en"),
    ("children", "en"),
    ("because", "en"),
    ("environment", "en"),
    ("although", "en"),

    # Clear German
    ("Schule", "de"),
    ("Hausaufgaben", "de"),
    ("Freundschaft", "de"),
    ("Umwelt", "de"),
    ("Kindergarten", "de"),  # also used in English!
    ("Bücher", "de"),
    ("Straße", "de"),
    ("Entschuldigung", "de"),

    # Ambiguous (exist in both)
    ("Hand", "both"),
    ("Finger", "both"),
    ("Arm", "both"),
    ("Name", "both"),
    ("Ball", "both"),

    # Short/tricky
    ("a", "en"),
    ("I", "en"),
    ("in", "both"),
    ("an", "both"),
    ("the", "en"),
    ("die", "de"),
    ("der", "de"),
    ("to", "en"),
    ("zu", "de"),
]
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Tests
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestPyspellchecker:
    """Exploratory checks of pyspellchecker for OCR post-correction.

    Apart from ``test_known_words`` these tests do not assert a hit rate;
    they print a per-case report and a summary line (run pytest with ``-s``
    to see the output).
    """

    # Categories that need sentence-level context and are therefore skipped
    # by the word-by-word correction test.
    _MULTI_WORD_CATEGORIES = frozenset(
        {"sentence", "sentence_correct", "merged", "a_vs_I"}
    )

    @pytest.fixture(autouse=True)
    def setup(self):
        """Load the English and German spell checkers before every test."""
        self.en, self.de = _load_pyspellchecker()

    @staticmethod
    def _percent(correct, total):
        """Return the hit rate in percent, or 0.0 when nothing was evaluated."""
        return 100 * correct / total if total else 0.0

    @staticmethod
    def _correct_token(spell, token):
        """Return ``token`` corrected by ``spell``, keeping a leading capital.

        Known words and words without a differing suggestion are returned
        unchanged.
        """
        lowered = token.lower()
        if spell.known([lowered]):
            return token
        fix = spell.correction(lowered)
        if fix and fix != lowered:
            # The checker works on lowercase; restore the original capital.
            if token[0].isupper():
                fix = fix[0].upper() + fix[1:]
            return fix
        return token

    def test_known_words(self):
        """Verify basic dictionary lookup in both languages."""
        assert self.en.known(["school"])
        assert self.en.known(["beautiful"])
        assert self.de.known(["schule"])  # looked up in lowercase
        assert self.de.known(["freund"])
        # Nonsense strings must not be reported as known.
        assert not self.en.known(["xyzqwk"])
        assert not self.de.known(["xyzqwk"])

    def test_correction_quality(self):
        """Report how many single-word OCR errors are repaired correctly."""
        results = []
        for ocr, expected, lang, category in OCR_TEST_CASES:
            if category in self._MULTI_WORD_CATEGORIES:
                continue  # skip multi-word cases

            spell = self.en if lang == "en" else self.de
            result = " ".join(self._correct_token(spell, w) for w in ocr.split())
            ok = result == expected
            results.append((ocr, expected, result, ok, category))
            if not ok:
                print(f" MISS: '{ocr}' → '{result}' (expected '{expected}') [{category}]")
            else:
                print(f" OK: '{ocr}' → '{result}' [{category}]")

        correct = sum(1 for _ocr, _exp, _res, ok, _cat in results if ok)
        total = len(results)
        # _percent guards against an empty result list (no qualifying cases).
        print(f"\npyspellchecker: {correct}/{total} correct ({self._percent(correct, total):.0f}%)")

    def test_language_detection_heuristic(self):
        """Report how well dual-dictionary membership predicts the language."""
        results = []
        for word, expected_lang in LANG_DETECT_CASES:
            w = word.lower()
            in_en = bool(self.en.known([w]))
            in_de = bool(self.de.known([w]))

            if in_en and in_de:
                detected = "both"
            elif in_en:
                detected = "en"
            elif in_de:
                detected = "de"
            else:
                detected = "unknown"

            ok = detected == expected_lang
            results.append((word, expected_lang, detected, ok))
            if not ok:
                print(f" MISS: '{word}' → {detected} (expected {expected_lang})")
            else:
                print(f" OK: '{word}' → {detected}")

        correct = sum(1 for _word, _exp, _det, ok in results if ok)
        total = len(results)
        print(f"\nLang detection heuristic: {correct}/{total} correct ({self._percent(correct, total):.0f}%)")

    def test_umlaut_awareness(self):
        """Print the German candidates offered for words missing an umlaut."""
        # "Schuler" should suggest "Schüler"
        candidates = self.de.candidates("schuler")
        print(f" 'schuler' candidates: {candidates}")
        # "uber" should suggest "über"
        candidates_uber = self.de.candidates("uber")
        print(f" 'uber' candidates: {candidates_uber}")
        # "Turkei" should suggest "Türkei"
        candidates_turkei = self.de.candidates("turkei")
        print(f" 'turkei' candidates: {candidates_turkei}")

    def test_speed_100_words(self):
        """Measure correction speed for 100 English and 100 German words."""
        words_en = ["beautful", "teh", "becasue", "freind", "shcool",
                    "homwork", "yesturday", "chilren", "becuse", "enviroment"] * 10
        # perf_counter is monotonic and high-resolution, unlike time.time().
        t0 = time.perf_counter()
        for w in words_en:
            self.en.correction(w)
        dt = time.perf_counter() - t0
        print(f"\n pyspellchecker: 100 EN corrections in {dt*1000:.0f}ms")

        words_de = ["schuler", "bucher", "turkei", "strasze", "entschuldigung",
                    "kindergaten", "freumd", "hauaufgaben", "umwlt", "ubung"] * 10
        t0 = time.perf_counter()
        for w in words_de:
            self.de.correction(w)
        dt = time.perf_counter() - t0
        print(f" pyspellchecker: 100 DE corrections in {dt*1000:.0f}ms")
|
||||
|
||||
|
||||
class TestSymspellpy:
    """Exploratory checks of symspellpy as a faster alternative.

    All tests are skipped when symspellpy (or its dictionary file) cannot be
    loaded. Results are printed, not asserted.
    """

    # Categories that need sentence-level context; not usable for a
    # single-term lookup.
    _MULTI_WORD_CATEGORIES = ("sentence", "sentence_correct", "merged", "a_vs_I")

    @pytest.fixture(autouse=True)
    def setup(self):
        """Load symspellpy, or skip the test when it is unavailable."""
        try:
            self.sym, self.Verbosity = _load_symspellpy()
        except (ImportError, FileNotFoundError) as e:
            self.available = False
            pytest.skip(f"symspellpy not installed: {e}")
        else:
            self.available = True

    def test_correction_quality(self):
        """Test symspellpy corrections (EN only — no DE dict bundled)."""
        en_cases = [
            (ocr, expected, category)
            for ocr, expected, lang, category in OCR_TEST_CASES
            if lang == "en" and category not in self._MULTI_WORD_CATEGORIES
        ]

        results = []
        for ocr, expected, category in en_cases:
            suggestions = self.sym.lookup(ocr.lower(), self.Verbosity.CLOSEST, max_edit_distance=2)
            if suggestions:
                fix = suggestions[0].term
                # Lookup is done in lowercase; restore a leading capital.
                # Slicing keeps this safe for an empty OCR string.
                if ocr[:1].isupper():
                    fix = fix[0].upper() + fix[1:]
                result = fix
            else:
                result = ocr

            ok = result == expected
            results.append((ocr, expected, result, ok, category))
            status = "OK" if ok else "MISS"
            print(f" {status}: '{ocr}' → '{result}' (expected '{expected}') [{category}]")

        correct = sum(1 for _ocr, _exp, _res, ok, _cat in results if ok)
        total = len(results)
        # Avoid ZeroDivisionError when no English case qualifies.
        percent = 100 * correct / total if total else 0.0
        print(f"\nsymspellpy EN: {correct}/{total} correct ({percent:.0f}%)")

    def test_speed_100_words(self):
        """Measure symspellpy correction speed for 100 words."""
        words = ["beautful", "teh", "becasue", "freind", "shcool",
                 "homwork", "yesturday", "chilren", "becuse", "enviroment"] * 10
        # perf_counter is monotonic and high-resolution, unlike time.time().
        t0 = time.perf_counter()
        for w in words:
            self.sym.lookup(w, self.Verbosity.CLOSEST, max_edit_distance=2)
        dt = time.perf_counter() - t0
        print(f"\n symspellpy: 100 EN corrections in {dt*1000:.0f}ms")

    def test_compound_segmentation(self):
        """Test symspellpy's word segmentation for merged words."""
        cases = [
            ("atmyschool", "at my school"),
            ("goodidea", "good idea"),
            ("makeadecision", "make a decision"),
        ]
        for merged, expected in cases:
            result = self.sym.word_segmentation(merged)
            ok = result.corrected_string == expected
            status = "OK" if ok else "MISS"
            print(f" {status}: '{merged}' → '{result.corrected_string}' (expected '{expected}')")
|
||||
|
||||
|
||||
class TestContextDisambiguation:
    """Explore next-word context for telling 'a' and 'I' (or OCR 'l') apart."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Load both spell checkers before each test."""
        self.en, self.de = _load_pyspellchecker()

    def test_bigram_context(self):
        """Report the hit rate of a follower-word lookup for a/I.

        The token is resolved purely from the word that follows it: a word
        from the verb list yields "I", a word from the noun/adjective list
        yields "a", and anything else leaves the token untouched. Results
        are printed, not asserted.
        """
        # Words that commonly follow "I" (verbs and contracted auxiliaries).
        verbs_after_i = frozenset(
            "am was have had do did will would can could should shall may "
            "might think know see want need like love hate go went come "
            "came say said get got make made take took give gave tell told "
            "feel felt find found believe hope remember forget understand "
            "mean meant don't didn't can't won't couldn't shouldn't "
            "wouldn't haven't hadn't".split()
        )

        # Words that commonly follow "a" (nouns and adjectives).
        words_after_a = frozenset(
            "lot few little bit good bad big small great new old long "
            "short man woman boy girl dog cat book car house day year "
            "nice beautiful large huge tiny".split()
        )

        def resolve(token: str, next_word: str) -> str:
            """Pick "I" or "a" from the following word; else keep the token."""
            follower = next_word.lower()
            if follower in verbs_after_i:
                return "I"
            if follower in words_after_a:
                return "a"
            return token  # uncertain: leave unchanged

        # (ambiguous token, following word, expected resolution)
        cases = [
            ("l", "am", "I"),
            ("l", "was", "I"),
            ("l", "think", "I"),
            ("a", "book", "a"),
            ("a", "cat", "a"),
            ("a", "lot", "a"),
            ("l", "big", "a"),  # "a big ..."
            ("a", "have", "I"),  # "I have ..."
        ]

        outcomes = []
        for token, next_word, expected in cases:
            result = resolve(token, next_word)
            hit = result == expected
            outcomes.append((token, next_word, expected, result, hit))
            status = "OK" if hit else "MISS"
            print(f" {status}: '{token} {next_word}...' → '{result}' (expected '{expected}')")

        correct = len([entry for entry in outcomes if entry[-1]])
        total = len(outcomes)
        print(f"\na/I disambiguation: {correct}/{total} correct ({100*correct/total:.0f}%)")
|
||||
|
||||
|
||||
class TestLangDetectLibrary:
    """Exploratory checks of the optional ``langid`` package.

    Both tests are skipped when ``langid`` cannot be imported; results are
    printed, not asserted.
    """

    @staticmethod
    def _langid_or_skip():
        """Return the ``langid`` module, skipping the test if it is missing."""
        try:
            import langid
        except ImportError:
            pytest.skip("langid not installed")
        return langid

    def test_py3langid(self):
        """Report how many full sentences are classified as the right language."""
        langid = self._langid_or_skip()

        sentences = [
            ("I go to school every day", "en"),
            ("Ich gehe jeden Tag zur Schule", "de"),
            ("The weather is nice today", "en"),
            ("Das Wetter ist heute schön", "de"),
            ("She likes to play football", "en"),
            ("Er spielt gerne Fußball", "de"),
        ]

        hits = []
        for text, expected in sentences:
            lang, confidence = langid.classify(text)
            matched = lang == expected
            hits.append(matched)
            status = "OK" if matched else "MISS"
            print(f" {status}: '{text[:40]}...' → {lang} ({confidence:.2f}) (expected {expected})")

        print(f"\nlangid sentence detection: {sum(hits)}/{len(hits)} correct")

    def test_langid_single_words(self):
        """langid on single words — expected to be unreliable."""
        langid = self._langid_or_skip()

        words = [
            ("school", "en"), ("Schule", "de"), ("book", "en"),
            ("Buch", "de"), ("car", "en"), ("Auto", "de"),
            ("a", "en"), ("I", "en"), ("der", "de"), ("the", "en"),
        ]

        hits = []
        for word, expected in words:
            lang, conf = langid.classify(word)
            matched = lang == expected
            hits.append(matched)
            status = "OK" if matched else "MISS"
            print(f" {status}: '{word}' → {lang} ({conf:.2f}) (expected {expected})")

        print(f"\nlangid single-word: {sum(hits)}/{len(hits)} correct")
|
||||
|
||||
|
||||
class TestIntegratedApproach:
    """Try the combined approach: dictionary membership plus spell correction."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Load both spell checkers before each test."""
        self.en, self.de = _load_pyspellchecker()

    def detect_language(self, word: str) -> str:
        """Classify ``word`` by which dictionaries contain it.

        Returns "ambiguous" for words of one or two characters, otherwise
        "both", "en", "de" or "unknown".
        """
        lowered = word.lower()
        if len(lowered) <= 2:
            # Very short words are too ambiguous to classify.
            return "ambiguous"
        membership = (
            bool(self.en.known([lowered])),
            bool(self.de.known([lowered])),
        )
        labels = {
            (True, True): "both",
            (True, False): "en",
            (False, True): "de",
            (False, False): "unknown",
        }
        return labels[membership]

    def correct_word(self, word: str, expected_lang: str) -> str:
        """Correct one word using the dictionary of ``expected_lang``.

        The word is returned unchanged when either dictionary knows it or
        when the checker offers no differing suggestion. A leading capital
        letter is carried over to the suggestion.
        """
        if expected_lang == "en":
            primary, secondary = self.en, self.de
        else:
            primary, secondary = self.de, self.en

        lowered = word.lower()
        # Known in the column's language, or valid in the other language:
        # either way leave it alone.
        if primary.known([lowered]) or secondary.known([lowered]):
            return word

        suggestion = primary.correction(lowered)
        if not suggestion or suggestion == lowered:
            return word
        if word[0].isupper():
            suggestion = suggestion[0].upper() + suggestion[1:]
        return suggestion

    def test_full_pipeline(self):
        """Report per-column hit counts for word-by-word correction."""
        # (english_col, german_col, expected_en, expected_de)
        vocab_entries = [
            ("beautful", "schön", "beautiful", "schön"),
            ("school", "Schule", "school", "Schule"),
            ("teh cat", "die Katze", "the cat", "die Katze"),
            ("freind", "Freund", "friend", "Freund"),
            ("homwork", "Hausaufgaben", "homework", "Hausaufgaben"),
            # German column should gain the umlaut (Schüler); the English
            # column is expected to stay as-is.
            ("Schuler", "Schuler", "Schuler", "Schüler"),
        ]

        total = len(vocab_entries)
        en_correct = 0
        de_correct = 0

        for en_ocr, de_ocr, exp_en, exp_de in vocab_entries:
            # Correct every whitespace-separated word of each column.
            en_fixed = " ".join([self.correct_word(w, "en") for w in en_ocr.split()])
            de_fixed = " ".join([self.correct_word(w, "de") for w in de_ocr.split()])

            en_ok = en_fixed == exp_en
            de_ok = de_fixed == exp_de
            if en_ok:
                en_correct += 1
            if de_ok:
                de_correct += 1

            en_status = "OK" if en_ok else "MISS"
            de_status = "OK" if de_ok else "MISS"
            print(f" EN {en_status}: '{en_ocr}' → '{en_fixed}' (expected '{exp_en}')")
            print(f" DE {de_status}: '{de_ocr}' → '{de_fixed}' (expected '{exp_de}')")

        print(f"\nEN corrections: {en_correct}/{total} correct")
        print(f"DE corrections: {de_correct}/{total} correct")
|
||||
57
studio-v2/app/vocab-worksheet/components/ExportTab.tsx
Normal file
57
studio-v2/app/vocab-worksheet/components/ExportTab.tsx
Normal file
@@ -0,0 +1,57 @@
|
||||
'use client'
|
||||
|
||||
import React from 'react'
|
||||
import type { VocabWorksheetHook } from '../types'
|
||||
|
||||
/**
 * Export tab: once a worksheet id exists it offers the worksheet PDF, the
 * solution PDF (only when solutions are enabled) and a button to start a new
 * session; otherwise it shows a placeholder text.
 */
export function ExportTab({ h }: { h: VocabWorksheetHook }) {
  const { isDark, glassCard } = h

  // Theme-dependent class fragments shared by several elements.
  const headingTone = isDark ? 'text-white' : 'text-slate-900'
  const mutedTone = isDark ? 'text-white/60' : 'text-slate-500'

  // One download card; only colours, icon path and labels differ per card.
  const renderDownloadCard = (
    kind: 'worksheet' | 'solution',
    tone: { hover: string; badge: string; icon: string },
    iconPath: string,
    title: string,
    subtitle: string,
  ) => (
    <button onClick={() => h.downloadPDF(kind)} className={`${glassCard} p-6 rounded-xl text-left transition-all hover:shadow-lg ${tone.hover}`}>
      <div className={`w-12 h-12 mb-3 rounded-xl flex items-center justify-center ${tone.badge}`}>
        <svg className={`w-6 h-6 ${tone.icon}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
          <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d={iconPath} />
        </svg>
      </div>
      <h3 className={`font-semibold mb-1 ${headingTone}`}>{title}</h3>
      <p className={`text-sm ${mutedTone}`}>{subtitle}</p>
    </button>
  )

  const purpleTone = {
    hover: isDark ? 'hover:border-purple-400/50' : 'hover:border-purple-500',
    badge: isDark ? 'bg-purple-500/30' : 'bg-purple-100',
    icon: isDark ? 'text-purple-300' : 'text-purple-600',
  }
  const greenTone = {
    hover: isDark ? 'hover:border-green-400/50' : 'hover:border-green-500',
    badge: isDark ? 'bg-green-500/30' : 'bg-green-100',
    icon: isDark ? 'text-green-300' : 'text-green-600',
  }

  const successBanner = isDark ? 'bg-green-500/20 border border-green-500/30' : 'bg-green-100 border border-green-200'
  const successText = isDark ? 'text-green-200' : 'text-green-700'
  const resetTone = isDark ? 'border-white/20 text-white/80 hover:bg-white/10' : 'border-slate-300 text-slate-700 hover:bg-slate-50'

  const content = h.worksheetId ? (
    <div className="space-y-4">
      <div className={`p-4 rounded-xl ${successBanner}`}>
        <div className="flex items-center gap-3">
          <svg className="w-6 h-6 text-green-500" fill="none" stroke="currentColor" viewBox="0 0 24 24">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
          </svg>
          <span className={`font-medium ${successText}`}>Arbeitsblatt erfolgreich generiert!</span>
        </div>
      </div>

      <div className="grid grid-cols-2 gap-4">
        {renderDownloadCard(
          'worksheet',
          purpleTone,
          'M12 10v6m0 0l-3-3m3 3l3-3m2 8H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z',
          'Arbeitsblatt',
          'PDF zum Ausdrucken',
        )}

        {h.includeSolutions && renderDownloadCard(
          'solution',
          greenTone,
          'M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z',
          'Loesungsblatt',
          'PDF mit Loesungen',
        )}
      </div>

      <button onClick={h.resetSession} className={`w-full py-3 rounded-xl border font-medium transition-colors ${resetTone}`}>
        Neues Arbeitsblatt erstellen
      </button>
    </div>
  ) : (
    <p className={`text-center py-12 ${mutedTone}`}>Noch kein Arbeitsblatt generiert.</p>
  )

  return (
    <div className={`${glassCard} rounded-2xl p-6`}>
      <h2 className={`text-lg font-semibold mb-4 ${headingTone}`}>PDF herunterladen</h2>

      {content}
    </div>
  )
}
|
||||
@@ -0,0 +1,39 @@
|
||||
'use client'
|
||||
|
||||
import React from 'react'
|
||||
import type { VocabWorksheetHook } from '../types'
|
||||
|
||||
/**
 * Full-screen overlay showing the original document.
 *
 * The source is picked in this order: the directly uploaded file, the
 * selected mobile file, then the selected stored document. Images render as
 * <img>, everything else as an <iframe>. Clicking the backdrop, pressing the
 * close button or pressing Escape hides the overlay.
 */
export function FullscreenPreview({ h }: { h: VocabWorksheetHook }) {
  // Keyboard users need a way out of the overlay: close on Escape.
  React.useEffect(() => {
    const onKeyDown = (event: KeyboardEvent) => {
      if (event.key === 'Escape') h.setShowFullPreview(false)
    }
    window.addEventListener('keydown', onKeyDown)
    return () => window.removeEventListener('keydown', onKeyDown)
  }, [h])

  return (
    <div
      role="dialog"
      aria-modal="true"
      aria-label="Vollbild-Vorschau"
      className="fixed inset-0 z-50 bg-black/80 backdrop-blur-sm flex items-center justify-center"
      onClick={() => h.setShowFullPreview(false)}
    >
      {/* Icon-only button: needs an explicit accessible name. */}
      <button
        type="button"
        aria-label="Vorschau schliessen"
        onClick={() => h.setShowFullPreview(false)}
        className="absolute top-4 right-4 p-2 rounded-full bg-white/10 hover:bg-white/20 text-white z-10 transition-colors"
      >
        <svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
          <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
        </svg>
      </button>
      {/* Clicks inside the content must not bubble to the closing backdrop. */}
      <div className="max-w-[95vw] max-h-[95vh] overflow-auto" onClick={(e) => e.stopPropagation()}>
        {h.directFile?.type.startsWith('image/') && h.directFilePreview && (
          <img src={h.directFilePreview} alt="Original" className="max-w-none" />
        )}
        {h.directFile?.type === 'application/pdf' && h.directFilePreview && (
          <iframe src={h.directFilePreview} title="Original" className="border-0 rounded-xl bg-white" style={{ width: '90vw', height: '90vh' }} />
        )}
        {h.selectedMobileFile && !h.directFile && (
          h.selectedMobileFile.type.startsWith('image/')
            ? <img src={h.selectedMobileFile.dataUrl} alt="Original" className="max-w-none" />
            : <iframe src={h.selectedMobileFile.dataUrl} title="Original" className="border-0 rounded-xl bg-white" style={{ width: '90vw', height: '90vh' }} />
        )}
        {h.selectedDocumentId && !h.directFile && !h.selectedMobileFile && (() => {
          const doc = h.storedDocuments.find(d => d.id === h.selectedDocumentId)
          // Nothing to show when the document is unknown or has no URL.
          if (!doc?.url) return null
          return doc.type.startsWith('image/')
            ? <img src={doc.url} alt="Original" className="max-w-none" />
            : <iframe src={doc.url} title="Original" className="border-0 rounded-xl bg-white" style={{ width: '90vw', height: '90vh' }} />
        })()}
      </div>
    </div>
  )
}
|
||||
135
studio-v2/app/vocab-worksheet/components/OcrComparisonModal.tsx
Normal file
135
studio-v2/app/vocab-worksheet/components/OcrComparisonModal.tsx
Normal file
@@ -0,0 +1,135 @@
|
||||
'use client'
|
||||
|
||||
import React from 'react'
|
||||
import type { VocabWorksheetHook } from '../types'
|
||||
|
||||
/**
 * Modal presenting the result of an OCR method comparison for one page.
 *
 * Renders, depending on the hook state:
 * - a spinner while `h.isComparingOcr` is set,
 * - the message from `h.ocrCompareError`,
 * - one card per entry of `h.ocrCompareResult.methods` (the card whose key
 *   equals `recommendation.best_method` is highlighted and tagged "Beste";
 *   at most the first 10 vocabulary pairs of a method are listed),
 * - an agreement summary when `h.ocrCompareResult.comparison` is present.
 *
 * The close button calls `h.setShowOcrComparison(false)`.
 */
export function OcrComparisonModal({ h }: { h: VocabWorksheetHook }) {
  const { isDark, glassCard } = h

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center p-4 bg-black/50 backdrop-blur-sm">
      <div className={`relative w-full max-w-6xl max-h-[90vh] overflow-auto rounded-3xl ${glassCard} p-6`}>
        {/* Header */}
        <div className="flex items-center justify-between mb-6">
          <div>
            <h2 className={`text-xl font-bold ${isDark ? 'text-white' : 'text-slate-900'}`}>
              OCR-Methoden Vergleich
            </h2>
            <p className={`text-sm ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
              Seite {h.ocrComparePageIndex !== null ? h.ocrComparePageIndex + 1 : '-'}
            </p>
          </div>
          <button
            onClick={() => h.setShowOcrComparison(false)}
            className={`p-2 rounded-xl ${isDark ? 'hover:bg-white/10 text-white' : 'hover:bg-black/5 text-slate-500'}`}
          >
            <svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
            </svg>
          </button>
        </div>

        {/* Loading State */}
        {h.isComparingOcr && (
          <div className="flex flex-col items-center justify-center py-12">
            <div className="w-12 h-12 border-4 border-purple-500 border-t-transparent rounded-full animate-spin mb-4" />
            <p className={isDark ? 'text-white/60' : 'text-slate-500'}>
              Vergleiche OCR-Methoden... (kann 1-2 Minuten dauern)
            </p>
          </div>
        )}

        {/* Error State */}
        {h.ocrCompareError && (
          <div className={`p-4 rounded-xl ${isDark ? 'bg-red-500/20 text-red-300' : 'bg-red-100 text-red-700'}`}>
            Fehler: {h.ocrCompareError}
          </div>
        )}

        {/* Results */}
        {h.ocrCompareResult && !h.isComparingOcr && (
          <div className="space-y-6">
            {/* Method Results Grid */}
            <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
              {Object.entries(h.ocrCompareResult.methods || {}).map(([key, method]: [string, any]) => (
                <div
                  key={key}
                  className={`p-4 rounded-2xl ${
                    h.ocrCompareResult.recommendation?.best_method === key
                      ? (isDark ? 'bg-green-500/20 border border-green-500/50' : 'bg-green-100 border border-green-300')
                      : (isDark ? 'bg-white/5 border border-white/10' : 'bg-white/50 border border-black/10')
                  }`}
                >
                  <div className="flex items-center justify-between mb-3">
                    <h3 className={`font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
                      {method.name}
                    </h3>
                    {h.ocrCompareResult.recommendation?.best_method === key && (
                      <span className="px-2 py-1 text-xs font-medium bg-green-500 text-white rounded-full">
                        Beste
                      </span>
                    )}
                  </div>

                  {method.success ? (
                    <>
                      <div className={`text-sm mb-2 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
                        <span className="font-medium">{method.vocabulary_count}</span> Vokabeln in <span className="font-medium">{method.duration_seconds}s</span>
                      </div>

                      {method.vocabulary && method.vocabulary.length > 0 && (
                        <div className={`max-h-48 overflow-y-auto rounded-xl p-2 ${isDark ? 'bg-black/20' : 'bg-white/50'}`}>
                          {method.vocabulary.slice(0, 10).map((v: any, idx: number) => (
                            <div key={idx} className={`text-sm py-1 border-b last:border-0 ${isDark ? 'border-white/10 text-white/80' : 'border-black/5 text-slate-700'}`}>
                              <span className="font-medium">{v.english}</span> = {v.german}
                            </div>
                          ))}
                          {method.vocabulary.length > 10 && (
                            <div className={`text-xs mt-2 ${isDark ? 'text-white/40' : 'text-slate-400'}`}>
                              + {method.vocabulary.length - 10} weitere...
                            </div>
                          )}
                        </div>
                      )}
                    </>
                  ) : (
                    <div className={`text-sm ${isDark ? 'text-red-300' : 'text-red-600'}`}>
                      {method.error || 'Fehler'}
                    </div>
                  )}
                </div>
              ))}
            </div>

            {/* Comparison Summary */}
            {h.ocrCompareResult.comparison && (
              <div className={`p-4 rounded-2xl ${isDark ? 'bg-blue-500/20 border border-blue-500/30' : 'bg-blue-100 border border-blue-200'}`}>
                <h3 className={`font-semibold mb-3 ${isDark ? 'text-blue-300' : 'text-blue-900'}`}>
                  Uebereinstimmung
                </h3>
                <div className="grid grid-cols-2 md:grid-cols-4 gap-4 text-sm">
                  <div>
                    <span className={isDark ? 'text-blue-200' : 'text-blue-700'}>Von allen erkannt:</span>
                    <span className="ml-2 font-bold">{h.ocrCompareResult.comparison.found_by_all_methods?.length || 0}</span>
                  </div>
                  <div>
                    <span className={isDark ? 'text-blue-200' : 'text-blue-700'}>Nur teilweise:</span>
                    <span className="ml-2 font-bold">{h.ocrCompareResult.comparison.found_by_some_methods?.length || 0}</span>
                  </div>
                  <div>
                    <span className={isDark ? 'text-blue-200' : 'text-blue-700'}>Gesamt einzigartig:</span>
                    <span className="ml-2 font-bold">{h.ocrCompareResult.comparison.total_unique_vocabulary || 0}</span>
                  </div>
                  <div>
                    <span className={isDark ? 'text-blue-200' : 'text-blue-700'}>Uebereinstimmung:</span>
                    <span className="ml-2 font-bold">{Math.round((h.ocrCompareResult.comparison.agreement_rate || 0) * 100)}%</span>
                  </div>
                </div>
              </div>
            )}
          </div>
        )}
      </div>
    </div>
  )
}
|
||||
125
studio-v2/app/vocab-worksheet/components/OcrSettingsPanel.tsx
Normal file
125
studio-v2/app/vocab-worksheet/components/OcrSettingsPanel.tsx
Normal file
@@ -0,0 +1,125 @@
|
||||
'use client'
|
||||
|
||||
import React from 'react'
|
||||
import type { VocabWorksheetHook } from '../types'
|
||||
import { defaultOcrPrompts } from '../constants'
|
||||
|
||||
/** Split a comma-separated string into trimmed, non-empty patterns. */
function parsePatternList(text: string): string[] {
  return text.split(',').map(s => s.trim()).filter(Boolean)
}

/** True when both lists hold the same patterns in the same order. */
function samePatternList(a: string[], b: string[]): boolean {
  return a.length === b.length && a.every((pattern, i) => pattern === b[i])
}

/**
 * Keep the raw text of a comma-separated pattern input in local state.
 *
 * Deriving the input value from `patterns.join(', ')` on every render drops
 * a freshly typed trailing comma (the empty entry is filtered out), so a
 * second pattern could never be typed. The draft keeps the text exactly as
 * typed and is only replaced when the stored patterns change to something
 * the draft does not already describe (e.g. reset to defaults).
 */
function usePatternDraft(patterns: string[]): [string, (text: string) => void] {
  const [draft, setDraft] = React.useState(() => patterns.join(', '))

  React.useEffect(() => {
    if (!samePatternList(parsePatternList(draft), patterns)) {
      setDraft(patterns.join(', '))
    }
    // `draft` is deliberately not a dependency: re-sync only on external
    // pattern changes, never in response to the user's own keystrokes.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [patterns])

  return [draft, setDraft]
}

/**
 * Settings panel for OCR result filtering: three filter toggles, header and
 * footer pattern lists (comma-separated), a free-text filter prompt and a
 * reset-to-defaults button. Every change is passed to `h.saveOcrPrompts`
 * immediately.
 */
export function OcrSettingsPanel({ h }: { h: VocabWorksheetHook }) {
  const { isDark, glassCard, glassInput } = h
  const [headerDraft, setHeaderDraft] = usePatternDraft(h.ocrPrompts.headerPatterns)
  const [footerDraft, setFooterDraft] = usePatternDraft(h.ocrPrompts.footerPatterns)

  return (
    <div className={`${glassCard} rounded-2xl p-6 mb-6`}>
      <div className="flex items-center justify-between mb-4">
        <h2 className={`text-lg font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
          OCR-Filter Einstellungen
        </h2>
        <button
          onClick={() => h.setShowSettings(false)}
          className={`p-1 rounded-lg ${isDark ? 'hover:bg-white/10 text-white/60' : 'hover:bg-black/5 text-slate-500'}`}
        >
          <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
          </svg>
        </button>
      </div>

      <div className={`p-4 rounded-xl mb-4 ${isDark ? 'bg-blue-500/20 text-blue-200' : 'bg-blue-100 text-blue-800'}`}>
        <p className="text-sm">
          Diese Einstellungen helfen, unerwuenschte Elemente wie Seitenzahlen, Kapitelnamen oder Kopfzeilen aus dem OCR-Ergebnis zu filtern.
        </p>
      </div>

      <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
        {/* Checkboxes */}
        <div className="space-y-3">
          <label className={`flex items-center gap-3 cursor-pointer ${isDark ? 'text-white' : 'text-slate-900'}`}>
            <input
              type="checkbox"
              checked={h.ocrPrompts.filterHeaders}
              onChange={(e) => h.saveOcrPrompts({ ...h.ocrPrompts, filterHeaders: e.target.checked })}
              className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500"
            />
            <span>Kopfzeilen filtern (z.B. Kapitelnamen)</span>
          </label>

          <label className={`flex items-center gap-3 cursor-pointer ${isDark ? 'text-white' : 'text-slate-900'}`}>
            <input
              type="checkbox"
              checked={h.ocrPrompts.filterFooters}
              onChange={(e) => h.saveOcrPrompts({ ...h.ocrPrompts, filterFooters: e.target.checked })}
              className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500"
            />
            <span>Fusszeilen filtern</span>
          </label>

          <label className={`flex items-center gap-3 cursor-pointer ${isDark ? 'text-white' : 'text-slate-900'}`}>
            <input
              type="checkbox"
              checked={h.ocrPrompts.filterPageNumbers}
              onChange={(e) => h.saveOcrPrompts({ ...h.ocrPrompts, filterPageNumbers: e.target.checked })}
              className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500"
            />
            <span>Seitenzahlen filtern (auch ausgeschrieben: "zweihundertzwoelf")</span>
          </label>
        </div>

        {/* Patterns */}
        <div className="space-y-4">
          <div>
            <label className={`block text-sm font-medium mb-2 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
              Kopfzeilen-Muster (kommagetrennt)
            </label>
            <input
              type="text"
              value={headerDraft}
              onChange={(e) => {
                // Keep the raw text for display, store the parsed list.
                setHeaderDraft(e.target.value)
                h.saveOcrPrompts({
                  ...h.ocrPrompts,
                  headerPatterns: parsePatternList(e.target.value)
                })
              }}
              placeholder="Unit, Chapter, Lesson..."
              className={`w-full px-4 py-2 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500`}
            />
          </div>

          <div>
            <label className={`block text-sm font-medium mb-2 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
              Fusszeilen-Muster (kommagetrennt)
            </label>
            <input
              type="text"
              value={footerDraft}
              onChange={(e) => {
                // Keep the raw text for display, store the parsed list.
                setFooterDraft(e.target.value)
                h.saveOcrPrompts({
                  ...h.ocrPrompts,
                  footerPatterns: parsePatternList(e.target.value)
                })
              }}
              placeholder="zweihundert, Page, Seite..."
              className={`w-full px-4 py-2 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500`}
            />
          </div>
        </div>
      </div>

      <div className="mt-4">
        <label className={`block text-sm font-medium mb-2 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
          Zusaetzlicher Filter-Prompt (optional)
        </label>
        <textarea
          value={h.ocrPrompts.customFilter}
          onChange={(e) => h.saveOcrPrompts({ ...h.ocrPrompts, customFilter: e.target.value })}
          placeholder="z.B.: Ignoriere alle Zeilen, die nur Zahlen oder Buchstaben enthalten..."
          rows={2}
          className={`w-full px-4 py-2 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500 resize-none`}
        />
      </div>

      <div className="mt-4 flex justify-end">
        {/* Resetting changes the stored patterns, which re-syncs both drafts. */}
        <button
          onClick={() => h.saveOcrPrompts(defaultOcrPrompts)}
          className={`px-4 py-2 rounded-xl text-sm ${isDark ? 'text-white/60 hover:text-white' : 'text-slate-500 hover:text-slate-700'}`}
        >
          Auf Standard zuruecksetzen
        </button>
      </div>
    </div>
  )
}
|
||||
108
studio-v2/app/vocab-worksheet/components/PageSelection.tsx
Normal file
108
studio-v2/app/vocab-worksheet/components/PageSelection.tsx
Normal file
@@ -0,0 +1,108 @@
|
||||
'use client'
|
||||
|
||||
import React from 'react'
|
||||
import type { VocabWorksheetHook } from '../types'
|
||||
|
||||
/**
 * Thumbnail grid for choosing which PDF pages are handed to extraction.
 *
 * All state and actions come from the worksheet hook `h`:
 * - `pagesThumbnails` supplies one image source per page (index = page index),
 * - `excludedPages` lists page indices that are hidden from the grid,
 * - `selectedPages` lists page indices that are currently selected.
 *
 * While `isLoadingThumbnails` is true a spinner replaces the grid. The
 * submit button is disabled when nothing is selected or an extraction is
 * already running (`isExtracting`).
 */
export function PageSelection({ h }: { h: VocabWorksheetHook }) {
  const { isDark, glassCard } = h
  const selectedCount = h.selectedPages.length

  // "Alle" and "Keine" share exactly the same look.
  const bulkButtonClass = `px-3 py-1 rounded-lg text-sm transition-colors ${isDark ? 'bg-white/10 hover:bg-white/20 text-white' : 'bg-slate-100 hover:bg-slate-200 text-slate-900'}`

  return (
    <div className={`${glassCard} rounded-2xl p-6`}>
      <div className="flex items-center justify-between mb-4">
        <h2 className={`text-lg font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
          PDF-Seiten auswaehlen ({selectedCount} von {h.pdfPageCount - h.excludedPages.length} ausgewaehlt)
        </h2>
        <div className="flex gap-2">
          {h.excludedPages.length > 0 && (
            <button onClick={h.restoreExcludedPages} className={`px-3 py-1 rounded-lg text-sm ${isDark ? 'bg-orange-500/20 text-orange-300 hover:bg-orange-500/30' : 'bg-orange-100 text-orange-700 hover:bg-orange-200'}`}>
              {h.excludedPages.length} ausgeblendet - wiederherstellen
            </button>
          )}
          <button onClick={h.selectAllPages} className={bulkButtonClass}>
            Alle
          </button>
          <button onClick={h.selectNoPages} className={bulkButtonClass}>
            Keine
          </button>
        </div>
      </div>

      <p className={`text-sm mb-4 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
        Klicken Sie auf eine Seite um sie auszuwaehlen. Klicken Sie auf das X um leere Seiten auszublenden.
      </p>

      {h.isLoadingThumbnails ? (
        <div className="flex items-center justify-center py-12">
          <div className="w-8 h-8 border-4 border-purple-500 border-t-transparent rounded-full animate-spin" />
          <span className={`ml-3 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>Lade Seitenvorschau...</span>
        </div>
      ) : (
        <div className="grid grid-cols-2 sm:grid-cols-3 md:grid-cols-4 lg:grid-cols-6 gap-4 mb-6">
          {h.pagesThumbnails.map((thumb, idx) => {
            // Excluded pages keep their index but are not rendered.
            if (h.excludedPages.includes(idx)) return null

            // Looked up once; drives border, caption and check badge below.
            const isSelected = h.selectedPages.includes(idx)
            const pageLabel = `Seite ${idx + 1}`

            return (
              <div key={idx} className="relative group">
                {/* Exclude button: hidden until hover, but revealed on keyboard focus too */}
                <button
                  onClick={(e) => h.excludePage(idx, e)}
                  className="absolute top-1 left-1 z-10 p-1 rounded-full opacity-0 group-hover:opacity-100 focus:opacity-100 transition-opacity bg-red-500/80 hover:bg-red-600 text-white"
                  title="Seite ausblenden"
                  aria-label={`${pageLabel} ausblenden`}
                >
                  <svg className="w-3 h-3" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
                  </svg>
                </button>

                {/* OCR compare button: stops propagation so the click never reaches a parent handler */}
                <button
                  onClick={(e) => { e.stopPropagation(); h.runOcrComparison(idx); }}
                  className="absolute top-1 right-1 z-10 p-1 rounded-full opacity-0 group-hover:opacity-100 focus:opacity-100 transition-opacity bg-blue-500/80 hover:bg-blue-600 text-white"
                  title="OCR-Methoden vergleichen"
                  aria-label={`OCR-Methoden vergleichen (${pageLabel})`}
                >
                  <svg className="w-3 h-3" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z" />
                  </svg>
                </button>

                {/* The thumbnail itself toggles selection; aria-pressed exposes the state */}
                <button
                  onClick={() => h.togglePageSelection(idx)}
                  aria-pressed={isSelected}
                  className={`relative rounded-xl overflow-hidden border-2 transition-all w-full ${
                    isSelected
                      ? 'border-purple-500 ring-2 ring-purple-500/50'
                      : (isDark ? 'border-white/20 hover:border-white/40' : 'border-slate-200 hover:border-slate-300')
                  }`}
                >
                  <img src={thumb} alt={pageLabel} className="w-full h-auto" />
                  <div className={`absolute bottom-0 left-0 right-0 py-1 text-center text-xs font-medium ${
                    isSelected
                      ? 'bg-purple-500 text-white'
                      : (isDark ? 'bg-black/60 text-white/80' : 'bg-white/90 text-slate-700')
                  }`}>
                    {pageLabel}
                  </div>
                  {isSelected && (
                    <div className="absolute top-2 right-2 w-6 h-6 bg-purple-500 rounded-full flex items-center justify-center">
                      <svg className="w-4 h-4 text-white" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
                        <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
                      </svg>
                    </div>
                  )}
                </button>
              </div>
            )
          })}
        </div>
      )}

      <div className="flex justify-center">
        <button
          onClick={h.processSelectedPages}
          disabled={selectedCount === 0 || h.isExtracting}
          className="px-8 py-4 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-2xl font-semibold disabled:opacity-50 hover:shadow-xl hover:shadow-purple-500/30 transition-all transform hover:scale-105"
        >
          {/* Singular form for exactly one page ("1 Seite"), plural otherwise */}
          {h.isExtracting ? 'Extrahiere Vokabeln...' : `${selectedCount} ${selectedCount === 1 ? 'Seite' : 'Seiten'} verarbeiten`}
        </button>
      </div>
    </div>
  )
}
|
||||
31
studio-v2/app/vocab-worksheet/components/QRCodeModal.tsx
Normal file
31
studio-v2/app/vocab-worksheet/components/QRCodeModal.tsx
Normal file
@@ -0,0 +1,31 @@
|
||||
'use client'
|
||||
|
||||
import React from 'react'
|
||||
import { QRCodeUpload } from '@/components/QRCodeUpload'
|
||||
import type { VocabWorksheetHook } from '../types'
|
||||
|
||||
/**
 * Full-screen modal that hosts the QR-code upload widget.
 *
 * Closing (backdrop click, the widget's `onClose`, or the Escape key) calls
 * `setShowQRModal(false)`. Whenever the widget reports a non-empty file list,
 * the most recently listed file becomes the selected mobile file and any
 * direct-file or stored-document selection is cleared.
 */
export function QRCodeModal({ h }: { h: VocabWorksheetHook }) {
  // NOTE(review): setShowQRModal is presumably a plain state setter from the
  // worksheet hook, so pulling it off `h` is safe — confirm it does not rely on `this`.
  const { isDark, setShowQRModal } = h

  // Escape closes the modal, matching the backdrop click.
  React.useEffect(() => {
    const onKeyDown = (event: KeyboardEvent) => {
      if (event.key === 'Escape') setShowQRModal(false)
    }
    window.addEventListener('keydown', onKeyDown)
    return () => window.removeEventListener('keydown', onKeyDown)
  }, [setShowQRModal])

  const closeModal = () => setShowQRModal(false)

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center p-4">
      {/* Backdrop: purely visual click target, hidden from assistive tech */}
      <div className="absolute inset-0 bg-black/50 backdrop-blur-sm" onClick={closeModal} aria-hidden="true" />
      <div
        role="dialog"
        aria-modal="true"
        aria-label="Upload per QR-Code"
        className={`relative w-full max-w-md rounded-3xl ${
          isDark ? 'bg-slate-900' : 'bg-white'
        }`}
      >
        <QRCodeUpload
          sessionId={h.uploadSessionId}
          onClose={closeModal}
          onFilesChanged={(files) => {
            h.setMobileUploadedFiles(files)
            if (files.length > 0) {
              // Pick the last entry of the list and drop competing selections.
              h.setSelectedMobileFile(files[files.length - 1])
              h.setDirectFile(null)
              h.setSelectedDocumentId(null)
            }
          }}
        />
      </div>
    </div>
  )
}
|
||||
315
studio-v2/app/vocab-worksheet/components/UploadScreen.tsx
Normal file
315
studio-v2/app/vocab-worksheet/components/UploadScreen.tsx
Normal file
@@ -0,0 +1,315 @@
|
||||
'use client'
|
||||
|
||||
import React from 'react'
|
||||
import type { VocabWorksheetHook } from '../types'
|
||||
import { formatFileSize } from '../constants'
|
||||
|
||||
// Steps of the "how it works" list; hoisted so the array is not rebuilt on every render.
const UPLOAD_STEPS = [
  'Dokument (Bild oder PDF) auswaehlen',
  'Vorschau pruefen und Session benennen',
  'Bei PDFs: Seiten auswaehlen die verarbeitet werden sollen',
  'KI extrahiert Vokabeln — pruefen, korrigieren, Arbeitsblatt-Typ waehlen',
  'PDF herunterladen und ausdrucken',
]

/**
 * First screen of the vocab-worksheet flow.
 *
 * Renders, top to bottom:
 * 1. existing sessions that can be resumed or deleted (only when there are any),
 * 2. a short explanation of the workflow,
 * 3. the document source picker (direct file, phone upload via QR code,
 *    or a previously stored document),
 * 4. once a source is chosen: a preview next to the session-name form.
 *
 * All state and actions come from the worksheet hook `h`.
 */
export function UploadScreen({ h }: { h: VocabWorksheetHook }) {
  const { isDark, glassCard, glassInput } = h

  // Text tones that recur throughout this screen.
  const textStrong = isDark ? 'text-white' : 'text-slate-900'
  const textMuted = isDark ? 'text-white/60' : 'text-slate-500'
  const textFaint = isDark ? 'text-white/40' : 'text-slate-400'

  // Dashed tile used by both source buttons; green once that source has a file.
  const sourceTileClass = (hasFile: boolean) =>
    `p-4 rounded-xl border-2 border-dashed transition-all ${
      hasFile
        ? (isDark ? 'border-green-400/50 bg-green-500/20' : 'border-green-500 bg-green-50')
        : (isDark ? 'border-white/20 hover:border-purple-400/50' : 'border-slate-300 hover:border-purple-500')
    }`

  // Row in the phone-upload / stored-document lists; `activeTone` is the highlight when selected.
  const listRowClass = (isActive: boolean, activeTone: string) =>
    `w-full flex items-center gap-3 p-3 rounded-xl text-left transition-all ${
      isActive
        ? activeTone
        : (isDark ? 'bg-white/5 border-2 border-transparent hover:border-white/20' : 'bg-slate-50 border-2 border-transparent hover:border-slate-200')
    }`

  // Preview renderers; the iframe carries a title so assistive tech can name it.
  const previewImage = (src: string | undefined) => (
    <img src={src} alt="Vorschau" className="w-full h-auto" />
  )
  const previewFrame = (src: string | undefined) => (
    <iframe src={src} title="Vorschau" className="w-full border-0 rounded-xl" style={{ height: '60vh' }} />
  )

  const checkIcon = (tone: string) => (
    <svg className={`w-5 h-5 ${tone}`} fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
    </svg>
  )

  return (
    <div className="space-y-6">
      {/* Existing Sessions */}
      {h.existingSessions.length > 0 && (
        <div className={`${glassCard} rounded-2xl p-6`}>
          <h2 className={`text-lg font-semibold mb-4 ${textStrong}`}>
            Vorhandene Sessions fortsetzen
          </h2>
          {h.isLoadingSessions ? (
            <div className="flex items-center gap-3 py-4">
              <div className="w-5 h-5 border-2 border-purple-500 border-t-transparent rounded-full animate-spin" />
              <span className={textMuted}>Lade Sessions...</span>
            </div>
          ) : (
            <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
              {h.existingSessions.map((s) => {
                // "extracted" and "completed" both get the green check badge.
                const isDone = s.status === 'extracted' || s.status === 'completed'

                return (
                  <div
                    key={s.id}
                    className={`${glassCard} p-4 rounded-xl text-left transition-all hover:shadow-lg relative group cursor-pointer ${
                      isDark ? 'hover:border-purple-400/50' : 'hover:border-purple-400'
                    }`}
                    onClick={() => h.resumeSession(s)}
                  >
                    {/* Delete button: hidden until hover, but revealed on keyboard focus too */}
                    <button
                      onClick={(e) => h.deleteSession(s.id, e)}
                      className={`absolute top-2 right-2 p-1.5 rounded-lg opacity-0 group-hover:opacity-100 focus:opacity-100 transition-opacity ${
                        isDark ? 'hover:bg-red-500/20 text-red-400' : 'hover:bg-red-100 text-red-500'
                      }`}
                      title="Session loeschen"
                      aria-label="Session loeschen"
                    >
                      <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
                        <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
                      </svg>
                    </button>

                    <div className="flex items-start gap-3">
                      <div className={`w-10 h-10 rounded-lg flex items-center justify-center flex-shrink-0 ${
                        isDone
                          ? (isDark ? 'bg-green-500/30' : 'bg-green-100')
                          : (isDark ? 'bg-white/10' : 'bg-slate-100')
                      }`}>
                        {isDone ? (
                          checkIcon('text-green-500')
                        ) : (
                          <svg className={`w-5 h-5 ${textFaint}`} fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
                            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 6v6m0 0v6m0-6h6m-6 0H6" />
                          </svg>
                        )}
                      </div>
                      <div className="flex-1 min-w-0">
                        <h3 className={`font-medium truncate ${textStrong}`}>{s.name}</h3>
                        <p className={`text-sm ${textMuted}`}>
                          {s.vocabulary_count} Vokabeln
                          {s.status === 'pending' && ' • Nicht gestartet'}
                          {s.status === 'extracted' && ' • Bereit'}
                          {s.status === 'completed' && ' • Abgeschlossen'}
                        </p>
                        {s.created_at && (
                          <p className={`text-xs mt-1 ${textFaint}`}>
                            {/* Date and time are both shown, so toLocaleString is the matching call */}
                            {new Date(s.created_at).toLocaleString('de-DE', {
                              day: '2-digit',
                              month: '2-digit',
                              year: 'numeric',
                              hour: '2-digit',
                              minute: '2-digit'
                            })}
                          </p>
                        )}
                      </div>
                      <svg className={`w-5 h-5 flex-shrink-0 ${isDark ? 'text-white/30' : 'text-slate-300'}`} fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
                        <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
                      </svg>
                    </div>
                  </div>
                )
              })}
            </div>
          )}
        </div>
      )}

      {/* Explanation */}
      <div className={`${glassCard} rounded-2xl p-6 ${isDark ? 'bg-gradient-to-br from-purple-500/20 to-pink-500/20' : 'bg-gradient-to-br from-purple-100/50 to-pink-100/50'}`}>
        <h2 className={`text-lg font-semibold mb-3 ${textStrong}`}>
          {h.existingSessions.length > 0 ? 'Oder neue Session starten:' : 'So funktioniert es:'}
        </h2>
        <ol className={`space-y-2 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
          {UPLOAD_STEPS.map((text, i) => (
            <li key={i} className="flex items-start gap-2">
              <span className={`w-6 h-6 rounded-full flex items-center justify-center text-xs font-bold flex-shrink-0 ${isDark ? 'bg-purple-500/30 text-purple-300' : 'bg-purple-200 text-purple-700'}`}>{i + 1}</span>
              <span>{text}</span>
            </li>
          ))}
        </ol>
      </div>

      {/* Step 1: Document Selection */}
      <div className={`${glassCard} rounded-2xl p-6`}>
        <h2 className={`text-lg font-semibold mb-4 ${textStrong}`}>
          1. Dokument auswaehlen
        </h2>

        {/* Hidden native picker; opened through the tile below */}
        <input ref={h.directFileInputRef} type="file" accept="image/png,image/jpeg,image/jpg,application/pdf" onChange={h.handleDirectFileSelect} className="hidden" />

        <div className="grid grid-cols-2 gap-3 mb-4">
          {/* File Upload Button */}
          <button
            onClick={() => h.directFileInputRef.current?.click()}
            className={sourceTileClass(Boolean(h.directFile))}
          >
            {h.directFile ? (
              <div className="flex items-center gap-3">
                <span className="text-2xl">{h.directFile.type === 'application/pdf' ? '📄' : '🖼️'}</span>
                <div className="text-left flex-1 min-w-0">
                  <p className={`font-medium truncate ${textStrong}`}>{h.directFile.name}</p>
                  <p className={`text-xs ${textMuted}`}>{formatFileSize(h.directFile.size)}</p>
                </div>
                {checkIcon('text-green-500 flex-shrink-0')}
              </div>
            ) : (
              <div className={`text-center ${textMuted}`}>
                <span className="text-2xl block mb-1">📁</span>
                <span className="text-sm">Datei auswaehlen</span>
              </div>
            )}
          </button>

          {/* QR Code Upload Button */}
          <button
            onClick={() => h.setShowQRModal(true)}
            className={sourceTileClass(Boolean(h.selectedMobileFile))}
          >
            {h.selectedMobileFile ? (
              <div className="flex items-center gap-3">
                <span className="text-2xl">{h.selectedMobileFile.type.startsWith('image/') ? '🖼️' : '📄'}</span>
                <div className="text-left flex-1 min-w-0">
                  <p className={`font-medium truncate text-sm ${textStrong}`}>{h.selectedMobileFile.name}</p>
                  <p className={`text-xs ${textMuted}`}>vom Handy</p>
                </div>
                {checkIcon('text-green-500 flex-shrink-0')}
              </div>
            ) : (
              <div className={`text-center ${textMuted}`}>
                <span className="text-2xl block mb-1">📱</span>
                <span className="text-sm">Mit Handy scannen</span>
              </div>
            )}
          </button>
        </div>

        {/* Mobile Uploaded Files (hidden while a direct file is chosen) */}
        {h.mobileUploadedFiles.length > 0 && !h.directFile && (
          <>
            <div className={`text-center text-sm mb-3 ${textFaint}`}>— Vom Handy hochgeladen —</div>
            <div className="space-y-2 max-h-32 overflow-y-auto mb-4">
              {h.mobileUploadedFiles.map((file) => {
                const isActive = h.selectedMobileFile?.id === file.id

                return (
                  <button
                    key={file.id}
                    onClick={() => { h.setSelectedMobileFile(file); h.setDirectFile(null); h.setSelectedDocumentId(null); h.setError(null) }}
                    className={listRowClass(
                      isActive,
                      isDark ? 'bg-green-500/30 border-2 border-green-400/50' : 'bg-green-100 border-2 border-green-500'
                    )}
                  >
                    <span className="text-xl">{file.type.startsWith('image/') ? '🖼️' : '📄'}</span>
                    <div className="flex-1 min-w-0">
                      <p className={`font-medium truncate ${textStrong}`}>{file.name}</p>
                      <p className={`text-xs ${textMuted}`}>{formatFileSize(file.size)}</p>
                    </div>
                    {isActive && checkIcon('text-green-500')}
                  </button>
                )
              })}
            </div>
          </>
        )}

        {/* Stored Documents (only offered while no other source is chosen) */}
        {h.storedDocuments.length > 0 && !h.directFile && !h.selectedMobileFile && (
          <>
            <div className={`text-center text-sm mb-3 ${textFaint}`}>— oder aus Ihren Dokumenten —</div>
            <div className="space-y-2 max-h-32 overflow-y-auto">
              {h.storedDocuments.map((doc) => {
                const isActive = h.selectedDocumentId === doc.id

                return (
                  <button
                    key={doc.id}
                    onClick={() => { h.setSelectedDocumentId(doc.id); h.setDirectFile(null); h.setSelectedMobileFile(null); h.setError(null) }}
                    className={listRowClass(
                      isActive,
                      isDark ? 'bg-purple-500/30 border-2 border-purple-400/50' : 'bg-purple-100 border-2 border-purple-500'
                    )}
                  >
                    <span className="text-xl">{doc.type === 'application/pdf' ? '📄' : '🖼️'}</span>
                    <div className="flex-1 min-w-0">
                      <p className={`font-medium truncate ${textStrong}`}>{doc.name}</p>
                      <p className={`text-xs ${textMuted}`}>{formatFileSize(doc.size)}</p>
                    </div>
                    {isActive && checkIcon('text-purple-500')}
                  </button>
                )
              })}
            </div>
          </>
        )}
      </div>

      {/* Step 2: Preview + Session Name */}
      {(h.directFile || h.selectedMobileFile || h.selectedDocumentId) && (
        <div className="grid grid-cols-1 lg:grid-cols-5 gap-6">
          {/* Document Preview */}
          <div className={`${glassCard} rounded-2xl p-6 lg:col-span-3`}>
            <div className="flex items-center justify-between mb-4">
              <h2 className={`text-lg font-semibold ${textStrong}`}>
                Vorschau
              </h2>
              <button
                onClick={() => h.setShowFullPreview(true)}
                className={`px-3 py-1.5 rounded-lg text-sm font-medium transition-all flex items-center gap-2 ${
                  isDark ? 'bg-white/10 hover:bg-white/20 text-white' : 'bg-slate-100 hover:bg-slate-200 text-slate-700'
                }`}
              >
                <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0zM10 7v3m0 0v3m0-3h3m-3 0H7" />
                </svg>
                Originalgroesse
              </button>
            </div>
            <div className={`max-h-[60vh] overflow-auto rounded-xl border ${isDark ? 'border-white/10' : 'border-black/10'}`}>
              {/* Source priority: direct file, then phone upload, then stored document */}
              {h.directFile?.type.startsWith('image/') && h.directFilePreview && previewImage(h.directFilePreview)}
              {h.directFile?.type === 'application/pdf' && h.directFilePreview && previewFrame(h.directFilePreview)}
              {h.selectedMobileFile && !h.directFile && (
                h.selectedMobileFile.type.startsWith('image/')
                  ? previewImage(h.selectedMobileFile.dataUrl)
                  : previewFrame(h.selectedMobileFile.dataUrl)
              )}
              {h.selectedDocumentId && !h.directFile && !h.selectedMobileFile && (() => {
                const doc = h.storedDocuments.find(d => d.id === h.selectedDocumentId)
                if (!doc?.url) return <p className={`p-8 text-center ${textFaint}`}>Keine Vorschau verfuegbar</p>
                return doc.type.startsWith('image/') ? previewImage(doc.url) : previewFrame(doc.url)
              })()}
            </div>
          </div>

          {/* Session Name + Start */}
          <div className={`${glassCard} rounded-2xl p-6 lg:col-span-2 flex flex-col`}>
            <h2 className={`text-lg font-semibold mb-4 ${textStrong}`}>
              2. Session benennen
            </h2>
            <input
              type="text"
              value={h.sessionName}
              onChange={(e) => { h.setSessionName(e.target.value); h.setError(null) }}
              placeholder="z.B. Englisch Klasse 7 - Unit 3"
              className={`w-full px-4 py-3 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500 mb-4`}
              autoFocus
            />
            <p className={`text-sm mb-6 ${isDark ? 'text-white/50' : 'text-slate-500'}`}>
              Benennen Sie die Session z.B. nach dem Schulbuch-Kapitel, damit Sie sie spaeter wiederfinden.
            </p>
            {/* Spacer pushes the start button to the bottom of the card */}
            <div className="flex-1" />
            <button
              onClick={() => {
                // Defensive only: the button is disabled while the name is blank,
                // so this branch is a safeguard rather than a regular path.
                if (!h.sessionName.trim()) {
                  h.setError('Bitte geben Sie einen Session-Namen ein (z.B. "Englisch Klasse 7 - Unit 3")')
                  return
                }
                h.startSession()
              }}
              disabled={h.isCreatingSession || !h.sessionName.trim()}
              className="w-full px-6 py-4 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-2xl font-semibold text-lg disabled:opacity-50 hover:shadow-xl hover:shadow-purple-500/30 transition-all transform hover:scale-105"
            >
              {h.isCreatingSession ? 'Verarbeite...' : 'Weiter →'}
            </button>
          </div>
        </div>
      )}
    </div>
  )
}
|
||||
305
studio-v2/app/vocab-worksheet/components/VocabularyTab.tsx
Normal file
305
studio-v2/app/vocab-worksheet/components/VocabularyTab.tsx
Normal file
@@ -0,0 +1,305 @@
|
||||
'use client'
|
||||
|
||||
import React from 'react'
|
||||
import type { VocabWorksheetHook, IpaMode, SyllableMode } from '../types'
|
||||
import { getApiBase } from '../constants'
|
||||
|
||||
export function VocabularyTab({ h }: { h: VocabWorksheetHook }) {
|
||||
const { isDark, glassCard, glassInput } = h
|
||||
const extras = h.getAllExtraColumns()
|
||||
const baseCols = 3 + extras.length
|
||||
const gridCols = `14px 32px 36px repeat(${baseCols}, 1fr) 32px`
|
||||
|
||||
return (
|
||||
<div className="flex flex-col lg:flex-row gap-4" style={{ height: 'calc(100vh - 240px)', minHeight: '500px' }}>
|
||||
{/* Left: Original pages */}
|
||||
<div className={`${glassCard} rounded-2xl p-4 lg:w-1/3 flex flex-col overflow-hidden`}>
|
||||
<h2 className={`text-sm font-semibold mb-3 flex-shrink-0 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
|
||||
Original ({(() => { const pp = h.selectedPages.length > 0 ? h.selectedPages : [...new Set(h.vocabulary.map(v => (v.source_page || 1) - 1))]; return pp.length; })()} Seiten)
|
||||
</h2>
|
||||
<div className="flex-1 overflow-y-auto space-y-3">
|
||||
{(() => {
|
||||
const processedPageIndices = h.selectedPages.length > 0
|
||||
? h.selectedPages
|
||||
: [...new Set(h.vocabulary.map(v => (v.source_page || 1) - 1))].sort((a, b) => a - b)
|
||||
|
||||
const apiBase = getApiBase()
|
||||
const pagesToShow = processedPageIndices
|
||||
.filter(idx => idx >= 0)
|
||||
.map(idx => ({
|
||||
idx,
|
||||
src: h.session ? `${apiBase}/api/v1/vocab/sessions/${h.session.id}/pdf-page-image/${idx}` : null,
|
||||
}))
|
||||
.filter(t => t.src !== null) as { idx: number; src: string }[]
|
||||
|
||||
if (pagesToShow.length > 0) {
|
||||
return pagesToShow.map(({ idx, src }) => (
|
||||
<div key={idx} className={`relative rounded-xl overflow-hidden border ${isDark ? 'border-white/10' : 'border-black/10'}`}>
|
||||
<div className={`absolute top-2 left-2 px-2 py-0.5 rounded-lg text-xs font-medium z-10 ${isDark ? 'bg-black/60 text-white' : 'bg-white/90 text-slate-700'}`}>
|
||||
S. {idx + 1}
|
||||
</div>
|
||||
<img src={src} alt={`Seite ${idx + 1}`} className="w-full h-auto" />
|
||||
</div>
|
||||
))
|
||||
}
|
||||
if (h.uploadedImage) {
|
||||
return (
|
||||
<div className={`relative rounded-xl overflow-hidden border ${isDark ? 'border-white/10' : 'border-black/10'}`}>
|
||||
<img src={h.uploadedImage} alt="Arbeitsblatt" className="w-full h-auto" />
|
||||
</div>
|
||||
)
|
||||
}
|
||||
return (
|
||||
<div className={`flex-1 flex items-center justify-center py-12 ${isDark ? 'text-white/40' : 'text-slate-400'}`}>
|
||||
<div className="text-center">
|
||||
<svg className="w-12 h-12 mx-auto mb-2 opacity-50" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z" />
|
||||
</svg>
|
||||
<p className="text-xs">Kein Bild verfuegbar</p>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
})()}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Right: Vocabulary table */}
|
||||
<div className={`${glassCard} rounded-2xl p-4 lg:w-2/3 flex flex-col overflow-hidden`}>
|
||||
<div className="flex items-center justify-between mb-3 flex-shrink-0">
|
||||
<h2 className={`text-lg font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
|
||||
Vokabeln ({h.vocabulary.length})
|
||||
</h2>
|
||||
<div className="flex items-center gap-2">
|
||||
{/* IPA mode */}
|
||||
<select
|
||||
value={h.ipaMode}
|
||||
onChange={(e) => {
|
||||
const newIpa = e.target.value as IpaMode
|
||||
h.setIpaMode(newIpa)
|
||||
h.reprocessPages(newIpa, h.syllableMode)
|
||||
}}
|
||||
className={`px-2 py-1.5 text-xs rounded-md border ${isDark ? 'border-white/20 bg-white/10 text-white' : 'border-gray-200 bg-white text-gray-600'}`}
|
||||
title="Lautschrift (IPA)"
|
||||
>
|
||||
<option value="none">IPA: Aus</option>
|
||||
<option value="auto">IPA: Auto</option>
|
||||
<option value="en">IPA: nur EN</option>
|
||||
<option value="de">IPA: nur DE</option>
|
||||
<option value="all">IPA: Alle</option>
|
||||
</select>
|
||||
{/* Syllable mode */}
|
||||
<select
|
||||
value={h.syllableMode}
|
||||
onChange={(e) => {
|
||||
const newSyl = e.target.value as SyllableMode
|
||||
h.setSyllableMode(newSyl)
|
||||
h.reprocessPages(h.ipaMode, newSyl)
|
||||
}}
|
||||
className={`px-2 py-1.5 text-xs rounded-md border ${isDark ? 'border-white/20 bg-white/10 text-white' : 'border-gray-200 bg-white text-gray-600'}`}
|
||||
title="Silbentrennung"
|
||||
>
|
||||
<option value="none">Silben: Aus</option>
|
||||
<option value="auto">Silben: Original</option>
|
||||
<option value="en">Silben: nur EN</option>
|
||||
<option value="de">Silben: nur DE</option>
|
||||
<option value="all">Silben: Alle</option>
|
||||
</select>
|
||||
<button onClick={h.saveVocabulary} className={`px-4 py-2 rounded-xl text-sm font-medium transition-colors ${isDark ? 'bg-white/10 hover:bg-white/20 text-white' : 'bg-slate-100 hover:bg-slate-200 text-slate-900'}`}>
|
||||
Speichern
|
||||
</button>
|
||||
<button onClick={() => h.setActiveTab('worksheet')} className="px-4 py-2 rounded-xl text-sm font-medium bg-gradient-to-r from-purple-500 to-pink-500 text-white hover:shadow-lg transition-all">
|
||||
Weiter →
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Error messages for failed pages */}
|
||||
{h.processingErrors.length > 0 && (
|
||||
<div className={`rounded-xl p-3 mb-3 flex-shrink-0 ${isDark ? 'bg-orange-500/20 text-orange-200 border border-orange-500/30' : 'bg-orange-100 text-orange-700 border border-orange-200'}`}>
|
||||
<div className="font-medium mb-1 text-sm">Einige Seiten konnten nicht verarbeitet werden:</div>
|
||||
<ul className="text-xs space-y-0.5">
|
||||
{h.processingErrors.map((err, idx) => (
|
||||
<li key={idx}>• {err}</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Processing Progress */}
|
||||
{h.currentlyProcessingPage && (
|
||||
<div className={`rounded-xl p-3 mb-3 flex-shrink-0 ${isDark ? 'bg-purple-500/20 border border-purple-500/30' : 'bg-purple-100 border border-purple-200'}`}>
|
||||
<div className="flex items-center gap-3">
|
||||
<div className={`w-4 h-4 border-2 ${isDark ? 'border-purple-300' : 'border-purple-600'} border-t-transparent rounded-full animate-spin`} />
|
||||
<div>
|
||||
<div className={`text-sm font-medium ${isDark ? 'text-purple-200' : 'text-purple-700'}`}>Verarbeite Seite {h.currentlyProcessingPage}...</div>
|
||||
<div className={`text-xs ${isDark ? 'text-purple-300/70' : 'text-purple-600'}`}>
|
||||
{h.successfulPages.length > 0 && `${h.successfulPages.length} Seite(n) fertig • `}
|
||||
{h.vocabulary.length} Vokabeln bisher
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Success info */}
|
||||
{!h.currentlyProcessingPage && h.successfulPages.length > 0 && h.failedPages.length === 0 && (
|
||||
<div className={`rounded-xl p-2 mb-3 text-xs flex-shrink-0 ${isDark ? 'bg-green-500/20 text-green-200 border border-green-500/30' : 'bg-green-100 text-green-700 border border-green-200'}`}>
|
||||
Alle {h.successfulPages.length} Seite(n) erfolgreich verarbeitet - {h.vocabulary.length} Vokabeln insgesamt
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Partial success info */}
|
||||
{!h.currentlyProcessingPage && h.successfulPages.length > 0 && h.failedPages.length > 0 && (
|
||||
<div className={`rounded-xl p-2 mb-3 text-xs flex-shrink-0 ${isDark ? 'bg-yellow-500/20 text-yellow-200 border border-yellow-500/30' : 'bg-yellow-100 text-yellow-700 border border-yellow-200'}`}>
|
||||
{h.successfulPages.length} Seite(n) erfolgreich, {h.failedPages.length} fehlgeschlagen - {h.vocabulary.length} Vokabeln extrahiert
|
||||
</div>
|
||||
)}
|
||||
|
||||
{h.vocabulary.length === 0 ? (
|
||||
<p className={`text-center py-8 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>Keine Vokabeln gefunden.</p>
|
||||
) : (
|
||||
<div className="flex flex-col flex-1 overflow-hidden">
|
||||
{/* Fixed Header */}
|
||||
<div className={`flex-shrink-0 grid gap-1 px-2 py-2 text-sm font-medium border-b items-center ${isDark ? 'border-white/10 text-white/60' : 'border-black/10 text-slate-500'}`} style={{ gridTemplateColumns: gridCols }}>
|
||||
<div>{/* insert-triangle spacer */}</div>
|
||||
<div className="flex items-center justify-center">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={h.vocabulary.length > 0 && h.vocabulary.every(v => v.selected)}
|
||||
onChange={h.toggleAllSelection}
|
||||
className="w-4 h-4 rounded border-gray-300 text-purple-600 focus:ring-purple-500 cursor-pointer"
|
||||
title="Alle auswaehlen"
|
||||
/>
|
||||
</div>
|
||||
<div>S.</div>
|
||||
<div>Englisch</div>
|
||||
<div>Deutsch</div>
|
||||
<div>Beispiel</div>
|
||||
{extras.map(col => (
|
||||
<div key={col.key} className="flex items-center gap-1 group">
|
||||
<span className="truncate">{col.label}</span>
|
||||
<button
|
||||
onClick={() => {
|
||||
const page = Object.entries(h.pageExtraColumns).find(([, cols]) => cols.some(c => c.key === col.key))
|
||||
if (page) h.removeExtraColumn(Number(page[0]), col.key)
|
||||
}}
|
||||
className={`opacity-0 group-hover:opacity-100 transition-opacity ${isDark ? 'text-red-400 hover:text-red-300' : 'text-red-500 hover:text-red-600'}`}
|
||||
title="Spalte entfernen"
|
||||
>
|
||||
<svg className="w-3 h-3" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" /></svg>
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
<div className="flex items-center justify-center">
|
||||
<button
|
||||
onClick={() => h.addExtraColumn(0)}
|
||||
className={`p-0.5 rounded transition-colors ${isDark ? 'hover:bg-white/10 text-white/40 hover:text-white/70' : 'hover:bg-slate-200 text-slate-400 hover:text-slate-600'}`}
|
||||
title="Spalte hinzufuegen"
|
||||
>
|
||||
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 4v16m8-8H4" /></svg>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Scrollable Content */}
|
||||
<div className="flex-1 overflow-y-auto">
|
||||
{h.vocabulary.map((entry, index) => (
|
||||
<React.Fragment key={entry.id}>
|
||||
<div className={`grid gap-1 px-2 py-1 items-center ${isDark ? 'hover:bg-white/5' : 'hover:bg-black/5'}`} style={{ gridTemplateColumns: gridCols }}>
|
||||
<button
|
||||
onClick={() => h.addVocabularyEntry(index)}
|
||||
className={`w-3.5 h-3.5 flex items-center justify-center opacity-0 hover:opacity-100 transition-opacity ${isDark ? 'text-purple-400' : 'text-purple-500'}`}
|
||||
title="Zeile einfuegen"
|
||||
>
|
||||
<svg className="w-2.5 h-2.5" viewBox="0 0 10 10" fill="currentColor"><polygon points="0,0 10,5 0,10" /></svg>
|
||||
</button>
|
||||
<div className="flex items-center justify-center">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={entry.selected || false}
|
||||
onChange={() => h.toggleVocabularySelection(entry.id)}
|
||||
className="w-4 h-4 rounded border-gray-300 text-purple-600 focus:ring-purple-500 cursor-pointer"
|
||||
/>
|
||||
</div>
|
||||
<div className={`flex items-center justify-center text-xs font-medium rounded ${isDark ? 'bg-white/10 text-white/60' : 'bg-black/10 text-slate-600'}`}>
|
||||
{entry.source_page || '-'}
|
||||
</div>
|
||||
<input
|
||||
type="text"
|
||||
value={entry.english}
|
||||
onChange={(e) => h.updateVocabularyEntry(entry.id, 'english', e.target.value)}
|
||||
className={`px-2 py-1 rounded-lg border text-sm min-w-0 ${glassInput} focus:outline-none focus:ring-1 focus:ring-purple-500`}
|
||||
/>
|
||||
<input
|
||||
type="text"
|
||||
value={entry.german}
|
||||
onChange={(e) => h.updateVocabularyEntry(entry.id, 'german', e.target.value)}
|
||||
className={`px-2 py-1 rounded-lg border text-sm min-w-0 ${glassInput} focus:outline-none focus:ring-1 focus:ring-purple-500`}
|
||||
/>
|
||||
<input
|
||||
type="text"
|
||||
value={entry.example_sentence || ''}
|
||||
onChange={(e) => h.updateVocabularyEntry(entry.id, 'example_sentence', e.target.value)}
|
||||
placeholder="Beispiel"
|
||||
className={`px-2 py-1 rounded-lg border text-sm min-w-0 ${glassInput} focus:outline-none focus:ring-1 focus:ring-purple-500`}
|
||||
/>
|
||||
{extras.map(col => (
|
||||
<input
|
||||
key={col.key}
|
||||
type="text"
|
||||
value={(entry.extras && entry.extras[col.key]) || ''}
|
||||
onChange={(e) => h.updateVocabularyEntry(entry.id, col.key, e.target.value)}
|
||||
placeholder={col.label}
|
||||
className={`px-2 py-1 rounded-lg border text-sm min-w-0 ${glassInput} focus:outline-none focus:ring-1 focus:ring-purple-500`}
|
||||
/>
|
||||
))}
|
||||
<button onClick={() => h.deleteVocabularyEntry(entry.id)} className={`p-1 rounded-lg ${isDark ? 'hover:bg-red-500/20 text-red-400' : 'hover:bg-red-100 text-red-500'}`}>
|
||||
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
</React.Fragment>
|
||||
))}
|
||||
{/* Final insert triangle */}
|
||||
<div className="px-2 py-1">
|
||||
<button
|
||||
onClick={() => h.addVocabularyEntry()}
|
||||
className={`w-3.5 h-3.5 flex items-center justify-center opacity-30 hover:opacity-100 transition-opacity ${isDark ? 'text-purple-400' : 'text-purple-500'}`}
|
||||
title="Zeile am Ende einfuegen"
|
||||
>
|
||||
<svg className="w-2.5 h-2.5" viewBox="0 0 10 10" fill="currentColor"><polygon points="0,0 10,5 0,10" /></svg>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Footer */}
|
||||
<div className={`flex-shrink-0 pt-2 border-t flex items-center justify-between text-xs ${isDark ? 'border-white/10 text-white/50' : 'border-black/10 text-slate-400'}`}>
|
||||
<span>
|
||||
{h.vocabulary.length} Vokabeln
|
||||
{h.vocabulary.filter(v => v.selected).length > 0 && ` (${h.vocabulary.filter(v => v.selected).length} ausgewaehlt)`}
|
||||
{(() => {
|
||||
const pages = [...new Set(h.vocabulary.map(v => v.source_page).filter(Boolean))].sort((a, b) => (a || 0) - (b || 0))
|
||||
return pages.length > 1 ? ` • Seiten: ${pages.join(', ')}` : ''
|
||||
})()}
|
||||
</span>
|
||||
<button
|
||||
onClick={() => h.addVocabularyEntry()}
|
||||
className={`px-3 py-1 rounded-lg text-xs flex items-center gap-1 transition-colors ${
|
||||
isDark
|
||||
? 'bg-white/10 hover:bg-white/20 text-white/70'
|
||||
: 'bg-slate-100 hover:bg-slate-200 text-slate-600'
|
||||
}`}
|
||||
>
|
||||
<svg className="w-3 h-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 4v16m8-8H4" />
|
||||
</svg>
|
||||
Zeile
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
155
studio-v2/app/vocab-worksheet/components/WorksheetTab.tsx
Normal file
155
studio-v2/app/vocab-worksheet/components/WorksheetTab.tsx
Normal file
@@ -0,0 +1,155 @@
|
||||
'use client'
|
||||
|
||||
import React from 'react'
|
||||
import type { VocabWorksheetHook } from '../types'
|
||||
import { worksheetFormats, worksheetTypes } from '../constants'
|
||||
|
||||
// SVG path data for the two format-card icons; only the path differs between them.
const STANDARD_FORMAT_ICON_PATH =
  'M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z'
const TEMPLATE_FORMAT_ICON_PATH =
  'M4 5a1 1 0 011-1h14a1 1 0 011 1v2a1 1 0 01-1 1H5a1 1 0 01-1-1V5zM4 13a1 1 0 011-1h6a1 1 0 011 1v6a1 1 0 01-1 1H5a1 1 0 01-1-1v-6zM16 13a1 1 0 011-1h2a1 1 0 011 1v6a1 1 0 01-1 1h-2a1 1 0 01-1-1v-6z'

/**
 * Check mark drawn on a selected card.
 * Purely decorative: the selected state itself is exposed via `aria-pressed` on the card button.
 */
function SelectedCheckIcon() {
  return (
    <svg className="w-5 h-5 text-purple-500" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
    </svg>
  )
}

/**
 * Worksheet configuration tab.
 *
 * Renders, in order: the format picker (one card per `worksheetFormats` entry), the title input,
 * the options for the active format (worksheet types, line height and solution sheet for
 * `standard`; an info box and solution sheet for `nru`) and the generate button.
 * All state is read from and written back through the hook object `h`.
 *
 * @param h - State and handlers returned by `useVocabWorksheet`.
 */
export function WorksheetTab({ h }: { h: VocabWorksheetHook }) {
  const { isDark, glassCard, glassInput } = h

  // Unique id prefix so the visible labels can be tied to their controls via htmlFor / aria-labelledby.
  const fieldId = React.useId()
  const titleId = `${fieldId}-title`
  const typesLabelId = `${fieldId}-types`
  const lineHeightId = `${fieldId}-line-height`

  // Theme-dependent class fragments that are repeated throughout the markup.
  const headingText = isDark ? 'text-white' : 'text-slate-900'
  const mutedText = isDark ? 'text-white/60' : 'text-slate-500'
  const idleCardBorder = isDark ? 'border-white/20 hover:border-white/40' : 'border-slate-200 hover:border-slate-300'

  // The generate button is blocked while a PDF is being produced, and for the
  // standard format also while no worksheet type is selected.
  const cannotGenerate = (h.selectedFormat === 'standard' && h.selectedTypes.length === 0) || h.isGenerating

  return (
    <div className={`${glassCard} rounded-2xl p-6`}>
      {/* Step 1: Format Selection */}
      <div className="mb-8">
        <h2 className={`text-lg font-semibold mb-4 ${headingText}`}>
          1. Vorlage waehlen
        </h2>
        <div className="grid grid-cols-2 gap-4">
          {worksheetFormats.map((format) => {
            const isActive = h.selectedFormat === format.id
            return (
              <button
                key={format.id}
                type="button"
                aria-pressed={isActive}
                onClick={() => h.setSelectedFormat(format.id)}
                className={`p-5 rounded-xl border text-left transition-all ${
                  isActive
                    ? (isDark ? 'border-purple-400/50 bg-purple-500/20 ring-2 ring-purple-500/50' : 'border-purple-500 bg-purple-50 ring-2 ring-purple-500/30')
                    : idleCardBorder
                }`}
              >
                <div className="flex items-start gap-3">
                  <div className={`w-10 h-10 rounded-lg flex items-center justify-center shrink-0 ${
                    isActive
                      ? (isDark ? 'bg-purple-500/30' : 'bg-purple-200')
                      : (isDark ? 'bg-white/10' : 'bg-slate-100')
                  }`}>
                    <svg className={`w-5 h-5 ${isActive ? 'text-purple-400' : mutedText}`} fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
                      <path
                        strokeLinecap="round"
                        strokeLinejoin="round"
                        strokeWidth={1.5}
                        d={format.id === 'standard' ? STANDARD_FORMAT_ICON_PATH : TEMPLATE_FORMAT_ICON_PATH}
                      />
                    </svg>
                  </div>
                  <div className="flex-1">
                    <div className="flex items-center justify-between">
                      <span className={`font-medium ${headingText}`}>{format.label}</span>
                      {isActive && <SelectedCheckIcon />}
                    </div>
                    <p className={`text-sm mt-1 ${mutedText}`}>{format.description}</p>
                  </div>
                </div>
              </button>
            )
          })}
        </div>
      </div>

      {/* Step 2: Configuration */}
      <div className="mb-6">
        <h2 className={`text-lg font-semibold mb-4 ${headingText}`}>
          2. Arbeitsblatt konfigurieren
        </h2>

        {/* Title */}
        <div className="mb-6">
          <label htmlFor={titleId} className={`block text-sm font-medium mb-2 ${mutedText}`}>Titel</label>
          <input
            id={titleId}
            type="text"
            value={h.worksheetTitle}
            onChange={(e) => h.setWorksheetTitle(e.target.value)}
            placeholder="z.B. Vokabeln Unit 3"
            className={`w-full px-4 py-3 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500`}
          />
        </div>

        {/* Standard format options */}
        {h.selectedFormat === 'standard' && (
          <>
            <div className="mb-6">
              <label id={typesLabelId} className={`block text-sm font-medium mb-3 ${mutedText}`}>Arbeitsblatt-Typen</label>
              <div className="grid grid-cols-2 gap-3" role="group" aria-labelledby={typesLabelId}>
                {worksheetTypes.map((type) => {
                  const isChosen = h.selectedTypes.includes(type.id)
                  return (
                    <button
                      key={type.id}
                      type="button"
                      aria-pressed={isChosen}
                      onClick={() => h.toggleWorksheetType(type.id)}
                      className={`p-4 rounded-xl border text-left transition-all ${
                        isChosen
                          ? (isDark ? 'border-purple-400/50 bg-purple-500/20' : 'border-purple-500 bg-purple-50')
                          : idleCardBorder
                      }`}
                    >
                      <div className="flex items-center justify-between">
                        <span className={`font-medium ${headingText}`}>{type.label}</span>
                        {isChosen && <SelectedCheckIcon />}
                      </div>
                      <p className={`text-sm mt-1 ${mutedText}`}>{type.description}</p>
                    </button>
                  )
                })}
              </div>
            </div>

            <div className="grid grid-cols-2 gap-6 mb-6">
              <div>
                <label htmlFor={lineHeightId} className={`block text-sm font-medium mb-2 ${mutedText}`}>Zeilenhoehe</label>
                <select
                  id={lineHeightId}
                  value={h.lineHeight}
                  onChange={(e) => h.setLineHeight(e.target.value)}
                  className={`w-full px-4 py-3 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500`}
                >
                  <option value="normal">Normal</option>
                  <option value="large">Gross</option>
                  <option value="extra-large">Extra gross</option>
                </select>
              </div>
              <div className="flex items-center">
                <label className={`flex items-center gap-3 cursor-pointer ${headingText}`}>
                  <input type="checkbox" checked={h.includeSolutions} onChange={(e) => h.setIncludeSolutions(e.target.checked)} className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500" />
                  <span>Loesungsblatt erstellen</span>
                </label>
              </div>
            </div>
          </>
        )}

        {/* NRU format options */}
        {h.selectedFormat === 'nru' && (
          <div className="space-y-4">
            <div className={`p-4 rounded-xl ${isDark ? 'bg-indigo-500/20 border border-indigo-500/30' : 'bg-indigo-50 border border-indigo-200'}`}>
              <h4 className={`font-medium mb-2 ${isDark ? 'text-indigo-200' : 'text-indigo-700'}`}>NRU-Format Uebersicht:</h4>
              <ul className={`text-sm space-y-1 ${isDark ? 'text-indigo-200/80' : 'text-indigo-600'}`}>
                <li>• <strong>Vokabeln:</strong> 3-Spalten-Tabelle (Englisch | Deutsch leer | Korrektur leer)</li>
                <li>• <strong>Lernsaetze:</strong> Deutscher Satz + 2 leere Zeilen fuer englische Uebersetzung</li>
                <li>• Pro gescannter Seite werden 2 Arbeitsblatt-Seiten erzeugt</li>
              </ul>
            </div>

            <div className="flex items-center">
              <label className={`flex items-center gap-3 cursor-pointer ${headingText}`}>
                <input type="checkbox" checked={h.includeSolutions} onChange={(e) => h.setIncludeSolutions(e.target.checked)} className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500" />
                <span>Loesungsblatt erstellen (mit deutschen Uebersetzungen)</span>
              </label>
            </div>
          </div>
        )}
      </div>

      <button
        type="button"
        onClick={h.generateWorksheet}
        disabled={cannotGenerate}
        className="w-full py-4 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-xl font-semibold disabled:opacity-50 hover:shadow-xl hover:shadow-purple-500/30 transition-all"
      >
        {h.isGenerating ? 'Generiere PDF...' : `${h.selectedFormat === 'nru' ? 'NRU-Arbeitsblatt' : 'Arbeitsblatt'} generieren`}
      </button>
    </div>
  )
}
|
||||
56
studio-v2/app/vocab-worksheet/constants.ts
Normal file
56
studio-v2/app/vocab-worksheet/constants.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import type { OcrPrompts, WorksheetFormat, WorksheetType } from './types'
|
||||
|
||||
/**
 * Resolve the API base URL dynamically from the browser host.
 *
 * Without a `window` (server-side rendering) and on `localhost` the backend is
 * addressed directly on port 8086. On any other host the `/klausur-api` proxy
 * path on the current host is used to avoid certificate problems.
 *
 * @returns Base URL without a trailing slash.
 */
export const getApiBase = () => {
  const LOCAL_API = 'http://localhost:8086'
  if (typeof window === 'undefined') return LOCAL_API

  const loc = window.location
  return loc.hostname === 'localhost'
    ? LOCAL_API
    : `${loc.protocol}//${loc.hostname}/klausur-api`
}
|
||||
|
||||
// ---------------------------------------------------------------------------
// localStorage keys
// ---------------------------------------------------------------------------

/** Key under which the list of stored documents is kept. */
export const DOCUMENTS_KEY = 'bp_documents'

/** Key under which the user's OCR filter settings are kept. */
export const OCR_PROMPTS_KEY = 'bp_ocr_prompts'

/** Key under which the generated upload session id is kept. */
export const SESSION_ID_KEY = 'bp_upload_session'
|
||||
|
||||
/**
 * Worksheet format templates offered in the format picker.
 * `label` and `description` are the user-facing (German) texts; `icon` is a symbolic icon name.
 */
export const worksheetFormats: { id: WorksheetFormat; label: string; description: string; icon: string }[] = [
  // Classic worksheet with selectable translation direction.
  {
    id: 'standard',
    label: 'Standard-Format',
    description: 'Klassisches Arbeitsblatt mit waehlbarer Uebersetzungsrichtung',
    icon: 'document',
  },
  // Three-column table plus practice sentences.
  {
    id: 'nru',
    label: 'NRU-Vorlage',
    description: '3-Spalten-Tabelle (EN|DE|Korrektur) + Lernsaetze mit Uebersetzungszeilen',
    icon: 'template',
  },
]
|
||||
|
||||
/**
 * Default OCR filter settings, used until the user stores their own.
 * Key order is kept stable because the object is serialized with JSON.stringify.
 */
export const defaultOcrPrompts: OcrPrompts = {
  // All three built-in filters are enabled by default.
  filterHeaders: true,
  filterFooters: true,
  filterPageNumbers: true,
  // No user-defined filter text by default.
  customFilter: '',
  // Words that mark a header line (English and German section names).
  headerPatterns: ['Unit', 'Chapter', 'Lesson', 'Kapitel', 'Lektion'],
  // Words that mark a footer line (spelled-out hundreds and page labels).
  footerPatterns: ['zweihundert', 'dreihundert', 'vierhundert', 'Page', 'Seite'],
}
|
||||
|
||||
/**
 * Exercise types selectable for the standard worksheet format.
 * `label` and `description` are the user-facing (German) texts.
 */
export const worksheetTypes: { id: WorksheetType; label: string; description: string }[] = [
  {
    id: 'en_to_de',
    label: 'Englisch → Deutsch',
    description: 'Englische Woerter uebersetzen',
  },
  {
    id: 'de_to_en',
    label: 'Deutsch → Englisch',
    description: 'Deutsche Woerter uebersetzen',
  },
  {
    id: 'copy',
    label: 'Abschreibuebung',
    description: 'Woerter mehrfach schreiben',
  },
  {
    id: 'gap_fill',
    label: 'Lueckensaetze',
    description: 'Saetze mit Luecken ausfuellen',
  },
]
|
||||
|
||||
/**
 * Format a byte count as a short human-readable size using 1024-based units.
 *
 * The value is rounded to at most one decimal place (`1536` -> `"1.5 KB"`).
 * Zero, negative and non-finite inputs are reported as `"0 B"`. Values below
 * one byte stay in `B` and values beyond the largest unit stay in `TB`, so
 * the result always carries a valid unit.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size, e.g. `"500 B"`, `"1 KB"`, `"2 TB"`.
 */
export const formatFileSize = (bytes: number): string => {
  // Math.log is undefined for these inputs; without the guard the result was "NaN undefined".
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B'

  const k = 1024
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB']
  const exponent = Math.floor(Math.log(bytes) / Math.log(k))
  // Clamp so fractional byte counts (exponent < 0) and very large values
  // (exponent past the last unit) never index outside `sizes`.
  const i = Math.min(Math.max(exponent, 0), sizes.length - 1)
  return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i]
}
|
||||
File diff suppressed because it is too large
Load Diff
189
studio-v2/app/vocab-worksheet/types.ts
Normal file
189
studio-v2/app/vocab-worksheet/types.ts
Normal file
@@ -0,0 +1,189 @@
|
||||
import { UploadedFile } from '@/components/QRCodeUpload'
|
||||
|
||||
/** One vocabulary row as shown and edited in the vocabulary table. */
export interface VocabularyEntry {
  /** Identifier used as the React key and passed to the update/delete/toggle handlers. */
  id: string
  /** Text of the English column. */
  english: string
  /** Text of the German column. */
  german: string
  /** Optional example sentence. */
  example_sentence?: string
  // NOTE(review): presumably the example sentence with the target word blanked out — confirm.
  example_sentence_gap?: string
  // NOTE(review): presumably the part of speech — confirm against the backend.
  word_type?: string
  /** Page the entry came from; the table shows '-' when it is missing. */
  source_page?: number
  /** Whether the row's checkbox is ticked. */
  selected?: boolean
  /** Values of user-added columns, keyed by the column's `key`. */
  extras?: Record<string, string>
}
|
||||
|
||||
/** A user-added vocabulary table column. */
export interface ExtraColumn {
  /** Lookup key into `VocabularyEntry.extras`; also used as the React key of the column. */
  key: string
  /** Text shown in the column header and as the cell placeholder. */
  label: string
}
|
||||
|
||||
/** A vocabulary session as exchanged with the `/api/v1/vocab/sessions` endpoint. */
export interface Session {
  id: string
  name: string
  // NOTE(review): the set of possible status values is not visible here — confirm against the API.
  status: string
  vocabulary_count: number
  image_path?: string
  description?: string
  source_language?: string
  target_language?: string
  // NOTE(review): presumably a timestamp string from the backend — confirm the format.
  created_at?: string
}
|
||||
|
||||
/** A document entry read from localStorage (stored as JSON under the documents key). */
export interface StoredDocument {
  id: string
  name: string
  /** MIME type; the hook keeps only `image/*` and `application/pdf` documents. */
  type: string
  size: number
  // NOTE(review): entries are produced by JSON.parse, so at runtime this is probably
  // a string rather than a Date instance — confirm before calling Date methods on it.
  uploadedAt: Date
  /** Location of the document's content; a document without it cannot be used to start a session. */
  url?: string
}
|
||||
|
||||
/**
 * OCR filter settings. Persisted in localStorage and sent as `ocr_prompts`
 * when a session is created.
 */
export interface OcrPrompts {
  /** Toggle for header filtering. */
  filterHeaders: boolean
  /** Toggle for footer filtering. */
  filterFooters: boolean
  /** Toggle for page-number filtering. */
  filterPageNumbers: boolean
  /** Free-text user filter; empty by default. */
  customFilter: string
  /** Words identifying header lines. */
  headerPatterns: string[]
  /** Words identifying footer lines. */
  footerPatterns: string[]
}
|
||||
|
||||
/** Identifier of a page tab. */
export type TabId = 'upload' | 'pages' | 'vocabulary' | 'worksheet' | 'export' | 'settings'

/** Exercise type of a standard-format worksheet. */
export type WorksheetType = 'en_to_de' | 'de_to_en' | 'copy' | 'gap_fill'

/** Worksheet layout template. */
export type WorksheetFormat = 'standard' | 'nru'

// NOTE(review): presumably selects which language columns receive IPA transcription — confirm.
export type IpaMode = 'auto' | 'en' | 'de' | 'all' | 'none'

// NOTE(review): presumably selects which language columns receive syllable marking — confirm.
export type SyllableMode = 'auto' | 'en' | 'de' | 'all' | 'none'
|
||||
|
||||
/**
 * Return type of `useVocabWorksheet`.
 *
 * The whole object is handed to the child components as a single `h` prop, so it
 * bundles every piece of page state together with its setters and handlers.
 */
export interface VocabWorksheetHook {
  // --- Mounted (SSR guard): set to true by an effect after the first client render ---
  mounted: boolean

  // --- Theme: dark-mode flag plus precomputed glassmorphism class strings ---
  isDark: boolean
  glassCard: string
  glassInput: string

  // --- Active tab ---
  activeTab: TabId
  setActiveTab: (tab: TabId) => void

  // --- Current session ---
  session: Session | null
  sessionName: string
  setSessionName: (name: string) => void
  isCreatingSession: boolean
  error: string | null
  setError: (err: string | null) => void
  extractionStatus: string

  // --- Existing sessions loaded from the API ---
  existingSessions: Session[]
  isLoadingSessions: boolean

  // --- Documents loaded from localStorage ---
  storedDocuments: StoredDocument[]
  selectedDocumentId: string | null
  setSelectedDocumentId: (id: string | null) => void

  // --- Directly selected file ---
  directFile: File | null
  setDirectFile: (f: File | null) => void
  directFilePreview: string | null
  showFullPreview: boolean
  setShowFullPreview: (show: boolean) => void
  directFileInputRef: React.RefObject<HTMLInputElement | null>

  // --- PDF pages ---
  pdfPageCount: number
  selectedPages: number[]
  pagesThumbnails: string[]
  isLoadingThumbnails: boolean
  excludedPages: number[]

  // --- Extra columns, keyed by source page ---
  pageExtraColumns: Record<number, ExtraColumn[]>

  // --- Upload ---
  uploadedImage: string | null
  isExtracting: boolean

  // --- Vocabulary ---
  vocabulary: VocabularyEntry[]

  // --- Worksheet configuration ---
  selectedTypes: WorksheetType[]
  worksheetTitle: string
  setWorksheetTitle: (title: string) => void
  includeSolutions: boolean
  setIncludeSolutions: (inc: boolean) => void
  lineHeight: string
  setLineHeight: (lh: string) => void
  selectedFormat: WorksheetFormat
  setSelectedFormat: (f: WorksheetFormat) => void
  ipaMode: IpaMode
  setIpaMode: (m: IpaMode) => void
  syllableMode: SyllableMode
  setSyllableMode: (m: SyllableMode) => void

  // --- Export ---
  worksheetId: string | null
  isGenerating: boolean

  // --- Page processing results ---
  processingErrors: string[]
  successfulPages: number[]
  failedPages: number[]
  currentlyProcessingPage: number | null

  // --- OCR settings ---
  ocrPrompts: OcrPrompts
  showSettings: boolean
  setShowSettings: (show: boolean) => void

  // --- QR code (mobile) upload ---
  showQRModal: boolean
  setShowQRModal: (show: boolean) => void
  uploadSessionId: string
  mobileUploadedFiles: UploadedFile[]
  selectedMobileFile: UploadedFile | null
  setSelectedMobileFile: (f: UploadedFile | null) => void
  setMobileUploadedFiles: (files: UploadedFile[]) => void

  // --- OCR comparison ---
  showOcrComparison: boolean
  setShowOcrComparison: (show: boolean) => void
  ocrComparePageIndex: number | null
  // NOTE(review): untyped; the shape of the comparison result is not visible here.
  ocrCompareResult: any
  isComparingOcr: boolean
  ocrCompareError: string | null

  // --- Handlers: file selection and session start ---
  handleDirectFileSelect: (e: React.ChangeEvent<HTMLInputElement>) => void
  startSession: () => Promise<void>

  // --- Handlers: page selection and processing ---
  processSelectedPages: () => Promise<void>
  togglePageSelection: (idx: number) => void
  selectAllPages: () => void
  selectNoPages: () => void
  excludePage: (idx: number, e: React.MouseEvent) => void
  restoreExcludedPages: () => void
  runOcrComparison: (pageIdx: number) => Promise<void>

  // --- Handlers: vocabulary table ---
  // `field` is either a built-in field name or the key of an extra column.
  updateVocabularyEntry: (id: string, field: string, value: string) => void
  addExtraColumn: (page: number) => void
  removeExtraColumn: (page: number, key: string) => void
  getExtraColumnsForPage: (page: number) => ExtraColumn[]
  getAllExtraColumns: () => ExtraColumn[]
  deleteVocabularyEntry: (id: string) => void
  toggleVocabularySelection: (id: string) => void
  toggleAllSelection: () => void
  // Called with a row index to insert at that row, or without an argument to add at the end.
  addVocabularyEntry: (atIndex?: number) => void
  saveVocabulary: () => Promise<void>

  // --- Handlers: worksheet generation and download ---
  generateWorksheet: () => Promise<void>
  downloadPDF: (type: 'worksheet' | 'solution') => void
  toggleWorksheetType: (type: WorksheetType) => void

  // --- Handlers: session management ---
  resumeSession: (session: Session) => Promise<void>
  resetSession: () => Promise<void>
  deleteSession: (id: string, e: React.MouseEvent) => Promise<void>

  // --- Handlers: settings and helpers ---
  saveOcrPrompts: (prompts: OcrPrompts) => void
  formatFileSize: (bytes: number) => string
  reprocessPages: (ipa: IpaMode, syllable: SyllableMode) => void
}
|
||||
843
studio-v2/app/vocab-worksheet/useVocabWorksheet.ts
Normal file
843
studio-v2/app/vocab-worksheet/useVocabWorksheet.ts
Normal file
@@ -0,0 +1,843 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useRef, useEffect } from 'react'
|
||||
import { useTheme } from '@/lib/ThemeContext'
|
||||
import { useLanguage } from '@/lib/LanguageContext'
|
||||
import { useRouter } from 'next/navigation'
|
||||
import { useActivity } from '@/lib/ActivityContext'
|
||||
import type { UploadedFile } from '@/components/QRCodeUpload'
|
||||
|
||||
import type {
|
||||
VocabularyEntry, ExtraColumn, Session, StoredDocument, OcrPrompts,
|
||||
TabId, WorksheetType, WorksheetFormat, IpaMode, SyllableMode,
|
||||
VocabWorksheetHook,
|
||||
} from './types'
|
||||
import {
|
||||
getApiBase, DOCUMENTS_KEY, OCR_PROMPTS_KEY, SESSION_ID_KEY,
|
||||
defaultOcrPrompts, formatFileSize,
|
||||
} from './constants'
|
||||
|
||||
export function useVocabWorksheet(): VocabWorksheetHook {
|
||||
const { isDark } = useTheme()
|
||||
const { t } = useLanguage()
|
||||
const router = useRouter()
|
||||
const { startActivity, completeActivity } = useActivity()
|
||||
const [mounted, setMounted] = useState(false)
|
||||
|
||||
// Tab state
|
||||
const [activeTab, setActiveTab] = useState<TabId>('upload')
|
||||
|
||||
// Session state
|
||||
const [session, setSession] = useState<Session | null>(null)
|
||||
const [sessionName, setSessionName] = useState('')
|
||||
const [isCreatingSession, setIsCreatingSession] = useState(false)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
const [extractionStatus, setExtractionStatus] = useState<string>('')
|
||||
|
||||
// Existing sessions list
|
||||
const [existingSessions, setExistingSessions] = useState<Session[]>([])
|
||||
const [isLoadingSessions, setIsLoadingSessions] = useState(true)
|
||||
|
||||
// Documents from storage
|
||||
const [storedDocuments, setStoredDocuments] = useState<StoredDocument[]>([])
|
||||
const [selectedDocumentId, setSelectedDocumentId] = useState<string | null>(null)
|
||||
|
||||
// Direct file upload
|
||||
const [directFile, setDirectFile] = useState<File | null>(null)
|
||||
const [directFilePreview, setDirectFilePreview] = useState<string | null>(null)
|
||||
const [showFullPreview, setShowFullPreview] = useState(false)
|
||||
const directFileInputRef = useRef<HTMLInputElement>(null)
|
||||
|
||||
// PDF page selection state
|
||||
const [pdfPageCount, setPdfPageCount] = useState<number>(0)
|
||||
const [selectedPages, setSelectedPages] = useState<number[]>([])
|
||||
const [pagesThumbnails, setPagesThumbnails] = useState<string[]>([])
|
||||
const [isLoadingThumbnails, setIsLoadingThumbnails] = useState(false)
|
||||
const [excludedPages, setExcludedPages] = useState<number[]>([])
|
||||
|
||||
// Dynamic extra columns per source page
|
||||
const [pageExtraColumns, setPageExtraColumns] = useState<Record<number, ExtraColumn[]>>({})
|
||||
|
||||
// Upload state
|
||||
const [uploadedImage, setUploadedImage] = useState<string | null>(null)
|
||||
const [isExtracting, setIsExtracting] = useState(false)
|
||||
const fileInputRef = useRef<HTMLInputElement>(null)
|
||||
|
||||
// Vocabulary state
|
||||
const [vocabulary, setVocabulary] = useState<VocabularyEntry[]>([])
|
||||
|
||||
// Worksheet state
|
||||
const [selectedTypes, setSelectedTypes] = useState<WorksheetType[]>(['en_to_de'])
|
||||
const [worksheetTitle, setWorksheetTitle] = useState('')
|
||||
const [includeSolutions, setIncludeSolutions] = useState(true)
|
||||
const [lineHeight, setLineHeight] = useState('normal')
|
||||
const [selectedFormat, setSelectedFormat] = useState<WorksheetFormat>('standard')
|
||||
const [ipaMode, setIpaMode] = useState<IpaMode>('none')
|
||||
const [syllableMode, setSyllableMode] = useState<SyllableMode>('none')
|
||||
|
||||
// Export state
|
||||
const [worksheetId, setWorksheetId] = useState<string | null>(null)
|
||||
const [isGenerating, setIsGenerating] = useState(false)
|
||||
|
||||
// Processing results
|
||||
const [processingErrors, setProcessingErrors] = useState<string[]>([])
|
||||
const [successfulPages, setSuccessfulPages] = useState<number[]>([])
|
||||
const [failedPages, setFailedPages] = useState<number[]>([])
|
||||
const [currentlyProcessingPage, setCurrentlyProcessingPage] = useState<number | null>(null)
|
||||
const [processingQueue, setProcessingQueue] = useState<number[]>([])
|
||||
|
||||
// OCR Prompts/Settings
|
||||
const [ocrPrompts, setOcrPrompts] = useState<OcrPrompts>(defaultOcrPrompts)
|
||||
const [showSettings, setShowSettings] = useState(false)
|
||||
|
||||
// QR Code Upload
|
||||
const [showQRModal, setShowQRModal] = useState(false)
|
||||
const [uploadSessionId, setUploadSessionId] = useState('')
|
||||
const [mobileUploadedFiles, setMobileUploadedFiles] = useState<UploadedFile[]>([])
|
||||
const [selectedMobileFile, setSelectedMobileFile] = useState<UploadedFile | null>(null)
|
||||
|
||||
// OCR Comparison
|
||||
const [showOcrComparison, setShowOcrComparison] = useState(false)
|
||||
const [ocrComparePageIndex, setOcrComparePageIndex] = useState<number | null>(null)
|
||||
const [ocrCompareResult, setOcrCompareResult] = useState<any>(null)
|
||||
const [isComparingOcr, setIsComparingOcr] = useState(false)
|
||||
const [ocrCompareError, setOcrCompareError] = useState<string | null>(null)
|
||||
|
||||
// --- Effects ---
|
||||
|
||||
// SSR Safety
|
||||
useEffect(() => {
|
||||
setMounted(true)
|
||||
let storedSessionId = localStorage.getItem(SESSION_ID_KEY)
|
||||
if (!storedSessionId) {
|
||||
storedSessionId = `vocab-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`
|
||||
localStorage.setItem(SESSION_ID_KEY, storedSessionId)
|
||||
}
|
||||
setUploadSessionId(storedSessionId)
|
||||
}, [])
|
||||
|
||||
// Load OCR prompts from localStorage
|
||||
useEffect(() => {
|
||||
if (!mounted) return
|
||||
const stored = localStorage.getItem(OCR_PROMPTS_KEY)
|
||||
if (stored) {
|
||||
try {
|
||||
setOcrPrompts({ ...defaultOcrPrompts, ...JSON.parse(stored) })
|
||||
} catch (e) {
|
||||
console.error('Failed to parse OCR prompts:', e)
|
||||
}
|
||||
}
|
||||
}, [mounted])
|
||||
|
||||
// Load documents from localStorage
|
||||
useEffect(() => {
|
||||
if (!mounted) return
|
||||
const stored = localStorage.getItem(DOCUMENTS_KEY)
|
||||
if (stored) {
|
||||
try {
|
||||
const docs = JSON.parse(stored)
|
||||
const imagesDocs = docs.filter((d: StoredDocument) =>
|
||||
d.type?.startsWith('image/') || d.type === 'application/pdf'
|
||||
)
|
||||
setStoredDocuments(imagesDocs)
|
||||
} catch (e) {
|
||||
console.error('Failed to parse stored documents:', e)
|
||||
}
|
||||
}
|
||||
}, [mounted])
|
||||
|
||||
// Load existing sessions from API
|
||||
useEffect(() => {
|
||||
if (!mounted) return
|
||||
const loadSessions = async () => {
|
||||
const API_BASE = getApiBase()
|
||||
try {
|
||||
const res = await fetch(`${API_BASE}/api/v1/vocab/sessions`)
|
||||
if (res.ok) {
|
||||
const sessions = await res.json()
|
||||
setExistingSessions(sessions)
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to load sessions:', e)
|
||||
} finally {
|
||||
setIsLoadingSessions(false)
|
||||
}
|
||||
}
|
||||
loadSessions()
|
||||
}, [mounted])
|
||||
|
||||
// --- Glassmorphism styles ---
|
||||
|
||||
const glassCard = isDark
|
||||
? 'backdrop-blur-xl bg-white/10 border border-white/20'
|
||||
: 'backdrop-blur-xl bg-white/70 border border-black/10'
|
||||
|
||||
const glassInput = isDark
|
||||
? 'bg-white/10 border-white/20 text-white placeholder-white/40 focus:border-purple-400'
|
||||
: 'bg-white/50 border-black/10 text-slate-900 placeholder-slate-400 focus:border-purple-500'
|
||||
|
||||
// --- Handlers ---
|
||||
|
||||
const saveOcrPrompts = (prompts: OcrPrompts) => {
|
||||
setOcrPrompts(prompts)
|
||||
localStorage.setItem(OCR_PROMPTS_KEY, JSON.stringify(prompts))
|
||||
}
|
||||
|
||||
const handleDirectFileSelect = (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||
const file = e.target.files?.[0]
|
||||
if (!file) return
|
||||
|
||||
setDirectFile(file)
|
||||
setSelectedDocumentId(null)
|
||||
setSelectedMobileFile(null)
|
||||
|
||||
if (file.type.startsWith('image/')) {
|
||||
const reader = new FileReader()
|
||||
reader.onload = (ev) => {
|
||||
setDirectFilePreview(ev.target?.result as string)
|
||||
}
|
||||
reader.readAsDataURL(file)
|
||||
} else if (file.type === 'application/pdf') {
|
||||
setDirectFilePreview(URL.createObjectURL(file))
|
||||
} else {
|
||||
setDirectFilePreview(null)
|
||||
}
|
||||
}
|
||||
|
||||
/** Decode a base64 data URL into a File carrying the given name and MIME type. */
const decodeDataUrlToFile = (dataUrl: string, name: string, type: string): File => {
  // The base64 payload follows the comma that ends the "data:<type>;base64" header.
  const binary = atob(dataUrl.split(',')[1])
  const bytes = Uint8Array.from(binary, ch => ch.charCodeAt(0))
  return new File([bytes], name, { type })
}

/**
 * Create a new session on the server and upload the chosen document.
 *
 * The document is taken from, in order of precedence: the directly selected
 * file, the selected mobile upload, or the selected stored document.
 * - PDF: uploads it, reads the page count, pre-selects every page, switches
 *   to the "pages" tab and loads one thumbnail per page.
 * - Anything else: uploads it for extraction, loads the resulting
 *   vocabulary and switches to the "vocabulary" tab.
 *
 * On any failure the error message is shown, the status text is cleared and
 * the local session is reset to null.
 */
const startSession = async () => {
  if (!sessionName.trim()) {
    setError('Bitte geben Sie einen Namen fuer die Session ein.')
    return
  }
  if (!selectedDocumentId && !directFile && !selectedMobileFile) {
    setError('Bitte waehlen Sie ein Dokument aus oder laden Sie eine Datei hoch.')
    return
  }

  setError(null)
  setIsCreatingSession(true)
  setExtractionStatus('Session wird erstellt...')

  const API_BASE = getApiBase()

  try {
    const sessionRes = await fetch(`${API_BASE}/api/v1/vocab/sessions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        name: sessionName,
        ocr_prompts: ocrPrompts
      }),
    })

    if (!sessionRes.ok) {
      throw new Error('Session konnte nicht erstellt werden')
    }

    const sessionData = await sessionRes.json()
    setSession(sessionData)
    setWorksheetTitle(sessionName)

    startActivity('vocab_extraction', { description: sessionName })

    let file: File
    let isPdf = false

    if (directFile) {
      file = directFile
      isPdf = directFile.type === 'application/pdf'
    } else if (selectedMobileFile) {
      isPdf = selectedMobileFile.type === 'application/pdf'
      file = decodeDataUrlToFile(selectedMobileFile.dataUrl, selectedMobileFile.name, selectedMobileFile.type)
    } else {
      const selectedDoc = storedDocuments.find(d => d.id === selectedDocumentId)
      if (!selectedDoc || !selectedDoc.url) {
        throw new Error('Das ausgewaehlte Dokument ist nicht verfuegbar.')
      }

      isPdf = selectedDoc.type === 'application/pdf'
      file = decodeDataUrlToFile(selectedDoc.url, selectedDoc.name, selectedDoc.type)
    }

    if (isPdf) {
      setExtractionStatus('PDF wird hochgeladen...')

      const formData = new FormData()
      formData.append('file', file)

      const pdfInfoRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/upload-pdf-info`, {
        method: 'POST',
        body: formData,
      })

      if (!pdfInfoRes.ok) {
        throw new Error('PDF konnte nicht verarbeitet werden')
      }

      const pdfInfo = await pdfInfoRes.json()
      setPdfPageCount(pdfInfo.page_count)
      setSelectedPages(Array.from({ length: pdfInfo.page_count }, (_, i) => i))

      setActiveTab('pages')
      setExtractionStatus(`${pdfInfo.page_count} Seiten erkannt. Vorschau wird geladen...`)
      setIsLoadingThumbnails(true)

      // One slot per page, written by index. Pushing only the successful
      // thumbnails would shift every later entry onto the wrong page as
      // soon as a single request fails; a failed page keeps '' instead.
      const thumbnails: string[] = new Array(pdfInfo.page_count).fill('')
      for (let i = 0; i < pdfInfo.page_count; i++) {
        try {
          const thumbRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/pdf-thumbnail/${i}?hires=true`)
          if (thumbRes.ok) {
            const blob = await thumbRes.blob()
            thumbnails[i] = URL.createObjectURL(blob)
          }
        } catch (e) {
          console.error(`Failed to load thumbnail for page ${i}`, e)
        }
      }

      setPagesThumbnails(thumbnails)
      setIsLoadingThumbnails(false)
      setExtractionStatus(`${pdfInfo.page_count} Seiten bereit. Waehlen Sie die zu verarbeitenden Seiten.`)

    } else {
      setExtractionStatus('KI analysiert das Bild... (kann 30-60 Sekunden dauern)')

      const formData = new FormData()
      formData.append('file', file)

      const uploadRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/upload`, {
        method: 'POST',
        body: formData,
      })

      if (!uploadRes.ok) {
        throw new Error('Bild konnte nicht verarbeitet werden')
      }

      const uploadData = await uploadRes.json()
      setSession(prev => prev ? { ...prev, status: 'extracted', vocabulary_count: uploadData.vocabulary_count } : null)

      const vocabRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/vocabulary`)
      if (vocabRes.ok) {
        const vocabData = await vocabRes.json()
        setVocabulary(vocabData.vocabulary || [])
        setExtractionStatus(`${vocabData.vocabulary?.length || 0} Vokabeln gefunden!`)
      }

      // Leave the result message visible for a moment before switching tabs.
      await new Promise(r => setTimeout(r, 1000))
      setActiveTab('vocabulary')
    }

  } catch (error) {
    console.error('Session start failed:', error)
    setError(error instanceof Error ? error.message : 'Ein Fehler ist aufgetreten')
    setExtractionStatus('')
    setSession(null)
  } finally {
    setIsCreatingSession(false)
  }
}
|
||||
|
||||
/**
 * Ask the server to process one page of the current session.
 *
 * Never throws: HTTP errors, `success: false` responses and exceptions are
 * all reported through the returned object's `error` field.
 */
const processSinglePage = async (pageIndex: number, ipa: IpaMode, syllable: SyllableMode): Promise<{ success: boolean; vocabulary: VocabularyEntry[]; error?: string }> => {
  const pageLabel = `Seite ${pageIndex + 1}`
  const failure = (message: string) => ({ success: false, vocabulary: [], error: message })

  const apiBase = getApiBase()

  try {
    const endpoint = `${apiBase}/api/v1/vocab/sessions/${session!.id}/process-single-page/${pageIndex}?ipa_mode=${ipa}&syllable_mode=${syllable}`
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ ocr_prompts: ocrPrompts }),
    })

    if (!response.ok) {
      // The error body may not be JSON; fall back to the bare status code.
      const errBody = await response.json().catch(() => ({}))
      const detail = errBody.detail || `HTTP ${response.status}`
      return failure(`${pageLabel}: ${detail}`)
    }

    const payload = await response.json()
    if (payload.success) {
      return { success: true, vocabulary: payload.vocabulary || [] }
    }
    return failure(payload.error || `${pageLabel}: Unbekannter Fehler`)
  } catch (err) {
    const reason = err instanceof Error ? err.message : 'Netzwerkfehler'
    return failure(`${pageLabel}: ${reason}`)
  }
}
|
||||
|
||||
/**
 * Process every selected PDF page one after another.
 *
 * Resets the processing state, switches to the "vocabulary" tab, then calls
 * processSinglePage for each selected page in ascending order, appending the
 * vocabulary of each successful page and recording failures. Afterwards the
 * thumbnails of the successful pages are fetched again and the session
 * status is set to 'extracted'.
 */
const processSelectedPages = async () => {
  if (!session || selectedPages.length === 0) return

  const pagesToProcess = [...selectedPages].sort((a, b) => a - b)

  setIsExtracting(true)
  setProcessingErrors([])
  setSuccessfulPages([])
  setFailedPages([])
  setProcessingQueue(pagesToProcess)
  setVocabulary([])

  setActiveTab('vocabulary')

  const API_BASE = getApiBase()
  const errors: string[] = []
  const successful: number[] = []
  const failed: number[] = []

  for (let i = 0; i < pagesToProcess.length; i++) {
    const pageIndex = pagesToProcess[i]
    setCurrentlyProcessingPage(pageIndex + 1)
    // Show the page number and, separately, the position within the
    // selection. Combining the absolute page number with the selection size
    // produced texts like "Seite 8 von 2" when only some pages were chosen.
    setExtractionStatus(`Verarbeite Seite ${pageIndex + 1} (${i + 1} von ${pagesToProcess.length})... (kann 30-60 Sekunden dauern)`)

    const result = await processSinglePage(pageIndex, ipaMode, syllableMode)

    if (result.success) {
      successful.push(pageIndex + 1)
      setSuccessfulPages([...successful])
      setVocabulary(prev => [...prev, ...result.vocabulary])
      setExtractionStatus(`Seite ${pageIndex + 1} fertig: ${result.vocabulary.length} Vokabeln gefunden`)
    } else {
      failed.push(pageIndex + 1)
      setFailedPages([...failed])
      if (result.error) {
        errors.push(result.error)
        setProcessingErrors([...errors])
      }
      setExtractionStatus(`Seite ${pageIndex + 1} fehlgeschlagen`)
    }

    // Short pause between pages.
    await new Promise(r => setTimeout(r, 500))
  }

  setCurrentlyProcessingPage(null)
  setProcessingQueue([])
  setIsExtracting(false)

  if (successful.length === pagesToProcess.length) {
    setExtractionStatus(`Fertig! Alle ${successful.length} Seiten verarbeitet.`)
  } else if (successful.length > 0) {
    setExtractionStatus(`${successful.length} von ${pagesToProcess.length} Seiten verarbeitet. ${failed.length} fehlgeschlagen.`)
  } else {
    setExtractionStatus(`Alle Seiten fehlgeschlagen.`)
  }

  // Reload thumbnails for processed pages (server may have rotated them)
  if (successful.length > 0 && session) {
    const updatedThumbs = [...pagesThumbnails]
    for (const pageNum of successful) {
      // `successful` holds 1-based page numbers; thumbnails are indexed from 0.
      const idx = pageNum - 1
      try {
        // The timestamp query parameter makes each refresh URL unique.
        const thumbRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/pdf-thumbnail/${idx}?hires=true&t=${Date.now()}`)
        if (thumbRes.ok) {
          const blob = await thumbRes.blob()
          if (updatedThumbs[idx]) URL.revokeObjectURL(updatedThumbs[idx])
          updatedThumbs[idx] = URL.createObjectURL(blob)
        }
      } catch (e) {
        console.error(`Failed to refresh thumbnail for page ${pageNum}`, e)
      }
    }
    setPagesThumbnails(updatedThumbs)
  }

  setSession(prev => prev ? { ...prev, status: 'extracted' } : null)
}
|
||||
|
||||
/** Add the page to the selection, or remove it if already selected; the list stays sorted ascending. */
const togglePageSelection = (pageIndex: number) => {
  setSelectedPages(current => {
    if (current.includes(pageIndex)) {
      return current.filter(page => page !== pageIndex)
    }
    const next = current.concat(pageIndex)
    next.sort((a, b) => a - b)
    return next
  })
}
|
||||
|
||||
/** Select every page index below pdfPageCount that is not in excludedPages. */
const selectAllPages = () => {
  const selectable: number[] = []
  for (let page = 0; page < pdfPageCount; page++) {
    if (!excludedPages.includes(page)) selectable.push(page)
  }
  return setSelectedPages(selectable)
}
|
||||
/** Clear the page selection. */
const selectNoPages = () => {
  return setSelectedPages([])
}
|
||||
|
||||
/**
 * Exclude a page: record it in excludedPages and drop it from the selection.
 * Stops propagation of the click so it does not reach parent click handlers.
 */
const excludePage = (pageIndex: number, e: React.MouseEvent) => {
  e.stopPropagation()
  // Keep the list free of duplicates if the same page is excluded twice.
  setExcludedPages(prev => (prev.includes(pageIndex) ? prev : [...prev, pageIndex]))
  setSelectedPages(prev => prev.filter(p => p !== pageIndex))
}
|
||||
|
||||
/** Empty the list of excluded pages. */
const restoreExcludedPages = (): void => void setExcludedPages([])
|
||||
|
||||
/**
 * Request an OCR comparison for one page of the current session.
 * Opens the comparison view, clears the previous result and error, and
 * stores either the response payload or an error message.
 */
const runOcrComparison = async (pageIndex: number) => {
  if (!session) return

  // Reset the comparison state before the request starts.
  setOcrComparePageIndex(pageIndex)
  setShowOcrComparison(true)
  setIsComparingOcr(true)
  setOcrCompareError(null)
  setOcrCompareResult(null)

  const apiBase = getApiBase()

  try {
    const endpoint = `${apiBase}/api/v1/vocab/sessions/${session.id}/compare-ocr/${pageIndex}`
    const response = await fetch(endpoint, { method: 'POST' })
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`)
    }
    setOcrCompareResult(await response.json())
  } catch (err) {
    const message = err instanceof Error ? err.message : 'Vergleich fehlgeschlagen'
    setOcrCompareError(message)
  } finally {
    setIsComparingOcr(false)
  }
}
|
||||
|
||||
/**
 * Change one field of the vocabulary entry with the given id.
 * The four built-in fields are written on the entry itself; any other field
 * name is stored under the entry's `extras` object.
 */
const updateVocabularyEntry = (id: string, field: string, value: string) => {
  setVocabulary(entries =>
    entries.map(entry => {
      if (entry.id !== id) return entry
      switch (field) {
        case 'english':
        case 'german':
        case 'example_sentence':
        case 'word_type':
          return { ...entry, [field]: value }
        default:
          return { ...entry, extras: { ...(entry.extras || {}), [field]: value } }
      }
    })
  )
}
|
||||
|
||||
/**
 * Prompt for a column name and append a new extra column to the given page.
 * Does nothing when the prompt is cancelled or the name is blank.
 */
const addExtraColumn = (sourcePage: number) => {
  const trimmedLabel = (prompt('Spaltenname:') ?? '').trim()
  if (trimmedLabel === '') return

  // The key is derived from the current timestamp.
  const column: ExtraColumn = { key: `extra_${Date.now()}`, label: trimmedLabel }
  setPageExtraColumns(current => {
    const existing = current[sourcePage] || []
    return { ...current, [sourcePage]: existing.concat(column) }
  })
}
|
||||
|
||||
/**
 * Remove an extra column from the given page and delete its value from the
 * `extras` of every vocabulary entry that has one.
 */
const removeExtraColumn = (sourcePage: number, key: string) => {
  setPageExtraColumns(current => {
    const remaining = (current[sourcePage] || []).filter(column => column.key !== key)
    return { ...current, [sourcePage]: remaining }
  })

  setVocabulary(entries =>
    entries.map(entry => {
      const extras = entry.extras
      if (!extras || !(key in extras)) return entry
      // Copy first so the entry held in state is not mutated.
      const trimmed = { ...extras }
      delete trimmed[key]
      return { ...entry, extras: trimmed }
    })
  )
}
|
||||
|
||||
/**
 * Return the extra columns that apply to a page: the columns stored under
 * page 0 (treated as global) followed by the page's own columns.
 */
const getExtraColumnsForPage = (sourcePage: number): ExtraColumn[] => {
  const global = pageExtraColumns[0] || []
  // Page 0 is the global bucket itself; appending its "page-specific"
  // columns would return every column twice.
  if (sourcePage === 0) return [...global]
  const pageSpecific = pageExtraColumns[sourcePage] || []
  return [...global, ...pageSpecific]
}
|
||||
|
||||
/** Collect the extra columns of all pages, keeping the first column seen for each key. */
const getAllExtraColumns = (): ExtraColumn[] => {
  // A Map keeps insertion order, so columns come out in first-seen order.
  const firstByKey = new Map<string, ExtraColumn>()
  Object.values(pageExtraColumns).forEach(columns => {
    columns.forEach(column => {
      if (!firstByKey.has(column.key)) firstByKey.set(column.key, column)
    })
  })
  return Array.from(firstByKey.values())
}
|
||||
|
||||
/** Remove the vocabulary entry with the given id. */
const deleteVocabularyEntry = (id: string) => {
  const isOtherEntry = (entry: VocabularyEntry) => entry.id !== id
  setVocabulary(entries => entries.filter(isOtherEntry))
}
|
||||
|
||||
/** Flip the `selected` flag of the vocabulary entry with the given id. */
const toggleVocabularySelection = (id: string) => {
  setVocabulary(entries =>
    entries.map(entry => {
      if (entry.id !== id) return entry
      return { ...entry, selected: !entry.selected }
    })
  )
}
|
||||
|
||||
/**
 * Select every entry, or deselect every entry if all are already selected.
 */
const toggleAllSelection = () => {
  setVocabulary(prev => {
    // Decide from the same state that is being updated; the closure's
    // `vocabulary` can be stale when several updates are queued.
    const allSelected = prev.every(v => v.selected)
    return prev.map(v => ({ ...v, selected: !allSelected }))
  })
}
|
||||
|
||||
/**
 * Insert an empty, pre-selected vocabulary entry.
 * Without `atIndex` the entry is appended; otherwise it is inserted at that position.
 */
const addVocabularyEntry = (atIndex?: number) => {
  const blankEntry: VocabularyEntry = {
    id: `new-${Date.now()}`,
    english: '',
    german: '',
    example_sentence: '',
    selected: true
  }

  setVocabulary(entries =>
    atIndex === undefined
      ? entries.concat(blankEntry)
      : [...entries.slice(0, atIndex), blankEntry, ...entries.slice(atIndex)]
  )
}
|
||||
|
||||
/**
 * Send the current vocabulary list to the server with a PUT request.
 * Best-effort: failures are logged and never thrown, so callers can await
 * this without a try/catch.
 */
const saveVocabulary = async () => {
  if (!session) return
  const API_BASE = getApiBase()

  try {
    const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/vocabulary`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ vocabulary }),
    })
    // fetch only rejects on network errors; an HTTP error status must be
    // checked explicitly or a failed save goes unnoticed.
    if (!res.ok) {
      console.error(`Failed to save vocabulary: HTTP ${res.status}`)
    }
  } catch (error) {
    console.error('Failed to save vocabulary:', error)
  }
}
|
||||
|
||||
/**
 * Save the vocabulary, then ask the server to generate a worksheet.
 *
 * The 'nru' format posts to `generate-nru`; every other format posts to
 * `generate` and additionally sends the selected worksheet types and line
 * height. On success the worksheet id is stored, the "export" tab is opened
 * and the activity is completed. On failure an error message is shown.
 */
const generateWorksheet = async () => {
  if (!session) return
  if (selectedFormat === 'standard' && selectedTypes.length === 0) return

  setIsGenerating(true)
  setError(null)
  const API_BASE = getApiBase()

  try {
    await saveVocabulary()

    const title = worksheetTitle || session.name
    const isNru = selectedFormat === 'nru'
    const endpoint = isNru ? 'generate-nru' : 'generate'
    const payload = isNru
      ? { title, include_solutions: includeSolutions }
      : {
          worksheet_types: selectedTypes,
          title,
          include_solutions: includeSolutions,
          line_height: lineHeight,
        }

    const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/${endpoint}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    })

    // Treat an HTTP error like any other failure so the user gets feedback
    // instead of the button doing nothing.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}`)
    }

    const data = await res.json()
    setWorksheetId(data.worksheet_id || data.id)
    setActiveTab('export')
    completeActivity({ vocabCount: vocabulary.length })
  } catch (error) {
    console.error('Failed to generate worksheet:', error)
    setError('Arbeitsblatt konnte nicht erstellt werden')
  } finally {
    setIsGenerating(false)
  }
}
|
||||
|
||||
/** Open the worksheet PDF or its solution PDF in a new window or tab; no-op without a worksheet id. */
const downloadPDF = (type: 'worksheet' | 'solution') => {
  if (!worksheetId) return

  let endpoint = 'solution'
  if (type === 'worksheet') {
    endpoint = 'pdf'
  }

  const apiBase = getApiBase()
  const target = `${apiBase}/api/v1/vocab/worksheets/${worksheetId}/${endpoint}`
  window.open(target, '_blank')
}
|
||||
|
||||
/** Add the worksheet type to the selection, or remove it if already selected. */
const toggleWorksheetType = (type: WorksheetType) => {
  setSelectedTypes(current => {
    if (current.includes(type)) {
      return current.filter(existing => existing !== type)
    }
    return current.concat(type)
  })
}
|
||||
|
||||
/**
 * Load an existing session from the server and open the matching tab.
 *
 * - status 'pending': go to the upload tab and show a hint.
 * - status 'extracted' or 'completed': load the vocabulary, then open the vocabulary tab.
 * - any other status: open the vocabulary tab without loading anything.
 */
const resumeSession = async (existingSession: Session) => {
  setError(null)
  setExtractionStatus('Session wird geladen...')

  const apiBase = getApiBase()

  try {
    const sessionUrl = `${apiBase}/api/v1/vocab/sessions/${existingSession.id}`
    const sessionResponse = await fetch(sessionUrl)
    if (!sessionResponse.ok) {
      throw new Error('Session nicht gefunden')
    }

    const loaded = await sessionResponse.json()
    setSession(loaded)
    setWorksheetTitle(loaded.name)

    if (loaded.status === 'pending') {
      setActiveTab('upload')
      setExtractionStatus('Diese Session hat noch keine Vokabeln. Bitte laden Sie ein Dokument hoch.')
      return
    }

    if (loaded.status === 'extracted' || loaded.status === 'completed') {
      const vocabResponse = await fetch(`${sessionUrl}/vocabulary`)
      if (vocabResponse.ok) {
        const vocabPayload = await vocabResponse.json()
        setVocabulary(vocabPayload.vocabulary || [])
      }
    }

    setActiveTab('vocabulary')
    setExtractionStatus('')
  } catch (err) {
    console.error('Failed to resume session:', err)
    setError(err instanceof Error ? err.message : 'Fehler beim Laden der Session')
    setExtractionStatus('')
  }
}
|
||||
|
||||
/**
 * Reset the session, file and page state, return to the upload tab and
 * reload the list of existing sessions from the server.
 */
const resetSession = async () => {
  // Object URLs keep their blob alive until they are revoked, so release
  // the thumbnail URLs and a PDF preview URL before dropping them from state.
  pagesThumbnails.forEach(url => {
    if (url) URL.revokeObjectURL(url)
  })
  if (directFilePreview?.startsWith('blob:')) {
    URL.revokeObjectURL(directFilePreview)
  }

  setSession(null)
  setSessionName('')
  setVocabulary([])
  setUploadedImage(null)
  setWorksheetId(null)
  setSelectedDocumentId(null)
  setDirectFile(null)
  setDirectFilePreview(null)
  setShowFullPreview(false)
  setPdfPageCount(0)
  setSelectedPages([])
  setPagesThumbnails([])
  setExcludedPages([])
  setActiveTab('upload')
  setError(null)
  setExtractionStatus('')

  const API_BASE = getApiBase()
  try {
    const res = await fetch(`${API_BASE}/api/v1/vocab/sessions`)
    if (res.ok) {
      const sessions = await res.json()
      setExistingSessions(sessions)
    }
  } catch (e) {
    console.error('Failed to reload sessions:', e)
  }
}
|
||||
|
||||
/**
 * Delete a session on the server after the user confirms, and remove it
 * from the local list when the server reports success.
 * Stops propagation of the click so it does not reach parent click handlers.
 */
const deleteSession = async (sessionId: string, e: React.MouseEvent) => {
  e.stopPropagation()

  const confirmed = confirm('Session wirklich loeschen? Diese Aktion kann nicht rueckgaengig gemacht werden.')
  if (!confirmed) return

  const apiBase = getApiBase()
  try {
    const response = await fetch(`${apiBase}/api/v1/vocab/sessions/${sessionId}`, { method: 'DELETE' })
    if (!response.ok) return
    setExistingSessions(current => current.filter(item => item.id !== sessionId))
  } catch (err) {
    console.error('Failed to delete session:', err)
  }
}
|
||||
|
||||
/**
 * Reprocess all previously successful pages with new IPA/syllable modes.
 *
 * Runs in the background (the function itself returns immediately). Each
 * page goes through processSinglePage so that failures are reported rather
 * than silently dropped. If every page fails, the existing vocabulary is
 * left untouched instead of being replaced by an empty list.
 */
const reprocessPages = (ipa: IpaMode, syllable: SyllableMode) => {
  if (!session || successfulPages.length === 0) return

  setIsExtracting(true)
  setExtractionStatus('Verarbeite mit neuen Einstellungen...')
  // successfulPages holds 1-based page numbers; the API expects 0-based indices.
  const pagesToReprocess = successfulPages.map(p => p - 1)

  const run = async () => {
    const allVocab: VocabularyEntry[] = []
    let failedCount = 0

    for (const pageIndex of pagesToReprocess) {
      const result = await processSinglePage(pageIndex, ipa, syllable)
      if (result.success) {
        allVocab.push(...result.vocabulary)
      } else {
        failedCount++
        console.error('Failed to reprocess page:', result.error)
      }
    }

    setIsExtracting(false)

    if (failedCount === pagesToReprocess.length) {
      // Nothing could be reprocessed: keep what the user already has.
      setExtractionStatus('Neuverarbeitung fehlgeschlagen.')
      return
    }

    setVocabulary(allVocab)
    setExtractionStatus(
      failedCount > 0
        ? `${allVocab.length} Vokabeln mit neuen Einstellungen (${failedCount} Seiten fehlgeschlagen)`
        : `${allVocab.length} Vokabeln mit neuen Einstellungen`
    )
  }

  void run()
}
|
||||
|
||||
// Everything the hook exposes, grouped by concern.
return {
  // Mount flag
  mounted,

  // Theme and shared class strings
  isDark,
  glassCard,
  glassInput,

  // Active tab
  activeTab,
  setActiveTab,

  // Current session
  session,
  sessionName,
  setSessionName,
  isCreatingSession,
  error,
  setError,
  extractionStatus,

  // Existing sessions
  existingSessions,
  isLoadingSessions,

  // Stored documents
  storedDocuments,
  selectedDocumentId,
  setSelectedDocumentId,

  // Directly selected file
  directFile,
  setDirectFile,
  directFilePreview,
  showFullPreview,
  setShowFullPreview,
  directFileInputRef,

  // PDF pages
  pdfPageCount,
  selectedPages,
  pagesThumbnails,
  isLoadingThumbnails,
  excludedPages,

  // Extra columns
  pageExtraColumns,

  // Upload
  uploadedImage,
  isExtracting,

  // Vocabulary
  vocabulary,

  // Worksheet options
  selectedTypes,
  worksheetTitle,
  setWorksheetTitle,
  includeSolutions,
  setIncludeSolutions,
  lineHeight,
  setLineHeight,
  selectedFormat,
  setSelectedFormat,
  ipaMode,
  setIpaMode,
  syllableMode,
  setSyllableMode,

  // Export
  worksheetId,
  isGenerating,

  // Page processing
  processingErrors,
  successfulPages,
  failedPages,
  currentlyProcessingPage,

  // OCR settings
  ocrPrompts,
  showSettings,
  setShowSettings,

  // QR / mobile upload
  showQRModal,
  setShowQRModal,
  uploadSessionId,
  mobileUploadedFiles,
  selectedMobileFile,
  setSelectedMobileFile,
  setMobileUploadedFiles,

  // OCR comparison
  showOcrComparison,
  setShowOcrComparison,
  ocrComparePageIndex,
  ocrCompareResult,
  isComparingOcr,
  ocrCompareError,

  // Handlers
  handleDirectFileSelect,
  startSession,
  processSelectedPages,
  togglePageSelection,
  selectAllPages,
  selectNoPages,
  excludePage,
  restoreExcludedPages,
  runOcrComparison,
  updateVocabularyEntry,
  addExtraColumn,
  removeExtraColumn,
  getExtraColumnsForPage,
  getAllExtraColumns,
  deleteVocabularyEntry,
  toggleVocabularySelection,
  toggleAllSelection,
  addVocabularyEntry,
  saveVocabulary,
  generateWorksheet,
  downloadPDF,
  toggleWorksheetType,
  resumeSession,
  resetSession,
  deleteSession,
  saveOcrPrompts,
  formatFileSize,
  reprocessPages,
}
|
||||
}
|
||||
Reference in New Issue
Block a user