Add SmartSpellChecker + refactor vocab-worksheet page.tsx
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 45s
CI / test-go-edu-search (push) Successful in 43s
CI / test-python-klausur (push) Failing after 2m51s
CI / test-python-agent-core (push) Successful in 36s
CI / test-nodejs-website (push) Successful in 37s

SmartSpellChecker (klausur-service):
- Language-aware OCR post-correction without LLMs
- Dual-dictionary heuristic for EN/DE language detection
- Context-based a/I disambiguation via bigram lookup
- Multi-digit substitution (sch00l→school)
- Cross-language guard (don't false-correct DE words in EN column)
- Umlaut correction (Schuler→Schüler, uber→über)
- Integrated into spell_review_entries_sync() pipeline
- 31 tests, 9ms/100 corrections

Vocab-worksheet refactoring (studio-v2):
- Split 2337-line page.tsx into 14 files
- Custom hook useVocabWorksheet.ts (all state + logic)
- 9 components in components/ directory
- types.ts, constants.ts for shared definitions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-12 12:25:01 +02:00
parent 04fa01661c
commit 909d0729f6
17 changed files with 3545 additions and 2228 deletions

View File

@@ -881,10 +881,25 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
"""Rule-based OCR correction: spell-checker + structural heuristics.
Deterministic — never translates, never touches IPA, never hallucinates.
Uses SmartSpellChecker for language-aware corrections with context-based
disambiguation (a/I), multi-digit substitution, and cross-language guard.
"""
t0 = time.time()
changes: List[Dict] = []
all_corrected: List[Dict] = []
# Use SmartSpellChecker if available, fall back to legacy _spell_fix_field
_smart = None
try:
from smart_spell import SmartSpellChecker
_smart = SmartSpellChecker()
logger.debug("spell_review: using SmartSpellChecker")
except Exception:
logger.debug("spell_review: SmartSpellChecker not available, using legacy")
# Map field names → language codes for SmartSpellChecker
_LANG_MAP = {"english": "en", "german": "de", "example": "auto"}
for i, entry in enumerate(entries):
e = dict(entry)
# Page-ref normalization (always, regardless of review status)
@@ -907,9 +922,18 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
old_val = (e.get(field_name) or "").strip()
if not old_val:
continue
# example field is mixed-language — try German first (for umlauts)
lang = "german" if field_name in ("german", "example") else "english"
new_val, was_changed = _spell_fix_field(old_val, field=lang)
if _smart:
# SmartSpellChecker path — language-aware, context-based
lang_code = _LANG_MAP.get(field_name, "en")
result = _smart.correct_text(old_val, lang=lang_code)
new_val = result.corrected
was_changed = result.changed
else:
# Legacy path
lang = "german" if field_name in ("german", "example") else "english"
new_val, was_changed = _spell_fix_field(old_val, field=lang)
if was_changed and new_val != old_val:
changes.append({
"row_index": e.get("row_index", i),
@@ -921,12 +945,13 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
e["llm_corrected"] = True
all_corrected.append(e)
duration_ms = int((time.time() - t0) * 1000)
model_name = "smart-spell-checker" if _smart else "spell-checker"
return {
"entries_original": entries,
"entries_corrected": all_corrected,
"changes": changes,
"skipped_count": 0,
"model_used": "spell-checker",
"model_used": model_name,
"duration_ms": duration_ms,
}

View File

@@ -0,0 +1,369 @@
"""
SmartSpellChecker — Language-aware OCR post-correction without LLMs.
Uses pyspellchecker (MIT) with dual EN+DE dictionaries for:
- Automatic language detection per word (dual-dictionary heuristic)
- OCR error correction (digit↔letter, umlauts, transpositions)
- Context-based disambiguation (a/I, l/I) via bigram lookup
- Mixed-language support for example sentences
License: Apache 2.0 (commercial use permitted)
"""
import logging
import re
from dataclasses import dataclass, field
from typing import Dict, List, Literal, Optional, Set, Tuple
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Init
# ---------------------------------------------------------------------------
try:
from spellchecker import SpellChecker as _SpellChecker
_en_spell = _SpellChecker(language='en', distance=1)
_de_spell = _SpellChecker(language='de', distance=1)
_AVAILABLE = True
except ImportError:
_AVAILABLE = False
logger.warning("pyspellchecker not installed — SmartSpellChecker disabled")
Lang = Literal["en", "de", "both", "unknown"]

# ---------------------------------------------------------------------------
# Bigram context for a/I disambiguation
# ---------------------------------------------------------------------------

# Words that commonly follow "I" (subject pronoun → verb/modal)
_I_FOLLOWERS: frozenset = frozenset({
    "am", "was", "have", "had", "do", "did", "will", "would", "can",
    "could", "should", "shall", "may", "might", "must",
    "think", "know", "see", "want", "need", "like", "love", "hate",
    "go", "went", "come", "came", "say", "said", "get", "got",
    "make", "made", "take", "took", "give", "gave", "tell", "told",
    "feel", "felt", "find", "found", "believe", "hope", "wish",
    "remember", "forget", "understand", "mean", "meant",
    "don't", "didn't", "can't", "won't", "couldn't", "wouldn't",
    "shouldn't", "haven't", "hadn't", "isn't", "wasn't",
    "really", "just", "also", "always", "never", "often", "sometimes",
})

# Words that commonly follow "a" (article → noun/adjective)
_A_FOLLOWERS: frozenset = frozenset({
    "lot", "few", "little", "bit", "good", "bad", "great", "new", "old",
    "long", "short", "big", "small", "large", "huge", "tiny",
    "nice", "beautiful", "wonderful", "terrible", "horrible",
    "man", "woman", "boy", "girl", "child", "dog", "cat", "bird",
    "book", "car", "house", "room", "school", "teacher", "student",
    "day", "week", "month", "year", "time", "place", "way",
    "friend", "family", "person", "problem", "question", "story",
    "very", "really", "quite", "rather", "pretty", "single",
})

# Digit→letter substitutions (OCR confusion). Candidates are tried in the
# listed order, so the first entry of each list wins on ties.
_DIGIT_SUBS: Dict[str, List[str]] = {
    '0': ['o', 'O'],
    '1': ['l', 'I'],
    '5': ['s', 'S'],
    '6': ['g', 'G'],
    '8': ['b', 'B'],
    '|': ['I', 'l'],
}
_SUSPICIOUS_CHARS = frozenset(_DIGIT_SUBS.keys())

# Umlaut confusion: OCR drops dots (ü→u, ä→a, ö→o).
# 'i'/'I' additionally map to 'ü'/'Ü'.
_UMLAUT_MAP = {
    'a': 'ä', 'o': 'ö', 'u': 'ü', 'i': 'ü',
    'A': 'Ä', 'O': 'Ö', 'U': 'Ü', 'I': 'Ü',
}

# Tokenizer: group 1 is a word, group 2 the separator run that follows it.
# A word may contain digit runs only when they are sandwiched between
# letters ("sch00l", "g0od"), so OCR digit confusions reach correct_word()
# as a single token while leading/trailing digits ("1st", "Unit 3") stay
# in the separator and are never touched.
_WORD_CHARS = "A-Za-zÄÖÜäöüß'|"
_TOKEN_RE = re.compile(
    rf"([{_WORD_CHARS}]+(?:[0-9]+[{_WORD_CHARS}]+)*)([^{_WORD_CHARS}]*)"
)


# ---------------------------------------------------------------------------
# Data types
# ---------------------------------------------------------------------------

@dataclass
class CorrectionResult:
    """Outcome of correcting one text value.

    Attributes:
        original: The text as passed in.
        corrected: The text after corrections (equal to ``original`` when
            nothing changed).
        lang_detected: Language used/detected for the text.
        changed: True when ``corrected`` differs from ``original``.
        changes: One "old→new" entry per corrected token.
    """
    original: str
    corrected: str
    lang_detected: Lang
    changed: bool
    changes: List[str] = field(default_factory=list)


# ---------------------------------------------------------------------------
# Core class
# ---------------------------------------------------------------------------

class SmartSpellChecker:
    """Language-aware OCR spell checker using pyspellchecker (no LLM)."""

    def __init__(self):
        """Bind the module-level EN/DE dictionaries.

        Raises:
            RuntimeError: If pyspellchecker could not be imported.
        """
        if not _AVAILABLE:
            raise RuntimeError("pyspellchecker not installed")
        self.en = _en_spell
        self.de = _de_spell

    # --- Language detection ---

    def detect_word_lang(self, word: str) -> Lang:
        """Detect language of a single word using dual-dict heuristic.

        Returns "both" when the word is in both dictionaries and "unknown"
        when it is in neither (or is empty after stripping punctuation).
        """
        w = word.lower().strip(".,;:!?\"'()")
        if not w:
            return "unknown"
        in_en = bool(self.en.known([w]))
        in_de = bool(self.de.known([w]))
        if in_en and in_de:
            return "both"
        if in_en:
            return "en"
        if in_de:
            return "de"
        return "unknown"

    def detect_text_lang(self, text: str) -> Lang:
        """Detect dominant language of a text string (sentence/phrase).

        Counts words that are exclusively EN or exclusively DE; words known
        in both dictionaries do not vote. A non-zero tie yields "both".
        """
        words = re.findall(r"[A-Za-zÄÖÜäöüß]+", text)
        if not words:
            return "unknown"
        en_count = 0
        de_count = 0
        for w in words:
            lang = self.detect_word_lang(w)
            if lang == "en":
                en_count += 1
            elif lang == "de":
                de_count += 1
            # "both" doesn't count for either
        if en_count > de_count:
            return "en"
        if de_count > en_count:
            return "de"
        if en_count == de_count and en_count > 0:
            return "both"
        return "unknown"

    # --- Single-word correction ---

    def _known(self, word: str) -> bool:
        """True if word is known in EN or DE dictionary."""
        w = word.lower()
        return bool(self.en.known([w])) or bool(self.de.known([w]))

    def _known_in(self, word: str, lang: str) -> bool:
        """True if word is known in a specific language dictionary."""
        w = word.lower()
        spell = self.en if lang == "en" else self.de
        return bool(spell.known([w]))

    def correct_word(self, word: str, lang: str = "en",
                     prev_word: str = "", next_word: str = "") -> Optional[str]:
        """Correct a single word for the given language.

        Returns None if no correction needed, or the corrected string.

        Args:
            word: The word to check/correct
            lang: Expected language ("en" or "de")
            prev_word: Previous word (for context; currently unused)
            next_word: Next word (for context)
        """
        if not word or not word.strip():
            return None
        # Skip numbers and abbreviations with dots
        if word.isdigit() or '.' in word:
            return None

        # 0. A lone "l" in English text is usually a misread "I" (or "a").
        #    Decide from the following word *before* the dictionary check so
        #    the result does not depend on whether the dictionary happens to
        #    list the single letter "l".
        if lang == "en" and word.lower() == "l" and next_word:
            resolved = self._disambiguate_a_I(word, next_word)
            if resolved is not None:
                return resolved

        has_suspicious = any(ch in _SUSPICIOUS_CHARS for ch in word)

        # 1. Already known → no fix
        if self._known(word):
            return None

        # 2. Digit/pipe substitution
        if has_suspicious:
            if word == '|':
                return 'I'
            # Try single-char substitutions
            for i, ch in enumerate(word):
                if ch not in _DIGIT_SUBS:
                    continue
                for replacement in _DIGIT_SUBS[ch]:
                    candidate = word[:i] + replacement + word[i + 1:]
                    if self._known(candidate):
                        return candidate
            # Try multi-char substitution (e.g., "sch00l" → "school")
            multi = self._try_multi_digit_sub(word)
            if multi:
                return multi

        # 3. Umlaut correction (German)
        if lang == "de" and len(word) >= 3 and word.isalpha():
            umlaut_fix = self._try_umlaut_fix(word)
            if umlaut_fix:
                return umlaut_fix

        # 4. General spell correction
        if not has_suspicious and len(word) >= 3 and word.isalpha():
            # Safety: don't correct if the word is valid in the OTHER language
            # (either directly or via umlaut fix)
            other_lang = "de" if lang == "en" else "en"
            if self._known_in(word, other_lang):
                return None
            if other_lang == "de" and self._try_umlaut_fix(word):
                return None  # has a valid DE umlaut variant → don't touch
            spell = self.en if lang == "en" else self.de
            correction = spell.correction(word.lower())
            if correction and correction != word.lower():
                # Carry the original's leading capital over to the suggestion
                if word[0].isupper():
                    correction = correction[0].upper() + correction[1:]
                if self._known(correction):
                    return correction
        return None

    # --- Multi-digit substitution ---

    def _try_multi_digit_sub(self, word: str) -> Optional[str]:
        """Try replacing multiple digits simultaneously using BFS.

        Only words with 1-4 substitutable characters are expanded, which
        bounds the search at 3**4 = 81 candidates. Returns the first known
        candidate, or None.
        """
        positions = [(i, ch) for i, ch in enumerate(word) if ch in _DIGIT_SUBS]
        if not positions or len(positions) > 4:
            return None
        # BFS over substitution combinations
        queue = [list(word)]
        for pos, ch in positions:
            next_queue = []
            for current in queue:
                # Keep original
                next_queue.append(current[:])
                # Try each substitution
                for repl in _DIGIT_SUBS[ch]:
                    variant = current[:]
                    variant[pos] = repl
                    next_queue.append(variant)
            queue = next_queue
        # Check which combinations produce known words
        for combo in queue:
            candidate = "".join(combo)
            if candidate != word and self._known(candidate):
                return candidate
        return None

    # --- Umlaut fix ---

    def _try_umlaut_fix(self, word: str) -> Optional[str]:
        """Try single-char umlaut substitutions for German words."""
        for i, ch in enumerate(word):
            if ch in _UMLAUT_MAP:
                candidate = word[:i] + _UMLAUT_MAP[ch] + word[i + 1:]
                if self._known(candidate):
                    return candidate
        return None

    # --- a/I disambiguation ---

    def _disambiguate_a_I(self, token: str, next_word: str) -> Optional[str]:
        """Disambiguate 'a' vs 'I' (and OCR variants like 'l', '|').

        Returns "I" or "a" when the following word is in the corresponding
        follower table, otherwise None (uncertain, don't change).
        """
        nw = next_word.lower().strip(".,;:!?")
        if nw in _I_FOLLOWERS:
            return "I"
        if nw in _A_FOLLOWERS:
            return "a"
        return None  # uncertain, don't change

    # --- Full text correction ---

    def correct_text(self, text: str, lang: str = "en") -> CorrectionResult:
        """Correct a full text string (field value).

        Everything that is not a word token (leading text, punctuation,
        digits, whitespace) is copied through unchanged.

        Args:
            text: The text to correct
            lang: Expected language ("en" or "de"), or "auto" to detect the
                dominant language from the text itself. When detection is
                inconclusive, words are corrected as English.
        """
        if not text or not text.strip():
            return CorrectionResult(text, text, "unknown", False)
        detected = self.detect_text_lang(text) if lang == "auto" else lang
        word_lang = detected if detected in ("en", "de") else "en"

        parts: List[str] = []
        changes: List[str] = []
        tokens = list(_TOKEN_RE.finditer(text))
        cursor = 0  # end of the last span already copied into `parts`
        for idx, m in enumerate(tokens):
            # Copy through anything the tokenizer skipped (notably text that
            # precedes the first word, e.g. "(" or "3 ").
            parts.append(text[cursor:m.start()])
            token, sep = m.group(1), m.group(2)
            next_word = tokens[idx + 1].group(1) if idx + 1 < len(tokens) else ""
            prev_word = tokens[idx - 1].group(1) if idx > 0 else ""
            correction = self.correct_word(
                token, lang=word_lang,
                prev_word=prev_word, next_word=next_word,
            )
            if correction and correction != token:
                changes.append(f"{token}→{correction}")
                parts.append(correction)
            else:
                parts.append(token)
            parts.append(sep)
            cursor = m.end()
        # Append any trailing text (also the whole text when nothing matched)
        parts.append(text[cursor:])

        corrected = "".join(parts)
        return CorrectionResult(
            original=text,
            corrected=corrected,
            lang_detected=detected,
            changed=corrected != text,
            changes=changes,
        )

    # --- Vocabulary entry correction ---

    def correct_vocab_entry(self, english: str, german: str,
                            example: str = "") -> Dict[str, CorrectionResult]:
        """Correct a full vocabulary entry (EN + DE + example).

        Uses column position to determine language — the most reliable signal.
        The "example" key is only present when an example was given.
        """
        results = {}
        results["english"] = self.correct_text(english, lang="en")
        results["german"] = self.correct_text(german, lang="de")
        if example:
            # For examples, auto-detect language
            results["example"] = self.correct_text(example, lang="auto")
        return results

View File

@@ -0,0 +1,210 @@
"""Tests for SmartSpellChecker — language-aware OCR post-correction."""
import pytest
import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from smart_spell import SmartSpellChecker, CorrectionResult
@pytest.fixture
def sc():
    """Provide a fresh SmartSpellChecker for each test."""
    checker = SmartSpellChecker()
    return checker
# ─── Language Detection ──────────────────────────────────────────────────────
class TestLanguageDetection:
    """Word- and sentence-level language detection via the two dictionaries."""

    def test_clear_english_words(self, sc):
        english_words = ("school", "beautiful", "homework", "yesterday", "because")
        for word in english_words:
            detected = sc.detect_word_lang(word)
            assert detected in ("en", "both"), f"{word} should be EN"

    def test_clear_german_words(self, sc):
        german_words = ("Schule", "Hausaufgaben", "Freundschaft", "Straße", "Entschuldigung")
        for word in german_words:
            detected = sc.detect_word_lang(word)
            assert detected in ("de", "both"), f"{word} should be DE"

    def test_ambiguous_words(self, sc):
        """Words that exist in both languages."""
        shared_words = ("Hand", "Finger", "Arm", "Name", "Ball")
        for word in shared_words:
            detected = sc.detect_word_lang(word)
            assert detected == "both", f"{word} should be 'both'"

    def test_unknown_words(self, sc):
        # Gibberish and the empty string are both reported as "unknown".
        for word in ("xyzqwk", ""):
            assert sc.detect_word_lang(word) == "unknown"

    def test_english_sentence(self, sc):
        sentence = "I go to school every day"
        assert sc.detect_text_lang(sentence) == "en"

    def test_german_sentence(self, sc):
        sentence = "Ich gehe jeden Tag zur Schule"
        assert sc.detect_text_lang(sentence) == "de"

    def test_mixed_sentence(self, sc):
        # Dominant language should win
        sentence = "I like to play Fußball with my Freunde"
        assert sc.detect_text_lang(sentence) in ("en", "both")
# ─── Single Word Correction ────────────────────────────────────────────────
class TestSingleWordCorrection:
    """correct_word(): digit, umlaut and general spelling fixes per word."""

    def test_known_word_not_changed(self, sc):
        for word, lang in (("school", "en"), ("Freund", "de")):
            assert sc.correct_word(word, lang) is None

    def test_digit_letter_single(self, sc):
        for garbled, fixed in (("g0od", "good"), ("he1lo", "hello")):
            assert sc.correct_word(garbled, "en") == fixed

    def test_digit_letter_multi(self, sc):
        """Multiple digit substitutions (e.g., sch00l)."""
        result = sc.correct_word("sch00l", "en")
        assert result == "school", f"Expected 'school', got '{result}'"

    def test_pipe_to_I(self, sc):
        fixed = sc.correct_word("|", "en")
        assert fixed == "I"

    def test_umlaut_schuler(self, sc):
        fixed = sc.correct_word("Schuler", "de")
        assert fixed == "Schüler"

    def test_umlaut_uber(self, sc):
        fixed = sc.correct_word("uber", "de")
        assert fixed == "über"

    def test_umlaut_bucher(self, sc):
        fixed = sc.correct_word("Bucher", "de")
        assert fixed == "Bücher"

    def test_umlaut_turkei(self, sc):
        fixed = sc.correct_word("Turkei", "de")
        assert fixed == "Türkei"

    def test_missing_char(self, sc):
        fixed = sc.correct_word("beautful", "en")
        assert fixed == "beautiful"

    def test_transposition(self, sc):
        fixed = sc.correct_word("teh", "en")
        assert fixed == "the"

    def test_swap(self, sc):
        fixed = sc.correct_word("freind", "en")
        assert fixed == "friend"

    def test_no_false_correction_cross_lang(self, sc):
        """Don't correct a word that's valid in the other language.

        'Schuler' in the EN column should NOT be corrected to 'Schuyler'
        because 'Schüler' is valid German — it's likely a German word
        that ended up in the wrong column (or is a surname).
        """
        # "Schuler" becomes valid German after the umlaut fix ("Schüler"),
        # so in the EN column the checker must leave it alone: the outcome
        # may be None (no change) but never "Schuyler".
        outcome = sc.correct_word("Schuler", "en")
        assert outcome != "Schuyler", "Should not false-correct German word in EN column"
# ─── a/I Disambiguation ──────────────────────────────────────────────────────
class TestAIDisambiguation:
    """_disambiguate_a_I(): choose "I" or "a" from the following word."""

    def test_I_before_verb(self, sc):
        for follower in ("am", "was", "think", "have", "don't"):
            assert sc._disambiguate_a_I("l", follower) == "I"

    def test_a_before_noun_adj(self, sc):
        for follower in ("book", "cat", "big", "lot"):
            assert sc._disambiguate_a_I("a", follower) == "a"

    def test_uncertain_returns_none(self, sc):
        """When context is ambiguous, return None (don't change)."""
        verdict = sc._disambiguate_a_I("l", "xyzqwk")
        assert verdict is None
# ─── Full Text Correction ───────────────────────────────────────────────────
class TestFullTextCorrection:
    """correct_text(): whole-field correction with punctuation preserved."""

    def test_english_sentence(self, sc):
        outcome = sc.correct_text("teh cat is beautful", "en")
        assert outcome.changed
        for expected_word in ("the", "beautiful"):
            assert expected_word in outcome.corrected

    def test_german_sentence_no_change(self, sc):
        outcome = sc.correct_text("Ich gehe zur Schule", "de")
        assert not outcome.changed

    def test_german_umlaut_fix(self, sc):
        outcome = sc.correct_text("Der Schuler liest Bucher", "de")
        for expected_word in ("Schüler", "Bücher"):
            assert expected_word in outcome.corrected

    def test_preserves_punctuation(self, sc):
        outcome = sc.correct_text("teh cat, beautful!", "en")
        for mark in (",", "!"):
            assert mark in outcome.corrected

    def test_empty_text(self, sc):
        outcome = sc.correct_text("", "en")
        assert not outcome.changed
        assert outcome.corrected == ""
# ─── Vocab Entry Correction ─────────────────────────────────────────────────
class TestVocabEntryCorrection:
    """correct_vocab_entry(): per-column correction of EN/DE/example fields."""

    def test_basic_entry(self, sc):
        entry = sc.correct_vocab_entry(english="beautful", german="schön")
        assert entry["english"].corrected == "beautiful"
        assert entry["german"].changed is False

    def test_umlaut_in_german(self, sc):
        entry = sc.correct_vocab_entry(english="school", german="Schuler")
        assert entry["english"].changed is False
        assert entry["german"].corrected == "Schüler"

    def test_example_auto_detect(self, sc):
        entry = sc.correct_vocab_entry(
            english="friend",
            german="Freund",
            example="My best freind lives in Berlin",
        )
        example_result = entry["example"]
        assert "friend" in example_result.corrected
# ─── Speed ─────────────────────────────────────────────────────────────────
class TestSpeed:
    """Coarse performance guard for single-word correction."""

    def test_100_corrections_under_500ms(self, sc):
        """100 word corrections should complete in under 500ms."""
        import time
        words = [
            ("beautful", "en"), ("teh", "en"), ("freind", "en"),
            ("homwork", "en"), ("yesturday", "en"),
            ("Schuler", "de"), ("Bucher", "de"), ("Turkei", "de"),
            ("uber", "de"), ("Ubung", "de"),
        ] * 10
        # perf_counter() is monotonic and high-resolution; time.time() can
        # jump with wall-clock adjustments and make the assertion flaky.
        t0 = time.perf_counter()
        for word, lang in words:
            sc.correct_word(word, lang)
        dt = time.perf_counter() - t0
        print(f"\n 100 corrections in {dt*1000:.0f}ms")
        assert dt < 0.5, f"Too slow: {dt*1000:.0f}ms"

View File

@@ -0,0 +1,494 @@
"""
Benchmark: Spell-checking & language detection approaches for OCR post-correction.
Tests pyspellchecker (already used), symspellpy (candidate), and
dual-dictionary language detection heuristic on real vocabulary OCR data.
Run: pytest tests/test_spell_benchmark.py -v -s
"""
import time
import pytest
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _load_pyspellchecker():
    """Return ``(en, de)`` pyspellchecker instances with edit distance 1."""
    from spellchecker import SpellChecker
    english, german = (
        SpellChecker(language=code, distance=1) for code in ("en", "de")
    )
    return english, german
def _load_symspellpy():
    """Load symspellpy with English frequency dict (bundled).

    Returns:
        A ``(SymSpell, Verbosity)`` pair with the dictionary loaded.

    Raises:
        ImportError: If symspellpy is not installed.
        FileNotFoundError: If the bundled frequency dictionary could not
            be loaded.
    """
    # importlib.resources replaces the deprecated pkg_resources API, which
    # is absent from environments that do not ship setuptools.
    from importlib.resources import as_file, files

    from symspellpy import SymSpell, Verbosity

    dict_name = "frequency_dictionary_en_82_765.txt"
    sym = SymSpell(max_dictionary_edit_distance=2)
    # Use bundled English frequency dict
    with as_file(files("symspellpy") / dict_name) as dict_path:
        loaded = sym.load_dictionary(str(dict_path), term_index=0, count_index=1)
    # load_dictionary() signals a missing file by returning False rather
    # than raising; surface it so callers can skip instead of benchmarking
    # against an empty dictionary.
    if not loaded:
        raise FileNotFoundError(f"symspellpy dictionary not found: {dict_name}")
    return sym, Verbosity
# ---------------------------------------------------------------------------
# Test data: (ocr_output, expected_correction, language, category)
# ---------------------------------------------------------------------------
# Each row is (ocr_output, expected_correction, language, category).
# Rows whose expected text equals the OCR text are negative cases: the
# input is already correct and must not be changed.
OCR_TEST_CASES = [
# --- Single-char ambiguity ---
("l am a student", "I am a student", "en", "a_vs_I"),
("a book", "a book", "en", "a_vs_I"), # should NOT change
("I like cats", "I like cats", "en", "a_vs_I"), # should NOT change
("lt is raining", "It is raining", "en", "a_vs_I"), # l→I at start
# --- Digit-letter confusion ---
("g0od", "good", "en", "digit_letter"),
("sch00l", "school", "en", "digit_letter"),
("he1lo", "hello", "en", "digit_letter"),
("Sch0n", "Schon", "de", "digit_letter"), # German
# --- Umlaut drops ---
("schon", "schön", "de", "umlaut"), # context: "schon" is also valid DE!
("Schuler", "Schüler", "de", "umlaut"),
("uber", "über", "de", "umlaut"),
("Bucher", "Bücher", "de", "umlaut"),
("Turkei", "Türkei", "de", "umlaut"),
# --- Common OCR errors ---
("beautful", "beautiful", "en", "missing_char"),
("teh", "the", "en", "transposition"),
("becasue", "because", "en", "transposition"),
("freind", "friend", "en", "swap"),
("Freund", "Freund", "de", "correct"), # already correct
# --- Merged words ---
("atmyschool", "at my school", "en", "merged"),
("goodidea", "good idea", "en", "merged"),
# --- Mixed language example sentences ---
("I go to teh school", "I go to the school", "en", "sentence"),
("Ich gehe zur Schule", "Ich gehe zur Schule", "de", "sentence_correct"),
]
# Language detection test: (word, expected_language)
# Each row is (word, expected_language), where the expected language is
# one of "en", "de" or "both".
LANG_DETECT_CASES = [
# Clear English
("school", "en"),
("beautiful", "en"),
("homework", "en"),
("yesterday", "en"),
("children", "en"),
("because", "en"),
("environment", "en"),
("although", "en"),
# Clear German
("Schule", "de"),
("Hausaufgaben", "de"),
("Freundschaft", "de"),
("Umwelt", "de"),
("Kindergarten", "de"), # also used in English!
("Bücher", "de"),
("Straße", "de"),
("Entschuldigung", "de"),
# Ambiguous (exist in both)
("Hand", "both"),
("Finger", "both"),
("Arm", "both"),
("Name", "both"),
("Ball", "both"),
# Short/tricky
("a", "en"),
("I", "en"),
("in", "both"),
("an", "both"),
("the", "en"),
("die", "de"),
("der", "de"),
("to", "en"),
("zu", "de"),
]
# ===========================================================================
# Tests
# ===========================================================================
class TestPyspellchecker:
    """Test pyspellchecker capabilities for OCR correction.

    Apart from test_known_words these are benchmarks: they print hit rates
    and timings rather than asserting on them.
    """

    @pytest.fixture(autouse=True)
    def setup(self):
        self.en, self.de = _load_pyspellchecker()

    def test_known_words(self):
        """Verify basic dictionary lookup."""
        assert self.en.known(["school"])
        assert self.en.known(["beautiful"])
        assert self.de.known(["schule"])  # lowercase
        assert self.de.known(["freund"])
        # Not known
        assert not self.en.known(["xyzqwk"])
        assert not self.de.known(["xyzqwk"])

    def test_correction_quality(self):
        """Test correction suggestions for OCR errors."""
        results = []
        for ocr, expected, lang, category in OCR_TEST_CASES:
            if category in ("sentence", "sentence_correct", "merged", "a_vs_I"):
                continue  # skip multi-word cases
            spell = self.en if lang == "en" else self.de
            corrected = []
            for w in ocr.split():
                if spell.known([w.lower()]):
                    corrected.append(w)
                    continue
                fix = spell.correction(w.lower())
                if fix and fix != w.lower():
                    # Preserve case
                    if w[0].isupper():
                        fix = fix[0].upper() + fix[1:]
                    corrected.append(fix)
                else:
                    corrected.append(w)
            result = " ".join(corrected)
            ok = result == expected
            results.append((ocr, expected, result, ok, category))
            if not ok:
                print(f" MISS: '{ocr}''{result}' (expected '{expected}') [{category}]")
            else:
                print(f" OK: '{ocr}''{result}' [{category}]")
        correct = sum(1 for row in results if row[3])
        total = len(results)
        print(f"\npyspellchecker: {correct}/{total} correct ({100*correct/total:.0f}%)")

    def test_language_detection_heuristic(self):
        """Test dual-dictionary language detection."""
        results = []
        for word, expected_lang in LANG_DETECT_CASES:
            w = word.lower()
            in_en = bool(self.en.known([w]))
            in_de = bool(self.de.known([w]))
            if in_en and in_de:
                detected = "both"
            elif in_en:
                detected = "en"
            elif in_de:
                detected = "de"
            else:
                detected = "unknown"
            ok = detected == expected_lang
            results.append((word, expected_lang, detected, ok))
            if not ok:
                print(f" MISS: '{word}'{detected} (expected {expected_lang})")
            else:
                print(f" OK: '{word}'{detected}")
        correct = sum(1 for row in results if row[3])
        total = len(results)
        print(f"\nLang detection heuristic: {correct}/{total} correct ({100*correct/total:.0f}%)")

    def test_umlaut_awareness(self):
        """Test if pyspellchecker suggests umlaut corrections."""
        # "Schuler" should suggest "Schüler"
        candidates = self.de.candidates("schuler")
        print(f" 'schuler' candidates: {candidates}")
        # "uber" should suggest "über"
        candidates_uber = self.de.candidates("uber")
        print(f" 'uber' candidates: {candidates_uber}")
        # "Turkei" should suggest "Türkei"
        candidates_turkei = self.de.candidates("turkei")
        print(f" 'turkei' candidates: {candidates_turkei}")

    def test_speed_100_words(self):
        """Measure correction speed for 100 words."""
        words_en = ["beautful", "teh", "becasue", "freind", "shcool",
                    "homwork", "yesturday", "chilren", "becuse", "enviroment"] * 10
        # perf_counter() is the monotonic, high-resolution clock intended
        # for measuring short durations.
        t0 = time.perf_counter()
        for w in words_en:
            self.en.correction(w)
        dt = time.perf_counter() - t0
        print(f"\n pyspellchecker: 100 EN corrections in {dt*1000:.0f}ms")
        words_de = ["schuler", "bucher", "turkei", "strasze", "entschuldigung",
                    "kindergaten", "freumd", "hauaufgaben", "umwlt", "ubung"] * 10
        t0 = time.perf_counter()
        for w in words_de:
            self.de.correction(w)
        dt = time.perf_counter() - t0
        print(f" pyspellchecker: 100 DE corrections in {dt*1000:.0f}ms")
class TestSymspellpy:
    """Test symspellpy as a faster alternative.

    Every test is skipped when symspellpy (or its bundled dictionary)
    cannot be loaded. The tests print results rather than asserting.
    """

    @pytest.fixture(autouse=True)
    def setup(self):
        try:
            self.sym, self.Verbosity = _load_symspellpy()
            self.available = True
        except (ImportError, FileNotFoundError) as e:
            self.available = False
            pytest.skip(f"symspellpy not installed: {e}")

    def test_correction_quality(self):
        """Test symspellpy corrections (EN only — no DE dict bundled)."""
        multi_word = ("sentence", "sentence_correct", "merged", "a_vs_I")
        # Name the language column explicitly instead of filtering on `_`,
        # which by convention marks a value that is never read.
        en_cases = [
            (ocr, expected, category)
            for ocr, expected, lang, category in OCR_TEST_CASES
            if lang == "en" and category not in multi_word
        ]
        results = []
        for ocr, expected, category in en_cases:
            suggestions = self.sym.lookup(ocr.lower(), self.Verbosity.CLOSEST, max_edit_distance=2)
            if suggestions:
                fix = suggestions[0].term
                if ocr[0].isupper():
                    fix = fix[0].upper() + fix[1:]
                result = fix
            else:
                result = ocr
            ok = result == expected
            results.append((ocr, expected, result, ok, category))
            status = "OK" if ok else "MISS"
            print(f" {status}: '{ocr}''{result}' (expected '{expected}') [{category}]")
        correct = sum(1 for row in results if row[3])
        total = len(results)
        print(f"\nsymspellpy EN: {correct}/{total} correct ({100*correct/total:.0f}%)")

    def test_speed_100_words(self):
        """Measure symspellpy correction speed for 100 words."""
        words = ["beautful", "teh", "becasue", "freind", "shcool",
                 "homwork", "yesturday", "chilren", "becuse", "enviroment"] * 10
        # perf_counter() is the monotonic, high-resolution clock intended
        # for measuring short durations.
        t0 = time.perf_counter()
        for w in words:
            self.sym.lookup(w, self.Verbosity.CLOSEST, max_edit_distance=2)
        dt = time.perf_counter() - t0
        print(f"\n symspellpy: 100 EN corrections in {dt*1000:.0f}ms")

    def test_compound_segmentation(self):
        """Test symspellpy's word segmentation for merged words."""
        cases = [
            ("atmyschool", "at my school"),
            ("goodidea", "good idea"),
            ("makeadecision", "make a decision"),
        ]
        for merged, expected in cases:
            result = self.sym.word_segmentation(merged)
            ok = result.corrected_string == expected
            status = "OK" if ok else "MISS"
            print(f" {status}: '{merged}''{result.corrected_string}' (expected '{expected}')")
class TestContextDisambiguation:
    """Test context-based disambiguation for a/I and similar cases."""

    @pytest.fixture(autouse=True)
    def setup(self):
        self.en, self.de = _load_pyspellchecker()

    def test_bigram_context(self):
        """Use simple bigram heuristic for a/I disambiguation.

        Approach: decide between 'a <next_word>' and 'I <next_word>' by
        looking the next word up in two hand-written follower sets, one
        for words that follow 'I' (verbs) and one for words that follow
        'a' (nouns/adjectives). Results are printed, not asserted.
        """
        # Common words that follow "I" (verbs)
        after_i = {"am", "was", "have", "had", "do", "did", "will",
                   "would", "can", "could", "should", "shall", "may",
                   "might", "think", "know", "see", "want", "need",
                   "like", "love", "hate", "go", "went", "come",
                   "came", "say", "said", "get", "got", "make", "made",
                   "take", "took", "give", "gave", "tell", "told",
                   "feel", "felt", "find", "found", "believe", "hope",
                   "remember", "forget", "understand", "mean", "meant",
                   "don't", "didn't", "can't", "won't", "couldn't",
                   "shouldn't", "wouldn't", "haven't", "hadn't"}
        # Common words that follow "a" (nouns/adjectives)
        after_a = {"lot", "few", "little", "bit", "good", "bad",
                   "big", "small", "great", "new", "old", "long",
                   "short", "man", "woman", "boy", "girl", "dog",
                   "cat", "book", "car", "house", "day", "year",
                   "nice", "beautiful", "large", "huge", "tiny"}

        def disambiguate_a_I(token: str, next_word: str) -> str:
            """Given an ambiguous 'a' or 'I' (or 'l'), pick the right one."""
            follower = next_word.lower()
            # The "I" set is consulted first, then the "a" set; a word in
            # neither set leaves the token unchanged.
            for replacement, followers in (("I", after_i), ("a", after_a)):
                if follower in followers:
                    return replacement
            return token  # no change if uncertain

        cases = [
            ("l", "am", "I"),
            ("l", "was", "I"),
            ("l", "think", "I"),
            ("a", "book", "a"),
            ("a", "cat", "a"),
            ("a", "lot", "a"),
            ("l", "big", "a"),  # "a big ..."
            ("a", "have", "I"),  # "I have ..."
        ]
        outcomes = []
        for token, next_word, expected in cases:
            result = disambiguate_a_I(token, next_word)
            ok = result == expected
            outcomes.append((token, next_word, expected, result, ok))
            status = "OK" if ok else "MISS"
            print(f" {status}: '{token} {next_word}...''{result}' (expected '{expected}')")
        correct = sum(1 for row in outcomes if row[-1])
        total = len(outcomes)
        print(f"\na/I disambiguation: {correct}/{total} correct ({100*correct/total:.0f}%)")
class TestLangDetectLibrary:
    """Benchmark the `langid` module, when importable, on sentences and words.

    Both tests print hit counts and never assert; they are skipped when
    `langid` cannot be imported.
    """

    def test_py3langid(self):
        try:
            import langid
        except ImportError:
            pytest.skip("langid not installed")
        sentences = [
            ("I go to school every day", "en"),
            ("Ich gehe jeden Tag zur Schule", "de"),
            ("The weather is nice today", "en"),
            ("Das Wetter ist heute schön", "de"),
            ("She likes to play football", "en"),
            ("Er spielt gerne Fußball", "de"),
        ]
        hits = []
        for text, expected in sentences:
            lang, confidence = langid.classify(text)
            matched = lang == expected
            hits.append(matched)
            status = "OK" if matched else "MISS"
            print(f" {status}: '{text[:40]}...'{lang} ({confidence:.2f}) (expected {expected})")
        correct = sum(hits)
        print(f"\nlangid sentence detection: {correct}/{len(hits)} correct")

    def test_langid_single_words(self):
        """langid on single words — expected to be unreliable."""
        try:
            import langid
        except ImportError:
            pytest.skip("langid not installed")
        words = [("school", "en"), ("Schule", "de"), ("book", "en"),
                 ("Buch", "de"), ("car", "en"), ("Auto", "de"),
                 ("a", "en"), ("I", "en"), ("der", "de"), ("the", "en")]
        hits = []
        for word, expected in words:
            lang, conf = langid.classify(word)
            matched = lang == expected
            hits.append(matched)
            status = "OK" if matched else "MISS"
            print(f" {status}: '{word}'{lang} ({conf:.2f}) (expected {expected})")
        correct = sum(hits)
        print(f"\nlangid single-word: {correct}/{len(hits)} correct")
class TestIntegratedApproach:
    """Exercise dictionary-based language detection plus spell correction.

    The fixture stores the two checkers returned by
    ``_load_pyspellchecker()`` as ``self.en`` and ``self.de``; the helper
    methods only use their ``known()`` and ``correction()`` methods.
    """

    @pytest.fixture(autouse=True)
    def setup(self):
        """Load both spell checkers before every test in this class."""
        checkers = _load_pyspellchecker()
        self.en, self.de = checkers

    def detect_language(self, word: str) -> str:
        """Classify ``word`` by which of the two dictionaries knows it.

        Returns "ambiguous" for words of one or two characters, otherwise
        "both", "en", "de" or "unknown".
        """
        lowered = word.lower()
        if len(lowered) <= 2:
            # Very short words are not looked up at all.
            return "ambiguous"

        known_en = bool(self.en.known([lowered]))
        known_de = bool(self.de.known([lowered]))
        verdicts = {
            (True, True): "both",
            (True, False): "en",
            (False, True): "de",
            (False, False): "unknown",
        }
        return verdicts[(known_en, known_de)]

    def correct_word(self, word: str, expected_lang: str) -> str:
        """Return ``word`` corrected against the dictionary for ``expected_lang``.

        Any ``expected_lang`` other than "en" selects the German checker.
        A word known to either dictionary is returned untouched, as is a
        word for which the checker offers no different suggestion.
        """
        lowered = word.lower()
        if expected_lang == "en":
            primary, secondary = self.en, self.de
        else:
            primary, secondary = self.de, self.en

        # Valid in the expected language, or valid in the other one:
        # either way there is nothing to fix.
        if primary.known([lowered]) or secondary.known([lowered]):
            return word

        suggestion = primary.correction(lowered)
        if not suggestion or suggestion == lowered:
            return word

        # Carry an initial capital from the input over to the suggestion.
        if word[0].isupper():
            suggestion = suggestion[0].upper() + suggestion[1:]
        return suggestion

    def test_full_pipeline(self):
        """Correct sample EN/DE column pairs word by word and print the hit rate."""
        vocab_entries = [
            # (english_col, german_col, expected_en, expected_de)
            ("beautful", "schön", "beautiful", "schön"),
            ("school", "Schule", "school", "Schule"),
            ("teh cat", "die Katze", "the cat", "die Katze"),
            ("freind", "Freund", "friend", "Freund"),
            ("homwork", "Hausaufgaben", "homework", "Hausaufgaben"),
            ("Schuler", "Schuler", "Schuler", "Schüler"),  # DE umlaut: Schüler
        ]

        def fix_column(text: str, lang: str) -> str:
            # Whitespace-split the cell, correct each word, re-join with spaces.
            return " ".join(self.correct_word(piece, lang) for piece in text.split())

        total = len(vocab_entries)
        en_correct = 0
        de_correct = 0
        for en_ocr, de_ocr, exp_en, exp_de in vocab_entries:
            en_fixed = fix_column(en_ocr, "en")
            de_fixed = fix_column(de_ocr, "de")

            if en_fixed == exp_en:
                en_status = "OK"
                en_correct += 1
            else:
                en_status = "MISS"
            if de_fixed == exp_de:
                de_status = "OK"
                de_correct += 1
            else:
                de_status = "MISS"

            print(f" EN {en_status}: '{en_ocr}''{en_fixed}' (expected '{exp_en}')")
            print(f" DE {de_status}: '{de_ocr}''{de_fixed}' (expected '{exp_de}')")

        print(f"\nEN corrections: {en_correct}/{total} correct")
        print(f"DE corrections: {de_correct}/{total} correct")

View File

@@ -0,0 +1,57 @@
'use client'
import React from 'react'
import type { VocabWorksheetHook } from '../types'
/**
 * Card that offers the generated worksheet PDFs for download.
 *
 * When `h.worksheetId` is truthy it renders a success banner, a button that
 * calls `h.downloadPDF('worksheet')`, a second button calling
 * `h.downloadPDF('solution')` (only when `h.includeSolutions` is truthy) and a
 * button wired to `h.resetSession`. Without a worksheet id only a placeholder
 * message is shown.
 *
 * @param h - Hook object providing the state and actions read above, plus the
 *   `isDark` flag and the `glassCard` class string used for styling.
 */
export function ExportTab({ h }: { h: VocabWorksheetHook }) {
// isDark selects the dark or light class variants; glassCard is interpolated into className strings.
const { isDark, glassCard } = h
return (
<div className={`${glassCard} rounded-2xl p-6`}>
<h2 className={`text-lg font-semibold mb-4 ${isDark ? 'text-white' : 'text-slate-900'}`}>PDF herunterladen</h2>
{h.worksheetId ? (
<div className="space-y-4">
<div className={`p-4 rounded-xl ${isDark ? 'bg-green-500/20 border border-green-500/30' : 'bg-green-100 border border-green-200'}`}>
<div className="flex items-center gap-3">
<svg className="w-6 h-6 text-green-500" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
</svg>
<span className={`font-medium ${isDark ? 'text-green-200' : 'text-green-700'}`}>Arbeitsblatt erfolgreich generiert!</span>
</div>
</div>
<div className="grid grid-cols-2 gap-4">
<button onClick={() => h.downloadPDF('worksheet')} className={`${glassCard} p-6 rounded-xl text-left transition-all hover:shadow-lg ${isDark ? 'hover:border-purple-400/50' : 'hover:border-purple-500'}`}>
<div className={`w-12 h-12 mb-3 rounded-xl flex items-center justify-center ${isDark ? 'bg-purple-500/30' : 'bg-purple-100'}`}>
<svg className={`w-6 h-6 ${isDark ? 'text-purple-300' : 'text-purple-600'}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M12 10v6m0 0l-3-3m3 3l3-3m2 8H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
</svg>
</div>
<h3 className={`font-semibold mb-1 ${isDark ? 'text-white' : 'text-slate-900'}`}>Arbeitsblatt</h3>
<p className={`text-sm ${isDark ? 'text-white/60' : 'text-slate-500'}`}>PDF zum Ausdrucken</p>
</button>
{h.includeSolutions && (
<button onClick={() => h.downloadPDF('solution')} className={`${glassCard} p-6 rounded-xl text-left transition-all hover:shadow-lg ${isDark ? 'hover:border-green-400/50' : 'hover:border-green-500'}`}>
<div className={`w-12 h-12 mb-3 rounded-xl flex items-center justify-center ${isDark ? 'bg-green-500/30' : 'bg-green-100'}`}>
<svg className={`w-6 h-6 ${isDark ? 'text-green-300' : 'text-green-600'}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
</div>
<h3 className={`font-semibold mb-1 ${isDark ? 'text-white' : 'text-slate-900'}`}>Loesungsblatt</h3>
<p className={`text-sm ${isDark ? 'text-white/60' : 'text-slate-500'}`}>PDF mit Loesungen</p>
</button>
)}
</div>
<button onClick={h.resetSession} className={`w-full py-3 rounded-xl border font-medium transition-colors ${isDark ? 'border-white/20 text-white/80 hover:bg-white/10' : 'border-slate-300 text-slate-700 hover:bg-slate-50'}`}>
Neues Arbeitsblatt erstellen
</button>
</div>
) : (
<p className={`text-center py-12 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>Noch kein Arbeitsblatt generiert.</p>
)}
</div>
)
}

View File

@@ -0,0 +1,39 @@
'use client'
import React from 'react'
import type { VocabWorksheetHook } from '../types'
/**
 * Full-screen overlay that shows the currently selected source document.
 *
 * Exactly one source is rendered, chosen in this order: the directly uploaded
 * file (`h.directFile` with `h.directFilePreview`), then the selected mobile
 * upload, then the stored document matching `h.selectedDocumentId`. Images are
 * shown with `<img>`, everything else in an `<iframe>`. Clicking the backdrop
 * or the close button calls `h.setShowFullPreview(false)`; clicks inside the
 * content area are stopped from reaching the backdrop.
 *
 * Accessibility: the icon-only close button carries an `aria-label` and every
 * iframe a `title`, so assistive technology can name them.
 *
 * @param h - Hook object providing the selection state and `setShowFullPreview`.
 */
export function FullscreenPreview({ h }: { h: VocabWorksheetHook }) {
  // Shared dismiss handler for the backdrop and the close button.
  const close = () => h.setShowFullPreview(false)
  return (
    <div className="fixed inset-0 z-50 bg-black/80 backdrop-blur-sm flex items-center justify-center" onClick={close}>
      <button
        type="button"
        aria-label="Vorschau schliessen"
        onClick={close}
        className="absolute top-4 right-4 p-2 rounded-full bg-white/10 hover:bg-white/20 text-white z-10 transition-colors"
      >
        <svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
          <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
        </svg>
      </button>
      <div className="max-w-[95vw] max-h-[95vh] overflow-auto" onClick={(e) => e.stopPropagation()}>
        {h.directFile?.type.startsWith('image/') && h.directFilePreview && (
          <img src={h.directFilePreview} alt="Original" className="max-w-none" />
        )}
        {h.directFile?.type === 'application/pdf' && h.directFilePreview && (
          <iframe src={h.directFilePreview} title="Original" className="border-0 rounded-xl bg-white" style={{ width: '90vw', height: '90vh' }} />
        )}
        {h.selectedMobileFile && !h.directFile && (
          h.selectedMobileFile.type.startsWith('image/')
            ? <img src={h.selectedMobileFile.dataUrl} alt="Original" className="max-w-none" />
            : <iframe src={h.selectedMobileFile.dataUrl} title="Original" className="border-0 rounded-xl bg-white" style={{ width: '90vw', height: '90vh' }} />
        )}
        {h.selectedDocumentId && !h.directFile && !h.selectedMobileFile && (() => {
          // Stored documents are looked up by id; without a URL nothing is rendered.
          const doc = h.storedDocuments.find(d => d.id === h.selectedDocumentId)
          if (!doc?.url) return null
          return doc.type.startsWith('image/')
            ? <img src={doc.url} alt="Original" className="max-w-none" />
            : <iframe src={doc.url} title="Original" className="border-0 rounded-xl bg-white" style={{ width: '90vw', height: '90vh' }} />
        })()}
      </div>
    </div>
  )
}

View File

@@ -0,0 +1,135 @@
'use client'
import React from 'react'
import type { VocabWorksheetHook } from '../types'
/**
 * Modal that presents the result of comparing several OCR methods on one page.
 *
 * Sections, each rendered only when its condition holds:
 * - Header: page number (`h.ocrComparePageIndex + 1`, or '-' when the index is
 *   null) and a close button calling `h.setShowOcrComparison(false)`.
 * - Spinner while `h.isComparingOcr` is truthy.
 * - Error box when `h.ocrCompareError` is set.
 * - Results when `h.ocrCompareResult` is set and no comparison is running: one
 *   card per entry of `ocrCompareResult.methods`, highlighted when its key
 *   equals `recommendation.best_method`; successful methods list at most the
 *   first 10 vocabulary pairs plus a "+ N weitere..." line. If
 *   `ocrCompareResult.comparison` exists, a summary shows the agreement counts
 *   and `agreement_rate` rounded to a whole percentage.
 *
 * @param h - Hook object providing the comparison state, the setter and the
 *   `isDark` / `glassCard` styling values.
 */
export function OcrComparisonModal({ h }: { h: VocabWorksheetHook }) {
// isDark selects the dark or light class variants; glassCard is interpolated into className strings.
const { isDark, glassCard } = h
return (
<div className="fixed inset-0 z-50 flex items-center justify-center p-4 bg-black/50 backdrop-blur-sm">
<div className={`relative w-full max-w-6xl max-h-[90vh] overflow-auto rounded-3xl ${glassCard} p-6`}>
{/* Header */}
<div className="flex items-center justify-between mb-6">
<div>
<h2 className={`text-xl font-bold ${isDark ? 'text-white' : 'text-slate-900'}`}>
OCR-Methoden Vergleich
</h2>
<p className={`text-sm ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
Seite {h.ocrComparePageIndex !== null ? h.ocrComparePageIndex + 1 : '-'}
</p>
</div>
<button
onClick={() => h.setShowOcrComparison(false)}
className={`p-2 rounded-xl ${isDark ? 'hover:bg-white/10 text-white' : 'hover:bg-black/5 text-slate-500'}`}
>
<svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
</svg>
</button>
</div>
{/* Loading State */}
{h.isComparingOcr && (
<div className="flex flex-col items-center justify-center py-12">
<div className="w-12 h-12 border-4 border-purple-500 border-t-transparent rounded-full animate-spin mb-4" />
<p className={isDark ? 'text-white/60' : 'text-slate-500'}>
Vergleiche OCR-Methoden... (kann 1-2 Minuten dauern)
</p>
</div>
)}
{/* Error State */}
{h.ocrCompareError && (
<div className={`p-4 rounded-xl ${isDark ? 'bg-red-500/20 text-red-300' : 'bg-red-100 text-red-700'}`}>
Fehler: {h.ocrCompareError}
</div>
)}
{/* Results */}
{h.ocrCompareResult && !h.isComparingOcr && (
<div className="space-y-6">
{/* Method Results Grid */}
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
{Object.entries(h.ocrCompareResult.methods || {}).map(([key, method]: [string, any]) => (
<div
key={key}
className={`p-4 rounded-2xl ${
h.ocrCompareResult.recommendation?.best_method === key
? (isDark ? 'bg-green-500/20 border border-green-500/50' : 'bg-green-100 border border-green-300')
: (isDark ? 'bg-white/5 border border-white/10' : 'bg-white/50 border border-black/10')
}`}
>
<div className="flex items-center justify-between mb-3">
<h3 className={`font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
{method.name}
</h3>
{h.ocrCompareResult.recommendation?.best_method === key && (
<span className="px-2 py-1 text-xs font-medium bg-green-500 text-white rounded-full">
Beste
</span>
)}
</div>
{method.success ? (
<>
<div className={`text-sm mb-2 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
<span className="font-medium">{method.vocabulary_count}</span> Vokabeln in <span className="font-medium">{method.duration_seconds}s</span>
</div>
{method.vocabulary && method.vocabulary.length > 0 && (
<div className={`max-h-48 overflow-y-auto rounded-xl p-2 ${isDark ? 'bg-black/20' : 'bg-white/50'}`}>
{method.vocabulary.slice(0, 10).map((v: any, idx: number) => (
<div key={idx} className={`text-sm py-1 border-b last:border-0 ${isDark ? 'border-white/10 text-white/80' : 'border-black/5 text-slate-700'}`}>
<span className="font-medium">{v.english}</span> = {v.german}
</div>
))}
{method.vocabulary.length > 10 && (
<div className={`text-xs mt-2 ${isDark ? 'text-white/40' : 'text-slate-400'}`}>
+ {method.vocabulary.length - 10} weitere...
</div>
)}
</div>
)}
</>
) : (
<div className={`text-sm ${isDark ? 'text-red-300' : 'text-red-600'}`}>
{method.error || 'Fehler'}
</div>
)}
</div>
))}
</div>
{/* Comparison Summary */}
{h.ocrCompareResult.comparison && (
<div className={`p-4 rounded-2xl ${isDark ? 'bg-blue-500/20 border border-blue-500/30' : 'bg-blue-100 border border-blue-200'}`}>
<h3 className={`font-semibold mb-3 ${isDark ? 'text-blue-300' : 'text-blue-900'}`}>
Uebereinstimmung
</h3>
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 text-sm">
<div>
<span className={isDark ? 'text-blue-200' : 'text-blue-700'}>Von allen erkannt:</span>
<span className="ml-2 font-bold">{h.ocrCompareResult.comparison.found_by_all_methods?.length || 0}</span>
</div>
<div>
<span className={isDark ? 'text-blue-200' : 'text-blue-700'}>Nur teilweise:</span>
<span className="ml-2 font-bold">{h.ocrCompareResult.comparison.found_by_some_methods?.length || 0}</span>
</div>
<div>
<span className={isDark ? 'text-blue-200' : 'text-blue-700'}>Gesamt einzigartig:</span>
<span className="ml-2 font-bold">{h.ocrCompareResult.comparison.total_unique_vocabulary || 0}</span>
</div>
<div>
<span className={isDark ? 'text-blue-200' : 'text-blue-700'}>Uebereinstimmung:</span>
<span className="ml-2 font-bold">{Math.round((h.ocrCompareResult.comparison.agreement_rate || 0) * 100)}%</span>
</div>
</div>
</div>
)}
</div>
)}
</div>
</div>
)
}

View File

@@ -0,0 +1,125 @@
'use client'
import React from 'react'
import type { VocabWorksheetHook } from '../types'
import { defaultOcrPrompts } from '../constants'
/**
 * Settings card for the OCR filter options stored in `h.ocrPrompts`.
 *
 * Every control writes through `h.saveOcrPrompts`, passing a copy of the
 * current `h.ocrPrompts` with one field replaced:
 * - three checkboxes for `filterHeaders`, `filterFooters`, `filterPageNumbers`;
 * - two text inputs for `headerPatterns` / `footerPatterns`, shown joined with
 *   ", " and parsed back by splitting on ",", trimming and dropping empties;
 * - a textarea for `customFilter`.
 * The reset button saves `defaultOcrPrompts`; the close button calls
 * `h.setShowSettings(false)`.
 *
 * NOTE(review): the pattern inputs are controlled by the re-joined array and
 * the array is normalised on every change. If `saveOcrPrompts` updates
 * `h.ocrPrompts` immediately, a freshly typed trailing comma would be removed
 * again before the next keystroke — confirm against the hook.
 *
 * @param h - Hook object providing `ocrPrompts`, `saveOcrPrompts`,
 *   `setShowSettings` and the `isDark` / `glassCard` / `glassInput` styling values.
 */
export function OcrSettingsPanel({ h }: { h: VocabWorksheetHook }) {
// isDark selects the dark or light class variants; glassCard and glassInput are interpolated into className strings.
const { isDark, glassCard, glassInput } = h
return (
<div className={`${glassCard} rounded-2xl p-6 mb-6`}>
<div className="flex items-center justify-between mb-4">
<h2 className={`text-lg font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
OCR-Filter Einstellungen
</h2>
<button
onClick={() => h.setShowSettings(false)}
className={`p-1 rounded-lg ${isDark ? 'hover:bg-white/10 text-white/60' : 'hover:bg-black/5 text-slate-500'}`}
>
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
</svg>
</button>
</div>
<div className={`p-4 rounded-xl mb-4 ${isDark ? 'bg-blue-500/20 text-blue-200' : 'bg-blue-100 text-blue-800'}`}>
<p className="text-sm">
Diese Einstellungen helfen, unerwuenschte Elemente wie Seitenzahlen, Kapitelnamen oder Kopfzeilen aus dem OCR-Ergebnis zu filtern.
</p>
</div>
<div className="grid grid-cols-1 md:grid-cols-2 gap-6">
{/* Checkboxes */}
<div className="space-y-3">
<label className={`flex items-center gap-3 cursor-pointer ${isDark ? 'text-white' : 'text-slate-900'}`}>
<input
type="checkbox"
checked={h.ocrPrompts.filterHeaders}
onChange={(e) => h.saveOcrPrompts({ ...h.ocrPrompts, filterHeaders: e.target.checked })}
className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500"
/>
<span>Kopfzeilen filtern (z.B. Kapitelnamen)</span>
</label>
<label className={`flex items-center gap-3 cursor-pointer ${isDark ? 'text-white' : 'text-slate-900'}`}>
<input
type="checkbox"
checked={h.ocrPrompts.filterFooters}
onChange={(e) => h.saveOcrPrompts({ ...h.ocrPrompts, filterFooters: e.target.checked })}
className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500"
/>
<span>Fusszeilen filtern</span>
</label>
<label className={`flex items-center gap-3 cursor-pointer ${isDark ? 'text-white' : 'text-slate-900'}`}>
<input
type="checkbox"
checked={h.ocrPrompts.filterPageNumbers}
onChange={(e) => h.saveOcrPrompts({ ...h.ocrPrompts, filterPageNumbers: e.target.checked })}
className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500"
/>
<span>Seitenzahlen filtern (auch ausgeschrieben: &quot;zweihundertzwoelf&quot;)</span>
</label>
</div>
{/* Patterns */}
<div className="space-y-4">
<div>
<label className={`block text-sm font-medium mb-2 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
Kopfzeilen-Muster (kommagetrennt)
</label>
<input
type="text"
value={h.ocrPrompts.headerPatterns.join(', ')}
onChange={(e) => h.saveOcrPrompts({
...h.ocrPrompts,
headerPatterns: e.target.value.split(',').map(s => s.trim()).filter(Boolean)
})}
placeholder="Unit, Chapter, Lesson..."
className={`w-full px-4 py-2 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500`}
/>
</div>
<div>
<label className={`block text-sm font-medium mb-2 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
Fusszeilen-Muster (kommagetrennt)
</label>
<input
type="text"
value={h.ocrPrompts.footerPatterns.join(', ')}
onChange={(e) => h.saveOcrPrompts({
...h.ocrPrompts,
footerPatterns: e.target.value.split(',').map(s => s.trim()).filter(Boolean)
})}
placeholder="zweihundert, Page, Seite..."
className={`w-full px-4 py-2 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500`}
/>
</div>
</div>
</div>
<div className="mt-4">
<label className={`block text-sm font-medium mb-2 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
Zusaetzlicher Filter-Prompt (optional)
</label>
<textarea
value={h.ocrPrompts.customFilter}
onChange={(e) => h.saveOcrPrompts({ ...h.ocrPrompts, customFilter: e.target.value })}
placeholder="z.B.: Ignoriere alle Zeilen, die nur Zahlen oder Buchstaben enthalten..."
rows={2}
className={`w-full px-4 py-2 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500 resize-none`}
/>
</div>
<div className="mt-4 flex justify-end">
<button
onClick={() => h.saveOcrPrompts(defaultOcrPrompts)}
className={`px-4 py-2 rounded-xl text-sm ${isDark ? 'text-white/60 hover:text-white' : 'text-slate-500 hover:text-slate-700'}`}
>
Auf Standard zuruecksetzen
</button>
</div>
</div>
)
}

View File

@@ -0,0 +1,108 @@
'use client'
import React from 'react'
import type { VocabWorksheetHook } from '../types'
/**
 * Thumbnail grid for choosing which PDF pages are sent to extraction.
 *
 * The heading shows how many pages are selected out of
 * `h.pdfPageCount - h.excludedPages.length`. While `h.isLoadingThumbnails` is
 * truthy a spinner replaces the grid. Each thumbnail in `h.pagesThumbnails`
 * whose index is not in `h.excludedPages` gets three controls: hide the page
 * (`h.excludePage`), run the OCR comparison (`h.runOcrComparison`) and toggle
 * its selection (`h.togglePageSelection`). The bottom button calls
 * `h.processSelectedPages` and is disabled while nothing is selected or
 * `h.isExtracting` is truthy.
 *
 * The submit label uses the singular "Seite" when exactly one page is selected
 * and the plural "Seiten" otherwise.
 *
 * @param h - Hook object providing the page state and the actions named above,
 *   plus the `isDark` / `glassCard` styling values.
 */
export function PageSelection({ h }: { h: VocabWorksheetHook }) {
  const { isDark, glassCard } = h
  const selectedCount = h.selectedPages.length
  // German singular/plural for the submit label ("1 Seite", "2 Seiten", "0 Seiten").
  const pageNoun = selectedCount === 1 ? 'Seite' : 'Seiten'
  return (
    <div className={`${glassCard} rounded-2xl p-6`}>
      <div className="flex items-center justify-between mb-4">
        <h2 className={`text-lg font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
          PDF-Seiten auswaehlen ({h.selectedPages.length} von {h.pdfPageCount - h.excludedPages.length} ausgewaehlt)
        </h2>
        <div className="flex gap-2">
          {h.excludedPages.length > 0 && (
            <button onClick={h.restoreExcludedPages} className={`px-3 py-1 rounded-lg text-sm ${isDark ? 'bg-orange-500/20 text-orange-300 hover:bg-orange-500/30' : 'bg-orange-100 text-orange-700 hover:bg-orange-200'}`}>
              {h.excludedPages.length} ausgeblendet - wiederherstellen
            </button>
          )}
          <button onClick={h.selectAllPages} className={`px-3 py-1 rounded-lg text-sm transition-colors ${isDark ? 'bg-white/10 hover:bg-white/20 text-white' : 'bg-slate-100 hover:bg-slate-200 text-slate-900'}`}>
            Alle
          </button>
          <button onClick={h.selectNoPages} className={`px-3 py-1 rounded-lg text-sm transition-colors ${isDark ? 'bg-white/10 hover:bg-white/20 text-white' : 'bg-slate-100 hover:bg-slate-200 text-slate-900'}`}>
            Keine
          </button>
        </div>
      </div>
      <p className={`text-sm mb-4 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
        Klicken Sie auf eine Seite um sie auszuwaehlen. Klicken Sie auf das X um leere Seiten auszublenden.
      </p>
      {h.isLoadingThumbnails ? (
        <div className="flex items-center justify-center py-12">
          <div className="w-8 h-8 border-4 border-purple-500 border-t-transparent rounded-full animate-spin" />
          <span className={`ml-3 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>Lade Seitenvorschau...</span>
        </div>
      ) : (
        <div className="grid grid-cols-2 sm:grid-cols-3 md:grid-cols-4 lg:grid-cols-6 gap-4 mb-6">
          {h.pagesThumbnails.map((thumb, idx) => {
            // Hidden pages keep their index so the remaining page numbers stay stable.
            if (h.excludedPages.includes(idx)) return null
            return (
              <div key={idx} className="relative group">
                {/* Exclude/Delete Button */}
                <button
                  onClick={(e) => h.excludePage(idx, e)}
                  className="absolute top-1 left-1 z-10 p-1 rounded-full opacity-0 group-hover:opacity-100 transition-opacity bg-red-500/80 hover:bg-red-600 text-white"
                  title="Seite ausblenden"
                >
                  <svg className="w-3 h-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
                  </svg>
                </button>
                {/* OCR Compare Button */}
                <button
                  onClick={(e) => { e.stopPropagation(); h.runOcrComparison(idx); }}
                  className="absolute top-1 right-1 z-10 p-1 rounded-full opacity-0 group-hover:opacity-100 transition-opacity bg-blue-500/80 hover:bg-blue-600 text-white"
                  title="OCR-Methoden vergleichen"
                >
                  <svg className="w-3 h-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z" />
                  </svg>
                </button>
                <button
                  onClick={() => h.togglePageSelection(idx)}
                  className={`relative rounded-xl overflow-hidden border-2 transition-all w-full ${
                    h.selectedPages.includes(idx)
                      ? 'border-purple-500 ring-2 ring-purple-500/50'
                      : (isDark ? 'border-white/20 hover:border-white/40' : 'border-slate-200 hover:border-slate-300')
                  }`}
                >
                  <img src={thumb} alt={`Seite ${idx + 1}`} className="w-full h-auto" />
                  <div className={`absolute bottom-0 left-0 right-0 py-1 text-center text-xs font-medium ${
                    h.selectedPages.includes(idx)
                      ? 'bg-purple-500 text-white'
                      : (isDark ? 'bg-black/60 text-white/80' : 'bg-white/90 text-slate-700')
                  }`}>
                    Seite {idx + 1}
                  </div>
                  {h.selectedPages.includes(idx) && (
                    <div className="absolute top-2 right-2 w-6 h-6 bg-purple-500 rounded-full flex items-center justify-center">
                      <svg className="w-4 h-4 text-white" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                        <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
                      </svg>
                    </div>
                  )}
                </button>
              </div>
            )
          })}
        </div>
      )}
      <div className="flex justify-center">
        <button
          onClick={h.processSelectedPages}
          disabled={h.selectedPages.length === 0 || h.isExtracting}
          className="px-8 py-4 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-2xl font-semibold disabled:opacity-50 hover:shadow-xl hover:shadow-purple-500/30 transition-all transform hover:scale-105"
        >
          {h.isExtracting ? 'Extrahiere Vokabeln...' : `${selectedCount} ${pageNoun} verarbeiten`}
        </button>
      </div>
    </div>
  )
}

View File

@@ -0,0 +1,31 @@
'use client'
import React from 'react'
import { QRCodeUpload } from '@/components/QRCodeUpload'
import type { VocabWorksheetHook } from '../types'
/**
 * Modal wrapper around `QRCodeUpload` for the session `h.uploadSessionId`.
 *
 * Clicking the backdrop or the component's `onClose` callback calls
 * `h.setShowQRModal(false)`. Whenever the upload component reports its file
 * list, the list is stored via `h.setMobileUploadedFiles`; if it is non-empty
 * the last file becomes the selected mobile file and the direct-file and
 * stored-document selections are cleared.
 *
 * @param h - Hook object providing `uploadSessionId`, `isDark` and the setters
 *   used above.
 */
export function QRCodeModal({ h }: { h: VocabWorksheetHook }) {
// Only the theme flag is needed here; it picks the panel background class.
const { isDark } = h
return (
<div className="fixed inset-0 z-50 flex items-center justify-center p-4">
<div className="absolute inset-0 bg-black/50 backdrop-blur-sm" onClick={() => h.setShowQRModal(false)} />
<div className={`relative w-full max-w-md rounded-3xl ${
isDark ? 'bg-slate-900' : 'bg-white'
}`}>
<QRCodeUpload
sessionId={h.uploadSessionId}
onClose={() => h.setShowQRModal(false)}
onFilesChanged={(files) => {
h.setMobileUploadedFiles(files)
// Select the last entry of the reported list and drop the other two source selections.
if (files.length > 0) {
h.setSelectedMobileFile(files[files.length - 1])
h.setDirectFile(null)
h.setSelectedDocumentId(null)
}
}}
/>
</div>
</div>
)
}

View File

@@ -0,0 +1,315 @@
'use client'
import React from 'react'
import type { VocabWorksheetHook } from '../types'
import { formatFileSize } from '../constants'
/**
 * First screen of the worksheet flow: resume a session or pick a document and
 * name a new session.
 *
 * Sections, top to bottom:
 * - Existing sessions (only when `h.existingSessions` is non-empty): cards that
 *   call `h.resumeSession(s)` on click and `h.deleteSession(s.id, e)` from the
 *   delete button.
 * - A static five-step explanation.
 * - Step 1, document selection: a hidden file input (PNG/JPEG/PDF) triggered by
 *   the upload button, a button that opens the QR modal, the list of mobile
 *   uploads (hidden while a direct file is chosen) and the list of stored
 *   documents (hidden while a direct or mobile file is chosen).
 * - Step 2 (only when some source is selected): a preview that prefers the
 *   direct file, then the mobile file, then the stored document, next to the
 *   session-name input and the button that calls `h.startSession()`.
 *
 * NOTE(review): the "Lade Sessions..." spinner sits inside the
 * `existingSessions.length > 0` condition, so it can only appear when sessions
 * are already present — confirm this is intended.
 *
 * @param h - Hook object providing all state and actions read above, plus the
 *   `isDark` / `glassCard` / `glassInput` styling values.
 */
export function UploadScreen({ h }: { h: VocabWorksheetHook }) {
// isDark selects the dark or light class variants; glassCard and glassInput are interpolated into className strings.
const { isDark, glassCard, glassInput } = h
return (
<div className="space-y-6">
{/* Existing Sessions */}
{h.existingSessions.length > 0 && (
<div className={`${glassCard} rounded-2xl p-6`}>
<h2 className={`text-lg font-semibold mb-4 ${isDark ? 'text-white' : 'text-slate-900'}`}>
Vorhandene Sessions fortsetzen
</h2>
{h.isLoadingSessions ? (
<div className="flex items-center gap-3 py-4">
<div className="w-5 h-5 border-2 border-purple-500 border-t-transparent rounded-full animate-spin" />
<span className={isDark ? 'text-white/60' : 'text-slate-500'}>Lade Sessions...</span>
</div>
) : (
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
{h.existingSessions.map((s) => (
<div
key={s.id}
className={`${glassCard} p-4 rounded-xl text-left transition-all hover:shadow-lg relative group cursor-pointer ${
isDark ? 'hover:border-purple-400/50' : 'hover:border-purple-400'
}`}
onClick={() => h.resumeSession(s)}
>
{/* Delete Button */}
<button
onClick={(e) => h.deleteSession(s.id, e)}
className={`absolute top-2 right-2 p-1.5 rounded-lg opacity-0 group-hover:opacity-100 transition-opacity ${
isDark ? 'hover:bg-red-500/20 text-red-400' : 'hover:bg-red-100 text-red-500'
}`}
title="Session loeschen"
>
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
</svg>
</button>
<div className="flex items-start gap-3">
<div className={`w-10 h-10 rounded-lg flex items-center justify-center flex-shrink-0 ${
s.status === 'extracted' || s.status === 'completed'
? (isDark ? 'bg-green-500/30' : 'bg-green-100')
: (isDark ? 'bg-white/10' : 'bg-slate-100')
}`}>
{s.status === 'extracted' || s.status === 'completed' ? (
<svg className="w-5 h-5 text-green-500" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
</svg>
) : (
<svg className={`w-5 h-5 ${isDark ? 'text-white/40' : 'text-slate-400'}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 6v6m0 0v6m0-6h6m-6 0H6" />
</svg>
)}
</div>
<div className="flex-1 min-w-0">
<h3 className={`font-medium truncate ${isDark ? 'text-white' : 'text-slate-900'}`}>{s.name}</h3>
<p className={`text-sm ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
{s.vocabulary_count} Vokabeln
{s.status === 'pending' && ' • Nicht gestartet'}
{s.status === 'extracted' && ' • Bereit'}
{s.status === 'completed' && ' • Abgeschlossen'}
</p>
{s.created_at && (
<p className={`text-xs mt-1 ${isDark ? 'text-white/40' : 'text-slate-400'}`}>
{new Date(s.created_at).toLocaleDateString('de-DE', {
day: '2-digit',
month: '2-digit',
year: 'numeric',
hour: '2-digit',
minute: '2-digit'
})}
</p>
)}
</div>
<svg className={`w-5 h-5 flex-shrink-0 ${isDark ? 'text-white/30' : 'text-slate-300'}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
</svg>
</div>
</div>
))}
</div>
)}
</div>
)}
{/* Explanation */}
<div className={`${glassCard} rounded-2xl p-6 ${isDark ? 'bg-gradient-to-br from-purple-500/20 to-pink-500/20' : 'bg-gradient-to-br from-purple-100/50 to-pink-100/50'}`}>
<h2 className={`text-lg font-semibold mb-3 ${isDark ? 'text-white' : 'text-slate-900'}`}>
{h.existingSessions.length > 0 ? 'Oder neue Session starten:' : 'So funktioniert es:'}
</h2>
<ol className={`space-y-2 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
{['Dokument (Bild oder PDF) auswaehlen', 'Vorschau pruefen und Session benennen', 'Bei PDFs: Seiten auswaehlen die verarbeitet werden sollen', 'KI extrahiert Vokabeln — pruefen, korrigieren, Arbeitsblatt-Typ waehlen', 'PDF herunterladen und ausdrucken'].map((text, i) => (
<li key={i} className="flex items-start gap-2">
<span className={`w-6 h-6 rounded-full flex items-center justify-center text-xs font-bold flex-shrink-0 ${isDark ? 'bg-purple-500/30 text-purple-300' : 'bg-purple-200 text-purple-700'}`}>{i + 1}</span>
<span>{text}</span>
</li>
))}
</ol>
</div>
{/* Step 1: Document Selection */}
<div className={`${glassCard} rounded-2xl p-6`}>
<h2 className={`text-lg font-semibold mb-4 ${isDark ? 'text-white' : 'text-slate-900'}`}>
1. Dokument auswaehlen
</h2>
<input ref={h.directFileInputRef} type="file" accept="image/png,image/jpeg,image/jpg,application/pdf" onChange={h.handleDirectFileSelect} className="hidden" />
<div className="grid grid-cols-2 gap-3 mb-4">
{/* File Upload Button */}
<button
onClick={() => h.directFileInputRef.current?.click()}
className={`p-4 rounded-xl border-2 border-dashed transition-all ${
h.directFile
? (isDark ? 'border-green-400/50 bg-green-500/20' : 'border-green-500 bg-green-50')
: (isDark ? 'border-white/20 hover:border-purple-400/50' : 'border-slate-300 hover:border-purple-500')
}`}
>
{h.directFile ? (
<div className="flex items-center gap-3">
<span className="text-2xl">{h.directFile.type === 'application/pdf' ? '📄' : '🖼️'}</span>
<div className="text-left flex-1 min-w-0">
<p className={`font-medium truncate ${isDark ? 'text-white' : 'text-slate-900'}`}>{h.directFile.name}</p>
<p className={`text-xs ${isDark ? 'text-white/60' : 'text-slate-500'}`}>{formatFileSize(h.directFile.size)}</p>
</div>
<svg className="w-5 h-5 text-green-500 flex-shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
</svg>
</div>
) : (
<div className={`text-center ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
<span className="text-2xl block mb-1">📁</span>
<span className="text-sm">Datei auswaehlen</span>
</div>
)}
</button>
{/* QR Code Upload Button */}
<button
onClick={() => h.setShowQRModal(true)}
className={`p-4 rounded-xl border-2 border-dashed transition-all ${
h.selectedMobileFile
? (isDark ? 'border-green-400/50 bg-green-500/20' : 'border-green-500 bg-green-50')
: (isDark ? 'border-white/20 hover:border-purple-400/50' : 'border-slate-300 hover:border-purple-500')
}`}
>
{h.selectedMobileFile ? (
<div className="flex items-center gap-3">
<span className="text-2xl">{h.selectedMobileFile.type.startsWith('image/') ? '🖼️' : '📄'}</span>
<div className="text-left flex-1 min-w-0">
<p className={`font-medium truncate text-sm ${isDark ? 'text-white' : 'text-slate-900'}`}>{h.selectedMobileFile.name}</p>
<p className={`text-xs ${isDark ? 'text-white/60' : 'text-slate-500'}`}>vom Handy</p>
</div>
<svg className="w-5 h-5 text-green-500 flex-shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
</svg>
</div>
) : (
<div className={`text-center ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
<span className="text-2xl block mb-1">📱</span>
<span className="text-sm">Mit Handy scannen</span>
</div>
)}
</button>
</div>
{/* Mobile Uploaded Files */}
{h.mobileUploadedFiles.length > 0 && !h.directFile && (
<>
<div className={`text-center text-sm mb-3 ${isDark ? 'text-white/40' : 'text-slate-400'}`}> Vom Handy hochgeladen </div>
<div className="space-y-2 max-h-32 overflow-y-auto mb-4">
{h.mobileUploadedFiles.map((file) => (
<button
key={file.id}
onClick={() => { h.setSelectedMobileFile(file); h.setDirectFile(null); h.setSelectedDocumentId(null); h.setError(null) }}
className={`w-full flex items-center gap-3 p-3 rounded-xl text-left transition-all ${
h.selectedMobileFile?.id === file.id
? (isDark ? 'bg-green-500/30 border-2 border-green-400/50' : 'bg-green-100 border-2 border-green-500')
: (isDark ? 'bg-white/5 border-2 border-transparent hover:border-white/20' : 'bg-slate-50 border-2 border-transparent hover:border-slate-200')
}`}
>
<span className="text-xl">{file.type.startsWith('image/') ? '🖼️' : '📄'}</span>
<div className="flex-1 min-w-0">
<p className={`font-medium truncate ${isDark ? 'text-white' : 'text-slate-900'}`}>{file.name}</p>
<p className={`text-xs ${isDark ? 'text-white/60' : 'text-slate-500'}`}>{formatFileSize(file.size)}</p>
</div>
{h.selectedMobileFile?.id === file.id && (
<svg className="w-5 h-5 text-green-500" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
</svg>
)}
</button>
))}
</div>
</>
)}
{/* Stored Documents */}
{h.storedDocuments.length > 0 && !h.directFile && !h.selectedMobileFile && (
<>
<div className={`text-center text-sm mb-3 ${isDark ? 'text-white/40' : 'text-slate-400'}`}> oder aus Ihren Dokumenten </div>
<div className="space-y-2 max-h-32 overflow-y-auto">
{h.storedDocuments.map((doc) => (
<button
key={doc.id}
onClick={() => { h.setSelectedDocumentId(doc.id); h.setDirectFile(null); h.setSelectedMobileFile(null); h.setError(null) }}
className={`w-full flex items-center gap-3 p-3 rounded-xl text-left transition-all ${
h.selectedDocumentId === doc.id
? (isDark ? 'bg-purple-500/30 border-2 border-purple-400/50' : 'bg-purple-100 border-2 border-purple-500')
: (isDark ? 'bg-white/5 border-2 border-transparent hover:border-white/20' : 'bg-slate-50 border-2 border-transparent hover:border-slate-200')
}`}
>
<span className="text-xl">{doc.type === 'application/pdf' ? '📄' : '🖼️'}</span>
<div className="flex-1 min-w-0">
<p className={`font-medium truncate ${isDark ? 'text-white' : 'text-slate-900'}`}>{doc.name}</p>
<p className={`text-xs ${isDark ? 'text-white/60' : 'text-slate-500'}`}>{formatFileSize(doc.size)}</p>
</div>
{h.selectedDocumentId === doc.id && (
<svg className="w-5 h-5 text-purple-500" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
</svg>
)}
</button>
))}
</div>
</>
)}
</div>
{/* Step 2: Preview + Session Name */}
{(h.directFile || h.selectedMobileFile || h.selectedDocumentId) && (
<div className="grid grid-cols-1 lg:grid-cols-5 gap-6">
{/* Document Preview */}
<div className={`${glassCard} rounded-2xl p-6 lg:col-span-3`}>
<div className="flex items-center justify-between mb-4">
<h2 className={`text-lg font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
Vorschau
</h2>
<button
onClick={() => h.setShowFullPreview(true)}
className={`px-3 py-1.5 rounded-lg text-sm font-medium transition-all flex items-center gap-2 ${
isDark ? 'bg-white/10 hover:bg-white/20 text-white' : 'bg-slate-100 hover:bg-slate-200 text-slate-700'
}`}
>
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0zM10 7v3m0 0v3m0-3h3m-3 0H7" />
</svg>
Originalgroesse
</button>
</div>
<div className={`max-h-[60vh] overflow-auto rounded-xl border ${isDark ? 'border-white/10' : 'border-black/10'}`}>
{h.directFile?.type.startsWith('image/') && h.directFilePreview && (
<img src={h.directFilePreview} alt="Vorschau" className="w-full h-auto" />
)}
{h.directFile?.type === 'application/pdf' && h.directFilePreview && (
<iframe src={h.directFilePreview} className="w-full border-0 rounded-xl" style={{ height: '60vh' }} />
)}
{h.selectedMobileFile && !h.directFile && (
h.selectedMobileFile.type.startsWith('image/')
? <img src={h.selectedMobileFile.dataUrl} alt="Vorschau" className="w-full h-auto" />
: <iframe src={h.selectedMobileFile.dataUrl} className="w-full border-0 rounded-xl" style={{ height: '60vh' }} />
)}
{h.selectedDocumentId && !h.directFile && !h.selectedMobileFile && (() => {
// Stored documents are looked up by id; without a URL a "no preview" text is shown instead.
const doc = h.storedDocuments.find(d => d.id === h.selectedDocumentId)
if (!doc?.url) return <p className={`p-8 text-center ${isDark ? 'text-white/40' : 'text-slate-400'}`}>Keine Vorschau verfuegbar</p>
return doc.type.startsWith('image/')
? <img src={doc.url} alt="Vorschau" className="w-full h-auto" />
: <iframe src={doc.url} className="w-full border-0 rounded-xl" style={{ height: '60vh' }} />
})()}
</div>
</div>
{/* Session Name + Start */}
<div className={`${glassCard} rounded-2xl p-6 lg:col-span-2 flex flex-col`}>
<h2 className={`text-lg font-semibold mb-4 ${isDark ? 'text-white' : 'text-slate-900'}`}>
2. Session benennen
</h2>
<input
type="text"
value={h.sessionName}
onChange={(e) => { h.setSessionName(e.target.value); h.setError(null) }}
placeholder="z.B. Englisch Klasse 7 - Unit 3"
className={`w-full px-4 py-3 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500 mb-4`}
autoFocus
/>
<p className={`text-sm mb-6 ${isDark ? 'text-white/50' : 'text-slate-500'}`}>
Benennen Sie die Session z.B. nach dem Schulbuch-Kapitel, damit Sie sie spaeter wiederfinden.
</p>
<div className="flex-1" />
<button
onClick={() => {
// NOTE(review): the button's `disabled` prop below already includes
// `!h.sessionName.trim()`, and a disabled button does not fire click
// events, so this empty-name branch looks unreachable — confirm which
// of the two behaviours is wanted.
if (!h.sessionName.trim()) {
h.setError('Bitte geben Sie einen Session-Namen ein (z.B. "Englisch Klasse 7 - Unit 3")')
return
}
h.startSession()
}}
disabled={h.isCreatingSession || !h.sessionName.trim()}
className="w-full px-6 py-4 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-2xl font-semibold text-lg disabled:opacity-50 hover:shadow-xl hover:shadow-purple-500/30 transition-all transform hover:scale-105"
>
{h.isCreatingSession ? 'Verarbeite...' : 'Weiter →'}
</button>
</div>
</div>
)}
</div>
)
}

View File

@@ -0,0 +1,305 @@
'use client'
import React from 'react'
import type { VocabWorksheetHook, IpaMode, SyllableMode } from '../types'
import { getApiBase } from '../constants'
/**
 * Vocabulary review tab.
 *
 * Left column: the original page images the vocabulary was taken from.
 * Right column: status banners plus an editable grid with one row per entry.
 * All state and handlers are read from the hook object `h`; this component
 * only derives display values and renders them.
 */
export function VocabularyTab({ h }: { h: VocabWorksheetHook }) {
  const { isDark, glassCard, glassInput } = h
  const extras = h.getAllExtraColumns()
  // Fixed columns (insert handle, checkbox, page) + EN/DE/example + extras + delete.
  const gridCols = `14px 32px 36px repeat(${3 + extras.length}, 1fr) 32px`

  // Class strings shared by several elements below.
  const frameBorder = isDark ? 'border-white/10' : 'border-black/10'
  const cellInputClass = `px-2 py-1 rounded-lg border text-sm min-w-0 ${glassInput} focus:outline-none focus:ring-1 focus:ring-purple-500`
  const modeSelectClass = `px-2 py-1.5 text-xs rounded-md border ${isDark ? 'border-white/20 bg-white/10 text-white' : 'border-gray-200 bg-white text-gray-600'}`

  // Zero-based page indices to display: the explicit selection when there is one,
  // otherwise every page referenced by an entry (source_page is 1-based there).
  const pageIndices = h.selectedPages.length > 0
    ? h.selectedPages
    : [...new Set(h.vocabulary.map(v => (v.source_page || 1) - 1))].sort((a, b) => a - b)

  // Page images are served per session, so without a session there is nothing to list.
  const apiBase = getApiBase()
  const activeSession = h.session
  const pageImages: { idx: number; src: string }[] = activeSession
    ? pageIndices
        .filter(idx => idx >= 0)
        .map(idx => ({ idx, src: `${apiBase}/api/v1/vocab/sessions/${activeSession.id}/pdf-page-image/${idx}` }))
    : []

  // Preference order for the left pane: page images, then the uploaded image, then a placeholder.
  let originalPane: React.ReactNode
  if (pageImages.length > 0) {
    originalPane = pageImages.map(({ idx, src }) => (
      <div key={idx} className={`relative rounded-xl overflow-hidden border ${frameBorder}`}>
        <div className={`absolute top-2 left-2 px-2 py-0.5 rounded-lg text-xs font-medium z-10 ${isDark ? 'bg-black/60 text-white' : 'bg-white/90 text-slate-700'}`}>
          S. {idx + 1}
        </div>
        <img src={src} alt={`Seite ${idx + 1}`} className="w-full h-auto" />
      </div>
    ))
  } else if (h.uploadedImage) {
    originalPane = (
      <div className={`relative rounded-xl overflow-hidden border ${frameBorder}`}>
        <img src={h.uploadedImage} alt="Arbeitsblatt" className="w-full h-auto" />
      </div>
    )
  } else {
    originalPane = (
      <div className={`flex-1 flex items-center justify-center py-12 ${isDark ? 'text-white/40' : 'text-slate-400'}`}>
        <div className="text-center">
          <svg className="w-12 h-12 mx-auto mb-2 opacity-50" fill="none" stroke="currentColor" viewBox="0 0 24 24">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z" />
          </svg>
          <p className="text-xs">Kein Bild verfuegbar</p>
        </div>
      </div>
    )
  }

  // Footer figures.
  const selectedCount = h.vocabulary.filter(v => v.selected).length
  const sourcePages = [...new Set(h.vocabulary.map(v => v.source_page).filter(Boolean))].sort((a, b) => (a || 0) - (b || 0))
  const pagesSummary = sourcePages.length > 1 ? ` • Seiten: ${sourcePages.join(', ')}` : ''

  const allSelected = h.vocabulary.length > 0 && h.vocabulary.every(v => v.selected)
  const idleWithResults = !h.currentlyProcessingPage && h.successfulPages.length > 0

  // Extra columns are stored per page; find the page that owns the column before removing it.
  const removeColumn = (key: string) => {
    const owner = Object.entries(h.pageExtraColumns).find(([, cols]) => cols.some(c => c.key === key))
    if (owner) h.removeExtraColumn(Number(owner[0]), key)
  }

  return (
    <div className="flex flex-col lg:flex-row gap-4" style={{ height: 'calc(100vh - 240px)', minHeight: '500px' }}>
      {/* Left: Original pages */}
      <div className={`${glassCard} rounded-2xl p-4 lg:w-1/3 flex flex-col overflow-hidden`}>
        <h2 className={`text-sm font-semibold mb-3 flex-shrink-0 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
          Original ({pageIndices.length} Seiten)
        </h2>
        <div className="flex-1 overflow-y-auto space-y-3">
          {originalPane}
        </div>
      </div>
      {/* Right: Vocabulary table */}
      <div className={`${glassCard} rounded-2xl p-4 lg:w-2/3 flex flex-col overflow-hidden`}>
        <div className="flex items-center justify-between mb-3 flex-shrink-0">
          <h2 className={`text-lg font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
            Vokabeln ({h.vocabulary.length})
          </h2>
          <div className="flex items-center gap-2">
            {/* IPA mode: the new value is passed on directly because the state setter has not applied yet */}
            <select
              value={h.ipaMode}
              onChange={(e) => {
                const nextIpa = e.target.value as IpaMode
                h.setIpaMode(nextIpa)
                h.reprocessPages(nextIpa, h.syllableMode)
              }}
              className={modeSelectClass}
              title="Lautschrift (IPA)"
            >
              <option value="none">IPA: Aus</option>
              <option value="auto">IPA: Auto</option>
              <option value="en">IPA: nur EN</option>
              <option value="de">IPA: nur DE</option>
              <option value="all">IPA: Alle</option>
            </select>
            {/* Syllable mode */}
            <select
              value={h.syllableMode}
              onChange={(e) => {
                const nextSyllable = e.target.value as SyllableMode
                h.setSyllableMode(nextSyllable)
                h.reprocessPages(h.ipaMode, nextSyllable)
              }}
              className={modeSelectClass}
              title="Silbentrennung"
            >
              <option value="none">Silben: Aus</option>
              <option value="auto">Silben: Original</option>
              <option value="en">Silben: nur EN</option>
              <option value="de">Silben: nur DE</option>
              <option value="all">Silben: Alle</option>
            </select>
            <button onClick={h.saveVocabulary} className={`px-4 py-2 rounded-xl text-sm font-medium transition-colors ${isDark ? 'bg-white/10 hover:bg-white/20 text-white' : 'bg-slate-100 hover:bg-slate-200 text-slate-900'}`}>
              Speichern
            </button>
            <button onClick={() => h.setActiveTab('worksheet')} className="px-4 py-2 rounded-xl text-sm font-medium bg-gradient-to-r from-purple-500 to-pink-500 text-white hover:shadow-lg transition-all">
              Weiter
            </button>
          </div>
        </div>
        {/* Error messages for failed pages */}
        {h.processingErrors.length > 0 && (
          <div className={`rounded-xl p-3 mb-3 flex-shrink-0 ${isDark ? 'bg-orange-500/20 text-orange-200 border border-orange-500/30' : 'bg-orange-100 text-orange-700 border border-orange-200'}`}>
            <div className="font-medium mb-1 text-sm">Einige Seiten konnten nicht verarbeitet werden:</div>
            <ul className="text-xs space-y-0.5">
              {h.processingErrors.map((err, idx) => (
                <li key={idx}> {err}</li>
              ))}
            </ul>
          </div>
        )}
        {/* Processing Progress */}
        {h.currentlyProcessingPage && (
          <div className={`rounded-xl p-3 mb-3 flex-shrink-0 ${isDark ? 'bg-purple-500/20 border border-purple-500/30' : 'bg-purple-100 border border-purple-200'}`}>
            <div className="flex items-center gap-3">
              <div className={`w-4 h-4 border-2 ${isDark ? 'border-purple-300' : 'border-purple-600'} border-t-transparent rounded-full animate-spin`} />
              <div>
                <div className={`text-sm font-medium ${isDark ? 'text-purple-200' : 'text-purple-700'}`}>Verarbeite Seite {h.currentlyProcessingPage}...</div>
                <div className={`text-xs ${isDark ? 'text-purple-300/70' : 'text-purple-600'}`}>
                  {h.successfulPages.length > 0 && `${h.successfulPages.length} Seite(n) fertig • `}
                  {h.vocabulary.length} Vokabeln bisher
                </div>
              </div>
            </div>
          </div>
        )}
        {/* Success info */}
        {idleWithResults && h.failedPages.length === 0 && (
          <div className={`rounded-xl p-2 mb-3 text-xs flex-shrink-0 ${isDark ? 'bg-green-500/20 text-green-200 border border-green-500/30' : 'bg-green-100 text-green-700 border border-green-200'}`}>
            Alle {h.successfulPages.length} Seite(n) erfolgreich verarbeitet - {h.vocabulary.length} Vokabeln insgesamt
          </div>
        )}
        {/* Partial success info */}
        {idleWithResults && h.failedPages.length > 0 && (
          <div className={`rounded-xl p-2 mb-3 text-xs flex-shrink-0 ${isDark ? 'bg-yellow-500/20 text-yellow-200 border border-yellow-500/30' : 'bg-yellow-100 text-yellow-700 border border-yellow-200'}`}>
            {h.successfulPages.length} Seite(n) erfolgreich, {h.failedPages.length} fehlgeschlagen - {h.vocabulary.length} Vokabeln extrahiert
          </div>
        )}
        {h.vocabulary.length === 0 ? (
          <p className={`text-center py-8 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>Keine Vokabeln gefunden.</p>
        ) : (
          <div className="flex flex-col flex-1 overflow-hidden">
            {/* Fixed Header */}
            <div className={`flex-shrink-0 grid gap-1 px-2 py-2 text-sm font-medium border-b items-center ${isDark ? 'border-white/10 text-white/60' : 'border-black/10 text-slate-500'}`} style={{ gridTemplateColumns: gridCols }}>
              <div>{/* insert-triangle spacer */}</div>
              <div className="flex items-center justify-center">
                <input
                  type="checkbox"
                  checked={allSelected}
                  onChange={h.toggleAllSelection}
                  className="w-4 h-4 rounded border-gray-300 text-purple-600 focus:ring-purple-500 cursor-pointer"
                  title="Alle auswaehlen"
                />
              </div>
              <div>S.</div>
              <div>Englisch</div>
              <div>Deutsch</div>
              <div>Beispiel</div>
              {extras.map(col => (
                <div key={col.key} className="flex items-center gap-1 group">
                  <span className="truncate">{col.label}</span>
                  <button
                    onClick={() => removeColumn(col.key)}
                    className={`opacity-0 group-hover:opacity-100 transition-opacity ${isDark ? 'text-red-400 hover:text-red-300' : 'text-red-500 hover:text-red-600'}`}
                    title="Spalte entfernen"
                  >
                    <svg className="w-3 h-3" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" /></svg>
                  </button>
                </div>
              ))}
              <div className="flex items-center justify-center">
                <button
                  onClick={() => h.addExtraColumn(0)}
                  className={`p-0.5 rounded transition-colors ${isDark ? 'hover:bg-white/10 text-white/40 hover:text-white/70' : 'hover:bg-slate-200 text-slate-400 hover:text-slate-600'}`}
                  title="Spalte hinzufuegen"
                >
                  <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 4v16m8-8H4" /></svg>
                </button>
              </div>
            </div>
            {/* Scrollable Content */}
            <div className="flex-1 overflow-y-auto">
              {h.vocabulary.map((entry, rowIndex) => (
                <div key={entry.id} className={`grid gap-1 px-2 py-1 items-center ${isDark ? 'hover:bg-white/5' : 'hover:bg-black/5'}`} style={{ gridTemplateColumns: gridCols }}>
                  <button
                    onClick={() => h.addVocabularyEntry(rowIndex)}
                    className={`w-3.5 h-3.5 flex items-center justify-center opacity-0 hover:opacity-100 transition-opacity ${isDark ? 'text-purple-400' : 'text-purple-500'}`}
                    title="Zeile einfuegen"
                  >
                    <svg className="w-2.5 h-2.5" viewBox="0 0 10 10" fill="currentColor"><polygon points="0,0 10,5 0,10" /></svg>
                  </button>
                  <div className="flex items-center justify-center">
                    <input
                      type="checkbox"
                      checked={entry.selected || false}
                      onChange={() => h.toggleVocabularySelection(entry.id)}
                      className="w-4 h-4 rounded border-gray-300 text-purple-600 focus:ring-purple-500 cursor-pointer"
                    />
                  </div>
                  <div className={`flex items-center justify-center text-xs font-medium rounded ${isDark ? 'bg-white/10 text-white/60' : 'bg-black/10 text-slate-600'}`}>
                    {entry.source_page || '-'}
                  </div>
                  <input
                    type="text"
                    value={entry.english}
                    onChange={(e) => h.updateVocabularyEntry(entry.id, 'english', e.target.value)}
                    className={cellInputClass}
                  />
                  <input
                    type="text"
                    value={entry.german}
                    onChange={(e) => h.updateVocabularyEntry(entry.id, 'german', e.target.value)}
                    className={cellInputClass}
                  />
                  <input
                    type="text"
                    value={entry.example_sentence || ''}
                    onChange={(e) => h.updateVocabularyEntry(entry.id, 'example_sentence', e.target.value)}
                    placeholder="Beispiel"
                    className={cellInputClass}
                  />
                  {extras.map(col => (
                    <input
                      key={col.key}
                      type="text"
                      value={(entry.extras && entry.extras[col.key]) || ''}
                      onChange={(e) => h.updateVocabularyEntry(entry.id, col.key, e.target.value)}
                      placeholder={col.label}
                      className={cellInputClass}
                    />
                  ))}
                  <button onClick={() => h.deleteVocabularyEntry(entry.id)} className={`p-1 rounded-lg ${isDark ? 'hover:bg-red-500/20 text-red-400' : 'hover:bg-red-100 text-red-500'}`}>
                    <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
                    </svg>
                  </button>
                </div>
              ))}
              {/* Final insert triangle */}
              <div className="px-2 py-1">
                <button
                  onClick={() => h.addVocabularyEntry()}
                  className={`w-3.5 h-3.5 flex items-center justify-center opacity-30 hover:opacity-100 transition-opacity ${isDark ? 'text-purple-400' : 'text-purple-500'}`}
                  title="Zeile am Ende einfuegen"
                >
                  <svg className="w-2.5 h-2.5" viewBox="0 0 10 10" fill="currentColor"><polygon points="0,0 10,5 0,10" /></svg>
                </button>
              </div>
            </div>
            {/* Footer */}
            <div className={`flex-shrink-0 pt-2 border-t flex items-center justify-between text-xs ${isDark ? 'border-white/10 text-white/50' : 'border-black/10 text-slate-400'}`}>
              <span>
                {h.vocabulary.length} Vokabeln
                {selectedCount > 0 && ` (${selectedCount} ausgewaehlt)`}
                {pagesSummary}
              </span>
              <button
                onClick={() => h.addVocabularyEntry()}
                className={`px-3 py-1 rounded-lg text-xs flex items-center gap-1 transition-colors ${isDark ? 'bg-white/10 hover:bg-white/20 text-white/70' : 'bg-slate-100 hover:bg-slate-200 text-slate-600'}`}
              >
                <svg className="w-3 h-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 4v16m8-8H4" />
                </svg>
                Zeile
              </button>
            </div>
          </div>
        )}
      </div>
    </div>
  )
}

View File

@@ -0,0 +1,155 @@
'use client'
import React from 'react'
import type { VocabWorksheetHook } from '../types'
import { worksheetFormats, worksheetTypes } from '../constants'
/**
 * Worksheet configuration tab.
 *
 * Step 1 picks a template from `worksheetFormats`; step 2 sets the title and
 * the options belonging to the chosen template. The button at the bottom
 * calls `h.generateWorksheet`. All state lives in the hook object `h`.
 */
export function WorksheetTab({ h }: { h: VocabWorksheetHook }) {
  const { isDark, glassCard, glassInput } = h

  // Recurring theme-dependent class fragments.
  const headingTone = isDark ? 'text-white' : 'text-slate-900'
  const mutedTone = isDark ? 'text-white/60' : 'text-slate-500'
  const idleBorder = isDark ? 'border-white/20 hover:border-white/40' : 'border-slate-200 hover:border-slate-300'
  const fieldClass = `w-full px-4 py-3 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500`

  // Check mark shown on the selected format card and on selected worksheet types.
  const checkMark = (
    <svg className="w-5 h-5 text-purple-500" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" /></svg>
  )

  // The same checkbox is offered by both formats; only its label text differs.
  const solutionsCheckbox = (
    <input type="checkbox" checked={h.includeSolutions} onChange={(e) => h.setIncludeSolutions(e.target.checked)} className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500" />
  )

  // The standard format needs at least one worksheet type; also block while a PDF is being generated.
  const cannotGenerate = (h.selectedFormat === 'standard' && h.selectedTypes.length === 0) || h.isGenerating
  const generateLabel = h.isGenerating
    ? 'Generiere PDF...'
    : `${h.selectedFormat === 'nru' ? 'NRU-Arbeitsblatt' : 'Arbeitsblatt'} generieren`

  return (
    <div className={`${glassCard} rounded-2xl p-6`}>
      {/* Step 1: Format Selection */}
      <div className="mb-8">
        <h2 className={`text-lg font-semibold mb-4 ${headingTone}`}>
          1. Vorlage waehlen
        </h2>
        <div className="grid grid-cols-2 gap-4">
          {worksheetFormats.map((format) => {
            const active = h.selectedFormat === format.id
            const cardTone = active
              ? (isDark ? 'border-purple-400/50 bg-purple-500/20 ring-2 ring-purple-500/50' : 'border-purple-500 bg-purple-50 ring-2 ring-purple-500/30')
              : idleBorder
            const badgeTone = active
              ? (isDark ? 'bg-purple-500/30' : 'bg-purple-200')
              : (isDark ? 'bg-white/10' : 'bg-slate-100')
            const iconClass = `w-5 h-5 ${active ? 'text-purple-400' : mutedTone}`
            // The icon is chosen by format id: a document for 'standard', a layout grid otherwise.
            const iconPath = format.id === 'standard'
              ? 'M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z'
              : 'M4 5a1 1 0 011-1h14a1 1 0 011 1v2a1 1 0 01-1 1H5a1 1 0 01-1-1V5zM4 13a1 1 0 011-1h6a1 1 0 011 1v6a1 1 0 01-1 1H5a1 1 0 01-1-1v-6zM16 13a1 1 0 011-1h2a1 1 0 011 1v6a1 1 0 01-1 1h-2a1 1 0 01-1-1v-6z'
            return (
              <button
                key={format.id}
                onClick={() => h.setSelectedFormat(format.id)}
                className={`p-5 rounded-xl border text-left transition-all ${cardTone}`}
              >
                <div className="flex items-start gap-3">
                  <div className={`w-10 h-10 rounded-lg flex items-center justify-center shrink-0 ${badgeTone}`}>
                    <svg className={iconClass} fill="none" stroke="currentColor" viewBox="0 0 24 24">
                      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d={iconPath} />
                    </svg>
                  </div>
                  <div className="flex-1">
                    <div className="flex items-center justify-between">
                      <span className={`font-medium ${headingTone}`}>{format.label}</span>
                      {active && checkMark}
                    </div>
                    <p className={`text-sm mt-1 ${mutedTone}`}>{format.description}</p>
                  </div>
                </div>
              </button>
            )
          })}
        </div>
      </div>
      {/* Step 2: Configuration */}
      <div className="mb-6">
        <h2 className={`text-lg font-semibold mb-4 ${headingTone}`}>
          2. Arbeitsblatt konfigurieren
        </h2>
        {/* Title */}
        <div className="mb-6">
          <label className={`block text-sm font-medium mb-2 ${mutedTone}`}>Titel</label>
          <input
            type="text"
            value={h.worksheetTitle}
            onChange={(e) => h.setWorksheetTitle(e.target.value)}
            placeholder="z.B. Vokabeln Unit 3"
            className={fieldClass}
          />
        </div>
        {/* Standard format options */}
        {h.selectedFormat === 'standard' && (
          <>
            <div className="mb-6">
              <label className={`block text-sm font-medium mb-3 ${mutedTone}`}>Arbeitsblatt-Typen</label>
              <div className="grid grid-cols-2 gap-3">
                {worksheetTypes.map((type) => {
                  const chosen = h.selectedTypes.includes(type.id)
                  const typeTone = chosen
                    ? (isDark ? 'border-purple-400/50 bg-purple-500/20' : 'border-purple-500 bg-purple-50')
                    : idleBorder
                  return (
                    <button
                      key={type.id}
                      onClick={() => h.toggleWorksheetType(type.id)}
                      className={`p-4 rounded-xl border text-left transition-all ${typeTone}`}
                    >
                      <div className="flex items-center justify-between">
                        <span className={`font-medium ${headingTone}`}>{type.label}</span>
                        {chosen && checkMark}
                      </div>
                      <p className={`text-sm mt-1 ${mutedTone}`}>{type.description}</p>
                    </button>
                  )
                })}
              </div>
            </div>
            <div className="grid grid-cols-2 gap-6 mb-6">
              <div>
                <label className={`block text-sm font-medium mb-2 ${mutedTone}`}>Zeilenhoehe</label>
                <select value={h.lineHeight} onChange={(e) => h.setLineHeight(e.target.value)} className={fieldClass}>
                  <option value="normal">Normal</option>
                  <option value="large">Gross</option>
                  <option value="extra-large">Extra gross</option>
                </select>
              </div>
              <div className="flex items-center">
                <label className={`flex items-center gap-3 cursor-pointer ${headingTone}`}>
                  {solutionsCheckbox}
                  <span>Loesungsblatt erstellen</span>
                </label>
              </div>
            </div>
          </>
        )}
        {/* NRU format options */}
        {h.selectedFormat === 'nru' && (
          <div className="space-y-4">
            <div className={`p-4 rounded-xl ${isDark ? 'bg-indigo-500/20 border border-indigo-500/30' : 'bg-indigo-50 border border-indigo-200'}`}>
              <h4 className={`font-medium mb-2 ${isDark ? 'text-indigo-200' : 'text-indigo-700'}`}>NRU-Format Uebersicht:</h4>
              <ul className={`text-sm space-y-1 ${isDark ? 'text-indigo-200/80' : 'text-indigo-600'}`}>
                <li> <strong>Vokabeln:</strong> 3-Spalten-Tabelle (Englisch | Deutsch leer | Korrektur leer)</li>
                <li> <strong>Lernsaetze:</strong> Deutscher Satz + 2 leere Zeilen fuer englische Uebersetzung</li>
                <li> Pro gescannter Seite werden 2 Arbeitsblatt-Seiten erzeugt</li>
              </ul>
            </div>
            <div className="flex items-center">
              <label className={`flex items-center gap-3 cursor-pointer ${headingTone}`}>
                {solutionsCheckbox}
                <span>Loesungsblatt erstellen (mit deutschen Uebersetzungen)</span>
              </label>
            </div>
          </div>
        )}
      </div>
      <button
        onClick={h.generateWorksheet}
        disabled={cannotGenerate}
        className="w-full py-4 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-xl font-semibold disabled:opacity-50 hover:shadow-xl hover:shadow-purple-500/30 transition-all"
      >
        {generateLabel}
      </button>
    </div>
  )
}

View File

@@ -0,0 +1,56 @@
import type { OcrPrompts, WorksheetFormat, WorksheetType } from './types'
// API base URL, resolved from the browser host on every call.
// Non-local hosts are routed through the /klausur-api/ proxy to avoid certificate problems.
export const getApiBase = () => {
  const localApi = 'http://localhost:8086'
  // No `window` outside the browser (e.g. during server-side rendering).
  if (typeof window === 'undefined') return localApi
  const loc = window.location
  return loc.hostname === 'localhost'
    ? localApi
    : `${loc.protocol}//${loc.hostname}/klausur-api`
}
// localStorage keys used by the vocab-worksheet hook.
// DOCUMENTS_KEY: presumably the stored-document list (its read is outside this view; verify in the hook).
export const DOCUMENTS_KEY = 'bp_documents'
// OCR_PROMPTS_KEY: JSON-serialised OCR filter settings, merged over the defaults on load.
export const OCR_PROMPTS_KEY = 'bp_ocr_prompts'
// SESSION_ID_KEY: the generated upload-session id, created once and then reused.
export const SESSION_ID_KEY = 'bp_upload_session'
// Worksheet format templates offered in step 1 of the worksheet tab.
// `label` and `description` are shown to the user as-is (German UI text).
// NOTE(review): WorksheetTab picks its icon by comparing `id` with 'standard';
// the `icon` field is not read there - confirm whether any other consumer uses it.
export const worksheetFormats: { id: WorksheetFormat; label: string; description: string; icon: string }[] = [
{
id: 'standard',
label: 'Standard-Format',
description: 'Klassisches Arbeitsblatt mit waehlbarer Uebersetzungsrichtung',
icon: 'document'
},
{
id: 'nru',
label: 'NRU-Vorlage',
description: '3-Spalten-Tabelle (EN|DE|Korrektur) + Lernsaetze mit Uebersetzungszeilen',
icon: 'template'
},
]
// Default OCR filtering settings.
// The hook starts with these values and spreads any settings stored in
// localStorage over them, so keys missing from a stored object keep these defaults.
// NOTE(review): how the pattern lists are matched against OCR text is not visible here.
export const defaultOcrPrompts: OcrPrompts = {
filterHeaders: true,
filterFooters: true,
filterPageNumbers: true,
customFilter: '',
headerPatterns: ['Unit', 'Chapter', 'Lesson', 'Kapitel', 'Lektion'],
footerPatterns: ['zweihundert', 'dreihundert', 'vierhundert', 'Page', 'Seite']
}
// Exercise types selectable for the 'standard' worksheet format.
// Several can be active at once; `label` and `description` are German UI text shown on each card.
export const worksheetTypes: { id: WorksheetType; label: string; description: string }[] = [
{ id: 'en_to_de', label: 'Englisch → Deutsch', description: 'Englische Woerter uebersetzen' },
{ id: 'de_to_en', label: 'Deutsch → Englisch', description: 'Deutsche Woerter uebersetzen' },
{ id: 'copy', label: 'Abschreibuebung', description: 'Woerter mehrfach schreiben' },
{ id: 'gap_fill', label: 'Lueckensaetze', description: 'Saetze mit Luecken ausfuellen' },
]
/**
 * Format a byte count as a short human-readable string, e.g. 1536 -> "1.5 KB".
 *
 * Uses 1024-based units from B up to GB with at most one decimal place.
 * Values of a terabyte or more are expressed in GB rather than running off
 * the end of the unit table, and values between 0 and 1 stay in bytes.
 *
 * @param bytes Size in bytes.
 * @returns The formatted size; "0 B" for zero, negative or non-finite input.
 */
export const formatFileSize = (bytes: number): string => {
  // The logarithm below is undefined for these inputs and would produce "NaN undefined".
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B'
  const k = 1024
  const sizes = ['B', 'KB', 'MB', 'GB']
  const rawIndex = Math.floor(Math.log(bytes) / Math.log(k))
  // Clamp so fractional sizes (< 1 byte) and sizes >= 1 TB still map to a real unit.
  const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1)
  return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i]
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,189 @@
import { UploadedFile } from '@/components/QRCodeUpload'
// One row of the editable vocabulary table.
export interface VocabularyEntry {
// Stable identifier; used as the React key and passed to the update/delete/toggle handlers.
id: string
english: string
german: string
example_sentence?: string
example_sentence_gap?: string
word_type?: string
// 1-based page number the entry came from; the vocabulary tab subtracts 1 to get a page index.
source_page?: number
// Checkbox state in the vocabulary table; absent is treated as unselected.
selected?: boolean
// Values of dynamically added columns, keyed by ExtraColumn.key.
extras?: Record<string, string>
}
// A user-added column of the vocabulary table.
export interface ExtraColumn {
// Lookup key into VocabularyEntry.extras; also used as the React key for the column.
key: string
// Header text, also shown as the placeholder of the column's inputs.
label: string
}
// A vocabulary session.
// NOTE(review): presumably mirrors the backend's session payload - confirm against the API.
export interface Session {
// Used to build per-session API URLs such as the page-image endpoint.
id: string
name: string
status: string
vocabulary_count: number
image_path?: string
description?: string
source_language?: string
target_language?: string
created_at?: string
}
// A previously stored document that can be chosen as the source for a session.
export interface StoredDocument {
id: string
name: string
// MIME type; the upload preview renders an <img> for 'image/*' and an <iframe> otherwise.
type: string
size: number
// NOTE(review): typed as Date; if documents are restored from JSON this would be a string - verify at the load site.
uploadedAt: Date
// Preview source; without it the upload screen shows a "no preview available" message.
url?: string
}
// OCR filter settings; persisted as JSON in localStorage and merged over the defaults on load.
// NOTE(review): where and how these filters are applied is not visible in this part of the diff.
export interface OcrPrompts {
filterHeaders: boolean
filterFooters: boolean
filterPageNumbers: boolean
customFilter: string
headerPatterns: string[]
footerPatterns: string[]
}
// Identifier of the active tab of the vocab-worksheet page.
export type TabId = 'upload' | 'pages' | 'vocabulary' | 'worksheet' | 'export' | 'settings'
// Exercise kinds that can be generated for the 'standard' format.
export type WorksheetType = 'en_to_de' | 'de_to_en' | 'copy' | 'gap_fill'
// Worksheet template.
export type WorksheetFormat = 'standard' | 'nru'
// Option values of the IPA select in the vocabulary tab.
export type IpaMode = 'auto' | 'en' | 'de' | 'all' | 'none'
// Option values of the syllable select in the vocabulary tab.
export type SyllableMode = 'auto' | 'en' | 'de' | 'all' | 'none'
/**
 * Return type of useVocabWorksheet — used as props by all child components.
 *
 * Child components receive the whole object as a single `h` prop and read
 * state, setters and handlers from it.
 */
export interface VocabWorksheetHook {
// Mounted (SSR guard)
mounted: boolean
// Theme
isDark: boolean
// Precomputed class strings that components interpolate into className.
glassCard: string
glassInput: string
// Tab
activeTab: TabId
setActiveTab: (tab: TabId) => void
// Session
session: Session | null
sessionName: string
setSessionName: (name: string) => void
isCreatingSession: boolean
error: string | null
setError: (err: string | null) => void
extractionStatus: string
// Existing sessions
existingSessions: Session[]
isLoadingSessions: boolean
// Documents
storedDocuments: StoredDocument[]
selectedDocumentId: string | null
setSelectedDocumentId: (id: string | null) => void
// Direct file
directFile: File | null
setDirectFile: (f: File | null) => void
directFilePreview: string | null
showFullPreview: boolean
setShowFullPreview: (show: boolean) => void
directFileInputRef: React.RefObject<HTMLInputElement | null>
// PDF pages
pdfPageCount: number
// Zero-based page indices; the vocabulary tab displays them as idx + 1.
selectedPages: number[]
pagesThumbnails: string[]
isLoadingThumbnails: boolean
excludedPages: number[]
// Extra columns
// Extra columns keyed by page number.
pageExtraColumns: Record<number, ExtraColumn[]>
// Upload
uploadedImage: string | null
isExtracting: boolean
// Vocabulary
vocabulary: VocabularyEntry[]
// Worksheet
selectedTypes: WorksheetType[]
worksheetTitle: string
setWorksheetTitle: (title: string) => void
includeSolutions: boolean
setIncludeSolutions: (inc: boolean) => void
// One of 'normal' | 'large' | 'extra-large' as offered by the worksheet tab's select.
lineHeight: string
setLineHeight: (lh: string) => void
selectedFormat: WorksheetFormat
setSelectedFormat: (f: WorksheetFormat) => void
ipaMode: IpaMode
setIpaMode: (m: IpaMode) => void
syllableMode: SyllableMode
setSyllableMode: (m: SyllableMode) => void
// Export
worksheetId: string | null
isGenerating: boolean
// Processing
processingErrors: string[]
successfulPages: number[]
failedPages: number[]
// Page being processed right now, or null when idle.
// NOTE(review): consumers test this for truthiness, so a value of 0 would read as idle - confirm it is 1-based.
currentlyProcessingPage: number | null
// OCR settings
ocrPrompts: OcrPrompts
showSettings: boolean
setShowSettings: (show: boolean) => void
// QR
showQRModal: boolean
setShowQRModal: (show: boolean) => void
uploadSessionId: string
mobileUploadedFiles: UploadedFile[]
selectedMobileFile: UploadedFile | null
setSelectedMobileFile: (f: UploadedFile | null) => void
setMobileUploadedFiles: (files: UploadedFile[]) => void
// OCR Comparison
showOcrComparison: boolean
setShowOcrComparison: (show: boolean) => void
ocrComparePageIndex: number | null
// Untyped comparison payload; its shape is not declared in this file.
ocrCompareResult: any
isComparingOcr: boolean
ocrCompareError: string | null
// Handlers
handleDirectFileSelect: (e: React.ChangeEvent<HTMLInputElement>) => void
startSession: () => Promise<void>
processSelectedPages: () => Promise<void>
togglePageSelection: (idx: number) => void
selectAllPages: () => void
selectNoPages: () => void
excludePage: (idx: number, e: React.MouseEvent) => void
restoreExcludedPages: () => void
runOcrComparison: (pageIdx: number) => Promise<void>
// `field` is 'english' | 'german' | 'example_sentence' or an extra-column key.
updateVocabularyEntry: (id: string, field: string, value: string) => void
addExtraColumn: (page: number) => void
removeExtraColumn: (page: number, key: string) => void
getExtraColumnsForPage: (page: number) => ExtraColumn[]
getAllExtraColumns: () => ExtraColumn[]
deleteVocabularyEntry: (id: string) => void
toggleVocabularySelection: (id: string) => void
toggleAllSelection: () => void
// Called with a row index by the per-row insert handle and without arguments to append.
addVocabularyEntry: (atIndex?: number) => void
saveVocabulary: () => Promise<void>
generateWorksheet: () => Promise<void>
downloadPDF: (type: 'worksheet' | 'solution') => void
toggleWorksheetType: (type: WorksheetType) => void
resumeSession: (session: Session) => Promise<void>
resetSession: () => Promise<void>
deleteSession: (id: string, e: React.MouseEvent) => Promise<void>
saveOcrPrompts: (prompts: OcrPrompts) => void
formatFileSize: (bytes: number) => string
// Receives the new modes explicitly; callers invoke it right after the matching setter.
reprocessPages: (ipa: IpaMode, syllable: SyllableMode) => void
}

View File

@@ -0,0 +1,843 @@
'use client'
import { useState, useRef, useEffect } from 'react'
import { useTheme } from '@/lib/ThemeContext'
import { useLanguage } from '@/lib/LanguageContext'
import { useRouter } from 'next/navigation'
import { useActivity } from '@/lib/ActivityContext'
import type { UploadedFile } from '@/components/QRCodeUpload'
import type {
VocabularyEntry, ExtraColumn, Session, StoredDocument, OcrPrompts,
TabId, WorksheetType, WorksheetFormat, IpaMode, SyllableMode,
VocabWorksheetHook,
} from './types'
import {
getApiBase, DOCUMENTS_KEY, OCR_PROMPTS_KEY, SESSION_ID_KEY,
defaultOcrPrompts, formatFileSize,
} from './constants'
export function useVocabWorksheet(): VocabWorksheetHook {
const { isDark } = useTheme()
const { t } = useLanguage()
const router = useRouter()
const { startActivity, completeActivity } = useActivity()
const [mounted, setMounted] = useState(false)
// Tab state
const [activeTab, setActiveTab] = useState<TabId>('upload')
// Session state
const [session, setSession] = useState<Session | null>(null)
const [sessionName, setSessionName] = useState('')
const [isCreatingSession, setIsCreatingSession] = useState(false)
const [error, setError] = useState<string | null>(null)
const [extractionStatus, setExtractionStatus] = useState<string>('')
// Existing sessions list
const [existingSessions, setExistingSessions] = useState<Session[]>([])
const [isLoadingSessions, setIsLoadingSessions] = useState(true)
// Documents from storage
const [storedDocuments, setStoredDocuments] = useState<StoredDocument[]>([])
const [selectedDocumentId, setSelectedDocumentId] = useState<string | null>(null)
// Direct file upload
const [directFile, setDirectFile] = useState<File | null>(null)
const [directFilePreview, setDirectFilePreview] = useState<string | null>(null)
const [showFullPreview, setShowFullPreview] = useState(false)
const directFileInputRef = useRef<HTMLInputElement>(null)
// PDF page selection state
const [pdfPageCount, setPdfPageCount] = useState<number>(0)
const [selectedPages, setSelectedPages] = useState<number[]>([])
const [pagesThumbnails, setPagesThumbnails] = useState<string[]>([])
const [isLoadingThumbnails, setIsLoadingThumbnails] = useState(false)
const [excludedPages, setExcludedPages] = useState<number[]>([])
// Dynamic extra columns per source page
const [pageExtraColumns, setPageExtraColumns] = useState<Record<number, ExtraColumn[]>>({})
// Upload state
const [uploadedImage, setUploadedImage] = useState<string | null>(null)
const [isExtracting, setIsExtracting] = useState(false)
const fileInputRef = useRef<HTMLInputElement>(null)
// Vocabulary state
const [vocabulary, setVocabulary] = useState<VocabularyEntry[]>([])
// Worksheet state
const [selectedTypes, setSelectedTypes] = useState<WorksheetType[]>(['en_to_de'])
const [worksheetTitle, setWorksheetTitle] = useState('')
const [includeSolutions, setIncludeSolutions] = useState(true)
const [lineHeight, setLineHeight] = useState('normal')
const [selectedFormat, setSelectedFormat] = useState<WorksheetFormat>('standard')
const [ipaMode, setIpaMode] = useState<IpaMode>('none')
const [syllableMode, setSyllableMode] = useState<SyllableMode>('none')
// Export state
const [worksheetId, setWorksheetId] = useState<string | null>(null)
const [isGenerating, setIsGenerating] = useState(false)
// Processing results
const [processingErrors, setProcessingErrors] = useState<string[]>([])
const [successfulPages, setSuccessfulPages] = useState<number[]>([])
const [failedPages, setFailedPages] = useState<number[]>([])
const [currentlyProcessingPage, setCurrentlyProcessingPage] = useState<number | null>(null)
const [processingQueue, setProcessingQueue] = useState<number[]>([])
// OCR Prompts/Settings
const [ocrPrompts, setOcrPrompts] = useState<OcrPrompts>(defaultOcrPrompts)
const [showSettings, setShowSettings] = useState(false)
// QR Code Upload
const [showQRModal, setShowQRModal] = useState(false)
const [uploadSessionId, setUploadSessionId] = useState('')
const [mobileUploadedFiles, setMobileUploadedFiles] = useState<UploadedFile[]>([])
const [selectedMobileFile, setSelectedMobileFile] = useState<UploadedFile | null>(null)
// OCR Comparison
const [showOcrComparison, setShowOcrComparison] = useState(false)
const [ocrComparePageIndex, setOcrComparePageIndex] = useState<number | null>(null)
const [ocrCompareResult, setOcrCompareResult] = useState<any>(null)
const [isComparingOcr, setIsComparingOcr] = useState(false)
const [ocrCompareError, setOcrCompareError] = useState<string | null>(null)
// --- Effects ---
// Client-only initialisation: mark the hook as mounted (browser APIs are now
// safe to use) and make sure a persistent upload-session id exists.
useEffect(() => {
  setMounted(true)
  const persistedId = localStorage.getItem(SESSION_ID_KEY)
  if (persistedId) {
    setUploadSessionId(persistedId)
    return
  }
  // No id stored yet: create one from the timestamp plus a random base-36 suffix.
  const randomSuffix = Math.random().toString(36).slice(2, 11)
  const freshId = `vocab-${Date.now()}-${randomSuffix}`
  localStorage.setItem(SESSION_ID_KEY, freshId)
  setUploadSessionId(freshId)
}, [])
// Once mounted, restore customised OCR prompts from localStorage.
// Stored values override the defaults key by key.
useEffect(() => {
  if (!mounted) return
  const rawPrompts = localStorage.getItem(OCR_PROMPTS_KEY)
  if (!rawPrompts) return
  try {
    const savedPrompts = JSON.parse(rawPrompts)
    setOcrPrompts({ ...defaultOcrPrompts, ...savedPrompts })
  } catch (e) {
    console.error('Failed to parse OCR prompts:', e)
  }
}, [mounted])
// Once mounted, read the stored document list from localStorage and keep
// only the entries that are images or PDFs.
useEffect(() => {
  if (!mounted) return
  const rawDocuments = localStorage.getItem(DOCUMENTS_KEY)
  if (!rawDocuments) return
  try {
    const isImageOrPdf = (doc: StoredDocument) =>
      doc.type === 'application/pdf' || doc.type?.startsWith('image/')
    const parsedDocuments = JSON.parse(rawDocuments)
    setStoredDocuments(parsedDocuments.filter(isImageOrPdf))
  } catch (e) {
    console.error('Failed to parse stored documents:', e)
  }
}, [mounted])
// Once mounted, fetch the list of existing sessions from the backend.
// The loading flag is cleared whether or not the request succeeds.
useEffect(() => {
  if (!mounted) return
  const fetchSessionList = async () => {
    const API_BASE = getApiBase()
    try {
      const response = await fetch(`${API_BASE}/api/v1/vocab/sessions`)
      if (!response.ok) return
      setExistingSessions(await response.json())
    } catch (e) {
      console.error('Failed to load sessions:', e)
    } finally {
      setIsLoadingSessions(false)
    }
  }
  fetchSessionList()
}, [mounted])
// --- Glassmorphism styles ---
// CSS utility classes for a translucent "glass" card; the variant is chosen
// by the isDark flag (light-on-dark vs. dark-on-light borders/backgrounds).
const glassCard = isDark
? 'backdrop-blur-xl bg-white/10 border border-white/20'
: 'backdrop-blur-xl bg-white/70 border border-black/10'
// Matching classes for text inputs, including placeholder and focus colours.
const glassInput = isDark
? 'bg-white/10 border-white/20 text-white placeholder-white/40 focus:border-purple-400'
: 'bg-white/50 border-black/10 text-slate-900 placeholder-slate-400 focus:border-purple-500'
// --- Handlers ---
// Apply new OCR prompts to state and persist them as JSON in localStorage.
const saveOcrPrompts = (prompts: OcrPrompts) => {
  setOcrPrompts(prompts)
  const serializedPrompts = JSON.stringify(prompts)
  localStorage.setItem(OCR_PROMPTS_KEY, serializedPrompts)
}
// Handle a file chosen via the direct-upload <input>.
// Makes the file the active source (clearing any stored-document or mobile
// selection) and prepares a preview: a data URL for images, a blob URL for
// PDFs, and no preview for other types.
const handleDirectFileSelect = (e: React.ChangeEvent<HTMLInputElement>) => {
  const file = e.target.files?.[0]
  if (!file) return

  // A previous PDF preview is a blob URL created with URL.createObjectURL;
  // release it before replacing it, otherwise the blob stays alive until the
  // document is unloaded.
  if (directFilePreview?.startsWith('blob:')) {
    URL.revokeObjectURL(directFilePreview)
  }

  setDirectFile(file)
  setSelectedDocumentId(null)
  setSelectedMobileFile(null)

  if (file.type.startsWith('image/')) {
    const reader = new FileReader()
    reader.onload = (ev) => {
      setDirectFilePreview(ev.target?.result as string)
    }
    reader.readAsDataURL(file)
  } else if (file.type === 'application/pdf') {
    setDirectFilePreview(URL.createObjectURL(file))
  } else {
    setDirectFilePreview(null)
  }
}
// Create a new vocabulary session and upload the chosen document.
//
// Flow:
//  1. Validate that a session name and a document source are present.
//  2. POST the session (name + current OCR prompts) to the backend.
//  3. Resolve the File to upload: direct file, mobile upload, or stored
//     document (the latter two are decoded from base64 data URLs).
//  4. PDF: upload for page info, then load one thumbnail per page and switch
//     to the page-selection tab.
//     Image: upload for extraction, load the vocabulary and switch to the
//     vocabulary tab.
// On any failure the error message is shown and the session state is cleared.
const startSession = async () => {
  if (!sessionName.trim()) {
    setError('Bitte geben Sie einen Namen fuer die Session ein.')
    return
  }
  if (!selectedDocumentId && !directFile && !selectedMobileFile) {
    setError('Bitte waehlen Sie ein Dokument aus oder laden Sie eine Datei hoch.')
    return
  }

  // Decode the base64 payload of a data URL ("data:<mime>;base64,<data>")
  // into a File with the given name and MIME type.
  const dataUrlToFile = (dataUrl: string, name: string, mimeType: string | undefined): File => {
    const binary = atob(dataUrl.split(',')[1])
    const bytes = new Uint8Array(binary.length)
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i)
    }
    const blob = new Blob([bytes], { type: mimeType })
    return new File([blob], name, { type: mimeType })
  }

  setError(null)
  setIsCreatingSession(true)
  setExtractionStatus('Session wird erstellt...')
  const API_BASE = getApiBase()
  try {
    const sessionRes = await fetch(`${API_BASE}/api/v1/vocab/sessions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        name: sessionName,
        ocr_prompts: ocrPrompts
      }),
    })
    if (!sessionRes.ok) {
      throw new Error('Session konnte nicht erstellt werden')
    }
    const sessionData = await sessionRes.json()
    setSession(sessionData)
    setWorksheetTitle(sessionName)
    startActivity('vocab_extraction', { description: sessionName })

    // Source priority: direct file > mobile upload > stored document.
    let file: File
    let isPdf = false
    if (directFile) {
      file = directFile
      isPdf = directFile.type === 'application/pdf'
    } else if (selectedMobileFile) {
      isPdf = selectedMobileFile.type === 'application/pdf'
      file = dataUrlToFile(selectedMobileFile.dataUrl, selectedMobileFile.name, selectedMobileFile.type)
    } else {
      const selectedDoc = storedDocuments.find(d => d.id === selectedDocumentId)
      if (!selectedDoc || !selectedDoc.url) {
        throw new Error('Das ausgewaehlte Dokument ist nicht verfuegbar.')
      }
      isPdf = selectedDoc.type === 'application/pdf'
      file = dataUrlToFile(selectedDoc.url, selectedDoc.name, selectedDoc.type)
    }

    if (isPdf) {
      setExtractionStatus('PDF wird hochgeladen...')
      const formData = new FormData()
      formData.append('file', file)
      const pdfInfoRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/upload-pdf-info`, {
        method: 'POST',
        body: formData,
      })
      if (!pdfInfoRes.ok) {
        throw new Error('PDF konnte nicht verarbeitet werden')
      }
      const pdfInfo = await pdfInfoRes.json()
      setPdfPageCount(pdfInfo.page_count)
      // All pages are selected by default.
      setSelectedPages(Array.from({ length: pdfInfo.page_count }, (_, i) => i))
      setActiveTab('pages')
      setExtractionStatus(`${pdfInfo.page_count} Seiten erkannt. Vorschau wird geladen...`)
      setIsLoadingThumbnails(true)

      // Keep the array index-aligned with the page index: a thumbnail that
      // fails to load leaves an empty-string placeholder instead of shifting
      // every later page's thumbnail one slot to the left.
      const thumbnails: string[] = Array.from({ length: pdfInfo.page_count }, () => '')
      for (let i = 0; i < pdfInfo.page_count; i++) {
        try {
          const thumbRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/pdf-thumbnail/${i}?hires=true`)
          if (thumbRes.ok) {
            const blob = await thumbRes.blob()
            thumbnails[i] = URL.createObjectURL(blob)
          }
        } catch (e) {
          // Best effort: a missing thumbnail must not abort session creation.
          console.error(`Failed to load thumbnail for page ${i}`, e)
        }
      }
      setPagesThumbnails(thumbnails)
      setIsLoadingThumbnails(false)
      setExtractionStatus(`${pdfInfo.page_count} Seiten bereit. Waehlen Sie die zu verarbeitenden Seiten.`)
    } else {
      setExtractionStatus('KI analysiert das Bild... (kann 30-60 Sekunden dauern)')
      const formData = new FormData()
      formData.append('file', file)
      const uploadRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/upload`, {
        method: 'POST',
        body: formData,
      })
      if (!uploadRes.ok) {
        throw new Error('Bild konnte nicht verarbeitet werden')
      }
      const uploadData = await uploadRes.json()
      setSession(prev => prev ? { ...prev, status: 'extracted', vocabulary_count: uploadData.vocabulary_count } : null)
      const vocabRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/vocabulary`)
      if (vocabRes.ok) {
        const vocabData = await vocabRes.json()
        setVocabulary(vocabData.vocabulary || [])
        setExtractionStatus(`${vocabData.vocabulary?.length || 0} Vokabeln gefunden!`)
      }
      // Leave the result message visible for a moment before switching tabs.
      await new Promise(r => setTimeout(r, 1000))
      setActiveTab('vocabulary')
    }
  } catch (error) {
    console.error('Session start failed:', error)
    setError(error instanceof Error ? error.message : 'Ein Fehler ist aufgetreten')
    setExtractionStatus('')
    setSession(null)
  } finally {
    setIsCreatingSession(false)
  }
}
// Run OCR/extraction for one PDF page of the current session.
// Never throws: HTTP errors, backend-reported failures and network errors
// are all reported as { success: false, error } results.
const processSinglePage = async (pageIndex: number, ipa: IpaMode, syllable: SyllableMode): Promise<{ success: boolean; vocabulary: VocabularyEntry[]; error?: string }> => {
  const API_BASE = getApiBase()
  const pageLabel = `Seite ${pageIndex + 1}`
  const failure = (message: string) => ({ success: false, vocabulary: [] as VocabularyEntry[], error: message })
  try {
    const endpoint = `${API_BASE}/api/v1/vocab/sessions/${session!.id}/process-single-page/${pageIndex}?ipa_mode=${ipa}&syllable_mode=${syllable}`
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ ocr_prompts: ocrPrompts }),
    })
    if (!response.ok) {
      // The error body may be missing or not JSON; fall back to the status code.
      const errBody = await response.json().catch(() => ({}))
      const detail = errBody.detail || `HTTP ${response.status}`
      return failure(`${pageLabel}: ${detail}`)
    }
    const payload = await response.json()
    if (!payload.success) {
      return failure(payload.error || `${pageLabel}: Unbekannter Fehler`)
    }
    return { success: true, vocabulary: payload.vocabulary || [] }
  } catch (e) {
    const reason = e instanceof Error ? e.message : 'Netzwerkfehler'
    return failure(`${pageLabel}: ${reason}`)
  }
}
// Process all selected PDF pages sequentially (ascending page order).
// Vocabulary, success/failure lists and the status text are updated after
// every page so the UI shows incremental progress. Afterwards the thumbnails
// of successfully processed pages are reloaded, because the server may have
// rotated them.
const processSelectedPages = async () => {
  if (!session || selectedPages.length === 0) return
  const pagesToProcess = [...selectedPages].sort((a, b) => a - b)
  setIsExtracting(true)
  setProcessingErrors([])
  setSuccessfulPages([])
  setFailedPages([])
  setProcessingQueue(pagesToProcess)
  setVocabulary([])
  setActiveTab('vocabulary')
  const API_BASE = getApiBase()

  // Local accumulators hold 1-based page numbers; state receives fresh copies.
  const errors: string[] = []
  const successful: number[] = []
  const failed: number[] = []

  for (let i = 0; i < pagesToProcess.length; i++) {
    const pageIndex = pagesToProcess[i]
    setCurrentlyProcessingPage(pageIndex + 1)
    // Show the page number and, separately, the position within the selection.
    // Combining the page number with the selection size directly produced
    // texts such as "Seite 7 von 3".
    setExtractionStatus(`Verarbeite Seite ${pageIndex + 1} (${i + 1} von ${pagesToProcess.length})... (kann 30-60 Sekunden dauern)`)
    const result = await processSinglePage(pageIndex, ipaMode, syllableMode)
    if (result.success) {
      successful.push(pageIndex + 1)
      setSuccessfulPages([...successful])
      setVocabulary(prev => [...prev, ...result.vocabulary])
      setExtractionStatus(`Seite ${pageIndex + 1} fertig: ${result.vocabulary.length} Vokabeln gefunden`)
    } else {
      failed.push(pageIndex + 1)
      setFailedPages([...failed])
      if (result.error) {
        errors.push(result.error)
        setProcessingErrors([...errors])
      }
      setExtractionStatus(`Seite ${pageIndex + 1} fehlgeschlagen`)
    }
    // Short pause between pages.
    await new Promise(r => setTimeout(r, 500))
  }

  setCurrentlyProcessingPage(null)
  setProcessingQueue([])
  setIsExtracting(false)
  if (successful.length === pagesToProcess.length) {
    setExtractionStatus(`Fertig! Alle ${successful.length} Seiten verarbeitet.`)
  } else if (successful.length > 0) {
    setExtractionStatus(`${successful.length} von ${pagesToProcess.length} Seiten verarbeitet. ${failed.length} fehlgeschlagen.`)
  } else {
    setExtractionStatus(`Alle Seiten fehlgeschlagen.`)
  }

  // Reload thumbnails for processed pages (server may have rotated them).
  if (successful.length > 0 && session) {
    const updatedThumbs = [...pagesThumbnails]
    for (const pageNum of successful) {
      const idx = pageNum - 1
      try {
        // The timestamp query parameter bypasses cached thumbnails.
        const thumbRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/pdf-thumbnail/${idx}?hires=true&t=${Date.now()}`)
        if (thumbRes.ok) {
          const blob = await thumbRes.blob()
          // Release the blob URL being replaced.
          if (updatedThumbs[idx]) URL.revokeObjectURL(updatedThumbs[idx])
          updatedThumbs[idx] = URL.createObjectURL(blob)
        }
      } catch (e) {
        console.error(`Failed to refresh thumbnail for page ${pageNum}`, e)
      }
    }
    setPagesThumbnails(updatedThumbs)
  }
  setSession(prev => prev ? { ...prev, status: 'extracted' } : null)
}
// Toggle a page in the selection; the selection is kept sorted ascending.
const togglePageSelection = (pageIndex: number) => {
  setSelectedPages(current => {
    if (current.includes(pageIndex)) {
      return current.filter(page => page !== pageIndex)
    }
    const withPage = [...current, pageIndex]
    withPage.sort((a, b) => a - b)
    return withPage
  })
}
// Select every page of the PDF except those the user has excluded.
const selectAllPages = () => {
  const everyPage = Array.from({ length: pdfPageCount }, (_, page) => page)
  const selectable = everyPage.filter(page => !excludedPages.includes(page))
  setSelectedPages(selectable)
}
// Clear the page selection.
const selectNoPages = () => {
  setSelectedPages([])
}
// Exclude a page and drop it from the current selection.
// stopPropagation keeps the click from reaching enclosing click handlers.
const excludePage = (pageIndex: number, e: React.MouseEvent) => {
  e.stopPropagation()
  setExcludedPages(excluded => excluded.concat(pageIndex))
  setSelectedPages(selected => selected.filter(page => page !== pageIndex))
}
// Clear the excluded-pages list (the selection itself is left unchanged).
const restoreExcludedPages = () => setExcludedPages([])
// Open the OCR comparison view for one page and request the comparison from
// the backend. Previous result and error are cleared first; the busy flag is
// always reset when the request settles.
const runOcrComparison = async (pageIndex: number) => {
  if (!session) return
  setOcrComparePageIndex(pageIndex)
  setShowOcrComparison(true)
  setIsComparingOcr(true)
  setOcrCompareError(null)
  setOcrCompareResult(null)
  const API_BASE = getApiBase()
  const compareUrl = `${API_BASE}/api/v1/vocab/sessions/${session.id}/compare-ocr/${pageIndex}`
  try {
    const response = await fetch(compareUrl, { method: 'POST' })
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`)
    }
    setOcrCompareResult(await response.json())
  } catch (e) {
    const message = e instanceof Error ? e.message : 'Vergleich fehlgeschlagen'
    setOcrCompareError(message)
  } finally {
    setIsComparingOcr(false)
  }
}
// Set one field of the vocabulary entry with the given id.
// The four built-in fields are written on the entry itself; any other field
// name is stored in the entry's `extras` map.
const updateVocabularyEntry = (id: string, field: string, value: string) => {
  setVocabulary(entries => entries.map(entry => {
    if (entry.id !== id) return entry
    switch (field) {
      case 'english':
      case 'german':
      case 'example_sentence':
      case 'word_type':
        return { ...entry, [field]: value }
      default:
        return { ...entry, extras: { ...(entry.extras || {}), [field]: value } }
    }
  }))
}
// Ask the user for a column name and append a new extra column to the given
// source page. Cancelling the prompt or entering only whitespace does nothing.
const addExtraColumn = (sourcePage: number) => {
  const enteredName = prompt('Spaltenname:')
  const trimmedName = enteredName?.trim()
  if (!trimmedName) return
  // Timestamp-based key identifies the column in each entry's extras.
  const key = `extra_${Date.now()}`
  setPageExtraColumns(columnsByPage => {
    const existing = columnsByPage[sourcePage] || []
    return {
      ...columnsByPage,
      [sourcePage]: [...existing, { key, label: trimmedName }],
    }
  })
}
// Remove an extra column from a source page and delete its value from every
// vocabulary entry that carries it.
const removeExtraColumn = (sourcePage: number, key: string) => {
  setPageExtraColumns(columnsByPage => {
    const remaining = (columnsByPage[sourcePage] || []).filter(col => col.key !== key)
    return { ...columnsByPage, [sourcePage]: remaining }
  })
  setVocabulary(entries => entries.map(entry => {
    if (!entry.extras || !(key in entry.extras)) return entry
    // Copy first so the previous state object is never mutated.
    const remainingExtras = { ...entry.extras }
    delete remainingExtras[key]
    return { ...entry, extras: remainingExtras }
  }))
}
// Return the extra columns that apply to a source page: the columns stored
// under page key 0 (treated as global) followed by the page's own columns.
const getExtraColumnsForPage = (sourcePage: number): ExtraColumn[] => {
  const global = pageExtraColumns[0] || []
  // For page 0 the "global" and "page-specific" buckets are the same list;
  // concatenating them would return every column twice.
  if (sourcePage === 0) return [...global]
  const pageSpecific = pageExtraColumns[sourcePage] || []
  return [...global, ...pageSpecific]
}
// Collect the extra columns of all pages, de-duplicated by key.
// The first occurrence of a key wins and determines its position.
const getAllExtraColumns = (): ExtraColumn[] => {
  const firstByKey = new Map<string, ExtraColumn>()
  Object.values(pageExtraColumns).forEach(columns => {
    columns.forEach(column => {
      if (!firstByKey.has(column.key)) {
        firstByKey.set(column.key, column)
      }
    })
  })
  return Array.from(firstByKey.values())
}
// Remove the vocabulary entry with the given id.
const deleteVocabularyEntry = (id: string) => {
  setVocabulary(entries => entries.filter(entry => entry.id !== id))
}
// Flip the `selected` flag of the vocabulary entry with the given id.
const toggleVocabularySelection = (id: string) => {
  setVocabulary(entries => entries.map(entry => {
    if (entry.id !== id) return entry
    return { ...entry, selected: !entry.selected }
  }))
}
// Select all entries, or deselect all if every entry is already selected.
const toggleAllSelection = () => {
  setVocabulary(prev => {
    // Derive the target state from the list being updated rather than from
    // the render-time `vocabulary` closure, which can be stale when several
    // updates are queued before the next render.
    const allSelected = prev.every(v => v.selected)
    return prev.map(v => ({ ...v, selected: !allSelected }))
  })
}
// Insert a blank, pre-selected vocabulary entry.
// Without `atIndex` the entry is appended; otherwise it is inserted at that
// position in the list.
const addVocabularyEntry = (atIndex?: number) => {
  const blankEntry: VocabularyEntry = {
    id: `new-${Date.now()}`,
    english: '',
    german: '',
    example_sentence: '',
    selected: true
  }
  setVocabulary(entries =>
    atIndex === undefined
      ? [...entries, blankEntry]
      : [...entries.slice(0, atIndex), blankEntry, ...entries.slice(atIndex)]
  )
}
// Persist the current vocabulary list for the active session (PUT).
// Best effort: failures are logged and never thrown, so callers can always
// await this safely.
const saveVocabulary = async () => {
  if (!session) return
  const API_BASE = getApiBase()
  try {
    const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/vocabulary`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ vocabulary }),
    })
    // fetch only rejects on network errors; a 4xx/5xx response must be
    // checked explicitly or a rejected save goes unnoticed.
    if (!res.ok) {
      console.error(`Failed to save vocabulary: HTTP ${res.status}`)
    }
  } catch (error) {
    console.error('Failed to save vocabulary:', error)
  }
}
// Save the vocabulary and ask the backend to generate a worksheet.
// The 'nru' format uses its own endpoint and only sends title and solution
// flag; every other format sends the selected worksheet types and line
// height as well. On success the worksheet id is stored, the export tab is
// opened and the tracked activity is completed.
const generateWorksheet = async () => {
  if (!session) return
  // The standard format needs at least one worksheet type.
  if (selectedFormat === 'standard' && selectedTypes.length === 0) return
  setIsGenerating(true)
  setError(null)
  const API_BASE = getApiBase()
  try {
    await saveVocabulary()
    const title = worksheetTitle || session.name
    const isNru = selectedFormat === 'nru'
    const endpoint = isNru ? 'generate-nru' : 'generate'
    const payload = isNru
      ? {
          title,
          include_solutions: includeSolutions,
        }
      : {
          worksheet_types: selectedTypes,
          title,
          include_solutions: includeSolutions,
          line_height: lineHeight,
        }
    const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/${endpoint}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    })
    // Previously a non-OK response was ignored and the user got no feedback.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}`)
    }
    const data = await res.json()
    setWorksheetId(data.worksheet_id || data.id)
    setActiveTab('export')
    completeActivity({ vocabCount: vocabulary.length })
  } catch (error) {
    console.error('Failed to generate worksheet:', error)
    setError('Arbeitsblatt konnte nicht erstellt werden')
  } finally {
    setIsGenerating(false)
  }
}
// Open the generated worksheet PDF or its solution PDF in a new tab.
// Does nothing until a worksheet has been generated.
const downloadPDF = (type: 'worksheet' | 'solution') => {
  if (!worksheetId) return
  const API_BASE = getApiBase()
  let endpoint = 'solution'
  if (type === 'worksheet') {
    endpoint = 'pdf'
  }
  const pdfUrl = `${API_BASE}/api/v1/vocab/worksheets/${worksheetId}/${endpoint}`
  window.open(pdfUrl, '_blank')
}
// Add the worksheet type to the selection, or remove it if already selected.
const toggleWorksheetType = (type: WorksheetType) => {
  setSelectedTypes(current => {
    if (!current.includes(type)) return [...current, type]
    return current.filter(existing => existing !== type)
  })
}
// Load an existing session from the backend and continue where it left off.
// A 'pending' session goes back to the upload tab with a hint; every other
// status opens the vocabulary tab, and for 'extracted' / 'completed' the
// stored vocabulary is fetched first.
const resumeSession = async (existingSession: Session) => {
  setError(null)
  setExtractionStatus('Session wird geladen...')
  const API_BASE = getApiBase()
  const sessionUrl = `${API_BASE}/api/v1/vocab/sessions/${existingSession.id}`
  try {
    const sessionRes = await fetch(sessionUrl)
    if (!sessionRes.ok) {
      throw new Error('Session nicht gefunden')
    }
    const loaded = await sessionRes.json()
    setSession(loaded)
    setWorksheetTitle(loaded.name)

    if (loaded.status === 'pending') {
      setActiveTab('upload')
      setExtractionStatus('Diese Session hat noch keine Vokabeln. Bitte laden Sie ein Dokument hoch.')
      return
    }

    const hasVocabulary = loaded.status === 'extracted' || loaded.status === 'completed'
    if (hasVocabulary) {
      const vocabRes = await fetch(`${sessionUrl}/vocabulary`)
      if (vocabRes.ok) {
        const vocabData = await vocabRes.json()
        setVocabulary(vocabData.vocabulary || [])
      }
    }
    setActiveTab('vocabulary')
    setExtractionStatus('')
  } catch (error) {
    console.error('Failed to resume session:', error)
    setError(error instanceof Error ? error.message : 'Fehler beim Laden der Session')
    setExtractionStatus('')
  }
}
// Discard the current session and return to the upload tab, then refresh the
// list of existing sessions from the backend.
const resetSession = async () => {
  // Page thumbnails (and a PDF preview) are blob URLs created with
  // URL.createObjectURL; release them before the references are dropped.
  pagesThumbnails.forEach(url => {
    if (url) URL.revokeObjectURL(url)
  })
  if (directFilePreview?.startsWith('blob:')) {
    URL.revokeObjectURL(directFilePreview)
  }

  setSession(null)
  setSessionName('')
  setVocabulary([])
  setUploadedImage(null)
  setWorksheetId(null)
  setSelectedDocumentId(null)
  setDirectFile(null)
  setDirectFilePreview(null)
  setShowFullPreview(false)
  setPdfPageCount(0)
  setSelectedPages([])
  setPagesThumbnails([])
  setExcludedPages([])
  // Clear per-page results too; otherwise reprocessPages in the next session
  // would still see the previous session's successful pages.
  setProcessingErrors([])
  setSuccessfulPages([])
  setFailedPages([])
  setActiveTab('upload')
  setError(null)
  setExtractionStatus('')

  const API_BASE = getApiBase()
  try {
    const res = await fetch(`${API_BASE}/api/v1/vocab/sessions`)
    if (res.ok) {
      const sessions = await res.json()
      setExistingSessions(sessions)
    }
  } catch (e) {
    console.error('Failed to reload sessions:', e)
  }
}
// Delete a session on the backend after user confirmation and remove it from
// the local list once the server has acknowledged the deletion.
// stopPropagation keeps the click from reaching enclosing click handlers.
const deleteSession = async (sessionId: string, e: React.MouseEvent) => {
  e.stopPropagation()
  const confirmed = confirm('Session wirklich loeschen? Diese Aktion kann nicht rueckgaengig gemacht werden.')
  if (!confirmed) return
  const API_BASE = getApiBase()
  try {
    const response = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionId}`, {
      method: 'DELETE',
    })
    if (!response.ok) return
    setExistingSessions(sessions => sessions.filter(s => s.id !== sessionId))
  } catch (err) {
    console.error('Failed to delete session:', err)
  }
}
// Reprocess all previously successful pages with new IPA/syllable modes.
// Returns immediately; the work runs in a detached async task. The
// vocabulary is replaced only if at least one page could be reprocessed, so a
// network outage does not wipe the existing list.
const reprocessPages = (ipa: IpaMode, syllable: SyllableMode) => {
  if (!session || successfulPages.length === 0) return
  setIsExtracting(true)
  setExtractionStatus('Verarbeite mit neuen Einstellungen...')
  // successfulPages holds 1-based page numbers; the API expects 0-based indices.
  const pagesToReprocess = successfulPages.map(p => p - 1)
  const API_BASE = getApiBase()
  ;(async () => {
    const allVocab: VocabularyEntry[] = []
    let reprocessedPages = 0
    try {
      for (const pageIndex of pagesToReprocess) {
        try {
          const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/process-single-page/${pageIndex}?ipa_mode=${ipa}&syllable_mode=${syllable}`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ ocr_prompts: ocrPrompts }),
          })
          if (!res.ok) {
            console.error(`Failed to reprocess page ${pageIndex + 1}: HTTP ${res.status}`)
            continue
          }
          const data = await res.json()
          reprocessedPages++
          if (data.vocabulary) allVocab.push(...data.vocabulary)
        } catch (e) {
          // Keep going with the remaining pages, but do not hide the failure.
          console.error(`Failed to reprocess page ${pageIndex + 1}:`, e)
        }
      }
      if (reprocessedPages === 0) {
        // Nothing came back: keep the current vocabulary untouched.
        setExtractionStatus(`Alle Seiten fehlgeschlagen.`)
        return
      }
      setVocabulary(allVocab)
      setExtractionStatus(`${allVocab.length} Vokabeln mit neuen Einstellungen`)
    } finally {
      // Always leave the busy state, even on an unexpected error.
      setIsExtracting(false)
    }
  })()
}
return {
// Mounted
mounted,
// Theme
isDark, glassCard, glassInput,
// Tab
activeTab, setActiveTab,
// Session
session, sessionName, setSessionName, isCreatingSession, error, setError, extractionStatus,
// Existing sessions
existingSessions, isLoadingSessions,
// Documents
storedDocuments, selectedDocumentId, setSelectedDocumentId,
// Direct file
directFile, setDirectFile, directFilePreview, showFullPreview, setShowFullPreview, directFileInputRef,
// PDF pages
pdfPageCount, selectedPages, pagesThumbnails, isLoadingThumbnails, excludedPages,
// Extra columns
pageExtraColumns,
// Upload
uploadedImage, isExtracting,
// Vocabulary
vocabulary,
// Worksheet
selectedTypes, worksheetTitle, setWorksheetTitle,
includeSolutions, setIncludeSolutions,
lineHeight, setLineHeight,
selectedFormat, setSelectedFormat,
ipaMode, setIpaMode, syllableMode, setSyllableMode,
// Export
worksheetId, isGenerating,
// Processing
processingErrors, successfulPages, failedPages, currentlyProcessingPage,
// OCR settings
ocrPrompts, showSettings, setShowSettings,
// QR
showQRModal, setShowQRModal, uploadSessionId,
mobileUploadedFiles, selectedMobileFile, setSelectedMobileFile, setMobileUploadedFiles,
// OCR Comparison
showOcrComparison, setShowOcrComparison,
ocrComparePageIndex, ocrCompareResult, isComparingOcr, ocrCompareError,
// Handlers
handleDirectFileSelect, startSession, processSelectedPages,
togglePageSelection, selectAllPages, selectNoPages, excludePage, restoreExcludedPages,
runOcrComparison,
updateVocabularyEntry, addExtraColumn, removeExtraColumn,
getExtraColumnsForPage, getAllExtraColumns,
deleteVocabularyEntry, toggleVocabularySelection, toggleAllSelection, addVocabularyEntry,
saveVocabulary, generateWorksheet, downloadPDF, toggleWorksheetType,
resumeSession, resetSession, deleteSession,
saveOcrPrompts, formatFileSize, reprocessPages,
}
}