Add SmartSpellChecker + refactor vocab-worksheet page.tsx

SmartSpellChecker (klausur-service):
- Language-aware OCR post-correction without LLMs
- Dual-dictionary heuristic for EN/DE language detection
- Context-based a/I disambiguation via bigram lookup
- Multi-digit substitution (sch00l→school)
- Cross-language guard (don't false-correct DE words in EN column)
- Umlaut correction (Schuler→Schüler, uber→über)
- Integrated into spell_review_entries_sync() pipeline
- 31 tests, 9ms/100 corrections

Vocab-worksheet refactoring (studio-v2):
- Split 2337-line page.tsx into 14 files
- Custom hook useVocabWorksheet.ts (all state + logic)
- 9 components in components/ directory
- types.ts, constants.ts for shared definitions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-12 12:25:01 +02:00
parent 04fa01661c
commit 909d0729f6
17 changed files with 3545 additions and 2228 deletions
@@ -881,10 +881,25 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
    """Rule-based OCR correction: spell-checker + structural heuristics.

    Deterministic — never translates, never touches IPA, never hallucinates.
+    Uses SmartSpellChecker for language-aware corrections with context-based
+    disambiguation (a/I), multi-digit substitution, and cross-language guard.
    """
    t0 = time.time()
    changes: List[Dict] = []
    all_corrected: List[Dict] = []
+
+    # Use SmartSpellChecker if available, fall back to legacy _spell_fix_field
+    _smart = None
+    try:
+        from smart_spell import SmartSpellChecker
+        _smart = SmartSpellChecker()
+        logger.debug("spell_review: using SmartSpellChecker")
+    except Exception:
+        logger.debug("spell_review: SmartSpellChecker not available, using legacy")
+
+    # Map field names → language codes for SmartSpellChecker
+    _LANG_MAP = {"english": "en", "german": "de", "example": "auto"}
+
    for i, entry in enumerate(entries):
        e = dict(entry)
        # Page-ref normalization (always, regardless of review status)
@@ -907,9 +922,18 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
            old_val = (e.get(field_name) or "").strip()
            if not old_val:
                continue
-            # example field is mixed-language — try German first (for umlauts)
-            lang = "german" if field_name in ("german", "example") else "english"
-            new_val, was_changed = _spell_fix_field(old_val, field=lang)
+
+            if _smart:
+                # SmartSpellChecker path — language-aware, context-based
+                lang_code = _LANG_MAP.get(field_name, "en")
+                result = _smart.correct_text(old_val, lang=lang_code)
+                new_val = result.corrected
+                was_changed = result.changed
+            else:
+                # Legacy path
+                lang = "german" if field_name in ("german", "example") else "english"
+                new_val, was_changed = _spell_fix_field(old_val, field=lang)
+
            if was_changed and new_val != old_val:
                changes.append({
                    "row_index": e.get("row_index", i),
@@ -921,12 +945,13 @@ def spell_review_entries_sync(entries: List[Dict]) -> Dict:
                e["llm_corrected"] = True
        all_corrected.append(e)
    duration_ms = int((time.time() - t0) * 1000)
+    model_name = "smart-spell-checker" if _smart else "spell-checker"
    return {
        "entries_original": entries,
        "entries_corrected": all_corrected,
        "changes": changes,
        "skipped_count": 0,
-        "model_used": "spell-checker",
+        "model_used": model_name,
        "duration_ms": duration_ms,
    }

@@ -0,0 +1,369 @@
+"""
+SmartSpellChecker — Language-aware OCR post-correction without LLMs.
+
+Uses pyspellchecker (MIT) with dual EN+DE dictionaries for:
+- Automatic language detection per word (dual-dictionary heuristic)
+- OCR error correction (digit↔letter, umlauts, transpositions)
+- Context-based disambiguation (a/I, l/I) via bigram lookup
+- Mixed-language support for example sentences
+
+License: Apache 2.0 (commercial use permitted)
+"""
+
+import logging
+import re
+from dataclasses import dataclass, field
+from typing import Dict, List, Literal, Optional, Set, Tuple
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Init
+# ---------------------------------------------------------------------------
+
+# Build both dictionaries once at import time; SmartSpellChecker.__init__
+# binds these shared instances instead of constructing new ones.
+# distance=1 is pyspellchecker's edit-distance setting for candidates.
+try:
+    from spellchecker import SpellChecker as _SpellChecker
+    _en_spell = _SpellChecker(language='en', distance=1)
+    _de_spell = _SpellChecker(language='de', distance=1)
+    _AVAILABLE = True
+except ImportError:
+    # Only a missing package is handled here. _en_spell/_de_spell stay
+    # undefined in that case; SmartSpellChecker.__init__ checks _AVAILABLE
+    # before touching them.
+    _AVAILABLE = False
+    logger.warning("pyspellchecker not installed — SmartSpellChecker disabled")
+
+# Language tag: a concrete language, "both" (word found in both
+# dictionaries) or "unknown" (word found in neither).
+Lang = Literal["en", "de", "both", "unknown"]
+
+# ---------------------------------------------------------------------------
+# Bigram context for a/I disambiguation
+# ---------------------------------------------------------------------------
+
+# Words that commonly follow "I" (subject pronoun → verb/modal).
+# Checked first by _disambiguate_a_I, so a word listed in both sets
+# (e.g. "really") resolves to "I".
+_I_FOLLOWERS: frozenset = frozenset({
+    "am", "was", "have", "had", "do", "did", "will", "would", "can",
+    "could", "should", "shall", "may", "might", "must",
+    "think", "know", "see", "want", "need", "like", "love", "hate",
+    "go", "went", "come", "came", "say", "said", "get", "got",
+    "make", "made", "take", "took", "give", "gave", "tell", "told",
+    "feel", "felt", "find", "found", "believe", "hope", "wish",
+    "remember", "forget", "understand", "mean", "meant",
+    "don't", "didn't", "can't", "won't", "couldn't", "wouldn't",
+    "shouldn't", "haven't", "hadn't", "isn't", "wasn't",
+    "really", "just", "also", "always", "never", "often", "sometimes",
+})
+
+# Words that commonly follow "a" (article → noun/adjective)
+_A_FOLLOWERS: frozenset = frozenset({
+    "lot", "few", "little", "bit", "good", "bad", "great", "new", "old",
+    "long", "short", "big", "small", "large", "huge", "tiny",
+    "nice", "beautiful", "wonderful", "terrible", "horrible",
+    "man", "woman", "boy", "girl", "child", "dog", "cat", "bird",
+    "book", "car", "house", "room", "school", "teacher", "student",
+    "day", "week", "month", "year", "time", "place", "way",
+    "friend", "family", "person", "problem", "question", "story",
+    "very", "really", "quite", "rather", "pretty", "single",
+})
+
+# Digit→letter substitutions (OCR confusion). Also covers the pipe
+# character, which is not a digit. Replacement order matters: callers try
+# the candidates in list order and return the first known word.
+_DIGIT_SUBS: Dict[str, List[str]] = {
+    '0': ['o', 'O'],
+    '1': ['l', 'I'],
+    '5': ['s', 'S'],
+    '6': ['g', 'G'],
+    '8': ['b', 'B'],
+    '|': ['I', 'l'],
+}
+# Characters whose presence marks a token as a substitution candidate.
+_SUSPICIOUS_CHARS = frozenset(_DIGIT_SUBS.keys())
+
+# Umlaut confusion: OCR drops dots (ü→u, ä→a, ö→o)
+# NOTE(review): the i→ü / I→Ü entries are not "dropped dots" — confirm
+# they are intentional.
+_UMLAUT_MAP = {
+    'a': 'ä', 'o': 'ö', 'u': 'ü', 'i': 'ü',
+    'A': 'Ä', 'O': 'Ö', 'U': 'Ü', 'I': 'Ü',
+}
+
+# Tokenizer: group 1 is a word (ASCII letters, umlauts, ß, apostrophe,
+# pipe), group 2 is the run of other characters that follows it.
+# Digits are NOT in the word class, so they always land in group 2.
+_TOKEN_RE = re.compile(r"([A-Za-zÄÖÜäöüß'|]+)([^A-Za-zÄÖÜäöüß'|]*)")
+
+
+# ---------------------------------------------------------------------------
+# Data types
+# ---------------------------------------------------------------------------
+
+@dataclass
+class CorrectionResult:
+    """Outcome of correcting one text field."""
+    # Input text exactly as it was passed in.
+    original: str
+    # Text after all per-token corrections were applied.
+    corrected: str
+    # Language used for the field (auto-detected or caller-supplied).
+    lang_detected: Lang
+    # True when ``corrected`` differs from ``original``.
+    changed: bool
+    # Human-readable "old→new" entries, one per corrected token.
+    changes: List[str] = field(default_factory=list)
+
+
+# ---------------------------------------------------------------------------
+# Core class
+# ---------------------------------------------------------------------------
+
+class SmartSpellChecker:
+    """Language-aware OCR spell checker using pyspellchecker (no LLM).
+
+    Combines the module-level English and German dictionaries with
+    OCR-specific heuristics: digit/pipe substitution, umlaut restoration,
+    a/I disambiguation and a guard against "correcting" words that are
+    valid in the other language.
+    """
+
+    def __init__(self):
+        """Attach the shared EN/DE dictionaries.
+
+        Raises:
+            RuntimeError: If pyspellchecker could not be imported.
+        """
+        if not _AVAILABLE:
+            raise RuntimeError("pyspellchecker not installed")
+        self.en = _en_spell
+        self.de = _de_spell
+
+    # --- Language detection ---
+
+    def detect_word_lang(self, word: str) -> Lang:
+        """Detect language of a single word using dual-dict heuristic.
+
+        Returns "both" when the word is in both dictionaries and "unknown"
+        when it is in neither (or is empty after stripping punctuation).
+        """
+        w = word.lower().strip(".,;:!?\"'()")
+        if not w:
+            return "unknown"
+        in_en = bool(self.en.known([w]))
+        in_de = bool(self.de.known([w]))
+        if in_en and in_de:
+            return "both"
+        if in_en:
+            return "en"
+        if in_de:
+            return "de"
+        return "unknown"
+
+    def detect_text_lang(self, text: str) -> Lang:
+        """Detect dominant language of a text string (sentence/phrase).
+
+        Counts words that belong to exactly one dictionary. Returns "both"
+        on a non-zero tie and "unknown" when no word is exclusive to either
+        language.
+        """
+        words = re.findall(r"[A-Za-zÄÖÜäöüß]+", text)
+        if not words:
+            return "unknown"
+
+        en_count = 0
+        de_count = 0
+        for w in words:
+            lang = self.detect_word_lang(w)
+            if lang == "en":
+                en_count += 1
+            elif lang == "de":
+                de_count += 1
+            # "both" doesn't count for either
+
+        if en_count > de_count:
+            return "en"
+        if de_count > en_count:
+            return "de"
+        if en_count == de_count and en_count > 0:
+            return "both"
+        return "unknown"
+
+    # --- Single-word correction ---
+
+    def _known(self, word: str) -> bool:
+        """True if word is known in EN or DE dictionary."""
+        w = word.lower()
+        return bool(self.en.known([w])) or bool(self.de.known([w]))
+
+    def _known_in(self, word: str, lang: str) -> bool:
+        """True if word is known in a specific language dictionary.
+
+        Any ``lang`` other than "en" selects the German dictionary.
+        """
+        w = word.lower()
+        spell = self.en if lang == "en" else self.de
+        return bool(spell.known([w]))
+
+    def correct_word(self, word: str, lang: str = "en",
+                     prev_word: str = "", next_word: str = "") -> Optional[str]:
+        """Correct a single word for the given language.
+
+        Returns None if no correction is needed (or none was found),
+        otherwise the corrected string.
+
+        Args:
+            word: The word to check/correct
+            lang: Expected language ("en" or "de")
+            prev_word: Previous word (accepted for context; currently unused)
+            next_word: Next word (used for a/I disambiguation)
+        """
+        if not word or not word.strip():
+            return None
+
+        # Skip pure numbers and abbreviations containing dots
+        if word.isdigit() or '.' in word:
+            return None
+
+        has_suspicious = any(ch in _SUSPICIOUS_CHARS for ch in word)
+
+        # 1. Already known → no fix
+        if self._known(word):
+            # But check a/I disambiguation for single-char OCR variants
+            if word.lower() in ('l', '|') and next_word:
+                return self._disambiguate_a_I(word, next_word)
+            return None
+
+        # 2. Digit/pipe substitution
+        if has_suspicious:
+            if word == '|':
+                return 'I'
+            # Try single-char substitutions first (left to right)
+            for i, ch in enumerate(word):
+                if ch not in _DIGIT_SUBS:
+                    continue
+                for replacement in _DIGIT_SUBS[ch]:
+                    candidate = word[:i] + replacement + word[i + 1:]
+                    if self._known(candidate):
+                        return candidate
+            # Try multi-char substitution (e.g., "sch00l" → "school")
+            multi = self._try_multi_digit_sub(word)
+            if multi:
+                return multi
+
+        # 3. Umlaut correction (German)
+        if lang == "de" and len(word) >= 3 and word.isalpha():
+            umlaut_fix = self._try_umlaut_fix(word)
+            if umlaut_fix:
+                return umlaut_fix
+
+        # 4. General spell correction
+        if not has_suspicious and len(word) >= 3 and word.isalpha():
+            # Safety: don't correct if the word is valid in the OTHER language
+            # (either directly or via umlaut fix)
+            other_lang = "de" if lang == "en" else "en"
+            if self._known_in(word, other_lang):
+                return None
+            if other_lang == "de" and self._try_umlaut_fix(word):
+                return None  # has a valid DE umlaut variant → don't touch
+
+            spell = self.en if lang == "en" else self.de
+            correction = spell.correction(word.lower())
+            if correction and correction != word.lower():
+                # Restore the capitalisation of the first letter
+                if word[0].isupper():
+                    correction = correction[0].upper() + correction[1:]
+                if self._known(correction):
+                    return correction
+
+        return None
+
+    # --- Multi-digit substitution ---
+
+    def _try_multi_digit_sub(self, word: str) -> Optional[str]:
+        """Try replacing multiple suspicious characters simultaneously.
+
+        Enumerates every keep/replace combination for up to four suspicious
+        positions and returns the first combination that differs from
+        ``word`` and is a known word, or None.
+        """
+        positions = [(i, ch) for i, ch in enumerate(word) if ch in _DIGIT_SUBS]
+        if not positions or len(positions) > 4:
+            return None
+
+        # Breadth-first expansion, one suspicious position per level
+        queue = [list(word)]
+        for pos, ch in positions:
+            next_queue = []
+            for current in queue:
+                # Keep original
+                next_queue.append(current[:])
+                # Try each substitution
+                for repl in _DIGIT_SUBS[ch]:
+                    variant = current[:]
+                    variant[pos] = repl
+                    next_queue.append(variant)
+            queue = next_queue
+
+        # Check which combinations produce known words
+        for combo in queue:
+            candidate = "".join(combo)
+            if candidate != word and self._known(candidate):
+                return candidate
+
+        return None
+
+    # --- Umlaut fix ---
+
+    def _try_umlaut_fix(self, word: str) -> Optional[str]:
+        """Try single-char umlaut substitutions for German words.
+
+        Returns the first known candidate (scanning left to right), or None.
+        """
+        for i, ch in enumerate(word):
+            if ch in _UMLAUT_MAP:
+                candidate = word[:i] + _UMLAUT_MAP[ch] + word[i + 1:]
+                if self._known(candidate):
+                    return candidate
+        return None
+
+    # --- a/I disambiguation ---
+
+    def _disambiguate_a_I(self, token: str, next_word: str) -> Optional[str]:
+        """Disambiguate 'a' vs 'I' (and OCR variants like 'l', '|').
+
+        Decides purely from ``next_word``; ``token`` itself is not inspected.
+        Returns "I", "a", or None when the follower is in neither word list.
+        """
+        nw = next_word.lower().strip(".,;:!?")
+        if nw in _I_FOLLOWERS:
+            return "I"
+        if nw in _A_FOLLOWERS:
+            return "a"
+        return None  # uncertain, don't change
+
+    # --- Full text correction ---
+
+    def correct_text(self, text: str, lang: str = "en") -> CorrectionResult:
+        """Correct a full text string (field value).
+
+        Text outside word tokens (leading, separating and trailing
+        punctuation, digits, whitespace) is copied through unchanged.
+
+        NOTE(review): _TOKEN_RE has no digits in its word class, so a value
+        like "sch00l" reaches correct_word as "sch" and "l" rather than as
+        one token — confirm whether that is intended.
+
+        Args:
+            text: The text to correct
+            lang: Expected language ("en" or "de"), or "auto" to detect it
+                from the text itself
+        """
+        if not text or not text.strip():
+            return CorrectionResult(text, text, "unknown", False)
+
+        detected = self.detect_text_lang(text) if lang == "auto" else lang
+        # correct_word only distinguishes "en"/"de"; anything else
+        # ("both", "unknown", ...) falls back to English.
+        word_lang = detected if detected in ("en", "de") else "en"
+
+        tokens = list(_TOKEN_RE.finditer(text))
+        # Matches are contiguous and the last separator group runs to the end
+        # of the string, so the only text not covered by a match is whatever
+        # precedes the first token (e.g. "(" or "3 "). Keep it.
+        parts: List[str] = [text[:tokens[0].start()]] if tokens else [text]
+        changes: List[str] = []
+
+        for idx, m in enumerate(tokens):
+            token, sep = m.group(1), m.group(2)
+            next_word = tokens[idx + 1].group(1) if idx + 1 < len(tokens) else ""
+            prev_word = tokens[idx - 1].group(1) if idx > 0 else ""
+
+            correction = self.correct_word(
+                token, lang=word_lang,
+                prev_word=prev_word, next_word=next_word,
+            )
+            if correction and correction != token:
+                changes.append(f"{token}→{correction}")
+                parts.append(correction)
+            else:
+                parts.append(token)
+            parts.append(sep)
+
+        corrected = "".join(parts)
+        return CorrectionResult(
+            original=text,
+            corrected=corrected,
+            lang_detected=detected,
+            changed=corrected != text,
+            changes=changes,
+        )
+
+    # --- Vocabulary entry correction ---
+
+    def correct_vocab_entry(self, english: str, german: str,
+                            example: str = "") -> Dict[str, CorrectionResult]:
+        """Correct a full vocabulary entry (EN + DE + example).
+
+        Uses column position to determine language — the most reliable signal.
+        The "example" key is only present when ``example`` is non-empty.
+        """
+        results = {}
+        results["english"] = self.correct_text(english, lang="en")
+        results["german"] = self.correct_text(german, lang="de")
+        if example:
+            # For examples, auto-detect language
+            results["example"] = self.correct_text(example, lang="auto")
+        return results
@@ -0,0 +1,210 @@
+"""Tests for SmartSpellChecker — language-aware OCR post-correction."""
+
+import pytest
+import sys, os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+from smart_spell import SmartSpellChecker, CorrectionResult
+
+
+@pytest.fixture
+def sc():
+    """Provide a SmartSpellChecker instance to each test."""
+    checker = SmartSpellChecker()
+    return checker
+
+
+# ─── Language Detection ──────────────────────────────────────────────────────
+
+
+class TestLanguageDetection:
+    """Word- and sentence-level language detection."""
+
+    def test_clear_english_words(self, sc):
+        english = ("school", "beautiful", "homework", "yesterday", "because")
+        for word in english:
+            detected = sc.detect_word_lang(word)
+            assert detected in ("en", "both"), f"{word} should be EN"
+
+    def test_clear_german_words(self, sc):
+        german = ("Schule", "Hausaufgaben", "Freundschaft", "Straße", "Entschuldigung")
+        for word in german:
+            detected = sc.detect_word_lang(word)
+            assert detected in ("de", "both"), f"{word} should be DE"
+
+    def test_ambiguous_words(self, sc):
+        """Words present in both dictionaries are reported as 'both'."""
+        shared = ("Hand", "Finger", "Arm", "Name", "Ball")
+        for word in shared:
+            detected = sc.detect_word_lang(word)
+            assert detected == "both", f"{word} should be 'both'"
+
+    def test_unknown_words(self, sc):
+        for word in ("xyzqwk", ""):
+            assert sc.detect_word_lang(word) == "unknown"
+
+    def test_english_sentence(self, sc):
+        sentence = "I go to school every day"
+        assert sc.detect_text_lang(sentence) == "en"
+
+    def test_german_sentence(self, sc):
+        sentence = "Ich gehe jeden Tag zur Schule"
+        assert sc.detect_text_lang(sentence) == "de"
+
+    def test_mixed_sentence(self, sc):
+        # The dominant language is expected to win; a tie is tolerated
+        sentence = "I like to play Fußball with my Freunde"
+        assert sc.detect_text_lang(sentence) in ("en", "both")
+
+
+# ─── Single Word Correction ────────────────────────────────────────────────
+
+
+class TestSingleWordCorrection:
+    """correct_word() applied to isolated tokens."""
+
+    def test_known_word_not_changed(self, sc):
+        for word, lang in (("school", "en"), ("Freund", "de")):
+            assert sc.correct_word(word, lang) is None
+
+    def test_digit_letter_single(self, sc):
+        for garbled, wanted in (("g0od", "good"), ("he1lo", "hello")):
+            assert sc.correct_word(garbled, "en") == wanted
+
+    def test_digit_letter_multi(self, sc):
+        """Several digits in one word are substituted together (sch00l)."""
+        result = sc.correct_word("sch00l", "en")
+        assert result == "school", f"Expected 'school', got '{result}'"
+
+    def test_pipe_to_I(self, sc):
+        fixed = sc.correct_word("|", "en")
+        assert fixed == "I"
+
+    def test_umlaut_schuler(self, sc):
+        fixed = sc.correct_word("Schuler", "de")
+        assert fixed == "Schüler"
+
+    def test_umlaut_uber(self, sc):
+        fixed = sc.correct_word("uber", "de")
+        assert fixed == "über"
+
+    def test_umlaut_bucher(self, sc):
+        fixed = sc.correct_word("Bucher", "de")
+        assert fixed == "Bücher"
+
+    def test_umlaut_turkei(self, sc):
+        fixed = sc.correct_word("Turkei", "de")
+        assert fixed == "Türkei"
+
+    def test_missing_char(self, sc):
+        fixed = sc.correct_word("beautful", "en")
+        assert fixed == "beautiful"
+
+    def test_transposition(self, sc):
+        fixed = sc.correct_word("teh", "en")
+        assert fixed == "the"
+
+    def test_swap(self, sc):
+        fixed = sc.correct_word("freind", "en")
+        assert fixed == "friend"
+
+    def test_no_false_correction_cross_lang(self, sc):
+        """A word valid in the other language must not be 'corrected'.
+
+        'Schuler' in the EN column has a valid German umlaut variant
+        ('Schüler'), so it is probably a German word in the wrong column
+        (or a surname) and must not become 'Schuyler'.
+        """
+        # Either None (no change) or anything other than "Schuyler" passes
+        outcome = sc.correct_word("Schuler", "en")
+        assert outcome != "Schuyler", "Should not false-correct German word in EN column"
+
+
+# ─── a/I Disambiguation ──────────────────────────────────────────────────────
+
+
+class TestAIDisambiguation:
+    """_disambiguate_a_I() decides from the following word only."""
+
+    def test_I_before_verb(self, sc):
+        for follower in ("am", "was", "think", "have", "don't"):
+            assert sc._disambiguate_a_I("l", follower) == "I"
+
+    def test_a_before_noun_adj(self, sc):
+        for follower in ("book", "cat", "big", "lot"):
+            assert sc._disambiguate_a_I("a", follower) == "a"
+
+    def test_uncertain_returns_none(self, sc):
+        """An unlisted follower gives None, i.e. the token is left alone."""
+        verdict = sc._disambiguate_a_I("l", "xyzqwk")
+        assert verdict is None
+
+
+# ─── Full Text Correction ───────────────────────────────────────────────────
+
+
+class TestFullTextCorrection:
+    """correct_text() on whole field values."""
+
+    def test_english_sentence(self, sc):
+        outcome = sc.correct_text("teh cat is beautful", "en")
+        assert outcome.changed
+        for expected in ("the", "beautiful"):
+            assert expected in outcome.corrected
+
+    def test_german_sentence_no_change(self, sc):
+        outcome = sc.correct_text("Ich gehe zur Schule", "de")
+        assert not outcome.changed
+
+    def test_german_umlaut_fix(self, sc):
+        outcome = sc.correct_text("Der Schuler liest Bucher", "de")
+        for expected in ("Schüler", "Bücher"):
+            assert expected in outcome.corrected
+
+    def test_preserves_punctuation(self, sc):
+        outcome = sc.correct_text("teh cat, beautful!", "en")
+        for mark in (",", "!"):
+            assert mark in outcome.corrected
+
+    def test_empty_text(self, sc):
+        outcome = sc.correct_text("", "en")
+        assert not outcome.changed
+        assert outcome.corrected == ""
+
+
+# ─── Vocab Entry Correction ─────────────────────────────────────────────────
+
+
+class TestVocabEntryCorrection:
+    """correct_vocab_entry() routes each column to its language."""
+
+    def test_basic_entry(self, sc):
+        entry = sc.correct_vocab_entry(english="beautful", german="schön")
+        assert entry["english"].corrected == "beautiful"
+        assert entry["german"].changed is False
+
+    def test_umlaut_in_german(self, sc):
+        entry = sc.correct_vocab_entry(english="school", german="Schuler")
+        assert entry["english"].changed is False
+        assert entry["german"].corrected == "Schüler"
+
+    def test_example_auto_detect(self, sc):
+        entry = sc.correct_vocab_entry(
+            english="friend",
+            german="Freund",
+            example="My best freind lives in Berlin",
+        )
+        example_result = entry["example"]
+        assert "friend" in example_result.corrected
+
+
+# ─── Speed ─────────────────────────────────────────────────────────────────
+
+
+class TestSpeed:
+    """Coarse performance guard for single-word correction."""
+
+    def test_100_corrections_under_500ms(self, sc):
+        """100 word corrections should complete in under 500ms."""
+        import time
+        words = [
+            ("beautful", "en"), ("teh", "en"), ("freind", "en"),
+            ("homwork", "en"), ("yesturday", "en"),
+            ("Schuler", "de"), ("Bucher", "de"), ("Turkei", "de"),
+            ("uber", "de"), ("Ubung", "de"),
+        ] * 10
+
+        # perf_counter is monotonic and high-resolution; time.time() follows
+        # the wall clock and can jump, which would make this check flaky.
+        t0 = time.perf_counter()
+        for word, lang in words:
+            sc.correct_word(word, lang)
+        dt = time.perf_counter() - t0
+
+        print(f"\n  100 corrections in {dt*1000:.0f}ms")
+        assert dt < 0.5, f"Too slow: {dt*1000:.0f}ms"
@@ -0,0 +1,494 @@
+"""
+Benchmark: Spell-checking & language detection approaches for OCR post-correction.
+
+Tests pyspellchecker (already used), symspellpy (candidate), and
+dual-dictionary language detection heuristic on real vocabulary OCR data.
+
+Run:  pytest tests/test_spell_benchmark.py -v -s
+"""
+
+import time
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _load_pyspellchecker():
+    """Return fresh ``(en, de)`` pyspellchecker instances with distance=1."""
+    from spellchecker import SpellChecker
+    return (
+        SpellChecker(language='en', distance=1),
+        SpellChecker(language='de', distance=1),
+    )
+
+
+def _load_symspellpy():
+    """Build a SymSpell instance from the English frequency dictionary.
+
+    Returns ``(sym, Verbosity)`` so callers need no symspellpy import.
+    NOTE(review): pkg_resources is deprecated by setuptools; consider
+    importlib.resources once the minimum Python version is confirmed.
+    """
+    from symspellpy import SymSpell, Verbosity
+    import pkg_resources
+
+    # Frequency dictionary shipped inside the symspellpy package
+    dict_path = pkg_resources.resource_filename(
+        "symspellpy", "frequency_dictionary_en_82_765.txt"
+    )
+    sym = SymSpell(max_dictionary_edit_distance=2)
+    sym.load_dictionary(dict_path, term_index=0, count_index=1)
+    return sym, Verbosity
+
+
+# ---------------------------------------------------------------------------
+# Test data: (ocr_output, expected_correction, language, category)
+# ---------------------------------------------------------------------------
+
+# Each row is one benchmark case. The category both labels the printed
+# result and decides whether a case is used: the word-level quality tests
+# skip "sentence", "sentence_correct", "merged" and "a_vs_I".
+OCR_TEST_CASES = [
+    # --- Single-char ambiguity ---
+    ("l am a student", "I am a student", "en", "a_vs_I"),
+    ("a book", "a book", "en", "a_vs_I"),  # should NOT change
+    ("I like cats", "I like cats", "en", "a_vs_I"),  # should NOT change
+    ("lt is raining", "It is raining", "en", "a_vs_I"),  # l→I at start
+
+    # --- Digit-letter confusion ---
+    ("g0od", "good", "en", "digit_letter"),
+    ("sch00l", "school", "en", "digit_letter"),
+    ("he1lo", "hello", "en", "digit_letter"),
+    ("Sch0n", "Schon", "de", "digit_letter"),  # German
+
+    # --- Umlaut drops ---
+    ("schon", "schön", "de", "umlaut"),  # context: "schon" is also valid DE!
+    ("Schuler", "Schüler", "de", "umlaut"),
+    ("uber", "über", "de", "umlaut"),
+    ("Bucher", "Bücher", "de", "umlaut"),
+    ("Turkei", "Türkei", "de", "umlaut"),
+
+    # --- Common OCR errors ---
+    ("beautful", "beautiful", "en", "missing_char"),
+    ("teh", "the", "en", "transposition"),
+    ("becasue", "because", "en", "transposition"),
+    ("freind", "friend", "en", "swap"),
+    ("Freund", "Freund", "de", "correct"),  # already correct
+
+    # --- Merged words ---
+    ("atmyschool", "at my school", "en", "merged"),
+    ("goodidea", "good idea", "en", "merged"),
+
+    # --- Mixed language example sentences ---
+    ("I go to teh school", "I go to the school", "en", "sentence"),
+    ("Ich gehe zur Schule", "Ich gehe zur Schule", "de", "sentence_correct"),
+]
+
+# Language detection test: (word, expected_language)
+# expected_language is "en", "de" or "both"; the heuristic test lowercases
+# each word before looking it up in the dictionaries.
+LANG_DETECT_CASES = [
+    # Clear English
+    ("school", "en"),
+    ("beautiful", "en"),
+    ("homework", "en"),
+    ("yesterday", "en"),
+    ("children", "en"),
+    ("because", "en"),
+    ("environment", "en"),
+    ("although", "en"),
+
+    # Clear German
+    ("Schule", "de"),
+    ("Hausaufgaben", "de"),
+    ("Freundschaft", "de"),
+    ("Umwelt", "de"),
+    ("Kindergarten", "de"),  # also used in English!
+    ("Bücher", "de"),
+    ("Straße", "de"),
+    ("Entschuldigung", "de"),
+
+    # Ambiguous (exist in both)
+    ("Hand", "both"),
+    ("Finger", "both"),
+    ("Arm", "both"),
+    ("Name", "both"),
+    ("Ball", "both"),
+
+    # Short/tricky
+    ("a", "en"),
+    ("I", "en"),
+    ("in", "both"),
+    ("an", "both"),
+    ("the", "en"),
+    ("die", "de"),
+    ("der", "de"),
+    ("to", "en"),
+    ("zu", "de"),
+]
+
+
+# ===========================================================================
+# Tests
+# ===========================================================================
+
+
+class TestPyspellchecker:
+    """Test pyspellchecker capabilities for OCR correction.
+
+    Apart from test_known_words, these methods are benchmarks: they print
+    hit/miss tables and timings but do not assert on accuracy or speed.
+    """
+
+    @pytest.fixture(autouse=True)
+    def setup(self):
+        """Load fresh EN/DE dictionaries before every test."""
+        self.en, self.de = _load_pyspellchecker()
+
+    def test_known_words(self):
+        """Verify basic dictionary lookup."""
+        assert self.en.known(["school"])
+        assert self.en.known(["beautiful"])
+        assert self.de.known(["schule"])  # lowercase
+        assert self.de.known(["freund"])
+        # Not known
+        assert not self.en.known(["xyzqwk"])
+        assert not self.de.known(["xyzqwk"])
+
+    def test_correction_quality(self):
+        """Print how many single-word OCR cases plain pyspellchecker fixes."""
+        results = []
+        for ocr, expected, lang, category in OCR_TEST_CASES:
+            if category in ("sentence", "sentence_correct", "merged", "a_vs_I"):
+                continue  # skip multi-word cases
+
+            spell = self.en if lang == "en" else self.de
+            words = ocr.split()
+            corrected = []
+            for w in words:
+                if spell.known([w.lower()]):
+                    corrected.append(w)
+                else:
+                    fix = spell.correction(w.lower())
+                    if fix and fix != w.lower():
+                        # Preserve case
+                        if w[0].isupper():
+                            fix = fix[0].upper() + fix[1:]
+                        corrected.append(fix)
+                    else:
+                        corrected.append(w)
+            result = " ".join(corrected)
+            ok = result == expected
+            results.append((ocr, expected, result, ok, category))
+            if not ok:
+                print(f"  MISS: '{ocr}' → '{result}' (expected '{expected}') [{category}]")
+            else:
+                print(f"  OK:   '{ocr}' → '{result}' [{category}]")
+
+        correct = sum(1 for *_, ok, _ in results if ok)
+        total = len(results)
+        print(f"\npyspellchecker: {correct}/{total} correct ({100*correct/total:.0f}%)")
+
+    def test_language_detection_heuristic(self):
+        """Print the hit rate of the dual-dictionary language heuristic."""
+        results = []
+        for word, expected_lang in LANG_DETECT_CASES:
+            w = word.lower()
+            in_en = bool(self.en.known([w]))
+            in_de = bool(self.de.known([w]))
+
+            if in_en and in_de:
+                detected = "both"
+            elif in_en:
+                detected = "en"
+            elif in_de:
+                detected = "de"
+            else:
+                detected = "unknown"
+
+            ok = detected == expected_lang
+            results.append((word, expected_lang, detected, ok))
+            if not ok:
+                print(f"  MISS: '{word}' → {detected} (expected {expected_lang})")
+            else:
+                print(f"  OK:   '{word}' → {detected}")
+
+        correct = sum(1 for *_, ok in results if ok)
+        total = len(results)
+        print(f"\nLang detection heuristic: {correct}/{total} correct ({100*correct/total:.0f}%)")
+
+    def test_umlaut_awareness(self):
+        """Print the German candidates offered for umlaut-less spellings."""
+        # "Schuler" should suggest "Schüler"
+        candidates = self.de.candidates("schuler")
+        print(f"  'schuler' candidates: {candidates}")
+        # "uber" should suggest "über"
+        candidates_uber = self.de.candidates("uber")
+        print(f"  'uber' candidates: {candidates_uber}")
+        # "Turkei" should suggest "Türkei"
+        candidates_turkei = self.de.candidates("turkei")
+        print(f"  'turkei' candidates: {candidates_turkei}")
+
+    def test_speed_100_words(self):
+        """Measure correction speed for 100 words per language."""
+        # perf_counter is monotonic and high-resolution, unlike the
+        # wall-clock time.time(), so the printed timings are trustworthy.
+        words_en = ["beautful", "teh", "becasue", "freind", "shcool",
+                     "homwork", "yesturday", "chilren", "becuse", "enviroment"] * 10
+        t0 = time.perf_counter()
+        for w in words_en:
+            self.en.correction(w)
+        dt = time.perf_counter() - t0
+        print(f"\n  pyspellchecker: 100 EN corrections in {dt*1000:.0f}ms")
+
+        words_de = ["schuler", "bucher", "turkei", "strasze", "entschuldigung",
+                     "kindergaten", "freumd", "hauaufgaben", "umwlt", "ubung"] * 10
+        t0 = time.perf_counter()
+        for w in words_de:
+            self.de.correction(w)
+        dt = time.perf_counter() - t0
+        print(f"  pyspellchecker: 100 DE corrections in {dt*1000:.0f}ms")
+
+
+class TestSymspellpy:
+    """Test symspellpy as a faster alternative."""
+
+    @pytest.fixture(autouse=True)
+    def setup(self):
+        try:
+            self.sym, self.Verbosity = _load_symspellpy()
+            self.available = True
+        except (ImportError, FileNotFoundError) as e:
+            self.available = False
+            pytest.skip(f"symspellpy not installed: {e}")
+
+    def test_correction_quality(self):
+        """Test symspellpy corrections (EN only — no DE dict bundled)."""
+        en_cases = [(o, e, c) for o, e, _, c in OCR_TEST_CASES
+                    if _ == "en" and c not in ("sentence", "sentence_correct", "merged", "a_vs_I")]
+
+        results = []
+        for ocr, expected, category in en_cases:
+            suggestions = self.sym.lookup(ocr.lower(), self.Verbosity.CLOSEST, max_edit_distance=2)
+            if suggestions:
+                fix = suggestions[0].term
+                if ocr[0].isupper():
+                    fix = fix[0].upper() + fix[1:]
+                result = fix
+            else:
+                result = ocr
+
+            ok = result == expected
+            results.append((ocr, expected, result, ok, category))
+            status = "OK" if ok else "MISS"
+            print(f"  {status}: '{ocr}' → '{result}' (expected '{expected}') [{category}]")
+
+        correct = sum(1 for *_, ok, _ in results if ok)
+        total = len(results)
+        print(f"\nsymspellpy EN: {correct}/{total} correct ({100*correct/total:.0f}%)")
+
+    def test_speed_100_words(self):
+        """Measure symspellpy correction speed for 100 words."""
+        words = ["beautful", "teh", "becasue", "freind", "shcool",
+                 "homwork", "yesturday", "chilren", "becuse", "enviroment"] * 10
+        t0 = time.time()
+        for w in words:
+            self.sym.lookup(w, self.Verbosity.CLOSEST, max_edit_distance=2)
+        dt = time.time() - t0
+        print(f"\n  symspellpy: 100 EN corrections in {dt*1000:.0f}ms")
+
+    def test_compound_segmentation(self):
+        """Test symspellpy's word segmentation for merged words."""
+        cases = [
+            ("atmyschool", "at my school"),
+            ("goodidea", "good idea"),
+            ("makeadecision", "make a decision"),
+        ]
+        for merged, expected in cases:
+            result = self.sym.word_segmentation(merged)
+            ok = result.corrected_string == expected
+            status = "OK" if ok else "MISS"
+            print(f"  {status}: '{merged}' → '{result.corrected_string}' (expected '{expected}')")
+
+
+class TestContextDisambiguation:
+    """Test context-based disambiguation for a/I and similar cases."""
+
+    @pytest.fixture(autouse=True)
+    def setup(self):
+        self.en, self.de = _load_pyspellchecker()
+
+    def test_bigram_context(self):
+        """Use simple bigram heuristic for a/I disambiguation.
+
+        Approach: check if 'a <next_word>' or 'I <next_word>' is more
+        common by checking if <next_word> is a noun (follows 'a') or
+        verb (follows 'I').
+        """
+        # Common words that follow "I" (verbs)
+        i_followers = {"am", "was", "have", "had", "do", "did", "will",
+                       "would", "can", "could", "should", "shall", "may",
+                       "might", "think", "know", "see", "want", "need",
+                       "like", "love", "hate", "go", "went", "come",
+                       "came", "say", "said", "get", "got", "make", "made",
+                       "take", "took", "give", "gave", "tell", "told",
+                       "feel", "felt", "find", "found", "believe", "hope",
+                       "remember", "forget", "understand", "mean", "meant",
+                       "don't", "didn't", "can't", "won't", "couldn't",
+                       "shouldn't", "wouldn't", "haven't", "hadn't"}
+
+        # Common words that follow "a" (nouns/adjectives)
+        a_followers = {"lot", "few", "little", "bit", "good", "bad",
+                       "big", "small", "great", "new", "old", "long",
+                       "short", "man", "woman", "boy", "girl", "dog",
+                       "cat", "book", "car", "house", "day", "year",
+                       "nice", "beautiful", "large", "huge", "tiny"}
+
+        def disambiguate_a_I(token: str, next_word: str) -> str:
+            """Given an ambiguous 'a' or 'I' (or 'l'), pick the right one."""
+            nw = next_word.lower()
+            if nw in i_followers:
+                return "I"
+            if nw in a_followers:
+                return "a"
+            # Fallback: if next word is known verb → I, known adj/noun → a
+            # For now, use a simple heuristic: lowercase → "a", uppercase first letter → "I"
+            return token  # no change if uncertain
+
+        cases = [
+            ("l", "am", "I"),
+            ("l", "was", "I"),
+            ("l", "think", "I"),
+            ("a", "book", "a"),
+            ("a", "cat", "a"),
+            ("a", "lot", "a"),
+            ("l", "big", "a"),  # "a big ..."
+            ("a", "have", "I"),  # "I have ..."
+        ]
+
+        results = []
+        for token, next_word, expected in cases:
+            result = disambiguate_a_I(token, next_word)
+            ok = result == expected
+            results.append((token, next_word, expected, result, ok))
+            status = "OK" if ok else "MISS"
+            print(f"  {status}: '{token} {next_word}...' → '{result}' (expected '{expected}')")
+
+        correct = sum(1 for *_, ok in results if ok)
+        total = len(results)
+        print(f"\na/I disambiguation: {correct}/{total} correct ({100*correct/total:.0f}%)")
+
+
+class TestLangDetectLibrary:
+    """Test py3langid or langdetect if available."""
+
+    def test_py3langid(self):
+        try:
+            import langid
+        except ImportError:
+            pytest.skip("langid not installed")
+
+        sentences = [
+            ("I go to school every day", "en"),
+            ("Ich gehe jeden Tag zur Schule", "de"),
+            ("The weather is nice today", "en"),
+            ("Das Wetter ist heute schön", "de"),
+            ("She likes to play football", "en"),
+            ("Er spielt gerne Fußball", "de"),
+        ]
+
+        results = []
+        for text, expected in sentences:
+            lang, confidence = langid.classify(text)
+            ok = lang == expected
+            results.append(ok)
+            status = "OK" if ok else "MISS"
+            print(f"  {status}: '{text[:40]}...' → {lang} ({confidence:.2f}) (expected {expected})")
+
+        correct = sum(results)
+        print(f"\nlangid sentence detection: {correct}/{len(results)} correct")
+
+    def test_langid_single_words(self):
+        """langid on single words — expected to be unreliable."""
+        try:
+            import langid
+        except ImportError:
+            pytest.skip("langid not installed")
+
+        words = [("school", "en"), ("Schule", "de"), ("book", "en"),
+                 ("Buch", "de"), ("car", "en"), ("Auto", "de"),
+                 ("a", "en"), ("I", "en"), ("der", "de"), ("the", "en")]
+
+        results = []
+        for word, expected in words:
+            lang, conf = langid.classify(word)
+            ok = lang == expected
+            results.append(ok)
+            status = "OK" if ok else "MISS"
+            print(f"  {status}: '{word}' → {lang} ({conf:.2f}) (expected {expected})")
+
+        correct = sum(results)
+        print(f"\nlangid single-word: {correct}/{len(results)} correct")
+
+
+class TestIntegratedApproach:
+    """Test the combined approach: dict-heuristic for lang + spell correction."""
+
+    @pytest.fixture(autouse=True)
+    def setup(self):
+        self.en, self.de = _load_pyspellchecker()
+
+    def detect_language(self, word: str) -> str:
+        """Dual-dict heuristic language detection."""
+        w = word.lower()
+        # Skip very short words — too ambiguous
+        if len(w) <= 2:
+            return "ambiguous"
+        in_en = bool(self.en.known([w]))
+        in_de = bool(self.de.known([w]))
+        if in_en and in_de:
+            return "both"
+        if in_en:
+            return "en"
+        if in_de:
+            return "de"
+        return "unknown"
+
+    def correct_word(self, word: str, expected_lang: str) -> str:
+        """Correct a single word given the expected language."""
+        w_lower = word.lower()
+        spell = self.en if expected_lang == "en" else self.de
+
+        # Already known
+        if spell.known([w_lower]):
+            return word
+
+        # Also check the other language — might be fine
+        other = self.de if expected_lang == "en" else self.en
+        if other.known([w_lower]):
+            return word  # valid in the other language
+
+        # Try correction
+        fix = spell.correction(w_lower)
+        if fix and fix != w_lower:
+            if word[0].isupper():
+                fix = fix[0].upper() + fix[1:]
+            return fix
+
+        return word
+
+    def test_full_pipeline(self):
+        """Test: detect language → correct with appropriate dict."""
+        vocab_entries = [
+            # (english_col, german_col, expected_en, expected_de)
+            ("beautful", "schön", "beautiful", "schön"),
+            ("school", "Schule", "school", "Schule"),
+            ("teh cat", "die Katze", "the cat", "die Katze"),
+            ("freind", "Freund", "friend", "Freund"),
+            ("homwork", "Hausaufgaben", "homework", "Hausaufgaben"),
+            ("Schuler", "Schuler", "Schuler", "Schüler"),  # DE umlaut: Schüler
+        ]
+
+        en_correct = 0
+        de_correct = 0
+        total = len(vocab_entries)
+
+        for en_ocr, de_ocr, exp_en, exp_de in vocab_entries:
+            # Correct each word in the column
+            en_words = en_ocr.split()
+            de_words = de_ocr.split()
+            en_fixed = " ".join(self.correct_word(w, "en") for w in en_words)
+            de_fixed = " ".join(self.correct_word(w, "de") for w in de_words)
+
+            en_ok = en_fixed == exp_en
+            de_ok = de_fixed == exp_de
+            en_correct += en_ok
+            de_correct += de_ok
+
+            en_status = "OK" if en_ok else "MISS"
+            de_status = "OK" if de_ok else "MISS"
+            print(f"  EN {en_status}: '{en_ocr}' → '{en_fixed}' (expected '{exp_en}')")
+            print(f"  DE {de_status}: '{de_ocr}' → '{de_fixed}' (expected '{exp_de}')")
+
+        print(f"\nEN corrections: {en_correct}/{total} correct")
+        print(f"DE corrections: {de_correct}/{total} correct")
@@ -0,0 +1,57 @@
+'use client'
+
+import React from 'react'
+import type { VocabWorksheetHook } from '../types'
+
+/**
+ * Export tab of the vocab-worksheet page.
+ *
+ * When `h.worksheetId` is set, renders a success banner, a download button
+ * that calls `h.downloadPDF('worksheet')`, a second download button that
+ * calls `h.downloadPDF('solution')` (only when `h.includeSolutions` is
+ * truthy) and a button that calls `h.resetSession`. Otherwise renders a
+ * placeholder message.
+ *
+ * @param h - the shared worksheet hook object; colours follow `h.isDark`
+ *            and the card styling comes from `h.glassCard`.
+ */
+export function ExportTab({ h }: { h: VocabWorksheetHook }) {
+  const { isDark, glassCard } = h
+
+  return (
+    <div className={`${glassCard} rounded-2xl p-6`}>
+      <h2 className={`text-lg font-semibold mb-4 ${isDark ? 'text-white' : 'text-slate-900'}`}>PDF herunterladen</h2>
+
+      {h.worksheetId ? (
+        <div className="space-y-4">
+          <div className={`p-4 rounded-xl ${isDark ? 'bg-green-500/20 border border-green-500/30' : 'bg-green-100 border border-green-200'}`}>
+            <div className="flex items-center gap-3">
+              <svg className="w-6 h-6 text-green-500" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
+              </svg>
+              <span className={`font-medium ${isDark ? 'text-green-200' : 'text-green-700'}`}>Arbeitsblatt erfolgreich generiert!</span>
+            </div>
+          </div>
+
+          <div className="grid grid-cols-2 gap-4">
+            <button onClick={() => h.downloadPDF('worksheet')} className={`${glassCard} p-6 rounded-xl text-left transition-all hover:shadow-lg ${isDark ? 'hover:border-purple-400/50' : 'hover:border-purple-500'}`}>
+              <div className={`w-12 h-12 mb-3 rounded-xl flex items-center justify-center ${isDark ? 'bg-purple-500/30' : 'bg-purple-100'}`}>
+                <svg className={`w-6 h-6 ${isDark ? 'text-purple-300' : 'text-purple-600'}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M12 10v6m0 0l-3-3m3 3l3-3m2 8H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
+                </svg>
+              </div>
+              <h3 className={`font-semibold mb-1 ${isDark ? 'text-white' : 'text-slate-900'}`}>Arbeitsblatt</h3>
+              <p className={`text-sm ${isDark ? 'text-white/60' : 'text-slate-500'}`}>PDF zum Ausdrucken</p>
+            </button>
+
+            {h.includeSolutions && (
+              <button onClick={() => h.downloadPDF('solution')} className={`${glassCard} p-6 rounded-xl text-left transition-all hover:shadow-lg ${isDark ? 'hover:border-green-400/50' : 'hover:border-green-500'}`}>
+                <div className={`w-12 h-12 mb-3 rounded-xl flex items-center justify-center ${isDark ? 'bg-green-500/30' : 'bg-green-100'}`}>
+                  <svg className={`w-6 h-6 ${isDark ? 'text-green-300' : 'text-green-600'}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
+                  </svg>
+                </div>
+                <h3 className={`font-semibold mb-1 ${isDark ? 'text-white' : 'text-slate-900'}`}>Loesungsblatt</h3>
+                <p className={`text-sm ${isDark ? 'text-white/60' : 'text-slate-500'}`}>PDF mit Loesungen</p>
+              </button>
+            )}
+          </div>
+
+          <button onClick={h.resetSession} className={`w-full py-3 rounded-xl border font-medium transition-colors ${isDark ? 'border-white/20 text-white/80 hover:bg-white/10' : 'border-slate-300 text-slate-700 hover:bg-slate-50'}`}>
+            Neues Arbeitsblatt erstellen
+          </button>
+        </div>
+      ) : (
+        <p className={`text-center py-12 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>Noch kein Arbeitsblatt generiert.</p>
+      )}
+    </div>
+  )
+}
@@ -0,0 +1,39 @@
+'use client'
+
+import React from 'react'
+import type { VocabWorksheetHook } from '../types'
+
+/**
+ * Full-screen overlay showing the original source document.
+ *
+ * The source is taken, in order of precedence, from the directly uploaded
+ * file (`h.directFile` / `h.directFilePreview`), the selected mobile file
+ * (`h.selectedMobileFile`) or the stored document whose id equals
+ * `h.selectedDocumentId`. Images are rendered as `<img>`, everything else
+ * as an `<iframe>` (for the direct file: PDFs only).
+ *
+ * Clicking the backdrop or the close button calls
+ * `h.setShowFullPreview(false)`; clicks inside the content area do not
+ * propagate to the backdrop.
+ *
+ * Accessibility: the overlay is exposed as a modal dialog, the icon-only
+ * close button has an accessible name and every iframe has a title.
+ */
+export function FullscreenPreview({ h }: { h: VocabWorksheetHook }) {
+  return (
+    <div
+      role="dialog"
+      aria-modal="true"
+      aria-label="Vorschau"
+      className="fixed inset-0 z-50 bg-black/80 backdrop-blur-sm flex items-center justify-center"
+      onClick={() => h.setShowFullPreview(false)}
+    >
+      <button
+        type="button"
+        aria-label="Vorschau schliessen"
+        onClick={() => h.setShowFullPreview(false)}
+        className="absolute top-4 right-4 p-2 rounded-full bg-white/10 hover:bg-white/20 text-white z-10 transition-colors"
+      >
+        <svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
+          <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
+        </svg>
+      </button>
+      <div className="max-w-[95vw] max-h-[95vh] overflow-auto" onClick={(e) => e.stopPropagation()}>
+        {h.directFile?.type.startsWith('image/') && h.directFilePreview && (
+          <img src={h.directFilePreview} alt="Original" className="max-w-none" />
+        )}
+        {h.directFile?.type === 'application/pdf' && h.directFilePreview && (
+          <iframe src={h.directFilePreview} title="Original" className="border-0 rounded-xl bg-white" style={{ width: '90vw', height: '90vh' }} />
+        )}
+        {h.selectedMobileFile && !h.directFile && (
+          h.selectedMobileFile.type.startsWith('image/')
+            ? <img src={h.selectedMobileFile.dataUrl} alt="Original" className="max-w-none" />
+            : <iframe src={h.selectedMobileFile.dataUrl} title="Original" className="border-0 rounded-xl bg-white" style={{ width: '90vw', height: '90vh' }} />
+        )}
+        {h.selectedDocumentId && !h.directFile && !h.selectedMobileFile && (() => {
+          const doc = h.storedDocuments.find(d => d.id === h.selectedDocumentId)
+          if (!doc?.url) return null
+          return doc.type.startsWith('image/')
+            ? <img src={doc.url} alt="Original" className="max-w-none" />
+            : <iframe src={doc.url} title="Original" className="border-0 rounded-xl bg-white" style={{ width: '90vw', height: '90vh' }} />
+        })()}
+      </div>
+    </div>
+  )
+}
@@ -0,0 +1,135 @@
+'use client'
+
+import React from 'react'
+import type { VocabWorksheetHook } from '../types'
+
+/**
+ * Modal that shows the outcome of an OCR method comparison.
+ *
+ * - Header: title, the 1-based page number derived from
+ *   `h.ocrComparePageIndex` ("-" when it is null) and a close button that
+ *   calls `h.setShowOcrComparison(false)`.
+ * - While `h.isComparingOcr` is truthy a spinner is shown.
+ * - When `h.ocrCompareError` is set it is shown as an error box.
+ * - When `h.ocrCompareResult` is set and no comparison is running, one
+ *   card is rendered per entry of `ocrCompareResult.methods`. The card
+ *   whose key equals `recommendation.best_method` is highlighted and gets
+ *   a "Beste" badge. A successful method lists at most its first 10
+ *   vocabulary pairs plus a count of the remaining ones; a failed method
+ *   shows `method.error` or the fallback text "Fehler".
+ * - When `ocrCompareResult.comparison` exists, a summary block with the
+ *   agreement figures follows (missing figures are shown as 0).
+ *
+ * @param h - the shared worksheet hook object.
+ */
+export function OcrComparisonModal({ h }: { h: VocabWorksheetHook }) {
+  const { isDark, glassCard } = h
+
+  return (
+    <div className="fixed inset-0 z-50 flex items-center justify-center p-4 bg-black/50 backdrop-blur-sm">
+      <div className={`relative w-full max-w-6xl max-h-[90vh] overflow-auto rounded-3xl ${glassCard} p-6`}>
+        {/* Header */}
+        <div className="flex items-center justify-between mb-6">
+          <div>
+            <h2 className={`text-xl font-bold ${isDark ? 'text-white' : 'text-slate-900'}`}>
+              OCR-Methoden Vergleich
+            </h2>
+            <p className={`text-sm ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
+              Seite {h.ocrComparePageIndex !== null ? h.ocrComparePageIndex + 1 : '-'}
+            </p>
+          </div>
+          <button
+            onClick={() => h.setShowOcrComparison(false)}
+            className={`p-2 rounded-xl ${isDark ? 'hover:bg-white/10 text-white' : 'hover:bg-black/5 text-slate-500'}`}
+          >
+            <svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
+            </svg>
+          </button>
+        </div>
+
+        {/* Loading State */}
+        {h.isComparingOcr && (
+          <div className="flex flex-col items-center justify-center py-12">
+            <div className="w-12 h-12 border-4 border-purple-500 border-t-transparent rounded-full animate-spin mb-4" />
+            <p className={isDark ? 'text-white/60' : 'text-slate-500'}>
+              Vergleiche OCR-Methoden... (kann 1-2 Minuten dauern)
+            </p>
+          </div>
+        )}
+
+        {/* Error State */}
+        {h.ocrCompareError && (
+          <div className={`p-4 rounded-xl ${isDark ? 'bg-red-500/20 text-red-300' : 'bg-red-100 text-red-700'}`}>
+            Fehler: {h.ocrCompareError}
+          </div>
+        )}
+
+        {/* Results */}
+        {h.ocrCompareResult && !h.isComparingOcr && (
+          <div className="space-y-6">
+            {/* Method Results Grid */}
+            <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
+              {Object.entries(h.ocrCompareResult.methods || {}).map(([key, method]: [string, any]) => (
+                <div
+                  key={key}
+                  className={`p-4 rounded-2xl ${
+                    h.ocrCompareResult.recommendation?.best_method === key
+                      ? (isDark ? 'bg-green-500/20 border border-green-500/50' : 'bg-green-100 border border-green-300')
+                      : (isDark ? 'bg-white/5 border border-white/10' : 'bg-white/50 border border-black/10')
+                  }`}
+                >
+                  <div className="flex items-center justify-between mb-3">
+                    <h3 className={`font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
+                      {method.name}
+                    </h3>
+                    {h.ocrCompareResult.recommendation?.best_method === key && (
+                      <span className="px-2 py-1 text-xs font-medium bg-green-500 text-white rounded-full">
+                        Beste
+                      </span>
+                    )}
+                  </div>
+
+                  {method.success ? (
+                    <>
+                      <div className={`text-sm mb-2 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
+                        <span className="font-medium">{method.vocabulary_count}</span> Vokabeln in <span className="font-medium">{method.duration_seconds}s</span>
+                      </div>
+
+                      {method.vocabulary && method.vocabulary.length > 0 && (
+                        <div className={`max-h-48 overflow-y-auto rounded-xl p-2 ${isDark ? 'bg-black/20' : 'bg-white/50'}`}>
+                          {method.vocabulary.slice(0, 10).map((v: any, idx: number) => (
+                            <div key={idx} className={`text-sm py-1 border-b last:border-0 ${isDark ? 'border-white/10 text-white/80' : 'border-black/5 text-slate-700'}`}>
+                              <span className="font-medium">{v.english}</span> = {v.german}
+                            </div>
+                          ))}
+                          {method.vocabulary.length > 10 && (
+                            <div className={`text-xs mt-2 ${isDark ? 'text-white/40' : 'text-slate-400'}`}>
+                              + {method.vocabulary.length - 10} weitere...
+                            </div>
+                          )}
+                        </div>
+                      )}
+                    </>
+                  ) : (
+                    <div className={`text-sm ${isDark ? 'text-red-300' : 'text-red-600'}`}>
+                      {method.error || 'Fehler'}
+                    </div>
+                  )}
+                </div>
+              ))}
+            </div>
+
+            {/* Comparison Summary */}
+            {h.ocrCompareResult.comparison && (
+              <div className={`p-4 rounded-2xl ${isDark ? 'bg-blue-500/20 border border-blue-500/30' : 'bg-blue-100 border border-blue-200'}`}>
+                <h3 className={`font-semibold mb-3 ${isDark ? 'text-blue-300' : 'text-blue-900'}`}>
+                  Uebereinstimmung
+                </h3>
+                <div className="grid grid-cols-2 md:grid-cols-4 gap-4 text-sm">
+                  <div>
+                    <span className={isDark ? 'text-blue-200' : 'text-blue-700'}>Von allen erkannt:</span>
+                    <span className="ml-2 font-bold">{h.ocrCompareResult.comparison.found_by_all_methods?.length || 0}</span>
+                  </div>
+                  <div>
+                    <span className={isDark ? 'text-blue-200' : 'text-blue-700'}>Nur teilweise:</span>
+                    <span className="ml-2 font-bold">{h.ocrCompareResult.comparison.found_by_some_methods?.length || 0}</span>
+                  </div>
+                  <div>
+                    <span className={isDark ? 'text-blue-200' : 'text-blue-700'}>Gesamt einzigartig:</span>
+                    <span className="ml-2 font-bold">{h.ocrCompareResult.comparison.total_unique_vocabulary || 0}</span>
+                  </div>
+                  <div>
+                    <span className={isDark ? 'text-blue-200' : 'text-blue-700'}>Uebereinstimmung:</span>
+                    <span className="ml-2 font-bold">{Math.round((h.ocrCompareResult.comparison.agreement_rate || 0) * 100)}%</span>
+                  </div>
+                </div>
+              </div>
+            )}
+          </div>
+        )}
+      </div>
+    </div>
+  )
+}
@@ -0,0 +1,125 @@
+'use client'
+
+import React from 'react'
+import type { VocabWorksheetHook } from '../types'
+import { defaultOcrPrompts } from '../constants'
+
+/**
+ * Settings panel for the OCR result filters.
+ *
+ * Every control reads its value from `h.ocrPrompts` and, on change, calls
+ * `h.saveOcrPrompts` with a copy of `h.ocrPrompts` in which only the edited
+ * field is replaced:
+ * - three checkboxes: `filterHeaders`, `filterFooters`, `filterPageNumbers`
+ * - two text inputs: `headerPatterns`, `footerPatterns` — the text is split
+ *   on commas, each part is trimmed and empty parts are dropped
+ * - one textarea: `customFilter`
+ *
+ * The close button calls `h.setShowSettings(false)`; the reset button calls
+ * `h.saveOcrPrompts(defaultOcrPrompts)`.
+ *
+ * NOTE(review): the pattern inputs display `patterns.join(', ')` of the
+ * already filtered array, so a trailing comma typed by the user looks like
+ * it is dropped again on the next render — confirm against the behaviour
+ * of `saveOcrPrompts`.
+ *
+ * @param h - the shared worksheet hook object.
+ */
+export function OcrSettingsPanel({ h }: { h: VocabWorksheetHook }) {
+  const { isDark, glassCard, glassInput } = h
+
+  return (
+    <div className={`${glassCard} rounded-2xl p-6 mb-6`}>
+      <div className="flex items-center justify-between mb-4">
+        <h2 className={`text-lg font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
+          OCR-Filter Einstellungen
+        </h2>
+        <button
+          onClick={() => h.setShowSettings(false)}
+          className={`p-1 rounded-lg ${isDark ? 'hover:bg-white/10 text-white/60' : 'hover:bg-black/5 text-slate-500'}`}
+        >
+          <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
+          </svg>
+        </button>
+      </div>
+
+      <div className={`p-4 rounded-xl mb-4 ${isDark ? 'bg-blue-500/20 text-blue-200' : 'bg-blue-100 text-blue-800'}`}>
+        <p className="text-sm">
+          Diese Einstellungen helfen, unerwuenschte Elemente wie Seitenzahlen, Kapitelnamen oder Kopfzeilen aus dem OCR-Ergebnis zu filtern.
+        </p>
+      </div>
+
+      <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
+        {/* Checkboxes */}
+        <div className="space-y-3">
+          <label className={`flex items-center gap-3 cursor-pointer ${isDark ? 'text-white' : 'text-slate-900'}`}>
+            <input
+              type="checkbox"
+              checked={h.ocrPrompts.filterHeaders}
+              onChange={(e) => h.saveOcrPrompts({ ...h.ocrPrompts, filterHeaders: e.target.checked })}
+              className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500"
+            />
+            <span>Kopfzeilen filtern (z.B. Kapitelnamen)</span>
+          </label>
+
+          <label className={`flex items-center gap-3 cursor-pointer ${isDark ? 'text-white' : 'text-slate-900'}`}>
+            <input
+              type="checkbox"
+              checked={h.ocrPrompts.filterFooters}
+              onChange={(e) => h.saveOcrPrompts({ ...h.ocrPrompts, filterFooters: e.target.checked })}
+              className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500"
+            />
+            <span>Fusszeilen filtern</span>
+          </label>
+
+          <label className={`flex items-center gap-3 cursor-pointer ${isDark ? 'text-white' : 'text-slate-900'}`}>
+            <input
+              type="checkbox"
+              checked={h.ocrPrompts.filterPageNumbers}
+              onChange={(e) => h.saveOcrPrompts({ ...h.ocrPrompts, filterPageNumbers: e.target.checked })}
+              className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500"
+            />
+            <span>Seitenzahlen filtern (auch ausgeschrieben: &quot;zweihundertzwoelf&quot;)</span>
+          </label>
+        </div>
+
+        {/* Patterns */}
+        <div className="space-y-4">
+          <div>
+            <label className={`block text-sm font-medium mb-2 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
+              Kopfzeilen-Muster (kommagetrennt)
+            </label>
+            <input
+              type="text"
+              value={h.ocrPrompts.headerPatterns.join(', ')}
+              onChange={(e) => h.saveOcrPrompts({
+                ...h.ocrPrompts,
+                headerPatterns: e.target.value.split(',').map(s => s.trim()).filter(Boolean)
+              })}
+              placeholder="Unit, Chapter, Lesson..."
+              className={`w-full px-4 py-2 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500`}
+            />
+          </div>
+
+          <div>
+            <label className={`block text-sm font-medium mb-2 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
+              Fusszeilen-Muster (kommagetrennt)
+            </label>
+            <input
+              type="text"
+              value={h.ocrPrompts.footerPatterns.join(', ')}
+              onChange={(e) => h.saveOcrPrompts({
+                ...h.ocrPrompts,
+                footerPatterns: e.target.value.split(',').map(s => s.trim()).filter(Boolean)
+              })}
+              placeholder="zweihundert, Page, Seite..."
+              className={`w-full px-4 py-2 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500`}
+            />
+          </div>
+        </div>
+      </div>
+
+      <div className="mt-4">
+        <label className={`block text-sm font-medium mb-2 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
+          Zusaetzlicher Filter-Prompt (optional)
+        </label>
+        <textarea
+          value={h.ocrPrompts.customFilter}
+          onChange={(e) => h.saveOcrPrompts({ ...h.ocrPrompts, customFilter: e.target.value })}
+          placeholder="z.B.: Ignoriere alle Zeilen, die nur Zahlen oder Buchstaben enthalten..."
+          rows={2}
+          className={`w-full px-4 py-2 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500 resize-none`}
+        />
+      </div>
+
+      <div className="mt-4 flex justify-end">
+        <button
+          onClick={() => h.saveOcrPrompts(defaultOcrPrompts)}
+          className={`px-4 py-2 rounded-xl text-sm ${isDark ? 'text-white/60 hover:text-white' : 'text-slate-500 hover:text-slate-700'}`}
+        >
+          Auf Standard zuruecksetzen
+        </button>
+      </div>
+    </div>
+  )
+}
@@ -0,0 +1,108 @@
+'use client'
+
+import React from 'react'
+import type { VocabWorksheetHook } from '../types'
+
+/**
+ * Page picker for an uploaded PDF: renders one thumbnail per page and lets the
+ * user select pages for extraction, hide pages, or start an OCR comparison
+ * for a single page.
+ *
+ * The component keeps no state of its own; all data and callbacks come from `h`.
+ *
+ * @param h - Worksheet hook object (selection state, thumbnails, handlers).
+ */
+export function PageSelection({ h }: { h: VocabWorksheetHook }) {
+  const { isDark, glassCard } = h
+  const selectedCount = h.selectedPages.length
+  const hiddenCount = h.excludedPages.length
+  // German singular/plural so a single selected page does not read "1 Seiten verarbeiten".
+  const pageNoun = selectedCount === 1 ? 'Seite' : 'Seiten'
+
+  return (
+    <div className={`${glassCard} rounded-2xl p-6`}>
+      <div className="flex items-center justify-between mb-4">
+        <h2 className={`text-lg font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
+          PDF-Seiten auswaehlen ({selectedCount} von {h.pdfPageCount - hiddenCount} ausgewaehlt)
+        </h2>
+        <div className="flex gap-2">
+          {hiddenCount > 0 && (
+            <button onClick={h.restoreExcludedPages} className={`px-3 py-1 rounded-lg text-sm ${isDark ? 'bg-orange-500/20 text-orange-300 hover:bg-orange-500/30' : 'bg-orange-100 text-orange-700 hover:bg-orange-200'}`}>
+              {hiddenCount} ausgeblendet - wiederherstellen
+            </button>
+          )}
+          <button onClick={h.selectAllPages} className={`px-3 py-1 rounded-lg text-sm transition-colors ${isDark ? 'bg-white/10 hover:bg-white/20 text-white' : 'bg-slate-100 hover:bg-slate-200 text-slate-900'}`}>
+            Alle
+          </button>
+          <button onClick={h.selectNoPages} className={`px-3 py-1 rounded-lg text-sm transition-colors ${isDark ? 'bg-white/10 hover:bg-white/20 text-white' : 'bg-slate-100 hover:bg-slate-200 text-slate-900'}`}>
+            Keine
+          </button>
+        </div>
+      </div>
+
+      <p className={`text-sm mb-4 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
+        Klicken Sie auf eine Seite um sie auszuwaehlen. Klicken Sie auf das X um leere Seiten auszublenden.
+      </p>
+
+      {h.isLoadingThumbnails ? (
+        <div className="flex items-center justify-center py-12">
+          <div className="w-8 h-8 border-4 border-purple-500 border-t-transparent rounded-full animate-spin" />
+          <span className={`ml-3 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>Lade Seitenvorschau...</span>
+        </div>
+      ) : (
+        <div className="grid grid-cols-2 sm:grid-cols-3 md:grid-cols-4 lg:grid-cols-6 gap-4 mb-6">
+          {h.pagesThumbnails.map((thumb, idx) => {
+            // Hidden pages are skipped entirely; idx stays the original page index.
+            if (h.excludedPages.includes(idx)) return null
+            // Computed once per thumbnail instead of three separate includes() scans.
+            const isSelected = h.selectedPages.includes(idx)
+            return (
+              <div key={idx} className="relative group">
+                {/* Exclude button (visible on hover); the click event is passed on to h.excludePage */}
+                <button
+                  onClick={(e) => h.excludePage(idx, e)}
+                  className="absolute top-1 left-1 z-10 p-1 rounded-full opacity-0 group-hover:opacity-100 transition-opacity bg-red-500/80 hover:bg-red-600 text-white"
+                  title="Seite ausblenden"
+                  aria-label={`Seite ${idx + 1} ausblenden`}
+                >
+                  <svg className="w-3 h-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
+                  </svg>
+                </button>
+
+                {/* OCR compare button (visible on hover) */}
+                <button
+                  onClick={(e) => { e.stopPropagation(); h.runOcrComparison(idx); }}
+                  className="absolute top-1 right-1 z-10 p-1 rounded-full opacity-0 group-hover:opacity-100 transition-opacity bg-blue-500/80 hover:bg-blue-600 text-white"
+                  title="OCR-Methoden vergleichen"
+                  aria-label={`OCR-Methoden fuer Seite ${idx + 1} vergleichen`}
+                >
+                  <svg className="w-3 h-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z" />
+                  </svg>
+                </button>
+
+                {/* Thumbnail itself toggles the page's selection */}
+                <button
+                  onClick={() => h.togglePageSelection(idx)}
+                  className={`relative rounded-xl overflow-hidden border-2 transition-all w-full ${
+                    isSelected
+                      ? 'border-purple-500 ring-2 ring-purple-500/50'
+                      : (isDark ? 'border-white/20 hover:border-white/40' : 'border-slate-200 hover:border-slate-300')
+                  }`}
+                >
+                  <img src={thumb} alt={`Seite ${idx + 1}`} className="w-full h-auto" />
+                  <div className={`absolute bottom-0 left-0 right-0 py-1 text-center text-xs font-medium ${
+                    isSelected
+                      ? 'bg-purple-500 text-white'
+                      : (isDark ? 'bg-black/60 text-white/80' : 'bg-white/90 text-slate-700')
+                  }`}>
+                    Seite {idx + 1}
+                  </div>
+                  {isSelected && (
+                    <div className="absolute top-2 right-2 w-6 h-6 bg-purple-500 rounded-full flex items-center justify-center">
+                      <svg className="w-4 h-4 text-white" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                        <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
+                      </svg>
+                    </div>
+                  )}
+                </button>
+              </div>
+            )
+          })}
+        </div>
+      )}
+
+      <div className="flex justify-center">
+        <button
+          onClick={h.processSelectedPages}
+          disabled={selectedCount === 0 || h.isExtracting}
+          className="px-8 py-4 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-2xl font-semibold disabled:opacity-50 hover:shadow-xl hover:shadow-purple-500/30 transition-all transform hover:scale-105"
+        >
+          {h.isExtracting ? 'Extrahiere Vokabeln...' : `${selectedCount} ${pageNoun} verarbeiten`}
+        </button>
+      </div>
+    </div>
+  )
+}
@@ -0,0 +1,31 @@
+'use client'
+
+import React from 'react'
+import { QRCodeUpload } from '@/components/QRCodeUpload'
+import type { VocabWorksheetHook } from '../types'
+
+/**
+ * Full-screen modal that hosts the QR-code upload widget.
+ *
+ * Clicking the backdrop or the widget's own close action hides the modal via
+ * `h.setShowQRModal(false)`. Whenever the widget reports its file list, the
+ * list is stored on the hook and, if it is non-empty, the last file becomes
+ * the selected mobile file while the direct file and stored-document
+ * selections are cleared.
+ *
+ * @param h - Worksheet hook object providing theme flag, session id and setters.
+ */
+export function QRCodeModal({ h }: { h: VocabWorksheetHook }) {
+  const closeModal = () => h.setShowQRModal(false)
+  const panelBackground = h.isDark ? 'bg-slate-900' : 'bg-white'
+
+  return (
+    <div className="fixed inset-0 z-50 flex items-center justify-center p-4">
+      {/* Backdrop: a click outside the panel dismisses the modal */}
+      <div className="absolute inset-0 bg-black/50 backdrop-blur-sm" onClick={closeModal} />
+      <div className={`relative w-full max-w-md rounded-3xl ${panelBackground}`}>
+        <QRCodeUpload
+          sessionId={h.uploadSessionId}
+          onClose={closeModal}
+          onFilesChanged={(files) => {
+            h.setMobileUploadedFiles(files)
+            if (files.length === 0) return
+            // The last entry becomes the active source; the other two sources are reset.
+            h.setSelectedMobileFile(files[files.length - 1])
+            h.setDirectFile(null)
+            h.setSelectedDocumentId(null)
+          }}
+        />
+      </div>
+    </div>
+  )
+}
@@ -0,0 +1,315 @@
+'use client'
+
+import React from 'react'
+import type { VocabWorksheetHook } from '../types'
+import { formatFileSize } from '../constants'
+
+/**
+ * First screen of the vocab-worksheet flow: resume an existing session, or
+ * pick a document and name a new session.
+ *
+ * Sections rendered, top to bottom:
+ *  - Existing sessions (only when `h.existingSessions` is non-empty); a spinner
+ *    replaces the cards while `h.isLoadingSessions` is true.
+ *  - A static five-step explanation.
+ *  - Step 1: document source - local file picker, QR/mobile upload, plus the
+ *    lists of mobile-uploaded and stored documents.
+ *  - Step 2 (only once a source is selected): preview pane, session-name input
+ *    and the start button.
+ *
+ * The component declares no local state; everything is read from and written
+ * through `h`.
+ *
+ * @param h - Worksheet hook object providing state, setters and handlers.
+ */
+export function UploadScreen({ h }: { h: VocabWorksheetHook }) {
+  const { isDark, glassCard, glassInput } = h
+
+  return (
+    <div className="space-y-6">
+      {/* Existing sessions: rendered only when h.existingSessions is non-empty, so the loading spinner below cannot appear while the list is still empty. NOTE(review): confirm this is intended */}
+      {h.existingSessions.length > 0 && (
+        <div className={`${glassCard} rounded-2xl p-6`}>
+          <h2 className={`text-lg font-semibold mb-4 ${isDark ? 'text-white' : 'text-slate-900'}`}>
+            Vorhandene Sessions fortsetzen
+          </h2>
+          {h.isLoadingSessions ? (
+            <div className="flex items-center gap-3 py-4">
+              <div className="w-5 h-5 border-2 border-purple-500 border-t-transparent rounded-full animate-spin" />
+              <span className={isDark ? 'text-white/60' : 'text-slate-500'}>Lade Sessions...</span>
+            </div>
+          ) : (
+            <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
+              {h.existingSessions.map((s) => (
+                <div
+                  key={s.id}
+                  className={`${glassCard} p-4 rounded-xl text-left transition-all hover:shadow-lg relative group cursor-pointer ${
+                    isDark ? 'hover:border-purple-400/50' : 'hover:border-purple-400'
+                  }`}
+                  onClick={() => h.resumeSession(s)}
+                >
+                  {/* Delete button: the click event is forwarded to h.deleteSession, presumably so it can stop propagation to the card's resume handler (confirm in the hook) */}
+                  <button
+                    onClick={(e) => h.deleteSession(s.id, e)}
+                    className={`absolute top-2 right-2 p-1.5 rounded-lg opacity-0 group-hover:opacity-100 transition-opacity ${
+                      isDark ? 'hover:bg-red-500/20 text-red-400' : 'hover:bg-red-100 text-red-500'
+                    }`}
+                    title="Session loeschen"
+                  >
+                    <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
+                    </svg>
+                  </button>
+
+                  <div className="flex items-start gap-3">
+                    <div className={`w-10 h-10 rounded-lg flex items-center justify-center flex-shrink-0 ${
+                      s.status === 'extracted' || s.status === 'completed'
+                        ? (isDark ? 'bg-green-500/30' : 'bg-green-100')
+                        : (isDark ? 'bg-white/10' : 'bg-slate-100')
+                    }`}>
+                      {s.status === 'extracted' || s.status === 'completed' ? (
+                        <svg className="w-5 h-5 text-green-500" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                          <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
+                        </svg>
+                      ) : (
+                        <svg className={`w-5 h-5 ${isDark ? 'text-white/40' : 'text-slate-400'}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                          <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 6v6m0 0v6m0-6h6m-6 0H6" />
+                        </svg>
+                      )}
+                    </div>
+                    <div className="flex-1 min-w-0">
+                      <h3 className={`font-medium truncate ${isDark ? 'text-white' : 'text-slate-900'}`}>{s.name}</h3>
+                      <p className={`text-sm ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
+                        {s.vocabulary_count} Vokabeln
+                        {s.status === 'pending' && ' • Nicht gestartet'}
+                        {s.status === 'extracted' && ' • Bereit'}
+                        {s.status === 'completed' && ' • Abgeschlossen'}
+                      </p>
+                      {s.created_at && (
+                        <p className={`text-xs mt-1 ${isDark ? 'text-white/40' : 'text-slate-400'}`}>
+                          {new Date(s.created_at).toLocaleDateString('de-DE', {
+                            day: '2-digit',
+                            month: '2-digit',
+                            year: 'numeric',
+                            hour: '2-digit',
+                            minute: '2-digit'
+                          })}
+                        </p>
+                      )}
+                    </div>
+                    <svg className={`w-5 h-5 flex-shrink-0 ${isDark ? 'text-white/30' : 'text-slate-300'}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
+                    </svg>
+                  </div>
+                </div>
+              ))}
+            </div>
+          )}
+        </div>
+      )}
+
+      {/* Explanation: the heading wording depends on whether existing sessions are listed above */}
+      <div className={`${glassCard} rounded-2xl p-6 ${isDark ? 'bg-gradient-to-br from-purple-500/20 to-pink-500/20' : 'bg-gradient-to-br from-purple-100/50 to-pink-100/50'}`}>
+        <h2 className={`text-lg font-semibold mb-3 ${isDark ? 'text-white' : 'text-slate-900'}`}>
+          {h.existingSessions.length > 0 ? 'Oder neue Session starten:' : 'So funktioniert es:'}
+        </h2>
+        <ol className={`space-y-2 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
+          {['Dokument (Bild oder PDF) auswaehlen', 'Vorschau pruefen und Session benennen', 'Bei PDFs: Seiten auswaehlen die verarbeitet werden sollen', 'KI extrahiert Vokabeln — pruefen, korrigieren, Arbeitsblatt-Typ waehlen', 'PDF herunterladen und ausdrucken'].map((text, i) => (
+            <li key={i} className="flex items-start gap-2">
+              <span className={`w-6 h-6 rounded-full flex items-center justify-center text-xs font-bold flex-shrink-0 ${isDark ? 'bg-purple-500/30 text-purple-300' : 'bg-purple-200 text-purple-700'}`}>{i + 1}</span>
+              <span>{text}</span>
+            </li>
+          ))}
+        </ol>
+      </div>
+
+      {/* Step 1: document selection (local file, QR/mobile upload, or stored document) */}
+      <div className={`${glassCard} rounded-2xl p-6`}>
+        <h2 className={`text-lg font-semibold mb-4 ${isDark ? 'text-white' : 'text-slate-900'}`}>
+          1. Dokument auswaehlen
+        </h2>
+
+        <input ref={h.directFileInputRef} type="file" accept="image/png,image/jpeg,image/jpg,application/pdf" onChange={h.handleDirectFileSelect} className="hidden" />
+
+        <div className="grid grid-cols-2 gap-3 mb-4">
+          {/* File upload button: forwards the click to the hidden file input above */}
+          <button
+            onClick={() => h.directFileInputRef.current?.click()}
+            className={`p-4 rounded-xl border-2 border-dashed transition-all ${
+              h.directFile
+                ? (isDark ? 'border-green-400/50 bg-green-500/20' : 'border-green-500 bg-green-50')
+                : (isDark ? 'border-white/20 hover:border-purple-400/50' : 'border-slate-300 hover:border-purple-500')
+            }`}
+          >
+            {h.directFile ? (
+              <div className="flex items-center gap-3">
+                <span className="text-2xl">{h.directFile.type === 'application/pdf' ? '📄' : '🖼️'}</span>
+                <div className="text-left flex-1 min-w-0">
+                  <p className={`font-medium truncate ${isDark ? 'text-white' : 'text-slate-900'}`}>{h.directFile.name}</p>
+                  <p className={`text-xs ${isDark ? 'text-white/60' : 'text-slate-500'}`}>{formatFileSize(h.directFile.size)}</p>
+                </div>
+                <svg className="w-5 h-5 text-green-500 flex-shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
+                </svg>
+              </div>
+            ) : (
+              <div className={`text-center ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
+                <span className="text-2xl block mb-1">📁</span>
+                <span className="text-sm">Datei auswaehlen</span>
+              </div>
+            )}
+          </button>
+
+          {/* QR code upload button: opens the QR modal via h.setShowQRModal(true) */}
+          <button
+            onClick={() => h.setShowQRModal(true)}
+            className={`p-4 rounded-xl border-2 border-dashed transition-all ${
+              h.selectedMobileFile
+                ? (isDark ? 'border-green-400/50 bg-green-500/20' : 'border-green-500 bg-green-50')
+                : (isDark ? 'border-white/20 hover:border-purple-400/50' : 'border-slate-300 hover:border-purple-500')
+            }`}
+          >
+            {h.selectedMobileFile ? (
+              <div className="flex items-center gap-3">
+                <span className="text-2xl">{h.selectedMobileFile.type.startsWith('image/') ? '🖼️' : '📄'}</span>
+                <div className="text-left flex-1 min-w-0">
+                  <p className={`font-medium truncate text-sm ${isDark ? 'text-white' : 'text-slate-900'}`}>{h.selectedMobileFile.name}</p>
+                  <p className={`text-xs ${isDark ? 'text-white/60' : 'text-slate-500'}`}>vom Handy</p>
+                </div>
+                <svg className="w-5 h-5 text-green-500 flex-shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
+                </svg>
+              </div>
+            ) : (
+              <div className={`text-center ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
+                <span className="text-2xl block mb-1">📱</span>
+                <span className="text-sm">Mit Handy scannen</span>
+              </div>
+            )}
+          </button>
+        </div>
+
+        {/* Mobile-uploaded files: listed only while no direct file is chosen; picking one clears the other sources and the error */}
+        {h.mobileUploadedFiles.length > 0 && !h.directFile && (
+          <>
+            <div className={`text-center text-sm mb-3 ${isDark ? 'text-white/40' : 'text-slate-400'}`}>— Vom Handy hochgeladen —</div>
+            <div className="space-y-2 max-h-32 overflow-y-auto mb-4">
+              {h.mobileUploadedFiles.map((file) => (
+                <button
+                  key={file.id}
+                  onClick={() => { h.setSelectedMobileFile(file); h.setDirectFile(null); h.setSelectedDocumentId(null); h.setError(null) }}
+                  className={`w-full flex items-center gap-3 p-3 rounded-xl text-left transition-all ${
+                    h.selectedMobileFile?.id === file.id
+                      ? (isDark ? 'bg-green-500/30 border-2 border-green-400/50' : 'bg-green-100 border-2 border-green-500')
+                      : (isDark ? 'bg-white/5 border-2 border-transparent hover:border-white/20' : 'bg-slate-50 border-2 border-transparent hover:border-slate-200')
+                  }`}
+                >
+                  <span className="text-xl">{file.type.startsWith('image/') ? '🖼️' : '📄'}</span>
+                  <div className="flex-1 min-w-0">
+                    <p className={`font-medium truncate ${isDark ? 'text-white' : 'text-slate-900'}`}>{file.name}</p>
+                    <p className={`text-xs ${isDark ? 'text-white/60' : 'text-slate-500'}`}>{formatFileSize(file.size)}</p>
+                  </div>
+                  {h.selectedMobileFile?.id === file.id && (
+                    <svg className="w-5 h-5 text-green-500" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
+                    </svg>
+                  )}
+                </button>
+              ))}
+            </div>
+          </>
+        )}
+
+        {/* Stored documents: listed only while neither a direct file nor a mobile file is chosen */}
+        {h.storedDocuments.length > 0 && !h.directFile && !h.selectedMobileFile && (
+          <>
+            <div className={`text-center text-sm mb-3 ${isDark ? 'text-white/40' : 'text-slate-400'}`}>— oder aus Ihren Dokumenten —</div>
+            <div className="space-y-2 max-h-32 overflow-y-auto">
+              {h.storedDocuments.map((doc) => (
+                <button
+                  key={doc.id}
+                  onClick={() => { h.setSelectedDocumentId(doc.id); h.setDirectFile(null); h.setSelectedMobileFile(null); h.setError(null) }}
+                  className={`w-full flex items-center gap-3 p-3 rounded-xl text-left transition-all ${
+                    h.selectedDocumentId === doc.id
+                      ? (isDark ? 'bg-purple-500/30 border-2 border-purple-400/50' : 'bg-purple-100 border-2 border-purple-500')
+                      : (isDark ? 'bg-white/5 border-2 border-transparent hover:border-white/20' : 'bg-slate-50 border-2 border-transparent hover:border-slate-200')
+                  }`}
+                >
+                  <span className="text-xl">{doc.type === 'application/pdf' ? '📄' : '🖼️'}</span>
+                  <div className="flex-1 min-w-0">
+                    <p className={`font-medium truncate ${isDark ? 'text-white' : 'text-slate-900'}`}>{doc.name}</p>
+                    <p className={`text-xs ${isDark ? 'text-white/60' : 'text-slate-500'}`}>{formatFileSize(doc.size)}</p>
+                  </div>
+                  {h.selectedDocumentId === doc.id && (
+                    <svg className="w-5 h-5 text-purple-500" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
+                    </svg>
+                  )}
+                </button>
+              ))}
+            </div>
+          </>
+        )}
+      </div>
+
+      {/* Step 2: preview + session name, shown once any document source is selected */}
+      {(h.directFile || h.selectedMobileFile || h.selectedDocumentId) && (
+        <div className="grid grid-cols-1 lg:grid-cols-5 gap-6">
+          {/* Document preview: img for image types, iframe otherwise (direct files: iframe only for application/pdf). Priority is direct file, then mobile file, then stored document */}
+          <div className={`${glassCard} rounded-2xl p-6 lg:col-span-3`}>
+            <div className="flex items-center justify-between mb-4">
+              <h2 className={`text-lg font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
+                Vorschau
+              </h2>
+              <button
+                onClick={() => h.setShowFullPreview(true)}
+                className={`px-3 py-1.5 rounded-lg text-sm font-medium transition-all flex items-center gap-2 ${
+                  isDark ? 'bg-white/10 hover:bg-white/20 text-white' : 'bg-slate-100 hover:bg-slate-200 text-slate-700'
+                }`}
+              >
+                <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0zM10 7v3m0 0v3m0-3h3m-3 0H7" />
+                </svg>
+                Originalgroesse
+              </button>
+            </div>
+            <div className={`max-h-[60vh] overflow-auto rounded-xl border ${isDark ? 'border-white/10' : 'border-black/10'}`}>
+              {h.directFile?.type.startsWith('image/') && h.directFilePreview && (
+                <img src={h.directFilePreview} alt="Vorschau" className="w-full h-auto" />
+              )}
+              {h.directFile?.type === 'application/pdf' && h.directFilePreview && (
+                <iframe src={h.directFilePreview} className="w-full border-0 rounded-xl" style={{ height: '60vh' }} />
+              )}
+              {h.selectedMobileFile && !h.directFile && (
+                h.selectedMobileFile.type.startsWith('image/')
+                  ? <img src={h.selectedMobileFile.dataUrl} alt="Vorschau" className="w-full h-auto" />
+                  : <iframe src={h.selectedMobileFile.dataUrl} className="w-full border-0 rounded-xl" style={{ height: '60vh' }} />
+              )}
+              {h.selectedDocumentId && !h.directFile && !h.selectedMobileFile && (() => {
+                // Look up the selected stored document; without a url a placeholder text is shown instead of a preview.
+                const doc = h.storedDocuments.find(d => d.id === h.selectedDocumentId)
+                if (!doc?.url) return <p className={`p-8 text-center ${isDark ? 'text-white/40' : 'text-slate-400'}`}>Keine Vorschau verfuegbar</p>
+                return doc.type.startsWith('image/')
+                  ? <img src={doc.url} alt="Vorschau" className="w-full h-auto" />
+                  : <iframe src={doc.url} className="w-full border-0 rounded-xl" style={{ height: '60vh' }} />
+              })()}
+            </div>
+          </div>
+
+          {/* Session name + start button; typing in the input also clears any error via h.setError(null) */}
+          <div className={`${glassCard} rounded-2xl p-6 lg:col-span-2 flex flex-col`}>
+            <h2 className={`text-lg font-semibold mb-4 ${isDark ? 'text-white' : 'text-slate-900'}`}>
+              2. Session benennen
+            </h2>
+            <input
+              type="text"
+              value={h.sessionName}
+              onChange={(e) => { h.setSessionName(e.target.value); h.setError(null) }}
+              placeholder="z.B. Englisch Klasse 7 - Unit 3"
+              className={`w-full px-4 py-3 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500 mb-4`}
+              autoFocus
+            />
+            <p className={`text-sm mb-6 ${isDark ? 'text-white/50' : 'text-slate-500'}`}>
+              Benennen Sie die Session z.B. nach dem Schulbuch-Kapitel, damit Sie sie spaeter wiederfinden.
+            </p>
+            <div className="flex-1" />
+            <button
+              onClick={() => {
+                // NOTE(review): the button is disabled while the name is blank (see `disabled` below),
+                // so this guard looks unreachable through a normal click - confirm whether it is still needed.
+                if (!h.sessionName.trim()) {
+                  h.setError('Bitte geben Sie einen Session-Namen ein (z.B. "Englisch Klasse 7 - Unit 3")')
+                  return
+                }
+                h.startSession()
+              }}
+              disabled={h.isCreatingSession || !h.sessionName.trim()}
+              className="w-full px-6 py-4 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-2xl font-semibold text-lg disabled:opacity-50 hover:shadow-xl hover:shadow-purple-500/30 transition-all transform hover:scale-105"
+            >
+              {h.isCreatingSession ? 'Verarbeite...' : 'Weiter →'}
+            </button>
+          </div>
+        </div>
+      )}
+    </div>
+  )
+}
@@ -0,0 +1,305 @@
+'use client'
+
+import React from 'react'
+import type { VocabWorksheetHook, IpaMode, SyllableMode } from '../types'
+import { getApiBase } from '../constants'
+
+/**
+ * Vocabulary review tab: scanned source pages on the left, editable vocabulary table on the right.
+ *
+ * Purely presentational: every piece of state and every handler is read from `h`.
+ *
+ * @param h - Return value of useVocabWorksheet (state, theme class strings and handlers).
+ */
+export function VocabularyTab({ h }: { h: VocabWorksheetHook }) {
+  const { isDark, glassCard, glassInput } = h
+  const extras = h.getAllExtraColumns()
+  // Three fixed text columns (English, German, example) plus one per extra column.
+  const baseCols = 3 + extras.length
+  // Insert-triangle, checkbox and page-number columns, the text columns, then the delete/add column.
+  const gridCols = `14px 32px 36px repeat(${baseCols}, 1fr) 32px`
+
+  // 0-based page indices: the explicit selection if there is one, otherwise derived
+  // from the 1-based source_page of each entry (entries without a page count as page 1).
+  // Computed once so the header count and the rendered list share one derivation.
+  const processedPageIndices = h.selectedPages.length > 0
+    ? h.selectedPages
+    : [...new Set(h.vocabulary.map(v => (v.source_page || 1) - 1))].sort((a, b) => a - b)
+  // Number of checked rows, shown in the footer.
+  const selectedCount = h.vocabulary.filter(v => v.selected).length
+
+  return (
+    <div className="flex flex-col lg:flex-row gap-4" style={{ height: 'calc(100vh - 240px)', minHeight: '500px' }}>
+      {/* Left: Original pages */}
+      <div className={`${glassCard} rounded-2xl p-4 lg:w-1/3 flex flex-col overflow-hidden`}>
+        <h2 className={`text-sm font-semibold mb-3 flex-shrink-0 ${isDark ? 'text-white/70' : 'text-slate-600'}`}>
+          Original ({processedPageIndices.length} Seiten)
+        </h2>
+        <div className="flex-1 overflow-y-auto space-y-3">
+          {(() => {
+            const apiBase = getApiBase()
+            // Page images are served per session; without a session no URL can be built.
+            const pagesToShow = processedPageIndices
+              .filter(idx => idx >= 0)
+              .map(idx => ({
+                idx,
+                src: h.session ? `${apiBase}/api/v1/vocab/sessions/${h.session.id}/pdf-page-image/${idx}` : null,
+              }))
+              .filter(t => t.src !== null) as { idx: number; src: string }[]
+
+            if (pagesToShow.length > 0) {
+              return pagesToShow.map(({ idx, src }) => (
+                <div key={idx} className={`relative rounded-xl overflow-hidden border ${isDark ? 'border-white/10' : 'border-black/10'}`}>
+                  <div className={`absolute top-2 left-2 px-2 py-0.5 rounded-lg text-xs font-medium z-10 ${isDark ? 'bg-black/60 text-white' : 'bg-white/90 text-slate-700'}`}>
+                    S. {idx + 1}
+                  </div>
+                  <img src={src} alt={`Seite ${idx + 1}`} className="w-full h-auto" />
+                </div>
+              ))
+            }
+            // Fallback: the directly uploaded image, if any.
+            if (h.uploadedImage) {
+              return (
+                <div className={`relative rounded-xl overflow-hidden border ${isDark ? 'border-white/10' : 'border-black/10'}`}>
+                  <img src={h.uploadedImage} alt="Arbeitsblatt" className="w-full h-auto" />
+                </div>
+              )
+            }
+            return (
+              <div className={`flex-1 flex items-center justify-center py-12 ${isDark ? 'text-white/40' : 'text-slate-400'}`}>
+                <div className="text-center">
+                  <svg className="w-12 h-12 mx-auto mb-2 opacity-50" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z" />
+                  </svg>
+                  <p className="text-xs">Kein Bild verfuegbar</p>
+                </div>
+              </div>
+            )
+          })()}
+        </div>
+      </div>
+
+      {/* Right: Vocabulary table */}
+      <div className={`${glassCard} rounded-2xl p-4 lg:w-2/3 flex flex-col overflow-hidden`}>
+        <div className="flex items-center justify-between mb-3 flex-shrink-0">
+          <h2 className={`text-lg font-semibold ${isDark ? 'text-white' : 'text-slate-900'}`}>
+            Vokabeln ({h.vocabulary.length})
+          </h2>
+          <div className="flex items-center gap-2">
+            {/* IPA mode */}
+            <select
+              value={h.ipaMode}
+              onChange={(e) => {
+                // Pass the new value explicitly: h.ipaMode still holds the old mode in this render.
+                const newIpa = e.target.value as IpaMode
+                h.setIpaMode(newIpa)
+                h.reprocessPages(newIpa, h.syllableMode)
+              }}
+              className={`px-2 py-1.5 text-xs rounded-md border ${isDark ? 'border-white/20 bg-white/10 text-white' : 'border-gray-200 bg-white text-gray-600'}`}
+              title="Lautschrift (IPA)"
+            >
+              <option value="none">IPA: Aus</option>
+              <option value="auto">IPA: Auto</option>
+              <option value="en">IPA: nur EN</option>
+              <option value="de">IPA: nur DE</option>
+              <option value="all">IPA: Alle</option>
+            </select>
+            {/* Syllable mode */}
+            <select
+              value={h.syllableMode}
+              onChange={(e) => {
+                // Same reasoning as for the IPA select: use the fresh value, not h.syllableMode.
+                const newSyl = e.target.value as SyllableMode
+                h.setSyllableMode(newSyl)
+                h.reprocessPages(h.ipaMode, newSyl)
+              }}
+              className={`px-2 py-1.5 text-xs rounded-md border ${isDark ? 'border-white/20 bg-white/10 text-white' : 'border-gray-200 bg-white text-gray-600'}`}
+              title="Silbentrennung"
+            >
+              <option value="none">Silben: Aus</option>
+              <option value="auto">Silben: Original</option>
+              <option value="en">Silben: nur EN</option>
+              <option value="de">Silben: nur DE</option>
+              <option value="all">Silben: Alle</option>
+            </select>
+            <button onClick={h.saveVocabulary} className={`px-4 py-2 rounded-xl text-sm font-medium transition-colors ${isDark ? 'bg-white/10 hover:bg-white/20 text-white' : 'bg-slate-100 hover:bg-slate-200 text-slate-900'}`}>
+              Speichern
+            </button>
+            <button onClick={() => h.setActiveTab('worksheet')} className="px-4 py-2 rounded-xl text-sm font-medium bg-gradient-to-r from-purple-500 to-pink-500 text-white hover:shadow-lg transition-all">
+              Weiter →
+            </button>
+          </div>
+        </div>
+
+        {/* Error messages for failed pages */}
+        {h.processingErrors.length > 0 && (
+          <div className={`rounded-xl p-3 mb-3 flex-shrink-0 ${isDark ? 'bg-orange-500/20 text-orange-200 border border-orange-500/30' : 'bg-orange-100 text-orange-700 border border-orange-200'}`}>
+            <div className="font-medium mb-1 text-sm">Einige Seiten konnten nicht verarbeitet werden:</div>
+            <ul className="text-xs space-y-0.5">
+              {h.processingErrors.map((err, idx) => (
+                <li key={idx}>• {err}</li>
+              ))}
+            </ul>
+          </div>
+        )}
+
+        {/* Processing Progress */}
+        {h.currentlyProcessingPage && (
+          <div className={`rounded-xl p-3 mb-3 flex-shrink-0 ${isDark ? 'bg-purple-500/20 border border-purple-500/30' : 'bg-purple-100 border border-purple-200'}`}>
+            <div className="flex items-center gap-3">
+              <div className={`w-4 h-4 border-2 ${isDark ? 'border-purple-300' : 'border-purple-600'} border-t-transparent rounded-full animate-spin`} />
+              <div>
+                <div className={`text-sm font-medium ${isDark ? 'text-purple-200' : 'text-purple-700'}`}>Verarbeite Seite {h.currentlyProcessingPage}...</div>
+                <div className={`text-xs ${isDark ? 'text-purple-300/70' : 'text-purple-600'}`}>
+                  {h.successfulPages.length > 0 && `${h.successfulPages.length} Seite(n) fertig • `}
+                  {h.vocabulary.length} Vokabeln bisher
+                </div>
+              </div>
+            </div>
+          </div>
+        )}
+
+        {/* Success info */}
+        {!h.currentlyProcessingPage && h.successfulPages.length > 0 && h.failedPages.length === 0 && (
+          <div className={`rounded-xl p-2 mb-3 text-xs flex-shrink-0 ${isDark ? 'bg-green-500/20 text-green-200 border border-green-500/30' : 'bg-green-100 text-green-700 border border-green-200'}`}>
+            Alle {h.successfulPages.length} Seite(n) erfolgreich verarbeitet - {h.vocabulary.length} Vokabeln insgesamt
+          </div>
+        )}
+
+        {/* Partial success info */}
+        {!h.currentlyProcessingPage && h.successfulPages.length > 0 && h.failedPages.length > 0 && (
+          <div className={`rounded-xl p-2 mb-3 text-xs flex-shrink-0 ${isDark ? 'bg-yellow-500/20 text-yellow-200 border border-yellow-500/30' : 'bg-yellow-100 text-yellow-700 border border-yellow-200'}`}>
+            {h.successfulPages.length} Seite(n) erfolgreich, {h.failedPages.length} fehlgeschlagen - {h.vocabulary.length} Vokabeln extrahiert
+          </div>
+        )}
+
+        {h.vocabulary.length === 0 ? (
+          <p className={`text-center py-8 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>Keine Vokabeln gefunden.</p>
+        ) : (
+          <div className="flex flex-col flex-1 overflow-hidden">
+            {/* Fixed Header */}
+            <div className={`flex-shrink-0 grid gap-1 px-2 py-2 text-sm font-medium border-b items-center ${isDark ? 'border-white/10 text-white/60' : 'border-black/10 text-slate-500'}`} style={{ gridTemplateColumns: gridCols }}>
+              <div>{/* insert-triangle spacer */}</div>
+              <div className="flex items-center justify-center">
+                <input
+                  type="checkbox"
+                  checked={h.vocabulary.length > 0 && h.vocabulary.every(v => v.selected)}
+                  onChange={h.toggleAllSelection}
+                  className="w-4 h-4 rounded border-gray-300 text-purple-600 focus:ring-purple-500 cursor-pointer"
+                  title="Alle auswaehlen"
+                />
+              </div>
+              <div>S.</div>
+              <div>Englisch</div>
+              <div>Deutsch</div>
+              <div>Beispiel</div>
+              {extras.map(col => (
+                <div key={col.key} className="flex items-center gap-1 group">
+                  <span className="truncate">{col.label}</span>
+                  <button
+                    onClick={() => {
+                      // Extra columns are stored per page: find the page that owns this column key.
+                      const page = Object.entries(h.pageExtraColumns).find(([, cols]) => cols.some(c => c.key === col.key))
+                      if (page) h.removeExtraColumn(Number(page[0]), col.key)
+                    }}
+                    className={`opacity-0 group-hover:opacity-100 focus:opacity-100 transition-opacity ${isDark ? 'text-red-400 hover:text-red-300' : 'text-red-500 hover:text-red-600'}`}
+                    title="Spalte entfernen"
+                    aria-label="Spalte entfernen"
+                  >
+                    <svg className="w-3 h-3" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" /></svg>
+                  </button>
+                </div>
+              ))}
+              <div className="flex items-center justify-center">
+                <button
+                  onClick={() => h.addExtraColumn(0)}
+                  className={`p-0.5 rounded transition-colors ${isDark ? 'hover:bg-white/10 text-white/40 hover:text-white/70' : 'hover:bg-slate-200 text-slate-400 hover:text-slate-600'}`}
+                  title="Spalte hinzufuegen"
+                  aria-label="Spalte hinzufuegen"
+                >
+                  <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 4v16m8-8H4" /></svg>
+                </button>
+              </div>
+            </div>
+
+            {/* Scrollable Content */}
+            <div className="flex-1 overflow-y-auto">
+              {h.vocabulary.map((entry, index) => (
+                <div key={entry.id} className={`grid gap-1 px-2 py-1 items-center ${isDark ? 'hover:bg-white/5' : 'hover:bg-black/5'}`} style={{ gridTemplateColumns: gridCols }}>
+                  {/* Hidden until hovered or focused, so keyboard users can see it too */}
+                  <button
+                    onClick={() => h.addVocabularyEntry(index)}
+                    className={`w-3.5 h-3.5 flex items-center justify-center opacity-0 hover:opacity-100 focus:opacity-100 transition-opacity ${isDark ? 'text-purple-400' : 'text-purple-500'}`}
+                    title="Zeile einfuegen"
+                    aria-label="Zeile einfuegen"
+                  >
+                    <svg className="w-2.5 h-2.5" viewBox="0 0 10 10" fill="currentColor"><polygon points="0,0 10,5 0,10" /></svg>
+                  </button>
+                  <div className="flex items-center justify-center">
+                    <input
+                      type="checkbox"
+                      checked={entry.selected || false}
+                      onChange={() => h.toggleVocabularySelection(entry.id)}
+                      className="w-4 h-4 rounded border-gray-300 text-purple-600 focus:ring-purple-500 cursor-pointer"
+                    />
+                  </div>
+                  <div className={`flex items-center justify-center text-xs font-medium rounded ${isDark ? 'bg-white/10 text-white/60' : 'bg-black/10 text-slate-600'}`}>
+                    {entry.source_page || '-'}
+                  </div>
+                  <input
+                    type="text"
+                    value={entry.english}
+                    onChange={(e) => h.updateVocabularyEntry(entry.id, 'english', e.target.value)}
+                    className={`px-2 py-1 rounded-lg border text-sm min-w-0 ${glassInput} focus:outline-none focus:ring-1 focus:ring-purple-500`}
+                  />
+                  <input
+                    type="text"
+                    value={entry.german}
+                    onChange={(e) => h.updateVocabularyEntry(entry.id, 'german', e.target.value)}
+                    className={`px-2 py-1 rounded-lg border text-sm min-w-0 ${glassInput} focus:outline-none focus:ring-1 focus:ring-purple-500`}
+                  />
+                  <input
+                    type="text"
+                    value={entry.example_sentence || ''}
+                    onChange={(e) => h.updateVocabularyEntry(entry.id, 'example_sentence', e.target.value)}
+                    placeholder="Beispiel"
+                    className={`px-2 py-1 rounded-lg border text-sm min-w-0 ${glassInput} focus:outline-none focus:ring-1 focus:ring-purple-500`}
+                  />
+                  {extras.map(col => (
+                    <input
+                      key={col.key}
+                      type="text"
+                      value={(entry.extras && entry.extras[col.key]) || ''}
+                      onChange={(e) => h.updateVocabularyEntry(entry.id, col.key, e.target.value)}
+                      placeholder={col.label}
+                      className={`px-2 py-1 rounded-lg border text-sm min-w-0 ${glassInput} focus:outline-none focus:ring-1 focus:ring-purple-500`}
+                    />
+                  ))}
+                  {/* Icon-only button: title/aria-label give it an accessible name */}
+                  <button
+                    onClick={() => h.deleteVocabularyEntry(entry.id)}
+                    className={`p-1 rounded-lg ${isDark ? 'hover:bg-red-500/20 text-red-400' : 'hover:bg-red-100 text-red-500'}`}
+                    title="Zeile loeschen"
+                    aria-label="Zeile loeschen"
+                  >
+                    <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
+                    </svg>
+                  </button>
+                </div>
+              ))}
+              {/* Final insert triangle */}
+              <div className="px-2 py-1">
+                <button
+                  onClick={() => h.addVocabularyEntry()}
+                  className={`w-3.5 h-3.5 flex items-center justify-center opacity-30 hover:opacity-100 focus:opacity-100 transition-opacity ${isDark ? 'text-purple-400' : 'text-purple-500'}`}
+                  title="Zeile am Ende einfuegen"
+                  aria-label="Zeile am Ende einfuegen"
+                >
+                  <svg className="w-2.5 h-2.5" viewBox="0 0 10 10" fill="currentColor"><polygon points="0,0 10,5 0,10" /></svg>
+                </button>
+              </div>
+            </div>
+
+            {/* Footer */}
+            <div className={`flex-shrink-0 pt-2 border-t flex items-center justify-between text-xs ${isDark ? 'border-white/10 text-white/50' : 'border-black/10 text-slate-400'}`}>
+              <span>
+                {h.vocabulary.length} Vokabeln
+                {selectedCount > 0 && ` (${selectedCount} ausgewaehlt)`}
+                {(() => {
+                  // Distinct source pages that actually occur in the table; only listed when there are several.
+                  const pages = [...new Set(h.vocabulary.map(v => v.source_page).filter(Boolean))].sort((a, b) => (a || 0) - (b || 0))
+                  return pages.length > 1 ? ` • Seiten: ${pages.join(', ')}` : ''
+                })()}
+              </span>
+              <button
+                onClick={() => h.addVocabularyEntry()}
+                className={`px-3 py-1 rounded-lg text-xs flex items-center gap-1 transition-colors ${
+                  isDark
+                    ? 'bg-white/10 hover:bg-white/20 text-white/70'
+                    : 'bg-slate-100 hover:bg-slate-200 text-slate-600'
+                }`}
+              >
+                <svg className="w-3 h-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 4v16m8-8H4" />
+                </svg>
+                Zeile
+              </button>
+            </div>
+          </div>
+        )}
+      </div>
+    </div>
+  )
+}
@@ -0,0 +1,155 @@
+'use client'
+
+import React from 'react'
+import type { VocabWorksheetHook } from '../types'
+import { worksheetFormats, worksheetTypes } from '../constants'
+
+/**
+ * Worksheet configuration tab.
+ *
+ * Step 1 lets the user pick a format from `worksheetFormats`; step 2 shows the title
+ * field plus format-specific options (exercise types, line height and solution sheet
+ * for 'standard'; an info box and solution sheet for 'nru'). The final button calls
+ * `h.generateWorksheet` and is disabled while generating, or when the standard format
+ * has no exercise type selected.
+ *
+ * @param h - Return value of useVocabWorksheet (state, theme class strings and handlers).
+ */
+export function WorksheetTab({ h }: { h: VocabWorksheetHook }) {
+  // Theme flag and shared class strings used throughout the markup below.
+  const { isDark, glassCard, glassInput } = h
+
+  return (
+    <div className={`${glassCard} rounded-2xl p-6`}>
+      {/* Step 1: Format Selection */}
+      <div className="mb-8">
+        <h2 className={`text-lg font-semibold mb-4 ${isDark ? 'text-white' : 'text-slate-900'}`}>
+          1. Vorlage waehlen
+        </h2>
+        <div className="grid grid-cols-2 gap-4">
+          {worksheetFormats.map((format) => (
+            <button
+              key={format.id}
+              onClick={() => h.setSelectedFormat(format.id)}
+              className={`p-5 rounded-xl border text-left transition-all ${
+                h.selectedFormat === format.id
+                  ? (isDark ? 'border-purple-400/50 bg-purple-500/20 ring-2 ring-purple-500/50' : 'border-purple-500 bg-purple-50 ring-2 ring-purple-500/30')
+                  : (isDark ? 'border-white/20 hover:border-white/40' : 'border-slate-200 hover:border-slate-300')
+              }`}
+            >
+              <div className="flex items-start gap-3">
+                <div className={`w-10 h-10 rounded-lg flex items-center justify-center shrink-0 ${
+                  h.selectedFormat === format.id
+                    ? (isDark ? 'bg-purple-500/30' : 'bg-purple-200')
+                    : (isDark ? 'bg-white/10' : 'bg-slate-100')
+                }`}>
+                  {format.id === 'standard' ? (
+                    <svg className={`w-5 h-5 ${h.selectedFormat === format.id ? 'text-purple-400' : (isDark ? 'text-white/60' : 'text-slate-500')}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
+                    </svg>
+                  ) : (
+                    <svg className={`w-5 h-5 ${h.selectedFormat === format.id ? 'text-purple-400' : (isDark ? 'text-white/60' : 'text-slate-500')}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M4 5a1 1 0 011-1h14a1 1 0 011 1v2a1 1 0 01-1 1H5a1 1 0 01-1-1V5zM4 13a1 1 0 011-1h6a1 1 0 011 1v6a1 1 0 01-1 1H5a1 1 0 01-1-1v-6zM16 13a1 1 0 011-1h2a1 1 0 011 1v6a1 1 0 01-1 1h-2a1 1 0 01-1-1v-6z" />
+                    </svg>
+                  )}
+                </div>
+                <div className="flex-1">
+                  <div className="flex items-center justify-between">
+                    <span className={`font-medium ${isDark ? 'text-white' : 'text-slate-900'}`}>{format.label}</span>
+                    {h.selectedFormat === format.id && (
+                      <svg className="w-5 h-5 text-purple-500" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                        <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
+                      </svg>
+                    )}
+                  </div>
+                  <p className={`text-sm mt-1 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>{format.description}</p>
+                </div>
+              </div>
+            </button>
+          ))}
+        </div>
+      </div>
+
+      {/* Step 2: Configuration */}
+      <div className="mb-6">
+        <h2 className={`text-lg font-semibold mb-4 ${isDark ? 'text-white' : 'text-slate-900'}`}>
+          2. Arbeitsblatt konfigurieren
+        </h2>
+
+        {/* Title */}
+        <div className="mb-6">
+          <label className={`block text-sm font-medium mb-2 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>Titel</label>
+          <input
+            type="text"
+            value={h.worksheetTitle}
+            onChange={(e) => h.setWorksheetTitle(e.target.value)}
+            placeholder="z.B. Vokabeln Unit 3"
+            className={`w-full px-4 py-3 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500`}
+          />
+        </div>
+
+        {/* Standard format options */}
+        {h.selectedFormat === 'standard' && (
+          <>
+            <div className="mb-6">
+              <label className={`block text-sm font-medium mb-3 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>Arbeitsblatt-Typen</label>
+              <div className="grid grid-cols-2 gap-3">
+                {worksheetTypes.map((type) => (
+                  <button
+                    key={type.id}
+                    onClick={() => h.toggleWorksheetType(type.id)}
+                    className={`p-4 rounded-xl border text-left transition-all ${
+                      h.selectedTypes.includes(type.id)
+                        ? (isDark ? 'border-purple-400/50 bg-purple-500/20' : 'border-purple-500 bg-purple-50')
+                        : (isDark ? 'border-white/20 hover:border-white/40' : 'border-slate-200 hover:border-slate-300')
+                    }`}
+                  >
+                    <div className="flex items-center justify-between">
+                      <span className={`font-medium ${isDark ? 'text-white' : 'text-slate-900'}`}>{type.label}</span>
+                      {h.selectedTypes.includes(type.id) && <svg className="w-5 h-5 text-purple-500" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" /></svg>}
+                    </div>
+                    <p className={`text-sm mt-1 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>{type.description}</p>
+                  </button>
+                ))}
+              </div>
+            </div>
+
+            <div className="grid grid-cols-2 gap-6 mb-6">
+              <div>
+                <label className={`block text-sm font-medium mb-2 ${isDark ? 'text-white/60' : 'text-slate-500'}`}>Zeilenhoehe</label>
+                <select value={h.lineHeight} onChange={(e) => h.setLineHeight(e.target.value)} className={`w-full px-4 py-3 rounded-xl border ${glassInput} focus:outline-none focus:ring-2 focus:ring-purple-500`}>
+                  <option value="normal">Normal</option>
+                  <option value="large">Gross</option>
+                  <option value="extra-large">Extra gross</option>
+                </select>
+              </div>
+              <div className="flex items-center">
+                <label className={`flex items-center gap-3 cursor-pointer ${isDark ? 'text-white' : 'text-slate-900'}`}>
+                  <input type="checkbox" checked={h.includeSolutions} onChange={(e) => h.setIncludeSolutions(e.target.checked)} className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500" />
+                  <span>Loesungsblatt erstellen</span>
+                </label>
+              </div>
+            </div>
+          </>
+        )}
+
+        {/* NRU format options */}
+        {h.selectedFormat === 'nru' && (
+          <div className="space-y-4">
+            <div className={`p-4 rounded-xl ${isDark ? 'bg-indigo-500/20 border border-indigo-500/30' : 'bg-indigo-50 border border-indigo-200'}`}>
+              <h4 className={`font-medium mb-2 ${isDark ? 'text-indigo-200' : 'text-indigo-700'}`}>NRU-Format Uebersicht:</h4>
+              <ul className={`text-sm space-y-1 ${isDark ? 'text-indigo-200/80' : 'text-indigo-600'}`}>
+                <li>• <strong>Vokabeln:</strong> 3-Spalten-Tabelle (Englisch | Deutsch leer | Korrektur leer)</li>
+                <li>• <strong>Lernsaetze:</strong> Deutscher Satz + 2 leere Zeilen fuer englische Uebersetzung</li>
+                <li>• Pro gescannter Seite werden 2 Arbeitsblatt-Seiten erzeugt</li>
+              </ul>
+            </div>
+
+            <div className="flex items-center">
+              <label className={`flex items-center gap-3 cursor-pointer ${isDark ? 'text-white' : 'text-slate-900'}`}>
+                <input type="checkbox" checked={h.includeSolutions} onChange={(e) => h.setIncludeSolutions(e.target.checked)} className="w-5 h-5 rounded border-2 border-purple-500 text-purple-500 focus:ring-purple-500" />
+                <span>Loesungsblatt erstellen (mit deutschen Uebersetzungen)</span>
+              </label>
+            </div>
+          </div>
+        )}
+      </div>
+
+      <button
+        onClick={h.generateWorksheet}
+        disabled={(h.selectedFormat === 'standard' && h.selectedTypes.length === 0) || h.isGenerating}
+        className="w-full py-4 bg-gradient-to-r from-purple-500 to-pink-500 text-white rounded-xl font-semibold disabled:opacity-50 hover:shadow-xl hover:shadow-purple-500/30 transition-all"
+      >
+        {h.isGenerating ? 'Generiere PDF...' : `${h.selectedFormat === 'nru' ? 'NRU-Arbeitsblatt' : 'Arbeitsblatt'} generieren`}
+      </button>
+    </div>
+  )
+}
@@ -0,0 +1,56 @@
+import type { OcrPrompts, WorksheetFormat, WorksheetType } from './types'
+
+// API base URL, derived from the browser host at call time.
+// Non-local hosts go through the /klausur-api/ proxy to avoid certificate problems;
+// server-side rendering and localhost talk to port 8086 directly.
+export const getApiBase = () => {
+  const localBase = 'http://localhost:8086'
+  if (typeof window === 'undefined') return localBase
+  const loc = window.location
+  return loc.hostname === 'localhost'
+    ? localBase
+    : `${loc.protocol}//${loc.hostname}/klausur-api`
+}
+
+// LocalStorage keys (imported by useVocabWorksheet).
+// NOTE(review): the shape of the values stored under each key is not visible here;
+// presumably documents, OCR prompt settings and the upload session id — confirm in the hook.
+export const DOCUMENTS_KEY = 'bp_documents'
+export const OCR_PROMPTS_KEY = 'bp_ocr_prompts'
+export const SESSION_ID_KEY = 'bp_upload_session'
+
+// Worksheet format templates, rendered as selectable cards in WorksheetTab.
+// `label` and `description` are shown to the user. WorksheetTab picks the card
+// icon by comparing `id` with 'standard' and does not read the `icon` field.
+export const worksheetFormats: { id: WorksheetFormat; label: string; description: string; icon: string }[] = [
+  {
+    id: 'standard',
+    label: 'Standard-Format',
+    description: 'Klassisches Arbeitsblatt mit waehlbarer Uebersetzungsrichtung',
+    icon: 'document'
+  },
+  {
+    id: 'nru',
+    label: 'NRU-Vorlage',
+    description: '3-Spalten-Tabelle (EN|DE|Korrektur) + Lernsaetze mit Uebersetzungszeilen',
+    icon: 'template'
+  },
+]
+
+// Default OCR filtering prompts: all three filters enabled, no custom filter,
+// and English/German keyword lists for headers and footers.
+// NOTE(review): the German number words in footerPatterns presumably match
+// spelled-out page numbers — confirm against the OCR filter that consumes them.
+export const defaultOcrPrompts: OcrPrompts = {
+  filterHeaders: true,
+  filterFooters: true,
+  filterPageNumbers: true,
+  customFilter: '',
+  headerPatterns: ['Unit', 'Chapter', 'Lesson', 'Kapitel', 'Lektion'],
+  footerPatterns: ['zweihundert', 'dreihundert', 'vierhundert', 'Page', 'Seite']
+}
+
+// Exercise types offered for the 'standard' format; WorksheetTab renders one
+// toggle button per entry and passes `id` to toggleWorksheetType.
+export const worksheetTypes: { id: WorksheetType; label: string; description: string }[] = [
+  { id: 'en_to_de', label: 'Englisch → Deutsch', description: 'Englische Woerter uebersetzen' },
+  { id: 'de_to_en', label: 'Deutsch → Englisch', description: 'Deutsche Woerter uebersetzen' },
+  { id: 'copy', label: 'Abschreibuebung', description: 'Woerter mehrfach schreiben' },
+  { id: 'gap_fill', label: 'Lueckensaetze', description: 'Saetze mit Luecken ausfuellen' },
+]
+
+/**
+ * Format a byte count as a short human-readable string (e.g. 1536 -> "1.5 KB").
+ *
+ * Uses 1024-based units and at most one decimal place; trailing ".0" is dropped.
+ * Non-finite, zero or negative input yields "0 B". Values beyond the largest
+ * unit stay in that unit instead of indexing past the unit table.
+ *
+ * @param bytes - Size in bytes.
+ * @returns The formatted size with its unit.
+ */
+export const formatFileSize = (bytes: number): string => {
+  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B'
+  const k = 1024
+  const sizes = ['B', 'KB', 'MB', 'GB', 'TB']
+  const exponent = Math.floor(Math.log(bytes) / Math.log(k))
+  // Clamp: values below 1 byte give a negative exponent, huge values exceed the table.
+  const i = Math.min(Math.max(exponent, 0), sizes.length - 1)
+  return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i]
+}
@@ -0,0 +1,189 @@
+import { UploadedFile } from '@/components/QRCodeUpload'
+
+/** One row of the vocabulary table. */
+export interface VocabularyEntry {
+  // Row identity; used as React key and passed to the update/delete/toggle handlers.
+  id: string
+  english: string
+  german: string
+  example_sentence?: string
+  example_sentence_gap?: string
+  word_type?: string
+  // 1-based page number; VocabularyTab subtracts 1 to get the page index and treats a missing value as page 1.
+  source_page?: number
+  // Checkbox state of the row in the vocabulary table.
+  selected?: boolean
+  // Values of user-added columns, keyed by ExtraColumn.key.
+  extras?: Record<string, string>
+}
+
+/** A user-added table column. */
+export interface ExtraColumn {
+  // Lookup key into VocabularyEntry.extras; also passed to removeExtraColumn.
+  key: string
+  // Column header text; also used as the placeholder of the column's inputs.
+  label: string
+}
+
+/**
+ * A vocabulary session.
+ * NOTE(review): presumably mirrors the backend session record — confirm against the API.
+ */
+export interface Session {
+  // Used in API paths, e.g. /api/v1/vocab/sessions/{id}/pdf-page-image/{idx}.
+  id: string
+  name: string
+  status: string
+  vocabulary_count: number
+  image_path?: string
+  description?: string
+  source_language?: string
+  target_language?: string
+  created_at?: string
+}
+
+/** A document offered for selection in the upload step. */
+export interface StoredDocument {
+  id: string
+  name: string
+  // NOTE(review): looks like a MIME type (the preview code checks `type.startsWith('image/')`) — confirm.
+  type: string
+  size: number
+  // NOTE(review): declared as Date; if these records are restored from JSON this will be a string at runtime — verify in the hook.
+  uploadedAt: Date
+  url?: string
+}
+
+/** OCR filter settings; the defaults are defined as `defaultOcrPrompts` in constants. */
+export interface OcrPrompts {
+  filterHeaders: boolean
+  filterFooters: boolean
+  filterPageNumbers: boolean
+  // Free-text filter; empty string by default.
+  customFilter: string
+  // Keyword lists for headers and footers (see defaultOcrPrompts for the default entries).
+  headerPatterns: string[]
+  footerPatterns: string[]
+}
+
+// Identifier of the active tab.
+export type TabId = 'upload' | 'pages' | 'vocabulary' | 'worksheet' | 'export' | 'settings'
+// Exercise type ids; must match the `id` values in `worksheetTypes`.
+export type WorksheetType = 'en_to_de' | 'de_to_en' | 'copy' | 'gap_fill'
+// Worksheet template ids; must match the `id` values in `worksheetFormats`.
+export type WorksheetFormat = 'standard' | 'nru'
+// IPA transcription mode; shown in the UI as Auto / EN only / DE only / all / off.
+export type IpaMode = 'auto' | 'en' | 'de' | 'all' | 'none'
+// Syllable-division mode; same values as IpaMode, with 'auto' labelled "Original" in the UI.
+export type SyllableMode = 'auto' | 'en' | 'de' | 'all' | 'none'
+
+/**
+ * Return type of useVocabWorksheet — used as props by all child components.
+ *
+ * Components receive the whole object as a single `h` prop and read state,
+ * theme class strings and handlers from it.
+ */
+export interface VocabWorksheetHook {
+  // Mounted (SSR guard)
+  mounted: boolean
+
+  // Theme
+  isDark: boolean
+  // Class-name strings that components interpolate into `className`.
+  glassCard: string
+  glassInput: string
+
+  // Tab
+  activeTab: TabId
+  setActiveTab: (tab: TabId) => void
+
+  // Session
+  session: Session | null
+  sessionName: string
+  setSessionName: (name: string) => void
+  isCreatingSession: boolean
+  error: string | null
+  setError: (err: string | null) => void
+  extractionStatus: string
+
+  // Existing sessions
+  existingSessions: Session[]
+  isLoadingSessions: boolean
+
+  // Documents
+  storedDocuments: StoredDocument[]
+  selectedDocumentId: string | null
+  setSelectedDocumentId: (id: string | null) => void
+
+  // Direct file
+  directFile: File | null
+  setDirectFile: (f: File | null) => void
+  directFilePreview: string | null
+  showFullPreview: boolean
+  setShowFullPreview: (show: boolean) => void
+  directFileInputRef: React.RefObject<HTMLInputElement | null>
+
+  // PDF pages
+  pdfPageCount: number
+  // 0-based page indices (VocabularyTab labels them as `idx + 1` and uses them in the page-image URL).
+  selectedPages: number[]
+  pagesThumbnails: string[]
+  isLoadingThumbnails: boolean
+  excludedPages: number[]
+
+  // Extra columns
+  // Keyed by page; VocabularyTab searches all pages to find the owner of a column key.
+  pageExtraColumns: Record<number, ExtraColumn[]>
+
+  // Upload
+  uploadedImage: string | null
+  isExtracting: boolean
+
+  // Vocabulary
+  vocabulary: VocabularyEntry[]
+
+  // Worksheet
+  selectedTypes: WorksheetType[]
+  worksheetTitle: string
+  setWorksheetTitle: (title: string) => void
+  includeSolutions: boolean
+  setIncludeSolutions: (inc: boolean) => void
+  // WorksheetTab offers the values 'normal', 'large' and 'extra-large'.
+  lineHeight: string
+  setLineHeight: (lh: string) => void
+  selectedFormat: WorksheetFormat
+  setSelectedFormat: (f: WorksheetFormat) => void
+  ipaMode: IpaMode
+  setIpaMode: (m: IpaMode) => void
+  syllableMode: SyllableMode
+  setSyllableMode: (m: SyllableMode) => void
+
+  // Export
+  worksheetId: string | null
+  isGenerating: boolean
+
+  // Processing
+  processingErrors: string[]
+  successfulPages: number[]
+  failedPages: number[]
+  // Shown to the user as-is ("Verarbeite Seite N"); null when nothing is being processed.
+  currentlyProcessingPage: number | null
+
+  // OCR settings
+  ocrPrompts: OcrPrompts
+  showSettings: boolean
+  setShowSettings: (show: boolean) => void
+
+  // QR
+  showQRModal: boolean
+  setShowQRModal: (show: boolean) => void
+  uploadSessionId: string
+  mobileUploadedFiles: UploadedFile[]
+  selectedMobileFile: UploadedFile | null
+  setSelectedMobileFile: (f: UploadedFile | null) => void
+  setMobileUploadedFiles: (files: UploadedFile[]) => void
+
+  // OCR Comparison
+  showOcrComparison: boolean
+  setShowOcrComparison: (show: boolean) => void
+  ocrComparePageIndex: number | null
+  // NOTE(review): untyped; the shape of the comparison result is not visible here.
+  ocrCompareResult: any
+  isComparingOcr: boolean
+  ocrCompareError: string | null
+
+  // Handlers
+  handleDirectFileSelect: (e: React.ChangeEvent<HTMLInputElement>) => void
+  startSession: () => Promise<void>
+  processSelectedPages: () => Promise<void>
+  togglePageSelection: (idx: number) => void
+  selectAllPages: () => void
+  selectNoPages: () => void
+  excludePage: (idx: number, e: React.MouseEvent) => void
+  restoreExcludedPages: () => void
+  runOcrComparison: (pageIdx: number) => Promise<void>
+  // `field` is 'english', 'german', 'example_sentence' or the key of an extra column.
+  updateVocabularyEntry: (id: string, field: string, value: string) => void
+  addExtraColumn: (page: number) => void
+  removeExtraColumn: (page: number, key: string) => void
+  getExtraColumnsForPage: (page: number) => ExtraColumn[]
+  getAllExtraColumns: () => ExtraColumn[]
+  deleteVocabularyEntry: (id: string) => void
+  toggleVocabularySelection: (id: string) => void
+  toggleAllSelection: () => void
+  // VocabularyTab passes a row index from the per-row insert button and omits it to add a row at the end.
+  addVocabularyEntry: (atIndex?: number) => void
+  saveVocabulary: () => Promise<void>
+  generateWorksheet: () => Promise<void>
+  downloadPDF: (type: 'worksheet' | 'solution') => void
+  toggleWorksheetType: (type: WorksheetType) => void
+  resumeSession: (session: Session) => Promise<void>
+  resetSession: () => Promise<void>
+  deleteSession: (id: string, e: React.MouseEvent) => Promise<void>
+  saveOcrPrompts: (prompts: OcrPrompts) => void
+  formatFileSize: (bytes: number) => string
+  // Called by VocabularyTab with the new modes whenever the IPA or syllable select changes.
+  reprocessPages: (ipa: IpaMode, syllable: SyllableMode) => void
+}
@@ -0,0 +1,843 @@
+'use client'
+
+import { useState, useRef, useEffect } from 'react'
+import { useTheme } from '@/lib/ThemeContext'
+import { useLanguage } from '@/lib/LanguageContext'
+import { useRouter } from 'next/navigation'
+import { useActivity } from '@/lib/ActivityContext'
+import type { UploadedFile } from '@/components/QRCodeUpload'
+
+import type {
+  VocabularyEntry, ExtraColumn, Session, StoredDocument, OcrPrompts,
+  TabId, WorksheetType, WorksheetFormat, IpaMode, SyllableMode,
+  VocabWorksheetHook,
+} from './types'
+import {
+  getApiBase, DOCUMENTS_KEY, OCR_PROMPTS_KEY, SESSION_ID_KEY,
+  defaultOcrPrompts, formatFileSize,
+} from './constants'
+
+// React hook that holds the state and handlers for the vocab-worksheet page.
+// It returns one object (typed as VocabWorksheetHook) with state values,
+// selected setters and the handlers that call the vocab API.
+export function useVocabWorksheet(): VocabWorksheetHook {
+  const { isDark } = useTheme()
+  // NOTE(review): `t` and `router` appear unused in the visible hook body — confirm before removing.
+  const { t } = useLanguage()
+  const router = useRouter()
+  const { startActivity, completeActivity } = useActivity()
+  // Starts false; set to true by the mount effect so localStorage/API access only happens client-side.
+  const [mounted, setMounted] = useState(false)
+
+  // Tab state
+  const [activeTab, setActiveTab] = useState<TabId>('upload')
+
+  // Session state
+  const [session, setSession] = useState<Session | null>(null)
+  const [sessionName, setSessionName] = useState('')
+  const [isCreatingSession, setIsCreatingSession] = useState(false)
+  const [error, setError] = useState<string | null>(null)
+  // Free-text progress message shown while sessions are created / pages are processed.
+  const [extractionStatus, setExtractionStatus] = useState<string>('')
+
+  // Existing sessions list
+  const [existingSessions, setExistingSessions] = useState<Session[]>([])
+  const [isLoadingSessions, setIsLoadingSessions] = useState(true)
+
+  // Documents from storage
+  const [storedDocuments, setStoredDocuments] = useState<StoredDocument[]>([])
+  const [selectedDocumentId, setSelectedDocumentId] = useState<string | null>(null)
+
+  // Direct file upload
+  const [directFile, setDirectFile] = useState<File | null>(null)
+  // Data URL for images, object URL for PDFs (see handleDirectFileSelect).
+  const [directFilePreview, setDirectFilePreview] = useState<string | null>(null)
+  const [showFullPreview, setShowFullPreview] = useState(false)
+  const directFileInputRef = useRef<HTMLInputElement>(null)
+
+  // PDF page selection state
+  const [pdfPageCount, setPdfPageCount] = useState<number>(0)
+  // 0-based page indexes.
+  const [selectedPages, setSelectedPages] = useState<number[]>([])
+  // Object URLs of page thumbnails.
+  const [pagesThumbnails, setPagesThumbnails] = useState<string[]>([])
+  const [isLoadingThumbnails, setIsLoadingThumbnails] = useState(false)
+  // 0-based page indexes removed from selection via excludePage.
+  const [excludedPages, setExcludedPages] = useState<number[]>([])
+
+  // Dynamic extra columns per source page
+  // Key 0 is treated as "global" by getExtraColumnsForPage.
+  const [pageExtraColumns, setPageExtraColumns] = useState<Record<number, ExtraColumn[]>>({})
+
+  // Upload state
+  // NOTE(review): uploadedImage is only ever reset to null in this hook — presumably legacy; confirm.
+  const [uploadedImage, setUploadedImage] = useState<string | null>(null)
+  const [isExtracting, setIsExtracting] = useState(false)
+  // NOTE(review): fileInputRef is not part of the returned object.
+  const fileInputRef = useRef<HTMLInputElement>(null)
+
+  // Vocabulary state
+  const [vocabulary, setVocabulary] = useState<VocabularyEntry[]>([])
+
+  // Worksheet state
+  const [selectedTypes, setSelectedTypes] = useState<WorksheetType[]>(['en_to_de'])
+  const [worksheetTitle, setWorksheetTitle] = useState('')
+  const [includeSolutions, setIncludeSolutions] = useState(true)
+  const [lineHeight, setLineHeight] = useState('normal')
+  const [selectedFormat, setSelectedFormat] = useState<WorksheetFormat>('standard')
+  const [ipaMode, setIpaMode] = useState<IpaMode>('none')
+  const [syllableMode, setSyllableMode] = useState<SyllableMode>('none')
+
+  // Export state
+  const [worksheetId, setWorksheetId] = useState<string | null>(null)
+  const [isGenerating, setIsGenerating] = useState(false)
+
+  // Processing results
+  const [processingErrors, setProcessingErrors] = useState<string[]>([])
+  // successfulPages / failedPages / currentlyProcessingPage hold 1-based page numbers
+  // (processSelectedPages stores pageIndex + 1).
+  const [successfulPages, setSuccessfulPages] = useState<number[]>([])
+  const [failedPages, setFailedPages] = useState<number[]>([])
+  const [currentlyProcessingPage, setCurrentlyProcessingPage] = useState<number | null>(null)
+  // NOTE(review): processingQueue is written but neither read nor returned in this hook.
+  const [processingQueue, setProcessingQueue] = useState<number[]>([])
+
+  // OCR Prompts/Settings
+  const [ocrPrompts, setOcrPrompts] = useState<OcrPrompts>(defaultOcrPrompts)
+  const [showSettings, setShowSettings] = useState(false)
+
+  // QR Code Upload
+  const [showQRModal, setShowQRModal] = useState(false)
+  // Persistent id read from / written to localStorage by the mount effect.
+  const [uploadSessionId, setUploadSessionId] = useState('')
+  const [mobileUploadedFiles, setMobileUploadedFiles] = useState<UploadedFile[]>([])
+  const [selectedMobileFile, setSelectedMobileFile] = useState<UploadedFile | null>(null)
+
+  // OCR Comparison
+  const [showOcrComparison, setShowOcrComparison] = useState(false)
+  const [ocrComparePageIndex, setOcrComparePageIndex] = useState<number | null>(null)
+  // Raw JSON response of the compare-ocr endpoint (untyped).
+  const [ocrCompareResult, setOcrCompareResult] = useState<any>(null)
+  const [isComparingOcr, setIsComparingOcr] = useState(false)
+  const [ocrCompareError, setOcrCompareError] = useState<string | null>(null)
+
+  // --- Effects ---
+
+  // Mount effect (client only): flag the hook as mounted and make sure a
+  // persistent upload-session id exists in localStorage, then expose it as
+  // uploadSessionId.
+  useEffect(() => {
+    setMounted(true)
+    let storedSessionId = localStorage.getItem(SESSION_ID_KEY)
+    if (!storedSessionId) {
+      // slice(2, 11) yields the same 9 characters as the deprecated substr(2, 9).
+      storedSessionId = `vocab-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`
+      localStorage.setItem(SESSION_ID_KEY, storedSessionId)
+    }
+    setUploadSessionId(storedSessionId)
+  }, [])
+
+  // Restore persisted OCR prompts once mounted; stored keys override the defaults.
+  useEffect(() => {
+    if (!mounted) return
+    const raw = localStorage.getItem(OCR_PROMPTS_KEY)
+    if (!raw) return
+    try {
+      const persisted = JSON.parse(raw)
+      setOcrPrompts({ ...defaultOcrPrompts, ...persisted })
+    } catch (e) {
+      // Corrupt JSON: keep the defaults and only log.
+      console.error('Failed to parse OCR prompts:', e)
+    }
+  }, [mounted])
+
+  // Restore stored documents once mounted, keeping only images and PDFs.
+  useEffect(() => {
+    if (!mounted) return
+    const raw = localStorage.getItem(DOCUMENTS_KEY)
+    if (!raw) return
+    try {
+      const isImageOrPdf = (doc: StoredDocument) =>
+        doc.type?.startsWith('image/') || doc.type === 'application/pdf'
+      const parsedDocs = JSON.parse(raw)
+      setStoredDocuments(parsedDocs.filter(isImageOrPdf))
+    } catch (e) {
+      // Covers both invalid JSON and a payload that is not an array.
+      console.error('Failed to parse stored documents:', e)
+    }
+  }, [mounted])
+
+  // Load existing sessions from the API once mounted.
+  useEffect(() => {
+    if (!mounted) return
+    // Set by the cleanup so a response that arrives after unmount (or after the
+    // effect re-ran) does not write into stale state.
+    let cancelled = false
+    const loadSessions = async () => {
+      const API_BASE = getApiBase()
+      try {
+        const res = await fetch(`${API_BASE}/api/v1/vocab/sessions`)
+        if (res.ok) {
+          const sessions = await res.json()
+          if (!cancelled) setExistingSessions(sessions)
+        } else {
+          console.error('Failed to load sessions: HTTP', res.status)
+        }
+      } catch (e) {
+        console.error('Failed to load sessions:', e)
+      } finally {
+        if (!cancelled) setIsLoadingSessions(false)
+      }
+    }
+    loadSessions()
+    return () => {
+      cancelled = true
+    }
+  }, [mounted])
+
+  // --- Glassmorphism styles ---
+
+  // Tailwind class strings for the glass look, selected by the active theme.
+  const glassStyles = {
+    dark: {
+      card: 'backdrop-blur-xl bg-white/10 border border-white/20',
+      input: 'bg-white/10 border-white/20 text-white placeholder-white/40 focus:border-purple-400',
+    },
+    light: {
+      card: 'backdrop-blur-xl bg-white/70 border border-black/10',
+      input: 'bg-white/50 border-black/10 text-slate-900 placeholder-slate-400 focus:border-purple-500',
+    },
+  }[isDark ? 'dark' : 'light']
+
+  const glassCard = glassStyles.card
+
+  const glassInput = glassStyles.input
+
+  // --- Handlers ---
+
+  // Apply new OCR prompts and persist them to localStorage.
+  // Persistence is best-effort: setItem can throw (e.g. quota exceeded or
+  // storage disabled); in that case the in-memory prompts are still updated
+  // and the failure is only logged.
+  const saveOcrPrompts = (prompts: OcrPrompts) => {
+    setOcrPrompts(prompts)
+    try {
+      localStorage.setItem(OCR_PROMPTS_KEY, JSON.stringify(prompts))
+    } catch (e) {
+      console.error('Failed to persist OCR prompts:', e)
+    }
+  }
+
+  // Handle a file picked via the direct-upload <input>.
+  // Makes the file the active source (clearing stored-document and mobile
+  // selections) and prepares a preview: a data URL for images, an object URL
+  // for PDFs, nothing for other types.
+  const handleDirectFileSelect = (e: React.ChangeEvent<HTMLInputElement>) => {
+    const file = e.target.files?.[0]
+    if (!file) return
+
+    setDirectFile(file)
+    setSelectedDocumentId(null)
+    setSelectedMobileFile(null)
+
+    // A previous PDF preview is an object URL that stays alive until it is
+    // revoked; release it before it gets replaced.
+    if (directFilePreview?.startsWith('blob:')) {
+      URL.revokeObjectURL(directFilePreview)
+    }
+
+    if (file.type.startsWith('image/')) {
+      const reader = new FileReader()
+      reader.onload = (ev) => {
+        setDirectFilePreview(ev.target?.result as string)
+      }
+      reader.readAsDataURL(file)
+    } else if (file.type === 'application/pdf') {
+      setDirectFilePreview(URL.createObjectURL(file))
+    } else {
+      setDirectFilePreview(null)
+    }
+  }
+
+  // Decode a base64 data URL ("data:<mime>;base64,<payload>") into a File.
+  // Throws when there is no payload after the first comma.
+  const dataUrlToFile = (dataUrl: string, name: string, type: string): File => {
+    const base64Data = dataUrl.split(',')[1]
+    if (!base64Data) {
+      throw new Error('Das ausgewaehlte Dokument ist nicht verfuegbar.')
+    }
+    const bytes = Uint8Array.from(atob(base64Data), ch => ch.charCodeAt(0))
+    return new File([bytes], name, { type })
+  }
+
+  // Create a new session on the server and upload the chosen source file.
+  // Source priority: direct file, then mobile (QR) upload, then stored document.
+  // - PDF: uploads the file, loads one thumbnail per page and switches to the
+  //   'pages' tab so the user can pick pages.
+  // - Image: uploads and extracts immediately, then switches to 'vocabulary'.
+  // Any failure is shown via `error` and the local session is cleared again.
+  const startSession = async () => {
+    if (!sessionName.trim()) {
+      setError('Bitte geben Sie einen Namen fuer die Session ein.')
+      return
+    }
+    if (!selectedDocumentId && !directFile && !selectedMobileFile) {
+      setError('Bitte waehlen Sie ein Dokument aus oder laden Sie eine Datei hoch.')
+      return
+    }
+
+    setError(null)
+    setIsCreatingSession(true)
+    setExtractionStatus('Session wird erstellt...')
+
+    const API_BASE = getApiBase()
+
+    try {
+      const sessionRes = await fetch(`${API_BASE}/api/v1/vocab/sessions`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          name: sessionName,
+          ocr_prompts: ocrPrompts
+        }),
+      })
+
+      if (!sessionRes.ok) {
+        throw new Error('Session konnte nicht erstellt werden')
+      }
+
+      const sessionData = await sessionRes.json()
+      setSession(sessionData)
+      setWorksheetTitle(sessionName)
+
+      startActivity('vocab_extraction', { description: sessionName })
+
+      // Resolve the source into a File plus a PDF flag.
+      let file: File
+      let isPdf = false
+
+      if (directFile) {
+        file = directFile
+        isPdf = directFile.type === 'application/pdf'
+      } else if (selectedMobileFile) {
+        isPdf = selectedMobileFile.type === 'application/pdf'
+        file = dataUrlToFile(selectedMobileFile.dataUrl, selectedMobileFile.name, selectedMobileFile.type)
+      } else {
+        const selectedDoc = storedDocuments.find(d => d.id === selectedDocumentId)
+        if (!selectedDoc || !selectedDoc.url) {
+          throw new Error('Das ausgewaehlte Dokument ist nicht verfuegbar.')
+        }
+
+        isPdf = selectedDoc.type === 'application/pdf'
+        file = dataUrlToFile(selectedDoc.url, selectedDoc.name, selectedDoc.type)
+      }
+
+      if (isPdf) {
+        setExtractionStatus('PDF wird hochgeladen...')
+
+        const formData = new FormData()
+        formData.append('file', file)
+
+        const pdfInfoRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/upload-pdf-info`, {
+          method: 'POST',
+          body: formData,
+        })
+
+        if (!pdfInfoRes.ok) {
+          throw new Error('PDF konnte nicht verarbeitet werden')
+        }
+
+        const pdfInfo = await pdfInfoRes.json()
+        setPdfPageCount(pdfInfo.page_count)
+        setSelectedPages(Array.from({ length: pdfInfo.page_count }, (_, i) => i))
+
+        setActiveTab('pages')
+        setExtractionStatus(`${pdfInfo.page_count} Seiten erkannt. Vorschau wird geladen...`)
+        setIsLoadingThumbnails(true)
+
+        // One slot per page so thumbnails[i] always belongs to page i, even
+        // when an individual thumbnail request fails (its slot stays '').
+        const thumbnails: string[] = Array.from({ length: pdfInfo.page_count }, () => '')
+        for (let i = 0; i < pdfInfo.page_count; i++) {
+          try {
+            const thumbRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/pdf-thumbnail/${i}?hires=true`)
+            if (thumbRes.ok) {
+              const blob = await thumbRes.blob()
+              thumbnails[i] = URL.createObjectURL(blob)
+            }
+          } catch (e) {
+            console.error(`Failed to load thumbnail for page ${i}`, e)
+          }
+        }
+
+        setPagesThumbnails(thumbnails)
+        setIsLoadingThumbnails(false)
+        setExtractionStatus(`${pdfInfo.page_count} Seiten bereit. Waehlen Sie die zu verarbeitenden Seiten.`)
+
+      } else {
+        setExtractionStatus('KI analysiert das Bild... (kann 30-60 Sekunden dauern)')
+
+        const formData = new FormData()
+        formData.append('file', file)
+
+        const uploadRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/upload`, {
+          method: 'POST',
+          body: formData,
+        })
+
+        if (!uploadRes.ok) {
+          throw new Error('Bild konnte nicht verarbeitet werden')
+        }
+
+        const uploadData = await uploadRes.json()
+        setSession(prev => prev ? { ...prev, status: 'extracted', vocabulary_count: uploadData.vocabulary_count } : null)
+
+        const vocabRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${sessionData.id}/vocabulary`)
+        if (vocabRes.ok) {
+          const vocabData = await vocabRes.json()
+          setVocabulary(vocabData.vocabulary || [])
+          setExtractionStatus(`${vocabData.vocabulary?.length || 0} Vokabeln gefunden!`)
+        }
+
+        // Short pause so the result message is visible before the tab switches.
+        await new Promise(r => setTimeout(r, 1000))
+        setActiveTab('vocabulary')
+      }
+
+    } catch (err) {
+      console.error('Session start failed:', err)
+      setError(err instanceof Error ? err.message : 'Ein Fehler ist aufgetreten')
+      setExtractionStatus('')
+      setSession(null)
+    } finally {
+      setIsCreatingSession(false)
+    }
+  }
+
+  // Run OCR/extraction for one page (0-based index) of the current session.
+  // Never throws: HTTP errors, backend-reported failures and network errors
+  // all come back as { success: false, vocabulary: [], error }.
+  const processSinglePage = async (pageIndex: number, ipa: IpaMode, syllable: SyllableMode): Promise<{ success: boolean; vocabulary: VocabularyEntry[]; error?: string }> => {
+    const API_BASE = getApiBase()
+    const pageLabel = `Seite ${pageIndex + 1}`
+    const failure = (message: string) => ({ success: false, vocabulary: [] as VocabularyEntry[], error: message })
+
+    try {
+      const endpoint = `${API_BASE}/api/v1/vocab/sessions/${session!.id}/process-single-page/${pageIndex}?ipa_mode=${ipa}&syllable_mode=${syllable}`
+      const res = await fetch(endpoint, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ ocr_prompts: ocrPrompts }),
+      })
+
+      if (!res.ok) {
+        // The error body may not be JSON; fall back to the status code.
+        const errBody = await res.json().catch(() => ({}))
+        return failure(`${pageLabel}: ${errBody.detail || `HTTP ${res.status}`}`)
+      }
+
+      const payload = await res.json()
+      if (payload.success) {
+        return { success: true, vocabulary: payload.vocabulary || [] }
+      }
+      return failure(payload.error || `${pageLabel}: Unbekannter Fehler`)
+    } catch (e) {
+      const reason = e instanceof Error ? e.message : 'Netzwerkfehler'
+      return failure(`${pageLabel}: ${reason}`)
+    }
+  }
+
+  // Process all selected PDF pages one after another.
+  // Resets previous results, switches to the 'vocabulary' tab, appends each
+  // page's vocabulary as it arrives, records successful/failed pages as
+  // 1-based page numbers, then refreshes thumbnails of successful pages.
+  const processSelectedPages = async () => {
+    if (!session || selectedPages.length === 0) return
+
+    const pagesToProcess = [...selectedPages].sort((a, b) => a - b)
+
+    setIsExtracting(true)
+    setProcessingErrors([])
+    setSuccessfulPages([])
+    setFailedPages([])
+    setProcessingQueue(pagesToProcess)
+    setVocabulary([])
+
+    setActiveTab('vocabulary')
+
+    const API_BASE = getApiBase()
+    const errors: string[] = []
+    const successful: number[] = []
+    const failed: number[] = []
+
+    for (let i = 0; i < pagesToProcess.length; i++) {
+      const pageIndex = pagesToProcess[i]
+      setCurrentlyProcessingPage(pageIndex + 1)
+      // Show the page number and, separately, the position in the queue; the
+      // two differ whenever only a subset of pages is selected.
+      setExtractionStatus(`Verarbeite Seite ${pageIndex + 1} (${i + 1} von ${pagesToProcess.length})... (kann 30-60 Sekunden dauern)`)
+
+      const result = await processSinglePage(pageIndex, ipaMode, syllableMode)
+
+      if (result.success) {
+        successful.push(pageIndex + 1)
+        setSuccessfulPages([...successful])
+        setVocabulary(prev => [...prev, ...result.vocabulary])
+        setExtractionStatus(`Seite ${pageIndex + 1} fertig: ${result.vocabulary.length} Vokabeln gefunden`)
+      } else {
+        failed.push(pageIndex + 1)
+        setFailedPages([...failed])
+        if (result.error) {
+          errors.push(result.error)
+          setProcessingErrors([...errors])
+        }
+        setExtractionStatus(`Seite ${pageIndex + 1} fehlgeschlagen`)
+      }
+
+      // Brief pause between pages.
+      await new Promise(r => setTimeout(r, 500))
+    }
+
+    setCurrentlyProcessingPage(null)
+    setProcessingQueue([])
+    setIsExtracting(false)
+
+    if (successful.length === pagesToProcess.length) {
+      setExtractionStatus(`Fertig! Alle ${successful.length} Seiten verarbeitet.`)
+    } else if (successful.length > 0) {
+      setExtractionStatus(`${successful.length} von ${pagesToProcess.length} Seiten verarbeitet. ${failed.length} fehlgeschlagen.`)
+    } else {
+      setExtractionStatus(`Alle Seiten fehlgeschlagen.`)
+    }
+
+    // Reload thumbnails for processed pages (server may have rotated them)
+    if (successful.length > 0 && session) {
+      const updatedThumbs = [...pagesThumbnails]
+      for (const pageNum of successful) {
+        const idx = pageNum - 1
+        try {
+          // The t= query parameter busts any cached copy of the old thumbnail.
+          const thumbRes = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/pdf-thumbnail/${idx}?hires=true&t=${Date.now()}`)
+          if (thumbRes.ok) {
+            const blob = await thumbRes.blob()
+            if (updatedThumbs[idx]) URL.revokeObjectURL(updatedThumbs[idx])
+            updatedThumbs[idx] = URL.createObjectURL(blob)
+          }
+        } catch (e) {
+          console.error(`Failed to refresh thumbnail for page ${pageNum}`, e)
+        }
+      }
+      setPagesThumbnails(updatedThumbs)
+    }
+
+    setSession(prev => prev ? { ...prev, status: 'extracted' } : null)
+  }
+
+  // Add or remove a page (0-based index) from the selection; the list stays sorted ascending.
+  const togglePageSelection = (pageIndex: number) => {
+    setSelectedPages(current => {
+      if (current.includes(pageIndex)) {
+        return current.filter(page => page !== pageIndex)
+      }
+      const next = current.concat(pageIndex)
+      next.sort((a, b) => a - b)
+      return next
+    })
+  }
+
+  // Select every page of the PDF except those the user excluded.
+  const selectAllPages = () => {
+    const excluded = new Set(excludedPages)
+    const allPages = Array.from({ length: pdfPageCount }, (_, page) => page)
+    setSelectedPages(allPages.filter(page => !excluded.has(page)))
+  }
+  // Clear the page selection.
+  const selectNoPages = () => {
+    setSelectedPages([])
+  }
+
+  // Exclude a page (0-based index): remember it as excluded and drop it from
+  // the selection. stopPropagation keeps the click from reaching parent handlers.
+  const excludePage = (pageIndex: number, e: React.MouseEvent) => {
+    e.stopPropagation()
+    // Guard against duplicates when the same page is excluded more than once.
+    setExcludedPages(prev => (prev.includes(pageIndex) ? prev : [...prev, pageIndex]))
+    setSelectedPages(prev => prev.filter(p => p !== pageIndex))
+  }
+
+  // Un-exclude all pages. The selection itself is left untouched.
+  const restoreExcludedPages = () => setExcludedPages([])
+
+  // Request an OCR comparison for one page and open the comparison view.
+  // The result (or an error message) is stored in the ocrCompare* state.
+  const runOcrComparison = async (pageIndex: number) => {
+    if (!session) return
+
+    // Open the view in its loading state and clear any previous outcome.
+    setOcrComparePageIndex(pageIndex)
+    setShowOcrComparison(true)
+    setIsComparingOcr(true)
+    setOcrCompareError(null)
+    setOcrCompareResult(null)
+
+    const API_BASE = getApiBase()
+
+    try {
+      const compareUrl = `${API_BASE}/api/v1/vocab/sessions/${session.id}/compare-ocr/${pageIndex}`
+      const res = await fetch(compareUrl, { method: 'POST' })
+
+      if (res.ok) {
+        setOcrCompareResult(await res.json())
+      } else {
+        setOcrCompareError(`HTTP ${res.status}`)
+      }
+    } catch (e) {
+      const message = e instanceof Error ? e.message : 'Vergleich fehlgeschlagen'
+      setOcrCompareError(message)
+    } finally {
+      setIsComparingOcr(false)
+    }
+  }
+
+  // Update one field of the entry with the given id.
+  // The four built-in fields are written on the entry itself; any other field
+  // name is treated as an extra column and stored under `extras`.
+  const updateVocabularyEntry = (id: string, field: string, value: string) => {
+    const builtInFields = ['english', 'german', 'example_sentence', 'word_type']
+    const isBuiltIn = builtInFields.includes(field)
+
+    setVocabulary(entries => entries.map(entry => {
+      if (entry.id !== id) return entry
+      return isBuiltIn
+        ? { ...entry, [field]: value }
+        : { ...entry, extras: { ...(entry.extras || {}), [field]: value } }
+    }))
+  }
+
+  // Ask the user for a column label and append a new extra column to the
+  // given source page. Cancelling or entering only whitespace does nothing.
+  const addExtraColumn = (sourcePage: number) => {
+    const trimmedLabel = prompt('Spaltenname:')?.trim()
+    if (!trimmedLabel) return
+
+    const newColumn = { key: `extra_${Date.now()}`, label: trimmedLabel }
+    setPageExtraColumns(prev => {
+      const existing = prev[sourcePage] || []
+      return { ...prev, [sourcePage]: [...existing, newColumn] }
+    })
+  }
+
+  // Remove an extra column from a source page and strip its value from every
+  // vocabulary entry that carries it.
+  const removeExtraColumn = (sourcePage: number, key: string) => {
+    setPageExtraColumns(prev => {
+      const remaining = (prev[sourcePage] || []).filter(col => col.key !== key)
+      return { ...prev, [sourcePage]: remaining }
+    })
+    setVocabulary(entries => entries.map(entry => {
+      const extras = entry.extras
+      if (!extras || !(key in extras)) return entry
+      // Copy first so the previous state object is not mutated.
+      const remainingExtras = { ...extras }
+      delete remainingExtras[key]
+      return { ...entry, extras: remainingExtras }
+    }))
+  }
+
+  // Return the extra columns that apply to a source page: the global columns
+  // (stored under key 0) followed by the page's own columns.
+  const getExtraColumnsForPage = (sourcePage: number): ExtraColumn[] => {
+    const global = pageExtraColumns[0] || []
+    // Page 0 IS the global bucket; without this guard its columns would be
+    // returned twice.
+    if (sourcePage === 0) return [...global]
+    const pageSpecific = pageExtraColumns[sourcePage] || []
+    return [...global, ...pageSpecific]
+  }
+
+  // Collect the extra columns of all pages, de-duplicated by key.
+  // The first occurrence of a key wins and first-seen order is kept.
+  const getAllExtraColumns = (): ExtraColumn[] => {
+    const byKey = new Map<string, ExtraColumn>()
+    Object.values(pageExtraColumns).forEach(cols => {
+      cols.forEach(col => {
+        if (!byKey.has(col.key)) byKey.set(col.key, col)
+      })
+    })
+    return Array.from(byKey.values())
+  }
+
+  // Remove the entry with the given id from the local vocabulary list.
+  const deleteVocabularyEntry = (id: string) => {
+    setVocabulary(entries => entries.filter(entry => entry.id !== id))
+  }
+
+  // Flip the `selected` flag of the entry with the given id.
+  const toggleVocabularySelection = (id: string) => {
+    setVocabulary(entries => entries.map(entry => {
+      if (entry.id !== id) return entry
+      return { ...entry, selected: !entry.selected }
+    }))
+  }
+
+  // Select all entries, or deselect all if every entry is already selected.
+  const toggleAllSelection = () => {
+    setVocabulary(prev => {
+      // Decide from the state being updated, not from the render-time
+      // closure, so queued vocabulary updates are taken into account.
+      const allSelected = prev.every(v => v.selected)
+      return prev.map(v => ({ ...v, selected: !allSelected }))
+    })
+  }
+
+  // Insert a blank, pre-selected entry at `atIndex`, or append it when no
+  // index is given.
+  const addVocabularyEntry = (atIndex?: number) => {
+    const newEntry: VocabularyEntry = {
+      // The random suffix keeps ids unique when two entries are created in the
+      // same millisecond; update/delete/toggle all match entries by id.
+      id: `new-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
+      english: '',
+      german: '',
+      example_sentence: '',
+      selected: true
+    }
+    setVocabulary(prev => {
+      if (atIndex === undefined) {
+        return [...prev, newEntry]
+      }
+      const newList = [...prev]
+      newList.splice(atIndex, 0, newEntry)
+      return newList
+    })
+  }
+
+  // Persist the current vocabulary list to the server (PUT).
+  // Never rejects: a network error or non-2xx response is logged and shown
+  // through the `error` state.
+  const saveVocabulary = async () => {
+    if (!session) return
+    const API_BASE = getApiBase()
+
+    try {
+      const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/vocabulary`, {
+        method: 'PUT',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ vocabulary }),
+      })
+      // fetch only rejects on network failure; HTTP errors need an explicit check.
+      if (!res.ok) {
+        console.error('Failed to save vocabulary: HTTP', res.status)
+        setError('Vokabeln konnten nicht gespeichert werden.')
+      }
+    } catch (err) {
+      console.error('Failed to save vocabulary:', err)
+      setError('Vokabeln konnten nicht gespeichert werden.')
+    }
+  }
+
+  // Save the vocabulary, then ask the server to generate a worksheet.
+  // Format 'nru' uses the generate-nru endpoint; every other format uses the
+  // standard generate endpoint with the selected worksheet types. On success
+  // the worksheet id is stored and the 'export' tab opens; on failure the
+  // problem is logged and shown through the `error` state.
+  const generateWorksheet = async () => {
+    if (!session) return
+    if (selectedFormat === 'standard' && selectedTypes.length === 0) return
+
+    setIsGenerating(true)
+    const API_BASE = getApiBase()
+
+    try {
+      await saveVocabulary()
+
+      const title = worksheetTitle || session.name
+      const isNru = selectedFormat === 'nru'
+      const endpoint = isNru ? 'generate-nru' : 'generate'
+      const payload = isNru
+        ? { title, include_solutions: includeSolutions }
+        : {
+            worksheet_types: selectedTypes,
+            title,
+            include_solutions: includeSolutions,
+            line_height: lineHeight,
+          }
+
+      const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/${endpoint}`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify(payload),
+      })
+
+      if (res.ok) {
+        const data = await res.json()
+        // The two endpoints may name the id differently; accept either key.
+        setWorksheetId(data.worksheet_id || data.id)
+        setActiveTab('export')
+        completeActivity({ vocabCount: vocabulary.length })
+      } else {
+        console.error('Failed to generate worksheet: HTTP', res.status)
+        setError('Arbeitsblatt konnte nicht erstellt werden.')
+      }
+    } catch (err) {
+      console.error('Failed to generate worksheet:', err)
+      setError('Arbeitsblatt konnte nicht erstellt werden.')
+    } finally {
+      setIsGenerating(false)
+    }
+  }
+
+  // Open the generated worksheet PDF or its solution PDF in a new tab.
+  // Does nothing until a worksheet has been generated.
+  const downloadPDF = (type: 'worksheet' | 'solution') => {
+    if (!worksheetId) return
+    const API_BASE = getApiBase()
+    let endpoint = 'solution'
+    if (type === 'worksheet') {
+      endpoint = 'pdf'
+    }
+    const pdfUrl = `${API_BASE}/api/v1/vocab/worksheets/${worksheetId}/${endpoint}`
+    window.open(pdfUrl, '_blank')
+  }
+
+  // Add the worksheet type to the selection, or remove it if already selected.
+  const toggleWorksheetType = (type: WorksheetType) => {
+    setSelectedTypes(current => {
+      if (current.includes(type)) {
+        return current.filter(existing => existing !== type)
+      }
+      return [...current, type]
+    })
+  }
+
+  // Load an existing session from the server and route to the matching tab:
+  // 'pending' sessions go to 'upload' with a hint, all others to 'vocabulary'.
+  // Vocabulary is fetched only for 'extracted' / 'completed' sessions.
+  const resumeSession = async (existingSession: Session) => {
+    setError(null)
+    setExtractionStatus('Session wird geladen...')
+
+    const API_BASE = getApiBase()
+
+    try {
+      const sessionUrl = `${API_BASE}/api/v1/vocab/sessions/${existingSession.id}`
+      const sessionRes = await fetch(sessionUrl)
+      if (!sessionRes.ok) throw new Error('Session nicht gefunden')
+      const loaded = await sessionRes.json()
+      setSession(loaded)
+      setWorksheetTitle(loaded.name)
+
+      if (loaded.status === 'pending') {
+        setActiveTab('upload')
+        setExtractionStatus('Diese Session hat noch keine Vokabeln. Bitte laden Sie ein Dokument hoch.')
+        return
+      }
+
+      const hasVocabulary = loaded.status === 'extracted' || loaded.status === 'completed'
+      if (hasVocabulary) {
+        const vocabRes = await fetch(`${sessionUrl}/vocabulary`)
+        if (vocabRes.ok) {
+          const vocabData = await vocabRes.json()
+          setVocabulary(vocabData.vocabulary || [])
+        }
+      }
+      setActiveTab('vocabulary')
+      setExtractionStatus('')
+
+    } catch (err) {
+      console.error('Failed to resume session:', err)
+      setError(err instanceof Error ? err.message : 'Fehler beim Laden der Session')
+      setExtractionStatus('')
+    }
+  }
+
+  // Return to a clean "no session" state and reload the session list.
+  // Releases object URLs held for thumbnails / the PDF preview and clears
+  // per-session processing results so they cannot leak into the next session.
+  const resetSession = async () => {
+    // Object URLs stay alive until revoked, so release them before the
+    // references are dropped.
+    pagesThumbnails.forEach(url => {
+      if (url) URL.revokeObjectURL(url)
+    })
+    if (directFilePreview?.startsWith('blob:')) {
+      URL.revokeObjectURL(directFilePreview)
+    }
+
+    setSession(null)
+    setSessionName('')
+    setVocabulary([])
+    setUploadedImage(null)
+    setWorksheetId(null)
+    setSelectedDocumentId(null)
+    setDirectFile(null)
+    setDirectFilePreview(null)
+    setShowFullPreview(false)
+    setPdfPageCount(0)
+    setSelectedPages([])
+    setPagesThumbnails([])
+    setExcludedPages([])
+    // Page results belong to the old session; reprocessPages reads
+    // successfulPages, so stale values would target the wrong session.
+    setProcessingErrors([])
+    setSuccessfulPages([])
+    setFailedPages([])
+    setCurrentlyProcessingPage(null)
+    setActiveTab('upload')
+    setError(null)
+    setExtractionStatus('')
+
+    const API_BASE = getApiBase()
+    try {
+      const res = await fetch(`${API_BASE}/api/v1/vocab/sessions`)
+      if (res.ok) {
+        const sessions = await res.json()
+        setExistingSessions(sessions)
+      }
+    } catch (e) {
+      console.error('Failed to reload sessions:', e)
+    }
+  }
+
+  // Delete a session on the server after user confirmation and, on success,
+  // remove it from the local list. stopPropagation keeps the click from
+  // reaching parent handlers.
+  const deleteSession = async (sessionId: string, e: React.MouseEvent) => {
+    e.stopPropagation()
+    const confirmed = confirm('Session wirklich loeschen? Diese Aktion kann nicht rueckgaengig gemacht werden.')
+    if (!confirmed) return
+
+    const API_BASE = getApiBase()
+    try {
+      const deleteUrl = `${API_BASE}/api/v1/vocab/sessions/${sessionId}`
+      const res = await fetch(deleteUrl, { method: 'DELETE' })
+      if (!res.ok) return
+      setExistingSessions(sessions => sessions.filter(item => item.id !== sessionId))
+    } catch (err) {
+      console.error('Failed to delete session:', err)
+    }
+  }
+
+  // Reprocess all previously successful pages with new IPA/syllable modes.
+  // Fire-and-forget: returns immediately and reports progress through
+  // isExtracting / extractionStatus. The vocabulary is replaced only when
+  // every page was reprocessed successfully; otherwise the existing list is
+  // kept so a failure cannot wipe or truncate it.
+  const reprocessPages = (ipa: IpaMode, syllable: SyllableMode) => {
+    if (!session || successfulPages.length === 0) return
+
+    setIsExtracting(true)
+    setExtractionStatus('Verarbeite mit neuen Einstellungen...')
+    // successfulPages holds 1-based page numbers; the API expects 0-based indexes.
+    const pagesToReprocess = successfulPages.map(p => p - 1)
+
+    ;(async () => {
+      const allVocab: VocabularyEntry[] = []
+      const errors: string[] = []
+      try {
+        for (const pageIndex of pagesToReprocess) {
+          // processSinglePage never throws; it reports failures in its result.
+          const result = await processSinglePage(pageIndex, ipa, syllable)
+          if (result.success) {
+            allVocab.push(...result.vocabulary)
+          } else {
+            errors.push(result.error || `Seite ${pageIndex + 1}`)
+          }
+        }
+
+        if (errors.length > 0) {
+          console.error('Failed to reprocess pages:', errors)
+          setExtractionStatus(`Neuverarbeitung fehlgeschlagen (${errors.length} von ${pagesToReprocess.length} Seiten). Vokabeln unveraendert.`)
+        } else {
+          setVocabulary(allVocab)
+          setExtractionStatus(`${allVocab.length} Vokabeln mit neuen Einstellungen`)
+        }
+      } finally {
+        setIsExtracting(false)
+      }
+    })()
+  }
+
+  // Public surface of the hook (declared return type: VocabWorksheetHook).
+  // Only the setters listed here are exposed; all other state changes go
+  // through the handlers at the end of the object.
+  return {
+    // Mounted
+    mounted,
+    // Theme
+    isDark, glassCard, glassInput,
+    // Tab
+    activeTab, setActiveTab,
+    // Session
+    session, sessionName, setSessionName, isCreatingSession, error, setError, extractionStatus,
+    // Existing sessions
+    existingSessions, isLoadingSessions,
+    // Documents
+    storedDocuments, selectedDocumentId, setSelectedDocumentId,
+    // Direct file
+    directFile, setDirectFile, directFilePreview, showFullPreview, setShowFullPreview, directFileInputRef,
+    // PDF pages
+    pdfPageCount, selectedPages, pagesThumbnails, isLoadingThumbnails, excludedPages,
+    // Extra columns
+    pageExtraColumns,
+    // Upload
+    uploadedImage, isExtracting,
+    // Vocabulary
+    vocabulary,
+    // Worksheet
+    selectedTypes, worksheetTitle, setWorksheetTitle,
+    includeSolutions, setIncludeSolutions,
+    lineHeight, setLineHeight,
+    selectedFormat, setSelectedFormat,
+    ipaMode, setIpaMode, syllableMode, setSyllableMode,
+    // Export
+    worksheetId, isGenerating,
+    // Processing
+    processingErrors, successfulPages, failedPages, currentlyProcessingPage,
+    // OCR settings
+    ocrPrompts, showSettings, setShowSettings,
+    // QR
+    showQRModal, setShowQRModal, uploadSessionId,
+    mobileUploadedFiles, selectedMobileFile, setSelectedMobileFile, setMobileUploadedFiles,
+    // OCR Comparison
+    showOcrComparison, setShowOcrComparison,
+    ocrComparePageIndex, ocrCompareResult, isComparingOcr, ocrCompareError,
+    // Handlers
+    handleDirectFileSelect, startSession, processSelectedPages,
+    togglePageSelection, selectAllPages, selectNoPages, excludePage, restoreExcludedPages,
+    runOcrComparison,
+    updateVocabularyEntry, addExtraColumn, removeExtraColumn,
+    getExtraColumnsForPage, getAllExtraColumns,
+    deleteVocabularyEntry, toggleVocabularySelection, toggleAllSelection, addVocabularyEntry,
+    saveVocabulary, generateWorksheet, downloadPDF, toggleWorksheetType,
+    resumeSession, resetSession, deleteSession,
+    saveOcrPrompts, formatFileSize, reprocessPages,
+  }
+}