diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py
index 5a02616..3c4d0f1 100644
--- a/klausur-service/backend/cv_vocab_pipeline.py
+++ b/klausur-service/backend/cv_vocab_pipeline.py
@@ -3177,6 +3177,55 @@ _COMMON_SHORT_WORDS: set = {
     'wut', 'zum', 'zur',
 }
 
+# Known abbreviations found in EN/DE textbooks and dictionaries; membership
+# rescues tokens like "sth." / "sb." / "usw." from the noise filters.
+# Stored lowercase with ALL periods removed ("e.g." -> "eg", "z.B." -> "zb").
+_KNOWN_ABBREVIATIONS: set = {
+    # EN dictionary meta-words
+    'sth', 'sb', 'smth', 'smb', 'sbd',
+    # EN general
+    'etc', 'eg', 'ie', 'esp', 'approx', 'dept', 'govt', 'corp',
+    'inc', 'ltd', 'vs', 'cf', 'ibid', 'nb', 'ps', 'asap',
+    # EN references / textbook
+    'p', 'pp', 'ch', 'chap', 'fig', 'figs', 'no', 'nos', 'nr',
+    'vol', 'vols', 'ed', 'eds', 'rev', 'repr', 'trans', 'ff',
+    'fn', 'sec', 'par', 'para', 'app', 'abbr', 'ex', 'exs',
+    'ans', 'wb', 'tb', 'vocab',
+    # EN parts of speech / grammar
+    'adj', 'adv', 'prep', 'conj', 'pron', 'det', 'art', 'interj',
+    'aux', 'mod', 'inf', 'pt', 'pres', 'pret', 'ger',
+    'sg', 'pl', 'sing', 'irreg', 'reg', 'intr', 'intrans',
+    'refl', 'pass', 'imper', 'subj', 'ind', 'perf', 'fut',
+    'attr', 'pred', 'comp', 'superl', 'pos', 'neg',
+    'lit', 'colloq', 'sl', 'dial', 'arch', 'obs', 'fml', 'infml',
+    'syn', 'ant', 'opp', 'var', 'orig',
+    # EN titles
+    'mr', 'mrs', 'ms', 'dr', 'prof', 'st', 'jr', 'sr',
+    # EN pronunciation
+    'br', 'am', 'brit', 'amer',
+    # EN units
+    'hr', 'hrs', 'min', 'km', 'cm', 'mm', 'kg', 'mg', 'ml',
+    # DE general
+    'usw', 'bzw', 'evtl', 'ggf', 'ggfs', 'sog', 'eigtl', 'allg',
+    'bes', 'insb', 'insbes', 'bspw', 'ca',
+    'od', 'ua', 'sa', 'vgl', 'zb', 'dh', 'zt', 'idr',
+    'inkl', 'exkl', 'zzgl', 'abzgl',
+    # DE references
+    'abs', 'abschn', 'abt', 'anm', 'ausg', 'aufl', 'bd', 'bde',
+    'bearb', 'ebd', 'hrsg', 'hg', 'jg', 'jh', 'jhd', 'kap',
+    's', 'sp', 'zit', 'zs', 'vlg',
+    # DE grammar
+    'nom', 'akk', 'dat', 'gen', 'konj', 'subst', 'obj',
+    'praet', 'imp', 'part', 'mask', 'fem', 'neutr',
+    'trennb', 'untrennb', 'ugs', 'geh', 'pej',
+    # DE regional
+    'nordd', 'österr', 'schweiz',
+    # Linguistic
+    'lex', 'morph', 'phon', 'phonet', 'sem', 'synt', 'etym',
+    'deriv', 'pref', 'suf', 'suff', 'dim', 'coll',
+    'count', 'uncount', 'indef', 'def', 'poss', 'demon',
+}
+
 
 def _is_noise_tail_token(token: str) -> bool:
     """Check if a token at the END of cell text is trailing OCR noise.
@@ -3209,6 +3258,10 @@ def _is_noise_tail_token(token: str) -> bool:
     # Extract only alpha characters for dictionary lookup
     cleaned = ''.join(alpha_chars)
 
+    # Known abbreviations (e.g. "sth.", "usw.", "adj.") — always keep
+    if cleaned.lower() in _KNOWN_ABBREVIATIONS:
+        return False
+
     # Strip normal trailing punctuation before checking for internal noise.
     stripped_punct = re.sub(r'[.,;:!?]+$', '', t)  # "cupcakes." → "cupcakes"
     t_check = stripped_punct if stripped_punct else t
@@ -3248,12 +3301,16 @@ def _is_garbage_text(text: str) -> bool:
     """
     words = _RE_REAL_WORD.findall(text)
     if not words:
+        # Join the letters of the whole text ("e.g." -> "eg") and look that up.
+        alpha_only = ''.join(_RE_ALPHA.findall(text)).lower()
+        if alpha_only in _KNOWN_ABBREVIATIONS:
+            return False
         return True
 
     for w in words:
         wl = w.lower()
-        # Known short word → not garbage
-        if wl in _COMMON_SHORT_WORDS:
+        # Known short word or abbreviation → not garbage
+        if wl in _COMMON_SHORT_WORDS or wl in _KNOWN_ABBREVIATIONS:
             return False
         # Long word (>= 4 chars): check vowel/consonant ratio.
         # Real EN/DE words have 20-60% vowels.  Garbage like "uanoaain"
@@ -3280,7 +3337,10 @@ def _clean_cell_text(text: str) -> str:
 
     # --- Filter 1: No real word at all ---
     if not _RE_REAL_WORD.search(stripped):
-        return ''
+        # Exception: dotted abbreviations like "e.g.", "z.B.", "i.e."
+        alpha_only = ''.join(_RE_ALPHA.findall(stripped)).lower()
+        if alpha_only not in _KNOWN_ABBREVIATIONS:
+            return ''
 
     # --- Filter 2: Entire text is garbage ---
     if _is_garbage_text(stripped):