Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 23s
korrektur/ zeugnis/ admin/ compliance/ worksheet/ training/ metrics/ 52 shims, relative imports, RAG untouched. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
71 lines
1.8 KiB
Python
71 lines
1.8 KiB
Python
"""
|
|
NRU Worksheet Models — data classes and entry separation logic.
|
|
|
|
Extracted from nru_worksheet_generator.py for modularity.
|
|
"""
|
|
|
|
import logging
from dataclasses import dataclass
from typing import Dict, List, Tuple

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


@dataclass
class VocabEntry:
    """An English/German pair that was not classified as a full sentence.

    Instances are created by ``separate_vocab_and_sentences`` for entries
    that fail all of its sentence checks.
    """

    # English text (stripped of surrounding whitespace by the separator).
    english: str
    # German text (stripped of surrounding whitespace by the separator).
    german: str
    # Page reference carried over from the raw entry; defaults to 1.
    source_page: int = 1


@dataclass
class SentenceEntry:
    """An English/German pair that was classified as a full sentence.

    Instances are created by ``separate_vocab_and_sentences``. Note the field
    order: ``german`` comes first, unlike ``VocabEntry``.
    """

    # German text of the sentence.
    german: str
    # English text of the sentence; for the solution sheet.
    english: str
    # Page reference carried over from the raw entry; defaults to 1.
    source_page: int = 1


def separate_vocab_and_sentences(entries: List[Dict]) -> Tuple[List[VocabEntry], List[SentenceEntry]]:
    """
    Separate vocabulary entries into single words/phrases and full sentences.

    Each entry is a dict read through the keys ``"english"``, ``"german"``
    and ``"source_page"`` (default 1). Entries whose English or German text
    is missing, ``None``, or blank after stripping are skipped.

    Classification looks at the English text only. An entry is treated as a
    sentence when any of the following holds:
    - it ends with punctuation (. ! ?)
    - it is longer than 50 characters
    - it has more than 5 words and any word after the first starts with an
      uppercase letter

    Args:
        entries: Raw entry dicts.

    Returns:
        A ``(vocab_list, sentence_list)`` tuple; each list keeps input order.
    """
    vocab_list = []
    sentence_list = []

    for entry in entries:
        # `or ""` also covers keys that are present but explicitly None,
        # which a plain .get(key, "") default would not catch.
        english = (entry.get("english") or "").strip()
        german = (entry.get("german") or "").strip()
        source_page = entry.get("source_page", 1)

        if not english or not german:
            continue

        # Detect if this is a sentence. str.split() never yields empty
        # strings, so indexing w[0] is safe.
        words = english.split()
        is_sentence = (
            english.endswith((".", "!", "?"))
            or len(english) > 50
            or (len(words) > 5 and any(w[0].isupper() for w in words[1:]))
        )

        if is_sentence:
            sentence_list.append(SentenceEntry(
                german=german,
                english=english,
                source_page=source_page,
            ))
        else:
            vocab_list.append(VocabEntry(
                english=english,
                german=german,
                source_page=source_page,
            ))

    return vocab_list, sentence_list