"""
NRU Worksheet Models — data classes and entry separation logic.

Extracted from nru_worksheet_generator.py for modularity.
"""

import logging
from typing import List, Dict, Tuple
from dataclasses import dataclass

logger = logging.getLogger(__name__)

# Heuristic thresholds used by separate_vocab_and_sentences().
_SENTENCE_END_MARKS = ('.', '!', '?')
_SENTENCE_MIN_CHARS = 50   # strictly more characters than this => sentence
_SENTENCE_MIN_WORDS = 5    # strictly more words than this enables the capital-letter check


@dataclass
class VocabEntry:
    """A single word or short phrase with its translation."""

    english: str
    german: str
    source_page: int = 1  # value of the entry's "source_page" key (defaults to 1)


@dataclass
class SentenceEntry:
    """A full sentence with its translation."""

    german: str
    english: str  # For solution sheet
    source_page: int = 1  # value of the entry's "source_page" key (defaults to 1)


def _looks_like_sentence(english: str) -> bool:
    """Return True when the (already stripped) English text reads as a sentence."""
    if english.endswith(_SENTENCE_END_MARKS):
        return True
    if len(english) > _SENTENCE_MIN_CHARS:
        return True
    words = english.split()
    # A capitalised word after the first one, in a longer phrase, suggests
    # running text rather than a dictionary-style entry.
    return (
        len(words) > _SENTENCE_MIN_WORDS
        and any(word[0].isupper() for word in words[1:])
    )


def separate_vocab_and_sentences(entries: List[Dict]) -> Tuple[List[VocabEntry], List[SentenceEntry]]:
    """
    Separate vocabulary entries into single words/phrases and full sentences.

    Each entry is a dict read through the keys "english", "german" and
    "source_page" (default 1). Both texts are stripped of surrounding
    whitespace; entries whose English or German text is missing, None or
    empty after stripping are skipped.

    An entry is classified as a sentence when its English text:
    - ends with punctuation (. ! ?), or
    - is longer than 50 characters, or
    - has more than 5 words and any word after the first starts with a
      capital letter.

    Args:
        entries: Dicts describing the vocabulary entries.

    Returns:
        A tuple (vocab_list, sentence_list); each list preserves the
        relative order of the input entries.
    """
    vocab_list: List[VocabEntry] = []
    sentence_list: List[SentenceEntry] = []

    for entry in entries:
        # `or ""` guards against keys that are present but hold None, which
        # would otherwise raise AttributeError on .strip().
        english = (entry.get("english") or "").strip()
        german = (entry.get("german") or "").strip()
        source_page = entry.get("source_page", 1)

        if not english or not german:
            continue

        if _looks_like_sentence(english):
            sentence_list.append(SentenceEntry(
                german=german,
                english=english,
                source_page=source_page,
            ))
        else:
            vocab_list.append(VocabEntry(
                english=english,
                german=german,
                source_page=source_page,
            ))

    return vocab_list, sentence_list