"""
NRU Worksheet Generator - Generate vocabulary worksheets in NRU format.

Format:
- Page 1 (Vokabeln): 3-column table
    - Column 1: English vocabulary
    - Column 2: Empty (child writes German translation)
    - Column 3: Empty (child writes corrected English after parent review)
- Page 2 (Lernsaetze): Full-width table
    - Row 1: German sentence (pre-filled)
    - Row 2-3: Empty lines (child writes English translation)

Per scanned page, we generate 2 worksheet pages.
"""

import io  # NOTE(review): unused in this module; kept in case callers rely on it
import logging
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)


@dataclass
class VocabEntry:
    # Single word or short phrase. `german` is rendered only on solution sheets.
    english: str
    german: str
    source_page: int = 1  # 1-indexed page of the scanned source


@dataclass
class SentenceEntry:
    # Full practice sentence. `english` is the answer shown on solution sheets.
    german: str
    english: str
    source_page: int = 1  # 1-indexed page of the scanned source


def separate_vocab_and_sentences(
    entries: List[Dict],
) -> Tuple[List[VocabEntry], List[SentenceEntry]]:
    """
    Separate vocabulary entries into single words/phrases and full sentences.

    Sentences are identified by any of:
    - ending with sentence punctuation (. ! ?)
    - being longer than 50 characters
    - having more than 5 words with a capitalised word after the first

    Entries missing either the English or the German text are skipped.

    Args:
        entries: Dicts with "english", "german" and optional "source_page" keys.

    Returns:
        Tuple of (vocab_list, sentence_list).
    """
    vocab_list: List[VocabEntry] = []
    sentence_list: List[SentenceEntry] = []

    for entry in entries:
        english = entry.get("english", "").strip()
        german = entry.get("german", "").strip()
        source_page = entry.get("source_page", 1)

        if not english or not german:
            continue  # incomplete pair -- nothing useful to print

        # Detect if this is a full sentence rather than a vocabulary item.
        words = english.split()
        is_sentence = (
            english.endswith(('.', '!', '?'))
            or len(english) > 50
            or (len(words) > 5 and any(w[0].isupper() for w in words[1:] if w))
        )

        if is_sentence:
            sentence_list.append(
                SentenceEntry(german=german, english=english, source_page=source_page)
            )
        else:
            vocab_list.append(
                VocabEntry(english=english, german=german, source_page=source_page)
            )

    return vocab_list, sentence_list


# ---------------------------------------------------------------------------
# HTML building blocks
#
# NOTE(review): parts of the original markup were lost in this file's history;
# the templates below were reconstructed around the worksheet's surviving text
# (headings, the Name/Datum line, the table headers and footers). Verify the
# rendered layout against an original NRU worksheet.
# ---------------------------------------------------------------------------

_DOC_HEAD = """<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style>
@page { size: A4; margin: 2cm; }
.page { page-break-after: always; }
h2 { margin-bottom: 4px; }
.meta { margin-bottom: 12px; }
table { width: 100%; border-collapse: collapse; }
th, td { border: 1px solid #000; padding: 4px 8px; text-align: left; vertical-align: top; }
.footer { margin-top: 12px; font-size: 10px; text-align: center; }
</style>
</head>
<body>
"""

_DOC_TAIL = """</body>
</html>
"""


def _vocab_page_html(
    page_vocab: List[VocabEntry],
    title: str,
    page_number: int,
    footer: str,
    show_solutions: bool,
    line_height_px: int,
) -> str:
    """Render one 'Vokabeln' page: 3-column table (Englisch/Deutsch/Korrektur).

    Solutions pre-fill the German column; worksheets leave columns 2-3 blank
    (child's translation, then the parent-reviewed correction).
    """
    rows = []
    for v in page_vocab:
        german_cell = v.german if show_solutions else ""
        rows.append(
            f'<tr style="height: {line_height_px}px;">'
            f'<td>{v.english}</td><td>{german_cell}</td><td></td></tr>'
        )
    rows_html = "\n".join(rows)
    return f"""<div class="page">
<h2>{title} - Vokabeln (Seite {page_number})</h2>
<div class="meta">Name: _________________________ Datum: _____________</div>
<table>
<tr><th>Englisch</th><th>Deutsch</th><th>Korrektur</th></tr>
{rows_html}
</table>
<div class="footer">{footer}</div>
</div>
"""


def _sentence_page_html(
    page_sentences: List[SentenceEntry],
    title: str,
    page_number: int,
    footer: str,
    show_solutions: bool,
    line_height_px: int,
) -> str:
    """Render one 'Lernsaetze' page: the German sentence followed by either
    the English solution (solution sheet) or two empty writing lines.
    """
    rows = []
    for s in page_sentences:
        rows.append(f'<tr><td><b>{s.german}</b></td></tr>')
        if show_solutions:
            rows.append(f'<tr><td>{s.english}</td></tr>')
        else:
            # Empty lines for the child's English translation (rows 2-3).
            empty_row = f'<tr style="height: {line_height_px}px;"><td></td></tr>'
            rows.append(empty_row)
            rows.append(empty_row)
    rows_html = "\n".join(rows)
    return f"""<div class="page">
<h2>{title} - Lernsaetze (Seite {page_number})</h2>
<div class="meta">Name: _________________________ Datum: _____________</div>
<table>
{rows_html}
</table>
<div class="footer">{footer}</div>
</div>
"""


def generate_nru_html(
    vocab_list: List[VocabEntry],
    sentence_list: List[SentenceEntry],
    page_number: int,
    title: str = "Vokabeltest",
    show_solutions: bool = False,
    line_height_px: int = 28,
) -> str:
    """
    Generate HTML for an NRU-format worksheet for a single source page.

    Produces up to 2 pages:
    - Page 1: Vocabulary table (3 columns), footer "Vokabeln aus Unit"
    - Page 2: Sentence practice (full width), footer "Lernsaetze aus Unit"

    Args:
        vocab_list: All vocabulary items (filtered by ``page_number`` here).
        sentence_list: All sentence items (filtered by ``page_number`` here).
        page_number: 1-indexed source page to render.
        title: Heading title.
        show_solutions: Pre-fill the answers (solution sheet).
        line_height_px: Height of the empty writing rows in pixels.

    Returns:
        Complete HTML document as a string (empty body if the page has no data).
    """
    page_vocab = [v for v in vocab_list if v.source_page == page_number]
    page_sentences = [s for s in sentence_list if s.source_page == page_number]

    parts = [_DOC_HEAD]
    if page_vocab:
        parts.append(
            _vocab_page_html(
                page_vocab, title, page_number,
                "Vokabeln aus Unit", show_solutions, line_height_px,
            )
        )
    if page_sentences:
        parts.append(
            _sentence_page_html(
                page_sentences, title, page_number,
                "Lernsaetze aus Unit", show_solutions, line_height_px,
            )
        )
    parts.append(_DOC_TAIL)
    return "".join(parts)


def generate_nru_worksheet_html(
    entries: List[Dict],
    title: str = "Vokabeltest",
    show_solutions: bool = False,
    specific_pages: Optional[List[int]] = None,
) -> str:
    """
    Generate complete NRU worksheet HTML for all pages.

    Args:
        entries: List of vocabulary entries with optional "source_page".
        title: Worksheet title, shown in headings and page footers.
        show_solutions: Whether to show answers (solution sheet).
        specific_pages: Specific 1-indexed page numbers to include; ``None``
            renders every page that contributed at least one entry.

    Returns:
        Complete HTML document as a string.
    """
    # Separate into vocab and sentences.
    vocab_list, sentence_list = separate_vocab_and_sentences(entries)

    # Every source page that contributed at least one entry.
    all_pages = {v.source_page for v in vocab_list}
    all_pages.update(s.source_page for s in sentence_list)

    # Filter to specific pages if requested.
    if specific_pages:
        all_pages &= set(specific_pages)
    pages_sorted = sorted(all_pages)

    logger.info("Generating NRU worksheet for pages %s", pages_sorted)
    logger.info(
        "Total vocab: %d, Total sentences: %d", len(vocab_list), len(sentence_list)
    )

    parts = [_DOC_HEAD]
    for page_num in pages_sorted:
        page_vocab = [v for v in vocab_list if v.source_page == page_num]
        page_sentences = [s for s in sentence_list if s.source_page == page_num]
        footer = f"{title} - Seite {page_num}"

        # PAGE 1: vocabulary table.
        if page_vocab:
            parts.append(
                _vocab_page_html(page_vocab, title, page_num, footer, show_solutions, 28)
            )
        # PAGE 2: sentence practice.
        if page_sentences:
            parts.append(
                _sentence_page_html(
                    page_sentences, title, page_num, footer, show_solutions, 28
                )
            )
    parts.append(_DOC_TAIL)
    return "".join(parts)


async def generate_nru_pdf(
    entries: List[Dict],
    title: str = "Vokabeltest",
    include_solutions: bool = True,
) -> Tuple[bytes, Optional[bytes]]:
    """
    Generate NRU worksheet PDFs.

    Args:
        entries: List of vocabulary entries with optional "source_page".
        title: Worksheet title.
        include_solutions: Also render a solution-sheet PDF.

    Returns:
        Tuple of (worksheet_pdf_bytes, solution_pdf_bytes); the second element
        is ``None`` when ``include_solutions`` is False.
    """
    from weasyprint import HTML  # local import: heavy optional dependency

    # Generate worksheet PDF (answers hidden).
    worksheet_html = generate_nru_worksheet_html(entries, title, show_solutions=False)
    worksheet_pdf = HTML(string=worksheet_html).write_pdf()

    # Generate solution PDF (answers pre-filled), if requested.
    solution_pdf: Optional[bytes] = None
    if include_solutions:
        solution_html = generate_nru_worksheet_html(entries, title, show_solutions=True)
        solution_pdf = HTML(string=solution_html).write_pdf()

    return worksheet_pdf, solution_pdf