"""
NRU Worksheet Generator - Generate vocabulary worksheets in NRU format.
Format:
- Page 1 (Vokabeln): 3-column table
- Column 1: English vocabulary
- Column 2: Empty (child writes German translation)
- Column 3: Empty (child writes corrected English after parent review)
- Page 2 (Lernsätze): Full-width table
- Row 1: German sentence (pre-filled)
- Row 2-3: Empty lines (child writes English translation)
Per scanned page, we generate 2 worksheet pages.
"""
import io
import logging
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
@dataclass
class VocabEntry:
    """One vocabulary item (single word or short phrase) with its translation."""

    # English text of the vocabulary item.
    english: str
    # German text of the vocabulary item.
    german: str
    # Number of the source page this item belongs to; 1 when not given.
    source_page: int = 1
@dataclass
class SentenceEntry:
    """One full practice sentence with its German and English text."""

    # German text of the sentence.
    german: str
    # English text of the sentence; only rendered on the solution sheet.
    english: str
    # Number of the source page this sentence belongs to; 1 when not given.
    source_page: int = 1
def separate_vocab_and_sentences(entries: List[Dict]) -> Tuple[List[VocabEntry], List[SentenceEntry]]:
    """
    Split raw entries into vocabulary items and full sentences.

    Entries whose English or German text is missing, ``None`` or blank are
    skipped. An entry is treated as a sentence when its English text:
    - ends with ``.``, ``!`` or ``?``, or
    - is longer than 50 characters, or
    - has more than 5 words and at least one word after the first starts
      with an uppercase letter.
    Everything else is treated as a vocabulary item.

    Args:
        entries: Dicts read via the keys ``"english"``, ``"german"`` and
            ``"source_page"`` (the latter defaults to 1 when absent).
    Returns:
        Tuple of (vocab_list, sentence_list), each in input order.
    """
    vocab_list = []
    sentence_list = []
    for entry in entries:
        # ``or ""`` also covers keys that are present but explicitly None,
        # which ``dict.get``'s default alone would pass through to ``.strip()``.
        english = (entry.get("english") or "").strip()
        german = (entry.get("german") or "").strip()
        source_page = entry.get("source_page", 1)
        if not english or not german:
            continue

        # Detect if this is a sentence.
        # ``str.split()`` without arguments never yields empty strings,
        # so ``w[0]`` is always safe here.
        words = english.split()
        is_sentence = (
            english.endswith((".", "!", "?"))
            or len(english) > 50
            or (len(words) > 5 and any(w[0].isupper() for w in words[1:]))
        )

        if is_sentence:
            sentence_list.append(SentenceEntry(
                german=german,
                english=english,
                source_page=source_page
            ))
        else:
            vocab_list.append(VocabEntry(
                english=english,
                german=german,
                source_page=source_page
            ))
    return vocab_list, sentence_list
def generate_nru_html(
    vocab_list: List[VocabEntry],
    sentence_list: List[SentenceEntry],
    page_number: int,
    title: str = "Vokabeltest",
    show_solutions: bool = False,
    line_height_px: int = 28
) -> str:
    """
    Generate the NRU sentence-practice markup for one source page.

    Only sentences whose ``source_page`` equals ``page_number`` are rendered.
    With ``show_solutions`` the English text is filled into the first answer
    row; otherwise both answer rows are left empty.

    NOTE(review): ``vocab_list``, ``title`` and ``line_height_px`` are accepted
    but have no effect on the output of this function as written - confirm
    whether the vocabulary table is meant to be rendered here as well.

    Args:
        vocab_list: Vocabulary entries (currently not rendered here).
        sentence_list: Sentence entries; filtered by ``page_number``.
        page_number: Source page whose sentences are rendered.
        title: Worksheet title (currently unused).
        show_solutions: Whether to fill in the English sentences.
        line_height_px: Line height in pixels (currently unused).
    Returns:
        The generated markup as a single string.
    """
    # Local import: this module does not import ``html`` at file level.
    from html import escape

    # Filter by page
    page_sentences = [s for s in sentence_list if s.source_page == page_number]

    # Collect fragments and join once instead of repeated string ``+=``.
    parts = ["\n"]
    for s in page_sentences:
        parts.append("\n")
        if show_solutions:
            # Escape so that '&', '<' and '>' in the text cannot break the markup.
            parts.append(f"\n| {escape(s.english, quote=False)} |\n|\n")
        else:
            parts.append("\n|\n|\n")
        parts.append("\n")
    parts.append("\nLernsaetze aus Unit\n")
    parts.append("\n")
    return "".join(parts)
def generate_nru_worksheet_html(
    entries: List[Dict],
    title: str = "Vokabeltest",
    show_solutions: bool = False,
    specific_pages: Optional[List[int]] = None
) -> str:
    """
    Generate complete NRU worksheet HTML for all pages.

    Entries are split into vocabulary and sentences; then, for every source
    page in ascending order, a vocabulary table is emitted, followed by a
    sentence-practice section when that page has sentences.

    Args:
        entries: List of vocabulary entries with source_page
        title: Worksheet title
        show_solutions: Whether to show answers
        specific_pages: List of specific page numbers to include (1-indexed);
            ``None`` or an empty list means all pages.
    Returns:
        Complete HTML document
    """
    # Local import: this module does not import ``html`` at file level.
    from html import escape

    # Separate into vocab and sentences
    vocab_list, sentence_list = separate_vocab_and_sentences(entries)

    # Get unique page numbers
    all_pages = {v.source_page for v in vocab_list}
    all_pages.update(s.source_page for s in sentence_list)

    # Filter to specific pages if requested
    if specific_pages:
        all_pages &= set(specific_pages)
    pages_sorted = sorted(all_pages)

    logger.info("Generating NRU worksheet for pages %s", pages_sorted)
    logger.info("Total vocab: %d, Total sentences: %d", len(vocab_list), len(sentence_list))

    # Escape all interpolated text so '&', '<' and '>' cannot break the markup.
    safe_title = escape(title, quote=False)

    # Generate HTML for each page. The per-page lists must be derived inside
    # the loop; they are what the rendering code below iterates over.
    parts = []
    for page_num in pages_sorted:
        page_vocab = [v for v in vocab_list if v.source_page == page_num]
        page_sentences = [s for s in sentence_list if s.source_page == page_num]

        # PAGE 1: VOCABULARY TABLE
        parts.append("\n| Englisch |\nDeutsch |\nKorrektur |\n")
        for v in page_vocab:
            english = escape(v.english, quote=False)
            if show_solutions:
                german = escape(v.german, quote=False)
                parts.append(f"\n| {english} |\n{german} |\n|\n")
            else:
                parts.append(f"\n| {english} |\n|\n|\n")
        parts.append(f"\n{safe_title} - Seite {page_num}\n")

        # PAGE 2: SENTENCE PRACTICE
        if page_sentences:
            parts.append("\n")
            for s in page_sentences:
                parts.append("\n")
                if show_solutions:
                    parts.append(f"\n| {escape(s.english, quote=False)} |\n|\n")
                else:
                    parts.append("\n|\n|\n")
                parts.append("\n")
            parts.append(f"\n{safe_title} - Seite {page_num}\n")

    parts.append("\n")
    return "".join(parts)
async def generate_nru_pdf(entries: List[Dict], title: str = "Vokabeltest", include_solutions: bool = True) -> Tuple[bytes, Optional[bytes]]:
    """
    Generate NRU worksheet PDFs.

    Args:
        entries: List of vocabulary entries, passed through to
            ``generate_nru_worksheet_html``.
        title: Worksheet title
        include_solutions: Whether to also render the solution sheet
    Returns:
        Tuple of (worksheet_pdf_bytes, solution_pdf_bytes);
        ``solution_pdf_bytes`` is ``None`` when ``include_solutions`` is False.
    """
    # Imported inside the function, so WeasyPrint is only loaded when a PDF
    # is actually requested.
    from weasyprint import HTML

    # Generate worksheet HTML
    worksheet_html = generate_nru_worksheet_html(entries, title, show_solutions=False)
    worksheet_pdf = HTML(string=worksheet_html).write_pdf()

    # Generate solution HTML
    solution_pdf = None
    if include_solutions:
        solution_html = generate_nru_worksheet_html(entries, title, show_solutions=True)
        solution_pdf = HTML(string=solution_html).write_pdf()

    return worksheet_pdf, solution_pdf