Files
breakpilot-lehrer/klausur-service/backend/nru_worksheet_generator.py
Benjamin Boenisch 5a31f52310 Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00

558 lines
14 KiB
Python

"""
NRU Worksheet Generator - Generate vocabulary worksheets in NRU format.
Format:
- Page 1 (Vokabeln): 3-column table
- Column 1: English vocabulary
- Column 2: Empty (child writes German translation)
- Column 3: Empty (child writes corrected English after parent review)
- Page 2 (Lernsätze): Full-width table
- Row 1: German sentence (pre-filled)
- Row 2-3: Empty lines (child writes English translation)
Per scanned page, we generate 2 worksheet pages.
"""
import io
import logging
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
@dataclass
class VocabEntry:
    """A single word or short phrase shown in the vocabulary table."""

    # English term, printed in the first table column.
    english: str
    # German translation, printed only on the solution sheet.
    german: str
    # Number of the scanned page this entry came from.
    source_page: int = 1
@dataclass
class SentenceEntry:
    """A full sentence used on the sentence-practice page."""

    # German sentence, pre-filled as the prompt row.
    german: str
    # English translation; only shown on the solution sheet.
    english: str
    # Number of the scanned page this entry came from.
    source_page: int = 1
def separate_vocab_and_sentences(
    entries: List[Dict],
) -> Tuple[List[VocabEntry], List[SentenceEntry]]:
    """
    Split raw entries into short vocabulary items and full sentences.

    An entry counts as a sentence when its English text matches any of:
    - it ends with sentence punctuation (. ! ?)
    - it is longer than 50 characters
    - it has more than 5 words and at least one word after the first
      starts with an uppercase letter

    Entries whose English or German text is missing, None or blank are
    skipped.

    Args:
        entries: Dicts read via the keys "english", "german" and
            "source_page". The page defaults to 1 when the key is absent
            or its value is None.

    Returns:
        Tuple ``(vocab_list, sentence_list)``, each in input order.
    """
    vocab_list: List[VocabEntry] = []
    sentence_list: List[SentenceEntry] = []

    for entry in entries:
        # ``or ""`` also covers keys that are present but hold None, which
        # a plain ``.get(key, "")`` default would not catch.
        english = (entry.get("english") or "").strip()
        german = (entry.get("german") or "").strip()
        if not english or not german:
            continue

        # A None page would later break sorting of page numbers, so it is
        # treated the same as a missing key.
        source_page = entry.get("source_page")
        if source_page is None:
            source_page = 1

        words = english.split()
        is_sentence = (
            english.endswith((".", "!", "?"))
            or len(english) > 50
            or (len(words) > 5 and any(w[0].isupper() for w in words[1:]))
        )

        if is_sentence:
            sentence_list.append(SentenceEntry(
                german=german,
                english=english,
                source_page=source_page,
            ))
        else:
            vocab_list.append(VocabEntry(
                english=english,
                german=german,
                source_page=source_page,
            ))

    return vocab_list, sentence_list
def generate_nru_html(
    vocab_list: List[VocabEntry],
    sentence_list: List[SentenceEntry],
    page_number: int,
    title: str = "Vokabeltest",
    show_solutions: bool = False,
    line_height_px: int = 28
) -> str:
    """
    Generate a standalone HTML document for one scanned page in NRU format.

    The document contains up to two worksheet pages:
    - Vocabulary table (3 columns), emitted only when the page has vocab
    - Sentence practice (full width), emitted only when the page has sentences

    All interpolated text (title, page number, English/German entries) is
    HTML-escaped, so characters such as ``<`` or ``&`` are rendered literally
    instead of being interpreted as markup.

    Args:
        vocab_list: Vocabulary entries; only those whose ``source_page``
            equals ``page_number`` are rendered.
        sentence_list: Sentence entries, filtered the same way.
        page_number: Source page to render; also shown in the headings.
        title: Heading prefix for both worksheet pages.
        show_solutions: When True, fill in the German translation of each
            vocab row and the English translation of each sentence.
        line_height_px: Height of a vocab table cell in pixels; sentence
            writing lines are 4px taller.

    Returns:
        The complete HTML document as a string.
    """
    # Function-scope import keeps the module-level import block untouched.
    from html import escape

    # Filter by page
    page_vocab = [v for v in vocab_list if v.source_page == page_number]
    page_sentences = [s for s in sentence_list if s.source_page == page_number]

    safe_title = escape(str(title))
    page_label = escape(str(page_number))

    # Fragments are collected in a list and joined once at the end.
    parts = [f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
@page {{
size: A4;
margin: 1.5cm 2cm;
}}
* {{
box-sizing: border-box;
}}
body {{
font-family: Arial, Helvetica, sans-serif;
font-size: 12pt;
line-height: 1.4;
margin: 0;
padding: 0;
}}
.page {{
page-break-after: always;
min-height: 100%;
}}
.page:last-child {{
page-break-after: avoid;
}}
h1 {{
font-size: 16pt;
margin: 0 0 8px 0;
text-align: center;
}}
.header {{
margin-bottom: 15px;
}}
.name-line {{
font-size: 11pt;
margin-bottom: 10px;
}}
/* Vocabulary Table - 3 columns */
.vocab-table {{
width: 100%;
border-collapse: collapse;
table-layout: fixed;
}}
.vocab-table th {{
background: #f0f0f0;
border: 1px solid #333;
padding: 6px 8px;
font-weight: bold;
font-size: 11pt;
text-align: left;
}}
.vocab-table td {{
border: 1px solid #333;
padding: 4px 8px;
height: {line_height_px}px;
vertical-align: middle;
}}
.vocab-table .col-english {{ width: 35%; }}
.vocab-table .col-german {{ width: 35%; }}
.vocab-table .col-correction {{ width: 30%; }}
.vocab-answer {{
color: #0066cc;
font-style: italic;
}}
/* Sentence Table - full width */
.sentence-table {{
width: 100%;
border-collapse: collapse;
margin-bottom: 15px;
}}
.sentence-table td {{
border: 1px solid #333;
padding: 6px 10px;
}}
.sentence-header {{
background: #f5f5f5;
font-weight: normal;
min-height: 30px;
}}
.sentence-line {{
height: {line_height_px + 4}px;
}}
.sentence-answer {{
color: #0066cc;
font-style: italic;
font-size: 11pt;
}}
.page-info {{
font-size: 9pt;
color: #666;
text-align: right;
margin-top: 10px;
}}
</style>
</head>
<body>
"""]

    # ========== PAGE 1: VOCABULARY TABLE ==========
    if page_vocab:
        parts.append(f"""
<div class="page">
<div class="header">
<h1>{safe_title} - Vokabeln (Seite {page_label})</h1>
<div class="name-line">Name: _________________________ Datum: _____________</div>
</div>
<table class="vocab-table">
<thead>
<tr>
<th class="col-english">Englisch</th>
<th class="col-german">Deutsch</th>
<th class="col-correction">Korrektur</th>
</tr>
</thead>
<tbody>
""")
        for v in page_vocab:
            # The German column stays empty on the worksheet and is only
            # filled in on the solution sheet.
            if show_solutions:
                german_cell = f'<td class="vocab-answer">{escape(str(v.german))}</td>'
            else:
                german_cell = "<td></td>"
            parts.append(f"""
<tr>
<td>{escape(str(v.english))}</td>
{german_cell}
<td></td>
</tr>
""")
        parts.append("""
</tbody>
</table>
<div class="page-info">Vokabeln aus Unit</div>
</div>
""")

    # ========== PAGE 2: SENTENCE PRACTICE ==========
    if page_sentences:
        parts.append(f"""
<div class="page">
<div class="header">
<h1>{safe_title} - Lernsaetze (Seite {page_label})</h1>
<div class="name-line">Name: _________________________ Datum: _____________</div>
</div>
""")
        for s in page_sentences:
            # First writing line carries the English answer on the solution
            # sheet; the second line is always left empty.
            if show_solutions:
                first_line = (
                    f'<td class="sentence-line sentence-answer">{escape(str(s.english))}</td>'
                )
            else:
                first_line = '<td class="sentence-line"></td>'
            parts.append(f"""
<table class="sentence-table">
<tr>
<td class="sentence-header">{escape(str(s.german))}</td>
</tr>

<tr>
{first_line}
</tr>
<tr>
<td class="sentence-line"></td>
</tr>

</table>
""")
        parts.append("""
<div class="page-info">Lernsaetze aus Unit</div>
</div>
""")

    parts.append("""
</body>
</html>
""")
    return "".join(parts)
def generate_nru_worksheet_html(
    entries: List[Dict],
    title: str = "Vokabeltest",
    show_solutions: bool = False,
    specific_pages: Optional[List[int]] = None
) -> str:
    """
    Generate complete NRU worksheet HTML for all pages.

    Entries are split into vocabulary and sentences, grouped by their source
    page, and rendered in ascending page order. Each source page yields a
    vocabulary page and/or a sentence page, depending on which kinds of
    entries it has.

    All interpolated text (title, page number, English/German entries) is
    HTML-escaped, so characters such as ``<`` or ``&`` are rendered literally
    instead of being interpreted as markup.

    Args:
        entries: List of vocabulary entries with source_page
        title: Worksheet title
        show_solutions: Whether to show answers
        specific_pages: List of specific page numbers to include (1-indexed).
            None or an empty list means all pages.

    Returns:
        Complete HTML document
    """
    # Function-scope import keeps the module-level import block untouched.
    from html import escape

    # Separate into vocab and sentences
    vocab_list, sentence_list = separate_vocab_and_sentences(entries)

    # Group once by source page instead of re-filtering both lists per page.
    vocab_by_page: Dict[int, List[VocabEntry]] = {}
    for v in vocab_list:
        vocab_by_page.setdefault(v.source_page, []).append(v)
    sentences_by_page: Dict[int, List[SentenceEntry]] = {}
    for s in sentence_list:
        sentences_by_page.setdefault(s.source_page, []).append(s)

    # Get unique page numbers
    all_pages = set(vocab_by_page) | set(sentences_by_page)

    # Filter to specific pages if requested
    if specific_pages:
        all_pages &= set(specific_pages)
    pages_sorted = sorted(all_pages)

    logger.info("Generating NRU worksheet for pages %s", pages_sorted)
    logger.info(
        "Total vocab: %s, Total sentences: %s",
        len(vocab_list), len(sentence_list),
    )

    safe_title = escape(str(title))

    # Fragments are collected in a list and joined once at the end.
    parts = ["""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
@page {
size: A4;
margin: 1.5cm 2cm;
}
* {
box-sizing: border-box;
}
body {
font-family: Arial, Helvetica, sans-serif;
font-size: 12pt;
line-height: 1.4;
margin: 0;
padding: 0;
}
.page {
page-break-after: always;
min-height: 100%;
}
.page:last-child {
page-break-after: avoid;
}
h1 {
font-size: 16pt;
margin: 0 0 8px 0;
text-align: center;
}
.header {
margin-bottom: 15px;
}
.name-line {
font-size: 11pt;
margin-bottom: 10px;
}
/* Vocabulary Table - 3 columns */
.vocab-table {
width: 100%;
border-collapse: collapse;
table-layout: fixed;
}
.vocab-table th {
background: #f0f0f0;
border: 1px solid #333;
padding: 6px 8px;
font-weight: bold;
font-size: 11pt;
text-align: left;
}
.vocab-table td {
border: 1px solid #333;
padding: 4px 8px;
height: 28px;
vertical-align: middle;
}
.vocab-table .col-english { width: 35%; }
.vocab-table .col-german { width: 35%; }
.vocab-table .col-correction { width: 30%; }
.vocab-answer {
color: #0066cc;
font-style: italic;
}
/* Sentence Table - full width */
.sentence-table {
width: 100%;
border-collapse: collapse;
margin-bottom: 15px;
}
.sentence-table td {
border: 1px solid #333;
padding: 6px 10px;
}
.sentence-header {
background: #f5f5f5;
font-weight: normal;
min-height: 30px;
}
.sentence-line {
height: 32px;
}
.sentence-answer {
color: #0066cc;
font-style: italic;
font-size: 11pt;
}
.page-info {
font-size: 9pt;
color: #666;
text-align: right;
margin-top: 10px;
}
</style>
</head>
<body>
"""]

    for page_num in pages_sorted:
        page_label = escape(str(page_num))
        page_vocab = vocab_by_page.get(page_num, [])
        page_sentences = sentences_by_page.get(page_num, [])

        # PAGE 1: VOCABULARY TABLE
        if page_vocab:
            parts.append(f"""
<div class="page">
<div class="header">
<h1>{safe_title} - Vokabeln (Seite {page_label})</h1>
<div class="name-line">Name: _________________________ Datum: _____________</div>
</div>
<table class="vocab-table">
<thead>
<tr>
<th class="col-english">Englisch</th>
<th class="col-german">Deutsch</th>
<th class="col-correction">Korrektur</th>
</tr>
</thead>
<tbody>
""")
            for v in page_vocab:
                # The German column stays empty on the worksheet and is
                # only filled in on the solution sheet.
                if show_solutions:
                    german_cell = f'<td class="vocab-answer">{escape(str(v.german))}</td>'
                else:
                    german_cell = "<td></td>"
                parts.append(f"""
<tr>
<td>{escape(str(v.english))}</td>
{german_cell}
<td></td>
</tr>
""")
            parts.append(f"""
</tbody>
</table>
<div class="page-info">{safe_title} - Seite {page_label}</div>
</div>
""")

        # PAGE 2: SENTENCE PRACTICE
        if page_sentences:
            parts.append(f"""
<div class="page">
<div class="header">
<h1>{safe_title} - Lernsaetze (Seite {page_label})</h1>
<div class="name-line">Name: _________________________ Datum: _____________</div>
</div>
""")
            for s in page_sentences:
                # First writing line carries the English answer on the
                # solution sheet; the second line is always left empty.
                if show_solutions:
                    first_line = (
                        f'<td class="sentence-line sentence-answer">{escape(str(s.english))}</td>'
                    )
                else:
                    first_line = '<td class="sentence-line"></td>'
                parts.append(f"""
<table class="sentence-table">
<tr>
<td class="sentence-header">{escape(str(s.german))}</td>
</tr>

<tr>
{first_line}
</tr>
<tr>
<td class="sentence-line"></td>
</tr>

</table>
""")
            parts.append(f"""
<div class="page-info">{safe_title} - Seite {page_label}</div>
</div>
""")

    parts.append("""
</body>
</html>
""")
    return "".join(parts)
async def generate_nru_pdf(
    entries: List[Dict],
    title: str = "Vokabeltest",
    include_solutions: bool = True
) -> Tuple[bytes, Optional[bytes]]:
    """
    Generate NRU worksheet PDFs.

    Args:
        entries: List of vocabulary entries, passed through to
            ``generate_nru_worksheet_html``.
        title: Worksheet title
        include_solutions: Whether to also render the solution sheet.

    Returns:
        Tuple of (worksheet_pdf_bytes, solution_pdf_bytes). The second
        element is None when ``include_solutions`` is False.
    """
    # Imported at call time, so importing this module does not require
    # weasyprint to be installed.
    from weasyprint import HTML

    # NOTE(review): rendering runs synchronously and this coroutine never
    # awaits, so the event loop is presumably blocked while the PDFs are
    # built - consider offloading to an executor if that matters to callers.

    # Generate worksheet PDF
    worksheet_html = generate_nru_worksheet_html(entries, title, show_solutions=False)
    worksheet_pdf = HTML(string=worksheet_html).write_pdf()

    # Generate solution PDF (optional)
    solution_pdf: Optional[bytes] = None
    if include_solutions:
        solution_html = generate_nru_worksheet_html(entries, title, show_solutions=True)
        solution_pdf = HTML(string=solution_html).write_pdf()

    return worksheet_pdf, solution_pdf