Fix: Remove broken getKlausurApiUrl and clean up empty lines
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 42s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m51s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 29s

sed replacement left orphaned hostname references in story page
and empty lines in getApiBase functions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-24 16:02:04 +02:00
parent b07f802c24
commit 9ba420fa91
150 changed files with 30231 additions and 32053 deletions

View File

@@ -0,0 +1,260 @@
"""
Vocabulary Worksheet Generation — HTML/PDF generation and PDF utilities.
Extracted from vocab_worksheet_api.py to keep modules under 500 LOC.
Functions:
- generate_worksheet_html(): Build HTML for various worksheet types
- generate_worksheet_pdf(): Convert HTML to PDF via WeasyPrint
- get_pdf_page_count(): Count pages in a PDF (PyMuPDF)
- convert_pdf_page_to_image(): Render single PDF page to PNG
- convert_pdf_to_images(): Render multiple PDF pages to PNG
"""
import io
import logging
import os
from typing import List, Optional
from fastapi import HTTPException
from vocab_worksheet_models import VocabularyEntry, WorksheetType
logger = logging.getLogger(__name__)

# Optional dependency: WeasyPrint.
# OSError is caught alongside ImportError -- presumably because the package
# can be installed while its native libraries fail to load (TODO confirm).
try:
    from weasyprint import HTML as _WeasyHTML
except (ImportError, OSError):
    WEASYPRINT_AVAILABLE = False
    logger.warning("WeasyPrint not available")
else:
    WEASYPRINT_AVAILABLE = True

# Optional dependency: PyMuPDF (imported under its module name ``fitz``).
try:
    import fitz  # PyMuPDF
except ImportError:
    FITZ_AVAILABLE = False
    logger.warning("PyMuPDF (fitz) not available")
else:
    FITZ_AVAILABLE = True
# =============================================================================
# Worksheet HTML Generation
# =============================================================================
def generate_worksheet_html(
    vocabulary: List[VocabularyEntry],
    worksheet_type: WorksheetType,
    title: str,
    show_solutions: bool = False,
    repetitions: int = 3,
    line_height: str = "normal"
) -> str:
    """Generate the HTML document for a vocabulary worksheet.

    All caller-supplied text (title, vocabulary words, example sentences) is
    HTML-escaped before it is placed into the document.

    Args:
        vocabulary: Entries to render. The code reads ``english``, ``german``
            and (for gap-fill) ``example_sentence`` from each entry.
        worksheet_type: Which exercise to render. EN_TO_DE, DE_TO_EN,
            COPY_PRACTICE and GAP_FILL are handled; any other value yields
            only the page header.
        title: Heading shown at the top of the page.
        show_solutions: If True, render the answers instead of blanks.
        repetitions: For COPY_PRACTICE with solutions, how many times each
            word is written out.
        line_height: One of "normal", "large", "extra-large"; unknown values
            fall back to the "normal" spacing.

    Returns:
        The complete HTML document as a string.
    """
    # Imported by name: inside this function the identifier ``html`` would
    # otherwise be easy to confuse with the document being built.
    from html import escape

    def esc(value) -> str:
        # str() mirrors what plain f-string interpolation did for non-str
        # values before escaping was introduced.
        return escape(str(value))

    line_heights = {
        "normal": "2.5em",
        "large": "3.5em",
        "extra-large": "4.5em"
    }
    lh = line_heights.get(line_height, "2.5em")

    # Fragments are collected and joined once at the end instead of growing
    # a string with repeated ``+=``.
    parts = [f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
@page {{ size: A4; margin: 2cm; }}
body {{ font-family: 'Segoe UI', Arial, sans-serif; font-size: 14px; }}
h1 {{ font-size: 24px; margin-bottom: 10px; }}
.meta {{ color: #666; margin-bottom: 20px; }}
.name-line {{ margin-bottom: 30px; }}
.vocab-table {{ width: 100%; border-collapse: collapse; }}
.vocab-table td {{ padding: 8px; border-bottom: 1px solid #ddd; line-height: {lh}; }}
.vocab-word {{ width: 40%; font-weight: 500; }}
.vocab-blank {{ width: 60%; border-bottom: 2px dotted #999; }}
.vocab-answer {{ width: 60%; color: #2563eb; }}
.gap {{ border-bottom: 2px solid #333; min-width: 100px; display: inline-block; }}
.hint {{ color: #666; font-style: italic; font-size: 12px; }}
.section {{ margin-top: 30px; }}
.section-title {{ font-size: 16px; font-weight: 600; margin-bottom: 15px; color: #374151; }}
</style>
</head>
<body>
<h1>{esc(title)}</h1>
<div class="name-line">Name: _________________________ Datum: _____________</div>
"""]

    def translation_section(heading: str, pairs) -> None:
        # Shared renderer for both translation directions: a two-column
        # table of prompt word and either the answer or a blank cell.
        parts.append(f'<div class="section"><div class="section-title">{heading}</div>')
        parts.append('<table class="vocab-table">')
        for prompt, answer in pairs:
            if show_solutions:
                parts.append(
                    f'<tr><td class="vocab-word">{esc(prompt)}</td>'
                    f'<td class="vocab-answer">{esc(answer)}</td></tr>'
                )
            else:
                parts.append(
                    f'<tr><td class="vocab-word">{esc(prompt)}</td>'
                    f'<td class="vocab-blank"></td></tr>'
                )
        parts.append('</table></div>')

    if worksheet_type == WorksheetType.EN_TO_DE:
        translation_section(
            'Uebersetze ins Deutsche:',
            ((entry.english, entry.german) for entry in vocabulary),
        )
    elif worksheet_type == WorksheetType.DE_TO_EN:
        translation_section(
            'Uebersetze ins Englische:',
            ((entry.german, entry.english) for entry in vocabulary),
        )
    elif worksheet_type == WorksheetType.COPY_PRACTICE:
        parts.append('<div class="section"><div class="section-title">Schreibe jedes Wort mehrmals:</div>')
        parts.append('<table class="vocab-table">')
        for entry in vocabulary:
            parts.append(f'<tr><td class="vocab-word">{esc(entry.english)}</td>')
            parts.append('<td class="vocab-blank">')
            if show_solutions:
                parts.append(f' {esc(entry.english)} ' * repetitions)
            parts.append('</td></tr>')
        parts.append('</table></div>')
    elif worksheet_type == WorksheetType.GAP_FILL:
        # Only entries that actually carry an example sentence can be used.
        entries_with_examples = [e for e in vocabulary if e.example_sentence]
        if entries_with_examples:
            parts.append('<div class="section"><div class="section-title">Fuege das passende Wort ein:</div>')
            for i, entry in enumerate(entries_with_examples, 1):
                gap_sentence = _render_gap_sentence(entry.example_sentence, entry.english)
                parts.append(f'<p>{i}. {gap_sentence}</p>')
                if show_solutions:
                    parts.append(f'<p class="hint">Loesung: {esc(entry.english)}</p>')
                else:
                    parts.append(f'<p class="hint">({esc(entry.german)})</p>')
            parts.append('</div>')

    parts.append('</body></html>')
    return "".join(parts)


def _render_gap_sentence(sentence: str, answer: str) -> str:
    """Return *sentence* as escaped HTML with one word of *answer* blanked out.

    The first whitespace-separated word of *answer* that occurs in *sentence*
    (case-insensitively, as a substring) is replaced everywhere by an empty
    ``<span class="gap">``. If no word occurs, the escaped sentence is
    returned without a gap.
    """
    import re
    from html import escape

    lowered = sentence.lower()
    for word in answer.split():
        if word.lower() in lowered:
            # Split the raw sentence and escape each piece separately, so the
            # gap markup is inserted exactly once per occurrence and is never
            # itself searched (a word such as "gap" or "class" would otherwise
            # corrupt the inserted markup).
            pieces = re.split(re.escape(word), sentence, flags=re.IGNORECASE)
            return '<span class="gap"></span>'.join(escape(piece) for piece in pieces)
    return escape(sentence)
# =============================================================================
# Worksheet PDF Generation
# =============================================================================
async def generate_worksheet_pdf(html: str) -> bytes:
    """Generate a PDF from HTML using WeasyPrint.

    Args:
        html: The complete HTML document to render.

    Returns:
        The rendered PDF as bytes. If WeasyPrint cannot be imported, the
        UTF-8 encoded HTML is returned instead as a best-effort fallback.

    Raises:
        Exception: Any error raised while rendering is logged and re-raised.
    """
    # Only the import is guarded by the fallback. OSError is included to
    # match the module-level availability check, and keeping rendering out
    # of this ``try`` prevents an ImportError raised during rendering from
    # being misreported as "WeasyPrint not available".
    try:
        from weasyprint import HTML
    except (ImportError, OSError):
        logger.warning("WeasyPrint not available, returning HTML")
        return html.encode('utf-8')

    try:
        return HTML(string=html).write_pdf()
    except Exception as e:
        logger.exception("PDF generation failed: %s", e)
        raise
# =============================================================================
# PDF Utilities (PyMuPDF)
# =============================================================================
def get_pdf_page_count(pdf_data: bytes) -> int:
    """Get the number of pages in a PDF.

    Args:
        pdf_data: PDF file as bytes.

    Returns:
        The page count, or 0 if the PDF cannot be opened or PyMuPDF is not
        importable (the failure is logged, not raised).
    """
    try:
        import fitz
        # The context manager closes the document even if reading the page
        # count raises.
        with fitz.open(stream=pdf_data, filetype="pdf") as pdf_document:
            return pdf_document.page_count
    except Exception as e:
        # Deliberate best-effort: callers get 0 instead of an exception.
        logger.error("Failed to get PDF page count: %s", e)
        return 0
async def convert_pdf_page_to_image(pdf_data: bytes, page_number: int = 0, thumbnail: bool = False) -> bytes:
    """Convert a specific page of a PDF to a PNG image using PyMuPDF.

    Args:
        pdf_data: PDF file as bytes
        page_number: 0-indexed page number
        thumbnail: If True, return a smaller thumbnail image

    Returns:
        The rendered page as PNG bytes.

    Raises:
        HTTPException: 500 if PyMuPDF is not installed; 400 for any other
            failure (empty PDF, page out of range, render error).
    """
    try:
        import fitz  # PyMuPDF
        # The context manager guarantees the document is closed on the
        # validation-error paths below as well as on success.
        with fitz.open(stream=pdf_data, filetype="pdf") as pdf_document:
            page_count = pdf_document.page_count
            if page_count == 0:
                raise ValueError("PDF has no pages")
            if page_number >= page_count:
                raise ValueError(f"Page {page_number} does not exist (PDF has {page_count} pages)")
            # For thumbnails: lower resolution, for OCR: higher resolution
            zoom = 0.5 if thumbnail else 2.0
            page = pdf_document[page_number]
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            png_data = pix.tobytes("png")
        logger.info(
            "Converted PDF page %s to PNG: %s bytes (thumbnail=%s)",
            page_number, len(png_data), thumbnail,
        )
        return png_data
    except ImportError as e:
        logger.error("PyMuPDF (fitz) not installed")
        raise HTTPException(
            status_code=500,
            detail="PDF conversion not available - PyMuPDF not installed",
        ) from e
    except Exception as e:
        logger.error("PDF conversion failed: %s", e)
        raise HTTPException(status_code=400, detail=f"PDF conversion failed: {str(e)}") from e
async def convert_pdf_to_images(pdf_data: bytes, pages: Optional[List[int]] = None) -> List[bytes]:
    """Convert multiple pages of a PDF to PNG images.

    Args:
        pdf_data: PDF file as bytes
        pages: List of 0-indexed page numbers to convert. If None, convert all pages.
            Page numbers at or beyond the page count are skipped (and logged).

    Returns:
        One PNG byte string per converted page, in the order requested.

    Raises:
        HTTPException: 500 if PyMuPDF is not installed; 400 for any other
            failure (empty PDF, render error).
    """
    try:
        import fitz
        # The context manager closes the document on every exit path.
        with fitz.open(stream=pdf_data, filetype="pdf") as pdf_document:
            page_count = pdf_document.page_count
            if page_count == 0:
                raise ValueError("PDF has no pages")
            # If no pages specified, convert all
            if pages is None:
                pages = list(range(page_count))
            # Fixed 2x zoom, the same factor the single-page converter uses
            # for non-thumbnail output.
            mat = fitz.Matrix(2.0, 2.0)
            images = []
            for page_num in pages:
                if page_num >= page_count:
                    # Still skipped, as before, but no longer silently.
                    logger.warning(
                        "Skipping page %s: PDF has only %s pages", page_num, page_count
                    )
                    continue
                pix = pdf_document[page_num].get_pixmap(matrix=mat)
                images.append(pix.tobytes("png"))
        logger.info("Converted %s PDF pages to images", len(images))
        return images
    except ImportError as e:
        logger.error("PyMuPDF (fitz) not installed")
        raise HTTPException(status_code=500, detail="PDF conversion not available") from e
    except Exception as e:
        logger.error("PDF conversion failed: %s", e)
        raise HTTPException(status_code=400, detail=f"PDF conversion failed: {str(e)}") from e