Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 42s
CI / test-go-edu-search (push) Successful in 34s
CI / test-python-klausur (push) Failing after 2m51s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 29s
sed replacement left orphaned hostname references in story page and empty lines in getApiBase functions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
261 lines
9.6 KiB
Python
261 lines
9.6 KiB
Python
"""
|
|
Vocabulary Worksheet Generation — HTML/PDF generation and PDF utilities.
|
|
|
|
Extracted from vocab_worksheet_api.py to keep modules under 500 LOC.
|
|
|
|
Functions:
|
|
- generate_worksheet_html(): Build HTML for various worksheet types
|
|
- generate_worksheet_pdf(): Convert HTML to PDF via WeasyPrint
|
|
- get_pdf_page_count(): Count pages in a PDF (PyMuPDF)
|
|
- convert_pdf_page_to_image(): Render single PDF page to PNG
|
|
- convert_pdf_to_images(): Render multiple PDF pages to PNG
|
|
"""
|
|
|
|
import io
|
|
import logging
|
|
import os
|
|
from typing import List, Optional
|
|
|
|
from fastapi import HTTPException
|
|
|
|
from vocab_worksheet_models import VocabularyEntry, WorksheetType
|
|
|
|
logger = logging.getLogger(__name__)

# WeasyPrint is optional. Both ImportError and OSError are treated as
# "not available"; only the import itself is guarded, the flag is set in the
# else/except branches.
try:
    from weasyprint import HTML as _WeasyHTML
except (ImportError, OSError):
    WEASYPRINT_AVAILABLE = False
    logger.warning("WeasyPrint not available")
else:
    WEASYPRINT_AVAILABLE = True

# PyMuPDF is optional as well; it is imported under the module name "fitz".
try:
    import fitz  # PyMuPDF
except ImportError:
    FITZ_AVAILABLE = False
    logger.warning("PyMuPDF (fitz) not available")
else:
    FITZ_AVAILABLE = True
|
|
|
|
|
|
# =============================================================================
|
|
# Worksheet HTML Generation
|
|
# =============================================================================
|
|
|
|
# Markup inserted where the vocabulary word is removed from an example sentence.
_GAP_MARKUP = '<span class="gap"></span>'


def _escape_text(value: object) -> str:
    """Return ``value`` as text that is safe to place inside an HTML element."""
    from html import escape

    # quote=False: the text is only ever used as element content, never inside
    # an attribute value, so quotes and apostrophes can stay as they are.
    return escape(str(value), quote=False)


def _build_gap_sentence(sentence: str, english: str) -> str:
    """Return ``sentence`` as escaped HTML with the vocabulary word blanked out.

    The words of ``english`` are tried in order and the first one that occurs
    in the sentence (ignoring case) is replaced by a gap; later words are not
    touched. Whole-word occurrences are preferred; if there are none, a plain
    substring match is used so inflected forms still produce a gap. When no
    word occurs at all, the escaped sentence is returned unchanged.
    """
    import re

    for word in english.split():
        literal = re.escape(word)
        candidates = (
            # Lookarounds instead of \b so words that start or end with a
            # non-word character (e.g. an apostrophe) are still matched.
            re.compile(rf"(?<!\w){literal}(?!\w)", re.IGNORECASE),
            re.compile(literal, re.IGNORECASE),
        )
        for pattern in candidates:
            pieces = pattern.split(sentence)
            if len(pieces) > 1:
                # Split the raw text first and escape each piece afterwards:
                # the pattern can then never match inside the inserted gap
                # markup or inside an HTML entity.
                return _GAP_MARKUP.join(_escape_text(piece) for piece in pieces)
    return _escape_text(sentence)


def generate_worksheet_html(
    vocabulary: List[VocabularyEntry],
    worksheet_type: WorksheetType,
    title: str,
    show_solutions: bool = False,
    repetitions: int = 3,
    line_height: str = "normal"
) -> str:
    """Generate the HTML document for a vocabulary worksheet.

    All text taken from ``title`` and from the vocabulary entries is
    HTML-escaped before it is inserted into the page.

    Args:
        vocabulary: Entries to print. The ``english`` and ``german``
            attributes are read for every type; ``example_sentence`` is read
            for GAP_FILL.
        worksheet_type: Selects the section that is rendered (EN_TO_DE,
            DE_TO_EN, COPY_PRACTICE or GAP_FILL). Any other value produces a
            page with only the heading and the name line.
        title: Heading shown at the top of the page.
        show_solutions: If True, answers are printed instead of blanks.
        repetitions: COPY_PRACTICE only; how often the word is repeated in the
            answer cell when ``show_solutions`` is True.
        line_height: "normal", "large" or "extra-large"; unknown values fall
            back to the "normal" height.

    Returns:
        The complete HTML document as a string.
    """
    # Line height CSS
    line_heights = {
        "normal": "2.5em",
        "large": "3.5em",
        "extra-large": "4.5em"
    }
    lh = line_heights.get(line_height, "2.5em")
    safe_title = _escape_text(title)

    # Fragments are collected in a list and joined once at the end.
    parts = [f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
@page {{ size: A4; margin: 2cm; }}
body {{ font-family: 'Segoe UI', Arial, sans-serif; font-size: 14px; }}
h1 {{ font-size: 24px; margin-bottom: 10px; }}
.meta {{ color: #666; margin-bottom: 20px; }}
.name-line {{ margin-bottom: 30px; }}
.vocab-table {{ width: 100%; border-collapse: collapse; }}
.vocab-table td {{ padding: 8px; border-bottom: 1px solid #ddd; line-height: {lh}; }}
.vocab-word {{ width: 40%; font-weight: 500; }}
.vocab-blank {{ width: 60%; border-bottom: 2px dotted #999; }}
.vocab-answer {{ width: 60%; color: #2563eb; }}
.gap {{ border-bottom: 2px solid #333; min-width: 100px; display: inline-block; }}
.hint {{ color: #666; font-style: italic; font-size: 12px; }}
.section {{ margin-top: 30px; }}
.section-title {{ font-size: 16px; font-weight: 600; margin-bottom: 15px; color: #374151; }}
</style>
</head>
<body>
<h1>{safe_title}</h1>
<div class="name-line">Name: _________________________ Datum: _____________</div>
"""]

    if worksheet_type == WorksheetType.EN_TO_DE:
        parts.append('<div class="section"><div class="section-title">Uebersetze ins Deutsche:</div>')
        parts.append('<table class="vocab-table">')
        for entry in vocabulary:
            english = _escape_text(entry.english)
            if show_solutions:
                german = _escape_text(entry.german)
                parts.append(f'<tr><td class="vocab-word">{english}</td><td class="vocab-answer">{german}</td></tr>')
            else:
                parts.append(f'<tr><td class="vocab-word">{english}</td><td class="vocab-blank"></td></tr>')
        parts.append('</table></div>')

    elif worksheet_type == WorksheetType.DE_TO_EN:
        parts.append('<div class="section"><div class="section-title">Uebersetze ins Englische:</div>')
        parts.append('<table class="vocab-table">')
        for entry in vocabulary:
            german = _escape_text(entry.german)
            if show_solutions:
                english = _escape_text(entry.english)
                parts.append(f'<tr><td class="vocab-word">{german}</td><td class="vocab-answer">{english}</td></tr>')
            else:
                parts.append(f'<tr><td class="vocab-word">{german}</td><td class="vocab-blank"></td></tr>')
        parts.append('</table></div>')

    elif worksheet_type == WorksheetType.COPY_PRACTICE:
        parts.append('<div class="section"><div class="section-title">Schreibe jedes Wort mehrmals:</div>')
        parts.append('<table class="vocab-table">')
        for entry in vocabulary:
            english = _escape_text(entry.english)
            parts.append(f'<tr><td class="vocab-word">{english}</td>')
            parts.append('<td class="vocab-blank">')
            if show_solutions:
                parts.append(f' {english} ' * repetitions)
            parts.append('</td></tr>')
        parts.append('</table></div>')

    elif worksheet_type == WorksheetType.GAP_FILL:
        # Only entries that actually have an example sentence can be used.
        entries_with_examples = [e for e in vocabulary if e.example_sentence]
        if entries_with_examples:
            parts.append('<div class="section"><div class="section-title">Fuege das passende Wort ein:</div>')
            for i, entry in enumerate(entries_with_examples, 1):
                # Create gap sentence by removing the English word
                gap_sentence = _build_gap_sentence(entry.example_sentence, entry.english)
                parts.append(f'<p>{i}. {gap_sentence}</p>')
                if show_solutions:
                    parts.append(f'<p class="hint">Loesung: {_escape_text(entry.english)}</p>')
                else:
                    # Without solutions the German translation serves as hint.
                    parts.append(f'<p class="hint">({_escape_text(entry.german)})</p>')
            parts.append('</div>')

    parts.append('</body></html>')
    return "".join(parts)
|
|
|
|
|
|
# =============================================================================
|
|
# Worksheet PDF Generation
|
|
# =============================================================================
|
|
|
|
async def generate_worksheet_pdf(html: str) -> bytes:
    """Generate PDF from HTML using WeasyPrint.

    Args:
        html: Complete HTML document to render.

    Returns:
        The rendered PDF as bytes. If WeasyPrint cannot be imported, the
        UTF-8 encoded HTML is returned instead, so callers must be prepared
        to receive HTML rather than PDF data.

    Raises:
        Exception: Any error raised while rendering is logged and re-raised
            unchanged.
    """
    # Only the import is guarded here, so an OSError raised later during
    # rendering is not mistaken for "WeasyPrint missing". OSError is caught in
    # addition to ImportError to match the module-level import guard in this
    # file.
    try:
        from weasyprint import HTML
    except (ImportError, OSError):
        logger.warning("WeasyPrint not available, returning HTML")
        return html.encode('utf-8')

    try:
        return HTML(string=html).write_pdf()
    except Exception as e:
        logger.error(f"PDF generation failed: {e}")
        raise
|
|
|
|
|
|
# =============================================================================
|
|
# PDF Utilities (PyMuPDF)
|
|
# =============================================================================
|
|
|
|
def get_pdf_page_count(pdf_data: bytes) -> int:
    """Get the number of pages in a PDF.

    Args:
        pdf_data: PDF file as bytes.

    Returns:
        The page count, or 0 if PyMuPDF is not installed or the data cannot
        be opened. The failure is logged, not raised.
    """
    try:
        import fitz

        # The context manager closes the document even if reading the page
        # count raises.
        with fitz.open(stream=pdf_data, filetype="pdf") as pdf_document:
            return pdf_document.page_count
    except Exception as e:
        # Best effort by design: callers get 0 instead of an exception.
        logger.error(f"Failed to get PDF page count: {e}")
        return 0
|
|
|
|
|
|
async def convert_pdf_page_to_image(pdf_data: bytes, page_number: int = 0, thumbnail: bool = False) -> bytes:
    """Convert a specific page of PDF to PNG image using PyMuPDF.

    Args:
        pdf_data: PDF file as bytes
        page_number: 0-indexed page number
        thumbnail: If True, return a smaller thumbnail image

    Returns:
        The rendered page as PNG bytes.

    Raises:
        HTTPException: status 500 if PyMuPDF is not installed; status 400 if
            the PDF has no pages, ``page_number`` is outside
            ``0 .. page_count - 1``, or opening/rendering fails.
    """
    try:
        import fitz  # PyMuPDF

        # The context manager closes the document on every path, including
        # the validation errors raised below.
        with fitz.open(stream=pdf_data, filetype="pdf") as pdf_document:
            page_count = pdf_document.page_count

            if page_count == 0:
                raise ValueError("PDF has no pages")

            # Negative numbers are rejected explicitly; otherwise they would
            # be used as an index counted from the end of the document.
            if not 0 <= page_number < page_count:
                raise ValueError(f"Page {page_number} does not exist (PDF has {page_count} pages)")

            page = pdf_document[page_number]

            # Render page to image
            # For thumbnails: lower resolution, for OCR: higher resolution
            zoom = 0.5 if thumbnail else 2.0
            mat = fitz.Matrix(zoom, zoom)
            pix = page.get_pixmap(matrix=mat)

            png_data = pix.tobytes("png")

        logger.info(f"Converted PDF page {page_number} to PNG: {len(png_data)} bytes (thumbnail={thumbnail})")
        return png_data

    except ImportError as e:
        logger.error("PyMuPDF (fitz) not installed")
        raise HTTPException(status_code=500, detail="PDF conversion not available - PyMuPDF not installed") from e
    except Exception as e:
        logger.error(f"PDF conversion failed: {e}")
        raise HTTPException(status_code=400, detail=f"PDF conversion failed: {str(e)}") from e
|
|
|
|
|
|
async def convert_pdf_to_images(pdf_data: bytes, pages: Optional[List[int]] = None) -> List[bytes]:
    """Convert multiple pages of PDF to PNG images.

    Args:
        pdf_data: PDF file as bytes
        pages: List of 0-indexed page numbers to convert. If None, convert all pages.
            Numbers outside ``0 .. page_count - 1`` are skipped silently.

    Returns:
        One PNG byte string per converted page, in the order given by ``pages``.

    Raises:
        HTTPException: status 500 if PyMuPDF is not installed; status 400 if
            the PDF has no pages or opening/rendering fails.
    """
    try:
        import fitz

        # The context manager closes the document on every path, including
        # the "no pages" error raised below.
        with fitz.open(stream=pdf_data, filetype="pdf") as pdf_document:
            page_count = pdf_document.page_count

            if page_count == 0:
                raise ValueError("PDF has no pages")

            # If no pages specified, convert all
            selected = range(page_count) if pages is None else pages

            zoom = 2.0
            mat = fitz.Matrix(zoom, zoom)

            # The lower bound keeps negative numbers from being used as
            # indices counted from the end of the document.
            images = [
                pdf_document[page_num].get_pixmap(matrix=mat).tobytes("png")
                for page_num in selected
                if 0 <= page_num < page_count
            ]

        logger.info(f"Converted {len(images)} PDF pages to images")
        return images

    except ImportError as e:
        logger.error("PyMuPDF (fitz) not installed")
        raise HTTPException(status_code=500, detail="PDF conversion not available") from e
    except Exception as e:
        logger.error(f"PDF conversion failed: {e}")
        raise HTTPException(status_code=400, detail=f"PDF conversion failed: {str(e)}") from e
|