Fix: Remove broken getKlausurApiUrl and clean up empty lines

A sed replacement left orphaned hostname references in the story page and empty lines in the getApiBase functions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-24 16:02:04 +02:00
parent b07f802c24
commit 9ba420fa91
150 changed files with 30231 additions and 32053 deletions
--- a/klausur-service/backend/vocab_worksheet_generation.py
+++ b/klausur-service/backend/vocab_worksheet_generation.py
@@ -0,0 +1,260 @@
+"""
+Vocabulary Worksheet Generation — HTML/PDF generation and PDF utilities.
+
+Extracted from vocab_worksheet_api.py to keep modules under 500 LOC.
+
+Functions:
+  - generate_worksheet_html(): Build HTML for various worksheet types
+  - generate_worksheet_pdf():  Convert HTML to PDF via WeasyPrint
+  - get_pdf_page_count():      Count pages in a PDF (PyMuPDF)
+  - convert_pdf_page_to_image(): Render single PDF page to PNG
+  - convert_pdf_to_images():     Render multiple PDF pages to PNG
+"""
+
+import io
+import logging
+import os
+from typing import List, Optional
+
+from fastapi import HTTPException
+
+from vocab_worksheet_models import VocabularyEntry, WorksheetType
+
+logger = logging.getLogger(__name__)
+
+# Optional dependency: WeasyPrint
+# Probe once at import time and record the outcome in WEASYPRINT_AVAILABLE.
+# OSError is treated the same as ImportError here -- presumably because the
+# package can be installed while its native libraries fail to load
+# (NOTE(review): confirm against the WeasyPrint installation docs).
+# The functions visible in this module re-import lazily instead of reading
+# this flag; it may be consumed by other modules.
+try:
+    from weasyprint import HTML as _WeasyHTML
+    WEASYPRINT_AVAILABLE = True
+except (ImportError, OSError):
+    WEASYPRINT_AVAILABLE = False
+    logger.warning("WeasyPrint not available")
+
+# Optional dependency: PyMuPDF
+# Same probing pattern; FITZ_AVAILABLE is False when the import fails.
+# Only ImportError is handled for this package.
+try:
+    import fitz  # PyMuPDF
+    FITZ_AVAILABLE = True
+except ImportError:
+    FITZ_AVAILABLE = False
+    logger.warning("PyMuPDF (fitz) not available")
+
+
+# =============================================================================
+# Worksheet HTML Generation
+# =============================================================================
+
+def generate_worksheet_html(
+    vocabulary: List[VocabularyEntry],
+    worksheet_type: WorksheetType,
+    title: str,
+    show_solutions: bool = False,
+    repetitions: int = 3,
+    line_height: str = "normal"
+) -> str:
+    """Generate HTML for a worksheet."""
+
+    # Line height CSS
+    line_heights = {
+        "normal": "2.5em",
+        "large": "3.5em",
+        "extra-large": "4.5em"
+    }
+    lh = line_heights.get(line_height, "2.5em")
+
+    html = f"""<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <style>
+        @page {{ size: A4; margin: 2cm; }}
+        body {{ font-family: 'Segoe UI', Arial, sans-serif; font-size: 14px; }}
+        h1 {{ font-size: 24px; margin-bottom: 10px; }}
+        .meta {{ color: #666; margin-bottom: 20px; }}
+        .name-line {{ margin-bottom: 30px; }}
+        .vocab-table {{ width: 100%; border-collapse: collapse; }}
+        .vocab-table td {{ padding: 8px; border-bottom: 1px solid #ddd; line-height: {lh}; }}
+        .vocab-word {{ width: 40%; font-weight: 500; }}
+        .vocab-blank {{ width: 60%; border-bottom: 2px dotted #999; }}
+        .vocab-answer {{ width: 60%; color: #2563eb; }}
+        .gap {{ border-bottom: 2px solid #333; min-width: 100px; display: inline-block; }}
+        .hint {{ color: #666; font-style: italic; font-size: 12px; }}
+        .section {{ margin-top: 30px; }}
+        .section-title {{ font-size: 16px; font-weight: 600; margin-bottom: 15px; color: #374151; }}
+    </style>
+</head>
+<body>
+    <h1>{title}</h1>
+    <div class="name-line">Name: _________________________ Datum: _____________</div>
+"""
+
+    if worksheet_type == WorksheetType.EN_TO_DE:
+        html += '<div class="section"><div class="section-title">Uebersetze ins Deutsche:</div>'
+        html += '<table class="vocab-table">'
+        for entry in vocabulary:
+            if show_solutions:
+                html += f'<tr><td class="vocab-word">{entry.english}</td><td class="vocab-answer">{entry.german}</td></tr>'
+            else:
+                html += f'<tr><td class="vocab-word">{entry.english}</td><td class="vocab-blank"></td></tr>'
+        html += '</table></div>'
+
+    elif worksheet_type == WorksheetType.DE_TO_EN:
+        html += '<div class="section"><div class="section-title">Uebersetze ins Englische:</div>'
+        html += '<table class="vocab-table">'
+        for entry in vocabulary:
+            if show_solutions:
+                html += f'<tr><td class="vocab-word">{entry.german}</td><td class="vocab-answer">{entry.english}</td></tr>'
+            else:
+                html += f'<tr><td class="vocab-word">{entry.german}</td><td class="vocab-blank"></td></tr>'
+        html += '</table></div>'
+
+    elif worksheet_type == WorksheetType.COPY_PRACTICE:
+        html += '<div class="section"><div class="section-title">Schreibe jedes Wort mehrmals:</div>'
+        html += '<table class="vocab-table">'
+        for entry in vocabulary:
+            html += f'<tr><td class="vocab-word">{entry.english}</td>'
+            html += '<td class="vocab-blank">'
+            if show_solutions:
+                html += f' {entry.english} ' * repetitions
+            html += '</td></tr>'
+        html += '</table></div>'
+
+    elif worksheet_type == WorksheetType.GAP_FILL:
+        entries_with_examples = [e for e in vocabulary if e.example_sentence]
+        if entries_with_examples:
+            html += '<div class="section"><div class="section-title">Fuege das passende Wort ein:</div>'
+            for i, entry in enumerate(entries_with_examples, 1):
+                # Create gap sentence by removing the English word
+                gap_sentence = entry.example_sentence
+                for word in entry.english.split():
+                    if word.lower() in gap_sentence.lower():
+                        gap_sentence = gap_sentence.replace(word, '<span class="gap"></span>')
+                        gap_sentence = gap_sentence.replace(word.capitalize(), '<span class="gap"></span>')
+                        gap_sentence = gap_sentence.replace(word.lower(), '<span class="gap"></span>')
+                        break
+
+                html += f'<p>{i}. {gap_sentence}</p>'
+                if show_solutions:
+                    html += f'<p class="hint">Loesung: {entry.english}</p>'
+                else:
+                    html += f'<p class="hint">({entry.german})</p>'
+            html += '</div>'
+
+    html += '</body></html>'
+    return html
+
+
+# =============================================================================
+# Worksheet PDF Generation
+# =============================================================================
+
+async def generate_worksheet_pdf(html: str) -> bytes:
+    """Generate PDF from HTML using WeasyPrint."""
+    try:
+        from weasyprint import HTML
+        pdf_bytes = HTML(string=html).write_pdf()
+        return pdf_bytes
+    except ImportError:
+        logger.warning("WeasyPrint not available, returning HTML")
+        return html.encode('utf-8')
+    except Exception as e:
+        logger.error(f"PDF generation failed: {e}")
+        raise
+
+
+# =============================================================================
+# PDF Utilities (PyMuPDF)
+# =============================================================================
+
+def get_pdf_page_count(pdf_data: bytes) -> int:
+    """Get the number of pages in a PDF."""
+    try:
+        import fitz
+        pdf_document = fitz.open(stream=pdf_data, filetype="pdf")
+        count = pdf_document.page_count
+        pdf_document.close()
+        return count
+    except Exception as e:
+        logger.error(f"Failed to get PDF page count: {e}")
+        return 0
+
+
+async def convert_pdf_page_to_image(pdf_data: bytes, page_number: int = 0, thumbnail: bool = False) -> bytes:
+    """Convert a specific page of PDF to PNG image using PyMuPDF.
+
+    Args:
+        pdf_data: PDF file as bytes
+        page_number: 0-indexed page number
+        thumbnail: If True, return a smaller thumbnail image
+    """
+    try:
+        import fitz  # PyMuPDF
+
+        pdf_document = fitz.open(stream=pdf_data, filetype="pdf")
+
+        if pdf_document.page_count == 0:
+            raise ValueError("PDF has no pages")
+
+        if page_number >= pdf_document.page_count:
+            raise ValueError(f"Page {page_number} does not exist (PDF has {pdf_document.page_count} pages)")
+
+        page = pdf_document[page_number]
+
+        # Render page to image
+        # For thumbnails: lower resolution, for OCR: higher resolution
+        zoom = 0.5 if thumbnail else 2.0
+        mat = fitz.Matrix(zoom, zoom)
+        pix = page.get_pixmap(matrix=mat)
+
+        png_data = pix.tobytes("png")
+        pdf_document.close()
+
+        logger.info(f"Converted PDF page {page_number} to PNG: {len(png_data)} bytes (thumbnail={thumbnail})")
+        return png_data
+
+    except ImportError:
+        logger.error("PyMuPDF (fitz) not installed")
+        raise HTTPException(status_code=500, detail="PDF conversion not available - PyMuPDF not installed")
+    except Exception as e:
+        logger.error(f"PDF conversion failed: {e}")
+        raise HTTPException(status_code=400, detail=f"PDF conversion failed: {str(e)}")
+
+
+async def convert_pdf_to_images(pdf_data: bytes, pages: List[int] = None) -> List[bytes]:
+    """Convert multiple pages of PDF to PNG images.
+
+    Args:
+        pdf_data: PDF file as bytes
+        pages: List of 0-indexed page numbers to convert. If None, convert all pages.
+    """
+    try:
+        import fitz
+
+        pdf_document = fitz.open(stream=pdf_data, filetype="pdf")
+
+        if pdf_document.page_count == 0:
+            raise ValueError("PDF has no pages")
+
+        # If no pages specified, convert all
+        if pages is None:
+            pages = list(range(pdf_document.page_count))
+
+        images = []
+        zoom = 2.0
+        mat = fitz.Matrix(zoom, zoom)
+
+        for page_num in pages:
+            if page_num < pdf_document.page_count:
+                page = pdf_document[page_num]
+                pix = page.get_pixmap(matrix=mat)
+                images.append(pix.tobytes("png"))
+
+        pdf_document.close()
+        logger.info(f"Converted {len(images)} PDF pages to images")
+        return images
+
+    except ImportError:
+        logger.error("PyMuPDF (fitz) not installed")
+        raise HTTPException(status_code=500, detail="PDF conversion not available")
+    except Exception as e:
+        logger.error(f"PDF conversion failed: {e}")
+        raise HTTPException(status_code=400, detail=f"PDF conversion failed: {str(e)}")