""" Worksheet Editor API - Backend Endpoints for Visual Worksheet Editor Provides endpoints for: - AI Image generation via Ollama/Stable Diffusion - Worksheet Save/Load - PDF Export """ import os import io import uuid import json import base64 import logging from datetime import datetime, timezone from typing import Optional, List, Dict, Any from enum import Enum from dataclasses import dataclass, field, asdict from fastapi import APIRouter, HTTPException, Request, BackgroundTasks from fastapi.responses import FileResponse, StreamingResponse from pydantic import BaseModel, Field import httpx # PDF Generation try: from reportlab.lib import colors from reportlab.lib.pagesizes import A4 from reportlab.lib.units import mm from reportlab.pdfgen import canvas from reportlab.lib.styles import getSampleStyleSheet REPORTLAB_AVAILABLE = True except ImportError: REPORTLAB_AVAILABLE = False logger = logging.getLogger(__name__) # ============================================= # CONFIGURATION # ============================================= OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434") SD_MODEL = os.getenv("SD_MODEL", "stable-diffusion") # or specific SD model WORKSHEET_STORAGE_DIR = os.getenv("WORKSHEET_STORAGE_DIR", os.path.join(os.path.dirname(os.path.abspath(__file__)), "worksheet-storage")) # Ensure storage directory exists os.makedirs(WORKSHEET_STORAGE_DIR, exist_ok=True) # ============================================= # ENUMS & MODELS # ============================================= class AIImageStyle(str, Enum): REALISTIC = "realistic" CARTOON = "cartoon" SKETCH = "sketch" CLIPART = "clipart" EDUCATIONAL = "educational" class WorksheetStatus(str, Enum): DRAFT = "draft" PUBLISHED = "published" ARCHIVED = "archived" # Style prompt modifiers STYLE_PROMPTS = { AIImageStyle.REALISTIC: "photorealistic, high detail, professional photography", AIImageStyle.CARTOON: "cartoon style, colorful, child-friendly, simple shapes", AIImageStyle.SKETCH: "pencil sketch, hand-drawn, black and white, artistic", AIImageStyle.CLIPART: "clipart style, flat design, simple, vector-like", AIImageStyle.EDUCATIONAL: "educational illustration, clear, informative, textbook style" } # ============================================= # REQUEST/RESPONSE MODELS # ============================================= class AIImageRequest(BaseModel): prompt: str = Field(..., min_length=3, max_length=500) style: AIImageStyle = AIImageStyle.EDUCATIONAL width: int = Field(512, ge=256, le=1024) height: int = Field(512, ge=256, le=1024) class AIImageResponse(BaseModel): image_base64: str prompt_used: str error: Optional[str] = None class PageData(BaseModel): id: str index: int canvasJSON: str class PageFormat(BaseModel): width: float = 210 height: float = 297 orientation: str = "portrait" margins: Dict[str, float] = {"top": 15, "right": 15, "bottom": 15, "left": 15} class WorksheetSaveRequest(BaseModel): id: Optional[str] = None title: str description: Optional[str] = None pages: List[PageData] pageFormat: Optional[PageFormat] = None class WorksheetResponse(BaseModel): id: str title: str description: Optional[str] pages: List[PageData] pageFormat: PageFormat createdAt: str updatedAt: str # ============================================= # IN-MEMORY STORAGE (Development) # ============================================= worksheets_db: Dict[str, Dict] = {} # ============================================= # ROUTER # ============================================= router = APIRouter(prefix="/api/v1/worksheet", tags=["Worksheet Editor"]) # ============================================= # AI IMAGE GENERATION # ============================================= @router.post("/ai-image", response_model=AIImageResponse) async def generate_ai_image(request: AIImageRequest): """ Generate an AI image using Ollama with a text-to-image model. Supported models: - stable-diffusion (via Ollama) - sd3.5-medium - llava (for image understanding, not generation) Falls back to a placeholder if Ollama is not available. """ try: # Build enhanced prompt with style style_modifier = STYLE_PROMPTS.get(request.style, "") enhanced_prompt = f"{request.prompt}, {style_modifier}" logger.info(f"Generating AI image: {enhanced_prompt[:100]}...") # Check if Ollama is available async with httpx.AsyncClient(timeout=10.0) as check_client: try: health_response = await check_client.get(f"{OLLAMA_URL}/api/tags") if health_response.status_code != 200: raise HTTPException(status_code=503, detail="Ollama service not available") except httpx.ConnectError: logger.warning("Ollama not reachable, returning placeholder") # Return a placeholder image (simple colored rectangle) return _generate_placeholder_image(request, enhanced_prompt) # Try to generate with Stable Diffusion via Ollama # Note: Ollama doesn't natively support SD, this is a placeholder for when it does # or when using a compatible endpoint try: async with httpx.AsyncClient(timeout=300.0) as client: # Check if SD model is available tags_response = await client.get(f"{OLLAMA_URL}/api/tags") available_models = [m.get("name", "") for m in tags_response.json().get("models", [])] # Look for SD-compatible model sd_model = None for model in available_models: if "stable" in model.lower() or "sd" in model.lower() or "diffusion" in model.lower(): sd_model = model break if not sd_model: logger.warning("No Stable Diffusion model found in Ollama") return _generate_placeholder_image(request, enhanced_prompt) # Generate image (this would need Ollama's image generation API) # For now, return placeholder logger.info(f"SD model found: {sd_model}, but image generation API not implemented") return _generate_placeholder_image(request, enhanced_prompt) except Exception as e: logger.error(f"Image generation failed: {e}") return _generate_placeholder_image(request, enhanced_prompt) except HTTPException: raise except Exception as e: logger.error(f"AI image generation error: {e}") raise HTTPException(status_code=500, detail=str(e)) def _generate_placeholder_image(request: AIImageRequest, prompt: str) -> AIImageResponse: """ Generate a placeholder image when AI generation is not available. Creates a simple SVG-based placeholder with the prompt text. """ from PIL import Image, ImageDraw, ImageFont # Create image width, height = request.width, request.height # Style-based colors style_colors = { AIImageStyle.REALISTIC: ("#2563eb", "#dbeafe"), AIImageStyle.CARTOON: ("#f97316", "#ffedd5"), AIImageStyle.SKETCH: ("#6b7280", "#f3f4f6"), AIImageStyle.CLIPART: ("#8b5cf6", "#ede9fe"), AIImageStyle.EDUCATIONAL: ("#059669", "#d1fae5"), } fg_color, bg_color = style_colors.get(request.style, ("#6366f1", "#e0e7ff")) # Create image with Pillow img = Image.new('RGB', (width, height), bg_color) draw = ImageDraw.Draw(img) # Draw border draw.rectangle([5, 5, width-6, height-6], outline=fg_color, width=3) # Draw icon (simple shapes) cx, cy = width // 2, height // 2 - 30 draw.ellipse([cx-40, cy-40, cx+40, cy+40], outline=fg_color, width=3) draw.line([cx-20, cy-10, cx+20, cy-10], fill=fg_color, width=3) draw.line([cx, cy-10, cx, cy+20], fill=fg_color, width=3) # Draw text try: font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 14) except: font = ImageFont.load_default() # Wrap text max_chars = 40 lines = [] words = prompt[:200].split() current_line = "" for word in words: if len(current_line) + len(word) + 1 <= max_chars: current_line += (" " + word if current_line else word) else: if current_line: lines.append(current_line) current_line = word if current_line: lines.append(current_line) text_y = cy + 60 for line in lines[:4]: # Max 4 lines bbox = draw.textbbox((0, 0), line, font=font) text_width = bbox[2] - bbox[0] draw.text((cx - text_width // 2, text_y), line, fill=fg_color, font=font) text_y += 20 # Draw "AI Placeholder" badge badge_text = "KI-Bild (Platzhalter)" try: badge_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 10) except: badge_font = font draw.rectangle([10, height-30, 150, height-10], fill=fg_color) draw.text((15, height-27), badge_text, fill="white", font=badge_font) # Convert to base64 buffer = io.BytesIO() img.save(buffer, format='PNG') buffer.seek(0) image_base64 = f"data:image/png;base64,{base64.b64encode(buffer.getvalue()).decode('utf-8')}" return AIImageResponse( image_base64=image_base64, prompt_used=prompt, error="AI image generation not available. Using placeholder." ) # ============================================= # WORKSHEET SAVE/LOAD # ============================================= @router.post("/save", response_model=WorksheetResponse) async def save_worksheet(request: WorksheetSaveRequest): """ Save a worksheet document. - If id is provided, updates existing worksheet - If id is not provided, creates new worksheet """ try: now = datetime.now(timezone.utc).isoformat() # Generate or use existing ID worksheet_id = request.id or f"ws_{uuid.uuid4().hex[:12]}" # Build worksheet data worksheet = { "id": worksheet_id, "title": request.title, "description": request.description, "pages": [p.dict() for p in request.pages], "pageFormat": (request.pageFormat or PageFormat()).dict(), "createdAt": worksheets_db.get(worksheet_id, {}).get("createdAt", now), "updatedAt": now } # Save to in-memory storage worksheets_db[worksheet_id] = worksheet # Also persist to file filepath = os.path.join(WORKSHEET_STORAGE_DIR, f"{worksheet_id}.json") with open(filepath, 'w', encoding='utf-8') as f: json.dump(worksheet, f, ensure_ascii=False, indent=2) logger.info(f"Saved worksheet: {worksheet_id}") return WorksheetResponse(**worksheet) except Exception as e: logger.error(f"Failed to save worksheet: {e}") raise HTTPException(status_code=500, detail=f"Failed to save: {str(e)}") @router.get("/{worksheet_id}", response_model=WorksheetResponse) async def get_worksheet(worksheet_id: str): """ Load a worksheet document by ID. """ try: # Try in-memory first if worksheet_id in worksheets_db: return WorksheetResponse(**worksheets_db[worksheet_id]) # Try file storage filepath = os.path.join(WORKSHEET_STORAGE_DIR, f"{worksheet_id}.json") if os.path.exists(filepath): with open(filepath, 'r', encoding='utf-8') as f: worksheet = json.load(f) worksheets_db[worksheet_id] = worksheet # Cache it return WorksheetResponse(**worksheet) raise HTTPException(status_code=404, detail="Worksheet not found") except HTTPException: raise except Exception as e: logger.error(f"Failed to load worksheet {worksheet_id}: {e}") raise HTTPException(status_code=500, detail=f"Failed to load: {str(e)}") @router.get("/list/all") async def list_worksheets(): """ List all available worksheets. """ try: worksheets = [] # Load from file storage for filename in os.listdir(WORKSHEET_STORAGE_DIR): if filename.endswith('.json'): filepath = os.path.join(WORKSHEET_STORAGE_DIR, filename) try: with open(filepath, 'r', encoding='utf-8') as f: worksheet = json.load(f) worksheets.append({ "id": worksheet.get("id"), "title": worksheet.get("title"), "description": worksheet.get("description"), "pageCount": len(worksheet.get("pages", [])), "updatedAt": worksheet.get("updatedAt"), "createdAt": worksheet.get("createdAt") }) except Exception as e: logger.warning(f"Failed to load {filename}: {e}") # Sort by updatedAt descending worksheets.sort(key=lambda x: x.get("updatedAt", ""), reverse=True) return {"worksheets": worksheets, "total": len(worksheets)} except Exception as e: logger.error(f"Failed to list worksheets: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.delete("/{worksheet_id}") async def delete_worksheet(worksheet_id: str): """ Delete a worksheet document. """ try: # Remove from memory if worksheet_id in worksheets_db: del worksheets_db[worksheet_id] # Remove file filepath = os.path.join(WORKSHEET_STORAGE_DIR, f"{worksheet_id}.json") if os.path.exists(filepath): os.remove(filepath) logger.info(f"Deleted worksheet: {worksheet_id}") return {"status": "deleted", "id": worksheet_id} raise HTTPException(status_code=404, detail="Worksheet not found") except HTTPException: raise except Exception as e: logger.error(f"Failed to delete worksheet {worksheet_id}: {e}") raise HTTPException(status_code=500, detail=str(e)) # ============================================= # PDF EXPORT # ============================================= @router.post("/{worksheet_id}/export-pdf") async def export_worksheet_pdf(worksheet_id: str): """ Export worksheet as PDF. Note: This creates a basic PDF. For full canvas rendering, the frontend should use pdf-lib with canvas.toDataURL(). """ if not REPORTLAB_AVAILABLE: raise HTTPException(status_code=501, detail="PDF export not available (reportlab not installed)") try: # Load worksheet worksheet = worksheets_db.get(worksheet_id) if not worksheet: filepath = os.path.join(WORKSHEET_STORAGE_DIR, f"{worksheet_id}.json") if os.path.exists(filepath): with open(filepath, 'r', encoding='utf-8') as f: worksheet = json.load(f) else: raise HTTPException(status_code=404, detail="Worksheet not found") # Create PDF buffer = io.BytesIO() c = canvas.Canvas(buffer, pagesize=A4) page_width, page_height = A4 for page_data in worksheet.get("pages", []): # Add title on first page if page_data.get("index", 0) == 0: c.setFont("Helvetica-Bold", 18) c.drawString(50, page_height - 50, worksheet.get("title", "Arbeitsblatt")) c.setFont("Helvetica", 10) c.drawString(50, page_height - 70, f"Erstellt: {worksheet.get('createdAt', '')[:10]}") # Parse canvas JSON and render basic elements canvas_json_str = page_data.get("canvasJSON", "{}") if canvas_json_str: try: canvas_data = json.loads(canvas_json_str) objects = canvas_data.get("objects", []) for obj in objects: obj_type = obj.get("type", "") if obj_type in ["text", "i-text", "textbox"]: # Render text text = obj.get("text", "") left = obj.get("left", 50) top = obj.get("top", 100) font_size = obj.get("fontSize", 12) # Convert from canvas coords to PDF coords pdf_x = left * 0.75 # Approximate scale pdf_y = page_height - (top * 0.75) c.setFont("Helvetica", min(font_size, 24)) c.drawString(pdf_x, pdf_y, text[:100]) elif obj_type == "rect": # Render rectangle left = obj.get("left", 0) * 0.75 top = obj.get("top", 0) * 0.75 width = obj.get("width", 50) * 0.75 height = obj.get("height", 30) * 0.75 c.rect(left, page_height - top - height, width, height) elif obj_type == "circle": # Render circle left = obj.get("left", 0) * 0.75 top = obj.get("top", 0) * 0.75 radius = obj.get("radius", 25) * 0.75 c.circle(left + radius, page_height - top - radius, radius) except json.JSONDecodeError: pass c.showPage() c.save() buffer.seek(0) filename = f"{worksheet.get('title', 'worksheet').replace(' ', '_')}.pdf" return StreamingResponse( buffer, media_type="application/pdf", headers={"Content-Disposition": f"attachment; filename={filename}"} ) except HTTPException: raise except Exception as e: logger.error(f"PDF export failed: {e}") raise HTTPException(status_code=500, detail=str(e)) # ============================================= # AI WORKSHEET MODIFICATION # ============================================= class AIModifyRequest(BaseModel): prompt: str = Field(..., min_length=3, max_length=1000) canvas_json: str model: str = "qwen2.5vl:32b" class AIModifyResponse(BaseModel): modified_canvas_json: Optional[str] = None message: str error: Optional[str] = None @router.post("/ai-modify", response_model=AIModifyResponse) async def modify_worksheet_with_ai(request: AIModifyRequest): """ Modify a worksheet using AI based on natural language prompt. Uses Ollama with qwen2.5vl:32b to understand the canvas state and generate modifications based on the user's request. """ try: logger.info(f"AI modify request: {request.prompt[:100]}...") # Parse current canvas state try: canvas_data = json.loads(request.canvas_json) except json.JSONDecodeError: return AIModifyResponse( message="Fehler beim Parsen des Canvas", error="Invalid canvas JSON" ) # Build system prompt for the AI system_prompt = """Du bist ein Assistent fuer die Bearbeitung von Arbeitsblaettern. Du erhaeltst den aktuellen Zustand eines Canvas im JSON-Format und eine Anweisung des Nutzers. Deine Aufgabe ist es, die gewuenschten Aenderungen am Canvas vorzunehmen. Der Canvas verwendet Fabric.js. Hier sind die wichtigsten Objekttypen: - i-text: Interaktiver Text mit fontFamily, fontSize, fill, left, top - rect: Rechteck mit left, top, width, height, fill, stroke, strokeWidth - circle: Kreis mit left, top, radius, fill, stroke, strokeWidth - line: Linie mit x1, y1, x2, y2, stroke, strokeWidth Das Canvas ist 794x1123 Pixel (A4 bei 96 DPI). Antworte NUR mit einem JSON-Objekt in diesem Format: { "action": "modify" oder "add" oder "delete" oder "info", "objects": [...], // Neue/modifizierte Objekte (bei modify/add) "message": "Kurze Beschreibung der Aenderung" } Wenn du Objekte hinzufuegst, generiere eindeutige IDs im Format "obj__". """ user_prompt = f"""Aktueller Canvas-Zustand: ```json {json.dumps(canvas_data, indent=2)[:5000]} ``` Nutzer-Anweisung: {request.prompt} Fuehre die Aenderung durch und antworte mit dem JSON-Objekt.""" # Call Ollama try: async with httpx.AsyncClient(timeout=120.0) as client: response = await client.post( f"{OLLAMA_URL}/api/generate", json={ "model": request.model, "prompt": user_prompt, "system": system_prompt, "stream": False, "options": { "temperature": 0.3, "num_predict": 4096 } } ) if response.status_code != 200: logger.warning(f"Ollama error: {response.status_code}, trying local fallback") # Fallback: Try to handle simple requests locally return _handle_simple_modification(request.prompt, canvas_data) result = response.json() ai_response = result.get("response", "") except httpx.ConnectError: logger.warning("Ollama not reachable") # Fallback: Try to handle simple requests locally return _handle_simple_modification(request.prompt, canvas_data) except httpx.TimeoutException: logger.warning("Ollama timeout, trying local fallback") # Fallback: Try to handle simple requests locally return _handle_simple_modification(request.prompt, canvas_data) # Parse AI response try: # Find JSON in response json_start = ai_response.find('{') json_end = ai_response.rfind('}') + 1 if json_start == -1 or json_end <= json_start: logger.warning(f"No JSON found in AI response: {ai_response[:200]}") return AIModifyResponse( message="KI konnte die Anfrage nicht verarbeiten", error="No JSON in response" ) ai_json = json.loads(ai_response[json_start:json_end]) action = ai_json.get("action", "info") message = ai_json.get("message", "Aenderungen angewendet") new_objects = ai_json.get("objects", []) if action == "info": return AIModifyResponse(message=message) if action == "add" and new_objects: # Add new objects to canvas existing_objects = canvas_data.get("objects", []) existing_objects.extend(new_objects) canvas_data["objects"] = existing_objects return AIModifyResponse( modified_canvas_json=json.dumps(canvas_data), message=message ) if action == "modify" and new_objects: # Replace matching objects or add new ones existing_objects = canvas_data.get("objects", []) new_ids = {obj.get("id") for obj in new_objects if obj.get("id")} # Keep objects that aren't being modified kept_objects = [obj for obj in existing_objects if obj.get("id") not in new_ids] kept_objects.extend(new_objects) canvas_data["objects"] = kept_objects return AIModifyResponse( modified_canvas_json=json.dumps(canvas_data), message=message ) if action == "delete": # Delete objects by ID delete_ids = ai_json.get("delete_ids", []) if delete_ids: existing_objects = canvas_data.get("objects", []) canvas_data["objects"] = [obj for obj in existing_objects if obj.get("id") not in delete_ids] return AIModifyResponse( modified_canvas_json=json.dumps(canvas_data), message=message ) return AIModifyResponse(message=message) except json.JSONDecodeError as e: logger.error(f"Failed to parse AI JSON: {e}") return AIModifyResponse( message="Fehler beim Verarbeiten der KI-Antwort", error=str(e) ) except Exception as e: logger.error(f"AI modify error: {e}") return AIModifyResponse( message="Ein unerwarteter Fehler ist aufgetreten", error=str(e) ) def _handle_simple_modification(prompt: str, canvas_data: dict) -> AIModifyResponse: """ Handle simple modifications locally when Ollama is not available. Supports basic commands like adding headings, lines, etc. """ import time import random prompt_lower = prompt.lower() objects = canvas_data.get("objects", []) def generate_id(): return f"obj_{int(time.time()*1000)}_{random.randint(1000, 9999)}" # Add heading if "ueberschrift" in prompt_lower or "titel" in prompt_lower or "heading" in prompt_lower: # Extract text if provided in quotes import re text_match = re.search(r'"([^"]+)"', prompt) text = text_match.group(1) if text_match else "Ueberschrift" new_text = { "type": "i-text", "id": generate_id(), "text": text, "left": 397, # Center of A4 "top": 50, "originX": "center", "fontFamily": "Arial", "fontSize": 28, "fontWeight": "bold", "fill": "#000000" } objects.append(new_text) canvas_data["objects"] = objects return AIModifyResponse( modified_canvas_json=json.dumps(canvas_data), message=f"Ueberschrift '{text}' hinzugefuegt" ) # Add lines for writing if "linie" in prompt_lower or "line" in prompt_lower or "schreib" in prompt_lower: # Count how many lines import re num_match = re.search(r'(\d+)', prompt) num_lines = int(num_match.group(1)) if num_match else 5 num_lines = min(num_lines, 20) # Max 20 lines start_y = 150 line_spacing = 40 for i in range(num_lines): new_line = { "type": "line", "id": generate_id(), "x1": 60, "y1": start_y + i * line_spacing, "x2": 734, "y2": start_y + i * line_spacing, "stroke": "#cccccc", "strokeWidth": 1 } objects.append(new_line) canvas_data["objects"] = objects return AIModifyResponse( modified_canvas_json=json.dumps(canvas_data), message=f"{num_lines} Schreiblinien hinzugefuegt" ) # Make text bigger if "groesser" in prompt_lower or "bigger" in prompt_lower or "larger" in prompt_lower: modified = 0 for obj in objects: if obj.get("type") in ["i-text", "text", "textbox"]: current_size = obj.get("fontSize", 16) obj["fontSize"] = int(current_size * 1.25) modified += 1 canvas_data["objects"] = objects if modified > 0: return AIModifyResponse( modified_canvas_json=json.dumps(canvas_data), message=f"{modified} Texte vergroessert" ) # Center elements if "zentrier" in prompt_lower or "center" in prompt_lower or "mitte" in prompt_lower: center_x = 397 for obj in objects: if not obj.get("isGrid"): obj["left"] = center_x obj["originX"] = "center" canvas_data["objects"] = objects return AIModifyResponse( modified_canvas_json=json.dumps(canvas_data), message="Elemente zentriert" ) # Add numbering if "nummer" in prompt_lower or "nummerier" in prompt_lower or "1-10" in prompt_lower: import re range_match = re.search(r'(\d+)\s*[-bis]+\s*(\d+)', prompt) if range_match: start, end = int(range_match.group(1)), int(range_match.group(2)) else: start, end = 1, 10 y = 100 for i in range(start, min(end + 1, start + 20)): new_text = { "type": "i-text", "id": generate_id(), "text": f"{i}.", "left": 40, "top": y, "fontFamily": "Arial", "fontSize": 14, "fill": "#000000" } objects.append(new_text) y += 35 canvas_data["objects"] = objects return AIModifyResponse( modified_canvas_json=json.dumps(canvas_data), message=f"Nummerierung {start}-{end} hinzugefuegt" ) # Add rectangle/box if "rechteck" in prompt_lower or "box" in prompt_lower or "kasten" in prompt_lower: new_rect = { "type": "rect", "id": generate_id(), "left": 100, "top": 200, "width": 200, "height": 100, "fill": "transparent", "stroke": "#000000", "strokeWidth": 2 } objects.append(new_rect) canvas_data["objects"] = objects return AIModifyResponse( modified_canvas_json=json.dumps(canvas_data), message="Rechteck hinzugefuegt" ) # Add grid/raster if "raster" in prompt_lower or "grid" in prompt_lower or "tabelle" in prompt_lower: import re # Parse dimensions like "3x4", "3/4", "3 mal 4", "3 by 4" dim_match = re.search(r'(\d+)\s*[x/×\*mal by]\s*(\d+)', prompt_lower) if dim_match: cols = int(dim_match.group(1)) rows = int(dim_match.group(2)) else: # Try single numbers nums = re.findall(r'(\d+)', prompt) if len(nums) >= 2: cols, rows = int(nums[0]), int(nums[1]) else: cols, rows = 3, 4 # Default grid # Limit grid size cols = min(max(1, cols), 10) rows = min(max(1, rows), 15) # Canvas dimensions (A4 at 96 DPI) canvas_width = 794 canvas_height = 1123 # Grid positioning margin = 60 available_width = canvas_width - 2 * margin available_height = canvas_height - 2 * margin - 80 # Leave space for header cell_width = available_width / cols cell_height = min(available_height / rows, 80) # Max cell height start_x = margin start_y = 120 # Below potential header # Create grid lines grid_objects = [] # Horizontal lines for r in range(rows + 1): y = start_y + r * cell_height grid_objects.append({ "type": "line", "id": generate_id(), "x1": start_x, "y1": y, "x2": start_x + cols * cell_width, "y2": y, "stroke": "#666666", "strokeWidth": 1, "isGrid": True }) # Vertical lines for c in range(cols + 1): x = start_x + c * cell_width grid_objects.append({ "type": "line", "id": generate_id(), "x1": x, "y1": start_y, "x2": x, "y2": start_y + rows * cell_height, "stroke": "#666666", "strokeWidth": 1, "isGrid": True }) objects.extend(grid_objects) canvas_data["objects"] = objects return AIModifyResponse( modified_canvas_json=json.dumps(canvas_data), message=f"{cols}x{rows} Raster hinzugefuegt ({cols} Spalten, {rows} Zeilen)" ) # Default: Ollama needed return AIModifyResponse( message="Diese Aenderung erfordert den KI-Service. Bitte stellen Sie sicher, dass Ollama laeuft.", error="Complex modification requires Ollama" ) # ============================================= # HEALTH CHECK # ============================================= @router.get("/health/check") async def health_check(): """ Check worksheet editor API health and dependencies. """ status = { "status": "healthy", "ollama": False, "storage": os.path.exists(WORKSHEET_STORAGE_DIR), "reportlab": REPORTLAB_AVAILABLE, "worksheets_count": len(worksheets_db) } # Check Ollama try: async with httpx.AsyncClient(timeout=5.0) as client: response = await client.get(f"{OLLAMA_URL}/api/tags") status["ollama"] = response.status_code == 200 except: pass return status # ============================================= # DOCUMENT RECONSTRUCTION FROM VOCAB SESSION # ============================================= class ReconstructRequest(BaseModel): session_id: str page_number: int = 1 include_images: bool = True regenerate_graphics: bool = False class ReconstructResponse(BaseModel): canvas_json: str page_width: int page_height: int elements_count: int vocabulary_matched: int message: str error: Optional[str] = None @router.post("/reconstruct-from-session", response_model=ReconstructResponse) async def reconstruct_document_from_session(request: ReconstructRequest): """ Reconstruct a document from a vocab session into Fabric.js canvas format. This endpoint: 1. Loads the original PDF from the vocab session 2. Runs OCR with position tracking 3. Uses vision LLM to understand layout (headers, images, columns) 4. Creates Fabric.js canvas JSON with positioned elements 5. Maps extracted vocabulary to their positions Returns canvas JSON ready to load into the worksheet editor. """ try: # Import vocab session storage from vocab_worksheet_api import _sessions, convert_pdf_page_to_image # Check if session exists if request.session_id not in _sessions: raise HTTPException(status_code=404, detail=f"Session {request.session_id} not found") session = _sessions[request.session_id] # Check if PDF data exists if not session.get("pdf_data"): raise HTTPException(status_code=400, detail="Session has no PDF data") pdf_data = session["pdf_data"] page_count = session.get("pdf_page_count", 1) if request.page_number < 1 or request.page_number > page_count: raise HTTPException( status_code=400, detail=f"Page {request.page_number} not found. PDF has {page_count} pages." ) # Get extracted vocabulary for this page vocabulary = session.get("vocabulary", []) page_vocab = [v for v in vocabulary if v.get("source_page") == request.page_number] logger.info(f"Reconstructing page {request.page_number} from session {request.session_id}") logger.info(f"Found {len(page_vocab)} vocabulary items for this page") # Convert PDF page to image (async function) image_bytes = await convert_pdf_page_to_image(pdf_data, request.page_number) if not image_bytes: raise HTTPException(status_code=500, detail="Failed to convert PDF page to image") # Get image dimensions from PIL import Image img = Image.open(io.BytesIO(image_bytes)) img_width, img_height = img.size # Run OCR with positions from hybrid_vocab_extractor import run_paddle_ocr, OCRRegion ocr_regions, raw_text = run_paddle_ocr(image_bytes) logger.info(f"OCR found {len(ocr_regions)} text regions") # Scale factor: Convert image pixels to A4 canvas pixels (794x1123) A4_WIDTH = 794 A4_HEIGHT = 1123 scale_x = A4_WIDTH / img_width scale_y = A4_HEIGHT / img_height # Build Fabric.js objects fabric_objects = [] # 1. Add white background fabric_objects.append({ "type": "rect", "left": 0, "top": 0, "width": A4_WIDTH, "height": A4_HEIGHT, "fill": "#ffffff", "selectable": False, "evented": False, "isBackground": True }) # 2. Group OCR regions by Y-coordinate to detect rows sorted_regions = sorted(ocr_regions, key=lambda r: (r.y1, r.x1)) # 3. Detect headers (larger text at top) headers = [] body_regions = [] for region in sorted_regions: height = region.y2 - region.y1 # Headers are typically taller and near the top if region.y1 < img_height * 0.15 and height > 30: headers.append(region) else: body_regions.append(region) # 4. Create text objects for each region vocab_matched = 0 for region in sorted_regions: # Scale positions to A4 left = int(region.x1 * scale_x) top = int(region.y1 * scale_y) # Determine if this is a header is_header = region in headers # Determine font size based on region height region_height = region.y2 - region.y1 base_font_size = max(10, min(32, int(region_height * scale_y * 0.8))) if is_header: base_font_size = max(base_font_size, 24) # Check if this text matches vocabulary is_vocab = False vocab_match = None for v in page_vocab: if v.get("english", "").lower() in region.text.lower() or \ v.get("german", "").lower() in region.text.lower(): is_vocab = True vocab_match = v vocab_matched += 1 break # Create Fabric.js text object text_obj = { "type": "i-text", "id": f"text_{uuid.uuid4().hex[:8]}", "left": left, "top": top, "text": region.text, "fontFamily": "Arial", "fontSize": base_font_size, "fontWeight": "bold" if is_header else "normal", "fill": "#000000", "originX": "left", "originY": "top", } # Add metadata for vocabulary items if is_vocab and vocab_match: text_obj["isVocabulary"] = True text_obj["vocabularyId"] = vocab_match.get("id") text_obj["english"] = vocab_match.get("english") text_obj["german"] = vocab_match.get("german") fabric_objects.append(text_obj) # 5. If include_images, try to detect and extract image regions if request.include_images: image_regions = await _detect_image_regions(image_bytes, ocr_regions, img_width, img_height) for i, img_region in enumerate(image_regions): # Extract image region from original img_x1 = int(img_region["x1"]) img_y1 = int(img_region["y1"]) img_x2 = int(img_region["x2"]) img_y2 = int(img_region["y2"]) # Crop the region cropped = img.crop((img_x1, img_y1, img_x2, img_y2)) # Convert to base64 buffer = io.BytesIO() cropped.save(buffer, format='PNG') buffer.seek(0) img_base64 = f"data:image/png;base64,{base64.b64encode(buffer.getvalue()).decode('utf-8')}" # Create Fabric.js image object fabric_objects.append({ "type": "image", "id": f"img_{uuid.uuid4().hex[:8]}", "left": int(img_x1 * scale_x), "top": int(img_y1 * scale_y), "width": int((img_x2 - img_x1) * scale_x), "height": int((img_y2 - img_y1) * scale_y), "src": img_base64, "scaleX": 1, "scaleY": 1, }) # Build canvas JSON canvas_data = { "version": "6.0.0", "objects": fabric_objects, "background": "#ffffff" } return ReconstructResponse( canvas_json=json.dumps(canvas_data), page_width=A4_WIDTH, page_height=A4_HEIGHT, elements_count=len(fabric_objects), vocabulary_matched=vocab_matched, message=f"Reconstructed page {request.page_number} with {len(fabric_objects)} elements, {vocab_matched} vocabulary items matched" ) except HTTPException: raise except Exception as e: logger.error(f"Document reconstruction failed: {e}") import traceback logger.error(traceback.format_exc()) raise HTTPException(status_code=500, detail=str(e)) async def _detect_image_regions( image_bytes: bytes, ocr_regions: list, img_width: int, img_height: int ) -> List[Dict]: """ Detect image/graphic regions in the document. Uses a simple approach: 1. Find large gaps between text regions (potential image areas) 2. Use edge detection to find bounded regions 3. Filter out text areas """ from PIL import Image import numpy as np try: img = Image.open(io.BytesIO(image_bytes)) img_array = np.array(img.convert('L')) # Grayscale # Create a mask of text regions text_mask = np.ones_like(img_array, dtype=bool) for region in ocr_regions: x1 = max(0, region.x1 - 5) y1 = max(0, region.y1 - 5) x2 = min(img_width, region.x2 + 5) y2 = min(img_height, region.y2 + 5) text_mask[y1:y2, x1:x2] = False # Find contours in non-text areas # Simple approach: look for rectangular regions with significant content image_regions = [] # Use edge detection import cv2 edges = cv2.Canny(img_array, 50, 150) # Apply text mask edges[~text_mask] = 0 # Find contours contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) for contour in contours: x, y, w, h = cv2.boundingRect(contour) # Filter: minimum size for images (at least 50x50 pixels) if w > 50 and h > 50: # Filter: not too large (not the whole page) if w < img_width * 0.9 and h < img_height * 0.9: # Check if this region has actual content (not just edges) region_content = img_array[y:y+h, x:x+w] variance = np.var(region_content) if variance > 500: # Has enough visual content image_regions.append({ "x1": x, "y1": y, "x2": x + w, "y2": y + h }) # Remove overlapping regions (keep larger ones) filtered_regions = [] for region in sorted(image_regions, key=lambda r: (r["x2"]-r["x1"])*(r["y2"]-r["y1"]), reverse=True): overlaps = False for existing in filtered_regions: # Check overlap if not (region["x2"] < existing["x1"] or region["x1"] > existing["x2"] or region["y2"] < existing["y1"] or region["y1"] > existing["y2"]): overlaps = True break if not overlaps: filtered_regions.append(region) logger.info(f"Detected {len(filtered_regions)} image regions") return filtered_regions[:10] # Limit to 10 images max except Exception as e: logger.warning(f"Image region detection failed: {e}") return [] @router.get("/sessions/available") async def get_available_sessions(): """ Get list of available vocab sessions that can be reconstructed. """ try: from vocab_worksheet_api import _sessions available = [] for session_id, session in _sessions.items(): if session.get("pdf_data"): # Only sessions with PDF available.append({ "id": session_id, "name": session.get("name", "Unnamed"), "description": session.get("description"), "vocabulary_count": len(session.get("vocabulary", [])), "page_count": session.get("pdf_page_count", 1), "status": session.get("status", "unknown"), "created_at": session.get("created_at", "").isoformat() if session.get("created_at") else None }) return {"sessions": available, "total": len(available)} except Exception as e: logger.error(f"Failed to list sessions: {e}") raise HTTPException(status_code=500, detail=str(e))