fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits, losing 3400+ files across admin-v2, backend, studio-v2, website, klausur-service, and many other services. The partial restore attempt (660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e while preserving post-rebase additions (night-scheduler, night-mode UI, NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit 21a844cb8a
1986 changed files with 744143 additions and 1731 deletions
@@ -0,0 +1,16 @@
+"""
+Services Package
+AI Content Generation Services
+"""
+
+from .claude_service import ClaudeService
+from .youtube_service import YouTubeService
+from .material_analyzer import MaterialAnalyzer
+from .content_generator import ContentGenerator
+
+__all__ = [
+    "ClaudeService",
+    "YouTubeService",
+    "MaterialAnalyzer",
+    "ContentGenerator"
+]
@@ -0,0 +1,364 @@
+"""
+Claude Service
+Integration mit Claude API für Content-Generierung
+"""
+
+import os
+from typing import List, Dict, Any, Optional
+from anthropic import Anthropic
+
+
+class ClaudeService:
+    """Claude API Service"""
+
+    def __init__(self):
+        self.api_key = os.getenv("ANTHROPIC_API_KEY")
+        self.client = Anthropic(api_key=self.api_key) if self.api_key else None
+        self.model = "claude-sonnet-4-5-20251101"  # Latest model
+
+    def is_configured(self) -> bool:
+        """Check if API key is configured"""
+        return self.client is not None
+
+    async def generate_content(
+        self,
+        prompt: str,
+        system_prompt: Optional[str] = None,
+        max_tokens: int = 4000,
+        temperature: float = 1.0
+    ) -> str:
+        """
+        Generate content with Claude
+
+        Args:
+            prompt: User prompt
+            system_prompt: System prompt (optional)
+            max_tokens: Maximum tokens to generate
+            temperature: Sampling temperature
+
+        Returns:
+            Generated text
+        """
+        if not self.client:
+            raise ValueError("Claude API not configured. Set ANTHROPIC_API_KEY environment variable.")
+
+        messages = [{"role": "user", "content": prompt}]
+
+        kwargs = {
+            "model": self.model,
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+            "messages": messages
+        }
+
+        if system_prompt:
+            kwargs["system"] = system_prompt
+
+        response = self.client.messages.create(**kwargs)
+        return response.content[0].text
+
+    async def generate_quiz_questions(
+        self,
+        topic: str,
+        materials: List[Dict[str, Any]],
+        target_grade: str,
+        num_questions: int = 10
+    ) -> List[Dict[str, Any]]:
+        """Generate Quiz questions"""
+
+        material_text = self._format_materials(materials)
+
+        prompt = f"""Erstelle {num_questions} Multiple-Choice-Fragen zum Thema "{topic}" für Klassenstufe {target_grade}.
+
+Materialien:
+{material_text}
+
+Erstelle Fragen die:
+1. Das Verständnis testen
+2. Auf den Materialien basieren
+3. Altersgerecht sind
+4. 4 Antwortmöglichkeiten haben (1 richtig, 3 falsch)
+
+Formatiere die Ausgabe als JSON-Array:
+[
+  {{
+    "question": "Frage text?",
+    "options": ["Option A", "Option B", "Option C", "Option D"],
+    "correct_answer": 0,
+    "explanation": "Erklärung warum die Antwort richtig ist"
+  }}
+]
+
+Nur das JSON-Array zurückgeben, keine zusätzlichen Texte."""
+
+        response = await self.generate_content(
+            prompt=prompt,
+            system_prompt="Du bist ein pädagogischer Experte der Quizfragen erstellt."
+        )
+
+        # Parse JSON
+        import json
+        try:
+            questions = json.loads(response)
+            return questions
+        except json.JSONDecodeError:
+            # Try to extract JSON from response
+            import re
+            json_match = re.search(r'\[.*\]', response, re.DOTALL)
+            if json_match:
+                questions = json.loads(json_match.group())
+                return questions
+            raise ValueError("Could not parse quiz questions from Claude response")
+
+    async def generate_flashcards(
+        self,
+        topic: str,
+        materials: List[Dict[str, Any]],
+        target_grade: str,
+        num_cards: int = 15
+    ) -> List[Dict[str, str]]:
+        """Generate Flashcards"""
+
+        material_text = self._format_materials(materials)
+
+        prompt = f"""Erstelle {num_cards} Lernkarten (Flashcards) zum Thema "{topic}" für Klassenstufe {target_grade}.
+
+Materialien:
+{material_text}
+
+Erstelle Karten die:
+1. Wichtige Begriffe und Konzepte abdecken
+2. Kurz und prägnant sind
+3. Zum Wiederholen geeignet sind
+
+Formatiere die Ausgabe als JSON-Array:
+[
+  {{
+    "front": "Begriff oder Frage",
+    "back": "Definition oder Antwort"
+  }}
+]
+
+Nur das JSON-Array zurückgeben."""
+
+        response = await self.generate_content(
+            prompt=prompt,
+            system_prompt="Du bist ein Experte für Lernkarten-Design."
+        )
+
+        import json
+        import re
+        json_match = re.search(r'\[.*\]', response, re.DOTALL)
+        if json_match:
+            return json.loads(json_match.group())
+        return json.loads(response)
+
+    async def generate_fill_blanks_text(
+        self,
+        topic: str,
+        materials: List[Dict[str, Any]],
+        target_grade: str
+    ) -> Dict[str, Any]:
+        """Generate Fill-in-the-Blanks exercise"""
+
+        material_text = self._format_materials(materials)
+
+        prompt = f"""Erstelle einen Lückentext zum Thema "{topic}" für Klassenstufe {target_grade}.
+
+Materialien:
+{material_text}
+
+Erstelle einen Text mit 10-15 Lücken. Markiere Lücken mit *Wort*.
+
+Formatiere als JSON:
+{{
+  "title": "Titel des Lückentexts",
+  "text": "Der Text mit *Lücken* markiert...",
+  "hints": "Hilfreiche Hinweise"
+}}
+
+Nur JSON zurückgeben."""
+
+        response = await self.generate_content(
+            prompt=prompt,
+            system_prompt="Du bist ein Experte für Lückentexte."
+        )
+
+        import json
+        import re
+        json_match = re.search(r'\{.*\}', response, re.DOTALL)
+        if json_match:
+            return json.loads(json_match.group())
+        return json.loads(response)
+
+    async def generate_drag_drop_exercise(
+        self,
+        topic: str,
+        materials: List[Dict[str, Any]],
+        target_grade: str
+    ) -> Dict[str, Any]:
+        """Generate Drag-and-Drop exercise"""
+
+        material_text = self._format_materials(materials)
+
+        prompt = f"""Erstelle eine Drag-and-Drop Zuordnungsaufgabe zum Thema "{topic}" für Klassenstufe {target_grade}.
+
+Materialien:
+{material_text}
+
+Erstelle 3-4 Kategorien (Zonen) und 8-12 Elemente zum Zuordnen.
+
+Formatiere als JSON:
+{{
+  "title": "Titel der Aufgabe",
+  "question": "Aufgabenstellung",
+  "zones": [
+    {{ "id": 1, "name": "Kategorie 1" }},
+    {{ "id": 2, "name": "Kategorie 2" }}
+  ],
+  "draggables": [
+    {{ "id": 1, "text": "Element 1", "correctZoneId": 1 }},
+    {{ "id": 2, "text": "Element 2", "correctZoneId": 2 }}
+  ]
+}}
+
+Nur JSON zurückgeben."""
+
+        response = await self.generate_content(
+            prompt=prompt,
+            system_prompt="Du bist ein Experte für interaktive Lernaufgaben."
+        )
+
+        import json
+        import re
+        json_match = re.search(r'\{.*\}', response, re.DOTALL)
+        if json_match:
+            return json.loads(json_match.group())
+        return json.loads(response)
+
+    async def generate_memory_pairs(
+        self,
+        topic: str,
+        materials: List[Dict[str, Any]],
+        target_grade: str,
+        num_pairs: int = 8
+    ) -> List[Dict[str, str]]:
+        """Generate Memory Game pairs"""
+
+        material_text = self._format_materials(materials)
+
+        prompt = f"""Erstelle {num_pairs} Memory-Paare zum Thema "{topic}" für Klassenstufe {target_grade}.
+
+Materialien:
+{material_text}
+
+Jedes Paar besteht aus zwei zusammengehörigen Begriffen/Konzepten.
+
+Formatiere als JSON-Array:
+[
+  {{ "card1": "Begriff 1", "card2": "Zugehöriger Begriff" }}
+]
+
+Nur JSON zurückgeben."""
+
+        response = await self.generate_content(
+            prompt=prompt,
+            system_prompt="Du bist ein Experte für Memory-Spiele."
+        )
+
+        import json
+        import re
+        json_match = re.search(r'\[.*\]', response, re.DOTALL)
+        if json_match:
+            return json.loads(json_match.group())
+        return json.loads(response)
+
+    async def generate_timeline_events(
+        self,
+        topic: str,
+        materials: List[Dict[str, Any]],
+        target_grade: str
+    ) -> List[Dict[str, Any]]:
+        """Generate Timeline events"""
+
+        material_text = self._format_materials(materials)
+
+        prompt = f"""Erstelle eine Timeline mit 5-8 Ereignissen zum Thema "{topic}" für Klassenstufe {target_grade}.
+
+Materialien:
+{material_text}
+
+Formatiere als JSON-Array:
+[
+  {{
+    "year": "Jahr oder Zeitpunkt",
+    "title": "Ereignis Titel",
+    "description": "Kurze Beschreibung"
+  }}
+]
+
+Nur JSON zurückgeben."""
+
+        response = await self.generate_content(
+            prompt=prompt,
+            system_prompt="Du bist ein Experte für chronologische Darstellungen."
+        )
+
+        import json
+        import re
+        json_match = re.search(r'\[.*\]', response, re.DOTALL)
+        if json_match:
+            return json.loads(json_match.group())
+        return json.loads(response)
+
+    async def generate_presentation_slides(
+        self,
+        topic: str,
+        materials: List[Dict[str, Any]],
+        target_grade: str,
+        num_slides: int = 5
+    ) -> List[Dict[str, str]]:
+        """Generate Presentation slides"""
+
+        material_text = self._format_materials(materials)
+
+        prompt = f"""Erstelle {num_slides} Präsentationsfolien zum Thema "{topic}" für Klassenstufe {target_grade}.
+
+Materialien:
+{material_text}
+
+Formatiere als JSON-Array:
+[
+  {{
+    "title": "Folien Titel",
+    "content": "Folien Inhalt (2-4 Sätze)",
+    "backgroundColor": "#ffffff"
+  }}
+]
+
+Nur JSON zurückgeben."""
+
+        response = await self.generate_content(
+            prompt=prompt,
+            system_prompt="Du bist ein Experte für Präsentationen."
+        )
+
+        import json
+        import re
+        json_match = re.search(r'\[.*\]', response, re.DOTALL)
+        if json_match:
+            return json.loads(json_match.group())
+        return json.loads(response)
+
+    def _format_materials(self, materials: List[Dict[str, Any]]) -> str:
+        """Format materials for prompt"""
+        if not materials:
+            return "Keine Materialien vorhanden."
+
+        formatted = []
+        for i, material in enumerate(materials, 1):
+            formatted.append(f"Material {i} ({material.get('type', 'unknown')}):")
+            formatted.append(material.get('content', '')[:2000])  # Limit content
+            formatted.append("")
+
+        return "\n".join(formatted)
@@ -0,0 +1,341 @@
+"""
+Content Generator
+Orchestriert die Generierung aller 8 H5P Content-Typen
+"""
+
+from typing import List, Dict, Any, Optional
+from datetime import datetime
+import json
+
+
+class ContentGenerator:
+    """H5P Content Generator - Orchestrator"""
+
+    def __init__(self, claude_service, youtube_service):
+        self.claude = claude_service
+        self.youtube = youtube_service
+
+    async def generate_all_content_types(
+        self,
+        topic: str,
+        description: Optional[str],
+        target_grade: str,
+        materials: List[Dict[str, Any]],
+        videos: List[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """
+        Generate all 8 H5P content types
+
+        Returns:
+            Dictionary with all generated content
+        """
+
+        result = {
+            "topic": topic,
+            "description": description,
+            "target_grade": target_grade,
+            "generated_at": datetime.utcnow().isoformat(),
+            "content_types": {}
+        }
+
+        # 1. Quiz
+        try:
+            quiz_data = await self._generate_quiz(topic, description, target_grade, materials)
+            result["content_types"]["quiz"] = quiz_data
+        except Exception as e:
+            result["content_types"]["quiz"] = {"error": str(e)}
+
+        # 2. Interactive Video
+        try:
+            video_data = await self._generate_interactive_video(topic, description, target_grade, materials, videos)
+            result["content_types"]["interactive_video"] = video_data
+        except Exception as e:
+            result["content_types"]["interactive_video"] = {"error": str(e)}
+
+        # 3. Course Presentation
+        try:
+            presentation_data = await self._generate_presentation(topic, description, target_grade, materials)
+            result["content_types"]["course_presentation"] = presentation_data
+        except Exception as e:
+            result["content_types"]["course_presentation"] = {"error": str(e)}
+
+        # 4. Flashcards
+        try:
+            flashcards_data = await self._generate_flashcards(topic, description, target_grade, materials)
+            result["content_types"]["flashcards"] = flashcards_data
+        except Exception as e:
+            result["content_types"]["flashcards"] = {"error": str(e)}
+
+        # 5. Timeline
+        try:
+            timeline_data = await self._generate_timeline(topic, description, target_grade, materials)
+            result["content_types"]["timeline"] = timeline_data
+        except Exception as e:
+            result["content_types"]["timeline"] = {"error": str(e)}
+
+        # 6. Drag and Drop
+        try:
+            dragdrop_data = await self._generate_drag_drop(topic, description, target_grade, materials)
+            result["content_types"]["drag_drop"] = dragdrop_data
+        except Exception as e:
+            result["content_types"]["drag_drop"] = {"error": str(e)}
+
+        # 7. Fill in the Blanks
+        try:
+            fillblanks_data = await self._generate_fill_blanks(topic, description, target_grade, materials)
+            result["content_types"]["fill_blanks"] = fillblanks_data
+        except Exception as e:
+            result["content_types"]["fill_blanks"] = {"error": str(e)}
+
+        # 8. Memory Game
+        try:
+            memory_data = await self._generate_memory(topic, description, target_grade, materials)
+            result["content_types"]["memory"] = memory_data
+        except Exception as e:
+            result["content_types"]["memory"] = {"error": str(e)}
+
+        return result
+
+    async def _generate_quiz(
+        self,
+        topic: str,
+        description: Optional[str],
+        target_grade: str,
+        materials: List[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """Generate Quiz content"""
+
+        questions = await self.claude.generate_quiz_questions(
+            topic=topic,
+            materials=materials,
+            target_grade=target_grade,
+            num_questions=10
+        )
+
+        return {
+            "type": "quiz",
+            "title": f"Quiz: {topic}",
+            "description": description or f"Teste dein Wissen über {topic}",
+            "questions": questions
+        }
+
+    async def _generate_interactive_video(
+        self,
+        topic: str,
+        description: Optional[str],
+        target_grade: str,
+        materials: List[Dict[str, Any]],
+        videos: List[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """Generate Interactive Video content"""
+
+        # Wähle bestes Video (falls vorhanden)
+        if not videos or len(videos) == 0:
+            return {
+                "error": "Keine Videos gefunden",
+                "note": "Lehrer muss manuell Video-URL eingeben"
+            }
+
+        best_video = videos[0]  # Nimm erstes Video
+
+        # Hole Transkript
+        video_id = best_video.get("video_id")
+        if not video_id or video_id == "EXAMPLE_VIDEO_ID":
+            # Fallback: Generiere generische Interaktionen
+            return {
+                "type": "interactive-video",
+                "title": f"Interaktives Video: {topic}",
+                "videoUrl": "https://www.youtube.com/watch?v=EXAMPLE",
+                "description": description or f"Lerne über {topic} mit diesem interaktiven Video",
+                "interactions": [
+                    {
+                        "time": "01:00",
+                        "type": "question",
+                        "title": "Verständnisfrage",
+                        "content": f"Was ist das Hauptthema dieses Videos über {topic}?"
+                    },
+                    {
+                        "time": "03:00",
+                        "type": "info",
+                        "title": "Wichtiger Hinweis",
+                        "content": "Achte auf die wichtigsten Konzepte, die jetzt erklärt werden."
+                    }
+                ],
+                "note": "Generische Interaktionen - Lehrer sollte echte Video-URL eingeben"
+            }
+
+        # Echtes Video mit Transkript
+        transcript_data = await self.youtube.get_video_transcript(video_id)
+
+        if transcript_data:
+            # Generate interactions using Claude
+            interactions = await self.youtube.generate_video_interactions_with_claude(
+                video_id=video_id,
+                topic=topic,
+                transcript_data=transcript_data["transcript"],
+                claude_service=self.claude,
+                num_interactions=5
+            )
+        else:
+            # Fallback ohne Transkript
+            interactions = []
+
+        return {
+            "type": "interactive-video",
+            "title": best_video.get("title", f"Video: {topic}"),
+            "videoUrl": best_video.get("url"),
+            "description": description or f"Interaktives Video über {topic}",
+            "interactions": interactions
+        }
+
+    async def _generate_presentation(
+        self,
+        topic: str,
+        description: Optional[str],
+        target_grade: str,
+        materials: List[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """Generate Course Presentation content"""
+
+        slides = await self.claude.generate_presentation_slides(
+            topic=topic,
+            materials=materials,
+            target_grade=target_grade,
+            num_slides=6
+        )
+
+        # Add IDs to slides
+        for i, slide in enumerate(slides, 1):
+            slide["id"] = i
+
+        return {
+            "type": "course-presentation",
+            "title": f"Präsentation: {topic}",
+            "description": description or f"Lerne alles über {topic}",
+            "slides": slides
+        }
+
+    async def _generate_flashcards(
+        self,
+        topic: str,
+        description: Optional[str],
+        target_grade: str,
+        materials: List[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """Generate Flashcards content"""
+
+        cards = await self.claude.generate_flashcards(
+            topic=topic,
+            materials=materials,
+            target_grade=target_grade,
+            num_cards=15
+        )
+
+        # Add IDs to cards
+        for i, card in enumerate(cards, 1):
+            card["id"] = i
+
+        return {
+            "type": "flashcards",
+            "title": f"Lernkarten: {topic}",
+            "description": description or f"Wiederhole wichtige Begriffe zu {topic}",
+            "cards": cards
+        }
+
+    async def _generate_timeline(
+        self,
+        topic: str,
+        description: Optional[str],
+        target_grade: str,
+        materials: List[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """Generate Timeline content"""
+
+        events = await self.claude.generate_timeline_events(
+            topic=topic,
+            materials=materials,
+            target_grade=target_grade
+        )
+
+        # Add IDs to events
+        for i, event in enumerate(events, 1):
+            event["id"] = i
+
+        return {
+            "type": "timeline",
+            "title": f"Zeitleiste: {topic}",
+            "description": description or f"Chronologie von {topic}",
+            "events": events
+        }
+
+    async def _generate_drag_drop(
+        self,
+        topic: str,
+        description: Optional[str],
+        target_grade: str,
+        materials: List[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """Generate Drag and Drop content"""
+
+        exercise = await self.claude.generate_drag_drop_exercise(
+            topic=topic,
+            materials=materials,
+            target_grade=target_grade
+        )
+
+        return {
+            "type": "drag-drop",
+            "title": exercise.get("title", f"Zuordnung: {topic}"),
+            "question": exercise.get("question", "Ziehe die Elemente in die richtigen Kategorien."),
+            "zones": exercise.get("zones", []),
+            "draggables": exercise.get("draggables", [])
+        }
+
+    async def _generate_fill_blanks(
+        self,
+        topic: str,
+        description: Optional[str],
+        target_grade: str,
+        materials: List[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """Generate Fill in the Blanks content"""
+
+        exercise = await self.claude.generate_fill_blanks_text(
+            topic=topic,
+            materials=materials,
+            target_grade=target_grade
+        )
+
+        return {
+            "type": "fill-blanks",
+            "title": exercise.get("title", f"Lückentext: {topic}"),
+            "text": exercise.get("text", ""),
+            "hints": exercise.get("hints", "")
+        }
+
+    async def _generate_memory(
+        self,
+        topic: str,
+        description: Optional[str],
+        target_grade: str,
+        materials: List[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """Generate Memory Game content"""
+
+        pairs = await self.claude.generate_memory_pairs(
+            topic=topic,
+            materials=materials,
+            target_grade=target_grade,
+            num_pairs=8
+        )
+
+        # Add IDs to pairs
+        for i, pair in enumerate(pairs, 1):
+            pair["id"] = i
+
+        return {
+            "type": "memory",
+            "title": f"Memory: {topic}",
+            "description": description or f"Finde die passenden Paare zu {topic}",
+            "pairs": pairs
+        }
@@ -0,0 +1,197 @@
+"""
+Material Analyzer
+Analysiert hochgeladene Lernmaterialien (PDF, Images, DOCX)
+"""
+
+from typing import Dict, Any, Optional
+import io
+from PyPDF2 import PdfReader
+from PIL import Image
+import pytesseract
+from docx import Document
+import mammoth
+
+
+class MaterialAnalyzer:
+    """Analyzer für verschiedene Material-Typen"""
+
+    async def analyze(self, filename: str, content: bytes) -> Dict[str, Any]:
+        """
+        Analyze uploaded material
+
+        Args:
+            filename: Name der Datei
+            content: Datei-Content als bytes
+
+        Returns:
+            Strukturierte Material-Daten
+        """
+        file_ext = filename.lower().split('.')[-1]
+
+        try:
+            if file_ext == 'pdf':
+                return await self._analyze_pdf(filename, content)
+            elif file_ext in ['png', 'jpg', 'jpeg']:
+                return await self._analyze_image(filename, content)
+            elif file_ext == 'docx':
+                return await self._analyze_docx(filename, content)
+            elif file_ext == 'txt':
+                return await self._analyze_text(filename, content)
+            else:
+                return {
+                    "filename": filename,
+                    "type": "unknown",
+                    "content": "",
+                    "error": f"Unsupported file type: {file_ext}"
+                }
+
+        except Exception as e:
+            return {
+                "filename": filename,
+                "type": "error",
+                "content": "",
+                "error": str(e)
+            }
+
+    async def _analyze_pdf(self, filename: str, content: bytes) -> Dict[str, Any]:
+        """Extract text from PDF"""
+        try:
+            pdf_file = io.BytesIO(content)
+            reader = PdfReader(pdf_file)
+
+            text_content = []
+            num_pages = len(reader.pages)
+
+            for page_num, page in enumerate(reader.pages, 1):
+                text = page.extract_text()
+                if text.strip():
+                    text_content.append(f"--- Seite {page_num} ---")
+                    text_content.append(text)
+
+            return {
+                "filename": filename,
+                "type": "pdf",
+                "num_pages": num_pages,
+                "content": "\n".join(text_content),
+                "success": True
+            }
+
+        except Exception as e:
+            return {
+                "filename": filename,
+                "type": "pdf",
+                "content": "",
+                "error": f"PDF extraction failed: {str(e)}"
+            }
+
+    async def _analyze_image(self, filename: str, content: bytes) -> Dict[str, Any]:
+        """
+        Analyze image - OCR for text extraction
+        Note: Requires tesseract installed
+        """
+        try:
+            image = Image.open(io.BytesIO(content))
+
+            # Image metadata
+            width, height = image.size
+            mode = image.mode
+
+            # OCR text extraction (if tesseract available)
+            ocr_text = ""
+            try:
+                ocr_text = pytesseract.image_to_string(image, lang='deu')
+            except Exception as ocr_error:
+                ocr_text = f"[OCR not available: {str(ocr_error)}]"
+
+            return {
+                "filename": filename,
+                "type": "image",
+                "width": width,
+                "height": height,
+                "mode": mode,
+                "content": ocr_text,
+                "note": "Image als Diagramm/Skizze erkannt. OCR Text extrahiert.",
+                "success": True
+            }
+
+        except Exception as e:
+            return {
+                "filename": filename,
+                "type": "image",
+                "content": "",
+                "error": f"Image analysis failed: {str(e)}"
+            }
+
+    async def _analyze_docx(self, filename: str, content: bytes) -> Dict[str, Any]:
+        """Extract text from DOCX"""
+        try:
+            # Methode 1: python-docx
+            try:
+                doc = Document(io.BytesIO(content))
+                paragraphs = []
+                for para in doc.paragraphs:
+                    if para.text.strip():
+                        paragraphs.append(para.text)
+
+                text_content = "\n".join(paragraphs)
+
+            except:
+                # Methode 2: mammoth (bessere Formatierung)
+                result = mammoth.convert_to_text(io.BytesIO(content))
+                text_content = result.value
+
+            return {
+                "filename": filename,
+                "type": "docx",
+                "content": text_content,
+                "success": True
+            }
+
+        except Exception as e:
+            return {
+                "filename": filename,
+                "type": "docx",
+                "content": "",
+                "error": f"DOCX extraction failed: {str(e)}"
+            }
+
+    async def _analyze_text(self, filename: str, content: bytes) -> Dict[str, Any]:
+        """Extract text from plain text file"""
+        try:
+            text = content.decode('utf-8')
+
+            return {
+                "filename": filename,
+                "type": "text",
+                "content": text,
+                "success": True
+            }
+
+        except Exception as e:
+            return {
+                "filename": filename,
+                "type": "text",
+                "content": "",
+                "error": f"Text extraction failed: {str(e)}"
+            }
+
+    def extract_key_concepts(self, materials: list[Dict[str, Any]]) -> list[str]:
+        """
+        Extract key concepts from materials
+        Simple heuristic: Find capitalized words, frequent terms
+
+        In production: Use Claude AI for better concept extraction
+        """
+        all_text = " ".join([m.get("content", "") for m in materials])
+
+        # Simple extraction: Capitalized words (potential concepts)
+        import re
+        words = re.findall(r'\b[A-ZÄÖÜ][a-zäöüß]+\b', all_text)
+
+        # Count frequency
+        from collections import Counter
+        word_counts = Counter(words)
+
+        # Return top 20 concepts
+        concepts = [word for word, count in word_counts.most_common(20)]
+        return concepts
@@ -0,0 +1,243 @@
+"""
+YouTube Service
+Video-Suche und Transkript-Analyse für Interactive Video Content
+"""
+
+import os
+from typing import List, Dict, Any, Optional
+from youtube_transcript_api import YouTubeTranscriptApi
+import re
+
+
+class YouTubeService:
+    """
+    YouTube integration service.
+
+    Fetches video transcripts, picks key moments from them and asks a
+    Claude service to propose interactive elements. Video search is a
+    placeholder that returns a static example entry.
+    """
+
+    # Maximum number of transcript characters embedded into the Claude prompt.
+    MAX_TRANSCRIPT_CHARS = 8000
+
+    # Languages tried, in order, when the caller does not pass any.
+    DEFAULT_LANGUAGES = ("de", "en")
+
+    # URL shapes a video ID can be read from. The ID is restricted to
+    # URL-safe characters so query strings and fragments are never captured.
+    _VIDEO_ID_PATTERNS = (
+        re.compile(r'youtube\.com/watch\?(?:[^#]*&)?v=([A-Za-z0-9_-]+)'),
+        re.compile(r'youtu\.be/([A-Za-z0-9_-]+)'),
+        re.compile(
+            r'youtube(?:-nocookie)?\.com/(?:embed|shorts|live)/([A-Za-z0-9_-]+)'
+        ),
+    )
+
+    def __init__(self):
+        # The transcript API needs no key; the key is only read here and
+        # would be needed for a real video search via the YouTube Data API.
+        self.youtube_api_key = os.getenv("YOUTUBE_API_KEY")
+
+    def is_configured(self) -> bool:
+        """Report whether the service is usable (always True: transcripts need no key)."""
+        return True
+
+    async def search_videos(
+        self,
+        query: str,
+        max_results: int = 5
+    ) -> List[Dict[str, Any]]:
+        """
+        Search YouTube videos.
+
+        NOTE: This is a placeholder. It always returns a single static
+        example entry whose title contains the query; `max_results` is
+        not used yet.
+
+        Args:
+            query: Search term, embedded in the placeholder title.
+            max_results: Reserved for the real search implementation.
+
+        Returns:
+            A list with one placeholder video description.
+        """
+
+        # TODO: YouTube Data API integration (search.list).
+
+        return [
+            {
+                "video_id": "EXAMPLE_VIDEO_ID",
+                "title": f"Video zum Thema: {query}",
+                "channel": "Educational Channel",
+                "url": "https://www.youtube.com/watch?v=EXAMPLE_VIDEO_ID",
+                "has_transcript": False,
+                "note": "Use real YouTube Data API in production"
+            }
+        ]
+
+    async def get_video_transcript(
+        self,
+        video_id: str,
+        languages: Optional[List[str]] = None
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Get video transcript.
+
+        Args:
+            video_id: YouTube video ID
+            languages: Preferred languages in priority order
+                (default: German, English)
+
+        Returns:
+            Dict with "video_id", "language", "is_generated" and
+            "transcript" (the fetched transcript data), or None when no
+            transcript could be obtained. Errors are printed, not raised.
+        """
+        # None instead of a list literal avoids a shared mutable default.
+        preferred = list(self.DEFAULT_LANGUAGES) if languages is None else languages
+
+        # NOTE(review): the transcript API calls below are synchronous, so
+        # they presumably block the event loop while running - confirm
+        # whether they should be moved to a worker thread.
+        try:
+            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+
+            # Try the preferred languages in order.
+            transcript = None
+            for lang in preferred:
+                try:
+                    transcript = transcript_list.find_transcript([lang])
+                    break
+                except Exception:
+                    # Not available in this language; `Exception` (not a
+                    # bare except) lets task cancellation propagate.
+                    continue
+
+            # No preferred language found: take the first transcript the
+            # list yields, without touching private attributes.
+            if transcript is None:
+                transcript = next(iter(transcript_list), None)
+
+            if transcript is None:
+                print(f"Error fetching transcript for {video_id}: no transcript available")
+                return None
+
+            transcript_data = transcript.fetch()
+
+            return {
+                "video_id": video_id,
+                "language": transcript.language_code,
+                "is_generated": transcript.is_generated,
+                "transcript": transcript_data
+            }
+
+        except Exception as e:
+            print(f"Error fetching transcript for {video_id}: {e}")
+            return None
+
+    def extract_key_moments(
+        self,
+        transcript_data: List[Dict[str, Any]],
+        num_moments: int = 5
+    ) -> List[Dict[str, Any]]:
+        """
+        Extract key moments from transcript.
+
+        Simple heuristic: pick moments evenly spread over the duration and
+        snap each one to the transcript entry with the nearest start time.
+        An improved version could let Claude AI identify the important
+        moments.
+
+        Args:
+            transcript_data: Entries with "start", "text" and optionally
+                "duration" (read from the last entry only).
+            num_moments: Number of evenly spaced target times.
+
+        Returns:
+            Up to `num_moments` dicts with "time" (mm:ss), "seconds" and
+            "text", in chronological target order. Each transcript entry
+            appears at most once, so short transcripts yield fewer moments.
+        """
+        # Non-positive counts yield nothing (and -1 would divide by zero).
+        if not transcript_data or num_moments <= 0:
+            return []
+
+        last_entry = transcript_data[-1]
+        total_duration = last_entry['start'] + last_entry.get('duration', 0)
+        interval = total_duration / (num_moments + 1)
+
+        key_moments = []
+        used_indices = set()
+        for i in range(1, num_moments + 1):
+            target_time = interval * i
+
+            # Index of the entry whose start is closest to the target time.
+            closest_index = min(
+                range(len(transcript_data)),
+                key=lambda idx: abs(transcript_data[idx]['start'] - target_time)
+            )
+
+            # Several targets can snap to the same entry; report it once.
+            if closest_index in used_indices:
+                continue
+            used_indices.add(closest_index)
+
+            closest_entry = transcript_data[closest_index]
+            key_moments.append({
+                "time": self._format_timestamp(closest_entry['start']),
+                "seconds": closest_entry['start'],
+                "text": closest_entry['text']
+            })
+
+        return key_moments
+
+    async def generate_video_interactions_with_claude(
+        self,
+        video_id: str,
+        topic: str,
+        transcript_data: List[Dict[str, Any]],
+        claude_service: Any,
+        num_interactions: int = 5
+    ) -> List[Dict[str, Any]]:
+        """
+        Generate interactive elements for video using Claude AI.
+
+        Args:
+            video_id: YouTube video ID (currently not used in the prompt)
+            topic: Video topic
+            transcript_data: Full transcript
+            claude_service: Object with an awaitable
+                `generate_content(prompt=..., system_prompt=...)`
+            num_interactions: Number of interactions to ask for
+
+        Returns:
+            List of interactions, each extended with a "time" (mm:ss)
+            entry. Empty when the response holds no usable JSON array.
+        """
+
+        full_text = self._create_transcript_text(transcript_data)
+
+        # Truncate outside the f-string so the limit stays configurable and
+        # no code comment ends up inside the prompt text.
+        transcript_excerpt = full_text[:self.MAX_TRANSCRIPT_CHARS]
+
+        prompt = f"""Analysiere dieses Video-Transkript zum Thema "{topic}" und identifiziere {num_interactions} wichtige Momente für interaktive Elemente.
+
+Transkript:
+{transcript_excerpt}
+
+Für jeden Moment, erstelle:
+1. Einen Zeitstempel (in Sekunden)
+2. Einen Interaktionstyp (question, info, oder link)
+3. Einen Titel
+4. Den Inhalt (Frage, Information, oder URL)
+
+Formatiere als JSON-Array:
+[
+  {{
+    "seconds": 45,
+    "type": "question",
+    "title": "Verständnisfrage",
+    "content": "Was ist die Hauptfunktion...?"
+  }},
+  {{
+    "seconds": 120,
+    "type": "info",
+    "title": "Wichtiger Hinweis",
+    "content": "Beachte dass..."
+  }}
+]
+
+Wähle Momente die:
+- Wichtige Konzepte einführen
+- Verständnis testen
+- Zusatzinformationen bieten
+
+Nur JSON zurückgeben."""
+
+        response = await claude_service.generate_content(
+            prompt=prompt,
+            system_prompt="Du bist ein Experte für interaktive Video-Didaktik."
+        )
+
+        return self._parse_interactions(response)
+
+    def _parse_interactions(self, response: Any) -> List[Dict[str, Any]]:
+        """Parse the JSON array in a model response into interaction dicts."""
+        import json
+
+        if not isinstance(response, str):
+            return []
+
+        # The model may wrap the array in prose; take the outermost [...].
+        json_match = re.search(r'\[.*\]', response, re.DOTALL)
+        if not json_match:
+            return []
+
+        try:
+            parsed = json.loads(json_match.group())
+        except json.JSONDecodeError:
+            # Malformed JSON is treated like "no interactions found".
+            return []
+
+        interactions = []
+        for item in parsed:
+            if not isinstance(item, dict):
+                continue
+            try:
+                # Add the mm:ss form; "seconds" itself is left untouched.
+                item['time'] = self._format_timestamp(float(item['seconds']))
+            except (KeyError, TypeError, ValueError, OverflowError):
+                # Skip entries without a usable numeric timestamp.
+                continue
+            interactions.append(item)
+
+        return interactions
+
+    def _create_transcript_text(self, transcript_data: List[Dict[str, Any]]) -> str:
+        """Create readable text from transcript, one "[mm:ss] text" line per entry."""
+        return "\n".join(
+            f"[{self._format_timestamp(entry['start'])}] {entry['text']}"
+            for entry in transcript_data
+        )
+
+    def _format_timestamp(self, seconds: float) -> str:
+        """Format seconds to mm:ss (fractions of a second are dropped)."""
+        minutes = int(seconds // 60)
+        secs = int(seconds % 60)
+        return f"{minutes:02d}:{secs:02d}"
+
+    def extract_video_id_from_url(self, url: str) -> Optional[str]:
+        """
+        Extract video ID from YouTube URL.
+
+        Handles watch URLs (with `v` at any position in the query string),
+        youtu.be short links and embed/shorts/live paths.
+
+        Args:
+            url: The URL to inspect.
+
+        Returns:
+            The video ID, or None when the URL is empty or not recognized.
+        """
+        if not url:
+            return None
+
+        for pattern in self._VIDEO_ID_PATTERNS:
+            match = pattern.search(url)
+            if match:
+                return match.group(1)
+
+        return None