fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits, losing 3400+ files across admin-v2, backend, studio-v2, website, klausur-service, and many other services. The partial restore attempt (660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e while preserving post-rebase additions (night-scheduler, night-mode UI, NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit 21a844cb8a
1986 changed files with 744143 additions and 1731 deletions
--- a/ai-content-generator/app/services/youtube_service.py
+++ b/ai-content-generator/app/services/youtube_service.py
@@ -0,0 +1,243 @@
+"""
+YouTube Service
+Video-Suche und Transkript-Analyse für Interactive Video Content
+"""
+
+import json
+import logging
+import os
+import re
+from typing import List, Dict, Any, Optional
+
+from youtube_transcript_api import YouTubeTranscriptApi
+
+
+class YouTubeService:
+    """Transcript retrieval and interaction generation for YouTube videos.
+
+    Video search is currently a placeholder. Transcripts are obtained through
+    ``youtube_transcript_api``; no API key is passed to it.
+    """
+
+    # Transcript languages tried, in order, when the caller passes none.
+    DEFAULT_LANGUAGES = ("de", "en")
+
+    # Maximum number of transcript characters embedded in the Claude prompt.
+    MAX_PROMPT_TRANSCRIPT_CHARS = 8000
+
+    # URL shapes a video ID can be pulled from; group 1 is the ID. The ID
+    # stops at '&' and '#' (and at '?' or '/' for path-style URLs) so trailing
+    # parameters and fragments never end up in the result. The first pattern
+    # also accepts ``v=`` when it is not the first query parameter.
+    _VIDEO_ID_PATTERNS = (
+        re.compile(r'youtube\.com/watch\?(?:[^#]*?&)?v=([^&#]+)'),
+        re.compile(r'youtu\.be/([^?&#/]+)'),
+        re.compile(r'youtube\.com/embed/([^?&#/]+)'),
+    )
+
+    _log = logging.getLogger(__name__)
+
+    def __init__(self):
+        # The key is only read here; nothing in this class uses it yet. It
+        # would be needed for a real YouTube Data API search.
+        self.youtube_api_key = os.getenv("YOUTUBE_API_KEY")
+
+    def is_configured(self) -> bool:
+        """Return True unconditionally; transcript access needs no key."""
+        return True
+
+    async def search_videos(
+        self,
+        query: str,
+        max_results: int = 5
+    ) -> List[Dict[str, Any]]:
+        """Return placeholder search results for *query*.
+
+        This is a stub: it always returns a single example entry and ignores
+        ``max_results``.
+
+        Args:
+            query: Search term, echoed into the placeholder title.
+            max_results: Currently unused.
+
+        Returns:
+            A one-element list describing an example video.
+        """
+        # TODO: replace with the YouTube Data API (search.list).
+        return [
+            {
+                "video_id": "EXAMPLE_VIDEO_ID",
+                "title": f"Video zum Thema: {query}",
+                "channel": "Educational Channel",
+                "url": "https://www.youtube.com/watch?v=EXAMPLE_VIDEO_ID",
+                "has_transcript": False,
+                "note": "Use real YouTube Data API in production"
+            }
+        ]
+
+    async def get_video_transcript(
+        self,
+        video_id: str,
+        languages: Optional[List[str]] = None
+    ) -> Optional[Dict[str, Any]]:
+        """Fetch a transcript for *video_id*, preferring the given languages.
+
+        Each preferred language is tried in order. If none is available, the
+        first transcript yielded by iterating the transcript list is used, so
+        videos that only have auto-generated transcripts are still served.
+
+        Args:
+            video_id: YouTube video ID.
+            languages: Preferred language codes; defaults to
+                ``DEFAULT_LANGUAGES`` (German, then English) when ``None``.
+
+        Returns:
+            A dict with ``video_id``, ``language``, ``is_generated`` and
+            ``transcript`` (the fetched transcript data), or ``None`` when no
+            transcript could be obtained.
+        """
+        preferred = (
+            list(self.DEFAULT_LANGUAGES) if languages is None else list(languages)
+        )
+
+        # NOTE(review): the library calls below are not awaited, so they
+        # presumably block the event loop while fetching — consider moving
+        # them to a worker thread.
+        try:
+            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+
+            transcript = None
+            for lang in preferred:
+                try:
+                    transcript = transcript_list.find_transcript([lang])
+                    break
+                except Exception:
+                    # This language is unavailable; try the next one.
+                    continue
+
+            if transcript is None:
+                # Use the public iteration protocol instead of the private
+                # ``_manually_created_transcripts`` mapping, which excluded
+                # auto-generated transcripts from the fallback.
+                transcript = next(iter(transcript_list), None)
+
+            if transcript is None:
+                self._log.warning("No transcript available for %s", video_id)
+                return None
+
+            transcript_data = transcript.fetch()
+
+            return {
+                "video_id": video_id,
+                "language": transcript.language_code,
+                "is_generated": transcript.is_generated,
+                "transcript": transcript_data
+            }
+
+        except Exception as e:
+            # Best effort: callers receive None rather than an exception.
+            self._log.warning("Error fetching transcript for %s: %s", video_id, e)
+            return None
+
+    def extract_key_moments(
+        self,
+        transcript_data: List[Dict[str, Any]],
+        num_moments: int = 5
+    ) -> List[Dict[str, Any]]:
+        """Pick evenly spaced transcript entries as key moments.
+
+        Simple heuristic: the total duration (start + duration of the last
+        entry) is divided into ``num_moments + 1`` intervals and, for each
+        interior boundary, the entry whose start is closest is selected.
+
+        Args:
+            transcript_data: Entries with ``start``, ``duration`` and ``text``.
+            num_moments: Number of moments to return.
+
+        Returns:
+            A list of dicts with ``time`` (mm:ss), ``seconds`` and ``text``;
+            empty when there is no transcript or ``num_moments`` is not
+            positive.
+        """
+        # num_moments == -1 would otherwise divide by zero below.
+        if not transcript_data or num_moments <= 0:
+            return []
+
+        last_entry = transcript_data[-1]
+        total_duration = last_entry['start'] + last_entry['duration']
+        interval = total_duration / (num_moments + 1)
+
+        key_moments = []
+        for i in range(1, num_moments + 1):
+            target_time = interval * i
+
+            closest_entry = min(
+                transcript_data,
+                key=lambda entry: abs(entry['start'] - target_time)
+            )
+
+            key_moments.append({
+                "time": self._format_timestamp(closest_entry['start']),
+                "seconds": closest_entry['start'],
+                "text": closest_entry['text']
+            })
+
+        return key_moments
+
+    async def generate_video_interactions_with_claude(
+        self,
+        video_id: str,
+        topic: str,
+        transcript_data: List[Dict[str, Any]],
+        claude_service: Any,
+        num_interactions: int = 5
+    ) -> List[Dict[str, Any]]:
+        """Ask Claude for interactive elements based on a video transcript.
+
+        Args:
+            video_id: YouTube video ID (not used when building the prompt).
+            topic: Video topic, inserted into the prompt.
+            transcript_data: Full transcript entries (``start``, ``text``).
+            claude_service: Object exposing an awaitable
+                ``generate_content(prompt=..., system_prompt=...)``.
+            num_interactions: Number of interactions requested in the prompt.
+
+        Returns:
+            The interactions parsed from the JSON array in the reply, each
+            extended with a ``time`` (mm:ss) key. Entries without a usable
+            ``seconds`` value are skipped; an unparsable reply yields ``[]``.
+        """
+        # Truncate before formatting so only the excerpt reaches the prompt.
+        transcript_excerpt = self._create_transcript_text(transcript_data)[
+            :self.MAX_PROMPT_TRANSCRIPT_CHARS
+        ]
+
+        prompt = f"""Analysiere dieses Video-Transkript zum Thema "{topic}" und identifiziere {num_interactions} wichtige Momente für interaktive Elemente.
+
+Transkript:
+{transcript_excerpt}
+
+Für jeden Moment, erstelle:
+1. Einen Zeitstempel (in Sekunden)
+2. Einen Interaktionstyp (question, info, oder link)
+3. Einen Titel
+4. Den Inhalt (Frage, Information, oder URL)
+
+Formatiere als JSON-Array:
+[
+  {{
+    "seconds": 45,
+    "type": "question",
+    "title": "Verständnisfrage",
+    "content": "Was ist die Hauptfunktion...?"
+  }},
+  {{
+    "seconds": 120,
+    "type": "info",
+    "title": "Wichtiger Hinweis",
+    "content": "Beachte dass..."
+  }}
+]
+
+Wähle Momente die:
+- Wichtige Konzepte einführen
+- Verständnis testen
+- Zusatzinformationen bieten
+
+Nur JSON zurückgeben."""
+
+        response = await claude_service.generate_content(
+            prompt=prompt,
+            system_prompt="Du bist ein Experte für interaktive Video-Didaktik."
+        )
+
+        if not isinstance(response, str):
+            self._log.warning("Unexpected Claude response type: %r", type(response))
+            return []
+
+        # The reply may wrap the array in prose; take the outermost [...] span.
+        json_match = re.search(r'\[.*\]', response, re.DOTALL)
+        if not json_match:
+            return []
+
+        try:
+            parsed = json.loads(json_match.group())
+        except json.JSONDecodeError as e:
+            self._log.warning("Could not parse interactions JSON: %s", e)
+            return []
+
+        interactions = []
+        for item in parsed:
+            try:
+                item['time'] = self._format_timestamp(float(item['seconds']))
+            except (KeyError, TypeError, ValueError, OverflowError):
+                # Not a dict, or 'seconds' missing / not a finite number.
+                self._log.warning("Skipping malformed interaction: %r", item)
+                continue
+            interactions.append(item)
+
+        return interactions
+
+    def _create_transcript_text(self, transcript_data: List[Dict[str, Any]]) -> str:
+        """Render transcript entries as ``[mm:ss] text`` lines joined by newlines."""
+        return "\n".join(
+            f"[{self._format_timestamp(entry['start'])}] {entry['text']}"
+            for entry in transcript_data
+        )
+
+    def _format_timestamp(self, seconds: float) -> str:
+        """Format *seconds* as zero-padded ``mm:ss`` (minutes are not capped at 59)."""
+        minutes = int(seconds // 60)
+        secs = int(seconds % 60)
+        return f"{minutes:02d}:{secs:02d}"
+
+    def extract_video_id_from_url(self, url: str) -> Optional[str]:
+        """Extract the video ID from a YouTube URL.
+
+        Recognizes ``youtube.com/watch?...v=ID``, ``youtu.be/ID`` and
+        ``youtube.com/embed/ID``.
+
+        Args:
+            url: The URL to inspect.
+
+        Returns:
+            The video ID, or ``None`` when *url* is empty or matches none of
+            the known shapes.
+        """
+        if not url:
+            return None
+
+        for pattern in self._VIDEO_ID_PATTERNS:
+            match = pattern.search(url)
+            if match:
+                return match.group(1)
+
+        return None