A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
244 lines
7.2 KiB
Python
244 lines
7.2 KiB
Python
"""
|
|
YouTube Service
|
|
Video-Suche und Transkript-Analyse für Interactive Video Content
|
|
"""
|
|
|
|
import os
|
|
from typing import List, Dict, Any, Optional
|
|
from youtube_transcript_api import YouTubeTranscriptApi
|
|
import re
|
|
|
|
|
|
class YouTubeService:
    """YouTube integration: video lookup and transcript analysis.

    Transcripts are fetched through ``youtube_transcript_api`` (no API key
    needed).  Video search is currently a placeholder; a real implementation
    would require the YouTube Data API and ``YOUTUBE_API_KEY``.
    """

    # Languages tried, in order, when the caller does not pass any.
    _DEFAULT_LANGUAGES = ("de", "en")

    # Upper bound on transcript characters embedded in the Claude prompt,
    # to keep the request within a reasonable token budget.
    _MAX_TRANSCRIPT_CHARS = 8000

    # URL shapes from which a video ID can be extracted.  The ID is limited
    # to the characters YouTube uses for IDs so that trailing query
    # parameters or fragments are never captured.
    _VIDEO_ID_PATTERNS = (
        # watch URLs; ``v`` may appear anywhere in the query string
        re.compile(r'youtube\.com/watch\?(?:[^#]*?&)?v=([A-Za-z0-9_-]+)'),
        # short links
        re.compile(r'youtu\.be/([A-Za-z0-9_-]+)'),
        # path-based URLs (embed, shorts, live, legacy /v/)
        re.compile(
            r'youtube(?:-nocookie)?\.com/(?:embed|shorts|live|v)/([A-Za-z0-9_-]+)'
        ),
    )

    def __init__(self):
        # The transcript API works without a key; the key is only relevant
        # for an (optional) YouTube Data API based video search.
        self.youtube_api_key = os.getenv("YOUTUBE_API_KEY")

    def is_configured(self) -> bool:
        """Return True; the transcript API needs no configuration."""
        return True

    async def search_videos(
        self,
        query: str,
        max_results: int = 5
    ) -> List[Dict[str, Any]]:
        """Search YouTube videos (placeholder implementation).

        Without the YouTube Data API this returns a single dummy entry built
        from ``query``.  ``max_results`` is accepted for interface stability
        but is not used yet.

        Args:
            query: Search text; echoed into the placeholder title.
            max_results: Intended maximum number of results (currently unused).

        Returns:
            A list with one placeholder video dict.
        """
        # TODO: integrate YouTube Data API ``search.list``; for now this is a
        # fixed fallback entry.
        return [
            {
                "video_id": "EXAMPLE_VIDEO_ID",
                "title": f"Video zum Thema: {query}",
                "channel": "Educational Channel",
                "url": "https://www.youtube.com/watch?v=EXAMPLE_VIDEO_ID",
                "has_transcript": False,
                "note": "Use real YouTube Data API in production"
            }
        ]

    async def get_video_transcript(
        self,
        video_id: str,
        languages: Optional[List[str]] = None
    ) -> Optional[Dict[str, Any]]:
        """Fetch the transcript of a video.

        Args:
            video_id: YouTube video ID.
            languages: Preferred language codes in priority order.  Defaults
                to German, then English.

        Returns:
            A dict with ``video_id``, ``language``, ``is_generated`` and
            ``transcript`` (the fetched transcript data), or ``None`` when no
            transcript could be retrieved.
        """
        if languages is None:
            languages = list(self._DEFAULT_LANGUAGES)

        # NOTE(review): the transcript library calls below are synchronous and
        # will block the event loop while they run - confirm this is acceptable.
        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

            # Try the preferred languages in order.
            transcript = None
            for lang in languages:
                try:
                    transcript = transcript_list.find_transcript([lang])
                    break
                except Exception:
                    # Language not available; ``Exception`` (not a bare
                    # except) so task cancellation is never swallowed.
                    continue

            # No preferred language: take the first transcript of any kind
            # (manually created or auto-generated) via the public iterator.
            if transcript is None:
                transcript = next(iter(transcript_list), None)
            if transcript is None:
                return None

            transcript_data = transcript.fetch()

            return {
                "video_id": video_id,
                "language": transcript.language_code,
                "is_generated": transcript.is_generated,
                "transcript": transcript_data
            }

        except Exception as e:
            print(f"Error fetching transcript for {video_id}: {e}")
            return None

    def extract_key_moments(
        self,
        transcript_data: List[Dict[str, Any]],
        num_moments: int = 5
    ) -> List[Dict[str, Any]]:
        """Pick evenly spaced key moments from a transcript.

        Simple heuristic: split the total duration into ``num_moments + 1``
        equal intervals and take the transcript entry whose start time is
        closest to each interior boundary.  Each entry is returned at most
        once, so short transcripts yield fewer than ``num_moments`` results.

        Args:
            transcript_data: Entries with ``start``, ``duration`` and ``text``.
            num_moments: Number of moments requested.

        Returns:
            Dicts with ``time`` (mm:ss), ``seconds`` and ``text``; empty when
            there is no transcript or ``num_moments`` is not positive.
        """
        if not transcript_data or num_moments <= 0:
            return []

        last_entry = transcript_data[-1]
        total_duration = last_entry['start'] + last_entry['duration']
        interval = total_duration / (num_moments + 1)

        key_moments = []
        used_indices = set()
        for i in range(1, num_moments + 1):
            target_time = interval * i

            # Index of the entry starting closest to the target time
            # (first one wins on ties).
            closest_index = min(
                range(len(transcript_data)),
                key=lambda j: abs(transcript_data[j]['start'] - target_time)
            )
            if closest_index in used_indices:
                continue
            used_indices.add(closest_index)

            closest_entry = transcript_data[closest_index]
            key_moments.append({
                "time": self._format_timestamp(closest_entry['start']),
                "seconds": closest_entry['start'],
                "text": closest_entry['text']
            })

        return key_moments

    async def generate_video_interactions_with_claude(
        self,
        video_id: str,
        topic: str,
        transcript_data: List[Dict[str, Any]],
        claude_service: Any,
        num_interactions: int = 5
    ) -> List[Dict[str, Any]]:
        """Generate interactive elements for a video using Claude.

        Args:
            video_id: YouTube video ID (not used in the prompt).
            topic: Video topic, inserted into the prompt.
            transcript_data: Full transcript entries.
            claude_service: Object exposing an awaitable
                ``generate_content(prompt=..., system_prompt=...)``.
            num_interactions: Number of interactions to request.

        Returns:
            The interactions parsed from the response, each extended with a
            ``time`` field (mm:ss).  Items without a usable ``seconds`` value
            are dropped; an unparsable response yields an empty list.
        """
        # Local import: json is not imported at file level.
        import json

        full_text = self._create_transcript_text(transcript_data)
        transcript_excerpt = full_text[:self._MAX_TRANSCRIPT_CHARS]

        prompt = f"""Analysiere dieses Video-Transkript zum Thema "{topic}" und identifiziere {num_interactions} wichtige Momente für interaktive Elemente.

Transkript:
{transcript_excerpt}

Für jeden Moment, erstelle:
1. Einen Zeitstempel (in Sekunden)
2. Einen Interaktionstyp (question, info, oder link)
3. Einen Titel
4. Den Inhalt (Frage, Information, oder URL)

Formatiere als JSON-Array:
[
  {{
    "seconds": 45,
    "type": "question",
    "title": "Verständnisfrage",
    "content": "Was ist die Hauptfunktion...?"
  }},
  {{
    "seconds": 120,
    "type": "info",
    "title": "Wichtiger Hinweis",
    "content": "Beachte dass..."
  }}
]

Wähle Momente die:
- Wichtige Konzepte einführen
- Verständnis testen
- Zusatzinformationen bieten

Nur JSON zurückgeben."""

        response = await claude_service.generate_content(
            prompt=prompt,
            system_prompt="Du bist ein Experte für interaktive Video-Didaktik."
        )
        if not isinstance(response, str):
            return []

        # The model may wrap the array in extra text; grab the outermost
        # bracketed span and parse that.
        json_match = re.search(r'\[.*\]', response, re.DOTALL)
        if not json_match:
            return []
        try:
            parsed = json.loads(json_match.group())
        except json.JSONDecodeError:
            return []
        if not isinstance(parsed, list):
            return []

        interactions = []
        for interaction in parsed:
            if not isinstance(interaction, dict):
                continue
            try:
                # Add the mm:ss representation of the timestamp.
                interaction['time'] = self._format_timestamp(
                    float(interaction['seconds'])
                )
            except (KeyError, TypeError, ValueError, OverflowError):
                # Missing or non-numeric timestamp: skip this item.
                continue
            interactions.append(interaction)

        return interactions

    def _create_transcript_text(self, transcript_data: List[Dict[str, Any]]) -> str:
        """Render transcript entries as ``[mm:ss] text`` lines."""
        return "\n".join(
            f"[{self._format_timestamp(entry['start'])}] {entry['text']}"
            for entry in transcript_data
        )

    def _format_timestamp(self, seconds: float) -> str:
        """Format seconds as mm:ss (minutes are not wrapped into hours)."""
        minutes, secs = divmod(seconds, 60)
        return f"{int(minutes):02d}:{int(secs):02d}"

    def extract_video_id_from_url(self, url: str) -> Optional[str]:
        """Extract the video ID from a YouTube URL.

        Supports watch URLs (``v`` anywhere in the query string), ``youtu.be``
        short links and ``/embed/``, ``/shorts/``, ``/live/``, ``/v/`` paths.

        Args:
            url: The URL to inspect.

        Returns:
            The video ID, or ``None`` if the URL matches no known shape.
        """
        if not url:
            return None

        for pattern in self._VIDEO_ID_PATTERNS:
            match = pattern.search(url)
            if match:
                return match.group(1)

        return None
|