fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
243
ai-content-generator/app/services/youtube_service.py
Normal file
243
ai-content-generator/app/services/youtube_service.py
Normal file
@@ -0,0 +1,243 @@
|
||||
"""
|
||||
YouTube Service
|
||||
Video-Suche und Transkript-Analyse für Interactive Video Content
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import List, Dict, Any, Optional
|
||||
from youtube_transcript_api import YouTubeTranscriptApi
|
||||
import re
|
||||
|
||||
|
||||
class YouTubeService:
    """YouTube integration service.

    Provides a placeholder video search, transcript retrieval through
    ``youtube_transcript_api``, simple key-moment extraction and
    Claude-assisted generation of interactive video elements.
    """

    # Maximum number of transcript characters embedded in the Claude prompt
    # (keeps the prompt size, and therefore token usage, bounded).
    _MAX_TRANSCRIPT_CHARS = 8000

    # Languages tried first when the caller does not specify any.
    _DEFAULT_LANGUAGES = ("de", "en")

    # Recognised URL shapes, tried in order. The captured ID stops at the
    # next query separator, fragment or path separator.
    _VIDEO_ID_PATTERNS = (
        r'youtube\.com/watch\?(?:[^&#]*&)*?v=([^&#]+)',
        r'youtu\.be/([^?&#/]+)',
        r'youtube\.com/(?:embed|shorts)/([^?&#/]+)',
    )

    def __init__(self):
        # The transcript API needs no API key. A key would only be required
        # for a real video search through the YouTube Data API (optional).
        self.youtube_api_key = os.getenv("YOUTUBE_API_KEY")

    def is_configured(self) -> bool:
        """Return True; transcript retrieval works without an API key."""
        return True

    async def search_videos(
        self,
        query: str,
        max_results: int = 5
    ) -> List[Dict[str, Any]]:
        """Search YouTube videos (placeholder implementation).

        No YouTube Data API integration exists yet, so this returns a single
        hard-coded example entry whose title echoes the query.

        Args:
            query: Search term; only used to build the placeholder title.
            max_results: Upper bound on the number of returned entries.
                Values <= 0 yield an empty list.

        Returns:
            A list of video description dicts (at most one placeholder entry).
        """
        # TODO: integrate the YouTube Data API (search.list) for real results,
        # e.g. restricted to known educational channels.
        results = [
            {
                "video_id": "EXAMPLE_VIDEO_ID",
                "title": f"Video zum Thema: {query}",
                "channel": "Educational Channel",
                "url": "https://www.youtube.com/watch?v=EXAMPLE_VIDEO_ID",
                "has_transcript": False,
                "note": "Use real YouTube Data API in production"
            }
        ]
        return results[:max(max_results, 0)]

    async def get_video_transcript(
        self,
        video_id: str,
        languages: Optional[List[str]] = None
    ) -> Optional[Dict[str, Any]]:
        """Fetch the transcript of a video.

        Args:
            video_id: YouTube video ID.
            languages: Preferred language codes in priority order.
                Defaults to German, then English.

        Returns:
            A dict with ``video_id``, ``language``, ``is_generated`` and
            ``transcript`` (the fetched transcript data), or ``None`` when no
            transcript could be retrieved. Errors are printed, not raised.
        """
        # Build the preference list per call instead of sharing a mutable
        # default list between invocations.
        if languages is None:
            preferred = list(self._DEFAULT_LANGUAGES)
        else:
            preferred = list(languages)

        try:
            # NOTE(review): this is a blocking network call inside a
            # coroutine; consider moving it to a worker thread.
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

            # Try the preferred languages in order.
            transcript = None
            for lang in preferred:
                try:
                    transcript = transcript_list.find_transcript([lang])
                    break
                except Exception:
                    # Language not available; try the next preference.
                    continue

            # No preferred language available: take the first transcript the
            # list offers. Iterating the list uses its public interface and,
            # unlike reading only the manually created ones, also reaches
            # auto-generated transcripts.
            if transcript is None:
                transcript = next(iter(transcript_list), None)

            if transcript is None:
                print(
                    f"Error fetching transcript for {video_id}: "
                    "no transcript available"
                )
                return None

            transcript_data = transcript.fetch()

            return {
                "video_id": video_id,
                "language": transcript.language_code,
                "is_generated": transcript.is_generated,
                "transcript": transcript_data
            }

        except Exception as e:
            # Best-effort API: report the problem and signal failure via None.
            print(f"Error fetching transcript for {video_id}: {e}")
            return None

    def extract_key_moments(
        self,
        transcript_data: List[Dict[str, Any]],
        num_moments: int = 5
    ) -> List[Dict[str, Any]]:
        """Pick evenly spaced key moments from a transcript.

        Simple heuristic: the total duration (start + duration of the last
        entry) is divided into ``num_moments + 1`` equal intervals and, for
        each interior boundary, the entry with the closest start time is
        selected. A smarter version could ask Claude to identify the
        important moments.

        Args:
            transcript_data: Entries with ``start``, ``duration`` and ``text``.
            num_moments: Number of moments to pick.

        Returns:
            A list of dicts with ``time`` (mm:ss), ``seconds`` and ``text``;
            empty when the transcript is empty or ``num_moments`` <= 0.
        """
        # Guard: nothing to pick from, or nothing requested. This also avoids
        # a division by zero for num_moments == -1.
        if not transcript_data or num_moments <= 0:
            return []

        last_entry = transcript_data[-1]
        total_duration = last_entry['start'] + last_entry['duration']
        interval = total_duration / (num_moments + 1)

        key_moments = []
        for i in range(1, num_moments + 1):
            target_time = interval * i

            # Entry whose start time is closest to the target time.
            closest_entry = min(
                transcript_data,
                key=lambda entry: abs(entry['start'] - target_time)
            )

            key_moments.append({
                "time": self._format_timestamp(closest_entry['start']),
                "seconds": closest_entry['start'],
                "text": closest_entry['text']
            })

        return key_moments

    async def generate_video_interactions_with_claude(
        self,
        video_id: str,
        topic: str,
        transcript_data: List[Dict[str, Any]],
        claude_service: Any,
        num_interactions: int = 5
    ) -> List[Dict[str, Any]]:
        """Generate interactive elements for a video using Claude.

        Args:
            video_id: YouTube video ID (currently not used in the prompt).
            topic: Video topic.
            transcript_data: Full transcript entries.
            claude_service: Object exposing an awaitable
                ``generate_content(prompt=..., system_prompt=...)``.
            num_interactions: Number of interactions to request.

        Returns:
            List of interaction dicts, each extended with a ``time`` field in
            mm:ss format; empty if the response contains no usable JSON array.
        """
        full_text = self._create_transcript_text(transcript_data)
        prompt = self._build_interactions_prompt(
            topic, full_text, num_interactions
        )

        response = await claude_service.generate_content(
            prompt=prompt,
            system_prompt="Du bist ein Experte für interaktive Video-Didaktik."
        )

        return self._parse_interactions(response)

    def _build_interactions_prompt(
        self,
        topic: str,
        full_text: str,
        num_interactions: int
    ) -> str:
        """Build the (German) Claude prompt requesting interaction JSON."""
        # Truncate the transcript to keep the prompt token-efficient. This is
        # done here so no source comment leaks into the prompt text itself.
        transcript_excerpt = full_text[:self._MAX_TRANSCRIPT_CHARS]

        return f"""Analysiere dieses Video-Transkript zum Thema "{topic}" und identifiziere {num_interactions} wichtige Momente für interaktive Elemente.

Transkript:
{transcript_excerpt}

Für jeden Moment, erstelle:
1. Einen Zeitstempel (in Sekunden)
2. Einen Interaktionstyp (question, info, oder link)
3. Einen Titel
4. Den Inhalt (Frage, Information, oder URL)

Formatiere als JSON-Array:
[
  {{
    "seconds": 45,
    "type": "question",
    "title": "Verständnisfrage",
    "content": "Was ist die Hauptfunktion...?"
  }},
  {{
    "seconds": 120,
    "type": "info",
    "title": "Wichtiger Hinweis",
    "content": "Beachte dass..."
  }}
]

Wähle Momente die:
- Wichtige Konzepte einführen
- Verständnis testen
- Zusatzinformationen bieten

Nur JSON zurückgeben."""

    def _parse_interactions(self, response: Any) -> List[Dict[str, Any]]:
        """Extract the interaction list from a model response.

        Finds the outermost ``[...]`` span in the response text, parses it as
        JSON and adds a ``time`` (mm:ss) field to every entry. Entries that
        are not objects or lack a numeric ``seconds`` value are dropped.
        Returns an empty list when nothing parseable is found.
        """
        import json

        if not isinstance(response, str):
            return []

        json_match = re.search(r'\[.*\]', response, re.DOTALL)
        if not json_match:
            return []

        try:
            parsed = json.loads(json_match.group())
        except json.JSONDecodeError:
            # The model returned something that merely looks like an array.
            return []

        interactions = []
        for interaction in parsed:
            if not isinstance(interaction, dict):
                continue
            seconds = interaction.get('seconds')
            # bool is a subclass of int but is not a meaningful timestamp.
            if isinstance(seconds, bool) or not isinstance(seconds, (int, float)):
                continue
            interaction['time'] = self._format_timestamp(seconds)
            interactions.append(interaction)

        return interactions

    def _create_transcript_text(self, transcript_data: List[Dict[str, Any]]) -> str:
        """Render transcript entries as ``[mm:ss] text`` lines joined by newlines."""
        return "\n".join(
            f"[{self._format_timestamp(entry['start'])}] {entry['text']}"
            for entry in transcript_data
        )

    def _format_timestamp(self, seconds: float) -> str:
        """Format seconds as zero-padded ``mm:ss`` (minutes are not wrapped at 60)."""
        minutes = int(seconds // 60)
        secs = int(seconds % 60)
        return f"{minutes:02d}:{secs:02d}"

    def extract_video_id_from_url(self, url: str) -> Optional[str]:
        """Extract the video ID from a YouTube URL.

        Supports ``youtube.com/watch?v=ID`` (with ``v`` at any position in the
        query string), ``youtu.be/ID``, ``youtube.com/embed/ID`` and
        ``youtube.com/shorts/ID``.

        Args:
            url: The URL to inspect.

        Returns:
            The video ID, or ``None`` if the URL is empty or not recognised.
        """
        if not url:
            return None

        for pattern in self._VIDEO_ID_PATTERNS:
            match = re.search(pattern, url)
            if match:
                return match.group(1)

        return None
|
||||
Reference in New Issue
Block a user