fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit 21a844cb8a
1986 changed files with 744143 additions and 1731 deletions

View File

@@ -0,0 +1,16 @@
"""
Services Package
AI Content Generation Services
"""
from .claude_service import ClaudeService
from .youtube_service import YouTubeService
from .material_analyzer import MaterialAnalyzer
from .content_generator import ContentGenerator
# Names exported by a star-import of this package.
__all__ = [
    "ClaudeService",
    "YouTubeService",
    "MaterialAnalyzer",
    "ContentGenerator",
]

View File

@@ -0,0 +1,364 @@
"""
Claude Service
Integration mit Claude API für Content-Generierung
"""
import os
from typing import List, Dict, Any, Optional
from anthropic import Anthropic
class ClaudeService:
    """Wrapper around the Anthropic Messages API for learning-content generation.

    The API key is read from the ``ANTHROPIC_API_KEY`` environment variable.
    Without a key the service stays unconfigured and ``generate_content``
    raises ``ValueError``.
    """

    # NOTE(review): confirm this identifier against the current Anthropic
    # model list; it can be overridden through the constructor.
    DEFAULT_MODEL = "claude-sonnet-4-5-20251101"

    # Maximum number of characters of each material embedded in a prompt.
    MAX_MATERIAL_CHARS = 2000

    def __init__(self, model: Optional[str] = None):
        """Create the client if an API key is available.

        Args:
            model: Optional model identifier; defaults to ``DEFAULT_MODEL``.
        """
        self.api_key = os.getenv("ANTHROPIC_API_KEY")
        self.client = Anthropic(api_key=self.api_key) if self.api_key else None
        self.model = model or self.DEFAULT_MODEL

    def is_configured(self) -> bool:
        """Return True when an API client could be created."""
        return self.client is not None

    async def generate_content(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        max_tokens: int = 4000,
        temperature: float = 1.0
    ) -> str:
        """Generate text with Claude.

        Args:
            prompt: User prompt.
            system_prompt: System prompt (optional).
            max_tokens: Maximum tokens to generate.
            temperature: Sampling temperature.

        Returns:
            The text of the first text block in the response.

        Raises:
            ValueError: If the client is not configured or the response
                contains no text block.
        """
        import asyncio
        import functools

        if not self.client:
            raise ValueError("Claude API not configured. Set ANTHROPIC_API_KEY environment variable.")

        request = {
            "model": self.model,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "messages": [{"role": "user", "content": prompt}],
        }
        if system_prompt:
            request["system"] = system_prompt

        # The client call is synchronous; run it in the default executor so
        # the event loop is not blocked while waiting for the API.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None, functools.partial(self.client.messages.create, **request)
        )

        for block in response.content:
            text = getattr(block, "text", None)
            if text is not None:
                return text
        raise ValueError("Claude response contained no text content")

    @staticmethod
    def _parse_json_response(response: str, expected_type: type, what: str) -> Any:
        """Extract a JSON value of ``expected_type`` (list or dict) from a reply.

        The whole reply is parsed first. If that fails or yields the wrong
        top-level type, the outermost bracketed span is extracted and parsed,
        which tolerates code fences and surrounding prose.

        Raises:
            ValueError: If no JSON value of the expected type can be found
                (``json.JSONDecodeError`` is a ``ValueError`` subclass and is
                propagated when the extracted span is malformed).
        """
        import json
        import re

        text = response.strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, expected_type):
            return parsed

        pattern = r'\[.*\]' if expected_type is list else r'\{.*\}'
        match = re.search(pattern, text, re.DOTALL)
        if match:
            parsed = json.loads(match.group())
            if isinstance(parsed, expected_type):
                return parsed
        raise ValueError(f"Could not parse {what} from Claude response")

    async def _generate_json(
        self,
        prompt: str,
        system_prompt: str,
        expected_type: type,
        what: str
    ) -> Any:
        """Send ``prompt`` to Claude and parse the reply as JSON."""
        response = await self.generate_content(prompt=prompt, system_prompt=system_prompt)
        return self._parse_json_response(response, expected_type, what)

    async def generate_quiz_questions(
        self,
        topic: str,
        materials: List[Dict[str, Any]],
        target_grade: str,
        num_questions: int = 10
    ) -> List[Dict[str, Any]]:
        """Generate multiple-choice quiz questions as a list of dicts."""
        material_text = self._format_materials(materials)
        prompt = f"""Erstelle {num_questions} Multiple-Choice-Fragen zum Thema "{topic}" für Klassenstufe {target_grade}.
Materialien:
{material_text}
Erstelle Fragen die:
1. Das Verständnis testen
2. Auf den Materialien basieren
3. Altersgerecht sind
4. 4 Antwortmöglichkeiten haben (1 richtig, 3 falsch)
Formatiere die Ausgabe als JSON-Array:
[
{{
"question": "Frage text?",
"options": ["Option A", "Option B", "Option C", "Option D"],
"correct_answer": 0,
"explanation": "Erklärung warum die Antwort richtig ist"
}}
]
Nur das JSON-Array zurückgeben, keine zusätzlichen Texte."""
        return await self._generate_json(
            prompt,
            "Du bist ein pädagogischer Experte der Quizfragen erstellt.",
            list,
            "quiz questions",
        )

    async def generate_flashcards(
        self,
        topic: str,
        materials: List[Dict[str, Any]],
        target_grade: str,
        num_cards: int = 15
    ) -> List[Dict[str, str]]:
        """Generate flashcards as a list of ``front``/``back`` dicts."""
        material_text = self._format_materials(materials)
        prompt = f"""Erstelle {num_cards} Lernkarten (Flashcards) zum Thema "{topic}" für Klassenstufe {target_grade}.
Materialien:
{material_text}
Erstelle Karten die:
1. Wichtige Begriffe und Konzepte abdecken
2. Kurz und prägnant sind
3. Zum Wiederholen geeignet sind
Formatiere die Ausgabe als JSON-Array:
[
{{
"front": "Begriff oder Frage",
"back": "Definition oder Antwort"
}}
]
Nur das JSON-Array zurückgeben."""
        return await self._generate_json(
            prompt,
            "Du bist ein Experte für Lernkarten-Design.",
            list,
            "flashcards",
        )

    async def generate_fill_blanks_text(
        self,
        topic: str,
        materials: List[Dict[str, Any]],
        target_grade: str
    ) -> Dict[str, Any]:
        """Generate a fill-in-the-blanks exercise as a dict."""
        material_text = self._format_materials(materials)
        prompt = f"""Erstelle einen Lückentext zum Thema "{topic}" für Klassenstufe {target_grade}.
Materialien:
{material_text}
Erstelle einen Text mit 10-15 Lücken. Markiere Lücken mit *Wort*.
Formatiere als JSON:
{{
"title": "Titel des Lückentexts",
"text": "Der Text mit *Lücken* markiert...",
"hints": "Hilfreiche Hinweise"
}}
Nur JSON zurückgeben."""
        return await self._generate_json(
            prompt,
            "Du bist ein Experte für Lückentexte.",
            dict,
            "fill-in-the-blanks exercise",
        )

    async def generate_drag_drop_exercise(
        self,
        topic: str,
        materials: List[Dict[str, Any]],
        target_grade: str
    ) -> Dict[str, Any]:
        """Generate a drag-and-drop matching exercise as a dict."""
        material_text = self._format_materials(materials)
        prompt = f"""Erstelle eine Drag-and-Drop Zuordnungsaufgabe zum Thema "{topic}" für Klassenstufe {target_grade}.
Materialien:
{material_text}
Erstelle 3-4 Kategorien (Zonen) und 8-12 Elemente zum Zuordnen.
Formatiere als JSON:
{{
"title": "Titel der Aufgabe",
"question": "Aufgabenstellung",
"zones": [
{{ "id": 1, "name": "Kategorie 1" }},
{{ "id": 2, "name": "Kategorie 2" }}
],
"draggables": [
{{ "id": 1, "text": "Element 1", "correctZoneId": 1 }},
{{ "id": 2, "text": "Element 2", "correctZoneId": 2 }}
]
}}
Nur JSON zurückgeben."""
        return await self._generate_json(
            prompt,
            "Du bist ein Experte für interaktive Lernaufgaben.",
            dict,
            "drag-and-drop exercise",
        )

    async def generate_memory_pairs(
        self,
        topic: str,
        materials: List[Dict[str, Any]],
        target_grade: str,
        num_pairs: int = 8
    ) -> List[Dict[str, str]]:
        """Generate memory-game pairs as a list of ``card1``/``card2`` dicts."""
        material_text = self._format_materials(materials)
        prompt = f"""Erstelle {num_pairs} Memory-Paare zum Thema "{topic}" für Klassenstufe {target_grade}.
Materialien:
{material_text}
Jedes Paar besteht aus zwei zusammengehörigen Begriffen/Konzepten.
Formatiere als JSON-Array:
[
{{ "card1": "Begriff 1", "card2": "Zugehöriger Begriff" }}
]
Nur JSON zurückgeben."""
        return await self._generate_json(
            prompt,
            "Du bist ein Experte für Memory-Spiele.",
            list,
            "memory pairs",
        )

    async def generate_timeline_events(
        self,
        topic: str,
        materials: List[Dict[str, Any]],
        target_grade: str
    ) -> List[Dict[str, Any]]:
        """Generate timeline events as a list of dicts."""
        material_text = self._format_materials(materials)
        prompt = f"""Erstelle eine Timeline mit 5-8 Ereignissen zum Thema "{topic}" für Klassenstufe {target_grade}.
Materialien:
{material_text}
Formatiere als JSON-Array:
[
{{
"year": "Jahr oder Zeitpunkt",
"title": "Ereignis Titel",
"description": "Kurze Beschreibung"
}}
]
Nur JSON zurückgeben."""
        return await self._generate_json(
            prompt,
            "Du bist ein Experte für chronologische Darstellungen.",
            list,
            "timeline events",
        )

    async def generate_presentation_slides(
        self,
        topic: str,
        materials: List[Dict[str, Any]],
        target_grade: str,
        num_slides: int = 5
    ) -> List[Dict[str, str]]:
        """Generate presentation slides as a list of dicts."""
        material_text = self._format_materials(materials)
        prompt = f"""Erstelle {num_slides} Präsentationsfolien zum Thema "{topic}" für Klassenstufe {target_grade}.
Materialien:
{material_text}
Formatiere als JSON-Array:
[
{{
"title": "Folien Titel",
"content": "Folien Inhalt (2-4 Sätze)",
"backgroundColor": "#ffffff"
}}
]
Nur JSON zurückgeben."""
        return await self._generate_json(
            prompt,
            "Du bist ein Experte für Präsentationen.",
            list,
            "presentation slides",
        )

    def _format_materials(self, materials: List[Dict[str, Any]]) -> str:
        """Render materials as a text section for a prompt.

        Each material contributes a header line, its content truncated to
        ``MAX_MATERIAL_CHARS`` characters, and a blank line. A missing or
        ``None`` content is rendered as an empty string.
        """
        if not materials:
            return "Keine Materialien vorhanden."

        lines = []
        for index, material in enumerate(materials, 1):
            lines.append(f"Material {index} ({material.get('type', 'unknown')}):")
            content = material.get('content') or ''
            lines.append(str(content)[:self.MAX_MATERIAL_CHARS])
            lines.append("")
        return "\n".join(lines)

View File

@@ -0,0 +1,341 @@
"""
Content Generator
Orchestriert die Generierung aller 8 H5P Content-Typen
"""
from typing import List, Dict, Any, Optional
from datetime import datetime
import json
class ContentGenerator:
    """Orchestrates generation of the eight H5P content types.

    Text content is delegated to the injected Claude service; video
    transcripts and video interactions to the injected YouTube service.
    """

    def __init__(self, claude_service, youtube_service):
        """Store the collaborating services."""
        self.claude = claude_service
        self.youtube = youtube_service

    async def generate_all_content_types(
        self,
        topic: str,
        description: Optional[str],
        target_grade: str,
        materials: List[Dict[str, Any]],
        videos: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Generate all eight H5P content types, one after another.

        A failure in one generator does not abort the others: the failing
        entry is replaced by ``{"error": <message>}``.

        Returns:
            Dict with ``topic``, ``description``, ``target_grade``,
            ``generated_at`` (naive UTC ISO timestamp) and ``content_types``
            keyed by content-type name.
        """
        from datetime import timezone

        result = {
            "topic": topic,
            "description": description,
            "target_grade": target_grade,
            # Same naive-UTC format as the deprecated datetime.utcnow().
            "generated_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
            "content_types": {}
        }

        common = (topic, description, target_grade, materials)
        # (result key, coroutine factory) in generation order.
        generators = [
            ("quiz", lambda: self._generate_quiz(*common)),
            ("interactive_video", lambda: self._generate_interactive_video(*common, videos)),
            ("course_presentation", lambda: self._generate_presentation(*common)),
            ("flashcards", lambda: self._generate_flashcards(*common)),
            ("timeline", lambda: self._generate_timeline(*common)),
            ("drag_drop", lambda: self._generate_drag_drop(*common)),
            ("fill_blanks", lambda: self._generate_fill_blanks(*common)),
            ("memory", lambda: self._generate_memory(*common)),
        ]

        for key, make_coroutine in generators:
            # Best effort by design: record the error and continue.
            try:
                result["content_types"][key] = await make_coroutine()
            except Exception as e:
                result["content_types"][key] = {"error": str(e)}

        return result

    async def _generate_quiz(
        self,
        topic: str,
        description: Optional[str],
        target_grade: str,
        materials: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Generate quiz content with ten questions."""
        questions = await self.claude.generate_quiz_questions(
            topic=topic,
            materials=materials,
            target_grade=target_grade,
            num_questions=10
        )
        return {
            "type": "quiz",
            "title": f"Quiz: {topic}",
            "description": description or f"Teste dein Wissen über {topic}",
            "questions": questions
        }

    async def _generate_interactive_video(
        self,
        topic: str,
        description: Optional[str],
        target_grade: str,
        materials: List[Dict[str, Any]],
        videos: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Generate interactive-video content from the first video in ``videos``.

        Returns an error dict when no videos are given, generic placeholder
        interactions when the video id is missing or the placeholder id, and
        otherwise interactions derived from the transcript (an empty list
        when no transcript is available).
        """
        if not videos:
            return {
                "error": "Keine Videos gefunden",
                "note": "Lehrer muss manuell Video-URL eingeben"
            }

        # The first video is used as the best candidate.
        best_video = videos[0]
        video_id = best_video.get("video_id")

        if not video_id or video_id == "EXAMPLE_VIDEO_ID":
            # Fallback: generic interactions for a placeholder video.
            return {
                "type": "interactive-video",
                "title": f"Interaktives Video: {topic}",
                "videoUrl": "https://www.youtube.com/watch?v=EXAMPLE",
                "description": description or f"Lerne über {topic} mit diesem interaktiven Video",
                "interactions": [
                    {
                        "time": "01:00",
                        "type": "question",
                        "title": "Verständnisfrage",
                        "content": f"Was ist das Hauptthema dieses Videos über {topic}?"
                    },
                    {
                        "time": "03:00",
                        "type": "info",
                        "title": "Wichtiger Hinweis",
                        "content": "Achte auf die wichtigsten Konzepte, die jetzt erklärt werden."
                    }
                ],
                "note": "Generische Interaktionen - Lehrer sollte echte Video-URL eingeben"
            }

        # Real video: derive interactions from the transcript when available.
        transcript_data = await self.youtube.get_video_transcript(video_id)
        if transcript_data:
            interactions = await self.youtube.generate_video_interactions_with_claude(
                video_id=video_id,
                topic=topic,
                transcript_data=transcript_data["transcript"],
                claude_service=self.claude,
                num_interactions=5
            )
        else:
            # No transcript: return the video without interactions.
            interactions = []

        return {
            "type": "interactive-video",
            "title": best_video.get("title", f"Video: {topic}"),
            "videoUrl": best_video.get("url"),
            "description": description or f"Interaktives Video über {topic}",
            "interactions": interactions
        }

    async def _generate_presentation(
        self,
        topic: str,
        description: Optional[str],
        target_grade: str,
        materials: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Generate course-presentation content with six numbered slides."""
        slides = await self.claude.generate_presentation_slides(
            topic=topic,
            materials=materials,
            target_grade=target_grade,
            num_slides=6
        )
        # Number the slides starting at 1.
        for i, slide in enumerate(slides, 1):
            slide["id"] = i
        return {
            "type": "course-presentation",
            "title": f"Präsentation: {topic}",
            "description": description or f"Lerne alles über {topic}",
            "slides": slides
        }

    async def _generate_flashcards(
        self,
        topic: str,
        description: Optional[str],
        target_grade: str,
        materials: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Generate flashcard content with fifteen numbered cards."""
        cards = await self.claude.generate_flashcards(
            topic=topic,
            materials=materials,
            target_grade=target_grade,
            num_cards=15
        )
        # Number the cards starting at 1.
        for i, card in enumerate(cards, 1):
            card["id"] = i
        return {
            "type": "flashcards",
            "title": f"Lernkarten: {topic}",
            "description": description or f"Wiederhole wichtige Begriffe zu {topic}",
            "cards": cards
        }

    async def _generate_timeline(
        self,
        topic: str,
        description: Optional[str],
        target_grade: str,
        materials: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Generate timeline content with numbered events."""
        events = await self.claude.generate_timeline_events(
            topic=topic,
            materials=materials,
            target_grade=target_grade
        )
        # Number the events starting at 1.
        for i, event in enumerate(events, 1):
            event["id"] = i
        return {
            "type": "timeline",
            "title": f"Zeitleiste: {topic}",
            "description": description or f"Chronologie von {topic}",
            "events": events
        }

    async def _generate_drag_drop(
        self,
        topic: str,
        description: Optional[str],
        target_grade: str,
        materials: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Generate drag-and-drop content; ``description`` is not used here."""
        exercise = await self.claude.generate_drag_drop_exercise(
            topic=topic,
            materials=materials,
            target_grade=target_grade
        )
        return {
            "type": "drag-drop",
            "title": exercise.get("title", f"Zuordnung: {topic}"),
            "question": exercise.get("question", "Ziehe die Elemente in die richtigen Kategorien."),
            "zones": exercise.get("zones", []),
            "draggables": exercise.get("draggables", [])
        }

    async def _generate_fill_blanks(
        self,
        topic: str,
        description: Optional[str],
        target_grade: str,
        materials: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Generate fill-in-the-blanks content; ``description`` is not used here."""
        exercise = await self.claude.generate_fill_blanks_text(
            topic=topic,
            materials=materials,
            target_grade=target_grade
        )
        return {
            "type": "fill-blanks",
            "title": exercise.get("title", f"Lückentext: {topic}"),
            "text": exercise.get("text", ""),
            "hints": exercise.get("hints", "")
        }

    async def _generate_memory(
        self,
        topic: str,
        description: Optional[str],
        target_grade: str,
        materials: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Generate memory-game content with eight numbered pairs."""
        pairs = await self.claude.generate_memory_pairs(
            topic=topic,
            materials=materials,
            target_grade=target_grade,
            num_pairs=8
        )
        # Number the pairs starting at 1.
        for i, pair in enumerate(pairs, 1):
            pair["id"] = i
        return {
            "type": "memory",
            "title": f"Memory: {topic}",
            "description": description or f"Finde die passenden Paare zu {topic}",
            "pairs": pairs
        }

View File

@@ -0,0 +1,197 @@
"""
Material Analyzer
Analysiert hochgeladene Lernmaterialien (PDF, Images, DOCX)
"""
from typing import Dict, Any, Optional
import io
from PyPDF2 import PdfReader
from PIL import Image
import pytesseract
from docx import Document
import mammoth
class MaterialAnalyzer:
    """Extracts text from uploaded learning materials (PDF, image, DOCX, TXT)."""

    async def analyze(self, filename: str, content: bytes) -> Dict[str, Any]:
        """Analyze an uploaded file, dispatching on its extension.

        Args:
            filename: Name of the file; the text after the last dot selects
                the handler.
            content: Raw file content.

        Returns:
            Dict with at least ``filename``, ``type`` and ``content``; an
            ``error`` key is present when extraction failed or the file type
            is unsupported. This method does not raise for handler errors.
        """
        file_ext = filename.lower().split('.')[-1]
        handlers = {
            'pdf': self._analyze_pdf,
            'png': self._analyze_image,
            'jpg': self._analyze_image,
            'jpeg': self._analyze_image,
            'docx': self._analyze_docx,
            'txt': self._analyze_text,
        }
        handler = handlers.get(file_ext)
        if handler is None:
            return {
                "filename": filename,
                "type": "unknown",
                "content": "",
                "error": f"Unsupported file type: {file_ext}"
            }
        try:
            return await handler(filename, content)
        except Exception as e:
            return {
                "filename": filename,
                "type": "error",
                "content": "",
                "error": str(e)
            }

    async def _analyze_pdf(self, filename: str, content: bytes) -> Dict[str, Any]:
        """Extract text from a PDF, page by page.

        Pages without extractable text are skipped instead of failing the
        whole document.
        """
        try:
            reader = PdfReader(io.BytesIO(content))
            text_content = []
            num_pages = len(reader.pages)
            for page_num, page in enumerate(reader.pages, 1):
                # Guard against a missing text layer yielding None.
                text = page.extract_text() or ""
                if text.strip():
                    text_content.append(f"--- Seite {page_num} ---")
                    text_content.append(text)
            return {
                "filename": filename,
                "type": "pdf",
                "num_pages": num_pages,
                "content": "\n".join(text_content),
                "success": True
            }
        except Exception as e:
            return {
                "filename": filename,
                "type": "pdf",
                "content": "",
                "error": f"PDF extraction failed: {str(e)}"
            }

    async def _analyze_image(self, filename: str, content: bytes) -> Dict[str, Any]:
        """Read image metadata and extract text via OCR.

        OCR needs a working tesseract installation. When OCR fails the result
        still succeeds with empty ``content`` and the failure is reported
        under ``ocr_error``, so the error text never ends up in prompts.
        """
        try:
            # The context manager releases the image's resources on exit.
            with Image.open(io.BytesIO(content)) as image:
                width, height = image.size
                mode = image.mode
                ocr_text = ""
                ocr_error = None
                try:
                    ocr_text = pytesseract.image_to_string(image, lang='deu')
                except Exception as err:
                    ocr_error = f"[OCR not available: {str(err)}]"

            result = {
                "filename": filename,
                "type": "image",
                "width": width,
                "height": height,
                "mode": mode,
                "content": ocr_text,
                "note": "Image als Diagramm/Skizze erkannt. OCR Text extrahiert.",
                "success": True
            }
            if ocr_error is not None:
                result["ocr_error"] = ocr_error
            return result
        except Exception as e:
            return {
                "filename": filename,
                "type": "image",
                "content": "",
                "error": f"Image analysis failed: {str(e)}"
            }

    async def _analyze_docx(self, filename: str, content: bytes) -> Dict[str, Any]:
        """Extract text from a DOCX file.

        python-docx is tried first; if it fails, mammoth is used as fallback.
        """
        try:
            try:
                doc = Document(io.BytesIO(content))
                text_content = "\n".join(
                    para.text for para in doc.paragraphs if para.text.strip()
                )
            except Exception:
                # Second attempt with mammoth when python-docx cannot read it.
                result = mammoth.convert_to_text(io.BytesIO(content))
                text_content = result.value
            return {
                "filename": filename,
                "type": "docx",
                "content": text_content,
                "success": True
            }
        except Exception as e:
            return {
                "filename": filename,
                "type": "docx",
                "content": "",
                "error": f"DOCX extraction failed: {str(e)}"
            }

    async def _analyze_text(self, filename: str, content: bytes) -> Dict[str, Any]:
        """Decode a plain-text file.

        UTF-8 (with or without BOM) is tried first; files that are not valid
        UTF-8 are decoded as Windows-1252 with undecodable bytes replaced.
        The codec used is reported under ``encoding``.
        """
        try:
            try:
                text = content.decode('utf-8-sig')
                encoding = 'utf-8'
            except UnicodeDecodeError:
                text = content.decode('cp1252', errors='replace')
                encoding = 'cp1252'
            return {
                "filename": filename,
                "type": "text",
                "content": text,
                "encoding": encoding,
                "success": True
            }
        except Exception as e:
            return {
                "filename": filename,
                "type": "text",
                "content": "",
                "error": f"Text extraction failed: {str(e)}"
            }

    def extract_key_concepts(self, materials: list[Dict[str, Any]]) -> list[str]:
        """Return up to 20 candidate concepts from the materials' content.

        Heuristic: capitalized words (German umlauts included) ranked by
        frequency. Materials with missing or ``None`` content are ignored.
        """
        import re
        from collections import Counter

        all_text = " ".join(m.get("content") or "" for m in materials)
        words = re.findall(r'\b[A-ZÄÖÜ][a-zäöüß]+\b', all_text)
        return [word for word, _ in Counter(words).most_common(20)]

View File

@@ -0,0 +1,243 @@
"""
YouTube Service
Video-Suche und Transkript-Analyse für Interactive Video Content
"""
import os
from typing import List, Dict, Any, Optional
from youtube_transcript_api import YouTubeTranscriptApi
import re
class YouTubeService:
    """YouTube integration: transcript retrieval and interaction generation."""

    def __init__(self):
        """Read the optional ``YOUTUBE_API_KEY``; transcripts work without it."""
        # A key would only be needed for video search via the YouTube Data API.
        self.youtube_api_key = os.getenv("YOUTUBE_API_KEY")

    def is_configured(self) -> bool:
        """Always True: the transcript API needs no key."""
        return True

    async def search_videos(
        self,
        query: str,
        max_results: int = 5
    ) -> List[Dict[str, Any]]:
        """Return a single placeholder search result for ``query``.

        TODO: integrate the YouTube Data API (search.list). Until then
        ``max_results`` is ignored and the result carries the placeholder id
        ``EXAMPLE_VIDEO_ID``.
        """
        return [
            {
                "video_id": "EXAMPLE_VIDEO_ID",
                "title": f"Video zum Thema: {query}",
                "channel": "Educational Channel",
                "url": "https://www.youtube.com/watch?v=EXAMPLE_VIDEO_ID",
                "has_transcript": False,
                "note": "Use real YouTube Data API in production"
            }
        ]

    def _fetch_transcript(self, video_id: str, languages: List[str]) -> Dict[str, Any]:
        """Synchronously fetch a transcript, preferring ``languages`` in order.

        Falls back to the first transcript the list yields when none of the
        preferred languages is available.
        """
        # NOTE(review): newer youtube-transcript-api releases changed this
        # entry point and the shape of fetch() results - confirm the pinned
        # version.
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        try:
            transcript = transcript_list.find_transcript(languages)
        except Exception:
            # No preferred language: take whatever is listed first, using the
            # public iterator instead of the private transcript dicts.
            transcript = next(iter(transcript_list), None)
            if transcript is None:
                raise
        return {
            "video_id": video_id,
            "language": transcript.language_code,
            "is_generated": transcript.is_generated,
            "transcript": transcript.fetch()
        }

    async def get_video_transcript(
        self,
        video_id: str,
        languages: Optional[List[str]] = None
    ) -> Optional[Dict[str, Any]]:
        """Get a video transcript without blocking the event loop.

        Args:
            video_id: YouTube video ID.
            languages: Preferred language codes in priority order; defaults
                to German, then English.

        Returns:
            Dict with ``video_id``, ``language``, ``is_generated`` and
            ``transcript``, or ``None`` when the transcript cannot be fetched.
        """
        import asyncio
        import logging

        preferred = ["de", "en"] if languages is None else list(languages)
        try:
            loop = asyncio.get_running_loop()
            # The transcript library does blocking network I/O.
            return await loop.run_in_executor(
                None, self._fetch_transcript, video_id, preferred
            )
        except Exception as e:
            logging.getLogger(__name__).warning(
                "Error fetching transcript for %s: %s", video_id, e
            )
            return None

    def extract_key_moments(
        self,
        transcript_data: List[Dict[str, Any]],
        num_moments: int = 5
    ) -> List[Dict[str, Any]]:
        """Pick ``num_moments`` transcript entries spread evenly over the video.

        Simple heuristic: for each evenly spaced target time, take the entry
        whose start is closest. Returns an empty list for an empty transcript
        or a non-positive ``num_moments``.
        """
        if not transcript_data or num_moments <= 0:
            return []

        last_entry = transcript_data[-1]
        total_duration = last_entry['start'] + last_entry['duration']
        interval = total_duration / (num_moments + 1)

        key_moments = []
        for i in range(1, num_moments + 1):
            target_time = interval * i
            closest_entry = min(
                transcript_data,
                key=lambda entry: abs(entry['start'] - target_time)
            )
            key_moments.append({
                "time": self._format_timestamp(closest_entry['start']),
                "seconds": closest_entry['start'],
                "text": closest_entry['text']
            })
        return key_moments

    async def generate_video_interactions_with_claude(
        self,
        video_id: str,
        topic: str,
        transcript_data: List[Dict[str, Any]],
        claude_service: Any,
        num_interactions: int = 5
    ) -> List[Dict[str, Any]]:
        """Generate interactive elements for a video using Claude.

        Args:
            video_id: YouTube video ID (not used in the prompt).
            topic: Video topic.
            transcript_data: Full transcript entries.
            claude_service: Object providing ``generate_content``.
            num_interactions: Number of interactions to request.

        Returns:
            Interactions with an added ``time`` key (mm:ss). Entries that are
            not dicts or lack a numeric ``seconds`` value are skipped. An
            empty list is returned when the reply contains no JSON array.
        """
        import json

        # Truncate the transcript to keep the prompt within a sane size.
        transcript_excerpt = self._create_transcript_text(transcript_data)[:8000]
        prompt = f"""Analysiere dieses Video-Transkript zum Thema "{topic}" und identifiziere {num_interactions} wichtige Momente für interaktive Elemente.
Transkript:
{transcript_excerpt}
Für jeden Moment, erstelle:
1. Einen Zeitstempel (in Sekunden)
2. Einen Interaktionstyp (question, info, oder link)
3. Einen Titel
4. Den Inhalt (Frage, Information, oder URL)
Formatiere als JSON-Array:
[
{{
"seconds": 45,
"type": "question",
"title": "Verständnisfrage",
"content": "Was ist die Hauptfunktion...?"
}},
{{
"seconds": 120,
"type": "info",
"title": "Wichtiger Hinweis",
"content": "Beachte dass..."
}}
]
Wähle Momente die:
- Wichtige Konzepte einführen
- Verständnis testen
- Zusatzinformationen bieten
Nur JSON zurückgeben."""
        response = await claude_service.generate_content(
            prompt=prompt,
            system_prompt="Du bist ein Experte für interaktive Video-Didaktik."
        )

        json_match = re.search(r'\[.*\]', response, re.DOTALL)
        if not json_match:
            return []

        interactions = []
        for interaction in json.loads(json_match.group()):
            if not isinstance(interaction, dict):
                continue
            try:
                seconds = float(interaction['seconds'])
            except (KeyError, TypeError, ValueError):
                # One malformed entry must not discard the valid ones.
                continue
            interaction['time'] = self._format_timestamp(seconds)
            interactions.append(interaction)
        return interactions

    def _create_transcript_text(self, transcript_data: List[Dict[str, Any]]) -> str:
        """Render transcript entries as ``[mm:ss] text`` lines."""
        return "\n".join(
            f"[{self._format_timestamp(entry['start'])}] {entry['text']}"
            for entry in transcript_data
        )

    def _format_timestamp(self, seconds: float) -> str:
        """Format seconds as mm:ss (minutes are not wrapped at 60)."""
        minutes = int(seconds // 60)
        secs = int(seconds % 60)
        return f"{minutes:02d}:{secs:02d}"

    def extract_video_id_from_url(self, url: str) -> Optional[str]:
        """Extract the video ID from a YouTube URL.

        Supports ``watch?v=`` (with ``v`` at any position in the query),
        ``youtu.be/``, ``embed/`` and ``shorts/`` URLs. Returns ``None`` when
        no ID is found or ``url`` is empty.
        """
        if not url:
            return None
        match = re.search(
            r'(?:youtube\.com/(?:watch\?(?:[^#&]+&)*?v=|embed/|shorts/)|youtu\.be/)'
            r'([A-Za-z0-9_-]+)',
            url
        )
        return match.group(1) if match else None
return None