A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
537 lines
19 KiB
Python
537 lines
19 KiB
Python
"""
|
|
BreakPilot Meeting Minutes Generator
|
|
|
|
Generiert KI-basierte Meeting-Protokolle aus Transkriptionen.
|
|
Nutzt das LLM Gateway (Ollama/vLLM/Anthropic) fuer lokale Verarbeitung.
|
|
|
|
Lizenz: MIT (kommerziell nutzbar)
|
|
"""
|
|
|
|
import os
|
|
import json
|
|
import logging
|
|
import httpx
|
|
from datetime import datetime
|
|
from typing import Optional, List
|
|
from dataclasses import dataclass, asdict
|
|
from pydantic import BaseModel, Field
|
|
|
|
logger = logging.getLogger(__name__)

# ==========================================
# CONFIGURATION
# ==========================================

# Base URL of the LLM gateway; override with the LLM_GATEWAY_URL env var.
LLM_GATEWAY_URL = os.getenv("LLM_GATEWAY_URL", "http://localhost:8002")

# Default model name sent to the gateway; override with MEETING_MINUTES_MODEL.
LLM_MODEL = os.getenv("MEETING_MINUTES_MODEL", "breakpilot-teacher-8b")

# Timeout value handed to httpx for gateway requests; override with
# LLM_TIMEOUT. A malformed value (e.g. an empty string injected by a
# deployment template) must not make the module fail at import time, so it
# falls back to the default and is reported instead.
_DEFAULT_LLM_TIMEOUT = 120
try:
    LLM_TIMEOUT = int(os.getenv("LLM_TIMEOUT", str(_DEFAULT_LLM_TIMEOUT)))
except ValueError:
    logger.warning(
        "Invalid LLM_TIMEOUT value %r, falling back to %d",
        os.getenv("LLM_TIMEOUT"),
        _DEFAULT_LLM_TIMEOUT,
    )
    LLM_TIMEOUT = _DEFAULT_LLM_TIMEOUT
|
|
|
|
|
|
# ==========================================
|
|
# PYDANTIC MODELS
|
|
# ==========================================
|
|
|
|
class ActionItem(BaseModel):
    """A single action item (task) extracted from the meeting."""

    # Required: the task itself.
    task: str = Field(default=..., description="Die zu erledigende Aufgabe")
    # Optional: who is responsible, as a speaker label or a name.
    assignee: Optional[str] = Field(
        default=None,
        description="Verantwortliche Person (SPEAKER_XX oder Name)",
    )
    # Optional: due date as free text, only if one was mentioned.
    deadline: Optional[str] = Field(
        default=None,
        description="Faelligkeit, falls erwaehnt",
    )
    # Free-text priority; the description lists high, normal and low.
    priority: str = Field(
        default="normal",
        description="Prioritaet: high, normal, low",
    )
|
|
|
|
|
|
class Decision(BaseModel):
    """A decision that was taken during the meeting."""

    # Required: the subject the decision is about.
    topic: str = Field(default=..., description="Thema der Entscheidung")
    # Required: what was decided.
    decision: str = Field(default=..., description="Die getroffene Entscheidung")
    # Optional: the reasoning, only if it was mentioned.
    rationale: Optional[str] = Field(
        default=None,
        description="Begruendung, falls erwaehnt",
    )
|
|
|
|
|
|
class TopicSummary(BaseModel):
    """Summary of one topic that was discussed."""

    # Required: heading of the topic.
    title: str = Field(default=..., description="Titel des Themas")
    # Required: short summary text.
    summary: str = Field(default=..., description="Kurze Zusammenfassung")
    # Speakers involved in the topic; a fresh empty list per instance.
    participants: List[str] = Field(
        default_factory=list,
        description="Beteiligte Sprecher",
    )
    # Optional: estimated duration as free text.
    duration_estimate: Optional[str] = Field(
        default=None,
        description="Geschaetzte Dauer",
    )
|
|
|
|
|
|
class MeetingMinutes(BaseModel):
    """Complete set of meeting minutes, including generation metadata."""

    # --- Identifiers ---
    id: str
    recording_id: str
    transcription_id: str

    # --- Meeting metadata ---
    title: str = Field(default=..., description="Titel des Meetings")
    date: str = Field(default=..., description="Datum des Meetings")
    duration_minutes: Optional[int] = Field(
        default=None,
        description="Dauer in Minuten",
    )
    participant_count: int = Field(default=0, description="Anzahl Teilnehmer")
    language: str = Field(default="de", description="Sprache")

    # --- Content ---
    summary: str = Field(default=..., description="Zusammenfassung in 3-5 Saetzen")
    topics: List[TopicSummary] = Field(
        default_factory=list,
        description="Besprochene Themen",
    )
    decisions: List[Decision] = Field(
        default_factory=list,
        description="Getroffene Entscheidungen",
    )
    action_items: List[ActionItem] = Field(
        default_factory=list,
        description="Aktionspunkte/TODOs",
    )
    open_questions: List[str] = Field(
        default_factory=list,
        description="Offene Fragen",
    )

    # --- AI metadata ---
    model_used: str = Field(default=..., description="Verwendetes LLM")
    # Defaults to the (naive) UTC time at which the instance is created.
    generated_at: datetime = Field(default_factory=datetime.utcnow)
    generation_time_seconds: Optional[float] = Field(
        default=None,
        description="Generierungszeit",
    )

    # --- Status ---
    status: str = Field(
        default="completed",
        description="Status: pending, processing, completed, failed",
    )
    error_message: Optional[str] = Field(
        default=None,
        description="Fehlermeldung bei Status=failed",
    )
|
|
|
|
|
|
class MinutesGenerationRequest(BaseModel):
    """Request payload for generating meeting minutes."""

    # Optional meeting title; per the field description one is generated
    # when it is omitted.
    title: Optional[str] = Field(
        default=None,
        description="Meeting-Titel (optional, wird generiert)",
    )
    # Model to use; defaults to the module-level LLM_MODEL.
    model: str = Field(default=LLM_MODEL, description="LLM Modell")
    # Switches for the optional extraction sections.
    include_action_items: bool = Field(
        default=True,
        description="Action Items extrahieren",
    )
    include_decisions: bool = Field(
        default=True,
        description="Entscheidungen extrahieren",
    )
    # Upper bound on the number of topics requested.
    max_topics: int = Field(default=10, description="Maximale Anzahl Themen")
|
|
|
|
|
|
# ==========================================
|
|
# PROMPTS (German, Education Context)
|
|
# ==========================================
|
|
|
|
# System prompt (German): tells the model to write factual, formal minutes
# for educational institutions, to keep the SPEAKER_XX labels, and to answer
# with a JSON object of the shape shown at the end of the prompt.
SYSTEM_PROMPT = """Du bist ein Assistent für die Erstellung von Meeting-Protokollen in deutschen Bildungseinrichtungen (Schulen, Universitäten).

Deine Aufgabe ist es, aus einer Transkription ein strukturiertes Protokoll zu erstellen.

WICHTIG:
- Schreibe professionell und sachlich auf Deutsch
- Verwende die formelle Anrede (Sie)
- Halte dich an die Fakten der Transkription
- Erfinde KEINE Informationen, die nicht in der Transkription stehen
- Sprecher werden als SPEAKER_00, SPEAKER_01 etc. bezeichnet - behalte diese Bezeichnungen bei
- Wenn du dir bei etwas unsicher bist, schreibe "Unklar:" davor

Format für die Ausgabe (JSON):
{
"summary": "3-5 Sätze Zusammenfassung",
"topics": [
{"title": "Thema", "summary": "Kurzbeschreibung", "participants": ["SPEAKER_00"]}
],
"decisions": [
{"topic": "Thema", "decision": "Was wurde entschieden", "rationale": "Begründung oder null"}
],
"action_items": [
{"task": "Aufgabe", "assignee": "SPEAKER_XX oder null", "deadline": "Datum oder null", "priority": "high/normal/low"}
],
"open_questions": ["Frage 1", "Frage 2"]
}"""
|
|
|
|
# User prompt template (German). Placeholders filled via str.format():
# title, date, duration, participant_count, transcript, max_topics.
EXTRACTION_PROMPT = """Analysiere folgende Meeting-Transkription und erstelle ein strukturiertes Protokoll.

Meeting-Titel: {title}
Datum: {date}
Dauer: {duration} Minuten
Teilnehmer: {participant_count}

--- TRANSKRIPTION ---
{transcript}
--- ENDE TRANSKRIPTION ---

Erstelle ein JSON-Protokoll mit:
1. summary: Zusammenfassung in 3-5 Sätzen
2. topics: Liste der besprochenen Themen (maximal {max_topics})
3. decisions: Alle getroffenen Entscheidungen
4. action_items: Alle Aufgaben/TODOs mit Verantwortlichen (falls genannt)
5. open_questions: Offene Fragen, die nicht beantwortet wurden

Antworte NUR mit dem JSON-Objekt, ohne zusätzlichen Text."""
|
|
|
|
|
|
# ==========================================
|
|
# MEETING MINUTES GENERATOR
|
|
# ==========================================
|
|
|
|
class MeetingMinutesGenerator:
    """Generate meeting minutes from transcripts via the LLM gateway.

    The generator posts a chat-completion request to
    ``<llm_gateway_url>/v1/chat/completions``, parses the JSON document the
    model answers with, and wraps it in a ``MeetingMinutes`` model. The HTTP
    client is created lazily and reused until ``close()`` is called.
    """

    # Transcripts longer than this are cut before prompting
    # (roughly 8000 tokens at ~4 characters per token).
    MAX_TRANSCRIPT_CHARS = 32000

    def __init__(self, llm_gateway_url: str = LLM_GATEWAY_URL):
        """Store the gateway base URL; no connection is opened yet."""
        self.llm_gateway_url = llm_gateway_url
        self._client: Optional[httpx.AsyncClient] = None

    async def get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, creating it on first use."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=LLM_TIMEOUT)
        return self._client

    async def close(self):
        """Close the HTTP client, if one was created, and forget it."""
        if self._client:
            await self._client.aclose()
            self._client = None

    async def _call_llm(
        self,
        messages: List[dict],
        model: str = LLM_MODEL,
        temperature: float = 0.3,
        max_tokens: int = 4096
    ) -> str:
        """Send a non-streaming chat completion request to the gateway.

        Args:
            messages: Chat messages (dicts with ``role`` and ``content``).
            model: Model name forwarded to the gateway.
            temperature: Sampling temperature forwarded to the gateway.
            max_tokens: Completion token limit forwarded to the gateway.

        Returns:
            The content of the first choice's message, or an empty string
            when the response carries no choices or no content.

        Raises:
            RuntimeError: On timeout, on an HTTP error status, or on any
                other failure while calling the gateway or reading its
                response. The original exception is attached as the cause.
        """
        client = await self.get_client()

        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": False
        }

        try:
            response = await client.post(
                f"{self.llm_gateway_url}/v1/chat/completions",
                json=payload,
                timeout=LLM_TIMEOUT
            )
            response.raise_for_status()
            data = response.json()

            # Tolerate a missing/empty "choices" list and a null message or
            # content: all of them yield an empty string instead of an
            # IndexError or a None return value.
            choices = data.get("choices") or [{}]
            message = choices[0].get("message") or {}
            return message.get("content") or ""

        except httpx.TimeoutException as exc:
            logger.error("LLM Gateway timeout")
            raise RuntimeError("LLM Gateway antwortet nicht (Timeout)") from exc
        except httpx.HTTPStatusError as exc:
            logger.error(f"LLM Gateway error: {exc.response.status_code}")
            raise RuntimeError(f"LLM Gateway Fehler: {exc.response.status_code}") from exc
        except Exception as exc:
            # Boundary handler: callers only ever see RuntimeError.
            logger.error(f"LLM call failed: {exc}")
            raise RuntimeError(f"LLM Aufruf fehlgeschlagen: {exc}") from exc

    @staticmethod
    def _empty_protocol() -> dict:
        """Return the placeholder protocol used when the reply is unusable."""
        return {
            "summary": "Protokoll konnte nicht automatisch erstellt werden.",
            "topics": [],
            "decisions": [],
            "action_items": [],
            "open_questions": []
        }

    @staticmethod
    def _build_items(model_cls, raw_items, label: str) -> list:
        """Build one ``model_cls`` per entry, skipping entries that are invalid.

        A single malformed entry in the model's answer should not discard
        the whole protocol, so invalid entries are logged and dropped.
        """
        if not isinstance(raw_items, list):
            return []
        built = []
        for raw in raw_items:
            try:
                built.append(model_cls(**raw))
            except (TypeError, ValueError) as exc:
                # TypeError: entry is not a mapping. ValueError: pydantic's
                # ValidationError is a ValueError subclass.
                logger.warning(f"Skipping invalid {label} entry: {exc}")
        return built

    def _parse_llm_response(self, response: str) -> dict:
        """Parse the model's answer into a dict.

        Strips a surrounding Markdown code fence, then tries to decode the
        text as JSON. If that fails, the span from the first ``{`` to the
        last ``}`` is tried as well, which covers answers that wrap the JSON
        object in extra text.

        Returns:
            The decoded JSON object, or the placeholder protocol from
            ``_empty_protocol()`` when no JSON object can be decoded.
        """
        text = (response or "").strip()

        # Remove a Markdown code fence, if present.
        if text.startswith("```json"):
            text = text[7:]
        if text.startswith("```"):
            text = text[3:]
        if text.endswith("```"):
            text = text[:-3]
        text = text.strip()

        candidates = [text]
        first, last = text.find("{"), text.rfind("}")
        if 0 <= first < last:
            candidates.append(text[first:last + 1])

        error = None
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError as exc:
                error = error or exc
                continue
            if isinstance(parsed, dict):
                return parsed
            # Valid JSON, but not an object (e.g. a bare list or string).
            error = error or ValueError("top-level JSON value is not an object")

        logger.warning(f"JSON parse error: {error}. Response: {text[:200]}...")
        return self._empty_protocol()

    async def generate(
        self,
        transcript: str,
        recording_id: str,
        transcription_id: str,
        title: Optional[str] = None,
        date: Optional[str] = None,
        duration_minutes: Optional[int] = None,
        participant_count: int = 0,
        model: str = LLM_MODEL,
        max_topics: int = 10,
        include_action_items: bool = True,
        include_decisions: bool = True
    ) -> MeetingMinutes:
        """Generate meeting minutes from a transcript.

        Args:
            transcript: The full transcript text. It is truncated to
                ``MAX_TRANSCRIPT_CHARS`` characters before prompting.
            recording_id: ID of the recording.
            transcription_id: ID of the transcription.
            title: Meeting title; defaults to "Meeting vom <date>".
            date: Meeting date; defaults to today's UTC date (dd.mm.YYYY).
            duration_minutes: Duration in minutes, if known.
            participant_count: Number of participants.
            model: Model name forwarded to the gateway.
            max_topics: Maximum number of topics; requested in the prompt
                and enforced on the parsed result (ignored if negative).
            include_action_items: Keep extracted action items if True.
            include_decisions: Keep extracted decisions if True.

        Returns:
            MeetingMinutes: The generated minutes with ``status="completed"``,
            or minutes with ``status="failed"`` and ``error_message`` set
            when generation raised. This method does not propagate errors.
        """
        import time
        import uuid

        started = time.perf_counter()
        minutes_id = str(uuid.uuid4())

        # Defaults: resolve the date first so the default title can use it.
        if not date:
            date = datetime.utcnow().strftime("%d.%m.%Y")
        if not title:
            title = f"Meeting vom {date}"

        # Cut overly long transcripts and mark the cut for the model.
        max_chars = self.MAX_TRANSCRIPT_CHARS
        if len(transcript) > max_chars:
            logger.warning(f"Transcript too long ({len(transcript)} chars), truncating...")
            transcript = transcript[:max_chars] + "\n\n[... Transkription gekürzt ...]"

        user_prompt = EXTRACTION_PROMPT.format(
            title=title,
            date=date,
            duration=duration_minutes or "unbekannt",
            participant_count=participant_count,
            transcript=transcript,
            max_topics=max_topics
        )

        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt}
        ]

        try:
            logger.info(f"Generating minutes for recording {recording_id} using {model}")
            response = await self._call_llm(messages, model=model)
            parsed = self._parse_llm_response(response)

            generation_time = time.perf_counter() - started

            topics = self._build_items(TopicSummary, parsed.get("topics"), "topic")
            if max_topics >= 0:
                # The prompt only asks for the limit; enforce it here too.
                topics = topics[:max_topics]

            decisions = []
            if include_decisions:
                decisions = self._build_items(Decision, parsed.get("decisions"), "decision")

            action_items = []
            if include_action_items:
                action_items = self._build_items(
                    ActionItem, parsed.get("action_items"), "action item"
                )

            # A null or non-list value becomes an empty list; entries are
            # coerced to str so one odd entry cannot fail validation.
            raw_questions = parsed.get("open_questions")
            open_questions = []
            if isinstance(raw_questions, list):
                open_questions = [str(q) for q in raw_questions if q is not None]

            minutes = MeetingMinutes(
                id=minutes_id,
                recording_id=recording_id,
                transcription_id=transcription_id,
                title=title,
                date=date,
                duration_minutes=duration_minutes,
                participant_count=participant_count,
                language="de",
                summary=str(parsed.get("summary") or "Zusammenfassung nicht verfügbar."),
                topics=topics,
                decisions=decisions,
                action_items=action_items,
                open_questions=open_questions,
                model_used=model,
                generated_at=datetime.utcnow(),
                generation_time_seconds=round(generation_time, 2),
                status="completed"
            )

            logger.info(f"Minutes generated in {generation_time:.2f}s: {len(minutes.topics)} topics, {len(minutes.action_items)} action items")

            return minutes

        except Exception as e:
            # Deliberate boundary: failures are reported through the
            # returned object's status instead of being raised.
            logger.exception(f"Minutes generation failed: {e}")
            return MeetingMinutes(
                id=minutes_id,
                recording_id=recording_id,
                transcription_id=transcription_id,
                title=title,
                date=date,
                duration_minutes=duration_minutes,
                participant_count=participant_count,
                language="de",
                summary="",
                model_used=model,
                status="failed",
                error_message=str(e)
            )
|
|
|
|
|
|
# ==========================================
|
|
# EXPORT FUNCTIONS
|
|
# ==========================================
|
|
|
|
def minutes_to_markdown(minutes: "MeetingMinutes") -> str:
    """Export meeting minutes as a Markdown document.

    The document contains the header (title, date, duration, participants),
    the summary and the topics. Decisions, the action-item table and open
    questions are each added only when present, followed by a generation
    footer.

    Args:
        minutes: The minutes to render.

    Returns:
        The Markdown text.
    """

    def cell(value) -> str:
        # Inside a table row a raw "|" would start a new cell and a line
        # break would end the row, so both are neutralised.
        text = str(value).replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
        return text.replace("|", "\\|")

    parts = [
        f"# {minutes.title}\n"
        "\n"
        f"**Datum:** {minutes.date}\n"
        f"**Dauer:** {minutes.duration_minutes or 'unbekannt'} Minuten\n"
        f"**Teilnehmer:** {minutes.participant_count}\n"
        "\n"
        "---\n"
        "\n"
        "## Zusammenfassung\n"
        "\n"
        f"{minutes.summary}\n"
        "\n"
        "---\n"
        "\n"
        "## Besprochene Themen\n"
        "\n"
    ]

    for number, topic in enumerate(minutes.topics, 1):
        parts.append(f"### {number}. {topic.title}\n\n")
        parts.append(f"{topic.summary}\n\n")
        if topic.participants:
            parts.append(f"*Beteiligte: {', '.join(topic.participants)}*\n\n")

    if minutes.decisions:
        parts.append("---\n\n## Entscheidungen\n\n")
        for decision in minutes.decisions:
            line = f"- **{decision.topic}:** {decision.decision}"
            if decision.rationale:
                line += f" *(Begründung: {decision.rationale})*"
            parts.append(line + "\n")
        parts.append("\n")

    if minutes.action_items:
        parts.append("---\n\n## Action Items\n\n")
        parts.append("| Aufgabe | Verantwortlich | Fällig | Priorität |\n")
        parts.append("|---------|----------------|--------|----------|\n")
        for item in minutes.action_items:
            parts.append(
                f"| {cell(item.task)} | {cell(item.assignee or '-')} "
                f"| {cell(item.deadline or '-')} | {cell(item.priority)} |\n"
            )
        parts.append("\n")

    if minutes.open_questions:
        parts.append("---\n\n## Offene Fragen\n\n")
        parts.extend(f"- {question}\n" for question in minutes.open_questions)
        parts.append("\n")

    parts.append(
        "---\n"
        "\n"
        f"*Generiert am {minutes.generated_at.strftime('%d.%m.%Y um %H:%M Uhr')} mit {minutes.model_used}*\n"
        f"*Generierungszeit: {minutes.generation_time_seconds or 0:.1f} Sekunden*\n"
    )

    return "".join(parts)
|
|
|
|
|
|
def minutes_to_html(minutes: "MeetingMinutes") -> str:
    """Export meeting minutes as a standalone HTML page (for PDF conversion).

    All text taken from the minutes originates from the transcript or the
    language model and is therefore HTML-escaped before it is inserted, both
    in element content and in the ``class`` attribute built from the
    action-item priority.

    Args:
        minutes: The minutes to render.

    Returns:
        The HTML document as a string.
    """
    # Imported locally; the module does not import ``html`` at file level.
    from html import escape

    def esc(value) -> str:
        # quote=True also escapes quotes so values are safe in attributes.
        return escape(str(value), quote=True)

    title = esc(minutes.title)
    duration = esc(minutes.duration_minutes or 'unbekannt')

    parts = [f"""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>{title}</title>
<style>
body {{ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }}
h1 {{ color: #1a365d; border-bottom: 2px solid #3182ce; padding-bottom: 10px; }}
h2 {{ color: #2c5282; margin-top: 30px; }}
h3 {{ color: #2d3748; }}
.meta {{ background: #f7fafc; padding: 15px; border-radius: 8px; margin-bottom: 20px; }}
.meta p {{ margin: 5px 0; }}
.summary {{ background: #ebf8ff; padding: 15px; border-left: 4px solid #3182ce; margin: 20px 0; }}
table {{ width: 100%; border-collapse: collapse; margin: 15px 0; }}
th, td {{ border: 1px solid #e2e8f0; padding: 10px; text-align: left; }}
th {{ background: #edf2f7; }}
.priority-high {{ color: #c53030; font-weight: bold; }}
.priority-normal {{ color: #2d3748; }}
.priority-low {{ color: #718096; }}
.decision {{ background: #f0fff4; padding: 10px; border-left: 4px solid #38a169; margin: 10px 0; }}
.question {{ background: #fffaf0; padding: 10px; border-left: 4px solid #dd6b20; margin: 10px 0; }}
.footer {{ margin-top: 40px; padding-top: 20px; border-top: 1px solid #e2e8f0; font-size: 0.9em; color: #718096; }}
</style>
</head>
<body>
<h1>{title}</h1>

<div class="meta">
<p><strong>Datum:</strong> {esc(minutes.date)}</p>
<p><strong>Dauer:</strong> {duration} Minuten</p>
<p><strong>Teilnehmer:</strong> {esc(minutes.participant_count)}</p>
</div>

<h2>Zusammenfassung</h2>
<div class="summary">
<p>{esc(minutes.summary)}</p>
</div>

<h2>Besprochene Themen</h2>
"""]

    for number, topic in enumerate(minutes.topics, 1):
        parts.append(f""" <h3>{number}. {esc(topic.title)}</h3>
<p>{esc(topic.summary)}</p>
""")
        if topic.participants:
            names = ', '.join(esc(name) for name in topic.participants)
            parts.append(f" <p><em>Beteiligte: {names}</em></p>\n")

    if minutes.decisions:
        parts.append(" <h2>Entscheidungen</h2>\n")
        for decision in minutes.decisions:
            parts.append(f""" <div class="decision">
<strong>{esc(decision.topic)}:</strong> {esc(decision.decision)}
""")
            if decision.rationale:
                parts.append(f" <br><em>Begründung: {esc(decision.rationale)}</em>\n")
            parts.append(" </div>\n")

    if minutes.action_items:
        parts.append(""" <h2>Action Items</h2>
<table>
<thead>
<tr><th>Aufgabe</th><th>Verantwortlich</th><th>Fällig</th><th>Priorität</th></tr>
</thead>
<tbody>
""")
        for item in minutes.action_items:
            priority = esc(item.priority)
            # The priority ends up inside a quoted attribute value; escaping
            # keeps an unexpected value from breaking out of it.
            priority_class = f"priority-{priority}"
            parts.append(f""" <tr>
<td>{esc(item.task)}</td>
<td>{esc(item.assignee or '-')}</td>
<td>{esc(item.deadline or '-')}</td>
<td class="{priority_class}">{priority}</td>
</tr>
""")
        parts.append(""" </tbody>
</table>
""")

    if minutes.open_questions:
        parts.append(" <h2>Offene Fragen</h2>\n")
        for question in minutes.open_questions:
            parts.append(f' <div class="question">{esc(question)}</div>\n')

    parts.append(f"""
<div class="footer">
<p>Generiert am {minutes.generated_at.strftime('%d.%m.%Y um %H:%M Uhr')} mit {esc(minutes.model_used)}</p>
<p>Generierungszeit: {minutes.generation_time_seconds or 0:.1f} Sekunden</p>
</div>
</body>
</html>
""")

    return "".join(parts)
|
|
|
|
|
|
# ==========================================
|
|
# SINGLETON
|
|
# ==========================================
|
|
|
|
# Module-wide generator instance; stays None until first requested.
_generator: Optional[MeetingMinutesGenerator] = None


def get_minutes_generator() -> MeetingMinutesGenerator:
    """Return the shared MeetingMinutesGenerator, creating it on first call."""
    global _generator
    if _generator is not None:
        return _generator
    _generator = MeetingMinutesGenerator()
    return _generator
|