This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/meeting_minutes_generator.py
Benjamin Admin bfdaf63ba9 fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

537 lines
19 KiB
Python

"""
BreakPilot Meeting Minutes Generator
Generiert KI-basierte Meeting-Protokolle aus Transkriptionen.
Nutzt das LLM Gateway (Ollama/vLLM/Anthropic) fuer lokale Verarbeitung.
Lizenz: MIT (kommerziell nutzbar)
"""
import os
import json
import logging
import httpx
from datetime import datetime
from typing import Optional, List
from dataclasses import dataclass, asdict
from pydantic import BaseModel, Field
logger = logging.getLogger(__name__)
# ==========================================
# CONFIGURATION
# ==========================================
# All three settings are read from the environment once, at import time.
# Base URL of the LLM gateway.
LLM_GATEWAY_URL = os.environ.get("LLM_GATEWAY_URL", "http://localhost:8002")
# Model name used whenever a caller does not choose one explicitly.
LLM_MODEL = os.environ.get("MEETING_MINUTES_MODEL", "breakpilot-teacher-8b")
# Timeout handed to the HTTP client; the environment value is parsed as an int.
LLM_TIMEOUT = int(os.environ.get("LLM_TIMEOUT", "120"))
# ==========================================
# PYDANTIC MODELS
# ==========================================
class ActionItem(BaseModel):
    """A single action item (to-do) taken from the meeting."""

    # Required: the task that has to be done.
    task: str = Field(..., description="Die zu erledigende Aufgabe")
    # Optional: responsible person, either a SPEAKER_XX label or a name.
    assignee: Optional[str] = Field(
        default=None,
        description="Verantwortliche Person (SPEAKER_XX oder Name)",
    )
    # Optional: due date as free text, only if one was mentioned.
    deadline: Optional[str] = Field(
        default=None,
        description="Faelligkeit, falls erwaehnt",
    )
    # Free-form string; the description lists high, normal and low.
    priority: str = Field(
        "normal",
        description="Prioritaet: high, normal, low",
    )
class Decision(BaseModel):
    """A decision that was taken during the meeting."""

    # Required: the subject the decision is about.
    topic: str = Field(..., description="Thema der Entscheidung")
    # Required: what was decided.
    decision: str = Field(..., description="Die getroffene Entscheidung")
    # Optional: the reasoning, only if it was mentioned.
    rationale: Optional[str] = Field(
        default=None,
        description="Begruendung, falls erwaehnt",
    )
class TopicSummary(BaseModel):
    """Summary of one topic that was discussed."""

    # Required: heading of the topic.
    title: str = Field(..., description="Titel des Themas")
    # Required: short summary text.
    summary: str = Field(..., description="Kurze Zusammenfassung")
    # Speakers involved in the topic; a fresh empty list per instance.
    participants: List[str] = Field(
        description="Beteiligte Sprecher",
        default_factory=list,
    )
    # Optional: estimated duration as free text.
    duration_estimate: Optional[str] = Field(
        default=None,
        description="Geschaetzte Dauer",
    )
class MeetingMinutes(BaseModel):
    """Complete set of meeting minutes, including generation metadata."""

    # Identifiers of the minutes and of the recording/transcription they
    # were produced from.
    id: str
    recording_id: str
    transcription_id: str

    # --- Meeting metadata ---
    title: str = Field(..., description="Titel des Meetings")
    date: str = Field(..., description="Datum des Meetings")
    duration_minutes: Optional[int] = Field(
        default=None,
        description="Dauer in Minuten",
    )
    participant_count: int = Field(0, description="Anzahl Teilnehmer")
    language: str = Field("de", description="Sprache")

    # --- Content ---
    summary: str = Field(..., description="Zusammenfassung in 3-5 Saetzen")
    topics: List[TopicSummary] = Field(
        description="Besprochene Themen",
        default_factory=list,
    )
    decisions: List[Decision] = Field(
        description="Getroffene Entscheidungen",
        default_factory=list,
    )
    action_items: List[ActionItem] = Field(
        description="Aktionspunkte/TODOs",
        default_factory=list,
    )
    open_questions: List[str] = Field(
        description="Offene Fragen",
        default_factory=list,
    )

    # --- AI metadata ---
    model_used: str = Field(..., description="Verwendetes LLM")
    # Defaults to the (naive) UTC time at which the instance is created.
    generated_at: datetime = Field(default_factory=datetime.utcnow)
    generation_time_seconds: Optional[float] = Field(
        default=None,
        description="Generierungszeit",
    )

    # --- Status ---
    status: str = Field(
        "completed",
        description="Status: pending, processing, completed, failed",
    )
    error_message: Optional[str] = Field(
        default=None,
        description="Fehlermeldung bei Status=failed",
    )
class MinutesGenerationRequest(BaseModel):
    """Request payload for generating meeting minutes."""

    # Optional meeting title; per the description one is generated if absent.
    title: Optional[str] = Field(
        default=None,
        description="Meeting-Titel (optional, wird generiert)",
    )
    # LLM model name; defaults to the module-level LLM_MODEL.
    model: str = Field(LLM_MODEL, description="LLM Modell")
    # Switches for the optional sections of the minutes.
    include_action_items: bool = Field(True, description="Action Items extrahieren")
    include_decisions: bool = Field(True, description="Entscheidungen extrahieren")
    # Upper bound for the number of topics.
    max_topics: int = Field(10, description="Maximale Anzahl Themen")
# ==========================================
# PROMPTS (German, Education Context)
# ==========================================
# System message for the extraction request (written in German).
# It instructs the model to stay factual, keep the SPEAKER_XX labels and to
# answer with a JSON object whose keys are "summary", "topics", "decisions",
# "action_items" and "open_questions".
# The literal braces in the JSON example are fine because, in the code visible
# in this file, this string is sent as-is and never run through str.format().
SYSTEM_PROMPT = """Du bist ein Assistent für die Erstellung von Meeting-Protokollen in deutschen Bildungseinrichtungen (Schulen, Universitäten).
Deine Aufgabe ist es, aus einer Transkription ein strukturiertes Protokoll zu erstellen.
WICHTIG:
- Schreibe professionell und sachlich auf Deutsch
- Verwende die formelle Anrede (Sie)
- Halte dich an die Fakten der Transkription
- Erfinde KEINE Informationen, die nicht in der Transkription stehen
- Sprecher werden als SPEAKER_00, SPEAKER_01 etc. bezeichnet - behalte diese Bezeichnungen bei
- Wenn du dir bei etwas unsicher bist, schreibe "Unklar:" davor
Format für die Ausgabe (JSON):
{
"summary": "3-5 Sätze Zusammenfassung",
"topics": [
{"title": "Thema", "summary": "Kurzbeschreibung", "participants": ["SPEAKER_00"]}
],
"decisions": [
{"topic": "Thema", "decision": "Was wurde entschieden", "rationale": "Begründung oder null"}
],
"action_items": [
{"task": "Aufgabe", "assignee": "SPEAKER_XX oder null", "deadline": "Datum oder null", "priority": "high/normal/low"}
],
"open_questions": ["Frage 1", "Frage 2"]
}"""
# User-message template (German), filled in with str.format() by
# MeetingMinutesGenerator.generate().
# Placeholders: {title}, {date}, {duration}, {participant_count},
# {transcript} and {max_topics}. Because the template goes through
# str.format(), any literal brace added here would have to be doubled.
EXTRACTION_PROMPT = """Analysiere folgende Meeting-Transkription und erstelle ein strukturiertes Protokoll.
Meeting-Titel: {title}
Datum: {date}
Dauer: {duration} Minuten
Teilnehmer: {participant_count}
--- TRANSKRIPTION ---
{transcript}
--- ENDE TRANSKRIPTION ---
Erstelle ein JSON-Protokoll mit:
1. summary: Zusammenfassung in 3-5 Sätzen
2. topics: Liste der besprochenen Themen (maximal {max_topics})
3. decisions: Alle getroffenen Entscheidungen
4. action_items: Alle Aufgaben/TODOs mit Verantwortlichen (falls genannt)
5. open_questions: Offene Fragen, die nicht beantwortet wurden
Antworte NUR mit dem JSON-Objekt, ohne zusätzlichen Text."""
# ==========================================
# MEETING MINUTES GENERATOR
# ==========================================
class MeetingMinutesGenerator:
    """Generate structured meeting minutes from a transcript via the LLM gateway.

    The generator fills the extraction prompt, posts it to the gateway's
    ``/v1/chat/completions`` endpoint, parses the JSON answer and wraps the
    result in a ``MeetingMinutes`` model. The HTTP client is created on first
    use and reused until ``close()`` is called.
    """

    # Upper bound for the transcript part of the prompt
    # (about 8000 tokens, i.e. roughly 32000 characters).
    MAX_TRANSCRIPT_CHARS = 32000

    def __init__(self, llm_gateway_url: str = LLM_GATEWAY_URL):
        """Store the gateway base URL; the HTTP client is created lazily."""
        self.llm_gateway_url = llm_gateway_url
        self._client: Optional[httpx.AsyncClient] = None

    async def get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, creating it on first use."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=LLM_TIMEOUT)
        return self._client

    async def close(self):
        """Close the HTTP client, if one was created."""
        if self._client:
            await self._client.aclose()
            self._client = None

    async def _call_llm(
        self,
        messages: List[dict],
        model: str = LLM_MODEL,
        temperature: float = 0.3,
        max_tokens: int = 4096
    ) -> str:
        """Send a non-streaming chat completion request to the LLM gateway.

        Args:
            messages: Chat messages (dicts with ``role`` and ``content``).
            model: Model name forwarded to the gateway.
            temperature: Sampling temperature forwarded to the gateway.
            max_tokens: Token limit forwarded to the gateway.

        Returns:
            The content of the first choice, or ``""`` when the answer has no
            choices or no content.

        Raises:
            RuntimeError: On timeout, HTTP error status or any other failure.
                The original exception is attached as ``__cause__``.
        """
        client = await self.get_client()
        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": False
        }
        try:
            response = await client.post(
                f"{self.llm_gateway_url}/v1/chat/completions",
                json=payload,
                timeout=LLM_TIMEOUT
            )
            response.raise_for_status()
            data = response.json()
            # Treat a missing key, an empty list and explicit nulls alike so
            # that the declared ``str`` return type always holds.
            choices = data.get("choices") or [{}]
            message = choices[0].get("message") or {}
            return message.get("content") or ""
        except httpx.TimeoutException as e:
            logger.error("LLM Gateway timeout")
            raise RuntimeError("LLM Gateway antwortet nicht (Timeout)") from e
        except httpx.HTTPStatusError as e:
            logger.error("LLM Gateway error: %s", e.response.status_code)
            raise RuntimeError(f"LLM Gateway Fehler: {e.response.status_code}") from e
        except Exception as e:
            logger.error("LLM call failed: %s", e)
            raise RuntimeError(f"LLM Aufruf fehlgeschlagen: {str(e)}") from e

    def _parse_llm_response(self, response: str) -> dict:
        """Parse the LLM answer into a dict.

        Markdown code fences are removed first. If the remaining text is not
        a JSON object by itself, the span from the first ``{`` to the last
        ``}`` is tried as well, so that answers with surrounding prose still
        parse.

        Returns:
            The parsed JSON object, or a placeholder protocol with empty
            lists when no JSON object can be extracted.
        """
        text = (response or "").strip()
        # Remove a leading code fence together with an optional "json" tag.
        if text.startswith("```"):
            text = text[3:]
            if text[:4].lower() == "json":
                text = text[4:]
        if text.endswith("```"):
            text = text[:-3]
        text = text.strip()

        candidates = [text]
        first_brace = text.find("{")
        last_brace = text.rfind("}")
        if 0 <= first_brace < last_brace:
            embedded = text[first_brace:last_brace + 1]
            if embedded != text:
                candidates.append(embedded)

        problem = None
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError as e:
                problem = e
                continue
            # Callers use ``.get`` on the result, so only objects are valid.
            if isinstance(parsed, dict):
                return parsed
            problem = f"expected a JSON object, got {type(parsed).__name__}"

        logger.warning("JSON parse error: %s. Response: %s...", problem, text[:200])
        # Fallback: empty protocol
        return {
            "summary": "Protokoll konnte nicht automatisch erstellt werden.",
            "topics": [],
            "decisions": [],
            "action_items": [],
            "open_questions": []
        }

    @staticmethod
    def _build_items(model_cls, raw_items, label: str) -> list:
        """Build model instances from raw LLM entries, skipping invalid ones.

        ``None`` values are dropped before construction so that field
        defaults apply (for example ``"priority": null``). Entries that are
        not dicts or fail validation are logged and skipped.
        """
        if not isinstance(raw_items, list):
            return []
        built = []
        for raw in raw_items:
            if not isinstance(raw, dict):
                logger.warning("Skipping invalid %s entry: %r", label, raw)
                continue
            cleaned = {key: value for key, value in raw.items() if value is not None}
            try:
                built.append(model_cls(**cleaned))
            except (TypeError, ValueError) as e:
                # pydantic's ValidationError is a ValueError subclass.
                logger.warning("Skipping invalid %s entry: %s", label, e)
        return built

    async def generate(
        self,
        transcript: str,
        recording_id: str,
        transcription_id: str,
        title: Optional[str] = None,
        date: Optional[str] = None,
        duration_minutes: Optional[int] = None,
        participant_count: int = 0,
        model: str = LLM_MODEL,
        max_topics: int = 10,
        include_action_items: bool = True,
        include_decisions: bool = True
    ) -> MeetingMinutes:
        """Generate meeting minutes from a transcript.

        Args:
            transcript: The full transcript; text beyond
                ``MAX_TRANSCRIPT_CHARS`` is cut off before prompting.
            recording_id: ID of the recording.
            transcription_id: ID of the transcription.
            title: Meeting title; derived from the date when not given.
            date: Meeting date; defaults to today's UTC date (dd.mm.yyyy).
            duration_minutes: Duration in minutes.
            participant_count: Number of participants.
            model: LLM model name.
            max_topics: Maximum number of topics; requested in the prompt and
                enforced on the parsed result.
            include_action_items: Keep extracted action items.
            include_decisions: Keep extracted decisions.

        Returns:
            A ``MeetingMinutes`` with status ``"completed"``, or with status
            ``"failed"`` and ``error_message`` set when the LLM call or the
            model construction raised.
        """
        import uuid
        import time

        # perf_counter is monotonic, so clock adjustments cannot distort
        # the measured generation time.
        started = time.perf_counter()
        minutes_id = str(uuid.uuid4())

        # Defaults
        if not date:
            date = datetime.utcnow().strftime("%d.%m.%Y")
        if not title:
            title = f"Meeting vom {date}"

        # Truncate the transcript if it is too long for the prompt.
        if len(transcript) > self.MAX_TRANSCRIPT_CHARS:
            logger.warning("Transcript too long (%d chars), truncating...", len(transcript))
            transcript = transcript[:self.MAX_TRANSCRIPT_CHARS] + "\n\n[... Transkription gekürzt ...]"

        user_prompt = EXTRACTION_PROMPT.format(
            title=title,
            date=date,
            duration=duration_minutes or "unbekannt",
            participant_count=participant_count,
            transcript=transcript,
            max_topics=max_topics
        )
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt}
        ]

        try:
            logger.info("Generating minutes for recording %s using %s", recording_id, model)
            response = await self._call_llm(messages, model=model)
            parsed = self._parse_llm_response(response)
            generation_time = time.perf_counter() - started

            summary = parsed.get("summary")
            if not isinstance(summary, str):
                summary = "Zusammenfassung nicht verfügbar."

            # The prompt only asks for at most max_topics; enforce it here.
            topics = self._build_items(TopicSummary, parsed.get("topics"), "topic")
            topics = topics[:max(max_topics, 0)]

            decisions = (
                self._build_items(Decision, parsed.get("decisions"), "decision")
                if include_decisions else []
            )
            action_items = (
                self._build_items(ActionItem, parsed.get("action_items"), "action item")
                if include_action_items else []
            )

            raw_questions = parsed.get("open_questions")
            open_questions = (
                [q for q in raw_questions if isinstance(q, str)]
                if isinstance(raw_questions, list) else []
            )

            minutes = MeetingMinutes(
                id=minutes_id,
                recording_id=recording_id,
                transcription_id=transcription_id,
                title=title,
                date=date,
                duration_minutes=duration_minutes,
                participant_count=participant_count,
                language="de",
                summary=summary,
                topics=topics,
                decisions=decisions,
                action_items=action_items,
                open_questions=open_questions,
                model_used=model,
                generated_at=datetime.utcnow(),
                generation_time_seconds=round(generation_time, 2),
                status="completed"
            )
            logger.info(
                "Minutes generated in %.2fs: %d topics, %d action items",
                generation_time, len(minutes.topics), len(minutes.action_items)
            )
            return minutes
        except Exception as e:
            # Report the failure through the returned model instead of raising.
            logger.error("Minutes generation failed: %s", e, exc_info=True)
            return MeetingMinutes(
                id=minutes_id,
                recording_id=recording_id,
                transcription_id=transcription_id,
                title=title,
                date=date,
                duration_minutes=duration_minutes,
                participant_count=participant_count,
                language="de",
                summary="",
                model_used=model,
                status="failed",
                error_message=str(e)
            )
# ==========================================
# EXPORT FUNCTIONS
# ==========================================
def minutes_to_markdown(minutes: MeetingMinutes) -> str:
    """Export meeting minutes as a Markdown document.

    Headings and ``---`` rules are separated from neighbouring text by blank
    lines, because a ``---`` line directly below a text line is read as a
    heading underline rather than a rule. Values placed in the action-item
    table are flattened to one line and their pipes are escaped so that they
    cannot break the table layout.

    Args:
        minutes: The minutes to export.

    Returns:
        The Markdown text.
    """

    def table_cell(value) -> str:
        # A newline would end the table row and a bare pipe would end the cell.
        single_line = " ".join(str(value).splitlines())
        return single_line.replace("|", "\\|")

    parts = [
        f"# {minutes.title}\n\n"
        f"**Datum:** {minutes.date}\n"
        f"**Dauer:** {minutes.duration_minutes or 'unbekannt'} Minuten\n"
        f"**Teilnehmer:** {minutes.participant_count}\n\n"
        "---\n\n"
        "## Zusammenfassung\n\n"
        f"{minutes.summary}\n\n"
        "---\n\n"
        "## Besprochene Themen\n\n"
    ]

    for i, topic in enumerate(minutes.topics, 1):
        parts.append(f"### {i}. {topic.title}\n\n")
        parts.append(f"{topic.summary}\n\n")
        if topic.participants:
            parts.append(f"*Beteiligte: {', '.join(topic.participants)}*\n\n")

    if minutes.decisions:
        parts.append("---\n\n## Entscheidungen\n\n")
        for decision in minutes.decisions:
            line = f"- **{decision.topic}:** {decision.decision}"
            if decision.rationale:
                line += f" *(Begründung: {decision.rationale})*"
            parts.append(line + "\n")
        parts.append("\n")

    if minutes.action_items:
        parts.append("---\n\n## Action Items\n\n")
        parts.append("| Aufgabe | Verantwortlich | Fällig | Priorität |\n")
        parts.append("|---------|----------------|--------|----------|\n")
        for item in minutes.action_items:
            cells = (
                table_cell(item.task),
                table_cell(item.assignee or '-'),
                table_cell(item.deadline or '-'),
                table_cell(item.priority),
            )
            parts.append("| " + " | ".join(cells) + " |\n")
        parts.append("\n")

    if minutes.open_questions:
        parts.append("---\n\n## Offene Fragen\n\n")
        parts.extend(f"- {q}\n" for q in minutes.open_questions)
        parts.append("\n")

    parts.append(
        "---\n\n"
        f"*Generiert am {minutes.generated_at.strftime('%d.%m.%Y um %H:%M Uhr')} mit {minutes.model_used}*\n"
        f"*Generierungszeit: {minutes.generation_time_seconds or 0:.1f} Sekunden*\n"
    )
    return "".join(parts)
def minutes_to_html(minutes: MeetingMinutes) -> str:
    """Export meeting minutes as a standalone HTML page (for PDF conversion).

    Every value taken from ``minutes`` is HTML-escaped before it is inserted
    into the page. The texts come from transcripts and LLM output, so they
    must not be able to inject markup or attributes.

    Args:
        minutes: The minutes to export.

    Returns:
        The HTML document as a string.
    """
    # Imported locally (the module has no file-level ``html`` import) and
    # under an alias so that no local name hides the stdlib module.
    from html import escape as _escape

    def esc(value) -> str:
        # quote=True also escapes quotes, which matters inside attributes.
        return _escape(str(value), quote=True)

    parts = [f"""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>{esc(minutes.title)}</title>
<style>
body {{ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }}
h1 {{ color: #1a365d; border-bottom: 2px solid #3182ce; padding-bottom: 10px; }}
h2 {{ color: #2c5282; margin-top: 30px; }}
h3 {{ color: #2d3748; }}
.meta {{ background: #f7fafc; padding: 15px; border-radius: 8px; margin-bottom: 20px; }}
.meta p {{ margin: 5px 0; }}
.summary {{ background: #ebf8ff; padding: 15px; border-left: 4px solid #3182ce; margin: 20px 0; }}
table {{ width: 100%; border-collapse: collapse; margin: 15px 0; }}
th, td {{ border: 1px solid #e2e8f0; padding: 10px; text-align: left; }}
th {{ background: #edf2f7; }}
.priority-high {{ color: #c53030; font-weight: bold; }}
.priority-normal {{ color: #2d3748; }}
.priority-low {{ color: #718096; }}
.decision {{ background: #f0fff4; padding: 10px; border-left: 4px solid #38a169; margin: 10px 0; }}
.question {{ background: #fffaf0; padding: 10px; border-left: 4px solid #dd6b20; margin: 10px 0; }}
.footer {{ margin-top: 40px; padding-top: 20px; border-top: 1px solid #e2e8f0; font-size: 0.9em; color: #718096; }}
</style>
</head>
<body>
<h1>{esc(minutes.title)}</h1>
<div class="meta">
<p><strong>Datum:</strong> {esc(minutes.date)}</p>
<p><strong>Dauer:</strong> {esc(minutes.duration_minutes or 'unbekannt')} Minuten</p>
<p><strong>Teilnehmer:</strong> {esc(minutes.participant_count)}</p>
</div>
<h2>Zusammenfassung</h2>
<div class="summary">
<p>{esc(minutes.summary)}</p>
</div>
<h2>Besprochene Themen</h2>
"""]

    for i, topic in enumerate(minutes.topics, 1):
        parts.append(f" <h3>{i}. {esc(topic.title)}</h3>\n<p>{esc(topic.summary)}</p>\n")
        if topic.participants:
            names = ', '.join(esc(p) for p in topic.participants)
            parts.append(f" <p><em>Beteiligte: {names}</em></p>\n")

    if minutes.decisions:
        parts.append(" <h2>Entscheidungen</h2>\n")
        for decision in minutes.decisions:
            parts.append(
                ' <div class="decision">\n'
                f"<strong>{esc(decision.topic)}:</strong> {esc(decision.decision)}\n"
            )
            if decision.rationale:
                parts.append(f" <br><em>Begründung: {esc(decision.rationale)}</em>\n")
            parts.append(" </div>\n")

    if minutes.action_items:
        parts.append(
            " <h2>Action Items</h2>\n"
            "<table>\n"
            "<thead>\n"
            "<tr><th>Aufgabe</th><th>Verantwortlich</th><th>Fällig</th><th>Priorität</th></tr>\n"
            "</thead>\n"
            "<tbody>\n"
        )
        for item in minutes.action_items:
            # The priority ends up inside a class attribute, so it is escaped
            # as well (including quotes).
            priority = esc(item.priority)
            parts.append(
                " <tr>\n"
                f"<td>{esc(item.task)}</td>\n"
                f"<td>{esc(item.assignee or '-')}</td>\n"
                f"<td>{esc(item.deadline or '-')}</td>\n"
                f'<td class="priority-{priority}">{priority}</td>\n'
                "</tr>\n"
            )
        parts.append(" </tbody>\n</table>\n")

    if minutes.open_questions:
        parts.append(" <h2>Offene Fragen</h2>\n")
        parts.extend(
            f' <div class="question">{esc(q)}</div>\n' for q in minutes.open_questions
        )

    parts.append(
        "\n"
        '<div class="footer">\n'
        f"<p>Generiert am {minutes.generated_at.strftime('%d.%m.%Y um %H:%M Uhr')} mit {esc(minutes.model_used)}</p>\n"
        f"<p>Generierungszeit: {minutes.generation_time_seconds or 0:.1f} Sekunden</p>\n"
        "</div>\n"
        "</body>\n"
        "</html>\n"
    )
    return "".join(parts)
# ==========================================
# SINGLETON
# ==========================================
# Module-wide generator instance; stays None until it is first requested.
_generator: Optional[MeetingMinutesGenerator] = None


def get_minutes_generator() -> MeetingMinutesGenerator:
    """Return the shared MeetingMinutesGenerator, creating it on first call."""
    global _generator
    if _generator is not None:
        return _generator
    _generator = MeetingMinutesGenerator()
    return _generator