This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/meeting_minutes_generator.py
BreakPilot Dev 19855efacc
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
feat: BreakPilot PWA - Full codebase (clean push without large binaries)
All services: admin-v2, studio-v2, website, ai-compliance-sdk,
consent-service, klausur-service, voice-service, and infrastructure.
Large PDFs and compiled binaries excluded via .gitignore.
2026-02-11 13:25:58 +01:00

537 lines
19 KiB
Python

"""
BreakPilot Meeting Minutes Generator
Generiert KI-basierte Meeting-Protokolle aus Transkriptionen.
Nutzt das LLM Gateway (Ollama/vLLM/Anthropic) fuer lokale Verarbeitung.
Lizenz: MIT (kommerziell nutzbar)
"""
import os
import json
import logging
import httpx
from datetime import datetime
from typing import Optional, List
from dataclasses import dataclass, asdict
from pydantic import BaseModel, Field
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# ==========================================
# CONFIGURATION
# ==========================================
# Each setting is read once at import time; the second argument is the
# fallback used when the environment variable is not set.
LLM_GATEWAY_URL = os.environ.get("LLM_GATEWAY_URL", "http://localhost:8002")
LLM_MODEL = os.environ.get("MEETING_MINUTES_MODEL", "breakpilot-teacher-8b")
# Timeout handed to the HTTP client; parsed as an integer.
LLM_TIMEOUT = int(os.environ.get("LLM_TIMEOUT", "120"))
# ==========================================
# PYDANTIC MODELS
# ==========================================
class ActionItem(BaseModel):
    """A single action item (task) extracted from the meeting."""

    # Only `task` is required; the remaining fields fall back to defaults.
    task: str = Field(default=..., description="Die zu erledigende Aufgabe")
    assignee: Optional[str] = Field(default=None, description="Verantwortliche Person (SPEAKER_XX oder Name)")
    deadline: Optional[str] = Field(default=None, description="Faelligkeit, falls erwaehnt")
    priority: str = Field(default="normal", description="Prioritaet: high, normal, low")
class Decision(BaseModel):
    """A decision that was taken during the meeting."""

    # `topic` and `decision` are required; the rationale is optional.
    topic: str = Field(default=..., description="Thema der Entscheidung")
    decision: str = Field(default=..., description="Die getroffene Entscheidung")
    rationale: Optional[str] = Field(default=None, description="Begruendung, falls erwaehnt")
class TopicSummary(BaseModel):
    """Summary of one topic that was discussed."""

    title: str = Field(default=..., description="Titel des Themas")
    summary: str = Field(default=..., description="Kurze Zusammenfassung")
    # A fresh empty list is created per instance when no participants are given.
    participants: List[str] = Field(default_factory=list, description="Beteiligte Sprecher")
    duration_estimate: Optional[str] = Field(default=None, description="Geschaetzte Dauer")
class MeetingMinutes(BaseModel):
    """Complete meeting minutes, including content and generation metadata."""

    # Identifiers of the minutes and of the recording/transcription they belong to
    id: str
    recording_id: str
    transcription_id: str

    # Meeting metadata
    title: str = Field(default=..., description="Titel des Meetings")
    date: str = Field(default=..., description="Datum des Meetings")
    duration_minutes: Optional[int] = Field(default=None, description="Dauer in Minuten")
    participant_count: int = Field(default=0, description="Anzahl Teilnehmer")
    language: str = Field(default="de", description="Sprache")

    # Content
    summary: str = Field(default=..., description="Zusammenfassung in 3-5 Saetzen")
    topics: List[TopicSummary] = Field(default_factory=list, description="Besprochene Themen")
    decisions: List[Decision] = Field(default_factory=list, description="Getroffene Entscheidungen")
    action_items: List[ActionItem] = Field(default_factory=list, description="Aktionspunkte/TODOs")
    open_questions: List[str] = Field(default_factory=list, description="Offene Fragen")

    # AI metadata
    model_used: str = Field(default=..., description="Verwendetes LLM")
    # Defaults to the (naive) UTC time at which the instance is created.
    generated_at: datetime = Field(default_factory=datetime.utcnow)
    generation_time_seconds: Optional[float] = Field(default=None, description="Generierungszeit")

    # Status
    status: str = Field(default="completed", description="Status: pending, processing, completed, failed")
    error_message: Optional[str] = Field(default=None, description="Fehlermeldung bei Status=failed")
class MinutesGenerationRequest(BaseModel):
    """Request payload for generating meeting minutes."""

    # Every field is optional for the caller; unset fields use the defaults below.
    title: Optional[str] = Field(default=None, description="Meeting-Titel (optional, wird generiert)")
    model: str = Field(default=LLM_MODEL, description="LLM Modell")
    include_action_items: bool = Field(default=True, description="Action Items extrahieren")
    include_decisions: bool = Field(default=True, description="Entscheidungen extrahieren")
    max_topics: int = Field(default=10, description="Maximale Anzahl Themen")
# ==========================================
# PROMPTS (German, Education Context)
# ==========================================
# System message sent with every minutes request (German, education context).
# It tells the model to write formal, factual German, to keep the SPEAKER_XX
# labels, not to invent information, and to answer with a JSON object holding
# the keys summary, topics, decisions, action_items and open_questions.
# NOTE: this string is used verbatim (it is not passed through str.format),
# which is why the JSON example can contain single literal braces.
SYSTEM_PROMPT = """Du bist ein Assistent für die Erstellung von Meeting-Protokollen in deutschen Bildungseinrichtungen (Schulen, Universitäten).
Deine Aufgabe ist es, aus einer Transkription ein strukturiertes Protokoll zu erstellen.
WICHTIG:
- Schreibe professionell und sachlich auf Deutsch
- Verwende die formelle Anrede (Sie)
- Halte dich an die Fakten der Transkription
- Erfinde KEINE Informationen, die nicht in der Transkription stehen
- Sprecher werden als SPEAKER_00, SPEAKER_01 etc. bezeichnet - behalte diese Bezeichnungen bei
- Wenn du dir bei etwas unsicher bist, schreibe "Unklar:" davor
Format für die Ausgabe (JSON):
{
"summary": "3-5 Sätze Zusammenfassung",
"topics": [
{"title": "Thema", "summary": "Kurzbeschreibung", "participants": ["SPEAKER_00"]}
],
"decisions": [
{"topic": "Thema", "decision": "Was wurde entschieden", "rationale": "Begründung oder null"}
],
"action_items": [
{"task": "Aufgabe", "assignee": "SPEAKER_XX oder null", "deadline": "Datum oder null", "priority": "high/normal/low"}
],
"open_questions": ["Frage 1", "Frage 2"]
}"""
# User message template for one minutes request. It is filled with
# str.format using the placeholders {title}, {date}, {duration},
# {participant_count}, {transcript} and {max_topics}; any literal brace added
# to this template would therefore have to be doubled.
EXTRACTION_PROMPT = """Analysiere folgende Meeting-Transkription und erstelle ein strukturiertes Protokoll.
Meeting-Titel: {title}
Datum: {date}
Dauer: {duration} Minuten
Teilnehmer: {participant_count}
--- TRANSKRIPTION ---
{transcript}
--- ENDE TRANSKRIPTION ---
Erstelle ein JSON-Protokoll mit:
1. summary: Zusammenfassung in 3-5 Sätzen
2. topics: Liste der besprochenen Themen (maximal {max_topics})
3. decisions: Alle getroffenen Entscheidungen
4. action_items: Alle Aufgaben/TODOs mit Verantwortlichen (falls genannt)
5. open_questions: Offene Fragen, die nicht beantwortet wurden
Antworte NUR mit dem JSON-Objekt, ohne zusätzlichen Text."""
# ==========================================
# MEETING MINUTES GENERATOR
# ==========================================
class MeetingMinutesGenerator:
    """Generate structured meeting minutes from a transcript via the LLM gateway.

    A chat-completion request is posted to
    ``{llm_gateway_url}/v1/chat/completions``; the JSON answer is parsed and
    mapped onto a ``MeetingMinutes`` model.
    """

    def __init__(self, llm_gateway_url: str = LLM_GATEWAY_URL):
        """Remember the gateway base URL; the HTTP client is created lazily."""
        self.llm_gateway_url = llm_gateway_url
        self._client: Optional[httpx.AsyncClient] = None

    async def get_client(self) -> httpx.AsyncClient:
        """Return the HTTP client, creating it on first use."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=LLM_TIMEOUT)
        return self._client

    async def close(self):
        """Close the HTTP client if one exists and reset it to ``None``."""
        if self._client:
            await self._client.aclose()
        self._client = None

    async def _call_llm(
        self,
        messages: List[dict],
        model: str = LLM_MODEL,
        temperature: float = 0.3,
        max_tokens: int = 4096
    ) -> str:
        """Send a non-streaming chat-completion request to the LLM gateway.

        Args:
            messages: Chat messages (dicts with ``role`` and ``content``).
            model: Model name passed to the gateway.
            temperature: Sampling temperature.
            max_tokens: Upper bound for the generated tokens.

        Returns:
            The ``content`` of the first choice's message, or ``""`` when the
            ``choices``/``message``/``content`` keys are missing.

        Raises:
            RuntimeError: On timeout, on an HTTP error status, or on any other
                failure. The original exception is attached as ``__cause__``.
        """
        client = await self.get_client()
        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": False
        }
        try:
            response = await client.post(
                f"{self.llm_gateway_url}/v1/chat/completions",
                json=payload,
                timeout=LLM_TIMEOUT
            )
            response.raise_for_status()
            data = response.json()
            content = data.get("choices", [{}])[0].get("message", {}).get("content", "")
            return content
        except httpx.TimeoutException as e:
            logger.error("LLM Gateway timeout")
            raise RuntimeError("LLM Gateway antwortet nicht (Timeout)") from e
        except httpx.HTTPStatusError as e:
            logger.error(f"LLM Gateway error: {e.response.status_code}")
            raise RuntimeError(f"LLM Gateway Fehler: {e.response.status_code}") from e
        except Exception as e:
            logger.error(f"LLM call failed: {e}")
            raise RuntimeError(f"LLM Aufruf fehlgeschlagen: {str(e)}") from e

    def _parse_llm_response(self, response: str) -> dict:
        """Parse the LLM answer into a dict.

        A surrounding Markdown code fence is removed first. If the remaining
        text is not valid JSON, the span from the first ``{`` to the last
        ``}`` is tried as well, so an answer with explanatory text around the
        JSON object is still usable.

        Returns:
            The parsed JSON object. If nothing parses, or the JSON is not an
            object, a placeholder protocol with empty lists is returned.
        """
        response = response.strip()
        # Remove an optional Markdown code fence around the payload.
        if response.startswith("```json"):
            response = response[7:]
        if response.startswith("```"):
            response = response[3:]
        if response.endswith("```"):
            response = response[:-3]
        response = response.strip()

        candidates = [response]
        first_brace = response.find("{")
        last_brace = response.rfind("}")
        if 0 <= first_brace < last_brace:
            embedded = response[first_brace:last_brace + 1]
            if embedded != response:
                candidates.append(embedded)

        problem = "empty response"
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError as e:
                problem = str(e)
                continue
            if isinstance(parsed, dict):
                return parsed
            # Valid JSON but not an object: callers rely on dict access.
            problem = f"expected a JSON object, got {type(parsed).__name__}"
            break

        logger.warning(f"JSON parse error: {problem}. Response: {response[:200]}...")
        # Fallback: empty protocol
        return {
            "summary": "Protokoll konnte nicht automatisch erstellt werden.",
            "topics": [],
            "decisions": [],
            "action_items": [],
            "open_questions": []
        }

    @staticmethod
    def _build_items(model_cls, raw_items, label: str) -> list:
        """Validate raw LLM entries one by one, skipping those that do not fit.

        A single malformed entry must not discard the whole protocol, so
        invalid entries are logged and dropped instead of raising.
        """
        if not raw_items:
            return []
        if not isinstance(raw_items, list):
            logger.warning(f"Ignoring {label}: expected a list, got {type(raw_items).__name__}")
            return []
        items = []
        for raw in raw_items:
            try:
                items.append(model_cls(**raw))
            except (TypeError, ValueError) as e:
                # TypeError: entry is not a mapping; ValueError covers the
                # pydantic validation error raised for bad/missing fields.
                logger.warning(f"Skipping invalid entry in {label}: {e}")
        return items

    async def generate(
        self,
        transcript: str,
        recording_id: str,
        transcription_id: str,
        title: Optional[str] = None,
        date: Optional[str] = None,
        duration_minutes: Optional[int] = None,
        participant_count: int = 0,
        model: str = LLM_MODEL,
        max_topics: int = 10,
        include_action_items: bool = True,
        include_decisions: bool = True
    ) -> MeetingMinutes:
        """Generate meeting minutes from a transcript.

        Args:
            transcript: The full transcript text.
            recording_id: ID of the recording.
            transcription_id: ID of the transcription.
            title: Meeting title; derived from the date when not given.
            date: Meeting date; defaults to today's UTC date (``dd.mm.YYYY``).
            duration_minutes: Duration in minutes.
            participant_count: Number of participants.
            model: LLM model name.
            max_topics: Maximum number of topics requested in the prompt.
            include_action_items: Keep extracted action items in the result.
            include_decisions: Keep extracted decisions in the result.

        Returns:
            The generated minutes. Failures do not raise: the result then has
            ``status="failed"`` and ``error_message`` set.
        """
        import uuid
        import time

        start_time = time.time()
        minutes_id = str(uuid.uuid4())

        # Defaults
        if not date:
            date = datetime.utcnow().strftime("%d.%m.%Y")
        if not title:
            title = f"Meeting vom {date}"

        # Truncate overly long transcripts (max ~8000 tokens ~ 32000 chars)
        max_chars = 32000
        if len(transcript) > max_chars:
            logger.warning(f"Transcript too long ({len(transcript)} chars), truncating...")
            transcript = transcript[:max_chars] + "\n\n[... Transkription gekürzt ...]"

        # Build the prompt
        user_prompt = EXTRACTION_PROMPT.format(
            title=title,
            date=date,
            duration=duration_minutes or "unbekannt",
            participant_count=participant_count,
            transcript=transcript,
            max_topics=max_topics
        )
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt}
        ]

        try:
            logger.info(f"Generating minutes for recording {recording_id} using {model}")
            response = await self._call_llm(messages, model=model)
            parsed = self._parse_llm_response(response)
            generation_time = time.time() - start_time

            # The model may answer null / a wrong type for any key; fall back
            # to neutral values instead of failing the whole protocol.
            summary = parsed.get("summary", "Zusammenfassung nicht verfügbar.")
            if not isinstance(summary, str):
                summary = "Zusammenfassung nicht verfügbar."

            raw_questions = parsed.get("open_questions")
            if isinstance(raw_questions, list):
                open_questions = [str(q) for q in raw_questions if q is not None]
            else:
                open_questions = []

            topics = self._build_items(TopicSummary, parsed.get("topics"), "topics")
            decisions = (
                self._build_items(Decision, parsed.get("decisions"), "decisions")
                if include_decisions else []
            )
            action_items = (
                self._build_items(ActionItem, parsed.get("action_items"), "action_items")
                if include_action_items else []
            )

            minutes = MeetingMinutes(
                id=minutes_id,
                recording_id=recording_id,
                transcription_id=transcription_id,
                title=title,
                date=date,
                duration_minutes=duration_minutes,
                participant_count=participant_count,
                language="de",
                summary=summary,
                topics=topics,
                decisions=decisions,
                action_items=action_items,
                open_questions=open_questions,
                model_used=model,
                generated_at=datetime.utcnow(),
                generation_time_seconds=round(generation_time, 2),
                status="completed"
            )
            logger.info(f"Minutes generated in {generation_time:.2f}s: {len(minutes.topics)} topics, {len(minutes.action_items)} action items")
            return minutes
        except Exception as e:
            # Deliberate boundary: report the failure in the result object.
            logger.error(f"Minutes generation failed: {e}")
            return MeetingMinutes(
                id=minutes_id,
                recording_id=recording_id,
                transcription_id=transcription_id,
                title=title,
                date=date,
                duration_minutes=duration_minutes,
                participant_count=participant_count,
                language="de",
                summary="",
                model_used=model,
                status="failed",
                error_message=str(e)
            )
# ==========================================
# EXPORT FUNCTIONS
# ==========================================
def minutes_to_markdown(minutes: MeetingMinutes) -> str:
    """Render meeting minutes as a Markdown document.

    Sections for decisions, action items and open questions are only emitted
    when the corresponding list is non-empty. Action items are rendered as a
    table; cell text is escaped so that a ``|`` or a line break in the text
    cannot break the table row.

    Args:
        minutes: The minutes to export.

    Returns:
        The Markdown text.
    """

    def cell(value) -> str:
        """Make free text safe for a single Markdown table cell."""
        return " ".join(str(value).splitlines()).replace("|", "\\|")

    parts = [f"""# {minutes.title}
**Datum:** {minutes.date}
**Dauer:** {minutes.duration_minutes or 'unbekannt'} Minuten
**Teilnehmer:** {minutes.participant_count}
---
## Zusammenfassung
{minutes.summary}
---
## Besprochene Themen
"""]

    for i, topic in enumerate(minutes.topics, 1):
        parts.append(f"### {i}. {topic.title}\n\n")
        parts.append(f"{topic.summary}\n\n")
        if topic.participants:
            parts.append(f"*Beteiligte: {', '.join(topic.participants)}*\n\n")

    if minutes.decisions:
        parts.append("---\n\n## Entscheidungen\n\n")
        for decision in minutes.decisions:
            parts.append(f"- **{decision.topic}:** {decision.decision}")
            if decision.rationale:
                parts.append(f" *(Begründung: {decision.rationale})*")
            parts.append("\n")
        parts.append("\n")

    if minutes.action_items:
        parts.append("---\n\n## Action Items\n\n")
        parts.append("| Aufgabe | Verantwortlich | Fällig | Priorität |\n")
        parts.append("|---------|----------------|--------|----------|\n")
        for item in minutes.action_items:
            parts.append(
                f"| {cell(item.task)} | {cell(item.assignee or '-')} "
                f"| {cell(item.deadline or '-')} | {cell(item.priority)} |\n"
            )
        parts.append("\n")

    if minutes.open_questions:
        parts.append("---\n\n## Offene Fragen\n\n")
        for q in minutes.open_questions:
            parts.append(f"- {q}\n")
        parts.append("\n")

    parts.append(f"""---
*Generiert am {minutes.generated_at.strftime('%d.%m.%Y um %H:%M Uhr')} mit {minutes.model_used}*
*Generierungszeit: {minutes.generation_time_seconds or 0:.1f} Sekunden*
""")
    return "".join(parts)
def minutes_to_html(minutes: MeetingMinutes) -> str:
    """Render meeting minutes as a standalone HTML page (for PDF conversion).

    All dynamic values (titles, summaries, speaker names, action item fields,
    model name) are HTML-escaped before being inserted. Much of this text is
    produced by the LLM from a transcript, so it must not be able to inject
    markup or attributes into the page.

    Args:
        minutes: The minutes to export.

    Returns:
        The HTML document as a string.
    """
    # Imported locally so the file's top-level imports stay untouched.
    from html import escape

    def esc(value) -> str:
        """Escape a value for use in HTML text or a quoted attribute."""
        return escape(str(value), quote=True)

    parts = [f"""<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>{esc(minutes.title)}</title>
<style>
body {{ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }}
h1 {{ color: #1a365d; border-bottom: 2px solid #3182ce; padding-bottom: 10px; }}
h2 {{ color: #2c5282; margin-top: 30px; }}
h3 {{ color: #2d3748; }}
.meta {{ background: #f7fafc; padding: 15px; border-radius: 8px; margin-bottom: 20px; }}
.meta p {{ margin: 5px 0; }}
.summary {{ background: #ebf8ff; padding: 15px; border-left: 4px solid #3182ce; margin: 20px 0; }}
table {{ width: 100%; border-collapse: collapse; margin: 15px 0; }}
th, td {{ border: 1px solid #e2e8f0; padding: 10px; text-align: left; }}
th {{ background: #edf2f7; }}
.priority-high {{ color: #c53030; font-weight: bold; }}
.priority-normal {{ color: #2d3748; }}
.priority-low {{ color: #718096; }}
.decision {{ background: #f0fff4; padding: 10px; border-left: 4px solid #38a169; margin: 10px 0; }}
.question {{ background: #fffaf0; padding: 10px; border-left: 4px solid #dd6b20; margin: 10px 0; }}
.footer {{ margin-top: 40px; padding-top: 20px; border-top: 1px solid #e2e8f0; font-size: 0.9em; color: #718096; }}
</style>
</head>
<body>
<h1>{esc(minutes.title)}</h1>
<div class="meta">
<p><strong>Datum:</strong> {esc(minutes.date)}</p>
<p><strong>Dauer:</strong> {esc(minutes.duration_minutes or 'unbekannt')} Minuten</p>
<p><strong>Teilnehmer:</strong> {esc(minutes.participant_count)}</p>
</div>
<h2>Zusammenfassung</h2>
<div class="summary">
<p>{esc(minutes.summary)}</p>
</div>
<h2>Besprochene Themen</h2>
"""]

    for i, topic in enumerate(minutes.topics, 1):
        parts.append(f" <h3>{i}. {esc(topic.title)}</h3>\n")
        parts.append(f"<p>{esc(topic.summary)}</p>\n")
        if topic.participants:
            speakers = ", ".join(esc(p) for p in topic.participants)
            parts.append(f" <p><em>Beteiligte: {speakers}</em></p>\n")

    if minutes.decisions:
        parts.append(" <h2>Entscheidungen</h2>\n")
        for decision in minutes.decisions:
            parts.append(' <div class="decision">\n')
            parts.append(f"<strong>{esc(decision.topic)}:</strong> {esc(decision.decision)}\n")
            if decision.rationale:
                parts.append(f" <br><em>Begründung: {esc(decision.rationale)}</em>\n")
            parts.append(" </div>\n")

    if minutes.action_items:
        parts.append(""" <h2>Action Items</h2>
<table>
<thead>
<tr><th>Aufgabe</th><th>Verantwortlich</th><th>Fällig</th><th>Priorität</th></tr>
</thead>
<tbody>
""")
        for item in minutes.action_items:
            # The priority also ends up inside a quoted attribute, so it is
            # escaped with quote=True like every other value.
            priority = esc(item.priority)
            parts.append(f""" <tr>
<td>{esc(item.task)}</td>
<td>{esc(item.assignee or '-')}</td>
<td>{esc(item.deadline or '-')}</td>
<td class="priority-{priority}">{priority}</td>
</tr>
""")
        parts.append(""" </tbody>
</table>
""")

    if minutes.open_questions:
        parts.append(" <h2>Offene Fragen</h2>\n")
        for q in minutes.open_questions:
            parts.append(f' <div class="question">{esc(q)}</div>\n')

    parts.append(f"""
<div class="footer">
<p>Generiert am {minutes.generated_at.strftime('%d.%m.%Y um %H:%M Uhr')} mit {esc(minutes.model_used)}</p>
<p>Generierungszeit: {minutes.generation_time_seconds or 0:.1f} Sekunden</p>
</div>
</body>
</html>
""")
    return "".join(parts)
# ==========================================
# SINGLETON
# ==========================================
# Module-wide generator instance; stays None until first requested.
_generator: Optional[MeetingMinutesGenerator] = None


def get_minutes_generator() -> MeetingMinutesGenerator:
    """Return the shared MeetingMinutesGenerator, creating it on first call."""
    global _generator
    if _generator is not None:
        return _generator
    _generator = MeetingMinutesGenerator()
    return _generator