# breakpilot-pwa/backend/meeting_minutes_generator.py

"""
BreakPilot Meeting Minutes Generator

Generiert KI-basierte Meeting-Protokolle aus Transkriptionen.
Nutzt das LLM Gateway (Ollama/vLLM/Anthropic) fuer lokale Verarbeitung.

Lizenz: MIT (kommerziell nutzbar)
"""

import os
import json
import logging
import httpx
from datetime import datetime
from typing import Optional, List
from dataclasses import dataclass, asdict
from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)

# ==========================================
# CONFIGURATION
# ==========================================

# Base URL of the LLM gateway; the LLM_GATEWAY_URL environment variable overrides it.
LLM_GATEWAY_URL = os.environ.get("LLM_GATEWAY_URL", "http://localhost:8002")
# Default model name, read from MEETING_MINUTES_MODEL when that variable is set.
LLM_MODEL = os.environ.get("MEETING_MINUTES_MODEL", "breakpilot-teacher-8b")
# Timeout handed to httpx for gateway requests; the value must parse as an int.
LLM_TIMEOUT = int(os.environ.get("LLM_TIMEOUT", "120"))


# ==========================================
# PYDANTIC MODELS
# ==========================================

class ActionItem(BaseModel):
    """A single action item (to-do) taken from a meeting."""

    # Required: the task text itself.
    task: str = Field(default=..., description="Die zu erledigende Aufgabe")
    # Optional: who is responsible and by when; None when not given.
    assignee: Optional[str] = Field(default=None, description="Verantwortliche Person (SPEAKER_XX oder Name)")
    deadline: Optional[str] = Field(default=None, description="Faelligkeit, falls erwaehnt")
    # Free-form string; it is not restricted to the values named in the description.
    priority: str = Field(default="normal", description="Prioritaet: high, normal, low")


class Decision(BaseModel):
    """A decision that was made during the meeting."""

    # Both the topic and the decision text are required.
    topic: str = Field(default=..., description="Thema der Entscheidung")
    decision: str = Field(default=..., description="Die getroffene Entscheidung")
    # Optional reasoning; None when none was given.
    rationale: Optional[str] = Field(default=None, description="Begruendung, falls erwaehnt")


class TopicSummary(BaseModel):
    """Summary of one topic that was discussed."""

    # Title and summary text are required.
    title: str = Field(default=..., description="Titel des Themas")
    summary: str = Field(default=..., description="Kurze Zusammenfassung")
    # Speaker labels involved in the topic; a fresh empty list per instance by default.
    participants: List[str] = Field(default_factory=list, description="Beteiligte Sprecher")
    duration_estimate: Optional[str] = Field(default=None, description="Geschaetzte Dauer")


class MeetingMinutes(BaseModel):
    """Complete set of meeting minutes, including generation metadata and status."""

    # Identifiers: this record plus the recording and transcription it refers to.
    id: str
    recording_id: str
    transcription_id: str

    # Metadata
    title: str = Field(default=..., description="Titel des Meetings")
    date: str = Field(default=..., description="Datum des Meetings")
    duration_minutes: Optional[int] = Field(default=None, description="Dauer in Minuten")
    participant_count: int = Field(default=0, description="Anzahl Teilnehmer")
    language: str = Field(default="de", description="Sprache")

    # Content
    summary: str = Field(default=..., description="Zusammenfassung in 3-5 Saetzen")
    topics: List[TopicSummary] = Field(default_factory=list, description="Besprochene Themen")
    decisions: List[Decision] = Field(default_factory=list, description="Getroffene Entscheidungen")
    action_items: List[ActionItem] = Field(default_factory=list, description="Aktionspunkte/TODOs")
    open_questions: List[str] = Field(default_factory=list, description="Offene Fragen")

    # AI metadata
    model_used: str = Field(default=..., description="Verwendetes LLM")
    # Defaults to the creation time as returned by datetime.utcnow (naive datetime).
    generated_at: datetime = Field(default_factory=datetime.utcnow)
    generation_time_seconds: Optional[float] = Field(default=None, description="Generierungszeit")

    # Status
    status: str = Field(default="completed", description="Status: pending, processing, completed, failed")
    error_message: Optional[str] = Field(default=None, description="Fehlermeldung bei Status=failed")


class MinutesGenerationRequest(BaseModel):
    """Request payload for generating meeting minutes."""

    # Optional meeting title; None when the caller does not supply one.
    title: Optional[str] = Field(default=None, description="Meeting-Titel (optional, wird generiert)")
    # Model name; defaults to the module-level LLM_MODEL.
    model: str = Field(default=LLM_MODEL, description="LLM Modell")
    # Switches for the optional sections of the minutes.
    include_action_items: bool = Field(default=True, description="Action Items extrahieren")
    include_decisions: bool = Field(default=True, description="Entscheidungen extrahieren")
    max_topics: int = Field(default=10, description="Maximale Anzahl Themen")


# ==========================================
# PROMPTS (German, Education Context)
# ==========================================

# System message that MeetingMinutesGenerator.generate() sends with every
# request. It is written in German, tells the model to stay factual, to keep
# the SPEAKER_XX labels and to prefix uncertain statements with "Unklar:", and
# it spells out the JSON shape of the expected answer. The top-level keys of
# that shape (summary, topics, decisions, action_items, open_questions) are
# the ones generate() reads from the parsed response.
SYSTEM_PROMPT = """Du bist ein Assistent für die Erstellung von Meeting-Protokollen in deutschen Bildungseinrichtungen (Schulen, Universitäten).

Deine Aufgabe ist es, aus einer Transkription ein strukturiertes Protokoll zu erstellen.

WICHTIG:
- Schreibe professionell und sachlich auf Deutsch
- Verwende die formelle Anrede (Sie)
- Halte dich an die Fakten der Transkription
- Erfinde KEINE Informationen, die nicht in der Transkription stehen
- Sprecher werden als SPEAKER_00, SPEAKER_01 etc. bezeichnet - behalte diese Bezeichnungen bei
- Wenn du dir bei etwas unsicher bist, schreibe "Unklar:" davor

Format für die Ausgabe (JSON):
{
  "summary": "3-5 Sätze Zusammenfassung",
  "topics": [
    {"title": "Thema", "summary": "Kurzbeschreibung", "participants": ["SPEAKER_00"]}
  ],
  "decisions": [
    {"topic": "Thema", "decision": "Was wurde entschieden", "rationale": "Begründung oder null"}
  ],
  "action_items": [
    {"task": "Aufgabe", "assignee": "SPEAKER_XX oder null", "deadline": "Datum oder null", "priority": "high/normal/low"}
  ],
  "open_questions": ["Frage 1", "Frage 2"]
}"""

# User-message template. MeetingMinutesGenerator.generate() fills it with
# str.format() using the placeholders title, date, duration,
# participant_count, transcript and max_topics. Because of str.format(), any
# literal brace added to this text would have to be doubled.
EXTRACTION_PROMPT = """Analysiere folgende Meeting-Transkription und erstelle ein strukturiertes Protokoll.

Meeting-Titel: {title}
Datum: {date}
Dauer: {duration} Minuten
Teilnehmer: {participant_count}

--- TRANSKRIPTION ---
{transcript}
--- ENDE TRANSKRIPTION ---

Erstelle ein JSON-Protokoll mit:
1. summary: Zusammenfassung in 3-5 Sätzen
2. topics: Liste der besprochenen Themen (maximal {max_topics})
3. decisions: Alle getroffenen Entscheidungen
4. action_items: Alle Aufgaben/TODOs mit Verantwortlichen (falls genannt)
5. open_questions: Offene Fragen, die nicht beantwortet wurden

Antworte NUR mit dem JSON-Objekt, ohne zusätzlichen Text."""


# ==========================================
# MEETING MINUTES GENERATOR
# ==========================================

class MeetingMinutesGenerator:
    """Generate structured meeting minutes from transcripts via the LLM gateway.

    The transcript is sent to the gateway's ``/v1/chat/completions`` endpoint,
    the JSON answer is parsed and wrapped in a ``MeetingMinutes`` model. One
    ``httpx.AsyncClient`` is created lazily and reused until ``close()``.
    """

    def __init__(self, llm_gateway_url: str = LLM_GATEWAY_URL):
        """Store the gateway base URL; the HTTP client is created on first use."""
        self.llm_gateway_url = llm_gateway_url
        self._client: Optional[httpx.AsyncClient] = None

    async def get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, creating it on the first call."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=LLM_TIMEOUT)
        return self._client

    async def close(self):
        """Close the HTTP client, if one was created, and forget it."""
        if self._client is not None:
            try:
                await self._client.aclose()
            finally:
                # Drop the reference even if closing fails, so the next
                # get_client() call starts with a fresh client.
                self._client = None

    async def _call_llm(
        self,
        messages: List[dict],
        model: str = LLM_MODEL,
        temperature: float = 0.3,
        max_tokens: int = 4096
    ) -> str:
        """Send a chat-completion request to the gateway and return the answer text.

        Args:
            messages: Chat messages (dicts with ``role`` and ``content``).
            model: Model name passed to the gateway.
            temperature: Sampling temperature passed to the gateway.
            max_tokens: Token limit passed to the gateway.

        Returns:
            The ``content`` of the first choice, or ``""`` when the response
            carries no text content.

        Raises:
            RuntimeError: On timeout, HTTP error status, or any other failure
                while calling the gateway or reading its response. The
                original exception is attached as ``__cause__``.
        """
        client = await self.get_client()

        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": False
        }

        try:
            response = await client.post(
                f"{self.llm_gateway_url}/v1/chat/completions",
                json=payload,
                timeout=LLM_TIMEOUT
            )
            response.raise_for_status()
            data = response.json()

            # "or" fallbacks treat a missing key, JSON null and an empty list
            # alike, so an empty "choices" array yields "" instead of an
            # IndexError.
            choices = data.get("choices") or [{}]
            message = choices[0].get("message") or {}
            content = message.get("content")
            # Honour the "-> str" contract even if content is null/non-text.
            return content if isinstance(content, str) else ""

        except httpx.TimeoutException as e:
            logger.error("LLM Gateway timeout")
            raise RuntimeError("LLM Gateway antwortet nicht (Timeout)") from e
        except httpx.HTTPStatusError as e:
            logger.error("LLM Gateway error: %s", e.response.status_code)
            raise RuntimeError(f"LLM Gateway Fehler: {e.response.status_code}") from e
        except Exception as e:
            logger.error("LLM call failed: %s", e)
            raise RuntimeError(f"LLM Aufruf fehlgeschlagen: {str(e)}") from e

    def _parse_llm_response(self, response: str) -> dict:
        """Parse the LLM answer into a dict, tolerating fences and extra prose.

        The answer is tried as-is (after removing a Markdown code fence that
        wraps it) and, if that fails, the span from the first ``{`` to the
        last ``}`` is tried. Only a JSON object is accepted.

        Args:
            response: Raw text returned by the LLM.

        Returns:
            The parsed JSON object, or a placeholder protocol with empty
            lists when no JSON object could be extracted.
        """
        text = (response or "").strip()

        # Remove a Markdown code fence that wraps the whole answer.
        if text.startswith("```json"):
            text = text[7:]
        if text.startswith("```"):
            text = text[3:]
        if text.endswith("```"):
            text = text[:-3]
        text = text.strip()

        candidates = [text]
        # Models often add a sentence before or after the JSON; the outermost
        # brace pair recovers the object in that case.
        first_brace, last_brace = text.find("{"), text.rfind("}")
        if 0 <= first_brace < last_brace:
            embedded = text[first_brace:last_brace + 1]
            if embedded != text:
                candidates.append(embedded)

        problem = "no JSON object found"
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError as e:
                problem = str(e)
                continue
            if isinstance(parsed, dict):
                return parsed
            # Valid JSON but e.g. a list or string: callers need an object.
            problem = f"expected a JSON object, got {type(parsed).__name__}"

        logger.warning("JSON parse error: %s. Response: %s...", problem, text[:200])
        # Fallback: empty protocol
        return {
            "summary": "Protokoll konnte nicht automatisch erstellt werden.",
            "topics": [],
            "decisions": [],
            "action_items": [],
            "open_questions": []
        }

    @staticmethod
    def _build_items(model_cls, raw_items, label: str) -> list:
        """Build ``model_cls`` instances from raw LLM entries, skipping invalid ones.

        Args:
            model_cls: Model class to instantiate (called as ``model_cls(**entry)``).
            raw_items: Value taken from the parsed response; anything that is
                not a list yields an empty result.
            label: Name used in log messages for skipped entries.

        Returns:
            The successfully built instances, in input order.
        """
        if not isinstance(raw_items, list):
            return []

        items = []
        for raw in raw_items:
            if not isinstance(raw, dict):
                logger.warning("Skipping %s entry of type %s", label, type(raw).__name__)
                continue
            # The prompt tells the model to write null for unknown values;
            # dropping those keys lets the field defaults apply instead of
            # failing validation on non-optional fields.
            present = {key: value for key, value in raw.items() if value is not None}
            try:
                items.append(model_cls(**present))
            except (TypeError, ValueError) as e:
                # pydantic's ValidationError derives from ValueError.
                logger.warning("Skipping invalid %s entry: %s", label, e)
        return items

    async def generate(
        self,
        transcript: str,
        recording_id: str,
        transcription_id: str,
        title: Optional[str] = None,
        date: Optional[str] = None,
        duration_minutes: Optional[int] = None,
        participant_count: int = 0,
        model: str = LLM_MODEL,
        max_topics: int = 10,
        include_action_items: bool = True,
        include_decisions: bool = True
    ) -> MeetingMinutes:
        """
        Generate meeting minutes from a transcript.

        Failures while calling the LLM or building the result do not raise;
        they are reported through ``status="failed"`` and ``error_message``.
        Individual malformed entries in the LLM answer are skipped with a
        warning instead of failing the whole protocol.

        Args:
            transcript: The full transcript text
            recording_id: ID of the recording
            transcription_id: ID of the transcription
            title: Meeting title (derived from the date when not given)
            date: Meeting date (today's UTC date as DD.MM.YYYY when not given)
            duration_minutes: Duration in minutes
            participant_count: Number of participants
            model: LLM model name
            max_topics: Maximum number of topics requested in the prompt
            include_action_items: Keep extracted action items in the result
            include_decisions: Keep extracted decisions in the result

        Returns:
            MeetingMinutes: The generated protocol, or a record with
            ``status="failed"`` when generation did not succeed
        """
        import uuid
        import time

        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        started = time.perf_counter()
        minutes_id = str(uuid.uuid4())

        # Defaults: resolve the date first so a generated title always shows
        # the same date as the date field.
        if not date:
            date = datetime.utcnow().strftime("%d.%m.%Y")
        if not title:
            title = f"Meeting vom {date}"

        # Truncate overly long transcripts (max ~8000 tokens ~ 32000 chars)
        max_chars = 32000
        if len(transcript) > max_chars:
            logger.warning("Transcript too long (%d chars), truncating...", len(transcript))
            transcript = transcript[:max_chars] + "\n\n[... Transkription gekürzt ...]"

        # Build the prompt
        user_prompt = EXTRACTION_PROMPT.format(
            title=title,
            date=date,
            duration=duration_minutes or "unbekannt",
            participant_count=participant_count,
            transcript=transcript,
            max_topics=max_topics
        )

        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt}
        ]

        try:
            logger.info("Generating minutes for recording %s using %s", recording_id, model)
            response = await self._call_llm(messages, model=model)
            parsed = self._parse_llm_response(response)

            # A missing, null or non-text summary falls back to the placeholder.
            summary = parsed.get("summary")
            if not isinstance(summary, str):
                summary = "Zusammenfassung nicht verfügbar."

            raw_questions = parsed.get("open_questions")
            if isinstance(raw_questions, list):
                open_questions = [str(q) for q in raw_questions if q is not None]
            else:
                open_questions = []

            topics = self._build_items(TopicSummary, parsed.get("topics"), "topic")
            decisions = (
                self._build_items(Decision, parsed.get("decisions"), "decision")
                if include_decisions else []
            )
            action_items = (
                self._build_items(ActionItem, parsed.get("action_items"), "action item")
                if include_action_items else []
            )

            generation_time = time.perf_counter() - started

            minutes = MeetingMinutes(
                id=minutes_id,
                recording_id=recording_id,
                transcription_id=transcription_id,
                title=title,
                date=date,
                duration_minutes=duration_minutes,
                participant_count=participant_count,
                language="de",
                summary=summary,
                topics=topics,
                decisions=decisions,
                action_items=action_items,
                open_questions=open_questions,
                model_used=model,
                generated_at=datetime.utcnow(),
                generation_time_seconds=round(generation_time, 2),
                status="completed"
            )

            logger.info(
                "Minutes generated in %.2fs: %d topics, %d action items",
                generation_time, len(minutes.topics), len(minutes.action_items)
            )

            return minutes

        except Exception as e:
            # Deliberate catch-all: callers get a "failed" record, not an exception.
            logger.error("Minutes generation failed: %s", e)
            return MeetingMinutes(
                id=minutes_id,
                recording_id=recording_id,
                transcription_id=transcription_id,
                title=title,
                date=date,
                duration_minutes=duration_minutes,
                participant_count=participant_count,
                language="de",
                summary="",
                model_used=model,
                status="failed",
                error_message=str(e)
            )


# ==========================================
# EXPORT FUNCTIONS
# ==========================================

def minutes_to_markdown(minutes: MeetingMinutes) -> str:
    """Render meeting minutes as a Markdown document.

    The document contains the header data, the summary, the topics and, when
    present, decisions, an action-item table and open questions, followed by
    a footer with generation details.

    Args:
        minutes: The minutes to render.

    Returns:
        The Markdown text.
    """
    def table_cell(value) -> str:
        # Inside a table row a literal "|" would start a new cell and a line
        # break would end the row, so escape the former and flatten the latter.
        text = str(value).replace("|", "\\|")
        return " ".join(text.splitlines())

    sections = [f"""# {minutes.title}

**Datum:** {minutes.date}
**Dauer:** {minutes.duration_minutes or 'unbekannt'} Minuten
**Teilnehmer:** {minutes.participant_count}

---

## Zusammenfassung

{minutes.summary}

---

## Besprochene Themen

"""]

    for i, topic in enumerate(minutes.topics, 1):
        sections.append(f"### {i}. {topic.title}\n\n")
        sections.append(f"{topic.summary}\n\n")
        if topic.participants:
            sections.append(f"*Beteiligte: {', '.join(topic.participants)}*\n\n")

    if minutes.decisions:
        sections.append("---\n\n## Entscheidungen\n\n")
        for decision in minutes.decisions:
            line = f"- **{decision.topic}:** {decision.decision}"
            if decision.rationale:
                line += f" *(Begründung: {decision.rationale})*"
            sections.append(line + "\n")
        sections.append("\n")

    if minutes.action_items:
        sections.append("---\n\n## Action Items\n\n")
        sections.append("| Aufgabe | Verantwortlich | Fällig | Priorität |\n")
        sections.append("|---------|----------------|--------|----------|\n")
        for item in minutes.action_items:
            sections.append(
                f"| {table_cell(item.task)} | {table_cell(item.assignee or '-')} "
                f"| {table_cell(item.deadline or '-')} | {table_cell(item.priority)} |\n"
            )
        sections.append("\n")

    if minutes.open_questions:
        sections.append("---\n\n## Offene Fragen\n\n")
        for q in minutes.open_questions:
            sections.append(f"- {q}\n")
        sections.append("\n")

    sections.append(f"""---

*Generiert am {minutes.generated_at.strftime('%d.%m.%Y um %H:%M Uhr')} mit {minutes.model_used}*
*Generierungszeit: {minutes.generation_time_seconds or 0:.1f} Sekunden*
""")

    return "".join(sections)


def minutes_to_html(minutes: MeetingMinutes) -> str:
    """Render meeting minutes as a standalone HTML page (for PDF conversion).

    All values taken from ``minutes`` are HTML-escaped before they are placed
    in the page, because they originate from the transcript and the LLM
    answer and must not be interpreted as markup.

    Args:
        minutes: The minutes to render.

    Returns:
        The HTML document as a string.
    """
    # Imported locally so this block does not depend on a module-level import.
    from html import escape

    def esc(value) -> str:
        # escape() also converts quotes, so the result is safe in attributes.
        return escape(str(value))

    title = esc(minutes.title)

    parts = [f"""<!DOCTYPE html>
<html lang="de">
<head>
    <meta charset="UTF-8">
    <title>{title}</title>
    <style>
        body {{ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }}
        h1 {{ color: #1a365d; border-bottom: 2px solid #3182ce; padding-bottom: 10px; }}
        h2 {{ color: #2c5282; margin-top: 30px; }}
        h3 {{ color: #2d3748; }}
        .meta {{ background: #f7fafc; padding: 15px; border-radius: 8px; margin-bottom: 20px; }}
        .meta p {{ margin: 5px 0; }}
        .summary {{ background: #ebf8ff; padding: 15px; border-left: 4px solid #3182ce; margin: 20px 0; }}
        table {{ width: 100%; border-collapse: collapse; margin: 15px 0; }}
        th, td {{ border: 1px solid #e2e8f0; padding: 10px; text-align: left; }}
        th {{ background: #edf2f7; }}
        .priority-high {{ color: #c53030; font-weight: bold; }}
        .priority-normal {{ color: #2d3748; }}
        .priority-low {{ color: #718096; }}
        .decision {{ background: #f0fff4; padding: 10px; border-left: 4px solid #38a169; margin: 10px 0; }}
        .question {{ background: #fffaf0; padding: 10px; border-left: 4px solid #dd6b20; margin: 10px 0; }}
        .footer {{ margin-top: 40px; padding-top: 20px; border-top: 1px solid #e2e8f0; font-size: 0.9em; color: #718096; }}
    </style>
</head>
<body>
    <h1>{title}</h1>

    <div class="meta">
        <p><strong>Datum:</strong> {esc(minutes.date)}</p>
        <p><strong>Dauer:</strong> {esc(minutes.duration_minutes or 'unbekannt')} Minuten</p>
        <p><strong>Teilnehmer:</strong> {esc(minutes.participant_count)}</p>
    </div>

    <h2>Zusammenfassung</h2>
    <div class="summary">
        <p>{esc(minutes.summary)}</p>
    </div>

    <h2>Besprochene Themen</h2>
"""]

    for i, topic in enumerate(minutes.topics, 1):
        parts.append(f"""    <h3>{i}. {esc(topic.title)}</h3>
    <p>{esc(topic.summary)}</p>
""")
        if topic.participants:
            parts.append(f"    <p><em>Beteiligte: {esc(', '.join(topic.participants))}</em></p>\n")

    if minutes.decisions:
        parts.append("    <h2>Entscheidungen</h2>\n")
        for decision in minutes.decisions:
            parts.append(f"""    <div class="decision">
        <strong>{esc(decision.topic)}:</strong> {esc(decision.decision)}
""")
            if decision.rationale:
                parts.append(f"        <br><em>Begründung: {esc(decision.rationale)}</em>\n")
            parts.append("    </div>\n")

    if minutes.action_items:
        parts.append("""    <h2>Action Items</h2>
    <table>
        <thead>
            <tr><th>Aufgabe</th><th>Verantwortlich</th><th>Fällig</th><th>Priorität</th></tr>
        </thead>
        <tbody>
""")
        for item in minutes.action_items:
            # The priority is free text from the LLM and ends up inside an
            # attribute value, so it is escaped like everything else.
            priority = esc(item.priority)
            parts.append(f"""            <tr>
                <td>{esc(item.task)}</td>
                <td>{esc(item.assignee or '-')}</td>
                <td>{esc(item.deadline or '-')}</td>
                <td class="priority-{priority}">{priority}</td>
            </tr>
""")
        parts.append("""        </tbody>
    </table>
""")

    if minutes.open_questions:
        parts.append("    <h2>Offene Fragen</h2>\n")
        for q in minutes.open_questions:
            parts.append(f'    <div class="question">{esc(q)}</div>\n')

    parts.append(f"""
    <div class="footer">
        <p>Generiert am {minutes.generated_at.strftime('%d.%m.%Y um %H:%M Uhr')} mit {esc(minutes.model_used)}</p>
        <p>Generierungszeit: {minutes.generation_time_seconds or 0:.1f} Sekunden</p>
    </div>
</body>
</html>
""")

    return "".join(parts)


# ==========================================
# SINGLETON
# ==========================================

# Module-wide generator instance; stays None until the accessor is first called.
_generator: Optional[MeetingMinutesGenerator] = None


def get_minutes_generator() -> MeetingMinutesGenerator:
    """Return the shared MeetingMinutesGenerator, creating it on first use."""
    global _generator
    if _generator is not None:
        return _generator
    _generator = MeetingMinutesGenerator()
    return _generator