feat(pitch-deck): add AI Presenter mode with LiteLLM migration and FAQ system
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-consent (push) Successful in 27s
CI / test-python-voice (push) Successful in 25s
CI / test-bqas (push) Successful in 25s
CI / Deploy (push) Successful in 4s

- Migrate chat API from Ollama to LiteLLM (OpenAI-compatible SSE)
- Add 15-min presenter storyline with bilingual scripts for all 20 slides
- Add FAQ system (30 entries) with keyword matching for instant answers
- Add IntroPresenterSlide with avatar placeholder and start button
- Add PresenterOverlay (progress bar, subtitle text, play/pause/stop)
- Add AvatarPlaceholder with pulse animation during speaking
- Add usePresenterMode hook (state machine: idle→presenting→paused→answering→resuming)
- Add 'P' keyboard shortcut to toggle presenter mode
- Support [GOTO:slide-id] markers in chat responses
- Dynamic slide count (was hardcoded 13, now from SLIDE_ORDER)
- TTS stub prepared for future Piper integration

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-20 11:45:55 +01:00
parent df0a9d6cf0
commit 3a2567b44d
20 changed files with 2434 additions and 164 deletions

View File

@@ -1,8 +1,36 @@
import { NextRequest, NextResponse } from 'next/server'
import pool from '@/lib/db'
import { SLIDE_ORDER } from '@/lib/hooks/useSlideNavigation'
// LLM backend configuration; every value is overridable via environment variables.
// NOTE(review): this view is a diff capture — the OLLAMA_* constants look like the
// removed pre-migration lines and the LITELLM_* constants their replacement; only
// one set should exist in the final file. TODO confirm against the repository.
const OLLAMA_URL = process.env.OLLAMA_URL || 'http://host.docker.internal:11434'
const OLLAMA_MODEL = process.env.OLLAMA_MODEL || 'qwen2.5:32b'
// LiteLLM proxy (OpenAI-compatible API); API key is optional — when set it is
// sent as a Bearer token on /v1/chat/completions requests (see POST handler).
const LITELLM_URL = process.env.LITELLM_URL || 'https://llm-dev.meghsakha.com'
const LITELLM_MODEL = process.env.LITELLM_MODEL || 'gpt-oss-120b'
const LITELLM_API_KEY = process.env.LITELLM_API_KEY || ''
// Build SLIDE_NAMES dynamically from SLIDE_ORDER
// Human-readable display names (de / en) keyed by slide ID. Keys are expected to
// match the IDs in SLIDE_ORDER; lookup sites fall back to the raw slide ID when a
// key is missing, so an unknown ID degrades gracefully rather than crashing.
// NOTE(review): German umlauts are transliterated ("Loesung", "Geschaeftsmodell") —
// presumably intentional to keep the file ASCII-only; confirm this is desired in UI copy.
const SLIDE_DISPLAY_NAMES: Record<string, { de: string; en: string }> = {
'intro-presenter': { de: 'Intro', en: 'Intro' },
'cover': { de: 'Cover', en: 'Cover' },
'problem': { de: 'Das Problem', en: 'The Problem' },
'solution': { de: 'Die Loesung', en: 'The Solution' },
'product': { de: 'Produkte', en: 'Products' },
'how-it-works': { de: 'So funktionierts', en: 'How It Works' },
'market': { de: 'Markt', en: 'Market' },
'business-model': { de: 'Geschaeftsmodell', en: 'Business Model' },
'traction': { de: 'Traction', en: 'Traction' },
'competition': { de: 'Wettbewerb', en: 'Competition' },
'team': { de: 'Team', en: 'Team' },
'financials': { de: 'Finanzen', en: 'Financials' },
'the-ask': { de: 'The Ask', en: 'The Ask' },
'ai-qa': { de: 'KI Q&A', en: 'AI Q&A' },
'annex-assumptions': { de: 'Anhang: Annahmen', en: 'Appendix: Assumptions' },
'annex-architecture': { de: 'Anhang: Architektur', en: 'Appendix: Architecture' },
'annex-gtm': { de: 'Anhang: Go-to-Market', en: 'Appendix: Go-to-Market' },
'annex-regulatory': { de: 'Anhang: Regulatorik', en: 'Appendix: Regulatory' },
'annex-engineering': { de: 'Anhang: Engineering', en: 'Appendix: Engineering' },
'annex-aipipeline': { de: 'Anhang: KI-Pipeline', en: 'Appendix: AI Pipeline' },
}
// Total slide count derived from SLIDE_ORDER (replaces the previously hardcoded 13,
// per the commit message), so adding a slide automatically updates prompt text below.
const slideCount = SLIDE_ORDER.length
const SYSTEM_PROMPT = `# Investor Agent — BreakPilot ComplAI
@@ -40,7 +68,8 @@ Stattdessen: "Proprietaere KI-Engine", "Self-Hosted Appliance auf Apple-Hardware
## Slide-Awareness (IMMER beachten)
Du erhaeltst den aktuellen Slide-Kontext. Nutze ihn fuer kontextuelle Antworten.
Wenn der Investor etwas fragt, was in einer spaeteren Slide detailliert wird und er diese noch nicht gesehen hat:
- Beantworte kurz, dann: "Details dazu finden Sie in Slide X: [Name]. Moechten Sie dorthin springen? [GOTO:X]"
- Beantworte kurz, dann: "Details dazu finden Sie in Slide X: [Name]. Moechten Sie dorthin springen? [GOTO:slide-id]"
- Verwende [GOTO:slide-id] mit der Slide-ID (z.B. [GOTO:financials], [GOTO:competition])
## FOLLOW-UP FRAGEN — KRITISCHE PFLICHT
@@ -118,47 +147,65 @@ ${JSON.stringify(features.rows, null, 2)}
export async function POST(request: NextRequest) {
try {
const body = await request.json()
const { message, history = [], lang = 'de', slideContext } = body
const { message, history = [], lang = 'de', slideContext, faqAnswer } = body
if (!message || typeof message !== 'string') {
return NextResponse.json({ error: 'Message is required' }, { status: 400 })
}
// FAQ shortcut: if client sends a pre-cached FAQ answer, stream it directly (no LLM call)
if (faqAnswer && typeof faqAnswer === 'string') {
const encoder = new TextEncoder()
const stream = new ReadableStream({
start(controller) {
// Stream the FAQ answer in chunks for consistent UX
const words = faqAnswer.split(' ')
let i = 0
const interval = setInterval(() => {
if (i < words.length) {
const chunk = (i === 0 ? '' : ' ') + words[i]
controller.enqueue(encoder.encode(chunk))
i++
} else {
clearInterval(interval)
controller.close()
}
}, 30)
},
})
return new NextResponse(stream, {
headers: {
'Content-Type': 'text/plain; charset=utf-8',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
},
})
}
const pitchContext = await loadPitchContext()
let systemContent = SYSTEM_PROMPT
if (pitchContext) {
systemContent += '\n' + pitchContext
}
// Slide context for contextual awareness
if (slideContext) {
const SLIDE_NAMES: Record<string, { de: string; en: string; index: number }> = {
'cover': { de: 'Cover', en: 'Cover', index: 0 },
'problem': { de: 'Das Problem', en: 'The Problem', index: 1 },
'solution': { de: 'Die Loesung', en: 'The Solution', index: 2 },
'product': { de: 'Produkte', en: 'Products', index: 3 },
'how-it-works': { de: 'So funktionierts', en: 'How It Works', index: 4 },
'market': { de: 'Markt', en: 'Market', index: 5 },
'business-model': { de: 'Geschaeftsmodell', en: 'Business Model', index: 6 },
'traction': { de: 'Traction', en: 'Traction', index: 7 },
'competition': { de: 'Wettbewerb', en: 'Competition', index: 8 },
'team': { de: 'Team', en: 'Team', index: 9 },
'financials': { de: 'Finanzen', en: 'Financials', index: 10 },
'the-ask': { de: 'The Ask', en: 'The Ask', index: 11 },
'ai-qa': { de: 'KI Q&A', en: 'AI Q&A', index: 12 },
}
const slideKeys = Object.keys(SLIDE_NAMES)
const visited: number[] = slideContext.visitedSlides || []
const currentSlideName = SLIDE_NAMES[slideContext.currentSlide]?.[lang] || slideContext.currentSlide
const notYetSeen = Object.entries(SLIDE_NAMES)
.filter(([, v]) => !visited.includes(v.index))
.map(([, v]) => `${v.index + 1}. ${v[lang]}`)
const currentSlideId = slideContext.currentSlide
const currentSlideName = SLIDE_DISPLAY_NAMES[currentSlideId]?.[lang] || currentSlideId
const notYetSeen = SLIDE_ORDER
.map((id, idx) => ({ id, idx, name: SLIDE_DISPLAY_NAMES[id]?.[lang] || id }))
.filter(s => !visited.includes(s.idx))
.map(s => `${s.idx + 1}. ${s.name}`)
systemContent += `\n\n## Slide-Kontext (WICHTIG fuer kontextuelle Antworten)
- Aktuelle Slide: "${currentSlideName}" (Nr. ${slideContext.currentIndex + 1} von 13)
- Bereits besuchte Slides: ${visited.map((i: number) => SLIDE_NAMES[slideKeys[i]]?.[lang]).filter(Boolean).join(', ')}
- Aktuelle Slide: "${currentSlideName}" (Nr. ${slideContext.currentIndex + 1} von ${slideCount})
- Bereits besuchte Slides: ${visited.map((i: number) => SLIDE_DISPLAY_NAMES[SLIDE_ORDER[i]]?.[lang] || SLIDE_ORDER[i]).filter(Boolean).join(', ')}
- Noch nicht gesehene Slides: ${notYetSeen.join(', ')}
- Ist Erstbesuch: ${visited.length <= 1 ? 'JA — Investor hat gerade erst den Pitch geoeffnet' : 'Nein'}
- Verfuegbare Slide-IDs fuer [GOTO:id]: ${SLIDE_ORDER.join(', ')}
`
}
@@ -173,54 +220,84 @@ export async function POST(request: NextRequest) {
{ role: 'user', content: message + '\n\n(Erinnerung: Beende deine Antwort IMMER mit "---" gefolgt von 3 Folgefragen im Format "[Q] Frage?")' },
]
const ollamaResponse = await fetch(`${OLLAMA_URL}/api/chat`, {
// LiteLLM (OpenAI-compatible API)
const headers: Record<string, string> = {
'Content-Type': 'application/json',
}
if (LITELLM_API_KEY) {
headers['Authorization'] = `Bearer ${LITELLM_API_KEY}`
}
const llmResponse = await fetch(`${LITELLM_URL}/v1/chat/completions`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers,
body: JSON.stringify({
model: OLLAMA_MODEL,
model: LITELLM_MODEL,
messages,
stream: true,
think: false,
options: {
temperature: 0.4,
num_predict: 4096,
num_ctx: 8192,
},
temperature: 0.4,
max_tokens: 4096,
}),
signal: AbortSignal.timeout(120000),
})
if (!ollamaResponse.ok) {
const errorText = await ollamaResponse.text()
console.error('Ollama error:', ollamaResponse.status, errorText)
if (!llmResponse.ok) {
const errorText = await llmResponse.text()
console.error('LiteLLM error:', llmResponse.status, errorText)
return NextResponse.json(
{ error: `LLM nicht erreichbar (Status ${ollamaResponse.status}).` },
{ error: `LLM nicht erreichbar (Status ${llmResponse.status}).` },
{ status: 502 }
)
}
// Parse SSE stream from LiteLLM and emit plain text to client
const encoder = new TextEncoder()
const stream = new ReadableStream({
async start(controller) {
const reader = ollamaResponse.body!.getReader()
const reader = llmResponse.body!.getReader()
const decoder = new TextDecoder()
let buffer = ''
try {
while (true) {
const { done, value } = await reader.read()
if (done) break
const chunk = decoder.decode(value, { stream: true })
const lines = chunk.split('\n').filter((l) => l.trim())
buffer += decoder.decode(value, { stream: true })
const lines = buffer.split('\n')
// Keep the last (potentially incomplete) line in the buffer
buffer = lines.pop() || ''
for (const line of lines) {
const trimmed = line.trim()
if (!trimmed || !trimmed.startsWith('data: ')) continue
const data = trimmed.slice(6)
if (data === '[DONE]') continue
try {
const json = JSON.parse(line)
if (json.message?.content) {
controller.enqueue(encoder.encode(json.message.content))
const json = JSON.parse(data)
const content = json.choices?.[0]?.delta?.content
if (content) {
controller.enqueue(encoder.encode(content))
}
} catch {
// Partial JSON line, skip
// Partial JSON, skip
}
}
}
// Process any remaining buffer
if (buffer.trim()) {
const trimmed = buffer.trim()
if (trimmed.startsWith('data: ') && trimmed.slice(6) !== '[DONE]') {
try {
const json = JSON.parse(trimmed.slice(6))
const content = json.choices?.[0]?.delta?.content
if (content) {
controller.enqueue(encoder.encode(content))
}
} catch {
// Ignore
}
}
}