fix(advisor): Compliance-Advisor auf prod reparieren — RAG via ai-sdk (bge-m3) + OVH-LLM

Der Floating-Compliance-Advisor war auf prod kaputt (502): RAG ging ueber rag-service:8097 (auf prod nicht vorhanden) und der Chat ueber OLLAMA_URL=ollama-embed (embedding-only, kein qwen2.5vl).

- RAG laeuft jetzt ueber die ai-compliance-sdk /sdk/v1/rag/search (bge-m3, prod-erreichbar) statt rag-service -> profitiert vom reicheren Embedding. (lib/sdk/agents/advisor-rag.ts)
- LLM-Kaskade: OVH/LiteLLM (gpt-oss-120b) zuerst, Ollama als Dev-Fallback. (lib/sdk/agents/advisor-llm.ts; OVH-Env via orca-infra admin-Block)
- ai-sdk: bp_compliance_recht in AllowedCollections ergaenzt (Whitelist war inkonsistent — die Fehlermeldung listete es bereits als erlaubt).
- Route auf die Module umgestellt (duenn); Controls-Augmentation unveraendert.
- Tests: advisor-rag + advisor-llm.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-19 09:22:44 +02:00
parent f0a0a887fd
commit cd3e0b15ad
6 changed files with 381 additions and 182 deletions
@@ -1,35 +1,22 @@
 /**
 * Compliance Advisor Chat API
 *
- * Connects the ComplianceAdvisorWidget to:
- * 1. Multi-Collection RAG search (rag-service) for context across 6 collections
- * 2. Ollama LLM (32B) for generating answers
+ * Verbindet das ComplianceAdvisorWidget mit:
+ *   1. Multi-Collection-RAG ueber die ai-compliance-sdk (bge-m3) — siehe advisor-rag
+ *   2. Strukturierten Controls zum erkannten Thema — buildControlsContext
+ *   3. LLM-Kaskade OVH (prod) -> Ollama (Dev) — siehe advisor-llm
 *
- * Supports country-specific filtering (DE, AT, CH, EU).
- * Streams the LLM response back as plain text.
+ * Die Laenderauswahl (DE, AT, CH, EU) erweitert nur den System-Prompt (countryBlock); der RAG-Aufruf erhaelt das Land nicht. Streamt die Antwort als Text.
 */

 import { NextRequest, NextResponse } from 'next/server'
 import { readSoulFile } from '@/lib/sdk/agents/soul-reader'
 import { buildControlsContext } from '@/lib/sdk/agents/controls-augmentation'
-
-const RAG_SERVICE_URL = process.env.RAG_SERVICE_URL || 'http://rag-service:8097'
-const OLLAMA_URL = process.env.OLLAMA_URL || 'http://host.docker.internal:11434'
-const LLM_MODEL = process.env.COMPLIANCE_LLM_MODEL || 'qwen2.5vl:32b'
-
-// All compliance-relevant collections (without NiBiS)
-const COMPLIANCE_COLLECTIONS = [
-  'bp_compliance_gesetze',
-  'bp_compliance_ce',
-  'bp_compliance_datenschutz',
-  'bp_dsfa_corpus',
-  'bp_compliance_recht',
-  'bp_legal_templates',
-] as const
+import { queryAdvisorRAG } from '@/lib/sdk/agents/advisor-rag'
+import { streamAdvisorAnswer, type ChatMessage } from '@/lib/sdk/agents/advisor-llm'

 type Country = 'DE' | 'AT' | 'CH' | 'EU'

-// Fallback SOUL prompt (used when .soul.md file is unavailable)
 const FALLBACK_SYSTEM_PROMPT = `# Compliance Advisor Agent

 ## Identitaet
@@ -49,81 +36,24 @@ const COUNTRY_LABELS: Record<Country, string> = {
  EU: 'EU-weit',
 }

-interface RAGSearchResult {
-  content: string
-  source_name?: string
-  source_code?: string
-  attribution_text?: string
-  score: number
-  collection?: string
-  metadata?: Record<string, unknown>
-}
-
-/**
- * Query multiple RAG collections in parallel, with optional country filter
- */
-async function queryMultiCollectionRAG(query: string, country?: Country): Promise<string> {
-  try {
-    const searchPromises = COMPLIANCE_COLLECTIONS.map(async (collection) => {
-      const searchBody: Record<string, unknown> = {
-        query,
-        collection,
-        top_k: 3,
-      }
-
-      // Apply country filter for gesetze collection
-      if (collection === 'bp_compliance_gesetze' && country && country !== 'EU') {
-        searchBody.metadata_filter = {
-          must: [
-            {
-              key: 'country',
-              match: { any: [country, 'EU'] },
-            },
-          ],
-        }
-      }
-
-      const res = await fetch(`${RAG_SERVICE_URL}/api/v1/search`, {
-        method: 'POST',
-        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify(searchBody),
-        signal: AbortSignal.timeout(10000),
-      })
-
-      if (!res.ok) return []
-
-      const data = await res.json()
-      return (data.results || []).map((r: RAGSearchResult) => ({
-        ...r,
-        collection,
-      }))
-    })
-
-    const settled = await Promise.allSettled(searchPromises)
-    const allResults: RAGSearchResult[] = []
-
-    for (const result of settled) {
-      if (result.status === 'fulfilled') {
-        allResults.push(...result.value)
-      }
-    }
-
-    // Sort by score descending, take top 8
-    allResults.sort((a, b) => b.score - a.score)
-    const topResults = allResults.slice(0, 8)
-
-    if (topResults.length === 0) return ''
-
-    return topResults
-      .map((r, i) => {
-        const source = r.source_name || r.source_code || 'Unbekannt'
-        return `[Quelle ${i + 1}: ${source}]\n${r.content || ''}`
-      })
-      .join('\n\n---\n\n')
-  } catch (error) {
-    console.warn('Multi-collection RAG query error (continuing without context):', error)
-    return ''
-  }
+function countryBlock(c: Country): string {
+  const label = COUNTRY_LABELS[c]
+  const nationalLaws =
+    c === 'DE'
+      ? 'BDSG, TDDDG, TKG, UWG'
+      : c === 'AT'
+        ? 'AT DSG, ECG, TKG, KSchG, MedienG'
+        : 'CH DSG, DSV, OR, UWG, FMG'
+  const guidance =
+    c === 'EU'
+      ? 'EU-weiten Fragen: Beziehe dich auf EU-Verordnungen und -Richtlinien'
+      : `${label}: Beziehe nationale Gesetze (${nationalLaws}) mit ein`
+  return `\n\n## Laenderspezifische Auskunft
+Der Nutzer hat "${label} (${c})" gewaehlt.
+- Beziehe dich AUSSCHLIESSLICH auf ${c}-Recht + anwendbares EU-Recht
+- Nenne IMMER explizit das Land in deiner Antwort
+- Verwende NIEMALS Gesetze eines anderen Landes
+- Bei ${guidance}`
 }

 export async function POST(request: NextRequest) {
@@ -135,42 +65,28 @@ export async function POST(request: NextRequest) {
      return NextResponse.json({ error: 'Message is required' }, { status: 400 })
    }

-    // Validate country parameter
-    const validCountry = ['DE', 'AT', 'CH', 'EU'].includes(country) ? (country as Country) : undefined
+    const validCountry = (['DE', 'AT', 'CH', 'EU'] as const).includes(country)
+      ? (country as Country)
+      : undefined

-    // 1. Query RAG across all collections + structured controls for the topic
-    //    (both local; the controls block lets the agent answer from real Control-IDs)
+    // 1. RAG (ai-sdk, bge-m3) + strukturierte Controls zum Thema — beide parallel
    const [ragContext, controlsContext] = await Promise.all([
-      queryMultiCollectionRAG(message, validCountry),
+      queryAdvisorRAG(message),
      buildControlsContext(message),
    ])

-    // 2. Build system prompt with RAG context + country
+    // 2. System-Prompt zusammenbauen
    const soulPrompt = await readSoulFile('compliance-advisor')
    let systemContent = soulPrompt || FALLBACK_SYSTEM_PROMPT
-
-    if (validCountry) {
-      const countryLabel = COUNTRY_LABELS[validCountry]
-      systemContent += `\n\n## Laenderspezifische Auskunft
-Der Nutzer hat "${countryLabel} (${validCountry})" gewaehlt.
- Beziehe dich AUSSCHLIESSLICH auf ${validCountry}-Recht + anwendbares EU-Recht
- Nenne IMMER explizit das Land in deiner Antwort
- Verwende NIEMALS Gesetze eines anderen Landes
- Bei ${validCountry === 'EU' ? 'EU-weiten Fragen: Beziehe dich auf EU-Verordnungen und -Richtlinien' : `${countryLabel}: Beziehe nationale Gesetze (${validCountry === 'DE' ? 'BDSG, TDDDG, TKG, UWG' : validCountry === 'AT' ? 'AT DSG, ECG, TKG, KSchG, MedienG' : 'CH DSG, DSV, OR, UWG, FMG'}) mit ein`}`
-    }
-
+    if (validCountry) systemContent += countryBlock(validCountry)
    if (ragContext) {
      systemContent += `\n\n## Relevanter Kontext aus dem RAG-System\n\nNutze die folgenden Quellen fuer deine Antwort. Verweise in deiner Antwort auf die jeweilige Quelle:\n\n${ragContext}`
    }
-
-    if (controlsContext) {
-      systemContent += `\n\n${controlsContext}`
-    }
-
+    if (controlsContext) systemContent += `\n\n${controlsContext}`
    systemContent += `\n\n## Aktueller SDK-Schritt\nDer Nutzer befindet sich im SDK-Schritt: ${currentStep}`

-    // 3. Build messages array (limit history to last 6 messages)
-    const messages = [
+    // 3. Nachrichten (History auf die letzten 6 begrenzen)
+    const messages: ChatMessage[] = [
      { role: 'system', content: systemContent },
      ...history.slice(-6).map((h: { role: string; content: string }) => ({
        role: h.role === 'user' ? 'user' : 'assistant',
@@ -179,82 +95,27 @@ Der Nutzer hat "${countryLabel} (${validCountry})" gewaehlt.
      { role: 'user', content: message },
    ]

-    // 4. Call Ollama with streaming
-    const ollamaResponse = await fetch(`${OLLAMA_URL}/api/chat`, {
-      method: 'POST',
-      headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({
-        model: LLM_MODEL,
-        messages,
-        stream: true,
-        think: false,
-        // Modell im VRAM halten → kein Kaltstart bei der naechsten Frage
-        // (Kaltstart eines 35b-Modells war die Ursache fuer "Load failed").
-        keep_alive: '30m',
-        options: {
-          temperature: 0.3,
-          num_predict: 8192,
-          num_ctx: 8192,
-        },
-      }),
-      signal: AbortSignal.timeout(120000),
-    })
-
-    if (!ollamaResponse.ok) {
-      const errorText = await ollamaResponse.text()
-      console.error('Ollama error:', ollamaResponse.status, errorText)
+    // 4. LLM-Kaskade -> Plain-Text-Stream
+    const stream = await streamAdvisorAnswer(messages)
+    if (!stream) {
      return NextResponse.json(
-        { error: `LLM nicht erreichbar (Status ${ollamaResponse.status}). Ist Ollama mit dem Modell ${LLM_MODEL} gestartet?` },
-        { status: 502 }
+        { error: 'LLM nicht erreichbar. Weder OVH/LiteLLM noch Ollama haben geantwortet.' },
+        { status: 502 },
      )
    }

-    // 5. Stream response back as plain text
-    const encoder = new TextEncoder()
-    const stream = new ReadableStream({
-      async start(controller) {
-        const reader = ollamaResponse.body!.getReader()
-        const decoder = new TextDecoder()
-
-        try {
-          while (true) {
-            const { done, value } = await reader.read()
-            if (done) break
-
-            const chunk = decoder.decode(value, { stream: true })
-            const lines = chunk.split('\n').filter((l) => l.trim())
-
-            for (const line of lines) {
-              try {
-                const json = JSON.parse(line)
-                if (json.message?.content) {
-                  controller.enqueue(encoder.encode(json.message.content))
-                }
-              } catch {
-                // Partial JSON line, skip
-              }
-            }
-          }
-        } catch (error) {
-          console.error('Stream read error:', error)
-        } finally {
-          controller.close()
-        }
-      },
-    })
-
    return new NextResponse(stream, {
      headers: {
        'Content-Type': 'text/plain; charset=utf-8',
        'Cache-Control': 'no-cache',
-        'Connection': 'keep-alive',
+        Connection: 'keep-alive',
      },
    })
  } catch (error) {
    console.error('Compliance advisor chat error:', error)
    return NextResponse.json(
-      { error: 'Verbindung zum LLM fehlgeschlagen. Bitte pruefen Sie ob Ollama laeuft.' },
-      { status: 503 }
+      { error: 'Verbindung zum LLM fehlgeschlagen.' },
+      { status: 503 },
    )
  }
 }