fix(advisor): Compliance-Advisor auf prod reparieren — RAG via ai-sdk (bge-m3) + OVH-LLM

Der Floating-Compliance-Advisor war auf prod kaputt (502): RAG ging ueber rag-service:8097 (auf prod nicht vorhanden) und der Chat ueber OLLAMA_URL=ollama-embed (embedding-only, kein qwen2.5vl).

- RAG laeuft jetzt ueber die ai-compliance-sdk /sdk/v1/rag/search (bge-m3, prod-erreichbar) statt rag-service -> profitiert vom reicheren Embedding. (lib/sdk/agents/advisor-rag.ts)
- LLM-Kaskade: OVH/LiteLLM (gpt-oss-120b) zuerst, Ollama als Dev-Fallback. (lib/sdk/agents/advisor-llm.ts; OVH-Env via orca-infra admin-Block)
- ai-sdk: bp_compliance_recht in AllowedCollections ergaenzt (Whitelist war inkonsistent — die Fehlermeldung listete es bereits als erlaubt).
- Route auf die Module umgestellt (duenn); Controls-Augmentation unveraendert.
- Tests: advisor-rag + advisor-llm.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-19 09:22:44 +02:00
parent f0a0a887fd
commit cd3e0b15ad
6 changed files with 381 additions and 182 deletions
@@ -1,35 +1,22 @@
 /**
 * Compliance Advisor Chat API
 *
- * Connects the ComplianceAdvisorWidget to:
- * 1. Multi-Collection RAG search (rag-service) for context across 6 collections
- * 2. Ollama LLM (32B) for generating answers
+ * Connects the ComplianceAdvisorWidget to:
+ *   1. Multi-collection RAG via the ai-compliance-sdk (bge-m3) — see advisor-rag
+ *   2. Structured controls for the detected topic — buildControlsContext
+ *   3. LLM cascade OVH (prod) -> Ollama (dev) — see advisor-llm
 *
- * Supports country-specific filtering (DE, AT, CH, EU).
- * Streams the LLM response back as plain text.
+ * The selected country (DE, AT, CH, EU) only shapes the system prompt; the RAG
+ * search itself is not filtered by country. Streams the answer as plain text.
 */

 import { NextRequest, NextResponse } from 'next/server'
 import { readSoulFile } from '@/lib/sdk/agents/soul-reader'
 import { buildControlsContext } from '@/lib/sdk/agents/controls-augmentation'
-
-const RAG_SERVICE_URL = process.env.RAG_SERVICE_URL || 'http://rag-service:8097'
-const OLLAMA_URL = process.env.OLLAMA_URL || 'http://host.docker.internal:11434'
-const LLM_MODEL = process.env.COMPLIANCE_LLM_MODEL || 'qwen2.5vl:32b'
-
-// All compliance-relevant collections (without NiBiS)
-const COMPLIANCE_COLLECTIONS = [
-  'bp_compliance_gesetze',
-  'bp_compliance_ce',
-  'bp_compliance_datenschutz',
-  'bp_dsfa_corpus',
-  'bp_compliance_recht',
-  'bp_legal_templates',
-] as const
+import { queryAdvisorRAG } from '@/lib/sdk/agents/advisor-rag'
+import { streamAdvisorAnswer, type ChatMessage } from '@/lib/sdk/agents/advisor-llm'

 type Country = 'DE' | 'AT' | 'CH' | 'EU'

-// Fallback SOUL prompt (used when .soul.md file is unavailable)
 const FALLBACK_SYSTEM_PROMPT = `# Compliance Advisor Agent

 ## Identitaet
@@ -49,81 +36,24 @@ const COUNTRY_LABELS: Record<Country, string> = {
  EU: 'EU-weit',
 }

-interface RAGSearchResult {
-  content: string
-  source_name?: string
-  source_code?: string
-  attribution_text?: string
-  score: number
-  collection?: string
-  metadata?: Record<string, unknown>
-}
-
-/**
- * Query multiple RAG collections in parallel, with optional country filter
- */
-async function queryMultiCollectionRAG(query: string, country?: Country): Promise<string> {
-  try {
-    const searchPromises = COMPLIANCE_COLLECTIONS.map(async (collection) => {
-      const searchBody: Record<string, unknown> = {
-        query,
-        collection,
-        top_k: 3,
-      }
-
-      // Apply country filter for gesetze collection
-      if (collection === 'bp_compliance_gesetze' && country && country !== 'EU') {
-        searchBody.metadata_filter = {
-          must: [
-            {
-              key: 'country',
-              match: { any: [country, 'EU'] },
-            },
-          ],
-        }
-      }
-
-      const res = await fetch(`${RAG_SERVICE_URL}/api/v1/search`, {
-        method: 'POST',
-        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify(searchBody),
-        signal: AbortSignal.timeout(10000),
-      })
-
-      if (!res.ok) return []
-
-      const data = await res.json()
-      return (data.results || []).map((r: RAGSearchResult) => ({
-        ...r,
-        collection,
-      }))
-    })
-
-    const settled = await Promise.allSettled(searchPromises)
-    const allResults: RAGSearchResult[] = []
-
-    for (const result of settled) {
-      if (result.status === 'fulfilled') {
-        allResults.push(...result.value)
-      }
-    }
-
-    // Sort by score descending, take top 8
-    allResults.sort((a, b) => b.score - a.score)
-    const topResults = allResults.slice(0, 8)
-
-    if (topResults.length === 0) return ''
-
-    return topResults
-      .map((r, i) => {
-        const source = r.source_name || r.source_code || 'Unbekannt'
-        return `[Quelle ${i + 1}: ${source}]\n${r.content || ''}`
-      })
-      .join('\n\n---\n\n')
-  } catch (error) {
-    console.warn('Multi-collection RAG query error (continuing without context):', error)
-    return ''
-  }
+/**
+ * Builds the country-specific section that is appended to the system prompt.
+ *
+ * @param c - Country selected by the user.
+ * @returns Prompt section (starting with two newlines) that names the selected
+ *   country and tells the model which laws to rely on.
+ */
+function countryBlock(c: Country): string {
+  const label = COUNTRY_LABELS[c]
+  // National statutes to mention; the CH list is the default and goes unused for 'EU'.
+  let nationalLaws = 'CH DSG, DSV, OR, UWG, FMG'
+  if (c === 'DE') nationalLaws = 'BDSG, TDDDG, TKG, UWG'
+  else if (c === 'AT') nationalLaws = 'AT DSG, ECG, TKG, KSchG, MedienG'
+  // 'EU' gets a fixed hint, every other country a hint listing its national laws.
+  let guidance = `${label}: Beziehe nationale Gesetze (${nationalLaws}) mit ein`
+  if (c === 'EU') guidance = 'EU-weiten Fragen: Beziehe dich auf EU-Verordnungen und -Richtlinien'
+  const sectionLines = [
+    '\n\n## Laenderspezifische Auskunft',
+    `Der Nutzer hat "${label} (${c})" gewaehlt.`,
+    `- Beziehe dich AUSSCHLIESSLICH auf ${c}-Recht + anwendbares EU-Recht`,
+    '- Nenne IMMER explizit das Land in deiner Antwort',
+    '- Verwende NIEMALS Gesetze eines anderen Landes',
+    `- Bei ${guidance}`,
+  ]
+  return sectionLines.join('\n')
 }

 export async function POST(request: NextRequest) {
@@ -135,42 +65,28 @@ export async function POST(request: NextRequest) {
      return NextResponse.json({ error: 'Message is required' }, { status: 400 })
    }

-    // Validate country parameter
-    const validCountry = ['DE', 'AT', 'CH', 'EU'].includes(country) ? (country as Country) : undefined
+    const validCountry = (['DE', 'AT', 'CH', 'EU'] as const).includes(country)
+      ? (country as Country)
+      : undefined

-    // 1. Query RAG across all collections + structured controls for the topic
-    //    (both local; the controls block lets the agent answer from real Control-IDs)
+    // 1. RAG (ai-sdk, bge-m3) + strukturierte Controls zum Thema — beide parallel
    const [ragContext, controlsContext] = await Promise.all([
-      queryMultiCollectionRAG(message, validCountry),
+      queryAdvisorRAG(message),
      buildControlsContext(message),
    ])

-    // 2. Build system prompt with RAG context + country
+    // 2. System-Prompt zusammenbauen
    const soulPrompt = await readSoulFile('compliance-advisor')
    let systemContent = soulPrompt || FALLBACK_SYSTEM_PROMPT
-
-    if (validCountry) {
-      const countryLabel = COUNTRY_LABELS[validCountry]
-      systemContent += `\n\n## Laenderspezifische Auskunft
-Der Nutzer hat "${countryLabel} (${validCountry})" gewaehlt.
- Beziehe dich AUSSCHLIESSLICH auf ${validCountry}-Recht + anwendbares EU-Recht
- Nenne IMMER explizit das Land in deiner Antwort
- Verwende NIEMALS Gesetze eines anderen Landes
- Bei ${validCountry === 'EU' ? 'EU-weiten Fragen: Beziehe dich auf EU-Verordnungen und -Richtlinien' : `${countryLabel}: Beziehe nationale Gesetze (${validCountry === 'DE' ? 'BDSG, TDDDG, TKG, UWG' : validCountry === 'AT' ? 'AT DSG, ECG, TKG, KSchG, MedienG' : 'CH DSG, DSV, OR, UWG, FMG'}) mit ein`}`
-    }
-
+    if (validCountry) systemContent += countryBlock(validCountry)
    if (ragContext) {
      systemContent += `\n\n## Relevanter Kontext aus dem RAG-System\n\nNutze die folgenden Quellen fuer deine Antwort. Verweise in deiner Antwort auf die jeweilige Quelle:\n\n${ragContext}`
    }
-
-    if (controlsContext) {
-      systemContent += `\n\n${controlsContext}`
-    }
-
+    if (controlsContext) systemContent += `\n\n${controlsContext}`
    systemContent += `\n\n## Aktueller SDK-Schritt\nDer Nutzer befindet sich im SDK-Schritt: ${currentStep}`

-    // 3. Build messages array (limit history to last 6 messages)
-    const messages = [
+    // 3. Nachrichten (History auf die letzten 6 begrenzen)
+    const messages: ChatMessage[] = [
      { role: 'system', content: systemContent },
      ...history.slice(-6).map((h: { role: string; content: string }) => ({
        role: h.role === 'user' ? 'user' : 'assistant',
@@ -179,82 +95,27 @@ Der Nutzer hat "${countryLabel} (${validCountry})" gewaehlt.
      { role: 'user', content: message },
    ]

-    // 4. Call Ollama with streaming
-    const ollamaResponse = await fetch(`${OLLAMA_URL}/api/chat`, {
-      method: 'POST',
-      headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({
-        model: LLM_MODEL,
-        messages,
-        stream: true,
-        think: false,
-        // Modell im VRAM halten → kein Kaltstart bei der naechsten Frage
-        // (Kaltstart eines 35b-Modells war die Ursache fuer "Load failed").
-        keep_alive: '30m',
-        options: {
-          temperature: 0.3,
-          num_predict: 8192,
-          num_ctx: 8192,
-        },
-      }),
-      signal: AbortSignal.timeout(120000),
-    })
-
-    if (!ollamaResponse.ok) {
-      const errorText = await ollamaResponse.text()
-      console.error('Ollama error:', ollamaResponse.status, errorText)
+    // 4. LLM-Kaskade -> Plain-Text-Stream
+    const stream = await streamAdvisorAnswer(messages)
+    if (!stream) {
      return NextResponse.json(
-        { error: `LLM nicht erreichbar (Status ${ollamaResponse.status}). Ist Ollama mit dem Modell ${LLM_MODEL} gestartet?` },
-        { status: 502 }
+        { error: 'LLM nicht erreichbar. Weder OVH/LiteLLM noch Ollama haben geantwortet.' },
+        { status: 502 },
      )
    }

-    // 5. Stream response back as plain text
-    const encoder = new TextEncoder()
-    const stream = new ReadableStream({
-      async start(controller) {
-        const reader = ollamaResponse.body!.getReader()
-        const decoder = new TextDecoder()
-
-        try {
-          while (true) {
-            const { done, value } = await reader.read()
-            if (done) break
-
-            const chunk = decoder.decode(value, { stream: true })
-            const lines = chunk.split('\n').filter((l) => l.trim())
-
-            for (const line of lines) {
-              try {
-                const json = JSON.parse(line)
-                if (json.message?.content) {
-                  controller.enqueue(encoder.encode(json.message.content))
-                }
-              } catch {
-                // Partial JSON line, skip
-              }
-            }
-          }
-        } catch (error) {
-          console.error('Stream read error:', error)
-        } finally {
-          controller.close()
-        }
-      },
-    })
-
    return new NextResponse(stream, {
      headers: {
        'Content-Type': 'text/plain; charset=utf-8',
        'Cache-Control': 'no-cache',
-        'Connection': 'keep-alive',
+        Connection: 'keep-alive',
      },
    })
  } catch (error) {
    console.error('Compliance advisor chat error:', error)
    return NextResponse.json(
-      { error: 'Verbindung zum LLM fehlgeschlagen. Bitte pruefen Sie ob Ollama laeuft.' },
-      { status: 503 }
+      { error: 'Verbindung zum LLM fehlgeschlagen.' },
+      { status: 503 },
    )
  }
 }
@@ -0,0 +1,31 @@
+/**
+ * Tests fuer die LLM-Stream-Parser des Advisors (Ollama-NDJSON + OVH/OpenAI-SSE).
+ */
+
+import { describe, it, expect } from 'vitest'
+import { parseOllamaLine, parseSSELine } from '../advisor-llm'
+
+// parseOllamaLine turns one NDJSON line into its text delta, or null.
+describe('parseOllamaLine', () => {
+  it('extrahiert message.content', () => {
+    const line = '{"message":{"content":"Hallo"}}'
+    expect(parseOllamaLine(line)).toBe('Hallo')
+  })
+  it('ignoriert leere/kaputte Zeilen', () => {
+    // Empty, whitespace-only, non-JSON and content-less input, in that order.
+    const ignored = ['', '  ', 'not-json', '{"message":{}}']
+    for (const line of ignored) {
+      expect(parseOllamaLine(line)).toBeNull()
+    }
+  })
+})
+
+// parseSSELine turns one OpenAI-style SSE line into its text delta, or null.
+describe('parseSSELine', () => {
+  it('extrahiert choices[0].delta.content aus data:-Zeilen', () => {
+    const line = 'data: {"choices":[{"delta":{"content":"Hi"}}]}'
+    expect(parseSSELine(line)).toBe('Hi')
+  })
+  it('ignoriert [DONE], Nicht-data-Zeilen und kaputtes JSON', () => {
+    // End marker, non-data field, empty line, broken JSON, delta without content.
+    const ignored = [
+      'data: [DONE]',
+      'event: message',
+      '',
+      'data: {bad json',
+      'data: {"choices":[{"delta":{}}]}',
+    ]
+    for (const line of ignored) {
+      expect(parseSSELine(line)).toBeNull()
+    }
+  })
+})
@@ -0,0 +1,75 @@
+/**
+ * Tests fuer die Advisor-RAG-Suche (ai-sdk, bge-m3).
+ */
+
+import { describe, it, expect, beforeEach, vi } from 'vitest'
+
+// Route every fetch call through a mock so no request leaves the test process.
+const mockFetch = vi.fn()
+vi.stubGlobal('fetch', mockFetch)
+
+/** Builds the minimal successful response shape used by these tests: `ok` plus `json()`. */
+const okResponse = (results: unknown[]) => ({ ok: true, json: async () => ({ results }) })
+
+describe('advisor-rag', () => {
+  let rag: typeof import('../advisor-rag')
+
+  // Re-import the module with a clean mock before every test.
+  beforeEach(async () => {
+    vi.resetModules()
+    mockFetch.mockReset()
+    rag = await import('../advisor-rag')
+  })
+
+  describe('mapSdkResults', () => {
+    it('mappt ai-sdk-Felder auf {content, source, score}', () => {
+      const hit = { text: 'Art. 35 DSGVO ...', regulation_short: 'DSGVO', score: 0.91 }
+      const expected = { content: 'Art. 35 DSGVO ...', source: 'DSGVO', score: 0.91 }
+      expect(rag.mapSdkResults([hit])).toEqual([expected])
+    })
+
+    it('faellt auf regulation_name/code zurueck und filtert leere Inhalte', () => {
+      const hits = [
+        { text: '', regulation_short: 'X' },
+        { text: 'Inhalt', regulation_name: 'BDSG' },
+        { text: 'Inhalt2', regulation_code: 'EU_2016_679' },
+      ]
+      expect(rag.mapSdkResults(hits)).toEqual([
+        { content: 'Inhalt', source: 'BDSG', score: 0 },
+        { content: 'Inhalt2', source: 'EU_2016_679', score: 0 },
+      ])
+    })
+  })
+
+  describe('queryAdvisorRAG', () => {
+    it('fragt alle 6 Collections ab und formatiert die Treffer', async () => {
+      mockFetch.mockResolvedValue(
+        okResponse([{ text: 'Inhalt A', regulation_short: 'DSGVO', score: 0.9 }]),
+      )
+      const context = await rag.queryAdvisorRAG('Was ist eine DSFA?')
+      expect(context).toContain('[Quelle 1: DSGVO]')
+      expect(context).toContain('Inhalt A')
+      // One request per collection.
+      expect(mockFetch).toHaveBeenCalledTimes(rag.COMPLIANCE_COLLECTIONS.length)
+    })
+
+    it('ruft die ai-sdk /sdk/v1/rag/search mit collection + top_k auf', async () => {
+      mockFetch.mockResolvedValue(okResponse([]))
+      await rag.queryAdvisorRAG('test')
+      expect(mockFetch).toHaveBeenCalledWith(
+        expect.stringContaining('/sdk/v1/rag/search'),
+        expect.objectContaining({ method: 'POST' }),
+      )
+      // Inspect the JSON payload of the first request.
+      const [, init] = mockFetch.mock.calls[0]
+      const payload = JSON.parse(init.body)
+      expect(payload).toMatchObject({ query: 'test', top_k: 3 })
+      expect(rag.COMPLIANCE_COLLECTIONS).toContain(payload.collection)
+    })
+
+    it('liefert leeren String wenn das RAG-Backend nicht erreichbar ist (graceful)', async () => {
+      mockFetch.mockRejectedValue(new Error('connection refused'))
+      const context = await rag.queryAdvisorRAG('test')
+      expect(context).toBe('')
+    })
+
+    it('umfasst genau die 6 Compliance-Collections', () => {
+      const collections = rag.COMPLIANCE_COLLECTIONS
+      expect(collections).toHaveLength(6)
+      expect(collections).toContain('bp_compliance_recht')
+    })
+  })
+})
@@ -0,0 +1,140 @@
+/**
+ * Compliance-Advisor LLM-Kaskade.
+ *
+ * Reihenfolge:
+ *   1. OVH / LiteLLM (OpenAI-kompatibel, SSE-Streaming) — prod-LLM, wenn
+ *      OVH_LLM_URL + OVH_LLM_MODEL gesetzt sind.
+ *   2. Ollama-Chat (NDJSON-Streaming) — lokale Entwicklung / Fallback.
+ *
+ * Auf prod zeigt OLLAMA_URL auf den Embedding-only-Dienst (kein Chat-Modell),
+ * deshalb ist OVH dort der einzige funktionierende Pfad. Lokal (ohne OVH-Env)
+ * laeuft der Advisor weiter ueber Ollama. Beide Quellen werden auf einen
+ * einheitlichen Plain-Text-Stream normalisiert.
+ */
+
+const OLLAMA_URL = process.env.OLLAMA_URL || 'http://host.docker.internal:11434' // Ollama base URL; unset or empty falls back to the Docker host address
+const OLLAMA_MODEL = process.env.COMPLIANCE_LLM_MODEL || 'qwen2.5vl:32b' // model name sent to Ollama's /api/chat
+const OVH_URL = (process.env.OVH_LLM_URL || '').replace(/\/+$/, '') // OVH/LiteLLM base URL with trailing slashes removed; '' when unset
+const OVH_MODEL = process.env.OVH_LLM_MODEL || '' // model name for the OVH request; the OVH path is skipped while this or OVH_URL is empty
+const OVH_KEY = process.env.OVH_LLM_KEY || '' // optional bearer token; no Authorization header is sent when empty
+
+export interface ChatMessage { // one chat turn, passed unchanged to both LLM backends
+  role: string // the route sends 'system', 'user' or 'assistant'
+  content: string // message text
+}
+
+const encoder = new TextEncoder() // shared encoder that turns text deltas into UTF-8 bytes for the output stream
+
+/**
+ * Extracts the text delta from one Ollama NDJSON line (`message.content`).
+ *
+ * @param line - One line of the streamed response; surrounding whitespace is ignored.
+ * @returns The delta, or null for blank lines, invalid JSON, and lines whose
+ *   `message.content` is missing, empty, or not a string.
+ */
+export function parseOllamaLine(line: string): string | null {
+  const t = line.trim()
+  if (!t) return null
+  try {
+    const j: unknown = JSON.parse(t)
+    const content = (j as { message?: { content?: unknown } } | null)?.message?.content
+    // JSON.parse is untyped: only hand real, non-empty strings to the caller.
+    return typeof content === 'string' && content ? content : null
+  } catch {
+    return null
+  }
+}
+
+/**
+ * Extracts the text delta from one OpenAI/OVH SSE line (`choices[0].delta.content`).
+ *
+ * @param line - One line of the event stream; surrounding whitespace is ignored.
+ * @returns The delta, or null for lines that do not start with `data:`, the
+ *   `[DONE]` marker, invalid JSON, and payloads whose content is missing,
+ *   empty, or not a string.
+ */
+export function parseSSELine(line: string): string | null {
+  const t = line.trim()
+  if (!t.startsWith('data:')) return null
+  // Drop the 5-character 'data:' prefix and the whitespace around the payload.
+  const payload = t.slice(5).trim()
+  if (!payload || payload === '[DONE]') return null
+  try {
+    const j: unknown = JSON.parse(payload)
+    const content = (j as { choices?: Array<{ delta?: { content?: unknown } } | null> } | null)
+      ?.choices?.[0]?.delta?.content
+    // JSON.parse is untyped: only hand real, non-empty strings to the caller.
+    return typeof content === 'string' && content ? content : null
+  } catch {
+    return null
+  }
+}
+
+/**
+ * Converts a line-based upstream response (SSE or NDJSON) into a plain-text byte stream.
+ *
+ * The upstream body is decoded incrementally and split on '\n'. Every complete
+ * line goes through `parseLine`, and each non-empty delta is enqueued as UTF-8 bytes.
+ *
+ * @param upstream - Streaming response of the LLM backend.
+ * @param parseLine - Extracts the text delta from one line, or returns null to skip it.
+ * @returns Stream of the text deltas in arrival order.
+ */
+function textStream(
+  upstream: Response,
+  parseLine: (line: string) => string | null,
+): ReadableStream<Uint8Array> {
+  const reader = upstream.body?.getReader()
+  // Set once the consumer cancels; after that the controller must not be touched.
+  let cancelled = false
+  return new ReadableStream<Uint8Array>({
+    async start(controller) {
+      if (!reader) {
+        // No body to read: hand out an empty, already finished stream.
+        controller.close()
+        return
+      }
+      const decoder = new TextDecoder()
+      let buf = ''
+      const emit = (line: string): void => {
+        if (cancelled) return
+        const delta = parseLine(line)
+        if (delta) controller.enqueue(encoder.encode(delta))
+      }
+      try {
+        for (;;) {
+          const { done, value } = await reader.read()
+          if (done) break
+          buf += decoder.decode(value, { stream: true })
+          const lines = buf.split('\n')
+          // The last element is an incomplete line (or ''); keep it for the next chunk.
+          buf = lines.pop() ?? ''
+          for (const line of lines) emit(line)
+        }
+        // Flush bytes the decoder still holds, then parse the unterminated last line.
+        buf += decoder.decode()
+        emit(buf)
+      } catch (error) {
+        // A broken upstream ends the answer early; log it instead of failing silently.
+        if (!cancelled) console.error('Stream read error:', error)
+      } finally {
+        if (!cancelled) controller.close()
+      }
+    },
+    // Consumer went away: stop reading so the upstream LLM request is released too.
+    cancel(reason) {
+      cancelled = true
+      return reader?.cancel(reason)
+    },
+  })
+}
+
+/**
+ * Opens a streaming chat completion against the OVH/LiteLLM endpoint
+ * (`/v1/chat/completions`).
+ *
+ * @param messages - Chat history including the system prompt.
+ * @returns The response when its status is ok and it has a body. Returns null when
+ *   OVH_URL or OVH_MODEL is empty, when the status is not ok, or when the request
+ *   throws (including the 120 s timeout).
+ */
+async function tryOVH(messages: ChatMessage[]): Promise<Response | null> {
+  // Without URL and model the OVH path counts as not configured.
+  if (!OVH_URL || !OVH_MODEL) return null
+  try {
+    const headers: Record<string, string> = { 'Content-Type': 'application/json' }
+    if (OVH_KEY) headers['Authorization'] = `Bearer ${OVH_KEY}`
+    const r = await fetch(`${OVH_URL}/v1/chat/completions`, {
+      method: 'POST',
+      headers,
+      body: JSON.stringify({
+        model: OVH_MODEL,
+        messages,
+        stream: true,
+        temperature: 0.3,
+        max_tokens: 4096,
+      }),
+      signal: AbortSignal.timeout(120000),
+    })
+    if (r.ok && r.body) return r
+    console.warn(`OVH LLM request rejected (status ${r.status})`)
+    // Discard the unread body so the connection of the rejected response is released.
+    await r.body?.cancel()
+    return null
+  } catch (error) {
+    // Best effort: the caller decides what to try next, but the cause stays visible.
+    console.warn('OVH LLM request failed:', error)
+    return null
+  }
+}
+
+/**
+ * Opens a streaming chat request against Ollama (`/api/chat`).
+ *
+ * @param messages - Chat history including the system prompt.
+ * @returns The response when its status is ok and it has a body. Returns null when
+ *   the status is not ok or when the request throws (including the 120 s timeout).
+ */
+async function tryOllama(messages: ChatMessage[]): Promise<Response | null> {
+  try {
+    const r = await fetch(`${OLLAMA_URL}/api/chat`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model: OLLAMA_MODEL,
+        messages,
+        stream: true,
+        think: false,
+        keep_alive: '30m',
+        options: { temperature: 0.3, num_predict: 4096, num_ctx: 8192 },
+      }),
+      signal: AbortSignal.timeout(120000),
+    })
+    if (r.ok && r.body) return r
+    console.warn(`Ollama chat request rejected (status ${r.status}, model ${OLLAMA_MODEL})`)
+    // Discard the unread body so the connection of the rejected response is released.
+    await r.body?.cancel()
+    return null
+  } catch (error) {
+    // Best effort: the caller reports the outage, but the cause stays visible in the log.
+    console.warn('Ollama chat request failed:', error)
+    return null
+  }
+}
+
+/**
+ * Produces the LLM answer as a plain-text stream.
+ *
+ * OVH is asked first; Ollama is only contacted when OVH yields no response.
+ *
+ * @param messages - Chat history including the system prompt.
+ * @returns The answer stream, or null when neither backend delivered a response
+ *   (the route answers with 502 in that case).
+ */
+export async function streamAdvisorAnswer(
+  messages: ChatMessage[],
+): Promise<ReadableStream<Uint8Array> | null> {
+  const ovhResponse = await tryOVH(messages)
+  if (ovhResponse !== null) {
+    return textStream(ovhResponse, parseSSELine)
+  }
+  const ollamaResponse = await tryOllama(messages)
+  return ollamaResponse === null ? null : textStream(ollamaResponse, parseOllamaLine)
+}
@@ -0,0 +1,91 @@
+/**
+ * Compliance-Advisor RAG-Suche.
+ *
+ * Fragt die ai-compliance-sdk (`/sdk/v1/rag/search`) ab statt des frueheren
+ * `rag-service:8097` (auf prod nicht erreichbar). Die ai-sdk embeddet die Query
+ * mit bge-m3 (prod: ollama-embed) und sucht in den Qdrant-Compliance-Collections
+ * — damit profitiert der Advisor vom reicheren Embedding.
+ *
+ * Fehler je Collection werden geschluckt (graceful: Antwort ohne diesen Treffer).
+ */
+
+const SDK_URL =
+  process.env.SDK_API_URL || process.env.SDK_URL || 'http://ai-compliance-sdk:8090' // ai-compliance-sdk base URL; SDK_API_URL takes precedence over SDK_URL
+
+const DEFAULT_USER = '00000000-0000-0000-0000-000000000001' // fixed value sent as X-User-ID with every search
+const DEFAULT_TENANT =
+  process.env.DEFAULT_TENANT_ID || '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e' // sent as X-Tenant-ID; NOTE(review): hard-coded fallback tenant — confirm it is valid in every environment
+
+// Collections searched for every advisor question; presumably each must be listed in the ai-sdk whitelist `AllowedCollections` — TODO confirm against the SDK.
+export const COMPLIANCE_COLLECTIONS = [
+  'bp_compliance_gesetze',
+  'bp_compliance_ce',
+  'bp_compliance_datenschutz',
+  'bp_dsfa_corpus',
+  'bp_compliance_recht',
+  'bp_legal_templates',
+] as const
+
+interface SdkRagResult { // one hit as read from the `results` array of /sdk/v1/rag/search
+  text?: string // passage text; hits without text are dropped by mapSdkResults
+  regulation_code?: string // third choice for the source label
+  regulation_name?: string // second choice for the source label
+  regulation_short?: string // first choice for the source label
+  category?: string // not read by this module
+  source_url?: string // not read by this module
+  score?: number // ranking score; treated as 0 when it is not a number
+}
+
+interface ScoredPassage { // normalized hit used for ranking and prompt formatting
+  content: string // non-empty passage text
+  source: string // label shown as "Quelle"; 'Unbekannt' when the hit names no regulation
+  score: number // sort key, higher ranks first
+}
+
+/**
+ * Normalizes ai-sdk RAG hits to `{content, source, score}`.
+ *
+ * @param results - Raw hits; undefined is treated like an empty list.
+ * @returns One passage per hit that has text. The source label is the first
+ *   non-empty value of regulation_short, regulation_name, regulation_code,
+ *   else 'Unbekannt'; a non-numeric score becomes 0.
+ */
+export function mapSdkResults(results: SdkRagResult[] | undefined): ScoredPassage[] {
+  const passages: ScoredPassage[] = []
+  for (const hit of results || []) {
+    const content = hit.text || ''
+    // Hits without text carry nothing for the prompt.
+    if (!content) continue
+    const source =
+      hit.regulation_short || hit.regulation_name || hit.regulation_code || 'Unbekannt'
+    const score = typeof hit.score === 'number' ? hit.score : 0
+    passages.push({ content, source, score })
+  }
+  return passages
+}
+
+/**
+ * Runs one RAG search against a single collection of the ai-compliance-sdk.
+ *
+ * Failures never reach the caller: they are logged and reported as "no hits".
+ *
+ * @param collection - Name of the collection to search.
+ * @param query - User question to search for.
+ * @returns Up to three normalized passages (the request asks for top_k 3). Returns
+ *   [] on a non-ok status, an unexpected payload, or a network/timeout error.
+ */
+async function searchCollection(collection: string, query: string): Promise<ScoredPassage[]> {
+  try {
+    const res = await fetch(`${SDK_URL}/sdk/v1/rag/search`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'X-User-ID': DEFAULT_USER,
+        'X-Tenant-ID': DEFAULT_TENANT,
+      },
+      body: JSON.stringify({ query, collection, top_k: 3 }),
+      signal: AbortSignal.timeout(10000),
+    })
+    if (!res.ok) {
+      console.warn(`Advisor RAG search rejected for ${collection} (status ${res.status})`)
+      // Discard the unread error body so the connection is released.
+      await res.body?.cancel()
+      return []
+    }
+    const data = (await res.json()) as { results?: unknown } | null
+    const results = data?.results
+    // Anything but an array is treated like "no hits".
+    return mapSdkResults(Array.isArray(results) ? (results as SdkRagResult[]) : undefined)
+  } catch (error) {
+    console.warn(`Advisor RAG search failed for ${collection} (continuing without it):`, error)
+    return []
+  }
+}
+
+/**
+ * Searches all compliance collections in parallel and formats the best hits
+ * as one context block for the system prompt.
+ *
+ * @param query - User question to search for.
+ * @returns The eight highest-scoring passages, each headed by `[Quelle n: source]`
+ *   and separated by `---`, or '' when no collection returned a hit.
+ */
+export async function queryAdvisorRAG(query: string): Promise<string> {
+  const perCollection = await Promise.all(
+    COMPLIANCE_COLLECTIONS.map((name) => searchCollection(name, query)),
+  )
+  const passages = perCollection.flat()
+  if (passages.length === 0) {
+    return ''
+  }
+  // Best score first; only the top eight make it into the prompt.
+  passages.sort((a, b) => b.score - a.score)
+  const blocks: string[] = []
+  for (const [index, passage] of passages.slice(0, 8).entries()) {
+    blocks.push(`[Quelle ${index + 1}: ${passage.source}]\n${passage.content}`)
+  }
+  return blocks.join('\n\n---\n\n')
+}