fix(drafting): Drafting-Engine auf prod reparieren — RAG via ai-sdk + OVH-LLM-Kaskade

Die Drafting-Engine (Dokument-Entwurf, v2-Pipeline, Validierung, Drafting-Chat, Vendor-Vertragspruefung) war auf prod doppelt tot: - RAG ueber bp-core-rag-service:8097 (existiert auf prod nicht) - LLM ueber OLLAMA_URL/api/chat mit qwen2.5vl (prod = ollama-embed, kein Chat-Modell) Fix (analog zum Compliance-Advisor): - rag-query.ts -> ai-compliance-sdk /sdk/v1/rag/search (bge-m3, prod-erreichbar). - Neue lib/sdk/drafting-engine/llm-cascade.ts: OVH/LiteLLM (gpt-oss-120b) zuerst, Ollama als Dev-Fallback; cascadeComplete (JSON) + cascadeStream. Das Backend nutzt OVH+JSON bereits erfolgreich auf prod (extract-datasheet). - 5 Aufrufstellen (draft-helpers, draft-helpers-v2, validate, chat, vendor-review) auf die Kaskade umgestellt; keine direkten Ollama-Calls mehr. - Tests: llm-cascade + rag-query aktualisiert. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-19 10:02:06 +02:00
parent cd3e0b15ad
commit 90a70c8404
9 changed files with 398 additions and 203 deletions
@@ -0,0 +1,81 @@
+/**
+ * Tests fuer die Drafting-Engine LLM-Kaskade (OVH -> Ollama) + Stream-Parser.
+ */
+
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'
+
+const mockFetch = vi.fn()
+vi.stubGlobal('fetch', mockFetch)
+
+describe('llm-cascade parser', () => {
+  it('parseOllamaLine extrahiert message.content', async () => {
+    const { parseOllamaLine } = await import('../llm-cascade')
+    expect(parseOllamaLine('{"message":{"content":"X"}}')).toBe('X')
+    expect(parseOllamaLine('')).toBeNull()
+    expect(parseOllamaLine('kaputt')).toBeNull()
+  })
+
+  it('parseSSELine extrahiert delta.content', async () => {
+    const { parseSSELine } = await import('../llm-cascade')
+    expect(parseSSELine('data: {"choices":[{"delta":{"content":"Y"}}]}')).toBe('Y')
+    expect(parseSSELine('data: [DONE]')).toBeNull()
+    expect(parseSSELine('event: ping')).toBeNull()
+  })
+})
+
+describe('cascadeComplete', () => {
+  beforeEach(() => {
+    vi.resetModules()
+    mockFetch.mockReset()
+  })
+  afterEach(() => {
+    vi.unstubAllEnvs()
+  })
+
+  it('nutzt OVH zuerst wenn konfiguriert (json + response_format)', async () => {
+    vi.stubEnv('OVH_LLM_URL', 'https://ovh.test')
+    vi.stubEnv('OVH_LLM_MODEL', 'gpt-oss-120b')
+    vi.stubEnv('OVH_LLM_KEY', 'k')
+    const { cascadeComplete } = await import('../llm-cascade')
+    mockFetch.mockResolvedValueOnce({
+      ok: true,
+      json: async () => ({ choices: [{ message: { content: '{"ok":1}' } }], usage: { completion_tokens: 42 } }),
+    })
+    const r = await cascadeComplete([{ role: 'user', content: 'hi' }], { json: true, maxTokens: 1000 })
+    expect(r).toEqual({ content: '{"ok":1}', tokensUsed: 42, provider: 'ovh' })
+    const [url, opts] = mockFetch.mock.calls[0]
+    expect(url).toContain('/v1/chat/completions')
+    const body = JSON.parse(opts.body)
+    expect(body.response_format).toEqual({ type: 'json_object' })
+    expect(body.stream).toBe(false)
+  })
+
+  it('faellt auf Ollama zurueck wenn OVH nicht konfiguriert ist', async () => {
+    const { cascadeComplete } = await import('../llm-cascade')
+    mockFetch.mockResolvedValueOnce({
+      ok: true,
+      json: async () => ({ message: { content: 'hallo' }, eval_count: 7 }),
+    })
+    const r = await cascadeComplete([{ role: 'user', content: 'hi' }])
+    expect(r).toEqual({ content: 'hallo', tokensUsed: 7, provider: 'ollama' })
+    expect(mockFetch.mock.calls[0][0]).toContain('/api/chat')
+  })
+
+  it('faellt auf Ollama zurueck wenn OVH einen Fehler liefert', async () => {
+    vi.stubEnv('OVH_LLM_URL', 'https://ovh.test')
+    vi.stubEnv('OVH_LLM_MODEL', 'gpt-oss-120b')
+    const { cascadeComplete } = await import('../llm-cascade')
+    mockFetch
+      .mockResolvedValueOnce({ ok: false, status: 502 })
+      .mockResolvedValueOnce({ ok: true, json: async () => ({ message: { content: 'fallback' }, eval_count: 3 }) })
+    const r = await cascadeComplete([{ role: 'user', content: 'hi' }])
+    expect(r?.provider).toBe('ollama')
+    expect(r?.content).toBe('fallback')
+  })
+
+  it('liefert null wenn weder OVH noch Ollama antworten', async () => {
+    const { cascadeComplete } = await import('../llm-cascade')
+    mockFetch.mockResolvedValue({ ok: false, status: 500 })
+    expect(await cascadeComplete([{ role: 'user', content: 'hi' }])).toBeNull()
+  })
+})
@@ -1,5 +1,5 @@
 /**
- * Tests for the shared queryRAG utility.
+ * Tests for the shared queryRAG utility (ai-sdk /sdk/v1/rag/search, bge-m3).
 */

 import { describe, it, expect, beforeEach, vi } from 'vitest'
@@ -19,13 +19,13 @@ describe('queryRAG', () => {
    queryRAG = mod.queryRAG
  })

-  it('should return formatted results on success', async () => {
+  it('should return formatted results on success (ai-sdk shape)', async () => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: async () => ({
        results: [
-          { source_name: 'DSGVO', content: 'Art. 35 regelt die DSFA...' },
-          { source_code: 'EU_2016_679', content: 'Risikobewertung erforderlich' },
+          { text: 'Art. 35 regelt die DSFA...', regulation_short: 'DSGVO' },
+          { text: 'Risikobewertung erforderlich', regulation_code: 'EU_2016_679' },
        ],
      }),
    })
@@ -38,7 +38,7 @@ describe('queryRAG', () => {
    expect(mockFetch).toHaveBeenCalledTimes(1)
  })

-  it('should send POST request to RAG_SERVICE_URL', async () => {
+  it('should POST to the ai-sdk /sdk/v1/rag/search endpoint', async () => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ results: [] }),
@@ -47,10 +47,10 @@ describe('queryRAG', () => {
    await queryRAG('test query')

    expect(mockFetch).toHaveBeenCalledWith(
-      expect.stringContaining('/api/v1/search'),
+      expect.stringContaining('/sdk/v1/rag/search'),
      expect.objectContaining({
        method: 'POST',
-        headers: { 'Content-Type': 'application/json' },
+        headers: expect.objectContaining({ 'Content-Type': 'application/json' }),
      })
    )
  })
@@ -99,43 +99,24 @@ describe('queryRAG', () => {
  })

  it('should return empty string on HTTP error', async () => {
-    mockFetch.mockResolvedValueOnce({
-      ok: false,
-      status: 500,
-    })
-
-    const result = await queryRAG('test query')
-
-    expect(result).toBe('')
+    mockFetch.mockResolvedValueOnce({ ok: false, status: 500 })
+    expect(await queryRAG('test query')).toBe('')
  })

  it('should return empty string on network error', async () => {
    mockFetch.mockRejectedValueOnce(new Error('Connection refused'))
-
-    const result = await queryRAG('test query')
-
-    expect(result).toBe('')
+    expect(await queryRAG('test query')).toBe('')
  })

  it('should return empty string when no results', async () => {
-    mockFetch.mockResolvedValueOnce({
-      ok: true,
-      json: async () => ({ results: [] }),
-    })
-
-    const result = await queryRAG('test query')
-
-    expect(result).toBe('')
+    mockFetch.mockResolvedValueOnce({ ok: true, json: async () => ({ results: [] }) })
+    expect(await queryRAG('test query')).toBe('')
  })

-  it('should handle results with missing fields gracefully', async () => {
+  it('should handle results with missing source fields gracefully', async () => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
-      json: async () => ({
-        results: [
-          { content: 'Some content without source' },
-        ],
-      }),
+      json: async () => ({ results: [{ text: 'Some content without source' }] }),
    })

    const result = await queryRAG('test')