feat: Phase 3 — RAG-Anbindung fuer alle 18 Dokumenttypen + Vendor Contract Review
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 34s
CI / test-python-backend-compliance (push) Successful in 26s
CI / test-python-document-crawler (push) Successful in 21s
CI / test-python-dsms-gateway (push) Successful in 17s

Migrate queryRAG from the klausur-service GET endpoint to the
bp-core-rag-service POST endpoint with multi-collection support. Each of the
18 ScopeDocumentType values now gets a type-specific RAG collection and an
optimized search query instead of the generic fallback. Vendor-compliance
contract review now uses LLM + RAG for real analysis, with a mock fallback on error.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-02 10:10:32 +01:00
parent d9f819e5be
commit cd15ab0932
9 changed files with 469 additions and 57 deletions

View File

@@ -0,0 +1,59 @@
/**
* Tests for RAG Config — ensures all document types have valid mappings.
*/
import { describe, it, expect } from 'vitest'
import { DOCUMENT_RAG_CONFIG } from '../rag-config'
import type { ScopeDocumentType } from '@/lib/sdk/compliance-scope-types'
/**
 * Explicit list of the 18 ScopeDocumentType values that the tests in this
 * file look up in DOCUMENT_RAG_CONFIG.
 */
const ALL_DOCUMENT_TYPES: Array<ScopeDocumentType> = [
  'vvt',
  'lf',
  'tom',
  'av_vertrag',
  'dsi',
  'betroffenenrechte',
  'dsfa',
  'daten_transfer',
  'datenpannen',
  'einwilligung',
  'vertragsmanagement',
  'schulung',
  'audit_log',
  'risikoanalyse',
  'notfallplan',
  'zertifizierung',
  'datenschutzmanagement',
  'iace_ce_assessment',
]
/**
 * Collection names a config entry is allowed to reference.
 * Mirrors the RAG collections known to exist in bp-core-rag-service.
 */
const VALID_COLLECTIONS: readonly string[] = [
  'bp_dsfa_corpus',
  'bp_compliance_datenschutz',
  'bp_compliance_gesetze',
  'bp_compliance_recht',
  'bp_compliance_ce',
]
describe('DOCUMENT_RAG_CONFIG', () => {
  // Fresh view of all config values; evaluated inside each test that needs it.
  const allConfigs = () => Object.values(DOCUMENT_RAG_CONFIG)

  it('should have an entry for all 18 ScopeDocumentType values', () => {
    ALL_DOCUMENT_TYPES.forEach((docType) => {
      expect(DOCUMENT_RAG_CONFIG[docType]).toBeDefined()
    })
  })

  it('should have exactly 18 entries', () => {
    const configuredTypes = Object.keys(DOCUMENT_RAG_CONFIG)
    expect(configuredTypes).toHaveLength(18)
  })

  it('should use valid collection names', () => {
    for (const { collection } of allConfigs()) {
      expect(VALID_COLLECTIONS).toContain(collection)
    }
  })

  it('should have non-empty queries for all types', () => {
    for (const { query } of allConfigs()) {
      expect(query).toBeTruthy()
      // Guards against placeholder-length queries, not just empty strings.
      expect(query.length).toBeGreaterThan(5)
    }
  })

  it('should have DSFA mapped to bp_dsfa_corpus', () => {
    const { dsfa } = DOCUMENT_RAG_CONFIG
    expect(dsfa.collection).toBe('bp_dsfa_corpus')
  })

  it('should have unique queries for each document type', () => {
    const queries = allConfigs().map(({ query }) => query)
    // A Set drops duplicates, so equal sizes mean every query is distinct.
    expect(new Set(queries).size).toBe(queries.length)
  })
})

View File

@@ -2,18 +2,17 @@
* Tests for the shared queryRAG utility.
*/
// Mock fetch globally before importing
const mockFetch = jest.fn()
global.fetch = mockFetch
import { describe, it, expect, beforeEach, vi } from 'vitest'
// Reset modules to pick up our mock
jest.resetModules()
// Mock fetch globally
const mockFetch = vi.fn()
vi.stubGlobal('fetch', mockFetch)
describe('queryRAG', () => {
let queryRAG: (query: string, topK?: number) => Promise<string>
let queryRAG: (query: string, topK?: number, collection?: string) => Promise<string>
beforeEach(async () => {
jest.resetModules()
vi.resetModules()
mockFetch.mockReset()
// Dynamic import to pick up fresh env
const mod = await import('../rag-query')
@@ -39,6 +38,66 @@ describe('queryRAG', () => {
expect(mockFetch).toHaveBeenCalledTimes(1)
})
it('should send POST request to RAG_SERVICE_URL', async () => {
  // Minimal successful response; only the outgoing request is under test.
  const emptyResponse = {
    ok: true,
    json: () => Promise.resolve({ results: [] }),
  }
  mockFetch.mockResolvedValueOnce(emptyResponse)

  await queryRAG('test query')

  const expectedUrl = expect.stringContaining('/api/v1/search')
  const expectedInit = expect.objectContaining({
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
  })
  expect(mockFetch).toHaveBeenCalledWith(expectedUrl, expectedInit)
})
it('should include collection in request body when provided', async () => {
  mockFetch.mockResolvedValueOnce({
    ok: true,
    json: () => Promise.resolve({ results: [] }),
  })

  await queryRAG('test query', 3, 'bp_dsfa_corpus')

  // Second argument of the first fetch call is the request init object.
  const [, requestInit] = mockFetch.mock.calls[0]
  const payload = JSON.parse(requestInit.body)
  expect(payload.collection).toBe('bp_dsfa_corpus')
  expect(payload.query).toBe('test query')
  expect(payload.top_k).toBe(3)
})
it('should omit collection from body when not provided', async () => {
  mockFetch.mockResolvedValueOnce({
    ok: true,
    json: () => Promise.resolve({ results: [] }),
  })

  // No topK and no collection: the body should carry only the query and the default top_k.
  await queryRAG('test query')

  const [, requestInit] = mockFetch.mock.calls[0]
  const payload = JSON.parse(requestInit.body)
  expect(payload.collection).toBeUndefined()
  expect(payload.query).toBe('test query')
  expect(payload.top_k).toBe(3)
})
it('should pass custom topK in request body', async () => {
  mockFetch.mockResolvedValueOnce({
    ok: true,
    json: () => Promise.resolve({ results: [] }),
  })

  await queryRAG('test query', 7)

  // The caller-supplied topK is expected under the snake_case key top_k.
  const [, requestInit] = mockFetch.mock.calls[0]
  const payload = JSON.parse(requestInit.body)
  expect(payload.top_k).toBe(7)
})
it('should return empty string on HTTP error', async () => {
mockFetch.mockResolvedValueOnce({
ok: false,
@@ -69,30 +128,6 @@ describe('queryRAG', () => {
expect(result).toBe('')
})
it('should pass topK parameter in URL', async () => {
mockFetch.mockResolvedValueOnce({
ok: true,
json: async () => ({ results: [] }),
})
await queryRAG('test query', 7)
const calledUrl = mockFetch.mock.calls[0][0] as string
expect(calledUrl).toContain('top_k=7')
})
it('should use default topK of 3', async () => {
mockFetch.mockResolvedValueOnce({
ok: true,
json: async () => ({ results: [] }),
})
await queryRAG('test query')
const calledUrl = mockFetch.mock.calls[0][0] as string
expect(calledUrl).toContain('top_k=3')
})
it('should handle results with missing fields gracefully', async () => {
mockFetch.mockResolvedValueOnce({
ok: true,

View File

@@ -0,0 +1,90 @@
/**
* RAG Configuration per Document Type
*
* Maps each ScopeDocumentType to its optimal RAG collection and search query.
* Used by the Drafting Engine to fetch type-specific legal context.
*/
import type { ScopeDocumentType } from '@/lib/sdk/compliance-scope-types'
/**
 * RAG lookup settings for a single document type: which collection to
 * search and which query text to search it with.
 */
export interface DocumentRAGConfig {
/** Name of the RAG collection in bp-core-rag-service to search */
collection: string
/** Search query text tuned for this document type */
query: string
}
/**
 * Lookup table from document type to its RAG collection and search query.
 *
 * The `Record<ScopeDocumentType, ...>` annotation makes the compiler reject
 * the table if any document type is missing an entry. Entry order is kept
 * stable because it determines `Object.keys`/`Object.values` iteration order.
 */
export const DOCUMENT_RAG_CONFIG: Record<ScopeDocumentType, DocumentRAGConfig> = {
  dsfa: { collection: 'bp_dsfa_corpus', query: 'Art. 35 DSGVO Risikobewertung Massnahmen' },
  tom: { collection: 'bp_compliance_datenschutz', query: 'Art. 32 DSGVO Sicherheit Verarbeitung' },
  vvt: { collection: 'bp_compliance_gesetze', query: 'Art. 30 DSGVO Dokumentationspflicht' },
  lf: { collection: 'bp_compliance_recht', query: 'Aufbewahrungsfristen Loeschkonzept' },
  dsi: { collection: 'bp_compliance_datenschutz', query: 'Art. 13 Art. 14 DSGVO Transparenz' },
  betroffenenrechte: { collection: 'bp_compliance_recht', query: 'Art. 15 bis 22 DSGVO Auskunft Loeschung' },
  datenpannen: { collection: 'bp_compliance_recht', query: 'Art. 33 Art. 34 DSGVO Meldepflicht' },
  daten_transfer: { collection: 'bp_compliance_ce', query: 'Kapitel V DSGVO Standardvertragsklauseln' },
  einwilligung: { collection: 'bp_compliance_datenschutz', query: 'Art. 6 Art. 7 Art. 9 DSGVO Widerruf' },
  vertragsmanagement: { collection: 'bp_compliance_recht', query: 'AVV Art. 28 DSGVO Vertragsanforderungen' },
  schulung: { collection: 'bp_compliance_datenschutz', query: 'Datenschutz Schulung Awareness' },
  audit_log: { collection: 'bp_compliance_datenschutz', query: 'Audit Logging Art. 5 Abs. 2 DSGVO' },
  risikoanalyse: { collection: 'bp_compliance_ce', query: 'Risikoanalyse Risikobewertung Framework' },
  notfallplan: { collection: 'bp_compliance_recht', query: 'Notfallplan Incident Response Krisenmanagement' },
  zertifizierung: { collection: 'bp_compliance_ce', query: 'ISO 27001 ISO 27701 Art. 42 DSGVO' },
  datenschutzmanagement: { collection: 'bp_compliance_datenschutz', query: 'DSMS PDCA Organisation' },
  iace_ce_assessment: { collection: 'bp_compliance_ce', query: 'AI Act KI-Verordnung CE-Konformitaet' },
  av_vertrag: { collection: 'bp_compliance_recht', query: 'AVV Art. 28 DSGVO Mindestinhalte' },
}

View File

@@ -1,24 +1,35 @@
/**
* Shared RAG query utility for the Drafting Engine.
*
* Queries the DSFA RAG corpus via klausur-service for relevant legal context.
* Queries the bp-core-rag-service for relevant legal context.
* Supports multi-collection search via POST /api/v1/search.
* Used by both chat and draft routes.
*/
const KLAUSUR_SERVICE_URL = process.env.KLAUSUR_SERVICE_URL || 'http://klausur-service:8086'
const RAG_SERVICE_URL = process.env.RAG_SERVICE_URL || 'http://bp-core-rag-service:8097'
/**
* Query the RAG corpus for relevant legal documents.
*
* @param query - The search query (e.g. "DSFA Art. 35 DSGVO")
* @param query - The search query (e.g. "Art. 35 DSGVO Risikobewertung")
* @param topK - Number of results to return (default: 3)
* @param collection - Optional RAG collection name (e.g. "bp_dsfa_corpus")
* @returns Formatted string of legal context, or empty string on error
*/
export async function queryRAG(query: string, topK = 3): Promise<string> {
export async function queryRAG(query: string, topK = 3, collection?: string): Promise<string> {
try {
const url = `${KLAUSUR_SERVICE_URL}/api/v1/dsfa-rag/search?query=${encodeURIComponent(query)}&top_k=${topK}`
const res = await fetch(url, {
const body: Record<string, unknown> = {
query,
top_k: topK,
}
if (collection) {
body.collection = collection
}
const res = await fetch(`${RAG_SERVICE_URL}/api/v1/search`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(body),
signal: AbortSignal.timeout(10000),
})