90a70c8404
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / detect-changes (push) Successful in 7s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 5s
CI / validate-canonical-controls (push) Successful in 4s
CI / loc-budget (push) Successful in 17s
CI / go-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m2s
CI / test-go (push) Has been skipped
Die Drafting-Engine (Dokument-Entwurf, v2-Pipeline, Validierung, Drafting-Chat, Vendor-Vertragspruefung) war auf prod doppelt tot: - RAG ueber bp-core-rag-service:8097 (existiert auf prod nicht) - LLM ueber OLLAMA_URL/api/chat mit qwen2.5vl (prod = ollama-embed, kein Chat-Modell) Fix (analog zum Compliance-Advisor): - rag-query.ts -> ai-compliance-sdk /sdk/v1/rag/search (bge-m3, prod-erreichbar). - Neue lib/sdk/drafting-engine/llm-cascade.ts: OVH/LiteLLM (gpt-oss-120b) zuerst, Ollama als Dev-Fallback; cascadeComplete (JSON) + cascadeStream. Das Backend nutzt OVH+JSON bereits erfolgreich auf prod (extract-datasheet). - 5 Aufrufstellen (draft-helpers, draft-helpers-v2, validate, chat, vendor-review) auf die Kaskade umgestellt; keine direkten Ollama-Calls mehr. - Tests: llm-cascade + rag-query aktualisiert. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
111 lines
4.2 KiB
TypeScript
111 lines
4.2 KiB
TypeScript
/**
 * Drafting Engine Chat API
 *
 * Verbindet das DraftingEngineWidget mit dem LLM Backend.
 * Unterstuetzt alle 4 Modi: explain, ask, draft, validate.
 * Nutzt State-Projection fuer token-effiziente Kontextgabe.
 */
|
|
|
|
import { NextRequest, NextResponse } from 'next/server'
|
|
import { queryRAG } from '@/lib/sdk/drafting-engine/rag-query'
|
|
import { DOCUMENT_RAG_CONFIG } from '@/lib/sdk/drafting-engine/rag-config'
|
|
import { readSoulFile } from '@/lib/sdk/agents/soul-reader'
|
|
import type { ScopeDocumentType } from '@/lib/sdk/compliance-scope-types'
|
|
import { cascadeStream } from '@/lib/sdk/drafting-engine/llm-cascade'
|
|
|
|
// Fallback SOUL prompt (used when .soul.md file is unavailable)
|
|
/**
 * System prompt used when no prompt text could be loaded for the drafting agent.
 *
 * The text is intentionally German: it instructs the model to answer in German.
 * It is sent to the LLM verbatim, so it must not contain stray layout characters.
 */
const FALLBACK_DRAFTING_PROMPT = `# Drafting Agent - Compliance-Dokumententwurf

## Identitaet
Du bist der BreakPilot Drafting Agent. Du hilfst Nutzern des AI Compliance SDK,
DSGVO-konforme Compliance-Dokumente zu entwerfen und Konsistenz sicherzustellen.

## Strikte Constraints
- Gib praxisnahe Hinweise, KEINE konkrete Rechtsberatung
- Kommuniziere auf Deutsch, sachlich und verstaendlich
- Fuelle fehlende Informationen mit [PLATZHALTER: ...] Markierung`
|
|
|
|
/**
 * POST handler for the drafting-engine chat endpoint.
 *
 * Expects a JSON object body with:
 * - `message` (required, non-empty string): the current user turn
 * - `history` (optional array of `{ role, content }`): the last 10 usable entries are forwarded
 * - `sdkStateProjection` (optional): serialized into the system prompt, capped at 3000 characters
 * - `mode` (optional): `explain` | `ask` | `draft` | `validate`; anything else falls back to `explain`
 * - `documentType` (optional string): selects an entry of `DOCUMENT_RAG_CONFIG` when it is a known key
 *
 * Responses:
 * - 200: the stream returned by `cascadeStream`, served as `text/plain`
 * - 400: the body is not a JSON object, or `message` is missing
 * - 502: `cascadeStream` returned no stream
 * - 503: any other failure while handling the request
 */
export async function POST(request: NextRequest) {
  try {
    // Malformed JSON is a client error, not an LLM outage, so it gets its own 400.
    let body: unknown
    try {
      body = await request.json()
    } catch {
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
    }
    if (typeof body !== 'object' || body === null || Array.isArray(body)) {
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
    }

    const {
      message,
      history,
      sdkStateProjection,
      mode: requestedMode,
      documentType: requestedDocumentType,
    } = body as Record<string, unknown>

    if (!message || typeof message !== 'string') {
      return NextResponse.json({ error: 'Message is required' }, { status: 400 })
    }

    const mode = resolveDraftingMode(requestedMode)
    const documentType =
      typeof requestedDocumentType === 'string' && requestedDocumentType !== ''
        ? requestedDocumentType
        : undefined

    // 1. Pick the type-specific RAG config. The own-property check keeps inherited
    //    keys such as "constructor" from being mistaken for a config entry.
    const ragConfig =
      documentType !== undefined &&
      Object.prototype.hasOwnProperty.call(DOCUMENT_RAG_CONFIG, documentType)
        ? DOCUMENT_RAG_CONFIG[documentType as ScopeDocumentType]
        : undefined
    const ragQuery = ragConfig ? `${ragConfig.query} ${message}` : message

    // 2. The RAG lookup and the prompt file read do not depend on each other,
    //    so they are started together instead of one after the other.
    const [ragContext, soulPrompt] = await Promise.all([
      queryRAG(ragQuery, 3, ragConfig?.collection),
      readSoulFile('drafting-agent'),
    ])

    // 3. System prompt = base prompt + mode instructions + state projection + RAG context
    let systemContent =
      (soulPrompt || FALLBACK_DRAFTING_PROMPT) + draftingModeInstruction(mode, documentType)

    if (sdkStateProjection) {
      // Hard cap so a large projection cannot dominate the prompt; the cut may land mid-JSON.
      systemContent += `\n\n## SDK-State Projektion (${mode}-Kontext)\n${JSON.stringify(sdkStateProjection).slice(0, 3000)}`
    }

    if (ragContext) {
      systemContent += `\n\n## Relevanter Rechtskontext\n${ragContext}`
    }

    // 4. Build messages array
    const messages = [
      { role: 'system', content: systemContent },
      ...toChatHistory(history),
      { role: 'user', content: message },
    ]

    // 5. LLM cascade -> plain-text stream; draft mode gets a lower temperature and a larger budget
    const isDraft = mode === 'draft'
    const stream = await cascadeStream(messages, {
      temperature: isDraft ? 0.2 : 0.3,
      maxTokens: isDraft ? 16384 : 8192,
      timeoutMs: 120000,
    })

    if (!stream) {
      return NextResponse.json(
        { error: 'LLM nicht erreichbar (weder OVH noch Ollama)' },
        { status: 502 }
      )
    }

    return new NextResponse(stream, {
      headers: {
        'Content-Type': 'text/plain; charset=utf-8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
      },
    })
  } catch (error) {
    console.error('Drafting engine chat error:', error)
    return NextResponse.json(
      { error: 'Verbindung zum LLM fehlgeschlagen.' },
      { status: 503 }
    )
  }
}

/** Chat modes the handler has dedicated instructions for. */
type DraftingChatMode = 'explain' | 'ask' | 'draft' | 'validate'

/** Maps an untrusted `mode` value onto a known mode, defaulting to `explain`. */
function resolveDraftingMode(value: unknown): DraftingChatMode {
  switch (value) {
    case 'ask':
      return 'ask'
    case 'draft':
      return 'draft'
    case 'validate':
      return 'validate'
    default:
      return 'explain'
  }
}

/** Returns the system-prompt section that describes the active mode. */
function draftingModeInstruction(mode: DraftingChatMode, documentType: string | undefined): string {
  switch (mode) {
    case 'ask':
      return '\n\n## Aktueller Modus: ASK\nAnalysiere Luecken und stelle gezielte Fragen. Eine Frage pro Antwort.'
    case 'draft':
      return `\n\n## Aktueller Modus: DRAFT\nEntwirf strukturierte Dokument-Sections. Dokumenttyp: ${documentType || 'nicht spezifiziert'}.\nAntworte mit JSON wenn ein Draft angefragt wird.`
    case 'validate':
      return '\n\n## Aktueller Modus: VALIDATE\nPruefe Cross-Dokument-Konsistenz. Gib Errors, Warnings und Suggestions zurueck.'
    case 'explain':
      return '\n\n## Aktueller Modus: EXPLAIN\nBeantworte Fragen verstaendlich mit Quellenangaben.'
  }
}

/**
 * Converts the untrusted `history` value into at most 10 chat turns.
 * Entries that are not objects or have no string `content` are dropped;
 * every role other than `user` is treated as `assistant`.
 */
function toChatHistory(history: unknown): { role: string; content: string }[] {
  if (!Array.isArray(history)) {
    return []
  }

  const turns: { role: string; content: string }[] = []
  for (const entry of history) {
    if (typeof entry !== 'object' || entry === null) {
      continue
    }
    const { role, content } = entry as Record<string, unknown>
    if (typeof content !== 'string') {
      continue
    }
    turns.push({ role: role === 'user' ? 'user' : 'assistant', content })
  }
  return turns.slice(-10)
}
|