// breakpilot-compliance/admin-compliance/app/api/sdk/drafting-engine/chat/route.ts

/**
 * Drafting Engine Chat API
 *
 * Verbindet das DraftingEngineWidget mit dem LLM Backend.
 * Unterstuetzt alle 4 Modi: explain, ask, draft, validate.
 * Nutzt State-Projection fuer token-effiziente Kontextgabe.
 */

import { NextRequest, NextResponse } from 'next/server'
import { queryRAG } from '@/lib/sdk/drafting-engine/rag-query'
import { DOCUMENT_RAG_CONFIG } from '@/lib/sdk/drafting-engine/rag-config'
import { readSoulFile } from '@/lib/sdk/agents/soul-reader'
import type { ScopeDocumentType } from '@/lib/sdk/compliance-scope-types'
import { cascadeStream } from '@/lib/sdk/drafting-engine/llm-cascade'

// Built-in system prompt for the drafting agent. POST falls back to it when
// readSoulFile('drafting-agent') yields no content. The prompt text itself is
// runtime data sent to the LLM and is intentionally kept in German.
const FALLBACK_DRAFTING_PROMPT = [
  '# Drafting Agent - Compliance-Dokumententwurf',
  '',
  '## Identitaet',
  'Du bist der BreakPilot Drafting Agent. Du hilfst Nutzern des AI Compliance SDK,',
  'DSGVO-konforme Compliance-Dokumente zu entwerfen und Konsistenz sicherzustellen.',
  '',
  '## Strikte Constraints',
  '- Gib praxisnahe Hinweise, KEINE konkrete Rechtsberatung',
  '- Kommuniziere auf Deutsch, sachlich und verstaendlich',
  '- Fuelle fehlende Informationen mit [PLATZHALTER: ...] Markierung',
].join('\n')

/**
 * POST handler for the drafting-engine chat endpoint.
 *
 * Expects a JSON object body with:
 * - `message` (required, non-empty string): the current user message.
 * - `history` (optional array of `{ role, content }`): prior turns; only the
 *   last 10 entries are considered and malformed entries are dropped.
 * - `sdkStateProjection` (optional): serialized into the system prompt,
 *   truncated to 3000 characters.
 * - `mode` (optional): one of `explain`, `ask`, `draft`, `validate`; anything
 *   else is treated as `explain`.
 * - `documentType` (optional string): selects a type-specific RAG config when
 *   one exists and is named in the draft-mode instructions.
 *
 * @returns 400 for an unparsable body or a missing message, 502 when
 *   `cascadeStream` yields no stream, 503 when anything else throws, otherwise
 *   a `text/plain` response whose body is the stream from `cascadeStream`.
 */
export async function POST(request: NextRequest) {
  // Own-property check: request-supplied keys such as "constructor" or
  // "toString" must never resolve through the prototype chain.
  const hasOwn = (obj: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(obj, key)

  try {
    // A malformed body is a client error, not an LLM connectivity problem,
    // so it is answered with 400 instead of falling into the 503 catch below.
    let body: unknown
    try {
      body = await request.json()
    } catch {
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
    }

    // Non-object payloads (null, numbers, strings) carry no fields; treating
    // them as empty lets the message check below reject them uniformly.
    const payload: Record<string, unknown> =
      typeof body === 'object' && body !== null ? (body as Record<string, unknown>) : {}
    const { message, history, sdkStateProjection, mode, documentType } = payload

    if (typeof message !== 'string' || message.length === 0) {
      return NextResponse.json({ error: 'Message is required' }, { status: 400 })
    }

    // Only a non-empty string is accepted as a document type.
    const docType =
      typeof documentType === 'string' && documentType.length > 0 ? documentType : undefined

    // 1. Resolve the RAG config (type-specific collection + query boost if available)
    const ragConfig =
      docType !== undefined && hasOwn(DOCUMENT_RAG_CONFIG, docType)
        ? DOCUMENT_RAG_CONFIG[docType as ScopeDocumentType]
        : undefined
    const ragQuery = ragConfig ? `${ragConfig.query} ${message}` : message

    // The RAG lookup and the soul-file read are independent, so run them in parallel.
    const [ragContext, soulPrompt] = await Promise.all([
      queryRAG(ragQuery, 3, ragConfig?.collection),
      readSoulFile('drafting-agent'),
    ])

    // 2. Build system prompt with mode-specific instructions + state projection
    let systemContent = soulPrompt || FALLBACK_DRAFTING_PROMPT

    // Mode-specific instructions (prompt text is runtime data, kept verbatim)
    const modeInstructions: Record<string, string> = {
      explain: '\n\n## Aktueller Modus: EXPLAIN\nBeantworte Fragen verstaendlich mit Quellenangaben.',
      ask: '\n\n## Aktueller Modus: ASK\nAnalysiere Luecken und stelle gezielte Fragen. Eine Frage pro Antwort.',
      draft: `\n\n## Aktueller Modus: DRAFT\nEntwirf strukturierte Dokument-Sections. Dokumenttyp: ${docType ?? 'nicht spezifiziert'}.\nAntworte mit JSON wenn ein Draft angefragt wird.`,
      validate: '\n\n## Aktueller Modus: VALIDATE\nPruefe Cross-Dokument-Konsistenz. Gib Errors, Warnings und Suggestions zurueck.',
    }

    // Normalize the mode once so the instructions, the prompt header and the
    // sampling parameters all agree, and no arbitrary client string is
    // interpolated into the system prompt.
    const activeMode =
      typeof mode === 'string' && hasOwn(modeInstructions, mode) ? mode : 'explain'
    systemContent += modeInstructions[activeMode] ?? ''

    // Add state projection context (truncated to bound prompt size)
    if (sdkStateProjection) {
      systemContent += `\n\n## SDK-State Projektion (${activeMode}-Kontext)\n${JSON.stringify(sdkStateProjection).slice(0, 3000)}`
    }

    // Add RAG context
    if (ragContext) {
      systemContent += `\n\n## Relevanter Rechtskontext\n${ragContext}`
    }

    // 3. Build messages array from the last 10 history entries. Entries that
    // are not objects with string content are skipped instead of crashing the
    // request or forwarding non-text content to the LLM.
    const recentTurns = (Array.isArray(history) ? history : [])
      .slice(-10)
      .filter(
        (turn: unknown): turn is { role?: unknown; content: string } =>
          typeof turn === 'object' &&
          turn !== null &&
          typeof (turn as { content?: unknown }).content === 'string'
      )

    const messages = [
      { role: 'system', content: systemContent },
      ...recentTurns.map((turn) => ({
        // Any role other than "user" is sent as "assistant".
        role: turn.role === 'user' ? 'user' : 'assistant',
        content: turn.content,
      })),
      { role: 'user', content: message },
    ]

    // 4. LLM cascade (OVH -> Ollama) -> plain-text stream
    const isDraft = activeMode === 'draft'
    const stream = await cascadeStream(messages, {
      temperature: isDraft ? 0.2 : 0.3,
      maxTokens: isDraft ? 16384 : 8192,
      timeoutMs: 120000,
    })

    if (!stream) {
      return NextResponse.json(
        { error: 'LLM nicht erreichbar (weder OVH noch Ollama)' },
        { status: 502 }
      )
    }

    return new NextResponse(stream, {
      headers: {
        'Content-Type': 'text/plain; charset=utf-8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
      },
    })
  } catch (error) {
    console.error('Drafting engine chat error:', error)
    return NextResponse.json(
      { error: 'Verbindung zum LLM fehlgeschlagen.' },
      { status: 503 }
    )
  }
}