diff --git a/admin-compliance/app/api/sdk/compliance-advisor/chat/route.ts b/admin-compliance/app/api/sdk/compliance-advisor/chat/route.ts index dd1afe7d..516b9f43 100644 --- a/admin-compliance/app/api/sdk/compliance-advisor/chat/route.ts +++ b/admin-compliance/app/api/sdk/compliance-advisor/chat/route.ts @@ -1,52 +1,28 @@ /** - * Compliance Advisor Chat API — Evidence Workspace envelope. + * Compliance Advisor Chat API — Clarity-Gate orchestration. * - * Verbindet das ComplianceAdvisorWidget mit: - * 1. Strukturierter RAG-Evidence ueber die ai-compliance-sdk — siehe advisor-rag - * 2. Strukturierten Controls zum erkannten Thema — buildControlsContext - * 3. LLM-Kaskade OVH (prod) -> Ollama (Dev) — siehe advisor-llm - * - * Antwort-Format (evidence-meta-v1): ERSTE Zeile = JSON `AdvisorEvidenceMeta` - * (Quellen/Abbildungen/Fussnoten/Stats), danach streamt die Antwort als Markdown-Text. - * Das Frontend rendert NUR diese strukturierten Daten und parst NIE den Antworttext. + * Consumes the SDK/RAG /retrieve (evidence/visual_evidence/footnotes/clarity) and returns the + * FE-facing contract (advisor-clarity-gate-contract): + * - clarify mode -> short L1 general answer (no RAG) + domain context chips + * - answer mode -> L2 answer over the scoped evidence with [n] citation markers + * Citations are generated here ([n] -> nth evidence unit). The FE renders ONLY this structured data. 
*/ import { NextRequest, NextResponse } from 'next/server' import { readSoulFile } from '@/lib/sdk/agents/soul-reader' -import { buildControlsContext } from '@/lib/sdk/agents/controls-augmentation' -import { retrieveAdvisorEvidence } from '@/lib/sdk/agents/advisor-rag' -import { adaptEvidence, type RawFigure, type RawFootnote } from '@/lib/sdk/advisor/evidence-adapter' -import { streamAdvisorAnswer, type ChatMessage } from '@/lib/sdk/agents/advisor-llm' +import { retrieveFull } from '@/lib/sdk/agents/advisor-rag' +import { completeAdvisorAnswer, type ChatMessage } from '@/lib/sdk/agents/advisor-llm' +import { + buildCitations, + mapClarity, + mapFootnotes, + numberedEvidenceForPrompt, + resolveMode, +} from '@/lib/sdk/advisor/retrieve-mapping' +import type { AdvisorResponse } from '@/lib/sdk/advisor/contract' type Country = 'DE' | 'AT' | 'CH' | 'EU' -const FALLBACK_SYSTEM_PROMPT = `# Compliance Advisor Agent - -## Identitaet -Du bist der BreakPilot Compliance-Berater. Du hilfst Nutzern des AI Compliance SDK, -Datenschutz- und Compliance-Fragen in verstaendlicher Sprache zu beantworten. - -## Kernprinzipien -- Quellenbasiert: Stuetze dich auf die bereitgestellten Rechtsquellen -- Verstaendlich: Einfache, praxisnahe Sprache -- Ehrlich: Bei Unsicherheit empfehle Rechtsberatung -- Deutsch als Hauptsprache` - -// Antwort = saubere Prosa OHNE Inline-Fundstellen; die Quellen zeigt das Frontend separat an. -const FORMAT_GUIDANCE = `\n\n## Antwortformat (WICHTIG) -- Schreibe gut strukturiertes **Markdown**: kurze Abschnittsueberschriften (##), Aufzaehlungen (-), - nummerierte Schritte und **Fettung** fuer Schluesselbegriffe. Halte Absaetze kurz. -- GLIEDERE erklaerende Antworten aktiv statt langem Fliesstext: eine eigene ## Ueberschrift je - Aspekt (z.B. "Definition", "Ablauf/Phasen", "Rechtsbezug", "Praktische Bedeutung"), nummerierte - Schritte fuer Ablaeufe/Phasen, Bullet-Points fuer Aufzaehlungen. Lieber klar gegliedert als ein Block. 
-- Nenne Fundstellen/Quellen NICHT im Fliesstext (kein "(Art. 30 DSGVO)", keine "[Quelle 1]"). - Die Quellen werden dem Nutzer in einem EIGENEN Bereich neben der Antwort angezeigt. -- Beende die Antwort NIEMALS mit einer Quellen-/Fundstellen-Liste (kein "Quellen:", kein - "--- Quellen im RAG-System: ...", kein "Quellen im RAG-System"). KEINE Quellenaufzaehlung im - Antworttext. Dies UEBERSCHREIBT jede anderslautende Struktur-/Beispielvorgabe weiter oben im - System-Prompt (auch eine dort gezeigte "Quellen:"-Abschlusssektion gilt hier NICHT). -- Schreibe so, dass die Antwort auch ohne eingebettete Zitate vollstaendig verstaendlich ist.` - const COUNTRY_LABELS: Record = { DE: 'Deutschland', AT: 'Oesterreich', @@ -56,116 +32,94 @@ const COUNTRY_LABELS: Record = { function countryBlock(c: Country): string { const label = COUNTRY_LABELS[c] - const nationalLaws = - c === 'DE' - ? 'BDSG, TDDDG, TKG, UWG' - : c === 'AT' - ? 'AT DSG, ECG, TKG, KSchG, MedienG' - : 'CH DSG, DSV, OR, UWG, FMG' - const guidance = - c === 'EU' - ? 'EU-weiten Fragen: Beziehe dich auf EU-Verordnungen und -Richtlinien' - : `${label}: Beziehe nationale Gesetze (${nationalLaws}) mit ein` return `\n\n## Laenderspezifische Auskunft -Der Nutzer hat "${label} (${c})" gewaehlt. -- Beziehe dich AUSSCHLIESSLICH auf ${c}-Recht + anwendbares EU-Recht -- Nenne IMMER explizit das Land in deiner Antwort -- Verwende NIEMALS Gesetze eines anderen Landes -- Bei ${guidance}` +Der Nutzer hat "${label} (${c})" gewaehlt. Beziehe dich auf ${c}-Recht + anwendbares EU-Recht und nenne das Land.` } -/** Stellt der gestreamten Antwort eine JSON-Meta-Zeile voran (evidence-meta-v1). 
*/ -function withEvidenceMeta(meta: unknown, answer: ReadableStream): ReadableStream { - const encoder = new TextEncoder() - const metaLine = JSON.stringify(meta) + '\n' - return new ReadableStream({ - async start(controller) { - controller.enqueue(encoder.encode(metaLine)) - const reader = answer.getReader() - try { - for (;;) { - const { done, value } = await reader.read() - if (done) break - if (value) controller.enqueue(value) - } - } catch (e) { - controller.error(e) - return - } - controller.close() - }, - }) +// L1: general knowledge, deliberately NOT grounded (the clarify step precedes the legal retrieval). +const L1_SYSTEM = `Du bist der BreakPilot Compliance-Berater. Gib eine KURZE, allgemeine Definition/Erklaerung +des gefragten Begriffs aus Allgemeinwissen — 2 bis 4 Saetze, Markdown, neutral. NENNE KEINE Rechtsquellen, +Paragraphen, Artikel oder Fundstellen; der Nutzer waehlt anschliessend einen konkreten Kontext, erst dann +folgen belegte Quellen. Wenn der Begriff in mehreren Bereichen vorkommt, erwaehne das in einem Halbsatz.` + +const FALLBACK_SYSTEM = `Du bist der BreakPilot Compliance-Berater. Antworte quellenbasiert, verstaendlich und ehrlich auf Deutsch.` + +function answerSystem(soul: string | null, country: Country | undefined, evidenceBlock: string): string { + let s = soul || FALLBACK_SYSTEM + if (country) s += countryBlock(country) + s += `\n\n## Belegte Evidence (nummeriert — DEINE EINZIGEN Quellen)\n${evidenceBlock || '(keine Evidence gefunden)'}` + s += `\n\n## Antwortformat (WICHTIG) +- Gut gegliedertes Markdown: kurze ## Ueberschriften je Aspekt, Aufzaehlungen, **Fettung** fuer Kernbegriffe. +- Belege Kernaussagen mit [n], wobei n die NUMMER der Evidence-Quelle oben ist (z. B. [1], [2]). +- Nenne KEINE Quellen-/Fundstellen-Liste im Fliesstext — die Quellen werden dem Nutzer separat angezeigt. 
+- Triff KEINE Aussage, die nicht durch die nummerierte Evidence belegt ist; fehlt der Beleg, sage das offen.` + return s } export async function POST(request: NextRequest) { try { const body = await request.json() - const { message, history = [], currentStep = 'default', country } = body - - if (!message || typeof message !== 'string') { - return NextResponse.json({ error: 'Message is required' }, { status: 400 }) - } - - const validCountry = (['DE', 'AT', 'CH', 'EU'] as const).includes(country) - ? (country as Country) + const question = String(body.question ?? body.message ?? '').trim() + const context: string | null = body.context ?? null + const country = (['DE', 'AT', 'CH', 'EU'] as const).includes(body.country) + ? (body.country as Country) : undefined - // 1. Strukturierte RAG-Evidence + Controls zum Thema — parallel - const [evidence, controlsContext] = await Promise.all([ - retrieveAdvisorEvidence(message), - buildControlsContext(message), - ]) - - // 2. Evidence-Meta fuer das Frontend (strukturiert, nicht geparst) - const meta = adaptEvidence({ - results: evidence.results, - figures: evidence.figures as RawFigure[] | undefined, - footnotes: evidence.footnotes as RawFootnote[] | undefined, - }) - - // 3. System-Prompt - const soulPrompt = await readSoulFile('compliance-advisor') - let systemContent = soulPrompt || FALLBACK_SYSTEM_PROMPT - if (validCountry) systemContent += countryBlock(validCountry) - if (evidence.contextText) { - systemContent += `\n\n## Relevanter Kontext aus dem RAG-System (deine EINZIGEN Rechtsquellen)\n\nDies sind deine einzigen zulaessigen Rechtsquellen. 
Triff keine konkrete Rechtsaussage (Zahl, Frist, Schwelle, Pflicht, Fundstelle), die nicht hier oder im Controls-Block belegt ist — sonst sage offen, dass du sie aus deinen Quellen nicht belegen kannst.\n\n${evidence.contextText}` + if (!question) { + return NextResponse.json({ error: 'Question is required' }, { status: 400 }) } - if (controlsContext) systemContent += `\n\n${controlsContext}` - systemContent += `\n\n## Aktueller SDK-Schritt\nDer Nutzer befindet sich im SDK-Schritt: ${currentStep}` - systemContent += FORMAT_GUIDANCE // LAST instruction: overrides the soul's trailing "Quellen" structure/example - // 4. Nachrichten (History auf die letzten 6 begrenzen) + const retrieved = await retrieveFull(question, context) + const mode = resolveMode(retrieved.clarity?.mode, !!context) + + if (mode === 'clarify') { + const general = await completeAdvisorAnswer([ + { role: 'system', content: L1_SYSTEM }, + { role: 'user', content: question }, + ]) + if (general === null) { + return NextResponse.json({ error: 'LLM nicht erreichbar.' }, { status: 502 }) + } + const resp: AdvisorResponse = { + mode: 'clarify', + question, + clarity: mapClarity(retrieved.clarity, 'clarify'), + general_answer: general, + answer: null, + scoped_query: null, + evidence: [], + citations: [], + visual_evidence: [], + footnotes: [], + } + return NextResponse.json(resp) + } + + const evidence = retrieved.evidence ?? [] + const soul = await readSoulFile('compliance-advisor') const messages: ChatMessage[] = [ - { role: 'system', content: systemContent }, - ...history.slice(-6).map((h: { role: string; content: string }) => ({ - role: h.role === 'user' ? 'user' : 'assistant', - content: h.content, - })), - { role: 'user', content: message }, + { role: 'system', content: answerSystem(soul, country, numberedEvidenceForPrompt(evidence)) }, + { role: 'user', content: question }, ] - - // 5. 
LLM-Kaskade -> Meta-Zeile + Text-Stream - const stream = await streamAdvisorAnswer(messages) - if (!stream) { - return NextResponse.json( - { error: 'LLM nicht erreichbar. Weder OVH/LiteLLM noch Ollama haben geantwortet.' }, - { status: 502 }, - ) + const answer = await completeAdvisorAnswer(messages) + if (answer === null) { + return NextResponse.json({ error: 'LLM nicht erreichbar.' }, { status: 502 }) } - - return new NextResponse(withEvidenceMeta(meta, stream), { - headers: { - 'Content-Type': 'text/plain; charset=utf-8', - 'Cache-Control': 'no-cache', - Connection: 'keep-alive', - 'X-Advisor-Format': 'evidence-meta-v1', - }, - }) + const resp: AdvisorResponse = { + mode: 'answer', + question, + clarity: mapClarity(retrieved.clarity, 'answer'), + general_answer: null, + answer, + scoped_query: context, + evidence, + citations: buildCitations(evidence), + visual_evidence: retrieved.visual_evidence ?? [], + footnotes: mapFootnotes(retrieved.footnotes), + } + return NextResponse.json(resp) } catch (error) { console.error('Compliance advisor chat error:', error) - return NextResponse.json( - { error: 'Verbindung zum LLM fehlgeschlagen.' }, - { status: 503 }, - ) + return NextResponse.json({ error: 'Verbindung zum Advisor fehlgeschlagen.' 
}, { status: 503 }) } } diff --git a/admin-compliance/lib/sdk/__tests__/advisor-evidence-adapter.test.ts b/admin-compliance/lib/sdk/__tests__/advisor-evidence-adapter.test.ts deleted file mode 100644 index 5a903083..00000000 --- a/admin-compliance/lib/sdk/__tests__/advisor-evidence-adapter.test.ts +++ /dev/null @@ -1,105 +0,0 @@ -import { describe, it, expect } from 'vitest' -import { adaptEvidence } from '../advisor/evidence-adapter' -import type { SdkRagResult } from '../agents/advisor-rag' - -describe('adaptEvidence', () => { - it('maps a structured RAG result to a hierarchical Knowledge Unit', () => { - const results: SdkRagResult[] = [ - { - text: 'Der Verantwortliche fuehrt ein Verzeichnis ...', - regulation_code: 'DSGVO', - regulation_short: 'DSGVO', - article_label: 'Art. 30 DSGVO', - article: 'Art. 30', - paragraph: 'Abs. 1', - source_url: 'https://example.test/dsgvo-30', - score: 0.9, - }, - ] - const { sources, stats } = adaptEvidence({ results }) - expect(sources).toHaveLength(1) - expect(sources[0].label).toBe('Art. 30 DSGVO') - expect(sources[0].section).toBe('Art. 30') - expect(sources[0].paragraph).toBe('Abs. 1') - expect(sources[0].open?.originalUrl).toBe('https://example.test/dsgvo-30') - expect(sources[0].snippet).toContain('Verzeichnis') - expect(stats.sources).toBe(1) - expect(stats.regulations).toBe(1) - }) - - it('dedupes the same citation and keeps the highest score', () => { - const base: SdkRagResult = { - text: 'x', - regulation_code: 'CRA', - regulation_short: 'CRA', - article_label: 'Annex I', - article: 'Annex I', - } - const { sources } = adaptEvidence({ - results: [ - { ...base, score: 0.4 }, - { ...base, score: 0.8 }, - ], - }) - expect(sources).toHaveLength(1) - expect(sources[0].score).toBe(0.8) - }) - - it('counts distinct regulations in stats', () => { - const { stats } = adaptEvidence({ - results: [ - { text: 'a', regulation_code: 'DSGVO', article_label: 'Art. 5' }, - { text: 'b', regulation_code: 'DSGVO', article_label: 'Art. 
6' }, - { text: 'c', regulation_code: 'BDSG', article_label: '§ 38' }, - ], - }) - expect(stats.sources).toBe(3) - expect(stats.regulations).toBe(2) - }) - - it('labels recitals as Erwaegungsgrund', () => { - const { sources } = adaptEvidence({ - results: [{ text: 'r', regulation_code: 'DSGVO', is_recital: true, article: '47' }], - }) - expect(sources[0].section).toBe('Erwägungsgrund 47') - }) - - it('maps figures (C8) to figure units and counts them', () => { - const { figures, stats } = adaptEvidence({ - results: [], - figures: [ - { - figure_id: 'fig-pdca', - label: 'Abbildung 3', - caption: 'PDCA-Zyklus', - regulation_short: 'EDPB WP248', - vision_summary: 'Kreislauf Plan-Do-Check-Act', - image_url: 'https://example.test/abb3.png', - }, - ], - }) - expect(figures).toHaveLength(1) - expect(figures[0].label).toBe('Abbildung 3') - expect(figures[0].caption).toBe('PDCA-Zyklus') - expect(figures[0].imageUrl).toBe('https://example.test/abb3.png') - expect(stats.figures).toBe(1) - }) - - it('maps footnotes (C-FN) and counts them', () => { - const { footnotes, stats } = adaptEvidence({ - results: [], - footnotes: [{ number: 17, regulation_short: 'EDPB WP248', section: 'Kapitel III.B', text: 'siehe ...' 
}], - }) - expect(footnotes).toHaveLength(1) - expect(footnotes[0].ref).toBe('Fußnote 17') - expect(stats.footnotes).toBe(1) - }) - - it('returns empty evidence for empty input', () => { - const meta = adaptEvidence({}) - expect(meta.sources).toEqual([]) - expect(meta.figures).toEqual([]) - expect(meta.footnotes).toEqual([]) - expect(meta.stats).toEqual({ sources: 0, regulations: 0, figures: 0, footnotes: 0 }) - }) -}) diff --git a/admin-compliance/lib/sdk/__tests__/advisor-retrieve-mapping.test.ts b/admin-compliance/lib/sdk/__tests__/advisor-retrieve-mapping.test.ts new file mode 100644 index 00000000..71e60932 --- /dev/null +++ b/admin-compliance/lib/sdk/__tests__/advisor-retrieve-mapping.test.ts @@ -0,0 +1,70 @@ +import { describe, it, expect } from 'vitest' +import { + resolveMode, + mapClarity, + mapFootnotes, + buildCitations, + numberedEvidenceForPrompt, +} from '../advisor/retrieve-mapping' +import type { EvidenceUnit } from '../advisor/contract' + +describe('resolveMode', () => { + it('a chosen context always forces answer', () => expect(resolveMode('clarify', true)).toBe('answer')) + it('clarify + no context -> clarify', () => expect(resolveMode('clarify', false)).toBe('clarify')) + it('answer -> answer', () => expect(resolveMode('answer', false)).toBe('answer')) + it('unknown/undefined -> answer', () => expect(resolveMode(undefined, false)).toBe('answer')) +}) + +describe('mapClarity', () => { + it('clarify maps candidate_contexts -> suggested_contexts', () => { + const c = mapClarity( + { mode: 'clarify', concentration: 0.3, candidate_contexts: [{ id: 'ds', label: 'Datenschutz', hits: 5 }] }, + 'clarify', + ) + expect(c.is_underspecified).toBe(true) + expect(c.suggested_contexts).toEqual([{ id: 'ds', label: 'Datenschutz' }]) + }) + it('answer keeps dominant_context, drops suggestions', () => { + const c = mapClarity({ mode: 'answer', concentration: 0.88, dominant_context: 'ds' }, 'answer') + expect(c.is_underspecified).toBe(false) + 
expect(c.dominant_context).toBe('ds') + expect(c.suggested_contexts).toBeUndefined() + }) +}) + +const ev: EvidenceUnit[] = [ + { evidence_id: 'e1', document: 'DSGVO', section: 'Art. 30', paragraph: 'Abs. 1', snippet: 'x' }, + { evidence_id: 'e2', document: 'BDSG', section: '§ 38' }, +] + +describe('buildCitations', () => { + it('numbers citations 1..n mapped to evidence', () => { + const cs = buildCitations(ev) + expect(cs).toHaveLength(2) + expect(cs[0]).toMatchObject({ citation_id: 'c1', number: 1, evidence_id: 'e1' }) + expect(cs[1].number).toBe(2) + }) +}) + +describe('numberedEvidenceForPrompt', () => { + it('prefixes each unit with [n] + its location', () => { + const s = numberedEvidenceForPrompt(ev) + expect(s).toContain('[1] DSGVO Art. 30 Abs. 1') + expect(s).toContain('[2] BDSG § 38') + }) +}) + +describe('mapFootnotes', () => { + it('remaps a /retrieve footnote to the contract footnote', () => { + const fns = mapFootnotes([ + { id: 'f1', number: 17, regulation_short: 'EDPB WP248', section: 'Kap III', text: 't' }, + ]) + expect(fns[0]).toMatchObject({ + footnote_id: 'f1', + ref: 'Fußnote 17', + document: 'EDPB WP248', + section: 'Kap III', + text: 't', + }) + }) +}) diff --git a/admin-compliance/lib/sdk/advisor/contract.ts b/admin-compliance/lib/sdk/advisor/contract.ts index 77f41c60..47e4f70a 100644 --- a/admin-compliance/lib/sdk/advisor/contract.ts +++ b/admin-compliance/lib/sdk/advisor/contract.ts @@ -23,11 +23,14 @@ export interface EvidenceUnit { paragraph?: string snippet?: string url?: string + regulation_code?: string // preferred key for family grouping (from /retrieve) + context?: string // knowledge space / domain } /** Numbered [n] <-> evidence coupling, produced by the SDK (not parsed from the answer). 
*/ export interface Citation { citation_id: string + number?: number // 1-based marker number ([n]) evidence_id: string document: string section?: string | null diff --git a/admin-compliance/lib/sdk/advisor/evidence-adapter.ts b/admin-compliance/lib/sdk/advisor/evidence-adapter.ts deleted file mode 100644 index 31880696..00000000 --- a/admin-compliance/lib/sdk/advisor/evidence-adapter.ts +++ /dev/null @@ -1,145 +0,0 @@ -// Adapter: RAG/compiler output -> the structured AdvisorEvidenceMeta the Evidence Workspace renders. -// This is the ONLY place that maps backend shapes to the frontend envelope. The frontend never -// parses the answer text — all structure originates here from structured fields. - -import type { - AdvisorEvidenceMeta, - FigureUnit, - FootnoteUnit, - KnowledgeUnit, - RegulationRef, -} from './evidence' -import { deriveStats } from './evidence' -import type { SdkRagResult } from '../agents/advisor-rag' - -/** Provisional raw figure (C8) shape — reconcile with the RAG-ingestion contract (board). */ -export interface RawFigure { - figure_id?: string - id?: string - label?: string // "Abbildung 3" - caption?: string - topic?: string - regulation_code?: string - regulation_short?: string - regulation_name?: string - section?: string - vision_summary?: string - description?: string - image_url?: string - url?: string -} - -/** Provisional raw footnote (C-FN) shape — reconcile with the RAG-ingestion contract (board). 
*/ -export interface RawFootnote { - id?: string - ref?: string - number?: string | number - regulation_code?: string - regulation_short?: string - regulation_name?: string - section?: string - text?: string -} - -export interface RawEvidenceInput { - results?: SdkRagResult[] - figures?: RawFigure[] - footnotes?: RawFootnote[] -} - -function regulationRef( - code?: string, - name?: string, - short?: string, -): RegulationRef { - return { - code: (code || short || name || 'unknown').toLowerCase().replace(/\s+/g, '_'), - name: name || undefined, - short: short || name || code || 'Quelle', - } -} - -function truncate(text: string, max = 240): string { - const t = text.trim().replace(/\s+/g, ' ') - return t.length > max ? `${t.slice(0, max - 1)}…` : t -} - -function toKnowledgeUnit(r: SdkRagResult, idx: number): KnowledgeUnit | null { - const regulation = regulationRef(r.regulation_code, r.regulation_name, r.regulation_short) - const section = r.is_recital - ? `Erwägungsgrund ${r.article ?? ''}`.trim() - : r.article || undefined - const label = r.article_label?.trim() || undefined - // Drop empty placeholders: a unit needs at least a label or a section to be meaningful. - if (!label && !section && !regulation.name && regulation.short === 'Quelle') return null - return { - id: `src-${idx}`, - regulation, - section, - paragraph: r.paragraph || undefined, - subsection: r.sub || undefined, - label, - score: typeof r.score === 'number' ? r.score : undefined, - snippet: r.text ? truncate(r.text) : undefined, - open: r.source_url ? 
{ originalUrl: r.source_url } : undefined, - } -} - -function dedupeKey(u: KnowledgeUnit): string { - return [u.regulation.code, u.section, u.paragraph, u.subsection, u.label] - .map((x) => x || '') - .join('|') -} - -function toFigureUnit(f: RawFigure, idx: number): FigureUnit | null { - const id = f.figure_id || f.id - const imageUrl = f.image_url || f.url - if (!id && !imageUrl && !f.label) return null - return { - id: id || `fig-${idx}`, - label: f.label || `Abbildung ${idx + 1}`, - caption: f.caption || undefined, - topic: f.topic || undefined, - source: regulationRef(f.regulation_code, f.regulation_name, f.regulation_short), - section: f.section || undefined, - visionSummary: f.vision_summary || f.description || undefined, - imageUrl: imageUrl || undefined, - } -} - -function toFootnoteUnit(f: RawFootnote, idx: number): FootnoteUnit | null { - const ref = f.ref || (f.number != null ? `Fußnote ${f.number}` : undefined) - if (!ref && !f.text) return null - return { - id: f.id || `fn-${idx}`, - ref: ref || `Fußnote ${idx + 1}`, - source: regulationRef(f.regulation_code, f.regulation_name, f.regulation_short), - section: f.section || undefined, - text: f.text || undefined, - } -} - -/** - * Build the structured evidence meta. Sources are deduped (same citation retrieved multiple - * times collapses to one, keeping the highest score) and order is preserved by score. - */ -export function adaptEvidence(input: RawEvidenceInput): AdvisorEvidenceMeta { - const seen = new Map() - ;(input.results || []).forEach((r, i) => { - const unit = toKnowledgeUnit(r, i) - if (!unit) return - const key = dedupeKey(unit) - const existing = seen.get(key) - if (!existing || (unit.score ?? 0) > (existing.score ?? 0)) seen.set(key, unit) - }) - const sources = [...seen.values()].sort((a, b) => (b.score ?? 0) - (a.score ?? 
0)) - const figures = (input.figures || []) - .map(toFigureUnit) - .filter((x): x is FigureUnit => x !== null) - const footnotes = (input.footnotes || []) - .map(toFootnoteUnit) - .filter((x): x is FootnoteUnit => x !== null) - - const meta = { sources, figures, footnotes } - return { ...meta, stats: deriveStats(meta) } -} diff --git a/admin-compliance/lib/sdk/advisor/evidence.ts b/admin-compliance/lib/sdk/advisor/evidence.ts deleted file mode 100644 index 38923084..00000000 --- a/admin-compliance/lib/sdk/advisor/evidence.ts +++ /dev/null @@ -1,103 +0,0 @@ -// Structured evidence contract for the Compliance Advisor "Evidence Workspace". -// -// HARD RULE (architecture): the frontend renders ONLY these structured fields and -// NEVER parses the answer text. All structure (sources, figures, footnotes) is owned -// by the SDK/compiler (C-stages) and surfaced as data. The proxy is the adapter that -// fills this envelope from RAG/compiler output. See memory: advisor-evidence-workspace-no-parse. - -/** A regulation / document reference (CRA, EDPB WP248, MaschinenVO, ...). */ -export interface RegulationRef { - code: string // canonical id, e.g. "cra", "edpb_wp248", "maschinenvo" - name?: string // full name - short?: string // short label shown in the card header -} - -/** Openable targets for an evidence item — present only when the SDK can resolve them. */ -export interface OpenTargets { - originalUrl?: string // original text / source_url - chunkId?: string // retrieved chunk - footnoteId?: string // C-FN - figureId?: string // C8 -} - -/** - * A retrieved source as a hierarchical Knowledge Unit, mirroring the compiler: - * Regelwerk -> Section (C1/C2) -> Paragraph -> Footnote (C-FN). - * Rendered as a card, not a text-list line. E.g. "EDPB WP248 / Kapitel III.B / Fußnote 17". 
- */ -export interface KnowledgeUnit { - id: string - regulation: RegulationRef - section?: string // "Annex I" / "Kapitel III.B" / "Anhang III" - subsection?: string // "Abschnitt 2.3" - paragraph?: string // Absatz / paragraph - footnoteRef?: string // "Fußnote 17" when this unit IS a footnote-backed source - label?: string // pre-formatted citation fallback, e.g. "BDSG § 38 Abs. 1" - score?: number // retrieval score (optional) - snippet?: string // short passage preview (optional) — lets the user peek the cited text - open?: OpenTargets -} - -/** A figure (C8) as a Knowledge Unit — never a bare image. Only present when figures exist. */ -export interface FigureUnit { - id: string // figure_id - label: string // "Abbildung 3" - caption?: string // "PDCA-Zyklus" - topic?: string - source: RegulationRef // "EDPB ..." - section?: string - visionSummary?: string // vision/LLM description of the figure - imageUrl?: string // Playwright PNG; undefined until the RAG-ingestion contract delivers it -} - -/** A footnote (C-FN) as a first-class evidence item. */ -export interface FootnoteUnit { - id: string - ref: string // "Fußnote 17" - source: RegulationRef - section?: string - text?: string -} - -/** Counts for the stats bar above the answer ("Diese Antwort basiert auf N Quellen"). */ -export interface AdvisorStats { - sources: number - regulations: number // distinct Regelwerke - figures: number - footnotes: number -} - -/** - * Meta sent by the proxy FIRST (one JSON line), then the answer streams as tokens. - * RAG runs before the LLM, so all evidence is known up front and the panes render - * immediately while the answer streams in. - */ -export interface AdvisorEvidenceMeta { - stats: AdvisorStats - sources: KnowledgeUnit[] - figures: FigureUnit[] - footnotes: FootnoteUnit[] - relatedDocs?: KnowledgeUnit[] -} - -/** The full evidence a single answer turn holds (meta + the streamed answer markdown). 
*/ -export interface AdvisorEvidence extends AdvisorEvidenceMeta { - answer: string // markdown prose, NO inline citations (sources live in the pane) -} - -export function emptyStats(): AdvisorStats { - return { sources: 0, regulations: 0, figures: 0, footnotes: 0 } -} - -/** Pure derivation of the stats bar from the evidence items (no parsing of answer text). */ -export function deriveStats( - e: Pick, -): AdvisorStats { - const regulations = new Set(e.sources.map((s) => s.regulation.code)) - return { - sources: e.sources.length, - regulations: regulations.size, - figures: e.figures.length, - footnotes: e.footnotes.length, - } -} diff --git a/admin-compliance/lib/sdk/advisor/regulation-display.ts b/admin-compliance/lib/sdk/advisor/regulation-display.ts index 2a125981..704f4aac 100644 --- a/admin-compliance/lib/sdk/advisor/regulation-display.ts +++ b/admin-compliance/lib/sdk/advisor/regulation-display.ts @@ -2,7 +2,11 @@ // friendly label (+ chapter for multi-part works like the DSK SDM). Presentation layer only: // it bridges G2 (clean RAG metadata) and keeps working once codes are clean. Extend the table freely. -import type { RegulationRef } from './evidence' +export interface RegulationRef { + code?: string + name?: string + short?: string +} export interface RegulationDisplay { familyKey: string // stable key used to GROUP evidence diff --git a/admin-compliance/lib/sdk/advisor/retrieve-mapping.ts b/admin-compliance/lib/sdk/advisor/retrieve-mapping.ts new file mode 100644 index 00000000..187a3a92 --- /dev/null +++ b/admin-compliance/lib/sdk/advisor/retrieve-mapping.ts @@ -0,0 +1,83 @@ +// Pure mappings from the Go /retrieve response (SDK/RAG-owned; board 2026-07-01 12:25) +// to the FE-facing advisor contract. Kept pure + testable; the orchestration (route.ts) wires them. 
+ +import type { Citation, ClarityInfo, EvidenceUnit, Footnote, VisualEvidence } from './contract' + +export interface RetrieveClarity { + mode?: string // 'clarify' | 'answer' + reason?: string // e.g. 'middle_band_llm_needed' + concentration?: number + domain_count?: number + dominant_context?: string + candidate_contexts?: { id: string; label: string; hits?: number }[] +} + +export interface RetrieveFootnote { + id?: string + ref?: string + number?: number + regulation_code?: string + regulation_short?: string + regulation_name?: string + section?: string + text?: string +} + +export interface RetrieveResponse { + evidence?: EvidenceUnit[] + visual_evidence?: VisualEvidence[] + footnotes?: RetrieveFootnote[] + clarity?: RetrieveClarity + results?: unknown[] + tables?: unknown[] // C6 — not in the FE contract yet (future TablesPane) +} + +/** A chosen context always forces 'answer'; for un-scoped queries /retrieve's clarity.mode decides: only 'clarify' yields 'clarify', anything else (incl. undefined) falls back to 'answer'. */ +export function resolveMode(clarityMode: string | undefined, hasContext: boolean): 'clarify' | 'answer' { + if (hasContext) return 'answer' + return clarityMode === 'clarify' ? 'clarify' : 'answer' +} + +export function mapClarity(c: RetrieveClarity | undefined, mode: 'clarify' | 'answer'): ClarityInfo { + return { + is_underspecified: mode === 'clarify', + concentration: c?.concentration ?? 0, + dominant_context: c?.dominant_context, + suggested_contexts: + mode === 'clarify' ? (c?.candidate_contexts ?? []).map((cc) => ({ id: cc.id, label: cc.label })) : undefined, + } +} + +export function mapFootnotes(fns: RetrieveFootnote[] | undefined): Footnote[] { + return (fns ?? []).map((f) => ({ + footnote_id: f.id, + ref: f.ref ?? (f.number != null ? `Fußnote ${f.number}` : undefined), + document: f.regulation_short || f.regulation_name || f.regulation_code, + section: f.section, + text: f.text, + })) +} + +/** Citations are generated by the orchestration (not by /retrieve): [n] -> nth evidence unit. 
*/ +export function buildCitations(evidence: EvidenceUnit[]): Citation[] { + return evidence.map((e, i) => ({ + citation_id: `c${i + 1}`, + number: i + 1, + evidence_id: e.evidence_id, + document: e.document, + section: e.section ?? null, + paragraph: e.paragraph ?? null, + footnote: null, + figure: null, + })) +} + +/** Numbered evidence list injected into the L2 prompt so the LLM can cite [n]. */ +export function numberedEvidenceForPrompt(evidence: EvidenceUnit[]): string { + return evidence + .map((e, i) => { + const loc = [e.document, e.section, e.paragraph].filter(Boolean).join(' ') + return `[${i + 1}] ${loc}\n${e.snippet ?? ''}`.trim() + }) + .join('\n\n') +} diff --git a/admin-compliance/lib/sdk/agents/advisor-llm.ts b/admin-compliance/lib/sdk/agents/advisor-llm.ts index fd0ecfc8..010051c8 100644 --- a/admin-compliance/lib/sdk/agents/advisor-llm.ts +++ b/admin-compliance/lib/sdk/agents/advisor-llm.ts @@ -138,3 +138,26 @@ export async function streamAdvisorAnswer( if (ollama) return textStream(ollama, parseOllamaLine) return null } + +/** + * Nicht-streamende Variante: sammelt die vollstaendige LLM-Antwort als String (fuer die + * JSON-Contract-Antwort der Advisor-Orchestrierung). null = kein LLM erreichbar. 
+ */ +export async function completeAdvisorAnswer(messages: ChatMessage[]): Promise { + const stream = await streamAdvisorAnswer(messages) + if (!stream) return null + const reader = stream.getReader() + const decoder = new TextDecoder() + let out = '' + try { + for (;;) { + const { done, value } = await reader.read() + if (done) break + if (value) out += decoder.decode(value, { stream: true }) + } + out += decoder.decode() + } finally { + reader.releaseLock() + } + return out +} diff --git a/admin-compliance/lib/sdk/agents/advisor-rag.ts b/admin-compliance/lib/sdk/agents/advisor-rag.ts index 1bd3c938..2b785b23 100644 --- a/admin-compliance/lib/sdk/agents/advisor-rag.ts +++ b/admin-compliance/lib/sdk/agents/advisor-rag.ts @@ -12,6 +12,8 @@ * vorformatierten Kontext-Block fuer den LLM-Prompt. Fehler werden geschluckt (graceful). */ +import type { RetrieveResponse } from '@/lib/sdk/advisor/retrieve-mapping' + const SDK_URL = process.env.SDK_API_URL || process.env.SDK_URL || 'http://ai-compliance-sdk:8090' @@ -122,3 +124,27 @@ export async function retrieveAdvisorEvidence(query: string): Promise { return (await retrieveAdvisorEvidence(query)).contextText } + +/** + * Voller `/retrieve`-Aufruf fuer die Clarity-Gate-Orchestrierung: liefert die strukturierte + * SDK/RAG-Response (evidence/visual_evidence/footnotes/tables/clarity/results). `context` scopet + * den 2. Aufruf auf die gewaehlte Domaene. Fehler -> leeres Ergebnis (graceful). + */ +export async function retrieveFull(query: string, context?: string | null): Promise { + try { + const res = await fetch(`${SDK_URL}/sdk/v1/rag/retrieve`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-User-ID': DEFAULT_USER, + 'X-Tenant-ID': DEFAULT_TENANT, + }, + body: JSON.stringify({ query, top_k: 8, ...(context ? 
{ context } : {}) }), + signal: AbortSignal.timeout(15000), + }) + if (res.ok) return ((await res.json()) as RetrieveResponse) || {} + } catch { + // graceful: keine Verbindung -> leeres Ergebnis + } + return {} +}