Files
breakpilot-lehrer/admin-lehrer/lib/sdk/tom-generator/ai/document-analyzer.ts
Benjamin Boenisch 5a31f52310 Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00

415 lines
11 KiB
TypeScript

// =============================================================================
// TOM Generator Document Analyzer
// AI-powered analysis of uploaded evidence documents
// =============================================================================
import {
EvidenceDocument,
AIDocumentAnalysis,
ExtractedClause,
DocumentType,
} from '../types'
import {
getDocumentAnalysisPrompt,
getDocumentTypeDetectionPrompt,
DocumentAnalysisPromptContext,
} from './prompts'
import { getAllControls } from '../controls/loader'
// =============================================================================
// TYPES
// =============================================================================
/**
 * Outcome of a single document analysis run.
 * The analyzer in this file reports failures through this object
 * (success=false, analysis=null, error set) instead of throwing.
 */
export interface AnalysisResult {
/** True when the AI call succeeded and a response was turned into an analysis. */
success: boolean
/** The analysis on success; null whenever success is false. */
analysis: AIDocumentAnalysis | null
/** Failure description; populated by the analyzer only on unsuccessful results. */
error?: string
}
/**
 * Result of AI-based document type detection.
 * When detection fails, the analyzer in this file returns documentType 'OTHER'
 * with confidence 0 rather than throwing.
 */
export interface DocumentTypeDetectionResult {
/** Detected type; 'OTHER' is used as the fallback. */
documentType: DocumentType
/** Confidence as reported by the AI; 0 on failure. NOTE(review): scale (0-1 vs 0-100) is not visible here — confirm against the prompt. */
confidence: number
/** AI-provided explanation, or a fallback/error message when detection failed. */
reasoning: string
}
// =============================================================================
// DOCUMENT ANALYZER CLASS
// =============================================================================
/**
 * Sends evidence documents to an AI endpoint and turns the (loosely
 * structured) responses into typed analysis results.
 *
 * All public async methods report failures through their return value
 * instead of throwing.
 */
export class TOMDocumentAnalyzer {
  private readonly apiEndpoint: string
  private readonly apiKey: string | null

  /**
   * @param options.apiEndpoint URL the prompts are POSTed to; an empty or
   *   missing value falls back to the built-in evidence-analysis route.
   * @param options.apiKey Optional bearer token; when absent (or empty) no
   *   Authorization header is sent.
   */
  constructor(options?: { apiEndpoint?: string; apiKey?: string }) {
    // `||` (not `??`) on purpose: an empty string should also fall back.
    this.apiEndpoint = options?.apiEndpoint || '/api/sdk/v1/tom-generator/evidence/analyze'
    this.apiKey = options?.apiKey || null
  }

  /**
   * Analyze a document and extract relevant TOM information.
   *
   * @param document Evidence document metadata; its `documentType` is passed to the prompt.
   * @param documentText Plain text content of the document.
   * @param language Prompt language, defaults to German.
   * @returns A result object; `success` is false when the AI call failed or
   *   an exception was caught. An AI reply that cannot be parsed as a JSON
   *   object still yields `success: true` with an empty analysis and
   *   confidence 0.
   */
  async analyzeDocument(
    document: EvidenceDocument,
    documentText: string,
    language: 'de' | 'en' = 'de'
  ): Promise<AnalysisResult> {
    try {
      // Every known control ID is handed to the prompt as context.
      const allControls = getAllControls()
      const controlIds = allControls.map((c) => c.id)

      const promptContext: DocumentAnalysisPromptContext = {
        documentType: document.documentType,
        documentText,
        controlIds,
        language,
      }
      const prompt = getDocumentAnalysisPrompt(promptContext)

      const response = await this.callAI(prompt)
      if (!response.success || !response.data) {
        return {
          success: false,
          analysis: null,
          error: response.error || 'Failed to analyze document',
        }
      }

      const analysis: AIDocumentAnalysis = {
        ...this.parseAnalysisResponse(response.data),
        analyzedAt: new Date(),
      }
      return { success: true, analysis }
    } catch (error) {
      return {
        success: false,
        analysis: null,
        error: error instanceof Error ? error.message : 'Unknown error',
      }
    }
  }

  /**
   * Detect the document type from content.
   *
   * @param documentText Plain text content of the document.
   * @param filename Original file name, passed to the prompt as a hint.
   * @returns The detected type; on any failure `documentType` is 'OTHER'
   *   with confidence 0 and a reasoning text describing the failure.
   */
  async detectDocumentType(
    documentText: string,
    filename: string
  ): Promise<DocumentTypeDetectionResult> {
    try {
      const prompt = getDocumentTypeDetectionPrompt(documentText, filename)
      const response = await this.callAI(prompt)
      if (!response.success || !response.data) {
        return {
          documentType: 'OTHER',
          confidence: 0,
          reasoning: 'Could not detect document type',
        }
      }

      const parsed = this.parseJSONResponse(response.data)
      return {
        documentType: this.mapDocumentType(String(parsed.documentType || 'OTHER')),
        confidence: typeof parsed.confidence === 'number' ? parsed.confidence : 0,
        reasoning: typeof parsed.reasoning === 'string' ? parsed.reasoning : '',
      }
    } catch (error) {
      return {
        documentType: 'OTHER',
        confidence: 0,
        reasoning: error instanceof Error ? error.message : 'Detection failed',
      }
    }
  }

  /**
   * Suggest control IDs a document should be linked to, based on its analysis.
   *
   * @returns The analysis' applicable controls followed by any control IDs
   *   referenced by extracted clauses, without duplicates, in first-seen order.
   */
  async suggestControlLinks(
    analysis: AIDocumentAnalysis
  ): Promise<string[]> {
    // A Set keeps insertion order and drops duplicates from both sources.
    const suggestedControls = new Set<string>(analysis.applicableControls)
    for (const clause of analysis.extractedClauses) {
      if (clause.relatedControlId) {
        suggestedControls.add(clause.relatedControlId)
      }
    }
    return [...suggestedControls]
  }

  /**
   * Calculate evidence coverage for a control.
   *
   * A requirement counts as covered when its text occurs (case-insensitively)
   * in the AI summary or in any extracted clause of at least one document.
   *
   * @param controlId ID of the control to evaluate.
   * @param documents Evidence documents to consider.
   * @returns `coverage` as a rounded percentage (100 when the control has no
   *   evidence requirements, 0 when the control is unknown), the IDs of
   *   documents linked explicitly or via AI analysis, and the requirements
   *   no document covers.
   */
  calculateEvidenceCoverage(
    controlId: string,
    documents: EvidenceDocument[]
  ): {
    coverage: number
    linkedDocuments: string[]
    missingEvidence: string[]
  } {
    const control = getAllControls().find((c) => c.id === controlId)
    if (!control) {
      return { coverage: 0, linkedDocuments: [], missingEvidence: [] }
    }

    // Lowercase each requirement once instead of once per document.
    const requirements = control.evidenceRequirements.map((requirement: string) => ({
      requirement,
      needle: requirement.toLowerCase(),
    }))
    const linkedDocuments = new Set<string>()
    const coveredRequirements = new Set<string>()

    for (const doc of documents) {
      const analysis = doc.aiAnalysis

      // Linked either explicitly or because the AI analysis names this control.
      if (
        doc.linkedControlIds.includes(controlId) ||
        analysis?.applicableControls.includes(controlId)
      ) {
        linkedDocuments.add(doc.id)
      }

      if (!analysis) {
        continue
      }

      // Lowercase the searchable texts once per document.
      const haystacks: string[] = [
        analysis.summary,
        ...analysis.extractedClauses.map((c: { text: string }) => c.text),
      ].map((text: string) => text.toLowerCase())

      for (const { requirement, needle } of requirements) {
        if (
          !coveredRequirements.has(requirement) &&
          haystacks.some((text) => text.includes(needle))
        ) {
          coveredRequirements.add(requirement)
        }
      }
    }

    const missingEvidence = control.evidenceRequirements.filter(
      (req: string) => !coveredRequirements.has(req)
    )
    // Derive the covered count from the requirement list itself so duplicate
    // requirement strings cannot push the percentage below its true value.
    const total = control.evidenceRequirements.length
    const coverage =
      total > 0 ? Math.round(((total - missingEvidence.length) / total) * 100) : 100

    return {
      coverage,
      linkedDocuments: [...linkedDocuments],
      missingEvidence,
    }
  }

  /**
   * POST a prompt to the configured endpoint.
   *
   * @returns `success: true` with the response text in `data`, or
   *   `success: false` with an `error` message for HTTP errors, empty
   *   payloads, network failures and invalid JSON bodies.
   */
  private async callAI(
    prompt: string
  ): Promise<{ success: boolean; data?: string; error?: string }> {
    try {
      const headers: Record<string, string> = {
        'Content-Type': 'application/json',
      }
      if (this.apiKey) {
        headers['Authorization'] = `Bearer ${this.apiKey}`
      }

      const response = await fetch(this.apiEndpoint, {
        method: 'POST',
        headers,
        body: JSON.stringify({ prompt }),
      })
      if (!response.ok) {
        return {
          success: false,
          error: `API error: ${response.status} ${response.statusText}`,
        }
      }

      const payload: unknown = await response.json()
      const text = TOMDocumentAnalyzer.extractResponseText(payload)
      if (text === null) {
        return { success: false, error: 'AI API returned an empty response' }
      }
      return { success: true, data: text }
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : 'API call failed',
      }
    }
  }

  /**
   * Parse the AI analysis response into the analysis fields.
   * Missing or wrongly typed fields fall back to empty values / 0, and
   * clause entries that are not objects are dropped.
   */
  private parseAnalysisResponse(response: string): {
    summary: string
    extractedClauses: ExtractedClause[]
    applicableControls: string[]
    gaps: string[]
    confidence: number
  } {
    const parsed = this.parseJSONResponse(response)
    const rawClauses: unknown[] = Array.isArray(parsed.extractedClauses)
      ? parsed.extractedClauses
      : []

    return {
      summary: typeof parsed.summary === 'string' ? parsed.summary : '',
      extractedClauses: rawClauses
        // The AI may emit null or primitive entries; only objects are clauses.
        .filter(TOMDocumentAnalyzer.isRecord)
        .map((clause) => ({
          id: String(clause.id || ''),
          text: String(clause.text || ''),
          type: String(clause.type || ''),
          relatedControlId: clause.relatedControlId
            ? String(clause.relatedControlId)
            : null,
        })),
      applicableControls: Array.isArray(parsed.applicableControls)
        ? parsed.applicableControls.map(String)
        : [],
      gaps: Array.isArray(parsed.gaps) ? parsed.gaps.map(String) : [],
      confidence: typeof parsed.confidence === 'number' ? parsed.confidence : 0,
    }
  }

  /**
   * Parse JSON from an AI response (handles markdown code fences and
   * surrounding prose).
   *
   * @returns The parsed JSON object, or `{}` when the response contains no
   *   parseable JSON object (including valid JSON that is not an object,
   *   such as `null`, arrays or primitives).
   */
  private parseJSONResponse(response: string): Record<string, unknown> {
    // Strip a leading ``` / ```json fence and a trailing ``` fence if present.
    const jsonStr = response
      .trim()
      .replace(/^```[a-z]*\s*/i, '')
      .replace(/\s*```$/, '')
      .trim()

    try {
      return TOMDocumentAnalyzer.asRecord(JSON.parse(jsonStr))
    } catch {
      // Not pure JSON: try the outermost {...} span embedded in the text.
      const jsonMatch = jsonStr.match(/\{[\s\S]*\}/)
      if (!jsonMatch) {
        return {}
      }
      try {
        return TOMDocumentAnalyzer.asRecord(JSON.parse(jsonMatch[0]))
      } catch {
        return {}
      }
    }
  }

  /**
   * Map a free-form string to a DocumentType (case-insensitive, ignoring
   * surrounding whitespace); unknown values map to 'OTHER'.
   */
  private mapDocumentType(type: string): DocumentType {
    const typeMap: Record<string, DocumentType> = {
      AVV: 'AVV',
      DPA: 'DPA',
      SLA: 'SLA',
      NDA: 'NDA',
      POLICY: 'POLICY',
      CERTIFICATE: 'CERTIFICATE',
      AUDIT_REPORT: 'AUDIT_REPORT',
      OTHER: 'OTHER',
    }
    return typeMap[type.trim().toUpperCase()] || 'OTHER'
  }

  /** True for non-null, non-array objects. */
  private static isRecord(value: unknown): value is Record<string, unknown> {
    return typeof value === 'object' && value !== null && !Array.isArray(value)
  }

  /** Return `value` when it is a plain object, otherwise an empty object. */
  private static asRecord(value: unknown): Record<string, unknown> {
    return TOMDocumentAnalyzer.isRecord(value) ? value : {}
  }

  /**
   * Pull the response text out of an API payload.
   * Prefers the `response` field, then `content`; a non-string field value
   * is serialized so the result is always a string. Payloads without either
   * field are serialized as a whole. Returns null for a null/undefined payload.
   */
  private static extractResponseText(payload: unknown): string | null {
    if (payload === null || payload === undefined) {
      return null
    }
    if (TOMDocumentAnalyzer.isRecord(payload)) {
      const candidate = payload.response || payload.content
      if (typeof candidate === 'string') {
        return candidate
      }
      if (candidate) {
        return JSON.stringify(candidate)
      }
    }
    return JSON.stringify(payload)
  }
}
// =============================================================================
// SINGLETON INSTANCE
// =============================================================================
let analyzerInstance: TOMDocumentAnalyzer | null = null

/**
 * Return the shared analyzer instance, creating it on first use.
 *
 * @param options Endpoint/key configuration. When provided, the shared
 *   instance is (re)built with these options so that configuration passed
 *   after the first call is not silently ignored. When omitted, the existing
 *   instance is reused unchanged.
 */
export function getDocumentAnalyzer(
  options?: { apiEndpoint?: string; apiKey?: string }
): TOMDocumentAnalyzer {
  if (!analyzerInstance || options) {
    analyzerInstance = new TOMDocumentAnalyzer(options)
  }
  return analyzerInstance
}
// =============================================================================
// HELPER FUNCTIONS
// =============================================================================
/**
 * Convenience wrapper: run a document analysis on the shared analyzer.
 *
 * @param document Evidence document metadata.
 * @param documentText Plain text content of the document.
 * @param language Prompt language, defaults to German.
 * @returns Whatever the shared analyzer's `analyzeDocument` resolves to.
 */
export async function analyzeEvidenceDocument(
  document: EvidenceDocument,
  documentText: string,
  language: 'de' | 'en' = 'de'
): Promise<AnalysisResult> {
  const sharedAnalyzer = getDocumentAnalyzer()
  return sharedAnalyzer.analyzeDocument(document, documentText, language)
}
/**
 * Convenience wrapper: detect a document's type using the shared analyzer.
 *
 * @param documentText Plain text content of the document.
 * @param filename Original file name of the document.
 * @returns Whatever the shared analyzer's `detectDocumentType` resolves to.
 */
export async function detectEvidenceDocumentType(
  documentText: string,
  filename: string
): Promise<DocumentTypeDetectionResult> {
  const sharedAnalyzer = getDocumentAnalyzer()
  return sharedAnalyzer.detectDocumentType(documentText, filename)
}
/**
 * Compute evidence coverage for every known control.
 *
 * @param documents Evidence documents to evaluate against each control.
 * @returns A map keyed by control ID holding the coverage percentage and the
 *   evidence requirements still missing, in the order the controls are listed.
 */
export function getEvidenceGapsForAllControls(
  documents: EvidenceDocument[]
): Map<string, { coverage: number; missing: string[] }> {
  const sharedAnalyzer = getDocumentAnalyzer()

  // Build the [controlId, gap] pairs first, then hand them to the Map.
  const gapEntries = getAllControls().map(
    (control): [string, { coverage: number; missing: string[] }] => {
      const { coverage, missingEvidence } = sharedAnalyzer.calculateEvidenceCoverage(
        control.id,
        documents
      )
      return [control.id, { coverage, missing: missingEvidence }]
    }
  )

  return new Map<string, { coverage: number; missing: string[] }>(gapEntries)
}