// breakpilot-lehrer/admin-lehrer/lib/sdk/tom-generator/ai/document-analyzer.ts

// =============================================================================
// TOM Generator Document Analyzer
// AI-powered analysis of uploaded evidence documents
// =============================================================================

import {
  EvidenceDocument,
  AIDocumentAnalysis,
  ExtractedClause,
  DocumentType,
} from '../types'
import {
  getDocumentAnalysisPrompt,
  getDocumentTypeDetectionPrompt,
  DocumentAnalysisPromptContext,
} from './prompts'
import { getAllControls } from '../controls/loader'

// =============================================================================
// TYPES
// =============================================================================

/**
 * Outcome of a document analysis request.
 */
export interface AnalysisResult {
  /** `true` when the AI call succeeded and an analysis object was built. */
  success: boolean
  /** The structured analysis; `null` whenever `success` is `false`. */
  analysis: AIDocumentAnalysis | null
  /** Human-readable failure reason; only set when the analysis failed. */
  error?: string
}

/**
 * Outcome of a document type detection request.
 */
export interface DocumentTypeDetectionResult {
  /** Detected type; `'OTHER'` is used when detection fails or the type is unknown. */
  documentType: DocumentType
  /** Confidence reported by the AI; `0` when detection failed or none was reported. */
  confidence: number
  /** Explanation reported by the AI, or the failure message when detection failed. */
  reasoning: string
}

// =============================================================================
// DOCUMENT ANALYZER CLASS
// =============================================================================

/**
 * Sends analysis prompts to the configured AI endpoint and converts the
 * loosely structured model output into typed results.
 *
 * All public async methods report failures through their return value
 * instead of throwing.
 */
export class TOMDocumentAnalyzer {
  private readonly apiEndpoint: string
  private readonly apiKey: string | null

  /**
   * @param options.apiEndpoint - URL the prompts are POSTed to. Falls back to
   *   the default analyze route when omitted or empty.
   * @param options.apiKey - Optional bearer token; no `Authorization` header
   *   is sent when omitted or empty.
   */
  constructor(options?: { apiEndpoint?: string; apiKey?: string }) {
    // `||` (not `??`) is deliberate: an empty string counts as "not provided".
    this.apiEndpoint = options?.apiEndpoint || '/api/sdk/v1/tom-generator/evidence/analyze'
    this.apiKey = options?.apiKey || null
  }

  /**
   * Analyze a document and extract relevant TOM information.
   *
   * @param document - Evidence document; its `documentType` is passed to the prompt.
   * @param documentText - Plain text content of the document.
   * @param language - Prompt language, defaults to German.
   * @returns A result with `success: false` and an `error` message when the
   *   API call fails or an exception occurs; never rejects.
   */
  async analyzeDocument(
    document: EvidenceDocument,
    documentText: string,
    language: 'de' | 'en' = 'de'
  ): Promise<AnalysisResult> {
    try {
      // The prompt lists every known control ID so the model can reference them.
      const controlIds = getAllControls().map((c) => c.id)

      const promptContext: DocumentAnalysisPromptContext = {
        documentType: document.documentType,
        documentText,
        controlIds,
        language,
      }

      const response = await this.callAI(getDocumentAnalysisPrompt(promptContext))

      if (!response.success || !response.data) {
        return {
          success: false,
          analysis: null,
          error: response.error || 'Failed to analyze document',
        }
      }

      const parsedResponse = this.parseAnalysisResponse(response.data)

      const analysis: AIDocumentAnalysis = {
        summary: parsedResponse.summary,
        extractedClauses: parsedResponse.extractedClauses,
        applicableControls: parsedResponse.applicableControls,
        gaps: parsedResponse.gaps,
        confidence: parsedResponse.confidence,
        analyzedAt: new Date(),
      }

      return { success: true, analysis }
    } catch (error) {
      return {
        success: false,
        analysis: null,
        error: error instanceof Error ? error.message : 'Unknown error',
      }
    }
  }

  /**
   * Detect the document type from content.
   *
   * @param documentText - Plain text content of the document.
   * @param filename - Original file name, passed to the prompt builder.
   * @returns The detected type, or `'OTHER'` with confidence `0` when the
   *   detection fails; never rejects.
   */
  async detectDocumentType(
    documentText: string,
    filename: string
  ): Promise<DocumentTypeDetectionResult> {
    try {
      const prompt = getDocumentTypeDetectionPrompt(documentText, filename)
      const response = await this.callAI(prompt)

      if (!response.success || !response.data) {
        return {
          documentType: 'OTHER',
          confidence: 0,
          reasoning: 'Could not detect document type',
        }
      }

      const parsed = this.parseJSONResponse(response.data)

      return {
        documentType: this.mapDocumentType(String(parsed.documentType || 'OTHER')),
        confidence: TOMDocumentAnalyzer.toConfidence(parsed.confidence),
        reasoning: typeof parsed.reasoning === 'string' ? parsed.reasoning : '',
      }
    } catch (error) {
      return {
        documentType: 'OTHER',
        confidence: 0,
        reasoning: error instanceof Error ? error.message : 'Detection failed',
      }
    }
  }

  /**
   * Suggest control IDs a document should be linked to.
   *
   * Combines the analysis' applicable controls with the controls referenced
   * by individual clauses.
   *
   * @returns Unique control IDs in first-seen order.
   */
  async suggestControlLinks(
    analysis: AIDocumentAnalysis
  ): Promise<string[]> {
    // A Set keeps insertion order and drops duplicates from either source.
    const suggestedControls = new Set<string>(analysis.applicableControls)

    for (const clause of analysis.extractedClauses) {
      if (clause.relatedControlId) {
        suggestedControls.add(clause.relatedControlId)
      }
    }

    return [...suggestedControls]
  }

  /**
   * Calculate evidence coverage for a control.
   *
   * A requirement counts as covered when its text appears (case-insensitive
   * substring match) in the summary or any extracted clause of an analyzed
   * document.
   *
   * @param controlId - ID of the control to evaluate.
   * @param documents - Evidence documents to consider.
   * @returns `coverage` as a rounded percentage (100 when the control has no
   *   evidence requirements), the unique IDs of documents linked explicitly
   *   or by AI analysis, and the requirements not covered. An unknown control
   *   yields zero coverage and empty lists.
   */
  calculateEvidenceCoverage(
    controlId: string,
    documents: EvidenceDocument[]
  ): {
    coverage: number
    linkedDocuments: string[]
    missingEvidence: string[]
  } {
    const control = getAllControls().find((c) => c.id === controlId)
    if (!control) {
      return { coverage: 0, linkedDocuments: [], missingEvidence: [] }
    }

    // Lowercase each requirement once instead of once per document.
    const requirements = control.evidenceRequirements.map((requirement) => ({
      requirement,
      needle: requirement.toLowerCase(),
    }))

    const linkedDocuments = new Set<string>()
    const coveredRequirements = new Set<string>()

    for (const doc of documents) {
      const analysis = doc.aiAnalysis

      // Linked either explicitly or because the AI analysis names the control.
      if (
        doc.linkedControlIds.includes(controlId) ||
        analysis?.applicableControls.includes(controlId)
      ) {
        linkedDocuments.add(doc.id)
      }

      if (!analysis) {
        continue
      }

      const summary = analysis.summary.toLowerCase()
      const clauseTexts = analysis.extractedClauses.map((c) => c.text.toLowerCase())

      for (const { requirement, needle } of requirements) {
        if (coveredRequirements.has(requirement)) {
          continue
        }
        if (summary.includes(needle) || clauseTexts.some((text) => text.includes(needle))) {
          coveredRequirements.add(requirement)
        }
      }
    }

    const missingEvidence = control.evidenceRequirements.filter(
      (req) => !coveredRequirements.has(req)
    )

    // Derive the covered count from the same list as `missingEvidence` so the
    // percentage stays consistent even if a requirement is listed twice.
    const total = control.evidenceRequirements.length
    const coverage =
      total > 0 ? Math.round(((total - missingEvidence.length) / total) * 100) : 100

    return {
      coverage,
      linkedDocuments: [...linkedDocuments],
      missingEvidence,
    }
  }

  /**
   * POST the prompt to the configured endpoint.
   *
   * @returns `success: true` with the response text in `data`, or
   *   `success: false` with an `error` message for non-OK statuses and for
   *   network or JSON decoding failures; never rejects.
   */
  private async callAI(
    prompt: string
  ): Promise<{ success: boolean; data?: string; error?: string }> {
    try {
      const headers: Record<string, string> = {
        'Content-Type': 'application/json',
      }

      if (this.apiKey) {
        headers['Authorization'] = `Bearer ${this.apiKey}`
      }

      const response = await fetch(this.apiEndpoint, {
        method: 'POST',
        headers,
        body: JSON.stringify({ prompt }),
      })

      if (!response.ok) {
        return {
          success: false,
          error: `API error: ${response.status} ${response.statusText}`,
        }
      }

      const payload: unknown = await response.json()

      return {
        success: true,
        data: this.extractResponseText(payload),
      }
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : 'API call failed',
      }
    }
  }

  /**
   * Pull the model output out of the API payload as a string.
   *
   * Uses the first truthy value of `response` / `content`; a non-string value
   * is serialized so that downstream parsing always receives a string. When
   * neither field is usable the whole payload is serialized.
   */
  private extractResponseText(payload: unknown): string {
    if (TOMDocumentAnalyzer.isRecord(payload)) {
      const candidate = payload.response || payload.content
      if (typeof candidate === 'string') {
        return candidate
      }
      if (candidate) {
        return JSON.stringify(candidate)
      }
    }
    return JSON.stringify(payload) ?? ''
  }

  /**
   * Parse the AI analysis response into its typed parts.
   *
   * Missing or wrongly typed fields fall back to empty values; clause entries
   * that are not objects are skipped.
   */
  private parseAnalysisResponse(response: string): {
    summary: string
    extractedClauses: ExtractedClause[]
    applicableControls: string[]
    gaps: string[]
    confidence: number
  } {
    const parsed = this.parseJSONResponse(response)
    const rawClauses: unknown[] = Array.isArray(parsed.extractedClauses)
      ? parsed.extractedClauses
      : []

    return {
      summary: typeof parsed.summary === 'string' ? parsed.summary : '',
      extractedClauses: rawClauses
        // Model output is untrusted: `null` or primitive entries would throw below.
        .filter(TOMDocumentAnalyzer.isRecord)
        .map((clause) => ({
          id: String(clause.id || ''),
          text: String(clause.text || ''),
          type: String(clause.type || ''),
          relatedControlId: clause.relatedControlId
            ? String(clause.relatedControlId)
            : null,
        })),
      applicableControls: Array.isArray(parsed.applicableControls)
        ? parsed.applicableControls.map(String)
        : [],
      gaps: Array.isArray(parsed.gaps) ? parsed.gaps.map(String) : [],
      confidence: TOMDocumentAnalyzer.toConfidence(parsed.confidence),
    }
  }

  /**
   * Parse JSON from an AI response (handles markdown code blocks).
   *
   * @returns The parsed object, or `{}` when the text contains no JSON object
   *   (including valid JSON that is `null`, an array or a primitive).
   */
  private parseJSONResponse(response: string): Record<string, unknown> {
    // Strip a leading ``` / ```json fence and a trailing ``` fence if present.
    const jsonStr = response
      .trim()
      .replace(/^```(?:json)?/i, '')
      .replace(/```$/, '')
      .trim()

    try {
      return TOMDocumentAnalyzer.asRecord(JSON.parse(jsonStr))
    } catch {
      // Not pure JSON: try the outermost {...} span embedded in the text.
      const jsonMatch = jsonStr.match(/\{[\s\S]*\}/)
      if (jsonMatch) {
        try {
          return TOMDocumentAnalyzer.asRecord(JSON.parse(jsonMatch[0]))
        } catch {
          return {}
        }
      }
      return {}
    }
  }

  /**
   * Map a string to a DocumentType (case-insensitive, surrounding whitespace
   * ignored); unknown values map to `'OTHER'`.
   */
  private mapDocumentType(type: string): DocumentType {
    const typeMap: Record<string, DocumentType> = {
      AVV: 'AVV',
      DPA: 'DPA',
      SLA: 'SLA',
      NDA: 'NDA',
      POLICY: 'POLICY',
      CERTIFICATE: 'CERTIFICATE',
      AUDIT_REPORT: 'AUDIT_REPORT',
      OTHER: 'OTHER',
    }

    return typeMap[type.trim().toUpperCase()] ?? 'OTHER'
  }

  /** Type guard for plain JSON objects (excludes `null` and arrays). */
  private static isRecord(value: unknown): value is Record<string, unknown> {
    return typeof value === 'object' && value !== null && !Array.isArray(value)
  }

  /** Return `value` when it is a JSON object, otherwise an empty object. */
  private static asRecord(value: unknown): Record<string, unknown> {
    return TOMDocumentAnalyzer.isRecord(value) ? value : {}
  }

  /** Accept only finite numbers as confidence; anything else becomes `0`. */
  private static toConfidence(value: unknown): number {
    return typeof value === 'number' && Number.isFinite(value) ? value : 0
  }
}

// =============================================================================
// SINGLETON INSTANCE
// =============================================================================

// Lazily created shared analyzer; stays `null` until first requested.
let analyzerInstance: TOMDocumentAnalyzer | null = null

/**
 * Return the shared analyzer, creating it on first use.
 *
 * Note: `options` are only applied by the call that creates the instance;
 * later calls return the existing instance and ignore their `options`.
 */
export function getDocumentAnalyzer(
  options?: { apiEndpoint?: string; apiKey?: string }
): TOMDocumentAnalyzer {
  analyzerInstance ??= new TOMDocumentAnalyzer(options)
  return analyzerInstance
}

// =============================================================================
// HELPER FUNCTIONS
// =============================================================================

/**
 * Convenience wrapper: analyze a document with the shared analyzer instance.
 *
 * @param document - Evidence document to analyze.
 * @param documentText - Plain text content of the document.
 * @param language - Prompt language, defaults to German.
 */
export async function analyzeEvidenceDocument(
  document: EvidenceDocument,
  documentText: string,
  language: 'de' | 'en' = 'de'
): Promise<AnalysisResult> {
  const sharedAnalyzer = getDocumentAnalyzer()
  return sharedAnalyzer.analyzeDocument(document, documentText, language)
}

/**
 * Convenience wrapper: detect a document's type with the shared analyzer instance.
 *
 * @param documentText - Plain text content of the document.
 * @param filename - Original file name of the document.
 */
export async function detectEvidenceDocumentType(
  documentText: string,
  filename: string
): Promise<DocumentTypeDetectionResult> {
  const sharedAnalyzer = getDocumentAnalyzer()
  return sharedAnalyzer.detectDocumentType(documentText, filename)
}

/**
 * Compute evidence coverage and missing evidence for every known control.
 *
 * @param documents - Evidence documents to evaluate against each control.
 * @returns A map keyed by control ID holding the coverage percentage and the
 *   list of evidence requirements not yet covered.
 */
export function getEvidenceGapsForAllControls(
  documents: EvidenceDocument[]
): Map<string, { coverage: number; missing: string[] }> {
  const sharedAnalyzer = getDocumentAnalyzer()
  const gapsByControl = new Map<string, { coverage: number; missing: string[] }>()

  for (const { id } of getAllControls()) {
    const { coverage, missingEvidence } = sharedAnalyzer.calculateEvidenceCoverage(id, documents)
    gapsByControl.set(id, { coverage, missing: missingEvidence })
  }

  return gapsByControl
}