// breakpilot-lehrer/admin-lehrer/lib/sdk/vendor-compliance/contract-review/analyzer.ts

/**
 * Contract Analyzer
 *
 * LLM-based contract review for GDPR compliance
 */

import {
  Finding,
  Citation,
  FindingType,
  FindingCategory,
  FindingSeverity,
  DocumentType,
  LocalizedText,
} from '../types'
import { AVV_CHECKLIST, INCIDENT_CHECKLIST, TRANSFER_CHECKLIST } from './checklists'

// ==========================================
// TYPES
// ==========================================

/**
 * Input for `analyzeContract`.
 *
 * The three identifiers are sent to the analysis endpoint and are copied onto
 * every finding that is built from the response.
 */
export interface ContractAnalysisRequest {
  /** Contract identifier; also used as the `documentId` of every citation. */
  contractId: string
  vendorId: string
  tenantId: string
  /** Contract text that is submitted for analysis. */
  documentText: string
  /** Optional document type, forwarded as `document_type`. */
  documentType?: DocumentType
  /** Request language; `analyzeContract` sends 'de' when this is omitted. */
  language?: 'de' | 'en'
  /** Areas to review; `analyzeContract` substitutes a default list when omitted. */
  analysisScope?: AnalysisScope[]
}

/**
 * Typed result of a contract analysis, as produced by
 * `transformAnalysisResponse` from the endpoint's snake_case JSON.
 */
export interface ContractAnalysisResponse {
  /** Taken from `document_type`; 'OTHER' when the response has none. */
  documentType: DocumentType
  /** Taken from `language`; 'de' when the response has none. */
  language: 'de' | 'en'
  parties: ContractPartyInfo[]
  findings: Finding[]
  /**
   * Value of the response's `compliance_score` field (0 when missing).
   * It is not derived from `calculateComplianceScore`.
   */
  complianceScore: number
  topRisks: LocalizedText[]
  requiredActions: LocalizedText[]
  metadata: ExtractedMetadata
}

/** A contracting party as reported in the analysis response. */
export interface ContractPartyInfo {
  /** Role of the party; `transformAnalysisResponse` uses 'PARTY' when none is given. */
  role: 'CONTROLLER' | 'PROCESSOR' | 'PARTY'
  /** Party name; empty string when the response has none. */
  name: string
  address?: string
}

/**
 * Contract metadata copied from the `metadata` object of the analysis
 * response. Every field is optional because the response may omit it.
 */
export interface ExtractedMetadata {
  // NOTE(review): the prompt example uses 'YYYY-MM-DD' strings for both dates,
  // but nothing in this file enforces a format — confirm.
  effectiveDate?: string
  expirationDate?: string
  autoRenewal?: boolean
  // NOTE(review): the unit is not stated in this file (the prompt example is 90,
  // presumably days) — confirm.
  terminationNoticePeriod?: number
  governingLaw?: string
  jurisdiction?: string
}

/**
 * Review areas that can be requested via `ContractAnalysisRequest.analysisScope`.
 *
 * When no scope is given, `analyzeContract` sends every value except
 * 'LIABILITY' and 'SLA'.
 */
export type AnalysisScope =
  | 'AVV_COMPLIANCE'
  | 'SUBPROCESSOR'
  | 'INCIDENT_RESPONSE'
  | 'AUDIT_RIGHTS'
  | 'DELETION'
  | 'TOM'
  | 'TRANSFER'
  | 'LIABILITY'
  | 'SLA'

// ==========================================
// SYSTEM PROMPTS
// ==========================================

/**
 * German-language system prompt for the GDPR contract review.
 *
 * The string is assembled once when the module is evaluated: it embeds one
 * line per entry of AVV_CHECKLIST, INCIDENT_CHECKLIST and TRANSFER_CHECKLIST
 * (id, German requirement text, article) and ends with the expected JSON
 * output format. The snake_case keys of that format are the keys that
 * `transformAnalysisResponse` reads.
 */
export const CONTRACT_REVIEW_SYSTEM_PROMPT = `Du bist ein Datenschutz-Rechtsexperte, der Verträge auf DSGVO-Konformität prüft.

WICHTIG:
1. Jede Feststellung MUSS mit einer Textstelle belegt werden (Citation)
2. Gib niemals Rechtsberatung - nur Compliance-Hinweise
3. Markiere unklare Stellen als UNKNOWN, nicht als GAP
4. Sei konservativ: im Zweifel RISK statt OK

PRÜFUNGSSCHEMA Art. 28 DSGVO AVV:
${AVV_CHECKLIST.map((item) => `- ${item.id}: ${item.requirement.de} (${item.article})`).join('\n')}

INCIDENT RESPONSE:
${INCIDENT_CHECKLIST.map((item) => `- ${item.id}: ${item.requirement.de} (${item.article})`).join('\n')}

DRITTLANDTRANSFER:
${TRANSFER_CHECKLIST.map((item) => `- ${item.id}: ${item.requirement.de} (${item.article})`).join('\n')}

AUSGABEFORMAT (JSON):
{
  "document_type": "AVV|MSA|SLA|SCC|NDA|TOM_ANNEX|OTHER|UNKNOWN",
  "language": "de|en",
  "parties": [
    {
      "role": "CONTROLLER|PROCESSOR|PARTY",
      "name": "...",
      "address": "..."
    }
  ],
  "findings": [
    {
      "category": "AVV_CONTENT|SUBPROCESSOR|INCIDENT|AUDIT_RIGHTS|DELETION|TOM|TRANSFER|LIABILITY|SLA|DATA_SUBJECT_RIGHTS|CONFIDENTIALITY|INSTRUCTION|GENERAL",
      "type": "OK|GAP|RISK|UNKNOWN",
      "severity": "LOW|MEDIUM|HIGH|CRITICAL",
      "title_de": "...",
      "title_en": "...",
      "description_de": "...",
      "description_en": "...",
      "recommendation_de": "...",
      "recommendation_en": "...",
      "citations": [
        {
          "page": 3,
          "quoted_text": "Der Auftragnehmer...",
          "start_char": 1234,
          "end_char": 1456
        }
      ],
      "affected_requirement": "Art. 28 Abs. 3 lit. a DSGVO"
    }
  ],
  "compliance_score": 72,
  "top_risks": [
    {"de": "...", "en": "..."}
  ],
  "required_actions": [
    {"de": "...", "en": "..."}
  ],
  "metadata": {
    "effective_date": "2024-01-01",
    "expiration_date": "2025-12-31",
    "auto_renewal": true,
    "termination_notice_period": 90,
    "governing_law": "Germany",
    "jurisdiction": "Frankfurt am Main"
  }
}`

/**
 * German-language prompt that asks for a classification of a contract text:
 * document type, language and the contracting parties with their roles,
 * answered as JSON with the keys `document_type`, `language` and `parties`.
 *
 * NOTE(review): the type list here has no UNKNOWN entry, whereas the output
 * format of CONTRACT_REVIEW_SYSTEM_PROMPT does — confirm whether that is intended.
 */
export const CONTRACT_CLASSIFICATION_PROMPT = `Analysiere den folgenden Vertragstext und klassifiziere ihn:

1. Dokumenttyp (AVV, MSA, SLA, SCC, NDA, TOM_ANNEX, OTHER)
2. Sprache (de, en)
3. Vertragsparteien mit Rollen

Antworte im JSON-Format:
{
  "document_type": "...",
  "language": "...",
  "parties": [...]
}`

/**
 * German-language prompt that asks for six metadata items of a contract
 * (effective date, term/expiration, auto-renewal, notice period, governing
 * law, place of jurisdiction) as JSON.
 *
 * Unlike the other prompts in this file it does not spell out the JSON key
 * names the answer should use.
 */
export const METADATA_EXTRACTION_PROMPT = `Extrahiere die folgenden Metadaten aus dem Vertrag:

1. Inkrafttreten / Effective Date
2. Laufzeit / Ablaufdatum
3. Automatische Verlängerung
4. Kündigungsfrist
5. Anwendbares Recht
6. Gerichtsstand

Antworte im JSON-Format.`

// ==========================================
// ANALYSIS FUNCTIONS
// ==========================================

/**
 * Submit a contract to the analysis endpoint and return the typed result.
 *
 * The request is POSTed as JSON with snake_case keys. A missing language is
 * sent as 'de'; a missing analysis scope is replaced by the default list
 * below (every scope except LIABILITY and SLA).
 *
 * @param request - Contract text plus the identifiers to attach to the result.
 * @returns The endpoint's JSON, converted by `transformAnalysisResponse`.
 * @throws Error with the message 'Contract analysis failed' when the HTTP
 *   response is not ok.
 */
export async function analyzeContract(
  request: ContractAnalysisRequest
): Promise<ContractAnalysisResponse> {
  // Scopes requested when the caller does not choose any.
  const defaultScope = [
    'AVV_COMPLIANCE',
    'SUBPROCESSOR',
    'INCIDENT_RESPONSE',
    'AUDIT_RIGHTS',
    'DELETION',
    'TOM',
    'TRANSFER',
  ]

  const payload = {
    contract_id: request.contractId,
    vendor_id: request.vendorId,
    tenant_id: request.tenantId,
    document_text: request.documentText,
    document_type: request.documentType,
    language: request.language || 'de',
    analysis_scope: request.analysisScope || defaultScope,
  }

  const response = await fetch('/api/sdk/v1/vendor-compliance/contracts/analyze', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  })

  if (response.ok) {
    const raw = await response.json()
    return transformAnalysisResponse(raw, request)
  }

  throw new Error('Contract analysis failed')
}

/**
 * Convert the raw JSON of the analysis endpoint into a typed response.
 *
 * The payload originates from an LLM, so nothing about its shape is trusted:
 * - list fields that are not arrays are treated as empty, and list entries
 *   that are not plain objects are skipped (previously both threw a TypeError);
 * - scalar fields are only accepted when they have the expected primitive
 *   type; anything else (including JSON `null`) falls back to the same default
 *   that a missing field gets;
 * - `severity`, `language` and party `role` are checked against the literal
 *   values this module knows; unrecognized values get the defaults
 *   'MEDIUM', 'de' and 'PARTY'.
 *
 * `type`, `category` and `document_type` are only checked to be strings,
 * because their unions are declared outside this file.
 *
 * @param llmResponse - Parsed JSON body returned by the analysis endpoint.
 * @param request - Original request; supplies the ids stamped on each finding.
 * @returns The typed analysis result.
 */
function transformAnalysisResponse(
  llmResponse: Record<string, unknown>,
  request: ContractAnalysisRequest
): ContractAnalysisResponse {
  type JsonObject = Record<string, unknown>

  const isObject = (value: unknown): value is JsonObject =>
    typeof value === 'object' && value !== null && !Array.isArray(value)
  const objectList = (value: unknown): JsonObject[] =>
    Array.isArray(value) ? value.filter(isObject) : []
  const text = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined
  const finite = (value: unknown): number | undefined =>
    typeof value === 'number' && Number.isFinite(value) ? value : undefined
  const flag = (value: unknown): boolean | undefined =>
    typeof value === 'boolean' ? value : undefined
  const localized = (entry: JsonObject) => ({
    de: text(entry.de) || '',
    en: text(entry.en) || '',
  })
  const severityOf = (value: unknown): 'LOW' | 'MEDIUM' | 'HIGH' | 'CRITICAL' => {
    const candidate = text(value)
    return candidate === 'LOW' || candidate === 'HIGH' || candidate === 'CRITICAL'
      ? candidate
      : 'MEDIUM'
  }
  const roleOf = (value: unknown): 'CONTROLLER' | 'PROCESSOR' | 'PARTY' => {
    const candidate = text(value)
    return candidate === 'CONTROLLER' || candidate === 'PROCESSOR' ? candidate : 'PARTY'
  }

  // `response.json()` is untyped, so even the root may not be an object.
  const root: JsonObject = isObject(llmResponse) ? llmResponse : {}

  // One timestamp for the whole batch, but a separate Date object per field so
  // that mutating one finding's date cannot affect another.
  const timestamp = Date.now()

  const findings: Finding[] = objectList(root.findings).map((f, idx) => {
    const recommendationDe = text(f.recommendation_de)

    return {
      id: `finding-${request.contractId}-${idx}`,
      tenantId: request.tenantId,
      contractId: request.contractId,
      vendorId: request.vendorId,
      type: (text(f.type) as FindingType | undefined) || 'UNKNOWN',
      category: (text(f.category) as FindingCategory | undefined) || 'GENERAL',
      severity: severityOf(f.severity),
      title: {
        de: text(f.title_de) || '',
        en: text(f.title_en) || '',
      },
      description: {
        de: text(f.description_de) || '',
        en: text(f.description_en) || '',
      },
      // A recommendation is only kept when its German text is present.
      recommendation: recommendationDe
        ? {
            de: recommendationDe,
            en: text(f.recommendation_en) || '',
          }
        : undefined,
      citations: objectList(f.citations).map((c) => {
        const quotedText = text(c.quoted_text) || ''

        return {
          documentId: request.contractId,
          page: finite(c.page) || 1,
          startChar: finite(c.start_char) || 0,
          endChar: finite(c.end_char) || 0,
          quotedText,
          quoteHash: generateQuoteHash(quotedText),
        }
      }),
      affectedRequirement: text(f.affected_requirement),
      triggeredControls: [],
      status: 'OPEN',
      createdAt: new Date(timestamp),
      updatedAt: new Date(timestamp),
    }
  })

  const metadata: JsonObject = isObject(root.metadata) ? root.metadata : {}

  return {
    documentType: (text(root.document_type) as DocumentType | undefined) || 'OTHER',
    language: root.language === 'en' ? 'en' : 'de',
    parties: objectList(root.parties).map((p) => ({
      role: roleOf(p.role),
      name: text(p.name) || '',
      address: text(p.address),
    })),
    findings,
    complianceScore: finite(root.compliance_score) || 0,
    topRisks: objectList(root.top_risks).map(localized),
    requiredActions: objectList(root.required_actions).map(localized),
    metadata: {
      effectiveDate: text(metadata.effective_date),
      expirationDate: text(metadata.expiration_date),
      autoRenewal: flag(metadata.auto_renewal),
      terminationNoticePeriod: finite(metadata.termination_notice_period),
      governingLaw: text(metadata.governing_law),
      jurisdiction: text(metadata.jurisdiction),
    },
  }
}

/**
 * Derive the fingerprint stored with a citation's quoted text.
 *
 * Folds the UTF-16 code units of `text` into a 32-bit integer
 * (`hash * 31 + codeUnit`, wrapped to 32 bits after every step) and returns
 * its absolute value as lowercase hex, left-padded with zeros to 16 characters.
 *
 * This is a simple, non-cryptographic hash; the original author's note
 * suggests `crypto.subtle.digest` for production use.
 */
function generateQuoteHash(text: string): string {
  const codeUnits = Array.from({ length: text.length }, (_, position) =>
    text.charCodeAt(position)
  )

  // Math.imul keeps the multiplication in 32-bit space; `| 0` wraps the sum.
  const folded = codeUnits.reduce(
    (accumulator, unit) => (Math.imul(accumulator, 31) + unit) | 0,
    0
  )

  return Math.abs(folded).toString(16).padStart(16, '0')
}

// ==========================================
// CITATION UTILITIES
// ==========================================

/**
 * Check that a citation still points at the text it was created from.
 *
 * The character range `[startChar, endChar)` is cut out of `documentText`,
 * hashed with `generateQuoteHash`, and compared with `citation.quoteHash`.
 *
 * A citation whose range is empty, inverted, negative, non-integer or starts
 * beyond the end of the document never verifies. Without this guard
 * `substring` would silently swap or coerce the offsets, and a citation with
 * no quote and 0/0 offsets would "verify" because both sides hash the empty
 * string.
 *
 * @param citation - Citation to check.
 * @param documentText - Full text of the document the citation refers to.
 * @returns `true` when the range is usable and the hashes match.
 */
export function verifyCitation(
  citation: Citation,
  documentText: string
): boolean {
  const { startChar, endChar } = citation

  const rangeIsUsable =
    Number.isInteger(startChar) &&
    Number.isInteger(endChar) &&
    startChar >= 0 &&
    startChar < documentText.length &&
    endChar > startChar

  if (!rangeIsUsable) {
    return false
  }

  const extractedText = documentText.substring(startChar, endChar)
  return citation.quoteHash === generateQuoteHash(extractedText)
}

/**
 * Return a citation's text together with the text surrounding it.
 *
 * @param citation - Citation whose `startChar`/`endChar` delimit the quote.
 * @param documentText - Text the offsets refer to.
 * @param contextChars - Number of characters to include on each side of the
 *   quote; the window is cut off at the start and end of the document.
 * @returns The text before the quote, the quote itself, and the text after it.
 */
export function getCitationContext(
  citation: Citation,
  documentText: string,
  contextChars: number = 100
): {
  before: string
  quoted: string
  after: string
} {
  const { startChar, endChar } = citation

  // Context window, limited to the bounds of the document.
  const windowStart = Math.max(0, startChar - contextChars)
  const windowEnd = Math.min(documentText.length, endChar + contextChars)

  const before = documentText.substring(windowStart, startChar)
  const quoted = documentText.substring(startChar, endChar)
  const after = documentText.substring(endChar, windowEnd)

  return { before, quoted, after }
}

/**
 * Render the document as HTML with every cited range wrapped in
 * `<mark data-citation-id="…">`.
 *
 * The result is built in a single left-to-right pass over the ORIGINAL text,
 * so offsets are never applied to a string that already contains markup:
 * - overlapping or duplicate ranges are merged into one `<mark>` (the id of
 *   the range that starts first is used); previously a later range could cut
 *   through an inserted tag and produce broken HTML;
 * - ranges are clamped to the document, and empty or inverted ranges are
 *   skipped;
 * - the document text and the id are HTML-escaped (`&`, `<`, `>`, `"`),
 *   because the output is markup and contract text must not be able to
 *   inject tags into it.
 *
 * Adjacent (touching but not overlapping) ranges stay separate marks.
 *
 * @param documentText - Plain text the citation offsets refer to.
 * @param citations - Citations to highlight; the array is not modified.
 * @returns An HTML string; safe to insert as markup.
 */
export function highlightCitations(
  documentText: string,
  citations: Citation[]
): string {
  const escapeHtml = (value: string): string =>
    value
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')

  const textLength = documentText.length
  const clamp = (offset: number): number =>
    Number.isFinite(offset) ? Math.min(Math.max(0, Math.trunc(offset)), textLength) : 0

  const ranges = citations
    .map((citation) => ({
      start: clamp(citation.startChar),
      end: clamp(citation.endChar),
      id: String(citation.documentId),
    }))
    .filter((range) => range.end > range.start)
    .sort((a, b) => a.start - b.start || a.end - b.end)

  // Merge ranges that overlap so that marks never nest or interleave.
  const merged: Array<{ start: number; end: number; id: string }> = []
  for (const range of ranges) {
    const previous = merged[merged.length - 1]
    if (previous && range.start < previous.end) {
      previous.end = Math.max(previous.end, range.end)
    } else {
      merged.push({ ...range })
    }
  }

  let html = ''
  let cursor = 0
  for (const range of merged) {
    html += escapeHtml(documentText.substring(cursor, range.start))
    html += `<mark data-citation-id="${escapeHtml(range.id)}">`
    html += escapeHtml(documentText.substring(range.start, range.end))
    html += '</mark>'
    cursor = range.end
  }

  return html + escapeHtml(documentText.substring(cursor))
}

// ==========================================
// COMPLIANCE SCORE CALCULATION
// ==========================================

/** Result of `calculateComplianceScore`. */
export interface ComplianceScoreBreakdown {
  /** Overall score: 100 minus the weighted deductions, never below 0. */
  totalScore: number
  /**
   * Percentage of findings of type 'OK' per category.
   * Despite the `Record` type, only categories that occur in the evaluated
   * findings have an entry.
   */
  categoryScores: Record<FindingCategory, number>
  /** Number of findings per severity, regardless of finding type. */
  severityCounts: Record<FindingSeverity, number>
  /** Number of findings in total and per finding type. */
  findingCounts: {
    total: number
    gaps: number
    risks: number
    ok: number
    unknown: number
  }
}

/**
 * Calculate a compliance score with a per-category and per-severity breakdown.
 *
 * Each finding deducts `severityWeight * typeFactor * categoryWeight` points
 * from 100:
 * - severity weights: CRITICAL 25, HIGH 15, MEDIUM 8, LOW 3;
 * - type factors: GAP 1, RISK 0.7, UNKNOWN 0.3; OK findings deduct nothing;
 * - category weights as listed below, 1 for every other category.
 * The total is rounded and never drops below 0.
 *
 * The category score is the rounded percentage of that category's findings
 * that are of type 'OK'; only categories that occur in `findings` get an entry.
 *
 * Findings usually originate from LLM output, so a severity that is not one
 * of the four known values is counted and weighted as MEDIUM instead of
 * turning the whole score into NaN.
 *
 * @param findings - Findings to evaluate; an empty list scores 100.
 * @returns The total score plus category, severity and type statistics.
 */
export function calculateComplianceScore(findings: Finding[]): ComplianceScoreBreakdown {
  const severityWeights: Record<FindingSeverity, number> = {
    CRITICAL: 25,
    HIGH: 15,
    MEDIUM: 8,
    LOW: 3,
  }

  const categoryWeights: Partial<Record<FindingCategory, number>> = {
    AVV_CONTENT: 1.5,
    SUBPROCESSOR: 1.3,
    INCIDENT: 1.3,
    DELETION: 1.2,
    AUDIT_RIGHTS: 1.1,
    TOM: 1.2,
    TRANSFER: 1.4,
  }

  let totalDeductions = 0
  const maxPossibleDeductions = 100

  const severityCounts: Record<FindingSeverity, number> = {
    LOW: 0,
    MEDIUM: 0,
    HIGH: 0,
    CRITICAL: 0,
  }

  // OK/total tallies per category, collected in the same pass as the deductions.
  const categoryTallies = new Map<FindingCategory, { ok: number; total: number }>()

  let gaps = 0
  let risks = 0
  let ok = 0
  let unknown = 0

  for (const finding of findings) {
    // Own-property check so that values such as 'toString' are not accepted.
    const severity: FindingSeverity = Object.prototype.hasOwnProperty.call(
      severityWeights,
      finding.severity
    )
      ? finding.severity
      : 'MEDIUM'

    severityCounts[severity]++

    const severityWeight = severityWeights[severity]
    const categoryWeight = categoryWeights[finding.category] || 1

    switch (finding.type) {
      case 'GAP':
        gaps++
        totalDeductions += severityWeight * categoryWeight
        break
      case 'RISK':
        risks++
        totalDeductions += severityWeight * 0.7 * categoryWeight
        break
      case 'OK':
        ok++
        break
      case 'UNKNOWN':
        unknown++
        totalDeductions += severityWeight * 0.3 * categoryWeight
        break
    }

    let tally = categoryTallies.get(finding.category)
    if (!tally) {
      tally = { ok: 0, total: 0 }
      categoryTallies.set(finding.category, tally)
    }
    tally.total++
    if (finding.type === 'OK') {
      tally.ok++
    }
  }

  // A tally only exists for categories with at least one finding, so the
  // division is always defined.
  const categoryScores: Partial<Record<FindingCategory, number>> = {}
  for (const [category, tally] of categoryTallies) {
    categoryScores[category] = Math.round((tally.ok / tally.total) * 100)
  }

  const totalScore = Math.max(0, Math.round(100 - (totalDeductions / maxPossibleDeductions) * 100))

  return {
    totalScore,
    categoryScores: categoryScores as Record<FindingCategory, number>,
    severityCounts,
    findingCounts: {
      total: findings.length,
      gaps,
      risks,
      ok,
      unknown,
    },
  }
}