diff --git a/admin-v2/app/(sdk)/sdk/document-crawler/page.tsx b/admin-v2/app/(sdk)/sdk/document-crawler/page.tsx new file mode 100644 index 0000000..265edc4 --- /dev/null +++ b/admin-v2/app/(sdk)/sdk/document-crawler/page.tsx @@ -0,0 +1,839 @@
'use client'

import { useState, useEffect, useCallback } from 'react'

// =============================================================================
// TYPES
// =============================================================================

/** A configured crawl source, as listed by the `sources` endpoint. */
interface CrawlSource {
  id: string
  /** Display name shown in the source list. */
  name: string
  source_type: string
  /** Location to scan; entered as free text in the create form. */
  path: string
  /**
   * File extensions to include.
   * NOTE(review): the list view also accepts a JSON-encoded string here and
   * parses it before joining, so the backend may not always deliver a real
   * array - confirm and align the type if so.
   */
  file_extensions: string[]
  /** Shown as "Tiefe" in the list; presumably the directory recursion limit - confirm. */
  max_depth: number
  exclude_patterns: string[]
  /** Flipped by a PUT to `sources/{id}`. */
  enabled: boolean
  created_at: string
}

/** One crawl run against a source, together with its progress counters. */
interface CrawlJob {
  id: string
  source_id: string
  /** Optional display name; the job list falls back to 'Quelle' when absent. */
  source_name?: string
  /** While any job is 'pending' or 'running' the list is re-fetched every 3 s; such jobs can be cancelled. */
  status: 'pending' | 'running' | 'completed' | 'failed' | 'cancelled'
  job_type: 'full' | 'delta'
  /** Denominator of the progress display (`files_processed / files_found`). */
  files_found: number
  files_processed: number
  files_new: number
  files_changed: number
  files_skipped: number
  files_error: number
  error_message?: string
  started_at?: string
  completed_at?: string
  /** Date string; rendered via `new Date(...).toLocaleString('de-DE')`. */
  created_at: string
}

/** A document discovered by a crawl, with its classification and archive state. */
interface CrawlDocument {
  id: string
  file_name: string
  file_extension: string
  /** Raw size in bytes; the table formats it as B / KB / MB. */
  file_size_bytes: number
  /** Category key; `null` or unknown keys are rendered with the 'Sonstiges' label. */
  classification: string | null
  /** Multiplied by 100 for display, so presumably a 0..1 fraction - confirm. */
  classification_confidence: number | null
  /** Marked with `*` next to the category when true. */
  classification_corrected: boolean
  extraction_status: string
  /** When true the table shows an "IPFS" badge and hides the archive action. */
  archived: boolean
  ipfs_cid: string | null
  first_seen_at: string
  last_seen_at: string
  version_count: number
  source_name?: string
}

/** Result of a compliance gap analysis over the crawled documents. */
interface OnboardingReport {
  id: string
  total_documents_found: number
  /**
   * Document count per classification key.
   * NOTE(review): the value type is restored as `number` because the report
   * tab renders each entry with `count as number` - confirm against the API.
   */
  classification_breakdown: Record<string, number>
  gaps: GapItem[]
  /** Rendered as a percentage ring using `score / 100`, i.e. a 0..100 scale. */
  compliance_score: number
  gap_summary?: { critical: number; high: number; medium: number }
  created_at: string
}

/** A single compliance gap listed in an onboarding report. */
interface GapItem {
  id: string
  category: string
  description: string
  severity: 'CRITICAL' | 'HIGH' | 'MEDIUM'
  regulation: string
  requiredAction: string
}

//
// =============================================================================
// API HELPERS
// =============================================================================

/** Tenant id sent as `X-Tenant-ID` with every crawler request. */
const TENANT_ID = '00000000-0000-0000-0000-000000000001' // Default tenant

/**
 * Calls the crawler proxy at `/api/sdk/v1/crawler/{path}` with JSON and tenant
 * headers (caller-supplied headers win on conflict).
 *
 * @param path    Path below the crawler prefix, e.g. `sources` or `jobs/{id}/cancel`.
 * @param options Extra `fetch` options (method, body, headers).
 * @returns The parsed JSON body, or `null` for 204 responses and for any
 *          non-2xx response.
 *
 * Error responses are logged and mapped to `null` rather than returned: the
 * callers store the result directly in list state (`setSources(data || [])`),
 * so passing an error payload such as `{ error: ... }` through would make the
 * subsequent `.map` / `.some` calls throw during render.
 */
async function api(path: string, options: RequestInit = {}) {
  const res = await fetch(`/api/sdk/v1/crawler/${path}`, {
    ...options,
    headers: {
      'Content-Type': 'application/json',
      'X-Tenant-ID': TENANT_ID,
      ...options.headers,
    },
  })
  if (!res.ok) {
    console.error(`Crawler API ${options.method ?? 'GET'} ${path} failed with status ${res.status}`)
    return null
  }
  if (res.status === 204) return null
  return res.json()
}

// =============================================================================
// CLASSIFICATION LABELS
// =============================================================================

/**
 * Display label and Tailwind badge classes per classification key.
 * Lookups with unknown keys yield `undefined`; call sites fall back to the
 * 'Sonstiges' entry.
 */
const CLASSIFICATION_LABELS: Record<string, { label: string; color: string }> = {
  VVT: { label: 'VVT', color: 'bg-blue-100 text-blue-700' },
  TOM: { label: 'TOM', color: 'bg-green-100 text-green-700' },
  DSE: { label: 'DSE', color: 'bg-purple-100 text-purple-700' },
  AVV: { label: 'AVV', color: 'bg-orange-100 text-orange-700' },
  DSFA: { label: 'DSFA', color: 'bg-red-100 text-red-700' },
  Loeschkonzept: { label: 'Loeschkonzept', color: 'bg-yellow-100 text-yellow-700' },
  Einwilligung: { label: 'Einwilligung', color: 'bg-pink-100 text-pink-700' },
  Vertrag: { label: 'Vertrag', color: 'bg-indigo-100 text-indigo-700' },
  Richtlinie: { label: 'Richtlinie', color: 'bg-teal-100 text-teal-700' },
  Schulungsnachweis: { label: 'Schulung', color: 'bg-cyan-100 text-cyan-700' },
  Sonstiges: { label: 'Sonstiges', color: 'bg-gray-100 text-gray-700' },
}

/** All classification keys, in declaration order. */
const ALL_CLASSIFICATIONS = Object.keys(CLASSIFICATION_LABELS)

// =============================================================================
// TAB: QUELLEN (Sources)
// =============================================================================

function SourcesTab() {
  const [sources, setSources] = useState([])
  const [loading, setLoading] = useState(true)
  const [showForm, setShowForm] =
useState(false) + const [formName, setFormName] = useState('') + const [formPath, setFormPath] = useState('') + const [testResult, setTestResult] = useState>({}) + + const loadSources = useCallback(async () => { + setLoading(true) + try { + const data = await api('sources') + setSources(data || []) + } catch { /* ignore */ } + setLoading(false) + }, []) + + useEffect(() => { loadSources() }, [loadSources]) + + const handleCreate = async () => { + if (!formName || !formPath) return + await api('sources', { + method: 'POST', + body: JSON.stringify({ name: formName, path: formPath }), + }) + setFormName('') + setFormPath('') + setShowForm(false) + loadSources() + } + + const handleDelete = async (id: string) => { + await api(`sources/${id}`, { method: 'DELETE' }) + loadSources() + } + + const handleToggle = async (source: CrawlSource) => { + await api(`sources/${source.id}`, { + method: 'PUT', + body: JSON.stringify({ enabled: !source.enabled }), + }) + loadSources() + } + + const handleTest = async (id: string) => { + setTestResult(prev => ({ ...prev, [id]: 'testing...' })) + const result = await api(`sources/${id}/test`, { method: 'POST' }) + setTestResult(prev => ({ ...prev, [id]: result?.message || 'Fehler' })) + } + + return ( +
+
+

Crawl-Quellen

+ +
+ + {showForm && ( +
+
+ + setFormName(e.target.value)} + placeholder="z.B. Compliance-Ordner" + className="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:ring-2 focus:ring-purple-500" + /> +
+
+ + setFormPath(e.target.value)} + placeholder="z.B. compliance-docs" + className="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:ring-2 focus:ring-purple-500" + /> +
+
+ + +
+
+ )} + + {loading ? ( +
Laden...
+ ) : sources.length === 0 ? ( +
+

Keine Quellen konfiguriert

+

Erstellen Sie eine Crawl-Quelle um Dokumente zu scannen.

+
+ ) : ( +
+ {sources.map(s => ( +
+
+
+
{s.name}
+
{s.path}
+
+ Tiefe: {s.max_depth} | Formate: {(typeof s.file_extensions === 'string' ? JSON.parse(s.file_extensions) : s.file_extensions).join(', ')} +
+
+ {testResult[s.id] && ( + {testResult[s.id]} + )} + + + +
+ ))} +
+ )} +
+ ) +} + +// ============================================================================= +// TAB: CRAWL-JOBS +// ============================================================================= + +function JobsTab() { + const [jobs, setJobs] = useState([]) + const [sources, setSources] = useState([]) + const [selectedSource, setSelectedSource] = useState('') + const [jobType, setJobType] = useState<'full' | 'delta'>('full') + const [loading, setLoading] = useState(true) + + const loadData = useCallback(async () => { + setLoading(true) + try { + const [j, s] = await Promise.all([api('jobs'), api('sources')]) + setJobs(j || []) + setSources(s || []) + if (!selectedSource && s?.length > 0) setSelectedSource(s[0].id) + } catch { /* ignore */ } + setLoading(false) + }, [selectedSource]) + + useEffect(() => { loadData() }, [loadData]) + + // Auto-refresh running jobs + useEffect(() => { + const hasRunning = jobs.some(j => j.status === 'running' || j.status === 'pending') + if (!hasRunning) return + const interval = setInterval(loadData, 3000) + return () => clearInterval(interval) + }, [jobs, loadData]) + + const handleTrigger = async () => { + if (!selectedSource) return + await api('jobs', { + method: 'POST', + body: JSON.stringify({ source_id: selectedSource, job_type: jobType }), + }) + loadData() + } + + const handleCancel = async (id: string) => { + await api(`jobs/${id}/cancel`, { method: 'POST' }) + loadData() + } + + const statusColor = (s: string) => { + switch (s) { + case 'completed': return 'bg-green-100 text-green-700' + case 'running': return 'bg-blue-100 text-blue-700' + case 'pending': return 'bg-yellow-100 text-yellow-700' + case 'failed': return 'bg-red-100 text-red-700' + case 'cancelled': return 'bg-gray-100 text-gray-600' + default: return 'bg-gray-100 text-gray-700' + } + } + + return ( +
+ {/* Trigger form */} +
+

Neuen Crawl starten

+
+
+ + +
+
+ + +
+ +
+
+ + {/* Job list */} + {loading ? ( +
Laden...
+ ) : jobs.length === 0 ? ( +
+ Noch keine Crawl-Jobs ausgefuehrt. +
+ ) : ( +
+ {jobs.map(job => ( +
+
+
+ + {job.status} + + {job.source_name || 'Quelle'} + {job.job_type === 'delta' ? 'Delta' : 'Voll'} +
+
+ {(job.status === 'running' || job.status === 'pending') && ( + + )} + + {new Date(job.created_at).toLocaleString('de-DE')} + +
+
+ + {/* Progress */} + {job.status === 'running' && job.files_found > 0 && ( +
+
+
+
+
+ {job.files_processed} / {job.files_found} Dateien verarbeitet +
+
+ )} + + {/* Stats */} +
+
+
{job.files_found}
+
Gefunden
+
+
+
{job.files_processed}
+
Verarbeitet
+
+
+
{job.files_new}
+
Neu
+
+
+
{job.files_changed}
+
Geaendert
+
+
+
{job.files_skipped}
+
Uebersprungen
+
+
+
{job.files_error}
+
Fehler
+
+
+
+ ))} +
+ )} +
+ ) +} + +// ============================================================================= +// TAB: DOKUMENTE +// ============================================================================= + +function DocumentsTab() { + const [docs, setDocs] = useState([]) + const [total, setTotal] = useState(0) + const [loading, setLoading] = useState(true) + const [filterClass, setFilterClass] = useState('') + const [archiving, setArchiving] = useState>({}) + + const loadDocs = useCallback(async () => { + setLoading(true) + try { + const params = filterClass ? `?classification=${filterClass}` : '' + const data = await api(`documents${params}`) + setDocs(data?.documents || []) + setTotal(data?.total || 0) + } catch { /* ignore */ } + setLoading(false) + }, [filterClass]) + + useEffect(() => { loadDocs() }, [loadDocs]) + + const handleReclassify = async (docId: string, newClass: string) => { + await api(`documents/${docId}/classify`, { + method: 'PUT', + body: JSON.stringify({ classification: newClass }), + }) + loadDocs() + } + + const handleArchive = async (docId: string) => { + setArchiving(prev => ({ ...prev, [docId]: true })) + try { + await api(`documents/${docId}/archive`, { method: 'POST' }) + loadDocs() + } catch { /* ignore */ } + setArchiving(prev => ({ ...prev, [docId]: false })) + } + + const formatSize = (bytes: number) => { + if (bytes < 1024) return `${bytes} B` + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB` + return `${(bytes / 1024 / 1024).toFixed(1)} MB` + } + + return ( +
+
+

{total} Dokumente

+ +
+ + {loading ? ( +
Laden...
+ ) : docs.length === 0 ? ( +
+ Keine Dokumente gefunden. Starten Sie zuerst einen Crawl-Job. +
+ ) : ( +
+ + + + + + + + + + + + + {docs.map(doc => { + const cls = CLASSIFICATION_LABELS[doc.classification || ''] || CLASSIFICATION_LABELS['Sonstiges'] + return ( + + + + + + + + + ) + })} + +
DateiKategorieKonfidenzGroesseArchivAktionen
+
{doc.file_name}
+
{doc.source_name}
+
+ + {doc.classification_corrected && ( + * + )} + + {doc.classification_confidence != null && ( +
+
+
+
+ + {(doc.classification_confidence * 100).toFixed(0)}% + +
+ )} +
{formatSize(doc.file_size_bytes)} + {doc.archived ? ( + IPFS + ) : ( + - + )} + + {!doc.archived && ( + + )} +
+
+ )} +
+ ) +} + +// ============================================================================= +// TAB: ONBOARDING-REPORT +// ============================================================================= + +function ReportTab() { + const [reports, setReports] = useState([]) + const [activeReport, setActiveReport] = useState(null) + const [loading, setLoading] = useState(true) + const [generating, setGenerating] = useState(false) + + const loadReports = useCallback(async () => { + setLoading(true) + try { + const data = await api('reports') + setReports(data || []) + if (data?.length > 0 && !activeReport) { + const detail = await api(`reports/${data[0].id}`) + setActiveReport(detail) + } + } catch { /* ignore */ } + setLoading(false) + }, [activeReport]) + + useEffect(() => { loadReports() }, [loadReports]) + + const handleGenerate = async () => { + setGenerating(true) + try { + const result = await api('reports/generate', { + method: 'POST', + body: JSON.stringify({}), + }) + setActiveReport(result) + loadReports() + } catch { /* ignore */ } + setGenerating(false) + } + + const handleSelectReport = async (id: string) => { + const detail = await api(`reports/${id}`) + setActiveReport(detail) + } + + // Compliance score ring + const ComplianceRing = ({ score }: { score: number }) => { + const radius = 50 + const circumference = 2 * Math.PI * radius + const offset = circumference - (score / 100) * circumference + const color = score >= 75 ? '#16a34a' : score >= 50 ? '#f59e0b' : '#dc2626' + + return ( +
+ + + + +
+ {score.toFixed(0)}% + Compliance +
+
+ ) + } + + return ( +
+
+

Onboarding-Report

+ +
+ + {/* Report selector */} + {reports.length > 1 && ( +
+ {reports.map(r => ( + + ))} +
+ )} + + {loading ? ( +
Laden...
+ ) : !activeReport ? ( +
+

Kein Report vorhanden

+

Fuehren Sie zuerst einen Crawl durch und generieren Sie dann einen Report.

+
+ ) : ( +
+ {/* Score + Stats */} +
+
+ +
+
+
{activeReport.total_documents_found}
+
Dokumente gefunden
+
+
+
+ {Object.keys(activeReport.classification_breakdown || {}).length} +
+
Kategorien abgedeckt
+
+
+
+ {(activeReport.gaps || []).length} +
+
Luecken identifiziert
+
+
+
+
+ + {/* Classification breakdown */} +
+

Dokumenten-Verteilung

+
+ {Object.entries(activeReport.classification_breakdown || {}).map(([cat, count]) => { + const cls = CLASSIFICATION_LABELS[cat] || CLASSIFICATION_LABELS['Sonstiges'] + return ( + + {cls.label}: {count as number} + + ) + })} + {Object.keys(activeReport.classification_breakdown || {}).length === 0 && ( + Keine Dokumente klassifiziert + )} +
+
+ + {/* Gap summary */} + {activeReport.gap_summary && ( +
+
+
{activeReport.gap_summary.critical}
+
Kritisch
+
+
+
{activeReport.gap_summary.high}
+
Hoch
+
+
+
{activeReport.gap_summary.medium}
+
Mittel
+
+
+ )} + + {/* Gap details */} + {(activeReport.gaps || []).length > 0 && ( +
+

Compliance-Luecken

+
+ {activeReport.gaps.map((gap) => ( +
+
+
+
{gap.category}
+

{gap.description}

+
+ + {gap.severity} + +
+
+ Regulierung: {gap.regulation} | Aktion: {gap.requiredAction} +
+
+ ))} +
+
+ )} +
+ )} +
+ ) +} + +// ============================================================================= +// MAIN PAGE +// ============================================================================= + +type Tab = 'sources' | 'jobs' | 'documents' | 'report' + +export default function DocumentCrawlerPage() { + const [activeTab, setActiveTab] = useState('sources') + + const tabs: { id: Tab; label: string }[] = [ + { id: 'sources', label: 'Quellen' }, + { id: 'jobs', label: 'Crawl-Jobs' }, + { id: 'documents', label: 'Dokumente' }, + { id: 'report', label: 'Onboarding-Report' }, + ] + + return ( +
+ {/* Header */} +
+

Document Crawler & Auto-Onboarding

+

+ Automatisches Scannen von Dateisystemen, KI-Klassifizierung, IPFS-Archivierung und Compliance Gap-Analyse. +

+
+ + {/* Tabs */} +
+ +
+ + {/* Tab content */} + {activeTab === 'sources' && } + {activeTab === 'jobs' && } + {activeTab === 'documents' && } + {activeTab === 'report' && } +
+ ) +} diff --git a/admin-v2/app/api/sdk/v1/crawler/[[...path]]/route.ts b/admin-v2/app/api/sdk/v1/crawler/[[...path]]/route.ts new file mode 100644 index 0000000..5512882 --- /dev/null +++ b/admin-v2/app/api/sdk/v1/crawler/[[...path]]/route.ts @@ -0,0 +1,114 @@
/**
 * Document Crawler API Proxy - Catch-all route
 * Proxies all /api/sdk/v1/crawler/* requests to document-crawler service (port 8098)
 */

import { NextRequest, NextResponse } from 'next/server'

/** Base URL of the document-crawler service; overridable via `CRAWLER_API_URL`. */
const CRAWLER_BACKEND_URL = process.env.CRAWLER_API_URL || 'http://document-crawler:8098'

/**
 * Forwards one incoming request to the crawler backend and relays its answer.
 *
 * @param request      Incoming request; its query string, selected headers and
 *                     (for methods other than GET/DELETE) its JSON body are forwarded.
 * @param pathSegments Catch-all segments after `/api/sdk/v1/crawler`; `undefined`
 *                     or empty targets the backend base path itself.
 * @param method       HTTP method to use for the backend call.
 * @returns The backend's JSON body with the backend's status code; an empty
 *          204 when the backend answers 204; the backend's error payload
 *          (prefixed with a `Backend Error: <status>` message) for non-2xx
 *          answers; or a 503 JSON error when the backend call itself fails
 *          (network error, 30 s timeout, unparsable success body).
 */
async function proxyRequest(
  request: NextRequest,
  pathSegments: string[] | undefined,
  method: string
) {
  const pathStr = pathSegments?.join('/') || ''
  const searchParams = request.nextUrl.searchParams.toString()
  const query = searchParams ? `?${searchParams}` : ''
  const basePath = `${CRAWLER_BACKEND_URL}/api/v1/crawler`
  const url = pathStr ? `${basePath}/${pathStr}${query}` : `${basePath}${query}`

  try {
    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
    }

    // Forward all relevant headers
    const headerNames = ['authorization', 'x-tenant-id', 'x-user-id', 'x-namespace-id', 'x-tenant-slug']
    for (const name of headerNames) {
      const value = request.headers.get(name)
      if (value) {
        headers[name] = value
      }
    }

    const fetchOptions: RequestInit = {
      method,
      headers,
      signal: AbortSignal.timeout(30000),
    }

    // Forward body for non-GET requests
    if (method !== 'GET' && method !== 'DELETE') {
      try {
        const body = await request.json()
        fetchOptions.body = JSON.stringify(body)
      } catch {
        // No body or non-JSON body: the request is forwarded without one.
      }
    }

    const response = await fetch(url, fetchOptions)

    if (!response.ok) {
      const errorText = await response.text()
      let details: Record<string, unknown>
      try {
        const parsed: unknown = JSON.parse(errorText)
        // Only plain objects are merged; spreading a string or an array would
        // leak index keys ("0", "1", ...) into the error response.
        details =
          typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)
            ? (parsed as Record<string, unknown>)
            : { detail: parsed }
      } catch {
        details = { error: errorText }
      }
      return NextResponse.json(
        { error: `Backend Error: ${response.status}`, ...details },
        { status: response.status }
      )
    }

    // Handle 204 No Content
    if (response.status === 204) {
      return new NextResponse(null, { status: 204 })
    }

    const data = await response.json()
    // Keep the backend's success status (e.g. 201 Created) instead of always answering 200.
    return NextResponse.json(data, { status: response.status })
  } catch (error) {
    console.error('Document Crawler API proxy error:', error)
    return NextResponse.json(
      { error: 'Verbindung zum Document Crawler Backend fehlgeschlagen' },
      { status: 503 }
    )
  }
}

/** Proxies GET requests to the crawler backend. */
export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ path?: string[] }> }
) {
  const { path } = await params
  return proxyRequest(request, path, 'GET')
}

/** Proxies POST requests (including their JSON body) to the crawler backend. */
export async function POST(
  request: NextRequest,
  { params }: { params: Promise<{ path?: string[] }> }
) {
  const { path } = await params
  return proxyRequest(request, path, 'POST')
}

/** Proxies PUT requests (including their JSON body) to the crawler backend. */
export async function PUT(
  request: NextRequest,
  { params }: { params: Promise<{ path?: string[] }> }
) {
  const { path } = await params
  return proxyRequest(request, path, 'PUT')
}

/** Proxies DELETE requests to the crawler backend; no body is forwarded. */
export async function DELETE(
  request: NextRequest,
  { params }: { params: Promise<{ path?: string[] }> }
) {
  const { path } = await params
  return proxyRequest(request, path, 'DELETE')
}