'use client'

import { useState, useEffect, useCallback } from 'react'

// =============================================================================
// TYPES
// =============================================================================

/**
 * Crawl source record as loaded from the `sources` endpoint.
 * `file_extensions` may arrive as a JSON-encoded string instead of an array;
 * the source list handles both shapes when rendering.
 */
interface CrawlSource {
  id: string
  name: string
  source_type: string
  path: string
  file_extensions: string[]
  max_depth: number
  exclude_patterns: string[]
  enabled: boolean
  created_at: string
}

/** Crawl job record as loaded from the `jobs` endpoint, including per-file counters. */
interface CrawlJob {
  id: string
  source_id: string
  source_name?: string
  status: 'pending' | 'running' | 'completed' | 'failed' | 'cancelled'
  job_type: 'full' | 'delta'
  files_found: number
  files_processed: number
  files_new: number
  files_changed: number
  files_skipped: number
  files_error: number
  error_message?: string
  started_at?: string
  completed_at?: string
  created_at: string
}

/** One entry of the `documents` array in the `documents` endpoint response. */
interface CrawlDocument {
  id: string
  file_name: string
  file_extension: string
  file_size_bytes: number
  classification: string | null
  classification_confidence: number | null
  classification_corrected: boolean
  extraction_status: string
  archived: boolean
  ipfs_cid: string | null
  first_seen_at: string
  last_seen_at: string
  version_count: number
  source_name?: string
}

/** Report record as loaded from the `reports` endpoints. */
interface OnboardingReport {
  id: string
  total_documents_found: number
  // Maps a classification key to the number of documents in that category.
  classification_breakdown: Record<string, number>
  gaps: GapItem[]
  compliance_score: number
  gap_summary?: { critical: number; high: number; medium: number }
  created_at: string
}

/** One entry of `OnboardingReport.gaps`. */
interface GapItem {
  id: string
  category: string
  description: string
  severity: 'CRITICAL' | 'HIGH' | 'MEDIUM'
  regulation: string
  requiredAction: string
}

// =============================================================================
// API HELPERS
// =============================================================================

const TENANT_ID = '00000000-0000-0000-0000-000000000001' // Default tenant

/**
 * Sends a request to the crawler API below `/api/sdk/v1/crawler/`.
 *
 * A JSON content type and the tenant header are sent by default; headers passed
 * in `options.headers` (in any form `fetch` accepts) override those defaults.
 *
 * @param path - Endpoint path relative to the crawler API root.
 * @param options - Additional `fetch` options.
 * @returns The parsed JSON body, or `null` for a `204` response.
 * @throws Error when the response status is not in the 2xx range, so that
 *   error payloads are never mistaken for data by callers. Also rejects when
 *   the network request fails or the body is not valid JSON.
 */
async function api(path: string, options: RequestInit = {}) {
  const headers = new Headers({
    'Content-Type': 'application/json',
    'X-Tenant-ID': TENANT_ID,
  })
  // Caller-supplied headers win over the defaults, whatever shape they come in.
  new Headers(options.headers).forEach((value, key) => headers.set(key, value))

  const res = await fetch(`/api/sdk/v1/crawler/${path}`, { ...options, headers })
  if (!res.ok) {
    throw new Error(
      `Crawler API ${options.method ?? 'GET'} ${path} failed with status ${res.status}`,
    )
  }
  if (res.status === 204) return null
  return res.json()
}

// =============================================================================
// CLASSIFICATION LABELS
// =============================================================================

/** Display label and badge CSS classes per classification key. */
const CLASSIFICATION_LABELS: Record<string, { label: string; color: string }> = {
  VVT: { label: 'VVT', color: 'bg-blue-100 text-blue-700' },
  TOM: { label: 'TOM', color: 'bg-green-100 text-green-700' },
  DSE: { label: 'DSE', color: 'bg-purple-100 text-purple-700' },
  AVV: { label: 'AVV', color: 'bg-orange-100 text-orange-700' },
  DSFA: { label: 'DSFA', color: 'bg-red-100 text-red-700' },
  Loeschkonzept: { label: 'Loeschkonzept', color: 'bg-yellow-100 text-yellow-700' },
  Einwilligung: { label: 'Einwilligung', color: 'bg-pink-100 text-pink-700' },
  Vertrag: { label: 'Vertrag', color: 'bg-indigo-100 text-indigo-700' },
  Richtlinie: { label: 'Richtlinie', color: 'bg-teal-100 text-teal-700' },
  Schulungsnachweis: { label: 'Schulung', color: 'bg-cyan-100 text-cyan-700' },
  Sonstiges: { label: 'Sonstiges', color: 'bg-gray-100 text-gray-700' },
}

const ALL_CLASSIFICATIONS = Object.keys(CLASSIFICATION_LABELS)

// =============================================================================
// TAB: QUELLEN (Sources)
// =============================================================================

/** Tab that lists crawl sources and lets the user create, toggle, test and delete them. */
function SourcesTab() {
  const [sources, setSources] = useState<CrawlSource[]>([])
  const [loading, setLoading] = useState(true)
  const [showForm, setShowForm] = useState(false)
  const [formName, setFormName] = useState('')
  const [formPath, setFormPath] = useState('')
  // Last test message per source id.
  const [testResult, setTestResult] = useState<Record<string, string>>({})

  // Fetches the source list; failures leave the previous list in place.
  const loadSources = useCallback(async () => {
    setLoading(true)
    try {
      const data = await api('sources')
      setSources(Array.isArray(data) ? data : [])
    } catch (err: unknown) {
      console.error('Failed to load crawl sources', err)
    }
    setLoading(false)
  }, [])

  useEffect(() => {
    void loadSources()
  }, [loadSources])

  // Creates a source from the form fields; the form is only reset on success
  // so the user does not lose their input when the request fails.
  const handleCreate = async () => {
    if (!formName || !formPath) return
    try {
      await api('sources', {
        method: 'POST',
        body: JSON.stringify({ name: formName, path: formPath }),
      })
    } catch (err: unknown) {
      console.error('Failed to create crawl source', err)
      return
    }
    setFormName('')
    setFormPath('')
    setShowForm(false)
    await loadSources()
  }

  const handleDelete = async (id: string) => {
    try {
      await api(`sources/${id}`, { method: 'DELETE' })
    } catch (err: unknown) {
      console.error('Failed to delete crawl source', err)
    }
    await loadSources()
  }

  const handleToggle = async (source: CrawlSource) => {
    try {
      await api(`sources/${source.id}`, {
        method: 'PUT',
        body: JSON.stringify({ enabled: !source.enabled }),
      })
    } catch (err: unknown) {
      console.error('Failed to toggle crawl source', err)
    }
    await loadSources()
  }

  // Runs the source test and stores its message; a failed request resolves the
  // placeholder to 'Fehler' instead of leaving 'testing...' on screen forever.
  const handleTest = async (id: string) => {
    setTestResult(prev => ({ ...prev, [id]: 'testing...' }))
    let message = 'Fehler'
    try {
      const result = await api(`sources/${id}/test`, { method: 'POST' })
      message = result?.message || 'Fehler'
    } catch (err: unknown) {
      console.error('Failed to test crawl source', err)
    }
    setTestResult(prev => ({ ...prev, [id]: message }))
  }

  return (

Crawl-Quellen

{showForm && (
setFormName(e.target.value)} placeholder="z.B. Compliance-Ordner" className="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:ring-2 focus:ring-purple-500" />
setFormPath(e.target.value)} placeholder="z.B. compliance-docs" className="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:ring-2 focus:ring-purple-500" />
)} {loading ? (
Laden...
) : sources.length === 0 ? (

Keine Quellen konfiguriert

Erstellen Sie eine Crawl-Quelle um Dokumente zu scannen.

) : (
{sources.map(s => (
{s.name}
{s.path}
Tiefe: {s.max_depth} | Formate: {(typeof s.file_extensions === 'string' ? JSON.parse(s.file_extensions) : s.file_extensions).join(', ')}
{testResult[s.id] && ( {testResult[s.id]} )}
))}
)}
)
}

// =============================================================================
// TAB: CRAWL-JOBS
// =============================================================================

/** Tab that starts crawl jobs for a selected source and lists existing jobs. */
function JobsTab() {
  const [jobs, setJobs] = useState<CrawlJob[]>([])
  const [sources, setSources] = useState<CrawlSource[]>([])
  const [selectedSource, setSelectedSource] = useState('')
  const [jobType, setJobType] = useState<'full' | 'delta'>('full')
  const [loading, setLoading] = useState(true)

  // Fetches jobs and sources in parallel. `showSpinner` is false for background
  // polling so the list is not replaced by the loading placeholder every tick.
  const fetchData = useCallback(async (showSpinner: boolean) => {
    if (showSpinner) setLoading(true)
    try {
      const [j, s] = await Promise.all([api('jobs'), api('sources')])
      const sourceList: CrawlSource[] = Array.isArray(s) ? s : []
      setJobs(Array.isArray(j) ? j : [])
      setSources(sourceList)
      const first = sourceList[0]
      // Functional update keeps an existing selection without making this
      // callback depend on `selectedSource` (which would refetch on every change).
      if (first) setSelectedSource(prev => prev || first.id)
    } catch (err: unknown) {
      console.error('Failed to load crawl jobs', err)
    }
    if (showSpinner) setLoading(false)
  }, [])

  // Zero-argument loader with the loading placeholder, for user-initiated reloads.
  const loadData = useCallback(() => fetchData(true), [fetchData])

  useEffect(() => {
    void loadData()
  }, [loadData])

  // Auto-refresh running jobs
  useEffect(() => {
    const hasRunning = jobs.some(j => j.status === 'running' || j.status === 'pending')
    if (!hasRunning) return
    const interval = setInterval(() => {
      void fetchData(false)
    }, 3000)
    return () => clearInterval(interval)
  }, [jobs, fetchData])

  const handleTrigger = async () => {
    if (!selectedSource) return
    try {
      await api('jobs', {
        method: 'POST',
        body: JSON.stringify({ source_id: selectedSource, job_type: jobType }),
      })
    } catch (err: unknown) {
      console.error('Failed to start crawl job', err)
    }
    await loadData()
  }

  const handleCancel = async (id: string) => {
    try {
      await api(`jobs/${id}/cancel`, { method: 'POST' })
    } catch (err: unknown) {
      console.error('Failed to cancel crawl job', err)
    }
    await loadData()
  }

  // Maps a job status to badge CSS classes; unknown statuses get the gray default.
  const statusColor = (s: string) => {
    switch (s) {
      case 'completed':
        return 'bg-green-100 text-green-700'
      case 'running':
        return 'bg-blue-100 text-blue-700'
      case 'pending':
        return 'bg-yellow-100 text-yellow-700'
      case 'failed':
        return 'bg-red-100 text-red-700'
      case 'cancelled':
        return 'bg-gray-100 text-gray-600'
      default:
        return 'bg-gray-100 text-gray-700'
    }
  }

  return (
{/* Trigger form */}

Neuen Crawl starten

{/* Job list */} {loading ? (
Laden...
) : jobs.length === 0 ? (
Noch keine Crawl-Jobs ausgefuehrt.
) : (
{jobs.map(job => (
{job.status} {job.source_name || 'Quelle'} {job.job_type === 'delta' ? 'Delta' : 'Voll'}
{(job.status === 'running' || job.status === 'pending') && ( )} {new Date(job.created_at).toLocaleString('de-DE')}
{/* Progress */} {job.status === 'running' && job.files_found > 0 && (
{job.files_processed} / {job.files_found} Dateien verarbeitet
)} {/* Stats */}
{job.files_found}
Gefunden
{job.files_processed}
Verarbeitet
{job.files_new}
Neu
{job.files_changed}
Geaendert
{job.files_skipped}
Uebersprungen
{job.files_error}
Fehler
))}
)}
)
}

// =============================================================================
// TAB: DOKUMENTE
// =============================================================================

/** Tab that lists crawled documents with an optional classification filter. */
function DocumentsTab() {
  const [docs, setDocs] = useState<CrawlDocument[]>([])
  const [total, setTotal] = useState(0)
  const [loading, setLoading] = useState(true)
  const [filterClass, setFilterClass] = useState('')
  // Per-document flag that is true while an archive request is in flight.
  const [archiving, setArchiving] = useState<Record<string, boolean>>({})

  // Fetches the document list for the current filter; failures keep the previous list.
  const loadDocs = useCallback(async () => {
    setLoading(true)
    try {
      // Encode the filter so characters such as '&' or '#' cannot alter the query string.
      const params = filterClass ? `?classification=${encodeURIComponent(filterClass)}` : ''
      const data = await api(`documents${params}`)
      setDocs(Array.isArray(data?.documents) ? data.documents : [])
      setTotal(data?.total ?? 0)
    } catch (err: unknown) {
      console.error('Failed to load documents', err)
    }
    setLoading(false)
  }, [filterClass])

  useEffect(() => {
    void loadDocs()
  }, [loadDocs])

  const handleReclassify = async (docId: string, newClass: string) => {
    try {
      await api(`documents/${docId}/classify`, {
        method: 'PUT',
        body: JSON.stringify({ classification: newClass }),
      })
    } catch (err: unknown) {
      console.error('Failed to reclassify document', err)
    }
    await loadDocs()
  }

  // Archives a document; the in-flight flag is cleared only after the list has
  // been refreshed, so the row does not briefly show a stale state.
  const handleArchive = async (docId: string) => {
    setArchiving(prev => ({ ...prev, [docId]: true }))
    try {
      await api(`documents/${docId}/archive`, { method: 'POST' })
      await loadDocs()
    } catch (err: unknown) {
      console.error('Failed to archive document', err)
    } finally {
      setArchiving(prev => ({ ...prev, [docId]: false }))
    }
  }

  // Formats a byte count as B, KB or MB (1024-based, one decimal for KB/MB).
  const formatSize = (bytes: number) => {
    if (bytes < 1024) return `${bytes} B`
    if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`
    return `${(bytes / 1024 / 1024).toFixed(1)} MB`
  }

  return (

{total} Dokumente

{loading ? (
Laden...
) : docs.length === 0 ? (
Keine Dokumente gefunden. Starten Sie zuerst einen Crawl-Job.
) : (
{docs.map(doc => { const cls = CLASSIFICATION_LABELS[doc.classification || ''] || CLASSIFICATION_LABELS['Sonstiges'] return ( ) })}
Datei Kategorie Konfidenz Groesse Archiv Aktionen
{doc.file_name}
{doc.source_name}
{doc.classification_corrected && ( * )} {doc.classification_confidence != null && (
{(doc.classification_confidence * 100).toFixed(0)}%
)}
{formatSize(doc.file_size_bytes)} {doc.archived ? ( IPFS ) : ( - )} {!doc.archived && ( )}
)}
)
}

// =============================================================================
// TAB: ONBOARDING-REPORT
// =============================================================================

/** Tab that generates onboarding reports and shows the selected report. */
function ReportTab() {
  const [reports, setReports] = useState<OnboardingReport[]>([])
  const [activeReport, setActiveReport] = useState<OnboardingReport | null>(null)
  const [loading, setLoading] = useState(true)
  const [generating, setGenerating] = useState(false)

  // Fetches the report list. With `selectFirst`, also loads the first report's
  // detail and selects it unless a report is already selected. Keeping
  // `activeReport` out of the dependency list avoids the refetch that would
  // otherwise follow every selection change.
  const fetchReports = useCallback(async (selectFirst: boolean) => {
    setLoading(true)
    try {
      const data = await api('reports')
      const list: OnboardingReport[] = Array.isArray(data) ? data : []
      setReports(list)
      const first = list[0]
      if (selectFirst && first) {
        const detail = await api(`reports/${first.id}`)
        setActiveReport(prev => prev ?? detail)
      }
    } catch (err: unknown) {
      console.error('Failed to load reports', err)
    }
    setLoading(false)
  }, [])

  // Zero-argument list refresh that leaves the current selection untouched.
  const loadReports = useCallback(() => fetchReports(false), [fetchReports])

  useEffect(() => {
    void fetchReports(true)
  }, [fetchReports])

  // Generates a new report, selects it, then refreshes the list.
  const handleGenerate = async () => {
    setGenerating(true)
    try {
      const result = await api('reports/generate', {
        method: 'POST',
        body: JSON.stringify({}),
      })
      setActiveReport(result)
      await loadReports()
    } catch (err: unknown) {
      console.error('Failed to generate report', err)
    } finally {
      setGenerating(false)
    }
  }

  // Loads one report's detail; on failure the current selection is kept.
  const handleSelectReport = async (id: string) => {
    try {
      const detail = await api(`reports/${id}`)
      setActiveReport(detail)
    } catch (err: unknown) {
      console.error('Failed to load report', err)
    }
  }

  // Compliance score ring
  const ComplianceRing = ({ score }: { score: number }) => {
    const radius = 50
    const circumference = 2 * Math.PI * radius
    // Clamp to 0..100 so `offset` always stays within [0, circumference].
    const clampedScore = Math.min(100, Math.max(0, score))
    const offset = circumference - (clampedScore / 100) * circumference
    const color = score >= 75 ? '#16a34a' : score >= 50 ? '#f59e0b' : '#dc2626'
    return (
{score.toFixed(0)}% Compliance
) } return (

Onboarding-Report

{/* Report selector */} {reports.length > 1 && (
{reports.map(r => ( ))}
)} {loading ? (
Laden...
) : !activeReport ? (

Kein Report vorhanden

Fuehren Sie zuerst einen Crawl durch und generieren Sie dann einen Report.

) : (
{/* Score + Stats */}
{activeReport.total_documents_found}
Dokumente gefunden
{Object.keys(activeReport.classification_breakdown || {}).length}
Kategorien abgedeckt
{(activeReport.gaps || []).length}
Luecken identifiziert
{/* Classification breakdown */}

Dokumenten-Verteilung

{Object.entries(activeReport.classification_breakdown || {}).map(([cat, count]) => { const cls = CLASSIFICATION_LABELS[cat] || CLASSIFICATION_LABELS['Sonstiges'] return ( {cls.label}: {count as number} ) })} {Object.keys(activeReport.classification_breakdown || {}).length === 0 && ( Keine Dokumente klassifiziert )}
{/* Gap summary */} {activeReport.gap_summary && (
{activeReport.gap_summary.critical}
Kritisch
{activeReport.gap_summary.high}
Hoch
{activeReport.gap_summary.medium}
Mittel
)} {/* Gap details */} {(activeReport.gaps || []).length > 0 && (

Compliance-Luecken

{activeReport.gaps.map((gap) => (
{gap.category}

{gap.description}

{gap.severity}
Regulierung: {gap.regulation} | Aktion: {gap.requiredAction}
))}
)}
)}
)
}

// =============================================================================
// MAIN PAGE
// =============================================================================

/** Identifier of one of the four page tabs. */
type Tab = 'sources' | 'jobs' | 'documents' | 'report'

/** Page component that holds the active tab and renders the matching tab component. */
export default function DocumentCrawlerPage() {
  // Typed as `Tab` so only known tab ids can be stored.
  const [activeTab, setActiveTab] = useState<Tab>('sources')

  const tabs: { id: Tab; label: string }[] = [
    { id: 'sources', label: 'Quellen' },
    { id: 'jobs', label: 'Crawl-Jobs' },
    { id: 'documents', label: 'Dokumente' },
    { id: 'report', label: 'Onboarding-Report' },
  ]

  return (
{/* Header */}

Document Crawler & Auto-Onboarding

Automatisches Scannen von Dateisystemen, KI-Klassifizierung, IPFS-Archivierung und Compliance Gap-Analyse.

{/* Tabs */}
{/* Tab content */} {activeTab === 'sources' && } {activeTab === 'jobs' && } {activeTab === 'documents' && } {activeTab === 'report' && }
) }