Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website, Klausur-Service, School-Service, Voice-Service, Geo-Service, BreakPilot Drive, Agent-Core Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
486 lines
18 KiB
TypeScript
486 lines
18 KiB
TypeScript
'use client'
|
|
|
|
import { useState, useEffect, useCallback } from 'react'
|
|
|
|
// Types
|
|
/**
 * One row of per-Bundesland crawler statistics as returned by the
 * `bundesland-stats` action and rendered in the "Bundesländer-Übersicht" table.
 */
interface BundeslandStats {
  /** Short identifier of the Bundesland; rendered upper-cased and used as the document filter / crawl target. */
  bundesland: string
  /** Human-readable name shown in the table and in the filter dropdown. */
  name: string
  /** Drives the "Training erlaubt" / "Kein Training" badge. */
  training_allowed: boolean
  /** Value of the "Dokumente" column. */
  document_count: number
  /** Value of the "Indexiert" column. */
  indexed_count: number
  /** Date string parsed with `new Date(...)`; `null` is rendered as "-". */
  last_crawled: string | null
}
|
|
|
|
/**
 * Live crawler state as returned by the `status` action.
 *
 * Only `is_running`, `current_bundesland`, `documents_crawled_today` and
 * `errors_today` are read by the page in this file; the remaining fields are
 * part of the payload but not rendered here.
 */
interface CrawlerStatus {
  /** Switches the header button between start/stop and shows the "Crawler läuft" banner. */
  is_running: boolean
  /** Not rendered in this file. */
  current_source: string | null
  /** Shown upper-cased in the running banner when non-null. */
  current_bundesland: string | null
  /** Not rendered in this file. */
  queue_length: number
  /** Value of the "Heute gecrawlt" card. */
  documents_crawled_today: number
  /** Not rendered in this file. */
  documents_indexed_today: number
  /** Shown as "<n> Fehler" in the subtitle of the "Heute gecrawlt" card. */
  errors_today: number
  /** Not rendered in this file. */
  last_activity: string | null
}
|
|
|
|
/**
 * Aggregate statistics as returned by the `stats` action; feeds the stats
 * cards at the top of the page.
 */
interface ZeugnisStats {
  /** Value of the "Bundesländer" card (the page falls back to 16 when this is falsy). */
  total_sources: number
  /** Value of the "Dokumente gesamt" card. */
  total_documents: number
  /** Shown as "<n> indexiert" in the subtitle of the "Dokumente gesamt" card. */
  indexed_documents: number
  /** Value of the "Training erlaubt" card. */
  training_allowed_documents: number
  /** Not rendered in this file. */
  active_crawls: number
  /** Not rendered in this file; the table uses the separate `bundesland-stats` response instead. */
  per_bundesland: BundeslandStats[]
}
|
|
|
|
/**
 * A crawled document as returned by the `documents` action and rendered in
 * the "Dokumente" table.
 *
 * NOTE: inside this module the name shadows the global DOM `Document` type.
 */
interface Document {
  /** Used as the React list key. */
  id: string
  /** Link text; an empty title is rendered as "Unbenannt". */
  title: string
  /** Link target, opened in a new tab. */
  url: string
  /** Rendered upper-cased in the "Bundesland" column. */
  bundesland: string
  /** Not rendered in this file. */
  source_name: string
  /** Drives the "Training erlaubt" / "Kein Training" badge. */
  training_allowed: boolean
  /** When true an additional "Indexiert" badge is shown. */
  indexed_in_qdrant: boolean
  /** Date string parsed with `new Date(...)` for the "Erstellt" column. */
  created_at: string
}
|
|
|
|
// Status badge component
|
|
/**
 * Pill-shaped badge that states whether content may be used for training.
 *
 * @param allowed - `true` renders a green "Training erlaubt" badge,
 *   `false` a red "Kein Training" badge.
 */
function StatusBadge({ allowed }: { allowed: boolean }) {
  // Colour classes (light + dark theme) depend solely on the permission flag.
  const toneClasses = allowed
    ? 'bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200'
    : 'bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-200'

  return (
    <span
      className={`inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium ${toneClasses}`}
    >
      {allowed ? 'Training erlaubt' : 'Kein Training'}
    </span>
  )
}
|
|
|
|
// Stats card component
|
|
/**
 * Card showing a single headline figure with an icon.
 *
 * @param title - Caption rendered above the value.
 * @param value - Headline figure (number or preformatted string).
 * @param subtitle - Optional small line below the value; omitted when falsy.
 * @param icon - Text/emoji rendered to the left of the figures.
 */
function StatsCard({ title, value, subtitle, icon }: {
  title: string
  value: number | string
  subtitle?: string
  icon: string
}) {
  return (
    <div className="bg-white dark:bg-gray-800 rounded-lg shadow p-5 border border-gray-200 dark:border-gray-700">
      <div className="flex items-center">
        <div className="flex-shrink-0">
          <span className="text-2xl">{icon}</span>
        </div>
        <div className="ml-4">
          <p className="text-sm font-medium text-gray-500 dark:text-gray-400">{title}</p>
          <p className="text-2xl font-semibold text-gray-900 dark:text-white">{value}</p>
          {subtitle && (
            <p className="text-xs text-gray-400 dark:text-gray-500">{subtitle}</p>
          )}
        </div>
      </div>
    </div>
  )
}
|
|
|
|
// Message shown when the periodic data load fails. Kept as a constant so a
// later successful load can clear exactly this message and nothing else.
const LOAD_ERROR_MESSAGE = 'Fehler beim Laden der Daten'

/** Turns an unknown thrown value into a displayable message. */
function toErrorMessage(err: unknown, fallback: string): string {
  return err instanceof Error && err.message ? err.message : fallback
}

/**
 * Extracts a string `detail` field from a failed response body.
 * Falls back to `fallback` when the body is not JSON or carries no string detail.
 */
async function readFailureDetail(res: Response, fallback: string): Promise<string> {
  try {
    const body: unknown = await res.json()
    if (typeof body === 'object' && body !== null && 'detail' in body) {
      const detail = (body as { detail?: unknown }).detail
      if (typeof detail === 'string' && detail) {
        return detail
      }
    }
  } catch {
    // Body was empty or not JSON (e.g. an HTML error page); use the fallback.
  }
  return fallback
}

/**
 * Admin page for the Zeugnisse crawler.
 *
 * Loads aggregate stats, per-Bundesland stats, the crawler status and the
 * document list from `/api/admin/zeugnisse-crawler`, refreshes them every
 * 5 seconds, and offers buttons to initialise sources and to start/stop the
 * crawler (globally or for a single Bundesland).
 */
export default function ZeugnisseCrawlerPage() {
  const [stats, setStats] = useState<ZeugnisStats | null>(null)
  const [bundeslandStats, setBundeslandStats] = useState<BundeslandStats[]>([])
  const [crawlerStatus, setCrawlerStatus] = useState<CrawlerStatus | null>(null)
  const [documents, setDocuments] = useState<Document[]>([])
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const [selectedBundesland, setSelectedBundesland] = useState<string | null>(null)
  const [isStarting, setIsStarting] = useState(false)
  const [isStopping, setIsStopping] = useState(false)

  // Fetch all data. Re-created whenever the document filter changes so the
  // polling effect below restarts with the new filter.
  const fetchData = useCallback(async () => {
    try {
      // The filter value ends up in a query string, so it must be encoded.
      const bundeslandQuery = selectedBundesland
        ? `&bundesland=${encodeURIComponent(selectedBundesland)}`
        : ''

      const [statsRes, bundeslandRes, statusRes, docsRes] = await Promise.all([
        fetch('/api/admin/zeugnisse-crawler?action=stats'),
        fetch('/api/admin/zeugnisse-crawler?action=bundesland-stats'),
        fetch('/api/admin/zeugnisse-crawler?action=status'),
        fetch(`/api/admin/zeugnisse-crawler?action=documents${bundeslandQuery}`),
      ])

      // A non-OK response leaves the previously loaded value in place.
      // NOTE(review): payloads are trusted to match the declared interfaces;
      // only the list shapes are checked because `.map` would crash on render.
      if (statsRes.ok) {
        setStats(await statsRes.json())
      }

      if (bundeslandRes.ok) {
        const rows: unknown = await bundeslandRes.json()
        if (Array.isArray(rows)) {
          setBundeslandStats(rows)
        }
      }

      if (statusRes.ok) {
        setCrawlerStatus(await statusRes.json())
      }

      if (docsRes.ok) {
        const docs: unknown = await docsRes.json()
        if (Array.isArray(docs)) {
          setDocuments(docs)
        }
      }

      // Clear only a previous load error. Errors from start/stop/init stay
      // visible until the user dismisses them or triggers another action;
      // otherwise the 5-second poll would wipe them almost immediately.
      setError((current) => (current === LOAD_ERROR_MESSAGE ? null : current))
    } catch (err) {
      setError(LOAD_ERROR_MESSAGE)
      console.error(err)
    } finally {
      setLoading(false)
    }
  }, [selectedBundesland])

  // Initial load and polling
  useEffect(() => {
    void fetchData()
    const interval = setInterval(() => {
      void fetchData()
    }, 5000)
    return () => clearInterval(interval)
  }, [fetchData])

  // Start crawler, optionally restricted to a single Bundesland
  const startCrawler = async (bundesland?: string) => {
    setIsStarting(true)
    setError(null)
    try {
      const res = await fetch('/api/admin/zeugnisse-crawler?action=start', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ bundesland }),
      })
      if (!res.ok) {
        throw new Error(await readFailureDetail(res, 'Start fehlgeschlagen'))
      }
      await fetchData()
    } catch (err: unknown) {
      setError(toErrorMessage(err, 'Start fehlgeschlagen'))
    } finally {
      setIsStarting(false)
    }
  }

  // Stop crawler
  const stopCrawler = async () => {
    setIsStopping(true)
    setError(null)
    try {
      const res = await fetch('/api/admin/zeugnisse-crawler?action=stop', {
        method: 'POST',
      })
      if (!res.ok) {
        throw new Error(await readFailureDetail(res, 'Stop fehlgeschlagen'))
      }
      await fetchData()
    } catch (err: unknown) {
      setError(toErrorMessage(err, 'Stop fehlgeschlagen'))
    } finally {
      setIsStopping(false)
    }
  }

  // Initialize sources
  const initializeSources = async () => {
    setError(null)
    try {
      const res = await fetch('/api/admin/zeugnisse-crawler?action=init', {
        method: 'POST',
      })
      if (!res.ok) {
        throw new Error(await readFailureDetail(res, 'Initialisierung fehlgeschlagen'))
      }
      await fetchData()
    } catch (err: unknown) {
      setError(toErrorMessage(err, 'Initialisierung fehlgeschlagen'))
    }
  }

  // Skeleton placeholder until the first load attempt has finished
  if (loading) {
    return (
      <div className="min-h-screen bg-gray-50 dark:bg-gray-900 p-8">
        <div className="max-w-7xl mx-auto">
          <div className="animate-pulse">
            <div className="h-8 bg-gray-200 dark:bg-gray-700 rounded w-1/4 mb-6"></div>
            <div className="grid grid-cols-4 gap-4 mb-6">
              {[1, 2, 3, 4].map(i => (
                <div key={i} className="h-24 bg-gray-200 dark:bg-gray-700 rounded"></div>
              ))}
            </div>
          </div>
        </div>
      </div>
    )
  }

  return (
    <div className="min-h-screen bg-gray-50 dark:bg-gray-900 p-8">
      <div className="max-w-7xl mx-auto">
        {/* Header */}
        <div className="flex justify-between items-center mb-6">
          <div>
            <h1 className="text-2xl font-bold text-gray-900 dark:text-white">
              Zeugnisse Rights-Aware Crawler
            </h1>
            <p className="text-gray-500 dark:text-gray-400">
              Crawlt und indexiert Zeugnisverordnungen aller 16 Bundesländer
            </p>
          </div>
          <div className="flex gap-2">
            <button
              onClick={initializeSources}
              className="px-4 py-2 bg-gray-600 text-white rounded-lg hover:bg-gray-700 transition"
            >
              Quellen initialisieren
            </button>
            {crawlerStatus?.is_running ? (
              <button
                onClick={stopCrawler}
                disabled={isStopping}
                className="px-4 py-2 bg-red-600 text-white rounded-lg hover:bg-red-700 transition disabled:opacity-50"
              >
                {isStopping ? 'Stoppe...' : 'Crawler stoppen'}
              </button>
            ) : (
              <button
                onClick={() => {
                  void startCrawler()
                }}
                disabled={isStarting}
                className="px-4 py-2 bg-green-600 text-white rounded-lg hover:bg-green-700 transition disabled:opacity-50"
              >
                {isStarting ? 'Starte...' : 'Crawler starten'}
              </button>
            )}
          </div>
        </div>

        {/* Error message */}
        {error && (
          <div className="mb-6 p-4 bg-red-100 dark:bg-red-900 text-red-800 dark:text-red-200 rounded-lg">
            {error}
            <button onClick={() => setError(null)} className="ml-4 underline">Schließen</button>
          </div>
        )}

        {/* Stats Cards */}
        <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4 mb-6">
          <StatsCard
            title="Bundesländer"
            // `||` is intentional: a missing or zero source count falls back to 16.
            value={stats?.total_sources || 16}
            icon="🗺️"
          />
          <StatsCard
            title="Dokumente gesamt"
            value={stats?.total_documents ?? 0}
            subtitle={`${stats?.indexed_documents ?? 0} indexiert`}
            icon="📄"
          />
          <StatsCard
            title="Training erlaubt"
            value={stats?.training_allowed_documents ?? 0}
            subtitle="Für KI-Training nutzbar"
            icon="✅"
          />
          <StatsCard
            title="Heute gecrawlt"
            value={crawlerStatus?.documents_crawled_today ?? 0}
            subtitle={`${crawlerStatus?.errors_today ?? 0} Fehler`}
            icon="🔄"
          />
        </div>

        {/* Crawler Status */}
        {crawlerStatus?.is_running && (
          <div className="mb-6 p-4 bg-blue-50 dark:bg-blue-900/20 border border-blue-200 dark:border-blue-800 rounded-lg">
            <div className="flex items-center">
              <div className="animate-spin rounded-full h-4 w-4 border-2 border-blue-600 border-t-transparent mr-3"></div>
              <div>
                <p className="font-medium text-blue-800 dark:text-blue-200">
                  Crawler läuft
                </p>
                {crawlerStatus.current_bundesland && (
                  <p className="text-sm text-blue-600 dark:text-blue-400">
                    Aktuell: {crawlerStatus.current_bundesland.toUpperCase()}
                  </p>
                )}
              </div>
            </div>
          </div>
        )}

        {/* Bundesland Overview */}
        <div className="bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-200 dark:border-gray-700 mb-6">
          <div className="px-6 py-4 border-b border-gray-200 dark:border-gray-700">
            <h2 className="text-lg font-semibold text-gray-900 dark:text-white">
              Bundesländer-Übersicht
            </h2>
          </div>
          <div className="overflow-x-auto">
            <table className="min-w-full divide-y divide-gray-200 dark:divide-gray-700">
              <thead className="bg-gray-50 dark:bg-gray-900">
                <tr>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Bundesland
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Training-Status
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Dokumente
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Indexiert
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Letzter Crawl
                  </th>
                  <th className="px-6 py-3 text-right text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Aktion
                  </th>
                </tr>
              </thead>
              <tbody className="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-700">
                {bundeslandStats.map((bl) => (
                  <tr key={bl.bundesland} className="hover:bg-gray-50 dark:hover:bg-gray-700">
                    <td className="px-6 py-4 whitespace-nowrap">
                      <div className="flex items-center">
                        <span className="font-medium text-gray-900 dark:text-white">
                          {bl.name}
                        </span>
                        <span className="ml-2 text-xs text-gray-500 dark:text-gray-400">
                          ({bl.bundesland.toUpperCase()})
                        </span>
                      </div>
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap">
                      <StatusBadge allowed={bl.training_allowed} />
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap text-gray-900 dark:text-white">
                      {bl.document_count}
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap text-gray-900 dark:text-white">
                      {bl.indexed_count}
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap text-gray-500 dark:text-gray-400">
                      {bl.last_crawled
                        ? new Date(bl.last_crawled).toLocaleDateString('de-DE')
                        : '-'
                      }
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap text-right">
                      <button
                        onClick={() => {
                          // Also switch the document filter to the crawled Bundesland.
                          setSelectedBundesland(bl.bundesland)
                          void startCrawler(bl.bundesland)
                        }}
                        // Blocked while a start request is in flight too, so a
                        // double click cannot issue two start requests.
                        disabled={crawlerStatus?.is_running || isStarting}
                        className="text-blue-600 hover:text-blue-800 dark:text-blue-400 dark:hover:text-blue-300 disabled:opacity-50 disabled:cursor-not-allowed"
                      >
                        Crawlen
                      </button>
                    </td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </div>

        {/* Documents List */}
        <div className="bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-200 dark:border-gray-700">
          <div className="px-6 py-4 border-b border-gray-200 dark:border-gray-700 flex justify-between items-center">
            <h2 className="text-lg font-semibold text-gray-900 dark:text-white">
              Dokumente
            </h2>
            <select
              value={selectedBundesland || ''}
              onChange={(e) => setSelectedBundesland(e.target.value || null)}
              className="px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
            >
              <option value="">Alle Bundesländer</option>
              {bundeslandStats.map((bl) => (
                <option key={bl.bundesland} value={bl.bundesland}>
                  {bl.name}
                </option>
              ))}
            </select>
          </div>
          <div className="overflow-x-auto">
            <table className="min-w-full divide-y divide-gray-200 dark:divide-gray-700">
              <thead className="bg-gray-50 dark:bg-gray-900">
                <tr>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Titel
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Bundesland
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Status
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Erstellt
                  </th>
                </tr>
              </thead>
              <tbody className="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-700">
                {documents.length === 0 ? (
                  <tr>
                    <td colSpan={4} className="px-6 py-8 text-center text-gray-500 dark:text-gray-400">
                      Keine Dokumente vorhanden. Starten Sie den Crawler, um Dokumente zu sammeln.
                    </td>
                  </tr>
                ) : (
                  documents.map((doc) => (
                    <tr key={doc.id} className="hover:bg-gray-50 dark:hover:bg-gray-700">
                      <td className="px-6 py-4">
                        <div className="flex items-center">
                          <a
                            href={doc.url}
                            target="_blank"
                            rel="noopener noreferrer"
                            className="text-blue-600 hover:text-blue-800 dark:text-blue-400 font-medium"
                          >
                            {doc.title || 'Unbenannt'}
                          </a>
                        </div>
                      </td>
                      <td className="px-6 py-4 whitespace-nowrap">
                        <span className="text-gray-900 dark:text-white">
                          {doc.bundesland?.toUpperCase()}
                        </span>
                      </td>
                      <td className="px-6 py-4 whitespace-nowrap">
                        <div className="flex gap-2">
                          <StatusBadge allowed={doc.training_allowed} />
                          {doc.indexed_in_qdrant && (
                            <span className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-blue-100 text-blue-800 dark:bg-blue-900 dark:text-blue-200">
                              Indexiert
                            </span>
                          )}
                        </div>
                      </td>
                      <td className="px-6 py-4 whitespace-nowrap text-gray-500 dark:text-gray-400">
                        {new Date(doc.created_at).toLocaleDateString('de-DE')}
                      </td>
                    </tr>
                  ))
                )}
              </tbody>
            </table>
          </div>
        </div>

        {/* Rights Legend */}
        <div className="mt-6 p-4 bg-gray-100 dark:bg-gray-800 rounded-lg">
          <h3 className="font-medium text-gray-900 dark:text-white mb-2">
            Hinweis zu Training-Berechtigungen
          </h3>
          <p className="text-sm text-gray-600 dark:text-gray-400">
            Dokumente mit <StatusBadge allowed={true} /> sind als amtliche Werke nach §5 UrhG
            gemeinfrei und können für das KI-Training verwendet werden. Dokumente ohne explizite
            Lizenzierung werden <span className="font-medium">nicht</span> für das Training verwendet.
          </p>
        </div>
      </div>
    </div>
  )
}
|