Files
breakpilot-lehrer/website/app/admin/zeugnisse-crawler/page.tsx
Benjamin Boenisch 5a31f52310 Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00

486 lines
18 KiB
TypeScript

'use client'
import { useState, useEffect, useCallback } from 'react'
// Types
/**
 * One row of the "Bundesländer-Übersicht" table rendered by this page.
 * Values are taken as-is from the `action=bundesland-stats` response.
 */
interface BundeslandStats {
/** State identifier; rendered upper-cased, used as React key, as the crawl target and as the document filter value (presumably a short state code — confirm against the backend). */
bundesland: string
/** Human-readable state name shown in the table and in the filter dropdown. */
name: string
/** Selects between the "Training erlaubt" and "Kein Training" badge. */
training_allowed: boolean
/** Value shown in the "Dokumente" column. */
document_count: number
/** Value shown in the "Indexiert" column. */
indexed_count: number
/** Date string parsed with `new Date(...)` for display; `null` is rendered as "-". */
last_crawled: string | null
}
/**
 * Crawler state as returned by the `action=status` endpoint.
 * Only some fields are read by the page in this file; the rest are noted as such.
 */
interface CrawlerStatus {
/** Switches the header button between start/stop and toggles the "Crawler läuft" banner. */
is_running: boolean
/** Not rendered by this page. */
current_source: string | null
/** Shown upper-cased in the running banner when non-null. */
current_bundesland: string | null
/** Not rendered by this page. */
queue_length: number
/** Value of the "Heute gecrawlt" card. */
documents_crawled_today: number
/** Not rendered by this page. */
documents_indexed_today: number
/** Shown as the "… Fehler" subtitle of the "Heute gecrawlt" card. */
errors_today: number
/** Not rendered by this page. */
last_activity: string | null
}
/**
 * Aggregate figures as returned by the `action=stats` endpoint; they feed the stats cards.
 */
interface ZeugnisStats {
/** Value of the "Bundesländer" card (the page falls back to 16 when this is missing or 0). */
total_sources: number
/** Value of the "Dokumente gesamt" card. */
total_documents: number
/** Shown as the "… indexiert" subtitle of the "Dokumente gesamt" card. */
indexed_documents: number
/** Value of the "Training erlaubt" card. */
training_allowed_documents: number
/** Not rendered by this page. */
active_crawls: number
/** Not rendered by this page; the table uses the separate `bundesland-stats` response instead. */
per_bundesland: BundeslandStats[]
}
/**
 * One crawled document as listed by the `action=documents` endpoint.
 *
 * NOTE(review): inside this module the name shadows the global DOM `Document` type.
 */
interface Document {
/** Used as the React key of the table row. */
id: string
/** Link text; an empty title is displayed as "Unbenannt". */
title: string
/** Target of the link, opened in a new tab. */
url: string
/** Rendered upper-cased in the "Bundesland" column. */
bundesland: string
/** Not rendered by this page. */
source_name: string
/** Selects between the "Training erlaubt" and "Kein Training" badge. */
training_allowed: boolean
/** When true an additional "Indexiert" pill is shown. */
indexed_in_qdrant: boolean
/** Date string parsed with `new Date(...)` and formatted with the `de-DE` locale. */
created_at: string
}
// Status badge component
/**
 * Pill-shaped label that states whether training use is permitted.
 *
 * @param allowed - `true` renders the green "Training erlaubt" pill,
 *   `false` the red "Kein Training" pill.
 */
function StatusBadge({ allowed }: { allowed: boolean }) {
  let palette: string
  let label: string
  if (allowed) {
    palette = 'bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200'
    label = 'Training erlaubt'
  } else {
    palette = 'bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-200'
    label = 'Kein Training'
  }

  // Shared pill geometry followed by the colour scheme picked above.
  const classes = `inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium ${palette}`

  return <span className={classes}>{label}</span>
}
// Stats card component
/**
 * Dashboard tile: an icon on the left, and next to it a small title,
 * a prominent value and (optionally) a muted subtitle line.
 *
 * @param title - Caption shown above the value.
 * @param value - Main figure or text of the tile.
 * @param subtitle - Optional secondary line; omitted from the tile when falsy.
 * @param icon - Text (e.g. an emoji) rendered as the tile's icon.
 */
function StatsCard(props: {
  title: string
  value: number | string
  subtitle?: string
  icon: string
}) {
  const { title, value, subtitle, icon } = props

  // Only produce the subtitle paragraph when a subtitle was supplied.
  const subtitleLine = subtitle && (
    <p className="text-xs text-gray-400 dark:text-gray-500">{subtitle}</p>
  )

  return (
    <div className="bg-white dark:bg-gray-800 rounded-lg shadow p-5 border border-gray-200 dark:border-gray-700">
      <div className="flex items-center">
        <div className="flex-shrink-0">
          <span className="text-2xl">{icon}</span>
        </div>
        <div className="ml-4">
          <p className="text-sm font-medium text-gray-500 dark:text-gray-400">{title}</p>
          <p className="text-2xl font-semibold text-gray-900 dark:text-white">{value}</p>
          {subtitleLine}
        </div>
      </div>
    </div>
  )
}
/** Base URL of the admin API route that all crawler requests go through. */
const CRAWLER_API = '/api/admin/zeugnisse-crawler'

/**
 * Message shown when polling fails. Kept as a constant so that a later
 * successful poll can clear exactly this message and leave errors raised by
 * user actions (start/stop/init) untouched.
 */
const LOAD_ERROR_MESSAGE = 'Fehler beim Laden der Daten'

/** Turns an unknown thrown value into a displayable message. */
function toErrorMessage(err: unknown, fallback: string): string {
  return err instanceof Error && err.message ? err.message : fallback
}

/**
 * Reads the `detail` string of a failed response.
 * Returns `fallback` when the body is not JSON or carries no string `detail`.
 */
async function readErrorDetail(res: Response, fallback: string): Promise<string> {
  try {
    const body: unknown = await res.json()
    if (typeof body === 'object' && body !== null && 'detail' in body) {
      const detail = (body as { detail?: unknown }).detail
      if (typeof detail === 'string' && detail) {
        return detail
      }
    }
  } catch {
    // Body was empty or not JSON (e.g. an HTML error page) — use the fallback.
  }
  return fallback
}

/**
 * Sends a POST to `?action=<action>`; a JSON body is attached only when
 * `payload` is given.
 *
 * @throws Error carrying the server's `detail` (or `fallback`) on a non-2xx response.
 */
async function postCrawlerAction(action: string, fallback: string, payload?: unknown): Promise<void> {
  const init: RequestInit = { method: 'POST' }
  if (payload !== undefined) {
    init.headers = { 'Content-Type': 'application/json' }
    init.body = JSON.stringify(payload)
  }
  const res = await fetch(`${CRAWLER_API}?action=${action}`, init)
  if (!res.ok) {
    throw new Error(await readErrorDetail(res, fallback))
  }
}

/**
 * Admin page for the Zeugnisse crawler.
 *
 * Loads aggregate stats, per-Bundesland stats, the crawler status and the
 * document list, refreshes them every 5 seconds, and offers buttons to
 * initialise sources and to start/stop the crawler (globally or for a single
 * Bundesland).
 */
export default function ZeugnisseCrawlerPage() {
  const [stats, setStats] = useState<ZeugnisStats | null>(null)
  const [bundeslandStats, setBundeslandStats] = useState<BundeslandStats[]>([])
  const [crawlerStatus, setCrawlerStatus] = useState<CrawlerStatus | null>(null)
  const [documents, setDocuments] = useState<Document[]>([])
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const [selectedBundesland, setSelectedBundesland] = useState<string | null>(null)
  const [isStarting, setIsStarting] = useState(false)
  const [isStopping, setIsStopping] = useState(false)

  /**
   * Fetches all four data sets in parallel and stores them.
   *
   * @param signal - Optional abort signal; once aborted, no state is touched.
   * @param bundeslandFilter - Filter for the document list. Defaults to the
   *   currently selected Bundesland; callers that have just changed the
   *   selection pass the new value explicitly because this closure still
   *   sees the old one.
   */
  const fetchData = useCallback(
    async (signal?: AbortSignal, bundeslandFilter: string | null = selectedBundesland) => {
      // Encode the filter so unexpected characters cannot break the query string.
      const documentsUrl = bundeslandFilter
        ? `${CRAWLER_API}?action=documents&bundesland=${encodeURIComponent(bundeslandFilter)}`
        : `${CRAWLER_API}?action=documents`

      try {
        const [statsRes, bundeslandRes, statusRes, docsRes] = await Promise.all([
          fetch(`${CRAWLER_API}?action=stats`, { signal }),
          fetch(`${CRAWLER_API}?action=bundesland-stats`, { signal }),
          fetch(`${CRAWLER_API}?action=status`, { signal }),
          fetch(documentsUrl, { signal }),
        ])

        const statsData = statsRes.ok ? await statsRes.json() : null
        const bundeslandData = bundeslandRes.ok ? await bundeslandRes.json() : null
        const statusData = statusRes.ok ? await statusRes.json() : null
        const docsData = docsRes.ok ? await docsRes.json() : null

        // The request was superseded (filter change/unmount) while we were reading bodies.
        if (signal?.aborted) return

        if (statsRes.ok) setStats(statsData)
        if (statusRes.ok) setCrawlerStatus(statusData)

        // The tables call .map() on these, so anything but an array would crash the render.
        const bundeslandValid = Array.isArray(bundeslandData)
        if (bundeslandValid) setBundeslandStats(bundeslandData)
        const docsValid = Array.isArray(docsData)
        if (docsValid) setDocuments(docsData)

        const complete = statsRes.ok && statusRes.ok && bundeslandValid && docsValid
        setError((prev) => {
          if (!complete) return prev ?? LOAD_ERROR_MESSAGE
          // Clear only our own load error; keep messages raised by start/stop/init.
          return prev === LOAD_ERROR_MESSAGE ? null : prev
        })
      } catch (err) {
        // An aborted fetch rejects; that is expected and not an error to report.
        if (signal?.aborted) return
        setError((prev) => prev ?? LOAD_ERROR_MESSAGE)
        console.error(err)
      } finally {
        if (!signal?.aborted) setLoading(false)
      }
    },
    [selectedBundesland]
  )

  // Initial load and polling. The controller cancels in-flight requests when the
  // filter changes or the page unmounts, so a slow stale response cannot
  // overwrite newer data.
  useEffect(() => {
    const controller = new AbortController()
    void fetchData(controller.signal)
    const interval = setInterval(() => {
      void fetchData(controller.signal)
    }, 5000)
    return () => {
      clearInterval(interval)
      controller.abort()
    }
  }, [fetchData])

  // Start crawler (for one Bundesland, or for all when none is given)
  const startCrawler = async (bundesland?: string) => {
    setIsStarting(true)
    try {
      await postCrawlerAction('start', 'Start fehlgeschlagen', { bundesland })
      // When a row triggered the crawl the selection was just set to that
      // Bundesland; refresh with it rather than with the stale closure value.
      await fetchData(undefined, bundesland ?? selectedBundesland)
    } catch (err: unknown) {
      setError(toErrorMessage(err, 'Start fehlgeschlagen'))
    } finally {
      setIsStarting(false)
    }
  }

  // Stop crawler
  const stopCrawler = async () => {
    setIsStopping(true)
    try {
      await postCrawlerAction('stop', 'Stop fehlgeschlagen')
      await fetchData()
    } catch (err: unknown) {
      setError(toErrorMessage(err, 'Stop fehlgeschlagen'))
    } finally {
      setIsStopping(false)
    }
  }

  // Initialize sources
  const initializeSources = async () => {
    try {
      await postCrawlerAction('init', 'Initialisierung fehlgeschlagen')
      await fetchData()
    } catch (err: unknown) {
      setError(toErrorMessage(err, 'Initialisierung fehlgeschlagen'))
    }
  }

  // Skeleton placeholder until the first load has finished.
  if (loading) {
    return (
      <div className="min-h-screen bg-gray-50 dark:bg-gray-900 p-8">
        <div className="max-w-7xl mx-auto">
          <div className="animate-pulse">
            <div className="h-8 bg-gray-200 dark:bg-gray-700 rounded w-1/4 mb-6"></div>
            <div className="grid grid-cols-4 gap-4 mb-6">
              {[1, 2, 3, 4].map(i => (
                <div key={i} className="h-24 bg-gray-200 dark:bg-gray-700 rounded"></div>
              ))}
            </div>
          </div>
        </div>
      </div>
    )
  }

  return (
    <div className="min-h-screen bg-gray-50 dark:bg-gray-900 p-8">
      <div className="max-w-7xl mx-auto">
        {/* Header */}
        <div className="flex justify-between items-center mb-6">
          <div>
            <h1 className="text-2xl font-bold text-gray-900 dark:text-white">
              Zeugnisse Rights-Aware Crawler
            </h1>
            <p className="text-gray-500 dark:text-gray-400">
              Crawlt und indexiert Zeugnisverordnungen aller 16 Bundesländer
            </p>
          </div>
          <div className="flex gap-2">
            <button
              onClick={initializeSources}
              className="px-4 py-2 bg-gray-600 text-white rounded-lg hover:bg-gray-700 transition"
            >
              Quellen initialisieren
            </button>
            {crawlerStatus?.is_running ? (
              <button
                onClick={stopCrawler}
                disabled={isStopping}
                className="px-4 py-2 bg-red-600 text-white rounded-lg hover:bg-red-700 transition disabled:opacity-50"
              >
                {isStopping ? 'Stoppe...' : 'Crawler stoppen'}
              </button>
            ) : (
              <button
                onClick={() => startCrawler()}
                disabled={isStarting}
                className="px-4 py-2 bg-green-600 text-white rounded-lg hover:bg-green-700 transition disabled:opacity-50"
              >
                {isStarting ? 'Starte...' : 'Crawler starten'}
              </button>
            )}
          </div>
        </div>

        {/* Error message */}
        {error && (
          <div className="mb-6 p-4 bg-red-100 dark:bg-red-900 text-red-800 dark:text-red-200 rounded-lg">
            {error}
            <button onClick={() => setError(null)} className="ml-4 underline">Schließen</button>
          </div>
        )}

        {/* Stats Cards */}
        <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4 mb-6">
          {/* `||` is intentional here: a missing or zero source count still shows 16. */}
          <StatsCard
            title="Bundesländer"
            value={stats?.total_sources || 16}
            icon="🗺️"
          />
          <StatsCard
            title="Dokumente gesamt"
            value={stats?.total_documents ?? 0}
            subtitle={`${stats?.indexed_documents ?? 0} indexiert`}
            icon="📄"
          />
          <StatsCard
            title="Training erlaubt"
            value={stats?.training_allowed_documents ?? 0}
            subtitle="Für KI-Training nutzbar"
            icon="✅"
          />
          <StatsCard
            title="Heute gecrawlt"
            value={crawlerStatus?.documents_crawled_today ?? 0}
            subtitle={`${crawlerStatus?.errors_today ?? 0} Fehler`}
            icon="🔄"
          />
        </div>

        {/* Crawler Status */}
        {crawlerStatus?.is_running && (
          <div className="mb-6 p-4 bg-blue-50 dark:bg-blue-900/20 border border-blue-200 dark:border-blue-800 rounded-lg">
            <div className="flex items-center">
              <div className="animate-spin rounded-full h-4 w-4 border-2 border-blue-600 border-t-transparent mr-3"></div>
              <div>
                <p className="font-medium text-blue-800 dark:text-blue-200">
                  Crawler läuft
                </p>
                {crawlerStatus.current_bundesland && (
                  <p className="text-sm text-blue-600 dark:text-blue-400">
                    Aktuell: {crawlerStatus.current_bundesland.toUpperCase()}
                  </p>
                )}
              </div>
            </div>
          </div>
        )}

        {/* Bundesland Overview */}
        <div className="bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-200 dark:border-gray-700 mb-6">
          <div className="px-6 py-4 border-b border-gray-200 dark:border-gray-700">
            <h2 className="text-lg font-semibold text-gray-900 dark:text-white">
              Bundesländer-Übersicht
            </h2>
          </div>
          <div className="overflow-x-auto">
            <table className="min-w-full divide-y divide-gray-200 dark:divide-gray-700">
              <thead className="bg-gray-50 dark:bg-gray-900">
                <tr>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Bundesland
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Training-Status
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Dokumente
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Indexiert
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Letzter Crawl
                  </th>
                  <th className="px-6 py-3 text-right text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Aktion
                  </th>
                </tr>
              </thead>
              <tbody className="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-700">
                {bundeslandStats.map((bl) => (
                  <tr key={bl.bundesland} className="hover:bg-gray-50 dark:hover:bg-gray-700">
                    <td className="px-6 py-4 whitespace-nowrap">
                      <div className="flex items-center">
                        <span className="font-medium text-gray-900 dark:text-white">
                          {bl.name}
                        </span>
                        <span className="ml-2 text-xs text-gray-500 dark:text-gray-400">
                          ({bl.bundesland.toUpperCase()})
                        </span>
                      </div>
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap">
                      <StatusBadge allowed={bl.training_allowed} />
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap text-gray-900 dark:text-white">
                      {bl.document_count}
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap text-gray-900 dark:text-white">
                      {bl.indexed_count}
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap text-gray-500 dark:text-gray-400">
                      {bl.last_crawled
                        ? new Date(bl.last_crawled).toLocaleDateString('de-DE')
                        : '-'
                      }
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap text-right">
                      {/* Also disabled while a start request is pending, to prevent double submits. */}
                      <button
                        onClick={() => {
                          setSelectedBundesland(bl.bundesland)
                          void startCrawler(bl.bundesland)
                        }}
                        disabled={crawlerStatus?.is_running || isStarting}
                        className="text-blue-600 hover:text-blue-800 dark:text-blue-400 dark:hover:text-blue-300 disabled:opacity-50 disabled:cursor-not-allowed"
                      >
                        Crawlen
                      </button>
                    </td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </div>

        {/* Documents List */}
        <div className="bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-200 dark:border-gray-700">
          <div className="px-6 py-4 border-b border-gray-200 dark:border-gray-700 flex justify-between items-center">
            <h2 className="text-lg font-semibold text-gray-900 dark:text-white">
              Dokumente
            </h2>
            <select
              value={selectedBundesland ?? ''}
              onChange={(e) => setSelectedBundesland(e.target.value || null)}
              className="px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
            >
              <option value="">Alle Bundesländer</option>
              {bundeslandStats.map((bl) => (
                <option key={bl.bundesland} value={bl.bundesland}>
                  {bl.name}
                </option>
              ))}
            </select>
          </div>
          <div className="overflow-x-auto">
            <table className="min-w-full divide-y divide-gray-200 dark:divide-gray-700">
              <thead className="bg-gray-50 dark:bg-gray-900">
                <tr>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Titel
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Bundesland
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Status
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Erstellt
                  </th>
                </tr>
              </thead>
              <tbody className="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-700">
                {documents.length === 0 ? (
                  <tr>
                    <td colSpan={4} className="px-6 py-8 text-center text-gray-500 dark:text-gray-400">
                      Keine Dokumente vorhanden. Starten Sie den Crawler, um Dokumente zu sammeln.
                    </td>
                  </tr>
                ) : (
                  documents.map((doc) => (
                    <tr key={doc.id} className="hover:bg-gray-50 dark:hover:bg-gray-700">
                      <td className="px-6 py-4">
                        <div className="flex items-center">
                          <a
                            href={doc.url}
                            target="_blank"
                            rel="noopener noreferrer"
                            className="text-blue-600 hover:text-blue-800 dark:text-blue-400 font-medium"
                          >
                            {doc.title || 'Unbenannt'}
                          </a>
                        </div>
                      </td>
                      <td className="px-6 py-4 whitespace-nowrap">
                        <span className="text-gray-900 dark:text-white">
                          {doc.bundesland?.toUpperCase()}
                        </span>
                      </td>
                      <td className="px-6 py-4 whitespace-nowrap">
                        <div className="flex gap-2">
                          <StatusBadge allowed={doc.training_allowed} />
                          {doc.indexed_in_qdrant && (
                            <span className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-blue-100 text-blue-800 dark:bg-blue-900 dark:text-blue-200">
                              Indexiert
                            </span>
                          )}
                        </div>
                      </td>
                      <td className="px-6 py-4 whitespace-nowrap text-gray-500 dark:text-gray-400">
                        {new Date(doc.created_at).toLocaleDateString('de-DE')}
                      </td>
                    </tr>
                  ))
                )}
              </tbody>
            </table>
          </div>
        </div>

        {/* Rights Legend */}
        <div className="mt-6 p-4 bg-gray-100 dark:bg-gray-800 rounded-lg">
          <h3 className="font-medium text-gray-900 dark:text-white mb-2">
            Hinweis zu Training-Berechtigungen
          </h3>
          <p className="text-sm text-gray-600 dark:text-gray-400">
            Dokumente mit <StatusBadge allowed={true} /> sind als amtliche Werke nach §5 UrhG
            gemeinfrei und können für das KI-Training verwendet werden. Dokumente ohne explizite
            Lizenzierung werden <span className="font-medium">nicht</span> für das Training verwendet.
          </p>
        </div>
      </div>
    </div>
  )
}