fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
485
website/app/admin/zeugnisse-crawler/page.tsx
Normal file
485
website/app/admin/zeugnisse-crawler/page.tsx
Normal file
@@ -0,0 +1,485 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useEffect, useCallback } from 'react'
|
||||
|
||||
// Types
|
||||
/**
 * Per-federal-state row of the "Bundesländer-Übersicht" table.
 * Shape of one element returned by the `bundesland-stats` action.
 */
interface BundeslandStats {
  /** State code; rendered upper-cased and used as the table key, filter value and crawl target. */
  bundesland: string
  /** Display name shown in the table and in the filter dropdown. */
  name: string
  /** Drives the training badge shown for the state. */
  training_allowed: boolean
  /** Value of the "Dokumente" column. */
  document_count: number
  /** Value of the "Indexiert" column. */
  indexed_count: number
  /** Date string parsed with `new Date(...)`; `null` is rendered as "-". */
  last_crawled: string | null
}
|
||||
|
||||
/**
 * Live crawler state as returned by the `status` action.
 * Only some fields are rendered by this page; the rest are carried as-is.
 */
interface CrawlerStatus {
  /** Switches the header button between "start" and "stop" and shows the running banner. */
  is_running: boolean
  /** Not rendered by this page. */
  current_source: string | null
  /** Shown upper-cased in the running banner when set. */
  current_bundesland: string | null
  /** Not rendered by this page. */
  queue_length: number
  /** Value of the "Heute gecrawlt" card. */
  documents_crawled_today: number
  /** Not rendered by this page. */
  documents_indexed_today: number
  /** Shown as the subtitle of the "Heute gecrawlt" card. */
  errors_today: number
  /** Not rendered by this page; presumably a timestamp string — confirm against the API. */
  last_activity: string | null
}
|
||||
|
||||
/**
 * Aggregate numbers returned by the `stats` action and shown in the stats cards.
 */
interface ZeugnisStats {
  /** Value of the "Bundesländer" card. */
  total_sources: number
  /** Value of the "Dokumente gesamt" card. */
  total_documents: number
  /** Shown as the "… indexiert" subtitle of the "Dokumente gesamt" card. */
  indexed_documents: number
  /** Value of the "Training erlaubt" card. */
  training_allowed_documents: number
  /** Not rendered by this page. */
  active_crawls: number
  /** Not rendered by this page; the table uses the separate `bundesland-stats` action instead. */
  per_bundesland: BundeslandStats[]
}
|
||||
|
||||
/**
 * One crawled document as returned by the `documents` action.
 *
 * NOTE: inside this module the name shadows the DOM global `Document` type.
 */
interface Document {
  /** Used as the React key of the table row. */
  id: string
  /** Link text; an empty title is rendered as "Unbenannt". */
  title: string
  /** Target of the title link (opened in a new tab). */
  url: string
  /** State code; rendered upper-cased. */
  bundesland: string
  /** Not rendered by this page. */
  source_name: string
  /** Drives the training badge of the row. */
  training_allowed: boolean
  /** When true an additional "Indexiert" badge is shown. */
  indexed_in_qdrant: boolean
  /** Date string parsed with `new Date(...)` and formatted for `de-DE`. */
  created_at: string
}
|
||||
|
||||
/**
 * Pill-shaped badge stating whether training use is permitted.
 *
 * @param allowed - `true` renders a green "Training erlaubt" badge,
 *   `false` a red "Kein Training" badge.
 */
function StatusBadge({ allowed }: { allowed: boolean }) {
  // Colour classes and label are chosen up front so the JSX stays flat.
  const palette = allowed
    ? 'bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200'
    : 'bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-200'
  const label = allowed ? 'Training erlaubt' : 'Kein Training'

  return (
    <span className={`inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium ${palette}`}>
      {label}
    </span>
  )
}
|
||||
|
||||
/** Props accepted by {@link StatsCard}. */
interface StatsCardProps {
  /** Small caption above the value. */
  title: string
  /** Main figure of the card. */
  value: number | string
  /** Optional fine print below the value; omitted or empty renders nothing. */
  subtitle?: string
  /** Text rendered in the large icon slot (callers on this page pass an emoji). */
  icon: string
}

/**
 * Card showing an icon next to a titled figure with an optional subtitle.
 */
function StatsCard({ title, value, subtitle, icon }: StatsCardProps) {
  return (
    <div className="bg-white dark:bg-gray-800 rounded-lg shadow p-5 border border-gray-200 dark:border-gray-700">
      <div className="flex items-center">
        <div className="flex-shrink-0">
          <span className="text-2xl">{icon}</span>
        </div>
        <div className="ml-4">
          <p className="text-sm font-medium text-gray-500 dark:text-gray-400">{title}</p>
          <p className="text-2xl font-semibold text-gray-900 dark:text-white">{value}</p>
          {subtitle ? (
            <p className="text-xs text-gray-400 dark:text-gray-500">{subtitle}</p>
          ) : null}
        </div>
      </div>
    </div>
  )
}
|
||||
|
||||
/** Admin API route; the `action` query parameter selects the operation. */
const CRAWLER_API = '/api/admin/zeugnisse-crawler'

/** Message shown when polling fails; also used to tell load errors apart from action errors. */
const LOAD_ERROR_MESSAGE = 'Fehler beim Laden der Daten'

/** Delay between two polls of the crawler API, in milliseconds. */
const POLL_INTERVAL_MS = 5000

/** Builds the API URL for an action with properly encoded query parameters. */
function crawlerUrl(action: string, params: Record<string, string> = {}): string {
  const query = new URLSearchParams({ action, ...params })
  return `${CRAWLER_API}?${query.toString()}`
}

/** Returns the message of a thrown value, or `fallback` when it is not a usable Error. */
function errorMessage(err: unknown, fallback: string): string {
  return err instanceof Error && err.message ? err.message : fallback
}

/** Extracts a string `detail` from a JSON error body; any other body yields `fallback`. */
async function readErrorDetail(res: Response, fallback: string): Promise<string> {
  try {
    const body: unknown = await res.json()
    if (typeof body === 'object' && body !== null && 'detail' in body) {
      const detail = (body as { detail?: unknown }).detail
      if (typeof detail === 'string' && detail) return detail
    }
  } catch {
    // Body was empty or not JSON (e.g. an HTML error page); use the fallback.
  }
  return fallback
}

/** POSTs an action to the crawler API and throws an Error with a readable message on failure. */
async function postAction(action: string, fallback: string, body?: unknown): Promise<void> {
  const init: RequestInit = { method: 'POST' }
  if (body !== undefined) {
    init.headers = { 'Content-Type': 'application/json' }
    init.body = JSON.stringify(body)
  }
  const res = await fetch(crawlerUrl(action), init)
  if (!res.ok) {
    throw new Error(await readErrorDetail(res, fallback))
  }
}

/** True for non-null, non-array objects. */
function isPlainObject(value: unknown): boolean {
  return typeof value === 'object' && value !== null && !Array.isArray(value)
}

/** Formats a date string for `de-DE`; missing or unparseable values become "-". */
function formatDate(value: string | null | undefined): string {
  if (!value) return '-'
  const parsed = new Date(value)
  return Number.isNaN(parsed.getTime()) ? '-' : parsed.toLocaleDateString('de-DE')
}

/** True when the URL uses http(s); keeps crawled `javascript:`-style URLs out of `href`. */
function isHttpUrl(url: string): boolean {
  return /^https?:\/\//i.test(url)
}

/**
 * Admin page for the Zeugnisse crawler.
 *
 * Polls the crawler API every {@link POLL_INTERVAL_MS} ms for statistics, per-state
 * statistics, crawler status and the (optionally state-filtered) document list, and
 * offers buttons to initialise sources and to start/stop the crawler.
 */
export default function ZeugnisseCrawlerPage() {
  const [stats, setStats] = useState<ZeugnisStats | null>(null)
  const [bundeslandStats, setBundeslandStats] = useState<BundeslandStats[]>([])
  const [crawlerStatus, setCrawlerStatus] = useState<CrawlerStatus | null>(null)
  const [documents, setDocuments] = useState<Document[]>([])
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const [selectedBundesland, setSelectedBundesland] = useState<string | null>(null)
  const [isStarting, setIsStarting] = useState(false)
  const [isStopping, setIsStopping] = useState(false)
  const [isInitializing, setIsInitializing] = useState(false)

  /**
   * Loads all four data sets in parallel.
   *
   * @param signal - aborts the requests and suppresses state updates once aborted
   *   (used by the polling effect on unmount and on filter change).
   * @param bundeslandFilter - document filter; defaults to the currently selected
   *   state. Callers that just changed the selection pass the new value explicitly,
   *   because this closure still sees the previous one.
   */
  const fetchData = useCallback(
    async (signal?: AbortSignal, bundeslandFilter: string | null = selectedBundesland) => {
      try {
        const docParams: Record<string, string> = bundeslandFilter
          ? { bundesland: bundeslandFilter }
          : {}
        const responses = await Promise.all([
          fetch(crawlerUrl('stats'), { signal }),
          fetch(crawlerUrl('bundesland-stats'), { signal }),
          fetch(crawlerUrl('status'), { signal }),
          fetch(crawlerUrl('documents', docParams), { signal }),
        ])
        const payloads: unknown[] = await Promise.all(
          responses.map((res) => (res.ok ? res.json() : undefined))
        )
        if (signal?.aborted) return

        const [statsData, bundeslandData, statusData, docsData] = payloads
        // Only shallow shape checks: enough to keep `.map` and property reads from crashing.
        if (isPlainObject(statsData)) setStats(statsData as ZeugnisStats)
        if (Array.isArray(bundeslandData)) setBundeslandStats(bundeslandData as BundeslandStats[])
        if (isPlainObject(statusData)) setCrawlerStatus(statusData as CrawlerStatus)
        if (Array.isArray(docsData)) setDocuments(docsData as Document[])

        if (responses.some((res) => !res.ok)) {
          // Do not replace an action error the user has not dismissed yet.
          setError((prev) => prev ?? LOAD_ERROR_MESSAGE)
        } else {
          // Clear only our own load error; start/stop/init errors stay until dismissed.
          setError((prev) => (prev === LOAD_ERROR_MESSAGE ? null : prev))
        }
      } catch (err) {
        if (signal?.aborted) return // cancelled request, not a failure
        setError((prev) => prev ?? LOAD_ERROR_MESSAGE)
        console.error(err)
      } finally {
        if (!signal?.aborted) {
          setLoading(false)
        }
      }
    },
    [selectedBundesland]
  )

  // Initial load and polling; the controller cancels in-flight requests on cleanup.
  useEffect(() => {
    const controller = new AbortController()
    void fetchData(controller.signal)
    const interval = setInterval(() => {
      void fetchData(controller.signal)
    }, POLL_INTERVAL_MS)
    return () => {
      clearInterval(interval)
      controller.abort()
    }
  }, [fetchData])

  /** Starts the crawler, optionally restricted to one state, then refreshes the data. */
  const startCrawler = async (bundesland?: string) => {
    setIsStarting(true)
    try {
      await postAction('start', 'Start fehlgeschlagen', { bundesland })
      // A row click sets the filter to `bundesland` in the same tick, so refresh with it.
      await fetchData(undefined, bundesland ?? selectedBundesland)
    } catch (err: unknown) {
      setError(errorMessage(err, 'Start fehlgeschlagen'))
    } finally {
      setIsStarting(false)
    }
  }

  /** Stops the crawler, then refreshes the data. */
  const stopCrawler = async () => {
    setIsStopping(true)
    try {
      await postAction('stop', 'Stop fehlgeschlagen')
      await fetchData()
    } catch (err: unknown) {
      setError(errorMessage(err, 'Stop fehlgeschlagen'))
    } finally {
      setIsStopping(false)
    }
  }

  /** Triggers source initialisation, then refreshes the data. */
  const initializeSources = async () => {
    setIsInitializing(true)
    try {
      await postAction('init', 'Initialisierung fehlgeschlagen')
      await fetchData()
    } catch (err: unknown) {
      setError(errorMessage(err, 'Initialisierung fehlgeschlagen'))
    } finally {
      setIsInitializing(false)
    }
  }

  if (loading) {
    return (
      <div className="min-h-screen bg-gray-50 dark:bg-gray-900 p-8">
        <div className="max-w-7xl mx-auto">
          <div className="animate-pulse">
            <div className="h-8 bg-gray-200 dark:bg-gray-700 rounded w-1/4 mb-6"></div>
            <div className="grid grid-cols-4 gap-4 mb-6">
              {[1, 2, 3, 4].map(i => (
                <div key={i} className="h-24 bg-gray-200 dark:bg-gray-700 rounded"></div>
              ))}
            </div>
          </div>
        </div>
      </div>
    )
  }

  return (
    <div className="min-h-screen bg-gray-50 dark:bg-gray-900 p-8">
      <div className="max-w-7xl mx-auto">
        {/* Header */}
        <div className="flex justify-between items-center mb-6">
          <div>
            <h1 className="text-2xl font-bold text-gray-900 dark:text-white">
              Zeugnisse Rights-Aware Crawler
            </h1>
            <p className="text-gray-500 dark:text-gray-400">
              Crawlt und indexiert Zeugnisverordnungen aller 16 Bundesländer
            </p>
          </div>
          <div className="flex gap-2">
            <button
              onClick={() => void initializeSources()}
              disabled={isInitializing}
              className="px-4 py-2 bg-gray-600 text-white rounded-lg hover:bg-gray-700 transition disabled:opacity-50"
            >
              {isInitializing ? 'Initialisiere...' : 'Quellen initialisieren'}
            </button>
            {crawlerStatus?.is_running ? (
              <button
                onClick={() => void stopCrawler()}
                disabled={isStopping}
                className="px-4 py-2 bg-red-600 text-white rounded-lg hover:bg-red-700 transition disabled:opacity-50"
              >
                {isStopping ? 'Stoppe...' : 'Crawler stoppen'}
              </button>
            ) : (
              <button
                onClick={() => void startCrawler()}
                disabled={isStarting}
                className="px-4 py-2 bg-green-600 text-white rounded-lg hover:bg-green-700 transition disabled:opacity-50"
              >
                {isStarting ? 'Starte...' : 'Crawler starten'}
              </button>
            )}
          </div>
        </div>

        {/* Error message */}
        {error && (
          <div className="mb-6 p-4 bg-red-100 dark:bg-red-900 text-red-800 dark:text-red-200 rounded-lg">
            {error}
            <button onClick={() => setError(null)} className="ml-4 underline">Schließen</button>
          </div>
        )}

        {/* Stats Cards */}
        <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4 mb-6">
          <StatsCard
            title="Bundesländer"
            // `||` kept on purpose: a reported 0 also falls back to 16, as before.
            value={stats?.total_sources || 16}
            icon="🗺️"
          />
          <StatsCard
            title="Dokumente gesamt"
            value={stats?.total_documents ?? 0}
            subtitle={`${stats?.indexed_documents ?? 0} indexiert`}
            icon="📄"
          />
          <StatsCard
            title="Training erlaubt"
            value={stats?.training_allowed_documents ?? 0}
            subtitle="Für KI-Training nutzbar"
            icon="✅"
          />
          <StatsCard
            title="Heute gecrawlt"
            value={crawlerStatus?.documents_crawled_today ?? 0}
            subtitle={`${crawlerStatus?.errors_today ?? 0} Fehler`}
            icon="🔄"
          />
        </div>

        {/* Crawler Status */}
        {crawlerStatus?.is_running && (
          <div className="mb-6 p-4 bg-blue-50 dark:bg-blue-900/20 border border-blue-200 dark:border-blue-800 rounded-lg">
            <div className="flex items-center">
              <div className="animate-spin rounded-full h-4 w-4 border-2 border-blue-600 border-t-transparent mr-3"></div>
              <div>
                <p className="font-medium text-blue-800 dark:text-blue-200">
                  Crawler läuft
                </p>
                {crawlerStatus.current_bundesland && (
                  <p className="text-sm text-blue-600 dark:text-blue-400">
                    Aktuell: {crawlerStatus.current_bundesland.toUpperCase()}
                  </p>
                )}
              </div>
            </div>
          </div>
        )}

        {/* Bundesland Overview */}
        <div className="bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-200 dark:border-gray-700 mb-6">
          <div className="px-6 py-4 border-b border-gray-200 dark:border-gray-700">
            <h2 className="text-lg font-semibold text-gray-900 dark:text-white">
              Bundesländer-Übersicht
            </h2>
          </div>
          <div className="overflow-x-auto">
            <table className="min-w-full divide-y divide-gray-200 dark:divide-gray-700">
              <thead className="bg-gray-50 dark:bg-gray-900">
                <tr>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Bundesland
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Training-Status
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Dokumente
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Indexiert
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Letzter Crawl
                  </th>
                  <th className="px-6 py-3 text-right text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Aktion
                  </th>
                </tr>
              </thead>
              <tbody className="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-700">
                {bundeslandStats.map((bl) => (
                  <tr key={bl.bundesland} className="hover:bg-gray-50 dark:hover:bg-gray-700">
                    <td className="px-6 py-4 whitespace-nowrap">
                      <div className="flex items-center">
                        <span className="font-medium text-gray-900 dark:text-white">
                          {bl.name}
                        </span>
                        <span className="ml-2 text-xs text-gray-500 dark:text-gray-400">
                          ({bl.bundesland.toUpperCase()})
                        </span>
                      </div>
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap">
                      <StatusBadge allowed={bl.training_allowed} />
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap text-gray-900 dark:text-white">
                      {bl.document_count}
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap text-gray-900 dark:text-white">
                      {bl.indexed_count}
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap text-gray-500 dark:text-gray-400">
                      {formatDate(bl.last_crawled)}
                    </td>
                    <td className="px-6 py-4 whitespace-nowrap text-right">
                      <button
                        onClick={() => {
                          setSelectedBundesland(bl.bundesland)
                          void startCrawler(bl.bundesland)
                        }}
                        // Also blocked while a start request is in flight to avoid double starts.
                        disabled={crawlerStatus?.is_running || isStarting}
                        className="text-blue-600 hover:text-blue-800 dark:text-blue-400 dark:hover:text-blue-300 disabled:opacity-50 disabled:cursor-not-allowed"
                      >
                        Crawlen
                      </button>
                    </td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </div>

        {/* Documents List */}
        <div className="bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-200 dark:border-gray-700">
          <div className="px-6 py-4 border-b border-gray-200 dark:border-gray-700 flex justify-between items-center">
            <h2 className="text-lg font-semibold text-gray-900 dark:text-white">
              Dokumente
            </h2>
            <select
              value={selectedBundesland || ''}
              onChange={(e) => setSelectedBundesland(e.target.value || null)}
              className="px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
            >
              <option value="">Alle Bundesländer</option>
              {bundeslandStats.map((bl) => (
                <option key={bl.bundesland} value={bl.bundesland}>
                  {bl.name}
                </option>
              ))}
            </select>
          </div>
          <div className="overflow-x-auto">
            <table className="min-w-full divide-y divide-gray-200 dark:divide-gray-700">
              <thead className="bg-gray-50 dark:bg-gray-900">
                <tr>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Titel
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Bundesland
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Status
                  </th>
                  <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-400 uppercase tracking-wider">
                    Erstellt
                  </th>
                </tr>
              </thead>
              <tbody className="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-700">
                {documents.length === 0 ? (
                  <tr>
                    <td colSpan={4} className="px-6 py-8 text-center text-gray-500 dark:text-gray-400">
                      Keine Dokumente vorhanden. Starten Sie den Crawler, um Dokumente zu sammeln.
                    </td>
                  </tr>
                ) : (
                  documents.map((doc) => (
                    <tr key={doc.id} className="hover:bg-gray-50 dark:hover:bg-gray-700">
                      <td className="px-6 py-4">
                        <div className="flex items-center">
                          <a
                            // Crawled URLs are external data: only http(s) targets become links.
                            href={isHttpUrl(doc.url) ? doc.url : undefined}
                            target="_blank"
                            rel="noopener noreferrer"
                            className="text-blue-600 hover:text-blue-800 dark:text-blue-400 font-medium"
                          >
                            {doc.title || 'Unbenannt'}
                          </a>
                        </div>
                      </td>
                      <td className="px-6 py-4 whitespace-nowrap">
                        <span className="text-gray-900 dark:text-white">
                          {doc.bundesland?.toUpperCase()}
                        </span>
                      </td>
                      <td className="px-6 py-4 whitespace-nowrap">
                        <div className="flex gap-2">
                          <StatusBadge allowed={doc.training_allowed} />
                          {doc.indexed_in_qdrant && (
                            <span className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-blue-100 text-blue-800 dark:bg-blue-900 dark:text-blue-200">
                              Indexiert
                            </span>
                          )}
                        </div>
                      </td>
                      <td className="px-6 py-4 whitespace-nowrap text-gray-500 dark:text-gray-400">
                        {formatDate(doc.created_at)}
                      </td>
                    </tr>
                  ))
                )}
              </tbody>
            </table>
          </div>
        </div>

        {/* Rights Legend */}
        <div className="mt-6 p-4 bg-gray-100 dark:bg-gray-800 rounded-lg">
          <h3 className="font-medium text-gray-900 dark:text-white mb-2">
            Hinweis zu Training-Berechtigungen
          </h3>
          <p className="text-sm text-gray-600 dark:text-gray-400">
            Dokumente mit <StatusBadge allowed={true} /> sind als amtliche Werke nach §5 UrhG
            gemeinfrei und können für das KI-Training verwendet werden. Dokumente ohne explizite
            Lizenzierung werden <span className="font-medium">nicht</span> für das Training verwendet.
          </p>
        </div>
      </div>
    </div>
  )
}
|
||||
Reference in New Issue
Block a user