backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
163 lines
12 KiB
TypeScript
163 lines
12 KiB
TypeScript
'use client'
|
|
|
|
import {
|
|
Source, ScraperStatus, ScrapeResult,
|
|
PDFDocument, PDFExtractionResult,
|
|
} from './types'
|
|
import SourceCard from './SourceCard'
|
|
|
|
/**
 * Props accepted by the ScraperTabs component.
 *
 * The component is purely presentational: all data and all actions are
 * supplied by the caller.
 */
interface ScraperTabsProps {
  // --- view selection -----------------------------------------------------

  /**
   * Identifier of the tab to render. The component checks for 'sources',
   * 'pdf' and 'status'; any other value renders the results list.
   */
  activeTab: string

  // --- data ---------------------------------------------------------------

  /** Regulation sources listed on the sources tab, grouped there by `source_type`. */
  sources: Source[]
  /** PDF documents offered for extraction on the PDF tab. */
  pdfDocuments: PDFDocument[]
  /** Scraper state shown on the status tab; may be null. */
  status: ScraperStatus | null
  /** Per-source scrape outcomes shown in the results list. */
  results: ScrapeResult[]
  /** Summary of the last PDF extraction; the summary box is hidden while this is null. */
  pdfResult: PDFExtractionResult | null

  // --- in-progress flags --------------------------------------------------

  /** True while a scrape is running; disables the scrape-all button and shows a spinner. */
  scraping: boolean
  /** True while a PDF extraction is running; disables the extract buttons and shows a spinner. */
  extracting: boolean

  // --- actions ------------------------------------------------------------

  /** Invoked when the "scrape all sources" button is clicked. */
  handleScrapeAll: () => void
  /** Forwarded to every SourceCard as its `onScrape` callback. */
  handleScrapeSingle: (code: string, force: boolean) => void
  /**
   * Invoked by the extract buttons. The regular button calls it with
   * `(code, true, false)`, the "Force" button with `(code, true, true)`.
   */
  handleExtractPdf: (code: string, saveToDb: boolean, force: boolean) => void
}
|
|
|
|
/**
 * Renders the body of the currently selected scraper tab.
 *
 * - 'sources': regulation sources grouped by type, plus a scrape-all button
 * - 'pdf':     PDF documents with extract actions and the last extraction summary
 * - 'status':  scraper status and counters
 * - anything else: the list of recent scrape results
 *
 * When the status tab is selected but `status` is still null, nothing is
 * rendered. Previously this case fell through to the results list, which
 * displayed log entries under the wrong tab.
 */
export default function ScraperTabs(props: ScraperTabsProps) {
  const { activeTab, status, results } = props

  switch (activeTab) {
    case 'sources':
      return renderSourcesTab(props)
    case 'pdf':
      return renderPdfTab(props)
    case 'status':
      // Do not fall through to the results list while no status is available.
      return status ? renderStatusTab(status) : null
    default:
      // logs tab
      return renderResultsTab(results)
  }
}

/** SVG path of the "refresh" icon on the scrape-all button. */
const REFRESH_ICON_PATH =
  'M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15'

/** SVG path of the "document" icon on the extract button. */
const DOCUMENT_ICON_PATH =
  'M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z'

/** Numbered explanation steps shown on the status tab. */
const SCRAPER_STEPS = [
  { number: '1', title: 'EUR-Lex Abruf', detail: 'Holt HTML-Version der EU-Verordnung, extrahiert Artikel und Absaetze' },
  { number: '2', title: 'BSI-TR Parsing', detail: 'Extrahiert Pruefaspekte (O.Auth_1, O.Sess_1, etc.) aus den TR-Dokumenten' },
  { number: '3', title: 'Datenbank-Speicherung', detail: 'Jede Anforderung wird als Requirement in der Compliance-DB gespeichert' },
]

/** Spinning indicator shown inside a button while its action is running. */
function renderSpinner() {
  return (
    <svg className="w-4 h-4 animate-spin" fill="none" viewBox="0 0 24 24">
      <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
      <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
    </svg>
  )
}

/** Small outlined icon drawn from a single SVG path. */
function renderStrokeIcon(pathData: string) {
  return (
    <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d={pathData} />
    </svg>
  )
}

/** Sources tab: scrape-all button and one SourceCard per source, grouped by source type. */
function renderSourcesTab({ sources, scraping, handleScrapeAll, handleScrapeSingle }: ScraperTabsProps) {
  const renderCards = (sourceType: string) =>
    sources
      .filter(source => source.source_type === sourceType)
      .map(source => (
        <SourceCard key={source.code} source={source} onScrape={handleScrapeSingle} scraping={scraping} />
      ))

  return (
    <div>
      <div className="flex justify-between items-center mb-6">
        <div>
          <h3 className="text-lg font-semibold text-slate-900">Regulierungsquellen</h3>
          <p className="text-sm text-slate-500">EU-Lex, BSI-TR und deutsche Gesetze</p>
        </div>
        <button
          onClick={handleScrapeAll}
          disabled={scraping}
          className="px-4 py-2 bg-primary-600 text-white rounded-lg hover:bg-primary-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
        >
          {scraping ? (
            <>
              {renderSpinner()}
              Laeuft...
            </>
          ) : (
            <>
              {renderStrokeIcon(REFRESH_ICON_PATH)}
              Alle Quellen scrapen
            </>
          )}
        </button>
      </div>
      <div className="space-y-6">
        <div>
          <h4 className="text-sm font-medium text-slate-700 mb-3 flex items-center gap-2"><span className="text-lg">🇪🇺</span> EU-Regulierungen (EUR-Lex)</h4>
          <div className="grid gap-3">{renderCards('eur_lex')}</div>
        </div>
        <div>
          <h4 className="text-sm font-medium text-slate-700 mb-3 flex items-center gap-2"><span className="text-lg">🔒</span> BSI Technical Guidelines</h4>
          <div className="grid gap-3">{renderCards('bsi_pdf')}</div>
        </div>
      </div>
    </div>
  )
}

/** Summary box for the most recent PDF extraction. */
function renderPdfResult(pdfResult: PDFExtractionResult) {
  const byCategory = pdfResult.statistics.by_category

  return (
    <div className="mt-6 bg-green-50 rounded-lg p-4 border border-green-200">
      <h4 className="font-semibold text-green-800 mb-3">Letztes Extraktions-Ergebnis</h4>
      <div className="grid grid-cols-3 gap-4 mb-4">
        <div className="text-center p-3 bg-white rounded-lg">
          <div className="text-2xl font-bold text-green-700">{pdfResult.total_aspects}</div>
          <div className="text-sm text-slate-500">Pruefaspekte gefunden</div>
        </div>
        <div className="text-center p-3 bg-white rounded-lg">
          <div className="text-2xl font-bold text-blue-700">{pdfResult.requirements_created}</div>
          <div className="text-sm text-slate-500">Requirements erstellt</div>
        </div>
        <div className="text-center p-3 bg-white rounded-lg">
          {/* by_category may be missing, so fall back to an empty object for the count. */}
          <div className="text-2xl font-bold text-slate-700">{Object.keys(byCategory || {}).length}</div>
          <div className="text-sm text-slate-500">Kategorien</div>
        </div>
      </div>
      {byCategory && Object.keys(byCategory).length > 0 && (
        <div>
          <h5 className="text-sm font-medium text-slate-700 mb-2">Nach Kategorie:</h5>
          <div className="flex flex-wrap gap-2">
            {Object.entries(byCategory).map(([cat, count]) => (
              <span key={cat} className="px-2 py-1 bg-white rounded text-xs text-slate-600">{cat}: <strong>{count}</strong></span>
            ))}
          </div>
        </div>
      )}
    </div>
  )
}

/** PDF tab: one row per document with extract actions, last result summary and an explainer. */
function renderPdfTab({ pdfDocuments, extracting, pdfResult, handleExtractPdf }: ScraperTabsProps) {
  return (
    <div>
      <div className="mb-6">
        <h3 className="text-lg font-semibold text-slate-900">PDF-Extraktion (PyMuPDF)</h3>
        <p className="text-sm text-slate-500">Extrahiert ALLE Pruefaspekte aus BSI-TR-03161 PDFs mit Regex-Pattern-Matching</p>
      </div>
      <div className="space-y-4">
        {pdfDocuments.map(doc => {
          // Both actions are blocked while an extraction runs or the PDF is unavailable.
          const actionsDisabled = extracting || !doc.available
          const availabilityClass = doc.available ? 'bg-green-100 text-green-700' : 'bg-red-100 text-red-700'

          return (
            <div key={doc.code} className="bg-slate-50 rounded-lg p-4 border border-slate-200">
              <div className="flex items-center justify-between">
                <div className="flex items-center gap-3">
                  <span className="text-3xl">📄</span>
                  <div>
                    <div className="flex items-center gap-2">
                      <span className="font-semibold text-slate-900">{doc.code}</span>
                      <span className={`px-2 py-0.5 rounded text-xs font-medium ${availabilityClass}`}>{doc.available ? 'Verfuegbar' : 'Nicht gefunden'}</span>
                    </div>
                    <div className="text-sm text-slate-600">{doc.name}</div>
                    <div className="text-xs text-slate-500">{doc.description}</div>
                    <div className="text-xs text-slate-400 mt-1">Erwartete Pruefaspekte: {doc.expected_aspects}</div>
                  </div>
                </div>
                <div className="flex gap-2">
                  <button
                    onClick={() => handleExtractPdf(doc.code, true, false)}
                    disabled={actionsDisabled}
                    className="px-4 py-2 bg-primary-600 text-white rounded-lg hover:bg-primary-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
                  >
                    {extracting ? (
                      <>
                        {renderSpinner()}
                        Extrahiere...
                      </>
                    ) : (
                      <>
                        {renderStrokeIcon(DOCUMENT_ICON_PATH)}
                        Extrahieren
                      </>
                    )}
                  </button>
                  <button
                    onClick={() => handleExtractPdf(doc.code, true, true)}
                    disabled={actionsDisabled}
                    className="px-3 py-2 bg-orange-100 text-orange-700 rounded-lg hover:bg-orange-200 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
                    title="Force: Loescht vorhandene und extrahiert neu"
                  >
                    Force
                  </button>
                </div>
              </div>
            </div>
          )
        })}
      </div>
      {pdfResult && renderPdfResult(pdfResult)}
      <div className="mt-6 bg-blue-50 rounded-lg p-4 border border-blue-200">
        <h4 className="font-semibold text-blue-800 mb-2">Wie funktioniert die PDF-Extraktion?</h4>
        <ul className="text-sm text-blue-700 space-y-1">
          <li>- <strong>PyMuPDF (fitz)</strong> liest den PDF-Text</li>
          <li>- <strong>Regex-Pattern</strong> finden Aspekte wie O.Auth_1, O.Sess_2, T.Network_1</li>
          <li>- <strong>Kontextanalyse</strong> extrahiert Titel, Kategorie und Anforderungsstufe (MUSS/SOLL/KANN)</li>
          <li>- <strong>Automatische Speicherung</strong> erstellt Requirements in der Datenbank</li>
        </ul>
      </div>
    </div>
  )
}

/** Status tab: state badge, counters, last error and an explainer of the scraper steps. */
function renderStatusTab(status: ScraperStatus) {
  // Badge colour and label per state; any unrecognised state is shown as "ready".
  let badgeClass = 'bg-gray-100 text-gray-700'
  let badgeLabel = 'Bereit'
  if (status.status === 'running') {
    badgeClass = 'bg-blue-100 text-blue-700'
    badgeLabel = 'Laeuft'
  } else if (status.status === 'error') {
    badgeClass = 'bg-red-100 text-red-700'
    badgeLabel = 'Fehler'
  } else if (status.status === 'completed') {
    badgeClass = 'bg-green-100 text-green-700'
    badgeLabel = 'Abgeschlossen'
  }

  const lastRunLabel = status.stats.last_run
    ? new Date(status.stats.last_run).toLocaleString('de-DE')
    : 'Noch nie'

  return (
    <div className="space-y-6">
      <div className="bg-slate-50 rounded-lg p-6">
        <div className="flex items-center justify-between mb-4">
          <div>
            <h3 className="text-lg font-semibold text-slate-900">Scraper-Status</h3>
            <p className="text-sm text-slate-500">Letzter Lauf: {lastRunLabel}</p>
          </div>
          <div className={`px-3 py-1.5 rounded-full text-sm font-medium ${badgeClass}`}>
            {badgeLabel}
          </div>
        </div>
        <div className="grid grid-cols-3 gap-4">
          <div className="text-center p-4 bg-white rounded-lg">
            <div className="text-2xl font-bold text-slate-900">{status.stats.sources_processed}</div>
            <div className="text-sm text-slate-500">Quellen verarbeitet</div>
          </div>
          <div className="text-center p-4 bg-white rounded-lg">
            <div className="text-2xl font-bold text-green-600">{status.stats.requirements_extracted}</div>
            <div className="text-sm text-slate-500">Anforderungen extrahiert</div>
          </div>
          <div className="text-center p-4 bg-white rounded-lg">
            <div className="text-2xl font-bold text-red-600">{status.stats.errors}</div>
            <div className="text-sm text-slate-500">Fehler</div>
          </div>
        </div>
        {status.last_error && (
          <div className="mt-4 p-3 bg-red-50 rounded-lg text-sm text-red-700"><strong>Letzter Fehler:</strong> {status.last_error}</div>
        )}
      </div>
      <div className="bg-white border border-slate-200 rounded-lg p-6">
        <h4 className="font-semibold text-slate-900 mb-4">Wie funktioniert der Scraper?</h4>
        <div className="space-y-3 text-sm text-slate-600">
          {SCRAPER_STEPS.map(step => (
            <div key={step.number} className="flex items-start gap-3">
              <div className="w-6 h-6 bg-blue-100 rounded-full flex items-center justify-center text-blue-600 font-bold">{step.number}</div>
              <div><strong>{step.title}</strong>: {step.detail}</div>
            </div>
          ))}
          <div className="flex items-start gap-3">
            <div className="w-6 h-6 bg-green-100 rounded-full flex items-center justify-center text-green-600 font-bold">✓</div>
            <div><strong>Audit-Workspace</strong>: Anforderungen koennen mit Implementierungsdetails angereichert werden</div>
          </div>
        </div>
      </div>
    </div>
  )
}

/** Results list: one coloured row per scrape result, or a placeholder when there are none. */
function renderResultsTab(results: ScrapeResult[]) {
  return (
    <div>
      <h3 className="text-lg font-semibold text-slate-900 mb-4">Letzte Ergebnisse</h3>
      {results.length === 0 ? (
        <div className="text-center py-12 text-slate-500">Keine Ergebnisse vorhanden. Starte einen Scrape-Vorgang.</div>
      ) : (
        <div className="space-y-2">
          {results.map((result, idx) => (
            // A row is red when it has an error, yellow when it has a reason, green otherwise.
            <div key={idx} className={`p-3 rounded-lg flex items-center justify-between ${result.error ? 'bg-red-50' : result.reason ? 'bg-yellow-50' : 'bg-green-50'}`}>
              <div className="flex items-center gap-3">
                <span className="text-lg">{result.error ? '❌' : result.reason ? '⏭️' : '✅'}</span>
                <span className="font-medium">{result.code}</span>
                <span className="text-sm text-slate-500">{result.error || result.reason || `${result.requirements_extracted} Anforderungen`}</span>
              </div>
            </div>
          ))}
        </div>
      )}
    </div>
  )
}
|