Files
breakpilot-lehrer/website/app/admin/compliance/scraper/_components/ScraperTabs.tsx
Benjamin Admin 34da9f4cda [split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 08:01:18 +02:00

163 lines
12 KiB
TypeScript

'use client'
import {
Source, ScraperStatus, ScrapeResult,
PDFDocument, PDFExtractionResult,
} from './types'
import SourceCard from './SourceCard'
/**
 * Props accepted by the `ScraperTabs` component.
 *
 * The component is purely presentational: all data and all actions are
 * supplied by the parent through these props.
 */
interface ScraperTabsProps {
  // --- tab selection ---------------------------------------------------

  /**
   * Identifier of the tab to render. The component checks for `'sources'`,
   * `'pdf'` and `'status'`; other values lead to the results list.
   */
  activeTab: string

  // --- data --------------------------------------------------------------

  /** Regulation sources; rendered as `SourceCard`s grouped by `source_type`. */
  sources: Source[]
  /** PDF documents listed on the PDF tab, one row per document. */
  pdfDocuments: PDFDocument[]
  /** Per-source scrape outcomes shown in the results list. */
  results: ScrapeResult[]
  /** Scraper status; may be `null`, and the status view is only rendered when it is non-null. */
  status: ScraperStatus | null
  /** Outcome of a PDF extraction; the summary panel on the PDF tab is shown only when non-null. */
  pdfResult: PDFExtractionResult | null

  // --- in-flight flags -----------------------------------------------------

  /** While true, the "scrape all" button is disabled and shows a spinner; also forwarded to each `SourceCard`. */
  scraping: boolean
  /** While true, the extract / force buttons of every PDF row are disabled. */
  extracting: boolean

  // --- callbacks -----------------------------------------------------------

  /** Invoked when the "scrape all" button is clicked. */
  handleScrapeAll: () => void
  /** Forwarded to each `SourceCard` as its `onScrape` handler. */
  handleScrapeSingle: (code: string, force: boolean) => void
  /**
   * Invoked by the PDF row buttons with the document code. Both buttons pass
   * `saveToDb = true`; the "Force" button additionally passes `force = true`.
   */
  handleExtractPdf: (code: string, saveToDb: boolean, force: boolean) => void
}
/** Explanatory steps listed in the "how does the scraper work" box of the status tab. */
const SCRAPER_STEPS = [
  { n: '1', t: 'EUR-Lex Abruf', d: 'Holt HTML-Version der EU-Verordnung, extrahiert Artikel und Absaetze' },
  { n: '2', t: 'BSI-TR Parsing', d: 'Extrahiert Pruefaspekte (O.Auth_1, O.Sess_1, etc.) aus den TR-Dokumenten' },
  { n: '3', t: 'Datenbank-Speicherung', d: 'Jede Anforderung wird als Requirement in der Compliance-DB gespeichert' },
]

/** Spinning progress icon shown inside a button while its action is in flight. */
function renderSpinner() {
  return (
    <svg className="w-4 h-4 animate-spin" fill="none" viewBox="0 0 24 24">
      <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
      <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
    </svg>
  )
}

/** Maps a scraper run state to its badge colours and label; unknown states are shown as "Bereit". */
function runStateBadge(state: ScraperStatus['status']) {
  switch (state) {
    case 'running':
      return { colors: 'bg-blue-100 text-blue-700', label: 'Laeuft' }
    case 'error':
      return { colors: 'bg-red-100 text-red-700', label: 'Fehler' }
    case 'completed':
      return { colors: 'bg-green-100 text-green-700', label: 'Abgeschlossen' }
    default:
      return { colors: 'bg-gray-100 text-gray-700', label: 'Bereit' }
  }
}

/** Sources tab: "scrape all" button plus the source cards grouped by source type. */
function renderSourcesTab(
  { sources, scraping, handleScrapeAll, handleScrapeSingle }: Pick<ScraperTabsProps, 'sources' | 'scraping' | 'handleScrapeAll' | 'handleScrapeSingle'>
) {
  // Builds the card list for one source type; sources of other types are not shown in that group.
  const cardsFor = (sourceType: Source['source_type']) =>
    sources
      .filter(source => source.source_type === sourceType)
      .map(source => (
        <SourceCard key={source.code} source={source} onScrape={handleScrapeSingle} scraping={scraping} />
      ))

  return (
    <div>
      <div className="flex justify-between items-center mb-6">
        <div>
          <h3 className="text-lg font-semibold text-slate-900">Regulierungsquellen</h3>
          <p className="text-sm text-slate-500">EU-Lex, BSI-TR und deutsche Gesetze</p>
        </div>
        <button type="button" onClick={handleScrapeAll} disabled={scraping} className="px-4 py-2 bg-primary-600 text-white rounded-lg hover:bg-primary-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2">
          {scraping ? (
            <>{renderSpinner()}Laeuft...</>
          ) : (
            <><svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" /></svg>Alle Quellen scrapen</>
          )}
        </button>
      </div>
      <div className="space-y-6">
        <div>
          <h4 className="text-sm font-medium text-slate-700 mb-3 flex items-center gap-2"><span className="text-lg">🇪🇺</span> EU-Regulierungen (EUR-Lex)</h4>
          <div className="grid gap-3">{cardsFor('eur_lex')}</div>
        </div>
        <div>
          <h4 className="text-sm font-medium text-slate-700 mb-3 flex items-center gap-2"><span className="text-lg">🔒</span> BSI Technical Guidelines</h4>
          <div className="grid gap-3">{cardsFor('bsi_pdf')}</div>
        </div>
      </div>
    </div>
  )
}

/** Summary panel for a finished PDF extraction: totals plus a per-category breakdown. */
function renderPdfResult(pdfResult: PDFExtractionResult) {
  return (
    <div className="mt-6 bg-green-50 rounded-lg p-4 border border-green-200">
      <h4 className="font-semibold text-green-800 mb-3">Letztes Extraktions-Ergebnis</h4>
      <div className="grid grid-cols-3 gap-4 mb-4">
        <div className="text-center p-3 bg-white rounded-lg">
          <div className="text-2xl font-bold text-green-700">{pdfResult.total_aspects}</div>
          <div className="text-sm text-slate-500">Pruefaspekte gefunden</div>
        </div>
        <div className="text-center p-3 bg-white rounded-lg">
          <div className="text-2xl font-bold text-blue-700">{pdfResult.requirements_created}</div>
          <div className="text-sm text-slate-500">Requirements erstellt</div>
        </div>
        <div className="text-center p-3 bg-white rounded-lg">
          <div className="text-2xl font-bold text-slate-700">{Object.keys(pdfResult.statistics.by_category || {}).length}</div>
          <div className="text-sm text-slate-500">Kategorien</div>
        </div>
      </div>
      {/* The breakdown is omitted when there are no categories at all. */}
      {pdfResult.statistics.by_category && Object.keys(pdfResult.statistics.by_category).length > 0 && (
        <div>
          <h5 className="text-sm font-medium text-slate-700 mb-2">Nach Kategorie:</h5>
          <div className="flex flex-wrap gap-2">
            {Object.entries(pdfResult.statistics.by_category).map(([cat, count]) => (
              <span key={cat} className="px-2 py-1 bg-white rounded text-xs text-slate-600">{cat}: <strong>{count}</strong></span>
            ))}
          </div>
        </div>
      )}
    </div>
  )
}

/** PDF tab: one row per PDF document with extract / force buttons, the last result, and a help box. */
function renderPdfTab(
  { pdfDocuments, extracting, pdfResult, handleExtractPdf }: Pick<ScraperTabsProps, 'pdfDocuments' | 'extracting' | 'pdfResult' | 'handleExtractPdf'>
) {
  return (
    <div>
      <div className="mb-6">
        <h3 className="text-lg font-semibold text-slate-900">PDF-Extraktion (PyMuPDF)</h3>
        <p className="text-sm text-slate-500">Extrahiert ALLE Pruefaspekte aus BSI-TR-03161 PDFs mit Regex-Pattern-Matching</p>
      </div>
      <div className="space-y-4">
        {pdfDocuments.map(doc => {
          // Buttons are locked while any extraction runs or when the document is not available.
          const locked = extracting || !doc.available
          return (
            <div key={doc.code} className="bg-slate-50 rounded-lg p-4 border border-slate-200">
              <div className="flex items-center justify-between">
                <div className="flex items-center gap-3">
                  <span className="text-3xl">📄</span>
                  <div>
                    <div className="flex items-center gap-2">
                      <span className="font-semibold text-slate-900">{doc.code}</span>
                      <span className={`px-2 py-0.5 rounded text-xs font-medium ${doc.available ? 'bg-green-100 text-green-700' : 'bg-red-100 text-red-700'}`}>{doc.available ? 'Verfuegbar' : 'Nicht gefunden'}</span>
                    </div>
                    <div className="text-sm text-slate-600">{doc.name}</div>
                    <div className="text-xs text-slate-500">{doc.description}</div>
                    <div className="text-xs text-slate-400 mt-1">Erwartete Pruefaspekte: {doc.expected_aspects}</div>
                  </div>
                </div>
                <div className="flex gap-2">
                  <button type="button" onClick={() => handleExtractPdf(doc.code, true, false)} disabled={locked} className="px-4 py-2 bg-primary-600 text-white rounded-lg hover:bg-primary-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2">
                    {extracting ? (
                      <>{renderSpinner()}Extrahiere...</>
                    ) : (
                      <><svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" /></svg>Extrahieren</>
                    )}
                  </button>
                  <button type="button" onClick={() => handleExtractPdf(doc.code, true, true)} disabled={locked} className="px-3 py-2 bg-orange-100 text-orange-700 rounded-lg hover:bg-orange-200 transition-colors disabled:opacity-50 disabled:cursor-not-allowed" title="Force: Loescht vorhandene und extrahiert neu">Force</button>
                </div>
              </div>
            </div>
          )
        })}
      </div>
      {pdfResult && renderPdfResult(pdfResult)}
      <div className="mt-6 bg-blue-50 rounded-lg p-4 border border-blue-200">
        <h4 className="font-semibold text-blue-800 mb-2">Wie funktioniert die PDF-Extraktion?</h4>
        <ul className="text-sm text-blue-700 space-y-1">
          <li>- <strong>PyMuPDF (fitz)</strong> liest den PDF-Text</li>
          <li>- <strong>Regex-Pattern</strong> finden Aspekte wie O.Auth_1, O.Sess_2, T.Network_1</li>
          <li>- <strong>Kontextanalyse</strong> extrahiert Titel, Kategorie und Anforderungsstufe (MUSS/SOLL/KANN)</li>
          <li>- <strong>Automatische Speicherung</strong> erstellt Requirements in der Datenbank</li>
        </ul>
      </div>
    </div>
  )
}

/** Status tab: run state badge, counters, last error (if any) and an explanation of the scraper steps. */
function renderStatusTab(status: ScraperStatus) {
  const badge = runStateBadge(status.status)
  return (
    <div className="space-y-6">
      <div className="bg-slate-50 rounded-lg p-6">
        <div className="flex items-center justify-between mb-4">
          <div>
            <h3 className="text-lg font-semibold text-slate-900">Scraper-Status</h3>
            <p className="text-sm text-slate-500">Letzter Lauf: {status.stats.last_run ? new Date(status.stats.last_run).toLocaleString('de-DE') : 'Noch nie'}</p>
          </div>
          <div className={`px-3 py-1.5 rounded-full text-sm font-medium ${badge.colors}`}>
            {badge.label}
          </div>
        </div>
        <div className="grid grid-cols-3 gap-4">
          <div className="text-center p-4 bg-white rounded-lg">
            <div className="text-2xl font-bold text-slate-900">{status.stats.sources_processed}</div>
            <div className="text-sm text-slate-500">Quellen verarbeitet</div>
          </div>
          <div className="text-center p-4 bg-white rounded-lg">
            <div className="text-2xl font-bold text-green-600">{status.stats.requirements_extracted}</div>
            <div className="text-sm text-slate-500">Anforderungen extrahiert</div>
          </div>
          <div className="text-center p-4 bg-white rounded-lg">
            <div className="text-2xl font-bold text-red-600">{status.stats.errors}</div>
            <div className="text-sm text-slate-500">Fehler</div>
          </div>
        </div>
        {status.last_error && (<div className="mt-4 p-3 bg-red-50 rounded-lg text-sm text-red-700"><strong>Letzter Fehler:</strong> {status.last_error}</div>)}
      </div>
      <div className="bg-white border border-slate-200 rounded-lg p-6">
        <h4 className="font-semibold text-slate-900 mb-4">Wie funktioniert der Scraper?</h4>
        <div className="space-y-3 text-sm text-slate-600">
          {SCRAPER_STEPS.map(s => (
            <div key={s.n} className="flex items-start gap-3">
              <div className="w-6 h-6 bg-blue-100 rounded-full flex items-center justify-center text-blue-600 font-bold">{s.n}</div>
              <div><strong>{s.t}</strong>: {s.d}</div>
            </div>
          ))}
          {/* NOTE(review): this badge has no content in the original; possibly a lost icon glyph — confirm. */}
          <div className="flex items-start gap-3">
            <div className="w-6 h-6 bg-green-100 rounded-full flex items-center justify-center text-green-600 font-bold"></div>
            <div><strong>Audit-Workspace</strong>: Anforderungen koennen mit Implementierungsdetails angereichert werden</div>
          </div>
        </div>
      </div>
    </div>
  )
}

/** Results (log) view: one coloured row per scrape result, or a hint when there are none. */
function renderResultsTab(results: ScrapeResult[]) {
  return (
    <div>
      <h3 className="text-lg font-semibold text-slate-900 mb-4">Letzte Ergebnisse</h3>
      {results.length === 0 ? (
        <div className="text-center py-12 text-slate-500">Keine Ergebnisse vorhanden. Starte einen Scrape-Vorgang.</div>
      ) : (
        <div className="space-y-2">
          {results.map((result, idx) => {
            // An error takes precedence over a skip reason; neither means success.
            const rowColor = result.error ? 'bg-red-50' : result.reason ? 'bg-yellow-50' : 'bg-green-50'
            const icon = result.error ? '❌' : result.reason ? '⏭️' : '✅'
            return (
              <div key={idx} className={`p-3 rounded-lg flex items-center justify-between ${rowColor}`}>
                <div className="flex items-center gap-3">
                  <span className="text-lg">{icon}</span>
                  <span className="font-medium">{result.code}</span>
                  <span className="text-sm text-slate-500">{result.error || result.reason || `${result.requirements_extracted} Anforderungen`}</span>
                </div>
              </div>
            )
          })}
        </div>
      )}
    </div>
  )
}

/**
 * Renders the content area of the compliance scraper page for the active tab.
 *
 * - `'sources'`: source cards and the "scrape all" button
 * - `'pdf'`: PDF extraction rows, last extraction result and help text
 * - `'status'`: scraper status; renders nothing while `status` is `null`
 * - anything else: the results (log) list
 *
 * The per-tab markup lives in private `render*` helpers that are called as
 * plain functions, so no additional React component boundaries are introduced.
 *
 * @param props Data, in-flight flags and callbacks supplied by the parent page.
 * @returns The markup for the active tab, or `null` for the status tab when no
 *          status is available.
 */
export default function ScraperTabs(props: ScraperTabsProps) {
  const { activeTab, status, results } = props

  if (activeTab === 'sources') {
    return renderSourcesTab(props)
  }

  if (activeTab === 'pdf') {
    return renderPdfTab(props)
  }

  if (activeTab === 'status') {
    // Without a status object there is nothing to show. Previously this case
    // fell through and displayed the results list under the status tab.
    return status ? renderStatusTab(status) : null
  }

  // logs tab (also the fallback for any unrecognised tab id)
  return renderResultsTab(results)
}