backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
119 lines
6.0 KiB
TypeScript
119 lines
6.0 KiB
TypeScript
'use client'
|
|
|
|
/**
 * Compliance Regulation Scraper Admin Page
 *
 * Manages the extraction of requirements and audit aspects from:
 * - EUR-Lex regulations (GDPR, AI Act, CRA, NIS2, etc.)
 * - BSI Technical Guidelines (TR-03161)
 * - German laws
 */
|
|
|
|
import AdminLayout from '@/components/admin/AdminLayout'
|
|
import SystemInfoSection, { SYSTEM_INFO_CONFIGS } from '@/components/admin/SystemInfoSection'
|
|
import { useComplianceScraper } from './_components/useComplianceScraper'
|
|
import ScraperTabs from './_components/ScraperTabs'
|
|
|
|
/** Shape of the scraper hook result, derived locally so the helpers need no extra imports. */
type ScraperState = ReturnType<typeof useComplianceScraper>

/** Props accepted by StatsCard. */
interface StatsCardProps {
  title: string
  value: number | string
  subtitle?: string
  icon: string
}

/**
 * Single KPI tile: an icon, a small title, a large value and an optional subtitle.
 *
 * Declared at module scope on purpose. A component defined inside another
 * component's body gets a new identity on every render, which makes React
 * unmount and remount its whole subtree each time the parent re-renders.
 */
const StatsCard = ({ title, value, subtitle, icon }: StatsCardProps) => (
  <div className="bg-white rounded-lg shadow-sm p-5 border border-slate-200">
    <div className="flex items-center">
      <div className="flex-shrink-0"><span className="text-2xl">{icon}</span></div>
      <div className="ml-4">
        <p className="text-sm font-medium text-slate-500">{title}</p>
        <p className="text-2xl font-semibold text-slate-900">{value}</p>
        {subtitle && <p className="text-xs text-slate-400">{subtitle}</p>}
      </div>
    </div>
  </div>
)

/**
 * Row of four summary cards: known sources, sources with data, total
 * requirement count, and the date of the last run (plus error count, if any).
 *
 * Only rendered by the page once loading has finished, so the derived values
 * are computed under the same condition as before the extraction.
 */
const ScraperStatsGrid = ({ sources, status }: Pick<ScraperState, 'sources' | 'status'>) => {
  // Count once; the value feeds both the card value and its subtitle.
  const sourcesWithData = sources.filter(s => s.has_data).length
  const sourcesPending = sources.length - sourcesWithData
  const totalRequirements = sources.reduce((acc, s) => acc + s.requirement_count, 0)
  const lastRun = status?.stats.last_run
  const errorCount = status?.stats.errors

  return (
    <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4 mb-6">
      <StatsCard title="Bekannte Quellen" value={sources.length} icon="📚" />
      <StatsCard title="Mit Daten" value={sourcesWithData} subtitle={`${sourcesPending} noch zu scrapen`} icon="✅" />
      <StatsCard title="Anforderungen gesamt" value={totalRequirements} icon="📋" />
      <StatsCard
        title="Letzter Lauf"
        value={lastRun ? new Date(lastRun).toLocaleDateString('de-DE') : 'Nie'}
        subtitle={errorCount ? `${errorCount} Fehler` : undefined}
        icon="🕐"
      />
    </div>
  )
}

/** Tab bar entries; static, so they are built once instead of on every render. */
const SCRAPER_TABS = [
  { id: 'sources' as const, name: 'Quellen', icon: '📚' },
  { id: 'pdf' as const, name: 'PDF-Extraktion', icon: '📄' },
  { id: 'status' as const, name: 'Status', icon: '📊' },
  { id: 'logs' as const, name: 'Ergebnisse', icon: '📝' },
]

/**
 * Admin page for the compliance regulation scraper.
 *
 * All state and handlers come from `useComplianceScraper`; this component only
 * lays out the loading spinner, error/success banners, summary cards, the
 * "scraper running" notice, the tabbed work area and the system info footer.
 */
export default function ComplianceScraperPage() {
  const scraper = useComplianceScraper()

  // The running banner shows for a locally triggered scrape as well as for a
  // run reported by the status object.
  const isRunning = scraper.scraping || scraper.status?.status === 'running'

  return (
    <AdminLayout title="Compliance Scraper" description="Extrahiert Anforderungen aus EU-Regulierungen, BSI-Standards und Gesetzen">
      {scraper.loading && (
        <div className="flex items-center justify-center py-12">
          <svg className="w-8 h-8 animate-spin text-primary-600" fill="none" viewBox="0 0 24 24">
            <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
            <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
          </svg>
          <span className="ml-3 text-slate-600">Lade Quellen...</span>
        </div>
      )}

      {!scraper.loading && (
        <>
          {scraper.error && (
            <div className="mb-4 bg-red-50 border border-red-200 text-red-700 px-4 py-3 rounded-lg">{scraper.error}</div>
          )}
          {scraper.success && (
            <div className="mb-4 bg-green-50 border border-green-200 text-green-700 px-4 py-3 rounded-lg">{scraper.success}</div>
          )}

          <ScraperStatsGrid sources={scraper.sources} status={scraper.status} />

          {isRunning && (
            <div className="mb-6 p-4 bg-blue-50 border border-blue-200 rounded-lg">
              <div className="flex items-center">
                <div className="animate-spin rounded-full h-4 w-4 border-2 border-blue-600 border-t-transparent mr-3" />
                <div>
                  <p className="font-medium text-blue-800">Scraper laeuft</p>
                  {scraper.status?.current_source && (<p className="text-sm text-blue-600">Aktuell: {scraper.status.current_source}</p>)}
                </div>
              </div>
            </div>
          )}

          <div className="bg-white rounded-xl shadow-sm border border-slate-200 mb-6">
            <div className="border-b border-slate-200">
              <nav className="flex -mb-px">
                {SCRAPER_TABS.map(tab => (
                  <button key={tab.id} onClick={() => scraper.setActiveTab(tab.id)} className={`px-6 py-4 text-sm font-medium border-b-2 transition-colors ${scraper.activeTab === tab.id ? 'border-primary-600 text-primary-600' : 'border-transparent text-slate-500 hover:text-slate-700 hover:border-slate-300'}`}>
                    <span className="mr-2">{tab.icon}</span>{tab.name}
                  </button>
                ))}
              </nav>
            </div>
            <div className="p-6">
              <ScraperTabs
                activeTab={scraper.activeTab}
                sources={scraper.sources}
                pdfDocuments={scraper.pdfDocuments}
                status={scraper.status}
                scraping={scraper.scraping}
                extracting={scraper.extracting}
                results={scraper.results}
                pdfResult={scraper.pdfResult}
                handleScrapeAll={scraper.handleScrapeAll}
                handleScrapeSingle={scraper.handleScrapeSingle}
                handleExtractPdf={scraper.handleExtractPdf}
              />
            </div>
          </div>
        </>
      )}

      {/* Footer is shown regardless of loading state; the inline object is the
          fallback used when no shared config entry exists for this page. */}
      <div className="mt-8 border-t border-slate-200 pt-8">
        <SystemInfoSection config={SYSTEM_INFO_CONFIGS.complianceScraper || {
          title: 'Compliance Scraper',
          description: 'Regulation & Requirements Extraction Service',
          version: '1.0.0',
          features: ['EUR-Lex HTML Parsing', 'BSI-TR PDF Extraction', 'Automatic Requirement Mapping', 'Incremental Updates'],
          technicalDetails: { 'Backend': 'Python/FastAPI', 'HTTP Client': 'httpx async', 'HTML Parser': 'BeautifulSoup4', 'PDF Parser': 'PyMuPDF (optional)', 'Database': 'PostgreSQL' },
        }} />
      </div>
    </AdminLayout>
  )
}
|