Files
breakpilot-lehrer/website/app/admin/compliance/scraper/page.tsx
Benjamin Admin 34da9f4cda [split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 08:01:18 +02:00

119 lines
6.0 KiB
TypeScript

'use client'
/**
* Compliance Regulation Scraper Admin Page
*
* Manages the extraction of requirements and audit aspects from:
* - EUR-Lex regulations (GDPR, AI Act, CRA, NIS2, etc.)
* - BSI Technical Guidelines (TR-03161)
* - German laws
*/
import AdminLayout from '@/components/admin/AdminLayout'
import SystemInfoSection, { SYSTEM_INFO_CONFIGS } from '@/components/admin/SystemInfoSection'
import { useComplianceScraper } from './_components/useComplianceScraper'
import ScraperTabs from './_components/ScraperTabs'
/** Everything the `useComplianceScraper` hook hands back to the page. */
type ScraperState = ReturnType<typeof useComplianceScraper>

/** Props accepted by `StatsCard`. */
interface StatsCardProps {
  title: string
  value: number | string
  subtitle?: string
  icon: string
}

/**
 * One summary tile: an icon next to a title, a value and an optional subtitle.
 *
 * Declared at module scope on purpose. A component declared inside another
 * component's body is a brand-new component type on every render, which makes
 * React unmount and remount it each time the parent re-renders.
 */
function StatsCard({ title, value, subtitle, icon }: StatsCardProps) {
  return (
    <div className="bg-white rounded-lg shadow-sm p-5 border border-slate-200">
      <div className="flex items-center">
        <div className="flex-shrink-0"><span className="text-2xl">{icon}</span></div>
        <div className="ml-4">
          <p className="text-sm font-medium text-slate-500">{title}</p>
          <p className="text-2xl font-semibold text-slate-900">{value}</p>
          {subtitle && <p className="text-xs text-slate-400">{subtitle}</p>}
        </div>
      </div>
    </div>
  )
}

/**
 * Row of four summary tiles derived from the source list and the scraper status:
 * number of sources, sources flagged `has_data`, the sum of `requirement_count`,
 * and the date of the last run (with the error count as subtitle when truthy).
 */
function StatsOverview({ sources, status }: Pick<ScraperState, 'sources' | 'status'>) {
  // Computed once; it feeds both the value and the "remaining" subtitle below.
  const withDataCount = sources.filter(s => s.has_data).length
  const requirementTotal = sources.reduce((acc, s) => acc + s.requirement_count, 0)
  const lastRun = status?.stats.last_run
  const errorCount = status?.stats.errors

  return (
    <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4 mb-6">
      <StatsCard title="Bekannte Quellen" value={sources.length} icon="📚" />
      <StatsCard
        title="Mit Daten"
        value={withDataCount}
        subtitle={`${sources.length - withDataCount} noch zu scrapen`}
        icon="✅"
      />
      <StatsCard title="Anforderungen gesamt" value={requirementTotal} icon="📋" />
      <StatsCard
        title="Letzter Lauf"
        value={lastRun ? new Date(lastRun).toLocaleDateString('de-DE') : 'Nie'}
        subtitle={errorCount ? `${errorCount} Fehler` : undefined}
        icon="🕐"
      />
    </div>
  )
}

/** Tab descriptors for the navigation bar; `as const` keeps the `id` values as literals. */
const SCRAPER_TABS = [
  { id: 'sources', name: 'Quellen', icon: '📚' },
  { id: 'pdf', name: 'PDF-Extraktion', icon: '📄' },
  { id: 'status', name: 'Status', icon: '📊' },
  { id: 'logs', name: 'Ergebnisse', icon: '📝' },
] as const

/** Class names shared by every tab button, followed by the active / inactive variants. */
const TAB_BASE_CLASSES = 'px-6 py-4 text-sm font-medium border-b-2 transition-colors'
const TAB_ACTIVE_CLASSES = 'border-primary-600 text-primary-600'
const TAB_INACTIVE_CLASSES = 'border-transparent text-slate-500 hover:text-slate-700 hover:border-slate-300'

/**
 * Admin page for the compliance regulation scraper.
 *
 * While `scraper.loading` is set only a spinner is shown. Otherwise the page
 * renders, in order: error / success banners, the stats tiles, a "running"
 * banner (when a scrape was started here or the reported status is 'running'),
 * and the tabbed panel whose content is delegated to `ScraperTabs`.
 * The system-info section at the bottom is rendered in both cases.
 */
export default function ComplianceScraperPage() {
  const scraper = useComplianceScraper()
  const isRunning = scraper.scraping || scraper.status?.status === 'running'

  return (
    <AdminLayout title="Compliance Scraper" description="Extrahiert Anforderungen aus EU-Regulierungen, BSI-Standards und Gesetzen">
      {scraper.loading && (
        <div className="flex items-center justify-center py-12">
          <svg className="w-8 h-8 animate-spin text-primary-600" fill="none" viewBox="0 0 24 24">
            <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
            <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
          </svg>
          <span className="ml-3 text-slate-600">Lade Quellen...</span>
        </div>
      )}
      {!scraper.loading && (
        <>
          {scraper.error && (
            <div className="mb-4 bg-red-50 border border-red-200 text-red-700 px-4 py-3 rounded-lg">{scraper.error}</div>
          )}
          {scraper.success && (
            <div className="mb-4 bg-green-50 border border-green-200 text-green-700 px-4 py-3 rounded-lg">{scraper.success}</div>
          )}
          <StatsOverview sources={scraper.sources} status={scraper.status} />
          {isRunning && (
            <div className="mb-6 p-4 bg-blue-50 border border-blue-200 rounded-lg">
              <div className="flex items-center">
                <div className="animate-spin rounded-full h-4 w-4 border-2 border-blue-600 border-t-transparent mr-3" />
                <div>
                  <p className="font-medium text-blue-800">Scraper laeuft</p>
                  {scraper.status?.current_source && (<p className="text-sm text-blue-600">Aktuell: {scraper.status.current_source}</p>)}
                </div>
              </div>
            </div>
          )}
          <div className="bg-white rounded-xl shadow-sm border border-slate-200 mb-6">
            <div className="border-b border-slate-200">
              <nav className="flex -mb-px">
                {SCRAPER_TABS.map(tab => (
                  <button
                    key={tab.id}
                    onClick={() => scraper.setActiveTab(tab.id)}
                    className={`${TAB_BASE_CLASSES} ${scraper.activeTab === tab.id ? TAB_ACTIVE_CLASSES : TAB_INACTIVE_CLASSES}`}
                  >
                    <span className="mr-2">{tab.icon}</span>{tab.name}
                  </button>
                ))}
              </nav>
            </div>
            <div className="p-6">
              <ScraperTabs
                activeTab={scraper.activeTab}
                sources={scraper.sources}
                pdfDocuments={scraper.pdfDocuments}
                status={scraper.status}
                scraping={scraper.scraping}
                extracting={scraper.extracting}
                results={scraper.results}
                pdfResult={scraper.pdfResult}
                handleScrapeAll={scraper.handleScrapeAll}
                handleScrapeSingle={scraper.handleScrapeSingle}
                handleExtractPdf={scraper.handleExtractPdf}
              />
            </div>
          </div>
        </>
      )}
      <div className="mt-8 border-t border-slate-200 pt-8">
        {/* Inline fallback is used when no `complianceScraper` entry is configured. */}
        <SystemInfoSection config={SYSTEM_INFO_CONFIGS.complianceScraper || {
          title: 'Compliance Scraper',
          description: 'Regulation & Requirements Extraction Service',
          version: '1.0.0',
          features: ['EUR-Lex HTML Parsing', 'BSI-TR PDF Extraction', 'Automatic Requirement Mapping', 'Incremental Updates'],
          technicalDetails: { 'Backend': 'Python/FastAPI', 'HTTP Client': 'httpx async', 'HTML Parser': 'BeautifulSoup4', 'PDF Parser': 'PyMuPDF (optional)', 'Database': 'PostgreSQL' },
        }} />
      </div>
    </AdminLayout>
  )
}