// Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website, Klausur-Service,
// School-Service, Voice-Service, Geo-Service, BreakPilot Drive, Agent-Core
'use client'
|
|
|
|
/**
|
|
* Compliance Regulation Scraper Admin Page
|
|
*
|
|
* Manages the extraction of requirements and audit aspects from:
|
|
* - EUR-Lex regulations (GDPR, AI Act, CRA, NIS2, etc.)
|
|
* - BSI Technical Guidelines (TR-03161)
|
|
* - German laws
|
|
*
|
|
* Similar pattern to edu-search and zeugnisse-crawler.
|
|
*/
|
|
|
|
import { useState, useEffect, useCallback } from 'react'
|
|
import AdminLayout from '@/components/admin/AdminLayout'
|
|
import SystemInfoSection, { SYSTEM_INFO_CONFIGS } from '@/components/admin/SystemInfoSection'
|
|
|
|
// Types
|
|
interface Source {
|
|
code: string
|
|
url: string
|
|
source_type: string
|
|
regulation_type: string
|
|
has_data: boolean
|
|
requirement_count: number
|
|
}
|
|
|
|
interface ScraperStatus {
|
|
status: 'idle' | 'running' | 'completed' | 'error'
|
|
current_source: string | null
|
|
last_error: string | null
|
|
stats: {
|
|
sources_processed: number
|
|
requirements_extracted: number
|
|
errors: number
|
|
last_run: string | null
|
|
}
|
|
known_sources: string[]
|
|
}
|
|
|
|
interface ScrapeResult {
|
|
code: string
|
|
status: string
|
|
requirements_extracted?: number
|
|
reason?: string
|
|
error?: string
|
|
}
|
|
|
|
interface PDFDocument {
|
|
code: string
|
|
name: string
|
|
description: string
|
|
expected_aspects: string
|
|
available: boolean
|
|
}
|
|
|
|
interface PDFExtractionResult {
|
|
success: boolean
|
|
source_document: string
|
|
total_aspects: number
|
|
requirements_created: number
|
|
statistics: {
|
|
by_category: Record<string, number>
|
|
by_requirement_level: Record<string, number>
|
|
}
|
|
}
|
|
|
|
const BACKEND_URL = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'
|
|
|
|
// Source type badges
|
|
const sourceTypeBadge: Record<string, { label: string; color: string }> = {
|
|
eur_lex: { label: 'EUR-Lex', color: 'bg-blue-100 text-blue-800' },
|
|
bsi_pdf: { label: 'BSI PDF', color: 'bg-green-100 text-green-800' },
|
|
gesetze_im_internet: { label: 'Gesetze', color: 'bg-yellow-100 text-yellow-800' },
|
|
manual: { label: 'Manuell', color: 'bg-gray-100 text-gray-800' },
|
|
}
|
|
|
|
// Regulation type badges
|
|
const regulationTypeBadge: Record<string, { label: string; color: string; icon: string }> = {
|
|
eu_regulation: { label: 'EU-Verordnung', color: 'bg-indigo-100 text-indigo-800', icon: '🇪🇺' },
|
|
eu_directive: { label: 'EU-Richtlinie', color: 'bg-purple-100 text-purple-800', icon: '📜' },
|
|
de_law: { label: 'DE-Gesetz', color: 'bg-yellow-100 text-yellow-800', icon: '🇩🇪' },
|
|
bsi_standard: { label: 'BSI-Standard', color: 'bg-green-100 text-green-800', icon: '🔒' },
|
|
industry_standard: { label: 'Standard', color: 'bg-gray-100 text-gray-800', icon: '📋' },
|
|
}
|
|
|
|
export default function ComplianceScraperPage() {
|
|
const [activeTab, setActiveTab] = useState<'sources' | 'pdf' | 'status' | 'logs'>('sources')
|
|
const [sources, setSources] = useState<Source[]>([])
|
|
const [pdfDocuments, setPdfDocuments] = useState<PDFDocument[]>([])
|
|
const [status, setStatus] = useState<ScraperStatus | null>(null)
|
|
const [loading, setLoading] = useState(true)
|
|
const [scraping, setScraping] = useState(false)
|
|
const [extracting, setExtracting] = useState(false)
|
|
const [error, setError] = useState<string | null>(null)
|
|
const [success, setSuccess] = useState<string | null>(null)
|
|
const [results, setResults] = useState<ScrapeResult[]>([])
|
|
const [pdfResult, setPdfResult] = useState<PDFExtractionResult | null>(null)
|
|
|
|
// Fetch sources
|
|
const fetchSources = useCallback(async () => {
|
|
try {
|
|
const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/sources`)
|
|
if (res.ok) {
|
|
const data = await res.json()
|
|
setSources(data.sources || [])
|
|
}
|
|
} catch (err) {
|
|
console.error('Failed to fetch sources:', err)
|
|
}
|
|
}, [])
|
|
|
|
// Fetch PDF documents
|
|
const fetchPdfDocuments = useCallback(async () => {
|
|
try {
|
|
const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/pdf-documents`)
|
|
if (res.ok) {
|
|
const data = await res.json()
|
|
setPdfDocuments(data.documents || [])
|
|
}
|
|
} catch (err) {
|
|
console.error('Failed to fetch PDF documents:', err)
|
|
}
|
|
}, [])
|
|
|
|
// Fetch status
|
|
const fetchStatus = useCallback(async () => {
|
|
try {
|
|
const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/status`)
|
|
if (res.ok) {
|
|
const data = await res.json()
|
|
setStatus(data)
|
|
}
|
|
} catch (err) {
|
|
console.error('Failed to fetch status:', err)
|
|
}
|
|
}, [])
|
|
|
|
// Initial load
|
|
useEffect(() => {
|
|
const loadData = async () => {
|
|
setLoading(true)
|
|
await Promise.all([fetchSources(), fetchStatus(), fetchPdfDocuments()])
|
|
setLoading(false)
|
|
}
|
|
loadData()
|
|
}, [fetchSources, fetchStatus, fetchPdfDocuments])
|
|
|
|
// Poll status while scraping
|
|
useEffect(() => {
|
|
if (scraping) {
|
|
const interval = setInterval(fetchStatus, 2000)
|
|
return () => clearInterval(interval)
|
|
}
|
|
}, [scraping, fetchStatus])
|
|
|
|
// Scrape all sources
|
|
const handleScrapeAll = async () => {
|
|
setScraping(true)
|
|
setError(null)
|
|
setSuccess(null)
|
|
setResults([])
|
|
|
|
try {
|
|
const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/scrape-all`, {
|
|
method: 'POST',
|
|
})
|
|
|
|
if (!res.ok) {
|
|
const data = await res.json()
|
|
throw new Error(data.detail || 'Scraping fehlgeschlagen')
|
|
}
|
|
|
|
const data = await res.json()
|
|
setResults([
|
|
...data.results.success,
|
|
...data.results.failed,
|
|
...data.results.skipped,
|
|
])
|
|
setSuccess(`Scraping abgeschlossen: ${data.results.success.length} erfolgreich, ${data.results.skipped.length} uebersprungen, ${data.results.failed.length} fehlgeschlagen`)
|
|
|
|
// Refresh sources
|
|
await fetchSources()
|
|
} catch (err: any) {
|
|
setError(err.message)
|
|
} finally {
|
|
setScraping(false)
|
|
}
|
|
}
|
|
|
|
// Scrape single source
|
|
const handleScrapeSingle = async (code: string, force: boolean = false) => {
|
|
setScraping(true)
|
|
setError(null)
|
|
setSuccess(null)
|
|
|
|
try {
|
|
const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/scrape/${code}?force=${force}`, {
|
|
method: 'POST',
|
|
})
|
|
|
|
if (!res.ok) {
|
|
const data = await res.json()
|
|
throw new Error(data.detail || 'Scraping fehlgeschlagen')
|
|
}
|
|
|
|
const data = await res.json()
|
|
|
|
if (data.status === 'skipped') {
|
|
setSuccess(`${code}: Bereits vorhanden (${data.requirement_count} Anforderungen)`)
|
|
} else {
|
|
setSuccess(`${code}: ${data.requirements_extracted} Anforderungen extrahiert`)
|
|
}
|
|
|
|
// Refresh sources
|
|
await fetchSources()
|
|
} catch (err: any) {
|
|
setError(err.message)
|
|
} finally {
|
|
setScraping(false)
|
|
}
|
|
}
|
|
|
|
// Extract PDF
|
|
const handleExtractPdf = async (code: string, saveToDb: boolean = true, force: boolean = false) => {
|
|
setExtracting(true)
|
|
setError(null)
|
|
setSuccess(null)
|
|
setPdfResult(null)
|
|
|
|
try {
|
|
const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/extract-pdf`, {
|
|
method: 'POST',
|
|
headers: {
|
|
'Content-Type': 'application/json',
|
|
},
|
|
body: JSON.stringify({
|
|
document_code: code,
|
|
save_to_db: saveToDb,
|
|
force: force,
|
|
}),
|
|
})
|
|
|
|
if (!res.ok) {
|
|
const data = await res.json()
|
|
throw new Error(data.detail || 'PDF-Extraktion fehlgeschlagen')
|
|
}
|
|
|
|
const data: PDFExtractionResult = await res.json()
|
|
setPdfResult(data)
|
|
|
|
if (data.success) {
|
|
setSuccess(`${code}: ${data.total_aspects} Pruefaspekte extrahiert, ${data.requirements_created} Requirements erstellt`)
|
|
}
|
|
|
|
// Refresh sources
|
|
await fetchSources()
|
|
} catch (err: any) {
|
|
setError(err.message)
|
|
} finally {
|
|
setExtracting(false)
|
|
}
|
|
}
|
|
|
|
// Clear messages
|
|
useEffect(() => {
|
|
if (success) {
|
|
const timer = setTimeout(() => setSuccess(null), 5000)
|
|
return () => clearTimeout(timer)
|
|
}
|
|
}, [success])
|
|
|
|
useEffect(() => {
|
|
if (error) {
|
|
const timer = setTimeout(() => setError(null), 10000)
|
|
return () => clearTimeout(timer)
|
|
}
|
|
}, [error])
|
|
|
|
// Stats cards
|
|
const StatsCard = ({ title, value, subtitle, icon }: { title: string; value: number | string; subtitle?: string; icon: string }) => (
|
|
<div className="bg-white rounded-lg shadow-sm p-5 border border-slate-200">
|
|
<div className="flex items-center">
|
|
<div className="flex-shrink-0">
|
|
<span className="text-2xl">{icon}</span>
|
|
</div>
|
|
<div className="ml-4">
|
|
<p className="text-sm font-medium text-slate-500">{title}</p>
|
|
<p className="text-2xl font-semibold text-slate-900">{value}</p>
|
|
{subtitle && <p className="text-xs text-slate-400">{subtitle}</p>}
|
|
</div>
|
|
</div>
|
|
</div>
|
|
)
|
|
|
|
return (
|
|
<AdminLayout
|
|
title="Compliance Scraper"
|
|
description="Extrahiert Anforderungen aus EU-Regulierungen, BSI-Standards und Gesetzen"
|
|
>
|
|
{/* Loading */}
|
|
{loading && (
|
|
<div className="flex items-center justify-center py-12">
|
|
<svg className="w-8 h-8 animate-spin text-primary-600" fill="none" viewBox="0 0 24 24">
|
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
|
|
</svg>
|
|
<span className="ml-3 text-slate-600">Lade Quellen...</span>
|
|
</div>
|
|
)}
|
|
|
|
{!loading && (
|
|
<>
|
|
{/* Messages */}
|
|
{error && (
|
|
<div className="mb-4 bg-red-50 border border-red-200 text-red-700 px-4 py-3 rounded-lg">
|
|
{error}
|
|
</div>
|
|
)}
|
|
{success && (
|
|
<div className="mb-4 bg-green-50 border border-green-200 text-green-700 px-4 py-3 rounded-lg">
|
|
{success}
|
|
</div>
|
|
)}
|
|
|
|
{/* Stats Cards */}
|
|
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4 mb-6">
|
|
<StatsCard
|
|
title="Bekannte Quellen"
|
|
value={sources.length}
|
|
icon="📚"
|
|
/>
|
|
<StatsCard
|
|
title="Mit Daten"
|
|
value={sources.filter(s => s.has_data).length}
|
|
subtitle={`${sources.length - sources.filter(s => s.has_data).length} noch zu scrapen`}
|
|
icon="✅"
|
|
/>
|
|
<StatsCard
|
|
title="Anforderungen gesamt"
|
|
value={sources.reduce((acc, s) => acc + s.requirement_count, 0)}
|
|
icon="📋"
|
|
/>
|
|
<StatsCard
|
|
title="Letzter Lauf"
|
|
value={status?.stats.last_run ? new Date(status.stats.last_run).toLocaleDateString('de-DE') : 'Nie'}
|
|
subtitle={status?.stats.errors ? `${status.stats.errors} Fehler` : undefined}
|
|
icon="🕐"
|
|
/>
|
|
</div>
|
|
|
|
{/* Scraper Status Bar */}
|
|
{(scraping || status?.status === 'running') && (
|
|
<div className="mb-6 p-4 bg-blue-50 border border-blue-200 rounded-lg">
|
|
<div className="flex items-center">
|
|
<div className="animate-spin rounded-full h-4 w-4 border-2 border-blue-600 border-t-transparent mr-3" />
|
|
<div>
|
|
<p className="font-medium text-blue-800">Scraper laeuft</p>
|
|
{status?.current_source && (
|
|
<p className="text-sm text-blue-600">Aktuell: {status.current_source}</p>
|
|
)}
|
|
</div>
|
|
</div>
|
|
</div>
|
|
)}
|
|
|
|
{/* Tabs */}
|
|
<div className="bg-white rounded-xl shadow-sm border border-slate-200 mb-6">
|
|
<div className="border-b border-slate-200">
|
|
<nav className="flex -mb-px">
|
|
{[
|
|
{ id: 'sources', name: 'Quellen', icon: '📚' },
|
|
{ id: 'pdf', name: 'PDF-Extraktion', icon: '📄' },
|
|
{ id: 'status', name: 'Status', icon: '📊' },
|
|
{ id: 'logs', name: 'Ergebnisse', icon: '📝' },
|
|
].map(tab => (
|
|
<button
|
|
key={tab.id}
|
|
onClick={() => setActiveTab(tab.id as typeof activeTab)}
|
|
className={`px-6 py-4 text-sm font-medium border-b-2 transition-colors ${
|
|
activeTab === tab.id
|
|
? 'border-primary-600 text-primary-600'
|
|
: 'border-transparent text-slate-500 hover:text-slate-700 hover:border-slate-300'
|
|
}`}
|
|
>
|
|
<span className="mr-2">{tab.icon}</span>
|
|
{tab.name}
|
|
</button>
|
|
))}
|
|
</nav>
|
|
</div>
|
|
|
|
<div className="p-6">
|
|
{/* Sources Tab */}
|
|
{activeTab === 'sources' && (
|
|
<div>
|
|
{/* Header */}
|
|
<div className="flex justify-between items-center mb-6">
|
|
<div>
|
|
<h3 className="text-lg font-semibold text-slate-900">Regulierungsquellen</h3>
|
|
<p className="text-sm text-slate-500">EU-Lex, BSI-TR und deutsche Gesetze</p>
|
|
</div>
|
|
<button
|
|
onClick={handleScrapeAll}
|
|
disabled={scraping}
|
|
className="px-4 py-2 bg-primary-600 text-white rounded-lg hover:bg-primary-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
|
|
>
|
|
{scraping ? (
|
|
<>
|
|
<svg className="w-4 h-4 animate-spin" fill="none" viewBox="0 0 24 24">
|
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
|
|
</svg>
|
|
Laeuft...
|
|
</>
|
|
) : (
|
|
<>
|
|
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" />
|
|
</svg>
|
|
Alle Quellen scrapen
|
|
</>
|
|
)}
|
|
</button>
|
|
</div>
|
|
|
|
{/* Sources by Type */}
|
|
<div className="space-y-6">
|
|
{/* EU Regulations */}
|
|
<div>
|
|
<h4 className="text-sm font-medium text-slate-700 mb-3 flex items-center gap-2">
|
|
<span className="text-lg">🇪🇺</span> EU-Regulierungen (EUR-Lex)
|
|
</h4>
|
|
<div className="grid gap-3">
|
|
{sources.filter(s => s.source_type === 'eur_lex').map(source => (
|
|
<SourceCard key={source.code} source={source} onScrape={handleScrapeSingle} scraping={scraping} />
|
|
))}
|
|
</div>
|
|
</div>
|
|
|
|
{/* BSI Standards */}
|
|
<div>
|
|
<h4 className="text-sm font-medium text-slate-700 mb-3 flex items-center gap-2">
|
|
<span className="text-lg">🔒</span> BSI Technical Guidelines
|
|
</h4>
|
|
<div className="grid gap-3">
|
|
{sources.filter(s => s.source_type === 'bsi_pdf').map(source => (
|
|
<SourceCard key={source.code} source={source} onScrape={handleScrapeSingle} scraping={scraping} />
|
|
))}
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
)}
|
|
|
|
{/* PDF Extraction Tab */}
|
|
{activeTab === 'pdf' && (
|
|
<div>
|
|
<div className="mb-6">
|
|
<h3 className="text-lg font-semibold text-slate-900">PDF-Extraktion (PyMuPDF)</h3>
|
|
<p className="text-sm text-slate-500">
|
|
Extrahiert ALLE Pruefaspekte aus BSI-TR-03161 PDFs mit Regex-Pattern-Matching
|
|
</p>
|
|
</div>
|
|
|
|
{/* PDF Documents */}
|
|
<div className="space-y-4">
|
|
{pdfDocuments.map(doc => (
|
|
<div key={doc.code} className="bg-slate-50 rounded-lg p-4 border border-slate-200">
|
|
<div className="flex items-center justify-between">
|
|
<div className="flex items-center gap-3">
|
|
<span className="text-3xl">📄</span>
|
|
<div>
|
|
<div className="flex items-center gap-2">
|
|
<span className="font-semibold text-slate-900">{doc.code}</span>
|
|
<span className={`px-2 py-0.5 rounded text-xs font-medium ${
|
|
doc.available ? 'bg-green-100 text-green-700' : 'bg-red-100 text-red-700'
|
|
}`}>
|
|
{doc.available ? 'Verfuegbar' : 'Nicht gefunden'}
|
|
</span>
|
|
</div>
|
|
<div className="text-sm text-slate-600">{doc.name}</div>
|
|
<div className="text-xs text-slate-500">{doc.description}</div>
|
|
<div className="text-xs text-slate-400 mt-1">
|
|
Erwartete Pruefaspekte: {doc.expected_aspects}
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div className="flex gap-2">
|
|
<button
|
|
onClick={() => handleExtractPdf(doc.code, true, false)}
|
|
disabled={extracting || !doc.available}
|
|
className="px-4 py-2 bg-primary-600 text-white rounded-lg hover:bg-primary-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
|
|
>
|
|
{extracting ? (
|
|
<>
|
|
<svg className="w-4 h-4 animate-spin" fill="none" viewBox="0 0 24 24">
|
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
|
|
</svg>
|
|
Extrahiere...
|
|
</>
|
|
) : (
|
|
<>
|
|
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
|
|
</svg>
|
|
Extrahieren
|
|
</>
|
|
)}
|
|
</button>
|
|
<button
|
|
onClick={() => handleExtractPdf(doc.code, true, true)}
|
|
disabled={extracting || !doc.available}
|
|
className="px-3 py-2 bg-orange-100 text-orange-700 rounded-lg hover:bg-orange-200 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
|
|
title="Force: Loescht vorhandene und extrahiert neu"
|
|
>
|
|
Force
|
|
</button>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
))}
|
|
</div>
|
|
|
|
{/* Last Extraction Result */}
|
|
{pdfResult && (
|
|
<div className="mt-6 bg-green-50 rounded-lg p-4 border border-green-200">
|
|
<h4 className="font-semibold text-green-800 mb-3">Letztes Extraktions-Ergebnis</h4>
|
|
<div className="grid grid-cols-3 gap-4 mb-4">
|
|
<div className="text-center p-3 bg-white rounded-lg">
|
|
<div className="text-2xl font-bold text-green-700">{pdfResult.total_aspects}</div>
|
|
<div className="text-sm text-slate-500">Pruefaspekte gefunden</div>
|
|
</div>
|
|
<div className="text-center p-3 bg-white rounded-lg">
|
|
<div className="text-2xl font-bold text-blue-700">{pdfResult.requirements_created}</div>
|
|
<div className="text-sm text-slate-500">Requirements erstellt</div>
|
|
</div>
|
|
<div className="text-center p-3 bg-white rounded-lg">
|
|
<div className="text-2xl font-bold text-slate-700">{Object.keys(pdfResult.statistics.by_category || {}).length}</div>
|
|
<div className="text-sm text-slate-500">Kategorien</div>
|
|
</div>
|
|
</div>
|
|
|
|
{/* Category Breakdown */}
|
|
{pdfResult.statistics.by_category && Object.keys(pdfResult.statistics.by_category).length > 0 && (
|
|
<div>
|
|
<h5 className="text-sm font-medium text-slate-700 mb-2">Nach Kategorie:</h5>
|
|
<div className="flex flex-wrap gap-2">
|
|
{Object.entries(pdfResult.statistics.by_category).map(([cat, count]) => (
|
|
<span key={cat} className="px-2 py-1 bg-white rounded text-xs text-slate-600">
|
|
{cat}: <strong>{count}</strong>
|
|
</span>
|
|
))}
|
|
</div>
|
|
</div>
|
|
)}
|
|
</div>
|
|
)}
|
|
|
|
{/* Info Box */}
|
|
<div className="mt-6 bg-blue-50 rounded-lg p-4 border border-blue-200">
|
|
<h4 className="font-semibold text-blue-800 mb-2">Wie funktioniert die PDF-Extraktion?</h4>
|
|
<ul className="text-sm text-blue-700 space-y-1">
|
|
<li>• <strong>PyMuPDF (fitz)</strong> liest den PDF-Text</li>
|
|
<li>• <strong>Regex-Pattern</strong> finden Aspekte wie O.Auth_1, O.Sess_2, T.Network_1</li>
|
|
<li>• <strong>Kontextanalyse</strong> extrahiert Titel, Kategorie und Anforderungsstufe (MUSS/SOLL/KANN)</li>
|
|
<li>• <strong>Automatische Speicherung</strong> erstellt Requirements in der Datenbank</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
)}
|
|
|
|
{/* Status Tab */}
|
|
{activeTab === 'status' && status && (
|
|
<div className="space-y-6">
|
|
{/* Current Status */}
|
|
<div className="bg-slate-50 rounded-lg p-6">
|
|
<div className="flex items-center justify-between mb-4">
|
|
<div>
|
|
<h3 className="text-lg font-semibold text-slate-900">Scraper-Status</h3>
|
|
<p className="text-sm text-slate-500">
|
|
Letzter Lauf: {status.stats.last_run ? new Date(status.stats.last_run).toLocaleString('de-DE') : 'Noch nie'}
|
|
</p>
|
|
</div>
|
|
<div className={`px-3 py-1.5 rounded-full text-sm font-medium ${
|
|
status.status === 'running' ? 'bg-blue-100 text-blue-700' :
|
|
status.status === 'error' ? 'bg-red-100 text-red-700' :
|
|
status.status === 'completed' ? 'bg-green-100 text-green-700' :
|
|
'bg-gray-100 text-gray-700'
|
|
}`}>
|
|
{status.status === 'running' ? '🔄 Laeuft' :
|
|
status.status === 'error' ? '❌ Fehler' :
|
|
status.status === 'completed' ? '✅ Abgeschlossen' :
|
|
'⏸️ Bereit'}
|
|
</div>
|
|
</div>
|
|
|
|
<div className="grid grid-cols-3 gap-4">
|
|
<div className="text-center p-4 bg-white rounded-lg">
|
|
<div className="text-2xl font-bold text-slate-900">{status.stats.sources_processed}</div>
|
|
<div className="text-sm text-slate-500">Quellen verarbeitet</div>
|
|
</div>
|
|
<div className="text-center p-4 bg-white rounded-lg">
|
|
<div className="text-2xl font-bold text-green-600">{status.stats.requirements_extracted}</div>
|
|
<div className="text-sm text-slate-500">Anforderungen extrahiert</div>
|
|
</div>
|
|
<div className="text-center p-4 bg-white rounded-lg">
|
|
<div className="text-2xl font-bold text-red-600">{status.stats.errors}</div>
|
|
<div className="text-sm text-slate-500">Fehler</div>
|
|
</div>
|
|
</div>
|
|
|
|
{status.last_error && (
|
|
<div className="mt-4 p-3 bg-red-50 rounded-lg text-sm text-red-700">
|
|
<strong>Letzter Fehler:</strong> {status.last_error}
|
|
</div>
|
|
)}
|
|
</div>
|
|
|
|
{/* Process Description */}
|
|
<div className="bg-white border border-slate-200 rounded-lg p-6">
|
|
<h4 className="font-semibold text-slate-900 mb-4">Wie funktioniert der Scraper?</h4>
|
|
<div className="space-y-3 text-sm text-slate-600">
|
|
<div className="flex items-start gap-3">
|
|
<div className="w-6 h-6 bg-blue-100 rounded-full flex items-center justify-center text-blue-600 font-bold">1</div>
|
|
<div>
|
|
<strong>EUR-Lex Abruf</strong>: Holt HTML-Version der EU-Verordnung, extrahiert Artikel und Absaetze
|
|
</div>
|
|
</div>
|
|
<div className="flex items-start gap-3">
|
|
<div className="w-6 h-6 bg-blue-100 rounded-full flex items-center justify-center text-blue-600 font-bold">2</div>
|
|
<div>
|
|
<strong>BSI-TR Parsing</strong>: Extrahiert Pruefaspekte (O.Auth_1, O.Sess_1, etc.) aus den TR-Dokumenten
|
|
</div>
|
|
</div>
|
|
<div className="flex items-start gap-3">
|
|
<div className="w-6 h-6 bg-blue-100 rounded-full flex items-center justify-center text-blue-600 font-bold">3</div>
|
|
<div>
|
|
<strong>Datenbank-Speicherung</strong>: Jede Anforderung wird als Requirement in der Compliance-DB gespeichert
|
|
</div>
|
|
</div>
|
|
<div className="flex items-start gap-3">
|
|
<div className="w-6 h-6 bg-green-100 rounded-full flex items-center justify-center text-green-600 font-bold">✓</div>
|
|
<div>
|
|
<strong>Audit-Workspace</strong>: Anforderungen koennen mit Implementierungsdetails angereichert werden
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
)}
|
|
|
|
{/* Results Tab */}
|
|
{activeTab === 'logs' && (
|
|
<div>
|
|
<h3 className="text-lg font-semibold text-slate-900 mb-4">Letzte Ergebnisse</h3>
|
|
|
|
{results.length === 0 ? (
|
|
<div className="text-center py-12 text-slate-500">
|
|
Keine Ergebnisse vorhanden. Starte einen Scrape-Vorgang.
|
|
</div>
|
|
) : (
|
|
<div className="space-y-2">
|
|
{results.map((result, idx) => (
|
|
<div
|
|
key={idx}
|
|
className={`p-3 rounded-lg flex items-center justify-between ${
|
|
result.error ? 'bg-red-50' :
|
|
result.reason ? 'bg-yellow-50' :
|
|
'bg-green-50'
|
|
}`}
|
|
>
|
|
<div className="flex items-center gap-3">
|
|
<span className="text-lg">
|
|
{result.error ? '❌' : result.reason ? '⏭️' : '✅'}
|
|
</span>
|
|
<span className="font-medium">{result.code}</span>
|
|
<span className="text-sm text-slate-500">
|
|
{result.error || result.reason || `${result.requirements_extracted} Anforderungen`}
|
|
</span>
|
|
</div>
|
|
</div>
|
|
))}
|
|
</div>
|
|
)}
|
|
</div>
|
|
)}
|
|
</div>
|
|
</div>
|
|
</>
|
|
)}
|
|
|
|
{/* System Info Section */}
|
|
<div className="mt-8 border-t border-slate-200 pt-8">
|
|
<SystemInfoSection config={SYSTEM_INFO_CONFIGS.complianceScraper || {
|
|
title: 'Compliance Scraper',
|
|
description: 'Regulation & Requirements Extraction Service',
|
|
version: '1.0.0',
|
|
features: [
|
|
'EUR-Lex HTML Parsing',
|
|
'BSI-TR PDF Extraction',
|
|
'Automatic Requirement Mapping',
|
|
'Incremental Updates',
|
|
],
|
|
technicalDetails: {
|
|
'Backend': 'Python/FastAPI',
|
|
'HTTP Client': 'httpx async',
|
|
'HTML Parser': 'BeautifulSoup4',
|
|
'PDF Parser': 'PyMuPDF (optional)',
|
|
'Database': 'PostgreSQL',
|
|
},
|
|
}} />
|
|
</div>
|
|
</AdminLayout>
|
|
)
|
|
}
|
|
|
|
// Source Card Component
|
|
function SourceCard({
|
|
source,
|
|
onScrape,
|
|
scraping
|
|
}: {
|
|
source: Source
|
|
onScrape: (code: string, force: boolean) => void
|
|
scraping: boolean
|
|
}) {
|
|
const regType = regulationTypeBadge[source.regulation_type] || regulationTypeBadge.industry_standard
|
|
const srcType = sourceTypeBadge[source.source_type] || sourceTypeBadge.manual
|
|
|
|
return (
|
|
<div className="bg-white border border-slate-200 rounded-lg p-4 hover:shadow-sm transition-shadow">
|
|
<div className="flex items-center justify-between">
|
|
<div className="flex items-center gap-3">
|
|
<span className="text-2xl">{regType.icon}</span>
|
|
<div>
|
|
<div className="flex items-center gap-2">
|
|
<span className="font-semibold text-slate-900">{source.code}</span>
|
|
<span className={`px-2 py-0.5 rounded text-xs font-medium ${regType.color}`}>
|
|
{regType.label}
|
|
</span>
|
|
<span className={`px-2 py-0.5 rounded text-xs font-medium ${srcType.color}`}>
|
|
{srcType.label}
|
|
</span>
|
|
</div>
|
|
<div className="text-sm text-slate-500 truncate max-w-md" title={source.url}>
|
|
{source.url.length > 60 ? source.url.substring(0, 60) + '...' : source.url}
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div className="flex items-center gap-3">
|
|
{source.has_data ? (
|
|
<span className="px-3 py-1 bg-green-100 text-green-700 rounded-full text-sm font-medium">
|
|
{source.requirement_count} Anforderungen
|
|
</span>
|
|
) : (
|
|
<span className="px-3 py-1 bg-gray-100 text-gray-500 rounded-full text-sm">
|
|
Keine Daten
|
|
</span>
|
|
)}
|
|
|
|
<div className="flex gap-1">
|
|
<button
|
|
onClick={() => onScrape(source.code, false)}
|
|
disabled={scraping}
|
|
className="px-3 py-1.5 text-sm bg-slate-100 text-slate-700 rounded hover:bg-slate-200 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
|
|
title="Scrapen (ueberspringt vorhandene)"
|
|
>
|
|
Scrapen
|
|
</button>
|
|
{source.has_data && (
|
|
<button
|
|
onClick={() => onScrape(source.code, true)}
|
|
disabled={scraping}
|
|
className="px-3 py-1.5 text-sm bg-orange-100 text-orange-700 rounded hover:bg-orange-200 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
|
|
title="Force: Loescht vorhandene Daten und scraped neu"
|
|
>
|
|
Force
|
|
</button>
|
|
)}
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
)
|
|
}
|