[split-required] Split 700-870 LOC files across all services

backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 08:01:18 +02:00
parent b6983ab1dc
commit 34da9f4cda
106 changed files with 16500 additions and 16947 deletions

View File

@@ -0,0 +1,162 @@
'use client'
import {
Source, ScraperStatus, ScrapeResult,
PDFDocument, PDFExtractionResult,
} from './types'
import SourceCard from './SourceCard'
/**
 * Props for the ScraperTabs component: the selected tab, the data each tab
 * displays, and the callbacks its buttons invoke.
 */
interface ScraperTabsProps {
/** Tab to render. The component checks for 'sources', 'pdf' and 'status'; any other value renders the results log. */
activeTab: string
/** Sources listed on the 'sources' tab (filtered there by `source_type`). */
sources: Source[]
/** PDF documents listed on the 'pdf' tab. */
pdfDocuments: PDFDocument[]
/** Scraper status shown on the 'status' tab; null when no status is available. */
status: ScraperStatus | null
/** True while a scrape request is running; disables the scrape buttons and shows a spinner. */
scraping: boolean
/** True while a PDF extraction is running; disables the extract buttons and shows a spinner. */
extracting: boolean
/** Per-source results rendered in the results log. */
results: ScrapeResult[]
/** Result of the most recent PDF extraction, rendered below the document list; null hides that panel. */
pdfResult: PDFExtractionResult | null
/** Invoked by the "Alle Quellen scrapen" button. */
handleScrapeAll: () => void
/** Passed to each SourceCard as `onScrape`; receives the source code and the force flag. */
handleScrapeSingle: (code: string, force: boolean) => void
/** Invoked by the extract buttons as (doc.code, true, false) and by the "Force" button as (doc.code, true, true). */
handleExtractPdf: (code: string, saveToDb: boolean, force: boolean) => void
}
/** Steps listed in the "Wie funktioniert der Scraper?" box of the status tab. */
const SCRAPER_STEPS = [
  { n: '1', t: 'EUR-Lex Abruf', d: 'Holt HTML-Version der EU-Verordnung, extrahiert Artikel und Absaetze' },
  { n: '2', t: 'BSI-TR Parsing', d: 'Extrahiert Pruefaspekte (O.Auth_1, O.Sess_1, etc.) aus den TR-Dokumenten' },
  { n: '3', t: 'Datenbank-Speicherung', d: 'Jede Anforderung wird als Requirement in der Compliance-DB gespeichert' },
]

/** Spinner shown inside a button while its request is in flight. */
function SpinnerIcon() {
  return (
    <svg className="w-4 h-4 animate-spin" fill="none" viewBox="0 0 24 24">
      <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
      <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
    </svg>
  )
}

/** Maps a scraper state to its pill colours and label; unknown states render as "Bereit". */
function statusBadge(state: string): { className: string; label: string } {
  switch (state) {
    case 'running':
      return { className: 'bg-blue-100 text-blue-700', label: 'Laeuft' }
    case 'error':
      return { className: 'bg-red-100 text-red-700', label: 'Fehler' }
    case 'completed':
      return { className: 'bg-green-100 text-green-700', label: 'Abgeschlossen' }
    default:
      return { className: 'bg-gray-100 text-gray-700', label: 'Bereit' }
  }
}

/** 'sources' tab: scrape-all button plus the EUR-Lex and BSI source lists. */
function renderSourcesTab(props: ScraperTabsProps) {
  const { sources, scraping } = props
  return (
    <div>
      <div className="flex justify-between items-center mb-6">
        <div>
          <h3 className="text-lg font-semibold text-slate-900">Regulierungsquellen</h3>
          <p className="text-sm text-slate-500">EU-Lex, BSI-TR und deutsche Gesetze</p>
        </div>
        <button onClick={props.handleScrapeAll} disabled={scraping} className="px-4 py-2 bg-primary-600 text-white rounded-lg hover:bg-primary-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2">
          {scraping ? (
            <>
              <SpinnerIcon />
              Laeuft...
            </>
          ) : (
            <>
              <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" />
              </svg>
              Alle Quellen scrapen
            </>
          )}
        </button>
      </div>
      <div className="space-y-6">
        <div>
          <h4 className="text-sm font-medium text-slate-700 mb-3 flex items-center gap-2"><span className="text-lg">🇪🇺</span> EU-Regulierungen (EUR-Lex)</h4>
          <div className="grid gap-3">
            {sources.filter(s => s.source_type === 'eur_lex').map(source => (
              <SourceCard key={source.code} source={source} onScrape={props.handleScrapeSingle} scraping={scraping} />
            ))}
          </div>
        </div>
        <div>
          <h4 className="text-sm font-medium text-slate-700 mb-3 flex items-center gap-2"><span className="text-lg">🔒</span> BSI Technical Guidelines</h4>
          <div className="grid gap-3">
            {sources.filter(s => s.source_type === 'bsi_pdf').map(source => (
              <SourceCard key={source.code} source={source} onScrape={props.handleScrapeSingle} scraping={scraping} />
            ))}
          </div>
        </div>
      </div>
    </div>
  )
}

/** 'pdf' tab: document list with extract buttons, last extraction result, and an explainer. */
function renderPdfTab(props: ScraperTabsProps) {
  const { pdfDocuments, extracting, pdfResult } = props
  return (
    <div>
      <div className="mb-6">
        <h3 className="text-lg font-semibold text-slate-900">PDF-Extraktion (PyMuPDF)</h3>
        <p className="text-sm text-slate-500">Extrahiert ALLE Pruefaspekte aus BSI-TR-03161 PDFs mit Regex-Pattern-Matching</p>
      </div>
      <div className="space-y-4">
        {pdfDocuments.map(doc => (
          <div key={doc.code} className="bg-slate-50 rounded-lg p-4 border border-slate-200">
            <div className="flex items-center justify-between">
              <div className="flex items-center gap-3">
                <span className="text-3xl">📄</span>
                <div>
                  <div className="flex items-center gap-2">
                    <span className="font-semibold text-slate-900">{doc.code}</span>
                    <span className={`px-2 py-0.5 rounded text-xs font-medium ${doc.available ? 'bg-green-100 text-green-700' : 'bg-red-100 text-red-700'}`}>{doc.available ? 'Verfuegbar' : 'Nicht gefunden'}</span>
                  </div>
                  <div className="text-sm text-slate-600">{doc.name}</div>
                  <div className="text-xs text-slate-500">{doc.description}</div>
                  <div className="text-xs text-slate-400 mt-1">Erwartete Pruefaspekte: {doc.expected_aspects}</div>
                </div>
              </div>
              <div className="flex gap-2">
                <button onClick={() => props.handleExtractPdf(doc.code, true, false)} disabled={extracting || !doc.available} className="px-4 py-2 bg-primary-600 text-white rounded-lg hover:bg-primary-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2">
                  {extracting ? (
                    <>
                      <SpinnerIcon />
                      Extrahiere...
                    </>
                  ) : (
                    <>
                      <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                        <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
                      </svg>
                      Extrahieren
                    </>
                  )}
                </button>
                <button onClick={() => props.handleExtractPdf(doc.code, true, true)} disabled={extracting || !doc.available} className="px-3 py-2 bg-orange-100 text-orange-700 rounded-lg hover:bg-orange-200 transition-colors disabled:opacity-50 disabled:cursor-not-allowed" title="Force: Loescht vorhandene und extrahiert neu">Force</button>
              </div>
            </div>
          </div>
        ))}
      </div>
      {pdfResult && (
        <div className="mt-6 bg-green-50 rounded-lg p-4 border border-green-200">
          <h4 className="font-semibold text-green-800 mb-3">Letztes Extraktions-Ergebnis</h4>
          <div className="grid grid-cols-3 gap-4 mb-4">
            <div className="text-center p-3 bg-white rounded-lg"><div className="text-2xl font-bold text-green-700">{pdfResult.total_aspects}</div><div className="text-sm text-slate-500">Pruefaspekte gefunden</div></div>
            <div className="text-center p-3 bg-white rounded-lg"><div className="text-2xl font-bold text-blue-700">{pdfResult.requirements_created}</div><div className="text-sm text-slate-500">Requirements erstellt</div></div>
            <div className="text-center p-3 bg-white rounded-lg"><div className="text-2xl font-bold text-slate-700">{Object.keys(pdfResult.statistics.by_category || {}).length}</div><div className="text-sm text-slate-500">Kategorien</div></div>
          </div>
          {pdfResult.statistics.by_category && Object.keys(pdfResult.statistics.by_category).length > 0 && (
            <div>
              <h5 className="text-sm font-medium text-slate-700 mb-2">Nach Kategorie:</h5>
              <div className="flex flex-wrap gap-2">
                {Object.entries(pdfResult.statistics.by_category).map(([cat, count]) => (
                  <span key={cat} className="px-2 py-1 bg-white rounded text-xs text-slate-600">{cat}: <strong>{count}</strong></span>
                ))}
              </div>
            </div>
          )}
        </div>
      )}
      <div className="mt-6 bg-blue-50 rounded-lg p-4 border border-blue-200">
        <h4 className="font-semibold text-blue-800 mb-2">Wie funktioniert die PDF-Extraktion?</h4>
        <ul className="text-sm text-blue-700 space-y-1">
          <li>- <strong>PyMuPDF (fitz)</strong> liest den PDF-Text</li>
          <li>- <strong>Regex-Pattern</strong> finden Aspekte wie O.Auth_1, O.Sess_2, T.Network_1</li>
          <li>- <strong>Kontextanalyse</strong> extrahiert Titel, Kategorie und Anforderungsstufe (MUSS/SOLL/KANN)</li>
          <li>- <strong>Automatische Speicherung</strong> erstellt Requirements in der Datenbank</li>
        </ul>
      </div>
    </div>
  )
}

/** 'status' tab body for a loaded status: state pill, counters, last error, and an explainer. */
function renderStatusTab(status: ScraperStatus) {
  const badge = statusBadge(status.status)
  const lastRun = status.stats.last_run ? new Date(status.stats.last_run).toLocaleString('de-DE') : 'Noch nie'
  return (
    <div className="space-y-6">
      <div className="bg-slate-50 rounded-lg p-6">
        <div className="flex items-center justify-between mb-4">
          <div>
            <h3 className="text-lg font-semibold text-slate-900">Scraper-Status</h3>
            <p className="text-sm text-slate-500">Letzter Lauf: {lastRun}</p>
          </div>
          <div className={`px-3 py-1.5 rounded-full text-sm font-medium ${badge.className}`}>
            {badge.label}
          </div>
        </div>
        <div className="grid grid-cols-3 gap-4">
          <div className="text-center p-4 bg-white rounded-lg"><div className="text-2xl font-bold text-slate-900">{status.stats.sources_processed}</div><div className="text-sm text-slate-500">Quellen verarbeitet</div></div>
          <div className="text-center p-4 bg-white rounded-lg"><div className="text-2xl font-bold text-green-600">{status.stats.requirements_extracted}</div><div className="text-sm text-slate-500">Anforderungen extrahiert</div></div>
          <div className="text-center p-4 bg-white rounded-lg"><div className="text-2xl font-bold text-red-600">{status.stats.errors}</div><div className="text-sm text-slate-500">Fehler</div></div>
        </div>
        {status.last_error && (<div className="mt-4 p-3 bg-red-50 rounded-lg text-sm text-red-700"><strong>Letzter Fehler:</strong> {status.last_error}</div>)}
      </div>
      <div className="bg-white border border-slate-200 rounded-lg p-6">
        <h4 className="font-semibold text-slate-900 mb-4">Wie funktioniert der Scraper?</h4>
        <div className="space-y-3 text-sm text-slate-600">
          {SCRAPER_STEPS.map(s => (
            <div key={s.n} className="flex items-start gap-3"><div className="w-6 h-6 bg-blue-100 rounded-full flex items-center justify-center text-blue-600 font-bold">{s.n}</div><div><strong>{s.t}</strong>: {s.d}</div></div>
          ))}
          {/* NOTE(review): this marker has no glyph in the original markup; kept as-is. */}
          <div className="flex items-start gap-3"><div className="w-6 h-6 bg-green-100 rounded-full flex items-center justify-center text-green-600 font-bold"></div><div><strong>Audit-Workspace</strong>: Anforderungen koennen mit Implementierungsdetails angereichert werden</div></div>
        </div>
      </div>
    </div>
  )
}

/** Results log: one row per scrape result, coloured by error / skipped / success. */
function renderLogsTab(results: ScrapeResult[]) {
  return (
    <div>
      <h3 className="text-lg font-semibold text-slate-900 mb-4">Letzte Ergebnisse</h3>
      {results.length === 0 ? (
        <div className="text-center py-12 text-slate-500">Keine Ergebnisse vorhanden. Starte einen Scrape-Vorgang.</div>
      ) : (
        <div className="space-y-2">
          {results.map((result, idx) => (
            <div key={idx} className={`p-3 rounded-lg flex items-center justify-between ${result.error ? 'bg-red-50' : result.reason ? 'bg-yellow-50' : 'bg-green-50'}`}>
              <div className="flex items-center gap-3">
                <span className="text-lg">{result.error ? '❌' : result.reason ? '⏭️' : '✅'}</span>
                <span className="font-medium">{result.code}</span>
                <span className="text-sm text-slate-500">{result.error || result.reason || `${result.requirements_extracted} Anforderungen`}</span>
              </div>
            </div>
          ))}
        </div>
      )}
    </div>
  )
}

/**
 * Renders the body of the compliance-scraper page for the selected tab.
 *
 * `activeTab` selects between the 'sources', 'pdf' and 'status' views; any
 * other value renders the results log.
 *
 * The status tab is handled explicitly even when `status` is null. Previously
 * that case fell through to the results log, so the status tab showed
 * "Letzte Ergebnisse" whenever no status had been loaded.
 */
export default function ScraperTabs(props: ScraperTabsProps) {
  const { activeTab, status, results } = props

  if (activeTab === 'sources') {
    return renderSourcesTab(props)
  }
  if (activeTab === 'pdf') {
    return renderPdfTab(props)
  }
  if (activeTab === 'status') {
    if (!status) {
      return (
        <div className="text-center py-12 text-slate-500">Scraper-Status nicht verfuegbar.</div>
      )
    }
    return renderStatusTab(status)
  }
  // logs tab
  return renderLogsTab(results)
}

View File

@@ -0,0 +1,73 @@
'use client'
import { Source, regulationTypeBadge, sourceTypeBadge } from './types'
/**
 * Card for one regulation source.
 *
 * Shows the source code, its regulation-type and source-type badges, a
 * shortened URL, and either the requirement count or a "Keine Daten" pill.
 * "Scrapen" calls `onScrape(code, false)`; the "Force" button is only rendered
 * when the source already has data and calls `onScrape(code, true)`.
 * Both buttons are disabled while `scraping` is true.
 */
export default function SourceCard({
  source,
  onScrape,
  scraping,
}: {
  source: Source
  onScrape: (code: string, force: boolean) => void
  scraping: boolean
}) {
  const { code, url, has_data: hasData, requirement_count: requirementCount } = source

  // Unknown types fall back to the generic badge of each map.
  const regType = regulationTypeBadge[source.regulation_type] || regulationTypeBadge.industry_standard
  const srcType = sourceTypeBadge[source.source_type] || sourceTypeBadge.manual

  // URLs longer than 60 characters are cut and suffixed; the full URL stays in the tooltip.
  const shownUrl = url.length <= 60 ? url : url.substring(0, 60) + '...'

  const scrapeMissing = () => onScrape(code, false)
  const scrapeForced = () => onScrape(code, true)

  const dataPill = hasData ? (
    <span className="px-3 py-1 bg-green-100 text-green-700 rounded-full text-sm font-medium">
      {requirementCount} Anforderungen
    </span>
  ) : (
    <span className="px-3 py-1 bg-gray-100 text-gray-500 rounded-full text-sm">
      Keine Daten
    </span>
  )

  return (
    <div className="bg-white border border-slate-200 rounded-lg p-4 hover:shadow-sm transition-shadow">
      <div className="flex items-center justify-between">
        <div className="flex items-center gap-3">
          <span className="text-2xl">{regType.icon}</span>
          <div>
            <div className="flex items-center gap-2">
              <span className="font-semibold text-slate-900">{code}</span>
              <span className={`px-2 py-0.5 rounded text-xs font-medium ${regType.color}`}>
                {regType.label}
              </span>
              <span className={`px-2 py-0.5 rounded text-xs font-medium ${srcType.color}`}>
                {srcType.label}
              </span>
            </div>
            <div className="text-sm text-slate-500 truncate max-w-md" title={url}>
              {shownUrl}
            </div>
          </div>
        </div>
        <div className="flex items-center gap-3">
          {dataPill}
          <div className="flex gap-1">
            <button
              onClick={scrapeMissing}
              disabled={scraping}
              className="px-3 py-1.5 text-sm bg-slate-100 text-slate-700 rounded hover:bg-slate-200 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
              title="Scrapen (ueberspringt vorhandene)"
            >
              Scrapen
            </button>
            {hasData && (
              <button
                onClick={scrapeForced}
                disabled={scraping}
                className="px-3 py-1.5 text-sm bg-orange-100 text-orange-700 rounded hover:bg-orange-200 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
                title="Force: Loescht vorhandene Daten und scraped neu"
              >
                Force
              </button>
            )}
          </div>
        </div>
      </div>
    </div>
  )
}

View File

@@ -0,0 +1,65 @@
/** A regulation source as listed on the scraper page and rendered by SourceCard. */
export interface Source {
/** Identifier of the source; used as React key and passed to the scrape handler. */
code: string
/** URL of the source; SourceCard shows it shortened to 60 characters. */
url: string
/** Lookup key into `sourceTypeBadge`; the sources tab filters on 'eur_lex' and 'bsi_pdf'. */
source_type: string
/** Lookup key into `regulationTypeBadge`. */
regulation_type: string
/** Whether data exists for this source; controls the count pill and the "Force" button. */
has_data: boolean
/** Number shown as "<n> Anforderungen" when `has_data` is true. */
requirement_count: number
}
/** Scraper state and counters as displayed on the status tab. */
export interface ScraperStatus {
/** Current state; the UI labels 'running', 'error' and 'completed' explicitly and shows "Bereit" otherwise. */
status: 'idle' | 'running' | 'completed' | 'error'
/** NOTE(review): presumably the source being processed right now; not read by the visible UI. */
current_source: string | null
/** Last error message; rendered in a red box when non-empty. */
last_error: string | null
/** Counters shown in the status tab's three tiles. */
stats: {
sources_processed: number
requirements_extracted: number
errors: number
/** Passed to `new Date(...)` for display; assumed to be a date string that Date can parse. "Noch nie" is shown when null. */
last_run: string | null
}
/** NOTE(review): presumably the source codes the backend knows about; not read by the visible UI. */
known_sources: string[]
}
/**
 * Outcome for one source from a scrape run, as rendered in the results log.
 * The log treats a row as failed when `error` is set, as skipped when `reason`
 * is set, and as successful otherwise.
 */
export interface ScrapeResult {
/** Code of the source this result belongs to. */
code: string
/** NOTE(review): status string from the backend; possible values are not visible here. */
status: string
/** Shown as "<n> Anforderungen" for rows with neither `error` nor `reason`. */
requirements_extracted?: number
/** Text shown for skipped rows. */
reason?: string
/** Text shown for failed rows. */
error?: string
}
/** A PDF document offered for extraction on the PDF tab. */
export interface PDFDocument {
/** Identifier of the document; used as React key and passed to the extract handler. */
code: string
/** Display name. */
name: string
/** Short description shown below the name. */
description: string
/** Text shown after "Erwartete Pruefaspekte:"; typed as a string, so it is displayed verbatim. */
expected_aspects: string
/** Whether the PDF is available; drives the "Verfuegbar"/"Nicht gefunden" badge and disables extraction when false. */
available: boolean
}
/** Response of a PDF extraction request, shown in the "Letztes Extraktions-Ergebnis" panel. */
export interface PDFExtractionResult {
/** Whether the extraction succeeded; the hook only shows a success message when true. */
success: boolean
/** NOTE(review): presumably the code of the extracted document; not read by the visible UI. */
source_document: string
/** Count shown as "Pruefaspekte gefunden". */
total_aspects: number
/** Count shown as "Requirements erstellt". */
requirements_created: number
/** Breakdown counters for the extraction. */
statistics: {
/** Count per category; the panel lists each entry and shows the number of categories. */
by_category: Record<string, number>
/** NOTE(review): presumably count per requirement level; not read by the visible UI. */
by_requirement_level: Record<string, number>
}
}
/**
 * Base URL prepended to all scraper API requests. Taken from
 * NEXT_PUBLIC_BACKEND_URL, falling back to http://localhost:8000 when the
 * variable is unset or empty.
 */
export const BACKEND_URL = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'
/**
 * Badge label and Tailwind colour classes per `Source.source_type`.
 * SourceCard falls back to the `manual` entry for unknown types.
 */
export const sourceTypeBadge: Record<string, { label: string; color: string }> = {
eur_lex: { label: 'EUR-Lex', color: 'bg-blue-100 text-blue-800' },
bsi_pdf: { label: 'BSI PDF', color: 'bg-green-100 text-green-800' },
gesetze_im_internet: { label: 'Gesetze', color: 'bg-yellow-100 text-yellow-800' },
manual: { label: 'Manuell', color: 'bg-gray-100 text-gray-800' },
}
/**
 * Badge label, Tailwind colour classes and icon per `Source.regulation_type`.
 * SourceCard falls back to the `industry_standard` entry for unknown types.
 */
export const regulationTypeBadge: Record<string, { label: string; color: string; icon: string }> = {
eu_regulation: { label: 'EU-Verordnung', color: 'bg-indigo-100 text-indigo-800', icon: '🇪🇺' },
eu_directive: { label: 'EU-Richtlinie', color: 'bg-purple-100 text-purple-800', icon: '📜' },
de_law: { label: 'DE-Gesetz', color: 'bg-yellow-100 text-yellow-800', icon: '🇩🇪' },
bsi_standard: { label: 'BSI-Standard', color: 'bg-green-100 text-green-800', icon: '🔒' },
industry_standard: { label: 'Standard', color: 'bg-gray-100 text-gray-800', icon: '📋' },
}

View File

@@ -0,0 +1,106 @@
'use client'
import { useState, useEffect, useCallback } from 'react'
import {
Source, ScraperStatus, ScrapeResult,
PDFDocument, PDFExtractionResult, BACKEND_URL,
} from './types'
/** Turns an unknown thrown value into a displayable message, using `fallback` when none is available. */
function toErrorMessage(err: unknown, fallback: string): string {
  if (err instanceof Error && err.message) return err.message
  if (typeof err === 'string' && err) return err
  return fallback
}

/**
 * Reads the `detail` field of a failed response. Returns `fallback` when the
 * body is not JSON or `detail` is missing or not a string, so the user never
 * sees a JSON parse error or "[object Object]".
 */
async function readErrorDetail(res: Response, fallback: string): Promise<string> {
  try {
    const body: unknown = await res.json()
    if (body !== null && typeof body === 'object' && 'detail' in body) {
      const detail = (body as { detail?: unknown }).detail
      if (typeof detail === 'string' && detail) return detail
    }
  } catch {
    // Body was empty or not JSON (e.g. an HTML error page from a proxy).
  }
  return fallback
}

/**
 * State and actions for the compliance scraper page.
 *
 * Loads sources, PDF documents and scraper status on mount, polls the status
 * every two seconds while a scrape is running, and exposes handlers that
 * trigger scraping and PDF extraction against the backend.
 *
 * Success messages clear themselves after 5 seconds, error messages after
 * 10 seconds.
 *
 * @returns The current tab and its setter, the loaded data, the busy flags
 *   (`loading`, `scraping`, `extracting`), the transient `error` / `success`
 *   messages, the latest results, and the three action handlers.
 */
export function useComplianceScraper() {
  const [activeTab, setActiveTab] = useState<'sources' | 'pdf' | 'status' | 'logs'>('sources')
  const [sources, setSources] = useState<Source[]>([])
  const [pdfDocuments, setPdfDocuments] = useState<PDFDocument[]>([])
  const [status, setStatus] = useState<ScraperStatus | null>(null)
  const [loading, setLoading] = useState(true)
  const [scraping, setScraping] = useState(false)
  const [extracting, setExtracting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [success, setSuccess] = useState<string | null>(null)
  const [results, setResults] = useState<ScrapeResult[]>([])
  const [pdfResult, setPdfResult] = useState<PDFExtractionResult | null>(null)

  // The three loaders never throw: failures are logged and the previous state is kept.
  const fetchSources = useCallback(async () => {
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/sources`)
      if (!res.ok) { console.error(`Failed to fetch sources: HTTP ${res.status}`); return }
      const data = await res.json()
      setSources(data.sources || [])
    } catch (err) { console.error('Failed to fetch sources:', err) }
  }, [])

  const fetchPdfDocuments = useCallback(async () => {
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/pdf-documents`)
      if (!res.ok) { console.error(`Failed to fetch PDF documents: HTTP ${res.status}`); return }
      const data = await res.json()
      setPdfDocuments(data.documents || [])
    } catch (err) { console.error('Failed to fetch PDF documents:', err) }
  }, [])

  const fetchStatus = useCallback(async () => {
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/status`)
      if (!res.ok) { console.error(`Failed to fetch status: HTTP ${res.status}`); return }
      const data = await res.json()
      setStatus(data)
    } catch (err) { console.error('Failed to fetch status:', err) }
  }, [])

  // Initial load of all three resources in parallel.
  useEffect(() => {
    const loadData = async () => {
      setLoading(true)
      await Promise.all([fetchSources(), fetchStatus(), fetchPdfDocuments()])
      setLoading(false)
    }
    loadData()
  }, [fetchSources, fetchStatus, fetchPdfDocuments])

  // Poll the status while a scrape is running.
  useEffect(() => {
    if (!scraping) return undefined
    const interval = setInterval(fetchStatus, 2000)
    return () => clearInterval(interval)
  }, [scraping, fetchStatus])

  const handleScrapeAll = async () => {
    setScraping(true); setError(null); setSuccess(null); setResults([])
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/scrape-all`, { method: 'POST' })
      if (!res.ok) throw new Error(await readErrorDetail(res, 'Scraping fehlgeschlagen'))
      const data = await res.json()
      // Tolerate a response that omits one of the result buckets.
      const succeeded: ScrapeResult[] = data?.results?.success ?? []
      const failed: ScrapeResult[] = data?.results?.failed ?? []
      const skipped: ScrapeResult[] = data?.results?.skipped ?? []
      setResults([...succeeded, ...failed, ...skipped])
      setSuccess(`Scraping abgeschlossen: ${succeeded.length} erfolgreich, ${skipped.length} uebersprungen, ${failed.length} fehlgeschlagen`)
      await fetchSources()
    } catch (err: unknown) {
      setError(toErrorMessage(err, 'Scraping fehlgeschlagen'))
    } finally {
      // Polling stops once `scraping` flips, so take one final snapshot;
      // otherwise the status tab could keep showing a stale "running" state.
      await fetchStatus()
      setScraping(false)
    }
  }

  const handleScrapeSingle = async (code: string, force: boolean = false) => {
    setScraping(true); setError(null); setSuccess(null)
    try {
      // The code becomes a path segment, so it must be encoded.
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/scrape/${encodeURIComponent(code)}?force=${force}`, { method: 'POST' })
      if (!res.ok) throw new Error(await readErrorDetail(res, 'Scraping fehlgeschlagen'))
      const data = await res.json()
      if (data.status === 'skipped') { setSuccess(`${code}: Bereits vorhanden (${data.requirement_count} Anforderungen)`) }
      else { setSuccess(`${code}: ${data.requirements_extracted} Anforderungen extrahiert`) }
      await fetchSources()
    } catch (err: unknown) {
      setError(toErrorMessage(err, 'Scraping fehlgeschlagen'))
    } finally {
      // Same final status refresh as in handleScrapeAll.
      await fetchStatus()
      setScraping(false)
    }
  }

  const handleExtractPdf = async (code: string, saveToDb: boolean = true, force: boolean = false) => {
    setExtracting(true); setError(null); setSuccess(null); setPdfResult(null)
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/extract-pdf`, {
        method: 'POST', headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ document_code: code, save_to_db: saveToDb, force }),
      })
      if (!res.ok) throw new Error(await readErrorDetail(res, 'PDF-Extraktion fehlgeschlagen'))
      const data: PDFExtractionResult = await res.json()
      setPdfResult(data)
      if (data.success) {
        setSuccess(`${code}: ${data.total_aspects} Pruefaspekte extrahiert, ${data.requirements_created} Requirements erstellt`)
      } else {
        // A 2xx response with success=false previously produced no feedback at all.
        setError(`${code}: PDF-Extraktion fehlgeschlagen`)
      }
      await fetchSources()
    } catch (err: unknown) {
      setError(toErrorMessage(err, 'PDF-Extraktion fehlgeschlagen'))
    } finally {
      setExtracting(false)
    }
  }

  // Auto-dismiss the success message after 5 seconds.
  useEffect(() => {
    if (!success) return undefined
    const timer = setTimeout(() => setSuccess(null), 5000)
    return () => clearTimeout(timer)
  }, [success])

  // Auto-dismiss the error message after 10 seconds.
  useEffect(() => {
    if (!error) return undefined
    const timer = setTimeout(() => setError(null), 10000)
    return () => clearTimeout(timer)
  }, [error])

  return {
    activeTab, setActiveTab, sources, pdfDocuments, status,
    loading, scraping, extracting, error, success, results, pdfResult,
    handleScrapeAll, handleScrapeSingle, handleExtractPdf,
  }
}