[split-required] Split 700-870 LOC files across all services

backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 08:01:18 +02:00
parent b6983ab1dc
commit 34da9f4cda
106 changed files with 16500 additions and 16947 deletions
--- a/website/app/admin/compliance/scraper/_components/useComplianceScraper.ts
+++ b/website/app/admin/compliance/scraper/_components/useComplianceScraper.ts
@@ -0,0 +1,106 @@
+'use client'
+
+import { useState, useEffect, useCallback } from 'react'
+import {
+  Source, ScraperStatus, ScrapeResult,
+  PDFDocument, PDFExtractionResult, BACKEND_URL,
+} from './types'
+
+/** Common prefix of every compliance scraper endpoint used by this hook. */
+const SCRAPER_API = `${BACKEND_URL}/api/v1/compliance/scraper`
+
+/** Returns a displayable message for a value caught in a `catch` clause. */
+function describeError(err: unknown, fallback: string): string {
+  if (err instanceof Error) return err.message || fallback
+  return typeof err === 'string' && err !== '' ? err : fallback
+}
+
+/**
+ * Reads the `detail` field from the JSON body of a failed response.
+ *
+ * Returns `fallback` when the body is not valid JSON or `detail` is not a non-empty
+ * string, so a non-JSON error body never surfaces to the user as a JSON parse error.
+ */
+async function readErrorDetail(res: Response, fallback: string): Promise<string> {
+  try {
+    const body: unknown = await res.json()
+    const detail = typeof body === 'object' && body !== null
+      ? (body as { detail?: unknown }).detail
+      : undefined
+    if (typeof detail === 'string' && detail !== '') return detail
+  } catch {
+    // Body was empty or not valid JSON; use the fallback below.
+  }
+  return fallback
+}
+
+/**
+ * State and actions for the compliance scraper admin page.
+ *
+ * On mount it loads sources, scraper status and PDF documents in parallel; while a
+ * scrape is running it re-fetches the status every 2 seconds. Success messages clear
+ * themselves after 5 seconds and error messages after 10 seconds.
+ *
+ * @returns The active tab with its setter, the loaded data, the `loading` / `scraping` /
+ *   `extracting` flags, the current `error` / `success` messages, the last scrape and
+ *   PDF extraction results, and the three action handlers.
+ */
+export function useComplianceScraper() {
+  const [activeTab, setActiveTab] = useState<'sources' | 'pdf' | 'status' | 'logs'>('sources')
+  const [sources, setSources] = useState<Source[]>([])
+  const [pdfDocuments, setPdfDocuments] = useState<PDFDocument[]>([])
+  const [status, setStatus] = useState<ScraperStatus | null>(null)
+  const [loading, setLoading] = useState(true)
+  const [scraping, setScraping] = useState(false)
+  const [extracting, setExtracting] = useState(false)
+  const [error, setError] = useState<string | null>(null)
+  const [success, setSuccess] = useState<string | null>(null)
+  const [results, setResults] = useState<ScrapeResult[]>([])
+  const [pdfResult, setPdfResult] = useState<PDFExtractionResult | null>(null)
+
+  // The three loaders are best-effort: a non-OK response leaves the current state
+  // untouched and a thrown error is only logged, so one failing endpoint does not
+  // block the others.
+  const fetchSources = useCallback(async () => {
+    try {
+      const res = await fetch(`${SCRAPER_API}/sources`)
+      if (!res.ok) return
+      const data = await res.json()
+      setSources(data.sources || [])
+    } catch (err) { console.error('Failed to fetch sources:', err) }
+  }, [])
+
+  const fetchPdfDocuments = useCallback(async () => {
+    try {
+      const res = await fetch(`${SCRAPER_API}/pdf-documents`)
+      if (!res.ok) return
+      const data = await res.json()
+      setPdfDocuments(data.documents || [])
+    } catch (err) { console.error('Failed to fetch PDF documents:', err) }
+  }, [])
+
+  const fetchStatus = useCallback(async () => {
+    try {
+      const res = await fetch(`${SCRAPER_API}/status`)
+      if (!res.ok) return
+      setStatus(await res.json())
+    } catch (err) { console.error('Failed to fetch status:', err) }
+  }, [])
+
+  // Initial load: run all three requests in parallel and clear `loading` once all settle.
+  useEffect(() => {
+    const loadData = async () => {
+      setLoading(true)
+      await Promise.all([fetchSources(), fetchStatus(), fetchPdfDocuments()])
+      setLoading(false)
+    }
+    void loadData()
+  }, [fetchSources, fetchStatus, fetchPdfDocuments])
+
+  // Poll the scraper status every 2 seconds, but only while a scrape is in progress.
+  useEffect(() => {
+    if (!scraping) return undefined
+    const interval = setInterval(fetchStatus, 2000)
+    return () => clearInterval(interval)
+  }, [scraping, fetchStatus])
+
+  /** Scrapes all sources, stores the combined results and reloads the source list. */
+  const handleScrapeAll = async () => {
+    setScraping(true); setError(null); setSuccess(null); setResults([])
+    try {
+      const res = await fetch(`${SCRAPER_API}/scrape-all`, { method: 'POST' })
+      if (!res.ok) throw new Error(await readErrorDetail(res, 'Scraping fehlgeschlagen'))
+      const data = await res.json()
+      const { success: succeeded, failed, skipped } = data.results
+      setResults([...succeeded, ...failed, ...skipped])
+      setSuccess(`Scraping abgeschlossen: ${succeeded.length} erfolgreich, ${skipped.length} uebersprungen, ${failed.length} fehlgeschlagen`)
+      await fetchSources()
+    } catch (err: unknown) { setError(describeError(err, 'Scraping fehlgeschlagen')) }
+    finally { setScraping(false) }
+  }
+
+  /**
+   * Scrapes a single source and reloads the source list.
+   *
+   * @param code  Source code; inserted into the URL path, hence percent-encoded.
+   * @param force Sent to the backend as the `force` query parameter.
+   */
+  const handleScrapeSingle = async (code: string, force: boolean = false) => {
+    setScraping(true); setError(null); setSuccess(null)
+    try {
+      const res = await fetch(`${SCRAPER_API}/scrape/${encodeURIComponent(code)}?force=${force}`, { method: 'POST' })
+      if (!res.ok) throw new Error(await readErrorDetail(res, 'Scraping fehlgeschlagen'))
+      const data = await res.json()
+      if (data.status === 'skipped') { setSuccess(`${code}: Bereits vorhanden (${data.requirement_count} Anforderungen)`) }
+      else { setSuccess(`${code}: ${data.requirements_extracted} Anforderungen extrahiert`) }
+      await fetchSources()
+    } catch (err: unknown) { setError(describeError(err, 'Scraping fehlgeschlagen')) }
+    finally { setScraping(false) }
+  }
+
+  /**
+   * Requests a PDF extraction for one document, stores the result and reloads the source list.
+   *
+   * @param code     Sent as `document_code` in the request body.
+   * @param saveToDb Sent as `save_to_db` in the request body.
+   * @param force    Sent as `force` in the request body.
+   */
+  const handleExtractPdf = async (code: string, saveToDb: boolean = true, force: boolean = false) => {
+    setExtracting(true); setError(null); setSuccess(null); setPdfResult(null)
+    try {
+      const res = await fetch(`${SCRAPER_API}/extract-pdf`, {
+        method: 'POST', headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ document_code: code, save_to_db: saveToDb, force }),
+      })
+      if (!res.ok) throw new Error(await readErrorDetail(res, 'PDF-Extraktion fehlgeschlagen'))
+      const data: PDFExtractionResult = await res.json()
+      setPdfResult(data)
+      if (data.success) { setSuccess(`${code}: ${data.total_aspects} Pruefaspekte extrahiert, ${data.requirements_created} Requirements erstellt`) }
+      await fetchSources()
+    } catch (err: unknown) { setError(describeError(err, 'PDF-Extraktion fehlgeschlagen')) }
+    finally { setExtracting(false) }
+  }
+
+  // Auto-dismiss banners: success after 5 s, error after 10 s. The cleanup cancels the
+  // pending timer when the message changes or the component unmounts.
+  useEffect(() => {
+    if (!success) return undefined
+    const timer = setTimeout(() => setSuccess(null), 5000)
+    return () => clearTimeout(timer)
+  }, [success])
+  useEffect(() => {
+    if (!error) return undefined
+    const timer = setTimeout(() => setError(null), 10000)
+    return () => clearTimeout(timer)
+  }, [error])
+
+  return {
+    activeTab, setActiveTab, sources, pdfDocuments, status,
+    loading, scraping, extracting, error, success, results, pdfResult,
+    handleScrapeAll, handleScrapeSingle, handleExtractPdf,
+  }
+}