Files
breakpilot-lehrer/website/app/admin/compliance/scraper/_components/useComplianceScraper.ts
Benjamin Admin 34da9f4cda [split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 08:01:18 +02:00

107 lines
5.0 KiB
TypeScript

'use client'
import { useState, useEffect, useCallback } from 'react'
import {
Source, ScraperStatus, ScrapeResult,
PDFDocument, PDFExtractionResult, BACKEND_URL,
} from './types'
/**
 * Extracts a human-readable message from an unknown thrown value.
 * Keeps catch clauses strict-mode safe (`useUnknownInCatchVariables`)
 * instead of assuming every thrown value is an Error.
 */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err)
}

/**
 * Reads the backend's `detail` field from a failed response body.
 * Falls back to `fallback` when the body is not JSON (e.g. an HTML
 * error page from a proxy) so the JSON parse error never masks the
 * real failure message shown to the user.
 */
async function readErrorDetail(res: Response, fallback: string): Promise<string> {
  try {
    const data = await res.json()
    return data.detail || fallback
  } catch {
    return fallback
  }
}

/**
 * State + actions for the compliance-scraper admin page.
 *
 * Loads sources, PDF documents and scraper status on mount, polls
 * status every 2s while a scrape is running, auto-dismisses success
 * messages after 5s and error messages after 10s.
 *
 * @returns tab state, fetched data, busy/error/success flags and the
 *          three action handlers (scrape-all, scrape-single, extract-pdf).
 */
export function useComplianceScraper() {
  const [activeTab, setActiveTab] = useState<'sources' | 'pdf' | 'status' | 'logs'>('sources')
  const [sources, setSources] = useState<Source[]>([])
  const [pdfDocuments, setPdfDocuments] = useState<PDFDocument[]>([])
  const [status, setStatus] = useState<ScraperStatus | null>(null)
  const [loading, setLoading] = useState(true)
  const [scraping, setScraping] = useState(false)
  const [extracting, setExtracting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [success, setSuccess] = useState<string | null>(null)
  const [results, setResults] = useState<ScrapeResult[]>([])
  const [pdfResult, setPdfResult] = useState<PDFExtractionResult | null>(null)

  // Read-only fetchers: failures are logged, never surfaced as UI errors,
  // so background polling cannot spam the error banner.
  const fetchSources = useCallback(async () => {
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/sources`)
      if (res.ok) { const data = await res.json(); setSources(data.sources || []) }
    } catch (err: unknown) { console.error('Failed to fetch sources:', err) }
  }, [])

  const fetchPdfDocuments = useCallback(async () => {
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/pdf-documents`)
      if (res.ok) { const data = await res.json(); setPdfDocuments(data.documents || []) }
    } catch (err: unknown) { console.error('Failed to fetch PDF documents:', err) }
  }, [])

  const fetchStatus = useCallback(async () => {
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/status`)
      if (res.ok) { const data = await res.json(); setStatus(data) }
    } catch (err: unknown) { console.error('Failed to fetch status:', err) }
  }, [])

  // Initial load: all three fetches in parallel, single loading flag.
  useEffect(() => {
    const loadData = async () => {
      setLoading(true)
      await Promise.all([fetchSources(), fetchStatus(), fetchPdfDocuments()])
      setLoading(false)
    }
    loadData()
  }, [fetchSources, fetchStatus, fetchPdfDocuments])

  // Poll status every 2s only while a scrape is in flight.
  useEffect(() => {
    if (scraping) { const interval = setInterval(fetchStatus, 2000); return () => clearInterval(interval) }
  }, [scraping, fetchStatus])

  /** Triggers a scrape of all sources and summarizes the per-source outcome. */
  const handleScrapeAll = async () => {
    setScraping(true); setError(null); setSuccess(null); setResults([])
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/scrape-all`, { method: 'POST' })
      if (!res.ok) { throw new Error(await readErrorDetail(res, 'Scraping fehlgeschlagen')) }
      const data = await res.json()
      setResults([...data.results.success, ...data.results.failed, ...data.results.skipped])
      setSuccess(`Scraping abgeschlossen: ${data.results.success.length} erfolgreich, ${data.results.skipped.length} uebersprungen, ${data.results.failed.length} fehlgeschlagen`)
      await fetchSources()
    } catch (err: unknown) { setError(errorMessage(err)) }
    finally { setScraping(false) }
  }

  /**
   * Scrapes a single source by code.
   * @param code  source identifier used in the URL path
   * @param force re-scrape even if requirements already exist
   */
  const handleScrapeSingle = async (code: string, force: boolean = false) => {
    setScraping(true); setError(null); setSuccess(null)
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/scrape/${code}?force=${force}`, { method: 'POST' })
      if (!res.ok) { throw new Error(await readErrorDetail(res, 'Scraping fehlgeschlagen')) }
      const data = await res.json()
      if (data.status === 'skipped') { setSuccess(`${code}: Bereits vorhanden (${data.requirement_count} Anforderungen)`) }
      else { setSuccess(`${code}: ${data.requirements_extracted} Anforderungen extrahiert`) }
      await fetchSources()
    } catch (err: unknown) { setError(errorMessage(err)) }
    finally { setScraping(false) }
  }

  /**
   * Runs PDF extraction for a document code.
   * @param code     document identifier sent in the request body
   * @param saveToDb persist extracted requirements (default true)
   * @param force    re-extract even if already present
   */
  const handleExtractPdf = async (code: string, saveToDb: boolean = true, force: boolean = false) => {
    setExtracting(true); setError(null); setSuccess(null); setPdfResult(null)
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/extract-pdf`, {
        method: 'POST', headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ document_code: code, save_to_db: saveToDb, force }),
      })
      if (!res.ok) { throw new Error(await readErrorDetail(res, 'PDF-Extraktion fehlgeschlagen')) }
      const data: PDFExtractionResult = await res.json()
      setPdfResult(data)
      // NOTE(review): when the 200 response has success === false, neither
      // success nor error is set — the user gets no banner and must inspect
      // pdfResult. Confirm whether the backend can return that shape and
      // whether it carries an error field worth surfacing.
      if (data.success) { setSuccess(`${code}: ${data.total_aspects} Pruefaspekte extrahiert, ${data.requirements_created} Requirements erstellt`) }
      await fetchSources()
    } catch (err: unknown) { setError(errorMessage(err)) }
    finally { setExtracting(false) }
  }

  // Auto-dismiss: success after 5s, errors after 10s.
  useEffect(() => { if (success) { const timer = setTimeout(() => setSuccess(null), 5000); return () => clearTimeout(timer) } }, [success])
  useEffect(() => { if (error) { const timer = setTimeout(() => setError(null), 10000); return () => clearTimeout(timer) } }, [error])

  return {
    activeTab, setActiveTab, sources, pdfDocuments, status,
    loading, scraping, extracting, error, success, results, pdfResult,
    handleScrapeAll, handleScrapeSingle, handleExtractPdf,
  }
}