backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
107 lines
5.0 KiB
TypeScript
'use client'
|
|
|
|
import { useState, useEffect, useCallback } from 'react'
|
|
import {
|
|
Source, ScraperStatus, ScrapeResult,
|
|
PDFDocument, PDFExtractionResult, BACKEND_URL,
|
|
} from './types'
|
|
|
|
export function useComplianceScraper() {
|
|
const [activeTab, setActiveTab] = useState<'sources' | 'pdf' | 'status' | 'logs'>('sources')
|
|
const [sources, setSources] = useState<Source[]>([])
|
|
const [pdfDocuments, setPdfDocuments] = useState<PDFDocument[]>([])
|
|
const [status, setStatus] = useState<ScraperStatus | null>(null)
|
|
const [loading, setLoading] = useState(true)
|
|
const [scraping, setScraping] = useState(false)
|
|
const [extracting, setExtracting] = useState(false)
|
|
const [error, setError] = useState<string | null>(null)
|
|
const [success, setSuccess] = useState<string | null>(null)
|
|
const [results, setResults] = useState<ScrapeResult[]>([])
|
|
const [pdfResult, setPdfResult] = useState<PDFExtractionResult | null>(null)
|
|
|
|
const fetchSources = useCallback(async () => {
|
|
try {
|
|
const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/sources`)
|
|
if (res.ok) { const data = await res.json(); setSources(data.sources || []) }
|
|
} catch (err) { console.error('Failed to fetch sources:', err) }
|
|
}, [])
|
|
|
|
const fetchPdfDocuments = useCallback(async () => {
|
|
try {
|
|
const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/pdf-documents`)
|
|
if (res.ok) { const data = await res.json(); setPdfDocuments(data.documents || []) }
|
|
} catch (err) { console.error('Failed to fetch PDF documents:', err) }
|
|
}, [])
|
|
|
|
const fetchStatus = useCallback(async () => {
|
|
try {
|
|
const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/status`)
|
|
if (res.ok) { const data = await res.json(); setStatus(data) }
|
|
} catch (err) { console.error('Failed to fetch status:', err) }
|
|
}, [])
|
|
|
|
useEffect(() => {
|
|
const loadData = async () => {
|
|
setLoading(true)
|
|
await Promise.all([fetchSources(), fetchStatus(), fetchPdfDocuments()])
|
|
setLoading(false)
|
|
}
|
|
loadData()
|
|
}, [fetchSources, fetchStatus, fetchPdfDocuments])
|
|
|
|
useEffect(() => {
|
|
if (scraping) { const interval = setInterval(fetchStatus, 2000); return () => clearInterval(interval) }
|
|
}, [scraping, fetchStatus])
|
|
|
|
const handleScrapeAll = async () => {
|
|
setScraping(true); setError(null); setSuccess(null); setResults([])
|
|
try {
|
|
const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/scrape-all`, { method: 'POST' })
|
|
if (!res.ok) { const data = await res.json(); throw new Error(data.detail || 'Scraping fehlgeschlagen') }
|
|
const data = await res.json()
|
|
setResults([...data.results.success, ...data.results.failed, ...data.results.skipped])
|
|
setSuccess(`Scraping abgeschlossen: ${data.results.success.length} erfolgreich, ${data.results.skipped.length} uebersprungen, ${data.results.failed.length} fehlgeschlagen`)
|
|
await fetchSources()
|
|
} catch (err: any) { setError(err.message) }
|
|
finally { setScraping(false) }
|
|
}
|
|
|
|
const handleScrapeSingle = async (code: string, force: boolean = false) => {
|
|
setScraping(true); setError(null); setSuccess(null)
|
|
try {
|
|
const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/scrape/${code}?force=${force}`, { method: 'POST' })
|
|
if (!res.ok) { const data = await res.json(); throw new Error(data.detail || 'Scraping fehlgeschlagen') }
|
|
const data = await res.json()
|
|
if (data.status === 'skipped') { setSuccess(`${code}: Bereits vorhanden (${data.requirement_count} Anforderungen)`) }
|
|
else { setSuccess(`${code}: ${data.requirements_extracted} Anforderungen extrahiert`) }
|
|
await fetchSources()
|
|
} catch (err: any) { setError(err.message) }
|
|
finally { setScraping(false) }
|
|
}
|
|
|
|
const handleExtractPdf = async (code: string, saveToDb: boolean = true, force: boolean = false) => {
|
|
setExtracting(true); setError(null); setSuccess(null); setPdfResult(null)
|
|
try {
|
|
const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/extract-pdf`, {
|
|
method: 'POST', headers: { 'Content-Type': 'application/json' },
|
|
body: JSON.stringify({ document_code: code, save_to_db: saveToDb, force }),
|
|
})
|
|
if (!res.ok) { const data = await res.json(); throw new Error(data.detail || 'PDF-Extraktion fehlgeschlagen') }
|
|
const data: PDFExtractionResult = await res.json()
|
|
setPdfResult(data)
|
|
if (data.success) { setSuccess(`${code}: ${data.total_aspects} Pruefaspekte extrahiert, ${data.requirements_created} Requirements erstellt`) }
|
|
await fetchSources()
|
|
} catch (err: any) { setError(err.message) }
|
|
finally { setExtracting(false) }
|
|
}
|
|
|
|
useEffect(() => { if (success) { const timer = setTimeout(() => setSuccess(null), 5000); return () => clearTimeout(timer) } }, [success])
|
|
useEffect(() => { if (error) { const timer = setTimeout(() => setError(null), 10000); return () => clearTimeout(timer) } }, [error])
|
|
|
|
return {
|
|
activeTab, setActiveTab, sources, pdfDocuments, status,
|
|
loading, scraping, extracting, error, success, results, pdfResult,
|
|
handleScrapeAll, handleScrapeSingle, handleExtractPdf,
|
|
}
|
|
}
|