[split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,106 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useEffect, useCallback } from 'react'
|
||||
import {
|
||||
Source, ScraperStatus, ScrapeResult,
|
||||
PDFDocument, PDFExtractionResult, BACKEND_URL,
|
||||
} from './types'
|
||||
|
||||
/**
 * Extracts a human-readable error message from a failed HTTP response.
 *
 * Reads the `detail` field of a JSON body when it is a non-empty string.
 * Falls back to `fallback` when the body is not JSON (for example an HTML
 * error page from a proxy) or when `detail` is missing or not a string.
 */
async function readErrorDetail(res: Response, fallback: string): Promise<string> {
  try {
    const body: unknown = await res.json()
    if (typeof body === 'object' && body !== null && 'detail' in body) {
      const detail = (body as { detail?: unknown }).detail
      if (typeof detail === 'string' && detail) return detail
    }
  } catch {
    // Body was not valid JSON; use the fallback message below.
  }
  return fallback
}

/** Converts an arbitrary thrown value into a displayable message string. */
function toErrorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err)
}

/**
 * State and actions for the compliance scraper page.
 *
 * On mount it loads sources, scraper status and PDF documents in parallel.
 * While a scrape is running it re-fetches the status every 2 seconds.
 * Success messages are cleared after 5 seconds, error messages after 10.
 *
 * @returns The current tab, loaded data, progress flags, the latest
 *   success/error message, scrape/extraction results and the three action
 *   handlers (`handleScrapeAll`, `handleScrapeSingle`, `handleExtractPdf`).
 */
export function useComplianceScraper() {
  const [activeTab, setActiveTab] = useState<'sources' | 'pdf' | 'status' | 'logs'>('sources')
  const [sources, setSources] = useState<Source[]>([])
  const [pdfDocuments, setPdfDocuments] = useState<PDFDocument[]>([])
  const [status, setStatus] = useState<ScraperStatus | null>(null)
  const [loading, setLoading] = useState(true)
  const [scraping, setScraping] = useState(false)
  const [extracting, setExtracting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [success, setSuccess] = useState<string | null>(null)
  const [results, setResults] = useState<ScrapeResult[]>([])
  const [pdfResult, setPdfResult] = useState<PDFExtractionResult | null>(null)

  // The three fetchers are best-effort: failures are logged and the previous
  // state is kept, so they never reject.
  const fetchSources = useCallback(async () => {
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/sources`)
      if (res.ok) {
        const data = await res.json()
        setSources(data.sources || [])
      }
    } catch (err) {
      console.error('Failed to fetch sources:', err)
    }
  }, [])

  const fetchPdfDocuments = useCallback(async () => {
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/pdf-documents`)
      if (res.ok) {
        const data = await res.json()
        setPdfDocuments(data.documents || [])
      }
    } catch (err) {
      console.error('Failed to fetch PDF documents:', err)
    }
  }, [])

  const fetchStatus = useCallback(async () => {
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/status`)
      if (res.ok) {
        const data = await res.json()
        setStatus(data)
      }
    } catch (err) {
      console.error('Failed to fetch status:', err)
    }
  }, [])

  // Initial load: run all three requests in parallel.
  useEffect(() => {
    const loadData = async () => {
      setLoading(true)
      try {
        await Promise.all([fetchSources(), fetchStatus(), fetchPdfDocuments()])
      } finally {
        setLoading(false)
      }
    }
    void loadData()
  }, [fetchSources, fetchStatus, fetchPdfDocuments])

  // Poll the status endpoint every 2 seconds while a scrape is in flight.
  useEffect(() => {
    if (!scraping) return undefined
    const interval = setInterval(fetchStatus, 2000)
    return () => clearInterval(interval)
  }, [scraping, fetchStatus])

  /** Triggers a scrape of all sources and stores the combined result list. */
  const handleScrapeAll = async () => {
    setScraping(true)
    setError(null)
    setSuccess(null)
    setResults([])
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/scrape-all`, { method: 'POST' })
      if (!res.ok) throw new Error(await readErrorDetail(res, 'Scraping fehlgeschlagen'))
      const data = await res.json()
      // Guard against a missing `results` object or missing buckets so a
      // partial payload does not turn a finished scrape into a TypeError.
      const succeeded: ScrapeResult[] = data?.results?.success ?? []
      const failed: ScrapeResult[] = data?.results?.failed ?? []
      const skipped: ScrapeResult[] = data?.results?.skipped ?? []
      setResults([...succeeded, ...failed, ...skipped])
      setSuccess(`Scraping abgeschlossen: ${succeeded.length} erfolgreich, ${skipped.length} uebersprungen, ${failed.length} fehlgeschlagen`)
      await fetchSources()
    } catch (err: unknown) {
      setError(toErrorMessage(err))
    } finally {
      setScraping(false)
    }
  }

  /**
   * Triggers a scrape of a single source.
   *
   * @param code - Source code, sent as a path segment of the request URL.
   * @param force - Forwarded as the `force` query parameter.
   */
  const handleScrapeSingle = async (code: string, force: boolean = false) => {
    setScraping(true)
    setError(null)
    setSuccess(null)
    try {
      // Encode the code so characters such as `/`, `?` or `#` cannot alter the request path.
      const res = await fetch(
        `${BACKEND_URL}/api/v1/compliance/scraper/scrape/${encodeURIComponent(code)}?force=${force}`,
        { method: 'POST' },
      )
      if (!res.ok) throw new Error(await readErrorDetail(res, 'Scraping fehlgeschlagen'))
      const data = await res.json()
      if (data.status === 'skipped') {
        setSuccess(`${code}: Bereits vorhanden (${data.requirement_count} Anforderungen)`)
      } else {
        setSuccess(`${code}: ${data.requirements_extracted} Anforderungen extrahiert`)
      }
      await fetchSources()
    } catch (err: unknown) {
      setError(toErrorMessage(err))
    } finally {
      setScraping(false)
    }
  }

  /**
   * Requests PDF extraction for one document and stores the result.
   *
   * @param code - Sent as `document_code` in the request body.
   * @param saveToDb - Sent as `save_to_db` in the request body.
   * @param force - Sent as `force` in the request body.
   */
  const handleExtractPdf = async (code: string, saveToDb: boolean = true, force: boolean = false) => {
    setExtracting(true)
    setError(null)
    setSuccess(null)
    setPdfResult(null)
    try {
      const res = await fetch(`${BACKEND_URL}/api/v1/compliance/scraper/extract-pdf`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ document_code: code, save_to_db: saveToDb, force }),
      })
      if (!res.ok) throw new Error(await readErrorDetail(res, 'PDF-Extraktion fehlgeschlagen'))
      const data: PDFExtractionResult = await res.json()
      setPdfResult(data)
      if (data.success) {
        setSuccess(`${code}: ${data.total_aspects} Pruefaspekte extrahiert, ${data.requirements_created} Requirements erstellt`)
      }
      await fetchSources()
    } catch (err: unknown) {
      setError(toErrorMessage(err))
    } finally {
      setExtracting(false)
    }
  }

  // Auto-dismiss the success message after 5 seconds.
  useEffect(() => {
    if (!success) return undefined
    const timer = setTimeout(() => setSuccess(null), 5000)
    return () => clearTimeout(timer)
  }, [success])

  // Auto-dismiss the error message after 10 seconds.
  useEffect(() => {
    if (!error) return undefined
    const timer = setTimeout(() => setError(null), 10000)
    return () => clearTimeout(timer)
  }, [error])

  return {
    activeTab, setActiveTab, sources, pdfDocuments, status,
    loading, scraping, extracting, error, success, results, pdfResult,
    handleScrapeAll, handleScrapeSingle, handleExtractPdf,
  }
}
|
||||
Reference in New Issue
Block a user