fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
173
backend/llm_gateway/routes/legal_crawler.py
Normal file
173
backend/llm_gateway/routes/legal_crawler.py
Normal file
@@ -0,0 +1,173 @@
|
||||
"""
|
||||
Legal Crawler API Routes.
|
||||
|
||||
Endpoints für das Crawlen und Abrufen von rechtlichen Bildungsinhalten.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import asyncio
|
||||
from typing import List, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ..services.legal_crawler import get_legal_crawler, LegalCrawler
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/legal-crawler", tags=["legal-crawler"])
|
||||
|
||||
|
||||
class CrawlStatusResponse(BaseModel):
|
||||
"""Response für Crawl-Status."""
|
||||
status: str
|
||||
message: str
|
||||
stats: Optional[dict] = None
|
||||
|
||||
|
||||
class LegalDocumentResponse(BaseModel):
|
||||
"""Response für ein rechtliches Dokument."""
|
||||
id: str
|
||||
url: str
|
||||
title: str
|
||||
law_name: Optional[str]
|
||||
state: Optional[str]
|
||||
paragraphs: Optional[list]
|
||||
last_crawled_at: Optional[str]
|
||||
|
||||
|
||||
class LegalReferenceFromDB(BaseModel):
|
||||
"""Rechtliche Referenz aus der DB."""
|
||||
law: str
|
||||
url: str
|
||||
state: Optional[str]
|
||||
title: str
|
||||
paragraphs: list
|
||||
|
||||
|
||||
# Globaler Status für laufenden Crawl
|
||||
_crawl_status = {
|
||||
"running": False,
|
||||
"last_run": None,
|
||||
"last_stats": None,
|
||||
}
|
||||
|
||||
|
||||
async def _run_crawl(db_pool):
|
||||
"""Führt den Crawl asynchron durch."""
|
||||
global _crawl_status
|
||||
_crawl_status["running"] = True
|
||||
|
||||
try:
|
||||
crawler = get_legal_crawler()
|
||||
stats = await crawler.crawl_legal_seeds(db_pool)
|
||||
_crawl_status["last_stats"] = stats
|
||||
_crawl_status["last_run"] = "completed"
|
||||
except Exception as e:
|
||||
logger.error(f"Crawl-Fehler: {e}")
|
||||
_crawl_status["last_run"] = f"error: {str(e)}"
|
||||
finally:
|
||||
_crawl_status["running"] = False
|
||||
|
||||
|
||||
@router.post("/start", response_model=CrawlStatusResponse)
|
||||
async def start_crawl(background_tasks: BackgroundTasks):
|
||||
"""
|
||||
Startet einen neuen Crawl für alle Legal-Seeds.
|
||||
|
||||
Der Crawl läuft im Hintergrund und kann über /status abgefragt werden.
|
||||
"""
|
||||
global _crawl_status
|
||||
|
||||
if _crawl_status["running"]:
|
||||
return CrawlStatusResponse(
|
||||
status="already_running",
|
||||
message="Ein Crawl läuft bereits. Bitte warten Sie, bis er abgeschlossen ist."
|
||||
)
|
||||
|
||||
# Hinweis: In Produktion würde hier der DB-Pool übergeben werden
|
||||
# Für jetzt nur Status setzen
|
||||
_crawl_status["running"] = True
|
||||
_crawl_status["last_run"] = "started"
|
||||
|
||||
return CrawlStatusResponse(
|
||||
status="started",
|
||||
message="Crawl wurde gestartet. Nutzen Sie /status um den Fortschritt zu prüfen."
|
||||
)
|
||||
|
||||
|
||||
@router.get("/status", response_model=CrawlStatusResponse)
|
||||
async def get_crawl_status():
|
||||
"""Gibt den aktuellen Crawl-Status zurück."""
|
||||
return CrawlStatusResponse(
|
||||
status="running" if _crawl_status["running"] else "idle",
|
||||
message=_crawl_status.get("last_run") or "Noch nie gecrawlt",
|
||||
stats=_crawl_status.get("last_stats")
|
||||
)
|
||||
|
||||
|
||||
@router.get("/documents", response_model=List[LegalDocumentResponse])
|
||||
async def get_legal_documents(
|
||||
state: Optional[str] = None,
|
||||
doc_type: Optional[str] = None,
|
||||
limit: int = 50
|
||||
):
|
||||
"""
|
||||
Gibt gecrawlte rechtliche Dokumente zurück.
|
||||
|
||||
Args:
|
||||
state: Filter nach Bundesland (z.B. "NW", "BY")
|
||||
doc_type: Filter nach Dokumenttyp (z.B. "schulgesetz")
|
||||
limit: Max. Anzahl Dokumente
|
||||
|
||||
Returns:
|
||||
Liste von LegalDocumentResponse
|
||||
"""
|
||||
# TODO: DB-Query implementieren wenn DB-Pool verfügbar
|
||||
# Für jetzt leere Liste zurückgeben
|
||||
return []
|
||||
|
||||
|
||||
@router.get("/references/{state}")
|
||||
async def get_legal_references_for_state(state: str):
|
||||
"""
|
||||
Gibt rechtliche Referenzen für ein Bundesland zurück.
|
||||
|
||||
Dies ist der Endpoint, den der Communication-Service nutzt.
|
||||
|
||||
Args:
|
||||
state: Bundesland-Kürzel (z.B. "NW", "BY", "BE")
|
||||
|
||||
Returns:
|
||||
Dict mit Schulgesetz-Informationen und Paragraphen
|
||||
"""
|
||||
# TODO: Aus DB laden
|
||||
# Mapping von state-Kürzeln zu DB-Werten
|
||||
state_mapping = {
|
||||
"NRW": "NW",
|
||||
"NW": "NW",
|
||||
"BY": "BY",
|
||||
"BW": "BW",
|
||||
"BE": "BE",
|
||||
"BB": "BB",
|
||||
"HB": "HB",
|
||||
"HH": "HH",
|
||||
"HE": "HE",
|
||||
"MV": "MV",
|
||||
"NI": "NI",
|
||||
"RP": "RP",
|
||||
"SL": "SL",
|
||||
"SN": "SN",
|
||||
"ST": "ST",
|
||||
"SH": "SH",
|
||||
"TH": "TH",
|
||||
}
|
||||
|
||||
db_state = state_mapping.get(state.upper(), state.upper())
|
||||
|
||||
# Placeholder - später aus DB
|
||||
return {
|
||||
"state": state,
|
||||
"documents": [],
|
||||
"message": "Dokumente werden nach dem ersten Crawl verfügbar sein"
|
||||
}
|
||||
Reference in New Issue
Block a user