fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
537
klausur-service/backend/zeugnis_api.py
Normal file
537
klausur-service/backend/zeugnis_api.py
Normal file
@@ -0,0 +1,537 @@
|
||||
"""
|
||||
Zeugnis Rights-Aware Crawler - API Endpoints
|
||||
|
||||
FastAPI router for managing zeugnis sources, documents, and crawler operations.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, List
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query
|
||||
from pydantic import BaseModel
|
||||
|
||||
from zeugnis_models import (
|
||||
ZeugnisSource, ZeugnisSourceCreate, ZeugnisSourceVerify,
|
||||
SeedUrl, SeedUrlCreate,
|
||||
ZeugnisDocument, ZeugnisStats,
|
||||
CrawlerStatus, CrawlRequest, CrawlQueueItem,
|
||||
UsageEvent, AuditExport,
|
||||
LicenseType, CrawlStatus, DocType, EventType,
|
||||
BUNDESLAENDER, TRAINING_PERMISSIONS,
|
||||
generate_id, get_training_allowed, get_bundesland_name, get_license_for_bundesland,
|
||||
)
|
||||
from zeugnis_crawler import (
|
||||
start_crawler, stop_crawler, get_crawler_status,
|
||||
)
|
||||
from metrics_db import (
|
||||
get_zeugnis_sources, upsert_zeugnis_source,
|
||||
get_zeugnis_documents, get_zeugnis_stats,
|
||||
log_zeugnis_event, get_pool,
|
||||
)
|
||||
|
||||
|
||||
# Admin router: every zeugnis-crawler endpoint below is mounted under this prefix.
router = APIRouter(prefix="/api/v1/admin/zeugnis", tags=["Zeugnis Crawler"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Sources Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/sources", response_model=List[dict])
async def list_sources():
    """List every configured zeugnis source (one per Bundesland).

    When the database holds no sources yet, synthesizes a default entry
    per Bundesland from the static BUNDESLAENDER table instead.
    """
    existing = await get_zeugnis_sources()
    if existing:
        return existing

    # No rows stored yet: build one default record per Bundesland.
    defaults = []
    for code, info in BUNDESLAENDER.items():
        defaults.append({
            "id": None,
            "bundesland": code,
            "name": info["name"],
            "base_url": None,
            "license_type": str(get_license_for_bundesland(code).value),
            "training_allowed": get_training_allowed(code),
            "verified_by": None,
            "verified_at": None,
            "created_at": None,
            "updated_at": None,
        })
    return defaults
|
||||
|
||||
|
||||
@router.post("/sources", response_model=dict)
async def create_source(source: ZeugnisSourceCreate):
    """Create or update a zeugnis source record.

    Raises:
        HTTPException 500: the upsert failed.
    """
    new_id = generate_id()
    stored = await upsert_zeugnis_source(
        id=new_id,
        bundesland=source.bundesland,
        name=source.name,
        license_type=source.license_type.value,
        training_allowed=source.training_allowed,
        base_url=source.base_url,
    )
    if stored:
        return {"id": new_id, "success": True}
    raise HTTPException(status_code=500, detail="Failed to create source")
|
||||
|
||||
|
||||
@router.put("/sources/{source_id}/verify", response_model=dict)
async def verify_source(source_id: str, verification: ZeugnisSourceVerify):
    """Record a manual license verification for a source.

    Updates license type, training permission, and verifier identity,
    stamping ``verified_at`` / ``updated_at``.

    Raises:
        HTTPException 503: database pool unavailable.
        HTTPException 404: no source with the given id exists.
        HTTPException 500: unexpected database error.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    try:
        async with pool.acquire() as conn:
            status = await conn.execute(
                """
                UPDATE zeugnis_sources
                SET license_type = $2,
                    training_allowed = $3,
                    verified_by = $4,
                    verified_at = NOW(),
                    updated_at = NOW()
                WHERE id = $1
                """,
                source_id, verification.license_type.value,
                verification.training_allowed, verification.verified_by
            )
        # asyncpg's execute() returns a status tag like "UPDATE <n>".
        # Previously verifying a nonexistent id silently reported success.
        if status == "UPDATE 0":
            raise HTTPException(status_code=404, detail=f"Source not found: {source_id}")
        return {"success": True, "source_id": source_id}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/sources/{bundesland}", response_model=dict)
async def get_source_by_bundesland(bundesland: str):
    """Get source details (plus document count) for a Bundesland code.

    Returns stored DB data when available; otherwise synthesizes default
    license/training info from the static BUNDESLAENDER table.

    Raises:
        HTTPException 404: unknown Bundesland code.
        HTTPException 500: unexpected database error.
    """
    # Validate the code up front so the DB and no-DB paths agree;
    # previously only the no-pool path rejected unknown codes.
    if bundesland not in BUNDESLAENDER:
        raise HTTPException(status_code=404, detail=f"Bundesland not found: {bundesland}")

    # Shared fallback payload for "no DB" and "no row" cases.
    default_info = {
        "bundesland": bundesland,
        "name": get_bundesland_name(bundesland),
        "training_allowed": get_training_allowed(bundesland),
        "license_type": get_license_for_bundesland(bundesland).value,
        "document_count": 0,
    }

    pool = await get_pool()
    if not pool:
        return default_info

    try:
        async with pool.acquire() as conn:
            source = await conn.fetchrow(
                "SELECT * FROM zeugnis_sources WHERE bundesland = $1",
                bundesland
            )
            if source:
                doc_count = await conn.fetchval(
                    """
                    SELECT COUNT(*) FROM zeugnis_documents d
                    JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id
                    WHERE u.source_id = $1
                    """,
                    source["id"]
                )
                return {**dict(source), "document_count": doc_count or 0}

        # No DB row for this Bundesland: fall back to static defaults.
        return default_info
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Seed URLs Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/sources/{source_id}/urls", response_model=List[dict])
async def list_seed_urls(source_id: str):
    """Return every seed URL registered for a source, oldest first.

    Returns an empty list when the database is unavailable.
    """
    pool = await get_pool()
    if not pool:
        return []

    try:
        async with pool.acquire() as conn:
            records = await conn.fetch(
                "SELECT * FROM zeugnis_seed_urls WHERE source_id = $1 ORDER BY created_at",
                source_id
            )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return [dict(record) for record in records]
|
||||
|
||||
|
||||
@router.post("/sources/{source_id}/urls", response_model=dict)
async def add_seed_url(source_id: str, seed_url: SeedUrlCreate):
    """Register a new seed URL under a source; it starts in 'pending' state.

    Raises:
        HTTPException 503: database pool unavailable.
        HTTPException 500: unexpected database error.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    new_id = generate_id()
    try:
        async with pool.acquire() as conn:
            await conn.execute(
                """
                INSERT INTO zeugnis_seed_urls (id, source_id, url, doc_type, status)
                VALUES ($1, $2, $3, $4, 'pending')
                """,
                new_id, source_id, seed_url.url, seed_url.doc_type.value
            )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"id": new_id, "success": True}
|
||||
|
||||
|
||||
@router.delete("/urls/{url_id}", response_model=dict)
async def delete_seed_url(url_id: str):
    """Remove a seed URL by id.

    Raises:
        HTTPException 503: database pool unavailable.
        HTTPException 500: unexpected database error.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    try:
        async with pool.acquire() as conn:
            await conn.execute("DELETE FROM zeugnis_seed_urls WHERE id = $1", url_id)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"success": True}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Documents Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/documents", response_model=List[dict])
async def list_documents(
    bundesland: Optional[str] = None,
    limit: int = Query(100, le=500),
    offset: int = 0,
):
    """Page through zeugnis documents, optionally filtered by Bundesland."""
    return await get_zeugnis_documents(bundesland=bundesland, limit=limit, offset=offset)
|
||||
|
||||
|
||||
@router.get("/documents/{document_id}", response_model=dict)
async def get_document(document_id: str):
    """Fetch one document (joined with its source info) and log a VIEWED event.

    Raises:
        HTTPException 503: database pool unavailable.
        HTTPException 404: document not found.
        HTTPException 500: unexpected database error.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    try:
        async with pool.acquire() as conn:
            record = await conn.fetchrow(
                """
                SELECT d.*, s.bundesland, s.name as source_name
                FROM zeugnis_documents d
                JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id
                JOIN zeugnis_sources s ON u.source_id = s.id
                WHERE d.id = $1
                """,
                document_id
            )
            if record is None:
                raise HTTPException(status_code=404, detail="Document not found")

            # Every successful read is recorded for the usage audit trail.
            await log_zeugnis_event(document_id, EventType.VIEWED.value)

            return dict(record)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/documents/{document_id}/versions", response_model=List[dict])
async def get_document_versions(document_id: str):
    """Return the stored version history for a document, newest version first.

    Raises:
        HTTPException 503: database pool unavailable.
        HTTPException 500: unexpected database error.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    try:
        async with pool.acquire() as conn:
            versions = await conn.fetch(
                """
                SELECT * FROM zeugnis_document_versions
                WHERE document_id = $1
                ORDER BY version DESC
                """,
                document_id
            )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return [dict(version) for version in versions]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Crawler Control Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/crawler/status", response_model=dict)
async def crawler_status():
    """Report the crawler's current runtime state."""
    status = get_crawler_status()
    return status
|
||||
|
||||
|
||||
@router.post("/crawler/start", response_model=dict)
async def start_crawl(request: CrawlRequest, background_tasks: BackgroundTasks):
    """Kick off the crawler for the requested Bundesland/source.

    Raises:
        HTTPException 409: the crawler is already running.
    """
    # NOTE(review): background_tasks is injected by FastAPI but unused here —
    # kept to preserve the endpoint signature.
    started = await start_crawler(
        bundesland=request.bundesland,
        source_id=request.source_id,
    )
    if started:
        return {"success": True, "message": "Crawler started"}
    raise HTTPException(status_code=409, detail="Crawler already running")
|
||||
|
||||
|
||||
@router.post("/crawler/stop", response_model=dict)
async def stop_crawl():
    """Stop the running crawler.

    Raises:
        HTTPException 409: the crawler is not running.
    """
    stopped = await stop_crawler()
    if stopped:
        return {"success": True, "message": "Crawler stopped"}
    raise HTTPException(status_code=409, detail="Crawler not running")
|
||||
|
||||
|
||||
@router.get("/crawler/queue", response_model=List[dict])
async def get_queue():
    """List queued crawl jobs (with source info), highest priority first.

    Returns an empty list when the database is unavailable.
    """
    pool = await get_pool()
    if not pool:
        return []

    try:
        async with pool.acquire() as conn:
            items = await conn.fetch(
                """
                SELECT q.*, s.bundesland, s.name as source_name
                FROM zeugnis_crawler_queue q
                JOIN zeugnis_sources s ON q.source_id = s.id
                ORDER BY q.priority DESC, q.created_at
                """
            )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return [dict(item) for item in items]
|
||||
|
||||
|
||||
@router.post("/crawler/queue", response_model=dict)
async def add_to_queue(request: CrawlRequest):
    """Enqueue a source for crawling.

    If only a Bundesland code is supplied, resolves it to a source id first.

    Raises:
        HTTPException 503: database pool unavailable.
        HTTPException 400: no source could be resolved.
        HTTPException 500: unexpected database error.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    queue_id = generate_id()
    try:
        async with pool.acquire() as conn:
            # Prefer an explicit source id; otherwise look it up by Bundesland.
            resolved_id = request.source_id
            if not resolved_id and request.bundesland:
                match = await conn.fetchrow(
                    "SELECT id FROM zeugnis_sources WHERE bundesland = $1",
                    request.bundesland
                )
                if match:
                    resolved_id = match["id"]

            if not resolved_id:
                raise HTTPException(status_code=400, detail="Source not found")

            await conn.execute(
                """
                INSERT INTO zeugnis_crawler_queue (id, source_id, priority, status)
                VALUES ($1, $2, $3, 'pending')
                """,
                queue_id, resolved_id, request.priority
            )
            return {"id": queue_id, "success": True}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Statistics Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/stats", response_model=dict)
async def get_stats():
    """Return aggregate zeugnis crawler statistics."""
    return await get_zeugnis_stats()
|
||||
|
||||
|
||||
@router.get("/stats/bundesland", response_model=List[dict])
async def get_bundesland_stats():
    """Per-Bundesland statistics: document/indexed counts and last crawl time.

    Best-effort: entries keep zeroed defaults when the database is
    unavailable or an individual query fails.
    """
    # Zeroed default row per Bundesland; filled in from the DB when possible.
    stats = [
        {
            "bundesland": code,
            "name": info["name"],
            "training_allowed": get_training_allowed(code),
            "document_count": 0,
            "indexed_count": 0,
            "last_crawled": None,
        }
        for code, info in BUNDESLAENDER.items()
    ]

    pool = await get_pool()
    if not pool:
        return stats

    try:
        # Acquire a single connection for all queries instead of
        # re-acquiring one per Bundesland inside the loop.
        async with pool.acquire() as conn:
            for stat in stats:
                try:
                    row = await conn.fetchrow(
                        """
                        SELECT
                            COUNT(d.id) as doc_count,
                            COUNT(CASE WHEN d.indexed_in_qdrant THEN 1 END) as indexed_count,
                            MAX(u.last_crawled) as last_crawled
                        FROM zeugnis_sources s
                        LEFT JOIN zeugnis_seed_urls u ON s.id = u.source_id
                        LEFT JOIN zeugnis_documents d ON u.id = d.seed_url_id
                        WHERE s.bundesland = $1
                        GROUP BY s.id
                        """,
                        stat["bundesland"]
                    )
                except Exception:
                    # Per-Bundesland failure keeps that row at its defaults.
                    continue
                if row:
                    stat["document_count"] = row["doc_count"] or 0
                    stat["indexed_count"] = row["indexed_count"] or 0
                    stat["last_crawled"] = row["last_crawled"].isoformat() if row["last_crawled"] else None
    except Exception:
        # Connection acquisition failed: fall back to the zeroed defaults,
        # matching the original best-effort behavior.
        pass

    return stats
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Audit Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/audit/events", response_model=List[dict])
async def get_audit_events(
    document_id: Optional[str] = None,
    event_type: Optional[str] = None,
    limit: int = Query(100, le=1000),
    days: int = Query(30, le=365),
):
    """List usage/audit events from the last *days* days, newest first.

    Optional filters on document id and event type. Returns an empty list
    when the database is unavailable.
    """
    pool = await get_pool()
    if not pool:
        return []

    try:
        since = datetime.now() - timedelta(days=days)

        # Build the query and positional params before touching the pool;
        # placeholder numbers track the growing parameter list.
        query = """
            SELECT * FROM zeugnis_usage_events
            WHERE created_at >= $1
            """
        params = [since]
        if document_id:
            query += " AND document_id = $2"
            params.append(document_id)
        if event_type:
            query += f" AND event_type = ${len(params) + 1}"
            params.append(event_type)
        query += f" ORDER BY created_at DESC LIMIT ${len(params) + 1}"
        params.append(limit)

        async with pool.acquire() as conn:
            events = await conn.fetch(query, *params)
        return [dict(event) for event in events]
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/audit/export", response_model=dict)
async def export_audit(
    days: int = Query(30, le=365),
    requested_by: str = Query(..., description="User requesting the export"),
):
    """Produce a GDPR-compliance export of usage events for the last *days* days.

    Raises:
        HTTPException 503: database pool unavailable.
        HTTPException 500: unexpected database error.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    try:
        cutoff = datetime.now() - timedelta(days=days)
        async with pool.acquire() as conn:
            events = await conn.fetch(
                """
                SELECT * FROM zeugnis_usage_events
                WHERE created_at >= $1
                ORDER BY created_at DESC
                """,
                cutoff
            )
            distinct_docs = await conn.fetchval(
                "SELECT COUNT(DISTINCT document_id) FROM zeugnis_usage_events WHERE created_at >= $1",
                cutoff
            )

        return {
            "export_date": datetime.now().isoformat(),
            "requested_by": requested_by,
            "events": [dict(event) for event in events],
            "document_count": distinct_docs or 0,
            "date_range_start": cutoff.isoformat(),
            "date_range_end": datetime.now().isoformat(),
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Initialization Endpoint
|
||||
# =============================================================================
|
||||
|
||||
@router.post("/init", response_model=dict)
async def initialize_sources():
    """Seed the sources table with one entry per Bundesland from the static map.

    Raises:
        HTTPException 503: database pool unavailable.
        HTTPException 500: unexpected database error.
    """
    pool = await get_pool()
    if not pool:
        raise HTTPException(status_code=503, detail="Database not available")

    created = 0
    try:
        for code, info in BUNDESLAENDER.items():
            stored = await upsert_zeugnis_source(
                id=generate_id(),
                bundesland=code,
                name=info["name"],
                license_type=get_license_for_bundesland(code).value,
                training_allowed=get_training_allowed(code),
            )
            if stored:
                created += 1
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"success": True, "sources_created": created}
|
||||
Reference in New Issue
Block a user