Restructure: Move 52 files into 7 domain packages
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 23s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 23s
korrektur/ zeugnis/ admin/ compliance/ worksheet/ training/ metrics/ 52 shims, relative imports, RAG untouched. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,321 +1,4 @@
|
||||
"""
|
||||
Zeugnis API Docs — documents, crawler control, statistics, audit endpoints.
|
||||
|
||||
Extracted from zeugnis_api.py for modularity.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, List
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query
|
||||
|
||||
from zeugnis_models import (
|
||||
CrawlRequest, EventType,
|
||||
BUNDESLAENDER,
|
||||
generate_id, get_training_allowed, get_license_for_bundesland,
|
||||
)
|
||||
from zeugnis_crawler import (
|
||||
start_crawler, stop_crawler, get_crawler_status,
|
||||
)
|
||||
from metrics_db import (
|
||||
get_zeugnis_documents, get_zeugnis_stats,
|
||||
log_zeugnis_event, get_pool,
|
||||
)
|
||||
|
||||
|
||||
# All routes in this module are mounted under /api/v1/admin/zeugnis.
router = APIRouter(prefix="/api/v1/admin/zeugnis", tags=["Zeugnis Crawler"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Documents Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/documents", response_model=List[dict])
|
||||
async def list_documents(
|
||||
bundesland: Optional[str] = None,
|
||||
limit: int = Query(100, le=500),
|
||||
offset: int = 0,
|
||||
):
|
||||
"""Get all zeugnis documents with optional filtering."""
|
||||
documents = await get_zeugnis_documents(bundesland=bundesland, limit=limit, offset=offset)
|
||||
return documents
|
||||
|
||||
|
||||
@router.get("/documents/{document_id}", response_model=dict)
|
||||
async def get_document(document_id: str):
|
||||
"""Get details for a specific document."""
|
||||
pool = await get_pool()
|
||||
if not pool:
|
||||
raise HTTPException(status_code=503, detail="Database not available")
|
||||
|
||||
try:
|
||||
async with pool.acquire() as conn:
|
||||
doc = await conn.fetchrow(
|
||||
"""
|
||||
SELECT d.*, s.bundesland, s.name as source_name
|
||||
FROM zeugnis_documents d
|
||||
JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id
|
||||
JOIN zeugnis_sources s ON u.source_id = s.id
|
||||
WHERE d.id = $1
|
||||
""",
|
||||
document_id
|
||||
)
|
||||
if not doc:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
# Log view event
|
||||
await log_zeugnis_event(document_id, EventType.VIEWED.value)
|
||||
|
||||
return dict(doc)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/documents/{document_id}/versions", response_model=List[dict])
|
||||
async def get_document_versions(document_id: str):
|
||||
"""Get version history for a document."""
|
||||
pool = await get_pool()
|
||||
if not pool:
|
||||
raise HTTPException(status_code=503, detail="Database not available")
|
||||
|
||||
try:
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch(
|
||||
"""
|
||||
SELECT * FROM zeugnis_document_versions
|
||||
WHERE document_id = $1
|
||||
ORDER BY version DESC
|
||||
""",
|
||||
document_id
|
||||
)
|
||||
return [dict(r) for r in rows]
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Crawler Control Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/crawler/status", response_model=dict)
|
||||
async def crawler_status():
|
||||
"""Get current crawler status."""
|
||||
return get_crawler_status()
|
||||
|
||||
|
||||
@router.post("/crawler/start", response_model=dict)
|
||||
async def start_crawl(request: CrawlRequest, background_tasks: BackgroundTasks):
|
||||
"""Start the crawler."""
|
||||
success = await start_crawler(
|
||||
bundesland=request.bundesland,
|
||||
source_id=request.source_id,
|
||||
)
|
||||
if not success:
|
||||
raise HTTPException(status_code=409, detail="Crawler already running")
|
||||
return {"success": True, "message": "Crawler started"}
|
||||
|
||||
|
||||
@router.post("/crawler/stop", response_model=dict)
|
||||
async def stop_crawl():
|
||||
"""Stop the crawler."""
|
||||
success = await stop_crawler()
|
||||
if not success:
|
||||
raise HTTPException(status_code=409, detail="Crawler not running")
|
||||
return {"success": True, "message": "Crawler stopped"}
|
||||
|
||||
|
||||
@router.get("/crawler/queue", response_model=List[dict])
|
||||
async def get_queue():
|
||||
"""Get the crawler queue."""
|
||||
pool = await get_pool()
|
||||
if not pool:
|
||||
return []
|
||||
|
||||
try:
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch(
|
||||
"""
|
||||
SELECT q.*, s.bundesland, s.name as source_name
|
||||
FROM zeugnis_crawler_queue q
|
||||
JOIN zeugnis_sources s ON q.source_id = s.id
|
||||
ORDER BY q.priority DESC, q.created_at
|
||||
"""
|
||||
)
|
||||
return [dict(r) for r in rows]
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/crawler/queue", response_model=dict)
|
||||
async def add_to_queue(request: CrawlRequest):
|
||||
"""Add a source to the crawler queue."""
|
||||
pool = await get_pool()
|
||||
if not pool:
|
||||
raise HTTPException(status_code=503, detail="Database not available")
|
||||
|
||||
queue_id = generate_id()
|
||||
try:
|
||||
async with pool.acquire() as conn:
|
||||
# Get source ID if bundesland provided
|
||||
source_id = request.source_id
|
||||
if not source_id and request.bundesland:
|
||||
source = await conn.fetchrow(
|
||||
"SELECT id FROM zeugnis_sources WHERE bundesland = $1",
|
||||
request.bundesland
|
||||
)
|
||||
if source:
|
||||
source_id = source["id"]
|
||||
|
||||
if not source_id:
|
||||
raise HTTPException(status_code=400, detail="Source not found")
|
||||
|
||||
await conn.execute(
|
||||
"""
|
||||
INSERT INTO zeugnis_crawler_queue (id, source_id, priority, status)
|
||||
VALUES ($1, $2, $3, 'pending')
|
||||
""",
|
||||
queue_id, source_id, request.priority
|
||||
)
|
||||
return {"id": queue_id, "success": True}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Statistics Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/stats", response_model=dict)
|
||||
async def get_stats():
|
||||
"""Get zeugnis crawler statistics."""
|
||||
stats = await get_zeugnis_stats()
|
||||
return stats
|
||||
|
||||
|
||||
@router.get("/stats/bundesland", response_model=List[dict])
|
||||
async def get_bundesland_stats():
|
||||
"""Get statistics per Bundesland."""
|
||||
pool = await get_pool()
|
||||
|
||||
# Build stats from BUNDESLAENDER with DB data if available
|
||||
stats = []
|
||||
for code, info in BUNDESLAENDER.items():
|
||||
stat = {
|
||||
"bundesland": code,
|
||||
"name": info["name"],
|
||||
"training_allowed": get_training_allowed(code),
|
||||
"document_count": 0,
|
||||
"indexed_count": 0,
|
||||
"last_crawled": None,
|
||||
}
|
||||
|
||||
if pool:
|
||||
try:
|
||||
async with pool.acquire() as conn:
|
||||
row = await conn.fetchrow(
|
||||
"""
|
||||
SELECT
|
||||
COUNT(d.id) as doc_count,
|
||||
COUNT(CASE WHEN d.indexed_in_qdrant THEN 1 END) as indexed_count,
|
||||
MAX(u.last_crawled) as last_crawled
|
||||
FROM zeugnis_sources s
|
||||
LEFT JOIN zeugnis_seed_urls u ON s.id = u.source_id
|
||||
LEFT JOIN zeugnis_documents d ON u.id = d.seed_url_id
|
||||
WHERE s.bundesland = $1
|
||||
GROUP BY s.id
|
||||
""",
|
||||
code
|
||||
)
|
||||
if row:
|
||||
stat["document_count"] = row["doc_count"] or 0
|
||||
stat["indexed_count"] = row["indexed_count"] or 0
|
||||
stat["last_crawled"] = row["last_crawled"].isoformat() if row["last_crawled"] else None
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
stats.append(stat)
|
||||
|
||||
return stats
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Audit Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/audit/events", response_model=List[dict])
|
||||
async def get_audit_events(
|
||||
document_id: Optional[str] = None,
|
||||
event_type: Optional[str] = None,
|
||||
limit: int = Query(100, le=1000),
|
||||
days: int = Query(30, le=365),
|
||||
):
|
||||
"""Get audit events with optional filtering."""
|
||||
pool = await get_pool()
|
||||
if not pool:
|
||||
return []
|
||||
|
||||
try:
|
||||
since = datetime.now() - timedelta(days=days)
|
||||
async with pool.acquire() as conn:
|
||||
query = """
|
||||
SELECT * FROM zeugnis_usage_events
|
||||
WHERE created_at >= $1
|
||||
"""
|
||||
params = [since]
|
||||
|
||||
if document_id:
|
||||
query += " AND document_id = $2"
|
||||
params.append(document_id)
|
||||
if event_type:
|
||||
query += f" AND event_type = ${len(params) + 1}"
|
||||
params.append(event_type)
|
||||
|
||||
query += f" ORDER BY created_at DESC LIMIT ${len(params) + 1}"
|
||||
params.append(limit)
|
||||
|
||||
rows = await conn.fetch(query, *params)
|
||||
return [dict(r) for r in rows]
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/audit/export", response_model=dict)
|
||||
async def export_audit(
|
||||
days: int = Query(30, le=365),
|
||||
requested_by: str = Query(..., description="User requesting the export"),
|
||||
):
|
||||
"""Export audit data for GDPR compliance."""
|
||||
pool = await get_pool()
|
||||
if not pool:
|
||||
raise HTTPException(status_code=503, detail="Database not available")
|
||||
|
||||
try:
|
||||
since = datetime.now() - timedelta(days=days)
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch(
|
||||
"""
|
||||
SELECT * FROM zeugnis_usage_events
|
||||
WHERE created_at >= $1
|
||||
ORDER BY created_at DESC
|
||||
""",
|
||||
since
|
||||
)
|
||||
|
||||
doc_count = await conn.fetchval(
|
||||
"SELECT COUNT(DISTINCT document_id) FROM zeugnis_usage_events WHERE created_at >= $1",
|
||||
since
|
||||
)
|
||||
|
||||
return {
|
||||
"export_date": datetime.now().isoformat(),
|
||||
"requested_by": requested_by,
|
||||
"events": [dict(r) for r in rows],
|
||||
"document_count": doc_count or 0,
|
||||
"date_range_start": since.isoformat(),
|
||||
"date_range_end": datetime.now().isoformat(),
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
# Backward-compat shim -- module moved to zeugnis/api_docs.py
# Importing this module replaces its entry in sys.modules with the relocated
# package module, so existing `import <old_module>` call sites keep working.
# Underscore aliases keep the shim's own names out of the public namespace.
import importlib as _importlib
import sys as _sys
_sys.modules[__name__] = _importlib.import_module("zeugnis.api_docs")
|
||||
|
||||
Reference in New Issue
Block a user