Restructure: Move 52 files into 7 domain packages
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 23s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 23s
korrektur/ zeugnis/ admin/ compliance/ worksheet/ training/ metrics/ 52 shims, relative imports, RAG untouched. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,321 +1,4 @@
|
||||
"""
|
||||
Zeugnis API Docs — documents, crawler control, statistics, audit endpoints.
|
||||
|
||||
Extracted from zeugnis_api.py for modularity.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, List
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query
|
||||
|
||||
from zeugnis_models import (
|
||||
CrawlRequest, EventType,
|
||||
BUNDESLAENDER,
|
||||
generate_id, get_training_allowed, get_license_for_bundesland,
|
||||
)
|
||||
from zeugnis_crawler import (
|
||||
start_crawler, stop_crawler, get_crawler_status,
|
||||
)
|
||||
from metrics_db import (
|
||||
get_zeugnis_documents, get_zeugnis_stats,
|
||||
log_zeugnis_event, get_pool,
|
||||
)
|
||||
|
||||
|
||||
# All routes in this module are mounted under /api/v1/admin/zeugnis.
router = APIRouter(prefix="/api/v1/admin/zeugnis", tags=["Zeugnis Crawler"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Documents Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/documents", response_model=List[dict])
|
||||
async def list_documents(
|
||||
bundesland: Optional[str] = None,
|
||||
limit: int = Query(100, le=500),
|
||||
offset: int = 0,
|
||||
):
|
||||
"""Get all zeugnis documents with optional filtering."""
|
||||
documents = await get_zeugnis_documents(bundesland=bundesland, limit=limit, offset=offset)
|
||||
return documents
|
||||
|
||||
|
||||
@router.get("/documents/{document_id}", response_model=dict)
|
||||
async def get_document(document_id: str):
|
||||
"""Get details for a specific document."""
|
||||
pool = await get_pool()
|
||||
if not pool:
|
||||
raise HTTPException(status_code=503, detail="Database not available")
|
||||
|
||||
try:
|
||||
async with pool.acquire() as conn:
|
||||
doc = await conn.fetchrow(
|
||||
"""
|
||||
SELECT d.*, s.bundesland, s.name as source_name
|
||||
FROM zeugnis_documents d
|
||||
JOIN zeugnis_seed_urls u ON d.seed_url_id = u.id
|
||||
JOIN zeugnis_sources s ON u.source_id = s.id
|
||||
WHERE d.id = $1
|
||||
""",
|
||||
document_id
|
||||
)
|
||||
if not doc:
|
||||
raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
# Log view event
|
||||
await log_zeugnis_event(document_id, EventType.VIEWED.value)
|
||||
|
||||
return dict(doc)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/documents/{document_id}/versions", response_model=List[dict])
|
||||
async def get_document_versions(document_id: str):
|
||||
"""Get version history for a document."""
|
||||
pool = await get_pool()
|
||||
if not pool:
|
||||
raise HTTPException(status_code=503, detail="Database not available")
|
||||
|
||||
try:
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch(
|
||||
"""
|
||||
SELECT * FROM zeugnis_document_versions
|
||||
WHERE document_id = $1
|
||||
ORDER BY version DESC
|
||||
""",
|
||||
document_id
|
||||
)
|
||||
return [dict(r) for r in rows]
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Crawler Control Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/crawler/status", response_model=dict)
|
||||
async def crawler_status():
|
||||
"""Get current crawler status."""
|
||||
return get_crawler_status()
|
||||
|
||||
|
||||
@router.post("/crawler/start", response_model=dict)
|
||||
async def start_crawl(request: CrawlRequest, background_tasks: BackgroundTasks):
|
||||
"""Start the crawler."""
|
||||
success = await start_crawler(
|
||||
bundesland=request.bundesland,
|
||||
source_id=request.source_id,
|
||||
)
|
||||
if not success:
|
||||
raise HTTPException(status_code=409, detail="Crawler already running")
|
||||
return {"success": True, "message": "Crawler started"}
|
||||
|
||||
|
||||
@router.post("/crawler/stop", response_model=dict)
|
||||
async def stop_crawl():
|
||||
"""Stop the crawler."""
|
||||
success = await stop_crawler()
|
||||
if not success:
|
||||
raise HTTPException(status_code=409, detail="Crawler not running")
|
||||
return {"success": True, "message": "Crawler stopped"}
|
||||
|
||||
|
||||
@router.get("/crawler/queue", response_model=List[dict])
|
||||
async def get_queue():
|
||||
"""Get the crawler queue."""
|
||||
pool = await get_pool()
|
||||
if not pool:
|
||||
return []
|
||||
|
||||
try:
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch(
|
||||
"""
|
||||
SELECT q.*, s.bundesland, s.name as source_name
|
||||
FROM zeugnis_crawler_queue q
|
||||
JOIN zeugnis_sources s ON q.source_id = s.id
|
||||
ORDER BY q.priority DESC, q.created_at
|
||||
"""
|
||||
)
|
||||
return [dict(r) for r in rows]
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/crawler/queue", response_model=dict)
|
||||
async def add_to_queue(request: CrawlRequest):
|
||||
"""Add a source to the crawler queue."""
|
||||
pool = await get_pool()
|
||||
if not pool:
|
||||
raise HTTPException(status_code=503, detail="Database not available")
|
||||
|
||||
queue_id = generate_id()
|
||||
try:
|
||||
async with pool.acquire() as conn:
|
||||
# Get source ID if bundesland provided
|
||||
source_id = request.source_id
|
||||
if not source_id and request.bundesland:
|
||||
source = await conn.fetchrow(
|
||||
"SELECT id FROM zeugnis_sources WHERE bundesland = $1",
|
||||
request.bundesland
|
||||
)
|
||||
if source:
|
||||
source_id = source["id"]
|
||||
|
||||
if not source_id:
|
||||
raise HTTPException(status_code=400, detail="Source not found")
|
||||
|
||||
await conn.execute(
|
||||
"""
|
||||
INSERT INTO zeugnis_crawler_queue (id, source_id, priority, status)
|
||||
VALUES ($1, $2, $3, 'pending')
|
||||
""",
|
||||
queue_id, source_id, request.priority
|
||||
)
|
||||
return {"id": queue_id, "success": True}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Statistics Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/stats", response_model=dict)
|
||||
async def get_stats():
|
||||
"""Get zeugnis crawler statistics."""
|
||||
stats = await get_zeugnis_stats()
|
||||
return stats
|
||||
|
||||
|
||||
@router.get("/stats/bundesland", response_model=List[dict])
|
||||
async def get_bundesland_stats():
|
||||
"""Get statistics per Bundesland."""
|
||||
pool = await get_pool()
|
||||
|
||||
# Build stats from BUNDESLAENDER with DB data if available
|
||||
stats = []
|
||||
for code, info in BUNDESLAENDER.items():
|
||||
stat = {
|
||||
"bundesland": code,
|
||||
"name": info["name"],
|
||||
"training_allowed": get_training_allowed(code),
|
||||
"document_count": 0,
|
||||
"indexed_count": 0,
|
||||
"last_crawled": None,
|
||||
}
|
||||
|
||||
if pool:
|
||||
try:
|
||||
async with pool.acquire() as conn:
|
||||
row = await conn.fetchrow(
|
||||
"""
|
||||
SELECT
|
||||
COUNT(d.id) as doc_count,
|
||||
COUNT(CASE WHEN d.indexed_in_qdrant THEN 1 END) as indexed_count,
|
||||
MAX(u.last_crawled) as last_crawled
|
||||
FROM zeugnis_sources s
|
||||
LEFT JOIN zeugnis_seed_urls u ON s.id = u.source_id
|
||||
LEFT JOIN zeugnis_documents d ON u.id = d.seed_url_id
|
||||
WHERE s.bundesland = $1
|
||||
GROUP BY s.id
|
||||
""",
|
||||
code
|
||||
)
|
||||
if row:
|
||||
stat["document_count"] = row["doc_count"] or 0
|
||||
stat["indexed_count"] = row["indexed_count"] or 0
|
||||
stat["last_crawled"] = row["last_crawled"].isoformat() if row["last_crawled"] else None
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
stats.append(stat)
|
||||
|
||||
return stats
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Audit Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/audit/events", response_model=List[dict])
|
||||
async def get_audit_events(
|
||||
document_id: Optional[str] = None,
|
||||
event_type: Optional[str] = None,
|
||||
limit: int = Query(100, le=1000),
|
||||
days: int = Query(30, le=365),
|
||||
):
|
||||
"""Get audit events with optional filtering."""
|
||||
pool = await get_pool()
|
||||
if not pool:
|
||||
return []
|
||||
|
||||
try:
|
||||
since = datetime.now() - timedelta(days=days)
|
||||
async with pool.acquire() as conn:
|
||||
query = """
|
||||
SELECT * FROM zeugnis_usage_events
|
||||
WHERE created_at >= $1
|
||||
"""
|
||||
params = [since]
|
||||
|
||||
if document_id:
|
||||
query += " AND document_id = $2"
|
||||
params.append(document_id)
|
||||
if event_type:
|
||||
query += f" AND event_type = ${len(params) + 1}"
|
||||
params.append(event_type)
|
||||
|
||||
query += f" ORDER BY created_at DESC LIMIT ${len(params) + 1}"
|
||||
params.append(limit)
|
||||
|
||||
rows = await conn.fetch(query, *params)
|
||||
return [dict(r) for r in rows]
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/audit/export", response_model=dict)
|
||||
async def export_audit(
|
||||
days: int = Query(30, le=365),
|
||||
requested_by: str = Query(..., description="User requesting the export"),
|
||||
):
|
||||
"""Export audit data for GDPR compliance."""
|
||||
pool = await get_pool()
|
||||
if not pool:
|
||||
raise HTTPException(status_code=503, detail="Database not available")
|
||||
|
||||
try:
|
||||
since = datetime.now() - timedelta(days=days)
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch(
|
||||
"""
|
||||
SELECT * FROM zeugnis_usage_events
|
||||
WHERE created_at >= $1
|
||||
ORDER BY created_at DESC
|
||||
""",
|
||||
since
|
||||
)
|
||||
|
||||
doc_count = await conn.fetchval(
|
||||
"SELECT COUNT(DISTINCT document_id) FROM zeugnis_usage_events WHERE created_at >= $1",
|
||||
since
|
||||
)
|
||||
|
||||
return {
|
||||
"export_date": datetime.now().isoformat(),
|
||||
"requested_by": requested_by,
|
||||
"events": [dict(r) for r in rows],
|
||||
"document_count": doc_count or 0,
|
||||
"date_range_start": since.isoformat(),
|
||||
"date_range_end": datetime.now().isoformat(),
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
# Backward-compat shim -- module moved to zeugnis/api_docs.py
# Importing this module replaces its entry in sys.modules with the relocated
# package module, so existing `import <old_module>` call sites keep working.
# Underscore aliases keep the shim's own names out of the public namespace.
import importlib as _importlib
import sys as _sys
_sys.modules[__name__] = _importlib.import_module("zeugnis.api_docs")
|
||||
|
||||
Reference in New Issue
Block a user