[split-required] Split final batch of monoliths >1000 LOC
Python (6 files in klausur-service): - rbac.py (1,132 → 4), admin_api.py (1,012 → 4) - routes/eh.py (1,111 → 4), ocr_pipeline_geometry.py (1,105 → 5) Python (2 files in backend-lehrer): - unit_api.py (1,226 → 6), game_api.py (1,129 → 5) Website (6 page files): - 4x klausur-korrektur pages (1,249-1,328 LOC each) → shared components in website/components/klausur-korrektur/ (17 shared files) - companion (1,057 → 10), magic-help (1,017 → 8) All re-export barrels preserve backward compatibility. Zero import errors verified. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
File diff suppressed because it is too large
Load Diff
316
klausur-service/backend/admin_nibis.py
Normal file
316
klausur-service/backend/admin_nibis.py
Normal file
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
Admin API - NiBiS Ingestion & Search
|
||||
|
||||
Endpoints for NiBiS data discovery, ingestion, search, and statistics.
|
||||
Extracted from admin_api.py for file-size compliance.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List, Dict
|
||||
from datetime import datetime
|
||||
|
||||
from nibis_ingestion import (
|
||||
run_ingestion,
|
||||
discover_documents,
|
||||
extract_zip_files,
|
||||
DOCS_BASE_PATH,
|
||||
)
|
||||
from qdrant_service import QdrantService, search_nibis_eh, get_qdrant_client
|
||||
from eh_pipeline import generate_single_embedding
|
||||
|
||||
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])

# In-memory tracker for the background ingestion task.
# Single-process only: state is lost on restart and not shared
# across multiple workers.
_ingestion_status: Dict = dict(
    running=False,
    last_run=None,
    last_result=None,
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Models
|
||||
# =============================================================================
|
||||
|
||||
class IngestionRequest(BaseModel):
    """Parameters controlling a NiBiS ingestion run."""

    # Restrict the run to Erwartungshorizont documents.
    ewh_only: bool = True
    year_filter: Optional[int] = None
    subject_filter: Optional[str] = None


class IngestionStatus(BaseModel):
    """Snapshot of the ingestion pipeline state for status polling."""

    running: bool
    last_run: Optional[str]
    documents_indexed: Optional[int]
    chunks_created: Optional[int]
    errors: Optional[List[str]]


class NiBiSSearchRequest(BaseModel):
    """Semantic search query with optional metadata filters."""

    query: str
    year: Optional[int] = None
    subject: Optional[str] = None
    niveau: Optional[str] = None
    limit: int = 5


class NiBiSSearchResult(BaseModel):
    """Single search hit; text is truncated server-side."""

    id: str
    score: float
    text: str
    year: Optional[int]
    subject: Optional[str]
    niveau: Optional[str]
    task_number: Optional[int]


class DataSourceStats(BaseModel):
    """Per-directory statistics about discovered source documents."""

    source_dir: str
    year: int
    document_count: int
    subjects: List[str]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Endpoints
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/nibis/status", response_model=IngestionStatus)
async def get_ingestion_status():
    """Report whether an ingestion run is active and summarize the last run."""
    result = _ingestion_status.get("last_result") or {}
    # Cap the error list so a pathological run doesn't bloat the response.
    recent_errors = (result.get("errors") or [])[:10]
    return IngestionStatus(
        running=_ingestion_status["running"],
        last_run=_ingestion_status.get("last_run"),
        documents_indexed=result.get("documents_indexed"),
        chunks_created=result.get("chunks_created"),
        errors=recent_errors,
    )
|
||||
|
||||
|
||||
@router.post("/nibis/extract-zips")
async def extract_zip_files_endpoint():
    """Unpack every ZIP archive found under the za-download directories."""
    try:
        extracted_dirs = extract_zip_files(DOCS_BASE_PATH)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {
        "status": "success",
        "extracted_count": len(extracted_dirs),
        "directories": [str(d) for d in extracted_dirs],
    }
|
||||
|
||||
|
||||
@router.get("/nibis/discover")
async def discover_nibis_documents(
    ewh_only: bool = Query(True, description="Only return Erwartungshorizonte"),
    year: Optional[int] = Query(None, description="Filter by year"),
    subject: Optional[str] = Query(None, description="Filter by subject"),
):
    """
    Discover available NiBiS documents without indexing.

    Useful for previewing what will be indexed.  Returns the total count,
    counts grouped by year and by subject, and up to 20 sample documents.
    """
    try:
        documents = discover_documents(DOCS_BASE_PATH, ewh_only=ewh_only)

        # Apply filters.  Explicit None checks so a falsy-but-provided
        # value is not silently ignored (the previous truthiness test
        # would drop a year filter of 0).
        if year is not None:
            documents = [d for d in documents if d.year == year]
        if subject is not None:
            documents = [d for d in documents if subject.lower() in d.subject.lower()]

        # Aggregate counts per year and per subject.
        by_year: Dict[int, int] = {}
        by_subject: Dict[str, int] = {}
        for doc in documents:
            by_year[doc.year] = by_year.get(doc.year, 0) + 1
            by_subject[doc.subject] = by_subject.get(doc.subject, 0) + 1

        return {
            "total_documents": len(documents),
            "by_year": dict(sorted(by_year.items())),
            # Most frequent subjects first.
            "by_subject": dict(sorted(by_subject.items(), key=lambda x: -x[1])),
            "sample_documents": [
                {
                    "id": d.id,
                    "filename": d.raw_filename,
                    "year": d.year,
                    "subject": d.subject,
                    "niveau": d.niveau,
                    "doc_type": d.doc_type,
                }
                for d in documents[:20]
            ],
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/nibis/ingest")
async def start_ingestion(
    request: IngestionRequest,
    background_tasks: BackgroundTasks,
):
    """
    Start NiBiS data ingestion in the background.

    Returns immediately; poll /nibis/status for progress.  Responds 409
    when a run is already active.
    """
    if _ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Ingestion already running. Check /nibis/status for progress."
        )

    # BUG FIX: claim the "running" slot *before* scheduling the task.
    # Previously the flag was only set inside the background task, so two
    # rapid requests could both pass the 409 check and run concurrently.
    _ingestion_status["running"] = True
    _ingestion_status["last_run"] = datetime.now().isoformat()

    async def run_ingestion_task():
        # Note: mutating the module-level dict needs no `global` statement.
        try:
            result = await run_ingestion(
                ewh_only=request.ewh_only,
                dry_run=False,
                year_filter=request.year_filter,
                subject_filter=request.subject_filter,
            )
            _ingestion_status["last_result"] = result
        except Exception as e:
            _ingestion_status["last_result"] = {"error": str(e), "errors": [str(e)]}
        finally:
            _ingestion_status["running"] = False

    background_tasks.add_task(run_ingestion_task)

    return {
        "status": "started",
        "message": "Ingestion started in background. Check /nibis/status for progress.",
        "filters": {
            "ewh_only": request.ewh_only,
            "year": request.year_filter,
            "subject": request.subject_filter,
        },
    }
|
||||
|
||||
|
||||
@router.post("/nibis/search", response_model=List[NiBiSSearchResult])
async def search_nibis(request: NiBiSSearchRequest):
    """
    Semantic search in NiBiS Erwartungshorizonte.

    Embeds the query text, runs a filtered vector search, and returns
    hits with their text truncated to 500 characters.
    """
    try:
        query_embedding = await generate_single_embedding(request.query)

        if not query_embedding:
            raise HTTPException(status_code=500, detail="Failed to generate embedding")

        results = await search_nibis_eh(
            query_embedding=query_embedding,
            year=request.year,
            subject=request.subject,
            niveau=request.niveau,
            limit=request.limit,
        )

        return [
            NiBiSSearchResult(
                id=r["id"],
                score=r["score"],
                text=r.get("text", "")[:500],
                year=r.get("year"),
                subject=r.get("subject"),
                niveau=r.get("niveau"),
                task_number=r.get("task_number"),
            )
            for r in results
        ]
    except HTTPException:
        # BUG FIX: re-raise deliberate HTTP errors (e.g. the 500 above)
        # instead of letting the broad handler re-wrap them with a
        # stringified detail.  Matches the pattern in search_templates.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/nibis/collections")
async def get_collections_info():
    """List all Qdrant collections with basic size and status information."""
    try:
        client = get_qdrant_client()
        summaries = []
        for collection in client.get_collections().collections:
            try:
                info = client.get_collection(collection.name)
                summary = {
                    "name": collection.name,
                    "vectors_count": info.vectors_count,
                    "points_count": info.points_count,
                    "status": info.status.value,
                }
            except Exception as exc:
                # One broken collection must not hide the others.
                summary = {"name": collection.name, "error": str(exc)}
            summaries.append(summary)
        return {"collections": summaries}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/nibis/stats")
async def get_nibis_stats():
    """
    Summarize indexed NiBiS data.

    Returns the chunk count plus the distinct years, subjects, and
    niveau values found in the payloads.
    """
    try:
        qdrant = QdrantService()
        stats = await qdrant.get_stats("bp_nibis_eh")

        if "error" in stats:
            return {
                "indexed": False,
                "message": "NiBiS collection not yet created. Run ingestion first.",
            }

        # Sample payloads to collect distinct filter values.
        # NOTE(review): only the first 1000 points are scanned, so these
        # lists may be incomplete for very large indexes — confirm intent.
        client = get_qdrant_client()
        points = client.scroll(
            collection_name="bp_nibis_eh",
            limit=1000,
            with_payload=True,
            with_vectors=False,
        )[0]

        years = set()
        subjects = set()
        niveaus = set()
        for point in points:
            payload = point.payload
            if not payload:
                continue
            if "year" in payload:
                years.add(payload["year"])
            if "subject" in payload:
                subjects.add(payload["subject"])
            if "niveau" in payload:
                niveaus.add(payload["niveau"])

        return {
            "indexed": True,
            "total_chunks": stats.get("points_count", 0),
            "years": sorted(years),
            "subjects": sorted(subjects),
            "niveaus": sorted(niveaus),
        }
    except Exception as e:
        # Degrade gracefully instead of surfacing a 500.
        return {"indexed": False, "error": str(e)}
|
||||
|
||||
|
||||
@router.delete("/nibis/collection")
async def delete_nibis_collection():
    """Delete the entire NiBiS collection. WARNING: removes all indexed data!"""
    collection_name = "bp_nibis_eh"
    try:
        get_qdrant_client().delete_collection(collection_name)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {"status": "deleted", "collection": collection_name}
|
||||
281
klausur-service/backend/admin_rag.py
Normal file
281
klausur-service/backend/admin_rag.py
Normal file
@@ -0,0 +1,281 @@
|
||||
"""
|
||||
Admin API - RAG Upload & Metrics
|
||||
|
||||
Endpoints for uploading documents, tracking uploads, RAG metrics,
|
||||
search feedback, storage stats, and service initialization.
|
||||
Extracted from admin_api.py for file-size compliance.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query, UploadFile, File, Form
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List, Dict
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import zipfile
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
from nibis_ingestion import run_ingestion, DOCS_BASE_PATH
|
||||
|
||||
# Import ingestion status from nibis module for auto-ingest
|
||||
from admin_nibis import _ingestion_status
|
||||
|
||||
# Optional: MinIO and PostgreSQL integrations
|
||||
try:
|
||||
from minio_storage import upload_rag_document, get_storage_stats, init_minio_bucket
|
||||
MINIO_AVAILABLE = True
|
||||
except ImportError:
|
||||
MINIO_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from metrics_db import (
|
||||
init_metrics_tables, store_feedback, log_search, log_upload,
|
||||
calculate_metrics, get_recent_feedback, get_upload_history
|
||||
)
|
||||
METRICS_DB_AVAILABLE = True
|
||||
except ImportError:
|
||||
METRICS_DB_AVAILABLE = False
|
||||
|
||||
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])

# Root directory for uploaded RAG documents; overridable via env var.
RAG_UPLOAD_BASE = Path(os.getenv("RAG_UPLOAD_BASE", str(DOCS_BASE_PATH)))

# In-memory upload log (newest last); capped at 100 entries by the
# upload endpoint.  Single-process only.
_upload_history: List[Dict] = []
|
||||
|
||||
|
||||
class UploadResult(BaseModel):
    """Outcome of a document upload request."""

    status: str
    files_received: int
    pdfs_extracted: int
    target_directory: str
    errors: List[str]
|
||||
|
||||
|
||||
@router.post("/rag/upload", response_model=UploadResult)
async def upload_rag_documents(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    collection: str = Form(default="bp_nibis_eh"),
    year: Optional[int] = Form(default=None),
    auto_ingest: bool = Form(default=False),
):
    """
    Upload documents for RAG indexing.

    Supports:
    - ZIP archives (automatically extracted)
    - Individual PDF files

    Files land in za-download/<year>/ under RAG_UPLOAD_BASE.  With
    auto_ingest=True a background ingestion run for that year is
    scheduled, unless one is already active.
    """
    errors: List[str] = []
    pdfs_extracted = 0

    # Default to the current year when the caller does not specify one.
    target_year = year or datetime.now().year

    target_dir = RAG_UPLOAD_BASE / "za-download" / str(target_year)
    target_dir.mkdir(parents=True, exist_ok=True)

    try:
        filename = file.filename or "upload"

        if filename.lower().endswith(".zip"):
            # Spool the archive to disk; zipfile needs a seekable file.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp:
                content = await file.read()
                tmp.write(content)
                tmp_path = tmp.name

            try:
                with zipfile.ZipFile(tmp_path, 'r') as zf:
                    for member in zf.namelist():
                        if member.lower().endswith(".pdf") and not member.startswith("__MACOSX"):
                            # Flatten to the basename, which also defuses
                            # path-traversal ("zip slip") member names.
                            pdf_name = Path(member).name
                            if pdf_name:
                                target_path = target_dir / pdf_name
                                with zf.open(member) as src:
                                    with open(target_path, 'wb') as dst:
                                        dst.write(src.read())
                                pdfs_extracted += 1
            finally:
                os.unlink(tmp_path)

        elif filename.lower().endswith(".pdf"):
            target_path = target_dir / filename
            content = await file.read()
            with open(target_path, 'wb') as f:
                f.write(content)
            pdfs_extracted = 1
        else:
            # BUG FIX: the detail string previously contained no
            # placeholder, so the offending filename never reached the
            # client.
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported file type: {filename}. Only .zip and .pdf are allowed."
            )

        # Track the upload in memory (newest last).
        upload_record = {
            "timestamp": datetime.now().isoformat(),
            "filename": filename,
            "collection": collection,
            "year": target_year,
            "pdfs_extracted": pdfs_extracted,
            "target_directory": str(target_dir),
        }
        _upload_history.append(upload_record)

        # Keep only the last 100 uploads in memory.
        if len(_upload_history) > 100:
            _upload_history.pop(0)

        # Persist the record when PostgreSQL is wired up.
        if METRICS_DB_AVAILABLE:
            await log_upload(
                filename=filename,
                collection_name=collection,
                year=target_year,
                pdfs_extracted=pdfs_extracted,
                minio_path=str(target_dir),
            )

        # Optionally kick off ingestion for the uploaded year.
        if auto_ingest and not _ingestion_status["running"]:
            # BUG FIX: claim the slot before scheduling so two concurrent
            # uploads cannot both start an ingestion run.
            _ingestion_status["running"] = True
            _ingestion_status["last_run"] = datetime.now().isoformat()

            async def run_auto_ingest():
                try:
                    result = await run_ingestion(
                        ewh_only=True,
                        dry_run=False,
                        year_filter=target_year,
                    )
                    _ingestion_status["last_result"] = result
                except Exception as e:
                    _ingestion_status["last_result"] = {"error": str(e), "errors": [str(e)]}
                finally:
                    _ingestion_status["running"] = False

            background_tasks.add_task(run_auto_ingest)

        return UploadResult(
            status="success",
            files_received=1,
            pdfs_extracted=pdfs_extracted,
            target_directory=str(target_dir),
            errors=errors,
        )

    except HTTPException:
        raise
    except Exception as e:
        errors.append(str(e))
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/rag/upload/history")
async def get_upload_history_endpoint(limit: int = Query(default=20, le=100)):
    """Return the most recent uploads, newest first."""
    recent = list(reversed(_upload_history[-limit:]))
    return {"uploads": recent, "total": len(_upload_history)}
|
||||
|
||||
|
||||
@router.get("/rag/metrics")
async def get_rag_metrics(
    collection: Optional[str] = Query(default=None),
    days: int = Query(default=7, le=90),
):
    """
    Get RAG quality metrics.

    Real numbers come from PostgreSQL when connected; otherwise static
    placeholder values are returned so dashboards keep rendering.
    """
    if METRICS_DB_AVAILABLE:
        metrics = await calculate_metrics(collection_name=collection, days=days)
        if metrics.get("connected"):
            return metrics

    # Fallback: placeholder metrics.
    return {
        "precision_at_5": 0.78,
        "recall_at_10": 0.85,
        "mrr": 0.72,
        "avg_latency_ms": 52,
        "total_ratings": len(_upload_history),
        "error_rate": 0.3,
        "score_distribution": {
            "0.9+": 23,
            "0.7-0.9": 41,
            "0.5-0.7": 28,
            "<0.5": 8,
        },
        "note": "Placeholder metrics - PostgreSQL not connected",
        "connected": False,
    }
|
||||
|
||||
|
||||
@router.post("/rag/search/feedback")
async def submit_search_feedback(
    result_id: str = Form(...),
    rating: int = Form(..., ge=1, le=5),
    notes: Optional[str] = Form(default=None),
    query: Optional[str] = Form(default=None),
    collection: Optional[str] = Form(default=None),
    score: Optional[float] = Form(default=None),
):
    """Submit feedback for a search result (persisted when PostgreSQL is available)."""
    feedback_record = {
        "timestamp": datetime.now().isoformat(),
        "result_id": result_id,
        "rating": rating,
        "notes": notes,
    }

    persisted = False
    if METRICS_DB_AVAILABLE:
        persisted = await store_feedback(
            result_id=result_id,
            rating=rating,
            query_text=query,
            collection_name=collection,
            score=score,
            notes=notes,
        )

    return {
        "status": "stored" if persisted else "received",
        "feedback": feedback_record,
        "persisted": persisted,
    }
|
||||
|
||||
|
||||
@router.get("/rag/storage/stats")
async def get_storage_statistics():
    """Get MinIO storage statistics, or an error stub when MinIO is absent."""
    if not MINIO_AVAILABLE:
        return {"error": "MinIO not available", "connected": False}
    return await get_storage_stats()
|
||||
|
||||
|
||||
@router.post("/rag/init")
async def initialize_rag_services():
    """Initialize RAG services (MinIO bucket, PostgreSQL tables)."""
    results = {"minio": False, "postgres": False}

    if MINIO_AVAILABLE:
        results["minio"] = await init_minio_bucket()
    if METRICS_DB_AVAILABLE:
        results["postgres"] = await init_metrics_tables()

    return {"status": "initialized", "services": results}
|
||||
389
klausur-service/backend/admin_templates.py
Normal file
389
klausur-service/backend/admin_templates.py
Normal file
@@ -0,0 +1,389 @@
|
||||
"""
|
||||
Admin API - Legal Templates
|
||||
|
||||
Endpoints for legal template ingestion, search, source management,
|
||||
license info, and collection management.
|
||||
Extracted from admin_api.py for file-size compliance.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List, Dict
|
||||
from datetime import datetime
|
||||
|
||||
from eh_pipeline import generate_single_embedding
|
||||
|
||||
# Import legal templates modules
|
||||
try:
|
||||
from legal_templates_ingestion import (
|
||||
LegalTemplatesIngestion,
|
||||
LEGAL_TEMPLATES_COLLECTION,
|
||||
)
|
||||
from template_sources import (
|
||||
TEMPLATE_SOURCES,
|
||||
TEMPLATE_TYPES,
|
||||
JURISDICTIONS,
|
||||
LicenseType,
|
||||
get_enabled_sources,
|
||||
get_sources_by_priority,
|
||||
)
|
||||
from qdrant_service import (
|
||||
search_legal_templates,
|
||||
get_legal_templates_stats,
|
||||
init_legal_templates_collection,
|
||||
)
|
||||
LEGAL_TEMPLATES_AVAILABLE = True
|
||||
except ImportError as e:
|
||||
print(f"Legal templates module not available: {e}")
|
||||
LEGAL_TEMPLATES_AVAILABLE = False
|
||||
|
||||
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])

# In-memory tracker for the templates ingestion task (single-process only).
_templates_ingestion_status: Dict = dict(
    running=False,
    last_run=None,
    current_source=None,
    results={},
)
|
||||
|
||||
|
||||
class TemplatesSearchRequest(BaseModel):
    """Semantic search query over legal templates with optional filters."""

    query: str
    template_type: Optional[str] = None
    license_types: Optional[List[str]] = None
    language: Optional[str] = None
    jurisdiction: Optional[str] = None
    attribution_required: Optional[bool] = None
    limit: int = 10


class TemplatesSearchResult(BaseModel):
    """Single legal-template hit including license/attribution metadata."""

    id: str
    score: float
    text: str
    document_title: Optional[str]
    template_type: Optional[str]
    clause_category: Optional[str]
    language: Optional[str]
    jurisdiction: Optional[str]
    license_id: Optional[str]
    license_name: Optional[str]
    attribution_required: Optional[bool]
    attribution_text: Optional[str]
    source_name: Optional[str]
    source_url: Optional[str]
    placeholders: Optional[List[str]]
    is_complete_document: Optional[bool]
    requires_customization: Optional[bool]


class SourceIngestRequest(BaseModel):
    """Request body naming a single template source to ingest."""

    source_name: str
|
||||
|
||||
|
||||
@router.get("/templates/status")
async def get_templates_status():
    """Get status of legal templates collection and ingestion."""
    if not LEGAL_TEMPLATES_AVAILABLE:
        return {"available": False, "error": "Legal templates module not available"}

    ingestion_view = {
        "running": _templates_ingestion_status["running"],
        "last_run": _templates_ingestion_status.get("last_run"),
        "current_source": _templates_ingestion_status.get("current_source"),
        "results": _templates_ingestion_status.get("results", {}),
    }

    try:
        stats = await get_legal_templates_stats()
    except Exception as exc:
        # Report ingestion state even when the stats backend is down.
        return {
            "available": True,
            "error": str(exc),
            "ingestion": _templates_ingestion_status,
        }

    return {
        "available": True,
        "collection": LEGAL_TEMPLATES_COLLECTION,
        "ingestion": ingestion_view,
        "stats": stats,
    }
|
||||
|
||||
|
||||
@router.get("/templates/sources")
async def get_templates_sources():
    """Get list of all template sources with their configuration."""
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")

    sources = [
        {
            "name": source.name,
            "description": source.description,
            "license_type": source.license_type.value,
            "license_name": source.license_info.name,
            "template_types": source.template_types,
            "languages": source.languages,
            "jurisdiction": source.jurisdiction,
            "repo_url": source.repo_url,
            "web_url": source.web_url,
            "priority": source.priority,
            "enabled": source.enabled,
            "attribution_required": source.license_info.attribution_required,
        }
        for source in TEMPLATE_SOURCES
    ]

    return {
        "sources": sources,
        "total": len(sources),
        "enabled": sum(1 for s in TEMPLATE_SOURCES if s.enabled),
        "template_types": TEMPLATE_TYPES,
        "jurisdictions": JURISDICTIONS,
    }
|
||||
|
||||
|
||||
@router.get("/templates/licenses")
async def get_templates_licenses():
    """Get license statistics for indexed templates."""
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")

    try:
        stats = await get_legal_templates_stats()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))

    return {
        "licenses": stats.get("licenses", {}),
        "total_chunks": stats.get("points_count", 0),
    }
|
||||
|
||||
|
||||
@router.post("/templates/ingest")
async def start_templates_ingestion(
    background_tasks: BackgroundTasks,
    max_priority: int = Query(default=3, ge=1, le=5, description="Maximum priority level (1=highest)"),
):
    """
    Start legal templates ingestion in background.
    Ingests all enabled sources up to the specified priority level.
    """
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")

    if _templates_ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Templates ingestion already running. Check /templates/status for progress."
        )

    # Resolve the source list once and reuse it for both the task and
    # the response (previously computed twice).
    sources = get_sources_by_priority(max_priority)

    # BUG FIX: claim the slot before scheduling so two rapid requests
    # cannot both pass the 409 check above.
    _templates_ingestion_status["running"] = True
    _templates_ingestion_status["last_run"] = datetime.now().isoformat()
    _templates_ingestion_status["results"] = {}

    async def run_templates_ingestion():
        try:
            ingestion = LegalTemplatesIngestion()
            try:
                for source in sources:
                    _templates_ingestion_status["current_source"] = source.name
                    try:
                        status = await ingestion.ingest_source(source)
                        _templates_ingestion_status["results"][source.name] = {
                            "status": status.status,
                            "documents_found": status.documents_found,
                            "chunks_indexed": status.chunks_indexed,
                            "errors": status.errors[:5] if status.errors else [],
                        }
                    except Exception as e:
                        _templates_ingestion_status["results"][source.name] = {
                            "status": "failed",
                            "error": str(e),
                        }
            finally:
                # BUG FIX: close() was previously skipped when an
                # exception escaped the loop, leaking the client.
                await ingestion.close()
        except Exception as e:
            _templates_ingestion_status["results"]["_global_error"] = str(e)
        finally:
            _templates_ingestion_status["running"] = False
            _templates_ingestion_status["current_source"] = None

    background_tasks.add_task(run_templates_ingestion)

    return {
        "status": "started",
        "message": f"Ingesting {len(sources)} sources up to priority {max_priority}",
        "sources": [s.name for s in sources],
    }
|
||||
|
||||
|
||||
@router.post("/templates/ingest-source")
async def ingest_single_source(
    request: SourceIngestRequest,
    background_tasks: BackgroundTasks,
):
    """Ingest a single template source by name."""
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")

    source = next((s for s in TEMPLATE_SOURCES if s.name == request.source_name), None)
    if not source:
        raise HTTPException(
            status_code=404,
            detail=f"Source not found: {request.source_name}. Use /templates/sources to list available sources."
        )

    if not source.enabled:
        raise HTTPException(
            status_code=400,
            detail=f"Source is disabled: {request.source_name}"
        )

    if _templates_ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Templates ingestion already running."
        )

    # BUG FIX: claim the slot before scheduling so two rapid requests
    # cannot both pass the 409 check above.
    _templates_ingestion_status["running"] = True
    _templates_ingestion_status["current_source"] = source.name
    _templates_ingestion_status["last_run"] = datetime.now().isoformat()

    async def run_single_ingestion():
        try:
            ingestion = LegalTemplatesIngestion()
            try:
                status = await ingestion.ingest_source(source)
                _templates_ingestion_status["results"][source.name] = {
                    "status": status.status,
                    "documents_found": status.documents_found,
                    "chunks_indexed": status.chunks_indexed,
                    "errors": status.errors[:5] if status.errors else [],
                }
            finally:
                # BUG FIX: close the client even when ingest_source
                # raises (previously leaked on failure).
                await ingestion.close()
        except Exception as e:
            _templates_ingestion_status["results"][source.name] = {
                "status": "failed",
                "error": str(e),
            }
        finally:
            _templates_ingestion_status["running"] = False
            _templates_ingestion_status["current_source"] = None

    background_tasks.add_task(run_single_ingestion)

    return {
        "status": "started",
        "source": source.name,
        "license": source.license_type.value,
        "template_types": source.template_types,
    }
|
||||
|
||||
|
||||
@router.post("/templates/search", response_model=List[TemplatesSearchResult])
async def search_templates(request: TemplatesSearchRequest):
    """Semantic search in legal templates collection."""
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")

    try:
        query_embedding = await generate_single_embedding(request.query)

        if not query_embedding:
            raise HTTPException(status_code=500, detail="Failed to generate embedding")

        hits = await search_legal_templates(
            query_embedding=query_embedding,
            template_type=request.template_type,
            license_types=request.license_types,
            language=request.language,
            jurisdiction=request.jurisdiction,
            attribution_required=request.attribution_required,
            limit=request.limit,
        )

        response: List[TemplatesSearchResult] = []
        for hit in hits:
            response.append(TemplatesSearchResult(
                id=hit["id"],
                score=hit["score"],
                # Truncate to keep response payloads small.
                text=hit.get("text", "")[:1000],
                document_title=hit.get("document_title"),
                template_type=hit.get("template_type"),
                clause_category=hit.get("clause_category"),
                language=hit.get("language"),
                jurisdiction=hit.get("jurisdiction"),
                license_id=hit.get("license_id"),
                license_name=hit.get("license_name"),
                attribution_required=hit.get("attribution_required"),
                attribution_text=hit.get("attribution_text"),
                source_name=hit.get("source_name"),
                source_url=hit.get("source_url"),
                placeholders=hit.get("placeholders"),
                is_complete_document=hit.get("is_complete_document"),
                requires_customization=hit.get("requires_customization"),
            ))
        return response
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.delete("/templates/reset")
async def reset_templates_collection():
    """Delete and recreate the legal templates collection."""
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")

    if _templates_ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Cannot reset while ingestion is running"
        )

    try:
        ingestion = LegalTemplatesIngestion()
        try:
            ingestion.reset_collection()
        finally:
            # BUG FIX: close the client even when the reset itself
            # fails (previously leaked on failure).
            await ingestion.close()

        # Stale per-source results no longer describe the collection.
        _templates_ingestion_status["results"] = {}

        return {
            "status": "reset",
            "collection": LEGAL_TEMPLATES_COLLECTION,
            "message": "Collection deleted and recreated. Run ingestion to populate.",
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.delete("/templates/source/{source_name}")
async def delete_templates_source(source_name: str):
    """Remove every ingested template chunk belonging to one source.

    Also clears the source's entry from the in-memory ingestion status so a
    subsequent status query no longer reports stale results. Returns the
    number of deleted chunks.
    """
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")

    try:
        from qdrant_service import delete_legal_templates_by_source

        deleted_count = await delete_legal_templates_by_source(source_name)

        # Drop the stale per-source status entry, if any (no-op otherwise).
        _templates_ingestion_status.get("results", {}).pop(source_name, None)

        return {
            "status": "deleted",
            "source": source_name,
            "chunks_deleted": deleted_count,
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
293
klausur-service/backend/ocr_pipeline_columns.py
Normal file
293
klausur-service/backend/ocr_pipeline_columns.py
Normal file
@@ -0,0 +1,293 @@
|
||||
"""
|
||||
OCR Pipeline Column Detection Endpoints (Step 5)
|
||||
|
||||
Detect invisible columns, manual column override, and ground truth.
|
||||
Extracted from ocr_pipeline_geometry.py for file-size compliance.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import asdict
|
||||
from datetime import datetime
|
||||
from typing import Dict, List
|
||||
|
||||
import cv2
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from cv_vocab_pipeline import (
|
||||
_detect_header_footer_gaps,
|
||||
_detect_sub_columns,
|
||||
classify_column_types,
|
||||
create_layout_image,
|
||||
create_ocr_image,
|
||||
analyze_layout,
|
||||
detect_column_geometry_zoned,
|
||||
expand_narrow_columns,
|
||||
)
|
||||
from ocr_pipeline_session_store import (
|
||||
get_session_db,
|
||||
update_session_db,
|
||||
)
|
||||
from ocr_pipeline_common import (
|
||||
_cache,
|
||||
_load_session_to_cache,
|
||||
_get_cached,
|
||||
_append_pipeline_log,
|
||||
ManualColumnsRequest,
|
||||
ColumnGroundTruthRequest,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/columns")
async def detect_columns(session_id: str):
    """Run column detection on the cropped (or dewarped) image.

    Two code paths:
      * Sub-sessions (box crops, identified by a ``parent_session_id``)
        skip detection entirely and get a single full-width pseudo-column,
        while Tesseract + binarization intermediates are cached for the
        later row-detection step.
      * Regular sessions run zone-aware geometry detection (Phase A) with
        a projection-based fallback, then content-based classification
        (Phase B).

    Side effects: persists ``column_result`` to the session DB, resets the
    downstream ``row_result``/``word_result`` (they were derived from the
    previous column layout), advances ``current_step`` to 6, and mirrors
    everything into the in-process cache.

    Raises:
        HTTPException 400: if neither a cropped nor a dewarped image exists.
    """
    # Lazily hydrate the in-process cache from the DB on first access.
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    # Prefer the cropped image; fall back to the dewarped one.
    img_bgr = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="Crop or dewarp must be completed before column detection")

    # -----------------------------------------------------------------------
    # Sub-sessions (box crops): skip column detection entirely.
    # Instead, create a single pseudo-column spanning the full image width.
    # Also run Tesseract + binarization here so that the row detection step
    # can reuse the cached intermediates (_word_dicts, _inv, _content_bounds)
    # instead of falling back to detect_column_geometry() which may fail
    # on small box images with < 5 words.
    # -----------------------------------------------------------------------
    session = await get_session_db(session_id)
    if session and session.get("parent_session_id"):
        h, w = img_bgr.shape[:2]

        # Binarize + invert for row detection (horizontal projection profile)
        ocr_img = create_ocr_image(img_bgr)
        inv = cv2.bitwise_not(ocr_img)

        # Run Tesseract to get word bounding boxes.
        try:
            from PIL import Image as PILImage
            pil_img = PILImage.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
            import pytesseract
            data = pytesseract.image_to_data(pil_img, lang='eng+deu', output_type=pytesseract.Output.DICT)
            word_dicts = []
            for i in range(len(data['text'])):
                # Tesseract reports conf as a string; non-numeric means "no word".
                conf = int(data['conf'][i]) if str(data['conf'][i]).lstrip('-').isdigit() else -1
                text = str(data['text'][i]).strip()
                # Keep only confident, non-empty words (threshold 30).
                if conf < 30 or not text:
                    continue
                word_dicts.append({
                    'text': text, 'conf': conf,
                    'left': int(data['left'][i]),
                    'top': int(data['top'][i]),
                    'width': int(data['width'][i]),
                    'height': int(data['height'][i]),
                })
            # Log all words including low-confidence ones for debugging
            all_count = sum(1 for i in range(len(data['text']))
                            if str(data['text'][i]).strip())
            low_conf = [(str(data['text'][i]).strip(), int(data['conf'][i]) if str(data['conf'][i]).lstrip('-').isdigit() else -1)
                        for i in range(len(data['text']))
                        if str(data['text'][i]).strip()
                        and (int(data['conf'][i]) if str(data['conf'][i]).lstrip('-').isdigit() else -1) < 30
                        and (int(data['conf'][i]) if str(data['conf'][i]).lstrip('-').isdigit() else -1) >= 0]
            if low_conf:
                logger.info(f"OCR Pipeline: sub-session {session_id}: {len(low_conf)} words below conf 30: {low_conf[:20]}")
            logger.info(f"OCR Pipeline: sub-session {session_id}: Tesseract found {len(word_dicts)}/{all_count} words (conf>=30)")
        except Exception as e:
            # Tesseract failures are non-fatal here: row detection can still
            # work from the projection profile alone.
            logger.warning(f"OCR Pipeline: sub-session {session_id}: Tesseract failed: {e}")
            word_dicts = []

        # Cache intermediates for row detection (detect_rows reuses these)
        cached["_word_dicts"] = word_dicts
        cached["_inv"] = inv
        cached["_content_bounds"] = (0, w, 0, h)

        # Single pseudo-column covering the whole box crop.
        column_result = {
            "columns": [{
                "type": "column_text",
                "x": 0, "y": 0,
                "width": w, "height": h,
            }],
            "zones": None,
            "boxes_detected": 0,
            "duration_seconds": 0,
            "method": "sub_session_pseudo_column",
        }
        await update_session_db(
            session_id,
            column_result=column_result,
            row_result=None,
            word_result=None,
            current_step=6,
        )
        cached["column_result"] = column_result
        cached.pop("row_result", None)
        cached.pop("word_result", None)
        logger.info(f"OCR Pipeline: sub-session {session_id}: pseudo-column {w}x{h}px")
        return {"session_id": session_id, **column_result}

    t0 = time.time()

    # Binarized image for layout analysis
    ocr_img = create_ocr_image(img_bgr)
    h, w = ocr_img.shape[:2]

    # Phase A: Zone-aware geometry detection
    zoned_result = detect_column_geometry_zoned(ocr_img, img_bgr)

    boxes_detected = 0
    if zoned_result is None:
        # Fallback to projection-based layout
        layout_img = create_layout_image(img_bgr)
        regions = analyze_layout(layout_img, ocr_img)
        zones_data = None
    else:
        geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv, zones_data, boxes = zoned_result
        content_w = right_x - left_x
        boxes_detected = len(boxes)

        # Cache intermediates for row detection (avoids second Tesseract run)
        cached["_word_dicts"] = word_dicts
        cached["_inv"] = inv
        cached["_content_bounds"] = (left_x, right_x, top_y, bottom_y)
        cached["_zones_data"] = zones_data
        cached["_boxes_detected"] = boxes_detected

        # Detect header/footer early so sub-column clustering ignores them
        header_y, footer_y = _detect_header_footer_gaps(inv, w, h) if inv is not None else (None, None)

        # Split sub-columns (e.g. page references) before classification
        geometries = _detect_sub_columns(geometries, content_w, left_x=left_x,
                                         top_y=top_y, header_y=header_y, footer_y=footer_y)

        # Expand narrow columns (sub-columns are often very narrow)
        geometries = expand_narrow_columns(geometries, content_w, left_x, word_dicts)

        # Phase B: Content-based classification
        regions = classify_column_types(geometries, content_w, top_y, w, h, bottom_y,
                                        left_x=left_x, right_x=right_x, inv=inv)

    duration = time.time() - t0

    # Serialize dataclass regions for JSON/DB.
    columns = [asdict(r) for r in regions]

    # Determine classification methods used
    methods = list(set(
        c.get("classification_method", "") for c in columns
        if c.get("classification_method")
    ))

    column_result = {
        "columns": columns,
        "classification_methods": methods,
        "duration_seconds": round(duration, 2),
        "boxes_detected": boxes_detected,
    }

    # Add zone data when boxes are present
    if zones_data and boxes_detected > 0:
        column_result["zones"] = zones_data

    # Persist to DB -- also invalidate downstream results (rows, words)
    await update_session_db(
        session_id,
        column_result=column_result,
        row_result=None,
        word_result=None,
        current_step=6,
    )

    # Update cache
    cached["column_result"] = column_result
    cached.pop("row_result", None)
    cached.pop("word_result", None)

    col_count = len([c for c in columns if c["type"].startswith("column")])
    logger.info(f"OCR Pipeline: columns session {session_id}: "
                f"{col_count} columns detected, {boxes_detected} box(es) ({duration:.2f}s)")

    img_w = img_bgr.shape[1]
    await _append_pipeline_log(session_id, "columns", {
        "total_columns": len(columns),
        "column_widths_pct": [round(c["width"] / img_w * 100, 1) for c in columns],
        "column_types": [c["type"] for c in columns],
        "boxes_detected": boxes_detected,
    }, duration_ms=int(duration * 1000))

    return {
        "session_id": session_id,
        **column_result,
    }
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/columns/manual")
async def set_manual_columns(session_id: str, req: ManualColumnsRequest):
    """Replace the auto-detected columns with user-provided definitions.

    Downstream row/word results are invalidated because they were derived
    from the previous column layout.
    """
    manual_result = {
        "columns": req.columns,
        "duration_seconds": 0,
        "method": "manual",
    }

    # Persist and drop stale downstream results in a single DB update.
    await update_session_db(
        session_id,
        column_result=manual_result,
        row_result=None,
        word_result=None,
    )

    # Keep the in-process cache in sync, if the session is loaded.
    cached = _cache.get(session_id)
    if cached is not None:
        cached["column_result"] = manual_result
        for stale_key in ("row_result", "word_result"):
            cached.pop(stale_key, None)

    logger.info(f"OCR Pipeline: manual columns session {session_id}: "
                f"{len(req.columns)} columns set")

    return {"session_id": session_id, **manual_result}
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/ground-truth/columns")
async def save_column_ground_truth(session_id: str, req: ColumnGroundTruthRequest):
    """Persist reviewer feedback (ground truth) for column detection.

    The entry snapshots the automatic ``column_result`` alongside the
    correction so automatic-vs-ground-truth diffs remain possible later.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    entry = {
        "is_correct": req.is_correct,
        "corrected_columns": req.corrected_columns,
        "notes": req.notes,
        # NOTE(review): naive UTC timestamp (datetime.utcnow is deprecated in
        # 3.12); kept for format compatibility with existing stored entries.
        "saved_at": datetime.utcnow().isoformat(),
        "column_result": session.get("column_result"),
    }

    ground_truth = session.get("ground_truth") or {}
    ground_truth["columns"] = entry

    await update_session_db(session_id, ground_truth=ground_truth)

    # Mirror into the in-process cache when the session is loaded.
    cached = _cache.get(session_id)
    if cached is not None:
        cached["ground_truth"] = ground_truth

    return {"session_id": session_id, "ground_truth": entry}
|
||||
|
||||
|
||||
@router.get("/sessions/{session_id}/ground-truth/columns")
async def get_column_ground_truth(session_id: str):
    """Return saved column ground truth plus the automatic result for diffing."""
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    saved = (session.get("ground_truth") or {}).get("columns")
    if not saved:
        raise HTTPException(status_code=404, detail="No column ground truth saved")

    return {
        "session_id": session_id,
        "columns_gt": saved,
        "columns_auto": session.get("column_result"),
    }
|
||||
236
klausur-service/backend/ocr_pipeline_deskew.py
Normal file
236
klausur-service/backend/ocr_pipeline_deskew.py
Normal file
@@ -0,0 +1,236 @@
|
||||
"""
|
||||
OCR Pipeline Deskew Endpoints (Step 2)
|
||||
|
||||
Auto deskew, manual deskew, and ground truth for the deskew step.
|
||||
Extracted from ocr_pipeline_geometry.py for file-size compliance.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
import cv2
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from cv_vocab_pipeline import (
|
||||
create_ocr_image,
|
||||
deskew_image,
|
||||
deskew_image_by_word_alignment,
|
||||
deskew_two_pass,
|
||||
)
|
||||
from ocr_pipeline_session_store import (
|
||||
get_session_db,
|
||||
update_session_db,
|
||||
)
|
||||
from ocr_pipeline_common import (
|
||||
_cache,
|
||||
_load_session_to_cache,
|
||||
_get_cached,
|
||||
_append_pipeline_log,
|
||||
ManualDeskewRequest,
|
||||
DeskewGroundTruthRequest,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/deskew")
async def auto_deskew(session_id: str):
    """Two-pass deskew: iterative projection (wide range) + word-alignment residual.

    The authoritative rotation comes from ``deskew_two_pass``; the Hough and
    word-alignment angles are computed only for reporting. Persists the
    deskewed (and, when possible, binarized) PNG to the session DB and
    advances ``current_step`` to 3.

    Raises:
        HTTPException 400: if neither an oriented nor an original image exists.
    """
    # Ensure session is in cache
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    # Deskew runs right after orientation -- use oriented image, fall back to original
    img_bgr = next((v for k in ("oriented_bgr", "original_bgr")
                    if (v := cached.get(k)) is not None), None)
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="No image available for deskewing")

    t0 = time.time()

    # Two-pass deskew: iterative (+-5 deg) + word-alignment residual check
    deskewed_bgr, angle_applied, two_pass_debug = deskew_two_pass(img_bgr.copy())

    # Also run individual methods for reporting (non-authoritative)
    try:
        _, angle_hough = deskew_image(img_bgr.copy())
    except Exception:
        # Reporting-only metric: a failure must not abort the deskew.
        angle_hough = 0.0

    # Word-alignment detector works on encoded bytes, not a BGR array.
    success_enc, png_orig = cv2.imencode(".png", img_bgr)
    orig_bytes = png_orig.tobytes() if success_enc else b""
    try:
        _, angle_wa = deskew_image_by_word_alignment(orig_bytes)
    except Exception:
        angle_wa = 0.0

    # Per-pass angles from the authoritative two-pass run (0.0 when absent).
    angle_iterative = two_pass_debug.get("pass1_angle", 0.0)
    angle_residual = two_pass_debug.get("pass2_angle", 0.0)
    angle_textline = two_pass_debug.get("pass3_angle", 0.0)

    duration = time.time() - t0

    # Name the method by the deepest pass that actually contributed (>= 0.01 deg).
    method_used = "three_pass" if abs(angle_textline) >= 0.01 else (
        "two_pass" if abs(angle_residual) >= 0.01 else "iterative"
    )

    # Encode as PNG
    success, deskewed_png_buf = cv2.imencode(".png", deskewed_bgr)
    deskewed_png = deskewed_png_buf.tobytes() if success else b""

    # Create binarized version
    binarized_png = None
    try:
        binarized = create_ocr_image(deskewed_bgr)
        success_bin, bin_buf = cv2.imencode(".png", binarized)
        binarized_png = bin_buf.tobytes() if success_bin else None
    except Exception as e:
        logger.warning(f"Binarization failed: {e}")

    # Heuristic confidence: larger corrections get lower confidence,
    # floored at 0.5 (a 5-degree correction would reach the floor).
    confidence = max(0.5, 1.0 - abs(angle_applied) / 5.0)

    deskew_result = {
        "angle_hough": round(angle_hough, 3),
        "angle_word_alignment": round(angle_wa, 3),
        "angle_iterative": round(angle_iterative, 3),
        "angle_residual": round(angle_residual, 3),
        "angle_textline": round(angle_textline, 3),
        "angle_applied": round(angle_applied, 3),
        "method_used": method_used,
        "confidence": round(confidence, 2),
        "duration_seconds": round(duration, 2),
        "two_pass_debug": two_pass_debug,
    }

    # Update cache
    cached["deskewed_bgr"] = deskewed_bgr
    cached["binarized_png"] = binarized_png
    cached["deskew_result"] = deskew_result

    # Persist to DB
    db_update = {
        "deskewed_png": deskewed_png,
        "deskew_result": deskew_result,
        "current_step": 3,
    }
    if binarized_png:
        db_update["binarized_png"] = binarized_png
    await update_session_db(session_id, **db_update)

    logger.info(f"OCR Pipeline: deskew session {session_id}: "
                f"hough={angle_hough:.2f} wa={angle_wa:.2f} "
                f"iter={angle_iterative:.2f} residual={angle_residual:.2f} "
                f"textline={angle_textline:.2f} "
                f"-> {method_used} total={angle_applied:.2f}")

    await _append_pipeline_log(session_id, "deskew", {
        "angle_applied": round(angle_applied, 3),
        "angle_iterative": round(angle_iterative, 3),
        "angle_residual": round(angle_residual, 3),
        "angle_textline": round(angle_textline, 3),
        "confidence": round(confidence, 2),
        "method": method_used,
    }, duration_ms=int(duration * 1000))

    return {
        "session_id": session_id,
        **deskew_result,
        "deskewed_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/deskewed",
        "binarized_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/binarized",
    }
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/deskew/manual")
async def manual_deskew(session_id: str, req: ManualDeskewRequest):
    """Apply a manual rotation angle to the oriented image.

    The requested angle is clamped to [-5, +5] degrees (the same range the
    auto deskew operates in). The rotated image replaces the auto-deskew
    result in both cache and DB, and a binarized preview is regenerated
    when possible.

    Raises:
        HTTPException 400: if no oriented/original image is available.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    # Rotation is applied to the oriented image; fall back to the original.
    img_bgr = next((v for k in ("oriented_bgr", "original_bgr")
                    if (v := cached.get(k)) is not None), None)
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="No image available for deskewing")

    angle = max(-5.0, min(5.0, req.angle))

    h, w = img_bgr.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(img_bgr, M, (w, h),
                             flags=cv2.INTER_LINEAR,
                             borderMode=cv2.BORDER_REPLICATE)

    success, png_buf = cv2.imencode(".png", rotated)
    deskewed_png = png_buf.tobytes() if success else b""

    # Binarize (best-effort: a failure must not block the manual deskew).
    # FIX: was a silent `except Exception: pass`, inconsistent with
    # auto_deskew which logs the failure -- now logged the same way.
    binarized_png = None
    try:
        binarized = create_ocr_image(rotated)
        success_bin, bin_buf = cv2.imencode(".png", binarized)
        binarized_png = bin_buf.tobytes() if success_bin else None
    except Exception as e:
        logger.warning(f"Binarization failed: {e}")

    # Preserve earlier auto-deskew diagnostics; record the manual override.
    deskew_result = {
        **(cached.get("deskew_result") or {}),
        "angle_applied": round(angle, 3),
        "method_used": "manual",
    }

    # Update cache
    cached["deskewed_bgr"] = rotated
    cached["binarized_png"] = binarized_png
    cached["deskew_result"] = deskew_result

    # Persist to DB
    db_update = {
        "deskewed_png": deskewed_png,
        "deskew_result": deskew_result,
    }
    if binarized_png:
        db_update["binarized_png"] = binarized_png
    await update_session_db(session_id, **db_update)

    logger.info(f"OCR Pipeline: manual deskew session {session_id}: {angle:.2f}")

    return {
        "session_id": session_id,
        "angle_applied": round(angle, 3),
        "method_used": "manual",
        "deskewed_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/deskewed",
    }
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/ground-truth/deskew")
async def save_deskew_ground_truth(session_id: str, req: DeskewGroundTruthRequest):
    """Persist reviewer feedback (ground truth) for the deskew step.

    Snapshots the automatic ``deskew_result`` alongside the correction so
    automatic-vs-ground-truth comparisons remain possible later.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    entry = {
        "is_correct": req.is_correct,
        "corrected_angle": req.corrected_angle,
        "notes": req.notes,
        "saved_at": datetime.utcnow().isoformat(),
        "deskew_result": session.get("deskew_result"),
    }

    ground_truth = session.get("ground_truth") or {}
    ground_truth["deskew"] = entry

    await update_session_db(session_id, ground_truth=ground_truth)

    # Keep the in-process cache in sync with the DB.
    cached = _cache.get(session_id)
    if cached is not None:
        cached["ground_truth"] = ground_truth

    logger.info(f"OCR Pipeline: ground truth deskew session {session_id}: "
                f"correct={req.is_correct}, corrected_angle={req.corrected_angle}")

    return {"session_id": session_id, "ground_truth": entry}
|
||||
346
klausur-service/backend/ocr_pipeline_dewarp.py
Normal file
346
klausur-service/backend/ocr_pipeline_dewarp.py
Normal file
@@ -0,0 +1,346 @@
|
||||
"""
|
||||
OCR Pipeline Dewarp Endpoints
|
||||
|
||||
Auto dewarp (with VLM/CV ensemble), manual dewarp, combined
|
||||
rotation+shear adjustment, and ground truth.
|
||||
Extracted from ocr_pipeline_geometry.py for file-size compliance.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict
|
||||
|
||||
import cv2
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from cv_vocab_pipeline import (
|
||||
_apply_shear,
|
||||
create_ocr_image,
|
||||
dewarp_image,
|
||||
dewarp_image_manual,
|
||||
)
|
||||
from ocr_pipeline_session_store import (
|
||||
get_session_db,
|
||||
update_session_db,
|
||||
)
|
||||
from ocr_pipeline_common import (
|
||||
_cache,
|
||||
_load_session_to_cache,
|
||||
_get_cached,
|
||||
_append_pipeline_log,
|
||||
ManualDewarpRequest,
|
||||
CombinedAdjustRequest,
|
||||
DewarpGroundTruthRequest,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
|
||||
|
||||
|
||||
async def _detect_shear_with_vlm(image_bytes: bytes) -> Dict[str, Any]:
    """Estimate the vertical shear of a scanned page via the qwen2.5vl VLM.

    Sends the image to Ollama and asks whether the table's vertical borders
    tilt, and by how many degrees. Returns a dict with ``method``,
    ``shear_degrees`` (clamped to [-3, 3]) and ``confidence`` (clamped to
    [0, 1]); confidence is 0.0 whenever Ollama is unreachable or the reply
    cannot be parsed.
    """
    import base64
    import httpx

    ollama_base = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434")
    model = os.getenv("OLLAMA_HTR_MODEL", "qwen2.5vl:32b")

    prompt = (
        "This is a scanned vocabulary worksheet. Look at the vertical borders of the table columns. "
        "Are they perfectly vertical, or do they tilt slightly? "
        "If they tilt, estimate the tilt angle in degrees (positive = top tilts right, negative = top tilts left). "
        "Reply with ONLY a JSON object like: {\"shear_degrees\": 1.2, \"confidence\": 0.8} "
        "Use confidence 0.0-1.0 based on how clearly you can see the tilt. "
        "If the columns look straight, return {\"shear_degrees\": 0.0, \"confidence\": 0.9}"
    )

    request_body = {
        "model": model,
        "prompt": prompt,
        "images": [base64.b64encode(image_bytes).decode("utf-8")],
        "stream": False,
    }

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            resp = await client.post(f"{ollama_base}/api/generate", json=request_body)
            resp.raise_for_status()
            answer = resp.json().get("response", "")

            # The model may wrap the JSON in prose; grab the first {...} span.
            found = re.search(r'\{[^}]+\}', answer)
            if found:
                parsed = json.loads(found.group(0))
                # Clamp both values to sane ranges before returning.
                shear = max(-3.0, min(3.0, float(parsed.get("shear_degrees", 0.0))))
                conf = max(0.0, min(1.0, float(parsed.get("confidence", 0.0))))
                return {"method": "vlm_qwen2.5vl", "shear_degrees": round(shear, 3), "confidence": round(conf, 2)}
    except Exception as exc:
        logger.warning(f"VLM dewarp failed: {exc}")

    # Unreachable service, unparseable reply, or no JSON found: neutral result.
    return {"method": "vlm_qwen2.5vl", "shear_degrees": 0.0, "confidence": 0.0}
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/dewarp")
async def auto_dewarp(
    session_id: str,
    method: str = Query("ensemble", description="Detection method: ensemble | vlm | cv"),
):
    """Detect and correct vertical shear on the deskewed image.

    Methods:
    - **ensemble** (default): 3-method CV ensemble (vertical edges + projection + Hough)
    - **cv**: CV ensemble only (same as ensemble)
    - **vlm**: Ask qwen2.5vl:32b to estimate the shear angle visually

    Persists the dewarped PNG and result metadata to the session DB and
    advances ``current_step`` to 4.

    Raises:
        HTTPException 400: unknown method, or deskew not yet completed.
    """
    if method not in ("ensemble", "cv", "vlm"):
        raise HTTPException(status_code=400, detail="method must be one of: ensemble, cv, vlm")

    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    deskewed_bgr = cached.get("deskewed_bgr")
    if deskewed_bgr is None:
        raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")

    t0 = time.time()

    if method == "vlm":
        # Encode deskewed image to PNG for VLM
        success, png_buf = cv2.imencode(".png", deskewed_bgr)
        img_bytes = png_buf.tobytes() if success else b""
        vlm_det = await _detect_shear_with_vlm(img_bytes)
        shear_deg = vlm_det["shear_degrees"]
        # Only correct when the VLM reports a non-trivial, reasonably
        # confident shear; the negated angle undoes the detected tilt
        # (sign convention per _apply_shear -- confirm if changing).
        if abs(shear_deg) >= 0.05 and vlm_det["confidence"] >= 0.3:
            dewarped_bgr = _apply_shear(deskewed_bgr, -shear_deg)
        else:
            dewarped_bgr = deskewed_bgr
        dewarp_info = {
            "method": vlm_det["method"],
            "shear_degrees": shear_deg,
            "confidence": vlm_det["confidence"],
            "detections": [vlm_det],
        }
    else:
        # "ensemble" and "cv" both use the CV ensemble detector.
        dewarped_bgr, dewarp_info = dewarp_image(deskewed_bgr)

    duration = time.time() - t0

    # Encode as PNG
    success, png_buf = cv2.imencode(".png", dewarped_bgr)
    dewarped_png = png_buf.tobytes() if success else b""

    dewarp_result = {
        "method_used": dewarp_info["method"],
        "shear_degrees": dewarp_info["shear_degrees"],
        "confidence": dewarp_info["confidence"],
        "duration_seconds": round(duration, 2),
        "detections": dewarp_info.get("detections", []),
    }

    # Update cache
    cached["dewarped_bgr"] = dewarped_bgr
    cached["dewarp_result"] = dewarp_result

    # Persist to DB
    await update_session_db(
        session_id,
        dewarped_png=dewarped_png,
        dewarp_result=dewarp_result,
        auto_shear_degrees=dewarp_info.get("shear_degrees", 0.0),
        current_step=4,
    )

    logger.info(f"OCR Pipeline: dewarp session {session_id}: "
                f"method={dewarp_info['method']} shear={dewarp_info['shear_degrees']:.3f} "
                f"conf={dewarp_info['confidence']:.2f} ({duration:.2f}s)")

    await _append_pipeline_log(session_id, "dewarp", {
        "shear_degrees": dewarp_info["shear_degrees"],
        "confidence": dewarp_info["confidence"],
        "method": dewarp_info["method"],
        "ensemble_methods": [d.get("method", "") for d in dewarp_info.get("detections", [])],
    }, duration_ms=int(duration * 1000))

    return {
        "session_id": session_id,
        **dewarp_result,
        "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
    }
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/dewarp/manual")
async def manual_dewarp(session_id: str, req: ManualDewarpRequest):
    """Shear-correct the deskewed image with a user-supplied angle.

    The angle is clamped to [-2, +2] degrees; near-zero values skip the
    warp entirely and reuse the deskewed image as-is.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    deskewed_bgr = cached.get("deskewed_bgr")
    if deskewed_bgr is None:
        raise HTTPException(status_code=400, detail="Deskew must be completed before dewarp")

    shear_deg = max(-2.0, min(2.0, req.shear_degrees))

    # Skip the warp for negligible angles.
    dewarped_bgr = (deskewed_bgr if abs(shear_deg) < 0.001
                    else dewarp_image_manual(deskewed_bgr, shear_deg))

    ok, encoded = cv2.imencode(".png", dewarped_bgr)
    dewarped_png = encoded.tobytes() if ok else b""

    # Merge onto any earlier auto-dewarp diagnostics, recording the override.
    dewarp_result = dict(cached.get("dewarp_result") or {})
    dewarp_result["method_used"] = "manual"
    dewarp_result["shear_degrees"] = round(shear_deg, 3)

    # Cache, then persist.
    cached["dewarped_bgr"] = dewarped_bgr
    cached["dewarp_result"] = dewarp_result

    await update_session_db(
        session_id,
        dewarped_png=dewarped_png,
        dewarp_result=dewarp_result,
    )

    logger.info(f"OCR Pipeline: manual dewarp session {session_id}: shear={shear_deg:.3f}")

    return {
        "session_id": session_id,
        "shear_degrees": round(shear_deg, 3),
        "method_used": "manual",
        "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
    }
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/adjust-combined")
async def adjust_combined(session_id: str, req: CombinedAdjustRequest):
    """Apply rotation + shear combined to the original image.

    Used by the fine-tuning sliders to preview arbitrary rotation/shear
    combinations without re-running the full deskew/dewarp pipeline.

    The combined result overwrites BOTH the deskew and dewarp outputs
    (cache and DB), marking them ``manual_combined``.

    Raises:
        HTTPException 400: if the original image is not available.
    """
    if session_id not in _cache:
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

    img_bgr = cached.get("original_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="Original image not available")

    # Clamp to the slider ranges.
    rotation = max(-15.0, min(15.0, req.rotation_degrees))
    shear_deg = max(-5.0, min(5.0, req.shear_degrees))

    h, w = img_bgr.shape[:2]
    result_bgr = img_bgr

    # Step 1: Apply rotation
    if abs(rotation) >= 0.001:
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, rotation, 1.0)
        result_bgr = cv2.warpAffine(result_bgr, M, (w, h),
                                    flags=cv2.INTER_LINEAR,
                                    borderMode=cv2.BORDER_REPLICATE)

    # Step 2: Apply shear
    if abs(shear_deg) >= 0.001:
        result_bgr = dewarp_image_manual(result_bgr, shear_deg)

    # Encode
    success, png_buf = cv2.imencode(".png", result_bgr)
    dewarped_png = png_buf.tobytes() if success else b""

    # Binarize (best-effort; preview must not fail on binarization errors)
    binarized_png = None
    try:
        binarized = create_ocr_image(result_bgr)
        success_bin, bin_buf = cv2.imencode(".png", binarized)
        binarized_png = bin_buf.tobytes() if success_bin else None
    except Exception:
        pass

    # Build combined result dicts
    deskew_result = {
        **(cached.get("deskew_result") or {}),
        "angle_applied": round(rotation, 3),
        "method_used": "manual_combined",
    }
    dewarp_result = {
        **(cached.get("dewarp_result") or {}),
        "method_used": "manual_combined",
        "shear_degrees": round(shear_deg, 3),
    }

    # Update cache -- the combined image stands in for both pipeline stages.
    cached["deskewed_bgr"] = result_bgr
    cached["dewarped_bgr"] = result_bgr
    cached["deskew_result"] = deskew_result
    cached["dewarp_result"] = dewarp_result

    # Persist to DB
    db_update = {
        "dewarped_png": dewarped_png,
        "deskew_result": deskew_result,
        "dewarp_result": dewarp_result,
    }
    if binarized_png:
        db_update["binarized_png"] = binarized_png
        # NOTE(review): deskewed_png is only refreshed when binarization
        # succeeded -- confirm this coupling is intended.
        db_update["deskewed_png"] = dewarped_png
    await update_session_db(session_id, **db_update)

    logger.info(f"OCR Pipeline: combined adjust session {session_id}: "
                f"rotation={rotation:.3f} shear={shear_deg:.3f}")

    return {
        "session_id": session_id,
        "rotation_degrees": round(rotation, 3),
        "shear_degrees": round(shear_deg, 3),
        "method_used": "manual_combined",
        "dewarped_image_url": f"/api/v1/ocr-pipeline/sessions/{session_id}/image/dewarped",
    }
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/ground-truth/dewarp")
|
||||
async def save_dewarp_ground_truth(session_id: str, req: DewarpGroundTruthRequest):
|
||||
"""Save ground truth feedback for the dewarp step."""
|
||||
session = await get_session_db(session_id)
|
||||
if not session:
|
||||
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||
|
||||
ground_truth = session.get("ground_truth") or {}
|
||||
gt = {
|
||||
"is_correct": req.is_correct,
|
||||
"corrected_shear": req.corrected_shear,
|
||||
"notes": req.notes,
|
||||
"saved_at": datetime.utcnow().isoformat(),
|
||||
"dewarp_result": session.get("dewarp_result"),
|
||||
}
|
||||
ground_truth["dewarp"] = gt
|
||||
|
||||
await update_session_db(session_id, ground_truth=ground_truth)
|
||||
|
||||
if session_id in _cache:
|
||||
_cache[session_id]["ground_truth"] = ground_truth
|
||||
|
||||
logger.info(f"OCR Pipeline: ground truth dewarp session {session_id}: "
|
||||
f"correct={req.is_correct}, corrected_shear={req.corrected_shear}")
|
||||
|
||||
return {"session_id": session_id, "ground_truth": gt}
|
||||
File diff suppressed because it is too large
Load Diff
299
klausur-service/backend/ocr_pipeline_structure.py
Normal file
299
klausur-service/backend/ocr_pipeline_structure.py
Normal file
@@ -0,0 +1,299 @@
|
||||
"""
|
||||
OCR Pipeline Structure Detection and Exclude Regions
|
||||
|
||||
Detect document structure (boxes, zones, color regions, graphics)
|
||||
and manage user-drawn exclude regions.
|
||||
Extracted from ocr_pipeline_geometry.py for file-size compliance.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from cv_box_detect import detect_boxes
|
||||
from cv_color_detect import _COLOR_RANGES, _COLOR_HEX
|
||||
from cv_graphic_detect import detect_graphic_elements
|
||||
from ocr_pipeline_session_store import (
|
||||
get_session_db,
|
||||
update_session_db,
|
||||
)
|
||||
from ocr_pipeline_common import (
|
||||
_cache,
|
||||
_load_session_to_cache,
|
||||
_get_cached,
|
||||
_filter_border_ghost_words,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["ocr-pipeline"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Structure Detection Endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/sessions/{session_id}/detect-structure")
|
||||
async def detect_structure(session_id: str):
|
||||
"""Detect document structure: boxes, zones, and color regions.
|
||||
|
||||
Runs box detection (line + shading) and color analysis on the cropped
|
||||
image. Returns structured JSON with all detected elements for the
|
||||
structure visualization step.
|
||||
"""
|
||||
if session_id not in _cache:
|
||||
await _load_session_to_cache(session_id)
|
||||
cached = _get_cached(session_id)
|
||||
|
||||
img_bgr = (
|
||||
cached.get("cropped_bgr")
|
||||
if cached.get("cropped_bgr") is not None
|
||||
else cached.get("dewarped_bgr")
|
||||
)
|
||||
if img_bgr is None:
|
||||
raise HTTPException(status_code=400, detail="Crop or dewarp must be completed first")
|
||||
|
||||
t0 = time.time()
|
||||
h, w = img_bgr.shape[:2]
|
||||
|
||||
# --- Content bounds from word result (if available) or full image ---
|
||||
word_result = cached.get("word_result")
|
||||
words: List[Dict] = []
|
||||
if word_result and word_result.get("cells"):
|
||||
for cell in word_result["cells"]:
|
||||
for wb in (cell.get("word_boxes") or []):
|
||||
words.append(wb)
|
||||
# Fallback: use raw OCR words if cell word_boxes are empty
|
||||
if not words and word_result:
|
||||
for key in ("raw_paddle_words_split", "raw_tesseract_words", "raw_paddle_words"):
|
||||
raw = word_result.get(key, [])
|
||||
if raw:
|
||||
words = raw
|
||||
logger.info("detect-structure: using %d words from %s (no cell word_boxes)", len(words), key)
|
||||
break
|
||||
# If no words yet, use image dimensions with small margin
|
||||
if words:
|
||||
content_x = max(0, min(int(wb["left"]) for wb in words))
|
||||
content_y = max(0, min(int(wb["top"]) for wb in words))
|
||||
content_r = min(w, max(int(wb["left"] + wb["width"]) for wb in words))
|
||||
content_b = min(h, max(int(wb["top"] + wb["height"]) for wb in words))
|
||||
content_w_px = content_r - content_x
|
||||
content_h_px = content_b - content_y
|
||||
else:
|
||||
margin = int(min(w, h) * 0.03)
|
||||
content_x, content_y = margin, margin
|
||||
content_w_px = w - 2 * margin
|
||||
content_h_px = h - 2 * margin
|
||||
|
||||
# --- Box detection ---
|
||||
boxes = detect_boxes(
|
||||
img_bgr,
|
||||
content_x=content_x,
|
||||
content_w=content_w_px,
|
||||
content_y=content_y,
|
||||
content_h=content_h_px,
|
||||
)
|
||||
|
||||
# --- Zone splitting ---
|
||||
from cv_box_detect import split_page_into_zones as _split_zones
|
||||
zones = _split_zones(content_x, content_y, content_w_px, content_h_px, boxes)
|
||||
|
||||
# --- Color region sampling ---
|
||||
# Sample background shading in each detected box
|
||||
hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
|
||||
box_colors = []
|
||||
for box in boxes:
|
||||
# Sample the center region of each box
|
||||
cy1 = box.y + box.height // 4
|
||||
cy2 = box.y + 3 * box.height // 4
|
||||
cx1 = box.x + box.width // 4
|
||||
cx2 = box.x + 3 * box.width // 4
|
||||
cy1 = max(0, min(cy1, h - 1))
|
||||
cy2 = max(0, min(cy2, h - 1))
|
||||
cx1 = max(0, min(cx1, w - 1))
|
||||
cx2 = max(0, min(cx2, w - 1))
|
||||
if cy2 > cy1 and cx2 > cx1:
|
||||
roi_hsv = hsv[cy1:cy2, cx1:cx2]
|
||||
med_h = float(np.median(roi_hsv[:, :, 0]))
|
||||
med_s = float(np.median(roi_hsv[:, :, 1]))
|
||||
med_v = float(np.median(roi_hsv[:, :, 2]))
|
||||
if med_s > 15:
|
||||
from cv_color_detect import _hue_to_color_name
|
||||
bg_name = _hue_to_color_name(med_h)
|
||||
bg_hex = _COLOR_HEX.get(bg_name, "#6b7280")
|
||||
else:
|
||||
bg_name = "gray" if med_v < 220 else "white"
|
||||
bg_hex = "#6b7280" if bg_name == "gray" else "#ffffff"
|
||||
else:
|
||||
bg_name = "unknown"
|
||||
bg_hex = "#6b7280"
|
||||
box_colors.append({"color_name": bg_name, "color_hex": bg_hex})
|
||||
|
||||
# --- Color text detection overview ---
|
||||
# Quick scan for colored text regions across the page
|
||||
color_summary: Dict[str, int] = {}
|
||||
for color_name, ranges in _COLOR_RANGES.items():
|
||||
mask = np.zeros((h, w), dtype=np.uint8)
|
||||
for lower, upper in ranges:
|
||||
mask = cv2.bitwise_or(mask, cv2.inRange(hsv, lower, upper))
|
||||
pixel_count = int(np.sum(mask > 0))
|
||||
if pixel_count > 50: # minimum threshold
|
||||
color_summary[color_name] = pixel_count
|
||||
|
||||
# --- Graphic element detection ---
|
||||
box_dicts = [
|
||||
{"x": b.x, "y": b.y, "w": b.width, "h": b.height}
|
||||
for b in boxes
|
||||
]
|
||||
graphics = detect_graphic_elements(
|
||||
img_bgr, words,
|
||||
detected_boxes=box_dicts,
|
||||
)
|
||||
|
||||
# --- Filter border-ghost words from OCR result ---
|
||||
ghost_count = 0
|
||||
if boxes and word_result:
|
||||
ghost_count = _filter_border_ghost_words(word_result, boxes)
|
||||
if ghost_count:
|
||||
logger.info("detect-structure: removed %d border-ghost words", ghost_count)
|
||||
await update_session_db(session_id, word_result=word_result)
|
||||
cached["word_result"] = word_result
|
||||
|
||||
duration = time.time() - t0
|
||||
|
||||
# Preserve user-drawn exclude regions from previous run
|
||||
prev_sr = cached.get("structure_result") or {}
|
||||
prev_exclude = prev_sr.get("exclude_regions", [])
|
||||
|
||||
result_dict = {
|
||||
"image_width": w,
|
||||
"image_height": h,
|
||||
"content_bounds": {
|
||||
"x": content_x, "y": content_y,
|
||||
"w": content_w_px, "h": content_h_px,
|
||||
},
|
||||
"boxes": [
|
||||
{
|
||||
"x": b.x, "y": b.y, "w": b.width, "h": b.height,
|
||||
"confidence": b.confidence,
|
||||
"border_thickness": b.border_thickness,
|
||||
"bg_color_name": box_colors[i]["color_name"],
|
||||
"bg_color_hex": box_colors[i]["color_hex"],
|
||||
}
|
||||
for i, b in enumerate(boxes)
|
||||
],
|
||||
"zones": [
|
||||
{
|
||||
"index": z.index,
|
||||
"zone_type": z.zone_type,
|
||||
"y": z.y, "h": z.height,
|
||||
"x": z.x, "w": z.width,
|
||||
}
|
||||
for z in zones
|
||||
],
|
||||
"graphics": [
|
||||
{
|
||||
"x": g.x, "y": g.y, "w": g.width, "h": g.height,
|
||||
"area": g.area,
|
||||
"shape": g.shape,
|
||||
"color_name": g.color_name,
|
||||
"color_hex": g.color_hex,
|
||||
"confidence": round(g.confidence, 2),
|
||||
}
|
||||
for g in graphics
|
||||
],
|
||||
"exclude_regions": prev_exclude,
|
||||
"color_pixel_counts": color_summary,
|
||||
"has_words": len(words) > 0,
|
||||
"word_count": len(words),
|
||||
"border_ghosts_removed": ghost_count,
|
||||
"duration_seconds": round(duration, 2),
|
||||
}
|
||||
|
||||
# Persist to session
|
||||
await update_session_db(session_id, structure_result=result_dict)
|
||||
cached["structure_result"] = result_dict
|
||||
|
||||
logger.info("detect-structure session %s: %d boxes, %d zones, %d graphics, %.2fs",
|
||||
session_id, len(boxes), len(zones), len(graphics), duration)
|
||||
|
||||
return {"session_id": session_id, **result_dict}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Exclude Regions -- user-drawn rectangles to exclude from OCR results
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _ExcludeRegionIn(BaseModel):
    """A single user-drawn exclude rectangle in image pixel coordinates.

    ``x``/``y`` are the top-left corner, ``w``/``h`` the size; ``label``
    is an optional user-facing description of why the area is excluded.
    """
    x: int
    y: int
    w: int
    h: int
    label: str = ""
|
||||
|
||||
|
||||
class _ExcludeRegionsBatchIn(BaseModel):
    """Request body for the PUT endpoint: the full replacement list of regions."""
    regions: list[_ExcludeRegionIn]
|
||||
|
||||
|
||||
@router.put("/sessions/{session_id}/exclude-regions")
|
||||
async def set_exclude_regions(session_id: str, body: _ExcludeRegionsBatchIn):
|
||||
"""Replace all exclude regions for a session.
|
||||
|
||||
Regions are stored inside ``structure_result.exclude_regions``.
|
||||
"""
|
||||
session = await get_session_db(session_id)
|
||||
if not session:
|
||||
raise HTTPException(status_code=404, detail="Session not found")
|
||||
|
||||
sr = session.get("structure_result") or {}
|
||||
sr["exclude_regions"] = [r.model_dump() for r in body.regions]
|
||||
|
||||
# Invalidate grid so it rebuilds with new exclude regions
|
||||
await update_session_db(session_id, structure_result=sr, grid_editor_result=None)
|
||||
|
||||
# Update cache
|
||||
if session_id in _cache:
|
||||
_cache[session_id]["structure_result"] = sr
|
||||
_cache[session_id].pop("grid_editor_result", None)
|
||||
|
||||
return {
|
||||
"session_id": session_id,
|
||||
"exclude_regions": sr["exclude_regions"],
|
||||
"count": len(sr["exclude_regions"]),
|
||||
}
|
||||
|
||||
|
||||
@router.delete("/sessions/{session_id}/exclude-regions/{region_index}")
|
||||
async def delete_exclude_region(session_id: str, region_index: int):
|
||||
"""Remove a single exclude region by index."""
|
||||
session = await get_session_db(session_id)
|
||||
if not session:
|
||||
raise HTTPException(status_code=404, detail="Session not found")
|
||||
|
||||
sr = session.get("structure_result") or {}
|
||||
regions = sr.get("exclude_regions", [])
|
||||
|
||||
if region_index < 0 or region_index >= len(regions):
|
||||
raise HTTPException(status_code=404, detail="Region index out of range")
|
||||
|
||||
removed = regions.pop(region_index)
|
||||
sr["exclude_regions"] = regions
|
||||
|
||||
# Invalidate grid so it rebuilds with new exclude regions
|
||||
await update_session_db(session_id, structure_result=sr, grid_editor_result=None)
|
||||
|
||||
if session_id in _cache:
|
||||
_cache[session_id]["structure_result"] = sr
|
||||
_cache[session_id].pop("grid_editor_result", None)
|
||||
|
||||
return {
|
||||
"session_id": session_id,
|
||||
"removed": removed,
|
||||
"remaining": len(regions),
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
498
klausur-service/backend/rbac_engine.py
Normal file
498
klausur-service/backend/rbac_engine.py
Normal file
@@ -0,0 +1,498 @@
|
||||
"""
|
||||
RBAC Policy Engine
|
||||
|
||||
Core engine for RBAC/ABAC permission checks,
|
||||
role assignments, key shares, and default policies.
|
||||
Extracted from rbac.py for file-size compliance.
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Dict, Set
|
||||
from datetime import datetime, timezone
|
||||
import uuid
|
||||
from functools import wraps
|
||||
|
||||
from fastapi import HTTPException, Request
|
||||
|
||||
from rbac_types import (
|
||||
Role,
|
||||
Action,
|
||||
ResourceType,
|
||||
ZKVisibilityMode,
|
||||
PolicySet,
|
||||
RoleAssignment,
|
||||
KeyShare,
|
||||
)
|
||||
from rbac_permissions import DEFAULT_PERMISSIONS
|
||||
|
||||
|
||||
# =============================================
|
||||
# POLICY ENGINE
|
||||
# =============================================
|
||||
|
||||
class PolicyEngine:
    """Engine for RBAC/ABAC decisions.

    Checks, in order:
    1. Base role permissions (RBAC)
    2. Policy restrictions (ABAC)
    3. Key-share grants (package-level access)

    State is held in memory only; nothing here persists across restarts.
    """

    def __init__(self):
        # policy-set id -> PolicySet
        self.policy_sets: Dict[str, PolicySet] = {}
        # user_id -> role assignments
        self.role_assignments: Dict[str, List[RoleAssignment]] = {}
        # user_id -> key shares
        self.key_shares: Dict[str, List[KeyShare]] = {}

    def register_policy_set(self, policy: PolicySet):
        """Register a policy set, replacing any existing set with the same id."""
        self.policy_sets[policy.id] = policy

    def get_policy_for_context(
        self,
        bundesland: str,
        jahr: int,
        fach: Optional[str] = None,
        verfahren: str = "abitur"
    ) -> Optional[PolicySet]:
        """Find the matching policy set for a context.

        Tries an exact (bundesland, jahr, verfahren) match first — a policy
        with ``fach is None`` acts as a subject-wildcard — then falls back
        to the "DEFAULT" bundesland policy, and finally returns ``None``.
        """
        # Exact match
        for policy in self.policy_sets.values():
            if (policy.bundesland == bundesland and
                policy.jahr == jahr and
                policy.verfahren == verfahren):
                if policy.fach is None or policy.fach == fach:
                    return policy

        # Fallback: default policy
        for policy in self.policy_sets.values():
            if policy.bundesland == "DEFAULT":
                return policy

        return None

    def assign_role(
        self,
        user_id: str,
        role: Role,
        resource_type: ResourceType,
        resource_id: str,
        granted_by: str,
        tenant_id: Optional[str] = None,
        namespace_id: Optional[str] = None,
        valid_to: Optional[datetime] = None
    ) -> RoleAssignment:
        """Assign a role to a user on a specific resource and return the record."""
        assignment = RoleAssignment(
            id=str(uuid.uuid4()),
            user_id=user_id,
            role=role,
            resource_type=resource_type,
            resource_id=resource_id,
            tenant_id=tenant_id,
            namespace_id=namespace_id,
            granted_by=granted_by,
            valid_to=valid_to
        )

        if user_id not in self.role_assignments:
            self.role_assignments[user_id] = []
        self.role_assignments[user_id].append(assignment)

        return assignment

    def revoke_role(self, assignment_id: str, revoked_by: str) -> bool:
        """Revoke a role assignment by id; returns True if found.

        NOTE(review): ``revoked_by`` is accepted but not recorded on the
        assignment (unlike ``revoke_key_share``, which stores it) — confirm
        whether RoleAssignment should track the revoking user too.
        """
        for user_assignments in self.role_assignments.values():
            for assignment in user_assignments:
                if assignment.id == assignment_id:
                    assignment.revoked_at = datetime.now(timezone.utc)
                    return True
        return False

    def get_user_roles(
        self,
        user_id: str,
        resource_type: Optional[ResourceType] = None,
        resource_id: Optional[str] = None
    ) -> List[Role]:
        """Return all active (non-revoked, non-expired) roles of a user.

        Optionally filtered by resource type and/or resource id. The result
        is de-duplicated; order is not guaranteed.
        """
        assignments = self.role_assignments.get(user_id, [])
        roles = []

        for assignment in assignments:
            if not assignment.is_active():
                continue
            if resource_type and assignment.resource_type != resource_type:
                continue
            if resource_id and assignment.resource_id != resource_id:
                continue
            roles.append(assignment.role)

        return list(set(roles))

    def create_key_share(
        self,
        user_id: str,
        package_id: str,
        permissions: Set[str],
        granted_by: str,
        scope: str = "full",
        invite_token: Optional[str] = None
    ) -> KeyShare:
        """Create a key share granting a user access to a package."""
        share = KeyShare(
            id=str(uuid.uuid4()),
            user_id=user_id,
            package_id=package_id,
            permissions=permissions,
            scope=scope,
            granted_by=granted_by,
            invite_token=invite_token
        )

        if user_id not in self.key_shares:
            self.key_shares[user_id] = []
        self.key_shares[user_id].append(share)

        return share

    def accept_key_share(self, share_id: str, token: str) -> bool:
        """Accept a key share via its invite token; returns True on success."""
        for user_shares in self.key_shares.values():
            for share in user_shares:
                if share.id == share_id and share.invite_token == token:
                    share.accepted_at = datetime.now(timezone.utc)
                    return True
        return False

    def revoke_key_share(self, share_id: str, revoked_by: str) -> bool:
        """Revoke a key share, recording who revoked it; returns True if found."""
        for user_shares in self.key_shares.values():
            for share in user_shares:
                if share.id == share_id:
                    share.revoked_at = datetime.now(timezone.utc)
                    share.revoked_by = revoked_by
                    return True
        return False

    def check_permission(
        self,
        user_id: str,
        action: Action,
        resource_type: ResourceType,
        resource_id: str,
        policy: Optional[PolicySet] = None,
        package_id: Optional[str] = None
    ) -> bool:
        """Check whether a user may perform an action on a resource.

        Checks, in order:
        1. Base RBAC (role permission matrix)
        2. Policy restrictions (e.g. ZK visibility mode)
        3. Key share (only if ``package_id`` is given)

        Returns False as soon as any check fails.
        """
        # 1. Fetch active roles
        roles = self.get_user_roles(user_id, resource_type, resource_id)

        if not roles:
            return False

        # 2. Base RBAC: any role granting the action suffices
        has_permission = False
        for role in roles:
            role_permissions = DEFAULT_PERMISSIONS.get(role, {})
            resource_permissions = role_permissions.get(resource_type, set())
            if action in resource_permissions:
                has_permission = True
                break

        if not has_permission:
            return False

        # 3. Policy restrictions
        if policy:
            # ZK visibility mode
            if Role.ZWEITKORREKTOR in roles:
                if policy.zk_visibility_mode == ZKVisibilityMode.BLIND:
                    # Blind: second corrector must not see first-corrector outputs.
                    if resource_type in [ResourceType.EVALUATION, ResourceType.REPORT, ResourceType.GRADE_DECISION]:
                        if action == Action.READ:
                            # Would need metadata to tell EK outputs apart;
                            # intentionally a stub — implementation depends
                            # on the data model.
                            pass

                elif policy.zk_visibility_mode == ZKVisibilityMode.SEMI:
                    # Semi: second corrector sees annotations, but not the grade.
                    if resource_type == ResourceType.GRADE_DECISION and action == Action.READ:
                        return False

        # 4. Key share (only for package-scoped checks)
        if package_id:
            user_shares = self.key_shares.get(user_id, [])
            has_key_share = any(
                share.package_id == package_id and share.is_active()
                for share in user_shares
            )
            if not has_key_share:
                return False

        return True

    def get_allowed_actions(
        self,
        user_id: str,
        resource_type: ResourceType,
        resource_id: str,
        policy: Optional[PolicySet] = None
    ) -> Set[Action]:
        """Return the union of all actions the user's roles allow on a resource.

        NOTE(review): the BLIND-visibility restriction is a stub here — the
        returned set is not yet narrowed for blind second correctors.
        """
        roles = self.get_user_roles(user_id, resource_type, resource_id)
        allowed = set()

        for role in roles:
            role_permissions = DEFAULT_PERMISSIONS.get(role, {})
            resource_permissions = role_permissions.get(resource_type, set())
            allowed.update(resource_permissions)

        # Apply policy restrictions
        if policy and Role.ZWEITKORREKTOR in roles:
            if policy.zk_visibility_mode == ZKVisibilityMode.BLIND:
                # Would remove READ for certain resources; detail implementation pending.
                pass

        return allowed
|
||||
|
||||
|
||||
# =============================================
|
||||
# DEFAULT POLICY SETS (alle Bundeslaender)
|
||||
# =============================================
|
||||
|
||||
def create_default_policy_sets() -> List[PolicySet]:
    """Create default policy sets for all German federal states.

    Returns one fallback "DEFAULT" policy, three explicitly tuned state
    policies (Niedersachsen, Bayern, NRW), and a generic policy for every
    remaining state. These can be refined per state later.

    Bug fix vs. the previous version: policy ids were derived from the
    first two letters of the state name (``bl[:2].upper()``), which
    collides — brandenburg/bremen both became "BR", and saarland/sachsen/
    sachsen-anhalt all became "SA" — so later policies silently overwrote
    earlier ones in the id-keyed registry. Ids now use the official
    state abbreviations (BW, BB, HB, HH, MV, SL, SN, ST, SH, ...), which
    are unique and match the explicit NI/BY/NW ids.
    """
    # Official abbreviations for all 16 federal states (unique by design).
    state_codes = {
        "baden-wuerttemberg": "BW",
        "bayern": "BY",
        "berlin": "BE",
        "brandenburg": "BB",
        "bremen": "HB",
        "hamburg": "HH",
        "hessen": "HE",
        "mecklenburg-vorpommern": "MV",
        "niedersachsen": "NI",
        "nordrhein-westfalen": "NW",
        "rheinland-pfalz": "RP",
        "saarland": "SL",
        "sachsen": "SN",
        "sachsen-anhalt": "ST",
        "schleswig-holstein": "SH",
        "thueringen": "TH",
    }

    # Keyword arguments shared by every generated policy set.
    common = dict(
        jahr=2025,
        fach=None,
        verfahren="abitur",
        allow_teacher_uploaded_eh=True,
        allow_land_uploaded_eh=True,
        require_rights_confirmation_on_upload=True,
        third_correction_threshold=4,
        final_signoff_role="fachvorsitz",
    )

    policies = []

    # Default policy (fallback when no state-specific set matches).
    # Note: eh_visibility_mode is intentionally left at its dataclass
    # default instead of re-deriving it via __dataclass_fields__.
    policies.append(PolicySet(
        id="DEFAULT-2025",
        bundesland="DEFAULT",
        zk_visibility_mode=ZKVisibilityMode.FULL,
        **common,
    ))

    # Niedersachsen (example with state-specific settings)
    policies.append(PolicySet(
        id="NI-2025-ABITUR",
        bundesland="niedersachsen",
        zk_visibility_mode=ZKVisibilityMode.FULL,  # in NI the ZK sees everything
        export_template_id="niedersachsen-abitur",
        **common,
    ))

    # Bayern (example with SEMI visibility)
    policies.append(PolicySet(
        id="BY-2025-ABITUR",
        bundesland="bayern",
        zk_visibility_mode=ZKVisibilityMode.SEMI,  # ZK sees annotations, not the grade
        export_template_id="bayern-abitur",
        **common,
    ))

    # NRW (example)
    policies.append(PolicySet(
        id="NW-2025-ABITUR",
        bundesland="nordrhein-westfalen",
        zk_visibility_mode=ZKVisibilityMode.FULL,
        export_template_id="nrw-abitur",
        **common,
    ))

    # Generate base policies for all remaining states with unique ids.
    for bl, code in state_codes.items():
        if bl in ("niedersachsen", "bayern", "nordrhein-westfalen"):
            continue
        policies.append(PolicySet(
            id=f"{code}-2025-ABITUR",
            bundesland=bl,
            zk_visibility_mode=ZKVisibilityMode.FULL,
            **common,
        ))

    return policies
|
||||
|
||||
|
||||
# =============================================
|
||||
# GLOBAL POLICY ENGINE INSTANCE
|
||||
# =============================================
|
||||
|
||||
# Singleton Policy Engine
_policy_engine: Optional[PolicyEngine] = None


def get_policy_engine() -> PolicyEngine:
    """Return the global PolicyEngine singleton.

    Lazily constructs the engine on first access and seeds it with the
    default policy sets for all federal states.
    """
    global _policy_engine
    if _policy_engine is not None:
        return _policy_engine

    engine = PolicyEngine()
    # Register the default policies once, at creation time.
    for default_policy in create_default_policy_sets():
        engine.register_policy_set(default_policy)
    _policy_engine = engine
    return _policy_engine
|
||||
|
||||
|
||||
# =============================================
|
||||
# API GUARDS (Decorators fuer FastAPI)
|
||||
# =============================================
|
||||
|
||||
def require_permission(
    action: Action,
    resource_type: ResourceType,
    resource_id_param: str = "resource_id",
    policy_jahr: int = 2025
):
    """Decorator for FastAPI endpoints enforcing an RBAC/ABAC permission.

    Checks whether the current user (``request.state.user``) may perform
    ``action`` on the resource of type ``resource_type`` whose id is taken
    from the endpoint's keyword argument named ``resource_id_param``.

    Args:
        action: The action to authorize (e.g. ``Action.READ``).
        resource_type: The resource type being accessed.
        resource_id_param: Name of the endpoint kwarg holding the resource id.
        policy_jahr: Year used when resolving the policy context
            (previously hard-coded to 2025; now parameterized,
            backward-compatible default unchanged).

    Raises:
        HTTPException 500: no ``Request`` object found in the call.
        HTTPException 401: no authenticated user on the request.
        HTTPException 403: the permission check fails.

    Usage:
        @app.get("/api/v1/packages/{package_id}")
        @require_permission(Action.READ, ResourceType.EXAM_PACKAGE, "package_id")
        async def get_package(package_id: str, request: Request):
            ...
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Locate the Request object (kwarg first, then positionals).
            request = kwargs.get('request')
            if not request:
                for arg in args:
                    if isinstance(arg, Request):
                        request = arg
                        break

            if not request:
                raise HTTPException(status_code=500, detail="Request not found")

            # User comes from the auth middleware via request.state.
            user = getattr(request.state, 'user', None)
            if not user:
                raise HTTPException(status_code=401, detail="Not authenticated")

            user_id = user.get('user_id')
            resource_id = kwargs.get(resource_id_param)

            engine = get_policy_engine()

            # Optionally load the policy for the user's state context.
            policy = None
            bundesland = user.get('bundesland')
            if bundesland:
                policy = engine.get_policy_for_context(bundesland, policy_jahr)

            if not engine.check_permission(
                user_id=user_id,
                action=action,
                resource_type=resource_type,
                resource_id=resource_id,
                policy=policy
            ):
                raise HTTPException(
                    status_code=403,
                    detail=f"Permission denied: {action.value} on {resource_type.value}"
                )

            return await func(*args, **kwargs)

        return wrapper
    return decorator
|
||||
|
||||
|
||||
def require_role(role: Role):
    """Decorator that checks whether the current user holds a specific role.

    Raises:
        HTTPException 500: no ``Request`` object found in the call.
        HTTPException 401: no authenticated user on the request.
        HTTPException 403: the user does not hold ``role``.

    Usage:
        @app.post("/api/v1/eh/publish")
        @require_role(Role.LAND_ADMIN)
        async def publish_eh(request: Request):
            ...
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Locate the Request object: prefer the kwarg, otherwise scan
            # the positional arguments for the first Request instance.
            request = kwargs.get('request') or next(
                (arg for arg in args if isinstance(arg, Request)), None
            )
            if not request:
                raise HTTPException(status_code=500, detail="Request not found")

            # User is attached by the auth middleware.
            user = getattr(request.state, 'user', None)
            if not user:
                raise HTTPException(status_code=401, detail="Not authenticated")

            engine = get_policy_engine()
            if role not in engine.get_user_roles(user.get('user_id')):
                raise HTTPException(
                    status_code=403,
                    detail=f"Role required: {role.value}"
                )

            return await func(*args, **kwargs)

        return wrapper
    return decorator
|
||||
221
klausur-service/backend/rbac_permissions.py
Normal file
221
klausur-service/backend/rbac_permissions.py
Normal file
@@ -0,0 +1,221 @@
|
||||
"""
|
||||
RBAC Permission Matrix
|
||||
|
||||
Default role-to-resource permission mappings for
|
||||
Klausur-Korrektur and Zeugnis workflows.
|
||||
Extracted from rbac.py for file-size compliance.
|
||||
"""
|
||||
|
||||
from typing import Dict, Set
|
||||
|
||||
from rbac_types import Role, Action, ResourceType
|
||||
|
||||
|
||||
# =============================================
# RBAC PERMISSION MATRIX
# =============================================

# Default permission matrix (can be overridden by policies).
# Maps each role to the set of actions it may perform per resource type;
# a missing resource type means "no access" for that role.
DEFAULT_PERMISSIONS: Dict[Role, Dict[ResourceType, Set[Action]]] = {
    # First corrector (Erstkorrektor)
    Role.ERSTKORREKTOR: {
        ResourceType.EXAM_PACKAGE: {Action.READ, Action.UPDATE, Action.SHARE_KEY, Action.LOCK},
        ResourceType.STUDENT_WORK: {Action.READ, Action.UPDATE},
        ResourceType.EH_DOCUMENT: {Action.READ, Action.UPLOAD, Action.UPDATE},
        ResourceType.RUBRIC: {Action.READ, Action.UPDATE},
        ResourceType.ANNOTATION: {Action.CREATE, Action.READ, Action.UPDATE, Action.DELETE},
        ResourceType.EVALUATION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.REPORT: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.GRADE_DECISION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },

    # Second corrector (Zweitkorrektor, default: FULL visibility)
    Role.ZWEITKORREKTOR: {
        ResourceType.EXAM_PACKAGE: {Action.READ},
        ResourceType.STUDENT_WORK: {Action.READ, Action.UPDATE},
        ResourceType.EH_DOCUMENT: {Action.READ},
        ResourceType.RUBRIC: {Action.READ},
        ResourceType.ANNOTATION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.EVALUATION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.REPORT: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.GRADE_DECISION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.EXPORT: {Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },

    # Third corrector (Drittkorrektor)
    Role.DRITTKORREKTOR: {
        ResourceType.EXAM_PACKAGE: {Action.READ},
        ResourceType.STUDENT_WORK: {Action.READ, Action.UPDATE},
        ResourceType.EH_DOCUMENT: {Action.READ},
        ResourceType.RUBRIC: {Action.READ},
        ResourceType.ANNOTATION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.EVALUATION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.REPORT: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.GRADE_DECISION: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.AUDIT_LOG: {Action.READ},
    },

    # Subject examination chair (Fachvorsitz)
    Role.FACHVORSITZ: {
        ResourceType.TENANT: {Action.READ},
        ResourceType.NAMESPACE: {Action.READ, Action.UPDATE},
        ResourceType.EXAM_PACKAGE: {Action.READ, Action.UPDATE, Action.LOCK, Action.UNLOCK, Action.SIGN_OFF},
        ResourceType.STUDENT_WORK: {Action.READ, Action.UPDATE},
        ResourceType.EH_DOCUMENT: {Action.READ, Action.UPLOAD, Action.UPDATE},
        ResourceType.RUBRIC: {Action.READ, Action.UPDATE},
        ResourceType.ANNOTATION: {Action.READ, Action.UPDATE},
        ResourceType.EVALUATION: {Action.READ, Action.UPDATE},
        ResourceType.REPORT: {Action.READ, Action.UPDATE},
        ResourceType.GRADE_DECISION: {Action.READ, Action.UPDATE, Action.SIGN_OFF},
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },

    # Examination chair (Pruefungsvorsitz)
    Role.PRUEFUNGSVORSITZ: {
        ResourceType.TENANT: {Action.READ},
        ResourceType.NAMESPACE: {Action.READ, Action.CREATE},
        ResourceType.EXAM_PACKAGE: {Action.READ, Action.SIGN_OFF},
        ResourceType.STUDENT_WORK: {Action.READ},
        ResourceType.EH_DOCUMENT: {Action.READ},
        ResourceType.GRADE_DECISION: {Action.READ, Action.SIGN_OFF},
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },

    # School admin (Schul-Admin)
    Role.SCHUL_ADMIN: {
        ResourceType.TENANT: {Action.READ, Action.UPDATE},
        ResourceType.NAMESPACE: {Action.CREATE, Action.READ, Action.UPDATE, Action.DELETE},
        ResourceType.EXAM_PACKAGE: {Action.CREATE, Action.READ, Action.DELETE, Action.ASSIGN_ROLE},
        ResourceType.EH_DOCUMENT: {Action.READ, Action.UPLOAD, Action.DELETE},
        ResourceType.AUDIT_LOG: {Action.READ},
    },

    # State admin (Land-Admin, authority)
    Role.LAND_ADMIN: {
        ResourceType.TENANT: {Action.READ},
        ResourceType.EH_DOCUMENT: {Action.READ, Action.UPLOAD, Action.UPDATE, Action.DELETE, Action.PUBLISH_OFFICIAL},
        ResourceType.AUDIT_LOG: {Action.READ},
    },

    # Auditor
    Role.AUDITOR: {
        ResourceType.AUDIT_LOG: {Action.READ},
        ResourceType.EXAM_PACKAGE: {Action.READ},  # metadata only
        # No access to content!
    },

    # Operator
    Role.OPERATOR: {
        ResourceType.TENANT: {Action.READ},
        ResourceType.NAMESPACE: {Action.READ},
        ResourceType.EXAM_PACKAGE: {Action.READ},  # metadata only
        ResourceType.AUDIT_LOG: {Action.READ},
        # Break-glass access is handled separately
    },

    # Teacher assistant
    Role.TEACHER_ASSISTANT: {
        ResourceType.STUDENT_WORK: {Action.READ},
        ResourceType.ANNOTATION: {Action.CREATE, Action.READ},  # only certain types
        ResourceType.EH_DOCUMENT: {Action.READ},
    },

    # Exam author (pre-Abitur only)
    Role.EXAM_AUTHOR: {
        ResourceType.EH_DOCUMENT: {Action.CREATE, Action.READ, Action.UPDATE, Action.DELETE},
        ResourceType.RUBRIC: {Action.CREATE, Action.READ, Action.UPDATE, Action.DELETE},
    },

    # =============================================
    # CERTIFICATE (ZEUGNIS) WORKFLOW ROLES
    # =============================================

    # Class teacher - creates certificates, conduct grades, remarks
    Role.KLASSENLEHRER: {
        ResourceType.NAMESPACE: {Action.READ},
        ResourceType.ZEUGNIS: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS_ENTWURF: {Action.CREATE, Action.READ, Action.UPDATE, Action.DELETE},
        ResourceType.ZEUGNIS_VORLAGE: {Action.READ},
        ResourceType.SCHUELER_DATEN: {Action.READ, Action.UPDATE},
        ResourceType.FACHNOTE: {Action.READ},  # reads subject grades entered by subject teachers
        ResourceType.KOPFNOTE: {Action.CREATE, Action.READ, Action.UPDATE},
        ResourceType.FEHLZEITEN: {Action.READ, Action.UPDATE},
        ResourceType.BEMERKUNG: {Action.CREATE, Action.READ, Action.UPDATE, Action.DELETE},
        ResourceType.VERSETZUNG: {Action.READ},
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },

    # Subject teacher - enters subject grades
    Role.FACHLEHRER: {
        ResourceType.NAMESPACE: {Action.READ},
        ResourceType.SCHUELER_DATEN: {Action.READ},  # own students only
        ResourceType.FACHNOTE: {Action.CREATE, Action.READ, Action.UPDATE},  # own subject only
        ResourceType.BEMERKUNG: {Action.CREATE, Action.READ},  # subject-related remarks
        ResourceType.AUDIT_LOG: {Action.READ},
    },

    # Certificate officer - quality control
    Role.ZEUGNISBEAUFTRAGTER: {
        ResourceType.NAMESPACE: {Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS: {Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS_ENTWURF: {Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS_VORLAGE: {Action.READ, Action.UPDATE, Action.UPLOAD},
        ResourceType.SCHUELER_DATEN: {Action.READ},
        ResourceType.FACHNOTE: {Action.READ},
        ResourceType.KOPFNOTE: {Action.READ, Action.UPDATE},
        ResourceType.FEHLZEITEN: {Action.READ},
        ResourceType.BEMERKUNG: {Action.READ, Action.UPDATE},
        ResourceType.VERSETZUNG: {Action.READ},
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },

    # School office - printing, dispatch, archiving
    Role.SEKRETARIAT: {
        ResourceType.ZEUGNIS: {Action.READ, Action.DOWNLOAD},
        ResourceType.ZEUGNIS_VORLAGE: {Action.READ},
        ResourceType.SCHUELER_DATEN: {Action.READ},  # for address data
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },

    # School management - final certificate sign-off
    Role.SCHULLEITUNG: {
        ResourceType.TENANT: {Action.READ},
        ResourceType.NAMESPACE: {Action.READ, Action.CREATE},
        ResourceType.ZEUGNIS: {Action.READ, Action.SIGN_OFF, Action.LOCK},
        ResourceType.ZEUGNIS_ENTWURF: {Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS_VORLAGE: {Action.READ, Action.UPDATE},
        ResourceType.SCHUELER_DATEN: {Action.READ},
        ResourceType.FACHNOTE: {Action.READ},
        ResourceType.KOPFNOTE: {Action.READ, Action.UPDATE},
        ResourceType.FEHLZEITEN: {Action.READ},
        ResourceType.BEMERKUNG: {Action.READ, Action.UPDATE},
        ResourceType.KONFERENZ_BESCHLUSS: {Action.CREATE, Action.READ, Action.UPDATE, Action.SIGN_OFF},
        ResourceType.VERSETZUNG: {Action.CREATE, Action.READ, Action.UPDATE, Action.SIGN_OFF},
        ResourceType.EXPORT: {Action.CREATE, Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },

    # Grade-level leadership - coordination (e.g. upper school)
    Role.STUFENLEITUNG: {
        ResourceType.NAMESPACE: {Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS: {Action.READ, Action.UPDATE},
        ResourceType.ZEUGNIS_ENTWURF: {Action.READ, Action.UPDATE},
        ResourceType.SCHUELER_DATEN: {Action.READ},
        ResourceType.FACHNOTE: {Action.READ},
        ResourceType.KOPFNOTE: {Action.READ},
        ResourceType.FEHLZEITEN: {Action.READ},
        ResourceType.BEMERKUNG: {Action.READ, Action.UPDATE},
        ResourceType.KONFERENZ_BESCHLUSS: {Action.READ},
        ResourceType.VERSETZUNG: {Action.READ, Action.UPDATE},
        ResourceType.EXPORT: {Action.READ, Action.DOWNLOAD},
        ResourceType.AUDIT_LOG: {Action.READ},
    },
}
|
||||
438
klausur-service/backend/rbac_types.py
Normal file
438
klausur-service/backend/rbac_types.py
Normal file
@@ -0,0 +1,438 @@
|
||||
"""
|
||||
RBAC/ABAC Type Definitions
|
||||
|
||||
Enums, data structures, and models for the policy system.
|
||||
Extracted from rbac.py for file-size compliance.
|
||||
"""
|
||||
|
||||
import json
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from typing import Optional, List, Dict, Set, Any
|
||||
from datetime import datetime, timezone
|
||||
import uuid
|
||||
|
||||
|
||||
# =============================================
|
||||
# ENUMS: Roles, Actions, Resources
|
||||
# =============================================
|
||||
|
||||
class Role(str, Enum):
    """Functional roles in the exam-correction and certificate chains."""

    # === Exam correction chain ===
    ERSTKORREKTOR = "erstkorrektor"    # EK - first corrector
    ZWEITKORREKTOR = "zweitkorrektor"  # ZK - second corrector
    DRITTKORREKTOR = "drittkorrektor"  # DK - third corrector

    # === Certificate (Zeugnis) workflow ===
    KLASSENLEHRER = "klassenlehrer"              # KL - creates certificate, conduct grades, remarks
    FACHLEHRER = "fachlehrer"                    # FL - enters subject grades
    ZEUGNISBEAUFTRAGTER = "zeugnisbeauftragter"  # ZB - quality control
    SEKRETARIAT = "sekretariat"                  # SEK - printing, dispatch, archiving

    # === Leadership (exam + certificate) ===
    FACHVORSITZ = "fachvorsitz"            # FVL - subject examination chair
    PRUEFUNGSVORSITZ = "pruefungsvorsitz"  # PV - school management / examination chair
    SCHULLEITUNG = "schulleitung"          # SL - final certificate sign-off
    STUFENLEITUNG = "stufenleitung"        # STL - grade-level coordination

    # === Administration ===
    SCHUL_ADMIN = "schul_admin"  # SA - school admin
    LAND_ADMIN = "land_admin"    # LA - state authority

    # === Special ===
    AUDITOR = "auditor"                      # DPO / auditor
    OPERATOR = "operator"                    # OPS - support
    TEACHER_ASSISTANT = "teacher_assistant"  # TA - trainee teacher
    EXAM_AUTHOR = "exam_author"              # EA - pre-Abitur only
|
||||
|
||||
|
||||
class Action(str, Enum):
    """Possible operations on resources."""

    # Basic CRUD
    CREATE = "create"
    READ = "read"
    UPDATE = "update"
    DELETE = "delete"

    # User / role management
    ASSIGN_ROLE = "assign_role"
    INVITE_USER = "invite_user"
    REMOVE_USER = "remove_user"

    # File transfer
    UPLOAD = "upload"
    DOWNLOAD = "download"

    # Workflow state transitions
    LOCK = "lock"          # finalize
    UNLOCK = "unlock"      # only with special privilege
    SIGN_OFF = "sign_off"  # approval / release

    # Sensitive operations
    SHARE_KEY = "share_key"      # create a key share
    VIEW_PII = "view_pii"        # if PII is present
    BREAK_GLASS = "break_glass"  # emergency access

    PUBLISH_OFFICIAL = "publish_official"  # distribute official EH documents
|
||||
|
||||
|
||||
class ResourceType(str, Enum):
    """Resource types in the system."""
    TENANT = "tenant"
    NAMESPACE = "namespace"

    # === Exam correction ===
    EXAM_PACKAGE = "exam_package"
    STUDENT_WORK = "student_work"
    EH_DOCUMENT = "eh_document"
    RUBRIC = "rubric"          # scoring grid
    ANNOTATION = "annotation"
    EVALUATION = "evaluation"  # criteria / points
    REPORT = "report"          # written assessment (Gutachten)
    GRADE_DECISION = "grade_decision"

    # === Certificate generator ===
    ZEUGNIS = "zeugnis"                          # certificate document
    ZEUGNIS_VORLAGE = "zeugnis_vorlage"          # certificate template
    ZEUGNIS_ENTWURF = "zeugnis_entwurf"          # certificate draft (before approval)
    SCHUELER_DATEN = "schueler_daten"            # student master data, grades
    FACHNOTE = "fachnote"                        # individual subject grade
    KOPFNOTE = "kopfnote"                        # work / social conduct grade
    FEHLZEITEN = "fehlzeiten"                    # absences
    BEMERKUNG = "bemerkung"                      # certificate remarks
    KONFERENZ_BESCHLUSS = "konferenz_beschluss"  # conference decision
    VERSETZUNG = "versetzung"                    # promotion decision

    # === General ===
    DOCUMENT = "document"  # generic document type (EH, templates, etc.)
    TEMPLATE = "template"  # generic templates
    EXPORT = "export"
    AUDIT_LOG = "audit_log"
    KEY_MATERIAL = "key_material"
|
||||
|
||||
|
||||
class ZKVisibilityMode(str, Enum):
    """Visibility mode for second correctors (Zweitkorrektoren)."""
    BLIND = "blind"  # ZK sees no EK grade / report
    SEMI = "semi"    # ZK sees annotations, but no grade
    FULL = "full"    # ZK sees everything
|
||||
|
||||
|
||||
class EHVisibilityMode(str, Enum):
    """Visibility mode for Erwartungshorizonte (marking guides)."""
    BLIND = "blind"    # ZK does not see the EH (rare)
    SHARED = "shared"  # ZK sees the EH (default)
|
||||
|
||||
|
||||
class VerfahrenType(str, Enum):
    """Procedure types for exams and certificates."""

    # === Exam procedures ===
    ABITUR = "abitur"
    VORABITUR = "vorabitur"
    KLAUSUR = "klausur"
    NACHPRUEFUNG = "nachpruefung"

    # === Certificate procedures ===
    HALBJAHRESZEUGNIS = "halbjahreszeugnis"
    JAHRESZEUGNIS = "jahreszeugnis"
    ABSCHLUSSZEUGNIS = "abschlusszeugnis"
    ABGANGSZEUGNIS = "abgangszeugnis"

    @classmethod
    def is_exam_type(cls, verfahren: str) -> bool:
        """Return True if *verfahren* names an exam procedure."""
        try:
            member = cls(verfahren)
        except ValueError:
            # Unknown value: treat as not an exam procedure.
            return False
        return member in (cls.ABITUR, cls.VORABITUR, cls.KLAUSUR, cls.NACHPRUEFUNG)

    @classmethod
    def is_certificate_type(cls, verfahren: str) -> bool:
        """Return True if *verfahren* names a certificate procedure."""
        try:
            member = cls(verfahren)
        except ValueError:
            # Unknown value: treat as not a certificate procedure.
            return False
        return member in (
            cls.HALBJAHRESZEUGNIS,
            cls.JAHRESZEUGNIS,
            cls.ABSCHLUSSZEUGNIS,
            cls.ABGANGSZEUGNIS,
        )
|
||||
|
||||
|
||||
# =============================================
|
||||
# DATA STRUCTURES
|
||||
# =============================================
|
||||
|
||||
@dataclass
class PolicySet:
    """
    Policy configuration per federal state / year / subject.

    Allows state-specific differences without hard-coding them
    in the source.

    Supported procedure types:
    - Exams: abitur, vorabitur, klausur, nachpruefung
    - Certificates: halbjahreszeugnis, jahreszeugnis, abschlusszeugnis, abgangszeugnis
    """
    id: str
    bundesland: str
    jahr: int
    fach: Optional[str]  # None = applies to all subjects
    verfahren: str  # See VerfahrenType enum

    # Visibility rules (exam)
    zk_visibility_mode: ZKVisibilityMode = ZKVisibilityMode.FULL
    eh_visibility_mode: EHVisibilityMode = EHVisibilityMode.SHARED

    # EH sources (exam)
    allow_teacher_uploaded_eh: bool = True
    allow_land_uploaded_eh: bool = True
    require_rights_confirmation_on_upload: bool = True
    require_dual_control_for_official_eh_update: bool = False

    # Correction rules (exam)
    third_correction_threshold: int = 4  # grade-point deviation that triggers a third correction
    final_signoff_role: str = "fachvorsitz"

    # Certificate rules (Zeugnis)
    require_klassenlehrer_approval: bool = True
    require_schulleitung_signoff: bool = True
    allow_sekretariat_edit_after_approval: bool = False
    konferenz_protokoll_required: bool = True
    bemerkungen_require_review: bool = True
    fehlzeiten_auto_import: bool = True
    kopfnoten_enabled: bool = False
    versetzung_auto_calculate: bool = True

    # Export & display
    quote_verbatim_allowed: bool = False  # official texts shown verbatim in the UI
    export_template_id: str = "default"

    # Additional free-form flags
    flags: Dict[str, Any] = field(default_factory=dict)

    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    def is_exam_policy(self) -> bool:
        """Check whether this policy applies to exam procedures."""
        return VerfahrenType.is_exam_type(self.verfahren)

    def is_certificate_policy(self) -> bool:
        """Check whether this policy applies to certificate procedures."""
        return VerfahrenType.is_certificate_type(self.verfahren)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (enums and datetime flattened)."""
        d = asdict(self)
        d['zk_visibility_mode'] = self.zk_visibility_mode.value
        d['eh_visibility_mode'] = self.eh_visibility_mode.value
        d['created_at'] = self.created_at.isoformat()
        return d
|
||||
|
||||
|
||||
@dataclass
class RoleAssignment:
    """
    Assignment of a role to a user for a specific resource.
    """
    id: str
    user_id: str
    role: Role
    resource_type: ResourceType
    resource_id: str

    # Optional scoping restrictions
    tenant_id: Optional[str] = None
    namespace_id: Optional[str] = None

    # Validity window
    valid_from: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    valid_to: Optional[datetime] = None

    # Metadata
    granted_by: str = ""
    granted_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    revoked_at: Optional[datetime] = None

    def is_active(self) -> bool:
        """True while the assignment is not revoked and now lies inside the validity window."""
        now = datetime.now(timezone.utc)
        if self.revoked_at:
            return False
        if self.valid_to and now > self.valid_to:
            return False
        return now >= self.valid_from

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (enums and datetimes flattened)."""
        return {
            'id': self.id,
            'user_id': self.user_id,
            'role': self.role.value,
            'resource_type': self.resource_type.value,
            'resource_id': self.resource_id,
            'tenant_id': self.tenant_id,
            'namespace_id': self.namespace_id,
            'valid_from': self.valid_from.isoformat(),
            'valid_to': self.valid_to.isoformat() if self.valid_to else None,
            'granted_by': self.granted_by,
            'granted_at': self.granted_at.isoformat(),
            'revoked_at': self.revoked_at.isoformat() if self.revoked_at else None,
            'is_active': self.is_active()
        }
|
||||
|
||||
|
||||
@dataclass
class KeyShare:
    """
    Permission for a user to access encrypted content.

    A KeyShare is NOT a plaintext key; it is an authorization that
    works in combination with a role assignment.
    """
    id: str
    user_id: str
    package_id: str

    # Scope of permissions,
    # e.g. {"read_original", "read_eh", "read_ek_outputs", "write_annotations"}
    permissions: Set[str] = field(default_factory=set)

    # Optional restrictions
    scope: str = "full"  # "full", "original_only", "eh_only", "outputs_only"

    # Grant chain
    granted_by: str = ""
    granted_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    # Acceptance (invite flow)
    invite_token: Optional[str] = None
    accepted_at: Optional[datetime] = None

    # Revocation
    revoked_at: Optional[datetime] = None
    revoked_by: Optional[str] = None

    def is_active(self) -> bool:
        """Active = never revoked and, if invite-based, already accepted."""
        if self.revoked_at is not None:
            return False
        return self.invite_token is None or self.accepted_at is not None

    def to_dict(self):
        """Serialize to a JSON-friendly dict (revoked_by intentionally omitted)."""
        def _iso(ts):
            # Timestamps may be absent; serialize as None in that case.
            return ts.isoformat() if ts else None

        return {
            'id': self.id,
            'user_id': self.user_id,
            'package_id': self.package_id,
            'permissions': list(self.permissions),
            'scope': self.scope,
            'granted_by': self.granted_by,
            'granted_at': self.granted_at.isoformat(),
            'invite_token': self.invite_token,
            'accepted_at': _iso(self.accepted_at),
            'revoked_at': _iso(self.revoked_at),
            'is_active': self.is_active()
        }
|
||||
|
||||
|
||||
@dataclass
class Tenant:
    """
    Top-level isolation unit -- typically a single school.
    """
    id: str
    name: str
    bundesland: str
    tenant_type: str = "school"  # "school", "pruefungszentrum", "behoerde"

    # Encryption
    encryption_enabled: bool = True

    # Metadata
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    deleted_at: Optional[datetime] = None

    def to_dict(self):
        """Serialize to a JSON-friendly dict; the soft-delete marker is omitted."""
        serialized = dict(
            id=self.id,
            name=self.name,
            bundesland=self.bundesland,
            tenant_type=self.tenant_type,
            encryption_enabled=self.encryption_enabled,
            created_at=self.created_at.isoformat(),
        )
        return serialized
|
||||
|
||||
|
||||
@dataclass
class Namespace:
    """
    Workspace inside a tenant,
    e.g. "Abitur 2026 - Deutsch LK - Kurs 12a".
    """
    id: str
    tenant_id: str
    name: str

    # Context
    jahr: int
    fach: str
    kurs: Optional[str] = None
    pruefungsart: str = "abitur"  # "abitur", "vorabitur"

    # Policy
    policy_set_id: Optional[str] = None

    # Metadata
    created_by: str = ""
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    deleted_at: Optional[datetime] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (deleted_at intentionally omitted)."""
        return {
            'id': self.id,
            'tenant_id': self.tenant_id,
            'name': self.name,
            'jahr': self.jahr,
            'fach': self.fach,
            'kurs': self.kurs,
            'pruefungsart': self.pruefungsart,
            'policy_set_id': self.policy_set_id,
            'created_by': self.created_by,
            'created_at': self.created_at.isoformat()
        }
|
||||
|
||||
|
||||
@dataclass
class ExamPackage:
    """
    Exam package -- a complete set of student works with all artifacts.
    """
    id: str
    namespace_id: str
    tenant_id: str

    name: str
    beschreibung: Optional[str] = None

    # Workflow status
    status: str = "draft"  # "draft", "in_progress", "locked", "signed_off"

    # Participants (roles are assigned separately)
    owner_id: str = ""  # typically the first corrector (EK)

    # Encryption
    encryption_key_id: Optional[str] = None

    # Timestamps
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    locked_at: Optional[datetime] = None
    signed_off_at: Optional[datetime] = None
    signed_off_by: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (encryption_key_id intentionally omitted)."""
        return {
            'id': self.id,
            'namespace_id': self.namespace_id,
            'tenant_id': self.tenant_id,
            'name': self.name,
            'beschreibung': self.beschreibung,
            'status': self.status,
            'owner_id': self.owner_id,
            'created_at': self.created_at.isoformat(),
            'locked_at': self.locked_at.isoformat() if self.locked_at else None,
            'signed_off_at': self.signed_off_at.isoformat() if self.signed_off_at else None,
            'signed_off_by': self.signed_off_by
        }
|
||||
File diff suppressed because it is too large
Load Diff
343
klausur-service/backend/routes/eh_invitations.py
Normal file
343
klausur-service/backend/routes/eh_invitations.py
Normal file
@@ -0,0 +1,343 @@
|
||||
"""
|
||||
BYOEH Invitation Flow Routes
|
||||
|
||||
Endpoints for inviting users, listing/accepting/declining/revoking
|
||||
invitations to access Erwartungshorizonte.
|
||||
Extracted from routes/eh.py for file-size compliance.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from datetime import datetime, timezone, timedelta
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Request
|
||||
|
||||
from models.eh import EHKeyShare, EHShareInvitation
|
||||
from models.requests import EHInviteRequest, EHAcceptInviteRequest
|
||||
from services.auth_service import get_current_user
|
||||
from services.eh_service import log_eh_audit
|
||||
import storage
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# =============================================
|
||||
# INVITATION FLOW
|
||||
# =============================================
|
||||
|
||||
@router.post("/api/v1/eh/{eh_id}/invite")
async def invite_to_eh(
    eh_id: str,
    invite_request: EHInviteRequest,
    request: Request
):
    """
    Invite another user to access an Erwartungshorizont.

    Creates a pending invitation that the recipient must accept.
    Only the owning teacher may invite; a still-valid pending
    invitation for the same recipient blocks a duplicate.

    Raises:
        HTTPException 404 if the EH does not exist,
        403 if the caller is not the owner,
        400 on an unknown role,
        409 if a valid pending invitation already exists.
    """
    user = get_current_user(request)
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]

    # Check EH exists and belongs to user
    if eh_id not in storage.eh_db:
        raise HTTPException(status_code=404, detail="Erwartungshorizont not found")

    eh = storage.eh_db[eh_id]
    if eh.teacher_id != user["user_id"]:
        raise HTTPException(status_code=403, detail="Only the owner can invite others")

    # Validate role
    valid_roles = ['second_examiner', 'third_examiner', 'supervisor', 'department_head', 'fachvorsitz']
    if invite_request.role not in valid_roles:
        raise HTTPException(status_code=400, detail=f"Invalid role. Must be one of: {valid_roles}")

    now = datetime.now(timezone.utc)

    # Check for existing pending invitation to same user.
    # BUGFIX: expired invitations keep status 'pending' until someone tries
    # to accept them (expiry is only flipped lazily in the accept flow), so
    # an expired invite must not block a re-invitation here.
    for inv in storage.eh_invitations_db.values():
        if (inv.eh_id == eh_id and
                inv.invitee_email == invite_request.invitee_email and
                inv.status == 'pending' and
                inv.expires_at > now):
            raise HTTPException(
                status_code=409,
                detail="Pending invitation already exists for this user"
            )

    # Create invitation
    invitation_id = str(uuid.uuid4())
    expires_at = now + timedelta(days=invite_request.expires_in_days)

    invitation = EHShareInvitation(
        id=invitation_id,
        eh_id=eh_id,
        inviter_id=user["user_id"],
        invitee_id=invite_request.invitee_id or "",
        invitee_email=invite_request.invitee_email,
        role=invite_request.role,
        klausur_id=invite_request.klausur_id,
        message=invite_request.message,
        status='pending',
        expires_at=expires_at,
        created_at=now,
        accepted_at=None,
        declined_at=None
    )

    storage.eh_invitations_db[invitation_id] = invitation

    # Audit log
    log_eh_audit(
        tenant_id=tenant_id,
        user_id=user["user_id"],
        action="invite",
        eh_id=eh_id,
        details={
            "invitation_id": invitation_id,
            "invitee_email": invite_request.invitee_email,
            "role": invite_request.role,
            "expires_at": expires_at.isoformat()
        }
    )

    return {
        "status": "invited",
        "invitation_id": invitation_id,
        "eh_id": eh_id,
        "invitee_email": invite_request.invitee_email,
        "role": invite_request.role,
        "expires_at": expires_at.isoformat(),
        "eh_title": eh.title
    }
|
||||
|
||||
|
||||
@router.get("/api/v1/eh/invitations/pending")
async def list_pending_invitations(request: Request):
    """List all pending, unexpired invitations addressed to the current user."""
    user = get_current_user(request)
    me_email = user.get("email", "")
    me_id = user["user_id"]
    now = datetime.now(timezone.utc)

    results = []
    for inv in storage.eh_invitations_db.values():
        # Match by email or user_id; skip anything not addressed to the caller.
        if inv.invitee_email != me_email and inv.invitee_id != me_id:
            continue
        # Only still-valid pending invitations are reported.
        if inv.status != 'pending' or inv.expires_at <= now:
            continue

        eh = storage.eh_db.get(inv.eh_id)
        eh_info = None
        if eh is not None:
            eh_info = {
                "id": eh.id,
                "title": eh.title,
                "subject": eh.subject,
                "niveau": eh.niveau,
                "year": eh.year,
            }

        results.append({"invitation": inv.to_dict(), "eh": eh_info})

    return results
|
||||
|
||||
|
||||
@router.get("/api/v1/eh/invitations/sent")
async def list_sent_invitations(request: Request):
    """List all invitations sent by the current user, with brief EH context."""
    user = get_current_user(request)
    me = user["user_id"]

    results = []
    for inv in storage.eh_invitations_db.values():
        if inv.inviter_id != me:
            continue

        eh = storage.eh_db.get(inv.eh_id)
        eh_info = None
        if eh is not None:
            eh_info = {
                "id": eh.id,
                "title": eh.title,
                "subject": eh.subject,
            }

        results.append({"invitation": inv.to_dict(), "eh": eh_info})

    return results
|
||||
|
||||
|
||||
@router.post("/api/v1/eh/invitations/{invitation_id}/accept")
|
||||
async def accept_eh_invitation(
|
||||
invitation_id: str,
|
||||
accept_request: EHAcceptInviteRequest,
|
||||
request: Request
|
||||
):
|
||||
"""Accept an invitation and receive access to the EH."""
|
||||
user = get_current_user(request)
|
||||
tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
|
||||
user_email = user.get("email", "")
|
||||
user_id = user["user_id"]
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# Find invitation
|
||||
if invitation_id not in storage.eh_invitations_db:
|
||||
raise HTTPException(status_code=404, detail="Invitation not found")
|
||||
|
||||
invitation = storage.eh_invitations_db[invitation_id]
|
||||
|
||||
# Verify recipient
|
||||
if invitation.invitee_email != user_email and invitation.invitee_id != user_id:
|
||||
raise HTTPException(status_code=403, detail="This invitation is not for you")
|
||||
|
||||
# Check status
|
||||
if invitation.status != 'pending':
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Invitation is {invitation.status}, cannot accept"
|
||||
)
|
||||
|
||||
# Check expiration
|
||||
if invitation.expires_at < now:
|
||||
invitation.status = 'expired'
|
||||
raise HTTPException(status_code=400, detail="Invitation has expired")
|
||||
|
||||
# Create key share
|
||||
share_id = str(uuid.uuid4())
|
||||
key_share = EHKeyShare(
|
||||
id=share_id,
|
||||
eh_id=invitation.eh_id,
|
||||
user_id=user_id,
|
||||
encrypted_passphrase=accept_request.encrypted_passphrase,
|
||||
passphrase_hint="",
|
||||
granted_by=invitation.inviter_id,
|
||||
granted_at=now,
|
||||
role=invitation.role,
|
||||
klausur_id=invitation.klausur_id,
|
||||
active=True
|
||||
)
|
||||
|
||||
# Store key share
|
||||
if invitation.eh_id not in storage.eh_key_shares_db:
|
||||
storage.eh_key_shares_db[invitation.eh_id] = []
|
||||
storage.eh_key_shares_db[invitation.eh_id].append(key_share)
|
||||
|
||||
# Update invitation status
|
||||
invitation.status = 'accepted'
|
||||
invitation.accepted_at = now
|
||||
invitation.invitee_id = user_id # Update with actual user ID
|
||||
|
||||
# Audit log
|
||||
log_eh_audit(
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
action="accept_invite",
|
||||
eh_id=invitation.eh_id,
|
||||
details={
|
||||
"invitation_id": invitation_id,
|
||||
"share_id": share_id,
|
||||
"role": invitation.role
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "accepted",
|
||||
"share_id": share_id,
|
||||
"eh_id": invitation.eh_id,
|
||||
"role": invitation.role,
|
||||
"klausur_id": invitation.klausur_id
|
||||
}
|
||||
|
||||
|
||||
@router.post("/api/v1/eh/invitations/{invitation_id}/decline")
|
||||
async def decline_eh_invitation(invitation_id: str, request: Request):
|
||||
"""Decline an invitation."""
|
||||
user = get_current_user(request)
|
||||
tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
|
||||
user_email = user.get("email", "")
|
||||
user_id = user["user_id"]
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# Find invitation
|
||||
if invitation_id not in storage.eh_invitations_db:
|
||||
raise HTTPException(status_code=404, detail="Invitation not found")
|
||||
|
||||
invitation = storage.eh_invitations_db[invitation_id]
|
||||
|
||||
# Verify recipient
|
||||
if invitation.invitee_email != user_email and invitation.invitee_id != user_id:
|
||||
raise HTTPException(status_code=403, detail="This invitation is not for you")
|
||||
|
||||
# Check status
|
||||
if invitation.status != 'pending':
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Invitation is {invitation.status}, cannot decline"
|
||||
)
|
||||
|
||||
# Update status
|
||||
invitation.status = 'declined'
|
||||
invitation.declined_at = now
|
||||
|
||||
# Audit log
|
||||
log_eh_audit(
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
action="decline_invite",
|
||||
eh_id=invitation.eh_id,
|
||||
details={"invitation_id": invitation_id}
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "declined",
|
||||
"invitation_id": invitation_id,
|
||||
"eh_id": invitation.eh_id
|
||||
}
|
||||
|
||||
|
||||
@router.delete("/api/v1/eh/invitations/{invitation_id}")
|
||||
async def revoke_eh_invitation(invitation_id: str, request: Request):
|
||||
"""Revoke a pending invitation (by the inviter)."""
|
||||
user = get_current_user(request)
|
||||
tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
|
||||
user_id = user["user_id"]
|
||||
|
||||
# Find invitation
|
||||
if invitation_id not in storage.eh_invitations_db:
|
||||
raise HTTPException(status_code=404, detail="Invitation not found")
|
||||
|
||||
invitation = storage.eh_invitations_db[invitation_id]
|
||||
|
||||
# Verify inviter
|
||||
if invitation.inviter_id != user_id:
|
||||
raise HTTPException(status_code=403, detail="Only the inviter can revoke")
|
||||
|
||||
# Check status
|
||||
if invitation.status != 'pending':
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Invitation is {invitation.status}, cannot revoke"
|
||||
)
|
||||
|
||||
# Update status
|
||||
invitation.status = 'revoked'
|
||||
|
||||
# Audit log
|
||||
log_eh_audit(
|
||||
tenant_id=tenant_id,
|
||||
user_id=user_id,
|
||||
action="revoke_invite",
|
||||
eh_id=invitation.eh_id,
|
||||
details={
|
||||
"invitation_id": invitation_id,
|
||||
"invitee_email": invitation.invitee_email
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "revoked",
|
||||
"invitation_id": invitation_id,
|
||||
"eh_id": invitation.eh_id
|
||||
}
|
||||
347
klausur-service/backend/routes/eh_sharing.py
Normal file
347
klausur-service/backend/routes/eh_sharing.py
Normal file
@@ -0,0 +1,347 @@
|
||||
"""
|
||||
BYOEH Key Sharing and Klausur Linking Routes
|
||||
|
||||
Endpoints for sharing EH access with other examiners
|
||||
and linking EH to Klausuren.
|
||||
Extracted from routes/eh.py for file-size compliance.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Request
|
||||
|
||||
from models.eh import EHKeyShare, EHKlausurLink
|
||||
from models.requests import EHShareRequest, EHLinkKlausurRequest
|
||||
from services.auth_service import get_current_user
|
||||
from services.eh_service import log_eh_audit
|
||||
import storage
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# =============================================
|
||||
# BYOEH KEY SHARING
|
||||
# =============================================
|
||||
|
||||
@router.post("/api/v1/eh/{eh_id}/share")
|
||||
async def share_erwartungshorizont(
|
||||
eh_id: str,
|
||||
share_request: EHShareRequest,
|
||||
request: Request
|
||||
):
|
||||
"""
|
||||
Share an Erwartungshorizont with another examiner.
|
||||
|
||||
The first examiner shares their EH by providing an encrypted passphrase
|
||||
that the recipient can use.
|
||||
"""
|
||||
user = get_current_user(request)
|
||||
tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
|
||||
|
||||
# Check EH exists and belongs to user
|
||||
if eh_id not in storage.eh_db:
|
||||
raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
|
||||
|
||||
eh = storage.eh_db[eh_id]
|
||||
if eh.teacher_id != user["user_id"]:
|
||||
raise HTTPException(status_code=403, detail="Only the owner can share this EH")
|
||||
|
||||
# Validate role
|
||||
valid_roles = ['second_examiner', 'third_examiner', 'supervisor', 'department_head']
|
||||
if share_request.role not in valid_roles:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid role. Must be one of: {valid_roles}")
|
||||
|
||||
# Create key share entry
|
||||
share_id = str(uuid.uuid4())
|
||||
key_share = EHKeyShare(
|
||||
id=share_id,
|
||||
eh_id=eh_id,
|
||||
user_id=share_request.user_id,
|
||||
encrypted_passphrase=share_request.encrypted_passphrase,
|
||||
passphrase_hint=share_request.passphrase_hint or "",
|
||||
granted_by=user["user_id"],
|
||||
granted_at=datetime.now(timezone.utc),
|
||||
role=share_request.role,
|
||||
klausur_id=share_request.klausur_id,
|
||||
active=True
|
||||
)
|
||||
|
||||
# Store in memory
|
||||
if eh_id not in storage.eh_key_shares_db:
|
||||
storage.eh_key_shares_db[eh_id] = []
|
||||
storage.eh_key_shares_db[eh_id].append(key_share)
|
||||
|
||||
# Audit log
|
||||
log_eh_audit(
|
||||
tenant_id=tenant_id,
|
||||
user_id=user["user_id"],
|
||||
action="share",
|
||||
eh_id=eh_id,
|
||||
details={
|
||||
"shared_with": share_request.user_id,
|
||||
"role": share_request.role,
|
||||
"klausur_id": share_request.klausur_id
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "shared",
|
||||
"share_id": share_id,
|
||||
"eh_id": eh_id,
|
||||
"shared_with": share_request.user_id,
|
||||
"role": share_request.role
|
||||
}
|
||||
|
||||
|
||||
@router.get("/api/v1/eh/{eh_id}/shares")
|
||||
async def list_eh_shares(eh_id: str, request: Request):
|
||||
"""List all users who have access to an EH."""
|
||||
user = get_current_user(request)
|
||||
|
||||
# Check EH exists and belongs to user
|
||||
if eh_id not in storage.eh_db:
|
||||
raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
|
||||
|
||||
eh = storage.eh_db[eh_id]
|
||||
if eh.teacher_id != user["user_id"]:
|
||||
raise HTTPException(status_code=403, detail="Only the owner can view shares")
|
||||
|
||||
shares = storage.eh_key_shares_db.get(eh_id, [])
|
||||
return [share.to_dict() for share in shares if share.active]
|
||||
|
||||
|
||||
@router.delete("/api/v1/eh/{eh_id}/shares/{share_id}")
|
||||
async def revoke_eh_share(eh_id: str, share_id: str, request: Request):
|
||||
"""Revoke a shared EH access."""
|
||||
user = get_current_user(request)
|
||||
tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
|
||||
|
||||
# Check EH exists and belongs to user
|
||||
if eh_id not in storage.eh_db:
|
||||
raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
|
||||
|
||||
eh = storage.eh_db[eh_id]
|
||||
if eh.teacher_id != user["user_id"]:
|
||||
raise HTTPException(status_code=403, detail="Only the owner can revoke shares")
|
||||
|
||||
# Find and deactivate share
|
||||
shares = storage.eh_key_shares_db.get(eh_id, [])
|
||||
for share in shares:
|
||||
if share.id == share_id:
|
||||
share.active = False
|
||||
|
||||
log_eh_audit(
|
||||
tenant_id=tenant_id,
|
||||
user_id=user["user_id"],
|
||||
action="revoke_share",
|
||||
eh_id=eh_id,
|
||||
details={"revoked_user": share.user_id, "share_id": share_id}
|
||||
)
|
||||
|
||||
return {"status": "revoked", "share_id": share_id}
|
||||
|
||||
raise HTTPException(status_code=404, detail="Share not found")
|
||||
|
||||
|
||||
# =============================================
|
||||
# KLAUSUR LINKING
|
||||
# =============================================
|
||||
|
||||
@router.post("/api/v1/eh/{eh_id}/link-klausur")
|
||||
async def link_eh_to_klausur(
|
||||
eh_id: str,
|
||||
link_request: EHLinkKlausurRequest,
|
||||
request: Request
|
||||
):
|
||||
"""
|
||||
Link an Erwartungshorizont to a Klausur.
|
||||
|
||||
This creates an association between the EH and a specific Klausur.
|
||||
"""
|
||||
user = get_current_user(request)
|
||||
tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
|
||||
|
||||
# Check EH exists and user has access
|
||||
if eh_id not in storage.eh_db:
|
||||
raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
|
||||
|
||||
eh = storage.eh_db[eh_id]
|
||||
user_has_access = (
|
||||
eh.teacher_id == user["user_id"] or
|
||||
any(
|
||||
share.user_id == user["user_id"] and share.active
|
||||
for share in storage.eh_key_shares_db.get(eh_id, [])
|
||||
)
|
||||
)
|
||||
|
||||
if not user_has_access:
|
||||
raise HTTPException(status_code=403, detail="No access to this EH")
|
||||
|
||||
# Check Klausur exists
|
||||
klausur_id = link_request.klausur_id
|
||||
if klausur_id not in storage.klausuren_db:
|
||||
raise HTTPException(status_code=404, detail="Klausur not found")
|
||||
|
||||
# Create link
|
||||
link_id = str(uuid.uuid4())
|
||||
link = EHKlausurLink(
|
||||
id=link_id,
|
||||
eh_id=eh_id,
|
||||
klausur_id=klausur_id,
|
||||
linked_by=user["user_id"],
|
||||
linked_at=datetime.now(timezone.utc)
|
||||
)
|
||||
|
||||
if klausur_id not in storage.eh_klausur_links_db:
|
||||
storage.eh_klausur_links_db[klausur_id] = []
|
||||
storage.eh_klausur_links_db[klausur_id].append(link)
|
||||
|
||||
# Audit log
|
||||
log_eh_audit(
|
||||
tenant_id=tenant_id,
|
||||
user_id=user["user_id"],
|
||||
action="link_klausur",
|
||||
eh_id=eh_id,
|
||||
details={"klausur_id": klausur_id}
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "linked",
|
||||
"link_id": link_id,
|
||||
"eh_id": eh_id,
|
||||
"klausur_id": klausur_id
|
||||
}
|
||||
|
||||
|
||||
@router.get("/api/v1/klausuren/{klausur_id}/linked-eh")
|
||||
async def get_linked_eh(klausur_id: str, request: Request):
|
||||
"""Get all EH linked to a specific Klausur."""
|
||||
user = get_current_user(request)
|
||||
user_id = user["user_id"]
|
||||
|
||||
# Check Klausur exists
|
||||
if klausur_id not in storage.klausuren_db:
|
||||
raise HTTPException(status_code=404, detail="Klausur not found")
|
||||
|
||||
# Get all links for this Klausur
|
||||
links = storage.eh_klausur_links_db.get(klausur_id, [])
|
||||
|
||||
linked_ehs = []
|
||||
for link in links:
|
||||
if link.eh_id in storage.eh_db:
|
||||
eh = storage.eh_db[link.eh_id]
|
||||
|
||||
# Check if user has access to this EH
|
||||
is_owner = eh.teacher_id == user_id
|
||||
is_shared = any(
|
||||
share.user_id == user_id and share.active
|
||||
for share in storage.eh_key_shares_db.get(link.eh_id, [])
|
||||
)
|
||||
|
||||
if is_owner or is_shared:
|
||||
# Find user's share info if shared
|
||||
share_info = None
|
||||
if is_shared:
|
||||
for share in storage.eh_key_shares_db.get(link.eh_id, []):
|
||||
if share.user_id == user_id and share.active:
|
||||
share_info = share.to_dict()
|
||||
break
|
||||
|
||||
linked_ehs.append({
|
||||
"eh": eh.to_dict(),
|
||||
"link": link.to_dict(),
|
||||
"is_owner": is_owner,
|
||||
"share": share_info
|
||||
})
|
||||
|
||||
return linked_ehs
|
||||
|
||||
|
||||
@router.delete("/api/v1/eh/{eh_id}/link-klausur/{klausur_id}")
|
||||
async def unlink_eh_from_klausur(eh_id: str, klausur_id: str, request: Request):
|
||||
"""Remove the link between an EH and a Klausur."""
|
||||
user = get_current_user(request)
|
||||
tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
|
||||
|
||||
# Check EH exists and user has access
|
||||
if eh_id not in storage.eh_db:
|
||||
raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
|
||||
|
||||
eh = storage.eh_db[eh_id]
|
||||
if eh.teacher_id != user["user_id"]:
|
||||
raise HTTPException(status_code=403, detail="Only the owner can unlink")
|
||||
|
||||
# Find and remove link
|
||||
links = storage.eh_klausur_links_db.get(klausur_id, [])
|
||||
for i, link in enumerate(links):
|
||||
if link.eh_id == eh_id:
|
||||
del links[i]
|
||||
|
||||
log_eh_audit(
|
||||
tenant_id=tenant_id,
|
||||
user_id=user["user_id"],
|
||||
action="unlink_klausur",
|
||||
eh_id=eh_id,
|
||||
details={"klausur_id": klausur_id}
|
||||
)
|
||||
|
||||
return {"status": "unlinked", "eh_id": eh_id, "klausur_id": klausur_id}
|
||||
|
||||
raise HTTPException(status_code=404, detail="Link not found")
|
||||
|
||||
|
||||
@router.get("/api/v1/eh/{eh_id}/access-chain")
|
||||
async def get_eh_access_chain(eh_id: str, request: Request):
|
||||
"""
|
||||
Get the complete access chain for an EH.
|
||||
|
||||
Shows the correction chain: EK -> ZK -> DK -> FVL
|
||||
with their current access status.
|
||||
"""
|
||||
user = get_current_user(request)
|
||||
|
||||
# Check EH exists
|
||||
if eh_id not in storage.eh_db:
|
||||
raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
|
||||
|
||||
eh = storage.eh_db[eh_id]
|
||||
|
||||
# Check access - owner or shared user
|
||||
is_owner = eh.teacher_id == user["user_id"]
|
||||
is_shared = any(
|
||||
share.user_id == user["user_id"] and share.active
|
||||
for share in storage.eh_key_shares_db.get(eh_id, [])
|
||||
)
|
||||
|
||||
if not is_owner and not is_shared:
|
||||
raise HTTPException(status_code=403, detail="No access to this EH")
|
||||
|
||||
# Build access chain
|
||||
chain = {
|
||||
"eh_id": eh_id,
|
||||
"eh_title": eh.title,
|
||||
"owner": {
|
||||
"user_id": eh.teacher_id,
|
||||
"role": "erstkorrektor"
|
||||
},
|
||||
"active_shares": [],
|
||||
"pending_invitations": [],
|
||||
"revoked_shares": []
|
||||
}
|
||||
|
||||
# Active shares
|
||||
for share in storage.eh_key_shares_db.get(eh_id, []):
|
||||
share_dict = share.to_dict()
|
||||
if share.active:
|
||||
chain["active_shares"].append(share_dict)
|
||||
else:
|
||||
chain["revoked_shares"].append(share_dict)
|
||||
|
||||
# Pending invitations (only for owner)
|
||||
if is_owner:
|
||||
for inv in storage.eh_invitations_db.values():
|
||||
if inv.eh_id == eh_id and inv.status == 'pending':
|
||||
chain["pending_invitations"].append(inv.to_dict())
|
||||
|
||||
return chain
|
||||
455
klausur-service/backend/routes/eh_upload.py
Normal file
455
klausur-service/backend/routes/eh_upload.py
Normal file
@@ -0,0 +1,455 @@
|
||||
"""
|
||||
BYOEH Upload, List, and Core CRUD Routes
|
||||
|
||||
Endpoints for uploading, listing, getting, deleting,
|
||||
indexing, and RAG-querying Erwartungshorizonte.
|
||||
Extracted from routes/eh.py for file-size compliance.
|
||||
"""
|
||||
|
||||
import os
|
||||
import uuid
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Request, UploadFile, File, Form, BackgroundTasks
|
||||
|
||||
from models.enums import EHStatus
|
||||
from models.eh import (
|
||||
Erwartungshorizont,
|
||||
EHRightsConfirmation,
|
||||
)
|
||||
from models.requests import (
|
||||
EHUploadMetadata,
|
||||
EHRAGQuery,
|
||||
EHIndexRequest,
|
||||
)
|
||||
from services.auth_service import get_current_user
|
||||
from services.eh_service import log_eh_audit
|
||||
from config import EH_UPLOAD_DIR, OPENAI_API_KEY, ENVIRONMENT, RIGHTS_CONFIRMATION_TEXT
|
||||
import storage
|
||||
|
||||
# BYOEH imports
|
||||
from qdrant_service import (
|
||||
get_collection_info, delete_eh_vectors, search_eh, index_eh_chunks
|
||||
)
|
||||
from eh_pipeline import (
|
||||
decrypt_text, verify_key_hash, process_eh_for_indexing,
|
||||
generate_single_embedding, EncryptionError, EmbeddingError
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# =============================================
|
||||
# EH UPLOAD & LIST
|
||||
# =============================================
|
||||
|
||||
@router.post("/api/v1/eh/upload")
|
||||
async def upload_erwartungshorizont(
|
||||
file: UploadFile = File(...),
|
||||
metadata_json: str = Form(...),
|
||||
request: Request = None,
|
||||
background_tasks: BackgroundTasks = None
|
||||
):
|
||||
"""
|
||||
Upload an encrypted Erwartungshorizont.
|
||||
|
||||
The file MUST be client-side encrypted.
|
||||
Server stores only the encrypted blob + key hash (never the passphrase).
|
||||
"""
|
||||
user = get_current_user(request)
|
||||
tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
|
||||
|
||||
try:
|
||||
data = EHUploadMetadata(**json.loads(metadata_json))
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid metadata: {str(e)}")
|
||||
|
||||
if not data.rights_confirmed:
|
||||
raise HTTPException(status_code=400, detail="Rights confirmation required")
|
||||
|
||||
eh_id = str(uuid.uuid4())
|
||||
|
||||
# Create tenant-isolated directory
|
||||
upload_dir = f"{EH_UPLOAD_DIR}/{tenant_id}/{eh_id}"
|
||||
os.makedirs(upload_dir, exist_ok=True)
|
||||
|
||||
# Save encrypted file
|
||||
encrypted_path = f"{upload_dir}/encrypted.bin"
|
||||
content = await file.read()
|
||||
with open(encrypted_path, "wb") as f:
|
||||
f.write(content)
|
||||
|
||||
# Save salt separately
|
||||
with open(f"{upload_dir}/salt.txt", "w") as f:
|
||||
f.write(data.salt)
|
||||
|
||||
# Create EH record
|
||||
eh = Erwartungshorizont(
|
||||
id=eh_id,
|
||||
tenant_id=tenant_id,
|
||||
teacher_id=user["user_id"],
|
||||
title=data.metadata.title,
|
||||
subject=data.metadata.subject,
|
||||
niveau=data.metadata.niveau,
|
||||
year=data.metadata.year,
|
||||
aufgaben_nummer=data.metadata.aufgaben_nummer,
|
||||
encryption_key_hash=data.encryption_key_hash,
|
||||
salt=data.salt,
|
||||
encrypted_file_path=encrypted_path,
|
||||
file_size_bytes=len(content),
|
||||
original_filename=data.original_filename,
|
||||
rights_confirmed=True,
|
||||
rights_confirmed_at=datetime.now(timezone.utc),
|
||||
status=EHStatus.PENDING_RIGHTS,
|
||||
chunk_count=0,
|
||||
indexed_at=None,
|
||||
error_message=None,
|
||||
training_allowed=False, # ALWAYS FALSE - critical for compliance
|
||||
created_at=datetime.now(timezone.utc),
|
||||
deleted_at=None
|
||||
)
|
||||
|
||||
storage.eh_db[eh_id] = eh
|
||||
|
||||
# Store rights confirmation
|
||||
rights_confirmation = EHRightsConfirmation(
|
||||
id=str(uuid.uuid4()),
|
||||
eh_id=eh_id,
|
||||
teacher_id=user["user_id"],
|
||||
confirmation_type="upload",
|
||||
confirmation_text=RIGHTS_CONFIRMATION_TEXT,
|
||||
ip_address=request.client.host if request.client else None,
|
||||
user_agent=request.headers.get("user-agent"),
|
||||
confirmed_at=datetime.now(timezone.utc)
|
||||
)
|
||||
storage.eh_rights_db[rights_confirmation.id] = rights_confirmation
|
||||
|
||||
# Audit log
|
||||
log_eh_audit(
|
||||
tenant_id=tenant_id,
|
||||
user_id=user["user_id"],
|
||||
action="upload",
|
||||
eh_id=eh_id,
|
||||
details={
|
||||
"subject": data.metadata.subject,
|
||||
"year": data.metadata.year,
|
||||
"file_size": len(content)
|
||||
},
|
||||
ip_address=request.client.host if request.client else None,
|
||||
user_agent=request.headers.get("user-agent")
|
||||
)
|
||||
|
||||
return eh.to_dict()
|
||||
|
||||
|
||||
@router.get("/api/v1/eh")
|
||||
async def list_erwartungshorizonte(
|
||||
request: Request,
|
||||
subject: Optional[str] = None,
|
||||
year: Optional[int] = None
|
||||
):
|
||||
"""List all Erwartungshorizonte for the current teacher."""
|
||||
user = get_current_user(request)
|
||||
tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
|
||||
|
||||
results = []
|
||||
for eh in storage.eh_db.values():
|
||||
if eh.tenant_id == tenant_id and eh.deleted_at is None:
|
||||
if subject and eh.subject != subject:
|
||||
continue
|
||||
if year and eh.year != year:
|
||||
continue
|
||||
results.append(eh.to_dict())
|
||||
|
||||
return results
|
||||
|
||||
|
||||
# =============================================
|
||||
# SPECIFIC EH ROUTES (must come before {eh_id} catch-all)
|
||||
# =============================================
|
||||
|
||||
@router.get("/api/v1/eh/audit-log")
|
||||
async def get_eh_audit_log(
|
||||
request: Request,
|
||||
eh_id: Optional[str] = None,
|
||||
limit: int = 100
|
||||
):
|
||||
"""Get BYOEH audit log entries."""
|
||||
user = get_current_user(request)
|
||||
tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]
|
||||
|
||||
# Filter by tenant
|
||||
entries = [e for e in storage.eh_audit_db if e.tenant_id == tenant_id]
|
||||
|
||||
# Filter by EH if specified
|
||||
if eh_id:
|
||||
entries = [e for e in entries if e.eh_id == eh_id]
|
||||
|
||||
# Sort and limit
|
||||
entries = sorted(entries, key=lambda e: e.created_at, reverse=True)[:limit]
|
||||
|
||||
return [e.to_dict() for e in entries]
|
||||
|
||||
|
||||
@router.get("/api/v1/eh/rights-text")
|
||||
async def get_rights_confirmation_text():
|
||||
"""Get the rights confirmation text for display in UI."""
|
||||
return {
|
||||
"text": RIGHTS_CONFIRMATION_TEXT,
|
||||
"version": "v1.0"
|
||||
}
|
||||
|
||||
|
||||
@router.get("/api/v1/eh/qdrant-status")
|
||||
async def get_qdrant_status(request: Request):
|
||||
"""Get Qdrant collection status (admin only)."""
|
||||
user = get_current_user(request)
|
||||
if user.get("role") != "admin" and ENVIRONMENT != "development":
|
||||
raise HTTPException(status_code=403, detail="Admin access required")
|
||||
|
||||
return await get_collection_info()
|
||||
|
||||
|
||||
@router.get("/api/v1/eh/shared-with-me")
|
||||
async def list_shared_eh(request: Request):
|
||||
"""List all EH shared with the current user."""
|
||||
user = get_current_user(request)
|
||||
user_id = user["user_id"]
|
||||
|
||||
shared_ehs = []
|
||||
for eh_id, shares in storage.eh_key_shares_db.items():
|
||||
for share in shares:
|
||||
if share.user_id == user_id and share.active:
|
||||
if eh_id in storage.eh_db:
|
||||
eh = storage.eh_db[eh_id]
|
||||
shared_ehs.append({
|
||||
"eh": eh.to_dict(),
|
||||
"share": share.to_dict()
|
||||
})
|
||||
|
||||
return shared_ehs
|
||||
|
||||
|
||||
# =============================================
|
||||
# GENERIC EH ROUTES
|
||||
# =============================================
|
||||
|
||||
@router.get("/api/v1/eh/{eh_id}")
|
||||
async def get_erwartungshorizont(eh_id: str, request: Request):
|
||||
"""Get a specific Erwartungshorizont by ID."""
|
||||
user = get_current_user(request)
|
||||
|
||||
if eh_id not in storage.eh_db:
|
||||
raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
|
||||
|
||||
eh = storage.eh_db[eh_id]
|
||||
if eh.teacher_id != user["user_id"] and user.get("role") != "admin":
|
||||
raise HTTPException(status_code=403, detail="Access denied")
|
||||
|
||||
if eh.deleted_at is not None:
|
||||
raise HTTPException(status_code=404, detail="Erwartungshorizont was deleted")
|
||||
|
||||
return eh.to_dict()
|
||||
|
||||
|
||||
@router.delete("/api/v1/eh/{eh_id}")
|
||||
async def delete_erwartungshorizont(eh_id: str, request: Request):
|
||||
"""Soft-delete an Erwartungshorizont and remove vectors from Qdrant."""
|
||||
user = get_current_user(request)
|
||||
|
||||
if eh_id not in storage.eh_db:
|
||||
raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
|
||||
|
||||
eh = storage.eh_db[eh_id]
|
||||
if eh.teacher_id != user["user_id"] and user.get("role") != "admin":
|
||||
raise HTTPException(status_code=403, detail="Access denied")
|
||||
|
||||
# Soft delete
|
||||
eh.deleted_at = datetime.now(timezone.utc)
|
||||
|
||||
# Delete vectors from Qdrant
|
||||
try:
|
||||
deleted_count = await delete_eh_vectors(eh_id)
|
||||
print(f"Deleted {deleted_count} vectors for EH {eh_id}")
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to delete vectors: {e}")
|
||||
|
||||
# Audit log
|
||||
log_eh_audit(
|
||||
tenant_id=eh.tenant_id,
|
||||
user_id=user["user_id"],
|
||||
action="delete",
|
||||
eh_id=eh_id,
|
||||
ip_address=request.client.host if request.client else None,
|
||||
user_agent=request.headers.get("user-agent")
|
||||
)
|
||||
|
||||
return {"status": "deleted", "id": eh_id}
|
||||
|
||||
|
||||
@router.post("/api/v1/eh/{eh_id}/index")
|
||||
async def index_erwartungshorizont(
|
||||
eh_id: str,
|
||||
data: EHIndexRequest,
|
||||
request: Request
|
||||
):
|
||||
"""
|
||||
Index an Erwartungshorizont for RAG queries.
|
||||
|
||||
Requires the passphrase to decrypt, chunk, embed, and re-encrypt chunks.
|
||||
The passphrase is only used transiently and never stored.
|
||||
"""
|
||||
user = get_current_user(request)
|
||||
|
||||
if eh_id not in storage.eh_db:
|
||||
raise HTTPException(status_code=404, detail="Erwartungshorizont not found")
|
||||
|
||||
eh = storage.eh_db[eh_id]
|
||||
if eh.teacher_id != user["user_id"] and user.get("role") != "admin":
|
||||
raise HTTPException(status_code=403, detail="Access denied")
|
||||
|
||||
# Verify passphrase matches key hash
|
||||
if not verify_key_hash(data.passphrase, eh.salt, eh.encryption_key_hash):
|
||||
raise HTTPException(status_code=401, detail="Invalid passphrase")
|
||||
|
||||
eh.status = EHStatus.PROCESSING
|
||||
|
||||
try:
|
||||
# Read encrypted file
|
||||
with open(eh.encrypted_file_path, "rb") as f:
|
||||
encrypted_content = f.read()
|
||||
|
||||
# Decrypt the file
|
||||
decrypted_text = decrypt_text(
|
||||
encrypted_content.decode('utf-8'),
|
||||
data.passphrase,
|
||||
eh.salt
|
||||
)
|
||||
|
||||
# Process for indexing
|
||||
chunk_count, chunks_data = await process_eh_for_indexing(
|
||||
eh_id=eh_id,
|
||||
tenant_id=eh.tenant_id,
|
||||
subject=eh.subject,
|
||||
text_content=decrypted_text,
|
||||
passphrase=data.passphrase,
|
||||
salt_hex=eh.salt
|
||||
)
|
||||
|
||||
# Index in Qdrant
|
||||
await index_eh_chunks(
|
||||
eh_id=eh_id,
|
||||
tenant_id=eh.tenant_id,
|
||||
subject=eh.subject,
|
||||
chunks=chunks_data
|
||||
)
|
||||
|
||||
# Update EH record
|
||||
eh.status = EHStatus.INDEXED
|
||||
eh.chunk_count = chunk_count
|
||||
eh.indexed_at = datetime.now(timezone.utc)
|
||||
|
||||
# Audit log
|
||||
log_eh_audit(
|
||||
tenant_id=eh.tenant_id,
|
||||
user_id=user["user_id"],
|
||||
action="indexed",
|
||||
eh_id=eh_id,
|
||||
details={"chunk_count": chunk_count}
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "indexed",
|
||||
"id": eh_id,
|
||||
"chunk_count": chunk_count
|
||||
}
|
||||
|
||||
except EncryptionError as e:
|
||||
eh.status = EHStatus.ERROR
|
||||
eh.error_message = str(e)
|
||||
raise HTTPException(status_code=400, detail=f"Decryption failed: {str(e)}")
|
||||
except EmbeddingError as e:
|
||||
eh.status = EHStatus.ERROR
|
||||
eh.error_message = str(e)
|
||||
raise HTTPException(status_code=500, detail=f"Embedding generation failed: {str(e)}")
|
||||
except Exception as e:
|
||||
eh.status = EHStatus.ERROR
|
||||
eh.error_message = str(e)
|
||||
raise HTTPException(status_code=500, detail=f"Indexing failed: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/api/v1/eh/rag-query")
async def rag_query_eh(data: EHRAGQuery, request: Request):
    """
    Run a RAG query against the teacher's Erwartungshorizonte (EH).

    Flow:
        1. Embed the query text and run a tenant-isolated semantic search
           in Qdrant.
        2. Decrypt the matching chunks on-the-fly with the caller-supplied
           passphrase (chunks that fail to decrypt are skipped).
        3. Return the joined context plus per-chunk source metadata for
           downstream LLM usage.

    Raises:
        HTTPException 500: missing OpenAI key, embedding failure, or any
            unexpected error during search/decryption.
    """
    user = get_current_user(request)
    # Tenant scoping: prefer explicit tenant, then school, then the user itself.
    tenant_id = user.get("tenant_id") or user.get("school_id") or user["user_id"]

    if not OPENAI_API_KEY:
        raise HTTPException(status_code=500, detail="OpenAI API key not configured")

    try:
        # Embed the query once, then search within this tenant's namespace only.
        query_embedding = await generate_single_embedding(data.query_text)

        hits = await search_eh(
            query_embedding=query_embedding,
            tenant_id=tenant_id,
            subject=data.subject,
            limit=data.limit,
        )

        # Decrypt each hit; silently drop chunks the passphrase cannot open
        # (a wrong passphrase for a *different* EH is expected, not an error).
        decrypted_chunks = []
        for hit in hits:
            record = storage.eh_db.get(hit["eh_id"])
            if not record or not hit.get("encrypted_content"):
                continue
            try:
                plain_text = decrypt_text(
                    hit["encrypted_content"],
                    data.passphrase,
                    record.salt,
                )
            except EncryptionError:
                continue
            decrypted_chunks.append({
                "text": plain_text,
                "eh_id": hit["eh_id"],
                "eh_title": record.title,
                "chunk_index": hit["chunk_index"],
                "score": hit["score"],
            })

        # Audit trail: log query metadata only, never the query content itself.
        log_eh_audit(
            tenant_id=tenant_id,
            user_id=user["user_id"],
            action="rag_query",
            details={
                "query_length": len(data.query_text),
                "results_count": len(hits),
                "decrypted_count": len(decrypted_chunks),
            },
            ip_address=request.client.host if request.client else None,
            user_agent=request.headers.get("user-agent"),
        )

        joined_context = "\n\n---\n\n".join(chunk["text"] for chunk in decrypted_chunks)
        return {
            "context": joined_context,
            "sources": decrypted_chunks,
            "query": data.query_text,
        }

    except EmbeddingError as e:
        raise HTTPException(status_code=500, detail=f"Query embedding failed: {str(e)}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"RAG query failed: {str(e)}")
|
||||
Reference in New Issue
Block a user