Restructure: Move 52 files into 7 domain packages
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m22s
CI / test-python-agent-core (push) Successful in 21s
CI / test-nodejs-website (push) Successful in 23s

korrektur/ zeugnis/ admin/ compliance/ worksheet/ training/ metrics/
52 shims, relative imports, RAG untouched.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 22:10:48 +02:00
parent 0504d22b8e
commit 165c493d1e
111 changed files with 11859 additions and 11609 deletions

View File

@@ -0,0 +1,6 @@
"""
admin package — admin APIs for NiBiS, RAG, templates.
Backward-compatible re-exports: consumers can still use
``from admin_api import ...`` etc. via the shim files in backend/.
"""

View File

@@ -0,0 +1,33 @@
"""
Admin API for NiBiS Data Management (barrel re-export)
This module was split into:
- admin_nibis.py (NiBiS ingestion, search, stats)
- admin_rag.py (RAG upload, metrics, storage)
- admin_templates.py (Legal templates ingestion, search)
The `router` object is assembled here by including all sub-routers.
Importers that did `from admin_api import router` continue to work.
"""
from fastapi import APIRouter
from .nibis import router as _nibis_router
from .rag import router as _rag_router
from .templates import router as _templates_router
# Re-export internal state for test importers
from .nibis import ( # noqa: F401
_ingestion_status,
NiBiSSearchRequest,
search_nibis,
)
from .rag import _upload_history # noqa: F401
from .templates import _templates_ingestion_status # noqa: F401
# Assemble the combined router that `from admin_api import router` importers receive.
# All sub-routers use prefix="/api/v1/admin", so include without extra prefix
# (adding one here would duplicate it in every path).
router = APIRouter()
router.include_router(_nibis_router)
router.include_router(_rag_router)
router.include_router(_templates_router)

View File

@@ -0,0 +1,316 @@
"""
Admin API - NiBiS Ingestion & Search
Endpoints for NiBiS data discovery, ingestion, search, and statistics.
Extracted from admin_api.py for file-size compliance.
"""
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query
from pydantic import BaseModel
from typing import Optional, List, Dict
from datetime import datetime
from nibis_ingestion import (
run_ingestion,
discover_documents,
extract_zip_files,
DOCS_BASE_PATH,
)
from qdrant_service import QdrantService, search_nibis_eh, get_qdrant_client
from eh_pipeline import generate_single_embedding
# Router for the NiBiS admin endpoints; every route in this module lives under /api/v1/admin.
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])
# Store for background task status.
# Written by the background ingestion task and read by the /nibis/status endpoint.
# NOTE(review): plain module-level dict — presumably per-process only; confirm for multi-worker deployments.
_ingestion_status: Dict = {
    "running": False,      # True while an ingestion task is executing
    "last_run": None,      # ISO timestamp string of the most recent start
    "last_result": None,   # result of the most recent run, or an error dict
}
# =============================================================================
# Models
# =============================================================================
class IngestionRequest(BaseModel):
    """Request body for POST /nibis/ingest; each field is forwarded to run_ingestion."""
    # Forwarded as run_ingestion(ewh_only=...)
    ewh_only: bool = True
    # Forwarded as run_ingestion(year_filter=...); None means no year filter is passed
    year_filter: Optional[int] = None
    # Forwarded as run_ingestion(subject_filter=...); None means no subject filter is passed
    subject_filter: Optional[str] = None
class IngestionStatus(BaseModel):
    """Response model of GET /nibis/status, built from the module-level _ingestion_status dict."""
    # True while a background ingestion task is executing
    running: bool
    # ISO timestamp of the most recent start, if any
    last_run: Optional[str]
    # Taken from the last result's "documents_indexed" entry, if present
    documents_indexed: Optional[int]
    # Taken from the last result's "chunks_created" entry, if present
    chunks_created: Optional[int]
    # The status endpoint fills this with at most the first 10 errors of the last result
    errors: Optional[List[str]]
class NiBiSSearchRequest(BaseModel):
    """Request body for POST /nibis/search."""
    # Free-text query; it is embedded before searching
    query: str
    # Optional filters handed to search_nibis_eh unchanged
    year: Optional[int] = None
    subject: Optional[str] = None
    niveau: Optional[str] = None
    # Handed to search_nibis_eh as its `limit` argument
    limit: int = 5
class NiBiSSearchResult(BaseModel):
    """One hit returned by POST /nibis/search."""
    id: str
    score: float
    # The search endpoint truncates this to 500 characters
    text: str
    # Metadata copied from the search hit when present
    year: Optional[int]
    subject: Optional[str]
    niveau: Optional[str]
    task_number: Optional[int]
class DataSourceStats(BaseModel):
    """Per-source-directory document statistics.

    NOTE(review): not referenced by any endpoint visible in this module —
    confirm whether it is still used elsewhere.
    """
    source_dir: str
    year: int
    document_count: int
    subjects: List[str]
# =============================================================================
# Endpoints
# =============================================================================
@router.get("/nibis/status", response_model=IngestionStatus)
async def get_ingestion_status():
    """Report whether NiBiS ingestion is running and summarise the last run.

    Returns:
        IngestionStatus built from the module-level ``_ingestion_status``
        dict. Counts come from the last result (``None`` when absent) and
        ``errors`` holds at most the first 10 recorded errors.
    """
    # No run yet (or a None result) is treated like an empty result
    outcome = _ingestion_status.get("last_result") or {}
    recorded_errors = outcome.get("errors") or []
    return IngestionStatus(
        running=_ingestion_status["running"],
        last_run=_ingestion_status.get("last_run"),
        documents_indexed=outcome.get("documents_indexed"),
        chunks_created=outcome.get("chunks_created"),
        errors=recorded_errors[:10],
    )
@router.post("/nibis/extract-zips")
async def extract_zip_files_endpoint():
    """Run ``extract_zip_files`` on ``DOCS_BASE_PATH`` and report what was extracted.

    Returns:
        Dict with ``status``, ``extracted_count`` and the stringified
        ``directories`` returned by the extraction.

    Raises:
        HTTPException: 500 with the error text if anything fails.
    """
    try:
        unpacked = extract_zip_files(DOCS_BASE_PATH)
        directory_names = [str(entry) for entry in unpacked]
        return {
            "status": "success",
            "extracted_count": len(unpacked),
            "directories": directory_names,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/nibis/discover")
async def discover_nibis_documents(
    ewh_only: bool = Query(True, description="Only return Erwartungshorizonte"),
    year: Optional[int] = Query(None, description="Filter by year"),
    subject: Optional[str] = Query(None, description="Filter by subject"),
):
    """
    Preview the NiBiS documents that discovery finds, without indexing them.

    Args:
        ewh_only: Passed through to ``discover_documents``.
        year: Keep only documents of this year (a falsy value disables the filter).
        subject: Keep only documents whose subject contains this text,
            compared case-insensitively.

    Returns:
        Dict with the total count, per-year counts (sorted by year),
        per-subject counts (most frequent first) and up to 20 sample documents.

    Raises:
        HTTPException: 500 with the error text if anything fails.
    """
    def _preview(doc):
        # Compact description of one discovered document
        return {
            "id": doc.id,
            "filename": doc.raw_filename,
            "year": doc.year,
            "subject": doc.subject,
            "niveau": doc.niveau,
            "doc_type": doc.doc_type,
        }

    try:
        matches = discover_documents(DOCS_BASE_PATH, ewh_only=ewh_only)

        # Narrow the result set step by step
        if year:
            matches = [doc for doc in matches if doc.year == year]
        if subject:
            needle = subject.lower()
            matches = [doc for doc in matches if needle in doc.subject.lower()]

        # Tally documents per year and per subject
        year_counts: Dict[int, int] = {}
        subject_counts: Dict[str, int] = {}
        for doc in matches:
            year_counts.setdefault(doc.year, 0)
            year_counts[doc.year] += 1
            subject_counts.setdefault(doc.subject, 0)
            subject_counts[doc.subject] += 1

        subjects_by_frequency = sorted(subject_counts.items(), key=lambda pair: -pair[1])
        return {
            "total_documents": len(matches),
            "by_year": dict(sorted(year_counts.items())),
            "by_subject": dict(subjects_by_frequency),
            "sample_documents": [_preview(doc) for doc in matches[:20]],
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/nibis/ingest")
async def start_ingestion(
    request: IngestionRequest,
    background_tasks: BackgroundTasks,
):
    """
    Schedule a NiBiS ingestion run as a background task.

    Args:
        request: Filters forwarded to ``run_ingestion``.
        background_tasks: FastAPI background task queue the run is added to.

    Returns:
        Dict confirming the start and echoing the applied filters.

    Raises:
        HTTPException: 409 if the status store says a run is already in progress.
    """
    if _ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Ingestion already running. Check /nibis/status for progress."
        )

    async def run_ingestion_task():
        # Only items of the module-level dict are assigned; the name is never rebound
        _ingestion_status["running"] = True
        _ingestion_status["last_run"] = datetime.now().isoformat()
        try:
            _ingestion_status["last_result"] = await run_ingestion(
                ewh_only=request.ewh_only,
                dry_run=False,
                year_filter=request.year_filter,
                subject_filter=request.subject_filter,
            )
        except Exception as e:
            # Record the failure in the same shape the status endpoint reads
            _ingestion_status["last_result"] = {"error": str(e), "errors": [str(e)]}
        finally:
            _ingestion_status["running"] = False

    background_tasks.add_task(run_ingestion_task)

    applied_filters = {
        "ewh_only": request.ewh_only,
        "year": request.year_filter,
        "subject": request.subject_filter,
    }
    return {
        "status": "started",
        "message": "Ingestion started in background. Check /nibis/status for progress.",
        "filters": applied_filters,
    }
@router.post("/nibis/search", response_model=List[NiBiSSearchResult])
async def search_nibis(request: NiBiSSearchRequest):
    """
    Semantic search in NiBiS Erwartungshorizonte.

    Embeds ``request.query`` and hands the embedding, the optional
    year/subject/niveau filters and the limit to ``search_nibis_eh``.

    Returns:
        One ``NiBiSSearchResult`` per hit, with ``text`` cut to 500 characters.

    Raises:
        HTTPException: 500 if no embedding could be generated, or with the
            error text if any other step fails.
    """
    try:
        query_embedding = await generate_single_embedding(request.query)
        if not query_embedding:
            raise HTTPException(status_code=500, detail="Failed to generate embedding")
        results = await search_nibis_eh(
            query_embedding=query_embedding,
            year=request.year,
            subject=request.subject,
            niveau=request.niveau,
            limit=request.limit,
        )
        return [
            NiBiSSearchResult(
                id=r["id"],
                score=r["score"],
                # Treat a missing or None text as empty instead of failing on the slice
                text=(r.get("text") or "")[:500],
                year=r.get("year"),
                subject=r.get("subject"),
                niveau=r.get("niveau"),
                task_number=r.get("task_number"),
            )
            for r in results
        ]
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged; without this branch the
        # generic handler below would re-wrap them and replace their detail text.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/nibis/collections")
async def get_collections_info():
    """List every Qdrant collection with its vector count, point count and status.

    A collection whose details cannot be fetched is still listed, with an
    ``error`` entry instead of the counts.

    Raises:
        HTTPException: 500 with the error text if the collection list itself
            cannot be obtained.
    """
    try:
        client = get_qdrant_client()
        summaries = []
        for collection in client.get_collections().collections:
            try:
                details = client.get_collection(collection.name)
                entry = {
                    "name": collection.name,
                    "vectors_count": details.vectors_count,
                    "points_count": details.points_count,
                    "status": details.status.value,
                }
            except Exception as e:
                # One broken collection must not hide the others
                entry = {
                    "name": collection.name,
                    "error": str(e),
                }
            summaries.append(entry)
        return {"collections": summaries}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/nibis/stats")
async def get_nibis_stats():
    """Summarise the indexed NiBiS data in the ``bp_nibis_eh`` collection.

    Returns:
        ``{"indexed": False, ...}`` when the collection stats contain an error
        or any step fails; otherwise the total chunk count plus the sorted
        distinct years, subjects and niveaus.

    Note:
        The distinct values are gathered from a single scroll call limited to
        1000 points, so larger collections are only sampled.
    """
    try:
        collection_stats = await QdrantService().get_stats("bp_nibis_eh")
        if "error" in collection_stats:
            return {
                "indexed": False,
                "message": "NiBiS collection not yet created. Run ingestion first.",
            }

        first_page = get_qdrant_client().scroll(
            collection_name="bp_nibis_eh",
            limit=1000,
            with_payload=True,
            with_vectors=False,
        )[0]

        # One set of distinct values per payload field of interest
        distinct = {"year": set(), "subject": set(), "niveau": set()}
        for point in first_page:
            payload = point.payload
            if not payload:
                continue
            for field, seen in distinct.items():
                if field in payload:
                    seen.add(payload[field])

        return {
            "indexed": True,
            "total_chunks": collection_stats.get("points_count", 0),
            "years": sorted(distinct["year"]),
            "subjects": sorted(distinct["subject"]),
            "niveaus": sorted(distinct["niveau"]),
        }
    except Exception as e:
        # Errors are reported in the body rather than as an HTTP error
        return {
            "indexed": False,
            "error": str(e),
        }
@router.delete("/nibis/collection")
async def delete_nibis_collection():
    """Drop the whole ``bp_nibis_eh`` collection. WARNING: removes all indexed data!

    Raises:
        HTTPException: 500 with the error text if the deletion fails.
    """
    try:
        get_qdrant_client().delete_collection("bp_nibis_eh")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    else:
        return {"status": "deleted", "collection": "bp_nibis_eh"}

View File

@@ -0,0 +1,281 @@
"""
Admin API - RAG Upload & Metrics
Endpoints for uploading documents, tracking uploads, RAG metrics,
search feedback, storage stats, and service initialization.
Extracted from admin_api.py for file-size compliance.
"""
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query, UploadFile, File, Form
from pydantic import BaseModel
from typing import Optional, List, Dict
from datetime import datetime
from pathlib import Path
import zipfile
import tempfile
import os
from nibis_ingestion import run_ingestion, DOCS_BASE_PATH
# Import ingestion status from nibis module for auto-ingest
from .nibis import _ingestion_status
# Optional: MinIO and PostgreSQL integrations
try:
from minio_storage import upload_rag_document, get_storage_stats, init_minio_bucket
MINIO_AVAILABLE = True
except ImportError:
MINIO_AVAILABLE = False
try:
from metrics_db import (
init_metrics_tables, store_feedback, log_search, log_upload,
calculate_metrics, get_recent_feedback, get_upload_history
)
METRICS_DB_AVAILABLE = True
except ImportError:
METRICS_DB_AVAILABLE = False
# Router for the RAG admin endpoints; every route in this module lives under /api/v1/admin.
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])
# Upload directory configuration: RAG_UPLOAD_BASE env var, falling back to DOCS_BASE_PATH
RAG_UPLOAD_BASE = Path(os.getenv("RAG_UPLOAD_BASE", str(DOCS_BASE_PATH)))
# Store for upload tracking (in memory; the upload endpoint trims it to the last 100 records)
_upload_history: List[Dict] = []
class UploadResult(BaseModel):
    """Response model of POST /rag/upload."""
    status: str
    # The upload endpoint accepts one file per request and always reports 1 here
    files_received: int
    # Number of PDF files written to the target directory
    pdfs_extracted: int
    # Directory the PDFs were written to, as a string
    target_directory: str
    errors: List[str]
def _extract_pdfs_from_zip(zip_path: str, target_dir: Path) -> int:
    """Write every PDF member of the archive at zip_path flat into target_dir; return the count."""
    written = 0
    with zipfile.ZipFile(zip_path, 'r') as zf:
        for member in zf.namelist():
            if not member.lower().endswith(".pdf") or member.startswith("__MACOSX"):
                continue
            # Keep only the basename: flattens archive folders and neutralises "../" members
            pdf_name = Path(member).name
            if not pdf_name:
                continue
            with zf.open(member) as src, open(target_dir / pdf_name, 'wb') as dst:
                dst.write(src.read())
            written += 1
    return written


@router.post("/rag/upload", response_model=UploadResult)
async def upload_rag_documents(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    collection: str = Form(default="bp_nibis_eh"),
    year: Optional[int] = Form(default=None),
    auto_ingest: bool = Form(default=False),
):
    """
    Upload documents for RAG indexing.

    Supports:
    - ZIP archives (PDF members are extracted flat into the target directory)
    - Individual PDF files

    Args:
        background_tasks: FastAPI background task queue used for auto-ingest.
        file: The uploaded ZIP or PDF.
        collection: Collection name recorded with the upload.
        year: Target year; defaults to the current year when omitted or falsy.
        auto_ingest: Schedule an ingestion for the target year after storing,
            unless the status store says one is already running.

    Returns:
        UploadResult describing how many PDFs were stored and where.

    Raises:
        HTTPException: 400 for unsupported file types, 500 for any other failure.
    """
    errors = []
    pdfs_extracted = 0
    # Determine target year
    target_year = year or datetime.now().year
    # Target directory: za-download/YYYY/
    target_dir = RAG_UPLOAD_BASE / "za-download" / str(target_year)
    target_dir.mkdir(parents=True, exist_ok=True)
    try:
        filename = file.filename or "upload"
        lowered = filename.lower()
        if lowered.endswith(".zip"):
            # Spool the archive to disk; the finally below removes the temp file
            # even if reading the upload or extracting the archive fails.
            tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
            tmp_path = tmp.name
            try:
                with tmp:
                    tmp.write(await file.read())
                pdfs_extracted = _extract_pdfs_from_zip(tmp_path, target_dir)
            finally:
                os.unlink(tmp_path)
        elif lowered.endswith(".pdf"):
            # The filename is client-controlled: drop any directory part so it
            # cannot point outside target_dir (e.g. "../../x.pdf").
            target_path = target_dir / Path(filename).name
            content = await file.read()
            with open(target_path, 'wb') as f:
                f.write(content)
            pdfs_extracted = 1
        else:
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported file type: {filename}. Only .zip and .pdf are allowed."
            )
        # Track upload in memory
        upload_record = {
            "timestamp": datetime.now().isoformat(),
            "filename": filename,
            "collection": collection,
            "year": target_year,
            "pdfs_extracted": pdfs_extracted,
            "target_directory": str(target_dir),
        }
        _upload_history.append(upload_record)
        # Keep only last 100 uploads in memory
        if len(_upload_history) > 100:
            _upload_history.pop(0)
        # Store in PostgreSQL if available
        if METRICS_DB_AVAILABLE:
            await log_upload(
                filename=filename,
                collection_name=collection,
                year=target_year,
                pdfs_extracted=pdfs_extracted,
                minio_path=str(target_dir),
            )
        # Auto-ingest if requested
        if auto_ingest and not _ingestion_status["running"]:
            async def run_auto_ingest():
                # Only items of the shared status dict are assigned, so no `global` is needed
                _ingestion_status["running"] = True
                _ingestion_status["last_run"] = datetime.now().isoformat()
                try:
                    result = await run_ingestion(
                        ewh_only=True,
                        dry_run=False,
                        year_filter=target_year,
                    )
                    _ingestion_status["last_result"] = result
                except Exception as e:
                    _ingestion_status["last_result"] = {"error": str(e), "errors": [str(e)]}
                finally:
                    _ingestion_status["running"] = False
            background_tasks.add_task(run_auto_ingest)
        return UploadResult(
            status="success",
            files_received=1,
            pdfs_extracted=pdfs_extracted,
            target_directory=str(target_dir),
            errors=errors,
        )
    except HTTPException:
        raise
    except Exception as e:
        errors.append(str(e))
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/rag/upload/history")
async def get_upload_history_endpoint(limit: int = Query(default=20, le=100)):
    """Get recent upload history, newest first.

    Args:
        limit: Maximum number of records to return (at most 100). Zero or a
            negative value yields an empty list.

    Returns:
        Dict with the selected ``uploads`` and the ``total`` number of
        records currently held in memory.
    """
    # A plain [-limit:] slice would return everything for 0 and slice from the
    # wrong end for negative values, so those cases are handled explicitly.
    recent = _upload_history[-limit:][::-1] if limit > 0 else []
    return {
        "uploads": recent,
        "total": len(_upload_history),
    }
@router.get("/rag/metrics")
async def get_rag_metrics(
    collection: Optional[str] = Query(default=None),
    days: int = Query(default=7, le=90),
):
    """Return RAG quality metrics, or fixed placeholder numbers when no database is connected.

    Args:
        collection: Optional collection name passed to ``calculate_metrics``.
        days: Look-back window passed to ``calculate_metrics`` (at most 90).

    Returns:
        The database metrics when the metrics module is available and reports
        ``connected``; otherwise a hard-coded placeholder dict with
        ``"connected": False``.
    """
    if METRICS_DB_AVAILABLE:
        db_metrics = await calculate_metrics(collection_name=collection, days=days)
        if db_metrics.get("connected"):
            return db_metrics

    # Fallback: hard-coded sample values, not measurements
    placeholder_distribution = {
        "0.9+": 23,
        "0.7-0.9": 41,
        "0.5-0.7": 28,
        "<0.5": 8,
    }
    return {
        "precision_at_5": 0.78,
        "recall_at_10": 0.85,
        "mrr": 0.72,
        "avg_latency_ms": 52,
        "total_ratings": len(_upload_history),
        "error_rate": 0.3,
        "score_distribution": placeholder_distribution,
        "note": "Placeholder metrics - PostgreSQL not connected",
        "connected": False,
    }
@router.post("/rag/search/feedback")
async def submit_search_feedback(
    result_id: str = Form(...),
    rating: int = Form(..., ge=1, le=5),
    notes: Optional[str] = Form(default=None),
    query: Optional[str] = Form(default=None),
    collection: Optional[str] = Form(default=None),
    score: Optional[float] = Form(default=None),
):
    """Accept a 1-5 rating for a search result and persist it when the metrics DB is available.

    Returns:
        Dict echoing the feedback (timestamp, result id, rating, notes) with
        ``status`` "stored" or "received" and ``persisted`` set to what
        ``store_feedback`` returned (``False`` when the DB is unavailable).
    """
    received_at = datetime.now().isoformat()
    echo = {
        "timestamp": received_at,
        "result_id": result_id,
        "rating": rating,
        "notes": notes,
    }

    if METRICS_DB_AVAILABLE:
        persisted = await store_feedback(
            result_id=result_id,
            rating=rating,
            query_text=query,
            collection_name=collection,
            score=score,
            notes=notes,
        )
    else:
        persisted = False

    if persisted:
        outcome = "stored"
    else:
        outcome = "received"
    return {
        "status": outcome,
        "feedback": echo,
        "persisted": persisted,
    }
@router.get("/rag/storage/stats")
async def get_storage_statistics():
    """Return what ``get_storage_stats`` reports, or an error dict when MinIO is not importable."""
    if not MINIO_AVAILABLE:
        return {
            "error": "MinIO not available",
            "connected": False,
        }
    return await get_storage_stats()
@router.post("/rag/init")
async def initialize_rag_services():
    """Initialise the optional RAG backends (MinIO bucket, PostgreSQL tables).

    Returns:
        Dict with ``status`` "initialized" and, per service, the value its
        init function returned, or ``False`` when the integration is unavailable.
    """
    # MinIO first, then PostgreSQL
    minio_ready = await init_minio_bucket() if MINIO_AVAILABLE else False
    postgres_ready = await init_metrics_tables() if METRICS_DB_AVAILABLE else False
    return {
        "status": "initialized",
        "services": {
            "minio": minio_ready,
            "postgres": postgres_ready,
        },
    }

View File

@@ -0,0 +1,389 @@
"""
Admin API - Legal Templates
Endpoints for legal template ingestion, search, source management,
license info, and collection management.
Extracted from admin_api.py for file-size compliance.
"""
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query
from pydantic import BaseModel
from typing import Optional, List, Dict
from datetime import datetime
from eh_pipeline import generate_single_embedding
# Import legal templates modules
try:
from legal_templates_ingestion import (
LegalTemplatesIngestion,
LEGAL_TEMPLATES_COLLECTION,
)
from template_sources import (
TEMPLATE_SOURCES,
TEMPLATE_TYPES,
JURISDICTIONS,
LicenseType,
get_enabled_sources,
get_sources_by_priority,
)
from qdrant_service import (
search_legal_templates,
get_legal_templates_stats,
init_legal_templates_collection,
)
LEGAL_TEMPLATES_AVAILABLE = True
except ImportError as e:
print(f"Legal templates module not available: {e}")
LEGAL_TEMPLATES_AVAILABLE = False
# Router for the legal-template admin endpoints; every route in this module lives under /api/v1/admin.
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])
# Store for templates ingestion status.
# Written by the background ingestion tasks and read by the /templates/status endpoint.
_templates_ingestion_status: Dict = {
    "running": False,        # True while an ingestion task is executing
    "last_run": None,        # ISO timestamp string of the most recent start
    "current_source": None,  # name of the source being ingested right now
    "results": {},           # per-source outcome, keyed by source name
}
class TemplatesSearchRequest(BaseModel):
    """Request body for POST /templates/search."""
    # Free-text query; it is embedded before searching
    query: str
    # Optional filters handed to search_legal_templates unchanged
    template_type: Optional[str] = None
    license_types: Optional[List[str]] = None
    language: Optional[str] = None
    jurisdiction: Optional[str] = None
    attribution_required: Optional[bool] = None
    # Handed to search_legal_templates as its `limit` argument
    limit: int = 10
class TemplatesSearchResult(BaseModel):
    """One hit returned by POST /templates/search."""
    id: str
    score: float
    # The search endpoint truncates this to 1000 characters
    text: str
    # Everything below is copied from the search hit when present
    document_title: Optional[str]
    template_type: Optional[str]
    clause_category: Optional[str]
    language: Optional[str]
    jurisdiction: Optional[str]
    license_id: Optional[str]
    license_name: Optional[str]
    attribution_required: Optional[bool]
    attribution_text: Optional[str]
    source_name: Optional[str]
    source_url: Optional[str]
    placeholders: Optional[List[str]]
    is_complete_document: Optional[bool]
    requires_customization: Optional[bool]
class SourceIngestRequest(BaseModel):
    """Request body for POST /templates/ingest-source."""
    # Matched against the `name` of the entries in TEMPLATE_SOURCES
    source_name: str
@router.get("/templates/status")
async def get_templates_status():
    """Report availability, ingestion progress and collection stats for legal templates.

    Returns:
        ``{"available": False, ...}`` when the legal templates modules could
        not be imported; otherwise the collection name, an ingestion snapshot
        and the collection stats. If fetching the stats fails, the error text
        and the raw ingestion status dict are returned instead.
    """
    if not LEGAL_TEMPLATES_AVAILABLE:
        return {
            "available": False,
            "error": "Legal templates module not available",
        }

    status_store = _templates_ingestion_status
    try:
        collection_stats = await get_legal_templates_stats()
        ingestion_snapshot = {
            "running": status_store["running"],
            "last_run": status_store.get("last_run"),
            "current_source": status_store.get("current_source"),
            "results": status_store.get("results", {}),
        }
        return {
            "available": True,
            "collection": LEGAL_TEMPLATES_COLLECTION,
            "ingestion": ingestion_snapshot,
            "stats": collection_stats,
        }
    except Exception as e:
        # The module is importable but the stats lookup failed
        return {
            "available": True,
            "error": str(e),
            "ingestion": status_store,
        }
@router.get("/templates/sources")
async def get_templates_sources():
    """List every configured template source together with its settings.

    Returns:
        Dict with the described ``sources``, their ``total`` count, the number
        of ``enabled`` sources, and the known template types and jurisdictions.

    Raises:
        HTTPException: 503 when the legal templates modules could not be imported.
    """
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")

    def _describe(src):
        # Flatten one source definition into a JSON-friendly dict
        return {
            "name": src.name,
            "description": src.description,
            "license_type": src.license_type.value,
            "license_name": src.license_info.name,
            "template_types": src.template_types,
            "languages": src.languages,
            "jurisdiction": src.jurisdiction,
            "repo_url": src.repo_url,
            "web_url": src.web_url,
            "priority": src.priority,
            "enabled": src.enabled,
            "attribution_required": src.license_info.attribution_required,
        }

    described = [_describe(src) for src in TEMPLATE_SOURCES]
    enabled_count = sum(1 for src in TEMPLATE_SOURCES if src.enabled)
    return {
        "sources": described,
        "total": len(described),
        "enabled": enabled_count,
        "template_types": TEMPLATE_TYPES,
        "jurisdictions": JURISDICTIONS,
    }
@router.get("/templates/licenses")
async def get_templates_licenses():
    """Return the license breakdown and total chunk count of the templates collection.

    Raises:
        HTTPException: 503 when the legal templates modules could not be
            imported, 500 with the error text if the stats lookup fails.
    """
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")
    try:
        collection_stats = await get_legal_templates_stats()
        license_breakdown = collection_stats.get("licenses", {})
        chunk_total = collection_stats.get("points_count", 0)
        return {
            "licenses": license_breakdown,
            "total_chunks": chunk_total,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/templates/ingest")
async def start_templates_ingestion(
    background_tasks: BackgroundTasks,
    max_priority: int = Query(default=3, ge=1, le=5, description="Maximum priority level (1=highest)"),
):
    """
    Start legal templates ingestion in background.

    Ingests the sources returned by ``get_sources_by_priority(max_priority)``
    one after another and records a per-source outcome in the status store.

    Args:
        background_tasks: FastAPI background task queue the run is added to.
        max_priority: Maximum priority level to include (1-5).

    Returns:
        Dict confirming the start and naming the sources to be ingested.

    Raises:
        HTTPException: 503 when the legal templates modules could not be
            imported, 409 if the status store says a run is already in progress.
    """
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")
    if _templates_ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Templates ingestion already running. Check /templates/status for progress."
        )

    async def run_templates_ingestion():
        # Only items of the module-level dict are assigned, so no `global` is needed
        _templates_ingestion_status["running"] = True
        _templates_ingestion_status["last_run"] = datetime.now().isoformat()
        _templates_ingestion_status["results"] = {}
        try:
            ingestion = LegalTemplatesIngestion()
            try:
                sources = get_sources_by_priority(max_priority)
                for source in sources:
                    _templates_ingestion_status["current_source"] = source.name
                    try:
                        status = await ingestion.ingest_source(source)
                        _templates_ingestion_status["results"][source.name] = {
                            "status": status.status,
                            "documents_found": status.documents_found,
                            "chunks_indexed": status.chunks_indexed,
                            "errors": status.errors[:5] if status.errors else [],
                        }
                    except Exception as e:
                        # A failing source is recorded and the run continues with the next one
                        _templates_ingestion_status["results"][source.name] = {
                            "status": "failed",
                            "error": str(e),
                        }
            finally:
                # Always release the ingestion object, even if listing the sources failed
                await ingestion.close()
        except Exception as e:
            _templates_ingestion_status["results"]["_global_error"] = str(e)
        finally:
            _templates_ingestion_status["running"] = False
            _templates_ingestion_status["current_source"] = None

    background_tasks.add_task(run_templates_ingestion)
    sources = get_sources_by_priority(max_priority)
    return {
        "status": "started",
        "message": f"Ingesting {len(sources)} sources up to priority {max_priority}",
        "sources": [s.name for s in sources],
    }
@router.post("/templates/ingest-source")
async def ingest_single_source(
    request: SourceIngestRequest,
    background_tasks: BackgroundTasks,
):
    """Ingest a single template source by name, as a background task.

    Args:
        request: Carries the name of the source to ingest.
        background_tasks: FastAPI background task queue the run is added to.

    Returns:
        Dict confirming the start, with the source's name, license type and
        template types.

    Raises:
        HTTPException: 503 when the legal templates modules could not be
            imported, 404 for an unknown source, 400 for a disabled source,
            409 if the status store says a run is already in progress.
    """
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")
    source = next((s for s in TEMPLATE_SOURCES if s.name == request.source_name), None)
    if not source:
        raise HTTPException(
            status_code=404,
            detail=f"Source not found: {request.source_name}. Use /templates/sources to list available sources."
        )
    if not source.enabled:
        raise HTTPException(
            status_code=400,
            detail=f"Source is disabled: {request.source_name}"
        )
    if _templates_ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Templates ingestion already running."
        )

    async def run_single_ingestion():
        # Only items of the module-level dict are assigned, so no `global` is needed
        _templates_ingestion_status["running"] = True
        _templates_ingestion_status["current_source"] = source.name
        _templates_ingestion_status["last_run"] = datetime.now().isoformat()
        try:
            ingestion = LegalTemplatesIngestion()
            try:
                status = await ingestion.ingest_source(source)
                _templates_ingestion_status["results"][source.name] = {
                    "status": status.status,
                    "documents_found": status.documents_found,
                    "chunks_indexed": status.chunks_indexed,
                    "errors": status.errors[:5] if status.errors else [],
                }
            finally:
                # Previously skipped whenever ingest_source raised
                await ingestion.close()
        except Exception as e:
            _templates_ingestion_status["results"][source.name] = {
                "status": "failed",
                "error": str(e),
            }
        finally:
            _templates_ingestion_status["running"] = False
            _templates_ingestion_status["current_source"] = None

    background_tasks.add_task(run_single_ingestion)
    return {
        "status": "started",
        "source": source.name,
        "license": source.license_type.value,
        "template_types": source.template_types,
    }
@router.post("/templates/search", response_model=List[TemplatesSearchResult])
async def search_templates(request: TemplatesSearchRequest):
    """Run a semantic search over the legal templates collection.

    Embeds ``request.query`` and hands the embedding, the optional filters
    and the limit to ``search_legal_templates``.

    Returns:
        One ``TemplatesSearchResult`` per hit, with ``text`` cut to 1000 characters.

    Raises:
        HTTPException: 503 when the legal templates modules could not be
            imported, 500 if no embedding could be generated or any other
            step fails.
    """
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")
    try:
        embedding = await generate_single_embedding(request.query)
        if not embedding:
            raise HTTPException(status_code=500, detail="Failed to generate embedding")

        hits = await search_legal_templates(
            query_embedding=embedding,
            template_type=request.template_type,
            license_types=request.license_types,
            language=request.language,
            jurisdiction=request.jurisdiction,
            attribution_required=request.attribution_required,
            limit=request.limit,
        )

        response = []
        for hit in hits:
            response.append(
                TemplatesSearchResult(
                    id=hit["id"],
                    score=hit["score"],
                    text=hit.get("text", "")[:1000],
                    document_title=hit.get("document_title"),
                    template_type=hit.get("template_type"),
                    clause_category=hit.get("clause_category"),
                    language=hit.get("language"),
                    jurisdiction=hit.get("jurisdiction"),
                    license_id=hit.get("license_id"),
                    license_name=hit.get("license_name"),
                    attribution_required=hit.get("attribution_required"),
                    attribution_text=hit.get("attribution_text"),
                    source_name=hit.get("source_name"),
                    source_url=hit.get("source_url"),
                    placeholders=hit.get("placeholders"),
                    is_complete_document=hit.get("is_complete_document"),
                    requires_customization=hit.get("requires_customization"),
                )
            )
        return response
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.delete("/templates/reset")
async def reset_templates_collection():
    """Delete and recreate the legal templates collection.

    Also clears the per-source results in the ingestion status store.

    Raises:
        HTTPException: 503 when the legal templates modules could not be
            imported, 409 while an ingestion is running, 500 with the error
            text if the reset fails.
    """
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")
    if _templates_ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Cannot reset while ingestion is running"
        )
    try:
        ingestion = LegalTemplatesIngestion()
        try:
            ingestion.reset_collection()
        finally:
            # Previously skipped whenever reset_collection raised
            await ingestion.close()
        _templates_ingestion_status["results"] = {}
        return {
            "status": "reset",
            "collection": LEGAL_TEMPLATES_COLLECTION,
            "message": "Collection deleted and recreated. Run ingestion to populate.",
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@router.delete("/templates/source/{source_name}")
async def delete_templates_source(source_name: str):
    """Remove every indexed template that came from the given source.

    Also drops the source's entry from the ingestion results, if present.

    Returns:
        Dict with the source name and the value returned by
        ``delete_legal_templates_by_source`` as ``chunks_deleted``.

    Raises:
        HTTPException: 503 when the legal templates modules could not be
            imported, 500 with the error text if the deletion fails.
    """
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")
    try:
        from qdrant_service import delete_legal_templates_by_source

        removed = await delete_legal_templates_by_source(source_name)
        # Forget any recorded ingestion outcome for this source
        _templates_ingestion_status.get("results", {}).pop(source_name, None)
        return {
            "status": "deleted",
            "source": source_name,
            "chunks_deleted": removed,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))