""" Admin API - Legal Templates Endpoints for legal template ingestion, search, source management, license info, and collection management. Extracted from admin_api.py for file-size compliance. """ from fastapi import APIRouter, HTTPException, BackgroundTasks, Query from pydantic import BaseModel from typing import Optional, List, Dict from datetime import datetime from eh_pipeline import generate_single_embedding # Import legal templates modules try: from legal_templates_ingestion import ( LegalTemplatesIngestion, LEGAL_TEMPLATES_COLLECTION, ) from template_sources import ( TEMPLATE_SOURCES, TEMPLATE_TYPES, JURISDICTIONS, LicenseType, get_enabled_sources, get_sources_by_priority, ) from qdrant_service import ( search_legal_templates, get_legal_templates_stats, init_legal_templates_collection, ) LEGAL_TEMPLATES_AVAILABLE = True except ImportError as e: print(f"Legal templates module not available: {e}") LEGAL_TEMPLATES_AVAILABLE = False router = APIRouter(prefix="/api/v1/admin", tags=["Admin"]) # Store for templates ingestion status _templates_ingestion_status: Dict = { "running": False, "last_run": None, "current_source": None, "results": {}, } class TemplatesSearchRequest(BaseModel): query: str template_type: Optional[str] = None license_types: Optional[List[str]] = None language: Optional[str] = None jurisdiction: Optional[str] = None attribution_required: Optional[bool] = None limit: int = 10 class TemplatesSearchResult(BaseModel): id: str score: float text: str document_title: Optional[str] template_type: Optional[str] clause_category: Optional[str] language: Optional[str] jurisdiction: Optional[str] license_id: Optional[str] license_name: Optional[str] attribution_required: Optional[bool] attribution_text: Optional[str] source_name: Optional[str] source_url: Optional[str] placeholders: Optional[List[str]] is_complete_document: Optional[bool] requires_customization: Optional[bool] class SourceIngestRequest(BaseModel): source_name: str 
def _require_templates_available() -> None:
    """Raise HTTP 503 unless the legal templates modules were imported."""
    if not LEGAL_TEMPLATES_AVAILABLE:
        raise HTTPException(status_code=503, detail="Legal templates module not available")


def _summarize_ingestion(status) -> Dict:
    """Build the per-source result dict stored in the ingestion status.

    Reads ``status``, ``documents_found``, ``chunks_indexed`` and ``errors``
    from the object returned by ``ingest_source``; only the first five
    errors are kept.
    """
    return {
        "status": status.status,
        "documents_found": status.documents_found,
        "chunks_indexed": status.chunks_indexed,
        "errors": status.errors[:5] if status.errors else [],
    }


@router.get("/templates/status")
async def get_templates_status():
    """Get status of legal templates collection and ingestion.

    Unlike the other endpoints this one never raises: an unavailable module
    or a failing stats call is reported inside the returned dict.
    """
    if not LEGAL_TEMPLATES_AVAILABLE:
        return {
            "available": False,
            "error": "Legal templates module not available",
        }

    try:
        stats = await get_legal_templates_stats()
        return {
            "available": True,
            "collection": LEGAL_TEMPLATES_COLLECTION,
            "ingestion": {
                "running": _templates_ingestion_status["running"],
                "last_run": _templates_ingestion_status.get("last_run"),
                "current_source": _templates_ingestion_status.get("current_source"),
                "results": _templates_ingestion_status.get("results", {}),
            },
            "stats": stats,
        }
    except Exception as e:
        # Stats failed: still report the ingestion state alongside the error.
        return {
            "available": True,
            "error": str(e),
            "ingestion": _templates_ingestion_status,
        }


@router.get("/templates/sources")
async def get_templates_sources():
    """Get list of all template sources with their configuration.

    Raises:
        HTTPException: 503 when the legal templates modules are unavailable.
    """
    _require_templates_available()

    sources = [
        {
            "name": source.name,
            "description": source.description,
            "license_type": source.license_type.value,
            "license_name": source.license_info.name,
            "template_types": source.template_types,
            "languages": source.languages,
            "jurisdiction": source.jurisdiction,
            "repo_url": source.repo_url,
            "web_url": source.web_url,
            "priority": source.priority,
            "enabled": source.enabled,
            "attribution_required": source.license_info.attribution_required,
        }
        for source in TEMPLATE_SOURCES
    ]

    return {
        "sources": sources,
        "total": len(sources),
        "enabled": sum(1 for s in TEMPLATE_SOURCES if s.enabled),
        "template_types": TEMPLATE_TYPES,
        "jurisdictions": JURISDICTIONS,
    }


@router.get("/templates/licenses")
async def get_templates_licenses():
    """Get license statistics for indexed templates.

    Raises:
        HTTPException: 503 when the legal templates modules are unavailable,
            500 when the stats lookup fails.
    """
    _require_templates_available()

    try:
        stats = await get_legal_templates_stats()
        return {
            "licenses": stats.get("licenses", {}),
            "total_chunks": stats.get("points_count", 0),
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post("/templates/ingest")
async def start_templates_ingestion(
    background_tasks: BackgroundTasks,
    max_priority: int = Query(default=3, ge=1, le=5, description="Maximum priority level (1=highest)"),
):
    """
    Start legal templates ingestion in background.

    Ingests all enabled sources up to the specified priority level.

    Raises:
        HTTPException: 503 when the legal templates modules are unavailable,
            409 when an ingestion is already marked as running.
    """
    _require_templates_available()

    # NOTE(review): "running" is only set once the background task starts, so
    # two requests arriving before that point both pass this check - confirm
    # whether an earlier guard is needed.
    if _templates_ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Templates ingestion already running. Check /templates/status for progress."
        )

    # Resolve the source list once; the response and the task share it.
    sources = get_sources_by_priority(max_priority)

    async def run_templates_ingestion():
        _templates_ingestion_status["running"] = True
        _templates_ingestion_status["last_run"] = datetime.now().isoformat()
        _templates_ingestion_status["results"] = {}
        results = _templates_ingestion_status["results"]

        try:
            ingestion = LegalTemplatesIngestion()
            try:
                for source in sources:
                    _templates_ingestion_status["current_source"] = source.name
                    try:
                        status = await ingestion.ingest_source(source)
                        results[source.name] = _summarize_ingestion(status)
                    except Exception as e:
                        # One failing source must not abort the remaining ones.
                        results[source.name] = {
                            "status": "failed",
                            "error": str(e),
                        }
            finally:
                # Always release the ingestion object, even on unexpected errors.
                await ingestion.close()
        except Exception as e:
            results["_global_error"] = str(e)
        finally:
            _templates_ingestion_status["running"] = False
            _templates_ingestion_status["current_source"] = None

    background_tasks.add_task(run_templates_ingestion)

    return {
        "status": "started",
        "message": f"Ingesting {len(sources)} sources up to priority {max_priority}",
        "sources": [s.name for s in sources],
    }


@router.post("/templates/ingest-source")
async def ingest_single_source(
    request: SourceIngestRequest,
    background_tasks: BackgroundTasks,
):
    """Ingest a single template source by name.

    Raises:
        HTTPException: 503 when the legal templates modules are unavailable,
            404 for an unknown source name, 400 for a disabled source,
            409 when an ingestion is already marked as running.
    """
    _require_templates_available()

    source = next((s for s in TEMPLATE_SOURCES if s.name == request.source_name), None)
    if not source:
        raise HTTPException(
            status_code=404,
            detail=f"Source not found: {request.source_name}. Use /templates/sources to list available sources."
        )

    if not source.enabled:
        raise HTTPException(
            status_code=400,
            detail=f"Source is disabled: {request.source_name}"
        )

    if _templates_ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Templates ingestion already running."
        )

    async def run_single_ingestion():
        _templates_ingestion_status["running"] = True
        _templates_ingestion_status["current_source"] = source.name
        _templates_ingestion_status["last_run"] = datetime.now().isoformat()

        try:
            ingestion = LegalTemplatesIngestion()
            try:
                status = await ingestion.ingest_source(source)
            finally:
                # Close even when ingest_source raises, so nothing leaks.
                await ingestion.close()
            _templates_ingestion_status["results"][source.name] = _summarize_ingestion(status)
        except Exception as e:
            _templates_ingestion_status["results"][source.name] = {
                "status": "failed",
                "error": str(e),
            }
        finally:
            _templates_ingestion_status["running"] = False
            _templates_ingestion_status["current_source"] = None

    background_tasks.add_task(run_single_ingestion)

    return {
        "status": "started",
        "source": source.name,
        "license": source.license_type.value,
        "template_types": source.template_types,
    }


@router.post("/templates/search", response_model=List[TemplatesSearchResult])
async def search_templates(request: TemplatesSearchRequest):
    """Semantic search in legal templates collection.

    Embeds the query text, runs the filtered search and returns the hits
    with their text truncated to 1000 characters.

    Raises:
        HTTPException: 503 when the legal templates modules are unavailable,
            500 when no embedding is produced or the search fails.
    """
    _require_templates_available()

    try:
        query_embedding = await generate_single_embedding(request.query)

        if not query_embedding:
            raise HTTPException(status_code=500, detail="Failed to generate embedding")

        results = await search_legal_templates(
            query_embedding=query_embedding,
            template_type=request.template_type,
            license_types=request.license_types,
            language=request.language,
            jurisdiction=request.jurisdiction,
            attribution_required=request.attribution_required,
            limit=request.limit,
        )

        return [
            TemplatesSearchResult(
                id=r["id"],
                score=r["score"],
                # "or" also covers a stored None, which .get's default would not.
                text=(r.get("text") or "")[:1000],
                document_title=r.get("document_title"),
                template_type=r.get("template_type"),
                clause_category=r.get("clause_category"),
                language=r.get("language"),
                jurisdiction=r.get("jurisdiction"),
                license_id=r.get("license_id"),
                license_name=r.get("license_name"),
                attribution_required=r.get("attribution_required"),
                attribution_text=r.get("attribution_text"),
                source_name=r.get("source_name"),
                source_url=r.get("source_url"),
                placeholders=r.get("placeholders"),
                is_complete_document=r.get("is_complete_document"),
                requires_customization=r.get("requires_customization"),
            )
            for r in results
        ]
    except HTTPException:
        # Keep deliberate HTTP errors (e.g. the embedding failure) as they are.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.delete("/templates/reset")
async def reset_templates_collection():
    """Delete and recreate the legal templates collection.

    Raises:
        HTTPException: 503 when the legal templates modules are unavailable,
            409 while an ingestion is marked as running, 500 when the reset
            fails.
    """
    _require_templates_available()

    if _templates_ingestion_status["running"]:
        raise HTTPException(
            status_code=409,
            detail="Cannot reset while ingestion is running"
        )

    try:
        ingestion = LegalTemplatesIngestion()
        try:
            ingestion.reset_collection()
        finally:
            # Close even when the reset itself fails.
            await ingestion.close()

        _templates_ingestion_status["results"] = {}

        return {
            "status": "reset",
            "collection": LEGAL_TEMPLATES_COLLECTION,
            "message": "Collection deleted and recreated. Run ingestion to populate.",
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.delete("/templates/source/{source_name}")
async def delete_templates_source(source_name: str):
    """Delete all templates from a specific source.

    Also drops the source's entry from the in-memory ingestion results.

    Raises:
        HTTPException: 503 when the legal templates modules are unavailable,
            500 when the deletion (or its import) fails.
    """
    _require_templates_available()

    try:
        # Imported here, inside the try, so an import failure becomes a 500.
        from qdrant_service import delete_legal_templates_by_source

        count = await delete_legal_templates_by_source(source_name)

        _templates_ingestion_status.get("results", {}).pop(source_name, None)

        return {
            "status": "deleted",
            "source": source_name,
            "chunks_deleted": count,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e