""" BreakPilot Compliance SDK - RAG Service Retrieval-Augmented Generation service for legal document search and Q&A. """ import os from contextlib import asynccontextmanager from fastapi import FastAPI, HTTPException, UploadFile, File from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel from typing import List, Optional import structlog from rag.search import SearchService from rag.assistant import AssistantService from rag.documents import DocumentService from config import Settings # Configure logging structlog.configure( processors=[ structlog.processors.TimeStamper(fmt="iso"), structlog.processors.JSONRenderer() ] ) logger = structlog.get_logger() # Load settings settings = Settings() # Services search_service: SearchService = None assistant_service: AssistantService = None document_service: DocumentService = None @asynccontextmanager async def lifespan(app: FastAPI): """Application lifespan handler.""" global search_service, assistant_service, document_service logger.info("Starting RAG Service", version="0.0.1") # Initialize services search_service = SearchService(settings) assistant_service = AssistantService(settings) document_service = DocumentService(settings) # Initialize vector store with legal corpus await search_service.initialize() logger.info("RAG Service ready", regulations=len(search_service.regulations), total_chunks=search_service.total_chunks) yield logger.info("Shutting down RAG Service") app = FastAPI( title="BreakPilot RAG Service", description="Legal document search and Q&A service", version="0.0.1", lifespan=lifespan ) app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # ============================================================================= # Models # ============================================================================= class SearchRequest(BaseModel): query: str regulation_codes: Optional[List[str]] = None limit: int = 10 min_score: float = 0.7 class SearchResult(BaseModel): content: str regulation_code: str article: Optional[str] = None paragraph: Optional[str] = None score: float metadata: dict = {} class SearchResponse(BaseModel): query: str results: List[SearchResult] total: int class AskRequest(BaseModel): question: str context: Optional[str] = None regulation_codes: Optional[List[str]] = None include_citations: bool = True class Citation(BaseModel): regulation_code: str article: str text: str relevance: float class AskResponse(BaseModel): question: str answer: str citations: List[Citation] confidence: float class RegulationInfo(BaseModel): code: str name: str chunks: int last_updated: str # ============================================================================= # Endpoints # ============================================================================= @app.get("/health") async def health(): """Health check endpoint.""" return { "status": "healthy", "service": "rag-service", "version": "0.0.1", "regulations": len(search_service.regulations) if search_service else 0 } @app.post("/api/v1/search", response_model=SearchResponse) async def search(request: SearchRequest): """Perform semantic search across legal documents.""" try: results = await search_service.search( query=request.query, regulation_codes=request.regulation_codes, limit=request.limit, min_score=request.min_score ) return SearchResponse( query=request.query, results=[SearchResult(**r) for r in results], total=len(results) ) except Exception as e: logger.error("Search failed", error=str(e)) raise HTTPException(status_code=500, detail=str(e)) @app.post("/api/v1/ask", response_model=AskResponse) async def ask(request: AskRequest): """Ask a question about legal requirements.""" try: response = await assistant_service.ask( question=request.question, context=request.context, regulation_codes=request.regulation_codes, include_citations=request.include_citations ) return AskResponse( question=request.question, answer=response["answer"], citations=[Citation(**c) for c in response.get("citations", [])], confidence=response.get("confidence", 0.9) ) except Exception as e: logger.error("Ask failed", error=str(e)) raise HTTPException(status_code=500, detail=str(e)) @app.get("/api/v1/regulations", response_model=List[RegulationInfo]) async def get_regulations(): """Get list of available regulations.""" return search_service.get_regulations() @app.get("/api/v1/regulations/{code}") async def get_regulation(code: str): """Get details of a specific regulation.""" regulation = search_service.get_regulation(code) if not regulation: raise HTTPException(status_code=404, detail="Regulation not found") return regulation @app.post("/api/v1/documents") async def upload_document( file: UploadFile = File(...), regulation_code: Optional[str] = None ): """Upload a custom document for indexing.""" try: result = await document_service.process_upload( file=file, regulation_code=regulation_code ) return { "id": result["id"], "filename": file.filename, "chunks": result["chunks"], "status": "INDEXED" } except Exception as e: logger.error("Document upload failed", error=str(e)) raise HTTPException(status_code=500, detail=str(e)) @app.delete("/api/v1/documents/{document_id}") async def delete_document(document_id: str): """Delete a custom document.""" try: await document_service.delete(document_id) return {"status": "deleted", "id": document_id} except Exception as e: logger.error("Document deletion failed", error=str(e)) raise HTTPException(status_code=500, detail=str(e)) if __name__ == "__main__": import uvicorn uvicorn.run( "main:app", host="0.0.0.0", port=int(os.getenv("PORT", "8082")), reload=os.getenv("ENVIRONMENT") != "production" )