Initial commit: breakpilot-core - Shared Infrastructure
Docker Compose with 24+ services: - PostgreSQL (PostGIS), Valkey, MinIO, Qdrant - Vault (PKI/TLS), Nginx (Reverse Proxy) - Backend Core API, Consent Service, Billing Service - RAG Service, Embedding Service - Gitea, Woodpecker CI/CD - Night Scheduler, Health Aggregator - Jitsi (Web/XMPP/JVB/Jicofo), Mailpit Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
11
rag-service/api/__init__.py
Normal file
11
rag-service/api/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""Aggregate API router for the RAG service.

Combines the collections, documents, and search sub-routers into a single
router that the application can mount.
"""

from fastapi import APIRouter

from api.collections import router as collections_router
from api.documents import router as documents_router
from api.search import router as search_router

# Parent router. Each sub-router already carries its own URL prefix;
# only the OpenAPI tags are assigned here.
router = APIRouter()

router.include_router(collections_router, tags=["Collections"])
router.include_router(documents_router, tags=["Documents"])
router.include_router(search_router, tags=["Search"])
|
||||
46
rag-service/api/auth.py
Normal file
46
rag-service/api/auth.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""Optional JWT authentication helper.
|
||||
|
||||
If JWT_SECRET is configured and an Authorization header is present, the token
|
||||
is verified. If no header is present or JWT_SECRET is empty, the request is
|
||||
allowed through (public access).
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import HTTPException, Request
|
||||
from jose import JWTError, jwt
|
||||
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger("rag-service.auth")
|
||||
|
||||
|
||||
def optional_jwt_auth(request: Request) -> Optional[dict]:
    """
    Validate the JWT from the Authorization header if present.

    Returns the decoded token payload, or None if no auth was provided
    (no header, or no JWT_SECRET configured).
    Raises HTTPException 401 if a token IS provided but is invalid.
    """
    auth_header: Optional[str] = request.headers.get("authorization")

    # No credentials supplied: allow the request through (public access).
    if not auth_header:
        return None

    if not settings.JWT_SECRET:
        # No secret configured -- skip validation
        return None

    # Expect "Bearer <token>"
    parts = auth_header.split()
    if len(parts) != 2 or parts[0].lower() != "bearer":
        raise HTTPException(status_code=401, detail="Invalid Authorization header format")

    token = parts[1]
    try:
        payload = jwt.decode(token, settings.JWT_SECRET, algorithms=["HS256"])
    except JWTError as exc:
        logger.warning("JWT verification failed: %s", exc)
        # Chain the cause so tracebacks show the underlying JWT error.
        raise HTTPException(status_code=401, detail="Invalid or expired token") from exc
    return payload
|
||||
77
rag-service/api/collections.py
Normal file
77
rag-service/api/collections.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Request
|
||||
from pydantic import BaseModel
|
||||
|
||||
from api.auth import optional_jwt_auth
|
||||
from qdrant_client_wrapper import qdrant_wrapper, ALL_DEFAULT_COLLECTIONS
|
||||
|
||||
logger = logging.getLogger("rag-service.api.collections")
|
||||
|
||||
router = APIRouter(prefix="/api/v1/collections")
|
||||
|
||||
|
||||
# ---- Request / Response models --------------------------------------------
|
||||
|
||||
class CreateCollectionRequest(BaseModel):
    """Request body for POST /api/v1/collections."""

    # Collection name as it will appear in Qdrant.
    name: str
    # Embedding dimensionality for the new collection (default 1536;
    # presumably matches the embedding service's output size -- confirm).
    vector_size: int = 1536
|
||||
|
||||
|
||||
class CollectionInfoResponse(BaseModel):
    """Stats for a single Qdrant collection; fields are None when unavailable.

    NOTE(review): not referenced by the endpoints visible in this module --
    possibly consumed elsewhere; verify before removing.
    """

    name: str
    vectors_count: Optional[int] = None
    points_count: Optional[int] = None
    status: Optional[str] = None
    vector_size: Optional[int] = None
|
||||
|
||||
|
||||
# ---- Endpoints ------------------------------------------------------------
|
||||
|
||||
@router.post("", status_code=201)
async def create_collection(body: CreateCollectionRequest, request: Request):
    """Create a new Qdrant collection.

    Returns the collection name, its vector size, and the wrapper's
    "created" flag. Raises HTTP 500 if the Qdrant call fails.
    """
    optional_jwt_auth(request)
    try:
        created = await qdrant_wrapper.create_collection(body.name, body.vector_size)
    except Exception as exc:
        logger.error("Failed to create collection '%s': %s", body.name, exc)
        # Chain the cause so the original Qdrant error is preserved.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {
        "collection": body.name,
        "vector_size": body.vector_size,
        "created": created,
    }
|
||||
|
||||
|
||||
@router.get("")
async def list_collections(request: Request):
    """List all Qdrant collections.

    Returns {"collections": [names...], "count": N}.
    Raises HTTP 500 if Qdrant cannot be reached.
    """
    optional_jwt_auth(request)
    try:
        # NOTE(review): this is a synchronous client call inside an async
        # handler; it blocks the event loop while Qdrant responds -- check
        # whether the wrapper offers an async variant.
        result = qdrant_wrapper.client.get_collections()
    except Exception as exc:
        logger.error("Failed to list collections: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    names = [c.name for c in result.collections]
    return {"collections": names, "count": len(names)}
|
||||
|
||||
|
||||
@router.get("/defaults")
async def list_default_collections(request: Request):
    """Expose the pre-configured default collections and their dimensions."""
    # Auth is optional: no token passes through, a bad token yields 401.
    optional_jwt_auth(request)
    response = {"defaults": ALL_DEFAULT_COLLECTIONS}
    return response
|
||||
|
||||
|
||||
@router.get("/{collection_name}")
async def get_collection_info(collection_name: str, request: Request):
    """Get stats for a single collection.

    Returns whatever the Qdrant wrapper reports for the collection.
    Raises HTTP 404 if the collection is missing or the lookup fails.
    """
    optional_jwt_auth(request)
    try:
        info = await qdrant_wrapper.get_collection_info(collection_name)
    except Exception as exc:
        logger.error("Failed to get collection info for '%s': %s", collection_name, exc)
        # Chain the cause so the original lookup error is preserved.
        raise HTTPException(status_code=404, detail=f"Collection '{collection_name}' not found or error: {exc}") from exc
    return info
|
||||
246
rag-service/api/documents.py
Normal file
246
rag-service/api/documents.py
Normal file
@@ -0,0 +1,246 @@
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, File, Form, HTTPException, Request, UploadFile
|
||||
from pydantic import BaseModel
|
||||
|
||||
from api.auth import optional_jwt_auth
|
||||
from embedding_client import embedding_client
|
||||
from minio_client_wrapper import minio_wrapper
|
||||
from qdrant_client_wrapper import qdrant_wrapper
|
||||
|
||||
logger = logging.getLogger("rag-service.api.documents")
|
||||
|
||||
router = APIRouter(prefix="/api/v1/documents")
|
||||
|
||||
|
||||
# ---- Request / Response models --------------------------------------------
|
||||
|
||||
class DocumentUploadResponse(BaseModel):
    """Response for POST /api/v1/documents/upload."""

    # Server-generated UUID identifying this upload.
    document_id: str
    # Object key under which the original file was stored in MinIO.
    object_name: str
    # Number of text chunks produced from the document.
    chunks_count: int
    # Number of vectors reported indexed by the Qdrant wrapper.
    vectors_indexed: int
    # Qdrant collection the chunks were indexed into.
    collection: str
|
||||
|
||||
|
||||
class DocumentDeleteRequest(BaseModel):
    """Request body for DELETE /api/v1/documents."""

    # MinIO object key of the document to remove.
    object_name: str
    # Qdrant collection holding the document's vectors.
    collection: str
|
||||
|
||||
|
||||
# ---- Endpoints ------------------------------------------------------------
|
||||
|
||||
@router.post("/upload", response_model=DocumentUploadResponse)
async def upload_document(
    request: Request,
    file: UploadFile = File(...),
    collection: str = Form(default="bp_eh"),
    data_type: str = Form(default="eh"),
    bundesland: str = Form(default="niedersachsen"),
    use_case: str = Form(default="general"),
    year: str = Form(default="2024"),
    chunk_strategy: str = Form(default="recursive"),
    chunk_size: int = Form(default=512),
    chunk_overlap: int = Form(default=50),
    metadata_json: Optional[str] = Form(default=None),
):
    """
    Upload a document:
    1. Store original file in MinIO
    2. Extract text (if PDF) via embedding-service
    3. Chunk the text via embedding-service
    4. Generate embeddings for each chunk
    5. Index chunks + embeddings in Qdrant

    Raises:
        HTTPException 400: unreadable/empty file, no extractable text,
            or zero chunks produced.
        HTTPException 500: MinIO, extraction, chunking, embedding, or
            Qdrant indexing failure.
    """
    optional_jwt_auth(request)

    document_id = str(uuid.uuid4())

    # --- Read file bytes ---
    try:
        file_bytes = await file.read()
    except Exception as exc:
        raise HTTPException(status_code=400, detail=f"Could not read uploaded file: {exc}") from exc

    if len(file_bytes) == 0:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    filename = file.filename or f"{document_id}.bin"
    content_type = file.content_type or "application/octet-stream"

    # --- Store in MinIO ---
    object_name = minio_wrapper.get_minio_path(
        data_type=data_type,
        bundesland=bundesland,
        use_case=use_case,
        year=year,
        filename=filename,
    )

    try:
        minio_meta = {
            "document_id": document_id,
            "original_filename": filename,
        }
        await minio_wrapper.upload_document(
            object_name=object_name,
            data=file_bytes,
            content_type=content_type,
            metadata=minio_meta,
        )
    except Exception as exc:
        logger.error("MinIO upload failed: %s", exc)
        raise HTTPException(status_code=500, detail=f"Failed to store file in MinIO: {exc}") from exc

    # --- Extract text ---
    try:
        if content_type == "application/pdf" or filename.lower().endswith(".pdf"):
            text = await embedding_client.extract_pdf(file_bytes)
        else:
            # Try to decode as text
            text = file_bytes.decode("utf-8", errors="replace")
    except Exception as exc:
        logger.error("Text extraction failed: %s", exc)
        raise HTTPException(status_code=500, detail=f"Text extraction failed: {exc}") from exc

    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Could not extract any text from the document")

    # --- Chunk ---
    try:
        chunks = await embedding_client.chunk_text(
            text=text,
            strategy=chunk_strategy,
            chunk_size=chunk_size,
            overlap=chunk_overlap,
        )
    except Exception as exc:
        logger.error("Chunking failed: %s", exc)
        raise HTTPException(status_code=500, detail=f"Chunking failed: {exc}") from exc

    if not chunks:
        raise HTTPException(status_code=400, detail="Chunking produced zero chunks")

    # --- Embed ---
    try:
        embeddings = await embedding_client.generate_embeddings(chunks)
    except Exception as exc:
        logger.error("Embedding generation failed: %s", exc)
        raise HTTPException(status_code=500, detail=f"Embedding generation failed: {exc}") from exc

    # --- Parse extra metadata ---
    extra_metadata: dict = {}
    if metadata_json:
        import json

        try:
            parsed = json.loads(metadata_json)
        except json.JSONDecodeError:
            logger.warning("Invalid metadata_json, ignoring")
        else:
            # Valid JSON that is not an object (e.g. a list or scalar) would
            # make the ``**extra_metadata`` merge below raise TypeError and
            # surface as an unhandled 500 -- ignore it like malformed JSON.
            if isinstance(parsed, dict):
                extra_metadata = parsed
            else:
                logger.warning("metadata_json is not a JSON object, ignoring")

    # --- Build payloads ---
    payloads = []
    for i, chunk in enumerate(chunks):
        payload = {
            "document_id": document_id,
            "object_name": object_name,
            "filename": filename,
            "chunk_index": i,
            "chunk_text": chunk,
            "data_type": data_type,
            "bundesland": bundesland,
            "use_case": use_case,
            "year": year,
            # NOTE(review): caller-supplied metadata is merged last, so it
            # can override the core fields above -- confirm this is intended.
            **extra_metadata,
        }
        payloads.append(payload)

    # --- Index in Qdrant ---
    try:
        indexed = await qdrant_wrapper.index_documents(
            collection=collection,
            vectors=embeddings,
            payloads=payloads,
        )
    except Exception as exc:
        logger.error("Qdrant indexing failed: %s", exc)
        raise HTTPException(status_code=500, detail=f"Qdrant indexing failed: {exc}") from exc

    return DocumentUploadResponse(
        document_id=document_id,
        object_name=object_name,
        chunks_count=len(chunks),
        vectors_indexed=indexed,
        collection=collection,
    )
|
||||
|
||||
|
||||
@router.delete("")
async def delete_document(body: DocumentDeleteRequest, request: Request):
    """Delete a document from both MinIO and Qdrant.

    Best-effort: each backend is attempted independently; failures are
    collected and reported in the response instead of aborting on the
    first error.
    """
    optional_jwt_auth(request)

    errors: list[str] = []

    # Delete from MinIO
    try:
        await minio_wrapper.delete_document(body.object_name)
    except Exception as exc:
        # Log as well as report: previously these failures were only
        # returned to the client and never reached the server logs.
        logger.error("MinIO delete failed for '%s': %s", body.object_name, exc)
        errors.append(f"MinIO delete failed: {exc}")

    # Delete vectors from Qdrant
    try:
        await qdrant_wrapper.delete_by_filter(
            collection=body.collection,
            filter_conditions={"object_name": body.object_name},
        )
    except Exception as exc:
        logger.error("Qdrant delete failed for '%s': %s", body.object_name, exc)
        errors.append(f"Qdrant delete failed: {exc}")

    if errors:
        return {"deleted": False, "errors": errors}

    return {"deleted": True, "object_name": body.object_name, "collection": body.collection}
|
||||
|
||||
|
||||
@router.get("/list")
async def list_documents(
    request: Request,
    prefix: Optional[str] = None,
):
    """List documents stored in MinIO, optionally restricted to a prefix.

    Returns {"documents": [...], "count": N}.
    Raises HTTP 500 on storage errors.
    """
    optional_jwt_auth(request)
    try:
        docs = await minio_wrapper.list_documents(prefix=prefix)
        return {"documents": docs, "count": len(docs)}
    except Exception as exc:
        logger.error("Failed to list documents: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
||||
|
||||
|
||||
@router.get("/download/{object_name:path}")
async def download_document(object_name: str, request: Request):
    """Get a presigned download URL for a document.

    The ``:path`` converter lets object keys contain slashes.
    Raises HTTP 404 if the object is unknown or URL generation fails.
    """
    optional_jwt_auth(request)
    try:
        url = await minio_wrapper.get_presigned_url(object_name)
    except Exception as exc:
        logger.error("Failed to generate presigned URL for '%s': %s", object_name, exc)
        # Chain the cause so the MinIO error is preserved in tracebacks.
        raise HTTPException(status_code=404, detail=f"Document not found: {exc}") from exc
    return {"url": url, "object_name": object_name}
|
||||
|
||||
|
||||
@router.get("/stats")
async def storage_stats(
    request: Request,
    prefix: Optional[str] = None,
):
    """Get storage stats (size, count) for a given prefix.

    Returns the wrapper's stats object as-is.
    Raises HTTP 500 on storage errors.
    """
    optional_jwt_auth(request)
    try:
        stats = await minio_wrapper.get_storage_stats(prefix=prefix)
    except Exception as exc:
        logger.error("Failed to get storage stats: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return stats
|
||||
200
rag-service/api/search.py
Normal file
200
rag-service/api/search.py
Normal file
@@ -0,0 +1,200 @@
|
||||
import logging
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Request
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from api.auth import optional_jwt_auth
|
||||
from embedding_client import embedding_client
|
||||
from qdrant_client_wrapper import qdrant_wrapper
|
||||
|
||||
logger = logging.getLogger("rag-service.api.search")
|
||||
|
||||
router = APIRouter(prefix="/api/v1")
|
||||
|
||||
|
||||
# ---- Request / Response models --------------------------------------------
|
||||
|
||||
class SemanticSearchRequest(BaseModel):
    """Request body for pure vector search (POST /api/v1/search)."""

    # Natural-language query; embedded before searching.
    query: str
    # Target Qdrant collection.
    collection: str = "bp_eh"
    # Maximum number of hits to return (1-100).
    limit: int = Field(default=10, ge=1, le=100)
    # Optional filter conditions passed through to the Qdrant wrapper;
    # exact schema defined by the wrapper.
    filters: Optional[dict[str, Any]] = None
    # Passed through to the wrapper; presumably a minimum similarity
    # cutoff (None = no threshold) -- confirm against the wrapper.
    score_threshold: Optional[float] = None
|
||||
|
||||
|
||||
class HybridSearchRequest(BaseModel):
    """Request body for hybrid search (POST /api/v1/search/hybrid)."""

    # Natural-language query; embedded AND keyword-matched.
    query: str
    collection: str = "bp_eh"
    # Final number of hits returned (1-100).
    limit: int = Field(default=10, ge=1, le=100)
    filters: Optional[dict[str, Any]] = None
    score_threshold: Optional[float] = None
    # Weight (0-1) of the keyword-match fraction added on top of the
    # vector similarity score.
    keyword_boost: float = Field(default=0.3, ge=0.0, le=1.0)
    # Whether to re-rank the top results via the embedding service.
    rerank: bool = True
    # How many of the top vector results are sent to the re-ranker.
    rerank_top_k: int = Field(default=10, ge=1, le=50)
|
||||
|
||||
|
||||
class RerankRequest(BaseModel):
    """Request body for standalone re-ranking (POST /api/v1/rerank)."""

    # Query the candidate documents are scored against.
    query: str
    # Candidate document texts to re-rank.
    documents: list[str]
    # Number of top-scoring documents to return (1-100).
    top_k: int = Field(default=10, ge=1, le=100)
|
||||
|
||||
|
||||
class SearchResult(BaseModel):
    """A single search hit: point id, similarity score, and stored payload."""

    id: str
    score: float
    # default_factory is the conventional pydantic spelling for mutable
    # defaults (instead of a shared literal ``{}``).
    payload: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class SearchResponse(BaseModel):
    """Common response envelope for /search and /search/hybrid."""

    # Hits, best first.
    results: list[SearchResult]
    # Convenience: len(results).
    count: int
    # Echo of the original query.
    query: str
    # Echo of the collection that was searched.
    collection: str
|
||||
|
||||
|
||||
# ---- Endpoints ------------------------------------------------------------
|
||||
|
||||
@router.post("/search", response_model=SearchResponse)
async def semantic_search(body: SemanticSearchRequest, request: Request):
    """
    Pure semantic (vector) search.
    Embeds the query, then searches Qdrant for nearest neighbours.

    Raises:
        HTTPException 502: the embedding service failed to embed the query.
        HTTPException 500: the Qdrant search failed.
    """
    optional_jwt_auth(request)

    # Generate query embedding
    try:
        query_vector = await embedding_client.generate_single_embedding(body.query)
    except Exception as exc:
        logger.error("Failed to embed query: %s", exc)
        raise HTTPException(status_code=502, detail=f"Embedding service error: {exc}") from exc

    # Search Qdrant
    try:
        results = await qdrant_wrapper.search(
            collection=body.collection,
            query_vector=query_vector,
            limit=body.limit,
            filters=body.filters,
            score_threshold=body.score_threshold,
        )
    except Exception as exc:
        logger.error("Qdrant search failed: %s", exc)
        raise HTTPException(status_code=500, detail=f"Vector search failed: {exc}") from exc

    # Each result dict is expected to carry "id", "score", "payload".
    return SearchResponse(
        results=[SearchResult(**r) for r in results],
        count=len(results),
        query=body.query,
        collection=body.collection,
    )
|
||||
|
||||
|
||||
@router.post("/search/hybrid", response_model=SearchResponse)
async def hybrid_search(body: HybridSearchRequest, request: Request):
    """
    Hybrid search: vector search + keyword filtering + optional re-ranking.

    1. Embed query and do vector search with a higher initial limit
    2. Apply keyword matching on chunk_text to boost relevant results
    3. Optionally re-rank the top results via the embedding service

    Raises:
        HTTPException 502: embedding the query failed.
        HTTPException 500: the Qdrant search failed.
    Re-ranking failures are non-fatal: the vector+keyword ordering is used.
    """
    optional_jwt_auth(request)

    # --- Step 1: Vector search (fetch more than needed for re-ranking) ---
    fetch_limit = max(body.limit * 3, 30)

    try:
        query_vector = await embedding_client.generate_single_embedding(body.query)
    except Exception as exc:
        logger.error("Failed to embed query: %s", exc)
        raise HTTPException(status_code=502, detail=f"Embedding service error: {exc}") from exc

    try:
        vector_results = await qdrant_wrapper.search(
            collection=body.collection,
            query_vector=query_vector,
            limit=fetch_limit,
            filters=body.filters,
            score_threshold=body.score_threshold,
        )
    except Exception as exc:
        logger.error("Qdrant search failed: %s", exc)
        raise HTTPException(status_code=500, detail=f"Vector search failed: {exc}") from exc

    if not vector_results:
        return SearchResponse(
            results=[],
            count=0,
            query=body.query,
            collection=body.collection,
        )

    # --- Step 2: Keyword boost ---
    # Fraction of whitespace-split query terms found (as substrings) in the
    # chunk text, scaled by keyword_boost, is added to the vector score.
    # NOTE(review): assumes payload["chunk_text"] is a string when present;
    # a None value would break .lower() -- confirm the indexing guarantees.
    query_terms = body.query.lower().split()
    for result in vector_results:
        chunk_text = result.get("payload", {}).get("chunk_text", "").lower()
        keyword_hits = sum(1 for term in query_terms if term in chunk_text)
        keyword_score = (keyword_hits / max(len(query_terms), 1)) * body.keyword_boost
        result["score"] = result["score"] + keyword_score

    # Sort by boosted score
    vector_results.sort(key=lambda x: x["score"], reverse=True)

    # --- Step 3: Optional re-ranking ---
    if body.rerank and len(vector_results) > 1:
        try:
            # documents[i] corresponds to vector_results[i] (prefix slice),
            # so the re-ranker's "index" maps straight back into
            # vector_results below.
            documents = [
                r.get("payload", {}).get("chunk_text", "")
                for r in vector_results[: body.rerank_top_k]
            ]
            reranked = await embedding_client.rerank_documents(
                query=body.query,
                documents=documents,
                top_k=body.limit,
            )
            # Rebuild results in re-ranked order
            reranked_results = []
            for item in reranked:
                idx = item.get("index", 0)
                if idx < len(vector_results):
                    entry = vector_results[idx].copy()
                    entry["score"] = item.get("score", entry["score"])
                    reranked_results.append(entry)
            vector_results = reranked_results
        except Exception as exc:
            # Best-effort: keep the vector+keyword ordering from Step 2.
            logger.warning("Re-ranking failed, falling back to vector+keyword scores: %s", exc)

    # Trim to requested limit
    final_results = vector_results[: body.limit]

    return SearchResponse(
        results=[SearchResult(**r) for r in final_results],
        count=len(final_results),
        query=body.query,
        collection=body.collection,
    )
|
||||
|
||||
|
||||
@router.post("/rerank")
async def rerank(body: RerankRequest, request: Request):
    """
    Standalone re-ranking endpoint.
    Sends query + documents to the embedding service for re-ranking.

    An empty document list short-circuits to an empty result without
    calling the embedding service.
    Raises HTTP 502 when the embedding service call fails.
    """
    optional_jwt_auth(request)

    if not body.documents:
        return {"results": [], "count": 0}

    try:
        results = await embedding_client.rerank_documents(
            query=body.query,
            documents=body.documents,
            top_k=body.top_k,
        )
    except Exception as exc:
        logger.error("Re-ranking failed: %s", exc)
        raise HTTPException(status_code=502, detail=f"Re-ranking failed: {exc}") from exc
    return {"results": results, "count": len(results), "query": body.query}
|
||||
Reference in New Issue
Block a user