Files
breakpilot-compliance/backend-compliance/compliance/api/source_policy_router.py
Benjamin Admin a228b3b528
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 34s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 18s
feat: add RAG corpus versioning and source policy backend
Part 1 — RAG Corpus Versioning:
- New DB table compliance_corpus_versions (migration 017)
- Go CorpusVersionStore with CRUD operations
- Assessment struct extended with corpus_version_id
- API endpoints: GET /rag/corpus-status, /rag/corpus-versions/:collection
- RAG routes (search, regulations) now registered in main.go
- Ingestion script registers corpus versions after each run
- Frontend staleness badge in SDK sidebar

Part 3 — Source Policy Backend:
- New FastAPI router with CRUD for allowed sources, PII rules,
  operations matrix, audit trail, stats, and compliance report
- SQLAlchemy models for all source policy tables (migration 001)
- Frontend API base corrected from edu-search:8088/8089 to
  backend-compliance:8002/api

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 07:58:08 +01:00

504 lines
16 KiB
Python

"""
Source Policy Router — Manages allowed compliance data sources.
Controls which legal sources the RAG corpus may use,
operations matrix, PII rules, and provides audit trail.
Endpoints:
GET /api/v1/admin/sources — List all sources
POST /api/v1/admin/sources — Add new source
GET /api/v1/admin/sources/{id} — Get source by ID
PUT /api/v1/admin/sources/{id} — Update source
DELETE /api/v1/admin/sources/{id} — Remove source
GET /api/v1/admin/operations-matrix — Operations matrix
PUT /api/v1/admin/operations/{id} — Update operation
GET /api/v1/admin/pii-rules — List PII rules
POST /api/v1/admin/pii-rules — Create PII rule
PUT /api/v1/admin/pii-rules/{id} — Update PII rule
DELETE /api/v1/admin/pii-rules/{id} — Delete PII rule
GET /api/v1/admin/policy-audit — Audit trail
GET /api/v1/admin/policy-stats — Dashboard statistics
GET /api/v1/admin/compliance-report — Compliance report
"""
import uuid
from datetime import datetime, timezone
from typing import Optional, List

from fastapi import APIRouter, HTTPException, Depends, Query
from pydantic import BaseModel, ConfigDict, Field
from sqlalchemy.orm import Session

from database import get_db
from compliance.db.source_policy_models import (
    AllowedSourceDB,
    SourceOperationDB,
    PIIRuleDB,
    SourcePolicyAuditDB,
)
# Router collecting every source-policy admin endpoint defined in this module.
# Routes are registered under the "/v1/admin" prefix; the module docstring lists
# them as "/api/v1/admin/...", so the application presumably mounts this router
# under "/api" — TODO confirm where it is included.
router = APIRouter(prefix="/v1/admin", tags=["source-policy"])
# =============================================================================
# Pydantic Schemas
# =============================================================================
class SourceCreate(BaseModel):
    """Request body accepted when a new allowed source is registered."""

    # Mandatory identification of the source.
    domain: str
    name: str

    # Optional descriptive and legal attributes.
    description: Optional[str] = None
    license: Optional[str] = None
    legal_basis: Optional[str] = None

    # Validated to lie in the closed range [0.0, 1.0]; 0.5 when omitted.
    trust_boost: float = Field(default=0.5, ge=0.0, le=1.0)
    source_type: str = "legal"
    active: bool = True

    # Optional free-form dictionary attached to the source.
    metadata: Optional[dict] = None
class SourceUpdate(BaseModel):
    """Request body for a partial update of an allowed source.

    Every field is optional and defaults to ``None``.
    """

    domain: Optional[str] = None
    name: Optional[str] = None
    description: Optional[str] = None
    license: Optional[str] = None
    legal_basis: Optional[str] = None

    # When supplied, must lie in the closed range [0.0, 1.0].
    trust_boost: Optional[float] = Field(default=None, ge=0.0, le=1.0)
    source_type: Optional[str] = None
    active: Optional[bool] = None
    metadata: Optional[dict] = None
class SourceResponse(BaseModel):
    """Response schema describing a single allowed source.

    ``from_attributes`` is enabled so the model can be populated from objects
    that expose these fields as attributes rather than from a mapping.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config``,
    # which v2 still accepts but reports as deprecated.
    model_config = ConfigDict(from_attributes=True)

    id: str
    domain: str
    name: str
    description: Optional[str] = None
    license: Optional[str] = None
    legal_basis: Optional[str] = None
    trust_boost: float
    source_type: str
    active: bool
    metadata: Optional[dict] = None

    # Timestamps are declared as strings on this schema.
    created_at: str
    updated_at: Optional[str] = None
class OperationUpdate(BaseModel):
    """Request body for changing one entry of the operations matrix."""

    # Required flag: whether the operation is permitted.
    allowed: bool

    # Optional textual conditions attached to the operation.
    conditions: Optional[str] = None
class PIIRuleCreate(BaseModel):
    """Request body accepted when a new PII rule is created."""

    # Mandatory rule name.
    name: str
    description: Optional[str] = None
    pattern: Optional[str] = None

    # Mandatory category of the rule.
    category: str

    # Action applied by the rule; "mask" when omitted.
    action: str = "mask"
    active: bool = True
class PIIRuleUpdate(BaseModel):
    """Request body for a partial update of a PII rule.

    Every field is optional and defaults to ``None``.
    """

    name: Optional[str] = None
    description: Optional[str] = None
    pattern: Optional[str] = None
    category: Optional[str] = None
    action: Optional[str] = None
    active: Optional[bool] = None
# =============================================================================
# Helper: Audit logging
# =============================================================================
def _log_audit(
    db: Session,
    action: str,
    entity_type: str,
    entity_id,
    old_values: Optional[dict] = None,
    new_values: Optional[dict] = None,
    user_id: str = "system",
) -> None:
    """Stage an audit-trail entry for a source-policy change.

    The entry is only added to the session; it is persisted when the caller
    commits, so it shares the transaction of the change it describes.

    Args:
        db: Session the audit row is added to.
        action: Kind of change, e.g. "create", "update" or "delete".
        entity_type: Kind of entity affected, e.g. "source" or "pii_rule".
        entity_id: Identifier of the affected entity.
        old_values: Optional snapshot of the state before the change.
        new_values: Optional snapshot of the state after the change.
        user_id: Actor recorded on the entry. Defaults to "system", the value
            that was previously hard-coded, so existing callers are unchanged.
    """
    audit = SourcePolicyAuditDB(
        action=action,
        entity_type=entity_type,
        entity_id=entity_id,
        old_values=old_values,
        new_values=new_values,
        user_id=user_id,
    )
    db.add(audit)
def _source_to_dict(source: AllowedSourceDB) -> dict:
    """Return a plain-dict snapshot of a source's core fields.

    The id is stringified; the remaining fields are copied as-is. Metadata and
    timestamps are not part of the snapshot.
    """
    copied_fields = (
        "domain",
        "name",
        "description",
        "license",
        "legal_basis",
        "trust_boost",
        "source_type",
        "active",
    )
    snapshot = {"id": str(source.id)}
    for field_name in copied_fields:
        snapshot[field_name] = getattr(source, field_name)
    return snapshot
# =============================================================================
# Sources CRUD
# =============================================================================
@router.get("/sources")
async def list_sources(
    active_only: bool = Query(False),
    db: Session = Depends(get_db),
):
    """Return all allowed sources ordered by name.

    When ``active_only`` is true, only sources flagged active are returned.
    The response carries the serialized sources plus their count.
    """
    stmt = db.query(AllowedSourceDB)
    if active_only:
        # SQL expression, not a Python truth test, hence the explicit comparison.
        stmt = stmt.filter(AllowedSourceDB.active == True)  # noqa: E712
    rows = stmt.order_by(AllowedSourceDB.name).all()

    payload = []
    for row in rows:
        # Timestamps may be unset; emit None instead of failing on isoformat().
        created = row.created_at.isoformat() if row.created_at else None
        updated = row.updated_at.isoformat() if row.updated_at else None
        payload.append(
            {
                "id": str(row.id),
                "domain": row.domain,
                "name": row.name,
                "description": row.description,
                "license": row.license,
                "legal_basis": row.legal_basis,
                "trust_boost": row.trust_boost,
                "source_type": row.source_type,
                "active": row.active,
                "metadata": row.metadata_,
                "created_at": created,
                "updated_at": updated,
            }
        )

    return {"sources": payload, "count": len(rows)}
@router.post("/sources")
async def create_source(
    data: SourceCreate,
    db: Session = Depends(get_db),
):
    """Add a new allowed source and record the creation in the audit trail.

    Raises:
        HTTPException: 409 when a source with the same domain already exists.

    Returns:
        A dict with the new source's id, domain, name and creation timestamp.
    """
    existing = db.query(AllowedSourceDB).filter(AllowedSourceDB.domain == data.domain).first()
    if existing:
        raise HTTPException(status_code=409, detail=f"Source with domain '{data.domain}' already exists")

    source = AllowedSourceDB(
        domain=data.domain,
        name=data.name,
        description=data.description,
        license=data.license,
        legal_basis=data.legal_basis,
        trust_boost=data.trust_boost,
        source_type=data.source_type,
        active=data.active,
        # The request field "metadata" is stored on the model attribute "metadata_".
        metadata_=data.metadata,
    )
    db.add(source)

    # Flush before building the audit entry: values produced by column
    # defaults (presumably including the primary key — confirm against the
    # model) are only assigned when the INSERT is emitted. Without this the
    # audit row would be written with the pre-flush id.
    db.flush()
    _log_audit(db, "create", "source", source.id, new_values=_source_to_dict(source))

    db.commit()
    db.refresh(source)
    return {
        "id": str(source.id),
        "domain": source.domain,
        "name": source.name,
        "created_at": source.created_at.isoformat(),
    }
@router.get("/sources/{source_id}")
async def get_source(source_id: str, db: Session = Depends(get_db)):
    """Return one allowed source, looked up by id.

    Raises:
        HTTPException: 404 when no source has the given id.
    """
    record = db.query(AllowedSourceDB).filter(AllowedSourceDB.id == source_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail="Source not found")

    # Timestamps may be unset; serialize them defensively.
    created = record.created_at.isoformat() if record.created_at else None
    updated = record.updated_at.isoformat() if record.updated_at else None

    body = {
        "id": str(record.id),
        "domain": record.domain,
        "name": record.name,
        "description": record.description,
        "license": record.license,
        "legal_basis": record.legal_basis,
        "trust_boost": record.trust_boost,
        "source_type": record.source_type,
        "active": record.active,
        "metadata": record.metadata_,
        "created_at": created,
        "updated_at": updated,
    }
    return body
@router.put("/sources/{source_id}")
async def update_source(
    source_id: str,
    data: SourceUpdate,
    db: Session = Depends(get_db),
):
    """Apply a partial update to an existing source and audit the change.

    Only fields explicitly present in the request body are written.

    Raises:
        HTTPException: 404 when the source does not exist; 409 when the
            requested domain is already used by a different source.
    """
    source = db.query(AllowedSourceDB).filter(AllowedSourceDB.id == source_id).first()
    if not source:
        raise HTTPException(status_code=404, detail="Source not found")

    old_values = _source_to_dict(source)
    update_data = data.model_dump(exclude_unset=True)

    # Mirror the uniqueness check done on creation, so moving a source onto a
    # domain that is already taken is rejected with 409 rather than left to
    # whatever the database does with the duplicate.
    new_domain = update_data.get("domain")
    if new_domain is not None and new_domain != source.domain:
        clash = (
            db.query(AllowedSourceDB)
            .filter(AllowedSourceDB.domain == new_domain, AllowedSourceDB.id != source.id)
            .first()
        )
        if clash:
            raise HTTPException(status_code=409, detail=f"Source with domain '{new_domain}' already exists")

    # Rename metadata to metadata_ for the DB column
    if "metadata" in update_data:
        update_data["metadata_"] = update_data.pop("metadata")

    for key, value in update_data.items():
        setattr(source, key, value)

    _log_audit(db, "update", "source", source.id, old_values=old_values, new_values=update_data)
    db.commit()
    db.refresh(source)
    return {"status": "updated", "id": str(source.id)}
@router.delete("/sources/{source_id}")
async def delete_source(source_id: str, db: Session = Depends(get_db)):
    """Delete an allowed source together with its operations-matrix rows.

    The pre-deletion snapshot of the source is written to the audit trail.

    Raises:
        HTTPException: 404 when no source has the given id.
    """
    target = db.query(AllowedSourceDB).filter(AllowedSourceDB.id == source_id).first()
    if target is None:
        raise HTTPException(status_code=404, detail="Source not found")

    # Record what is about to disappear before anything is removed.
    _log_audit(db, "delete", "source", target.id, old_values=_source_to_dict(target))

    # Operations that reference this source are removed first, then the source.
    linked_operations = db.query(SourceOperationDB).filter(SourceOperationDB.source_id == source_id)
    linked_operations.delete()
    db.delete(target)

    db.commit()
    return {"status": "deleted", "id": source_id}
# =============================================================================
# Operations Matrix
# =============================================================================
@router.get("/operations-matrix")
async def get_operations_matrix(db: Session = Depends(get_db)):
    """Return every operations-matrix entry together with the entry count."""
    rows = db.query(SourceOperationDB).all()

    matrix = []
    for entry in rows:
        matrix.append(
            {
                "id": str(entry.id),
                "source_id": str(entry.source_id),
                "operation": entry.operation,
                "allowed": entry.allowed,
                "conditions": entry.conditions,
            }
        )

    return {"operations": matrix, "count": len(rows)}
@router.put("/operations/{operation_id}")
async def update_operation(
    operation_id: str,
    data: OperationUpdate,
    db: Session = Depends(get_db),
):
    """Update one operations-matrix entry and audit the change.

    ``allowed`` is always overwritten. ``conditions`` is only overwritten when
    the request supplies a non-null value, so a null leaves it untouched.

    Raises:
        HTTPException: 404 when no operation has the given id.
    """
    op = db.query(SourceOperationDB).filter(SourceOperationDB.id == operation_id).first()
    if not op:
        raise HTTPException(status_code=404, detail="Operation not found")

    # Capture the prior state so the audit entry shows what actually changed.
    previous = {"allowed": op.allowed, "conditions": op.conditions}

    op.allowed = data.allowed
    if data.conditions is not None:
        op.conditions = data.conditions

    # Audit both fields: a conditions-only change would otherwise leave no
    # trace beyond an unchanged "allowed" flag.
    current = {"allowed": op.allowed, "conditions": op.conditions}
    _log_audit(db, "update", "operation", op.id, old_values=previous, new_values=current)

    db.commit()
    return {"status": "updated", "id": str(op.id)}
# =============================================================================
# PII Rules
# =============================================================================
@router.get("/pii-rules")
async def list_pii_rules(db: Session = Depends(get_db)):
    """Return all PII rules ordered by category, then name, with their count."""
    ordered = db.query(PIIRuleDB).order_by(PIIRuleDB.category, PIIRuleDB.name)
    found = ordered.all()

    serialized = []
    for rule in found:
        # created_at may be unset; emit None rather than failing.
        created = rule.created_at.isoformat() if rule.created_at else None
        serialized.append(
            {
                "id": str(rule.id),
                "name": rule.name,
                "description": rule.description,
                "pattern": rule.pattern,
                "category": rule.category,
                "action": rule.action,
                "active": rule.active,
                "created_at": created,
            }
        )

    return {"rules": serialized, "count": len(found)}
@router.post("/pii-rules")
async def create_pii_rule(data: PIIRuleCreate, db: Session = Depends(get_db)):
    """Create a new PII rule and record the creation in the audit trail.

    Returns:
        A dict with the id and name of the created rule.
    """
    rule = PIIRuleDB(
        name=data.name,
        description=data.description,
        pattern=data.pattern,
        category=data.category,
        action=data.action,
        active=data.active,
    )
    db.add(rule)

    # Flush before auditing: values produced by column defaults (presumably
    # including the primary key — confirm against the model) are only assigned
    # when the INSERT is emitted, so the audit entry would otherwise carry the
    # pre-flush id.
    db.flush()
    _log_audit(db, "create", "pii_rule", rule.id, new_values={"name": data.name, "category": data.category})

    db.commit()
    db.refresh(rule)
    return {"id": str(rule.id), "name": rule.name}
@router.put("/pii-rules/{rule_id}")
async def update_pii_rule(rule_id: str, data: PIIRuleUpdate, db: Session = Depends(get_db)):
    """Apply a partial update to a PII rule and audit the change.

    Only fields explicitly present in the request body are written.

    Raises:
        HTTPException: 404 when no rule has the given id.
    """
    rule = db.query(PIIRuleDB).filter(PIIRuleDB.id == rule_id).first()
    if not rule:
        raise HTTPException(status_code=404, detail="PII rule not found")

    update_data = data.model_dump(exclude_unset=True)

    # Snapshot the prior values of exactly the fields being changed, so the
    # audit entry records a before/after pair as source updates do.
    old_values = {field: getattr(rule, field) for field in update_data}

    for key, value in update_data.items():
        setattr(rule, key, value)

    _log_audit(db, "update", "pii_rule", rule.id, old_values=old_values, new_values=update_data)
    db.commit()
    return {"status": "updated", "id": str(rule.id)}
@router.delete("/pii-rules/{rule_id}")
async def delete_pii_rule(rule_id: str, db: Session = Depends(get_db)):
    """Delete a PII rule, recording its name and category in the audit trail.

    Raises:
        HTTPException: 404 when no rule has the given id.
    """
    target = db.query(PIIRuleDB).filter(PIIRuleDB.id == rule_id).first()
    if target is None:
        raise HTTPException(status_code=404, detail="PII rule not found")

    # Keep a minimal record of the rule that is being removed.
    removed = {"name": target.name, "category": target.category}
    _log_audit(db, "delete", "pii_rule", target.id, old_values=removed)

    db.delete(target)
    db.commit()
    return {"status": "deleted", "id": rule_id}
# =============================================================================
# Audit Trail
# =============================================================================
@router.get("/policy-audit")
async def get_policy_audit(
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    entity_type: Optional[str] = None,
    db: Session = Depends(get_db),
):
    """Return one page of the source-policy audit trail, newest first.

    ``entity_type`` optionally restricts the entries; an empty value applies
    no filter. ``total`` is the count of matching entries before paging.
    """
    matching = db.query(SourcePolicyAuditDB)
    if entity_type:
        matching = matching.filter(SourcePolicyAuditDB.entity_type == entity_type)

    # Count before offset/limit so "total" covers the whole filtered set.
    total = matching.count()

    newest_first = matching.order_by(SourcePolicyAuditDB.created_at.desc())
    page = newest_first.offset(offset).limit(limit).all()

    entries = []
    for item in page:
        entries.append(
            {
                "id": str(item.id),
                "action": item.action,
                "entity_type": item.entity_type,
                "entity_id": str(item.entity_id) if item.entity_id else None,
                "old_values": item.old_values,
                "new_values": item.new_values,
                "user_id": item.user_id,
                "created_at": item.created_at.isoformat() if item.created_at else None,
            }
        )

    return {
        "entries": entries,
        "total": total,
        "limit": limit,
        "offset": offset,
    }
# =============================================================================
# Dashboard Statistics
# =============================================================================
@router.get("/policy-stats")
async def get_policy_stats(db: Session = Depends(get_db)):
    """Return the counters shown on the source-policy dashboard.

    Returns:
        A dict with:
        - ``active_policies``: number of sources flagged active
        - ``allowed_sources``: number of all sources
        - ``pii_rules``: number of PII rules flagged active
        - ``blocked_today`` / ``blocked_total``: number of audit entries with
          action "delete", since UTC midnight and overall
    """
    total_sources = db.query(AllowedSourceDB).count()
    active_sources = db.query(AllowedSourceDB).filter(AllowedSourceDB.active == True).count()  # noqa: E712
    pii_rules = db.query(PIIRuleDB).filter(PIIRuleDB.active == True).count()  # noqa: E712

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and drop tzinfo so the comparison value stays a naive UTC midnight,
    # exactly as before.
    # NOTE(review): assumes created_at is stored as naive UTC — confirm against the model.
    today_start = datetime.now(timezone.utc).replace(
        hour=0, minute=0, second=0, microsecond=0, tzinfo=None
    )

    # Only "delete" audit entries are counted for the "blocked" figures.
    deletions = db.query(SourcePolicyAuditDB).filter(SourcePolicyAuditDB.action == "delete")
    blocked_today = deletions.filter(SourcePolicyAuditDB.created_at >= today_start).count()
    blocked_total = deletions.count()

    return {
        "active_policies": active_sources,
        "allowed_sources": total_sources,
        "pii_rules": pii_rules,
        "blocked_today": blocked_today,
        "blocked_total": blocked_total,
    }
@router.get("/compliance-report")
async def get_compliance_report(db: Session = Depends(get_db)):
    """Build a compliance report over the active sources and active PII rules.

    Returns:
        A dict with the report timestamp (naive UTC, ISO 8601), a summary
        (counts plus the distinct source types and licenses in sorted order),
        and the per-source and per-rule details.
    """
    sources = db.query(AllowedSourceDB).filter(AllowedSourceDB.active == True).all()  # noqa: E712
    pii_rules = db.query(PIIRuleDB).filter(PIIRuleDB.active == True).all()  # noqa: E712

    # datetime.utcnow() is deprecated since Python 3.12; dropping tzinfo keeps
    # the emitted string in the same offset-less format as before.
    report_date = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    # Sort the distinct values so repeated reports over the same data are
    # identical; a bare set has no stable order. key=str keeps the sort
    # well-defined even if a source_type is unset.
    source_types = sorted({s.source_type for s in sources}, key=str)
    licenses = sorted({s.license for s in sources if s.license})

    return {
        "report_date": report_date,
        "summary": {
            "active_sources": len(sources),
            "active_pii_rules": len(pii_rules),
            "source_types": source_types,
            "licenses": licenses,
        },
        "sources": [
            {
                "domain": s.domain,
                "name": s.name,
                "license": s.license,
                "legal_basis": s.legal_basis,
                "trust_boost": s.trust_boost,
            }
            for s in sources
        ],
        "pii_rules": [
            {
                "name": r.name,
                "category": r.category,
                "action": r.action,
            }
            for r in pii_rules
        ],
    }