Files
breakpilot-compliance/backend-compliance/compliance/api/source_policy_router.py
Sharang Parnerkar cb90d0db0c chore(backend): deprecation sweep — Pydantic V1 -> V2, utcnow -> tz-aware
Two low-risk Pydantic V1 idioms that will be hard errors in V3:
  - Query(regex=...) -> Query(pattern=...) (audit_routes, control_generator_routes)
  - class Config: from_attributes=True -> model_config = ConfigDict(...)
    in source_policy_router.py (schemas.py is intentionally skipped — it is
    the Phase 1 schema-split target and the ConfigDict conversion is most
    efficient to do during that split).

Naive -> aware datetime sweep across 47 files:
  - datetime.utcnow() -> datetime.now(timezone.utc)
  - default=datetime.utcnow -> default=lambda: datetime.now(timezone.utc)
  - onupdate=datetime.utcnow -> onupdate=lambda: datetime.now(timezone.utc)

All SQLAlchemy DateTime columns in the project already declare
timezone=True, so the DB schema expects aware datetimes. Before this
commit, the in-Python side was generating naive values and the driver
was silently coercing them. This is a latent-bug fix, not a behavior
change at the DB boundary.

Verified:
  - 173/173 pytest compliance/tests/ pass (same as baseline)
  - tests/contracts/test_openapi_baseline.py passes (360 paths,
    484 operations unchanged)
  - DeprecationWarning count dropped from 158 -> 35

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 13:09:59 +02:00

581 lines
18 KiB
Python

"""
Source Policy Router — Manages allowed compliance data sources.
Controls which legal sources the RAG corpus may use,
operations matrix, PII rules, and provides audit trail.
Endpoints:
GET /api/v1/admin/sources — List all sources
POST /api/v1/admin/sources — Add new source
GET /api/v1/admin/sources/{id} — Get source by ID
PUT /api/v1/admin/sources/{id} — Update source
DELETE /api/v1/admin/sources/{id} — Remove source
GET /api/v1/admin/operations-matrix — Operations matrix
PUT /api/v1/admin/operations/{id} — Update operation
GET /api/v1/admin/pii-rules — List PII rules
POST /api/v1/admin/pii-rules — Create PII rule
PUT /api/v1/admin/pii-rules/{id} — Update PII rule
DELETE /api/v1/admin/pii-rules/{id} — Delete PII rule
GET /api/v1/admin/policy-audit — Audit trail
GET /api/v1/admin/policy-stats — Dashboard statistics
GET /api/v1/admin/compliance-report — Compliance report
"""
from datetime import datetime, timezone
from typing import Optional
from fastapi import APIRouter, HTTPException, Depends, Query
from pydantic import BaseModel, ConfigDict, Field
from sqlalchemy.orm import Session
from database import get_db
from compliance.db.source_policy_models import (
AllowedSourceDB,
BlockedContentDB,
SourceOperationDB,
PIIRuleDB,
SourcePolicyAuditDB,
)
router = APIRouter(prefix="/v1/admin", tags=["source-policy"])
# =============================================================================
# Pydantic Schemas
# =============================================================================
class SourceCreate(BaseModel):
domain: str
name: str
description: Optional[str] = None
license: Optional[str] = None
legal_basis: Optional[str] = None
trust_boost: float = Field(default=0.5, ge=0.0, le=1.0)
source_type: str = "legal"
active: bool = True
metadata: Optional[dict] = None
class SourceUpdate(BaseModel):
domain: Optional[str] = None
name: Optional[str] = None
description: Optional[str] = None
license: Optional[str] = None
legal_basis: Optional[str] = None
trust_boost: Optional[float] = Field(default=None, ge=0.0, le=1.0)
source_type: Optional[str] = None
active: Optional[bool] = None
metadata: Optional[dict] = None
class SourceResponse(BaseModel):
id: str
domain: str
name: str
description: Optional[str] = None
license: Optional[str] = None
legal_basis: Optional[str] = None
trust_boost: float
source_type: str
active: bool
metadata: Optional[dict] = None
created_at: str
updated_at: Optional[str] = None
model_config = ConfigDict(from_attributes=True)
class OperationUpdate(BaseModel):
allowed: bool
conditions: Optional[str] = None
class PIIRuleCreate(BaseModel):
name: str
description: Optional[str] = None
pattern: Optional[str] = None
category: str
action: str = "mask"
active: bool = True
class PIIRuleUpdate(BaseModel):
name: Optional[str] = None
description: Optional[str] = None
pattern: Optional[str] = None
category: Optional[str] = None
action: Optional[str] = None
active: Optional[bool] = None
# =============================================================================
# Helper: Audit logging
# =============================================================================
def _log_audit(db: Session, action: str, entity_type: str, entity_id, old_values=None, new_values=None):
audit = SourcePolicyAuditDB(
action=action,
entity_type=entity_type,
entity_id=entity_id,
old_values=old_values,
new_values=new_values,
user_id="system",
)
db.add(audit)
def _source_to_dict(source: AllowedSourceDB) -> dict:
return {
"id": str(source.id),
"domain": source.domain,
"name": source.name,
"description": source.description,
"license": source.license,
"legal_basis": source.legal_basis,
"trust_boost": source.trust_boost,
"source_type": source.source_type,
"active": source.active,
}
# =============================================================================
# Sources CRUD
# =============================================================================
@router.get("/sources")
async def list_sources(
active_only: bool = Query(False),
source_type: Optional[str] = Query(None),
license: Optional[str] = Query(None),
db: Session = Depends(get_db),
):
"""List all allowed sources with optional filters."""
query = db.query(AllowedSourceDB)
if active_only:
query = query.filter(AllowedSourceDB.active)
if source_type:
query = query.filter(AllowedSourceDB.source_type == source_type)
if license:
query = query.filter(AllowedSourceDB.license == license)
sources = query.order_by(AllowedSourceDB.name).all()
return {
"sources": [
{
"id": str(s.id),
"domain": s.domain,
"name": s.name,
"description": s.description,
"license": s.license,
"legal_basis": s.legal_basis,
"trust_boost": s.trust_boost,
"source_type": s.source_type,
"active": s.active,
"metadata": s.metadata_,
"created_at": s.created_at.isoformat() if s.created_at else None,
"updated_at": s.updated_at.isoformat() if s.updated_at else None,
}
for s in sources
],
"count": len(sources),
}
@router.post("/sources")
async def create_source(
data: SourceCreate,
db: Session = Depends(get_db),
):
"""Add a new allowed source."""
existing = db.query(AllowedSourceDB).filter(AllowedSourceDB.domain == data.domain).first()
if existing:
raise HTTPException(status_code=409, detail=f"Source with domain '{data.domain}' already exists")
source = AllowedSourceDB(
domain=data.domain,
name=data.name,
description=data.description,
license=data.license,
legal_basis=data.legal_basis,
trust_boost=data.trust_boost,
source_type=data.source_type,
active=data.active,
metadata_=data.metadata,
)
db.add(source)
_log_audit(db, "create", "source", source.id, new_values=_source_to_dict(source))
db.commit()
db.refresh(source)
return {
"id": str(source.id),
"domain": source.domain,
"name": source.name,
"created_at": source.created_at.isoformat(),
}
@router.get("/sources/{source_id}")
async def get_source(source_id: str, db: Session = Depends(get_db)):
"""Get a specific source."""
source = db.query(AllowedSourceDB).filter(AllowedSourceDB.id == source_id).first()
if not source:
raise HTTPException(status_code=404, detail="Source not found")
return {
"id": str(source.id),
"domain": source.domain,
"name": source.name,
"description": source.description,
"license": source.license,
"legal_basis": source.legal_basis,
"trust_boost": source.trust_boost,
"source_type": source.source_type,
"active": source.active,
"metadata": source.metadata_,
"created_at": source.created_at.isoformat() if source.created_at else None,
"updated_at": source.updated_at.isoformat() if source.updated_at else None,
}
@router.put("/sources/{source_id}")
async def update_source(
source_id: str,
data: SourceUpdate,
db: Session = Depends(get_db),
):
"""Update an existing source."""
source = db.query(AllowedSourceDB).filter(AllowedSourceDB.id == source_id).first()
if not source:
raise HTTPException(status_code=404, detail="Source not found")
old_values = _source_to_dict(source)
update_data = data.model_dump(exclude_unset=True)
# Rename metadata to metadata_ for the DB column
if "metadata" in update_data:
update_data["metadata_"] = update_data.pop("metadata")
for key, value in update_data.items():
setattr(source, key, value)
_log_audit(db, "update", "source", source.id, old_values=old_values, new_values=update_data)
db.commit()
db.refresh(source)
return {"status": "updated", "id": str(source.id)}
@router.delete("/sources/{source_id}")
async def delete_source(source_id: str, db: Session = Depends(get_db)):
"""Remove an allowed source."""
source = db.query(AllowedSourceDB).filter(AllowedSourceDB.id == source_id).first()
if not source:
raise HTTPException(status_code=404, detail="Source not found")
old_values = _source_to_dict(source)
_log_audit(db, "delete", "source", source.id, old_values=old_values)
# Also delete associated operations
db.query(SourceOperationDB).filter(SourceOperationDB.source_id == source_id).delete()
db.delete(source)
db.commit()
return {"status": "deleted", "id": source_id}
# =============================================================================
# Operations Matrix
# =============================================================================
@router.get("/operations-matrix")
async def get_operations_matrix(db: Session = Depends(get_db)):
"""Get the full operations matrix."""
operations = db.query(SourceOperationDB).all()
return {
"operations": [
{
"id": str(op.id),
"source_id": str(op.source_id),
"operation": op.operation,
"allowed": op.allowed,
"conditions": op.conditions,
}
for op in operations
],
"count": len(operations),
}
@router.put("/operations/{operation_id}")
async def update_operation(
operation_id: str,
data: OperationUpdate,
db: Session = Depends(get_db),
):
"""Update an operation in the matrix."""
op = db.query(SourceOperationDB).filter(SourceOperationDB.id == operation_id).first()
if not op:
raise HTTPException(status_code=404, detail="Operation not found")
op.allowed = data.allowed
if data.conditions is not None:
op.conditions = data.conditions
_log_audit(db, "update", "operation", op.id, new_values={"allowed": data.allowed})
db.commit()
return {"status": "updated", "id": str(op.id)}
# =============================================================================
# PII Rules
# =============================================================================
@router.get("/pii-rules")
async def list_pii_rules(
category: Optional[str] = Query(None),
db: Session = Depends(get_db),
):
"""List all PII rules with optional category filter."""
query = db.query(PIIRuleDB)
if category:
query = query.filter(PIIRuleDB.category == category)
rules = query.order_by(PIIRuleDB.category, PIIRuleDB.name).all()
return {
"rules": [
{
"id": str(r.id),
"name": r.name,
"description": r.description,
"pattern": r.pattern,
"category": r.category,
"action": r.action,
"active": r.active,
"created_at": r.created_at.isoformat() if r.created_at else None,
}
for r in rules
],
"count": len(rules),
}
@router.post("/pii-rules")
async def create_pii_rule(data: PIIRuleCreate, db: Session = Depends(get_db)):
"""Create a new PII rule."""
rule = PIIRuleDB(
name=data.name,
description=data.description,
pattern=data.pattern,
category=data.category,
action=data.action,
active=data.active,
)
db.add(rule)
_log_audit(db, "create", "pii_rule", rule.id, new_values={"name": data.name, "category": data.category})
db.commit()
db.refresh(rule)
return {"id": str(rule.id), "name": rule.name}
@router.put("/pii-rules/{rule_id}")
async def update_pii_rule(rule_id: str, data: PIIRuleUpdate, db: Session = Depends(get_db)):
"""Update a PII rule."""
rule = db.query(PIIRuleDB).filter(PIIRuleDB.id == rule_id).first()
if not rule:
raise HTTPException(status_code=404, detail="PII rule not found")
update_data = data.model_dump(exclude_unset=True)
for key, value in update_data.items():
setattr(rule, key, value)
_log_audit(db, "update", "pii_rule", rule.id, new_values=update_data)
db.commit()
return {"status": "updated", "id": str(rule.id)}
@router.delete("/pii-rules/{rule_id}")
async def delete_pii_rule(rule_id: str, db: Session = Depends(get_db)):
"""Delete a PII rule."""
rule = db.query(PIIRuleDB).filter(PIIRuleDB.id == rule_id).first()
if not rule:
raise HTTPException(status_code=404, detail="PII rule not found")
_log_audit(db, "delete", "pii_rule", rule.id, old_values={"name": rule.name, "category": rule.category})
db.delete(rule)
db.commit()
return {"status": "deleted", "id": rule_id}
# =============================================================================
# Blocked Content
# =============================================================================
@router.get("/blocked-content")
async def list_blocked_content(
limit: int = Query(50, ge=1, le=500),
offset: int = Query(0, ge=0),
domain: Optional[str] = None,
date_from: Optional[str] = Query(None, alias="from"),
date_to: Optional[str] = Query(None, alias="to"),
db: Session = Depends(get_db),
):
"""List blocked content entries."""
query = db.query(BlockedContentDB)
if domain:
query = query.filter(BlockedContentDB.domain == domain)
if date_from:
try:
from_dt = datetime.fromisoformat(date_from)
query = query.filter(BlockedContentDB.created_at >= from_dt)
except ValueError:
pass
if date_to:
try:
to_dt = datetime.fromisoformat(date_to)
query = query.filter(BlockedContentDB.created_at <= to_dt)
except ValueError:
pass
total = query.count()
entries = query.order_by(BlockedContentDB.created_at.desc()).offset(offset).limit(limit).all()
return {
"blocked": [
{
"id": str(e.id),
"url": e.url,
"domain": e.domain,
"block_reason": e.block_reason,
"rule_id": str(e.rule_id) if e.rule_id else None,
"details": e.details,
"created_at": e.created_at.isoformat() if e.created_at else None,
}
for e in entries
],
"total": total,
}
# =============================================================================
# Audit Trail
# =============================================================================
@router.get("/policy-audit")
async def get_policy_audit(
limit: int = Query(50, ge=1, le=500),
offset: int = Query(0, ge=0),
entity_type: Optional[str] = None,
date_from: Optional[str] = Query(None, alias="from"),
date_to: Optional[str] = Query(None, alias="to"),
db: Session = Depends(get_db),
):
"""Get the audit trail for source policy changes."""
query = db.query(SourcePolicyAuditDB)
if entity_type:
query = query.filter(SourcePolicyAuditDB.entity_type == entity_type)
if date_from:
try:
from_dt = datetime.fromisoformat(date_from)
query = query.filter(SourcePolicyAuditDB.created_at >= from_dt)
except ValueError:
pass
if date_to:
try:
to_dt = datetime.fromisoformat(date_to)
query = query.filter(SourcePolicyAuditDB.created_at <= to_dt)
except ValueError:
pass
total = query.count()
entries = query.order_by(SourcePolicyAuditDB.created_at.desc()).offset(offset).limit(limit).all()
return {
"entries": [
{
"id": str(e.id),
"action": e.action,
"entity_type": e.entity_type,
"entity_id": str(e.entity_id) if e.entity_id else None,
"old_values": e.old_values,
"new_values": e.new_values,
"user_id": e.user_id,
"created_at": e.created_at.isoformat() if e.created_at else None,
}
for e in entries
],
"total": total,
"limit": limit,
"offset": offset,
}
# =============================================================================
# Dashboard Statistics
# =============================================================================
@router.get("/policy-stats")
async def get_policy_stats(db: Session = Depends(get_db)):
"""Get dashboard statistics for source policy."""
total_sources = db.query(AllowedSourceDB).count()
active_sources = db.query(AllowedSourceDB).filter(AllowedSourceDB.active).count()
pii_rules = db.query(PIIRuleDB).filter(PIIRuleDB.active).count()
# Count blocked content entries from today
today_start = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
blocked_today = db.query(BlockedContentDB).filter(
BlockedContentDB.created_at >= today_start,
).count()
blocked_total = db.query(BlockedContentDB).count()
return {
"active_policies": active_sources,
"allowed_sources": total_sources,
"pii_rules": pii_rules,
"blocked_today": blocked_today,
"blocked_total": blocked_total,
}
@router.get("/compliance-report")
async def get_compliance_report(db: Session = Depends(get_db)):
"""Generate a compliance report for source policies."""
sources = db.query(AllowedSourceDB).filter(AllowedSourceDB.active).all()
pii_rules = db.query(PIIRuleDB).filter(PIIRuleDB.active).all()
return {
"report_date": datetime.now(timezone.utc).isoformat(),
"summary": {
"active_sources": len(sources),
"active_pii_rules": len(pii_rules),
"source_types": list(set(s.source_type for s in sources)),
"licenses": list(set(s.license for s in sources if s.license)),
},
"sources": [
{
"domain": s.domain,
"name": s.name,
"license": s.license,
"legal_basis": s.legal_basis,
"trust_boost": s.trust_boost,
}
for s in sources
],
"pii_rules": [
{
"name": r.name,
"category": r.category,
"action": r.action,
}
for r in pii_rules
],
}