Files
breakpilot-compliance/backend-compliance/compliance/db/source_policy_models.py
Benjamin Admin a228b3b528
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 34s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 18s
feat: add RAG corpus versioning and source policy backend
Part 1 — RAG Corpus Versioning:
- New DB table compliance_corpus_versions (migration 017)
- Go CorpusVersionStore with CRUD operations
- Assessment struct extended with corpus_version_id
- API endpoints: GET /rag/corpus-status, /rag/corpus-versions/:collection
- RAG routes (search, regulations) now registered in main.go
- Ingestion script registers corpus versions after each run
- Frontend staleness badge in SDK sidebar

Part 3 — Source Policy Backend:
- New FastAPI router with CRUD for allowed sources, PII rules,
  operations matrix, audit trail, stats, and compliance report
- SQLAlchemy models for all source policy tables (migration 001)
- Frontend API base corrected from edu-search:8088/8089 to
  backend-compliance:8002/api

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 07:58:08 +01:00

106 lines
4.0 KiB
Python

"""
SQLAlchemy models for Source Policy Management.
Tables:
- compliance_allowed_sources: Whitelisted data sources for RAG corpus
- compliance_source_operations: Operations matrix for source data flows
- compliance_pii_rules: PII detection/masking rules for sources
- compliance_source_policy_audit: Audit trail for source policy changes
"""
import uuid
from datetime import datetime
from sqlalchemy import (
Column, String, Text, Boolean, DateTime, Float, JSON, Index
)
from sqlalchemy.dialects.postgresql import UUID
from classroom_engine.database import Base
class AllowedSourceDB(Base):
    """ORM model for a whitelisted data source of the compliance RAG corpus.

    Mapped to the ``compliance_allowed_sources`` table. ``domain`` carries a
    unique constraint; ``domain`` and ``active`` are additionally indexed.
    """

    __tablename__ = "compliance_allowed_sources"
    __table_args__ = (
        Index("idx_allowed_sources_domain", "domain"),
        Index("idx_allowed_sources_active", "active"),
    )

    # Primary key: UUID generated client-side via uuid.uuid4.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # NOTE(review): unique=True together with idx_allowed_sources_domain may
    # yield two indexes on the same column on PostgreSQL -- confirm against
    # the migration before changing either.
    domain = Column(String(255), unique=True, nullable=False)
    name = Column(String(255), nullable=False)
    description = Column(Text, nullable=True)

    # License identifier, e.g. DL-DE-BY-2.0, CC-BY.
    license = Column(String(100), nullable=True)
    # Legal basis for using the source, e.g. §5 UrhG.
    legal_basis = Column(String(200), nullable=True)

    trust_boost = Column(Float, default=0.5)
    # Kind of source: legal, guidance, template.
    source_type = Column(String(50), default="legal")
    active = Column(Boolean, default=True)

    # The attribute is named ``metadata_`` while the database column is
    # ``metadata``.
    metadata_ = Column("metadata", JSON, nullable=True)

    # Naive UTC timestamps. ``updated_at`` declares no insert default here;
    # it is filled through ``onupdate``.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, onupdate=datetime.utcnow, nullable=True)

    def __repr__(self):
        """Return a short debug representation showing domain and name."""
        return f"<AllowedSource {self.domain}: {self.name}>"
class SourceOperationDB(Base):
    """ORM model for one entry of the source data-flow operations matrix.

    Mapped to the ``compliance_source_operations`` table. Each row records
    whether a given operation is allowed for a source, optionally with
    free-text conditions.
    """

    __tablename__ = "compliance_source_operations"
    __table_args__ = (
        Index("idx_source_operations_source", "source_id"),
    )

    # Primary key: UUID generated client-side via uuid.uuid4.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # NOTE(review): no ForeignKey is declared on this column; presumably it
    # references compliance_allowed_sources.id -- verify against the migration.
    source_id = Column(UUID(as_uuid=True), nullable=False)

    # Operation name: ingest, search, export, share.
    operation = Column(String(50), nullable=False)
    allowed = Column(Boolean, default=True)
    # Conditions for this operation.
    conditions = Column(Text, nullable=True)

    # Naive UTC timestamps. ``updated_at`` declares no insert default here;
    # it is filled through ``onupdate``.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, onupdate=datetime.utcnow, nullable=True)

    def __repr__(self):
        """Return a short debug representation showing source id and operation."""
        return f"<SourceOperation {self.source_id}: {self.operation}>"
class PIIRuleDB(Base):
    """ORM model for a PII detection and masking rule for compliance sources.

    Mapped to the ``compliance_pii_rules`` table; ``category`` and ``active``
    are indexed.
    """

    __tablename__ = "compliance_pii_rules"
    __table_args__ = (
        Index("idx_pii_rules_category", "category"),
        Index("idx_pii_rules_active", "active"),
    )

    # Primary key: UUID generated client-side via uuid.uuid4.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    name = Column(String(255), nullable=False)
    description = Column(Text, nullable=True)

    # Regex pattern.
    pattern = Column(Text, nullable=True)
    # PII category: email, phone, name, address, etc.
    category = Column(String(50), nullable=False)
    # What to do on a match: mask, redact, flag.
    action = Column(String(20), default="mask")
    active = Column(Boolean, default=True)

    # Naive UTC timestamps. ``updated_at`` declares no insert default here;
    # it is filled through ``onupdate``.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, onupdate=datetime.utcnow, nullable=True)

    def __repr__(self):
        """Return a short debug representation showing rule name and category."""
        return f"<PIIRule {self.name}: {self.category}>"
class SourcePolicyAuditDB(Base):
    """ORM model for one audit-trail entry of a source policy change.

    Mapped to the ``compliance_source_policy_audit`` table. Rows are indexed
    by ``(entity_type, entity_id)`` and by ``created_at``.
    """

    __tablename__ = "compliance_source_policy_audit"
    __table_args__ = (
        Index("idx_source_audit_entity", "entity_type", "entity_id"),
        Index("idx_source_audit_created", "created_at"),
    )

    # Primary key: UUID generated client-side via uuid.uuid4.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Kind of change: create, update, delete.
    action = Column(String(20), nullable=False)
    # Kind of entity changed: source, operation, pii_rule.
    entity_type = Column(String(50), nullable=False)
    entity_id = Column(UUID(as_uuid=True), nullable=True)

    # JSON snapshots of the values before and after the change.
    old_values = Column(JSON, nullable=True)
    new_values = Column(JSON, nullable=True)
    user_id = Column(String(100), nullable=True)

    # Naive UTC creation timestamp; this model declares no updated_at column.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    def __repr__(self):
        """Return a short debug representation showing action and target entity."""
        return (
            f"<SourcePolicyAudit {self.action} "
            f"{self.entity_type}: {self.entity_id}>"
        )