feat: add RAG corpus versioning and source policy backend
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 34s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 18s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 34s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 18s
Part 1 — RAG Corpus Versioning:
- New DB table compliance_corpus_versions (migration 017)
- Go CorpusVersionStore with CRUD operations
- Assessment struct extended with corpus_version_id
- API endpoints: GET /rag/corpus-status, /rag/corpus-versions/:collection
- RAG routes (search, regulations) now registered in main.go
- Ingestion script registers corpus versions after each run
- Frontend staleness badge in SDK sidebar

Part 3 — Source Policy Backend:
- New FastAPI router with CRUD for allowed sources, PII rules, operations matrix, audit trail, stats, and compliance report
- SQLAlchemy models for all source policy tables (migration 001)
- Frontend API base corrected from edu-search:8088/8089 to backend-compliance:8002/api

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
105
backend-compliance/compliance/db/source_policy_models.py
Normal file
105
backend-compliance/compliance/db/source_policy_models.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
SQLAlchemy models for Source Policy Management.
|
||||
|
||||
Tables:
|
||||
- compliance_allowed_sources: Whitelisted data sources for RAG corpus
|
||||
- compliance_source_operations: Operations matrix for source data flows
|
||||
- compliance_pii_rules: PII detection/masking rules for sources
|
||||
- compliance_source_policy_audit: Audit trail for source policy changes
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import (
|
||||
Column, String, Text, Boolean, DateTime, Float, JSON, Index
|
||||
)
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
|
||||
from classroom_engine.database import Base
|
||||
|
||||
|
||||
class AllowedSourceDB(Base):
    """ORM row for one whitelisted data source of the compliance RAG corpus.

    Backed by the ``compliance_allowed_sources`` table. Each row is keyed by
    a UUID and carries a unique ``domain`` plus descriptive and licensing
    fields.
    """

    __tablename__ = 'compliance_allowed_sources'

    # Primary key; a fresh UUID4 is generated client-side when none is given.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # Unique, mandatory domain of the source.
    domain = Column(
        String(255),
        unique=True,
        nullable=False,
    )
    name = Column(String(255), nullable=False)
    description = Column(Text, nullable=True)

    # License identifier, e.g. DL-DE-BY-2.0, CC-BY.
    license = Column(String(100), nullable=True)
    # Legal basis for using the source, e.g. §5 UrhG.
    legal_basis = Column(String(200), nullable=True)

    # Defaults to 0.5 on insert.
    # NOTE(review): presumably a ranking weight for retrieval — confirm.
    trust_boost = Column(Float, default=0.5)
    # One of: legal, guidance, template (not enforced at the column level).
    source_type = Column(String(50), default='legal')
    active = Column(Boolean, default=True)

    # The attribute carries a trailing underscore because ``metadata`` is
    # reserved on declarative classes; the database column is still
    # called "metadata".
    metadata_ = Column('metadata', JSON, nullable=True)

    # Naive UTC timestamps: ``created_at`` is filled on INSERT,
    # ``updated_at`` only on UPDATE (so it stays NULL until the first change).
    created_at = Column(
        DateTime,
        default=datetime.utcnow,
        nullable=False,
    )
    updated_at = Column(
        DateTime,
        onupdate=datetime.utcnow,
        nullable=True,
    )

    # NOTE(review): ``domain`` is already declared unique=True, so the
    # explicit domain index may duplicate the index backing the unique
    # constraint — confirm against the migration before changing.
    __table_args__ = (
        Index('idx_allowed_sources_domain', 'domain'),
        Index('idx_allowed_sources_active', 'active'),
    )

    def __repr__(self):
        """Return a short debug representation with domain and name."""
        return "<AllowedSource {}: {}>".format(self.domain, self.name)
|
||||
|
||||
|
||||
class SourceOperationDB(Base):
    """Operations matrix entry for source data flows.

    Backed by the ``compliance_source_operations`` table. Each row records
    whether a single ``operation`` is ``allowed`` for the source identified
    by ``source_id``, optionally with free-text ``conditions``.
    """

    __tablename__ = 'compliance_source_operations'

    # Primary key; a fresh UUID4 is generated client-side when none is given.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # NOTE(review): no ForeignKey is declared here; presumably this refers to
    # compliance_allowed_sources.id — confirm against the migration.
    source_id = Column(UUID(as_uuid=True), nullable=False)
    operation = Column(String(50), nullable=False)  # ingest, search, export, share
    allowed = Column(Boolean, default=True)
    conditions = Column(Text, nullable=True)  # Conditions for this operation
    # Naive UTC timestamps: created_at is filled on INSERT, updated_at only
    # on UPDATE (so it stays NULL until the first change).
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, onupdate=datetime.utcnow, nullable=True)

    __table_args__ = (
        Index('idx_source_operations_source', 'source_id'),
    )

    def __repr__(self):
        """Return a short debug representation of this matrix entry."""
        return (
            f"<SourceOperation {self.source_id}: "
            f"{self.operation} allowed={self.allowed}>"
        )
|
||||
|
||||
|
||||
class PIIRuleDB(Base):
    """PII detection and masking rule for compliance sources.

    Backed by the ``compliance_pii_rules`` table. A rule has a mandatory
    ``name`` and ``category``, an optional regex ``pattern``, and an
    ``action`` that defaults to ``'mask'``.
    """

    __tablename__ = 'compliance_pii_rules'

    # Primary key; a fresh UUID4 is generated client-side when none is given.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    name = Column(String(255), nullable=False)
    description = Column(Text, nullable=True)
    pattern = Column(Text, nullable=True)  # Regex pattern
    category = Column(String(50), nullable=False)  # email, phone, name, address, etc.
    action = Column(String(20), default='mask')  # mask, redact, flag
    active = Column(Boolean, default=True)
    # Naive UTC timestamps: created_at is filled on INSERT, updated_at only
    # on UPDATE (so it stays NULL until the first change).
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, onupdate=datetime.utcnow, nullable=True)

    __table_args__ = (
        Index('idx_pii_rules_category', 'category'),
        Index('idx_pii_rules_active', 'active'),
    )

    def __repr__(self):
        """Return a short debug representation of this rule."""
        return f"<PIIRule {self.name}: {self.category} -> {self.action}>"
|
||||
|
||||
|
||||
class SourcePolicyAuditDB(Base):
    """Audit trail for source policy changes.

    Backed by the ``compliance_source_policy_audit`` table. Each row records
    one ``action`` on an entity, with optional JSON snapshots of the values
    before and after the change. Rows carry only ``created_at``; no
    ``updated_at`` column is defined.
    """

    __tablename__ = 'compliance_source_policy_audit'

    # Primary key; a fresh UUID4 is generated client-side when none is given.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    action = Column(String(20), nullable=False)  # create, update, delete
    entity_type = Column(String(50), nullable=False)  # source, operation, pii_rule
    entity_id = Column(UUID(as_uuid=True), nullable=True)
    old_values = Column(JSON, nullable=True)
    new_values = Column(JSON, nullable=True)
    user_id = Column(String(100), nullable=True)
    # Naive UTC timestamp filled on INSERT.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    __table_args__ = (
        # Composite index over (entity_type, entity_id).
        Index('idx_source_audit_entity', 'entity_type', 'entity_id'),
        Index('idx_source_audit_created', 'created_at'),
    )

    def __repr__(self):
        """Return a short debug representation of this audit entry."""
        return (
            f"<SourcePolicyAudit {self.action} "
            f"{self.entity_type}:{self.entity_id}>"
        )
|
||||
Reference in New Issue
Block a user