""" SQLAlchemy models for Source Policy Management. Tables: - compliance_allowed_sources: Whitelisted data sources for RAG corpus - compliance_source_operations: Operations matrix for source data flows - compliance_pii_rules: PII detection/masking rules for sources - compliance_source_policy_audit: Audit trail for source policy changes """ import uuid from datetime import datetime from sqlalchemy import ( Column, String, Text, Boolean, DateTime, Float, JSON, Index ) from sqlalchemy.dialects.postgresql import UUID from classroom_engine.database import Base class AllowedSourceDB(Base): """Whitelisted data source for compliance RAG corpus.""" __tablename__ = 'compliance_allowed_sources' id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) domain = Column(String(255), unique=True, nullable=False) name = Column(String(255), nullable=False) description = Column(Text, nullable=True) license = Column(String(100), nullable=True) # DL-DE-BY-2.0, CC-BY, etc. legal_basis = Column(String(200), nullable=True) # ยง5 UrhG, etc. trust_boost = Column(Float, default=0.5) source_type = Column(String(50), default='legal') # legal, guidance, template active = Column(Boolean, default=True) metadata_ = Column('metadata', JSON, nullable=True) created_at = Column(DateTime, default=datetime.utcnow, nullable=False) updated_at = Column(DateTime, onupdate=datetime.utcnow, nullable=True) __table_args__ = ( Index('idx_allowed_sources_domain', 'domain'), Index('idx_allowed_sources_active', 'active'), ) def __repr__(self): return f"" class SourceOperationDB(Base): """Operations matrix entry for source data flows.""" __tablename__ = 'compliance_source_operations' id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) source_id = Column(UUID(as_uuid=True), nullable=False) operation = Column(String(50), nullable=False) # ingest, search, export, share allowed = Column(Boolean, default=True) conditions = Column(Text, nullable=True) # Conditions for this operation created_at = Column(DateTime, default=datetime.utcnow, nullable=False) updated_at = Column(DateTime, onupdate=datetime.utcnow, nullable=True) __table_args__ = ( Index('idx_source_operations_source', 'source_id'), ) class PIIRuleDB(Base): """PII detection and masking rule for compliance sources.""" __tablename__ = 'compliance_pii_rules' id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) name = Column(String(255), nullable=False) description = Column(Text, nullable=True) pattern = Column(Text, nullable=True) # Regex pattern category = Column(String(50), nullable=False) # email, phone, name, address, etc. action = Column(String(20), default='mask') # mask, redact, flag active = Column(Boolean, default=True) created_at = Column(DateTime, default=datetime.utcnow, nullable=False) updated_at = Column(DateTime, onupdate=datetime.utcnow, nullable=True) __table_args__ = ( Index('idx_pii_rules_category', 'category'), Index('idx_pii_rules_active', 'active'), ) class BlockedContentDB(Base): """Blocked content entries tracked by source policy enforcement.""" __tablename__ = 'compliance_blocked_content' id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) url = Column(Text, nullable=True) domain = Column(String(255), nullable=False) block_reason = Column(String(100), nullable=False) # unlicensed, pii, blacklisted, etc. rule_id = Column(UUID(as_uuid=True), nullable=True) # PII rule or source that triggered block details = Column(JSON, nullable=True) created_at = Column(DateTime, default=datetime.utcnow, nullable=False) __table_args__ = ( Index('idx_blocked_content_domain', 'domain'), Index('idx_blocked_content_created', 'created_at'), ) def __repr__(self): return f"" class SourcePolicyAuditDB(Base): """Audit trail for source policy changes.""" __tablename__ = 'compliance_source_policy_audit' id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) action = Column(String(20), nullable=False) # create, update, delete entity_type = Column(String(50), nullable=False) # source, operation, pii_rule entity_id = Column(UUID(as_uuid=True), nullable=True) old_values = Column(JSON, nullable=True) new_values = Column(JSON, nullable=True) user_id = Column(String(100), nullable=True) created_at = Column(DateTime, default=datetime.utcnow, nullable=False) __table_args__ = ( Index('idx_source_audit_entity', 'entity_type', 'entity_id'), Index('idx_source_audit_created', 'created_at'), )