All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 34s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 18s
Part 1 — RAG Corpus Versioning:
- New DB table compliance_corpus_versions (migration 017)
- Go CorpusVersionStore with CRUD operations
- Assessment struct extended with corpus_version_id
- API endpoints: GET /rag/corpus-status, /rag/corpus-versions/:collection
- RAG routes (search, regulations) now registered in main.go
- Ingestion script registers corpus versions after each run
- Frontend staleness badge in SDK sidebar

Part 3 — Source Policy Backend:
- New FastAPI router with CRUD for allowed sources, PII rules, operations matrix, audit trail, stats, and compliance report
- SQLAlchemy models for all source policy tables (migration 001)
- Frontend API base corrected from edu-search:8088/8089 to backend-compliance:8002/api

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
106 lines
4.0 KiB
Python
106 lines
4.0 KiB
Python
"""
SQLAlchemy models for Source Policy Management.

Tables:
- compliance_allowed_sources: Whitelisted data sources for RAG corpus
- compliance_source_operations: Operations matrix for source data flows
- compliance_pii_rules: PII detection/masking rules for sources
- compliance_source_policy_audit: Audit trail for source policy changes
"""
|
|
|
|
import uuid
|
|
from datetime import datetime
|
|
|
|
from sqlalchemy import (
|
|
Column, String, Text, Boolean, DateTime, Float, JSON, Index
|
|
)
|
|
from sqlalchemy.dialects.postgresql import UUID
|
|
|
|
from classroom_engine.database import Base
|
|
|
|
|
|
class AllowedSourceDB(Base):
    """ORM model for one whitelisted data source of the compliance RAG corpus.

    Maps to the ``compliance_allowed_sources`` table. Each row is keyed by a
    UUID and carries a unique ``domain`` plus descriptive and licensing
    metadata.
    """

    __tablename__ = "compliance_allowed_sources"

    # Primary key; a fresh UUID4 is generated client-side on insert.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # Identification.
    domain = Column(String(255), unique=True, nullable=False)
    name = Column(String(255), nullable=False)
    description = Column(Text, nullable=True)

    # Licensing information, e.g. DL-DE-BY-2.0, CC-BY.
    license = Column(String(100), nullable=True)
    # Legal basis for use, e.g. §5 UrhG.
    legal_basis = Column(String(200), nullable=True)

    # Defaults to 0.5 when not supplied.
    trust_boost = Column(Float, default=0.5)
    # One of: legal, guidance, template.
    source_type = Column(String(50), default="legal")
    active = Column(Boolean, default=True)

    # The DB column is called "metadata"; the attribute carries a trailing
    # underscore because ``metadata`` is reserved on declarative classes.
    metadata_ = Column("metadata", JSON, nullable=True)

    # ``created_at`` is filled on insert; ``updated_at`` has no insert
    # default and is only populated when the row is updated.
    created_at = Column(
        DateTime,
        default=datetime.utcnow,
        nullable=False,
    )
    updated_at = Column(
        DateTime,
        onupdate=datetime.utcnow,
        nullable=True,
    )

    __table_args__ = (
        Index("idx_allowed_sources_domain", "domain"),
        Index("idx_allowed_sources_active", "active"),
    )

    def __repr__(self):
        """Return a short debug representation with domain and name."""
        return "<AllowedSource {}: {}>".format(self.domain, self.name)
|
|
|
|
|
|
class SourceOperationDB(Base):
    """Operations matrix entry for source data flows.

    Maps to the ``compliance_source_operations`` table. Each row records
    whether a given ``operation`` is ``allowed`` for the source identified by
    ``source_id``, optionally with free-text ``conditions``.
    """

    __tablename__ = 'compliance_source_operations'

    # Primary key; a fresh UUID4 is generated client-side on insert.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # NOTE(review): no ForeignKey is declared on this column here —
    # presumably it references compliance_allowed_sources.id; confirm
    # against the migration.
    source_id = Column(UUID(as_uuid=True), nullable=False)
    operation = Column(String(50), nullable=False)  # ingest, search, export, share
    allowed = Column(Boolean, default=True)
    conditions = Column(Text, nullable=True)  # Conditions for this operation
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    # No insert default: stays NULL until the row is first updated.
    updated_at = Column(DateTime, onupdate=datetime.utcnow, nullable=True)

    __table_args__ = (
        Index('idx_source_operations_source', 'source_id'),
    )

    def __repr__(self):
        """Return a short debug representation with source id and operation."""
        return f"<SourceOperation {self.source_id}: {self.operation}>"
|
|
|
|
|
|
class PIIRuleDB(Base):
    """PII detection and masking rule for compliance sources.

    Maps to the ``compliance_pii_rules`` table. A rule has a name, a
    category, an optional pattern and an action that defaults to ``'mask'``.
    """

    __tablename__ = 'compliance_pii_rules'

    # Primary key; a fresh UUID4 is generated client-side on insert.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    name = Column(String(255), nullable=False)
    description = Column(Text, nullable=True)
    pattern = Column(Text, nullable=True)  # Regex pattern
    category = Column(String(50), nullable=False)  # email, phone, name, address, etc.
    action = Column(String(20), default='mask')  # mask, redact, flag
    active = Column(Boolean, default=True)
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    # No insert default: stays NULL until the row is first updated.
    updated_at = Column(DateTime, onupdate=datetime.utcnow, nullable=True)

    __table_args__ = (
        Index('idx_pii_rules_category', 'category'),
        Index('idx_pii_rules_active', 'active'),
    )

    def __repr__(self):
        """Return a short debug representation with category and name."""
        return f"<PIIRule {self.category}: {self.name}>"
|
|
|
|
|
|
class SourcePolicyAuditDB(Base):
    """Audit trail for source policy changes.

    Maps to the ``compliance_source_policy_audit`` table. Each row stores the
    action performed, the type and (optional) id of the affected entity, JSON
    snapshots of the values before and after, and an optional user id.
    Rows carry only ``created_at``; there is no ``updated_at`` column.
    """

    __tablename__ = 'compliance_source_policy_audit'

    # Primary key; a fresh UUID4 is generated client-side on insert.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    action = Column(String(20), nullable=False)  # create, update, delete
    entity_type = Column(String(50), nullable=False)  # source, operation, pii_rule
    entity_id = Column(UUID(as_uuid=True), nullable=True)
    old_values = Column(JSON, nullable=True)
    new_values = Column(JSON, nullable=True)
    user_id = Column(String(100), nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    __table_args__ = (
        # Composite index over (entity_type, entity_id).
        Index('idx_source_audit_entity', 'entity_type', 'entity_id'),
        Index('idx_source_audit_created', 'created_at'),
    )

    def __repr__(self):
        """Return a short debug representation with action and entity."""
        return (
            f"<SourcePolicyAudit {self.action} "
            f"{self.entity_type}:{self.entity_id}>"
        )
|