All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 34s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 18s
Part 1 — RAG Corpus Versioning: - New DB table compliance_corpus_versions (migration 017) - Go CorpusVersionStore with CRUD operations - Assessment struct extended with corpus_version_id - API endpoints: GET /rag/corpus-status, /rag/corpus-versions/:collection - RAG routes (search, regulations) now registered in main.go - Ingestion script registers corpus versions after each run - Frontend staleness badge in SDK sidebar Part 3 — Source Policy Backend: - New FastAPI router with CRUD for allowed sources, PII rules, operations matrix, audit trail, stats, and compliance report - SQLAlchemy models for all source policy tables (migration 001) - Frontend API base corrected from edu-search:8088/8089 to backend-compliance:8002/api Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
159 lines
5.0 KiB
Go
159 lines
5.0 KiB
Go
package ucca
|
|
|
|
import (
	"context"
	"errors"
	"time"

	"github.com/google/uuid"
	"github.com/jackc/pgx/v5"
	"github.com/jackc/pgx/v5/pgxpool"
)
|
|
|
|
// CorpusVersion tracks a specific version of the RAG compliance corpus.
|
|
type CorpusVersion struct {
|
|
ID uuid.UUID `json:"id"`
|
|
Version string `json:"version"` // "2026-03-02.1"
|
|
CollectionName string `json:"collection_name"` // "bp_compliance_ce"
|
|
DocumentsCount int `json:"documents_count"`
|
|
ChunksCount int `json:"chunks_count"`
|
|
Regulations []string `json:"regulations"` // ["eu_2016_679", ...]
|
|
Digest string `json:"digest,omitempty"` // SHA256 over chunks
|
|
IngestionSource string `json:"ingestion_source,omitempty"`
|
|
Notes string `json:"notes,omitempty"`
|
|
CreatedAt time.Time `json:"created_at"`
|
|
CreatedBy string `json:"created_by,omitempty"`
|
|
}
|
|
|
|
// CorpusVersionStore handles corpus version persistence.
|
|
type CorpusVersionStore struct {
|
|
pool *pgxpool.Pool
|
|
}
|
|
|
|
// NewCorpusVersionStore creates a new corpus version store.
|
|
func NewCorpusVersionStore(pool *pgxpool.Pool) *CorpusVersionStore {
|
|
return &CorpusVersionStore{pool: pool}
|
|
}
|
|
|
|
// CreateCorpusVersion inserts a new corpus version record.
|
|
func (s *CorpusVersionStore) CreateCorpusVersion(ctx context.Context, v *CorpusVersion) error {
|
|
if v.ID == uuid.Nil {
|
|
v.ID = uuid.New()
|
|
}
|
|
if v.CreatedAt.IsZero() {
|
|
v.CreatedAt = time.Now().UTC()
|
|
}
|
|
|
|
_, err := s.pool.Exec(ctx, `
|
|
INSERT INTO compliance_corpus_versions (
|
|
id, version, collection_name, documents_count, chunks_count,
|
|
regulations, digest, ingestion_source, notes, created_at, created_by
|
|
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
|
`,
|
|
v.ID, v.Version, v.CollectionName, v.DocumentsCount, v.ChunksCount,
|
|
v.Regulations, v.Digest, v.IngestionSource, v.Notes, v.CreatedAt, v.CreatedBy,
|
|
)
|
|
return err
|
|
}
|
|
|
|
// GetLatestCorpusVersion returns the most recent version for a collection.
|
|
func (s *CorpusVersionStore) GetLatestCorpusVersion(ctx context.Context, collection string) (*CorpusVersion, error) {
|
|
var v CorpusVersion
|
|
err := s.pool.QueryRow(ctx, `
|
|
SELECT id, version, collection_name, documents_count, chunks_count,
|
|
regulations, digest, ingestion_source, notes, created_at, created_by
|
|
FROM compliance_corpus_versions
|
|
WHERE collection_name = $1
|
|
ORDER BY created_at DESC
|
|
LIMIT 1
|
|
`, collection).Scan(
|
|
&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
|
|
&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
|
|
)
|
|
if err == pgx.ErrNoRows {
|
|
return nil, nil
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &v, nil
|
|
}
|
|
|
|
// GetCorpusVersionByID retrieves a specific corpus version by ID.
|
|
func (s *CorpusVersionStore) GetCorpusVersionByID(ctx context.Context, id uuid.UUID) (*CorpusVersion, error) {
|
|
var v CorpusVersion
|
|
err := s.pool.QueryRow(ctx, `
|
|
SELECT id, version, collection_name, documents_count, chunks_count,
|
|
regulations, digest, ingestion_source, notes, created_at, created_by
|
|
FROM compliance_corpus_versions
|
|
WHERE id = $1
|
|
`, id).Scan(
|
|
&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
|
|
&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
|
|
)
|
|
if err == pgx.ErrNoRows {
|
|
return nil, nil
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &v, nil
|
|
}
|
|
|
|
// ListCorpusVersions returns all versions for a collection, newest first.
|
|
func (s *CorpusVersionStore) ListCorpusVersions(ctx context.Context, collection string) ([]CorpusVersion, error) {
|
|
rows, err := s.pool.Query(ctx, `
|
|
SELECT id, version, collection_name, documents_count, chunks_count,
|
|
regulations, digest, ingestion_source, notes, created_at, created_by
|
|
FROM compliance_corpus_versions
|
|
WHERE collection_name = $1
|
|
ORDER BY created_at DESC
|
|
`, collection)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var versions []CorpusVersion
|
|
for rows.Next() {
|
|
var v CorpusVersion
|
|
err := rows.Scan(
|
|
&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
|
|
&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
versions = append(versions, v)
|
|
}
|
|
return versions, nil
|
|
}
|
|
|
|
// GetAllLatestVersions returns the latest version for every collection.
|
|
func (s *CorpusVersionStore) GetAllLatestVersions(ctx context.Context) ([]CorpusVersion, error) {
|
|
rows, err := s.pool.Query(ctx, `
|
|
SELECT DISTINCT ON (collection_name)
|
|
id, version, collection_name, documents_count, chunks_count,
|
|
regulations, digest, ingestion_source, notes, created_at, created_by
|
|
FROM compliance_corpus_versions
|
|
ORDER BY collection_name, created_at DESC
|
|
`)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var versions []CorpusVersion
|
|
for rows.Next() {
|
|
var v CorpusVersion
|
|
err := rows.Scan(
|
|
&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
|
|
&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
versions = append(versions, v)
|
|
}
|
|
return versions, nil
|
|
}
|