Files
breakpilot-compliance/ai-compliance-sdk/internal/ucca/corpus_version.go
Benjamin Admin a228b3b528
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 34s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 18s
feat: add RAG corpus versioning and source policy backend
Part 1 — RAG Corpus Versioning:
- New DB table compliance_corpus_versions (migration 017)
- Go CorpusVersionStore with CRUD operations
- Assessment struct extended with corpus_version_id
- API endpoints: GET /rag/corpus-status, /rag/corpus-versions/:collection
- RAG routes (search, regulations) now registered in main.go
- Ingestion script registers corpus versions after each run
- Frontend staleness badge in SDK sidebar

Part 3 — Source Policy Backend:
- New FastAPI router with CRUD for allowed sources, PII rules,
  operations matrix, audit trail, stats, and compliance report
- SQLAlchemy models for all source policy tables (migration 001)
- Frontend API base corrected from edu-search:8088/8089 to
  backend-compliance:8002/api

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 07:58:08 +01:00

159 lines
5.0 KiB
Go

package ucca
import (
	"context"
	"errors"
	"time"

	"github.com/google/uuid"
	"github.com/jackc/pgx/v5"
	"github.com/jackc/pgx/v5/pgxpool"
)
// CorpusVersion tracks a specific version of the RAG compliance corpus.
//
// Its fields map one-to-one onto the columns that CorpusVersionStore reads
// from and writes to the compliance_corpus_versions table.
type CorpusVersion struct {
	// ID is the record identifier. CreateCorpusVersion generates one when it
	// is left as uuid.Nil.
	ID uuid.UUID `json:"id"`
	// Version is the version label, e.g. "2026-03-02.1".
	Version string `json:"version"`
	// CollectionName names the collection this version belongs to,
	// e.g. "bp_compliance_ce".
	CollectionName string `json:"collection_name"`
	// DocumentsCount is the number of documents recorded for this version.
	DocumentsCount int `json:"documents_count"`
	// ChunksCount is the number of chunks recorded for this version.
	ChunksCount int `json:"chunks_count"`
	// Regulations lists regulation identifiers, e.g. ["eu_2016_679", ...].
	Regulations []string `json:"regulations"`
	// Digest is a SHA256 over the chunks; omitted from JSON when empty.
	Digest string `json:"digest,omitempty"`
	// IngestionSource is omitted from JSON when empty.
	// NOTE(review): presumably identifies the script or pipeline that produced
	// this version — confirm against the ingestion code.
	IngestionSource string `json:"ingestion_source,omitempty"`
	// Notes is free-form text; omitted from JSON when empty.
	Notes string `json:"notes,omitempty"`
	// CreatedAt is the record timestamp. CreateCorpusVersion sets it to the
	// current UTC time when it is zero.
	CreatedAt time.Time `json:"created_at"`
	// CreatedBy is omitted from JSON when empty.
	// NOTE(review): presumably the user or job that registered the version — confirm.
	CreatedBy string `json:"created_by,omitempty"`
}
// CorpusVersionStore handles corpus version persistence.
//
// All of its methods run their SQL against the compliance_corpus_versions
// table through the wrapped connection pool.
type CorpusVersionStore struct {
	// pool is the pgx connection pool every query is executed on.
	pool *pgxpool.Pool
}
// NewCorpusVersionStore returns a CorpusVersionStore that runs its queries on
// pool. The pool is stored as given; nothing is queried or validated here.
func NewCorpusVersionStore(pool *pgxpool.Pool) *CorpusVersionStore {
	store := CorpusVersionStore{pool: pool}
	return &store
}
// CreateCorpusVersion persists v as a new row in compliance_corpus_versions.
//
// v is modified in place before the insert: an ID equal to uuid.Nil is
// replaced with a freshly generated UUID, and a zero CreatedAt is replaced
// with the current UTC time. The error from the insert, if any, is returned
// unchanged.
func (s *CorpusVersionStore) CreateCorpusVersion(ctx context.Context, v *CorpusVersion) error {
	const insertSQL = `
INSERT INTO compliance_corpus_versions (
id, version, collection_name, documents_count, chunks_count,
regulations, digest, ingestion_source, notes, created_at, created_by
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
`

	// Fill in defaults so callers only need to supply the descriptive fields.
	if v.ID == uuid.Nil {
		v.ID = uuid.New()
	}
	if v.CreatedAt.IsZero() {
		v.CreatedAt = time.Now().UTC()
	}

	if _, err := s.pool.Exec(ctx, insertSQL,
		v.ID, v.Version, v.CollectionName, v.DocumentsCount, v.ChunksCount,
		v.Regulations, v.Digest, v.IngestionSource, v.Notes, v.CreatedAt, v.CreatedBy,
	); err != nil {
		return err
	}
	return nil
}
// GetLatestCorpusVersion returns the most recently created version for a
// collection.
//
// Rows for the collection are ordered by created_at descending and only the
// first one is read. When the collection has no recorded versions the result
// is (nil, nil), so callers must check the returned pointer as well as the
// error. Any other query or scan failure is returned unchanged.
func (s *CorpusVersionStore) GetLatestCorpusVersion(ctx context.Context, collection string) (*CorpusVersion, error) {
	var v CorpusVersion
	err := s.pool.QueryRow(ctx, `
SELECT id, version, collection_name, documents_count, chunks_count,
regulations, digest, ingestion_source, notes, created_at, created_by
FROM compliance_corpus_versions
WHERE collection_name = $1
ORDER BY created_at DESC
LIMIT 1
`, collection).Scan(
		&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
		&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
	)
	// errors.Is instead of ==, so a wrapped ErrNoRows is still recognised as
	// "not found" rather than reported as a failure.
	if errors.Is(err, pgx.ErrNoRows) {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	return &v, nil
}
// GetCorpusVersionByID retrieves a specific corpus version by ID.
//
// When no row has the given id the result is (nil, nil), so callers must
// check the returned pointer as well as the error. Any other query or scan
// failure is returned unchanged.
func (s *CorpusVersionStore) GetCorpusVersionByID(ctx context.Context, id uuid.UUID) (*CorpusVersion, error) {
	var v CorpusVersion
	err := s.pool.QueryRow(ctx, `
SELECT id, version, collection_name, documents_count, chunks_count,
regulations, digest, ingestion_source, notes, created_at, created_by
FROM compliance_corpus_versions
WHERE id = $1
`, id).Scan(
		&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
		&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
	)
	// errors.Is instead of ==, so a wrapped ErrNoRows is still recognised as
	// "not found" rather than reported as a failure.
	if errors.Is(err, pgx.ErrNoRows) {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	return &v, nil
}
// ListCorpusVersions returns all versions for a collection, newest first
// (ordered by created_at descending).
//
// The returned slice is nil when the collection has no versions. On any
// query, scan, or iteration error the partial result is discarded and
// (nil, err) is returned.
func (s *CorpusVersionStore) ListCorpusVersions(ctx context.Context, collection string) ([]CorpusVersion, error) {
	rows, err := s.pool.Query(ctx, `
SELECT id, version, collection_name, documents_count, chunks_count,
regulations, digest, ingestion_source, notes, created_at, created_by
FROM compliance_corpus_versions
WHERE collection_name = $1
ORDER BY created_at DESC
`, collection)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var versions []CorpusVersion
	for rows.Next() {
		var v CorpusVersion
		if err := rows.Scan(
			&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
			&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
		); err != nil {
			return nil, err
		}
		versions = append(versions, v)
	}
	// rows.Next also returns false when reading fails part-way through, so
	// the error must be checked here or a truncated list would look like success.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return versions, nil
}
// GetAllLatestVersions returns the latest version for every collection.
//
// The query uses DISTINCT ON (collection_name) with rows ordered by
// collection_name and then created_at descending, so exactly one row — the
// newest — is kept per collection and the result is sorted by collection name.
// The returned slice is nil when the table is empty. On any query, scan, or
// iteration error the partial result is discarded and (nil, err) is returned.
func (s *CorpusVersionStore) GetAllLatestVersions(ctx context.Context) ([]CorpusVersion, error) {
	rows, err := s.pool.Query(ctx, `
SELECT DISTINCT ON (collection_name)
id, version, collection_name, documents_count, chunks_count,
regulations, digest, ingestion_source, notes, created_at, created_by
FROM compliance_corpus_versions
ORDER BY collection_name, created_at DESC
`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var versions []CorpusVersion
	for rows.Next() {
		var v CorpusVersion
		if err := rows.Scan(
			&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
			&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
		); err != nil {
			return nil, err
		}
		versions = append(versions, v)
	}
	// rows.Next also returns false when reading fails part-way through, so
	// the error must be checked here or a truncated list would look like success.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return versions, nil
}