feat: add RAG corpus versioning and source policy backend
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 34s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 18s

Part 1 — RAG Corpus Versioning:
- New DB table compliance_corpus_versions (migration 017)
- Go CorpusVersionStore with CRUD operations
- Assessment struct extended with corpus_version_id
- API endpoints: GET /rag/corpus-status, /rag/corpus-versions/:collection
- RAG routes (search, regulations) now registered in main.go
- Ingestion script registers corpus versions after each run
- Frontend staleness badge in SDK sidebar

Part 3 — Source Policy Backend:
- New FastAPI router with CRUD for allowed sources, PII rules,
  operations matrix, audit trail, stats, and compliance report
- SQLAlchemy models for all source policy tables (migration 001)
- Frontend API base corrected from edu-search:8088/8089 to
  backend-compliance:8002/api

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-02 07:58:08 +01:00
parent 187dbf1b77
commit a228b3b528
15 changed files with 2020 additions and 11 deletions

View File

@@ -62,6 +62,7 @@ func main() {
dsgvoStore := dsgvo.NewStore(pool)
uccaStore := ucca.NewStore(pool)
escalationStore := ucca.NewEscalationStore(pool)
corpusVersionStore := ucca.NewCorpusVersionStore(pool)
roadmapStore := roadmap.NewStore(pool)
workshopStore := workshop.NewStore(pool)
portfolioStore := portfolio.NewStore(pool)
@@ -120,6 +121,7 @@ func main() {
vendorHandlers := handlers.NewVendorHandlers(vendorStore)
iaceHandler := handlers.NewIACEHandler(iaceStore)
trainingHandlers := handlers.NewTrainingHandlers(trainingStore, contentGenerator)
ragHandlers := handlers.NewRAGHandlers(corpusVersionStore)
// Initialize middleware
rbacMiddleware := rbac.NewMiddleware(rbacService, policyEngine)
@@ -345,6 +347,15 @@ func main() {
uccaRoutes.POST("/dsb-pool", escalationHandlers.AddDSBPoolMember)
}
// RAG routes - Legal Corpus Search & Versioning
ragRoutes := v1.Group("/rag")
{
ragRoutes.POST("/search", ragHandlers.Search)
ragRoutes.GET("/regulations", ragHandlers.ListRegulations)
ragRoutes.GET("/corpus-status", ragHandlers.CorpusStatus)
ragRoutes.GET("/corpus-versions/:collection", ragHandlers.CorpusVersionHistory)
}
// Roadmap routes - Compliance Implementation Roadmaps
roadmapRoutes := v1.Group("/roadmaps")
{

View File

@@ -9,13 +9,15 @@ import (
// RAGHandlers handles RAG search API endpoints.
type RAGHandlers struct {
ragClient *ucca.LegalRAGClient
ragClient *ucca.LegalRAGClient
corpusVersionStore *ucca.CorpusVersionStore
}
// NewRAGHandlers creates new RAG handlers.
func NewRAGHandlers() *RAGHandlers {
func NewRAGHandlers(corpusVersionStore *ucca.CorpusVersionStore) *RAGHandlers {
return &RAGHandlers{
ragClient: ucca.NewLegalRAGClient(),
ragClient: ucca.NewLegalRAGClient(),
corpusVersionStore: corpusVersionStore,
}
}
@@ -74,3 +76,62 @@ func (h *RAGHandlers) ListRegulations(c *gin.Context) {
"count": len(regs),
})
}
// CorpusStatus reports, for each RAG collection, the most recently registered
// corpus version.
// GET /sdk/v1/rag/corpus-status
//
// Responses:
//   - 503 when the handler was built without a corpus version store.
//   - 500 (including the underlying error text) when the store lookup fails.
//   - 200 with {"collections": {<collection_name>: {...}}} otherwise; the map
//     is empty when the store returns no versions.
func (h *RAGHandlers) CorpusStatus(c *gin.Context) {
	store := h.corpusVersionStore
	if store == nil {
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "corpus version store not configured"})
		return
	}

	latest, err := store.GetAllLatestVersions(c.Request.Context())
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch corpus versions: " + err.Error()})
		return
	}

	// Key the summaries by collection name so clients can look a collection
	// up directly instead of scanning a list.
	byCollection := map[string]gin.H{}
	for i := range latest {
		entry := &latest[i]
		byCollection[entry.CollectionName] = gin.H{
			"id":              entry.ID,
			"current_version": entry.Version,
			"documents_count": entry.DocumentsCount,
			"chunks_count":    entry.ChunksCount,
			"regulations":     entry.Regulations,
			"last_updated":    entry.CreatedAt,
			"digest":          entry.Digest,
		}
	}

	payload := gin.H{"collections": byCollection}
	c.JSON(http.StatusOK, payload)
}
// CorpusVersionHistory lists every recorded corpus version of one collection,
// in the order the store returns them.
// GET /sdk/v1/rag/corpus-versions/:collection
//
// Responses:
//   - 503 when the handler was built without a corpus version store.
//   - 400 when the :collection path parameter is empty.
//   - 500 (including the underlying error text) when the store lookup fails.
//   - 200 with the collection name, its versions and their count otherwise.
func (h *RAGHandlers) CorpusVersionHistory(c *gin.Context) {
	store := h.corpusVersionStore
	if store == nil {
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "corpus version store not configured"})
		return
	}

	name := c.Param("collection")
	if len(name) == 0 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "collection name required"})
		return
	}

	history, err := store.ListCorpusVersions(c.Request.Context(), name)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch corpus versions: " + err.Error()})
		return
	}

	payload := gin.H{
		"collection": name,
		"versions":   history,
		"count":      len(history),
	}
	c.JSON(http.StatusOK, payload)
}

View File

@@ -0,0 +1,158 @@
package ucca
import (
"context"
"time"
"github.com/google/uuid"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgxpool"
)
// CorpusVersion tracks a specific version of the RAG compliance corpus.
//
// The fields correspond one-to-one to the columns that CorpusVersionStore
// reads from and writes to the compliance_corpus_versions table. String
// fields tagged omitempty are left out of the JSON encoding when empty.
type CorpusVersion struct {
// ID is the primary key; CreateCorpusVersion generates one when it is uuid.Nil.
ID uuid.UUID `json:"id"`
// Version is the human-readable version label of the corpus.
Version string `json:"version"` // "2026-03-02.1"
// CollectionName names the RAG collection this version belongs to.
CollectionName string `json:"collection_name"` // "bp_compliance_ce"
// DocumentsCount and ChunksCount record the size of the corpus at this version.
DocumentsCount int `json:"documents_count"`
ChunksCount int `json:"chunks_count"`
// Regulations lists the identifiers of the regulations contained in this version.
Regulations []string `json:"regulations"` // ["eu_2016_679", ...]
// Digest is a content hash of the corpus.
Digest string `json:"digest,omitempty"` // SHA256 over chunks
// IngestionSource and Notes are optional free-text metadata.
IngestionSource string `json:"ingestion_source,omitempty"`
Notes string `json:"notes,omitempty"`
// CreatedAt is set by CreateCorpusVersion to the current UTC time when zero.
CreatedAt time.Time `json:"created_at"`
// CreatedBy is optional; NOTE(review): presumably identifies the user or
// job that registered the version — confirm against the ingestion script.
CreatedBy string `json:"created_by,omitempty"`
}
// CorpusVersionStore handles corpus version persistence.
//
// Every method issues its SQL against the compliance_corpus_versions table
// through the wrapped connection pool.
type CorpusVersionStore struct {
// pool is the pgx connection pool supplied to NewCorpusVersionStore.
pool *pgxpool.Pool
}
// NewCorpusVersionStore builds a CorpusVersionStore that runs its queries on
// pool. The pool is stored as given; no connection is opened or checked here.
func NewCorpusVersionStore(pool *pgxpool.Pool) *CorpusVersionStore {
	store := new(CorpusVersionStore)
	store.pool = pool
	return store
}
// CreateCorpusVersion persists v as a new row in compliance_corpus_versions.
//
// A zero ID is replaced with a freshly generated UUID and a zero CreatedAt
// with the current UTC time. Both defaults are written back into v, so the
// caller can read them after the call. The error from the INSERT, if any, is
// returned unchanged.
func (s *CorpusVersionStore) CreateCorpusVersion(ctx context.Context, v *CorpusVersion) error {
	const insertSQL = `
INSERT INTO compliance_corpus_versions (
id, version, collection_name, documents_count, chunks_count,
regulations, digest, ingestion_source, notes, created_at, created_by
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
`

	// Fill in defaults before building the argument list so the stored row
	// and the caller's struct agree.
	if v.ID == uuid.Nil {
		v.ID = uuid.New()
	}
	if v.CreatedAt.IsZero() {
		v.CreatedAt = time.Now().UTC()
	}

	// Argument order must match the column order in insertSQL.
	args := []any{
		v.ID, v.Version, v.CollectionName, v.DocumentsCount, v.ChunksCount,
		v.Regulations, v.Digest, v.IngestionSource, v.Notes, v.CreatedAt, v.CreatedBy,
	}

	_, err := s.pool.Exec(ctx, insertSQL, args...)
	return err
}
// GetLatestCorpusVersion returns the most recent version for a collection.
//
// "Most recent" means the row with the greatest created_at among those whose
// collection_name equals collection. It returns (nil, nil) when the collection
// has no recorded version, and (nil, err) when the query or scan fails.
//
// digest, ingestion_source, notes and created_by are nullable columns in
// compliance_corpus_versions but are plain strings on CorpusVersion. pgx
// refuses to scan NULL into a string, so the query maps NULL to '' to keep
// rows written without those values (e.g. by a script) readable.
func (s *CorpusVersionStore) GetLatestCorpusVersion(ctx context.Context, collection string) (*CorpusVersion, error) {
	var v CorpusVersion
	err := s.pool.QueryRow(ctx, `
SELECT id, version, collection_name, documents_count, chunks_count,
regulations, COALESCE(digest, ''), COALESCE(ingestion_source, ''),
COALESCE(notes, ''), created_at, COALESCE(created_by, '')
FROM compliance_corpus_versions
WHERE collection_name = $1
ORDER BY created_at DESC
LIMIT 1
`, collection).Scan(
		&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
		&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
	)
	// A missing row is not an error for callers: report it as "no version".
	if err == pgx.ErrNoRows {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	return &v, nil
}
// GetCorpusVersionByID retrieves a specific corpus version by ID.
//
// It returns (nil, nil) when no row has the given id, and (nil, err) when the
// query or scan fails.
//
// digest, ingestion_source, notes and created_by are nullable columns in
// compliance_corpus_versions but are plain strings on CorpusVersion. pgx
// refuses to scan NULL into a string, so the query maps NULL to '' to keep
// rows written without those values (e.g. by a script) readable.
func (s *CorpusVersionStore) GetCorpusVersionByID(ctx context.Context, id uuid.UUID) (*CorpusVersion, error) {
	var v CorpusVersion
	err := s.pool.QueryRow(ctx, `
SELECT id, version, collection_name, documents_count, chunks_count,
regulations, COALESCE(digest, ''), COALESCE(ingestion_source, ''),
COALESCE(notes, ''), created_at, COALESCE(created_by, '')
FROM compliance_corpus_versions
WHERE id = $1
`, id).Scan(
		&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
		&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
	)
	// A missing row is not an error for callers: report it as "not found".
	if err == pgx.ErrNoRows {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	return &v, nil
}
// ListCorpusVersions returns all versions for a collection, newest first.
//
// Rows are ordered by created_at descending. The result is a nil slice when
// the collection has no recorded versions. Any error from the query, from
// scanning a row, or from iterating the result set is returned with a nil
// slice, so callers never see a silently truncated history.
//
// digest, ingestion_source, notes and created_by are nullable columns in
// compliance_corpus_versions but are plain strings on CorpusVersion. pgx
// refuses to scan NULL into a string, so the query maps NULL to ''.
func (s *CorpusVersionStore) ListCorpusVersions(ctx context.Context, collection string) ([]CorpusVersion, error) {
	rows, err := s.pool.Query(ctx, `
SELECT id, version, collection_name, documents_count, chunks_count,
regulations, COALESCE(digest, ''), COALESCE(ingestion_source, ''),
COALESCE(notes, ''), created_at, COALESCE(created_by, '')
FROM compliance_corpus_versions
WHERE collection_name = $1
ORDER BY created_at DESC
`, collection)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var versions []CorpusVersion
	for rows.Next() {
		var v CorpusVersion
		if err := rows.Scan(
			&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
			&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
		); err != nil {
			return nil, err
		}
		versions = append(versions, v)
	}
	// rows.Next also returns false when iteration stops early (network
	// failure, cancelled context, ...); only rows.Err tells the two apart.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return versions, nil
}
// GetAllLatestVersions returns the latest version for every collection.
//
// The query uses DISTINCT ON (collection_name) with created_at descending, so
// each collection contributes exactly one row: the one with the greatest
// created_at. Results are ordered by collection name. The result is a nil
// slice when the table is empty. Any error from the query, from scanning a
// row, or from iterating the result set is returned with a nil slice, so
// callers never see a silently truncated list.
//
// digest, ingestion_source, notes and created_by are nullable columns in
// compliance_corpus_versions but are plain strings on CorpusVersion. pgx
// refuses to scan NULL into a string, so the query maps NULL to ''.
func (s *CorpusVersionStore) GetAllLatestVersions(ctx context.Context) ([]CorpusVersion, error) {
	rows, err := s.pool.Query(ctx, `
SELECT DISTINCT ON (collection_name)
id, version, collection_name, documents_count, chunks_count,
regulations, COALESCE(digest, ''), COALESCE(ingestion_source, ''),
COALESCE(notes, ''), created_at, COALESCE(created_by, '')
FROM compliance_corpus_versions
ORDER BY collection_name, created_at DESC
`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var versions []CorpusVersion
	for rows.Next() {
		var v CorpusVersion
		if err := rows.Scan(
			&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
			&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
		); err != nil {
			return nil, err
		}
		versions = append(versions, v)
	}
	// rows.Next also returns false when iteration stops early (network
	// failure, cancelled context, ...); only rows.Err tells the two apart.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return versions, nil
}

View File

@@ -471,6 +471,10 @@ type Assessment struct {
Art22Risk bool `json:"art22_risk"`
TrainingAllowed TrainingAllowed `json:"training_allowed"`
// Corpus Versioning (RAG)
CorpusVersionID *uuid.UUID `json:"corpus_version_id,omitempty"`
CorpusVersion string `json:"corpus_version,omitempty"`
// LLM Explanation (optional)
ExplanationText *string `json:"explanation_text,omitempty"`
ExplanationGeneratedAt *time.Time `json:"explanation_generated_at,omitempty"`

View File

@@ -52,6 +52,7 @@ func (s *Store) CreateAssessment(ctx context.Context, a *Assessment) error {
triggered_rules, required_controls, recommended_architecture,
forbidden_patterns, example_matches,
dsfa_recommended, art22_risk, training_allowed,
corpus_version_id, corpus_version,
explanation_text, explanation_generated_at, explanation_model,
domain, created_at, updated_at, created_by
) VALUES (
@@ -61,8 +62,9 @@ func (s *Store) CreateAssessment(ctx context.Context, a *Assessment) error {
$14, $15, $16,
$17, $18,
$19, $20, $21,
$22, $23, $24,
$25, $26, $27, $28
$22, $23,
$24, $25, $26,
$27, $28, $29, $30
)
`,
a.ID, a.TenantID, a.NamespaceID, a.Title, a.PolicyVersion, a.Status,
@@ -71,6 +73,7 @@ func (s *Store) CreateAssessment(ctx context.Context, a *Assessment) error {
triggeredRules, requiredControls, recommendedArchitecture,
forbiddenPatterns, exampleMatches,
a.DSFARecommended, a.Art22Risk, string(a.TrainingAllowed),
a.CorpusVersionID, a.CorpusVersion,
a.ExplanationText, a.ExplanationGeneratedAt, a.ExplanationModel,
string(a.Domain), a.CreatedAt, a.UpdatedAt, a.CreatedBy,
)
@@ -92,6 +95,7 @@ func (s *Store) GetAssessment(ctx context.Context, id uuid.UUID) (*Assessment, e
triggered_rules, required_controls, recommended_architecture,
forbidden_patterns, example_matches,
dsfa_recommended, art22_risk, training_allowed,
corpus_version_id, corpus_version,
explanation_text, explanation_generated_at, explanation_model,
domain, created_at, updated_at, created_by
FROM ucca_assessments WHERE id = $1
@@ -102,6 +106,7 @@ func (s *Store) GetAssessment(ctx context.Context, id uuid.UUID) (*Assessment, e
&triggeredRules, &requiredControls, &recommendedArchitecture,
&forbiddenPatterns, &exampleMatches,
&a.DSFARecommended, &a.Art22Risk, &trainingAllowed,
&a.CorpusVersionID, &a.CorpusVersion,
&a.ExplanationText, &a.ExplanationGeneratedAt, &a.ExplanationModel,
&domain, &a.CreatedAt, &a.UpdatedAt, &a.CreatedBy,
)
@@ -141,6 +146,7 @@ func (s *Store) ListAssessments(ctx context.Context, tenantID uuid.UUID, filters
triggered_rules, required_controls, recommended_architecture,
forbidden_patterns, example_matches,
dsfa_recommended, art22_risk, training_allowed,
corpus_version_id, corpus_version,
explanation_text, explanation_generated_at, explanation_model,
domain, created_at, updated_at, created_by
FROM ucca_assessments WHERE tenant_id = $1`
@@ -194,6 +200,7 @@ func (s *Store) ListAssessments(ctx context.Context, tenantID uuid.UUID, filters
&triggeredRules, &requiredControls, &recommendedArchitecture,
&forbiddenPatterns, &exampleMatches,
&a.DSFARecommended, &a.Art22Risk, &trainingAllowed,
&a.CorpusVersionID, &a.CorpusVersion,
&a.ExplanationText, &a.ExplanationGeneratedAt, &a.ExplanationModel,
&domain, &a.CreatedAt, &a.UpdatedAt, &a.CreatedBy,
)

View File

@@ -0,0 +1,35 @@
-- =============================================================================
-- Migration 017: RAG Corpus Versioning
--
-- Tracks versions of the RAG corpus so assessments can record which
-- corpus version they were evaluated against. Enables staleness detection
-- and re-evaluation recommendations.
--
-- Every statement uses IF NOT EXISTS, so the script can be run against a
-- database in which these objects already exist.
-- =============================================================================
-- One row per registered corpus version of a collection.
-- Columns without NOT NULL (regulations, digest, ingestion_source, notes,
-- created_by) may hold NULL; readers must be prepared for that.
CREATE TABLE IF NOT EXISTS compliance_corpus_versions (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
version VARCHAR(50) NOT NULL, -- "2026-03-02.1"
collection_name VARCHAR(100) NOT NULL, -- "bp_compliance_ce"
documents_count INT NOT NULL DEFAULT 0,
chunks_count INT NOT NULL DEFAULT 0,
regulations TEXT[], -- {"eu_2016_679", "eu_2024_1689"}
digest VARCHAR(128), -- SHA256 over all chunks
ingestion_source VARCHAR(200), -- "ingest-legal-corpus.sh"
notes TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_by VARCHAR(100)
);
-- Lookup of all versions by collection name.
-- NOTE(review): this single-column index is a leading prefix of
-- idx_corpus_versions_latest below and may be redundant — confirm before
-- keeping both.
CREATE INDEX IF NOT EXISTS idx_corpus_versions_collection
ON compliance_corpus_versions(collection_name);
-- Supports "newest version of a collection" queries, which filter on
-- collection_name and order by created_at DESC.
CREATE INDEX IF NOT EXISTS idx_corpus_versions_latest
ON compliance_corpus_versions(collection_name, created_at DESC);
-- Add corpus_version_id to ucca_assessments
-- corpus_version_id is a nullable foreign key to the version row;
-- corpus_version holds a version label alongside it (same width as
-- compliance_corpus_versions.version).
ALTER TABLE ucca_assessments
ADD COLUMN IF NOT EXISTS corpus_version_id UUID REFERENCES compliance_corpus_versions(id),
ADD COLUMN IF NOT EXISTS corpus_version VARCHAR(50);
-- Lets assessments be found by the corpus version they reference.
CREATE INDEX IF NOT EXISTS idx_ucca_assessments_corpus_version
ON ucca_assessments(corpus_version_id);