feat: add RAG corpus versioning and source policy backend
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 34s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 18s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 34s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 18s
Part 1 — RAG Corpus Versioning:
- New DB table compliance_corpus_versions (migration 017)
- Go CorpusVersionStore with CRUD operations
- Assessment struct extended with corpus_version_id
- API endpoints: GET /rag/corpus-status, /rag/corpus-versions/:collection
- RAG routes (search, regulations) now registered in main.go
- Ingestion script registers corpus versions after each run
- Frontend staleness badge in SDK sidebar

Part 3 — Source Policy Backend:
- New FastAPI router with CRUD for allowed sources, PII rules, operations matrix, audit trail, stats, and compliance report
- SQLAlchemy models for all source policy tables (migration 001)
- Frontend API base corrected from edu-search:8088/8089 to backend-compliance:8002/api

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -62,6 +62,7 @@ func main() {
|
||||
dsgvoStore := dsgvo.NewStore(pool)
|
||||
uccaStore := ucca.NewStore(pool)
|
||||
escalationStore := ucca.NewEscalationStore(pool)
|
||||
corpusVersionStore := ucca.NewCorpusVersionStore(pool)
|
||||
roadmapStore := roadmap.NewStore(pool)
|
||||
workshopStore := workshop.NewStore(pool)
|
||||
portfolioStore := portfolio.NewStore(pool)
|
||||
@@ -120,6 +121,7 @@ func main() {
|
||||
vendorHandlers := handlers.NewVendorHandlers(vendorStore)
|
||||
iaceHandler := handlers.NewIACEHandler(iaceStore)
|
||||
trainingHandlers := handlers.NewTrainingHandlers(trainingStore, contentGenerator)
|
||||
ragHandlers := handlers.NewRAGHandlers(corpusVersionStore)
|
||||
|
||||
// Initialize middleware
|
||||
rbacMiddleware := rbac.NewMiddleware(rbacService, policyEngine)
|
||||
@@ -345,6 +347,15 @@ func main() {
|
||||
uccaRoutes.POST("/dsb-pool", escalationHandlers.AddDSBPoolMember)
|
||||
}
|
||||
|
||||
// RAG routes - Legal Corpus Search & Versioning
|
||||
ragRoutes := v1.Group("/rag")
|
||||
{
|
||||
ragRoutes.POST("/search", ragHandlers.Search)
|
||||
ragRoutes.GET("/regulations", ragHandlers.ListRegulations)
|
||||
ragRoutes.GET("/corpus-status", ragHandlers.CorpusStatus)
|
||||
ragRoutes.GET("/corpus-versions/:collection", ragHandlers.CorpusVersionHistory)
|
||||
}
|
||||
|
||||
// Roadmap routes - Compliance Implementation Roadmaps
|
||||
roadmapRoutes := v1.Group("/roadmaps")
|
||||
{
|
||||
|
||||
@@ -9,13 +9,15 @@ import (
|
||||
|
||||
// RAGHandlers handles RAG search API endpoints.
|
||||
type RAGHandlers struct {
|
||||
ragClient *ucca.LegalRAGClient
|
||||
ragClient *ucca.LegalRAGClient
|
||||
corpusVersionStore *ucca.CorpusVersionStore
|
||||
}
|
||||
|
||||
// NewRAGHandlers creates new RAG handlers.
|
||||
func NewRAGHandlers() *RAGHandlers {
|
||||
func NewRAGHandlers(corpusVersionStore *ucca.CorpusVersionStore) *RAGHandlers {
|
||||
return &RAGHandlers{
|
||||
ragClient: ucca.NewLegalRAGClient(),
|
||||
ragClient: ucca.NewLegalRAGClient(),
|
||||
corpusVersionStore: corpusVersionStore,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,3 +76,62 @@ func (h *RAGHandlers) ListRegulations(c *gin.Context) {
|
||||
"count": len(regs),
|
||||
})
|
||||
}
|
||||
|
||||
// CorpusStatus returns the current version status of all RAG collections.
|
||||
// GET /sdk/v1/rag/corpus-status
|
||||
func (h *RAGHandlers) CorpusStatus(c *gin.Context) {
|
||||
if h.corpusVersionStore == nil {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "corpus version store not configured"})
|
||||
return
|
||||
}
|
||||
|
||||
versions, err := h.corpusVersionStore.GetAllLatestVersions(c.Request.Context())
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch corpus versions: " + err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
collections := make(map[string]gin.H)
|
||||
for _, v := range versions {
|
||||
collections[v.CollectionName] = gin.H{
|
||||
"id": v.ID,
|
||||
"current_version": v.Version,
|
||||
"documents_count": v.DocumentsCount,
|
||||
"chunks_count": v.ChunksCount,
|
||||
"regulations": v.Regulations,
|
||||
"last_updated": v.CreatedAt,
|
||||
"digest": v.Digest,
|
||||
}
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"collections": collections,
|
||||
})
|
||||
}
|
||||
|
||||
// CorpusVersionHistory returns the version history for a specific collection.
|
||||
// GET /sdk/v1/rag/corpus-versions/:collection
|
||||
func (h *RAGHandlers) CorpusVersionHistory(c *gin.Context) {
|
||||
if h.corpusVersionStore == nil {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "corpus version store not configured"})
|
||||
return
|
||||
}
|
||||
|
||||
collection := c.Param("collection")
|
||||
if collection == "" {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "collection name required"})
|
||||
return
|
||||
}
|
||||
|
||||
versions, err := h.corpusVersionStore.ListCorpusVersions(c.Request.Context(), collection)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch corpus versions: " + err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"collection": collection,
|
||||
"versions": versions,
|
||||
"count": len(versions),
|
||||
})
|
||||
}
|
||||
|
||||
158
ai-compliance-sdk/internal/ucca/corpus_version.go
Normal file
158
ai-compliance-sdk/internal/ucca/corpus_version.go
Normal file
@@ -0,0 +1,158 @@
|
||||
package ucca
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/jackc/pgx/v5"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
)
|
||||
|
||||
// CorpusVersion tracks a specific version of the RAG compliance corpus.
|
||||
type CorpusVersion struct {
|
||||
ID uuid.UUID `json:"id"`
|
||||
Version string `json:"version"` // "2026-03-02.1"
|
||||
CollectionName string `json:"collection_name"` // "bp_compliance_ce"
|
||||
DocumentsCount int `json:"documents_count"`
|
||||
ChunksCount int `json:"chunks_count"`
|
||||
Regulations []string `json:"regulations"` // ["eu_2016_679", ...]
|
||||
Digest string `json:"digest,omitempty"` // SHA256 over chunks
|
||||
IngestionSource string `json:"ingestion_source,omitempty"`
|
||||
Notes string `json:"notes,omitempty"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
CreatedBy string `json:"created_by,omitempty"`
|
||||
}
|
||||
|
||||
// CorpusVersionStore handles corpus version persistence.
|
||||
type CorpusVersionStore struct {
|
||||
pool *pgxpool.Pool
|
||||
}
|
||||
|
||||
// NewCorpusVersionStore creates a new corpus version store.
|
||||
func NewCorpusVersionStore(pool *pgxpool.Pool) *CorpusVersionStore {
|
||||
return &CorpusVersionStore{pool: pool}
|
||||
}
|
||||
|
||||
// CreateCorpusVersion inserts a new corpus version record.
|
||||
func (s *CorpusVersionStore) CreateCorpusVersion(ctx context.Context, v *CorpusVersion) error {
|
||||
if v.ID == uuid.Nil {
|
||||
v.ID = uuid.New()
|
||||
}
|
||||
if v.CreatedAt.IsZero() {
|
||||
v.CreatedAt = time.Now().UTC()
|
||||
}
|
||||
|
||||
_, err := s.pool.Exec(ctx, `
|
||||
INSERT INTO compliance_corpus_versions (
|
||||
id, version, collection_name, documents_count, chunks_count,
|
||||
regulations, digest, ingestion_source, notes, created_at, created_by
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
||||
`,
|
||||
v.ID, v.Version, v.CollectionName, v.DocumentsCount, v.ChunksCount,
|
||||
v.Regulations, v.Digest, v.IngestionSource, v.Notes, v.CreatedAt, v.CreatedBy,
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
// GetLatestCorpusVersion returns the most recent version for a collection.
|
||||
func (s *CorpusVersionStore) GetLatestCorpusVersion(ctx context.Context, collection string) (*CorpusVersion, error) {
|
||||
var v CorpusVersion
|
||||
err := s.pool.QueryRow(ctx, `
|
||||
SELECT id, version, collection_name, documents_count, chunks_count,
|
||||
regulations, digest, ingestion_source, notes, created_at, created_by
|
||||
FROM compliance_corpus_versions
|
||||
WHERE collection_name = $1
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1
|
||||
`, collection).Scan(
|
||||
&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
|
||||
&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
|
||||
)
|
||||
if err == pgx.ErrNoRows {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &v, nil
|
||||
}
|
||||
|
||||
// GetCorpusVersionByID retrieves a specific corpus version by ID.
|
||||
func (s *CorpusVersionStore) GetCorpusVersionByID(ctx context.Context, id uuid.UUID) (*CorpusVersion, error) {
|
||||
var v CorpusVersion
|
||||
err := s.pool.QueryRow(ctx, `
|
||||
SELECT id, version, collection_name, documents_count, chunks_count,
|
||||
regulations, digest, ingestion_source, notes, created_at, created_by
|
||||
FROM compliance_corpus_versions
|
||||
WHERE id = $1
|
||||
`, id).Scan(
|
||||
&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
|
||||
&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
|
||||
)
|
||||
if err == pgx.ErrNoRows {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &v, nil
|
||||
}
|
||||
|
||||
// ListCorpusVersions returns all versions for a collection, newest first.
|
||||
func (s *CorpusVersionStore) ListCorpusVersions(ctx context.Context, collection string) ([]CorpusVersion, error) {
|
||||
rows, err := s.pool.Query(ctx, `
|
||||
SELECT id, version, collection_name, documents_count, chunks_count,
|
||||
regulations, digest, ingestion_source, notes, created_at, created_by
|
||||
FROM compliance_corpus_versions
|
||||
WHERE collection_name = $1
|
||||
ORDER BY created_at DESC
|
||||
`, collection)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var versions []CorpusVersion
|
||||
for rows.Next() {
|
||||
var v CorpusVersion
|
||||
err := rows.Scan(
|
||||
&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
|
||||
&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
versions = append(versions, v)
|
||||
}
|
||||
return versions, nil
|
||||
}
|
||||
|
||||
// GetAllLatestVersions returns the latest version for every collection.
|
||||
func (s *CorpusVersionStore) GetAllLatestVersions(ctx context.Context) ([]CorpusVersion, error) {
|
||||
rows, err := s.pool.Query(ctx, `
|
||||
SELECT DISTINCT ON (collection_name)
|
||||
id, version, collection_name, documents_count, chunks_count,
|
||||
regulations, digest, ingestion_source, notes, created_at, created_by
|
||||
FROM compliance_corpus_versions
|
||||
ORDER BY collection_name, created_at DESC
|
||||
`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var versions []CorpusVersion
|
||||
for rows.Next() {
|
||||
var v CorpusVersion
|
||||
err := rows.Scan(
|
||||
&v.ID, &v.Version, &v.CollectionName, &v.DocumentsCount, &v.ChunksCount,
|
||||
&v.Regulations, &v.Digest, &v.IngestionSource, &v.Notes, &v.CreatedAt, &v.CreatedBy,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
versions = append(versions, v)
|
||||
}
|
||||
return versions, nil
|
||||
}
|
||||
@@ -471,6 +471,10 @@ type Assessment struct {
|
||||
Art22Risk bool `json:"art22_risk"`
|
||||
TrainingAllowed TrainingAllowed `json:"training_allowed"`
|
||||
|
||||
// Corpus Versioning (RAG)
|
||||
CorpusVersionID *uuid.UUID `json:"corpus_version_id,omitempty"`
|
||||
CorpusVersion string `json:"corpus_version,omitempty"`
|
||||
|
||||
// LLM Explanation (optional)
|
||||
ExplanationText *string `json:"explanation_text,omitempty"`
|
||||
ExplanationGeneratedAt *time.Time `json:"explanation_generated_at,omitempty"`
|
||||
|
||||
@@ -52,6 +52,7 @@ func (s *Store) CreateAssessment(ctx context.Context, a *Assessment) error {
|
||||
triggered_rules, required_controls, recommended_architecture,
|
||||
forbidden_patterns, example_matches,
|
||||
dsfa_recommended, art22_risk, training_allowed,
|
||||
corpus_version_id, corpus_version,
|
||||
explanation_text, explanation_generated_at, explanation_model,
|
||||
domain, created_at, updated_at, created_by
|
||||
) VALUES (
|
||||
@@ -61,8 +62,9 @@ func (s *Store) CreateAssessment(ctx context.Context, a *Assessment) error {
|
||||
$14, $15, $16,
|
||||
$17, $18,
|
||||
$19, $20, $21,
|
||||
$22, $23, $24,
|
||||
$25, $26, $27, $28
|
||||
$22, $23,
|
||||
$24, $25, $26,
|
||||
$27, $28, $29, $30
|
||||
)
|
||||
`,
|
||||
a.ID, a.TenantID, a.NamespaceID, a.Title, a.PolicyVersion, a.Status,
|
||||
@@ -71,6 +73,7 @@ func (s *Store) CreateAssessment(ctx context.Context, a *Assessment) error {
|
||||
triggeredRules, requiredControls, recommendedArchitecture,
|
||||
forbiddenPatterns, exampleMatches,
|
||||
a.DSFARecommended, a.Art22Risk, string(a.TrainingAllowed),
|
||||
a.CorpusVersionID, a.CorpusVersion,
|
||||
a.ExplanationText, a.ExplanationGeneratedAt, a.ExplanationModel,
|
||||
string(a.Domain), a.CreatedAt, a.UpdatedAt, a.CreatedBy,
|
||||
)
|
||||
@@ -92,6 +95,7 @@ func (s *Store) GetAssessment(ctx context.Context, id uuid.UUID) (*Assessment, e
|
||||
triggered_rules, required_controls, recommended_architecture,
|
||||
forbidden_patterns, example_matches,
|
||||
dsfa_recommended, art22_risk, training_allowed,
|
||||
corpus_version_id, corpus_version,
|
||||
explanation_text, explanation_generated_at, explanation_model,
|
||||
domain, created_at, updated_at, created_by
|
||||
FROM ucca_assessments WHERE id = $1
|
||||
@@ -102,6 +106,7 @@ func (s *Store) GetAssessment(ctx context.Context, id uuid.UUID) (*Assessment, e
|
||||
&triggeredRules, &requiredControls, &recommendedArchitecture,
|
||||
&forbiddenPatterns, &exampleMatches,
|
||||
&a.DSFARecommended, &a.Art22Risk, &trainingAllowed,
|
||||
&a.CorpusVersionID, &a.CorpusVersion,
|
||||
&a.ExplanationText, &a.ExplanationGeneratedAt, &a.ExplanationModel,
|
||||
&domain, &a.CreatedAt, &a.UpdatedAt, &a.CreatedBy,
|
||||
)
|
||||
@@ -141,6 +146,7 @@ func (s *Store) ListAssessments(ctx context.Context, tenantID uuid.UUID, filters
|
||||
triggered_rules, required_controls, recommended_architecture,
|
||||
forbidden_patterns, example_matches,
|
||||
dsfa_recommended, art22_risk, training_allowed,
|
||||
corpus_version_id, corpus_version,
|
||||
explanation_text, explanation_generated_at, explanation_model,
|
||||
domain, created_at, updated_at, created_by
|
||||
FROM ucca_assessments WHERE tenant_id = $1`
|
||||
@@ -194,6 +200,7 @@ func (s *Store) ListAssessments(ctx context.Context, tenantID uuid.UUID, filters
|
||||
&triggeredRules, &requiredControls, &recommendedArchitecture,
|
||||
&forbiddenPatterns, &exampleMatches,
|
||||
&a.DSFARecommended, &a.Art22Risk, &trainingAllowed,
|
||||
&a.CorpusVersionID, &a.CorpusVersion,
|
||||
&a.ExplanationText, &a.ExplanationGeneratedAt, &a.ExplanationModel,
|
||||
&domain, &a.CreatedAt, &a.UpdatedAt, &a.CreatedBy,
|
||||
)
|
||||
|
||||
35
ai-compliance-sdk/migrations/017_corpus_versioning.sql
Normal file
35
ai-compliance-sdk/migrations/017_corpus_versioning.sql
Normal file
@@ -0,0 +1,35 @@
|
||||
-- =============================================================================
|
||||
-- Migration 017: RAG Corpus Versioning
|
||||
--
|
||||
-- Tracks versions of the RAG corpus so assessments can record which
|
||||
-- corpus version they were evaluated against. Enables staleness detection
|
||||
-- and re-evaluation recommendations.
|
||||
-- =============================================================================
|
||||
|
||||
-- One row per registered version of a RAG corpus collection.
CREATE TABLE IF NOT EXISTS compliance_corpus_versions (
    id               UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Version label, e.g. "2026-03-02.1"
    version          VARCHAR(50)  NOT NULL,
    -- Collection the version belongs to, e.g. "bp_compliance_ce"
    collection_name  VARCHAR(100) NOT NULL,
    documents_count  INTEGER      NOT NULL DEFAULT 0,
    chunks_count     INTEGER      NOT NULL DEFAULT 0,
    -- Regulation identifiers, e.g. {"eu_2016_679", "eu_2024_1689"}
    regulations      TEXT[],
    -- SHA256 over all chunks
    digest           VARCHAR(128),
    -- Producer of the version, e.g. "ingest-legal-corpus.sh"
    ingestion_source VARCHAR(200),
    notes            TEXT,
    created_at       TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    created_by       VARCHAR(100)
);
|
||||
|
||||
-- Lookup of all versions belonging to one collection.
CREATE INDEX IF NOT EXISTS idx_corpus_versions_collection ON compliance_corpus_versions (collection_name);

-- Newest-first access per collection (matches ORDER BY created_at DESC lookups).
CREATE INDEX IF NOT EXISTS idx_corpus_versions_latest ON compliance_corpus_versions (collection_name, created_at DESC);
|
||||
|
||||
-- Add corpus version tracking to ucca_assessments.
--
-- corpus_version_id stays nullable: assessments created before this migration
-- have no corpus version, and the Go side reads the column through a pointer.
--
-- corpus_version is NOT NULL DEFAULT '' because the assessment store scans it
-- into a non-pointer Go string; a NULL in any pre-existing row would make
-- loading that assessment fail. The empty string means "no version recorded".
ALTER TABLE ucca_assessments
    ADD COLUMN IF NOT EXISTS corpus_version_id UUID REFERENCES compliance_corpus_versions(id),
    ADD COLUMN IF NOT EXISTS corpus_version VARCHAR(50) NOT NULL DEFAULT '';

-- Supports finding the assessments evaluated against a given corpus version.
CREATE INDEX IF NOT EXISTS idx_ucca_assessments_corpus_version
    ON ucca_assessments(corpus_version_id);
|
||||
Reference in New Issue
Block a user