From e50892a2aa3f625ec3f6729c5d530f504b569dbf Mon Sep 17 00:00:00 2001
From: Benjamin_Boenisch
Date: Wed, 24 Jun 2026 12:08:29 +0000
Subject: [PATCH] =?UTF-8?q?feat(ai-sdk):=20searchControls=20=E2=80=94=20re?=
 =?UTF-8?q?call=20control=20sources=20on=20implementation=20questions=20(#?=
 =?UTF-8?q?39)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../internal/ucca/control_role.go             | 29 +++++++++++++++++++
 .../internal/ucca/control_role_test.go        | 29 +++++++++++++++++++
 .../internal/ucca/legal_rag_client.go         |  9 ++++++
 .../internal/ucca/legal_rag_http.go           | 28 ++++++++++++++++++
 4 files changed, 95 insertions(+)

diff --git a/ai-compliance-sdk/internal/ucca/control_role.go b/ai-compliance-sdk/internal/ucca/control_role.go
index fd2d7ece..8cfa509e 100644
--- a/ai-compliance-sdk/internal/ucca/control_role.go
+++ b/ai-compliance-sdk/internal/ucca/control_role.go
@@ -92,3 +92,32 @@ func applyControlRoles(out []LegalSearchResult) {
 		}
 	}
 }
+
+// isControlPoolRole reports whether role is one of the four control-pool roles — the
+// "how to implement" roles that are surfaced on implementation questions.
+func isControlPoolRole(role string) bool {
+	switch role {
+	case roleOperationalReq, roleProceduralReq, roleControlStandard, roleImplGuidance:
+		return true
+	}
+	return false // any role not listed above is outside the control-pool
+}
+
+// controlRoleOf classifies a raw Qdrant payload into a source_role, so searchControls can
+// filter its deep dense pull to the control-pool BEFORE hits are mapped to LegalSearchResult.
+func controlRoleOf(payload map[string]interface{}) string {
+	article := getString(payload, "article")
+	if article == "" { // empty "article": fall back to the "section" field
+		article = getString(payload, "section")
+	}
+	return classifyRole(LegalSearchResult{ // only the fields set here are handed to classifyRole
+		RegulationShort: getString(payload, "regulation_short"),
+		RegulationName:  getString(payload, "regulation_name_de"),
+		ArticleLabel:    getString(payload, "article_label"),
+		Article:         article,
+		Category:        getString(payload, "category"),
+		SourceClass:     getString(payload, "source_class"),
+		AuthorityWeight: getInt(payload, "authority_weight"),
+		IsRecital:       getBool(payload, "is_recital"),
+	})
+}
diff --git a/ai-compliance-sdk/internal/ucca/control_role_test.go b/ai-compliance-sdk/internal/ucca/control_role_test.go
index b4872769..597516d6 100644
--- a/ai-compliance-sdk/internal/ucca/control_role_test.go
+++ b/ai-compliance-sdk/internal/ucca/control_role_test.go
@@ -48,3 +48,32 @@ func TestApplyControlRoles_PoolPreference(t *testing.T) {
 		}
 	}
 }
+
+func TestIsControlPoolRole(t *testing.T) {
+	for _, r := range []string{roleOperationalReq, roleProceduralReq, roleControlStandard, roleImplGuidance} { // must be accepted
+		if !isControlPoolRole(r) {
+			t.Errorf("%q should be in the control-pool", r)
+		}
+	}
+	for _, r := range []string{roleObligation, roleInterpretation, roleDefinition} { // must be rejected
+		if isControlPoolRole(r) {
+			t.Errorf("%q should NOT be in the control-pool", r)
+		}
+	}
+}
+
+func TestControlRoleOf_Payload(t *testing.T) {
+	// searchControls filters its deep dense pull by classifying the raw Qdrant payload.
+	nist := map[string]interface{}{"regulation_short": "NIST SP 800-82r3", "article": "AU-8"}
+	if got := controlRoleOf(nist); got != roleControlStandard {
+		t.Errorf("untagged NIST payload role = %q, want control_standard", got)
+	}
+	craAnnex := map[string]interface{}{"regulation_short": "CRA", "article": "Anhang-I", "category": "regulation"}
+	if got := controlRoleOf(craAnnex); got != roleOperationalReq {
+		t.Errorf("CRA Anhang payload role = %q, want operational_requirement", got)
+	}
+	dora := map[string]interface{}{"regulation_short": "DORA", "article_label": "Art. 5 DORA", "category": "regulation"}
+	if got := controlRoleOf(dora); isControlPoolRole(got) { // any role outside the control-pool passes
+		t.Errorf("DORA abstract article role = %q must be excluded from the control-pool", got)
+	}
+}
diff --git a/ai-compliance-sdk/internal/ucca/legal_rag_client.go b/ai-compliance-sdk/internal/ucca/legal_rag_client.go
index 2942265c..0ac9f489 100644
--- a/ai-compliance-sdk/internal/ucca/legal_rag_client.go
+++ b/ai-compliance-sdk/internal/ucca/legal_rag_client.go
@@ -107,6 +107,15 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
 		hits = mergeDedupHits(hits, bindingHits)
 	}
 
+	// Control augmentation: on an explicit implementation question, pull a deep dense pool and
+	// keep only the control-pool roles — so NIST/CRA Annex (dense rank ~8-9, below the small
+	// top-K) become candidates. Re-rank/applyControlRoles order them afterwards.
+	if queryWantsControls(query) {
+		if controlHits, cErr := c.searchControls(ctx, collection, embedding); cErr == nil { // on error the pull is skipped, hits stay as they are
+			hits = mergeDedupHits(hits, controlHits)
+		}
+	}
+
 	// Graph-Augmentation: verbundene Normen (references_out/in) der Top-Hits ueber die
 	// praezise Zitations-Kante in den Pool ziehen — z.B. Art. 13 CRA zieht Anhang I (die
 	// eigentliche Pflichtquelle). Pool-Augmentation only; Re-Rank + topK bleiben.
diff --git a/ai-compliance-sdk/internal/ucca/legal_rag_http.go b/ai-compliance-sdk/internal/ucca/legal_rag_http.go
index 5d68181e..c9805d0a 100644
--- a/ai-compliance-sdk/internal/ucca/legal_rag_http.go
+++ b/ai-compliance-sdk/internal/ucca/legal_rag_http.go
@@ -204,6 +204,34 @@ func (c *LegalRAGClient) searchBinding(ctx context.Context, collection string, e
 	return c.doPointsSearch(ctx, collection, searchReq)
 }
 
+// controlPoolDepth is the hit limit of the dense control pull. Measured: for an EU-cyber
+// control query the relevant control sources sit at dense rank ~8-9 (NIST, CRA Annex), below
+// the client's small top-K cut-off — so a fixed dense depth of 60 reliably surfaces them.
+const controlPoolDepth = 60
+
+// searchControls runs a dense search limited to controlPoolDepth hits and returns only those
+// whose payload classifies (via controlRoleOf) into a control-pool role, so control sources
+// that the small top-K (hybrid) search misses become candidates on an implementation question.
+// Role is derived in code (no source_role tag needed). AUGMENTS the pool — the caller gates it.
+func (c *LegalRAGClient) searchControls(ctx context.Context, collection string, embedding []float64) ([]qdrantSearchHit, error) {
+	searchReq := qdrantSearchRequest{
+		Vector:      embedding,
+		Limit:       controlPoolDepth,
+		WithPayload: true, // the payload is what controlRoleOf classifies below
+	}
+	hits, err := c.doPointsSearch(ctx, collection, searchReq)
+	if err != nil {
+		return nil, err // search errors are returned unchanged, with no partial result
+	}
+	kept := make([]qdrantSearchHit, 0, len(hits)) // capacity for the case that every hit is kept
+	for _, h := range hits {
+		if isControlPoolRole(controlRoleOf(h.Payload)) {
+			kept = append(kept, h)
+		}
+	}
+	return kept, nil
+}
+
 // doPointsSearch issues a POST /points/search and decodes the hits.
 func (c *LegalRAGClient) doPointsSearch(ctx context.Context, collection string, searchReq qdrantSearchRequest) ([]qdrantSearchHit, error) {
 	jsonBody, err := json.Marshal(searchReq)