Compare commits

...

1 Commits

Author SHA1 Message Date
Benjamin Admin 576063515b feat(ai-sdk): searchControls — deep dense pull recalls control sources on implementation questions
CI / detect-changes (pull_request) Successful in 8s
CI / branch-name (pull_request) Successful in 2s
CI / guardrail-integrity (pull_request) Successful in 6s
CI / secret-scan (pull_request) Successful in 8s
CI / dep-audit (pull_request) Failing after 55s
CI / sbom-scan (pull_request) Failing after 1m1s
CI / build-sha-integrity (pull_request) Successful in 11s
CI / validate-canonical-controls (pull_request) Successful in 5s
CI / loc-budget (pull_request) Successful in 16s
CI / go-lint (pull_request) Successful in 50s
CI / python-lint (pull_request) Failing after 15s
CI / nodejs-lint (pull_request) Failing after 1m8s
CI / nodejs-build (pull_request) Successful in 3m1s
CI / test-go (pull_request) Successful in 59s
CI / iace-gt-coverage (pull_request) Successful in 15s
CI / test-python-backend (pull_request) Successful in 27s
CI / test-python-document-crawler (pull_request) Successful in 13s
CI / test-python-dsms-gateway (pull_request) Successful in 10s
Measured (raw dense, top-500, "Welche Controls passen zu Security Updates?"):
NIST at dense rank 9 (115 chunks), CRA Annex at rank 8 — both shallow, just below
the client's small top-K, so the rank layer (#38) never saw them. OWASP: absent from
the corpus (separate ingest).

Add searchControls: on an explicit implementation question (queryWantsControls) pull a
deep dense pool (depth 60, no filter), classify each hit's role in code, and keep only
the four control-pool roles (operational/procedural requirement, control standard,
implementation guidance) — no source_role tagging of the corpus. Merge-dedup into the
pool; the existing rerank + applyControlRoles then order them (op_req > procedural >
standard > guidance). So CRA Annex I (operational_requirement) lands Top-1 and NIST
(control_standard) enters Top-3/5, while ENISA stays visible. Norm questions (no control
intent) are untouched.

Tested: isControlPoolRole, controlRoleOf payload classification (NIST/CRA-Annex/DORA).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-24 14:03:45 +02:00
4 changed files with 95 additions and 0 deletions
@@ -92,3 +92,32 @@ func applyControlRoles(out []LegalSearchResult) {
}
}
}
// isControlPoolRole reports whether role is one of the four control-pool roles
// (operational requirement, procedural requirement, control standard,
// implementation guidance). Any other role, including the empty string,
// yields false.
func isControlPoolRole(role string) bool {
	return role == roleOperationalReq ||
		role == roleProceduralReq ||
		role == roleControlStandard ||
		role == roleImplGuidance
}
// controlRoleOf classifies a raw Qdrant payload into a source_role, so searchControls can
// filter its deep dense pull to the control-pool BEFORE hits are mapped to LegalSearchResult.
func controlRoleOf(payload map[string]interface{}) string {
article := getString(payload, "article")
if article == "" {
article = getString(payload, "section")
}
return classifyRole(LegalSearchResult{
RegulationShort: getString(payload, "regulation_short"),
RegulationName: getString(payload, "regulation_name_de"),
ArticleLabel: getString(payload, "article_label"),
Article: article,
Category: getString(payload, "category"),
SourceClass: getString(payload, "source_class"),
AuthorityWeight: getInt(payload, "authority_weight"),
IsRecital: getBool(payload, "is_recital"),
})
}
@@ -48,3 +48,32 @@ func TestApplyControlRoles_PoolPreference(t *testing.T) {
}
}
}
// TestIsControlPoolRole verifies that exactly the four control-pool roles are
// accepted and that obligation, interpretation and definition roles are rejected.
func TestIsControlPoolRole(t *testing.T) {
	inPool := []string{roleOperationalReq, roleProceduralReq, roleControlStandard, roleImplGuidance}
	outOfPool := []string{roleObligation, roleInterpretation, roleDefinition}

	for _, role := range inPool {
		if isControlPoolRole(role) {
			continue
		}
		t.Errorf("%q should be in the control-pool", role)
	}

	for _, role := range outOfPool {
		if !isControlPoolRole(role) {
			continue
		}
		t.Errorf("%q should NOT be in the control-pool", role)
	}
}
// TestControlRoleOf_Payload checks role classification directly on raw Qdrant
// payload maps, which is how searchControls filters its deep dense pull.
func TestControlRoleOf_Payload(t *testing.T) {
	// A NIST payload without any category/source tagging must still classify
	// as a control standard.
	nistRole := controlRoleOf(map[string]interface{}{
		"regulation_short": "NIST SP 800-82r3",
		"article":          "AU-8",
	})
	if nistRole != roleControlStandard {
		t.Errorf("untagged NIST payload role = %q, want control_standard", nistRole)
	}

	// The CRA annex is expected to classify as an operational requirement.
	annexRole := controlRoleOf(map[string]interface{}{
		"regulation_short": "CRA",
		"article":          "Anhang-I",
		"category":         "regulation",
	})
	if annexRole != roleOperationalReq {
		t.Errorf("CRA Anhang payload role = %q, want operational_requirement", annexRole)
	}

	// An abstract DORA article must not land in the control-pool.
	doraRole := controlRoleOf(map[string]interface{}{
		"regulation_short": "DORA",
		"article_label":    "Art. 5 DORA",
		"category":         "regulation",
	})
	if isControlPoolRole(doraRole) {
		t.Errorf("DORA abstract article role = %q must be excluded from the control-pool", doraRole)
	}
}
@@ -107,6 +107,15 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
hits = mergeDedupHits(hits, bindingHits)
}
// Control-Augmentation: bei expliziter Umsetzungsfrage einen tiefen dense-Pool ziehen und
// nur die Control-Pool-Rollen behalten — so werden NIST/CRA-Anhang (dense rank ~8-9, unter
// dem kleinen top-K) Kandidaten. Re-Rank/applyControlRoles ordnen sie danach.
if queryWantsControls(query) {
if controlHits, cErr := c.searchControls(ctx, collection, embedding); cErr == nil {
hits = mergeDedupHits(hits, controlHits)
}
}
// Graph-Augmentation: verbundene Normen (references_out/in) der Top-Hits ueber die
// praezise Zitations-Kante in den Pool ziehen — z.B. Art. 13 CRA zieht Anhang I (die
// eigentliche Pflichtquelle). Pool-Augmentation only; Re-Rank + topK bleiben.
@@ -204,6 +204,34 @@ func (c *LegalRAGClient) searchBinding(ctx context.Context, collection string, e
return c.doPointsSearch(ctx, collection, searchReq)
}
// controlPoolDepth is the result limit used for the dense control pull in
// searchControls. Measured for an EU-cyber control query: the relevant control
// sources (NIST, CRA Annex) sit at dense rank ~8-9, i.e. just outside the
// client's small top-K, so a fixed dense depth of 60 leaves ample headroom to
// surface them.
const controlPoolDepth = 60
// searchControls runs a dense vector search limited to controlPoolDepth hits
// (payload included, no filter set on the request) and returns only the hits
// whose payload classifies into a control-pool role. The role is derived in
// code via controlRoleOf, so no source_role tag is read from the payload.
//
// The result is meant to augment an existing hit pool; the caller decides
// whether the query has control intent. On a search failure the error from
// doPointsSearch is returned unchanged together with a nil slice. When no hit
// qualifies, an empty non-nil slice is returned.
func (c *LegalRAGClient) searchControls(ctx context.Context, collection string, embedding []float64) ([]qdrantSearchHit, error) {
	pool, err := c.doPointsSearch(ctx, collection, qdrantSearchRequest{
		Vector:      embedding,
		Limit:       controlPoolDepth,
		WithPayload: true,
	})
	if err != nil {
		return nil, err
	}

	controls := make([]qdrantSearchHit, 0, len(pool))
	for _, hit := range pool {
		// Drop everything that is not one of the control-pool roles.
		if role := controlRoleOf(hit.Payload); !isControlPoolRole(role) {
			continue
		}
		controls = append(controls, hit)
	}
	return controls, nil
}
// doPointsSearch issues a POST /points/search and decodes the hits.
func (c *LegalRAGClient) doPointsSearch(ctx context.Context, collection string, searchReq qdrantSearchRequest) ([]qdrantSearchHit, error) {
jsonBody, err := json.Marshal(searchReq)