feat(ucca): Multi-Regulation-Retrieval für Cross-Regulation-Fragen
CI / detect-changes (pull_request) Successful in 10s
CI / branch-name (pull_request) Successful in 1s
CI / guardrail-integrity (pull_request) Successful in 8s
CI / secret-scan (pull_request) Successful in 9s
CI / dep-audit (pull_request) Failing after 56s
CI / sbom-scan (pull_request) Failing after 58s
CI / build-sha-integrity (pull_request) Successful in 9s
CI / validate-canonical-controls (pull_request) Successful in 7s
CI / loc-budget (pull_request) Successful in 24s
CI / go-lint (pull_request) Successful in 54s
CI / python-lint (pull_request) Failing after 16s
CI / nodejs-lint (pull_request) Failing after 1m9s
CI / nodejs-build (pull_request) Successful in 3m6s
CI / test-go (pull_request) Successful in 1m3s
CI / iace-gt-coverage (pull_request) Successful in 19s
CI / test-python-backend (pull_request) Successful in 26s
CI / test-python-document-crawler (pull_request) Successful in 15s
CI / test-python-dsms-gateway (pull_request) Successful in 12s
CI / detect-changes (pull_request) Successful in 10s
CI / branch-name (pull_request) Successful in 1s
CI / guardrail-integrity (pull_request) Successful in 8s
CI / secret-scan (pull_request) Successful in 9s
CI / dep-audit (pull_request) Failing after 56s
CI / sbom-scan (pull_request) Failing after 58s
CI / build-sha-integrity (pull_request) Successful in 9s
CI / validate-canonical-controls (pull_request) Successful in 7s
CI / loc-budget (pull_request) Successful in 24s
CI / go-lint (pull_request) Successful in 54s
CI / python-lint (pull_request) Failing after 16s
CI / nodejs-lint (pull_request) Failing after 1m9s
CI / nodejs-build (pull_request) Successful in 3m6s
CI / test-go (pull_request) Successful in 1m3s
CI / iace-gt-coverage (pull_request) Successful in 19s
CI / test-python-backend (pull_request) Successful in 26s
CI / test-python-document-crawler (pull_request) Successful in 15s
CI / test-python-dsms-gateway (pull_request) Successful in 12s
Nennt eine Query EXPLIZIT >=2 Regelwerke ("Wie greifen CRA und Maschinen-
verordnung ineinander?"), retrievt searchInternal pro Regelwerk separat
(regulation_code/regulation_id-Filter) und merged — damit BEIDE Domänen im
Prompt landen statt nur der keyword-dominanten. Generisch (Query->Regelwerke,
KEINE doc-spezifische Logik), gegated auf >=2 erkannte Regelwerke; sonst
unveränderter Single-Domain-Pfad.
Behebt GQ-0070: vorher CRA x8 / null MaschVO -> Modell halluzinierte
MaschVO=2019/2144 + falsche "CRA ausgenommen"-Konklusion. Nachher CRA + MaschVO
im Prompt -> korrekt "beide gleichzeitig anwendbar" + Art. 20(9)
Konformitätsvermutung, gegroundet.
Validierung (Build-Collection, echtes SearchCollection):
- Unit: detectRegulations-Scoping (>=2 -> multi, 1/0 -> single)
- 5 Cross-Reg-Fälle (0070 + DSGVO+TDDDG/CRA+NIS2/DORA+NIS2/AI Act+DSGVO):
beide Regelwerke in Top-8
- CB-100 Freeze-Regression: NUR GQ-0070 + GQ-0095 geändert (beide echte
Cross-Reg, beide verbessert), 98/100 byte-identisch
- 10 Hard Cases: 9 Single-Domain unverändert, 0070 behält CRA Rang 1
Filter erweitert auf regulation_id UND regulation_code (rückwärtskompatibel,
aktiviert die re-ingestierte Build-Collection).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,92 @@
|
||||
package ucca
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestDetectRegulations is a pure unit test of the multi-regulation TRIGGER (no Qdrant):
|
||||
// only an explicit naming of >=2 regulations enables multi-regulation retrieval. A single
|
||||
// named regulation, or a topical question that doesn't name one, stays single-domain.
|
||||
func TestDetectRegulations(t *testing.T) {
|
||||
cases := []struct {
|
||||
q string
|
||||
want int
|
||||
}{
|
||||
{"Welche neun Kriterien nennt WP248 fuer ein voraussichtlich hohes Risiko?", 0},
|
||||
{"Welche Anforderungen gelten fuer wesentliche Veraenderungen einer Maschine?", 0}, // "Maschine" != MaschVO
|
||||
{"Benoetigt eine SPS ohne Netzwerkanschluss eine CRA-Bewertung?", 1}, // 1 -> single
|
||||
{"Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?", 2},
|
||||
{"Wie greifen DSGVO und TDDDG bei der Nutzung von Cookies ineinander?", 2},
|
||||
{"Wie verhalten sich DORA und NIS2 fuer ein Finanzunternehmen?", 2},
|
||||
{"Wie greifen AI Act und DSGVO bei einem KI-System ineinander?", 2},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := len(detectRegulations(c.q)); got != c.want {
|
||||
t.Errorf("detectRegulations(%q) = %d, want %d", c.q, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestMultiRegE2E (RUN_E2E=1) verifies against the build collection that an explicit
|
||||
// cross-regulation query returns BOTH named domains in the top-K — the core acceptance
|
||||
// gate for multi-regulation retrieval.
|
||||
func TestMultiRegE2E(t *testing.T) {
|
||||
if os.Getenv("RUN_E2E") != "1" {
|
||||
t.Skip("set RUN_E2E=1 + QDRANT_URL/OLLAMA_URL")
|
||||
}
|
||||
c := NewLegalRAGClient()
|
||||
coll := os.Getenv("E2E_COLLECTION")
|
||||
if coll == "" {
|
||||
coll = "bp_compliance_kb_2026_1_build"
|
||||
}
|
||||
cases := []struct {
|
||||
id string
|
||||
q string
|
||||
want []string
|
||||
}{
|
||||
{"GQ-0070 CRA+MaschVO", "Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?", []string{"CRA", "MASCH"}},
|
||||
{"DSGVO+TDDDG", "Wie greifen DSGVO und TDDDG bei der Nutzung von Cookies und Tracking-Technologien ineinander?", []string{"DSGVO", "TDDDG"}},
|
||||
{"CRA+NIS2", "Wie verhalten sich CRA und NIS2 bei einem vernetzten Produkt eines wichtigen Unternehmens zueinander?", []string{"CRA", "NIS2"}},
|
||||
{"DORA+NIS2", "Wie greifen DORA und NIS2 bei einem Finanzunternehmen ineinander?", []string{"DORA", "NIS2"}},
|
||||
{"AI Act+DSGVO", "Wie greifen AI Act und DSGVO bei einem KI-System ineinander, das personenbezogene Daten verarbeitet?", []string{"AI ACT", "DSGVO"}},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
res, err := c.SearchCollection(context.Background(), coll, tc.q, nil, 8)
|
||||
if err != nil {
|
||||
t.Fatalf("%s: %v", tc.id, err)
|
||||
}
|
||||
present := map[string]bool{}
|
||||
for _, r := range res {
|
||||
present[strings.ToUpper(r.RegulationCode)] = true
|
||||
}
|
||||
ok := true
|
||||
for _, w := range tc.want {
|
||||
found := false
|
||||
for cd := range present {
|
||||
if strings.Contains(cd, w) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
ok = false
|
||||
}
|
||||
}
|
||||
codes := make([]string, 0, len(present))
|
||||
for cd := range present {
|
||||
codes = append(codes, cd)
|
||||
}
|
||||
status := "OK"
|
||||
if !ok {
|
||||
status = "FAIL"
|
||||
}
|
||||
fmt.Printf("%-22s want=%v present=%v %s\n", tc.id, tc.want, codes, status)
|
||||
if !ok {
|
||||
t.Errorf("%s: not all named regulations in top-8 (want %v, got %v)", tc.id, tc.want, codes)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user