Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 31562a31e9 |
@@ -1,73 +0,0 @@
|
|||||||
package iace
|
|
||||||
|
|
||||||
// P3: pin accepted proposer decisions into the GT gate.
|
|
||||||
//
|
|
||||||
// When a human accepts a proposal from the offline proposer (a dedup
|
|
||||||
// supersession, a foreign-framing gate, a vocab→tag mapping, a coverage hazard),
|
|
||||||
// they record an AcceptedPin. A pin is a tiny, machine-scoped invariant — "this
|
|
||||||
// pattern MUST (or must NOT) fire for this machine" — that a test re-checks on
|
|
||||||
// every run. This is what makes the library's growth COMPOUND into the gate
|
|
||||||
// instead of silently eroding it: a future change that re-introduces a dropped
|
|
||||||
// duplicate, un-gates a foreign pattern, or removes a coverage hazard breaks the
|
|
||||||
// pin and fails CI.
|
|
||||||
//
|
|
||||||
// A single boolean covers all four proposal types:
|
|
||||||
// - dedup supersession accepted → DropPattern MustFire=false
|
|
||||||
// - foreign-framing gate accepted → foreign pattern MustFire=false
|
|
||||||
// - vocab→tag / coverage hazard accepted → the enabled pattern MustFire=true
|
|
||||||
|
|
||||||
// AcceptedPin is one regression invariant for an accepted proposal.
|
|
||||||
type AcceptedPin struct {
|
|
||||||
Pattern string `json:"pattern"`
|
|
||||||
MustFire bool `json:"must_fire"`
|
|
||||||
Reason string `json:"reason"`
|
|
||||||
FromProposal string `json:"from_proposal,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// PinSet is the accepted-pin registry for one machine (testdata/accepted_pins_*.json).
|
|
||||||
type PinSet struct {
|
|
||||||
Machine string `json:"machine"`
|
|
||||||
Pins []AcceptedPin `json:"pins"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// PinResult is the verdict for one pin against an engine run.
|
|
||||||
type PinResult struct {
|
|
||||||
Pin AcceptedPin
|
|
||||||
OK bool
|
|
||||||
Detail string
|
|
||||||
}
|
|
||||||
|
|
||||||
// VerifyPins checks every pin against the set of pattern IDs the engine actually
|
|
||||||
// fired for the machine. A pin holds iff the pattern's presence equals MustFire.
|
|
||||||
func VerifyPins(pins []AcceptedPin, firedPatternIDs []string) []PinResult {
|
|
||||||
fired := make(map[string]bool, len(firedPatternIDs))
|
|
||||||
for _, id := range firedPatternIDs {
|
|
||||||
fired[id] = true
|
|
||||||
}
|
|
||||||
out := make([]PinResult, 0, len(pins))
|
|
||||||
for _, p := range pins {
|
|
||||||
got := fired[p.Pattern]
|
|
||||||
ok := got == p.MustFire
|
|
||||||
detail := "ok"
|
|
||||||
if !ok {
|
|
||||||
if p.MustFire {
|
|
||||||
detail = "expected to fire but did NOT — coverage/mapping regressed"
|
|
||||||
} else {
|
|
||||||
detail = "expected to be suppressed but FIRED — gate/supersession regressed"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
out = append(out, PinResult{Pin: p, OK: ok, Detail: detail})
|
|
||||||
}
|
|
||||||
return out
|
|
||||||
}
|
|
||||||
|
|
||||||
// GenerateDedupPin turns an accepted (verdict=duplicate) dedup candidate into the
|
|
||||||
// pin that protects the supersession: the dropped pattern must no longer fire.
|
|
||||||
func GenerateDedupPin(c DedupCandidate) AcceptedPin {
|
|
||||||
return AcceptedPin{
|
|
||||||
Pattern: c.DropPattern,
|
|
||||||
MustFire: false,
|
|
||||||
Reason: "accepted duplicate of " + c.KeepPattern + " (" + c.Category + ")",
|
|
||||||
FromProposal: "dedup " + c.DropPattern + " -> " + c.KeepPattern,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
package iace
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestVerifyPins(t *testing.T) {
|
|
||||||
pins := []AcceptedPin{
|
|
||||||
{Pattern: "HPa", MustFire: true},
|
|
||||||
{Pattern: "HPb", MustFire: false},
|
|
||||||
}
|
|
||||||
res := VerifyPins(pins, []string{"HPa", "HPb"})
|
|
||||||
if !res[0].OK {
|
|
||||||
t.Errorf("HPa must_fire=true and it fired -> should be OK")
|
|
||||||
}
|
|
||||||
if res[1].OK {
|
|
||||||
t.Errorf("HPb must_fire=false but it fired -> should be VIOLATED")
|
|
||||||
}
|
|
||||||
res2 := VerifyPins(pins, []string{})
|
|
||||||
if res2[0].OK || !res2[1].OK {
|
|
||||||
t.Errorf("expected HPa violated + HPb ok, got %+v", res2)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGenerateDedupPin(t *testing.T) {
|
|
||||||
pin := GenerateDedupPin(DedupCandidate{KeepPattern: "HP144", DropPattern: "HP013", Category: "electrical_hazard"})
|
|
||||||
if pin.Pattern != "HP013" || pin.MustFire {
|
|
||||||
t.Fatalf("want pin {HP013, must_fire=false}, got %+v", pin)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestWarewashing_AcceptedPins re-checks every accepted P1 supersession against the
|
|
||||||
// live warewashing engine output. A future change that un-suppresses HP013/016/018
|
|
||||||
// or drops HP2201/HP144 breaks a pin here — the gate compounds, not erodes.
|
|
||||||
func TestWarewashing_AcceptedPins(t *testing.T) {
|
|
||||||
raw, err := os.ReadFile(filepath.Join("testdata", "accepted_pins_warewashing.json"))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("read pins: %v", err)
|
|
||||||
}
|
|
||||||
var ps PinSet
|
|
||||||
if err := json.Unmarshal(raw, &ps); err != nil {
|
|
||||||
t.Fatalf("parse pins: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
_, _, kept := warewashingEngineOutput()
|
|
||||||
firedIDs := make([]string, 0, len(kept))
|
|
||||||
for _, pm := range kept {
|
|
||||||
firedIDs = append(firedIDs, pm.PatternID)
|
|
||||||
}
|
|
||||||
|
|
||||||
ok := 0
|
|
||||||
for _, r := range VerifyPins(ps.Pins, firedIDs) {
|
|
||||||
if r.OK {
|
|
||||||
ok++
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
t.Errorf("PIN VIOLATED: %s (must_fire=%v) — %s [%s]", r.Pin.Pattern, r.Pin.MustFire, r.Detail, r.Pin.Reason)
|
|
||||||
}
|
|
||||||
t.Logf("accepted pins for %q: %d/%d hold", ps.Machine, ok, len(ps.Pins))
|
|
||||||
}
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
{
|
|
||||||
"machine": "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)",
|
|
||||||
"pins": [
|
|
||||||
{"pattern": "HP016", "must_fire": false, "reason": "generic hot-surface (Formwerkzeuge/Auspuffleitung framing) superseded by HP2201", "from_proposal": "P1 thermal supersession"},
|
|
||||||
{"pattern": "HP018", "must_fire": false, "reason": "actuator-burn superseded by HP2201", "from_proposal": "P1 thermal supersession"},
|
|
||||||
{"pattern": "HP013", "must_fire": false, "reason": "stored-energy Batterie/USV framing superseded by HP144", "from_proposal": "P1 stored-energy supersession"},
|
|
||||||
{"pattern": "HP2201", "must_fire": true, "reason": "warewashing hot-surface (Boiler/Tank/Spuelkammer) must remain — it is the clean equivalent that replaces HP016/HP018", "from_proposal": "P1 thermal supersession"},
|
|
||||||
{"pattern": "HP144", "must_fire": true, "reason": "residual-voltage (Frequenzumrichter/Zwischenkreis) must remain — clean equivalent that replaces HP013", "from_proposal": "P1 stored-energy supersession"}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
@@ -18,10 +18,7 @@ func TestGuidanceFixE2E(t *testing.T) {
|
|||||||
t.Skip("set RUN_E2E=1 + QDRANT_URL/OLLAMA_URL to run")
|
t.Skip("set RUN_E2E=1 + QDRANT_URL/OLLAMA_URL to run")
|
||||||
}
|
}
|
||||||
c := NewLegalRAGClient()
|
c := NewLegalRAGClient()
|
||||||
coll := os.Getenv("E2E_COLLECTION")
|
coll := "bp_compliance_kb_2026_1_build"
|
||||||
if coll == "" {
|
|
||||||
coll = "bp_compliance_kb_2026_1_build"
|
|
||||||
}
|
|
||||||
cases := []struct{ id, q, expect string }{
|
cases := []struct{ id, q, expect string }{
|
||||||
{"GQ-0012", "Welche neun Kriterien nennt WP248 fuer ein voraussichtlich hohes Risiko?", "WP248"},
|
{"GQ-0012", "Welche neun Kriterien nennt WP248 fuer ein voraussichtlich hohes Risiko?", "WP248"},
|
||||||
{"GQ-0013", "Ab wie vielen der WP248-Kriterien ist in der Regel eine Datenschutz-Folgenabschaetzung erforderlich?", "WP248"},
|
{"GQ-0013", "Ab wie vielen der WP248-Kriterien ist in der Regel eine Datenschutz-Folgenabschaetzung erforderlich?", "WP248"},
|
||||||
@@ -86,10 +83,7 @@ func TestBenchE2E(t *testing.T) {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
c := NewLegalRAGClient()
|
c := NewLegalRAGClient()
|
||||||
coll := os.Getenv("E2E_COLLECTION")
|
coll := "bp_compliance_kb_2026_1_build"
|
||||||
if coll == "" {
|
|
||||||
coll = "bp_compliance_kb_2026_1_build"
|
|
||||||
}
|
|
||||||
fmt.Printf("### BENCH n=%d hybrid=%v\n", len(bench.Questions), os.Getenv("RAG_HYBRID_SEARCH") != "false")
|
fmt.Printf("### BENCH n=%d hybrid=%v\n", len(bench.Questions), os.Getenv("RAG_HYBRID_SEARCH") != "false")
|
||||||
for _, q := range bench.Questions {
|
for _, q := range bench.Questions {
|
||||||
res, err := c.SearchCollection(context.Background(), coll, q.Question, nil, 8)
|
res, err := c.SearchCollection(context.Background(), coll, q.Question, nil, 8)
|
||||||
|
|||||||
@@ -1,52 +0,0 @@
|
|||||||
package ucca
|
|
||||||
|
|
||||||
import "strings"
|
|
||||||
|
|
||||||
// kbScopeTopics are high-precision data-protection / compliance topic markers that place a query in
|
|
||||||
// the KB-2026.1 authoritative slice even when it does NOT name a regulation. Conservative by design:
|
|
||||||
// an unmatched query falls back to the broad CE default (no regression) — the slice is only used when
|
|
||||||
// the query is confidently in-scope.
|
|
||||||
var kbScopeTopics = []string{
|
|
||||||
// DP-Guidance-Marker, die IN der Slice liegen (EDPB/DSK/WP/GL) — bewusst NICHT die generischen
|
|
||||||
// Verben aus guidanceIntentSignals (sagt/laut/empfiehlt/auslegung) und NICHT enisa/bsi/nist/owasp
|
|
||||||
// (die liegen im breiten CE-Pool, nicht in der Slice).
|
|
||||||
"edpb", "dsk", "datenschutzausschuss", "orientierungshilfe",
|
|
||||||
"wp2", "wp 2", "wp29", "working paper", "gl 0",
|
|
||||||
"datenschutz", "dsgvo", "gdpr", "dsfa", "folgenabschätzung", "folgenabschaetzung",
|
|
||||||
"einwilligung", "auftragsverarbeit", "betroffenenrecht", "auskunftsrecht",
|
|
||||||
"verarbeitungsverzeichnis", "datenschutzbeauftragt", "verzeichnis von verarbeitung",
|
|
||||||
"cookie", "tracking", "transparenzpflicht", "datenpanne", "meldepflicht",
|
|
||||||
"technische und organisatorische maßnahmen",
|
|
||||||
"cyber resilience", "schwachstelle", "vulnerability", "sicherheitsupdate",
|
|
||||||
"maschinensicherheit", "wesentliche veränderung", "wesentliche veraenderung",
|
|
||||||
"konformitätsbewertung", "konformitaetsbewertung", "ce-kennzeichnung",
|
|
||||||
}
|
|
||||||
|
|
||||||
// inKBScope reports whether the query belongs to the KB-2026.1 authoritative slice. True when it
|
|
||||||
// names an in-slice regulation (detectRegulations), asks for guidance (EDPB/DSK/WP/GL), or hits a
|
|
||||||
// data-protection / compliance topic marker.
|
|
||||||
func inKBScope(query string) bool {
|
|
||||||
if len(detectRegulations(query)) > 0 {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
q := strings.ToLower(query)
|
|
||||||
for _, t := range kbScopeTopics {
|
|
||||||
if strings.Contains(q, t) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// resolveCollection applies the Blue-Green „authoritative slice promotion" routing. An explicitly
|
|
||||||
// requested collection is honoured unchanged; the DEFAULT (empty) request is routed to the KB-2026.1
|
|
||||||
// slice when the query is in-scope, else to the broad CE default. Disable via RAG_KB_SCOPE_ROUTING=false.
|
|
||||||
func (c *LegalRAGClient) resolveCollection(query, requested string) string {
|
|
||||||
if requested != "" {
|
|
||||||
return requested
|
|
||||||
}
|
|
||||||
if c.kbScopeRoutingEnabled && c.kbSliceCollection != "" && inKBScope(query) {
|
|
||||||
return c.kbSliceCollection
|
|
||||||
}
|
|
||||||
return c.collection
|
|
||||||
}
|
|
||||||
@@ -1,101 +0,0 @@
|
|||||||
package ucca
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestInKBScope(t *testing.T) {
|
|
||||||
inScope := []string{
|
|
||||||
"Welche neun Kriterien nennt WP248 fuer ein hohes Risiko?",
|
|
||||||
"Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?",
|
|
||||||
"Wann ist eine Datenschutz-Folgenabschaetzung erforderlich?",
|
|
||||||
"Welche Anforderungen stellt die DSGVO an die Einwilligung?",
|
|
||||||
"Brauche ich einen Datenschutzbeauftragten?",
|
|
||||||
"Wann muss eine aktiv ausgenutzte Schwachstelle gemeldet werden?",
|
|
||||||
}
|
|
||||||
outScope := []string{
|
|
||||||
"Welche OWASP-Kontrollen gibt es fuer Authentifizierung?",
|
|
||||||
"Was sagt NIST SP 800-53 zu Access Control?",
|
|
||||||
"Wie funktioniert ISO 27001 Zertifizierung?",
|
|
||||||
"Welche IFRS-Standards gelten fuer Leasing?",
|
|
||||||
}
|
|
||||||
for _, q := range inScope {
|
|
||||||
if !inKBScope(q) {
|
|
||||||
t.Errorf("inKBScope(%q) = false, want true", q)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, q := range outScope {
|
|
||||||
if inKBScope(q) {
|
|
||||||
t.Errorf("inKBScope(%q) = true, want false", q)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestResolveCollection(t *testing.T) {
|
|
||||||
c := &LegalRAGClient{collection: "bp_compliance_ce", kbSliceCollection: "kb_2026_1_build", kbScopeRoutingEnabled: true}
|
|
||||||
if got := c.resolveCollection("Welche Kriterien nennt WP248?", ""); got != "kb_2026_1_build" {
|
|
||||||
t.Errorf("in-scope default -> %s, want kb_2026_1_build", got)
|
|
||||||
}
|
|
||||||
if got := c.resolveCollection("Was sagt NIST SP 800-53?", ""); got != "bp_compliance_ce" {
|
|
||||||
t.Errorf("out-of-scope default -> %s, want bp_compliance_ce", got)
|
|
||||||
}
|
|
||||||
if got := c.resolveCollection("Welche Kriterien nennt WP248?", "explicit_coll"); got != "explicit_coll" {
|
|
||||||
t.Errorf("explicit request must be honoured -> %s", got)
|
|
||||||
}
|
|
||||||
c.kbScopeRoutingEnabled = false
|
|
||||||
if got := c.resolveCollection("Welche Kriterien nennt WP248?", ""); got != "bp_compliance_ce" {
|
|
||||||
t.Errorf("disabled routing -> %s, want bp_compliance_ce", got)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestKBScopeRoutingE2E (RUN_E2E=1) verifies the routing against the REAL collections: a default
|
|
||||||
// Search() of an in-scope query must hit the KB-2026.1 slice (WP248/MaschVO live there but NOT in
|
|
||||||
// the broad CE pool = clean discriminator); an out-of-scope query stays on CE.
|
|
||||||
func TestKBScopeRoutingE2E(t *testing.T) {
|
|
||||||
if os.Getenv("RUN_E2E") != "1" {
|
|
||||||
t.Skip("set RUN_E2E=1 + QDRANT_URL/OLLAMA_URL/QDRANT_API_KEY")
|
|
||||||
}
|
|
||||||
c := NewLegalRAGClient()
|
|
||||||
cases := []struct {
|
|
||||||
q string
|
|
||||||
wantToken string // expected in top-8 when routed to the slice
|
|
||||||
wantInKB bool
|
|
||||||
}{
|
|
||||||
{"Welche neun Kriterien nennt WP248 fuer ein voraussichtlich hohes Risiko?", "WP248", true},
|
|
||||||
{"Welche grundlegenden Sicherheits- und Gesundheitsschutzanforderungen enthaelt Anhang III der Maschinenverordnung?", "MASCH", true},
|
|
||||||
{"Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?", "MASCH", true},
|
|
||||||
{"Was sagt NIST SP 800-53 zu Access Control?", "", false},
|
|
||||||
}
|
|
||||||
for _, tc := range cases {
|
|
||||||
routed := c.resolveCollection(tc.q, "")
|
|
||||||
res, err := c.Search(context.Background(), tc.q, nil, 8)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("%q: %v", tc.q, err)
|
|
||||||
}
|
|
||||||
codes := map[string]bool{}
|
|
||||||
for _, r := range res {
|
|
||||||
codes[strings.ToUpper(r.RegulationCode)] = true
|
|
||||||
}
|
|
||||||
hit := false
|
|
||||||
if tc.wantToken != "" {
|
|
||||||
for cd := range codes {
|
|
||||||
if strings.Contains(cd, tc.wantToken) {
|
|
||||||
hit = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
col := make([]string, 0, len(codes))
|
|
||||||
for cd := range codes {
|
|
||||||
col = append(col, cd)
|
|
||||||
}
|
|
||||||
fmt.Printf("inKB=%-5v routed=%-16s wantTok=%-6s found=%-5v | %v\n", tc.wantInKB, routed, tc.wantToken, hit, col)
|
|
||||||
if tc.wantInKB && tc.wantToken != "" && !hit {
|
|
||||||
t.Errorf("%q routed to %s but %s not in top-8 (slice not active?)", tc.q, routed, tc.wantToken)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -21,12 +21,6 @@ type LegalRAGClient struct {
|
|||||||
textIndexEnsured map[string]bool
|
textIndexEnsured map[string]bool
|
||||||
hybridEnabled bool
|
hybridEnabled bool
|
||||||
graphEnabled bool
|
graphEnabled bool
|
||||||
|
|
||||||
// Blue-Green „authoritative slice promotion" (additiv, KEIN CE-Ersatz): faellt eine Query
|
|
||||||
// in den KB-2026.1-Scope (DP/CRA/MaschVO/NIS2/DataAct/DORA/AIAct + EDPB/DSK-Guidance), wird
|
|
||||||
// die hochwertige Slice-Collection abgefragt; sonst bleibt der breite Default (bp_compliance_ce).
|
|
||||||
kbSliceCollection string
|
|
||||||
kbScopeRoutingEnabled bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewLegalRAGClient creates a new Legal RAG client using Ollama bge-m3 embeddings.
|
// NewLegalRAGClient creates a new Legal RAG client using Ollama bge-m3 embeddings.
|
||||||
@@ -51,14 +45,6 @@ func NewLegalRAGClient() *LegalRAGClient {
|
|||||||
// zur Begruendung/Vollstaendigkeit genutzt, nicht zur Pool-Expansion (Default).
|
// zur Begruendung/Vollstaendigkeit genutzt, nicht zur Pool-Expansion (Default).
|
||||||
graphEnabled := os.Getenv("RAG_GRAPH_EXPANSION") == "true"
|
graphEnabled := os.Getenv("RAG_GRAPH_EXPANSION") == "true"
|
||||||
|
|
||||||
// KB-2026.1 authoritative slice (Blue-Green, additiv). Routing default AN; Rollback ohne
|
|
||||||
// Redeploy ueber RAG_KB_SCOPE_ROUTING=false (dann faellt alles auf den CE-Default zurueck).
|
|
||||||
kbSlice := os.Getenv("RAG_KB_SLICE_COLLECTION")
|
|
||||||
if kbSlice == "" {
|
|
||||||
kbSlice = "kb_2026_1_build"
|
|
||||||
}
|
|
||||||
kbScopeRouting := os.Getenv("RAG_KB_SCOPE_ROUTING") != "false"
|
|
||||||
|
|
||||||
return &LegalRAGClient{
|
return &LegalRAGClient{
|
||||||
qdrantURL: qdrantURL,
|
qdrantURL: qdrantURL,
|
||||||
qdrantAPIKey: qdrantAPIKey,
|
qdrantAPIKey: qdrantAPIKey,
|
||||||
@@ -68,8 +54,6 @@ func NewLegalRAGClient() *LegalRAGClient {
|
|||||||
textIndexEnsured: make(map[string]bool),
|
textIndexEnsured: make(map[string]bool),
|
||||||
hybridEnabled: hybridEnabled,
|
hybridEnabled: hybridEnabled,
|
||||||
graphEnabled: graphEnabled,
|
graphEnabled: graphEnabled,
|
||||||
kbSliceCollection: kbSlice,
|
|
||||||
kbScopeRoutingEnabled: kbScopeRouting,
|
|
||||||
httpClient: &http.Client{
|
httpClient: &http.Client{
|
||||||
Timeout: 60 * time.Second,
|
Timeout: 60 * time.Second,
|
||||||
},
|
},
|
||||||
@@ -79,32 +63,21 @@ func NewLegalRAGClient() *LegalRAGClient {
|
|||||||
// SearchCollection queries a specific Qdrant collection for relevant passages.
|
// SearchCollection queries a specific Qdrant collection for relevant passages.
|
||||||
// If collection is empty, it falls back to the default collection (bp_compliance_ce).
|
// If collection is empty, it falls back to the default collection (bp_compliance_ce).
|
||||||
func (c *LegalRAGClient) SearchCollection(ctx context.Context, collection string, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) {
|
func (c *LegalRAGClient) SearchCollection(ctx context.Context, collection string, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) {
|
||||||
return c.searchInternal(ctx, c.resolveCollection(query, collection), query, regulationIDs, topK)
|
if collection == "" {
|
||||||
|
collection = c.collection
|
||||||
|
}
|
||||||
|
return c.searchInternal(ctx, collection, query, regulationIDs, topK)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Search queries the compliance corpus for relevant passages. The target collection is resolved by
|
// Search queries the compliance CE corpus for relevant passages.
|
||||||
// the Blue-Green slice routing: the KB-2026.1 slice for in-scope queries, else the broad CE default.
|
|
||||||
func (c *LegalRAGClient) Search(ctx context.Context, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) {
|
func (c *LegalRAGClient) Search(ctx context.Context, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) {
|
||||||
return c.searchInternal(ctx, c.resolveCollection(query, ""), query, regulationIDs, topK)
|
return c.searchInternal(ctx, c.collection, query, regulationIDs, topK)
|
||||||
}
|
}
|
||||||
|
|
||||||
// searchInternal performs the actual search against a given collection.
|
// searchInternal performs the actual search against a given collection.
|
||||||
// If hybrid search is enabled, it uses the Qdrant Query API with RRF fusion
|
// If hybrid search is enabled, it uses the Qdrant Query API with RRF fusion
|
||||||
// (dense + full-text). Falls back to dense-only /points/search on failure.
|
// (dense + full-text). Falls back to dense-only /points/search on failure.
|
||||||
func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) {
|
func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) {
|
||||||
// Multi-Regulation-Retrieval: nennt die Query EXPLIZIT >=2 Regelwerke (z.B. "CRA und
|
|
||||||
// Maschinenverordnung"), wird pro Regelwerk separat retrieved + gemergt, damit BEIDE
|
|
||||||
// Domaenen im Prompt landen statt nur der keyword-dominanten. Generisch (Query->Regelwerke,
|
|
||||||
// keine doc-spezifische Logik); nur wenn der Caller nicht ohnehin schon auf Regulierungen
|
|
||||||
// filtert. Best-effort: leeres/fehlerhaftes Multi-Ergebnis faellt auf die Standardsuche zurueck.
|
|
||||||
if len(regulationIDs) == 0 {
|
|
||||||
if regs := detectRegulations(query); len(regs) >= 2 {
|
|
||||||
if mr, mErr := c.searchMultiRegulation(ctx, collection, query, regs, topK); mErr == nil && len(mr) > 0 {
|
|
||||||
return mr, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
embedding, err := c.generateEmbedding(ctx, query)
|
embedding, err := c.generateEmbedding(ctx, query)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to generate embedding: %w", err)
|
return nil, fmt.Errorf("failed to generate embedding: %w", err)
|
||||||
@@ -150,7 +123,43 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
|
|||||||
hits = c.expandViaGraph(ctx, collection, hits)
|
hits = c.expandViaGraph(ctx, collection, hits)
|
||||||
}
|
}
|
||||||
|
|
||||||
results := hitsToResults(hits)
|
results := make([]LegalSearchResult, len(hits))
|
||||||
|
for i, hit := range hits {
|
||||||
|
// Legal-Metadaten nach rag_reingest_spec.md §2: bevorzugt die normalisierten Felder
|
||||||
|
// (article_label/regulation_code/article/...); Fallback auf alte Feldnamen, solange der
|
||||||
|
// Korpus noch nicht re-ingestiert ist (regulation_id, section="§ 38").
|
||||||
|
regCode := getString(hit.Payload, "regulation_code")
|
||||||
|
if regCode == "" {
|
||||||
|
regCode = getString(hit.Payload, "regulation_id")
|
||||||
|
}
|
||||||
|
article := getString(hit.Payload, "article")
|
||||||
|
if article == "" {
|
||||||
|
article = getString(hit.Payload, "section")
|
||||||
|
}
|
||||||
|
results[i] = LegalSearchResult{
|
||||||
|
Text: getString(hit.Payload, "chunk_text"),
|
||||||
|
RegulationCode: regCode,
|
||||||
|
RegulationName: getString(hit.Payload, "regulation_name_de"),
|
||||||
|
RegulationShort: getString(hit.Payload, "regulation_short"),
|
||||||
|
Category: getString(hit.Payload, "category"),
|
||||||
|
ArticleLabel: getString(hit.Payload, "article_label"),
|
||||||
|
Article: article,
|
||||||
|
Paragraph: getString(hit.Payload, "paragraph"),
|
||||||
|
Sub: getString(hit.Payload, "sub"),
|
||||||
|
IsRecital: getBool(hit.Payload, "is_recital"),
|
||||||
|
CitationStyle: getString(hit.Payload, "citation_style"),
|
||||||
|
Pages: getIntSlice(hit.Payload, "pages"),
|
||||||
|
SourceURL: getString(hit.Payload, "source"),
|
||||||
|
Score: hit.Score,
|
||||||
|
AuthorityWeight: getInt(hit.Payload, "authority_weight"),
|
||||||
|
SourceClass: getString(hit.Payload, "source_class"),
|
||||||
|
Jurisdiction: getString(hit.Payload, "jurisdiction"),
|
||||||
|
CitationUnit: getString(hit.Payload, "citation_unit"),
|
||||||
|
ReferencesOut: getStringSlice(hit.Payload, "references_out"),
|
||||||
|
ReferencesIn: getStringSlice(hit.Payload, "references_in"),
|
||||||
|
Superseded: getString(hit.Payload, "status") == "superseded",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Authority-aware Re-Ranking: bindendes Recht der passenden Jurisdiktion/Domaene nach
|
// Authority-aware Re-Ranking: bindendes Recht der passenden Jurisdiktion/Domaene nach
|
||||||
// oben, Guidance/Fremdrecht/Off-Domain runter (nichts wird geloescht). Reihenfolge only,
|
// oben, Guidance/Fremdrecht/Off-Domain runter (nichts wird geloescht). Reihenfolge only,
|
||||||
|
|||||||
@@ -122,14 +122,12 @@ func (c *LegalRAGClient) searchHybrid(ctx context.Context, collection string, em
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(regulationIDs) > 0 {
|
if len(regulationIDs) > 0 {
|
||||||
// Match BOTH the legacy field (regulation_id) and the normalized field
|
conditions := make([]qdrantCondition, len(regulationIDs))
|
||||||
// (regulation_code) so per-regulation filtering works on the re-ingested corpus too.
|
for i, regID := range regulationIDs {
|
||||||
conditions := make([]qdrantCondition, 0, len(regulationIDs)*2)
|
conditions[i] = qdrantCondition{
|
||||||
for _, regID := range regulationIDs {
|
Key: "regulation_id",
|
||||||
conditions = append(conditions,
|
Match: qdrantMatch{Value: regID},
|
||||||
qdrantCondition{Key: "regulation_id", Match: qdrantMatch{Value: regID}},
|
}
|
||||||
qdrantCondition{Key: "regulation_code", Match: qdrantMatch{Value: regID}},
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
queryReq.Filter = &qdrantFilter{Should: conditions}
|
queryReq.Filter = &qdrantFilter{Should: conditions}
|
||||||
}
|
}
|
||||||
@@ -177,14 +175,12 @@ func (c *LegalRAGClient) searchDense(ctx context.Context, collection string, emb
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(regulationIDs) > 0 {
|
if len(regulationIDs) > 0 {
|
||||||
// Match BOTH the legacy field (regulation_id) and the normalized field
|
conditions := make([]qdrantCondition, len(regulationIDs))
|
||||||
// (regulation_code) so per-regulation filtering works on the re-ingested corpus too.
|
for i, regID := range regulationIDs {
|
||||||
conditions := make([]qdrantCondition, 0, len(regulationIDs)*2)
|
conditions[i] = qdrantCondition{
|
||||||
for _, regID := range regulationIDs {
|
Key: "regulation_id",
|
||||||
conditions = append(conditions,
|
Match: qdrantMatch{Value: regID},
|
||||||
qdrantCondition{Key: "regulation_id", Match: qdrantMatch{Value: regID}},
|
}
|
||||||
qdrantCondition{Key: "regulation_code", Match: qdrantMatch{Value: regID}},
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
searchReq.Filter = &qdrantFilter{Should: conditions}
|
searchReq.Filter = &qdrantFilter{Should: conditions}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,143 +0,0 @@
|
|||||||
package ucca
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
// multiRegMinPerRegulation is the minimum number of hits fetched per named regulation, so
|
|
||||||
// each domain is fairly represented even when topK/len(regs) would be tiny.
|
|
||||||
const multiRegMinPerRegulation = 3
|
|
||||||
|
|
||||||
// regulationCatalog maps a regulation to (a) the aliases that signal it is EXPLICITLY named
|
|
||||||
// in a query and (b) the regulation_code/regulation_id values used to filter the corpus.
|
|
||||||
// Deterministic + generic: a query naming >=2 regulations triggers per-regulation retrieval
|
|
||||||
// so a cross-regulation question returns every named domain — NOT a doc-specific rule.
|
|
||||||
var regulationCatalog = []struct {
|
|
||||||
Canonical string
|
|
||||||
Aliases []string
|
|
||||||
CodeValues []string
|
|
||||||
}{
|
|
||||||
{"CRA", []string{"cra", "cyber resilience"}, []string{"CRA"}},
|
|
||||||
{"MaschVO", []string{"maschinenverordnung", "maschvo", "machinery regulation"}, []string{"MASCHVO", "MaschVO"}},
|
|
||||||
{"NIS2", []string{"nis2", "nis-2", "nis 2"}, []string{"NIS2"}},
|
|
||||||
{"DORA", []string{"dora"}, []string{"DORA"}},
|
|
||||||
{"Data Act", []string{"data act", "datengesetz"}, []string{"DATA ACT", "DataAct"}},
|
|
||||||
{"AI Act", []string{"ai act", "ki-vo", "ki-verordnung", "ai-verordnung"}, []string{"AI ACT", "AIAct"}},
|
|
||||||
{"DSGVO", []string{"dsgvo", "gdpr"}, []string{"DSGVO"}},
|
|
||||||
{"TDDDG", []string{"tdddg"}, []string{"TDDDG"}},
|
|
||||||
{"BDSG", []string{"bdsg"}, []string{"BDSG"}},
|
|
||||||
}
|
|
||||||
|
|
||||||
type detectedRegulation struct {
|
|
||||||
Canonical string
|
|
||||||
CodeValues []string
|
|
||||||
}
|
|
||||||
|
|
||||||
// detectRegulations returns the DISTINCT regulations explicitly named in the query. >=2 of
|
|
||||||
// them is the trigger for multi-regulation retrieval. Pure + deterministic, no LLM.
|
|
||||||
func detectRegulations(query string) []detectedRegulation {
|
|
||||||
q := strings.ToLower(query)
|
|
||||||
var out []detectedRegulation
|
|
||||||
for _, r := range regulationCatalog {
|
|
||||||
for _, a := range r.Aliases {
|
|
||||||
if strings.Contains(q, a) {
|
|
||||||
out = append(out, detectedRegulation{Canonical: r.Canonical, CodeValues: r.CodeValues})
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return out
|
|
||||||
}
|
|
||||||
|
|
||||||
func hitID(h qdrantSearchHit) string { return fmt.Sprintf("%v", h.ID) }
|
|
||||||
|
|
||||||
// searchMultiRegulation retrieves each explicitly-named regulation SEPARATELY (per-regulation
|
|
||||||
// filter) and merges, so a cross-regulation query ("Wie greifen CRA und MaschVO ineinander?")
|
|
||||||
// returns BOTH domains in the prompt instead of only the keyword-dominant one. Generic over any
|
|
||||||
// named pair (DSGVO+TDDDG, CRA+NIS2, DORA+NIS2, AI Act+DSGVO, ...). The merged pool is
|
|
||||||
// authority-reranked once. Pure pool-construction; topK contract preserved.
|
|
||||||
func (c *LegalRAGClient) searchMultiRegulation(ctx context.Context, collection, query string, regs []detectedRegulation, topK int) ([]LegalSearchResult, error) {
|
|
||||||
embedding, err := c.generateEmbedding(ctx, query)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to generate embedding: %w", err)
|
|
||||||
}
|
|
||||||
perReg := topK / len(regs)
|
|
||||||
if perReg < multiRegMinPerRegulation {
|
|
||||||
perReg = multiRegMinPerRegulation
|
|
||||||
}
|
|
||||||
var merged []qdrantSearchHit
|
|
||||||
seen := make(map[string]bool)
|
|
||||||
for _, r := range regs {
|
|
||||||
var hits []qdrantSearchHit
|
|
||||||
if c.hybridEnabled {
|
|
||||||
if h, hErr := c.searchHybrid(ctx, collection, embedding, r.CodeValues, perReg); hErr == nil {
|
|
||||||
hits = h
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if hits == nil {
|
|
||||||
if h, dErr := c.searchDense(ctx, collection, embedding, r.CodeValues, perReg); dErr == nil {
|
|
||||||
hits = h
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, h := range hits {
|
|
||||||
id := hitID(h)
|
|
||||||
if seen[id] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
seen[id] = true
|
|
||||||
merged = append(merged, h)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(merged) == 0 {
|
|
||||||
return nil, fmt.Errorf("multi-regulation search returned no hits")
|
|
||||||
}
|
|
||||||
results := hitsToResults(merged)
|
|
||||||
results = rerankByAuthority(query, results)
|
|
||||||
if topK > 0 && len(results) > topK {
|
|
||||||
results = results[:topK]
|
|
||||||
}
|
|
||||||
return results, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// hitsToResults maps raw Qdrant hits to LegalSearchResult, preferring the normalized payload
|
|
||||||
// fields (regulation_code/article_label/...) with fallback to the legacy names (regulation_id,
|
|
||||||
// section) while the corpus is mid-re-ingestion. Shared by searchInternal + searchMultiRegulation.
|
|
||||||
func hitsToResults(hits []qdrantSearchHit) []LegalSearchResult {
|
|
||||||
results := make([]LegalSearchResult, len(hits))
|
|
||||||
for i, hit := range hits {
|
|
||||||
regCode := getString(hit.Payload, "regulation_code")
|
|
||||||
if regCode == "" {
|
|
||||||
regCode = getString(hit.Payload, "regulation_id")
|
|
||||||
}
|
|
||||||
article := getString(hit.Payload, "article")
|
|
||||||
if article == "" {
|
|
||||||
article = getString(hit.Payload, "section")
|
|
||||||
}
|
|
||||||
results[i] = LegalSearchResult{
|
|
||||||
Text: getString(hit.Payload, "chunk_text"),
|
|
||||||
RegulationCode: regCode,
|
|
||||||
RegulationName: getString(hit.Payload, "regulation_name_de"),
|
|
||||||
RegulationShort: getString(hit.Payload, "regulation_short"),
|
|
||||||
Category: getString(hit.Payload, "category"),
|
|
||||||
ArticleLabel: getString(hit.Payload, "article_label"),
|
|
||||||
Article: article,
|
|
||||||
Paragraph: getString(hit.Payload, "paragraph"),
|
|
||||||
Sub: getString(hit.Payload, "sub"),
|
|
||||||
IsRecital: getBool(hit.Payload, "is_recital"),
|
|
||||||
CitationStyle: getString(hit.Payload, "citation_style"),
|
|
||||||
Pages: getIntSlice(hit.Payload, "pages"),
|
|
||||||
SourceURL: getString(hit.Payload, "source"),
|
|
||||||
Score: hit.Score,
|
|
||||||
AuthorityWeight: getInt(hit.Payload, "authority_weight"),
|
|
||||||
SourceClass: getString(hit.Payload, "source_class"),
|
|
||||||
Jurisdiction: getString(hit.Payload, "jurisdiction"),
|
|
||||||
CitationUnit: getString(hit.Payload, "citation_unit"),
|
|
||||||
ReferencesOut: getStringSlice(hit.Payload, "references_out"),
|
|
||||||
ReferencesIn: getStringSlice(hit.Payload, "references_in"),
|
|
||||||
Superseded: getString(hit.Payload, "status") == "superseded",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return results
|
|
||||||
}
|
|
||||||
@@ -1,92 +0,0 @@
|
|||||||
package ucca
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
// TestDetectRegulations is a pure unit test of the multi-regulation TRIGGER (no Qdrant):
|
|
||||||
// only an explicit naming of >=2 regulations enables multi-regulation retrieval. A single
|
|
||||||
// named regulation, or a topical question that doesn't name one, stays single-domain.
|
|
||||||
func TestDetectRegulations(t *testing.T) {
|
|
||||||
cases := []struct {
|
|
||||||
q string
|
|
||||||
want int
|
|
||||||
}{
|
|
||||||
{"Welche neun Kriterien nennt WP248 fuer ein voraussichtlich hohes Risiko?", 0},
|
|
||||||
{"Welche Anforderungen gelten fuer wesentliche Veraenderungen einer Maschine?", 0}, // "Maschine" != MaschVO
|
|
||||||
{"Benoetigt eine SPS ohne Netzwerkanschluss eine CRA-Bewertung?", 1}, // 1 -> single
|
|
||||||
{"Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?", 2},
|
|
||||||
{"Wie greifen DSGVO und TDDDG bei der Nutzung von Cookies ineinander?", 2},
|
|
||||||
{"Wie verhalten sich DORA und NIS2 fuer ein Finanzunternehmen?", 2},
|
|
||||||
{"Wie greifen AI Act und DSGVO bei einem KI-System ineinander?", 2},
|
|
||||||
}
|
|
||||||
for _, c := range cases {
|
|
||||||
if got := len(detectRegulations(c.q)); got != c.want {
|
|
||||||
t.Errorf("detectRegulations(%q) = %d, want %d", c.q, got, c.want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestMultiRegE2E (RUN_E2E=1) verifies against the build collection that an explicit
|
|
||||||
// cross-regulation query returns BOTH named domains in the top-K — the core acceptance
|
|
||||||
// gate for multi-regulation retrieval.
|
|
||||||
func TestMultiRegE2E(t *testing.T) {
|
|
||||||
if os.Getenv("RUN_E2E") != "1" {
|
|
||||||
t.Skip("set RUN_E2E=1 + QDRANT_URL/OLLAMA_URL")
|
|
||||||
}
|
|
||||||
c := NewLegalRAGClient()
|
|
||||||
coll := os.Getenv("E2E_COLLECTION")
|
|
||||||
if coll == "" {
|
|
||||||
coll = "bp_compliance_kb_2026_1_build"
|
|
||||||
}
|
|
||||||
cases := []struct {
|
|
||||||
id string
|
|
||||||
q string
|
|
||||||
want []string
|
|
||||||
}{
|
|
||||||
{"GQ-0070 CRA+MaschVO", "Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?", []string{"CRA", "MASCH"}},
|
|
||||||
{"DSGVO+TDDDG", "Wie greifen DSGVO und TDDDG bei der Nutzung von Cookies und Tracking-Technologien ineinander?", []string{"DSGVO", "TDDDG"}},
|
|
||||||
{"CRA+NIS2", "Wie verhalten sich CRA und NIS2 bei einem vernetzten Produkt eines wichtigen Unternehmens zueinander?", []string{"CRA", "NIS2"}},
|
|
||||||
{"DORA+NIS2", "Wie greifen DORA und NIS2 bei einem Finanzunternehmen ineinander?", []string{"DORA", "NIS2"}},
|
|
||||||
{"AI Act+DSGVO", "Wie greifen AI Act und DSGVO bei einem KI-System ineinander, das personenbezogene Daten verarbeitet?", []string{"AI ACT", "DSGVO"}},
|
|
||||||
}
|
|
||||||
for _, tc := range cases {
|
|
||||||
res, err := c.SearchCollection(context.Background(), coll, tc.q, nil, 8)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("%s: %v", tc.id, err)
|
|
||||||
}
|
|
||||||
present := map[string]bool{}
|
|
||||||
for _, r := range res {
|
|
||||||
present[strings.ToUpper(r.RegulationCode)] = true
|
|
||||||
}
|
|
||||||
ok := true
|
|
||||||
for _, w := range tc.want {
|
|
||||||
found := false
|
|
||||||
for cd := range present {
|
|
||||||
if strings.Contains(cd, w) {
|
|
||||||
found = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !found {
|
|
||||||
ok = false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
codes := make([]string, 0, len(present))
|
|
||||||
for cd := range present {
|
|
||||||
codes = append(codes, cd)
|
|
||||||
}
|
|
||||||
status := "OK"
|
|
||||||
if !ok {
|
|
||||||
status = "FAIL"
|
|
||||||
}
|
|
||||||
fmt.Printf("%-22s want=%v present=%v %s\n", tc.id, tc.want, codes, status)
|
|
||||||
if !ok {
|
|
||||||
t.Errorf("%s: not all named regulations in top-8 (want %v, got %v)", tc.id, tc.want, codes)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -162,7 +162,7 @@ async def update_ai_system(
|
|||||||
db: Session = Depends(get_db),
|
db: Session = Depends(get_db),
|
||||||
):
|
):
|
||||||
"""Update an AI system."""
|
"""Update an AI system."""
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime
|
||||||
|
|
||||||
system = db.query(AISystemDB).filter(AISystemDB.id == system_id).first()
|
system = db.query(AISystemDB).filter(AISystemDB.id == system_id).first()
|
||||||
if not system:
|
if not system:
|
||||||
@@ -226,7 +226,7 @@ async def assess_ai_system(
|
|||||||
db: Session = Depends(get_db),
|
db: Session = Depends(get_db),
|
||||||
):
|
):
|
||||||
"""Run AI Act risk assessment for an AI system."""
|
"""Run AI Act risk assessment for an AI system."""
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime
|
||||||
|
|
||||||
system = db.query(AISystemDB).filter(AISystemDB.id == system_id).first()
|
system = db.query(AISystemDB).filter(AISystemDB.id == system_id).first()
|
||||||
if not system:
|
if not system:
|
||||||
|
|||||||
@@ -47,8 +47,6 @@ from compliance.services.canonical_control_service import (
|
|||||||
_control_row, # re-exported for legacy test imports
|
_control_row, # re-exported for legacy test imports
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
router = APIRouter(prefix="/v1/canonical", tags=["canonical-controls"])
|
router = APIRouter(prefix="/v1/canonical", tags=["canonical-controls"])
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ Endpoints:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime, date, timedelta, timezone
|
from datetime import datetime, date, timedelta
|
||||||
from calendar import month_abbr
|
from calendar import month_abbr
|
||||||
from typing import Optional, Dict, Any, List
|
from typing import Optional, Dict, Any, List
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
|
|||||||
@@ -26,11 +26,10 @@ versions). Module-level helpers re-exported for legacy tests.
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any, List, Optional
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
from fastapi import APIRouter, Depends, Query
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from fastapi.responses import Response
|
from fastapi.responses import Response
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
from sqlalchemy import text
|
|
||||||
|
|
||||||
from classroom_engine.database import get_db
|
from classroom_engine.database import get_db
|
||||||
from compliance.api._http_errors import translate_domain_errors
|
from compliance.api._http_errors import translate_domain_errors
|
||||||
@@ -485,7 +484,6 @@ async def list_dsfas(
|
|||||||
async def create_dsfa(
|
async def create_dsfa(
|
||||||
request: DSFACreate,
|
request: DSFACreate,
|
||||||
tenant_id: Optional[str] = Query(None),
|
tenant_id: Optional[str] = Query(None),
|
||||||
db: Session = Depends(get_db),
|
|
||||||
service: DSFAService = Depends(get_dsfa_service),
|
service: DSFAService = Depends(get_dsfa_service),
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Neue DSFA erstellen."""
|
"""Neue DSFA erstellen."""
|
||||||
|
|||||||
@@ -16,11 +16,6 @@ from the legacy path.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
|
||||||
import json
|
|
||||||
import hashlib
|
|
||||||
import uuid as uuid_module
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile
|
from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile
|
||||||
@@ -35,15 +30,14 @@ from ..db import (
|
|||||||
EvidenceConfidenceEnum,
|
EvidenceConfidenceEnum,
|
||||||
EvidenceTruthStatusEnum,
|
EvidenceTruthStatusEnum,
|
||||||
)
|
)
|
||||||
from ..db.models import EvidenceDB, AuditTrailDB
|
from ..db.models import EvidenceDB, ControlDB, AuditTrailDB
|
||||||
from ..services.auto_risk_updater import AutoRiskUpdater
|
from ..services.auto_risk_updater import AutoRiskUpdater
|
||||||
from ..services.evidence_service import EvidenceService, _update_risks as _update_risks_impl
|
from ..services.evidence_service import EvidenceService
|
||||||
from .schemas import (
|
from .schemas import (
|
||||||
EvidenceCreate, EvidenceResponse, EvidenceListResponse,
|
EvidenceCreate, EvidenceResponse, EvidenceListResponse,
|
||||||
EvidenceRejectRequest,
|
EvidenceRejectRequest,
|
||||||
)
|
)
|
||||||
from .audit_trail_utils import log_audit_trail
|
from .audit_trail_utils import log_audit_trail
|
||||||
from ._http_errors import translate_domain_errors
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
router = APIRouter(tags=["compliance-evidence"])
|
router = APIRouter(tags=["compliance-evidence"])
|
||||||
@@ -152,7 +146,6 @@ async def list_evidence(
|
|||||||
status: Optional[str] = None,
|
status: Optional[str] = None,
|
||||||
page: Optional[int] = Query(None, ge=1, description="Page number (1-based)"),
|
page: Optional[int] = Query(None, ge=1, description="Page number (1-based)"),
|
||||||
limit: Optional[int] = Query(None, ge=1, le=500, description="Items per page"),
|
limit: Optional[int] = Query(None, ge=1, le=500, description="Items per page"),
|
||||||
db: Session = Depends(get_db),
|
|
||||||
service: EvidenceService = Depends(get_evidence_service),
|
service: EvidenceService = Depends(get_evidence_service),
|
||||||
) -> EvidenceListResponse:
|
) -> EvidenceListResponse:
|
||||||
"""List evidence with optional filters and pagination."""
|
"""List evidence with optional filters and pagination."""
|
||||||
@@ -193,11 +186,9 @@ async def list_evidence(
|
|||||||
@router.post("/evidence", response_model=EvidenceResponse)
|
@router.post("/evidence", response_model=EvidenceResponse)
|
||||||
async def create_evidence(
|
async def create_evidence(
|
||||||
evidence_data: EvidenceCreate,
|
evidence_data: EvidenceCreate,
|
||||||
db: Session = Depends(get_db),
|
|
||||||
service: EvidenceService = Depends(get_evidence_service),
|
service: EvidenceService = Depends(get_evidence_service),
|
||||||
) -> EvidenceResponse:
|
) -> EvidenceResponse:
|
||||||
"""Create new evidence record."""
|
"""Create new evidence record."""
|
||||||
dsms_cid = None
|
|
||||||
repo = EvidenceRepository(db)
|
repo = EvidenceRepository(db)
|
||||||
|
|
||||||
# Get control UUID
|
# Get control UUID
|
||||||
@@ -266,7 +257,6 @@ async def create_evidence(
|
|||||||
@router.delete("/evidence/{evidence_id}")
|
@router.delete("/evidence/{evidence_id}")
|
||||||
async def delete_evidence(
|
async def delete_evidence(
|
||||||
evidence_id: str,
|
evidence_id: str,
|
||||||
db: Session = Depends(get_db),
|
|
||||||
service: EvidenceService = Depends(get_evidence_service),
|
service: EvidenceService = Depends(get_evidence_service),
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Delete an evidence record."""
|
"""Delete an evidence record."""
|
||||||
@@ -285,7 +275,6 @@ async def upload_evidence(
|
|||||||
title: str = Query(...),
|
title: str = Query(...),
|
||||||
file: UploadFile = File(...),
|
file: UploadFile = File(...),
|
||||||
description: Optional[str] = Query(None),
|
description: Optional[str] = Query(None),
|
||||||
db: Session = Depends(get_db),
|
|
||||||
service: EvidenceService = Depends(get_evidence_service),
|
service: EvidenceService = Depends(get_evidence_service),
|
||||||
) -> EvidenceResponse:
|
) -> EvidenceResponse:
|
||||||
"""Upload evidence file."""
|
"""Upload evidence file."""
|
||||||
@@ -685,7 +674,6 @@ async def collect_ci_evidence(
|
|||||||
async def get_ci_evidence_status(
|
async def get_ci_evidence_status(
|
||||||
control_id: Optional[str] = Query(None, description="Filter by control ID"),
|
control_id: Optional[str] = Query(None, description="Filter by control ID"),
|
||||||
days: int = Query(30, description="Look back N days"),
|
days: int = Query(30, description="Look back N days"),
|
||||||
db: Session = Depends(get_db),
|
|
||||||
service: EvidenceService = Depends(get_evidence_service),
|
service: EvidenceService = Depends(get_evidence_service),
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Get CI/CD evidence collection status overview."""
|
"""Get CI/CD evidence collection status overview."""
|
||||||
@@ -693,8 +681,70 @@ async def get_ci_evidence_status(
|
|||||||
return service.ci_status(control_id, days)
|
return service.ci_status(control_id, days)
|
||||||
|
|
||||||
|
|
||||||
# (Alte CI-Status-Implementierung entfernt — unerreichbarer Code nach `return
|
# ----------------------------------------------------------------------------
|
||||||
# service.ci_status(...)`; durch den Service ersetzt, `query` war nie initialisiert.)
|
# Legacy re-exports for tests that import helpers directly.
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
if control_id:
|
||||||
|
ctrl_repo = ControlRepository(db)
|
||||||
|
control = ctrl_repo.get_by_control_id(control_id)
|
||||||
|
if control:
|
||||||
|
query = query.filter(EvidenceDB.control_id == control.id)
|
||||||
|
|
||||||
|
evidence_list = query.order_by(EvidenceDB.collected_at.desc()).limit(100).all()
|
||||||
|
|
||||||
|
# Group by control and calculate stats
|
||||||
|
control_stats = defaultdict(lambda: {
|
||||||
|
"total": 0,
|
||||||
|
"valid": 0,
|
||||||
|
"failed": 0,
|
||||||
|
"last_collected": None,
|
||||||
|
"evidence": [],
|
||||||
|
})
|
||||||
|
|
||||||
|
for e in evidence_list:
|
||||||
|
# Get control_id string
|
||||||
|
control = db.query(ControlDB).filter(ControlDB.id == e.control_id).first()
|
||||||
|
ctrl_id = control.control_id if control else "unknown"
|
||||||
|
|
||||||
|
stats = control_stats[ctrl_id]
|
||||||
|
stats["total"] += 1
|
||||||
|
if e.status:
|
||||||
|
if e.status.value == "valid":
|
||||||
|
stats["valid"] += 1
|
||||||
|
elif e.status.value == "failed":
|
||||||
|
stats["failed"] += 1
|
||||||
|
if not stats["last_collected"] or e.collected_at > stats["last_collected"]:
|
||||||
|
stats["last_collected"] = e.collected_at
|
||||||
|
|
||||||
|
# Add evidence summary
|
||||||
|
stats["evidence"].append({
|
||||||
|
"id": e.id,
|
||||||
|
"type": e.evidence_type,
|
||||||
|
"status": e.status.value if e.status else None,
|
||||||
|
"collected_at": e.collected_at.isoformat() if e.collected_at else None,
|
||||||
|
"ci_job_id": e.ci_job_id,
|
||||||
|
})
|
||||||
|
|
||||||
|
# Convert to list and sort
|
||||||
|
result = []
|
||||||
|
for ctrl_id, stats in control_stats.items():
|
||||||
|
result.append({
|
||||||
|
"control_id": ctrl_id,
|
||||||
|
"total_evidence": stats["total"],
|
||||||
|
"valid_count": stats["valid"],
|
||||||
|
"failed_count": stats["failed"],
|
||||||
|
"last_collected": stats["last_collected"].isoformat() if stats["last_collected"] else None,
|
||||||
|
"recent_evidence": stats["evidence"][:5],
|
||||||
|
})
|
||||||
|
|
||||||
|
result.sort(key=lambda x: x["last_collected"] or "", reverse=True)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"period_days": days,
|
||||||
|
"total_evidence": len(evidence_list),
|
||||||
|
"controls": result,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@@ -722,7 +772,6 @@ async def review_evidence(
|
|||||||
approval_status='first_approved'. A second (different) reviewer then
|
approval_status='first_approved'. A second (different) reviewer then
|
||||||
sets second_reviewer and approval_status='approved'.
|
sets second_reviewer and approval_status='approved'.
|
||||||
"""
|
"""
|
||||||
dsms_cid = None
|
|
||||||
evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
|
evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
|
||||||
if not evidence:
|
if not evidence:
|
||||||
raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")
|
raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")
|
||||||
@@ -802,7 +851,6 @@ async def reject_evidence(
|
|||||||
db: Session = Depends(get_db),
|
db: Session = Depends(get_db),
|
||||||
):
|
):
|
||||||
"""Reject evidence (sets approval_status='rejected')."""
|
"""Reject evidence (sets approval_status='rejected')."""
|
||||||
dsms_cid = None
|
|
||||||
evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
|
evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
|
||||||
if not evidence:
|
if not evidence:
|
||||||
raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")
|
raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ from fastapi.responses import FileResponse
|
|||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from classroom_engine.database import get_db
|
from classroom_engine.database import get_db
|
||||||
from ..db.models import EvidenceDB
|
|
||||||
|
|
||||||
from .audit_trail_utils import log_audit_trail
|
from .audit_trail_utils import log_audit_trail
|
||||||
from ..db import (
|
from ..db import (
|
||||||
@@ -311,7 +310,6 @@ async def list_controls_paginated(
|
|||||||
)
|
)
|
||||||
async def get_control(
|
async def get_control(
|
||||||
control_id: str,
|
control_id: str,
|
||||||
db: Session = Depends(get_db),
|
|
||||||
svc: ControlExportService = Depends(get_ctrl_export_service),
|
svc: ControlExportService = Depends(get_ctrl_export_service),
|
||||||
) -> ControlResponse:
|
) -> ControlResponse:
|
||||||
"""Get a specific control by control_id."""
|
"""Get a specific control by control_id."""
|
||||||
@@ -356,7 +354,6 @@ async def get_control(
|
|||||||
async def update_control(
|
async def update_control(
|
||||||
control_id: str,
|
control_id: str,
|
||||||
update: ControlUpdate,
|
update: ControlUpdate,
|
||||||
db: Session = Depends(get_db),
|
|
||||||
svc: ControlExportService = Depends(get_ctrl_export_service),
|
svc: ControlExportService = Depends(get_ctrl_export_service),
|
||||||
) -> ControlResponse:
|
) -> ControlResponse:
|
||||||
"""Update a control."""
|
"""Update a control."""
|
||||||
@@ -446,7 +443,6 @@ async def update_control(
|
|||||||
async def review_control(
|
async def review_control(
|
||||||
control_id: str,
|
control_id: str,
|
||||||
review: ControlReviewRequest,
|
review: ControlReviewRequest,
|
||||||
db: Session = Depends(get_db),
|
|
||||||
svc: ControlExportService = Depends(get_ctrl_export_service),
|
svc: ControlExportService = Depends(get_ctrl_export_service),
|
||||||
) -> ControlResponse:
|
) -> ControlResponse:
|
||||||
"""Mark a control as reviewed with new status."""
|
"""Mark a control as reviewed with new status."""
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ Phase 1 Step 4 refactor: handlers delegate to VVTService.
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any, List, Optional
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
from fastapi import APIRouter, Depends, Query, Request
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user