Files
breakpilot-compliance/ai-compliance-sdk/internal/api/handlers/iace_handler_benchmark.go
T
Benjamin Admin 2677bca9ca
CI / detect-changes (push) Successful in 7s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / validate-canonical-controls (push) Successful in 11s
CI / loc-budget (push) Failing after 14s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m23s
CI / test-go (push) Failing after 37s
CI / iace-gt-coverage (push) Successful in 24s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
feat(iace): benchmark risk comparison (traffic lights) + misuse pattern + 1:n matcher
#1 Risk-number comparison in the benchmark: ComputeRiskComparison derives the
tool's S/F/W/P + Fine-Kinney per matched hazard and compares to the GT values;
exposed on the benchmark response and rendered in a new RiskComparison table
with GREEN/YELLOW/RED traffic lights on the risk number R (like the Excel),
plus per-axis within-1 agreement cards.

#2 Generic misuse pattern HP2103 "Personenbefoerderung auf Hebezeug" — gated to
lift-family machine types, fires for ANY lifting device (not machine-specific).

#3 Benchmark matcher is now 1:n — one broad engine hazard may cover several
fine-grained GT sub-scenarios (foot/hand/leg crush), so coverage reflects real
risk coverage rather than 1:1 wording matches.

Validated on BOTH ground truths (robot cell + lift): leakage 0, ghosts 0,
coverage held.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-09 17:24:52 +02:00

164 lines
4.7 KiB
Go

package handlers
import (
"encoding/json"
"net/http"
"time"
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// ImportGroundTruth handles POST /projects/:id/benchmark/import-gt.
//
// It binds the request body to an iace.GroundTruth and stores it under the
// "ground_truth" key of the project's metadata, leaving every other metadata
// key untouched. When the payload carries no ImportedAt value, the current
// date (formatted as 2006-01-02) is filled in.
//
// Responses:
//   - 400 if the project ID or the request body is invalid
//   - 404 if the project cannot be loaded
//   - 500 if the existing metadata cannot be decoded, the merged metadata
//     cannot be encoded, or the store update fails
//   - 200 with message, entry_count and source_file on success
func (h *IACEHandler) ImportGroundTruth(c *gin.Context) {
	projectID, err := uuid.Parse(c.Param("id"))
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid project ID"})
		return
	}

	ctx := c.Request.Context()
	project, err := h.store.GetProject(ctx, projectID)
	if err != nil || project == nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "project not found"})
		return
	}

	var gt iace.GroundTruth
	if err := c.ShouldBindJSON(&gt); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid ground truth JSON: " + err.Error()})
		return
	}
	if gt.ImportedAt == "" {
		gt.ImportedAt = time.Now().Format("2006-01-02")
	}

	// Merge into the existing metadata. Refuse to continue when it cannot be
	// decoded: proceeding with an empty map would overwrite (and thereby
	// destroy) whatever is currently stored.
	meta := make(map[string]json.RawMessage)
	if len(project.Metadata) > 0 {
		if err := json.Unmarshal(project.Metadata, &meta); err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read existing project metadata"})
			return
		}
		// A JSON `null` document decodes without error but resets the map to
		// nil; assigning into a nil map would panic.
		if meta == nil {
			meta = make(map[string]json.RawMessage)
		}
	}

	gtJSON, err := json.Marshal(gt)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to encode ground truth"})
		return
	}
	meta["ground_truth"] = gtJSON

	mergedMeta, err := json.Marshal(meta)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to encode project metadata"})
		return
	}

	if err := h.store.UpdateProjectMetadata(ctx, projectID, mergedMeta); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to store ground truth"})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"message":     "ground truth imported",
		"entry_count": len(gt.Entries),
		"source_file": gt.SourceFile,
	})
}
// RunBenchmark handles GET /projects/:id/benchmark?gt_project_id=:gtId.
//
// The hazards and mitigations of project :id are compared against the ground
// truth found in the metadata of project :gtId. Without a gt_project_id query
// parameter the ground truth is read from project :id itself. The comparison
// result, extended with the risk comparison computed from its matched pairs,
// is written as the JSON response.
func (h *IACEHandler) RunBenchmark(c *gin.Context) {
	engineID, idErr := uuid.Parse(c.Param("id"))
	if idErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid project ID"})
		return
	}

	reqCtx := c.Request.Context()

	// The ground truth comes from the benchmarked project unless the caller
	// names another project explicitly.
	gtID := engineID
	if raw := c.Query("gt_project_id"); raw != "" {
		override, overrideErr := uuid.Parse(raw)
		if overrideErr != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": "invalid gt_project_id"})
			return
		}
		gtID = override
	}

	// Resolve the project carrying the ground truth and decode it.
	gtHolder, loadErr := h.store.GetProject(reqCtx, gtID)
	if gtHolder == nil || loadErr != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "GT project not found"})
		return
	}

	groundTruth, gtErr := iace.ParseGroundTruth(gtHolder.Metadata)
	if groundTruth == nil || gtErr != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "no ground truth data in project metadata"})
		return
	}

	// Engine side: hazards are mandatory for the comparison.
	engineHazards, hazardErr := h.store.ListHazards(reqCtx, engineID)
	if hazardErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to load hazards"})
		return
	}

	// A failed mitigation lookup is tolerated; the comparison then runs
	// with no mitigations at all.
	engineMitigations, mitigationErr := h.store.ListMitigationsByProject(reqCtx, engineID)
	if mitigationErr != nil {
		engineMitigations = nil
	}

	report := iace.CompareBenchmark(groundTruth, engineHazards, engineMitigations)
	comparison, agreement := iace.ComputeRiskComparison(report.MatchedPairs)
	report.RiskComparison = comparison
	report.RiskAgreement = agreement

	c.JSON(http.StatusOK, report)
}
// GetBenchmarkSummary handles GET /projects/:id/benchmark/summary.
//
// It runs the same comparison as the full benchmark but replies only with
// the scores, the totals, the matched/missing/extra counts and the category
// breakdown, omitting the individual match details. An optional
// gt_project_id query parameter selects the project whose metadata holds the
// ground truth; by default project :id is used.
func (h *IACEHandler) GetBenchmarkSummary(c *gin.Context) {
	targetID, idErr := uuid.Parse(c.Param("id"))
	if idErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid project ID"})
		return
	}

	reqCtx := c.Request.Context()

	// Fall back to the target project when no ground-truth project is given.
	sourceID := targetID
	if rawID := c.Query("gt_project_id"); rawID != "" {
		candidate, candidateErr := uuid.Parse(rawID)
		if candidateErr != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": "invalid gt_project_id"})
			return
		}
		sourceID = candidate
	}

	sourceProject, projectErr := h.store.GetProject(reqCtx, sourceID)
	if sourceProject == nil || projectErr != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "GT project not found"})
		return
	}

	truth, truthErr := iace.ParseGroundTruth(sourceProject.Metadata)
	if truth == nil || truthErr != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "no ground truth data"})
		return
	}

	hazardList, hazardErr := h.store.ListHazards(reqCtx, targetID)
	if hazardErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to load hazards"})
		return
	}

	// The mitigation lookup error is ignored here; whatever the store
	// returned is passed on to the comparison unchanged.
	mitigationList, _ := h.store.ListMitigationsByProject(reqCtx, targetID)

	outcome := iace.CompareBenchmark(truth, hazardList, mitigationList)

	summary := gin.H{
		"coverage_score":     outcome.CoverageScore,
		"measure_coverage":   outcome.MeasureCoverage,
		"total_gt":           outcome.TotalGT,
		"total_engine":       outcome.TotalEngine,
		"matched_count":      len(outcome.MatchedPairs),
		"missing_count":      len(outcome.MissingFromEngine),
		"extra_count":        len(outcome.ExtraInEngine),
		"category_breakdown": outcome.CategoryBreakdown,
	}
	c.JSON(http.StatusOK, summary)
}