feat(iace): benchmark system + erklaerteil + dedup-fix
Build + Deploy / build-admin-compliance (push) Successful in 2m7s
Build + Deploy / build-backend-compliance (push) Successful in 3m34s
Build + Deploy / build-ai-sdk (push) Successful in 1m6s
Build + Deploy / build-developer-portal (push) Successful in 1m7s
Build + Deploy / build-tts (push) Successful in 1m58s
Build + Deploy / build-document-crawler (push) Successful in 57s
Build + Deploy / build-dsms-gateway (push) Successful in 34s
Build + Deploy / build-dsms-node (push) Successful in 29s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 17s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m28s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 42s
CI / test-python-backend (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 27s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 3m10s

- Erklaerteil-Template fuer Risikobeurteilungen (risk_assessment_template.go)
  in PDF-Export, Markdown-Export und Frontend ReportPrintView eingebaut
- Ground Truth Benchmark-System: Datenmodell, Fuzzy-Matching-Engine,
  3 API Endpoints (import-gt, benchmark, benchmark/summary)
- Frontend Benchmark-Tab mit Score-Cards, Kategorie-Breakdown,
  Hazard-Vergleichstabelle (Zugeordnet/Fehlend/Extra), Business Impact
- Erster Benchmark: 13.3% Coverage (Baseline) gegen 60 GT-Eintraege
- Dedup-Fix: seenCat[cat] -> seenCatZone[cat+zone] erlaubt mehrere
  Gefaehrdungen pro Kategorie an verschiedenen Gefahrenstellen
- Komponenten-spezifische Hazard-Namen und Zone-basierte Zuordnung

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-13 01:02:33 +02:00
parent 185d680669
commit 8bb90d73e5
18 changed files with 4029 additions and 5 deletions
@@ -0,0 +1,115 @@
'use client'
import { useState, useCallback } from 'react'
/**
 * Five numeric risk parameters of a ground-truth entry.
 *
 * Used for both the `risk_in` and `risk_out` values of a
 * {@link GroundTruthEntry}.
 *
 * NOTE(review): the single-letter keys are presumably abbreviations defined by
 * the backend ground-truth schema — confirm their meaning there.
 */
export interface GTRisk {
  f: number
  w: number
  p: number
  s: number
  r: number
}
/**
 * String-valued parameters of a ground-truth entry together with the
 * resulting `plr` value.
 *
 * NOTE(review): presumably the inputs and outcome of a "required performance
 * level" determination — confirm against the backend ground-truth schema.
 */
export interface GTPLr {
  s: string
  f: string
  p: string
  /** Optional; not every entry carries this value. */
  ew?: string
  plr: string
}
/**
 * One row of an imported ground-truth risk assessment.
 *
 * Entries are sent to the import endpoint as `entries` and come back inside a
 * {@link BenchmarkResult} (matched or missing). Field names are snake_case
 * because the objects are exchanged with the backend as plain JSON.
 */
export interface GroundTruthEntry {
  /** Entry number, kept as a string. */
  nr: string

  // --- hazard classification ---
  hazard_group: string
  hazard_group_applicable: boolean
  hazard_subgroup: string
  hazard_type: string
  hazard_cause: string
  lifecycle_phases: string[]
  component_zone: string

  // --- risk before / after the measures ---
  risk_in: GTRisk
  /** May be absent or explicitly `null`. */
  plr?: GTPLr | null
  measures: string[]
  measure_type: string
  risk_out: GTRisk
  norm_references: string[]
  sufficient: boolean

  comment?: string

  /**
   * Optional list of individual reduction steps; each step repeats the
   * risk/measure fields of the entry itself.
   */
  reduction_steps?: Array<{
    risk_in: GTRisk
    measures: string[]
    measure_type: string
    risk_out: GTRisk
    norm_references: string[]
    sufficient: boolean
  }>
}
/**
 * Compact description of a hazard on the engine side of a benchmark
 * (see `engine_hazard` and `extra_in_engine`).
 */
export interface HazardSummary {
  id: string
  name: string
  category: string
  component?: string
  zone?: string
  risk_level?: string
}
/**
 * A ground-truth entry paired with the engine hazard it was matched to.
 */
export interface HazardMatchPair {
  gt_entry: GroundTruthEntry
  engine_hazard: HazardSummary
  /** Numeric score of the match. NOTE(review): value range is not visible here — confirm with the backend. */
  match_score: number
  /** Textual explanation of the match as delivered by the backend. */
  match_reason: string
}
/**
 * Per-category line of a benchmark's `category_breakdown`.
 *
 * NOTE(review): `coverage` is presumably derived from `match_count` and
 * `gt_count`; whether it is a fraction or a percentage is not visible here.
 */
export interface CategoryScore {
  category: string
  gt_count: number
  match_count: number
  coverage: number
}
/**
 * Response body of the benchmark endpoint: engine hazards compared against
 * the ground truth.
 */
export interface BenchmarkResult {
  // --- headline numbers ---
  coverage_score: number
  measure_coverage: number
  /** Number of ground-truth entries; the hook mirrors it into `gtEntryCount`. */
  total_gt: number
  total_engine: number

  // --- hazard comparison ---
  matched_pairs: HazardMatchPair[]
  missing_from_engine: GroundTruthEntry[]
  extra_in_engine: HazardSummary[]

  // --- breakdowns ---
  category_breakdown: CategoryScore[]
  risk_rank_pairs: Array<{
    gt_rank: number
    engine_rank: number
    hazard_name: string
    gt_risk_score: number
  }>
}
/**
 * State and actions exposed by the `useBenchmark` hook.
 */
interface UseBenchmarkReturn {
  /** Latest benchmark response, or `null` while none has been fetched. */
  result: BenchmarkResult | null
  /** Set once an import or a benchmark run has succeeded. */
  gtLoaded: boolean
  /** Number of ground-truth entries reported by the last successful call. */
  gtEntryCount: number
  /** `true` while a request started by one of the actions is in flight. */
  loading: boolean
  /** Message of the last failed action; reset to `null` when an action starts. */
  error: string | null

  /** Uploads a ground-truth data set for the project. */
  importGT: (gt: {
    entries: GroundTruthEntry[]
    source_file?: string
    description?: string
  }) => Promise<void>

  /** Runs the benchmark, optionally naming the project whose ground truth to use. */
  runBenchmark: (gtProjectId?: string) => Promise<void>
}
/**
 * Builds the error message for a non-OK response.
 *
 * Uses the response body when it has content and otherwise falls back to
 * `fallback` plus the HTTP status, so a failure never yields an empty
 * (falsy) message that the UI would treat as "no error".
 */
async function readFailureMessage(res: Response, fallback: string): Promise<string> {
  const body = (await res.text()).trim()
  // `||` is intentional: an empty body must trigger the fallback.
  return body || `${fallback} (HTTP ${res.status})`
}

/**
 * React hook wrapping the IACE benchmark endpoints of a single project.
 *
 * It keeps the latest benchmark result, whether ground truth is known to be
 * loaded, the ground-truth entry count, and a shared loading/error state for
 * both actions. Failures are never thrown to the caller; they are reported
 * through `error`.
 *
 * @param projectId - Id of the project whose benchmark endpoints are called.
 * @returns Current state plus the `importGT` and `runBenchmark` actions.
 */
export function useBenchmark(projectId: string): UseBenchmarkReturn {
  const [result, setResult] = useState<BenchmarkResult | null>(null)
  const [gtLoaded, setGtLoaded] = useState(false)
  const [gtEntryCount, setGtEntryCount] = useState(0)
  const [loading, setLoading] = useState(false)
  const [error, setError] = useState<string | null>(null)

  // Encode the id so reserved URL characters in it cannot change the path.
  const benchmarkUrl = `/api/sdk/v1/iace/projects/${encodeURIComponent(projectId)}/benchmark`

  /** POSTs the ground-truth payload as JSON to `<benchmark>/import-gt`. */
  const importGT = useCallback(async (gt: { entries: GroundTruthEntry[]; source_file?: string; description?: string }) => {
    setLoading(true)
    setError(null)
    try {
      const res = await fetch(`${benchmarkUrl}/import-gt`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      if (!res.ok) throw new Error(await readFailureMessage(res, 'Import failed'))
      const data: { entry_count?: number | null } | null = await res.json()
      setGtLoaded(true)
      // `??` rather than `||`: a server-reported count of 0 is a valid answer
      // and must not be replaced by the number of entries that were sent.
      setGtEntryCount(data?.entry_count ?? gt.entries.length)
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Import failed')
    } finally {
      setLoading(false)
    }
  }, [benchmarkUrl])

  /**
   * Fetches the benchmark result. When `gtProjectId` is given it is passed
   * as the `gt_project_id` query parameter.
   */
  const runBenchmark = useCallback(async (gtProjectId?: string) => {
    setLoading(true)
    setError(null)
    try {
      // Encode the value so characters such as `&` or `#` stay part of it.
      const params = gtProjectId ? `?gt_project_id=${encodeURIComponent(gtProjectId)}` : ''
      const res = await fetch(`${benchmarkUrl}${params}`)
      if (!res.ok) throw new Error(await readFailureMessage(res, 'Benchmark failed'))
      const data: BenchmarkResult = await res.json()
      setResult(data)
      setGtLoaded(true)
      setGtEntryCount(data.total_gt)
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Benchmark failed')
    } finally {
      setLoading(false)
    }
  }, [benchmarkUrl])

  return { result, gtLoaded, gtEntryCount, loading, error, importGT, runBenchmark }
}