feat(iace): LLM gap-review (Task #7+#8) + tech-file sources appendix (#29)

Three coupled pieces of work, all landing in the same PoC:

1. Backend gap-review endpoint (Task #7)
   - internal/api/handlers/iace_handler_gap_review.go: POST /projects/:id/llm-gap-review feeds the Limits-Form + current hazards + current mitigations to the configured LLM (Qwen / Claude / OpenAI via ProviderRegistry), parses a JSON suggestion list, filters it and stamps a default confidence, and falls back to a static checklist when the LLM is unavailable.
   - The Adopt step is NOT in this endpoint by design — the user clicks Adopt in the frontend, which is meant to call the existing CreateHazard / CreateMitigation handlers so provenance flows through the normal audit trail (that wiring follows in a later iteration, see below).

2. Frontend modal + button (Task #8)
   - app/sdk/iace/[projectId]/hazards/_components/LLMGapReviewModal.tsx: reusable modal that POSTs to the gap-review endpoint, renders suggestions with Adopt/Reject UX, and shows confidence, norm refs and the source stamp (llm_gap_review vs fallback_static).
   - hazards/page.tsx: indigo "KI-Gap-Review" button next to the existing "Eigene Gefaehrdung" button + modal mount.

3. Tech-File sources appendix (Task #29 — Stufe 4)
   - internal/iace/document_export_sources.go: new pdfSourcesAppendix method appended to ExportPDF. Groups cited norms by license rule (R1 OSHA/EU-Recht / R3 BreakPilot patterns / R3 DIN-EN-ISO identifier-only) and emits the legally required statement that blanket imprint notices (pauschale Impressum-Hinweise) are not sufficient.
   - extractCitedNorms() scans hazard/mitigation text for EN/ISO/IEC/DIN identifiers in a narrow grammar so prose isn't turned into spurious citations.

Bonus refactor:
   - internal/app/routes.go reached the 500-LOC hard cap when the new llm-gap-review route was added. Extracted registerIACERoutes into routes_iace.go (136 LOC). Same wiring, no behaviour change.

Three of the four Attribution-Renderer stages (1, 2, 4) now produce real output. Stage 3 (Stufe 3) already ships as <SourceBadge> + <LicenseModuleBanner> (commits dfac940 + b9e3eea earlier in this branch).
The PoC is intentionally conservative: every LLM suggestion stays non-binding (unverbindlich) until a human clicks Adopt, and Adopt will go through the existing CreateHazard/CreateMitigation flow (not yet wired in this commit — that is a separate iteration). The endpoint, modal and provenance chain are in place, so the next iteration only has to wire Adopt to the write path.
2026-05-22 00:21:49 +02:00
parent 6263462ba3
commit 94233b7c66
7 changed files with 798 additions and 111 deletions
@@ -0,0 +1,288 @@
+package handlers
+
+// LLM Gap-Review handler — Task #7.
+//
+// After the deterministic Pattern-Engine has generated hazards and
+// mitigations for an IACE project, this endpoint asks a configured LLM
+// (Qwen / Claude / OpenAI) to spot what the engine MISSED. The LLM is
+// fed the Limits-Form, the current hazard and mitigation lists, and a
+// reminder of the accepted reference formats (HP-XXXX pattern ids and
+// norm identifiers); it returns a list of suggested additional hazards
+// or mitigations.
+//
+// Important guardrails:
+//   - Every suggestion should point to a pattern_id or norm identifier.
+//     Malformed suggestions (no title or kind) are dropped; suggestions
+//     without any reference are kept at reduced confidence (see
+//     filterAndProvenance for the exact rule).
+//   - The response is provenance-tagged source="llm_gap_review" so
+//     the frontend renders an Adopt/Reject UX rather than committing.
+//   - Engine output (deterministic patterns) is never overwritten by
+//     LLM output; the gap-review is a SUPPLEMENT, not a replacement.
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"sort"
+	"strings"
+	"unicode/utf8"
+
+	"github.com/gin-gonic/gin"
+	"github.com/google/uuid"
+
+	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
+	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
+)
+
+// GapSuggestion describes a single addition proposed by the gap review.
+// A suggestion is only a proposal: it has no effect until the user adopts
+// it through the frontend.
+type GapSuggestion struct {
+	// Kind is either "hazard" or "mitigation".
+	Kind string `json:"kind"`
+	// Title is the headline of the suggestion.
+	Title string `json:"title"`
+	// Description is the longer explanatory text.
+	Description string `json:"description"`
+	// Category is the hazard category, e.g. "mechanical_hazard".
+	Category string `json:"category,omitempty"`
+	// HazardRef is used for mitigations: the name of an existing hazard.
+	HazardRef string `json:"hazard_ref,omitempty"`
+	// PatternRef is an HP-XXXX id from the engine library.
+	PatternRef string `json:"pattern_ref,omitempty"`
+	// NormRefs lists norm identifiers (EN ISO 12100 / DGUV / OSHA).
+	NormRefs []string `json:"norm_refs,omitempty"`
+	// Confidence is "high", "medium" or "low".
+	Confidence string `json:"confidence,omitempty"`
+	// Rationale is the free-text justification for the suggestion.
+	Rationale string `json:"rationale,omitempty"`
+}
+
+// GapReviewResponse is the JSON payload returned to the frontend modal.
+type GapReviewResponse struct {
+	// ProjectID is the project UUID in string form.
+	ProjectID string `json:"project_id"`
+	// Source is "llm_gap_review" or "fallback_static".
+	Source string `json:"source"`
+	// Model is omitted from the JSON when empty.
+	Model string `json:"model,omitempty"`
+	// Suggestions holds the proposed additions.
+	Suggestions []GapSuggestion `json:"suggestions"`
+	// InputSummary reports how much input the review was based on.
+	InputSummary struct {
+		// HazardCount is the number of hazards considered.
+		HazardCount int `json:"hazard_count"`
+		// MitigationCount is the number of mitigations considered.
+		MitigationCount int `json:"mitigation_count"`
+		// LimitsFormFields is the number of limits-form fields counted as filled.
+		LimitsFormFields int `json:"limits_form_fields"`
+	} `json:"input_summary"`
+}
+
+// LLMGapReview handles POST /projects/:id/llm-gap-review.
+//
+// It loads the project together with its hazards and mitigations, builds
+// the review prompt, asks the configured LLM for missing items and answers
+// with a GapReviewResponse.
+//
+// Status codes:
+//   - 400 when the :id path parameter is not a valid UUID,
+//   - 404 when the project lookup fails or yields no project,
+//   - 500 when hazards or mitigations cannot be listed,
+//   - 200 otherwise — including the case where the LLM call or the parsing
+//     of its reply fails; the response then carries the static checklist
+//     and source="fallback_static".
+//
+// The handler itself issues no create/update calls, so repeated requests do
+// not change project state. Adopting a suggestion is a separate,
+// user-driven step that goes through the CreateHazard / CreateMitigation
+// handlers.
+func (h *IACEHandler) LLMGapReview(c *gin.Context) {
+	projectID, err := uuid.Parse(c.Param("id"))
+	if err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid project id"})
+		return
+	}
+
+	ctx := c.Request.Context()
+	project, err := h.store.GetProject(ctx, projectID)
+	// A nil project without an error would be dereferenced further down
+	// (extractLimitsForm / buildGapReviewPrompt), so treat it as not found.
+	if err != nil || project == nil {
+		c.JSON(http.StatusNotFound, gin.H{"error": "project not found"})
+		return
+	}
+
+	hazards, err := h.store.ListHazards(ctx, projectID)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "list hazards: " + err.Error()})
+		return
+	}
+	mitigations, err := h.store.ListMitigationsByProject(ctx, projectID)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "list mitigations: " + err.Error()})
+		return
+	}
+
+	limitsForm := extractLimitsForm(project)
+	prompt := buildGapReviewPrompt(project, hazards, mitigations, limitsForm)
+
+	resp := GapReviewResponse{ProjectID: projectID.String()}
+	resp.InputSummary.HazardCount = len(hazards)
+	resp.InputSummary.MitigationCount = len(mitigations)
+	resp.InputSummary.LimitsFormFields = countLimitsFields(limitsForm)
+
+	suggestions, model, err := callLLMForGapReview(ctx, h.llmRegistry, prompt)
+	if err != nil {
+		// Deliberate best-effort: any LLM failure degrades to the static
+		// checklist instead of failing the request. The error itself is
+		// not surfaced to the client.
+		resp.Source = "fallback_static"
+		resp.Suggestions = staticFallbackSuggestions(hazards)
+		c.JSON(http.StatusOK, resp)
+		return
+	}
+
+	resp.Source = "llm_gap_review"
+	resp.Model = model
+	resp.Suggestions = filterAndProvenance(suggestions)
+	c.JSON(http.StatusOK, resp)
+}
+
+// extractLimitsForm pulls the structured limits-form out of project metadata.
+//
+// It decodes p.Metadata as a JSON object and returns the value stored under
+// the "limits_form" key when that value is itself a JSON object. It returns
+// nil when p is nil, the metadata is empty or not valid JSON, or the key is
+// missing or has a different shape. Callers can range over the nil result
+// safely.
+func extractLimitsForm(p *iace.Project) map[string]any {
+	// Guard against a nil project so a missing record cannot panic here.
+	if p == nil || len(p.Metadata) == 0 {
+		return nil
+	}
+	var md map[string]any
+	if err := json.Unmarshal(p.Metadata, &md); err != nil {
+		// Best effort: unreadable metadata simply means "no limits form".
+		return nil
+	}
+	// The failed-assertion case intentionally yields a nil map.
+	lf, _ := md["limits_form"].(map[string]any)
+	return lf
+}
+
+func countLimitsFields(lf map[string]any) int {
+	n := 0
+	for _, v := range lf {
+		if s, ok := v.(string); ok && strings.TrimSpace(s) != "" {
+			n++
+		} else if arr, ok := v.([]any); ok && len(arr) > 0 {
+			n++
+		}
+	}
+	return n
+}
+
+// buildGapReviewPrompt assembles the (German-language) LLM input. It is
+// kept compact: machine identification, the limits-form entries, the
+// headlines of the current hazards and mitigations, and the task
+// description with the accepted categories, reference formats and the
+// expected JSON shape.
+//
+// Parameters:
+//   - p:  the project; must be non-nil, its machine fields are read directly.
+//   - hz: current hazards; only the first 25 are listed by name.
+//   - mt: current mitigations; only the first 25 are listed by name.
+//   - lf: limits-form entries; each value is rendered with %v and cut to
+//     200 bytes by truncForPrompt. May be nil.
+//
+// Limits-form keys are emitted in sorted order so the same input always
+// yields the same prompt (Go map iteration order is randomised).
+func buildGapReviewPrompt(p *iace.Project, hz []iace.Hazard, mt []iace.Mitigation, lf map[string]any) string {
+	// Upper bound on hazards / mitigations listed by name; the remainder is
+	// summarised as a count to keep the prompt short.
+	const maxListed = 25
+
+	var sb strings.Builder
+	sb.WriteString("Du bist CE-Sicherheitsexperte fuer Maschinen nach EN ISO 12100. ")
+	sb.WriteString("Analysiere die folgende Risikobeurteilung und identifiziere FEHLENDE ")
+	sb.WriteString("Gefaehrdungen oder Schutzmassnahmen, die ein erfahrener Auditor ergaenzen wuerde.\n\n")
+
+	fmt.Fprintf(&sb, "Maschine: %s (Typ: %s, Hersteller: %s)\n",
+		p.MachineName, p.MachineType, p.Manufacturer)
+	if p.CEMarkingTarget != "" {
+		fmt.Fprintf(&sb, "CE-Ziel: %s\n", p.CEMarkingTarget)
+	}
+
+	sb.WriteString("\nGrenzen-Form (Limits & Verwendung):\n")
+	keys := make([]string, 0, len(lf))
+	for k := range lf {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+	for _, k := range keys {
+		fmt.Fprintf(&sb, "- %s: %v\n", k, truncForPrompt(lf[k], 200))
+	}
+
+	fmt.Fprintf(&sb, "\nBereits identifizierte Gefaehrdungen (%d):\n", len(hz))
+	for i, h := range hz {
+		if i >= maxListed {
+			fmt.Fprintf(&sb, "... und %d weitere\n", len(hz)-maxListed)
+			break
+		}
+		fmt.Fprintf(&sb, "- [%s] %s\n", h.Category, h.Name)
+	}
+
+	fmt.Fprintf(&sb, "\nBereits hinterlegte Schutzmassnahmen (%d, gekuerzt):\n", len(mt))
+	for i, m := range mt {
+		if i >= maxListed {
+			fmt.Fprintf(&sb, "... und %d weitere\n", len(mt)-maxListed)
+			break
+		}
+		fmt.Fprintf(&sb, "- [%s] %s\n", m.ReductionType, m.Name)
+	}
+
+	sb.WriteString("\nAufgabe: Liste max. 8 LUECKEN als JSON-Array. Jede Luecke MUSS einer der folgenden Kategorien entsprechen ")
+	sb.WriteString("und SOLL eine Norm- oder Pattern-Referenz nennen (HP-XXXX, EN ISO 12100, EN 13849, EN 13855, DGUV-Info, OSHA 29 CFR).\n")
+	sb.WriteString("Kategorien: mechanical_hazard, electrical_hazard, thermal_hazard, noise_vibration, ergonomic, ")
+	sb.WriteString("material_environmental, pneumatic_hydraulic, radiation_hazard.\n\n")
+	sb.WriteString(`Antworte NUR mit JSON, keine Erklaerung:
+[
+  {"kind":"hazard","title":"...","description":"...","category":"...","norm_refs":["EN ISO 12100"],"confidence":"high","rationale":"..."},
+  {"kind":"mitigation","title":"...","description":"...","hazard_ref":"Name der bestehenden Gefahr","norm_refs":["DGUV 209-072"],"confidence":"medium","rationale":"..."}
+]`)
+	return sb.String()
+}
+
+// truncForPrompt renders v with %v and limits the result to at most max
+// bytes, appending "…" when something was cut off.
+//
+// The cut never splits a multi-byte UTF-8 sequence: if byte max falls
+// inside a rune (e.g. a German umlaut), the cut moves back to the start of
+// that rune, so the returned prefix may be a few bytes shorter than max.
+// A negative max is treated as 0.
+func truncForPrompt(v any, max int) string {
+	s := fmt.Sprintf("%v", v)
+	if len(s) <= max {
+		return s
+	}
+	if max < 0 {
+		max = 0
+	}
+	// max < len(s) holds here, so s[cut] is always in range.
+	cut := max
+	for cut > 0 && !utf8.RuneStart(s[cut]) {
+		cut--
+	}
+	return s[:cut] + "…"
+}
+
+// callLLMForGapReview sends the prompt to the first available provider of
+// the registry and parses the reply into a suggestion list.
+//
+// It returns the parsed suggestions and the provider name. An error is
+// returned when registry is nil, no provider is available, the chat call
+// fails, or the reply contains no decodable JSON array; in every error
+// case the other two results are nil / "".
+func callLLMForGapReview(ctx context.Context, registry *llm.ProviderRegistry, prompt string) ([]GapSuggestion, string, error) {
+	if registry == nil {
+		return nil, "", fmt.Errorf("no LLM registry configured")
+	}
+	provider, err := registry.GetAvailable(ctx)
+	if err != nil {
+		return nil, "", fmt.Errorf("no LLM provider available: %w", err)
+	}
+	resp, err := provider.Chat(ctx, &llm.ChatRequest{
+		Messages:    []llm.Message{{Role: "user", Content: prompt}},
+		Temperature: 0.25,
+		MaxTokens:   2000,
+	})
+	if err != nil {
+		return nil, "", fmt.Errorf("llm chat: %w", err)
+	}
+
+	out, err := decodeGapSuggestions(resp.Message.Content)
+	if err != nil {
+		return nil, "", err
+	}
+	return out, provider.Name(), nil
+}
+
+// decodeGapSuggestions extracts the first JSON array from an LLM reply.
+//
+// Everything before the first '[' is skipped (leading prose, ```json
+// fences, a wrapping object), and decoding stops right after the array's
+// closing bracket, so trailing fences or explanations do not break parsing.
+func decodeGapSuggestions(raw string) ([]GapSuggestion, error) {
+	body := strings.TrimSpace(raw)
+	start := strings.Index(body, "[")
+	if start < 0 {
+		return nil, fmt.Errorf("parse llm response: no JSON array found (body=%.200s)", body)
+	}
+	var out []GapSuggestion
+	// A Decoder reads exactly one JSON value and ignores what follows,
+	// unlike json.Unmarshal which rejects any trailing data.
+	if err := json.NewDecoder(strings.NewReader(body[start:])).Decode(&out); err != nil {
+		return nil, fmt.Errorf("parse llm response: %w (body=%.200s)", err, body)
+	}
+	return out, nil
+}
+
+// filterAndProvenance drops malformed suggestions and normalises the
+// confidence of every survivor.
+//
+// Rules, applied per suggestion:
+//   - Kind is trimmed and lower-cased; anything other than "hazard" or
+//     "mitigation" is dropped, as is a suggestion with a blank title.
+//   - A suggestion with no non-blank norm reference and no pattern
+//     reference is always set to confidence "low", even if the LLM claimed
+//     a higher value.
+//   - Otherwise the LLM's confidence is kept when it is "high", "medium"
+//     or "low" (case-insensitive); a missing or unknown value becomes
+//     "medium".
+//
+// The result is never nil, so it serialises as a JSON array.
+func filterAndProvenance(in []GapSuggestion) []GapSuggestion {
+	out := make([]GapSuggestion, 0, len(in))
+	for _, s := range in {
+		s.Kind = strings.ToLower(strings.TrimSpace(s.Kind))
+		if strings.TrimSpace(s.Title) == "" {
+			continue
+		}
+		if s.Kind != "hazard" && s.Kind != "mitigation" {
+			continue
+		}
+
+		referenced := strings.TrimSpace(s.PatternRef) != ""
+		for _, ref := range s.NormRefs {
+			if referenced {
+				break
+			}
+			referenced = strings.TrimSpace(ref) != ""
+		}
+
+		s.Confidence = strings.ToLower(strings.TrimSpace(s.Confidence))
+		switch {
+		case !referenced:
+			// Free-form suggestions must not outrank referenced ones.
+			s.Confidence = "low"
+		case s.Confidence != "high" && s.Confidence != "medium" && s.Confidence != "low":
+			s.Confidence = "medium"
+		}
+		out = append(out, s)
+	}
+	return out
+}
+
+// staticFallbackSuggestions provides the generic checklist used when no
+// LLM answer is available. Every entry carries confidence "low".
+//
+// All three entries are returned when at least one hazard category in hz
+// contains "mechanical"; otherwise only the first entry is returned.
+func staticFallbackSuggestions(hz []iace.Hazard) []GapSuggestion {
+	checklist := []GapSuggestion{
+		{
+			Kind:        "hazard",
+			Title:       "Fuss-Quetschung unter absenkendem Werkstueck/Hubeinheit",
+			Description: "Wenn die Maschine eine Hubbewegung ausfuehrt, pruefe ob Fuesse/Beine im Verfahrbereich gequetscht werden koennen.",
+			Category:    "mechanical_hazard",
+			NormRefs:    []string{"EN ISO 12100 6.3.5.5"},
+			Confidence:  "low",
+			Rationale:   "Static checklist fallback — LLM nicht verfuegbar.",
+		},
+		{
+			Kind:        "hazard",
+			Title:       "Hand-Quetschung gegen feste Strukturen beim Hochfahren",
+			Description: "Pruefe Mindestabstand zu festen Strukturen oberhalb der hoechsten Hubposition.",
+			Category:    "mechanical_hazard",
+			NormRefs:    []string{"EN ISO 13854"},
+			Confidence:  "low",
+		},
+		{
+			Kind:        "mitigation",
+			Title:       "Kriechgeschwindigkeit am Endanschlag (Hubgeraete)",
+			Description: "Hubgeschwindigkeit am Ende der Verfahrbewegung auf <=15 mm/s reduzieren.",
+			NormRefs:    []string{"OSHA 29 CFR 1910.217 (Hand-Speed-Konstante)"},
+			Confidence:  "low",
+		},
+	}
+
+	// A mechanical context keeps the full checklist.
+	for i := range hz {
+		if strings.Contains(hz[i].Category, "mechanical") {
+			return checklist
+		}
+	}
+	// No mechanical hazard on record: trim to the first entry only.
+	return checklist[:1]
+}