Merge pull request 'feat(onboarding): surface curated expert text + human labels' (#53) from feat/advisor-human-text into main

This commit is contained in:
pilotadmin
2026-06-28 18:47:07 +02:00
7 changed files with 89 additions and 13 deletions
@@ -29,7 +29,7 @@ interface AdvisorResponse {
silent_intake_summary: string; headline: string; auto_detected: string[]; indications: string[] silent_intake_summary: string; headline: string; auto_detected: string[]; indications: string[]
inferred_assumptions: Inferred[]; rejected_assumptions: Rejected[]; top_5_questions: Question[] inferred_assumptions: Inferred[]; rejected_assumptions: Rejected[]; top_5_questions: Question[]
capability_delta: string[]; top_measures: Measure[]; evidence_requests: string[] capability_delta: string[]; top_measures: Measure[]; evidence_requests: string[]
unsupported_domains: string[]; completeness_summary: string unsupported_domains: string[]; completeness_summary: string; capability_labels: Record<string, string>
} }
const PROXY = '/api/sdk/v1/compliance/onboarding' const PROXY = '/api/sdk/v1/compliance/onboarding'
@@ -74,6 +74,8 @@ export default function OnboardingAdvisorPage() {
const toggle = (list: string[], set: (v: string[]) => void, v: string) => const toggle = (list: string[], set: (v: string[]) => void, v: string) =>
set(list.includes(v) ? list.filter(x => x !== v) : [...list, v]) set(list.includes(v) ? list.filter(x => x !== v) : [...list, v])
// Display label for a capability id: the label delivered with the advisor result when one is
// present (and non-empty), otherwise the id itself with underscores turned into spaces.
const lbl = (id: string) => {
  const curated = result?.capability_labels?.[id]
  return curated || id.replace(/_/g, ' ')
}
const run = async () => { const run = async () => {
setLoading(true); setError(''); setResult(null) setLoading(true); setError(''); setResult(null)
try { try {
@@ -153,15 +155,15 @@ export default function OnboardingAdvisorPage() {
<div className="text-blue-100 text-sm mt-1">{result.silent_intake_summary}</div> <div className="text-blue-100 text-sm mt-1">{result.silent_intake_summary}</div>
</div> </div>
<div className="grid md:grid-cols-2 gap-4"> <div className="grid md:grid-cols-2 gap-4">
<Section title="Automatisch erkannt" hint="konkrete Artefakte nicht mehr gefragt"><Chips items={result.auto_detected} tone="bg-emerald-100 text-emerald-800" /></Section> <Section title="Automatisch erkannt" hint="konkrete Artefakte nicht mehr gefragt"><Chips items={result.auto_detected.map(lbl)} tone="bg-emerald-100 text-emerald-800" /></Section>
<Section title="Indikationen" hint="erhöht Annahmestärke trotzdem gefragt"><Chips items={result.indications} tone="bg-amber-100 text-amber-800" /></Section> <Section title="Indikationen" hint="erhöht Annahmestärke trotzdem gefragt"><Chips items={result.indications.map(lbl)} tone="bg-amber-100 text-amber-800" /></Section>
</div> </div>
<Section title="Nächste beste Fragen" hint="max 5, jede erklärt sich selbst"> <Section title="Nächste beste Fragen" hint="max 5, jede erklärt sich selbst">
{result.top_5_questions.length ? ( {result.top_5_questions.length ? (
<ol className="space-y-3"> <ol className="space-y-3">
{result.top_5_questions.map((q, i) => ( {result.top_5_questions.map((q, i) => (
<li key={q.capability_id} className="border-l-2 border-blue-300 pl-3"> <li key={q.capability_id} className="border-l-2 border-blue-300 pl-3">
<div className="font-medium text-gray-900">{i + 1}. {q.capability_id} <span className="text-xs text-gray-500">({q.question_intent})</span></div> <div className="font-medium text-gray-900">{i + 1}. {lbl(q.capability_id)}</div>
<div className="text-sm text-gray-600">{q.why}</div> <div className="text-sm text-gray-600">{q.why}</div>
</li> </li>
))} ))}
@@ -171,7 +173,7 @@ export default function OnboardingAdvisorPage() {
<div className="grid md:grid-cols-2 gap-4"> <div className="grid md:grid-cols-2 gap-4">
<Section title="Wahrscheinlich abgedeckt (Welt-1)" hint="Zertifikat legt nahe Verifikation erforderlich"> <Section title="Wahrscheinlich abgedeckt (Welt-1)" hint="Zertifikat legt nahe Verifikation erforderlich">
{result.inferred_assumptions.length ? result.inferred_assumptions.map(a => ( {result.inferred_assumptions.length ? result.inferred_assumptions.map(a => (
<div key={a.certification} className="mb-2"><span className="font-medium">{a.certification}</span>: {a.capabilities.join(', ')}</div> <div key={a.certification} className="mb-2"><span className="font-medium">{a.certification}</span>: {a.capabilities.map(lbl).join(', ')}</div>
)) : <span className="text-gray-400 text-sm"></span>} )) : <span className="text-gray-400 text-sm"></span>}
</Section> </Section>
<Section title="Nicht relevant" hint="relevance(evidence, target) = 0"> <Section title="Nicht relevant" hint="relevance(evidence, target) = 0">
@@ -181,7 +183,7 @@ export default function OnboardingAdvisorPage() {
</Section> </Section>
</div> </div>
<div className="grid md:grid-cols-2 gap-4"> <div className="grid md:grid-cols-2 gap-4">
<Section title="Offene Lücken (Delta)"><Chips items={result.capability_delta} tone="bg-gray-100 text-gray-700" /></Section> <Section title="Offene Lücken (Delta)"><Chips items={result.capability_delta.map(lbl)} tone="bg-gray-100 text-gray-700" /></Section>
<Section title="Geforderte Nachweise"><Chips items={result.evidence_requests} tone="bg-gray-100 text-gray-700" /></Section> <Section title="Geforderte Nachweise"><Chips items={result.evidence_requests} tone="bg-gray-100 text-gray-700" /></Section>
</div> </div>
<Section title="Vollständigkeit" hint={result.unsupported_domains.length ? `nicht abgedeckt: ${result.unsupported_domains.join(', ')}` : undefined}> <Section title="Vollständigkeit" hint={result.unsupported_domains.length ? `nicht abgedeckt: ${result.unsupported_domains.join(', ')}` : undefined}>
@@ -8,7 +8,7 @@ This adds NO new reasoning logic. It exposes the already-built, tested orchestra
""" """
import logging import logging
from typing import List, Optional from typing import Dict, List, Optional
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
@@ -20,7 +20,7 @@ from compliance.onboarding import (
ProducedSignal, ProducedSignal,
RejectedAssumption, RejectedAssumption,
) )
from compliance.services.onboarding_service import run_advisor, supported_targets from compliance.services.onboarding_service import labels_for, run_advisor, supported_targets
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
router = APIRouter(prefix="/onboarding", tags=["onboarding"]) router = APIRouter(prefix="/onboarding", tags=["onboarding"])
@@ -50,6 +50,7 @@ class AdvisorResponse(BaseModel):
evidence_requests: List[str] = Field(default_factory=list) evidence_requests: List[str] = Field(default_factory=list)
unsupported_domains: List[str] = Field(default_factory=list) unsupported_domains: List[str] = Field(default_factory=list)
completeness_summary: str = "" completeness_summary: str = ""
capability_labels: Dict[str, str] = Field(default_factory=dict) # capability_id -> human label (DE)
@router.get("/targets") @router.get("/targets")
@@ -65,10 +66,17 @@ def advisor_start_endpoint(req: OnboardingAdvisorRequest) -> AdvisorResponse:
company=req.company, certifications=req.certifications, target=req.target, company=req.company, certifications=req.certifications, target=req.target,
signals=req.scanner_findings, known_evidence=req.known_evidence, signals=req.scanner_findings, known_evidence=req.known_evidence,
products=req.products, markets=req.markets, industry=req.industry or "") products=req.products, markets=req.markets, industry=req.industry or "")
surfaced = [
*result.auto_detected, *result.indications, *result.capability_delta,
*(q.capability_id for q in result.next_best_questions),
*(c for a in result.inferred_assumptions for c in a.capabilities),
*(m.capability_id for m in result.top_measures),
]
return AdvisorResponse( return AdvisorResponse(
silent_intake_summary=si_summary, headline=result.headline, auto_detected=result.auto_detected, silent_intake_summary=si_summary, headline=result.headline, auto_detected=result.auto_detected,
indications=result.indications, indications=result.indications,
inferred_assumptions=result.inferred_assumptions, rejected_assumptions=result.rejected_assumptions, inferred_assumptions=result.inferred_assumptions, rejected_assumptions=result.rejected_assumptions,
top_5_questions=result.next_best_questions, capability_delta=result.capability_delta, top_5_questions=result.next_best_questions, capability_delta=result.capability_delta,
top_measures=result.top_measures, evidence_requests=result.evidence_requests, top_measures=result.top_measures, evidence_requests=result.evidence_requests,
unsupported_domains=result.unsupported_domains, completeness_summary=result.completeness_summary) unsupported_domains=result.unsupported_domains, completeness_summary=result.completeness_summary,
capability_labels=labels_for(surfaced))
@@ -9,7 +9,7 @@ It adds NO new reasoning logic — it only exposes what exists. No DB, no persis
from __future__ import annotations from __future__ import annotations
import os import os
from typing import Any, Dict, List, Sequence, Tuple from typing import Any, Dict, Iterable, List, Sequence, Tuple
import yaml import yaml
@@ -37,6 +37,13 @@ def _load(*parts: str) -> Any:
_HYP_LIB = [CapabilityHypothesis(**h) for h in _load("certification_hypotheses", "hypotheses.yaml")["hypotheses"]] _HYP_LIB = [CapabilityHypothesis(**h) for h in _load("certification_hypotheses", "hypotheses.yaml")["hypotheses"]]
_VOCAB = [SignalVocabularyEntry(**v) for v in _load("onboarding", "signal_vocabulary.yaml")["signals"]] _VOCAB = [SignalVocabularyEntry(**v) for v in _load("onboarding", "signal_vocabulary.yaml")["signals"]]
_SIGNAL_MAP = [SignalMapping(**m) for m in _load("onboarding", "intake_signal_map.yaml")["mappings"]] _SIGNAL_MAP = [SignalMapping(**m) for m in _load("onboarding", "intake_signal_map.yaml")["mappings"]]
_LABELS: Dict[str, str] = _load("onboarding", "capability_labels.yaml")["labels"]
def labels_for(capability_ids: Iterable[str]) -> Dict[str, str]:
    """Look up the curated human-readable (DE) label for each given capability id.

    Presentation-only helper. Ids that have no entry in the curated label table are left out
    of the result (the frontend falls back to a prettified id for those). Repeated ids yield
    a single entry, and entries appear in first-seen order, so the output is deterministic.
    """
    found: Dict[str, str] = {}
    for cap_id in capability_ids:
        # keep only the first occurrence of an id, and only ids that have a curated label
        if cap_id in _LABELS and cap_id not in found:
            found[cap_id] = _LABELS[cap_id]
    return found
# target id -> transition pattern that defines its required capabilities (curated registry) # target id -> transition pattern that defines its required capabilities (curated registry)
_TARGET_PATTERNS = { _TARGET_PATTERNS = {
@@ -53,9 +60,10 @@ def supported_targets() -> List[str]:
def _target(target_id: str) -> Tuple[List[TargetRequirement], Dict[str, List[str]]]: def _target(target_id: str) -> Tuple[List[TargetRequirement], Dict[str, List[str]]]:
pat = _load("transition_patterns", _TARGET_PATTERNS[target_id]) pat = _load("transition_patterns", _TARGET_PATTERNS[target_id])
reqs = [TargetRequirement(capability_id=a["capability"]) for a in pat["likely_covered"]] reqs = [TargetRequirement(capability_id=a["capability"], rationale=a.get("reviewable_claim", "")) for a in pat["likely_covered"]]
reqs += [TargetRequirement(capability_id=d["capability"], question_intent=d.get("needed_information", "verify_existence"), reqs += [TargetRequirement(capability_id=d["capability"], question_intent=d.get("needed_information", "verify_existence"),
expected_evidence=d.get("expected_evidence", [])) for d in pat["delta_requirements"]] rationale=d.get("why_asked", ""), expected_evidence=d.get("expected_evidence", []))
for d in pat["delta_requirements"]]
covers = {d["capability"]: d.get("covers_targets", []) for d in pat["delta_requirements"]} covers = {d["capability"]: d.get("covers_targets", []) for d in pat["delta_requirements"]}
return reqs, covers return reqs, covers
@@ -104,7 +104,8 @@ def assess_transition(
) )
buckets[status].append(req.capability_id) buckets[status].append(req.capability_id)
if status in _REQUESTABLE: if status in _REQUESTABLE:
reason, prio = _REQUESTABLE[status] default_reason, prio = _REQUESTABLE[status]
reason = req.rationale or default_reason # curated human text wins over the generic fallback
requests.append( requests.append(
TransitionQuestionRequest( TransitionQuestionRequest(
capability_id=req.capability_id, capability_id=req.capability_id,
@@ -70,6 +70,7 @@ class TargetRequirement(BaseModel):
capability_id: str # MCAP-... capability_id: str # MCAP-...
question_intent: str = "verify_existence" # passed through to the request, not rendered question_intent: str = "verify_existence" # passed through to the request, not rendered
rationale: str = "" # curated human text (e.g. why_asked / reviewable_claim) — surfaced as the request reason
expected_evidence: List[str] = Field(default_factory=list) expected_evidence: List[str] = Field(default_factory=list)
source_control_id: Optional[str] = None source_control_id: Optional[str] = None
supports_obligations: List[str] = Field(default_factory=list) supports_obligations: List[str] = Field(default_factory=list)
@@ -0,0 +1,45 @@
# Human-readable capability labels (DE) — presentation only, reusable across all targets.
# A capability id is the stable machine identity; this maps it to an expert-facing label for the UI.
# Curated knowledge (draft — to be corrected by the domain expert). Missing ids fall back to a
# prettified id in the frontend. NO real company names. Keep labels short + concrete.
labels:
# ── ISMS / ISO 27001 core ───────────────────────────────────────────────
information_security_management: "Informationssicherheits-Managementsystem (ISMS)"
access_control_and_authentication: "Zugriffskontrolle & Authentifizierung"
asset_and_configuration_management: "Asset- & Konfigurationsverwaltung"
cryptography: "Kryptographie / Verschlüsselung"
incident_management: "Security-Incident-Management"
security_awareness_training: "Security-Awareness-Schulungen"
supplier_security: "Lieferanten-Sicherheit"
security_logging_and_monitoring: "Security-Logging & Monitoring"
technical_vulnerability_management: "Technisches Schwachstellen-Management"
# ── TISAX / VDA-spezifisch ──────────────────────────────────────────────
prototype_protection: "Prototypenschutz (physisch & logisch)"
tisax_label_scope_selection: "TISAX-Label-/Scope-Festlegung"
tisax_assessment_via_enx: "TISAX-Assessment über die ENX-Plattform"
vda_isa_self_assessment: "VDA-ISA-Selbstauskunft"
data_protection_processing_on_behalf: "Auftragsverarbeitung (Art. 28 DSGVO)"
physical_security: "Physische Sicherheit / Zutrittskontrolle"
# ── QM / ISO 9001 ───────────────────────────────────────────────────────
document_and_change_control: "Dokumenten- & Änderungslenkung"
supplier_evaluation: "Lieferantenbewertung"
release_and_approval_process: "Freigabe- & Genehmigungsprozess"
ce_conformity_assessment_and_technical_documentation: "CE-Konformitätsbewertung & technische Dokumentation"
# ── CRA / Produkt-Cybersecurity ─────────────────────────────────────────
sbom_creation: "SBOM-Erstellung (Software-Stückliste)"
coordinated_vulnerability_disclosure: "Coordinated Vulnerability Disclosure (CVD)"
secure_development_lifecycle: "Sicherer Entwicklungslebenszyklus (SDLC)"
secure_signed_update_distribution: "Sichere, signierte Update-Verteilung"
security_update_support_period: "Sicherheits-Update-Supportzeitraum"
product_cyber_risk_assessment: "Produkt-Cyber-Risikobewertung"
exploited_vuln_and_incident_reporting: "Meldung ausgenutzter Schwachstellen & Vorfälle"
public_security_advisories: "Öffentliche Security Advisories"
cybersecurity_management_system: "Cybersecurity-Managementsystem (CSMS)"
# ── MaschinenVO / Safety ────────────────────────────────────────────────
machine_safety_risk_assessment: "Maschinen-Risikobeurteilung"
mechanical_safety_and_guards: "Mechanische Sicherheit & Schutzeinrichtungen"
operating_instructions_and_safety_information: "Betriebsanleitung & Sicherheitshinweise"
protection_against_corruption_of_safety_functions: "Schutz der Sicherheitsfunktionen vor Manipulation"
# ── Umwelt ──────────────────────────────────────────────────────────────
environmental_management_documentation: "Umweltmanagement-Dokumentation"
@@ -73,6 +73,17 @@ def test_partial_signal_surfaces_as_indication_and_is_still_asked():
assert "secure_development_lifecycle" in asked or "secure_development_lifecycle" in d["capability_delta"] assert "secure_development_lifecycle" in asked or "secure_development_lifecycle" in d["capability_delta"]
def test_questions_carry_curated_text_and_human_labels():
    # Two things are checked on one advisor run (ISO27001 -> TISAX, no scanner findings):
    #  1. at least one question carries a reason other than the generic fallback text
    #     ("Keine Anhaltspunkte ... klären"), i.e. the curated why_asked reached the question;
    #  2. surfaced capabilities come with their human-readable label.
    payload = dict(_BODY, certifications=["ISO27001"], target="TISAX", scanner_findings=[])
    response = _client.post("/onboarding/advisor-start", json=payload)
    assert response.status_code == 200, response.text
    data = response.json()
    has_curated_reason = any(
        "Keine Anhaltspunkte" not in question["why"] for question in data["top_5_questions"]
    )
    assert has_curated_reason  # real expert text surfaced
    assert data["capability_labels"].get("vda_isa_self_assessment") == "VDA-ISA-Selbstauskunft"
def test_unknown_target_is_404(): def test_unknown_target_is_404():
body = dict(_BODY, target="NOPE") body = dict(_BODY, target="NOPE")
r = _client.post("/onboarding/advisor-start", json=body) r = _client.post("/onboarding/advisor-start", json=body)