feat(onboarding): surface curated expert text + human capability labels (advisor was showing snake_case)
The advisor was structurally correct but unusable: every question showed a snake_case capability id plus a
single generic fallback reason ("Keine Anhaltspunkte im Unternehmensprofil — klären"). The expert text
already EXISTED in the transition patterns (why_asked / reviewable_claim) — the pipeline just dropped it.
- transition_reasoning: TargetRequirement gains `rationale`; assess_transition uses it as the request
reason when present, else the generic fallback (additive, backward-compatible for all consumers).
- onboarding_service._target copies each pattern entry's why_asked (for delta_requirements) and
reviewable_claim (for likely_covered) into the requirement's rationale, which becomes the question's `why`.
- knowledge/onboarding/capability_labels.yaml: curated DE labels (id -> human), reusable across targets;
labels_for() + response.capability_labels expose them; the frontend renders label || prettified id.
Now ISO27001->TISAX reads "Auftragsverarbeitung (Art. 28 DSGVO) — If a TISAX data label is in scope, you
must show Art. 28 GDPR processing-on-behalf controls; ISO 27001 does not establish these." instead of
"data_protection_processing_on_behalf — klären". why_asked text is still EN (existing knowledge; translation
is a separate curation task). 34 onboarding+transition tests pass, mypy --strict clean (13 modules), check-loc 0.
This commit is contained in:
@@ -8,7 +8,7 @@ This adds NO new reasoning logic. It exposes the already-built, tested orchestra
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
@@ -20,7 +20,7 @@ from compliance.onboarding import (
|
||||
ProducedSignal,
|
||||
RejectedAssumption,
|
||||
)
|
||||
from compliance.services.onboarding_service import run_advisor, supported_targets
|
||||
from compliance.services.onboarding_service import labels_for, run_advisor, supported_targets
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/onboarding", tags=["onboarding"])
|
||||
@@ -50,6 +50,7 @@ class AdvisorResponse(BaseModel):
|
||||
evidence_requests: List[str] = Field(default_factory=list)
|
||||
unsupported_domains: List[str] = Field(default_factory=list)
|
||||
completeness_summary: str = ""
|
||||
capability_labels: Dict[str, str] = Field(default_factory=dict) # capability_id -> human label (DE)
|
||||
|
||||
|
||||
@router.get("/targets")
|
||||
@@ -65,10 +66,17 @@ def advisor_start_endpoint(req: OnboardingAdvisorRequest) -> AdvisorResponse:
|
||||
company=req.company, certifications=req.certifications, target=req.target,
|
||||
signals=req.scanner_findings, known_evidence=req.known_evidence,
|
||||
products=req.products, markets=req.markets, industry=req.industry or "")
|
||||
surfaced = [
|
||||
*result.auto_detected, *result.indications, *result.capability_delta,
|
||||
*(q.capability_id for q in result.next_best_questions),
|
||||
*(c for a in result.inferred_assumptions for c in a.capabilities),
|
||||
*(m.capability_id for m in result.top_measures),
|
||||
]
|
||||
return AdvisorResponse(
|
||||
silent_intake_summary=si_summary, headline=result.headline, auto_detected=result.auto_detected,
|
||||
indications=result.indications,
|
||||
inferred_assumptions=result.inferred_assumptions, rejected_assumptions=result.rejected_assumptions,
|
||||
top_5_questions=result.next_best_questions, capability_delta=result.capability_delta,
|
||||
top_measures=result.top_measures, evidence_requests=result.evidence_requests,
|
||||
unsupported_domains=result.unsupported_domains, completeness_summary=result.completeness_summary)
|
||||
unsupported_domains=result.unsupported_domains, completeness_summary=result.completeness_summary,
|
||||
capability_labels=labels_for(surfaced))
|
||||
|
||||
@@ -9,7 +9,7 @@ It adds NO new reasoning logic — it only exposes what exists. No DB, no persis
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any, Dict, List, Sequence, Tuple
|
||||
from typing import Any, Dict, Iterable, List, Sequence, Tuple
|
||||
|
||||
import yaml
|
||||
|
||||
@@ -37,6 +37,13 @@ def _load(*parts: str) -> Any:
|
||||
_HYP_LIB = [CapabilityHypothesis(**h) for h in _load("certification_hypotheses", "hypotheses.yaml")["hypotheses"]]
|
||||
_VOCAB = [SignalVocabularyEntry(**v) for v in _load("onboarding", "signal_vocabulary.yaml")["signals"]]
|
||||
_SIGNAL_MAP = [SignalMapping(**m) for m in _load("onboarding", "intake_signal_map.yaml")["mappings"]]
|
||||
_LABELS: Dict[str, str] = _load("onboarding", "capability_labels.yaml")["labels"]
|
||||
|
||||
|
||||
def labels_for(capability_ids: Iterable[str]) -> Dict[str, str]:
    """Map capability ids to their curated human-readable (DE) labels.

    Presentation helper only. An id that has no entry in the curated label
    table is left out of the result, so the caller can fall back to a
    prettified id. Repeated ids yield a single entry, and the result keeps
    the order in which ids were first seen, which makes the output
    deterministic for a given input order.
    """
    found: Dict[str, str] = {}
    for cap_id in capability_ids:
        # Skip ids without a curated label and ids already collected.
        if cap_id in _LABELS and cap_id not in found:
            found[cap_id] = _LABELS[cap_id]
    return found
|
||||
|
||||
# target id -> transition pattern that defines its required capabilities (curated registry)
|
||||
_TARGET_PATTERNS = {
|
||||
@@ -53,9 +60,10 @@ def supported_targets() -> List[str]:
|
||||
|
||||
def _target(target_id: str) -> Tuple[List[TargetRequirement], Dict[str, List[str]]]:
|
||||
pat = _load("transition_patterns", _TARGET_PATTERNS[target_id])
|
||||
reqs = [TargetRequirement(capability_id=a["capability"]) for a in pat["likely_covered"]]
|
||||
reqs = [TargetRequirement(capability_id=a["capability"], rationale=a.get("reviewable_claim", "")) for a in pat["likely_covered"]]
|
||||
reqs += [TargetRequirement(capability_id=d["capability"], question_intent=d.get("needed_information", "verify_existence"),
|
||||
expected_evidence=d.get("expected_evidence", [])) for d in pat["delta_requirements"]]
|
||||
rationale=d.get("why_asked", ""), expected_evidence=d.get("expected_evidence", []))
|
||||
for d in pat["delta_requirements"]]
|
||||
covers = {d["capability"]: d.get("covers_targets", []) for d in pat["delta_requirements"]}
|
||||
return reqs, covers
|
||||
|
||||
|
||||
@@ -104,7 +104,8 @@ def assess_transition(
|
||||
)
|
||||
buckets[status].append(req.capability_id)
|
||||
if status in _REQUESTABLE:
|
||||
reason, prio = _REQUESTABLE[status]
|
||||
default_reason, prio = _REQUESTABLE[status]
|
||||
reason = req.rationale or default_reason # curated human text wins over the generic fallback
|
||||
requests.append(
|
||||
TransitionQuestionRequest(
|
||||
capability_id=req.capability_id,
|
||||
|
||||
@@ -70,6 +70,7 @@ class TargetRequirement(BaseModel):
|
||||
|
||||
capability_id: str # MCAP-...
|
||||
question_intent: str = "verify_existence" # passed through to the request, not rendered
|
||||
rationale: str = "" # curated human text (e.g. why_asked / reviewable_claim) — surfaced as the request reason
|
||||
expected_evidence: List[str] = Field(default_factory=list)
|
||||
source_control_id: Optional[str] = None
|
||||
supports_obligations: List[str] = Field(default_factory=list)
|
||||
|
||||
Reference in New Issue
Block a user