fix(onboarding): decouple partial/indicative signals from detected — partial no longer removes a question

Fix B of the pre-#59 semantic correction.

The Silent Pass had only TWO effective states although the data carries three: a `detected` mapping (a concrete artifact) AND a `partial` mapping (an indicative signal, e.g. a CI pipeline -> secure-development-lifecycle) both flowed through capability_ids() and were fed to the Advisor as already-present. As a result, a weak indication silently removed a question, which undermines exactly the Welt-1/Welt-2 transparency we want to keep.

Now there are three distinct states:
- detected -> reduces the delta immediately (auto_detected, not asked). [unchanged]
- partial -> raises assumption strength but does NOT replace the question (surfaced as `indications`; the capability stays in the delta and is still asked).
- requirement -> describes a target, never the present state (already handled by Fix A's kind split).

Changes (data + thin wiring, no new architecture):
- SilentIntakeResult.capability_ids() returns only relationship == detected; the new indicative_capability_ids() returns the partial ones.
- advisor_start() gains indicative_capabilities (NOT fed into the profile) and surfaces result.indications = (indicative ∩ required) − auto_detected.
- AdvisorResult / AdvisorResponse gain `indications` (additive, contract-safe); the service passes the indicative ids through.

Tests: a partial CI signal is indicative, not detected, and does NOT shrink the delta; end-to-end it appears in `indications`, not in `auto_detected`, and the gap is still asked.

28 onboarding tests pass, mypy --strict is clean on the onboarding modules, the demo runs, check-loc 0. Runtime effect -> deploy + smoke.
2026-06-28 16:02:35 +02:00
parent 19931208a9
commit 978052b5a2
7 changed files with 51 additions and 5 deletions
@@ -41,6 +41,7 @@ class AdvisorResponse(BaseModel):
    silent_intake_summary: str = ""
    headline: str = ""
    auto_detected: List[str] = Field(default_factory=list)
    indications: List[str] = Field(default_factory=list)        # partial signal: raises strength, still asked
    inferred_assumptions: List[InferredAssumption] = Field(default_factory=list)
    rejected_assumptions: List[RejectedAssumption] = Field(default_factory=list)
    top_5_questions: List[AdvisorQuestion] = Field(default_factory=list)
@@ -66,6 +67,7 @@ def advisor_start_endpoint(req: OnboardingAdvisorRequest) -> AdvisorResponse:
        products=req.products, markets=req.markets, industry=req.industry or "")
    return AdvisorResponse(
        silent_intake_summary=si_summary, headline=result.headline, auto_detected=result.auto_detected,
        indications=result.indications,
        inferred_assumptions=result.inferred_assumptions, rejected_assumptions=result.rejected_assumptions,
        top_5_questions=result.next_best_questions, capability_delta=result.capability_delta,
        top_measures=result.top_measures, evidence_requests=result.evidence_requests,
@@ -75,6 +75,7 @@ def advisor_start(
    corpus_status: Optional[Dict[str, str]] = None,
    uncertain: Optional[List[Dict[str, str]]] = None,
    detected_capabilities: Optional[Sequence[str]] = None,
    indicative_capabilities: Optional[Sequence[str]] = None,
 ) -> AdvisorResult:
    """Run the onboarding flow: (silent intake +) certs -> profile -> delta -> ranked questions + measures.
@@ -86,6 +87,9 @@ def advisor_start(
    required = {r.capability_id for r in target_requirements}
    profile = _profile(inp, cert_hypotheses, detected_capabilities)
    auto_detected = sorted(set(detected_capabilities or []) & required)
    # partial/indicative signals raise assumption strength but are NOT fed into the profile -> the gap
    # stays open and is still asked. Surface only those still relevant and NOT already auto-detected.
    indications = sorted((set(indicative_capabilities or []) & required) - set(auto_detected))
    assess = assess_transition(
        TransitionContext(company_id=inp.company or "company", target=TransitionGoal(target_id=target_id)),
        list(target_requirements), profile)
@@ -135,6 +139,7 @@ def advisor_start(
    probably = [c for c in assess.summary.probably_covered if c not in set(auto_detected)]
    return AdvisorResult(
        inferred_assumptions=inferred, rejected_assumptions=rejected, auto_detected=auto_detected,
        indications=indications,
        next_best_questions=next_q, capability_delta=delta, top_measures=measures,
        evidence_requests=evidence, unsupported_domains=unsupported,
        completeness_summary=rep.completeness_summary,
@@ -53,7 +53,8 @@ class AdvisorMeasure(BaseModel):
 class AdvisorResult(BaseModel):
    inferred_assumptions: List[InferredAssumption] = Field(default_factory=list)
    rejected_assumptions: List[RejectedAssumption] = Field(default_factory=list)
-    auto_detected: List[str] = Field(default_factory=list)                     # Silent Pass: recognised w/o asking
+    auto_detected: List[str] = Field(default_factory=list)                     # detected (concrete artifact): recognised w/o asking
    indications: List[str] = Field(default_factory=list)                       # partial signal: raises assumption strength, STILL asked
    next_best_questions: List[AdvisorQuestion] = Field(default_factory=list)   # max 5
    capability_delta: List[str] = Field(default_factory=list)
    top_measures: List[AdvisorMeasure] = Field(default_factory=list)
@@ -66,10 +66,15 @@ class SilentIntakeResult(BaseModel):
    summary: str = ""
    def capability_ids(self) -> List[str]:
-        """The detected capability ids — fed into the Advisor as already-present (delta-reducing).
+        """The DETECTED capability ids (relationship == detected) — fed into the Advisor as already-present
        (delta-reducing, not asked). ONLY observation-kind signals reach here (requirements never become a
        present capability); a merely PARTIAL/indicative signal does NOT (see indicative_capability_ids)."""
        return sorted({d.capability for d in self.detected_capabilities if d.relationship == "detected"})
-        ONLY observation-kind signals reach here (requirements never become a present capability)."""
+    def indicative_capability_ids(self) -> List[str]:
-        return sorted({d.capability for d in self.detected_capabilities})
+        """Capabilities backed only by a PARTIAL/indicative signal — they raise assumption strength but do
        NOT replace a question (the gap stays open and is still asked, just with an indication shown)."""
        return sorted({d.capability for d in self.detected_capabilities if d.relationship != "detected"})
 def silent_intake(
@@ -76,5 +76,6 @@ def run_advisor(
                          known_evidence=list(known_evidence), target=[target])
    result = advisor_start(
        inp, resolve_for_certifications(certifications, _HYP_LIB), reqs, target_id=target,
-        covers_targets=covers, corpus_status={target: "validated"}, detected_capabilities=si.capability_ids())
+        covers_targets=covers, corpus_status={target: "validated"},
        detected_capabilities=si.capability_ids(), indicative_capabilities=si.indicative_capability_ids())
    return result, si.summary
@@ -61,6 +61,18 @@ def test_requirement_signal_does_not_auto_detect_capability():
    assert "sbom_creation" in asked or "sbom_creation" in d["capability_delta"]       # still an open gap
 def test_partial_signal_surfaces_as_indication_and_is_still_asked():
    # End-to-end via the HTTP endpoint: a PARTIAL observation (a CI pipeline) is reported as an indication but does NOT replace the question
    body = dict(_BODY, scanner_findings=[{"signal_id": "github_actions_ci", "source_type": "repository"}])  # copy of _BODY with a single CI finding
    r = _client.post("/onboarding/advisor-start", json=body)
    assert r.status_code == 200, r.text                                        # response text doubles as the failure message
    d = r.json()
    assert "secure_development_lifecycle" not in d["auto_detected"]            # partial != detected
    assert "secure_development_lifecycle" in d["indications"]                  # but it is surfaced as an indication
    asked = {q["capability_id"] for q in d["top_5_questions"]}                 # capability ids of the returned questions
    assert "secure_development_lifecycle" in asked or "secure_development_lifecycle" in d["capability_delta"]  # gap still open: asked or in the delta
 def test_unknown_target_is_404():
    body = dict(_BODY, target="NOPE")
    r = _client.post("/onboarding/advisor-start", json=body)
@@ -77,3 +77,23 @@ def test_detected_capabilities_are_not_asked_again():
                        detected_capabilities=detected)
    asked = {q.capability_id for q in res.next_best_questions}
    assert "sbom_creation" not in asked and "sbom_creation" not in res.capability_delta
 def test_partial_signal_is_indicative_not_detected():
    # Unit level (silent_intake only): a PARTIAL signal (CI present -> secure dev lifecycle) raises assumption
    # strength but is NOT a detected capability: it must NOT shrink the delta the way a concrete artifact does.
    res = silent_intake([IntakeSignal(source="repository", signal="github_actions_ci")], _MAP)
    assert "secure_development_lifecycle" not in res.capability_ids()           # not counted as present
    assert res.indicative_capability_ids() == ["secure_development_lifecycle"]  # the single signal yields exactly this one indication
 def test_partial_indication_does_not_remove_the_question():  # silent_intake -> advisor_start: a partial signal must leave the gap open
    inp = OnboardingInput(company="x", certifications=["ISO27001"], target=["CRA"])
    hyp = resolve_for_certifications(inp.certifications, _LIB)
    si = silent_intake([IntakeSignal(source="repository", signal="github_actions_ci")], _MAP)  # one CI signal from the repository
    res = advisor_start(inp, hyp, _REQ, target_id="CRA", corpus_status={"CRA": "validated"},
                        detected_capabilities=si.capability_ids(),               # detected ids and ...
                        indicative_capabilities=si.indicative_capability_ids())  # ... indicative ids are passed separately
    assert "secure_development_lifecycle" not in res.auto_detected               # partial != detected
    assert "secure_development_lifecycle" in res.indications                     # surfaced as an indication
    assert "secure_development_lifecycle" in res.capability_delta                # gap still open / asked