feat: Smart Onboarding Advisor — make the knowledge usable in onboarding (ADR-012)

The user-named "right next runtime step": stop building knowledge, start using it automatically in onboarding — no sales training, no regulation picking. compliance/onboarding/ is an ORCHESTRATOR (not a new engine) wiring Company 2A -> RS-005 -> optimization -> completeness: advisor_start(input, cert_hypotheses, target_requirements, ...) -> AdvisorResult. From (company + products + certifications + target) it returns inferred_assumptions, rejected_assumptions, next_best_questions (<=5, ranked by information_gain + leverage + unknown_high_risk + evidence_missing, each self-explaining), capability_delta, top_measures, evidence_requests, unsupported_domains, completeness_summary. apply_answer() updates the profile (delta shrinks). Welt-1 throughout: certificates REDUCE questions but satisfy nothing automatically (verification_required); relevance(evidence,target) keeps ISO 14001 out of the CRA result. Certificate->capability hypotheses + target requirements are INJECTED (curated knowledge, outsourced; not in code). All 7 acceptance criteria pass; mypy --strict clean. First app-caller wiring the engines into a product flow — still no endpoint/persistence, so 0 runtime effect -> no deploy yet (deploys when POST /onboarding/advisor-start + frontend are wired). check-loc 0.
2026-06-28 12:45:49 +02:00
parent 009083882a
commit 3ba90f49cf
7 changed files with 487 additions and 0 deletions
@@ -0,0 +1,30 @@
+"""Smart Onboarding Advisor — the onboarding runtime step (orchestration over existing engines).
+
+Turns (company + products + certifications + target) into inferred assumptions, the next best questions
+(<=5, each self-explaining), the capability delta, top measures, evidence requests and completeness —
+with NO sales interpretation and NO regulation picking. Orchestrator only: no new engine/registry/
+meta-model; certificate->capability hypotheses and target requirements are INJECTED.
+"""
+
+from __future__ import annotations
+
+from .engine import advisor_start, apply_answer
+from .schemas import (
+    AdvisorMeasure,
+    AdvisorQuestion,
+    AdvisorResult,
+    InferredAssumption,
+    OnboardingInput,
+    RejectedAssumption,
+)
+
+# Public surface of the onboarding package: the two entry points imported from `.engine`
+# followed by the DTO models imported from `.schemas`.
+__all__ = [
+    "advisor_start",
+    "apply_answer",
+    "OnboardingInput",
+    "AdvisorResult",
+    "AdvisorQuestion",
+    "AdvisorMeasure",
+    "InferredAssumption",
+    "RejectedAssumption",
+]
@@ -0,0 +1,144 @@
+"""Smart Onboarding Advisor — orchestration over the existing engines (the onboarding runtime step).
+
+The point of the whole platform, made usable: the user types company + products + certifications +
+target, and the system does the rest — no sales interpretation, no regulation picking. This is an
+ORCHESTRATOR, not a new engine: it wires Company 2A (Evidence -> Capability), RS-005 (Capability ->
+Delta), optimization (Delta -> Roadmap) and completeness into one onboarding flow.
+
+Three principles it must honour (acceptance criteria):
+  - Multi-cert works; a profile is built from ALL certificates.
+  - relevance(evidence, target): ISO 14001 is NOT falsely relevant to the CRA; ISO 27001/TISAX REDUCE
+    questions but satisfy NOTHING automatically (Welt-1 -> verification_required).
+  - Only the NEXT BEST questions (<= 5), each explaining WHY; every answer updates the profile.
+
+Certificate -> probable-capability hypotheses and the target's required capabilities are INJECTED (the
+hypotheses are curated knowledge, not in this code). No corpus loaded here. Python 3.9 compatible.
+"""
+
+from __future__ import annotations
+
+from typing import Dict, List, Optional, Sequence
+
+from ..company import (
+    CapabilityMappingEntry,
+    Certification,
+    CompanyCapabilityProfile,
+    CompanyContext,
+    build_company_profile,
+)
+from ..completeness import assess_completeness
+from ..optimization import roadmap_from_delta
+from ..reasoning.enums import Confidence
+from ..transition_reasoning import (
+    CoverageStatus,
+    TargetRequirement,
+    TransitionContext,
+    TransitionGoal,
+    assess_transition,
+)
+from .schemas import (
+    AdvisorMeasure,
+    AdvisorQuestion,
+    AdvisorResult,
+    InferredAssumption,
+    OnboardingInput,
+    RejectedAssumption,
+)
+
+# Score contribution per level, looked up by the `.value` of a question request's enum fields
+# when ranking the next best questions in advisor_start.
+# _GAIN is keyed by `information_gain.value`; an unlisted value falls back to 1 at the lookup site.
+_GAIN = {"high": 3, "medium": 2, "low": 1}
+# _RISK is keyed by `priority.value`; an unlisted value falls back to 0 at the lookup site.
+_RISK = {"high": 2, "medium": 1, "low": 0}
+
+
+def _profile(inp: OnboardingInput, cert_hypotheses: Dict[str, List[str]]) -> CompanyCapabilityProfile:
+    """Build the company capability profile from the certificates named in the input.
+
+    A hypothesis is kept only when its certificate appears in ``inp.certifications`` and it lists
+    at least one capability. Every kept hypothesis becomes a MEDIUM-confidence mapping entry, and
+    the company context carries exactly the certificates that produced an entry. The company id
+    falls back to ``"company"`` when ``inp.company`` is empty.
+    """
+    held = inp.certifications
+    mapping: Dict[str, CapabilityMappingEntry] = {}
+    for cert_id, cap_ids in cert_hypotheses.items():
+        # Skip hypotheses without capabilities and certificates the company did not name.
+        if not cap_ids or cert_id not in held:
+            continue
+        mapping[cert_id] = CapabilityMappingEntry(capability_ids=list(cap_ids), confidence=Confidence.MEDIUM)
+
+    certs = [Certification(certification_id=cert_id) for cert_id in mapping]
+    context = CompanyContext(company_id=inp.company or "company", certifications=certs)
+    return build_company_profile(context, mapping)
+
+
+def advisor_start(
+    inp: OnboardingInput,
+    cert_hypotheses: Dict[str, List[str]],
+    target_requirements: Sequence[TargetRequirement],
+    target_id: str = "target",
+    covers_targets: Optional[Dict[str, List[str]]] = None,
+    corpus_status: Optional[Dict[str, str]] = None,
+    uncertain: Optional[List[Dict[str, str]]] = None,
+) -> AdvisorResult:
+    """Run the onboarding flow: certs -> profile -> delta -> ranked next-best questions + measures.
+
+    Pure orchestration over the injected knowledge and the existing engines. The ranking done
+    here is a plain sort on (score descending, capability_id ascending), so equal engine output
+    always yields the same question order.
+
+    Args:
+        inp: Onboarding input. This function reads ``company``, ``certifications``,
+            ``known_evidence`` and ``target``.
+        cert_hypotheses: INJECTED mapping certificate id -> probable capability ids.
+        target_requirements: INJECTED requirements of the target; their ``capability_id`` values
+            define which hypothesised capabilities count as relevant.
+        target_id: Id used for the transition goal, and as the single applicable target for the
+            completeness check when ``inp.target`` is empty.
+        covers_targets: Capability id -> targets it closes. The list length is the leverage term
+            of a question's score; the entries for missing capabilities are passed to the roadmap.
+        corpus_status: Forwarded to ``assess_completeness`` (``{}`` when omitted).
+        uncertain: Forwarded to ``assess_completeness`` (``[]`` when omitted).
+
+    Returns:
+        AdvisorResult with inferred/rejected assumptions, at most 5 ranked questions, the sorted
+        ids of MISSING capabilities, at most 5 measures, the sorted expected evidence of all
+        question requests, the completeness exclusions' subjects and a headline.
+    """
+    covers_targets = covers_targets or {}
+    required = {r.capability_id for r in target_requirements}
+    profile = _profile(inp, cert_hypotheses)
+    assess = assess_transition(
+        TransitionContext(company_id=inp.company or "company", target=TransitionGoal(target_id=target_id)),
+        list(target_requirements), profile)
+
+    # inferred (Welt-1): per cert, the caps it probably provides that are RELEVANT to this target.
+    # dict.fromkeys de-duplicates the certificate list in first-seen order, so a certificate that
+    # was entered twice yields one assumption instead of two identical ones.
+    inferred: List[InferredAssumption] = []
+    rejected: List[RejectedAssumption] = []
+    for cert in dict.fromkeys(inp.certifications):
+        caps = set(cert_hypotheses.get(cert, []))
+        relevant = sorted(caps & required)
+        if relevant:
+            inferred.append(InferredAssumption(
+                certification=cert, capabilities=relevant,
+                statement="%s legt %d relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt"
+                % (cert, len(relevant))))
+        elif caps:
+            # The certificate has hypotheses, but none of them is required by this target.
+            rejected.append(RejectedAssumption(
+                certification=cert,
+                statement="%s ist für dieses Ziel nicht relevant" % cert,
+                reason="relevance(evidence, target) = 0 — keine geforderte Fähigkeit abgedeckt"))
+        # A certificate without any hypothesis is neither inferred nor rejected.
+
+    # next best questions (<=5): re-rank the RS-005 requests by info gain + leverage + risk + evidence-gap
+    known_ev = set(inp.known_evidence)
+    scored = []
+    for q in assess.question_requests:
+        lev = len(covers_targets.get(q.capability_id, []))
+        # +1 when the question expects evidence and none of it is among the known evidence.
+        ev_missing = 1 if (q.expected_evidence and not (set(q.expected_evidence) & known_ev)) else 0
+        score = _GAIN.get(q.information_gain.value, 1) + lev + _RISK.get(q.priority.value, 0) + ev_missing
+        scored.append((score, q))
+    # Highest score first; capability_id breaks ties so the order is stable.
+    scored.sort(key=lambda x: (-x[0], x[1].capability_id))
+    next_q = [
+        AdvisorQuestion(capability_id=q.capability_id, question_intent=q.question_intent, why=q.reason,
+                        information_value=float(s), priority=q.priority.value)
+        for s, q in scored[:5]
+    ]
+
+    # Delta = capabilities whose coverage status is MISSING; the roadmap is built for exactly those.
+    delta = sorted({c.capability_id for c in assess.coverage if c.status == CoverageStatus.MISSING})
+    plan = roadmap_from_delta(assess, {c: covers_targets.get(c, []) for c in delta})
+    measures = [AdvisorMeasure(capability_id=m.capability_id, leverage=m.leverage, closes=m.covers)
+                for m in plan.ranked_measures[:5]]
+    # Evidence is collected from ALL question requests, not only the top 5.
+    evidence = sorted({e for q in assess.question_requests for e in q.expected_evidence})
+
+    # Completeness runs on the user-named targets, or on target_id when none were given.
+    applicable = list(inp.target) or [target_id]
+    rep = assess_completeness(applicable, corpus_status or {}, uncertain=uncertain or [])
+    unsupported = [e.subject for e in rep.exclusions]
+
+    probably = assess.summary.probably_covered
+    return AdvisorResult(
+        inferred_assumptions=inferred, rejected_assumptions=rejected, next_best_questions=next_q,
+        capability_delta=delta, top_measures=measures, evidence_requests=evidence,
+        unsupported_domains=unsupported, completeness_summary=rep.completeness_summary,
+        headline="%d Anforderungen erkannt · %d wahrscheinlich abgedeckt · %d zu klären"
+        % (len(assess.coverage), len(probably), len(next_q)))
+
+
+def apply_answer(known_capabilities: Sequence[str], capability_id: str, answer: str) -> List[str]:
+    """Return the known-capability list after taking one answer into account.
+
+    The input is de-duplicated in first-seen order. Only the exact answer ``"confirmed"`` appends
+    ``capability_id`` (when it is not already present); any other answer, including ``"rejected"``
+    and ``"unknown"``, returns the de-duplicated list unchanged. The input sequence is not mutated.
+    """
+    # A dict keeps insertion order, so its keys serve as an ordered set.
+    ordered = dict.fromkeys(known_capabilities)
+    if answer == "confirmed":
+        ordered.setdefault(capability_id, None)
+    return list(ordered)
@@ -0,0 +1,62 @@
+"""Schemas for the Smart Onboarding Advisor — the onboarding RUNTIME step.
+
+DTOs only. The Advisor ORCHESTRATES the existing engines (Company 2A, RS-005, optimization,
+completeness) — no new reasoning engine, no new capability registry, no new meta-model. Welt-1
+discipline: a certificate yields PROBABLE capabilities (verification required), never "erfüllt".
+Python 3.9 compatible (no `|` unions).
+"""
+
+from __future__ import annotations
+
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+
+class OnboardingInput(BaseModel):
+    # What the user enters during onboarding. Every field has a default, so an empty input is valid.
+    company: str = ""
+    industry: Optional[str] = None
+    products: List[str] = Field(default_factory=list)
+    markets: List[str] = Field(default_factory=list)
+    # certificate ids; the engine looks these up as keys of the injected certificate hypotheses
+    certifications: List[str] = Field(default_factory=list)
+    # evidence ids already on hand; the engine compares them with each question's expected evidence
+    known_evidence: List[str] = Field(default_factory=list)
+    target: List[str] = Field(default_factory=list)          # targets handed to the completeness check; the delta uses injected requirements
+
+
+class InferredAssumption(BaseModel):
+    # One certificate together with the target-relevant capabilities it PROBABLY provides.
+    certification: str
+    capabilities: List[str] = Field(default_factory=list)    # RELEVANT-to-target caps the cert probably provides
+    verification_required: bool = True                       # Welt-1: never auto-satisfied
+    statement: str = ""                                      # human-readable sentence for this assumption
+
+
+class RejectedAssumption(BaseModel):
+    # An assumption that was discarded, with the reason why.
+    certification: Optional[str] = None                      # certificate the rejection refers to, if any
+    statement: str = ""                                      # human-readable sentence for this rejection
+    reason: str = ""                                         # e.g. "relevance(evidence, target) = 0"
+
+
+class AdvisorQuestion(BaseModel):
+    # One next-best question about a single capability.
+    capability_id: str
+    question_intent: str
+    why: str                                                 # every question explains itself
+    information_value: float = 0.0                           # deterministic rank score
+    priority: str = "medium"                                 # priority level as a plain string
+
+
+class AdvisorMeasure(BaseModel):
+    # One recommended measure: the capability to build and what building it closes.
+    capability_id: str
+    leverage: int = 0                                        # leverage value reported by the roadmap for this measure
+    closes: List[str] = Field(default_factory=list)          # what the measure covers, as reported by the roadmap
+
+class AdvisorResult(BaseModel):
+    # Complete output of one advisor run. Every field defaults to empty.
+    inferred_assumptions: List[InferredAssumption] = Field(default_factory=list)
+    rejected_assumptions: List[RejectedAssumption] = Field(default_factory=list)
+    next_best_questions: List[AdvisorQuestion] = Field(default_factory=list)   # max 5
+    capability_delta: List[str] = Field(default_factory=list)                  # ids of capabilities still missing
+    top_measures: List[AdvisorMeasure] = Field(default_factory=list)
+    evidence_requests: List[str] = Field(default_factory=list)
+    unsupported_domains: List[str] = Field(default_factory=list)
+    completeness_summary: str = ""
+    headline: str = ""                                       # "N erkannt, M wahrscheinlich abgedeckt, K zu klären"