Files
breakpilot-compliance/backend-compliance/compliance/onboarding/engine.py
T
Benjamin Admin 3ba90f49cf feat: Smart Onboarding Advisor — make the knowledge usable in onboarding (ADR-012)
The user-named "right next runtime step": stop building knowledge, start using it automatically in
onboarding — no sales training, no regulation picking. compliance/onboarding/ is an ORCHESTRATOR (not
a new engine) wiring Company 2A -> RS-005 -> optimization -> completeness:

  advisor_start(input, cert_hypotheses, target_requirements, ...) -> AdvisorResult

From (company + products + certifications + target) it returns inferred_assumptions, rejected_
assumptions, next_best_questions (<=5, ranked by information_gain + leverage + unknown_high_risk +
evidence_missing, each self-explaining), capability_delta, top_measures, evidence_requests,
unsupported_domains, completeness_summary. apply_answer() updates the profile (delta shrinks).

Welt-1 throughout: certificates REDUCE questions but satisfy nothing automatically (verification_
required); relevance(evidence,target) keeps ISO 14001 out of the CRA result. Certificate->capability
hypotheses + target requirements are INJECTED (curated knowledge, outsourced; not in code).

All 7 acceptance criteria pass; mypy --strict clean. First app-caller wiring the engines into a
product flow — still no endpoint/persistence, so 0 runtime effect -> no deploy yet (deploys when
POST /onboarding/advisor-start + frontend are wired). check-loc 0.
2026-06-28 12:45:49 +02:00

145 lines
6.6 KiB
Python

"""Smart Onboarding Advisor — orchestration over the existing engines (the onboarding runtime step).
The point of the whole platform, made usable: the user types company + products + certifications +
target, and the system does the rest — no sales interpretation, no regulation picking. This is an
ORCHESTRATOR, not a new engine: it wires Company 2A (Evidence -> Capability), RS-005 (Capability ->
Delta), optimization (Delta -> Roadmap) and completeness into one onboarding flow.
Three principles it must honour (acceptance criteria):
- Multi-cert works; a profile is built from ALL certificates.
- relevance(evidence, target): ISO 14001 is NOT falsely relevant to the CRA; ISO 27001/TISAX REDUCE
questions but satisfy NOTHING automatically (Welt-1 -> verification_required).
- Only the NEXT BEST questions (<= 5), each explaining WHY; every answer updates the profile.
Certificate -> probable-capability hypotheses and the target's required capabilities are INJECTED (the
hypotheses are curated knowledge, not in this code). No corpus loaded here. Python 3.9 compatible.
"""
from __future__ import annotations
from typing import Dict, List, Optional, Sequence
from ..company import (
CapabilityMappingEntry,
Certification,
CompanyCapabilityProfile,
CompanyContext,
build_company_profile,
)
from ..completeness import assess_completeness
from ..optimization import roadmap_from_delta
from ..reasoning.enums import Confidence
from ..transition_reasoning import (
CoverageStatus,
TargetRequirement,
TransitionContext,
TransitionGoal,
assess_transition,
)
from .schemas import (
AdvisorMeasure,
AdvisorQuestion,
AdvisorResult,
InferredAssumption,
OnboardingInput,
RejectedAssumption,
)
_GAIN = {"high": 3, "medium": 2, "low": 1}
_RISK = {"high": 2, "medium": 1, "low": 0}
def _profile(inp: OnboardingInput, cert_hypotheses: Dict[str, List[str]]) -> CompanyCapabilityProfile:
    """Build the company capability profile from the certifications the company holds.

    Only hypotheses whose certificate appears in ``inp.certifications`` and that
    list at least one capability are used. Each one becomes a mapping entry with
    MEDIUM confidence, and the company context carries exactly those certificates
    (in ``cert_hypotheses`` iteration order).

    Args:
        inp: Onboarding input; ``certifications`` and ``company`` are read.
        cert_hypotheses: Injected mapping certificate id -> probable capability ids.

    Returns:
        Whatever ``build_company_profile`` produces for that context and mapping.
    """
    held = inp.certifications
    mapping: Dict[str, CapabilityMappingEntry] = {}
    for cert_id, capability_ids in cert_hypotheses.items():
        # Skip hypotheses for certificates the company does not hold, and empty ones.
        if cert_id not in held or not capability_ids:
            continue
        mapping[cert_id] = CapabilityMappingEntry(
            capability_ids=list(capability_ids),
            confidence=Confidence.MEDIUM,
        )

    certifications = [Certification(certification_id=cert_id) for cert_id in mapping]
    context = CompanyContext(
        # Fall back to a placeholder id when no company name was entered.
        company_id=inp.company or "company",
        certifications=certifications,
    )
    return build_company_profile(context, mapping)
def advisor_start(
    inp: OnboardingInput,
    cert_hypotheses: Dict[str, List[str]],
    target_requirements: Sequence[TargetRequirement],
    target_id: str = "target",
    covers_targets: Optional[Dict[str, List[str]]] = None,
    corpus_status: Optional[Dict[str, str]] = None,
    uncertain: Optional[List[Dict[str, str]]] = None,
) -> AdvisorResult:
    """Run one onboarding pass: certs -> profile -> delta -> ranked questions + measures.

    Pure orchestration; deterministic. The certificate hypotheses and the target
    requirements are INJECTED by the caller; nothing is loaded here.

    Args:
        inp: Onboarding input (company, certifications, known evidence, target).
        cert_hypotheses: Certificate id -> capability ids it probably provides.
        target_requirements: Requirements of the target; their ``capability_id``
            values define which hypothesised capabilities count as relevant.
        target_id: Id for the transition goal; also the completeness subject
            when ``inp.target`` is empty.
        covers_targets: Capability id -> targets it closes. The list length is
            the leverage term of a question's score and is passed to the roadmap.
        corpus_status: Forwarded to ``assess_completeness`` (``{}`` when omitted).
        uncertain: Forwarded to ``assess_completeness`` (``[]`` when omitted).

    Returns:
        An ``AdvisorResult`` with inferred and rejected assumptions, at most five
        ranked questions, the sorted ids of MISSING capabilities, at most five
        measures, the sorted expected evidence, the unsupported domains, the
        completeness summary and a headline string.
    """
    leverage_by_cap = covers_targets or {}
    required_caps = {req.capability_id for req in target_requirements}

    company_profile = _profile(inp, cert_hypotheses)
    transition_ctx = TransitionContext(
        company_id=inp.company or "company",
        target=TransitionGoal(target_id=target_id),
    )
    assessment = assess_transition(transition_ctx, list(target_requirements), company_profile)

    # Per certificate: hypothesised capabilities that the target requires are
    # "inferred" (still to be verified); a certificate with hypotheses but no
    # overlap is "rejected". Certificates without any hypothesis produce neither.
    inferred: List[InferredAssumption] = []
    rejected: List[RejectedAssumption] = []
    for cert in inp.certifications:
        hypothesised = set(cert_hypotheses.get(cert, []))
        if not hypothesised:
            continue
        matching = sorted(hypothesised & required_caps)
        if matching:
            inferred.append(InferredAssumption(
                certification=cert,
                capabilities=matching,
                statement="%s legt %d relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt"
                % (cert, len(matching)),
            ))
        else:
            rejected.append(RejectedAssumption(
                certification=cert,
                statement="%s ist für dieses Ziel nicht relevant" % cert,
                reason="relevance(evidence, target) = 0 — keine geforderte Fähigkeit abgedeckt",
            ))

    # Score every question request: information gain + leverage + priority,
    # plus one point when it expects evidence and none of it is already known.
    evidence_on_file = set(inp.known_evidence)
    weighted = []
    for request in assessment.question_requests:
        points = _GAIN.get(request.information_gain.value, 1)
        points += len(leverage_by_cap.get(request.capability_id, []))
        points += _RISK.get(request.priority.value, 0)
        expected = request.expected_evidence
        if expected and evidence_on_file.isdisjoint(expected):
            points += 1
        weighted.append((points, request))

    # Highest score first; ties are broken by capability id. Keep the top five.
    top_five = sorted(weighted, key=lambda pair: (-pair[0], pair[1].capability_id))[:5]
    questions = [
        AdvisorQuestion(
            capability_id=request.capability_id,
            question_intent=request.question_intent,
            why=request.reason,
            information_value=float(points),
            priority=request.priority.value,
        )
        for points, request in top_five
    ]

    # The delta is the sorted set of capabilities whose coverage status is MISSING.
    missing_caps = sorted({
        entry.capability_id
        for entry in assessment.coverage
        if entry.status == CoverageStatus.MISSING
    })
    plan = roadmap_from_delta(
        assessment,
        {cap: leverage_by_cap.get(cap, []) for cap in missing_caps},
    )
    measures = [
        AdvisorMeasure(capability_id=item.capability_id, leverage=item.leverage, closes=item.covers)
        for item in plan.ranked_measures[:5]
    ]

    # Every piece of evidence any question request expects, de-duplicated and sorted.
    evidence_wanted = sorted({
        item
        for request in assessment.question_requests
        for item in request.expected_evidence
    })

    # Completeness is assessed for the input's targets, or target_id if none were given.
    applicable = list(inp.target) or [target_id]
    report = assess_completeness(applicable, corpus_status or {}, uncertain=uncertain or [])
    unsupported = [exclusion.subject for exclusion in report.exclusions]

    probably_covered = assessment.summary.probably_covered
    headline = "%d Anforderungen erkannt · %d wahrscheinlich abgedeckt · %d zu klären" % (
        len(assessment.coverage), len(probably_covered), len(questions))

    return AdvisorResult(
        inferred_assumptions=inferred,
        rejected_assumptions=rejected,
        next_best_questions=questions,
        capability_delta=missing_caps,
        top_measures=measures,
        evidence_requests=evidence_wanted,
        unsupported_domains=unsupported,
        completeness_summary=report.completeness_summary,
        headline=headline,
    )
def apply_answer(known_capabilities: Sequence[str], capability_id: str, answer: str) -> List[str]:
    """Return the known-capability list after applying one answer.

    The input is de-duplicated, keeping the first occurrence of each id in its
    original order. Only the exact answer ``"confirmed"`` adds ``capability_id``
    (appended at the end if not already present); any other answer, such as
    ``"rejected"`` or ``"unknown"``, leaves the set as it was.

    Args:
        known_capabilities: Capability ids known so far; not modified.
        capability_id: The capability the answer refers to.
        answer: Expected to be one of ``confirmed``, ``rejected``, ``unknown``.

    Returns:
        A new list of unique capability ids.
    """
    # A dict keeps insertion order, so its keys double as an ordered set.
    ordered = dict.fromkeys(known_capabilities)
    if answer == "confirmed":
        ordered.setdefault(capability_id, None)
    return list(ordered)