feat: Silent Knowledge Pass — recognise before asking (Phase 0, before the endpoint)
Not the endpoint yet — the bigger knowledge lever first. The Advisor can say "I need 5 answers" but does not yet decide what it can find out by ITSELF. The Silent Knowledge Pass runs in front of the Advisor and, from signals existing scanners/parsers already produce (website, repository, documents, product data), deterministically derives capabilities the company demonstrably HAS + product facts that drive scope — so every recognised item shrinks the delta and removes a question. compliance/onboarding/silent_intake.py: silent_intake(signals, signal_map) -> detected_capabilities (+ evidence already in hand) + product_facts. The signal->conclusion map is curated DATA (knowledge/onboarding/intake_signal_map.yaml), signals are injected (scanners are upstream). Pure, deterministic, no LLM. advisor_start gains detected_capabilities (folded into the profile at HIGH confidence -> covered, not asked) and an auto_detected result + headline. The experience flips from a question wall to "we already recognised 4 capabilities, 2 product facts and have 4 pieces of evidence in hand — only these few remain". Order now: Silent Pass -> #58 endpoint/frontend -> #59 empirical loop. NOT new architecture, just an orchestration step in front. Non-runtime (no app caller) -> no deploy. 15 onboarding tests pass, mypy --strict clean, check-loc 0.
This commit is contained in:
@@ -21,6 +21,14 @@ from .observations import (
|
||||
empirical_distribution,
|
||||
reviewed,
|
||||
)
|
||||
from .silent_intake import (
|
||||
DetectedCapability,
|
||||
IntakeSignal,
|
||||
ProductFact,
|
||||
SignalMapping,
|
||||
SilentIntakeResult,
|
||||
silent_intake,
|
||||
)
|
||||
from .schemas import (
|
||||
AdvisorMeasure,
|
||||
AdvisorQuestion,
|
||||
@@ -47,4 +55,10 @@ __all__ = [
|
||||
"empirical_distribution",
|
||||
"empirical_confidence",
|
||||
"reviewed",
|
||||
"silent_intake",
|
||||
"IntakeSignal",
|
||||
"SignalMapping",
|
||||
"DetectedCapability",
|
||||
"ProductFact",
|
||||
"SilentIntakeResult",
|
||||
]
|
||||
|
||||
@@ -49,15 +49,21 @@ _GAIN = {"high": 3, "medium": 2, "low": 1}
|
||||
_RISK = {"high": 2, "medium": 1, "low": 0}
|
||||
|
||||
|
||||
def _profile(inp: OnboardingInput, cert_hypotheses: Dict[str, List[str]]) -> CompanyCapabilityProfile:
|
||||
def _profile(
|
||||
inp: OnboardingInput, cert_hypotheses: Dict[str, List[str]],
|
||||
detected: Optional[Sequence[str]] = None,
|
||||
) -> CompanyCapabilityProfile:
|
||||
cmap = {
|
||||
cert: CapabilityMappingEntry(capability_ids=list(caps), confidence=Confidence.MEDIUM)
|
||||
for cert, caps in cert_hypotheses.items()
|
||||
if cert in inp.certifications and caps
|
||||
}
|
||||
ctx = CompanyContext(company_id=inp.company or "company",
|
||||
certifications=[Certification(certification_id=c) for c in cmap])
|
||||
return build_company_profile(ctx, cmap)
|
||||
certs = [Certification(certification_id=c) for c in cmap]
|
||||
if detected: # Silent Pass: concrete findings -> HIGH confidence
|
||||
cmap["__detected__"] = CapabilityMappingEntry(
|
||||
capability_ids=list(dict.fromkeys(detected)), confidence=Confidence.HIGH)
|
||||
certs.append(Certification(certification_id="__detected__"))
|
||||
return build_company_profile(CompanyContext(company_id=inp.company or "company", certifications=certs), cmap)
|
||||
|
||||
|
||||
def advisor_start(
|
||||
@@ -68,15 +74,18 @@ def advisor_start(
|
||||
covers_targets: Optional[Dict[str, List[str]]] = None,
|
||||
corpus_status: Optional[Dict[str, str]] = None,
|
||||
uncertain: Optional[List[Dict[str, str]]] = None,
|
||||
detected_capabilities: Optional[Sequence[str]] = None,
|
||||
) -> AdvisorResult:
|
||||
"""Run the onboarding flow: certs -> profile -> delta -> ranked next-best questions + measures.
|
||||
"""Run the onboarding flow: (silent intake +) certs -> profile -> delta -> ranked questions + measures.
|
||||
|
||||
Pure orchestration; deterministic. `cert_hypotheses` (cert -> probable cap ids) and
|
||||
`target_requirements` are INJECTED. `covers_targets` (cap -> targets it closes) drives leverage.
|
||||
Pure orchestration; deterministic. `cert_hypotheses` (cert -> probable cap ids), `target_requirements`
|
||||
and `detected_capabilities` (from the Silent Knowledge Pass) are INJECTED. Detected capabilities are
|
||||
recognised WITHOUT asking -> they shrink the delta and remove questions.
|
||||
"""
|
||||
covers_targets = covers_targets or {}
|
||||
required = {r.capability_id for r in target_requirements}
|
||||
profile = _profile(inp, cert_hypotheses)
|
||||
profile = _profile(inp, cert_hypotheses, detected_capabilities)
|
||||
auto_detected = sorted(set(detected_capabilities or []) & required)
|
||||
assess = assess_transition(
|
||||
TransitionContext(company_id=inp.company or "company", target=TransitionGoal(target_id=target_id)),
|
||||
list(target_requirements), profile)
|
||||
@@ -123,13 +132,14 @@ def advisor_start(
|
||||
rep = assess_completeness(applicable, corpus_status or {}, uncertain=uncertain or [])
|
||||
unsupported = [e.subject for e in rep.exclusions]
|
||||
|
||||
probably = assess.summary.probably_covered
|
||||
probably = [c for c in assess.summary.probably_covered if c not in set(auto_detected)]
|
||||
return AdvisorResult(
|
||||
inferred_assumptions=inferred, rejected_assumptions=rejected, next_best_questions=next_q,
|
||||
capability_delta=delta, top_measures=measures, evidence_requests=evidence,
|
||||
unsupported_domains=unsupported, completeness_summary=rep.completeness_summary,
|
||||
headline="%d Anforderungen erkannt · %d wahrscheinlich abgedeckt · %d zu klären"
|
||||
% (len(assess.coverage), len(probably), len(next_q)))
|
||||
inferred_assumptions=inferred, rejected_assumptions=rejected, auto_detected=auto_detected,
|
||||
next_best_questions=next_q, capability_delta=delta, top_measures=measures,
|
||||
evidence_requests=evidence, unsupported_domains=unsupported,
|
||||
completeness_summary=rep.completeness_summary,
|
||||
headline="%d Anforderungen erkannt · %d automatisch erkannt (Intake) · %d wahrscheinlich (Zertifikate) · %d zu klären"
|
||||
% (len(assess.coverage), len(auto_detected), len(probably), len(next_q)))
|
||||
|
||||
|
||||
def apply_answer(known_capabilities: Sequence[str], capability_id: str, answer: str) -> List[str]:
|
||||
|
||||
@@ -53,6 +53,7 @@ class AdvisorMeasure(BaseModel):
|
||||
class AdvisorResult(BaseModel):
|
||||
inferred_assumptions: List[InferredAssumption] = Field(default_factory=list)
|
||||
rejected_assumptions: List[RejectedAssumption] = Field(default_factory=list)
|
||||
auto_detected: List[str] = Field(default_factory=list) # Silent Pass: recognised w/o asking
|
||||
next_best_questions: List[AdvisorQuestion] = Field(default_factory=list) # max 5
|
||||
capability_delta: List[str] = Field(default_factory=list)
|
||||
top_measures: List[AdvisorMeasure] = Field(default_factory=list)
|
||||
|
||||
@@ -0,0 +1,99 @@
|
||||
"""Silent Knowledge Pass — recognise everything possible BEFORE asking a single question (Phase 0).
|
||||
|
||||
The Advisor can say "I need 5 answers" but does not yet decide WHAT it can find out by itself. The Silent
|
||||
Pass runs first: from signals that existing scanners/parsers already produce (website, repository,
|
||||
documents, product data) it deterministically derives capabilities the company demonstrably HAS and
|
||||
product facts that drive scope — so every recognised item shrinks the delta and removes a question.
|
||||
|
||||
The customer then experiences "we already recognised 11 of 17 — only these 4 remain" instead of a
|
||||
question wall. This is NOT new architecture: it is one orchestration step in front of the Advisor:
|
||||
Company -> Silent Intake -> Company Profile -> Hypotheses -> Delta -> Top Questions
|
||||
All building blocks already exist. SIGNALS are INJECTED (the scanners produce them); the signal->capability
|
||||
map is curated DATA, also injected. Pure, deterministic, no I/O. Python 3.9 compatible.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List, Optional, Sequence, Set
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class IntakeSignal(BaseModel):
    """A single finding handed in by an upstream scanner or parser.

    No LLM is involved here; the scanners run upstream and this model only
    carries their result.
    """

    # Where the finding came from: website / repository / document / product.
    source: str
    # Signal id, e.g. "sbom_file_found".
    signal: str
    # Optional detail (url, filename) kept for the audit trail.
    detail: str = ""
|
||||
|
||||
|
||||
class SignalMapping(BaseModel):
    """Curated rule stating what one signal id lets us conclude.

    A mapping contributes a capability OR a product fact.
    """

    # Signal id this rule applies to.
    signal: str
    # Capability the signal evidences (None when the rule only yields a fact).
    capability: Optional[str] = None
    # "detected" (concrete artifact) or "partial" (indicative).
    relationship: str = "detected"
    # The artifact that was found; it is already in hand, so no upload is needed.
    evidence: Optional[str] = None
    # Product fact key, e.g. "connected_to_internet".
    product_fact: Optional[str] = None
    # Value stored for the product fact.
    fact_value: str = "true"
|
||||
|
||||
|
||||
class DetectedCapability(BaseModel):
    """A capability recognised from a scanner signal, with its audit trail."""

    # Id of the recognised capability.
    capability: str
    # Relationship copied from the mapping that produced this entry.
    relationship: str = "detected"
    # Which signal/source detected it (audit trail).
    source: str = ""
    # Artifact backing the detection, if the mapping named one.
    evidence: Optional[str] = None
|
||||
|
||||
|
||||
class ProductFact(BaseModel):
    """A product fact derived from a scanner signal."""

    # Fact key, e.g. "connected_to_internet".
    key: str
    # Fact value, kept as a string.
    value: str = "true"
    # Source of the signal that established the fact.
    source: str = ""
|
||||
|
||||
|
||||
class SilentIntakeResult(BaseModel):
    """Everything the silent intake pass recognised without asking a question."""

    # Recognised capabilities, one entry per capability id.
    detected_capabilities: List[DetectedCapability] = Field(default_factory=list)
    # Product facts derived from the signals.
    product_facts: List[ProductFact] = Field(default_factory=list)
    # Artifacts that are already in hand.
    evidence_found: List[str] = Field(default_factory=list)
    # Human-readable one-line summary of the pass.
    summary: str = ""

    def capability_ids(self) -> List[str]:
        """Return the distinct detected capability ids in sorted order.

        These ids are what gets fed into the Advisor as already-present
        (delta-reducing) capabilities.
        """
        unique_ids: Set[str] = set()
        for entry in self.detected_capabilities:
            unique_ids.add(entry.capability)
        return sorted(unique_ids)
|
||||
|
||||
|
||||
def silent_intake(
    signals: Sequence[IntakeSignal], signal_map: Sequence[SignalMapping]
) -> SilentIntakeResult:
    """Derive capabilities + product facts from injected scanner signals (deterministic, no questions).

    Each signal is matched to the curated mappings that share its ``signal`` id. A matching
    mapping contributes a detected capability (plus optional evidence already in hand) and/or
    a product fact.

    Merge rules:
      * Capabilities are deduplicated by id. The first mapping seen wins, except that a
        "detected" (concrete artifact) mapping replaces an earlier non-"detected" one, so the
        reported relationship does not depend on the order in which signals arrive.
      * Evidence named by any matched mapping is collected into ``evidence_found``, even when
        the capability itself was already recognised through another signal.
      * Product facts are keyed by fact name; a later signal overwrites an earlier one.

    Args:
        signals: Findings produced by upstream scanners/parsers.
        signal_map: Curated signal -> conclusion rules.

    Returns:
        A ``SilentIntakeResult`` whose capabilities, facts and evidence are sorted by
        id / key / value, together with a one-line summary.
    """
    # Only "detected" outranks other relationships; any other string ranks like "partial".
    strength: Dict[str, int] = {"detected": 1}

    # Index the curated rules by signal id so each signal is a single lookup.
    mappings_by_signal: Dict[str, List[SignalMapping]] = {}
    for mapping in signal_map:
        mappings_by_signal.setdefault(mapping.signal, []).append(mapping)

    caps: Dict[str, DetectedCapability] = {}
    facts: Dict[str, ProductFact] = {}
    evidence: Set[str] = set()
    for sig in signals:
        for mapping in mappings_by_signal.get(sig.signal, []):
            if mapping.capability:
                current = caps.get(mapping.capability)
                # Keep the first hit unless this mapping is strictly stronger evidence.
                if current is None or (
                    strength.get(mapping.relationship, 0) > strength.get(current.relationship, 0)
                ):
                    caps[mapping.capability] = DetectedCapability(
                        capability=mapping.capability, relationship=mapping.relationship,
                        source="%s:%s" % (sig.source, sig.signal), evidence=mapping.evidence)
            if mapping.evidence:
                # An artifact in hand stays in hand, whichever signal found the capability first.
                evidence.add(mapping.evidence)
            if mapping.product_fact:
                facts[mapping.product_fact] = ProductFact(
                    key=mapping.product_fact, value=mapping.fact_value, source=sig.source)

    detected = [caps[k] for k in sorted(caps)]
    product_facts = [facts[k] for k in sorted(facts)]
    summary = (
        "Stille Vorbefüllung: %d Fähigkeit(en) automatisch erkannt, %d Produktfakt(en), %d Nachweis(e) bereits vorhanden."
        % (len(detected), len(product_facts), len(evidence))
    )
    return SilentIntakeResult(
        detected_capabilities=detected, product_facts=product_facts,
        evidence_found=sorted(evidence), summary=summary)
|
||||
Reference in New Issue
Block a user