"""Silent Knowledge Pass — recognise everything possible BEFORE asking a single question (Phase 0). The Advisor can say "I need 5 answers" but does not yet decide WHAT it can find out by itself. The Silent Pass runs first: from signals that existing scanners/parsers already produce (website, repository, documents, product data) it deterministically derives capabilities the company demonstrably HAS and product facts that drive scope — so every recognised item shrinks the delta and removes a question. The customer then experiences "we already recognised 11 of 17 — only these 4 remain" instead of a question wall. This is NOT new architecture: it is one orchestration step in front of the Advisor Company -> Silent Intake -> Company Profile -> Hypotheses -> Delta -> Top Questions All building blocks already exist. SIGNALS are INJECTED (the scanners produce them); the signal->capability map is curated DATA, also injected. Pure, deterministic, no I/O. Python 3.9 compatible. """ from __future__ import annotations from typing import Dict, List, Optional, Sequence, Set from pydantic import BaseModel, Field class IntakeSignal(BaseModel): """A CANONICAL signal the Silent Pass consumes. Producer-agnostic: the same `signal` may have come from a website, a repo, a PDF, a tender or the user — normalize_signals() unified them (see signals.py).""" source: str # source_type: website / repository / document / product / tender / user signal: str # CANONICAL signal id, e.g. "sbom_present" kind: str = "observation" # "observation" (I saw X) | "requirement" (someone DEMANDS X) confidence: float = 1.0 # carried from the producer evidence: Optional[str] = None # the artifact already in hand provenance: str = "" # where it came from (url / filename / tender clause) — audit trail detail: str = "" # free-text (kept for back-compat) class SignalMapping(BaseModel): """Curated: what a signal lets us conclude. A signal yields a capability OR a product fact.""" signal: str capability: Optional[str] = None # capability the signal evidences relationship: str = "detected" # detected (concrete artifact) / partial (indicative) evidence: Optional[str] = None # the artifact found (already in hand -> no upload needed) product_fact: Optional[str] = None # e.g. "connected_to_internet" fact_value: str = "true" class DetectedCapability(BaseModel): capability: str relationship: str = "detected" source: str = "" # which signal/source detected it (audit trail) evidence: Optional[str] = None confidence: float = 1.0 # carried from the producing signal provenance: str = "" # where the signal came from class ProductFact(BaseModel): key: str value: str = "true" source: str = "" class SilentIntakeResult(BaseModel): detected_capabilities: List[DetectedCapability] = Field(default_factory=list) product_facts: List[ProductFact] = Field(default_factory=list) evidence_found: List[str] = Field(default_factory=list) requirements_seen: List[str] = Field(default_factory=list) # requirement-kind signals — preserved, NOT present summary: str = "" def capability_ids(self) -> List[str]: """The DETECTED capability ids (relationship == detected) — fed into the Advisor as already-present (delta-reducing, not asked). ONLY observation-kind signals reach here (requirements never become a present capability); a merely PARTIAL/indicative signal does NOT (see indicative_capability_ids).""" return sorted({d.capability for d in self.detected_capabilities if d.relationship == "detected"}) def indicative_capability_ids(self) -> List[str]: """Capabilities backed only by a PARTIAL/indicative signal — they raise assumption strength but do NOT replace a question (the gap stays open and is still asked, just with an indication shown).""" return sorted({d.capability for d in self.detected_capabilities if d.relationship != "detected"}) def silent_intake( signals: Sequence[IntakeSignal], signal_map: Sequence[SignalMapping] ) -> SilentIntakeResult: """Derive capabilities + product facts from injected scanner signals (deterministic, no questions). Each signal is matched to curated mappings by `signal` id; a mapping contributes either a detected capability (+ optional evidence already in hand) or a product fact. Deduped, deterministic order. """ by_signal: Dict[str, List[SignalMapping]] = {} for m in signal_map: by_signal.setdefault(m.signal, []).append(m) caps: Dict[str, DetectedCapability] = {} facts: Dict[str, ProductFact] = {} evidence: Set[str] = set() requirements: Set[str] = set() for s in signals: if s.kind != "observation": # a requirement describes a TARGET, never the present state requirements.add(s.signal) # preserved + visible, but NEVER turned into a capability continue for m in by_signal.get(s.signal, []): if m.capability and m.capability not in caps: caps[m.capability] = DetectedCapability( capability=m.capability, relationship=m.relationship, source="%s:%s" % (s.source, s.signal), evidence=m.evidence, confidence=s.confidence, provenance=s.provenance) if m.evidence: evidence.add(m.evidence) if m.product_fact: facts[m.product_fact] = ProductFact(key=m.product_fact, value=m.fact_value, source=s.source) detected = [caps[k] for k in sorted(caps)] product_facts = [facts[k] for k in sorted(facts)] requirements_seen = sorted(requirements) summary = ( "Stille Vorbefüllung: %d Fähigkeit(en) automatisch erkannt, %d Produktfakt(en), %d Nachweis(e) " "bereits vorhanden, %d Anforderung(en) erkannt (nicht als vorhanden gewertet)." % (len(detected), len(product_facts), len(evidence), len(requirements_seen)) ) return SilentIntakeResult( detected_capabilities=detected, product_facts=product_facts, evidence_found=sorted(evidence), requirements_seen=requirements_seen, summary=summary)