# breakpilot-compliance/backend-compliance/tests/test_onboarding_endpoint.py

"""POST /onboarding/advisor-start — the runtime endpoint that exposes the existing Advisor.

Exercises the router in isolation (no DB, no full app): scanner findings (ProducedSignal) -> normalize
-> Silent Pass -> Advisor -> the advisory payload. No new reasoning logic — just the wiring.
"""

from __future__ import annotations

from fastapi import FastAPI
from fastapi.testclient import TestClient

from compliance.api.onboarding_routes import router

# Bare FastAPI application that mounts only the onboarding router, so the tests below
# exercise that router without the rest of the application.
_app = FastAPI()
_app.include_router(router)
# One module-level test client, shared by every test in this file.
_client = TestClient(_app)

# Scanner findings sent with the baseline request; the last entry deliberately carries
# neither "evidence" nor "provenance".
_SCANNER_FINDINGS = [
    {"signal_id": "cyclonedx_found", "source_type": "repository", "evidence": "sbom", "provenance": "sbom.cdx.json"},
    {"signal_id": "vdp_found", "source_type": "website", "provenance": "/.well-known/security.txt"},
    {"signal_id": "risk_assessment_pdf", "source_type": "document", "provenance": "risk.pdf"},
    {"signal_id": "cloud_hosted", "source_type": "product"},
]

# Baseline request body for POST /onboarding/advisor-start. Tests either send it as-is
# or derive a variant from it by overriding individual keys.
_BODY = {
    "company": "synthetic",
    "industry": "machine_builder",
    "products": ["parking payment system"],
    "markets": ["EU"],
    "certifications": ["ISO27001", "ISO9001"],
    "known_evidence": ["CE process"],
    "target": "CRA",
    "scanner_findings": _SCANNER_FINDINGS,
}


def test_targets_endpoint_lists_supported():
    """GET /onboarding/targets answers 200 and lists "CRA" under "targets"."""
    response = _client.get("/onboarding/targets")
    assert response.status_code == 200
    supported_targets = response.json()["targets"]
    assert "CRA" in supported_targets


def test_advisor_start_returns_full_payload():
    """POSTing the baseline body yields 200 and a payload with every advisory section."""
    response = _client.post("/onboarding/advisor-start", json=_BODY)
    assert response.status_code == 200, response.text
    payload = response.json()

    expected_fields = (
        "silent_intake_summary",
        "inferred_assumptions",
        "rejected_assumptions",
        "top_5_questions",
        "capability_delta",
        "top_measures",
        "evidence_requests",
        "completeness_summary",
    )
    missing_fields = [name for name in expected_fields if name not in payload]
    assert not missing_fields

    questions = payload["top_5_questions"]
    assert len(questions) <= 5
    # Silent Pass recognised things from the scanners, so the list must be non-empty.
    assert payload["auto_detected"]
    # A capability that was detected must not be asked about again.
    asked_capabilities = {question["capability_id"] for question in questions}
    assert "sbom_creation" not in asked_capabilities


def test_requirement_signal_does_not_auto_detect_capability():
    """A "requires_sbom" tender signal must leave sbom_creation undetected and still open."""
    # A tender that DEMANDS an SBOM is a requirement, not evidence that an SBOM exists;
    # this contrasts with a real cyclonedx_found observation.
    tender_findings = [
        {"signal_id": "requires_sbom", "source_type": "tender", "provenance": "tender §4.2"},
    ]
    request_body = {**_BODY, "scanner_findings": tender_findings}

    response = _client.post("/onboarding/advisor-start", json=request_body)
    assert response.status_code == 200, response.text
    payload = response.json()

    # demanded != present
    assert "sbom_creation" not in payload["auto_detected"]

    # The capability must remain an open gap: either asked about or part of the delta.
    asked_capabilities = {question["capability_id"] for question in payload["top_5_questions"]}
    still_open = "sbom_creation" in asked_capabilities or "sbom_creation" in payload["capability_delta"]
    assert still_open


def test_partial_signal_surfaces_as_indication_and_is_still_asked():
    """A lone CI-pipeline signal is reported as an indication but never as detected."""
    # A PARTIAL observation raises assumption strength; it does NOT replace the question.
    capability = "secure_development_lifecycle"
    ci_only_findings = [{"signal_id": "github_actions_ci", "source_type": "repository"}]
    request_body = {**_BODY, "scanner_findings": ci_only_findings}

    response = _client.post("/onboarding/advisor-start", json=request_body)
    assert response.status_code == 200, response.text
    payload = response.json()

    # partial != detected
    assert capability not in payload["auto_detected"]
    # ... but its strength is shown
    assert capability in payload["indications"]

    # The capability must still be asked about or appear in the delta.
    asked_capabilities = {question["capability_id"] for question in payload["top_5_questions"]}
    still_open = capability in asked_capabilities or capability in payload["capability_delta"]
    assert still_open


def test_questions_carry_curated_text_and_human_labels():
    """With target TISAX, questions carry curated "why" text and capabilities get labels."""
    # The curated why_asked from the transition pattern must reach the question instead
    # of the generic fallback ("Keine Anhaltspunkte ... klären"), and surfaced
    # capabilities get human-readable labels.
    request_body = {
        **_BODY,
        "certifications": ["ISO27001"],
        "target": "TISAX",
        "scanner_findings": [],
    }

    response = _client.post("/onboarding/advisor-start", json=request_body)
    assert response.status_code == 200, response.text
    payload = response.json()

    # At least one question must carry text other than the generic fallback.
    only_fallback_text = all(
        "Keine Anhaltspunkte" in question["why"] for question in payload["top_5_questions"]
    )
    assert not only_fallback_text

    label = payload["capability_labels"].get("vda_isa_self_assessment")
    assert label == "VDA-ISA-Selbstauskunft"


def test_unknown_target_is_404():
    """A target the endpoint does not know ("NOPE") is answered with HTTP 404."""
    request_body = {**_BODY, "target": "NOPE"}
    response = _client.post("/onboarding/advisor-start", json=request_body)
    assert response.status_code == 404