refactor(reasoning): enforce ClaimCoverage (Welt 1) vs ComplianceStatus (Welt 2) boundary [F1]

Architecture-validation finding: the implementation mode produced compliance-flavored output ("teilweise erfüllt", "covered") from a mere customer claim, blurring the line to the Execution layer. This is a design decision, not a text fix — the reasoning layer judges only the customer's STATEMENT, never conformity.

- CoverageStatus -> ClaimCoverage; values are claim-relative + carry "potential": potentially_addresses / partially_addresses / does_not_address / insufficient_information.
- ImplementationAssessment -> ClaimObligationMapping (coverage_status -> claim_coverage); ImplementationResponse -> ImplementationReasoningResponse (assessments -> mappings, + explicit `disclaimer`); request renamed; engine entry assess_implementation -> reason_implementation_claim.
- Endpoint /reasoning/implementation-assessment -> /reasoning/implementation-reasoning.
- Summary/explanations reworded: "adressiert wahrscheinlich N Pflichten … für eine Bewertung der tatsächlichen Umsetzung sind Nachweise erforderlich (keine Konformitätsaussage)". No "erfüllt"/"abgedeckt" leaks.
- New guard test asserts no compliance verdict leaks (no "erfüllt"; disclaimer separates ClaimCoverage from ComplianceStatus).

23 tests green, mypy clean. Discovery (scope/obligations) was already structurally claim-free and unaffected.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-26 00:37:57 +02:00
parent 1607c89459
commit 5e5002c883
6 changed files with 135 additions and 84 deletions
@@ -14,15 +14,15 @@ from fastapi import FastAPI
 from fastapi.testclient import TestClient

 from compliance.reasoning import (
-    assess_implementation,
    assess_interpretation,
    derive_obligations,
    discover_scope,
    normalize_claim,
+    reason_implementation_claim,
 )
 from compliance.reasoning.enums import (
    ApplicabilityStatus,
-    CoverageStatus,
+    ClaimCoverage,
    InterpretationVerdict,
 )
 from compliance.reasoning.schemas import ProductProfile
@@ -82,25 +82,25 @@ def test_interpretation_only_new_products_is_too_narrow():
 # 3. Reicht eine SBOM allein? -> nein, nur teilweise
 # ---------------------------------------------------------------------------
 def test_sbom_alone_is_not_enough():
-    resp = assess_implementation(sps_profile(), "Wir haben SBOMs.")
-    sbom = [a for a in resp.assessments if a.obligation_id == "sbom_creation"]
-    assert sbom and sbom[0].coverage_status == CoverageStatus.COVERED
-    # but other obligations are surfaced as gaps -> aggregate not fully covered
-    assert any(a.coverage_status != CoverageStatus.COVERED for a in resp.assessments)
-    assert "Teilweise erfüllt" in resp.summary or "offen" in resp.summary
+    resp = reason_implementation_claim(sps_profile(), "Wir haben SBOMs.")
+    sbom = [m for m in resp.mappings if m.obligation_id == "sbom_creation"]
+    assert sbom and sbom[0].claim_coverage == ClaimCoverage.POTENTIALLY_ADDRESSES
+    # but other obligations are surfaced as gaps -> claim does not address everything
+    assert any(m.claim_coverage != ClaimCoverage.POTENTIALLY_ADDRESSES for m in resp.mappings)
+    assert "Nachweise" in resp.summary


 # ---------------------------------------------------------------------------
 # 4. Ist ein reaktiver Updateprozess ausreichend? -> nur teilweise
 # ---------------------------------------------------------------------------
 def test_reactive_update_process_is_partial():
-    resp = assess_implementation(
+    resp = reason_implementation_claim(
        sps_profile(), "Wir machen Updates, wenn Kunden Fehler melden."
    )
-    upd = [a for a in resp.assessments if a.obligation_id == "provide_security_updates"]
-    assert upd and upd[0].coverage_status == CoverageStatus.PARTIALLY_COVERED
+    upd = [m for m in resp.mappings if m.obligation_id == "provide_security_updates"]
+    assert upd and upd[0].claim_coverage == ClaimCoverage.PARTIALLY_ADDRESSES
    assert "reactive" in resp.claim.qualifiers
-    assert any("Schwachstellenüberwachung" in m for m in upd[0].missing_elements)
+    assert any("Schwachstellenüberwachung" in e for e in upd[0].missing_elements)


 # ---------------------------------------------------------------------------
@@ -211,13 +211,29 @@ def test_claim_normalizer_is_deterministic():


 def test_unspecific_claim_asks_for_detail():
-    resp = assess_implementation(sps_profile(), "Wir sind sicher aufgestellt.")
-    assert resp.assessments == [] or all(
-        a.coverage_status == CoverageStatus.UNCLEAR for a in resp.assessments
+    resp = reason_implementation_claim(sps_profile(), "Wir sind sicher aufgestellt.")
+    assert resp.mappings == [] or all(
+        m.claim_coverage == ClaimCoverage.INSUFFICIENT_INFORMATION for m in resp.mappings
    )
    assert "unspezifisch" in resp.summary.lower()


+def test_claim_reasoning_carries_no_compliance_verdict():
+    """Welt-1 boundary: claim mapping must never read as a conformity verdict."""
+    resp = reason_implementation_claim(
+        sps_profile(), "Wir haben SBOMs und einen Update-Prozess."
+    )
+    # claim-relative vocabulary only
+    for m in resp.mappings:
+        assert m.claim_coverage in set(ClaimCoverage)
+    # no compliance wording leaks into summary or explanations
+    assert "erfüllt" not in resp.summary
+    assert all("erfüllt" not in m.explanation for m in resp.mappings)
+    # explicit disclaimer separating ClaimCoverage (Welt 1) from ComplianceStatus (Welt 2)
+    assert resp.disclaimer
+    assert "ComplianceStatus" in resp.disclaimer and "Nachweis" in resp.disclaimer
+
+
 # ---------------------------------------------------------------------------
 # Endpoint smoke tests
 # ---------------------------------------------------------------------------
@@ -248,11 +264,13 @@ def test_endpoint_obligations(client):

 def test_endpoint_implementation(client):
    r = client.post(
-        "/reasoning/implementation-assessment",
+        "/reasoning/implementation-reasoning",
        json={"product_profile": {"product_name": "X", "has_software": True, "eu_market": True, "manufacturer_role": "manufacturer"}, "customer_claim": "Wir haben SBOMs."},
    )
    assert r.status_code == 200
-    assert r.json()["assessments"]
+    body = r.json()
+    assert body["mappings"]
+    assert body["disclaimer"]


 def test_endpoint_interpretation(client):