refactor(reasoning): enforce ClaimCoverage (Welt 1) vs ComplianceStatus (Welt 2) boundary [F1]

Architecture-validation finding: the implementation mode produced compliance-
flavored output ("teilweise erfüllt", "covered") from a mere customer claim,
blurring the line to the Execution layer. This is a design decision, not a text
fix — the reasoning layer judges only the customer's STATEMENT, never conformity.

- CoverageStatus -> ClaimCoverage; values are claim-relative and worded as
  potential (not verified) coverage: potentially_addresses /
  partially_addresses / does_not_address / insufficient_information.
- ImplementationAssessment -> ClaimObligationMapping (coverage_status ->
  claim_coverage); ImplementationResponse -> ImplementationReasoningResponse
  (assessments -> mappings, + explicit `disclaimer`); request renamed; engine
  entry assess_implementation -> reason_implementation_claim.
- Endpoint /reasoning/implementation-assessment -> /reasoning/implementation-reasoning.
- Summary/explanations reworded: "adressiert wahrscheinlich N Pflichten … für
  eine Bewertung der tatsächlichen Umsetzung sind Nachweise erforderlich (keine
  Konformitätsaussage)". No "erfüllt"/"abgedeckt" leaks.
- New guard test asserts that no compliance verdict leaks (no "erfüllt" in the
  summary or explanations; the disclaimer separates ClaimCoverage from
  ComplianceStatus). 23 tests green, mypy clean.

Discovery (scope/obligations) was already structurally claim-free and unaffected.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-26 00:37:57 +02:00
parent 1607c89459
commit 5e5002c883
6 changed files with 135 additions and 84 deletions
@@ -14,15 +14,15 @@ from fastapi import FastAPI
from fastapi.testclient import TestClient
from compliance.reasoning import (
assess_implementation,
assess_interpretation,
derive_obligations,
discover_scope,
normalize_claim,
reason_implementation_claim,
)
from compliance.reasoning.enums import (
ApplicabilityStatus,
CoverageStatus,
ClaimCoverage,
InterpretationVerdict,
)
from compliance.reasoning.schemas import ProductProfile
@@ -82,25 +82,25 @@ def test_interpretation_only_new_products_is_too_narrow():
# 3. Reicht eine SBOM allein? -> nein, nur teilweise
# ---------------------------------------------------------------------------
def test_sbom_alone_is_not_enough():
resp = assess_implementation(sps_profile(), "Wir haben SBOMs.")
sbom = [a for a in resp.assessments if a.obligation_id == "sbom_creation"]
assert sbom and sbom[0].coverage_status == CoverageStatus.COVERED
# but other obligations are surfaced as gaps -> aggregate not fully covered
assert any(a.coverage_status != CoverageStatus.COVERED for a in resp.assessments)
assert "Teilweise erfüllt" in resp.summary or "offen" in resp.summary
resp = reason_implementation_claim(sps_profile(), "Wir haben SBOMs.")
sbom = [m for m in resp.mappings if m.obligation_id == "sbom_creation"]
assert sbom and sbom[0].claim_coverage == ClaimCoverage.POTENTIALLY_ADDRESSES
# but other obligations are surfaced as gaps -> claim does not address everything
assert any(m.claim_coverage != ClaimCoverage.POTENTIALLY_ADDRESSES for m in resp.mappings)
assert "Nachweise" in resp.summary
# ---------------------------------------------------------------------------
# 4. Ist ein reaktiver Updateprozess ausreichend? -> nur teilweise
# ---------------------------------------------------------------------------
def test_reactive_update_process_is_partial():
resp = assess_implementation(
resp = reason_implementation_claim(
sps_profile(), "Wir machen Updates, wenn Kunden Fehler melden."
)
upd = [a for a in resp.assessments if a.obligation_id == "provide_security_updates"]
assert upd and upd[0].coverage_status == CoverageStatus.PARTIALLY_COVERED
upd = [m for m in resp.mappings if m.obligation_id == "provide_security_updates"]
assert upd and upd[0].claim_coverage == ClaimCoverage.PARTIALLY_ADDRESSES
assert "reactive" in resp.claim.qualifiers
assert any("Schwachstellenüberwachung" in m for m in upd[0].missing_elements)
assert any("Schwachstellenüberwachung" in e for e in upd[0].missing_elements)
# ---------------------------------------------------------------------------
@@ -211,13 +211,29 @@ def test_claim_normalizer_is_deterministic():
def test_unspecific_claim_asks_for_detail():
resp = assess_implementation(sps_profile(), "Wir sind sicher aufgestellt.")
assert resp.assessments == [] or all(
a.coverage_status == CoverageStatus.UNCLEAR for a in resp.assessments
resp = reason_implementation_claim(sps_profile(), "Wir sind sicher aufgestellt.")
assert resp.mappings == [] or all(
m.claim_coverage == ClaimCoverage.INSUFFICIENT_INFORMATION for m in resp.mappings
)
assert "unspezifisch" in resp.summary.lower()
def test_claim_reasoning_carries_no_compliance_verdict():
    """Welt-1 boundary: claim mapping must never read as a conformity verdict.

    For a concrete customer claim this guard checks that:

    * at least one mapping is produced, so the checks below are not vacuous,
    * every mapping uses the claim-relative ``ClaimCoverage`` vocabulary,
    * neither the summary nor any explanation contains compliance wording
      ("erfüllt", "abgedeckt"), regardless of capitalisation,
    * the response carries a disclaimer that separates ClaimCoverage (Welt 1)
      from ComplianceStatus (Welt 2).
    """
    resp = reason_implementation_claim(
        sps_profile(), "Wir haben SBOMs und einen Update-Prozess."
    )

    # An empty mapping list would let every per-mapping check pass without
    # inspecting anything, so the guard would protect nothing.
    assert resp.mappings, "expected at least one mapping for a concrete claim"

    # claim-relative vocabulary only
    allowed = set(ClaimCoverage)
    for m in resp.mappings:
        assert m.claim_coverage in allowed

    # No compliance wording may leak into summary or explanations. Compare in
    # lower case so a sentence-initial "Erfüllt"/"Abgedeckt" is caught as well.
    forbidden = ("erfüllt", "abgedeckt")
    texts = [resp.summary, *(m.explanation for m in resp.mappings)]
    for text in texts:
        lowered = text.lower()
        for term in forbidden:
            assert term not in lowered, (
                f"compliance wording {term!r} leaked into: {text!r}"
            )

    # explicit disclaimer separating ClaimCoverage (Welt 1) from ComplianceStatus (Welt 2)
    assert resp.disclaimer
    assert "ComplianceStatus" in resp.disclaimer and "Nachweis" in resp.disclaimer
# ---------------------------------------------------------------------------
# Endpoint smoke tests
# ---------------------------------------------------------------------------
@@ -248,11 +264,13 @@ def test_endpoint_obligations(client):
def test_endpoint_implementation(client):
r = client.post(
"/reasoning/implementation-assessment",
"/reasoning/implementation-reasoning",
json={"product_profile": {"product_name": "X", "has_software": True, "eu_market": True, "manufacturer_role": "manufacturer"}, "customer_claim": "Wir haben SBOMs."},
)
assert r.status_code == 200
assert r.json()["assessments"]
body = r.json()
assert body["mappings"]
assert body["disclaimer"]
def test_endpoint_interpretation(client):