# breakpilot-compliance/backend-compliance/compliance/completeness/schemas.py

"""Schemas for the Regulatory Completeness Engine — auditable knowledge-coverage, not confidence.

For an assessment it answers "how sure are we that this answer is COMPLETE?" (German: „Wie sicher sind
wir, dass diese Antwort VOLLSTÄNDIG ist?“) by separating IDENTIFIED regulations from ASSESSED ones (those
in the validated corpus) and listing every open or excluded domain WITH a reason. The metric is a set of
counts, never a single "87%". This is an internal quality
machine: the product never claims full coverage — it makes its own knowledge state transparent.
Deterministic, computed-not-stored, no new meta-model class (freeze v1.0). Python 3.9 compatible.
"""

from __future__ import annotations

from enum import Enum
from typing import List

from pydantic import BaseModel, Field


class CorpusStatus(str, Enum):
    """Maturity level of the knowledge corpus backing a regulation or domain.

    Members are ``str`` instances as well, so each one compares equal to its
    lowercase string value.
    """

    # The corpus is validated: the regulation can be fully assessed.
    VALIDATED = "validated"

    # The corpus is only partial or still under review.
    DRAFT = "draft"

    # The regulation was triggered, but there is no corpus for it yet.
    UNSUPPORTED = "unsupported"

    # The regulation is not in the registry at all.
    UNKNOWN = "unknown"


class DomainCoverage(BaseModel):
    """Corpus status recorded for a single regulation."""

    # The regulation this entry describes (required, no default).
    regulation: str

    # Corpus maturity for that regulation; UNKNOWN unless the caller sets it.
    status: CorpusStatus = Field(default=CorpusStatus.UNKNOWN)

    # Optional free-text remark; empty by default.
    note: str = Field(default="")


class Exclusion(BaseModel):
    """A domain or regulation that was DELIBERATELY not assessed.

    Every exclusion carries a reason — this is the heart of the engine.
    """

    # What was excluded (required).
    subject: str

    # Why it was excluded (required, so an exclusion cannot be created
    # without one).
    reason: str

    # The question that would resolve the exclusion, if it is a query;
    # empty by default.
    deciding_question: str = Field(default="")

    # How the exclusion is expected to be resolved. Intended values:
    # query_required | future_corpus | not_applicable.
    # NOTE(review): the field is a plain str, so these values are not
    # enforced by this schema.
    resolution: str = Field(default="future_corpus")


class Assumption(BaseModel):
    """A keyed assumption with an optional value and note.

    NOTE(review): presumably an assumption the assessment relied on — confirm
    against the code that builds the report.
    """

    # Name of the assumption (required).
    key: str

    # Value of the assumption as text; empty by default.
    value: str = Field(default="")

    # Optional free-text remark; empty by default.
    note: str = Field(default="")


class CompletenessReport(BaseModel):
    """Auditable coverage report for one assessment.

    Holds counts plus their justification — deliberately NO single percentage.
    Every field has a default, so an empty report can be built with no
    arguments.
    """

    # Regulations that were identified for the assessment.
    identified_regulations: List[str] = Field(default_factory=list)

    # Regulations that are in the validated corpus.
    assessed_regulations: List[str] = Field(default_factory=list)

    # Regulations that were identified but are not validated.
    open_regulations: List[str] = Field(default_factory=list)

    # Missing domains that are worth building a corpus for.
    open_corpora: List[str] = Field(default_factory=list)

    # Per-regulation corpus status entries.
    coverage: List[DomainCoverage] = Field(default_factory=list)

    # Assumptions attached to the report.
    assumptions: List[Assumption] = Field(default_factory=list)

    # Deliberate exclusions, each with its reason.
    exclusions: List[Exclusion] = Field(default_factory=list)

    # Count of uncertainties; 0 by default.
    uncertainties_count: int = Field(default=0)

    # Count of assessed obligations; injected (Execution-owned).
    assessed_obligations: int = Field(default=0)

    # Whether a justification is present; False by default.
    justification_present: bool = Field(default=False)

    # One-line summary, e.g. "Identifiziert N · bewertet M · offen K · ..."
    completeness_summary: str = Field(default="")

    # The honest narrative sentence.
    audit_statement: str = Field(default="")