"""Regulatory Completeness Engine — measure auditable knowledge coverage for an assessment. Separates what we IDENTIFIED (triggered regulations) from what we ASSESSED (validated corpus AND determined applicability), and justifies every gap. Two kinds of „open": - corpus gap — no validated corpus yet (e.g. Environmental) -> future_corpus - applicability open — corpus exists but applicability is uncertain (Data Act) -> query_required The metric is COUNTS, never a single percentage. The audit statement says plainly „wir bewerteten M von N Domänen; K sind nicht im validierten Korpus und wurden bewusst nicht bewertet". Deterministic, computed-not-stored, no LLM, no new corpus/meta-model class (freeze v1.0). Python 3.9. """ from __future__ import annotations from typing import Any, Dict, List, Optional from .schemas import ( Assumption, CompletenessReport, CorpusStatus, DomainCoverage, Exclusion, ) _VALID = {s.value for s in CorpusStatus} def _status(corpus_status: Dict[str, str], reg: str) -> CorpusStatus: raw = corpus_status.get(reg, "unknown") return CorpusStatus(raw) if raw in _VALID else CorpusStatus.UNKNOWN def assess_completeness( identified_regulations: List[str], corpus_status: Dict[str, str], uncertain: Optional[List[Dict[str, Any]]] = None, assumptions: Optional[List[Dict[str, Any]]] = None, assessed_obligations: int = 0, ) -> CompletenessReport: """Build the auditable coverage report. `identified_regulations`: triggered/identified for this product. `corpus_status`: regulation -> one of validated/draft/unsupported/unknown (curated/injected corpus registry). `uncertain`: applicability-uncertain regulations [{regulation, deciding_question, reason}]. `assumptions`: [{key, value, note}]. `assessed_obligations`: count from Execution (injected, default 0). """ ids = sorted(set(identified_regulations)) unc = uncertain or [] unc_subjects = {str(u.get("regulation") or u.get("subject")) for u in unc if (u.get("regulation") or u.get("subject"))} coverage = [DomainCoverage(regulation=r, status=_status(corpus_status, r)) for r in ids] assessed = [r for r in ids if _status(corpus_status, r) == CorpusStatus.VALIDATED and r not in unc_subjects] open_regs = [r for r in ids if r not in assessed] open_corpora = [r for r in ids if _status(corpus_status, r) in (CorpusStatus.UNSUPPORTED, CorpusStatus.UNKNOWN)] exclusions: List[Exclusion] = [] for u in unc: subj = str(u.get("regulation") or u.get("subject") or "") if not subj: continue exclusions.append(Exclusion( subject=subj, reason=str(u.get("reason", "Anwendbarkeit unsicher")), deciding_question=str(u.get("deciding_question", "")), resolution="query_required")) for r in open_regs: if r in unc_subjects: continue st = _status(corpus_status, r) if st == CorpusStatus.DRAFT: exclusions.append(Exclusion(subject=r, reason="Korpus in Bearbeitung (draft)", resolution="in_review")) else: exclusions.append(Exclusion(subject=r, reason="nicht im validierten Korpus", resolution="future_corpus")) covered_subjects = {e.subject for e in exclusions} justification = (not open_regs) or set(open_regs) <= covered_subjects assumptions_m = [Assumption(key=str(a.get("key", "")), value=str(a.get("value", "")), note=str(a.get("note", ""))) for a in (assumptions or [])] summary = "Identifiziert %d · bewertet %d · offen %d · Unsicherheiten %d · Begründung %s" % ( len(ids), len(assessed), len(open_regs), len(unc), "ja" if justification else "nein") if open_regs: audit = ( "Für dieses Produkt konnten wir %d von %d identifizierten regulatorischen Domänen vollständig " "bewerten. %d weitere %s noch nicht Bestandteil des validierten Korpus bzw. anwendungsunsicher " "und wurden deshalb bewusst nicht bewertet." % ( len(assessed), len(ids), len(open_regs), "ist" if len(open_regs) == 1 else "sind")) else: audit = "Für dieses Produkt konnten wir alle %d identifizierten regulatorischen Domänen vollständig bewerten." % len(ids) return CompletenessReport( identified_regulations=ids, assessed_regulations=assessed, open_regulations=open_regs, open_corpora=open_corpora, coverage=coverage, assumptions=assumptions_m, exclusions=exclusions, uncertainties_count=len(unc), assessed_obligations=assessed_obligations, justification_present=justification, completeness_summary=summary, audit_statement=audit, )