# breakpilot-compliance/backend-compliance/reference_scenarios/_helpers.py

# ruff: noqa
# mypy: ignore-errors
"""Rendering helpers for the Reference Scenario Suite generator.

Holds the shared mutable output buffers (OUT, ROLLUP) and the small markdown helpers so the
generator script (`generate.py`) stays under the LOC budget. Not product code; not imported by
the app — only by the generator (run via `PYTHONPATH=. python3 reference_scenarios/generate.py`).
"""
from __future__ import annotations

from typing import List, Tuple

# One coverage-table row; coverage_table() unpacks it as (layer, status, note).
Row = Tuple[str, str, str]
# Shared output buffer: every call to w() appends one string here.
OUT: List[str] = []
# Shared roll-up buffer: coverage_table() appends the status of each rendered row here.
ROLLUP: List[str] = []


def w(s: str = "") -> None:
    """Append ``s`` as a single entry to the shared ``OUT`` buffer.

    Calling ``w()`` without an argument appends an empty string.
    """
    OUT.extend((s,))


def coverage_table(rows: List[Row]) -> None:
    """Write the "Architecture Coverage" markdown table and record each status.

    Args:
        rows: ``(layer, status, note)`` triples, rendered one table row each in
            the order given.

    Side effects:
        Emits the heading, the table header, one line per row and a trailing
        blank line via ``w``; appends every row's ``status`` to ``ROLLUP``.
    """
    preamble = (
        "**Architecture Coverage**",
        "",
        "| Layer | Status | Hinweis |",
        "|---|---|---|",
    )
    for heading_line in preamble:
        w(heading_line)

    row_template = "| %s | **%s** | %s |"
    for layer_name, row_status, hint in rows:
        w(row_template % (layer_name, row_status, hint))
        ROLLUP.append(row_status)

    w("")


def reg_map_block(rmap) -> None:
    """Write the "Expected Regulatory Map" section for ``rmap`` via ``w``.

    Emits, in order: the heading, the executive summary as a block quote, one
    bullet per applicable regulation (its obligation ids, or its
    ``obligations_note`` when the joined ids are empty), one bullet per
    uncertain regulation (its missing facts, or ``-`` when there are none), one
    bullet per overlap, one bullet per shared-evidence entry with the number of
    ids attached to it, and a trailing blank line.

    Args:
        rmap: Object exposing ``executive_summary``, ``applicable_regulations``,
            ``uncertain_regulations``, ``overlaps`` and the mapping
            ``shared_evidence``.
    """
    separator = ", "

    w("**Expected Regulatory Map**")
    w("")
    w("> " + rmap.executive_summary)
    w("")

    for regulation in rmap.applicable_regulations:
        duties = separator.join(item.obligation_id for item in regulation.obligations)
        if not duties:
            # No obligation ids to list: fall back to the free-text note.
            duties = regulation.obligations_note
        w("- **%s** (%s) — Pflichten: %s" % (regulation.regulation_id, regulation.name, duties))

    for uncertain in rmap.uncertain_regulations:
        missing = separator.join(uncertain.missing_facts)
        if not missing:
            missing = "-"
        w("- _unsicher_ %s — fehlt: %s" % (uncertain.regulation_id, missing))

    for overlap in rmap.overlaps:
        shared = separator.join(overlap.shared_obligations)
        w("- Overlap %s: %s" % (overlap.overlap_group_id, shared))

    for evidence, obligation_ids in rmap.shared_evidence.items():
        w("- 1 Nachweis `%s` => %d Pflichten" % (evidence, len(obligation_ids)))

    w("")


def unsupported_block(rmap) -> None:
    """Write the "Expected Unsupported Domains" section for ``rmap`` via ``w``.

    Emits the heading, then either a single "keine" bullet when
    ``rmap.unsupported_domains`` is empty or one bullet per entry showing its
    ``domain``, ``trigger`` and ``note``, followed by a trailing blank line.
    """
    for heading_line in ("**Expected Unsupported Domains**", ""):
        w(heading_line)

    domains = rmap.unsupported_domains
    if not domains:
        w("- keine — alle getriggerten Domaenen sind im Korpus")
    for entry in domains:
        bullet = "- `%s` (Trigger: %s) -> %s" % (entry.domain, entry.trigger, entry.note)
        w(bullet)

    w("")


def interp_status(verdict_value: str) -> str:
    """Map a verdict value to a coverage status string.

    Returns ``"PARTIAL"`` for ``"uncertain"`` and ``"unsupported"``; every other
    value yields ``"PASS"``. The comparison is exact and case-sensitive.
    """
    if verdict_value in ("uncertain", "unsupported"):
        return "PARTIAL"
    return "PASS"


def knowledge_intake_section(base_dir) -> None:
    """Render the Knowledge Intake section (kept here so generate.py stays under the LOC budget).

    Builds a knowledge index from the YAML files found under
    ``<base_dir>/../knowledge/{transition_patterns,implementation_playbooks,
    reference_transition_scenarios}``, assesses three hard-coded sample
    documents against it and writes the result via ``w``: an impact table, an
    example knowledge-package summary for the first sample document, and a
    coverage table (which also appends to ``ROLLUP``).

    Args:
        base_dir: Directory the ``../knowledge`` path is resolved against
            (presumably the directory of the generator script — confirm
            against the caller).

    Raises:
        OSError: If one of the knowledge sub-directories or a YAML file in it
            cannot be listed or opened.
    """
    # Imported locally so that importing this module does not require PyYAML or
    # the compliance package unless this section is actually rendered.
    import os
    import yaml
    from compliance.knowledge_intake import (
        DocumentDescriptor, assess_document_impact, build_knowledge_index,
    )

    def _load(sub):
        """Return the parsed content of every ``*.yaml`` file in ``sub``, sorted by file name."""
        d = os.path.join(base_dir, "..", "knowledge", sub)
        loaded = []
        for f in sorted(os.listdir(d)):
            if not f.endswith(".yaml"):
                continue
            # Context manager closes each handle right after parsing; the
            # previous bare open() inside a comprehension never closed them.
            with open(os.path.join(d, f), encoding="utf-8") as fh:
                loaded.append(yaml.safe_load(fh))
        return loaded

    idx = build_knowledge_index(
        _load("transition_patterns"), _load("implementation_playbooks"),
        _load("reference_transition_scenarios"), obligation_index={"CRA": ["cra_obl_1", "cra_obl_2"]})
    # Three fixed sample documents used to demonstrate the impact triage.
    docs = [
        DocumentDescriptor(document_id="ENISA CRA SBOM-FAQ", regulations=["CRA"], keywords=["sbom", "vulnerability"], document_type="faq"),
        DocumentDescriptor(document_id="EU Umwelt-Leitfaden", regulations=["UmweltVO"], keywords=["wastewater"], document_type="guidance"),
        DocumentDescriptor(document_id="Marketing-Blog", keywords=["newsletter"], document_type="blog"),
    ]
    w("## Knowledge Intake — Impact zuerst, Extraktion später")
    w("")
    w('_Vor dem Parser: ein neues Dokument NUR einordnen und seinen Impact auf den bestehenden Wissensbestand bestimmen. „Von N Dokumenten verändern wenige tatsächlich unser Wissen." Deterministisch, keine Extraktion, kein LLM._')
    w("")
    w("| Dokument | Impact | betrifft | Empfehlung |")
    w("|---|---|---|---|")
    for d in docs:
        kp = assess_document_impact(d, idx)
        # For a new domain show a fixed label; otherwise show the counts of
        # affected capabilities / playbooks / reference scenarios / obligations.
        touch = "neue Domäne" if kp.new_domain else "%dC·%dPB·%dRTS·%dObl" % (
            len(kp.affected_capabilities), len(kp.affected_playbooks),
            len(kp.affected_reference_scenarios), len(kp.affected_obligations))
        # Only the part of the recommendation before the first " —" goes into the table.
        w("| %s | **%s** | %s | %s |" % (d.document_id, kp.impact_level.value, touch, kp.recommendation.split(" —")[0]))
    w("")
    w("**Beispiel-Knowledge-Package** (`%s`): %s" % (docs[0].document_id, assess_document_impact(docs[0], idx).impact_summary))
    w("")
    w('_So entsteht bei jedem neuen Dokument eine Impact-Analyse statt „200 Seiten PDF" — Targeted Updating statt Schreiben._')
    w("")
    coverage_table([
        ("Knowledge Intake (Klassifikation+Impact)", "PASS", "%d Regelwerke / %d Capabilities im Index" % (len(idx.regulations), len(idx.capability_regulations))),
        ("Impact-Triage (HIGH/LOW/NONE/new_domain)", "PASS", "3 Beispiel-Dokumente korrekt eingeordnet"),
        ("Regelwerk-ID-Normalisierung", "TODO", "CRA vs Cyber Resilience Act vereinheitlichen"),
    ])