diff --git a/backend-compliance/compliance/knowledge_production/__init__.py b/backend-compliance/compliance/knowledge_production/__init__.py new file mode 100644 index 00000000..0714d8e0 --- /dev/null +++ b/backend-compliance/compliance/knowledge_production/__init__.py @@ -0,0 +1,19 @@ +"""Knowledge Production — deterministically prepare the corpus, then curate it. + +The corpus is not written by hand: the Playbook Draft Generator structures drafts from data the +software already owns (Transition Pattern + leverage + injected Execution controls), leaving the +practitioner know-how as TODO for expert review. Mirrors the legal pipeline (Parser -> Review). +Deterministic, no LLM in core, no new corpus, no new meta-model class (freeze v1.0). +""" + +from __future__ import annotations + +from .engine import drafts_from_pattern, generate_playbook_draft +from .schemas import DraftStatus, PlaybookDraft + +__all__ = [ + "generate_playbook_draft", + "drafts_from_pattern", + "PlaybookDraft", + "DraftStatus", +] diff --git a/backend-compliance/compliance/knowledge_production/engine.py b/backend-compliance/compliance/knowledge_production/engine.py new file mode 100644 index 00000000..2d39a076 --- /dev/null +++ b/backend-compliance/compliance/knowledge_production/engine.py @@ -0,0 +1,91 @@ +"""Knowledge Production — the Playbook Draft Generator (deterministic assembly + expert review). + +Mirrors the legal pipeline (Gesetz -> Parser -> Obligation -> Review) for BreakPilot's OWN knowledge: +new Capability -> Registry -> Transition Pattern -> **Playbook Draft Generator** -> Expert Review -> +versioned Playbook. The generator does not WRITE playbooks — it STRUCTURES drafts from data the +software already owns (a transition/convergence pattern's delta requirement: why_asked, covers_targets, +expected_evidence) plus injected Execution controls. 
def _as_str_list(value: Any) -> List[str]:
    """Normalise an optional list-like requirement value to a list of strings.

    An absent or empty value (missing key, explicit ``None``, ``""``, ``[]``) becomes an empty
    list, and a bare string is treated as ONE item instead of being iterated character by
    character. Any other iterable is converted item by item, preserving order.
    """
    if not value:
        return []
    if isinstance(value, str):
        return [value]
    return [str(item) for item in value]


def generate_playbook_draft(
    capability_id: str,
    requirement: Optional[Dict[str, Any]] = None,
    control_links: Optional[List[str]] = None,
) -> PlaybookDraft:
    """Assemble a playbook draft for ONE capability from a pattern delta requirement (deterministic).

    Args:
        capability_id: identifier of the capability; also used (underscores -> spaces) as title.
        requirement: a delta_requirement dict. Keys read here: ``why_asked`` (falling back to
            ``missing_because``), ``covers_targets`` and ``expected_evidence``. Missing keys and
            keys holding ``None`` are treated as "no data".
        control_links: injected Execution controls (default: none).

    Returns:
        A ``PlaybookDraft`` in status ``DRAFT_GENERATED``. Every owned field that could be filled
        has a ``provenance`` entry naming its source; ``why`` / ``expected_evidence`` are added to
        ``todo`` when empty, and the soft (practitioner know-how) fields are always added to it.
    """
    req = requirement or {}

    # Take the first non-empty explanation and remember WHICH key supplied it, so the recorded
    # provenance names the real source instead of always claiming `why_asked`.
    why, why_key = "", ""
    for key in ("why_asked", "missing_because"):
        if req.get(key):
            why, why_key = str(req[key]), key
            break

    # Regulations are de-duplicated and sorted for a stable result; evidence keeps input order.
    closes = sorted(set(_as_str_list(req.get("covers_targets"))))
    evidence = _as_str_list(req.get("expected_evidence"))
    controls = list(control_links or [])

    provenance: Dict[str, str] = {}
    todo: List[str] = []
    if why:
        provenance["why"] = "transition_pattern:" + why_key
    else:
        todo.append("why")
    if closes:
        provenance["closes_regulations"] = "leverage:covers_targets"
    if evidence:
        provenance["expected_evidence"] = "transition_pattern:expected_evidence"
    else:
        todo.append("expected_evidence")
    if controls:
        provenance["typical_controls"] = "execution:control_links"
    todo.extend(_SOFT_FIELDS)  # always expert-owned

    return PlaybookDraft(
        capability_id=capability_id,
        status=DraftStatus.DRAFT_GENERATED,
        title=capability_id.replace("_", " "),
        why=why,
        closes_regulations=closes,
        expected_evidence=evidence,
        typical_controls=controls,
        provenance=provenance,
        todo=todo,
        disclaimer=_DISCLAIMER,
    )
def drafts_from_pattern(
    pattern: Dict[str, Any],
    control_links_by_cap: Optional[Dict[str, List[str]]] = None,
) -> List[PlaybookDraft]:
    """Assemble one playbook draft per delta capability of a transition/convergence pattern.

    Args:
        pattern: a pattern dict; only its ``delta_requirements`` list is read. A missing key, a
            key holding ``None`` (e.g. an empty YAML entry) or a ``None`` pattern yields no drafts.
        control_links_by_cap: optional mapping capability id -> injected Execution controls.

    Returns:
        One draft per delta requirement that names a ``capability``, in pattern order. Entries
        that are empty/``None`` or have no capability are skipped.
    """
    links = control_links_by_cap or {}
    drafts: List[PlaybookDraft] = []
    # `or []` (rather than a .get default) also covers a key that is present but null.
    for delta in (pattern or {}).get("delta_requirements") or []:
        if not delta:
            continue  # null / empty entry cannot name a capability
        cap = delta.get("capability")
        if not cap:
            continue
        cap_id = str(cap)
        drafts.append(generate_playbook_draft(cap_id, delta, links.get(cap_id)))
    return drafts
class PlaybookDraft(BaseModel):
    """A deterministically assembled playbook draft for one capability.

    Owned fields (why / closes_regulations / expected_evidence / typical_controls) are filled from
    existing data, and `provenance` records the source of each filled field. The practitioner
    know-how (tools / process_steps / how_others_do_it) has no field on this model; what is still
    owed is named in `todo`. The expert reviews a draft instead of writing from a blank page.
    """

    # Identifier of the capability this draft belongs to (the only required field).
    capability_id: str
    # Lifecycle state; a new draft starts as machine-generated.
    status: DraftStatus = DraftStatus.DRAFT_GENERATED
    # Display title for the draft.
    title: str = ""
    why: str = ""  # from the transition pattern (why_asked/missing_because)
    closes_regulations: List[str] = Field(default_factory=list)  # from leverage (covers_targets)
    expected_evidence: List[str] = Field(default_factory=list)  # from the transition pattern
    typical_controls: List[str] = Field(default_factory=list)  # injected from Execution (may be empty)
    provenance: Dict[str, str] = Field(default_factory=dict)  # field -> source it was assembled from
    todo: List[str] = Field(default_factory=list)  # fields the expert/offline-propose must still add
    disclaimer: str = ""  # machine draft, requires expert curation
Der Corpus wird nicht von Hand geschrieben, sondern deterministisch aus vorhandenen Daten (Transition Pattern + Leverage + injizierte Controls) vorbereitet — dann fachlich kuratiert (wie Gesetz→Parser→Obligation→Review)._") +w("") +_kp = drafts_from_pattern(CP) if CP else [] # CP = convergence pattern (already loaded) +w("**Aus 1 Pattern → %d Playbook-Entwürfe** (`status: draft_generated`): eigene Felder (Warum/schließt/Nachweise) aus den Daten gefüllt, der Experte ergänzt nur Tools/Prozess/How-others." % len(_kp)) +w("") +_kd = next((d for d in _kp if d.capability_id == "sbom_creation"), _kp[0] if _kp else None) +if _kd: + w("**Beispiel-Entwurf — `%s`** _(%s)_" % (_kd.capability_id, _kd.status.value)) + w("- **Warum** (aus Pattern): %s" % _kd.why.strip()) + w("- **schließt** %s · **Nachweise** %s" % ("+".join(_kd.closes_regulations) or "—", ", ".join(_kd.expected_evidence) or "—")) + w("- **Provenance:** %s" % ", ".join("%s←%s" % (k, v) for k, v in _kd.provenance.items())) + w("- **TODO (Experte/Offline-Propose):** %s" % ", ".join(_kd.todo)) + w("") +w("_So reviewt der Experte %d Entwürfe statt %d Playbooks zu schreiben. 
Derselbe Generator bereitet später ISO14001-/IATF-Entwürfe vor, sobald der Corpus da ist._" % (len(_kp), len(_kp))) +w("") +coverage_table([ + ("Playbook Draft Generator (deterministisch)", "PASS", "%d Entwürfe aus 1 Pattern, kein LLM im Kern" % len(_kp)), + ("Provenance + TODO + Freigabestatus", "PASS", "draft_generated→reviewed→validated→proven"), + ("Draft-Generatoren neue Domänen (Phase A)", "TODO", "Transition-/Reference-Scenario-Drafts"), +]) + # ── Epics + roll-up ─────────────────────────────────────────────────────── w("## Gaps → Epics (Backlog — nur erfasst, NICHT implementiert)") w("") diff --git a/backend-compliance/reference_scenarios/reference_scenario_suite_v1.md b/backend-compliance/reference_scenarios/reference_scenario_suite_v1.md index 43bd50e7..364bb45b 100644 --- a/backend-compliance/reference_scenarios/reference_scenario_suite_v1.md +++ b/backend-compliance/reference_scenarios/reference_scenario_suite_v1.md @@ -296,6 +296,28 @@ _Derselbe Capability-Strang, neuer Renderer: aus Diagnose wird Beratung. Die `fe | Roadmap → Playbook (Verkettung) | **PASS** | 2/12 Maßnahmen mit Playbook | | Playbook-Inhalt (Knowledge) | **TODO** | 10 Capabilities brauchen noch Inhalt | +## Knowledge Production — Playbook-Entwürfe automatisch assemblieren + +_Der Engpass ist nicht Content, sondern Wissensproduktion. Der Corpus wird nicht von Hand geschrieben, sondern deterministisch aus vorhandenen Daten (Transition Pattern + Leverage + injizierte Controls) vorbereitet — dann fachlich kuratiert (wie Gesetz→Parser→Obligation→Review)._ + +**Aus 1 Pattern → 12 Playbook-Entwürfe** (`status: draft_generated`): eigene Felder (Warum/schließt/Nachweise) aus den Daten gefüllt, der Experte ergänzt nur Tools/Prozess/How-others. + +**Beispiel-Entwurf — `sbom_creation`** _(draft_generated)_ +- **Warum** (aus Pattern): CRA requires an SBOM; MaschinenVO does not. 
+- **schließt** CRA · **Nachweise** sbom +- **Provenance:** why←transition_pattern:why_asked, closes_regulations←leverage:covers_targets, expected_evidence←transition_pattern:expected_evidence +- **TODO (Experte/Offline-Propose):** tools, process_steps, how_others_do_it + +_So reviewt der Experte 12 Entwürfe statt 12 Playbooks zu schreiben. Derselbe Generator bereitet später ISO14001-/IATF-Entwürfe vor, sobald der Corpus da ist._ + +**Architecture Coverage** + +| Layer | Status | Hinweis | +|---|---|---| +| Playbook Draft Generator (deterministisch) | **PASS** | 12 Entwürfe aus 1 Pattern, kein LLM im Kern | +| Provenance + TODO + Freigabestatus | **PASS** | draft_generated→reviewed→validated→proven | +| Draft-Generatoren neue Domänen (Phase A) | **TODO** | Transition-/Reference-Scenario-Drafts | + ## Gaps → Epics (Backlog — nur erfasst, NICHT implementiert) | Epic | Titel | schliesst Coverage-Luecke | @@ -307,6 +329,6 @@ _Derselbe Capability-Strang, neuer Renderer: aus Diagnose wird Beratung. Die `fe ## Suite-Status (Roll-up) -- Coverage-Zellen gesamt: **35** -- PASS: **26** · PARTIAL: 3 · UNSUPPORTED: 1 · TODO: 4 · N/A: 1 · NEEDS_FACTS: 0 +- Coverage-Zellen gesamt: **38** +- PASS: **28** · PARTIAL: 3 · UNSUPPORTED: 1 · TODO: 5 · N/A: 1 · NEEDS_FACTS: 0 - Fortschritt = PASS-Anteil steigt, wenn Epics RS-001…004 landen (objektiver Maßstab, kein LOC). diff --git a/backend-compliance/tests/test_knowledge_production.py b/backend-compliance/tests/test_knowledge_production.py new file mode 100644 index 00000000..283bb1a0 --- /dev/null +++ b/backend-compliance/tests/test_knowledge_production.py @@ -0,0 +1,89 @@ +"""Tests for Knowledge Production — the Playbook Draft Generator. + +Acceptance: deterministically assemble a playbook DRAFT for a capability from a transition-pattern +delta requirement (why / closes / evidence with provenance), leaving practitioner know-how as an +explicit TODO; turn a whole pattern into one draft per delta capability. 
# Fixture: a delta requirement that only one regulation (CRA) asks for.
REQ = {
    "capability": "sbom_creation",
    "why_asked": "CRA requires an SBOM; MaschinenVO does not.",
    "covers_targets": ["CRA"],
    "expected_evidence": ["sbom"],
}
# Fixture: a convergence requirement shared by two regulations.
CONV_REQ = {
    "capability": "product_cyber_risk_assessment",
    "why_asked": "Both require assessing cyber threats.",
    "covers_targets": ["CRA", "MaschinenVO"],
    "expected_evidence": ["product_risk_assessment"],
}


def test_assembles_owned_fields_with_provenance():
    """Owned fields are copied from the requirement and each records where it came from."""
    draft = generate_playbook_draft("sbom_creation", REQ, control_links=["component_inventory"])
    assert draft.status == DraftStatus.DRAFT_GENERATED
    assert draft.why.startswith("CRA requires an SBOM")
    assert draft.closes_regulations == ["CRA"]
    assert draft.expected_evidence == ["sbom"]
    assert draft.typical_controls == ["component_inventory"]
    expected_sources = {
        "why": "transition_pattern:why_asked",
        "closes_regulations": "leverage:covers_targets",
        "typical_controls": "execution:control_links",
    }
    for field_name, source in expected_sources.items():
        assert draft.provenance[field_name] == source


def test_soft_fields_are_todo():
    """With all owned data present, only the practitioner know-how remains owed."""
    draft = generate_playbook_draft("sbom_creation", REQ)
    assert draft.todo == ["tools", "process_steps", "how_others_do_it"]


def test_missing_owned_fields_go_to_todo():
    """An empty requirement leaves the owned fields empty and flags them for the expert."""
    draft = generate_playbook_draft("x", {})
    assert "why" in draft.todo
    assert "expected_evidence" in draft.todo
    assert draft.closes_regulations == []
    assert draft.typical_controls == []
    assert draft.status == DraftStatus.DRAFT_GENERATED


def test_missing_because_fallback_for_why():
    """`missing_because` is used for `why` when `why_asked` is absent."""
    draft = generate_playbook_draft("x", {"missing_because": "no analogue in ISO 27001"})
    assert draft.why == "no analogue in ISO 27001"
    assert "why" not in draft.todo


def test_closes_deduped_sorted_and_title_humanised():
    """Regulations are de-duplicated and sorted; the title replaces underscores with spaces."""
    requirement = {"covers_targets": ["MaschinenVO", "CRA", "CRA"]}
    draft = generate_playbook_draft("secure_signed_update_distribution", requirement)
    assert draft.closes_regulations == ["CRA", "MaschinenVO"]
    assert draft.title == "secure signed update distribution"


def test_controls_default_empty_no_execution_data():
    """Without injected controls the draft carries none."""
    draft = generate_playbook_draft("x", REQ)
    assert draft.typical_controls == []


def test_drafts_from_pattern_one_per_delta_in_order():
    """A pattern yields one draft per delta requirement, in pattern order."""
    result = drafts_from_pattern({"delta_requirements": [REQ, CONV_REQ]})
    produced_ids = [draft.capability_id for draft in result]
    assert produced_ids == ["sbom_creation", "product_cyber_risk_assessment"]
    assert result[1].closes_regulations == ["CRA", "MaschinenVO"]


def test_drafts_from_pattern_injects_controls_and_skips_unnamed():
    """Controls are injected per capability; entries without a capability are dropped."""
    pattern = {"delta_requirements": [REQ, {"why_asked": "no capability key"}]}
    result = drafts_from_pattern(pattern, control_links_by_cap={"sbom_creation": ["c1"]})
    assert len(result) == 1
    assert result[0].typical_controls == ["c1"]


def test_deterministic():
    """Two runs over the same pattern produce the same drafts."""
    pattern = {"delta_requirements": [REQ, CONV_REQ]}

    def fingerprint():
        return [(d.capability_id, d.why, tuple(d.todo)) for d in drafts_from_pattern(pattern)]

    first_run = fingerprint()
    second_run = fingerprint()
    assert first_run == second_run


def test_returns_playbookdraft_type():
    """The generator returns the schema type, not a plain dict."""
    draft = generate_playbook_draft("x", REQ)
    assert isinstance(draft, PlaybookDraft)
[ADR-002](ADR-002-transition-is-data-not-architecture.md), Architektur-Freeze v1.0, [[transition-reasoning]], [[iace-quality-architecture]] + +## Kontext + +Mit Capability Delta, Optimization und Playbooks ist die Diagnose weitgehend fertig. Der nächste +Engpass ist NICHT „Content" (mehr Playbooks schreiben), sondern **Wissensproduktion**: würde man +200 Playbooks (und je Domäne neue Patterns/Reference-Szenarien) von Hand schreiben, verlagerte sich +der Engpass dauerhaft vom Engineering auf manuelle Wissenspflege. + +Der entscheidende Grundsatz war: **„Die Engine ändert sich nicht. Der Corpus wächst."** Diese ADR +ergänzt ihn: + +> **„Und der Corpus wird nicht manuell geschrieben. Er wird deterministisch vorbereitet und +> anschließend fachlich kuratiert."** + +## Entscheidung + +1. **BreakPilot produziert künftig keine fertigen Wissensartefakte, sondern ENTWÜRFE.** Ein Draft + Generator strukturiert deterministisch aus Daten, die die Software bereits besitzt + (Capability, Transition Pattern, Controls, Evidence, Regulatory Map / Leverage), einen Entwurf — + und überlässt das Fachwissen der menschlichen Kuratierung. + +2. **Spiegelung der Legal-Pipeline.** Wie `Gesetz → Parser → Obligation → Review` gilt jetzt + `neue Capability → Registry → Transition Pattern → Playbook Draft Generator → Expert Review → + versioniertes Playbook`. Dieselbe Logik für jedes Wissensartefakt (Playbooks, später Transition + Patterns, Reference-Szenarien). + +3. **Deterministisch-first (kein LLM im Kern).** Der Generator assembliert nur, was die Software + besitzt; weiche Felder (Tools / Prozessschritte / „wie machen das andere") werden als **TODO** + ausgewiesen. Optionale Modell-Anreicherung bleibt **offline, advisory, propose-only** — nie im + deterministischen Kern (vgl. [[iace-quality-architecture]]). + +4. 
**Freigabestatus.** Jedes Artefakt trägt einen Lifecycle + `draft_generated → in_review → reviewed → validated → proven` plus **Provenance je Feld** + (woraus es assembliert wurde) — Voraussetzung für Review-Workflow und Versionierung. + +## Konsequenzen + +- **Review statt Schreiben:** der Experte reviewt N Entwürfe statt N Artefakte zu schreiben — der + manuelle Aufwand sinkt massiv, ohne fachliche Kontrolle aufzugeben. +- **Neue Domänen werden billig:** sobald ein Domänen-Corpus (z. B. Umwelt) existiert, erzeugt + derselbe Generator erste Entwürfe — ISO 14001 wird ein Draft-+-Review-Problem, kein Schreibprojekt. +- **Internes Werkzeug:** die wertvollste Maschine ist nicht nur das Kunden-OS, sondern die + **Produktionsmaschine für das eigene regulatorische Wissen** — sie wird mit jeder Domäne wertvoller. +- **Freeze-konform:** kein neues Metamodell, kein Graph, kein neuer Corpus. `compliance/knowledge_production` + ist eine reine, deterministische Vorbereitung (computed-not-stored); Execution-Controls werden injiziert. +- **Phase A (Wissensproduktion) VOR Phase B (neue Domänen):** Draft-Generatoren (Playbook ✓, dann + Transition-Pattern, Reference-Szenario) + Review-Workflow + Versionierung + Freigabestatus, dann + ISO 14001 / IATF 16949 / IEC 62443. +- Diese ADR ist non-runtime → kein Deploy (siehe [ADR-001](ADR-001-runtime-deploy-policy.md)).