Merge pull request 'feat: Knowledge Production — Playbook Draft Generator (prepare deterministically, curate)' (#19) from feat/knowledge-production into main

This commit is contained in:
pilotadmin
2026-06-27 13:32:26 +02:00
7 changed files with 349 additions and 2 deletions
@@ -0,0 +1,19 @@
"""Knowledge Production — deterministically prepare the corpus, then curate it.
The corpus is not written by hand: the Playbook Draft Generator structures drafts from data the
software already owns (Transition Pattern + leverage + injected Execution controls), leaving the
practitioner know-how as TODO for expert review. Mirrors the legal pipeline (Parser -> Review).
Deterministic, no LLM in core, no new corpus, no new meta-model class (freeze v1.0).
"""
from __future__ import annotations
from .engine import drafts_from_pattern, generate_playbook_draft
from .schemas import DraftStatus, PlaybookDraft
# Public API of the package: the two draft-generator functions re-exported from `.engine`
# and the two schema types re-exported from `.schemas`.
__all__ = [
    "generate_playbook_draft",
    "drafts_from_pattern",
    "PlaybookDraft",
    "DraftStatus",
]
@@ -0,0 +1,91 @@
"""Knowledge Production — the Playbook Draft Generator (deterministic assembly + expert review).
Mirrors the legal pipeline (Gesetz -> Parser -> Obligation -> Review) for BreakPilot's OWN knowledge:
new Capability -> Registry -> Transition Pattern -> **Playbook Draft Generator** -> Expert Review ->
versioned Playbook. The generator does not WRITE playbooks — it STRUCTURES drafts from data the
software already owns (a transition/convergence pattern's delta requirement: why_asked, covers_targets,
expected_evidence) plus injected Execution controls. The practitioner know-how (tools / process steps /
how others do it) is left as an explicit TODO for the expert (or a separate offline-propose step).
Fully deterministic, NO LLM in the core (deterministic-first: any model enrichment is offline,
advisory, never in this assembly). No new corpus, no new meta-model class (freeze v1.0). Python 3.9.
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional
from .schemas import DraftStatus, PlaybookDraft
_SOFT_FIELDS = ["tools", "process_steps", "how_others_do_it"] # practitioner know-how — expert/offline-propose
_DISCLAIMER = (
"Maschinell assemblierter ENTWURF aus vorhandenen Daten (Transition Pattern + Leverage + "
"injizierte Controls). KEINE normative Anforderung; erfordert fachliche Kuratierung (TODO-Felder) "
"und Statuswechsel draft_generated -> reviewed -> validated."
)
def _as_str_list(value: Any) -> List[str]:
    """Normalise a loosely typed pattern value into a list of strings.

    ``None`` (e.g. a key that is present but null in the source data) yields an empty list, and
    a bare string is treated as ONE entry instead of being iterated character by character.
    Any other iterable is converted item by item with ``str``.
    """
    if value is None:
        return []
    if isinstance(value, str):
        return [value] if value else []
    return [str(item) for item in value]


def generate_playbook_draft(
    capability_id: str,
    requirement: Optional[Dict[str, Any]] = None,
    control_links: Optional[List[str]] = None,
) -> PlaybookDraft:
    """Assemble a playbook draft for ONE capability from a pattern delta requirement.

    The assembly is deterministic: owned fields are copied from `requirement` and recorded in
    `provenance`; fields that cannot be filled are listed in `todo`.

    Args:
        capability_id: Identifier of the capability; also used for the title, with
            underscores replaced by spaces.
        requirement: A delta-requirement dict. The keys read are ``why_asked`` (falling back
            to ``missing_because``), ``covers_targets`` and ``expected_evidence``. ``None`` or
            an empty dict produces a draft whose owned fields are empty.
        control_links: Injected Execution controls. Defaults to none, in which case
            `typical_controls` is empty.

    Returns:
        A `PlaybookDraft` in status ``DRAFT_GENERATED``. `todo` contains ``why`` and/or
        ``expected_evidence`` when those are missing, followed by the soft practitioner
        fields, which are always expert-owned.
    """
    req = requirement or {}
    why = str(req.get("why_asked") or req.get("missing_because") or "")
    # `.get(key, [])` would still hand back None for a key that is present but null, so the
    # values are normalised explicitly; this also keeps a scalar string in one piece.
    closes = sorted(set(_as_str_list(req.get("covers_targets"))))
    evidence = _as_str_list(req.get("expected_evidence"))
    controls = list(control_links or [])

    provenance: Dict[str, str] = {}
    todo: List[str] = []

    # A whitespace-only rationale is no rationale: keep it on the expert's TODO list.
    if why.strip():
        provenance["why"] = "transition_pattern:why_asked"
    else:
        todo.append("why")

    # Empty regulations/controls are legitimate and therefore never become TODO items.
    if closes:
        provenance["closes_regulations"] = "leverage:covers_targets"

    if evidence:
        provenance["expected_evidence"] = "transition_pattern:expected_evidence"
    else:
        todo.append("expected_evidence")

    if controls:
        provenance["typical_controls"] = "execution:control_links"

    todo.extend(_SOFT_FIELDS)  # always expert-owned

    return PlaybookDraft(
        capability_id=capability_id,
        status=DraftStatus.DRAFT_GENERATED,
        title=capability_id.replace("_", " "),
        why=why,
        closes_regulations=closes,
        expected_evidence=evidence,
        typical_controls=controls,
        provenance=provenance,
        todo=todo,
        disclaimer=_DISCLAIMER,
    )
def drafts_from_pattern(
    pattern: Dict[str, Any],
    control_links_by_cap: Optional[Dict[str, List[str]]] = None,
) -> List[PlaybookDraft]:
    """Assemble one playbook draft per delta capability of a transition/convergence pattern.

    Args:
        pattern: Pattern dict; only its ``delta_requirements`` list is read. A missing or null
            list yields no drafts.
        control_links_by_cap: Optional mapping from capability id to the Execution controls to
            inject into that capability's draft.

    Returns:
        The drafts in pattern order. Entries that are empty/null or carry no ``capability``
        value are skipped.
    """
    links = control_links_by_cap or {}
    drafts: List[PlaybookDraft] = []
    # `or []` rather than a `.get` default: a key that is present but null must not crash.
    for entry in pattern.get("delta_requirements") or []:
        cap = entry.get("capability") if entry else None
        if not cap:
            continue  # nothing to name the draft after
        cap_id = str(cap)
        drafts.append(generate_playbook_draft(cap_id, entry, links.get(cap_id)))
    return drafts
@@ -0,0 +1,46 @@
"""Schemas for Knowledge Production — deterministic draft assembly + lifecycle.
The corpus is no longer written by hand: it is deterministically PREPARED from data the software
already owns (Capability, Transition Pattern, Controls, Evidence, leverage), then curated by an
expert. A `PlaybookDraft` is a machine-assembled skeleton with per-field provenance and an explicit
TODO list of what still needs human (or offline-propose) input. No LLM in the deterministic core.
Python 3.9 compatible (no `|` unions).
"""
from __future__ import annotations
from enum import Enum
from typing import Dict, List
from pydantic import BaseModel, Field
class DraftStatus(str, Enum):
    """Release status of a knowledge artefact, from machine draft to field-proven.

    Mirrors the transition-pattern / playbook maturity levels, with a machine-assembled
    stage placed in front of them.
    """

    # Machine-assembled, not yet touched by an expert.
    DRAFT_GENERATED = "draft_generated"
    # An expert is currently curating the draft.
    IN_REVIEW = "in_review"
    # Internally reviewed.
    REVIEWED = "reviewed"
    # Confirmed by a domain expert.
    VALIDATED = "validated"
    # Confirmed in the field.
    PROVEN = "proven"
class PlaybookDraft(BaseModel):
    """Machine-assembled playbook skeleton for a single capability.

    The owned fields (why / closes_regulations / expected_evidence / typical_controls) are
    filled from existing data and traced in `provenance`. The practitioner know-how
    (tools / process_steps / how_others) is not part of the model's content and is listed in
    `todo`, so the expert reviews a draft rather than starting from a blank page.
    """

    capability_id: str
    status: DraftStatus = DraftStatus.DRAFT_GENERATED
    title: str = ""

    # Rationale taken from the transition pattern (why_asked / missing_because).
    why: str = ""
    # Regulations closed by the capability, taken from leverage (covers_targets).
    closes_regulations: List[str] = Field(default_factory=list)
    # Evidence expected according to the transition pattern.
    expected_evidence: List[str] = Field(default_factory=list)
    # Controls injected from Execution; may be empty.
    typical_controls: List[str] = Field(default_factory=list)

    # Maps a field name to the source it was assembled from.
    provenance: Dict[str, str] = Field(default_factory=dict)
    # Fields the expert (or an offline-propose step) still has to supply.
    todo: List[str] = Field(default_factory=list)
    # Notice that this is a machine draft requiring expert curation.
    disclaimer: str = ""
@@ -40,6 +40,7 @@ from compliance.transition_reasoning import (
)
from compliance.optimization import roadmap_from_delta, select_within_budget
from compliance.playbook import playbooks_for_plan
from compliance.knowledge_production import drafts_from_pattern
import os
import yaml
@@ -438,6 +439,30 @@ coverage_table([
("Playbook-Inhalt (Knowledge)", "TODO" if _miss else "PASS", "%d Capabilities brauchen noch Inhalt" % len(_miss)),
])
# ── Knowledge Production — Playbook Draft Generator (prepare, then curate) ───
# Writes the "Knowledge Production" section of the report: a headline count of generated
# drafts, one example draft and the coverage rows for this layer.
w("## Knowledge Production — Playbook-Entwürfe automatisch assemblieren")
w("")
w("_Der Engpass ist nicht Content, sondern Wissensproduktion. Der Corpus wird nicht von Hand geschrieben, sondern deterministisch aus vorhandenen Daten (Transition Pattern + Leverage + injizierte Controls) vorbereitet — dann fachlich kuratiert (wie Gesetz→Parser→Obligation→Review)._")
w("")
_kp = drafts_from_pattern(CP) if CP else []  # CP = convergence pattern (already loaded)
w("**Aus 1 Pattern → %d Playbook-Entwürfe** (`status: draft_generated`): eigene Felder (Warum/schließt/Nachweise) aus den Daten gefüllt, der Experte ergänzt nur Tools/Prozess/How-others." % len(_kp))
w("")
# Showcase the sbom_creation draft when present, otherwise the first draft (None if there are none).
_kd = next((d for d in _kp if d.capability_id == "sbom_creation"), _kp[0] if _kp else None)
if _kd:
    w("**Beispiel-Entwurf — `%s`** _(%s)_" % (_kd.capability_id, _kd.status.value))
    w("- **Warum** (aus Pattern): %s" % _kd.why.strip())
    w("- **schließt** %s · **Nachweise** %s" % ("+".join(_kd.closes_regulations) or "", ", ".join(_kd.expected_evidence) or ""))
    # Render each pair as "field←source"; without the separator the field name and its source
    # run together and no longer match the committed report ("why←transition_pattern:why_asked").
    w("- **Provenance:** %s" % ", ".join("%s←%s" % (k, v) for k, v in _kd.provenance.items()))
    w("- **TODO (Experte/Offline-Propose):** %s" % ", ".join(_kd.todo))
    w("")
w("_So reviewt der Experte %d Entwürfe statt %d Playbooks zu schreiben. Derselbe Generator bereitet später ISO14001-/IATF-Entwürfe vor, sobald der Corpus da ist._" % (len(_kp), len(_kp)))
w("")
coverage_table([
    ("Playbook Draft Generator (deterministisch)", "PASS", "%d Entwürfe aus 1 Pattern, kein LLM im Kern" % len(_kp)),
    ("Provenance + TODO + Freigabestatus", "PASS", "draft_generated→reviewed→validated→proven"),
    ("Draft-Generatoren neue Domänen (Phase A)", "TODO", "Transition-/Reference-Scenario-Drafts"),
])
# ── Epics + roll-up ───────────────────────────────────────────────────────
w("## Gaps → Epics (Backlog — nur erfasst, NICHT implementiert)")
w("")
@@ -296,6 +296,28 @@ _Derselbe Capability-Strang, neuer Renderer: aus Diagnose wird Beratung. Die `fe
| Roadmap → Playbook (Verkettung) | **PASS** | 2/12 Maßnahmen mit Playbook |
| Playbook-Inhalt (Knowledge) | **TODO** | 10 Capabilities brauchen noch Inhalt |
## Knowledge Production — Playbook-Entwürfe automatisch assemblieren
_Der Engpass ist nicht Content, sondern Wissensproduktion. Der Corpus wird nicht von Hand geschrieben, sondern deterministisch aus vorhandenen Daten (Transition Pattern + Leverage + injizierte Controls) vorbereitet — dann fachlich kuratiert (wie Gesetz→Parser→Obligation→Review)._
**Aus 1 Pattern → 12 Playbook-Entwürfe** (`status: draft_generated`): eigene Felder (Warum/schließt/Nachweise) aus den Daten gefüllt, der Experte ergänzt nur Tools/Prozess/How-others.
**Beispiel-Entwurf — `sbom_creation`** _(draft_generated)_
- **Warum** (aus Pattern): CRA requires an SBOM; MaschinenVO does not.
- **schließt** CRA · **Nachweise** sbom
- **Provenance:** why←transition_pattern:why_asked, closes_regulations←leverage:covers_targets, expected_evidence←transition_pattern:expected_evidence
- **TODO (Experte/Offline-Propose):** tools, process_steps, how_others_do_it
_So reviewt der Experte 12 Entwürfe statt 12 Playbooks zu schreiben. Derselbe Generator bereitet später ISO14001-/IATF-Entwürfe vor, sobald der Corpus da ist._
**Architecture Coverage**
| Layer | Status | Hinweis |
|---|---|---|
| Playbook Draft Generator (deterministisch) | **PASS** | 12 Entwürfe aus 1 Pattern, kein LLM im Kern |
| Provenance + TODO + Freigabestatus | **PASS** | draft_generated→reviewed→validated→proven |
| Draft-Generatoren neue Domänen (Phase A) | **TODO** | Transition-/Reference-Scenario-Drafts |
## Gaps → Epics (Backlog — nur erfasst, NICHT implementiert)
| Epic | Titel | schliesst Coverage-Luecke |
@@ -307,6 +329,6 @@ _Derselbe Capability-Strang, neuer Renderer: aus Diagnose wird Beratung. Die `fe
## Suite-Status (Roll-up)
- Coverage-Zellen gesamt: **35**
- PASS: **26** · PARTIAL: 3 · UNSUPPORTED: 1 · TODO: 4 · N/A: 1 · NEEDS_FACTS: 0
- Coverage-Zellen gesamt: **38**
- PASS: **28** · PARTIAL: 3 · UNSUPPORTED: 1 · TODO: 5 · N/A: 1 · NEEDS_FACTS: 0
- Fortschritt = PASS-Anteil steigt, wenn Epics RS-001…004 landen (objektiver Maßstab, kein LOC).
@@ -0,0 +1,89 @@
"""Tests for Knowledge Production — the Playbook Draft Generator.
Acceptance: deterministically assemble a playbook DRAFT for a capability from a transition-pattern
delta requirement (why / closes / evidence with provenance), leaving practitioner know-how as an
explicit TODO; turn a whole pattern into one draft per delta capability. No LLM, fully deterministic.
The expert reviews drafts instead of writing from a blank page.
"""
from __future__ import annotations
from compliance.knowledge_production import (
DraftStatus, PlaybookDraft, drafts_from_pattern, generate_playbook_draft,
)
# Delta requirement closing a single regulation (CRA only).
REQ = dict(
    capability="sbom_creation",
    why_asked="CRA requires an SBOM; MaschinenVO does not.",
    covers_targets=["CRA"],
    expected_evidence=["sbom"],
)

# Convergence requirement closing two regulations at once.
CONV_REQ = dict(
    capability="product_cyber_risk_assessment",
    why_asked="Both require assessing cyber threats.",
    covers_targets=["CRA", "MaschinenVO"],
    expected_evidence=["product_risk_assessment"],
)
def test_assembles_owned_fields_with_provenance():
    """Owned fields are copied from the requirement and each one records its source."""
    d = generate_playbook_draft("sbom_creation", REQ, control_links=["component_inventory"])
    assert d.status == DraftStatus.DRAFT_GENERATED
    assert d.why.startswith("CRA requires an SBOM")
    # Separate asserts so a failure names the field that is wrong.
    assert d.closes_regulations == ["CRA"]
    assert d.expected_evidence == ["sbom"]
    assert d.typical_controls == ["component_inventory"]
    assert d.provenance["why"] == "transition_pattern:why_asked"
    assert d.provenance["closes_regulations"] == "leverage:covers_targets"
    # The evidence provenance is set by the generator as well and was previously unchecked.
    assert d.provenance["expected_evidence"] == "transition_pattern:expected_evidence"
    assert d.provenance["typical_controls"] == "execution:control_links"
def test_soft_fields_are_todo():
    """With every owned field present, only the practitioner know-how remains on the TODO list."""
    draft = generate_playbook_draft("sbom_creation", REQ)
    expected_todo = ["tools", "process_steps", "how_others_do_it"]
    assert draft.todo == expected_todo
def test_missing_owned_fields_go_to_todo():
    """An empty requirement puts `why` and `expected_evidence` on the TODO list."""
    draft = generate_playbook_draft("x", {})
    assert "why" in draft.todo
    assert "expected_evidence" in draft.todo
    assert draft.closes_regulations == []
    assert draft.typical_controls == []
    assert draft.status == DraftStatus.DRAFT_GENERATED
def test_missing_because_fallback_for_why():
    """`missing_because` is used as the rationale when `why_asked` is absent."""
    reason = "no analogue in ISO 27001"
    draft = generate_playbook_draft("x", {"missing_because": reason})
    assert draft.why == reason
    assert "why" not in draft.todo
def test_closes_deduped_sorted_and_title_humanised():
    """Duplicate targets collapse, the result is sorted, and underscores in the id become spaces."""
    requirement = {"covers_targets": ["MaschinenVO", "CRA", "CRA"]}
    draft = generate_playbook_draft("secure_signed_update_distribution", requirement)
    assert draft.closes_regulations == ["CRA", "MaschinenVO"]
    assert draft.title == "secure signed update distribution"
def test_controls_default_empty_no_execution_data():
    """Without injected control links the draft carries no typical controls."""
    draft = generate_playbook_draft("x", REQ)
    assert draft.typical_controls == []
def test_drafts_from_pattern_one_per_delta_in_order():
    """Each delta requirement yields one draft, in pattern order."""
    drafts = drafts_from_pattern({"delta_requirements": [REQ, CONV_REQ]})
    capability_ids = [draft.capability_id for draft in drafts]
    assert capability_ids == ["sbom_creation", "product_cyber_risk_assessment"]
    # Both covered targets of the convergence requirement are carried through.
    assert drafts[1].closes_regulations == ["CRA", "MaschinenVO"]
def test_drafts_from_pattern_injects_controls_and_skips_unnamed():
    """Controls are injected per capability; an entry without a capability key is skipped."""
    unnamed = {"why_asked": "no capability key"}
    drafts = drafts_from_pattern(
        {"delta_requirements": [REQ, unnamed]},
        control_links_by_cap={"sbom_creation": ["c1"]},
    )
    assert len(drafts) == 1
    assert drafts[0].typical_controls == ["c1"]
def test_deterministic():
    """Two runs over the same pattern produce identical drafts in every field."""
    pattern = {"delta_requirements": [REQ, CONV_REQ]}
    first = drafts_from_pattern(pattern)
    second = drafts_from_pattern(pattern)
    # Compare whole drafts rather than a three-field fingerprint, so drift in
    # closes_regulations, expected_evidence, provenance, title etc. is caught too.
    assert first == second
    # Each run must build fresh objects; equality must not come from shared instances.
    assert all(a is not b for a, b in zip(first, second))
def test_returns_playbookdraft_type():
    """The generator returns a `PlaybookDraft` instance."""
    draft = generate_playbook_draft("x", REQ)
    assert isinstance(draft, PlaybookDraft)
@@ -0,0 +1,55 @@
# ADR-005: Knowledge Production — prepare deterministically, then curate
- **Status:** Accepted
- **Datum:** 2026-06-27
- **Typ:** Architektur-Entscheidung
- **Bezug:** [ADR-004](ADR-004-implementation-playbooks.md), [ADR-002](ADR-002-transition-is-data-not-architecture.md), Architektur-Freeze v1.0, [[transition-reasoning]], [[iace-quality-architecture]]
## Kontext
Mit Capability Delta, Optimization und Playbooks ist die Diagnose weitgehend fertig. Der nächste
Engpass ist NICHT „Content" (mehr Playbooks schreiben), sondern **Wissensproduktion**: würde man
200 Playbooks (und je Domäne neue Patterns/Reference-Szenarien) von Hand schreiben, verlagerte sich
der Engpass dauerhaft vom Engineering auf manuelle Wissenspflege.
Der entscheidende Grundsatz war: **„Die Engine ändert sich nicht. Der Corpus wächst."** Diese ADR
ergänzt ihn:
> **„Und der Corpus wird nicht manuell geschrieben. Er wird deterministisch vorbereitet und
> anschließend fachlich kuratiert."**
## Entscheidung
1. **BreakPilot produziert künftig keine fertigen Wissensartefakte, sondern ENTWÜRFE.** Ein Draft
Generator strukturiert deterministisch aus Daten, die die Software bereits besitzt
(Capability, Transition Pattern, Controls, Evidence, Regulatory Map / Leverage), einen Entwurf —
und überlässt das Fachwissen der menschlichen Kuratierung.
2. **Spiegelung der Legal-Pipeline.** Wie `Gesetz → Parser → Obligation → Review` gilt jetzt
`neue Capability → Registry → Transition Pattern → Playbook Draft Generator → Expert Review →
versioniertes Playbook`. Dieselbe Logik für jedes Wissensartefakt (Playbooks, später Transition
Patterns, Reference-Szenarien).
3. **Deterministisch-first (kein LLM im Kern).** Der Generator assembliert nur, was die Software
besitzt; weiche Felder (Tools / Prozessschritte / „wie machen das andere") werden als **TODO**
ausgewiesen. Optionale Modell-Anreicherung bleibt **offline, advisory, propose-only** — nie im
deterministischen Kern (vgl. [[iace-quality-architecture]]).
4. **Freigabestatus.** Jedes Artefakt trägt einen Lifecycle
`draft_generated → in_review → reviewed → validated → proven` plus **Provenance je Feld**
(woraus es assembliert wurde) — Voraussetzung für Review-Workflow und Versionierung.
## Konsequenzen
- **Review statt Schreiben:** der Experte reviewt N Entwürfe statt N Artefakte zu schreiben — der
manuelle Aufwand sinkt massiv, ohne fachliche Kontrolle aufzugeben.
- **Neue Domänen werden billig:** sobald ein Domänen-Corpus (z. B. Umwelt) existiert, erzeugt
derselbe Generator erste Entwürfe — ISO 14001 wird ein Draft-+-Review-Problem, kein Schreibprojekt.
- **Internes Werkzeug:** die wertvollste Maschine ist nicht nur das Kunden-OS, sondern die
**Produktionsmaschine für das eigene regulatorische Wissen** — sie wird mit jeder Domäne wertvoller.
- **Freeze-konform:** kein neues Metamodell, kein Graph, kein neuer Corpus. `compliance/knowledge_production`
ist eine reine, deterministische Vorbereitung (computed-not-stored); Execution-Controls werden injiziert.
- **Phase A (Wissensproduktion) VOR Phase B (neue Domänen):** Draft-Generatoren (Playbook ✓, dann
Transition-Pattern, Reference-Szenario) + Review-Workflow + Versionierung + Freigabestatus, dann
ISO 14001 / IATF 16949 / IEC 62443.
- Diese ADR ist non-runtime → kein Deploy (siehe [ADR-001](ADR-001-runtime-deploy-policy.md)).