Merge pull request 'feat: Knowledge Production — Playbook Draft Generator (prepare deterministically, curate)' (#19) from feat/knowledge-production into main
This commit is contained in:
@@ -0,0 +1,19 @@
|
||||
"""Knowledge Production — deterministically prepare the corpus, then curate it.
|
||||
|
||||
The corpus is not written by hand: the Playbook Draft Generator structures drafts from data the
|
||||
software already owns (Transition Pattern + leverage + injected Execution controls), leaving the
|
||||
practitioner know-how as TODO for expert review. Mirrors the legal pipeline (Parser -> Review).
|
||||
Deterministic, no LLM in core, no new corpus, no new meta-model class (freeze v1.0).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from .engine import drafts_from_pattern, generate_playbook_draft
|
||||
from .schemas import DraftStatus, PlaybookDraft
|
||||
|
||||
__all__ = [
|
||||
"generate_playbook_draft",
|
||||
"drafts_from_pattern",
|
||||
"PlaybookDraft",
|
||||
"DraftStatus",
|
||||
]
|
||||
@@ -0,0 +1,91 @@
|
||||
"""Knowledge Production — the Playbook Draft Generator (deterministic assembly + expert review).
|
||||
|
||||
Mirrors the legal pipeline (Gesetz -> Parser -> Obligation -> Review) for BreakPilot's OWN knowledge:
|
||||
new Capability -> Registry -> Transition Pattern -> **Playbook Draft Generator** -> Expert Review ->
|
||||
versioned Playbook. The generator does not WRITE playbooks — it STRUCTURES drafts from data the
|
||||
software already owns (a transition/convergence pattern's delta requirement: why_asked, covers_targets,
|
||||
expected_evidence) plus injected Execution controls. The practitioner know-how (tools / process steps /
|
||||
how others do it) is left as an explicit TODO for the expert (or a separate offline-propose step).
|
||||
|
||||
Fully deterministic, NO LLM in the core (deterministic-first: any model enrichment is offline,
|
||||
advisory, never in this assembly). No new corpus, no new meta-model class (freeze v1.0). Python 3.9.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .schemas import DraftStatus, PlaybookDraft
|
||||
|
||||
_SOFT_FIELDS = ["tools", "process_steps", "how_others_do_it"] # practitioner know-how — expert/offline-propose
|
||||
_DISCLAIMER = (
|
||||
"Maschinell assemblierter ENTWURF aus vorhandenen Daten (Transition Pattern + Leverage + "
|
||||
"injizierte Controls). KEINE normative Anforderung; erfordert fachliche Kuratierung (TODO-Felder) "
|
||||
"und Statuswechsel draft_generated -> reviewed -> validated."
|
||||
)
|
||||
|
||||
|
||||
def _as_str_list(value: Any) -> List[str]:
    """Coerce an optional scalar-or-iterable requirement field into a list of strings.

    `None` (a key that is present but has no value) yields an empty list, and a bare string is
    kept as ONE item instead of being iterated character by character. Any other value is
    iterated and each element is converted with `str`.
    """
    if value is None:
        return []
    if isinstance(value, str):
        return [value] if value else []
    return [str(item) for item in value]


def generate_playbook_draft(
    capability_id: str,
    requirement: Optional[Dict[str, Any]] = None,
    control_links: Optional[List[str]] = None,
) -> PlaybookDraft:
    """Assemble a playbook draft for ONE capability from a pattern delta requirement.

    Args:
        capability_id: Identifier of the capability; also used for the title, with underscores
            replaced by spaces.
        requirement: A delta_requirement dict. The keys read are `why_asked` (falling back to
            `missing_because`), `covers_targets` and `expected_evidence`. Missing keys, `None`
            values and a bare string in place of a list are all tolerated.
        control_links: Injected Execution controls. Defaults to empty; this function never looks
            any up itself.

    Returns:
        A `PlaybookDraft` in status `DRAFT_GENERATED`. Every owned field that could be filled is
        recorded in `provenance`; `todo` lists `why` / `expected_evidence` when they are empty,
        followed by the soft (practitioner know-how) fields, which are always left to the expert.
    """
    req = requirement or {}
    why = str(req.get("why_asked") or req.get("missing_because") or "")
    # De-duplicate and sort so the output does not depend on input order or set iteration order.
    closes = sorted(set(_as_str_list(req.get("covers_targets"))))
    evidence = _as_str_list(req.get("expected_evidence"))
    controls = list(control_links or [])

    provenance: Dict[str, str] = {}
    todo: List[str] = []
    if why:
        provenance["why"] = "transition_pattern:why_asked"
    else:
        todo.append("why")
    # An empty `closes` is not a TODO: only its provenance entry is omitted.
    if closes:
        provenance["closes_regulations"] = "leverage:covers_targets"
    if evidence:
        provenance["expected_evidence"] = "transition_pattern:expected_evidence"
    else:
        todo.append("expected_evidence")
    if controls:
        provenance["typical_controls"] = "execution:control_links"
    todo.extend(_SOFT_FIELDS)  # always expert-owned

    return PlaybookDraft(
        capability_id=capability_id,
        status=DraftStatus.DRAFT_GENERATED,
        title=capability_id.replace("_", " "),
        why=why,
        closes_regulations=closes,
        expected_evidence=evidence,
        typical_controls=controls,
        provenance=provenance,
        todo=todo,
        disclaimer=_DISCLAIMER,
    )
|
||||
|
||||
|
||||
def drafts_from_pattern(
    pattern: Dict[str, Any],
    control_links_by_cap: Optional[Dict[str, List[str]]] = None,
) -> List[PlaybookDraft]:
    """Assemble one playbook draft per delta capability of a transition/convergence pattern.

    This is the "produce drafts, don't write them" tool: feed a pattern, get a draft per missing
    capability, ready for expert review.

    Args:
        pattern: Pattern dict; only its `delta_requirements` list is read. A missing key and a
            key whose value is `None` both produce no drafts.
        control_links_by_cap: Optional mapping of capability id to the Execution controls to
            inject into that capability's draft.

    Returns:
        The drafts in pattern order. Entries without a truthy `capability` are skipped.
    """
    links = control_links_by_cap or {}
    drafts: List[PlaybookDraft] = []
    # `or []` rather than a `.get` default: the default only applies when the key is absent,
    # not when it is present with a null value.
    for requirement in pattern.get("delta_requirements") or []:
        cap = requirement.get("capability")
        if not cap:
            continue
        cap_id = str(cap)
        drafts.append(generate_playbook_draft(cap_id, requirement, links.get(cap_id)))
    return drafts
|
||||
@@ -0,0 +1,46 @@
|
||||
"""Schemas for Knowledge Production — deterministic draft assembly + lifecycle.
|
||||
|
||||
The corpus is no longer written by hand: it is deterministically PREPARED from data the software
|
||||
already owns (Capability, Transition Pattern, Controls, Evidence, leverage), then curated by an
|
||||
expert. A `PlaybookDraft` is a machine-assembled skeleton with per-field provenance and an explicit
|
||||
TODO list of what still needs human (or offline-propose) input. No LLM in the deterministic core.
|
||||
Python 3.9 compatible (no `|` unions).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
from typing import Dict, List
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class DraftStatus(str, Enum):
    """Release status of a knowledge artefact, from machine draft to field-proven.

    Mirrors the transition-pattern / playbook maturity levels, with one extra machine-assembled
    stage in front. Members are also `str` instances, so they compare equal to their raw value.
    """

    # Machine-assembled; no expert has touched it yet.
    DRAFT_GENERATED = "draft_generated"
    # An expert is currently curating it.
    IN_REVIEW = "in_review"
    # Internally reviewed.
    REVIEWED = "reviewed"
    # Confirmed by a domain expert.
    VALIDATED = "validated"
    # Confirmed in the field.
    PROVEN = "proven"
|
||||
|
||||
|
||||
class PlaybookDraft(BaseModel):
    """Machine-assembled playbook skeleton for a single capability.

    The owned fields (`why`, `closes_regulations`, `expected_evidence`, `typical_controls`) are
    filled from existing data and traced in `provenance`. The practitioner know-how
    (tools / process_steps / how_others_do_it) is not stored here; it is only named in `todo`,
    so the expert reviews a draft instead of starting from a blank page.
    """

    capability_id: str
    status: DraftStatus = DraftStatus.DRAFT_GENERATED
    title: str = ""
    # Taken from the transition pattern (why_asked / missing_because).
    why: str = ""
    # Taken from leverage (covers_targets).
    closes_regulations: List[str] = Field(default_factory=list)
    # Taken from the transition pattern.
    expected_evidence: List[str] = Field(default_factory=list)
    # Injected from Execution; may be empty.
    typical_controls: List[str] = Field(default_factory=list)
    # Maps a field name to the source it was assembled from.
    provenance: Dict[str, str] = Field(default_factory=dict)
    # Fields the expert (or an offline-propose step) still has to supply.
    todo: List[str] = Field(default_factory=list)
    # Notice that this is a machine draft requiring expert curation.
    disclaimer: str = ""
|
||||
@@ -40,6 +40,7 @@ from compliance.transition_reasoning import (
|
||||
)
|
||||
from compliance.optimization import roadmap_from_delta, select_within_budget
|
||||
from compliance.playbook import playbooks_for_plan
|
||||
from compliance.knowledge_production import drafts_from_pattern
|
||||
import os
|
||||
import yaml
|
||||
|
||||
@@ -438,6 +439,30 @@ coverage_table([
|
||||
("Playbook-Inhalt (Knowledge)", "TODO" if _miss else "PASS", "%d Capabilities brauchen noch Inhalt" % len(_miss)),
|
||||
])
|
||||
|
||||
# ── Knowledge Production — Playbook Draft Generator (vorbereiten, dann kuratieren) ───
|
||||
w("## Knowledge Production — Playbook-Entwürfe automatisch assemblieren")
|
||||
w("")
|
||||
w("_Der Engpass ist nicht Content, sondern Wissensproduktion. Der Corpus wird nicht von Hand geschrieben, sondern deterministisch aus vorhandenen Daten (Transition Pattern + Leverage + injizierte Controls) vorbereitet — dann fachlich kuratiert (wie Gesetz→Parser→Obligation→Review)._")
|
||||
w("")
|
||||
_kp = drafts_from_pattern(CP) if CP else [] # CP = convergence pattern (already loaded)
|
||||
w("**Aus 1 Pattern → %d Playbook-Entwürfe** (`status: draft_generated`): eigene Felder (Warum/schließt/Nachweise) aus den Daten gefüllt, der Experte ergänzt nur Tools/Prozess/How-others." % len(_kp))
|
||||
w("")
|
||||
_kd = next((d for d in _kp if d.capability_id == "sbom_creation"), _kp[0] if _kp else None)
|
||||
if _kd:
|
||||
w("**Beispiel-Entwurf — `%s`** _(%s)_" % (_kd.capability_id, _kd.status.value))
|
||||
w("- **Warum** (aus Pattern): %s" % _kd.why.strip())
|
||||
w("- **schließt** %s · **Nachweise** %s" % ("+".join(_kd.closes_regulations) or "—", ", ".join(_kd.expected_evidence) or "—"))
|
||||
w("- **Provenance:** %s" % ", ".join("%s←%s" % (k, v) for k, v in _kd.provenance.items()))
|
||||
w("- **TODO (Experte/Offline-Propose):** %s" % ", ".join(_kd.todo))
|
||||
w("")
|
||||
w("_So reviewt der Experte %d Entwürfe statt %d Playbooks zu schreiben. Derselbe Generator bereitet später ISO14001-/IATF-Entwürfe vor, sobald der Corpus da ist._" % (len(_kp), len(_kp)))
|
||||
w("")
|
||||
coverage_table([
|
||||
("Playbook Draft Generator (deterministisch)", "PASS", "%d Entwürfe aus 1 Pattern, kein LLM im Kern" % len(_kp)),
|
||||
("Provenance + TODO + Freigabestatus", "PASS", "draft_generated→reviewed→validated→proven"),
|
||||
("Draft-Generatoren neue Domänen (Phase A)", "TODO", "Transition-/Reference-Scenario-Drafts"),
|
||||
])
|
||||
|
||||
# ── Epics + roll-up ───────────────────────────────────────────────────────
|
||||
w("## Gaps → Epics (Backlog — nur erfasst, NICHT implementiert)")
|
||||
w("")
|
||||
|
||||
@@ -296,6 +296,28 @@ _Derselbe Capability-Strang, neuer Renderer: aus Diagnose wird Beratung. Die `fe
|
||||
| Roadmap → Playbook (Verkettung) | **PASS** | 2/12 Maßnahmen mit Playbook |
|
||||
| Playbook-Inhalt (Knowledge) | **TODO** | 10 Capabilities brauchen noch Inhalt |
|
||||
|
||||
## Knowledge Production — Playbook-Entwürfe automatisch assemblieren
|
||||
|
||||
_Der Engpass ist nicht Content, sondern Wissensproduktion. Der Corpus wird nicht von Hand geschrieben, sondern deterministisch aus vorhandenen Daten (Transition Pattern + Leverage + injizierte Controls) vorbereitet — dann fachlich kuratiert (wie Gesetz→Parser→Obligation→Review)._
|
||||
|
||||
**Aus 1 Pattern → 12 Playbook-Entwürfe** (`status: draft_generated`): eigene Felder (Warum/schließt/Nachweise) aus den Daten gefüllt, der Experte ergänzt nur Tools/Prozess/How-others.
|
||||
|
||||
**Beispiel-Entwurf — `sbom_creation`** _(draft_generated)_
|
||||
- **Warum** (aus Pattern): CRA requires an SBOM; MaschinenVO does not.
|
||||
- **schließt** CRA · **Nachweise** sbom
|
||||
- **Provenance:** why←transition_pattern:why_asked, closes_regulations←leverage:covers_targets, expected_evidence←transition_pattern:expected_evidence
|
||||
- **TODO (Experte/Offline-Propose):** tools, process_steps, how_others_do_it
|
||||
|
||||
_So reviewt der Experte 12 Entwürfe statt 12 Playbooks zu schreiben. Derselbe Generator bereitet später ISO14001-/IATF-Entwürfe vor, sobald der Corpus da ist._
|
||||
|
||||
**Architecture Coverage**
|
||||
|
||||
| Layer | Status | Hinweis |
|
||||
|---|---|---|
|
||||
| Playbook Draft Generator (deterministisch) | **PASS** | 12 Entwürfe aus 1 Pattern, kein LLM im Kern |
|
||||
| Provenance + TODO + Freigabestatus | **PASS** | draft_generated→reviewed→validated→proven |
|
||||
| Draft-Generatoren neue Domänen (Phase A) | **TODO** | Transition-/Reference-Scenario-Drafts |
|
||||
|
||||
## Gaps → Epics (Backlog — nur erfasst, NICHT implementiert)
|
||||
|
||||
| Epic | Titel | schliesst Coverage-Luecke |
|
||||
@@ -307,6 +329,6 @@ _Derselbe Capability-Strang, neuer Renderer: aus Diagnose wird Beratung. Die `fe
|
||||
|
||||
## Suite-Status (Roll-up)
|
||||
|
||||
- Coverage-Zellen gesamt: **35**
|
||||
- PASS: **26** · PARTIAL: 3 · UNSUPPORTED: 1 · TODO: 4 · N/A: 1 · NEEDS_FACTS: 0
|
||||
- Coverage-Zellen gesamt: **38**
|
||||
- PASS: **28** · PARTIAL: 3 · UNSUPPORTED: 1 · TODO: 5 · N/A: 1 · NEEDS_FACTS: 0
|
||||
- Fortschritt = PASS-Anteil steigt, wenn Epics RS-001…004 landen (objektiver Maßstab, kein LOC).
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
"""Tests for Knowledge Production — the Playbook Draft Generator.
|
||||
|
||||
Acceptance: deterministically assemble a playbook DRAFT for a capability from a transition-pattern
|
||||
delta requirement (why / closes / evidence with provenance), leaving practitioner know-how as an
|
||||
explicit TODO; turn a whole pattern into one draft per delta capability. No LLM, fully deterministic.
|
||||
The expert reviews drafts instead of writing from a blank page.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from compliance.knowledge_production import (
|
||||
DraftStatus, PlaybookDraft, drafts_from_pattern, generate_playbook_draft,
|
||||
)
|
||||
|
||||
REQ = {
|
||||
"capability": "sbom_creation",
|
||||
"why_asked": "CRA requires an SBOM; MaschinenVO does not.",
|
||||
"covers_targets": ["CRA"],
|
||||
"expected_evidence": ["sbom"],
|
||||
}
|
||||
CONV_REQ = {
|
||||
"capability": "product_cyber_risk_assessment",
|
||||
"why_asked": "Both require assessing cyber threats.",
|
||||
"covers_targets": ["CRA", "MaschinenVO"],
|
||||
"expected_evidence": ["product_risk_assessment"],
|
||||
}
|
||||
|
||||
|
||||
def test_assembles_owned_fields_with_provenance():
    """Owned fields are filled from the requirement and each one records its source."""
    d = generate_playbook_draft("sbom_creation", REQ, control_links=["component_inventory"])
    assert d.status == DraftStatus.DRAFT_GENERATED
    assert d.why.startswith("CRA requires an SBOM")
    assert d.closes_regulations == ["CRA"] and d.expected_evidence == ["sbom"]
    assert d.typical_controls == ["component_inventory"]
    assert d.provenance["why"] == "transition_pattern:why_asked"
    assert d.provenance["closes_regulations"] == "leverage:covers_targets"
    # Previously unchecked: evidence is an owned field too and must carry provenance.
    assert d.provenance["expected_evidence"] == "transition_pattern:expected_evidence"
    assert d.provenance["typical_controls"] == "execution:control_links"
|
||||
|
||||
|
||||
def test_soft_fields_are_todo():
    """With a complete requirement, only the practitioner know-how fields remain open."""
    expected_todo = ["tools", "process_steps", "how_others_do_it"]
    draft = generate_playbook_draft("sbom_creation", REQ)
    assert draft.todo == expected_todo
|
||||
|
||||
|
||||
def test_missing_owned_fields_go_to_todo():
    """An empty requirement turns the unfilled owned fields into TODO entries."""
    draft = generate_playbook_draft("x", {})
    for owned_field in ("why", "expected_evidence"):
        assert owned_field in draft.todo
    assert draft.closes_regulations == []
    assert draft.typical_controls == []
    assert draft.status == DraftStatus.DRAFT_GENERATED
|
||||
|
||||
|
||||
def test_missing_because_fallback_for_why():
    """`missing_because` is used for `why` when `why_asked` is absent."""
    reason = "no analogue in ISO 27001"
    draft = generate_playbook_draft("x", {"missing_because": reason})
    assert draft.why == reason
    assert "why" not in draft.todo
|
||||
|
||||
|
||||
def test_closes_deduped_sorted_and_title_humanised():
    """Targets come back unique and sorted; the title is the id with spaces for underscores."""
    requirement = {"covers_targets": ["MaschinenVO", "CRA", "CRA"]}
    draft = generate_playbook_draft("secure_signed_update_distribution", requirement)
    assert draft.closes_regulations == ["CRA", "MaschinenVO"]
    assert draft.title == "secure signed update distribution"
|
||||
|
||||
|
||||
def test_controls_default_empty_no_execution_data():
    """Without injected control links the draft carries no typical controls."""
    draft = generate_playbook_draft("x", REQ)
    assert draft.typical_controls == []
|
||||
|
||||
|
||||
def test_drafts_from_pattern_one_per_delta_in_order():
    """One draft per delta requirement, in pattern order, with both targets carried through."""
    drafts = drafts_from_pattern({"delta_requirements": [REQ, CONV_REQ]})
    capability_ids = [draft.capability_id for draft in drafts]
    assert capability_ids == ["sbom_creation", "product_cyber_risk_assessment"]
    second = drafts[1]
    assert second.closes_regulations == ["CRA", "MaschinenVO"]
|
||||
|
||||
|
||||
def test_drafts_from_pattern_injects_controls_and_skips_unnamed():
    """Controls are injected per capability; an entry without a capability is skipped."""
    unnamed = {"why_asked": "no capability key"}
    pattern = {"delta_requirements": [REQ, unnamed]}
    injected = {"sbom_creation": ["c1"]}
    drafts = drafts_from_pattern(pattern, control_links_by_cap=injected)
    assert len(drafts) == 1
    assert drafts[0].typical_controls == ["c1"]
|
||||
|
||||
|
||||
def test_deterministic():
    """Two runs over the same pattern yield field-for-field identical drafts."""
    pattern = {"delta_requirements": [REQ, CONV_REQ]}

    def snapshot(drafts):
        # Capture EVERY assembled field. `closes_regulations` is built from a set, so it is the
        # field most exposed to ordering differences, and it used to be left out of the check.
        return [
            (
                d.capability_id,
                d.status,
                d.title,
                d.why,
                tuple(d.closes_regulations),
                tuple(d.expected_evidence),
                tuple(d.typical_controls),
                tuple(d.provenance.items()),
                tuple(d.todo),
                d.disclaimer,
            )
            for d in drafts
        ]

    a = snapshot(drafts_from_pattern(pattern))
    b = snapshot(drafts_from_pattern(pattern))
    assert a == b
|
||||
|
||||
|
||||
def test_returns_playbookdraft_type():
    """The generator returns a `PlaybookDraft` instance."""
    draft = generate_playbook_draft("x", REQ)
    assert isinstance(draft, PlaybookDraft)
|
||||
@@ -0,0 +1,55 @@
|
||||
# ADR-005: Knowledge Production — prepare deterministically, then curate
|
||||
|
||||
- **Status:** Accepted
|
||||
- **Datum:** 2026-06-27
|
||||
- **Typ:** Architektur-Entscheidung
|
||||
- **Bezug:** [ADR-004](ADR-004-implementation-playbooks.md), [ADR-002](ADR-002-transition-is-data-not-architecture.md), Architektur-Freeze v1.0, [[transition-reasoning]], [[iace-quality-architecture]]
|
||||
|
||||
## Kontext
|
||||
|
||||
Mit Capability Delta, Optimization und Playbooks ist die Diagnose weitgehend fertig. Der nächste
|
||||
Engpass ist NICHT „Content" (mehr Playbooks schreiben), sondern **Wissensproduktion**: würde man
|
||||
200 Playbooks (und je Domäne neue Patterns/Reference-Szenarien) von Hand schreiben, verlagerte sich
|
||||
der Engpass dauerhaft vom Engineering auf manuelle Wissenspflege.
|
||||
|
||||
Der entscheidende Grundsatz war: **„Die Engine ändert sich nicht. Der Corpus wächst."** Diese ADR
|
||||
ergänzt ihn:
|
||||
|
||||
> **„Und der Corpus wird nicht manuell geschrieben. Er wird deterministisch vorbereitet und
|
||||
> anschließend fachlich kuratiert."**
|
||||
|
||||
## Entscheidung
|
||||
|
||||
1. **BreakPilot produziert künftig keine fertigen Wissensartefakte, sondern ENTWÜRFE.** Ein Draft
|
||||
Generator strukturiert deterministisch aus Daten, die die Software bereits besitzt
|
||||
(Capability, Transition Pattern, Controls, Evidence, Regulatory Map / Leverage), einen Entwurf —
|
||||
und überlässt das Fachwissen der menschlichen Kuratierung.
|
||||
|
||||
2. **Spiegelung der Legal-Pipeline.** Wie `Gesetz → Parser → Obligation → Review` gilt jetzt
|
||||
`neue Capability → Registry → Transition Pattern → Playbook Draft Generator → Expert Review →
|
||||
versioniertes Playbook`. Dieselbe Logik für jedes Wissensartefakt (Playbooks, später Transition
|
||||
Patterns, Reference-Szenarien).
|
||||
|
||||
3. **Deterministisch-first (kein LLM im Kern).** Der Generator assembliert nur, was die Software
|
||||
besitzt; weiche Felder (Tools / Prozessschritte / „wie machen das andere") werden als **TODO**
|
||||
ausgewiesen. Optionale Modell-Anreicherung bleibt **offline, advisory, propose-only** — nie im
|
||||
deterministischen Kern (vgl. [[iace-quality-architecture]]).
|
||||
|
||||
4. **Freigabestatus.** Jedes Artefakt trägt einen Lifecycle
|
||||
`draft_generated → in_review → reviewed → validated → proven` plus **Provenance je Feld**
|
||||
(woraus es assembliert wurde) — Voraussetzung für Review-Workflow und Versionierung.
|
||||
|
||||
## Konsequenzen
|
||||
|
||||
- **Review statt Schreiben:** der Experte reviewt N Entwürfe statt N Artefakte zu schreiben — der
|
||||
manuelle Aufwand sinkt massiv, ohne fachliche Kontrolle aufzugeben.
|
||||
- **Neue Domänen werden billig:** sobald ein Domänen-Corpus (z. B. Umwelt) existiert, erzeugt
|
||||
derselbe Generator erste Entwürfe — ISO 14001 wird ein Draft-+-Review-Problem, kein Schreibprojekt.
|
||||
- **Internes Werkzeug:** die wertvollste Maschine ist nicht nur das Kunden-OS, sondern die
|
||||
**Produktionsmaschine für das eigene regulatorische Wissen** — sie wird mit jeder Domäne wertvoller.
|
||||
- **Freeze-konform:** kein neues Metamodell, kein Graph, kein neuer Corpus. `compliance/knowledge_production`
|
||||
ist eine reine, deterministische Vorbereitung (computed-not-stored); Execution-Controls werden injiziert.
|
||||
- **Phase A (Wissensproduktion) VOR Phase B (neue Domänen):** Draft-Generatoren (Playbook ✓, dann
|
||||
Transition-Pattern, Reference-Szenario) + Review-Workflow + Versionierung + Freigabestatus, dann
|
||||
ISO 14001 / IATF 16949 / IEC 62443.
|
||||
- Diese ADR ist non-runtime → kein Deploy (siehe [ADR-001](ADR-001-runtime-deploy-policy.md)).
|
||||
Reference in New Issue
Block a user