feat(knowledge-production): Playbook Draft Generator — prepare the corpus deterministically

The bottleneck is not content, it is knowledge PRODUCTION. Instead of writing 200 playbooks by hand, generate drafts deterministically from data the software already owns, then have an expert review them. Mirrors the legal pipeline (Gesetz -> Parser -> Obligation -> Review) for BreakPilot's own knowledge: new Capability -> Registry -> Transition Pattern -> Playbook Draft Generator -> Expert Review -> versioned Playbook.

- compliance/knowledge_production/: generate_playbook_draft(capability, requirement, control_links) + drafts_from_pattern(pattern) -> one PlaybookDraft per delta capability. Owned fields (why / closes_regulations / expected_evidence / typical_controls) are assembled with per-field provenance; the practitioner know-how (tools / process_steps / how_others) is left as an explicit TODO.
- DraftStatus lifecycle (Freigabestatus): draft_generated -> in_review -> reviewed -> validated -> proven. Deterministic, NO LLM in the core (any model enrichment stays offline/advisory/propose-only).
- ADR-005: extends "the engine does not change, the corpus grows" with "and the corpus is not written by hand — it is deterministically prepared, then curated".
- reference suite: "Knowledge Production" section turns the convergence pattern into 12 auto-assembled drafts (why/closes/evidence filled, tools/steps TODO) -> review 12 drafts, don't write 12 playbooks.

10 tests (50 with playbook/optimization/transition/company), mypy --strict clean, check-loc 0. Product code with no app caller + ADR/reference = non-runtime -> no deploy (ADR-001). Freeze-safe.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-27 13:31:31 +02:00
parent 1e1689f1f2
commit b6cfc0a503
7 changed files with 349 additions and 2 deletions
@@ -0,0 +1,19 @@
+"""Knowledge Production — deterministically prepare the corpus, then curate it.
+
+The corpus is not written by hand: the Playbook Draft Generator structures drafts from data the
+software already owns (Transition Pattern + leverage + injected Execution controls), leaving the
+practitioner know-how as TODO for expert review. Mirrors the legal pipeline (Parser -> Review).
+Deterministic, no LLM in core, no new corpus, no new meta-model class (freeze v1.0).
+"""
+
+from __future__ import annotations
+
+from .engine import drafts_from_pattern, generate_playbook_draft
+from .schemas import DraftStatus, PlaybookDraft
+
+# Public API of the knowledge_production package: everything re-exported above.
+__all__ = [
+    # Draft assembly entry points (imported from .engine).
+    "generate_playbook_draft",
+    "drafts_from_pattern",
+    # Draft model and its lifecycle enum (imported from .schemas).
+    "PlaybookDraft",
+    "DraftStatus",
+]
@@ -0,0 +1,91 @@
+"""Knowledge Production — the Playbook Draft Generator (deterministic assembly + expert review).
+
+Mirrors the legal pipeline (Gesetz -> Parser -> Obligation -> Review) for BreakPilot's OWN knowledge:
+new Capability -> Registry -> Transition Pattern -> **Playbook Draft Generator** -> Expert Review ->
+versioned Playbook. The generator does not WRITE playbooks — it STRUCTURES drafts from data the
+software already owns (a transition/convergence pattern's delta requirement: why_asked, covers_targets,
+expected_evidence) plus injected Execution controls. The practitioner know-how (tools / process steps /
+how others do it) is left as an explicit TODO for the expert (or a separate offline-propose step).
+
+Fully deterministic, NO LLM in the core (deterministic-first: any model enrichment is offline,
+advisory, never in this assembly). No new corpus, no new meta-model class (freeze v1.0). Python 3.9.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional
+
+from .schemas import DraftStatus, PlaybookDraft
+
+_SOFT_FIELDS = ["tools", "process_steps", "how_others_do_it"]   # practitioner know-how — expert/offline-propose
+_DISCLAIMER = (
+    "Maschinell assemblierter ENTWURF aus vorhandenen Daten (Transition Pattern + Leverage + "
+    "injizierte Controls). KEINE normative Anforderung; erfordert fachliche Kuratierung (TODO-Felder) "
+    "und Statuswechsel draft_generated -> reviewed -> validated."
+)
+
+
+def generate_playbook_draft(
+    capability_id: str,
+    requirement: Optional[Dict[str, Any]] = None,
+    control_links: Optional[List[str]] = None,
+) -> PlaybookDraft:
+    """Assemble a playbook draft for ONE capability from a pattern delta requirement (deterministic).
+
+    `requirement`: a delta_requirement dict (why_asked / covers_targets / expected_evidence). Owned
+    fields are filled with provenance; soft fields are listed in `todo`. `control_links`: injected
+    Execution controls (default empty — no Execution data in the draft generator).
+    """
+    req = requirement or {}
+    why = str(req.get("why_asked") or req.get("missing_because") or "")
+    closes = sorted({str(t) for t in req.get("covers_targets", [])})
+    evidence = [str(e) for e in req.get("expected_evidence", [])]
+    controls = list(control_links or [])
+
+    provenance: Dict[str, str] = {}
+    todo: List[str] = []
+    if why:
+        provenance["why"] = "transition_pattern:why_asked"
+    else:
+        todo.append("why")
+    if closes:
+        provenance["closes_regulations"] = "leverage:covers_targets"
+    if evidence:
+        provenance["expected_evidence"] = "transition_pattern:expected_evidence"
+    else:
+        todo.append("expected_evidence")
+    if controls:
+        provenance["typical_controls"] = "execution:control_links"
+    todo.extend(_SOFT_FIELDS)   # always expert-owned
+
+    return PlaybookDraft(
+        capability_id=capability_id,
+        status=DraftStatus.DRAFT_GENERATED,
+        title=capability_id.replace("_", " "),
+        why=why,
+        closes_regulations=closes,
+        expected_evidence=evidence,
+        typical_controls=controls,
+        provenance=provenance,
+        todo=todo,
+        disclaimer=_DISCLAIMER,
+    )
+
+
+def drafts_from_pattern(
+    pattern: Dict[str, Any],
+    control_links_by_cap: Optional[Dict[str, List[str]]] = None,
+) -> List[PlaybookDraft]:
+    """Assemble one playbook draft per delta capability of a transition/convergence pattern.
+
+    This is the "produce drafts, don't write them" tool: feed a pattern -> get a draft per missing
+    capability, ready for expert review. Deterministic + order-preserving (pattern order).
+    """
+    links = control_links_by_cap or {}
+    drafts: List[PlaybookDraft] = []
+    for d in pattern.get("delta_requirements", []):
+        cap = d.get("capability")
+        if not cap:
+            continue
+        drafts.append(generate_playbook_draft(str(cap), d, links.get(str(cap))))
+    return drafts
@@ -0,0 +1,46 @@
+"""Schemas for Knowledge Production — deterministic draft assembly + lifecycle.
+
+The corpus is no longer written by hand: it is deterministically PREPARED from data the software
+already owns (Capability, Transition Pattern, Controls, Evidence, leverage), then curated by an
+expert. A `PlaybookDraft` is a machine-assembled skeleton with per-field provenance and an explicit
+TODO list of what still needs human (or offline-propose) input. No LLM in the deterministic core.
+Python 3.9 compatible (no `|` unions).
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+from typing import Dict, List
+
+from pydantic import BaseModel, Field
+
+
+class DraftStatus(str, Enum):
+    """Freigabestatus — the knowledge lifecycle from machine draft to proven (mirrors the
+    transition-pattern / playbook maturity, with a machine-assembled pre-stage)."""
+
+    DRAFT_GENERATED = "draft_generated"   # machine-assembled, NOT yet expert-touched
+    IN_REVIEW = "in_review"               # an expert is curating it
+    REVIEWED = "reviewed"                 # internally reviewed
+    VALIDATED = "validated"               # domain expert confirmed
+    PROVEN = "proven"                     # confirmed in the field
+
+
+class PlaybookDraft(BaseModel):
+    """A deterministically assembled playbook draft for one capability.
+
+    Owned fields (why / closes_regulations / expected_evidence / typical_controls) are filled from
+    existing data with provenance; the practitioner know-how (tools / process_steps / how_others)
+    is left as TODO. The expert reviews a draft instead of writing from a blank page.
+    """
+
+    capability_id: str
+    status: DraftStatus = DraftStatus.DRAFT_GENERATED
+    title: str = ""
+    why: str = ""                                          # from the transition pattern (why_asked/missing_because)
+    closes_regulations: List[str] = Field(default_factory=list)   # from leverage (covers_targets)
+    expected_evidence: List[str] = Field(default_factory=list)    # from the transition pattern
+    typical_controls: List[str] = Field(default_factory=list)     # injected from Execution (may be empty)
+    provenance: Dict[str, str] = Field(default_factory=dict)      # field -> source it was assembled from
+    todo: List[str] = Field(default_factory=list)          # fields the expert/offline-propose must still add
+    disclaimer: str = ""                                   # machine draft, requires expert curation