Files
breakpilot-compliance/backend-compliance/compliance/knowledge_production/engine.py
T
Benjamin Admin b6cfc0a503 feat(knowledge-production): Playbook Draft Generator — prepare the corpus deterministically
The bottleneck is not content, it is knowledge PRODUCTION. Instead of writing 200 playbooks by
hand, generate drafts deterministically from data the software already owns, then have an expert
review them. Mirrors the legal pipeline (Gesetz -> Parser -> Obligation -> Review) for BreakPilot's
own knowledge: new Capability -> Registry -> Transition Pattern -> Playbook Draft Generator ->
Expert Review -> versioned Playbook.

- compliance/knowledge_production/: generate_playbook_draft(capability, requirement, control_links)
  + drafts_from_pattern(pattern) -> one PlaybookDraft per delta capability. Owned fields (why /
  closes_regulations / expected_evidence / typical_controls) are assembled with per-field provenance;
  the practitioner know-how (tools / process_steps / how_others) is left as an explicit TODO.
- DraftStatus lifecycle (Freigabestatus): draft_generated -> in_review -> reviewed -> validated ->
  proven. Deterministic, NO LLM in the core (any model enrichment stays offline/advisory/propose-only).
- ADR-005: extends "the engine does not change, the corpus grows" with "and the corpus is not written
  by hand — it is deterministically prepared, then curated".
- reference suite: "Knowledge Production" section turns the convergence pattern into 12 auto-assembled
  drafts (why/closes/evidence filled, tools/steps TODO) -> review 12 drafts, don't write 12 playbooks.

10 tests (50 with playbook/optimization/transition/company), mypy --strict clean, check-loc 0.
Product code with no app caller + ADR/reference = non-runtime -> no deploy (ADR-001). Freeze-safe.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-27 13:31:31 +02:00

92 lines
3.8 KiB
Python

"""Knowledge Production — the Playbook Draft Generator (deterministic assembly + expert review).
Mirrors the legal pipeline (Gesetz -> Parser -> Obligation -> Review) for BreakPilot's OWN knowledge:
new Capability -> Registry -> Transition Pattern -> **Playbook Draft Generator** -> Expert Review ->
versioned Playbook. The generator does not WRITE playbooks — it STRUCTURES drafts from data the
software already owns (a transition/convergence pattern's delta requirement: why_asked, covers_targets,
expected_evidence) plus injected Execution controls. The practitioner know-how (tools / process steps /
how others do it) is left as an explicit TODO for the expert (or a separate offline-propose step).
Fully deterministic, NO LLM in the core (deterministic-first: any model enrichment is offline,
advisory, never in this assembly). No new corpus, no new meta-model class (freeze v1.0). Python 3.9.
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional
from .schemas import DraftStatus, PlaybookDraft
# Playbook fields that hold practitioner know-how. generate_playbook_draft never fills these from
# data; it appends all of them to every draft's `todo` list.
_SOFT_FIELDS = ["tools", "process_steps", "how_others_do_it"] # practitioner know-how — expert/offline-propose
# German-language disclaimer passed as `disclaimer` to every PlaybookDraft built in this module.
# In English, roughly: machine-assembled DRAFT from existing data (transition pattern + leverage +
# injected controls); NOT a normative requirement; needs expert curation of the TODO fields and a
# status change. This is a runtime string — do not translate or reformat it here.
_DISCLAIMER = (
"Maschinell assemblierter ENTWURF aus vorhandenen Daten (Transition Pattern + Leverage + "
"injizierte Controls). KEINE normative Anforderung; erfordert fachliche Kuratierung (TODO-Felder) "
"und Statuswechsel draft_generated -> reviewed -> validated."
)
def generate_playbook_draft(
    capability_id: str,
    requirement: Optional[Dict[str, Any]] = None,
    control_links: Optional[List[str]] = None,
) -> PlaybookDraft:
    """Assemble a playbook draft for ONE capability from a pattern delta requirement (deterministic).

    Owned fields are filled from `requirement` / `control_links` and recorded in `provenance`;
    fields that could not be filled, plus the always expert-owned soft fields, are listed in `todo`.

    Args:
        capability_id: Capability identifier. The draft title is this id with "_" replaced by " ".
        requirement: A delta_requirement dict. Keys read: ``why_asked`` (falling back to
            ``missing_because``), ``covers_targets`` and ``expected_evidence``. A missing key and a
            key explicitly set to ``None`` are both treated as "no data". ``None`` means no
            requirement at all.
        control_links: Injected Execution controls (default empty — no Execution data in the draft
            generator). The list is copied, never mutated.

    Returns:
        A PlaybookDraft with status ``DraftStatus.DRAFT_GENERATED``. ``closes_regulations`` is
        de-duplicated and sorted; ``expected_evidence`` and ``typical_controls`` keep input order.
    """
    req = requirement or {}
    why = str(req.get("why_asked") or req.get("missing_because") or "")
    # `or []` instead of a .get() default: the default only applies when the key is absent, so a
    # key present with value None (e.g. a JSON null) would otherwise raise TypeError on iteration.
    closes = sorted({str(t) for t in (req.get("covers_targets") or [])})
    evidence = [str(e) for e in (req.get("expected_evidence") or [])]
    controls = list(control_links or [])

    provenance: Dict[str, str] = {}
    todo: List[str] = []

    if why:
        # NOTE(review): the label names why_asked even when the text came from the
        # missing_because fallback — confirm whether consumers need to tell the two apart.
        provenance["why"] = "transition_pattern:why_asked"
    else:
        todo.append("why")

    # An empty closes_regulations gets no provenance entry and is NOT added to todo.
    if closes:
        provenance["closes_regulations"] = "leverage:covers_targets"

    if evidence:
        provenance["expected_evidence"] = "transition_pattern:expected_evidence"
    else:
        todo.append("expected_evidence")

    # Controls are optional input: no provenance entry and no todo entry when none were injected.
    if controls:
        provenance["typical_controls"] = "execution:control_links"

    todo.extend(_SOFT_FIELDS)  # always expert-owned

    return PlaybookDraft(
        capability_id=capability_id,
        status=DraftStatus.DRAFT_GENERATED,
        title=capability_id.replace("_", " "),
        why=why,
        closes_regulations=closes,
        expected_evidence=evidence,
        typical_controls=controls,
        provenance=provenance,
        todo=todo,
        disclaimer=_DISCLAIMER,
    )
def drafts_from_pattern(
    pattern: Dict[str, Any],
    control_links_by_cap: Optional[Dict[str, List[str]]] = None,
) -> List[PlaybookDraft]:
    """Assemble one playbook draft per delta capability of a transition/convergence pattern.

    This is the "produce drafts, don't write them" tool: feed a pattern -> get a draft per missing
    capability, ready for expert review. Deterministic + order-preserving (pattern order).

    Args:
        pattern: Pattern dict. Only ``delta_requirements`` is read here: a list of dicts, each with
            a ``capability`` entry. A missing key and an explicit ``None`` both yield no drafts.
        control_links_by_cap: Optional mapping from capability id (as ``str``) to the control links
            to inject into that capability's draft. Capabilities without an entry get none.

    Returns:
        One PlaybookDraft per delta requirement that has a truthy ``capability``, in pattern order.
        Entries with a missing or empty ``capability`` are skipped.
    """
    links = control_links_by_cap or {}
    drafts: List[PlaybookDraft] = []
    # `or []` instead of a .get() default: the default only applies when the key is absent, so
    # "delta_requirements": None would otherwise raise TypeError on iteration.
    for delta in pattern.get("delta_requirements") or []:
        cap = delta.get("capability")
        if not cap:
            continue
        cap_id = str(cap)  # one normalised id for both the draft and the control lookup
        drafts.append(generate_playbook_draft(cap_id, delta, links.get(cap_id)))
    return drafts