Merge pull request 'feat: Knowledge Intake — classify + impact triage before extraction' (#20) from feat/knowledge-intake into main

2026-06-27 13:59:47 +02:00
parent d51bcd77c7 07e392913f
commit 0b0d262462
8 changed files with 419 additions and 2 deletions
@@ -0,0 +1,23 @@
 """Knowledge Intake — classify an incoming document and assess its impact on existing knowledge.
 The stage BEFORE the parser: no content extraction, only classification. Intersects a document's signals
 (regulations + keywords) with an index of the existing knowledge to emit a `KnowledgePackage` — which
 capabilities / playbooks / patterns / reference scenarios / obligations it probably touches, whether
 it is a new domain, and how much review it warrants. Deterministic, no LLM, no new corpus (freeze v1.0).
 """
 from __future__ import annotations
 from .engine import assess_document_impact, build_knowledge_index
 from .schemas import (
    DocumentDescriptor, ImpactLevel, KnowledgeIndex, KnowledgePackage,
 )
 # Public API: the two engine entry points plus the schema types they consume and produce.
 __all__ = [
    "build_knowledge_index",
    "assess_document_impact",
    "DocumentDescriptor",
    "KnowledgeIndex",
    "KnowledgePackage",
    "ImpactLevel",
 ]
@@ -0,0 +1,111 @@
 """Knowledge Intake — classify a document and assess its impact on existing knowledge.
 The real Knowledge Production is not writing — it is TARGETED UPDATING: when 20 documents arrive,
 which 5 actually change our knowledge and which 15 are ignorable? Intake answers this deterministically
 by intersecting a document's signals (declared regulations + keywords) with an index of the existing
 knowledge (capabilities, playbooks, transition patterns, reference scenarios, injected obligations).
 It performs NO content extraction (that is the later parser stage) and uses NO LLM.
 Pipeline: Knowledge Intake -> Knowledge Package -> Parser -> Draft Generator -> Review -> Published.
 Pure, deterministic, computed-not-stored. No new corpus/meta-model class (freeze v1.0). Python 3.9.
 """
 from __future__ import annotations
 from typing import Any, Dict, List, Optional, Set
 from .schemas import DocumentDescriptor, ImpactLevel, KnowledgeIndex, KnowledgePackage
 def _targets(goal_to: Any) -> List[str]:
    """Collect target regulation names from a `transition_goal.to` value.

    `goal_to` may be a single dict or a list of dicts. For each dict the first truthy value among
    the keys `regulation`, `target`, `framework` (in that order) is taken and converted to `str`.
    Entries that are not dicts (e.g. `None` or plain strings) and dicts without a truthy value
    under any of those keys contribute nothing.
    """
    candidates = goal_to if isinstance(goal_to, list) else [goal_to]
    found: List[str] = []
    for entry in candidates:
        if not isinstance(entry, dict):
            continue
        # Key precedence mirrors an `a or b or c` chain: the first truthy value wins.
        for key in ("regulation", "target", "framework"):
            value = entry.get(key)
            if value:
                found.append(str(value))
                break
    return found
 def _as_dicts(entries: Any) -> List[Dict[str, Any]]:
    """Return the dict entries of `entries` in order; `None` (a YAML null) counts as empty."""
    return [entry for entry in (entries or []) if isinstance(entry, dict)]
 def _goal_targets(entry: Dict[str, Any]) -> List[str]:
    """Return the target regulations of `entry["transition_goal"]["to"]`, or [] if the goal is not a dict."""
    goal = entry.get("transition_goal")
    return _targets(goal.get("to") if isinstance(goal, dict) else None)
 def build_knowledge_index(
    patterns: List[Dict[str, Any]],
    playbooks: List[Dict[str, Any]],
    reference_scenarios: List[Dict[str, Any]],
    obligation_index: Optional[Dict[str, List[str]]] = None,
 ) -> KnowledgeIndex:
    """Assemble the matching index from already-loaded knowledge dicts (file I/O stays in the caller).

    Args:
        patterns: transition-pattern dicts; `id`, `transition_goal.to`, `likely_covered` and
            `delta_requirements` are read.
        playbooks: playbook dicts; only `capability_id` is read.
        reference_scenarios: scenario dicts; `id` and `transition_goal.to` are read.
        obligation_index: optional injected mapping regulation -> obligation ids.

    Returns:
        A `KnowledgeIndex` whose lists are sorted, so the result does not depend on set ordering.

    Keys that are present but null (as an empty YAML value loads) and entries that are not dicts
    are treated as missing instead of raising; well-formed input yields the same index as before.
    """
    tp: Dict[str, List[str]] = {}
    cap_regs: Dict[str, List[str]] = {}
    for p in _as_dicts(patterns):
        pid = str(p.get("id", ""))
        targets = _goal_targets(p)
        if pid:
            tp[pid] = targets
        # `dict.get(key, default)` returns None for a present-but-null key, hence `_as_dicts`.
        for item in _as_dicts(p.get("likely_covered")) + _as_dicts(p.get("delta_requirements")):
            cap = item.get("capability")
            if not cap:
                continue
            # A capability without its own covers_targets inherits the pattern's targets.
            regs = [str(t) for t in (item.get("covers_targets") or [])] or targets
            key = str(cap)
            cap_regs[key] = sorted(set(cap_regs.get(key, [])) | set(regs))
    rts = {str(r.get("id", "")): _goal_targets(r) for r in _as_dicts(reference_scenarios)}
    rts.pop("", None)  # scenarios without an id cannot be referenced
    obl = obligation_index or {}
    regulations = sorted(
        {t for ts in tp.values() for t in ts}
        | {t for ts in rts.values() for t in ts}
        | {t for ts in cap_regs.values() for t in ts}
        | set(obl.keys())
    )
    return KnowledgeIndex(
        regulations=regulations, capability_regulations=cap_regs,
        playbook_capabilities=sorted({str(pb.get("capability_id", "")) for pb in _as_dicts(playbooks)} - {""}),
        transition_patterns=tp, reference_scenarios=rts, obligation_index=dict(obl),
    )
 def _kw_match(keywords: Set[str], capability: str) -> bool:
    """Tell whether a keyword names the capability or one of its `_`-separated tokens.

    The capability id is lower-cased before comparison; `keywords` are compared as given.
    """
    name = capability.lower()
    parts = set(name.split("_"))
    if keywords & parts:
        return True
    return name in keywords
 def assess_document_impact(descriptor: DocumentDescriptor, index: KnowledgeIndex) -> KnowledgePackage:
    """Triage one document against the knowledge index and report what it probably touches.

    Capabilities match through a shared regulation or through a keyword (`_kw_match`); playbooks are
    the matched capabilities that have a playbook; patterns, reference scenarios and obligations
    match through declared regulations only. Regulation ids are compared exactly as given, while
    keywords are lower-cased first. Triage order: NEW_DOMAIN (regulations declared, none known),
    NONE (nothing matched), HIGH (>= 3 capabilities, any playbook, or >= 5 obligations), else LOW.
    """
    declared = set(descriptor.regulations)
    recognised = set(index.regulations)
    lowered_keywords = {word.lower() for word in descriptor.keywords}
    # New domain: the document declares regulations, yet none of them is in the index.
    is_new_domain = bool(declared) and declared.isdisjoint(recognised)

    def _ids_touching(mapping: Dict[str, List[str]]) -> List[str]:
        # Sorted ids whose regulation list shares at least one entry with the document.
        return sorted(key for key, regs in mapping.items() if not declared.isdisjoint(regs))

    hit_caps = sorted(
        cap for cap, regs in index.capability_regulations.items()
        if not declared.isdisjoint(regs) or _kw_match(lowered_keywords, cap)
    )
    with_playbook = set(index.playbook_capabilities)
    # hit_caps is already sorted and duplicate-free, so filtering keeps the order.
    hit_playbooks = [cap for cap in hit_caps if cap in with_playbook]
    hit_patterns = _ids_touching(index.transition_patterns)
    hit_scenarios = _ids_touching(index.reference_scenarios)
    hit_obligations = sorted({
        obligation
        for regulation in declared
        for obligation in index.obligation_index.get(regulation, [])
    })
    counts = (len(hit_caps), len(hit_playbooks), len(hit_patterns), len(hit_scenarios), len(hit_obligations))

    if is_new_domain:
        level = ImpactLevel.NEW_DOMAIN
        rec = "Neue Domäne — Corpus-Intake nötig (kein bestehendes Wissen betroffen)."
    elif sum(counts) == 0:
        level = ImpactLevel.NONE
        rec = "Wahrscheinlich ignorierbar — betrifft keinen bekannten Wissensbaustein."
    elif len(hit_caps) >= 3 or hit_playbooks or len(hit_obligations) >= 5:
        level = ImpactLevel.HIGH
        rec = "Gezielter Review priorisieren — hoher Impact auf bestehendes Wissen."
    else:
        level = ImpactLevel.LOW
        rec = "Gezielter Review — geringer, eingegrenzter Impact."

    domain_note = "NEUE Domäne" if is_new_domain else "keine neue Domäne"
    summary = "Betrifft %d Capabilities, %d Playbooks, %d Patterns, %d Reference Scenarios, %d Obligations; %s." % (
        counts + (domain_note,)
    )
    doc_types = [descriptor.document_type] if descriptor.document_type else []
    return KnowledgePackage(
        document_id=descriptor.document_id,
        classification={
            "regulations": sorted(declared),
            "keywords": sorted(lowered_keywords),
            "document_type": doc_types,
        },
        new_domain=is_new_domain,
        unknown_regulations=sorted(declared - recognised),
        affected_capabilities=hit_caps,
        affected_playbooks=hit_playbooks,
        affected_transition_patterns=hit_patterns,
        affected_reference_scenarios=hit_scenarios,
        affected_obligations=hit_obligations,
        impact_level=level,
        impact_summary=summary,
        recommendation=rec,
    )
@@ -0,0 +1,62 @@
 """Schemas for Knowledge Intake — classify a new document and assess its IMPACT (no extraction yet).
 Before the parser/draft stages, Intake answers "which parts of our knowledge base are affected at
 all?". It does NOT extract content — it only classifies the document and intersects its signals
 with an index of the existing knowledge (capabilities, playbooks, transition patterns, reference
 scenarios, injected obligations) to emit a `KnowledgePackage` (an impact analysis). Deterministic,
 computed-not-stored, no new corpus, no new meta-model class (freeze v1.0). Python 3.9 compatible.
 """
 from __future__ import annotations
 from enum import Enum
 from typing import Dict, List
 from pydantic import BaseModel, Field
 class ImpactLevel(str, Enum):
    """Triage level of an intake assessment; the `str` mixin makes each member a plain string too."""
    NONE = "none"                # touches nothing known -> likely ignorable
    LOW = "low"                  # touches a little -> targeted review
    HIGH = "high"                # touches a lot -> prioritise review
    NEW_DOMAIN = "new_domain"    # references only unknown regulations -> domain intake
 class DocumentDescriptor(BaseModel):
    """Lightweight signals of an incoming document — NO content body, only classification inputs.

    Only `document_id` is required; every other field defaults to an empty string or list.
    """
    document_id: str
    title: str = ""
    source: str = ""                                      # e.g. BSI, ENISA, EU
    document_type: str = ""                               # e.g. guidance, faq, regulation, recommendation
    regulations: List[str] = Field(default_factory=list)  # declared regulations it references
    keywords: List[str] = Field(default_factory=list)     # lightweight topic signals (e.g. sbom)
    # NOTE(review): presumably the product types the document concerns — confirm the intended use.
    product_types: List[str] = Field(default_factory=list)
 class KnowledgeIndex(BaseModel):
    """A deterministic index of the EXISTING knowledge to match an incoming document against.

    Every field defaults to an empty container, so a partial index can be built directly.
    """
    regulations: List[str] = Field(default_factory=list)               # all regulations the corpus knows
    capability_regulations: Dict[str, List[str]] = Field(default_factory=dict)   # capability -> covers_targets
    playbook_capabilities: List[str] = Field(default_factory=list)     # capabilities that HAVE a playbook
    transition_patterns: Dict[str, List[str]] = Field(default_factory=dict)      # pattern_id -> target regulations
    reference_scenarios: Dict[str, List[str]] = Field(default_factory=dict)      # rts_id -> regulations
    obligation_index: Dict[str, List[str]] = Field(default_factory=dict)         # regulation -> obligation ids (INJECTED)
 class KnowledgePackage(BaseModel):
    """The impact analysis for one document — what of our knowledge it probably touches, and how much.

    Only `document_id` is required; the defaults describe a document that touches nothing.
    """
    document_id: str
    classification: Dict[str, List[str]] = Field(default_factory=dict)   # echoed regulations/keywords/types
    new_domain: bool = False                                             # True when the document opens a new domain
    unknown_regulations: List[str] = Field(default_factory=list)         # declared regulations missing from the index
    # The affected_* lists hold ids of existing knowledge items the document probably touches.
    affected_capabilities: List[str] = Field(default_factory=list)
    affected_playbooks: List[str] = Field(default_factory=list)
    affected_transition_patterns: List[str] = Field(default_factory=list)
    affected_reference_scenarios: List[str] = Field(default_factory=list)
    affected_obligations: List[str] = Field(default_factory=list)
    impact_level: ImpactLevel = ImpactLevel.NONE                         # triage result
    impact_summary: str = ""                                             # human-readable summary text
    recommendation: str = ""                                             # suggested next step for the reviewer
@@ -59,3 +59,48 @@ def unsupported_block(rmap) -> None:
 def interp_status(verdict_value: str) -> str:
    """Translate a verdict value into a coverage status.

    Returns "PARTIAL" for the verdicts "uncertain" and "unsupported", and "PASS" for anything else.
    """
    if verdict_value == "uncertain" or verdict_value == "unsupported":
        return "PARTIAL"
    return "PASS"
 def knowledge_intake_section(base_dir) -> None:
    """Render the Knowledge Intake section (kept here so generate.py stays under the LOC budget).

    Builds the knowledge index from the YAML files under `<base_dir>/../knowledge/`, assesses three
    example documents against it, and writes a triage table, one example summary and a coverage
    table through the module-level `w` and `coverage_table` helpers.

    Args:
        base_dir: directory from which the `../knowledge` folder is resolved.
    """
    import os
    import yaml
    from compliance.knowledge_intake import (
        DocumentDescriptor, assess_document_impact, build_knowledge_index,
    )
    def _load(sub):
        """Load every `*.yaml` file of one knowledge sub-folder, in sorted filename order."""
        d = os.path.join(base_dir, "..", "knowledge", sub)
        loaded = []
        for f in sorted(os.listdir(d)):
            if not f.endswith(".yaml"):
                continue
            # `with` closes each handle right away; a bare open() inside the call leaked it.
            with open(os.path.join(d, f), encoding="utf-8") as fh:
                loaded.append(yaml.safe_load(fh))
        return loaded
    idx = build_knowledge_index(
        _load("transition_patterns"), _load("implementation_playbooks"),
        _load("reference_transition_scenarios"), obligation_index={"CRA": ["cra_obl_1", "cra_obl_2"]})
    docs = [
        DocumentDescriptor(document_id="ENISA CRA SBOM-FAQ", regulations=["CRA"], keywords=["sbom", "vulnerability"], document_type="faq"),
        DocumentDescriptor(document_id="EU Umwelt-Leitfaden", regulations=["UmweltVO"], keywords=["wastewater"], document_type="guidance"),
        DocumentDescriptor(document_id="Marketing-Blog", keywords=["newsletter"], document_type="blog"),
    ]
    w("## Knowledge Intake — Impact zuerst, Extraktion später")
    w("")
    w('_Vor dem Parser: ein neues Dokument NUR einordnen und seinen Impact auf den bestehenden Wissensbestand bestimmen. „Von N Dokumenten verändern wenige tatsächlich unser Wissen." Deterministisch, keine Extraktion, kein LLM._')
    w("")
    w("| Dokument | Impact | betrifft | Empfehlung |")
    w("|---|---|---|---|")
    packages = []  # kept so the example line below reuses the first result instead of recomputing it
    for d in docs:
        kp = assess_document_impact(d, idx)
        packages.append(kp)
        touch = "neue Domäne" if kp.new_domain else "%dC·%dPB·%dRTS·%dObl" % (
            len(kp.affected_capabilities), len(kp.affected_playbooks),
            len(kp.affected_reference_scenarios), len(kp.affected_obligations))
        # Only the part of the recommendation before " —" goes into the table cell.
        w("| %s | **%s** | %s | %s |" % (d.document_id, kp.impact_level.value, touch, kp.recommendation.split(" —")[0]))
    w("")
    w("**Beispiel-Knowledge-Package** (`%s`): %s" % (docs[0].document_id, packages[0].impact_summary))
    w("")
    w('_So entsteht bei jedem neuen Dokument eine Impact-Analyse statt „200 Seiten PDF" — Targeted Updating statt Schreiben._')
    w("")
    coverage_table([
        ("Knowledge Intake (Klassifikation+Impact)", "PASS", "%d Regelwerke / %d Capabilities im Index" % (len(idx.regulations), len(idx.capability_regulations))),
        ("Impact-Triage (HIGH/LOW/NONE/new_domain)", "PASS", "3 Beispiel-Dokumente korrekt eingeordnet"),
        ("Regelwerk-ID-Normalisierung", "TODO", "CRA vs Cyber Resilience Act vereinheitlichen"),
    ])
@@ -46,6 +46,7 @@ import yaml
 from _helpers import (  # noqa: E402  (script-dir module; keeps generate.py under the LOC budget)
    OUT, ROLLUP, Row, w, coverage_table, reg_map_block, unsupported_block, interp_status,
    knowledge_intake_section,
 )
 ISO_MAP = {"ISO27001": CapabilityMappingEntry(
@@ -463,6 +464,8 @@ coverage_table([
    ("Draft-Generatoren neue Domänen (Phase A)", "TODO", "Transition-/Reference-Scenario-Drafts"),
 ])
 knowledge_intake_section(os.path.dirname(__file__))   # Knowledge Intake (impact triage) — kept in _helpers for LOC
 # ── Epics + roll-up ───────────────────────────────────────────────────────
 w("## Gaps → Epics (Backlog — nur erfasst, NICHT implementiert)")
 w("")
@@ -318,6 +318,28 @@ _So reviewt der Experte 12 Entwürfe statt 12 Playbooks zu schreiben. Derselbe G
 | Provenance + TODO + Freigabestatus | **PASS** | draft_generated→reviewed→validated→proven |
 | Draft-Generatoren neue Domänen (Phase A) | **TODO** | Transition-/Reference-Scenario-Drafts |
 ## Knowledge Intake — Impact zuerst, Extraktion später
 _Vor dem Parser: ein neues Dokument NUR einordnen und seinen Impact auf den bestehenden Wissensbestand bestimmen. „Von N Dokumenten verändern wenige tatsächlich unser Wissen." Deterministisch, keine Extraktion, kein LLM._
 | Dokument | Impact | betrifft | Empfehlung |
 |---|---|---|---|
 | ENISA CRA SBOM-FAQ | **high** | 14C·2PB·3RTS·2Obl | Gezielter Review priorisieren |
 | EU Umwelt-Leitfaden | **new_domain** | neue Domäne | Neue Domäne |
 | Marketing-Blog | **none** | 0C·0PB·0RTS·0Obl | Wahrscheinlich ignorierbar |
 **Beispiel-Knowledge-Package** (`ENISA CRA SBOM-FAQ`): Betrifft 14 Capabilities, 2 Playbooks, 0 Patterns, 3 Reference Scenarios, 2 Obligations; keine neue Domäne.
 _So entsteht bei jedem neuen Dokument eine Impact-Analyse statt „200 Seiten PDF" — Targeted Updating statt Schreiben._
 **Architecture Coverage**
 | Layer | Status | Hinweis |
 |---|---|---|
 | Knowledge Intake (Klassifikation+Impact) | **PASS** | 6 Regelwerke / 32 Capabilities im Index |
 | Impact-Triage (HIGH/LOW/NONE/new_domain) | **PASS** | 3 Beispiel-Dokumente korrekt eingeordnet |
 | Regelwerk-ID-Normalisierung | **TODO** | CRA vs Cyber Resilience Act vereinheitlichen |
 ## Gaps → Epics (Backlog — nur erfasst, NICHT implementiert)
 | Epic | Titel | schliesst Coverage-Luecke |
@@ -329,6 +351,6 @@ _So reviewt der Experte 12 Entwürfe statt 12 Playbooks zu schreiben. Derselbe G
 ## Suite-Status (Roll-up)
- Coverage-Zellen gesamt: **38**
+- Coverage-Zellen gesamt: **41**
- PASS: **28** · PARTIAL: 3 · UNSUPPORTED: 1 · TODO: 5 · N/A: 1 · NEEDS_FACTS: 0
+- PASS: **30** · PARTIAL: 3 · UNSUPPORTED: 1 · TODO: 6 · N/A: 1 · NEEDS_FACTS: 0
 - Fortschritt = PASS-Anteil steigt, wenn Epics RS-001…004 landen (objektiver Maßstab, kein LOC).
@@ -0,0 +1,97 @@
 """Tests for Knowledge Intake — classify a document and assess its impact (no extraction, no LLM).
 Acceptance: build a deterministic index from existing knowledge; for an incoming document, surface
 which capabilities / playbooks / patterns / reference scenarios / obligations it probably touches,
 whether it is a new domain, and triage it (HIGH / LOW / NONE / NEW_DOMAIN). The point: of N documents,
 which few actually change our knowledge.
 """
 from __future__ import annotations
 from compliance.knowledge_intake import (
    DocumentDescriptor, ImpactLevel, KnowledgeIndex,
    assess_document_impact, build_knowledge_index,
 )
 # Sample transition patterns: TP-A uses the single-dict form of `to`, TP-B the list form.
 PATTERNS = [
    {"id": "TP-A", "transition_goal": {"to": {"regulation": "CRA"}},
     "delta_requirements": [{"capability": "sbom_creation", "covers_targets": ["CRA"]},
                            {"capability": "coordinated_vulnerability_disclosure", "covers_targets": ["CRA"]}]},
    {"id": "TP-B", "transition_goal": {"to": [{"regulation": "CRA"}, {"regulation": "MaschinenVO"}]},
     "delta_requirements": [{"capability": "machine_guards", "covers_targets": ["MaschinenVO"]}]},
 ]
 # Only sbom_creation has a playbook, so it is the only capability that can surface as a playbook.
 PLAYBOOKS = [{"capability_id": "sbom_creation"}]
 # The reference scenario names its regulation under the `target` key rather than `regulation`.
 RTS = [{"id": "RTS-1", "transition_goal": {"to": [{"target": "CRA"}]}}]
 def _index():
    """Build the shared index fixture from the module-level sample knowledge."""
    injected_obligations = {"CRA": ["o1", "o2"]}
    return build_knowledge_index(PATTERNS, PLAYBOOKS, RTS, obligation_index=injected_obligations)
 def test_build_index_extracts_regs_caps_playbooks():
    """The index exposes regulations, capability coverage, playbooks, patterns and scenarios."""
    built = _index()
    for regulation in ("CRA", "MaschinenVO"):
        assert regulation in built.regulations
    assert built.capability_regulations["sbom_creation"] == ["CRA"]
    assert built.playbook_capabilities == ["sbom_creation"]
    # the list form of `to` is handled
    assert built.transition_patterns["TP-B"] == ["CRA", "MaschinenVO"]
    # the `target` key is handled
    assert built.reference_scenarios["RTS-1"] == ["CRA"]
 def test_affected_by_regulation():
    """A declared, known regulation surfaces capabilities, patterns, scenarios and obligations."""
    doc = DocumentDescriptor(document_id="d", regulations=["CRA"])
    package = assess_document_impact(doc, _index())
    assert "sbom_creation" in package.affected_capabilities
    assert "TP-A" in package.affected_transition_patterns
    assert "RTS-1" in package.affected_reference_scenarios
    assert package.affected_obligations == ["o1", "o2"]
    assert not package.new_domain
 def test_affected_by_keyword_even_without_regulation():
    """Without any declared regulation, the keyword 'sbom' alone still finds the capability."""
    doc = DocumentDescriptor(document_id="d", keywords=["sbom"])
    package = assess_document_impact(doc, _index())
    for affected in (package.affected_capabilities, package.affected_playbooks):
        assert "sbom_creation" in affected
 def test_playbooks_are_affected_caps_with_a_playbook():
    """Only affected capabilities that have a playbook are reported as affected playbooks."""
    doc = DocumentDescriptor(document_id="d", regulations=["CRA"])
    package = assess_document_impact(doc, _index())
    # coordinated_vulnerability_disclosure is affected too, but has no playbook in the fixture
    assert package.affected_playbooks == ["sbom_creation"]
 def test_new_domain_when_only_unknown_regulations():
    """A document that declares only unknown regulations is triaged as a new domain."""
    doc = DocumentDescriptor(document_id="d", regulations=["UmweltVO"])
    package = assess_document_impact(doc, _index())
    assert package.new_domain
    assert package.impact_level == ImpactLevel.NEW_DOMAIN
    assert package.unknown_regulations == ["UmweltVO"]
    assert package.affected_capabilities == []
 def test_none_when_nothing_matches():
    """No regulation and no matching keyword yields NONE and the 'ignorable' recommendation."""
    doc = DocumentDescriptor(document_id="d", keywords=["newsletter"])
    package = assess_document_impact(doc, _index())
    assert package.impact_level == ImpactLevel.NONE
    assert not package.new_domain
    assert "ignorierbar" in package.recommendation
 def test_high_impact_triage():
    """A CRA document hits a capability with a playbook, which is enough for HIGH."""
    doc = DocumentDescriptor(document_id="d", regulations=["CRA"])
    package = assess_document_impact(doc, _index())
    # HIGH needs >= 3 affected capabilities, an affected playbook, or >= 5 obligations
    assert package.impact_level == ImpactLevel.HIGH
    assert "priorisieren" in package.recommendation
 def test_low_impact_when_small_and_no_playbook():
    """One affected capability without a playbook is triaged as LOW."""
    small_index = KnowledgeIndex(
        regulations=["CRA"],
        capability_regulations={"x": ["CRA"]},
        playbook_capabilities=[],
    )
    doc = DocumentDescriptor(document_id="d", regulations=["CRA"])
    package = assess_document_impact(doc, small_index)
    assert package.impact_level == ImpactLevel.LOW
    assert package.affected_capabilities == ["x"]
 def test_classification_echoed():
    """The package echoes regulations, lower-cased keywords and the document type."""
    doc = DocumentDescriptor(
        document_id="d", regulations=["CRA"], keywords=["SBOM"], document_type="faq",
    )
    echoed = assess_document_impact(doc, _index()).classification
    assert echoed["regulations"] == ["CRA"]
    assert echoed["keywords"] == ["sbom"]
    assert echoed["document_type"] == ["faq"]
 def test_deterministic():
    """Assessing the same document against the same index twice gives identical packages."""
    shared_index = _index()
    doc = DocumentDescriptor(document_id="d", regulations=["CRA"], keywords=["sbom"])
    first = assess_document_impact(doc, shared_index)
    second = assess_document_impact(doc, shared_index)
    assert first.model_dump() == second.model_dump()
@@ -0,0 +1,54 @@
 # ADR-006: Knowledge Intake — classify and assess impact before extraction
 - **Status:** Accepted
 - **Datum:** 2026-06-27
 - **Typ:** Architektur-Entscheidung
 - **Bezug:** [ADR-005](ADR-005-knowledge-production-pipeline.md), [ADR-002](ADR-002-transition-is-data-not-architecture.md), Architektur-Freeze v1.0, [[transition-reasoning]], [[iace-quality-architecture]]
 ## Kontext
 Vier Produktionspipelines folgen demselben Muster `Rohwissen → deterministischer Entwurf →
 Expertenreview → veröffentlichter Wissensbaustein` (Obligations, Capabilities, Playbook-Drafts,
 Reference-Szenarien). Aber **woher kommt das Rohwissen, und wie verarbeitet man es effizient?**
 Heute beginnt die Pipeline beim Parser (`PDF → Parser → Review`). Damit startet man bei JEDEM neuen
 Dokument wieder bei Null. Der eigentliche Aufwand der Wissensproduktion ist nicht das Schreiben,
 sondern das **gezielte Aktualisieren**: wenn morgen 20 Dokumente erscheinen — welche 15 kann man
 ignorieren und welche 5 verändern tatsächlich den Wissensbestand?
 ## Entscheidung
 1. **Vor dem Parser steht eine neue Stufe: Knowledge Intake.** Sie extrahiert KEINEN Inhalt, sondern
   ordnet ein neues Dokument nur ein (Klassifikation) und bestimmt seinen **Impact** auf den
   bestehenden Wissensbestand.
 2. **Output ist ein `KnowledgePackage` (Impact-Analyse), kein Inhalt:** welche bestehenden
   Capabilities / Playbooks / Transition Patterns / Reference-Szenarien / (injizierten) Obligations
   das Dokument wahrscheinlich betrifft, ob es eine **neue Domäne** ist, und ein Triage-Level
   (`HIGH / LOW / NONE / NEW_DOMAIN`) mit Empfehlung.
 3. **Deterministisch, kein LLM.** Intake schneidet die Dokument-Signale (deklarierte Regelwerke +
   Stichworte) gegen einen Index des vorhandenen Wissens. Optionale Modell-Anreicherung bleibt
   offline/advisory (vgl. [[iace-quality-architecture]]).
 4. **Vollständige Wissensfabrik:**
   `Knowledge Intake → Knowledge Package → Parser → Draft Generator → Expert Review →
   Published Knowledge → Reference Suite`.
 ## Konsequenzen
 - **Targeted Updating statt Schreiben:** statt „hier sind 200 Seiten PDF" liefert das System eine
  Impact-Analyse („betrifft 4 Capabilities, 2 Playbooks, RTS-003; keine neue Domäne"). Das spart
  enorm viel Review-Aufwand und ist die eigentliche Knowledge Production.
 - **Neue Quellen werden automatisch eingeordnet:** CRA-FAQ, MaschinenVO-Guidance, ENISA-Empfehlung,
  BSI-Orientierungshilfe → je eine Impact-Analyse statt Rohtext.
 - **Geänderte Phasen-Reihenfolge:** **A1 Knowledge Intake** (klassifizieren + Impact + Knowledge
  Package) → **A2 Draft Production** (Transition Patterns / Playbooks / Reference-Szenarien) →
  **A3 Expert Review** (Review / Versionierung / Veröffentlichung). Erst danach Phase B (neue Domänen).
 - **Freeze-konform:** kein neues Metamodell, kein Graph, kein neuer Corpus. `compliance/knowledge_intake`
  ist eine reine, deterministische Sicht (computed-not-stored); Obligations werden injiziert. Bekannte
  offene Verfeinerung: Regelwerk-ID-Normalisierung (CRA ↔ Cyber Resilience Act) — im Report offen als
  TODO ausgewiesen.
 - **Strategische Bedeutung:** die Plattform wird von einem Compliance-Produkt zu einer **kontinuierlich
  lernenden regulatorischen Wissensbasis** — und Intake ist der Filter, der bestimmt, was überhaupt
  Arbeit auslöst.
 - Diese ADR ist non-runtime → kein Deploy (siehe [ADR-001](ADR-001-runtime-deploy-policy.md)).