"""Schemas for Knowledge Intake — classify a new document and assess its IMPACT (no extraction yet). Before the parser/draft stages, Intake answers „welche Teile unseres Wissensbestands sind überhaupt betroffen?". It does NOT extract content — it only classifies the document and intersects its signals with an index of the existing knowledge (capabilities, playbooks, transition patterns, reference scenarios, injected obligations) to emit a `KnowledgePackage` (an impact analysis). Deterministic, computed-not-stored, no new corpus, no new meta-model class (freeze v1.0). Python 3.9 compatible. """ from __future__ import annotations from enum import Enum from typing import Dict, List from pydantic import BaseModel, Field class ImpactLevel(str, Enum): NONE = "none" # touches nothing known -> likely ignorable LOW = "low" # touches a little -> targeted review HIGH = "high" # touches a lot -> prioritise review NEW_DOMAIN = "new_domain" # references only unknown regulations -> domain intake class DocumentDescriptor(BaseModel): """Lightweight signals of an incoming document — NO content body, only classification inputs.""" document_id: str title: str = "" source: str = "" # e.g. BSI, ENISA, EU document_type: str = "" # e.g. guidance, faq, regulation, recommendation regulations: List[str] = Field(default_factory=list) # declared regulations it references keywords: List[str] = Field(default_factory=list) # lightweight topic signals (e.g. sbom) product_types: List[str] = Field(default_factory=list) class KnowledgeIndex(BaseModel): """A deterministic index of the EXISTING knowledge to match an incoming document against.""" regulations: List[str] = Field(default_factory=list) # all regulations the corpus knows capability_regulations: Dict[str, List[str]] = Field(default_factory=dict) # capability -> covers_targets playbook_capabilities: List[str] = Field(default_factory=list) # capabilities that HAVE a playbook transition_patterns: Dict[str, List[str]] = Field(default_factory=dict) # pattern_id -> target regulations reference_scenarios: Dict[str, List[str]] = Field(default_factory=dict) # rts_id -> regulations obligation_index: Dict[str, List[str]] = Field(default_factory=dict) # regulation -> obligation ids (INJECTED) class KnowledgePackage(BaseModel): """The impact analysis for one document — what of our knowledge it probably touches, and how much.""" document_id: str classification: Dict[str, List[str]] = Field(default_factory=dict) # echoed regulations/keywords/types new_domain: bool = False unknown_regulations: List[str] = Field(default_factory=list) affected_capabilities: List[str] = Field(default_factory=list) affected_playbooks: List[str] = Field(default_factory=list) affected_transition_patterns: List[str] = Field(default_factory=list) affected_reference_scenarios: List[str] = Field(default_factory=list) affected_obligations: List[str] = Field(default_factory=list) impact_level: ImpactLevel = ImpactLevel.NONE impact_summary: str = "" recommendation: str = ""