""" DB-backed Action & Object Ontology Registry with in-memory cache. Replaces hardcoded ACTION_TYPES, _NEGATIVE_PATTERNS, _ACTION_SYNONYMS, and _OBJECT_SYNONYMS with PostgreSQL tables. Cache TTL: 5 minutes. Thread-safe via simple timestamp check. Falls back to hardcoded dicts if DB is unavailable. """ import logging import re import time from typing import Optional from sqlalchemy import text from sqlalchemy.exc import SQLAlchemyError from db.session import SessionLocal logger = logging.getLogger(__name__) _CACHE_TTL_SECONDS = 300 # 5 minutes class OntologyRegistry: """In-memory cache of action_types, action_synonyms, and object_synonyms.""" def __init__(self): # Action types: canonical_name → phase self._action_phases: dict[str, str] = {} # Alias → canonical action (for classify_action) self._alias_to_action: dict[str, str] = {} # Negative patterns: [(pattern, action_type)] ordered longest first self._negative_patterns: list[tuple[str, str]] = [] # Action synonyms for dedup: synonym → canonical (for normalize_action) self._action_synonyms: dict[str, str] = {} # Object synonyms: synonym → canonical_token (for normalize_object) self._object_synonyms: dict[str, str] = {} # Sorted object keys (longest first) for substring matching self._object_keys_sorted: list[str] = [] self._loaded_at: float = 0.0 def _is_stale(self) -> bool: return (time.monotonic() - self._loaded_at) > _CACHE_TTL_SECONDS def _load(self) -> bool: """Load all ontology data from DB into memory.""" try: db = SessionLocal() try: return self._load_from_db(db) finally: db.close() except SQLAlchemyError: logger.warning( "Failed to load ontology from DB — using stale cache", exc_info=True, ) return False def _load_from_db(self, db) -> bool: """Load from DB session.""" # 1. Action types rows = db.execute(text( "SELECT canonical_name, phase FROM action_types" )).fetchall() action_phases = {r[0]: r[1] for r in rows} # 2. Action synonyms (aliases + negative patterns) rows = db.execute(text( "SELECT canonical_action, synonym, pattern_type FROM action_synonyms" )).fetchall() alias_to_action: dict[str, str] = {} negative_patterns: list[tuple[str, str]] = [] action_synonyms: dict[str, str] = {} for canonical, synonym, ptype in rows: if ptype == "negative_pattern": negative_patterns.append((synonym, canonical)) else: alias_to_action[synonym] = canonical action_synonyms[synonym] = canonical # Sort negative patterns: longest first (for priority matching) negative_patterns.sort(key=lambda x: -len(x[0])) # 3. Object synonyms rows = db.execute(text( "SELECT canonical_token, synonym FROM object_synonyms" )).fetchall() object_synonyms = {r[1]: r[0] for r in rows} object_keys_sorted = sorted(object_synonyms.keys(), key=len, reverse=True) # Commit to cache self._action_phases = action_phases self._alias_to_action = alias_to_action self._negative_patterns = negative_patterns self._action_synonyms = action_synonyms self._object_synonyms = object_synonyms self._object_keys_sorted = object_keys_sorted self._loaded_at = time.monotonic() logger.info( "Ontology loaded: %d action_types, %d aliases, %d neg_patterns, %d object_synonyms", len(action_phases), len(alias_to_action), len(negative_patterns), len(object_synonyms), ) return True @property def is_loaded(self) -> bool: """True if the cache has any data.""" return len(self._action_phases) > 0 def _ensure_loaded(self) -> None: if self._is_stale(): self._load() if not self.is_loaded: raise RuntimeError("OntologyRegistry has no data") # ── Action Classification (replaces control_ontology.classify_action) ── def classify_action(self, text_input: str) -> str: """Classify text into a canonical action_type.""" self._ensure_loaded() text_lower = text_input.lower().strip() # Check negative patterns first for pattern, action_type in self._negative_patterns: if pattern in text_lower: return action_type # Direct alias match if text_lower in self._alias_to_action: return self._alias_to_action[text_lower] # Substring match (longest first) best_match = "" best_action = "implement" for alias, action_type in sorted( self._alias_to_action.items(), key=lambda x: -len(x[0]) ): if alias in text_lower and len(alias) > len(best_match): best_match = alias best_action = action_type return best_action def get_phase(self, action_type: str) -> str: """Get the control_phase for an action_type.""" self._ensure_loaded() return self._action_phases.get(action_type, "implementation") # ── Action Normalization (replaces control_dedup.normalize_action) ── def normalize_action(self, action: str) -> str: """Normalize an action verb to a canonical English form.""" self._ensure_loaded() if not action: return "" action = action.strip().lower() action_base = re.sub(r"(en|t|st|e|te|tet|end)$", "", action) if action in self._action_synonyms: return self._action_synonyms[action] if action_base in self._action_synonyms: return self._action_synonyms[action_base] for verb, canonical in self._action_synonyms.items(): if action.startswith(verb) or verb.startswith(action): return canonical return action # ── Object Normalization (replaces control_dedup.normalize_object) ── def normalize_object(self, obj: str) -> str: """Normalize an object to a canonical token.""" self._ensure_loaded() if not obj: return "" obj_lower = obj.strip().lower() # Exact match if obj_lower in self._object_synonyms: return self._object_synonyms[obj_lower] # Substring match (longest phrase first) for phrase in self._object_keys_sorted: if phrase in obj_lower: return self._object_synonyms[phrase] return obj_lower def get_action_types(self) -> dict[str, str]: """Return all action_type → phase mappings.""" self._ensure_loaded() return dict(self._action_phases) def get_object_synonyms(self) -> dict[str, str]: """Return all object synonym → canonical mappings.""" self._ensure_loaded() return dict(self._object_synonyms) # Module-level singleton _registry: Optional[OntologyRegistry] = None def get_ontology_registry() -> OntologyRegistry: """Get or create the singleton OntologyRegistry instance.""" global _registry if _registry is None: _registry = OntologyRegistry() return _registry