""" Tool Gateway Service. Bietet sichere Schnittstelle zu externen Tools wie Tavily Web Search. Alle Anfragen werden vor dem Versand auf PII geprüft und redaktiert. """ import os import httpx import logging from dataclasses import dataclass, field from typing import Optional, Any from enum import Enum from .pii_detector import PIIDetector, get_pii_detector, RedactionResult logger = logging.getLogger(__name__) class SearchDepth(str, Enum): """Suchtiefe für Tavily.""" BASIC = "basic" ADVANCED = "advanced" @dataclass class SearchResult: """Ein einzelnes Suchergebnis.""" title: str url: str content: str score: float = 0.0 published_date: Optional[str] = None @dataclass class SearchResponse: """Antwort einer Suche.""" query: str redacted_query: Optional[str] = None results: list[SearchResult] = field(default_factory=list) answer: Optional[str] = None pii_detected: bool = False pii_types: list[str] = field(default_factory=list) response_time_ms: int = 0 @dataclass class ToolGatewayConfig: """Konfiguration für den Tool Gateway.""" tavily_api_key: Optional[str] = None tavily_base_url: str = "https://api.tavily.com" timeout: int = 30 max_results: int = 5 search_depth: SearchDepth = SearchDepth.BASIC include_answer: bool = True include_images: bool = False pii_redaction_enabled: bool = True @classmethod def from_env(cls) -> "ToolGatewayConfig": """Erstellt Config aus Umgebungsvariablen.""" return cls( tavily_api_key=os.getenv("TAVILY_API_KEY"), tavily_base_url=os.getenv("TAVILY_BASE_URL", "https://api.tavily.com"), timeout=int(os.getenv("TAVILY_TIMEOUT", "30")), max_results=int(os.getenv("TAVILY_MAX_RESULTS", "5")), search_depth=SearchDepth(os.getenv("TAVILY_SEARCH_DEPTH", "basic")), include_answer=os.getenv("TAVILY_INCLUDE_ANSWER", "true").lower() == "true", include_images=os.getenv("TAVILY_INCLUDE_IMAGES", "false").lower() == "true", pii_redaction_enabled=os.getenv("PII_REDACTION_ENABLED", "true").lower() == "true", ) class ToolGatewayError(Exception): """Fehler im Tool Gateway.""" pass class TavilyError(ToolGatewayError): """Fehler bei Tavily API.""" pass class ToolGateway: """ Gateway für externe Tools mit PII-Schutz. Alle Anfragen werden vor dem Versand auf personenbezogene Daten geprüft und diese redaktiert. Dies gewährleistet DSGVO-Konformität. """ def __init__( self, config: Optional[ToolGatewayConfig] = None, pii_detector: Optional[PIIDetector] = None, ): """ Initialisiert den Tool Gateway. Args: config: Konfiguration. None = aus Umgebungsvariablen. pii_detector: PII Detector. None = Standard-Detector. """ self.config = config or ToolGatewayConfig.from_env() self.pii_detector = pii_detector or get_pii_detector() self._client: Optional[httpx.AsyncClient] = None @property def tavily_available(self) -> bool: """Prüft ob Tavily konfiguriert ist.""" return bool(self.config.tavily_api_key) async def _get_client(self) -> httpx.AsyncClient: """Lazy-init HTTP Client.""" if self._client is None: self._client = httpx.AsyncClient( timeout=self.config.timeout, headers={"Content-Type": "application/json"}, ) return self._client async def close(self): """Schließt HTTP Client.""" if self._client: await self._client.aclose() self._client = None def _redact_query(self, query: str) -> RedactionResult: """ Redaktiert PII aus einer Suchanfrage. Args: query: Die originale Suchanfrage. Returns: RedactionResult mit redaktiertem Text. """ if not self.config.pii_redaction_enabled: return RedactionResult( original_text=query, redacted_text=query, matches=[], pii_found=False, ) return self.pii_detector.redact(query) async def search( self, query: str, search_depth: Optional[SearchDepth] = None, max_results: Optional[int] = None, include_domains: Optional[list[str]] = None, exclude_domains: Optional[list[str]] = None, ) -> SearchResponse: """ Führt eine Web-Suche mit Tavily durch. PII wird automatisch aus der Anfrage entfernt bevor sie an Tavily gesendet wird. Args: query: Die Suchanfrage. search_depth: Suchtiefe (basic/advanced). max_results: Maximale Anzahl Ergebnisse. include_domains: Nur diese Domains durchsuchen. exclude_domains: Diese Domains ausschließen. Returns: SearchResponse mit Ergebnissen. Raises: TavilyError: Bei API-Fehlern. ToolGatewayError: Bei Konfigurationsfehlern. """ import time start_time = time.time() if not self.tavily_available: raise ToolGatewayError("Tavily API key not configured") # PII redaktieren redaction = self._redact_query(query) if redaction.pii_found: logger.warning( f"PII detected in search query. Types: {[m.type.value for m in redaction.matches]}" ) # Request an Tavily client = await self._get_client() payload: dict[str, Any] = { "api_key": self.config.tavily_api_key, "query": redaction.redacted_text, "search_depth": (search_depth or self.config.search_depth).value, "max_results": max_results or self.config.max_results, "include_answer": self.config.include_answer, "include_images": self.config.include_images, } if include_domains: payload["include_domains"] = include_domains if exclude_domains: payload["exclude_domains"] = exclude_domains try: response = await client.post( f"{self.config.tavily_base_url}/search", json=payload, ) response.raise_for_status() data = response.json() except httpx.HTTPStatusError as e: logger.error(f"Tavily API error: {e.response.status_code} - {e.response.text}") raise TavilyError(f"Tavily API error: {e.response.status_code}") except httpx.RequestError as e: logger.error(f"Tavily request error: {e}") raise TavilyError(f"Failed to connect to Tavily: {e}") # Response parsen results = [ SearchResult( title=r.get("title", ""), url=r.get("url", ""), content=r.get("content", ""), score=r.get("score", 0.0), published_date=r.get("published_date"), ) for r in data.get("results", []) ] elapsed_ms = int((time.time() - start_time) * 1000) return SearchResponse( query=query, redacted_query=redaction.redacted_text if redaction.pii_found else None, results=results, answer=data.get("answer"), pii_detected=redaction.pii_found, pii_types=[m.type.value for m in redaction.matches], response_time_ms=elapsed_ms, ) async def health_check(self) -> dict[str, Any]: """ Prüft Verfügbarkeit der Tools. Returns: Dict mit Status der einzelnen Tools. """ status = { "tavily": { "configured": self.tavily_available, "healthy": False, }, "pii_redaction": { "enabled": self.config.pii_redaction_enabled, }, } # Tavily Health Check (einfache Suche) if self.tavily_available: try: result = await self.search("test", max_results=1) status["tavily"]["healthy"] = True status["tavily"]["response_time_ms"] = result.response_time_ms except Exception as e: status["tavily"]["error"] = str(e) return status # Singleton Instance _tool_gateway: Optional[ToolGateway] = None def get_tool_gateway() -> ToolGateway: """Gibt Singleton-Instanz des Tool Gateways zurück.""" global _tool_gateway if _tool_gateway is None: _tool_gateway = ToolGateway() return _tool_gateway