This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
BreakPilot Dev 19855efacc
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
feat: BreakPilot PWA - Full codebase (clean push without large binaries)
All services: admin-v2, studio-v2, website, ai-compliance-sdk,
consent-service, klausur-service, voice-service, and infrastructure.
Large PDFs and compiled binaries excluded via .gitignore.
2026-02-11 13:25:58 +01:00

286 lines
8.6 KiB
Python

"""
Tool Gateway Service.
Bietet sichere Schnittstelle zu externen Tools wie Tavily Web Search.
Alle Anfragen werden vor dem Versand auf PII geprüft und redaktiert.
"""
import os
import httpx
import logging
from dataclasses import dataclass, field
from typing import Optional, Any
from enum import Enum
from .pii_detector import PIIDetector, get_pii_detector, RedactionResult
logger = logging.getLogger(__name__)
class SearchDepth(str, Enum):
    """Search depth options for Tavily.

    Members are also ``str`` instances, so a member compares equal to its
    plain string value.
    """

    BASIC = "basic"
    ADVANCED = "advanced"
@dataclass
class SearchResult:
    """One individual search hit."""

    # Mandatory fields of a hit.
    title: str
    url: str
    content: str

    # Optional fields; the defaults apply when no value is supplied.
    score: float = 0.0
    published_date: Optional[str] = None
@dataclass
class SearchResponse:
    """Outcome of one search request."""

    # The query exactly as the caller passed it in.
    query: str
    # Redacted form of the query; stays None unless a value is supplied.
    redacted_query: Optional[str] = None
    # Individual hits; every instance gets its own fresh list.
    results: list[SearchResult] = field(default_factory=list)
    # Optional answer text accompanying the hits.
    answer: Optional[str] = None
    # PII bookkeeping for the query.
    pii_detected: bool = False
    pii_types: list[str] = field(default_factory=list)
    # Duration of the request in milliseconds.
    response_time_ms: int = 0
@dataclass
class ToolGatewayConfig:
    """Configuration values for the tool gateway."""

    # Tavily connection settings.
    tavily_api_key: Optional[str] = None
    tavily_base_url: str = "https://api.tavily.com"
    timeout: int = 30

    # Search defaults.
    max_results: int = 5
    search_depth: SearchDepth = SearchDepth.BASIC
    include_answer: bool = True
    include_images: bool = False

    # Whether queries are redacted before being sent out.
    pii_redaction_enabled: bool = True

    @classmethod
    def from_env(cls) -> "ToolGatewayConfig":
        """Build a configuration from environment variables.

        Unset variables fall back to the same defaults as the dataclass
        fields. Boolean variables are true only when their value equals
        ``"true"`` case-insensitively.

        Returns:
            A new ToolGatewayConfig populated from the environment.

        Raises:
            ValueError: If TAVILY_TIMEOUT or TAVILY_MAX_RESULTS is not an
                integer, or TAVILY_SEARCH_DEPTH is not a valid SearchDepth
                value.
        """
        env = os.getenv

        def flag(name: str, default: str) -> bool:
            # Only the literal "true" (in any letter case) enables a flag.
            return env(name, default).lower() == "true"

        return cls(
            tavily_api_key=env("TAVILY_API_KEY"),
            tavily_base_url=env("TAVILY_BASE_URL", "https://api.tavily.com"),
            timeout=int(env("TAVILY_TIMEOUT", "30")),
            max_results=int(env("TAVILY_MAX_RESULTS", "5")),
            search_depth=SearchDepth(env("TAVILY_SEARCH_DEPTH", "basic")),
            include_answer=flag("TAVILY_INCLUDE_ANSWER", "true"),
            include_images=flag("TAVILY_INCLUDE_IMAGES", "false"),
            pii_redaction_enabled=flag("PII_REDACTION_ENABLED", "true"),
        )
class ToolGatewayError(Exception):
    """Error raised by the tool gateway."""
class TavilyError(ToolGatewayError):
    """Error raised when talking to the Tavily API fails."""
class ToolGateway:
    """Gateway to external tools with PII protection.

    Search queries are run through the PII detector and redacted (unless
    redaction is disabled in the config) before they are sent to Tavily.
    This is intended to support GDPR compliance.
    """

    def __init__(
        self,
        config: Optional[ToolGatewayConfig] = None,
        pii_detector: Optional[PIIDetector] = None,
    ):
        """Initialise the tool gateway.

        Args:
            config: Gateway configuration. None means it is read from
                environment variables.
            pii_detector: PII detector to use. None means the default
                detector from ``get_pii_detector()``.
        """
        # Explicit None checks: a caller-supplied object is always honoured,
        # even if it happens to evaluate as falsy.
        self.config = config if config is not None else ToolGatewayConfig.from_env()
        self.pii_detector = (
            pii_detector if pii_detector is not None else get_pii_detector()
        )
        self._client: Optional[httpx.AsyncClient] = None

    @property
    def tavily_available(self) -> bool:
        """Whether a Tavily API key is configured."""
        return bool(self.config.tavily_api_key)

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, creating it on first use."""
        if self._client is None:
            self._client = httpx.AsyncClient(
                timeout=self.config.timeout,
                headers={"Content-Type": "application/json"},
            )
        return self._client

    async def close(self) -> None:
        """Close the HTTP client if one was created."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None

    def _redact_query(self, query: str) -> RedactionResult:
        """Redact PII from a search query.

        Args:
            query: The original search query.

        Returns:
            A RedactionResult holding the redacted text. When redaction is
            disabled in the config, the query is passed through unchanged
            with no matches.
        """
        if not self.config.pii_redaction_enabled:
            return RedactionResult(
                original_text=query,
                redacted_text=query,
                matches=[],
                pii_found=False,
            )
        return self.pii_detector.redact(query)

    async def search(
        self,
        query: str,
        search_depth: Optional[SearchDepth] = None,
        max_results: Optional[int] = None,
        include_domains: Optional[list[str]] = None,
        exclude_domains: Optional[list[str]] = None,
    ) -> SearchResponse:
        """Run a web search through Tavily.

        PII is removed from the query before it is sent to Tavily (unless
        redaction is disabled in the config).

        Args:
            query: The search query.
            search_depth: Search depth (basic/advanced). None uses the
                configured default.
            max_results: Maximum number of results. None (or 0) uses the
                configured default.
            include_domains: Restrict the search to these domains.
            exclude_domains: Exclude these domains from the search.

        Returns:
            A SearchResponse with the parsed results. ``query`` is the
            original query; ``redacted_query`` is set only when PII was found.

        Raises:
            TavilyError: On an HTTP error status, a request/transport failure,
                or a response body that is not a JSON object.
            ToolGatewayError: If no Tavily API key is configured.
        """
        import time

        # Monotonic clock: the measured latency must not be affected by
        # wall-clock adjustments while the request is in flight.
        started = time.monotonic()

        if not self.tavily_available:
            raise ToolGatewayError("Tavily API key not configured")

        # Redact PII before anything leaves the process.
        redaction = self._redact_query(query)
        pii_types = [m.type.value for m in redaction.matches]
        if redaction.pii_found:
            logger.warning("PII detected in search query. Types: %s", pii_types)

        # Build the Tavily request.
        client = await self._get_client()
        payload: dict[str, Any] = {
            "api_key": self.config.tavily_api_key,
            "query": redaction.redacted_text,
            "search_depth": (search_depth or self.config.search_depth).value,
            "max_results": max_results or self.config.max_results,
            "include_answer": self.config.include_answer,
            "include_images": self.config.include_images,
        }
        if include_domains:
            payload["include_domains"] = include_domains
        if exclude_domains:
            payload["exclude_domains"] = exclude_domains

        # Tolerate a trailing slash in the configured base URL.
        endpoint = f"{self.config.tavily_base_url.rstrip('/')}/search"

        try:
            response = await client.post(endpoint, json=payload)
            response.raise_for_status()
        except httpx.HTTPStatusError as e:
            logger.error(
                "Tavily API error: %s - %s",
                e.response.status_code,
                e.response.text,
            )
            raise TavilyError(f"Tavily API error: {e.response.status_code}") from e
        except httpx.RequestError as e:
            logger.error("Tavily request error: %s", e)
            raise TavilyError(f"Failed to connect to Tavily: {e}") from e

        # A successful status does not guarantee a JSON body; surface a
        # malformed body as TavilyError instead of leaking a raw ValueError.
        try:
            data = response.json()
        except ValueError as e:
            logger.error("Tavily returned a non-JSON response: %s", e)
            raise TavilyError("Tavily returned an invalid JSON response") from e
        if not isinstance(data, dict):
            logger.error(
                "Tavily returned an unexpected payload type: %s",
                type(data).__name__,
            )
            raise TavilyError("Tavily returned an unexpected response payload")

        # Parse the results; a missing or null "results" key means no hits,
        # and entries that are not JSON objects are skipped.
        results = [
            SearchResult(
                title=item.get("title", ""),
                url=item.get("url", ""),
                content=item.get("content", ""),
                score=item.get("score", 0.0),
                published_date=item.get("published_date"),
            )
            for item in (data.get("results") or [])
            if isinstance(item, dict)
        ]

        elapsed_ms = int((time.monotonic() - started) * 1000)

        return SearchResponse(
            query=query,
            redacted_query=redaction.redacted_text if redaction.pii_found else None,
            results=results,
            answer=data.get("answer"),
            pii_detected=redaction.pii_found,
            pii_types=pii_types,
            response_time_ms=elapsed_ms,
        )

    async def health_check(self) -> dict[str, Any]:
        """Check the availability of the tools.

        Returns:
            Dict with the status of each tool. ``tavily`` always has
            ``configured`` and ``healthy``; it additionally has
            ``response_time_ms`` on success or ``error`` on failure.
            ``pii_redaction`` reports whether redaction is enabled.
        """
        status: dict[str, Any] = {
            "tavily": {
                "configured": self.tavily_available,
                "healthy": False,
            },
            "pii_redaction": {
                "enabled": self.config.pii_redaction_enabled,
            },
        }

        # Tavily health check. NOTE: this issues a real one-result search
        # request against the configured Tavily endpoint.
        if self.tavily_available:
            try:
                result = await self.search("test", max_results=1)
                status["tavily"]["healthy"] = True
                status["tavily"]["response_time_ms"] = result.response_time_ms
            except Exception as e:
                # Deliberately broad: a health check reports failures in the
                # status dict instead of raising.
                status["tavily"]["error"] = str(e)

        return status
# Module-wide gateway instance, created lazily on first access.
_tool_gateway: Optional[ToolGateway] = None


def get_tool_gateway() -> ToolGateway:
    """Return the singleton ToolGateway, creating it on the first call."""
    global _tool_gateway
    if _tool_gateway is not None:
        return _tool_gateway
    _tool_gateway = ToolGateway()
    return _tool_gateway