From c53089896314336b4ee7a5999cb361442d385e51 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Fri, 13 Mar 2026 09:36:14 +0100 Subject: [PATCH] fix: replace Python 3.10+ union type syntax with typing.Optional for Pydantic v2 compat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit from __future__ import annotations breaks Pydantic BaseModel runtime type evaluation. Replaced str | None → Optional[str], list[str] → List[str] etc. in control_generator.py, anchor_finder.py, control_generator_routes.py. Co-Authored-By: Claude Opus 4.6 --- .../api/control_generator_routes.py | 18 ++++++------ .../compliance/services/anchor_finder.py | 17 ++++++----- .../compliance/services/control_generator.py | 28 +++++++++---------- 3 files changed, 29 insertions(+), 34 deletions(-) diff --git a/backend-compliance/compliance/api/control_generator_routes.py b/backend-compliance/compliance/api/control_generator_routes.py index 2a9c0d1..e76df1f 100644 --- a/backend-compliance/compliance/api/control_generator_routes.py +++ b/backend-compliance/compliance/api/control_generator_routes.py @@ -12,11 +12,9 @@ Endpoints: POST /v1/canonical/blocked-sources/cleanup — Start cleanup workflow """ -from __future__ import annotations - import json import logging -from typing import Optional +from typing import Optional, List from fastapi import APIRouter, HTTPException, Query from pydantic import BaseModel @@ -39,8 +37,8 @@ router = APIRouter(prefix="/v1/canonical", tags=["control-generator"]) # ============================================================================= class GenerateRequest(BaseModel): - domain: str | None = None - collections: list[str] | None = None + domain: Optional[str] = None + collections: Optional[List[str]] = None max_controls: int = 50 batch_size: int = 5 skip_web_search: bool = False @@ -63,8 +61,8 @@ class GenerateResponse(BaseModel): class ReviewRequest(BaseModel): action: str # "approve", "reject", "needs_rework" - release_state: str | None = None # Override release_state - notes: str | None = None + release_state: Optional[str] = None # Override release_state + notes: Optional[str] = None class ProcessedStats(BaseModel): @@ -83,7 +81,7 @@ class BlockedSourceResponse(BaseModel): document_title: str reason: str deletion_status: str - qdrant_collection: str | None = None + qdrant_collection: Optional[str] = None marked_at: str @@ -367,8 +365,8 @@ async def start_cleanup(): @router.get("/controls-customer") async def get_controls_customer_view( - severity: str | None = Query(None), - domain: str | None = Query(None), + severity: Optional[str] = Query(None), + domain: Optional[str] = Query(None), ): """Get controls filtered for customer visibility. diff --git a/backend-compliance/compliance/services/anchor_finder.py b/backend-compliance/compliance/services/anchor_finder.py index f895e4b..b88d6ca 100644 --- a/backend-compliance/compliance/services/anchor_finder.py +++ b/backend-compliance/compliance/services/anchor_finder.py @@ -8,10 +8,9 @@ Two-stage search: Only open-source references (Rule 1+2) are accepted as anchors. """ -from __future__ import annotations - import logging from dataclasses import dataclass +from typing import List, Optional import httpx @@ -40,7 +39,7 @@ class OpenAnchor: class AnchorFinder: """Finds open-source references to anchor generated controls.""" - def __init__(self, rag_client: ComplianceRAGClient | None = None): + def __init__(self, rag_client: Optional[ComplianceRAGClient] = None): self.rag = rag_client or get_rag_client() async def find_anchors( @@ -48,7 +47,7 @@ class AnchorFinder: control: GeneratedControl, skip_web: bool = False, min_anchors: int = 2, - ) -> list[OpenAnchor]: + ) -> List[OpenAnchor]: """Find open-source anchors for a control.""" # Stage A: RAG-internal search anchors = await self._search_rag_for_open_anchors(control) @@ -64,7 +63,7 @@ class AnchorFinder: return anchors - async def _search_rag_for_open_anchors(self, control: GeneratedControl) -> list[OpenAnchor]: + async def _search_rag_for_open_anchors(self, control: GeneratedControl) -> List[OpenAnchor]: """Search RAG for chunks from open sources matching the control topic.""" # Build search query from control title + first 3 tags tags_str = " ".join(control.tags[:3]) if control.tags else "" @@ -76,7 +75,7 @@ class AnchorFinder: top_k=15, ) - anchors: list[OpenAnchor] = [] + anchors: List[OpenAnchor] = [] seen: set[str] = set() for r in results: @@ -109,7 +108,7 @@ class AnchorFinder: return anchors - async def _search_web(self, control: GeneratedControl) -> list[OpenAnchor]: + async def _search_web(self, control: GeneratedControl) -> List[OpenAnchor]: """Search DuckDuckGo Instant Answer API for open references.""" keywords = f"{control.title} security control OWASP NIST" try: @@ -127,7 +126,7 @@ class AnchorFinder: return [] data = resp.json() - anchors: list[OpenAnchor] = [] + anchors: List[OpenAnchor] = [] # Parse RelatedTopics for topic in data.get("RelatedTopics", [])[:10]: @@ -156,7 +155,7 @@ class AnchorFinder: return [] @staticmethod - def _identify_framework_from_url(url: str) -> str | None: + def _identify_framework_from_url(url: str) -> Optional[str]: """Identify if a URL belongs to a known open-source framework.""" url_lower = url.lower() if "owasp.org" in url_lower: diff --git a/backend-compliance/compliance/services/control_generator.py b/backend-compliance/compliance/services/control_generator.py index 725bb59..be9b34e 100644 --- a/backend-compliance/compliance/services/control_generator.py +++ b/backend-compliance/compliance/services/control_generator.py @@ -17,8 +17,6 @@ Three License Rules: Rule 3 (restricted): BSI, ISO — full reformulation, no source names """ -from __future__ import annotations - import hashlib import json import logging @@ -27,7 +25,7 @@ import re import uuid from dataclasses import dataclass, field, asdict from datetime import datetime, timezone -from typing import Optional +from typing import Dict, List, Optional, Set import httpx from pydantic import BaseModel @@ -168,8 +166,8 @@ def _detect_domain(text: str) -> str: # --------------------------------------------------------------------------- class GeneratorConfig(BaseModel): - collections: list[str] | None = None - domain: str | None = None + collections: Optional[List[str]] = None + domain: Optional[str] = None batch_size: int = 5 max_controls: int = 50 skip_processed: bool = True @@ -194,9 +192,9 @@ class GeneratedControl: release_state: str = "draft" tags: list = field(default_factory=list) # 3-rule fields - license_rule: int | None = None - source_original_text: str | None = None - source_citation: dict | None = None + license_rule: Optional[int] = None + source_original_text: Optional[str] = None + source_citation: Optional[dict] = None customer_visible: bool = True generation_metadata: dict = field(default_factory=dict) @@ -219,7 +217,7 @@ class GeneratorResult: # LLM Client (via Go SDK) # --------------------------------------------------------------------------- -async def _llm_chat(prompt: str, system_prompt: str | None = None) -> str: +async def _llm_chat(prompt: str, system_prompt: Optional[str] = None) -> str: """Call the Go SDK LLM chat endpoint.""" messages = [] if system_prompt: @@ -322,11 +320,11 @@ Antworte NUR mit validem JSON.""" class ControlGeneratorPipeline: """Orchestrates the 7-stage control generation pipeline.""" - def __init__(self, db: Session, rag_client: ComplianceRAGClient | None = None): + def __init__(self, db: Session, rag_client: Optional[ComplianceRAGClient] = None): self.db = db self.rag = rag_client or get_rag_client() - self._existing_controls: list[dict] | None = None - self._existing_embeddings: dict[str, list[float]] = {} + self._existing_controls: Optional[List[dict]] = None + self._existing_embeddings: Dict[str, List[float]] = {} # ── Stage 1: RAG Scan ────────────────────────────────────────────── @@ -537,7 +535,7 @@ Gib JSON zurück mit diesen Feldern: # ── Stage 4: Harmonization ───────────────────────────────────────── - async def _check_harmonization(self, new_control: GeneratedControl) -> list | None: + async def _check_harmonization(self, new_control: GeneratedControl) -> Optional[list]: """Check if a new control duplicates existing ones via embedding similarity.""" existing = self._load_existing_controls() if not existing: @@ -698,7 +696,7 @@ Gib JSON zurück mit diesen Feldern: except Exception as e: logger.error("Failed to update job: %s", e) - def _store_control(self, control: GeneratedControl, job_id: str) -> str | None: + def _store_control(self, control: GeneratedControl, job_id: str) -> Optional[str]: """Persist a generated control to DB. Returns the control UUID or None.""" try: # Get framework UUID @@ -889,7 +887,7 @@ Gib JSON zurück mit diesen Feldern: chunk: RAGSearchResult, config: GeneratorConfig, job_id: str, - ) -> GeneratedControl | None: + ) -> Optional[GeneratedControl]: """Process a single chunk through stages 2-5.""" # Stage 2: License classification license_info = self._classify_license(chunk)