From f51671737a5108e4402005843dbfbe603951502b Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Thu, 7 May 2026 15:53:53 +0200
Subject: [PATCH] fix: Correct Ollama model name + strict blank-line heading detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. LLM model: qwen3:32b → qwen3.5:35b-a3b (actual model on Mac Mini)
2. Section splitter: headings MUST be preceded by a blank line.
   This prevents cookie table entries ("Funktionale Cookies",
   "Session Cookies") from splitting the cookie section.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 .../compliance/api/agent_doc_check_routes.py     | 7 +++----
 .../compliance/services/doc_checks/llm_verify.py | 2 +-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/backend-compliance/compliance/api/agent_doc_check_routes.py b/backend-compliance/compliance/api/agent_doc_check_routes.py
index 51975e1..82a6e81 100644
--- a/backend-compliance/compliance/api/agent_doc_check_routes.py
+++ b/backend-compliance/compliance/api/agent_doc_check_routes.py
@@ -346,10 +346,9 @@ def _split_into_sections(text: str, parent_label: str, url: str) -> list[dict]:
             and not stripped.endswith(".")
             and not stripped.endswith(",")
             and stripped[0].isupper()
-            # Require preceding blank line OR line > 15 chars to avoid
-            # table column headers ("Funktion", "Speicherdauer") being
-            # treated as section headings
-            and (prev_blank or len(stripped) > 15)
+            # Require preceding blank line to distinguish real headings
+            # from table content ("Funktionale Cookies", "Session Cookies")
+            and prev_blank
         )
         is_skip = is_heading and stripped.lower().strip() in SKIP_HEADINGS
 
diff --git a/backend-compliance/compliance/services/doc_checks/llm_verify.py b/backend-compliance/compliance/services/doc_checks/llm_verify.py
index 58b4325..5c7a90c 100644
--- a/backend-compliance/compliance/services/doc_checks/llm_verify.py
+++ b/backend-compliance/compliance/services/doc_checks/llm_verify.py
@@ -15,7 +15,7 @@ import httpx
 logger = logging.getLogger(__name__)
 
 OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
-OLLAMA_MODEL = os.getenv("OLLAMA_VERIFY_MODEL", "qwen3:32b")
+OLLAMA_MODEL = os.getenv("OLLAMA_VERIFY_MODEL", "qwen3.5:35b-a3b")
 TIMEOUT = 30.0
 
 