From 58957a4aaae20e77cab637ef93a80cd106961fac Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 29 Apr 2026 19:36:46 +0200 Subject: [PATCH] fix: Playwright user permission + etracker DSE matching + CMP skip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Dockerfile: install Playwright AS appuser (not root) so chromium binary is accessible at runtime. Was causing 500 error. 2. DSE service matching: text-search fallback when LLM extraction fails. If "etracker" appears in DSE text, mark as documented even without LLM parsing the service list. 3. CMP skip: consent managers in category "cmp" skipped (not just "other" with id "cmp"). NOT DEPLOYED — RAG pipeline is running. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../compliance/api/agent_scan_routes.py | 16 ++++++++++++++-- .../compliance/services/dse_service_extractor.py | 3 ++- consent-tester/Dockerfile | 10 +++++++++- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/backend-compliance/compliance/api/agent_scan_routes.py b/backend-compliance/compliance/api/agent_scan_routes.py index d75adb3..051354a 100644 --- a/backend-compliance/compliance/api/agent_scan_routes.py +++ b/backend-compliance/compliance/api/agent_scan_routes.py @@ -153,9 +153,21 @@ async def scan_website_endpoint(req: ScanRequest): if not dse_text: dse_text = await _fetch_dse_text(req.url, scan.pages_scanned) - # Step 3: Extract services mentioned in DSE via LLM + # Step 3: Extract services mentioned in DSE via LLM + text fallback dse_services = await extract_dse_services(dse_text) if dse_text else [] - logger.info("DSE mentions %d services", len(dse_services)) + logger.info("DSE mentions %d services (LLM)", len(dse_services)) + + # Fallback: if LLM extraction failed, search DSE text directly for service names + if not dse_services and dse_text: + dse_lower = dse_text.lower() + detected_dicts_for_check = [_service_to_dict(s) for s in scan.detected_services] + for svc in detected_dicts_for_check: + name = svc.get("name", "").lower() + # Check if service name appears in DSE text + if name and len(name) > 3 and name in dse_lower: + dse_services.append({"name": svc["name"], "purpose": "", "country": svc.get("country", ""), "legal_basis": ""}) + if dse_services: + logger.info("DSE text fallback found %d services", len(dse_services)) # Step 4: Parse DSE into structured sections (prefer Playwright HTML) dse_html = "" diff --git a/backend-compliance/compliance/services/dse_service_extractor.py b/backend-compliance/compliance/services/dse_service_extractor.py index e50e6d7..1363df9 100644 --- a/backend-compliance/compliance/services/dse_service_extractor.py +++ b/backend-compliance/compliance/services/dse_service_extractor.py @@ -87,9 +87,10 @@ def compare_services( for key, svc in detected_names.items(): # Skip CMP — consent managers don't need DSE mention - if svc.get("category") == "other" and svc.get("id") == "cmp": + if svc.get("category") == "cmp" or (svc.get("category") == "other" and svc.get("id") == "cmp"): continue matched = False + # Method 1: Match against LLM-extracted service list for dse_key, dse_svc in dse_names.items(): if key == dse_key or _fuzzy_match(svc["name"], dse_svc["name"]): documented.append({"detected": svc, "dse": dse_svc, "status": "ok"}) diff --git a/consent-tester/Dockerfile b/consent-tester/Dockerfile index 58c2333..02f21e8 100644 --- a/consent-tester/Dockerfile +++ b/consent-tester/Dockerfile @@ -7,15 +7,23 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libnss3 libnspr4 libatk1.0-0 libatk-bridge2.0-0 libcups2 \ libdrm2 libxkbcommon0 libxcomposite1 libxdamage1 libxfixes3 \ libxrandr2 libgbm1 libpango-1.0-0 libcairo2 libasound2 \ + curl \ && rm -rf /var/lib/apt/lists/* +# Create user BEFORE installing Playwright (so browsers are in user's cache) +RUN useradd --create-home appuser + COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt + +# Install Playwright browsers AS appuser (so they land in /home/appuser/.cache/) +USER appuser RUN playwright install chromium +USER root COPY . . +RUN chown -R appuser:appuser /app -RUN useradd --create-home appuser USER appuser EXPOSE 8094