873997c13b
When the cookie text has no captured CMP payload (long-tail sites that
don't use ePaaS/OneTrust/Cookiebot/etc.) we now fall back to a Qwen → OVH
LLM cascade to extract a structured vendor list from the policy text.
New module backend/compliance/services/vendor_llm_extractor.py:
- extract_vendors_via_llm(cookie_text): runs Qwen first (local Ollama),
then OVH if Qwen returns nothing usable.
- System prompt instructs the model to return STRICT JSON only:
{vendors: [{name, country, purpose, category, opt_out_url,
privacy_policy_url, persistence, cookies: [...]}]}
- Lenient JSON parser tolerates code-fences, prose wrappers, dict vs list.
- _normalize() caps array sizes (80 vendors, 30 cookies each), validates
URLs (must be http(s)), trims fields to reasonable lengths.
Route integration (agent_compliance_check_routes.py):
- After named-CMP extract: if cmp_vendors is empty AND the cookie text
has ≥500 words (otherwise it's likely navigation chrome), invoke the
LLM extractor. Progress message 'Vendor-Liste per LLM extrahieren...'.
- Vendors then run through the same validate_vendor_urls + score_vendors
pipeline → VVT table rendered identically regardless of source.
docker-compose.yml: backend-compliance gains OLLAMA_URL, CMP_LLM_MODEL,
OVH_LLM_URL/KEY/MODEL env vars (same names as consent-tester so the
configuration is unified).
This closes the 'every site eventually gets a VVT table' goal:
- Known CMP → V1/V2 structured extraction (fast, exact)
- Unknown CMP → V3 LLM extraction (slow, best-effort)
- No text at all → no vendors, but other compliance checks still run.
340 lines
11 KiB
YAML
# =========================================================
|
|
# BreakPilot Compliance — Compliance SDK Platform
|
|
# =========================================================
|
|
# Voraussetzung: breakpilot-core muss laufen!
|
|
# Start: docker compose up -d
|
|
# =========================================================
|
|
|
|
networks:
  # Shared network that every service in this file attaches to. It is declared
  # external, so Compose expects it to exist already and will not create it.
  breakpilot-network:
    name: breakpilot-network
    external: true
|
|
|
|
volumes:
  # Mounted at /data/ipfs by dsms-node.
  dsms_data: {}
  # consent-tester: CMP discovery log + auto-promoted modules
  cmp-data: {}
|
|
|
|
services:
|
|
|
|
# =========================================================
|
|
# CORE HEALTH CHECK — wartet auf Core-Infrastruktur
|
|
# =========================================================
|
|
core-health-check:
  # One-shot gate container: polls the Core health endpoint until it answers,
  # then exits. Other services wait on it via
  # `condition: service_completed_successfully`.
  image: curlimages/curl:latest
  container_name: bp-compliance-core-wait
  # Exec form plus a literal block scalar: the script's newlines are preserved
  # verbatim, instead of depending on how a folded (`>`) scalar treats
  # more-indented continuation lines inside a quoted `sh -c "..."` string.
  command:
    - sh
    - "-c"
    - |
      echo 'Waiting for Core infrastructure...'
      until curl -sf http://bp-core-health:8099/health; do
        echo 'Core not ready, waiting 5s...'
        sleep 5
      done
      echo 'Core is healthy!'
  # Must stay quoted: a bare `no` is read as boolean false by YAML 1.1 parsers.
  restart: "no"
  networks:
    - breakpilot-network
|
|
|
|
# =========================================================
|
|
# FRONTEND
|
|
# =========================================================
|
|
admin-compliance:
  # Admin frontend. Listens on 3000 inside the compose network only (expose,
  # not ports) and starts after the Core gate has completed and the backend
  # container has been started.
  container_name: bp-compliance-admin
  platform: linux/arm64
  build:
    context: ./admin-compliance
    dockerfile: Dockerfile
    args:
      # Passed as build arguments, i.e. resolved when the image is built.
      NEXT_PUBLIC_API_URL: "${NEXT_PUBLIC_API_URL:-https://macmini:8002}"
      NEXT_PUBLIC_SDK_URL: "${NEXT_PUBLIC_SDK_URL:-https://macmini:8093}"
  expose:
    - "3000"
  environment:
    NODE_ENV: "production"
    DATABASE_URL: "${COMPLIANCE_DATABASE_URL:-postgresql://breakpilot:breakpilot123@bp-core-postgres:5432/breakpilot_db}"
    # Service-to-service endpoints on the shared network.
    BACKEND_URL: "http://backend-compliance:8002"
    CONSENT_SERVICE_URL: "http://bp-core-consent-service:8081"
    SDK_URL: "http://ai-compliance-sdk:8090"
    # LLM endpoint and model; the default targets the Docker host.
    OLLAMA_URL: "${OLLAMA_URL:-http://host.docker.internal:11434}"
    COMPLIANCE_LLM_MODEL: "${COMPLIANCE_LLM_MODEL:-qwen3.5:35b-a3b}"
  extra_hosts:
    # Maps host.docker.internal to the host gateway address.
    - "host.docker.internal:host-gateway"
  depends_on:
    core-health-check:
      condition: service_completed_successfully
    backend-compliance:
      condition: service_started
  restart: unless-stopped
  networks:
    - breakpilot-network
|
|
|
|
developer-portal:
  # Developer portal frontend; reachable on port 3000 from other containers on
  # the shared network only (no host port is published).
  container_name: bp-compliance-developer-portal
  platform: linux/arm64
  build:
    context: ./developer-portal
    dockerfile: Dockerfile
  expose:
    - "3000"
  environment:
    NODE_ENV: "production"
  restart: unless-stopped
  networks:
    - breakpilot-network
|
|
|
|
# =========================================================
|
|
# BACKEND
|
|
# =========================================================
|
|
backend-compliance:
  # Compliance backend API. Port 8002 is exposed to the compose network only.
  # Starts once the Core gate container has completed successfully.
  container_name: bp-compliance-backend
  platform: linux/arm64
  build:
    context: ./backend-compliance
    dockerfile: Dockerfile
  expose:
    - "8002"
  environment:
    # --- Runtime basics ---
    PORT: "8002"
    ENVIRONMENT: "${ENVIRONMENT:-development}"
    DATABASE_URL: "${COMPLIANCE_DATABASE_URL:-postgresql://breakpilot:breakpilot123@bp-core-postgres:5432/breakpilot_db}"
    # The default is a placeholder; override JWT_SECRET outside development.
    JWT_SECRET: "${JWT_SECRET:-your-super-secret-jwt-key-change-in-production}"
    SESSION_TTL_HOURS: "${SESSION_TTL_HOURS:-24}"
    # --- Core services ---
    CONSENT_SERVICE_URL: "http://bp-core-consent-service:8081"
    VALKEY_URL: "redis://bp-core-valkey:6379/0"
    RAG_SERVICE_URL: "http://bp-core-rag-service:8097"
    # --- Compliance LLM settings ---
    COMPLIANCE_LLM_PROVIDER: "${COMPLIANCE_LLM_PROVIDER:-ollama}"
    SELF_HOSTED_LLM_URL: "${SELF_HOSTED_LLM_URL:-http://host.docker.internal:11434}"
    SELF_HOSTED_LLM_MODEL: "${SELF_HOSTED_LLM_MODEL:-qwen3.5:35b-a3b}"
    COMPLIANCE_LLM_MAX_TOKENS: "${COMPLIANCE_LLM_MAX_TOKENS:-4096}"
    COMPLIANCE_LLM_TEMPERATURE: "${COMPLIANCE_LLM_TEMPERATURE:-0.3}"
    COMPLIANCE_LLM_TIMEOUT: "${COMPLIANCE_LLM_TIMEOUT:-120}"
    ANTHROPIC_API_KEY: "${ANTHROPIC_API_KEY:-}"
    DECOMPOSITION_LLM_MODEL: "${DECOMPOSITION_LLM_MODEL:-claude-haiku-4-5-20251001}"
    # --- Outgoing mail ---
    SMTP_HOST: "${SMTP_HOST:-bp-core-mailpit}"
    SMTP_PORT: "${SMTP_PORT:-1025}"
    SMTP_USERNAME: "${SMTP_USERNAME:-}"
    SMTP_PASSWORD: "${SMTP_PASSWORD:-}"
    SMTP_FROM_NAME: "${SMTP_FROM_NAME:-BreakPilot Compliance}"
    SMTP_FROM_ADDR: "${SMTP_FROM_ADDR:-compliance@breakpilot.app}"
    # --- LLM cascade for V3 vendor extraction (unknown CMPs) ---
    # Uses the same variable names as the consent-tester service so both can
    # be configured in one place.
    OLLAMA_URL: "${OLLAMA_URL:-http://host.docker.internal:11434}"
    CMP_LLM_MODEL: "${CMP_LLM_MODEL:-qwen3:30b-a3b}"
    OVH_LLM_URL: "${OVH_LLM_URL:-}"
    OVH_LLM_KEY: "${OVH_LLM_KEY:-}"
    OVH_LLM_MODEL: "${OVH_LLM_MODEL:-}"
  extra_hosts:
    # Maps host.docker.internal to the host gateway address.
    - "host.docker.internal:host-gateway"
  depends_on:
    core-health-check:
      condition: service_completed_successfully
  restart: unless-stopped
  networks:
    - breakpilot-network
|
|
|
|
# =========================================================
|
|
# SDK SERVICES
|
|
# =========================================================
|
|
ai-compliance-sdk:
  # AI compliance SDK service on port 8090. Starts after the Core gate
  # container has completed; health is probed with wget against /health.
  build:
    context: ./ai-compliance-sdk
    dockerfile: Dockerfile
  container_name: bp-compliance-ai-sdk
  platform: linux/arm64
  environment:
    PORT: "8090"
    ENVIRONMENT: "${ENVIRONMENT:-development}"
    DATABASE_URL: "${COMPLIANCE_DATABASE_URL:-postgresql://breakpilot:breakpilot123@bp-core-postgres:5432/breakpilot_db}"
    # The default is a placeholder; override JWT_SECRET outside development.
    JWT_SECRET: "${JWT_SECRET:-your-super-secret-jwt-key-change-in-production}"
    # LLM provider selection; shares COMPLIANCE_LLM_PROVIDER with the backend.
    LLM_PROVIDER: "${COMPLIANCE_LLM_PROVIDER:-ollama}"
    LLM_FALLBACK_PROVIDER: "${LLM_FALLBACK_PROVIDER:-}"
    OLLAMA_URL: "${OLLAMA_URL:-http://host.docker.internal:11434}"
    OLLAMA_DEFAULT_MODEL: "${OLLAMA_DEFAULT_MODEL:-qwen3.5:35b-a3b}"
    ANTHROPIC_API_KEY: "${ANTHROPIC_API_KEY:-}"
    ANTHROPIC_DEFAULT_MODEL: "${ANTHROPIC_DEFAULT_MODEL:-claude-sonnet-4-5-20250929}"
    PII_REDACTION_ENABLED: "${PII_REDACTION_ENABLED:-true}"
    PII_REDACTION_LEVEL: "${PII_REDACTION_LEVEL:-standard}"
    AUDIT_RETENTION_DAYS: "${AUDIT_RETENTION_DAYS:-365}"
    AUDIT_LOG_PROMPTS: "${AUDIT_LOG_PROMPTS:-true}"
    # NOTE(review): wildcard origin list; confirm this is intended outside
    # development.
    ALLOWED_ORIGINS: "*"
    TTS_SERVICE_URL: "http://compliance-tts-service:8095"
    QDRANT_URL: "${QDRANT_URL:-https://qdrant-dev.breakpilot.ai}"
    # No key is baked into this file any more: supply QDRANT_API_KEY through
    # the environment or an .env file, like the other API keys here. The key
    # that used to be the committed default should be rotated.
    QDRANT_API_KEY: "${QDRANT_API_KEY:-}"
  extra_hosts:
    # Maps host.docker.internal to the host gateway address.
    - "host.docker.internal:host-gateway"
  depends_on:
    core-health-check:
      condition: service_completed_successfully
  healthcheck:
    test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:8090/health"]
    interval: 30s
    timeout: 3s
    start_period: 10s
    retries: 3
  restart: unless-stopped
  networks:
    - breakpilot-network
|
|
|
|
# =========================================================
|
|
# TTS SERVICE (Piper TTS + FFmpeg)
|
|
# =========================================================
|
|
compliance-tts-service:
  # Text-to-speech service (Piper TTS + FFmpeg) on port 8095, exposed to the
  # compose network only. Starts after the Core gate container has completed.
  build:
    context: ./compliance-tts-service
    dockerfile: Dockerfile
  container_name: bp-compliance-tts
  platform: linux/arm64
  expose:
    - "8095"
  environment:
    MINIO_ENDPOINT: nbg1.your-objectstorage.com
    # Object-storage credentials are no longer committed in this file. Supply
    # MINIO_ACCESS_KEY and MINIO_SECRET_KEY through the environment or an .env
    # file; both are empty when unset. The key pair that used to be hard-coded
    # here should be rotated.
    MINIO_ACCESS_KEY: "${MINIO_ACCESS_KEY:-}"
    MINIO_SECRET_KEY: "${MINIO_SECRET_KEY:-}"
    # Quoted so the value reaches the container as the string "true".
    MINIO_SECURE: "true"
    PIPER_MODEL_PATH: /app/models/de_DE-thorsten-high.onnx
  depends_on:
    core-health-check:
      condition: service_completed_successfully
  healthcheck:
    # Probes /health with Python's urllib.
    test:
      - "CMD"
      - "python"
      - "-c"
      - "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8095/health')"
    interval: 30s
    timeout: 10s
    start_period: 60s
    retries: 3
  restart: unless-stopped
  networks:
    - breakpilot-network
|
|
|
|
# =========================================================
# DATA SOVEREIGNTY
|
|
# =========================================================
|
|
dsms-node:
  # IPFS node backing the DSMS; data is kept in the dsms_data volume.
  # Considered healthy once `ipfs id` succeeds.
  container_name: bp-compliance-dsms-node
  build:
    context: ./dsms-node
    dockerfile: Dockerfile
  environment:
    IPFS_PROFILE: "server"
  volumes:
    - dsms_data:/data/ipfs
  ports:
    - "4001:4001"
    # NOTE(review): dsms-gateway uses 5001 as the IPFS API URL; publishing it
    # on the host makes it reachable from outside the compose network.
    # Confirm this is intended.
    - "5001:5001"
    # Container port 8080 (used as the IPFS gateway URL by dsms-gateway) is
    # published on host port 8085.
    - "8085:8080"
  healthcheck:
    test:
      - "CMD-SHELL"
      - "ipfs id"
    interval: 30s
    timeout: 10s
    start_period: 30s
    retries: 3
  restart: unless-stopped
  networks:
    - breakpilot-network
|
|
|
|
dsms-gateway:
  # Gateway in front of dsms-node, published on host port 8082. Waits until
  # the node reports healthy before starting.
  container_name: bp-compliance-dsms-gateway
  build:
    context: ./dsms-gateway
    dockerfile: Dockerfile
  ports:
    - "8082:8082"
  environment:
    # The node is addressed by its compose service name.
    IPFS_API_URL: "http://dsms-node:5001"
    IPFS_GATEWAY_URL: "http://dsms-node:8080"
    # The default is a placeholder; override JWT_SECRET outside development.
    JWT_SECRET: "${JWT_SECRET:-your-super-secret-jwt-key-change-in-production}"
  depends_on:
    dsms-node:
      condition: service_healthy
  restart: unless-stopped
  networks:
    - breakpilot-network
|
|
|
|
# =========================================================
|
|
# DOCUMENT CRAWLER & AUTO-ONBOARDING
|
|
# =========================================================
|
|
consent-tester:
  # Consent tester service, published on host port 8094 and limited to 2g of
  # memory. Starts after the Core gate container has completed.
  container_name: bp-compliance-consent-tester
  platform: linux/arm64
  build:
    context: ./consent-tester
    dockerfile: Dockerfile
  mem_limit: 2g
  ports:
    - "8094:8094"
  environment:
    # LLM fallback for cookie-policy extraction (Phase C+D of cascade)
    # NOTE(review): the OLLAMA_URL default here (bp-core-ollama) differs from
    # the host.docker.internal default used by backend-compliance for the
    # same variable. Confirm this is intended.
    OLLAMA_URL: "${OLLAMA_URL:-http://bp-core-ollama:11434}"
    CMP_LLM_MODEL: "${CMP_LLM_MODEL:-qwen3:30b-a3b}"
    OVH_LLM_URL: "${OVH_LLM_URL:-}"
    OVH_LLM_KEY: "${OVH_LLM_KEY:-}"
    OVH_LLM_MODEL: "${OVH_LLM_MODEL:-}"
    VALKEY_URL: "${VALKEY_URL:-redis://bp-core-valkey:6379}"
    # CMP discovery log (Phase E auto-promote); the path is on the cmp-data
    # volume mounted at /data below.
    CMP_DISCOVERY_DB: "/data/cmp_discoveries.db"
  volumes:
    - cmp-data:/data
  depends_on:
    core-health-check:
      condition: service_completed_successfully
  healthcheck:
    test:
      - "CMD"
      - "curl"
      - "-f"
      - "http://127.0.0.1:8094/health"
    interval: 30s
    timeout: 10s
    start_period: 30s
    retries: 3
  restart: unless-stopped
  networks:
    - breakpilot-network
|
|
|
|
document-crawler:
  # Document crawler, published on host port 8098. The crawl directory is
  # bind-mounted read-only from the host's /tmp/breakpilot-crawl-data.
  container_name: bp-compliance-document-crawler
  platform: linux/arm64
  build:
    context: ./document-crawler
    dockerfile: Dockerfile
  ports:
    - "8098:8098"
  environment:
    PORT: "8098"
    DATABASE_URL: "${COMPLIANCE_DATABASE_URL:-postgresql://breakpilot:breakpilot123@bp-core-postgres:5432/breakpilot_db}"
    # Sibling services, addressed by compose service name.
    LLM_GATEWAY_URL: "http://ai-compliance-sdk:8090"
    DSMS_GATEWAY_URL: "http://dsms-gateway:8082"
    # Matches the container side of the bind mount below.
    CRAWL_BASE_PATH: "/data/crawl"
    MAX_FILE_SIZE_MB: "50"
  volumes:
    - /tmp/breakpilot-crawl-data:/data/crawl:ro
  depends_on:
    core-health-check:
      condition: service_completed_successfully
  healthcheck:
    test:
      - "CMD"
      - "curl"
      - "-f"
      - "http://127.0.0.1:8098/health"
    interval: 30s
    timeout: 10s
    start_period: 15s
    retries: 3
  restart: unless-stopped
  networks:
    - breakpilot-network
|
|
|
|
# =========================================================
|
|
# DOCUMENTATION
|
|
# =========================================================
|
|
docs:
  # Documentation site. It is assigned to the `docs` profile, so it is only
  # started when that profile is enabled. Port 80 is exposed to the compose
  # network only.
  container_name: bp-compliance-docs
  platform: linux/arm64
  profiles:
    - docs
  build:
    # The build context is the repository root; the Dockerfile lives under
    # docs-src/.
    context: .
    dockerfile: docs-src/Dockerfile
  expose:
    - "80"
  healthcheck:
    test:
      - "CMD"
      - "wget"
      - "-q"
      - "--spider"
      - "http://127.0.0.1:80/"
    interval: 30s
    timeout: 10s
    retries: 3
  restart: unless-stopped
  networks:
    - breakpilot-network
|