Files
breakpilot-compliance/backend-compliance/compliance/services/banner_ab_service.py
T
Benjamin Admin 965af3a34c feat: A/B Testing + Compliance Report PDF (F5 + F8)
F5: A/B Testing for Consent Rate
- Migration 116: banner_variants table + variant tracking in audit log
- BannerABService: deterministic sticky bucketing via device hash,
  chi-squared significance testing, variant CRUD
- banner_ab_routes: 6 endpoints (CRUD + stats + assign)
- ABTestPanel.tsx: variant creation, traffic sliders, opt-in comparison
  chart with winner/significance badges
- New "A/B-Test" tab in cookie-banner page

F8: Compliance Report PDF
- CompliancePDFGenerator: reportlab-based A4 PDF covering all modules
  (Company Profile, TOM, VVT, DSFA, Risks, Vendors, Incidents,
  Reviews, Consents, Roles)
- compliance_report_routes: GET /compliance/report/pdf
- "Compliance-Report herunterladen" button on SDK dashboard

[migration-approved]

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-03 21:42:50 +02:00

194 lines
8.0 KiB
Python

"""
Banner A/B Testing Service — variant assignment, stats, significance.
Deterministic variant assignment via device fingerprint hash ensures
the same device always sees the same variant (sticky bucketing).
"""
import hashlib
import math
import uuid
from datetime import datetime, timezone
from typing import Any, Optional
from sqlalchemy import text
from sqlalchemy.orm import Session
class BannerABService:
    """A/B testing for consent banner variants.

    Wraps a SQLAlchemy session and provides:

    * CRUD on the ``compliance_banner_variants`` table,
    * deterministic ("sticky") variant assignment from a device fingerprint,
    * per-variant opt-in statistics with a chi-squared significance test of
      the best challenger against the control variant.
    """

    # Columns update_variant() may change. The column names are interpolated
    # into the SQL text, so they must only ever come from this fixed list and
    # never from caller-supplied keys.
    _UPDATABLE_FIELDS = (
        "variant_name", "traffic_percent", "is_control", "banner_title",
        "banner_description", "position", "style", "primary_color",
        "show_decline_all", "is_active",
    )

    # Minimum confidence (0-1) a challenger needs to be flagged as winner.
    _WINNER_CONFIDENCE = 0.95

    # Reported confidence is capped so it is never displayed as 100 %.
    _MAX_CONFIDENCE = 0.999

    def __init__(self, db: Session) -> None:
        """Store the session used for every query issued by this service."""
        self.db = db

    # ------------------------------------------------------------------
    # Variant CRUD
    # ------------------------------------------------------------------

    def list_variants(self, tenant_id: str, site_config_id: str) -> list[dict]:
        """Return all variants of one site as dicts, ordered by ``variant_key``."""
        q = text("""
            SELECT * FROM compliance_banner_variants
            WHERE tenant_id = :tid AND site_config_id = :scid
            ORDER BY variant_key
        """)
        rows = self.db.execute(q, {"tid": tenant_id, "scid": site_config_id}).fetchall()
        return [dict(r._mapping) for r in rows]

    def create_variant(self, tenant_id: str, site_config_id: str, data: dict) -> dict:
        """Insert a variant and return the stored row as a dict.

        Keys missing from ``data`` fall back to: empty ``variant_name``,
        ``variant_key`` "A", ``traffic_percent`` 50, ``is_control`` False,
        ``theme_overrides`` "{}"; all remaining columns default to NULL.
        The insert is committed before returning.
        """
        q = text("""
            INSERT INTO compliance_banner_variants
            (tenant_id, site_config_id, variant_name, variant_key, traffic_percent, is_control,
            banner_title, banner_description, position, style, primary_color, show_decline_all, theme_overrides)
            VALUES (:tid, :scid, :name, :key, :pct, :ctrl,
            :title, :desc, :pos, :style, :color, :decline, :theme)
            RETURNING *
        """)
        # Fetch the RETURNING row before committing, while the result is open.
        row = self.db.execute(q, {
            "tid": tenant_id, "scid": site_config_id,
            "name": data.get("variant_name", ""),
            "key": data.get("variant_key", "A"),
            "pct": data.get("traffic_percent", 50),
            "ctrl": data.get("is_control", False),
            "title": data.get("banner_title"),
            "desc": data.get("banner_description"),
            "pos": data.get("position"),
            "style": data.get("style"),
            "color": data.get("primary_color"),
            "decline": data.get("show_decline_all"),
            "theme": data.get("theme_overrides", "{}"),
        }).fetchone()
        self.db.commit()
        return dict(row._mapping)

    def update_variant(self, variant_id: str, data: dict) -> Optional[dict]:
        """Update the whitelisted fields present in ``data``.

        Fields whose value is ``None`` are skipped, so a column cannot be
        reset to NULL through this method. Returns the updated row as a dict,
        or ``None`` when ``data`` contains nothing to update or no row has
        the given id.
        """
        sets, params = [], {"vid": variant_id}
        for field in self._UPDATABLE_FIELDS:
            if data.get(field) is not None:
                sets.append(f"{field} = :{field}")
                params[field] = data[field]
        if not sets:
            return None
        sets.append("updated_at = NOW()")
        q = text(f"UPDATE compliance_banner_variants SET {', '.join(sets)} WHERE id = :vid RETURNING *")
        row = self.db.execute(q, params).fetchone()
        self.db.commit()
        return dict(row._mapping) if row else None

    def delete_variant(self, variant_id: str) -> bool:
        """Delete a variant by id; return True if a row was removed."""
        q = text("DELETE FROM compliance_banner_variants WHERE id = :vid")
        result = self.db.execute(q, {"vid": variant_id})
        self.db.commit()
        return result.rowcount > 0

    # ------------------------------------------------------------------
    # Variant Assignment (deterministic sticky bucketing)
    # ------------------------------------------------------------------

    @staticmethod
    def _bucket_for(site_config_id: str, device_fingerprint: str) -> int:
        """Map a (site, device) pair to a stable bucket in the range 0-99."""
        # MD5 only spreads devices evenly over buckets; it is not a security
        # control, and saying so keeps it usable on FIPS-restricted builds.
        digest = hashlib.md5(
            f"{site_config_id}:{device_fingerprint}".encode(),
            usedforsecurity=False,
        ).hexdigest()
        return int(digest, 16) % 100

    def assign_variant(self, site_config_id: str, device_fingerprint: str) -> Optional[dict]:
        """Pick the active variant for a device, or ``None`` if none is active.

        Active variants are walked in ``variant_key`` order; the device gets
        the first variant whose cumulative ``traffic_percent`` exceeds its
        bucket. The same site/fingerprint pair therefore always maps to the
        same variant as long as the variant set and percentages are unchanged.

        NOTE(review): the lookup filters by site only, not by tenant.
        """
        variants = self.db.execute(text("""
            SELECT * FROM compliance_banner_variants
            WHERE site_config_id = :scid AND is_active = TRUE
            ORDER BY variant_key
        """), {"scid": site_config_id}).fetchall()
        if not variants:
            return None

        bucket = self._bucket_for(site_config_id, device_fingerprint)
        cumulative = 0
        for v in variants:
            # A NULL percentage counts as 0 instead of raising TypeError.
            cumulative += v.traffic_percent or 0
            if bucket < cumulative:
                return dict(v._mapping)
        # Percentages summed to less than 100: the remainder goes to the last variant.
        return dict(variants[-1]._mapping)

    # ------------------------------------------------------------------
    # Stats with statistical significance
    # ------------------------------------------------------------------

    def get_variant_stats(self, tenant_id: str, site_config_id: str) -> list[dict]:
        """Return per-variant consent stats, annotated with significance.

        Each entry holds ``variant_id``, ``variant_key``, ``variant_name``,
        ``traffic_percent``, ``is_control``, ``total``, ``accepted`` and
        ``opt_in_rate`` (percent, one decimal). The control variant and the
        best challenger additionally receive ``is_winner`` and
        ``significance`` when both have recorded events.

        NOTE(review): the audit-log query is scoped by tenant and
        ``variant_key`` only, so sites of one tenant that reuse a key share
        counts; it also selects a ``rejected`` count that is not returned.
        """
        variants = self.list_variants(tenant_id, site_config_id)
        if not variants:
            return []

        # Built once; only the bound parameters differ per variant.
        stats_query = text("""
            SELECT
            COUNT(*) AS total,
            COUNT(*) FILTER (WHERE action = 'consent_given') AS accepted,
            COUNT(*) FILTER (WHERE action IN ('consent_withdrawn', 'consent_revoked')) AS rejected
            FROM compliance_banner_consent_audit_log
            WHERE tenant_id = :tid AND variant_key = :vkey
        """)

        results = []
        for v in variants:
            vkey = v["variant_key"]
            row = self.db.execute(stats_query, {"tid": tenant_id, "vkey": vkey}).fetchone()
            total = row.total if row else 0
            accepted = row.accepted if row else 0
            results.append({
                "variant_id": str(v["id"]),
                "variant_key": vkey,
                "variant_name": v["variant_name"],
                "traffic_percent": v["traffic_percent"],
                "is_control": v["is_control"],
                "total": total,
                "accepted": accepted,
                "opt_in_rate": round(accepted / total * 100, 1) if total > 0 else 0.0,
            })

        self._apply_significance(results)
        return results

    @classmethod
    def _apply_significance(cls, results: list[dict]) -> None:
        """Annotate the control and the best challenger in place.

        The challenger is the non-control entry with the highest
        ``opt_in_rate``. Nothing is written when there is no control, no
        challenger, or either of them has no recorded events. The control is
        never flagged as winner; its ``significance`` is the complement of
        the challenger's.
        """
        control = next((r for r in results if r["is_control"]), None)
        if control is None:
            return
        best = max(
            (r for r in results if not r["is_control"]),
            key=lambda r: r["opt_in_rate"],
            default=None,
        )
        if best is None or control["total"] <= 0 or best["total"] <= 0:
            return

        sig = cls._chi_squared_significance(
            control["accepted"], control["total"],
            best["accepted"], best["total"],
        )
        # The chi-squared test is two-sided: a significant result may equally
        # mean the challenger is worse. Compare raw rates by cross-multiplying
        # so rounding of opt_in_rate cannot hide or invent a difference.
        beats_control = (
            best["accepted"] * control["total"] > control["accepted"] * best["total"]
        )
        best["is_winner"] = beats_control and sig >= cls._WINNER_CONFIDENCE
        best["significance"] = round(sig * 100, 1)
        control["is_winner"] = False
        control["significance"] = round((1 - sig) * 100, 1)

    @staticmethod
    def _chi_squared_significance(a_success: int, a_total: int, b_success: int, b_total: int) -> float:
        """Pearson chi-squared test for a 2x2 table; return confidence 0-0.999.

        ``a_*`` and ``b_*`` are the success count and sample size of the two
        groups. The result is ``1 - p`` for the two-sided test with one
        degree of freedom, capped at 0.999. Returns 0.0 when there are no
        observations or the groups do not differ.
        """
        a_fail = a_total - a_success
        b_fail = b_total - b_success
        n = a_total + b_total
        if n == 0:
            return 0.0

        total_success = a_success + b_success
        total_fail = a_fail + b_fail
        # (observed, expected under "both groups share one rate") per cell.
        cells = (
            (a_success, a_total * total_success / n),
            (a_fail, a_total * total_fail / n),
            (b_success, b_total * total_success / n),
            (b_fail, b_total * total_fail / n),
        )
        # Cells with a zero expectation carry no information and are skipped.
        chi2 = sum((obs - exp) ** 2 / exp for obs, exp in cells if exp > 0)

        # With one degree of freedom the chi-squared CDF has the closed form
        # erf(sqrt(x / 2)), so no threshold table or approximation is needed.
        confidence = math.erf(math.sqrt(chi2 / 2))
        return min(confidence, BannerABService._MAX_CONFIDENCE)