fix(cookie): gate non-COOKIE_POLICY controls out of the cookie-policy scan
The cookie agent loaded 100 controls, 11 of which have no COOKIE_POLICY in applicable_artifacts -- Security/TOM/Audit (PROCESS) or Banner-behaviour (BEHAVIOR) controls that produce nonsense findings against a cookie policy (e.g. "TOMs not documented"). Add a cookie classification gate (analogous to the DSE gate, keyed on COOKIE_POLICY, without the needs_review carve-out since the artifact signal is decisive and the set is inventory-verified). Controls are routed out, not deleted. Effect vs Opus-GT: FP 16->11, FN 179->159; the remaining FN=159 over-rescue is a separate (judge/criteria) question, not routing. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+78
@@ -0,0 +1,78 @@
|
|||||||
|
"""Applicability-Gate fuer den Cookie-Policy-Scan.
|
||||||
|
|
||||||
|
Schliesst Controls aus dem Cookie-Findings-Scan aus, die laut
|
||||||
|
`compliance.control_classification` NICHT gegen eine Cookie-Policy laufen
|
||||||
|
('COOKIE_POLICY' nicht in applicable_artifacts). Diese gehoeren zu einem
|
||||||
|
anderen Artefakt/Pruefer — Banner (BEHAVIOR/Playwright), Security/TOM/Audit
|
||||||
|
(PROCESS) — und erzeugen sonst Unsinn-Findings (z.B. 'TOMs nicht dokumentiert'
|
||||||
|
gegen eine Cookie-Richtlinie). Sie werden NICHT geloescht, sondern als
|
||||||
|
Routing-Liste zurueckgegeben.
|
||||||
|
|
||||||
|
Anders als das DSE-Gate OHNE needs_review-Ausnahme: das Artefakt-Signal ist
|
||||||
|
hier entscheidend und per Inventar (2026-06-21) belegt; die mis-scopeten 11
|
||||||
|
sind geprueft. Fail-safe: fehlt die Tabelle / DB nicht erreichbar -> leeres
|
||||||
|
Dict -> es wird NICHT gefiltert (kein stiller Recall-Verlust).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def load_cookie_gate(db_url: str = "") -> dict[str, dict[str, Any]]:
    """Load the exclusion map for the cookie-policy findings scan.

    Selects the active rows of ``compliance.control_classification`` whose
    ``applicable_artifacts`` does not contain ``'COOKIE_POLICY'``.

    Args:
        db_url: Connection string. When empty, ``DATABASE_URL`` and then
            ``COMPLIANCE_DATABASE_URL`` are read from the environment.

    Returns:
        ``{control_id: meta}`` with ``meta`` holding ``obligation_type``,
        ``check_intent`` and ``applicable_artifacts`` (as a list). An empty
        dict means "do not filter"; it is returned when no connection string
        is available or when importing the driver, connecting or querying
        raises.
    """
    candidates = (
        db_url,
        os.getenv("DATABASE_URL"),
        os.getenv("COMPLIANCE_DATABASE_URL"),
    )
    connection_string = next((value for value in candidates if value), "")
    if not connection_string:
        return {}

    query = """SELECT control_id, obligation_type, check_intent,
                      applicable_artifacts
               FROM compliance.control_classification
               WHERE is_active
                 AND NOT ('COOKIE_POLICY' = ANY(applicable_artifacts))"""

    try:
        # Imported lazily so a missing driver degrades to "no filter" as well.
        import asyncpg

        connection = await asyncpg.connect(connection_string)
        try:
            records = await connection.fetch(query)
        finally:
            await connection.close()
    except Exception as exc:  # table missing / DB unreachable -> no filter
        logger.info("cookie classification gate inaktiv: %s", str(exc)[:90])
        return {}

    excluded: dict[str, dict[str, Any]] = {}
    for record in records:
        control_id = record["control_id"]
        if not control_id:
            # Rows without an id cannot be matched against loaded controls.
            continue
        excluded[control_id] = {
            "obligation_type": record["obligation_type"],
            "check_intent": record["check_intent"],
            "applicable_artifacts": list(record["applicable_artifacts"] or []),
        }
    return excluded
|
||||||
|
|
||||||
|
|
||||||
|
def apply_gate(
    controls: list[dict[str, Any]],
    gate: dict[str, dict[str, Any]],
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Split loaded controls into ``(kept, routed_out)``.

    Args:
        controls: Loaded control dicts; each is looked up by its
            ``"control_id"`` value.
        gate: ``{control_id: meta}`` of controls to exclude from the scan.
            An empty dict keeps every control.

    Returns:
        A tuple ``(kept, routed_out)``:

        * ``kept`` - the original control dicts that run through the cookie
          scan as usual, in input order.
        * ``routed_out`` - one new dict per excluded control containing
          ``control_id``, ``title`` and the gate's classification metadata,
          for routing to another checker.
    """
    kept: list[dict[str, Any]] = []
    routed_out: list[dict[str, Any]] = []
    for control in controls:
        cid = control.get("control_id")
        # Presence of the id in the gate is the exclusion signal. Testing the
        # metadata for truthiness instead would silently keep a gated control
        # whose metadata happens to be empty.
        if cid and cid in gate:
            meta = gate[cid] or {}
            routed_out.append(
                {"control_id": cid, "title": control.get("title"), **meta}
            )
        else:
            # Controls without an id can never match the gate and stay in.
            kept.append(control)
    return kept, routed_out
|
||||||
@@ -45,6 +45,15 @@ async def run_v3_pipeline(
|
|||||||
controls = []
|
controls = []
|
||||||
_normalize_criteria(controls)
|
_normalize_criteria(controls)
|
||||||
controls, sector_dropped = _filter_sector(controls, business_scope)
|
controls, sector_dropped = _filter_sector(controls, business_scope)
|
||||||
|
# Artefakt-Gate: Controls ohne COOKIE_POLICY-Artefakt (Security/TOM/Audit,
|
||||||
|
# Banner) raus — sie gehoeren zu anderem Pruefer/Artefakt und erzeugen sonst
|
||||||
|
# Unsinn-Findings. Siehe _classification_gate.
|
||||||
|
routed_out: list[dict[str, Any]] = []
|
||||||
|
try:
|
||||||
|
from ._classification_gate import apply_gate, load_cookie_gate
|
||||||
|
controls, routed_out = apply_gate(controls, await load_cookie_gate(db_url))
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("cookie classification gate skipped: %s", e)
|
||||||
results: list[dict[str, Any]] = []
|
results: list[dict[str, Any]] = []
|
||||||
if controls:
|
if controls:
|
||||||
try:
|
try:
|
||||||
@@ -111,6 +120,7 @@ async def run_v3_pipeline(
|
|||||||
"layer_0_boost_overrides": boost_overrides,
|
"layer_0_boost_overrides": boost_overrides,
|
||||||
"total_mcs": len(results),
|
"total_mcs": len(results),
|
||||||
"sector_dropped": sector_dropped,
|
"sector_dropped": sector_dropped,
|
||||||
|
"artifact_gated": len(routed_out),
|
||||||
}
|
}
|
||||||
return results, telemetry
|
return results, telemetry
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,42 @@
|
|||||||
|
"""Tests for the cookie-policy applicability gate: controls without a
|
||||||
|
COOKIE_POLICY artifact are routed out of the findings scan (not deleted),
|
||||||
|
and the gate is fail-safe (no DSN -> no filter)."""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from compliance.services.specialist_agents.cookie_policy._classification_gate import (
|
||||||
|
apply_gate, load_cookie_gate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_gate_splits_kept_and_routed():
    """Gated controls leave the scan and carry title + classification data."""
    tom_meta = {
        "obligation_type": "TECHNICAL",
        "check_intent": "DIRECT_TECHNICAL",
        "applicable_artifacts": ["TOM", "AUDIT"],
    }
    banner_meta = {
        "obligation_type": "TECHNICAL",
        "check_intent": "DIRECT_TECHNICAL",
        "applicable_artifacts": ["COOKIE_BANNER", "SYSTEMSCAN"],
    }
    loaded = [
        {"control_id": "COOK-1", "title": "Kategorien"},
        {"control_id": "TOM-1", "title": "Verschlüsselung"},
        {"control_id": "BAN-1", "title": "Consent vor Setzen"},
    ]

    kept, routed = apply_gate(loaded, {"TOM-1": tom_meta, "BAN-1": banner_meta})

    kept_ids = [entry["control_id"] for entry in kept]
    routed_ids = {entry["control_id"] for entry in routed}
    assert kept_ids == ["COOK-1"]
    assert routed_ids == {"TOM-1", "BAN-1"}

    # Routed entries carry title + classification metadata for downstream
    # routing.
    tom_entry = next(entry for entry in routed if entry["control_id"] == "TOM-1")
    assert tom_entry["title"] == "Verschlüsselung"
    assert tom_entry["applicable_artifacts"] == ["TOM", "AUDIT"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_gate_empty_gate_keeps_all():
    """An empty gate means no filtering: every control is kept."""
    loaded = [{"control_id": cid} for cid in ("A", "B")]
    kept, routed = apply_gate(loaded, {})
    assert len(kept) == 2
    assert routed == []
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_load_cookie_gate_no_dsn_is_failsafe(monkeypatch):
    """Without any connection string the loader returns an empty gate."""
    for variable in ("DATABASE_URL", "COMPLIANCE_DATABASE_URL"):
        monkeypatch.delenv(variable, raising=False)
    gate = await load_cookie_gate("")
    assert gate == {}
|
||||||
Reference in New Issue
Block a user