Compare commits

..

2 Commits

Author SHA1 Message Date
Benjamin Admin 4bfb438c92 feat: 4 banner check upgrades — 30 CMPs, stealth, Shadow DOM, categories
Build + Deploy / build-admin-compliance (push) Successful in 2m17s
Build + Deploy / build-backend-compliance (push) Successful in 3m17s
Build + Deploy / build-ai-sdk (push) Successful in 56s
Build + Deploy / build-developer-portal (push) Successful in 1m37s
Build + Deploy / build-tts (push) Successful in 1m33s
Build + Deploy / build-document-crawler (push) Successful in 42s
Build + Deploy / build-dsms-gateway (push) Successful in 33s
Build + Deploy / build-dsms-node (push) Successful in 16s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 25s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m33s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 1m18s
CI / test-python-backend (push) Successful in 53s
CI / test-python-document-crawler (push) Successful in 36s
CI / test-python-dsms-gateway (push) Successful in 33s
CI / validate-canonical-controls (push) Successful in 24s
Build + Deploy / trigger-orca (push) Successful in 3m19s
1. 30 CMP selectors (was 10): Added Sourcepoint, Iubenda, Complianz,
   CookieFirst, HubSpot, Osano, Piwik PRO, Cookie Consent (Insites),
   Axeptio, Termly, CookieScript, Civic UK, GDPR Cookie Compliance,
   CookieHub, Ketch, Admiral, Sibbo, Evidon, LiveRamp, Adsimple.
   Plus improved generic fallback: role=dialog, aria-label, data-* attrs.

2. Playwright stealth mode: uses playwright-stealth to counter bot detection.
   Removes WebDriver flag, simulates plugins, realistic viewport/locale.
   Launch args: --disable-blink-features=AutomationControlled.

3. Shadow DOM: Recursive JS-based search through shadowRoot elements
   for consent banners. Fallback click via page.evaluate() when
   normal Playwright selectors can't penetrate Shadow DOM.

4. Category selection UI: User can choose which cookie categories to
   test (Notwendig, Statistik, Marketing, Funktional, Praeferenzen).
   Pill-style checkboxes in BannerCheckTab, forwarded through API chain.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-09 08:42:30 +02:00
Benjamin Admin 0371eecc03 fix: Struktureller Fix — Maschinentyp-Filter fuer Keywords + Patterns
PROBLEM: Cobot-Projekt hatte 52 Pressen-Hazards weil Keywords wie
"stempel" und "stoessel" ohne Maschinentyp-Kontext matchten.

FIX an 3 Stellen:
1. KeywordEntry.MachineTypes — Pressen-Keywords nur fuer press/*_press
2. ParseNarrative(text, machineType) — Parser laedt Maschinentyp aus Projekt
3. HazardPattern.MachineTypes — Pressen-Patterns (HP045-HP058) nur fuer Pressen

Verhindert zukuenftig falsche Zuordnungen bei neuen Kundenprojekten.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-09 08:30:45 +02:00
12 changed files with 574 additions and 72 deletions
@@ -11,7 +11,7 @@ const BACKEND_URL = process.env.BACKEND_API_URL || 'http://backend-compliance:80
export async function POST(request: NextRequest) { export async function POST(request: NextRequest) {
try { try {
const body = await request.json() const body = await request.json()
const { url } = body const { url, categories = [] } = body
if (!url) { if (!url) {
return NextResponse.json({ error: 'URL erforderlich' }, { status: 400 }) return NextResponse.json({ error: 'URL erforderlich' }, { status: 400 })
@@ -21,7 +21,7 @@ export async function POST(request: NextRequest) {
const response = await fetch(`${BACKEND_URL}/api/compliance/agent/banner-check`, { const response = await fetch(`${BACKEND_URL}/api/compliance/agent/banner-check`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ url }), body: JSON.stringify({ url, categories }),
signal: AbortSignal.timeout(120000), // 2 min for Playwright signal: AbortSignal.timeout(120000), // 2 min for Playwright
}) })
@@ -33,12 +33,34 @@ interface BannerResult {
} }
} }
/**
 * Cookie categories offered as selectable pills in the banner check form.
 *
 * `id` is the value stored in the component's category selection and `label`
 * is the German text shown to the user. The `all` entry is a sentinel: when it
 * is selected, the scan request is sent with an empty category list, which the
 * request code treats as "test everything".
 *
 * Declared read-only because this list is shared by every render and must not
 * be mutated by component code.
 */
const CATEGORIES: ReadonlyArray<{ readonly id: string; readonly label: string }> = [
  { id: 'all', label: 'Alle Kategorien' },
  { id: 'necessary', label: 'Notwendig' },
  { id: 'statistics', label: 'Statistik' },
  { id: 'marketing', label: 'Marketing' },
  { id: 'functional', label: 'Funktional' },
  { id: 'preferences', label: 'Praeferenzen' },
]
export function BannerCheckTab() { export function BannerCheckTab() {
const [url, setUrl] = useState('') const [url, setUrl] = useState('')
const [loading, setLoading] = useState(false) const [loading, setLoading] = useState(false)
const [progress, setProgress] = useState('') const [progress, setProgress] = useState('')
const [error, setError] = useState<string | null>(null) const [error, setError] = useState<string | null>(null)
const [result, setResult] = useState<BannerResult | null>(null) const [result, setResult] = useState<BannerResult | null>(null)
const [categories, setCategories] = useState<string[]>(['all'])
/**
 * Flips the selection state of one category pill.
 *
 * Picking `all` replaces the whole selection with `['all']`. Picking a specific
 * category removes `all` from the selection and adds or removes that category.
 * If that leaves nothing selected, the selection falls back to `['all']`.
 */
const toggleCategory = (id: string) => {
  // The catch-all pill discards any specific selection.
  if (id === 'all') {
    setCategories(['all'])
    return
  }

  setCategories(current => {
    const wasSelected = current.includes(id)
    // Start from the selection without the catch-all entry and without the toggled id.
    const remaining = current.filter(entry => entry !== 'all' && entry !== id)
    if (!wasSelected) {
      remaining.push(id)
    }
    // Never leave the selection empty: an empty selection means "all".
    return remaining.length > 0 ? remaining : ['all']
  })
}
const handleScan = async (e: React.FormEvent) => { const handleScan = async (e: React.FormEvent) => {
e.preventDefault() e.preventDefault()
@@ -49,11 +71,16 @@ export function BannerCheckTab() {
setResult(null) setResult(null)
setProgress('Cookie-Banner wird analysiert...') setProgress('Cookie-Banner wird analysiert...')
// 'all' selected = empty array (test everything)
const selectedCategories = categories.includes('all')
? []
: categories
try { try {
const res = await fetch('/api/sdk/v1/agent/banner-check', { const res = await fetch('/api/sdk/v1/agent/banner-check', {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ url: url.trim() }), body: JSON.stringify({ url: url.trim(), categories: selectedCategories }),
}) })
if (!res.ok) throw new Error(`Fehler: ${res.status}`) if (!res.ok) throw new Error(`Fehler: ${res.status}`)
const data = await res.json() const data = await res.json()
@@ -94,22 +121,55 @@ export function BannerCheckTab() {
</p> </p>
</div> </div>
<form onSubmit={handleScan} className="flex gap-3"> <form onSubmit={handleScan} className="space-y-3">
<input <div className="flex gap-3">
type="url" value={url} onChange={e => setUrl(e.target.value)} <input
placeholder="https://www.example.com/" type="url" value={url} onChange={e => setUrl(e.target.value)}
className="flex-1 px-4 py-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-purple-500 focus:border-transparent text-sm" placeholder="https://www.example.com/"
disabled={loading} required className="flex-1 px-4 py-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-purple-500 focus:border-transparent text-sm"
/> disabled={loading} required
<button type="submit" disabled={loading || !url.trim()} />
className="px-6 py-3 bg-purple-600 text-white rounded-lg hover:bg-purple-700 disabled:opacity-50 transition-colors flex items-center gap-2 text-sm font-medium"> <button type="submit" disabled={loading || !url.trim()}
{loading ? ( className="px-6 py-3 bg-purple-600 text-white rounded-lg hover:bg-purple-700 disabled:opacity-50 transition-colors flex items-center gap-2 text-sm font-medium whitespace-nowrap">
<><svg className="animate-spin w-4 h-4" fill="none" viewBox="0 0 24 24"> {loading ? (
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" /> <><svg className="animate-spin w-4 h-4" fill="none" viewBox="0 0 24 24">
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" /> <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
</svg>Pruefe...</> <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
) : 'Banner pruefen'} </svg>Pruefe...</>
</button> ) : 'Banner pruefen'}
</button>
</div>
<div className="flex flex-wrap gap-2">
{CATEGORIES.map(cat => (
<label key={cat.id}
className={`inline-flex items-center gap-1.5 px-3 py-1.5 rounded-full text-xs font-medium cursor-pointer border transition-colors ${
categories.includes(cat.id)
? 'bg-purple-100 border-purple-300 text-purple-800'
: 'bg-gray-50 border-gray-200 text-gray-600 hover:bg-gray-100'
}`}
>
<input
type="checkbox"
checked={categories.includes(cat.id)}
onChange={() => toggleCategory(cat.id)}
className="sr-only"
/>
<span className={`w-3 h-3 rounded-sm border flex items-center justify-center ${
categories.includes(cat.id)
? 'bg-purple-600 border-purple-600'
: 'border-gray-400'
}`}>
{categories.includes(cat.id) && (
<svg className="w-2 h-2 text-white" fill="currentColor" viewBox="0 0 12 12">
<path d="M10 3L4.5 8.5 2 6" stroke="currentColor" strokeWidth="2" fill="none" strokeLinecap="round" strokeLinejoin="round" />
</svg>
)}
</span>
{cat.label}
</label>
))}
</div>
</form> </form>
{progress && ( {progress && (
@@ -6,6 +6,7 @@ import (
"github.com/breakpilot/ai-compliance-sdk/internal/iace" "github.com/breakpilot/ai-compliance-sdk/internal/iace"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/google/uuid"
) )
// ParseNarrativeRequest is the request body for POST /projects/:id/parse-narrative. // ParseNarrativeRequest is the request body for POST /projects/:id/parse-narrative.
@@ -43,8 +44,16 @@ func (h *IACEHandler) ParseNarrative(c *gin.Context) {
return return
} }
// 1. Parse narrative text deterministically // Load project to get machine type for context-aware parsing
parseResult := iace.ParseNarrative(req.NarrativeText) var machineType string
if projectID, err := uuid.Parse(c.Param("id")); err == nil {
if project, err := h.store.GetProject(c.Request.Context(), projectID); err == nil && project != nil {
machineType = project.MachineType
}
}
// 1. Parse narrative text deterministically (machine-type-aware)
parseResult := iace.ParseNarrative(req.NarrativeText, machineType)
// 2. Feed parsed tags into pattern engine // 2. Feed parsed tags into pattern engine
// Collect all component IDs for tag resolution // Collect all component IDs for tag resolution
@@ -27,4 +27,9 @@ type HazardPattern struct {
ZoneDE string `json:"zone_de,omitempty"` // Gefahrstelle/Zone ZoneDE string `json:"zone_de,omitempty"` // Gefahrstelle/Zone
DefaultSeverity int `json:"default_severity,omitempty"` // 1-5 DefaultSeverity int `json:"default_severity,omitempty"` // 1-5
DefaultExposure int `json:"default_exposure,omitempty"` // 1-5 DefaultExposure int `json:"default_exposure,omitempty"` // 1-5
// MachineTypes restricts this pattern to specific machine types.
// Empty = fires for all machine types. If set, only fires when the
// project's machine_type is in this list. Prevents e.g. press-specific
// patterns from firing for a cobot project.
MachineTypes []string `json:"machine_types,omitempty"`
} }
@@ -23,7 +23,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Toedliche Quetschverletzung, Amputation von Gliedmassen.", HarmDE: "Toedliche Quetschverletzung, Amputation von Gliedmassen.",
AffectedDE: "Einrichter, Bedienpersonal im Werkzeugeinbauraum.", AffectedDE: "Einrichter, Bedienpersonal im Werkzeugeinbauraum.",
ZoneDE: "Werkzeugeinbauraum unterhalb des Stoessels.", ZoneDE: "Werkzeugeinbauraum unterhalb des Stoessels.",
DefaultSeverity: 5, DefaultExposure: 2, DefaultSeverity: 5, DefaultExposure: 2, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
{ {
ID: "HP046", NameDE: "Quetschen im Werkzeugeinbauraum", NameEN: "Crushing in die space", ID: "HP046", NameDE: "Quetschen im Werkzeugeinbauraum", NameEN: "Crushing in die space",
@@ -38,7 +38,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Toedliche Quetschverletzung, Amputation der oberen Extremitaeten.", HarmDE: "Toedliche Quetschverletzung, Amputation der oberen Extremitaeten.",
AffectedDE: "Einrichter, Werkzeugbauer, Instandhaltungspersonal.", AffectedDE: "Einrichter, Werkzeugbauer, Instandhaltungspersonal.",
ZoneDE: "Werkzeugeinbauraum zwischen Ober- und Unterwerkzeug.", ZoneDE: "Werkzeugeinbauraum zwischen Ober- und Unterwerkzeug.",
DefaultSeverity: 5, DefaultExposure: 3, DefaultSeverity: 5, DefaultExposure: 3, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
{ {
ID: "HP047", NameDE: "Oelnebelexposition Atemwege", NameEN: "Oil mist inhalation exposure", ID: "HP047", NameDE: "Oelnebelexposition Atemwege", NameEN: "Oil mist inhalation exposure",
@@ -53,7 +53,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Atemwegsreizung, chronische Lungenerkrankung bei Langzeitexposition.", HarmDE: "Atemwegsreizung, chronische Lungenerkrankung bei Langzeitexposition.",
AffectedDE: "Bedienpersonal, Personen im Nahbereich der Presse.", AffectedDE: "Bedienpersonal, Personen im Nahbereich der Presse.",
ZoneDE: "Arbeitsbereich rund um die Presse, insbesondere Bedienerseite.", ZoneDE: "Arbeitsbereich rund um die Presse, insbesondere Bedienerseite.",
DefaultSeverity: 3, DefaultExposure: 4, DefaultSeverity: 3, DefaultExposure: 4, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
{ {
ID: "HP048", NameDE: "Verbrennung durch heisse Werkstuecke", NameEN: "Burns from hot workpieces", ID: "HP048", NameDE: "Verbrennung durch heisse Werkstuecke", NameEN: "Burns from hot workpieces",
@@ -68,7 +68,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Verbrennungen zweiten oder dritten Grades an Haenden und Unterarmen.", HarmDE: "Verbrennungen zweiten oder dritten Grades an Haenden und Unterarmen.",
AffectedDE: "Bedienpersonal, Einrichter bei Werkzeugwechsel.", AffectedDE: "Bedienpersonal, Einrichter bei Werkzeugwechsel.",
ZoneDE: "Entnahmebereich, Werkzeugeinbauraum, Ablagetisch.", ZoneDE: "Entnahmebereich, Werkzeugeinbauraum, Ablagetisch.",
DefaultSeverity: 4, DefaultExposure: 3, DefaultSeverity: 4, DefaultExposure: 3, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
{ {
ID: "HP049", NameDE: "Schwebende Last (Hubwerk/Aufzug)", NameEN: "Suspended load (hoist/elevator)", ID: "HP049", NameDE: "Schwebende Last (Hubwerk/Aufzug)", NameEN: "Suspended load (hoist/elevator)",
@@ -83,7 +83,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Toedliche Verletzung durch herabfallende Last, Knochenbrueche.", HarmDE: "Toedliche Verletzung durch herabfallende Last, Knochenbrueche.",
AffectedDE: "Personen im Gefahrenbereich unter der schwebenden Last.", AffectedDE: "Personen im Gefahrenbereich unter der schwebenden Last.",
ZoneDE: "Bereich unterhalb des Hubwerks, Werkzeugwechselzone.", ZoneDE: "Bereich unterhalb des Hubwerks, Werkzeugwechselzone.",
DefaultSeverity: 5, DefaultExposure: 2, DefaultSeverity: 5, DefaultExposure: 2, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
{ {
ID: "HP050", NameDE: "Einziehen/Scheren Transfersystem", NameEN: "Draw-in/shearing at transfer system", ID: "HP050", NameDE: "Einziehen/Scheren Transfersystem", NameEN: "Draw-in/shearing at transfer system",
@@ -98,7 +98,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Schnittverletzungen, Amputation von Fingern, Quetschungen.", HarmDE: "Schnittverletzungen, Amputation von Fingern, Quetschungen.",
AffectedDE: "Bedienpersonal, Einrichter bei Stoerungsbeseitigung.", AffectedDE: "Bedienpersonal, Einrichter bei Stoerungsbeseitigung.",
ZoneDE: "Transferbereich zwischen den Pressenstationen.", ZoneDE: "Transferbereich zwischen den Pressenstationen.",
DefaultSeverity: 4, DefaultExposure: 3, DefaultSeverity: 4, DefaultExposure: 3, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
{ {
ID: "HP051", NameDE: "Sturzgefahr Auswurfbereich", NameEN: "Fall hazard at ejection area", ID: "HP051", NameDE: "Sturzgefahr Auswurfbereich", NameEN: "Fall hazard at ejection area",
@@ -114,7 +114,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Knochenbrueche, Prellungen, Kopfverletzungen bei Sturz.", HarmDE: "Knochenbrueche, Prellungen, Kopfverletzungen bei Sturz.",
AffectedDE: "Bedienpersonal, Logistikmitarbeiter im Auswurfbereich.", AffectedDE: "Bedienpersonal, Logistikmitarbeiter im Auswurfbereich.",
ZoneDE: "Auswurfschacht und angrenzender Bodenbereich.", ZoneDE: "Auswurfschacht und angrenzender Bodenbereich.",
DefaultSeverity: 3, DefaultExposure: 4, DefaultSeverity: 3, DefaultExposure: 4, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
{ {
ID: "HP052", NameDE: "Druckfreisetzung Hydraulikspeicher", NameEN: "Pressure release from hydraulic accumulator", ID: "HP052", NameDE: "Druckfreisetzung Hydraulikspeicher", NameEN: "Pressure release from hydraulic accumulator",
@@ -129,7 +129,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Schwere Schnittverletzungen durch Oelstrahl, Augenverletzungen, Verbrennungen.", HarmDE: "Schwere Schnittverletzungen durch Oelstrahl, Augenverletzungen, Verbrennungen.",
AffectedDE: "Instandhaltungspersonal, Hydraulik-Fachkraefte.", AffectedDE: "Instandhaltungspersonal, Hydraulik-Fachkraefte.",
ZoneDE: "Hydraulikaggregat, Speicherbereich, Leitungsfuehrung.", ZoneDE: "Hydraulikaggregat, Speicherbereich, Leitungsfuehrung.",
DefaultSeverity: 5, DefaultExposure: 2, DefaultSeverity: 5, DefaultExposure: 2, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
{ {
ID: "HP053", NameDE: "Impulslaerm Pressvorgang", NameEN: "Impact noise during press operation", ID: "HP053", NameDE: "Impulslaerm Pressvorgang", NameEN: "Impact noise during press operation",
@@ -144,7 +144,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Laermschwerhoerigkeit, Tinnitus bei Langzeitexposition.", HarmDE: "Laermschwerhoerigkeit, Tinnitus bei Langzeitexposition.",
AffectedDE: "Bedienpersonal, Personen in angrenzenden Arbeitsbereichen.", AffectedDE: "Bedienpersonal, Personen in angrenzenden Arbeitsbereichen.",
ZoneDE: "Gesamter Pressenbereich, Radius ca. 5-10 m um die Maschine.", ZoneDE: "Gesamter Pressenbereich, Radius ca. 5-10 m um die Maschine.",
DefaultSeverity: 3, DefaultExposure: 5, DefaultSeverity: 3, DefaultExposure: 5, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
{ {
ID: "HP054", NameDE: "Schwungrad-Restenergie nach Abschaltung", NameEN: "Flywheel residual energy after shutdown", ID: "HP054", NameDE: "Schwungrad-Restenergie nach Abschaltung", NameEN: "Flywheel residual energy after shutdown",
@@ -159,7 +159,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Erfassen durch rotierende Teile, schwere Schnittverletzungen, Skalpierung.", HarmDE: "Erfassen durch rotierende Teile, schwere Schnittverletzungen, Skalpierung.",
AffectedDE: "Instandhaltungspersonal, Einrichter nach Maschinenstopp.", AffectedDE: "Instandhaltungspersonal, Einrichter nach Maschinenstopp.",
ZoneDE: "Schwungradbereich, Kupplungsraum, Antriebsseite der Presse.", ZoneDE: "Schwungradbereich, Kupplungsraum, Antriebsseite der Presse.",
DefaultSeverity: 4, DefaultExposure: 2, DefaultSeverity: 4, DefaultExposure: 2, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
{ {
ID: "HP055", NameDE: "Umgehung Schutzeinrichtung (Pressentuer)", NameEN: "Bypass of safety guard (press door)", ID: "HP055", NameDE: "Umgehung Schutzeinrichtung (Pressentuer)", NameEN: "Bypass of safety guard (press door)",
@@ -174,7 +174,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Toedliche Quetsch- oder Scherverletzungen bei Eingriff in den Gefahrenbereich.", HarmDE: "Toedliche Quetsch- oder Scherverletzungen bei Eingriff in den Gefahrenbereich.",
AffectedDE: "Bedienpersonal, Einrichter bei Stoerungsbeseitigung.", AffectedDE: "Bedienpersonal, Einrichter bei Stoerungsbeseitigung.",
ZoneDE: "Gesamter Werkzeugeinbauraum hinter der Schutztuer.", ZoneDE: "Gesamter Werkzeugeinbauraum hinter der Schutztuer.",
DefaultSeverity: 5, DefaultExposure: 3, DefaultSeverity: 5, DefaultExposure: 3, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
{ {
ID: "HP056", NameDE: "Fehlbedienung Zweihandschaltung", NameEN: "Two-hand control misoperation", ID: "HP056", NameDE: "Fehlbedienung Zweihandschaltung", NameEN: "Two-hand control misoperation",
@@ -189,7 +189,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Quetschverletzungen der freien Hand im Werkzeugbereich.", HarmDE: "Quetschverletzungen der freien Hand im Werkzeugbereich.",
AffectedDE: "Bedienpersonal an der Pressenbedienung.", AffectedDE: "Bedienpersonal an der Pressenbedienung.",
ZoneDE: "Gefahrenbereich zwischen Ober- und Unterwerkzeug.", ZoneDE: "Gefahrenbereich zwischen Ober- und Unterwerkzeug.",
DefaultSeverity: 5, DefaultExposure: 3, DefaultSeverity: 5, DefaultExposure: 3, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
{ {
ID: "HP057", NameDE: "Hydraulikoelleckage + Rutschgefahr", NameEN: "Hydraulic oil leakage + slip hazard", ID: "HP057", NameDE: "Hydraulikoelleckage + Rutschgefahr", NameEN: "Hydraulic oil leakage + slip hazard",
@@ -204,7 +204,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Sturzverletzungen durch Ausrutschen, Hautreizungen bei Hautkontakt.", HarmDE: "Sturzverletzungen durch Ausrutschen, Hautreizungen bei Hautkontakt.",
AffectedDE: "Bedienpersonal, Logistikmitarbeiter, alle Personen im Pressenbereich.", AffectedDE: "Bedienpersonal, Logistikmitarbeiter, alle Personen im Pressenbereich.",
ZoneDE: "Bodenbereich rund um das Hydraulikaggregat und unter der Presse.", ZoneDE: "Bodenbereich rund um das Hydraulikaggregat und unter der Presse.",
DefaultSeverity: 2, DefaultExposure: 4, DefaultSeverity: 2, DefaultExposure: 4, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
{ {
ID: "HP058", NameDE: "Ergonomische Belastung Kistenwechsel", NameEN: "Ergonomic strain during bin changeover", ID: "HP058", NameDE: "Ergonomische Belastung Kistenwechsel", NameEN: "Ergonomic strain during bin changeover",
@@ -219,7 +219,7 @@ func GetPressHazardPatterns() []HazardPattern {
HarmDE: "Rueckenverletzungen, Bandscheibenvorfall, Muskel-Skelett-Erkrankungen.", HarmDE: "Rueckenverletzungen, Bandscheibenvorfall, Muskel-Skelett-Erkrankungen.",
AffectedDE: "Bedienpersonal, Logistikmitarbeiter an der Presse.", AffectedDE: "Bedienpersonal, Logistikmitarbeiter an der Presse.",
ZoneDE: "Auswurfbereich, Palettenstellplatz neben der Presse.", ZoneDE: "Auswurfbereich, Palettenstellplatz neben der Presse.",
DefaultSeverity: 2, DefaultExposure: 5, DefaultSeverity: 2, DefaultExposure: 5, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"},
}, },
} }
} }
@@ -7,6 +7,10 @@ type KeywordEntry struct {
ComponentIDs []string // Matched component library IDs (C001-C135) ComponentIDs []string // Matched component library IDs (C001-C135)
EnergyIDs []string // Matched energy source IDs (EN01-EN20) EnergyIDs []string // Matched energy source IDs (EN01-EN20)
ExtraTags []string // Additional tags derived from keyword context ExtraTags []string // Additional tags derived from keyword context
// MachineTypes restricts this keyword to specific machine types.
// Empty = matches all machine types. If set, only matches when the
// project's machine_type is in this list.
MachineTypes []string // e.g. ["press", "hydraulic_press"]
} }
// GetKeywordDictionary returns the complete keyword dictionary for // GetKeywordDictionary returns the complete keyword dictionary for
@@ -14,13 +18,13 @@ type KeywordEntry struct {
// machinery terminology in German and English. // machinery terminology in German and English.
func GetKeywordDictionary() []KeywordEntry { func GetKeywordDictionary() []KeywordEntry {
return []KeywordEntry{ return []KeywordEntry{
// ── Pressen / Umformmaschinen ─────────────────────────────────── // ── Pressen / Umformmaschinen (NUR fuer press/hydraulic_press) ──
{Keywords: []string{"presse", "press", "umform", "umformung"}, ComponentIDs: []string{"C008", "C122"}, EnergyIDs: []string{"EN01"}, ExtraTags: []string{"high_force", "crush_point"}}, {Keywords: []string{"presse", "press", "umform", "umformung"}, ComponentIDs: []string{"C008", "C122"}, EnergyIDs: []string{"EN01"}, ExtraTags: []string{"high_force", "crush_point"}, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press", "stamping_press"}},
{Keywords: []string{"kniehebel", "toggle"}, ComponentIDs: []string{"C121"}, ExtraTags: []string{"mechanical_transmission"}}, {Keywords: []string{"kniehebel", "toggle"}, ComponentIDs: []string{"C121"}, ExtraTags: []string{"mechanical_transmission"}, MachineTypes: []string{"press"}},
{Keywords: []string{"stossel", "stoessel", "ram", "slide"}, ComponentIDs: []string{"C122"}, EnergyIDs: []string{"EN01"}, ExtraTags: []string{"moving_part", "crush_point", "gravity_risk"}}, {Keywords: []string{"stossel", "stoessel", "ram", "slide"}, ComponentIDs: []string{"C122"}, EnergyIDs: []string{"EN01"}, ExtraTags: []string{"moving_part", "crush_point", "gravity_risk"}, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"}},
{Keywords: []string{"stempel", "punch", "matrize", "die"}, ComponentIDs: []string{"C126"}, ExtraTags: []string{"crush_point", "cutting_part"}}, {Keywords: []string{"stempel", "punch", "matrize", "die"}, ComponentIDs: []string{"C126"}, ExtraTags: []string{"crush_point", "cutting_part"}, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press", "stamping_press"}},
{Keywords: []string{"schwungrad", "flywheel"}, ComponentIDs: []string{"C133"}, EnergyIDs: []string{"EN02", "EN03"}, ExtraTags: []string{"stored_energy", "rotating_part"}}, {Keywords: []string{"schwungrad", "flywheel"}, ComponentIDs: []string{"C133"}, EnergyIDs: []string{"EN02", "EN03"}, ExtraTags: []string{"stored_energy", "rotating_part"}, MachineTypes: []string{"press", "mechanical_press"}},
{Keywords: []string{"werkzeugeinbauraum", "die space"}, ComponentIDs: []string{"C132"}, ExtraTags: []string{"crush_point", "pinch_point"}}, {Keywords: []string{"werkzeugeinbauraum", "die space"}, ComponentIDs: []string{"C132"}, ExtraTags: []string{"crush_point", "pinch_point"}, MachineTypes: []string{"press", "hydraulic_press", "mechanical_press"}},
// ── Foerdertechnik ────────────────────────────────────────────── // ── Foerdertechnik ──────────────────────────────────────────────
{Keywords: []string{"foerderband", "transportband", "conveyor"}, ComponentIDs: []string{"C003"}, EnergyIDs: []string{"EN01", "EN02"}, ExtraTags: []string{"entanglement_risk"}}, {Keywords: []string{"foerderband", "transportband", "conveyor"}, ComponentIDs: []string{"C003"}, EnergyIDs: []string{"EN01", "EN02"}, ExtraTags: []string{"entanglement_risk"}},
@@ -94,7 +94,9 @@ var roleKeywords = map[string]string{
// ParseNarrative extracts components, energy sources, lifecycle phases, // ParseNarrative extracts components, energy sources, lifecycle phases,
// roles, and tags from a machine description text. Fully deterministic, // roles, and tags from a machine description text. Fully deterministic,
// no LLM required. // no LLM required.
func ParseNarrative(text string) ParseResult { // machineType is optional — if provided, keywords with MachineTypes
// restrictions are only matched when the machine type is in the list.
func ParseNarrative(text string, machineType ...string) ParseResult {
result := ParseResult{} result := ParseResult{}
if text == "" { if text == "" {
return result return result
@@ -122,7 +124,27 @@ func ParseNarrative(text string) ParseResult {
seenEnergy := make(map[string]bool) seenEnergy := make(map[string]bool)
tagSet := make(map[string]bool) tagSet := make(map[string]bool)
// Resolve machine type for filtering
var mType string
if len(machineType) > 0 {
mType = machineType[0]
}
for _, entry := range dictionary { for _, entry := range dictionary {
// Skip keywords restricted to other machine types
if len(entry.MachineTypes) > 0 && mType != "" {
matched := false
for _, mt := range entry.MachineTypes {
if mt == mType {
matched = true
break
}
}
if !matched {
continue // This keyword is for a different machine type
}
}
for _, kw := range entry.Keywords { for _, kw := range entry.Keywords {
kwNorm := strings.ToLower(kw) kwNorm := strings.ToLower(kw)
kwNorm = strings.ReplaceAll(kwNorm, "ä", "ae") kwNorm = strings.ReplaceAll(kwNorm, "ä", "ae")
@@ -94,6 +94,7 @@ class DocCheckStatusResponse(BaseModel):
class BannerCheckRequest(BaseModel): class BannerCheckRequest(BaseModel):
url: str url: str
categories: list[str] = [] # empty = test all categories
@router.post("/banner-check") @router.post("/banner-check")
@@ -103,7 +104,11 @@ async def run_banner_check(req: BannerCheckRequest):
async with httpx.AsyncClient(timeout=120.0) as client: async with httpx.AsyncClient(timeout=120.0) as client:
resp = await client.post( resp = await client.post(
f"{CONSENT_TESTER_URL}/scan", f"{CONSENT_TESTER_URL}/scan",
json={"url": req.url, "timeout_per_phase": 10}, json={
"url": req.url,
"timeout_per_phase": 10,
"categories": req.categories,
},
) )
if resp.status_code == 200: if resp.status_code == 200:
return resp.json() return resp.json()
+2 -1
View File
@@ -34,6 +34,7 @@ app.add_middleware(
class ScanRequest(BaseModel): class ScanRequest(BaseModel):
url: str url: str
timeout_per_phase: int = 10 # seconds to wait after page load timeout_per_phase: int = 10 # seconds to wait after page load
categories: list[str] = [] # empty = test all categories
class ScanResponse(BaseModel): class ScanResponse(BaseModel):
@@ -59,7 +60,7 @@ async def health():
async def scan_consent(req: ScanRequest): async def scan_consent(req: ScanRequest):
"""Run 3-phase consent test on a URL.""" """Run 3-phase consent test on a URL."""
logger.info("Starting consent test for %s", req.url) logger.info("Starting consent test for %s", req.url)
result = await run_consent_test(req.url, req.timeout_per_phase) result = await run_consent_test(req.url, req.timeout_per_phase, req.categories)
# Build raw response dict for structured check mapping # Build raw response dict for structured check mapping
phases = { phases = {
+1
View File
@@ -1,4 +1,5 @@
fastapi==0.115.12 fastapi==0.115.12
uvicorn==0.34.2 uvicorn==0.34.2
playwright==1.52.0 playwright==1.52.0
playwright-stealth==1.0.6
pydantic>=2.0 pydantic>=2.0
+347 -15
View File
@@ -1,12 +1,12 @@
""" """
Banner Detector identifies Consent Management Platforms and their buttons. Banner Detector identifies Consent Management Platforms and their buttons.
Supports 10+ CMPs with specific selectors + generic fallback. Supports 30 CMPs with specific selectors + generic fallback + Shadow DOM.
""" """
from dataclasses import dataclass from dataclasses import dataclass
from playwright.async_api import Page, Locator from playwright.async_api import Page
@dataclass @dataclass
@@ -79,6 +79,127 @@ CMP_SELECTORS = [
"accept": "#tarteaucitronPersonalize2", "accept": "#tarteaucitronPersonalize2",
"reject": "#tarteaucitronAllDenied2", "reject": "#tarteaucitronAllDenied2",
}, },
# --- 20 additional CMPs ---
{
"name": "Sourcepoint",
"detect": "div[id^='sp_message']",
"accept": ".sp_choice_type_11",
"reject": ".sp_choice_type_13",
},
{
"name": "Axeptio",
"detect": "#axeptio_widget",
"accept": "[data-ax='accept']",
"reject": "[data-ax='decline']",
},
{
"name": "Iubenda",
"detect": "#iubenda-cs-banner",
"accept": ".iubenda-cs-accept-btn",
"reject": ".iubenda-cs-reject-btn",
},
{
"name": "Termly",
"detect": "#termly-code-snippet-support",
"accept": "[data-tid='banner-accept']",
"reject": "[data-tid='banner-decline']",
},
{
"name": "CookieFirst",
"detect": "#cookiefirst-root",
"accept": "[data-cookiefirst-action='accept']",
"reject": "[data-cookiefirst-action='reject']",
},
{
"name": "Complianz",
"detect": "#cmplz-cookiebanner-container",
"accept": ".cmplz-accept",
"reject": ".cmplz-deny",
},
{
"name": "CookieScript",
"detect": "#cookiescript_injected",
"accept": "#cookiescript_accept",
"reject": "#cookiescript_reject",
},
{
"name": "HubSpot",
"detect": "#hs-eu-cookie-confirmation",
"accept": "#hs-eu-confirmation-button",
"reject": "#hs-eu-decline-button",
},
{
"name": "Civic UK",
"detect": "#ccc, .ccc-content",
"accept": "#ccc-recommended-settings",
"reject": "#ccc-reject-settings",
},
{
"name": "GDPR Cookie Compliance",
"detect": "#moove_gdpr_cookie_modal",
"accept": ".moove-gdpr-modal-allow-all",
"reject": ".moove-gdpr-modal-save-settings",
},
{
"name": "CookieHub",
"detect": "#ch2-container",
"accept": "#ch2-btn-accept",
"reject": "#ch2-btn-decline",
},
{
"name": "Osano",
"detect": ".osano-cm-dialog",
"accept": ".osano-cm-accept-all",
"reject": ".osano-cm-deny",
},
{
"name": "Ketch",
"detect": "#ketch-consent",
"accept": "[data-testid='accept-button']",
"reject": "[data-testid='decline-button']",
},
{
"name": "Piwik PRO",
"detect": "#ppms_cm_popup_overlay",
"accept": "#ppms_cm_agree-to-all",
"reject": "#ppms_cm_reject-all",
},
{
"name": "Cookie Consent (Insites)",
"detect": ".cc-window",
"accept": ".cc-btn.cc-allow",
"reject": ".cc-btn.cc-deny",
},
{
"name": "Admiral",
"detect": "[id^='admiral-']",
"accept": "[class*='admiral-accept']",
"reject": "[class*='admiral-reject']",
},
{
"name": "Sibbo",
"detect": "#sibbo-cmp-layout",
"accept": "#sibbo-cmp-accept-all",
"reject": "#sibbo-cmp-reject-all",
},
{
"name": "Evidon",
"detect": "#_evidon_banner",
"accept": "#_evidon-accept-button",
"reject": "#_evidon-decline-button",
},
{
"name": "LiveRamp",
"detect": "#_lr-cookie-consent",
"accept": "#_lr-accept-all",
"reject": "#_lr-reject-all",
},
{
"name": "Adsimple",
"detect": "#adconsent-usp-banner",
"accept": ".adconsent-accept-all",
"reject": ".adconsent-reject-all",
},
] ]
# Generic fallback patterns (text-based) # Generic fallback patterns (text-based)
@@ -94,45 +215,245 @@ GENERIC_REJECT_TEXTS = [
"Decline", "Nein", "Nicht einverstanden", "Decline", "Nein", "Nicht einverstanden",
] ]
# Attribute-based generic selectors for consent buttons
_GENERIC_ATTR_ACCEPT = [
"[data-consent='accept']", "[data-cookie='accept']", "[data-gdpr='accept']",
"[data-consent-accept]", "[data-cookie-accept]",
]
_GENERIC_ATTR_REJECT = [
"[data-consent='reject']", "[data-cookie='reject']", "[data-gdpr='reject']",
"[data-consent-reject]", "[data-cookie-reject]",
]
# Dialog / aria selectors to find consent containers
_DIALOG_SELECTORS = [
"[role='dialog']",
"[aria-label*='cookie' i]", "[aria-label*='consent' i]",
"[aria-label*='datenschutz' i]", "[aria-label*='Cookie' i]",
]
# JavaScript for recursive Shadow DOM search.
#
# Evaluated in the page. Starting at `document`, it visits every element and
# descends into each `el.shadowRoot` it can reach (nested shadow roots included;
# `shadowRoot` is only populated for roots the page exposes, so closed roots are
# not searched). A shadow root counts as a consent-banner candidate when its
# innerHTML matches /cookie|consent|datenschutz|privacy/i AND it contains at
# least one button-like element (`button`, `a[role="button"]`, `[role="button"]`)
# whose trimmed text is 1-79 characters long.
#
# Returns the first candidate found as
#   {host: "<tag>#<id>", buttons: [<button texts>], preview: <first 200 chars of innerHTML>}
# or null when no shadow root qualifies.
_SHADOW_DETECT_JS = """
() => {
const KEYWORDS = /cookie|consent|datenschutz|privacy/i;
const results = [];
function walk(root) {
for (const el of root.querySelectorAll('*')) {
if (el.shadowRoot) {
const shadow = el.shadowRoot;
const text = shadow.innerHTML || '';
if (KEYWORDS.test(text)) {
const buttons = [];
for (const btn of shadow.querySelectorAll(
'button, a[role="button"], [role="button"]'
)) {
const t = (btn.textContent || '').trim();
if (t.length > 0 && t.length < 80) {
buttons.push(t);
}
}
if (buttons.length > 0) {
const tag = el.tagName.toLowerCase();
const id = el.id ? '#' + el.id : '';
results.push({
host: tag + id,
buttons: buttons,
preview: text.substring(0, 200)
});
}
}
walk(shadow);
}
}
}
walk(document);
return results.length > 0 ? results[0] : null;
}
"""
_SHADOW_CLICK_JS = """
(textPattern) => {
const regex = new RegExp(textPattern, 'i');
function walk(root) {
for (const el of root.querySelectorAll('*')) {
if (el.shadowRoot) {
const btns = el.shadowRoot.querySelectorAll(
'button, a[role="button"], [role="button"]'
);
for (const btn of btns) {
if (regex.test(btn.textContent || '')) {
btn.click();
return true;
}
}
const found = walk(el.shadowRoot);
if (found) return true;
}
}
return false;
}
return walk(document);
}
"""
async def _detect_in_shadow_dom(page: Page) -> BannerInfo | None:
    """Search Shadow DOM roots for consent banners as last-resort fallback.

    Runs ``_SHADOW_DETECT_JS`` in the page and classifies the returned button
    texts into one accept and one reject button by keyword.

    Args:
        page: Page to inspect.

    Returns:
        A ``BannerInfo`` whose selectors use the ``shadow-click:<button text>``
        form (empty string for a side that was not found), or ``None`` when no
        candidate shadow root exists, no button could be classified, or the
        evaluation failed.
    """
    accept_kw = ("accept", "akzeptieren", "zustimmen", "agree", "allow",
                 "einverstanden", "alle")
    reject_kw = ("reject", "ablehnen", "deny", "decline", "refuse",
                 "notwendig", "necessary", "essential")
    try:
        result = await page.evaluate(_SHADOW_DETECT_JS)
        if not result:
            return None
        buttons = result.get("buttons", [])
        host = result.get("host", "")

        accept_pat = ""
        reject_pat = ""
        for text in buttons:
            low = text.lower()
            # Reject keywords are checked first: labels such as "Alle ablehnen"
            # or "Nur notwendige akzeptieren" also contain an accept keyword
            # ("alle", "akzeptieren") and must never become the accept button.
            if any(k in low for k in reject_kw):
                if not reject_pat:
                    reject_pat = text
            elif not accept_pat and any(k in low for k in accept_kw):
                accept_pat = text

        if not accept_pat and not reject_pat:
            return None

        return BannerInfo(
            detected=True,
            provider=f"ShadowDOM({host})",
            accept_selector=f"shadow-click:{accept_pat}" if accept_pat else "",
            reject_selector=f"shadow-click:{reject_pat}" if reject_pat else "",
        )
    except Exception:
        # Best-effort fallback: any failure simply means "nothing found here".
        return None
async def _click_in_shadow_dom(page: Page, text_pattern: str) -> bool:
    """Run the shadow-root click script for a button matching ``text_pattern``.

    ``text_pattern`` is forwarded unchanged as the single argument of
    ``_SHADOW_CLICK_JS``. Whatever the script returns is handed back to the
    caller; if the evaluation raises, the result is ``False``.
    """
    try:
        clicked = await page.evaluate(_SHADOW_CLICK_JS, text_pattern)
    except Exception:
        return False
    return clicked
async def _detect_generic_dialog(page: Page) -> BannerInfo | None:
    """Find a consent banner inside dialog / aria-labelled containers.

    For every selector in ``_DIALOG_SELECTORS`` only the first matching element
    is examined. It qualifies when its inner text mentions one of the consent
    keywords. Buttons are then looked up inside it, first through the generic
    data-attribute selectors and, for a side still missing, through the generic
    button texts.

    Returns:
        ``BannerInfo`` with provider ``"Generic (dialog)"`` as soon as a
        container yields an accept or a reject selector, otherwise ``None``.
        A selector that raises is skipped.
    """
    keywords = ("cookie", "consent", "datenschutz", "privacy")

    async def first_hit(options, locate, to_selector):
        # First option whose locator matches something, rendered as a selector.
        for option in options:
            if await locate(option).count() > 0:
                return to_selector(option)
        return ""

    for container_sel in _DIALOG_SELECTORS:
        try:
            candidates = page.locator(container_sel)
            if await candidates.count() == 0:
                continue

            box = candidates.first
            body_text = (await box.inner_text(timeout=2000)).lower()
            if all(kw not in body_text for kw in keywords):
                continue

            def by_attr(attr_sel):
                return f"{container_sel} {attr_sel}"

            def by_text(label):
                return f'{container_sel} button:has-text("{label}")'

            def text_locator(label):
                return box.get_by_text(label, exact=False)

            # Attribute-based lookups come first for both sides ...
            accept_sel = await first_hit(_GENERIC_ATTR_ACCEPT, box.locator, by_attr)
            reject_sel = await first_hit(_GENERIC_ATTR_REJECT, box.locator, by_attr)
            # ... then visible text fills in whichever side is still empty.
            if accept_sel == "":
                accept_sel = await first_hit(GENERIC_ACCEPT_TEXTS, text_locator, by_text)
            if reject_sel == "":
                reject_sel = await first_hit(GENERIC_REJECT_TEXTS, text_locator, by_text)

            if not (accept_sel or reject_sel):
                continue
            return BannerInfo(
                detected=True,
                provider="Generic (dialog)",
                accept_selector=accept_sel,
                reject_selector=reject_sel,
            )
        except Exception:
            continue
    return None
async def _detect_generic_attr(page: Page) -> BannerInfo | None:
    """Find consent buttons anywhere on the page via data-* attributes.

    The accept and reject selector lists are scanned independently; for each
    list the first selector with at least one match is used. A selector whose
    lookup raises is skipped.

    Returns:
        ``BannerInfo`` with provider ``"Generic (attr)"`` when either side was
        found (the other side may be an empty string), otherwise ``None``.
    """

    async def first_present(candidates):
        # First selector in `candidates` that matches at least one element.
        for candidate in candidates:
            try:
                present = await page.locator(candidate).count() > 0
            except Exception:
                continue
            if present:
                return candidate
        return ""

    accept_sel = await first_present(_GENERIC_ATTR_ACCEPT)
    reject_sel = await first_present(_GENERIC_ATTR_REJECT)

    if not accept_sel and not reject_sel:
        return None
    return BannerInfo(
        detected=True,
        provider="Generic (attr)",
        accept_selector=accept_sel,
        reject_selector=reject_sel,
    )
async def detect_banner(page: Page) -> BannerInfo:
    """Work out which consent banner the page shows and how to click it.

    Strategies run in this order, and the first one that succeeds wins:
      1. CMP-specific selectors from ``CMP_SELECTORS``.
      2. Generic accept/reject texts anywhere on the page.
      3. Dialog / aria containers that mention consent keywords.
      4. Generic ``data-*`` consent attributes.
      5. Buttons inside shadow roots.

    Returns:
        A ``BannerInfo`` for the first hit, or one with ``detected=False`` and
        empty fields when nothing matched.
    """
    # 1. Known CMPs: the presence of the "detect" element identifies the vendor.
    for entry in CMP_SELECTORS:
        try:
            if await page.locator(entry["detect"]).count() == 0:
                continue
            return BannerInfo(
                detected=True,
                provider=entry["name"],
                accept_selector=entry["accept"],
                reject_selector=entry["reject"],
            )
        except Exception:
            continue

    # 2. Text-based fallback: an accept text is required, a reject text is optional.
    for accept_text in GENERIC_ACCEPT_TEXTS:
        try:
            if await page.get_by_text(accept_text, exact=False).count() == 0:
                continue
            reject_sel = ""
            for reject_text in GENERIC_REJECT_TEXTS:
                if await page.get_by_text(reject_text, exact=False).count() > 0:
                    reject_sel = f'button:has-text("{reject_text}")'
                    break
            return BannerInfo(
                detected=True,
                provider="Generic",
                accept_selector=f'button:has-text("{accept_text}")',
                reject_selector=reject_sel,
            )
        except Exception:
            continue

    # 3.-5. Remaining fallbacks share one shape: return the first truthy result.
    for fallback in (_detect_generic_dialog, _detect_generic_attr, _detect_in_shadow_dom):
        found = await fallback(page)
        if found:
            return found

    return BannerInfo(detected=False, provider="", accept_selector="", reject_selector="")
async def click_button(page: Page, selector: str, timeout: int = 5000) -> bool:
    """Try to click a consent button. Returns True if clicked successfully.

    Args:
        page: Page that shows the banner.
        selector: Either a regular selector or ``shadow-click:<button text>``
            as produced by the Shadow DOM detection. An empty string yields
            ``False`` immediately.
        timeout: Milliseconds to wait for the element to become visible.

    Returns:
        ``True`` when a click was performed, otherwise ``False``.
    """
    if not selector:
        return False

    # Handle Shadow DOM selectors
    shadow_prefix = "shadow-click:"
    if selector.startswith(shadow_prefix):
        return await _click_in_shadow_dom(page, selector[len(shadow_prefix):])

    try:
        locator = page.locator(selector).first
        await locator.wait_for(state="visible", timeout=timeout)
        await locator.click()
        return True
    except Exception:
        # Fallback: try Shadow DOM click with the button text taken from a
        # selector like 'button:has-text("Accept all")'.
        marker = ':has-text("'
        if marker in selector:
            # Drop exactly one closing '")'. A character-set strip would also
            # eat a ')' or '"' that belongs to the button text itself,
            # e.g. "Accept (all)".
            text = selector.split(marker)[1].removesuffix('")')
            return await _click_in_shadow_dom(page, text)
        return False
+75 -12
View File
@@ -11,6 +11,12 @@ from dataclasses import dataclass, field
from playwright.async_api import async_playwright, Page, BrowserContext from playwright.async_api import async_playwright, Page, BrowserContext
try:
from playwright_stealth import stealth_async
HAS_STEALTH = True
except ImportError:
HAS_STEALTH = False
from services.banner_detector import detect_banner, click_button, BannerInfo from services.banner_detector import detect_banner, click_button, BannerInfo
from services.script_analyzer import ( from services.script_analyzer import (
classify_scripts, find_tracking_services, classify_scripts, find_tracking_services,
@@ -53,22 +59,43 @@ class ConsentTestResult:
banner_has_dse_link: bool = False banner_has_dse_link: bool = False
async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult: async def run_consent_test(
"""Run 3-phase consent test on a URL.""" url: str, wait_secs: int = 10, categories: list[str] | None = None,
) -> ConsentTestResult:
"""Run 3-phase consent test on a URL.
Args:
url: Website URL to test.
wait_secs: Seconds to wait per phase.
categories: Optional list of category names to test (empty = test all).
"""
result = ConsentTestResult() result = ConsentTestResult()
wait_ms = wait_secs * 1000 wait_ms = wait_secs * 1000
filter_cats = categories or []
async with async_playwright() as p: async with async_playwright() as p:
browser = await p.chromium.launch( browser = await p.chromium.launch(
headless=True, headless=True,
args=["--no-sandbox", "--disable-dev-shm-usage"], args=[
"--no-sandbox",
"--disable-dev-shm-usage",
"--disable-blink-features=AutomationControlled",
"--window-size=1920,1080",
],
) )
try: try:
# ── Phase A: Before consent ────────────────────────── # ── Phase A: Before consent ──────────────────────────
logger.info("Phase A: First visit (no interaction)") logger.info("Phase A: First visit (no interaction)")
ctx_a = await browser.new_context(user_agent=USER_AGENT) ctx_a = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
page_a = await ctx_a.new_page() page_a = await ctx_a.new_page()
if HAS_STEALTH:
await stealth_async(page_a)
scripts_a = [] scripts_a = []
page_a.on("request", lambda req: _collect_script(req, scripts_a)) page_a.on("request", lambda req: _collect_script(req, scripts_a))
@@ -101,8 +128,15 @@ async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
# ── Phase B: After rejecting ───────────────────────── # ── Phase B: After rejecting ─────────────────────────
logger.info("Phase B: Reject consent (%s)", banner.provider) logger.info("Phase B: Reject consent (%s)", banner.provider)
ctx_b = await browser.new_context(user_agent=USER_AGENT) ctx_b = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
page_b = await ctx_b.new_page() page_b = await ctx_b.new_page()
if HAS_STEALTH:
await stealth_async(page_b)
scripts_b = [] scripts_b = []
page_b.on("request", lambda req: _collect_script(req, scripts_b)) page_b.on("request", lambda req: _collect_script(req, scripts_b))
@@ -128,8 +162,15 @@ async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
# ── Phase C: After accepting ───────────────────────── # ── Phase C: After accepting ─────────────────────────
logger.info("Phase C: Accept consent (%s)", banner.provider) logger.info("Phase C: Accept consent (%s)", banner.provider)
ctx_c = await browser.new_context(user_agent=USER_AGENT) ctx_c = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
page_c = await ctx_c.new_page() page_c = await ctx_c.new_page()
if HAS_STEALTH:
await stealth_async(page_c)
scripts_c = [] scripts_c = []
page_c.on("request", lambda req: _collect_script(req, scripts_c)) page_c.on("request", lambda req: _collect_script(req, scripts_c))
@@ -154,18 +195,40 @@ async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
try: try:
from services.category_tester import detect_categories, test_single_category from services.category_tester import detect_categories, test_single_category
ctx_cat = await browser.new_context(user_agent=USER_AGENT) ctx_cat = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
page_cat = await ctx_cat.new_page() page_cat = await ctx_cat.new_page()
if HAS_STEALTH:
await stealth_async(page_cat)
await page_cat.goto(url, wait_until="networkidle", timeout=20000) await page_cat.goto(url, wait_until="networkidle", timeout=20000)
await page_cat.wait_for_timeout(2000) await page_cat.wait_for_timeout(2000)
categories = await detect_categories(page_cat, banner) detected_cats = await detect_categories(page_cat, banner)
await page_cat.close() await page_cat.close()
if categories: # Filter to requested categories if specified
logger.info("Testing %d categories individually", len(categories)) if filter_cats and detected_cats:
for cat in categories: detected_cats = [
cat_ctx = await browser.new_context(user_agent=USER_AGENT) c for c in detected_cats if c.name in filter_cats
]
logger.info(
"Filtered to %d categories (requested: %s)",
len(detected_cats), filter_cats,
)
if detected_cats:
logger.info("Testing %d categories individually", len(detected_cats))
for cat in detected_cats:
cat_ctx = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
cat_result = await test_single_category(cat_ctx, url, cat, banner, wait_ms) cat_result = await test_single_category(cat_ctx, url, cat, banner, wait_ms)
result.category_tests.append(cat_result) result.category_tests.append(cat_result)
await cat_ctx.close() await cat_ctx.close()