57c0f940a2
CI / detect-changes (push) Successful in 11s
CI / branch-name (push) Has been skipped
CI / nodejs-build (push) Successful in 2m19s
CI / test-go (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 16s
CI / loc-budget (push) Failing after 15s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 37s
P56 Anti-Auditing-Detection als constructive Compliance-Finding (Audit-API-
Empfehlung statt Anklage, weil Mercedes berechtigt Bots blockiert)
P57 Phase G vendor_details Union mit cmp_vendors -> 42 Anbieter sichtbar
P58 Anti-Audit-Detection robuster (Script-Domain-Check + Settings-spezifisch)
P59 Cookie-Behavior-Validator (4 Layer, 3-Tier-Severity: MEDIUM=Kategorie-
Mismatch / HIGH=Zweck-Mismatch / CRITICAL=beide=Vorsatz-Indiz)
+ Open Cookie Database (CC0) als Library-Seed (2264 Cookies)
P59b Cookie-Behavior in Banner-Check verdrahtet + Mail-Block (BUGFIX:
SessionLocal selbst oeffnen, db war im Background-Task nicht im Scope)
Mail-Polish nach Mercedes-Review:
P63 Banner-Footer-Links auch im wb7-link/role=link erkennen (Shadow-DOM-
Walker label-based statt nur <a href>)
P64 Re-Access-Severity: MEDIUM statt HIGH, wenn Footer "Einstellungen" oder
Mercedes-typisch existiert; OEM-Footer-Detection (wb7-footer)
P65 Text-Truncation: Word-Boundary statt Zeichen-Cut (kein "einfa"-Bruch
mehr in Sofortmassnahmen)
P66 GF-Aktionen: Service-Zweck vs Cookie-Zweck explizit erklaert
(haeufige Verwechslung Marketing/GF: "Akamai-Beschreibung" != Cookie-
Zweck pro DSK-OH 2024)
P67 Stirring-Finding mit "Verlust-Framing"-Erklaerung + Alt-vs-Neutral-
Beispiel, statt nur EDPB-Fachbegriff
Compliance-Advisor FAQ (admin agent-core/soul):
+ CNIL/EDPB Top-Bussgelder (Google 100M, Meta 60M, Amazon 35M)
+ Deutsche Praezedenz (LG Muenchen Google Fonts, EuGH Planet49, BGH I ZR 7/16)
+ 4 Risiko-Pfade (Bussgeld/Abmahnung/Sammelklage/NOYB) + Berechnungs-Methodik
Document-Generator Templates: AGB-DE (142), Impressum (140), Widerrufs-
formular-Anlage (143), DSR-Process-Dedup (139), Cookie-Library (144).
Architektur: doc_action_mappings.py + banner_dom_walkers.py +
cookie_behavior_validator.py + vendor_detail_extractor.py rausgezogen,
um die 500-LOC-Caps in agent_doc_check_report.py und
banner_text_checker.py einzuhalten.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
109 lines
4.4 KiB
Python
109 lines
4.4 KiB
Python
"""
Browser-side DOM walkers for Web-Component CMPs and OEM design-systems.

Centralizes the JavaScript snippets used by banner_text_checker.py so the
checker file stays under the 500-LOC cap. Each constant holds the source of a
JS arrow function as a string that Playwright passes to `page.evaluate()`.

Two walkers:
  * SHADOW_BANNER_WALKER_JS — pierces shadow DOM (Mercedes cmm-cookie-banner,
    BMW cookie-consent-banner, etc.) and extracts banner text + label-based
    legal links (P63 — recognizes wb7-link/role=link/button, not just
    <a href>, since OEM design-systems wrap navigation).
  * FOOTER_LABELS_WALKER_JS — collects unique footer link labels from any
    candidate footer root (footer, [role=contentinfo], wb7-footer, ...) with
    a bottom-25%-of-viewport fallback (P64).
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
# Source text of a JavaScript arrow function, meant to be handed to Playwright's
# `page.evaluate()`. Starting at `document`, it descends through every shadow
# root reachable via `el.shadowRoot`, looks for known consent-banner custom
# elements, and returns `{ text, links }`:
#   * text  - concatenated `textContent` of each matching banner's shadow root
#   * links - `{ href, text }` records (both lower-cased) for every `a[href]` in
#             the banner plus every link-like element whose label reads like an
#             imprint or privacy link; label matches that carry no href,
#             data-href or data-uri attribute get the placeholder href
#             `#shadow-impressum` or `#shadow-dse`.
# Labels are stripped of diacritics before keyword matching (so that
# "Mentions légales" matches the accent-free keyword), and identical
# `{ href, text }` records are emitted only once.
# NOTE: this is a regular (non-raw) Python string, so the regex escapes inside
# the JS are written with doubled backslashes.
SHADOW_BANNER_WALKER_JS = """() => {
  // Keywords are kept lower-case and accent-free; labels are folded the same
  // way before comparison so that accented spellings still match.
  const LEGAL_KW = {
    impressum: ['impressum','imprint','legal notice','mentions legales','colophon'],
    dse: ['datenschutz','privacy','dsgvo','data protection','politique de confidentialite'],
  };

  // Keys of the link records that were already emitted.
  const seen = new Set();

  // Remove combining diacritical marks after canonical decomposition.
  function foldAccents(s) {
    return s.normalize('NFD').replace(/[\\u0300-\\u036f]/g, '');
  }

  // Classify a label as 'impressum', 'dse' or null. Labels longer than 60
  // characters are rejected so that running text is not mistaken for a link.
  function isLegalLabel(txt) {
    const raw = (txt || '').toLowerCase();
    if (!raw || raw.length > 60) return null;
    const t = foldAccents(raw);
    if (LEGAL_KW.impressum.some((k) => t.includes(k))) return 'impressum';
    if (LEGAL_KW.dse.some((k) => t.includes(k))) return 'dse';
    return null;
  }

  // Append a link record unless an identical one was already collected. The
  // anchor pass and the label pass both see the same anchors, and nested
  // wrappers repeat the same label, so duplicates are common.
  function addLink(acc, href, text) {
    const key = JSON.stringify([href, text]);
    if (seen.has(key)) return;
    seen.add(key);
    acc.links.push({ href, text });
  }

  function walk(root, acc) {
    if (!root || !root.querySelectorAll) return;

    // Descend into every nested shadow root before scanning this level.
    for (const el of root.querySelectorAll('*')) {
      if (el.shadowRoot) walk(el.shadowRoot, acc);
    }

    const tags = ['cmm-cookie-banner', 'cookie-consent-banner',
                  'consent-banner', 'cookie-banner', 'cmp-banner',
                  'ot-banner', 'usercentrics-banner'];
    for (const tag of tags) {
      for (const el of root.querySelectorAll(tag)) {
        const shadow = el.shadowRoot;
        // Only banners that expose a shadow root are handled here.
        if (!shadow) continue;

        const txt = (shadow.textContent || '').trim();
        if (txt) acc.text += ' ' + txt;

        // Pass 1: every plain anchor, regardless of its label.
        for (const a of shadow.querySelectorAll('a[href]')) {
          addLink(
            acc,
            (a.getAttribute('href') || '').toLowerCase(),
            (a.textContent || '').trim().toLowerCase()
          );
        }

        // Pass 2: link-like elements recognised by their label only.
        const cands = shadow.querySelectorAll(
          'wb7-link, wb7-button, [role="link"], button, span, a'
        );
        for (const c of cands) {
          const label = (c.textContent || '').trim();
          const which = isLegalLabel(label);
          if (!which) continue;
          const href = (c.getAttribute('href') ||
                        c.getAttribute('data-href') ||
                        c.getAttribute('data-uri') || '').toLowerCase();
          // Without any target attribute a synthetic marker href is reported.
          addLink(acc, href || ('#shadow-' + which), label.toLowerCase());
        }
      }
    }
  }

  const acc = { text: '', links: [] };
  walk(document, acc);
  return acc;
}"""
|
|
|
|
|
|
# Source text of a JavaScript arrow function, meant to be handed to Playwright's
# `page.evaluate()`. It returns an array of unique, lower-cased labels of
# link-like elements found below footer candidates.
# Footer candidates are elements matching the footer selector list; when none
# match, the parents of link-like elements whose top edge lies below 75% of the
# viewport height are used instead.
# Candidate roots are kept in a Set so that a parent shared by many links is
# scanned once instead of once per link; the returned labels and their order are
# the same as with a plain list.
FOOTER_LABELS_WALKER_JS = """() => {
  const out = new Set();

  const roots = new Set(document.querySelectorAll(
    'footer, [role="contentinfo"], ' +
    'wb7-footer, wb-footer, b-footer, cmm-footer, ' +
    '[class*="footer" i], [id*="footer" i]'
  ));

  // Fallback: no footer-like element matched, so use the parents of
  // link-like elements positioned in the lowest quarter of the viewport.
  if (roots.size === 0) {
    const viewH = window.innerHeight;
    for (const el of document.querySelectorAll('a, button, [role="link"], wb7-link')) {
      const r = el.getBoundingClientRect();
      if (r.top > viewH * 0.75 && el.parentElement) roots.add(el.parentElement);
    }
  }

  for (const root of roots) {
    const cands = root.querySelectorAll('a, button, [role="link"], wb7-link, wb7-button');
    let n = 0;
    for (const c of cands) {
      // Per-root cap. NOTE review: the post-increment comparison lets 81
      // candidates through; confirm whether 80 was intended.
      if (n++ > 80) break;
      const t = (c.textContent || '').trim().toLowerCase();
      // Skip empty labels and anything too long to be a link caption.
      if (t && t.length < 60) out.add(t);
    }
  }

  return [...out];
}"""
|