""" Phase D — Per-Vendor Detail Extraction (P50). After Accept (Phase C) the banner contains every vendor; on most CMPs (Usercentrics, OneTrust, Cookiebot) each vendor has an Info/Details icon that opens a modal with Beschreibung, Verarbeitendes Unternehmen, Zweck, Genutzte Technologien, Cookies, Opt-Out-URL and Privacy-URL. We open the settings-view of the banner, walk the Shadow-DOM for info icons, click each one, capture the modal text + the XHR triggered by the click (which Usercentrics uses to load the detail JSON), and parse the text into structured fields. Returns: list[VendorDetail] with raw_text + structured fields. """ from __future__ import annotations import asyncio import logging import re from dataclasses import dataclass, field from typing import Optional from playwright.async_api import Browser, Page, TimeoutError as PlaywrightTimeout logger = logging.getLogger(__name__) USER_AGENT = ( "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" ) @dataclass class VendorDetail: name: str = "" description: str = "" processing_company: str = "" address: str = "" purposes: list[str] = field(default_factory=list) technologies: list[str] = field(default_factory=list) cookies: list[str] = field(default_factory=list) retention: str = "" opt_out_url: str = "" privacy_url: str = "" raw_text: str = "" # ── Shadow-DOM helper: find info-buttons in Mercedes/Usercentrics/etc. _FIND_INFO_BUTTONS_JS = r""" () => { // Walk all shadow roots and collect "info"/"i"-icon clickables. // Covers