From e494cf62bb43388d30fd28f1031cc726b7cb654f Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 5 May 2026 13:10:59 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20Increase=20page=20load=20timeouts=20?= =?UTF-8?q?=E2=80=94=20IHK=20site=20needs=20>30s=20for=20networkidle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Initial page.goto timeout: 30s → 60s (IHK loads many JS resources) - Per-page navigation timeout: 20s → 45s (heavy JS sites) - Reduced extra wait from 3s+1s back to 2s+0.5s (goto timeout handles slow loads) - Playwright scanner page timeout: 20s → 45s Root cause: IHK website has heavy JavaScript that takes >30s to reach 'networkidle' state, causing DSI discovery to fail immediately. Co-Authored-By: Claude Opus 4.6 (1M context) --- consent-tester/services/dsi_discovery.py | 12 ++++++------ consent-tester/services/playwright_scanner.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/consent-tester/services/dsi_discovery.py b/consent-tester/services/dsi_discovery.py index 9f71b62..f1d34f3 100644 --- a/consent-tester/services/dsi_discovery.py +++ b/consent-tester/services/dsi_discovery.py @@ -217,7 +217,7 @@ async def discover_dsi_documents( try: # Step 1: Load the page - await page.goto(url, wait_until="networkidle", timeout=30000) + await page.goto(url, wait_until="networkidle", timeout=60000) await page.wait_for_timeout(2000) # Step 2: Find DSI links in current page @@ -290,11 +290,11 @@ async def discover_dsi_documents( continue # Navigate to page — wait for JS to load content - resp = await page.goto(href, wait_until="networkidle", timeout=20000) + resp = await page.goto(href, wait_until="networkidle", timeout=45000) if resp and resp.status < 400: - await page.wait_for_timeout(3000) # Extra wait for JS content loading + await page.wait_for_timeout(2000) await _expand_all_interactive(page) - await page.wait_for_timeout(1000) + await page.wait_for_timeout(500) # Extract text — try specific content areas, fall back to full body text = await page.evaluate(""" @@ -333,14 +333,14 @@ async def discover_dsi_documents( pending_links.append(nl) # Navigate back for next link - await page.goto(url, wait_until="networkidle", timeout=20000) + await page.goto(url, wait_until="networkidle", timeout=45000) await page.wait_for_timeout(500) await _expand_all_interactive(page) except Exception as e: result.errors.append(f"Failed to load {href}: {str(e)[:80]}") try: - await page.goto(url, wait_until="networkidle", timeout=20000) + await page.goto(url, wait_until="networkidle", timeout=45000) except Exception: pass diff --git a/consent-tester/services/playwright_scanner.py b/consent-tester/services/playwright_scanner.py index 49635d2..2aef6ef 100644 --- a/consent-tester/services/playwright_scanner.py +++ b/consent-tester/services/playwright_scanner.py @@ -157,7 +157,7 @@ async def _visit_page(page: Page, url: str, result: PlaywrightScanResult) -> Sca """Visit a page and capture its rendered HTML.""" sp = ScannedPage(url=url, status=0) try: - response = await page.goto(url, wait_until="networkidle", timeout=20000) + response = await page.goto(url, wait_until="networkidle", timeout=45000) sp.status = response.status if response else 0 await page.wait_for_timeout(2000)