fix: Increase page load timeouts — IHK site needs >30s for networkidle
- Initial page.goto timeout: 30s → 60s (IHK loads many JS resources)
- Per-page navigation timeout: 20s → 45s (heavy JS sites)
- Reduced extra wait from 3s+1s back to 2s+0.5s (goto timeout handles slow loads)
- Playwright scanner page timeout: 20s → 45s

Root cause: The IHK website has heavy JavaScript that takes >30s to reach the 'networkidle' state, causing DSI discovery to fail immediately.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -217,7 +217,7 @@ async def discover_dsi_documents(
|
||||
|
||||
try:
|
||||
# Step 1: Load the page
|
||||
await page.goto(url, wait_until="networkidle", timeout=30000)
|
||||
await page.goto(url, wait_until="networkidle", timeout=60000)
|
||||
await page.wait_for_timeout(2000)
|
||||
|
||||
# Step 2: Find DSI links in current page
|
||||
@@ -290,11 +290,11 @@ async def discover_dsi_documents(
|
||||
continue
|
||||
|
||||
# Navigate to page — wait for JS to load content
|
||||
resp = await page.goto(href, wait_until="networkidle", timeout=20000)
|
||||
resp = await page.goto(href, wait_until="networkidle", timeout=45000)
|
||||
if resp and resp.status < 400:
|
||||
await page.wait_for_timeout(3000) # Extra wait for JS content loading
|
||||
await page.wait_for_timeout(2000)
|
||||
await _expand_all_interactive(page)
|
||||
await page.wait_for_timeout(1000)
|
||||
await page.wait_for_timeout(500)
|
||||
|
||||
# Extract text — try specific content areas, fall back to full body
|
||||
text = await page.evaluate("""
|
||||
@@ -333,14 +333,14 @@ async def discover_dsi_documents(
|
||||
pending_links.append(nl)
|
||||
|
||||
# Navigate back for next link
|
||||
await page.goto(url, wait_until="networkidle", timeout=20000)
|
||||
await page.goto(url, wait_until="networkidle", timeout=45000)
|
||||
await page.wait_for_timeout(500)
|
||||
await _expand_all_interactive(page)
|
||||
|
||||
except Exception as e:
|
||||
result.errors.append(f"Failed to load {href}: {str(e)[:80]}")
|
||||
try:
|
||||
await page.goto(url, wait_until="networkidle", timeout=20000)
|
||||
await page.goto(url, wait_until="networkidle", timeout=45000)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
Reference in New Issue
Block a user