perf: Phase 4 — parallel page fetching (asyncio.gather)
Scan pages in parallel instead of sequentially. This reduces scan time from ~10s (5 pages × 2s each) to ~3s, since all pages are fetched at once.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
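For reference, a minimal self-contained sketch of the pattern this commit applies: awaiting fetches one by one costs the sum of their latencies, while asyncio.gather keeps them all in flight so the total is roughly the slowest single fetch. The fetch coroutine and its 2 s asyncio.sleep below are stand-ins for the real _fetch_page HTTP call, not code from this repository.

    import asyncio
    import time

    async def fetch(url: str) -> str:
        await asyncio.sleep(2)                     # stand-in for a ~2 s HTTP request
        return f"<html>{url}</html>"

    async def sequential(urls: list[str]) -> list[str]:
        return [await fetch(u) for u in urls]      # waits 2 s per page, one after another

    async def parallel(urls: list[str]) -> list[str]:
        return await asyncio.gather(*(fetch(u) for u in urls))  # all pages at once

    async def main() -> None:
        urls = [f"https://example.com/page{i}" for i in range(5)]
        for scan in (sequential, parallel):
            start = time.perf_counter()
            await scan(urls)
            print(f"{scan.__name__}: {time.perf_counter() - start:.1f}s")  # ~10.0s vs ~2.0s

    asyncio.run(main())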
@@ -87,10 +87,19 @@ async def scan_website(base_url: str) -> ScanResult:
         if href.startswith(origin):
             page_urls.add(href)
 
-    # 3. Scan all pages (max 10)
-    for url in list(page_urls)[:10]:
-        html = start_html if url == origin else await _fetch_page(client, url, result)
-        if html:
-            _detect_services(html, url, result)
-            _detect_ai_mentions(html, url, result)
+    # 3. Scan all pages in PARALLEL (max 10)
+    import asyncio
+    other_urls = [u for u in list(page_urls)[:10] if u != origin]
+    fetch_tasks = [_fetch_page(client, u, result) for u in other_urls]
+    other_htmls = await asyncio.gather(*fetch_tasks, return_exceptions=True)
+
+    # Process start page (start_html was fetched earlier)
+    _detect_services(start_html, origin, result)
+    _detect_ai_mentions(start_html, origin, result)
+
+    # Process other pages (gather returns exceptions in place of HTML for failed fetches)
+    for url, html in zip(other_urls, other_htmls):
+        if isinstance(html, str) and html:
+            _detect_services(html, url, result)
+            _detect_ai_mentions(html, url, result)
 
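A note on the return_exceptions=True flag used above: with it, asyncio.gather places any raised exception into the result list instead of cancelling the remaining tasks and re-raising, which is why the processing loop checks isinstance(html, str) before using a result. A small sketch of that behavior, where might_fail is a made-up coroutine standing in for a page fetch:

    import asyncio

    async def might_fail(i: int) -> str:
        if i == 2:
            raise RuntimeError("boom")   # simulate one failed page fetch
        return f"page-{i}"

    async def main() -> None:
        results = await asyncio.gather(
            *(might_fail(i) for i in range(4)), return_exceptions=True
        )
        # results is ['page-0', 'page-1', RuntimeError('boom'), 'page-3']
        pages = [r for r in results if isinstance(r, str)]   # keep only successful fetches
        print(pages)                                         # ['page-0', 'page-1', 'page-3']

    asyncio.run(main())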