perf: Phase 4 — parallel page fetching (asyncio.gather)

Scan pages in parallel instead of sequentially. Cuts the scan time
from ~10s (5 pages × 2s each) to ~3s (all pages fetched at once).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Author: Benjamin Admin
Date: 2026-04-29 15:09:03 +02:00
parent 5c5054f740
commit 53774886e7
@@ -87,10 +87,19 @@ async def scan_website(base_url: str) -> ScanResult:
         if href.startswith(origin):
             page_urls.add(href)
-    # 3. Scan all pages (max 10)
-    for url in list(page_urls)[:10]:
-        html = start_html if url == origin else await _fetch_page(client, url, result)
-        if html:
+    # 3. Scan all pages in PARALLEL (max 10)
+    import asyncio
+    other_urls = [u for u in list(page_urls)[:10] if u != origin]
+    fetch_tasks = [_fetch_page(client, u, result) for u in other_urls]
+    other_htmls = await asyncio.gather(*fetch_tasks, return_exceptions=True)
+    # Process start page
+    _detect_services(start_html, origin, result)
+    _detect_ai_mentions(start_html, origin, result)
+    # Process other pages
+    for url, html in zip(other_urls, other_htmls):
+        if isinstance(html, str) and html:
+            _detect_services(html, url, result)
+            _detect_ai_mentions(html, url, result)
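
The key mechanic above is asyncio.gather(..., return_exceptions=True): a failed
fetch comes back as an exception object inside the results list instead of
aborting the whole scan, which is why the loop checks isinstance(html, str)
before processing. A minimal, self-contained sketch of the same pattern (the
fetch stub, its URLs, and the 2s latency are hypothetical stand-ins for
_fetch_page, not part of this commit):

    import asyncio
    import time

    # Hypothetical stand-in for _fetch_page: returns HTML, or raises for a bad page.
    async def fetch(url: str, delay: float = 2.0) -> str:
        await asyncio.sleep(delay)  # simulate network latency
        if "broken" in url:
            raise RuntimeError(f"fetch failed: {url}")
        return f"<html>{url}</html>"

    async def main() -> None:
        urls = ["https://example.com/a", "https://example.com/broken", "https://example.com/b"]
        start = time.perf_counter()
        # All fetches run concurrently: total time ~= slowest fetch, not the sum.
        results = await asyncio.gather(
            *(fetch(u) for u in urls),
            return_exceptions=True,  # exceptions are returned as values, not raised
        )
        for url, html in zip(urls, results):
            if isinstance(html, str):  # skip entries that are exception objects
                print("ok  ", url)
            else:
                print("fail", url, html)
        print(f"elapsed: {time.perf_counter() - start:.1f}s")  # ~2s, not ~6s

    asyncio.run(main())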
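
One design note: with the default return_exceptions=False, the first page that
raised would propagate out of gather and fail the whole scan, so returning
exceptions as values is the right call for a best-effort crawler. The [:10]
slice also bounds the fan-out at ten concurrent connections; if that cap were
ever raised, a semaphore is the usual way to keep the parallel version from
opening too many connections at once. A hypothetical sketch (the gather_limited
helper and its limit parameter are assumptions, not part of this commit):

    import asyncio

    async def gather_limited(coros, limit: int = 10):
        # Run coroutines concurrently, but at most `limit` at a time.
        sem = asyncio.Semaphore(limit)

        async def run(coro):
            async with sem:
                return await coro

        return await asyncio.gather(*(run(c) for c in coros), return_exceptions=True)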