From 53774886e723a1bcb8a3045c8afab3243f8df43d Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 29 Apr 2026 15:09:03 +0200 Subject: [PATCH] =?UTF-8?q?perf:=20Phase=204=20=E2=80=94=20parallel=20page?= =?UTF-8?q?=20fetching=20(asyncio.gather)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scan pages in parallel instead of sequentially. Reduces scan time from ~10s (5 pages × 2s) to ~3s (all pages at once). Co-Authored-By: Claude Opus 4.6 (1M context) --- .../compliance/services/website_scanner.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/backend-compliance/compliance/services/website_scanner.py b/backend-compliance/compliance/services/website_scanner.py index 795f0ed..1a20015 100644 --- a/backend-compliance/compliance/services/website_scanner.py +++ b/backend-compliance/compliance/services/website_scanner.py @@ -87,10 +87,19 @@ async def scan_website(base_url: str) -> ScanResult: if href.startswith(origin): page_urls.add(href) - # 3. Scan all pages (max 10) - for url in list(page_urls)[:10]: - html = start_html if url == origin else await _fetch_page(client, url, result) - if html: + # 3. Scan all pages in PARALLEL (max 10) + import asyncio + other_urls = [u for u in list(page_urls)[:10] if u != origin] + fetch_tasks = [_fetch_page(client, u, result) for u in other_urls] + other_htmls = await asyncio.gather(*fetch_tasks, return_exceptions=True) + + # Process start page + _detect_services(start_html, origin, result) + _detect_ai_mentions(start_html, origin, result) + + # Process other pages + for url, html in zip(other_urls, other_htmls): + if isinstance(html, str) and html: _detect_services(html, url, result) _detect_ai_mentions(html, url, result)