From 53774886e723a1bcb8a3045c8afab3243f8df43d Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBook-Pro.local>
Date: Wed, 29 Apr 2026 15:09:03 +0200
Subject: [PATCH] =?UTF-8?q?perf:=20Phase=204=20=E2=80=94=20parallel=20page?=
 =?UTF-8?q?=20fetching=20(asyncio.gather)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Scan pages in parallel instead of sequential. Reduces scan time
from ~10s (5 pages × 2s) to ~3s (all pages at once).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../compliance/services/website_scanner.py      | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/backend-compliance/compliance/services/website_scanner.py b/backend-compliance/compliance/services/website_scanner.py
index 795f0ed..1a20015 100644
--- a/backend-compliance/compliance/services/website_scanner.py
+++ b/backend-compliance/compliance/services/website_scanner.py
@@ -87,10 +87,19 @@ async def scan_website(base_url: str) -> ScanResult:
                 if href.startswith(origin):
                     page_urls.add(href)
 
-        # 3. Scan all pages (max 10)
-        for url in list(page_urls)[:10]:
-            html = start_html if url == origin else await _fetch_page(client, url, result)
-            if html:
+        # 3. Scan all pages in PARALLEL (max 10)
+        import asyncio
+        other_urls = [u for u in list(page_urls)[:10] if u != origin]
+        fetch_tasks = [_fetch_page(client, u, result) for u in other_urls]
+        other_htmls = await asyncio.gather(*fetch_tasks, return_exceptions=True)
+
+        # Process start page
+        _detect_services(start_html, origin, result)
+        _detect_ai_mentions(start_html, origin, result)
+
+        # Process other pages
+        for url, html in zip(other_urls, other_htmls):
+            if isinstance(html, str) and html:
                 _detect_services(html, url, result)
                 _detect_ai_mentions(html, url, result)