debug: Log DSI text lengths to diagnose 0% completeness bug

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-05 14:08:04 +02:00
parent e494cf62bb
commit 72761d6066
@@ -242,6 +242,9 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
for doc in dsi_data.get("documents", []):
doc_type = classify_document_type(doc["title"], doc["url"])
doc_text = doc.get("full_text", "") or doc.get("text_preview", "")
logger.info("DSI check: '%s' type=%s text_len=%d full_text_len=%d preview_len=%d",
doc["title"][:50], doc_type, len(doc_text),
len(doc.get("full_text", "")), len(doc.get("text_preview", "")))
doc_findings = check_document_completeness(
doc_text, doc_type, doc["title"], doc["url"],
)