fix: accordion close bug + merge multi-page DSIs (BMW fix)
1. _expand_all_interactive(): only click buttons with aria-expanded="false". Before, ALL accordion buttons were clicked, including ones that were already open → BMW's pre-expanded accordions got CLOSED, reducing the extracted text from 1151 to 361 words. 2. _fetch_text() + /extract-text: merge ALL documents found on a page (max_documents=10 instead of 1). BMW splits its DSI across 5 sub-pages, which discovery finds as separate documents — these are now merged. 3. Tab panels: unhide hidden tabpanels instead of clicking the tabs (clicking a tab can hide the currently visible panel). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -532,19 +532,43 @@ async def _find_dsi_links(page: Page, base_domain: str) -> list[dict]:
|
||||
return []
|
||||
|
||||
async def _expand_all_interactive(page: Page) -> None:
    """Expand closed accordions, <details>, "show more" buttons and tab panels.

    Runs a single JavaScript snippet in the page via ``page.evaluate``.

    IMPORTANT: only elements that are CLOSED are expanded. An element that
    already reports ``aria-expanded="true"`` is never clicked, because a
    click would close it again (BMW, for example, ships its accordions open
    by default). For the same reason every element is clicked at most once
    per call: a button that matches several of the selectors below would
    otherwise be toggled open and then closed again.

    This is best-effort: a failure of the script is logged at debug level
    and otherwise ignored, so the caller continues with the page as it is.

    Args:
        page: The page whose interactive elements should be expanded.
    """
    try:
        await page.evaluate("""() => {
            // Elements already clicked during this run. Several selectors
            // below can match the same element; a second click would
            // collapse what the first click just expanded.
            const clicked = new Set();
            const clickOnce = (el) => {
                if (clicked.has(el)) return;
                // Already open: clicking would close it.
                if (el.getAttribute('aria-expanded') === 'true') return;
                clicked.add(el);
                try { el.click(); } catch {}
            };

            // 1. Open all <details> that are closed
            document.querySelectorAll('details:not([open])').forEach(d => {
                d.open = true;
            });

            // 2. Click buttons that are explicitly CLOSED (aria-expanded="false")
            document.querySelectorAll('button[aria-expanded="false"]').forEach(b => clickOnce(b));

            // 3. Bootstrap/jQuery collapse triggers (only closed ones)
            document.querySelectorAll('[data-toggle="collapse"].collapsed').forEach(e => clickOnce(e));
            document.querySelectorAll('[data-bs-toggle="collapse"].collapsed').forEach(e => clickOnce(e));

            // 4. "Show more" / "Mehr anzeigen" buttons
            document.querySelectorAll('button,a').forEach(b => {
                const t = (b.textContent || '').trim();
                if (/^(mehr|more|weiterlesen|read more|show more|anzeigen|alle anzeigen)/i.test(t))
                    clickOnce(b);
            });

            // 5. Tabs: do NOT click the tabs (that can hide the panel that
            //    is currently visible); just unhide the hidden tab panels.
            document.querySelectorAll('[role="tabpanel"][hidden]').forEach(p => {
                p.removeAttribute('hidden');
                p.style.display = '';
            });
        }""")
    except Exception:
        # Expanding is an optimisation for text extraction, never a hard
        # requirement, so keep going but leave a trace for debugging.
        import logging

        logging.getLogger(__name__).debug(
            "_expand_all_interactive: page script failed", exc_info=True
        )
|
||||
|
||||
Reference in New Issue
Block a user