"""B20 wiring — Legacy-URL-Discovery + Mail-Block.""" from __future__ import annotations import html import logging import os from compliance.services.legacy_url_discovery import discover_legacy_urls from compliance.services.multi_version_dse import ( analyze_multiple_dse_versions, render_multi_version_block, ) logger = logging.getLogger(__name__) _DISABLED = os.environ.get("LEGACY_URL_DISABLED", "").lower() in ( "1", "true", "yes", ) async def run_b20(state: dict) -> None: if _DISABLED: return try: result = await discover_legacy_urls(state) except Exception as e: logger.warning("legacy-url-discovery failed: %s", e) return candidates = result.get("candidates") or [] state["legacy_url_inventory"] = result if candidates: state["legacy_url_html"] = _render(result) logger.info( "B20 legacy-url: %d candidates of %d probed", len(candidates), result.get("probed", 0), ) # Plan C — Multi-Version-DSE-Analyse: falls Legacy-Discovery zusätz- # liche DSE-URLs liefert UND ≥2 reachable sind, parallele Analyse + # Vergleichsblock. try: mv_info = await analyze_multiple_dse_versions(state) if mv_info.get("versions") and len(mv_info["versions"]) >= 2: state["multi_version_dse_info"] = mv_info state["multi_version_dse_html"] = render_multi_version_block( mv_info, ) logger.info( "B20-C multi-version-dse: %d versions, date_div=%s dsb_div=%s", len(mv_info["versions"]), mv_info.get("date_divergent"), mv_info.get("dsb_divergent"), ) except Exception as e: logger.warning("multi-version-dse analysis failed: %s", e) def _render(result: dict) -> str: candidates = result.get("candidates") or [] if not candidates: return "" rows = [] for c in candidates[:25]: st = c["status"] sev_color = ( "#dc2626" if "Legacy-Verdacht" in (c.get("recommendation") or "") else "#f59e0b" if st in (404, 410) else "#64748b" ) age = c.get("age_months") age_disp = f"{age} Mo." if age is not None else "—" rec = c.get("recommendation") or "—" rows.append( f"" f"" f"{html.escape(c['url'][:120])}" f"" f"" f"{st or '?'}" f"" f"{age_disp}" f"" f"{'✓' if c.get('in_footer') else '—'}" f"" f"{html.escape(rec)}" f"" ) rest = "" if len(candidates) > 25: rest = ( f"

" f"… und {len(candidates)-25} weitere — vollständig in " f"legacy-urls.csv im ZIP-Anhang.

" ) return ( "
" "

" f"🗂️ Legacy-URL-Inventar ({len(candidates)} Kandidaten von " f"{result.get('probed', '?')} geprüft)" "

" "

" "Quellen: /sitemap.xml + Wayback-Machine + Slug-Permutations. " "Wir entscheiden nicht ob eine URL Legacy ist — " "wir präsentieren das Inventar mit Status und Empfehlung. Der " "Kunde entscheidet." "

" "" "" "" "" "" "" "" "" + "".join(rows) + "
URLHTTPWayback-AlterFooterEmpfehlung
" + rest + "
" )