feat(cross-doc): search all texts for all doc_types + misplacement finding
Cross-Document Intelligence: When a doc_type row is empty, the pipeline searches ALL other loaded documents for that content. If it is found (e.g. Widerruf in AGB), it extracts the section, runs the check, AND creates a finding: "Widerrufsbelehrung in falschem Dokument gefunden — schwer auffindbar".

Keywords are defined for: widerruf, cookie, social_media, impressum, agb, dsb.

Integrated as Step 1c in the compliance check pipeline.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -178,11 +178,16 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
# 1. Same URL used for multiple doc_types → split by heading
|
||||
# 2. DSI text contains Cookie/Social-Media sections → auto-fill empty rows
|
||||
from compliance.services.section_splitter import (
|
||||
split_shared_texts, auto_fill_from_dsi,
|
||||
split_shared_texts, auto_fill_from_dsi, cross_search_documents,
|
||||
)
|
||||
split_shared_texts(doc_entries, url_text_cache)
|
||||
auto_fill_from_dsi(doc_entries)
|
||||
# Refresh doc_texts after splitting
|
||||
|
||||
# Step 1c: Cross-document search — find doc_types in wrong documents
|
||||
_update(check_id, "Dokumente werden uebergreifend durchsucht...")
|
||||
placement_findings = cross_search_documents(doc_entries)
|
||||
|
||||
# Refresh doc_texts after all splitting/searching
|
||||
for entry in doc_entries:
|
||||
if entry.get("text"):
|
||||
doc_texts[entry["doc_type"]] = entry["text"]
|
||||
@@ -232,6 +237,13 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
# Apply profile context filter
|
||||
result = _apply_profile_filter(result, profile, doc_type)
|
||||
|
||||
# Add placement findings (doc found in wrong location)
|
||||
for pf in placement_findings:
|
||||
if pf.get("doc_type") == doc_type:
|
||||
result.checks.insert(0, CheckItem(**{
|
||||
k: v for k, v in pf.items() if k != "doc_type"
|
||||
}))
|
||||
|
||||
results.append(result)
|
||||
total_findings += result.findings_count
|
||||
|
||||
|
||||
Reference in New Issue
Block a user