diff --git a/backend-compliance/compliance/api/agent_doc_check_routes.py b/backend-compliance/compliance/api/agent_doc_check_routes.py index 03de912..2569628 100644 --- a/backend-compliance/compliance/api/agent_doc_check_routes.py +++ b/backend-compliance/compliance/api/agent_doc_check_routes.py @@ -327,7 +327,7 @@ SECTION_TYPE_MAP = [ (r"^(?:agb|allgemeine geschäftsbedingungen|nutzungsbedingungen)$", "agb"), # DSFA MUST be checked BEFORE social_media (both can contain "Social Media") (r"datenschutzfolge|dsfa|risikoanalyse", "dsfa"), - (r"^social\s*media$", "social_media"), # Standalone heading "Social Media" = DSE + (r"^social\s*media$|^soziale\s+(?:medien|netzwerke)$", "social_media"), (r"datenschutzerkl(?:ae|ä)rung.*social|datenschutz\s+f(?:ue|ü)r\s+social", "social_media"), ] @@ -415,6 +415,8 @@ def _classify_section(heading: str) -> str | None: """Classify a section heading into a document type.""" import re as _re heading_lower = heading.lower().strip() + # Strip leading numbers/bullets: "5. Soziale Medien" → "soziale medien" + heading_lower = _re.sub(r"^[\d\.\)\-]+\s*", "", heading_lower).strip() # Skip known sub-sections if heading_lower in SKIP_HEADINGS: return None