fix(compliance-check): always render 8 doc types + 4 BMW GT-gap fixes
Always-show-8 (user-requested): - agent_compliance_check_routes.py: _pad_results_with_missing pads the results list to always include all 8 canonical doc_types in canonical order. Missing types get a placeholder DocCheckResult with error='Nicht eingereicht' + scenario='missing'. - agent_doc_check_report.py: NICHT EINGEREICHT status label (neutral), friendly grey body block instead of red error. - ChecklistView.tsx: 'Nicht eingereicht' chip (neutral grey, not red 'Fehler'); SCENARIO_LABELS adds missing entry + header chip counter. Impressum-Regression fix (#18): - _fetch_text(url, doc_type): cookie/dse/social_media -> max_documents=1 (CMP capture authoritative, sub-pages dilute). Other types -> max_documents=3 (Impressum needs Versicherungsvermittler, Aufsicht, Berufsrecht sub-pages). 15s networkidle bail keeps timing safe. ODR/Verbraucherstreitbeilegung filter (#19): - _apply_profile_filter: when profile.needs_odr=True (B2C), override the check's default B2B-oriented hint with action-oriented B2C guidance pointing at Art. 14 EU-VO 524/2013 + §36 VSBG. Previously the check contradicted itself: 'profile says B2C' + hint 'only relevant for B2C online vendors'. Registergericht regex (#20): - impressum_checks.py: accept colon/dot/dash/comma between keyword and city (BMW writes 'registergericht: münchen hrb 42243'). Add 'sitz und registergericht: X' as separate pattern. Industry detection (#21): - business_profiler.py: 'automotive' keywords broadened (antriebs, motor, leasing, werkstatt, probefahrt, plus brand names BMW/Mercedes/Audi/Volkswagen/Porsche/Opel). 'it_services' keywords narrowed — software/cloud/hosting are mentioned in every privacy policy and were biasing the result toward IT for any tech-aware company.
This commit is contained in:
@@ -107,7 +107,13 @@ _EDITORIAL_KEYWORDS = [
|
||||
]
|
||||
|
||||
_INDUSTRY_KEYWORDS = {
|
||||
"it_services": ["software", "saas", "cloud", "hosting", "api", "plattform"],
|
||||
# "software/cloud/hosting" are often mentioned in privacy texts of any
|
||||
# vendor (Cloud-Hosting fuer Newsletter, SaaS-Tools etc.) without making
|
||||
# the company an IT-services vendor itself. Keep the list deliberately
|
||||
# narrow: only patterns that strongly suggest IT/SaaS as the core business.
|
||||
"it_services": ["saas-anbieter", "software-as-a-service",
|
||||
"ihr saas", "ihre cloud", "hosting-provider",
|
||||
"api-anbieter", "developer-portal"],
|
||||
"retail": ["shop", "warenkorb", "versand", "lieferung", "einzelhandel"],
|
||||
"healthcare": ["arzt", "praxis", "patient", "gesundheit", "therapie", "klinik"],
|
||||
"legal": ["kanzlei", "rechtsanwalt", "mandant", "anwalt"],
|
||||
@@ -120,7 +126,11 @@ _INDUSTRY_KEYWORDS = {
|
||||
"manufacturing": ["fertigung", "produktion", "maschinenbau", "anlagenbau", "zulieferer",
|
||||
"werkzeugbau", "spritzguss", "cnc", "industrietechnik"],
|
||||
"automotive": ["fahrzeug", "kraftfahrzeug", "kfz", "automobil", "neuwagen",
|
||||
"gebrauchtwagen", "konfigurator", "modellreihe", "modellpalette"],
|
||||
"gebrauchtwagen", "fahrzeugempfehlung", "modellreihe",
|
||||
"modellpalette", "antriebs", "motor", "reifen", "elektroauto",
|
||||
"verbrenner", "hybridfahrzeug", "leasing", "werkstatt",
|
||||
"wartung und reparatur", "probefahrt", "bmw", "mercedes",
|
||||
"audi", "volkswagen", "porsche", "opel"],
|
||||
"media": ["redaktion", "verlag", "medien", "journalismus", "presse"],
|
||||
}
|
||||
|
||||
|
||||
@@ -111,9 +111,16 @@ IMPRESSUM_CHECKLIST = [
|
||||
"label": "Registergericht benannt (Amtsgericht X)",
|
||||
"level": 2, "parent": "register",
|
||||
"patterns": [
|
||||
r"(?:amtsgericht|registergericht)\s+[a-zA-Z\u00c0-\u017e]\w+",
|
||||
# "Amtsgericht <Stadt>" or "Registergericht <Stadt>"
|
||||
# Allow colon/dot/dash between keyword and city (BMW writes
|
||||
# "registergericht: m\u00fcnchen hrb 42243").
|
||||
r"(?:amtsgericht|registergericht)[\s:\.\-,]+[a-zA-Z\u00c0-\u017e]\w+",
|
||||
# "AG <Stadt>" short form
|
||||
r"\bag\s+[a-zA-Z\u00c0-\u017e]\w+",
|
||||
# "Handelsregister AG/Amtsgericht <Stadt>"
|
||||
r"(?:handelsregister|register)\s+(?:ag|amtsgericht)\s+\w+",
|
||||
# "Sitz und Registergericht: M\u00fcnchen" \u2014 BMW pattern
|
||||
r"sitz\s+und\s+registergericht[\s:\.\-,]+[a-zA-Z\u00c0-\u017e]\w+",
|
||||
],
|
||||
"severity": "LOW",
|
||||
"hint": "Registergericht benennen (z.B. 'Amtsgericht Freiburg' oder 'AG Freiburg'). Beides ist korrekt.",
|
||||
|
||||
Reference in New Issue
Block a user