def _get_skip_types(profile) -> dict[str, str]:
    """Return the doc_types to skip entirely for a given business profile.

    Args:
        profile: Business profile object. ``business_type`` is always read;
            ``has_online_shop`` is read only when the business type is
            ``"b2b"`` or ``"b2g"``. ``None`` means no profile is known, in
            which case nothing is skipped.

    Returns:
        A new dict mapping doc_type -> skip reason. Empty when every
        document type should be checked.
    """
    skip: dict[str, str] = {}

    # Without a profile there is no basis for skipping: check everything
    # rather than failing on the attribute access below.
    if profile is None:
        return skip

    # Both skip rules apply only to non-consumer businesses, so test the
    # business type once and leave early for everything else.
    if profile.business_type not in ("b2b", "b2g"):
        return skip

    skip["widerruf"] = "Uebersprungen: Widerrufsbelehrung nur fuer B2C relevant"
    if not profile.has_online_shop:
        skip["nutzungsbedingungen"] = "Uebersprungen: Nutzungsbedingungen bei B2B ohne Shop selten relevant"
    return skip