feat(compliance-check): skip Widerruf for B2B, limit MCs, fix industry
Build + Deploy / build-admin-compliance (push) Successful in 2m1s
Build + Deploy / build-tts (push) Successful in 2m48s
Build + Deploy / build-document-crawler (push) Successful in 52s
Build + Deploy / build-dsms-node (push) Successful in 13s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 15s
CI / secret-scan (push) Has been skipped
Build + Deploy / build-backend-compliance (push) Successful in 4m20s
Build + Deploy / build-ai-sdk (push) Successful in 53s
Build + Deploy / build-developer-portal (push) Successful in 2m6s
Build + Deploy / build-dsms-gateway (push) Successful in 11s
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m45s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 45s
CI / test-python-backend (push) Successful in 41s
CI / test-python-document-crawler (push) Successful in 26s
CI / test-python-dsms-gateway (push) Successful in 21s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 3m17s

- Skip the Widerrufsbelehrung check entirely for B2B/B2G businesses, and also
  skip the Nutzungsbedingungen check for B2B/B2G businesses without an online shop
- Limit Master Control (MC) checks to the top 20 per doc_type (by severity) to
  reduce noise (e.g. 75 impressum MCs → 20, avoiding 55 irrelevant FAILs)
- Add consulting/manufacturing industry keywords (arbeitssicherheit,
  brandschutz, werkzeugbau, etc.)
- Lower industry detection threshold from 2 to 1 keyword hit

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-12 17:03:57 +02:00
parent b42e1cd091
commit be9cfdc2d4
2 changed files with 30 additions and 5 deletions
@@ -194,12 +194,22 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
"COMPLIANCE_USE_AGENT", "false"
).lower() == "true"
# Filter out doc_types that don't apply to this business profile
skip_types = _get_skip_types(profile)
for i, entry in enumerate(doc_entries):
text = entry["text"]
doc_type = entry["doc_type"]
label = _doc_type_label(doc_type)
url = entry["url"]
if doc_type in skip_types:
results.append(DocCheckResult(
label=label, url=url, doc_type=doc_type,
error=skip_types[doc_type],
))
continue
_update(check_id, f"Pruefe {label} ({i+1}/{len(doc_entries)})...")
if not text or len(text) < 50:
@@ -351,10 +361,10 @@ async def _check_single(
completeness = f.get("completeness_pct", 0)
correctness = f.get("correctness_pct", 0)
# Master Control checks
# Master Control checks (top 20 by severity to avoid noise)
try:
mc_results = await check_document_with_controls(
text, doc_type, label, max_controls=0, use_agent=use_agent,
text, doc_type, label, max_controls=20, use_agent=use_agent,
)
if mc_results:
for mc in mc_results:
@@ -395,6 +405,19 @@ async def _check_single(
)
def _get_skip_types(profile) -> dict[str, str]:
"""Return doc_types to skip entirely based on business profile.
Returns dict mapping doc_type -> skip reason.
"""
skip: dict[str, str] = {}
if profile.business_type in ("b2b", "b2g"):
skip["widerruf"] = "Uebersprungen: Widerrufsbelehrung nur fuer B2C relevant"
if profile.business_type in ("b2b", "b2g") and not profile.has_online_shop:
skip["nutzungsbedingungen"] = "Uebersprungen: Nutzungsbedingungen bei B2B ohne Shop selten relevant"
return skip
def _apply_profile_filter(result, profile, doc_type: str):
"""Adjust INFO-level checks based on business profile context.
@@ -104,8 +104,10 @@ _INDUSTRY_KEYWORDS = {
"public": ["kommune", "stadtverwaltung", "buergerservice", "bürgerservice", "rathaus"],
"finance": ["bank", "versicherung", "finanz", "kredit", "anlage"],
"education": ["schule", "bildung", "unterricht", "lehrplan", "schueler", "schüler"],
"consulting": ["beratung", "consulting", "schulung", "seminar", "gutachten", "audit"],
"manufacturing": ["fertigung", "produktion", "maschinenbau", "anlagenbau", "zulieferer"],
"consulting": ["beratung", "consulting", "schulung", "seminar", "gutachten", "audit",
"arbeitssicherheit", "brandschutz", "sicherheitstechnik", "zertifizierung"],
"manufacturing": ["fertigung", "produktion", "maschinenbau", "anlagenbau", "zulieferer",
"werkzeugbau", "spritzguss", "cnc", "industrietechnik"],
"media": ["redaktion", "verlag", "medien", "journalismus", "presse"],
}
@@ -224,7 +226,7 @@ async def detect_business_profile(documents: dict[str, str]) -> BusinessProfile:
industry_scores: dict[str, int] = {}
for industry, keywords in _INDUSTRY_KEYWORDS.items():
hits = _count_hits(full_text, keywords)
if hits >= 2:
if hits >= 1:
industry_scores[industry] = hits
if industry_scores: