feat(compliance-check): skip Widerruf for B2B, limit MCs, fix industry
Build + Deploy / build-admin-compliance (push) Successful in 2m1s
Build + Deploy / build-tts (push) Successful in 2m48s
Build + Deploy / build-document-crawler (push) Successful in 52s
Build + Deploy / build-dsms-node (push) Successful in 13s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 15s
CI / secret-scan (push) Has been skipped
Build + Deploy / build-backend-compliance (push) Successful in 4m20s
Build + Deploy / build-ai-sdk (push) Successful in 53s
Build + Deploy / build-developer-portal (push) Successful in 2m6s
Build + Deploy / build-dsms-gateway (push) Successful in 11s
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m45s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 45s
CI / test-python-backend (push) Successful in 41s
CI / test-python-document-crawler (push) Successful in 26s
CI / test-python-dsms-gateway (push) Successful in 21s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 3m17s
Build + Deploy / build-admin-compliance (push) Successful in 2m1s
Build + Deploy / build-tts (push) Successful in 2m48s
Build + Deploy / build-document-crawler (push) Successful in 52s
Build + Deploy / build-dsms-node (push) Successful in 13s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 15s
CI / secret-scan (push) Has been skipped
Build + Deploy / build-backend-compliance (push) Successful in 4m20s
Build + Deploy / build-ai-sdk (push) Successful in 53s
Build + Deploy / build-developer-portal (push) Successful in 2m6s
Build + Deploy / build-dsms-gateway (push) Successful in 11s
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m45s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 45s
CI / test-python-backend (push) Successful in 41s
CI / test-python-document-crawler (push) Successful in 26s
CI / test-python-dsms-gateway (push) Successful in 21s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 3m17s
- Skip Widerrufsbelehrung check entirely for B2B/B2G businesses
- Limit MC checks to top 20 per doc_type (by severity) to reduce noise (e.g. 75 impressum MCs → 20, avoiding 55 irrelevant FAILs)
- Add consulting/manufacturing industry keywords (arbeitssicherheit, brandschutz, werkzeugbau, etc.)
- Lower industry detection threshold from 2 to 1 keyword hit

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -194,12 +194,22 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
"COMPLIANCE_USE_AGENT", "false"
|
||||
).lower() == "true"
|
||||
|
||||
# Filter out doc_types that don't apply to this business profile
|
||||
skip_types = _get_skip_types(profile)
|
||||
|
||||
for i, entry in enumerate(doc_entries):
|
||||
text = entry["text"]
|
||||
doc_type = entry["doc_type"]
|
||||
label = _doc_type_label(doc_type)
|
||||
url = entry["url"]
|
||||
|
||||
if doc_type in skip_types:
|
||||
results.append(DocCheckResult(
|
||||
label=label, url=url, doc_type=doc_type,
|
||||
error=skip_types[doc_type],
|
||||
))
|
||||
continue
|
||||
|
||||
_update(check_id, f"Pruefe {label} ({i+1}/{len(doc_entries)})...")
|
||||
|
||||
if not text or len(text) < 50:
|
||||
@@ -351,10 +361,10 @@ async def _check_single(
|
||||
completeness = f.get("completeness_pct", 0)
|
||||
correctness = f.get("correctness_pct", 0)
|
||||
|
||||
# Master Control checks
|
||||
# Master Control checks (top 20 by severity to avoid noise)
|
||||
try:
|
||||
mc_results = await check_document_with_controls(
|
||||
text, doc_type, label, max_controls=0, use_agent=use_agent,
|
||||
text, doc_type, label, max_controls=20, use_agent=use_agent,
|
||||
)
|
||||
if mc_results:
|
||||
for mc in mc_results:
|
||||
@@ -395,6 +405,19 @@ async def _check_single(
|
||||
)
|
||||
|
||||
|
||||
def _get_skip_types(profile) -> dict[str, str]:
    """Return the doc_types to skip entirely for the given business profile.

    Args:
        profile: Object exposing ``business_type`` and ``has_online_shop``.
            ``None`` is tolerated and results in nothing being skipped.

    Returns:
        Dict mapping doc_type -> skip reason. Empty when no doc_type is
        skipped for this profile.
    """
    skip: dict[str, str] = {}
    if profile is None:
        return skip

    business_type = profile.business_type
    if isinstance(business_type, str):
        # Tolerate casing/whitespace variance such as "B2B" or " b2g ".
        business_type = business_type.strip().lower()
    if business_type not in ("b2b", "b2g"):
        return skip

    # Both skip rules below only apply to B2B/B2G profiles.
    skip["widerruf"] = "Uebersprungen: Widerrufsbelehrung nur fuer B2C relevant"
    if not profile.has_online_shop:
        skip["nutzungsbedingungen"] = (
            "Uebersprungen: Nutzungsbedingungen bei B2B ohne Shop selten relevant"
        )
    return skip
|
||||
|
||||
|
||||
def _apply_profile_filter(result, profile, doc_type: str):
|
||||
"""Adjust INFO-level checks based on business profile context.
|
||||
|
||||
|
||||
@@ -104,8 +104,10 @@ _INDUSTRY_KEYWORDS = {
|
||||
"public": ["kommune", "stadtverwaltung", "buergerservice", "bürgerservice", "rathaus"],
|
||||
"finance": ["bank", "versicherung", "finanz", "kredit", "anlage"],
|
||||
"education": ["schule", "bildung", "unterricht", "lehrplan", "schueler", "schüler"],
|
||||
"consulting": ["beratung", "consulting", "schulung", "seminar", "gutachten", "audit"],
|
||||
"manufacturing": ["fertigung", "produktion", "maschinenbau", "anlagenbau", "zulieferer"],
|
||||
"consulting": ["beratung", "consulting", "schulung", "seminar", "gutachten", "audit",
|
||||
"arbeitssicherheit", "brandschutz", "sicherheitstechnik", "zertifizierung"],
|
||||
"manufacturing": ["fertigung", "produktion", "maschinenbau", "anlagenbau", "zulieferer",
|
||||
"werkzeugbau", "spritzguss", "cnc", "industrietechnik"],
|
||||
"media": ["redaktion", "verlag", "medien", "journalismus", "presse"],
|
||||
}
|
||||
|
||||
@@ -224,7 +226,7 @@ async def detect_business_profile(documents: dict[str, str]) -> BusinessProfile:
|
||||
industry_scores: dict[str, int] = {}
|
||||
for industry, keywords in _INDUSTRY_KEYWORDS.items():
|
||||
hits = _count_hits(full_text, keywords)
|
||||
if hits >= 2:
|
||||
if hits >= 1:
|
||||
industry_scores[industry] = hits
|
||||
|
||||
if industry_scores:
|
||||
|
||||
Reference in New Issue
Block a user