feat(profile+report): P17 — 4 Polish-Items
CI / detect-changes (push) Successful in 10s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 16s
CI / loc-budget (push) Successful in 19s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 39s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / detect-changes (push) Successful in 10s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 16s
CI / loc-budget (push) Successful in 19s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 39s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
A) Cookie-Policy-Architecture-Block Fallback auf DSE-Text wenn cookie via P15 deduped wurde. Erkennt jetzt auch single-doc Sites (Safetykon-Pattern). B) Konkrete-Aufgaben-Liste: Per-Doc-Cap (3) entfernt + globaler Cap 10→20. Safetykon zeigt jetzt 7 statt 4 Aufgaben. C) business_type-Klassifizierer: B2B-Service-Cluster aus P14 als Boost. Bei 2+ Service-Indikatoren (CE-Zertifizierung/Compliance/Auditierung) wird b2b_score angehoben. Safetykon: "B2C consulting" → "B2B (consulting)". D) Vendor-Extract Fallback auf DSE-Text wenn cookie deduped + keine CMP-Payloads. LLM extrahiert dann Vendors aus dem DSE-Text. Safetykon: 0 → 1 Vendor (Google Analytics aus dem DSE-Text erkannt). Smoke-Test Safetykon: alle 4 Polish-Items wirken, keine Regression. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -536,6 +536,15 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
|||||||
cookie_payloads.extend(e["cmp_payloads"])
|
cookie_payloads.extend(e["cmp_payloads"])
|
||||||
if e.get("text"):
|
if e.get("text"):
|
||||||
cookie_text = e["text"]
|
cookie_text = e["text"]
|
||||||
|
# P17-D: Fallback wenn cookie via P15 deduped wurde — nutze DSE-Text
|
||||||
|
# sofern Cookie-Begriffe drin sind, damit LLM-Vendor-Extract trotzdem
|
||||||
|
# greifen kann.
|
||||||
|
if not cookie_text and not cookie_payloads:
|
||||||
|
dse_t = doc_texts.get("dse", "")
|
||||||
|
if dse_t and any(w in dse_t.lower() for w in
|
||||||
|
("cookie", "tracking", "google analytics", "consent")):
|
||||||
|
cookie_text = dse_t
|
||||||
|
logger.info("P17-D: vendor-extract Fallback auf DSE (Cookie deduped)")
|
||||||
# Site-owner derived from the submitted URLs — drives the
|
# Site-owner derived from the submitted URLs — drives the
|
||||||
# INTERNAL/GROUP_COMPANY classification of vendor records.
|
# INTERNAL/GROUP_COMPANY classification of vendor records.
|
||||||
owner_name = _company_name_from_url(doc_entries) or ""
|
owner_name = _company_name_from_url(doc_entries) or ""
|
||||||
@@ -681,6 +690,19 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
|||||||
cookie_doc_url = e.get("url", "")
|
cookie_doc_url = e.get("url", "")
|
||||||
cookie_cmp_payloads = e.get("cmp_payloads") or []
|
cookie_cmp_payloads = e.get("cmp_payloads") or []
|
||||||
break
|
break
|
||||||
|
# P17-A: Fallback wenn Cookie-Doc via P15 deduped wurde — nutze
|
||||||
|
# den DSE-Text wenn er Cookie-Schluesselwoerter enthaelt.
|
||||||
|
if not cookie_doc_text:
|
||||||
|
dse_text = doc_texts.get("dse", "")
|
||||||
|
if dse_text and any(w in dse_text.lower() for w in
|
||||||
|
("cookie", "tracking", "google analytics",
|
||||||
|
"consent")):
|
||||||
|
cookie_doc_text = dse_text
|
||||||
|
dse_entry = next((e for e in doc_entries
|
||||||
|
if e.get("doc_type") == "dse"), {})
|
||||||
|
cookie_doc_url = dse_entry.get("url", "")
|
||||||
|
cookie_cmp_payloads = dse_entry.get("cmp_payloads") or []
|
||||||
|
logger.info("P17-A: cookie-arch fallback auf DSE (Cookie-Doc deduped)")
|
||||||
if cookie_doc_text:
|
if cookie_doc_text:
|
||||||
arch = detect_architecture(
|
arch = detect_architecture(
|
||||||
doc_url=cookie_doc_url,
|
doc_url=cookie_doc_url,
|
||||||
|
|||||||
@@ -182,7 +182,7 @@ def build_management_summary(results: list[DocCheckResult]) -> str:
|
|||||||
if c.level == 1 and not c.passed and not c.skipped
|
if c.level == 1 and not c.passed and not c.skipped
|
||||||
and c.severity != "INFO"
|
and c.severity != "INFO"
|
||||||
]
|
]
|
||||||
for c in failed_checks[:3]: # Max 3 per document
|
for c in failed_checks: # P17-B: kein Per-Doc-Cap
|
||||||
action = _check_to_action(r.label, c.label, c.hint)
|
action = _check_to_action(r.label, c.label, c.hint)
|
||||||
if action:
|
if action:
|
||||||
actions.append(action)
|
actions.append(action)
|
||||||
@@ -193,7 +193,7 @@ def build_management_summary(results: list[DocCheckResult]) -> str:
|
|||||||
'Konkrete Aufgaben:</h3>'
|
'Konkrete Aufgaben:</h3>'
|
||||||
'<ol style="font-size:13px;color:#475569;padding-left:20px;margin:0">'
|
'<ol style="font-size:13px;color:#475569;padding-left:20px;margin:0">'
|
||||||
)
|
)
|
||||||
for a in actions[:10]: # Max 10 actions
|
for a in actions[:20]: # P17-B: 10 -> 20
|
||||||
html.append(f'<li style="margin-bottom:6px">{a}</li>')
|
html.append(f'<li style="margin-bottom:6px">{a}</li>')
|
||||||
html.append('</ol>')
|
html.append('</ol>')
|
||||||
|
|
||||||
|
|||||||
@@ -237,6 +237,13 @@ async def detect_business_profile(documents: dict[str, str]) -> BusinessProfile:
|
|||||||
b2g_score = _count_hits(full_text, _B2G_KEYWORDS)
|
b2g_score = _count_hits(full_text, _B2G_KEYWORDS)
|
||||||
nonprofit_score = _count_hits(full_text, _NONPROFIT_KEYWORDS)
|
nonprofit_score = _count_hits(full_text, _NONPROFIT_KEYWORDS)
|
||||||
|
|
||||||
|
# P17-C: B2B-Dienstleister-Cluster (P14) als Boost — wenn ein Unternehmen
|
||||||
|
# CE-Zertifizierung / Compliance-Beratung / Auditierung / Schulungen anbietet,
|
||||||
|
# ist es i.d.R. B2B auch wenn die strikten B2B-Keywords nicht greifen.
|
||||||
|
b2b_service_boost = _count_hits(full_text, _B2B_SERVICE_POSITIVE)
|
||||||
|
if b2b_service_boost >= 2:
|
||||||
|
b2b_score += min(3, b2b_service_boost - 1)
|
||||||
|
|
||||||
# Missing documents as signal
|
# Missing documents as signal
|
||||||
has_agb = "agb" in documents
|
has_agb = "agb" in documents
|
||||||
has_widerruf = "widerruf" in documents
|
has_widerruf = "widerruf" in documents
|
||||||
|
|||||||
Reference in New Issue
Block a user