fix: 5 issues from IHK scan — false positives + missing etracker
1. GA regex: G-\w{5,} matched CSS classes (g-7031048). Now requires
G-[A-Z0-9]{8,12} (uppercase after G-, 8-12 chars = real GA4 ID)
2. External page scanning: links found inside the DSE are now followed on the SAME DOMAIN only.
Previously the scanner followed links to etracker.com, google.de/policies etc.
and reported the services detected on THOSE sites as IHK services.
3. Added etracker to service registry (DE, ePrivacy-certified)
4. CSS/JS/image files excluded from page scanning
5. Navigation-pattern links for deeper DSE sub-pages
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -16,7 +16,7 @@ SERVICE_REGISTRY: dict[str, dict] = {
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# TRACKING & ANALYTICS
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
r"google.?analytics|gtag\(|UA-\d+|G-\w{5,}": {
|
||||
r"google.?analytics|gtag\(|UA-\d{4,}|G-[A-Z0-9]{8,12}": {
|
||||
"id": "google_analytics", "name": "Google Analytics", "category": "tracking",
|
||||
"provider": "Google LLC", "country": "US", "eu_adequate": False,
|
||||
"requires_consent": True, "legal_ref": "§25 TDDDG, Art. 44-49 DSGVO",
|
||||
@@ -51,6 +51,11 @@ SERVICE_REGISTRY: dict[str, dict] = {
|
||||
"provider": "Plausible Insights", "country": "EE", "eu_adequate": True,
|
||||
"requires_consent": False, "legal_ref": "EU-Anbieter, cookieless",
|
||||
},
|
||||
r"etracker\.com|etracker\.de|etrackerCookieless": {
|
||||
"id": "etracker", "name": "etracker", "category": "tracking",
|
||||
"provider": "etracker GmbH", "country": "DE", "eu_adequate": True,
|
||||
"requires_consent": True, "legal_ref": "§25 TDDDG, DE-Anbieter mit ePrivacy-Siegel",
|
||||
},
|
||||
r"pirsch\.io": {
|
||||
"id": "pirsch", "name": "Pirsch Analytics", "category": "tracking",
|
||||
"provider": "Pirsch GmbH", "country": "DE", "eu_adequate": True,
|
||||
|
||||
Reference in New Issue
Block a user