Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4434e3827b | |||
| 07cc00da11 |
@@ -82,6 +82,8 @@ class CompliancePDFGenerator:
|
|||||||
self._add_consent_section(story, ss, tenant_id)
|
self._add_consent_section(story, ss, tenant_id)
|
||||||
# Org Roles
|
# Org Roles
|
||||||
self._add_role_section(story, ss, tenant_id, project_id)
|
self._add_role_section(story, ss, tenant_id, project_id)
|
||||||
|
# Stufe 2 — Quellen- und Lizenz-Footer (Attribution-Renderer Task #23)
|
||||||
|
self._add_attribution_footer(story, ss)
|
||||||
# Footer
|
# Footer
|
||||||
story.append(Spacer(1, 15 * mm))
|
story.append(Spacer(1, 15 * mm))
|
||||||
story.append(Paragraph("Erstellt mit BreakPilot Compliance SDK", ss["Small"]))
|
story.append(Paragraph("Erstellt mit BreakPilot Compliance SDK", ss["Small"]))
|
||||||
@@ -214,3 +216,64 @@ class CompliancePDFGenerator:
|
|||||||
story.append(Paragraph("Keine Rollen zugewiesen.", ss["Body2"]))
|
story.append(Paragraph("Keine Rollen zugewiesen.", ss["Body2"]))
|
||||||
except Exception:
|
except Exception:
|
||||||
story.append(Paragraph("Rollen-Tabelle nicht vorhanden.", ss["Small"]))
|
story.append(Paragraph("Rollen-Tabelle nicht vorhanden.", ss["Small"]))
|
||||||
|
|
||||||
|
def _add_attribution_footer(self, story, ss) -> None:
|
||||||
|
"""Stufe 2 of the attribution renderer (Task #23).
|
||||||
|
|
||||||
|
Adds a "Quellen und Lizenzen" section listing the platform's
|
||||||
|
license-rule distribution and, crucially, the mandatory
|
||||||
|
attribution lines for Rule-2 sources (CC-BY-SA, OECD, Apache).
|
||||||
|
For Rule 1 sources the attribution is optional but rendered as
|
||||||
|
a brief reference list for auditability.
|
||||||
|
|
||||||
|
The section is added to every generated compliance PDF so each
|
||||||
|
export carries its own provenance footer — pauschale Hinweise
|
||||||
|
in AGB/Impressum reichen rechtlich nicht (siehe
|
||||||
|
project_attribution_strategy.md).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
rows = self.db.execute(text("""
|
||||||
|
SELECT cc.license_rule, COUNT(*) AS n,
|
||||||
|
array_agg(DISTINCT cpl.source_regulation ORDER BY cpl.source_regulation)
|
||||||
|
FILTER (WHERE cpl.source_regulation IS NOT NULL) AS sources
|
||||||
|
FROM compliance.canonical_controls cc
|
||||||
|
LEFT JOIN compliance.control_parent_links cpl ON cpl.control_uuid = cc.id
|
||||||
|
WHERE cc.license_rule IS NOT NULL
|
||||||
|
GROUP BY cc.license_rule
|
||||||
|
ORDER BY cc.license_rule
|
||||||
|
""")).fetchall()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("attribution footer skipped: %s", e)
|
||||||
|
return
|
||||||
|
if not rows:
|
||||||
|
return
|
||||||
|
|
||||||
|
rule_labels = {1: "Hoheitsrecht/Public Domain (woertlich)",
|
||||||
|
2: "Mit Attribution (CC-BY u.ae.)",
|
||||||
|
3: "Nur Identifier-Verweis"}
|
||||||
|
|
||||||
|
story.append(Spacer(1, 8 * mm))
|
||||||
|
story.append(Paragraph("Quellen & Lizenzen", ss["Section"]))
|
||||||
|
story.append(Paragraph(
|
||||||
|
"Dieser Bericht stuetzt sich auf klassifizierte Compliance-Controls "
|
||||||
|
"aus den folgenden Quellen. Jede Quelle ist deterministisch in eine "
|
||||||
|
"der drei Lizenzregeln (R1-R3) eingeordnet.", ss["Body2"]))
|
||||||
|
|
||||||
|
for r in rows:
|
||||||
|
rule = int(r.license_rule)
|
||||||
|
sources = (r.sources or [])[:8]
|
||||||
|
label = rule_labels.get(rule, f"Regel {rule}")
|
||||||
|
head = f"<b>R{rule} — {label}</b> ({r.n} Controls)"
|
||||||
|
story.append(Paragraph(head, ss["Body2"]))
|
||||||
|
if sources:
|
||||||
|
src_text = "; ".join(sources)
|
||||||
|
if len(r.sources or []) > 8:
|
||||||
|
src_text += f" und {len(r.sources) - 8} weitere"
|
||||||
|
story.append(Paragraph(src_text, ss["Small"]))
|
||||||
|
if rule == 2:
|
||||||
|
story.append(Paragraph(
|
||||||
|
"Pflicht-Attribution: Inhalte aus den oben genannten Quellen sind "
|
||||||
|
"unter den jeweiligen freien Lizenzen (z.B. CC-BY-SA, OECD-Public, "
|
||||||
|
"Apache-2.0) wiedergegeben. Original-Urheber bleibt in jeder "
|
||||||
|
"Weiterverwendung zu nennen.", ss["Small"]))
|
||||||
|
story.append(Spacer(1, 2 * mm))
|
||||||
|
|||||||
@@ -189,35 +189,41 @@ def parse_cookie_table(text: str) -> list[dict]:
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
# textContent-Output von HTML-Tabellen verkettet Zellen ohne Whitespace
|
||||||
|
# (z.B. VW: "Permanent/Protokoll_fbcTracking Cookies (Marketing)..."). Wir
|
||||||
|
# erkennen Cookie-Eintraege ueber 2 Anker:
|
||||||
|
# - Davor: typisches End-Token einer vorherigen Tabellen-Zelle
|
||||||
|
# (Speicherdauer-Suffix wie Permanent/Protokoll, Session Cookie, ...)
|
||||||
|
# - Danach: Kategorie-Token (Tracking Cookies, Funktionscookie, ...)
|
||||||
|
# Dazwischen: der Cookie-Name (3-50 Zeichen, alphanum/underscore/dash).
|
||||||
_FLAT_ROW_RE = re.compile(
|
_FLAT_ROW_RE = re.compile(
|
||||||
r"\b([A-Za-z_][A-Za-z0-9_\-\.]{1,40})\s+"
|
r"(?:Permanent/Protokoll|Session Cookie|Persistent Cookie|"
|
||||||
r"((?:Tracking|Session|Funktional|Marketing|Analytics|Performance|"
|
r"TagePersistent|TageSitzungs-Cookie|TageSession Cookie|"
|
||||||
r"Notwendig|Strictly\s+Necessary|Statistik|Personalisierung)"
|
r"MinutenPersistent|MinutenSession Cookie|StundenPersistent|"
|
||||||
r"[A-Za-zäöüÄÖÜß \-\(\)]*?Cookies?[^A-Z]{0,400}?)"
|
r"MonatePersistent|JahrePersistent)"
|
||||||
r"(?:(\d+)\s*(Sekunde|Minute|Stunde|Tag|Woche|Monat|Jahr|day|month|year)|"
|
r"([A-Za-z_][A-Za-z0-9_\-\.]{1,40}?)"
|
||||||
r"\b(Session|Permanent)\b)",
|
r"(?=Tracking Cookies|Session Cookies|Funktionscookie|Funktional|"
|
||||||
re.I | re.S,
|
r"Marketing|Analytics|Necessary)",
|
||||||
|
re.I,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def parse_flat_cookie_text(text: str) -> list[dict]:
|
def parse_flat_cookie_text(text: str) -> list[dict]:
|
||||||
"""Variante fuer Sites wie VW die ihre Cookie-Tabelle als flachen
|
"""Variante fuer Sites wie VW die ihre Cookie-Tabelle als flachen
|
||||||
Text liefern (Cookie-Name + Kategorie + Beschreibung + Dauer in
|
Text liefern (textContent-Output ohne Whitespace zwischen Zellen).
|
||||||
einem Block hintereinander, ohne klare Trenner).
|
|
||||||
|
|
||||||
Regex sucht nach 'NAME [Tracking|Session|Funktional...] Cookies
|
Regex anchored auf vorherige Speicherdauer-Suffixe + folgende
|
||||||
... [13 Monate|Session|Permanent]' und behandelt jeden Match als
|
Kategorie-Token → extrahiert den Cookie-Namen dazwischen.
|
||||||
eine Tabellen-Zeile.
|
|
||||||
"""
|
"""
|
||||||
if not text or len(text) < 500:
|
if not text or len(text) < 500:
|
||||||
return []
|
return []
|
||||||
matches = list(_FLAT_ROW_RE.finditer(text))
|
names = _FLAT_ROW_RE.findall(text)
|
||||||
if len(matches) < 3:
|
if len(names) < 3:
|
||||||
return []
|
return []
|
||||||
by_vendor: dict[str, dict] = {}
|
by_vendor: dict[str, dict] = {}
|
||||||
seen_names: set[str] = set()
|
seen_names: set[str] = set()
|
||||||
for m in matches:
|
for raw in names:
|
||||||
name = m.group(1).strip()
|
name = raw.strip()
|
||||||
nl = name.lower()
|
nl = name.lower()
|
||||||
if nl in seen_names:
|
if nl in seen_names:
|
||||||
continue
|
continue
|
||||||
@@ -226,30 +232,23 @@ def parse_flat_cookie_text(text: str) -> list[dict]:
|
|||||||
"marketing", "analytics", "werbung", "anbieter",
|
"marketing", "analytics", "werbung", "anbieter",
|
||||||
"tracking", "cookie", "cookies", "und", "von",
|
"tracking", "cookie", "cookies", "und", "von",
|
||||||
"einer", "ist", "alle", "noch", "auch", "name",
|
"einer", "ist", "alle", "noch", "auch", "name",
|
||||||
"art", "zweck", "dauer"):
|
"art", "zweck", "dauer", "test"):
|
||||||
continue
|
continue
|
||||||
if len(name) < 3 or len(name) > 60:
|
if len(name) < 3 or len(name) > 60:
|
||||||
continue
|
continue
|
||||||
seen_names.add(nl)
|
seen_names.add(nl)
|
||||||
category = _normalize_category(m.group(2) or "")
|
|
||||||
persistence = ""
|
|
||||||
if m.group(3):
|
|
||||||
persistence = f"{m.group(3)} {m.group(4)}"
|
|
||||||
elif m.group(5):
|
|
||||||
persistence = m.group(5)
|
|
||||||
purpose = (m.group(2) or "").strip()[:300]
|
|
||||||
vendor = _guess_vendor(name) or "Unbekannter Anbieter"
|
vendor = _guess_vendor(name) or "Unbekannter Anbieter"
|
||||||
entry = by_vendor.setdefault(vendor, {
|
entry = by_vendor.setdefault(vendor, {
|
||||||
"name": vendor, "country": "",
|
"name": vendor, "country": "",
|
||||||
"purpose": purpose, "category": category,
|
"purpose": "", "category": "",
|
||||||
"opt_out_url": "", "privacy_policy_url": "",
|
"opt_out_url": "", "privacy_policy_url": "",
|
||||||
"persistence": persistence,
|
"persistence": "",
|
||||||
"cookies": [],
|
"cookies": [],
|
||||||
"source": "flat_pattern",
|
"source": "flat_pattern",
|
||||||
})
|
})
|
||||||
entry["cookies"].append({
|
entry["cookies"].append({
|
||||||
"name": name, "purpose": purpose[:200],
|
"name": name, "purpose": "",
|
||||||
"expiry": persistence, "is_third_party": True,
|
"expiry": "", "is_third_party": True,
|
||||||
})
|
})
|
||||||
out = list(by_vendor.values())
|
out = list(by_vendor.values())
|
||||||
logger.info("parse_flat_cookie_text: %d vendors / %d cookies",
|
logger.info("parse_flat_cookie_text: %d vendors / %d cookies",
|
||||||
|
|||||||
Reference in New Issue
Block a user