This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/klausur-service/backend/template_sources.py
BreakPilot Dev 19855efacc
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
feat: BreakPilot PWA - Full codebase (clean push without large binaries)
All services: admin-v2, studio-v2, website, ai-compliance-sdk,
consent-service, klausur-service, voice-service, and infrastructure.
Large PDFs and compiled binaries excluded via .gitignore.
2026-02-11 13:25:58 +01:00

460 lines
17 KiB
Python

"""
Template Sources Configuration for Legal Templates RAG.
Defines all source repositories and their license metadata for the
bp_legal_templates collection. Sources are organized by license type
for proper attribution compliance.
License Types:
- PUBLIC_DOMAIN: German official works (§5 UrhG) - no attribution needed
- CC0: Public Domain Dedication - attribution not required, but recommended
- UNLICENSE: Public Domain equivalent - no attribution needed
- MIT: Attribution required on redistribution
- CC_BY_4: Attribution + change notices required
- REUSE_NOTICE: May quote with source, no distortion allowed
"""
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Optional
class LicenseType(Enum):
    """License categories a template source can be published under.

    Each member's value is the lowercase string identifier of the license.
    The members are used as keys of the ``LICENSES`` registry, which holds
    the compliance details for each category.
    """
    PUBLIC_DOMAIN = "public_domain"  # Official works under §5 UrhG (German Copyright Act)
    CC0 = "cc0"  # CC0 1.0 Universal
    UNLICENSE = "unlicense"  # Unlicense (public domain)
    MIT = "mit"  # MIT License
    CC_BY_4 = "cc_by_4"  # CC BY 4.0 International
    REUSE_NOTICE = "reuse_notice"  # EU reuse notice (source required)
@dataclass
class LicenseInfo:
    """Compliance metadata describing a single license.

    Holds the license identity (``id``, ``name``, ``url``), a set of boolean
    permission/restriction flags, and an optional ``str.format`` template
    used to render attribution lines.
    """

    id: LicenseType
    name: str
    url: str
    attribution_required: bool
    share_alike: bool = False
    no_derivatives: bool = False
    commercial_use: bool = True
    training_allowed: bool = True
    output_allowed: bool = True
    modification_allowed: bool = True
    distortion_prohibited: bool = False
    attribution_template: Optional[str] = None

    def get_attribution_text(self, source_name: str, source_url: str) -> str:
        """Render the attribution line for a source under this license.

        Args:
            source_name: Display name of the source being attributed.
            source_url: URL of the source being attributed.

        Returns:
            An empty string when ``attribution_required`` is false. Otherwise
            the ``attribution_template`` filled with ``source_name``,
            ``source_url``, ``license_name`` and ``license_url``; if no
            template is set, a generic ``"Source: <name> (<license>)"`` line.
        """
        if self.attribution_required:
            template = self.attribution_template
            if not template:
                # No template configured: generic fallback (does not include the URL).
                return f"Source: {source_name} ({self.name})"
            placeholders = {
                "source_name": source_name,
                "source_url": source_url,
                "license_name": self.name,
                "license_url": self.url,
            }
            return template.format(**placeholders)
        return ""
# Registry of compliance details for every supported license type.
# Each entry is keyed by its own ``id``, so a key can never disagree with the
# LicenseInfo it points to. Insertion order follows the tuple below.
LICENSES: dict[LicenseType, LicenseInfo] = {
    entry.id: entry
    for entry in (
        LicenseInfo(
            id=LicenseType.PUBLIC_DOMAIN,
            name="Public Domain (§5 UrhG)",
            url="https://www.gesetze-im-internet.de/urhg/__5.html",
            attribution_required=False,
            training_allowed=True,
            output_allowed=True,
            modification_allowed=True,
        ),
        LicenseInfo(
            id=LicenseType.CC0,
            name="CC0 1.0 Universal",
            url="https://creativecommons.org/publicdomain/zero/1.0/",
            attribution_required=False,  # Not required but recommended
            training_allowed=True,
            output_allowed=True,
            modification_allowed=True,
            attribution_template="[{source_name}]({source_url}) - CC0 1.0",
        ),
        LicenseInfo(
            id=LicenseType.UNLICENSE,
            name="Unlicense",
            url="https://unlicense.org/",
            attribution_required=False,
            training_allowed=True,
            output_allowed=True,
            modification_allowed=True,
        ),
        LicenseInfo(
            id=LicenseType.MIT,
            name="MIT License",
            url="https://opensource.org/licenses/MIT",
            attribution_required=True,
            training_allowed=True,
            output_allowed=True,
            modification_allowed=True,
            attribution_template="Based on [{source_name}]({source_url}) - MIT License",
        ),
        LicenseInfo(
            id=LicenseType.CC_BY_4,
            name="CC BY 4.0 International",
            url="https://creativecommons.org/licenses/by/4.0/",
            attribution_required=True,
            training_allowed=False,  # CC BY 4.0 may restrict training
            output_allowed=True,
            modification_allowed=True,
            attribution_template=(
                "Adapted from [{source_name}]({source_url}), "
                "licensed under [CC BY 4.0]({license_url}). Changes were made."
            ),
        ),
        LicenseInfo(
            id=LicenseType.REUSE_NOTICE,
            name="EU Reuse Notice",
            url="https://commission.europa.eu/legal-notice_en",
            attribution_required=True,
            training_allowed=False,
            output_allowed=True,
            modification_allowed=False,
            distortion_prohibited=True,
            attribution_template="Source: {source_name} ({source_url})",
        ),
    )
}
@dataclass
class SourceConfig:
    """Describes one template source (a repository and/or a website).

    Combines the source's identity and license with the kinds of templates,
    languages and jurisdiction it covers, plus the file patterns to include
    or exclude and flags controlling priority and activation.
    """

    name: str
    license_type: LicenseType
    template_types: List[str]
    languages: List[str]
    jurisdiction: str
    description: str
    repo_url: Optional[str] = None
    web_url: Optional[str] = None
    # A fresh default list is created per instance via default_factory.
    file_patterns: List[str] = field(default_factory=lambda: ["*.md", "*.txt", "*.html"])
    exclude_patterns: List[str] = field(default_factory=list)
    priority: int = 1  # 1 = highest priority (CC0), 5 = lowest (REUSE_NOTICE)
    enabled: bool = True

    @property
    def license_info(self) -> LicenseInfo:
        """Look up the ``LicenseInfo`` registered for ``license_type``."""
        info = LICENSES[self.license_type]
        return info

    def get_source_url(self) -> str:
        """Return the primary URL of this source.

        The repository URL wins over the web URL; an empty string is
        returned when neither is set.
        """
        for candidate in (self.repo_url, self.web_url):
            if candidate:
                return candidate
        return ""
# =============================================================================
# Registry of all template sources, grouped into phases by license type.
# Phase 1: CC0 sources (highest priority - no attribution needed)
# =============================================================================
TEMPLATE_SOURCES: List[SourceConfig] = [
    # GitHub Site Policy (CC0)
    SourceConfig(
        name="github-site-policy",
        repo_url="https://github.com/github/site-policy",
        license_type=LicenseType.CC0,
        template_types=["terms_of_service", "privacy_policy", "community_guidelines", "acceptable_use"],
        languages=["en"],
        jurisdiction="US",
        description="GitHub's site policies including Terms of Service, Privacy Policy, and Community Guidelines. High-quality, well-structured legal templates.",
        file_patterns=["Policies/*.md", "*.md"],
        exclude_patterns=["README.md", "CONTRIBUTING.md", "LICENSE.md", "archived/*"],
        priority=1,
    ),
    # opr.vc GDPR sample texts (CC0)
    SourceConfig(
        name="opr-vc",
        repo_url="https://github.com/oprvc/oprvc.github.io",
        web_url="https://opr.vc/",
        license_type=LicenseType.CC0,
        template_types=["privacy_policy", "impressum"],
        languages=["de"],
        jurisdiction="DE",
        description="Open Privacy Resource - DSGVO-konforme Mustertexte für Datenschutzerklärungen und Impressum. Speziell für deutsche Websites.",
        file_patterns=["*.md", "*.html", "_posts/*.md"],
        priority=1,
    ),
    # Open Gov Foundation (CC0)
    SourceConfig(
        name="opengovfoundation-site-policy",
        repo_url="https://github.com/opengovfoundation/site-policy",
        license_type=LicenseType.CC0,
        template_types=["terms_of_service", "privacy_policy", "copyright_policy"],
        languages=["en"],
        jurisdiction="US",
        description="OpenGov Foundation's site policies. Clean, reusable templates for open government projects.",
        file_patterns=["*.md"],
        priority=1,
    ),
    # Creative Commons Legal Tools Data (CC0)
    SourceConfig(
        name="cc-legal-tools-data",
        repo_url="https://github.com/creativecommons/cc-legal-tools-data",
        license_type=LicenseType.CC0,
        template_types=["license_text"],
        languages=["de", "en"],
        jurisdiction="INTL",
        description="Creative Commons license texts in multiple languages. Useful as reference for license templates.",
        file_patterns=["legalcode/**/legalcode.de.html", "legalcode/**/legalcode.en.html"],
        priority=1,
    ),
    # =============================================================================
    # Phase 2: MIT sources (attribution required on redistribution)
    # =============================================================================
    # Webflorist Privacy Policy Text (MIT)
    SourceConfig(
        name="webflorist-privacy-policy",
        repo_url="https://github.com/webflorist/privacy-policy-text",
        license_type=LicenseType.MIT,
        template_types=["privacy_policy"],
        languages=["de", "en"],
        jurisdiction="EU",
        description="Modular GDPR-compliant privacy policy texts in JSON/PHP format. Highly customizable with variable sections.",
        file_patterns=["src/**/*.json", "src/**/*.php", "*.md"],
        priority=2,
    ),
    # Tempest Privacy Policy Generator (MIT)
    SourceConfig(
        name="tempest-privacy-policy",
        repo_url="https://github.com/Tempest-Solutions-Company/privacy-policy-generator",
        license_type=LicenseType.MIT,
        template_types=["privacy_policy"],
        languages=["en"],
        jurisdiction="INTL",
        description="Privacy policy generator with templates for various use cases.",
        file_patterns=["templates/*.md", "src/**/*.txt", "*.md"],
        priority=2,
    ),
    # Tempest Terms of Service Generator (MIT)
    SourceConfig(
        name="tempest-terms-of-service",
        repo_url="https://github.com/Tempest-Solutions-Company/terms-of-service-generator",
        license_type=LicenseType.MIT,
        template_types=["terms_of_service", "dpa"],
        languages=["en"],
        jurisdiction="INTL",
        description="Terms of Service and DPA clause generator templates.",
        file_patterns=["templates/*.md", "src/**/*.txt", "*.md"],
        priority=2,
    ),
    # Tempest Cookie Banner (MIT)
    SourceConfig(
        name="tempest-cookie-banner",
        repo_url="https://github.com/Tempest-Solutions-Company/cookie-banner-consent-solution",
        license_type=LicenseType.MIT,
        template_types=["cookie_banner", "cookie_policy"],
        languages=["en"],
        jurisdiction="EU",
        description="Cookie consent banner texts and templates for GDPR/ePrivacy compliance.",
        file_patterns=["templates/*.md", "src/**/*.txt", "*.md", "locales/*.json"],
        priority=2,
    ),
    # =============================================================================
    # Phase 3: CC BY 4.0 (attribution + marking of changes)
    # =============================================================================
    # Common Paper Standards (CC BY 4.0)
    # NOTE(review): repo_url points at the SLA repository only, while
    # template_types also lists cloud_service_agreement, terms_of_service, nda
    # and dpa - presumably further repositories are needed; verify.
    SourceConfig(
        name="common-paper-standards",
        repo_url="https://github.com/CommonPaper/SLA",
        web_url="https://commonpaper.com/standards/",
        license_type=LicenseType.CC_BY_4,
        template_types=["sla", "cloud_service_agreement", "terms_of_service", "nda", "dpa"],
        languages=["en"],
        jurisdiction="US",
        description="Common Paper's standardized B2B SaaS contract templates. Industry-standard agreements for cloud services.",
        file_patterns=["*.md", "versions/**/*.md"],
        priority=3,
    ),
    # Sample data usage clauses (CC BY 4.0)
    SourceConfig(
        name="datennutzungsklauseln-muster",
        repo_url="https://gitlab.opencode.de/wernerth/datennutzungsklauseln-muster",
        license_type=LicenseType.CC_BY_4,
        template_types=["data_usage_clause", "dpa"],
        languages=["de"],
        jurisdiction="DE",
        description="B2B Datennutzungsklauseln für Verträge. Speziell für deutsche Unternehmen.",
        file_patterns=["*.md", "klauseln/*.md"],
        priority=3,
    ),
    # =============================================================================
    # Phase 4: Official works (§5 UrhG - free of copyright, reference only)
    # =============================================================================
    # Bundestag laws (Unlicense)
    SourceConfig(
        name="bundestag-gesetze",
        repo_url="https://github.com/bundestag/gesetze",
        license_type=LicenseType.UNLICENSE,
        template_types=["law_reference"],
        languages=["de"],
        jurisdiction="DE",
        description="Deutsche Bundesgesetze im Markdown-Format. Referenz für DDG, TDDDG, EGBGB Muster.",
        file_patterns=["d/ddg/*.md", "t/tdddg/*.md", "e/egbgb/*.md", "b/bgb/*.md"],
        priority=4,
    ),
    # Gesetze im Internet (Public Domain via §5 UrhG)
    SourceConfig(
        name="gesetze-im-internet",
        web_url="https://www.gesetze-im-internet.de/",
        license_type=LicenseType.PUBLIC_DOMAIN,
        template_types=["law_reference", "widerruf", "impressum"],
        languages=["de"],
        jurisdiction="DE",
        description="Amtliche Gesetzestexte. DDG §5 (Impressum), TDDDG §25, EGBGB Muster-Widerrufsformular.",
        file_patterns=[],  # Web scraping required
        enabled=False,  # Requires custom web crawler
        priority=4,
    ),
    # EUR-Lex (Public Domain + Reuse Notice)
    # NOTE(review): the heading mentions a reuse notice, but license_type is
    # PUBLIC_DOMAIN, whose LICENSES entry sets attribution_required=False -
    # confirm that no source attribution is needed before enabling this source.
    SourceConfig(
        name="eur-lex",
        web_url="https://eur-lex.europa.eu/",
        license_type=LicenseType.PUBLIC_DOMAIN,
        template_types=["scc", "law_reference"],
        languages=["de", "en"],
        jurisdiction="EU",
        description="EU-Recht: DSGVO Artikel, DSA, SCC (Durchführungsbeschluss 2021/914).",
        file_patterns=[],  # Web scraping required
        enabled=False,  # Requires custom web crawler
        priority=4,
    ),
    # =============================================================================
    # Phase 5: Reuse notices (guidance used as reference)
    # =============================================================================
    # EDPB Guidelines (Reuse Notice)
    SourceConfig(
        name="edpb-guidelines",
        web_url="https://www.edpb.europa.eu/",
        license_type=LicenseType.REUSE_NOTICE,
        template_types=["guidance"],
        languages=["de", "en"],
        jurisdiction="EU",
        description="EDPB Datenschutz-Guidelines und FAQs. Als Referenz verwendbar, keine Sinnentstellung erlaubt.",
        file_patterns=[],  # Web scraping required
        enabled=False,  # Requires custom web crawler
        priority=5,
    ),
    # EDPS Resources (Reuse Notice)
    SourceConfig(
        name="edps-resources",
        web_url="https://www.edps.europa.eu/",
        license_type=LicenseType.REUSE_NOTICE,
        template_types=["guidance"],
        languages=["de", "en"],
        jurisdiction="EU",
        description="EDPS Datenschutz-Ressourcen und FAQs. Als Referenz verwendbar.",
        file_patterns=[],  # Web scraping required
        enabled=False,  # Requires custom web crawler
        priority=5,
    ),
    # EU Commission Policies (CC BY 4.0)
    SourceConfig(
        name="eu-commission-policies",
        web_url="https://commission.europa.eu/",
        license_type=LicenseType.CC_BY_4,
        template_types=["guidance", "policy"],
        languages=["de", "en"],
        jurisdiction="EU",
        description="EU-Kommission Policy-Dokumente. CC BY 4.0 lizenziert.",
        file_patterns=[],  # Web scraping required
        enabled=False,  # Requires custom web crawler
        priority=5,
    ),
]
def get_enabled_sources() -> List[SourceConfig]:
    """Return the configured sources whose ``enabled`` flag is set.

    The result keeps the order of ``TEMPLATE_SOURCES``.
    """
    return [
        source
        for source in TEMPLATE_SOURCES
        if source.enabled
    ]
def get_sources_by_priority(max_priority: int = 5) -> List[SourceConfig]:
    """Return enabled sources up to a priority level, best priority first.

    Args:
        max_priority: Largest priority number to include (lower numbers
            mean higher priority).

    Returns:
        Enabled sources with ``priority <= max_priority``, sorted ascending
        by priority.
    """
    eligible = [src for src in get_enabled_sources() if src.priority <= max_priority]
    # list.sort is stable, so sources sharing a priority keep their configured order.
    eligible.sort(key=lambda src: src.priority)
    return eligible
def get_sources_by_license(license_type: LicenseType) -> List[SourceConfig]:
    """Return the enabled sources published under ``license_type``."""
    return [
        source
        for source in get_enabled_sources()
        if source.license_type == license_type
    ]
def get_sources_by_template_type(template_type: str) -> List[SourceConfig]:
    """Return the enabled sources that list ``template_type`` among their template types."""
    return [
        source
        for source in get_enabled_sources()
        if template_type in source.template_types
    ]
def get_sources_by_language(language: str) -> List[SourceConfig]:
    """Return the enabled sources that list ``language`` among their languages.

    The comparison is an exact membership test against each source's
    ``languages`` list.
    """
    return [
        source
        for source in get_enabled_sources()
        if language in source.languages
    ]
def get_sources_by_jurisdiction(jurisdiction: str) -> List[SourceConfig]:
    """Return the enabled sources whose ``jurisdiction`` equals the given code.

    The comparison is an exact string match.
    """
    return [
        source
        for source in get_enabled_sources()
        if source.jurisdiction == jurisdiction
    ]
# Template type identifiers mapped to their human-readable labels
# (German / English). Every identifier used in a source's ``template_types``
# should have an entry here.
TEMPLATE_TYPES = {
    "privacy_policy": "Datenschutzerklärung / Privacy Policy",
    "terms_of_service": "Nutzungsbedingungen / Terms of Service",
    "agb": "Allgemeine Geschäftsbedingungen",
    "cookie_banner": "Cookie-Banner Text",
    "cookie_policy": "Cookie-Richtlinie / Cookie Policy",
    "impressum": "Impressum / Legal Notice",
    "widerruf": "Widerrufsbelehrung / Cancellation Policy",
    "dpa": "Auftragsverarbeitungsvertrag / Data Processing Agreement",
    "sla": "Service Level Agreement",
    "nda": "Geheimhaltungsvereinbarung / Non-Disclosure Agreement",
    "cloud_service_agreement": "Cloud-Dienstleistungsvertrag",
    "data_usage_clause": "Datennutzungsklausel",
    "acceptable_use": "Acceptable Use Policy",
    "community_guidelines": "Community-Richtlinien",
    "copyright_policy": "Urheberrechtsrichtlinie",
    "license_text": "Lizenztext",
    "law_reference": "Gesetzesreferenz (nicht als Vorlage)",
    "guidance": "Behördliche Guidance (nur Referenz)",
    "policy": "Policy-Dokument",
    # Used by the "eur-lex" source; appended last so the order of the
    # pre-existing entries is unchanged.
    "scc": "Standardvertragsklauseln / Standard Contractual Clauses",
}
# Jurisdiction codes mapped to their human-readable display names.
# The codes are the values used for SourceConfig.jurisdiction.
JURISDICTIONS = {
    "DE": "Deutschland",
    "AT": "Österreich",
    "CH": "Schweiz",
    "EU": "Europäische Union",
    "US": "United States",
    "INTL": "International",
}