""" Template Sources Configuration for Legal Templates RAG. Defines all source repositories and their license metadata for the bp_legal_templates collection. Sources are organized by license type for proper attribution compliance. License Types: - PUBLIC_DOMAIN: German official works (§5 UrhG) - no attribution needed - CC0: Public Domain Dedication - no attribution needed (recommended) - UNLICENSE: Public Domain equivalent - no attribution needed - MIT: Attribution required on redistribution - CC_BY_4: Attribution + change notices required - REUSE_NOTICE: May quote with source, no distortion allowed """ from dataclasses import dataclass, field from enum import Enum from typing import List, Optional class LicenseType(Enum): """License types for template sources with compliance requirements.""" PUBLIC_DOMAIN = "public_domain" # §5 UrhG amtliche Werke CC0 = "cc0" # CC0 1.0 Universal UNLICENSE = "unlicense" # Unlicense (public domain) MIT = "mit" # MIT License CC_BY_4 = "cc_by_4" # CC BY 4.0 International REUSE_NOTICE = "reuse_notice" # EU reuse notice (source required) @dataclass class LicenseInfo: """Detailed license information for compliance.""" id: LicenseType name: str url: str attribution_required: bool share_alike: bool = False no_derivatives: bool = False commercial_use: bool = True training_allowed: bool = True output_allowed: bool = True modification_allowed: bool = True distortion_prohibited: bool = False attribution_template: Optional[str] = None def get_attribution_text(self, source_name: str, source_url: str) -> str: """Generate attribution text for this license type.""" if not self.attribution_required: return "" if self.attribution_template: return self.attribution_template.format( source_name=source_name, source_url=source_url, license_name=self.name, license_url=self.url ) return f"Source: {source_name} ({self.name})" # License definitions with full compliance info LICENSES: dict[LicenseType, LicenseInfo] = { LicenseType.PUBLIC_DOMAIN: LicenseInfo( id=LicenseType.PUBLIC_DOMAIN, name="Public Domain (§5 UrhG)", url="https://www.gesetze-im-internet.de/urhg/__5.html", attribution_required=False, training_allowed=True, output_allowed=True, modification_allowed=True, ), LicenseType.CC0: LicenseInfo( id=LicenseType.CC0, name="CC0 1.0 Universal", url="https://creativecommons.org/publicdomain/zero/1.0/", attribution_required=False, # Not required but recommended training_allowed=True, output_allowed=True, modification_allowed=True, attribution_template="[{source_name}]({source_url}) - CC0 1.0", ), LicenseType.UNLICENSE: LicenseInfo( id=LicenseType.UNLICENSE, name="Unlicense", url="https://unlicense.org/", attribution_required=False, training_allowed=True, output_allowed=True, modification_allowed=True, ), LicenseType.MIT: LicenseInfo( id=LicenseType.MIT, name="MIT License", url="https://opensource.org/licenses/MIT", attribution_required=True, training_allowed=True, output_allowed=True, modification_allowed=True, attribution_template="Based on [{source_name}]({source_url}) - MIT License", ), LicenseType.CC_BY_4: LicenseInfo( id=LicenseType.CC_BY_4, name="CC BY 4.0 International", url="https://creativecommons.org/licenses/by/4.0/", attribution_required=True, training_allowed=False, # CC BY 4.0 may restrict training output_allowed=True, modification_allowed=True, attribution_template=( "Adapted from [{source_name}]({source_url}), " "licensed under [CC BY 4.0]({license_url}). Changes were made." ), ), LicenseType.REUSE_NOTICE: LicenseInfo( id=LicenseType.REUSE_NOTICE, name="EU Reuse Notice", url="https://commission.europa.eu/legal-notice_en", attribution_required=True, training_allowed=False, output_allowed=True, modification_allowed=False, distortion_prohibited=True, attribution_template="Source: {source_name} ({source_url})", ), } @dataclass class SourceConfig: """Configuration for a template source repository.""" name: str license_type: LicenseType template_types: List[str] languages: List[str] jurisdiction: str description: str repo_url: Optional[str] = None web_url: Optional[str] = None file_patterns: List[str] = field(default_factory=lambda: ["*.md", "*.txt", "*.html"]) exclude_patterns: List[str] = field(default_factory=list) priority: int = 1 # 1 = highest priority (CC0), 5 = lowest (REUSE_NOTICE) enabled: bool = True @property def license_info(self) -> LicenseInfo: """Get the full license information for this source.""" return LICENSES[self.license_type] def get_source_url(self) -> str: """Get the primary URL for this source.""" return self.repo_url or self.web_url or "" # ============================================================================= # Phase 1: CC0-Quellen (Höchste Priorität - keine Attribution nötig) # ============================================================================= TEMPLATE_SOURCES: List[SourceConfig] = [ # GitHub Site Policy (CC0) SourceConfig( name="github-site-policy", repo_url="https://github.com/github/site-policy", license_type=LicenseType.CC0, template_types=["terms_of_service", "privacy_policy", "community_guidelines", "acceptable_use"], languages=["en"], jurisdiction="US", description="GitHub's site policies including Terms of Service, Privacy Policy, and Community Guidelines. High-quality, well-structured legal templates.", file_patterns=["Policies/*.md", "*.md"], exclude_patterns=["README.md", "CONTRIBUTING.md", "LICENSE.md", "archived/*"], priority=1, ), # opr.vc DSGVO Muster (CC0) SourceConfig( name="opr-vc", repo_url="https://github.com/oprvc/oprvc.github.io", web_url="https://opr.vc/", license_type=LicenseType.CC0, template_types=["privacy_policy", "impressum"], languages=["de"], jurisdiction="DE", description="Open Privacy Resource - DSGVO-konforme Mustertexte für Datenschutzerklärungen und Impressum. Speziell für deutsche Websites.", file_patterns=["*.md", "*.html", "_posts/*.md"], priority=1, ), # Open Gov Foundation (CC0) SourceConfig( name="opengovfoundation-site-policy", repo_url="https://github.com/opengovfoundation/site-policy", license_type=LicenseType.CC0, template_types=["terms_of_service", "privacy_policy", "copyright_policy"], languages=["en"], jurisdiction="US", description="OpenGov Foundation's site policies. Clean, reusable templates for open government projects.", file_patterns=["*.md"], priority=1, ), # Creative Commons Legal Tools Data (CC0) SourceConfig( name="cc-legal-tools-data", repo_url="https://github.com/creativecommons/cc-legal-tools-data", license_type=LicenseType.CC0, template_types=["license_text"], languages=["de", "en"], jurisdiction="INTL", description="Creative Commons license texts in multiple languages. Useful as reference for license templates.", file_patterns=["legalcode/**/legalcode.de.html", "legalcode/**/legalcode.en.html"], priority=1, ), # ============================================================================= # Phase 2: MIT-Quellen (Attribution bei Weitergabe) # ============================================================================= # Webflorist Privacy Policy Text (MIT) SourceConfig( name="webflorist-privacy-policy", repo_url="https://github.com/webflorist/privacy-policy-text", license_type=LicenseType.MIT, template_types=["privacy_policy"], languages=["de", "en"], jurisdiction="EU", description="Modular GDPR-compliant privacy policy texts in JSON/PHP format. Highly customizable with variable sections.", file_patterns=["src/**/*.json", "src/**/*.php", "*.md"], priority=2, ), # Tempest Privacy Policy Generator (MIT) SourceConfig( name="tempest-privacy-policy", repo_url="https://github.com/Tempest-Solutions-Company/privacy-policy-generator", license_type=LicenseType.MIT, template_types=["privacy_policy"], languages=["en"], jurisdiction="INTL", description="Privacy policy generator with templates for various use cases.", file_patterns=["templates/*.md", "src/**/*.txt", "*.md"], priority=2, ), # Tempest Terms of Service Generator (MIT) SourceConfig( name="tempest-terms-of-service", repo_url="https://github.com/Tempest-Solutions-Company/terms-of-service-generator", license_type=LicenseType.MIT, template_types=["terms_of_service", "dpa"], languages=["en"], jurisdiction="INTL", description="Terms of Service and DPA clause generator templates.", file_patterns=["templates/*.md", "src/**/*.txt", "*.md"], priority=2, ), # Tempest Cookie Banner (MIT) SourceConfig( name="tempest-cookie-banner", repo_url="https://github.com/Tempest-Solutions-Company/cookie-banner-consent-solution", license_type=LicenseType.MIT, template_types=["cookie_banner", "cookie_policy"], languages=["en"], jurisdiction="EU", description="Cookie consent banner texts and templates for GDPR/ePrivacy compliance.", file_patterns=["templates/*.md", "src/**/*.txt", "*.md", "locales/*.json"], priority=2, ), # ============================================================================= # Phase 3: CC BY 4.0 (Attribution + Änderungskennzeichnung) # ============================================================================= # Common Paper Standards (CC BY 4.0) SourceConfig( name="common-paper-standards", repo_url="https://github.com/CommonPaper/SLA", web_url="https://commonpaper.com/standards/", license_type=LicenseType.CC_BY_4, template_types=["sla", "cloud_service_agreement", "terms_of_service", "nda", "dpa"], languages=["en"], jurisdiction="US", description="Common Paper's standardized B2B SaaS contract templates. Industry-standard agreements for cloud services.", file_patterns=["*.md", "versions/**/*.md"], priority=3, ), # Datennutzungsklauseln Muster (CC BY 4.0) SourceConfig( name="datennutzungsklauseln-muster", repo_url="https://gitlab.opencode.de/wernerth/datennutzungsklauseln-muster", license_type=LicenseType.CC_BY_4, template_types=["data_usage_clause", "dpa"], languages=["de"], jurisdiction="DE", description="B2B Datennutzungsklauseln für Verträge. Speziell für deutsche Unternehmen.", file_patterns=["*.md", "klauseln/*.md"], priority=3, ), # ============================================================================= # Phase 4: Amtliche Werke (§5 UrhG - urheberrechtsfrei, Referenz) # ============================================================================= # Bundestag Gesetze (Unlicense) SourceConfig( name="bundestag-gesetze", repo_url="https://github.com/bundestag/gesetze", license_type=LicenseType.UNLICENSE, template_types=["law_reference"], languages=["de"], jurisdiction="DE", description="Deutsche Bundesgesetze im Markdown-Format. Referenz für DDG, TDDDG, EGBGB Muster.", file_patterns=["d/ddg/*.md", "t/tdddg/*.md", "e/egbgb/*.md", "b/bgb/*.md"], priority=4, ), # Gesetze im Internet (Public Domain via §5 UrhG) SourceConfig( name="gesetze-im-internet", web_url="https://www.gesetze-im-internet.de/", license_type=LicenseType.PUBLIC_DOMAIN, template_types=["law_reference", "widerruf", "impressum"], languages=["de"], jurisdiction="DE", description="Amtliche Gesetzestexte. DDG §5 (Impressum), TDDDG §25, EGBGB Muster-Widerrufsformular.", file_patterns=[], # Web scraping required enabled=False, # Requires custom web crawler priority=4, ), # EUR-Lex (Public Domain + Reuse Notice) SourceConfig( name="eur-lex", web_url="https://eur-lex.europa.eu/", license_type=LicenseType.PUBLIC_DOMAIN, template_types=["scc", "law_reference"], languages=["de", "en"], jurisdiction="EU", description="EU-Recht: DSGVO Artikel, DSA, SCC (Durchführungsbeschluss 2021/914).", file_patterns=[], # Web scraping required enabled=False, # Requires custom web crawler priority=4, ), # ============================================================================= # Phase 5: Reuse-Notices (Guidance als Referenz) # ============================================================================= # EDPB Guidelines (Reuse Notice) SourceConfig( name="edpb-guidelines", web_url="https://www.edpb.europa.eu/", license_type=LicenseType.REUSE_NOTICE, template_types=["guidance"], languages=["de", "en"], jurisdiction="EU", description="EDPB Datenschutz-Guidelines und FAQs. Als Referenz verwendbar, keine Sinnentstellung erlaubt.", file_patterns=[], # Web scraping required enabled=False, # Requires custom web crawler priority=5, ), # EDPS Resources (Reuse Notice) SourceConfig( name="edps-resources", web_url="https://www.edps.europa.eu/", license_type=LicenseType.REUSE_NOTICE, template_types=["guidance"], languages=["de", "en"], jurisdiction="EU", description="EDPS Datenschutz-Ressourcen und FAQs. Als Referenz verwendbar.", file_patterns=[], # Web scraping required enabled=False, # Requires custom web crawler priority=5, ), # EU Commission Policies (CC BY 4.0) SourceConfig( name="eu-commission-policies", web_url="https://commission.europa.eu/", license_type=LicenseType.CC_BY_4, template_types=["guidance", "policy"], languages=["de", "en"], jurisdiction="EU", description="EU-Kommission Policy-Dokumente. CC BY 4.0 lizenziert.", file_patterns=[], # Web scraping required enabled=False, # Requires custom web crawler priority=5, ), ] def get_enabled_sources() -> List[SourceConfig]: """Get all enabled template sources.""" return [s for s in TEMPLATE_SOURCES if s.enabled] def get_sources_by_priority(max_priority: int = 5) -> List[SourceConfig]: """Get sources filtered by priority level (lower = higher priority).""" return sorted( [s for s in get_enabled_sources() if s.priority <= max_priority], key=lambda s: s.priority ) def get_sources_by_license(license_type: LicenseType) -> List[SourceConfig]: """Get sources filtered by license type.""" return [s for s in get_enabled_sources() if s.license_type == license_type] def get_sources_by_template_type(template_type: str) -> List[SourceConfig]: """Get sources that provide a specific template type.""" return [s for s in get_enabled_sources() if template_type in s.template_types] def get_sources_by_language(language: str) -> List[SourceConfig]: """Get sources that provide content in a specific language.""" return [s for s in get_enabled_sources() if language in s.languages] def get_sources_by_jurisdiction(jurisdiction: str) -> List[SourceConfig]: """Get sources for a specific jurisdiction.""" return [s for s in get_enabled_sources() if s.jurisdiction == jurisdiction] # Template type definitions for documentation TEMPLATE_TYPES = { "privacy_policy": "Datenschutzerklärung / Privacy Policy", "terms_of_service": "Nutzungsbedingungen / Terms of Service", "agb": "Allgemeine Geschäftsbedingungen", "cookie_banner": "Cookie-Banner Text", "cookie_policy": "Cookie-Richtlinie / Cookie Policy", "impressum": "Impressum / Legal Notice", "widerruf": "Widerrufsbelehrung / Cancellation Policy", "dpa": "Auftragsverarbeitungsvertrag / Data Processing Agreement", "sla": "Service Level Agreement", "nda": "Geheimhaltungsvereinbarung / Non-Disclosure Agreement", "cloud_service_agreement": "Cloud-Dienstleistungsvertrag", "data_usage_clause": "Datennutzungsklausel", "acceptable_use": "Acceptable Use Policy", "community_guidelines": "Community-Richtlinien", "copyright_policy": "Urheberrechtsrichtlinie", "license_text": "Lizenztext", "law_reference": "Gesetzesreferenz (nicht als Vorlage)", "guidance": "Behördliche Guidance (nur Referenz)", "policy": "Policy-Dokument", } # Jurisdiction definitions JURISDICTIONS = { "DE": "Deutschland", "AT": "Österreich", "CH": "Schweiz", "EU": "Europäische Union", "US": "United States", "INTL": "International", }