Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website, Klausur-Service, School-Service, Voice-Service, Geo-Service, BreakPilot Drive, Agent-Core Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
460 lines
17 KiB
Python
460 lines
17 KiB
Python
"""
Template Sources Configuration for Legal Templates RAG.

Defines all source repositories and their license metadata for the
bp_legal_templates collection. Sources are organized by license type
for proper attribution compliance.

License Types:
- PUBLIC_DOMAIN: German official works (§5 UrhG) - no attribution needed
- CC0: Public Domain Dedication - no attribution needed (recommended)
- UNLICENSE: Public Domain equivalent - no attribution needed
- MIT: Attribution required on redistribution
- CC_BY_4: Attribution + change notices required
- REUSE_NOTICE: May quote with source, no distortion allowed
"""
|
|
|
|
from dataclasses import dataclass, field
|
|
from enum import Enum
|
|
from typing import List, Optional
|
|
|
|
|
|
class LicenseType(Enum):
    """Closed set of licenses a template source can be published under.

    Each member's value is the lowercase string identifier of the license.
    The compliance rules attached to a member live in the ``LICENSES`` table.
    """

    # Official German works, free of copyright under §5 UrhG.
    PUBLIC_DOMAIN = "public_domain"

    # CC0 1.0 Universal.
    CC0 = "cc0"

    # Unlicense (public-domain equivalent).
    UNLICENSE = "unlicense"

    # MIT License.
    MIT = "mit"

    # CC BY 4.0 International.
    CC_BY_4 = "cc_by_4"

    # EU reuse notice (the source must be named).
    REUSE_NOTICE = "reuse_notice"
|
|
|
|
|
|
@dataclass
class LicenseInfo:
    """Compliance record describing what one license permits and requires.

    The four leading fields are mandatory; every remaining flag has a
    default, so a definition only needs to spell out what deviates from it.
    """

    # Identity of the license.
    id: LicenseType
    name: str
    url: str

    # When False, get_attribution_text() always returns an empty string.
    attribution_required: bool

    # Restriction / permission flags with their defaults.
    share_alike: bool = False
    no_derivatives: bool = False
    commercial_use: bool = True
    training_allowed: bool = True
    output_allowed: bool = True
    modification_allowed: bool = True
    distortion_prohibited: bool = False

    # Optional str.format() template. Supported placeholders: {source_name},
    # {source_url}, {license_name} and {license_url}.
    attribution_template: Optional[str] = None

    def get_attribution_text(self, source_name: str, source_url: str) -> str:
        """Build the attribution line for a source published under this license.

        Args:
            source_name: Display name of the source.
            source_url: URL of the source.

        Returns:
            An empty string when the license does not require attribution.
            Otherwise the filled-in ``attribution_template`` or, when no
            template is set, the generic ``"Source: <name> (<license>)"`` line.
        """
        if not self.attribution_required:
            return ""

        template = self.attribution_template
        if not template:
            # Generic fallback: names the source and the license only.
            return f"Source: {source_name} ({self.name})"

        placeholders = {
            "source_name": source_name,
            "source_url": source_url,
            "license_name": self.name,
            "license_url": self.url,
        }
        return template.format(**placeholders)
|
|
|
|
|
|
# License definitions with full compliance info.
# Every record is registered under its own ``id``, so the dictionary key and
# the record's license type always agree.
LICENSES: dict[LicenseType, LicenseInfo] = {
    info.id: info
    for info in (
        LicenseInfo(
            id=LicenseType.PUBLIC_DOMAIN,
            name="Public Domain (§5 UrhG)",
            url="https://www.gesetze-im-internet.de/urhg/__5.html",
            attribution_required=False,
            training_allowed=True,
            output_allowed=True,
            modification_allowed=True,
        ),
        LicenseInfo(
            id=LicenseType.CC0,
            name="CC0 1.0 Universal",
            url="https://creativecommons.org/publicdomain/zero/1.0/",
            # Attribution is not required, only recommended.
            attribution_required=False,
            training_allowed=True,
            output_allowed=True,
            modification_allowed=True,
            attribution_template="[{source_name}]({source_url}) - CC0 1.0",
        ),
        LicenseInfo(
            id=LicenseType.UNLICENSE,
            name="Unlicense",
            url="https://unlicense.org/",
            attribution_required=False,
            training_allowed=True,
            output_allowed=True,
            modification_allowed=True,
        ),
        LicenseInfo(
            id=LicenseType.MIT,
            name="MIT License",
            url="https://opensource.org/licenses/MIT",
            attribution_required=True,
            training_allowed=True,
            output_allowed=True,
            modification_allowed=True,
            attribution_template="Based on [{source_name}]({source_url}) - MIT License",
        ),
        LicenseInfo(
            id=LicenseType.CC_BY_4,
            name="CC BY 4.0 International",
            url="https://creativecommons.org/licenses/by/4.0/",
            attribution_required=True,
            # CC BY 4.0 may restrict training.
            training_allowed=False,
            output_allowed=True,
            modification_allowed=True,
            attribution_template=(
                "Adapted from [{source_name}]({source_url}), "
                "licensed under [CC BY 4.0]({license_url}). Changes were made."
            ),
        ),
        LicenseInfo(
            id=LicenseType.REUSE_NOTICE,
            name="EU Reuse Notice",
            url="https://commission.europa.eu/legal-notice_en",
            attribution_required=True,
            training_allowed=False,
            output_allowed=True,
            modification_allowed=False,
            distortion_prohibited=True,
            attribution_template="Source: {source_name} ({source_url})",
        ),
    )
}
|
|
|
|
|
|
@dataclass
class SourceConfig:
    """One template source (a repository and/or a website) with its metadata."""

    # Required descriptive fields.
    name: str
    license_type: LicenseType
    template_types: List[str]
    languages: List[str]
    jurisdiction: str
    description: str

    # Where the source lives; either, both or neither may be set.
    repo_url: Optional[str] = None
    web_url: Optional[str] = None

    # Glob patterns for files to include / exclude. Factories are used so
    # that every instance receives its own list object.
    file_patterns: List[str] = field(
        default_factory=lambda: ["*.md", "*.txt", "*.html"]
    )
    exclude_patterns: List[str] = field(default_factory=list)

    # 1 = highest priority (CC0), 5 = lowest (REUSE_NOTICE).
    priority: int = 1
    enabled: bool = True

    @property
    def license_info(self) -> LicenseInfo:
        """Look up the ``LICENSES`` record for this source's license type."""
        return LICENSES[self.license_type]

    def get_source_url(self) -> str:
        """Return the primary URL of this source.

        ``repo_url`` is preferred over ``web_url``; an empty string is
        returned when neither holds a non-empty value.
        """
        for candidate in (self.repo_url, self.web_url):
            if candidate:
                return candidate
        return ""
|
|
|
|
|
|
# All known template sources, grouped into phases by license type.
# Entries that have no file patterns need a custom web crawler and are
# therefore shipped with ``enabled=False``.
TEMPLATE_SOURCES: List[SourceConfig] = [
    # =========================================================================
    # Phase 1: CC0 sources (highest priority - no attribution needed)
    # =========================================================================

    # GitHub Site Policy (CC0)
    SourceConfig(
        name="github-site-policy",
        repo_url="https://github.com/github/site-policy",
        license_type=LicenseType.CC0,
        template_types=[
            "terms_of_service",
            "privacy_policy",
            "community_guidelines",
            "acceptable_use",
        ],
        languages=["en"],
        jurisdiction="US",
        description=(
            "GitHub's site policies including Terms of Service, Privacy Policy, "
            "and Community Guidelines. "
            "High-quality, well-structured legal templates."
        ),
        file_patterns=["Policies/*.md", "*.md"],
        exclude_patterns=[
            "README.md",
            "CONTRIBUTING.md",
            "LICENSE.md",
            "archived/*",
        ],
        priority=1,
    ),

    # opr.vc GDPR sample texts (CC0)
    SourceConfig(
        name="opr-vc",
        repo_url="https://github.com/oprvc/oprvc.github.io",
        web_url="https://opr.vc/",
        license_type=LicenseType.CC0,
        template_types=["privacy_policy", "impressum"],
        languages=["de"],
        jurisdiction="DE",
        description=(
            "Open Privacy Resource - DSGVO-konforme Mustertexte für "
            "Datenschutzerklärungen und Impressum. "
            "Speziell für deutsche Websites."
        ),
        file_patterns=["*.md", "*.html", "_posts/*.md"],
        priority=1,
    ),

    # Open Gov Foundation (CC0)
    SourceConfig(
        name="opengovfoundation-site-policy",
        repo_url="https://github.com/opengovfoundation/site-policy",
        license_type=LicenseType.CC0,
        template_types=["terms_of_service", "privacy_policy", "copyright_policy"],
        languages=["en"],
        jurisdiction="US",
        description=(
            "OpenGov Foundation's site policies. "
            "Clean, reusable templates for open government projects."
        ),
        file_patterns=["*.md"],
        priority=1,
    ),

    # Creative Commons Legal Tools Data (CC0)
    SourceConfig(
        name="cc-legal-tools-data",
        repo_url="https://github.com/creativecommons/cc-legal-tools-data",
        license_type=LicenseType.CC0,
        template_types=["license_text"],
        languages=["de", "en"],
        jurisdiction="INTL",
        description=(
            "Creative Commons license texts in multiple languages. "
            "Useful as reference for license templates."
        ),
        file_patterns=[
            "legalcode/**/legalcode.de.html",
            "legalcode/**/legalcode.en.html",
        ],
        priority=1,
    ),

    # =========================================================================
    # Phase 2: MIT sources (attribution on redistribution)
    # =========================================================================

    # Webflorist Privacy Policy Text (MIT)
    SourceConfig(
        name="webflorist-privacy-policy",
        repo_url="https://github.com/webflorist/privacy-policy-text",
        license_type=LicenseType.MIT,
        template_types=["privacy_policy"],
        languages=["de", "en"],
        jurisdiction="EU",
        description=(
            "Modular GDPR-compliant privacy policy texts in JSON/PHP format. "
            "Highly customizable with variable sections."
        ),
        file_patterns=["src/**/*.json", "src/**/*.php", "*.md"],
        priority=2,
    ),

    # Tempest Privacy Policy Generator (MIT)
    SourceConfig(
        name="tempest-privacy-policy",
        repo_url="https://github.com/Tempest-Solutions-Company/privacy-policy-generator",
        license_type=LicenseType.MIT,
        template_types=["privacy_policy"],
        languages=["en"],
        jurisdiction="INTL",
        description="Privacy policy generator with templates for various use cases.",
        file_patterns=["templates/*.md", "src/**/*.txt", "*.md"],
        priority=2,
    ),

    # Tempest Terms of Service Generator (MIT)
    SourceConfig(
        name="tempest-terms-of-service",
        repo_url="https://github.com/Tempest-Solutions-Company/terms-of-service-generator",
        license_type=LicenseType.MIT,
        template_types=["terms_of_service", "dpa"],
        languages=["en"],
        jurisdiction="INTL",
        description="Terms of Service and DPA clause generator templates.",
        file_patterns=["templates/*.md", "src/**/*.txt", "*.md"],
        priority=2,
    ),

    # Tempest Cookie Banner (MIT)
    SourceConfig(
        name="tempest-cookie-banner",
        repo_url="https://github.com/Tempest-Solutions-Company/cookie-banner-consent-solution",
        license_type=LicenseType.MIT,
        template_types=["cookie_banner", "cookie_policy"],
        languages=["en"],
        jurisdiction="EU",
        description="Cookie consent banner texts and templates for GDPR/ePrivacy compliance.",
        file_patterns=["templates/*.md", "src/**/*.txt", "*.md", "locales/*.json"],
        priority=2,
    ),

    # =========================================================================
    # Phase 3: CC BY 4.0 (attribution + indication of changes)
    # =========================================================================

    # Common Paper Standards (CC BY 4.0)
    SourceConfig(
        name="common-paper-standards",
        repo_url="https://github.com/CommonPaper/SLA",
        web_url="https://commonpaper.com/standards/",
        license_type=LicenseType.CC_BY_4,
        template_types=[
            "sla",
            "cloud_service_agreement",
            "terms_of_service",
            "nda",
            "dpa",
        ],
        languages=["en"],
        jurisdiction="US",
        description=(
            "Common Paper's standardized B2B SaaS contract templates. "
            "Industry-standard agreements for cloud services."
        ),
        file_patterns=["*.md", "versions/**/*.md"],
        priority=3,
    ),

    # Data usage clause samples (CC BY 4.0)
    SourceConfig(
        name="datennutzungsklauseln-muster",
        repo_url="https://gitlab.opencode.de/wernerth/datennutzungsklauseln-muster",
        license_type=LicenseType.CC_BY_4,
        template_types=["data_usage_clause", "dpa"],
        languages=["de"],
        jurisdiction="DE",
        description=(
            "B2B Datennutzungsklauseln für Verträge. "
            "Speziell für deutsche Unternehmen."
        ),
        file_patterns=["*.md", "klauseln/*.md"],
        priority=3,
    ),

    # =========================================================================
    # Phase 4: Official works (§5 UrhG - free of copyright, reference only)
    # =========================================================================

    # Bundestag laws (Unlicense)
    SourceConfig(
        name="bundestag-gesetze",
        repo_url="https://github.com/bundestag/gesetze",
        license_type=LicenseType.UNLICENSE,
        template_types=["law_reference"],
        languages=["de"],
        jurisdiction="DE",
        description=(
            "Deutsche Bundesgesetze im Markdown-Format. "
            "Referenz für DDG, TDDDG, EGBGB Muster."
        ),
        file_patterns=["d/ddg/*.md", "t/tdddg/*.md", "e/egbgb/*.md", "b/bgb/*.md"],
        priority=4,
    ),

    # Gesetze im Internet (public domain via §5 UrhG)
    SourceConfig(
        name="gesetze-im-internet",
        web_url="https://www.gesetze-im-internet.de/",
        license_type=LicenseType.PUBLIC_DOMAIN,
        template_types=["law_reference", "widerruf", "impressum"],
        languages=["de"],
        jurisdiction="DE",
        description=(
            "Amtliche Gesetzestexte. "
            "DDG §5 (Impressum), TDDDG §25, EGBGB Muster-Widerrufsformular."
        ),
        file_patterns=[],  # Web scraping required
        enabled=False,  # Requires custom web crawler
        priority=4,
    ),

    # EUR-Lex (public domain + reuse notice)
    SourceConfig(
        name="eur-lex",
        web_url="https://eur-lex.europa.eu/",
        license_type=LicenseType.PUBLIC_DOMAIN,
        template_types=["scc", "law_reference"],
        languages=["de", "en"],
        jurisdiction="EU",
        description="EU-Recht: DSGVO Artikel, DSA, SCC (Durchführungsbeschluss 2021/914).",
        file_patterns=[],  # Web scraping required
        enabled=False,  # Requires custom web crawler
        priority=4,
    ),

    # =========================================================================
    # Phase 5: Reuse notices (guidance used as reference)
    # =========================================================================

    # EDPB Guidelines (reuse notice)
    SourceConfig(
        name="edpb-guidelines",
        web_url="https://www.edpb.europa.eu/",
        license_type=LicenseType.REUSE_NOTICE,
        template_types=["guidance"],
        languages=["de", "en"],
        jurisdiction="EU",
        description=(
            "EDPB Datenschutz-Guidelines und FAQs. "
            "Als Referenz verwendbar, keine Sinnentstellung erlaubt."
        ),
        file_patterns=[],  # Web scraping required
        enabled=False,  # Requires custom web crawler
        priority=5,
    ),

    # EDPS Resources (reuse notice)
    SourceConfig(
        name="edps-resources",
        web_url="https://www.edps.europa.eu/",
        license_type=LicenseType.REUSE_NOTICE,
        template_types=["guidance"],
        languages=["de", "en"],
        jurisdiction="EU",
        description="EDPS Datenschutz-Ressourcen und FAQs. Als Referenz verwendbar.",
        file_patterns=[],  # Web scraping required
        enabled=False,  # Requires custom web crawler
        priority=5,
    ),

    # EU Commission Policies (CC BY 4.0)
    SourceConfig(
        name="eu-commission-policies",
        web_url="https://commission.europa.eu/",
        license_type=LicenseType.CC_BY_4,
        template_types=["guidance", "policy"],
        languages=["de", "en"],
        jurisdiction="EU",
        description="EU-Kommission Policy-Dokumente. CC BY 4.0 lizenziert.",
        file_patterns=[],  # Web scraping required
        enabled=False,  # Requires custom web crawler
        priority=5,
    ),
]
|
|
|
|
|
|
def get_enabled_sources() -> List[SourceConfig]:
    """Return the entries of ``TEMPLATE_SOURCES`` whose ``enabled`` flag is set.

    A new list is built on every call; the entries keep the order they have
    in ``TEMPLATE_SOURCES``.
    """
    return list(filter(lambda source: source.enabled, TEMPLATE_SOURCES))
|
|
|
|
|
|
def get_sources_by_priority(max_priority: int = 5) -> List[SourceConfig]:
    """Return enabled sources up to a priority level, most important first.

    Args:
        max_priority: Largest ``priority`` value to include (inclusive).
            Lower numbers mean higher priority.

    Returns:
        The matching enabled sources sorted by ascending ``priority``.
        Sources with equal priority keep their relative order because the
        sort is stable.
    """
    eligible = [
        source
        for source in get_enabled_sources()
        if source.priority <= max_priority
    ]
    eligible.sort(key=lambda source: source.priority)
    return eligible
|
|
|
|
|
|
def get_sources_by_license(license_type: LicenseType) -> List[SourceConfig]:
    """Return the enabled sources whose ``license_type`` equals *license_type*."""
    enabled_sources = get_enabled_sources()
    return [
        source
        for source in enabled_sources
        if source.license_type == license_type
    ]
|
|
|
|
|
|
def get_sources_by_template_type(template_type: str) -> List[SourceConfig]:
    """Return the enabled sources that list *template_type* in ``template_types``."""
    enabled_sources = get_enabled_sources()
    return [
        source
        for source in enabled_sources
        if template_type in source.template_types
    ]
|
|
|
|
|
|
def get_sources_by_language(language: str) -> List[SourceConfig]:
    """Return the enabled sources that list *language* in ``languages``.

    The comparison is an exact membership test against the stored codes.
    """
    enabled_sources = get_enabled_sources()
    return [
        source
        for source in enabled_sources
        if language in source.languages
    ]
|
|
|
|
|
|
def get_sources_by_jurisdiction(jurisdiction: str) -> List[SourceConfig]:
    """Return the enabled sources whose ``jurisdiction`` equals *jurisdiction*.

    The comparison is an exact string match.
    """
    enabled_sources = get_enabled_sources()
    return [
        source
        for source in enabled_sources
        if source.jurisdiction == jurisdiction
    ]
|
|
|
|
|
|
# Template type definitions for documentation.
# Maps each template-type identifier used in ``SourceConfig.template_types``
# to a human-readable label. Every identifier referenced by an entry in
# TEMPLATE_SOURCES should have a label here; "scc" was missing although the
# "eur-lex" source uses it.
TEMPLATE_TYPES = {
    "privacy_policy": "Datenschutzerklärung / Privacy Policy",
    "terms_of_service": "Nutzungsbedingungen / Terms of Service",
    "agb": "Allgemeine Geschäftsbedingungen",
    "cookie_banner": "Cookie-Banner Text",
    "cookie_policy": "Cookie-Richtlinie / Cookie Policy",
    "impressum": "Impressum / Legal Notice",
    "widerruf": "Widerrufsbelehrung / Cancellation Policy",
    "dpa": "Auftragsverarbeitungsvertrag / Data Processing Agreement",
    "sla": "Service Level Agreement",
    "nda": "Geheimhaltungsvereinbarung / Non-Disclosure Agreement",
    "cloud_service_agreement": "Cloud-Dienstleistungsvertrag",
    "data_usage_clause": "Datennutzungsklausel",
    "scc": "Standardvertragsklauseln / Standard Contractual Clauses",
    "acceptable_use": "Acceptable Use Policy",
    "community_guidelines": "Community-Richtlinien",
    "copyright_policy": "Urheberrechtsrichtlinie",
    "license_text": "Lizenztext",
    "law_reference": "Gesetzesreferenz (nicht als Vorlage)",
    "guidance": "Behördliche Guidance (nur Referenz)",
    "policy": "Policy-Dokument",
}
|
|
|
|
|
|
# Jurisdiction definitions.
# Maps the jurisdiction code stored in ``SourceConfig.jurisdiction`` to a
# display name.
JURISDICTIONS: dict[str, str] = {
    # German-speaking countries
    "DE": "Deutschland",
    "AT": "Österreich",
    "CH": "Schweiz",
    # Supranational and other
    "EU": "Europäische Union",
    "US": "United States",
    "INTL": "International",
}
|