Files
breakpilot-lehrer/geo-service/utils/license_checker.py
Benjamin Boenisch 5a31f52310 Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00

224 lines
6.8 KiB
Python

"""
License Checker Utility
Validates data source licenses and generates attribution
"""
from typing import Optional
from enum import Enum
import structlog
logger = structlog.get_logger(__name__)
class LicenseType(Enum):
    """License families a data source can be published under.

    The enum value is the lowercase identifier used when a license is
    rendered in messages and attribution records.
    """

    # Open licenses
    ODBL = "odbl"  # OpenStreetMap
    COPERNICUS = "copernicus"  # Copernicus DEM
    CC_BY = "cc-by"  # Creative Commons Attribution
    CC_BY_SA = "cc-by-sa"  # Creative Commons Attribution-ShareAlike
    CC0 = "cc0"  # Public Domain

    # Closed licenses
    PROPRIETARY = "proprietary"  # Not allowed
class DataSource(Enum):
    """Data providers this module knows about, permitted or not.

    Membership in this enum does not imply permission: the members marked
    FORBIDDEN exist so that they can be recognised and rejected.
    """

    # Providers with an entry in ALLOWED_SOURCES
    OPENSTREETMAP = "openstreetmap"
    COPERNICUS_DEM = "copernicus_dem"
    OPENAERIAL = "openaerial"
    WIKIMEDIA = "wikimedia"

    # Providers with an entry in FORBIDDEN_SOURCES
    GOOGLE = "google"  # FORBIDDEN
    BING = "bing"  # FORBIDDEN
    APPLE = "apple"  # FORBIDDEN
    HERE = "here"  # FORBIDDEN
# License metadata for every permitted data source, keyed by DataSource.
# Each record holds:
#   "license"            - the LicenseType the data is published under
#   "attribution"        - the credit line to display
#   "url"                - the link target used for the credit line
#   "commercial"         - whether commercial use is permitted
#   "derivative_allowed" - whether derived works are permitted
ALLOWED_SOURCES = {
    DataSource.OPENSTREETMAP: {
        "license": LicenseType.ODBL,
        "attribution": "© OpenStreetMap contributors",
        "url": "https://www.openstreetmap.org/copyright",
        "commercial": True,
        "derivative_allowed": True,
    },
    DataSource.COPERNICUS_DEM: {
        "license": LicenseType.COPERNICUS,
        "attribution": "© Copernicus Service Information",
        "url": "https://spacedata.copernicus.eu/",
        "commercial": True,
        "derivative_allowed": True,
    },
    DataSource.OPENAERIAL: {
        "license": LicenseType.CC_BY,
        "attribution": "© OpenAerialMap contributors",
        "url": "https://openaerialmap.org/",
        "commercial": True,
        "derivative_allowed": True,
    },
    DataSource.WIKIMEDIA: {
        "license": LicenseType.CC_BY_SA,
        "attribution": "Wikimedia Commons",
        "url": "https://commons.wikimedia.org/",
        "commercial": True,
        "derivative_allowed": True,
    },
}
# Data sources that must not be used, keyed by DataSource, each mapped to a
# human-readable explanation that is surfaced in validation messages.
FORBIDDEN_SOURCES = {
    DataSource.GOOGLE: "Google Maps ToS prohibit derivatives and offline use",
    DataSource.BING: "Bing Maps has restrictive licensing",
    DataSource.APPLE: "Apple Maps prohibits commercial use",
    DataSource.HERE: "HERE requires paid licensing",
}
class LicenseChecker:
    """
    Utility for validating data source licenses and generating attributions.

    Intended to support DSGVO/GDPR compliance and proper licensing for
    educational use. Every method is a static method; the class holds no
    state and all decisions are driven by the module-level
    ``ALLOWED_SOURCES`` and ``FORBIDDEN_SOURCES`` tables.
    """

    @staticmethod
    def is_source_allowed(source: DataSource) -> bool:
        """Return True if *source* has an entry in ``ALLOWED_SOURCES``."""
        return source in ALLOWED_SOURCES

    @staticmethod
    def get_forbidden_reason(source: DataSource) -> Optional[str]:
        """Return why *source* is forbidden.

        Returns:
            The explanation from ``FORBIDDEN_SOURCES``, or ``None`` when the
            source is not listed there.
        """
        return FORBIDDEN_SOURCES.get(source)

    @staticmethod
    def _matches_at_domain_boundary(text: str, pattern: str) -> bool:
        """Return True if *pattern* occurs in *text* at the start of a label.

        An occurrence counts only when it sits at the very beginning of
        *text* or when the preceding character cannot be part of a hostname
        label (i.e. is neither alphanumeric nor ``-``). This keeps
        ``here.com`` from matching inside ``elsewhere.com`` while still
        matching ``https://here.com`` and ``maps.here.com``.
        """
        index = text.find(pattern)
        while index != -1:
            if index == 0:
                return True
            previous = text[index - 1]
            if not (previous.isalnum() or previous == "-"):
                return True
            # This occurrence is the tail of a longer label; keep looking.
            index = text.find(pattern, index + 1)
        return False

    @staticmethod
    def validate_url(url: str) -> tuple[bool, str]:
        """
        Validate if a URL is from an allowed source.

        Matching is case-insensitive. Forbidden patterns are checked before
        allowed ones, in the order listed below, and the first hit wins.

        Args:
            url: The URL to classify.

        Returns:
            Tuple of (is_allowed, message). The message starts with
            ``FORBIDDEN:``, ``ALLOWED:`` or ``UNKNOWN:``. Unrecognised URLs
            are reported as allowed, with a message asking for manual
            license verification.
        """
        url_lower = url.lower()

        # (pattern, source, is_domain). Brand tokens are matched anywhere in
        # the URL. Domain-style patterns must start at a hostname-label
        # boundary, otherwise unrelated hosts such as "elsewhere.com",
        # "climbing.com" or "pineapple.com/maps" would be rejected.
        forbidden_patterns = (
            ("google", DataSource.GOOGLE, False),
            ("googleapis", DataSource.GOOGLE, False),
            ("gstatic", DataSource.GOOGLE, False),
            ("bing.com", DataSource.BING, True),
            ("virtualearth", DataSource.BING, False),
            ("apple.com/maps", DataSource.APPLE, True),
            ("here.com", DataSource.HERE, True),
        )
        for pattern, source, is_domain in forbidden_patterns:
            if is_domain:
                hit = LicenseChecker._matches_at_domain_boundary(
                    url_lower, pattern
                )
            else:
                hit = pattern in url_lower
            if hit:
                reason = FORBIDDEN_SOURCES.get(source, "Not allowed")
                return False, f"FORBIDDEN: {source.value} - {reason}"

        # Check for allowed sources
        allowed_patterns = {
            "openstreetmap": DataSource.OPENSTREETMAP,
            "tile.osm": DataSource.OPENSTREETMAP,
            "copernicus": DataSource.COPERNICUS_DEM,
            "openaerialmap": DataSource.OPENAERIAL,
            "wikimedia": DataSource.WIKIMEDIA,
        }
        for pattern, source in allowed_patterns.items():
            if pattern in url_lower:
                info = ALLOWED_SOURCES[source]
                return True, f"ALLOWED: {source.value} ({info['license'].value})"

        # Unknown source: reported as allowed, but the message tells the
        # caller that the license still has to be verified by hand.
        return True, "UNKNOWN: Verify license manually"

    @staticmethod
    def get_attribution_for_sources(
        sources: list[DataSource],
    ) -> list[dict]:
        """
        Get attribution information for a list of data sources.

        Sources without an entry in ``ALLOWED_SOURCES`` are skipped. A source
        listed more than once yields a single record, at the position of its
        first occurrence, so that rendered credits are not repeated.

        Args:
            sources: List of data sources used

        Returns:
            List of attribution dictionaries with the keys ``name``,
            ``license``, ``attribution``, ``url`` and ``required``.
        """
        attributions = []
        # dict.fromkeys de-duplicates while keeping first-seen order.
        for source in dict.fromkeys(sources):
            info = ALLOWED_SOURCES.get(source)
            if info is None:
                continue
            attributions.append({
                "name": source.value.replace("_", " ").title(),
                "license": info["license"].value.upper(),
                "attribution": info["attribution"],
                "url": info["url"],
                "required": True,
            })
        return attributions

    @staticmethod
    def generate_attribution_html(sources: list[DataSource]) -> str:
        """
        Generate HTML attribution footer for web display.

        Args:
            sources: List of data sources used

        Returns:
            One ``<a>`` element per attributed source, joined by `` | ``;
            an empty string when no source yields an attribution.
        """
        attributions = LicenseChecker.get_attribution_for_sources(sources)
        return " | ".join(
            f'<a href="{attr["url"]}" target="_blank" rel="noopener">'
            f'{attr["attribution"]}</a>'
            for attr in attributions
        )

    @staticmethod
    def generate_attribution_text(sources: list[DataSource]) -> str:
        """
        Generate plain text attribution.

        Args:
            sources: List of data sources used

        Returns:
            The attribution lines joined by `` | ``; an empty string when no
            source yields an attribution.
        """
        attributions = LicenseChecker.get_attribution_for_sources(sources)
        return " | ".join(attr["attribution"] for attr in attributions)

    @staticmethod
    def check_commercial_use(sources: list[DataSource]) -> tuple[bool, list[str]]:
        """
        Check if all sources allow commercial use.

        A source listed in ``FORBIDDEN_SOURCES`` is always reported as an
        issue, using its forbidden reason. A source listed in
        ``ALLOWED_SOURCES`` is reported only when its ``commercial`` flag is
        false.

        Args:
            sources: List of data sources used

        Returns:
            Tuple of (all_allowed, list_of_issues)
        """
        issues = []
        for source in sources:
            if source in FORBIDDEN_SOURCES:
                issues.append(f"{source.value}: {FORBIDDEN_SOURCES[source]}")
            elif source in ALLOWED_SOURCES:
                if not ALLOWED_SOURCES[source]["commercial"]:
                    issues.append(f"{source.value}: Commercial use not allowed")
        return not issues, issues