"""
License Checker Utility

Validates data source licenses and generates attribution.
"""
import html
import re
from enum import Enum
from typing import Optional
from urllib.parse import unquote

import structlog

logger = structlog.get_logger(__name__)


class LicenseType(Enum):
    """Supported data source license types."""

    ODBL = "odbl"  # OpenStreetMap
    COPERNICUS = "copernicus"  # Copernicus DEM
    CC_BY = "cc-by"  # Creative Commons Attribution
    CC_BY_SA = "cc-by-sa"  # Creative Commons Attribution-ShareAlike
    CC0 = "cc0"  # Public Domain
    PROPRIETARY = "proprietary"  # Not allowed


class DataSource(Enum):
    """Known data sources."""

    OPENSTREETMAP = "openstreetmap"
    COPERNICUS_DEM = "copernicus_dem"
    OPENAERIAL = "openaerial"
    WIKIMEDIA = "wikimedia"
    GOOGLE = "google"  # FORBIDDEN
    BING = "bing"  # FORBIDDEN
    APPLE = "apple"  # FORBIDDEN
    HERE = "here"  # FORBIDDEN


# License information for allowed sources
ALLOWED_SOURCES = {
    DataSource.OPENSTREETMAP: {
        "license": LicenseType.ODBL,
        "attribution": "© OpenStreetMap contributors",
        "url": "https://www.openstreetmap.org/copyright",
        "commercial": True,
        "derivative_allowed": True,
    },
    DataSource.COPERNICUS_DEM: {
        "license": LicenseType.COPERNICUS,
        "attribution": "© Copernicus Service Information",
        "url": "https://spacedata.copernicus.eu/",
        "commercial": True,
        "derivative_allowed": True,
    },
    DataSource.OPENAERIAL: {
        "license": LicenseType.CC_BY,
        "attribution": "© OpenAerialMap contributors",
        "url": "https://openaerialmap.org/",
        "commercial": True,
        "derivative_allowed": True,
    },
    DataSource.WIKIMEDIA: {
        "license": LicenseType.CC_BY_SA,
        "attribution": "Wikimedia Commons",
        "url": "https://commons.wikimedia.org/",
        "commercial": True,
        "derivative_allowed": True,
    },
}

# Forbidden sources
FORBIDDEN_SOURCES = {
    DataSource.GOOGLE: "Google Maps ToS prohibit derivatives and offline use",
    DataSource.BING: "Bing Maps has restrictive licensing",
    DataSource.APPLE: "Apple Maps prohibits commercial use",
    DataSource.HERE: "HERE requires paid licensing",
}

# URL patterns that identify a forbidden source, checked in order (first match
# wins). Bare keywords are matched anywhere in the URL. Dotted domain patterns
# are anchored on host-label boundaries so that e.g. "here.com" does not fire
# on "somewhere.com" and "bing.com" does not fire on "plumbing.com".
# "google" also covers "googleapis".
_FORBIDDEN_URL_PATTERNS: tuple[tuple[re.Pattern[str], DataSource], ...] = (
    (re.compile(r"google"), DataSource.GOOGLE),
    (re.compile(r"gstatic"), DataSource.GOOGLE),
    (re.compile(r"(?<![a-z0-9-])bing\.com(?![a-z0-9-])"), DataSource.BING),
    (re.compile(r"virtualearth"), DataSource.BING),
    (re.compile(r"(?<![a-z0-9-])maps\.apple\.com(?![a-z0-9-])"), DataSource.APPLE),
    (re.compile(r"(?<![a-z0-9-])apple\.com/maps"), DataSource.APPLE),
    (re.compile(r"(?<![a-z0-9-])here\.com(?![a-z0-9-])"), DataSource.HERE),
)

# Keywords that identify an allowed source, checked in order.
_ALLOWED_URL_KEYWORDS: dict[str, DataSource] = {
    "openstreetmap": DataSource.OPENSTREETMAP,
    "tile.osm": DataSource.OPENSTREETMAP,
    "copernicus": DataSource.COPERNICUS_DEM,
    "openaerialmap": DataSource.OPENAERIAL,
    "wikimedia": DataSource.WIKIMEDIA,
}


class LicenseChecker:
    """
    Utility for validating data source licenses and generating attributions.

    All methods are static and work purely from the module-level
    ``ALLOWED_SOURCES`` / ``FORBIDDEN_SOURCES`` tables. The original notes
    describe this as part of DSGVO/GDPR and licensing compliance for
    educational use; the class itself only deals with source licensing.
    """

    @staticmethod
    def is_source_allowed(source: DataSource) -> bool:
        """Check if a data source is allowed for use."""
        return source in ALLOWED_SOURCES

    @staticmethod
    def get_forbidden_reason(source: DataSource) -> Optional[str]:
        """Get the reason why a source is forbidden, or None if it is not."""
        return FORBIDDEN_SOURCES.get(source)

    @staticmethod
    def validate_url(url: str) -> tuple[bool, str]:
        """
        Validate if a URL is from an allowed source.

        Forbidden patterns are checked before allowed ones, so a URL that
        mentions both is rejected. URLs matching neither list are allowed
        with a warning message.

        Args:
            url: URL (or URL-like string) to classify

        Returns:
            Tuple of (is_allowed, message)
        """
        # Decode percent-escapes first so a forbidden host embedded in a
        # query string ("...?u=https%3A%2F%2Fhere.com") is still visible to
        # the boundary-anchored patterns.
        candidate = unquote(url).lower()

        for pattern, source in _FORBIDDEN_URL_PATTERNS:
            if pattern.search(candidate):
                reason = FORBIDDEN_SOURCES.get(source, "Not allowed")
                return False, f"FORBIDDEN: {source.value} - {reason}"

        for keyword, source in _ALLOWED_URL_KEYWORDS.items():
            if keyword in candidate:
                info = ALLOWED_SOURCES[source]
                return True, f"ALLOWED: {source.value} ({info['license'].value})"

        # Unknown source - warn but allow with custom attribution
        return True, "UNKNOWN: Verify license manually"

    @staticmethod
    def get_attribution_for_sources(
        sources: list[DataSource],
    ) -> list[dict]:
        """
        Get attribution information for a list of data sources.

        Sources that are not in ``ALLOWED_SOURCES`` are skipped. Each source
        is reported once, in first-seen order, even if it is listed twice.

        Args:
            sources: List of data sources used

        Returns:
            List of attribution dictionaries with the keys ``name``,
            ``license``, ``attribution``, ``url`` and ``required``
        """
        attributions = []
        # dict.fromkeys de-duplicates while preserving order.
        for source in dict.fromkeys(sources):
            info = ALLOWED_SOURCES.get(source)
            if info is None:
                continue
            attributions.append({
                "name": source.value.replace("_", " ").title(),
                "license": info["license"].value.upper(),
                "attribution": info["attribution"],
                "url": info["url"],
                "required": True,
            })
        return attributions

    @staticmethod
    def generate_attribution_html(sources: list[DataSource]) -> str:
        """
        Generate HTML attribution footer for web display.

        Each attribution is rendered as a link to the source's license or
        home page; both the URL and the label are HTML-escaped.

        Args:
            sources: List of data sources used

        Returns:
            HTML string with attribution links joined by " | ", or an empty
            string when no listed source requires attribution
        """
        attributions = LicenseChecker.get_attribution_for_sources(sources)
        return " | ".join(
            f'<a href="{html.escape(attr["url"])}">'
            f'{html.escape(attr["attribution"])}</a>'
            for attr in attributions
        )

    @staticmethod
    def generate_attribution_text(sources: list[DataSource]) -> str:
        """
        Generate plain text attribution.

        Args:
            sources: List of data sources used

        Returns:
            Plain text attribution string joined by " | ", or an empty
            string when no listed source requires attribution
        """
        attributions = LicenseChecker.get_attribution_for_sources(sources)
        return " | ".join(attr["attribution"] for attr in attributions)

    @staticmethod
    def check_commercial_use(sources: list[DataSource]) -> tuple[bool, list[str]]:
        """
        Check if all sources allow commercial use.

        A forbidden source always counts as an issue; an allowed source
        counts as one when its ``commercial`` flag is false. Each source is
        reported at most once.

        Args:
            sources: List of data sources used

        Returns:
            Tuple of (all_allowed, list_of_issues)
        """
        issues = []
        for source in dict.fromkeys(sources):
            if source in FORBIDDEN_SOURCES:
                issues.append(f"{source.value}: {FORBIDDEN_SOURCES[source]}")
            elif source in ALLOWED_SOURCES:
                if not ALLOWED_SOURCES[source]["commercial"]:
                    issues.append(f"{source.value}: Commercial use not allowed")
        return not issues, issues