""" License Gate — checks whether a given source may be used for a specific purpose. Usage types: - analysis: Read + analyse internally (TDM under UrhG 44b) - store_excerpt: Store verbatim excerpt in vault - ship_embeddings: Ship embeddings in product - ship_in_product: Ship text/content in product Policy is driven by the canonical_control_sources table columns: allowed_analysis, allowed_store_excerpt, allowed_ship_embeddings, allowed_ship_in_product """ from __future__ import annotations import logging from typing import Any from sqlalchemy import text from sqlalchemy.orm import Session logger = logging.getLogger(__name__) USAGE_COLUMN_MAP = { "analysis": "allowed_analysis", "store_excerpt": "allowed_store_excerpt", "ship_embeddings": "allowed_ship_embeddings", "ship_in_product": "allowed_ship_in_product", } def check_source_allowed(db: Session, source_id: str, usage_type: str) -> bool: """Check whether *source_id* may be used for *usage_type*. Returns False if the source is unknown or the usage is not allowed. """ col = USAGE_COLUMN_MAP.get(usage_type) if col is None: logger.warning("Unknown usage_type=%s", usage_type) return False row = db.execute( text(f"SELECT {col} FROM canonical_control_sources WHERE source_id = :sid"), {"sid": source_id}, ).fetchone() if row is None: logger.warning("Source %s not found in registry", source_id) return False return bool(row[0]) def get_license_matrix(db: Session) -> list[dict[str, Any]]: """Return the full license matrix with allowed usages per license.""" rows = db.execute( text(""" SELECT license_id, name, terms_url, commercial_use, ai_training_restriction, tdm_allowed_under_44b, deletion_required, notes FROM canonical_control_licenses ORDER BY license_id """) ).fetchall() return [ { "license_id": r.license_id, "name": r.name, "terms_url": r.terms_url, "commercial_use": r.commercial_use, "ai_training_restriction": r.ai_training_restriction, "tdm_allowed_under_44b": r.tdm_allowed_under_44b, "deletion_required": r.deletion_required, "notes": r.notes, } for r in rows ] def get_source_permissions(db: Session) -> list[dict[str, Any]]: """Return all sources with their permission flags.""" rows = db.execute( text(""" SELECT s.source_id, s.title, s.publisher, s.url, s.version_label, s.language, s.license_id, s.allowed_analysis, s.allowed_store_excerpt, s.allowed_ship_embeddings, s.allowed_ship_in_product, s.vault_retention_days, s.vault_access_tier, l.name AS license_name, l.commercial_use FROM canonical_control_sources s JOIN canonical_control_licenses l ON l.license_id = s.license_id ORDER BY s.source_id """) ).fetchall() return [ { "source_id": r.source_id, "title": r.title, "publisher": r.publisher, "url": r.url, "version_label": r.version_label, "language": r.language, "license_id": r.license_id, "license_name": r.license_name, "commercial_use": r.commercial_use, "allowed_analysis": r.allowed_analysis, "allowed_store_excerpt": r.allowed_store_excerpt, "allowed_ship_embeddings": r.allowed_ship_embeddings, "allowed_ship_in_product": r.allowed_ship_in_product, "vault_retention_days": r.vault_retention_days, "vault_access_tier": r.vault_access_tier, } for r in rows ]