All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 40s
CI/CD / test-python-backend-compliance (push) Successful in 41s
CI/CD / test-python-document-crawler (push) Successful in 26s
CI/CD / test-python-dsms-gateway (push) Successful in 23s
CI/CD / validate-canonical-controls (push) Successful in 18s
CI/CD / deploy-hetzner (push) Successful in 2m26s
Eigenstaendig formulierte Security Controls mit unabhaengiger Taxonomie und Open-Source-Verankerung (OWASP, NIST, ENISA). Keine BSI-Nomenklatur. - Migration 044: 5 DB-Tabellen (frameworks, controls, sources, licenses, mappings) - 10 Seed Controls mit 39 Open-Source-Referenzen - License Gate: Quellen-Berechtigungspruefung (analysis/excerpt/embeddings/product) - Too-Close-Detektor: 5 Metriken (exact-phrase, token-overlap, ngram, embedding, LCS) - REST API: 8 Endpoints unter /v1/canonical/ - Go Loader mit Multi-Index (ID, domain, severity, framework) - Frontend: Control Library Browser + Provenance Wiki - CI/CD: validate-controls.py Job (schema, no-leak, open-anchors) - 67 Tests (8 Go + 59 Python), alle PASS - MkDocs Dokumentation Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
117 lines
3.8 KiB
Python
117 lines
3.8 KiB
Python
"""
|
|
License Gate — checks whether a given source may be used for a specific purpose.

Usage types:
  - analysis:        read and analyse internally (text and data mining under § 44b UrhG)
  - store_excerpt:   store a verbatim excerpt in the vault
  - ship_embeddings: ship embeddings in the product
  - ship_in_product: ship text/content in the product

Policy is driven by these columns of the canonical_control_sources table:
  allowed_analysis, allowed_store_excerpt, allowed_ship_embeddings, allowed_ship_in_product
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from typing import Any
|
|
|
|
from sqlalchemy import text
|
|
from sqlalchemy.orm import Session
|
|
|
|
logger = logging.getLogger(__name__)

# Maps each supported usage type to the canonical_control_sources column that
# governs it. check_source_allowed() interpolates the column name straight
# into its SQL statement, so only trusted literal names belong in this map.
USAGE_COLUMN_MAP = dict(
    analysis="allowed_analysis",
    store_excerpt="allowed_store_excerpt",
    ship_embeddings="allowed_ship_embeddings",
    ship_in_product="allowed_ship_in_product",
)
|
|
|
|
|
|
def check_source_allowed(db: Session, source_id: str, usage_type: str) -> bool:
    """Tell whether the source *source_id* is cleared for *usage_type*.

    The usage type is translated to a permission column through
    ``USAGE_COLUMN_MAP``; that column is then read from the
    ``canonical_control_sources`` row whose ``source_id`` matches.

    Args:
        db: Session used to run the lookup query.
        source_id: Identifier of the source to check.
        usage_type: One of the keys of ``USAGE_COLUMN_MAP``.

    Returns:
        The truthiness of the stored flag. ``False`` (after logging a
        warning) when *usage_type* is not a known key or when no row
        matches *source_id*.
    """
    if usage_type not in USAGE_COLUMN_MAP:
        logger.warning("Unknown usage_type=%s", usage_type)
        return False

    # The column name is taken from the fixed map, never from the caller, so
    # formatting it into the statement is safe; the source id stays a bound
    # parameter.
    permission_column = USAGE_COLUMN_MAP[usage_type]
    query = text(
        f"SELECT {permission_column} FROM canonical_control_sources WHERE source_id = :sid"
    )
    match = db.execute(query, {"sid": source_id}).fetchone()

    if match is not None:
        return bool(match[0])

    logger.warning("Source %s not found in registry", source_id)
    return False
|
|
|
|
|
|
def get_license_matrix(db: Session) -> list[dict[str, Any]]:
    """Return every license record as a plain dict, ordered by ``license_id``.

    Args:
        db: Session used to query ``canonical_control_licenses``.

    Returns:
        One dict per row with the keys ``license_id``, ``name``,
        ``terms_url``, ``commercial_use``, ``ai_training_restriction``,
        ``tdm_allowed_under_44b``, ``deletion_required`` and ``notes``.
    """
    # Output keys, in the order they appear in each returned dict; every one
    # is also a column of the SELECT below.
    fields = (
        "license_id",
        "name",
        "terms_url",
        "commercial_use",
        "ai_training_restriction",
        "tdm_allowed_under_44b",
        "deletion_required",
        "notes",
    )
    query = text("""
        SELECT license_id, name, terms_url, commercial_use,
               ai_training_restriction, tdm_allowed_under_44b,
               deletion_required, notes
        FROM canonical_control_licenses
        ORDER BY license_id
    """)
    records = db.execute(query).fetchall()

    return [
        {field: getattr(record, field) for field in fields}
        for record in records
    ]
|
|
|
|
|
|
def get_source_permissions(db: Session) -> list[dict[str, Any]]:
    """Return the registered sources with their permission flags.

    Each source row is joined with its license row so the result also carries
    the license name and its ``commercial_use`` value. Rows are ordered by
    ``source_id``.

    Args:
        db: Session used to query ``canonical_control_sources`` and
            ``canonical_control_licenses``.

    Returns:
        One dict per joined row holding the source metadata, the license
        fields, the four ``allowed_*`` flags and the vault settings.
    """
    # Output keys, in the order they appear in each returned dict; every one
    # is a column (or alias) of the SELECT below.
    fields = (
        "source_id",
        "title",
        "publisher",
        "url",
        "version_label",
        "language",
        "license_id",
        "license_name",
        "commercial_use",
        "allowed_analysis",
        "allowed_store_excerpt",
        "allowed_ship_embeddings",
        "allowed_ship_in_product",
        "vault_retention_days",
        "vault_access_tier",
    )
    # NOTE(review): the inner JOIN leaves out any source that has no matching
    # license row — confirm license_id is always set and valid.
    query = text("""
        SELECT s.source_id, s.title, s.publisher, s.url, s.version_label,
               s.language, s.license_id,
               s.allowed_analysis, s.allowed_store_excerpt,
               s.allowed_ship_embeddings, s.allowed_ship_in_product,
               s.vault_retention_days, s.vault_access_tier,
               l.name AS license_name, l.commercial_use
        FROM canonical_control_sources s
        JOIN canonical_control_licenses l ON l.license_id = s.license_id
        ORDER BY s.source_id
    """)
    records = db.execute(query).fetchall()

    return [
        {field: getattr(record, field) for field in fields}
        for record in records
    ]
|