feat(pipeline): F1 regulation registry — DB-backed license/source-type lookup
Migrates REGULATION_LICENSE_MAP (135 entries) and SOURCE_REGULATION_CLASSIFICATION (58 entries) from hardcoded Python dicts to the compliance.regulation_registry table.

- SQL migration: 002_regulation_registry.sql (table + indexes + trigger)
- Migration script: f1_migrate_regulation_registry.py (162 rows, --dry-run)
- RegulationRegistry cache: 5min TTL, prefix fallback, graceful degradation
- control_generator._classify_regulation() delegates to DB with dict fallback
- source_type_classification.classify_source_regulation() delegates to DB
- 34 new tests (lookup, cache, degradation, migration data consistency)
- 421 total tests pass, 0 regressions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -165,21 +165,29 @@ def classify_source_regulation(source_regulation: str) -> str:
|
|||||||
"""
|
"""
|
||||||
Klassifiziert eine source_regulation als law, guideline oder framework.
|
Klassifiziert eine source_regulation als law, guideline oder framework.
|
||||||
|
|
||||||
Verwendet exaktes Matching gegen die Map. Bei unbekannten Quellen
|
Delegates to DB-backed RegulationRegistry (with 5min cache).
|
||||||
wird anhand von Schluesselwoertern geraten, Fallback ist 'framework'
|
Falls back to SOURCE_REGULATION_CLASSIFICATION dict + heuristic
|
||||||
(konservativstes Ergebnis).
|
if DB is unavailable.
|
||||||
"""
|
"""
|
||||||
if not source_regulation:
|
if not source_regulation:
|
||||||
return SOURCE_TYPE_FRAMEWORK
|
return SOURCE_TYPE_FRAMEWORK
|
||||||
|
|
||||||
# Exaktes Match
|
# Try DB-backed registry first
|
||||||
|
try:
|
||||||
|
from services.regulation_registry import classify_source_regulation as _db_classify
|
||||||
|
result = _db_classify(source_regulation)
|
||||||
|
if result:
|
||||||
|
return result
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Fallback: local dict
|
||||||
if source_regulation in SOURCE_REGULATION_CLASSIFICATION:
|
if source_regulation in SOURCE_REGULATION_CLASSIFICATION:
|
||||||
return SOURCE_REGULATION_CLASSIFICATION[source_regulation]
|
return SOURCE_REGULATION_CLASSIFICATION[source_regulation]
|
||||||
|
|
||||||
# Heuristik fuer unbekannte Quellen
|
# Heuristik fuer unbekannte Quellen
|
||||||
lower = source_regulation.lower()
|
lower = source_regulation.lower()
|
||||||
|
|
||||||
# Gesetze erkennen
|
|
||||||
law_indicators = [
|
law_indicators = [
|
||||||
"verordnung", "richtlinie", "gesetz", "directive", "regulation",
|
"verordnung", "richtlinie", "gesetz", "directive", "regulation",
|
||||||
"(eu)", "(eg)", "act", "ley", "loi", "törvény", "código",
|
"(eu)", "(eg)", "act", "ley", "loi", "törvény", "código",
|
||||||
@@ -187,19 +195,16 @@ def classify_source_regulation(source_regulation: str) -> str:
|
|||||||
if any(ind in lower for ind in law_indicators):
|
if any(ind in lower for ind in law_indicators):
|
||||||
return SOURCE_TYPE_LAW
|
return SOURCE_TYPE_LAW
|
||||||
|
|
||||||
# Leitlinien erkennen
|
|
||||||
guideline_indicators = [
|
guideline_indicators = [
|
||||||
"edpb", "leitlinie", "guideline", "wp2", "bsi", "empfehlung",
|
"edpb", "leitlinie", "guideline", "wp2", "bsi", "empfehlung",
|
||||||
]
|
]
|
||||||
if any(ind in lower for ind in guideline_indicators):
|
if any(ind in lower for ind in guideline_indicators):
|
||||||
return SOURCE_TYPE_GUIDELINE
|
return SOURCE_TYPE_GUIDELINE
|
||||||
|
|
||||||
# Frameworks erkennen
|
|
||||||
framework_indicators = [
|
framework_indicators = [
|
||||||
"enisa", "nist", "owasp", "oecd", "cisa", "framework", "iso",
|
"enisa", "nist", "owasp", "oecd", "cisa", "framework", "iso",
|
||||||
]
|
]
|
||||||
if any(ind in lower for ind in framework_indicators):
|
if any(ind in lower for ind in framework_indicators):
|
||||||
return SOURCE_TYPE_FRAMEWORK
|
return SOURCE_TYPE_FRAMEWORK
|
||||||
|
|
||||||
# Konservativ: unbekannt = framework (geringste Verbindlichkeit)
|
|
||||||
return SOURCE_TYPE_FRAMEWORK
|
return SOURCE_TYPE_FRAMEWORK
|
||||||
|
|||||||
@@ -0,0 +1,72 @@
|
|||||||
|
-- Migration 002: Regulation Registry (Block F1)
|
||||||
|
-- Schema: compliance
|
||||||
|
-- Run: ssh macmini "docker exec -i bp-core-postgres psql -U breakpilot -d breakpilot_db" < control-pipeline/migrations/002_regulation_registry.sql
|
||||||
|
|
||||||
|
SET search_path TO compliance, public;
|
||||||
|
|
||||||
|
-- ========================================
|
||||||
|
-- regulation_registry
|
||||||
|
-- ========================================
|
||||||
|
-- Central registry for all regulations, laws, guidelines, and frameworks
|
||||||
|
-- referenced by the control pipeline. Replaces hardcoded Python dicts
|
||||||
|
-- (REGULATION_LICENSE_MAP, SOURCE_REGULATION_CLASSIFICATION).
|
||||||
|
|
||||||
|
-- One row per regulation, law, guideline, standard or framework that the
-- control pipeline references. Created only if it does not exist yet, so the
-- migration can be re-run.
CREATE TABLE IF NOT EXISTS regulation_registry (
    -- Surrogate primary key
    id                  UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Machine key, e.g. "eu_2016_679" or "nist_sp_800_53"
    regulation_id       VARCHAR(100) NOT NULL UNIQUE,

    -- Human-readable names
    regulation_name_de  TEXT,
    regulation_name_en  TEXT,
    regulation_short    VARCHAR(50),

    -- License classification (3-rule system)
    license_rule        INTEGER NOT NULL DEFAULT 1
                        CHECK (license_rule IN (1, 2, 3)),
    license_type        VARCHAR(50),    -- EU_LAW, DE_LAW, CC-BY-SA-4.0, etc.
    attribution         TEXT,           -- Required for Rule 2 (CC-BY)

    -- Source classification
    source_type         VARCHAR(20) NOT NULL DEFAULT 'law'
                        CHECK (source_type IN ('law', 'guideline', 'standard', 'framework', 'restricted')),

    -- Descriptive metadata
    jurisdiction        VARCHAR(10),    -- DE, EU, AT, CH, US, FR, ES, NL, IT, HU, INT
    category            VARCHAR(50),
    celex               VARCHAR(30),    -- EU CELEX number if applicable
    url                 TEXT,

    -- Lifecycle state of the entry
    status              VARCHAR(20) NOT NULL DEFAULT 'active'
                        CHECK (status IN ('active', 'needs_review', 'deprecated')),

    -- Audit timestamps
    created_at          TIMESTAMPTZ DEFAULT NOW(),
    updated_at          TIMESTAMPTZ DEFAULT NOW()
);
|
||||||
|
|
||||||
|
-- Indexes
|
||||||
|
-- Secondary indexes on the columns used for filtering; each is created only
-- if it is not already present.
CREATE INDEX IF NOT EXISTS idx_reg_registry_status       ON regulation_registry (status);
CREATE INDEX IF NOT EXISTS idx_reg_registry_jurisdiction ON regulation_registry (jurisdiction);
CREATE INDEX IF NOT EXISTS idx_reg_registry_source_type  ON regulation_registry (source_type);
CREATE INDEX IF NOT EXISTS idx_reg_registry_license_rule ON regulation_registry (license_rule);
|
||||||
|
|
||||||
|
-- Updated-at trigger
|
||||||
|
-- Trigger function: stamps the row being written with the current time in
-- updated_at and hands the modified row back to the executor.
CREATE OR REPLACE FUNCTION update_regulation_registry_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$;
|
||||||
|
|
||||||
|
-- Drop and re-create the trigger so that re-running the migration does not
-- fail on an already existing trigger.
DROP TRIGGER IF EXISTS trg_regulation_registry_updated_at
    ON regulation_registry;

-- Runs the updated_at function once for every row touched by an UPDATE.
CREATE TRIGGER trg_regulation_registry_updated_at
    BEFORE UPDATE
    ON regulation_registry
    FOR EACH ROW
    EXECUTE FUNCTION update_regulation_registry_updated_at();
|
||||||
@@ -0,0 +1,247 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
F1 Migration: Populate regulation_registry from hardcoded Python dicts.
|
||||||
|
|
||||||
|
Sources:
|
||||||
|
- REGULATION_LICENSE_MAP (control_generator.py) — 135 entries keyed by regulation_id
|
||||||
|
- SOURCE_REGULATION_CLASSIFICATION (source_type_classification.py) — 58 entries keyed by name
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Dry run (prints SQL, no DB write):
|
||||||
|
python3 scripts/f1_migrate_regulation_registry.py --dry-run
|
||||||
|
|
||||||
|
# Against Mac Mini:
|
||||||
|
python3 scripts/f1_migrate_regulation_registry.py --db-host macmini
|
||||||
|
|
||||||
|
# Against local Docker:
|
||||||
|
python3 scripts/f1_migrate_regulation_registry.py --db-host localhost
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add parent so we can import from services/data
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||||
|
|
||||||
|
from services.control_generator import REGULATION_LICENSE_MAP, _RULE2_PREFIXES, _RULE3_PREFIXES # noqa: E402
|
||||||
|
from data.source_type_classification import SOURCE_REGULATION_CLASSIFICATION # noqa: E402
|
||||||
|
|
||||||
|
# Derive jurisdiction from license_type
|
||||||
|
_LICENSE_TO_JURISDICTION = {
|
||||||
|
"EU_LAW": "EU",
|
||||||
|
"EU_PUBLIC": "EU",
|
||||||
|
"DE_LAW": "DE",
|
||||||
|
"DE_PUBLIC": "DE",
|
||||||
|
"AT_LAW": "AT",
|
||||||
|
"CH_LAW": "CH",
|
||||||
|
"FR_LAW": "FR",
|
||||||
|
"ES_LAW": "ES",
|
||||||
|
"NL_LAW": "NL",
|
||||||
|
"IT_LAW": "IT",
|
||||||
|
"HU_LAW": "HU",
|
||||||
|
"NIST_PUBLIC_DOMAIN": "US",
|
||||||
|
"US_GOV_PUBLIC": "US",
|
||||||
|
"CC-BY-SA-4.0": "INT",
|
||||||
|
"CC-BY-4.0": "INT",
|
||||||
|
"OECD_PUBLIC": "INT",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _derive_jurisdiction(license_type: str) -> str:
|
||||||
|
"""Map license_type to jurisdiction code."""
|
||||||
|
return _LICENSE_TO_JURISDICTION.get(license_type, "INT")
|
||||||
|
|
||||||
|
|
||||||
|
# Single-pass character map that turns a lower-cased display name into a
# synthetic regulation_id: separators become "_", punctuation is dropped and
# the listed accented letters are transliterated to ASCII.
_SYNTHETIC_ID_TRANSLATION = str.maketrans({
    " ": "_", "/": "_", "-": "_",
    "(": "", ")": "", ".": "", ",": "",
    "ä": "ae", "ö": "oe", "ü": "ue",
    "á": "a", "é": "e", "ó": "o",
})

# Ordered (jurisdiction, substrings) hints; the first entry with a matching
# substring wins, so the order is significant.
_JURISDICTION_NAME_HINTS = (
    ("EU", ("edpb", "edps", "(eu)", "eu ", "wp2")),
    ("DE", ("bsi", "bdsg", "bundes", "gwg")),
    ("US", ("nist", "cisa")),
    ("AT", ("österreich",)),
    ("CH", ("schweiz",)),
    ("ES", ("spanien",)),
    ("FR", ("frankreich",)),
    ("HU", ("ungarn",)),
)


def _synthetic_regulation_id(name: str) -> str:
    """Derive a machine key (at most 100 characters) from a display name."""
    return name.lower().translate(_SYNTHETIC_ID_TRANSLATION).strip("_")[:100]


def _guess_jurisdiction(name: str) -> str:
    """Guess a jurisdiction code from substrings of *name*; default is "INT"."""
    name_lower = name.lower()
    for jurisdiction, hints in _JURISDICTION_NAME_HINTS:
        if any(hint in name_lower for hint in hints):
            return jurisdiction
    return "INT"


def build_rows() -> list[dict]:
    """Merge REGULATION_LICENSE_MAP + SOURCE_REGULATION_CLASSIFICATION into rows.

    Entries of REGULATION_LICENSE_MAP are authoritative and emitted with
    status "active". Names from SOURCE_REGULATION_CLASSIFICATION that were
    not already seen get a synthetic regulation_id, a guessed jurisdiction
    and status "needs_review".

    A synthetic id that is empty or equals an id that was already emitted is
    skipped (with a note on stderr): the generated upsert uses
    ``ON CONFLICT (regulation_id) DO UPDATE``, so a colliding guessed row
    would otherwise overwrite the authoritative one.

    Returns:
        One dict per row with the keys regulation_id, regulation_name_de,
        license_rule, license_type, attribution, source_type, jurisdiction
        and status.
    """
    rows = []
    # Names and ids already emitted (for dedup against the secondary source)
    seen_names: set[str] = set()
    seen_ids: set[str] = set()

    # 1) Primary source: REGULATION_LICENSE_MAP (has regulation_id as key)
    for reg_id, info in REGULATION_LICENSE_MAP.items():
        name = info.get("name", reg_id)
        license_type = info.get("license", "")
        normalized_id = reg_id.lower().strip()
        seen_names.add(name)
        seen_ids.add(normalized_id)

        rows.append({
            "regulation_id": normalized_id,
            "regulation_name_de": name,
            "license_rule": info["rule"],
            "license_type": license_type,
            "attribution": info.get("attribution"),
            "source_type": info.get("source_type", "law"),
            "jurisdiction": _derive_jurisdiction(license_type),
            "status": "active",
        })

    # 2) Secondary: SOURCE_REGULATION_CLASSIFICATION entries not already
    #    covered. These are keyed by name, so a synthetic id is generated.
    for name, source_type in SOURCE_REGULATION_CLASSIFICATION.items():
        if name in seen_names:
            continue

        synthetic_id = _synthetic_regulation_id(name)
        if not synthetic_id or synthetic_id in seen_ids:
            # Emitting this row would clobber an existing row on upsert.
            print(
                f"Skipping {name!r}: synthetic id {synthetic_id!r} is empty "
                f"or already in use",
                file=sys.stderr,
            )
            continue
        seen_ids.add(synthetic_id)

        # source_type_classification uses law/guideline/framework; the
        # registry stores its "framework" entries as "standard".
        mapped_source_type = "standard" if source_type == "framework" else source_type

        rows.append({
            "regulation_id": synthetic_id,
            "regulation_name_de": name,
            # NOTE(review): originally commented as "default: conservative",
            # but the registry uses Rule 3 as the safe default for unknown
            # codes — confirm that Rule 1 is intended for guessed rows.
            "license_rule": 1,
            "license_type": "",
            "attribution": None,
            "source_type": mapped_source_type,
            "jurisdiction": _guess_jurisdiction(name),
            "status": "needs_review",  # needs manual review since we guessed
        })

    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def generate_sql(rows: list[dict]) -> str:
    """Generate the upsert SQL script for all rows.

    Every text value is emitted as a quoted SQL literal with embedded single
    quotes doubled, so names or attributions containing an apostrophe yield
    valid statements. A falsy ``attribution`` is written as ``NULL``;
    ``license_rule`` is written as an integer.

    Args:
        rows: Dicts with the keys regulation_id, regulation_name_de,
            license_rule, license_type, attribution, source_type,
            jurisdiction and status.

    Returns:
        The script as one string: a ``SET search_path`` line, a short
        comment header, then one ``INSERT ... ON CONFLICT`` per row. The
        conflict clause updates every inserted column except ``status``.
    """
    lines = [
        "SET search_path TO compliance, public;",
        "",
        "-- Auto-generated by f1_migrate_regulation_registry.py",
        f"-- {len(rows)} rows total",
        "",
    ]

    for row in rows:
        attribution = _sql_literal(row["attribution"]) if row["attribution"] else "NULL"
        values = ", ".join((
            _sql_literal(row["regulation_id"]),
            _sql_literal(row["regulation_name_de"]),
            str(int(row["license_rule"])),
            _sql_literal(row["license_type"]),
            attribution,
            _sql_literal(row["source_type"]),
            _sql_literal(row["jurisdiction"]),
            _sql_literal(row["status"]),
        ))
        lines.append(
            "INSERT INTO regulation_registry "
            "(regulation_id, regulation_name_de, license_rule, license_type, "
            "attribution, source_type, jurisdiction, status) "
            f"VALUES ({values}) ON CONFLICT (regulation_id) DO UPDATE SET "
            "regulation_name_de = EXCLUDED.regulation_name_de, "
            "license_rule = EXCLUDED.license_rule, "
            "license_type = EXCLUDED.license_type, "
            "attribution = EXCLUDED.attribution, "
            "source_type = EXCLUDED.source_type, "
            "jurisdiction = EXCLUDED.jurisdiction;"
        )

    return "\n".join(lines)


def _sql_literal(value) -> str:
    """Render *value* as a single-quoted SQL string literal."""
    return "'" + _escape_sql(str(value)) + "'"


def _escape_sql(val: str) -> str:
    """Escape single quotes for SQL by doubling them.

    Only the quote character is handled.
    NOTE(review): assumes the server treats backslashes in ordinary string
    literals literally — confirm for the target database.
    """
    return val.replace("'", "''")
|
||||||
|
|
||||||
|
|
||||||
|
def insert_via_sqlalchemy(rows: list[dict], db_host: str) -> int:
    """Upsert rows using SQLAlchemy (same pattern as control-pipeline).

    All rows are written inside a single transaction that is committed when
    the loop finishes and rolled back if any statement raises. The engine's
    connection pool is disposed before returning.

    Args:
        rows: Row dicts as produced by ``build_rows()``; the keys are bound
            to the named parameters of the upsert statement.
        db_host: PostgreSQL host name.

    Returns:
        Number of rows sent to the database (inserted or updated).
    """
    from sqlalchemy import create_engine, text

    # NOTE(review): credentials, port and database name are hard-coded here;
    # consider sourcing them from configuration.
    url = f"postgresql://breakpilot:breakpilot123@{db_host}:5432/breakpilot_db"

    # Built once: the statement does not vary between rows. On conflict all
    # inserted columns except status are refreshed.
    upsert = text("""
        INSERT INTO regulation_registry
            (regulation_id, regulation_name_de, license_rule, license_type,
             attribution, source_type, jurisdiction, status)
        VALUES
            (:regulation_id, :regulation_name_de, :license_rule, :license_type,
             :attribution, :source_type, :jurisdiction, :status)
        ON CONFLICT (regulation_id) DO UPDATE SET
            regulation_name_de = EXCLUDED.regulation_name_de,
            license_rule = EXCLUDED.license_rule,
            license_type = EXCLUDED.license_type,
            attribution = EXCLUDED.attribution,
            source_type = EXCLUDED.source_type,
            jurisdiction = EXCLUDED.jurisdiction
    """)

    engine = create_engine(url)
    inserted = 0
    try:
        # engine.begin() commits on success and rolls back on error.
        with engine.begin() as conn:
            conn.execute(text("SET search_path TO compliance, public"))
            for row in rows:
                conn.execute(upsert, row)
                inserted += 1
    finally:
        engine.dispose()

    return inserted
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point.

    Builds the rows, prints how many there are per license_rule and per
    status, then either prints the generated SQL (``--dry-run``) or writes
    the rows to the database on ``--db-host``.
    """
    cli = argparse.ArgumentParser(description="Migrate regulation registry data")
    cli.add_argument("--dry-run", action="store_true", help="Print SQL only")
    cli.add_argument("--db-host", default="localhost", help="PostgreSQL host")
    options = cli.parse_args()

    rows = build_rows()
    print(f"Built {len(rows)} rows from hardcoded dicts")

    # Tally rows per license rule and per lifecycle status
    rule_counts = {}
    status_counts = {}
    for entry in rows:
        rule = entry["license_rule"]
        status = entry["status"]
        rule_counts[rule] = rule_counts.get(rule, 0) + 1
        status_counts[status] = status_counts.get(status, 0) + 1
    print(f" By license_rule: {rule_counts}")
    print(f" By status: {status_counts}")

    if not args_dry_run(options):
        written = insert_via_sqlalchemy(rows, options.db_host)
        print(f"Inserted/updated {written} rows into regulation_registry")
    else:
        print("\n--- DRY RUN (SQL output) ---\n")
        print(generate_sql(rows))


def args_dry_run(options) -> bool:
    """Return whether the parsed options request a dry run."""
    return options.dry_run
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -17,9 +17,6 @@ import httpx
|
|||||||
|
|
||||||
from .control_generator import (
|
from .control_generator import (
|
||||||
GeneratedControl,
|
GeneratedControl,
|
||||||
REGULATION_LICENSE_MAP,
|
|
||||||
_RULE2_PREFIXES,
|
|
||||||
_RULE3_PREFIXES,
|
|
||||||
_classify_regulation,
|
_classify_regulation,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ from sqlalchemy import text
|
|||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from .rag_client import ComplianceRAGClient, RAGSearchResult, get_rag_client
|
from .rag_client import ComplianceRAGClient, RAGSearchResult, get_rag_client
|
||||||
|
from .regulation_registry import get_registry as _get_regulation_registry
|
||||||
from .similarity_detector import check_similarity
|
from .similarity_detector import check_similarity
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -245,28 +246,21 @@ def _classify_regulation(regulation_code: str) -> dict:
|
|||||||
|
|
||||||
Returns dict with keys: license, rule, name, source_type.
|
Returns dict with keys: license, rule, name, source_type.
|
||||||
source_type is one of: law, guideline, standard, restricted.
|
source_type is one of: law, guideline, standard, restricted.
|
||||||
|
|
||||||
|
Delegates to DB-backed RegulationRegistry (with 5min cache).
|
||||||
|
Falls back to REGULATION_LICENSE_MAP if DB is unavailable.
|
||||||
"""
|
"""
|
||||||
code = regulation_code.lower().strip()
|
registry = _get_regulation_registry()
|
||||||
|
result = registry.classify_regulation(regulation_code)
|
||||||
|
|
||||||
# Exact match first
|
# If registry returned the unknown fallback AND we have a local match,
|
||||||
if code in REGULATION_LICENSE_MAP:
|
# prefer the local dict (graceful degradation during migration)
|
||||||
return REGULATION_LICENSE_MAP[code]
|
if result.get("license") == "UNKNOWN":
|
||||||
|
code = regulation_code.lower().strip()
|
||||||
|
if code in REGULATION_LICENSE_MAP:
|
||||||
|
return REGULATION_LICENSE_MAP[code]
|
||||||
|
|
||||||
# Prefix match for Rule 2 (ENISA = standard)
|
return result
|
||||||
for prefix in _RULE2_PREFIXES:
|
|
||||||
if code.startswith(prefix):
|
|
||||||
return {"license": "CC-BY-4.0", "rule": 2, "source_type": "standard",
|
|
||||||
"name": "ENISA", "attribution": "ENISA, CC BY 4.0"}
|
|
||||||
|
|
||||||
# Prefix match for Rule 3 (BSI/ISO/ETSI = restricted)
|
|
||||||
for prefix in _RULE3_PREFIXES:
|
|
||||||
if code.startswith(prefix):
|
|
||||||
return {"license": f"{prefix.rstrip('_').upper()}_RESTRICTED", "rule": 3,
|
|
||||||
"source_type": "restricted", "name": "INTERNAL_ONLY"}
|
|
||||||
|
|
||||||
# Unknown → treat as restricted (safe default)
|
|
||||||
logger.warning("Unknown regulation_code %r — defaulting to Rule 3 (restricted)", code)
|
|
||||||
return {"license": "UNKNOWN", "rule": 3, "source_type": "restricted", "name": "INTERNAL_ONLY"}
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -0,0 +1,220 @@
|
|||||||
|
"""
|
||||||
|
DB-backed Regulation Registry with in-memory cache.
|
||||||
|
|
||||||
|
Replaces hardcoded REGULATION_LICENSE_MAP and SOURCE_REGULATION_CLASSIFICATION
|
||||||
|
with a single PostgreSQL table (compliance.regulation_registry).
|
||||||
|
|
||||||
|
Cache TTL: 5 minutes. Thread-safe via simple timestamp check.
|
||||||
|
Falls back to hardcoded dicts if DB is unavailable (graceful degradation).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from sqlalchemy import text
|
||||||
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
||||||
|
from db.session import SessionLocal
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

_CACHE_TTL_SECONDS = 300  # 5 minutes

# Prefix-based fallback rules (unchanged from original logic)
_RULE2_PREFIXES = ("enisa_",)
_RULE3_PREFIXES = ("bsi_", "iso_", "etsi_")

# Fallback for unknown regulations
_UNKNOWN_REGULATION = {
    "license": "UNKNOWN",
    "rule": 3,
    "source_type": "restricted",
    "name": "INTERNAL_ONLY",
    "attribution": None,
}


class RegulationRegistry:
    """In-memory cache of the regulation_registry table.

    Provides two lookup modes:
    1. classify_regulation(code) — replaces REGULATION_LICENSE_MAP[code]
    2. source_type_by_name(name) — replaces SOURCE_REGULATION_CLASSIFICATION[name]

    The cache is (re)loaded lazily by the lookup methods once it is older
    than ``_CACHE_TTL_SECONDS``. If a reload fails with a SQLAlchemy error,
    the previously loaded data stays in use and the next lookup tries again.
    """

    # Substring markers for the name heuristic, checked in this order.
    _LAW_INDICATORS = (
        "verordnung", "richtlinie", "gesetz", "directive", "regulation",
        "(eu)", "(eg)", "act", "ley", "loi", "törvény", "código",
    )
    _GUIDELINE_INDICATORS = (
        "edpb", "leitlinie", "guideline", "wp2", "bsi", "empfehlung",
    )

    def __init__(self):
        self._by_code: dict[str, dict] = {}
        self._by_name: dict[str, str] = {}
        # time.monotonic() has an undefined reference point, so 0.0 is not a
        # safe "never loaded" marker (the clock itself may be below the TTL).
        # -inf makes a fresh instance stale on every platform.
        self._loaded_at: float = float("-inf")

    def _is_stale(self) -> bool:
        """Return True once the cache is older than the TTL (or never loaded)."""
        return (time.monotonic() - self._loaded_at) > _CACHE_TTL_SECONDS

    def _load(self) -> bool:
        """Load all non-deprecated rows from regulation_registry into memory.

        Returns:
            True when the cache was replaced; False when the query failed
            with a SQLAlchemyError (the old cache is left untouched).
        """
        try:
            db = SessionLocal()
            try:
                rows = db.execute(
                    text("""
                        SELECT regulation_id, regulation_name_de, license_rule,
                               license_type, attribution, source_type, jurisdiction,
                               status
                        FROM regulation_registry
                        WHERE status != 'deprecated'
                    """)
                ).fetchall()
            finally:
                db.close()
        except SQLAlchemyError:
            logger.warning(
                "Failed to load regulation_registry from DB — using stale cache",
                exc_info=True,
            )
            return False

        by_code: dict[str, dict] = {}
        by_name: dict[str, str] = {}

        for row in rows:
            (regulation_id, name_de, license_rule, license_type,
             attribution, source_type, jurisdiction, _status) = row
            source_type = source_type or "law"

            by_code[regulation_id.lower()] = {
                "license": license_type or "",
                "rule": license_rule,
                "source_type": source_type,
                "name": name_de or regulation_id,
                "attribution": attribution,
                "jurisdiction": jurisdiction,
            }

            # Also index by display name for source_type lookups
            if name_de:
                by_name[name_de] = source_type

        # Swap in the fresh dicts only after they are fully built.
        self._by_code = by_code
        self._by_name = by_name
        self._loaded_at = time.monotonic()
        logger.info(
            "Regulation registry loaded: %d entries by code, %d by name",
            len(by_code), len(by_name),
        )
        return True

    def _ensure_loaded(self) -> None:
        """Reload cache if stale."""
        if self._is_stale():
            self._load()

    def classify_regulation(self, regulation_code: str) -> dict:
        """Look up license info for a regulation_code.

        The code is matched case-insensitively with surrounding whitespace
        removed: first exactly against the cached rows, then against the
        Rule 2 and Rule 3 prefixes. Anything else — including an empty or
        None code — is reported as unknown with Rule 3 (restricted).

        Returns:
            A new dict with the keys license, rule, name, source_type and
            attribution (plus jurisdiction for exact matches). The caller may
            modify it without affecting the cache.
        """
        self._ensure_loaded()
        code = (regulation_code or "").lower().strip()

        # Exact match from DB; copy so callers cannot mutate the cache
        entry = self._by_code.get(code)
        if entry is not None:
            return dict(entry)

        # Prefix match for Rule 2 (ENISA = standard)
        if code.startswith(_RULE2_PREFIXES):
            return {
                "license": "CC-BY-4.0",
                "rule": 2,
                "source_type": "standard",
                "name": "ENISA",
                "attribution": "ENISA, CC BY 4.0",
            }

        # Prefix match for Rule 3 (BSI/ISO/ETSI = restricted)
        for prefix in _RULE3_PREFIXES:
            if code.startswith(prefix):
                return {
                    "license": f"{prefix.rstrip('_').upper()}_RESTRICTED",
                    "rule": 3,
                    "source_type": "restricted",
                    "name": "INTERNAL_ONLY",
                    "attribution": None,
                }

        # Unknown → restricted (safe default)
        logger.warning(
            "Unknown regulation_code %r — defaulting to Rule 3 (restricted)", code
        )
        return dict(_UNKNOWN_REGULATION)

    def source_type_by_name(self, source_regulation: str) -> str:
        """Look up source_type by regulation display name.

        An empty or None name yields "framework" without touching the cache.
        Otherwise the cached names are matched exactly (case-sensitive);
        unknown names fall back to a substring heuristic that returns "law",
        "guideline" or, if nothing matches, "framework".
        """
        if not source_regulation:
            return "framework"

        self._ensure_loaded()

        # Exact match from DB
        if source_regulation in self._by_name:
            return self._by_name[source_regulation]

        # Heuristic fallback for unknown sources
        lower = source_regulation.lower()
        if any(ind in lower for ind in self._LAW_INDICATORS):
            return "law"
        if any(ind in lower for ind in self._GUIDELINE_INDICATORS):
            return "guideline"

        # Names with framework markers (enisa, nist, owasp, oecd, cisa,
        # framework, iso) and names without any marker both end up here.
        return "framework"

    def get_all(self) -> dict[str, dict]:
        """Return a shallow copy of all cached entries (by regulation_code)."""
        self._ensure_loaded()
        return dict(self._by_code)

    def is_open_source(self, regulation_code: str) -> bool:
        """Check if regulation is Rule 1 or 2 (safe to reference)."""
        info = self.classify_regulation(regulation_code)
        return info["rule"] in (1, 2)
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level singleton
|
||||||
|
# Lazily created, process-wide registry instance
_registry: Optional[RegulationRegistry] = None


def get_registry() -> RegulationRegistry:
    """Return the shared RegulationRegistry, creating it on first use."""
    global _registry
    instance = _registry
    if instance is None:
        instance = _registry = RegulationRegistry()
    return instance
|
||||||
|
|
||||||
|
|
||||||
|
def classify_regulation(regulation_code: str) -> dict:
    """Module-level shortcut: license info for *regulation_code* from the shared registry."""
    shared = get_registry()
    return shared.classify_regulation(regulation_code)
|
||||||
|
|
||||||
|
|
||||||
|
def classify_source_regulation(source_regulation: str) -> str:
    """Module-level shortcut: source_type for a regulation display name from the shared registry."""
    shared = get_registry()
    return shared.source_type_by_name(source_regulation)
|
||||||
@@ -0,0 +1,285 @@
|
|||||||
|
"""Tests for RegulationRegistry — DB-backed lookup with cache and fallback."""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from services.regulation_registry import (
|
||||||
|
RegulationRegistry,
|
||||||
|
_CACHE_TTL_SECONDS,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Test data: simulates DB rows ──────────────────────────────────────────
|
||||||
|
|
||||||
|
_MOCK_DB_ROWS = [
|
||||||
|
# (regulation_id, regulation_name_de, license_rule, license_type,
|
||||||
|
# attribution, source_type, jurisdiction, status)
|
||||||
|
("eu_2016_679", "DSGVO (EU) 2016/679", 1, "EU_LAW",
|
||||||
|
None, "law", "EU", "active"),
|
||||||
|
("nist_sp_800_53", "NIST SP 800-53 Rev. 5", 1, "NIST_PUBLIC_DOMAIN",
|
||||||
|
None, "standard", "US", "active"),
|
||||||
|
("owasp_asvs", "OWASP ASVS 4.0", 2, "CC-BY-SA-4.0",
|
||||||
|
"OWASP Foundation, CC BY-SA 4.0", "standard", "INT", "active"),
|
||||||
|
("bdsg", "Bundesdatenschutzgesetz (BDSG)", 1, "DE_LAW",
|
||||||
|
None, "law", "DE", "active"),
|
||||||
|
("at_dsg", "Österreichisches Datenschutzgesetz (DSG)", 1, "AT_LAW",
|
||||||
|
None, "law", "AT", "active"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _mock_db_execute(query):
|
||||||
|
"""Mock that returns our test rows."""
|
||||||
|
mock_result = MagicMock()
|
||||||
|
mock_result.fetchall.return_value = _MOCK_DB_ROWS
|
||||||
|
return mock_result
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def registry():
    """Provide a RegulationRegistry whose cache was loaded from the mock rows."""
    fake_session = MagicMock()
    fake_session.execute = _mock_db_execute

    instance = RegulationRegistry()
    # Patch the session factory only while loading; lookups afterwards are
    # served from the in-memory cache.
    with patch(
        "services.regulation_registry.SessionLocal",
        return_value=fake_session,
    ):
        instance._load()
    return instance
|
||||||
|
|
||||||
|
|
||||||
|
# ── classify_regulation tests ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestClassifyRegulation:
    """Lookups by regulation code: exact matches, prefix rules, unknown codes."""

    def test_exact_match_eu_law(self, registry):
        """A code present in the table returns its stored fields."""
        info = registry.classify_regulation("eu_2016_679")
        assert info["rule"] == 1
        assert info["license"] == "EU_LAW"
        assert info["source_type"] == "law"
        assert info["name"] == "DSGVO (EU) 2016/679"

    def test_exact_match_case_insensitive(self, registry):
        """Upper-case codes resolve to the same entry."""
        info = registry.classify_regulation("EU_2016_679")
        assert info["rule"] == 1
        assert info["name"] == "DSGVO (EU) 2016/679"

    def test_exact_match_with_whitespace(self, registry):
        """Surrounding whitespace is ignored."""
        info = registry.classify_regulation(" eu_2016_679 ")
        assert info["rule"] == 1

    def test_nist_standard(self, registry):
        """NIST entry is Rule 1 with source_type standard."""
        info = registry.classify_regulation("nist_sp_800_53")
        assert info["rule"] == 1
        assert info["source_type"] == "standard"

    def test_owasp_rule2(self, registry):
        """OWASP entry is Rule 2 and carries its attribution."""
        info = registry.classify_regulation("owasp_asvs")
        assert info["rule"] == 2
        assert info["attribution"] == "OWASP Foundation, CC BY-SA 4.0"

    def test_german_law(self, registry):
        """BDSG is a Rule 1 law in jurisdiction DE."""
        info = registry.classify_regulation("bdsg")
        assert info["rule"] == 1
        assert info["source_type"] == "law"
        assert info["jurisdiction"] == "DE"

    def test_austrian_law(self, registry):
        """Austrian DSG is Rule 1 in jurisdiction AT."""
        info = registry.classify_regulation("at_dsg")
        assert info["rule"] == 1
        assert info["jurisdiction"] == "AT"

    def test_prefix_enisa_rule2(self, registry):
        """Codes starting with enisa_ fall back to Rule 2."""
        info = registry.classify_regulation("enisa_supply_chain_2024")
        assert info["rule"] == 2
        assert info["source_type"] == "standard"
        assert "ENISA" in info["attribution"]

    def test_prefix_bsi_rule3(self, registry):
        """Codes starting with bsi_ fall back to Rule 3."""
        info = registry.classify_regulation("bsi_tr_03161")
        assert info["rule"] == 3
        assert info["source_type"] == "restricted"
        assert info["name"] == "INTERNAL_ONLY"

    def test_prefix_iso_rule3(self, registry):
        """Codes starting with iso_ fall back to Rule 3."""
        info = registry.classify_regulation("iso_27001")
        assert info["rule"] == 3
        assert info["source_type"] == "restricted"

    def test_prefix_etsi_rule3(self, registry):
        """Codes starting with etsi_ fall back to Rule 3."""
        info = registry.classify_regulation("etsi_en_303_645")
        assert info["rule"] == 3

    def test_unknown_defaults_to_restricted(self, registry):
        """Codes matching nothing are reported as UNKNOWN / Rule 3."""
        info = registry.classify_regulation("some_unknown_regulation")
        assert info["rule"] == 3
        assert info["source_type"] == "restricted"
        assert info["license"] == "UNKNOWN"
|
||||||
|
|
||||||
|
|
||||||
|
# ── source_type_by_name tests ────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestSourceTypeByName:
    """Expectations for ``registry.source_type_by_name``.

    Covers names the registry is expected to know exactly, names that are
    only classified through keyword heuristics, and the ``"framework"``
    default for empty or unrecognised input.
    """

    def test_exact_match_law(self, registry):
        """The GDPR display name is classified as ``law``."""
        assert registry.source_type_by_name("DSGVO (EU) 2016/679") == "law"

    def test_exact_match_standard(self, registry):
        """The NIST SP 800-53 display name is classified as ``standard``."""
        assert registry.source_type_by_name("NIST SP 800-53 Rev. 5") == "standard"

    def test_empty_returns_framework(self, registry):
        """Both the empty string and ``None`` fall back to ``framework``."""
        for blank in ("", None):
            assert registry.source_type_by_name(blank) == "framework"

    def test_heuristic_law(self, registry):
        """Names containing law-like keywords are classified as ``law``."""
        for title in ("Verordnung XYZ", "Some EU Directive"):
            assert registry.source_type_by_name(title) == "law"

    def test_heuristic_guideline(self, registry):
        """Names containing guideline-like keywords are classified as ``guideline``."""
        for title in ("EDPB Leitlinie 99/2025", "BSI Standard 200-1"):
            assert registry.source_type_by_name(title) == "guideline"

    def test_heuristic_framework(self, registry):
        """ENISA / OWASP names without other keywords are classified as ``framework``."""
        # The sample names deliberately avoid the word "Guidelines":
        # "ENISA Cloud Guidelines" matches "guideline" before "enisa" in
        # heuristic order and would therefore not come back as "framework".
        for title in ("ENISA Cloud Report", "OWASP Testing Guide"):
            assert registry.source_type_by_name(title) == "framework"

    def test_unknown_returns_framework(self, registry):
        """A name matching nothing falls back to ``framework``."""
        label = registry.source_type_by_name("Completely Unknown Document")
        assert label == "framework"
|
||||||
|
|
||||||
|
|
||||||
|
# ── is_open_source tests ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsOpenSource:
    """Expectations for ``registry.is_open_source``.

    Ids that the sibling classification tests place in rule 1 or rule 2 must
    report ``True``; a rule-3 id and an unknown id must report ``False``.
    The checks use identity (``is``) so that only real booleans pass.
    """

    def test_rule1_is_open(self, registry):
        """A rule-1 regulation id counts as open."""
        verdict = registry.is_open_source("eu_2016_679")
        assert verdict is True

    def test_rule2_is_open(self, registry):
        """A rule-2 regulation id counts as open."""
        verdict = registry.is_open_source("owasp_asvs")
        assert verdict is True

    def test_rule3_is_not_open(self, registry):
        """A rule-3 regulation id does not count as open."""
        verdict = registry.is_open_source("bsi_tr_03161")
        assert verdict is False

    def test_unknown_is_not_open(self, registry):
        """An id the registry does not know does not count as open."""
        verdict = registry.is_open_source("unknown_thing")
        assert verdict is False
|
||||||
|
|
||||||
|
|
||||||
|
# ── Cache behavior tests ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestCacheBehavior:
    """Expectations for the registry's cache freshness logic.

    Exercises ``_is_stale`` and ``_ensure_loaded`` by manipulating the
    ``_loaded_at`` timestamp relative to ``_CACHE_TTL_SECONDS``.
    """

    def test_fresh_cache_not_stale(self, registry):
        """The registry handed in by the fixture is not reported as stale."""
        assert registry._is_stale() is False

    def test_old_cache_is_stale(self, registry):
        """Back-dating ``_loaded_at`` by more than the TTL makes the cache stale."""
        registry._loaded_at = time.monotonic() - _CACHE_TTL_SECONDS - 1
        assert registry._is_stale() is True

    def test_ensure_loaded_reloads_when_stale(self):
        """``_ensure_loaded`` must call ``_load`` when the cache is stale."""
        reg = RegulationRegistry()
        reg._loaded_at = time.monotonic() - _CACHE_TTL_SECONDS - 100  # force stale

        # Patch ``_load`` the same way the "fresh" test does: the mock records
        # the call, no database is touched, and the attribute is restored on
        # exit instead of staying monkey-patched on the instance.
        with patch.object(reg, "_load") as mock_load:
            reg._ensure_loaded()

        assert mock_load.called, "_load should have been called when cache is stale"

    def test_ensure_loaded_skips_when_fresh(self, registry):
        """``_ensure_loaded`` must not call ``_load`` while the cache is fresh."""
        with patch.object(registry, "_load") as mock_load:
            registry._ensure_loaded()
            mock_load.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
# ── Graceful degradation tests ───────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestGracefulDegradation:
    """Expectations for registry behaviour when the database is unavailable."""

    def test_db_failure_uses_stale_cache(self):
        """If DB fails, stale cache entries are still usable."""
        from sqlalchemy.exc import OperationalError

        session_target = "services.regulation_registry.SessionLocal"
        reg = RegulationRegistry()

        # Step 1: a successful load through a session whose ``execute`` is
        # served by the module's ``_mock_db_execute`` helper.
        working_session = MagicMock()
        working_session.execute = _mock_db_execute
        with patch(session_target, return_value=working_session):
            reg._load()

        # Step 2: back-date the load timestamp so the cache counts as stale.
        reg._loaded_at = time.monotonic() - _CACHE_TTL_SECONDS - 1

        # Step 3: the refresh attempt fails because creating a session raises.
        db_down = OperationalError("connection refused", None, None)
        with patch(session_target, side_effect=db_down):
            reg._ensure_loaded()

        # Step 4: the entries from step 1 must still answer lookups.
        cached = reg.classify_regulation("eu_2016_679")
        assert cached["rule"] == 1

    def test_empty_registry_returns_unknown(self):
        """Unloaded registry returns safe defaults."""
        reg = RegulationRegistry()
        # Mark the never-loaded registry as fresh so the lookup is answered
        # from its empty state.
        reg._loaded_at = time.monotonic()

        fallback = reg.classify_regulation("eu_2016_679")
        assert fallback["rule"] == 3  # safe default
        assert fallback["license"] == "UNKNOWN"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Migration data consistency tests ─────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestMigrationDataConsistency:
    """Verify that the migration script produces valid data.

    Every test calls ``build_rows`` from
    ``scripts.f1_migrate_regulation_registry`` and inspects the returned
    row dictionaries. The import is kept inside each test so the module
    is only loaded when these tests actually run.
    """

    def test_build_rows_produces_data(self):
        """The migration yields a substantial number of rows."""
        from scripts.f1_migrate_regulation_registry import build_rows

        rows = build_rows()
        assert len(rows) > 100  # strictly more than 100 entries

    def test_all_rows_have_required_fields(self):
        """Each row has non-empty key fields and values from the allowed sets."""
        from scripts.f1_migrate_regulation_registry import build_rows

        rows = build_rows()
        for row in rows:
            assert row["regulation_id"], f"Missing regulation_id: {row}"
            assert row["regulation_name_de"], f"Missing name: {row}"
            assert row["license_rule"] in (1, 2, 3), f"Bad rule: {row}"
            assert row["source_type"] in (
                "law", "guideline", "standard", "framework", "restricted"
            ), f"Bad source_type: {row}"
            assert row["jurisdiction"], f"Missing jurisdiction: {row}"
            assert row["status"] in (
                "active", "needs_review", "deprecated"
            ), f"Bad status: {row}"

    def test_no_duplicate_regulation_ids(self):
        """No ``regulation_id`` occurs in more than one row."""
        from collections import Counter

        from scripts.f1_migrate_regulation_registry import build_rows

        rows = build_rows()
        id_counts = Counter(r["regulation_id"] for r in rows)
        # Report each offending id exactly once, in first-seen order.
        duplicates = [rid for rid, seen in id_counts.items() if seen > 1]
        assert not duplicates, f"Duplicates: {duplicates}"

    def test_known_regulations_present(self):
        """A handful of well-known regulation ids are part of the data set."""
        from scripts.f1_migrate_regulation_registry import build_rows

        rows = build_rows()
        ids = {r["regulation_id"] for r in rows}
        assert "eu_2016_679" in ids  # DSGVO
        assert "bdsg" in ids  # BDSG
        assert "nist_sp_800_53" in ids  # NIST
        assert "owasp_asvs" in ids  # OWASP

    def test_owasp_has_attribution(self):
        """The OWASP ASVS row is rule 2 and names OWASP in its attribution."""
        from scripts.f1_migrate_regulation_registry import build_rows

        rows = build_rows()
        owasp = next((r for r in rows if r["regulation_id"] == "owasp_asvs"), None)
        # Fail with a readable message instead of an IndexError when absent.
        assert owasp is not None, "owasp_asvs missing from migration rows"
        assert owasp["attribution"] is not None
        assert "OWASP" in owasp["attribution"]
        assert owasp["license_rule"] == 2
|
||||||
Reference in New Issue
Block a user