652e3a65a3
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-consent (push) Successful in 36s
CI / test-python-voice (push) Successful in 33s
CI / test-bqas (push) Successful in 31s
Migrates ACTION_TYPES (26+8 types), _NEGATIVE_PATTERNS (22), _ACTION_SYNONYMS (65), and _OBJECT_SYNONYMS (75) from hardcoded dicts to DB tables. - SQL migration: 003_action_object_ontology.sql (3 tables) - Migration scripts: f2_migrate_actions.py (34 types, 145 synonyms), f3_migrate_objects.py (75 objects) - OntologyRegistry cache: 5min TTL, raises RuntimeError if empty (safe fallback to dicts) - control_ontology.classify_action/get_phase delegate to DB with dict fallback - control_dedup.normalize_action/normalize_object delegate to DB with dict fallback - 25 new tests, 446 total pass, 0 regressions Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
101 lines
3.2 KiB
Python
101 lines
3.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
F3 Migration: Populate object_synonyms from hardcoded dict.
|
|
|
|
Source: _OBJECT_SYNONYMS (control_dedup.py) — 75 synonyms
|
|
|
|
Usage:
|
|
python3 scripts/f3_migrate_objects.py --dry-run
|
|
python3 scripts/f3_migrate_objects.py --db-host macmini
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
|
|
from services.control_dedup import _OBJECT_SYNONYMS # noqa: E402
|
|
|
|
|
|
# Characters a lower-cased synonym may consist of to be treated as
# "plain ASCII" (and therefore tentatively English) by the heuristic below.
_ASCII_SYNONYM_CHARS = frozenset("abcdefghijklmnopqrstuvwxyz0123456789 -_")

# Plain-ASCII synonyms that are nevertheless tagged as German.
_GERMAN_ASCII_SYNONYMS = frozenset({
    "passwort", "kennwort", "zugangsdaten", "fernzugriff",
    "sitzung", "firewall", "netzwerk", "vorfall",
    "schwachstelle", "richtlinie", "schulung",
    "protokoll", "datensicherung", "wiederherstellung",
})


def _detect_language(synonym_lower: str) -> str:
    """Return "en" or "de" for an already lower-cased synonym.

    Heuristic: anything containing a character outside the plain-ASCII
    alphabet (e.g. an umlaut) is German; pure-ASCII strings are English
    unless they appear in the explicit German override list.
    """
    if not _ASCII_SYNONYM_CHARS.issuperset(synonym_lower):
        return "de"
    if synonym_lower in _GERMAN_ASCII_SYNONYMS:
        return "de"
    return "en"


def build_rows(synonyms=None) -> list[dict]:
    """Build ``object_synonyms`` rows from a synonym -> canonical mapping.

    Args:
        synonyms: Mapping of synonym string to canonical token. When
            omitted (``None``), the module-level ``_OBJECT_SYNONYMS``
            dict is used, which is the original behavior.

    Returns:
        One dict per mapping entry, in mapping order, with the keys
        ``canonical_token``, ``synonym`` (lower-cased), ``language``
        ("de" or "en") and ``source`` (always "migration").
    """
    # Compare against None explicitly so an intentionally empty mapping
    # yields no rows instead of silently falling back to the default.
    if synonyms is None:
        synonyms = _OBJECT_SYNONYMS

    rows = []
    for synonym, canonical in synonyms.items():
        lower = synonym.lower()
        rows.append({
            "canonical_token": canonical,
            "synonym": lower,
            "language": _detect_language(lower),
            "source": "migration",
        })
    return rows
|
|
|
|
|
|
def insert_via_sqlalchemy(rows: list[dict], db_host: str) -> None:
    """Upsert *rows* into ``object_synonyms`` on the PostgreSQL host *db_host*.

    All rows are written in a single transaction: either every row is
    stored or, if any statement fails, none are. Existing
    ``(synonym, language)`` pairs have their ``canonical_token`` and
    ``source`` overwritten.

    Args:
        rows: Dicts with the keys ``canonical_token``, ``synonym``,
            ``language`` and ``source`` (bound by name into the statement).
        db_host: Host name or address of the PostgreSQL server.
    """
    # Imported lazily so a --dry-run does not require SQLAlchemy.
    from sqlalchemy import create_engine, text

    # NOTE(review): user, password, port and database name are hardcoded
    # here; consider moving them to configuration or the environment.
    url = "postgresql://breakpilot:breakpilot123@%s:5432/breakpilot_db" % db_host
    engine = create_engine(url)

    upsert = text("""
        INSERT INTO object_synonyms
            (canonical_token, synonym, language, source)
        VALUES
            (:canonical_token, :synonym, :language, :source)
        ON CONFLICT (synonym, language) DO UPDATE SET
            canonical_token = EXCLUDED.canonical_token,
            source = EXCLUDED.source
    """)

    # Materialize once so any iterable is accepted and can be counted.
    batch = list(rows)

    try:
        # engine.begin() commits on success and rolls back on error.
        with engine.begin() as conn:
            conn.execute(text("SET search_path TO compliance, public"))
            # A list of parameter dicts runs as one executemany call; it is
            # skipped for empty input because an empty list would be treated
            # as "no parameters" and leave the binds unresolved.
            if batch:
                conn.execute(upsert, batch)
    finally:
        # Release pooled connections; the engine is not reused afterwards.
        engine.dispose()

    print("Inserted %d object_synonyms" % len(batch))
|
|
|
|
|
|
def main(argv=None):
    """Command-line entry point: print synonym statistics, then migrate.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None``, argparse reads ``sys.argv[1:]``, which is
            the original behavior.

    With ``--dry-run`` only the statistics are printed; otherwise the rows
    are written to the database on ``--db-host`` (default "localhost").
    """
    from collections import Counter

    parser = argparse.ArgumentParser(description="Migrate object synonyms")
    parser.add_argument("--dry-run", action="store_true", help="Print stats only")
    parser.add_argument("--db-host", default="localhost", help="PostgreSQL host")
    args = parser.parse_args(argv)

    rows = build_rows()
    print("Object synonyms: %d" % len(rows))

    # Count how many synonyms map to each canonical token.
    by_canonical = Counter(row["canonical_token"] for row in rows)
    print("Unique canonical tokens: %d" % len(by_canonical))
    # most_common() orders by descending count and keeps first-seen order
    # among ties, matching a stable sort on the negated count.
    print("Top tokens: %s" % dict(by_canonical.most_common(10)))

    if args.dry_run:
        return

    insert_via_sqlalchemy(rows, args.db_host)


if __name__ == "__main__":
    main()
|