"""Klassifiziert Master Controls auf Use Cases (n:m) + Verifikations-Methode. Stufe 1 — Seed (kein LLM, gratis): aus vorhandenen Member-Signalen (canonical_controls.scope_doc_type / .category / .verification_method / .evidence_type) via `use_case_registry.seed_classify`. Stufe 2 — LLM (Phase 3): Multi-Label gegen die Registry-Taxonomie. [TODO] Lauf im Container: docker exec bp-compliance-backend \ python /app/scripts/classify_mc_use_cases.py [--limit N] """ from __future__ import annotations import argparse import asyncio import os import asyncpg from compliance.data import use_case_registry as reg _AGG_SQL = """ SELECT mc.id AS mc_uuid, mc.master_control_id, array_agg(DISTINCT cc.scope_doc_type) AS scopes, array_agg(DISTINCT cc.category) AS categories, array_agg(DISTINCT cc.verification_method) AS vmethods, array_agg(DISTINCT cc.evidence_type) AS etypes FROM compliance.master_controls mc JOIN compliance.master_control_members mcm ON mcm.master_control_uuid = mc.id JOIN compliance.canonical_controls cc ON cc.id = mcm.control_uuid GROUP BY mc.id, mc.master_control_id """ async def run_seed(conn, limit: int = 0) -> dict: """Deterministischer Seed → mc_use_case_mappings + mc_verification. Idempotent (ON CONFLICT DO NOTHING); ueberschreibt 'manual' nie.""" sql = _AGG_SQL + (f" LIMIT {limit}" if limit > 0 else "") rows = await conn.fetch(sql) n_mc_with_uc = n_uc_rows = n_verif = 0 for r in rows: ucs, method = reg.seed_classify( r["scopes"], r["categories"], r["vmethods"], r["etypes"], ) for uc in ucs: await conn.execute( """INSERT INTO compliance.mc_use_case_mappings (master_control_uuid, master_control_id, use_case, method, confidence, rationale) VALUES ($1,$2,$3,'seed',0.6,'deterministic seed') ON CONFLICT (master_control_uuid, use_case) DO NOTHING""", r["mc_uuid"], r["master_control_id"], uc, ) n_uc_rows += 1 if ucs: n_mc_with_uc += 1 if method: await conn.execute( """INSERT INTO compliance.mc_verification (master_control_uuid, master_control_id, verification_method, method, confidence, rationale) VALUES ($1,$2,$3,'seed',0.6,'deterministic seed') ON CONFLICT (master_control_uuid) DO NOTHING""", r["mc_uuid"], r["master_control_id"], method, ) n_verif += 1 total = await conn.fetchval( "SELECT count(*) FROM compliance.mc_use_case_mappings") await conn.execute( """INSERT INTO compliance.mc_use_case_sync_state (registry_hash, stage, total_mappings, mcs_classified) VALUES ($1,'seed',$2,$3)""", reg.registry_hash(), total, n_mc_with_uc, ) return {"mcs": len(rows), "mcs_with_use_case": n_mc_with_uc, "use_case_rows": n_uc_rows, "verification_rows": n_verif} async def _main() -> None: ap = argparse.ArgumentParser() ap.add_argument("--limit", type=int, default=0) ap.add_argument("--with-llm", action="store_true", help="Phase 3 — noch nicht implementiert") args = ap.parse_args() if args.with_llm: raise SystemExit("LLM-Stufe (Phase 3) noch nicht implementiert.") conn = await asyncpg.connect(os.getenv("DATABASE_URL")) try: stats = await run_seed(conn, args.limit) finally: await conn.close() print("Seed fertig:", stats) if __name__ == "__main__": asyncio.run(_main())