feat(controls): atom-grain path in get_controls_for_use_case

When compliance.atom_classification rows exist for a use case (Haiku pass:
relevant + sub_topic + canonical_obligation), the service reads them and returns
precise, sub-topic-organized controls for that topic; the master-grain seed
remains the fallback for topics that have not been processed yet. Adds an
optional sub_topic query filter, plus a subtopic_counts facet and a granularity
flag in the response.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-14 09:47:49 +02:00
parent cf917ab733
commit 4d01e99ca1
2 changed files with 87 additions and 24 deletions
@@ -9,7 +9,7 @@ draw from ONE controls index instead of separate retrievals. Read-only.
from __future__ import annotations from __future__ import annotations
from typing import Any from typing import Any, Optional
from fastapi import APIRouter, Depends, Query from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
@@ -39,11 +39,13 @@ async def list_use_cases(
@router.get("/use-cases/{use_case}/controls") @router.get("/use-cases/{use_case}/controls")
async def controls_for_use_case( async def controls_for_use_case(
use_case: str, use_case: str,
primary_only: bool = Query(False, description="Nur Primaerzweck-Mappings"), primary_only: bool = Query(False, description="master-grain Fallback: nur Primaerzweck"),
sub_topic: Optional[str] = Query(None, description="atom-grain: nur dieses Sub-Thema"),
limit: int = Query(50, ge=1, le=200), limit: int = Query(50, ge=1, le=200),
offset: int = Query(0, ge=0), offset: int = Query(0, ge=0),
svc: UseCaseControlsService = Depends(get_use_case_controls_service), svc: UseCaseControlsService = Depends(get_use_case_controls_service),
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Controls mapped to a topic, ranked by the deterministic precision proxy.""" """Controls for a topic. Atom-grain (Haiku: relevant + sub_topic) wenn vorhanden,
sonst master-grain Seed."""
with translate_domain_errors(): with translate_domain_errors():
return svc.controls_for_use_case(use_case, primary_only, limit, offset) return svc.controls_for_use_case(use_case, primary_only, limit, offset, sub_topic)
@@ -72,6 +72,25 @@ _LIST_SQL = text("""
""") """)
# Atom-grain path: the one-time Haiku classification (atom_classification)
# stores per-atom relevance, sub-topic and canonical obligation. It is preferred
# over the master-grain seed whenever the use case has been processed.
#
# Bind parameters: :uc (use-case key), :sub (sub-topic filter or NULL for "all"),
# :lim / :off (pagination window).
#
# Notes on the query shape:
# * :sub is wrapped in CAST(... AS text) because a bare ``:sub IS NULL`` gives the
#   server no type context; drivers that bind server-side reject it with
#   "could not determine data type of parameter".
# * ac.control_uuid is the final ORDER BY key so that rows with identical
#   sub_topic / severity / title keep a stable order across LIMIT/OFFSET pages.
# * The source_regulation subquery is ordered so the same parent link is picked
#   on every call (ascending order puts NULLs last in PostgreSQL).
_ATOM_LIST_SQL = text("""
    SELECT ac.control_uuid, ac.sub_topic, ac.canonical_obligation,
           cc.control_id, cc.title, cc.objective, cc.severity,
           (SELECT cpl.source_regulation
              FROM control_parent_links cpl
             WHERE cpl.control_uuid = ac.control_uuid
             ORDER BY cpl.source_regulation
             LIMIT 1) AS source_regulation
      FROM atom_classification ac
      JOIN canonical_controls cc ON cc.id = ac.control_uuid
     WHERE ac.use_case = :uc AND ac.relevant = true
       AND (CAST(:sub AS text) IS NULL OR ac.sub_topic = CAST(:sub AS text))
     ORDER BY ac.sub_topic NULLS LAST,
              CASE cc.severity WHEN 'critical' THEN 0 WHEN 'high' THEN 1
                               WHEN 'medium' THEN 2 ELSE 3 END,
              cc.title,
              ac.control_uuid
     LIMIT :lim OFFSET :off
""")
class UseCaseControlsService: class UseCaseControlsService:
"""Topic → controls retrieval over the seeded use-case mappings.""" """Topic → controls retrieval over the seeded use-case mappings."""
@@ -107,27 +126,29 @@ class UseCaseControlsService:
primary_only: bool = False, primary_only: bool = False,
limit: int = 50, limit: int = 50,
offset: int = 0, offset: int = 0,
sub_topic: Optional[str] = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Ranked controls mapped to ``use_case`` (deduplicated master grain).""" """Controls for ``use_case``. Prefers the atom-grain Haiku classification
(precise + sub-topic-organized) when present; falls back to the
master-grain seed otherwise."""
if not is_valid_use_case(use_case): if not is_valid_use_case(use_case):
raise NotFoundError(f"Unknown use_case '{use_case}'") raise NotFoundError(f"Unknown use_case '{use_case}'")
uc = REGISTRY[use_case] uc = REGISTRY[use_case]
lim = min(max(int(limit), 1), 200) lim = min(max(int(limit), 1), 200)
off = max(int(offset), 0) off = max(int(offset), 0)
if self._has_atom_grain(use_case):
return self._atom_grain(uc, lim, off, sub_topic)
# --- master-grain fallback (recall seed) ---
count_sql = ( count_sql = (
"SELECT count(*) FROM mc_use_case_mappings WHERE use_case = :uc" "SELECT count(*) FROM mc_use_case_mappings WHERE use_case = :uc"
+ (" AND is_primary" if primary_only else "") + (" AND is_primary" if primary_only else "")
) )
total = self.db.execute(text(count_sql), {"uc": use_case}).scalar() or 0 total = self.db.execute(text(count_sql), {"uc": use_case}).scalar() or 0
rows = self.db.execute(_LIST_SQL, { rows = self.db.execute(_LIST_SQL, {
"uc": use_case, "uc": use_case, "primary_only": bool(primary_only), "lim": lim, "off": off,
"primary_only": bool(primary_only),
"lim": lim,
"off": off,
}).fetchall() }).fetchall()
controls = [ controls = [
{ {
"id": str(r.id), "id": str(r.id),
@@ -138,24 +159,64 @@ class UseCaseControlsService:
"category": r.category, "category": r.category,
"member_count": r.total_controls, "member_count": r.total_controls,
"is_primary": bool(r.is_primary), "is_primary": bool(r.is_primary),
"confidence": ( "confidence": float(r.confidence) if r.confidence is not None else None,
float(r.confidence) if r.confidence is not None else None
),
"primary_regulation": r.primary_regulation, "primary_regulation": r.primary_regulation,
"relevance": relevance_score( "relevance": relevance_score(
r.title, r.objective, uc.keyword_tokens, r.title, r.objective, uc.keyword_tokens, r.is_primary, r.confidence,
r.is_primary, r.confidence,
), ),
} }
for r in rows for r in rows
] ]
return { return {
"use_case": uc.key, "use_case": uc.key, "label": uc.label, "group": uc.group,
"label": uc.label, "granularity": "master", "total": int(total), "limit": lim, "offset": off,
"group": uc.group, "primary_only": bool(primary_only), "controls": controls,
"total": int(total), }
"limit": lim,
"offset": off, def _has_atom_grain(self, use_case: str) -> bool:
"primary_only": bool(primary_only), if self.db.execute(
"controls": controls, text("SELECT to_regclass('compliance.atom_classification')")
).scalar() is None:
return False
return (self.db.execute(
text("SELECT count(*) FROM atom_classification WHERE use_case = :uc"),
{"uc": use_case},
).scalar() or 0) > 0
def _atom_grain(
    self, uc, lim: int, off: int, sub_topic: Optional[str],
) -> dict[str, Any]:
    """Build the atom-grain response for ``uc`` from ``atom_classification``.

    Runs three read-only queries: the filtered total, the per-sub-topic
    facet (always unfiltered, so every sub-topic stays visible while one is
    selected), and the requested page of controls via ``_ATOM_LIST_SQL``.

    Args:
        uc: Use-case registry entry; ``key``, ``label`` and ``group`` are read.
        lim: Page size, passed straight through as ``LIMIT``.
        off: Row offset, passed straight through as ``OFFSET``.
        sub_topic: Restrict to this sub-topic. ``None`` or a blank string
            means "no filter".

    Returns:
        Response dict with ``granularity`` set to ``"atom"``, the filtered
        ``total``, the effective ``sub_topic`` filter, ``subtopic_counts``
        and the page of ``controls``.
    """
    # A blank query value (``?sub_topic=``) would otherwise match no row at
    # all; treat it the same as an absent filter. Non-blank values are passed
    # through untouched so exact matching is unchanged.
    effective_sub = sub_topic if sub_topic and sub_topic.strip() else None
    filter_params = {"uc": uc.key, "sub": effective_sub}

    # CAST gives the nullable bind an explicit type; a bare ``:sub IS NULL``
    # cannot be typed by drivers that bind parameters server-side.
    total = self.db.execute(text(
        "SELECT count(*) FROM atom_classification "
        "WHERE use_case = :uc AND relevant = true "
        "AND (CAST(:sub AS text) IS NULL OR sub_topic = CAST(:sub AS text))"
    ), filter_params).scalar() or 0

    # Secondary sort on the label keeps the facet order stable when two
    # sub-topics have the same count.
    facet_rows = self.db.execute(text(
        "SELECT COALESCE(sub_topic, '(none)'), count(*) "
        "FROM atom_classification WHERE use_case = :uc AND relevant = true "
        "GROUP BY 1 ORDER BY 2 DESC, 1"
    ), {"uc": uc.key}).fetchall()
    facet = {row[0]: int(row[1]) for row in facet_rows}

    rows = self.db.execute(
        _ATOM_LIST_SQL, {**filter_params, "lim": lim, "off": off},
    ).fetchall()
    controls = [
        {
            "id": str(r.control_uuid),
            "control_id": r.control_id,
            "title": r.title,
            "objective": r.objective,
            "severity": r.severity,
            "sub_topic": r.sub_topic,
            "canonical_obligation": r.canonical_obligation,
            "source_regulation": r.source_regulation,
        }
        for r in rows
    ]

    return {
        "use_case": uc.key, "label": uc.label, "group": uc.group,
        "granularity": "atom", "total": int(total), "limit": lim, "offset": off,
        "sub_topic": effective_sub, "subtopic_counts": facet, "controls": controls,
    }