refactor(backend/api): extract CanonicalControlService (Step 4 — file 6 of 18)

compliance/api/canonical_control_routes.py (514 LOC) -> 192 LOC thin routes + 316-line CanonicalControlService + 105-line schemas file. Canonical Control Library manages OWASP/NIST/ENISA-anchored security control frameworks and controls. Like company_profile_routes, this file uses raw SQL via sqlalchemy.text() because there are no SQLAlchemy models for canonical_control_frameworks or canonical_controls. Single-service split. Session management moved from bespoke `with SessionLocal() as db:` blocks to Depends(get_db) for consistency. Legacy test imports preserved via re-export (FrameworkResponse, ControlResponse, SimilarityCheckRequest, SimilarityCheckResponse, _control_row). Validation extracted to a module-level `_validate_control_input` helper so both create and update share the same checks. ValidationError (from compliance.domain) replaces raw HTTPException(400) raises. Verified: - 187/187 pytest (173 core + 14 canonical) pass - OpenAPI 360/484 unchanged - mypy compliance/ -> Success on 130 source files - canonical_control_routes.py 514 -> 192 LOC - Hard-cap violations: 13 -> 12 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 19:53:55 +02:00
parent 4fa0dd6f6d
commit b850368ec9
5 changed files with 583 additions and 437 deletions
@@ -0,0 +1,316 @@
+# mypy: disable-error-code="arg-type,assignment,no-any-return,union-attr"
+"""
+Canonical Control Library service — framework + control CRUD with raw SQL.
+
+Phase 1 Step 4: extracted from ``compliance.api.canonical_control_routes``.
+Uses raw SQL via ``sqlalchemy.text()`` because the underlying tables
+(``canonical_control_frameworks``, ``canonical_controls``) have no
+SQLAlchemy model in this repo.
+"""
+
+import json
+import re
+from typing import Any, Optional
+
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+
+from compliance.domain import (
+    ConflictError,
+    NotFoundError,
+    ValidationError,
+)
+from compliance.schemas.canonical_control import (
+    ControlCreateRequest,
+    ControlUpdateRequest,
+    SimilarityCheckRequest,
+)
+
+_VALID_SEVERITIES = ("low", "medium", "high", "critical")  # labels accepted by _validate_control_input
+_CONTROL_ID_RE = re.compile(r"^[A-Z]{2,6}-[0-9]{3}$")  # 2-6 capitals, hyphen, 3 digits (e.g. AUTH-001)
+_JSON_CONTROL_FIELDS = {  # columns that update_control json.dumps()-encodes and casts to jsonb
+    "scope", "requirements", "test_procedure", "evidence", "open_anchors", "tags",
+}
+
+_CONTROL_COLUMNS = """
+    id, framework_id, control_id, title, objective, rationale,
+    scope, requirements, test_procedure, evidence,
+    severity, risk_score, implementation_effort, evidence_confidence,
+    open_anchors, release_state, tags, created_at, updated_at
+"""  # interpolated into every SELECT / RETURNING on canonical_controls; read by name in _control_row
+
+
+def _control_row(r: Any) -> dict[str, Any]:
+    """Serialize a canonical_controls SELECT row to a response dict."""
+    return {
+        "id": str(r.id),
+        "framework_id": str(r.framework_id),
+        "control_id": r.control_id,
+        "title": r.title,
+        "objective": r.objective,
+        "rationale": r.rationale,
+        "scope": r.scope,
+        "requirements": r.requirements,
+        "test_procedure": r.test_procedure,
+        "evidence": r.evidence,
+        "severity": r.severity,
+        "risk_score": float(r.risk_score) if r.risk_score is not None else None,
+        "implementation_effort": r.implementation_effort,
+        "evidence_confidence": (
+            float(r.evidence_confidence) if r.evidence_confidence is not None else None
+        ),
+        "open_anchors": r.open_anchors,
+        "release_state": r.release_state,
+        "tags": r.tags or [],
+        "created_at": r.created_at.isoformat() if r.created_at else None,
+        "updated_at": r.updated_at.isoformat() if r.updated_at else None,
+    }
+
+
+def _framework_row(r: Any) -> dict[str, Any]:
+    return {
+        "id": str(r.id),
+        "framework_id": r.framework_id,
+        "name": r.name,
+        "version": r.version,
+        "description": r.description,
+        "owner": r.owner,
+        "policy_version": r.policy_version,
+        "release_state": r.release_state,
+        "created_at": r.created_at.isoformat() if r.created_at else None,
+        "updated_at": r.updated_at.isoformat() if r.updated_at else None,
+    }
+
+
+def _validate_control_input(
+    severity: Optional[str], risk_score: Optional[float], control_id: Optional[str] = None
+) -> None:
+    if control_id is not None and not _CONTROL_ID_RE.match(control_id):
+        raise ValidationError("control_id must match DOMAIN-NNN (e.g. AUTH-001)")
+    if severity is not None and severity not in _VALID_SEVERITIES:
+        raise ValidationError("severity must be low/medium/high/critical")
+    if risk_score is not None and not (0 <= risk_score <= 10):
+        raise ValidationError("risk_score must be 0..10")
+
+
+class CanonicalControlService:
+    """Business logic for the canonical control library."""
+
+    def __init__(self, db: Session) -> None:
+        self.db = db
+
+    # ------------------------------------------------------------------
+    # Frameworks
+    # ------------------------------------------------------------------
+
+    def list_frameworks(self) -> list[dict[str, Any]]:
+        rows = self.db.execute(
+            text("""
+                SELECT id, framework_id, name, version, description,
+                       owner, policy_version, release_state,
+                       created_at, updated_at
+                FROM canonical_control_frameworks
+                ORDER BY name
+            """)
+        ).fetchall()
+        return [_framework_row(r) for r in rows]
+
+    def get_framework(self, framework_id: str) -> dict[str, Any]:
+        row = self.db.execute(
+            text("""
+                SELECT id, framework_id, name, version, description,
+                       owner, policy_version, release_state,
+                       created_at, updated_at
+                FROM canonical_control_frameworks
+                WHERE framework_id = :fid
+            """),
+            {"fid": framework_id},
+        ).fetchone()
+        if not row:
+            raise NotFoundError("Framework not found")
+        return _framework_row(row)
+
+    def list_framework_controls(
+        self, framework_id: str, severity: Optional[str], release_state: Optional[str]
+    ) -> list[dict[str, Any]]:
+        fw = self.db.execute(
+            text("SELECT id FROM canonical_control_frameworks WHERE framework_id = :fid"),
+            {"fid": framework_id},
+        ).fetchone()
+        if not fw:
+            raise NotFoundError("Framework not found")
+
+        query = f"SELECT {_CONTROL_COLUMNS} FROM canonical_controls WHERE framework_id = :fw_id"
+        params: dict[str, Any] = {"fw_id": str(fw.id)}
+        if severity:
+            query += " AND severity = :sev"
+            params["sev"] = severity
+        if release_state:
+            query += " AND release_state = :rs"
+            params["rs"] = release_state
+        query += " ORDER BY control_id"
+        rows = self.db.execute(text(query), params).fetchall()
+        return [_control_row(r) for r in rows]
+
+    # ------------------------------------------------------------------
+    # Controls
+    # ------------------------------------------------------------------
+
+    def list_controls(
+        self,
+        severity: Optional[str],
+        domain: Optional[str],
+        release_state: Optional[str],
+    ) -> list[dict[str, Any]]:
+        query = f"SELECT {_CONTROL_COLUMNS} FROM canonical_controls WHERE 1=1"
+        params: dict[str, Any] = {}
+        if severity:
+            query += " AND severity = :sev"
+            params["sev"] = severity
+        if domain:
+            query += " AND LEFT(control_id, LENGTH(:dom)) = :dom"
+            params["dom"] = domain.upper()
+        if release_state:
+            query += " AND release_state = :rs"
+            params["rs"] = release_state
+        query += " ORDER BY control_id"
+        rows = self.db.execute(text(query), params).fetchall()
+        return [_control_row(r) for r in rows]
+
+    def get_control(self, control_id: str) -> dict[str, Any]:
+        row = self.db.execute(
+            text(f"SELECT {_CONTROL_COLUMNS} FROM canonical_controls WHERE control_id = :cid"),
+            {"cid": control_id.upper()},
+        ).fetchone()
+        if not row:
+            raise NotFoundError("Control not found")
+        return _control_row(row)
+
+    def create_control(self, body: ControlCreateRequest) -> dict[str, Any]:
+        _validate_control_input(body.severity, body.risk_score, body.control_id)
+
+        fw = self.db.execute(
+            text("SELECT id FROM canonical_control_frameworks WHERE framework_id = :fid"),
+            {"fid": body.framework_id},
+        ).fetchone()
+        if not fw:
+            raise NotFoundError(f"Framework '{body.framework_id}' not found")
+
+        existing = self.db.execute(
+            text(
+                "SELECT id FROM canonical_controls "
+                "WHERE framework_id = :fid AND control_id = :cid"
+            ),
+            {"fid": str(fw.id), "cid": body.control_id},
+        ).fetchone()
+        if existing:
+            raise ConflictError(f"Control '{body.control_id}' already exists")
+
+        row = self.db.execute(
+            text(f"""
+                INSERT INTO canonical_controls (
+                    framework_id, control_id, title, objective, rationale,
+                    scope, requirements, test_procedure, evidence,
+                    severity, risk_score, implementation_effort, evidence_confidence,
+                    open_anchors, release_state, tags
+                ) VALUES (
+                    :fw_id, :cid, :title, :objective, :rationale,
+                    :scope::jsonb, :requirements::jsonb, :test_procedure::jsonb, :evidence::jsonb,
+                    :severity, :risk_score, :effort, :confidence,
+                    :anchors::jsonb, :release_state, :tags::jsonb
+                )
+                RETURNING {_CONTROL_COLUMNS}
+            """),
+            {
+                "fw_id": str(fw.id),
+                "cid": body.control_id,
+                "title": body.title,
+                "objective": body.objective,
+                "rationale": body.rationale,
+                "scope": json.dumps(body.scope),
+                "requirements": json.dumps(body.requirements),
+                "test_procedure": json.dumps(body.test_procedure),
+                "evidence": json.dumps(body.evidence),
+                "severity": body.severity,
+                "risk_score": body.risk_score,
+                "effort": body.implementation_effort,
+                "confidence": body.evidence_confidence,
+                "anchors": json.dumps(body.open_anchors),
+                "release_state": body.release_state,
+                "tags": json.dumps(body.tags),
+            },
+        ).fetchone()
+        self.db.commit()
+        return _control_row(row)
+
+    def update_control(
+        self, control_id: str, body: ControlUpdateRequest
+    ) -> dict[str, Any]:
+        updates = body.dict(exclude_none=True)
+        if not updates:
+            raise ValidationError("No fields to update")
+
+        _validate_control_input(updates.get("severity"), updates.get("risk_score"))
+
+        set_parts: list[str] = []
+        params: dict[str, Any] = {"cid": control_id.upper()}
+        for key, val in updates.items():
+            if key in _JSON_CONTROL_FIELDS:
+                set_parts.append(f"{key} = :{key}::jsonb")
+                params[key] = json.dumps(val)
+            else:
+                set_parts.append(f"{key} = :{key}")
+                params[key] = val
+        set_parts.append("updated_at = NOW()")
+
+        row = self.db.execute(
+            text(f"""
+                UPDATE canonical_controls
+                SET {', '.join(set_parts)}
+                WHERE control_id = :cid
+                RETURNING {_CONTROL_COLUMNS}
+            """),
+            params,
+        ).fetchone()
+        if not row:
+            raise NotFoundError("Control not found")
+        self.db.commit()
+        return _control_row(row)
+
+    def delete_control(self, control_id: str) -> None:
+        result: Any = self.db.execute(
+            text("DELETE FROM canonical_controls WHERE control_id = :cid"),
+            {"cid": control_id.upper()},
+        )
+        if result.rowcount == 0:
+            raise NotFoundError("Control not found")
+        self.db.commit()
+
+    # ------------------------------------------------------------------
+    # Similarity + sources + licenses
+    # ------------------------------------------------------------------
+
+    async def similarity_check(
+        self, control_id: str, body: SimilarityCheckRequest
+    ) -> dict[str, Any]:
+        from compliance.services.similarity_detector import check_similarity
+
+        report = await check_similarity(body.source_text, body.candidate_text)
+        return {
+            "control_id": control_id.upper(),
+            "max_exact_run": report.max_exact_run,
+            "token_overlap": report.token_overlap,
+            "ngram_jaccard": report.ngram_jaccard,
+            "embedding_cosine": report.embedding_cosine,
+            "lcs_ratio": report.lcs_ratio,
+            "status": report.status,
+            "details": report.details,
+        }
+
+    def list_sources(self) -> Any:
+        from compliance.services.license_gate import get_source_permissions
+        return get_source_permissions(self.db)
+
+    def list_licenses(self) -> Any:
+        from compliance.services.license_gate import get_license_matrix
+        return get_license_matrix(self.db)