feat: Control Library UI, dedup migration, QA tooling, docs
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 31s
CI/CD / test-python-backend-compliance (push) Successful in 1m35s
CI/CD / test-python-document-crawler (push) Successful in 20s
CI/CD / test-python-dsms-gateway (push) Successful in 17s
CI/CD / validate-canonical-controls (push) Successful in 10s
CI/CD / Deploy (push) Has been skipped
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 31s
CI/CD / test-python-backend-compliance (push) Successful in 1m35s
CI/CD / test-python-document-crawler (push) Successful in 20s
CI/CD / test-python-dsms-gateway (push) Successful in 17s
CI/CD / validate-canonical-controls (push) Successful in 10s
CI/CD / Deploy (push) Has been skipped
- Control Library: parent control display, ObligationTypeBadge, GenerationStrategyBadge variants, evidence string fallback
- API: expose parent_control_uuid/id/title in canonical controls
- Fix: DSFA SQLAlchemy 2.0 Row._mapping compatibility
- Migration 074: control_parent_links + control_dedup_reviews tables
- QA scripts: benchmark, gap analysis, OSCAL import, OWASP cleanup, phase5 normalize, phase74 gap fill, sync_db, run_job
- Docs: dedup engine, RAG benchmark, lessons learned, pipeline docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -174,6 +174,9 @@ _CONTROL_COLS = """id, framework_id, control_id, title, objective, rationale,
|
||||
customer_visible, verification_method, category,
|
||||
target_audience, generation_metadata, generation_strategy,
|
||||
applicable_industries, applicable_company_size, scope_conditions,
|
||||
parent_control_uuid, decomposition_method, pipeline_version,
|
||||
(SELECT p.control_id FROM canonical_controls p WHERE p.id = canonical_controls.parent_control_uuid) AS parent_control_id,
|
||||
(SELECT p.title FROM canonical_controls p WHERE p.id = canonical_controls.parent_control_uuid) AS parent_control_title,
|
||||
created_at, updated_at"""
|
||||
|
||||
|
||||
@@ -798,6 +801,11 @@ def _control_row(r) -> dict:
|
||||
"applicable_industries": getattr(r, "applicable_industries", None),
|
||||
"applicable_company_size": getattr(r, "applicable_company_size", None),
|
||||
"scope_conditions": getattr(r, "scope_conditions", None),
|
||||
"parent_control_uuid": str(r.parent_control_uuid) if getattr(r, "parent_control_uuid", None) else None,
|
||||
"parent_control_id": getattr(r, "parent_control_id", None),
|
||||
"parent_control_title": getattr(r, "parent_control_title", None),
|
||||
"decomposition_method": getattr(r, "decomposition_method", None),
|
||||
"pipeline_version": getattr(r, "pipeline_version", None),
|
||||
"created_at": r.created_at.isoformat() if r.created_at else None,
|
||||
"updated_at": r.updated_at.isoformat() if r.updated_at else None,
|
||||
}
|
||||
|
||||
@@ -200,6 +200,9 @@ def _get_tenant_id(tenant_id: Optional[str]) -> str:
|
||||
def _dsfa_to_response(row) -> dict:
|
||||
"""Convert a DB row to a JSON-serializable dict."""
|
||||
import json
|
||||
# SQLAlchemy 2.0: Row objects need ._mapping for string-key access
|
||||
if hasattr(row, "_mapping"):
|
||||
row = row._mapping
|
||||
|
||||
def _parse_arr(val):
|
||||
"""Parse a JSONB array field → list."""
|
||||
@@ -558,8 +561,9 @@ async def create_dsfa(
|
||||
).fetchone()
|
||||
|
||||
db.flush()
|
||||
row_id = row._mapping["id"] if hasattr(row, "_mapping") else row[0]
|
||||
_log_audit(
|
||||
db, tid, row["id"], "CREATE", request.created_by,
|
||||
db, tid, row_id, "CREATE", request.created_by,
|
||||
new_values={"title": request.title, "status": request.status},
|
||||
)
|
||||
db.commit()
|
||||
|
||||
73
backend-compliance/migrations/074_control_dedup.sql
Normal file
73
backend-compliance/migrations/074_control_dedup.sql
Normal file
@@ -0,0 +1,73 @@
|
||||
-- Migration 074: Control Dedup Engine — DB Schema
-- Supports the 4-stage dedup pipeline for atomic controls (Pass 0b).
--
-- Tables:
--   1. control_parent_links  — M:N parent linking (one control → many regulations)
--   2. control_dedup_reviews — Review queue for borderline matches (0.85-0.92)
--
-- Both tables reference canonical_controls(id) and obligation_candidates(id),
-- so those tables must already exist when this script runs.
-- The whole script runs in one transaction: either both tables (with their
-- indexes and comments) are created, or nothing is.
-- NOTE(review): gen_random_uuid() is assumed to be available on the target
-- server (built in on recent PostgreSQL, pgcrypto otherwise) — confirm.

BEGIN;

-- =============================================================================
-- 1. Control Parent Links (M:N)
-- Enables "one control fulfills five laws" — the biggest USP.
-- An atomic control can have multiple parent controls from different
-- regulations/obligations. This replaces the 1:1 parent_control_uuid FK.
-- =============================================================================

CREATE TABLE IF NOT EXISTS control_parent_links (
    id                      UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Child and parent both cascade: deleting either control removes the link.
    control_uuid            UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
    parent_control_uuid     UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
    link_type               VARCHAR(30) NOT NULL DEFAULT 'decomposition'
        CHECK (link_type IN ('decomposition', 'dedup_merge', 'manual', 'crosswalk')),
    -- Value in [0, 1]; NULL is still accepted because the column is nullable.
    confidence              NUMERIC(3,2) DEFAULT 1.0
        CHECK (confidence >= 0 AND confidence <= 1),
    source_regulation       VARCHAR(100),
    source_article          VARCHAR(100),
    -- No ON DELETE action given, so the default (NO ACTION) applies: an
    -- obligation candidate cannot be deleted while a link still points at it.
    -- NOTE(review): confirm this is intended rather than SET NULL.
    obligation_candidate_id UUID REFERENCES obligation_candidates(id),
    created_at              TIMESTAMPTZ DEFAULT NOW(),
    -- At most one link per (child, parent) pair.
    CONSTRAINT uq_parent_link UNIQUE (control_uuid, parent_control_uuid),
    -- A control must never be recorded as its own parent; the unique
    -- constraint alone does not prevent that.
    CONSTRAINT chk_cpl_no_self_link CHECK (control_uuid <> parent_control_uuid)
);

-- NOTE(review): the index backing uq_parent_link already leads with
-- control_uuid, so idx_cpl_control is probably redundant; kept so the set of
-- index names created by this migration is unchanged.
CREATE INDEX IF NOT EXISTS idx_cpl_control ON control_parent_links(control_uuid);
CREATE INDEX IF NOT EXISTS idx_cpl_parent ON control_parent_links(parent_control_uuid);
CREATE INDEX IF NOT EXISTS idx_cpl_type ON control_parent_links(link_type);

COMMENT ON TABLE control_parent_links IS
    'M:N parent links — one atomic control can fulfill multiple regulations/obligations. USP: "1 Control erfuellt 5 Gesetze"';

-- =============================================================================
-- 2. Control Dedup Reviews
-- Queue for borderline matches (similarity 0.85-0.92) that need human review.
-- Reviewed entries get status updated to accepted/rejected.
-- =============================================================================

CREATE TABLE IF NOT EXISTS control_dedup_reviews (
    id                      UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Candidate fields are stored as plain values, not as a foreign key.
    candidate_control_id    VARCHAR(30) NOT NULL,
    candidate_title         TEXT NOT NULL,
    candidate_objective     TEXT,
    -- The canonical_controls FKs below have no ON DELETE action (default
    -- NO ACTION): a referenced control cannot be deleted while a review row
    -- points at it. NOTE(review): confirm, since control_parent_links cascades.
    matched_control_uuid    UUID REFERENCES canonical_controls(id),
    matched_control_id      VARCHAR(30),
    similarity_score        NUMERIC(4,3) DEFAULT 0.0,
    dedup_stage             VARCHAR(40) NOT NULL,
    dedup_details           JSONB DEFAULT '{}',
    parent_control_uuid     UUID REFERENCES canonical_controls(id),
    obligation_candidate_id UUID REFERENCES obligation_candidates(id),
    -- NOT NULL is required in addition to the CHECK: a CHECK passes when its
    -- expression is NULL, so a NULL status would bypass the allowed-value
    -- list and never match a filter such as review_status = 'pending'.
    review_status           VARCHAR(20) NOT NULL DEFAULT 'pending'
        CHECK (review_status IN ('pending', 'accepted_link', 'accepted_new', 'rejected')),
    reviewed_by             VARCHAR(100),
    reviewed_at             TIMESTAMPTZ,
    review_notes            TEXT,
    created_at              TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_cdr_status ON control_dedup_reviews(review_status);
CREATE INDEX IF NOT EXISTS idx_cdr_matched ON control_dedup_reviews(matched_control_uuid);
CREATE INDEX IF NOT EXISTS idx_cdr_parent ON control_dedup_reviews(parent_control_uuid);
CREATE INDEX IF NOT EXISTS idx_cdr_stage ON control_dedup_reviews(dedup_stage);

COMMENT ON TABLE control_dedup_reviews IS
    'Review queue for borderline dedup matches (similarity 0.85-0.92). Human decides: link or new control.';

COMMIT;
|
||||
@@ -195,6 +195,11 @@ class TestControlRowConversion:
|
||||
"release_state": "draft",
|
||||
"tags": ["mfa"],
|
||||
"generation_strategy": "ungrouped",
|
||||
"parent_control_uuid": None,
|
||||
"parent_control_id": None,
|
||||
"parent_control_title": None,
|
||||
"decomposition_method": None,
|
||||
"pipeline_version": None,
|
||||
"created_at": now,
|
||||
"updated_at": now,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user