feat: Control Library UI, dedup migration, QA tooling, docs
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 31s
CI/CD / test-python-backend-compliance (push) Successful in 1m35s
CI/CD / test-python-document-crawler (push) Successful in 20s
CI/CD / test-python-dsms-gateway (push) Successful in 17s
CI/CD / validate-canonical-controls (push) Successful in 10s
CI/CD / Deploy (push) Has been skipped

- Control Library: parent control display, ObligationTypeBadge,
  GenerationStrategyBadge variants, evidence string fallback
- API: expose parent_control_uuid/id/title in canonical controls
- Fix: DSFA SQLAlchemy 2.0 Row._mapping compatibility
- Migration 074: control_parent_links + control_dedup_reviews tables
- QA scripts: benchmark, gap analysis, OSCAL import, OWASP cleanup,
  phase5 normalize, phase74 gap fill, sync_db, run_job
- Docs: dedup engine, RAG benchmark, lessons learned, pipeline docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-21 11:56:08 +01:00
parent c52dbdb8f1
commit 643b26618f
28 changed files with 5781 additions and 75 deletions

View File

@@ -174,6 +174,9 @@ _CONTROL_COLS = """id, framework_id, control_id, title, objective, rationale,
customer_visible, verification_method, category,
target_audience, generation_metadata, generation_strategy,
applicable_industries, applicable_company_size, scope_conditions,
parent_control_uuid, decomposition_method, pipeline_version,
(SELECT p.control_id FROM canonical_controls p WHERE p.id = canonical_controls.parent_control_uuid) AS parent_control_id,
(SELECT p.title FROM canonical_controls p WHERE p.id = canonical_controls.parent_control_uuid) AS parent_control_title,
created_at, updated_at"""
@@ -798,6 +801,11 @@ def _control_row(r) -> dict:
"applicable_industries": getattr(r, "applicable_industries", None),
"applicable_company_size": getattr(r, "applicable_company_size", None),
"scope_conditions": getattr(r, "scope_conditions", None),
"parent_control_uuid": str(r.parent_control_uuid) if getattr(r, "parent_control_uuid", None) else None,
"parent_control_id": getattr(r, "parent_control_id", None),
"parent_control_title": getattr(r, "parent_control_title", None),
"decomposition_method": getattr(r, "decomposition_method", None),
"pipeline_version": getattr(r, "pipeline_version", None),
"created_at": r.created_at.isoformat() if r.created_at else None,
"updated_at": r.updated_at.isoformat() if r.updated_at else None,
}

View File

@@ -200,6 +200,9 @@ def _get_tenant_id(tenant_id: Optional[str]) -> str:
def _dsfa_to_response(row) -> dict:
"""Convert a DB row to a JSON-serializable dict."""
import json
# SQLAlchemy 2.0: Row objects need ._mapping for string-key access
if hasattr(row, "_mapping"):
row = row._mapping
def _parse_arr(val):
"""Parse a JSONB array field → list."""
@@ -558,8 +561,9 @@ async def create_dsfa(
).fetchone()
db.flush()
row_id = row._mapping["id"] if hasattr(row, "_mapping") else row[0]
_log_audit(
db, tid, row["id"], "CREATE", request.created_by,
db, tid, row_id, "CREATE", request.created_by,
new_values={"title": request.title, "status": request.status},
)
db.commit()

View File

@@ -0,0 +1,73 @@
-- Migration 074: Control Dedup Engine — DB Schema
-- Supports the 4-stage dedup pipeline for atomic controls (Pass 0b).
--
-- Tables:
-- 1. control_parent_links — M:N parent linking (one control → many regulations)
-- 2. control_dedup_reviews — Review queue for borderline matches (0.85-0.92)
BEGIN;
-- =============================================================================
-- 1. Control Parent Links (M:N)
-- Enables "1 Control erfuellt 5 Gesetze" — the biggest USP.
-- An atomic control can have multiple parent controls from different
-- regulations/obligations. This replaces the 1:1 parent_control_uuid FK.
-- =============================================================================
CREATE TABLE IF NOT EXISTS control_parent_links (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
parent_control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
link_type VARCHAR(30) NOT NULL DEFAULT 'decomposition'
CHECK (link_type IN ('decomposition', 'dedup_merge', 'manual', 'crosswalk')),
confidence NUMERIC(3,2) DEFAULT 1.0
CHECK (confidence >= 0 AND confidence <= 1),
source_regulation VARCHAR(100),
source_article VARCHAR(100),
obligation_candidate_id UUID REFERENCES obligation_candidates(id),
created_at TIMESTAMPTZ DEFAULT NOW(),
CONSTRAINT uq_parent_link UNIQUE (control_uuid, parent_control_uuid)
);
CREATE INDEX IF NOT EXISTS idx_cpl_control ON control_parent_links(control_uuid);
CREATE INDEX IF NOT EXISTS idx_cpl_parent ON control_parent_links(parent_control_uuid);
CREATE INDEX IF NOT EXISTS idx_cpl_type ON control_parent_links(link_type);
COMMENT ON TABLE control_parent_links IS
'M:N parent links — one atomic control can fulfill multiple regulations/obligations. USP: "1 Control erfuellt 5 Gesetze"';
-- =============================================================================
-- 2. Control Dedup Reviews
-- Queue for borderline matches (similarity 0.85-0.92) that need human review.
-- Reviewed entries get status updated to accepted/rejected.
-- =============================================================================
CREATE TABLE IF NOT EXISTS control_dedup_reviews (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
candidate_control_id VARCHAR(30) NOT NULL,
candidate_title TEXT NOT NULL,
candidate_objective TEXT,
matched_control_uuid UUID REFERENCES canonical_controls(id),
matched_control_id VARCHAR(30),
similarity_score NUMERIC(4,3) DEFAULT 0.0,
dedup_stage VARCHAR(40) NOT NULL,
dedup_details JSONB DEFAULT '{}',
parent_control_uuid UUID REFERENCES canonical_controls(id),
obligation_candidate_id UUID REFERENCES obligation_candidates(id),
review_status VARCHAR(20) DEFAULT 'pending'
CHECK (review_status IN ('pending', 'accepted_link', 'accepted_new', 'rejected')),
reviewed_by VARCHAR(100),
reviewed_at TIMESTAMPTZ,
review_notes TEXT,
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_cdr_status ON control_dedup_reviews(review_status);
CREATE INDEX IF NOT EXISTS idx_cdr_matched ON control_dedup_reviews(matched_control_uuid);
CREATE INDEX IF NOT EXISTS idx_cdr_parent ON control_dedup_reviews(parent_control_uuid);
CREATE INDEX IF NOT EXISTS idx_cdr_stage ON control_dedup_reviews(dedup_stage);
COMMENT ON TABLE control_dedup_reviews IS
'Review queue for borderline dedup matches (similarity 0.85-0.92). Human decides: link or new control.';
COMMIT;

View File

@@ -195,6 +195,11 @@ class TestControlRowConversion:
"release_state": "draft",
"tags": ["mfa"],
"generation_strategy": "ungrouped",
"parent_control_uuid": None,
"parent_control_id": None,
"parent_control_title": None,
"decomposition_method": None,
"pipeline_version": None,
"created_at": now,
"updated_at": now,
}