feat(training+controls): interactive video pipeline, training blocks, control generator, CE libraries
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 37s
CI/CD / test-python-backend-compliance (push) Successful in 39s
CI/CD / test-python-document-crawler (push) Successful in 26s
CI/CD / test-python-dsms-gateway (push) Successful in 23s
CI/CD / validate-canonical-controls (push) Successful in 12s
CI/CD / Deploy (push) Has been skipped
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 37s
CI/CD / test-python-backend-compliance (push) Successful in 39s
CI/CD / test-python-document-crawler (push) Successful in 26s
CI/CD / test-python-dsms-gateway (push) Successful in 23s
CI/CD / validate-canonical-controls (push) Successful in 12s
CI/CD / Deploy (push) Has been skipped
Interactive Training Videos (CP-TRAIN): - DB migration 022: training_checkpoints + checkpoint_progress tables - NarratorScript generation via Anthropic (AI Teacher persona, German) - TTS batch synthesis + interactive video pipeline (slides + checkpoint slides + FFmpeg) - 4 new API endpoints: generate-interactive, interactive-manifest, checkpoint submit, checkpoint progress - InteractiveVideoPlayer component (HTML5 Video, quiz overlay, seek protection, progress tracking) - Learner portal integration with automatic completion on all checkpoints passed - 30 new tests (handler validation + grading logic + manifest/progress + seek protection) Training Blocks: - Block generator, block store, block config CRUD + preview/generate endpoints - Migration 021: training_blocks schema Control Generator + Canonical Library: - Control generator routes + service enhancements - Canonical control library helpers, sidebar entry - Citation backfill service + tests - CE libraries data (hazard, protection, evidence, lifecycle, components) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -22,6 +22,7 @@ Endpoints:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Optional
|
||||
|
||||
@@ -277,8 +278,8 @@ async def list_framework_controls(
|
||||
query += " AND category = :cat"
|
||||
params["cat"] = category
|
||||
if target_audience:
|
||||
query += " AND target_audience = :ta"
|
||||
params["ta"] = target_audience
|
||||
query += " AND target_audience::jsonb @> (:ta)::jsonb"
|
||||
params["ta"] = json.dumps([target_audience])
|
||||
|
||||
query += " ORDER BY control_id"
|
||||
rows = db.execute(text(query), params).fetchall()
|
||||
@@ -329,8 +330,8 @@ async def list_controls(
|
||||
query += " AND category = :cat"
|
||||
params["cat"] = category
|
||||
if target_audience:
|
||||
query += " AND target_audience = :ta"
|
||||
params["ta"] = target_audience
|
||||
query += " AND target_audience LIKE :ta_pattern"
|
||||
params["ta_pattern"] = f'%"{target_audience}"%'
|
||||
if source:
|
||||
if source == "__none__":
|
||||
query += " AND (source_citation IS NULL OR source_citation->>'source' IS NULL OR source_citation->>'source' = '')"
|
||||
@@ -398,8 +399,8 @@ async def count_controls(
|
||||
query += " AND category = :cat"
|
||||
params["cat"] = category
|
||||
if target_audience:
|
||||
query += " AND target_audience = :ta"
|
||||
params["ta"] = target_audience
|
||||
query += " AND target_audience LIKE :ta_pattern"
|
||||
params["ta_pattern"] = f'%"{target_audience}"%'
|
||||
if source:
|
||||
if source == "__none__":
|
||||
query += " AND (source_citation IS NULL OR source_citation->>'source' IS NULL OR source_citation->>'source' = '')"
|
||||
|
||||
@@ -26,6 +26,13 @@ from compliance.services.control_generator import (
|
||||
ControlGeneratorPipeline,
|
||||
GeneratorConfig,
|
||||
ALL_COLLECTIONS,
|
||||
VALID_CATEGORIES,
|
||||
VALID_DOMAINS,
|
||||
_detect_category,
|
||||
_detect_domain,
|
||||
_llm_local,
|
||||
_parse_llm_json,
|
||||
CATEGORY_LIST_STR,
|
||||
)
|
||||
from compliance.services.citation_backfill import CitationBackfill, BackfillResult
|
||||
from compliance.services.rag_client import get_rag_client
|
||||
@@ -42,6 +49,7 @@ class GenerateRequest(BaseModel):
|
||||
domain: Optional[str] = None
|
||||
collections: Optional[List[str]] = None
|
||||
max_controls: int = 50
|
||||
max_chunks: int = 1000 # Default: process max 1000 chunks per job (respects document boundaries)
|
||||
batch_size: int = 5
|
||||
skip_web_search: bool = False
|
||||
dry_run: bool = False
|
||||
@@ -57,6 +65,7 @@ class GenerateResponse(BaseModel):
|
||||
controls_needs_review: int = 0
|
||||
controls_too_close: int = 0
|
||||
controls_duplicates_found: int = 0
|
||||
controls_qa_fixed: int = 0
|
||||
errors: list = []
|
||||
controls: list = []
|
||||
|
||||
@@ -132,6 +141,7 @@ async def start_generation(req: GenerateRequest):
|
||||
domain=req.domain,
|
||||
batch_size=req.batch_size,
|
||||
max_controls=req.max_controls,
|
||||
max_chunks=req.max_chunks,
|
||||
skip_web_search=req.skip_web_search,
|
||||
dry_run=req.dry_run,
|
||||
)
|
||||
@@ -338,6 +348,188 @@ async def review_control(control_id: str, req: ReviewRequest):
|
||||
db.close()
|
||||
|
||||
|
||||
class BulkReviewRequest(BaseModel):
    """Request body for the bulk-review endpoint.

    Selects every control currently in ``release_state`` and moves all of
    them to a new state determined by ``action`` (and, for approvals,
    optionally by ``new_state``).
    """

    # Filter: which controls to bulk-review (matched against release_state).
    release_state: str
    # Either "approve" or "reject".
    action: str
    # Optional override of the target state; only read when approving.
    new_state: Optional[str] = None
|
||||
|
||||
|
||||
@router.post("/generate/bulk-review")
async def bulk_review(req: BulkReviewRequest):
    """Move every control in one release state to another in a single UPDATE.

    Example: rejecting all ``needs_review`` controls sets them to
    ``deprecated``.

    Approving moves controls to ``req.new_state`` (``draft`` when no override
    is given); rejecting always moves them to ``deprecated``.

    Returns:
        A dict with the requested action, the source and target states and
        the number of rows that were updated.

    Raises:
        HTTPException: 400 when the filter state, the action, or the
            resulting target state is not one of the accepted values.
    """
    reviewable_states = ("needs_review", "too_close", "duplicate")
    if req.release_state not in reviewable_states:
        raise HTTPException(status_code=400, detail=f"Invalid filter state: {req.release_state}")

    # Resolve the target state from the requested action.
    if req.action == "reject":
        target = "deprecated"
    elif req.action == "approve":
        target = req.new_state if req.new_state else "draft"
    else:
        raise HTTPException(status_code=400, detail=f"Unknown action: {req.action}")

    allowed_targets = ("draft", "review", "approved", "deprecated", "needs_review")
    if target not in allowed_targets:
        raise HTTPException(status_code=400, detail=f"Invalid target state: {target}")

    update_stmt = text("""
        UPDATE canonical_controls
        SET release_state = :target, updated_at = NOW()
        WHERE release_state = :source
        RETURNING control_id
    """)
    bind_values = {"source": req.release_state, "target": target}

    db = SessionLocal()
    try:
        # RETURNING yields one row per updated control; only the count is reported.
        updated_rows = db.execute(update_stmt, bind_values).fetchall()
        db.commit()

        return {
            "action": req.action,
            "source_state": req.release_state,
            "target_state": target,
            "affected_count": len(updated_rows),
        }
    finally:
        db.close()
|
||||
|
||||
|
||||
class QAReclassifyRequest(BaseModel):
    """Request body for the QA reclassification endpoint."""

    # How many controls to reclassify per run.
    limit: int = 100
    # Preview only by default; nothing is written while this is True.
    dry_run: bool = True
    # Only reclassify controls of this category.
    filter_category: Optional[str] = None
    # Only reclassify controls whose control_id starts with this prefix.
    filter_domain_prefix: Optional[str] = None
|
||||
|
||||
|
||||
def _qa_requirements_preview(requirements_json):
    """Return the first three requirements of a JSON list as one comma-joined string.

    Returns an empty string when the value is not valid JSON or does not
    decode to a list; the preview is best-effort prompt context only.
    """
    try:
        reqs = json.loads(requirements_json)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-string input.
        return ""
    if not isinstance(reqs, list):
        return ""
    return ", ".join(str(r) for r in reqs[:3])


def _qa_build_prompt(title, objective, reqs_text, current_prefix,
                     current_category, kw_domain, kw_category):
    """Build the (German) arbitration prompt sent to the local LLM."""
    return f"""Pruefe dieses Compliance-Control auf korrekte Klassifizierung.

Titel: {title[:100]}
Ziel: {objective[:200]}
Anforderungen: {reqs_text[:200]}

Aktuelle Zuordnung: domain={current_prefix}, category={current_category}
Keyword-Erkennung: domain={kw_domain}, category={kw_category}

Welche Zuordnung ist korrekt? Antworte NUR als JSON:
{{"domain": "KUERZEL", "category": "kategorie_name", "reason": "kurze Begruendung"}}

Domains: AUTH=Authentifizierung, CRYP=Kryptographie, NET=Netzwerk, DATA=Datenschutz, LOG=Logging, ACC=Zugriffskontrolle, SEC=IT-Sicherheit, INC=Vorfallmanagement, AI=KI, COMP=Compliance, GOV=Behoerden, LAB=Arbeitsrecht, FIN=Finanzregulierung, TRD=Gewerbe, ENV=Umwelt, HLT=Gesundheit
Kategorien: {CATEGORY_LIST_STR}"""


@router.post("/generate/qa-reclassify")
async def qa_reclassify(req: QAReclassifyRequest):
    """Run QA reclassification on existing controls using the local LLM.

    Loads up to ``req.limit`` non-deprecated controls (newest first,
    optionally filtered by category and control_id prefix). For each control
    whose keyword-detected category/domain disagrees with its current
    category / control_id prefix, the local LLM is asked to arbitrate.

    Only a changed *category* is written back, and only when
    ``req.dry_run`` is False. The suggested domain is reported in the
    result but never persisted by this endpoint.

    Returns:
        A dict with ``checked`` (controls inspected), ``mismatches``
        (controls sent to the LLM), ``fixes`` (one entry per LLM verdict,
        with an ``applied`` flag) and ``errors`` (per-control failures).
    """
    db = SessionLocal()
    try:
        # Load controls to check
        where_clauses = ["release_state NOT IN ('deprecated')"]
        params = {"limit": req.limit}
        if req.filter_category:
            where_clauses.append("category = :cat")
            params["cat"] = req.filter_category
        if req.filter_domain_prefix:
            where_clauses.append("control_id LIKE :prefix")
            params["prefix"] = f"{req.filter_domain_prefix}-%"

        # Only fixed clause strings are interpolated; all values are bound parameters.
        where_sql = " AND ".join(where_clauses)
        rows = db.execute(
            text(f"""
                SELECT id, control_id, title, objective, category,
                       COALESCE(requirements::text, '[]') as requirements,
                       COALESCE(source_original_text, '') as source_text
                FROM canonical_controls
                WHERE {where_sql}
                ORDER BY created_at DESC
                LIMIT :limit
            """),
            params,
        ).fetchall()

        results = {"checked": 0, "mismatches": 0, "fixes": [], "errors": []}

        for row in rows:
            results["checked"] += 1
            control_id = row[1]
            # Guard against NULL titles the same way objective is guarded,
            # so slicing below cannot fail on None.
            title = row[2] or ""
            objective = row[3] or ""
            current_category = row[4]
            source_text = row[6] or objective

            # Keyword detection on source text
            kw_category = _detect_category(source_text) or _detect_category(objective)
            kw_domain = _detect_domain(source_text)
            current_prefix = control_id.split("-")[0] if "-" in control_id else ""

            # Skip if keyword detection agrees with current classification
            if kw_category == current_category and kw_domain == current_prefix:
                continue

            results["mismatches"] += 1

            # Ask the local LLM to arbitrate
            try:
                prompt = _qa_build_prompt(
                    title,
                    objective,
                    _qa_requirements_preview(row[5]),
                    current_prefix,
                    current_category,
                    kw_domain,
                    kw_category,
                )

                raw = await _llm_local(prompt)
                data = _parse_llm_json(raw)
                if not data:
                    continue

                # The LLM may return null or non-string values; normalise
                # before validating so a sloppy answer is treated as
                # "no valid suggestion" instead of raising.
                raw_domain = data.get("domain")
                qa_domain = raw_domain.strip().upper() if isinstance(raw_domain, str) else ""
                raw_category = data.get("category")
                qa_category = raw_category.strip() if isinstance(raw_category, str) else ""
                reason = data.get("reason", "")

                category_valid = qa_category in VALID_CATEGORIES
                domain_valid = qa_domain in VALID_DOMAINS

                fix_entry = {
                    "control_id": control_id,
                    "title": title[:80],
                    "old_category": current_category,
                    "old_domain": current_prefix,
                    "new_category": qa_category if category_valid else current_category,
                    "new_domain": qa_domain if domain_valid else current_prefix,
                    "reason": reason,
                }

                category_changed = category_valid and qa_category != current_category
                should_apply = category_changed and not req.dry_run

                if should_apply:
                    # NOTE(review): if this UPDATE fails, the session may be left
                    # in a failed transaction for the remaining rows; consider a
                    # savepoint - confirm against the SessionLocal configuration.
                    db.execute(
                        text("""
                            UPDATE canonical_controls
                            SET category = :category, updated_at = NOW()
                            WHERE id = :id
                        """),
                        {"id": row[0], "category": qa_category},
                    )
                fix_entry["applied"] = should_apply

                results["fixes"].append(fix_entry)

            except Exception as e:
                # Per-control boundary: one failing control must not abort the run.
                results["errors"].append({"control_id": control_id, "error": str(e)})

        if not req.dry_run:
            db.commit()

        return results
    finally:
        db.close()
|
||||
|
||||
|
||||
@router.get("/generate/processed-stats")
|
||||
async def get_processed_stats():
|
||||
"""Get processing statistics per collection."""
|
||||
|
||||
@@ -39,7 +39,6 @@ router = APIRouter(tags=["extraction"])
|
||||
|
||||
ALL_COLLECTIONS = [
|
||||
"bp_compliance_ce", # BSI-TR documents — primary Prüfaspekte source
|
||||
"bp_compliance_recht", # Legal texts (GDPR, AI Act, ...)
|
||||
"bp_compliance_gesetze", # German laws
|
||||
"bp_compliance_datenschutz", # Data protection documents
|
||||
"bp_dsfa_corpus", # DSFA corpus
|
||||
|
||||
Reference in New Issue
Block a user