merge: sync with origin/main, take upstream on conflicts
# Conflicts: # admin-compliance/lib/sdk/types.ts # admin-compliance/lib/sdk/vendor-compliance/types.ts
This commit is contained in:
@@ -10,13 +10,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy requirements first for better caching
|
||||
COPY requirements.txt .
|
||||
COPY requirements.txt requirements-reranker.txt ./
|
||||
|
||||
# Create virtual environment and install dependencies
|
||||
RUN python -m venv /opt/venv
|
||||
ENV PATH="/opt/venv/bin:$PATH"
|
||||
# The optional reranker install is grouped in parentheses so that only ITS
# failure is downgraded to a warning. Without the grouping the shell parses
# `a && b && c || echo` as `((a && b) && c) || echo`, which would also mask a
# failed install of the core requirements and produce a broken image.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt && \
    ( pip install --no-cache-dir -r requirements-reranker.txt || \
      echo "WARNING: reranker dependencies not installed (torch/sentence-transformers)" )
|
||||
|
||||
# ---- Runtime stage ----
|
||||
FROM python:3.12-slim-bookworm
|
||||
|
||||
@@ -6,6 +6,8 @@ from .routes import router
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_failed_routers: dict[str, str] = {}
|
||||
|
||||
|
||||
def _safe_import_router(module_name: str, attr: str = "router"):
|
||||
"""Import a router module safely — log error but don't crash the whole app."""
|
||||
@@ -14,6 +16,7 @@ def _safe_import_router(module_name: str, attr: str = "router"):
|
||||
return getattr(mod, attr)
|
||||
except Exception as e:
|
||||
logger.error("Failed to import %s: %s", module_name, e)
|
||||
_failed_routers[module_name] = str(e)
|
||||
return None
|
||||
|
||||
|
||||
@@ -53,6 +56,13 @@ _ROUTER_MODULES = [
|
||||
"wiki_routes",
|
||||
"canonical_control_routes",
|
||||
"control_generator_routes",
|
||||
"crosswalk_routes",
|
||||
"process_task_routes",
|
||||
"evidence_check_routes",
|
||||
"vvt_library_routes",
|
||||
"tom_mapping_routes",
|
||||
"llm_audit_routes",
|
||||
"assertion_routes",
|
||||
]
|
||||
|
||||
_loaded_count = 0
|
||||
|
||||
@@ -0,0 +1,227 @@
|
||||
"""
|
||||
API routes for Assertion Engine (Anti-Fake-Evidence Phase 2).
|
||||
|
||||
Endpoints:
|
||||
- /assertions: CRUD for assertions
|
||||
- /assertions/extract: Automatic extraction from entity text
|
||||
- /assertions/summary: Stats (total assertions, facts, unverified)
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
|
||||
from ..db.models import AssertionDB
|
||||
from ..services.assertion_engine import extract_assertions
|
||||
from .schemas import (
|
||||
AssertionCreate,
|
||||
AssertionUpdate,
|
||||
AssertionResponse,
|
||||
AssertionListResponse,
|
||||
AssertionSummaryResponse,
|
||||
AssertionExtractRequest,
|
||||
)
|
||||
from .audit_trail_utils import log_audit_trail, generate_id
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(tags=["compliance-assertions"])
|
||||
|
||||
|
||||
def _build_assertion_response(a: AssertionDB) -> AssertionResponse:
    """Map an ``AssertionDB`` row onto the ``AssertionResponse`` schema.

    Most columns are copied unchanged. A falsy ``evidence_ids`` is reported
    as an empty list and a falsy ``confidence`` as ``0.0``.
    """
    # Columns that are passed through without any normalisation.
    passthrough = (
        "id",
        "tenant_id",
        "entity_type",
        "entity_id",
        "sentence_text",
        "sentence_index",
        "assertion_type",
        "normative_tier",
        "verified_by",
        "verified_at",
        "created_at",
        "updated_at",
    )
    payload = {column: getattr(a, column) for column in passthrough}
    payload["evidence_ids"] = a.evidence_ids or []
    payload["confidence"] = a.confidence or 0.0
    return AssertionResponse(**payload)
|
||||
|
||||
|
||||
@router.post("/assertions", response_model=AssertionResponse)
async def create_assertion(
    data: AssertionCreate,
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Store one manually supplied assertion and return the persisted row.

    ``assertion_type`` defaults to ``"assertion"`` and ``evidence_ids`` to an
    empty list when the request body leaves them empty.
    """
    columns = {
        "id": generate_id(),
        "tenant_id": tenant_id,
        "entity_type": data.entity_type,
        "entity_id": data.entity_id,
        "sentence_text": data.sentence_text,
        "assertion_type": data.assertion_type or "assertion",
        "evidence_ids": data.evidence_ids or [],
        "normative_tier": data.normative_tier,
    }
    record = AssertionDB(**columns)

    db.add(record)
    db.commit()
    # Reload so that database-populated columns are present in the response.
    db.refresh(record)
    return _build_assertion_response(record)
|
||||
|
||||
|
||||
@router.get("/assertions", response_model=AssertionListResponse)
async def list_assertions(
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    assertion_type: Optional[str] = Query(None),
    tenant_id: Optional[str] = Query(None),
    limit: int = Query(100, ge=1, le=500),
    db: Session = Depends(get_db),
):
    """Return assertions, optionally narrowed by the given query parameters.

    ``total`` is the number of rows matching the filters; the returned list is
    ordered by ``sentence_index`` ascending and capped at ``limit`` entries.
    """
    # Each (column, requested value) pair is applied only when a value was sent.
    optional_filters = (
        (AssertionDB.entity_type, entity_type),
        (AssertionDB.entity_id, entity_id),
        (AssertionDB.assertion_type, assertion_type),
        (AssertionDB.tenant_id, tenant_id),
    )

    query = db.query(AssertionDB)
    for column, wanted in optional_filters:
        if wanted:
            query = query.filter(column == wanted)

    total = query.count()
    page = query.order_by(AssertionDB.sentence_index.asc()).limit(limit).all()
    items = [_build_assertion_response(row) for row in page]

    return AssertionListResponse(assertions=items, total=total)
|
||||
|
||||
|
||||
@router.get("/assertions/summary", response_model=AssertionSummaryResponse)
async def assertion_summary(
    tenant_id: Optional[str] = Query(None),
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Summary stats: total assertions, facts, rationale, unverified.

    The optional query parameters narrow the set of assertions counted.
    "Unverified" means ``assertion_type == "assertion"`` with no
    ``verified_by`` value.
    """
    from collections import Counter

    query = db.query(AssertionDB)
    if tenant_id:
        query = query.filter(AssertionDB.tenant_id == tenant_id)
    if entity_type:
        query = query.filter(AssertionDB.entity_type == entity_type)
    if entity_id:
        query = query.filter(AssertionDB.entity_id == entity_id)

    # Only the two columns needed for counting are fetched, instead of
    # hydrating every full ORM object (sentence text, evidence lists, ...).
    rows = query.with_entities(
        AssertionDB.assertion_type, AssertionDB.verified_by
    ).all()

    # Single pass over the rows rather than one scan per statistic.
    per_type = Counter()
    unverified = 0
    for kind, verifier in rows:
        per_type[kind] += 1
        if kind == "assertion" and not verifier:
            unverified += 1

    return AssertionSummaryResponse(
        total_assertions=len(rows),
        total_facts=per_type["fact"],
        total_rationale=per_type["rationale"],
        unverified_count=unverified,
    )
|
||||
|
||||
|
||||
@router.get("/assertions/{assertion_id}", response_model=AssertionResponse)
async def get_assertion(
    assertion_id: str,
    db: Session = Depends(get_db),
):
    """Fetch one assertion by its ID.

    Raises:
        HTTPException: 404 when no assertion with that ID exists.
    """
    by_id = db.query(AssertionDB).filter(AssertionDB.id == assertion_id)
    found = by_id.first()
    if found:
        return _build_assertion_response(found)
    raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")
|
||||
|
||||
|
||||
@router.put("/assertions/{assertion_id}", response_model=AssertionResponse)
async def update_assertion(
    assertion_id: str,
    data: AssertionUpdate,
    db: Session = Depends(get_db),
):
    """Apply a partial update to an assertion (e.g. link evidence, change type).

    Only fields explicitly present in the request body are written;
    ``updated_at`` is always refreshed.

    Raises:
        HTTPException: 404 when no assertion with that ID exists.
    """
    target = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
    if not target:
        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")

    # exclude_unset keeps fields the client did not send out of the update.
    changes = data.model_dump(exclude_unset=True)
    for field_name in changes:
        setattr(target, field_name, changes[field_name])
    target.updated_at = datetime.utcnow()

    db.commit()
    db.refresh(target)
    return _build_assertion_response(target)
|
||||
|
||||
|
||||
@router.post("/assertions/{assertion_id}/verify", response_model=AssertionResponse)
async def verify_assertion(
    assertion_id: str,
    verified_by: str = Query(...),
    db: Session = Depends(get_db),
):
    """Mark an assertion as verified fact.

    Sets ``assertion_type`` to ``"fact"`` and records who verified it and
    when.

    Raises:
        HTTPException: 404 when no assertion with that ID exists.
    """
    a = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
    if not a:
        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")

    # One timestamp for both columns, so verified_at and updated_at agree
    # for the same verification event.
    now = datetime.utcnow()
    a.assertion_type = "fact"
    a.verified_by = verified_by
    a.verified_at = now
    a.updated_at = now

    db.commit()
    db.refresh(a)
    return _build_assertion_response(a)
|
||||
|
||||
|
||||
@router.post("/assertions/extract", response_model=AssertionListResponse)
async def extract_assertions_endpoint(
    data: AssertionExtractRequest,
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Run assertion extraction on free text and persist every result.

    All extracted items are inserted in a single commit; the response lists
    the stored rows and their count.
    """
    candidates = extract_assertions(
        text=data.text,
        entity_type=data.entity_type,
        entity_id=data.entity_id,
        tenant_id=tenant_id,
    )

    # Keys every extracted item must carry; the remaining two are optional.
    mandatory = (
        "tenant_id",
        "entity_type",
        "entity_id",
        "sentence_text",
        "sentence_index",
        "assertion_type",
        "evidence_ids",
    )

    stored = []
    for found in candidates:
        new_id = generate_id()
        columns = {key: found[key] for key in mandatory}
        row = AssertionDB(
            id=new_id,
            normative_tier=found.get("normative_tier"),
            confidence=found.get("confidence", 0.0),
            **columns,
        )
        db.add(row)
        stored.append(row)

    db.commit()
    for row in stored:
        db.refresh(row)

    responses = [_build_assertion_response(row) for row in stored]
    return AssertionListResponse(assertions=responses, total=len(stored))
|
||||
@@ -0,0 +1,53 @@
|
||||
"""Shared audit trail utilities.
|
||||
|
||||
Extracted from isms_routes.py for reuse across evidence, control,
|
||||
and assertion routes.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..db.models import AuditTrailDB
|
||||
|
||||
|
||||
def generate_id() -> str:
    """Return a freshly generated version-4 UUID as a string."""
    fresh = uuid.uuid4()
    return str(fresh)
|
||||
|
||||
|
||||
def create_signature(data: str) -> str:
    """Return the hex-encoded SHA-256 digest of *data*.

    The text is encoded with ``str.encode()``'s default (UTF-8) before hashing.
    """
    digest = hashlib.sha256()
    digest.update(data.encode())
    return digest.hexdigest()
|
||||
|
||||
|
||||
def log_audit_trail(
    db: Session,
    entity_type: str,
    entity_id: str,
    entity_name: str,
    action: str,
    performed_by: str,
    field_changed: str = None,
    old_value: str = None,
    new_value: str = None,
    change_summary: str = None,
):
    """Add an audit-trail entry to the session.

    The entry is only added via ``db.add``; this function does not commit.
    The stored checksum is a SHA-256 over
    ``entity_type|entity_id|action|performed_by``.
    """
    entry_id = generate_id()
    happened_at = datetime.utcnow()
    # The checksum input covers the identifying fields only, not the values.
    checksum_input = f"{entity_type}|{entity_id}|{action}|{performed_by}"

    columns = {
        "id": entry_id,
        "entity_type": entity_type,
        "entity_id": entity_id,
        "entity_name": entity_name,
        "action": action,
        "field_changed": field_changed,
        "old_value": old_value,
        "new_value": new_value,
        "change_summary": change_summary,
        "performed_by": performed_by,
        "performed_at": happened_at,
        "checksum": create_signature(checksum_input),
    }
    db.add(AuditTrailDB(**columns))
|
||||
File diff suppressed because it is too large
Load Diff
@@ -12,6 +12,7 @@ Endpoints:
|
||||
POST /v1/canonical/blocked-sources/cleanup — Start cleanup workflow
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Optional, List
|
||||
@@ -25,7 +26,16 @@ from compliance.services.control_generator import (
|
||||
ControlGeneratorPipeline,
|
||||
GeneratorConfig,
|
||||
ALL_COLLECTIONS,
|
||||
VALID_CATEGORIES,
|
||||
VALID_DOMAINS,
|
||||
_classify_regulation,
|
||||
_detect_category,
|
||||
_detect_domain,
|
||||
_llm_local,
|
||||
_parse_llm_json,
|
||||
CATEGORY_LIST_STR,
|
||||
)
|
||||
from compliance.services.citation_backfill import CitationBackfill, BackfillResult
|
||||
from compliance.services.rag_client import get_rag_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -40,9 +50,12 @@ class GenerateRequest(BaseModel):
|
||||
domain: Optional[str] = None
|
||||
collections: Optional[List[str]] = None
|
||||
max_controls: int = 50
|
||||
max_chunks: int = 1000 # Default: process max 1000 chunks per job (respects document boundaries)
|
||||
batch_size: int = 5
|
||||
skip_web_search: bool = False
|
||||
dry_run: bool = False
|
||||
regulation_filter: Optional[List[str]] = None # Only process these regulation_code prefixes
|
||||
skip_prefilter: bool = False # Skip local LLM pre-filter, send all chunks to API
|
||||
|
||||
|
||||
class GenerateResponse(BaseModel):
|
||||
@@ -55,6 +68,7 @@ class GenerateResponse(BaseModel):
|
||||
controls_needs_review: int = 0
|
||||
controls_too_close: int = 0
|
||||
controls_duplicates_found: int = 0
|
||||
controls_qa_fixed: int = 0
|
||||
errors: list = []
|
||||
controls: list = []
|
||||
|
||||
@@ -89,42 +103,111 @@ class BlockedSourceResponse(BaseModel):
|
||||
# ENDPOINTS
|
||||
# =============================================================================
|
||||
|
||||
async def _run_pipeline_background(config: GeneratorConfig, job_id: str):
    """Run the pipeline in the background. Uses its own DB session.

    ``job_id`` is attached to ``config`` as ``existing_job_id`` before the
    pipeline starts. If the pipeline raises, the job row is marked
    ``failed`` with the error text. Exceptions are logged and never
    propagate out of this coroutine; the session is always closed.
    """
    db = SessionLocal()
    try:
        config.existing_job_id = job_id
        pipeline = ControlGeneratorPipeline(db=db, rag_client=get_rag_client())
        result = await pipeline.run(config)
        logger.info(
            "Background generation job %s completed: %d controls from %d chunks",
            job_id, result.controls_generated, result.total_chunks_scanned,
        )
    except Exception as e:
        logger.error(
            "Background generation job %s failed: %s", job_id, e, exc_info=True
        )
        # Update job as failed
        try:
            # The pipeline may have left the session inside a failed
            # transaction; without a rollback the UPDATE below would be
            # rejected and the job would stay in 'running' forever.
            db.rollback()
            db.execute(
                text("""
                    UPDATE canonical_generation_jobs
                    SET status = 'failed', errors = :errors, completed_at = NOW()
                    WHERE id = CAST(:job_id AS uuid)
                """),
                {"job_id": job_id, "errors": json.dumps([str(e)])},
            )
            db.commit()
        except Exception:
            # Still best-effort, but no longer silent: a job stuck in
            # 'running' needs to be traceable in the logs.
            logger.exception("Could not mark generation job %s as failed", job_id)
    finally:
        db.close()
|
||||
|
||||
|
||||
# Strong references to in-flight generation tasks. The event loop keeps only
# weak references to tasks, so a task nobody references can be
# garbage-collected before it finishes.
_generation_tasks: set = set()


@router.post("/generate", response_model=GenerateResponse)
async def start_generation(req: GenerateRequest):
    """Start a control generation run (runs in background).

    Returns immediately with job_id. Use GET /generate/status/{job_id} to poll progress.

    With ``dry_run=True`` the pipeline runs synchronously instead and the
    generated controls are returned in the response.

    Raises:
        HTTPException: 500 when the dry run fails or the job record cannot
            be created.
    """
    config = GeneratorConfig(
        collections=req.collections,
        domain=req.domain,
        batch_size=req.batch_size,
        max_controls=req.max_controls,
        max_chunks=req.max_chunks,
        skip_web_search=req.skip_web_search,
        dry_run=req.dry_run,
        regulation_filter=req.regulation_filter,
        skip_prefilter=req.skip_prefilter,
    )

    if req.dry_run:
        # Dry run: execute synchronously and return controls
        db = SessionLocal()
        try:
            pipeline = ControlGeneratorPipeline(db=db, rag_client=get_rag_client())
            result = await pipeline.run(config)
            return GenerateResponse(
                job_id=result.job_id,
                status=result.status,
                message=f"Dry run: {result.controls_generated} controls from {result.total_chunks_scanned} chunks",
                total_chunks_scanned=result.total_chunks_scanned,
                controls_generated=result.controls_generated,
                controls_verified=result.controls_verified,
                controls_needs_review=result.controls_needs_review,
                controls_too_close=result.controls_too_close,
                controls_duplicates_found=result.controls_duplicates_found,
                errors=result.errors,
                controls=result.controls,
            )
        except Exception as e:
            logger.error("Dry run failed: %s", e)
            raise HTTPException(status_code=500, detail=str(e))
        finally:
            db.close()

    # Create job record first so we can return the ID
    db = SessionLocal()
    try:
        inserted = db.execute(
            text("""
                INSERT INTO canonical_generation_jobs (status, config)
                VALUES ('running', :config)
                RETURNING id
            """),
            {"config": json.dumps(config.model_dump())},
        )
        # Read the RETURNING row before committing; the commit ends the
        # transaction the result belongs to.
        row = inserted.fetchone()
        db.commit()
        job_id = str(row[0]) if row else None
    except Exception as e:
        logger.error("Failed to create job: %s", e)
        raise HTTPException(status_code=500, detail=f"Failed to create job: {e}")
    finally:
        db.close()

    if not job_id:
        raise HTTPException(status_code=500, detail="Failed to create job record")

    # Launch pipeline in background, holding a reference until it completes.
    task = asyncio.create_task(_run_pipeline_background(config, job_id))
    _generation_tasks.add(task)
    task.add_done_callback(_generation_tasks.discard)

    return GenerateResponse(
        job_id=job_id,
        status="running",
        # f-string so the caller sees the concrete polling URL instead of a
        # literal "{job_id}" placeholder.
        message=f"Generation started in background. Poll /generate/status/{job_id} for progress.",
    )
|
||||
|
||||
|
||||
@router.get("/generate/status/{job_id}")
|
||||
async def get_job_status(job_id: str):
|
||||
@@ -132,7 +215,7 @@ async def get_job_status(job_id: str):
|
||||
db = SessionLocal()
|
||||
try:
|
||||
result = db.execute(
|
||||
text("SELECT * FROM canonical_generation_jobs WHERE id = :id::uuid"),
|
||||
text("SELECT * FROM canonical_generation_jobs WHERE id = CAST(:id AS uuid)"),
|
||||
{"id": job_id},
|
||||
)
|
||||
row = result.fetchone()
|
||||
@@ -270,6 +353,188 @@ async def review_control(control_id: str, req: ReviewRequest):
|
||||
db.close()
|
||||
|
||||
|
||||
class BulkReviewRequest(BaseModel):
    # Request body of the bulk-review endpoint.

    # Filter: which controls to bulk-review
    release_state: str
    # "approve" or "reject"
    action: str
    # Override target state
    new_state: Optional[str] = None
|
||||
|
||||
|
||||
@router.post("/generate/bulk-review")
async def bulk_review(req: BulkReviewRequest):
    """Move every control in one release_state to a new state in a single step.

    ``reject`` always targets ``deprecated``; ``approve`` targets
    ``new_state`` when given, otherwise ``draft``.

    Raises:
        HTTPException: 400 for an unsupported filter state, action or
            target state.
    """
    reviewable_states = ("needs_review", "too_close", "duplicate")
    allowed_targets = ("draft", "review", "approved", "deprecated", "needs_review")

    if req.release_state not in reviewable_states:
        raise HTTPException(status_code=400, detail=f"Invalid filter state: {req.release_state}")

    if req.action == "reject":
        target = "deprecated"
    elif req.action == "approve":
        target = req.new_state if req.new_state else "draft"
    else:
        raise HTTPException(status_code=400, detail=f"Unknown action: {req.action}")

    if target not in allowed_targets:
        raise HTTPException(status_code=400, detail=f"Invalid target state: {target}")

    db = SessionLocal()
    try:
        update_stmt = text("""
            UPDATE canonical_controls
            SET release_state = :target, updated_at = NOW()
            WHERE release_state = :source
            RETURNING control_id
        """)
        outcome = db.execute(
            update_stmt,
            {"source": req.release_state, "target": target},
        )
        # Only the number of updated rows is reported back.
        affected_count = len(outcome.fetchall())
        db.commit()

        return {
            "action": req.action,
            "source_state": req.release_state,
            "target_state": target,
            "affected_count": affected_count,
        }
    finally:
        db.close()
|
||||
|
||||
|
||||
class QAReclassifyRequest(BaseModel):
    # Request body of the QA reclassification endpoint.

    # How many controls to reclassify per run
    limit: int = 100
    # Preview only by default
    dry_run: bool = True
    # Only reclassify controls of this category
    filter_category: Optional[str] = None
    # Only reclassify controls with this prefix
    filter_domain_prefix: Optional[str] = None
|
||||
|
||||
|
||||
def _build_reclassify_prompt(
    title: str,
    objective: str,
    reqs_text: str,
    current_prefix: str,
    current_category,
    kw_domain,
    kw_category,
) -> str:
    """Render the arbitration prompt (German) sent to the local LLM."""
    return f"""Pruefe dieses Compliance-Control auf korrekte Klassifizierung.

Titel: {title[:100]}
Ziel: {objective[:200]}
Anforderungen: {reqs_text[:200]}

Aktuelle Zuordnung: domain={current_prefix}, category={current_category}
Keyword-Erkennung: domain={kw_domain}, category={kw_category}

Welche Zuordnung ist korrekt? Antworte NUR als JSON:
{{"domain": "KUERZEL", "category": "kategorie_name", "reason": "kurze Begruendung"}}

Domains: AUTH=Authentifizierung, CRYP=Kryptographie, NET=Netzwerk, DATA=Datenschutz, LOG=Logging, ACC=Zugriffskontrolle, SEC=IT-Sicherheit, INC=Vorfallmanagement, AI=KI, COMP=Compliance, GOV=Behoerden, LAB=Arbeitsrecht, FIN=Finanzregulierung, TRD=Gewerbe, ENV=Umwelt, HLT=Gesundheit
Kategorien: {CATEGORY_LIST_STR}"""


@router.post("/generate/qa-reclassify")
async def qa_reclassify(req: QAReclassifyRequest):
    """Run QA reclassification on existing controls using local LLM.

    Finds controls where keyword-detection disagrees with current category/domain,
    then asks the local LLM to determine the correct classification.

    Only the category is ever written back, and only when ``dry_run`` is
    false; the suggested domain is reported but not applied. Returns a dict
    with ``checked``, ``mismatches``, ``fixes`` and ``errors``.
    """
    db = SessionLocal()
    try:
        # Load controls to check
        where_clauses = ["release_state NOT IN ('deprecated')"]
        params = {"limit": req.limit}
        if req.filter_category:
            where_clauses.append("category = :cat")
            params["cat"] = req.filter_category
        if req.filter_domain_prefix:
            where_clauses.append("control_id LIKE :prefix")
            params["prefix"] = f"{req.filter_domain_prefix}-%"

        # Only fixed fragments are joined here; all values go through binds.
        where_sql = " AND ".join(where_clauses)
        rows = db.execute(
            text(f"""
                SELECT id, control_id, title, objective, category,
                       COALESCE(requirements::text, '[]') as requirements,
                       COALESCE(source_original_text, '') as source_text
                FROM canonical_controls
                WHERE {where_sql}
                ORDER BY created_at DESC
                LIMIT :limit
            """),
            params,
        ).fetchall()

        results = {"checked": 0, "mismatches": 0, "fixes": [], "errors": []}

        for row in rows:
            results["checked"] += 1
            control_id = row[1]
            # A NULL title must not abort the check when it is sliced below.
            title = row[2] or ""
            objective = row[3] or ""
            current_category = row[4]
            source_text = row[6] or objective

            # Keyword detection on source text
            kw_category = _detect_category(source_text) or _detect_category(objective)
            kw_domain = _detect_domain(source_text)
            current_prefix = control_id.split("-")[0] if "-" in control_id else ""

            # Skip if keyword detection agrees with current classification
            if kw_category == current_category and kw_domain == current_prefix:
                continue

            results["mismatches"] += 1

            # Ask the local LLM to arbitrate
            try:
                # Requirements are optional context: unparsable JSON simply
                # leaves the field empty.
                reqs_text = ""
                try:
                    reqs = json.loads(row[5])
                except (TypeError, ValueError):
                    reqs = None
                if isinstance(reqs, list):
                    reqs_text = ", ".join(str(r) for r in reqs[:3])

                prompt = _build_reclassify_prompt(
                    title,
                    objective,
                    reqs_text,
                    current_prefix,
                    current_category,
                    kw_domain,
                    kw_category,
                )

                raw = await _llm_local(prompt)
                data = _parse_llm_json(raw)
                if not data:
                    continue

                # The LLM may answer with null or non-string values; treat
                # those as "no suggestion" instead of raising.
                qa_domain = str(data.get("domain") or "").upper()
                qa_category = data.get("category") or ""
                reason = data.get("reason") or ""

                fix_entry = {
                    "control_id": control_id,
                    "title": title[:80],
                    "old_category": current_category,
                    "old_domain": current_prefix,
                    "new_category": qa_category if qa_category in VALID_CATEGORIES else current_category,
                    "new_domain": qa_domain if qa_domain in VALID_DOMAINS else current_prefix,
                    "reason": reason,
                }

                category_changed = qa_category in VALID_CATEGORIES and qa_category != current_category

                if category_changed and not req.dry_run:
                    db.execute(
                        text("""
                            UPDATE canonical_controls
                            SET category = :category, updated_at = NOW()
                            WHERE id = :id
                        """),
                        {"id": row[0], "category": qa_category},
                    )
                    fix_entry["applied"] = True
                else:
                    fix_entry["applied"] = False

                results["fixes"].append(fix_entry)

            except Exception as e:
                # Per-control failures are collected so one bad control does
                # not abort the whole run.
                results["errors"].append({"control_id": control_id, "error": str(e)})

        if not req.dry_run:
            db.commit()

        return results
    finally:
        db.close()
|
||||
|
||||
|
||||
@router.get("/generate/processed-stats")
|
||||
async def get_processed_stats():
|
||||
"""Get processing statistics per collection."""
|
||||
@@ -429,3 +694,407 @@ async def get_controls_customer_view(
|
||||
return {"controls": controls, "total": len(controls)}
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CITATION BACKFILL
|
||||
# =============================================================================
|
||||
|
||||
class BackfillRequest(BaseModel):
    # Request body of the citation backfill endpoint.

    # Default to dry_run for safety
    dry_run: bool = True
    # 0 = all controls
    limit: int = 0
|
||||
|
||||
|
||||
class BackfillResponse(BaseModel):
    # Response model of the citation backfill endpoint.
    # Only `status` is required; every counter defaults to zero.
    status: str

    # Number of controls looked at.
    total_controls: int = 0

    # Match counters, one per matching tier.
    matched_hash: int = 0
    matched_regex: int = 0
    matched_llm: int = 0
    unmatched: int = 0

    # Number of controls changed.
    updated: int = 0

    # Error messages collected during the run.
    errors: list = []
|
||||
|
||||
|
||||
# In-memory registry of citation backfill runs, keyed by backfill id.
_backfill_status: dict = {}


async def _run_backfill_background(dry_run: bool, limit: int, backfill_id: str):
    """Execute a citation backfill with a dedicated DB session.

    The outcome is stored in ``_backfill_status[backfill_id]``: either a
    ``completed`` entry carrying the result counters (with at most the first
    50 errors) or a ``failed`` entry carrying the exception text.
    """
    # Result attributes copied verbatim into the status entry, in this order.
    counter_fields = (
        "total_controls",
        "matched_hash",
        "matched_regex",
        "matched_llm",
        "unmatched",
        "updated",
    )

    db = SessionLocal()
    try:
        runner = CitationBackfill(db=db, rag_client=get_rag_client())
        outcome = await runner.run(dry_run=dry_run, limit=limit)

        summary = {"status": "completed"}
        for field_name in counter_fields:
            summary[field_name] = getattr(outcome, field_name)
        summary["errors"] = outcome.errors[:50]
        _backfill_status[backfill_id] = summary

        logger.info("Backfill %s completed: %d updated", backfill_id, outcome.updated)
    except Exception as e:
        logger.error("Backfill %s failed: %s", backfill_id, e)
        _backfill_status[backfill_id] = {"status": "failed", "errors": [str(e)]}
    finally:
        db.close()
|
||||
|
||||
|
||||
# Strong references to in-flight backfill tasks. The event loop keeps only
# weak references to tasks, so a task nobody references can be
# garbage-collected before it finishes.
_backfill_tasks: set = set()


@router.post("/generate/backfill-citations", response_model=BackfillResponse)
async def start_backfill(req: BackfillRequest):
    """Backfill article/paragraph into existing control source_citations.

    Uses 3-tier matching: hash lookup → regex parse → Ollama LLM.
    Default is dry_run=True (preview only, no DB changes).

    The run is always started in the background. The response's ``status``
    carries the 8-character backfill id needed to poll
    ``/generate/backfill-status/{backfill_id}``.
    """
    import uuid

    backfill_id = str(uuid.uuid4())[:8]
    _backfill_status[backfill_id] = {"status": "running"}

    # Always run in background (RAG index build takes minutes). The task is
    # kept referenced until it completes so it cannot be collected mid-run.
    task = asyncio.create_task(
        _run_backfill_background(req.dry_run, req.limit, backfill_id)
    )
    _backfill_tasks.add(task)
    task.add_done_callback(_backfill_tasks.discard)

    return BackfillResponse(
        status=f"running (id={backfill_id})",
    )
|
||||
|
||||
|
||||
@router.get("/generate/backfill-status/{backfill_id}")
async def get_backfill_status(backfill_id: str):
    """Return the recorded status entry of a citation backfill run.

    Raises:
        HTTPException: 404 when no entry exists for ``backfill_id``.
    """
    entry = _backfill_status.get(backfill_id)
    if entry:
        return entry
    raise HTTPException(status_code=404, detail="Backfill job not found")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# DOMAIN + TARGET AUDIENCE BACKFILL
|
||||
# =============================================================================
|
||||
|
||||
class DomainBackfillRequest(BaseModel):
    # Request body of the domain / target-audience backfill.

    dry_run: bool = True
    # Only backfill controls from this job
    job_id: Optional[str] = None
    # 0 = all
    limit: int = 0


# In-memory registry of domain backfill runs, keyed by backfill id.
_domain_backfill_status: dict = {}
|
||||
|
||||
|
||||
async def _run_domain_backfill(req: DomainBackfillRequest, backfill_id: str):
|
||||
"""Backfill domain, category, and target_audience for existing controls using Anthropic."""
|
||||
import os
|
||||
import httpx
|
||||
|
||||
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
|
||||
ANTHROPIC_MODEL = os.getenv("CONTROL_GEN_ANTHROPIC_MODEL", "claude-sonnet-4-6")
|
||||
|
||||
if not ANTHROPIC_API_KEY:
|
||||
_domain_backfill_status[backfill_id] = {
|
||||
"status": "failed", "error": "ANTHROPIC_API_KEY not set"
|
||||
}
|
||||
return
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
# Find controls needing backfill
|
||||
where_clauses = ["(target_audience IS NULL OR target_audience = '[]' OR target_audience = 'null')"]
|
||||
params: dict = {}
|
||||
if req.job_id:
|
||||
where_clauses.append("generation_metadata->>'job_id' = :job_id")
|
||||
params["job_id"] = req.job_id
|
||||
|
||||
query = f"""
|
||||
SELECT id, control_id, title, objective, category, source_original_text, tags
|
||||
FROM canonical_controls
|
||||
WHERE {' AND '.join(where_clauses)}
|
||||
ORDER BY control_id
|
||||
"""
|
||||
if req.limit > 0:
|
||||
query += f" LIMIT {req.limit}"
|
||||
|
||||
result = db.execute(text(query), params)
|
||||
controls = [dict(zip(result.keys(), row)) for row in result]
|
||||
|
||||
total = len(controls)
|
||||
updated = 0
|
||||
errors = []
|
||||
|
||||
_domain_backfill_status[backfill_id] = {
|
||||
"status": "running", "total": total, "updated": 0, "errors": []
|
||||
}
|
||||
|
||||
# Process in batches of 10
|
||||
BATCH_SIZE = 10
|
||||
for batch_start in range(0, total, BATCH_SIZE):
|
||||
batch = controls[batch_start:batch_start + BATCH_SIZE]
|
||||
|
||||
entries = []
|
||||
for idx, ctrl in enumerate(batch):
|
||||
text_for_analysis = ctrl.get("objective") or ctrl.get("title") or ""
|
||||
original = ctrl.get("source_original_text") or ""
|
||||
if original:
|
||||
text_for_analysis += f"\n\nQuelltext-Auszug: {original[:500]}"
|
||||
entries.append(
|
||||
f"--- CONTROL {idx + 1}: {ctrl['control_id']} ---\n"
|
||||
f"Titel: {ctrl.get('title', '')}\n"
|
||||
f"Objective: {text_for_analysis[:800]}\n"
|
||||
f"Tags: {json.dumps(ctrl.get('tags', []))}"
|
||||
)
|
||||
|
||||
prompt = f"""Analysiere die folgenden {len(batch)} Controls und bestimme fuer jedes:
|
||||
1. domain: Das Fachgebiet (AUTH, CRYP, NET, DATA, LOG, ACC, SEC, INC, AI, COMP, GOV, LAB, FIN, TRD, ENV, HLT)
|
||||
2. category: Die Kategorie (encryption, authentication, network, data_protection, logging, incident, continuity, compliance, supply_chain, physical, personnel, application, system, risk, governance, hardware, identity, public_administration, labor_law, finance, trade_regulation, environmental, health)
|
||||
3. target_audience: Liste der Zielgruppen (moegliche Werte: "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "vertrieb", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst")
|
||||
|
||||
Antworte mit einem JSON-Array mit {len(batch)} Objekten. Jedes Objekt hat:
|
||||
- control_index: 1-basierter Index
|
||||
- domain: Fachgebiet-Kuerzel
|
||||
- category: Kategorie
|
||||
- target_audience: Liste der Zielgruppen
|
||||
|
||||
{"".join(entries)}"""
|
||||
|
||||
try:
|
||||
headers = {
|
||||
"x-api-key": ANTHROPIC_API_KEY,
|
||||
"anthropic-version": "2023-06-01",
|
||||
"content-type": "application/json",
|
||||
}
|
||||
payload = {
|
||||
"model": ANTHROPIC_MODEL,
|
||||
"max_tokens": 4096,
|
||||
"system": "Du bist ein Compliance-Experte. Klassifiziere Controls nach Fachgebiet und Zielgruppe. Antworte NUR mit validem JSON.",
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
}
|
||||
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
resp = await client.post(
|
||||
"https://api.anthropic.com/v1/messages",
|
||||
headers=headers,
|
||||
json=payload,
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
errors.append(f"Anthropic API {resp.status_code} at batch {batch_start}")
|
||||
continue
|
||||
|
||||
raw = resp.json().get("content", [{}])[0].get("text", "")
|
||||
|
||||
# Parse response
|
||||
import re
|
||||
bracket_match = re.search(r"\[.*\]", raw, re.DOTALL)
|
||||
if not bracket_match:
|
||||
errors.append(f"No JSON array in response at batch {batch_start}")
|
||||
continue
|
||||
|
||||
results_list = json.loads(bracket_match.group(0))
|
||||
|
||||
for item in results_list:
|
||||
idx = item.get("control_index", 0) - 1
|
||||
if idx < 0 or idx >= len(batch):
|
||||
continue
|
||||
ctrl = batch[idx]
|
||||
ctrl_id = str(ctrl["id"])
|
||||
|
||||
new_domain = item.get("domain", "")
|
||||
new_category = item.get("category", "")
|
||||
new_audience = item.get("target_audience", [])
|
||||
|
||||
if not isinstance(new_audience, list):
|
||||
new_audience = []
|
||||
|
||||
# Build new control_id from domain if domain changed
|
||||
old_prefix = ctrl["control_id"].split("-")[0] if ctrl["control_id"] else ""
|
||||
new_prefix = new_domain.upper()[:4] if new_domain else old_prefix
|
||||
|
||||
if not req.dry_run:
|
||||
update_parts = []
|
||||
update_params: dict = {"ctrl_id": ctrl_id}
|
||||
|
||||
if new_category:
|
||||
update_parts.append("category = :category")
|
||||
update_params["category"] = new_category
|
||||
|
||||
if new_audience:
|
||||
update_parts.append("target_audience = :target_audience")
|
||||
update_params["target_audience"] = json.dumps(new_audience)
|
||||
|
||||
# Note: We do NOT rename control_ids here — that would
|
||||
# break references and cause unique constraint violations.
|
||||
|
||||
if update_parts:
|
||||
update_parts.append("updated_at = NOW()")
|
||||
db.execute(
|
||||
text(f"UPDATE canonical_controls SET {', '.join(update_parts)} WHERE id = CAST(:ctrl_id AS uuid)"),
|
||||
update_params,
|
||||
)
|
||||
updated += 1
|
||||
|
||||
if not req.dry_run:
|
||||
db.commit()
|
||||
|
||||
except Exception as e:
|
||||
errors.append(f"Batch {batch_start}: {str(e)}")
|
||||
db.rollback()
|
||||
|
||||
_domain_backfill_status[backfill_id] = {
|
||||
"status": "running", "total": total, "updated": updated,
|
||||
"progress": f"{min(batch_start + BATCH_SIZE, total)}/{total}",
|
||||
"errors": errors[-10:],
|
||||
}
|
||||
|
||||
_domain_backfill_status[backfill_id] = {
|
||||
"status": "completed", "total": total, "updated": updated,
|
||||
"errors": errors[-50:],
|
||||
}
|
||||
logger.info("Domain backfill %s completed: %d/%d updated", backfill_id, updated, total)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Domain backfill %s failed: %s", backfill_id, e)
|
||||
_domain_backfill_status[backfill_id] = {"status": "failed", "error": str(e)}
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
@router.post("/generate/backfill-domain")
async def start_domain_backfill(req: DomainBackfillRequest):
    """Start a background job that backfills domain, category, and target_audience.

    Registers a new short job id in ``_domain_backfill_status`` with status
    ``"starting"``, schedules ``_run_domain_backfill(req, backfill_id)`` on the
    running event loop and returns immediately without waiting for it.

    Per the original notes, the job targets controls whose target_audience is
    NULL and the request defaults to dry_run=True (preview only); the request
    model and the selection query are defined elsewhere in this file.

    Args:
        req: Backfill options, passed unchanged to the background job.

    Returns:
        A dict with ``status``, the generated ``backfill_id`` and a ``message``
        naming the endpoint to poll for progress.
    """
    import uuid

    # First 8 characters of a random UUID are used as the job handle.
    backfill_id = str(uuid.uuid4())[:8]
    _domain_backfill_status[backfill_id] = {"status": "starting"}
    # NOTE(review): the task object is not retained; asyncio only keeps weak
    # references to tasks, so consider storing it until it completes.
    asyncio.create_task(_run_domain_backfill(req, backfill_id))
    return {
        "status": "running",
        "backfill_id": backfill_id,
        # The hint must match the route served by get_domain_backfill_status
        # ("/generate/domain-backfill-status/{backfill_id}").
        "message": f"Domain backfill started. Poll /generate/domain-backfill-status/{backfill_id}",
    }
|
||||
|
||||
|
||||
@router.get("/generate/domain-backfill-status/{backfill_id}")
async def get_domain_backfill_status(backfill_id: str):
    """Return the recorded state of a domain backfill job.

    Looks the id up in ``_domain_backfill_status``; a missing (or empty)
    entry is answered with HTTP 404.
    """
    job_state = _domain_backfill_status.get(backfill_id)
    if job_state:
        return job_state
    raise HTTPException(status_code=404, detail="Domain backfill job not found")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Source-Type Backfill — Classify law vs guideline vs standard vs restricted
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Request body for POST /generate/backfill-source-type.
class SourceTypeBackfillRequest(BaseModel):
    # When True (the default) the job only previews; no UPDATE is issued.
    dry_run: bool = True
|
||||
|
||||
|
||||
# In-process registry of source-type backfill jobs, keyed by backfill_id.
# Written by the start endpoint and the background job, read by the status endpoint.
_source_type_backfill_status: dict = {}
|
||||
|
||||
|
||||
async def _run_source_type_backfill(dry_run: bool, backfill_id: str):
    """Backfill source_type into source_citation JSONB for all controls.

    Selects every control whose ``source_citation`` is set but has no (or an
    empty) ``source_type``, classifies it through ``_classify_regulation`` using
    ``generation_metadata["source_regulation"]`` and, unless ``dry_run`` is
    set, writes the enriched citation back. Progress and the final result are
    published in ``_source_type_backfill_status[backfill_id]``.

    Rows that cannot be processed (unparseable JSON, JSON that is not an
    object, missing ``source_regulation``) are recorded in ``errors`` and
    skipped, so a single bad row does not abort the whole job.

    Args:
        dry_run: When True nothing is written; ``updated`` then counts the
            rows that would have been changed.
        backfill_id: Key under which job state is stored.
    """
    db = SessionLocal()
    try:
        # Find controls with source_citation that lack source_type
        rows = db.execute(text("""
            SELECT control_id, source_citation, generation_metadata
            FROM compliance.canonical_controls
            WHERE source_citation IS NOT NULL
              AND (source_citation->>'source_type' IS NULL
                   OR source_citation->>'source_type' = '')
        """)).fetchall()

        total = len(rows)
        updated = 0
        errors = []

        _source_type_backfill_status[backfill_id] = {
            "status": "running", "total": total, "updated": 0, "dry_run": dry_run,
        }

        for row in rows:
            cid = row[0]

            # Columns may arrive as already-decoded dicts or as JSON text.
            try:
                citation = row[1] if isinstance(row[1], dict) else json.loads(row[1] or "{}")
                metadata = row[2] if isinstance(row[2], dict) else json.loads(row[2] or "{}")
            except (TypeError, ValueError) as parse_err:
                errors.append(f"{cid}: unparseable JSON ({parse_err})")
                continue
            if not isinstance(citation, dict) or not isinstance(metadata, dict):
                errors.append(f"{cid}: source_citation/generation_metadata is not a JSON object")
                continue

            # Get regulation_code from metadata; without it the row cannot be
            # classified and is reported instead.
            reg_code = metadata.get("source_regulation", "")
            if not reg_code:
                errors.append(f"{cid}: no source_regulation in metadata")
                continue

            # Classify; fall back to "restricted" when no source_type is returned.
            license_info = _classify_regulation(reg_code)
            source_type = license_info.get("source_type", "restricted")

            # Update citation
            citation["source_type"] = source_type

            if not dry_run:
                db.execute(text("""
                    UPDATE compliance.canonical_controls
                    SET source_citation = :citation
                    WHERE control_id = :cid
                """), {"citation": json.dumps(citation), "cid": cid})
            updated += 1

            # Every 100 processed rows: flush pending writes and publish progress.
            if updated % 100 == 0:
                if not dry_run:
                    db.commit()
                _source_type_backfill_status[backfill_id] = {
                    "status": "running", "total": total, "updated": updated,
                    "dry_run": dry_run,
                }

        if not dry_run:
            db.commit()

        # Count distribution (only meaningful after a real run)
        dist_query = db.execute(text("""
            SELECT source_citation->>'source_type' as st, COUNT(*)
            FROM compliance.canonical_controls
            WHERE source_citation IS NOT NULL
              AND source_citation->>'source_type' IS NOT NULL
            GROUP BY st
        """)).fetchall() if not dry_run else []

        distribution = {r[0]: r[1] for r in dist_query}

        _source_type_backfill_status[backfill_id] = {
            "status": "completed", "total": total, "updated": updated,
            "dry_run": dry_run, "distribution": distribution,
            "errors": errors[:50],
        }
        logger.info("Source-type backfill %s completed: %d/%d updated (dry_run=%s)",
                    backfill_id, updated, total, dry_run)

    except Exception as e:
        # Top-level boundary of a background task: log with traceback and
        # surface the failure through the status registry.
        logger.exception("Source-type backfill %s failed: %s", backfill_id, e)
        _source_type_backfill_status[backfill_id] = {"status": "failed", "error": str(e)}
    finally:
        db.close()
|
||||
|
||||
|
||||
@router.post("/generate/backfill-source-type")
async def start_source_type_backfill(req: SourceTypeBackfillRequest):
    """Kick off the source_type backfill as a background task.

    Creates a short job id, records it as ``"starting"`` in
    ``_source_type_backfill_status``, schedules
    ``_run_source_type_backfill(req.dry_run, job_id)`` and returns at once
    with the id and the status endpoint to poll. ``req.dry_run`` defaults to
    True (preview only).
    """
    import uuid

    job_id = str(uuid.uuid4())[:8]
    _source_type_backfill_status[job_id] = {"status": "starting"}
    asyncio.create_task(_run_source_type_backfill(req.dry_run, job_id))

    poll_hint = f"Source-type backfill started. Poll /generate/source-type-backfill-status/{job_id}"
    return {"status": "running", "backfill_id": job_id, "message": poll_hint}
|
||||
|
||||
|
||||
@router.get("/generate/source-type-backfill-status/{backfill_id}")
async def get_source_type_backfill_status(backfill_id: str):
    """Return the recorded state of a source-type backfill job.

    A missing (or empty) entry in ``_source_type_backfill_status`` is
    answered with HTTP 404.
    """
    job_state = _source_type_backfill_status.get(backfill_id)
    if job_state:
        return job_state
    raise HTTPException(status_code=404, detail="Source-type backfill job not found")
|
||||
|
||||
@@ -0,0 +1,856 @@
|
||||
"""
|
||||
FastAPI routes for the Multi-Layer Control Architecture.
|
||||
|
||||
Pattern Library, Obligation Extraction, Crosswalk Matrix, and Migration endpoints.
|
||||
|
||||
Endpoints:
|
||||
GET /v1/canonical/patterns — All patterns (with filters)
|
||||
GET /v1/canonical/patterns/{pattern_id} — Single pattern
|
||||
GET /v1/canonical/patterns/{pattern_id}/controls — Controls for a pattern
|
||||
|
||||
POST /v1/canonical/obligations/extract — Extract obligations from text
|
||||
GET /v1/canonical/crosswalk — Query crosswalk matrix
|
||||
GET /v1/canonical/crosswalk/stats — Coverage statistics
|
||||
|
||||
POST /v1/canonical/migrate/decompose — Pass 0a: Obligation extraction
|
||||
POST /v1/canonical/migrate/merge-obligations — Merge implementation-level dupes
|
||||
POST /v1/canonical/migrate/enrich-obligations — Add trigger_type, impl metadata
|
||||
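POST /v1/canonical/migrate/compose-atomic — Pass 0b: Atomic control composition
POST /v1/canonical/migrate/batch-submit-0a — Submit Pass 0a as an Anthropic batch job
POST /v1/canonical/migrate/batch-submit-0b — Submit Pass 0b as an Anthropic batch job
GET /v1/canonical/migrate/batch-status/{batch_id} — Check status of a batch job
POST /v1/canonical/migrate/batch-process — Process results of a completed batch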
|
||||
POST /v1/canonical/migrate/link-obligations — Pass 1: Obligation linkage
|
||||
POST /v1/canonical/migrate/classify-patterns — Pass 2: Pattern classification
|
||||
POST /v1/canonical/migrate/triage — Pass 3: Quality triage
|
||||
POST /v1/canonical/migrate/backfill-crosswalk — Pass 4: Crosswalk backfill
|
||||
POST /v1/canonical/migrate/deduplicate — Pass 5: Deduplication
|
||||
GET /v1/canonical/migrate/status — Migration progress
|
||||
GET /v1/canonical/migrate/decomposition-status — Decomposition progress
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Optional, List
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
|
||||
from database import SessionLocal
|
||||
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# All routes in this module are mounted under /v1/canonical and tagged "crosswalk".
router = APIRouter(prefix="/v1/canonical", tags=["crosswalk"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# REQUEST / RESPONSE MODELS
|
||||
# =============================================================================
|
||||
|
||||
|
||||
# Summary view of a control pattern, as returned by GET /patterns.
class PatternResponse(BaseModel):
    # Required descriptive fields, copied from the pattern object.
    id: str
    name: str
    name_de: str
    domain: str
    category: str
    description: str
    objective_template: str
    severity_default: str

    # Optional fields with defaults.
    implementation_effort_default: str = "m"
    tags: list = []
    composable_with: list = []
    open_anchor_refs: list = []

    # Filled from the per-pattern control counts; 0 when the pattern has none.
    controls_count: int = 0
|
||||
|
||||
|
||||
# Envelope for GET /patterns: the matching patterns plus their count.
class PatternListResponse(BaseModel):
    patterns: List[PatternResponse]
    # Number of entries in `patterns` (after filtering).
    total: int
|
||||
|
||||
|
||||
# Full view of one pattern: everything in PatternResponse plus the templates
# and match keywords, as returned by GET /patterns/{pattern_id}.
class PatternDetailResponse(PatternResponse):
    rationale_template: str = ""
    requirements_template: list = []
    test_procedure_template: list = []
    evidence_template: list = []
    obligation_match_keywords: list = []
|
||||
|
||||
|
||||
# Request body for POST /obligations/extract.
class ObligationExtractRequest(BaseModel):
    # Text to extract an obligation from (required).
    text: str

    # Optional locator hints forwarded to the extractor.
    regulation_code: Optional[str] = None
    article: Optional[str] = None
    paragraph: Optional[str] = None
|
||||
|
||||
|
||||
# Result of POST /obligations/extract: the extracted obligation plus the
# pattern it was matched to (if any).
class ObligationExtractResponse(BaseModel):
    # Extraction outcome.
    obligation_id: Optional[str] = None
    obligation_title: Optional[str] = None
    obligation_text: Optional[str] = None
    method: str = "none"
    confidence: float = 0.0
    regulation_id: Optional[str] = None

    # Pattern match outcome; pattern_id stays None when nothing matched.
    pattern_id: Optional[str] = None
    pattern_confidence: float = 0.0
|
||||
|
||||
|
||||
# One row of the crosswalk matrix as exposed by GET /crosswalk.
class CrosswalkRow(BaseModel):
    regulation_code: str = ""
    article: Optional[str] = None
    obligation_id: Optional[str] = None
    pattern_id: Optional[str] = None
    master_control_id: Optional[str] = None
    confidence: float = 0.0
    # Defaults to "auto" when the stored value is empty.
    source: str = "auto"
|
||||
|
||||
|
||||
# Envelope for GET /crosswalk: one page of rows plus the unpaged match count.
class CrosswalkQueryResponse(BaseModel):
    rows: List[CrosswalkRow]
    total: int
|
||||
|
||||
|
||||
# Aggregate numbers returned by GET /crosswalk/stats.
class CrosswalkStatsResponse(BaseModel):
    total_rows: int = 0
    # Distinct counts over the crosswalk matrix.
    regulations_covered: int = 0
    obligations_linked: int = 0
    patterns_used: int = 0
    controls_linked: int = 0
    # Maps regulation_code -> number of crosswalk rows.
    coverage_by_regulation: dict = {}
|
||||
|
||||
|
||||
# Options shared by the synchronous migration pass endpoints.
class MigrationRequest(BaseModel):
    # 0 = no limit
    limit: int = 0
    # 0 = auto (5 for Anthropic, 1 for Ollama)
    batch_size: int = 0
    # Use Anthropic API instead of Ollama
    use_anthropic: bool = False
    # Comma-separated categories
    category_filter: Optional[str] = None
    # Comma-separated source regulations (ILIKE match)
    source_filter: Optional[str] = None
|
||||
|
||||
|
||||
# Options for the batch-submit endpoints (Pass 0a / Pass 0b).
class BatchSubmitRequest(BaseModel):
    limit: int = 0
    batch_size: int = 5
    # The two filters are only forwarded by the Pass 0a submit endpoint.
    category_filter: Optional[str] = None
    source_filter: Optional[str] = None
|
||||
|
||||
|
||||
# Request body for POST /migrate/batch-process.
class BatchProcessRequest(BaseModel):
    batch_id: str
    # "0a" or "0b"
    pass_type: str = "0a"
|
||||
|
||||
|
||||
# Common response of the migration endpoints: a status string plus the
# statistics dict produced by the executed pass.
class MigrationResponse(BaseModel):
    status: str = "completed"
    stats: dict = {}
|
||||
|
||||
|
||||
# Migration progress figures; every field defaults to zero.
class MigrationStatusResponse(BaseModel):
    # Absolute counts.
    total_controls: int = 0
    has_obligation: int = 0
    has_pattern: int = 0
    fully_linked: int = 0
    deprecated: int = 0

    # Coverage percentages.
    coverage_obligation_pct: float = 0.0
    coverage_pattern_pct: float = 0.0
    coverage_full_pct: float = 0.0
|
||||
|
||||
|
||||
# Decomposition progress figures; every field defaults to zero.
class DecompositionStatusResponse(BaseModel):
    # Absolute counts.
    rich_controls: int = 0
    decomposed_controls: int = 0
    total_candidates: int = 0
    validated: int = 0
    rejected: int = 0
    composed: int = 0
    atomic_controls: int = 0
    merged: int = 0
    enriched: int = 0
    ready_for_pass0b: int = 0

    # Percentages.
    decomposition_pct: float = 0.0
    composition_pct: float = 0.0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# PATTERN LIBRARY ENDPOINTS
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/patterns", response_model=PatternListResponse)
async def list_patterns(
    domain: Optional[str] = Query(None, description="Filter by domain (e.g. AUTH, CRYP)"),
    category: Optional[str] = Query(None, description="Filter by category"),
    tag: Optional[str] = Query(None, description="Filter by tag"),
):
    """Return the control patterns, optionally narrowed by domain, category and tag.

    The domain filter is compared upper-cased, the category filter
    lower-cased, and the tag filter case-insensitively against each
    pattern's tags. Each returned pattern carries the number of controls
    reported for it by ``_get_pattern_control_counts``.
    """
    from compliance.services.pattern_matcher import PatternMatcher

    matcher = PatternMatcher()
    matcher._load_patterns()
    matcher._build_keyword_index()

    selected = matcher._patterns

    if domain:
        wanted_domain = domain.upper()
        selected = [pat for pat in selected if pat.domain == wanted_domain]
    if category:
        wanted_category = category.lower()
        selected = [pat for pat in selected if pat.category == wanted_category]
    if tag:
        wanted_tag = tag.lower()
        selected = [
            pat for pat in selected
            if wanted_tag in [existing.lower() for existing in pat.tags]
        ]

    # Count controls per pattern from DB
    counts_by_pattern = _get_pattern_control_counts()

    items = [
        PatternResponse(
            id=pat.id,
            name=pat.name,
            name_de=pat.name_de,
            domain=pat.domain,
            category=pat.category,
            description=pat.description,
            objective_template=pat.objective_template,
            severity_default=pat.severity_default,
            implementation_effort_default=pat.implementation_effort_default,
            tags=pat.tags,
            composable_with=pat.composable_with,
            open_anchor_refs=pat.open_anchor_refs,
            controls_count=counts_by_pattern.get(pat.id, 0),
        )
        for pat in selected
    ]

    return PatternListResponse(patterns=items, total=len(items))
|
||||
|
||||
|
||||
@router.get("/patterns/{pattern_id}", response_model=PatternDetailResponse)
async def get_pattern(pattern_id: str):
    """Return the full definition of one control pattern.

    Responds with HTTP 404 when the matcher has no pattern for
    ``pattern_id``. The control count comes from
    ``_get_pattern_control_counts`` and is 0 when the pattern is absent there.
    """
    from compliance.services.pattern_matcher import PatternMatcher

    matcher = PatternMatcher()
    matcher._load_patterns()

    found = matcher.get_pattern(pattern_id)
    if not found:
        raise HTTPException(status_code=404, detail=f"Pattern {pattern_id} not found")

    linked_controls = _get_pattern_control_counts().get(found.id, 0)

    return PatternDetailResponse(
        # Identity and classification
        id=found.id,
        name=found.name,
        name_de=found.name_de,
        domain=found.domain,
        category=found.category,
        description=found.description,
        # Templates
        objective_template=found.objective_template,
        rationale_template=found.rationale_template,
        requirements_template=found.requirements_template,
        test_procedure_template=found.test_procedure_template,
        evidence_template=found.evidence_template,
        # Defaults and relations
        severity_default=found.severity_default,
        implementation_effort_default=found.implementation_effort_default,
        tags=found.tags,
        composable_with=found.composable_with,
        open_anchor_refs=found.open_anchor_refs,
        obligation_match_keywords=found.obligation_match_keywords,
        controls_count=linked_controls,
    )
|
||||
|
||||
|
||||
@router.get("/patterns/{pattern_id}/controls")
async def get_pattern_controls(
    pattern_id: str,
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
):
    """Return one page of non-deprecated controls generated from a pattern.

    ``pattern_id`` is upper-cased before it is used in the queries. The
    response holds the page (``controls``, ordered by control_id) and the
    number of all matching controls (``total``). ``obligation_ids`` stored
    as JSON text are decoded; undecodable or empty values become ``[]``.
    """
    session = SessionLocal()
    try:
        normalized_id = pattern_id.upper()

        page = session.execute(
            text("""
                SELECT id, control_id, title, objective, severity,
                       release_state, category, obligation_ids
                FROM canonical_controls
                WHERE pattern_id = :pattern_id
                  AND release_state NOT IN ('deprecated')
                ORDER BY control_id
                LIMIT :limit OFFSET :offset
            """),
            {"pattern_id": normalized_id, "limit": limit, "offset": offset},
        ).fetchall()

        total = session.execute(
            text("""
                SELECT count(*) FROM canonical_controls
                WHERE pattern_id = :pattern_id
                  AND release_state NOT IN ('deprecated')
            """),
            {"pattern_id": normalized_id},
        ).fetchone()[0]

        controls = []
        for record in page:
            obligation_ids = record[7]
            if isinstance(obligation_ids, str):
                # Stored as JSON text: decode, falling back to an empty list.
                try:
                    obligation_ids = json.loads(obligation_ids)
                except (json.JSONDecodeError, TypeError):
                    obligation_ids = []
            controls.append({
                "id": str(record[0]),
                "control_id": record[1],
                "title": record[2],
                "objective": record[3],
                "severity": record[4],
                "release_state": record[5],
                "category": record[6],
                "obligation_ids": obligation_ids or [],
            })

        return {"controls": controls, "total": total}
    finally:
        session.close()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# OBLIGATION EXTRACTION ENDPOINT
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.post("/obligations/extract", response_model=ObligationExtractResponse)
async def extract_obligation(req: ObligationExtractRequest):
    """Extract an obligation from the submitted text and match it to a pattern.

    The text is handed to ``ObligationExtractor.extract``; the resulting
    obligation text (or its title, or the first 500 characters of the
    request text) is then run through the pattern matcher's keyword tier.
    When no pattern matches, ``pattern_id`` is None and
    ``pattern_confidence`` is 0.
    """
    from compliance.services.obligation_extractor import ObligationExtractor
    from compliance.services.pattern_matcher import PatternMatcher

    extractor = ObligationExtractor()
    await extractor.initialize()

    extracted = await extractor.extract(
        chunk_text=req.text,
        regulation_code=req.regulation_code or "",
        article=req.article,
        paragraph=req.paragraph,
    )

    # Also match to pattern
    matcher = PatternMatcher()
    matcher._load_patterns()
    matcher._build_keyword_index()

    match_input = extracted.obligation_text or extracted.obligation_title or req.text[:500]
    match = matcher._tier1_keyword(match_input, extracted.regulation_id)

    if match:
        matched_pattern_id = match.pattern_id
        matched_confidence = match.confidence
    else:
        matched_pattern_id = None
        matched_confidence = 0

    return ObligationExtractResponse(
        obligation_id=extracted.obligation_id,
        obligation_title=extracted.obligation_title,
        obligation_text=extracted.obligation_text,
        method=extracted.method,
        confidence=extracted.confidence,
        regulation_id=extracted.regulation_id,
        pattern_id=matched_pattern_id,
        pattern_confidence=matched_confidence,
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CROSSWALK MATRIX ENDPOINTS
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/crosswalk", response_model=CrosswalkQueryResponse)
async def query_crosswalk(
    regulation_code: Optional[str] = Query(None),
    article: Optional[str] = Query(None),
    obligation_id: Optional[str] = Query(None),
    pattern_id: Optional[str] = Query(None),
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0),
):
    """Return one page of crosswalk matrix rows matching the given filters.

    Each supplied (non-empty) filter adds an equality condition bound as a
    query parameter. ``total`` is the number of rows matching the filters
    irrespective of ``limit``/``offset``.
    """
    session = SessionLocal()
    try:
        # (SQL condition, bind name, supplied value) for every optional filter.
        optional_filters = (
            ("regulation_code = :reg", "reg", regulation_code),
            ("article = :art", "art", article),
            ("obligation_id = :obl", "obl", obligation_id),
            ("pattern_id = :pat", "pat", pattern_id),
        )

        conditions = ["1=1"]
        params = {"limit": limit, "offset": offset}
        for clause, bind_name, value in optional_filters:
            if value:
                conditions.append(clause)
                params[bind_name] = value

        where = " AND ".join(conditions)

        matched = session.execute(
            text(f"""
                SELECT regulation_code, article, obligation_id,
                       pattern_id, master_control_id, confidence, source
                FROM crosswalk_matrix
                WHERE {where}
                ORDER BY regulation_code, article
                LIMIT :limit OFFSET :offset
            """),
            params,
        ).fetchall()

        total = session.execute(
            text(f"SELECT count(*) FROM crosswalk_matrix WHERE {where}"),
            params,
        ).fetchone()[0]

        crosswalk_rows = []
        for record in matched:
            crosswalk_rows.append(CrosswalkRow(
                regulation_code=record[0] or "",
                article=record[1],
                obligation_id=record[2],
                pattern_id=record[3],
                master_control_id=record[4],
                confidence=float(record[5] or 0),
                source=record[6] or "auto",
            ))

        return CrosswalkQueryResponse(rows=crosswalk_rows, total=total)
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.get("/crosswalk/stats", response_model=CrosswalkStatsResponse)
async def crosswalk_stats():
    """Return aggregate coverage numbers for the crosswalk matrix.

    Reports the total row count, the distinct regulations, obligations,
    patterns and controls referenced, and the row count per non-empty
    regulation code.
    """
    session = SessionLocal()
    try:
        totals = session.execute(text("""
            SELECT
                count(*) AS total,
                count(DISTINCT regulation_code) FILTER (WHERE regulation_code != '') AS regs,
                count(DISTINCT obligation_id) FILTER (WHERE obligation_id IS NOT NULL) AS obls,
                count(DISTINCT pattern_id) FILTER (WHERE pattern_id IS NOT NULL) AS pats,
                count(DISTINCT master_control_id) FILTER (WHERE master_control_id IS NOT NULL) AS ctrls
            FROM crosswalk_matrix
        """)).fetchone()

        # Coverage by regulation
        per_regulation = session.execute(text("""
            SELECT regulation_code, count(*) AS cnt
            FROM crosswalk_matrix
            WHERE regulation_code != ''
            GROUP BY regulation_code
            ORDER BY cnt DESC
        """)).fetchall()

        coverage = {}
        for record in per_regulation:
            coverage[record[0]] = record[1]

        return CrosswalkStatsResponse(
            total_rows=totals[0],
            regulations_covered=totals[1],
            obligations_linked=totals[2],
            patterns_used=totals[3],
            controls_linked=totals[4],
            coverage_by_regulation=coverage,
        )
    finally:
        session.close()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# MIGRATION ENDPOINTS
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.post("/migrate/decompose", response_model=MigrationResponse)
async def migrate_decompose(req: MigrationRequest):
    """Pass 0a: extract obligation candidates from rich controls.

    Forwards limit, batch size, the ``use_anthropic`` switch and both
    filters to ``DecompositionPass.run_pass0a`` and returns its statistics.
    Any failure is logged and reported as HTTP 500 with the error text.
    """
    from compliance.services.decomposition_pass import DecompositionPass

    session = SessionLocal()
    try:
        pass_stats = await DecompositionPass(db=session).run_pass0a(
            limit=req.limit,
            batch_size=req.batch_size,
            use_anthropic=req.use_anthropic,
            category_filter=req.category_filter,
            source_filter=req.source_filter,
        )
        return MigrationResponse(status="completed", stats=pass_stats)
    except Exception as exc:
        logger.error("Decomposition pass 0a failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.post("/migrate/merge-obligations", response_model=MigrationResponse)
async def migrate_merge_obligations():
    """Run the obligation merge pass and return its statistics.

    Intended to run after Pass 0a and before Pass 0b. Delegates to
    ``DecompositionPass.run_merge_pass``; any failure is logged and
    reported as HTTP 500 with the error text.
    """
    from compliance.services.decomposition_pass import DecompositionPass

    session = SessionLocal()
    try:
        merge_stats = DecompositionPass(db=session).run_merge_pass()
        return MigrationResponse(status="completed", stats=merge_stats)
    except Exception as exc:
        logger.error("Merge pass failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.post("/migrate/enrich-obligations", response_model=MigrationResponse)
async def migrate_enrich_obligations():
    """Run the obligation enrichment pass and return its statistics.

    Intended to run after the merge pass and before Pass 0b. Delegates to
    ``DecompositionPass.enrich_obligations``; any failure is logged and
    reported as HTTP 500 with the error text.
    """
    from compliance.services.decomposition_pass import DecompositionPass

    session = SessionLocal()
    try:
        enrich_stats = DecompositionPass(db=session).enrich_obligations()
        return MigrationResponse(status="completed", stats=enrich_stats)
    except Exception as exc:
        logger.error("Enrich pass failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.post("/migrate/compose-atomic", response_model=MigrationResponse)
async def migrate_compose_atomic(req: MigrationRequest):
    """Pass 0b: compose atomic controls from obligation candidates.

    Forwards limit, batch size and the ``use_anthropic`` switch to
    ``DecompositionPass.run_pass0b`` and returns its statistics. Any
    failure is logged and reported as HTTP 500 with the error text.
    """
    from compliance.services.decomposition_pass import DecompositionPass

    session = SessionLocal()
    try:
        pass_stats = await DecompositionPass(db=session).run_pass0b(
            limit=req.limit,
            batch_size=req.batch_size,
            use_anthropic=req.use_anthropic,
        )
        return MigrationResponse(status="completed", stats=pass_stats)
    except Exception as exc:
        logger.error("Decomposition pass 0b failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.post("/migrate/batch-submit-0a", response_model=MigrationResponse)
async def batch_submit_pass0a(req: BatchSubmitRequest):
    """Submit Pass 0a as an Anthropic Batch API job.

    Delegates to ``DecompositionPass.submit_batch_pass0a``. The ``status``
    entry of its result (default ``"submitted"``) becomes the response
    status; the remaining entries are returned as ``stats``. Any failure
    is logged and reported as HTTP 500 with the error text.
    """
    from compliance.services.decomposition_pass import DecompositionPass

    session = SessionLocal()
    try:
        submission = await DecompositionPass(db=session).submit_batch_pass0a(
            limit=req.limit,
            batch_size=req.batch_size,
            category_filter=req.category_filter,
            source_filter=req.source_filter,
        )
        # Pull the status out first so it is not repeated inside stats.
        submission_status = submission.pop("status", "submitted")
        return MigrationResponse(status=submission_status, stats=submission)
    except Exception as exc:
        logger.error("Batch submit 0a failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.post("/migrate/batch-submit-0b", response_model=MigrationResponse)
async def batch_submit_pass0b(req: BatchSubmitRequest):
    """Submit Pass 0b as an Anthropic Batch API job.

    Delegates to ``DecompositionPass.submit_batch_pass0b`` with limit and
    batch size only. The ``status`` entry of its result (default
    ``"submitted"``) becomes the response status; the remaining entries
    are returned as ``stats``. Any failure is logged and reported as
    HTTP 500 with the error text.
    """
    from compliance.services.decomposition_pass import DecompositionPass

    session = SessionLocal()
    try:
        submission = await DecompositionPass(db=session).submit_batch_pass0b(
            limit=req.limit,
            batch_size=req.batch_size,
        )
        # Pull the status out first so it is not repeated inside stats.
        submission_status = submission.pop("status", "submitted")
        return MigrationResponse(status=submission_status, stats=submission)
    except Exception as exc:
        logger.error("Batch submit 0b failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.get("/migrate/batch-status/{batch_id}")
async def batch_check_status(batch_id: str):
    """Return the processing status of an Anthropic batch job.

    Passes through whatever ``check_batch_status`` returns; any failure is
    reported as HTTP 500 with the error text.
    """
    from compliance.services.decomposition_pass import check_batch_status

    try:
        return await check_batch_status(batch_id)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@router.post("/migrate/batch-process", response_model=MigrationResponse)
async def batch_process_results(req: BatchProcessRequest):
    """Fetch and process the results of a completed Anthropic batch.

    Meant to be called once batch-status shows processing_status='ended'.
    Delegates to ``DecompositionPass.process_batch_results``; the
    ``status`` entry of its result (default ``"completed"``) becomes the
    response status and the remaining entries are returned as ``stats``.
    Any failure is logged and reported as HTTP 500 with the error text.
    """
    from compliance.services.decomposition_pass import DecompositionPass

    session = SessionLocal()
    try:
        outcome = await DecompositionPass(db=session).process_batch_results(
            batch_id=req.batch_id,
            pass_type=req.pass_type,
        )
        # Pull the status out first so it is not repeated inside stats.
        outcome_status = outcome.pop("status", "completed")
        return MigrationResponse(status=outcome_status, stats=outcome)
    except Exception as exc:
        logger.error("Batch process failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.post("/migrate/link-obligations", response_model=MigrationResponse)
async def migrate_link_obligations(req: MigrationRequest):
    """Pass 1: link controls to obligations.

    Initializes ``MigrationPasses`` and runs
    ``run_pass1_obligation_linkage`` with the requested limit. Any failure
    is logged and reported as HTTP 500 with the error text.
    """
    from compliance.services.pipeline_adapter import MigrationPasses

    session = SessionLocal()
    try:
        passes = MigrationPasses(db=session)
        await passes.initialize()
        linkage_stats = await passes.run_pass1_obligation_linkage(limit=req.limit)
        return MigrationResponse(status="completed", stats=linkage_stats)
    except Exception as exc:
        logger.error("Migration pass 1 failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.post("/migrate/classify-patterns", response_model=MigrationResponse)
async def migrate_classify_patterns(req: MigrationRequest):
    """Pass 2: classify controls into patterns via keyword matching.

    Initializes ``MigrationPasses`` on a fresh session and runs the pass with
    ``req.limit``. Failures are logged and returned as HTTP 500; the session
    is closed in every case.
    """
    from compliance.services.pipeline_adapter import MigrationPasses

    session = SessionLocal()
    try:
        passes = MigrationPasses(db=session)
        await passes.initialize()
        pass_stats = await passes.run_pass2_pattern_classification(limit=req.limit)
        return MigrationResponse(status="completed", stats=pass_stats)
    except Exception as exc:
        logger.error("Migration pass 2 failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.post("/migrate/triage", response_model=MigrationResponse)
async def migrate_triage():
    """Pass 3: quality triage — categorize by linkage completeness.

    Runs ``MigrationPasses.run_pass3_quality_triage`` on a fresh session.
    Failures are logged and returned as HTTP 500; the session is always closed.
    """
    from compliance.services.pipeline_adapter import MigrationPasses

    session = SessionLocal()
    try:
        pass_stats = MigrationPasses(db=session).run_pass3_quality_triage()
        return MigrationResponse(status="completed", stats=pass_stats)
    except Exception as exc:
        logger.error("Migration pass 3 failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.post("/migrate/backfill-crosswalk", response_model=MigrationResponse)
async def migrate_backfill_crosswalk():
    """Pass 4: create crosswalk rows for linked controls.

    Runs ``MigrationPasses.run_pass4_crosswalk_backfill`` on a fresh session.
    Failures are logged and returned as HTTP 500; the session is always closed.
    """
    from compliance.services.pipeline_adapter import MigrationPasses

    session = SessionLocal()
    try:
        pass_stats = MigrationPasses(db=session).run_pass4_crosswalk_backfill()
        return MigrationResponse(status="completed", stats=pass_stats)
    except Exception as exc:
        logger.error("Migration pass 4 failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.post("/migrate/deduplicate", response_model=MigrationResponse)
async def migrate_deduplicate():
    """Pass 5: mark duplicate controls (same obligation + pattern).

    Runs ``MigrationPasses.run_pass5_deduplication`` on a fresh session.
    Failures are logged and returned as HTTP 500; the session is always closed.
    """
    from compliance.services.pipeline_adapter import MigrationPasses

    session = SessionLocal()
    try:
        pass_stats = MigrationPasses(db=session).run_pass5_deduplication()
        return MigrationResponse(status="completed", stats=pass_stats)
    except Exception as exc:
        logger.error("Migration pass 5 failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.get("/migrate/status", response_model=MigrationStatusResponse)
async def migration_status():
    """Get overall migration progress.

    The dict returned by ``MigrationPasses.migration_status`` is expanded into
    a ``MigrationStatusResponse``. Failures are logged and returned as HTTP 500.
    """
    from compliance.services.pipeline_adapter import MigrationPasses

    session = SessionLocal()
    try:
        progress = MigrationPasses(db=session).migration_status()
        return MigrationStatusResponse(**progress)
    except Exception as exc:
        logger.error("Migration status failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
@router.get("/migrate/decomposition-status", response_model=DecompositionStatusResponse)
async def decomposition_status():
    """Get decomposition progress (Pass 0a/0b).

    The dict returned by ``DecompositionPass.decomposition_status`` is expanded
    into a ``DecompositionStatusResponse``. Failures are logged and returned as
    HTTP 500.
    """
    from compliance.services.decomposition_pass import DecompositionPass

    session = SessionLocal()
    try:
        progress = DecompositionPass(db=session).decomposition_status()
        return DecompositionStatusResponse(**progress)
    except Exception as exc:
        logger.error("Decomposition status failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        session.close()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# BATCH DEDUP ENDPOINTS
|
||||
# =============================================================================
|
||||
|
||||
|
||||
# Module-level reference to the batch dedup runner that is currently executing.
# It is set by the batch-dedup endpoint while a run is in flight and read by the
# status endpoint for progress polling; None means no run is active.
_batch_dedup_runner = None
|
||||
|
||||
|
||||
@router.post("/migrate/batch-dedup", response_model=MigrationResponse)
async def migrate_batch_dedup(
    dry_run: bool = Query(False, description="Preview mode — no DB changes"),
    hint_filter: Optional[str] = Query(None, description="Only process hints matching this prefix"),
):
    """Batch dedup: reduce ~85k Pass 0b controls to ~18-25k masters.

    Phase 1: Groups by merge_group_hint, picks best quality master, links rest.
    Phase 2: Cross-group embedding search for semantically similar masters.

    While the run is in flight the runner is published in the module-level
    ``_batch_dedup_runner`` so the status endpoint can report progress.

    Raises:
        HTTPException: 500 carrying the error text when the run fails.
    """
    global _batch_dedup_runner
    from compliance.services.batch_dedup_runner import BatchDedupRunner

    db = SessionLocal()
    # Pre-bind so the ``finally`` clause can compare identities even when the
    # runner constructor itself raises.
    runner = None
    try:
        runner = BatchDedupRunner(db=db)
        _batch_dedup_runner = runner
        stats = await runner.run(dry_run=dry_run, hint_filter=hint_filter)
        return MigrationResponse(status="completed", stats=stats)
    except Exception as e:
        logger.error("Batch dedup failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Clear the shared reference only if it still points at this request's
        # runner. Resetting it unconditionally would drop the runner of another
        # request that is still in flight and make status polling report
        # "not running".
        if _batch_dedup_runner is runner:
            _batch_dedup_runner = None
        db.close()
|
||||
|
||||
|
||||
@router.get("/migrate/batch-dedup/status")
async def batch_dedup_status():
    """Get current batch dedup progress (while running).

    While a runner is registered, its ``get_status()`` payload is returned with
    ``running: True``. Otherwise aggregate counts are read from the database
    and returned with ``running: False``.
    """
    active_runner = _batch_dedup_runner
    if active_runner is not None:
        payload = {"running": True}
        payload.update(active_runner.get_status())
        return payload

    # No run in progress: fall back to counts stored in the database.
    session = SessionLocal()
    try:
        counts = session.execute(text("""
            SELECT
                count(*) FILTER (WHERE decomposition_method = 'pass0b') AS total_pass0b,
                count(*) FILTER (WHERE decomposition_method = 'pass0b'
                    AND release_state = 'duplicate') AS duplicates,
                count(*) FILTER (WHERE decomposition_method = 'pass0b'
                    AND release_state != 'duplicate'
                    AND release_state != 'deprecated') AS masters
            FROM canonical_controls
        """)).fetchone()
        pending = session.execute(text(
            "SELECT count(*) FROM control_dedup_reviews WHERE review_status = 'pending'"
        )).fetchone()[0]
        return {
            "running": False,
            "total_pass0b": counts[0],
            "duplicates": counts[1],
            "masters": counts[2],
            "pending_reviews": pending,
        }
    finally:
        session.close()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# HELPERS
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _get_pattern_control_counts() -> dict[str, int]:
    """Get count of controls per pattern_id from DB.

    Rows with a NULL or empty ``pattern_id`` and controls in the
    ``deprecated`` release state are excluded.

    Returns:
        Mapping of pattern_id to control count. An empty dict is returned when
        the query fails; the failure is logged rather than raised because this
        helper is best-effort.
    """
    db = SessionLocal()
    try:
        result = db.execute(text("""
            SELECT pattern_id, count(*) AS cnt
            FROM canonical_controls
            WHERE pattern_id IS NOT NULL AND pattern_id != ''
            AND release_state NOT IN ('deprecated')
            GROUP BY pattern_id
        """))
        return {row[0]: row[1] for row in result.fetchall()}
    except Exception as e:
        # Keep the empty-dict fallback for callers, but leave a trace so a
        # broken query is not mistaken for "no controls".
        logger.warning("Failed to load pattern control counts: %s", e)
        return {}
    finally:
        db.close()
|
||||
@@ -5,16 +5,23 @@ Endpoints:
|
||||
- /dashboard: Main compliance dashboard
|
||||
- /dashboard/executive: Executive summary for managers
|
||||
- /dashboard/trend: Compliance score trend over time
|
||||
- /dashboard/roadmap: Prioritised controls in 4 buckets
|
||||
- /dashboard/module-status: Completion status of each SDK module
|
||||
- /dashboard/next-actions: Top 5 most important actions
|
||||
- /dashboard/snapshot: Save / query compliance score snapshots
|
||||
- /score: Quick compliance score
|
||||
- /reports: Report generation
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from datetime import datetime, date, timedelta
|
||||
from calendar import month_abbr
|
||||
from typing import Optional
|
||||
from typing import Optional, Dict, Any, List
|
||||
from decimal import Decimal
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
@@ -25,15 +32,24 @@ from ..db import (
|
||||
ControlRepository,
|
||||
EvidenceRepository,
|
||||
RiskRepository,
|
||||
AssertionDB,
|
||||
)
|
||||
from .schemas import (
|
||||
DashboardResponse,
|
||||
MultiDimensionalScore,
|
||||
ExecutiveDashboardResponse,
|
||||
TrendDataPoint,
|
||||
RiskSummary,
|
||||
DeadlineItem,
|
||||
TeamWorkloadItem,
|
||||
TraceabilityAssertion,
|
||||
TraceabilityEvidence,
|
||||
TraceabilityCoverage,
|
||||
TraceabilityControl,
|
||||
TraceabilityMatrixResponse,
|
||||
)
|
||||
from .tenant_utils import get_tenant_id as _get_tenant_id
|
||||
from .db_utils import row_to_dict as _row_to_dict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(tags=["compliance-dashboard"])
|
||||
@@ -86,6 +102,14 @@ async def get_dashboard(db: Session = Depends(get_db)):
|
||||
# or compute from by_status dict
|
||||
score = ctrl_stats.get("compliance_score", 0.0)
|
||||
|
||||
# Multi-dimensional score (Anti-Fake-Evidence)
|
||||
try:
|
||||
ms = ctrl_repo.get_multi_dimensional_score()
|
||||
multi_score = MultiDimensionalScore(**ms)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to compute multi-dimensional score: {e}")
|
||||
multi_score = None
|
||||
|
||||
return DashboardResponse(
|
||||
compliance_score=round(score, 1),
|
||||
total_regulations=len(regulations),
|
||||
@@ -98,6 +122,7 @@ async def get_dashboard(db: Session = Depends(get_db)):
|
||||
total_risks=len(risks),
|
||||
risks_by_level=risks_by_level,
|
||||
recent_activity=[],
|
||||
multi_score=multi_score,
|
||||
)
|
||||
|
||||
|
||||
@@ -116,11 +141,18 @@ async def get_compliance_score(db: Session = Depends(get_db)):
|
||||
else:
|
||||
score = 0
|
||||
|
||||
# Multi-dimensional score (Anti-Fake-Evidence)
|
||||
try:
|
||||
multi_score = ctrl_repo.get_multi_dimensional_score()
|
||||
except Exception:
|
||||
multi_score = None
|
||||
|
||||
return {
|
||||
"score": round(score, 1),
|
||||
"total_controls": total,
|
||||
"passing_controls": passing,
|
||||
"partial_controls": partial,
|
||||
"multi_score": multi_score,
|
||||
}
|
||||
|
||||
|
||||
@@ -322,6 +354,424 @@ async def get_compliance_trend(
|
||||
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Dashboard Extended — Roadmap, Module-Status, Next-Actions, Snapshots
|
||||
# ============================================================================
|
||||
|
||||
# Priority weight per control category; a higher weight means more important.
_PRIORITY_WEIGHTS = dict(legal=5, security=3, best_practice=1, operational=2)

# SDK modules and the DB table whose row count stands for each module's
# completion. Each entry expands to {"key": ..., "label": ..., "table": ...}.
_MODULE_DEFS: List[Dict[str, str]] = [
    {"key": key, "label": label, "table": table}
    for key, label, table in (
        ("vvt", "VVT", "compliance_vvt_activities"),
        ("tom", "TOM", "compliance_toms"),
        ("dsfa", "DSFA", "compliance_dsfa_assessments"),
        ("loeschfristen", "Loeschfristen", "compliance_loeschfristen"),
        ("risks", "Risiken", "compliance_risks"),
        ("controls", "Controls", "compliance_controls"),
        ("evidence", "Nachweise", "compliance_evidence"),
        ("obligations", "Pflichten", "compliance_obligations"),
        ("incidents", "Vorfaelle", "compliance_notfallplan_incidents"),
        ("vendor", "Auftragsverarbeiter", "compliance_vendor_assessments"),
        ("legal_templates", "Rechtl. Dokumente", "compliance_legal_templates"),
        ("training", "Schulungen", "training_modules"),
        ("audit", "Audit", "compliance_audit_sessions"),
        ("security_backlog", "Security-Backlog", "compliance_security_backlog"),
        ("quality", "Qualitaet", "compliance_quality_items"),
    )
]
|
||||
|
||||
|
||||
@router.get("/dashboard/roadmap")
async def get_dashboard_roadmap(
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Prioritised controls in 4 buckets: Quick Wins, Must Have, Should Have, Nice to Have.

    Controls with status ``pass`` are skipped. Each remaining control is
    weighted by category via ``_PRIORITY_WEIGHTS`` (unknown categories weigh 1)
    and assigned to a bucket:

    * ``quick_wins``: weight >= 5 and the review date has passed
    * ``must_have``: weight >= 4
    * ``should_have``: weight >= 2
    * ``nice_to_have``: everything else

    Within a bucket, items are ordered by urgency (``weight * 2`` plus 1 when
    overdue), then by days overdue, both descending.

    Returns:
        Dict with ``buckets``, per-bucket ``counts`` and ``generated_at``.
    """
    # NOTE(review): tenant_id is resolved but not passed to the repository
    # here — confirm whether ControlRepository scopes by tenant on its own.
    ctrl_repo = ControlRepository(db)
    controls = ctrl_repo.get_all()
    today = datetime.utcnow().date()

    buckets: Dict[str, list] = {
        "quick_wins": [],
        "must_have": [],
        "should_have": [],
        "nice_to_have": [],
    }

    for ctrl in controls:
        status = ctrl.status.value if ctrl.status else "planned"
        if status == "pass":
            continue  # already done

        weight = _PRIORITY_WEIGHTS.get(ctrl.category if hasattr(ctrl, "category") else "best_practice", 1)
        days_overdue = 0
        if ctrl.next_review_at:
            # next_review_at may be a datetime or already a date.
            review_date = ctrl.next_review_at.date() if hasattr(ctrl.next_review_at, "date") else ctrl.next_review_at
            days_overdue = (today - review_date).days

        item = {
            "id": str(ctrl.id),
            "control_id": ctrl.control_id,
            "title": ctrl.title,
            "status": status,
            "domain": ctrl.domain.value if ctrl.domain else "unknown",
            "owner": ctrl.owner,
            "next_review_at": ctrl.next_review_at.isoformat() if ctrl.next_review_at else None,
            # Negative values (review still in the future) are reported as 0.
            "days_overdue": max(0, days_overdue),
            "weight": weight,
        }

        if weight >= 5 and days_overdue > 0:
            buckets["quick_wins"].append(item)
        elif weight >= 4:
            buckets["must_have"].append(item)
        elif weight >= 2:
            buckets["should_have"].append(item)
        else:
            buckets["nice_to_have"].append(item)

    def _urgency_key(entry: dict) -> tuple:
        """Sort key: urgency score first, days overdue as tie-breaker."""
        overdue_bonus = 1 if entry["days_overdue"] > 0 else 0
        return (entry["weight"] * 2 + overdue_bonus, entry["days_overdue"])

    # Sort each bucket by urgency desc. Previously the urgency score was
    # computed but never used and only days_overdue decided the order.
    for bucket_items in buckets.values():
        bucket_items.sort(key=_urgency_key, reverse=True)

    return {
        "buckets": buckets,
        "counts": {k: len(v) for k, v in buckets.items()},
        "generated_at": datetime.utcnow().isoformat(),
    }
|
||||
|
||||
|
||||
@router.get("/dashboard/module-status")
async def get_module_status(
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Completion status for each SDK module based on DB record counts.

    For every entry in ``_MODULE_DEFS`` the tenant's rows in the module table
    are counted. A module whose count cannot be read is reported with count 0.

    Returns:
        Dict with the per-module list plus ``total``, ``started``,
        ``complete`` and ``overall_progress`` (percentage of complete modules).
    """
    modules = []
    for mod in _MODULE_DEFS:
        try:
            # The table name comes from the _MODULE_DEFS constant, never from
            # request input; the tenant id is bound as a parameter.
            row = db.execute(
                text(f"SELECT COUNT(*) FROM {mod['table']} WHERE tenant_id = :tid"),
                {"tid": tenant_id},
            ).fetchone()
            count = int(row[0]) if row else 0
        except Exception as e:
            # A failed statement (e.g. missing table) leaves the transaction
            # in an aborted state on PostgreSQL; without a rollback every
            # later module in this loop would fail too and report 0.
            logger.warning("Module status count failed for %s: %s", mod["table"], e)
            db.rollback()
            count = 0

        # Simple heuristic: 0 = not started, 1-2 = in progress, 3+ = complete
        if count == 0:
            status = "not_started"
            progress = 0
        elif count < 3:
            status = "in_progress"
            progress = min(60, count * 30)
        else:
            status = "complete"
            progress = 100

        modules.append({
            "key": mod["key"],
            "label": mod["label"],
            "count": count,
            "status": status,
            "progress": progress,
        })

    started = sum(1 for m in modules if m["status"] != "not_started")
    complete = sum(1 for m in modules if m["status"] == "complete")

    return {
        "modules": modules,
        "total": len(modules),
        "started": started,
        "complete": complete,
        "overall_progress": round((complete / len(modules)) * 100, 1) if modules else 0,
    }
|
||||
|
||||
|
||||
@router.get("/dashboard/next-actions")
async def get_next_actions(
    limit: int = Query(5, ge=1, le=20),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Top N most important actions sorted by urgency*impact.

    Every control whose status is not ``pass`` gets an urgency score of
    ``weight * 10 + days_overdue``; the ``limit`` highest-scoring entries are
    returned under the ``actions`` key.
    """
    current_day = datetime.utcnow().date()
    candidates = []

    for control in ControlRepository(db).get_all():
        state = control.status.value if control.status else "planned"
        if state == "pass":
            continue

        overdue = 0
        review_at = control.next_review_at
        if review_at:
            # next_review_at may be a datetime or already a date.
            due = review_at.date() if hasattr(review_at, "date") else review_at
            overdue = max(0, (current_day - due).days)

        category = control.category if hasattr(control, "category") else "best_practice"
        priority = _PRIORITY_WEIGHTS.get(category, 1)

        candidates.append({
            "id": str(control.id),
            "control_id": control.control_id,
            "title": control.title,
            "status": state,
            "domain": control.domain.value if control.domain else "unknown",
            "owner": control.owner,
            "days_overdue": overdue,
            "urgency_score": priority * 10 + overdue,
            "reason": "Ueberfaellig" if overdue > 0 else "Offen",
        })

    ranked = sorted(candidates, key=lambda entry: entry["urgency_score"], reverse=True)
    return {"actions": ranked[:limit]}
|
||||
|
||||
|
||||
@router.post("/dashboard/snapshot")
async def create_score_snapshot(
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Save current compliance score as a historical snapshot.

    The score is ``(pass + 0.5 * partial) / total * 100`` rounded to two
    decimals (0 when there are no controls). The row is upserted on
    (tenant_id, project_id, snapshot_date) and returned as a dict.
    """
    control_stats = ControlRepository(db).get_statistics()
    evidence_stats = EvidenceRepository(db).get_statistics()
    all_risks = RiskRepository(db).get_all()

    total = control_stats.get("total", 0)
    passing = control_stats.get("pass", 0)
    partial = control_stats.get("partial", 0)
    if total > 0:
        score = round(((passing + partial * 0.5) / total) * 100, 2)
    else:
        score = 0

    # Count risks rated high or critical; a missing rating counts as "low".
    risks_high = 0
    for risk in all_risks:
        rating = risk.inherent_risk.value if risk.inherent_risk else "low"
        if rating in ("high", "critical"):
            risks_high += 1

    params = {
        "tenant_id": tenant_id,
        "score": score,
        "controls_total": total,
        "controls_pass": passing,
        "controls_partial": partial,
        "evidence_total": evidence_stats.get("total", 0),
        "evidence_valid": evidence_stats.get("by_status", {}).get("valid", 0),
        "risks_total": len(all_risks),
        "risks_high": risks_high,
        "snapshot_date": date.today(),
    }

    saved = db.execute(text("""
        INSERT INTO compliance_score_snapshots (
            tenant_id, score, controls_total, controls_pass, controls_partial,
            evidence_total, evidence_valid, risks_total, risks_high, snapshot_date
        ) VALUES (
            :tenant_id, :score, :controls_total, :controls_pass, :controls_partial,
            :evidence_total, :evidence_valid, :risks_total, :risks_high, :snapshot_date
        )
        ON CONFLICT (tenant_id, project_id, snapshot_date) DO UPDATE SET
            score = EXCLUDED.score,
            controls_total = EXCLUDED.controls_total,
            controls_pass = EXCLUDED.controls_pass,
            controls_partial = EXCLUDED.controls_partial,
            evidence_total = EXCLUDED.evidence_total,
            evidence_valid = EXCLUDED.evidence_valid,
            risks_total = EXCLUDED.risks_total,
            risks_high = EXCLUDED.risks_high
        RETURNING *
    """), params).fetchone()
    db.commit()

    return _row_to_dict(saved)
|
||||
|
||||
|
||||
@router.get("/dashboard/score-history")
async def get_score_history(
    months: int = Query(12, ge=1, le=36),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Get compliance score history from snapshots.

    Returns the tenant's snapshots from the last ``months * 30`` days in
    ascending date order, with ``Decimal`` scores converted to ``float``.
    """
    cutoff = date.today() - timedelta(days=months * 30)

    result = db.execute(text("""
        SELECT * FROM compliance_score_snapshots
        WHERE tenant_id = :tenant_id AND snapshot_date >= :since
        ORDER BY snapshot_date ASC
    """), {"tenant_id": tenant_id, "since": cutoff})

    def _to_json_dict(record):
        """Convert one row to a dict, turning a Decimal score into a float."""
        entry = _row_to_dict(record)
        if isinstance(entry.get("score"), Decimal):
            entry["score"] = float(entry["score"])
        return entry

    history = [_to_json_dict(record) for record in result.fetchall()]

    return {
        "snapshots": history,
        "total": len(history),
        "period_months": months,
    }
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Evidence Distribution (Anti-Fake-Evidence Phase 3)
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/dashboard/evidence-distribution")
async def get_evidence_distribution(
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Evidence counts by confidence level and four-eyes status.

    Evidence without a confidence level is counted as ``E1``; levels outside
    E0-E4 are ignored. Evidence that requires four-eyes review and is neither
    approved nor rejected counts as pending.
    """
    records = EvidenceRepository(db).get_all()

    tally = {label: 0 for label in ("E0", "E1", "E2", "E3", "E4")}
    awaiting_review = 0

    for record in records:
        label = record.confidence_level.value if record.confidence_level else "E1"
        if label in tally:
            tally[label] = tally[label] + 1

        if not record.requires_four_eyes:
            continue
        if record.approval_status in ("approved", "rejected"):
            continue
        awaiting_review += 1

    return {
        "by_confidence": tally,
        "four_eyes_pending": awaiting_review,
        "total": len(records),
    }
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Traceability Matrix (Anti-Fake-Evidence Phase 4a)
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/dashboard/traceability-matrix", response_model=TraceabilityMatrixResponse)
async def get_traceability_matrix(
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """
    Full traceability chain: Control → Evidence → Assertions.

    Loads each entity set once, builds in-memory indices, and nests
    the result so the frontend can render a matrix view.

    A control is "covered" when it has at least one evidence record and
    "fully verified" when it has evidence, at least one assertion, and every
    assertion has a verifier (``verified_by`` is set).

    Returns:
        TraceabilityMatrixResponse with the nested controls and a summary of
        total, covered, fully verified and uncovered controls.
    """
    # NOTE(review): tenant_id is resolved but not used to filter any of the
    # queries below — confirm whether the repositories scope by tenant.
    ctrl_repo = ControlRepository(db)
    evidence_repo = EvidenceRepository(db)

    # 1. Load all three entity sets
    controls = ctrl_repo.get_all()
    all_evidence = evidence_repo.get_all()
    all_assertions = db.query(AssertionDB).filter(
        AssertionDB.entity_type == "evidence",
    ).all()

    # 2. Index assertions by evidence_id (entity_id). The key is normalised
    # with str() because lookups below use str(e.id); a non-string entity_id
    # (e.g. a UUID object) would otherwise never match.
    assertions_by_evidence: Dict[str, list] = {}
    for a in all_assertions:
        assertions_by_evidence.setdefault(str(a.entity_id), []).append(a)

    # 3. Index evidence by control_id
    evidence_by_control: Dict[str, list] = {}
    for e in all_evidence:
        evidence_by_control.setdefault(str(e.control_id), []).append(e)

    # Rank of each confidence level, used to find a control's weakest evidence.
    conf_order = {"E0": 0, "E1": 1, "E2": 2, "E3": 3, "E4": 4}

    # 4. Build nested response
    result_controls: list = []
    covered_controls = 0
    fully_verified = 0

    for ctrl in controls:
        ctrl_id = str(ctrl.id)
        ctrl_evidence = evidence_by_control.get(ctrl_id, [])

        nested_evidence: list = []
        has_evidence = len(ctrl_evidence) > 0
        has_assertions = False
        all_verified = True
        min_conf: Optional[str] = None

        for e in ctrl_evidence:
            ev_id = str(e.id)
            ev_assertions = assertions_by_evidence.get(ev_id, [])

            nested_assertions = [
                TraceabilityAssertion(
                    id=str(a.id),
                    sentence_text=a.sentence_text,
                    assertion_type=a.assertion_type or "assertion",
                    confidence=a.confidence or 0.0,
                    verified=a.verified_by is not None,
                )
                for a in ev_assertions
            ]

            if nested_assertions:
                has_assertions = True
                if not all(na.verified for na in nested_assertions):
                    all_verified = False

            # Evidence without a confidence level is treated as E1.
            conf = e.confidence_level.value if e.confidence_level else "E1"
            if min_conf is None or conf_order.get(conf, 1) < conf_order.get(min_conf, 1):
                min_conf = conf

            nested_evidence.append(TraceabilityEvidence(
                id=ev_id,
                title=e.title,
                evidence_type=e.evidence_type,
                confidence_level=conf,
                status=e.status.value if e.status else "valid",
                assertions=nested_assertions,
            ))

        # With no assertions at all there is nothing that could be verified.
        if not has_assertions:
            all_verified = False

        if has_evidence:
            covered_controls += 1
        if has_evidence and has_assertions and all_verified:
            fully_verified += 1

        coverage = TraceabilityCoverage(
            has_evidence=has_evidence,
            has_assertions=has_assertions,
            all_assertions_verified=all_verified,
            min_confidence_level=min_conf,
        )

        result_controls.append(TraceabilityControl(
            id=ctrl_id,
            control_id=ctrl.control_id,
            title=ctrl.title,
            status=ctrl.status.value if ctrl.status else "planned",
            domain=ctrl.domain.value if ctrl.domain else "unknown",
            evidence=nested_evidence,
            coverage=coverage,
        ))

    total_controls = len(result_controls)
    summary = {
        "total_controls": total_controls,
        "covered_controls": covered_controls,
        "fully_verified": fully_verified,
        "uncovered_controls": total_controls - covered_controls,
    }

    return TraceabilityMatrixResponse(controls=result_controls, summary=summary)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Reports
|
||||
# ============================================================================
|
||||
|
||||
@@ -60,10 +60,314 @@ def get_dsfa_service(db: Session = Depends(get_db)) -> DSFAService:
|
||||
return DSFAService(db)
|
||||
|
||||
|
||||
def get_workflow_service(
    db: Session = Depends(get_db),
) -> DSFAWorkflowService:
    """Dependency provider: build a ``DSFAWorkflowService`` for the injected session."""
    workflow_service = DSFAWorkflowService(db)
    return workflow_service
|
||||
# =============================================================================
|
||||
# Pydantic Schemas
|
||||
# =============================================================================
|
||||
|
||||
class DSFACreate(BaseModel):
    # Request body schema for creating a DSFA record. Only ``title`` is
    # required; the core fields carry defaults and all section fields are
    # optional (None when not supplied).
    title: str
    description: str = ""
    status: str = "draft"
    risk_level: str = "low"
    processing_activity: str = ""
    data_categories: List[str] = []
    recipients: List[str] = []
    measures: List[str] = []
    created_by: str = "system"
    # Section 1
    processing_description: Optional[str] = None
    processing_purpose: Optional[str] = None
    legal_basis: Optional[str] = None
    legal_basis_details: Optional[str] = None
    # Section 2
    necessity_assessment: Optional[str] = None
    proportionality_assessment: Optional[str] = None
    data_minimization: Optional[str] = None
    alternatives_considered: Optional[str] = None
    retention_justification: Optional[str] = None
    # Section 3
    involves_ai: Optional[bool] = None
    overall_risk_level: Optional[str] = None
    risk_score: Optional[int] = None
    # Section 6
    dpo_consulted: Optional[bool] = None
    dpo_name: Optional[str] = None
    dpo_opinion: Optional[str] = None
    dpo_approved: Optional[bool] = None
    authority_consulted: Optional[bool] = None
    authority_reference: Optional[str] = None
    authority_decision: Optional[str] = None
    # Metadata
    version: Optional[int] = None
    conclusion: Optional[str] = None
    federal_state: Optional[str] = None
    authority_resource_id: Optional[str] = None
    submitted_by: Optional[str] = None
    # JSONB Arrays (lists of strings or of free-form dicts)
    data_subjects: Optional[List[str]] = None
    affected_rights: Optional[List[str]] = None
    triggered_rule_codes: Optional[List[str]] = None
    ai_trigger_ids: Optional[List[str]] = None
    wp248_criteria_met: Optional[List[str]] = None
    art35_abs3_triggered: Optional[List[str]] = None
    tom_references: Optional[List[str]] = None
    risks: Optional[List[dict]] = None
    mitigations: Optional[List[dict]] = None
    stakeholder_consultations: Optional[List[dict]] = None
    review_triggers: Optional[List[dict]] = None
    review_comments: Optional[List[dict]] = None
    ai_use_case_modules: Optional[List[dict]] = None
    section_8_complete: Optional[bool] = None
    # JSONB Objects (free-form dicts)
    threshold_analysis: Optional[dict] = None
    consultation_requirement: Optional[dict] = None
    review_schedule: Optional[dict] = None
    section_progress: Optional[dict] = None
    metadata: Optional[dict] = None
|
||||
|
||||
|
||||
class DSFAUpdate(BaseModel):
    # Request body schema for updating a DSFA record. Every field is optional
    # and defaults to None.
    # NOTE(review): presumably None means "leave unchanged" — confirm against
    # the update handler.
    title: Optional[str] = None
    description: Optional[str] = None
    status: Optional[str] = None
    risk_level: Optional[str] = None
    processing_activity: Optional[str] = None
    data_categories: Optional[List[str]] = None
    recipients: Optional[List[str]] = None
    measures: Optional[List[str]] = None
    approved_by: Optional[str] = None
    # Section 1
    processing_description: Optional[str] = None
    processing_purpose: Optional[str] = None
    legal_basis: Optional[str] = None
    legal_basis_details: Optional[str] = None
    # Section 2
    necessity_assessment: Optional[str] = None
    proportionality_assessment: Optional[str] = None
    data_minimization: Optional[str] = None
    alternatives_considered: Optional[str] = None
    retention_justification: Optional[str] = None
    # Section 3
    involves_ai: Optional[bool] = None
    overall_risk_level: Optional[str] = None
    risk_score: Optional[int] = None
    # Section 6
    dpo_consulted: Optional[bool] = None
    dpo_name: Optional[str] = None
    dpo_opinion: Optional[str] = None
    dpo_approved: Optional[bool] = None
    authority_consulted: Optional[bool] = None
    authority_reference: Optional[str] = None
    authority_decision: Optional[str] = None
    # Metadata
    version: Optional[int] = None
    conclusion: Optional[str] = None
    federal_state: Optional[str] = None
    authority_resource_id: Optional[str] = None
    submitted_by: Optional[str] = None
    # JSONB Arrays (lists of strings or of free-form dicts)
    data_subjects: Optional[List[str]] = None
    affected_rights: Optional[List[str]] = None
    triggered_rule_codes: Optional[List[str]] = None
    ai_trigger_ids: Optional[List[str]] = None
    wp248_criteria_met: Optional[List[str]] = None
    art35_abs3_triggered: Optional[List[str]] = None
    tom_references: Optional[List[str]] = None
    risks: Optional[List[dict]] = None
    mitigations: Optional[List[dict]] = None
    stakeholder_consultations: Optional[List[dict]] = None
    review_triggers: Optional[List[dict]] = None
    review_comments: Optional[List[dict]] = None
    ai_use_case_modules: Optional[List[dict]] = None
    section_8_complete: Optional[bool] = None
    # JSONB Objects (free-form dicts)
    threshold_analysis: Optional[dict] = None
    consultation_requirement: Optional[dict] = None
    review_schedule: Optional[dict] = None
    section_progress: Optional[dict] = None
    metadata: Optional[dict] = None
|
||||
|
||||
|
||||
class DSFAStatusUpdate(BaseModel):
    """Request body for changing the status of a DSFA."""

    # Required target status; typed as a plain string on this model.
    status: str
    # Optional identifier of the approver.
    approved_by: Optional[str] = None
|
||||
|
||||
|
||||
class DSFASectionUpdate(BaseModel):
    """Request body for PUT /dsfa/{id}/sections/{section_number}."""

    # Optional text content of the section.
    content: Optional[str] = None
    # Catch-all dict so the frontend can send any section-specific data.
    extra: Optional[dict] = None
|
||||
|
||||
|
||||
class DSFAApproveRequest(BaseModel):
    """Request body for POST /dsfa/{id}/approve."""

    # Required approval flag.
    approved: bool
    # Optional free-text remarks accompanying the decision.
    comments: Optional[str] = None
    # Optional identifier of the person making the decision.
    approved_by: Optional[str] = None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Helpers
|
||||
# =============================================================================
|
||||
|
||||
def _get_tenant_id(tenant_id: Optional[str]) -> str:
|
||||
return tenant_id or DEFAULT_TENANT_ID
|
||||
|
||||
|
||||
def _dsfa_to_response(row) -> dict:
|
||||
"""Convert a DB row to a JSON-serializable dict."""
|
||||
import json
|
||||
# SQLAlchemy 2.0: Row objects need ._mapping for string-key access
|
||||
if hasattr(row, "_mapping"):
|
||||
row = row._mapping
|
||||
|
||||
def _parse_arr(val):
|
||||
"""Parse a JSONB array field → list."""
|
||||
if val is None:
|
||||
return []
|
||||
if isinstance(val, list):
|
||||
return val
|
||||
if isinstance(val, str):
|
||||
try:
|
||||
parsed = json.loads(val)
|
||||
return parsed if isinstance(parsed, list) else []
|
||||
except Exception:
|
||||
return []
|
||||
return val
|
||||
|
||||
def _parse_obj(val):
|
||||
"""Parse a JSONB object field → dict."""
|
||||
if val is None:
|
||||
return {}
|
||||
if isinstance(val, dict):
|
||||
return val
|
||||
if isinstance(val, str):
|
||||
try:
|
||||
parsed = json.loads(val)
|
||||
return parsed if isinstance(parsed, dict) else {}
|
||||
except Exception:
|
||||
return {}
|
||||
return val
|
||||
|
||||
def _ts(val):
|
||||
"""Timestamp → ISO string or None."""
|
||||
if not val:
|
||||
return None
|
||||
if isinstance(val, str):
|
||||
return val
|
||||
return val.isoformat()
|
||||
|
||||
def _get(key, default=None):
|
||||
"""Safe row access — returns default if key missing (handles old rows)."""
|
||||
try:
|
||||
v = row[key]
|
||||
return default if v is None and default is not None else v
|
||||
except (KeyError, IndexError):
|
||||
return default
|
||||
|
||||
return {
|
||||
# Core fields (always present since Migration 024)
|
||||
"id": str(row["id"]),
|
||||
"tenant_id": row["tenant_id"],
|
||||
"title": row["title"],
|
||||
"description": row["description"] or "",
|
||||
"status": row["status"] or "draft",
|
||||
"risk_level": row["risk_level"] or "low",
|
||||
"processing_activity": row["processing_activity"] or "",
|
||||
"data_categories": _parse_arr(row["data_categories"]),
|
||||
"recipients": _parse_arr(row["recipients"]),
|
||||
"measures": _parse_arr(row["measures"]),
|
||||
"approved_by": row["approved_by"],
|
||||
"approved_at": _ts(row["approved_at"]),
|
||||
"created_by": row["created_by"] or "system",
|
||||
"created_at": _ts(row["created_at"]),
|
||||
"updated_at": _ts(row["updated_at"]),
|
||||
# Section 1 (Migration 030)
|
||||
"processing_description": _get("processing_description"),
|
||||
"processing_purpose": _get("processing_purpose"),
|
||||
"legal_basis": _get("legal_basis"),
|
||||
"legal_basis_details": _get("legal_basis_details"),
|
||||
# Section 2
|
||||
"necessity_assessment": _get("necessity_assessment"),
|
||||
"proportionality_assessment": _get("proportionality_assessment"),
|
||||
"data_minimization": _get("data_minimization"),
|
||||
"alternatives_considered": _get("alternatives_considered"),
|
||||
"retention_justification": _get("retention_justification"),
|
||||
# Section 3
|
||||
"involves_ai": _get("involves_ai", False),
|
||||
"overall_risk_level": _get("overall_risk_level"),
|
||||
"risk_score": _get("risk_score", 0),
|
||||
# Section 6
|
||||
"dpo_consulted": _get("dpo_consulted", False),
|
||||
"dpo_consulted_at": _ts(_get("dpo_consulted_at")),
|
||||
"dpo_name": _get("dpo_name"),
|
||||
"dpo_opinion": _get("dpo_opinion"),
|
||||
"dpo_approved": _get("dpo_approved"),
|
||||
"authority_consulted": _get("authority_consulted", False),
|
||||
"authority_consulted_at": _ts(_get("authority_consulted_at")),
|
||||
"authority_reference": _get("authority_reference"),
|
||||
"authority_decision": _get("authority_decision"),
|
||||
# Metadata / Versioning
|
||||
"version": _get("version", 1),
|
||||
"previous_version_id": str(_get("previous_version_id")) if _get("previous_version_id") else None,
|
||||
"conclusion": _get("conclusion"),
|
||||
"federal_state": _get("federal_state"),
|
||||
"authority_resource_id": _get("authority_resource_id"),
|
||||
"submitted_for_review_at": _ts(_get("submitted_for_review_at")),
|
||||
"submitted_by": _get("submitted_by"),
|
||||
# JSONB Arrays
|
||||
"data_subjects": _parse_arr(_get("data_subjects")),
|
||||
"affected_rights": _parse_arr(_get("affected_rights")),
|
||||
"triggered_rule_codes": _parse_arr(_get("triggered_rule_codes")),
|
||||
"ai_trigger_ids": _parse_arr(_get("ai_trigger_ids")),
|
||||
"wp248_criteria_met": _parse_arr(_get("wp248_criteria_met")),
|
||||
"art35_abs3_triggered": _parse_arr(_get("art35_abs3_triggered")),
|
||||
"tom_references": _parse_arr(_get("tom_references")),
|
||||
"risks": _parse_arr(_get("risks")),
|
||||
"mitigations": _parse_arr(_get("mitigations")),
|
||||
"stakeholder_consultations": _parse_arr(_get("stakeholder_consultations")),
|
||||
"review_triggers": _parse_arr(_get("review_triggers")),
|
||||
"review_comments": _parse_arr(_get("review_comments")),
|
||||
# Section 8 / AI (Migration 028)
|
||||
"ai_use_case_modules": _parse_arr(_get("ai_use_case_modules")),
|
||||
"section_8_complete": _get("section_8_complete", False),
|
||||
# JSONB Objects
|
||||
"threshold_analysis": _parse_obj(_get("threshold_analysis")),
|
||||
"consultation_requirement": _parse_obj(_get("consultation_requirement")),
|
||||
"review_schedule": _parse_obj(_get("review_schedule")),
|
||||
"section_progress": _parse_obj(_get("section_progress")),
|
||||
"metadata": _parse_obj(_get("metadata")),
|
||||
}
|
||||
|
||||
|
||||
def _log_audit(
    db: Session,
    tenant_id: str,
    dsfa_id,
    action: str,
    changed_by: str = "system",
    old_values=None,
    new_values=None,
):
    """Insert one row into ``compliance_dsfa_audit_log``.

    The statement is only executed on *db*; this function does not commit.
    ``old_values`` / ``new_values`` are JSON-encoded when truthy and stored
    as NULL otherwise (so an empty dict is also stored as NULL). A falsy
    ``dsfa_id`` is stored as NULL; any other value is stringified.
    """
    import json

    def _as_json(payload):
        # Falsy payloads (None, {}, []) are written as SQL NULL.
        return json.dumps(payload) if payload else None

    statement = text("""
        INSERT INTO compliance_dsfa_audit_log
            (tenant_id, dsfa_id, action, changed_by, old_values, new_values)
        VALUES
            (:tenant_id, :dsfa_id, :action, :changed_by,
             CAST(:old_values AS jsonb), CAST(:new_values AS jsonb))
    """)
    params = {
        "tenant_id": tenant_id,
        "dsfa_id": str(dsfa_id) if dsfa_id else None,
        "action": action,
        "changed_by": changed_by,
        "old_values": _as_json(old_values),
        "new_values": _as_json(new_values),
    }
    db.execute(statement, params)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -177,8 +481,51 @@ async def create_dsfa(
|
||||
service: DSFAService = Depends(get_dsfa_service),
|
||||
) -> dict[str, Any]:
|
||||
"""Neue DSFA erstellen."""
|
||||
with translate_domain_errors():
|
||||
return service.create(tenant_id, request)
|
||||
import json
|
||||
|
||||
if request.status not in VALID_STATUSES:
|
||||
raise HTTPException(status_code=422, detail=f"Ungültiger Status: {request.status}")
|
||||
if request.risk_level not in VALID_RISK_LEVELS:
|
||||
raise HTTPException(status_code=422, detail=f"Ungültiges Risiko-Level: {request.risk_level}")
|
||||
|
||||
tid = _get_tenant_id(tenant_id)
|
||||
|
||||
row = db.execute(
|
||||
text("""
|
||||
INSERT INTO compliance_dsfas
|
||||
(tenant_id, title, description, status, risk_level,
|
||||
processing_activity, data_categories, recipients, measures, created_by)
|
||||
VALUES
|
||||
(:tenant_id, :title, :description, :status, :risk_level,
|
||||
:processing_activity,
|
||||
CAST(:data_categories AS jsonb),
|
||||
CAST(:recipients AS jsonb),
|
||||
CAST(:measures AS jsonb),
|
||||
:created_by)
|
||||
RETURNING *
|
||||
"""),
|
||||
{
|
||||
"tenant_id": tid,
|
||||
"title": request.title,
|
||||
"description": request.description,
|
||||
"status": request.status,
|
||||
"risk_level": request.risk_level,
|
||||
"processing_activity": request.processing_activity,
|
||||
"data_categories": json.dumps(request.data_categories),
|
||||
"recipients": json.dumps(request.recipients),
|
||||
"measures": json.dumps(request.measures),
|
||||
"created_by": request.created_by,
|
||||
},
|
||||
).fetchone()
|
||||
|
||||
db.flush()
|
||||
row_id = row._mapping["id"] if hasattr(row, "_mapping") else row[0]
|
||||
_log_audit(
|
||||
db, tid, row_id, "CREATE", request.created_by,
|
||||
new_values={"title": request.title, "status": request.status},
|
||||
)
|
||||
db.commit()
|
||||
return _dsfa_to_response(row)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -22,23 +22,21 @@ from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from compliance.api._http_errors import translate_domain_errors
|
||||
from compliance.db import ControlRepository, EvidenceRepository
|
||||
from compliance.schemas.evidence import (
|
||||
EvidenceCreate,
|
||||
EvidenceListResponse,
|
||||
EvidenceResponse,
|
||||
|
||||
from ..db import (
|
||||
ControlRepository,
|
||||
EvidenceRepository,
|
||||
EvidenceStatusEnum,
|
||||
EvidenceConfidenceEnum,
|
||||
EvidenceTruthStatusEnum,
|
||||
)
|
||||
from compliance.services.auto_risk_updater import AutoRiskUpdater
|
||||
from compliance.domain import NotFoundError, ValidationError
|
||||
from compliance.services.evidence_service import (
|
||||
SOURCE_CONTROL_MAP,
|
||||
EvidenceService,
|
||||
_extract_findings_detail, # re-exported for legacy test imports
|
||||
_parse_ci_evidence, # re-exported for legacy test imports
|
||||
_store_evidence, # re-exported for legacy test imports
|
||||
_update_risks as _update_risks_impl,
|
||||
from ..db.models import EvidenceDB, ControlDB, AuditTrailDB
|
||||
from ..services.auto_risk_updater import AutoRiskUpdater
|
||||
from .schemas import (
|
||||
EvidenceCreate, EvidenceResponse, EvidenceListResponse,
|
||||
EvidenceRejectRequest,
|
||||
)
|
||||
from .audit_trail_utils import log_audit_trail
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(tags=["compliance-evidence"])
|
||||
@@ -56,7 +54,88 @@ def get_evidence_service(db: Session = Depends(get_db)) -> EvidenceService:
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Evidence CRUD
|
||||
# Anti-Fake-Evidence: Four-Eyes Domain Check
|
||||
# ============================================================================
|
||||
|
||||
FOUR_EYES_DOMAINS = {"gov", "priv"}
|
||||
|
||||
|
||||
def _requires_four_eyes(control_domain: str) -> bool:
|
||||
"""Controls in governance/privacy domains require two independent reviewers."""
|
||||
return control_domain in FOUR_EYES_DOMAINS
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Anti-Fake-Evidence: Auto-Classification Helpers
|
||||
# ============================================================================
|
||||
|
||||
def _classify_confidence(source: Optional[str], evidence_type: Optional[str] = None, artifact_hash: Optional[str] = None) -> EvidenceConfidenceEnum:
    """Map an evidence *source* to its confidence level.

    ``ci_pipeline`` and ``api`` yield E3, ``manual`` / ``upload`` yield E1,
    ``generated`` yields E0, and any other source (including ``None``)
    yields E1.

    ``evidence_type`` and ``artifact_hash`` are accepted but do not affect
    the result: ``api`` evidence is E3 with or without a hash.
    """
    level = EvidenceConfidenceEnum
    by_source = {
        "ci_pipeline": level.E3,
        "api": level.E3,
        "manual": level.E1,
        "upload": level.E1,
        "generated": level.E0,
    }
    # Unknown sources fall back to E1.
    return by_source.get(source, level.E1)
|
||||
|
||||
|
||||
def _classify_truth_status(source: Optional[str]) -> EvidenceTruthStatusEnum:
    """Map an evidence *source* to its truth status.

    ``ci_pipeline`` and ``api`` yield OBSERVED, ``manual`` / ``upload`` yield
    UPLOADED, ``generated`` yields GENERATED, and any other source
    (including ``None``) yields UPLOADED.
    """
    truth = EvidenceTruthStatusEnum
    by_source = {
        "ci_pipeline": truth.OBSERVED,
        "manual": truth.UPLOADED,
        "upload": truth.UPLOADED,
        "generated": truth.GENERATED,
        "api": truth.OBSERVED,
    }
    # Unknown sources fall back to UPLOADED.
    return by_source.get(source, truth.UPLOADED)
|
||||
|
||||
|
||||
def _build_evidence_response(e: EvidenceDB) -> EvidenceResponse:
    """Build an EvidenceResponse from an EvidenceDB, including anti-fake fields.

    Most attributes are copied over unchanged. The three enum-typed columns
    (``status``, ``confidence_level``, ``truth_status``) are reduced to their
    ``.value``, or ``None`` when unset.
    """

    def _enum_value(member):
        return member.value if member else None

    # Attributes copied verbatim from the ORM object.
    copied_fields = (
        "id",
        "control_id",
        "evidence_type",
        "title",
        "description",
        "artifact_path",
        "artifact_url",
        "artifact_hash",
        "file_size_bytes",
        "mime_type",
        "valid_from",
        "valid_until",
        "source",
        "ci_job_id",
        "uploaded_by",
        "collected_at",
        "created_at",
        "generation_mode",
        "may_be_used_as_evidence",
        "reviewed_by",
        "reviewed_at",
        "approval_status",
        "first_reviewer",
        "first_reviewed_at",
        "second_reviewer",
        "second_reviewed_at",
        "requires_four_eyes",
    )
    payload = {name: getattr(e, name) for name in copied_fields}
    payload["status"] = _enum_value(e.status)
    payload["confidence_level"] = _enum_value(e.confidence_level)
    payload["truth_status"] = _enum_value(e.truth_status)
    return EvidenceResponse(**payload)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Evidence
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/evidence", response_model=EvidenceListResponse)
|
||||
@@ -69,8 +148,38 @@ async def list_evidence(
|
||||
service: EvidenceService = Depends(get_evidence_service),
|
||||
) -> EvidenceListResponse:
|
||||
"""List evidence with optional filters and pagination."""
|
||||
with translate_domain_errors():
|
||||
return service.list_evidence(control_id, evidence_type, status, page, limit)
|
||||
repo = EvidenceRepository(db)
|
||||
|
||||
if control_id:
|
||||
# First get the control UUID
|
||||
ctrl_repo = ControlRepository(db)
|
||||
control = ctrl_repo.get_by_control_id(control_id)
|
||||
if not control:
|
||||
raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
|
||||
evidence = repo.get_by_control(control.id)
|
||||
else:
|
||||
evidence = repo.get_all()
|
||||
|
||||
if evidence_type:
|
||||
evidence = [e for e in evidence if e.evidence_type == evidence_type]
|
||||
|
||||
if status:
|
||||
try:
|
||||
status_enum = EvidenceStatusEnum(status)
|
||||
evidence = [e for e in evidence if e.status == status_enum]
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
total = len(evidence)
|
||||
|
||||
# Apply pagination if requested
|
||||
if page is not None and limit is not None:
|
||||
offset = (page - 1) * limit
|
||||
evidence = evidence[offset:offset + limit]
|
||||
|
||||
results = [_build_evidence_response(e) for e in evidence]
|
||||
|
||||
return EvidenceListResponse(evidence=results, total=total)
|
||||
|
||||
|
||||
@router.post("/evidence", response_model=EvidenceResponse)
|
||||
@@ -79,8 +188,66 @@ async def create_evidence(
|
||||
service: EvidenceService = Depends(get_evidence_service),
|
||||
) -> EvidenceResponse:
|
||||
"""Create new evidence record."""
|
||||
with translate_domain_errors():
|
||||
return service.create_evidence(evidence_data)
|
||||
repo = EvidenceRepository(db)
|
||||
|
||||
# Get control UUID
|
||||
ctrl_repo = ControlRepository(db)
|
||||
control = ctrl_repo.get_by_control_id(evidence_data.control_id)
|
||||
if not control:
|
||||
raise HTTPException(status_code=404, detail=f"Control {evidence_data.control_id} not found")
|
||||
|
||||
source = evidence_data.source or "api"
|
||||
confidence = _classify_confidence(source, evidence_data.evidence_type)
|
||||
truth = _classify_truth_status(source)
|
||||
|
||||
# Allow explicit override from request
|
||||
if evidence_data.confidence_level:
|
||||
try:
|
||||
confidence = EvidenceConfidenceEnum(evidence_data.confidence_level)
|
||||
except ValueError:
|
||||
pass
|
||||
if evidence_data.truth_status:
|
||||
try:
|
||||
truth = EvidenceTruthStatusEnum(evidence_data.truth_status)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
evidence = repo.create(
|
||||
control_id=control.id,
|
||||
evidence_type=evidence_data.evidence_type,
|
||||
title=evidence_data.title,
|
||||
description=evidence_data.description,
|
||||
artifact_url=evidence_data.artifact_url,
|
||||
valid_from=evidence_data.valid_from,
|
||||
valid_until=evidence_data.valid_until,
|
||||
source=source,
|
||||
ci_job_id=evidence_data.ci_job_id,
|
||||
)
|
||||
|
||||
# Set anti-fake-evidence fields
|
||||
evidence.confidence_level = confidence
|
||||
evidence.truth_status = truth
|
||||
# Generated evidence should not be used as evidence by default
|
||||
if truth == EvidenceTruthStatusEnum.GENERATED:
|
||||
evidence.may_be_used_as_evidence = False
|
||||
|
||||
# Four-Eyes: check if the linked control's domain requires it
|
||||
control_domain = control.domain.value if control.domain else ""
|
||||
if _requires_four_eyes(control_domain):
|
||||
evidence.requires_four_eyes = True
|
||||
evidence.approval_status = "pending_first"
|
||||
|
||||
db.commit()
|
||||
|
||||
# Audit trail
|
||||
log_audit_trail(
|
||||
db, "evidence", evidence.id, evidence.title, "create",
|
||||
performed_by=evidence_data.source or "api",
|
||||
change_summary=f"Evidence created with confidence={confidence.value}, truth={truth.value}",
|
||||
)
|
||||
db.commit()
|
||||
|
||||
return _build_evidence_response(evidence)
|
||||
|
||||
|
||||
@router.delete("/evidence/{evidence_id}")
|
||||
@@ -107,9 +274,271 @@ async def upload_evidence(
|
||||
service: EvidenceService = Depends(get_evidence_service),
|
||||
) -> EvidenceResponse:
|
||||
"""Upload evidence file."""
|
||||
with translate_domain_errors():
|
||||
return await service.upload_evidence(
|
||||
control_id, evidence_type, title, file, description
|
||||
# Get control UUID
|
||||
ctrl_repo = ControlRepository(db)
|
||||
control = ctrl_repo.get_by_control_id(control_id)
|
||||
if not control:
|
||||
raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
|
||||
|
||||
# Create upload directory
|
||||
upload_dir = f"/tmp/compliance_evidence/{control_id}"
|
||||
os.makedirs(upload_dir, exist_ok=True)
|
||||
|
||||
# Save file
|
||||
file_path = os.path.join(upload_dir, file.filename)
|
||||
content = await file.read()
|
||||
|
||||
with open(file_path, "wb") as f:
|
||||
f.write(content)
|
||||
|
||||
# Calculate hash
|
||||
file_hash = hashlib.sha256(content).hexdigest()
|
||||
|
||||
# Create evidence record
|
||||
repo = EvidenceRepository(db)
|
||||
evidence = repo.create(
|
||||
control_id=control.id,
|
||||
evidence_type=evidence_type,
|
||||
title=title,
|
||||
description=description,
|
||||
artifact_path=file_path,
|
||||
artifact_hash=file_hash,
|
||||
file_size_bytes=len(content),
|
||||
mime_type=file.content_type,
|
||||
source="upload",
|
||||
)
|
||||
|
||||
# Upload evidence → E1 + uploaded
|
||||
evidence.confidence_level = EvidenceConfidenceEnum.E1
|
||||
evidence.truth_status = EvidenceTruthStatusEnum.UPLOADED
|
||||
|
||||
# Four-Eyes: check if the linked control's domain requires it
|
||||
control_domain = control.domain.value if control.domain else ""
|
||||
if _requires_four_eyes(control_domain):
|
||||
evidence.requires_four_eyes = True
|
||||
evidence.approval_status = "pending_first"
|
||||
|
||||
db.commit()
|
||||
|
||||
return _build_evidence_response(evidence)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# CI/CD Evidence Collection — helpers
|
||||
# ============================================================================
|
||||
|
||||
# Lookup table: CI source name -> control ID that its evidence is filed under.
SOURCE_CONTROL_MAP = dict(
    sast="SDLC-001",
    dependency_scan="SDLC-002",
    secret_scan="SDLC-003",
    code_review="SDLC-004",
    sbom="SDLC-005",
    container_scan="SDLC-006",
    test_results="AUD-001",
)
|
||||
|
||||
|
||||
def _parse_ci_evidence(data: dict) -> dict:
|
||||
"""
|
||||
Parse and validate incoming CI evidence data.
|
||||
|
||||
Returns a dict with:
|
||||
- report_json: str (serialised JSON)
|
||||
- report_hash: str (SHA-256 hex digest)
|
||||
- evidence_status: str ("valid" or "failed")
|
||||
- findings_count: int
|
||||
- critical_findings: int
|
||||
"""
|
||||
report_json = json.dumps(data) if data else "{}"
|
||||
report_hash = hashlib.sha256(report_json.encode()).hexdigest()
|
||||
|
||||
findings_count = 0
|
||||
critical_findings = 0
|
||||
|
||||
if data and isinstance(data, dict):
|
||||
# Semgrep format
|
||||
if "results" in data:
|
||||
findings_count = len(data.get("results", []))
|
||||
critical_findings = len([
|
||||
r for r in data.get("results", [])
|
||||
if r.get("extra", {}).get("severity", "").upper() in ["CRITICAL", "HIGH"]
|
||||
])
|
||||
|
||||
# Trivy format
|
||||
elif "Results" in data:
|
||||
for result in data.get("Results", []):
|
||||
vulns = result.get("Vulnerabilities", [])
|
||||
findings_count += len(vulns)
|
||||
critical_findings += len([
|
||||
v for v in vulns
|
||||
if v.get("Severity", "").upper() in ["CRITICAL", "HIGH"]
|
||||
])
|
||||
|
||||
# Generic findings array
|
||||
elif "findings" in data:
|
||||
findings_count = len(data.get("findings", []))
|
||||
|
||||
# SBOM format - just count components
|
||||
elif "components" in data:
|
||||
findings_count = len(data.get("components", []))
|
||||
|
||||
evidence_status = "failed" if critical_findings > 0 else "valid"
|
||||
|
||||
return {
|
||||
"report_json": report_json,
|
||||
"report_hash": report_hash,
|
||||
"evidence_status": evidence_status,
|
||||
"findings_count": findings_count,
|
||||
"critical_findings": critical_findings,
|
||||
}
|
||||
|
||||
|
||||
def _store_evidence(
    db: Session,
    *,
    control_db_id: str,
    source: str,
    parsed: dict,
    ci_job_id: str,
    ci_job_url: str,
    report_data: dict,
) -> EvidenceDB:
    """
    Persist a CI evidence item to the database and write the report file.

    The report is dumped as JSON under ``/tmp/compliance_evidence/ci/<source>``
    and an EvidenceDB row is created that points at that file. The row is
    classified as E3 / OBSERVED, is valid for 90 days from creation, and takes
    its status from ``parsed["evidence_status"]``.

    ``parsed`` must provide ``findings_count``, ``critical_findings``,
    ``report_hash``, ``report_json`` and ``evidence_status``.

    Returns the created EvidenceDB instance (already committed).
    """
    findings_count = parsed["findings_count"]
    critical_findings = parsed["critical_findings"]
    report_hash = parsed["report_hash"]

    # Read each clock exactly once so the title and file name carry the same
    # moment and valid_until is exactly 90 days after valid_from.
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; it is
    # kept because switching to an aware datetime would change what is stored.
    local_now = datetime.now()
    utc_now = datetime.utcnow()

    # Build title and description
    title = f"{source.upper()} Report - {local_now.strftime('%Y-%m-%d %H:%M')}"
    description = "Automatically collected from CI/CD pipeline"
    if findings_count > 0:
        description += f"\n- Total findings: {findings_count}"
    if critical_findings > 0:
        description += f"\n- Critical/High findings: {critical_findings}"
    if ci_job_id:
        description += f"\n- CI Job ID: {ci_job_id}"
    if ci_job_url:
        description += f"\n- CI Job URL: {ci_job_url}"

    # Store report file
    upload_dir = f"/tmp/compliance_evidence/ci/{source}"
    os.makedirs(upload_dir, exist_ok=True)
    file_name = f"{source}_{local_now.strftime('%Y%m%d_%H%M%S')}_{report_hash[:8]}.json"
    file_path = os.path.join(upload_dir, file_name)

    # Explicit encoding keeps the file independent of the process locale.
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(report_data or {}, f, indent=2)

    # Create evidence record with anti-fake-evidence classification
    evidence = EvidenceDB(
        id=str(uuid_module.uuid4()),
        control_id=control_db_id,
        evidence_type=f"ci_{source}",
        title=title,
        description=description,
        artifact_path=file_path,
        artifact_hash=report_hash,
        file_size_bytes=len(parsed["report_json"]),
        mime_type="application/json",
        source="ci_pipeline",
        ci_job_id=ci_job_id,
        valid_from=utc_now,
        valid_until=utc_now + timedelta(days=90),
        status=EvidenceStatusEnum(parsed["evidence_status"]),
        # CI pipeline evidence → E3 observed (system-observed, hash-verified)
        confidence_level=EvidenceConfidenceEnum.E3,
        truth_status=EvidenceTruthStatusEnum.OBSERVED,
        may_be_used_as_evidence=True,
    )
    db.add(evidence)
    db.commit()
    db.refresh(evidence)

    return evidence
|
||||
|
||||
|
||||
def _extract_findings_detail(report_data: dict) -> dict:
|
||||
"""
|
||||
Extract severity-bucketed finding counts from report data.
|
||||
|
||||
Returns dict with keys: critical, high, medium, low.
|
||||
"""
|
||||
findings_detail = {
|
||||
"critical": 0,
|
||||
"high": 0,
|
||||
"medium": 0,
|
||||
"low": 0,
|
||||
}
|
||||
|
||||
if not report_data:
|
||||
return findings_detail
|
||||
|
||||
# Semgrep format
|
||||
if "results" in report_data:
|
||||
for r in report_data.get("results", []):
|
||||
severity = r.get("extra", {}).get("severity", "").upper()
|
||||
if severity == "CRITICAL":
|
||||
findings_detail["critical"] += 1
|
||||
elif severity == "HIGH":
|
||||
findings_detail["high"] += 1
|
||||
elif severity == "MEDIUM":
|
||||
findings_detail["medium"] += 1
|
||||
elif severity in ["LOW", "INFO"]:
|
||||
findings_detail["low"] += 1
|
||||
|
||||
# Trivy format
|
||||
elif "Results" in report_data:
|
||||
for result in report_data.get("Results", []):
|
||||
for v in result.get("Vulnerabilities", []):
|
||||
severity = v.get("Severity", "").upper()
|
||||
if severity == "CRITICAL":
|
||||
findings_detail["critical"] += 1
|
||||
elif severity == "HIGH":
|
||||
findings_detail["high"] += 1
|
||||
elif severity == "MEDIUM":
|
||||
findings_detail["medium"] += 1
|
||||
elif severity == "LOW":
|
||||
findings_detail["low"] += 1
|
||||
|
||||
# Generic findings with severity
|
||||
elif "findings" in report_data:
|
||||
for f in report_data.get("findings", []):
|
||||
severity = f.get("severity", "").upper()
|
||||
if severity == "CRITICAL":
|
||||
findings_detail["critical"] += 1
|
||||
elif severity == "HIGH":
|
||||
findings_detail["high"] += 1
|
||||
elif severity == "MEDIUM":
|
||||
findings_detail["medium"] += 1
|
||||
else:
|
||||
findings_detail["low"] += 1
|
||||
|
||||
return findings_detail
|
||||
|
||||
|
||||
def _update_risks(db: Session, *, source: str, control_id: str, ci_job_id: str, report_data: dict):
|
||||
"""
|
||||
Update risk status based on new evidence.
|
||||
|
||||
Uses AutoRiskUpdater to update Control status and linked Risks based on
|
||||
severity-bucketed findings. Returns the update result or None on error.
|
||||
"""
|
||||
findings_detail = _extract_findings_detail(report_data)
|
||||
|
||||
try:
|
||||
auto_updater = AutoRiskUpdater(db)
|
||||
risk_update_result = auto_updater.process_evidence_collect_request(
|
||||
tool=source,
|
||||
control_id=control_id,
|
||||
evidence_type=f"ci_{source}",
|
||||
timestamp=datetime.utcnow().isoformat(),
|
||||
commit_sha=report_data.get("commit_sha", "unknown") if report_data else "unknown",
|
||||
ci_job_id=ci_job_id,
|
||||
findings=findings_detail,
|
||||
)
|
||||
|
||||
|
||||
@@ -227,14 +656,229 @@ async def get_ci_evidence_status(
|
||||
# Legacy re-exports for tests that import helpers directly.
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
__all__ = [
|
||||
"router",
|
||||
"SOURCE_CONTROL_MAP",
|
||||
"EvidenceRepository",
|
||||
"ControlRepository",
|
||||
"AutoRiskUpdater",
|
||||
"_parse_ci_evidence",
|
||||
"_extract_findings_detail",
|
||||
"_store_evidence",
|
||||
"_update_risks",
|
||||
]
|
||||
if control_id:
|
||||
ctrl_repo = ControlRepository(db)
|
||||
control = ctrl_repo.get_by_control_id(control_id)
|
||||
if control:
|
||||
query = query.filter(EvidenceDB.control_id == control.id)
|
||||
|
||||
evidence_list = query.order_by(EvidenceDB.collected_at.desc()).limit(100).all()
|
||||
|
||||
# Group by control and calculate stats
|
||||
control_stats = defaultdict(lambda: {
|
||||
"total": 0,
|
||||
"valid": 0,
|
||||
"failed": 0,
|
||||
"last_collected": None,
|
||||
"evidence": [],
|
||||
})
|
||||
|
||||
for e in evidence_list:
|
||||
# Get control_id string
|
||||
control = db.query(ControlDB).filter(ControlDB.id == e.control_id).first()
|
||||
ctrl_id = control.control_id if control else "unknown"
|
||||
|
||||
stats = control_stats[ctrl_id]
|
||||
stats["total"] += 1
|
||||
if e.status:
|
||||
if e.status.value == "valid":
|
||||
stats["valid"] += 1
|
||||
elif e.status.value == "failed":
|
||||
stats["failed"] += 1
|
||||
if not stats["last_collected"] or e.collected_at > stats["last_collected"]:
|
||||
stats["last_collected"] = e.collected_at
|
||||
|
||||
# Add evidence summary
|
||||
stats["evidence"].append({
|
||||
"id": e.id,
|
||||
"type": e.evidence_type,
|
||||
"status": e.status.value if e.status else None,
|
||||
"collected_at": e.collected_at.isoformat() if e.collected_at else None,
|
||||
"ci_job_id": e.ci_job_id,
|
||||
})
|
||||
|
||||
# Convert to list and sort
|
||||
result = []
|
||||
for ctrl_id, stats in control_stats.items():
|
||||
result.append({
|
||||
"control_id": ctrl_id,
|
||||
"total_evidence": stats["total"],
|
||||
"valid_count": stats["valid"],
|
||||
"failed_count": stats["failed"],
|
||||
"last_collected": stats["last_collected"].isoformat() if stats["last_collected"] else None,
|
||||
"recent_evidence": stats["evidence"][:5],
|
||||
})
|
||||
|
||||
result.sort(key=lambda x: x["last_collected"] or "", reverse=True)
|
||||
|
||||
return {
|
||||
"period_days": days,
|
||||
"total_evidence": len(evidence_list),
|
||||
"controls": result,
|
||||
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Evidence Review (Anti-Fake-Evidence)
|
||||
# ============================================================================
|
||||
|
||||
from pydantic import BaseModel as _BaseModel
|
||||
|
||||
class _EvidenceReviewRequest(_BaseModel):
    """Request body for PATCH /evidence/{evidence_id}/review."""

    # Optional new confidence level, given as a string.
    confidence_level: Optional[str] = None
    # Optional new truth status, given as a string.
    truth_status: Optional[str] = None
    # Required identifier of the reviewer.
    reviewed_by: str
|
||||
|
||||
|
||||
@router.patch("/evidence/{evidence_id}/review", response_model=EvidenceResponse)
async def review_evidence(
    evidence_id: str,
    review: _EvidenceReviewRequest,
    db: Session = Depends(get_db),
):
    """
    Record a review of a single evidence item.

    Optionally replaces the confidence level and/or the truth status, moves
    the Four-Eyes approval workflow forward when the evidence requires it,
    stamps reviewed_by / reviewed_at, and writes one audit-trail entry for
    each of the two reviewed fields whose value actually changed.

    Four-Eyes workflow (only when ``requires_four_eyes`` is truthy):
      * approval_status empty, 'none' or 'pending_first': the reviewer becomes
        first_reviewer and the status becomes 'first_approved'.
      * 'first_approved': a *different* reviewer becomes second_reviewer and
        the status becomes 'approved'.
      * 'approved' or 'rejected': the request is refused.

    Raises:
        HTTPException(404): no evidence with the given id.
        HTTPException(400): unknown confidence_level / truth_status value,
            second reviewer equal to the first, or evidence already
            approved / rejected.
    """
    evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
    if not evidence:
        raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")

    # Snapshot the pre-review values so the audit trail can report a diff.
    confidence_before = evidence.confidence_level.value if evidence.confidence_level else None
    truth_before = evidence.truth_status.value if evidence.truth_status else None

    if review.confidence_level:
        try:
            evidence.confidence_level = EvidenceConfidenceEnum(review.confidence_level)
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid confidence_level: {review.confidence_level}")

    if review.truth_status:
        try:
            evidence.truth_status = EvidenceTruthStatusEnum(review.truth_status)
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid truth_status: {review.truth_status}")

    # Four-Eyes branching
    if evidence.requires_four_eyes:
        stage = evidence.approval_status or "none"

        # Terminal states first: nothing may be reviewed any more.
        if stage == "approved":
            raise HTTPException(status_code=400, detail="Evidence already approved")
        if stage == "rejected":
            raise HTTPException(status_code=400, detail="Evidence was rejected — create new evidence instead")

        if stage in ("none", "pending_first"):
            evidence.first_reviewer = review.reviewed_by
            evidence.first_reviewed_at = datetime.utcnow()
            evidence.approval_status = "first_approved"
        elif stage == "first_approved":
            # The second pair of eyes must belong to somebody else.
            if review.reviewed_by == evidence.first_reviewer:
                raise HTTPException(
                    status_code=400,
                    detail="Four-Eyes: second reviewer must be different from first reviewer",
                )
            evidence.second_reviewer = review.reviewed_by
            evidence.second_reviewed_at = datetime.utcnow()
            evidence.approval_status = "approved"

    evidence.reviewed_by = review.reviewed_by
    evidence.reviewed_at = datetime.utcnow()
    db.commit()

    # Audit trail: one entry per reviewed field whose value changed.
    tracked_fields = (
        ("confidence_level", confidence_before),
        ("truth_status", truth_before),
    )
    for field_name, value_before in tracked_fields:
        current = getattr(evidence, field_name)
        value_after = current.value if current else None
        if value_before != value_after:
            log_audit_trail(
                db, "evidence", evidence_id, evidence.title, "review",
                performed_by=review.reviewed_by,
                field_changed=field_name,
                old_value=value_before,
                new_value=value_after,
            )
    db.commit()

    db.refresh(evidence)
    return _build_evidence_response(evidence)
|
||||
|
||||
|
||||
@router.patch("/evidence/{evidence_id}/reject", response_model=EvidenceResponse)
async def reject_evidence(
    evidence_id: str,
    body: EvidenceRejectRequest,
    db: Session = Depends(get_db),
):
    """
    Mark an evidence item as rejected.

    Sets approval_status to 'rejected', stamps reviewed_by / reviewed_at from
    the request, commits, then writes a 'reject' audit-trail entry whose
    summary is the supplied rejection reason (or a generic text when none was
    given) and commits again.

    Raises:
        HTTPException(404): no evidence with the given id.
    """
    evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
    if not evidence:
        raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")

    rejecting_user = body.reviewed_by

    evidence.approval_status = "rejected"
    evidence.reviewed_by = rejecting_user
    evidence.reviewed_at = datetime.utcnow()
    db.commit()

    # Fall back to a generic summary when the caller gave no reason.
    summary = body.rejection_reason or "Evidence rejected"
    log_audit_trail(
        db, "evidence", evidence_id, evidence.title, "reject",
        performed_by=rejecting_user,
        change_summary=summary,
    )
    db.commit()

    db.refresh(evidence)
    return _build_evidence_response(evidence)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Audit Trail Query
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/audit-trail")
async def get_audit_trail(
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    action: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
):
    """
    Return audit-trail entries, newest first.

    Each of entity_type, entity_id and action narrows the result with an
    equality filter when it is provided (non-empty). At most ``limit``
    entries are returned.

    Returns:
        A dict with ``entries`` (list of plain dicts, one per record) and
        ``total``, which is the number of entries in this response, not the
        number of matching rows in the table.
    """

    def _serialize(rec):
        # Flatten one AuditTrailDB row into a JSON-friendly dict.
        performed_at = rec.performed_at.isoformat() if rec.performed_at else None
        return {
            "id": rec.id,
            "entity_type": rec.entity_type,
            "entity_id": rec.entity_id,
            "entity_name": rec.entity_name,
            "action": rec.action,
            "field_changed": rec.field_changed,
            "old_value": rec.old_value,
            "new_value": rec.new_value,
            "change_summary": rec.change_summary,
            "performed_by": rec.performed_by,
            "performed_at": performed_at,
            "checksum": rec.checksum,
        }

    # (column, requested value) pairs; only truthy values become filters.
    criteria = (
        (AuditTrailDB.entity_type, entity_type),
        (AuditTrailDB.entity_id, entity_id),
        (AuditTrailDB.action, action),
    )

    query = db.query(AuditTrailDB)
    for column, wanted in criteria:
        if wanted:
            query = query.filter(column == wanted)

    records = query.order_by(AuditTrailDB.performed_at.desc()).limit(limit).all()
    entries = [_serialize(rec) for rec in records]

    return {
        "entries": entries,
        "total": len(records),
    }
|
||||
|
||||
@@ -39,7 +39,6 @@ router = APIRouter(tags=["extraction"])
|
||||
|
||||
ALL_COLLECTIONS = [
|
||||
"bp_compliance_ce", # BSI-TR documents — primary Prüfaspekte source
|
||||
"bp_compliance_recht", # Legal texts (GDPR, AI Act, ...)
|
||||
"bp_compliance_gesetze", # German laws
|
||||
"bp_compliance_datenschutz", # Data protection documents
|
||||
"bp_dsfa_corpus", # DSFA corpus
|
||||
|
||||
@@ -80,9 +80,13 @@ def _handle(func, *args, **kwargs): # type: ignore[no-untyped-def]
|
||||
raise HTTPException(status_code=400, detail=str(exc))
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# ISMS Scope (ISO 27001 4.3)
|
||||
# ============================================================================
|
||||
# Shared audit trail utilities — canonical implementation in audit_trail_utils.py
|
||||
from .audit_trail_utils import log_audit_trail, create_signature # noqa: E402
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ISMS SCOPE (ISO 27001 4.3)
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/scope", response_model=ISMSScopeResponse)
|
||||
async def get_isms_scope(db: Session = Depends(get_db)):
|
||||
|
||||
@@ -50,6 +50,57 @@ VALID_DOCUMENT_TYPES = {
|
||||
"cookie_banner",
|
||||
"agb",
|
||||
"clause",
|
||||
# Security document templates (Migration 051)
|
||||
"it_security_concept",
|
||||
"data_protection_concept",
|
||||
"backup_recovery_concept",
|
||||
"logging_concept",
|
||||
"incident_response_plan",
|
||||
"access_control_concept",
|
||||
"risk_management_concept",
|
||||
# Policy templates — IT Security (Migration 054)
|
||||
"information_security_policy",
|
||||
"access_control_policy",
|
||||
"password_policy",
|
||||
"encryption_policy",
|
||||
"logging_policy",
|
||||
"backup_policy",
|
||||
"incident_response_policy",
|
||||
"change_management_policy",
|
||||
"patch_management_policy",
|
||||
"asset_management_policy",
|
||||
"cloud_security_policy",
|
||||
"devsecops_policy",
|
||||
"secrets_management_policy",
|
||||
"vulnerability_management_policy",
|
||||
# Policy templates — Data (Migration 054)
|
||||
"data_protection_policy",
|
||||
"data_classification_policy",
|
||||
"data_retention_policy",
|
||||
"data_transfer_policy",
|
||||
"privacy_incident_policy",
|
||||
# Policy templates — Personnel (Migration 054)
|
||||
"employee_security_policy",
|
||||
"security_awareness_policy",
|
||||
"remote_work_policy",
|
||||
"offboarding_policy",
|
||||
# Policy templates — Vendor/Supply Chain (Migration 054)
|
||||
"vendor_risk_management_policy",
|
||||
"third_party_security_policy",
|
||||
"supplier_security_policy",
|
||||
# Policy templates — BCM (Migration 054)
|
||||
"business_continuity_policy",
|
||||
"disaster_recovery_policy",
|
||||
"crisis_management_policy",
|
||||
# CRA Cybersecurity (Migration 056)
|
||||
"cybersecurity_policy",
|
||||
# DSFA template
|
||||
"dsfa",
|
||||
# Module document templates (Migration 073)
|
||||
"vvt_register",
|
||||
"tom_documentation",
|
||||
"loeschkonzept",
|
||||
"pflichtenregister",
|
||||
}
|
||||
VALID_STATUSES = {"published", "draft", "archived"}
|
||||
|
||||
|
||||
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
FastAPI routes for LLM Generation Audit Trail.
|
||||
|
||||
Endpoints:
|
||||
- POST /llm-audit: Record an LLM generation event
|
||||
- GET /llm-audit: List audit records with filters
|
||||
"""
|
||||
|
||||
import logging
|
||||
import uuid as uuid_module
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from ..db.models import LLMGenerationAuditDB
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(tags=["compliance-llm-audit"])
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Schemas
|
||||
# ============================================================================
|
||||
|
||||
class LLMAuditCreate(BaseModel):
    # Request body for POST /llm-audit: one LLM generation event to record.

    # What was generated, and (optionally) which concrete entity it belongs to.
    entity_type: str
    entity_id: Optional[str] = None

    # How the content was produced; stored as given.
    generation_mode: str

    # Raw truth-status string; the route converts it with
    # EvidenceTruthStatusEnum and falls back to GENERATED on unknown values.
    truth_status: str = "generated"

    # Whether the generated content may serve as evidence; off by default.
    may_be_used_as_evidence: bool = False

    # Provenance of the generation.
    llm_model: Optional[str] = None
    llm_provider: Optional[str] = None
    prompt_hash: Optional[str] = None

    # Free-text summaries; the route keeps only the first 500 characters.
    input_summary: Optional[str] = None
    output_summary: Optional[str] = None

    # Arbitrary extra data; persisted in the record's extra_metadata column.
    metadata: Optional[dict] = None

    tenant_id: Optional[str] = None
|
||||
|
||||
|
||||
class LLMAuditResponse(BaseModel):
    # Response shape for a stored LLM generation audit record.

    id: str
    tenant_id: Optional[str] = None

    # Entity the generation belongs to.
    entity_type: str
    entity_id: Optional[str] = None

    generation_mode: str
    # String value of the stored truth-status enum.
    truth_status: str
    may_be_used_as_evidence: bool

    # Provenance of the generation.
    llm_model: Optional[str] = None
    llm_provider: Optional[str] = None
    prompt_hash: Optional[str] = None

    input_summary: Optional[str] = None
    output_summary: Optional[str] = None

    # Filled from the record's extra_metadata column by the routes.
    metadata: Optional[dict] = None

    created_at: datetime

    class Config:
        # Allow building the model from attribute-bearing objects.
        from_attributes = True
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Routes
|
||||
# ============================================================================
|
||||
|
||||
@router.post("/llm-audit", response_model=LLMAuditResponse)
async def create_llm_audit(
    data: LLMAuditCreate,
    db: Session = Depends(get_db),
):
    """
    Record an LLM generation event for the audit trail.

    The record gets a fresh UUID, the input/output summaries are truncated to
    500 characters, and ``data.metadata`` is stored in ``extra_metadata``
    (an empty dict when absent).

    An unknown ``truth_status`` does not fail the request: the record is
    stored as GENERATED. Because this is an audit trail, the substitution is
    logged as a warning so the mismatch between what the client sent and what
    was stored stays traceable.

    Returns:
        The persisted record as an ``LLMAuditResponse``.
    """
    from ..db.models import EvidenceTruthStatusEnum

    # Validate truth_status; fall back (loudly) instead of rejecting.
    try:
        truth_enum = EvidenceTruthStatusEnum(data.truth_status)
    except ValueError:
        logger.warning(
            "Invalid truth_status %r for LLM audit record "
            "(entity_type=%s, entity_id=%s); falling back to GENERATED",
            data.truth_status,
            data.entity_type,
            data.entity_id,
        )
        truth_enum = EvidenceTruthStatusEnum.GENERATED

    record = LLMGenerationAuditDB(
        id=str(uuid_module.uuid4()),
        tenant_id=data.tenant_id,
        entity_type=data.entity_type,
        entity_id=data.entity_id,
        generation_mode=data.generation_mode,
        truth_status=truth_enum,
        may_be_used_as_evidence=data.may_be_used_as_evidence,
        llm_model=data.llm_model,
        llm_provider=data.llm_provider,
        prompt_hash=data.prompt_hash,
        # Summaries are capped at 500 characters before being stored.
        input_summary=data.input_summary[:500] if data.input_summary else None,
        output_summary=data.output_summary[:500] if data.output_summary else None,
        extra_metadata=data.metadata or {},
    )
    db.add(record)
    db.commit()
    db.refresh(record)

    return LLMAuditResponse(
        id=record.id,
        tenant_id=record.tenant_id,
        entity_type=record.entity_type,
        entity_id=record.entity_id,
        generation_mode=record.generation_mode,
        truth_status=record.truth_status.value if record.truth_status else "generated",
        may_be_used_as_evidence=record.may_be_used_as_evidence,
        llm_model=record.llm_model,
        llm_provider=record.llm_provider,
        prompt_hash=record.prompt_hash,
        input_summary=record.input_summary,
        output_summary=record.output_summary,
        metadata=record.extra_metadata,
        created_at=record.created_at,
    )
|
||||
|
||||
|
||||
@router.get("/llm-audit")
async def list_llm_audit(
    entity_type: Optional[str] = Query(None),
    entity_id: Optional[str] = Query(None),
    page: int = Query(1, ge=1),
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
):
    """
    List LLM generation audit records, newest first, with optional filters.

    ``entity_type`` and ``entity_id`` narrow the result with equality filters
    when provided. Results are paginated with ``page`` (1-based) and
    ``limit``.

    Returns:
        A dict with ``records`` (list of ``LLMAuditResponse``), ``total``
        (number of rows matching the filters, across all pages), ``page`` and
        ``limit``.
    """
    query = db.query(LLMGenerationAuditDB)

    if entity_type:
        query = query.filter(LLMGenerationAuditDB.entity_type == entity_type)
    if entity_id:
        query = query.filter(LLMGenerationAuditDB.entity_id == entity_id)

    total = query.count()
    offset = (page - 1) * limit

    # created_at alone is not unique; without a tie-breaker, rows sharing a
    # timestamp may be returned in a different order per query, so OFFSET/LIMIT
    # pages could repeat or skip records. The primary id makes the order total.
    records = (
        query.order_by(
            LLMGenerationAuditDB.created_at.desc(),
            LLMGenerationAuditDB.id.desc(),
        )
        .offset(offset)
        .limit(limit)
        .all()
    )

    return {
        "records": [
            LLMAuditResponse(
                id=r.id,
                tenant_id=r.tenant_id,
                entity_type=r.entity_type,
                entity_id=r.entity_id,
                generation_mode=r.generation_mode,
                truth_status=r.truth_status.value if r.truth_status else "generated",
                may_be_used_as_evidence=r.may_be_used_as_evidence,
                llm_model=r.llm_model,
                llm_provider=r.llm_provider,
                prompt_hash=r.prompt_hash,
                input_summary=r.input_summary,
                output_summary=r.output_summary,
                metadata=r.extra_metadata,
                created_at=r.created_at,
            )
            for r in records
        ],
        "total": total,
        "page": page,
        "limit": limit,
    }
|
||||
@@ -56,6 +56,7 @@ class LoeschfristCreate(BaseModel):
|
||||
responsible_person: Optional[str] = None
|
||||
release_process: Optional[str] = None
|
||||
linked_vvt_activity_ids: Optional[List[Any]] = None
|
||||
linked_vendor_ids: Optional[List[Any]] = None
|
||||
status: str = "DRAFT"
|
||||
last_review_date: Optional[datetime] = None
|
||||
next_review_date: Optional[datetime] = None
|
||||
@@ -86,6 +87,7 @@ class LoeschfristUpdate(BaseModel):
|
||||
responsible_person: Optional[str] = None
|
||||
release_process: Optional[str] = None
|
||||
linked_vvt_activity_ids: Optional[List[Any]] = None
|
||||
linked_vendor_ids: Optional[List[Any]] = None
|
||||
status: Optional[str] = None
|
||||
last_review_date: Optional[datetime] = None
|
||||
next_review_date: Optional[datetime] = None
|
||||
@@ -100,7 +102,7 @@ class StatusUpdate(BaseModel):
|
||||
# JSONB fields that need CAST
|
||||
JSONB_FIELDS = {
|
||||
"affected_groups", "data_categories", "legal_holds",
|
||||
"storage_locations", "linked_vvt_activity_ids", "tags"
|
||||
"storage_locations", "linked_vvt_activity_ids", "linked_vendor_ids", "tags"
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@ class ObligationCreate(BaseModel):
|
||||
priority: str = "medium"
|
||||
responsible: Optional[str] = None
|
||||
linked_systems: Optional[List[str]] = None
|
||||
linked_vendor_ids: Optional[List[str]] = None
|
||||
assessment_id: Optional[str] = None
|
||||
rule_code: Optional[str] = None
|
||||
notes: Optional[str] = None
|
||||
@@ -57,6 +58,7 @@ class ObligationUpdate(BaseModel):
|
||||
priority: Optional[str] = None
|
||||
responsible: Optional[str] = None
|
||||
linked_systems: Optional[List[str]] = None
|
||||
linked_vendor_ids: Optional[List[str]] = None
|
||||
notes: Optional[str] = None
|
||||
|
||||
|
||||
@@ -173,14 +175,15 @@ async def create_obligation(
|
||||
|
||||
import json
|
||||
linked_systems = json.dumps(payload.linked_systems or [])
|
||||
linked_vendor_ids = json.dumps(payload.linked_vendor_ids or [])
|
||||
|
||||
row = db.execute(text("""
|
||||
INSERT INTO compliance_obligations
|
||||
(tenant_id, title, description, source, source_article, deadline,
|
||||
status, priority, responsible, linked_systems, assessment_id, rule_code, notes)
|
||||
status, priority, responsible, linked_systems, linked_vendor_ids, assessment_id, rule_code, notes)
|
||||
VALUES
|
||||
(:tenant_id, :title, :description, :source, :source_article, :deadline,
|
||||
:status, :priority, :responsible, CAST(:linked_systems AS jsonb), :assessment_id, :rule_code, :notes)
|
||||
:status, :priority, :responsible, CAST(:linked_systems AS jsonb), CAST(:linked_vendor_ids AS jsonb), :assessment_id, :rule_code, :notes)
|
||||
RETURNING *
|
||||
"""), {
|
||||
"tenant_id": tenant_id,
|
||||
@@ -193,6 +196,7 @@ async def create_obligation(
|
||||
"priority": payload.priority,
|
||||
"responsible": payload.responsible,
|
||||
"linked_systems": linked_systems,
|
||||
"linked_vendor_ids": linked_vendor_ids,
|
||||
"assessment_id": payload.assessment_id,
|
||||
"rule_code": payload.rule_code,
|
||||
"notes": payload.notes,
|
||||
@@ -235,6 +239,9 @@ async def update_obligation(
|
||||
if field == "linked_systems":
|
||||
updates["linked_systems"] = json.dumps(value or [])
|
||||
set_clauses.append("linked_systems = CAST(:linked_systems AS jsonb)")
|
||||
elif field == "linked_vendor_ids":
|
||||
updates["linked_vendor_ids"] = json.dumps(value or [])
|
||||
set_clauses.append("linked_vendor_ids = CAST(:linked_vendor_ids AS jsonb)")
|
||||
else:
|
||||
updates[field] = value
|
||||
set_clauses.append(f"{field} = :{field}")
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -25,6 +25,7 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
|
||||
from .audit_trail_utils import log_audit_trail
|
||||
from ..db import (
|
||||
ControlDomainEnum,
|
||||
ControlRepository,
|
||||
@@ -312,8 +313,39 @@ async def get_control(
|
||||
svc: ControlExportService = Depends(get_ctrl_export_service),
|
||||
) -> ControlResponse:
|
||||
"""Get a specific control by control_id."""
|
||||
with translate_domain_errors():
|
||||
return svc.get_control(control_id)
|
||||
repo = ControlRepository(db)
|
||||
control = repo.get_by_control_id(control_id)
|
||||
if not control:
|
||||
raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
|
||||
|
||||
evidence_repo = EvidenceRepository(db)
|
||||
evidence = evidence_repo.get_by_control(control.id)
|
||||
|
||||
return ControlResponse(
|
||||
id=control.id,
|
||||
control_id=control.control_id,
|
||||
domain=control.domain.value if control.domain else None,
|
||||
control_type=control.control_type.value if control.control_type else None,
|
||||
title=control.title,
|
||||
description=control.description,
|
||||
pass_criteria=control.pass_criteria,
|
||||
implementation_guidance=control.implementation_guidance,
|
||||
code_reference=control.code_reference,
|
||||
documentation_url=control.documentation_url,
|
||||
is_automated=control.is_automated,
|
||||
automation_tool=control.automation_tool,
|
||||
automation_config=control.automation_config,
|
||||
owner=control.owner,
|
||||
review_frequency_days=control.review_frequency_days,
|
||||
status=control.status.value if control.status else None,
|
||||
status_notes=control.status_notes,
|
||||
status_justification=control.status_justification,
|
||||
last_reviewed_at=control.last_reviewed_at,
|
||||
next_review_at=control.next_review_at,
|
||||
created_at=control.created_at,
|
||||
updated_at=control.updated_at,
|
||||
evidence_count=len(evidence),
|
||||
)
|
||||
|
||||
|
||||
@router.put(
|
||||
@@ -325,8 +357,83 @@ async def update_control(
|
||||
svc: ControlExportService = Depends(get_ctrl_export_service),
|
||||
) -> ControlResponse:
|
||||
"""Update a control."""
|
||||
with translate_domain_errors():
|
||||
return svc.update_control(control_id, update)
|
||||
repo = ControlRepository(db)
|
||||
control = repo.get_by_control_id(control_id)
|
||||
if not control:
|
||||
raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
|
||||
|
||||
update_data = update.model_dump(exclude_unset=True)
|
||||
|
||||
# Convert status string to enum and validate transition
|
||||
if "status" in update_data:
|
||||
try:
|
||||
new_status_enum = ControlStatusEnum(update_data["status"])
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid status: {update_data['status']}")
|
||||
|
||||
# Validate status transition (Anti-Fake-Evidence)
|
||||
from ..services.control_status_machine import validate_transition
|
||||
current_status = control.status.value if control.status else "planned"
|
||||
evidence_list = db.query(EvidenceDB).filter(EvidenceDB.control_id == control.id).all()
|
||||
allowed, violations = validate_transition(
|
||||
current_status=current_status,
|
||||
new_status=update_data["status"],
|
||||
evidence_list=evidence_list,
|
||||
status_justification=update_data.get("status_justification") or update_data.get("status_notes"),
|
||||
)
|
||||
if not allowed:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail={
|
||||
"error": "Status transition not allowed",
|
||||
"current_status": current_status,
|
||||
"requested_status": update_data["status"],
|
||||
"violations": violations,
|
||||
}
|
||||
)
|
||||
|
||||
update_data["status"] = new_status_enum
|
||||
|
||||
updated = repo.update(control.id, **update_data)
|
||||
db.commit()
|
||||
|
||||
# Audit trail for status changes
|
||||
new_status = updated.status.value if updated.status else None
|
||||
if "status" in update.model_dump(exclude_unset=True) and current_status != new_status:
|
||||
log_audit_trail(
|
||||
db, "control", control.id, updated.control_id or updated.title,
|
||||
"status_change",
|
||||
performed_by=update.owner or "system",
|
||||
field_changed="status",
|
||||
old_value=current_status,
|
||||
new_value=new_status,
|
||||
)
|
||||
db.commit()
|
||||
|
||||
return ControlResponse(
|
||||
id=updated.id,
|
||||
control_id=updated.control_id,
|
||||
domain=updated.domain.value if updated.domain else None,
|
||||
control_type=updated.control_type.value if updated.control_type else None,
|
||||
title=updated.title,
|
||||
description=updated.description,
|
||||
pass_criteria=updated.pass_criteria,
|
||||
implementation_guidance=updated.implementation_guidance,
|
||||
code_reference=updated.code_reference,
|
||||
documentation_url=updated.documentation_url,
|
||||
is_automated=updated.is_automated,
|
||||
automation_tool=updated.automation_tool,
|
||||
automation_config=updated.automation_config,
|
||||
owner=updated.owner,
|
||||
review_frequency_days=updated.review_frequency_days,
|
||||
status=updated.status.value if updated.status else None,
|
||||
status_notes=updated.status_notes,
|
||||
status_justification=updated.status_justification,
|
||||
last_reviewed_at=updated.last_reviewed_at,
|
||||
next_review_at=updated.next_review_at,
|
||||
created_at=updated.created_at,
|
||||
updated_at=updated.updated_at,
|
||||
)
|
||||
|
||||
|
||||
@router.put(
|
||||
@@ -339,8 +446,43 @@ async def review_control(
|
||||
svc: ControlExportService = Depends(get_ctrl_export_service),
|
||||
) -> ControlResponse:
|
||||
"""Mark a control as reviewed with new status."""
|
||||
with translate_domain_errors():
|
||||
return svc.review_control(control_id, review)
|
||||
repo = ControlRepository(db)
|
||||
control = repo.get_by_control_id(control_id)
|
||||
if not control:
|
||||
raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
|
||||
|
||||
try:
|
||||
status_enum = ControlStatusEnum(review.status)
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid status: {review.status}")
|
||||
|
||||
updated = repo.mark_reviewed(control.id, status_enum, review.status_notes)
|
||||
db.commit()
|
||||
|
||||
return ControlResponse(
|
||||
id=updated.id,
|
||||
control_id=updated.control_id,
|
||||
domain=updated.domain.value if updated.domain else None,
|
||||
control_type=updated.control_type.value if updated.control_type else None,
|
||||
title=updated.title,
|
||||
description=updated.description,
|
||||
pass_criteria=updated.pass_criteria,
|
||||
implementation_guidance=updated.implementation_guidance,
|
||||
code_reference=updated.code_reference,
|
||||
documentation_url=updated.documentation_url,
|
||||
is_automated=updated.is_automated,
|
||||
automation_tool=updated.automation_tool,
|
||||
automation_config=updated.automation_config,
|
||||
owner=updated.owner,
|
||||
review_frequency_days=updated.review_frequency_days,
|
||||
status=updated.status.value if updated.status else None,
|
||||
status_notes=updated.status_notes,
|
||||
status_justification=updated.status_justification,
|
||||
last_reviewed_at=updated.last_reviewed_at,
|
||||
next_review_at=updated.next_review_at,
|
||||
created_at=updated.created_at,
|
||||
updated_at=updated.updated_at,
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -22,7 +22,9 @@ import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, File, Form, HTTPException, UploadFile
|
||||
import httpx
|
||||
from fastapi import APIRouter, File, Form, UploadFile, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
|
||||
from database import SessionLocal # re-exported below for legacy test patches
|
||||
@@ -96,15 +98,13 @@ async def scan_dependencies(
|
||||
db = SessionLocal()
|
||||
try:
|
||||
db.execute(
|
||||
text(
|
||||
"INSERT INTO compliance_screenings "
|
||||
"(id, tenant_id, status, sbom_format, sbom_version, "
|
||||
"total_components, total_issues, critical_issues, high_issues, "
|
||||
"medium_issues, low_issues, sbom_data, started_at, completed_at) "
|
||||
"VALUES (:id, :tenant_id, 'completed', 'CycloneDX', '1.5', "
|
||||
":total_components, :total_issues, :critical, :high, :medium, :low, "
|
||||
":sbom_data::jsonb, :started_at, :completed_at)"
|
||||
),
|
||||
text("""INSERT INTO compliance_screenings
|
||||
(id, tenant_id, status, sbom_format, sbom_version,
|
||||
total_components, total_issues, critical_issues, high_issues, medium_issues, low_issues,
|
||||
sbom_data, started_at, completed_at)
|
||||
VALUES (:id, :tenant_id, 'completed', 'CycloneDX', '1.5',
|
||||
:total_components, :total_issues, :critical, :high, :medium, :low,
|
||||
:sbom_data::jsonb, :started_at, :completed_at)"""),
|
||||
{
|
||||
"id": screening_id,
|
||||
"tenant_id": tenant_id,
|
||||
@@ -121,13 +121,11 @@ async def scan_dependencies(
|
||||
)
|
||||
for issue in issues:
|
||||
db.execute(
|
||||
text(
|
||||
"INSERT INTO compliance_security_issues "
|
||||
"(id, screening_id, severity, title, description, cve, cvss, "
|
||||
"affected_component, affected_version, fixed_in, remediation, status) "
|
||||
"VALUES (:id, :screening_id, :severity, :title, :description, :cve, :cvss, "
|
||||
":component, :version, :fixed_in, :remediation, :status)"
|
||||
),
|
||||
text("""INSERT INTO compliance_security_issues
|
||||
(id, screening_id, severity, title, description, cve, cvss,
|
||||
affected_component, affected_version, fixed_in, remediation, status)
|
||||
VALUES (:id, :screening_id, :severity, :title, :description, :cve, :cvss,
|
||||
:component, :version, :fixed_in, :remediation, :status)"""),
|
||||
{
|
||||
"id": issue["id"],
|
||||
"screening_id": screening_id,
|
||||
@@ -214,8 +212,77 @@ async def get_screening(screening_id: str) -> ScreeningResponse:
|
||||
"""Get a screening result by ID."""
|
||||
db = SessionLocal()
|
||||
try:
|
||||
with translate_domain_errors():
|
||||
return ScreeningService(db).get_screening(screening_id)
|
||||
result = db.execute(
|
||||
text("""SELECT id, status, sbom_format, sbom_version,
|
||||
total_components, total_issues, critical_issues, high_issues,
|
||||
medium_issues, low_issues, sbom_data, started_at, completed_at
|
||||
FROM compliance_screenings WHERE id = :id"""),
|
||||
{"id": screening_id},
|
||||
)
|
||||
row = result.fetchone()
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="Screening not found")
|
||||
|
||||
# Fetch issues
|
||||
issues_result = db.execute(
|
||||
text("""SELECT id, severity, title, description, cve, cvss,
|
||||
affected_component, affected_version, fixed_in, remediation, status
|
||||
FROM compliance_security_issues WHERE screening_id = :id"""),
|
||||
{"id": screening_id},
|
||||
)
|
||||
issues_rows = issues_result.fetchall()
|
||||
|
||||
issues = [
|
||||
SecurityIssueResponse(
|
||||
id=str(r[0]), severity=r[1], title=r[2], description=r[3],
|
||||
cve=r[4], cvss=r[5], affected_component=r[6],
|
||||
affected_version=r[7], fixed_in=r[8], remediation=r[9], status=r[10],
|
||||
)
|
||||
for r in issues_rows
|
||||
]
|
||||
|
||||
# Reconstruct components from SBOM data
|
||||
sbom_data = row[10] or {}
|
||||
components = []
|
||||
comp_vulns: dict[str, list[dict]] = {}
|
||||
for issue in issues:
|
||||
if issue.affected_component not in comp_vulns:
|
||||
comp_vulns[issue.affected_component] = []
|
||||
comp_vulns[issue.affected_component].append({
|
||||
"id": issue.cve or issue.id,
|
||||
"cve": issue.cve,
|
||||
"severity": issue.severity,
|
||||
"title": issue.title,
|
||||
"cvss": issue.cvss,
|
||||
"fixedIn": issue.fixed_in,
|
||||
})
|
||||
|
||||
for sc in sbom_data.get("components", []):
|
||||
components.append(SBOMComponentResponse(
|
||||
name=sc["name"],
|
||||
version=sc["version"],
|
||||
type=sc.get("type", "library"),
|
||||
purl=sc.get("purl", ""),
|
||||
licenses=sc.get("licenses", []),
|
||||
vulnerabilities=comp_vulns.get(sc["name"], []),
|
||||
))
|
||||
|
||||
return ScreeningResponse(
|
||||
id=str(row[0]),
|
||||
status=row[1],
|
||||
sbom_format=row[2] or "CycloneDX",
|
||||
sbom_version=row[3] or "1.5",
|
||||
total_components=row[4] or 0,
|
||||
total_issues=row[5] or 0,
|
||||
critical_issues=row[6] or 0,
|
||||
high_issues=row[7] or 0,
|
||||
medium_issues=row[8] or 0,
|
||||
low_issues=row[9] or 0,
|
||||
components=components,
|
||||
issues=issues,
|
||||
started_at=str(row[11]) if row[11] else None,
|
||||
completed_at=str(row[12]) if row[12] else None,
|
||||
)
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@@ -225,8 +292,33 @@ async def list_screenings(tenant_id: str = "default") -> ScreeningListResponse:
|
||||
"""List all screenings for a tenant."""
|
||||
db = SessionLocal()
|
||||
try:
|
||||
with translate_domain_errors():
|
||||
return ScreeningService(db).list_screenings(tenant_id)
|
||||
result = db.execute(
|
||||
text("""SELECT id, status, total_components, total_issues,
|
||||
critical_issues, high_issues, medium_issues, low_issues,
|
||||
started_at, completed_at, created_at
|
||||
FROM compliance_screenings
|
||||
WHERE tenant_id = :tenant_id
|
||||
ORDER BY created_at DESC"""),
|
||||
{"tenant_id": tenant_id},
|
||||
)
|
||||
rows = result.fetchall()
|
||||
screenings = [
|
||||
{
|
||||
"id": str(r[0]),
|
||||
"status": r[1],
|
||||
"total_components": r[2],
|
||||
"total_issues": r[3],
|
||||
"critical_issues": r[4],
|
||||
"high_issues": r[5],
|
||||
"medium_issues": r[6],
|
||||
"low_issues": r[7],
|
||||
"started_at": str(r[8]) if r[8] else None,
|
||||
"completed_at": str(r[9]) if r[9] else None,
|
||||
"created_at": str(r[10]),
|
||||
}
|
||||
for r in rows
|
||||
]
|
||||
return ScreeningListResponse(screenings=screenings, total=len(screenings))
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
@@ -0,0 +1,537 @@
|
||||
"""
|
||||
TOM ↔ Canonical Control Mapping Routes.
|
||||
|
||||
Three-layer architecture:
|
||||
TOM Measures (~88, audit-level) → Mapping Bridge → Canonical Controls (10,000+)
|
||||
|
||||
Endpoints:
|
||||
POST /v1/tom-mappings/sync — Sync canonical controls for company profile
|
||||
GET /v1/tom-mappings — List all mappings for tenant/project
|
||||
GET /v1/tom-mappings/by-tom/{code} — Mappings for a specific TOM control
|
||||
GET /v1/tom-mappings/stats — Coverage statistics
|
||||
POST /v1/tom-mappings/manual — Manually add a mapping
|
||||
DELETE /v1/tom-mappings/{id} — Remove a mapping
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, Header
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
|
||||
from database import SessionLocal
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/tom-mappings", tags=["tom-control-mappings"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# TOM CATEGORY → CANONICAL CATEGORY MAPPING
|
||||
# =============================================================================
|
||||
|
||||
# Lookup table used by the sync endpoint: every one of the 13 TOM control
# categories points at the 1-3 canonical control categories whose controls
# are attached to it.
TOM_TO_CANONICAL_CATEGORIES: dict[str, list[str]] = dict(
    ACCESS_CONTROL=["authentication", "identity", "physical"],
    ADMISSION_CONTROL=["authentication", "identity", "system"],
    ACCESS_AUTHORIZATION=["authentication", "identity"],
    TRANSFER_CONTROL=["network", "data_protection", "encryption"],
    INPUT_CONTROL=["application", "data_protection"],
    ORDER_CONTROL=["supply_chain", "compliance"],
    AVAILABILITY=["continuity", "system"],
    SEPARATION=["network", "data_protection"],
    ENCRYPTION=["encryption"],
    PSEUDONYMIZATION=["data_protection", "encryption"],
    RESILIENCE=["continuity", "system"],
    RECOVERY=["continuity"],
    REVIEW=["compliance", "governance", "risk"],
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# REQUEST / RESPONSE MODELS
|
||||
# =============================================================================
|
||||
|
||||
class SyncRequest(BaseModel):
    """Request body for the sync endpoint.

    ``industry`` and ``company_size`` form the company profile that is hashed
    for change detection and used to filter canonical controls; both are
    optional. ``force`` makes the sync run even when the profile hash is
    unchanged.
    """

    # Profile value matched against canonical_controls.applicable_industries.
    industry: Optional[str] = None
    # Profile value matched against canonical_controls.applicable_company_size.
    company_size: Optional[str] = None
    # Skip the "profile unchanged" short-circuit when True.
    force: bool = False
|
||||
|
||||
|
||||
class ManualMappingRequest(BaseModel):
    """Request body for attaching one canonical control to a TOM measure by hand."""

    # TOM side of the mapping.
    tom_control_code: str
    tom_category: str
    # Canonical control side; the id is cast to a UUID by the endpoint.
    canonical_control_id: str
    canonical_control_code: str
    # When omitted, the endpoint falls back to the control's own category.
    canonical_category: Optional[str] = None
    # Stored as the mapping's relevance score; defaults to full relevance.
    relevance_score: float = 1.0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# HELPERS
|
||||
# =============================================================================
|
||||
|
||||
def _get_tenant_id(x_tenant_id: Optional[str]) -> str:
|
||||
"""Extract tenant ID from header."""
|
||||
if not x_tenant_id:
|
||||
raise HTTPException(status_code=400, detail="X-Tenant-ID header required")
|
||||
return x_tenant_id
|
||||
|
||||
|
||||
def _compute_profile_hash(industry: Optional[str], company_size: Optional[str]) -> str:
|
||||
"""Compute a hash from profile parameters for change detection."""
|
||||
data = json.dumps({"industry": industry, "company_size": company_size}, sort_keys=True)
|
||||
return hashlib.sha256(data.encode()).hexdigest()[:16]
|
||||
|
||||
|
||||
def _mapping_row_to_dict(r) -> dict[str, Any]:
|
||||
"""Convert a mapping row to API response dict."""
|
||||
return {
|
||||
"id": str(r.id),
|
||||
"tenant_id": str(r.tenant_id),
|
||||
"project_id": str(r.project_id) if r.project_id else None,
|
||||
"tom_control_code": r.tom_control_code,
|
||||
"tom_category": r.tom_category,
|
||||
"canonical_control_id": str(r.canonical_control_id),
|
||||
"canonical_control_code": r.canonical_control_code,
|
||||
"canonical_category": r.canonical_category,
|
||||
"mapping_type": r.mapping_type,
|
||||
"relevance_score": float(r.relevance_score) if r.relevance_score else 1.0,
|
||||
"created_at": r.created_at.isoformat() if r.created_at else None,
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SYNC ENDPOINT
|
||||
# =============================================================================
|
||||
|
||||
@router.post("/sync")
async def sync_mappings(
    body: SyncRequest,
    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
    project_id: Optional[str] = Query(None),
):
    """
    Rebuild the automatic TOM -> canonical control mappings for a tenant/project.

    Steps:
      1. Hash the company profile. Unless ``force`` is set, return early when
         the hash stored for this tenant/project is identical.
      2. Delete the existing ``mapping_type = 'auto'`` rows for the tenant/project.
      3. For every TOM category, select approved, customer-visible canonical
         controls in the mapped canonical categories (optionally narrowed by
         industry and company size) and insert one mapping row per control.
         The TOM category name is stored as the row's ``tom_control_code``.
      4. Upsert the sync-state row and commit.

    Raises:
        HTTPException: 400 when the X-Tenant-ID header is missing.
    """
    tenant_id = _get_tenant_id(x_tenant_id)
    profile_hash = _compute_profile_hash(body.industry, body.company_size)
    scope = {"tid": tenant_id, "pid": project_id}

    # The optional industry / company-size predicates do not depend on the
    # TOM category, so they are prepared once before the loop.
    industry_filter = ""
    size_filter = ""
    profile_params = {}
    if body.industry:
        industry_filter = """
                    AND (
                        applicable_industries IS NULL
                        OR applicable_industries @> '"all"'::jsonb
                        OR applicable_industries @> (:industry)::jsonb
                    )
                """
        profile_params["industry"] = json.dumps([body.industry])
    if body.company_size:
        size_filter = """
                    AND (
                        applicable_company_size IS NULL
                        OR applicable_company_size @> '"all"'::jsonb
                        OR applicable_company_size @> (:csize)::jsonb
                    )
                """
        profile_params["csize"] = json.dumps([body.company_size])

    insert_mapping = text("""
                        INSERT INTO tom_control_mappings (
                            tenant_id, project_id, tom_control_code, tom_category,
                            canonical_control_id, canonical_control_code, canonical_category,
                            mapping_type, relevance_score
                        ) VALUES (
                            :tid, :pid, :tom_cat, :tom_cat,
                            :cc_id, :cc_code, :cc_category,
                            'auto', 1.00
                        )
                        ON CONFLICT (tenant_id, project_id, tom_control_code, canonical_control_id)
                        DO NOTHING
                    """)

    with SessionLocal() as db:
        # Short-circuit when the profile has not changed since the last sync.
        if not body.force:
            stored = db.execute(
                text("""
                    SELECT profile_hash FROM tom_control_sync_state
                    WHERE tenant_id = :tid AND (project_id = :pid OR (project_id IS NULL AND :pid IS NULL))
                """),
                scope,
            ).fetchone()
            if stored and stored.profile_hash == profile_hash:
                return {
                    "status": "unchanged",
                    "message": "Profile unchanged since last sync",
                    "profile_hash": profile_hash,
                }

        # Manual mappings are kept; only the auto-generated ones are replaced.
        db.execute(
            text("""
                DELETE FROM tom_control_mappings
                WHERE tenant_id = :tid
                  AND (project_id = :pid OR (project_id IS NULL AND :pid IS NULL))
                  AND mapping_type = 'auto'
            """),
            scope,
        )

        total_mappings = 0
        canonical_ids_matched = set()
        tom_codes_covered = set()

        for tom_category, canonical_categories in TOM_TO_CANONICAL_CATEGORIES.items():
            # One bind parameter per canonical category, OR-ed as equality tests.
            bind_names = [f"cat_{idx}" for idx, _ in enumerate(canonical_categories)]
            cat_conditions = " OR ".join(f"category = :{name}" for name in bind_names)
            select_params = dict(zip(bind_names, canonical_categories))
            select_params.update(profile_params)

            select_sql = f"""
                SELECT id, control_id, category
                FROM canonical_controls
                WHERE ({cat_conditions})
                  AND release_state = 'approved'
                  AND customer_visible = true
                  {industry_filter}
                  {size_filter}
                ORDER BY control_id
            """
            controls = db.execute(text(select_sql), select_params).fetchall()

            for control in controls:
                control_id = str(control.id)
                db.execute(
                    insert_mapping,
                    {
                        **scope,
                        "tom_cat": tom_category,
                        "cc_id": control_id,
                        "cc_code": control.control_id,
                        "cc_category": control.category,
                    },
                )
                total_mappings += 1
                canonical_ids_matched.add(control_id)
                tom_codes_covered.add(tom_category)

        matched_count = len(canonical_ids_matched)
        covered_count = len(tom_codes_covered)

        # Record the outcome so the next call can detect an unchanged profile.
        db.execute(
            text("""
                INSERT INTO tom_control_sync_state (
                    tenant_id, project_id, profile_hash,
                    total_mappings, canonical_controls_matched, tom_controls_covered,
                    last_synced_at
                ) VALUES (
                    :tid, :pid, :hash,
                    :total, :matched, :covered,
                    NOW()
                )
                ON CONFLICT (tenant_id, project_id)
                DO UPDATE SET
                    profile_hash = :hash,
                    total_mappings = :total,
                    canonical_controls_matched = :matched,
                    tom_controls_covered = :covered,
                    last_synced_at = NOW()
            """),
            {
                **scope,
                "hash": profile_hash,
                "total": total_mappings,
                "matched": matched_count,
                "covered": covered_count,
            },
        )

        db.commit()

    return {
        "status": "synced",
        "profile_hash": profile_hash,
        "total_mappings": total_mappings,
        "canonical_controls_matched": matched_count,
        "tom_categories_covered": covered_count,
    }
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# LIST MAPPINGS
|
||||
# =============================================================================
|
||||
|
||||
@router.get("")
async def list_mappings(
    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
    project_id: Optional[str] = Query(None),
    tom_category: Optional[str] = Query(None),
    mapping_type: Optional[str] = Query(None),
    limit: int = Query(500, ge=1, le=5000),
    offset: int = Query(0, ge=0),
):
    """List TOM <-> canonical control mappings for a tenant/project.

    Args:
        x_tenant_id: Tenant taken from the ``X-Tenant-ID`` header (required).
        project_id: Project scope; ``None`` selects rows without a project.
        tom_category: Optional exact-match filter on the TOM category.
        mapping_type: Optional exact-match filter on the mapping type.
        limit: Page size.
        offset: Number of rows to skip.

    Returns:
        ``{"mappings": [...], "total": n}``, where ``total`` counts every row
        matching the same filters as the page, ignoring limit/offset.

    Raises:
        HTTPException: 400 when the X-Tenant-ID header is missing.
    """
    tenant_id = _get_tenant_id(x_tenant_id)

    # A single WHERE clause feeds both the page query and the count query, so
    # "total" always reflects the filters applied to the page. The count used
    # to ignore mapping_type.
    where = """
        WHERE m.tenant_id = :tid
          AND (m.project_id = :pid OR (m.project_id IS NULL AND :pid IS NULL))
    """
    filter_params: dict[str, Any] = {"tid": tenant_id, "pid": project_id}

    if tom_category:
        where += " AND m.tom_category = :tcat"
        filter_params["tcat"] = tom_category
    if mapping_type:
        where += " AND m.mapping_type = :mtype"
        filter_params["mtype"] = mapping_type

    # Only constant SQL fragments are interpolated; every value is bound.
    page_query = f"""
        SELECT m.*, cc.title as canonical_title, cc.severity as canonical_severity
        FROM tom_control_mappings m
        LEFT JOIN canonical_controls cc ON cc.id = m.canonical_control_id
        {where}
        ORDER BY m.tom_category, m.canonical_control_code
        LIMIT :lim OFFSET :off
    """
    count_query = f"""
        SELECT count(*) FROM tom_control_mappings m
        {where}
    """
    page_params = {**filter_params, "lim": limit, "off": offset}

    with SessionLocal() as db:
        rows = db.execute(text(page_query), page_params).fetchall()
        total = db.execute(text(count_query), filter_params).scalar()

    mappings = []
    for r in rows:
        d = _mapping_row_to_dict(r)
        d["canonical_title"] = getattr(r, "canonical_title", None)
        d["canonical_severity"] = getattr(r, "canonical_severity", None)
        mappings.append(d)

    return {"mappings": mappings, "total": total}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# MAPPINGS BY TOM CONTROL
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/by-tom/{tom_code}")
async def get_mappings_by_tom(
    tom_code: str,
    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
    project_id: Optional[str] = Query(None),
):
    """Return the canonical controls mapped to one TOM control.

    ``tom_code`` is compared against both ``tom_control_code`` and
    ``tom_category``, so a category name works as well as a control code.
    Each mapping carries the joined control's title, severity and objective.

    Raises:
        HTTPException: 400 when the X-Tenant-ID header is missing.
    """
    tenant_id = _get_tenant_id(x_tenant_id)
    lookup_sql = text("""
                SELECT m.*, cc.title as canonical_title, cc.severity as canonical_severity,
                       cc.objective as canonical_objective
                FROM tom_control_mappings m
                LEFT JOIN canonical_controls cc ON cc.id = m.canonical_control_id
                WHERE m.tenant_id = :tid
                  AND (m.project_id = :pid OR (m.project_id IS NULL AND :pid IS NULL))
                  AND (m.tom_control_code = :code OR m.tom_category = :code)
                ORDER BY m.canonical_control_code
            """)
    lookup_params = {"tid": tenant_id, "pid": project_id, "code": tom_code}

    with SessionLocal() as db:
        found = db.execute(lookup_sql, lookup_params).fetchall()

    # Base mapping fields first, then the columns joined from canonical_controls.
    mappings = [
        {
            **_mapping_row_to_dict(entry),
            "canonical_title": getattr(entry, "canonical_title", None),
            "canonical_severity": getattr(entry, "canonical_severity", None),
            "canonical_objective": getattr(entry, "canonical_objective", None),
        }
        for entry in found
    ]

    return {"tom_code": tom_code, "mappings": mappings, "total": len(mappings)}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# STATS
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/stats")
async def get_mapping_stats(
    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
    project_id: Optional[str] = Query(None),
):
    """Report coverage statistics for TOM <-> canonical control mappings.

    The response has three parts: the stored sync state (zeros/``None`` when
    no sync has been recorded), a per-TOM-category breakdown of mapping
    counts, and the number of approved, customer-visible canonical controls.

    Raises:
        HTTPException: 400 when the X-Tenant-ID header is missing.
    """
    tenant_id = _get_tenant_id(x_tenant_id)
    scope = {"tid": tenant_id, "pid": project_id}

    with SessionLocal() as db:
        # Result of the most recent sync for this tenant/project, if any.
        sync_state = db.execute(
            text("""
                SELECT * FROM tom_control_sync_state
                WHERE tenant_id = :tid
                  AND (project_id = :pid OR (project_id IS NULL AND :pid IS NULL))
            """),
            scope,
        ).fetchone()

        # Mapping counts grouped by TOM category.
        category_stats = db.execute(
            text("""
                SELECT tom_category,
                       count(*) as total_mappings,
                       count(DISTINCT canonical_control_id) as unique_controls,
                       count(*) FILTER (WHERE mapping_type = 'auto') as auto_count,
                       count(*) FILTER (WHERE mapping_type = 'manual') as manual_count
                FROM tom_control_mappings
                WHERE tenant_id = :tid
                  AND (project_id = :pid OR (project_id IS NULL AND :pid IS NULL))
                GROUP BY tom_category
                ORDER BY tom_category
            """),
            scope,
        ).fetchall()

        # Size of the pool that a sync can draw from.
        total_canonical = db.execute(
            text("""
                SELECT count(*) FROM canonical_controls
                WHERE release_state = 'approved' AND customer_visible = true
            """)
        ).scalar()

    if sync_state:
        synced_at = sync_state.last_synced_at
        sync_summary = {
            "profile_hash": sync_state.profile_hash,
            "total_mappings": sync_state.total_mappings,
            "canonical_controls_matched": sync_state.canonical_controls_matched,
            "tom_controls_covered": sync_state.tom_controls_covered,
            "last_synced_at": synced_at.isoformat() if synced_at else None,
        }
    else:
        sync_summary = {
            "profile_hash": None,
            "total_mappings": 0,
            "canonical_controls_matched": 0,
            "tom_controls_covered": 0,
            "last_synced_at": None,
        }

    breakdown = []
    for stat in category_stats:
        breakdown.append(
            {
                "tom_category": stat.tom_category,
                "total_mappings": stat.total_mappings,
                "unique_controls": stat.unique_controls,
                "auto_count": stat.auto_count,
                "manual_count": stat.manual_count,
            }
        )

    return {
        "sync_state": sync_summary,
        "category_breakdown": breakdown,
        "total_canonical_controls_available": total_canonical or 0,
    }
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# MANUAL MAPPING
|
||||
# =============================================================================
|
||||
|
||||
@router.post("/manual", status_code=201)
async def add_manual_mapping(
    body: ManualMappingRequest,
    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
    project_id: Optional[str] = Query(None),
):
    """Manually add a canonical control to a TOM measure.

    Args:
        body: TOM code/category plus the canonical control to attach.
        x_tenant_id: Tenant taken from the ``X-Tenant-ID`` header (required).
        project_id: Optional project scope stored on the mapping.

    Returns:
        The inserted mapping as an API dict (HTTP 201).

    Raises:
        HTTPException: 400 when the tenant header is missing, 404 when the
            canonical control does not exist (including an ID that is not a
            valid UUID), 409 when the mapping already exists.
    """
    import uuid  # local import: not part of this module's import block

    tenant_id = _get_tenant_id(x_tenant_id)

    # A malformed ID cannot match any control. Reject it here so the
    # CAST(... AS uuid) below does not fail inside the database as a 500.
    try:
        uuid.UUID(body.canonical_control_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Canonical control not found") from None

    with SessionLocal() as db:
        # Verify canonical control exists
        cc = db.execute(
            text("SELECT id, control_id, category FROM canonical_controls WHERE id = CAST(:cid AS uuid)"),
            {"cid": body.canonical_control_id},
        ).fetchone()
        if not cc:
            raise HTTPException(status_code=404, detail="Canonical control not found")

        try:
            row = db.execute(
                text("""
                    INSERT INTO tom_control_mappings (
                        tenant_id, project_id, tom_control_code, tom_category,
                        canonical_control_id, canonical_control_code, canonical_category,
                        mapping_type, relevance_score
                    ) VALUES (
                        :tid, :pid, :tom_code, :tom_cat,
                        CAST(:cc_id AS uuid), :cc_code, :cc_category,
                        'manual', :score
                    )
                    RETURNING *
                """),
                {
                    "tid": tenant_id,
                    "pid": project_id,
                    "tom_code": body.tom_control_code,
                    "tom_cat": body.tom_category,
                    "cc_id": body.canonical_control_id,
                    "cc_code": body.canonical_control_code,
                    # Fall back to the control's own category when none is given.
                    "cc_category": body.canonical_category or cc.category,
                    "score": body.relevance_score,
                },
            ).fetchone()
            db.commit()
        except Exception as e:
            # Unique-constraint violations are detected from the driver's
            # message text; anything else propagates unchanged.
            message = str(e).lower()
            if "unique" in message or "duplicate" in message:
                raise HTTPException(status_code=409, detail="Mapping already exists") from e
            raise

    return _mapping_row_to_dict(row)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# DELETE MAPPING
|
||||
# =============================================================================
|
||||
|
||||
@router.delete("/{mapping_id}", status_code=204)
async def delete_mapping(
    mapping_id: str,
    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
):
    """Remove a mapping (manual or auto) owned by the calling tenant.

    Args:
        mapping_id: UUID of the mapping row.
        x_tenant_id: Tenant taken from the ``X-Tenant-ID`` header (required).

    Raises:
        HTTPException: 400 when the tenant header is missing, 404 when no
            mapping with that ID exists for the tenant (including an ID that
            is not a valid UUID).
    """
    import uuid  # local import: not part of this module's import block

    tenant_id = _get_tenant_id(x_tenant_id)

    # A malformed ID cannot match any row. Answer 404 here so the
    # CAST(... AS uuid) below does not fail inside the database as a 500.
    try:
        uuid.UUID(mapping_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Mapping not found") from None

    with SessionLocal() as db:
        result = db.execute(
            text("""
                DELETE FROM tom_control_mappings
                WHERE id = CAST(:mid AS uuid) AND tenant_id = :tid
            """),
            {"mid": mapping_id, "tid": tenant_id},
        )
        if result.rowcount == 0:
            raise HTTPException(status_code=404, detail="Mapping not found")
        db.commit()

    return None
|
||||
@@ -0,0 +1,427 @@
|
||||
"""
|
||||
FastAPI routes for VVT Master Libraries + Process Templates.
|
||||
|
||||
Library endpoints (read-only, global):
|
||||
GET /vvt/libraries — Overview: all library types + counts
|
||||
GET /vvt/libraries/data-subjects — Data subjects (filter: typical_for)
|
||||
GET /vvt/libraries/data-categories — Hierarchical (filter: parent_id, is_art9, flat)
|
||||
GET /vvt/libraries/recipients — Recipients (filter: type)
|
||||
GET /vvt/libraries/legal-bases — Legal bases (filter: is_art9, type)
|
||||
GET /vvt/libraries/retention-rules — Retention rules
|
||||
GET /vvt/libraries/transfer-mechanisms — Transfer mechanisms
|
||||
GET /vvt/libraries/purposes — Purposes (filter: typical_for)
|
||||
GET /vvt/libraries/toms — TOMs (filter: category)
|
||||
|
||||
Template endpoints:
|
||||
GET /vvt/templates — List templates (filter: business_function, search)
|
||||
GET /vvt/templates/{id} — Single template with resolved labels
|
||||
POST /vvt/templates/{id}/instantiate — Create VVT activity from template
|
||||
"""
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
|
||||
from ..db.vvt_library_models import (
|
||||
VVTLibDataSubjectDB,
|
||||
VVTLibDataCategoryDB,
|
||||
VVTLibRecipientDB,
|
||||
VVTLibLegalBasisDB,
|
||||
VVTLibRetentionRuleDB,
|
||||
VVTLibTransferMechanismDB,
|
||||
VVTLibPurposeDB,
|
||||
VVTLibTomDB,
|
||||
VVTProcessTemplateDB,
|
||||
)
|
||||
from ..db.vvt_models import VVTActivityDB, VVTAuditLogDB
|
||||
from .tenant_utils import get_tenant_id
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/vvt", tags=["compliance-vvt-libraries"])
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Helper: row → dict
|
||||
# ============================================================================
|
||||
|
||||
def _row_to_dict(row, extra_fields=None):
|
||||
"""Generic row → dict for library items."""
|
||||
d = {
|
||||
"id": row.id,
|
||||
"label_de": row.label_de,
|
||||
}
|
||||
if hasattr(row, 'description_de') and row.description_de:
|
||||
d["description_de"] = row.description_de
|
||||
if hasattr(row, 'sort_order'):
|
||||
d["sort_order"] = row.sort_order
|
||||
if extra_fields:
|
||||
for f in extra_fields:
|
||||
if hasattr(row, f):
|
||||
val = getattr(row, f)
|
||||
if val is not None:
|
||||
d[f] = val
|
||||
return d
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Library Overview
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/libraries")
async def get_libraries_overview(db: Session = Depends(get_db)):
    """Return every library type together with its row count."""
    # (public type name, model) pairs; the response keeps this order.
    library_models = (
        ("data-subjects", VVTLibDataSubjectDB),
        ("data-categories", VVTLibDataCategoryDB),
        ("recipients", VVTLibRecipientDB),
        ("legal-bases", VVTLibLegalBasisDB),
        ("retention-rules", VVTLibRetentionRuleDB),
        ("transfer-mechanisms", VVTLibTransferMechanismDB),
        ("purposes", VVTLibPurposeDB),
        ("toms", VVTLibTomDB),
    )
    overview = []
    for library_type, model in library_models:
        overview.append({"type": library_type, "count": db.query(model).count()})
    return {"libraries": overview}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Data Subjects
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/libraries/data-subjects")
async def list_data_subjects(
    typical_for: Optional[str] = Query(None, description="Filter by business function"),
    db: Session = Depends(get_db),
):
    """List data-subject library entries ordered by ``sort_order``.

    When ``typical_for`` is given, only entries whose ``typical_for`` list
    contains that value are returned; the filter runs in Python after loading.
    """
    subjects = db.query(VVTLibDataSubjectDB).order_by(VVTLibDataSubjectDB.sort_order).all()
    entries = [_row_to_dict(subject, ["art9_relevant", "typical_for"]) for subject in subjects]
    if not typical_for:
        return entries
    return [entry for entry in entries if typical_for in (entry.get("typical_for") or [])]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Data Categories (hierarchical)
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/libraries/data-categories")
async def list_data_categories(
    flat: Optional[bool] = Query(False, description="Return flat list instead of tree"),
    parent_id: Optional[str] = Query(None),
    is_art9: Optional[bool] = Query(None),
    db: Session = Depends(get_db),
):
    """List data categories, as a tree by default.

    Args:
        flat: Return a flat list instead of a tree.
        parent_id: Restrict to direct children of this category (implies flat).
        is_art9: Restrict by the Art. 9 flag (implies flat).
        db: Database session.

    Returns:
        A flat list of category dicts when any filter or ``flat`` is set.
        Otherwise the root categories (those without ``parent_id``), each
        node carrying its descendants under ``"children"``.
    """
    query = db.query(VVTLibDataCategoryDB).order_by(VVTLibDataCategoryDB.sort_order)
    if parent_id is not None:
        query = query.filter(VVTLibDataCategoryDB.parent_id == parent_id)
    if is_art9 is not None:
        query = query.filter(VVTLibDataCategoryDB.is_art9 == is_art9)
    rows = query.all()

    extra = ["parent_id", "is_art9", "is_art10", "risk_weight", "default_retention_rule", "default_legal_basis"]
    items = [_row_to_dict(r, extra) for r in rows]

    if flat or parent_id is not None or is_art9 is not None:
        return items

    # Group by parent. _row_to_dict omits a None parent_id, so roots end up
    # under the None key.
    by_parent: dict = {}
    for item in items:
        by_parent.setdefault(item.get("parent_id"), []).append(item)

    # Attach children to EVERY node, not just to the roots. The dicts are
    # shared by reference, so nesting deeper than two levels is kept instead
    # of being dropped from the response.
    for item in items:
        children = by_parent.get(item["id"])
        if children:
            item["children"] = children

    return by_parent.get(None, [])
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Recipients
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/libraries/recipients")
async def list_recipients(
    type: Optional[str] = Query(None, description="INTERNAL, PROCESSOR, CONTROLLER, AUTHORITY"),
    db: Session = Depends(get_db),
):
    """List recipient library entries ordered by ``sort_order``, optionally limited to one recipient type."""
    recipients = db.query(VVTLibRecipientDB).order_by(VVTLibRecipientDB.sort_order)
    if type:
        recipients = recipients.filter(VVTLibRecipientDB.type == type)
    fields = ["type", "is_third_country", "country"]
    return [_row_to_dict(entry, fields) for entry in recipients.all()]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Legal Bases
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/libraries/legal-bases")
async def list_legal_bases(
    is_art9: Optional[bool] = Query(None),
    type: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """List legal-basis library entries ordered by ``sort_order``.

    ``is_art9`` filters on the Art. 9 flag when it is not ``None``; ``type``
    filters on the basis type when it is non-empty.
    """
    bases = db.query(VVTLibLegalBasisDB).order_by(VVTLibLegalBasisDB.sort_order)
    if is_art9 is not None:
        bases = bases.filter(VVTLibLegalBasisDB.is_art9 == is_art9)
    if type:
        bases = bases.filter(VVTLibLegalBasisDB.type == type)
    fields = ["article", "type", "is_art9", "typical_national_law"]
    return [_row_to_dict(entry, fields) for entry in bases.all()]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Retention Rules
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/libraries/retention-rules")
async def list_retention_rules(db: Session = Depends(get_db)):
    """List all retention-rule library entries ordered by ``sort_order``."""
    fields = ["legal_basis", "duration", "duration_unit", "start_event", "deletion_procedure"]
    rules = db.query(VVTLibRetentionRuleDB).order_by(VVTLibRetentionRuleDB.sort_order)
    return [_row_to_dict(rule, fields) for rule in rules.all()]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Transfer Mechanisms
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/libraries/transfer-mechanisms")
async def list_transfer_mechanisms(db: Session = Depends(get_db)):
    """List all transfer-mechanism library entries ordered by ``sort_order``."""
    fields = ["article", "requires_tia"]
    mechanisms = db.query(VVTLibTransferMechanismDB).order_by(VVTLibTransferMechanismDB.sort_order)
    return [_row_to_dict(mechanism, fields) for mechanism in mechanisms.all()]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Purposes
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/libraries/purposes")
async def list_purposes(
    typical_for: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """List purpose library entries ordered by ``sort_order``.

    When ``typical_for`` is given, only entries whose ``typical_for`` list
    contains that value are returned; the filter runs in Python after loading.
    """
    purposes = db.query(VVTLibPurposeDB).order_by(VVTLibPurposeDB.sort_order).all()
    entries = [_row_to_dict(purpose, ["typical_legal_basis", "typical_for"]) for purpose in purposes]
    if not typical_for:
        return entries
    return [entry for entry in entries if typical_for in (entry.get("typical_for") or [])]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# TOMs
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/libraries/toms")
async def list_toms(
    category: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """List TOM library entries ordered by ``sort_order``, optionally limited to one category."""
    toms = db.query(VVTLibTomDB).order_by(VVTLibTomDB.sort_order)
    if category:
        toms = toms.filter(VVTLibTomDB.category == category)
    fields = ["category", "art32_reference"]
    return [_row_to_dict(entry, fields) for entry in toms.all()]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Process Templates
|
||||
# ============================================================================
|
||||
|
||||
def _template_to_dict(t: VVTProcessTemplateDB) -> dict:
|
||||
return {
|
||||
"id": t.id,
|
||||
"name": t.name,
|
||||
"description": t.description,
|
||||
"business_function": t.business_function,
|
||||
"purpose_refs": t.purpose_refs or [],
|
||||
"legal_basis_refs": t.legal_basis_refs or [],
|
||||
"data_subject_refs": t.data_subject_refs or [],
|
||||
"data_category_refs": t.data_category_refs or [],
|
||||
"recipient_refs": t.recipient_refs or [],
|
||||
"tom_refs": t.tom_refs or [],
|
||||
"transfer_mechanism_refs": t.transfer_mechanism_refs or [],
|
||||
"retention_rule_ref": t.retention_rule_ref,
|
||||
"typical_systems": t.typical_systems or [],
|
||||
"protection_level": t.protection_level or "MEDIUM",
|
||||
"dpia_required": t.dpia_required or False,
|
||||
"risk_score": t.risk_score,
|
||||
"tags": t.tags or [],
|
||||
"is_system": t.is_system,
|
||||
"sort_order": t.sort_order,
|
||||
}
|
||||
|
||||
|
||||
def _resolve_labels(template_dict: dict, db: Session) -> dict:
    """Add human-readable labels for the library IDs referenced by a template.

    For every non-empty ``*_refs`` list a ``*_labels`` dict (id -> ``label_de``)
    is added; IDs without a library row map to themselves. A set
    ``retention_rule_ref`` yields ``retention_rule_label`` when the rule
    exists. The dict is modified in place and returned.
    """
    # refs key -> (library model, key under which the labels are stored)
    lookups = {
        "purpose_refs": (VVTLibPurposeDB, "purpose_labels"),
        "legal_basis_refs": (VVTLibLegalBasisDB, "legal_basis_labels"),
        "data_subject_refs": (VVTLibDataSubjectDB, "data_subject_labels"),
        "data_category_refs": (VVTLibDataCategoryDB, "data_category_labels"),
        "recipient_refs": (VVTLibRecipientDB, "recipient_labels"),
        "tom_refs": (VVTLibTomDB, "tom_labels"),
        "transfer_mechanism_refs": (VVTLibTransferMechanismDB, "transfer_mechanism_labels"),
    }
    for refs_key, (model, labels_key) in lookups.items():
        ref_ids = template_dict.get(refs_key) or []
        if not ref_ids:
            continue
        known = {}
        for entry in db.query(model).filter(model.id.in_(ref_ids)).all():
            known[entry.id] = entry.label_de
        template_dict[labels_key] = {ref_id: known.get(ref_id, ref_id) for ref_id in ref_ids}

    # The retention rule is a single reference rather than a list.
    rule_id = template_dict.get("retention_rule_ref")
    if rule_id:
        rule = db.query(VVTLibRetentionRuleDB).filter(VVTLibRetentionRuleDB.id == rule_id).first()
        if rule:
            template_dict["retention_rule_label"] = rule.label_de

    return template_dict
|
||||
|
||||
|
||||
@router.get("/templates")
async def list_templates(
    business_function: Optional[str] = Query(None),
    search: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """List process templates (system + tenant).

    Results are ordered by ``sort_order``. ``business_function`` narrows the
    list to an exact match; ``search`` does a case-insensitive substring
    match against the template name or description.
    """
    selection = db.query(VVTProcessTemplateDB).order_by(VVTProcessTemplateDB.sort_order)

    if business_function:
        selection = selection.filter(
            VVTProcessTemplateDB.business_function == business_function
        )

    if search:
        pattern = f"%{search}%"
        name_hit = VVTProcessTemplateDB.name.ilike(pattern)
        description_hit = VVTProcessTemplateDB.description.ilike(pattern)
        selection = selection.filter(name_hit | description_hit)

    serialized = []
    for row in selection.all():
        serialized.append(_template_to_dict(row))
    return serialized
|
||||
|
||||
|
||||
@router.get("/templates/{template_id}")
async def get_template(
    template_id: str,
    db: Session = Depends(get_db),
):
    """Get a single template with resolved library labels.

    Responds with 404 when no template carries the given ID.
    """
    template = (
        db.query(VVTProcessTemplateDB)
        .filter(VVTProcessTemplateDB.id == template_id)
        .first()
    )
    if template:
        return _resolve_labels(_template_to_dict(template), db)

    raise HTTPException(status_code=404, detail=f"Template '{template_id}' not found")
|
||||
|
||||
|
||||
@router.post("/templates/{template_id}/instantiate", status_code=201)
async def instantiate_template(
    template_id: str,
    http_request: Request,
    tid: str = Depends(get_tenant_id),
    db: Session = Depends(get_db),
):
    """Create a new VVT activity from a process template.

    Copies the template's library references onto a new DRAFT activity for
    tenant ``tid``, resolves those references to ``label_de`` texts for the
    backward-compatible free-text fields, writes a CREATE audit-log entry
    and commits both rows in one transaction.

    Args:
        template_id: ID of the process template to instantiate.
        http_request: Incoming request; its ``X-User-ID`` header (default
            ``"system"``) is stored as creator and as audit author.
        tid: Tenant ID supplied by the ``get_tenant_id`` dependency.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        The new activity converted by ``_activity_to_response``.

    Raises:
        HTTPException: 404 when no template with ``template_id`` exists.
    """
    t = db.query(VVTProcessTemplateDB).filter(VVTProcessTemplateDB.id == template_id).first()
    if not t:
        raise HTTPException(status_code=404, detail=f"Template '{template_id}' not found")

    # Generate unique VVT-ID. Start from the tenant's activity count and skip
    # numbers that are already taken: after a deletion ``count + 1`` alone
    # would re-issue the ID of a still-existing activity.
    sequence = db.query(VVTActivityDB).filter(VVTActivityDB.tenant_id == tid).count() + 1
    vvt_id = f"VVT-{sequence:04d}"
    while (
        db.query(VVTActivityDB)
        .filter(VVTActivityDB.tenant_id == tid, VVTActivityDB.vvt_id == vvt_id)
        .first()
        is not None
    ):
        sequence += 1
        vvt_id = f"VVT-{sequence:04d}"

    user_id = http_request.headers.get("X-User-ID", "system")

    # Resolve library IDs to freetext labels for backward-compat fields
    purpose_labels = _resolve_ids(db, VVTLibPurposeDB, t.purpose_refs or [])
    legal_labels = _resolve_ids(db, VVTLibLegalBasisDB, t.legal_basis_refs or [])
    subject_labels = _resolve_ids(db, VVTLibDataSubjectDB, t.data_subject_refs or [])
    category_labels = _resolve_ids(db, VVTLibDataCategoryDB, t.data_category_refs or [])
    recipient_labels = _resolve_ids(db, VVTLibRecipientDB, t.recipient_refs or [])

    # Resolve retention rule; stays an empty dict when the template has no
    # rule or the referenced rule does not exist.
    retention_period = {}
    if t.retention_rule_ref:
        rr = db.query(VVTLibRetentionRuleDB).filter(VVTLibRetentionRuleDB.id == t.retention_rule_ref).first()
        if rr:
            retention_period = {
                "description": rr.label_de,
                "legalBasis": rr.legal_basis or "",
                "deletionProcedure": rr.deletion_procedure or "",
                "duration": rr.duration,
                "durationUnit": rr.duration_unit,
            }

    # Build structured TOMs from tom_refs. TOM rows whose category is not one
    # of the five buckets below are left out.
    structured_toms = {"accessControl": [], "confidentiality": [], "integrity": [], "availability": [], "separation": []}
    if t.tom_refs:
        tom_rows = db.query(VVTLibTomDB).filter(VVTLibTomDB.id.in_(t.tom_refs)).all()
        for tr in tom_rows:
            cat = tr.category
            if cat in structured_toms:
                structured_toms[cat].append(tr.label_de)

    act = VVTActivityDB(
        tenant_id=tid,
        vvt_id=vvt_id,
        name=t.name,
        description=t.description or "",
        purposes=purpose_labels,
        legal_bases=[{"type": lid, "description": lbl} for lid, lbl in zip(t.legal_basis_refs or [], legal_labels)],
        data_subject_categories=subject_labels,
        personal_data_categories=category_labels,
        recipient_categories=[{"type": "unknown", "name": lbl} for lbl in recipient_labels],
        retention_period=retention_period,
        business_function=t.business_function,
        systems=[{"systemId": s, "name": s} for s in (t.typical_systems or [])],
        protection_level=t.protection_level or "MEDIUM",
        dpia_required=t.dpia_required or False,
        structured_toms=structured_toms,
        status="DRAFT",
        created_by=user_id,
        # Library refs
        purpose_refs=t.purpose_refs,
        legal_basis_refs=t.legal_basis_refs,
        data_subject_refs=t.data_subject_refs,
        data_category_refs=t.data_category_refs,
        recipient_refs=t.recipient_refs,
        retention_rule_ref=t.retention_rule_ref,
        transfer_mechanism_refs=t.transfer_mechanism_refs,
        tom_refs=t.tom_refs,
        source_template_id=t.id,
        risk_score=t.risk_score,
    )
    db.add(act)
    # Flush before building the audit entry, which stores act.id.
    db.flush()

    # Audit log
    audit = VVTAuditLogDB(
        tenant_id=tid,
        action="CREATE",
        entity_type="activity",
        entity_id=act.id,
        changed_by=user_id,
        new_values={"vvt_id": vvt_id, "source_template_id": t.id, "name": t.name},
    )
    db.add(audit)
    db.commit()
    db.refresh(act)

    # Return full response.
    # NOTE(review): imported locally, presumably to avoid a circular import
    # between the two route modules — confirm before moving it to file level.
    from .vvt_routes import _activity_to_response
    return _activity_to_response(act)
|
||||
|
||||
|
||||
def _resolve_ids(db: Session, model, ids: list) -> list:
    """Map library IDs to their ``label_de`` strings, keeping input order.

    An ID without a matching row in *model* is returned unchanged in its
    position. An empty *ids* yields ``[]`` without querying.
    """
    if not ids:
        return []

    labels_by_id = {}
    for entry in db.query(model).filter(model.id.in_(ids)).all():
        labels_by_id[entry.id] = entry.label_de

    resolved = []
    for ref in ids:
        resolved.append(labels_by_id.get(ref, ref))
    return resolved
|
||||
@@ -81,6 +81,54 @@ async def upsert_organization(
|
||||
# Activities
|
||||
# ============================================================================
|
||||
|
||||
def _activity_to_response(act: VVTActivityDB) -> VVTActivityResponse:
    """Convert an activity row into its ``VVTActivityResponse`` model.

    List-valued legacy fields default to ``[]``, dict-valued ones to ``{}``,
    ``protection_level`` to ``'MEDIUM'``, ``dpia_required`` to ``False`` and
    ``status`` to ``'DRAFT'``. ``id`` and ``dsfa_id`` are stringified; the
    library-reference fields are passed through unchanged.
    """
    fields = {
        "id": str(act.id),
        "vvt_id": act.vvt_id,
        "name": act.name,
        "description": act.description,
        "purposes": act.purposes or [],
        "legal_bases": act.legal_bases or [],
        "data_subject_categories": act.data_subject_categories or [],
        "personal_data_categories": act.personal_data_categories or [],
        "recipient_categories": act.recipient_categories or [],
        "third_country_transfers": act.third_country_transfers or [],
        "retention_period": act.retention_period or {},
        "tom_description": act.tom_description,
        "business_function": act.business_function,
        "systems": act.systems or [],
        "deployment_model": act.deployment_model,
        "data_sources": act.data_sources or [],
        "data_flows": act.data_flows or [],
        "protection_level": act.protection_level or 'MEDIUM',
        "dpia_required": act.dpia_required or False,
        "structured_toms": act.structured_toms or {},
        "status": act.status or 'DRAFT',
        "responsible": act.responsible,
        "owner": act.owner,
        "last_reviewed_at": act.last_reviewed_at,
        "next_review_at": act.next_review_at,
        "created_by": act.created_by,
    }

    # A linked DSFA is optional; only stringify it when present.
    if act.dsfa_id:
        fields["dsfa_id"] = str(act.dsfa_id)
    else:
        fields["dsfa_id"] = None

    # Library refs and derived/linked data are handed over untouched.
    for column in (
        "purpose_refs",
        "legal_basis_refs",
        "data_subject_refs",
        "data_category_refs",
        "recipient_refs",
        "retention_rule_ref",
        "transfer_mechanism_refs",
        "tom_refs",
        "source_template_id",
        "risk_score",
        "linked_loeschfristen_ids",
        "linked_tom_measure_ids",
        "art30_completeness",
        "created_at",
        "updated_at",
    ):
        fields[column] = getattr(act, column)

    return VVTActivityResponse(**fields)
|
||||
|
||||
|
||||
@router.get("/activities", response_model=List[VVTActivityResponse])
|
||||
async def list_activities(
|
||||
status: Optional[str] = Query(None),
|
||||
@@ -145,6 +193,107 @@ async def delete_activity(
|
||||
return service.delete_activity(tid, activity_id)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Art. 30 Completeness Check
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/activities/{activity_id}/completeness")
async def get_activity_completeness(
    activity_id: str,
    tid: str = Depends(get_tenant_id),
    db: Session = Depends(get_db),
):
    """Calculate Art. 30 completeness score for a VVT activity.

    The activity is looked up by ID within the caller's tenant; a 404 is
    raised when it does not exist there.
    """
    tenant_scoped = db.query(VVTActivityDB).filter(
        VVTActivityDB.id == activity_id,
        VVTActivityDB.tenant_id == tid,
    )
    activity = tenant_scoped.first()
    if activity:
        return _calculate_completeness(activity)

    raise HTTPException(status_code=404, detail=f"Activity {activity_id} not found")
|
||||
|
||||
|
||||
def _calculate_completeness(act: VVTActivityDB) -> dict:
|
||||
"""Calculate Art. 30 completeness — required fields per DSGVO Art. 30 Abs. 1."""
|
||||
missing = []
|
||||
warnings = []
|
||||
total_checks = 10
|
||||
passed = 0
|
||||
|
||||
# 1. Name/Zweck
|
||||
if act.name:
|
||||
passed += 1
|
||||
else:
|
||||
missing.append("name")
|
||||
|
||||
# 2. Verarbeitungszwecke
|
||||
has_purposes = bool(act.purposes) or bool(act.purpose_refs)
|
||||
if has_purposes:
|
||||
passed += 1
|
||||
else:
|
||||
missing.append("purposes")
|
||||
|
||||
# 3. Rechtsgrundlage
|
||||
has_legal = bool(act.legal_bases) or bool(act.legal_basis_refs)
|
||||
if has_legal:
|
||||
passed += 1
|
||||
else:
|
||||
missing.append("legal_bases")
|
||||
|
||||
# 4. Betroffenenkategorien
|
||||
has_subjects = bool(act.data_subject_categories) or bool(act.data_subject_refs)
|
||||
if has_subjects:
|
||||
passed += 1
|
||||
else:
|
||||
missing.append("data_subjects")
|
||||
|
||||
# 5. Datenkategorien
|
||||
has_categories = bool(act.personal_data_categories) or bool(act.data_category_refs)
|
||||
if has_categories:
|
||||
passed += 1
|
||||
else:
|
||||
missing.append("data_categories")
|
||||
|
||||
# 6. Empfaenger
|
||||
has_recipients = bool(act.recipient_categories) or bool(act.recipient_refs)
|
||||
if has_recipients:
|
||||
passed += 1
|
||||
else:
|
||||
missing.append("recipients")
|
||||
|
||||
# 7. Drittland-Uebermittlung (checked but not strictly required)
|
||||
passed += 1 # always passes — no transfer is valid state
|
||||
|
||||
# 8. Loeschfristen
|
||||
has_retention = bool(act.retention_period and act.retention_period.get('description')) or bool(act.retention_rule_ref)
|
||||
if has_retention:
|
||||
passed += 1
|
||||
else:
|
||||
missing.append("retention_period")
|
||||
|
||||
# 9. TOM-Beschreibung
|
||||
has_tom = bool(act.tom_description) or bool(act.tom_refs) or bool(act.structured_toms)
|
||||
if has_tom:
|
||||
passed += 1
|
||||
else:
|
||||
missing.append("tom_description")
|
||||
|
||||
# 10. Verantwortlicher
|
||||
if act.responsible:
|
||||
passed += 1
|
||||
else:
|
||||
missing.append("responsible")
|
||||
|
||||
# Warnings
|
||||
if act.dpia_required and not act.dsfa_id:
|
||||
warnings.append("dpia_required_but_no_dsfa_linked")
|
||||
if act.third_country_transfers and not act.transfer_mechanism_refs:
|
||||
warnings.append("third_country_transfer_without_mechanism")
|
||||
|
||||
score = int((passed / total_checks) * 100)
|
||||
return {"score": score, "missing": missing, "warnings": warnings, "passed": passed, "total": total_checks}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Audit Log
|
||||
# ============================================================================
|
||||
|
||||
@@ -0,0 +1,443 @@
|
||||
{
|
||||
"framework_id": "CSA_CCM",
|
||||
"display_name": "Cloud Security Alliance CCM v4",
|
||||
"license": {
|
||||
"type": "restricted",
|
||||
"rag_allowed": false,
|
||||
"use_as_metadata": true,
|
||||
"note": "Abstrahierte Struktur — keine Originaltexte uebernommen"
|
||||
},
|
||||
"domains": [
|
||||
{
|
||||
"domain_id": "AIS",
|
||||
"title": "Application and Interface Security",
|
||||
"aliases": ["ais", "application and interface security", "anwendungssicherheit", "schnittstellensicherheit"],
|
||||
"keywords": ["application", "anwendung", "interface", "schnittstelle", "api", "web", "eingabevalidierung"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "AIS-01",
|
||||
"title": "Application Security Policy",
|
||||
"statement": "Sicherheitsrichtlinien fuer Anwendungsentwicklung und Schnittstellenmanagement muessen definiert und angewendet werden.",
|
||||
"keywords": ["policy", "richtlinie", "entwicklung"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Anwendungssicherheitsrichtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AIS-02",
|
||||
"title": "Application Security Design",
|
||||
"statement": "Sicherheitsanforderungen muessen in den Entwurf jeder Anwendung integriert werden.",
|
||||
"keywords": ["design", "entwurf", "security by design"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Sicherheitsanforderungen im Anwendungsentwurf",
|
||||
"object_class": "process"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AIS-03",
|
||||
"title": "Application Security Testing",
|
||||
"statement": "Anwendungen muessen vor dem Deployment und regelmaessig auf Sicherheitsschwachstellen getestet werden.",
|
||||
"keywords": ["testing", "test", "sast", "dast", "penetration"],
|
||||
"action_hint": "test",
|
||||
"object_hint": "Anwendungssicherheitstests",
|
||||
"object_class": "process"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AIS-04",
|
||||
"title": "Secure Development Practices",
|
||||
"statement": "Sichere Entwicklungspraktiken (Code Review, Pair Programming, SAST) muessen fuer alle Entwicklungsprojekte gelten.",
|
||||
"keywords": ["development", "entwicklung", "code review", "sast", "praktiken"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Sichere Entwicklungspraktiken",
|
||||
"object_class": "process"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AIS-05",
|
||||
"title": "API Security",
|
||||
"statement": "APIs muessen authentifiziert, autorisiert und gegen Missbrauch geschuetzt werden.",
|
||||
"keywords": ["api", "schnittstelle", "authentifizierung", "rate limiting"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "API-Sicherheitskontrollen",
|
||||
"object_class": "interface"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AIS-06",
|
||||
"title": "Automated Application Security Testing",
|
||||
"statement": "Automatisierte Sicherheitstests muessen in die CI/CD-Pipeline integriert werden.",
|
||||
"keywords": ["automatisiert", "ci/cd", "pipeline", "sast", "dast"],
|
||||
"action_hint": "configure",
|
||||
"object_hint": "Automatisierte Sicherheitstests in CI/CD",
|
||||
"object_class": "configuration"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "BCR",
|
||||
"title": "Business Continuity and Resilience",
|
||||
"aliases": ["bcr", "business continuity", "resilience", "geschaeftskontinuitaet", "resilienz"],
|
||||
"keywords": ["continuity", "kontinuitaet", "resilience", "resilienz", "disaster", "recovery", "backup"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "BCR-01",
|
||||
"title": "Business Continuity Planning",
|
||||
"statement": "Ein Geschaeftskontinuitaetsplan muss erstellt, dokumentiert und regelmaessig getestet werden.",
|
||||
"keywords": ["plan", "kontinuitaet", "geschaeft"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Geschaeftskontinuitaetsplan",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "BCR-02",
|
||||
"title": "Risk Assessment for BCM",
|
||||
"statement": "Risikobewertungen muessen fuer geschaeftskritische Prozesse durchgefuehrt werden.",
|
||||
"keywords": ["risiko", "bewertung", "kritisch"],
|
||||
"action_hint": "assess",
|
||||
"object_hint": "BCM-Risikobewertung",
|
||||
"object_class": "risk_artifact"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "BCR-03",
|
||||
"title": "Backup and Recovery",
|
||||
"statement": "Datensicherungen muessen regelmaessig erstellt und Wiederherstellungstests durchgefuehrt werden.",
|
||||
"keywords": ["backup", "sicherung", "wiederherstellung", "recovery"],
|
||||
"action_hint": "maintain",
|
||||
"object_hint": "Datensicherung und Wiederherstellung",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "BCR-04",
|
||||
"title": "Disaster Recovery Planning",
|
||||
"statement": "Ein Disaster-Recovery-Plan muss dokumentiert und jaehrlich getestet werden.",
|
||||
"keywords": ["disaster", "recovery", "katastrophe"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Disaster-Recovery-Plan",
|
||||
"object_class": "policy"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "CCC",
|
||||
"title": "Change Control and Configuration Management",
|
||||
"aliases": ["ccc", "change control", "configuration management", "aenderungsmanagement", "konfigurationsmanagement"],
|
||||
"keywords": ["change", "aenderung", "konfiguration", "configuration", "release", "deployment"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "CCC-01",
|
||||
"title": "Change Management Policy",
|
||||
"statement": "Ein Aenderungsmanagement-Prozess muss definiert und fuer alle Aenderungen angewendet werden.",
|
||||
"keywords": ["policy", "richtlinie", "aenderung"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Aenderungsmanagement-Richtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "CCC-02",
|
||||
"title": "Change Testing",
|
||||
"statement": "Aenderungen muessen vor der Produktivsetzung getestet und genehmigt werden.",
|
||||
"keywords": ["test", "genehmigung", "approval"],
|
||||
"action_hint": "test",
|
||||
"object_hint": "Aenderungstests",
|
||||
"object_class": "process"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "CCC-03",
|
||||
"title": "Configuration Baseline",
|
||||
"statement": "Basiskonfigurationen fuer alle Systeme muessen definiert und dokumentiert werden.",
|
||||
"keywords": ["baseline", "basis", "standard"],
|
||||
"action_hint": "define",
|
||||
"object_hint": "Konfigurationsbaseline",
|
||||
"object_class": "configuration"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "CEK",
|
||||
"title": "Cryptography, Encryption and Key Management",
|
||||
"aliases": ["cek", "cryptography", "encryption", "key management", "kryptographie", "verschluesselung", "schluesselverwaltung"],
|
||||
"keywords": ["kryptographie", "verschluesselung", "schluessel", "key", "encryption", "certificate", "zertifikat"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "CEK-01",
|
||||
"title": "Encryption Policy",
|
||||
"statement": "Verschluesselungsrichtlinien muessen definiert werden, die Algorithmen, Schluessellaengen und Einsatzbereiche festlegen.",
|
||||
"keywords": ["policy", "richtlinie", "algorithmus"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Verschluesselungsrichtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "CEK-02",
|
||||
"title": "Key Management",
|
||||
"statement": "Kryptographische Schluessel muessen ueber ihren Lebenszyklus sicher verwaltet werden.",
|
||||
"keywords": ["key", "schluessel", "management", "lebenszyklus"],
|
||||
"action_hint": "maintain",
|
||||
"object_hint": "Schluesselverwaltung",
|
||||
"object_class": "cryptographic_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "CEK-03",
|
||||
"title": "Data Encryption",
|
||||
"statement": "Sensible Daten muessen bei Speicherung und Uebertragung verschluesselt werden.",
|
||||
"keywords": ["data", "daten", "speicherung", "uebertragung"],
|
||||
"action_hint": "encrypt",
|
||||
"object_hint": "Datenverschluesselung",
|
||||
"object_class": "cryptographic_control"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "DSP",
|
||||
"title": "Data Security and Privacy",
|
||||
"aliases": ["dsp", "data security", "privacy", "datensicherheit", "datenschutz"],
|
||||
"keywords": ["datenschutz", "datensicherheit", "privacy", "data security", "pii", "personenbezogen", "dsgvo"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "DSP-01",
|
||||
"title": "Data Classification",
|
||||
"statement": "Daten muessen nach Sensibilitaet klassifiziert und entsprechend geschuetzt werden.",
|
||||
"keywords": ["klassifizierung", "sensibilitaet", "classification"],
|
||||
"action_hint": "define",
|
||||
"object_hint": "Datenklassifizierung",
|
||||
"object_class": "data"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "DSP-02",
|
||||
"title": "Data Inventory",
|
||||
"statement": "Ein Dateninventar muss gefuehrt werden, das alle Verarbeitungen personenbezogener Daten dokumentiert.",
|
||||
"keywords": ["inventar", "verzeichnis", "verarbeitung", "vvt"],
|
||||
"action_hint": "maintain",
|
||||
"object_hint": "Dateninventar",
|
||||
"object_class": "register"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "DSP-03",
|
||||
"title": "Data Retention and Deletion",
|
||||
"statement": "Aufbewahrungsfristen muessen definiert und Daten nach Ablauf sicher geloescht werden.",
|
||||
"keywords": ["retention", "aufbewahrung", "loeschung", "frist"],
|
||||
"action_hint": "delete",
|
||||
"object_hint": "Datenloeschung nach Frist",
|
||||
"object_class": "data"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "DSP-04",
|
||||
"title": "Privacy Impact Assessment",
|
||||
"statement": "Datenschutz-Folgenabschaetzungen muessen fuer risikoreiche Verarbeitungen durchgefuehrt werden.",
|
||||
"keywords": ["dsfa", "pia", "folgenabschaetzung", "impact"],
|
||||
"action_hint": "assess",
|
||||
"object_hint": "Datenschutz-Folgenabschaetzung",
|
||||
"object_class": "risk_artifact"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "DSP-05",
|
||||
"title": "Data Subject Rights",
|
||||
"statement": "Verfahren zur Bearbeitung von Betroffenenrechten muessen implementiert werden.",
|
||||
"keywords": ["betroffenenrechte", "auskunft", "loeschung", "data subject"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Betroffenenrechte-Verfahren",
|
||||
"object_class": "process"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "GRC",
|
||||
"title": "Governance, Risk and Compliance",
|
||||
"aliases": ["grc", "governance", "risk", "compliance", "risikomanagement"],
|
||||
"keywords": ["governance", "risiko", "compliance", "management", "policy", "richtlinie"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "GRC-01",
|
||||
"title": "Information Security Program",
|
||||
"statement": "Ein umfassendes Informationssicherheitsprogramm muss etabliert und aufrechterhalten werden.",
|
||||
"keywords": ["programm", "sicherheit", "information"],
|
||||
"action_hint": "maintain",
|
||||
"object_hint": "Informationssicherheitsprogramm",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "GRC-02",
|
||||
"title": "Risk Management Program",
|
||||
"statement": "Ein Risikomanagement-Programm muss implementiert werden, das Identifikation, Bewertung und Behandlung umfasst.",
|
||||
"keywords": ["risiko", "management", "bewertung", "behandlung"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Risikomanagement-Programm",
|
||||
"object_class": "process"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "GRC-03",
|
||||
"title": "Compliance Monitoring",
|
||||
"statement": "Die Einhaltung regulatorischer und vertraglicher Anforderungen muss ueberwacht werden.",
|
||||
"keywords": ["compliance", "einhaltung", "regulatorisch", "ueberwachung"],
|
||||
"action_hint": "monitor",
|
||||
"object_hint": "Compliance-Ueberwachung",
|
||||
"object_class": "process"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "IAM",
|
||||
"title": "Identity and Access Management",
|
||||
"aliases": ["iam", "identity", "access management", "identitaetsmanagement", "zugriffsverwaltung"],
|
||||
"keywords": ["identitaet", "zugriff", "identity", "access", "authentifizierung", "autorisierung", "sso"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "IAM-01",
|
||||
"title": "Identity and Access Policy",
|
||||
"statement": "Identitaets- und Zugriffsmanagement-Richtlinien muessen definiert werden.",
|
||||
"keywords": ["policy", "richtlinie"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "IAM-Richtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "IAM-02",
|
||||
"title": "Strong Authentication",
|
||||
"statement": "Starke Authentifizierung (MFA) muss fuer administrative und sicherheitskritische Zugriffe gefordert werden.",
|
||||
"keywords": ["mfa", "stark", "authentifizierung", "admin"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Starke Authentifizierung",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "IAM-03",
|
||||
"title": "Identity Lifecycle Management",
|
||||
"statement": "Identitaeten muessen ueber ihren gesamten Lebenszyklus verwaltet werden.",
|
||||
"keywords": ["lifecycle", "lebenszyklus", "onboarding", "offboarding"],
|
||||
"action_hint": "maintain",
|
||||
"object_hint": "Identitaets-Lebenszyklus",
|
||||
"object_class": "account"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "IAM-04",
|
||||
"title": "Access Review",
|
||||
"statement": "Zugriffsrechte muessen regelmaessig ueberprueft und ueberschuessige Rechte entzogen werden.",
|
||||
"keywords": ["review", "ueberpruefen", "rechte", "rezertifizierung"],
|
||||
"action_hint": "review",
|
||||
"object_hint": "Zugriffsrechte-Review",
|
||||
"object_class": "access_control"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "LOG",
|
||||
"title": "Logging and Monitoring",
|
||||
"aliases": ["log", "logging", "monitoring", "protokollierung", "ueberwachung"],
|
||||
"keywords": ["logging", "monitoring", "protokollierung", "ueberwachung", "siem", "alarm"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "LOG-01",
|
||||
"title": "Logging Policy",
|
||||
"statement": "Protokollierungs-Richtlinien muessen definiert werden, die Umfang und Aufbewahrung festlegen.",
|
||||
"keywords": ["policy", "richtlinie", "umfang", "aufbewahrung"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Protokollierungsrichtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "LOG-02",
|
||||
"title": "Security Event Logging",
|
||||
"statement": "Sicherheitsrelevante Ereignisse muessen erfasst und zentral gespeichert werden.",
|
||||
"keywords": ["event", "ereignis", "sicherheit", "zentral"],
|
||||
"action_hint": "configure",
|
||||
"object_hint": "Sicherheits-Event-Logging",
|
||||
"object_class": "configuration"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "LOG-03",
|
||||
"title": "Monitoring and Alerting",
|
||||
"statement": "Sicherheitsrelevante Logs muessen ueberwacht und bei Anomalien Alarme ausgeloest werden.",
|
||||
"keywords": ["monitoring", "alerting", "alarm", "anomalie"],
|
||||
"action_hint": "monitor",
|
||||
"object_hint": "Log-Ueberwachung und Alarmierung",
|
||||
"object_class": "technical_control"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "SEF",
|
||||
"title": "Security Incident Management",
|
||||
"aliases": ["sef", "security incident", "incident management", "vorfallmanagement", "sicherheitsvorfall"],
|
||||
"keywords": ["vorfall", "incident", "sicherheitsvorfall", "reaktion", "response", "meldung"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "SEF-01",
|
||||
"title": "Incident Management Policy",
|
||||
"statement": "Ein Vorfallmanagement-Prozess muss definiert, dokumentiert und getestet werden.",
|
||||
"keywords": ["policy", "richtlinie", "prozess"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Vorfallmanagement-Richtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SEF-02",
|
||||
"title": "Incident Response Team",
|
||||
"statement": "Ein Incident-Response-Team muss benannt und geschult werden.",
|
||||
"keywords": ["team", "response", "schulung"],
|
||||
"action_hint": "define",
|
||||
"object_hint": "Incident-Response-Team",
|
||||
"object_class": "role"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SEF-03",
|
||||
"title": "Incident Reporting",
|
||||
"statement": "Sicherheitsvorfaelle muessen innerhalb definierter Fristen an zustaendige Stellen gemeldet werden.",
|
||||
"keywords": ["reporting", "meldung", "frist", "behoerde"],
|
||||
"action_hint": "report",
|
||||
"object_hint": "Vorfallmeldung",
|
||||
"object_class": "incident"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SEF-04",
|
||||
"title": "Incident Lessons Learned",
|
||||
"statement": "Nach jedem Vorfall muss eine Nachbereitung mit Lessons Learned durchgefuehrt werden.",
|
||||
"keywords": ["lessons learned", "nachbereitung", "verbesserung"],
|
||||
"action_hint": "review",
|
||||
"object_hint": "Vorfall-Nachbereitung",
|
||||
"object_class": "record"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "TVM",
|
||||
"title": "Threat and Vulnerability Management",
|
||||
"aliases": ["tvm", "threat", "vulnerability", "schwachstelle", "bedrohung", "schwachstellenmanagement"],
|
||||
"keywords": ["schwachstelle", "vulnerability", "threat", "bedrohung", "patch", "scan"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "TVM-01",
|
||||
"title": "Vulnerability Management Policy",
|
||||
"statement": "Schwachstellenmanagement-Richtlinien muessen definiert und umgesetzt werden.",
|
||||
"keywords": ["policy", "richtlinie"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Schwachstellenmanagement-Richtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "TVM-02",
|
||||
"title": "Vulnerability Scanning",
|
||||
"statement": "Systeme muessen regelmaessig auf Schwachstellen gescannt werden.",
|
||||
"keywords": ["scan", "scanning", "regelmaessig"],
|
||||
"action_hint": "test",
|
||||
"object_hint": "Schwachstellenscan",
|
||||
"object_class": "system"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "TVM-03",
|
||||
"title": "Vulnerability Remediation",
|
||||
"statement": "Erkannte Schwachstellen muessen priorisiert und innerhalb definierter Fristen behoben werden.",
|
||||
"keywords": ["remediation", "behebung", "frist", "priorisierung"],
|
||||
"action_hint": "remediate",
|
||||
"object_hint": "Schwachstellenbehebung",
|
||||
"object_class": "system"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "TVM-04",
|
||||
"title": "Penetration Testing",
|
||||
"statement": "Regelmaessige Penetrationstests muessen durchgefuehrt werden.",
|
||||
"keywords": ["penetration", "pentest", "test"],
|
||||
"action_hint": "test",
|
||||
"object_hint": "Penetrationstest",
|
||||
"object_class": "system"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,514 @@
|
||||
{
|
||||
"framework_id": "NIST_SP800_53",
|
||||
"display_name": "NIST SP 800-53 Rev. 5",
|
||||
"license": {
|
||||
"type": "public_domain",
|
||||
"rag_allowed": true,
|
||||
"use_as_metadata": true
|
||||
},
|
||||
"domains": [
|
||||
{
|
||||
"domain_id": "AC",
|
||||
"title": "Access Control",
|
||||
"aliases": ["access control", "zugriffskontrolle", "zugriffssteuerung"],
|
||||
"keywords": ["access", "zugriff", "berechtigung", "authorization", "autorisierung"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "AC-1",
|
||||
"title": "Access Control Policy and Procedures",
|
||||
"statement": "Zugriffskontrollrichtlinien und -verfahren muessen definiert, dokumentiert und regelmaessig ueberprueft werden.",
|
||||
"keywords": ["policy", "richtlinie", "verfahren", "procedures"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Zugriffskontrollrichtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AC-2",
|
||||
"title": "Account Management",
|
||||
"statement": "Benutzerkonten muessen ueber ihren gesamten Lebenszyklus verwaltet werden: Erstellung, Aktivierung, Aenderung, Deaktivierung und Loeschung.",
|
||||
"keywords": ["account", "konto", "benutzer", "lifecycle", "lebenszyklus"],
|
||||
"action_hint": "maintain",
|
||||
"object_hint": "Benutzerkontenverwaltung",
|
||||
"object_class": "account"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AC-3",
|
||||
"title": "Access Enforcement",
|
||||
"statement": "Der Zugriff auf Systemressourcen muss gemaess der definierten Zugriffskontrollrichtlinie durchgesetzt werden.",
|
||||
"keywords": ["enforcement", "durchsetzung", "ressourcen", "system"],
|
||||
"action_hint": "restrict_access",
|
||||
"object_hint": "Zugriffsdurchsetzung",
|
||||
"object_class": "access_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AC-5",
|
||||
"title": "Separation of Duties",
|
||||
"statement": "Aufgabentrennung muss definiert und durchgesetzt werden, um Interessenkonflikte und Missbrauch zu verhindern.",
|
||||
"keywords": ["separation", "trennung", "duties", "aufgaben", "funktionstrennung"],
|
||||
"action_hint": "define",
|
||||
"object_hint": "Aufgabentrennung",
|
||||
"object_class": "role"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AC-6",
|
||||
"title": "Least Privilege",
|
||||
"statement": "Zugriffsrechte muessen nach dem Prinzip der minimalen Rechte vergeben werden.",
|
||||
"keywords": ["least privilege", "minimal", "rechte", "privileg"],
|
||||
"action_hint": "restrict_access",
|
||||
"object_hint": "Minimale Rechtevergabe",
|
||||
"object_class": "access_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AC-7",
|
||||
"title": "Unsuccessful Logon Attempts",
|
||||
"statement": "Fehlgeschlagene Anmeldeversuche muessen begrenzt und ueberwacht werden.",
|
||||
"keywords": ["logon", "anmeldung", "fehlgeschlagen", "sperre", "lockout"],
|
||||
"action_hint": "monitor",
|
||||
"object_hint": "Anmeldeversuchsueberwachung",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AC-17",
|
||||
"title": "Remote Access",
|
||||
"statement": "Fernzugriff muss autorisiert, ueberwacht und verschluesselt werden.",
|
||||
"keywords": ["remote", "fern", "vpn", "fernzugriff"],
|
||||
"action_hint": "configure",
|
||||
"object_hint": "Fernzugriffskonfiguration",
|
||||
"object_class": "technical_control"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "AU",
|
||||
"title": "Audit and Accountability",
|
||||
"aliases": ["audit", "protokollierung", "accountability", "rechenschaftspflicht"],
|
||||
"keywords": ["audit", "log", "protokoll", "nachvollziehbarkeit", "logging"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "AU-1",
|
||||
"title": "Audit Policy and Procedures",
|
||||
"statement": "Audit- und Protokollierungsrichtlinien muessen definiert und regelmaessig ueberprueft werden.",
|
||||
"keywords": ["policy", "richtlinie", "audit"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Auditrichtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AU-2",
|
||||
"title": "Event Logging",
|
||||
"statement": "Sicherheitsrelevante Ereignisse muessen identifiziert und protokolliert werden.",
|
||||
"keywords": ["event", "ereignis", "logging", "protokollierung"],
|
||||
"action_hint": "configure",
|
||||
"object_hint": "Ereignisprotokollierung",
|
||||
"object_class": "configuration"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AU-3",
|
||||
"title": "Content of Audit Records",
|
||||
"statement": "Audit-Eintraege muessen ausreichende Informationen enthalten: Zeitstempel, Quelle, Ergebnis, Identitaet.",
|
||||
"keywords": ["content", "inhalt", "record", "eintrag"],
|
||||
"action_hint": "define",
|
||||
"object_hint": "Audit-Eintragsformat",
|
||||
"object_class": "record"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AU-6",
|
||||
"title": "Audit Record Review and Reporting",
|
||||
"statement": "Audit-Eintraege muessen regelmaessig ueberprueft werden; festgestellte Anomalien muessen gemeldet werden.",
|
||||
"keywords": ["review", "ueberpruefen", "reporting", "anomalie"],
|
||||
"action_hint": "review",
|
||||
"object_hint": "Audit-Ueberpruefung",
|
||||
"object_class": "record"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AU-9",
|
||||
"title": "Protection of Audit Information",
|
||||
"statement": "Audit-Daten muessen vor unbefugtem Zugriff, Aenderung und Loeschung geschuetzt werden.",
|
||||
"keywords": ["schutz", "protection", "integritaet", "integrity"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Audit-Datenschutz",
|
||||
"object_class": "technical_control"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "AT",
|
||||
"title": "Awareness and Training",
|
||||
"aliases": ["awareness", "training", "schulung", "sensibilisierung"],
|
||||
"keywords": ["training", "schulung", "awareness", "sensibilisierung", "weiterbildung"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "AT-1",
|
||||
"title": "Policy and Procedures",
|
||||
"statement": "Schulungs- und Sensibilisierungsrichtlinien muessen definiert und regelmaessig aktualisiert werden.",
|
||||
"keywords": ["policy", "richtlinie"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Schulungsrichtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AT-2",
|
||||
"title": "Literacy Training and Awareness",
|
||||
"statement": "Alle Mitarbeiter muessen regelmaessig Sicherheitsschulungen erhalten.",
|
||||
"keywords": ["mitarbeiter", "schulung", "sicherheit"],
|
||||
"action_hint": "train",
|
||||
"object_hint": "Sicherheitsschulung",
|
||||
"object_class": "training"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "AT-3",
|
||||
"title": "Role-Based Training",
|
||||
"statement": "Rollenbasierte Sicherheitsschulungen muessen fuer Mitarbeiter mit besonderen Sicherheitsaufgaben durchgefuehrt werden.",
|
||||
"keywords": ["rollenbasiert", "role-based", "speziell"],
|
||||
"action_hint": "train",
|
||||
"object_hint": "Rollenbasierte Sicherheitsschulung",
|
||||
"object_class": "training"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "CM",
|
||||
"title": "Configuration Management",
|
||||
"aliases": ["configuration management", "konfigurationsmanagement", "konfiguration"],
|
||||
"keywords": ["konfiguration", "configuration", "baseline", "haertung", "hardening"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "CM-1",
|
||||
"title": "Policy and Procedures",
|
||||
"statement": "Konfigurationsmanagement-Richtlinien muessen dokumentiert und gepflegt werden.",
|
||||
"keywords": ["policy", "richtlinie"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Konfigurationsmanagement-Richtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "CM-2",
|
||||
"title": "Baseline Configuration",
|
||||
"statement": "Basiskonfigurationen fuer Systeme muessen definiert, dokumentiert und gepflegt werden.",
|
||||
"keywords": ["baseline", "basis", "standard"],
|
||||
"action_hint": "define",
|
||||
"object_hint": "Basiskonfiguration",
|
||||
"object_class": "configuration"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "CM-6",
|
||||
"title": "Configuration Settings",
|
||||
"statement": "Sicherheitsrelevante Konfigurationseinstellungen muessen definiert und durchgesetzt werden.",
|
||||
"keywords": ["settings", "einstellungen", "sicherheit"],
|
||||
"action_hint": "configure",
|
||||
"object_hint": "Sicherheitskonfiguration",
|
||||
"object_class": "configuration"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "CM-7",
|
||||
"title": "Least Functionality",
|
||||
"statement": "Systeme muessen so konfiguriert werden, dass nur notwendige Funktionen aktiv sind.",
|
||||
"keywords": ["least functionality", "minimal", "dienste", "ports"],
|
||||
"action_hint": "configure",
|
||||
"object_hint": "Minimalkonfiguration",
|
||||
"object_class": "configuration"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "CM-8",
|
||||
"title": "System Component Inventory",
|
||||
"statement": "Ein Inventar aller Systemkomponenten muss gefuehrt und aktuell gehalten werden.",
|
||||
"keywords": ["inventar", "inventory", "komponenten", "assets"],
|
||||
"action_hint": "maintain",
|
||||
"object_hint": "Systemkomponenten-Inventar",
|
||||
"object_class": "register"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "IA",
|
||||
"title": "Identification and Authentication",
|
||||
"aliases": ["identification", "authentication", "identifikation", "authentifizierung"],
|
||||
"keywords": ["authentifizierung", "identifikation", "identity", "passwort", "mfa", "credential"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "IA-1",
|
||||
"title": "Policy and Procedures",
|
||||
"statement": "Identifikations- und Authentifizierungsrichtlinien muessen dokumentiert und regelmaessig ueberprueft werden.",
|
||||
"keywords": ["policy", "richtlinie"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Authentifizierungsrichtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "IA-2",
|
||||
"title": "Identification and Authentication",
|
||||
"statement": "Benutzer und Geraete muessen eindeutig identifiziert und authentifiziert werden.",
|
||||
"keywords": ["benutzer", "geraete", "identifizierung"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Benutzerauthentifizierung",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "IA-2(1)",
|
||||
"title": "Multi-Factor Authentication",
|
||||
"statement": "Multi-Faktor-Authentifizierung muss fuer privilegierte Konten implementiert werden.",
|
||||
"keywords": ["mfa", "multi-faktor", "zwei-faktor", "2fa"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Multi-Faktor-Authentifizierung",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "IA-5",
|
||||
"title": "Authenticator Management",
|
||||
"statement": "Authentifizierungsmittel (Passwoerter, Token, Zertifikate) muessen sicher verwaltet werden.",
|
||||
"keywords": ["passwort", "token", "zertifikat", "credential"],
|
||||
"action_hint": "maintain",
|
||||
"object_hint": "Authentifizierungsmittel-Verwaltung",
|
||||
"object_class": "technical_control"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "IR",
|
||||
"title": "Incident Response",
|
||||
"aliases": ["incident response", "vorfallbehandlung", "vorfallreaktion", "incident management"],
|
||||
"keywords": ["vorfall", "incident", "reaktion", "response", "breach", "sicherheitsvorfall"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "IR-1",
|
||||
"title": "Policy and Procedures",
|
||||
"statement": "Vorfallreaktionsrichtlinien und -verfahren muessen definiert und regelmaessig aktualisiert werden.",
|
||||
"keywords": ["policy", "richtlinie", "verfahren"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Vorfallreaktionsrichtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "IR-2",
|
||||
"title": "Incident Response Training",
|
||||
"statement": "Mitarbeiter muessen regelmaessig in der Vorfallreaktion geschult werden.",
|
||||
"keywords": ["training", "schulung"],
|
||||
"action_hint": "train",
|
||||
"object_hint": "Vorfallreaktionsschulung",
|
||||
"object_class": "training"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "IR-4",
|
||||
"title": "Incident Handling",
|
||||
"statement": "Ein strukturierter Prozess fuer die Vorfallbehandlung muss implementiert werden: Erkennung, Analyse, Eindaemmung, Behebung.",
|
||||
"keywords": ["handling", "behandlung", "erkennung", "eindaemmung"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Vorfallbehandlungsprozess",
|
||||
"object_class": "process"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "IR-5",
|
||||
"title": "Incident Monitoring",
|
||||
"statement": "Sicherheitsvorfaelle muessen kontinuierlich ueberwacht und verfolgt werden.",
|
||||
"keywords": ["monitoring", "ueberwachung", "tracking"],
|
||||
"action_hint": "monitor",
|
||||
"object_hint": "Vorfallsueberwachung",
|
||||
"object_class": "incident"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "IR-6",
|
||||
"title": "Incident Reporting",
|
||||
"statement": "Sicherheitsvorfaelle muessen innerhalb definierter Fristen an die zustaendigen Stellen gemeldet werden.",
|
||||
"keywords": ["reporting", "meldung", "melden", "frist"],
|
||||
"action_hint": "report",
|
||||
"object_hint": "Vorfallmeldung",
|
||||
"object_class": "incident"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "IR-8",
|
||||
"title": "Incident Response Plan",
|
||||
"statement": "Ein Vorfallreaktionsplan muss dokumentiert und regelmaessig getestet werden.",
|
||||
"keywords": ["plan", "dokumentation", "test"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Vorfallreaktionsplan",
|
||||
"object_class": "policy"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "RA",
|
||||
"title": "Risk Assessment",
|
||||
"aliases": ["risk assessment", "risikobewertung", "risikoanalyse"],
|
||||
"keywords": ["risiko", "risk", "bewertung", "assessment", "analyse", "bedrohung", "threat"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "RA-1",
|
||||
"title": "Policy and Procedures",
|
||||
"statement": "Risikobewertungsrichtlinien muessen dokumentiert und regelmaessig aktualisiert werden.",
|
||||
"keywords": ["policy", "richtlinie"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Risikobewertungsrichtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "RA-3",
|
||||
"title": "Risk Assessment",
|
||||
"statement": "Regelmaessige Risikobewertungen muessen durchgefuehrt und dokumentiert werden.",
|
||||
"keywords": ["bewertung", "assessment", "regelmaessig"],
|
||||
"action_hint": "assess",
|
||||
"object_hint": "Risikobewertung",
|
||||
"object_class": "risk_artifact"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "RA-5",
|
||||
"title": "Vulnerability Monitoring and Scanning",
|
||||
"statement": "Systeme muessen regelmaessig auf Schwachstellen gescannt und ueberwacht werden.",
|
||||
"keywords": ["vulnerability", "schwachstelle", "scan", "monitoring"],
|
||||
"action_hint": "monitor",
|
||||
"object_hint": "Schwachstellenueberwachung",
|
||||
"object_class": "system"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "SC",
|
||||
"title": "System and Communications Protection",
|
||||
"aliases": ["system protection", "communications protection", "kommunikationsschutz", "systemschutz"],
|
||||
"keywords": ["verschluesselung", "encryption", "tls", "netzwerk", "network", "kommunikation", "firewall"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "SC-1",
|
||||
"title": "Policy and Procedures",
|
||||
"statement": "System- und Kommunikationsschutzrichtlinien muessen dokumentiert und aktuell gehalten werden.",
|
||||
"keywords": ["policy", "richtlinie"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Kommunikationsschutzrichtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SC-7",
|
||||
"title": "Boundary Protection",
|
||||
"statement": "Netzwerkgrenzen muessen durch Firewall-Regeln und Zugangskontrollen geschuetzt werden.",
|
||||
"keywords": ["boundary", "grenze", "firewall", "netzwerk"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Netzwerkgrenzschutz",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SC-8",
|
||||
"title": "Transmission Confidentiality and Integrity",
|
||||
"statement": "Daten muessen bei der Uebertragung durch Verschluesselung geschuetzt werden.",
|
||||
"keywords": ["transmission", "uebertragung", "verschluesselung", "tls"],
|
||||
"action_hint": "encrypt",
|
||||
"object_hint": "Uebertragungsverschluesselung",
|
||||
"object_class": "cryptographic_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SC-12",
|
||||
"title": "Cryptographic Key Establishment and Management",
|
||||
"statement": "Kryptographische Schluessel muessen sicher erzeugt, verteilt, gespeichert und widerrufen werden.",
|
||||
"keywords": ["key", "schluessel", "kryptographie", "management"],
|
||||
"action_hint": "maintain",
|
||||
"object_hint": "Schluesselverwaltung",
|
||||
"object_class": "cryptographic_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SC-13",
|
||||
"title": "Cryptographic Protection",
|
||||
"statement": "Kryptographische Mechanismen muessen gemaess anerkannten Standards implementiert werden.",
|
||||
"keywords": ["kryptographie", "verschluesselung", "standard"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Kryptographischer Schutz",
|
||||
"object_class": "cryptographic_control"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "SI",
|
||||
"title": "System and Information Integrity",
|
||||
"aliases": ["system integrity", "information integrity", "systemintegritaet", "informationsintegritaet"],
|
||||
"keywords": ["integritaet", "integrity", "malware", "patch", "flaw", "schwachstelle"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "SI-1",
|
||||
"title": "Policy and Procedures",
|
||||
"statement": "System- und Informationsintegritaetsrichtlinien muessen dokumentiert und regelmaessig ueberprueft werden.",
|
||||
"keywords": ["policy", "richtlinie"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Integritaetsrichtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SI-2",
|
||||
"title": "Flaw Remediation",
|
||||
"statement": "Bekannte Schwachstellen muessen innerhalb definierter Fristen behoben werden.",
|
||||
"keywords": ["flaw", "schwachstelle", "patch", "behebung", "remediation"],
|
||||
"action_hint": "remediate",
|
||||
"object_hint": "Schwachstellenbehebung",
|
||||
"object_class": "system"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SI-3",
|
||||
"title": "Malicious Code Protection",
|
||||
"statement": "Systeme muessen vor Schadsoftware geschuetzt werden durch Erkennung und Abwehrmechanismen.",
|
||||
"keywords": ["malware", "schadsoftware", "antivirus", "erkennung"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Schadsoftwareschutz",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SI-4",
|
||||
"title": "System Monitoring",
|
||||
"statement": "Systeme muessen kontinuierlich auf Sicherheitsereignisse und Anomalien ueberwacht werden.",
|
||||
"keywords": ["monitoring", "ueberwachung", "anomalie", "siem"],
|
||||
"action_hint": "monitor",
|
||||
"object_hint": "Systemueberwachung",
|
||||
"object_class": "system"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SI-5",
|
||||
"title": "Security Alerts and Advisories",
|
||||
"statement": "Sicherheitswarnungen muessen empfangen und bewertet werden, und es muss angemessen darauf reagiert werden.",
|
||||
"keywords": ["alert", "warnung", "advisory", "cve"],
|
||||
"action_hint": "monitor",
|
||||
"object_hint": "Sicherheitswarnungen",
|
||||
"object_class": "incident"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "SA",
|
||||
"title": "System and Services Acquisition",
|
||||
"aliases": ["system acquisition", "services acquisition", "systembeschaffung", "secure development"],
|
||||
"keywords": ["beschaffung", "acquisition", "entwicklung", "development", "lieferkette", "supply chain"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "SA-1",
|
||||
"title": "Policy and Procedures",
|
||||
"statement": "Beschaffungsrichtlinien mit Sicherheitsanforderungen muessen dokumentiert werden.",
|
||||
"keywords": ["policy", "richtlinie", "beschaffung"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Beschaffungsrichtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SA-8",
|
||||
"title": "Security and Privacy Engineering Principles",
|
||||
"statement": "Sicherheits- und Datenschutzprinzipien muessen in die Systementwicklung integriert werden.",
|
||||
"keywords": ["engineering", "development", "prinzipien", "design"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Security-by-Design-Prinzipien",
|
||||
"object_class": "process"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SA-11",
|
||||
"title": "Developer Testing and Evaluation",
|
||||
"statement": "Entwickler muessen Sicherheitstests und Code-Reviews durchfuehren.",
|
||||
"keywords": ["testing", "test", "code review", "evaluation"],
|
||||
"action_hint": "test",
|
||||
"object_hint": "Entwickler-Sicherheitstests",
|
||||
"object_class": "process"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "SA-12",
|
||||
"title": "Supply Chain Protection",
|
||||
"statement": "Lieferkettenrisiken muessen bewertet und Schutzmassnahmen implementiert werden.",
|
||||
"keywords": ["supply chain", "lieferkette", "third party", "drittanbieter"],
|
||||
"action_hint": "assess",
|
||||
"object_hint": "Lieferkettenrisikobewertung",
|
||||
"object_class": "risk_artifact"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,353 @@
|
||||
{
|
||||
"framework_id": "OWASP_ASVS",
|
||||
"display_name": "OWASP Application Security Verification Standard 4.0",
|
||||
"license": {
|
||||
"type": "cc_by_sa_4",
|
||||
"rag_allowed": true,
|
||||
"use_as_metadata": true
|
||||
},
|
||||
"domains": [
|
||||
{
|
||||
"domain_id": "V1",
|
||||
"title": "Architecture, Design and Threat Modeling",
|
||||
"aliases": ["architecture", "architektur", "design", "threat modeling", "bedrohungsmodellierung"],
|
||||
"keywords": ["architektur", "design", "threat model", "bedrohung", "modellierung"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "V1.1",
|
||||
"title": "Secure Software Development Lifecycle",
|
||||
"statement": "Ein sicherer Softwareentwicklungs-Lebenszyklus (SSDLC) muss definiert und angewendet werden.",
|
||||
"keywords": ["sdlc", "lifecycle", "lebenszyklus", "entwicklung"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Sicherer Entwicklungs-Lebenszyklus",
|
||||
"object_class": "process"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V1.2",
|
||||
"title": "Authentication Architecture",
|
||||
"statement": "Die Authentifizierungsarchitektur muss dokumentiert und regelmaessig ueberprueft werden.",
|
||||
"keywords": ["authentication", "authentifizierung", "architektur"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Authentifizierungsarchitektur",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V1.4",
|
||||
"title": "Access Control Architecture",
|
||||
"statement": "Die Zugriffskontrollarchitektur muss dokumentiert und zentral durchgesetzt werden.",
|
||||
"keywords": ["access control", "zugriffskontrolle", "architektur"],
|
||||
"action_hint": "document",
|
||||
"object_hint": "Zugriffskontrollarchitektur",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V1.5",
|
||||
"title": "Input and Output Architecture",
|
||||
"statement": "Eingabe- und Ausgabevalidierung muss architektonisch verankert und durchgaengig angewendet werden.",
|
||||
"keywords": ["input", "output", "eingabe", "ausgabe", "validierung"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Ein-/Ausgabevalidierung",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V1.6",
|
||||
"title": "Cryptographic Architecture",
|
||||
"statement": "Kryptographische Mechanismen muessen architektonisch definiert und standardisiert sein.",
|
||||
"keywords": ["crypto", "kryptographie", "verschluesselung"],
|
||||
"action_hint": "define",
|
||||
"object_hint": "Kryptographie-Architektur",
|
||||
"object_class": "cryptographic_control"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "V2",
|
||||
"title": "Authentication",
|
||||
"aliases": ["authentication", "authentifizierung", "anmeldung", "login"],
|
||||
"keywords": ["authentication", "authentifizierung", "passwort", "login", "anmeldung", "credential"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "V2.1",
|
||||
"title": "Password Security",
|
||||
"statement": "Passwortrichtlinien muessen Mindestlaenge, Komplexitaet und Sperrmechanismen definieren.",
|
||||
"keywords": ["passwort", "password", "laenge", "komplexitaet"],
|
||||
"action_hint": "define",
|
||||
"object_hint": "Passwortrichtlinie",
|
||||
"object_class": "policy"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V2.2",
|
||||
"title": "General Authenticator Security",
|
||||
"statement": "Authentifizierungsmittel muessen sicher gespeichert und uebertragen werden.",
|
||||
"keywords": ["authenticator", "credential", "speicherung"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Sichere Credential-Verwaltung",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V2.7",
|
||||
"title": "Out-of-Band Verification",
|
||||
"statement": "Out-of-Band-Verifikationsmechanismen muessen sicher implementiert werden.",
|
||||
"keywords": ["oob", "out-of-band", "sms", "push"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Out-of-Band-Verifikation",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V2.8",
|
||||
"title": "Multi-Factor Authentication",
|
||||
"statement": "Multi-Faktor-Authentifizierung muss fuer sicherheitskritische Funktionen verfuegbar sein.",
|
||||
"keywords": ["mfa", "multi-faktor", "totp", "fido"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Multi-Faktor-Authentifizierung",
|
||||
"object_class": "technical_control"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "V3",
|
||||
"title": "Session Management",
|
||||
"aliases": ["session", "sitzung", "session management", "sitzungsverwaltung"],
|
||||
"keywords": ["session", "sitzung", "token", "cookie", "timeout"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "V3.1",
|
||||
"title": "Session Management Security",
|
||||
"statement": "Sitzungstoken muessen sicher erzeugt, uebertragen und invalidiert werden.",
|
||||
"keywords": ["token", "sitzung", "sicherheit"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Sichere Sitzungsverwaltung",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V3.3",
|
||||
"title": "Session Termination",
|
||||
"statement": "Sitzungen muessen nach Inaktivitaet und bei Abmeldung zuverlaessig beendet werden.",
|
||||
"keywords": ["termination", "timeout", "abmeldung", "beenden"],
|
||||
"action_hint": "configure",
|
||||
"object_hint": "Sitzungstimeout",
|
||||
"object_class": "configuration"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V3.5",
|
||||
"title": "Token-Based Session Management",
|
||||
"statement": "Tokenbasierte Sitzungsmechanismen muessen gegen Diebstahl und Replay geschuetzt sein.",
|
||||
"keywords": ["jwt", "token", "replay", "diebstahl"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Token-Schutz",
|
||||
"object_class": "technical_control"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "V5",
|
||||
"title": "Validation, Sanitization and Encoding",
|
||||
"aliases": ["validation", "validierung", "sanitization", "encoding", "eingabevalidierung"],
|
||||
"keywords": ["validierung", "sanitization", "encoding", "xss", "injection", "eingabe"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "V5.1",
|
||||
"title": "Input Validation",
|
||||
"statement": "Alle Eingabedaten muessen serverseitig validiert werden.",
|
||||
"keywords": ["input", "eingabe", "validierung", "serverseitig"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Eingabevalidierung",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V5.2",
|
||||
"title": "Sanitization and Sandboxing",
|
||||
"statement": "Eingaben muessen bereinigt und in sicherer Umgebung verarbeitet werden.",
|
||||
"keywords": ["sanitization", "bereinigung", "sandbox"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Eingabebereinigung",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V5.3",
|
||||
"title": "Output Encoding and Injection Prevention",
|
||||
"statement": "Ausgaben muessen kontextabhaengig kodiert werden, um Injection-Angriffe zu verhindern.",
|
||||
"keywords": ["output", "encoding", "injection", "xss", "sql"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Ausgabe-Encoding",
|
||||
"object_class": "technical_control"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "V6",
|
||||
"title": "Stored Cryptography",
|
||||
"aliases": ["cryptography", "kryptographie", "verschluesselung", "stored cryptography"],
|
||||
"keywords": ["kryptographie", "verschluesselung", "hashing", "schluessel", "key management"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "V6.1",
|
||||
"title": "Data Classification",
|
||||
"statement": "Daten muessen klassifiziert und entsprechend ihrer Schutzklasse behandelt werden.",
|
||||
"keywords": ["klassifizierung", "classification", "schutzklasse"],
|
||||
"action_hint": "define",
|
||||
"object_hint": "Datenklassifizierung",
|
||||
"object_class": "data"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V6.2",
|
||||
"title": "Algorithms",
|
||||
"statement": "Nur zugelassene und aktuelle kryptographische Algorithmen duerfen verwendet werden.",
|
||||
"keywords": ["algorithmus", "algorithm", "aes", "rsa"],
|
||||
"action_hint": "configure",
|
||||
"object_hint": "Kryptographische Algorithmen",
|
||||
"object_class": "cryptographic_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V6.4",
|
||||
"title": "Secret Management",
|
||||
"statement": "Geheimnisse (Schluessel, Passwoerter, Tokens) muessen in einem Secret-Management-System verwaltet werden.",
|
||||
"keywords": ["secret", "geheimnis", "vault", "key management"],
|
||||
"action_hint": "maintain",
|
||||
"object_hint": "Secret-Management",
|
||||
"object_class": "cryptographic_control"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "V8",
|
||||
"title": "Data Protection",
|
||||
"aliases": ["data protection", "datenschutz", "datenverarbeitung"],
|
||||
"keywords": ["datenschutz", "data protection", "pii", "personenbezogen", "privacy"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "V8.1",
|
||||
"title": "General Data Protection",
|
||||
"statement": "Personenbezogene Daten muessen gemaess Datenschutzanforderungen geschuetzt werden.",
|
||||
"keywords": ["personenbezogen", "pii", "datenschutz"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Datenschutzmassnahmen",
|
||||
"object_class": "data"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V8.2",
|
||||
"title": "Client-Side Data Protection",
|
||||
"statement": "Clientseitig gespeicherte sensible Daten muessen geschuetzt und minimiert werden.",
|
||||
"keywords": ["client", "browser", "localstorage", "cookie"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Clientseitiger Datenschutz",
|
||||
"object_class": "technical_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V8.3",
|
||||
"title": "Sensitive Private Data",
|
||||
"statement": "Sensible Daten muessen bei Speicherung und Verarbeitung besonders geschuetzt werden.",
|
||||
"keywords": ["sensibel", "vertraulich", "speicherung"],
|
||||
"action_hint": "encrypt",
|
||||
"object_hint": "Verschluesselung sensibler Daten",
|
||||
"object_class": "data"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "V9",
|
||||
"title": "Communication",
|
||||
"aliases": ["communication", "kommunikation", "tls", "transport"],
|
||||
"keywords": ["tls", "ssl", "https", "transport", "kommunikation", "verschluesselung"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "V9.1",
|
||||
"title": "Client Communication Security",
|
||||
"statement": "Alle Client-Server-Kommunikation muss ueber TLS verschluesselt werden.",
|
||||
"keywords": ["tls", "https", "client", "server"],
|
||||
"action_hint": "encrypt",
|
||||
"object_hint": "TLS-Transportverschluesselung",
|
||||
"object_class": "cryptographic_control"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V9.2",
|
||||
"title": "Server Communication Security",
|
||||
"statement": "Server-zu-Server-Kommunikation muss authentifiziert und verschluesselt erfolgen.",
|
||||
"keywords": ["server", "mtls", "backend"],
|
||||
"action_hint": "encrypt",
|
||||
"object_hint": "Server-Kommunikationsverschluesselung",
|
||||
"object_class": "cryptographic_control"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "V13",
|
||||
"title": "API and Web Service",
|
||||
"aliases": ["api", "web service", "rest", "graphql", "webservice"],
|
||||
"keywords": ["api", "rest", "graphql", "webservice", "endpoint", "schnittstelle"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "V13.1",
|
||||
"title": "Generic Web Service Security",
|
||||
"statement": "Web-Services muessen gegen gaengige Angriffe abgesichert werden.",
|
||||
"keywords": ["web service", "sicherheit", "angriff"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "Web-Service-Absicherung",
|
||||
"object_class": "interface"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V13.2",
|
||||
"title": "RESTful Web Service",
|
||||
"statement": "REST-APIs muessen Input-Validierung, Rate Limiting und sichere Authentifizierung implementieren.",
|
||||
"keywords": ["rest", "api", "rate limiting", "input"],
|
||||
"action_hint": "implement",
|
||||
"object_hint": "REST-API-Absicherung",
|
||||
"object_class": "interface"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V13.4",
|
||||
"title": "GraphQL and Web Services",
|
||||
"statement": "GraphQL-Endpoints muessen gegen Query-Complexity-Angriffe und Introspection geschuetzt werden.",
|
||||
"keywords": ["graphql", "query", "complexity", "introspection"],
|
||||
"action_hint": "configure",
|
||||
"object_hint": "GraphQL-Absicherung",
|
||||
"object_class": "interface"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"domain_id": "V14",
|
||||
"title": "Configuration",
|
||||
"aliases": ["configuration", "konfiguration", "hardening", "haertung"],
|
||||
"keywords": ["konfiguration", "hardening", "haertung", "header", "deployment"],
|
||||
"subcontrols": [
|
||||
{
|
||||
"subcontrol_id": "V14.1",
|
||||
"title": "Build and Deploy",
|
||||
"statement": "Build- und Deployment-Prozesse muessen sicher konfiguriert und reproduzierbar sein.",
|
||||
"keywords": ["build", "deploy", "ci/cd", "pipeline"],
|
||||
"action_hint": "configure",
|
||||
"object_hint": "Sichere Build-Pipeline",
|
||||
"object_class": "configuration"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V14.2",
|
||||
"title": "Dependency Management",
|
||||
"statement": "Abhaengigkeiten muessen auf Schwachstellen geprueft und aktuell gehalten werden.",
|
||||
"keywords": ["dependency", "abhaengigkeit", "sca", "sbom"],
|
||||
"action_hint": "maintain",
|
||||
"object_hint": "Abhaengigkeitsverwaltung",
|
||||
"object_class": "system"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V14.3",
|
||||
"title": "Unintended Security Disclosure",
|
||||
"statement": "Fehlermeldungen und Debug-Informationen duerfen keine sicherheitsrelevanten Details preisgeben.",
|
||||
"keywords": ["disclosure", "fehlermeldung", "debug", "information leakage"],
|
||||
"action_hint": "configure",
|
||||
"object_hint": "Fehlerbehandlung",
|
||||
"object_class": "configuration"
|
||||
},
|
||||
{
|
||||
"subcontrol_id": "V14.4",
|
||||
"title": "HTTP Security Headers",
|
||||
"statement": "HTTP-Sicherheitsheader muessen korrekt konfiguriert sein.",
|
||||
"keywords": ["header", "csp", "hsts", "x-frame"],
|
||||
"action_hint": "configure",
|
||||
"object_hint": "HTTP-Sicherheitsheader",
|
||||
"object_class": "configuration"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,205 @@
|
||||
"""
|
||||
Source-Type-Klassifikation fuer Regulierungen und Frameworks.
|
||||
|
||||
Dreistufiges Modell der normativen Verbindlichkeit:
|
||||
|
||||
Stufe 1 — GESETZ (law):
|
||||
Rechtlich bindend. Bussgeld bei Verstoss.
|
||||
Beispiele: DSGVO, NIS2, AI Act, CRA
|
||||
|
||||
Stufe 2 — LEITLINIE (guideline):
|
||||
Offizielle Auslegungshilfe von Aufsichtsbehoerden.
|
||||
Beweislastumkehr: Wer abweicht, muss begruenden warum.
|
||||
Beispiele: EDPB-Leitlinien, BSI-Standards, WP29-Dokumente
|
||||
|
||||
Stufe 3 — FRAMEWORK (framework):
|
||||
Freiwillige Best Practices, nicht rechtsverbindlich.
|
||||
Aber: Koennen als "Stand der Technik" herangezogen werden.
|
||||
Beispiele: ENISA, NIST, OWASP, OECD, CISA
|
||||
|
||||
Mapping: source_regulation (aus control_parent_links) -> source_type
|
||||
"""
|
||||
|
||||
# --- Type definitions ---
SOURCE_TYPE_LAW = "law"              # statute/regulation/directive — normative_strength is kept
SOURCE_TYPE_GUIDELINE = "guideline"  # guideline/standard — at most "should"
SOURCE_TYPE_FRAMEWORK = "framework"  # framework/best practice — at most "may"

# Maximum normative_strength allowed per source_type.
# The DB constraint allows: must, should, may (NOT "can").
NORMATIVE_STRENGTH_CAP: dict[str, str] = {
    SOURCE_TYPE_LAW: "must",          # no cap
    SOURCE_TYPE_GUIDELINE: "should",  # at most "should"
    SOURCE_TYPE_FRAMEWORK: "may",     # at most "may" (German "kann")
}

# Ordering used for comparisons (higher = stronger).
STRENGTH_ORDER: dict[str, int] = {
    "may": 1,     # "KANN" (the value stored in the DB)
    "can": 1,     # alias — cap_normative_strength normalises it to "may"
    "should": 2,
    "must": 3,
}


def cap_normative_strength(original: str, source_type: str) -> str:
    """Limit a normative_strength according to the source_type of its origin.

    Args:
        original: The strength extracted from the source ("must", "should",
            "may", or the alias "can").
        source_type: One of the SOURCE_TYPE_* values. Types without an entry
            in NORMATIVE_STRENGTH_CAP are not capped.

    Returns:
        ``original`` if it does not exceed the cap for ``source_type``,
        otherwise the cap. The alias "can" is always returned as "may".
        Strengths missing from STRENGTH_ORDER are ranked like "must", so they
        are replaced by the cap whenever the cap is lower than "must".

    Examples:
        cap_normative_strength("must", "framework") -> "may"
        cap_normative_strength("should", "law") -> "should"
        cap_normative_strength("must", "guideline") -> "should"
        cap_normative_strength("can", "law") -> "may"
    """
    # "can" is accepted as input only; it shares the rank of "may" and would
    # otherwise be returned unchanged, although the DB constraint rejects it.
    if original == "can":
        original = "may"

    cap = NORMATIVE_STRENGTH_CAP.get(source_type, "must")
    cap_level = STRENGTH_ORDER.get(cap, 3)
    original_level = STRENGTH_ORDER.get(original, 3)
    if original_level > cap_level:
        return cap
    return original
|
||||
|
||||
|
||||
def get_highest_source_type(source_types: list[str]) -> str:
    """Return the most binding source_type found in *source_types*.

    A law outranks a guideline, which outranks a framework. Values that are
    not one of the three known types rank below all of them. An empty input
    yields the framework type.

    Examples:
        get_highest_source_type(["framework", "law"]) -> "law"
        get_highest_source_type(["framework", "guideline"]) -> "guideline"
    """
    if not source_types:
        return SOURCE_TYPE_FRAMEWORK

    ranking = {
        SOURCE_TYPE_LAW: 3,
        SOURCE_TYPE_GUIDELINE: 2,
        SOURCE_TYPE_FRAMEWORK: 1,
    }

    def rank(source_type: str) -> int:
        return ranking.get(source_type, 0)

    return max(source_types, key=rank)
|
||||
|
||||
|
||||
# ============================================================================
# Classification: source_regulation -> source_type
#
# This map is used for the backfill and for future pipeline runs.
# Register new regulations here!
# ============================================================================

SOURCE_REGULATION_CLASSIFICATION: dict[str, str] = {
    # --- EU regulations (directly binding) ---
    # NOTE(review): "NIS2-Richtlinie" is a directive but is listed in this
    # section; the directive section below is empty. Lookups are unaffected.
    "DSGVO (EU) 2016/679": SOURCE_TYPE_LAW,
    "KI-Verordnung (EU) 2024/1689": SOURCE_TYPE_LAW,
    "Cyber Resilience Act (CRA)": SOURCE_TYPE_LAW,
    "NIS2-Richtlinie (EU) 2022/2555": SOURCE_TYPE_LAW,
    "Data Act": SOURCE_TYPE_LAW,
    "Data Governance Act (DGA)": SOURCE_TYPE_LAW,
    "Markets in Crypto-Assets (MiCA)": SOURCE_TYPE_LAW,
    "Maschinenverordnung (EU) 2023/1230": SOURCE_TYPE_LAW,
    "Batterieverordnung (EU) 2023/1542": SOURCE_TYPE_LAW,
    "AML-Verordnung": SOURCE_TYPE_LAW,

    # --- EU directives (binding after national transposition) ---
    # Treated like laws for compliance purposes.

    # --- National laws ---
    # NOTE(review): "EU Blue Guide 2022" appears to be a Commission guidance
    # document rather than a national law — confirm that LAW is intended.
    "Bundesdatenschutzgesetz (BDSG)": SOURCE_TYPE_LAW,
    "Telekommunikationsgesetz": SOURCE_TYPE_LAW,
    "Telekommunikationsgesetz Oesterreich": SOURCE_TYPE_LAW,
    "Gewerbeordnung (GewO)": SOURCE_TYPE_LAW,
    "Handelsgesetzbuch (HGB)": SOURCE_TYPE_LAW,
    "Abgabenordnung (AO)": SOURCE_TYPE_LAW,
    "IFRS-Übernahmeverordnung": SOURCE_TYPE_LAW,
    "Österreichisches Datenschutzgesetz (DSG)": SOURCE_TYPE_LAW,
    "LOPDGDD - Ley Orgánica de Protección de Datos (Spanien)": SOURCE_TYPE_LAW,
    "Loi Informatique et Libertés (Frankreich)": SOURCE_TYPE_LAW,
    "Információs önrendelkezési jog törvény (Ungarn)": SOURCE_TYPE_LAW,
    "EU Blue Guide 2022": SOURCE_TYPE_LAW,

    # --- EDPB/WP29 guidelines (official interpretation aids) ---
    "EDPB Leitlinien 01/2019 (Zertifizierung)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 01/2020 (Datentransfers)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 01/2020 (Vernetzte Fahrzeuge)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 01/2022 (BCR)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 01/2024 (Berechtigtes Interesse)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 04/2019 (Data Protection by Design)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 05/2020 - Einwilligung": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 07/2020 (Datentransfers)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 08/2020 (Social Media)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 09/2022 (Data Breach)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 09/2022 - Meldung von Datenschutzverletzungen": SOURCE_TYPE_GUIDELINE,
    "EDPB Empfehlungen 01/2020 - Ergaenzende Massnahmen fuer Datentransfers": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien - Berechtigtes Interesse (Art. 6(1)(f))": SOURCE_TYPE_GUIDELINE,
    "WP244 Leitlinien (Profiling)": SOURCE_TYPE_GUIDELINE,
    "WP251 Leitlinien (Profiling)": SOURCE_TYPE_GUIDELINE,
    "WP260 Leitlinien (Transparenz)": SOURCE_TYPE_GUIDELINE,

    # --- BSI standards (technical guidelines issued by a public authority) ---
    "BSI-TR-03161-1": SOURCE_TYPE_GUIDELINE,
    "BSI-TR-03161-2": SOURCE_TYPE_GUIDELINE,
    "BSI-TR-03161-3": SOURCE_TYPE_GUIDELINE,

    # --- ENISA (EU agency, but its recommendations are not legally binding) ---
    "ENISA Cybersecurity State 2024": SOURCE_TYPE_FRAMEWORK,
    "ENISA ICS/SCADA Dependencies": SOURCE_TYPE_FRAMEWORK,
    "ENISA Supply Chain Good Practices": SOURCE_TYPE_FRAMEWORK,
    "ENISA Threat Landscape Supply Chain": SOURCE_TYPE_FRAMEWORK,

    # --- NIST (US standards, used internationally as best practice) ---
    "NIST AI Risk Management Framework": SOURCE_TYPE_FRAMEWORK,
    "NIST Cybersecurity Framework 2.0": SOURCE_TYPE_FRAMEWORK,
    "NIST SP 800-207 (Zero Trust)": SOURCE_TYPE_FRAMEWORK,
    "NIST SP 800-218 (SSDF)": SOURCE_TYPE_FRAMEWORK,
    "NIST SP 800-53 Rev. 5": SOURCE_TYPE_FRAMEWORK,
    "NIST SP 800-63-3": SOURCE_TYPE_FRAMEWORK,

    # --- OWASP (community standards) ---
    "OWASP API Security Top 10 (2023)": SOURCE_TYPE_FRAMEWORK,
    "OWASP ASVS 4.0": SOURCE_TYPE_FRAMEWORK,
    "OWASP MASVS 2.0": SOURCE_TYPE_FRAMEWORK,
    "OWASP SAMM 2.0": SOURCE_TYPE_FRAMEWORK,
    "OWASP Top 10 (2021)": SOURCE_TYPE_FRAMEWORK,

    # --- Other frameworks ---
    "OECD KI-Empfehlung": SOURCE_TYPE_FRAMEWORK,
    "CISA Secure by Design": SOURCE_TYPE_FRAMEWORK,
}
|
||||
|
||||
|
||||
def classify_source_regulation(source_regulation: str) -> str:
    """Classify a source_regulation as law, guideline or framework.

    The name is first looked up verbatim in SOURCE_REGULATION_CLASSIFICATION.
    Unknown names are classified by keyword heuristics, checked in the order
    law -> guideline; everything else (including empty input) falls back to
    'framework', the least binding result.

    Long keywords ("verordnung", "gesetz", ...) are matched as substrings so
    that compounds such as "Datenschutzgesetz" are recognised. Short keywords
    ("act", "ley", "loi", "bsi") are matched as whole words only; as plain
    substrings they also hit unrelated words ("Practices", "exploit",
    "website") and would promote a framework to a law or guideline.

    Args:
        source_regulation: Display name of the regulation or framework.

    Returns:
        One of SOURCE_TYPE_LAW, SOURCE_TYPE_GUIDELINE, SOURCE_TYPE_FRAMEWORK.
    """
    import re  # local import: this module has no top-level imports

    if not source_regulation:
        return SOURCE_TYPE_FRAMEWORK

    # Exact match against the curated map.
    if source_regulation in SOURCE_REGULATION_CLASSIFICATION:
        return SOURCE_REGULATION_CLASSIFICATION[source_regulation]

    # Heuristics for unknown sources.
    lower = source_regulation.lower()

    def mentions(substrings: tuple, whole_words: tuple = ()) -> bool:
        """True if any substring occurs, or any word occurs as a whole word."""
        if any(part in lower for part in substrings):
            return True
        return any(
            re.search(rf"\b{re.escape(word)}\b", lower) for word in whole_words
        )

    # Laws
    if mentions(
        (
            "verordnung", "richtlinie", "gesetz", "directive", "regulation",
            "(eu)", "(eg)", "törvény", "código",
        ),
        whole_words=("act", "ley", "loi"),
    ):
        return SOURCE_TYPE_LAW

    # Guidelines
    if mentions(
        ("edpb", "leitlinie", "guideline", "wp2", "empfehlung"),
        whole_words=("bsi",),
    ):
        return SOURCE_TYPE_GUIDELINE

    # Known framework publishers (ENISA, NIST, OWASP, OECD, CISA, ISO, ...)
    # and every remaining unknown source both resolve to 'framework', so no
    # separate keyword check is needed for them.
    return SOURCE_TYPE_FRAMEWORK
|
||||
@@ -8,12 +8,16 @@ from .models import (
|
||||
EvidenceDB,
|
||||
RiskDB,
|
||||
AuditExportDB,
|
||||
LLMGenerationAuditDB,
|
||||
AssertionDB,
|
||||
RegulationTypeEnum,
|
||||
ControlTypeEnum,
|
||||
ControlDomainEnum,
|
||||
RiskLevelEnum,
|
||||
EvidenceStatusEnum,
|
||||
ControlStatusEnum,
|
||||
EvidenceConfidenceEnum,
|
||||
EvidenceTruthStatusEnum,
|
||||
)
|
||||
from .repository import (
|
||||
RegulationRepository,
|
||||
@@ -33,6 +37,8 @@ __all__ = [
|
||||
"EvidenceDB",
|
||||
"RiskDB",
|
||||
"AuditExportDB",
|
||||
"LLMGenerationAuditDB",
|
||||
"AssertionDB",
|
||||
# Enums
|
||||
"RegulationTypeEnum",
|
||||
"ControlTypeEnum",
|
||||
@@ -40,6 +46,8 @@ __all__ = [
|
||||
"RiskLevelEnum",
|
||||
"EvidenceStatusEnum",
|
||||
"ControlStatusEnum",
|
||||
"EvidenceConfidenceEnum",
|
||||
"EvidenceTruthStatusEnum",
|
||||
# Repositories
|
||||
"RegulationRepository",
|
||||
"RequirementRepository",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,164 @@
|
||||
"""
|
||||
SQLAlchemy models for VVT Master Libraries + Process Templates.
|
||||
|
||||
Tables (global, no tenant_id):
|
||||
- vvt_lib_data_subjects
|
||||
- vvt_lib_data_categories (hierarchical, self-referencing)
|
||||
- vvt_lib_recipients
|
||||
- vvt_lib_legal_bases
|
||||
- vvt_lib_retention_rules
|
||||
- vvt_lib_transfer_mechanisms
|
||||
- vvt_lib_purposes
|
||||
- vvt_lib_toms
|
||||
|
||||
Tenant-scoped:
|
||||
- vvt_process_templates (system + tenant-specific)
|
||||
"""
|
||||
|
||||
from datetime import datetime, timezone

from sqlalchemy import (
    Column, String, Text, Boolean, Integer, DateTime, JSON, Index,
    ForeignKey,
)
from sqlalchemy.dialects.postgresql import UUID

from classroom_engine.database import Base
|
||||
|
||||
|
||||
class VVTLibDataSubjectDB(Base):
    """Master-library row for table ``vvt_lib_data_subjects`` (global, no tenant_id)."""

    __tablename__ = 'vvt_lib_data_subjects'

    id = Column(String(50), primary_key=True)
    label_de = Column(String(200), nullable=False)
    description_de = Column(Text)
    art9_relevant = Column(Boolean, default=False)
    typical_for = Column(JSON, default=list)
    sort_order = Column(Integer, default=0)
    # Timezone-aware UTC default: the column is DateTime(timezone=True), and a
    # naive datetime.utcnow() value would be read in the session's time zone.
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
|
||||
|
||||
class VVTLibDataCategoryDB(Base):
    """Master-library row for table ``vvt_lib_data_categories`` (global, no tenant_id).

    Categories form a hierarchy through the self-referencing ``parent_id``;
    deleting a parent sets the children's ``parent_id`` to NULL.
    """

    __tablename__ = 'vvt_lib_data_categories'

    id = Column(String(50), primary_key=True)
    parent_id = Column(String(50), ForeignKey('vvt_lib_data_categories.id', ondelete='SET NULL'), nullable=True)
    label_de = Column(String(200), nullable=False)
    description_de = Column(Text)
    is_art9 = Column(Boolean, default=False)
    is_art10 = Column(Boolean, default=False)
    risk_weight = Column(Integer, default=1)
    default_retention_rule = Column(String(50))
    default_legal_basis = Column(String(50))
    sort_order = Column(Integer, default=0)
    # Timezone-aware UTC default: the column is DateTime(timezone=True), and a
    # naive datetime.utcnow() value would be read in the session's time zone.
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
|
||||
|
||||
class VVTLibRecipientDB(Base):
    """Master-library row for table ``vvt_lib_recipients`` (global, no tenant_id)."""

    __tablename__ = 'vvt_lib_recipients'

    id = Column(String(50), primary_key=True)
    type = Column(String(20), nullable=False)
    label_de = Column(String(200), nullable=False)
    description_de = Column(Text)
    is_third_country = Column(Boolean, default=False)
    country = Column(String(5))
    sort_order = Column(Integer, default=0)
    # Timezone-aware UTC default: the column is DateTime(timezone=True), and a
    # naive datetime.utcnow() value would be read in the session's time zone.
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
|
||||
|
||||
class VVTLibLegalBasisDB(Base):
    """Master-library row for table ``vvt_lib_legal_bases`` (global, no tenant_id)."""

    __tablename__ = 'vvt_lib_legal_bases'

    id = Column(String(50), primary_key=True)
    article = Column(String(50), nullable=False)
    type = Column(String(30), nullable=False)
    label_de = Column(String(300), nullable=False)
    description_de = Column(Text)
    is_art9 = Column(Boolean, default=False)
    typical_national_law = Column(String(100))
    sort_order = Column(Integer, default=0)
    # Timezone-aware UTC default: the column is DateTime(timezone=True), and a
    # naive datetime.utcnow() value would be read in the session's time zone.
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
|
||||
|
||||
class VVTLibRetentionRuleDB(Base):
    """Master-library row for table ``vvt_lib_retention_rules`` (global, no tenant_id).

    ``duration`` and ``duration_unit`` are both mandatory and together express
    the retention period.
    """

    __tablename__ = 'vvt_lib_retention_rules'

    id = Column(String(50), primary_key=True)
    label_de = Column(String(300), nullable=False)
    description_de = Column(Text)
    legal_basis = Column(String(200))
    duration = Column(Integer, nullable=False)
    duration_unit = Column(String(10), nullable=False)
    start_event = Column(String(200))
    deletion_procedure = Column(String(500))
    sort_order = Column(Integer, default=0)
    # Timezone-aware UTC default: the column is DateTime(timezone=True), and a
    # naive datetime.utcnow() value would be read in the session's time zone.
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
|
||||
|
||||
class VVTLibTransferMechanismDB(Base):
    """Master-library row for table ``vvt_lib_transfer_mechanisms`` (global, no tenant_id)."""

    __tablename__ = 'vvt_lib_transfer_mechanisms'

    id = Column(String(50), primary_key=True)
    label_de = Column(String(300), nullable=False)
    description_de = Column(Text)
    article = Column(String(50))
    requires_tia = Column(Boolean, default=False)
    sort_order = Column(Integer, default=0)
    # Timezone-aware UTC default: the column is DateTime(timezone=True), and a
    # naive datetime.utcnow() value would be read in the session's time zone.
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
|
||||
|
||||
class VVTLibPurposeDB(Base):
    """Master-library row for table ``vvt_lib_purposes`` (global, no tenant_id)."""

    __tablename__ = 'vvt_lib_purposes'

    id = Column(String(50), primary_key=True)
    label_de = Column(String(300), nullable=False)
    description_de = Column(Text)
    typical_legal_basis = Column(String(50))
    typical_for = Column(JSON, default=list)
    sort_order = Column(Integer, default=0)
    # Timezone-aware UTC default: the column is DateTime(timezone=True), and a
    # naive datetime.utcnow() value would be read in the session's time zone.
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
|
||||
|
||||
class VVTLibTomDB(Base):
    """Master-library row for table ``vvt_lib_toms`` (global, no tenant_id)."""

    __tablename__ = 'vvt_lib_toms'

    id = Column(String(50), primary_key=True)
    category = Column(String(30), nullable=False)
    label_de = Column(String(300), nullable=False)
    description_de = Column(Text)
    art32_reference = Column(String(100))
    sort_order = Column(Integer, default=0)
    # Timezone-aware UTC default: the column is DateTime(timezone=True), and a
    # naive datetime.utcnow() value would be read in the session's time zone.
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
|
||||
|
||||
class VVTProcessTemplateDB(Base):
    """Process template stored in ``vvt_process_templates``.

    Holds both system templates and tenant-specific ones: ``is_system``
    defaults to True and ``tenant_id`` is nullable. The ``*_refs`` columns are
    JSON lists; presumably they hold ids of the vvt_lib_* master-library rows
    (no foreign keys are declared — verify against the callers).
    """

    __tablename__ = 'vvt_process_templates'

    id = Column(String(80), primary_key=True)
    name = Column(String(300), nullable=False)
    description = Column(Text)
    business_function = Column(String(50))
    purpose_refs = Column(JSON, default=list)
    legal_basis_refs = Column(JSON, default=list)
    data_subject_refs = Column(JSON, default=list)
    data_category_refs = Column(JSON, default=list)
    recipient_refs = Column(JSON, default=list)
    tom_refs = Column(JSON, default=list)
    transfer_mechanism_refs = Column(JSON, default=list)
    retention_rule_ref = Column(String(50))
    typical_systems = Column(JSON, default=list)
    protection_level = Column(String(10), default='MEDIUM')
    dpia_required = Column(Boolean, default=False)
    risk_score = Column(Integer)
    tags = Column(JSON, default=list)
    is_system = Column(Boolean, default=True)
    tenant_id = Column(UUID(as_uuid=True), nullable=True)
    sort_order = Column(Integer, default=0)
    # Timezone-aware UTC defaults: both columns are DateTime(timezone=True), and
    # a naive datetime.utcnow() value would be read in the session's time zone.
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
    updated_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),
    )

    __table_args__ = (
        Index('idx_vvt_process_templates_bf', 'business_function'),
        Index('idx_vvt_process_templates_system', 'is_system'),
    )
|
||||
@@ -79,6 +79,26 @@ class VVTActivityDB(Base):
|
||||
next_review_at = Column(DateTime(timezone=True), nullable=True)
|
||||
created_by = Column(String(200), default='system')
|
||||
dsfa_id = Column(UUID(as_uuid=True), nullable=True)
|
||||
|
||||
# Library refs (Phase 1 — parallel to freetext fields)
|
||||
purpose_refs = Column(JSON, nullable=True)
|
||||
legal_basis_refs = Column(JSON, nullable=True)
|
||||
data_subject_refs = Column(JSON, nullable=True)
|
||||
data_category_refs = Column(JSON, nullable=True)
|
||||
recipient_refs = Column(JSON, nullable=True)
|
||||
retention_rule_ref = Column(String(50), nullable=True)
|
||||
transfer_mechanism_refs = Column(JSON, nullable=True)
|
||||
tom_refs = Column(JSON, nullable=True)
|
||||
|
||||
# Cross-module links
|
||||
linked_loeschfristen_ids = Column(JSON, nullable=True)
|
||||
linked_tom_measure_ids = Column(JSON, nullable=True)
|
||||
|
||||
# Template + risk
|
||||
source_template_id = Column(String(80), nullable=True)
|
||||
risk_score = Column(Integer, nullable=True)
|
||||
art30_completeness = Column(JSON, nullable=True)
|
||||
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
|
||||
|
||||
|
||||
@@ -69,7 +69,7 @@ class AnchorFinder:
|
||||
tags_str = " ".join(control.tags[:3]) if control.tags else ""
|
||||
query = f"{control.title} {tags_str}".strip()
|
||||
|
||||
results = await self.rag.search(
|
||||
results = await self.rag.search_with_rerank(
|
||||
query=query,
|
||||
collection="bp_compliance_ce",
|
||||
top_k=15,
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
"""Assertion Engine — splits text into sentences and classifies each.
|
||||
|
||||
Each sentence is tagged as:
|
||||
- assertion: normative statement (pflicht / empfehlung / kann)
|
||||
- fact: references concrete evidence artifacts
|
||||
- rationale: explains why something is required
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from .normative_patterns import (
|
||||
PFLICHT_RE, EMPFEHLUNG_RE, KANN_RE, RATIONALE_RE, EVIDENCE_RE,
|
||||
)
|
||||
|
||||
# Sentence splitter: whitespace that follows '.', '!' or '?' and precedes an
# uppercase letter (incl. ÄÖÜ), or a blank line between paragraphs.
_SENTENCE_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ])|(?:\n\s*\n)')


def extract_assertions(
    text: str,
    entity_type: str,
    entity_id: str,
    tenant_id: Optional[str] = None,
) -> list[dict]:
    """Break *text* into sentences and tag every sentence with its class.

    Fragments shorter than five characters (after trimming) are dropped, but
    they still consume a position, so ``sentence_index`` refers to the
    position in the raw split result.

    Returns:
        One dict per kept sentence, shaped for AssertionDB creation. Every
        record starts with an empty ``evidence_ids`` list and a ``confidence``
        of 0.0.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return []

    records: list[dict] = []
    for position, fragment in enumerate(_SENTENCE_SPLIT.split(stripped)):
        candidate = fragment.strip()
        if len(candidate) < 5:
            # Covers empty fragments as well as stray tokens.
            continue

        kind, tier = _classify_sentence(candidate)
        record = {
            "tenant_id": tenant_id,
            "entity_type": entity_type,
            "entity_id": entity_id,
            "sentence_text": candidate,
            "sentence_index": position,
            "assertion_type": kind,
            "normative_tier": tier,
            "evidence_ids": [],
            "confidence": 0.0,
        }
        records.append(record)

    return records
|
||||
|
||||
|
||||
def _classify_sentence(sentence: str) -> tuple[str, Optional[str]]:
    """Classify one sentence as ``(assertion_type, normative_tier)``.

    Precedence: evidence keywords -> rationale -> normative tier
    (pflicht, empfehlung, kann) -> unclassified assertion.
    """
    # 1. Evidence/fact keywords win over everything else.
    if EVIDENCE_RE.search(sentence):
        return ("fact", None)

    # 2. Rationale: at least one rationale marker, and not outnumbered by
    #    normative markers.
    normative_hits = sum(
        len(pattern.findall(sentence))
        for pattern in (PFLICHT_RE, EMPFEHLUNG_RE, KANN_RE)
    )
    rationale_hits = len(RATIONALE_RE.findall(sentence))
    if rationale_hits > 0 and rationale_hits >= normative_hits:
        return ("rationale", None)

    # 3. Normative classification, strongest tier first.
    tiers = (
        (PFLICHT_RE, "pflicht"),
        (EMPFEHLUNG_RE, "empfehlung"),
        (KANN_RE, "kann"),
    )
    for pattern, tier in tiers:
        if pattern.search(sentence):
            return ("assertion", tier)

    # 4. Nothing matched: assertion without a tier.
    return ("assertion", None)
|
||||
@@ -0,0 +1,618 @@
|
||||
"""Batch Dedup Runner — Orchestrates deduplication of ~85k atomare Controls.
|
||||
|
||||
Reduces Pass 0b controls from ~85k to ~18-25k unique Master Controls via:
|
||||
Phase 1: Intra-Group Dedup — same merge_group_hint → pick best, link rest
|
||||
(85k → ~52k, mostly title-identical short-circuit, no embeddings)
|
||||
Phase 2: Cross-Group Dedup — embed masters, search Qdrant for similar
|
||||
masters with different hints (52k → ~18-25k)
|
||||
|
||||
All Pass 0b controls have pattern_id=NULL. The primary grouping key is
|
||||
merge_group_hint (format: "action_type:norm_obj:trigger_key"), which
|
||||
encodes the normalized action, object, and trigger.
|
||||
|
||||
Usage:
|
||||
runner = BatchDedupRunner(db)
|
||||
stats = await runner.run(dry_run=True) # preview
|
||||
stats = await runner.run(dry_run=False) # execute
|
||||
stats = await runner.run(hint_filter="implement:multi_factor_auth:none")
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from collections import defaultdict
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from compliance.services.control_dedup import (
|
||||
canonicalize_text,
|
||||
ensure_qdrant_collection,
|
||||
get_embedding,
|
||||
normalize_action,
|
||||
normalize_object,
|
||||
qdrant_search_cross_regulation,
|
||||
qdrant_upsert,
|
||||
LINK_THRESHOLD,
|
||||
REVIEW_THRESHOLD,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEDUP_COLLECTION = "atomic_controls_dedup"
|
||||
|
||||
|
||||
# ── Quality Score ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def quality_score(control: dict) -> float:
    """Score a control by richness of requirements, tests, evidence, and objective.

    Higher score = better candidate for master control.

    Weights: 2.0 per requirement, 1.5 per test-procedure entry, 1.0 per
    evidence entry, plus ``len(objective) / 200`` capped at 3.0.

    The three list fields may be given as Python containers or as JSON text.
    Text that is not valid JSON, and values that are not a list, tuple or dict
    (e.g. the JSON texts ``"null"`` or ``"5"``), contribute nothing instead of
    raising.

    Args:
        control: Mapping with optional keys ``requirements``,
            ``test_procedure``, ``evidence`` and ``objective``.

    Returns:
        The non-negative quality score.
    """

    def _item_count(field: str) -> int:
        """Number of entries stored under *field*, or 0 if it is not a container."""
        value = control.get(field) or "[]"
        if isinstance(value, str):
            try:
                value = json.loads(value)
            except (json.JSONDecodeError, TypeError):
                return 0
        # JSON text can decode to None, a number, a bool or a bare string;
        # none of those is a list of entries.
        if isinstance(value, (list, tuple, dict)):
            return len(value)
        return 0

    score = 0.0
    score += _item_count("requirements") * 2.0
    score += _item_count("test_procedure") * 1.5
    score += _item_count("evidence") * 1.0

    objective = control.get("objective") or ""
    score += min(len(objective) / 200, 3.0)

    return score
|
||||
|
||||
|
||||
# ── Batch Dedup Runner ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
class BatchDedupRunner:
|
||||
"""Batch dedup orchestrator for existing Pass 0b atomic controls."""
|
||||
|
||||
def __init__(self, db, collection: str = DEDUP_COLLECTION):
    """Store the DB handle and target collection and reset counters/progress.

    Args:
        db: Database session used for queries and rollbacks.
        collection: Name of the Qdrant collection used for dedup embeddings.
    """
    self.db = db
    self.collection = collection

    # Every counter starts at zero; the key order is kept for log output.
    counter_names = (
        "total_controls",
        "unique_hints",
        "phase1_groups_processed",
        "masters",
        "linked",
        "review",
        "new_controls",
        "parent_links_transferred",
        "cross_group_linked",
        "cross_group_review",
        "errors",
        "skipped_title_identical",
    )
    self.stats = dict.fromkeys(counter_names, 0)

    self._progress_phase = ""
    self._progress_count = 0
    self._progress_total = 0
|
||||
|
||||
async def run(
    self,
    dry_run: bool = False,
    hint_filter: str = None,
) -> dict:
    """Execute both dedup phases and return the collected statistics.

    Args:
        dry_run: If True, compute stats but don't modify DB/Qdrant; the
            cross-group phase is skipped entirely.
        hint_filter: If set, only process groups matching this hint prefix.

    Returns:
        The stats dict, extended with ``elapsed_seconds``.
    """
    started_at = time.monotonic()
    logger.info(
        "BatchDedup starting (dry_run=%s, hint_filter=%s)",
        dry_run, hint_filter,
    )

    write_mode = not dry_run
    if write_mode:
        await ensure_qdrant_collection(collection=self.collection)

    # Phase 1: intra-group dedup (controls sharing one merge_group_hint).
    self._progress_phase = "phase1"
    hint_groups = self._load_merge_groups(hint_filter)
    self._progress_total = self.stats["total_controls"]

    for hint, members in hint_groups:
        try:
            await self._process_hint_group(hint, members, dry_run)
            self.stats["phase1_groups_processed"] += 1
        except Exception as exc:
            # One failing group must not abort the batch: count it, roll the
            # session back (best effort) and continue with the next group.
            logger.error("BatchDedup Phase 1 error on hint %s: %s", hint, exc)
            self.stats["errors"] += 1
            try:
                self.db.rollback()
            except Exception:
                pass

    logger.info(
        "BatchDedup Phase 1 done: %d masters, %d linked, %d review",
        self.stats["masters"], self.stats["linked"], self.stats["review"],
    )

    # Phase 2: cross-group dedup via embeddings (writes, so never in dry runs).
    if write_mode:
        self._progress_phase = "phase2"
        await self._run_cross_group_pass()

    duration = time.monotonic() - started_at
    self.stats["elapsed_seconds"] = round(duration, 1)
    logger.info("BatchDedup completed in %.1fs: %s", duration, self.stats)
    return self.stats
|
||||
|
||||
def _load_merge_groups(self, hint_filter: str = None) -> list:
    """Fetch active Pass 0b controls and bucket them by merge_group_hint.

    Controls in release_state 'deprecated' or 'duplicate' are excluded.
    When ``hint_filter`` is given it is applied as a ``LIKE '<filter>%'``
    prefix match on the hint. Also records ``total_controls`` and
    ``unique_hints`` in ``self.stats``.

    Returns:
        List of ``(hint, [control dict, ...])`` tuples, largest group first.
        Controls without a hint are grouped under the empty string.
    """
    bind_params = {}
    clauses = [
        "decomposition_method = 'pass0b'",
        "release_state != 'deprecated'",
        "release_state != 'duplicate'",
    ]
    if hint_filter:
        clauses.append("generation_metadata->>'merge_group_hint' LIKE :hf")
        bind_params["hf"] = f"{hint_filter}%"

    where = " AND ".join(clauses)
    statement = text(f"""
        SELECT id::text, control_id, title, objective,
               pattern_id, requirements::text, test_procedure::text,
               evidence::text, release_state,
               generation_metadata->>'merge_group_hint' as merge_group_hint,
               generation_metadata->>'action_object_class' as action_object_class
        FROM canonical_controls
        WHERE {where}
        ORDER BY control_id
    """)
    rows = self.db.execute(statement, bind_params).fetchall()

    by_hint = defaultdict(list)
    for row in rows:
        # Column order follows the SELECT list above.
        (uuid, control_id, title, objective, pattern_id, requirements,
         test_procedure, evidence, release_state, raw_hint, raw_class) = row
        group_key = raw_hint or ""
        by_hint[group_key].append({
            "uuid": uuid,
            "control_id": control_id,
            "title": title,
            "objective": objective,
            "pattern_id": pattern_id,
            "requirements": requirements,
            "test_procedure": test_procedure,
            "evidence": evidence,
            "release_state": release_state,
            "merge_group_hint": group_key,
            "action_object_class": raw_class or "",
        })

    self.stats["total_controls"] = len(rows)
    self.stats["unique_hints"] = len(by_hint)

    # Stable sort: groups of equal size keep their first-seen order.
    sorted_groups = list(by_hint.items())
    sorted_groups.sort(key=lambda item: len(item[1]), reverse=True)
    logger.info("BatchDedup loaded %d controls in %d hint groups",
                len(rows), len(sorted_groups))
    return sorted_groups
|
||||
|
||||
def _sub_group_by_merge_hint(self, controls: list) -> dict:
|
||||
"""Group controls by merge_group_hint composite key."""
|
||||
groups = defaultdict(list)
|
||||
for c in controls:
|
||||
hint = c["merge_group_hint"]
|
||||
if hint:
|
||||
groups[hint].append(c)
|
||||
else:
|
||||
groups[f"__no_hint_{c['uuid']}"].append(c)
|
||||
return dict(groups)
|
||||
|
||||
async def _process_hint_group(
    self,
    hint: str,
    controls: list,
    dry_run: bool,
):
    """Deduplicate one group of controls that share a merge_group_hint.

    The control with the highest ``quality_score`` becomes the master.
    Every other control is either linked directly (title identical to the
    master's, ignoring case and surrounding whitespace) or handed to
    ``_check_and_link_within_group`` for an embedding-based decision.
    Nothing is written to DB/Qdrant by this method itself when ``dry_run``
    is True; only the stats counters change.
    """
    def _norm(title):
        return title.strip().lower()

    if len(controls) < 2:
        # A group of one has nothing to merge with: it is its own master.
        self.stats["masters"] += 1
        if not dry_run:
            await self._embed_and_index(controls[0])
        self._progress_count += 1
        self._log_progress(hint)
        return

    # Best-quality control first; it becomes the master of the group.
    master, *others = sorted(controls, key=quality_score, reverse=True)
    self.stats["masters"] += 1

    if not dry_run:
        await self._embed_and_index(master)

    for other in others:
        if _norm(other["title"]) != _norm(master["title"]):
            # Same hint but a different title: let the embedding decide.
            await self._check_and_link_within_group(master, other, dry_run)
        else:
            # Same hint and same title: link without computing an embedding.
            self.stats["linked"] += 1
            self.stats["skipped_title_identical"] += 1
            if not dry_run:
                await self._mark_duplicate(master, other, confidence=1.0)

        self._progress_count += 1
        self._log_progress(hint)
|
||||
|
||||
async def _check_and_link_within_group(
    self,
    master: dict,
    candidate: dict,
    dry_run: bool,
):
    """Decide via embedding whether a same-hint candidate duplicates an indexed control.

    Outcomes (stats are updated in every case, writes only when not ``dry_run``):
      * no embedding, or no usable Qdrant hit -> linked to ``master`` (confidence 0.90)
      * best score > LINK_THRESHOLD            -> linked to the matched control
      * best score > REVIEW_THRESHOLD          -> review entry written
      * otherwise                              -> candidate indexed as a new control

    Hits that carry no ``control_uuid`` or that point at the candidate
    itself are ignored; linking to them would either fail the uuid cast
    or merge the candidate into itself.
    """
    parts = candidate["merge_group_hint"].split(":", 2)
    action = parts[0]  # str.split always yields at least one element
    obj = parts[1] if len(parts) > 1 else ""

    canonical = canonicalize_text(action, obj, candidate["title"])
    embedding = await get_embedding(canonical)

    if not embedding:
        # Can't embed → link anyway (same hint = same action+object)
        self.stats["linked"] += 1
        if not dry_run:
            await self._mark_duplicate(master, candidate, confidence=0.90)
        return

    # Search the dedup collection (unfiltered — pattern_id is NULL)
    results = await qdrant_search_cross_regulation(
        embedding, top_k=3, collection=self.collection,
    )

    # First hit (results are used in the order returned) that refers to
    # another control.
    best = None
    for hit in results or []:
        hit_uuid = (hit.get("payload") or {}).get("control_uuid", "")
        if hit_uuid and hit_uuid != candidate["uuid"]:
            best = hit
            break

    if best is None:
        # No usable Qdrant match (master might not be indexed yet) → link to master
        self.stats["linked"] += 1
        if not dry_run:
            await self._mark_duplicate(master, candidate, confidence=0.90)
        return

    best_score = best.get("score", 0.0)
    best_payload = best.get("payload") or {}
    best_uuid = best_payload.get("control_uuid", "")

    if best_score > LINK_THRESHOLD:
        self.stats["linked"] += 1
        if not dry_run:
            await self._mark_duplicate_to(best_uuid, candidate, confidence=best_score)
    elif best_score > REVIEW_THRESHOLD:
        self.stats["review"] += 1
        if not dry_run:
            self._write_review(candidate, best_payload, best_score)
    else:
        # Very different despite same hint → new master
        self.stats["new_controls"] += 1
        if not dry_run:
            await self._index_with_embedding(candidate, embedding)
|
||||
|
||||
async def _run_cross_group_pass(self):
    """Phase 2: Find cross-group duplicates among surviving masters.

    After Phase 1, ~52k masters remain. Many have similar semantics
    despite different merge_group_hints (e.g. different German spellings).
    This pass embeds all masters and finds near-duplicates via Qdrant.

    For each master the first usable hit decides: above LINK_THRESHOLD the
    current control is marked duplicate of the hit, above REVIEW_THRESHOLD
    a review entry is written. Controls already merged during this pass
    are never used as a merge target, otherwise two near-identical masters
    would each be marked duplicate of the other and no master would remain.
    Failures are logged and counted in ``stats["errors"]``; the pass continues.
    """
    logger.info("BatchDedup Phase 2: Cross-group pass starting...")

    rows = self.db.execute(text("""
        SELECT id::text, control_id, title,
               generation_metadata->>'merge_group_hint' as merge_group_hint
        FROM canonical_controls
        WHERE decomposition_method = 'pass0b'
          AND release_state != 'duplicate'
          AND release_state != 'deprecated'
        ORDER BY control_id
    """)).fetchall()

    self._progress_total = len(rows)
    self._progress_count = 0
    logger.info("BatchDedup Cross-group: %d masters to check", len(rows))
    cross_linked = 0
    cross_review = 0
    # UUIDs marked duplicate in this pass; they remain in Qdrant but must
    # not become a merge target for a later row.
    merged_in_pass = set()

    for i, r in enumerate(rows):
        control_uuid = r[0]
        hint = r[3] or ""
        parts = hint.split(":", 2)
        action = parts[0]
        obj = parts[1] if len(parts) > 1 else ""

        canonical = canonicalize_text(action, obj, r[2])
        embedding = await get_embedding(canonical)

        results = []
        if embedding:
            results = await qdrant_search_cross_regulation(
                embedding, top_k=5, collection=self.collection,
            ) or []

        for match in results:
            match_score = match.get("score", 0.0)
            match_payload = match.get("payload") or {}
            match_uuid = match_payload.get("control_uuid", "")

            # Skip hits without a uuid, the self-match, and controls that
            # were themselves merged earlier in this pass.
            if (
                not match_uuid
                or match_uuid == control_uuid
                or match_uuid in merged_in_pass
            ):
                continue

            if match_score > LINK_THRESHOLD:
                # The control being checked becomes the duplicate of the hit.
                try:
                    self.db.execute(text("""
                        UPDATE canonical_controls
                        SET release_state = 'duplicate', merged_into_uuid = CAST(:master AS uuid)
                        WHERE id = CAST(:dup AS uuid)
                          AND release_state != 'duplicate'
                    """), {"master": match_uuid, "dup": control_uuid})

                    self.db.execute(text("""
                        INSERT INTO control_parent_links
                            (control_uuid, parent_control_uuid, link_type, confidence)
                        VALUES (CAST(:cu AS uuid), CAST(:pu AS uuid), 'cross_regulation', :conf)
                        ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
                    """), {"cu": match_uuid, "pu": control_uuid, "conf": match_score})

                    # Transfer parent links
                    transferred = self._transfer_parent_links(match_uuid, control_uuid)
                    self.stats["parent_links_transferred"] += transferred

                    self.db.commit()
                    cross_linked += 1
                    merged_in_pass.add(control_uuid)
                except Exception as e:
                    logger.error("BatchDedup cross-group link error %s→%s: %s",
                                 control_uuid, match_uuid, e)
                    self.db.rollback()
                    self.stats["errors"] += 1
                break  # Only one cross-link per control
            elif match_score > REVIEW_THRESHOLD:
                try:
                    self._write_review(
                        {"control_id": r[1], "title": r[2], "objective": "",
                         "merge_group_hint": hint, "pattern_id": None},
                        match_payload, match_score,
                    )
                    cross_review += 1
                except Exception:
                    # _write_review already logged and rolled back; count
                    # it like a link failure instead of aborting the pass.
                    self.stats["errors"] += 1
                break

        # Updated for every row, including those without embedding or hits.
        self._progress_count = i + 1
        if (i + 1) % 500 == 0:
            logger.info("BatchDedup Cross-group: %d/%d checked, %d linked, %d review",
                        i + 1, len(rows), cross_linked, cross_review)

    self.stats["cross_group_linked"] = cross_linked
    self.stats["cross_group_review"] = cross_review
    logger.info("BatchDedup Cross-group complete: %d linked, %d review",
                cross_linked, cross_review)
|
||||
|
||||
# ── Qdrant Helpers ───────────────────────────────────────────────────
|
||||
|
||||
async def _embed_and_index(self, control: dict):
    """Compute embedding and index a control in the dedup Qdrant collection.

    The text that is embedded is built by ``canonicalize_text`` from the
    first two ``:``-separated parts of the control's merge_group_hint plus
    its title. Payload construction and the upsert are delegated to
    ``_index_with_embedding`` so both indexing paths write the same payload.
    If no embedding is returned the control is not indexed and a warning
    is logged.
    """
    parts = control["merge_group_hint"].split(":", 2)
    action = parts[0]  # str.split always yields at least one element
    obj = parts[1] if len(parts) > 1 else ""

    canonical = canonicalize_text(action, obj, control["title"])
    embedding = await get_embedding(canonical)

    if not embedding:
        logger.warning(
            "BatchDedup: no embedding for control %s — not indexed",
            control["control_id"],
        )
        return

    await self._index_with_embedding(control, embedding)
|
||||
|
||||
async def _index_with_embedding(self, control: dict, embedding: list):
    """Upsert a control into the dedup collection using a ready-made embedding.

    The point id is the control's uuid. The payload carries the control's
    identifiers, title and pattern_id, the normalized action/object taken
    from the first two ``:``-separated parts of its merge_group_hint, the
    canonical text and the raw hint.
    """
    hint = control["merge_group_hint"]
    # Equivalent to taking elements 0 and 1 of hint.split(":", 2).
    action, _, remainder = hint.partition(":")
    obj = remainder.partition(":")[0]

    action_normalized = normalize_action(action)
    object_normalized = normalize_object(obj)
    canonical_text = canonicalize_text(action, obj, control["title"])

    point_payload = {
        "control_uuid": control["uuid"],
        "control_id": control["control_id"],
        "title": control["title"],
        "pattern_id": control.get("pattern_id"),
        "action_normalized": action_normalized,
        "object_normalized": object_normalized,
        "canonical_text": canonical_text,
        "merge_group_hint": hint,
    }
    await qdrant_upsert(
        point_id=control["uuid"],
        embedding=embedding,
        payload=point_payload,
        collection=self.collection,
    )
|
||||
|
||||
# ── DB Write Helpers ─────────────────────────────────────────────────
|
||||
|
||||
async def _mark_duplicate(self, master: dict, candidate: dict, confidence: float):
|
||||
"""Mark candidate as duplicate of master, transfer parent links."""
|
||||
try:
|
||||
self.db.execute(text("""
|
||||
UPDATE canonical_controls
|
||||
SET release_state = 'duplicate', merged_into_uuid = CAST(:master AS uuid)
|
||||
WHERE id = CAST(:cand AS uuid)
|
||||
"""), {"master": master["uuid"], "cand": candidate["uuid"]})
|
||||
|
||||
self.db.execute(text("""
|
||||
INSERT INTO control_parent_links
|
||||
(control_uuid, parent_control_uuid, link_type, confidence)
|
||||
VALUES (CAST(:master AS uuid), CAST(:cand_parent AS uuid), 'dedup_merge', :conf)
|
||||
ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
|
||||
"""), {"master": master["uuid"], "cand_parent": candidate["uuid"], "conf": confidence})
|
||||
|
||||
transferred = self._transfer_parent_links(master["uuid"], candidate["uuid"])
|
||||
self.stats["parent_links_transferred"] += transferred
|
||||
|
||||
self.db.commit()
|
||||
except Exception as e:
|
||||
logger.error("BatchDedup _mark_duplicate error %s→%s: %s",
|
||||
candidate["uuid"], master["uuid"], e)
|
||||
self.db.rollback()
|
||||
raise
|
||||
|
||||
async def _mark_duplicate_to(self, master_uuid: str, candidate: dict, confidence: float):
    """Mark ``candidate`` as a duplicate of the control identified by ``master_uuid``.

    In one transaction: sets the candidate's release_state to 'duplicate'
    with merged_into_uuid pointing at the master, inserts a 'dedup_merge'
    row into control_parent_links (conflicts ignored), copies the
    candidate's parent links to the master and commits. On any error the
    failure is logged, the transaction rolled back and the exception re-raised.
    """
    flag_sql = text("""
        UPDATE canonical_controls
        SET release_state = 'duplicate', merged_into_uuid = CAST(:master AS uuid)
        WHERE id = CAST(:cand AS uuid)
    """)
    link_sql = text("""
        INSERT INTO control_parent_links
            (control_uuid, parent_control_uuid, link_type, confidence)
        VALUES (CAST(:master AS uuid), CAST(:cand_parent AS uuid), 'dedup_merge', :conf)
        ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
    """)
    try:
        self.db.execute(flag_sql, {"master": master_uuid, "cand": candidate["uuid"]})
        self.db.execute(
            link_sql,
            {"master": master_uuid, "cand_parent": candidate["uuid"], "conf": confidence},
        )

        moved = self._transfer_parent_links(master_uuid, candidate["uuid"])
        self.stats["parent_links_transferred"] += moved

        self.db.commit()
    except Exception as e:
        logger.error("BatchDedup _mark_duplicate_to error %s→%s: %s",
                     candidate["uuid"], master_uuid, e)
        self.db.rollback()
        raise
|
||||
|
||||
def _transfer_parent_links(self, master_uuid: str, duplicate_uuid: str) -> int:
    """Copy the duplicate's 'decomposition' parent links onto the master.

    The duplicate's own rows are left in place. A link whose parent is the
    master itself is skipped so the master is never linked to itself.
    Inserts use ``ON CONFLICT DO NOTHING``; the returned count is the
    number of inserts attempted, so it can exceed the number of rows
    actually added. The caller is responsible for commit/rollback.

    Args:
        master_uuid: UUID (text) of the control receiving the links.
        duplicate_uuid: UUID (text) of the control whose links are copied.

    Returns:
        Number of link inserts issued.
    """
    rows = self.db.execute(text("""
        SELECT parent_control_uuid::text, link_type, confidence,
               source_regulation, source_article, obligation_candidate_id::text
        FROM control_parent_links
        WHERE control_uuid = CAST(:dup AS uuid)
          AND link_type = 'decomposition'
    """), {"dup": duplicate_uuid}).fetchall()

    transferred = 0
    for r in rows:
        parent_uuid = r[0]
        if parent_uuid == master_uuid:
            continue
        self.db.execute(text("""
            INSERT INTO control_parent_links
                (control_uuid, parent_control_uuid, link_type, confidence,
                 source_regulation, source_article, obligation_candidate_id)
            VALUES (CAST(:cu AS uuid), CAST(:pu AS uuid), :lt, :conf,
                    :sr, :sa, CAST(:oci AS uuid))
            ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
        """), {
            "cu": master_uuid,
            "pu": parent_uuid,
            "lt": r[1],
            # Default to 1.0 only when no confidence is stored; a stored
            # value of 0 is kept rather than being promoted to 1.0.
            "conf": float(r[2]) if r[2] is not None else 1.0,
            "sr": r[3],
            "sa": r[4],
            "oci": r[5],
        })
        transferred += 1

    return transferred
|
||||
|
||||
def _write_review(self, candidate: dict, matched_payload: dict, score: float):
    """Persist a borderline match in control_dedup_reviews and commit.

    The row stores the candidate's control_id/title/objective, the matched
    control's uuid and id from the Qdrant payload, the similarity score,
    the stage 'batch_dedup', and a JSON blob holding the candidate's
    merge_group_hint and pattern_id. On any error the failure is logged,
    the transaction rolled back and the exception re-raised.
    """
    insert_sql = text("""
        INSERT INTO control_dedup_reviews
            (candidate_control_id, candidate_title, candidate_objective,
             matched_control_uuid, matched_control_id,
             similarity_score, dedup_stage, dedup_details)
        VALUES (:ccid, :ct, :co, CAST(:mcu AS uuid), :mci,
                :ss, 'batch_dedup', CAST(:dd AS jsonb))
    """)
    try:
        details = json.dumps({
            "merge_group_hint": candidate.get("merge_group_hint", ""),
            "pattern_id": candidate.get("pattern_id"),
        })
        bind = {
            "ccid": candidate["control_id"],
            "ct": candidate["title"],
            "co": candidate.get("objective", ""),
            "mcu": matched_payload.get("control_uuid"),
            "mci": matched_payload.get("control_id"),
            "ss": score,
            "dd": details,
        }
        self.db.execute(insert_sql, bind)
        self.db.commit()
    except Exception as e:
        logger.error("BatchDedup _write_review error: %s", e)
        self.db.rollback()
        raise
|
||||
|
||||
# ── Progress ─────────────────────────────────────────────────────────
|
||||
|
||||
def _log_progress(self, hint: str):
|
||||
"""Log progress every 500 controls."""
|
||||
if self._progress_count > 0 and self._progress_count % 500 == 0:
|
||||
logger.info(
|
||||
"BatchDedup [%s] %d/%d — masters=%d, linked=%d, review=%d",
|
||||
self._progress_phase, self._progress_count, self._progress_total,
|
||||
self.stats["masters"], self.stats["linked"], self.stats["review"],
|
||||
)
|
||||
|
||||
def get_status(self) -> dict:
    """Snapshot of phase, progress counter, total and all stats counters.

    Built for the status endpoint. Keys from ``self.stats`` are merged
    last, so a stats key named like one of the three progress keys wins.
    """
    snapshot = {
        "phase": self._progress_phase,
        "progress": self._progress_count,
        "total": self._progress_total,
    }
    snapshot.update(self.stats)
    return snapshot
|
||||
@@ -0,0 +1,438 @@
|
||||
"""
|
||||
Citation Backfill Service — enrich existing controls with article/paragraph provenance.
|
||||
|
||||
3-tier matching strategy:
|
||||
Tier 1 — Hash match: sha256(source_original_text) → RAG chunk lookup
|
||||
Tier 2 — Regex parse: split concatenated "DSGVO Art. 35" → regulation + article
|
||||
Tier 3 — Ollama LLM: ask local LLM to identify article/paragraph from text
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from .rag_client import ComplianceRAGClient, RAGSearchResult
|
||||
|
||||
logger = logging.getLogger(__name__)

# Ollama connection settings; each value can be overridden via the environment.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
OLLAMA_MODEL = os.getenv("CONTROL_GEN_OLLAMA_MODEL", "qwen3.5:35b-a3b")
# Passed as the httpx client timeout; a non-numeric env value raises at import time.
LLM_TIMEOUT = float(os.getenv("CONTROL_GEN_LLM_TIMEOUT", "180"))

# Collections scrolled when no regulation-specific collection can be
# determined for the controls being backfilled.
ALL_COLLECTIONS = [
    "bp_compliance_ce",
    "bp_compliance_gesetze",
    "bp_compliance_datenschutz",
    "bp_dsfa_corpus",
    "bp_legal_templates",
]

# System prompt (German) for the LLM tier. In English: "You are a legal
# expert. Your task is to determine the exact article and paragraph from a
# legal text. Answer ONLY with valid JSON."
BACKFILL_SYSTEM_PROMPT = (
    "Du bist ein Rechtsexperte. Deine Aufgabe ist es, aus einem Gesetzestext "
    "den genauen Artikel und Absatz zu bestimmen. Antworte NUR mit validem JSON."
)

# Regex to split concatenated source like "DSGVO Art. 35" or "NIS2 Artikel 21 Abs. 2"
# Group 1 is the regulation name, group 2 everything from "Art."/"Artikel" onwards.
_SOURCE_ARTICLE_RE = re.compile(
    r"^(.+?)\s+(Art(?:ikel)?\.?\s*\d+.*)$", re.IGNORECASE
)
|
||||
|
||||
|
||||
@dataclass
class MatchResult:
    """Article/paragraph found for one control, plus the tier that produced it."""

    # Article reference; an empty string when none was found.
    article: str
    # Paragraph reference; an empty string when none was found.
    paragraph: str
    method: str  # "hash", "regex", "llm"
|
||||
|
||||
|
||||
@dataclass
class BackfillResult:
    """Counters and error messages collected over one backfill run."""

    # Number of controls loaded for backfill.
    total_controls: int = 0
    # Matches per tier.
    matched_hash: int = 0
    matched_regex: int = 0
    matched_llm: int = 0
    # Controls for which no tier produced a match.
    unmatched: int = 0
    # Controls for which an UPDATE was issued (always 0 on a dry run).
    updated: int = 0
    # Human-readable error messages (per-control failures, commit failure).
    errors: list = field(default_factory=list)
|
||||
|
||||
|
||||
class CitationBackfill:
    """Backfill article/paragraph into existing control source_citations.

    Matching runs in three tiers per control: hash lookup of the source
    text against scrolled RAG chunks, regex parsing of a concatenated
    citation source, and finally a local LLM call.
    """

    def __init__(self, db: Session, rag_client: ComplianceRAGClient):
        self.db = db
        self.rag = rag_client
        # sha256 hex digest of chunk text -> chunk; filled by _build_rag_index_targeted().
        self._rag_index: dict[str, RAGSearchResult] = {}

    async def run(self, dry_run: bool = True, limit: int = 0) -> BackfillResult:
        """Main entry: iterate controls missing article/paragraph, match to RAG, update.

        Args:
            dry_run: If True (default), only count matches; no UPDATE is issued.
            limit: Maximum number of controls to load; 0 or less means no limit.

        Returns:
            BackfillResult with per-tier counters and collected error messages.
        """
        result = BackfillResult()

        # Load controls needing backfill
        controls = self._load_controls_needing_backfill(limit)
        result.total_controls = len(controls)
        logger.info("Backfill: %d controls need article/paragraph enrichment", len(controls))

        if not controls:
            return result

        # Collect hashes we need to find — only build index for controls with source text
        needed_hashes: set[str] = set()
        for ctrl in controls:
            src = ctrl.get("source_original_text")
            if src:
                needed_hashes.add(hashlib.sha256(src.encode()).hexdigest())

        if needed_hashes:
            # Build targeted RAG index — only scroll collections that our controls reference
            logger.info("Building targeted RAG hash index for %d source texts...", len(needed_hashes))
            await self._build_rag_index_targeted(controls)
            logger.info("RAG index built: %d chunks indexed, %d hashes needed", len(self._rag_index), len(needed_hashes))
        else:
            logger.info("No source_original_text found — skipping RAG index build")

        # Process each control
        for i, ctrl in enumerate(controls):
            if i > 0 and i % 100 == 0:
                logger.info("Backfill progress: %d/%d processed", i, result.total_controls)

            try:
                match = await self._match_control(ctrl)
                if match:
                    if match.method == "hash":
                        result.matched_hash += 1
                    elif match.method == "regex":
                        result.matched_regex += 1
                    elif match.method == "llm":
                        result.matched_llm += 1

                    if not dry_run:
                        self._update_control(ctrl, match)
                        result.updated += 1
                    else:
                        logger.debug(
                            "DRY RUN: Would update %s with article=%s paragraph=%s (method=%s)",
                            ctrl["control_id"], match.article, match.paragraph, match.method,
                        )
                else:
                    result.unmatched += 1

            except Exception as e:
                error_msg = f"Error backfilling {ctrl.get('control_id', '?')}: {e}"
                logger.error(error_msg)
                result.errors.append(error_msg)

        # All updates are committed together at the end of the run.
        if not dry_run:
            try:
                self.db.commit()
            except Exception as e:
                logger.error("Backfill commit failed: %s", e)
                result.errors.append(f"Commit failed: {e}")

        logger.info(
            "Backfill complete: %d total, hash=%d regex=%d llm=%d unmatched=%d updated=%d",
            result.total_controls, result.matched_hash, result.matched_regex,
            result.matched_llm, result.unmatched, result.updated,
        )
        return result

    def _load_controls_needing_backfill(self, limit: int = 0) -> list[dict]:
        """Load controls where source_citation exists but lacks separate 'article' key.

        Only controls with license_rule 1 or 2 are considered. ``id`` is
        converted to str, and JSON columns that arrive as strings are
        parsed (falling back to an empty dict when they are not valid JSON).
        """
        query = """
            SELECT id, control_id, source_citation, source_original_text,
                   generation_metadata, license_rule
            FROM canonical_controls
            WHERE license_rule IN (1, 2)
              AND source_citation IS NOT NULL
              AND (
                  source_citation->>'article' IS NULL
                  OR source_citation->>'article' = ''
              )
            ORDER BY control_id
        """
        params: dict = {}
        if limit > 0:
            # Bound parameter rather than formatting the value into the SQL text.
            query += " LIMIT :row_limit"
            params["row_limit"] = int(limit)

        result = self.db.execute(text(query), params)
        cols = result.keys()
        controls = []
        for row in result:
            ctrl = dict(zip(cols, row))
            ctrl["id"] = str(ctrl["id"])
            # Parse JSON fields
            for jf in ("source_citation", "generation_metadata"):
                if isinstance(ctrl.get(jf), str):
                    try:
                        ctrl[jf] = json.loads(ctrl[jf])
                    except (json.JSONDecodeError, TypeError):
                        ctrl[jf] = {}
            controls.append(ctrl)
        return controls

    async def _build_rag_index_targeted(self, controls: list[dict]):
        """Build RAG index by scrolling only collections relevant to our controls.

        Uses regulation codes from generation_metadata to identify which collections
        to search, falling back to all collections only if needed. Chunks whose
        stripped text is shorter than 50 characters are not indexed.
        """
        # Determine which collections are relevant based on regulation codes
        regulation_to_collection = self._map_regulations_to_collections(controls)
        collections_to_search = set(regulation_to_collection.values()) or set(ALL_COLLECTIONS)

        logger.info("Targeted index: searching %d collections: %s",
                    len(collections_to_search), ", ".join(collections_to_search))

        for collection in collections_to_search:
            offset = None
            page = 0
            # Offsets already requested; guards against a scroll that keeps
            # returning the same continuation offset.
            seen_offsets: set[str] = set()
            while True:
                chunks, next_offset = await self.rag.scroll(
                    collection=collection, offset=offset, limit=200,
                )
                if not chunks:
                    break
                for chunk in chunks:
                    if chunk.text and len(chunk.text.strip()) >= 50:
                        h = hashlib.sha256(chunk.text.encode()).hexdigest()
                        self._rag_index[h] = chunk
                page += 1
                if page % 50 == 0:
                    logger.info("Indexing %s: page %d (%d chunks so far)",
                                collection, page, len(self._rag_index))
                if not next_offset:
                    break
                if next_offset in seen_offsets:
                    logger.warning("Scroll loop in %s at page %d — stopping", collection, page)
                    break
                seen_offsets.add(next_offset)
                offset = next_offset

            logger.info("Indexed collection %s: %d pages", collection, page)

    def _map_regulations_to_collections(self, controls: list[dict]) -> dict[str, str]:
        """Map regulation codes from controls to likely Qdrant collections.

        Returns a dict of regulation code -> collection. For a regulation
        code with no known prefix, every entry of ALL_COLLECTIONS is added
        under a synthetic ``_all_<collection>`` key.
        """
        # Heuristic: regulation code prefix → collection
        # NOTE(review): "bp_compliance_recht" is not listed in ALL_COLLECTIONS — confirm it exists.
        collection_map = {
            "eu_": "bp_compliance_gesetze",
            "dsgvo": "bp_compliance_datenschutz",
            "bdsg": "bp_compliance_gesetze",
            "ttdsg": "bp_compliance_gesetze",
            "nist_": "bp_compliance_ce",
            "owasp": "bp_compliance_ce",
            "bsi_": "bp_compliance_ce",
            "enisa": "bp_compliance_ce",
            "at_": "bp_compliance_recht",
            "fr_": "bp_compliance_recht",
            "es_": "bp_compliance_recht",
        }
        result: dict[str, str] = {}
        for ctrl in controls:
            meta = ctrl.get("generation_metadata") or {}
            reg = meta.get("source_regulation", "")
            if not reg:
                continue
            for prefix, coll in collection_map.items():
                if reg.startswith(prefix):
                    result[reg] = coll
                    break
            else:
                # Unknown regulation — search all
                for coll in ALL_COLLECTIONS:
                    result[f"_all_{coll}"] = coll
        return result

    async def _match_control(self, ctrl: dict) -> Optional[MatchResult]:
        """3-tier matching: hash → regex → LLM.

        Returns the first tier's result that yields an article or paragraph,
        or None when no tier matches.
        """

        # Tier 1: Hash match against RAG index
        source_text = ctrl.get("source_original_text")
        if source_text:
            h = hashlib.sha256(source_text.encode()).hexdigest()
            chunk = self._rag_index.get(h)
            if chunk and (chunk.article or chunk.paragraph):
                return MatchResult(
                    article=chunk.article or "",
                    paragraph=chunk.paragraph or "",
                    method="hash",
                )

        # Tier 2: Regex parse concatenated source
        citation = ctrl.get("source_citation") or {}
        source_str = citation.get("source", "")
        parsed = _parse_concatenated_source(source_str)
        if parsed and parsed["article"]:
            return MatchResult(
                article=parsed["article"],
                paragraph="",  # Regex can't extract paragraph from concatenated format
                method="regex",
            )

        # Tier 3: Ollama LLM
        if source_text:
            return await self._llm_match(ctrl)

        return None

    async def _llm_match(self, ctrl: dict) -> Optional[MatchResult]:
        """Use Ollama to identify article/paragraph from source text.

        Only the first 2000 characters of the source text are sent. The
        model's answer is accepted only if it parses to a JSON object;
        ``article`` and ``paragraph`` are coerced to stripped strings so a
        null or numeric answer cannot leak into the stored citation.
        Any failure is logged as a warning and yields None.
        """
        citation = ctrl.get("source_citation") or {}
        regulation_name = citation.get("source", "")
        metadata = ctrl.get("generation_metadata") or {}
        regulation_code = metadata.get("source_regulation", "")
        # `or ""` also covers a key that is present with a None value.
        source_text = ctrl.get("source_original_text") or ""

        prompt = f"""Analysiere den folgenden Gesetzestext und bestimme den genauen Artikel und Absatz.

Gesetz: {regulation_name} (Code: {regulation_code})

Text:
---
{source_text[:2000]}
---

Antworte NUR mit JSON:
{{"article": "Art. XX", "paragraph": "Abs. Y"}}

Falls kein spezifischer Absatz erkennbar ist, setze paragraph auf "".
Falls kein Artikel erkennbar ist, setze article auf "".
Bei deutschen Gesetzen mit § verwende: "§ XX" statt "Art. XX"."""

        try:
            raw = await _llm_ollama(prompt, BACKFILL_SYSTEM_PROMPT)
            data = _parse_json(raw)
            if isinstance(data, dict):
                article = str(data.get("article") or "").strip()
                paragraph = str(data.get("paragraph") or "").strip()
                if article or paragraph:
                    return MatchResult(
                        article=article,
                        paragraph=paragraph,
                        method="llm",
                    )
        except Exception as e:
            logger.warning("LLM match failed for %s: %s", ctrl.get("control_id"), e)

        return None

    def _update_control(self, ctrl: dict, match: MatchResult):
        """Update source_citation and generation_metadata in DB.

        The UPDATE is executed but not committed here; ``run`` commits once
        at the end. The citation dict held in ``ctrl`` is modified in place.
        """
        citation = ctrl.get("source_citation") or {}

        # Clean the source name: remove concatenated article if present
        source_str = citation.get("source", "")
        parsed = _parse_concatenated_source(source_str)
        if parsed:
            citation["source"] = parsed["name"]

        # Add separate article/paragraph fields
        citation["article"] = match.article
        citation["paragraph"] = match.paragraph

        # Update generation_metadata
        metadata = ctrl.get("generation_metadata") or {}
        if match.article:
            metadata["source_article"] = match.article
            metadata["source_paragraph"] = match.paragraph
        metadata["backfill_method"] = match.method
        metadata["backfill_at"] = datetime.now(timezone.utc).isoformat()

        self.db.execute(
            text("""
                UPDATE canonical_controls
                SET source_citation = :citation,
                    generation_metadata = :metadata,
                    updated_at = NOW()
                WHERE id = CAST(:id AS uuid)
            """),
            {
                "id": ctrl["id"],
                "citation": json.dumps(citation),
                "metadata": json.dumps(metadata),
            },
        )
|
||||
|
||||
|
||||
def _parse_concatenated_source(source: str) -> Optional[dict]:
|
||||
"""Parse 'DSGVO Art. 35' → {name: 'DSGVO', article: 'Art. 35'}.
|
||||
|
||||
Also handles '§' format: 'BDSG § 42' → {name: 'BDSG', article: '§ 42'}.
|
||||
"""
|
||||
if not source:
|
||||
return None
|
||||
|
||||
# Try Art./Artikel pattern
|
||||
m = _SOURCE_ARTICLE_RE.match(source)
|
||||
if m:
|
||||
return {"name": m.group(1).strip(), "article": m.group(2).strip()}
|
||||
|
||||
# Try § pattern
|
||||
m2 = re.match(r"^(.+?)\s+(§\s*\d+.*)$", source)
|
||||
if m2:
|
||||
return {"name": m2.group(1).strip(), "article": m2.group(2).strip()}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
async def _llm_ollama(prompt: str, system_prompt: Optional[str] = None) -> str:
    """Send one non-streaming chat request to Ollama and return the reply text.

    The request asks for JSON output, caps generation at 256 tokens and
    sets ``think`` to False. Returns an empty string on a non-200 response
    or on any exception (both are logged as errors).
    """
    conversation = [{"role": "user", "content": prompt}]
    if system_prompt:
        conversation.insert(0, {"role": "system", "content": system_prompt})

    request_body = {
        "model": OLLAMA_MODEL,
        "messages": conversation,
        "stream": False,
        "format": "json",
        "options": {"num_predict": 256},
        "think": False,
    }

    try:
        async with httpx.AsyncClient(timeout=LLM_TIMEOUT) as client:
            resp = await client.post(f"{OLLAMA_URL}/api/chat", json=request_body)
            if resp.status_code != 200:
                logger.error("Ollama backfill failed %d: %s", resp.status_code, resp.text[:300])
                return ""
            body = resp.json()
            message = body.get("message", {})
            if not isinstance(message, dict):
                # Fall back to a top-level "response" field if present.
                return body.get("response", str(message))
            return message.get("content", "")
    except Exception as e:
        logger.error("Ollama backfill request failed: %s", e)
        return ""
|
||||
|
||||
|
||||
def _parse_json(raw: str) -> Optional[dict]:
|
||||
"""Extract JSON object from LLM output."""
|
||||
if not raw:
|
||||
return None
|
||||
# Try direct parse
|
||||
try:
|
||||
return json.loads(raw)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
# Try extracting from markdown code block
|
||||
m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", raw, re.DOTALL)
|
||||
if m:
|
||||
try:
|
||||
return json.loads(m.group(1))
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
# Try finding first { ... }
|
||||
m = re.search(r"\{[^{}]*\}", raw)
|
||||
if m:
|
||||
try:
|
||||
return json.loads(m.group(0))
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
return None
|
||||
@@ -0,0 +1,546 @@
|
||||
"""Control Composer — Pattern + Obligation → Master Control.
|
||||
|
||||
Takes an obligation (from ObligationExtractor) and a matched control pattern
|
||||
(from PatternMatcher), then uses LLM to compose a structured, actionable
|
||||
Master Control. Replaces the old Stage 3 (STRUCTURE/REFORM) with a
|
||||
pattern-guided approach.
|
||||
|
||||
Three composition modes based on license rules:
|
||||
Rule 1: Obligation + Pattern + original text → full control
|
||||
Rule 2: Obligation + Pattern + original text + citation → control
|
||||
Rule 3: Obligation + Pattern (NO original text) → reformulated control
|
||||
|
||||
Fallback: No pattern match → basic generation (tagged needs_pattern_assignment)
|
||||
|
||||
Part of the Multi-Layer Control Architecture (Phase 6 of 8).
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
from compliance.services.obligation_extractor import (
|
||||
ObligationMatch,
|
||||
_llm_ollama,
|
||||
_parse_json,
|
||||
)
|
||||
from compliance.services.pattern_matcher import (
|
||||
ControlPattern,
|
||||
PatternMatchResult,
|
||||
)
|
||||
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Ollama model name; overridable via the CONTROL_GEN_OLLAMA_MODEL env var.
# NOTE(review): no use of this constant is visible in this module — the
# imported _llm_ollama helper presumably reads its own module's setting; confirm.
OLLAMA_MODEL = os.getenv("CONTROL_GEN_OLLAMA_MODEL", "qwen3.5:35b-a3b")

# Valid values for generated control fields; _validate_control replaces
# anything outside these sets with a default.
VALID_SEVERITIES = {"low", "medium", "high", "critical"}
VALID_EFFORTS = {"s", "m", "l", "xl"}
VALID_VERIFICATION = {"code_review", "document", "tool", "hybrid"}
|
||||
|
||||
|
||||
@dataclass
class ComposedControl:
    """Master Control produced by combining an obligation with a pattern.

    All fields have defaults, so an empty instance can be created and filled
    in step by step by the composer.
    """

    # Core fields (match canonical_controls schema)
    control_id: str = ""
    title: str = ""
    objective: str = ""
    rationale: str = ""
    scope: dict = field(default_factory=dict)
    requirements: list = field(default_factory=list)
    test_procedure: list = field(default_factory=list)
    evidence: list = field(default_factory=list)
    severity: str = "medium"
    risk_score: float = 5.0
    implementation_effort: str = "m"
    open_anchors: list = field(default_factory=list)
    release_state: str = "draft"
    tags: list = field(default_factory=list)
    # 3-Rule License fields
    license_rule: Optional[int] = None
    source_original_text: Optional[str] = None
    source_citation: Optional[dict] = None
    customer_visible: bool = True
    # Classification
    verification_method: Optional[str] = None
    category: Optional[str] = None
    target_audience: Optional[list] = None
    # Pattern + Obligation linkage
    pattern_id: Optional[str] = None
    obligation_ids: list = field(default_factory=list)
    # Metadata
    generation_metadata: dict = field(default_factory=dict)
    composition_method: str = "pattern_guided"  # pattern_guided | template_only | fallback

    def to_dict(self) -> dict:
        """Return the control as a plain dict for DB storage or an API response.

        Keys appear in field-declaration order. Values are the attribute
        objects themselves (no copies are made).
        """
        exported = (
            "control_id",
            "title",
            "objective",
            "rationale",
            "scope",
            "requirements",
            "test_procedure",
            "evidence",
            "severity",
            "risk_score",
            "implementation_effort",
            "open_anchors",
            "release_state",
            "tags",
            "license_rule",
            "source_original_text",
            "source_citation",
            "customer_visible",
            "verification_method",
            "category",
            "target_audience",
            "pattern_id",
            "obligation_ids",
            "generation_metadata",
            "composition_method",
        )
        return {key: getattr(self, key) for key in exported}
|
||||
|
||||
|
||||
class ControlComposer:
    """Composes Master Controls from obligations + patterns.

    Usage::

        composer = ControlComposer()

        control = await composer.compose(
            obligation=obligation_match,
            pattern_result=pattern_match_result,
            chunk_text="...",
            license_rule=1,
            source_citation={...},
        )
    """

    async def compose(
        self,
        obligation: ObligationMatch,
        pattern_result: PatternMatchResult,
        chunk_text: Optional[str] = None,
        license_rule: int = 3,
        source_citation: Optional[dict] = None,
        regulation_code: Optional[str] = None,
    ) -> ComposedControl:
        """Compose a Master Control from obligation + pattern.

        Args:
            obligation: The extracted obligation (from ObligationExtractor).
            pattern_result: The matched pattern (from PatternMatcher).
            chunk_text: Original RAG chunk text (only used for Rules 1-2).
            license_rule: 1=free, 2=citation, 3=restricted.
            source_citation: Citation metadata for Rule 2.
            regulation_code: Source regulation code.

        Returns:
            ComposedControl ready for storage. Linkage, license and
            ``generation_metadata`` fields are filled in here, and the result
            has been passed through ``_validate_control``.
        """
        pattern = pattern_result.pattern if pattern_result else None

        if pattern:
            control = await self._compose_with_pattern(
                obligation, pattern, chunk_text, license_rule, source_citation,
            )
        else:
            control = await self._compose_fallback(
                obligation, chunk_text, license_rule, source_citation,
            )

        # Set linkage fields
        control.pattern_id = pattern.id if pattern else None
        if obligation.obligation_id:
            control.obligation_ids = [obligation.obligation_id]

        # Set license fields
        control.license_rule = license_rule
        if license_rule in (1, 2) and chunk_text:
            control.source_original_text = chunk_text
        if license_rule == 2 and source_citation:
            control.source_citation = source_citation
        if license_rule == 3:
            # Restricted sources: hide from customers and drop all source material.
            control.customer_visible = False
            control.source_original_text = None
            control.source_citation = None

        # Build metadata
        control.generation_metadata = {
            "composition_method": control.composition_method,
            "pattern_id": control.pattern_id,
            "pattern_confidence": round(pattern_result.confidence, 3) if pattern_result else 0,
            "pattern_method": pattern_result.method if pattern_result else "none",
            "obligation_id": obligation.obligation_id,
            "obligation_method": obligation.method,
            "obligation_confidence": round(obligation.confidence, 3),
            "license_rule": license_rule,
            "regulation_code": regulation_code,
        }

        # Validate and fix fields
        _validate_control(control)

        return control

    async def compose_batch(
        self,
        items: list[dict],
    ) -> list[ComposedControl]:
        """Compose multiple controls, one after another.

        Args:
            items: List of dicts with keys: obligation, pattern_result,
                chunk_text, license_rule, source_citation, regulation_code.
                Only ``obligation`` is required; the others fall back to the
                defaults of :meth:`compose` (an empty ``PatternMatchResult``
                for ``pattern_result``).

        Returns:
            List of ComposedControl instances, in input order.
        """
        results = []
        for item in items:
            control = await self.compose(
                obligation=item["obligation"],
                pattern_result=item.get("pattern_result", PatternMatchResult()),
                chunk_text=item.get("chunk_text"),
                license_rule=item.get("license_rule", 3),
                source_citation=item.get("source_citation"),
                regulation_code=item.get("regulation_code"),
            )
            results.append(control)
        return results

    # -----------------------------------------------------------------------
    # Shared helpers
    # -----------------------------------------------------------------------

    @staticmethod
    def _clip_title(value, default: str) -> str:
        """Return *value* as a title of at most 255 chars, or *default*.

        The model may return ``null``, a non-string, or a blank string for
        ``title``; slicing such a value directly would raise or yield an
        empty title, so *default* is used instead.
        """
        text = value if isinstance(value, str) and value.strip() else default
        return (text or "")[:255]

    # -----------------------------------------------------------------------
    # Pattern-guided composition
    # -----------------------------------------------------------------------

    async def _compose_with_pattern(
        self,
        obligation: ObligationMatch,
        pattern: ControlPattern,
        chunk_text: Optional[str],
        license_rule: int,
        source_citation: Optional[dict],
    ) -> ComposedControl:
        """Use LLM to fill the pattern template with obligation-specific details.

        Falls back to :meth:`_compose_from_template` when the LLM returns
        nothing or its output cannot be parsed into a JSON object. Fields
        missing from the LLM answer default to the pattern's templates.
        """
        prompt = _build_compose_prompt(obligation, pattern, chunk_text, license_rule)
        system_prompt = _compose_system_prompt(license_rule)

        llm_result = await _llm_ollama(prompt, system_prompt)
        if not llm_result:
            return self._compose_from_template(obligation, pattern)

        parsed = _parse_json(llm_result)
        # Anything but a non-empty JSON object is unusable for .get() access.
        if not parsed or not isinstance(parsed, dict):
            return self._compose_from_template(obligation, pattern)

        control = ComposedControl(
            title=self._clip_title(parsed.get("title"), pattern.name_de),
            objective=parsed.get("objective", pattern.objective_template),
            rationale=parsed.get("rationale", pattern.rationale_template),
            requirements=_ensure_list(parsed.get("requirements", pattern.requirements_template)),
            test_procedure=_ensure_list(parsed.get("test_procedure", pattern.test_procedure_template)),
            evidence=_ensure_list(parsed.get("evidence", pattern.evidence_template)),
            severity=parsed.get("severity", pattern.severity_default),
            implementation_effort=parsed.get("implementation_effort", pattern.implementation_effort_default),
            category=parsed.get("category", pattern.category),
            tags=_ensure_list(parsed.get("tags", pattern.tags)),
            target_audience=_ensure_list(parsed.get("target_audience", [])),
            verification_method=parsed.get("verification_method"),
            open_anchors=_anchors_from_pattern(pattern),
            composition_method="pattern_guided",
        )

        return control

    def _compose_from_template(
        self,
        obligation: ObligationMatch,
        pattern: ControlPattern,
    ) -> ComposedControl:
        """Fallback: fill template directly without LLM (when LLM fails).

        The title is the pattern name, extended by the obligation title when
        one exists. The objective is the pattern's template, extended by the
        first 200 characters of the obligation text when that text is longer
        than 20 characters. The result is tagged ``"template_only"``.
        """
        obl_title = obligation.obligation_title or ""
        obl_text = obligation.obligation_text or ""

        title = f"{pattern.name_de}"
        if obl_title:
            title = f"{pattern.name_de} — {obl_title}"

        objective = pattern.objective_template
        if obl_text and len(obl_text) > 20:
            objective = f"{pattern.objective_template} Bezug: {obl_text[:200]}"

        return ComposedControl(
            title=title[:255],
            objective=objective,
            rationale=pattern.rationale_template,
            requirements=list(pattern.requirements_template),
            test_procedure=list(pattern.test_procedure_template),
            evidence=list(pattern.evidence_template),
            severity=pattern.severity_default,
            implementation_effort=pattern.implementation_effort_default,
            category=pattern.category,
            tags=list(pattern.tags),
            open_anchors=_anchors_from_pattern(pattern),
            composition_method="template_only",
        )

    # -----------------------------------------------------------------------
    # Fallback (no pattern)
    # -----------------------------------------------------------------------

    async def _compose_fallback(
        self,
        obligation: ObligationMatch,
        chunk_text: Optional[str],
        license_rule: int,
        source_citation: Optional[dict],
    ) -> ComposedControl:
        """Generate a control without a pattern template (old-style).

        When the LLM returns nothing, or its output is not a JSON object, the
        control is built from the obligation text alone. The result is always
        marked ``composition_method="fallback"`` and
        ``release_state="needs_review"``.
        """
        prompt = _build_fallback_prompt(obligation, chunk_text, license_rule)
        system_prompt = _compose_system_prompt(license_rule)

        llm_result = await _llm_ollama(prompt, system_prompt)
        parsed = _parse_json(llm_result) if llm_result else None
        # _parse_json returns None for unparsable output; treat that (and any
        # non-object value) like an empty answer instead of failing on .get().
        if not isinstance(parsed, dict):
            parsed = {}

        obl_text = obligation.obligation_text or ""
        default_title = obl_text[:100] if obl_text else "Untitled Control"

        control = ComposedControl(
            title=self._clip_title(parsed.get("title"), default_title),
            objective=parsed.get("objective", obl_text[:500]),
            rationale=parsed.get("rationale", "Aus gesetzlicher Pflicht abgeleitet."),
            requirements=_ensure_list(parsed.get("requirements", [])),
            test_procedure=_ensure_list(parsed.get("test_procedure", [])),
            evidence=_ensure_list(parsed.get("evidence", [])),
            severity=parsed.get("severity", "medium"),
            implementation_effort=parsed.get("implementation_effort", "m"),
            category=parsed.get("category"),
            tags=_ensure_list(parsed.get("tags", [])),
            target_audience=_ensure_list(parsed.get("target_audience", [])),
            verification_method=parsed.get("verification_method"),
            composition_method="fallback",
            release_state="needs_review",
        )

        return control
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prompt builders
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _compose_system_prompt(license_rule: int) -> str:
|
||||
"""Build the system prompt based on license rule."""
|
||||
if license_rule == 3:
|
||||
return (
|
||||
"Du bist ein Security-Compliance-Experte. Deine Aufgabe ist es, "
|
||||
"eigenstaendige Security Controls zu formulieren. "
|
||||
"Du formulierst IMMER in eigenen Worten. "
|
||||
"KOPIERE KEINE Saetze aus dem Quelltext. "
|
||||
"Verwende eigene Begriffe und Struktur. "
|
||||
"NENNE NICHT die Quelle. Keine proprietaeren Bezeichner. "
|
||||
"Antworte NUR mit validem JSON."
|
||||
)
|
||||
return (
|
||||
"Du bist ein Security-Compliance-Experte. "
|
||||
"Erstelle ein praxisorientiertes, umsetzbares Security Control. "
|
||||
"Antworte NUR mit validem JSON."
|
||||
)
|
||||
|
||||
|
||||
def _build_compose_prompt(
    obligation: ObligationMatch,
    pattern: ControlPattern,
    chunk_text: Optional[str],
    license_rule: int,
) -> str:
    """Assemble the user prompt for pattern-guided composition.

    The prompt consists of six sections separated by blank lines: headline,
    obligation, pattern, context, task description and the expected JSON
    answer format. For license rule 3 the context never contains source
    text; otherwise it carries at most the first 2000 characters of
    *chunk_text*.
    """
    if license_rule == 3:
        context = "KONTEXT: Intern analysiert (keine Quellenangabe)."
    else:
        context = (
            f"KONTEXT (Originaltext):\n{chunk_text[:2000]}"
            if chunk_text
            else "KONTEXT: Kein Originaltext verfuegbar."
        )

    task = "\n".join((
        "AUFGABE:",
        "Fuelle das Muster mit pflicht-spezifischen Details.",
        "Das Ergebnis muss UMSETZBAR sein — keine Gesetzesparaphrase.",
        "Formuliere konkret und handlungsorientiert.",
    ))

    answer_format = f"""Antworte als JSON:
{{
"title": "Kurzer praegnanter Titel (max 100 Zeichen, deutsch)",
"objective": "Was soll erreicht werden? (1-3 Saetze)",
"rationale": "Warum ist das wichtig? (1-2 Saetze)",
"requirements": ["Konkrete Anforderung 1", "Anforderung 2", ...],
"test_procedure": ["Pruefschritt 1", "Pruefschritt 2", ...],
"evidence": ["Nachweis 1", "Nachweis 2", ...],
"severity": "low|medium|high|critical",
"implementation_effort": "s|m|l|xl",
"category": "{pattern.category}",
"tags": ["tag1", "tag2"],
"target_audience": ["unternehmen", "behoerden", "entwickler"],
"verification_method": "code_review|document|tool|hybrid"
}}"""

    sections = (
        "Erstelle ein PRAXISORIENTIERTES Security Control.",
        _obligation_section(obligation),
        _pattern_section(pattern),
        context,
        task,
        answer_format,
    )
    return "\n\n".join(sections)
|
||||
|
||||
|
||||
def _build_fallback_prompt(
    obligation: ObligationMatch,
    chunk_text: Optional[str],
    license_rule: int,
) -> str:
    """Assemble the user prompt used when no pattern matched.

    The prompt consists of five sections separated by blank lines: headline,
    obligation, context, task description and the expected JSON answer
    format. For license rule 3 the context never contains source text;
    otherwise it carries at most the first 2000 characters of *chunk_text*.
    """
    if license_rule == 3:
        context = "KONTEXT: Intern analysiert (keine Quellenangabe)."
    else:
        context = (
            f"KONTEXT (Originaltext):\n{chunk_text[:2000]}"
            if chunk_text
            else "KONTEXT: Kein Originaltext verfuegbar."
        )

    task = "\n".join((
        "AUFGABE:",
        "Formuliere ein umsetzbares Security Control.",
        "Keine Gesetzesparaphrase — konkrete Massnahmen beschreiben.",
    ))

    # No interpolation needed here, so this is a plain (non-f) string.
    answer_format = """Antworte als JSON:
{
"title": "Kurzer praegnanter Titel (max 100 Zeichen, deutsch)",
"objective": "Was soll erreicht werden? (1-3 Saetze)",
"rationale": "Warum ist das wichtig? (1-2 Saetze)",
"requirements": ["Konkrete Anforderung 1", "Anforderung 2", ...],
"test_procedure": ["Pruefschritt 1", "Pruefschritt 2", ...],
"evidence": ["Nachweis 1", "Nachweis 2", ...],
"severity": "low|medium|high|critical",
"implementation_effort": "s|m|l|xl",
"category": "one of: authentication, encryption, data_protection, etc.",
"tags": ["tag1", "tag2"],
"target_audience": ["unternehmen"],
"verification_method": "code_review|document|tool|hybrid"
}"""

    sections = (
        "Erstelle ein Security Control aus der folgenden Pflicht.",
        _obligation_section(obligation),
        context,
        task,
        answer_format,
    )
    return "\n\n".join(sections)
|
||||
|
||||
|
||||
def _obligation_section(obligation: ObligationMatch) -> str:
|
||||
"""Format the obligation for the prompt."""
|
||||
parts = ["PFLICHT (was das Gesetz verlangt):"]
|
||||
if obligation.obligation_title:
|
||||
parts.append(f" Titel: {obligation.obligation_title}")
|
||||
if obligation.obligation_text:
|
||||
parts.append(f" Beschreibung: {obligation.obligation_text[:500]}")
|
||||
if obligation.obligation_id:
|
||||
parts.append(f" ID: {obligation.obligation_id}")
|
||||
if obligation.regulation_id:
|
||||
parts.append(f" Rechtsgrundlage: {obligation.regulation_id}")
|
||||
if not obligation.obligation_text and not obligation.obligation_title:
|
||||
parts.append(" (Keine spezifische Pflicht extrahiert)")
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _pattern_section(pattern: ControlPattern) -> str:
|
||||
"""Format the pattern for the prompt."""
|
||||
reqs = "\n ".join(f"- {r}" for r in pattern.requirements_template[:5])
|
||||
tests = "\n ".join(f"- {t}" for t in pattern.test_procedure_template[:3])
|
||||
return f"""MUSTER (wie man es typischerweise umsetzt):
|
||||
Pattern: {pattern.name_de} ({pattern.id})
|
||||
Domain: {pattern.domain}
|
||||
Ziel-Template: {pattern.objective_template}
|
||||
Anforderungs-Template:
|
||||
{reqs}
|
||||
Pruefverfahren-Template:
|
||||
{tests}"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _ensure_list(value) -> list:
|
||||
"""Ensure a value is a list of strings."""
|
||||
if isinstance(value, list):
|
||||
return [str(v) for v in value if v]
|
||||
if isinstance(value, str):
|
||||
return [value]
|
||||
return []
|
||||
|
||||
|
||||
def _anchors_from_pattern(pattern: ControlPattern) -> list:
|
||||
"""Convert pattern's open_anchor_refs to control anchor format."""
|
||||
anchors = []
|
||||
for ref in pattern.open_anchor_refs:
|
||||
anchors.append({
|
||||
"framework": ref.get("framework", ""),
|
||||
"control_id": ref.get("ref", ""),
|
||||
"title": "",
|
||||
"alignment_score": 0.8,
|
||||
})
|
||||
return anchors
|
||||
|
||||
|
||||
def _validate_control(control: ComposedControl) -> None:
    """Validate and repair the fields of *control* in place.

    * ``severity`` / ``implementation_effort`` / ``verification_method`` are
      matched case-insensitively (after trimming) against the allowed sets;
      unknown or non-string values fall back to ``"medium"`` / ``"m"`` /
      ``None``. A falsy ``verification_method`` is left untouched.
    * ``risk_score`` outside 0..10 is replaced by the default score for the
      severity.
    * ``title`` is limited to 255 characters.
    * Empty ``objective``, ``rationale``, ``requirements``,
      ``test_procedure`` and ``evidence`` receive generic default content.
    """

    def _choice(value, allowed, default):
        """Return *value* normalised to lower case if allowed, else *default*."""
        # Non-strings (e.g. a list produced by the LLM) are rejected up
        # front; testing an unhashable value against a set would raise.
        if isinstance(value, str):
            candidate = value.strip().lower()
            if candidate in allowed:
                return candidate
        return default

    # Severity
    control.severity = _choice(control.severity, VALID_SEVERITIES, "medium")

    # Implementation effort
    control.implementation_effort = _choice(
        control.implementation_effort, VALID_EFFORTS, "m",
    )

    # Verification method (optional: only checked when a value is present)
    if control.verification_method:
        control.verification_method = _choice(
            control.verification_method, VALID_VERIFICATION, None,
        )

    # Risk score
    if not (0 <= control.risk_score <= 10):
        control.risk_score = _severity_to_risk(control.severity)

    # Title length
    if len(control.title) > 255:
        control.title = control.title[:252] + "..."

    # Ensure minimum content
    if not control.objective:
        control.objective = control.title
    if not control.rationale:
        control.rationale = "Aus regulatorischer Anforderung abgeleitet."
    if not control.requirements:
        control.requirements = ["Anforderung gemaess Pflichtbeschreibung umsetzen"]
    if not control.test_procedure:
        control.test_procedure = ["Umsetzung der Anforderungen pruefen"]
    if not control.evidence:
        control.evidence = ["Dokumentation der Umsetzung"]
|
||||
|
||||
|
||||
def _severity_to_risk(severity: str) -> float:
|
||||
"""Map severity to a default risk score."""
|
||||
return {
|
||||
"critical": 9.0,
|
||||
"high": 7.0,
|
||||
"medium": 5.0,
|
||||
"low": 3.0,
|
||||
}.get(severity, 5.0)
|
||||
@@ -0,0 +1,745 @@
|
||||
"""Control Deduplication Engine — 4-Stage Matching Pipeline.
|
||||
|
||||
Prevents duplicate atomic controls during Pass 0b by checking candidates
|
||||
against existing controls before insertion.
|
||||
|
||||
Stages:
|
||||
1. Pattern-Gate: pattern_id must match (hard gate)
|
||||
2. Action-Check: normalized action verb must match (hard gate)
|
||||
3. Object-Norm: normalized object must match (soft gate with high threshold)
|
||||
4. Embedding: cosine similarity with tiered thresholds (Qdrant)
|
||||
|
||||
Verdicts:
|
||||
- NEW: create a new atomic control
|
||||
- LINK: add parent link to existing control (similarity > LINK_THRESHOLD)
|
||||
- REVIEW: queue for human review (REVIEW_THRESHOLD < sim < LINK_THRESHOLD)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional, Callable, Awaitable
|
||||
|
||||
import httpx
|
||||
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# ── Configuration ────────────────────────────────────────────────────
# All settings are read once from the environment at import time.

# True only when the env var, lower-cased, is exactly "true".
DEDUP_ENABLED = os.getenv("DEDUP_ENABLED", "true").lower() == "true"
# Similarity above which a candidate is linked to an existing control.
LINK_THRESHOLD = float(os.getenv("DEDUP_LINK_THRESHOLD", "0.92"))
# Lower bound of the similarity band that is queued for human review.
REVIEW_THRESHOLD = float(os.getenv("DEDUP_REVIEW_THRESHOLD", "0.85"))
# NOTE(review): presumably the stricter link threshold applied when the
# normalized objects differ — usage is not visible here; confirm.
LINK_THRESHOLD_DIFF_OBJECT = float(os.getenv("DEDUP_LINK_THRESHOLD_DIFF_OBJ", "0.95"))
# NOTE(review): presumably the link threshold for matches across different
# regulations — usage is not visible here; confirm.
CROSS_REG_LINK_THRESHOLD = float(os.getenv("DEDUP_CROSS_REG_THRESHOLD", "0.95"))
# Name of the Qdrant collection and base URLs of the external services.
QDRANT_COLLECTION = os.getenv("DEDUP_QDRANT_COLLECTION", "atomic_controls")
QDRANT_URL = os.getenv("QDRANT_URL", "http://host.docker.internal:6333")
EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://embedding-service:8087")
|
||||
|
||||
|
||||
# ── Result Dataclass ─────────────────────────────────────────────────
|
||||
|
||||
@dataclass
class DedupResult:
    """Outcome of the dedup check."""

    # Decision for the candidate control.
    verdict: str  # "new" | "link" | "review"
    # Existing control the candidate matched, if any.
    # NOTE(review): presumably uuid is the DB key and control_id the
    # human-readable identifier — confirm against the callers.
    matched_control_uuid: Optional[str] = None
    matched_control_id: Optional[str] = None
    matched_title: Optional[str] = None
    stage: str = ""  # which stage decided
    # Similarity of the best match; stays 0.0 unless a stage sets it.
    similarity_score: float = 0.0
    link_type: str = "dedup_merge"  # "dedup_merge" | "cross_regulation"
    # Free-form extra information; a fresh dict per instance.
    details: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
# ── Action Normalization ─────────────────────────────────────────────
|
||||
|
||||
# Lookup table: lower-cased action verb → canonical English verb.
# Insertion order matters: the prefix match in normalize_action returns the
# first entry that fits.
_ACTION_SYNONYMS: dict[str, str] = {
    # German → canonical English
    "implementieren": "implement",
    "umsetzen": "implement",
    "einrichten": "implement",
    "einführen": "implement",
    "aufbauen": "implement",
    "bereitstellen": "implement",
    "aktivieren": "implement",
    "konfigurieren": "configure",
    "einstellen": "configure",
    "parametrieren": "configure",
    "testen": "test",
    "prüfen": "test",
    "überprüfen": "test",
    "verifizieren": "test",
    "validieren": "test",
    "kontrollieren": "test",
    "auditieren": "audit",
    "dokumentieren": "document",
    "protokollieren": "log",
    "aufzeichnen": "log",
    "loggen": "log",
    "überwachen": "monitor",
    "monitoring": "monitor",
    "beobachten": "monitor",
    "schulen": "train",
    "trainieren": "train",
    "sensibilisieren": "train",
    "löschen": "delete",
    "entfernen": "delete",
    "verschlüsseln": "encrypt",
    "sperren": "block",
    "beschränken": "restrict",
    "einschränken": "restrict",
    "begrenzen": "restrict",
    "autorisieren": "authorize",
    "genehmigen": "authorize",
    "freigeben": "authorize",
    "authentifizieren": "authenticate",
    "identifizieren": "identify",
    "melden": "report",
    "benachrichtigen": "notify",
    "informieren": "notify",
    "aktualisieren": "update",
    "erneuern": "update",
    "sichern": "backup",
    "wiederherstellen": "restore",
    # English passthrough
    "implement": "implement",
    "configure": "configure",
    "test": "test",
    "verify": "test",
    "validate": "test",
    "audit": "audit",
    "document": "document",
    "log": "log",
    "monitor": "monitor",
    "train": "train",
    "delete": "delete",
    "encrypt": "encrypt",
    "restrict": "restrict",
    "authorize": "authorize",
    "authenticate": "authenticate",
    "report": "report",
    "update": "update",
    "backup": "backup",
    "restore": "restore",
}


def normalize_action(action: str) -> str:
    """Normalize an action verb to a canonical English form.

    Lookup order:

    1. exact match of the trimmed, lower-cased verb,
    2. match after stripping a German conjugation suffix,
    3. prefix match in either direction against the known verbs (first
       table entry wins).

    Args:
        action: Action verb, German or English; may be empty.

    Returns:
        The canonical verb, the trimmed lower-cased input if nothing
        matches, or ``""`` for empty / whitespace-only input.
    """
    if not action:
        return ""
    action = action.strip().lower()
    if not action:
        # Whitespace-only input: every verb "starts with" the empty string,
        # so the prefix match below would otherwise return the first table
        # entry ("implement").
        return ""

    # Strip German infinitive/conjugation suffixes for lookup
    action_base = re.sub(r"(en|t|st|e|te|tet|end)$", "", action)

    # Try exact match first, then base form
    if action in _ACTION_SYNONYMS:
        return _ACTION_SYNONYMS[action]
    if action_base in _ACTION_SYNONYMS:
        return _ACTION_SYNONYMS[action_base]

    # Fuzzy: the input extends a known verb, or abbreviates one.
    # NOTE(review): very short inputs (1-2 letters) still match the first
    # verb sharing that prefix — consider a minimum length.
    for verb, canonical in _ACTION_SYNONYMS.items():
        if action.startswith(verb) or verb.startswith(action):
            return canonical

    return action  # fallback: return as-is
|
||||
|
||||
|
||||
# ── Object Normalization ─────────────────────────────────────────────
|
||||
|
||||
_OBJECT_SYNONYMS: dict[str, str] = {
|
||||
# Authentication / Access
|
||||
"mfa": "multi_factor_auth",
|
||||
"multi-faktor-authentifizierung": "multi_factor_auth",
|
||||
"mehrfaktorauthentifizierung": "multi_factor_auth",
|
||||
"multi-factor authentication": "multi_factor_auth",
|
||||
"two-factor": "multi_factor_auth",
|
||||
"2fa": "multi_factor_auth",
|
||||
"passwort": "password_policy",
|
||||
"kennwort": "password_policy",
|
||||
"password": "password_policy",
|
||||
"zugangsdaten": "credentials",
|
||||
"credentials": "credentials",
|
||||
"admin-konten": "privileged_access",
|
||||
"admin accounts": "privileged_access",
|
||||
"administratorkonten": "privileged_access",
|
||||
"privilegierte zugriffe": "privileged_access",
|
||||
"privileged accounts": "privileged_access",
|
||||
"remote-zugriff": "remote_access",
|
||||
"fernzugriff": "remote_access",
|
||||
"remote access": "remote_access",
|
||||
"session": "session_management",
|
||||
"sitzung": "session_management",
|
||||
"sitzungsverwaltung": "session_management",
|
||||
# Encryption
|
||||
"verschlüsselung": "encryption",
|
||||
"encryption": "encryption",
|
||||
"kryptografie": "encryption",
|
||||
"kryptografische verfahren": "encryption",
|
||||
"schlüssel": "key_management",
|
||||
"key management": "key_management",
|
||||
"schlüsselverwaltung": "key_management",
|
||||
"zertifikat": "certificate_management",
|
||||
"certificate": "certificate_management",
|
||||
"tls": "transport_encryption",
|
||||
"ssl": "transport_encryption",
|
||||
"https": "transport_encryption",
|
||||
# Network
|
||||
"firewall": "firewall",
|
||||
"netzwerk": "network_security",
|
||||
"network": "network_security",
|
||||
"vpn": "vpn",
|
||||
"segmentierung": "network_segmentation",
|
||||
"segmentation": "network_segmentation",
|
||||
# Logging / Monitoring
|
||||
"audit-log": "audit_logging",
|
||||
"audit log": "audit_logging",
|
||||
"protokoll": "audit_logging",
|
||||
"logging": "audit_logging",
|
||||
"monitoring": "monitoring",
|
||||
"überwachung": "monitoring",
|
||||
"alerting": "alerting",
|
||||
"alarmierung": "alerting",
|
||||
"siem": "siem",
|
||||
# Data
|
||||
"personenbezogene daten": "personal_data",
|
||||
"personal data": "personal_data",
|
||||
"sensible daten": "sensitive_data",
|
||||
"sensitive data": "sensitive_data",
|
||||
"datensicherung": "backup",
|
||||
"backup": "backup",
|
||||
"wiederherstellung": "disaster_recovery",
|
||||
"disaster recovery": "disaster_recovery",
|
||||
# Policy / Process
|
||||
"richtlinie": "policy",
|
||||
"policy": "policy",
|
||||
"verfahrensanweisung": "procedure",
|
||||
"procedure": "procedure",
|
||||
"prozess": "process",
|
||||
"schulung": "training",
|
||||
"training": "training",
|
||||
"awareness": "awareness",
|
||||
"sensibilisierung": "awareness",
|
||||
# Incident
|
||||
"vorfall": "incident",
|
||||
"incident": "incident",
|
||||
"sicherheitsvorfall": "security_incident",
|
||||
"security incident": "security_incident",
|
||||
# Vulnerability
|
||||
"schwachstelle": "vulnerability",
|
||||
"vulnerability": "vulnerability",
|
||||
"patch": "patch_management",
|
||||
"update": "patch_management",
|
||||
"patching": "patch_management",
|
||||
}
|
||||
|
||||
# Precompile for substring matching (longest first)
|
||||
_OBJECT_KEYS_SORTED = sorted(_OBJECT_SYNONYMS.keys(), key=len, reverse=True)
|
||||
|
||||
|
||||
def normalize_object(obj: str) -> str:
|
||||
"""Normalize a compliance object to a canonical token."""
|
||||
if not obj:
|
||||
return ""
|
||||
obj_lower = obj.strip().lower()
|
||||
# Exact match
|
||||
if obj_lower in _OBJECT_SYNONYMS:
|
||||
return _OBJECT_SYNONYMS[obj_lower]
|
||||
# Substring match (longest first)
|
||||
for phrase in _OBJECT_KEYS_SORTED:
|
||||
if phrase in obj_lower:
|
||||
return _OBJECT_SYNONYMS[phrase]
|
||||
# Fallback: strip articles/prepositions, join with underscore
|
||||
cleaned = re.sub(r"\b(der|die|das|den|dem|des|ein|eine|eines|einem|einen"
|
||||
r"|für|von|zu|auf|in|an|bei|mit|nach|über|unter|the|a|an"
|
||||
r"|for|of|to|on|in|at|by|with)\b", "", obj_lower)
|
||||
tokens = [t for t in cleaned.split() if len(t) > 2]
|
||||
return "_".join(tokens[:4]) if tokens else obj_lower.replace(" ", "_")
|
||||
|
||||
|
||||
# ── Canonicalization ─────────────────────────────────────────────────
|
||||
|
||||
def canonicalize_text(action: str, obj: str, title: str = "") -> str:
    """Build the canonical text that is sent to the embedding service.

    The result is ``"<normalized action> <normalized object>"``. When a title
    is given, filler words (German articles, conjunctions, prepositions and
    "gemäß"-style references) are removed from its lower-cased form and up to
    five remaining words longer than three characters are appended after the
    literal word ``for``.

    Args:
        action: Raw action text, passed through ``normalize_action``.
        obj: Raw object text, passed through ``normalize_object``.
        title: Optional control title supplying extra keywords.

    Returns:
        The space-joined canonical sentence.
    """
    pieces = [normalize_action(action), normalize_object(obj)]
    if not title:
        return " ".join(pieces)

    # Strip common filler from the title before picking keywords
    reduced = re.sub(
        r"\b(und|oder|für|von|zu|der|die|das|den|dem|des|ein|eine"
        r"|bei|mit|nach|gemäß|gem\.|laut|entsprechend)\b",
        "",
        title.lower(),
    )
    keywords = [w for w in reduced.split() if len(w) > 3][:5]
    if keywords:
        pieces += ["for", *keywords]
    return " ".join(pieces)
|
||||
|
||||
|
||||
# ── Embedding Helper ─────────────────────────────────────────────────
|
||||
|
||||
async def get_embedding(text: str) -> list[float]:
    """Fetch the embedding vector for a single text from the embedding service.

    Posts ``{"texts": [text]}`` to ``{EMBEDDING_URL}/embed`` with a 10 second
    timeout and returns the first entry of the ``embeddings`` array.

    This is a best-effort helper: any transport error, HTTP error status or
    malformed response is logged as a warning and yields an empty list, which
    callers treat as "embedding unavailable".

    Args:
        text: The text to embed.

    Returns:
        The embedding vector, or ``[]`` when none could be obtained.
    """
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(
                f"{EMBEDDING_URL}/embed",
                json={"texts": [text]},
            )
            # An error response with a JSON body would otherwise be reported
            # as "no embeddings" without leaving any trace in the logs.
            if resp.status_code >= 400:
                logger.warning("Embedding failed: HTTP %d", resp.status_code)
                return []
            embeddings = resp.json().get("embeddings", [])
            return embeddings[0] if embeddings else []
    except Exception as e:
        logger.warning("Embedding failed: %s", e)
        return []
|
||||
|
||||
|
||||
def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    Yields ``0.0`` when either vector is empty, when the lengths differ, or
    when either vector has zero magnitude.
    """
    if not (a and b) or len(a) != len(b):
        return 0.0

    dot = sum(x * y for x, y in zip(a, b))
    magnitude_a = sum(x * x for x in a) ** 0.5
    magnitude_b = sum(y * y for y in b) ** 0.5

    # A zero vector has no direction; avoid dividing by zero
    if magnitude_a == 0 or magnitude_b == 0:
        return 0.0
    return dot / (magnitude_a * magnitude_b)
|
||||
|
||||
|
||||
# ── Qdrant Helpers ───────────────────────────────────────────────────
|
||||
|
||||
async def qdrant_search(
    embedding: list[float],
    pattern_id: str,
    top_k: int = 10,
    collection: Optional[str] = None,
) -> list[dict]:
    """Search Qdrant for similar atomic controls that share ``pattern_id``.

    Args:
        embedding: Query vector; an empty vector short-circuits to ``[]``.
        pattern_id: Value the ``pattern_id`` payload field must match.
        top_k: Maximum number of hits requested.
        collection: Collection name; defaults to ``QDRANT_COLLECTION``.

    Returns:
        The ``result`` list of the Qdrant response, or ``[]`` on a non-200
        status or any exception (both are logged as warnings).
    """
    if not embedding:
        return []

    target = collection or QDRANT_COLLECTION
    pattern_filter = {
        "must": [
            {"key": "pattern_id", "match": {"value": pattern_id}}
        ]
    }
    request_body: dict = {
        "vector": embedding,
        "limit": top_k,
        "with_payload": True,
        "filter": pattern_filter,
    }

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(
                f"{QDRANT_URL}/collections/{target}/points/search",
                json=request_body,
            )
            if resp.status_code == 200:
                return resp.json().get("result", [])
            logger.warning("Qdrant search failed: %d", resp.status_code)
            return []
    except Exception as e:
        logger.warning("Qdrant search error: %s", e)
        return []
|
||||
|
||||
|
||||
async def qdrant_search_cross_regulation(
    embedding: list[float],
    top_k: int = 5,
    collection: Optional[str] = None,
) -> list[dict]:
    """Search Qdrant for similar controls across ALL regulations.

    Unlike ``qdrant_search`` no ``pattern_id`` filter is applied. Used for
    cross-regulation linking (e.g. DSGVO Art. 25 ↔ NIS2 Art. 21).

    Args:
        embedding: Query vector; an empty vector short-circuits to ``[]``.
        top_k: Maximum number of hits requested.
        collection: Collection name; defaults to ``QDRANT_COLLECTION``.

    Returns:
        The ``result`` list of the Qdrant response, or ``[]`` on a non-200
        status or any exception (both are logged as warnings).
    """
    if not embedding:
        return []

    target = collection or QDRANT_COLLECTION
    request_body: dict = {
        "vector": embedding,
        "limit": top_k,
        "with_payload": True,
    }

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(
                f"{QDRANT_URL}/collections/{target}/points/search",
                json=request_body,
            )
            if resp.status_code == 200:
                return resp.json().get("result", [])
            logger.warning("Qdrant cross-reg search failed: %d", resp.status_code)
            return []
    except Exception as e:
        logger.warning("Qdrant cross-reg search error: %s", e)
        return []
|
||||
|
||||
|
||||
async def qdrant_upsert(
    point_id: str,
    embedding: list[float],
    payload: dict,
    collection: Optional[str] = None,
) -> bool:
    """Upsert a single point into a Qdrant collection.

    Args:
        point_id: Identifier of the point.
        embedding: Vector to store; an empty vector is rejected.
        payload: Payload dict stored alongside the vector.
        collection: Collection name; defaults to ``QDRANT_COLLECTION``.

    Returns:
        ``True`` only when Qdrant answers with HTTP 200; ``False`` for an
        empty embedding, any other status, or an exception (logged).
    """
    if not embedding:
        return False

    target = collection or QDRANT_COLLECTION
    point = {"id": point_id, "vector": embedding, "payload": payload}
    request_body = {"points": [point]}

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.put(
                f"{QDRANT_URL}/collections/{target}/points",
                json=request_body,
            )
            return resp.status_code == 200
    except Exception as e:
        logger.warning("Qdrant upsert error: %s", e)
        return False
|
||||
|
||||
|
||||
async def ensure_qdrant_collection(
    vector_size: int = 1024,
    collection: Optional[str] = None,
) -> bool:
    """Make sure the Qdrant collection exists, creating it when necessary.

    A GET on the collection that returns 200 means it already exists.
    Otherwise the collection is created with cosine distance and keyword
    payload indexes are requested for the fields used in dedup filtering.
    The responses of the index requests are not inspected.

    Args:
        vector_size: Dimension of the stored vectors for a new collection.
        collection: Collection name; defaults to ``QDRANT_COLLECTION``.

    Returns:
        ``True`` if the collection exists or was created, ``False`` if
        creation failed or an exception occurred (logged).
    """
    target = collection or QDRANT_COLLECTION
    indexed_fields = ["pattern_id", "action_normalized", "object_normalized", "control_id"]

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            # Already there?
            resp = await client.get(f"{QDRANT_URL}/collections/{target}")
            if resp.status_code == 200:
                return True

            # Create it
            resp = await client.put(
                f"{QDRANT_URL}/collections/{target}",
                json={
                    "vectors": {"size": vector_size, "distance": "Cosine"},
                },
            )
            if resp.status_code != 200:
                logger.error("Failed to create Qdrant collection: %d", resp.status_code)
                return False

            logger.info("Created Qdrant collection: %s", target)
            # Payload indexes for the fields used in filters
            for field_name in indexed_fields:
                await client.put(
                    f"{QDRANT_URL}/collections/{target}/index",
                    json={"field_name": field_name, "field_schema": "keyword"},
                )
            return True
    except Exception as e:
        logger.warning("Qdrant collection check error: %s", e)
        return False
|
||||
|
||||
|
||||
# ── Main Dedup Checker ───────────────────────────────────────────────
|
||||
|
||||
class ControlDedupChecker:
    """4-stage dedup checker for atomic controls.

    Stages: pattern gate (same ``pattern_id``), action check, object
    normalization, embedding similarity. Candidates that come out as "new"
    get a second, unfiltered cross-regulation search.

    Usage:
        checker = ControlDedupChecker(db_session)
        result = await checker.check_duplicate(action, obj, title, pattern_id)
        if result.verdict == "link":
            checker.add_parent_link(result.matched_control_uuid, parent_uuid)
        elif result.verdict == "review":
            checker.write_review(control_id, title, objective, result)
        else:
            ...  # insert new control
    """

    def __init__(
        self,
        db,
        embed_fn: Optional[Callable[[str], Awaitable[list[float]]]] = None,
        search_fn: Optional[Callable] = None,
    ):
        """Create a checker.

        Args:
            db: Database session; must provide ``execute()`` and ``commit()``.
            embed_fn: Async callable text → vector; defaults to ``get_embedding``.
            search_fn: Async callable ``(embedding, pattern_id, top_k=...)``
                returning Qdrant hits; defaults to ``qdrant_search``.
        """
        self.db = db
        self._embed = embed_fn or get_embedding
        self._search = search_fn or qdrant_search
        self._cache: dict[str, list[dict]] = {}  # pattern_id → existing controls

    def _load_existing(self, pattern_id: str) -> list[dict]:
        """Load existing atomic controls with the same pattern_id from the DB.

        Only non-deprecated controls that have a parent are returned. Results
        are memoized per ``pattern_id`` for the lifetime of this instance.
        """
        if pattern_id in self._cache:
            return self._cache[pattern_id]

        from sqlalchemy import text

        rows = self.db.execute(text("""
            SELECT id::text, control_id, title, objective,
                   pattern_id,
                   generation_metadata->>'obligation_type' as obligation_type
            FROM canonical_controls
            WHERE parent_control_uuid IS NOT NULL
              AND release_state != 'deprecated'
              AND pattern_id = :pid
        """), {"pid": pattern_id}).fetchall()

        result = [
            {
                "uuid": r[0], "control_id": r[1], "title": r[2],
                "objective": r[3], "pattern_id": r[4],
                "obligation_type": r[5],
            }
            for r in rows
        ]
        self._cache[pattern_id] = result
        return result

    async def check_duplicate(
        self,
        action: str,
        obj: str,
        title: str,
        pattern_id: Optional[str],
    ) -> DedupResult:
        """Run the 4-stage dedup pipeline + cross-regulation linking.

        Args:
            action: Raw action text of the candidate control.
            obj: Raw object text of the candidate control.
            title: Candidate title (adds keywords to the embedded text).
            pattern_id: Pattern the candidate belongs to; without it no
                dedup is attempted.

        Returns:
            DedupResult with verdict: new/link/review.
        """
        # No pattern_id → can't dedup meaningfully
        if not pattern_id:
            return DedupResult(verdict="new", stage="no_pattern")

        # Stage 1: Pattern-Gate
        existing = self._load_existing(pattern_id)
        if not existing:
            return DedupResult(
                verdict="new", stage="pattern_gate",
                details={"reason": "no existing controls with this pattern_id"},
            )

        # Stage 2: Action-Check
        # The action is not stored on the DB rows loaded above, so it is
        # compared against the "action_normalized" payload of the Qdrant hit.
        norm_action = normalize_action(action)

        # Stage 3: Object-Normalization
        norm_object = normalize_object(obj)

        # Stage 4: Embedding Similarity
        canonical = canonicalize_text(action, obj, title)
        embedding = await self._embed(canonical)
        if not embedding:
            # Can't compute embedding → default to new
            return DedupResult(
                verdict="new", stage="embedding_unavailable",
                details={"canonical_text": canonical},
            )

        # Search Qdrant
        results = await self._search(embedding, pattern_id, top_k=5)

        if not results:
            # No intra-pattern matches → try cross-regulation
            return await self._check_cross_regulation(embedding, DedupResult(
                verdict="new", stage="no_qdrant_matches",
                details={"canonical_text": canonical, "action": norm_action, "object": norm_object},
            ))

        # Evaluate best match (first hit of the search result)
        best = results[0]
        best_score = best.get("score", 0.0)
        best_payload = best.get("payload", {})
        best_action = best_payload.get("action_normalized", "")
        best_object = best_payload.get("object_normalized", "")

        # Fields shared by every link/review verdict below
        match_fields = {
            "matched_control_uuid": best_payload.get("control_uuid"),
            "matched_control_id": best_payload.get("control_id"),
            "matched_title": best_payload.get("title"),
            "similarity_score": best_score,
        }

        # Action differs → NEW (even if embedding is high)
        if best_action and norm_action and best_action != norm_action:
            return await self._check_cross_regulation(embedding, DedupResult(
                verdict="new", stage="action_mismatch",
                similarity_score=best_score,
                matched_control_id=best_payload.get("control_id"),
                details={
                    "candidate_action": norm_action,
                    "existing_action": best_action,
                    "similarity": best_score,
                },
            ))

        # Object differs → use higher threshold
        if best_object and norm_object and best_object != norm_object:
            if best_score > LINK_THRESHOLD_DIFF_OBJECT:
                return DedupResult(
                    verdict="link", stage="embedding_diff_object",
                    details={"candidate_object": norm_object, "existing_object": best_object},
                    **match_fields,
                )
            return await self._check_cross_regulation(embedding, DedupResult(
                verdict="new", stage="object_mismatch_below_threshold",
                similarity_score=best_score,
                matched_control_id=best_payload.get("control_id"),
                details={
                    "candidate_object": norm_object,
                    "existing_object": best_object,
                    "threshold": LINK_THRESHOLD_DIFF_OBJECT,
                },
            ))

        # Same action + same object → tiered thresholds
        if best_score > LINK_THRESHOLD:
            return DedupResult(verdict="link", stage="embedding_match", **match_fields)
        if best_score > REVIEW_THRESHOLD:
            return DedupResult(verdict="review", stage="embedding_review", **match_fields)

        return await self._check_cross_regulation(embedding, DedupResult(
            verdict="new", stage="embedding_below_threshold",
            similarity_score=best_score,
            details={"threshold": REVIEW_THRESHOLD},
        ))

    async def _check_cross_regulation(
        self,
        embedding: list[float],
        intra_result: DedupResult,
    ) -> DedupResult:
        """Second pass: cross-regulation linking for controls deemed 'new'.

        Searches Qdrant WITHOUT a pattern_id filter and links only when the
        best score exceeds ``CROSS_REG_LINK_THRESHOLD``. In every other case
        ``intra_result`` is returned unchanged.
        """
        if intra_result.verdict != "new" or not embedding:
            return intra_result

        cross_results = await qdrant_search_cross_regulation(embedding, top_k=5)
        if not cross_results:
            return intra_result

        best = cross_results[0]
        best_score = best.get("score", 0.0)
        if best_score > CROSS_REG_LINK_THRESHOLD:
            best_payload = best.get("payload", {})
            return DedupResult(
                verdict="link",
                stage="cross_regulation",
                matched_control_uuid=best_payload.get("control_uuid"),
                matched_control_id=best_payload.get("control_id"),
                matched_title=best_payload.get("title"),
                similarity_score=best_score,
                link_type="cross_regulation",
                details={
                    "cross_reg_score": best_score,
                    "cross_reg_threshold": CROSS_REG_LINK_THRESHOLD,
                },
            )

        return intra_result

    def add_parent_link(
        self,
        control_uuid: str,
        parent_control_uuid: str,
        link_type: str = "dedup_merge",
        confidence: float = 0.0,
        source_regulation: Optional[str] = None,
        source_article: Optional[str] = None,
        obligation_candidate_id: Optional[str] = None,
    ) -> None:
        """Add a parent link to an existing atomic control and commit.

        An already existing ``(control_uuid, parent_control_uuid)`` pair is
        left untouched (``ON CONFLICT ... DO NOTHING``).
        """
        from sqlalchemy import text

        # CAST(:name AS type) instead of :name::type — a bind parameter that is
        # directly followed by "::" is not parsed reliably by text().
        self.db.execute(text("""
            INSERT INTO control_parent_links
                (control_uuid, parent_control_uuid, link_type, confidence,
                 source_regulation, source_article, obligation_candidate_id)
            VALUES (:cu, :pu, :lt, :conf, :sr, :sa, CAST(:oci AS uuid))
            ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
        """), {
            "cu": control_uuid,
            "pu": parent_control_uuid,
            "lt": link_type,
            "conf": confidence,
            "sr": source_regulation,
            "sa": source_article,
            "oci": obligation_candidate_id,
        })
        self.db.commit()

    def write_review(
        self,
        candidate_control_id: str,
        candidate_title: str,
        candidate_objective: str,
        result: DedupResult,
        parent_control_uuid: Optional[str] = None,
        obligation_candidate_id: Optional[str] = None,
    ) -> None:
        """Write a dedup review queue entry and commit.

        The match information (uuid, id, score, stage, details) is taken
        from ``result``; ``result.details`` is stored as JSON.
        """
        import json

        from sqlalchemy import text

        # CAST(:name AS type) instead of :name::type — see add_parent_link.
        self.db.execute(text("""
            INSERT INTO control_dedup_reviews
                (candidate_control_id, candidate_title, candidate_objective,
                 matched_control_uuid, matched_control_id,
                 similarity_score, dedup_stage, dedup_details,
                 parent_control_uuid, obligation_candidate_id)
            VALUES (:ccid, :ct, :co, CAST(:mcu AS uuid), :mci, :ss, :ds,
                    CAST(:dd AS jsonb), CAST(:pcu AS uuid), :oci)
        """), {
            "ccid": candidate_control_id,
            "ct": candidate_title,
            "co": candidate_objective,
            "mcu": result.matched_control_uuid,
            "mci": result.matched_control_id,
            "ss": result.similarity_score,
            "ds": result.stage,
            "dd": json.dumps(result.details),
            "pcu": parent_control_uuid,
            "oci": obligation_candidate_id,
        })
        self.db.commit()

    async def index_control(
        self,
        control_uuid: str,
        control_id: str,
        title: str,
        action: str,
        obj: str,
        pattern_id: str,
        collection: Optional[str] = None,
    ) -> bool:
        """Index a new atomic control in Qdrant for future dedup checks.

        The control is embedded from the same canonical text that
        ``check_duplicate`` uses and stored under ``control_uuid``.

        Returns:
            ``False`` if no embedding could be computed, otherwise the
            result of ``qdrant_upsert``.
        """
        norm_action = normalize_action(action)
        norm_object = normalize_object(obj)
        canonical = canonicalize_text(action, obj, title)
        embedding = await self._embed(canonical)
        if not embedding:
            return False
        return await qdrant_upsert(
            point_id=control_uuid,
            embedding=embedding,
            payload={
                "control_uuid": control_uuid,
                "control_id": control_id,
                "title": title,
                "pattern_id": pattern_id,
                "action_normalized": norm_action,
                "object_normalized": norm_object,
                "canonical_text": canonical,
            },
            collection=collection,
        )
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Control Status Transition State Machine.
|
||||
|
||||
Enforces that controls cannot be set to "pass" without sufficient evidence.
|
||||
Prevents Compliance-Theater where controls claim compliance without real proof.
|
||||
|
||||
Transition rules:
|
||||
planned → in_progress : always allowed
|
||||
in_progress → pass : requires ≥1 evidence with confidence ≥ E2 and
|
||||
                          truth_status in (uploaded, observed, validated_internal,
                          accepted_by_auditor, provided_to_auditor)
|
||||
in_progress → partial : requires ≥1 evidence (any level)
|
||||
pass → fail : always allowed (degradation)
|
||||
any → n/a : requires status_justification
|
||||
any → planned : always allowed (reset)
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from ..db.models import EvidenceDB
|
||||
|
||||
|
||||
# Confidence level ordering for comparisons
CONFIDENCE_ORDER = {"E0": 0, "E1": 1, "E2": 2, "E3": 3, "E4": 4}

# Truth statuses that qualify as "real" evidence for pass transitions
VALID_TRUTH_STATUSES = {"uploaded", "observed", "validated_internal", "accepted_by_auditor", "provided_to_auditor"}


def _enum_or_str(value, default: str) -> str:
    """Return ``value.value`` for enum-like objects, ``str(value)`` for other
    truthy values and ``default`` for missing/empty values."""
    if hasattr(value, "value"):
        return value.value
    return str(value) if value else default


def _has_qualifying_evidence(evidence_list) -> bool:
    """Tell whether any evidence has confidence >= E2 and a valid truth status.

    A missing confidence level counts as "E1", a missing truth status as
    "uploaded"; unknown confidence labels rank like E1.
    """
    for e in evidence_list:
        conf_val = _enum_or_str(getattr(e, "confidence_level", None), "E1")
        truth_val = _enum_or_str(getattr(e, "truth_status", None), "uploaded")
        if CONFIDENCE_ORDER.get(conf_val, 1) >= CONFIDENCE_ORDER["E2"] and truth_val in VALID_TRUTH_STATUSES:
            return True
    return False


def validate_transition(
    current_status: str,
    new_status: str,
    evidence_list: Optional[List["EvidenceDB"]] = None,
    status_justification: Optional[str] = None,
    bypass_for_auto_updater: bool = False,
) -> Tuple[bool, List[str]]:
    """
    Validate whether a control status transition is allowed.

    Rules, evaluated in this order:
      * same status or reset to "planned": always allowed
      * to "n/a": requires a non-blank status_justification
      * to "fail": always allowed
      * to "partial": requires at least one evidence record
      * to "pass": requires at least one evidence record with confidence
        >= E2 and a truth status from VALID_TRUTH_STATUSES
      * anything else (e.g. to "in_progress"): allowed

    Args:
        current_status: Current control status value (e.g. "planned", "pass")
        new_status: Requested new status
        evidence_list: List of EvidenceDB objects linked to this control
        status_justification: Text justification (required for n/a transitions)
        bypass_for_auto_updater: If True, skip evidence checks (used by CI/CD auto-updater
            which creates evidence atomically with status change)

    Returns:
        Tuple of (allowed: bool, violations: list[str])
    """
    evidence = evidence_list or []

    # Same status → no-op; reset to planned is always allowed
    if current_status == new_status or new_status == "planned":
        return True, []

    # n/a requires justification
    if new_status == "n/a":
        if status_justification and status_justification.strip():
            return True, []
        return False, [
            "Transition to 'n/a' requires a status_justification explaining why this control is not applicable."
        ]

    # Degradation to fail is always allowed, from any status
    if new_status == "fail":
        return True, []

    # → partial: needs at least 1 evidence
    if new_status == "partial":
        if bypass_for_auto_updater or evidence:
            return True, []
        return False, ["Transition to 'partial' requires at least 1 evidence record."]

    # → pass: strict evidence requirements, whatever the current status is.
    # NOTE(review): the previous version carried two further branches after
    # this one ("planned"/"fail" → "pass" must go through "in_progress", and a
    # separate "partial" → "pass" check). They could never run because this
    # branch always returns, so a direct planned → pass with qualifying
    # evidence has always been accepted. They were removed without changing
    # behaviour; enforce the in_progress step here if that rule is wanted.
    if new_status == "pass":
        if bypass_for_auto_updater:
            return True, []
        if not evidence:
            return False, ["Transition to 'pass' requires at least 1 evidence record."]
        if _has_qualifying_evidence(evidence):
            return True, []
        return False, [
            "Transition to 'pass' requires at least 1 evidence with confidence >= E2 "
            "and truth_status in (uploaded, observed, validated_internal, accepted_by_auditor). "
            "Current evidence does not meet this threshold."
        ]

    # All other transitions allowed (e.g. planned → in_progress)
    return True, []
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,714 @@
|
||||
"""Framework Decomposition Engine — decomposes framework-container obligations.
|
||||
|
||||
Sits between Pass 0a (obligation extraction) and Pass 0b (atomic control
|
||||
composition). Detects obligations that reference a framework domain (e.g.
|
||||
"CCM-Praktiken fuer AIS") and decomposes them into concrete sub-obligations
|
||||
using an internal framework registry.
|
||||
|
||||
Three routing types:
|
||||
atomic → pass through to Pass 0b unchanged
|
||||
compound → split compound verbs, then Pass 0b
|
||||
framework_container → decompose via registry, then Pass 0b
|
||||
|
||||
The registry is a set of JSON files under compliance/data/frameworks/.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registry loading
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Directory scanned for framework definition files (*.json): the "data/frameworks"
# folder two levels above this module.
_REGISTRY_DIR = Path(__file__).resolve().parent.parent.joinpath("data", "frameworks")
# Module-wide cache filled on first use: framework_id → framework dict
_REGISTRY: dict[str, dict] = {}
|
||||
|
||||
|
||||
def _load_registry() -> dict[str, dict]:
    """Read every ``*.json`` file in the registry directory.

    Files are processed in sorted path order. Each framework is keyed by its
    ``framework_id`` field, falling back to the file name without extension.
    A file that cannot be read or parsed is logged with its traceback and
    skipped; a missing directory yields an empty registry and a warning.

    Returns:
        Mapping framework_id → parsed framework dict.
    """
    loaded: dict[str, dict] = {}
    if not _REGISTRY_DIR.is_dir():
        logger.warning("Framework registry dir not found: %s", _REGISTRY_DIR)
        return loaded

    for json_path in sorted(_REGISTRY_DIR.glob("*.json")):
        try:
            with json_path.open(encoding="utf-8") as handle:
                framework = json.load(handle)
            framework_id = framework.get("framework_id", json_path.stem)
            loaded[framework_id] = framework
            domain_count = len(framework.get("domains", []))
            logger.info("Loaded framework: %s (%d domains)", framework_id, domain_count)
        except Exception:
            # One broken file must not prevent the others from loading
            logger.exception("Failed to load framework file: %s", json_path)
    return loaded
|
||||
|
||||
|
||||
def get_registry() -> dict[str, dict]:
    """Return the module-wide framework registry, loading it on first use.

    An empty registry is treated as "not loaded yet", so the directory is
    scanned again on each call until at least one framework has been loaded.
    """
    global _REGISTRY
    if _REGISTRY:
        return _REGISTRY
    _REGISTRY = _load_registry()
    return _REGISTRY
|
||||
|
||||
|
||||
def reload_registry() -> dict[str, dict]:
    """Re-read the framework registry from disk, replacing the cached one.

    Returns:
        The freshly loaded registry.
    """
    global _REGISTRY
    fresh = _load_registry()
    _REGISTRY = fresh
    return fresh
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Framework alias index (built from registry)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _build_alias_index(registry: dict[str, dict]) -> dict[str, str]:
    """Build a lowercase alias → framework_id lookup.

    Two kinds of aliases are registered for every framework:
      * exact aliases: the framework id and its ``display_name``;
      * short forms: each underscore-separated part of the id that has at
        least three characters (e.g. "csa" and "ccm" for "csa_ccm").

    Exact aliases are registered first and are never replaced by a short
    form, so a framework whose id happens to be a fragment of another id
    (e.g. "owasp" vs. "owasp_asvs") keeps resolving to itself regardless of
    registry order. A short form shared by several frameworks resolves to
    the last one in iteration order.

    Args:
        registry: Mapping framework_id → framework dict.

    Returns:
        Mapping lower-cased alias → framework_id.
    """
    idx: dict[str, str] = {}

    # Pass 1: exact aliases (framework id and display name)
    for fw_id, fw in registry.items():
        idx[fw_id.lower()] = fw_id
        name = fw.get("display_name", "")
        if name:
            idx[name.lower()] = fw_id
    exact_aliases = frozenset(idx)

    # Pass 2: common short forms, never shadowing an exact alias
    for fw_id in registry:
        for part in fw_id.lower().replace("_", " ").split():
            if len(part) >= 3 and part not in exact_aliases:
                idx[part] = fw_id
    return idx
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Routing — classify obligation type
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Extended pattern for framework detection (beyond the simple _COMPOSITE_RE
# in decomposition_pass.py): "<practices/controls/...> <for/from/per/...> X".
# Group 1 captures the referenced framework/domain text X, which ends at a
# modal/implementation verb, a period, a comma or the end of the text.
_FRAMEWORK_PATTERN = re.compile(
    r"(?:praktiken|kontrollen|ma(?:ss|ß)nahmen|anforderungen|vorgaben|controls|practices|measures|requirements)"
    r"\s+(?:f(?:ue|ü)r|aus|gem(?:ae|ä)(?:ss|ß)|nach|from|of|for|per)\s+"
    r"(.+?)(?:\s+(?:m(?:ue|ü)ssen|sollen|sind|werden|implementieren|umsetzen|einf(?:ue|ü)hren)|\.|,|$)",
    flags=re.IGNORECASE,
)

# Framework names that are recognized directly, wherever they occur
_DIRECT_FRAMEWORK_RE = re.compile(
    r"\b(?:CSA\s*CCM|NIST\s*(?:SP\s*)?800-53|OWASP\s*(?:ASVS|SAMM|Top\s*10)"
    r"|CIS\s*Controls|BSI\s*(?:IT-)?Grundschutz|ENISA|ISO\s*2700[12]"
    r"|COBIT|SOX|PCI\s*DSS|HITRUST|SOC\s*2|KRITIS)\b",
    flags=re.IGNORECASE,
)

# Conjunctions that may join several main verbs (German and English)
_COMPOUND_VERB_RE = re.compile(
    r"\b(?:und|sowie|als\s+auch|or|and)\b",
    flags=re.IGNORECASE,
)

# Fixed verb pairs that look compound but must not be split
_NO_SPLIT_PHRASES = [
    # German
    "pflegen und aufrechterhalten",
    "dokumentieren und pflegen",
    "definieren und dokumentieren",
    "erstellen und freigeben",
    "pruefen und genehmigen",
    "identifizieren und bewerten",
    "erkennen und melden",
    # English
    "define and maintain",
    "create and maintain",
    "establish and maintain",
    "monitor and review",
    "detect and respond",
]
|
||||
|
||||
|
||||
@dataclass
class RoutingResult:
    """Outcome of classifying one obligation for routing.

    Only ``routing_type`` is required; the framework fields stay ``None``
    unless a framework (and possibly one of its domains) was identified.
    """

    # One of: atomic | compound | framework_container | unknown_review
    routing_type: str
    # Registry id of the referenced framework, when resolved
    framework_ref: Optional[str] = None
    # Id of the matched domain inside that framework, when matched
    framework_domain: Optional[str] = None
    # Title of the matched domain
    domain_title: Optional[str] = None
    # Confidence score assigned by the classifier
    confidence: float = 0.0
    # Short machine-readable explanation of the decision
    reason: str = ""
|
||||
|
||||
|
||||
def classify_routing(
    obligation_text: str,
    action_raw: str,
    object_raw: str,
    condition_raw: Optional[str] = None,
) -> RoutingResult:
    """Classify an obligation into atomic / compound / framework_container.

    Checks are made in order of precedence: a framework reference wins over
    a compound-verb finding, and everything else is treated as atomic.

    Args:
        obligation_text: Full obligation text.
        action_raw: Raw action phrase of the obligation.
        object_raw: Raw object phrase of the obligation.
        condition_raw: Accepted for interface compatibility; not consulted
            by the classification.

    Returns:
        The routing decision with confidence and reason.
    """
    # --- Step 1: Framework container detection ---
    fw_result = _detect_framework(obligation_text, object_raw)
    if fw_result.routing_type == "framework_container":
        return fw_result

    # --- Step 2: Compound verb detection ---
    if _is_compound_obligation(action_raw, obligation_text):
        return RoutingResult(
            routing_type="compound",
            confidence=0.7,
            reason="multiple_main_verbs",
        )

    # --- Step 3: Default = atomic ---
    return RoutingResult(
        routing_type="atomic",
        confidence=0.9,
        reason="single_action_single_object",
    )
|
||||
|
||||
|
||||
def _detect_framework(
    obligation_text: str, object_raw: str,
) -> RoutingResult:
    """Decide whether an obligation references a framework (domain).

    Three strategies are tried in order on "<obligation_text> <object_raw>":
      1. a directly named framework (``_DIRECT_FRAMEWORK_RE``) — always
         yields ``framework_container``, with a lower confidence when the
         name cannot be resolved against the registry;
      2. a "practices for X" style phrase (``_FRAMEWORK_PATTERN``) whose
         reference text resolves to a registry framework;
      3. framework keywords in the object alone.

    Returns:
        A ``framework_container`` result, or an ``atomic`` result with
        confidence 0.0 when nothing matched.
    """
    haystack = f"{obligation_text} {object_raw}"
    registry = get_registry()
    alias_idx = _build_alias_index(registry)

    # Strategy 1: direct framework name match
    direct = _DIRECT_FRAMEWORK_RE.search(haystack)
    if direct:
        fw_name = direct.group(0).strip()
        fw_id = _resolve_framework_id(fw_name, alias_idx, registry)
        if not fw_id:
            # Framework name recognized but not in registry
            return RoutingResult(
                routing_type="framework_container",
                framework_ref=None,
                framework_domain=None,
                confidence=0.6,
                reason=f"direct_framework_match_no_registry:{fw_name}",
            )
        domain_id, domain_title = _match_domain(haystack, registry[fw_id])
        return RoutingResult(
            routing_type="framework_container",
            framework_ref=fw_id,
            framework_domain=domain_id,
            domain_title=domain_title,
            confidence=0.95 if domain_id else 0.75,
            reason=f"direct_framework_match:{fw_name}",
        )

    # Strategy 2: pattern match ("Praktiken fuer X")
    phrase = _FRAMEWORK_PATTERN.search(haystack)
    if phrase:
        ref_text = phrase.group(1).strip()
        fw_id, domain_id, domain_title = _resolve_from_ref_text(
            ref_text, registry, alias_idx,
        )
        if fw_id:
            return RoutingResult(
                routing_type="framework_container",
                framework_ref=fw_id,
                framework_domain=domain_id,
                domain_title=domain_title,
                confidence=0.85 if domain_id else 0.65,
                reason=f"pattern_match:{ref_text}",
            )

    # Strategy 3: keyword-heavy object
    if _has_framework_keywords(object_raw):
        return RoutingResult(
            routing_type="framework_container",
            framework_ref=None,
            framework_domain=None,
            confidence=0.5,
            reason="framework_keywords_in_object",
        )

    return RoutingResult(routing_type="atomic", confidence=0.0)
|
||||
|
||||
|
||||
def _resolve_framework_id(
|
||||
name: str,
|
||||
alias_idx: dict[str, str],
|
||||
registry: dict[str, dict],
|
||||
) -> Optional[str]:
|
||||
"""Resolve a framework name to its registry ID."""
|
||||
normalized = re.sub(r"\s+", " ", name.strip().lower())
|
||||
# Direct alias match
|
||||
if normalized in alias_idx:
|
||||
return alias_idx[normalized]
|
||||
# Try compact form (strip spaces, hyphens, underscores)
|
||||
compact = re.sub(r"[\s_\-]+", "", normalized)
|
||||
for alias, fw_id in alias_idx.items():
|
||||
if re.sub(r"[\s_\-]+", "", alias) == compact:
|
||||
return fw_id
|
||||
# Substring match in display names
|
||||
for fw_id, fw in registry.items():
|
||||
display = fw.get("display_name", "").lower()
|
||||
if normalized in display or display in normalized:
|
||||
return fw_id
|
||||
# Partial match: check if normalized contains any alias (for multi-word refs)
|
||||
for alias, fw_id in alias_idx.items():
|
||||
if len(alias) >= 4 and alias in normalized:
|
||||
return fw_id
|
||||
return None
|
||||
|
||||
|
||||
def _match_domain(
|
||||
text: str, framework: dict,
|
||||
) -> tuple[Optional[str], Optional[str]]:
|
||||
"""Match a domain within a framework from text references."""
|
||||
text_lower = text.lower()
|
||||
best_id: Optional[str] = None
|
||||
best_title: Optional[str] = None
|
||||
best_score = 0
|
||||
|
||||
for domain in framework.get("domains", []):
|
||||
score = 0
|
||||
domain_id = domain["domain_id"]
|
||||
title = domain.get("title", "")
|
||||
|
||||
# Exact domain ID match (e.g. "AIS")
|
||||
if re.search(rf"\b{re.escape(domain_id)}\b", text, re.IGNORECASE):
|
||||
score += 10
|
||||
|
||||
# Full title match
|
||||
if title.lower() in text_lower:
|
||||
score += 8
|
||||
|
||||
# Alias match
|
||||
for alias in domain.get("aliases", []):
|
||||
if alias.lower() in text_lower:
|
||||
score += 6
|
||||
break
|
||||
|
||||
# Keyword overlap
|
||||
kw_hits = sum(
|
||||
1 for kw in domain.get("keywords", [])
|
||||
if kw.lower() in text_lower
|
||||
)
|
||||
score += kw_hits
|
||||
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_id = domain_id
|
||||
best_title = title
|
||||
|
||||
if best_score >= 3:
|
||||
return best_id, best_title
|
||||
return None, None
|
||||
|
||||
|
||||
def _resolve_from_ref_text(
|
||||
ref_text: str,
|
||||
registry: dict[str, dict],
|
||||
alias_idx: dict[str, str],
|
||||
) -> tuple[Optional[str], Optional[str], Optional[str]]:
|
||||
"""Resolve framework + domain from a reference text like 'AIS' or 'Application Security'."""
|
||||
ref_lower = ref_text.lower()
|
||||
|
||||
for fw_id, fw in registry.items():
|
||||
for domain in fw.get("domains", []):
|
||||
# Check domain ID
|
||||
if domain["domain_id"].lower() in ref_lower:
|
||||
return fw_id, domain["domain_id"], domain.get("title")
|
||||
# Check title
|
||||
if domain.get("title", "").lower() in ref_lower:
|
||||
return fw_id, domain["domain_id"], domain.get("title")
|
||||
# Check aliases
|
||||
for alias in domain.get("aliases", []):
|
||||
if alias.lower() in ref_lower or ref_lower in alias.lower():
|
||||
return fw_id, domain["domain_id"], domain.get("title")
|
||||
|
||||
return None, None, None
|
||||
|
||||
|
||||
_FRAMEWORK_KW_SET = {
|
||||
"praktiken", "kontrollen", "massnahmen", "maßnahmen",
|
||||
"anforderungen", "vorgaben", "framework", "standard",
|
||||
"baseline", "katalog", "domain", "family", "category",
|
||||
"practices", "controls", "measures", "requirements",
|
||||
}
|
||||
|
||||
|
||||
def _has_framework_keywords(text: str) -> bool:
    """Return True when *text* contains at least two distinct framework-indicator keywords.

    The text is lower-cased and split into runs of ``a-z``/``äöüß`` letters;
    each run is compared against ``_FRAMEWORK_KW_SET``.
    """
    tokens = re.findall(r"[a-zäöüß]+", text.lower())
    distinct_hits = _FRAMEWORK_KW_SET.intersection(tokens)
    return len(distinct_hits) >= 2
|
||||
|
||||
|
||||
def _is_compound_obligation(action_raw: str, obligation_text: str) -> bool:
    """Detect whether the action names several competing main verbs.

    Args:
        action_raw: Raw action phrase of the obligation.
        obligation_text: Full obligation text; not consulted by this check.

    Returns:
        True when the action contains no no-split phrase, matches the
        compound-verb pattern, and splits on a conjunction into at least two
        fragments of three or more characters.
    """
    if not action_raw:
        return False

    lowered = action_raw.lower().strip()

    # Phrases that must never be split take precedence over everything else.
    if any(phrase in lowered for phrase in _NO_SPLIT_PHRASES):
        return False

    # Without a conjunction there is nothing to split.
    if not _COMPOUND_VERB_RE.search(lowered):
        return False

    fragments = re.split(r"\b(?:und|sowie|als\s+auch|or|and)\b", lowered)
    substantial = sum(1 for fragment in fragments if len(fragment.strip()) >= 3)
    return substantial >= 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Framework Decomposition
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
class DecomposedObligation:
    """A concrete obligation derived from a framework container."""

    # Identity and lineage
    obligation_candidate_id: str
    parent_control_id: str
    parent_framework_container_id: str

    # Provenance
    source_ref_law: str
    source_ref_article: str

    # Obligation content
    obligation_text: str
    actor: str
    action_raw: str
    object_raw: str
    condition_raw: Optional[str] = field(default=None)
    trigger_raw: Optional[str] = field(default=None)

    # Pipeline state
    routing_type: str = field(default="atomic")
    release_state: str = field(default="decomposed")
    subcontrol_id: str = field(default="")

    # Metadata
    action_hint: str = field(default="")
    object_hint: str = field(default="")
    object_class: str = field(default="")
    keywords: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class FrameworkDecompositionResult:
    """Result of framework decomposition."""

    # Identity
    framework_container_id: str
    source_obligation_candidate_id: str

    # What the container resolved to (None where resolution failed)
    framework_ref: Optional[str]
    framework_domain: Optional[str]
    domain_title: Optional[str]

    # Outcome
    matched_subcontrols: list[str]
    decomposition_confidence: float
    release_state: str  # decomposed | unmatched | error
    decomposed_obligations: list[DecomposedObligation]
    issues: list[str]
|
||||
|
||||
|
||||
def decompose_framework_container(
    obligation_candidate_id: str,
    parent_control_id: str,
    obligation_text: str,
    framework_ref: Optional[str],
    framework_domain: Optional[str],
    actor: str = "organization",
) -> FrameworkDecompositionResult:
    """Decompose a framework-container obligation into concrete sub-obligations.

    Steps:
        1. Resolve framework from registry
        2. Resolve domain within framework
        3. Select relevant subcontrols (keyword filter or full domain)
        4. Generate decomposed obligations

    Args:
        obligation_candidate_id: ID of the container obligation; each
            generated obligation ID is ``"<this>-<subcontrol_id>"``.
        parent_control_id: Copied onto every generated obligation.
        obligation_text: Container text, used for framework/domain/subcontrol
            matching.
        framework_ref: Registry ID of the framework, if already known.
        framework_domain: Domain ID within the framework, if already known.
        actor: Actor assigned to every generated obligation.

    Returns:
        A result with ``release_state="decomposed"`` and one obligation per
        selected subcontrol, or ``release_state="unmatched"`` (confidence 0.0,
        no obligations) when no framework or no subcontrol could be matched.
        Problems are reported as ``"ERROR: ..."`` / ``"WARN: ..."`` strings
        in ``issues``.
    """
    container_id = f"FWC-{uuid.uuid4().hex[:8]}"
    registry = get_registry()
    issues: list[str] = []

    def _unmatched() -> FrameworkDecompositionResult:
        # Shared shape of both early exits; reads framework_ref and
        # framework_domain as they stand when called.
        return FrameworkDecompositionResult(
            framework_container_id=container_id,
            source_obligation_candidate_id=obligation_candidate_id,
            framework_ref=framework_ref,
            framework_domain=framework_domain,
            domain_title=None,
            matched_subcontrols=[],
            decomposition_confidence=0.0,
            release_state="unmatched",
            decomposed_obligations=[],
            issues=issues,
        )

    # Step 1: Resolve framework
    fw = None
    if framework_ref and framework_ref in registry:
        fw = registry[framework_ref]
    else:
        # Try to find by name in text
        fw, framework_ref = _find_framework_in_text(obligation_text, registry)

    if not fw:
        issues.append("ERROR: framework_not_matched")
        return _unmatched()

    # Step 2: Resolve domain
    domains = fw.get("domains", [])
    domain_data = None
    domain_title = None
    if framework_domain:
        for candidate in domains:
            if candidate["domain_id"].lower() == framework_domain.lower():
                domain_data = candidate
                domain_title = candidate.get("title")
                break
    if not domain_data:
        # Try matching from text
        domain_id, domain_title = _match_domain(obligation_text, fw)
        if domain_id:
            for candidate in domains:
                if candidate["domain_id"] == domain_id:
                    domain_data = candidate
                    framework_domain = domain_id
                    break

    if not domain_data:
        issues.append("WARN: domain_not_matched — using all domains")
        # Fall back to all subcontrols across all domains. Each subcontrol is
        # copied before it is tagged so the registry's own dicts are never
        # modified.
        all_subcontrols = [
            {**sc, "_domain_id": candidate["domain_id"]}
            for candidate in domains
            for sc in candidate.get("subcontrols", [])
        ]
        subcontrols = _select_subcontrols(obligation_text, all_subcontrols)
        if not subcontrols:
            issues.append("ERROR: no_subcontrols_matched")
            return _unmatched()
    else:
        # Step 3: Select subcontrols from domain
        raw_subcontrols = domain_data.get("subcontrols", [])
        subcontrols = _select_subcontrols(obligation_text, raw_subcontrols)
        if not subcontrols:
            # Full domain decomposition
            subcontrols = raw_subcontrols

    # Quality check: too many subcontrols
    if len(subcontrols) > 25:
        issues.append(f"WARN: {len(subcontrols)} subcontrols — may be too broad")

    # Step 4: Generate decomposed obligations
    display_name = fw.get("display_name", framework_ref or "Unknown")
    decomposed: list[DecomposedObligation] = []
    matched_ids: list[str] = []

    for sc in subcontrols:
        sc_id = sc.get("subcontrol_id", "")
        matched_ids.append(sc_id)

        statement = sc.get("statement", "")
        action_hint = sc.get("action_hint", "")
        object_hint = sc.get("object_hint", "")

        # Quality warnings
        if not action_hint:
            issues.append(f"WARN: {sc_id} missing action_hint")
        if not object_hint:
            issues.append(f"WARN: {sc_id} missing object_hint")

        decomposed.append(DecomposedObligation(
            obligation_candidate_id=f"{obligation_candidate_id}-{sc_id}",
            parent_control_id=parent_control_id,
            parent_framework_container_id=container_id,
            source_ref_law=display_name,
            source_ref_article=sc_id,
            obligation_text=statement,
            actor=actor,
            # Hints win; the statement is only analysed when a hint is missing.
            action_raw=action_hint or _infer_action(statement),
            object_raw=object_hint or _infer_object(statement),
            routing_type="atomic",
            release_state="decomposed",
            subcontrol_id=sc_id,
            action_hint=action_hint,
            object_hint=object_hint,
            object_class=sc.get("object_class", ""),
            # Copy so the result does not share a list with the registry entry.
            keywords=list(sc.get("keywords", [])),
        ))

    # Check if decomposed are identical to container
    container_text = obligation_text.strip()
    for obligation in decomposed:
        if obligation.obligation_text.strip() == container_text:
            issues.append(f"WARN: {obligation.subcontrol_id} identical to container text")

    confidence = _compute_decomposition_confidence(
        framework_ref, framework_domain, domain_data, len(subcontrols), issues,
    )

    return FrameworkDecompositionResult(
        framework_container_id=container_id,
        source_obligation_candidate_id=obligation_candidate_id,
        framework_ref=framework_ref,
        framework_domain=framework_domain,
        domain_title=domain_title,
        matched_subcontrols=matched_ids,
        decomposition_confidence=confidence,
        release_state="decomposed",
        decomposed_obligations=decomposed,
        issues=issues,
    )
|
||||
|
||||
|
||||
def _find_framework_in_text(
    text: str, registry: dict[str, dict],
) -> tuple[Optional[dict], Optional[str]]:
    """Search *text* for a known framework name.

    Returns:
        ``(framework_definition, framework_id)`` when the direct-framework
        pattern matches and the matched name resolves to a registry entry,
        otherwise ``(None, None)``.
    """
    alias_idx = _build_alias_index(registry)
    hit = _DIRECT_FRAMEWORK_RE.search(text)
    if not hit:
        return None, None

    resolved = _resolve_framework_id(hit.group(0), alias_idx, registry)
    if not resolved or resolved not in registry:
        return None, None
    return registry[resolved], resolved
|
||||
|
||||
|
||||
def _select_subcontrols(
|
||||
obligation_text: str, subcontrols: list[dict],
|
||||
) -> list[dict]:
|
||||
"""Select relevant subcontrols based on keyword matching.
|
||||
|
||||
Returns empty list if no targeted match found (caller falls back to
|
||||
full domain).
|
||||
"""
|
||||
text_lower = obligation_text.lower()
|
||||
scored: list[tuple[int, dict]] = []
|
||||
|
||||
for sc in subcontrols:
|
||||
score = 0
|
||||
for kw in sc.get("keywords", []):
|
||||
if kw.lower() in text_lower:
|
||||
score += 1
|
||||
# Title match
|
||||
title = sc.get("title", "").lower()
|
||||
if title and title in text_lower:
|
||||
score += 3
|
||||
# Object hint in text
|
||||
obj = sc.get("object_hint", "").lower()
|
||||
if obj and obj in text_lower:
|
||||
score += 2
|
||||
|
||||
if score > 0:
|
||||
scored.append((score, sc))
|
||||
|
||||
if not scored:
|
||||
return []
|
||||
|
||||
# Only return those with meaningful overlap (score >= 2)
|
||||
scored.sort(key=lambda x: x[0], reverse=True)
|
||||
return [sc for score, sc in scored if score >= 2]
|
||||
|
||||
|
||||
def _infer_action(statement: str) -> str:
|
||||
"""Infer a basic action verb from a statement."""
|
||||
s = statement.lower()
|
||||
if any(w in s for w in ["definiert", "definieren", "define"]):
|
||||
return "definieren"
|
||||
if any(w in s for w in ["implementiert", "implementieren", "implement"]):
|
||||
return "implementieren"
|
||||
if any(w in s for w in ["dokumentiert", "dokumentieren", "document"]):
|
||||
return "dokumentieren"
|
||||
if any(w in s for w in ["ueberwacht", "ueberwachen", "monitor"]):
|
||||
return "ueberwachen"
|
||||
if any(w in s for w in ["getestet", "testen", "test"]):
|
||||
return "testen"
|
||||
if any(w in s for w in ["geschuetzt", "schuetzen", "protect"]):
|
||||
return "implementieren"
|
||||
if any(w in s for w in ["verwaltet", "verwalten", "manage"]):
|
||||
return "pflegen"
|
||||
if any(w in s for w in ["gemeldet", "melden", "report"]):
|
||||
return "melden"
|
||||
return "implementieren"
|
||||
|
||||
|
||||
def _infer_object(statement: str) -> str:
|
||||
"""Infer the primary object from a statement (first noun phrase)."""
|
||||
# Simple heuristic: take the text after "muessen"/"muss" up to the verb
|
||||
m = re.search(
|
||||
r"(?:muessen|muss|m(?:ü|ue)ssen)\s+(.+?)(?:\s+werden|\s+sein|\.|,|$)",
|
||||
statement,
|
||||
re.IGNORECASE,
|
||||
)
|
||||
if m:
|
||||
return m.group(1).strip()[:80]
|
||||
# Fallback: first 80 chars
|
||||
return statement[:80] if statement else ""
|
||||
|
||||
|
||||
def _compute_decomposition_confidence(
|
||||
framework_ref: Optional[str],
|
||||
domain: Optional[str],
|
||||
domain_data: Optional[dict],
|
||||
num_subcontrols: int,
|
||||
issues: list[str],
|
||||
) -> float:
|
||||
"""Compute confidence score for the decomposition."""
|
||||
score = 0.3
|
||||
if framework_ref:
|
||||
score += 0.25
|
||||
if domain:
|
||||
score += 0.20
|
||||
if domain_data:
|
||||
score += 0.10
|
||||
if 1 <= num_subcontrols <= 15:
|
||||
score += 0.10
|
||||
elif num_subcontrols > 15:
|
||||
score += 0.05 # less confident with too many
|
||||
|
||||
# Penalize errors
|
||||
errors = sum(1 for i in issues if i.startswith("ERROR:"))
|
||||
score -= errors * 0.15
|
||||
return round(max(min(score, 1.0), 0.0), 2)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registry statistics (for admin/debugging)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def registry_stats() -> dict:
    """Return summary statistics about the loaded registry.

    The result holds the framework count, one detail record per framework
    (ID, display name, domain count, subcontrol count) and the domain and
    subcontrol totals across all frameworks.
    """
    registry = get_registry()
    details = []
    domain_total = 0
    subcontrol_total = 0

    for framework_id, framework in registry.items():
        framework_domains = framework.get("domains", [])
        subcontrol_count = 0
        for domain in framework_domains:
            subcontrol_count += len(domain.get("subcontrols", []))

        domain_total += len(framework_domains)
        subcontrol_total += subcontrol_count
        details.append({
            "framework_id": framework_id,
            "display_name": framework.get("display_name", ""),
            "domains": len(framework_domains),
            "subcontrols": subcontrol_count,
        })

    return {
        "frameworks": len(registry),
        "details": details,
        "total_domains": domain_total,
        "total_subcontrols": subcontrol_total,
    }
|
||||
@@ -173,6 +173,7 @@ class LLMProviderType(str, Enum):
|
||||
"""Supported LLM provider types."""
|
||||
ANTHROPIC = "anthropic"
|
||||
SELF_HOSTED = "self_hosted"
|
||||
OLLAMA = "ollama" # Alias for self_hosted (Ollama-specific)
|
||||
MOCK = "mock" # For testing
|
||||
|
||||
|
||||
@@ -392,6 +393,7 @@ class SelfHostedProvider(LLMProvider):
|
||||
"model": self.model,
|
||||
"prompt": full_prompt,
|
||||
"stream": False,
|
||||
"think": False, # Disable thinking mode (qwen3.5 etc.)
|
||||
"options": {}
|
||||
}
|
||||
|
||||
@@ -549,7 +551,7 @@ def get_llm_config() -> LLMConfig:
|
||||
vault_path="breakpilot/api_keys/anthropic",
|
||||
env_var="ANTHROPIC_API_KEY"
|
||||
)
|
||||
elif provider_type == LLMProviderType.SELF_HOSTED:
|
||||
elif provider_type in (LLMProviderType.SELF_HOSTED, LLMProviderType.OLLAMA):
|
||||
api_key = get_secret_from_vault_or_env(
|
||||
vault_path="breakpilot/api_keys/self_hosted_llm",
|
||||
env_var="SELF_HOSTED_LLM_KEY"
|
||||
@@ -558,7 +560,7 @@ def get_llm_config() -> LLMConfig:
|
||||
# Select model based on provider type
|
||||
if provider_type == LLMProviderType.ANTHROPIC:
|
||||
model = os.getenv("ANTHROPIC_MODEL", "claude-sonnet-4-20250514")
|
||||
elif provider_type == LLMProviderType.SELF_HOSTED:
|
||||
elif provider_type in (LLMProviderType.SELF_HOSTED, LLMProviderType.OLLAMA):
|
||||
model = os.getenv("SELF_HOSTED_LLM_MODEL", "qwen2.5:14b")
|
||||
else:
|
||||
model = "mock-model"
|
||||
@@ -591,7 +593,7 @@ def get_llm_provider(config: Optional[LLMConfig] = None) -> LLMProvider:
|
||||
return MockProvider(config)
|
||||
return AnthropicProvider(config)
|
||||
|
||||
elif config.provider_type == LLMProviderType.SELF_HOSTED:
|
||||
elif config.provider_type in (LLMProviderType.SELF_HOSTED, LLMProviderType.OLLAMA):
|
||||
if not config.base_url:
|
||||
logger.warning("No self-hosted LLM URL found, using mock provider")
|
||||
return MockProvider(config)
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
"""Shared normative language patterns for assertion classification.
|
||||
|
||||
Extracted from decomposition_pass.py for reuse in the assertion engine.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
# Mandatory ("Pflicht") wording.
_PFLICHT_SIGNALS = [
    # modal verbs and explicit duty phrases
    r"\bmüssen\b",
    r"\bmuss\b",
    r"\bhat\s+sicherzustellen\b",
    r"\bhaben\s+sicherzustellen\b",
    r"\bsind\s+verpflichtet\b",
    r"\bist\s+verpflichtet\b",
    # auxiliary + "zu" + word ending in "en"
    r"\bist\s+zu\s+\w+en\b",
    r"\bsind\s+zu\s+\w+en\b",
    r"\bhat\s+zu\s+\w+en\b",
    r"\bhaben\s+zu\s+\w+en\b",
    # auxiliary + single word with "zu" inside it
    r"\bist\s+\w+zu\w+en\b",
    r"\bsind\s+\w+zu\w+en\b",
    # auxiliary + one word + "zu" + word ending in "en"
    r"\bist\s+\w+\s+zu\s+\w+en\b",
    r"\bsind\s+\w+\s+zu\s+\w+en\b",
    r"\bhat\s+\w+\s+zu\s+\w+en\b",
    r"\bhaben\s+\w+\s+zu\s+\w+en\b",
    # English
    r"\bshall\b",
    r"\bmust\b",
    r"\brequired\b",
    # words ending in "zu" + verb stem
    r"\b\w+zuteilen\b",
    r"\b\w+zuwenden\b",
    r"\b\w+zustellen\b",
    r"\b\w+zulegen\b",
    r"\b\w+zunehmen\b",
    r"\b\w+zuführen\b",
    r"\b\w+zuhalten\b",
    r"\b\w+zusetzen\b",
    r"\b\w+zuweisen\b",
    r"\b\w+zuordnen\b",
    r"\b\w+zufügen\b",
    r"\b\w+zugeben\b",
    # auxiliary followed within 1-80 characters by "zu" + word ending in "en"
    r"\bist\b.{1,80}\bzu\s+\w+en\b",
    r"\bsind\b.{1,80}\bzu\s+\w+en\b",
]
PFLICHT_RE = re.compile("|".join(_PFLICHT_SIGNALS), flags=re.IGNORECASE)

# Recommendation ("Empfehlung") wording.
_EMPFEHLUNG_SIGNALS = [
    r"\bsoll\b",
    r"\bsollen\b",
    r"\bsollte\b",
    r"\bsollten\b",
    r"\bgewährleisten\b",
    r"\bsicherstellen\b",
    r"\bshould\b",
    r"\bensure\b",
    r"\brecommend\w*\b",
    r"\bnachweisen\b",
    r"\beinhalten\b",
    r"\bunterlassen\b",
    r"\bwahren\b",
    r"\bdokumentieren\b",
    r"\bimplementieren\b",
    r"\büberprüfen\b",
    r"\büberwachen\b",
    r"\bprüfen,\s+ob\b",
    r"\bkontrollieren,\s+ob\b",
]
EMPFEHLUNG_RE = re.compile("|".join(_EMPFEHLUNG_SIGNALS), flags=re.IGNORECASE)

# Permission / option ("Kann") wording.
_KANN_SIGNALS = [
    r"\bkann\b",
    r"\bkönnen\b",
    r"\bdarf\b",
    r"\bdürfen\b",
    r"\bmay\b",
    r"\boptional\b",
]
KANN_RE = re.compile("|".join(_KANN_SIGNALS), flags=re.IGNORECASE)

# Any normative wording: the three lists above joined in that order.
NORMATIVE_RE = re.compile(
    "|".join(_PFLICHT_SIGNALS + _EMPFEHLUNG_SIGNALS + _KANN_SIGNALS),
    flags=re.IGNORECASE,
)

# Rationale / justification wording.
_RATIONALE_SIGNALS = [
    r"\bda\s+",
    r"\bweil\b",
    r"\bgrund\b",
    r"\berwägung",
    r"\bbecause\b",
    r"\breason\b",
    r"\brationale\b",
    r"\bkönnen\s+.*\s+verursachen\b",
    r"\bführt\s+zu\b",
]
RATIONALE_RE = re.compile("|".join(_RATIONALE_SIGNALS), flags=re.IGNORECASE)

# Evidence-related keywords (for fact detection)
_EVIDENCE_KEYWORDS = [
    r"\bnachweis\b",
    r"\bzertifikat\b",
    r"\baudit.report\b",
    r"\bprotokoll\b",
    r"\bdokumentation\b",
    r"\bbericht\b",
    r"\bcertificate\b",
    r"\bevidence\b",
    r"\bproof\b",
]
EVIDENCE_RE = re.compile("|".join(_EVIDENCE_KEYWORDS), flags=re.IGNORECASE)
|
||||
@@ -0,0 +1,563 @@
|
||||
"""Obligation Extractor — 3-Tier Chunk-to-Obligation Linking.
|
||||
|
||||
Maps RAG chunks to obligations from the v2 obligation framework using
|
||||
three tiers (fastest first):
|
||||
|
||||
Tier 1: EXACT MATCH — regulation_code + article → obligation_id (~40%)
|
||||
Tier 2: EMBEDDING — chunk text vs. obligation descriptions (~30%)
|
||||
Tier 3: LLM EXTRACT — local Ollama extracts obligation text (~25%)
|
||||
|
||||
Part of the Multi-Layer Control Architecture (Phase 4 of 8).
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Service endpoints and LLM settings; each value can be overridden through the
# environment variable of the same lookup name.
EMBEDDING_URL = os.environ.get("EMBEDDING_URL", "http://embedding-service:8087")
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://host.docker.internal:11434")
OLLAMA_MODEL = os.environ.get("CONTROL_GEN_OLLAMA_MODEL", "qwen3.5:35b-a3b")
LLM_TIMEOUT = float(os.environ.get("CONTROL_GEN_LLM_TIMEOUT", "180"))

# Embedding similarity thresholds for Tier 2
EMBEDDING_MATCH_THRESHOLD = 0.80
EMBEDDING_CANDIDATE_THRESHOLD = 0.60
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Regulation code mapping: RAG chunk codes → obligation file regulation IDs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_REGULATION_CODE_TO_ID = {
|
||||
# DSGVO
|
||||
"eu_2016_679": "dsgvo",
|
||||
"dsgvo": "dsgvo",
|
||||
"gdpr": "dsgvo",
|
||||
# AI Act
|
||||
"eu_2024_1689": "ai_act",
|
||||
"ai_act": "ai_act",
|
||||
"aiact": "ai_act",
|
||||
# NIS2
|
||||
"eu_2022_2555": "nis2",
|
||||
"nis2": "nis2",
|
||||
"bsig": "nis2",
|
||||
# BDSG
|
||||
"bdsg": "bdsg",
|
||||
# TTDSG
|
||||
"ttdsg": "ttdsg",
|
||||
# DSA
|
||||
"eu_2022_2065": "dsa",
|
||||
"dsa": "dsa",
|
||||
# Data Act
|
||||
"eu_2023_2854": "data_act",
|
||||
"data_act": "data_act",
|
||||
# EU Machinery
|
||||
"eu_2023_1230": "eu_machinery",
|
||||
"eu_machinery": "eu_machinery",
|
||||
# DORA
|
||||
"eu_2022_2554": "dora",
|
||||
"dora": "dora",
|
||||
}
|
||||
|
||||
|
||||
@dataclass
class ObligationMatch:
    """Result of obligation extraction."""

    obligation_id: Optional[str] = None
    obligation_title: Optional[str] = None
    obligation_text: Optional[str] = None
    # exact_match | embedding_match | llm_extracted | inferred
    method: str = "none"
    confidence: float = 0.0
    # e.g. "dsgvo"
    regulation_id: Optional[str] = None

    def to_dict(self) -> dict:
        """Return the match as a plain dict with one key per field."""
        return dict(
            obligation_id=self.obligation_id,
            obligation_title=self.obligation_title,
            obligation_text=self.obligation_text,
            method=self.method,
            confidence=self.confidence,
            regulation_id=self.regulation_id,
        )
|
||||
|
||||
|
||||
@dataclass
|
||||
class _ObligationEntry:
|
||||
"""Internal representation of a loaded obligation."""
|
||||
|
||||
id: str
|
||||
title: str
|
||||
description: str
|
||||
regulation_id: str
|
||||
articles: list[str] = field(default_factory=list) # normalized: ["art. 30", "§ 38"]
|
||||
embedding: list[float] = field(default_factory=list)
|
||||
|
||||
|
||||
class ObligationExtractor:
|
||||
"""3-Tier obligation extraction from RAG chunks.
|
||||
|
||||
Usage::
|
||||
|
||||
extractor = ObligationExtractor()
|
||||
await extractor.initialize() # loads obligations + embeddings
|
||||
|
||||
match = await extractor.extract(
|
||||
chunk_text="...",
|
||||
regulation_code="eu_2016_679",
|
||||
article="Art. 30",
|
||||
paragraph="Abs. 1",
|
||||
)
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._article_lookup: dict[str, list[str]] = {} # "dsgvo/art. 30" → ["DSGVO-OBL-001"]
|
||||
self._obligations: dict[str, _ObligationEntry] = {} # id → entry
|
||||
self._obligation_embeddings: list[list[float]] = []
|
||||
self._obligation_ids: list[str] = []
|
||||
self._initialized = False
|
||||
|
||||
async def initialize(self) -> None:
|
||||
"""Load all obligations from v2 JSON files and compute embeddings."""
|
||||
if self._initialized:
|
||||
return
|
||||
|
||||
self._load_obligations()
|
||||
await self._compute_embeddings()
|
||||
self._initialized = True
|
||||
logger.info(
|
||||
"ObligationExtractor initialized: %d obligations, %d article lookups, %d embeddings",
|
||||
len(self._obligations),
|
||||
len(self._article_lookup),
|
||||
sum(1 for e in self._obligation_embeddings if e),
|
||||
)
|
||||
|
||||
async def extract(
    self,
    chunk_text: str,
    regulation_code: str,
    article: Optional[str] = None,
    paragraph: Optional[str] = None,
) -> ObligationMatch:
    """Extract the obligation for a chunk using the 3-tier strategy.

    Tiers run in order and the first one that yields a match wins: exact
    article lookup (only when *article* is given), embedding similarity,
    then LLM extraction. Initializes the extractor on first use.

    Args:
        chunk_text: Text of the chunk.
        regulation_code: Regulation code attached to the chunk.
        article: Article reference of the chunk, if known.
        paragraph: Accepted for the call signature; not used by the tiers.
    """
    if not self._initialized:
        await self.initialize()

    reg_id = _normalize_regulation(regulation_code)

    # Tier 1: Exact match via article lookup
    exact = self._tier1_exact(reg_id, article) if article else None
    if exact:
        return exact

    # Tier 2: Embedding similarity
    similar = await self._tier2_embedding(chunk_text, reg_id)
    if similar:
        return similar

    # Tier 3: LLM extraction
    return await self._tier3_llm(chunk_text, regulation_code, article)
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Tier 1: Exact Match
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
def _tier1_exact(self, reg_id: Optional[str], article: str) -> Optional[ObligationMatch]:
|
||||
"""Look up obligation by regulation + article."""
|
||||
if not reg_id:
|
||||
return None
|
||||
|
||||
norm_article = _normalize_article(article)
|
||||
key = f"{reg_id}/{norm_article}"
|
||||
|
||||
obl_ids = self._article_lookup.get(key)
|
||||
if not obl_ids:
|
||||
return None
|
||||
|
||||
# Take the first match (highest priority)
|
||||
obl_id = obl_ids[0]
|
||||
entry = self._obligations.get(obl_id)
|
||||
if not entry:
|
||||
return None
|
||||
|
||||
return ObligationMatch(
|
||||
obligation_id=entry.id,
|
||||
obligation_title=entry.title,
|
||||
obligation_text=entry.description,
|
||||
method="exact_match",
|
||||
confidence=1.0,
|
||||
regulation_id=reg_id,
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Tier 2: Embedding Match
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
async def _tier2_embedding(
|
||||
self, chunk_text: str, reg_id: Optional[str]
|
||||
) -> Optional[ObligationMatch]:
|
||||
"""Find nearest obligation by embedding similarity."""
|
||||
if not self._obligation_embeddings:
|
||||
return None
|
||||
|
||||
chunk_embedding = await _get_embedding(chunk_text[:2000])
|
||||
if not chunk_embedding:
|
||||
return None
|
||||
|
||||
best_idx = -1
|
||||
best_score = 0.0
|
||||
|
||||
for i, obl_emb in enumerate(self._obligation_embeddings):
|
||||
if not obl_emb:
|
||||
continue
|
||||
# Prefer same-regulation matches
|
||||
obl_id = self._obligation_ids[i]
|
||||
entry = self._obligations.get(obl_id)
|
||||
score = _cosine_sim(chunk_embedding, obl_emb)
|
||||
|
||||
# Domain bonus: +0.05 if same regulation
|
||||
if entry and reg_id and entry.regulation_id == reg_id:
|
||||
score += 0.05
|
||||
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_idx = i
|
||||
|
||||
if best_idx < 0:
|
||||
return None
|
||||
|
||||
# Remove domain bonus for threshold comparison
|
||||
raw_score = best_score
|
||||
obl_id = self._obligation_ids[best_idx]
|
||||
entry = self._obligations.get(obl_id)
|
||||
if entry and reg_id and entry.regulation_id == reg_id:
|
||||
raw_score -= 0.05
|
||||
|
||||
if raw_score >= EMBEDDING_MATCH_THRESHOLD:
|
||||
return ObligationMatch(
|
||||
obligation_id=entry.id if entry else obl_id,
|
||||
obligation_title=entry.title if entry else None,
|
||||
obligation_text=entry.description if entry else None,
|
||||
method="embedding_match",
|
||||
confidence=round(min(raw_score, 1.0), 3),
|
||||
regulation_id=entry.regulation_id if entry else reg_id,
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Tier 3: LLM Extraction
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
async def _tier3_llm(
    self, chunk_text: str, regulation_code: str, article: Optional[str]
) -> ObligationMatch:
    """Use the local LLM to extract the obligation from the chunk.

    The first 3000 characters of the chunk are sent with a request for a
    JSON answer. Only the ``obligation_text`` key of the answer is used; when
    it is absent, the first 500 characters of the raw answer are used.

    Returns:
        An ``llm_extracted`` result with confidence 0.60, or with confidence
        0.0 and no text when the LLM returned nothing.
    """
    system_prompt = (
        "Du bist ein Rechtsexperte fuer EU-Datenschutz- und Digitalrecht. "
        "Extrahiere die zentrale rechtliche Pflicht aus Gesetzestexten. "
        "Antworte ausschliesslich als JSON."
    )
    prompt = f"""Analysiere den folgenden Gesetzestext und extrahiere die zentrale rechtliche Pflicht.

Text:
{chunk_text[:3000]}

Quelle: {regulation_code} {article or ''}

Antworte NUR als JSON:
{{
"obligation_text": "Die zentrale Pflicht in einem Satz",
"actor": "Wer muss handeln (z.B. Verantwortlicher, Auftragsverarbeiter)",
"action": "Was muss getan werden",
"normative_strength": "muss|soll|kann"
}}"""

    raw_answer = await _llm_ollama(prompt, system_prompt)
    if raw_answer:
        answer = _parse_json(raw_answer)
        extracted = answer.get("obligation_text", raw_answer[:500])
        return ObligationMatch(
            obligation_id=None,
            obligation_title=None,
            obligation_text=extracted,
            method="llm_extracted",
            confidence=0.60,
            regulation_id=_normalize_regulation(regulation_code),
        )

    return ObligationMatch(
        method="llm_extracted",
        confidence=0.0,
        regulation_id=_normalize_regulation(regulation_code),
    )
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Initialization helpers
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
def _load_obligations(self) -> None:
    """Load all obligation files listed in the v2 framework manifest.

    Fills ``self._obligations`` (obligation id → entry) and
    ``self._article_lookup`` (``"<regulation_id>/<normalized article>"`` →
    list of obligation ids). A missing directory or manifest, or an
    unreadable manifest, leaves both untouched. A regulation file that is
    missing or malformed is logged and skipped so that one bad file does not
    abort loading of the others.
    """
    v2_dir = _find_obligations_dir()
    if not v2_dir:
        logger.warning("Obligations v2 directory not found — Tier 1 disabled")
        return

    manifest_path = v2_dir / "_manifest.json"
    if not manifest_path.exists():
        logger.warning("Manifest not found at %s", manifest_path)
        return

    try:
        # Explicit UTF-8: the files must not depend on the process locale.
        with open(manifest_path, encoding="utf-8") as f:
            manifest = json.load(f)
    except (OSError, ValueError) as e:
        logger.error("Failed to load manifest %s: %s", manifest_path, e)
        return

    regulations = manifest.get("regulations", [])
    for reg_info in regulations:
        try:
            reg_id = reg_info["id"]
            reg_file = v2_dir / reg_info["file"]
            if not reg_file.exists():
                logger.warning("Regulation file not found: %s", reg_file)
                continue

            with open(reg_file, encoding="utf-8") as f:
                data = json.load(f)

            for obl in data.get("obligations", []):
                obl_id = obl["id"]
                entry = _ObligationEntry(
                    id=obl_id,
                    title=obl.get("title", ""),
                    description=obl.get("description", ""),
                    regulation_id=reg_id,
                )

                # Build article lookup from legal_basis
                for basis in obl.get("legal_basis", []):
                    article_raw = basis.get("article", "")
                    if article_raw:
                        norm_art = _normalize_article(article_raw)
                        key = f"{reg_id}/{norm_art}"
                        self._article_lookup.setdefault(key, []).append(obl_id)
                        entry.articles.append(norm_art)

                self._obligations[obl_id] = entry
        except (OSError, ValueError, KeyError, TypeError, AttributeError) as e:
            # Same policy as the pattern loader: log and continue.
            logger.error("Failed to load regulation entry %r: %s", reg_info, e)

    logger.info(
        "Loaded %d obligations from %d regulations",
        len(self._obligations),
        len(regulations),
    )
|
||||
|
||||
async def _compute_embeddings(self) -> None:
    """Embed "<title>: <description>" for every loaded obligation.

    Stores the obligation ids in ``self._obligation_ids`` and the vectors
    returned by ``_get_embeddings_batch`` in ``self._obligation_embeddings``.
    Does nothing when no obligations are loaded.
    """
    if not self._obligations:
        return

    ids = list(self._obligations)
    self._obligation_ids = ids

    texts = []
    for oid in ids:
        entry = self._obligations[oid]
        texts.append(f"{entry.title}: {entry.description}")

    logger.info("Computing embeddings for %d obligations...", len(texts))
    vectors = await _get_embeddings_batch(texts)
    self._obligation_embeddings = vectors

    # Empty vectors mark texts whose embedding could not be obtained.
    valid = len([vec for vec in vectors if vec])
    logger.info("Got %d/%d valid embeddings", valid, len(texts))
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Stats
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
def stats(self) -> dict:
    """Report counts describing what initialization loaded.

    Returns:
        Dict with the number of obligations, the number of article lookup
        keys, the number of non-empty embeddings, the distinct regulation
        ids and the initialization flag.
    """
    valid_embeddings = sum(1 for emb in self._obligation_embeddings if emb)
    regulation_ids = {entry.regulation_id for entry in self._obligations.values()}
    return {
        "total_obligations": len(self._obligations),
        "article_lookups": len(self._article_lookup),
        "embeddings_valid": valid_embeddings,
        "regulations": list(regulation_ids),
        "initialized": self._initialized,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level helpers (reusable by other modules)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _normalize_regulation(regulation_code: str) -> Optional[str]:
|
||||
"""Map a RAG regulation_code to obligation framework regulation ID."""
|
||||
if not regulation_code:
|
||||
return None
|
||||
code = regulation_code.lower().strip()
|
||||
|
||||
# Direct lookup
|
||||
if code in _REGULATION_CODE_TO_ID:
|
||||
return _REGULATION_CODE_TO_ID[code]
|
||||
|
||||
# Prefix matching for families
|
||||
for prefix, reg_id in [
|
||||
("eu_2016_679", "dsgvo"),
|
||||
("eu_2024_1689", "ai_act"),
|
||||
("eu_2022_2555", "nis2"),
|
||||
("eu_2022_2065", "dsa"),
|
||||
("eu_2023_2854", "data_act"),
|
||||
("eu_2023_1230", "eu_machinery"),
|
||||
("eu_2022_2554", "dora"),
|
||||
]:
|
||||
if code.startswith(prefix):
|
||||
return reg_id
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_article(article: str) -> str:
|
||||
"""Normalize article references for consistent lookup.
|
||||
|
||||
Examples:
|
||||
"Art. 30" → "art. 30"
|
||||
"§ 38 BDSG" → "§ 38"
|
||||
"Article 10" → "art. 10"
|
||||
"Art. 30 Abs. 1" → "art. 30"
|
||||
"Artikel 35" → "art. 35"
|
||||
"""
|
||||
if not article:
|
||||
return ""
|
||||
s = article.strip()
|
||||
|
||||
# Remove trailing law name: "§ 38 BDSG" → "§ 38"
|
||||
s = re.sub(r"\s+(DSGVO|BDSG|TTDSG|DSA|NIS2|DORA|AI.?Act)\s*$", "", s, flags=re.IGNORECASE)
|
||||
|
||||
# Remove paragraph references: "Art. 30 Abs. 1" → "Art. 30"
|
||||
s = re.sub(r"\s+(Abs|Absatz|para|paragraph|lit|Satz)\.?\s+.*$", "", s, flags=re.IGNORECASE)
|
||||
|
||||
# Normalize "Article" / "Artikel" → "Art."
|
||||
s = re.sub(r"^(Article|Artikel)\s+", "Art. ", s, flags=re.IGNORECASE)
|
||||
|
||||
return s.lower().strip()
|
||||
|
||||
|
||||
def _cosine_sim(a: list[float], b: list[float]) -> float:
|
||||
"""Compute cosine similarity between two vectors."""
|
||||
if not a or not b or len(a) != len(b):
|
||||
return 0.0
|
||||
dot = sum(x * y for x, y in zip(a, b))
|
||||
norm_a = sum(x * x for x in a) ** 0.5
|
||||
norm_b = sum(x * x for x in b) ** 0.5
|
||||
if norm_a == 0 or norm_b == 0:
|
||||
return 0.0
|
||||
return dot / (norm_a * norm_b)
|
||||
|
||||
|
||||
def _find_obligations_dir() -> Optional[Path]:
|
||||
"""Locate the obligations v2 directory."""
|
||||
candidates = [
|
||||
Path(__file__).resolve().parent.parent.parent.parent
|
||||
/ "ai-compliance-sdk" / "policies" / "obligations" / "v2",
|
||||
Path("/app/ai-compliance-sdk/policies/obligations/v2"),
|
||||
Path("ai-compliance-sdk/policies/obligations/v2"),
|
||||
]
|
||||
for p in candidates:
|
||||
if p.is_dir() and (p / "_manifest.json").exists():
|
||||
return p
|
||||
return None
|
||||
|
||||
|
||||
async def _get_embedding(text: str) -> list[float]:
    """Get the embedding vector for a single text.

    Posts ``{"texts": [text]}`` to ``{EMBEDDING_URL}/embed`` with a 10 second
    timeout.

    Returns:
        The first vector of the response's ``embeddings`` list, or ``[]`` when
        the list is empty or the request fails. Failures are logged as
        warnings, matching the batch variant, instead of being dropped
        silently.
    """
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(
                f"{EMBEDDING_URL}/embed",
                json={"texts": [text]},
            )
            resp.raise_for_status()
            embeddings = resp.json().get("embeddings", [])
            return embeddings[0] if embeddings else []
    except Exception as e:
        # Best effort: callers treat an empty vector as "no embedding".
        logger.warning("Embedding request failed: %s", e)
        return []
|
||||
|
||||
|
||||
async def _get_embeddings_batch(
|
||||
texts: list[str], batch_size: int = 32
|
||||
) -> list[list[float]]:
|
||||
"""Get embeddings for multiple texts in batches."""
|
||||
all_embeddings: list[list[float]] = []
|
||||
for i in range(0, len(texts), batch_size):
|
||||
batch = texts[i : i + batch_size]
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
resp = await client.post(
|
||||
f"{EMBEDDING_URL}/embed",
|
||||
json={"texts": batch},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
embeddings = resp.json().get("embeddings", [])
|
||||
all_embeddings.extend(embeddings)
|
||||
except Exception as e:
|
||||
logger.warning("Batch embedding failed for %d texts: %s", len(batch), e)
|
||||
all_embeddings.extend([[] for _ in batch])
|
||||
return all_embeddings
|
||||
|
||||
|
||||
async def _llm_ollama(prompt: str, system_prompt: Optional[str] = None) -> str:
    """Send a non-streaming chat request to the local Ollama server.

    Args:
        prompt: User message content.
        system_prompt: Optional system message placed before the user message.

    Returns:
        The ``message.content`` field of the response, or "" when the server
        answers with a non-200 status or the call raises.
    """
    user_message = {"role": "user", "content": prompt}
    if system_prompt:
        messages = [{"role": "system", "content": system_prompt}, user_message]
    else:
        messages = [user_message]

    payload = {
        "model": OLLAMA_MODEL,
        "messages": messages,
        "stream": False,
        "format": "json",
        "options": {"num_predict": 512},
        "think": False,
    }

    try:
        async with httpx.AsyncClient(timeout=LLM_TIMEOUT) as client:
            resp = await client.post(f"{OLLAMA_URL}/api/chat", json=payload)
            if resp.status_code == 200:
                body = resp.json()
                return body.get("message", {}).get("content", "")
            logger.error(
                "Ollama chat failed %d: %s", resp.status_code, resp.text[:300]
            )
            return ""
    except Exception as e:
        logger.warning("Ollama call failed: %s", e)
        return ""
|
||||
|
||||
|
||||
def _parse_json(text: str) -> dict:
|
||||
"""Extract JSON from LLM response text."""
|
||||
# Try direct parse
|
||||
try:
|
||||
return json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Try extracting JSON block
|
||||
match = re.search(r"\{[^{}]*\}", text, re.DOTALL)
|
||||
if match:
|
||||
try:
|
||||
return json.loads(match.group())
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
return {}
|
||||
@@ -0,0 +1,532 @@
|
||||
"""Pattern Matcher — Obligation-to-Control-Pattern Linking.
|
||||
|
||||
Maps obligations (from the ObligationExtractor) to control patterns
|
||||
using two tiers:
|
||||
|
||||
Tier 1: KEYWORD MATCH — obligation_match_keywords from patterns (~70%)
|
||||
Tier 2: EMBEDDING — cosine similarity with domain bonus (~25%)
|
||||
|
||||
Part of the Multi-Layer Control Architecture (Phase 5 of 8).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import yaml
|
||||
|
||||
from compliance.services.obligation_extractor import (
|
||||
_cosine_sim,
|
||||
_get_embedding,
|
||||
_get_embeddings_batch,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Minimum keyword score to accept a match (at least 2 keyword hits)
|
||||
KEYWORD_MATCH_MIN_HITS = 2
|
||||
# Embedding threshold for Tier 2
|
||||
EMBEDDING_PATTERN_THRESHOLD = 0.75
|
||||
# Domain bonus when regulation maps to the pattern's domain
|
||||
DOMAIN_BONUS = 0.10
|
||||
|
||||
# Map regulation IDs to pattern domains that are likely relevant
|
||||
_REGULATION_DOMAIN_AFFINITY = {
|
||||
"dsgvo": ["DATA", "COMP", "GOV"],
|
||||
"bdsg": ["DATA", "COMP"],
|
||||
"ttdsg": ["DATA"],
|
||||
"ai_act": ["AI", "COMP", "DATA"],
|
||||
"nis2": ["SEC", "INC", "NET", "LOG", "CRYP"],
|
||||
"dsa": ["DATA", "COMP"],
|
||||
"data_act": ["DATA", "COMP"],
|
||||
"eu_machinery": ["SEC", "COMP"],
|
||||
"dora": ["SEC", "INC", "FIN", "COMP"],
|
||||
}
|
||||
|
||||
|
||||
@dataclass
class ControlPattern:
    """In-memory form of one control pattern entry read from a YAML file."""

    # Required fields (read with p["..."] by the loader).
    id: str
    name: str
    name_de: str
    domain: str
    category: str
    description: str
    objective_template: str
    rationale_template: str

    # Optional template lists; each instance gets its own empty list.
    requirements_template: list[str] = field(default_factory=list)
    test_procedure_template: list[str] = field(default_factory=list)
    evidence_template: list[str] = field(default_factory=list)

    # Optional scalar defaults.
    severity_default: str = "medium"
    implementation_effort_default: str = "m"

    # Optional matching and linking metadata.
    obligation_match_keywords: list[str] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)
    composable_with: list[str] = field(default_factory=list)
    open_anchor_refs: list[dict] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class PatternMatchResult:
    """Outcome of matching one obligation text against the control patterns."""

    pattern: Optional[ControlPattern] = None
    pattern_id: Optional[str] = None
    method: str = "none"  # keyword | embedding | combined | none
    confidence: float = 0.0
    keyword_hits: int = 0
    total_keywords: int = 0
    embedding_score: float = 0.0
    domain_bonus_applied: bool = False
    composable_patterns: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a plain dict of the result; the pattern object is left out.

        ``confidence`` and ``embedding_score`` are rounded to three decimals.
        """
        rounded_confidence = round(self.confidence, 3)
        rounded_embedding = round(self.embedding_score, 3)
        payload = {
            "pattern_id": self.pattern_id,
            "method": self.method,
            "confidence": rounded_confidence,
            "keyword_hits": self.keyword_hits,
            "total_keywords": self.total_keywords,
            "embedding_score": rounded_embedding,
            "domain_bonus_applied": self.domain_bonus_applied,
            "composable_patterns": self.composable_patterns,
        }
        return payload
|
||||
|
||||
|
||||
class PatternMatcher:
    """Links obligations to control patterns using keyword + embedding matching.

    Usage::

        matcher = PatternMatcher()
        await matcher.initialize()

        result = await matcher.match(
            obligation_text="Fuehrung eines Verarbeitungsverzeichnisses...",
            regulation_id="dsgvo",
        )
        print(result.pattern_id)    # e.g. "CP-COMP-001"
        print(result.confidence)    # e.g. 0.85
    """

    def __init__(self):
        """Create an empty, uninitialized matcher; no files are read here."""
        self._patterns: list[ControlPattern] = []
        self._by_id: dict[str, ControlPattern] = {}
        self._by_domain: dict[str, list[ControlPattern]] = {}
        self._keyword_index: dict[str, list[str]] = {}  # keyword → [pattern_ids]
        self._pattern_embeddings: list[list[float]] = []
        self._pattern_ids: list[str] = []
        self._initialized = False

    async def initialize(self) -> None:
        """Load patterns from YAML, build the keyword index, compute embeddings.

        Returns immediately when the matcher is already initialized.
        """
        if self._initialized:
            return

        self._load_patterns()
        self._build_keyword_index()
        await self._compute_embeddings()
        self._initialized = True
        logger.info(
            "PatternMatcher initialized: %d patterns, %d keywords, %d embeddings",
            len(self._patterns),
            len(self._keyword_index),
            sum(1 for e in self._pattern_embeddings if e),
        )

    async def match(
        self,
        obligation_text: str,
        regulation_id: Optional[str] = None,
        top_n: int = 1,
    ) -> PatternMatchResult:
        """Match obligation text to the best control pattern.

        Args:
            obligation_text: The obligation description to match against.
            regulation_id: Source regulation (for domain bonus).
            top_n: Accepted for interface compatibility; not read by this
                method.

        Returns:
            PatternMatchResult with the best match, or an empty result when
            the text is empty, no patterns are loaded, or neither tier matches.
        """
        if not self._initialized:
            await self.initialize()

        if not obligation_text or not self._patterns:
            return PatternMatchResult()

        # Tier 1: Keyword matching
        keyword_result = self._tier1_keyword(obligation_text, regulation_id)

        # Tier 2: Embedding matching
        embedding_result = await self._tier2_embedding(obligation_text, regulation_id)

        # Combine scores: prefer keyword match, boost with embedding if available
        best = self._combine_results(keyword_result, embedding_result)

        # Attach composable patterns (only ids that are actually loaded)
        if best.pattern:
            best.composable_patterns = [
                pid for pid in best.pattern.composable_with
                if pid in self._by_id
            ]

        return best

    async def match_top_n(
        self,
        obligation_text: str,
        regulation_id: Optional[str] = None,
        n: int = 3,
    ) -> list[PatternMatchResult]:
        """Return top-N pattern matches sorted by confidence descending.

        Unlike ``match``, no minimum-hit or similarity threshold is applied;
        every pattern with a keyword or embedding score is a candidate.
        Returns an empty list when ``n`` is not positive.
        """
        if not self._initialized:
            await self.initialize()

        if not obligation_text or not self._patterns or n <= 0:
            return []

        keyword_scores = self._keyword_scores(obligation_text, regulation_id)
        embedding_scores = await self._embedding_scores(obligation_text, regulation_id)

        # Merge scores
        all_pattern_ids = set(keyword_scores) | set(embedding_scores)
        results: list[PatternMatchResult] = []

        for pid in all_pattern_ids:
            pattern = self._by_id.get(pid)
            if not pattern:
                continue

            # (hits, total, score) and (score, bonus_applied)
            kw_hits, kw_total, kw_confidence = keyword_scores.get(pid, (0, 0, 0.0))
            emb_confidence, bonus_applied = embedding_scores.get(pid, (0.0, False))

            # Combined confidence: max of keyword and embedding, with boost if both
            if kw_confidence > 0 and emb_confidence > 0:
                combined = max(kw_confidence, emb_confidence) + 0.05
                method = "combined"
            elif kw_confidence > 0:
                combined = kw_confidence
                method = "keyword"
            else:
                combined = emb_confidence
                method = "embedding"

            results.append(PatternMatchResult(
                pattern=pattern,
                pattern_id=pid,
                method=method,
                confidence=min(combined, 1.0),
                keyword_hits=kw_hits,
                total_keywords=kw_total,
                embedding_score=emb_confidence,
                domain_bonus_applied=bonus_applied,
                composable_patterns=[
                    p for p in pattern.composable_with if p in self._by_id
                ],
            ))

        # Sort by confidence descending
        results.sort(key=lambda r: r.confidence, reverse=True)
        return results[:n]

    # -----------------------------------------------------------------------
    # Tier 1: Keyword Match
    # -----------------------------------------------------------------------

    def _tier1_keyword(
        self, obligation_text: str, regulation_id: Optional[str]
    ) -> Optional[PatternMatchResult]:
        """Match by counting keyword hits in the obligation text.

        Only patterns with at least ``KEYWORD_MATCH_MIN_HITS`` hits compete;
        among those the highest hit ratio wins. The domain bonus is added to
        the winner's confidence (capped at 1.0) when the regulation has
        affinity with its domain.
        """
        scores = self._keyword_scores(obligation_text, regulation_id)
        best_pid = self._best_keyword_candidate(scores, KEYWORD_MATCH_MIN_HITS)
        if best_pid is None:
            return None

        hits, total, confidence = scores[best_pid]

        pattern = self._by_id.get(best_pid)
        if not pattern:
            return None

        # Check domain bonus
        bonus_applied = False
        if regulation_id and self._domain_matches(pattern.domain, regulation_id):
            confidence = min(confidence + DOMAIN_BONUS, 1.0)
            bonus_applied = True

        return PatternMatchResult(
            pattern=pattern,
            pattern_id=best_pid,
            method="keyword",
            confidence=confidence,
            keyword_hits=hits,
            total_keywords=total,
            domain_bonus_applied=bonus_applied,
        )

    @staticmethod
    def _best_keyword_candidate(
        scores: dict[str, tuple[int, int, float]], min_hits: int
    ) -> Optional[str]:
        """Pick the pattern id with the highest ratio among those with enough hits.

        Filtering happens before ranking so that a pattern with a perfect
        ratio but too few hits (e.g. 1 of 1) cannot hide a valid candidate.
        Returns ``None`` when no pattern reaches ``min_hits``.
        """
        eligible = [pid for pid, score in scores.items() if score[0] >= min_hits]
        if not eligible:
            return None
        return max(eligible, key=lambda pid: scores[pid][2])

    def _keyword_scores(
        self, text: str, regulation_id: Optional[str]
    ) -> dict[str, tuple[int, int, float]]:
        """Compute keyword match scores for all patterns.

        A keyword counts as a hit when it occurs as a substring of the
        lower-cased text. ``regulation_id`` is not read here.

        Returns dict: pattern_id → (hits, total_keywords, confidence), where
        confidence is hits / total_keywords.
        """
        text_lower = text.lower()
        hits_by_pattern: dict[str, int] = {}

        for keyword, pattern_ids in self._keyword_index.items():
            if keyword in text_lower:
                for pid in pattern_ids:
                    hits_by_pattern[pid] = hits_by_pattern.get(pid, 0) + 1

        result: dict[str, tuple[int, int, float]] = {}
        for pid, hits in hits_by_pattern.items():
            pattern = self._by_id.get(pid)
            if not pattern:
                continue
            total = len(pattern.obligation_match_keywords)
            confidence = hits / total if total > 0 else 0.0
            result[pid] = (hits, total, confidence)

        return result

    # -----------------------------------------------------------------------
    # Tier 2: Embedding Match
    # -----------------------------------------------------------------------

    async def _tier2_embedding(
        self, obligation_text: str, regulation_id: Optional[str]
    ) -> Optional[PatternMatchResult]:
        """Match by embedding similarity against the pattern embeddings.

        The best score (domain bonus included) must reach
        ``EMBEDDING_PATTERN_THRESHOLD``; otherwise ``None`` is returned.
        """
        scores = await self._embedding_scores(obligation_text, regulation_id)
        if not scores:
            return None

        best_pid = max(scores, key=lambda pid: scores[pid][0])
        emb_score, bonus_applied = scores[best_pid]

        if emb_score < EMBEDDING_PATTERN_THRESHOLD:
            return None

        pattern = self._by_id.get(best_pid)
        if not pattern:
            return None

        return PatternMatchResult(
            pattern=pattern,
            pattern_id=best_pid,
            method="embedding",
            confidence=min(emb_score, 1.0),
            embedding_score=emb_score,
            domain_bonus_applied=bonus_applied,
        )

    async def _embedding_scores(
        self, obligation_text: str, regulation_id: Optional[str]
    ) -> dict[str, tuple[float, bool]]:
        """Compute embedding similarity scores for all patterns.

        Only the first 2000 characters of the text are embedded.

        Returns dict: pattern_id → (score, domain_bonus_applied). Empty when
        no pattern embeddings exist or the text could not be embedded.
        """
        if not self._pattern_embeddings:
            return {}

        chunk_embedding = await _get_embedding(obligation_text[:2000])
        if not chunk_embedding:
            return {}

        result: dict[str, tuple[float, bool]] = {}
        # zip stops at the shorter list, so a length mismatch cannot raise.
        for pid, pat_emb in zip(self._pattern_ids, self._pattern_embeddings):
            if not pat_emb:
                continue
            pattern = self._by_id.get(pid)
            if not pattern:
                continue

            score = _cosine_sim(chunk_embedding, pat_emb)

            # Domain bonus
            bonus_applied = False
            if regulation_id and self._domain_matches(pattern.domain, regulation_id):
                score += DOMAIN_BONUS
                bonus_applied = True

            result[pid] = (score, bonus_applied)

        return result

    # -----------------------------------------------------------------------
    # Score combination
    # -----------------------------------------------------------------------

    def _combine_results(
        self,
        keyword_result: Optional[PatternMatchResult],
        embedding_result: Optional[PatternMatchResult],
    ) -> PatternMatchResult:
        """Combine keyword and embedding results into the best match.

        If both tiers agree on the pattern, the higher confidence is boosted
        by 0.05 (capped at 1.0) and the method becomes "combined". If they
        disagree, the result with the higher confidence wins, keyword on ties.
        """
        if not keyword_result and not embedding_result:
            return PatternMatchResult()

        if not keyword_result:
            return embedding_result
        if not embedding_result:
            return keyword_result

        # Both matched — check if they agree
        if keyword_result.pattern_id == embedding_result.pattern_id:
            # Same pattern: boost confidence
            combined_confidence = min(
                max(keyword_result.confidence, embedding_result.confidence) + 0.05,
                1.0,
            )
            return PatternMatchResult(
                pattern=keyword_result.pattern,
                pattern_id=keyword_result.pattern_id,
                method="combined",
                confidence=combined_confidence,
                keyword_hits=keyword_result.keyword_hits,
                total_keywords=keyword_result.total_keywords,
                embedding_score=embedding_result.embedding_score,
                domain_bonus_applied=(
                    keyword_result.domain_bonus_applied
                    or embedding_result.domain_bonus_applied
                ),
            )

        # Different patterns: pick the one with higher confidence
        if keyword_result.confidence >= embedding_result.confidence:
            return keyword_result
        return embedding_result

    # -----------------------------------------------------------------------
    # Domain affinity
    # -----------------------------------------------------------------------

    @staticmethod
    def _domain_matches(pattern_domain: str, regulation_id: str) -> bool:
        """Check if a pattern's domain has affinity with a regulation."""
        affine_domains = _REGULATION_DOMAIN_AFFINITY.get(regulation_id, [])
        return pattern_domain in affine_domains

    # -----------------------------------------------------------------------
    # Initialization helpers
    # -----------------------------------------------------------------------

    def _load_patterns(self) -> None:
        """Load control patterns from the YAML files of the patterns directory.

        Files whose name starts with "_" and files without a top-level
        ``patterns`` key are skipped; a file that fails to load is logged and
        skipped. Once a directory is found, previously loaded patterns are
        discarded first, so calling this again (e.g. from an overlapping
        ``initialize``) does not duplicate entries.
        """
        patterns_dir = _find_patterns_dir()
        if not patterns_dir:
            logger.warning("Control patterns directory not found")
            return

        self._patterns = []
        self._by_id = {}
        self._by_domain = {}

        for yaml_file in sorted(patterns_dir.glob("*.yaml")):
            if yaml_file.name.startswith("_"):
                continue
            try:
                # Explicit UTF-8: must not depend on the process locale.
                with open(yaml_file, encoding="utf-8") as f:
                    data = yaml.safe_load(f)
                if not data or "patterns" not in data:
                    continue
                for p in data["patterns"]:
                    pattern = ControlPattern(
                        id=p["id"],
                        name=p["name"],
                        name_de=p["name_de"],
                        domain=p["domain"],
                        category=p["category"],
                        description=p["description"],
                        objective_template=p["objective_template"],
                        rationale_template=p["rationale_template"],
                        requirements_template=p.get("requirements_template", []),
                        test_procedure_template=p.get("test_procedure_template", []),
                        evidence_template=p.get("evidence_template", []),
                        severity_default=p.get("severity_default", "medium"),
                        implementation_effort_default=p.get("implementation_effort_default", "m"),
                        obligation_match_keywords=p.get("obligation_match_keywords", []),
                        tags=p.get("tags", []),
                        composable_with=p.get("composable_with", []),
                        open_anchor_refs=p.get("open_anchor_refs", []),
                    )
                    self._patterns.append(pattern)
                    self._by_id[pattern.id] = pattern
                    self._by_domain.setdefault(pattern.domain, []).append(pattern)
            except Exception as e:
                logger.error("Failed to load %s: %s", yaml_file.name, e)

        logger.info("Loaded %d patterns from %s", len(self._patterns), patterns_dir)

    def _build_keyword_index(self) -> None:
        """Build reverse index: lower-cased keyword → [pattern_ids].

        The index is rebuilt from scratch on every call, so repeated calls do
        not inflate hit counts.
        """
        index: dict[str, list[str]] = {}
        for pattern in self._patterns:
            for kw in pattern.obligation_match_keywords:
                index.setdefault(kw.lower(), []).append(pattern.id)
        self._keyword_index = index

    async def _compute_embeddings(self) -> None:
        """Compute embeddings of "<name_de>: <objective_template>" per pattern."""
        if not self._patterns:
            return

        self._pattern_ids = [p.id for p in self._patterns]
        texts = [
            f"{p.name_de}: {p.objective_template}"
            for p in self._patterns
        ]

        logger.info("Computing embeddings for %d patterns...", len(texts))
        self._pattern_embeddings = await _get_embeddings_batch(texts)
        valid = sum(1 for e in self._pattern_embeddings if e)
        logger.info("Got %d/%d valid pattern embeddings", valid, len(texts))

    # -----------------------------------------------------------------------
    # Public helpers
    # -----------------------------------------------------------------------

    def get_pattern(self, pattern_id: str) -> Optional[ControlPattern]:
        """Get a pattern by its ID; the ID is upper-cased before lookup."""
        return self._by_id.get(pattern_id.upper())

    def get_patterns_by_domain(self, domain: str) -> list[ControlPattern]:
        """Get all patterns for a domain; the domain is upper-cased before lookup."""
        return self._by_domain.get(domain.upper(), [])

    def stats(self) -> dict:
        """Return matcher statistics."""
        return {
            "total_patterns": len(self._patterns),
            "domains": list(self._by_domain.keys()),
            "keywords": len(self._keyword_index),
            "embeddings_valid": sum(1 for e in self._pattern_embeddings if e),
            "initialized": self._initialized,
        }
|
||||
|
||||
|
||||
def _find_patterns_dir() -> Optional[Path]:
|
||||
"""Locate the control_patterns directory."""
|
||||
candidates = [
|
||||
Path(__file__).resolve().parent.parent.parent.parent
|
||||
/ "ai-compliance-sdk" / "policies" / "control_patterns",
|
||||
Path("/app/ai-compliance-sdk/policies/control_patterns"),
|
||||
Path("ai-compliance-sdk/policies/control_patterns"),
|
||||
]
|
||||
for p in candidates:
|
||||
if p.is_dir():
|
||||
return p
|
||||
return None
|
||||
@@ -0,0 +1,670 @@
|
||||
"""Pipeline Adapter — New 10-Stage Pipeline Integration.
|
||||
|
||||
Bridges the existing 7-stage control_generator pipeline with the new
|
||||
multi-layer components (ObligationExtractor, PatternMatcher, ControlComposer).
|
||||
|
||||
New pipeline flow:
|
||||
chunk → license_classify
|
||||
→ obligation_extract (Stage 4 — NEW)
|
||||
→ pattern_match (Stage 5 — NEW)
|
||||
→ control_compose (Stage 6 — replaces old Stage 3)
|
||||
→ harmonize → anchor → store + crosswalk → mark processed
|
||||
|
||||
Can be used in two modes:
|
||||
1. INLINE: Called from _process_batch() to enrich the pipeline
|
||||
2. STANDALONE: Process chunks directly through new stages
|
||||
|
||||
Part of the Multi-Layer Control Architecture (Phase 7 of 8).
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from compliance.services.control_composer import ComposedControl, ControlComposer
|
||||
from compliance.services.obligation_extractor import ObligationExtractor, ObligationMatch
|
||||
from compliance.services.pattern_matcher import PatternMatcher, PatternMatchResult
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class PipelineChunk:
    """One text chunk, with its source metadata, fed into the new pipeline stages."""

    text: str
    collection: str = ""
    regulation_code: str = ""
    article: Optional[str] = None
    paragraph: Optional[str] = None
    license_rule: int = 3
    license_info: dict = field(default_factory=dict)
    source_citation: Optional[dict] = None
    chunk_hash: str = ""

    def compute_hash(self) -> str:
        """Return the chunk hash, computing it on first use.

        An already set ``chunk_hash`` is returned unchanged; otherwise the
        SHA-256 hex digest of the encoded text is stored and returned.
        """
        if self.chunk_hash:
            return self.chunk_hash
        digest = hashlib.sha256(self.text.encode()).hexdigest()
        self.chunk_hash = digest
        return digest
|
||||
|
||||
|
||||
@dataclass
class PipelineResult:
    """Everything produced for one chunk by the new pipeline stages."""

    chunk: PipelineChunk
    obligation: ObligationMatch = field(default_factory=ObligationMatch)
    pattern_result: PatternMatchResult = field(default_factory=PatternMatchResult)
    control: Optional[ComposedControl] = None
    crosswalk_written: bool = False
    error: Optional[str] = None

    def to_dict(self) -> dict:
        """Return a plain dict; a falsy sub-result is emitted as ``None``."""

        def _dump(part):
            # Delegate to the part's own to_dict() when the part is present.
            return part.to_dict() if part else None

        return {
            "chunk_hash": self.chunk.chunk_hash,
            "obligation": _dump(self.obligation),
            "pattern": _dump(self.pattern_result),
            "control": _dump(self.control),
            "crosswalk_written": self.crosswalk_written,
            "error": self.error,
        }
|
||||
|
||||
|
||||
class PipelineAdapter:
|
||||
"""Integrates ObligationExtractor + PatternMatcher + ControlComposer.
|
||||
|
||||
Usage::
|
||||
|
||||
adapter = PipelineAdapter(db)
|
||||
await adapter.initialize()
|
||||
|
||||
result = await adapter.process_chunk(PipelineChunk(
|
||||
text="...",
|
||||
regulation_code="eu_2016_679",
|
||||
article="Art. 30",
|
||||
license_rule=1,
|
||||
))
|
||||
"""
|
||||
|
||||
def __init__(self, db: Optional[Session] = None):
    """Keep the optional DB session and create the three stage components.

    The components are only constructed here; their own ``initialize``
    coroutines run later from ``initialize()``.
    """
    self.db = db
    # Stage components, in pipeline order: extract → match → compose.
    self._extractor = ObligationExtractor()
    self._matcher = PatternMatcher()
    self._composer = ControlComposer()
    self._initialized = False
|
||||
|
||||
async def initialize(self) -> None:
    """Initialize the extractor and the matcher once.

    Later calls return without doing anything.
    """
    if not self._initialized:
        await self._extractor.initialize()
        await self._matcher.initialize()
        self._initialized = True
        logger.info("PipelineAdapter initialized")
|
||||
|
||||
async def process_chunk(self, chunk: PipelineChunk) -> PipelineResult:
|
||||
"""Process a single chunk through the new 3-stage pipeline.
|
||||
|
||||
Stage 4: Obligation Extract
|
||||
Stage 5: Pattern Match
|
||||
Stage 6: Control Compose
|
||||
"""
|
||||
if not self._initialized:
|
||||
await self.initialize()
|
||||
|
||||
chunk.compute_hash()
|
||||
result = PipelineResult(chunk=chunk)
|
||||
|
||||
try:
|
||||
# Stage 4: Obligation Extract
|
||||
result.obligation = await self._extractor.extract(
|
||||
chunk_text=chunk.text,
|
||||
regulation_code=chunk.regulation_code,
|
||||
article=chunk.article,
|
||||
paragraph=chunk.paragraph,
|
||||
)
|
||||
|
||||
# Stage 5: Pattern Match
|
||||
obligation_text = (
|
||||
result.obligation.obligation_text
|
||||
or result.obligation.obligation_title
|
||||
or chunk.text[:500]
|
||||
)
|
||||
result.pattern_result = await self._matcher.match(
|
||||
obligation_text=obligation_text,
|
||||
regulation_id=result.obligation.regulation_id,
|
||||
)
|
||||
|
||||
# Stage 6: Control Compose
|
||||
result.control = await self._composer.compose(
|
||||
obligation=result.obligation,
|
||||
pattern_result=result.pattern_result,
|
||||
chunk_text=chunk.text if chunk.license_rule in (1, 2) else None,
|
||||
license_rule=chunk.license_rule,
|
||||
source_citation=chunk.source_citation,
|
||||
regulation_code=chunk.regulation_code,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Pipeline processing failed: %s", e)
|
||||
result.error = str(e)
|
||||
|
||||
return result
|
||||
|
||||
async def process_batch(self, chunks: list[PipelineChunk]) -> list[PipelineResult]:
|
||||
"""Process multiple chunks through the pipeline."""
|
||||
results = []
|
||||
for chunk in chunks:
|
||||
result = await self.process_chunk(chunk)
|
||||
results.append(result)
|
||||
return results
|
||||
|
||||
def write_crosswalk(self, result: PipelineResult, control_uuid: str) -> bool:
|
||||
"""Write obligation_extraction + crosswalk_matrix rows for a processed chunk.
|
||||
|
||||
Called AFTER the control is stored in canonical_controls.
|
||||
"""
|
||||
if not self.db or not result.control:
|
||||
return False
|
||||
|
||||
chunk = result.chunk
|
||||
obligation = result.obligation
|
||||
pattern = result.pattern_result
|
||||
|
||||
try:
|
||||
# 1. Write obligation_extraction row
|
||||
self.db.execute(
|
||||
text("""
|
||||
INSERT INTO obligation_extractions (
|
||||
chunk_hash, collection, regulation_code,
|
||||
article, paragraph, obligation_id,
|
||||
obligation_text, confidence, extraction_method,
|
||||
pattern_id, pattern_match_score, control_uuid
|
||||
) VALUES (
|
||||
:chunk_hash, :collection, :regulation_code,
|
||||
:article, :paragraph, :obligation_id,
|
||||
:obligation_text, :confidence, :extraction_method,
|
||||
:pattern_id, :pattern_match_score,
|
||||
CAST(:control_uuid AS uuid)
|
||||
)
|
||||
"""),
|
||||
{
|
||||
"chunk_hash": chunk.chunk_hash,
|
||||
"collection": chunk.collection,
|
||||
"regulation_code": chunk.regulation_code,
|
||||
"article": chunk.article,
|
||||
"paragraph": chunk.paragraph,
|
||||
"obligation_id": obligation.obligation_id if obligation else None,
|
||||
"obligation_text": (
|
||||
obligation.obligation_text[:2000]
|
||||
if obligation and obligation.obligation_text
|
||||
else None
|
||||
),
|
||||
"confidence": obligation.confidence if obligation else 0,
|
||||
"extraction_method": obligation.method if obligation else "none",
|
||||
"pattern_id": pattern.pattern_id if pattern else None,
|
||||
"pattern_match_score": pattern.confidence if pattern else 0,
|
||||
"control_uuid": control_uuid,
|
||||
},
|
||||
)
|
||||
|
||||
# 2. Write crosswalk_matrix row
|
||||
self.db.execute(
|
||||
text("""
|
||||
INSERT INTO crosswalk_matrix (
|
||||
regulation_code, article, paragraph,
|
||||
obligation_id, pattern_id,
|
||||
master_control_id, master_control_uuid,
|
||||
confidence, source
|
||||
) VALUES (
|
||||
:regulation_code, :article, :paragraph,
|
||||
:obligation_id, :pattern_id,
|
||||
:master_control_id,
|
||||
CAST(:master_control_uuid AS uuid),
|
||||
:confidence, :source
|
||||
)
|
||||
"""),
|
||||
{
|
||||
"regulation_code": chunk.regulation_code,
|
||||
"article": chunk.article,
|
||||
"paragraph": chunk.paragraph,
|
||||
"obligation_id": obligation.obligation_id if obligation else None,
|
||||
"pattern_id": pattern.pattern_id if pattern else None,
|
||||
"master_control_id": result.control.control_id,
|
||||
"master_control_uuid": control_uuid,
|
||||
"confidence": min(
|
||||
obligation.confidence if obligation else 0,
|
||||
pattern.confidence if pattern else 0,
|
||||
),
|
||||
"source": "auto",
|
||||
},
|
||||
)
|
||||
|
||||
# 3. Update canonical_controls with pattern_id + obligation_ids
|
||||
if result.control.pattern_id or result.control.obligation_ids:
|
||||
self.db.execute(
|
||||
text("""
|
||||
UPDATE canonical_controls
|
||||
SET pattern_id = COALESCE(:pattern_id, pattern_id),
|
||||
obligation_ids = COALESCE(:obligation_ids, obligation_ids)
|
||||
WHERE id = CAST(:control_uuid AS uuid)
|
||||
"""),
|
||||
{
|
||||
"pattern_id": result.control.pattern_id,
|
||||
"obligation_ids": json.dumps(result.control.obligation_ids),
|
||||
"control_uuid": control_uuid,
|
||||
},
|
||||
)
|
||||
|
||||
self.db.commit()
|
||||
result.crosswalk_written = True
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to write crosswalk: %s", e)
|
||||
self.db.rollback()
|
||||
return False
|
||||
|
||||
def stats(self) -> dict:
|
||||
"""Return component statistics."""
|
||||
return {
|
||||
"extractor": self._extractor.stats(),
|
||||
"matcher": self._matcher.stats(),
|
||||
"initialized": self._initialized,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Migration Passes — Backfill existing 4,800+ controls
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class MigrationPasses:
    """Non-destructive migration passes for existing controls.

    Pass 1: Obligation Linkage (deterministic, article→obligation lookup)
    Pass 2: Pattern Classification (keyword-based matching)
    Pass 3: Quality Triage (categorize by linkage completeness)
    Pass 4: Crosswalk Backfill (write crosswalk rows for linked controls)
    Pass 5: Deduplication (mark duplicate controls)

    Usage::

        migration = MigrationPasses(db)
        await migration.initialize()

        result = await migration.run_pass1_obligation_linkage(limit=100)
        result = await migration.run_pass2_pattern_classification(limit=100)
        result = migration.run_pass3_quality_triage()
        result = migration.run_pass4_crosswalk_backfill()
        result = migration.run_pass5_deduplication()
    """

    def __init__(self, db: Session):
        """Bind the passes to *db* and create an uninitialized extractor and matcher."""
        self.db = db
        self._extractor = ObligationExtractor()
        self._matcher = PatternMatcher()
        self._initialized = False

    async def initialize(self) -> None:
        """Initialize extractors (loads obligations + patterns)."""
        if self._initialized:
            return
        self._extractor._load_obligations()
        self._matcher._load_patterns()
        self._matcher._build_keyword_index()
        self._initialized = True

    def _fetch_rows(self, query: str, limit: int) -> list:
        """Run *query*, appending a bound ``LIMIT`` when *limit* is positive."""
        params = {}
        if limit > 0:
            # Bind the limit instead of formatting it into the SQL text.
            query += " LIMIT :limit"
            params["limit"] = limit
        return self.db.execute(text(query), params).fetchall()

    # -------------------------------------------------------------------
    # Pass 1: Obligation Linkage (deterministic)
    # -------------------------------------------------------------------

    async def run_pass1_obligation_linkage(self, limit: int = 0) -> dict:
        """Link existing controls to obligations via source_citation article.

        For each control with source_citation → extract regulation + article
        → look up in obligation framework → set obligation_ids.

        Args:
            limit: Maximum number of controls to process; ``0`` (or less)
                means no limit.

        Returns:
            Counters ``total``, ``linked``, ``no_match`` and ``no_citation``.
        """
        if not self._initialized:
            await self.initialize()

        rows = self._fetch_rows(
            """
            SELECT id, control_id, source_citation, generation_metadata
            FROM canonical_controls
            WHERE release_state NOT IN ('deprecated')
              AND (obligation_ids IS NULL OR obligation_ids = '[]')
            """,
            limit,
        )

        stats = {"total": len(rows), "linked": 0, "no_match": 0, "no_citation": 0}

        for row in rows:
            control_uuid = str(row[0])
            citation = row[2]
            metadata = row[3]

            # Extract regulation + article from citation or metadata
            reg_code, article = _extract_regulation_article(citation, metadata)
            if not reg_code:
                stats["no_citation"] += 1
                continue

            # Tier 1: Exact match
            match = self._extractor._tier1_exact(reg_code, article or "")
            if match and match.obligation_id:
                self.db.execute(
                    text("""
                        UPDATE canonical_controls
                        SET obligation_ids = :obl_ids
                        WHERE id = CAST(:uuid AS uuid)
                    """),
                    {
                        "obl_ids": json.dumps([match.obligation_id]),
                        "uuid": control_uuid,
                    },
                )
                stats["linked"] += 1
            else:
                stats["no_match"] += 1

        self.db.commit()
        logger.info("Pass 1: %s", stats)
        return stats

    # -------------------------------------------------------------------
    # Pass 2: Pattern Classification (keyword-based)
    # -------------------------------------------------------------------

    async def run_pass2_pattern_classification(self, limit: int = 0) -> dict:
        """Classify existing controls into patterns via keyword matching.

        For each control without pattern_id → keyword-match title+objective
        against pattern library → assign best match.

        Args:
            limit: Maximum number of controls to process; ``0`` (or less)
                means no limit.

        Returns:
            Counters ``total``, ``classified`` and ``no_match``.
        """
        if not self._initialized:
            await self.initialize()

        rows = self._fetch_rows(
            """
            SELECT id, control_id, title, objective
            FROM canonical_controls
            WHERE release_state NOT IN ('deprecated')
              AND (pattern_id IS NULL OR pattern_id = '')
            """,
            limit,
        )

        stats = {"total": len(rows), "classified": 0, "no_match": 0}

        for row in rows:
            control_uuid = str(row[0])
            title = row[2] or ""
            objective = row[3] or ""

            # Keyword match
            match_text = f"{title} {objective}"
            result = self._matcher._tier1_keyword(match_text, None)

            # A single keyword hit is not accepted as a classification.
            if result and result.pattern_id and result.keyword_hits >= 2:
                self.db.execute(
                    text("""
                        UPDATE canonical_controls
                        SET pattern_id = :pattern_id
                        WHERE id = CAST(:uuid AS uuid)
                    """),
                    {
                        "pattern_id": result.pattern_id,
                        "uuid": control_uuid,
                    },
                )
                stats["classified"] += 1
            else:
                stats["no_match"] += 1

        self.db.commit()
        logger.info("Pass 2: %s", stats)
        return stats

    # -------------------------------------------------------------------
    # Pass 3: Quality Triage
    # -------------------------------------------------------------------

    def run_pass3_quality_triage(self) -> dict:
        """Categorize controls by linkage completeness.

        Sets generation_metadata.triage_status:
        - "review": has both obligation_id + pattern_id
        - "needs_obligation": has pattern_id but no obligation_id
        - "needs_pattern": has obligation_id but no pattern_id
        - "legacy_unlinked": has neither

        Returns:
            Mapping of triage status to the number of rows updated.
        """
        has_obligation = "obligation_ids IS NOT NULL AND obligation_ids != '[]'"
        no_obligation = "(obligation_ids IS NULL OR obligation_ids = '[]')"
        has_pattern = "pattern_id IS NOT NULL AND pattern_id != ''"
        no_pattern = "(pattern_id IS NULL OR pattern_id = '')"

        # status -> (obligation condition, pattern condition); the four
        # statements differ only in these two predicates and the status value.
        categories = {
            "review": (has_obligation, has_pattern),
            "needs_obligation": (no_obligation, has_pattern),
            "needs_pattern": (has_obligation, no_pattern),
            "legacy_unlinked": (no_obligation, no_pattern),
        }

        stats = {}
        for category, (obligation_cond, pattern_cond) in categories.items():
            # Only the hard-coded values above are interpolated here.
            sql = f"""
                UPDATE canonical_controls
                SET generation_metadata = jsonb_set(
                    COALESCE(generation_metadata::jsonb, '{{}}'::jsonb),
                    '{{triage_status}}', '"{category}"'
                )
                WHERE release_state NOT IN ('deprecated')
                  AND {obligation_cond}
                  AND {pattern_cond}
            """
            result = self.db.execute(text(sql))
            stats[category] = result.rowcount

        self.db.commit()
        logger.info("Pass 3: %s", stats)
        return stats

    # -------------------------------------------------------------------
    # Pass 4: Crosswalk Backfill
    # -------------------------------------------------------------------

    def run_pass4_crosswalk_backfill(self) -> dict:
        """Create crosswalk_matrix rows for controls with obligation + pattern.

        Only creates rows that don't already exist. One row is written per
        (control, obligation id) pair with confidence 0.80 and source
        'migrated'.

        Returns:
            ``{"rows_inserted": <count>}``.
        """
        result = self.db.execute(text("""
            INSERT INTO crosswalk_matrix (
                regulation_code, obligation_id, pattern_id,
                master_control_id, master_control_uuid,
                confidence, source
            )
            SELECT
                COALESCE(
                    (generation_metadata::jsonb->>'source_regulation'),
                    ''
                ) AS regulation_code,
                obl.value::text AS obligation_id,
                cc.pattern_id,
                cc.control_id,
                cc.id,
                0.80,
                'migrated'
            FROM canonical_controls cc,
                jsonb_array_elements_text(
                    COALESCE(cc.obligation_ids::jsonb, '[]'::jsonb)
                ) AS obl(value)
            WHERE cc.release_state NOT IN ('deprecated')
              AND cc.pattern_id IS NOT NULL AND cc.pattern_id != ''
              AND cc.obligation_ids IS NOT NULL AND cc.obligation_ids != '[]'
              AND NOT EXISTS (
                  SELECT 1 FROM crosswalk_matrix cw
                  WHERE cw.master_control_uuid = cc.id
                    AND cw.obligation_id = obl.value::text
              )
        """))

        rows_inserted = result.rowcount
        self.db.commit()
        logger.info("Pass 4: %d crosswalk rows inserted", rows_inserted)
        return {"rows_inserted": rows_inserted}

    # -------------------------------------------------------------------
    # Pass 5: Deduplication
    # -------------------------------------------------------------------

    def run_pass5_deduplication(self) -> dict:
        """Mark duplicate controls (same obligation + same pattern).

        Groups controls by (obligation_id, pattern_id), keeps the one with
        highest evidence_confidence (or newest), marks rest as deprecated.

        Returns:
            ``groups_found`` and ``controls_deprecated``; the latter counts
            only rows that were actually updated.
        """
        # Find groups with duplicates
        groups = self.db.execute(text("""
            SELECT cc.pattern_id,
                   obl.value::text AS obligation_id,
                   array_agg(cc.id ORDER BY cc.evidence_confidence DESC NULLS LAST, cc.created_at DESC) AS ids,
                   count(*) AS cnt
            FROM canonical_controls cc,
                jsonb_array_elements_text(
                    COALESCE(cc.obligation_ids::jsonb, '[]'::jsonb)
                ) AS obl(value)
            WHERE cc.release_state NOT IN ('deprecated')
              AND cc.pattern_id IS NOT NULL AND cc.pattern_id != ''
            GROUP BY cc.pattern_id, obl.value::text
            HAVING count(*) > 1
        """)).fetchall()

        stats = {"groups_found": len(groups), "controls_deprecated": 0}

        # NOTE(review): a control with several obligation ids can be the keeper
        # of one group and a duplicate in another; it is then deprecated anyway.
        # Confirm this is the intended outcome.
        for group in groups:
            ids = group[2]  # Array of UUIDs, first is the keeper
            if len(ids) <= 1:
                continue

            # Keep first (highest confidence), deprecate rest
            for dep_id in ids[1:]:
                outcome = self.db.execute(
                    text("""
                        UPDATE canonical_controls
                        SET release_state = 'deprecated',
                            generation_metadata = jsonb_set(
                                COALESCE(generation_metadata::jsonb, '{}'::jsonb),
                                '{deprecated_reason}', '"duplicate_same_obligation_pattern"'
                            )
                        WHERE id = CAST(:uuid AS uuid)
                          AND release_state != 'deprecated'
                    """),
                    {"uuid": str(dep_id)},
                )
                # The same control can show up in several groups; the guard in
                # the WHERE clause makes later updates match nothing, so count
                # affected rows rather than attempts.
                if outcome.rowcount > 0:
                    stats["controls_deprecated"] += outcome.rowcount

        self.db.commit()
        logger.info("Pass 5: %s", stats)
        return stats

    def migration_status(self) -> dict:
        """Return overall migration progress.

        Counts cover all rows of canonical_controls (deprecated ones included);
        percentages are relative to that total and rounded to one decimal.
        """
        row = self.db.execute(text("""
            SELECT
                count(*) AS total,
                count(*) FILTER (WHERE obligation_ids IS NOT NULL AND obligation_ids != '[]') AS has_obligation,
                count(*) FILTER (WHERE pattern_id IS NOT NULL AND pattern_id != '') AS has_pattern,
                count(*) FILTER (
                    WHERE obligation_ids IS NOT NULL AND obligation_ids != '[]'
                      AND pattern_id IS NOT NULL AND pattern_id != ''
                ) AS fully_linked,
                count(*) FILTER (WHERE release_state = 'deprecated') AS deprecated
            FROM canonical_controls
        """)).fetchone()

        # Guard against division by zero on an empty table.
        denominator = max(row[0], 1)

        return {
            "total_controls": row[0],
            "has_obligation": row[1],
            "has_pattern": row[2],
            "fully_linked": row[3],
            "deprecated": row[4],
            "coverage_obligation_pct": round(row[1] / denominator * 100, 1),
            "coverage_pattern_pct": round(row[2] / denominator * 100, 1),
            "coverage_full_pct": round(row[3] / denominator * 100, 1),
        }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _extract_regulation_article(
    citation: Optional[str], metadata: Optional[str]
) -> tuple[Optional[str], Optional[str]]:
    """Derive ``(regulation_code, article)`` for a control.

    The citation is consulted first; the metadata is only read when the
    citation yielded no regulation code. Both inputs may be JSON text or an
    already decoded object; undecodable input is ignored.
    """
    from compliance.services.obligation_extractor import _normalize_regulation

    regulation: Optional[str] = None
    art: Optional[str] = None

    # Citation: article plus the "source" field as regulation.
    if citation:
        try:
            parsed = citation if not isinstance(citation, str) else json.loads(citation)
            if isinstance(parsed, dict):
                art = parsed.get("article") or parsed.get("source_article")
                raw_source = parsed.get("source", "")
                if raw_source:
                    regulation = _normalize_regulation(raw_source)
        except (json.JSONDecodeError, TypeError):
            pass

    # Metadata is a fallback for the regulation only.
    if not metadata or regulation:
        return regulation, art

    try:
        meta = metadata if not isinstance(metadata, str) else json.loads(metadata)
        if isinstance(meta, dict):
            raw_regulation = meta.get("source_regulation", "")
            if raw_regulation:
                regulation = _normalize_regulation(raw_regulation)
            if not art:
                art = meta.get("source_article")
    except (json.JSONDecodeError, TypeError):
        pass

    return regulation, art
|
||||
@@ -33,6 +33,7 @@ class RAGSearchResult:
|
||||
paragraph: str
|
||||
source_url: str
|
||||
score: float
|
||||
collection: str = ""
|
||||
|
||||
|
||||
class ComplianceRAGClient:
|
||||
@@ -91,6 +92,7 @@ class ComplianceRAGClient:
|
||||
paragraph=r.get("paragraph", ""),
|
||||
source_url=r.get("source_url", ""),
|
||||
score=r.get("score", 0.0),
|
||||
collection=collection,
|
||||
))
|
||||
return results
|
||||
|
||||
@@ -98,6 +100,88 @@ class ComplianceRAGClient:
|
||||
logger.warning("RAG search failed: %s", e)
|
||||
return []
|
||||
|
||||
async def search_with_rerank(
    self,
    query: str,
    collection: str = "bp_compliance_ce",
    regulations: Optional[List[str]] = None,
    top_k: int = 5,
) -> List[RAGSearchResult]:
    """
    Search with optional cross-encoder re-ranking.

    When no reranker is available this is a plain ``search`` call. Otherwise
    ``max(top_k * 4, 20)`` candidates are fetched, re-ranked, and the best
    ``top_k`` returned. If re-ranking raises, the first ``top_k`` candidates
    are returned in their original order.
    """
    from .reranker import get_reranker

    cross_encoder = get_reranker()
    if cross_encoder is None:
        return await self.search(query, collection, regulations, top_k)

    # Fetch a larger pool so the reranker has something to choose from.
    pool_size = max(top_k * 4, 20)
    pool = await self.search(query, collection, regulations, top_k=pool_size)
    if not pool:
        return []

    passages = [hit.text for hit in pool]
    try:
        order = cross_encoder.rerank(query, passages, top_k=top_k)
        reranked = []
        for position in order:
            reranked.append(pool[position])
        return reranked
    except Exception as e:
        logger.warning("Reranking failed, returning unranked: %s", e)
        return pool[:top_k]
|
||||
|
||||
async def scroll(
    self,
    collection: str,
    offset: Optional[str] = None,
    limit: int = 100,
) -> tuple[List[RAGSearchResult], Optional[str]]:
    """
    Fetch one page of chunks from a collection.

    Returns ``(chunks, next_offset)``; ``next_offset`` is ``None`` once the
    collection is exhausted. A non-200 response or any exception during the
    request is logged and reported as ``([], None)``.
    """
    # The scroll endpoint is derived from the configured search URL.
    endpoint = self._search_url.replace("/search", "/scroll")
    query_params = {"collection": collection, "limit": str(limit)}
    if offset:
        query_params["offset"] = offset

    try:
        async with httpx.AsyncClient(timeout=30.0) as http:
            response = await http.get(endpoint, params=query_params)

        if response.status_code != 200:
            logger.warning(
                "RAG scroll returned %d: %s", response.status_code, response.text[:200]
            )
            return [], None

        payload = response.json()
        # Scrolled chunks carry no relevance score, hence the fixed 0.0.
        chunks = [
            RAGSearchResult(
                text=item.get("text", ""),
                regulation_code=item.get("regulation_code", ""),
                regulation_name=item.get("regulation_name", ""),
                regulation_short=item.get("regulation_short", ""),
                category=item.get("category", ""),
                article=item.get("article", ""),
                paragraph=item.get("paragraph", ""),
                source_url=item.get("source_url", ""),
                score=0.0,
                collection=collection,
            )
            for item in payload.get("chunks", [])
        ]
        following = payload.get("next_offset") or None
        return chunks, following

    except Exception as e:
        logger.warning("RAG scroll failed: %s", e)
        return [], None
|
||||
|
||||
def format_for_prompt(
|
||||
self, results: List[RAGSearchResult], max_results: int = 5
|
||||
) -> str:
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
"""
|
||||
Cross-Encoder Re-Ranking for RAG Search Results.
|
||||
|
||||
Uses BGE Reranker v2 (BAAI/bge-reranker-v2-m3, MIT license) to re-rank
|
||||
search results from Qdrant for improved retrieval quality.
|
||||
|
||||
Lazy-loads the model on first use. Disabled by default (RERANK_ENABLED=false).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Re-ranking is on only when RERANK_ENABLED is "true" (case-insensitive).
RERANK_ENABLED = os.getenv("RERANK_ENABLED", "false").lower() == "true"
# Name of the cross-encoder model to load.
RERANK_MODEL = os.getenv("RERANK_MODEL", "BAAI/bge-reranker-v2-m3")


class Reranker:
    """Cross-encoder reranker using sentence-transformers."""

    def __init__(self, model_name: str = RERANK_MODEL):
        """Remember the model name; the model itself is loaded on first use."""
        self._model = None  # Lazy init
        self._model_name = model_name

    def _ensure_model(self) -> None:
        """Load model on first use.

        Raises:
            ImportError: If sentence-transformers is not installed.
            Exception: Any error raised while constructing the model is
                logged and re-raised.
        """
        if self._model is not None:
            return
        try:
            from sentence_transformers import CrossEncoder

            logger.info("Loading reranker model: %s", self._model_name)
            self._model = CrossEncoder(self._model_name)
            logger.info("Reranker model loaded successfully")
        except ImportError:
            logger.error(
                "sentence-transformers not installed. "
                "Install with: pip install sentence-transformers"
            )
            raise
        except Exception as e:
            logger.error("Failed to load reranker model: %s", e)
            raise

    def rerank(
        self, query: str, texts: list[str], top_k: int = 5
    ) -> list[int]:
        """
        Return indices of top_k texts sorted by relevance (highest first).

        Args:
            query: The search query.
            texts: List of candidate texts to re-rank.
            top_k: Number of top results to return.

        Returns:
            List of indices into the original texts list, sorted by relevance.
            Empty when ``texts`` is empty or ``top_k`` is not positive.
        """
        # A negative top_k would otherwise slice as ranked[:-k] and return
        # most of the list; nothing requested means nothing returned, and the
        # model does not need to be loaded for that.
        if not texts or top_k <= 0:
            return []

        self._ensure_model()

        pairs = [[query, candidate] for candidate in texts]
        scores = self._model.predict(pairs)

        # Sort by score descending, return indices
        ranked = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
        return ranked[:top_k]
|
||||
|
||||
|
||||
# Shared instance, created lazily by get_reranker()
_reranker: Optional[Reranker] = None


def get_reranker() -> Optional[Reranker]:
    """Return the process-wide reranker, or None when re-ranking is disabled.

    The instance is created on the first call made while RERANK_ENABLED is
    true and reused afterwards.
    """
    global _reranker
    if RERANK_ENABLED:
        if _reranker is None:
            _reranker = Reranker()
        return _reranker
    return None
|
||||
@@ -0,0 +1,331 @@
|
||||
"""V1 Control Enrichment Service — Match Eigenentwicklung controls to regulations.
|
||||
|
||||
Finds regulatory coverage for v1 controls (generation_strategy='ungrouped',
|
||||
pipeline_version=1, no source_citation) by embedding similarity search.
|
||||
|
||||
Reuses embedding + Qdrant helpers from control_dedup.py.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from database import SessionLocal
|
||||
from compliance.services.control_dedup import (
|
||||
get_embedding,
|
||||
qdrant_search_cross_regulation,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Minimum similarity score a hit needs to be recorded as a match. Lower than
# the dedup threshold (0.85) since we want informational matches; typical top
# scores for v1 controls are 0.70-0.77.
V1_MATCH_THRESHOLD = 0.70
# Upper bound on the number of distinct regulatory matches kept per v1 control.
V1_MAX_MATCHES = 5
|
||||
|
||||
|
||||
def _is_eigenentwicklung_query() -> str:
    """Return the SQL predicate that selects v1 Eigenentwicklung controls.

    The fragment carries no leading ``WHERE`` and is a fixed literal. It
    requires an 'ungrouped' generation strategy, a pipeline_version of '1' or
    NULL, no source_citation, no parent control, and a release_state other
    than rejected, merged or deprecated.
    """
    return """
        generation_strategy = 'ungrouped'
        AND (pipeline_version = '1' OR pipeline_version IS NULL)
        AND source_citation IS NULL
        AND parent_control_uuid IS NULL
        AND release_state NOT IN ('rejected', 'merged', 'deprecated')
    """
|
||||
|
||||
|
||||
async def count_v1_controls() -> int:
    """Return the number of controls matching the v1 Eigenentwicklung predicate."""
    with SessionLocal() as db:
        count_sql = text(f"""
            SELECT COUNT(*) AS cnt
            FROM canonical_controls
            WHERE {_is_eigenentwicklung_query()}
        """)
        result_row = db.execute(count_sql).fetchone()
        if result_row:
            return result_row.cnt
        return 0
|
||||
|
||||
|
||||
def _citation_source_and_article(source_citation) -> tuple:
    """Return ``(source, article)`` from a citation given as dict or JSON text."""
    import json

    if isinstance(source_citation, str):
        try:
            source_citation = json.loads(source_citation)
        except (json.JSONDecodeError, TypeError):
            return None, None
    if not isinstance(source_citation, dict):
        return None, None
    return source_citation.get("source"), source_citation.get("article")


def _resolve_regulatory_control(db, control_uuid: str):
    """Return the control (or its parent) that carries a source_citation, else None."""
    row = db.execute(text("""
        SELECT c.id, c.control_id, c.title, c.source_citation,
               c.severity, c.category, c.parent_control_uuid
        FROM canonical_controls c
        WHERE c.id = CAST(:uuid AS uuid)
    """), {"uuid": control_uuid}).fetchone()
    if not row:
        return None

    if not row.source_citation and row.parent_control_uuid:
        # Look up parent — the parent has the source_citation
        parent_row = db.execute(text("""
            SELECT id, control_id, title, source_citation,
                   severity, category, parent_control_uuid
            FROM canonical_controls
            WHERE id = CAST(:uuid AS uuid)
              AND source_citation IS NOT NULL
        """), {"uuid": str(row.parent_control_uuid)}).fetchone()
        if parent_row:
            row = parent_row

    return row if row.source_citation else None


def _store_v1_matches(db, v1, hits) -> list:
    """Upsert up to V1_MAX_MATCHES matches for *v1* and return their summaries."""
    stored = []
    seen_parents: set[str] = set()

    for hit in hits:
        # Stop as soon as the cap is reached; further hits need no lookups.
        if len(stored) >= V1_MAX_MATCHES:
            break

        score = hit.get("score", 0)
        if score < V1_MATCH_THRESHOLD:
            continue

        payload = hit.get("payload", {})
        matched_uuid = payload.get("control_uuid")
        # Skip hits without a control reference and the control itself.
        if not matched_uuid or matched_uuid == str(v1.id):
            continue

        reg_row = _resolve_regulatory_control(db, matched_uuid)
        if reg_row is None:
            continue

        # Deduplicate by parent UUID so one regulation is listed only once.
        parent_key = str(reg_row.id)
        if parent_key in seen_parents:
            continue
        seen_parents.add(parent_key)

        rank = len(stored) + 1
        matched_source, matched_article = _citation_source_and_article(
            reg_row.source_citation
        )

        # Insert match — link to the regulatory parent (not the atomic child)
        db.execute(text("""
            INSERT INTO v1_control_matches
                (v1_control_uuid, matched_control_uuid, similarity_score,
                 match_rank, matched_source, matched_article, match_method)
            VALUES
                (CAST(:v1_uuid AS uuid), CAST(:matched_uuid AS uuid), :score,
                 :rank, :source, :article, 'embedding')
            ON CONFLICT (v1_control_uuid, matched_control_uuid) DO UPDATE
                SET similarity_score = EXCLUDED.similarity_score,
                    match_rank = EXCLUDED.match_rank
        """), {
            "v1_uuid": str(v1.id),
            "matched_uuid": str(reg_row.id),
            "score": round(score, 3),
            "rank": rank,
            "source": matched_source,
            "article": matched_article,
        })

        stored.append({
            "v1_control_id": v1.control_id,
            "v1_title": v1.title,
            "matched_control_id": reg_row.control_id,
            "matched_title": reg_row.title,
            "matched_source": matched_source,
            "matched_article": matched_article,
            "similarity_score": round(score, 3),
            "match_rank": rank,
        })

    return stored


async def enrich_v1_matches(
    dry_run: bool = True,
    batch_size: int = 100,
    offset: int = 0,
) -> dict:
    """Find regulatory matches for v1 Eigenentwicklung controls.

    Each control is embedded, searched against the ``atomic_controls_dedup``
    collection, and its hits are resolved to controls carrying a
    source_citation. Writes for one control happen inside a savepoint, so a
    failure discards only that control's rows and the rest of the batch is
    still committed.

    Args:
        dry_run: If True, only count — don't write matches.
        batch_size: Number of v1 controls to process per call.
        offset: Pagination offset (v1 control index).

    Returns:
        Stats dict with counts, sample matches, and pagination info.
    """
    with SessionLocal() as db:
        # 1. Load v1 controls (paginated)
        v1_controls = db.execute(text(f"""
            SELECT id, control_id, title, objective, category
            FROM canonical_controls
            WHERE {_is_eigenentwicklung_query()}
            ORDER BY control_id
            LIMIT :limit OFFSET :offset
        """), {"limit": batch_size, "offset": offset}).fetchall()

        # Count total for pagination
        total_row = db.execute(text(f"""
            SELECT COUNT(*) AS cnt
            FROM canonical_controls
            WHERE {_is_eigenentwicklung_query()}
        """)).fetchone()
        total_v1 = total_row.cnt if total_row else 0

        if not v1_controls:
            return {
                "dry_run": dry_run,
                "processed": 0,
                "total_v1": total_v1,
                "message": "Kein weiterer Batch — alle v1 Controls verarbeitet.",
            }

        if dry_run:
            return {
                "dry_run": True,
                "total_v1": total_v1,
                "offset": offset,
                "batch_size": batch_size,
                "sample_controls": [
                    {
                        "control_id": r.control_id,
                        "title": r.title,
                        "category": r.category,
                    }
                    for r in v1_controls[:20]
                ],
            }

        # 2. Process each v1 control
        processed = 0
        matches_inserted = 0
        errors = []
        sample_matches = []

        for v1 in v1_controls:
            try:
                # Build search text
                search_text = f"{v1.title} — {v1.objective}"

                # Get embedding
                embedding = await get_embedding(search_text)
                if not embedding:
                    errors.append({
                        "control_id": v1.control_id,
                        "error": "Embedding fehlgeschlagen",
                    })
                    continue

                # Search Qdrant (cross-regulation, no pattern filter)
                results = await qdrant_search_cross_regulation(
                    embedding, top_k=20,
                    collection="atomic_controls_dedup",
                )

                # A failed statement aborts the surrounding transaction on
                # PostgreSQL. The savepoint confines that to this control so
                # later controls and the final commit still succeed.
                with db.begin_nested():
                    control_matches = _store_v1_matches(db, v1, results)

                matches_inserted += len(control_matches)
                # Collect sample (capped at 20 entries overall)
                free_slots = 20 - len(sample_matches)
                if free_slots > 0:
                    sample_matches.extend(control_matches[:free_slots])

                processed += 1

            except Exception as e:
                logger.warning("V1 enrichment error for %s: %s", v1.control_id, e)
                errors.append({
                    "control_id": v1.control_id,
                    "error": str(e),
                })

        db.commit()

        # Pagination: a short batch means there is nothing left to fetch.
        next_offset = offset + batch_size if len(v1_controls) == batch_size else None

        return {
            "dry_run": False,
            "offset": offset,
            "batch_size": batch_size,
            "next_offset": next_offset,
            "total_v1": total_v1,
            "processed": processed,
            "matches_inserted": matches_inserted,
            "errors": errors[:10],
            "sample_matches": sample_matches,
        }
|
||||
|
||||
|
||||
async def get_v1_matches(control_uuid: str) -> list[dict]:
    """Return the stored regulatory matches for a single v1 control.

    Args:
        control_uuid: UUID (as a string) of the v1 control to look up.

    Returns:
        One dict per row in ``v1_control_matches`` for that control, ordered
        by ``match_rank``. Each dict combines the match record with selected
        columns of the matched row in ``canonical_controls``.
    """
    query = text("""
        SELECT
            m.similarity_score,
            m.match_rank,
            m.matched_source,
            m.matched_article,
            m.match_method,
            c.control_id AS matched_control_id,
            c.title AS matched_title,
            c.objective AS matched_objective,
            c.severity AS matched_severity,
            c.category AS matched_category,
            c.source_citation AS matched_source_citation
        FROM v1_control_matches m
        JOIN canonical_controls c ON c.id = m.matched_control_uuid
        WHERE m.v1_control_uuid = CAST(:uuid AS uuid)
        ORDER BY m.match_rank
    """)

    with SessionLocal() as db:
        # fetchall() materialises the rows, so they stay usable after the
        # session has been closed.
        match_rows = db.execute(query, {"uuid": control_uuid}).fetchall()

    matches: list[dict] = []
    for row in match_rows:
        matches.append({
            "matched_control_id": row.matched_control_id,
            "matched_title": row.matched_title,
            "matched_objective": row.matched_objective,
            "matched_severity": row.matched_severity,
            "matched_category": row.matched_category,
            "matched_source": row.matched_source,
            "matched_article": row.matched_article,
            "matched_source_citation": row.matched_source_citation,
            # Converted to a plain float for the response payload.
            "similarity_score": float(row.similarity_score),
            "match_rank": row.match_rank,
            "match_method": row.match_method,
        })
    return matches
|
||||
|
||||
|
||||
async def get_v1_enrichment_stats() -> dict:
    """Return overview statistics for the v1 enrichment.

    Returns:
        A dict with:
          * ``total_v1_controls``: rows of ``canonical_controls`` matching the
            filter produced by ``_is_eigenentwicklung_query()``.
          * ``v1_with_matches``: distinct v1 controls (same filter) that have
            at least one row in ``v1_control_matches``.
          * ``v1_without_matches``: difference of the two values above.
          * ``total_matches``: total rows in ``v1_control_matches``.
          * ``avg_similarity_score``: average score rounded to three decimals,
            or ``None`` when there is no score to average.
    """
    v1_filter = _is_eigenentwicklung_query()

    # The filter is written with unqualified column names. In the joined query
    # canonical_controls is aliased as "c", so each column is prefixed.
    # NOTE(review): plain textual replacement; assumes these names do not
    # appear in the filter in any other role — confirm against the helper.
    qualified_filter = v1_filter
    for column in (
        "release_state",
        "generation_strategy",
        "pipeline_version",
        "source_citation",
        "parent_control_uuid",
    ):
        qualified_filter = qualified_filter.replace(column, f"c.{column}")

    with SessionLocal() as db:
        total_v1 = db.execute(text(f"""
            SELECT COUNT(*) AS cnt FROM canonical_controls
            WHERE {v1_filter}
        """)).fetchone()

        matched_v1 = db.execute(text(f"""
            SELECT COUNT(DISTINCT m.v1_control_uuid) AS cnt
            FROM v1_control_matches m
            JOIN canonical_controls c ON c.id = m.v1_control_uuid
            WHERE {qualified_filter}
        """)).fetchone()

        total_matches = db.execute(text("""
            SELECT COUNT(*) AS cnt FROM v1_control_matches
        """)).fetchone()

        avg_score = db.execute(text("""
            SELECT AVG(similarity_score) AS avg_score FROM v1_control_matches
        """)).fetchone()

    total_v1_count = total_v1.cnt if total_v1 else 0
    matched_v1_count = matched_v1.cnt if matched_v1 else 0
    average = avg_score.avg_score if avg_score else None

    return {
        "total_v1_controls": total_v1_count,
        "v1_with_matches": matched_v1_count,
        "v1_without_matches": total_v1_count - matched_v1_count,
        "total_matches": total_matches.cnt if total_matches else 0,
        # Compare against None explicitly: a truthiness test would report a
        # legitimate average of 0 as "no data".
        "avg_similarity_score": round(float(average), 3) if average is not None else None,
    }
|
||||
@@ -14,6 +14,12 @@ from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
# Configure root logging so all modules' logger.info() etc. are visible
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(levelname)s:%(name)s: %(message)s",
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Compliance-specific API routers
|
||||
@@ -86,6 +92,18 @@ async def health():
|
||||
}
|
||||
|
||||
|
||||
@app.get("/debug/routers", tags=["system"])
async def debug_routers():
    """Diagnostic endpoint: report sub-router load results.

    Returns:
        A dict with the number of configured router modules (``total``), the
        loaded counter (``loaded``), the number of failed imports
        (``failed_count``) and the mapping of failed module name to error
        message (``failed``).
    """
    from compliance.api import _ROUTER_MODULES, _failed_routers, _loaded_count

    report = {
        "total": len(_ROUTER_MODULES),
        "loaded": _loaded_count,
    }
    report["failed_count"] = len(_failed_routers)
    report["failed"] = _failed_routers
    return report
|
||||
|
||||
|
||||
# --- Compliance-specific Routers ---
|
||||
|
||||
# Consent (user-facing)
|
||||
|
||||
@@ -79,11 +79,14 @@ def run_migrations():
|
||||
|
||||
logger.info("%d pending migrations (of %d total)", len(pending), len(migration_files))
|
||||
|
||||
failed = []
|
||||
for migration_file in pending:
|
||||
logger.info("Applying migration: %s", migration_file.name)
|
||||
try:
|
||||
sql = migration_file.read_text(encoding="utf-8")
|
||||
# Execute the full SQL file as-is (supports BEGIN/COMMIT)
|
||||
# Strip explicit BEGIN/COMMIT — we manage transactions ourselves
|
||||
sql = re.sub(r'(?mi)^\s*BEGIN\s*;\s*$', '', sql)
|
||||
sql = re.sub(r'(?mi)^\s*COMMIT\s*;\s*$', '', sql)
|
||||
cursor.execute(sql)
|
||||
raw_conn.commit()
|
||||
# Record successful application
|
||||
@@ -96,11 +99,14 @@ def run_migrations():
|
||||
except Exception as e:
|
||||
raw_conn.rollback()
|
||||
logger.error(" FAILED: %s — %s", migration_file.name, e)
|
||||
raise RuntimeError(
|
||||
f"Migration {migration_file.name} failed: {e}"
|
||||
) from e
|
||||
failed.append((migration_file.name, str(e)))
|
||||
# Continue with remaining migrations instead of aborting
|
||||
|
||||
logger.info("All migrations applied successfully")
|
||||
if failed:
|
||||
names = ", ".join(f[0] for f in failed)
|
||||
logger.error("Some migrations failed: %s", names)
|
||||
else:
|
||||
logger.info("All migrations applied successfully")
|
||||
finally:
|
||||
raw_conn.close()
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
-- Adds job tracking, chunk tracking, blocked sources, and extends canonical_controls
|
||||
-- for the 3-license-rule system (free_use, citation_required, restricted).
|
||||
|
||||
BEGIN;
|
||||
-- Transaction managed by migration_runner
|
||||
|
||||
-- =============================================================================
|
||||
-- 1. Job-Tracking for Generator Runs
|
||||
@@ -69,35 +69,21 @@ CREATE TABLE IF NOT EXISTS canonical_blocked_sources (
|
||||
|
||||
-- =============================================================================
|
||||
-- 4. Extend canonical_controls: release_state + 3-rule columns
|
||||
-- Safe: only runs if canonical_controls exists
|
||||
-- =============================================================================
|
||||
|
||||
-- Expand release_state enum to include generator states
|
||||
ALTER TABLE canonical_controls DROP CONSTRAINT IF EXISTS canonical_controls_release_state_check;
|
||||
ALTER TABLE canonical_controls ADD CONSTRAINT canonical_controls_release_state_check
|
||||
CHECK (release_state IN ('draft', 'review', 'approved', 'deprecated', 'needs_review', 'too_close', 'duplicate'));
|
||||
|
||||
-- License rule: 1 = free_use, 2 = citation_required, 3 = restricted
|
||||
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
|
||||
license_rule INTEGER DEFAULT NULL;
|
||||
|
||||
-- Original text from source (Rule 1+2 only; Rule 3 = always NULL)
|
||||
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
|
||||
source_original_text TEXT DEFAULT NULL;
|
||||
|
||||
-- Citation info (Rule 1+2 only; Rule 3 = always NULL)
|
||||
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
|
||||
source_citation JSONB DEFAULT NULL;
|
||||
|
||||
-- Whether source info may be shown to customers
|
||||
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
|
||||
customer_visible BOOLEAN DEFAULT true;
|
||||
|
||||
-- Generation metadata (internal only, never shown to customers)
|
||||
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
|
||||
generation_metadata JSONB DEFAULT NULL;
|
||||
|
||||
-- Index for filtering by license rule and customer visibility
|
||||
CREATE INDEX IF NOT EXISTS idx_canonical_controls_license_rule ON canonical_controls(license_rule);
|
||||
CREATE INDEX IF NOT EXISTS idx_canonical_controls_customer_visible ON canonical_controls(customer_visible);
|
||||
|
||||
COMMIT;
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'canonical_controls') THEN
|
||||
ALTER TABLE canonical_controls DROP CONSTRAINT IF EXISTS canonical_controls_release_state_check;
|
||||
ALTER TABLE canonical_controls ADD CONSTRAINT canonical_controls_release_state_check
|
||||
CHECK (release_state IN ('draft', 'review', 'approved', 'deprecated', 'needs_review', 'too_close', 'duplicate'));
|
||||
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS license_rule INTEGER DEFAULT NULL;
|
||||
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS source_original_text TEXT DEFAULT NULL;
|
||||
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS source_citation JSONB DEFAULT NULL;
|
||||
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS customer_visible BOOLEAN DEFAULT true;
|
||||
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS generation_metadata JSONB DEFAULT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_canonical_controls_license_rule ON canonical_controls(license_rule);
|
||||
CREATE INDEX IF NOT EXISTS idx_canonical_controls_customer_visible ON canonical_controls(customer_visible);
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
-- Migration 047: Add verification_method and category to canonical_controls
|
||||
-- verification_method: How a control is verified (code_review, document, tool, hybrid)
|
||||
-- category: Thematic grouping for customer-facing filters
|
||||
-- Safe: only alters canonical_controls if it exists
|
||||
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'canonical_controls') THEN
|
||||
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
|
||||
verification_method VARCHAR(20) DEFAULT NULL
|
||||
CHECK (verification_method IN ('code_review', 'document', 'tool', 'hybrid'));
|
||||
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
|
||||
category VARCHAR(50) DEFAULT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_cc_verification ON canonical_controls(verification_method);
|
||||
CREATE INDEX IF NOT EXISTS idx_cc_category ON canonical_controls(category);
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS canonical_control_categories (
|
||||
category_id VARCHAR(50) PRIMARY KEY,
|
||||
label_de VARCHAR(100) NOT NULL,
|
||||
label_en VARCHAR(100) NOT NULL,
|
||||
sort_order INTEGER DEFAULT 0
|
||||
);
|
||||
|
||||
INSERT INTO canonical_control_categories VALUES
|
||||
('encryption', 'Verschluesselung & Kryptographie', 'Encryption & Cryptography', 1),
|
||||
('authentication', 'Authentisierung & Zugriffskontrolle', 'Authentication & Access Control', 2),
|
||||
('network', 'Netzwerksicherheit', 'Network Security', 3),
|
||||
('data_protection', 'Datenschutz & Datensicherheit', 'Data Protection & Security', 4),
|
||||
('logging', 'Logging & Monitoring', 'Logging & Monitoring', 5),
|
||||
('incident', 'Vorfallmanagement', 'Incident Management', 6),
|
||||
('continuity', 'Notfall & Wiederherstellung', 'Continuity & Recovery', 7),
|
||||
('compliance', 'Compliance & Audit', 'Compliance & Audit', 8),
|
||||
('supply_chain', 'Lieferkettenmanagement', 'Supply Chain Management', 9),
|
||||
('physical', 'Physische Sicherheit', 'Physical Security', 10),
|
||||
('personnel', 'Personal & Schulung', 'Personnel & Training', 11),
|
||||
('application', 'Anwendungssicherheit', 'Application Security', 12),
|
||||
('system', 'Systemhaertung & -betrieb', 'System Hardening & Operations', 13),
|
||||
('risk', 'Risikomanagement', 'Risk Management', 14),
|
||||
('governance', 'Sicherheitsorganisation', 'Security Governance', 15),
|
||||
('hardware', 'Hardware & Plattformsicherheit', 'Hardware & Platform Security', 16),
|
||||
('identity', 'Identitaetsmanagement', 'Identity Management', 17)
|
||||
ON CONFLICT DO NOTHING;
|
||||
@@ -0,0 +1,22 @@
|
||||
-- 048: Expand processing_path CHECK constraint for new pipeline paths
|
||||
-- New values: prefilter_skip, no_control, store_failed, error
|
||||
-- Safe: only runs if the table exists (may not exist on all environments)
|
||||
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'canonical_processed_chunks') THEN
|
||||
ALTER TABLE canonical_processed_chunks
|
||||
DROP CONSTRAINT IF EXISTS canonical_processed_chunks_processing_path_check;
|
||||
ALTER TABLE canonical_processed_chunks
|
||||
ADD CONSTRAINT canonical_processed_chunks_processing_path_check
|
||||
CHECK (processing_path IN (
|
||||
'structured',
|
||||
'llm_reform',
|
||||
'skipped',
|
||||
'prefilter_skip',
|
||||
'no_control',
|
||||
'store_failed',
|
||||
'error'
|
||||
));
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -0,0 +1,13 @@
|
||||
-- 049: Add target_audience field to canonical_controls
|
||||
-- Distinguishes who a control is relevant for: enterprises, authorities, providers, or all.
|
||||
-- Safe: only runs if the table exists (may not exist on all environments)
|
||||
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'canonical_controls') THEN
|
||||
ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
|
||||
target_audience VARCHAR(20) DEFAULT NULL
|
||||
CHECK (target_audience IN ('enterprise', 'authority', 'provider', 'all'));
|
||||
CREATE INDEX IF NOT EXISTS idx_cc_target_audience ON canonical_controls(target_audience);
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -0,0 +1,22 @@
|
||||
-- Score Snapshots: Historical compliance score tracking
|
||||
-- Migration 050
|
||||
|
||||
CREATE TABLE IF NOT EXISTS compliance_score_snapshots (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
tenant_id UUID NOT NULL,
|
||||
project_id UUID,
|
||||
score DECIMAL(5,2) NOT NULL,
|
||||
controls_total INTEGER DEFAULT 0,
|
||||
controls_pass INTEGER DEFAULT 0,
|
||||
controls_partial INTEGER DEFAULT 0,
|
||||
evidence_total INTEGER DEFAULT 0,
|
||||
evidence_valid INTEGER DEFAULT 0,
|
||||
risks_total INTEGER DEFAULT 0,
|
||||
risks_high INTEGER DEFAULT 0,
|
||||
snapshot_date DATE NOT NULL,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
UNIQUE (tenant_id, project_id, snapshot_date)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_score_snap_tenant ON compliance_score_snapshots(tenant_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_score_snap_date ON compliance_score_snapshots(snapshot_date);
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,53 @@
|
||||
-- Process Manager: Recurring compliance tasks with audit trail
-- Migration 052
-- Every CREATE uses IF NOT EXISTS so that re-running this file against a
-- database where the objects already exist is a no-op instead of an error.

-- One row per compliance task; unique per (tenant_id, project_id, task_code).
CREATE TABLE IF NOT EXISTS compliance_process_tasks (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    project_id UUID,
    task_code VARCHAR(50) NOT NULL,
    title VARCHAR(500) NOT NULL,
    description TEXT,
    category VARCHAR(50) NOT NULL
        CHECK (category IN ('dsgvo','nis2','bsi','iso27001','ai_act','internal')),
    priority VARCHAR(20) NOT NULL DEFAULT 'medium'
        CHECK (priority IN ('critical','high','medium','low')),
    frequency VARCHAR(20) NOT NULL DEFAULT 'yearly'
        CHECK (frequency IN ('weekly','monthly','quarterly','semi_annual','yearly','once')),
    assigned_to VARCHAR(255),
    responsible_team VARCHAR(255),
    linked_control_ids JSONB DEFAULT '[]',
    linked_module VARCHAR(100),
    last_completed_at TIMESTAMPTZ,
    next_due_date DATE,
    due_reminder_days INTEGER DEFAULT 14,
    status VARCHAR(20) NOT NULL DEFAULT 'pending'
        CHECK (status IN ('pending','in_progress','completed','overdue','skipped')),
    completion_date TIMESTAMPTZ,
    completion_result TEXT,
    completion_evidence_id UUID,
    follow_up_actions JSONB DEFAULT '[]',
    is_seed BOOLEAN DEFAULT FALSE,
    notes TEXT,
    tags JSONB DEFAULT '[]',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE (tenant_id, project_id, task_code)
);

-- Completion history per task; rows are removed together with their task
-- (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS compliance_process_task_history (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    task_id UUID NOT NULL REFERENCES compliance_process_tasks(id) ON DELETE CASCADE,
    completed_by VARCHAR(255),
    completed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    result TEXT,
    evidence_id UUID,
    notes TEXT,
    status VARCHAR(20) NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_process_tasks_tenant ON compliance_process_tasks(tenant_id);
CREATE INDEX IF NOT EXISTS idx_process_tasks_status ON compliance_process_tasks(status);
CREATE INDEX IF NOT EXISTS idx_process_tasks_due ON compliance_process_tasks(next_due_date);
CREATE INDEX IF NOT EXISTS idx_process_tasks_category ON compliance_process_tasks(category);
CREATE INDEX IF NOT EXISTS idx_task_history_task ON compliance_process_task_history(task_id);
|
||||
@@ -0,0 +1,62 @@
|
||||
-- Evidence Checks: Automated compliance verification
-- Migration 053
-- Every CREATE uses IF NOT EXISTS so that re-running this file against a
-- database where the objects already exist is a no-op instead of an error.

-- Check definitions; unique per (tenant_id, project_id, check_code).
CREATE TABLE IF NOT EXISTS compliance_evidence_checks (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    project_id UUID,
    check_code VARCHAR(50) NOT NULL,
    title VARCHAR(500) NOT NULL,
    description TEXT,
    check_type VARCHAR(30) NOT NULL
        CHECK (check_type IN ('tls_scan','header_check','certificate_check',
                              'config_scan','api_scan','dns_check','port_scan')),
    target_url TEXT,
    target_config JSONB DEFAULT '{}',
    linked_control_ids JSONB DEFAULT '[]',
    frequency VARCHAR(20) DEFAULT 'monthly'
        CHECK (frequency IN ('daily','weekly','monthly','quarterly','manual')),
    last_run_at TIMESTAMPTZ,
    next_run_at TIMESTAMPTZ,
    is_active BOOLEAN DEFAULT TRUE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE (tenant_id, project_id, check_code)
);

-- One row per execution of a check; rows are removed together with their
-- check (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS compliance_evidence_check_results (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    check_id UUID NOT NULL REFERENCES compliance_evidence_checks(id) ON DELETE CASCADE,
    tenant_id UUID NOT NULL,
    run_status VARCHAR(20) NOT NULL DEFAULT 'running'
        CHECK (run_status IN ('running','passed','failed','warning','error')),
    result_data JSONB NOT NULL DEFAULT '{}',
    summary TEXT,
    findings_count INTEGER DEFAULT 0,
    critical_findings INTEGER DEFAULT 0,
    evidence_id UUID,
    duration_ms INTEGER,
    run_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Mapping between evidence and control codes; unique per
-- (tenant_id, evidence_id, control_code).
CREATE TABLE IF NOT EXISTS compliance_evidence_control_map (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    evidence_id UUID NOT NULL,
    control_code VARCHAR(50) NOT NULL,
    mapping_type VARCHAR(20) DEFAULT 'supports'
        CHECK (mapping_type IN ('supports','partially_supports','required')),
    verified_at TIMESTAMPTZ,
    verified_by VARCHAR(255),
    notes TEXT,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE (tenant_id, evidence_id, control_code)
);

CREATE INDEX IF NOT EXISTS idx_evidence_checks_tenant ON compliance_evidence_checks(tenant_id);
CREATE INDEX IF NOT EXISTS idx_evidence_checks_type ON compliance_evidence_checks(check_type);
CREATE INDEX IF NOT EXISTS idx_evidence_checks_active ON compliance_evidence_checks(is_active);
CREATE INDEX IF NOT EXISTS idx_check_results_check ON compliance_evidence_check_results(check_id);
CREATE INDEX IF NOT EXISTS idx_check_results_status ON compliance_evidence_check_results(run_status);
CREATE INDEX IF NOT EXISTS idx_evidence_control_map_tenant ON compliance_evidence_control_map(tenant_id);
CREATE INDEX IF NOT EXISTS idx_evidence_control_map_control ON compliance_evidence_control_map(control_code);
|
||||
@@ -0,0 +1,340 @@
|
||||
-- Migration 054: Erweiterte HinSchG-Wiki-Artikel
|
||||
-- Ergaenzt die bestehende HinSchG-Kategorie um detaillierte Artikel
|
||||
|
||||
-- Bestehenden Grundlagen-Artikel mit umfassenderem Inhalt aktualisieren
|
||||
UPDATE compliance_wiki_articles
|
||||
SET content = '## Ueberblick
|
||||
|
||||
Das **Hinweisgeberschutzgesetz (HinSchG)** setzt die EU-Whistleblowing-Richtlinie (EU) 2019/1937 in deutsches Recht um. Es schuetzt Personen, die auf Missstaende in Unternehmen und Behoerden hinweisen und ist seit dem **2. Juli 2023** in Kraft.
|
||||
|
||||
- Ab 02.07.2023: Pflicht fuer Unternehmen ab **250 Beschaeftigten**
|
||||
- Ab 17.12.2023: Pflicht fuer Unternehmen ab **50 Beschaeftigten** (§ 12 HinSchG)
|
||||
|
||||
## Kernpflichten
|
||||
|
||||
### Interne Meldestelle einrichten (§ 12 HinSchG)
|
||||
- Kann eine **interne Person** (Ombudsperson) oder ein **externer Dienstleister** sein
|
||||
- Meldungen muessen **muendlich, schriftlich und persoenlich** moeglich sein
|
||||
- Die Meldestelle muss **unabhaengig** und **fachkundig** sein
|
||||
- **Gemeinsame Meldestellen** sind fuer Unternehmen mit 50–249 Beschaeftigten zulaessig
|
||||
|
||||
### Gesetzliche Fristen (§ 17 HinSchG)
|
||||
- Eingangsbestaetigung innerhalb von **7 Tagen** nach Meldungseingang (§ 17 Abs. 1 S. 2)
|
||||
- Rueckmeldung ueber Folgemassnahmen innerhalb von **3 Monaten** nach Eingangsbestaetigung (§ 17 Abs. 2)
|
||||
- Dokumentation muss **3 Jahre** nach Abschluss aufbewahrt werden (§ 11 Abs. 5)
|
||||
|
||||
### Vertraulichkeitsgebot (§ 8 HinSchG)
|
||||
- Die **Identitaet des Hinweisgebers** darf nur den zustaendigen Personen bekannt sein
|
||||
- Offenlegung nur mit **Einwilligung** oder bei **gesetzlicher Verpflichtung**
|
||||
- Verstoss ist bussgeld-bewehrt (bis 50.000 EUR)
|
||||
|
||||
## Welche Daten fallen an?
|
||||
- Identitaet des Hinweisgebers (besonders schuetzenswert!)
|
||||
- Beschuldigte Personen
|
||||
- Zeugen und weitere Beteiligte
|
||||
- Inhalt der Meldung (kann sensible Daten enthalten)
|
||||
- Kommunikationsverlauf
|
||||
|
||||
## Datenschutz-Anforderungen
|
||||
- **Eigene Verarbeitungstaetigkeit** im VVT anlegen
|
||||
- Rechtsgrundlage: Art. 6 Abs. 1c DSGVO (rechtliche Verpflichtung)
|
||||
- **Zugriffsbeschraenkung:** Nur die benannte Meldestelle darf auf die Daten zugreifen
|
||||
- **Loeschfrist:** 3 Jahre nach Abschluss des Verfahrens (§ 11 Abs. 5 HinSchG)
|
||||
- Bei Art.-9-Daten in Meldungen: besondere Schutzmassnahmen erforderlich
|
||||
|
||||
## Sanktionen (§ 40 HinSchG)
|
||||
|
||||
| Verstoss | Bussgeld |
|
||||
|----------|----------|
|
||||
| Keine Meldestelle eingerichtet | Bis 20.000 EUR |
|
||||
| Behinderung einer Meldung | Bis 50.000 EUR |
|
||||
| Verstoss gegen Vertraulichkeitsgebot | Bis 50.000 EUR |
|
||||
| Repressalien gegen Hinweisgeber | Bis 50.000 EUR |
|
||||
|
||||
## Praxis-Tipp
|
||||
Pruefen Sie bei externen Meldestellen-Anbietern, ob ein **AVV** erforderlich ist. In den meisten Faellen ja — der Anbieter verarbeitet personenbezogene Daten in Ihrem Auftrag.',
|
||||
summary = 'Das HinSchG setzt die EU-Whistleblowing-Richtlinie um und verpflichtet seit Dezember 2023 alle Unternehmen ab 50 Beschaeftigten zur Einrichtung einer internen Meldestelle. Verstoesse koennen mit bis zu 50.000 EUR geahndet werden.',
|
||||
legal_refs = ARRAY['§ 2 HinSchG', '§ 8 HinSchG', '§ 11 Abs. 5 HinSchG', '§ 12 HinSchG', '§ 17 HinSchG', '§ 36 HinSchG', '§ 40 HinSchG', 'Art. 6 Abs. 1c DSGVO', 'EU-RL 2019/1937'],
|
||||
tags = ARRAY['hinweisgeberschutz', 'whistleblower', 'meldestelle', 'vertraulichkeit', 'fristen', 'bussgelder'],
|
||||
version = 2,
|
||||
updated_at = NOW()
|
||||
WHERE id = 'hinschg-grundlagen';
|
||||
|
||||
-- Neuer Artikel: Sachlicher Anwendungsbereich
|
||||
INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
|
||||
('hinschg-anwendungsbereich', 'hinschg',
|
||||
'Sachlicher Anwendungsbereich — Welche Verstoesse sind meldbar?',
|
||||
'Das HinSchG schuetzt Meldungen ueber Verstoesse gegen EU-Recht und nationales Recht. Der Anwendungsbereich geht weit ueber rein strafrechtliche Verstoesse hinaus.',
|
||||
'## Ueberblick
|
||||
|
||||
Der sachliche Anwendungsbereich des HinSchG (§ 2) ist bewusst weit gefasst. Geschuetzt werden Meldungen ueber Verstoesse, die **strafbewehrt** sind oder **bussgeld-bewehrt**, sowie Verstoesse gegen bestimmte **EU-Rechtsakte** und deren nationale Umsetzungsgesetze.
|
||||
|
||||
## Meldbare Verstoesse (§ 2 HinSchG)
|
||||
|
||||
### Strafvorschriften
|
||||
- Alle Straftaten nach dem **StGB** (Betrug, Untreue, Korruption, Urkundenfaelschung)
|
||||
- Straftaten nach **Nebenstrafrecht** (Umweltstrafrecht, Wirtschaftsstrafrecht)
|
||||
|
||||
### Bussgeld-bewehrte Vorschriften
|
||||
- Verstoesse gegen **Ordnungswidrigkeiten-Vorschriften**, soweit die verletzte Norm dem Schutz von Leben, Leib, Gesundheit oder Rechten von Beschaeftigten dient
|
||||
|
||||
### EU-Rechtsakte und nationale Umsetzung
|
||||
| Rechtsgebiet | Beispiele |
|
||||
|-------------|-----------|
|
||||
| Datenschutz | DSGVO, BDSG — z.B. unrechtmaessige Datenweitergabe |
|
||||
| Geldwaesche | GwG — z.B. fehlende Verdachtsmeldungen |
|
||||
| Produktsicherheit | ProdSG — z.B. mangelhafte Produkte im Verkehr |
|
||||
| Umweltschutz | BImSchG, KrWG — z.B. illegale Entsorgung |
|
||||
| Lebensmittelsicherheit | LFGB — z.B. Hygienemaengel |
|
||||
| Arbeitsschutz | ArbSchG, ArbZG — z.B. ueberlange Arbeitszeiten |
|
||||
| Verbraucherschutz | UWG — z.B. irrefuehrende Werbung |
|
||||
| Wettbewerbsrecht | GWB — z.B. Preisabsprachen, Kartelle |
|
||||
| Steuerrecht | AO — z.B. Steuerhinterziehung bei Unternehmen |
|
||||
| Vergaberecht | GWB Teil 4 — z.B. Manipulationen bei oeffentlichen Auftraegen |
|
||||
|
||||
## Nicht erfasste Bereiche
|
||||
|
||||
- **Rein privatrechtliche Streitigkeiten** (z.B. Vertragskonflikte)
|
||||
- **Nationale Sicherheit** — Informationen, die der nationalen Sicherheit unterliegen
|
||||
- **Berufsgeheimnisse** — Anwalts-, Arzt- oder Seelsorgegeheimnis (mit Ausnahmen)
|
||||
|
||||
## Praxis-Tipp
|
||||
|
||||
Im Zweifelsfall sollte eine Meldung **immer entgegengenommen** und geprueft werden. Die Meldestelle entscheidet erst bei der Sachverhaltspruefung, ob ein meldepflichtiger Verstoss vorliegt.',
|
||||
ARRAY['§ 2 HinSchG', '§ 3 HinSchG', '§ 5 HinSchG'],
|
||||
ARRAY['anwendungsbereich', 'verstoesse', 'strafrecht', 'bussgeld', 'eu-recht', 'meldepflicht'],
|
||||
'important',
|
||||
ARRAY[]::text[])
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- Neuer Artikel: Schutz des Hinweisgebers
|
||||
INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
|
||||
('hinschg-hinweisgeberschutz', 'hinschg',
|
||||
'Schutz des Hinweisgebers — Repressalienverbot und Beweislastumkehr',
|
||||
'Das HinSchG verbietet jede Form der Benachteiligung von Hinweisgebern. Bei Verstoessen greift eine Beweislastumkehr zugunsten des Hinweisgebers.',
|
||||
'## Ueberblick
|
||||
|
||||
Der Schutz hinweisgebender Personen ist das **Kernziel des HinSchG**. Das Gesetz sieht ein umfassendes Verbot von Repressalien, eine Beweislastumkehr und einen Schadensersatzanspruch vor.
|
||||
|
||||
## Repressalienverbot (§ 36 HinSchG)
|
||||
|
||||
Verboten ist jede Form der **Benachteiligung** aufgrund einer Meldung:
|
||||
- **Kuendigung** oder Nichterneuerung eines befristeten Vertrags
|
||||
- **Abmahnung** oder negative Leistungsbewertung
|
||||
- **Versetzung**, Degradierung oder Befoerderungsverweigerung
|
||||
- **Gehaltsreduktion** oder Entzug von Verguenstigungen
|
||||
- **Mobbing**, Ausgrenzung, Einschuechterung
|
||||
- **Aufnahme in schwarze Listen** oder Branchenregister
|
||||
- **Entzug einer Lizenz** oder Genehmigung
|
||||
- **Anordnung einer psychiatrischen Untersuchung**
|
||||
|
||||
## Beweislastumkehr (§ 36 Abs. 2 HinSchG)
|
||||
|
||||
Erleidet ein Hinweisgeber nach einer Meldung eine Benachteiligung, wird **vermutet**, dass diese Benachteiligung eine Repressalie ist. Der **Arbeitgeber** muss beweisen, dass die Massnahme:
|
||||
- Auf hinreichend gerechtfertigten Gruenden beruht
|
||||
- **Keinen Zusammenhang** mit der Meldung hat
|
||||
|
||||
## Schadensersatz (§ 37 HinSchG)
|
||||
|
||||
- Hinweisgeber hat Anspruch auf **Ersatz des erlittenen Schadens**
|
||||
- Umfasst **materielle** Schaeden (Gehaltsverlust) und **immaterielle** Schaeden (Mobbing)
|
||||
- Kein **Mitverschulden** des Hinweisgebers, wenn die Meldung in gutem Glauben erfolgte
|
||||
|
||||
## Geschuetzte Personengruppen (§ 1 HinSchG)
|
||||
|
||||
- Arbeitnehmerinnen und Arbeitnehmer
|
||||
- Beamtinnen und Beamte
|
||||
- Auszubildende und Praktikanten
|
||||
- Selbststaendige und Anteilseigner
|
||||
- Mitglieder von Leitungs- und Aufsichtsorganen
|
||||
- Ehrenamtlich Taetige und Freiwillige
|
||||
- Bewerberinnen und Bewerber (bei Informationen im Bewerbungsprozess)
|
||||
|
||||
## Voraussetzungen fuer den Schutz (§ 33 HinSchG)
|
||||
|
||||
Der Schutz greift, wenn der Hinweisgeber:
|
||||
- **Hinreichenden Grund** hatte anzunehmen, dass die gemeldeten Informationen der Wahrheit entsprechen
|
||||
- Die Meldung ueber einen **vorgesehenen Kanal** (intern oder extern) erfolgte
|
||||
- Der Verstoss in den **sachlichen Anwendungsbereich** faellt
|
||||
|
||||
**Achtung:** Wissentlich **falsche Meldungen** sind nicht geschuetzt und koennen eigene Schadensersatzpflichten ausloesen (§ 38 HinSchG).',
|
||||
ARRAY['§ 1 HinSchG', '§ 33 HinSchG', '§ 36 HinSchG', '§ 37 HinSchG', '§ 38 HinSchG'],
|
||||
ARRAY['repressalienverbot', 'beweislastumkehr', 'schadensersatz', 'hinweisgeberschutz', 'kuendigungsschutz'],
|
||||
'critical',
|
||||
ARRAY[]::text[])
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- Neuer Artikel: Interne vs. Externe Meldestelle
|
||||
INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
|
||||
('hinschg-meldestellen', 'hinschg',
|
||||
'Interne vs. Externe Meldestelle — Was ist der Unterschied?',
|
||||
'Das HinSchG sieht interne und externe Meldestellen vor. Hinweisgeber koennen frei waehlen, an wen sie sich wenden. Die Einrichtung einer internen Meldestelle ist Pflicht.',
|
||||
'## Ueberblick
|
||||
|
||||
Das HinSchG unterscheidet zwischen **internen Meldestellen** (beim Unternehmen) und **externen Meldestellen** (bei Behoerden). Hinweisgeber haben ein **Wahlrecht** — sie koennen sich direkt an die externe Meldestelle wenden, ohne den internen Weg vorher beschritten zu haben.
|
||||
|
||||
## Interne Meldestelle (§§ 12–18 HinSchG)
|
||||
|
||||
### Einrichtungspflicht
|
||||
- **Ab 50 Beschaeftigten**: Pflicht zur Einrichtung (seit 17.12.2023)
|
||||
- Unternehmen mit **50–249 Beschaeftigten** duerfen eine gemeinsame Meldestelle nutzen
|
||||
- Ab **250 Beschaeftigten**: eigene Meldestelle erforderlich
|
||||
|
||||
### Anforderungen
|
||||
- **Unabhaengigkeit** — keine Interessenkonflikte
|
||||
- **Fachkunde** — geschultes Personal
|
||||
- Meldekanal muss **muendliche, schriftliche und persoenliche** Meldungen ermoeglichen
|
||||
- **Anonyme Meldungen** sollen ermoeglicht werden (keine Pflicht, aber empfohlen)
|
||||
|
||||
### Besetzung
|
||||
Die Meldestelle kann besetzt werden durch:
|
||||
- Interne **Ombudsperson** (Compliance Officer, Datenschutzbeauftragter in Personalunion kritisch)
|
||||
- **Externer Dienstleister** (Kanzlei, spezialisierter Anbieter) — erfordert AVV
|
||||
- **Gremium** aus mehreren Personen
|
||||
|
||||
## Externe Meldestelle (§§ 19–31 HinSchG)
|
||||
|
||||
Die wichtigsten externen Meldestellen:
|
||||
|
||||
| Meldestelle | Zustaendigkeit |
|
||||
|-------------|---------------|
|
||||
| **BfJ (Bundesamt fuer Justiz)** | Auffangmeldestelle fuer alle Verstoesse |
|
||||
| **BaFin** | Finanzaufsicht, Geldwaesche, Wertpapierrecht |
|
||||
| **Bundeskartellamt** | Wettbewerbsrecht, Kartelle |
|
||||
|
||||
## Wahlrecht des Hinweisgebers
|
||||
|
||||
- Hinweisgeber duerfen **frei waehlen** zwischen intern und extern
|
||||
- Die interne Meldung ist **nicht vorrangig** — anders als bei vielen Unternehmenspolicies
|
||||
- Ein Unternehmen darf **nicht verbieten**, sich an die externe Stelle zu wenden
|
||||
|
||||
## Praxis-Tipp
|
||||
|
||||
Gestalten Sie die interne Meldestelle **niedrigschwellig und vertrauenswuerdig**, damit Mitarbeiter sie bevorzugt nutzen. Unternehmen erfahren frueh von Problemen und koennen schneller reagieren.',
|
||||
ARRAY['§ 12 HinSchG', '§ 13 HinSchG', '§ 14 HinSchG', '§ 16 HinSchG', '§ 17 HinSchG', '§ 19 HinSchG', '§ 27 HinSchG'],
|
||||
ARRAY['meldestelle', 'intern', 'extern', 'ombudsperson', 'bfj', 'bafin', 'wahlrecht'],
|
||||
'critical',
|
||||
ARRAY[]::text[])
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- Neuer Artikel: Verfahrensablauf bei einer Meldung
|
||||
INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
|
||||
('hinschg-verfahrensablauf', 'hinschg',
|
||||
'Verfahrensablauf — Von der Meldung bis zur Rueckmeldung',
|
||||
'Der gesetzlich vorgeschriebene Ablauf einer Meldung umfasst Eingangsbestaetigung, Sachverhaltspruefung, Folgemaßnahmen und Rueckmeldung an den Hinweisgeber.',
|
||||
'## Ueberblick
|
||||
|
||||
Das HinSchG schreibt einen strukturierten Verfahrensablauf fuer jede eingehende Meldung vor (§ 17 HinSchG). Dieser Ablauf ist nicht verhandelbar — die Fristen sind gesetzlich bindend.
|
||||
|
||||
## Schritt-fuer-Schritt-Verfahren
|
||||
|
||||
### 1. Meldungseingang
|
||||
- Meldung wird ueber den internen Meldekanal eingereicht
|
||||
- Das System vergibt automatisch eine **Referenznummer** und einen **Zugangscode**
|
||||
- Der Zugangscode ermoeglicht dem Hinweisgeber die anonyme Statusabfrage
|
||||
|
||||
### 2. Eingangsbestaetigung (Frist: 7 Tage)
|
||||
- Innerhalb von **7 Tagen** nach Eingang muss die Meldestelle den Eingang bestaetigen (§ 17 Abs. 1 Nr. 1 HinSchG)
|
||||
- Bei anonymen Meldungen: Bestaetigung ueber den anonymen Kommunikationskanal
|
||||
- **Wichtig:** Die Bestaetigung darf keine inhaltliche Bewertung enthalten
|
||||
|
||||
### 3. Sachverhaltspruefung
|
||||
- Die Meldestelle prueft, ob ein **meldepflichtiger Verstoss** vorliegt (§ 2 HinSchG)
|
||||
- Stichhaltigkeitspruefung der gemeldeten Informationen
|
||||
- Gegebenenfalls Rueckfragen an den Hinweisgeber (ueber anonymen Kanal)
|
||||
|
||||
### 4. Folgemaßnahmen (§ 18 HinSchG)
|
||||
Moegliche Maßnahmen umfassen:
|
||||
- **Interne Untersuchung** (ggf. mit externen Gutachtern)
|
||||
- **Abstellung des Verstosses** durch organisatorische Aenderungen
|
||||
- Weiterleitung an eine **zustaendige Behoerde**
|
||||
- **Disziplinarmaßnahmen** gegen Verantwortliche
|
||||
- **Einstellung** des Verfahrens bei unbegruendeten Meldungen
|
||||
|
||||
### 5. Rueckmeldung (Frist: 3 Monate)
|
||||
- Innerhalb von **3 Monaten** nach Eingangsbestaetigung muss dem Hinweisgeber eine Rueckmeldung ueber ergriffene oder geplante Folgemaßnahmen gegeben werden (§ 17 Abs. 2)
|
||||
- Die Rueckmeldung soll den Hinweisgeber informieren, **ohne laufende Ermittlungen zu gefaehrden**
|
||||
|
||||
### 6. Abschluss und Dokumentation
|
||||
- Abschließende Dokumentation des gesamten Verfahrens
|
||||
- Aufbewahrung fuer **3 Jahre** nach Abschluss (§ 11 Abs. 5 HinSchG)
|
||||
- Danach: Loeschung aller personenbezogenen Daten
|
||||
|
||||
## Fristen-Uebersicht
|
||||
|
||||
| Schritt | Frist | Ab wann |
|
||||
|---------|-------|---------|
|
||||
| Eingangsbestaetigung | 7 Tage | Ab Meldungseingang |
|
||||
| Rueckmeldung | 3 Monate | Ab Eingangsbestaetigung |
|
||||
| Aufbewahrung | 3 Jahre | Ab Verfahrensabschluss |
|
||||
|
||||
## Praxis-Tipp
|
||||
|
||||
Richten Sie ein **automatisches Fristen-Monitoring** ein. Das BreakPilot Hinweisgebersystem berechnet die Fristen automatisch und warnt rechtzeitig vor drohender Ueberschreitung.',
|
||||
ARRAY['§ 11 Abs. 5 HinSchG', '§ 17 Abs. 1 HinSchG', '§ 17 Abs. 2 HinSchG', '§ 18 HinSchG'],
|
||||
ARRAY['verfahren', 'ablauf', 'fristen', 'eingangsbestaetigung', 'rueckmeldung', 'folgemaßnahmen', 'dokumentation'],
|
||||
'important',
|
||||
ARRAY[]::text[])
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- Neuer Artikel: Datenschutz-Anforderungen
|
||||
INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
|
||||
('hinschg-datenschutz', 'hinschg',
|
||||
'Datenschutz im Hinweisgebersystem — DSGVO-Konformitaet sicherstellen',
|
||||
'Das Hinweisgebersystem verarbeitet besonders sensible personenbezogene Daten. Die DSGVO-Anforderungen an Datenschutz, Loeschfristen und Zugriffskontrollen sind strikt einzuhalten.',
|
||||
'## Ueberblick
|
||||
|
||||
Ein Hinweisgebersystem verarbeitet **hochsensible personenbezogene Daten**: die Identitaet des Hinweisgebers, Beschuldigter, Zeugen und den Inhalt der Meldung. Die DSGVO-Anforderungen muessen mit den HinSchG-Pflichten in Einklang gebracht werden.
|
||||
|
||||
## Rechtsgrundlage
|
||||
|
||||
Die Verarbeitung stuetzt sich auf:
|
||||
- **Art. 6 Abs. 1c DSGVO** — Erfuellung einer rechtlichen Verpflichtung (HinSchG)
|
||||
- **Art. 6 Abs. 1f DSGVO** — Berechtigtes Interesse (fuer nicht-verpflichtete Unternehmen)
|
||||
- **Art. 9 Abs. 2b DSGVO** — Fuer besondere Datenkategorien im Beschaeftigungskontext
|
||||
|
||||
## VVT-Eintrag (Pflicht)
|
||||
|
||||
Erstellen Sie einen eigenen VVT-Eintrag fuer das Hinweisgebersystem:
|
||||
|
||||
| Feld | Inhalt |
|
||||
|------|--------|
|
||||
| Bezeichnung | Betrieb des internen Hinweisgebersystems |
|
||||
| Rechtsgrundlage | Art. 6 Abs. 1c DSGVO i.V.m. §§ 12 ff. HinSchG |
|
||||
| Kategorien betroffener Personen | Hinweisgeber, Beschuldigte, Zeugen |
|
||||
| Datenkategorien | Identitaetsdaten, Kommunikationsdaten, Meldungsinhalt |
|
||||
| Loeschfrist | 3 Jahre nach Verfahrensabschluss |
|
||||
| Empfaenger | Interne Meldestelle, ggf. externe Meldestelle |
|
||||
|
||||
## Technisch-organisatorische Massnahmen (TOM)
|
||||
|
||||
- **Verschluesselung** — Alle Meldungsdaten at-rest und in-transit verschluesselt
|
||||
- **Zugriffsbeschraenkung** — Nur die benannte Meldestelle darf auf Daten zugreifen
|
||||
- **Protokollierung** — Revisionssicherer Audit-Trail aller Zugriffe
|
||||
- **Pseudonymisierung** — Anonyme Meldungen ohne Zuordnung zu Klarnamen
|
||||
- **Trennung** — Meldungsdaten getrennt von sonstigen HR-Daten speichern
|
||||
|
||||
## Loeschkonzept
|
||||
|
||||
| Daten | Loeschfrist | Rechtsgrundlage |
|
||||
|-------|-------------|-----------------|
|
||||
| Meldungsdaten | 3 Jahre nach Abschluss | § 11 Abs. 5 HinSchG |
|
||||
| Audit-Trail | 3 Jahre nach Abschluss | § 11 Abs. 5 HinSchG |
|
||||
| Kommunikationsdaten | 3 Jahre nach Abschluss | § 11 Abs. 5 HinSchG |
|
||||
| Zugangscodes | Nach Verfahrensabschluss | Zweckerfuellung |
|
||||
|
||||
## DSFA-Pflicht?
|
||||
|
||||
Eine **Datenschutz-Folgenabschaetzung** (Art. 35 DSGVO) ist in vielen Faellen erforderlich, da:
|
||||
- **Systematische Ueberwachung** von Beschaeftigten (potenziell)
|
||||
- Verarbeitung **besonderer Datenkategorien** moeglich (Art. 9 DSGVO)
|
||||
- **Verletzliche Personengruppen** betroffen (Hinweisgeber, Beschuldigte)
|
||||
|
||||
## Praxis-Tipp
|
||||
|
||||
Fuehren Sie eine DSFA durch und dokumentieren Sie die Abwaegung. Dies dient auch als Nachweis der Rechenschaftspflicht (Art. 5 Abs. 2 DSGVO).',
|
||||
ARRAY['Art. 5 Abs. 2 DSGVO', 'Art. 6 Abs. 1c DSGVO', 'Art. 9 Abs. 2b DSGVO', 'Art. 28 DSGVO', 'Art. 35 DSGVO', '§ 8 HinSchG', '§ 11 Abs. 5 HinSchG', '§ 26 BDSG'],
|
||||
ARRAY['datenschutz', 'dsgvo', 'vvt', 'dsfa', 'loeschfristen', 'tom', 'verschluesselung', 'audit-trail'],
|
||||
'critical',
|
||||
ARRAY[]::text[])
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
@@ -0,0 +1,230 @@
|
||||
-- Migration 055: CRA (Cyber Resilience Act) Wiki-Kategorie und Artikel
|
||||
-- Neue Kategorie + 3 Artikel zum EU Cyber Resilience Act
|
||||
|
||||
-- Kategorie: CRA
|
||||
INSERT INTO compliance_wiki_categories (id, name, description, icon, sort_order) VALUES
|
||||
('cra', 'Cyber Resilience Act (CRA)', 'EU-Verordnung fuer Cybersicherheit von Produkten mit digitalen Elementen', 'Shield', 75)
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- Artikel 1: CRA Grundlagen
|
||||
INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
|
||||
('cra-grundlagen', 'cra',
|
||||
'Cyber Resilience Act — Ueberblick und Pflichten',
|
||||
'Der CRA (EU) 2024/2847 verpflichtet Hersteller von Produkten mit digitalen Elementen zu umfassenden Cybersicherheits-Massnahmen ueber den gesamten Produktlebenszyklus.',
|
||||
'## Ueberblick
|
||||
|
||||
Der **EU Cyber Resilience Act (CRA)**, Verordnung (EU) 2024/2847, ist am **10. Dezember 2024** in Kraft getreten. Er etabliert horizontale Cybersicherheitsanforderungen fuer alle **Produkte mit digitalen Elementen**, die in der EU in Verkehr gebracht werden.
|
||||
|
||||
## Zeitplan
|
||||
|
||||
| Datum | Meilenstein |
|
||||
|-------|------------|
|
||||
| 10.12.2024 | Inkrafttreten |
|
||||
| 11.06.2026 | Konformitaetsbewertungsstellen muessen benannt sein |
|
||||
| 11.09.2026 | Meldepflicht fuer Schwachstellen und Vorfaelle |
|
||||
| 11.12.2027 | Volle Anwendung — CE-Kennzeichnung erforderlich |
|
||||
|
||||
## Was sind "Produkte mit digitalen Elementen"?
|
||||
|
||||
Jedes Software- oder Hardware-Produkt, das:
|
||||
- Eine **Datenverbindung** (direkt oder indirekt) zu einem Geraet oder Netzwerk hat
|
||||
- **Software** enthaelt, die bestimmungsgemaeß genutzt wird
|
||||
|
||||
**Beispiele:** IoT-Geraete, Firmware, eigenstaendige Software, Betriebssysteme, Router, Smart-Home-Geraete, industrielle Steuerungssysteme.
|
||||
|
||||
## Kernpflichten fuer Hersteller
|
||||
|
||||
### 1. Cybersecurity-Risikobewertung
|
||||
- Systematische Bewertung der Cybersecurity-Risiken des Produkts
|
||||
- Dokumentation der Risikoanalyse
|
||||
- Regelmaessige Aktualisierung
|
||||
|
||||
### 2. Secure Development (SSDLC)
|
||||
- Sichere Entwicklungsprozesse etablieren
|
||||
- Code Reviews und Security Testing
|
||||
- Supply-Chain-Security pruefen
|
||||
|
||||
### 3. Vulnerability Management
|
||||
- Aktives CVE-Monitoring
|
||||
- Coordinated Vulnerability Disclosure (CVD)
|
||||
- Patch-Bereitstellung waehrend des gesamten Support-Zeitraums
|
||||
|
||||
### 4. Security Updates
|
||||
- Sichere Update-Mechanismen (signiert, integritaetsgeprueft)
|
||||
- Automatische oder einfache Update-Moeglichkeit fuer Nutzer
|
||||
- Mindest-Support-Zeitraum: 5 Jahre oder erwartete Produktlebensdauer
|
||||
|
||||
### 5. Software Bill of Materials (SBOM)
|
||||
- Dokumentation aller Software-Komponenten
|
||||
- Top-Level-Abhaengigkeiten
|
||||
- Maschinenlesbares Format
|
||||
|
||||
### 6. Incident Reporting
|
||||
- **24 Stunden:** Fruehwarnung an ENISA/nationale Behoerde
|
||||
- **72 Stunden:** Detaillierter Incident Report
|
||||
- Meldepflicht fuer aktiv ausgenutzte Schwachstellen
|
||||
|
||||
## CE-Kennzeichnung
|
||||
|
||||
Der CRA wird Teil der **CE-Konformitaet**. Ab Dezember 2027 duerfen Produkte ohne Cybersecurity-Konformitaet **nicht mehr in der EU verkauft werden**.
|
||||
|
||||
## Sanktionen
|
||||
|
||||
| Verstoss | Bussgeld |
|
||||
|----------|----------|
|
||||
| Wesentliche Anforderungen (Annex I) | Bis 15 Mio. EUR oder 2,5% des Jahresumsatzes |
|
||||
| Sonstige Pflichten | Bis 10 Mio. EUR oder 2% des Jahresumsatzes |
|
||||
| Falsche Informationen | Bis 5 Mio. EUR oder 1% des Jahresumsatzes |',
|
||||
ARRAY['Art. 13 CRA', 'Art. 14 CRA', 'Annex I CRA', 'Annex II CRA', '(EU) 2024/2847'],
|
||||
ARRAY['cra', 'cybersecurity', 'ce-kennzeichnung', 'iot', 'software', 'sbom', 'vulnerability', 'incident-reporting'],
|
||||
'critical',
|
||||
ARRAY['https://eur-lex.europa.eu/eli/reg/2024/2847/oj/eng'])
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- Artikel 2: CRA Security Controls (Annex I)
|
||||
INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
|
||||
('cra-security-controls', 'cra',
|
||||
'CRA Annex I — 35 Essential Cybersecurity Requirements',
|
||||
'Der CRA definiert in Annex I die wesentlichen Cybersicherheitsanforderungen. Daraus ergeben sich etwa 35 konkrete Security-Controls fuer den gesamten Produktlebenszyklus.',
|
||||
'## Ueberblick
|
||||
|
||||
Annex I des CRA enthaelt die **Essential Cybersecurity Requirements**. Sie lassen sich in 7 Themenbereiche mit insgesamt etwa 35 konkreten Controls aufteilen.
|
||||
|
||||
## 1. Secure-by-Design / Architektur
|
||||
|
||||
| # | Control | Beschreibung |
|
||||
|---|---------|-------------|
|
||||
| 1 | Secure-by-default | Produkte mit sicheren Standardeinstellungen ausliefern |
|
||||
| 2 | Minimale Angriffsflaeche | Nur notwendige Dienste und Schnittstellen aktivieren |
|
||||
| 3 | Sichere Systemarchitektur | Sicherheitskritische Komponenten isolieren und schuetzen |
|
||||
| 4 | Least-Privilege-Prinzip | Minimale Berechtigungen fuer Komponenten und Nutzer |
|
||||
| 5 | Trennung kritischer Funktionen | Isolation sicherheitskritischer Funktionen |
|
||||
| 6 | System-Haertung | Deaktivierung unnoetiger Services und Ports |
|
||||
| 7 | Manipulationsschutz | Schutz vor unautorisierter Software-Aenderung |
|
||||
| 8 | Integritaetspruefung | Signaturen und Integritaetschecks |
|
||||
| 9 | Zugriffsschutz | Zugriffskontrollen implementieren |
|
||||
|
||||
## 2. Authentifizierung & Zugriffskontrolle
|
||||
|
||||
| # | Control | Beschreibung |
|
||||
|---|---------|-------------|
|
||||
| 10 | Starke Authentifizierung | Sichere Authentifizierungsmechanismen |
|
||||
| 11 | Keine Default-Passwoerter | Keine universellen Standardpasswoerter |
|
||||
| 12 | Credential-Management | Sichere Verwaltung von Zugangsdaten |
|
||||
| 13 | Sitzungsmanagement | Sichere Session-Verwaltung |
|
||||
| 14 | Brute-Force-Schutz | Schutz vor Brute-Force-Angriffen |
|
||||
| 15 | Autorisierung | Rollenbasierte Zugriffskontrolle |
|
||||
|
||||
## 3. Kryptografie & Datenschutz
|
||||
|
||||
| # | Control | Beschreibung |
|
||||
|---|---------|-------------|
|
||||
| 16 | Datenverschluesselung | Verschluesselung sensibler Daten |
|
||||
| 17 | Speicher-Schutz | Schutz gespeicherter Daten (at-rest) |
|
||||
| 18 | Transport-Schutz | Schutz uebertragener Daten (in-transit) |
|
||||
| 19 | Schluesselmanagement | Sicheres kryptografisches Schluesselmanagement |
|
||||
| 20 | Schluesselschutz | Schutz kryptografischer Schluessel vor Zugriff |
|
||||
|
||||
## 4. Software-Lifecycle-Security
|
||||
|
||||
| # | Control | Beschreibung |
|
||||
|---|---------|-------------|
|
||||
| 21 | Secure Development Lifecycle | Strukturierter SSDLC-Prozess |
|
||||
| 22 | Code Reviews | Systematische Code-Ueberpruefungen |
|
||||
| 23 | Sichere Entwicklungspraktiken | Static Analysis, SAST, DAST |
|
||||
| 24 | Supply-Chain-Security | Pruefung von Drittanbieter-Komponenten |
|
||||
| 25 | Dependency-Monitoring | Ueberwachung von Abhaengigkeiten |
|
||||
| 26 | SBOM | Software Bill of Materials fuehren |
|
||||
|
||||
## 5. Logging, Monitoring & Incident Detection
|
||||
|
||||
| # | Control | Beschreibung |
|
||||
|---|---------|-------------|
|
||||
| 27 | Security-Logging | Protokollierung sicherheitsrelevanter Ereignisse |
|
||||
| 28 | Ereignis-Monitoring | Ueberwachung sicherheitsrelevanter Events |
|
||||
| 29 | Anomalie-Erkennung | Erkennung von Angriffen oder Anomalien |
|
||||
| 30 | Log-Integritaet | Schutz der Protokoll-Integritaet |
|
||||
|
||||
## 6. Update- und Patch-Management
|
||||
|
||||
| # | Control | Beschreibung |
|
||||
|---|---------|-------------|
|
||||
| 31 | Sichere Update-Mechanismen | Sichere Verfahren fuer Software-Updates |
|
||||
| 32 | Update-Authentizitaet | Signaturen fuer Updates |
|
||||
| 33 | Update-Integritaet | Integritaetspruefung bei Updates |
|
||||
| 34 | Lifecycle-Support | Security-Updates waehrend des gesamten Lebenszyklus |
|
||||
|
||||
## 7. Vulnerability-Handling
|
||||
|
||||
| # | Control | Beschreibung |
|
||||
|---|---------|-------------|
|
||||
| 35 | Vulnerability-Management | Strukturierter Prozess fuer Schwachstellen-Behandlung |
|
||||
|
||||
Dazu gehoert:
|
||||
- Koordinierte Offenlegung (Coordinated Vulnerability Disclosure)
|
||||
- CVE-Monitoring
|
||||
- Patch-Bereitstellung innerhalb angemessener Frist
|
||||
|
||||
## Automatisierungspotential
|
||||
|
||||
Aus diesen 35 Controls lassen sich automatisch folgende Dokumente ableiten:
|
||||
- **Cybersecurity Policy** (Grundsatzdokument)
|
||||
- **Secure Development Policy** (SSDLC)
|
||||
- **Vulnerability Management Policy** (CVD, Patching)
|
||||
- **Incident Response Policy** (24h/72h Meldung)
|
||||
- **SBOM-Dokumentation** (Komponentenliste)',
|
||||
ARRAY['Annex I CRA', 'Art. 13 CRA', 'Art. 14 CRA', 'Art. 15 CRA'],
|
||||
ARRAY['security-controls', 'annex-i', 'secure-by-design', 'authentifizierung', 'kryptografie', 'sbom', 'vulnerability', 'patching'],
|
||||
'critical',
|
||||
ARRAY['https://eur-lex.europa.eu/eli/reg/2024/2847/oj/eng'])
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- Artikel 3: CRA + NIS2 + AI Act Zusammenspiel
|
||||
INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
|
||||
('cra-regulierungsrahmen', 'cra',
|
||||
'CRA + NIS2 + AI Act — Das neue EU-Security-Framework',
|
||||
'CRA, NIS2-Richtlinie und AI Act bilden zusammen ein umfassendes EU-Sicherheitsframework fuer digitale Produkte, Infrastrukturen und KI-Systeme.',
|
||||
'## Ueberblick
|
||||
|
||||
Die EU hat mit drei zentralen Rechtsakten ein zusammenhaengendes Framework fuer Cybersicherheit und KI-Regulierung geschaffen. Fuer Softwarehersteller, die KI einsetzen, sind alle drei relevant.
|
||||
|
||||
## Die drei Saeulen
|
||||
|
||||
| Verordnung | Fokus | Zielgruppe | Anwendung ab |
|
||||
|-----------|-------|-----------|-------------|
|
||||
| **CRA** (2024/2847) | Produkt-Cybersecurity | Hersteller von Hardware/Software | 12/2027 |
|
||||
| **NIS2** (2022/2555) | Infrastruktur-Security | Betreiber wesentlicher Dienste | 10/2024 (national) |
|
||||
| **AI Act** (2024/1689) | KI-Regulierung | Anbieter/Betreiber von KI-Systemen | 08/2025 (stufenweise) |
|
||||
|
||||
## Abgrenzung
|
||||
|
||||
### CRA vs. NIS2
|
||||
- **CRA**: Regelt die **Sicherheit des Produkts** selbst (Design, Updates, Vulnerability Handling)
|
||||
- **NIS2**: Regelt die **Sicherheit der Organisation** (Risikomanagement, Incident Response, Supply Chain)
|
||||
- **Ueberschneidung**: Beide fordern Incident Reporting und Supply-Chain-Security
|
||||
|
||||
### CRA vs. AI Act
|
||||
- **CRA**: Cybersecurity-Anforderungen an **alle** digitalen Produkte
|
||||
- **AI Act**: Zusaetzliche Anforderungen fuer Produkte, die **KI enthalten** (Transparenz, Erklaerbarkeit, Risikobewertung)
|
||||
- **Ueberschneidung**: Hochrisiko-KI-Systeme muessen sowohl CRA als auch AI Act erfuellen
|
||||
|
||||
## Synergien nutzen
|
||||
|
||||
Ein Unternehmen, das alle drei Verordnungen erfuellen muss, kann Synergien nutzen:
|
||||
|
||||
| Thema | CRA | NIS2 | AI Act |
|
||||
|-------|-----|------|--------|
|
||||
| Risikobewertung | Produkt-Risiko | Org-Risiko | KI-Risiko |
|
||||
| Incident Reporting | 24h/72h | 24h/72h | Meldepflicht |
|
||||
| Supply Chain | SBOM | Lieferantenpruefung | Drittanbieter-KI |
|
||||
| Dokumentation | Tech. Doku | Policies | KI-Registrierung |
|
||||
| Audit/Konformitaet | CE-Kennzeichnung | Zertifizierung | Konformitaetsbewertung |
|
||||
|
||||
## Empfehlung
|
||||
|
||||
Bauen Sie ein **integriertes Compliance-Management-System** auf, das alle drei Verordnungen abdeckt. Gemeinsame Policies (Security, Incident Response, Risk Management) koennen fuer alle drei Regelwerke genutzt werden.',
|
||||
ARRAY['(EU) 2024/2847', '(EU) 2022/2555', '(EU) 2024/1689', 'Art. 13 CRA', 'Art. 21 NIS2', 'Art. 9 AI Act'],
|
||||
ARRAY['cra', 'nis2', 'ai-act', 'security-framework', 'compliance', 'synergien', 'ce-kennzeichnung'],
|
||||
'important',
|
||||
ARRAY[]::text[])
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
@@ -0,0 +1,515 @@
|
||||
-- Migration 056: CRA Cybersecurity Policy Template
|
||||
-- Unternehmensrichtlinie Cybersecurity basierend auf EU Cyber Resilience Act, ISO 27001 Best Practices
|
||||
|
||||
INSERT INTO compliance_legal_templates (
|
||||
id, tenant_id, document_type, title, description, content,
|
||||
placeholders, language, jurisdiction,
|
||||
license_id, license_name, source_name,
|
||||
attribution_required, is_complete_document, version, status,
|
||||
created_at, updated_at
|
||||
) VALUES (
|
||||
gen_random_uuid(),
|
||||
'9282a473-5c95-4b3a-bf78-0ecc0ec71d3e',
|
||||
'cybersecurity_policy',
|
||||
'Unternehmensrichtlinie Cybersecurity (CRA-konform)',
|
||||
'Umfassende Cybersecurity-Richtlinie basierend auf dem EU Cyber Resilience Act (EU) 2024/2847, ISO 27001 und Secure-Development-Standards. Deckt Governance, Risikomanagement, Secure Development, Vulnerability Management, Incident Response und Compliance ab.',
|
||||
$template$# Unternehmensrichtlinie Cybersecurity
|
||||
|
||||
**{{COMPANY_NAME}}**
|
||||
|
||||
*(Cybersecurity Policy — CRA-konform)*
|
||||
|
||||
| Feld | Inhalt |
|
||||
|------|--------|
|
||||
| Dokumenttyp | Unternehmensrichtlinie |
|
||||
| Version | {{DOCUMENT_VERSION}} |
|
||||
| Datum | {{VERSION_DATE}} |
|
||||
| Naechste Ueberpruefung | {{NEXT_REVIEW_DATE}} |
|
||||
| Verantwortlich | {{ISB_NAME}} (CISO/ISB) |
|
||||
| Freigabe | {{GF_NAME}} (Geschaeftsfuehrung) |
|
||||
| Vertraulichkeit | Intern |
|
||||
|
||||
---
|
||||
|
||||
## 1. Zweck der Richtlinie
|
||||
|
||||
Diese Cybersecurity-Richtlinie legt die organisatorischen und technischen Massnahmen fest, mit denen {{COMPANY_NAME}}:
|
||||
|
||||
- Informationssysteme schuetzt
|
||||
- Cyberrisiken systematisch reduziert
|
||||
- Gesetzliche Anforderungen erfuellt (insb. EU Cyber Resilience Act, NIS2, DSGVO)
|
||||
- Sicherheitsvorfaelle erkennt, behandelt und meldet
|
||||
|
||||
Die Richtlinie gilt fuer alle:
|
||||
|
||||
- Mitarbeiterinnen und Mitarbeiter von {{COMPANY_NAME}}
|
||||
- Externe Dienstleister und Auftragnehmer
|
||||
- IT-Systeme, Software und Cloud-Services
|
||||
- Produkte mit digitalen Elementen im Sinne des CRA
|
||||
|
||||
---
|
||||
|
||||
## 2. Geltungsbereich
|
||||
|
||||
Diese Richtlinie gilt fuer:
|
||||
|
||||
- Unternehmens-IT und Netzwerkinfrastruktur
|
||||
- Interne Softwareentwicklung
|
||||
- Cloud-Infrastruktur und SaaS-Dienste
|
||||
- Datenverarbeitungssysteme
|
||||
- Produkte mit digitalen Elementen (Software, IoT, Firmware)
|
||||
- Lieferanten und Dienstleister mit Zugang zu Systemen von {{COMPANY_NAME}}
|
||||
|
||||
Betroffene Assets:
|
||||
|
||||
- Server und Netzwerkkomponenten
|
||||
- Endgeraete (Laptops, Mobilgeraete)
|
||||
- Software und Firmware
|
||||
- Datenbanken und APIs
|
||||
- Kryptografische Schluessel und Zertifikate
|
||||
|
||||
---
|
||||
|
||||
## 3. Sicherheitsziele
|
||||
|
||||
Die Cybersecurity-Strategie von {{COMPANY_NAME}} verfolgt folgende Ziele:
|
||||
|
||||
### Vertraulichkeit
|
||||
Schutz sensibler Daten vor unbefugtem Zugriff. Klassifizierung von Daten nach Schutzbedarf.
|
||||
|
||||
### Integritaet
|
||||
Sicherstellung, dass Daten und Systeme nicht unautorisiert veraendert werden. Einsatz von Integritaetspruefungen und Signaturen.
|
||||
|
||||
### Verfuegbarkeit
|
||||
Systeme und Dienste muessen gemaess den vereinbarten SLAs verfuegbar sein. Redundanz und Wiederherstellungsfaehigkeit sicherstellen.
|
||||
|
||||
### Nachvollziehbarkeit
|
||||
Sicherheitsrelevante Ereignisse muessen lueckenlos dokumentiert und fuer Audits nachvollziehbar sein.
|
||||
|
||||
---
|
||||
|
||||
## 4. Governance und Verantwortlichkeiten
|
||||
|
||||
### 4.1 Geschaeftsfuehrung
|
||||
|
||||
{{GF_NAME}} ist verantwortlich fuer:
|
||||
|
||||
- Festlegung der Sicherheitsstrategie
|
||||
- Bereitstellung angemessener Ressourcen
|
||||
- Ueberwachung der Compliance-Einhaltung
|
||||
- Jaehrliche Freigabe dieser Richtlinie
|
||||
|
||||
### 4.2 Chief Information Security Officer (CISO/ISB)
|
||||
|
||||
{{ISB_NAME}} ist verantwortlich fuer:
|
||||
|
||||
- Umsetzung der Sicherheitsstrategie
|
||||
- Risikomanagement und Risikoberichterstattung
|
||||
- Security-Monitoring und Threat Intelligence
|
||||
- Koordination des Incident-Response-Teams
|
||||
- Kontaktperson fuer Behoerden bei Sicherheitsvorfaellen
|
||||
|
||||
### 4.3 Datenschutzbeauftragter
|
||||
|
||||
{{DPO_NAME}} ({{DPO_EMAIL}}) wird bei sicherheitsrelevanten Vorfaellen einbezogen, die personenbezogene Daten betreffen.
|
||||
|
||||
### 4.4 IT-Abteilung
|
||||
|
||||
Verantwortlich fuer:
|
||||
|
||||
- Sichere Infrastruktur und Systemhaertung
|
||||
- Patch-Management und Update-Bereitstellung
|
||||
- Netzwerksegmentierung und Firewall-Management
|
||||
- Monitoring und Log-Management
|
||||
|
||||
### 4.5 Entwicklerteams
|
||||
|
||||
Verantwortlich fuer:
|
||||
|
||||
- Secure Coding und Code Reviews
|
||||
- Dependency Management und SBOM-Pflege
|
||||
- Security Testing (SAST, DAST, SCA)
|
||||
- Vulnerability Remediation
|
||||
|
||||
### 4.6 Alle Mitarbeiter
|
||||
|
||||
Alle Mitarbeiter von {{COMPANY_NAME}} muessen:
|
||||
|
||||
- Sicherheitsrichtlinien einhalten
|
||||
- Sicherheitsvorfaelle unverzueglich melden
|
||||
- An jaehrlichen Security-Schulungen teilnehmen
|
||||
- Phishing-Versuche und verdaechtige Aktivitaeten melden
|
||||
|
||||
---
|
||||
|
||||
## 5. Risikomanagement
|
||||
|
||||
{{COMPANY_NAME}} fuehrt regelmaessig eine Cyber-Risikoanalyse durch.
|
||||
|
||||
### Prozess
|
||||
|
||||
1. **Identifikation** kritischer Assets und Daten
|
||||
2. **Bedrohungsanalyse** (Threat Modeling, STRIDE)
|
||||
3. **Schwachstellenanalyse** (CVE-Monitoring, Vulnerability Scanning)
|
||||
4. **Risikobewertung** (Eintrittswahrscheinlichkeit x Auswirkung)
|
||||
5. **Risikobehandlung** (Vermeiden, Reduzieren, Uebertragen, Akzeptieren)
|
||||
|
||||
### Frequenz
|
||||
|
||||
Risikobewertungen erfolgen:
|
||||
|
||||
- Mindestens jaehrlich
|
||||
- Bei wesentlichen Systemaenderungen
|
||||
- Bei neuen Produkten oder Dienstleistungen
|
||||
- Nach Sicherheitsvorfaellen
|
||||
|
||||
### Dokumentation
|
||||
|
||||
Alle Risikoanalysen werden dokumentiert und fuer mindestens 3 Jahre aufbewahrt. Die Ergebnisse werden der Geschaeftsfuehrung in Form eines Risikoberichts vorgelegt.
|
||||
|
||||
---
|
||||
|
||||
## 6. Secure System Architecture
|
||||
|
||||
Systeme von {{COMPANY_NAME}} muessen nach folgenden Prinzipien entwickelt und betrieben werden:
|
||||
|
||||
### Security by Design
|
||||
Sicherheitsanforderungen werden bereits in der Architekturphase beruecksichtigt. Jedes neue System durchlaeuft ein Security Architecture Review.
|
||||
|
||||
### Security by Default
|
||||
Systeme werden mit sicheren Grundeinstellungen ausgeliefert. Keine Dienste oder Ports sind standardmaessig aktiviert, die nicht benoetigt werden.
|
||||
|
||||
### Least Privilege
|
||||
Benutzer und Systeme erhalten nur die minimal notwendigen Berechtigungen. Privilegierte Zugriffe werden gesondert protokolliert.
|
||||
|
||||
### Segmentierung
|
||||
Kritische Systeme werden durch Netzwerksegmentierung isoliert. Produktiv-, Entwicklungs- und Testumgebungen sind strikt getrennt.
|
||||
|
||||
### Haertung
|
||||
Alle Systeme werden gemaess anerkannter Haertungsrichtlinien (CIS Benchmarks, BSI IT-Grundschutz) konfiguriert.
|
||||
|
||||
---
|
||||
|
||||
## 7. Zugriffskontrollen
|
||||
|
||||
### Anforderungen
|
||||
|
||||
- Eindeutige, personalisierte Benutzerkonten
|
||||
- Starke Passwortrichtlinie (mind. 12 Zeichen, Komplexitaet)
|
||||
- Multi-Faktor-Authentifizierung (MFA) fuer alle administrativen Zugriffe und externe Zugaenge
|
||||
- Rollenbasierte Zugriffskontrolle (RBAC) mit regelmaessiger Rezertifizierung
|
||||
- Automatische Sperrung nach 5 fehlgeschlagenen Login-Versuchen
|
||||
|
||||
### Verboten
|
||||
|
||||
- Gemeinsam genutzte Accounts (Shared Accounts)
|
||||
- Universal-Default-Passwoerter
|
||||
- Unverschluesselte Speicherung von Zugangsdaten
|
||||
- Weitergabe von Zugangsdaten per E-Mail
|
||||
|
||||
### Privileged Access Management
|
||||
|
||||
Administratorzugriffe muessen:
|
||||
|
||||
- Gesondert beantragt und genehmigt werden
|
||||
- Zeitlich begrenzt sein (Just-in-Time Access)
|
||||
- Vollstaendig protokolliert werden
|
||||
|
||||
---
|
||||
|
||||
## 8. Kryptografie
|
||||
|
||||
{{COMPANY_NAME}} verwendet ausschliesslich moderne, anerkannte kryptografische Verfahren.
|
||||
|
||||
### Verschluesselung erforderlich fuer
|
||||
|
||||
- Gespeicherte sensible Daten (at rest) — AES-256
|
||||
- Datenuebertragung (in transit) — TLS 1.2+, vorzugsweise TLS 1.3
|
||||
- Backups — vollstaendig verschluesselt
|
||||
- Konfigurationsdaten und Secrets — Vault oder vergleichbar
|
||||
|
||||
### Schluesselmanagement
|
||||
|
||||
- Schluessel muessen sicher gespeichert werden (HSM oder Vault)
|
||||
- Regelmaessige Rotation (mind. jaehrlich, bei Kompromittierung sofort)
|
||||
- Zugriff nur fuer autorisierte Personen
|
||||
- Dokumentation der Schluessel-Lebenszyklen
|
||||
|
||||
### Verbotene Verfahren
|
||||
|
||||
- MD5 und SHA-1 fuer kryptografische Zwecke
|
||||
- DES und 3DES
|
||||
- SSL und TLS < 1.2
|
||||
|
||||
---
|
||||
|
||||
## 9. Secure Software Development Lifecycle (SSDLC)
|
||||
|
||||
Alle Softwareprodukte von {{COMPANY_NAME}} muessen einen sicheren Entwicklungsprozess durchlaufen. Dies entspricht den Anforderungen des CRA Annex I.
|
||||
|
||||
### Entwicklungsprozess
|
||||
|
||||
1. **Security Requirements** — Sicherheitsanforderungen in User Stories und Epics
|
||||
2. **Threat Modeling** — Bedrohungsanalyse in der Designphase
|
||||
3. **Secure Coding** — Einhaltung von Secure-Coding-Standards
|
||||
4. **Code Review** — Peer Review mit Security-Fokus
|
||||
5. **Security Testing** — Automatisierte und manuelle Tests
|
||||
6. **Release-Freigabe** — Security Sign-off vor Deployment
|
||||
|
||||
### Pflichtmassnahmen
|
||||
|
||||
- **Static Application Security Testing (SAST)** — in der CI/CD-Pipeline
|
||||
- **Software Composition Analysis (SCA)** — Dependency Scanning
|
||||
- **Dynamic Application Security Testing (DAST)** — vor jedem Major Release
|
||||
- **Secrets Detection** — Automatische Pruefung auf eingebettete Zugangsdaten
|
||||
- **Penetration Testing** — mindestens jaehrlich durch externe Tester
|
||||
|
||||
---
|
||||
|
||||
## 10. Software-Supply-Chain-Security
|
||||
|
||||
{{COMPANY_NAME}} kontrolliert externe Softwarekomponenten systematisch.
|
||||
|
||||
### Software Bill of Materials (SBOM)
|
||||
|
||||
Fuer alle Produkte wird ein SBOM gefuehrt, das mindestens folgende Informationen enthaelt:
|
||||
|
||||
- Name und Version aller Software-Komponenten
|
||||
- Lizenzinformationen
|
||||
- Bekannte Schwachstellen (CVE)
|
||||
|
||||
Das SBOM wird bei jedem Release aktualisiert und in maschinenlesbarem Format (CycloneDX oder SPDX) bereitgestellt.
|
||||
|
||||
### Open-Source-Kontrolle
|
||||
|
||||
- Lizenzpruefung vor Aufnahme neuer Abhaengigkeiten
|
||||
- Monitoring auf bekannte Schwachstellen (CVE)
|
||||
- Regelmaessige Updates von Abhaengigkeiten
|
||||
|
||||
---
|
||||
|
||||
## 11. Logging und Monitoring
|
||||
|
||||
### Logging umfasst
|
||||
|
||||
- Erfolgreiche und fehlgeschlagene Login-Versuche
|
||||
- Administrative Systemaenderungen
|
||||
- Zugriffe auf sensible Daten
|
||||
- Sicherheitsrelevante Konfigurationsaenderungen
|
||||
- API-Zugriffe und Fehler
|
||||
|
||||
### Anforderungen an Logs
|
||||
|
||||
- Manipulationssicher (append-only, signiert oder WORM)
|
||||
- Zentral gesammelt (SIEM oder vergleichbar)
|
||||
- Aufbewahrung mindestens 12 Monate
|
||||
- Zugriff nur fuer autorisiertes Security-Personal
|
||||
|
||||
### Monitoring
|
||||
|
||||
- Echtzeit-Ueberwachung sicherheitsrelevanter Ereignisse
|
||||
- Automatische Alarmierung bei Anomalien
|
||||
- Korrelation von Events aus verschiedenen Quellen
|
||||
|
||||
---
|
||||
|
||||
## 12. Vulnerability Management
|
||||
|
||||
{{COMPANY_NAME}} betreibt ein strukturiertes Schwachstellenmanagement.
|
||||
|
||||
### Prozess
|
||||
|
||||
1. **Identifikation** — Automatische Scans, Bug Bounty, CVE-Monitoring
|
||||
2. **Bewertung** — Risikobewertung nach CVSS
|
||||
3. **Priorisierung** — Kritische Schwachstellen zuerst
|
||||
4. **Behebung** — Patch-Entwicklung und Deployment
|
||||
5. **Verifizierung** — Bestaetigung der Behebung
|
||||
6. **Kommunikation** — Information betroffener Kunden und Behoerden
|
||||
|
||||
### Coordinated Vulnerability Disclosure (CVD)
|
||||
|
||||
{{COMPANY_NAME}} veroeffentlicht eine CVD-Policy. Sicherheitsforscher koennen Schwachstellen an {{SECURITY_EMAIL}} melden. Meldungen werden innerhalb von 5 Werktagen bestaetigt.
|
||||
|
||||
---
|
||||
|
||||
## 13. Patch- und Update-Management
|
||||
|
||||
Alle Systeme muessen regelmaessig aktualisiert werden.
|
||||
|
||||
### Patchzyklen
|
||||
|
||||
| Risikostufe | Reaktionszeit |
|
||||
|-------------|---------------|
|
||||
| Kritisch (CVSS >= 9.0) | 24-72 Stunden |
|
||||
| Hoch (CVSS 7.0-8.9) | 7 Tage |
|
||||
| Mittel (CVSS 4.0-6.9) | 30 Tage |
|
||||
| Niedrig (CVSS < 4.0) | Naechster regulaerer Update-Zyklus |
|
||||
|
||||
### Anforderungen an Updates
|
||||
|
||||
- Alle Updates muessen **digital signiert** sein
|
||||
- Integritaetspruefung vor Installation
|
||||
- Rollback-Moeglichkeit bei fehlerhaften Updates
|
||||
- Automatische Update-Benachrichtigung fuer Kunden
|
||||
- **Mindest-Support-Zeitraum: 5 Jahre** (gemaess CRA)
|
||||
|
||||
---
|
||||
|
||||
## 14. Incident Response
|
||||
|
||||
{{COMPANY_NAME}} betreibt einen dokumentierten Incident-Response-Prozess.
|
||||
|
||||
### Schritte
|
||||
|
||||
1. **Detection** — Erkennung durch Monitoring, Meldung oder externe Information
|
||||
2. **Classification** — Einstufung nach Schweregrad (P1-P4)
|
||||
3. **Containment** — Sofortige Eindaemmung des Vorfalls
|
||||
4. **Investigation** — Forensische Analyse und Ursachenermittlung
|
||||
5. **Recovery** — Wiederherstellung des Normalbetriebs
|
||||
6. **Reporting** — Dokumentation und Meldung an Behoerden
|
||||
7. **Lessons Learned** — Nachbereitung und Verbesserung
|
||||
|
||||
### Meldepflichten (CRA-konform)
|
||||
|
||||
| Meldung | Frist | Empfaenger |
|
||||
|---------|-------|-----------|
|
||||
| **Fruehwarnung** | 24 Stunden | ENISA / nationale Behoerde |
|
||||
| **Detaillierter Bericht** | 72 Stunden | ENISA / nationale Behoerde |
|
||||
| **Abschlussbericht** | 1 Monat | ENISA / nationale Behoerde |
|
||||
|
||||
Bei personenbezogenen Daten gelten zusaetzlich die Fristen nach Art. 33/34 DSGVO (72 Stunden an Aufsichtsbehoerde).
|
||||
|
||||
### Kontakte
|
||||
|
||||
| Rolle | Person | Kontakt |
|
||||
|-------|--------|---------|
|
||||
| CISO/ISB | {{ISB_NAME}} | {{ISB_EMAIL}} |
|
||||
| DSB | {{DPO_NAME}} | {{DPO_EMAIL}} |
|
||||
| GF | {{GF_NAME}} | {{GF_EMAIL}} |
|
||||
|
||||
---
|
||||
|
||||
## 15. Security Testing
|
||||
|
||||
Folgende Tests werden regelmaessig durchgefuehrt:
|
||||
|
||||
| Test | Frequenz | Durchfuehrung |
|
||||
|------|----------|--------------|
|
||||
| Vulnerability Scans | Woechentlich | Automatisiert (CI/CD) |
|
||||
| SAST/SCA | Bei jedem Commit | Automatisiert (CI/CD) |
|
||||
| DAST | Vor Major Releases | Automatisiert + manuell |
|
||||
| Penetration Tests | Jaehrlich | Externer Dienstleister |
|
||||
| Red-Team-Tests | Alle 2 Jahre | Externer Dienstleister |
|
||||
| Social Engineering | Jaehrlich | Externer Dienstleister |
|
||||
|
||||
---
|
||||
|
||||
## 16. Backup und Wiederherstellung
|
||||
|
||||
### Anforderungen
|
||||
|
||||
- **Taegliche Backups** aller kritischen Systeme und Daten
|
||||
- **Off-Site-Backups** an geografisch getrenntem Standort
|
||||
- **Verschluesselung** aller Backup-Daten
|
||||
- **Wiederherstellungstests** mindestens vierteljaehrlich
|
||||
|
||||
### Recovery-Ziele
|
||||
|
||||
| Metrik | Ziel |
|
||||
|--------|------|
|
||||
| Recovery Time Objective (RTO) | {{RTO_HOURS}} Stunden |
|
||||
| Recovery Point Objective (RPO) | {{RPO_HOURS}} Stunden |
|
||||
|
||||
---
|
||||
|
||||
## 17. Lieferanten- und Drittanbieter-Management
|
||||
|
||||
Lieferanten mit Zugang zu Systemen oder Daten von {{COMPANY_NAME}} muessen Sicherheitsanforderungen erfuellen.
|
||||
|
||||
### Anforderungen
|
||||
|
||||
- Sicherheitspruefung vor Vertragsabschluss (Security Assessment)
|
||||
- Sicherheitsanforderungen im Vertrag (Auftragsverarbeitung, SLA)
|
||||
- Regelmaessige Audits und Compliance-Nachweise
|
||||
- Incident-Notification-Pflicht innerhalb von 24 Stunden
|
||||
- Nachweis ueber eigenes Vulnerability Management
|
||||
|
||||
---
|
||||
|
||||
## 18. Schulungen und Awareness
|
||||
|
||||
Alle Mitarbeiter von {{COMPANY_NAME}} erhalten:
|
||||
|
||||
- **Jaehrliche Security-Awareness-Trainings**
|
||||
- **Phishing-Simulationen** (mind. 2x jaehrlich)
|
||||
- **Rollenspezifische Schulungen** (Entwickler: Secure Coding, IT: Incident Response)
|
||||
- **Onboarding-Schulung** fuer neue Mitarbeiter
|
||||
|
||||
Die Teilnahme ist verpflichtend und wird dokumentiert.
|
||||
|
||||
---
|
||||
|
||||
## 19. Dokumentation und Compliance
|
||||
|
||||
{{COMPANY_NAME}} dokumentiert:
|
||||
|
||||
- Risikoanalysen und Risikobehandlungsplaene
|
||||
- Sicherheitskontrollen und deren Wirksamkeit
|
||||
- Sicherheitsvorfaelle und deren Behandlung
|
||||
- Software-Updates und Patches
|
||||
- SBOM fuer alle Produkte
|
||||
- Audit-Ergebnisse
|
||||
|
||||
Die Dokumentation muss jederzeit fuer Audits und behoerdliche Anfragen verfuegbar sein.
|
||||
|
||||
### Regulatorische Compliance
|
||||
|
||||
Diese Richtlinie dient der Einhaltung folgender Vorschriften:
|
||||
|
||||
- **EU Cyber Resilience Act** (EU) 2024/2847
|
||||
- **NIS2-Richtlinie** (EU) 2022/2555
|
||||
- **DSGVO** (EU) 2016/679 — technische und organisatorische Massnahmen
|
||||
- **ISO/IEC 27001** — Best Practices fuer Informationssicherheit
|
||||
|
||||
---
|
||||
|
||||
## 20. Durchsetzung
|
||||
|
||||
Verstoesse gegen diese Richtlinie koennen je nach Schwere folgende Konsequenzen haben:
|
||||
|
||||
- Disziplinarmassnahmen
|
||||
- Vertragsstrafen (bei externen Dienstleistern)
|
||||
- Rechtliche Konsequenzen (bei vorsaetzlichen Verstoessen)
|
||||
|
||||
---
|
||||
|
||||
## 21. Ueberpruefung und Aktualisierung
|
||||
|
||||
Diese Cybersecurity-Richtlinie wird ueberprueft:
|
||||
|
||||
- **Jaehrlich** durch {{ISB_NAME}} (CISO/ISB)
|
||||
- Bei **regulatorischen Aenderungen** (neue EU-Verordnungen, nationale Gesetze)
|
||||
- Nach **groesseren Sicherheitsvorfaellen**
|
||||
- Bei **wesentlichen Aenderungen** der IT-Infrastruktur oder Produktlandschaft
|
||||
|
||||
Die naechste planmaessige Ueberpruefung ist am **{{NEXT_REVIEW_DATE}}**.
|
||||
|
||||
---
|
||||
|
||||
## Freigabe
|
||||
|
||||
| | Name | Datum | Unterschrift |
|
||||
|--|------|-------|-------------|
|
||||
| Erstellt von | {{ISB_NAME}} (CISO/ISB) | {{VERSION_DATE}} | _________________ |
|
||||
| Freigegeben von | {{GF_NAME}} (Geschaeftsfuehrung) | {{VERSION_DATE}} | _________________ |
|
||||
|
||||
---
|
||||
|
||||
*Dieses Dokument ist Eigentum von {{COMPANY_NAME}} und unterliegt der Vertraulichkeitsstufe "Intern".*
|
||||
$template$,
|
||||
CAST('["COMPANY_NAME","COMPANY_ADDRESS","COMPANY_CITY","GF_NAME","GF_EMAIL","ISB_NAME","ISB_EMAIL","DPO_NAME","DPO_EMAIL","SECURITY_EMAIL","DOCUMENT_VERSION","VERSION_DATE","NEXT_REVIEW_DATE","RTO_HOURS","RPO_HOURS"]' AS jsonb),
|
||||
'de', 'DE',
|
||||
'mit', 'MIT License', 'BreakPilot Compliance',
|
||||
false, true, '1.0.0', 'published',
|
||||
NOW(), NOW()
|
||||
) ON CONFLICT DO NOTHING;
|
||||
@@ -0,0 +1,23 @@
|
||||
-- Migration 057: Add batch processing paths to canonical_processed_chunks
|
||||
-- New values: structured_batch, llm_reform_batch (used by batch control generation)
|
||||
|
||||
-- Rebuilds the CHECK constraint on canonical_processed_chunks.processing_path
-- so that it also accepts 'structured_batch' and 'llm_reform_batch'.
-- The old constraint is dropped (IF EXISTS) and re-added with the complete
-- value list; the whole change is skipped when the table does not exist.
-- NOTE(review): the information_schema lookup matches the table name in any
-- schema, whereas ALTER TABLE resolves the name via search_path — confirm
-- that only one schema contains this table.
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'canonical_processed_chunks') THEN
|
||||
ALTER TABLE canonical_processed_chunks
|
||||
DROP CONSTRAINT IF EXISTS canonical_processed_chunks_processing_path_check;
|
||||
ALTER TABLE canonical_processed_chunks
|
||||
ADD CONSTRAINT canonical_processed_chunks_processing_path_check
|
||||
CHECK (processing_path IN (
|
||||
'structured',
|
||||
'llm_reform',
|
||||
'skipped',
|
||||
'prefilter_skip',
|
||||
'no_control',
|
||||
'store_failed',
|
||||
'error',
|
||||
'structured_batch',
|
||||
'llm_reform_batch'
|
||||
));
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -0,0 +1,8 @@
|
||||
-- Migration 058: Add generation_strategy column to canonical_controls
|
||||
-- Tracks whether a control was generated with document-grouped or ungrouped batching
|
||||
|
||||
-- Adds the generation_strategy column. Because it is NOT NULL with a DEFAULT,
-- rows that already exist receive 'ungrouped'. IF NOT EXISTS turns the
-- statement into a no-op when the column is already present.
-- NOTE(review): no CHECK constraint restricts the column to the two values
-- named in the column comment below — confirm this is intentional.
ALTER TABLE canonical_controls
|
||||
ADD COLUMN IF NOT EXISTS generation_strategy TEXT NOT NULL DEFAULT 'ungrouped';
|
||||
|
||||
COMMENT ON COLUMN canonical_controls.generation_strategy IS
|
||||
'How chunks were batched during generation: ungrouped (random), document_grouped (by regulation+article)';
|
||||
@@ -0,0 +1,292 @@
|
||||
-- Migration 059: CRA Annex I — Detaillierte Essential Cybersecurity Requirements
|
||||
-- Erweitert den bestehenden Wiki-Artikel 'cra-security-controls' um Part 1 + Part 2,
|
||||
-- Produktklassifizierung und ISO 27001 Mapping.
|
||||
-- Zusaetzlich: Neuer Artikel fuer CRA-Produktklassifizierung und Konformitaetsbewertung.
|
||||
|
||||
-- ============================================================================
|
||||
-- 1) Update: CRA Security Controls (Annex I) — Vollstaendige 8-Kategorien-Struktur
|
||||
-- ============================================================================
|
||||
UPDATE compliance_wiki_articles
|
||||
SET
|
||||
title = 'CRA Annex I — Essential Cybersecurity Requirements (Vollstaendig)',
|
||||
summary = 'Annex I des CRA definiert die wesentlichen Cybersicherheitsanforderungen in zwei Teilen: Teil 1 (Produktsicherheit, 11 Anforderungen) und Teil 2 (Schwachstellenbehandlung, 8 Anforderungen). Daraus ergeben sich 40 konkrete Security-Controls in 8 Kategorien.',
|
||||
content = '## Ueberblick
|
||||
|
||||
Der **EU Cyber Resilience Act (CRA)**, Verordnung (EU) 2024/2847, legt in **Annex I** die **Essential Cybersecurity Requirements** fest, die alle Produkte mit digitalen Elementen erfuellen muessen. Annex I besteht aus zwei Teilen:
|
||||
|
||||
- **Teil 1 — Sicherheitsanforderungen an Produkte** (11 Kernanforderungen)
|
||||
- **Teil 2 — Anforderungen an die Schwachstellenbehandlung** (8 Prozessanforderungen)
|
||||
|
||||
Daraus lassen sich **40 konkrete Security-Controls** in **8 thematischen Kategorien** ableiten. Diese Controls bilden die Grundlage fuer eine Cybersecurity-Compliance-Strategie.
|
||||
|
||||
---
|
||||
|
||||
## Teil 1: Sicherheitsanforderungen an Produkte
|
||||
|
||||
### Kategorie 1 — Secure-by-Design und Architektur
|
||||
|
||||
Diese Controls stellen sicher, dass Sicherheit von Anfang an in die Produktarchitektur integriert wird.
|
||||
|
||||
| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
|
||||
|---|---------|-------------|-------------|-------------------|
|
||||
| 1 | **Secure-by-Default-Konfiguration** | Annex I, 1(1) | Produkte muessen mit sicheren Standardeinstellungen ausgeliefert werden. Keine offenen Ports, keine aktivierten Debug-Schnittstellen, keine unnoetig laufenden Dienste. | A.8.9 |
|
||||
| 2 | **Minimale Angriffsflaeche** | Annex I, 1(2) | Nur notwendige Schnittstellen, Dienste und Protokolle aktivieren. Jede zusaetzliche Funktionalitaet vergroessert die Angriffsflaeche und muss einzeln gerechtfertigt werden. | A.8.9, A.8.20 |
|
||||
| 3 | **Sichere Systemarchitektur** | Annex I, 1(3) | Sicherheitskritische Komponenten muessen isoliert werden (Sandboxing, Containerisierung, Privilege Separation). Defense-in-Depth-Prinzip anwenden. | A.8.27 |
|
||||
| 4 | **Least-Privilege-Prinzip** | Annex I, 1(3)(d) | Jede Komponente, jeder Prozess und jeder Benutzer erhaelt nur die minimal notwendigen Berechtigungen. Privilegien-Eskalation muss verhindert werden. | A.8.2, A.8.3 |
|
||||
| 5 | **Manipulationsschutz** | Annex I, 1(3)(c) | Schutz vor unautorisierter Aenderung von Software und Konfiguration durch Integritaetsmechanismen (Code Signing, Secure Boot, TPM). | A.8.24 |
|
||||
| 6 | **Integritaetspruefung** | Annex I, 1(3)(c) | Automatische Ueberpruefung der Integritaet von Software, Firmware und Konfigurationsdaten bei Start und Laufzeit. Hash-basierte Validierung und digitale Signaturen. | A.8.24 |
|
||||
|
||||
### Kategorie 2 — Authentifizierung und Zugriffskontrolle
|
||||
|
||||
Controls zur Sicherstellung, dass nur autorisierte Personen und Systeme Zugriff erhalten.
|
||||
|
||||
| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
|
||||
|---|---------|-------------|-------------|-------------------|
|
||||
| 7 | **Starke Authentifizierung** | Annex I, 1(3)(d) | Implementierung sicherer Authentifizierungsmechanismen. Multi-Faktor-Authentifizierung fuer administrative Zugriffe. Unterstuetzung moderner Standards (FIDO2, WebAuthn). | A.8.5 |
|
||||
| 8 | **Keine Default-Passwoerter** | Annex I, 1(3)(d) | Produkte duerfen keine universellen Standardpasswoerter verwenden. Jedes Geraet muss ein individuelles Passwort erhalten oder den Benutzer zur Aenderung bei Ersteinrichtung zwingen. | A.8.5 |
|
||||
| 9 | **Sicheres Credential-Management** | Annex I, 1(3)(d) | Zugangsdaten muessen verschluesselt gespeichert werden (bcrypt, Argon2id). Keine Klartextspeicherung. API-Keys und Tokens regelmaessig rotieren. | A.8.5 |
|
||||
| 10 | **Sitzungsmanagement** | Annex I, 1(3)(d) | Sichere Session-Verwaltung mit Timeout, Token-Binding und Session-Invalidierung bei Logout oder Passwortwechsel. CSRF-Schutz implementieren. | A.8.5 |
|
||||
| 11 | **Brute-Force-Schutz** | Annex I, 1(3)(d) | Schutz vor Brute-Force- und Credential-Stuffing-Angriffen durch Rate Limiting, Account Lockout und CAPTCHA-Mechanismen. | A.8.5, A.8.16 |
|
||||
| 12 | **Rollenbasierte Autorisierung** | Annex I, 1(3)(d) | Implementierung von RBAC (Role-Based Access Control). Trennung von administrativen und Nutzerfunktionen. Prinzip der geringsten Privilegien durchsetzen. | A.8.2, A.8.3 |
|
||||
|
||||
### Kategorie 3 — Kryptografie und Datenschutz
|
||||
|
||||
Controls zum Schutz von Daten durch kryptografische Verfahren.
|
||||
|
||||
| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
|
||||
|---|---------|-------------|-------------|-------------------|
|
||||
| 13 | **Verschluesselung sensibler Daten** | Annex I, 1(3)(e) | Alle sensiblen Daten muessen verschluesselt werden — sowohl bei der Speicherung (at rest, AES-256) als auch bei der Uebertragung (in transit, TLS 1.2+). | A.8.24 |
|
||||
| 14 | **Speicher-Schutz (Data at Rest)** | Annex I, 1(3)(e) | Verschluesselung gespeicherter Daten auf Festplatten, in Datenbanken und Backups. Schluessel getrennt von Daten speichern. | A.8.24 |
|
||||
| 15 | **Transport-Schutz (Data in Transit)** | Annex I, 1(3)(e) | Alle Netzwerkkommunikation ueber TLS 1.2 oder hoeher. Veraltete Protokolle (SSL, TLS 1.0/1.1) deaktivieren. Certificate Pinning fuer kritische Verbindungen. | A.8.24 |
|
||||
| 16 | **Sicheres Schluesselmanagement** | Annex I, 1(3)(e) | Kryptografische Schluessel in HSM oder Vault speichern. Regelmaessige Rotation (mind. jaehrlich). Dokumentation der Schluessel-Lebenszyklen. Sofortige Rotation bei Kompromittierungsverdacht. | A.8.24 |
|
||||
| 17 | **Datenminimierung** | Annex I, 1(3)(f) | Nur Daten erfassen und verarbeiten, die fuer die Produktfunktion erforderlich sind. Personenbezogene Daten gemaess DSGVO-Grundsaetzen behandeln. | A.8.10, A.8.11 |
|
||||
|
||||
### Kategorie 4 — Secure Software Development Lifecycle
|
||||
|
||||
Controls fuer sichere Softwareentwicklung ueber den gesamten Lebenszyklus.
|
||||
|
||||
| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
|
||||
|---|---------|-------------|-------------|-------------------|
|
||||
| 18 | **Strukturierter SSDLC** | Annex I, 1(1) | Implementierung eines formalen Secure Software Development Lifecycle mit definierten Security Gates in jeder Phase (Requirements, Design, Implementation, Test, Release). | A.8.25, A.8.26 |
|
||||
| 19 | **Systematische Code Reviews** | Annex I, 1(1) | Peer Reviews mit Security-Fokus fuer jeden Code-Commit. Einsatz von Checklisten fuer OWASP Top 10 und CWE Top 25. Security Champions in jedem Entwicklerteam. | A.8.25 |
|
||||
| 20 | **Automatisierte Sicherheitstests** | Annex I, 1(1) | Static Application Security Testing (SAST), Dynamic Application Security Testing (DAST) und Software Composition Analysis (SCA) in der CI/CD-Pipeline. Secrets Detection fuer eingebettete Zugangsdaten. | A.8.25 |
|
||||
| 21 | **Supply-Chain-Security** | Annex I, 1(5) | Systematische Pruefung aller Drittanbieter-Komponenten auf Schwachstellen und Lizenz-Compliance. Vertrauenswuerdigkeit von Lieferanten bewerten. | A.5.19, A.5.21 |
|
||||
| 22 | **Dependency-Monitoring** | Annex I, 1(5) | Kontinuierliche Ueberwachung aller Abhaengigkeiten auf bekannte Schwachstellen (CVE). Automatische Benachrichtigung bei neuen CVEs in verwendeten Bibliotheken. | A.8.8, A.8.25 |
|
||||
| 23 | **Software Bill of Materials (SBOM)** | Annex I, 1(5) | Fuer jedes Produkt ein maschinenlesbares SBOM fuehren (CycloneDX oder SPDX). Mindestens Top-Level-Abhaengigkeiten mit Name, Version und Lizenz dokumentieren. SBOM bei jedem Release aktualisieren. | A.8.25 |
|
||||
|
||||
### Kategorie 5 — Logging, Monitoring und Anomalie-Erkennung
|
||||
|
||||
Controls zur Erkennung und Nachverfolgung von Sicherheitsereignissen.
|
||||
|
||||
| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
|
||||
|---|---------|-------------|-------------|-------------------|
|
||||
| 24 | **Security-Logging** | Annex I, 1(3)(g) | Protokollierung aller sicherheitsrelevanten Ereignisse: Login-Versuche, Berechtigungsaenderungen, administrative Aktionen, API-Zugriffe, Fehler und Ausnahmen. Logs muessen Zeitstempel, Akteur, Aktion und Ergebnis enthalten. | A.8.15 |
|
||||
| 25 | **Ereignis-Monitoring** | Annex I, 1(3)(g) | Zentrale Sammlung und Echtzeit-Ueberwachung sicherheitsrelevanter Events. Einsatz eines SIEM-Systems oder vergleichbarer Loesung. Korrelation von Events aus verschiedenen Quellen. | A.8.16 |
|
||||
| 26 | **Anomalie-Erkennung** | Annex I, 1(3)(g) | Automatische Erkennung von Angriffsmustern und ungewoehnlichem Verhalten. Alarmierung bei Abweichungen von Baseline-Verhalten. Integration von Threat Intelligence Feeds. | A.8.16 |
|
||||
| 27 | **Log-Integritaet und -Aufbewahrung** | Annex I, 1(3)(g) | Logs muessen manipulationssicher gespeichert werden (append-only, signiert oder WORM). Aufbewahrung mindestens 12 Monate. Zugriff auf Logs nur fuer autorisiertes Security-Personal. | A.8.15 |
|
||||
|
||||
### Kategorie 6 — Update- und Patch-Management
|
||||
|
||||
Controls fuer die sichere Bereitstellung und Installation von Updates.
|
||||
|
||||
| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
|
||||
|---|---------|-------------|-------------|-------------------|
|
||||
| 28 | **Sichere Update-Mechanismen** | Annex I, 1(4) | Updates muessen ueber sichere Kanaele verteilt werden (HTTPS, signierte Pakete). Automatische oder einfach zugaengliche Update-Moeglichkeit fuer Endnutzer. Rollback-Faehigkeit bei fehlerhaften Updates. | A.8.8, A.8.19 |
|
||||
| 29 | **Update-Authentizitaet** | Annex I, 1(4) | Alle Updates muessen digital signiert sein. Signaturpruefung vor Installation erzwingen. Verwendung vertrauenswuerdiger Signaturschluessel mit dokumentierter Key Ceremony. | A.8.24 |
|
||||
| 30 | **Update-Integritaet** | Annex I, 1(4) | Integritaetspruefung jedes Update-Pakets vor und nach Installation (Hash-Vergleich, Signatur-Verifikation). Manipulation waehrend der Uebertragung erkennen und ablehnen. | A.8.24 |
|
||||
| 31 | **Lifecycle-Support** | Annex I, 1(4) | Security-Updates waehrend des gesamten erwarteten Produktlebenszyklus bereitstellen — mindestens **5 Jahre** ab Inverkehrbringen oder die erwartete Nutzungsdauer, je nachdem welcher Zeitraum laenger ist. End-of-Life klar kommunizieren. | A.8.8 |
|
||||
|
||||
---
|
||||
|
||||
## Teil 2: Anforderungen an die Schwachstellenbehandlung
|
||||
|
||||
### Kategorie 7 — Vulnerability Management
|
||||
|
||||
Controls fuer die systematische Identifikation, Bewertung und Behebung von Schwachstellen.
|
||||
|
||||
| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
|
||||
|---|---------|-------------|-------------|-------------------|
|
||||
| 32 | **Schwachstellen-Identifikation** | Annex I, 2(1) | Kontinuierliches CVE-Monitoring aller eingesetzten Komponenten. Regelmaessige Vulnerability Scans (woechentlich automatisiert). Bug-Bounty-Programme oder Responsible-Disclosure-Kanaele einrichten. | A.8.8 |
|
||||
| 33 | **SBOM-Pflege und Analyse** | Annex I, 2(1) | SBOM aktuell halten und kontinuierlich gegen CVE-Datenbanken pruefen. Automatische Alarmierung bei neu entdeckten Schwachstellen in verwendeten Komponenten. | A.8.8, A.8.25 |
|
||||
| 34 | **Risikobasierte Priorisierung** | Annex I, 2(2) | Schwachstellen nach CVSS-Score und tatsaechlichem Risiko priorisieren. Reaktionszeiten nach Schweregrad: Kritisch (24–72h), Hoch (7 Tage), Mittel (30 Tage), Niedrig (naechster Zyklus). | A.8.8 |
|
||||
| 35 | **Coordinated Vulnerability Disclosure** | Annex I, 2(5) | Veroeffentlichung einer CVD-Policy mit klarem Meldeprozess. Kontaktadresse fuer Sicherheitsforscher bereitstellen. Eingangsbestaetigung innerhalb von 5 Werktagen. Koordinierte Veroeffentlichung nach Patch-Verfuegbarkeit. | A.5.5, A.5.6 |
|
||||
|
||||
### Kategorie 8 — Incident Response und Meldepflichten
|
||||
|
||||
Controls fuer die Erkennung, Behandlung und Meldung von Sicherheitsvorfaellen.
|
||||
|
||||
| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
|
||||
|---|---------|-------------|-------------|-------------------|
|
||||
| 36 | **Incident-Response-Prozess** | Annex I, 2(5) | Dokumentierter Prozess mit definierten Phasen: Detection → Classification → Containment → Investigation → Recovery → Reporting → Lessons Learned. Regelmaessige Uebungen (Tabletop Exercises). | A.5.24, A.5.25, A.5.26 |
|
||||
| 37 | **Fruehwarnung (24h)** | Annex I, 2(7) + Art. 14(2)(a) | Bei aktiv ausgenutzten Schwachstellen oder schweren Vorfaellen: Fruehwarnung an ENISA und/oder zustaendige nationale Behoerde innerhalb von **24 Stunden** nach Kenntniserlangung. | A.5.24, A.5.26 |
|
||||
| 38 | **Detaillierter Vorfallsbericht (72h)** | Annex I, 2(7) + Art. 14(2)(b) | Innerhalb von **72 Stunden**: Detaillierter Bericht mit Umfang, Auswirkung, Ursachenanalyse und eingeleiteten Gegenmassnahmen. Bei personenbezogenen Daten zusaetzlich Art. 33/34 DSGVO beachten. | A.5.24, A.5.26 |
|
||||
| 39 | **Patch-Bereitstellung** | Annex I, 2(3) | Patches fuer gemeldete und bestaetigte Schwachstellen so schnell wie moeglich bereitstellen. Sicherheitshinweise (Security Advisories) an Kunden veroeffentlichen. CSAF-Format fuer maschinenlesbare Advisories empfohlen. | A.8.8 |
|
||||
| 40 | **Dokumentation und Nachbereitung** | Annex I, 2(6) | Alle Schwachstellen und Vorfaelle lueckenlos dokumentieren und fuer mindestens 10 Jahre aufbewahren. Lessons-Learned-Prozess nach jedem bedeutenden Vorfall. Ergebnisse in Risikobewertung einfliessen lassen. | A.5.27 |
|
||||
|
||||
---
|
||||
|
||||
## Produktklassifizierung nach CRA
|
||||
|
||||
Der CRA unterscheidet drei Produktkategorien — Standardprodukte, wichtige Produkte (Klasse I und Klasse II) und kritische Produkte — mit unterschiedlichen Konformitaetsanforderungen:
|
||||
|
||||
### Standardprodukte (Default)
|
||||
|
||||
**Beispiele:** einfache Apps, Desktop-Software, Spiele, Foto-Editoren
|
||||
|
||||
- **Konformitaetsbewertung:** Selbstbewertung (Modul A)
|
||||
- **Anforderungen:** Alle Annex-I-Anforderungen, aber einfachster Nachweis
|
||||
- **Betrifft:** ca. 90% aller Produkte
|
||||
|
||||
### Wichtige Produkte (Annex III) — Klasse I
|
||||
|
||||
**Beispiele:** Passwort-Manager, VPN-Software, Firewalls, Router, Smart-Home-Systeme, IoT-Geraete mit Sensorfunktion, SIEM-Systeme
|
||||
|
||||
- **Konformitaetsbewertung:** Harmonisierte Standards oder Drittanbieter-Bewertung
|
||||
- **Anforderungen:** Alle Annex-I-Anforderungen + erhoehte Nachweispflichten
|
||||
- **Betrifft:** ca. 8% aller Produkte
|
||||
|
||||
### Wichtige Produkte (Annex III) — Klasse II
|
||||
|
||||
**Beispiele:** Betriebssysteme, Hypervisoren, Container-Runtimes, Public-Key-Infrastruktur, industrielle Steuerungssysteme (ICS/SCADA)
|
||||
|
||||
- **Konformitaetsbewertung:** Verpflichtende Drittanbieter-Bewertung durch benannte Stelle
|
||||
- **Anforderungen:** Alle Annex-I-Anforderungen + strengste Nachweispflichten
|
||||
- **Betrifft:** ca. 2% aller Produkte
|
||||
|
||||
### Kritische Produkte (Annex IV)
|
||||
|
||||
**Beispiele:** Hardware-Security-Module (HSM), Smartcard-Chips, Secure Elements, Smart-Meter-Gateways
|
||||
|
||||
- **Konformitaetsbewertung:** Europaeisches Cybersicherheitszertifikat erforderlich (EUCC)
|
||||
- **Anforderungen:** Hoechste Stufe — europaeische Zertifizierung obligatorisch
|
||||
|
||||
---
|
||||
|
||||
## Zuordnung der Controls zu Dokumenten
|
||||
|
||||
Diese 40 Controls koennen automatisiert zu folgenden Compliance-Dokumenten fuehren:
|
||||
|
||||
| Dokument | Controls | Beschreibung |
|
||||
|----------|----------|-------------|
|
||||
| **Cybersecurity Policy** | 1–40 | Uebergreifendes Grundsatzdokument fuer Cybersicherheit |
|
||||
| **Secure Development Policy** | 18–23 | Richtlinie fuer den sicheren Entwicklungsprozess (SSDLC) |
|
||||
| **Vulnerability Management Policy** | 32–35, 39 | CVD, Patching, SBOM-Analyse |
|
||||
| **Incident Response Plan** | 36–38, 40 | 24h/72h Meldung, Eskalation, Nachbereitung |
|
||||
| **Access Control Policy** | 7–12 | Authentifizierung, Autorisierung, Passwort-Richtlinie |
|
||||
| **Cryptographic Policy** | 13–17 | Verschluesselung, Schluesselmanagement, Datenschutz |
|
||||
| **Update/Patch Policy** | 28–31 | Update-Mechanismen, Signierung, Lifecycle-Support |
|
||||
| **Logging & Monitoring Policy** | 24–27 | Security-Logging, SIEM, Anomalie-Erkennung |
|
||||
|
||||
---
|
||||
|
||||
## Zeitplan fuer die Umsetzung
|
||||
|
||||
| Datum | Meilenstein |
|
||||
|-------|------------|
|
||||
| 10.12.2024 | CRA in Kraft getreten |
|
||||
| 11.06.2026 | Konformitaetsbewertungsstellen muessen benannt sein |
|
||||
| 11.09.2026 | **Meldepflichten aktiv** (Controls 37, 38) |
|
||||
| 11.12.2027 | **Volle Anwendung** — alle 40 Controls muessen umgesetzt sein, CE-Kennzeichnung erforderlich |
|
||||
|
||||
---
|
||||
|
||||
## Sanktionen bei Nicht-Einhaltung
|
||||
|
||||
| Verstoss | Maximales Bussgeld |
|
||||
|----------|-------------------|
|
||||
| Wesentliche Anforderungen (Annex I) | 15 Mio. EUR oder 2,5% des weltweiten Jahresumsatzes |
|
||||
| Sonstige Pflichten | 10 Mio. EUR oder 2% des weltweiten Jahresumsatzes |
|
||||
| Falsche/unvollstaendige Informationen | 5 Mio. EUR oder 1% des weltweiten Jahresumsatzes |',
|
||||
legal_refs = ARRAY['Annex I CRA', 'Annex III CRA', 'Annex IV CRA', 'Art. 13 CRA', 'Art. 14 CRA', 'Art. 15 CRA', 'Art. 64 CRA', '(EU) 2024/2847'],
|
||||
tags = ARRAY['security-controls', 'annex-i', 'secure-by-design', 'authentifizierung', 'kryptografie', 'sbom', 'vulnerability', 'patching', 'incident-response', 'produktklassifizierung', 'iso-27001', 'ssdlc'],
|
||||
relevance = 'critical',
|
||||
updated_at = NOW()
|
||||
WHERE id = 'cra-security-controls';
|
||||
|
||||
-- ============================================================================
|
||||
-- 2) Neuer Artikel: CRA-Konformitaetsbewertung — Praktischer Leitfaden
|
||||
-- ============================================================================
|
||||
INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
|
||||
('cra-konformitaet', 'cra',
|
||||
'CRA-Konformitaetsbewertung — Praktischer Leitfaden',
|
||||
'Schritt-fuer-Schritt-Anleitung zur CRA-Konformitaetsbewertung: Produktklassifizierung, Dokumentation, Self-Assessment vs. Drittanbieter-Pruefung, CE-Kennzeichnung.',
|
||||
'## Ueberblick
|
||||
|
||||
Jeder Hersteller muss vor dem Inverkehrbringen eine **Konformitaetsbewertung** durchfuehren, um nachzuweisen, dass sein Produkt die Essential Cybersecurity Requirements (Annex I) erfuellt. Der Aufwand haengt von der Produktkategorie ab.
|
||||
|
||||
## Schritt 1: Produkt klassifizieren
|
||||
|
||||
Bestimmen Sie, ob Ihr Produkt unter eine der Sonderkategorien faellt:
|
||||
|
||||
### Entscheidungsbaum
|
||||
|
||||
```
|
||||
Ist das Produkt in Annex IV gelistet?
|
||||
→ Ja: Kritisches Produkt → Europaeische Zertifizierung (EUCC)
|
||||
→ Nein: Weiter
|
||||
|
||||
Ist das Produkt in Annex III, Klasse II gelistet?
|
||||
→ Ja: Wichtig Klasse II → Drittanbieter-Bewertung (Pflicht)
|
||||
→ Nein: Weiter
|
||||
|
||||
Ist das Produkt in Annex III, Klasse I gelistet?
|
||||
→ Ja: Wichtig Klasse I → Harmonisierte Standards ODER Drittanbieter
|
||||
→ Nein: Standardprodukt → Selbstbewertung (Modul A)
|
||||
```
|
||||
|
||||
## Schritt 2: Cybersecurity-Risikobewertung
|
||||
|
||||
Fuehren Sie eine systematische Risikoanalyse durch:
|
||||
|
||||
1. **Assets identifizieren** — Welche Daten verarbeitet das Produkt? Welche Schnittstellen hat es?
|
||||
2. **Bedrohungen analysieren** — STRIDE-Methodik oder vergleichbar anwenden
|
||||
3. **Schwachstellen bewerten** — Bekannte CVEs, Design-Schwaechen, Konfigurationsfehler
|
||||
4. **Risiken priorisieren** — Eintrittswahrscheinlichkeit × Auswirkung
|
||||
5. **Massnahmen definieren** — Welche Controls aus Annex I adressieren welches Risiko?
|
||||
|
||||
## Schritt 3: Controls implementieren
|
||||
|
||||
Setzen Sie die relevanten Controls aus den 8 Kategorien um (siehe Artikel „CRA Annex I — Essential Cybersecurity Requirements“). Dokumentieren Sie fuer jeden Control:
|
||||
|
||||
- **Status**: Implementiert / In Bearbeitung / Nicht anwendbar
|
||||
- **Nachweis**: Wie wird die Umsetzung belegt? (Code, Konfiguration, Test, Policy)
|
||||
- **Verantwortlich**: Wer ist zustaendig?
|
||||
|
||||
## Schritt 4: Technische Dokumentation
|
||||
|
||||
Die technische Dokumentation muss enthalten:
|
||||
|
||||
- Beschreibung des Produkts und seiner Funktionen
|
||||
- Cybersecurity-Risikobewertung
|
||||
- Angewandte harmonisierte Normen
|
||||
- Nachweis der Einhaltung jeder Annex-I-Anforderung
|
||||
- SBOM (Software Bill of Materials)
|
||||
- Informationen zum Support-Zeitraum
|
||||
|
||||
## Schritt 5: Konformitaetserklaerung und CE-Kennzeichnung
|
||||
|
||||
Nach erfolgreicher Bewertung:
|
||||
|
||||
1. **EU-Konformitaetserklaerung** ausstellen
|
||||
2. **CE-Kennzeichnung** anbringen
|
||||
3. **Dokumentation** mindestens 10 Jahre aufbewahren
|
||||
4. Produkt darf in der EU vertrieben werden
|
||||
|
||||
## Haeufige Fehler
|
||||
|
||||
| Fehler | Konsequenz |
|
||||
|--------|-----------|
|
||||
| Default-Passwoerter nicht entfernt | Verstoss gegen Annex I, 1(3)(d) |
|
||||
| Kein SBOM erstellt | Verstoss gegen Annex I, 1(5) |
|
||||
| Kein Update-Mechanismus | Verstoss gegen Annex I, 1(4) |
|
||||
| Keine CVD-Policy | Verstoss gegen Annex I, 2(5) |
|
||||
| Support-Zeitraum nicht definiert | Verstoss gegen Art. 13(8) |
|
||||
|
||||
## Empfehlung
|
||||
|
||||
Nutzen Sie die **BreakPilot Compliance SDK Control Library**, um den Umsetzungsstand Ihrer CRA-Controls systematisch zu tracken und automatisiert Nachweise zu generieren.',
|
||||
ARRAY['Annex I CRA', 'Annex II CRA', 'Annex III CRA', 'Annex IV CRA', 'Annex V CRA', 'Art. 13 CRA', 'Art. 24 CRA', 'Art. 25 CRA', 'Art. 26 CRA', 'Art. 27 CRA'],
|
||||
ARRAY['konformitaet', 'ce-kennzeichnung', 'self-assessment', 'technische-dokumentation', 'sbom', 'risikobewertung'],
|
||||
'important',
|
||||
ARRAY['https://eur-lex.europa.eu/eli/reg/2024/2847/oj/eng'])
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
@@ -0,0 +1,120 @@
|
||||
-- Migration 060: Multi-Layer Control Architecture — DB Schema
|
||||
-- Adds obligation_extractions, control_patterns, and crosswalk_matrix tables.
|
||||
-- Extends canonical_controls with pattern_id and obligation_ids columns.
|
||||
--
|
||||
-- Part of the Multi-Layer Control Architecture (Phase 1 of 8).
|
||||
-- See: Legal Source → Obligation → Control Pattern → Master Control → Customer Instance
|
||||
|
||||
-- =============================================================================
|
||||
-- 1. Obligation Extractions
|
||||
-- Tracks how each RAG chunk was linked to an obligation (exact match, embedding match, LLM extraction, or inferred).
|
||||
-- =============================================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS obligation_extractions (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
chunk_hash VARCHAR(64) NOT NULL,
|
||||
collection VARCHAR(100) NOT NULL,
|
||||
regulation_code VARCHAR(100) NOT NULL,
|
||||
article VARCHAR(100),
|
||||
paragraph VARCHAR(100),
|
||||
obligation_id VARCHAR(50),
|
||||
obligation_text TEXT,
|
||||
confidence NUMERIC(3,2) CHECK (confidence >= 0 AND confidence <= 1),
|
||||
extraction_method VARCHAR(30) NOT NULL
|
||||
CHECK (extraction_method IN ('exact_match', 'embedding_match', 'llm_extracted', 'inferred')),
|
||||
pattern_id VARCHAR(50),
|
||||
pattern_match_score NUMERIC(3,2) CHECK (pattern_match_score >= 0 AND pattern_match_score <= 1),
|
||||
control_uuid UUID REFERENCES canonical_controls(id),
|
||||
job_id UUID REFERENCES canonical_generation_jobs(id),
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_oe_obligation ON obligation_extractions(obligation_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_oe_pattern ON obligation_extractions(pattern_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_oe_control ON obligation_extractions(control_uuid);
|
||||
CREATE INDEX IF NOT EXISTS idx_oe_regulation ON obligation_extractions(regulation_code);
|
||||
CREATE INDEX IF NOT EXISTS idx_oe_chunk ON obligation_extractions(chunk_hash);
|
||||
CREATE INDEX IF NOT EXISTS idx_oe_method ON obligation_extractions(extraction_method);
|
||||
|
||||
COMMENT ON TABLE obligation_extractions IS
|
||||
'Tracks chunk-to-obligation linkage from the 3-tier extraction pipeline (exact/embedding/LLM)';
|
||||
|
||||
-- =============================================================================
|
||||
-- 2. Control Patterns Registry
|
||||
-- DB mirror of the YAML pattern library for SQL queries and joins.
|
||||
-- =============================================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS control_patterns (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
pattern_id VARCHAR(50) UNIQUE NOT NULL,
|
||||
name VARCHAR(255) NOT NULL,
|
||||
name_de VARCHAR(255),
|
||||
domain VARCHAR(10) NOT NULL,
|
||||
category VARCHAR(50),
|
||||
description TEXT,
|
||||
template_objective TEXT,
|
||||
template_rationale TEXT,
|
||||
template_requirements JSONB DEFAULT '[]',
|
||||
template_test_procedure JSONB DEFAULT '[]',
|
||||
template_evidence JSONB DEFAULT '[]',
|
||||
severity_default VARCHAR(20)
|
||||
CHECK (severity_default IN ('low', 'medium', 'high', 'critical')),
|
||||
implementation_effort_default VARCHAR(2)
|
||||
CHECK (implementation_effort_default IN ('s', 'm', 'l', 'xl')),
|
||||
obligation_match_keywords JSONB DEFAULT '[]',
|
||||
tags JSONB DEFAULT '[]',
|
||||
open_anchor_refs JSONB DEFAULT '[]',
|
||||
composable_with JSONB DEFAULT '[]',
|
||||
version VARCHAR(10) DEFAULT '1.0',
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Secondary indexes on control_patterns for lookups by domain and by category.
CREATE INDEX IF NOT EXISTS idx_cp_domain ON control_patterns(domain);
|
||||
CREATE INDEX IF NOT EXISTS idx_cp_category ON control_patterns(category);
|
||||
-- NOTE(review): pattern_id is declared UNIQUE NOT NULL in the CREATE TABLE above, and
-- PostgreSQL backs a UNIQUE constraint with its own index, so this extra index looks
-- redundant. Confirm nothing references idx_cp_pattern_id by name before dropping it.
CREATE INDEX IF NOT EXISTS idx_cp_pattern_id ON control_patterns(pattern_id);
|
||||
|
||||
COMMENT ON TABLE control_patterns IS
|
||||
'Registry of control patterns (DB mirror of YAML library). Pattern ID format: CP-{DOMAIN}-{NNN}';
|
||||
|
||||
-- =============================================================================
|
||||
-- 3. Crosswalk Matrix
|
||||
-- The "golden thread" from legal source through to implementation.
|
||||
-- =============================================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS crosswalk_matrix (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
regulation_code VARCHAR(100) NOT NULL,
|
||||
article VARCHAR(100),
|
||||
paragraph VARCHAR(100),
|
||||
obligation_id VARCHAR(50),
|
||||
pattern_id VARCHAR(50),
|
||||
master_control_id VARCHAR(20),
|
||||
master_control_uuid UUID REFERENCES canonical_controls(id),
|
||||
tom_control_id VARCHAR(30),
|
||||
confidence NUMERIC(3,2) CHECK (confidence >= 0 AND confidence <= 1),
|
||||
source VARCHAR(30) DEFAULT 'auto'
|
||||
CHECK (source IN ('manual', 'auto', 'migrated')),
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_cw_regulation ON crosswalk_matrix(regulation_code, article);
|
||||
CREATE INDEX IF NOT EXISTS idx_cw_obligation ON crosswalk_matrix(obligation_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_cw_pattern ON crosswalk_matrix(pattern_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_cw_control ON crosswalk_matrix(master_control_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_cw_tom ON crosswalk_matrix(tom_control_id);
|
||||
|
||||
COMMENT ON TABLE crosswalk_matrix IS
|
||||
'Golden thread: regulation → article → obligation → pattern → master control → TOM';
|
||||
|
||||
-- =============================================================================
|
||||
-- 4. Extend canonical_controls with pattern + obligation linkage
|
||||
-- =============================================================================
|
||||
|
||||
ALTER TABLE canonical_controls
|
||||
ADD COLUMN IF NOT EXISTS pattern_id VARCHAR(50);
|
||||
|
||||
ALTER TABLE canonical_controls
|
||||
ADD COLUMN IF NOT EXISTS obligation_ids JSONB DEFAULT '[]';
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_cc_pattern ON canonical_controls(pattern_id);
|
||||
@@ -0,0 +1,49 @@
|
||||
-- Migration 061: Obligation Candidates + Decomposition Tracking
|
||||
-- Supports Pass 0a (Obligation Extraction from Rich Controls) and
|
||||
-- Pass 0b (Atomic Control Composition).
|
||||
--
|
||||
-- Part of the Multi-Layer Control Architecture — Decomposition Pass.
|
||||
|
||||
-- =============================================================================
|
||||
-- 1. Obligation Candidates
|
||||
-- Individual normative obligations extracted from Rich Controls (Pass 0a).
|
||||
-- =============================================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS obligation_candidates (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
parent_control_uuid UUID NOT NULL REFERENCES canonical_controls(id),
|
||||
candidate_id VARCHAR(30) NOT NULL,
|
||||
obligation_text TEXT NOT NULL,
|
||||
action VARCHAR(500),
|
||||
object TEXT,
|
||||
condition TEXT,
|
||||
normative_strength VARCHAR(20) DEFAULT 'must'
|
||||
CHECK (normative_strength IN ('must', 'should', 'may')),
|
||||
is_test_obligation BOOLEAN DEFAULT FALSE,
|
||||
is_reporting_obligation BOOLEAN DEFAULT FALSE,
|
||||
extraction_confidence NUMERIC(3,2) DEFAULT 0.0
|
||||
CHECK (extraction_confidence >= 0 AND extraction_confidence <= 1),
|
||||
quality_flags JSONB DEFAULT '{}',
|
||||
release_state VARCHAR(30) DEFAULT 'extracted'
|
||||
CHECK (release_state IN ('extracted', 'validated', 'rejected', 'composed')),
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_oc_parent ON obligation_candidates(parent_control_uuid);
|
||||
CREATE INDEX IF NOT EXISTS idx_oc_state ON obligation_candidates(release_state);
|
||||
CREATE INDEX IF NOT EXISTS idx_oc_candidate ON obligation_candidates(candidate_id);
|
||||
|
||||
COMMENT ON TABLE obligation_candidates IS
|
||||
'Individual normative obligations extracted from Rich Controls via Pass 0a decomposition';
|
||||
|
||||
-- =============================================================================
|
||||
-- 2. Extend canonical_controls for decomposition tracking
|
||||
-- =============================================================================
|
||||
|
||||
ALTER TABLE canonical_controls
|
||||
ADD COLUMN IF NOT EXISTS parent_control_uuid UUID REFERENCES canonical_controls(id);
|
||||
|
||||
ALTER TABLE canonical_controls
|
||||
ADD COLUMN IF NOT EXISTS decomposition_method VARCHAR(30);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_cc_parent ON canonical_controls(parent_control_uuid);
|
||||
@@ -0,0 +1,22 @@
|
||||
-- Migration 062: Add pipeline_version to track which generation rules produced each control/chunk
|
||||
--
|
||||
-- v1 = Original pipeline (local LLM prefilter, old prompt without null-skip)
|
||||
-- v2 = Improved pipeline (skip_prefilter, Anthropic decides relevance, annexes protected)
|
||||
--
|
||||
-- This allows identifying controls that may need reprocessing when pipeline rules change.
|
||||
|
||||
ALTER TABLE canonical_controls
|
||||
ADD COLUMN IF NOT EXISTS pipeline_version smallint NOT NULL DEFAULT 1;
|
||||
|
||||
ALTER TABLE canonical_processed_chunks
|
||||
ADD COLUMN IF NOT EXISTS pipeline_version smallint NOT NULL DEFAULT 1;
|
||||
|
||||
-- Index for efficient querying by version
|
||||
CREATE INDEX IF NOT EXISTS idx_canonical_controls_pipeline_version
|
||||
ON canonical_controls (pipeline_version);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_canonical_processed_chunks_pipeline_version
|
||||
ON canonical_processed_chunks (pipeline_version);
|
||||
|
||||
COMMENT ON COLUMN canonical_controls.pipeline_version IS 'Generation pipeline version: 1=original (local prefilter), 2=improved (Anthropic decides relevance, annexes protected)';
|
||||
COMMENT ON COLUMN canonical_processed_chunks.pipeline_version IS 'Pipeline version used when this chunk was processed';
|
||||
@@ -0,0 +1,23 @@
|
||||
-- Migration 063: Scoped Control Applicability
|
||||
--
|
||||
-- Adds 3 new JSONB columns to canonical_controls for filtering controls
|
||||
-- based on customer industry, company size, and compliance scope.
|
||||
--
|
||||
-- v3 pipeline generates these fields automatically via LLM.
|
||||
-- Old controls (v1/v2) will be backfilled separately.
|
||||
|
||||
ALTER TABLE canonical_controls
|
||||
ADD COLUMN IF NOT EXISTS applicable_industries JSONB DEFAULT NULL,
|
||||
ADD COLUMN IF NOT EXISTS applicable_company_size JSONB DEFAULT NULL,
|
||||
ADD COLUMN IF NOT EXISTS scope_conditions JSONB DEFAULT NULL;
|
||||
|
||||
-- GIN index for JSONB containment queries (e.g. applicable_industries @> '"Telekommunikation"')
|
||||
CREATE INDEX IF NOT EXISTS idx_cc_applicable_industries
|
||||
ON canonical_controls USING gin (applicable_industries);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_cc_applicable_company_size
|
||||
ON canonical_controls USING gin (applicable_company_size);
|
||||
|
||||
COMMENT ON COLUMN canonical_controls.applicable_industries IS 'Industries this control applies to, e.g. ["all"] or ["Telekommunikation", "Energie"]. NULL = not yet classified.';
|
||||
COMMENT ON COLUMN canonical_controls.applicable_company_size IS 'Company sizes this control applies to, e.g. ["all"] or ["medium", "large", "enterprise"]. NULL = not yet classified.';
|
||||
COMMENT ON COLUMN canonical_controls.scope_conditions IS 'Optional scope conditions, e.g. {"requires_any": ["uses_ai"], "description": "..."}. NULL = no conditions.';
|
||||
@@ -0,0 +1,105 @@
|
||||
-- Migration 064: VVT Master Libraries — 8 global reference tables
|
||||
-- These are shared across all tenants (no tenant_id).
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- 1. Data Subjects (Betroffenenkategorien)
|
||||
CREATE TABLE IF NOT EXISTS vvt_lib_data_subjects (
|
||||
id VARCHAR(50) PRIMARY KEY,
|
||||
label_de VARCHAR(200) NOT NULL,
|
||||
description_de TEXT,
|
||||
art9_relevant BOOLEAN DEFAULT FALSE,
|
||||
typical_for JSONB DEFAULT '[]'::jsonb,
|
||||
sort_order INTEGER DEFAULT 0,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- 2. Data Categories (Datenkategorien — hierarchisch)
|
||||
CREATE TABLE IF NOT EXISTS vvt_lib_data_categories (
|
||||
id VARCHAR(50) PRIMARY KEY,
|
||||
parent_id VARCHAR(50) REFERENCES vvt_lib_data_categories(id) ON DELETE SET NULL,
|
||||
label_de VARCHAR(200) NOT NULL,
|
||||
description_de TEXT,
|
||||
is_art9 BOOLEAN DEFAULT FALSE,
|
||||
is_art10 BOOLEAN DEFAULT FALSE,
|
||||
risk_weight INTEGER DEFAULT 1 CHECK (risk_weight BETWEEN 1 AND 5),
|
||||
default_retention_rule VARCHAR(50),
|
||||
default_legal_basis VARCHAR(50),
|
||||
sort_order INTEGER DEFAULT 0,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_vvt_lib_data_categories_parent ON vvt_lib_data_categories(parent_id);
|
||||
|
||||
-- 3. Recipients (Empfaengerkategorien)
|
||||
CREATE TABLE IF NOT EXISTS vvt_lib_recipients (
|
||||
id VARCHAR(50) PRIMARY KEY,
|
||||
type VARCHAR(20) NOT NULL CHECK (type IN ('INTERNAL', 'PROCESSOR', 'CONTROLLER', 'AUTHORITY')),
|
||||
label_de VARCHAR(200) NOT NULL,
|
||||
description_de TEXT,
|
||||
is_third_country BOOLEAN DEFAULT FALSE,
|
||||
country VARCHAR(5),
|
||||
sort_order INTEGER DEFAULT 0,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- 4. Legal Bases (Rechtsgrundlagen)
|
||||
CREATE TABLE IF NOT EXISTS vvt_lib_legal_bases (
|
||||
id VARCHAR(50) PRIMARY KEY,
|
||||
article VARCHAR(50) NOT NULL,
|
||||
type VARCHAR(30) NOT NULL CHECK (type IN ('CONSENT', 'CONTRACT', 'LEGAL_OBLIGATION', 'VITAL_INTEREST', 'PUBLIC_TASK', 'LEGITIMATE_INTEREST', 'ART9', 'NATIONAL')),
|
||||
label_de VARCHAR(300) NOT NULL,
|
||||
description_de TEXT,
|
||||
is_art9 BOOLEAN DEFAULT FALSE,
|
||||
typical_national_law VARCHAR(100),
|
||||
sort_order INTEGER DEFAULT 0,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- 5. Retention Rules (Aufbewahrungsfristen)
|
||||
CREATE TABLE IF NOT EXISTS vvt_lib_retention_rules (
|
||||
id VARCHAR(50) PRIMARY KEY,
|
||||
label_de VARCHAR(300) NOT NULL,
|
||||
description_de TEXT,
|
||||
legal_basis VARCHAR(200),
|
||||
duration INTEGER NOT NULL,
|
||||
duration_unit VARCHAR(10) NOT NULL CHECK (duration_unit IN ('DAYS', 'MONTHS', 'YEARS')),
|
||||
start_event VARCHAR(200),
|
||||
deletion_procedure VARCHAR(500),
|
||||
sort_order INTEGER DEFAULT 0,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- 6. Transfer Mechanisms (Uebermittlungsmechanismen)
|
||||
CREATE TABLE IF NOT EXISTS vvt_lib_transfer_mechanisms (
|
||||
id VARCHAR(50) PRIMARY KEY,
|
||||
label_de VARCHAR(300) NOT NULL,
|
||||
description_de TEXT,
|
||||
article VARCHAR(50),
|
||||
requires_tia BOOLEAN DEFAULT FALSE,
|
||||
sort_order INTEGER DEFAULT 0,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- 7. Purposes (Verarbeitungszwecke)
|
||||
CREATE TABLE IF NOT EXISTS vvt_lib_purposes (
|
||||
id VARCHAR(50) PRIMARY KEY,
|
||||
label_de VARCHAR(300) NOT NULL,
|
||||
description_de TEXT,
|
||||
typical_legal_basis VARCHAR(50),
|
||||
typical_for JSONB DEFAULT '[]'::jsonb,
|
||||
sort_order INTEGER DEFAULT 0,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- 8. TOMs (Technisch-Organisatorische Massnahmen)
|
||||
CREATE TABLE IF NOT EXISTS vvt_lib_toms (
|
||||
id VARCHAR(50) PRIMARY KEY,
|
||||
category VARCHAR(30) NOT NULL CHECK (category IN ('accessControl', 'confidentiality', 'integrity', 'availability', 'separation')),
|
||||
label_de VARCHAR(300) NOT NULL,
|
||||
description_de TEXT,
|
||||
art32_reference VARCHAR(100),
|
||||
sort_order INTEGER DEFAULT 0,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,200 @@
|
||||
-- Migration 065: VVT Library Seed Data (~150 entries)
|
||||
-- All content self-authored, MIT-compatible.
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- =============================================================================
|
||||
-- Data Subjects (15)
|
||||
-- =============================================================================
|
||||
INSERT INTO vvt_lib_data_subjects (id, label_de, description_de, art9_relevant, typical_for, sort_order) VALUES
|
||||
('EMPLOYEES', 'Beschaeftigte', 'Aktuelle Mitarbeiterinnen und Mitarbeiter', FALSE, '["hr","it_operations"]', 1),
|
||||
('APPLICANTS', 'Bewerber', 'Stellenbewerberinnen und -bewerber', FALSE, '["hr"]', 2),
|
||||
('CUSTOMERS', 'Kunden', 'Aktive Kundinnen und Kunden', FALSE, '["sales_crm","support","finance"]', 3),
|
||||
('PROSPECTIVE_CUSTOMERS', 'Interessenten', 'Potenzielle Kundinnen und Kunden', FALSE, '["marketing","sales_crm"]', 4),
|
||||
('SUPPLIERS', 'Lieferanten', 'Geschaeftspartner als Lieferanten', FALSE, '["finance"]', 5),
|
||||
('BUSINESS_PARTNERS', 'Geschaeftspartner', 'Kooperationspartner, Berater, Dienstleister', FALSE, '["management","finance"]', 6),
|
||||
('VISITORS', 'Besucher', 'Betriebsbesucher und Gaeste', FALSE, '["management"]', 7),
|
||||
('WEBSITE_USERS', 'Website-Nutzer', 'Besucher der Unternehmenswebsite', FALSE, '["marketing","it_operations"]', 8),
|
||||
('APP_USERS', 'App-Nutzer', 'Nutzer mobiler Anwendungen', FALSE, '["product_engineering"]', 9),
|
||||
('NEWSLETTER_SUBSCRIBERS', 'Newsletter-Abonnenten', 'Empfaenger von Newslettern', FALSE, '["marketing"]', 10),
|
||||
('MEMBERS', 'Mitglieder', 'Vereins- oder Verbandsmitglieder', FALSE, '["management"]', 11),
|
||||
('PATIENTS', 'Patienten', 'Patientinnen und Patienten', TRUE, '["other"]', 12),
|
||||
('STUDENTS', 'Schueler/Studierende', 'Lernende in Bildungseinrichtungen', FALSE, '["other"]', 13),
|
||||
('MINORS', 'Minderjaehrige', 'Personen unter 16 Jahren (Art. 8 DSGVO)', FALSE, '["other"]', 14),
|
||||
('OTHER', 'Sonstige', 'Andere Betroffenenkategorien', FALSE, '[]', 15)
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- Data Categories — Parent categories (9)
|
||||
-- =============================================================================
|
||||
INSERT INTO vvt_lib_data_categories (id, parent_id, label_de, description_de, is_art9, is_art10, risk_weight, sort_order) VALUES
|
||||
('IDENTIFICATION', NULL, 'Identifikationsdaten', 'Daten zur Identifizierung natuerlicher Personen', FALSE, FALSE, 2, 1),
|
||||
('CONTACT_DATA', NULL, 'Kontaktdaten', 'Kommunikationsdaten und Adressen', FALSE, FALSE, 1, 2),
|
||||
('FINANCIAL', NULL, 'Finanzdaten', 'Bank-, Gehalts- und Zahlungsdaten', FALSE, FALSE, 3, 3),
|
||||
('EMPLOYMENT', NULL, 'Beschaeftigungsdaten', 'Arbeitsverhaeltnis und Qualifikation', FALSE, FALSE, 2, 4),
|
||||
('DIGITAL_IDENTITY', NULL, 'Digitale Identitaet', 'Online-Kennungen und Zugangsdaten', FALSE, FALSE, 2, 5),
|
||||
('COMMUNICATION', NULL, 'Kommunikationsdaten', 'Nachrichten und Vertragsdaten', FALSE, FALSE, 2, 6),
|
||||
('MEDIA', NULL, 'Medien- und Standortdaten', 'Bild, Video, Standort', FALSE, FALSE, 3, 7),
|
||||
('ART9_SPECIAL', NULL, 'Besondere Kategorien (Art. 9)', 'Besonders schuetzenswerte Daten', TRUE, FALSE, 5, 8),
|
||||
('ART10', NULL, 'Strafrechtliche Daten (Art. 10)', 'Daten ueber strafrechtliche Verurteilungen', FALSE, TRUE, 5, 9)
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- Data Categories — Child categories (29)
|
||||
-- =============================================================================
|
||||
INSERT INTO vvt_lib_data_categories (id, parent_id, label_de, description_de, is_art9, is_art10, risk_weight, default_retention_rule, default_legal_basis, sort_order) VALUES
|
||||
('NAME', 'IDENTIFICATION', 'Name', 'Vor- und Nachname, Geburtsname', FALSE, FALSE, 1, NULL, NULL, 10),
|
||||
('DOB', 'IDENTIFICATION', 'Geburtsdatum', 'Geburtstag und -ort', FALSE, FALSE, 2, NULL, NULL, 11),
|
||||
('ADDRESS', 'CONTACT_DATA', 'Anschrift', 'Wohn- und Postadresse', FALSE, FALSE, 1, NULL, NULL, 20),
|
||||
('CONTACT', 'CONTACT_DATA', 'Kontaktinformationen', 'Telefon, E-Mail, Fax', FALSE, FALSE, 1, NULL, NULL, 21),
|
||||
('ID_NUMBER', 'IDENTIFICATION', 'Ausweisnummer', 'Personalausweis-, Reisepassnummer', FALSE, FALSE, 3, NULL, NULL, 12),
|
||||
('SOCIAL_SECURITY', 'IDENTIFICATION', 'Sozialversicherungsnummer', 'SV-Nummer', FALSE, FALSE, 4, 'BDSG_35_DELETE', 'ART6_1C', 13),
|
||||
('TAX_ID', 'FINANCIAL', 'Steuer-ID', 'Steueridentifikationsnummer', FALSE, FALSE, 3, 'AO_147_10Y', 'ART6_1C', 30),
|
||||
('BANK_ACCOUNT', 'FINANCIAL', 'Bankverbindung', 'IBAN, BIC, Kontonummer', FALSE, FALSE, 3, 'HGB_257_10Y', 'ART6_1B', 31),
|
||||
('PAYMENT_DATA', 'FINANCIAL', 'Zahlungsdaten', 'Kreditkartendaten, Zahlungshistorie', FALSE, FALSE, 4, 'HGB_257_10Y', 'ART6_1B', 32),
|
||||
('SALARY_DATA', 'FINANCIAL', 'Gehaltsdaten', 'Brutto/Netto, Zulagen, Abzuege', FALSE, FALSE, 4, 'AO_147_10Y', 'BDSG_26', 33),
|
||||
('EMPLOYMENT_DATA', 'EMPLOYMENT', 'Arbeitsvertragsdaten', 'Vertragsdetails, Position, Abteilung', FALSE, FALSE, 2, 'HGB_257_10Y', 'BDSG_26', 40),
|
||||
('EDUCATION_DATA', 'EMPLOYMENT', 'Ausbildungsdaten', 'Zeugnisse, Qualifikationen, Zertifikate', FALSE, FALSE, 2, 'AGG_15_6M', 'BDSG_26', 41),
|
||||
('IP_ADDRESS', 'DIGITAL_IDENTITY', 'IP-Adresse', 'IPv4/IPv6 Adressen', FALSE, FALSE, 2, 'CUSTOM_90D', 'ART6_1F', 50),
|
||||
('DEVICE_ID', 'DIGITAL_IDENTITY', 'Geraete-ID', 'Browser-Fingerprint, Device-ID', FALSE, FALSE, 2, 'CUSTOM_14M', 'ART6_1A', 51),
|
||||
('LOGIN_DATA', 'DIGITAL_IDENTITY', 'Zugangsdaten', 'Benutzername, Passwort-Hash', FALSE, FALSE, 3, NULL, 'ART6_1B', 52),
|
||||
('USAGE_DATA', 'DIGITAL_IDENTITY', 'Nutzungsdaten', 'Klickverhalten, Seitenaufrufe, Sessions', FALSE, FALSE, 2, 'CUSTOM_14M', 'ART6_1A', 53),
|
||||
('COMMUNICATION_DATA', 'COMMUNICATION', 'Korrespondenz', 'E-Mails, Chat-Nachrichten, Briefe', FALSE, FALSE, 2, 'BGB_195_3Y', NULL, 60),
|
||||
('CONTRACT_DATA', 'COMMUNICATION', 'Vertragsdaten', 'Vertragsdetails, Bestellungen', FALSE, FALSE, 2, 'HGB_257_10Y', 'ART6_1B', 61),
|
||||
('PHOTO_VIDEO', 'MEDIA', 'Bild-/Videodaten', 'Fotos, Videos von Personen', FALSE, FALSE, 3, 'CONSENT_REVOKE', 'ART6_1A', 70),
|
||||
('LOCATION_DATA', 'MEDIA', 'Standortdaten', 'GPS-Koordinaten, Aufenthaltsorte', FALSE, FALSE, 3, 'CUSTOM_90D', 'ART6_1A', 71),
|
||||
('HEALTH_DATA', 'ART9_SPECIAL', 'Gesundheitsdaten', 'Krankheitsdaten, Atteste, Behinderung', TRUE, FALSE, 5, 'BDSG_35_DELETE', 'ART9_2H', 80),
|
||||
('GENETIC_DATA', 'ART9_SPECIAL', 'Genetische Daten', 'DNA-Analysen, genetische Merkmale', TRUE, FALSE, 5, 'BDSG_35_DELETE', 'ART9_2A', 81),
|
||||
('BIOMETRIC_DATA', 'ART9_SPECIAL', 'Biometrische Daten', 'Fingerabdruck, Gesichtserkennung', TRUE, FALSE, 5, 'BDSG_35_DELETE', 'ART9_2A', 82),
|
||||
('RACIAL_ETHNIC', 'ART9_SPECIAL', 'Rassische/ethnische Herkunft', 'Ethnische Zugehoerigkeit', TRUE, FALSE, 5, NULL, 'ART9_2A', 83),
|
||||
('POLITICAL_OPINIONS', 'ART9_SPECIAL', 'Politische Meinungen', 'Parteizugehoerigkeit, politische Haltung', TRUE, FALSE, 5, NULL, 'ART9_2A', 84),
|
||||
('RELIGIOUS_BELIEFS', 'ART9_SPECIAL', 'Religioese Ueberzeugungen', 'Konfession, religioese Praktiken', TRUE, FALSE, 5, NULL, 'ART9_2A', 85),
|
||||
('TRADE_UNION', 'ART9_SPECIAL', 'Gewerkschaftszugehoerigkeit', 'Mitgliedschaft in Gewerkschaften', TRUE, FALSE, 5, NULL, 'ART9_2A', 86),
|
||||
('SEX_LIFE', 'ART9_SPECIAL', 'Sexualleben/Orientierung', 'Sexuelle Orientierung', TRUE, FALSE, 5, NULL, 'ART9_2A', 87),
|
||||
('CRIMINAL_DATA', 'ART10', 'Strafrechtliche Daten', 'Verurteilungen, Straftaten, Fuehrungszeugnis', FALSE, TRUE, 5, 'BDSG_35_DELETE', 'BDSG_24', 90)
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- Legal Bases (12)
|
||||
-- =============================================================================
|
||||
-- Seed the legal-basis lookup table (vvt_lib_legal_bases) with 12 rows:
-- six Art. 6(1) bases, three Art. 9(2) bases (is_art9 = TRUE) and three national-law bases.
-- German labels use the ue/ae/oe transliteration instead of umlauts.
-- ON CONFLICT (id) DO NOTHING keeps the statement re-runnable: rows whose id already
-- exists are left untouched, so label corrections do not reach already-seeded databases.
INSERT INTO vvt_lib_legal_bases (id, article, type, label_de, description_de, is_art9, typical_national_law, sort_order) VALUES
|
||||
('ART6_1A', 'Art. 6 Abs. 1 lit. a', 'CONSENT', 'Einwilligung', 'Die betroffene Person hat ihre Einwilligung gegeben', FALSE, NULL, 1),
|
||||
('ART6_1B', 'Art. 6 Abs. 1 lit. b', 'CONTRACT', 'Vertragserfuellung', 'Erforderlich fuer die Erfuellung eines Vertrags', FALSE, NULL, 2),
|
||||
('ART6_1C', 'Art. 6 Abs. 1 lit. c', 'LEGAL_OBLIGATION', 'Rechtliche Verpflichtung', 'Erforderlich zur Erfuellung einer rechtlichen Verpflichtung', FALSE, NULL, 3),
|
||||
('ART6_1D', 'Art. 6 Abs. 1 lit. d', 'VITAL_INTEREST', 'Lebenswichtige Interessen', 'Schutz lebenswichtiger Interessen', FALSE, NULL, 4),
|
||||
('ART6_1E', 'Art. 6 Abs. 1 lit. e', 'PUBLIC_TASK', 'Oeffentliches Interesse', 'Wahrnehmung einer Aufgabe im oeffentlichen Interesse', FALSE, NULL, 5),
|
||||
('ART6_1F', 'Art. 6 Abs. 1 lit. f', 'LEGITIMATE_INTEREST', 'Berechtigtes Interesse', 'Wahrung berechtigter Interessen des Verantwortlichen', FALSE, NULL, 6),
|
||||
('ART9_2A', 'Art. 9 Abs. 2 lit. a', 'ART9', 'Ausdrueckliche Einwilligung (Art. 9)', 'Ausdrueckliche Einwilligung fuer besondere Kategorien', TRUE, NULL, 7),
|
||||
('ART9_2B', 'Art. 9 Abs. 2 lit. b', 'ART9', 'Arbeitsrecht (Art. 9)', 'Erforderlich im Arbeitsrecht', TRUE, 'BDSG § 26', 8),
|
||||
('ART9_2H', 'Art. 9 Abs. 2 lit. h', 'ART9', 'Gesundheitsvorsorge (Art. 9)', 'Gesundheitsvorsorge oder Arbeitsmedizin', TRUE, NULL, 9),
|
||||
('BDSG_26', '§ 26 BDSG', 'NATIONAL', 'Beschaeftigtenverhaeltnis', 'Datenverarbeitung fuer Zwecke des Beschaeftigungsverhaeltnisses', FALSE, 'BDSG § 26', 10),
|
||||
('BDSG_24', '§ 24 BDSG', 'NATIONAL', 'Strafrechtliche Daten', 'Verarbeitung strafrechtlicher Daten (Art. 10 DSGVO)', FALSE, 'BDSG § 24', 11),
|
||||
('UWG_7', '§ 7 UWG', 'NATIONAL', 'Werbung mit Einwilligung', 'Werbliche Ansprache nach UWG', FALSE, 'UWG § 7', 12)
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- Retention Rules (12)
|
||||
-- =============================================================================
|
||||
INSERT INTO vvt_lib_retention_rules (id, label_de, description_de, legal_basis, duration, duration_unit, start_event, deletion_procedure, sort_order) VALUES
|
||||
('HGB_257_10Y', '10 Jahre (HGB § 257)', 'Handelsrechtliche Aufbewahrungspflicht fuer Handelsbuecher, Jahresabschluesse, Buchungsbelege', 'HGB § 257', 10, 'YEARS', 'Ende des Kalenderjahres', 'Vernichtung nach Ablauf der Aufbewahrungsfrist', 1),
|
||||
('AO_147_10Y', '10 Jahre (AO § 147)', 'Steuerrechtliche Aufbewahrungspflicht fuer Buchungsbelege', 'AO § 147', 10, 'YEARS', 'Ende des Kalenderjahres', 'Vernichtung nach Ablauf der Aufbewahrungsfrist', 2),
|
||||
('AO_147_6Y', '6 Jahre (AO § 147)', 'Steuerrechtliche Aufbewahrungspflicht fuer Geschaeftsbriefe', 'AO § 147', 6, 'YEARS', 'Ende des Kalenderjahres', 'Vernichtung nach Ablauf der Aufbewahrungsfrist', 3),
|
||||
('AGG_15_6M', '6 Monate (AGG § 15)', 'Frist fuer Schadensersatzansprueche nach AGG', 'AGG § 15', 6, 'MONTHS', 'Ablehnung / Ende des Verfahrens', 'Loeschung personenbezogener Bewerbungsdaten', 4),
|
||||
('ARBZG_16_2Y', '2 Jahre (ArbZG § 16)', 'Aufzeichnungspflicht der Arbeitszeiten', 'ArbZG § 16', 2, 'YEARS', 'Ende des Aufzeichnungszeitraums', 'Vernichtung der Arbeitszeitaufzeichnungen', 5),
|
||||
('BGB_195_3Y', '3 Jahre (BGB § 195)', 'Regelverjaehrungsfrist fuer vertragliche Ansprueche', 'BGB § 195', 3, 'YEARS', 'Ende des Jahres der Anspruchsentstehung', 'Loeschung nach Ablauf der Verjaehrungsfrist', 6),
|
||||
('CONSENT_REVOKE', 'Bis Widerruf', 'Speicherung bis zum Widerruf der Einwilligung', 'Art. 7 Abs. 3 DSGVO', 0, 'DAYS', 'Widerruf der Einwilligung', 'Unverzuegliche Loeschung nach Widerruf', 7),
|
||||
('PURPOSE_END', 'Bis Zweckerfuellung', 'Speicherung bis der Verarbeitungszweck erreicht ist', 'Art. 5 Abs. 1 lit. e DSGVO', 0, 'DAYS', 'Zweckerfuellung', 'Loeschung nach Zweckerfuellung', 8),
|
||||
('BDSG_35_DELETE', 'Unverzuegliche Loeschung', 'Loeschung sobald Speicherung nicht mehr erforderlich', 'BDSG § 35', 0, 'DAYS', 'Wegfall der Erforderlichkeit', 'Unverzuegliche Loeschung', 9),
|
||||
('CUSTOM_90D', '90 Tage', 'Benutzerdefinierte Aufbewahrungsfrist von 90 Tagen', NULL, 90, 'DAYS', 'Erstellung des Datensatzes', 'Automatische Loeschung nach 90 Tagen', 10),
|
||||
('CUSTOM_14M', '14 Monate', 'Benutzerdefinierte Aufbewahrungsfrist von 14 Monaten (z.B. Analytics)', NULL, 14, 'MONTHS', 'Erstellung des Datensatzes', 'Automatische Loeschung nach 14 Monaten', 11),
|
||||
('CUSTOM_30D', '30 Tage', 'Benutzerdefinierte Aufbewahrungsfrist von 30 Tagen', NULL, 30, 'DAYS', 'Erstellung des Datensatzes', 'Automatische Loeschung nach 30 Tagen', 12)
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- Recipients (15)
|
||||
-- =============================================================================
|
||||
INSERT INTO vvt_lib_recipients (id, type, label_de, description_de, is_third_country, country, sort_order) VALUES
|
||||
('INTERNAL_HR', 'INTERNAL', 'Personalabteilung', 'Interne HR-Abteilung', FALSE, 'DE', 1),
|
||||
('INTERNAL_FINANCE', 'INTERNAL', 'Finanzabteilung', 'Interne Buchhaltung und Finanzen', FALSE, 'DE', 2),
|
||||
('INTERNAL_IT', 'INTERNAL', 'IT-Abteilung', 'Interne IT-Administration', FALSE, 'DE', 3),
|
||||
('INTERNAL_MANAGEMENT', 'INTERNAL', 'Geschaeftsfuehrung', 'Geschaeftsfuehrung und Vorstand', FALSE, 'DE', 4),
|
||||
('INTERNAL_MARKETING', 'INTERNAL', 'Marketingabteilung', 'Internes Marketing-Team', FALSE, 'DE', 5),
|
||||
('INTERNAL_SUPPORT', 'INTERNAL', 'Kundenservice', 'Interner Support und Service', FALSE, 'DE', 6),
|
||||
('PROCESSOR_PAYROLL', 'PROCESSOR', 'Lohnabrechnungsdienstleister', 'Externer Gehaltsabrechnungs-Dienstleister', FALSE, 'DE', 7),
|
||||
('PROCESSOR_HOSTING', 'PROCESSOR', 'Hosting-Provider', 'Cloud- oder Server-Hosting-Anbieter', FALSE, NULL, 8),
|
||||
('PROCESSOR_ANALYTICS', 'PROCESSOR', 'Analytics-Anbieter', 'Web-Analytics und Tracking-Dienstleister', FALSE, NULL, 9),
|
||||
('PROCESSOR_EMAIL', 'PROCESSOR', 'E-Mail-Dienstleister', 'Newsletter- und E-Mail-Versand-Anbieter', FALSE, NULL, 10),
|
||||
('PROCESSOR_HELPDESK', 'PROCESSOR', 'Helpdesk-Anbieter', 'Ticketsystem- und Support-Plattform', FALSE, NULL, 11),
|
||||
('AUTHORITY_FINANZAMT', 'AUTHORITY', 'Finanzamt', 'Zustaendiges Finanzamt', FALSE, 'DE', 12),
|
||||
('AUTHORITY_SOZIALVERSICHERUNG', 'AUTHORITY', 'Sozialversicherungstraeger', 'Renten-, Kranken-, Arbeitslosen-, Pflegeversicherung', FALSE, 'DE', 13),
|
||||
('AUTHORITY_KRANKENKASSE', 'AUTHORITY', 'Krankenkasse', 'Gesetzliche oder private Krankenkasse', FALSE, 'DE', 14),
|
||||
('AUTHORITY_DATENSCHUTZ', 'AUTHORITY', 'Datenschutzbehoerde', 'Zustaendige Datenschutz-Aufsichtsbehoerde', FALSE, 'DE', 15)
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- Transfer Mechanisms (8)
|
||||
-- =============================================================================
|
||||
-- Seed the transfer-mechanism lookup table with its 8 built-in rows.
-- ON CONFLICT (id) DO NOTHING makes the statement safe to re-run, but rows
-- whose id already exists are left untouched: later edits to the values below
-- will not reach a database that has already been seeded.
INSERT INTO vvt_lib_transfer_mechanisms (id, label_de, description_de, article, requires_tia, sort_order) VALUES
|
||||
('ADEQUACY_DECISION', 'Angemessenheitsbeschluss', 'EU-Angemessenheitsbeschluss gemaess Art. 45 DSGVO', 'Art. 45 DSGVO', FALSE, 1),
|
||||
('SCC_CONTROLLER', 'Standardvertragsklauseln (C2C)', 'Standardvertragsklauseln Controller-zu-Controller', 'Art. 46 Abs. 2 lit. c DSGVO', TRUE, 2),
|
||||
('SCC_PROCESSOR', 'Standardvertragsklauseln (C2P)', 'Standardvertragsklauseln Controller-zu-Processor', 'Art. 46 Abs. 2 lit. c DSGVO', TRUE, 3),
|
||||
('BCR', 'Binding Corporate Rules', 'Verbindliche interne Datenschutzvorschriften', 'Art. 47 DSGVO', FALSE, 4),
|
||||
('CONSENT_49A', 'Einwilligung (Art. 49)', 'Ausdrueckliche Einwilligung der betroffenen Person', 'Art. 49 Abs. 1 lit. a DSGVO', FALSE, 5),
|
||||
('DEROGATION_49', 'Ausnahme (Art. 49)', 'Ausnahme fuer bestimmte Faelle gemaess Art. 49', 'Art. 49 DSGVO', FALSE, 6),
|
||||
('DPF', 'EU-US Data Privacy Framework', 'Zertifizierung unter dem EU-US Data Privacy Framework', 'Art. 45 DSGVO (DPF)', FALSE, 7),
|
||||
('TIA', 'Transfer Impact Assessment', 'Einzelfallbezogene Risikobewertung fuer Drittlandtransfers', 'Art. 46 DSGVO + Schrems II', TRUE, 8)
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- Purposes (20)
|
||||
-- =============================================================================
|
||||
-- Seed the processing-purpose lookup table with its 20 built-in rows.
-- typical_for is written as a JSON-array text literal of business-function
-- keys (presumably a JSON/JSONB column; its definition is not in this part of
-- the file -- confirm). typical_legal_basis holds a single legal-basis id.
-- ON CONFLICT (id) DO NOTHING: existing ids are skipped, never updated.
INSERT INTO vvt_lib_purposes (id, label_de, description_de, typical_legal_basis, typical_for, sort_order) VALUES
|
||||
('EMPLOYMENT_ADMIN', 'Personalverwaltung', 'Verwaltung des Beschaeftigungsverhaeltnisses', 'BDSG_26', '["hr"]', 1),
|
||||
('PAYROLL', 'Gehaltsabrechnung', 'Durchfuehrung der Lohn- und Gehaltsabrechnung', 'BDSG_26', '["hr","finance"]', 2),
|
||||
('RECRUITING', 'Bewerbermanagement', 'Durchfuehrung von Bewerbungsverfahren', 'BDSG_26', '["hr"]', 3),
|
||||
('TIME_TRACKING', 'Zeiterfassung', 'Erfassung und Verwaltung von Arbeitszeiten', 'ART6_1C', '["hr"]', 4),
|
||||
('ACCOUNTING', 'Buchhaltung', 'Fuehrung der Handelsbuecher und Finanzberichterstattung', 'ART6_1C', '["finance"]', 5),
|
||||
('INVOICING', 'Rechnungsstellung', 'Erstellung und Verwaltung von Rechnungen', 'ART6_1B', '["finance"]', 6),
|
||||
('CRM', 'Kundenbeziehungsmanagement', 'Verwaltung und Pflege von Kundenbeziehungen', 'ART6_1B', '["sales_crm"]', 7),
|
||||
('DIRECT_MARKETING', 'Direktmarketing', 'Newsletter-Versand und Werbemassnahmen', 'ART6_1A', '["marketing"]', 8),
|
||||
('WEBSITE_ANALYTICS', 'Web-Analyse', 'Analyse des Nutzerverhaltens auf der Website', 'ART6_1A', '["marketing","it_operations"]', 9),
|
||||
('CUSTOMER_SUPPORT', 'Kundenbetreuung', 'Bearbeitung von Kundenanfragen und Support-Tickets', 'ART6_1B', '["support"]', 10),
|
||||
('IT_ADMIN', 'IT-Administration', 'Verwaltung der IT-Infrastruktur und Benutzerkonten', 'ART6_1F', '["it_operations"]', 11),
|
||||
('BACKUP_RECOVERY', 'Datensicherung', 'Backup-Erstellung und Wiederherstellung', 'ART6_1F', '["it_operations"]', 12),
|
||||
('SECURITY_MONITORING', 'Sicherheitsueberwachung', 'Log-Analyse und Intrusion Detection', 'ART6_1F', '["it_operations"]', 13),
|
||||
('IAM', 'Identitaets- und Zugriffsmanagement', 'Verwaltung von Benutzeridentitaeten und Berechtigungen', 'ART6_1F', '["it_operations"]', 14),
|
||||
('VIDEO_CONFERENCING', 'Videokonferenz', 'Durchfuehrung von Online-Meetings und Videokonferenzen', 'ART6_1B', '["other"]', 15),
|
||||
('VISITOR_MANAGEMENT', 'Besucherverwaltung', 'Erfassung und Verwaltung von Betriebsbesuchern', 'ART6_1F', '["management"]', 16),
|
||||
('PAYMENT_PROCESSING', 'Zahlungsabwicklung', 'Verarbeitung und Abwicklung von Zahlungen', 'ART6_1B', '["finance"]', 17),
|
||||
('SOCIAL_MEDIA', 'Social-Media-Marketing', 'Betrieb von Social-Media-Praesenzen', 'ART6_1A', '["marketing"]', 18),
|
||||
('SALES_REPORTING', 'Vertriebssteuerung', 'Vertriebsanalysen und Berichterstattung', 'ART6_1F', '["sales_crm"]', 19),
|
||||
('COMPLIANCE_DOCS', 'Compliance-Dokumentation', 'Erstellung und Pflege von Compliance-Dokumenten', 'ART6_1C', '["legal","management"]', 20)
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- =============================================================================
|
||||
-- TOMs (20)
|
||||
-- =============================================================================
|
||||
-- Seed the TOM (technical and organisational measure) lookup table with its
-- 20 built-in rows: four rows for each of the five category values used below
-- (accessControl, confidentiality, integrity, availability, separation).
-- art32_reference names the GDPR Art. 32(1) letter the measure is filed under.
-- ON CONFLICT (id) DO NOTHING: existing ids are skipped, never updated.
INSERT INTO vvt_lib_toms (id, category, label_de, description_de, art32_reference, sort_order) VALUES
|
||||
('AC_RBAC', 'accessControl', 'Rollenbasierte Zugriffskontrolle (RBAC)', 'Zugriff nur nach Rolle und Berechtigung', 'Art. 32 Abs. 1 lit. b', 1),
|
||||
('AC_MFA', 'accessControl', 'Multi-Faktor-Authentifizierung', 'Zwei- oder mehrstufige Anmeldung', 'Art. 32 Abs. 1 lit. b', 2),
|
||||
('AC_NEED_TO_KNOW', 'accessControl', 'Need-to-Know-Prinzip', 'Zugriff nur auf fuer die Aufgabe erforderliche Daten', 'Art. 32 Abs. 1 lit. b', 3),
|
||||
('AC_PAM', 'accessControl', 'Privileged Access Management', 'Verwaltung und Ueberwachung privilegierter Zugaenge', 'Art. 32 Abs. 1 lit. b', 4),
|
||||
('CONF_ENCRYPTION_REST', 'confidentiality', 'Verschluesselung ruhender Daten', 'AES-256 Verschluesselung fuer gespeicherte Daten', 'Art. 32 Abs. 1 lit. a', 5),
|
||||
('CONF_ENCRYPTION_TRANSIT', 'confidentiality', 'Transportverschluesselung', 'TLS 1.3 fuer alle Datenuebertragungen', 'Art. 32 Abs. 1 lit. a', 6),
|
||||
('CONF_PSEUDONYMIZATION', 'confidentiality', 'Pseudonymisierung', 'Verarbeitung ohne direkten Personenbezug', 'Art. 32 Abs. 1 lit. a', 7),
|
||||
('CONF_NDA', 'confidentiality', 'Vertraulichkeitsvereinbarungen', 'NDAs fuer Mitarbeiter und Auftragnehmer', 'Art. 32 Abs. 1 lit. b', 8),
|
||||
('INT_AUDIT_LOG', 'integrity', 'Audit-Logging', 'Lueckenlose Protokollierung aller Datenzugriffe', 'Art. 32 Abs. 1 lit. b', 9),
|
||||
('INT_FOUR_EYES', 'integrity', 'Vier-Augen-Prinzip', 'Kritische Aenderungen nur mit Freigabe durch zweite Person', 'Art. 32 Abs. 1 lit. b', 10),
|
||||
('INT_CHECKSUMS', 'integrity', 'Pruefsummen und Hashing', 'Integritaetspruefung durch kryptographische Hashes', 'Art. 32 Abs. 1 lit. b', 11),
|
||||
('INT_CHANGE_MGMT', 'integrity', 'Change Management', 'Dokumentierter Aenderungsprozess fuer IT-Systeme', 'Art. 32 Abs. 1 lit. b', 12),
|
||||
('AVAIL_BACKUP', 'availability', 'Regelmaessige Backups', 'Taegliche und woechentliche Datensicherungen', 'Art. 32 Abs. 1 lit. c', 13),
|
||||
('AVAIL_REDUNDANCY', 'availability', 'Redundante Systeme', 'Hochverfuegbarkeit durch Systemredundanz', 'Art. 32 Abs. 1 lit. c', 14),
|
||||
('AVAIL_321_RULE', 'availability', '3-2-1 Backup-Regel', 'Drei Kopien, zwei Medien, ein externer Standort', 'Art. 32 Abs. 1 lit. c', 15),
|
||||
('AVAIL_MONITORING', 'availability', 'System-Monitoring', 'Kontinuierliche Ueberwachung der Systemverfuegbarkeit', 'Art. 32 Abs. 1 lit. c', 16),
|
||||
('SEP_TENANT_ISOLATION', 'separation', 'Mandantentrennung', 'Logische Trennung der Daten verschiedener Mandanten', 'Art. 32 Abs. 1 lit. b', 17),
|
||||
('SEP_NETWORK_SEG', 'separation', 'Netzwerksegmentierung', 'Trennung von Netzwerkbereichen (VLANs, Firewalls)', 'Art. 32 Abs. 1 lit. b', 18),
|
||||
('SEP_DATA_SEPARATION', 'separation', 'Datentrennung', 'Separate Datenbanken oder Schemas pro Zweck', 'Art. 32 Abs. 1 lit. b', 19),
|
||||
('SEP_ENV_SEPARATION', 'separation', 'Umgebungstrennung', 'Getrennte Entwicklungs-, Test- und Produktionsumgebungen', 'Art. 32 Abs. 1 lit. b', 20)
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,54 @@
|
||||
-- Migration 066: VVT Process Templates + Activity extensions
|
||||
-- Template table + new ref columns on compliance_vvt_activities
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- =============================================================================
|
||||
-- Process Templates
|
||||
-- =============================================================================
|
||||
-- Catalogue of reusable processing-activity templates.
-- Every *_refs / typical_systems / tags column is JSONB defaulting to an empty
-- array. No foreign keys or CHECK constraints are declared, so the ids stored
-- in the *_refs arrays and in retention_rule_ref are not validated by the
-- database against any lookup table.
-- IF NOT EXISTS: if the table already exists with a different shape, this
-- statement is a no-op and does not reconcile the columns.
CREATE TABLE IF NOT EXISTS vvt_process_templates (
|
||||
-- Caller-supplied string key, not a generated UUID.
id VARCHAR(80) PRIMARY KEY,
|
||||
name VARCHAR(300) NOT NULL,
|
||||
description TEXT,
|
||||
business_function VARCHAR(50),
|
||||
purpose_refs JSONB DEFAULT '[]'::jsonb,
|
||||
legal_basis_refs JSONB DEFAULT '[]'::jsonb,
|
||||
data_subject_refs JSONB DEFAULT '[]'::jsonb,
|
||||
data_category_refs JSONB DEFAULT '[]'::jsonb,
|
||||
recipient_refs JSONB DEFAULT '[]'::jsonb,
|
||||
tom_refs JSONB DEFAULT '[]'::jsonb,
|
||||
transfer_mechanism_refs JSONB DEFAULT '[]'::jsonb,
|
||||
-- Single id rather than an array, unlike the other reference columns.
retention_rule_ref VARCHAR(50),
|
||||
typical_systems JSONB DEFAULT '[]'::jsonb,
|
||||
-- Free-form VARCHAR: no CHECK restricts the allowed level values.
protection_level VARCHAR(10) DEFAULT 'MEDIUM',
|
||||
dpia_required BOOLEAN DEFAULT FALSE,
|
||||
-- No default and no range constraint; NULL is allowed.
risk_score INTEGER,
|
||||
tags JSONB DEFAULT '[]'::jsonb,
|
||||
-- NOTE(review): presumably is_system = TRUE marks built-in templates and
-- tenant_id scopes tenant-owned ones -- confirm against the application code.
-- Rows inserted without is_system are flagged TRUE by default.
is_system BOOLEAN DEFAULT TRUE,
|
||||
tenant_id UUID,
|
||||
sort_order INTEGER DEFAULT 0,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
-- Only initialised on insert; no trigger in this migration refreshes it.
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Secondary indexes for filtering templates by business function and by the
-- is_system flag.
-- NOTE(review): is_system is a boolean, so its index has at most three
-- distinct keys (TRUE/FALSE/NULL); verify it is actually used by the planner.
CREATE INDEX IF NOT EXISTS idx_vvt_process_templates_bf ON vvt_process_templates(business_function);
|
||||
CREATE INDEX IF NOT EXISTS idx_vvt_process_templates_system ON vvt_process_templates(is_system);
|
||||
|
||||
-- =============================================================================
|
||||
-- New columns on compliance_vvt_activities (all DEFAULT NULL for backward compat)
|
||||
-- =============================================================================
|
||||
-- Adds 13 nullable columns to compliance_vvt_activities, one statement each.
-- IF NOT EXISTS lets the migration be re-run; DEFAULT NULL means existing
-- rows simply read NULL for every new column (no backfill happens here).
-- The *_refs columns mirror the identically named columns on
-- vvt_process_templates, except that here they default to NULL, not '[]'.
ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS purpose_refs JSONB DEFAULT NULL;
|
||||
ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS legal_basis_refs JSONB DEFAULT NULL;
|
||||
ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS data_subject_refs JSONB DEFAULT NULL;
|
||||
ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS data_category_refs JSONB DEFAULT NULL;
|
||||
ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS recipient_refs JSONB DEFAULT NULL;
|
||||
ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS retention_rule_ref VARCHAR(50) DEFAULT NULL;
|
||||
ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS transfer_mechanism_refs JSONB DEFAULT NULL;
|
||||
ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS tom_refs JSONB DEFAULT NULL;
|
||||
-- NOTE(review): linked_* presumably hold arrays of ids of rows in the
-- Loeschfristen (retention period) and TOM modules -- confirm the element type.
ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS linked_loeschfristen_ids JSONB DEFAULT NULL;
|
||||
ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS linked_tom_measure_ids JSONB DEFAULT NULL;
|
||||
-- Same type as vvt_process_templates.id (VARCHAR(80)), but no foreign key is
-- declared, so the database does not check that the template exists.
ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS source_template_id VARCHAR(80) DEFAULT NULL;
|
||||
ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS risk_score INTEGER DEFAULT NULL;
|
||||
ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS art30_completeness JSONB DEFAULT NULL;
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,305 @@
|
||||
-- Migration 067: VVT Process Templates Seed — 18 templates from vvt-baseline-catalog
|
||||
-- All content self-authored, MIT-compatible.
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Seed the 18 built-in process templates.
--
-- Each row is laid out over 15 lines, in column-list order:
--   1 id                  6 legal_basis_refs     11 retention_rule_ref
--   2 name                7 data_subject_refs    12 typical_systems
--   3 description         8 data_category_refs   13 protection_level, dpia_required, risk_score
--   4 business_function   9 recipient_refs       14 tags
--   5 purpose_refs       10 tom_refs             15 sort_order
--
-- The JSONB columns are supplied as JSON-array text literals.
-- Columns not listed take their table defaults: transfer_mechanism_refs = '[]',
-- is_system = TRUE, tenant_id = NULL, created_at / updated_at = NOW().
-- ON CONFLICT (id) DO NOTHING: a template whose id already exists is skipped,
-- so editing a row here does not update databases that were already seeded.
INSERT INTO vvt_process_templates (id, name, description, business_function, purpose_refs, legal_basis_refs, data_subject_refs, data_category_refs, recipient_refs, tom_refs, retention_rule_ref, typical_systems, protection_level, dpia_required, risk_score, tags, sort_order) VALUES
|
||||
|
||||
-- HR Templates
|
||||
('hr-mitarbeiterverwaltung',
|
||||
'Mitarbeiterverwaltung',
|
||||
'Verwaltung des Beschaeftigungsverhaeltnisses inkl. Personalakte, Urlaub, Krankmeldungen',
|
||||
'hr',
|
||||
'["EMPLOYMENT_ADMIN", "PAYROLL"]',
|
||||
'["BDSG_26", "ART6_1B"]',
|
||||
'["EMPLOYEES"]',
|
||||
'["NAME", "DOB", "ADDRESS", "CONTACT", "SOCIAL_SECURITY", "BANK_ACCOUNT", "EMPLOYMENT_DATA", "HEALTH_DATA"]',
|
||||
'["INTERNAL_HR", "INTERNAL_FINANCE", "PROCESSOR_PAYROLL", "AUTHORITY_SOZIALVERSICHERUNG", "AUTHORITY_KRANKENKASSE"]',
|
||||
'["AC_RBAC", "AC_NEED_TO_KNOW", "CONF_ENCRYPTION_REST", "CONF_ENCRYPTION_TRANSIT", "INT_AUDIT_LOG", "SEP_TENANT_ISOLATION"]',
|
||||
'HGB_257_10Y',
|
||||
'["HR-Software", "Personalakte (digital)"]',
|
||||
'HIGH', TRUE, 3,
|
||||
'["personal", "pflicht"]',
|
||||
1),
|
||||
|
||||
('hr-gehaltsabrechnung',
|
||||
'Gehaltsabrechnung',
|
||||
'Monatliche Lohn- und Gehaltsabrechnung inkl. Steuer- und Sozialversicherungsmeldungen',
|
||||
'hr',
|
||||
'["PAYROLL"]',
|
||||
'["BDSG_26", "ART6_1C"]',
|
||||
'["EMPLOYEES"]',
|
||||
'["NAME", "ADDRESS", "SOCIAL_SECURITY", "TAX_ID", "BANK_ACCOUNT", "SALARY_DATA"]',
|
||||
'["INTERNAL_HR", "INTERNAL_FINANCE", "PROCESSOR_PAYROLL", "AUTHORITY_FINANZAMT", "AUTHORITY_SOZIALVERSICHERUNG"]',
|
||||
'["AC_RBAC", "AC_NEED_TO_KNOW", "CONF_ENCRYPTION_REST", "CONF_ENCRYPTION_TRANSIT", "INT_AUDIT_LOG", "INT_FOUR_EYES"]',
|
||||
'AO_147_10Y',
|
||||
'["Lohnabrechnungssoftware", "DATEV"]',
|
||||
'HIGH', FALSE, 3,
|
||||
'["personal", "finanzen", "pflicht"]',
|
||||
2),
|
||||
|
||||
('hr-bewerbermanagement',
|
||||
'Bewerbermanagement',
|
||||
'Durchfuehrung von Bewerbungsverfahren vom Eingang bis zur Zu-/Absage',
|
||||
'hr',
|
||||
'["RECRUITING"]',
|
||||
'["BDSG_26", "ART6_1B"]',
|
||||
'["APPLICANTS"]',
|
||||
'["NAME", "DOB", "ADDRESS", "CONTACT", "EDUCATION_DATA", "PHOTO_VIDEO"]',
|
||||
'["INTERNAL_HR", "INTERNAL_MANAGEMENT"]',
|
||||
'["AC_RBAC", "AC_NEED_TO_KNOW", "CONF_ENCRYPTION_REST", "CONF_NDA"]',
|
||||
'AGG_15_6M',
|
||||
'["Bewerbermanagement-Software", "E-Mail"]',
|
||||
'MEDIUM', FALSE, 2,
|
||||
'["personal", "recruiting"]',
|
||||
3),
|
||||
|
||||
('hr-zeiterfassung',
|
||||
'Zeiterfassung',
|
||||
'Erfassung und Verwaltung von Arbeitszeiten gemaess ArbZG',
|
||||
'hr',
|
||||
'["TIME_TRACKING"]',
|
||||
'["ART6_1C", "BDSG_26"]',
|
||||
'["EMPLOYEES"]',
|
||||
'["NAME", "EMPLOYMENT_DATA"]',
|
||||
'["INTERNAL_HR", "INTERNAL_MANAGEMENT"]',
|
||||
'["AC_RBAC", "INT_AUDIT_LOG", "CONF_ENCRYPTION_TRANSIT"]',
|
||||
'ARBZG_16_2Y',
|
||||
'["Zeiterfassungssystem", "Stempeluhr"]',
|
||||
'LOW', FALSE, 1,
|
||||
'["personal", "pflicht"]',
|
||||
4),
|
||||
|
||||
-- Finance Templates
|
||||
('finance-buchhaltung',
|
||||
'Buchhaltung',
|
||||
'Fuehrung der Handelsbuecher und steuerrechtliche Dokumentation',
|
||||
'finance',
|
||||
'["ACCOUNTING", "INVOICING"]',
|
||||
'["ART6_1C", "ART6_1B"]',
|
||||
'["CUSTOMERS", "SUPPLIERS", "EMPLOYEES"]',
|
||||
'["NAME", "ADDRESS", "CONTACT", "BANK_ACCOUNT", "PAYMENT_DATA", "CONTRACT_DATA", "TAX_ID"]',
|
||||
'["INTERNAL_FINANCE", "AUTHORITY_FINANZAMT", "PROCESSOR_HOSTING"]',
|
||||
'["AC_RBAC", "INT_AUDIT_LOG", "INT_FOUR_EYES", "CONF_ENCRYPTION_REST", "AVAIL_BACKUP"]',
|
||||
'HGB_257_10Y',
|
||||
'["Buchhaltungssoftware", "DATEV", "ERP-System"]',
|
||||
'HIGH', FALSE, 2,
|
||||
'["finanzen", "pflicht"]',
|
||||
5),
|
||||
|
||||
('finance-zahlungsverkehr',
|
||||
'Zahlungsverkehr',
|
||||
'Verarbeitung und Abwicklung von ein- und ausgehenden Zahlungen',
|
||||
'finance',
|
||||
'["PAYMENT_PROCESSING"]',
|
||||
'["ART6_1B", "ART6_1C"]',
|
||||
'["CUSTOMERS", "SUPPLIERS"]',
|
||||
'["NAME", "BANK_ACCOUNT", "PAYMENT_DATA", "CONTRACT_DATA"]',
|
||||
'["INTERNAL_FINANCE", "PROCESSOR_HOSTING"]',
|
||||
'["AC_RBAC", "AC_MFA", "CONF_ENCRYPTION_REST", "CONF_ENCRYPTION_TRANSIT", "INT_AUDIT_LOG"]',
|
||||
'HGB_257_10Y',
|
||||
'["Online-Banking", "Payment-Gateway"]',
|
||||
'HIGH', FALSE, 3,
|
||||
'["finanzen"]',
|
||||
6),
|
||||
|
||||
-- Sales/CRM Templates
|
||||
('sales-kundenverwaltung',
|
||||
'Kundenverwaltung',
|
||||
'Verwaltung und Pflege der Kundenbeziehungen im CRM-System',
|
||||
'sales_crm',
|
||||
'["CRM"]',
|
||||
'["ART6_1B", "ART6_1F"]',
|
||||
'["CUSTOMERS", "PROSPECTIVE_CUSTOMERS"]',
|
||||
'["NAME", "ADDRESS", "CONTACT", "CONTRACT_DATA", "COMMUNICATION_DATA"]',
|
||||
'["INTERNAL_MARKETING", "INTERNAL_SUPPORT", "PROCESSOR_HOSTING"]',
|
||||
'["AC_RBAC", "CONF_ENCRYPTION_REST", "CONF_ENCRYPTION_TRANSIT", "INT_AUDIT_LOG", "SEP_TENANT_ISOLATION"]',
|
||||
'BGB_195_3Y',
|
||||
'["CRM-System", "E-Mail-Client"]',
|
||||
'MEDIUM', FALSE, 2,
|
||||
'["vertrieb", "kunden"]',
|
||||
7),
|
||||
|
||||
('sales-vertriebssteuerung',
|
||||
'Vertriebssteuerung',
|
||||
'Vertriebsanalysen, Forecasting und Berichterstattung',
|
||||
'sales_crm',
|
||||
'["SALES_REPORTING"]',
|
||||
'["ART6_1F"]',
|
||||
'["CUSTOMERS", "PROSPECTIVE_CUSTOMERS"]',
|
||||
'["NAME", "CONTACT", "CONTRACT_DATA"]',
|
||||
'["INTERNAL_MANAGEMENT", "INTERNAL_MARKETING"]',
|
||||
'["AC_RBAC", "AC_NEED_TO_KNOW", "CONF_PSEUDONYMIZATION"]',
|
||||
'BGB_195_3Y',
|
||||
'["CRM-System", "BI-Tool"]',
|
||||
'LOW', FALSE, 1,
|
||||
'["vertrieb", "reporting"]',
|
||||
8),
|
||||
|
||||
-- Marketing Templates
|
||||
('marketing-newsletter',
|
||||
'Newsletter-Versand',
|
||||
'Versand von Newslettern und Werbemails an Abonnenten',
|
||||
'marketing',
|
||||
'["DIRECT_MARKETING"]',
|
||||
'["ART6_1A", "UWG_7"]',
|
||||
'["NEWSLETTER_SUBSCRIBERS", "CUSTOMERS"]',
|
||||
'["NAME", "CONTACT", "USAGE_DATA"]',
|
||||
'["INTERNAL_MARKETING", "PROCESSOR_EMAIL"]',
|
||||
'["AC_RBAC", "CONF_ENCRYPTION_TRANSIT", "SEP_DATA_SEPARATION"]',
|
||||
'CONSENT_REVOKE',
|
||||
'["Newsletter-Tool", "E-Mail-Marketing-Plattform"]',
|
||||
'LOW', FALSE, 1,
|
||||
'["marketing", "einwilligung"]',
|
||||
9),
|
||||
|
||||
('marketing-website-analytics',
|
||||
'Website-Analyse',
|
||||
'Analyse des Nutzerverhaltens auf der Unternehmenswebsite',
|
||||
'marketing',
|
||||
'["WEBSITE_ANALYTICS"]',
|
||||
'["ART6_1A"]',
|
||||
'["WEBSITE_USERS"]',
|
||||
'["IP_ADDRESS", "DEVICE_ID", "USAGE_DATA"]',
|
||||
'["INTERNAL_MARKETING", "PROCESSOR_ANALYTICS"]',
|
||||
'["CONF_PSEUDONYMIZATION", "CONF_ENCRYPTION_TRANSIT", "SEP_DATA_SEPARATION"]',
|
||||
'CUSTOM_14M',
|
||||
'["Web-Analytics-Tool", "Tag-Manager"]',
|
||||
'LOW', FALSE, 1,
|
||||
'["marketing", "einwilligung", "tracking"]',
|
||||
10),
|
||||
|
||||
('marketing-social-media',
|
||||
'Social-Media-Marketing',
|
||||
'Betrieb und Verwaltung von Social-Media-Praesenzen',
|
||||
'marketing',
|
||||
'["SOCIAL_MEDIA"]',
|
||||
'["ART6_1A", "ART6_1F"]',
|
||||
'["WEBSITE_USERS", "CUSTOMERS"]',
|
||||
'["NAME", "CONTACT", "USAGE_DATA", "PHOTO_VIDEO"]',
|
||||
'["INTERNAL_MARKETING", "PROCESSOR_ANALYTICS"]',
|
||||
'["AC_RBAC", "CONF_ENCRYPTION_TRANSIT"]',
|
||||
'PURPOSE_END',
|
||||
'["Social-Media-Plattformen", "Social-Media-Management-Tool"]',
|
||||
'LOW', FALSE, 1,
|
||||
'["marketing", "social-media"]',
|
||||
11),
|
||||
|
||||
-- Support Templates
|
||||
('support-ticketsystem',
|
||||
'Ticketsystem / Kundenservice',
|
||||
'Bearbeitung von Kundenanfragen ueber das Ticketsystem',
|
||||
'support',
|
||||
'["CUSTOMER_SUPPORT"]',
|
||||
'["ART6_1B"]',
|
||||
'["CUSTOMERS"]',
|
||||
'["NAME", "CONTACT", "COMMUNICATION_DATA", "CONTRACT_DATA"]',
|
||||
'["INTERNAL_SUPPORT", "PROCESSOR_HELPDESK"]',
|
||||
'["AC_RBAC", "CONF_ENCRYPTION_TRANSIT", "INT_AUDIT_LOG"]',
|
||||
'BGB_195_3Y',
|
||||
'["Ticketsystem", "Help-Desk-Software"]',
|
||||
'MEDIUM', FALSE, 1,
|
||||
'["support", "kunden"]',
|
||||
12),
|
||||
|
||||
-- IT Templates
|
||||
('it-systemadministration',
|
||||
'IT-Systemadministration',
|
||||
'Verwaltung der IT-Infrastruktur, Benutzerkonten und Berechtigungen',
|
||||
'it_operations',
|
||||
'["IT_ADMIN"]',
|
||||
'["ART6_1F", "ART6_1B"]',
|
||||
'["EMPLOYEES"]',
|
||||
'["NAME", "LOGIN_DATA", "IP_ADDRESS", "DEVICE_ID"]',
|
||||
'["INTERNAL_IT", "PROCESSOR_HOSTING"]',
|
||||
'["AC_RBAC", "AC_MFA", "AC_PAM", "CONF_ENCRYPTION_REST", "CONF_ENCRYPTION_TRANSIT", "INT_AUDIT_LOG", "SEP_NETWORK_SEG", "SEP_ENV_SEPARATION"]',
|
||||
'CUSTOM_90D',
|
||||
'["Active Directory", "LDAP", "IT-Management-Tool"]',
|
||||
'HIGH', FALSE, 2,
|
||||
'["it", "infrastruktur"]',
|
||||
13),
|
||||
|
||||
('it-backup',
|
||||
'Datensicherung und Recovery',
|
||||
'Regelmaessige Backups und Wiederherstellungsverfahren',
|
||||
'it_operations',
|
||||
'["BACKUP_RECOVERY"]',
|
||||
'["ART6_1F"]',
|
||||
'["EMPLOYEES", "CUSTOMERS"]',
|
||||
'["NAME", "ADDRESS", "CONTACT", "CONTRACT_DATA", "LOGIN_DATA"]',
|
||||
'["INTERNAL_IT", "PROCESSOR_HOSTING"]',
|
||||
'["AVAIL_BACKUP", "AVAIL_321_RULE", "AVAIL_REDUNDANCY", "CONF_ENCRYPTION_REST", "INT_CHECKSUMS"]',
|
||||
'CUSTOM_90D',
|
||||
'["Backup-Software", "Cloud-Backup", "NAS"]',
|
||||
'HIGH', FALSE, 2,
|
||||
'["it", "verfuegbarkeit"]',
|
||||
14),
|
||||
|
||||
('it-logging',
|
||||
'Logging und Sicherheitsueberwachung',
|
||||
'Protokollierung von System- und Sicherheitsereignissen',
|
||||
'it_operations',
|
||||
'["SECURITY_MONITORING"]',
|
||||
'["ART6_1F"]',
|
||||
'["EMPLOYEES", "CUSTOMERS", "WEBSITE_USERS"]',
|
||||
'["IP_ADDRESS", "LOGIN_DATA", "USAGE_DATA", "DEVICE_ID"]',
|
||||
'["INTERNAL_IT"]',
|
||||
'["CONF_ENCRYPTION_REST", "INT_AUDIT_LOG", "INT_CHECKSUMS", "AVAIL_MONITORING", "SEP_DATA_SEPARATION"]',
|
||||
'CUSTOM_90D',
|
||||
'["SIEM-System", "Log-Management", "Monitoring-Tool"]',
|
||||
'MEDIUM', FALSE, 2,
|
||||
'["it", "sicherheit"]',
|
||||
15),
|
||||
|
||||
('it-iam',
|
||||
'Identitaets- und Zugriffsmanagement',
|
||||
'Verwaltung von Benutzeridentitaeten, Rollen und Berechtigungen',
|
||||
'it_operations',
|
||||
'["IAM"]',
|
||||
'["ART6_1F", "BDSG_26"]',
|
||||
'["EMPLOYEES"]',
|
||||
'["NAME", "LOGIN_DATA", "EMPLOYMENT_DATA"]',
|
||||
'["INTERNAL_IT", "INTERNAL_HR"]',
|
||||
'["AC_RBAC", "AC_MFA", "AC_PAM", "AC_NEED_TO_KNOW", "INT_AUDIT_LOG", "CONF_ENCRYPTION_REST"]',
|
||||
'AGG_15_6M',
|
||||
'["IAM-System", "SSO-Provider", "Active Directory"]',
|
||||
'HIGH', FALSE, 2,
|
||||
'["it", "sicherheit", "zugriffskontrolle"]',
|
||||
16),
|
||||
|
||||
-- Other Templates
|
||||
('other-videokonferenz',
|
||||
'Videokonferenz',
|
||||
'Durchfuehrung von Online-Meetings und Videokonferenzen',
|
||||
'other',
|
||||
'["VIDEO_CONFERENCING"]',
|
||||
'["ART6_1B", "ART6_1F"]',
|
||||
'["EMPLOYEES", "CUSTOMERS", "BUSINESS_PARTNERS"]',
|
||||
'["NAME", "CONTACT", "PHOTO_VIDEO", "IP_ADDRESS"]',
|
||||
'["INTERNAL_IT", "PROCESSOR_HOSTING"]',
|
||||
'["CONF_ENCRYPTION_TRANSIT", "AC_RBAC"]',
|
||||
'PURPOSE_END',
|
||||
'["Videokonferenz-Tool", "Webinar-Plattform"]',
|
||||
'LOW', FALSE, 1,
|
||||
'["kommunikation"]',
|
||||
17),
|
||||
|
||||
('other-besuchermanagement',
|
||||
'Besuchermanagement',
|
||||
'Erfassung und Verwaltung von Betriebsbesuchern',
|
||||
'other',
|
||||
'["VISITOR_MANAGEMENT"]',
|
||||
'["ART6_1F"]',
|
||||
'["VISITORS"]',
|
||||
'["NAME", "CONTACT", "PHOTO_VIDEO"]',
|
||||
'["INTERNAL_MANAGEMENT"]',
|
||||
'["AC_RBAC", "CONF_ENCRYPTION_REST"]',
|
||||
'CUSTOM_30D',
|
||||
'["Besuchermanagement-System", "Empfangsterminal"]',
|
||||
'LOW', FALSE, 1,
|
||||
'["sonstiges", "besucher"]',
|
||||
18)
|
||||
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,65 @@
|
||||
-- Migration 068: TOM ↔ Canonical Control Mappings
|
||||
-- Bridge table connecting TOM measures (88) to Canonical Controls (10,000+)
|
||||
-- Enables three-layer architecture: TOM → Mapping → Canonical Controls
|
||||
|
||||
-- ============================================================================
|
||||
-- 1. Mapping table (TOM control code → Canonical control)
|
||||
-- ============================================================================
|
||||
|
||||
-- Bridge table: one row links a TOM control code to one canonical control,
-- scoped by tenant and (optionally) project. No foreign keys are declared, so
-- neither side of the mapping is validated by the database.
CREATE TABLE IF NOT EXISTS tom_control_mappings (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
tenant_id UUID NOT NULL,
|
||||
-- Nullable: see the note on the UNIQUE constraint at the end of the table.
project_id UUID,
|
||||
|
||||
-- TOM side (references the embedded TOM control code, e.g. 'TOM-AC-01')
|
||||
tom_control_code VARCHAR(20) NOT NULL,
|
||||
tom_category VARCHAR(50) NOT NULL,
|
||||
|
||||
-- Canonical control side
|
||||
canonical_control_id UUID NOT NULL,
|
||||
canonical_control_code VARCHAR(20) NOT NULL,
|
||||
canonical_category VARCHAR(50),
|
||||
|
||||
-- Mapping metadata
|
||||
mapping_type VARCHAR(20) NOT NULL DEFAULT 'auto'
|
||||
CHECK (mapping_type IN ('auto', 'manual')),
|
||||
-- NUMERIC(3,2) in [0, 1]. The column is nullable, and a CHECK is satisfied
-- when its expression evaluates to NULL, so NULL scores are accepted.
relevance_score NUMERIC(3,2) DEFAULT 1.00
|
||||
CHECK (relevance_score >= 0 AND relevance_score <= 1),
|
||||
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
|
||||
-- No duplicate mappings per tenant+project+TOM+canonical
-- NOTE(review): project_id is nullable and PostgreSQL treats NULLs as distinct
-- in a UNIQUE constraint by default, so rows with project_id IS NULL are NOT
-- deduplicated by this constraint, and an ON CONFLICT on these columns will
-- not fire for them. If project-less mappings must be unique, this needs
-- NULLS NOT DISTINCT (PostgreSQL 15+) or an additional partial unique index
-- WHERE project_id IS NULL -- confirm the target version and existing data.
UNIQUE (tenant_id, project_id, tom_control_code, canonical_control_id)
|
||||
);
|
||||
|
||||
-- Lookup indexes on tom_control_mappings: by tenant/project scope, by TOM
-- control code, by canonical control id and by TOM category.
-- NOTE(review): the table's UNIQUE (tenant_id, project_id, tom_control_code,
-- canonical_control_id) constraint is already backed by an index whose leading
-- columns are (tenant_id, project_id), so idx_tcm_tenant_project may be
-- redundant -- check query plans before keeping or dropping it.
CREATE INDEX IF NOT EXISTS idx_tcm_tenant_project
|
||||
ON tom_control_mappings (tenant_id, project_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_tcm_tom_code
|
||||
ON tom_control_mappings (tom_control_code);
|
||||
CREATE INDEX IF NOT EXISTS idx_tcm_canonical_id
|
||||
ON tom_control_mappings (canonical_control_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_tcm_tom_category
|
||||
ON tom_control_mappings (tom_category);
|
||||
|
||||
-- ============================================================================
|
||||
-- 2. Sync state (tracks when the last sync ran + profile hash)
|
||||
-- ============================================================================
|
||||
|
||||
-- Bookkeeping row for the most recent TOM <-> canonical-control sync of a
-- tenant (and optionally a project): a profile hash plus counters.
CREATE TABLE IF NOT EXISTS tom_control_sync_state (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
tenant_id UUID NOT NULL,
|
||||
-- Nullable: see the note on the UNIQUE constraint at the end of the table.
project_id UUID,
|
||||
|
||||
-- Profile hash to detect changes (SHA-256 of serialized company profile)
-- VARCHAR(64) matches the length of a hex-encoded SHA-256 digest.
|
||||
profile_hash VARCHAR(64),
|
||||
|
||||
-- Stats from last sync
|
||||
total_mappings INTEGER DEFAULT 0,
|
||||
canonical_controls_matched INTEGER DEFAULT 0,
|
||||
tom_controls_covered INTEGER DEFAULT 0,
|
||||
|
||||
-- Defaults to insert time; an update must set it explicitly to refresh it
-- (no trigger is defined in this migration).
last_synced_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
|
||||
-- One sync state per tenant+project
-- NOTE(review): project_id is nullable and PostgreSQL treats NULLs as distinct
-- in a UNIQUE constraint by default, so a tenant can accumulate several rows
-- with project_id IS NULL, and an upsert using ON CONFLICT (tenant_id,
-- project_id) would insert a new row each time instead of updating. Enforcing
-- a single project-less row needs NULLS NOT DISTINCT (PostgreSQL 15+) or a
-- partial unique index WHERE project_id IS NULL -- confirm against the
-- writer code and target version.
UNIQUE (tenant_id, project_id)
|
||||
);
|
||||
@@ -0,0 +1,3 @@
|
||||
-- Obligations: vendor linkage for Art. 28 GDPR.
-- Adds a nullable JSONB column defaulting to an empty array; the default also
-- applies to rows that already exist. IF NOT EXISTS makes the statement
-- re-runnable. NOTE(review): presumably holds an array of vendor ids -- the
-- element type is not enforced by the database; confirm against the writer.
|
||||
ALTER TABLE compliance_obligations
|
||||
ADD COLUMN IF NOT EXISTS linked_vendor_ids JSONB DEFAULT '[]'::jsonb;
|
||||
@@ -0,0 +1,3 @@
|
||||
-- Retention periods (Loeschfristen): vendor linkage.
-- Adds a nullable JSONB column defaulting to an empty array; the default also
-- applies to rows that already exist. IF NOT EXISTS makes the statement
-- re-runnable. NOTE(review): presumably holds an array of vendor ids -- the
-- element type is not enforced by the database; confirm against the writer.
|
||||
ALTER TABLE compliance_loeschfristen
|
||||
ADD COLUMN IF NOT EXISTS linked_vendor_ids JSONB DEFAULT '[]'::jsonb;
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,873 @@
|
||||
-- Migration 073: Module Document Templates
|
||||
-- Reference templates for VVT, TOM, Loeschfristen and Pflichten modules
|
||||
-- These match the structure of the module-specific document generators
|
||||
-- and enable versioning in the document-generator
|
||||
|
||||
-- ===========================================================================
|
||||
-- Template 1: VVT — Verarbeitungsverzeichnis (Art. 30 DSGVO)
|
||||
-- ===========================================================================
|
||||
INSERT INTO compliance_legal_templates (
|
||||
id, tenant_id, document_type, title, description, content,
|
||||
placeholders, language, jurisdiction,
|
||||
license_id, license_name, source_name,
|
||||
attribution_required, is_complete_document, version, status,
|
||||
created_at, updated_at
|
||||
) SELECT
|
||||
gen_random_uuid(),
|
||||
'9282a473-5c95-4b3a-bf78-0ecc0ec71d3e',
|
||||
'vvt_register',
|
||||
'Verarbeitungsverzeichnis (Art. 30 DSGVO)',
|
||||
'Vollstaendiges Verzeichnis von Verarbeitungstaetigkeiten gemaess Art. 30 Abs. 1 DSGVO. Dokumentiert alle Verarbeitungen mit Rechtsgrundlagen, Datenkategorien, Empfaengern, Drittlandtransfers und Loeschfristen.',
|
||||
$template$# Verarbeitungsverzeichnis (Art. 30 DSGVO)
|
||||
|
||||
## Dokumentenkontrolle
|
||||
|
||||
| Feld | Wert |
|
||||
|------|------|
|
||||
| Unternehmen | {{COMPANY_NAME}} |
|
||||
| Dokumenttyp | Verzeichnis von Verarbeitungstaetigkeiten |
|
||||
| Version | {{DOCUMENT_VERSION}} |
|
||||
| Datum | {{VERSION_DATE}} |
|
||||
| Klassifizierung | Vertraulich |
|
||||
| Datenschutzbeauftragter | {{DPO_NAME}} |
|
||||
| Kontakt DSB | {{DPO_CONTACT}} |
|
||||
| Verantwortlicher | {{RESPONSIBLE_PERSON}} |
|
||||
| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
|
||||
|
||||
### Aenderungshistorie
|
||||
|
||||
| Version | Datum | Autor | Aenderung |
|
||||
|---------|-------|-------|-----------|
|
||||
| {{DOCUMENT_VERSION}} | {{VERSION_DATE}} | {{DPO_NAME}} | Erstfassung |
|
||||
|
||||
---
|
||||
|
||||
## 1. Ziel und Zweck
|
||||
|
||||
Dieses Verarbeitungsverzeichnis dient der Dokumentation aller Verarbeitungstaetigkeiten von **{{COMPANY_NAME}}** gemaess Art. 30 Abs. 1 DSGVO. Es enthaelt saemtliche Pflichtangaben und wird regelmaessig auf Vollstaendigkeit und Aktualitaet geprueft.
|
||||
|
||||
### Gesetzliche Grundlage
|
||||
|
||||
| Rechtsgrundlage | Inhalt |
|
||||
|-----------------|--------|
|
||||
| **Art. 30 Abs. 1 DSGVO** | Pflicht des Verantwortlichen, ein Verzeichnis aller Verarbeitungstaetigkeiten zu fuehren |
|
||||
| **Art. 30 Abs. 2 DSGVO** | Pflicht des Auftragsverarbeiters, ein Verzeichnis aller Kategorien von Verarbeitungstaetigkeiten zu fuehren |
|
||||
| **Art. 30 Abs. 4 DSGVO** | Bereitstellungspflicht gegenueber der Aufsichtsbehoerde |
|
||||
| **Art. 5 Abs. 2 DSGVO** | Rechenschaftspflicht — Nachweis der Einhaltung der DSGVO-Grundsaetze |
|
||||
|
||||
---
|
||||
|
||||
## 2. Organisation und Verantwortlichkeiten
|
||||
|
||||
| Rolle | Person / Abteilung |
|
||||
|-------|--------------------|
|
||||
| Verantwortlicher (Art. 4 Nr. 7) | {{RESPONSIBLE_PERSON}} |
|
||||
| Datenschutzbeauftragter (Art. 37-39) | {{DPO_NAME}} ({{DPO_CONTACT}}) |
|
||||
| VVT-Pflege | Fachabteilungen in Abstimmung mit DSB |
|
||||
|
||||
**Hinweis:** Jede Fachabteilung ist verpflichtet, neue Verarbeitungstaetigkeiten vor deren Beginn beim DSB zu melden. Aenderungen an bestehenden Verarbeitungen sind unverzueglich zu kommunizieren.
|
||||
|
||||
---
|
||||
|
||||
## 3. Verarbeitungstaetigkeiten (Art. 30 Abs. 1)
|
||||
|
||||
### Pflichtangaben je Verarbeitungstaetigkeit
|
||||
|
||||
Fuer jede Verarbeitungstaetigkeit werden folgende Pflichtfelder nach Art. 30 DSGVO dokumentiert:
|
||||
|
||||
| Pflichtfeld (Art. 30) | Beschreibung |
|
||||
|------------------------|-------------|
|
||||
| **VVT-Nr.** | Eindeutige Kennung der Verarbeitungstaetigkeit |
|
||||
| **Bezeichnung** | Bezeichnung der Verarbeitungstaetigkeit |
|
||||
| **Verantwortlicher** | Name und Kontaktdaten des Verantwortlichen |
|
||||
| **Geschaeftsbereich** | Zustaendige Organisationseinheit |
|
||||
| **Zwecke der Verarbeitung** | Beschreibung aller Verarbeitungszwecke |
|
||||
| **Rechtsgrundlage(n)** | Art. 6 Abs. 1 lit. a-f DSGVO; ggf. Art. 9 Abs. 2 DSGVO |
|
||||
| **Kategorien betroffener Personen** | z.B. Mitarbeiter, Kunden, Lieferanten, Schueler |
|
||||
| **Kategorien personenbezogener Daten** | z.B. Stammdaten, Kontaktdaten, Vertragsdaten; Art. 9-Kategorien gesondert kennzeichnen |
|
||||
| **Empfaengerkategorien** | Intern, extern, Auftragsverarbeiter, Behoerden |
|
||||
| **Uebermittlung an Drittlaender** | Zielland, Empfaenger, Transfermechanismus (Art. 44-49) |
|
||||
| **Loeschfristen** | Vorgesehene Fristen fuer die Loeschung, Rechtsgrundlage, Verfahren |
|
||||
| **TOM (Art. 32)** | Beschreibung der technischen und organisatorischen Massnahmen |
|
||||
|
||||
### Verarbeitungsuebersicht
|
||||
|
||||
*Die konkreten Verarbeitungstaetigkeiten werden vom VVT-Modul automatisch in das Dokument eingefuegt. Jede Verarbeitungstaetigkeit wird als separate Detailkarte mit allen Pflichtfeldern dargestellt.*
|
||||
|
||||
| VVT-Nr. | Bezeichnung | Geschaeftsbereich | Rechtsgrundlage | Status |
|
||||
|----------|-------------|-------------------|-----------------|--------|
|
||||
| *Wird automatisch befuellt* | | | | |
|
||||
|
||||
### Detailkarten
|
||||
|
||||
Fuer jede Verarbeitungstaetigkeit wird eine Detailkarte erstellt mit:
|
||||
|
||||
- Alle Pflichtangaben nach Art. 30 in tabellarischer Form
|
||||
- Kennzeichnung besonderer Kategorien (Art. 9 DSGVO)
|
||||
- Kennzeichnung DSFA-Pflicht (Art. 35 DSGVO)
|
||||
- Kennzeichnung Drittlanduebermittlung (Art. 44-49 DSGVO)
|
||||
- Strukturierte TOMs nach Kategorie (Zugriffskontrolle, Vertraulichkeit, Integritaet, Verfuegbarkeit, Trennbarkeit)
|
||||
- Schutzniveau und Deployment-Modell
|
||||
|
||||
---
|
||||
|
||||
## 4. Auftragsverarbeiter (Art. 30 Abs. 2)
|
||||
|
||||
Sofern **{{COMPANY_NAME}}** als Auftragsverarbeiter taetig ist, wird ein separates Verzeichnis nach Art. 30 Abs. 2 DSGVO gefuehrt. Dieses enthaelt:
|
||||
|
||||
| Pflichtfeld (Art. 30 Abs. 2) | Beschreibung |
|
||||
|-------------------------------|-------------|
|
||||
| Name und Kontaktdaten des Auftragsverarbeiters | {{COMPANY_NAME}} |
|
||||
| Kategorien von Verarbeitungen | Art der im Auftrag durchgefuehrten Verarbeitungen |
|
||||
| Name und Kontaktdaten des Verantwortlichen | Auftraggeber |
|
||||
| Uebermittlungen in Drittlaender | Zielland, Empfaenger, Garantien |
|
||||
| Technische und organisatorische Massnahmen | Art. 32 DSGVO |
|
||||
|
||||
---
|
||||
|
||||
## 5. TOM-Beschreibung (Art. 32 DSGVO)
|
||||
|
||||
Fuer jede Verarbeitungstaetigkeit werden die technischen und organisatorischen Massnahmen dokumentiert:
|
||||
|
||||
| Kategorie | Beschreibung |
|
||||
|-----------|-------------|
|
||||
| **Zugriffskontrolle** | Massnahmen zur Steuerung des Zugriffs auf personenbezogene Daten |
|
||||
| **Vertraulichkeit** | Verschluesselung, Pseudonymisierung, Zutrittskontrolle |
|
||||
| **Integritaet** | Eingabekontrolle, Weitergabekontrolle, Protokollierung |
|
||||
| **Verfuegbarkeit** | Backup, Redundanz, Disaster Recovery |
|
||||
| **Trennbarkeit** | Mandantentrennung, Zweckbindung |
|
||||
|
||||
**Verweis:** Die vollstaendige TOM-Dokumentation wird im separaten TOM-Modul gefuehrt und hier je Verarbeitungstaetigkeit referenziert.
|
||||
|
||||
---
|
||||
|
||||
## 6. Pruefverfahren und Revision
|
||||
|
||||
| Eigenschaft | Wert |
|
||||
|-------------|------|
|
||||
| Pruefintervall | Jaehrlich |
|
||||
| Letzte Pruefung | {{VERSION_DATE}} |
|
||||
| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
|
||||
| Aktuelle Version | {{DOCUMENT_VERSION}} |
|
||||
|
||||
### Pruefpunkte
|
||||
|
||||
Bei jeder Pruefung wird das VVT auf folgende Punkte ueberprueft:
|
||||
|
||||
- Vollstaendigkeit: Sind alle Verarbeitungstaetigkeiten erfasst?
|
||||
- Aktualitaet: Stimmen die Angaben noch mit der Praxis ueberein?
|
||||
- Art. 30-Konformitaet: Enthalten alle Eintraege die Pflichtangaben?
|
||||
- Art. 9-Kennzeichnung: Sind besondere Kategorien korrekt markiert?
|
||||
- Drittlandtransfers: Sind Transfermechanismen dokumentiert?
|
||||
- Loeschfristen: Sind Aufbewahrungsfristen definiert und aktuell?
|
||||
- TOM-Verweise: Sind Massnahmen je Verarbeitung beschrieben?
|
||||
|
||||
---
|
||||
|
||||
*Dieses Dokument wird automatisch vom VVT-Modul generiert und enthaelt alle erfassten Verarbeitungstaetigkeiten mit vollstaendigen Pflichtangaben nach Art. 30 DSGVO.*
|
||||
|
||||
*Erstellt mit BreakPilot Compliance — {{COMPANY_NAME}} | Stand: {{VERSION_DATE}} | Version {{DOCUMENT_VERSION}}*
|
||||
$template$,
|
||||
'["COMPANY_NAME","DPO_NAME","DPO_CONTACT","RESPONSIBLE_PERSON","DOCUMENT_VERSION","VERSION_DATE","NEXT_REVIEW_DATE"]'::jsonb,
|
||||
'de', 'DE',
|
||||
'mit', 'MIT License', 'BreakPilot Compliance',
|
||||
false, true, '1.0.0', 'published',
|
||||
NOW(), NOW()
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM compliance_legal_templates
|
||||
WHERE document_type = 'vvt_register'
|
||||
AND tenant_id = '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e'
|
||||
);
|
||||
|
||||
-- ===========================================================================
|
||||
-- Template 2: TOM — TOM-Dokumentation (Art. 32 DSGVO)
|
||||
-- ===========================================================================
|
||||
INSERT INTO compliance_legal_templates (
|
||||
id, tenant_id, document_type, title, description, content,
|
||||
placeholders, language, jurisdiction,
|
||||
license_id, license_name, source_name,
|
||||
attribution_required, is_complete_document, version, status,
|
||||
created_at, updated_at
|
||||
) SELECT
|
||||
gen_random_uuid(),
|
||||
'9282a473-5c95-4b3a-bf78-0ecc0ec71d3e',
|
||||
'tom_documentation',
|
||||
'TOM-Dokumentation (Art. 32 DSGVO)',
|
||||
'Dokumentation aller technischen und organisatorischen Massnahmen gemaess Art. 32 DSGVO. Umfasst Schutzbedarf, Risikoprofil, Massnahmenkatalog nach Kategorie, SDM-Gewaehrleistungsziele und Compliance-Status.',
|
||||
$template$# TOM-Dokumentation (Art. 32 DSGVO)
|
||||
|
||||
## Dokumentenkontrolle
|
||||
|
||||
| Feld | Wert |
|
||||
|------|------|
|
||||
| Unternehmen | {{COMPANY_NAME}} |
|
||||
| Dokumenttyp | Technische und Organisatorische Massnahmen |
|
||||
| Version | {{DOCUMENT_VERSION}} |
|
||||
| Datum | {{VERSION_DATE}} |
|
||||
| Klassifizierung | Vertraulich |
|
||||
| IT-Sicherheitsbeauftragter | {{ISB_NAME}} |
|
||||
| Datenschutzbeauftragter | {{DPO_NAME}} |
|
||||
| Geschaeftsfuehrung | {{GF_NAME}} |
|
||||
| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
|
||||
|
||||
### Aenderungshistorie
|
||||
|
||||
| Version | Datum | Autor | Aenderung |
|
||||
|---------|-------|-------|-----------|
|
||||
| {{DOCUMENT_VERSION}} | {{VERSION_DATE}} | {{ISB_NAME}} | Erstfassung |
|
||||
|
||||
---
|
||||
|
||||
## 1. Ziel und Zweck
|
||||
|
||||
Diese TOM-Dokumentation beschreibt die technischen und organisatorischen Massnahmen zum Schutz personenbezogener Daten bei **{{COMPANY_NAME}}**. Sie dient der Umsetzung folgender DSGVO-Anforderungen:
|
||||
|
||||
| Rechtsgrundlage | Inhalt |
|
||||
|-----------------|--------|
|
||||
| **Art. 32 Abs. 1 lit. a DSGVO** | Pseudonymisierung und Verschluesselung personenbezogener Daten |
|
||||
| **Art. 32 Abs. 1 lit. b DSGVO** | Vertraulichkeit, Integritaet, Verfuegbarkeit und Belastbarkeit der Systeme auf Dauer sicherstellen |
|
||||
| **Art. 32 Abs. 1 lit. c DSGVO** | Rasche Wiederherstellung der Verfuegbarkeit bei physischem oder technischem Zwischenfall |
|
||||
| **Art. 32 Abs. 1 lit. d DSGVO** | Regelmaessige Ueberpruefung, Bewertung und Evaluierung der Wirksamkeit der Massnahmen |
|
||||
|
||||
Die TOM-Dokumentation ist fester Bestandteil des Datenschutz-Managementsystems und wird regelmaessig ueberprueft und aktualisiert.
|
||||
|
||||
---
|
||||
|
||||
## 2. Geltungsbereich
|
||||
|
||||
Diese TOM-Dokumentation gilt fuer alle IT-Systeme, Anwendungen und Verarbeitungsprozesse von **{{COMPANY_NAME}}**. Die dokumentierten Massnahmen stammen aus zwei Quellen:
|
||||
|
||||
- **Embedded Library (TOM-xxx):** Integrierte Kontrollbibliothek mit spezifischen Massnahmen fuer Art. 32 DSGVO
|
||||
- **Canonical Control Library (CP-CLIB):** Uebergreifende Kontrollbibliothek mit framework-uebergreifenden Massnahmen
|
||||
|
||||
---
|
||||
|
||||
## 3. Grundprinzipien Art. 32
|
||||
|
||||
- **Vertraulichkeit:** Schutz personenbezogener Daten vor unbefugter Kenntnisnahme durch Zutrittskontrolle, Zugangskontrolle, Zugriffskontrolle und Verschluesselung (Art. 32 Abs. 1 lit. b DSGVO).
|
||||
- **Integritaet:** Sicherstellung, dass personenbezogene Daten nicht unbefugt oder unbeabsichtigt veraendert werden koennen, durch Eingabekontrolle, Weitergabekontrolle und Protokollierung (Art. 32 Abs. 1 lit. b DSGVO).
|
||||
- **Verfuegbarkeit und Belastbarkeit:** Gewaehrleistung, dass Systeme und Dienste bei Lastspitzen und Stoerungen zuverlaessig funktionieren, durch Backup, Redundanz und Disaster Recovery (Art. 32 Abs. 1 lit. b DSGVO).
|
||||
- **Rasche Wiederherstellbarkeit:** Faehigkeit, nach einem physischen oder technischen Zwischenfall Daten und Systeme schnell wiederherzustellen, durch getestete Recovery-Prozesse (Art. 32 Abs. 1 lit. c DSGVO).
|
||||
- **Regelmaessige Wirksamkeitspruefung:** Verfahren zur regelmaessigen Ueberpruefung, Bewertung und Evaluierung der Wirksamkeit aller technischen und organisatorischen Massnahmen (Art. 32 Abs. 1 lit. d DSGVO).
|
||||
|
||||
---
|
||||
|
||||
## 4. Schutzbedarf und Risikoanalyse
|
||||
|
||||
Die Schutzbedarfsanalyse bildet die Grundlage fuer die Auswahl und Priorisierung der Massnahmen.
|
||||
|
||||
| Kriterium | Bewertung |
|
||||
|-----------|-----------|
|
||||
| Vertraulichkeit | *Wird vom TOM-Generator automatisch ermittelt* |
|
||||
| Integritaet | *Wird vom TOM-Generator automatisch ermittelt* |
|
||||
| Verfuegbarkeit | *Wird vom TOM-Generator automatisch ermittelt* |
|
||||
| Schutzniveau | *Basiert auf CIA-Bewertung* |
|
||||
| DSFA-Pflicht | *Wird automatisch berechnet* |
|
||||
|
||||
**Hinweis:** Die detaillierte Schutzbedarfsanalyse wird im TOM-Modul ueber den Risiko-Wizard durchgefuehrt. Die Ergebnisse fliessen automatisch in die Massnahmenauswahl ein.
|
||||
|
||||
---
|
||||
|
||||
## 5. Massnahmenkatalog
|
||||
|
||||
### 5.1 Zutrittskontrolle
|
||||
|
||||
Massnahmen zur Verhinderung des unbefugten Zutritts zu Datenverarbeitungsanlagen.
|
||||
|
||||
| Massnahme | Typ | Status | Verantwortlich |
|
||||
|-----------|-----|--------|----------------|
|
||||
| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
|
||||
|
||||
### 5.2 Zugangskontrolle
|
||||
|
||||
Massnahmen zur Verhinderung der unbefugten Nutzung von Datenverarbeitungssystemen.
|
||||
|
||||
| Massnahme | Typ | Status | Verantwortlich |
|
||||
|-----------|-----|--------|----------------|
|
||||
| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
|
||||
|
||||
### 5.3 Zugriffskontrolle
|
||||
|
||||
Massnahmen, die gewaehrleisten, dass ausschliesslich berechtigte Personen auf Daten zugreifen koennen.
|
||||
|
||||
| Massnahme | Typ | Status | Verantwortlich |
|
||||
|-----------|-----|--------|----------------|
|
||||
| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
|
||||
|
||||
### 5.4 Weitergabekontrolle
|
||||
|
||||
Massnahmen zum Schutz personenbezogener Daten bei elektronischer Uebertragung und Transport.
|
||||
|
||||
| Massnahme | Typ | Status | Verantwortlich |
|
||||
|-----------|-----|--------|----------------|
|
||||
| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
|
||||
|
||||
### 5.5 Eingabekontrolle
|
||||
|
||||
Massnahmen zur nachtraeglichen Ueberpruefung, ob und von wem Daten eingegeben, veraendert oder entfernt worden sind.
|
||||
|
||||
| Massnahme | Typ | Status | Verantwortlich |
|
||||
|-----------|-----|--------|----------------|
|
||||
| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
|
||||
|
||||
### 5.6 Auftragskontrolle
|
||||
|
||||
Massnahmen, die gewaehrleisten, dass personenbezogene Daten nur entsprechend den Weisungen des Auftraggebers verarbeitet werden.
|
||||
|
||||
| Massnahme | Typ | Status | Verantwortlich |
|
||||
|-----------|-----|--------|----------------|
|
||||
| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
|
||||
|
||||
### 5.7 Verschluesselung und Pseudonymisierung
|
||||
|
||||
Massnahmen zur Pseudonymisierung und Verschluesselung personenbezogener Daten (Art. 32 Abs. 1 lit. a DSGVO).
|
||||
|
||||
| Massnahme | Typ | Status | Verantwortlich |
|
||||
|-----------|-----|--------|----------------|
|
||||
| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
|
||||
|
||||
### 5.8 Verfuegbarkeit und Belastbarkeit
|
||||
|
||||
Massnahmen zur Gewaehrleistung der Verfuegbarkeit und Belastbarkeit der Systeme (Art. 32 Abs. 1 lit. b DSGVO).
|
||||
|
||||
| Massnahme | Typ | Status | Verantwortlich |
|
||||
|-----------|-----|--------|----------------|
|
||||
| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
|
||||
|
||||
### 5.9 Wiederherstellbarkeit
|
||||
|
||||
Massnahmen zur raschen Wiederherstellung der Verfuegbarkeit nach einem Zwischenfall (Art. 32 Abs. 1 lit. c DSGVO).
|
||||
|
||||
| Massnahme | Typ | Status | Verantwortlich |
|
||||
|-----------|-----|--------|----------------|
|
||||
| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
|
||||
|
||||
### 5.10 Ueberpruefung und Bewertung
|
||||
|
||||
Verfahren zur regelmaessigen Ueberpruefung, Bewertung und Evaluierung (Art. 32 Abs. 1 lit. d DSGVO).
|
||||
|
||||
| Massnahme | Typ | Status | Verantwortlich |
|
||||
|-----------|-----|--------|----------------|
|
||||
| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
|
||||
|
||||
---
|
||||
|
||||
## 6. SDM Gewaehrleistungsziele
|
||||
|
||||
Das Standard-Datenschutzmodell (SDM) definiert sieben Gewaehrleistungsziele. Die implementierten Massnahmen decken folgende Ziele ab:
|
||||
|
||||
| Gewaehrleistungsziel | Abgedeckt | Gesamt | Abdeckung (%) |
|
||||
|----------------------|-----------|--------|---------------|
|
||||
| Verfuegbarkeit | *automatisch* | | |
|
||||
| Integritaet | *automatisch* | | |
|
||||
| Vertraulichkeit | *automatisch* | | |
|
||||
| Nichtverkettung | *automatisch* | | |
|
||||
| Intervenierbarkeit | *automatisch* | | |
|
||||
| Transparenz | *automatisch* | | |
|
||||
| Datenminimierung | *automatisch* | | |
|
||||
|
||||
---
|
||||
|
||||
## 7. Verantwortlichkeiten
|
||||
|
||||
| Rolle | Aufgabe |
|
||||
|-------|---------|
|
||||
| Geschaeftsfuehrung ({{GF_NAME}}) | Gesamtverantwortung, Freigabe der TOM-Dokumentation |
|
||||
| IT-Sicherheitsbeauftragter ({{ISB_NAME}}) | Pflege und Umsetzung technischer Massnahmen |
|
||||
| Datenschutzbeauftragter ({{DPO_NAME}}) | Ueberwachung, Beratung, Compliance-Check |
|
||||
| Fachabteilungen | Umsetzung organisatorischer Massnahmen, Meldepflicht |
|
||||
|
||||
---
|
||||
|
||||
## 8. Compliance-Status
|
||||
|
||||
*Der aktuelle Compliance-Score wird vom TOM-Modul automatisch berechnet und enthaelt Befunde nach Schweregrad (Kritisch, Hoch, Mittel, Niedrig).*
|
||||
|
||||
| Kennzahl | Wert |
|
||||
|----------|------|
|
||||
| Gepruefte Massnahmen | *automatisch* |
|
||||
| Bestanden | *automatisch* |
|
||||
| Beanstandungen | *automatisch* |
|
||||
|
||||
---
|
||||
|
||||
## 9. Pruef- und Revisionszyklus
|
||||
|
||||
| Eigenschaft | Wert |
|
||||
|-------------|------|
|
||||
| Pruefintervall | Jaehrlich |
|
||||
| Letzte Pruefung | {{VERSION_DATE}} |
|
||||
| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
|
||||
| Aktuelle Version | {{DOCUMENT_VERSION}} |
|
||||
|
||||
### Pruefpunkte
|
||||
|
||||
- Vollstaendigkeit aller Massnahmen (neue Systeme oder Verarbeitungen erfasst?)
|
||||
- Aktualitaet des Umsetzungsstatus (Aenderungen seit letzter Pruefung?)
|
||||
- Wirksamkeit der technischen Massnahmen (Penetration-Tests, Audit-Ergebnisse)
|
||||
- Angemessenheit der organisatorischen Massnahmen (Schulungen, Richtlinien aktuell?)
|
||||
- Abdeckung aller SDM-Gewaehrleistungsziele
|
||||
- Zuordnung von Verantwortlichkeiten zu allen Massnahmen
|
||||
|
||||
---
|
||||
|
||||
*Dieses Dokument wird automatisch vom TOM-Modul generiert und enthaelt alle erfassten technischen und organisatorischen Massnahmen nach Art. 32 DSGVO.*
|
||||
|
||||
*Erstellt mit BreakPilot Compliance — {{COMPANY_NAME}} | Stand: {{VERSION_DATE}} | Version {{DOCUMENT_VERSION}}*
|
||||
$template$,
|
||||
'["COMPANY_NAME","ISB_NAME","GF_NAME","DPO_NAME","DOCUMENT_VERSION","VERSION_DATE","NEXT_REVIEW_DATE"]'::jsonb,
|
||||
'de', 'DE',
|
||||
'mit', 'MIT License', 'BreakPilot Compliance',
|
||||
false, true, '1.0.0', 'published',
|
||||
NOW(), NOW()
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM compliance_legal_templates
|
||||
WHERE document_type = 'tom_documentation'
|
||||
AND tenant_id = '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e'
|
||||
);
|
||||
|
||||
-- ===========================================================================
|
||||
-- Template 3: Loeschkonzept (Art. 5/17 DSGVO)
|
||||
-- ===========================================================================
|
||||
INSERT INTO compliance_legal_templates (
|
||||
id, tenant_id, document_type, title, description, content,
|
||||
placeholders, language, jurisdiction,
|
||||
license_id, license_name, source_name,
|
||||
attribution_required, is_complete_document, version, status,
|
||||
created_at, updated_at
|
||||
) SELECT
|
||||
gen_random_uuid(),
|
||||
'9282a473-5c95-4b3a-bf78-0ecc0ec71d3e',
|
||||
'loeschkonzept',
|
||||
'Loeschkonzept (Art. 5/17 DSGVO)',
|
||||
'Systematisches Loeschkonzept gemaess Art. 5 Abs. 1 lit. e und Art. 17 DSGVO. Dokumentiert Loeschregeln, Aufbewahrungstreiber, Loeschmethoden, Legal Holds und Auftragsverarbeiter-Verknuepfungen.',
|
||||
$template$# Loeschkonzept (Art. 5/17 DSGVO)
|
||||
|
||||
## Dokumentenkontrolle
|
||||
|
||||
| Feld | Wert |
|
||||
|------|------|
|
||||
| Unternehmen | {{COMPANY_NAME}} |
|
||||
| Dokumenttyp | Loeschkonzept |
|
||||
| Version | {{DOCUMENT_VERSION}} |
|
||||
| Datum | {{VERSION_DATE}} |
|
||||
| Klassifizierung | Vertraulich |
|
||||
| Datenschutzbeauftragter | {{DPO_NAME}} |
|
||||
| Kontakt DSB | {{DPO_CONTACT}} |
|
||||
| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
|
||||
|
||||
### Aenderungshistorie
|
||||
|
||||
| Version | Datum | Autor | Aenderung |
|
||||
|---------|-------|-------|-----------|
|
||||
| {{DOCUMENT_VERSION}} | {{VERSION_DATE}} | {{DPO_NAME}} | Erstfassung |
|
||||
|
||||
---
|
||||
|
||||
## 1. Ziel und Zweck
|
||||
|
||||
Dieses Loeschkonzept definiert die systematischen Regeln und Verfahren fuer die Loeschung personenbezogener Daten bei **{{COMPANY_NAME}}**. Es dient der Umsetzung folgender DSGVO-Anforderungen:
|
||||
|
||||
| Rechtsgrundlage | Inhalt |
|
||||
|-----------------|--------|
|
||||
| **Art. 5 Abs. 1 lit. e DSGVO** | Grundsatz der Speicherbegrenzung — Daten nur so lange speichern, wie fuer den Zweck erforderlich |
|
||||
| **Art. 17 DSGVO** | Recht auf Loeschung ("Recht auf Vergessenwerden") — Betroffene koennen Loeschung verlangen |
|
||||
| **Art. 30 DSGVO** | Verzeichnis von Verarbeitungstaetigkeiten — Loeschfristen muessen dokumentiert werden |
|
||||
| **Art. 25 DSGVO** | Datenschutz durch Technikgestaltung — Loeschmechanismen moeglichst automatisiert |
|
||||
|
||||
Das Loeschkonzept ist fester Bestandteil des Datenschutz-Managementsystems und wird regelmaessig ueberprueft und aktualisiert.
|
||||
|
||||
---
|
||||
|
||||
## 2. Rechtsgrundlagen und Aufbewahrungstreiber
|
||||
|
||||
### Gesetzliche Aufbewahrungspflichten
|
||||
|
||||
| Aufbewahrungstreiber | Gesetz / Vorschrift | Frist |
|
||||
|----------------------|---------------------|-------|
|
||||
| Handelsrechtliche Aufbewahrung | § 257 HGB | 6 Jahre (Handelsbriefe), 8 Jahre (Buchungsbelege), 10 Jahre (Handelsbuecher, Jahresabschluesse) |
|
||||
| Steuerrechtliche Aufbewahrung | § 147 AO | 6 Jahre (Geschaeftsbriefe), 8 Jahre (Buchungsbelege), 10 Jahre (Buecher, Jahresabschluesse) |
|
||||
| Arbeitsrechtliche Aufbewahrung | Diverse arbeitsrechtliche Vorschriften | 3-10 Jahre je nach Dokumenttyp |
|
||||
| Sozialversicherungsrechtlich | §§ 28f, 110 SGB IV | 5 Jahre |
|
||||
| Produkthaftung | § 13 ProdHaftG | 10 Jahre |
|
||||
| Beweissicherung | §§ 195-199 BGB | 3 Jahre (regelmaessige Verjaehrung) |
|
||||
|
||||
### 3-Level-Loeschlogik
|
||||
|
||||
Die Loeschung folgt einer dreistufigen Priorisierung:
|
||||
|
||||
1. **Zweckende:** Daten werden geloescht, sobald der Verarbeitungszweck entfaellt
|
||||
2. **Gesetzliche Aufbewahrungspflichten:** Laengere Fristen aus HGB, AO etc. ueberschreiben Zweckende
|
||||
3. **Legal Hold:** Aufbewahrungspflicht aufgrund rechtlicher Verfahren setzt alle anderen Fristen aus
|
||||
|
||||
---
|
||||
|
||||
## 3. Datenkategorien und Fristen
|
||||
|
||||
### Loeschregeln-Uebersicht
|
||||
|
||||
| LF-Nr. | Datenobjekt | Loeschtrigger | Aufbewahrungsfrist | Loeschmethode | Status |
|
||||
|--------|-------------|---------------|--------------------|---------------|--------|
|
||||
| *Wird automatisch vom Loeschfristen-Modul befuellt* | | | | | |
|
||||
|
||||
### Detaillierte Loeschregeln
|
||||
|
||||
Fuer jede Loeschregel werden folgende Informationen dokumentiert:
|
||||
|
||||
| Feld | Beschreibung |
|
||||
|------|-------------|
|
||||
| Beschreibung | Detaillierte Beschreibung der betroffenen Daten |
|
||||
| Betroffenengruppen | Kategorien betroffener Personen |
|
||||
| Datenkategorien | Art der personenbezogenen Daten |
|
||||
| Verarbeitungszweck | Primaerer Zweck der Datenverarbeitung |
|
||||
| Loeschtrigger | Ereignis, das die Loeschfrist ausloest |
|
||||
| Aufbewahrungstreiber | Gesetzliche Grundlage fuer die Aufbewahrung |
|
||||
| Aufbewahrungsfrist | Dauer der Aufbewahrung mit Einheit |
|
||||
| Startereignis | Beginn der Fristberechnung |
|
||||
| Loeschmethode | Technisches Verfahren (Loeschung, Anonymisierung, Vernichtung) |
|
||||
| Speicherorte | Betroffene Systeme und Datenbanken |
|
||||
| Verantwortlich | Person oder Rolle |
|
||||
| Pruefintervall | Frequenz der Kontrolle |
|
||||
|
||||
---
|
||||
|
||||
## 4. Loeschmethoden
|
||||
|
||||
| Methode | Beschreibung | Anwendung |
|
||||
|---------|-------------|-----------|
|
||||
| **Physische Loeschung** | Unwiderrufliches Entfernen der Daten aus allen Systemen | Standard fuer nicht mehr benoetigte Daten |
|
||||
| **Anonymisierung** | Entfernen des Personenbezugs, sodass Daten nicht mehr zuordenbar sind | Statistik, Forschung, Archivierung |
|
||||
| **Pseudonymisierung** | Ersetzen identifizierender Merkmale durch Pseudonyme | Zwischenschritt, kein Ersatz fuer Loeschung |
|
||||
| **Physische Vernichtung** | Physische Zerstoerung der Datentraeger (Shredding, Degaussing) | Datentraeger-Entsorgung |
|
||||
| **Kryptographische Loeschung** | Vernichtung der Schluessel bei verschluesselten Daten | Cloud-Umgebungen, verschluesselte Backups |
|
||||
|
||||
---
|
||||
|
||||
## 5. Verantwortlichkeiten
|
||||
|
||||
| Rolle | Aufgabe |
|
||||
|-------|---------|
|
||||
| Datenschutzbeauftragter ({{DPO_NAME}}) | Ueberwachung, Beratung, Compliance-Pruefung |
|
||||
| Fachabteilungen | Definition des Zweckendes, Meldung neuer Datenkategorien |
|
||||
| IT-Abteilung | Technische Umsetzung der Loeschmechanismen |
|
||||
| Rechtsabteilung | Bewertung gesetzlicher Aufbewahrungspflichten, Legal Hold |
|
||||
|
||||
---
|
||||
|
||||
## 6. Legal Hold Verfahren
|
||||
|
||||
Ein Legal Hold setzt die regulaere Loeschung aus. Betroffene Daten duerfen trotz abgelaufener Frist nicht geloescht werden, bis der Hold aufgehoben wird.
|
||||
|
||||
### Verfahrensschritte
|
||||
|
||||
1. Rechtsabteilung / DSB identifiziert betroffene Datenkategorien
|
||||
2. Legal Hold wird im System aktiviert (Status: Aktiv)
|
||||
3. Automatische Loeschung wird fuer betroffene Policies ausgesetzt
|
||||
4. Regelmaessige Pruefung, ob der Legal Hold noch erforderlich ist
|
||||
5. Nach Aufhebung: Regulaere Loeschfristen greifen wieder
|
||||
|
||||
### Aktive Legal Holds
|
||||
|
||||
*Wird automatisch vom Loeschfristen-Modul befuellt. Enthaelt: Datenobjekt, Grund, Rechtsgrundlage, Beginn, voraussichtliches Ende.*
|
||||
|
||||
---
|
||||
|
||||
## 7. Auftragsverarbeiter mit Loeschpflichten
|
||||
|
||||
Loeschregeln, die mit Auftragsverarbeitern verknuepft sind, stellen sicher, dass auch bei extern verarbeiteten Daten die Loeschpflichten eingehalten werden (Art. 28 DSGVO).
|
||||
|
||||
| Loeschregel | LF-Nr. | Auftragsverarbeiter | Aufbewahrungsfrist |
|
||||
|-------------|--------|--------------------|--------------------|
|
||||
| *Wird automatisch vom Loeschfristen-Modul befuellt* | | | |
|
||||
|
||||
**Hinweis:** Die vollstaendige Auftragsverarbeiter-Dokumentation wird im Vendor-Compliance-Modul gefuehrt.
|
||||
|
||||
---
|
||||
|
||||
## 8. VVT-Verknuepfung
|
||||
|
||||
Die Loeschregeln sind mit den Verarbeitungstaetigkeiten im Verarbeitungsverzeichnis (Art. 30 DSGVO) verknuepft:
|
||||
|
||||
| Loeschregel | LF-Nr. | VVT-Nr. | Verarbeitungstaetigkeit |
|
||||
|-------------|--------|---------|-------------------------|
|
||||
| *Wird automatisch vom Loeschfristen-Modul befuellt* | | | |
|
||||
|
||||
---
|
||||
|
||||
## 9. Compliance-Status
|
||||
|
||||
*Der aktuelle Compliance-Score wird vom Loeschfristen-Modul automatisch berechnet und enthaelt Befunde nach Schweregrad (Kritisch, Hoch, Mittel, Niedrig).*
|
||||
|
||||
| Kennzahl | Wert |
|
||||
|----------|------|
|
||||
| Gepruefte Policies | *automatisch* |
|
||||
| Bestanden | *automatisch* |
|
||||
| Beanstandungen | *automatisch* |
|
||||
|
||||
---
|
||||
|
||||
## 10. Pruef- und Revisionszyklus
|
||||
|
||||
| Eigenschaft | Wert |
|
||||
|-------------|------|
|
||||
| Pruefintervall | Jaehrlich |
|
||||
| Letzte Pruefung | {{VERSION_DATE}} |
|
||||
| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
|
||||
| Aktuelle Version | {{DOCUMENT_VERSION}} |
|
||||
|
||||
### Pruefpunkte
|
||||
|
||||
- Vollstaendigkeit aller Loeschregeln (neue Verarbeitungen erfasst?)
|
||||
- Aktualitaet der gesetzlichen Aufbewahrungsfristen
|
||||
- Wirksamkeit der technischen Loeschmechanismen
|
||||
- Einhaltung der definierten Loeschfristen
|
||||
- Angemessenheit der Verantwortlichkeiten
|
||||
- VVT-Verknuepfung vollstaendig?
|
||||
|
||||
---
|
||||
|
||||
*Dieses Dokument wird automatisch vom Loeschfristen-Modul generiert und enthaelt alle erfassten Loeschregeln mit Aufbewahrungstreibern, Fristen und Verantwortlichkeiten.*
|
||||
|
||||
*Erstellt mit BreakPilot Compliance — {{COMPANY_NAME}} | Stand: {{VERSION_DATE}} | Version {{DOCUMENT_VERSION}}*
|
||||
$template$,
|
||||
'["COMPANY_NAME","DPO_NAME","DPO_CONTACT","DOCUMENT_VERSION","VERSION_DATE","NEXT_REVIEW_DATE"]'::jsonb,
|
||||
'de', 'DE',
|
||||
'mit', 'MIT License', 'BreakPilot Compliance',
|
||||
false, true, '1.0.0', 'published',
|
||||
NOW(), NOW()
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM compliance_legal_templates
|
||||
WHERE document_type = 'loeschkonzept'
|
||||
AND tenant_id = '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e'
|
||||
);
|
||||
|
||||
-- ===========================================================================
|
||||
-- Template 4: Pflichtenregister (DSGVO/AI-Act)
|
||||
-- ===========================================================================
|
||||
INSERT INTO compliance_legal_templates (
|
||||
id, tenant_id, document_type, title, description, content,
|
||||
placeholders, language, jurisdiction,
|
||||
license_id, license_name, source_name,
|
||||
attribution_required, is_complete_document, version, status,
|
||||
created_at, updated_at
|
||||
) SELECT
|
||||
gen_random_uuid(),
|
||||
'9282a473-5c95-4b3a-bf78-0ecc0ec71d3e',
|
||||
'pflichtenregister',
|
||||
'Pflichtenregister (DSGVO/AI-Act)',
|
||||
'Vollstaendiges Pflichtenregister fuer alle regulatorischen Pflichten aus DSGVO, AI Act, NIS2 und BDSG. Dokumentiert Pflichten, Verantwortlichkeiten, Fristen, Nachweise und Compliance-Status.',
|
||||
$template$# Pflichtenregister (DSGVO / AI Act / NIS2)
|
||||
|
||||
## Dokumentenkontrolle
|
||||
|
||||
| Feld | Wert |
|
||||
|------|------|
|
||||
| Unternehmen | {{COMPANY_NAME}} |
|
||||
| Dokumenttyp | Pflichtenregister |
|
||||
| Version | {{DOCUMENT_VERSION}} |
|
||||
| Datum | {{VERSION_DATE}} |
|
||||
| Klassifizierung | Vertraulich |
|
||||
| Datenschutzbeauftragter | {{DPO_NAME}} |
|
||||
| Kontakt DSB | {{DPO_CONTACT}} |
|
||||
| Verantwortlicher | {{RESPONSIBLE_PERSON}} |
|
||||
| Rechtsabteilung | {{LEGAL_DEPARTMENT}} |
|
||||
| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
|
||||
|
||||
### Aenderungshistorie
|
||||
|
||||
| Version | Datum | Autor | Aenderung |
|
||||
|---------|-------|-------|-----------|
|
||||
| {{DOCUMENT_VERSION}} | {{VERSION_DATE}} | {{DPO_NAME}} | Erstfassung |
|
||||
|
||||
---
|
||||
|
||||
## 1. Ziel und Zweck
|
||||
|
||||
Dieses Pflichtenregister dokumentiert alle regulatorischen Pflichten, denen **{{COMPANY_NAME}}** unterliegt. Es dient der systematischen Erfassung, Ueberwachung und Nachverfolgung aller Compliance-Anforderungen aus den anwendbaren Regulierungen.
|
||||
|
||||
### Zwecke des Registers
|
||||
|
||||
- Vollstaendige Erfassung aller anwendbaren regulatorischen Pflichten
|
||||
- Zuordnung von Verantwortlichkeiten und Fristen
|
||||
- Nachverfolgung des Umsetzungsstatus
|
||||
- Dokumentation von Nachweisen fuer Audits
|
||||
- Identifikation von Compliance-Luecken und Handlungsbedarf
|
||||
|
||||
### Rechtsrahmen
|
||||
|
||||
| Rechtsrahmen | Relevanz |
|
||||
|-------------|----------|
|
||||
| **DSGVO (EU) 2016/679** | Datenschutz-Grundverordnung — Kernregulierung fuer personenbezogene Daten |
|
||||
| **AI Act (EU) 2024/1689** | KI-Verordnung — Anforderungen an KI-Systeme nach Risikoklasse |
|
||||
| **NIS2 (EU) 2022/2555** | Netzwerk- und Informationssicherheit — Cybersicherheitspflichten |
|
||||
| **BDSG** | Bundesdatenschutzgesetz — Nationale Ergaenzung zur DSGVO |
|
||||
|
||||
---
|
||||
|
||||
## 2. Geltungsbereich
|
||||
|
||||
Dieses Pflichtenregister gilt fuer alle Geschaeftsprozesse und IT-Systeme von **{{COMPANY_NAME}}**. Es umfasst Pflichten aus allen anwendbaren Regulierungen, gruppiert nach Rechtsquelle.
|
||||
|
||||
### Anwendbare Regulierungen
|
||||
|
||||
| Regulierung | Anzahl Pflichten | Status |
|
||||
|-------------|-----------------|--------|
|
||||
| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | |
|
||||
|
||||
---
|
||||
|
||||
## 3. Methodik
|
||||
|
||||
Die Identifikation und Bewertung der Pflichten erfolgt in drei Schritten:
|
||||
|
||||
1. **Pflicht-Identifikation:** Systematische Analyse aller anwendbaren Regulierungen und Extraktion der einzelnen Pflichten mit Artikel-Referenz, Beschreibung und Zielgruppe.
|
||||
2. **Bewertung und Priorisierung:** Jede Pflicht wird nach Prioritaet (kritisch, hoch, mittel, niedrig) und Dringlichkeit (Frist) bewertet. Die Bewertung basiert auf dem Risikopotenzial bei Nichterfuellung.
|
||||
3. **Ueberwachung und Nachverfolgung:** Regelmaessige Pruefung des Umsetzungsstatus, Aktualisierung der Fristen und Dokumentation von Nachweisen.
|
||||
|
||||
Die Pflichten werden ueber einen automatisierten Compliance-Check geprueft, der 11 Kriterien umfasst (siehe Abschnitt 10: Compliance-Status).
|
||||
|
||||
---
|
||||
|
||||
## 4. Regulatorische Grundlagen
|
||||
|
||||
| Regulierung | Pflichten | Kritisch | Hoch | Mittel | Niedrig | Abgeschlossen |
|
||||
|-------------|----------|----------|------|--------|---------|---------------|
|
||||
| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | | | | | |
|
||||
|
||||
---
|
||||
|
||||
## 5. Pflichtenuebersicht
|
||||
|
||||
Uebersicht aller Pflichten nach Regulierung und Status:
|
||||
|
||||
| Regulierung | Gesamt | Ausstehend | In Bearbeitung | Abgeschlossen | Ueberfaellig |
|
||||
|-------------|--------|------------|----------------|---------------|--------------|
|
||||
| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | | | | |
|
||||
|
||||
---
|
||||
|
||||
## 6. Detaillierte Pflichten
|
||||
|
||||
Fuer jede Pflicht werden folgende Informationen als Detailkarte dokumentiert:
|
||||
|
||||
| Feld | Beschreibung |
|
||||
|------|-------------|
|
||||
| Rechtsquelle | Regulierung und Artikel-Referenz |
|
||||
| Beschreibung | Detaillierte Beschreibung der Pflicht |
|
||||
| Prioritaet | Kritisch / Hoch / Mittel / Niedrig |
|
||||
| Status | Ausstehend / In Bearbeitung / Abgeschlossen / Ueberfaellig |
|
||||
| Verantwortlich | Person oder Abteilung |
|
||||
| Frist | Umsetzungsfrist |
|
||||
| Nachweise | Dokumentierte Belege fuer die Umsetzung |
|
||||
| Betroffene Systeme | IT-Systeme, die von der Pflicht betroffen sind |
|
||||
| Notizen | Zusaetzliche Anmerkungen und Handlungsempfehlungen |
|
||||
|
||||
### Pflichten nach Regulierung
|
||||
|
||||
*Die einzelnen Pflichten werden vom Pflichtenregister-Modul automatisch nach Rechtsquelle gruppiert und als Detailkarten mit allen Feldern in das Dokument eingefuegt. Die Sortierung erfolgt nach Prioritaet (kritisch zuerst).*
|
||||
|
||||
---
|
||||
|
||||
## 7. Verantwortlichkeiten
|
||||
|
||||
| Verantwortlich | Pflichten | Anzahl | Davon offen |
|
||||
|----------------|----------|--------|-------------|
|
||||
| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | | |
|
||||
|
||||
### Rollenmatrix
|
||||
|
||||
| Rolle | Aufgabe |
|
||||
|-------|---------|
|
||||
| Verantwortlicher ({{RESPONSIBLE_PERSON}}) | Gesamtverantwortung fuer Compliance |
|
||||
| Datenschutzbeauftragter ({{DPO_NAME}}) | Ueberwachung DSGVO-Pflichten, Beratung |
|
||||
| Rechtsabteilung ({{LEGAL_DEPARTMENT}}) | Bewertung regulatorischer Aenderungen, NIS2/AI-Act |
|
||||
| Fachabteilungen | Umsetzung zugewiesener Pflichten |
|
||||
| IT-Abteilung | Umsetzung technischer Anforderungen |
|
||||
|
||||
---
|
||||
|
||||
## 8. Fristen-Uebersicht
|
||||
|
||||
### Ueberfaellige Pflichten
|
||||
|
||||
| Pflicht | Regulierung | Frist | Tage ueberfaellig | Prioritaet |
|
||||
|---------|-------------|-------|--------------------:|-----------|
|
||||
| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | | | |
|
||||
|
||||
### Anstehende Fristen
|
||||
|
||||
| Pflicht | Regulierung | Frist | Verbleibend | Verantwortlich |
|
||||
|---------|-------------|-------|-------------|----------------|
|
||||
| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | | | |
|
||||
|
||||
---
|
||||
|
||||
## 9. Nachweisregister
|
||||
|
||||
Dokumentation der Nachweise (Evidence) fuer die Umsetzung der Pflichten:
|
||||
|
||||
| Pflicht | Regulierung | Nachweise | Status |
|
||||
|---------|-------------|-----------|--------|
|
||||
| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | | |
|
||||
|
||||
### Pflichten ohne Nachweise
|
||||
|
||||
*Das Modul identifiziert automatisch alle Pflichten, fuer die noch keine Nachweise hinterlegt wurden, und listet diese als Handlungsbedarf auf.*
|
||||
|
||||
---
|
||||
|
||||
## 10. Compliance-Status
|
||||
|
||||
*Der aktuelle Compliance-Score wird vom Pflichtenregister-Modul automatisch berechnet. Der Check umfasst 11 Kriterien und bewertet Befunde nach Schweregrad (Kritisch, Hoch, Mittel, Niedrig).*
|
||||
|
||||
| Kennzahl | Wert |
|
||||
|----------|------|
|
||||
| Compliance-Score | *automatisch (0-100)* |
|
||||
| Befunde gesamt | *automatisch* |
|
||||
| Kritisch | *automatisch* |
|
||||
| Hoch | *automatisch* |
|
||||
| Mittel | *automatisch* |
|
||||
| Niedrig | *automatisch* |
|
||||
|
||||
### Befunde und Empfehlungen
|
||||
|
||||
| Schweregrad | Befund | Betroffene Pflichten | Empfehlung |
|
||||
|-------------|--------|---------------------|------------|
|
||||
| *Wird automatisch vom Compliance-Check befuellt* | | | |
|
||||
|
||||
---
|
||||
|
||||
## 11. Pruef- und Revisionszyklus
|
||||
|
||||
| Eigenschaft | Wert |
|
||||
|-------------|------|
|
||||
| Pruefintervall | Jaehrlich |
|
||||
| Letzte Pruefung | {{VERSION_DATE}} |
|
||||
| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
|
||||
| Aktuelle Version | {{DOCUMENT_VERSION}} |
|
||||
|
||||
### Pruefpunkte
|
||||
|
||||
- Vollstaendigkeit: Sind alle anwendbaren Pflichten erfasst?
|
||||
- Aktualitaet: Gibt es neue Regulierungen oder Gesetzesaenderungen?
|
||||
- Umsetzungsstatus: Sind ueberfaellige Pflichten eskaliert?
|
||||
- Nachweise: Sind fuer alle abgeschlossenen Pflichten Belege hinterlegt?
|
||||
- Verantwortlichkeiten: Sind alle Pflichten zugewiesen?
|
||||
- Fristen: Sind neue Fristen aus Gesetzesaenderungen beruecksichtigt?
|
||||
|
||||
---
|
||||
|
||||
*Dieses Dokument wird automatisch vom Pflichtenregister-Modul generiert und enthaelt alle erfassten regulatorischen Pflichten mit Verantwortlichkeiten, Fristen und Nachweisen.*
|
||||
|
||||
*Erstellt mit BreakPilot Compliance — {{COMPANY_NAME}} | Stand: {{VERSION_DATE}} | Version {{DOCUMENT_VERSION}}*
|
||||
$template$,
|
||||
'["COMPANY_NAME","DPO_NAME","DPO_CONTACT","RESPONSIBLE_PERSON","LEGAL_DEPARTMENT","DOCUMENT_VERSION","VERSION_DATE","NEXT_REVIEW_DATE"]'::jsonb,
|
||||
'de', 'DE',
|
||||
'mit', 'MIT License', 'BreakPilot Compliance',
|
||||
false, true, '1.0.0', 'published',
|
||||
NOW(), NOW()
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM compliance_legal_templates
|
||||
WHERE document_type = 'pflichtenregister'
|
||||
AND tenant_id = '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e'
|
||||
);
|
||||
@@ -0,0 +1,73 @@
|
||||
-- Migration 074: Control Dedup Engine — DB Schema
|
||||
-- Supports the 4-stage dedup pipeline for atomic controls (Pass 0b).
|
||||
--
|
||||
-- Tables:
|
||||
-- 1. control_parent_links — M:N parent linking (one control → many regulations)
|
||||
-- 2. control_dedup_reviews — Review queue for borderline matches (0.85-0.92)
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- =============================================================================
|
||||
-- 1. Control Parent Links (M:N)
|
||||
-- Enables "1 Control erfuellt 5 Gesetze" — the biggest USP.
|
||||
-- An atomic control can have multiple parent controls from different
|
||||
-- regulations/obligations. This replaces the 1:1 parent_control_uuid FK.
|
||||
-- =============================================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS control_parent_links (
    id                      UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Linked control and its parent; a link row is removed automatically when
    -- either referenced control is deleted.
    control_uuid            UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
    parent_control_uuid     UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
    -- How the link came into existence.
    link_type               VARCHAR(30) NOT NULL DEFAULT 'decomposition'
                            CHECK (link_type IN ('decomposition', 'dedup_merge', 'manual', 'crosswalk')),
    -- Link confidence, constrained to the range 0..1.
    confidence              NUMERIC(3,2) DEFAULT 1.0
                            CHECK (confidence >= 0 AND confidence <= 1),
    source_regulation       VARCHAR(100),
    source_article          VARCHAR(100),
    obligation_candidate_id UUID REFERENCES obligation_candidates(id),
    created_at              TIMESTAMPTZ DEFAULT NOW(),
    -- At most one link per (control, parent) pair.
    CONSTRAINT uq_parent_link UNIQUE (control_uuid, parent_control_uuid)
);

-- Lookup indexes: by control, by parent, and by link type.
CREATE INDEX IF NOT EXISTS idx_cpl_control ON control_parent_links (control_uuid);
CREATE INDEX IF NOT EXISTS idx_cpl_parent  ON control_parent_links (parent_control_uuid);
CREATE INDEX IF NOT EXISTS idx_cpl_type    ON control_parent_links (link_type);

COMMENT ON TABLE control_parent_links IS
    'M:N parent links — one atomic control can fulfill multiple regulations/obligations. USP: "1 Control erfuellt 5 Gesetze"';
|
||||
|
||||
-- =============================================================================
|
||||
-- 2. Control Dedup Reviews
|
||||
-- Queue for borderline matches (similarity 0.85-0.92) that need human review.
|
||||
-- Reviewed entries get status updated to accepted/rejected.
|
||||
-- =============================================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS control_dedup_reviews (
    id                      UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Candidate control awaiting a decision.
    candidate_control_id    VARCHAR(30) NOT NULL,
    candidate_title         TEXT NOT NULL,
    candidate_objective     TEXT,

    -- Existing control the candidate was matched against, and how.
    matched_control_uuid    UUID REFERENCES canonical_controls(id),
    matched_control_id      VARCHAR(30),
    similarity_score        NUMERIC(4,3) DEFAULT 0.0,
    dedup_stage             VARCHAR(40) NOT NULL,
    dedup_details           JSONB DEFAULT '{}',

    -- Provenance of the candidate.
    parent_control_uuid     UUID REFERENCES canonical_controls(id),
    obligation_candidate_id UUID REFERENCES obligation_candidates(id),

    -- Review outcome; new rows start as 'pending'.
    review_status           VARCHAR(20) DEFAULT 'pending'
                            CHECK (review_status IN ('pending', 'accepted_link', 'accepted_new', 'rejected')),
    reviewed_by             VARCHAR(100),
    reviewed_at             TIMESTAMPTZ,
    review_notes            TEXT,

    created_at              TIMESTAMPTZ DEFAULT NOW()
);

-- Lookup indexes: by review status, matched control, parent control, and stage.
CREATE INDEX IF NOT EXISTS idx_cdr_status  ON control_dedup_reviews (review_status);
CREATE INDEX IF NOT EXISTS idx_cdr_matched ON control_dedup_reviews (matched_control_uuid);
CREATE INDEX IF NOT EXISTS idx_cdr_parent  ON control_dedup_reviews (parent_control_uuid);
CREATE INDEX IF NOT EXISTS idx_cdr_stage   ON control_dedup_reviews (dedup_stage);

COMMENT ON TABLE control_dedup_reviews IS
    'Review queue for borderline dedup matches (similarity 0.85-0.92). Human decides: link or new control.';
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,38 @@
|
||||
-- Migration 075: Obligation Refinement Fields
|
||||
-- Supports Merge Pass (implementation-level dedup) and metadata enrichment.
|
||||
--
|
||||
-- New fields:
|
||||
-- merged_into_id — points to survivor obligation when merged
|
||||
-- trigger_type — event / periodic / continuous
|
||||
-- is_implementation_specific — true if obligation references concrete tool/protocol
|
||||
|
||||
-- =============================================================================
|
||||
-- 1. Add merge tracking
|
||||
-- =============================================================================
|
||||
|
||||
-- Self-reference to the surviving obligation a row was merged into.
ALTER TABLE obligation_candidates
    ADD COLUMN IF NOT EXISTS merged_into_id UUID REFERENCES obligation_candidates(id);

-- Partial index restricted to rows that actually have a merge target.
CREATE INDEX IF NOT EXISTS idx_oc_merged_into
    ON obligation_candidates (merged_into_id)
    WHERE merged_into_id IS NOT NULL;
|
||||
|
||||
-- Allow 'merged' as release_state.
-- The drop and re-add are done in ONE ALTER TABLE statement so the swap is
-- atomic: if re-adding the CHECK fails (e.g. an existing row violates it), the
-- previous constraint is not left dropped, even when this script is executed
-- statement-by-statement in autocommit mode.
ALTER TABLE obligation_candidates
    DROP CONSTRAINT IF EXISTS obligation_candidates_release_state_check,
    ADD CONSTRAINT obligation_candidates_release_state_check
        CHECK (release_state IN ('extracted', 'validated', 'rejected', 'composed', 'merged'));
|
||||
|
||||
-- =============================================================================
|
||||
-- 2. Add enrichment metadata
|
||||
-- =============================================================================
|
||||
|
||||
-- trigger_type: optional; when set it must be one of the three listed values.
-- is_implementation_specific: defaults to FALSE.
ALTER TABLE obligation_candidates
    ADD COLUMN IF NOT EXISTS trigger_type VARCHAR(20) DEFAULT NULL
        CHECK (trigger_type IS NULL OR trigger_type IN ('event', 'periodic', 'continuous')),
    ADD COLUMN IF NOT EXISTS is_implementation_specific BOOLEAN DEFAULT FALSE;
|
||||
@@ -0,0 +1,125 @@
|
||||
-- Migration 076: Anti-Fake-Evidence Guardrails (Phase 1)
|
||||
--
|
||||
-- Prevents "Compliance-Theater": generated content passed off as real evidence,
|
||||
-- controls without evidence marked as "pass", unvalidated 100% compliance claims.
|
||||
--
|
||||
-- Changes:
|
||||
-- 1. New ENUM types for evidence confidence + truth status
|
||||
-- 2. New columns on compliance_evidence (confidence, truth, review tracking)
|
||||
-- 3. New value 'in_progress' for controlstatusenum
|
||||
-- 4. status_justification column on compliance_controls
|
||||
-- 5. New table compliance_llm_generation_audit
|
||||
-- 6. Backfill existing evidence based on source
|
||||
-- 7. Indexes on new columns
|
||||
|
||||
-- ============================================================================
|
||||
-- 1. New ENUM types
|
||||
-- ============================================================================
|
||||
|
||||
-- NOTE: This script is intentionally not wrapped in BEGIN/COMMIT because of
-- the ALTER TYPE ... ADD VALUE in section 2: PostgreSQL versions before 12
-- reject it inside a transaction block, and on 12+ the added value cannot be
-- used until the transaction has committed. CREATE TYPE itself is
-- transaction-safe. (Assumes the migration runner executes this file in
-- autocommit mode, as psql does by default — confirm for other runners.)
--
-- CREATE TYPE has no IF NOT EXISTS form, so each type is created inside a DO
-- block that ignores duplicate_object. This keeps the migration re-runnable,
-- matching the IF NOT EXISTS guards used by the other statements in this file.

DO $$
BEGIN
    CREATE TYPE evidence_confidence_level AS ENUM (
        'E0',  -- Generated / no real evidence (LLM output, placeholder)
        'E1',  -- Uploaded but unreviewed (manual upload, no hash, no reviewer)
        'E2',  -- Reviewed internally (human reviewed, hash verified)
        'E3',  -- Observed by system (CI/CD pipeline, API with hash)
        'E4'   -- Validated by external auditor
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already exists from a previous run
END
$$;

DO $$
BEGIN
    CREATE TYPE evidence_truth_status AS ENUM (
        'generated',            -- Created by LLM / system generation
        'uploaded',             -- Manually uploaded by user
        'observed',             -- Automatically observed (CI/CD, monitoring)
        'validated_internal',   -- Reviewed + approved by internal reviewer
        'rejected',             -- Reviewed and rejected
        'provided_to_auditor',  -- Shared with external auditor
        'accepted_by_auditor'   -- Accepted by external auditor
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already exists from a previous run
END
$$;
|
||||
|
||||
-- ============================================================================
|
||||
-- 2. Add 'in_progress' to controlstatusenum
|
||||
-- ============================================================================
|
||||
-- ALTER TYPE ... ADD VALUE is rejected inside a transaction block before PostgreSQL 12; on 12+ the new value cannot be used until the transaction commits.
|
||||
|
||||
ALTER TYPE controlstatusenum ADD VALUE IF NOT EXISTS 'in_progress';
|
||||
|
||||
-- ============================================================================
|
||||
-- 3. New columns on compliance_evidence
|
||||
-- ============================================================================
|
||||
|
||||
-- Existing rows receive the column defaults ('E1' / 'uploaded' / TRUE).
ALTER TABLE compliance_evidence
    ADD COLUMN IF NOT EXISTS confidence_level        evidence_confidence_level DEFAULT 'E1',
    ADD COLUMN IF NOT EXISTS truth_status            evidence_truth_status     DEFAULT 'uploaded',
    ADD COLUMN IF NOT EXISTS generation_mode         VARCHAR(100),
    ADD COLUMN IF NOT EXISTS may_be_used_as_evidence BOOLEAN                   DEFAULT TRUE,
    ADD COLUMN IF NOT EXISTS reviewed_by             VARCHAR(200),
    ADD COLUMN IF NOT EXISTS reviewed_at             TIMESTAMPTZ;

-- ============================================================================
-- 4. status_justification on compliance_controls
-- ============================================================================

-- Free-text justification stored alongside a control's status.
ALTER TABLE compliance_controls
    ADD COLUMN IF NOT EXISTS status_justification TEXT;
|
||||
|
||||
-- ============================================================================
|
||||
-- 5. LLM Generation Audit table
|
||||
-- ============================================================================
|
||||
|
||||
-- One row per LLM generation event. Rows default to truth_status 'generated'
-- and may_be_used_as_evidence FALSE.
CREATE TABLE IF NOT EXISTS compliance_llm_generation_audit (
    id                      VARCHAR(36) PRIMARY KEY DEFAULT gen_random_uuid()::text,
    tenant_id               VARCHAR(36),

    -- Kind of generated entity, e.g. 'evidence', 'control', 'document', ...
    entity_type             VARCHAR(50) NOT NULL,
    -- Identifier of the generated entity (plain value; no FK constraint is declared).
    entity_id               VARCHAR(36),

    -- e.g. 'draft_assistance', 'auto_generation', ...
    generation_mode         VARCHAR(100) NOT NULL,
    truth_status            evidence_truth_status NOT NULL DEFAULT 'generated',
    may_be_used_as_evidence BOOLEAN NOT NULL DEFAULT FALSE,

    llm_model               VARCHAR(100),
    -- e.g. 'ollama', 'anthropic', ...
    llm_provider            VARCHAR(50),
    -- SHA-256 of the prompt
    prompt_hash             VARCHAR(64),
    -- Truncated input / output kept for auditability
    input_summary           TEXT,
    output_summary          TEXT,
    metadata                JSONB DEFAULT '{}'::jsonb,

    created_at              TIMESTAMPTZ DEFAULT NOW(),
    updated_at              TIMESTAMPTZ DEFAULT NOW()
);
|
||||
|
||||
-- ============================================================================
|
||||
-- 6. Backfill existing evidence based on source
|
||||
-- ============================================================================
|
||||
|
||||
-- Every backfill only touches rows still at the column default ('E1'), so
-- rows that were already reclassified are left alone on a re-run.

-- CI pipeline and API evidence → E3 + observed
UPDATE compliance_evidence
   SET confidence_level = 'E3',
       truth_status     = 'observed'
 WHERE source IN ('ci_pipeline', 'api')
   AND confidence_level = 'E1';

-- Manual/upload evidence stays at E1 + uploaded (default)

-- Generated evidence → E0 + generated, and no longer usable as evidence
UPDATE compliance_evidence
   SET confidence_level        = 'E0',
       truth_status            = 'generated',
       may_be_used_as_evidence = FALSE
 WHERE source = 'generated'
   AND confidence_level = 'E1';
|
||||
|
||||
-- ============================================================================
|
||||
-- 7. Indexes
|
||||
-- ============================================================================
|
||||
|
||||
-- Indexes on the new compliance_evidence columns
CREATE INDEX IF NOT EXISTS ix_evidence_confidence
    ON compliance_evidence (confidence_level);
CREATE INDEX IF NOT EXISTS ix_evidence_truth_status
    ON compliance_evidence (truth_status);
CREATE INDEX IF NOT EXISTS ix_evidence_may_be_used
    ON compliance_evidence (may_be_used_as_evidence);

-- Indexes on the LLM generation audit table
CREATE INDEX IF NOT EXISTS ix_llm_audit_entity
    ON compliance_llm_generation_audit (entity_type, entity_id);
CREATE INDEX IF NOT EXISTS ix_llm_audit_tenant
    ON compliance_llm_generation_audit (tenant_id);
|
||||
@@ -0,0 +1,37 @@
|
||||
-- Migration 077: Anti-Fake-Evidence Phase 2
|
||||
-- Assertions table, Four-Eyes columns on Evidence, Audit-Trail performance index
|
||||
|
||||
-- 1A. Assertions table: one row per sentence of an entity's text.
CREATE TABLE IF NOT EXISTS compliance_assertions (
    id              VARCHAR(36) PRIMARY KEY DEFAULT gen_random_uuid()::text,
    tenant_id       VARCHAR(36),
    entity_type     VARCHAR(50) NOT NULL,
    entity_id       VARCHAR(36) NOT NULL,
    sentence_text   TEXT NOT NULL,
    sentence_index  INTEGER NOT NULL DEFAULT 0,
    assertion_type  VARCHAR(20) NOT NULL DEFAULT 'assertion',
    -- JSON array of evidence ids; empty by default
    evidence_ids    JSONB DEFAULT '[]'::jsonb,
    confidence      FLOAT DEFAULT 0.0,
    normative_tier  VARCHAR(20),
    verified_by     VARCHAR(200),
    verified_at     TIMESTAMPTZ,
    created_at      TIMESTAMPTZ DEFAULT NOW(),
    updated_at      TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS ix_assertion_entity
    ON compliance_assertions (entity_type, entity_id);
CREATE INDEX IF NOT EXISTS ix_assertion_type
    ON compliance_assertions (assertion_type);
CREATE INDEX IF NOT EXISTS ix_assertion_tenant
    ON compliance_assertions (tenant_id);

-- 1B. Four-Eyes columns on Evidence (first and second reviewer tracking)
ALTER TABLE compliance_evidence
    ADD COLUMN IF NOT EXISTS approval_status    VARCHAR(30) DEFAULT 'none',
    ADD COLUMN IF NOT EXISTS first_reviewer     VARCHAR(200),
    ADD COLUMN IF NOT EXISTS first_reviewed_at  TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS second_reviewer    VARCHAR(200),
    ADD COLUMN IF NOT EXISTS second_reviewed_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS requires_four_eyes BOOLEAN DEFAULT FALSE;

CREATE INDEX IF NOT EXISTS ix_evidence_approval_status
    ON compliance_evidence (approval_status);

-- 1C. Audit-Trail performance index
CREATE INDEX IF NOT EXISTS ix_audit_trail_entity_action
    ON compliance_audit_trail (entity_type, action, performed_at);
|
||||
@@ -0,0 +1,42 @@
|
||||
-- Migration 078: Batch Dedup — Schema extensions for 85k→~18-25k reduction
|
||||
-- Adds merged_into_uuid tracking, performance indexes for batch dedup,
|
||||
-- and extends link_type CHECK to include 'cross_regulation'.
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- =============================================================================
|
||||
-- 1. merged_into_uuid: Track which master a duplicate was merged into
|
||||
-- =============================================================================
|
||||
|
||||
-- Self-reference from a merged duplicate to its master control.
ALTER TABLE canonical_controls
    ADD COLUMN IF NOT EXISTS merged_into_uuid UUID REFERENCES canonical_controls(id);

-- Partial index restricted to rows that have a merge target.
CREATE INDEX IF NOT EXISTS idx_cc_merged_into
    ON canonical_controls (merged_into_uuid)
    WHERE merged_into_uuid IS NOT NULL;

-- =============================================================================
-- 2. Performance indexes for batch dedup queries
-- =============================================================================

-- Expression index on generation_metadata->>'merge_group_hint' (for
-- sub-grouping), limited to pass0b controls.
CREATE INDEX IF NOT EXISTS idx_cc_merge_group_hint
    ON canonical_controls ((generation_metadata->>'merge_group_hint'))
    WHERE decomposition_method = 'pass0b';

-- Composite index for pattern-based dedup loading, limited to pass0b controls.
CREATE INDEX IF NOT EXISTS idx_cc_pattern_dedup
    ON canonical_controls (pattern_id, release_state)
    WHERE decomposition_method = 'pass0b';

-- =============================================================================
-- 3. Extend link_type CHECK to include 'cross_regulation'
-- =============================================================================

-- Replace the existing link_type CHECK with the extended value list.
ALTER TABLE control_parent_links
    DROP CONSTRAINT IF EXISTS control_parent_links_link_type_check,
    ADD CONSTRAINT control_parent_links_link_type_check
        CHECK (link_type IN ('decomposition', 'dedup_merge', 'manual', 'crosswalk', 'cross_regulation'));
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,16 @@
|
||||
-- Migration 079: Add evidence_type to canonical_controls
|
||||
-- Classifies HOW a control is evidenced:
|
||||
-- code = Technical control, verifiable in source code / IaC / CI-CD
|
||||
-- process = Organizational / governance control, verified via documents / policies
|
||||
-- hybrid = Both code and process evidence required
|
||||
|
||||
DO $$
BEGIN
    -- Guard: do nothing on databases where compliance.canonical_controls
    -- does not exist.
    IF EXISTS (
        SELECT 1
          FROM information_schema.tables
         WHERE table_schema = 'compliance'
           AND table_name   = 'canonical_controls'
    ) THEN
        -- Schema-qualify the target so the statements act on exactly the table
        -- the guard checked, independent of the session's search_path.
        -- NULL is permitted: a CHECK constraint passes when its expression
        -- evaluates to NULL.
        ALTER TABLE compliance.canonical_controls
            ADD COLUMN IF NOT EXISTS evidence_type VARCHAR(20) DEFAULT NULL
            CHECK (evidence_type IN ('code', 'process', 'hybrid'));

        CREATE INDEX IF NOT EXISTS idx_cc_evidence_type
            ON compliance.canonical_controls (evidence_type);
    END IF;
END $$;
|
||||
@@ -0,0 +1,18 @@
|
||||
-- V1 Control Enrichment: Cross-reference table for matching
|
||||
-- Eigenentwicklung (v1, ungrouped, no source) → regulatorische Controls
|
||||
|
||||
CREATE TABLE IF NOT EXISTS v1_control_matches (
    id                   UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Both sides reference canonical_controls; a match row is removed
    -- automatically when either control is deleted.
    v1_control_uuid      UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
    matched_control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
    similarity_score     NUMERIC(4,3) NOT NULL,
    match_rank           SMALLINT NOT NULL DEFAULT 1,
    matched_source       TEXT,  -- e.g. "DSGVO (EU) 2016/679"
    matched_article      TEXT,  -- e.g. "Art. 32"
    match_method         VARCHAR(30) NOT NULL DEFAULT 'embedding',
    created_at           TIMESTAMPTZ DEFAULT NOW(),
    -- At most one row per (v1 control, matched control) pair.
    CONSTRAINT uq_v1_match UNIQUE (v1_control_uuid, matched_control_uuid)
);

CREATE INDEX IF NOT EXISTS idx_v1m_v1
    ON v1_control_matches (v1_control_uuid);
CREATE INDEX IF NOT EXISTS idx_v1m_matched
    ON v1_control_matches (matched_control_uuid);
|
||||
@@ -0,0 +1,11 @@
|
||||
-- Migration 081: Add 'duplicate' release_state for obligation deduplication
|
||||
--
|
||||
-- Allows marking duplicate obligation_candidates as 'duplicate' instead of
|
||||
-- deleting them, preserving traceability via merged_into_id.
|
||||
|
||||
-- Replace the release_state CHECK so that 'duplicate' is accepted.
-- Drop and re-add are combined into ONE ALTER TABLE statement so the swap is
-- atomic: if re-adding the CHECK fails (e.g. an existing row violates it), the
-- previous constraint is not left dropped, even when this script is executed
-- statement-by-statement in autocommit mode.
ALTER TABLE obligation_candidates
    DROP CONSTRAINT IF EXISTS obligation_candidates_release_state_check,
    ADD CONSTRAINT obligation_candidates_release_state_check
        CHECK (release_state IN ('extracted', 'validated', 'rejected', 'composed', 'merged', 'duplicate'));
|
||||
@@ -0,0 +1,4 @@
|
||||
-- Widen source_article and source_regulation to TEXT so long NIST references fit,
-- e.g. "SC-22 (und weitere redaktionelle Änderungen SC-7, SC-14, SC-17, ...)"
ALTER TABLE control_parent_links
    ALTER COLUMN source_article    TYPE TEXT,
    ALTER COLUMN source_regulation TYPE TEXT;
|
||||
@@ -0,0 +1,6 @@
|
||||
# Optional: Cross-Encoder Re-Ranking (CPU-only PyTorch)
|
||||
# Install separately: pip install -r requirements-reranker.txt
|
||||
# Enable at runtime: RERANK_ENABLED=true
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
torch
|
||||
sentence-transformers>=3.0.0
|
||||
@@ -22,6 +22,8 @@ python-multipart>=0.0.22
|
||||
# AI / Anthropic (compliance AI assistant)
|
||||
anthropic==0.75.0
|
||||
|
||||
# Re-Ranking: see requirements-reranker.txt (optional, CPU-only PyTorch)
|
||||
|
||||
# PDF Generation (GDPR export, audit reports)
|
||||
weasyprint>=68.0
|
||||
reportlab==4.2.5
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,562 @@
|
||||
"""Tests for Anti-Fake-Evidence Phase 1 guardrails.
|
||||
|
||||
~45 tests covering:
|
||||
- Evidence confidence classification
|
||||
- Evidence truth status classification
|
||||
- Control status transition state machine
|
||||
- Multi-dimensional compliance score
|
||||
- LLM generation audit
|
||||
- Evidence review endpoint
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from unittest.mock import MagicMock, patch
|
||||
from fastapi import FastAPI
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from compliance.api.evidence_routes import router as evidence_router
|
||||
from compliance.api.llm_audit_routes import router as llm_audit_router
|
||||
from compliance.api.evidence_routes import _classify_confidence, _classify_truth_status
|
||||
from compliance.services.control_status_machine import validate_transition
|
||||
from compliance.db.models import (
|
||||
EvidenceConfidenceEnum,
|
||||
EvidenceTruthStatusEnum,
|
||||
ControlStatusEnum,
|
||||
)
|
||||
from classroom_engine.database import get_db
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# App setup with mocked DB dependency
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
app = FastAPI()
|
||||
app.include_router(evidence_router)
|
||||
app.include_router(llm_audit_router, prefix="/compliance")
|
||||
|
||||
mock_db = MagicMock()
|
||||
|
||||
|
||||
def override_get_db():
    """Yield the module-level ``mock_db`` in place of the real ``get_db`` dependency."""
    yield mock_db
|
||||
|
||||
|
||||
app.dependency_overrides[get_db] = override_get_db
|
||||
client = TestClient(app)
|
||||
|
||||
EVIDENCE_UUID = "eeeeeeee-aaaa-bbbb-cccc-ffffffffffff"
|
||||
CONTROL_UUID = "cccccccc-aaaa-bbbb-cccc-dddddddddddd"
|
||||
NOW = datetime(2026, 3, 23, 12, 0, 0)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_evidence(overrides=None):
    """Build a ``MagicMock`` that stands in for an evidence record.

    Args:
        overrides: Optional mapping of attribute name to value; each entry
            replaces the corresponding default on the returned mock.

    Returns:
        A ``MagicMock`` carrying the default evidence attributes with any
        overrides applied.
    """
    status = MagicMock()
    status.value = "valid"

    attributes = {
        "id": EVIDENCE_UUID,
        "control_id": CONTROL_UUID,
        "evidence_type": "test_results",
        "title": "Pytest Test Report",
        "description": "All tests passing",
        "artifact_url": "https://ci.example.com/job/123/artifact",
        "artifact_path": None,
        "artifact_hash": "abc123def456",
        "file_size_bytes": None,
        "mime_type": None,
        "status": status,
        "uploaded_by": None,
        "source": "ci_pipeline",
        "ci_job_id": "job-123",
        "valid_from": NOW,
        "valid_until": NOW + timedelta(days=90),
        "collected_at": NOW,
        "created_at": NOW,
        # Anti-fake-evidence fields
        "confidence_level": EvidenceConfidenceEnum.E3,
        "truth_status": EvidenceTruthStatusEnum.OBSERVED,
        "generation_mode": None,
        "may_be_used_as_evidence": True,
        "reviewed_by": None,
        "reviewed_at": None,
        # Phase 2 fields
        "approval_status": "none",
        "first_reviewer": None,
        "first_reviewed_at": None,
        "second_reviewer": None,
        "second_reviewed_at": None,
        "requires_four_eyes": False,
    }
    # Caller-supplied values win over the defaults.
    attributes.update(overrides or {})

    evidence = MagicMock()
    for attr_name, attr_value in attributes.items():
        setattr(evidence, attr_name, attr_value)
    return evidence
|
||||
|
||||
|
||||
def make_control(overrides=None):
    """Build a ``MagicMock`` that stands in for a control record.

    Args:
        overrides: Optional mapping of attribute name to value; each entry
            replaces the corresponding default on the returned mock.

    Returns:
        A ``MagicMock`` with id, control_id, title and status set.
    """
    attributes = {
        "id": CONTROL_UUID,
        "control_id": "GOV-001",
        "title": "Access Control",
        "status": ControlStatusEnum.PLANNED,
    }
    # Caller-supplied values win over the defaults.
    attributes.update(overrides or {})

    control = MagicMock()
    for attr_name, attr_value in attributes.items():
        setattr(control, attr_name, attr_value)
    return control
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 1. TestEvidenceConfidenceClassification
|
||||
# ===========================================================================
|
||||
|
||||
class TestEvidenceConfidenceClassification:
    """Checks the confidence level ``_classify_confidence`` assigns per source."""

    def test_ci_pipeline_returns_e3(self):
        level = _classify_confidence("ci_pipeline")
        assert level == EvidenceConfidenceEnum.E3

    def test_api_with_hash_returns_e3(self):
        level = _classify_confidence("api", artifact_hash="sha256:abc")
        assert level == EvidenceConfidenceEnum.E3

    def test_api_without_hash_returns_e3(self):
        level = _classify_confidence("api")
        assert level == EvidenceConfidenceEnum.E3

    def test_manual_returns_e1(self):
        level = _classify_confidence("manual")
        assert level == EvidenceConfidenceEnum.E1

    def test_upload_returns_e1(self):
        level = _classify_confidence("upload")
        assert level == EvidenceConfidenceEnum.E1

    def test_generated_returns_e0(self):
        level = _classify_confidence("generated")
        assert level == EvidenceConfidenceEnum.E0

    def test_unknown_source_returns_e1(self):
        level = _classify_confidence("some_random_source")
        assert level == EvidenceConfidenceEnum.E1

    def test_none_source_returns_e1(self):
        level = _classify_confidence(None)
        assert level == EvidenceConfidenceEnum.E1
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 2. TestEvidenceTruthStatus
|
||||
# ===========================================================================
|
||||
|
||||
class TestEvidenceTruthStatus:
    """Checks the truth status ``_classify_truth_status`` assigns per source."""

    def test_ci_pipeline_returns_observed(self):
        status = _classify_truth_status("ci_pipeline")
        assert status == EvidenceTruthStatusEnum.OBSERVED

    def test_manual_returns_uploaded(self):
        status = _classify_truth_status("manual")
        assert status == EvidenceTruthStatusEnum.UPLOADED

    def test_upload_returns_uploaded(self):
        status = _classify_truth_status("upload")
        assert status == EvidenceTruthStatusEnum.UPLOADED

    def test_generated_returns_generated(self):
        status = _classify_truth_status("generated")
        assert status == EvidenceTruthStatusEnum.GENERATED

    def test_api_returns_observed(self):
        status = _classify_truth_status("api")
        assert status == EvidenceTruthStatusEnum.OBSERVED

    def test_none_returns_uploaded(self):
        status = _classify_truth_status(None)
        assert status == EvidenceTruthStatusEnum.UPLOADED
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 3. TestControlStatusTransitions
|
||||
# ===========================================================================
|
||||
|
||||
class TestControlStatusTransitions:
    """Exercises ``validate_transition`` for allowed and blocked status changes."""

    def test_planned_to_in_progress_allowed(self):
        ok, problems = validate_transition("planned", "in_progress")
        assert ok is True
        assert problems == []

    def test_in_progress_to_pass_without_evidence_blocked(self):
        ok, problems = validate_transition("in_progress", "pass", evidence_list=[])
        assert ok is False
        assert len(problems) >= 1
        first_message = problems[0].lower()
        assert "pass" in first_message

    def test_in_progress_to_pass_with_e2_evidence_allowed(self):
        evidence = make_evidence({
            "confidence_level": EvidenceConfidenceEnum.E2,
            "truth_status": EvidenceTruthStatusEnum.VALIDATED_INTERNAL,
        })
        ok, problems = validate_transition(
            "in_progress", "pass", evidence_list=[evidence]
        )
        assert ok is True
        assert problems == []

    def test_in_progress_to_pass_with_e1_evidence_blocked(self):
        evidence = make_evidence({
            "confidence_level": EvidenceConfidenceEnum.E1,
            "truth_status": EvidenceTruthStatusEnum.UPLOADED,
        })
        ok, problems = validate_transition(
            "in_progress", "pass", evidence_list=[evidence]
        )
        assert ok is False
        assert "E2" in problems[0]

    def test_in_progress_to_partial_with_evidence_allowed(self):
        evidence = make_evidence({"confidence_level": EvidenceConfidenceEnum.E0})
        ok, _problems = validate_transition(
            "in_progress", "partial", evidence_list=[evidence]
        )
        assert ok is True

    def test_in_progress_to_partial_without_evidence_blocked(self):
        ok, _problems = validate_transition(
            "in_progress", "partial", evidence_list=[]
        )
        assert ok is False

    def test_pass_to_fail_always_allowed(self):
        ok, _problems = validate_transition("pass", "fail")
        assert ok is True

    def test_any_to_na_requires_justification(self):
        ok, problems = validate_transition(
            "in_progress", "n/a", status_justification=None
        )
        assert ok is False
        first_message = problems[0].lower()
        assert "justification" in first_message

    def test_any_to_na_with_justification_allowed(self):
        reason = "Not applicable for this project"
        ok, _problems = validate_transition(
            "in_progress", "n/a", status_justification=reason
        )
        assert ok is True

    def test_any_to_planned_always_allowed(self):
        ok, _problems = validate_transition("pass", "planned")
        assert ok is True

    def test_same_status_noop_allowed(self):
        ok, _problems = validate_transition("pass", "pass")
        assert ok is True

    def test_bypass_for_auto_updater(self):
        # With the bypass flag set, an empty evidence list must not block "pass".
        ok, _problems = validate_transition(
            "in_progress",
            "pass",
            evidence_list=[],
            bypass_for_auto_updater=True,
        )
        assert ok is True

    def test_partial_to_pass_needs_e2(self):
        evidence = make_evidence({
            "confidence_level": EvidenceConfidenceEnum.E1,
            "truth_status": EvidenceTruthStatusEnum.UPLOADED,
        })
        ok, _problems = validate_transition(
            "partial", "pass", evidence_list=[evidence]
        )
        assert ok is False

    def test_partial_to_pass_with_e3_allowed(self):
        evidence = make_evidence({
            "confidence_level": EvidenceConfidenceEnum.E3,
            "truth_status": EvidenceTruthStatusEnum.OBSERVED,
        })
        ok, _problems = validate_transition(
            "partial", "pass", evidence_list=[evidence]
        )
        assert ok is True

    def test_in_progress_to_fail_allowed(self):
        ok, _problems = validate_transition("in_progress", "fail")
        assert ok is True
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 4. TestMultiDimensionalScore
|
||||
# ===========================================================================
|
||||
|
||||
class TestMultiDimensionalScore:
    """Checks the shape and basic values of the multi-dimensional score."""

    # Score dimensions that the repository reports as individual numbers.
    _DIMENSIONS = (
        "requirement_coverage",
        "evidence_strength",
        "validation_quality",
        "evidence_freshness",
        "control_effectiveness",
        "overall_readiness",
    )

    @staticmethod
    def _build_repo():
        """Return a ``ControlRepository`` bound to the shared mock session."""
        from compliance.db.repository import ControlRepository

        return ControlRepository(mock_db)

    @staticmethod
    def _score_with_controls(repo, controls):
        """Compute the score while ``repo.get_all`` is patched to return *controls*."""
        with patch.object(repo, "get_all", return_value=controls):
            return repo.get_multi_dimensional_score()

    def test_score_structure(self):
        """Score result should have all required keys."""
        score = self._score_with_controls(self._build_repo(), [])

        for key in (*self._DIMENSIONS, "hard_blocks"):
            assert key in score

    def test_empty_controls_returns_zeros(self):
        score = self._score_with_controls(self._build_repo(), [])

        assert score["overall_readiness"] == 0.0
        assert "Keine Controls" in score["hard_blocks"][0]

    def test_hard_blocks_pass_without_evidence(self):
        """Controls on 'pass' without evidence should trigger hard block."""
        repo = self._build_repo()
        passing_control = make_control({"status": ControlStatusEnum.PASS})

        query_result = mock_db.query.return_value
        query_result.all.return_value = []  # no evidence
        query_result.scalar.return_value = 0

        score = self._score_with_controls(repo, [passing_control])

        # Case-insensitive match for "evidence" in at least one hard block.
        assert any("evidence" in block.lower() for block in score["hard_blocks"])

    def test_all_dimensions_are_floats(self):
        score = self._score_with_controls(self._build_repo(), [])

        for key in self._DIMENSIONS:
            assert isinstance(score[key], float), f"{key} should be float"

    def test_hard_blocks_is_list(self):
        score = self._score_with_controls(self._build_repo(), [])

        assert isinstance(score["hard_blocks"], list)

    def test_backwards_compatibility_with_old_score(self):
        """get_statistics should still work and return compliance_score."""
        repo = self._build_repo()

        query_result = mock_db.query.return_value
        query_result.scalar.return_value = 0
        query_result.group_by.return_value.all.return_value = []

        stats = repo.get_statistics()
        assert "compliance_score" in stats
        assert "total" in stats
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 5. TestForbiddenFormulations
|
||||
# ===========================================================================
|
||||
|
||||
class TestForbiddenFormulations:
    """Schema-level checks for the score, transition-error and evidence models."""

    def test_import_works(self):
        """The score and transition-error schemas import and expose defaults."""
        # This tests the Python-side schema, the actual check is in TypeScript
        from compliance.api.schemas import MultiDimensionalScore, StatusTransitionError

        default_score = MultiDimensionalScore()
        assert default_score.overall_readiness == 0.0

        transition_error = StatusTransitionError(
            current_status="planned",
            requested_status="pass",
        )
        assert transition_error.allowed is False

    def test_status_transition_error_schema(self):
        from compliance.api.schemas import StatusTransitionError

        payload = {
            "allowed": False,
            "current_status": "in_progress",
            "requested_status": "pass",
            "violations": ["Need E2 evidence"],
        }
        transition_error = StatusTransitionError(**payload)
        assert transition_error.violations == ["Need E2 evidence"]

    def test_multi_dimensional_score_defaults(self):
        from compliance.api.schemas import MultiDimensionalScore

        default_score = MultiDimensionalScore()
        assert default_score.requirement_coverage == 0.0
        assert default_score.hard_blocks == []

    def test_multi_dimensional_score_with_data(self):
        from compliance.api.schemas import MultiDimensionalScore

        payload = {
            "requirement_coverage": 80.0,
            "evidence_strength": 60.0,
            "validation_quality": 40.0,
            "evidence_freshness": 90.0,
            "control_effectiveness": 70.0,
            "overall_readiness": 65.0,
            "hard_blocks": ["3 Controls ohne Evidence"],
        }
        populated_score = MultiDimensionalScore(**payload)
        assert populated_score.overall_readiness == 65.0
        assert len(populated_score.hard_blocks) == 1

    def test_evidence_response_has_anti_fake_fields(self):
        from compliance.api.schemas import EvidenceResponse

        declared = EvidenceResponse.model_fields
        for field_name in (
            "confidence_level",
            "truth_status",
            "generation_mode",
            "may_be_used_as_evidence",
            "reviewed_by",
            "reviewed_at",
        ):
            assert field_name in declared
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 6. TestLLMGenerationAudit
|
||||
# ===========================================================================
|
||||
|
||||
class TestLLMGenerationAudit:
    """Covers the LLM audit endpoints and the audit model's initial field values."""

    def test_create_audit_record(self):
        """POST /compliance/llm-audit should create a record."""
        record_attrs = {
            "id": "audit-001",
            "tenant_id": None,
            "entity_type": "document",
            "entity_id": None,
            "generation_mode": "draft_assistance",
            "truth_status": EvidenceTruthStatusEnum.GENERATED,
            "may_be_used_as_evidence": False,
            "llm_model": "qwen2.5vl:32b",
            "llm_provider": "ollama",
            "prompt_hash": None,
            "input_summary": "Test input",
            "output_summary": "Test output",
            "extra_metadata": {},
            "created_at": NOW,
        }
        mock_record = MagicMock()
        for attr_name, attr_value in record_attrs.items():
            setattr(mock_record, attr_name, attr_value)

        def assign_id(record):
            # Stand-in for the id a real session would populate on refresh.
            record.id = "audit-001"

        mock_db.add = MagicMock()
        mock_db.commit = MagicMock()
        mock_db.refresh = MagicMock(side_effect=assign_id)

        request_body = {
            "entity_type": "document",
            "generation_mode": "draft_assistance",
            "truth_status": "generated",
            "may_be_used_as_evidence": False,
            "llm_model": "qwen2.5vl:32b",
            "llm_provider": "ollama",
        }

        # We need to patch the LLMGenerationAuditDB constructor
        with patch(
            "compliance.api.llm_audit_routes.LLMGenerationAuditDB",
            return_value=mock_record,
        ):
            resp = client.post("/compliance/llm-audit", json=request_body)

        assert resp.status_code == 200
        body = resp.json()
        assert body["entity_type"] == "document"
        assert body["truth_status"] == "generated"
        assert body["may_be_used_as_evidence"] is False

    def test_truth_status_always_generated_for_llm(self):
        """LLM-generated content should always start with truth_status=generated."""
        from compliance.db.models import LLMGenerationAuditDB, EvidenceTruthStatusEnum

        audit = LLMGenerationAuditDB()
        status = audit.truth_status
        # Default should be GENERATED
        assert status is None or status == EvidenceTruthStatusEnum.GENERATED

    def test_may_be_used_as_evidence_defaults_false(self):
        """Generated content should NOT be usable as evidence by default."""
        from compliance.db.models import LLMGenerationAuditDB

        audit = LLMGenerationAuditDB()
        usable = audit.may_be_used_as_evidence
        assert usable is False or usable is None

    def test_list_audit_records(self):
        """GET /compliance/llm-audit should return records."""
        mock_query = MagicMock()
        mock_query.count.return_value = 0
        # Every chaining call hands back the same query mock.
        for chained in ("filter", "order_by", "offset", "limit"):
            getattr(mock_query, chained).return_value = mock_query
        mock_query.all.return_value = []
        mock_db.query.return_value = mock_query

        resp = client.get("/compliance/llm-audit")
        assert resp.status_code == 200

        body = resp.json()
        assert "records" in body
        assert "total" in body
        assert body["total"] == 0
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 7. TestEvidenceReview
|
||||
# ===========================================================================
|
||||
|
||||
class TestEvidenceReview:
    """Covers PATCH /evidence/{id}/review: success, missing record, bad input."""

    def test_review_upgrades_confidence(self):
        """PATCH /evidence/{id}/review should update confidence and set reviewer."""
        record = make_evidence({
            "confidence_level": EvidenceConfidenceEnum.E1,
            "truth_status": EvidenceTruthStatusEnum.UPLOADED,
        })
        mock_db.query.return_value.filter.return_value.first.return_value = record
        mock_db.commit = MagicMock()
        mock_db.refresh = MagicMock()

        review_body = {
            "confidence_level": "E2",
            "truth_status": "validated_internal",
            "reviewed_by": "auditor@example.com",
        }
        resp = client.patch(f"/evidence/{EVIDENCE_UUID}/review", json=review_body)

        assert resp.status_code == 200
        # Verify the evidence was updated
        assert record.confidence_level == EvidenceConfidenceEnum.E2
        assert record.truth_status == EvidenceTruthStatusEnum.VALIDATED_INTERNAL
        assert record.reviewed_by == "auditor@example.com"
        assert record.reviewed_at is not None

    def test_review_nonexistent_evidence_returns_404(self):
        mock_db.query.return_value.filter.return_value.first.return_value = None

        review_body = {"reviewed_by": "someone"}
        resp = client.patch("/evidence/nonexistent-id/review", json=review_body)

        assert resp.status_code == 404

    def test_review_invalid_confidence_returns_400(self):
        record = make_evidence()
        mock_db.query.return_value.filter.return_value.first.return_value = record

        review_body = {
            "confidence_level": "INVALID",
            "reviewed_by": "someone",
        }
        resp = client.patch(f"/evidence/{EVIDENCE_UUID}/review", json=review_body)

        assert resp.status_code == 400
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 8. TestControlUpdateIntegration
|
||||
# ===========================================================================
|
||||
|
||||
class TestControlUpdateIntegration:
    """Checks the control schemas and status enum expose the fields the tests rely on."""

    def test_control_update_has_status_justification(self):
        from compliance.api.schemas import ControlUpdate

        assert "status_justification" in ControlUpdate.model_fields

    def test_control_response_has_status_justification(self):
        from compliance.api.schemas import ControlResponse

        assert "status_justification" in ControlResponse.model_fields

    def test_control_status_enum_has_in_progress(self):
        member = ControlStatusEnum.IN_PROGRESS
        assert member.value == "in_progress"
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 9. TestEvidenceEnums
|
||||
# ===========================================================================
|
||||
|
||||
class TestEvidenceEnums:
    """Pins the string values of the evidence confidence and truth-status enums."""

    def test_confidence_enum_values(self):
        expected_values = (
            (EvidenceConfidenceEnum.E0, "E0"),
            (EvidenceConfidenceEnum.E1, "E1"),
            (EvidenceConfidenceEnum.E2, "E2"),
            (EvidenceConfidenceEnum.E3, "E3"),
            (EvidenceConfidenceEnum.E4, "E4"),
        )
        for member, expected in expected_values:
            assert member.value == expected

    def test_truth_status_enum_values(self):
        expected_values = (
            (EvidenceTruthStatusEnum.GENERATED, "generated"),
            (EvidenceTruthStatusEnum.UPLOADED, "uploaded"),
            (EvidenceTruthStatusEnum.OBSERVED, "observed"),
            (EvidenceTruthStatusEnum.VALIDATED_INTERNAL, "validated_internal"),
            (EvidenceTruthStatusEnum.REJECTED, "rejected"),
            (EvidenceTruthStatusEnum.PROVIDED_TO_AUDITOR, "provided_to_auditor"),
            (EvidenceTruthStatusEnum.ACCEPTED_BY_AUDITOR, "accepted_by_auditor"),
        )
        for member, expected in expected_values:
            assert member.value == expected
|
||||
@@ -0,0 +1,528 @@
|
||||
"""Tests for Anti-Fake-Evidence Phase 2.
|
||||
|
||||
~35 tests covering:
|
||||
- Audit trail extension (evidence review/create logging)
|
||||
- Assertion engine (extraction, CRUD, verify, summary)
|
||||
- Four-Eyes review (domain check, first/second review, same-person reject)
|
||||
- UI badge data (response schema includes new fields)
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from unittest.mock import MagicMock, patch
|
||||
from fastapi import FastAPI
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from compliance.api.evidence_routes import (
|
||||
router as evidence_router,
|
||||
_requires_four_eyes,
|
||||
_classify_confidence,
|
||||
_classify_truth_status,
|
||||
)
|
||||
from compliance.api.assertion_routes import router as assertion_router
|
||||
from compliance.services.assertion_engine import extract_assertions, _classify_sentence
|
||||
from compliance.db.models import (
|
||||
EvidenceConfidenceEnum,
|
||||
EvidenceTruthStatusEnum,
|
||||
ControlStatusEnum,
|
||||
AssertionDB,
|
||||
)
|
||||
from classroom_engine.database import get_db
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# App setup with mocked DB dependency
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Stand-in for the database session; every request served by the test app
# receives this same object.
mock_db = MagicMock()

# Minimal FastAPI application that mounts only the routers under test.
app = FastAPI()
app.include_router(evidence_router)
app.include_router(assertion_router)
|
||||
|
||||
|
||||
def override_get_db():
    """Yield the module-level ``mock_db`` in place of a real database session."""
    yield mock_db
|
||||
|
||||
|
||||
# Fixed timestamp and identifiers reused by the helper factories and tests.
NOW = datetime(2026, 3, 23, 14, 0, 0)
EVIDENCE_UUID = "eeee0002-aaaa-bbbb-cccc-ffffffffffff"
CONTROL_UUID = "cccc0002-aaaa-bbbb-cccc-dddddddddddd"
ASSERTION_UUID = "aaaa0002-bbbb-cccc-dddd-eeeeeeeeeeee"

# Route the get_db dependency to the mock session, then build the HTTP client.
app.dependency_overrides[get_db] = override_get_db
client = TestClient(app)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_evidence(overrides=None):
    """Build a ``MagicMock`` that stands in for an evidence record.

    Args:
        overrides: Optional mapping of attribute name to value; each entry
            replaces the corresponding default on the returned mock.

    Returns:
        A ``MagicMock`` carrying the default evidence attributes with any
        overrides applied.
    """
    status = MagicMock()
    status.value = "valid"

    attributes = {
        "id": EVIDENCE_UUID,
        "control_id": CONTROL_UUID,
        "evidence_type": "test_results",
        "title": "Phase 2 Test Evidence",
        "description": "Testing four-eyes",
        "artifact_url": "https://ci.example.com/artifact",
        "artifact_path": None,
        "artifact_hash": "abc123",
        "file_size_bytes": None,
        "mime_type": None,
        "status": status,
        "uploaded_by": None,
        "source": "api",
        "ci_job_id": None,
        "valid_from": NOW,
        "valid_until": NOW + timedelta(days=90),
        "collected_at": NOW,
        "created_at": NOW,
        "confidence_level": EvidenceConfidenceEnum.E1,
        "truth_status": EvidenceTruthStatusEnum.UPLOADED,
        "generation_mode": None,
        "may_be_used_as_evidence": True,
        "reviewed_by": None,
        "reviewed_at": None,
        # Phase 2 fields
        "approval_status": "none",
        "first_reviewer": None,
        "first_reviewed_at": None,
        "second_reviewer": None,
        "second_reviewed_at": None,
        "requires_four_eyes": False,
    }
    # Caller-supplied values win over the defaults.
    attributes.update(overrides or {})

    evidence = MagicMock()
    for attr_name, attr_value in attributes.items():
        setattr(evidence, attr_name, attr_value)
    return evidence
|
||||
|
||||
|
||||
def make_assertion(overrides=None):
    """Build a ``MagicMock`` that stands in for an assertion record.

    Args:
        overrides: Optional mapping of attribute name to value; each entry
            replaces the corresponding default on the returned mock.

    Returns:
        A ``MagicMock`` carrying the default assertion attributes with any
        overrides applied.
    """
    attributes = {
        "id": ASSERTION_UUID,
        "tenant_id": "tenant-001",
        "entity_type": "control",
        "entity_id": CONTROL_UUID,
        "sentence_text": "Test assertion sentence",
        "sentence_index": 0,
        "assertion_type": "assertion",
        "evidence_ids": [],
        "confidence": 0.0,
        "normative_tier": "pflicht",
        "verified_by": None,
        "verified_at": None,
        "created_at": NOW,
        "updated_at": NOW,
    }
    # Caller-supplied values win over the defaults.
    attributes.update(overrides or {})

    assertion = MagicMock()
    for attr_name, attr_value in attributes.items():
        setattr(assertion, attr_name, attr_value)
    return assertion
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 1. TestAuditTrailExtension
|
||||
# ===========================================================================
|
||||
|
||||
class TestAuditTrailExtension:
    """Covers the evidence review/reject endpoints and the audit-trail query.

    Each endpoint test resets the call history of the shared ``mock_db`` and
    then configures what ``query(...).filter(...).first()`` returns.
    """

    @staticmethod
    def _prime_lookup(record):
        """Reset ``mock_db`` call history and make the evidence lookup return *record*."""
        mock_db.reset_mock()
        mock_db.query.return_value.filter.return_value.first.return_value = record

    def test_review_evidence_logs_audit_trail(self):
        evidence = make_evidence()
        self._prime_lookup(evidence)
        mock_db.refresh.return_value = None

        resp = client.patch(
            f"/evidence/{EVIDENCE_UUID}/review",
            json={"confidence_level": "E2", "reviewed_by": "auditor@test.com"},
        )
        assert resp.status_code == 200
        # db.add should be called for audit trail entries
        assert mock_db.add.called

    def test_review_evidence_records_old_and_new_confidence(self):
        evidence = make_evidence({"confidence_level": EvidenceConfidenceEnum.E1})
        self._prime_lookup(evidence)
        mock_db.refresh.return_value = None

        resp = client.patch(
            f"/evidence/{EVIDENCE_UUID}/review",
            json={"confidence_level": "E3", "reviewed_by": "reviewer@test.com"},
        )
        assert resp.status_code == 200
        # A 200 alone would also pass if the review changed nothing: check that
        # the new level was applied and that something was added to the session.
        assert evidence.confidence_level == EvidenceConfidenceEnum.E3
        assert mock_db.add.called

    def test_review_evidence_records_truth_status_change(self):
        evidence = make_evidence({"truth_status": EvidenceTruthStatusEnum.UPLOADED})
        self._prime_lookup(evidence)
        mock_db.refresh.return_value = None

        resp = client.patch(
            f"/evidence/{EVIDENCE_UUID}/review",
            json={"truth_status": "validated_internal", "reviewed_by": "reviewer@test.com"},
        )
        assert resp.status_code == 200
        # The requested truth status must actually land on the record.
        assert evidence.truth_status == EvidenceTruthStatusEnum.VALIDATED_INTERNAL

    def test_review_nonexistent_evidence_returns_404(self):
        self._prime_lookup(None)

        resp = client.patch(
            "/evidence/nonexistent/review",
            json={"reviewed_by": "someone"},
        )
        assert resp.status_code == 404

    def test_reject_evidence_logs_audit_trail(self):
        evidence = make_evidence()
        self._prime_lookup(evidence)
        mock_db.refresh.return_value = None

        resp = client.patch(
            f"/evidence/{EVIDENCE_UUID}/reject",
            json={"reviewed_by": "auditor@test.com", "rejection_reason": "Fake evidence"},
        )
        assert resp.status_code == 200
        data = resp.json()
        assert data["approval_status"] == "rejected"

    def test_reject_nonexistent_evidence_returns_404(self):
        self._prime_lookup(None)

        resp = client.patch(
            "/evidence/nonexistent/reject",
            json={"reviewed_by": "someone"},
        )
        assert resp.status_code == 404

    def test_audit_trail_query_endpoint(self):
        mock_db.reset_mock()

        trail_entry = MagicMock()
        trail_entry.id = "trail-001"
        trail_entry.entity_type = "evidence"
        trail_entry.entity_id = EVIDENCE_UUID
        trail_entry.entity_name = "Test"
        trail_entry.action = "review"
        trail_entry.field_changed = "confidence_level"
        trail_entry.old_value = "E1"
        trail_entry.new_value = "E2"
        trail_entry.change_summary = None
        trail_entry.performed_by = "auditor"
        trail_entry.performed_at = NOW
        trail_entry.checksum = "abc"

        # Configure the query chain filter().filter().order_by().limit().all().
        chain = mock_db.query.return_value.filter.return_value.filter.return_value
        chain.order_by.return_value.limit.return_value.all.return_value = [trail_entry]

        resp = client.get(f"/audit-trail?entity_type=evidence&entity_id={EVIDENCE_UUID}")
        assert resp.status_code == 200
        data = resp.json()
        assert data["total"] >= 1

    def test_audit_trail_checksum_present(self):
        """Audit trail entries should have a checksum for integrity."""
        from compliance.api.audit_trail_utils import create_signature

        sig = create_signature("evidence|123|review|user@test.com")
        assert len(sig) == 64  # SHA-256 hex digest
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 2. TestAssertionEngine
|
||||
# ===========================================================================
|
||||
|
||||
class TestAssertionEngine:
    """Test assertion extraction and classification."""

    def test_pflicht_sentence_classified_as_assertion(self):
        """A sentence phrased with "muss" is expected to be ("assertion", "pflicht")."""
        result = _classify_sentence("Die Organisation muss ein ISMS implementieren.")
        assert result == ("assertion", "pflicht")

    def test_empfehlung_sentence_classified(self):
        """A sentence phrased with "sollte" is expected to be ("assertion", "empfehlung")."""
        result = _classify_sentence("Die Organisation sollte regelmäßige Audits durchführen.")
        assert result == ("assertion", "empfehlung")

    def test_kann_sentence_classified(self):
        """A sentence phrased with "kann" is expected to be ("assertion", "kann")."""
        result = _classify_sentence("Optional kann ein externes Audit durchgeführt werden.")
        assert result == ("assertion", "kann")

    def test_rationale_sentence_classified(self):
        """A "weil" justification is expected to be a rationale without a tier."""
        result = _classify_sentence("Dies ist erforderlich, weil Datenverlust schwere Folgen hat.")
        assert result == ("rationale", None)

    def test_fact_sentence_with_evidence_keyword(self):
        """A dated certificate statement is expected to be a fact without a tier."""
        result = _classify_sentence("Das Zertifikat wurde am 15.03.2026 ausgestellt.")
        assert result == ("fact", None)

    def test_extract_assertions_splits_sentences(self):
        """Two sentences yield two results, each with its own normative tier."""
        text = "Die Organisation muss Daten schützen. Sie sollte regelmäßig prüfen."
        results = extract_assertions(text, "control", "ctrl-001")
        assert len(results) == 2
        assert results[0]["assertion_type"] == "assertion"
        assert results[0]["normative_tier"] == "pflicht"
        assert results[1]["normative_tier"] == "empfehlung"

    def test_extract_assertions_empty_text(self):
        """Empty input yields an empty result list."""
        results = extract_assertions("", "control", "ctrl-001")
        assert results == []

    def test_extract_assertions_single_sentence(self):
        """A single sentence yields exactly one result."""
        results = extract_assertions("Der Betreiber muss ein Audit durchführen.", "control", "ctrl-001")
        assert len(results) == 1
        assert results[0]["normative_tier"] == "pflicht"

    def test_mixed_text_with_rationale(self):
        """An obligation followed by a justification yields both types."""
        text = "Die Organisation muss ein ISMS implementieren. Dies ist notwendig, weil Compliance gefordert ist."
        results = extract_assertions(text, "control", "ctrl-001")
        assert len(results) == 2
        types = [r["assertion_type"] for r in results]
        assert "assertion" in types
        assert "rationale" in types

    def test_assertion_crud_create(self):
        """POST /assertions succeeds when the added object carries its fields."""
        mock_db.reset_mock()
        mock_db.refresh.return_value = None

        # Mock the added object to return proper values
        def side_effect_add(obj):
            obj.id = ASSERTION_UUID
            obj.created_at = NOW
            obj.updated_at = NOW
            obj.sentence_index = 0
            obj.confidence = 0.0

        mock_db.add.side_effect = side_effect_add
        try:
            resp = client.post(
                "/assertions?tenant_id=tenant-001",
                json={
                    "entity_type": "control",
                    "entity_id": CONTROL_UUID,
                    "sentence_text": "Die Organisation muss ein ISMS implementieren.",
                    "assertion_type": "assertion",
                    "normative_tier": "pflicht",
                },
            )
        finally:
            # mock_db is shared by the whole module and reset_mock() keeps
            # side effects by default, so remove the hook explicitly to stop
            # it from mutating objects added by later tests.
            mock_db.add.side_effect = None
        assert resp.status_code == 200

    def test_assertion_verify_endpoint(self):
        """Verifying an assertion turns it into a fact and records the verifier."""
        a = make_assertion()
        mock_db.reset_mock()
        mock_db.query.return_value.filter.return_value.first.return_value = a
        mock_db.refresh.return_value = None

        resp = client.post(f"/assertions/{ASSERTION_UUID}/verify?verified_by=auditor@test.com")
        assert resp.status_code == 200
        assert a.assertion_type == "fact"
        assert a.verified_by == "auditor@test.com"

    def test_assertion_summary(self):
        """The summary counts every row plus facts, rationale and unverified ones."""
        mock_db.reset_mock()
        a1 = make_assertion({"assertion_type": "assertion", "verified_by": None})
        a2 = make_assertion({"assertion_type": "fact", "verified_by": "user"})
        a3 = make_assertion({"assertion_type": "rationale", "verified_by": None})
        mock_db.query.return_value.filter.return_value.filter.return_value.filter.return_value.all.return_value = [a1, a2, a3]
        # Direct .all() for no-filter case
        mock_db.query.return_value.all.return_value = [a1, a2, a3]

        resp = client.get("/assertions/summary")
        assert resp.status_code == 200
        data = resp.json()
        assert data["total_assertions"] == 3
        assert data["total_facts"] == 1
        assert data["total_rationale"] == 1
        assert data["unverified_count"] == 1
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 3. TestFourEyesReview
|
||||
# ===========================================================================
|
||||
|
||||
class TestFourEyesReview:
    """Test Four-Eyes review process."""

    @staticmethod
    def _submit_review(reviewer):
        """PATCH the review endpoint for EVIDENCE_UUID with ``reviewer`` as ``reviewed_by``."""
        return client.patch(
            f"/evidence/{EVIDENCE_UUID}/review",
            json={"reviewed_by": reviewer},
        )

    def test_gov_domain_requires_four_eyes(self):
        """The "gov" domain requires a second reviewer."""
        needs_two = _requires_four_eyes("gov")
        assert needs_two is True

    def test_priv_domain_requires_four_eyes(self):
        """The "priv" domain requires a second reviewer."""
        needs_two = _requires_four_eyes("priv")
        assert needs_two is True

    def test_ops_domain_does_not_require_four_eyes(self):
        """The "ops" domain does not require a second reviewer."""
        needs_two = _requires_four_eyes("ops")
        assert needs_two is False

    def test_sdlc_domain_does_not_require_four_eyes(self):
        """The "sdlc" domain does not require a second reviewer."""
        needs_two = _requires_four_eyes("sdlc")
        assert needs_two is False

    def test_first_review_sets_first_approved(self):
        """The first review records the reviewer and moves to "first_approved"."""
        record = make_evidence({
            "requires_four_eyes": True,
            "approval_status": "pending_first",
        })
        mock_db.reset_mock()
        lookup = mock_db.query.return_value.filter.return_value
        lookup.first.return_value = record
        mock_db.refresh.return_value = None

        response = self._submit_review("reviewer1@test.com")

        assert response.status_code == 200
        assert record.first_reviewer == "reviewer1@test.com"
        assert record.approval_status == "first_approved"

    def test_second_review_different_person_approves(self):
        """A second review by someone else completes the approval."""
        record = make_evidence({
            "requires_four_eyes": True,
            "approval_status": "first_approved",
            "first_reviewer": "reviewer1@test.com",
        })
        mock_db.reset_mock()
        lookup = mock_db.query.return_value.filter.return_value
        lookup.first.return_value = record
        mock_db.refresh.return_value = None

        response = self._submit_review("reviewer2@test.com")

        assert response.status_code == 200
        assert record.second_reviewer == "reviewer2@test.com"
        assert record.approval_status == "approved"

    def test_same_person_second_review_rejected(self):
        """The first reviewer may not also act as the second reviewer."""
        record = make_evidence({
            "requires_four_eyes": True,
            "approval_status": "first_approved",
            "first_reviewer": "reviewer1@test.com",
        })
        mock_db.reset_mock()
        lookup = mock_db.query.return_value.filter.return_value
        lookup.first.return_value = record

        response = self._submit_review("reviewer1@test.com")

        assert response.status_code == 400
        detail = response.json()["detail"]
        assert "different" in detail.lower()

    def test_already_approved_blocked(self):
        """Evidence that is already approved cannot be reviewed again."""
        record = make_evidence({
            "requires_four_eyes": True,
            "approval_status": "approved",
        })
        mock_db.reset_mock()
        lookup = mock_db.query.return_value.filter.return_value
        lookup.first.return_value = record

        response = self._submit_review("reviewer3@test.com")

        assert response.status_code == 400
        detail = response.json()["detail"]
        assert "already" in detail.lower()

    def test_rejected_evidence_cannot_be_reviewed(self):
        """Reviewing rejected evidence is answered with HTTP 400."""
        record = make_evidence({
            "requires_four_eyes": True,
            "approval_status": "rejected",
        })
        mock_db.reset_mock()
        lookup = mock_db.query.return_value.filter.return_value
        lookup.first.return_value = record

        response = self._submit_review("reviewer@test.com")

        assert response.status_code == 400

    def test_reject_endpoint(self):
        """The reject endpoint sets the approval status to "rejected"."""
        record = make_evidence({"requires_four_eyes": True})
        mock_db.reset_mock()
        lookup = mock_db.query.return_value.filter.return_value
        lookup.first.return_value = record
        mock_db.refresh.return_value = None

        payload = {"reviewed_by": "auditor@test.com", "rejection_reason": "Not authentic"}
        response = client.patch(f"/evidence/{EVIDENCE_UUID}/reject", json=payload)

        assert response.status_code == 200
        assert record.approval_status == "rejected"
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 4. TestUIBadgeData
|
||||
# ===========================================================================
|
||||
|
||||
class TestUIBadgeData:
    """Test that evidence response includes all Phase 2 fields."""

    @staticmethod
    def _build(evidence):
        """Return ``_build_evidence_response(evidence)``.

        The builder is imported inside the function, as the individual tests
        did before, rather than at module level.
        """
        from compliance.api.evidence_routes import _build_evidence_response

        return _build_evidence_response(evidence)

    def test_evidence_response_includes_approval_status(self):
        """The review endpoint's JSON carries the four-eyes status fields."""
        evidence = make_evidence({
            "approval_status": "first_approved",
            "first_reviewer": "reviewer1@test.com",
            "first_reviewed_at": NOW,
            "requires_four_eyes": True,
        })
        mock_db.reset_mock()
        mock_db.query.return_value.filter.return_value.first.return_value = evidence
        mock_db.refresh.return_value = None

        resp = client.patch(
            f"/evidence/{EVIDENCE_UUID}/review",
            json={"reviewed_by": "reviewer2@test.com"},
        )
        assert resp.status_code == 200
        data = resp.json()
        assert "approval_status" in data
        assert "requires_four_eyes" in data
        assert data["requires_four_eyes"] is True

    def test_evidence_response_includes_four_eyes_fields(self):
        """The response builder copies both reviewers and the approval state."""
        evidence = make_evidence({
            "requires_four_eyes": True,
            "approval_status": "approved",
            "first_reviewer": "r1@test.com",
            "first_reviewed_at": NOW,
            "second_reviewer": "r2@test.com",
            "second_reviewed_at": NOW,
        })

        # The builder is exercised directly, so no HTTP request is made and
        # the shared mock_db needs no configuration for this test.
        resp = self._build(evidence)
        assert resp.approval_status == "approved"
        assert resp.first_reviewer == "r1@test.com"
        assert resp.second_reviewer == "r2@test.com"
        assert resp.requires_four_eyes is True

    def test_assertion_response_schema(self):
        """GET /assertions/{id} exposes the fields the UI badges read."""
        a = make_assertion()
        mock_db.reset_mock()
        mock_db.query.return_value.filter.return_value.first.return_value = a

        resp = client.get(f"/assertions/{ASSERTION_UUID}")
        assert resp.status_code == 200
        data = resp.json()
        assert "assertion_type" in data
        assert "normative_tier" in data
        assert "evidence_ids" in data
        assert "verified_by" in data

    def test_evidence_response_includes_confidence_and_truth(self):
        """Enum-valued confidence and truth status come back as plain strings."""
        evidence = make_evidence({
            "confidence_level": EvidenceConfidenceEnum.E3,
            "truth_status": EvidenceTruthStatusEnum.OBSERVED,
        })
        resp = self._build(evidence)
        assert resp.confidence_level == "E3"
        assert resp.truth_status == "observed"

    def test_evidence_response_none_four_eyes_fields_default(self):
        """Evidence built without overrides reports no four-eyes involvement."""
        evidence = make_evidence()
        resp = self._build(evidence)
        assert resp.approval_status == "none"
        assert resp.requires_four_eyes is False
        assert resp.first_reviewer is None
|
||||
@@ -0,0 +1,191 @@
|
||||
"""Tests for Anti-Fake-Evidence Phase 3: Enforcement.
|
||||
|
||||
~8 tests covering:
|
||||
- Evidence distribution endpoint (confidence counts, four-eyes pending)
|
||||
- Dashboard multi-score presence
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from unittest.mock import MagicMock, patch, PropertyMock
|
||||
from fastapi import FastAPI
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from compliance.api.dashboard_routes import router as dashboard_router
|
||||
from compliance.db.models import EvidenceConfidenceEnum, EvidenceTruthStatusEnum
|
||||
from classroom_engine.database import get_db
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# App setup with mocked DB dependency
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Shared stand-in for the database session that the routes receive.
mock_db = MagicMock()

# Minimal application that mounts only the dashboard routes under test.
app = FastAPI()
app.include_router(dashboard_router)


def override_get_db():
    """Dependency override for ``get_db`` that yields the shared ``mock_db``."""
    yield mock_db


# Route every ``get_db`` dependency to the mock before building the client.
app.dependency_overrides[get_db] = override_get_db
client = TestClient(app)

# Fixed reference timestamp for the tests in this module.
NOW = datetime(year=2026, month=3, day=23, hour=14)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_evidence(confidence="E1", requires_four_eyes=False, approval_status="none"):
    """Build a mock evidence row for the distribution endpoint tests.

    Args:
        confidence: Value exposed as ``confidence_level.value``.
        requires_four_eyes: Value of the ``requires_four_eyes`` attribute.
        approval_status: Value of the ``approval_status`` attribute.

    Returns:
        A ``MagicMock`` carrying the three attributes above.
    """
    # The level is its own mock so that ``.value`` can be read from it.
    level = MagicMock(value=confidence)
    return MagicMock(
        confidence_level=level,
        requires_four_eyes=requires_four_eyes,
        approval_status=approval_status,
    )
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 1. TestEvidenceDistributionEndpoint
|
||||
# ===========================================================================
|
||||
|
||||
class TestEvidenceDistributionEndpoint:
    """Test GET /dashboard/evidence-distribution endpoint."""

    def _setup_evidence(self, evidence_list, mock_repo_cls=None):
        """Reset the shared mock DB and stub the patched repository.

        Args:
            evidence_list: Objects that ``get_all()`` should return.
            mock_repo_cls: The patched ``EvidenceRepository`` class. When
                omitted, only the mock DB is reset (the helper's behaviour
                before this argument existed).

        Returns:
            ``evidence_list``, unchanged.
        """
        mock_db.reset_mock()
        if mock_repo_cls is not None:
            # Calling the patched class returns ``return_value``; its
            # ``get_all()`` then hands back the prepared list.
            mock_repo_cls.return_value.get_all.return_value = evidence_list
        return evidence_list

    def _fetch_distribution(self, mock_repo_cls, evidence_list):
        """Stub the repository, GET the endpoint and return the decoded JSON.

        Asserts that the endpoint answers with HTTP 200.
        """
        self._setup_evidence(evidence_list, mock_repo_cls)
        resp = client.get("/dashboard/evidence-distribution")
        assert resp.status_code == 200
        return resp.json()

    @patch("compliance.api.dashboard_routes.EvidenceRepository")
    def test_empty_db_returns_zero_counts(self, mock_repo_cls):
        """No evidence yields zero totals and a zeroed bucket per level."""
        data = self._fetch_distribution(mock_repo_cls, [])
        assert data["total"] == 0
        assert data["four_eyes_pending"] == 0
        assert data["by_confidence"] == {"E0": 0, "E1": 0, "E2": 0, "E3": 0, "E4": 0}

    @patch("compliance.api.dashboard_routes.EvidenceRepository")
    def test_counts_by_confidence_level(self, mock_repo_cls):
        """Each evidence item is counted in the bucket of its confidence level."""
        evidence = [
            make_evidence("E0"),
            make_evidence("E1"),
            make_evidence("E1"),
            make_evidence("E2"),
            make_evidence("E3"),
            make_evidence("E3"),
            make_evidence("E3"),
            make_evidence("E4"),
        ]
        data = self._fetch_distribution(mock_repo_cls, evidence)
        assert data["total"] == 8
        assert data["by_confidence"]["E0"] == 1
        assert data["by_confidence"]["E1"] == 2
        assert data["by_confidence"]["E2"] == 1
        assert data["by_confidence"]["E3"] == 3
        assert data["by_confidence"]["E4"] == 1

    @patch("compliance.api.dashboard_routes.EvidenceRepository")
    def test_four_eyes_pending_count(self, mock_repo_cls):
        """Only four-eyes evidence still awaiting a review counts as pending."""
        evidence = [
            make_evidence("E1", requires_four_eyes=True, approval_status="pending_first"),
            make_evidence("E2", requires_four_eyes=True, approval_status="first_approved"),
            make_evidence("E2", requires_four_eyes=True, approval_status="approved"),
            make_evidence("E1", requires_four_eyes=True, approval_status="rejected"),
            make_evidence("E1", requires_four_eyes=False, approval_status="none"),
        ]
        data = self._fetch_distribution(mock_repo_cls, evidence)
        # pending_first and first_approved are pending; approved and rejected are not
        assert data["four_eyes_pending"] == 2
        assert data["total"] == 5

    @patch("compliance.api.dashboard_routes.EvidenceRepository")
    def test_null_confidence_defaults_to_e1(self, mock_repo_cls):
        """Evidence whose confidence level is None is counted under E1."""
        e = MagicMock()
        e.confidence_level = None
        e.requires_four_eyes = False
        e.approval_status = "none"

        data = self._fetch_distribution(mock_repo_cls, [e])
        assert data["by_confidence"]["E1"] == 1
        assert data["total"] == 1

    @patch("compliance.api.dashboard_routes.EvidenceRepository")
    def test_all_four_eyes_approved_zero_pending(self, mock_repo_cls):
        """Fully approved four-eyes evidence leaves nothing pending."""
        evidence = [
            make_evidence("E2", requires_four_eyes=True, approval_status="approved"),
            make_evidence("E3", requires_four_eyes=True, approval_status="approved"),
        ]
        data = self._fetch_distribution(mock_repo_cls, evidence)
        assert data["four_eyes_pending"] == 0
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 2. TestDashboardMultiScore
|
||||
# ===========================================================================
|
||||
|
||||
class TestDashboardMultiScore:
    """Test that dashboard response includes multi_score."""

    def test_dashboard_response_schema_includes_multi_score(self):
        """DashboardResponse schema must include the multi_score field."""
        from compliance.api.schemas import DashboardResponse

        declared = DashboardResponse.model_fields
        assert "multi_score" in declared, "DashboardResponse must have multi_score field"

    def test_multi_score_schema_has_required_fields(self):
        """MultiDimensionalScore schema should have all 7 fields."""
        from compliance.api.schemas import MultiDimensionalScore

        declared = MultiDimensionalScore.model_fields
        expected = (
            "requirement_coverage",
            "evidence_strength",
            "validation_quality",
            "evidence_freshness",
            "control_effectiveness",
            "overall_readiness",
            "hard_blocks",
        )
        # Checked one by one so that a failure names the missing field.
        for field in expected:
            assert field in declared, f"Missing field: {field}"

    def test_multi_score_default_values(self):
        """MultiDimensionalScore defaults should be sensible."""
        from compliance.api.schemas import MultiDimensionalScore

        blank = MultiDimensionalScore()
        assert blank.overall_readiness == 0.0
        assert blank.hard_blocks == []
        assert blank.requirement_coverage == 0.0
|
||||
@@ -0,0 +1,277 @@
|
||||
"""Tests for Anti-Fake-Evidence Phase 4a: Traceability Matrix.
|
||||
|
||||
6 tests covering:
|
||||
- Empty DB returns empty controls + zero summary
|
||||
- Nested structure: Control → Evidence → Assertions
|
||||
- Assertions appear under correct evidence
|
||||
- Coverage flags computed correctly
|
||||
- Control without evidence has correct coverage
|
||||
- Summary counts match
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from unittest.mock import MagicMock, patch
|
||||
from fastapi import FastAPI
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from compliance.api.dashboard_routes import router as dashboard_router
|
||||
from classroom_engine.database import get_db
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# App setup with mocked DB dependency
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Shared stand-in for the database session that the routes receive.
mock_db = MagicMock()

# Minimal application that mounts only the dashboard routes under test.
app = FastAPI()
app.include_router(dashboard_router)


def override_get_db():
    """Dependency override for ``get_db`` that yields the shared ``mock_db``."""
    yield mock_db


# Route every ``get_db`` dependency to the mock before building the client.
app.dependency_overrides[get_db] = override_get_db
client = TestClient(app)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_control(id="c1", control_id="CTRL-001", title="Test Control", status="pass", domain="gov"):
    """Build a mock control row for the traceability matrix tests.

    ``status`` and ``domain`` are wrapped in their own mocks so that the
    caller-supplied strings are read through ``.value``.

    Returns:
        A ``MagicMock`` with ``id``, ``control_id``, ``title``,
        ``status.value`` and ``domain.value`` set from the arguments.
    """
    return MagicMock(
        id=id,
        control_id=control_id,
        title=title,
        status=MagicMock(value=status),
        domain=MagicMock(value=domain),
    )
|
||||
|
||||
|
||||
def make_evidence(id="e1", control_id="c1", title="Evidence 1", evidence_type="scan_report",
                  confidence="E2", status="valid"):
    """Build a mock evidence row for the traceability matrix tests.

    ``confidence`` and ``status`` are wrapped in their own mocks so that the
    caller-supplied strings are read through ``.value``.

    Returns:
        A ``MagicMock`` with ``id``, ``control_id``, ``title``,
        ``evidence_type``, ``confidence_level.value`` and ``status.value``
        set from the arguments.
    """
    return MagicMock(
        id=id,
        control_id=control_id,
        title=title,
        evidence_type=evidence_type,
        confidence_level=MagicMock(value=confidence),
        status=MagicMock(value=status),
    )
|
||||
|
||||
|
||||
def make_assertion(id="a1", entity_id="e1", sentence_text="System encrypts data at rest.",
                   assertion_type="assertion", confidence=0.85, verified_by=None):
    """Build a mock assertion row for the traceability matrix tests.

    Returns:
        A ``MagicMock`` whose ``id``, ``entity_id``, ``sentence_text``,
        ``assertion_type``, ``confidence`` and ``verified_by`` attributes
        hold the given arguments unchanged.
    """
    return MagicMock(
        id=id,
        entity_id=entity_id,
        sentence_text=sentence_text,
        assertion_type=assertion_type,
        confidence=confidence,
        verified_by=verified_by,
    )
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Tests
|
||||
# ===========================================================================
|
||||
|
||||
class TestTraceabilityMatrix:
    """Test GET /dashboard/traceability-matrix endpoint."""

    @staticmethod
    def _request_matrix(mock_ctrl_cls, mock_ev_cls, controls, evidence, assertions):
        """Stub both repositories and the assertion query, then GET the matrix.

        Args:
            mock_ctrl_cls: The patched ``ControlRepository`` class.
            mock_ev_cls: The patched ``EvidenceRepository`` class.
            controls: Objects the control repository's ``get_all()`` returns.
            evidence: Objects the evidence repository's ``get_all()`` returns.
            assertions: Objects returned by ``db.query(...).filter(...).all()``.

        Returns:
            The test client's response for the traceability matrix request.
        """
        ctrl_repo = MagicMock()
        ctrl_repo.get_all.return_value = controls
        mock_ctrl_cls.return_value = ctrl_repo

        ev_repo = MagicMock()
        ev_repo.get_all.return_value = evidence
        mock_ev_cls.return_value = ev_repo

        # Stands in for the db.query(...).filter(...).all() chain.
        mock_db.reset_mock()
        assertion_query = MagicMock()
        assertion_query.filter.return_value.all.return_value = assertions
        mock_db.query.return_value = assertion_query

        return client.get("/dashboard/traceability-matrix")

    @patch("compliance.api.dashboard_routes.EvidenceRepository")
    @patch("compliance.api.dashboard_routes.ControlRepository")
    def test_empty_db_returns_empty_matrix(self, mock_ctrl_cls, mock_ev_cls):
        """Empty DB should return zero controls and zero summary counts."""
        response = self._request_matrix(mock_ctrl_cls, mock_ev_cls, [], [], [])
        assert response.status_code == 200

        payload = response.json()
        assert payload["controls"] == []
        assert payload["summary"]["total_controls"] == 0
        assert payload["summary"]["covered_controls"] == 0
        assert payload["summary"]["fully_verified"] == 0
        assert payload["summary"]["uncovered_controls"] == 0

    @patch("compliance.api.dashboard_routes.EvidenceRepository")
    @patch("compliance.api.dashboard_routes.ControlRepository")
    def test_nested_structure(self, mock_ctrl_cls, mock_ev_cls):
        """Control with evidence and assertions should return nested structure."""
        control = make_control(id="c1", control_id="PRIV-001", title="Privacy Control")
        item = make_evidence(id="e1", control_id="c1", confidence="E3")
        claim = make_assertion(id="a1", entity_id="e1", verified_by="auditor@example.com")

        response = self._request_matrix(mock_ctrl_cls, mock_ev_cls, [control], [item], [claim])
        assert response.status_code == 200

        payload = response.json()
        assert len(payload["controls"]) == 1
        entry = payload["controls"][0]
        assert entry["control_id"] == "PRIV-001"
        assert len(entry["evidence"]) == 1
        assert entry["evidence"][0]["confidence_level"] == "E3"
        assert len(entry["evidence"][0]["assertions"]) == 1
        assert entry["evidence"][0]["assertions"][0]["verified"] is True

    @patch("compliance.api.dashboard_routes.EvidenceRepository")
    @patch("compliance.api.dashboard_routes.ControlRepository")
    def test_assertions_grouped_under_correct_evidence(self, mock_ctrl_cls, mock_ev_cls):
        """Assertions should only appear under the evidence they reference."""
        control = make_control(id="c1")
        items = [
            make_evidence(id="e1", control_id="c1", title="Evidence A"),
            make_evidence(id="e2", control_id="c1", title="Evidence B"),
        ]
        claims = [
            make_assertion(id="a1", entity_id="e1", sentence_text="Assertion for E1"),
            make_assertion(id="a2", entity_id="e2", sentence_text="Assertion for E2"),
            make_assertion(id="a3", entity_id="e2", sentence_text="Second assertion for E2"),
        ]

        response = self._request_matrix(mock_ctrl_cls, mock_ev_cls, [control], items, claims)
        assert response.status_code == 200

        payload = response.json()
        entry = payload["controls"][0]
        first = next(e for e in entry["evidence"] if e["id"] == "e1")
        second = next(e for e in entry["evidence"] if e["id"] == "e2")
        assert len(first["assertions"]) == 1
        assert len(second["assertions"]) == 2

    @patch("compliance.api.dashboard_routes.EvidenceRepository")
    @patch("compliance.api.dashboard_routes.ControlRepository")
    def test_coverage_flags_correct(self, mock_ctrl_cls, mock_ev_cls):
        """Coverage flags should reflect evidence, assertions, and verification state."""
        control = make_control(id="c1")
        item = make_evidence(id="e1", control_id="c1", confidence="E2")
        # One verified, one not
        verified = make_assertion(id="a1", entity_id="e1", verified_by="alice")
        pending = make_assertion(id="a2", entity_id="e1", verified_by=None)

        response = self._request_matrix(
            mock_ctrl_cls, mock_ev_cls, [control], [item], [verified, pending]
        )
        assert response.status_code == 200

        coverage = response.json()["controls"][0]["coverage"]
        assert coverage["has_evidence"] is True
        assert coverage["has_assertions"] is True
        assert coverage["all_assertions_verified"] is False  # a2 not verified
        assert coverage["min_confidence_level"] == "E2"

    @patch("compliance.api.dashboard_routes.EvidenceRepository")
    @patch("compliance.api.dashboard_routes.ControlRepository")
    def test_coverage_without_evidence(self, mock_ctrl_cls, mock_ev_cls):
        """Control with no evidence should have all coverage flags False/None."""
        control = make_control(id="c1")

        response = self._request_matrix(mock_ctrl_cls, mock_ev_cls, [control], [], [])
        assert response.status_code == 200

        coverage = response.json()["controls"][0]["coverage"]
        assert coverage["has_evidence"] is False
        assert coverage["has_assertions"] is False
        assert coverage["all_assertions_verified"] is False
        assert coverage["min_confidence_level"] is None

    @patch("compliance.api.dashboard_routes.EvidenceRepository")
    @patch("compliance.api.dashboard_routes.ControlRepository")
    def test_summary_counts(self, mock_ctrl_cls, mock_ev_cls):
        """Summary should count total, covered, fully verified, and uncovered controls."""
        # c1: has evidence + verified assertions -> fully verified
        # c2: has evidence but no assertions -> covered, not fully verified
        # c3: no evidence -> uncovered
        controls = [
            make_control(id="c1", control_id="C-001"),
            make_control(id="c2", control_id="C-002"),
            make_control(id="c3", control_id="C-003"),
        ]
        items = [
            make_evidence(id="e1", control_id="c1", confidence="E3"),
            make_evidence(id="e2", control_id="c2", confidence="E1"),
        ]
        claims = [make_assertion(id="a1", entity_id="e1", verified_by="auditor")]

        response = self._request_matrix(mock_ctrl_cls, mock_ev_cls, controls, items, claims)
        assert response.status_code == 200

        summary = response.json()["summary"]
        assert summary["total_controls"] == 3
        assert summary["covered_controls"] == 2
        assert summary["fully_verified"] == 1
        assert summary["uncovered_controls"] == 1
|
||||
@@ -0,0 +1,440 @@
|
||||
"""Tests for Batch Dedup Runner (batch_dedup_runner.py).
|
||||
|
||||
Covers:
|
||||
- quality_score(): Richness ranking
|
||||
- BatchDedupRunner._sub_group_by_merge_hint(): Composite key grouping
|
||||
- Master selection (highest quality score wins)
|
||||
- Duplicate linking (mark + parent-link transfer)
|
||||
- Dry run mode (no DB changes)
|
||||
- Cross-group pass
|
||||
- Progress reporting / stats
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, AsyncMock, patch, call
|
||||
|
||||
from compliance.services.batch_dedup_runner import (
|
||||
quality_score,
|
||||
BatchDedupRunner,
|
||||
DEDUP_COLLECTION,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# quality_score TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestQualityScore:
    """Quality scoring: richer controls should score higher."""

    def test_empty_control(self):
        """A control without any fields scores zero."""
        score = quality_score({})
        assert score == 0.0

    def test_requirements_weight(self):
        """Each requirement contributes 2.0."""
        score = quality_score({"requirements": json.dumps(["r1", "r2", "r3"])})
        assert score == pytest.approx(6.0)  # 3 * 2.0

    def test_test_procedure_weight(self):
        """Each test procedure step contributes 1.5."""
        score = quality_score({"test_procedure": json.dumps(["t1", "t2"])})
        assert score == pytest.approx(3.0)  # 2 * 1.5

    def test_evidence_weight(self):
        """Each evidence entry contributes 1.0."""
        score = quality_score({"evidence": json.dumps(["e1"])})
        assert score == pytest.approx(1.0)  # 1 * 1.0

    def test_objective_weight_capped(self):
        """Objective length contributes length/200, capped at 3.0."""
        short = quality_score({"objective": "x" * 100})
        long = quality_score({"objective": "x" * 1000})
        assert short == pytest.approx(0.5)  # 100/200
        assert long == pytest.approx(3.0)  # capped at 3.0

    def test_combined_score(self):
        """The contributions of all four fields add up."""
        control = {
            "requirements": json.dumps(["r1", "r2"]),
            "test_procedure": json.dumps(["t1"]),
            "evidence": json.dumps(["e1", "e2"]),
            "objective": "x" * 400,
        }
        # 2*2 + 1*1.5 + 2*1.0 + min(400/200, 3) = 4 + 1.5 + 2 + 2 = 9.5
        assert quality_score(control) == pytest.approx(9.5)

    def test_json_string_vs_list(self):
        """Both JSON strings and already-parsed lists should work."""
        from_json = quality_score({"requirements": json.dumps(["r1", "r2"])})
        # Pass a real list here: json.dumps(["r1", "r2"]) is exactly the
        # string '["r1", "r2"]', so comparing against that literal would
        # feed quality_score the same input twice and never reach the
        # parsed-list case.
        from_list = quality_score({"requirements": ["r1", "r2"]})
        assert from_json == from_list

    def test_null_fields(self):
        """None values should not crash."""
        score = quality_score({
            "requirements": None,
            "test_procedure": None,
            "evidence": None,
            "objective": None,
        })
        assert score == 0.0

    def test_ranking_order(self):
        """Rich control should rank above sparse control."""
        rich = {
            "requirements": json.dumps(["r1", "r2", "r3"]),
            "test_procedure": json.dumps(["t1", "t2"]),
            "evidence": json.dumps(["e1"]),
            "objective": "A comprehensive objective for this control.",
        }
        sparse = {
            "requirements": json.dumps(["r1"]),
            "objective": "Short",
        }
        assert quality_score(rich) > quality_score(sparse)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sub-grouping TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSubGrouping:
    """Check how ``_sub_group_by_merge_hint`` partitions a list of controls."""

    def _make_runner(self):
        """Return a runner backed by a mocked DB session."""
        return BatchDedupRunner(db=MagicMock())

    def test_groups_by_merge_hint(self):
        """Controls sharing a merge hint land in the same group."""
        controls = [
            {"uuid": "a", "merge_group_hint": "implement:mfa:none"},
            {"uuid": "b", "merge_group_hint": "implement:mfa:none"},
            {"uuid": "c", "merge_group_hint": "test:firewall:periodic"},
        ]
        groups = self._make_runner()._sub_group_by_merge_hint(controls)
        assert len(groups) == 2
        assert len(groups["implement:mfa:none"]) == 2
        assert len(groups["test:firewall:periodic"]) == 1

    def test_empty_hint_gets_own_group(self):
        """Controls with an empty hint are never merged with each other."""
        controls = [
            {"uuid": "x", "merge_group_hint": ""},
            {"uuid": "y", "merge_group_hint": ""},
        ]
        groups = self._make_runner()._sub_group_by_merge_hint(controls)
        # Each empty-hint control gets its own group
        assert len(groups) == 2

    def test_single_control_single_group(self):
        """A single control yields exactly one group."""
        controls = [
            {"uuid": "a", "merge_group_hint": "implement:mfa:none"},
        ]
        groups = self._make_runner()._sub_group_by_merge_hint(controls)
        assert len(groups) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Master Selection TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMasterSelection:
    """Best quality score should become master."""

    @pytest.mark.asyncio
    async def test_highest_score_is_master(self):
        """Three title-identical controls collapse to one master plus two links.

        NOTE(review): only the stats counters are asserted; the test does not
        check *which* control was chosen as master.
        """
        db = MagicMock()
        # Mock parent link transfer query: no parent links to move.
        db.execute.return_value.fetchall.return_value = []

        runner = BatchDedupRunner(db=db)

        hint = "implement:mfa:none"
        title = "MFA implementiert"
        sparse = _make_control("s1", reqs=1, hint=hint, title=title)
        rich = _make_control("r1", reqs=5, tests=3, evidence=2, hint=hint, title=title)
        medium = _make_control("m1", reqs=2, tests=1, hint=hint, title=title)

        controls = [sparse, medium, rich]

        # All have same title → all should be title-identical linked
        with patch("compliance.services.batch_dedup_runner.get_embedding",
                   new_callable=AsyncMock, return_value=[0.1] * 1024), \
                patch("compliance.services.batch_dedup_runner.qdrant_upsert",
                      new_callable=AsyncMock, return_value=True):
            await runner._process_hint_group("implement:mfa:none", controls, dry_run=True)

        # Rich should be master (1 master), others linked (2 linked)
        assert runner.stats["masters"] == 1
        assert runner.stats["linked"] == 2
        assert runner.stats["skipped_title_identical"] == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dry Run TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDryRun:
    """Dry run should compute stats but NOT modify DB."""

    @pytest.mark.asyncio
    async def test_dry_run_no_db_writes(self):
        """A dry-run pass updates the counters and never commits."""
        db = MagicMock()
        runner = BatchDedupRunner(db=db)

        controls = [
            _make_control("a", reqs=3, hint="implement:mfa:none", title="MFA impl"),
            _make_control("b", reqs=1, hint="implement:mfa:none", title="MFA impl"),
        ]

        with patch("compliance.services.batch_dedup_runner.get_embedding",
                   new_callable=AsyncMock, return_value=[0.1] * 1024), \
                patch("compliance.services.batch_dedup_runner.qdrant_upsert",
                      new_callable=AsyncMock, return_value=True):
            await runner._process_hint_group("implement:mfa:none", controls, dry_run=True)

        assert runner.stats["masters"] == 1
        assert runner.stats["linked"] == 1
        # No commit for dedup operations in dry_run
        db.commit.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parent Link Transfer TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestParentLinkTransfer:
    """Parent links should migrate from duplicate to master."""

    def test_transfer_parent_links(self):
        """Both parent links of the duplicate are re-inserted for the master."""
        db = MagicMock()
        # Mock: duplicate has 2 parent links
        db.execute.return_value.fetchall.return_value = [
            ("parent-1", "decomposition", 1.0, "DSGVO", "Art. 32", "obl-1"),
            ("parent-2", "decomposition", 0.9, "NIS2", "Art. 21", "obl-2"),
        ]

        runner = BatchDedupRunner(db=db)
        transferred = runner._transfer_parent_links("master-uuid", "dup-uuid")

        assert transferred == 2
        # Two INSERT calls for the transferred links
        assert db.execute.call_count == 3  # 1 SELECT + 2 INSERTs

    def test_transfer_skips_self_reference(self):
        """A link whose parent is the master itself is not transferred."""
        db = MagicMock()
        # Parent link points to master itself → should be skipped
        db.execute.return_value.fetchall.return_value = [
            ("master-uuid", "decomposition", 1.0, "DSGVO", "Art. 32", "obl-1"),
        ]

        runner = BatchDedupRunner(db=db)
        transferred = runner._transfer_parent_links("master-uuid", "dup-uuid")

        assert transferred == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Title-identical Short-circuit TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTitleIdenticalShortCircuit:
    """Linking behaviour for identical vs. different titles in one hint group."""

    @pytest.mark.asyncio
    async def test_identical_titles_skip_embedding(self):
        """Controls with identical titles in same hint group → direct link.

        NOTE(review): the embedding mock's call count is not asserted, so the
        "skip embedding" part of the name is only checked indirectly through
        the ``skipped_title_identical`` counter.
        """
        db = MagicMock()
        db.execute.return_value.fetchall.return_value = []

        runner = BatchDedupRunner(db=db)

        controls = [
            _make_control("m", reqs=3, hint="implement:mfa:none",
                          title="MFA implementieren"),
            _make_control("c", reqs=1, hint="implement:mfa:none",
                          title="MFA implementieren"),
        ]

        with patch("compliance.services.batch_dedup_runner.get_embedding",
                   new_callable=AsyncMock), \
                patch("compliance.services.batch_dedup_runner.qdrant_upsert",
                      new_callable=AsyncMock, return_value=True):
            await runner._process_hint_group("implement:mfa:none", controls, dry_run=False)

        # Embedding should only be called for the master (indexing), not for linking
        assert runner.stats["linked"] == 1
        assert runner.stats["skipped_title_identical"] == 1

    @pytest.mark.asyncio
    async def test_different_titles_use_embedding(self):
        """Controls with different titles should use embedding check."""
        db = MagicMock()
        db.execute.return_value.fetchall.return_value = []

        runner = BatchDedupRunner(db=db)

        controls = [
            _make_control("m", reqs=3, hint="implement:mfa:none",
                          title="MFA implementieren fuer Admins"),
            _make_control("c", reqs=1, hint="implement:mfa:none",
                          title="MFA einrichten fuer alle Benutzer"),
        ]

        with patch("compliance.services.batch_dedup_runner.get_embedding",
                   new_callable=AsyncMock, return_value=[0.1] * 1024) as mock_embed, \
                patch("compliance.services.batch_dedup_runner.qdrant_upsert",
                      new_callable=AsyncMock, return_value=True), \
                patch("compliance.services.batch_dedup_runner.qdrant_search_cross_regulation",
                      new_callable=AsyncMock, return_value=[]):
            await runner._process_hint_group("implement:mfa:none", controls, dry_run=False)

        # Different titles → embedding was called for both (master + candidate)
        assert mock_embed.call_count >= 2
        # No Qdrant results → linked anyway (same hint = same action+object)
        assert runner.stats["linked"] == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cross-Group Pass TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCrossGroupPass:
    """Cross-group pass: a close Qdrant hit from another hint group is linked."""

    @pytest.mark.asyncio
    async def test_cross_group_creates_link(self):
        """One master with one high-score cross-group hit yields one link."""
        db = MagicMock()

        # First call returns masters, subsequent calls return empty (for transfer)
        master_rows = [
            ("uuid-1", "CTRL-001", "MFA implementieren",
             "implement:multi_factor_auth:none"),
        ]
        canned_rows = iter([master_rows])

        def mock_execute(stmt, params=None):
            result = MagicMock()
            # Once the canned rows are used up, every later query sees [].
            result.fetchall.return_value = next(canned_rows, [])
            return result

        db.execute = mock_execute

        runner = BatchDedupRunner(db=db)

        cross_result = [{
            "score": 0.95,
            "payload": {
                "control_uuid": "uuid-2",
                "control_id": "CTRL-002",
                "merge_group_hint": "implement:mfa:continuous",
            },
        }]

        with patch("compliance.services.batch_dedup_runner.get_embedding",
                   new_callable=AsyncMock, return_value=[0.1] * 1024), \
                patch("compliance.services.batch_dedup_runner.qdrant_search_cross_regulation",
                      new_callable=AsyncMock, return_value=cross_result):
            await runner._run_cross_group_pass()

        assert runner.stats["cross_group_linked"] == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Progress Stats TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestProgressStats:
    """``get_status`` exposes the progress fields and the stats counters."""

    def test_get_status(self):
        """Values set on the runner are reported back by ``get_status``."""
        runner = BatchDedupRunner(db=MagicMock())
        runner.stats["masters"] = 42
        runner.stats["linked"] = 100
        runner._progress_phase = "phase1"
        runner._progress_count = 500
        runner._progress_total = 85000

        status = runner.get_status()

        assert status["phase"] == "phase1"
        assert status["progress"] == 500
        assert status["total"] == 85000
        assert status["masters"] == 42
        assert status["linked"] == 100
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Route endpoint TESTS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBatchDedupRoutes:
    """Test the batch-dedup API endpoints."""

    def test_status_endpoint_not_running(self):
        """The status endpoint answers 200 with ``running`` False."""
        # Imported locally, as in the original test, so the rest of the module
        # does not depend on FastAPI or the crosswalk routes at import time.
        from fastapi import FastAPI
        from fastapi.testclient import TestClient
        from compliance.api.crosswalk_routes import router

        app = FastAPI()
        app.include_router(router, prefix="/api/compliance")
        client = TestClient(app)

        with patch("compliance.api.crosswalk_routes.SessionLocal") as mock_session:
            mock_db = MagicMock()
            mock_session.return_value = mock_db
            mock_db.execute.return_value.fetchone.return_value = (85000, 0, 85000)

            # The request has to happen while SessionLocal is still patched.
            resp = client.get("/api/compliance/v1/canonical/migrate/batch-dedup/status")
            assert resp.status_code == 200
            data = resp.json()
            assert data["running"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HELPERS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_control(
    prefix: str,
    reqs: int = 0,
    tests: int = 0,
    evidence: int = 0,
    hint: str = "",
    title: str = None,
    pattern_id: str = None,
) -> dict:
    """Build a mock control dict for testing.

    Args:
        prefix: Short tag used to derive ``uuid``, ``control_id`` and the
            default title/objective.
        reqs: Number of generated requirement entries (``r0``, ``r1``, ...).
        tests: Number of generated test-procedure entries (``t0``, ...).
        evidence: Number of generated evidence entries (``e0``, ...).
        hint: Value stored under ``merge_group_hint``.
        title: Title to use; a falsy value falls back to ``"Control <prefix>"``.
        pattern_id: Value stored under ``pattern_id``.

    Returns:
        A control dict whose list-valued fields are JSON-encoded strings.
    """

    def _encoded(tag: str, count: int) -> str:
        # List fields are stored as JSON text, e.g. '["r0", "r1"]'.
        return json.dumps([f"{tag}{i}" for i in range(count)])

    return {
        "uuid": f"{prefix}-uuid",
        "control_id": f"CTRL-{prefix}",
        "title": title or f"Control {prefix}",
        "objective": f"Objective for {prefix}",
        "pattern_id": pattern_id,
        "requirements": _encoded("r", reqs),
        "test_procedure": _encoded("t", tests),
        "evidence": _encoded("e", evidence),
        "release_state": "draft",
        "merge_group_hint": hint,
        "action_object_class": "",
    }
|
||||
@@ -1,17 +1,36 @@
|
||||
"""Tests for Canonical Control Library routes (canonical_control_routes.py)."""
|
||||
"""Tests for Canonical Control Library routes (canonical_control_routes.py).
|
||||
|
||||
Includes:
|
||||
- Model validation tests (FrameworkResponse, ControlResponse, etc.)
|
||||
- _control_row conversion tests
|
||||
- Server-side pagination, sorting, search, source filter tests
|
||||
- /controls-count and /controls-meta endpoint tests
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from compliance.api.canonical_control_routes import (
|
||||
FrameworkResponse,
|
||||
ControlResponse,
|
||||
SimilarityCheckRequest,
|
||||
SimilarityCheckResponse,
|
||||
_control_row,
|
||||
router,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestClient setup for endpoint tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Module-level app and client shared by the endpoint tests in this file; the
# router is mounted under the same "/api/compliance" prefix the tests request.
_app = FastAPI()
_app.include_router(router, prefix="/api/compliance")
_client = TestClient(_app)
|
||||
|
||||
|
||||
class TestFrameworkResponse:
|
||||
"""Tests for FrameworkResponse model."""
|
||||
@@ -175,6 +194,12 @@ class TestControlRowConversion:
|
||||
],
|
||||
"release_state": "draft",
|
||||
"tags": ["mfa"],
|
||||
"generation_strategy": "ungrouped",
|
||||
"parent_control_uuid": None,
|
||||
"parent_control_id": None,
|
||||
"parent_control_title": None,
|
||||
"decomposition_method": None,
|
||||
"pipeline_version": None,
|
||||
"created_at": now,
|
||||
"updated_at": now,
|
||||
}
|
||||
@@ -223,3 +248,300 @@ class TestControlRowConversion:
|
||||
result = _control_row(row)
|
||||
assert result["created_at"] is None
|
||||
assert result["updated_at"] is None
|
||||
|
||||
def test_generation_strategy_default(self):
    """A row built with the fixture defaults converts to "ungrouped"."""
    converted = _control_row(self._make_row())
    assert converted["generation_strategy"] == "ungrouped"
|
||||
|
||||
def test_generation_strategy_document_grouped(self):
    """An explicit ``generation_strategy`` on the row is passed through."""
    converted = _control_row(self._make_row(generation_strategy="document_grouped"))
    assert converted["generation_strategy"] == "document_grouped"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT TESTS — Server-Side Pagination, Sort, Search, Source Filter
|
||||
# =============================================================================
|
||||
|
||||
def _make_mock_row(**overrides):
    """Build a mock Row with all canonical_controls columns.

    Args:
        **overrides: Column values that replace (or extend) the defaults.

    Returns:
        A ``MagicMock`` with one attribute per column. Attributes that are not
        set here are still auto-created by ``MagicMock`` when accessed.
    """
    stamp = datetime.now(timezone.utc)
    columns = {
        "id": "uuid-ctrl-1",
        "framework_id": "uuid-fw-1",
        "control_id": "AUTH-001",
        "title": "Test Control",
        "objective": "Test obj",
        "rationale": "Test rat",
        "scope": {},
        "requirements": ["Req 1"],
        "test_procedure": ["Test 1"],
        "evidence": [],
        "severity": "high",
        "risk_score": 3.0,
        "implementation_effort": "m",
        "evidence_confidence": None,
        "open_anchors": [],
        "release_state": "draft",
        "tags": [],
        "license_rule": 1,
        "source_original_text": None,
        "source_citation": None,
        "customer_visible": True,
        "verification_method": "automated",
        "category": "authentication",
        "target_audience": "developer",
        "generation_metadata": {},
        "generation_strategy": "ungrouped",
        # created_at and updated_at share one timestamp.
        "created_at": stamp,
        "updated_at": stamp,
    }
    columns.update(overrides)

    row = MagicMock()
    for column, value in columns.items():
        setattr(row, column, value)
    return row
|
||||
|
||||
|
||||
def _session_returning(rows=None, scalar=None):
    """Create a mock SessionLocal that returns rows or scalar.

    Args:
        rows: If not None, what ``execute(...).fetchall()`` returns.
        scalar: If not None, what ``execute(...).scalar()`` returns.

    Returns:
        A ``MagicMock`` session. Every ``execute`` call yields the same result
        object, and the session works as a context manager that yields itself.
    """
    query_result = MagicMock()
    if rows is not None:
        query_result.fetchall.return_value = rows
    if scalar is not None:
        query_result.scalar.return_value = scalar

    session = MagicMock()
    session.execute.return_value = query_result
    # "with SessionLocal() as db" must hand back this same mock; __exit__
    # returns False so exceptions inside the block are not suppressed.
    session.__enter__ = MagicMock(return_value=session)
    session.__exit__ = MagicMock(return_value=False)
    return session
|
||||
|
||||
|
||||
class TestListControlsPagination:
    """GET /controls with limit/offset."""

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_limit_param_in_sql(self, mock_cls):
        """limit/offset query params add LIMIT and OFFSET to the SQL."""
        mock_cls.return_value = _session_returning(rows=[_make_mock_row()])

        resp = _client.get("/api/compliance/v1/canonical/controls?limit=10&offset=20")

        assert resp.status_code == 200
        # First positional argument of the last execute() call is the statement.
        statement = mock_cls.return_value.__enter__().execute.call_args[0][0]
        sql = str(statement.text)
        assert "LIMIT" in sql
        assert "OFFSET" in sql

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_no_limit_by_default(self, mock_cls):
        """Without a limit param the SQL carries no LIMIT clause."""
        mock_cls.return_value = _session_returning(rows=[])

        resp = _client.get("/api/compliance/v1/canonical/controls")

        assert resp.status_code == 200
        statement = mock_cls.return_value.__enter__().execute.call_args[0][0]
        sql = str(statement.text)
        assert "LIMIT" not in sql
|
||||
|
||||
|
||||
class TestListControlsSorting:
    """GET /controls with sort/order."""

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_sort_created_at_desc(self, mock_cls):
        """sort=created_at&order=desc shows up as "created_at DESC"."""
        mock_cls.return_value = _session_returning(rows=[])

        resp = _client.get("/api/compliance/v1/canonical/controls?sort=created_at&order=desc")

        assert resp.status_code == 200
        statement = mock_cls.return_value.__enter__().execute.call_args[0][0]
        sql = str(statement.text)
        assert "created_at DESC" in sql

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_default_sort_control_id_asc(self, mock_cls):
        """Without sort params the SQL orders by "control_id ASC"."""
        mock_cls.return_value = _session_returning(rows=[])

        resp = _client.get("/api/compliance/v1/canonical/controls")

        assert resp.status_code == 200
        statement = mock_cls.return_value.__enter__().execute.call_args[0][0]
        sql = str(statement.text)
        assert "control_id ASC" in sql

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_sql_injection_in_sort_blocked(self, mock_cls):
        """An unrecognised sort value never reaches the SQL text."""
        mock_cls.return_value = _session_returning(rows=[])

        resp = _client.get("/api/compliance/v1/canonical/controls?sort=1;DROP+TABLE")

        assert resp.status_code == 200
        statement = mock_cls.return_value.__enter__().execute.call_args[0][0]
        sql = str(statement.text)
        assert "DROP" not in sql
        assert "control_id" in sql

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_sort_by_source(self, mock_cls):
        """sort=source orders by source_citation, then control_id."""
        mock_cls.return_value = _session_returning(rows=[])

        resp = _client.get("/api/compliance/v1/canonical/controls?sort=source&order=asc")

        assert resp.status_code == 200
        statement = mock_cls.return_value.__enter__().execute.call_args[0][0]
        sql = str(statement.text)
        assert "source_citation" in sql
        assert "control_id ASC" in sql  # secondary sort within source group
|
||||
|
||||
|
||||
class TestListControlsSearch:
    """GET /controls with search."""

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_search_uses_ilike(self, mock_cls):
        """The search term is bound as ``%term%`` to an ILIKE clause."""
        mock_cls.return_value = _session_returning(rows=[])

        resp = _client.get("/api/compliance/v1/canonical/controls?search=encryption")

        assert resp.status_code == 200
        # Positional args of the last execute() call: (statement, params).
        call_args = mock_cls.return_value.__enter__().execute.call_args[0]
        sql = str(call_args[0].text)
        assert "ILIKE" in sql
        params = call_args[1]
        assert params["q"] == "%encryption%"
|
||||
|
||||
|
||||
class TestListControlsSourceFilter:
    """GET /controls with source filter."""

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_specific_source(self, mock_cls):
        """A named source filters on source_citation with a bound parameter."""
        mock_cls.return_value = _session_returning(rows=[])

        resp = _client.get("/api/compliance/v1/canonical/controls?source=DSGVO")

        assert resp.status_code == 200
        # Positional args of the last execute() call: (statement, params).
        call_args = mock_cls.return_value.__enter__().execute.call_args[0]
        sql = str(call_args[0].text)
        assert "source_citation" in sql
        params = call_args[1]
        assert params["src"] == "DSGVO"

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_no_source_filter(self, mock_cls):
        """The ``__none__`` sentinel produces an IS NULL condition."""
        mock_cls.return_value = _session_returning(rows=[])

        resp = _client.get("/api/compliance/v1/canonical/controls?source=__none__")

        assert resp.status_code == 200
        statement = mock_cls.return_value.__enter__().execute.call_args[0][0]
        sql = str(statement.text)
        assert "IS NULL" in sql
|
||||
|
||||
|
||||
class TestControlsCount:
    """GET /controls-count."""

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_returns_total(self, mock_cls):
        """The scalar returned by the query comes back as ``total``."""
        mock_cls.return_value = _session_returning(scalar=42)

        resp = _client.get("/api/compliance/v1/canonical/controls-count")

        assert resp.status_code == 200
        assert resp.json() == {"total": 42}

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_with_filters(self, mock_cls):
        """severity and search filters both appear in the count SQL."""
        mock_cls.return_value = _session_returning(scalar=5)

        resp = _client.get("/api/compliance/v1/canonical/controls-count?severity=critical&search=mfa")

        assert resp.status_code == 200
        assert resp.json() == {"total": 5}
        statement = mock_cls.return_value.__enter__().execute.call_args[0][0]
        sql = str(statement.text)
        assert "severity" in sql
        assert "ILIKE" in sql
|
||||
|
||||
|
||||
class TestControlsMeta:
    """GET /controls-meta."""

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_returns_structure(self, mock_cls):
        """The response carries the total plus every facet key."""
        # Faceted meta does many execute() calls — use a default mock that
        # answers both scalar() and fetchall() for all of them.
        any_result = MagicMock()
        any_result.scalar.return_value = 100
        any_result.fetchall.return_value = []

        db = MagicMock()
        db.__enter__ = MagicMock(return_value=db)
        db.__exit__ = MagicMock(return_value=False)
        db.execute.return_value = any_result
        mock_cls.return_value = db

        resp = _client.get("/api/compliance/v1/canonical/controls-meta")

        assert resp.status_code == 200
        data = resp.json()
        assert data["total"] == 100
        assert isinstance(data["domains"], list)
        assert isinstance(data["sources"], list)
        assert "type_counts" in data
        assert "severity_counts" in data
        assert "verification_method_counts" in data
        assert "category_counts" in data
        assert "evidence_type_counts" in data
        assert "release_state_counts" in data
|
||||
|
||||
|
||||
class TestObligationDedup:
    """Tests for obligation deduplication endpoints."""

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_dedup_dry_run(self, mock_cls):
        """A dry run reports what would be kept/marked and never commits."""
        import uuid

        def entry(candidate_id, text, day):
            # One obligation row; rows of a group differ by text and creation day.
            return MagicMock(
                id=uuid.uuid4(),
                candidate_id=candidate_id,
                obligation_text=text,
                release_state="composed",
                created_at=datetime(2026, 1, day, tzinfo=timezone.utc),
            )

        def fetchall_result(rows):
            result = MagicMock()
            result.fetchall.return_value = rows
            return result

        db = MagicMock()
        db.__enter__ = MagicMock(return_value=db)
        db.__exit__ = MagicMock(return_value=False)
        mock_cls.return_value = db

        # Mock: 2 duplicate groups
        dup_groups = [
            MagicMock(candidate_id="OC-AUTH-001-01", cnt=3),
            MagicMock(candidate_id="OC-AUTH-001-02", cnt=2),
        ]
        # Entries for group 1
        group1 = [
            entry("OC-AUTH-001-01", "Text A", 1),
            entry("OC-AUTH-001-01", "Text B", 2),
            entry("OC-AUTH-001-01", "Text C", 3),
        ]
        # Entries for group 2
        group2 = [
            entry("OC-AUTH-001-02", "Text D", 1),
            entry("OC-AUTH-001-02", "Text E", 2),
        ]

        total_result = MagicMock()
        total_result.scalar.return_value = 2

        # Side effects: 1) dup groups, 2) total count, 3) entries grp1, 4) entries grp2
        db.execute.side_effect = [
            fetchall_result(dup_groups),
            total_result,
            fetchall_result(group1),
            fetchall_result(group2),
        ]

        resp = _client.post("/api/compliance/v1/canonical/obligations/dedup?dry_run=true")

        assert resp.status_code == 200
        data = resp.json()
        assert data["dry_run"] is True
        assert data["stats"]["total_duplicate_groups"] == 2
        assert data["stats"]["kept"] == 2
        assert data["stats"]["marked_duplicate"] == 3  # 2 from grp1 + 1 from grp2
        # Dry run: no commit
        db.commit.assert_not_called()

    @patch("compliance.api.canonical_control_routes.SessionLocal")
    def test_dedup_stats(self, mock_cls):
        """The stats endpoint reports totals, per-state counts and pending work."""

        def scalar_result(value):
            result = MagicMock()
            result.scalar.return_value = value
            return result

        db = MagicMock()
        db.__enter__ = MagicMock(return_value=db)
        db.__exit__ = MagicMock(return_value=False)
        mock_cls.return_value = db

        by_state = MagicMock()
        by_state.fetchall.return_value = [
            MagicMock(release_state="composed", cnt=41217),
            MagicMock(release_state="duplicate", cnt=34829),
        ]

        # total, by_state, dup_groups, removable
        db.execute.side_effect = [
            scalar_result(76046),
            by_state,
            scalar_result(0),
            scalar_result(0),
        ]

        resp = _client.get("/api/compliance/v1/canonical/obligations/dedup-stats")

        assert resp.status_code == 200
        data = resp.json()
        assert data["total_obligations"] == 76046
        assert data["by_state"]["composed"] == 41217
        assert data["by_state"]["duplicate"] == 34829
        assert data["pending_duplicate_groups"] == 0
        assert data["pending_removable_duplicates"] == 0
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user