merge: sync with origin/main, take upstream on conflicts

# Conflicts: # admin-compliance/lib/sdk/types.ts # admin-compliance/lib/sdk/vendor-compliance/types.ts
2026-04-16 16:26:48 +02:00
parent e04816cfe5 712fa8cb74
commit c43d9da6d0
352 changed files with 181673 additions and 2188 deletions
@@ -10,13 +10,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    && rm -rf /var/lib/apt/lists/*

 # Copy requirements first for better caching
-COPY requirements.txt .
+COPY requirements.txt requirements-reranker.txt ./

 # Create virtual environment and install dependencies
 RUN python -m venv /opt/venv
 ENV PATH="/opt/venv/bin:$PATH"
 RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir -r requirements.txt
+    pip install --no-cache-dir -r requirements.txt && \
+    pip install --no-cache-dir -r requirements-reranker.txt || \
+    echo "WARNING: reranker dependencies not installed (torch/sentence-transformers)"

 # ---- Runtime stage ----
 FROM python:3.12-slim-bookworm
@@ -6,6 +6,8 @@ from .routes import router

 logger = logging.getLogger(__name__)

+_failed_routers: dict[str, str] = {}
+

 def _safe_import_router(module_name: str, attr: str = "router"):
    """Import a router module safely — log error but don't crash the whole app."""
@@ -14,6 +16,7 @@ def _safe_import_router(module_name: str, attr: str = "router"):
        return getattr(mod, attr)
    except Exception as e:
        logger.error("Failed to import %s: %s", module_name, e)
+        _failed_routers[module_name] = str(e)
        return None


@@ -53,6 +56,13 @@ _ROUTER_MODULES = [
    "wiki_routes",
    "canonical_control_routes",
    "control_generator_routes",
+    "crosswalk_routes",
+    "process_task_routes",
+    "evidence_check_routes",
+    "vvt_library_routes",
+    "tom_mapping_routes",
+    "llm_audit_routes",
+    "assertion_routes",
 ]

 _loaded_count = 0
@@ -0,0 +1,227 @@
+"""
+API routes for Assertion Engine (Anti-Fake-Evidence Phase 2).
+
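+Endpoints:
+- /assertions: Create, list, fetch and update assertions
+- /assertions/{assertion_id}/verify: Mark an assertion as a verified fact
+- /assertions/extract: Automatic extraction from entity text
+- /assertions/summary: Stats (total assertions, facts, rationale, unverified)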
+"""
+
+import logging
+from datetime import datetime
+from typing import Optional
+
+from fastapi import APIRouter, Depends, HTTPException, Query
+from sqlalchemy.orm import Session
+
+from classroom_engine.database import get_db
+
+from ..db.models import AssertionDB
+from ..services.assertion_engine import extract_assertions
+from .schemas import (
+    AssertionCreate,
+    AssertionUpdate,
+    AssertionResponse,
+    AssertionListResponse,
+    AssertionSummaryResponse,
+    AssertionExtractRequest,
+)
+from .audit_trail_utils import log_audit_trail, generate_id
+
+logger = logging.getLogger(__name__)
+router = APIRouter(tags=["compliance-assertions"])
+
+
+def _build_assertion_response(a: AssertionDB) -> AssertionResponse:
+    return AssertionResponse(
+        id=a.id,
+        tenant_id=a.tenant_id,
+        entity_type=a.entity_type,
+        entity_id=a.entity_id,
+        sentence_text=a.sentence_text,
+        sentence_index=a.sentence_index,
+        assertion_type=a.assertion_type,
+        evidence_ids=a.evidence_ids or [],
+        confidence=a.confidence or 0.0,
+        normative_tier=a.normative_tier,
+        verified_by=a.verified_by,
+        verified_at=a.verified_at,
+        created_at=a.created_at,
+        updated_at=a.updated_at,
+    )
+
+
+@router.post("/assertions", response_model=AssertionResponse)
+async def create_assertion(
+    data: AssertionCreate,
+    tenant_id: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """Create a single assertion manually."""
+    a = AssertionDB(
+        id=generate_id(),
+        tenant_id=tenant_id,
+        entity_type=data.entity_type,
+        entity_id=data.entity_id,
+        sentence_text=data.sentence_text,
+        assertion_type=data.assertion_type or "assertion",
+        evidence_ids=data.evidence_ids or [],
+        normative_tier=data.normative_tier,
+    )
+    db.add(a)
+    db.commit()
+    db.refresh(a)
+    return _build_assertion_response(a)
+
+
+@router.get("/assertions", response_model=AssertionListResponse)
+async def list_assertions(
+    entity_type: Optional[str] = Query(None),
+    entity_id: Optional[str] = Query(None),
+    assertion_type: Optional[str] = Query(None),
+    tenant_id: Optional[str] = Query(None),
+    limit: int = Query(100, ge=1, le=500),
+    db: Session = Depends(get_db),
+):
+    """List assertions with optional filters."""
+    query = db.query(AssertionDB)
+    if entity_type:
+        query = query.filter(AssertionDB.entity_type == entity_type)
+    if entity_id:
+        query = query.filter(AssertionDB.entity_id == entity_id)
+    if assertion_type:
+        query = query.filter(AssertionDB.assertion_type == assertion_type)
+    if tenant_id:
+        query = query.filter(AssertionDB.tenant_id == tenant_id)
+
+    total = query.count()
+    records = query.order_by(AssertionDB.sentence_index.asc()).limit(limit).all()
+
+    return AssertionListResponse(
+        assertions=[_build_assertion_response(a) for a in records],
+        total=total,
+    )
+
+
+@router.get("/assertions/summary", response_model=AssertionSummaryResponse)
+async def assertion_summary(
+    tenant_id: Optional[str] = Query(None),
+    entity_type: Optional[str] = Query(None),
+    entity_id: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """Summary stats: total assertions, facts, rationale, unverified."""
+    query = db.query(AssertionDB)
+    if tenant_id:
+        query = query.filter(AssertionDB.tenant_id == tenant_id)
+    if entity_type:
+        query = query.filter(AssertionDB.entity_type == entity_type)
+    if entity_id:
+        query = query.filter(AssertionDB.entity_id == entity_id)
+
+    all_records = query.all()
+
+    total = len(all_records)
+    facts = sum(1 for a in all_records if a.assertion_type == "fact")
+    rationale = sum(1 for a in all_records if a.assertion_type == "rationale")
+    unverified = sum(1 for a in all_records if a.assertion_type == "assertion" and not a.verified_by)
+
+    return AssertionSummaryResponse(
+        total_assertions=total,
+        total_facts=facts,
+        total_rationale=rationale,
+        unverified_count=unverified,
+    )
+
+
+@router.get("/assertions/{assertion_id}", response_model=AssertionResponse)
+async def get_assertion(
+    assertion_id: str,
+    db: Session = Depends(get_db),
+):
+    """Get a single assertion by ID."""
+    a = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
+    if not a:
+        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")
+    return _build_assertion_response(a)
+
+
+@router.put("/assertions/{assertion_id}", response_model=AssertionResponse)
+async def update_assertion(
+    assertion_id: str,
+    data: AssertionUpdate,
+    db: Session = Depends(get_db),
+):
+    """Update an assertion (e.g. link evidence, change type)."""
+    a = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
+    if not a:
+        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")
+
+    update_fields = data.model_dump(exclude_unset=True)
+    for key, value in update_fields.items():
+        setattr(a, key, value)
+    a.updated_at = datetime.utcnow()
+    db.commit()
+    db.refresh(a)
+    return _build_assertion_response(a)
+
+
+@router.post("/assertions/{assertion_id}/verify", response_model=AssertionResponse)
+async def verify_assertion(
+    assertion_id: str,
+    verified_by: str = Query(...),
+    db: Session = Depends(get_db),
+):
+    """Mark an assertion as verified fact."""
+    a = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
+    if not a:
+        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")
+
+    a.assertion_type = "fact"
+    a.verified_by = verified_by
+    a.verified_at = datetime.utcnow()
+    a.updated_at = datetime.utcnow()
+    db.commit()
+    db.refresh(a)
+    return _build_assertion_response(a)
+
+
+@router.post("/assertions/extract", response_model=AssertionListResponse)
+async def extract_assertions_endpoint(
+    data: AssertionExtractRequest,
+    tenant_id: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """Extract assertions from free text and persist them."""
+    extracted = extract_assertions(
+        text=data.text,
+        entity_type=data.entity_type,
+        entity_id=data.entity_id,
+        tenant_id=tenant_id,
+    )
+
+    created = []
+    for item in extracted:
+        a = AssertionDB(
+            id=generate_id(),
+            tenant_id=item["tenant_id"],
+            entity_type=item["entity_type"],
+            entity_id=item["entity_id"],
+            sentence_text=item["sentence_text"],
+            sentence_index=item["sentence_index"],
+            assertion_type=item["assertion_type"],
+            evidence_ids=item["evidence_ids"],
+            normative_tier=item.get("normative_tier"),
+            confidence=item.get("confidence", 0.0),
+        )
+        db.add(a)
+        created.append(a)
+
+    db.commit()
+    for a in created:
+        db.refresh(a)
+
+    return AssertionListResponse(
+        assertions=[_build_assertion_response(a) for a in created],
+        total=len(created),
+    )
@@ -0,0 +1,53 @@
+"""Shared audit trail utilities.
+
+Extracted from isms_routes.py for reuse across evidence, control,
+and assertion routes.
+"""
+
+import hashlib
+import uuid
+from datetime import datetime
+
+from sqlalchemy.orm import Session
+
+from ..db.models import AuditTrailDB
+
+
+def generate_id() -> str:
+    """Generate a UUID string."""
+    return str(uuid.uuid4())
+
+
+def create_signature(data: str) -> str:
+    """Create SHA-256 signature."""
+    return hashlib.sha256(data.encode()).hexdigest()
+
+
+def log_audit_trail(
+    db: Session,
+    entity_type: str,
+    entity_id: str,
+    entity_name: str,
+    action: str,
+    performed_by: str,
+    field_changed: str = None,
+    old_value: str = None,
+    new_value: str = None,
+    change_summary: str = None,
+) -> None:
+    """Log an entry to the audit trail.
+
+    Builds an ``AuditTrailDB`` row and adds it to the session. The row is NOT
+    committed here; the caller's transaction decides whether it is persisted.
+
+    Args:
+        db: Session the audit row is added to.
+        entity_type: Kind of entity the action was performed on.
+        entity_id: Identifier of that entity.
+        entity_name: Name of the entity, stored as given.
+        action: Name of the action performed.
+        performed_by: Who performed the action.
+        field_changed: Optional name of the changed field.
+        old_value: Optional previous value.
+        new_value: Optional new value.
+        change_summary: Optional free-text summary.
+    """
+    trail = AuditTrailDB(
+        id=generate_id(),
+        entity_type=entity_type,
+        entity_id=entity_id,
+        entity_name=entity_name,
+        action=action,
+        field_changed=field_changed,
+        old_value=old_value,
+        new_value=new_value,
+        change_summary=change_summary,
+        performed_by=performed_by,
+        performed_at=datetime.utcnow(),
+        # The checksum covers only these four fields; old/new values and the
+        # timestamp are not part of the signed string.
+        checksum=create_signature(f"{entity_type}|{entity_id}|{action}|{performed_by}"),
+    )
+    db.add(trail)
@@ -12,6 +12,7 @@ Endpoints:
  POST /v1/canonical/blocked-sources/cleanup      — Start cleanup workflow
 """

+import asyncio
 import json
 import logging
 from typing import Optional, List
@@ -25,7 +26,16 @@ from compliance.services.control_generator import (
    ControlGeneratorPipeline,
    GeneratorConfig,
    ALL_COLLECTIONS,
+    VALID_CATEGORIES,
+    VALID_DOMAINS,
+    _classify_regulation,
+    _detect_category,
+    _detect_domain,
+    _llm_local,
+    _parse_llm_json,
+    CATEGORY_LIST_STR,
 )
+from compliance.services.citation_backfill import CitationBackfill, BackfillResult
 from compliance.services.rag_client import get_rag_client

 logger = logging.getLogger(__name__)
@@ -40,9 +50,12 @@ class GenerateRequest(BaseModel):
    domain: Optional[str] = None
    collections: Optional[List[str]] = None
    max_controls: int = 50
+    max_chunks: int = 1000  # Default: process max 1000 chunks per job (respects document boundaries)
    batch_size: int = 5
    skip_web_search: bool = False
    dry_run: bool = False
+    regulation_filter: Optional[List[str]] = None  # Only process these regulation_code prefixes
+    skip_prefilter: bool = False  # Skip local LLM pre-filter, send all chunks to API


 class GenerateResponse(BaseModel):
@@ -55,6 +68,7 @@ class GenerateResponse(BaseModel):
    controls_needs_review: int = 0
    controls_too_close: int = 0
    controls_duplicates_found: int = 0
+    controls_qa_fixed: int = 0
    errors: list = []
    controls: list = []

@@ -89,42 +103,111 @@ class BlockedSourceResponse(BaseModel):
 # ENDPOINTS
 # =============================================================================

+async def _run_pipeline_background(config: GeneratorConfig, job_id: str):
+    """Run the pipeline in the background. Uses its own DB session."""
+    db = SessionLocal()
+    try:
+        config.existing_job_id = job_id
+        pipeline = ControlGeneratorPipeline(db=db, rag_client=get_rag_client())
+        result = await pipeline.run(config)
+        logger.info(
+            "Background generation job %s completed: %d controls from %d chunks",
+            job_id, result.controls_generated, result.total_chunks_scanned,
+        )
+    except Exception as e:
+        logger.error("Background generation job %s failed: %s", job_id, e)
+        # Update job as failed
+        try:
+            db.execute(
+                text("""
+                    UPDATE canonical_generation_jobs
+                    SET status = 'failed', errors = :errors, completed_at = NOW()
+                    WHERE id = CAST(:job_id AS uuid)
+                """),
+                {"job_id": job_id, "errors": json.dumps([str(e)])},
+            )
+            db.commit()
+        except Exception:
+            pass
+    finally:
+        db.close()
+
+
@router.post("/generate", response_model=GenerateResponse)
 async def start_generation(req: GenerateRequest):
-    """Start a control generation run."""
+    """Start a control generation run (runs in background).
+
+    Returns immediately with job_id. Use GET /generate/status/{job_id} to poll progress.
+    """
    config = GeneratorConfig(
        collections=req.collections,
        domain=req.domain,
        batch_size=req.batch_size,
        max_controls=req.max_controls,
+        max_chunks=req.max_chunks,
        skip_web_search=req.skip_web_search,
        dry_run=req.dry_run,
+        regulation_filter=req.regulation_filter,
+        skip_prefilter=req.skip_prefilter,
    )

+    if req.dry_run:
+        # Dry run: execute synchronously and return controls
+        db = SessionLocal()
+        try:
+            pipeline = ControlGeneratorPipeline(db=db, rag_client=get_rag_client())
+            result = await pipeline.run(config)
+            return GenerateResponse(
+                job_id=result.job_id,
+                status=result.status,
+                message=f"Dry run: {result.controls_generated} controls from {result.total_chunks_scanned} chunks",
+                total_chunks_scanned=result.total_chunks_scanned,
+                controls_generated=result.controls_generated,
+                controls_verified=result.controls_verified,
+                controls_needs_review=result.controls_needs_review,
+                controls_too_close=result.controls_too_close,
+                controls_duplicates_found=result.controls_duplicates_found,
+                errors=result.errors,
+                controls=result.controls,
+            )
+        except Exception as e:
+            logger.error("Dry run failed: %s", e)
+            raise HTTPException(status_code=500, detail=str(e))
+        finally:
+            db.close()
+
+    # Create job record first so we can return the ID
    db = SessionLocal()
    try:
-        pipeline = ControlGeneratorPipeline(db=db, rag_client=get_rag_client())
-        result = await pipeline.run(config)
-
-        return GenerateResponse(
-            job_id=result.job_id,
-            status=result.status,
-            message=f"Generated {result.controls_generated} controls from {result.total_chunks_scanned} chunks",
-            total_chunks_scanned=result.total_chunks_scanned,
-            controls_generated=result.controls_generated,
-            controls_verified=result.controls_verified,
-            controls_needs_review=result.controls_needs_review,
-            controls_too_close=result.controls_too_close,
-            controls_duplicates_found=result.controls_duplicates_found,
-            errors=result.errors,
-            controls=result.controls if req.dry_run else [],
+        result = db.execute(
+            text("""
+                INSERT INTO canonical_generation_jobs (status, config)
+                VALUES ('running', :config)
+                RETURNING id
+            """),
+            {"config": json.dumps(config.model_dump())},
        )
+        db.commit()
+        row = result.fetchone()
+        job_id = str(row[0]) if row else None
    except Exception as e:
-        logger.error("Generation failed: %s", e)
-        raise HTTPException(status_code=500, detail=str(e))
+        logger.error("Failed to create job: %s", e)
+        raise HTTPException(status_code=500, detail=f"Failed to create job: {e}")
    finally:
        db.close()

+    if not job_id:
+        raise HTTPException(status_code=500, detail="Failed to create job record")
+
+    # Launch pipeline in background
+    asyncio.create_task(_run_pipeline_background(config, job_id))
+
+    return GenerateResponse(
+        job_id=job_id,
+        status="running",
+        message="Generation started in background. Poll /generate/status/{job_id} for progress.",
+    )
+

@router.get("/generate/status/{job_id}")
 async def get_job_status(job_id: str):
@@ -132,7 +215,7 @@ async def get_job_status(job_id: str):
    db = SessionLocal()
    try:
        result = db.execute(
-            text("SELECT * FROM canonical_generation_jobs WHERE id = :id::uuid"),
+            text("SELECT * FROM canonical_generation_jobs WHERE id = CAST(:id AS uuid)"),
            {"id": job_id},
        )
        row = result.fetchone()
@@ -270,6 +353,188 @@ async def review_control(control_id: str, req: ReviewRequest):
        db.close()


+class BulkReviewRequest(BaseModel):
+    """Request body for POST /generate/bulk-review."""
+
+    # Filter: which controls to bulk-review. The endpoint accepts
+    # "needs_review", "too_close" or "duplicate".
+    release_state: str  # Filter: which controls to bulk-review
+    action: str  # "approve" or "reject"
+    # Only used for "approve" (default target "draft"); "reject" always
+    # targets "deprecated".
+    new_state: Optional[str] = None  # Override target state
+
+
+@router.post("/generate/bulk-review")
+async def bulk_review(req: BulkReviewRequest):
+    """Bulk review all controls matching a release_state filter.
+
+    Example: reject all needs_review → sets them to deprecated.
+    """
+    if req.release_state not in ("needs_review", "too_close", "duplicate"):
+        raise HTTPException(status_code=400, detail=f"Invalid filter state: {req.release_state}")
+
+    if req.action == "approve":
+        target = req.new_state or "draft"
+    elif req.action == "reject":
+        target = "deprecated"
+    else:
+        raise HTTPException(status_code=400, detail=f"Unknown action: {req.action}")
+
+    if target not in ("draft", "review", "approved", "deprecated", "needs_review"):
+        raise HTTPException(status_code=400, detail=f"Invalid target state: {target}")
+
+    db = SessionLocal()
+    try:
+        result = db.execute(
+            text("""
+                UPDATE canonical_controls
+                SET release_state = :target, updated_at = NOW()
+                WHERE release_state = :source
+                RETURNING control_id
+            """),
+            {"source": req.release_state, "target": target},
+        )
+        affected = [row[0] for row in result]
+        db.commit()
+
+        return {
+            "action": req.action,
+            "source_state": req.release_state,
+            "target_state": target,
+            "affected_count": len(affected),
+        }
+    finally:
+        db.close()
+
+
+class QAReclassifyRequest(BaseModel):
+    """Request body for POST /generate/qa-reclassify."""
+
+    # Used as the SQL LIMIT of the control query (newest controls first).
+    limit: int = 100  # How many controls to reclassify per run
+    dry_run: bool = True  # Preview only by default
+    filter_category: Optional[str] = None  # Only reclassify controls of this category
+    # Matched against control_id as "<prefix>-%".
+    filter_domain_prefix: Optional[str] = None  # Only reclassify controls with this prefix
+
+
+@router.post("/generate/qa-reclassify")
+async def qa_reclassify(req: QAReclassifyRequest):
+    """Run QA reclassification on existing controls using local LLM.
+
+    Finds controls where keyword-detection disagrees with current category/domain,
+    then uses Ollama to determine the correct classification.
+
+    Only the ``category`` column is ever written. The suggested domain is
+    reported as ``new_domain`` in the result but is not persisted. Nothing is
+    written when ``req.dry_run`` is true.
+
+    Returns:
+        Dict with ``checked`` and ``mismatches`` counters, ``fixes`` (one entry
+        per control the LLM answered for, with an ``applied`` flag) and
+        ``errors`` (one entry per control whose processing raised).
+    """
+    db = SessionLocal()
+    try:
+        # Load controls to check
+        # The WHERE clause is assembled from fixed literals only; request
+        # values are passed as bind parameters.
+        where_clauses = ["release_state NOT IN ('deprecated')"]
+        params = {"limit": req.limit}
+        if req.filter_category:
+            where_clauses.append("category = :cat")
+            params["cat"] = req.filter_category
+        if req.filter_domain_prefix:
+            where_clauses.append("control_id LIKE :prefix")
+            params["prefix"] = f"{req.filter_domain_prefix}-%"
+
+        where_sql = " AND ".join(where_clauses)
+        rows = db.execute(
+            text(f"""
+                SELECT id, control_id, title, objective, category,
+                       COALESCE(requirements::text, '[]') as requirements,
+                       COALESCE(source_original_text, '') as source_text
+                FROM canonical_controls
+                WHERE {where_sql}
+                ORDER BY created_at DESC
+                LIMIT :limit
+            """),
+            params,
+        ).fetchall()
+
+        results = {"checked": 0, "mismatches": 0, "fixes": [], "errors": []}
+
+        for row in rows:
+            results["checked"] += 1
+            # Column order follows the SELECT list above.
+            control_id = row[1]
+            title = row[2]
+            objective = row[3] or ""
+            current_category = row[4]
+            # Empty source text falls back to the objective.
+            source_text = row[6] or objective
+
+            # Keyword detection on source text
+            kw_category = _detect_category(source_text) or _detect_category(objective)
+            kw_domain = _detect_domain(source_text)
+            # The domain is the control_id part before the first "-".
+            current_prefix = control_id.split("-")[0] if "-" in control_id else ""
+
+            # Skip if keyword detection agrees with current classification
+            if kw_category == current_category and kw_domain == current_prefix:
+                continue
+
+            results["mismatches"] += 1
+
+            # Ask Ollama to arbitrate
+            try:
+                # Up to three requirements are included in the prompt;
+                # unparsable requirements JSON is ignored.
+                reqs_text = ""
+                try:
+                    reqs = json.loads(row[5])
+                    if isinstance(reqs, list):
+                        reqs_text = ", ".join(str(r) for r in reqs[:3])
+                except Exception:
+                    pass
+
+                # NOTE(review): title is sliced without a None guard; a NULL
+                # title would raise here and end up in results["errors"].
+                # Confirm whether the column is nullable.
+                prompt = f"""Pruefe dieses Compliance-Control auf korrekte Klassifizierung.
+
+Titel: {title[:100]}
+Ziel: {objective[:200]}
+Anforderungen: {reqs_text[:200]}
+
+Aktuelle Zuordnung: domain={current_prefix}, category={current_category}
+Keyword-Erkennung: domain={kw_domain}, category={kw_category}
+
+Welche Zuordnung ist korrekt? Antworte NUR als JSON:
+{{"domain": "KUERZEL", "category": "kategorie_name", "reason": "kurze Begruendung"}}
+
+Domains: AUTH=Authentifizierung, CRYP=Kryptographie, NET=Netzwerk, DATA=Datenschutz, LOG=Logging, ACC=Zugriffskontrolle, SEC=IT-Sicherheit, INC=Vorfallmanagement, AI=KI, COMP=Compliance, GOV=Behoerden, LAB=Arbeitsrecht, FIN=Finanzregulierung, TRD=Gewerbe, ENV=Umwelt, HLT=Gesundheit
+Kategorien: {CATEGORY_LIST_STR}"""
+
+                raw = await _llm_local(prompt)
+                data = _parse_llm_json(raw)
+                # No usable LLM answer: counted as a mismatch, but no fix entry.
+                if not data:
+                    continue
+
+                qa_domain = data.get("domain", "").upper()
+                qa_category = data.get("category", "")
+                reason = data.get("reason", "")
+
+                # Suggestions outside VALID_CATEGORIES / VALID_DOMAINS are
+                # reported as "unchanged" (the current value).
+                fix_entry = {
+                    "control_id": control_id,
+                    "title": title[:80],
+                    "old_category": current_category,
+                    "old_domain": current_prefix,
+                    "new_category": qa_category if qa_category in VALID_CATEGORIES else current_category,
+                    "new_domain": qa_domain if qa_domain in VALID_DOMAINS else current_prefix,
+                    "reason": reason,
+                }
+
+                category_changed = qa_category in VALID_CATEGORIES and qa_category != current_category
+
+                # Only the category is updated; a domain change is reported
+                # in fix_entry but never written.
+                if category_changed and not req.dry_run:
+                    db.execute(
+                        text("""
+                            UPDATE canonical_controls
+                            SET category = :category, updated_at = NOW()
+                            WHERE id = :id
+                        """),
+                        {"id": row[0], "category": qa_category},
+                    )
+                    fix_entry["applied"] = True
+                else:
+                    fix_entry["applied"] = False
+
+                # Appended even when nothing changed (applied=False).
+                results["fixes"].append(fix_entry)
+
+            except Exception as e:
+                # Per-control failures are collected; the loop continues.
+                results["errors"].append({"control_id": control_id, "error": str(e)})
+
+        # All applied updates are committed together after the loop.
+        if not req.dry_run:
+            db.commit()
+
+        return results
+    finally:
+        db.close()
+
+
@router.get("/generate/processed-stats")
 async def get_processed_stats():
    """Get processing statistics per collection."""
@@ -429,3 +694,407 @@ async def get_controls_customer_view(
        return {"controls": controls, "total": len(controls)}
    finally:
        db.close()
+
+
+# =============================================================================
+# CITATION BACKFILL
+# =============================================================================
+
+class BackfillRequest(BaseModel):
+    """Request body for POST /generate/backfill-citations."""
+
+    dry_run: bool = True  # Default to dry_run for safety
+    limit: int = 0  # 0 = all controls
+
+
+class BackfillResponse(BaseModel):
+    """Response model of POST /generate/backfill-citations.
+
+    The start endpoint fills in only ``status``; the counters keep their
+    defaults in that response.
+    """
+
+    status: str
+    total_controls: int = 0
+    matched_hash: int = 0
+    matched_regex: int = 0
+    matched_llm: int = 0
+    unmatched: int = 0
+    updated: int = 0
+    errors: list = []
+
+
+# Citation backfill state keyed by the 8-character backfill id. Module-level
+# dict: it lives in this process only, and nothing visible here removes entries.
+_backfill_status: dict = {}
+
+
+async def _run_backfill_background(dry_run: bool, limit: int, backfill_id: str):
+    """Run backfill in background with own DB session."""
+    db = SessionLocal()
+    try:
+        backfill = CitationBackfill(db=db, rag_client=get_rag_client())
+        result = await backfill.run(dry_run=dry_run, limit=limit)
+        _backfill_status[backfill_id] = {
+            "status": "completed",
+            "total_controls": result.total_controls,
+            "matched_hash": result.matched_hash,
+            "matched_regex": result.matched_regex,
+            "matched_llm": result.matched_llm,
+            "unmatched": result.unmatched,
+            "updated": result.updated,
+            "errors": result.errors[:50],
+        }
+        logger.info("Backfill %s completed: %d updated", backfill_id, result.updated)
+    except Exception as e:
+        logger.error("Backfill %s failed: %s", backfill_id, e)
+        _backfill_status[backfill_id] = {"status": "failed", "errors": [str(e)]}
+    finally:
+        db.close()
+
+
+@router.post("/generate/backfill-citations", response_model=BackfillResponse)
+async def start_backfill(req: BackfillRequest):
+    """Backfill article/paragraph into existing control source_citations.
+
+    Uses 3-tier matching: hash lookup → regex parse → Ollama LLM.
+    Default is dry_run=True (preview only, no DB changes).
+    """
+    import uuid
+    backfill_id = str(uuid.uuid4())[:8]
+    _backfill_status[backfill_id] = {"status": "running"}
+
+    # Always run in background (RAG index build takes minutes)
+    asyncio.create_task(_run_backfill_background(req.dry_run, req.limit, backfill_id))
+    return BackfillResponse(
+        status=f"running (id={backfill_id})",
+    )
+
+
+@router.get("/generate/backfill-status/{backfill_id}")
+async def get_backfill_status(backfill_id: str):
+    """Get status of a backfill job."""
+    status = _backfill_status.get(backfill_id)
+    if not status:
+        raise HTTPException(status_code=404, detail="Backfill job not found")
+    return status
+
+
+# =============================================================================
+# DOMAIN + TARGET AUDIENCE BACKFILL
+# =============================================================================
+
+class DomainBackfillRequest(BaseModel):
+    """Options for the domain / category / target_audience backfill."""
+
+    dry_run: bool = True
+    # Matched against generation_metadata->>'job_id' when set.
+    job_id: Optional[str] = None  # Only backfill controls from this job
+    # A value > 0 is appended to the control query as its SQL LIMIT.
+    limit: int = 0  # 0 = all
+
+# Progress/result of domain backfill runs, keyed by backfill id (module-level dict).
+_domain_backfill_status: dict = {}
+
+
+async def _run_domain_backfill(req: DomainBackfillRequest, backfill_id: str):
+    """Backfill domain, category, and target_audience for existing controls using Anthropic."""
+    import os
+    import httpx
+
+    ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
+    ANTHROPIC_MODEL = os.getenv("CONTROL_GEN_ANTHROPIC_MODEL", "claude-sonnet-4-6")
+
+    if not ANTHROPIC_API_KEY:
+        _domain_backfill_status[backfill_id] = {
+            "status": "failed", "error": "ANTHROPIC_API_KEY not set"
+        }
+        return
+
+    db = SessionLocal()
+    try:
+        # Find controls needing backfill
+        where_clauses = ["(target_audience IS NULL OR target_audience = '[]' OR target_audience = 'null')"]
+        params: dict = {}
+        if req.job_id:
+            where_clauses.append("generation_metadata->>'job_id' = :job_id")
+            params["job_id"] = req.job_id
+
+        query = f"""
+            SELECT id, control_id, title, objective, category, source_original_text, tags
+            FROM canonical_controls
+            WHERE {' AND '.join(where_clauses)}
+            ORDER BY control_id
+        """
+        if req.limit > 0:
+            query += f" LIMIT {req.limit}"
+
+        result = db.execute(text(query), params)
+        controls = [dict(zip(result.keys(), row)) for row in result]
+
+        total = len(controls)
+        updated = 0
+        errors = []
+
+        _domain_backfill_status[backfill_id] = {
+            "status": "running", "total": total, "updated": 0, "errors": []
+        }
+
+        # Process in batches of 10
+        BATCH_SIZE = 10
+        for batch_start in range(0, total, BATCH_SIZE):
+            batch = controls[batch_start:batch_start + BATCH_SIZE]
+
+            entries = []
+            for idx, ctrl in enumerate(batch):
+                text_for_analysis = ctrl.get("objective") or ctrl.get("title") or ""
+                original = ctrl.get("source_original_text") or ""
+                if original:
+                    text_for_analysis += f"\n\nQuelltext-Auszug: {original[:500]}"
+                entries.append(
+                    f"--- CONTROL {idx + 1}: {ctrl['control_id']} ---\n"
+                    f"Titel: {ctrl.get('title', '')}\n"
+                    f"Objective: {text_for_analysis[:800]}\n"
+                    f"Tags: {json.dumps(ctrl.get('tags', []))}"
+                )
+
+            prompt = f"""Analysiere die folgenden {len(batch)} Controls und bestimme fuer jedes:
+1. domain: Das Fachgebiet (AUTH, CRYP, NET, DATA, LOG, ACC, SEC, INC, AI, COMP, GOV, LAB, FIN, TRD, ENV, HLT)
+2. category: Die Kategorie (encryption, authentication, network, data_protection, logging, incident, continuity, compliance, supply_chain, physical, personnel, application, system, risk, governance, hardware, identity, public_administration, labor_law, finance, trade_regulation, environmental, health)
+3. target_audience: Liste der Zielgruppen (moegliche Werte: "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "vertrieb", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst")
+
+Antworte mit einem JSON-Array mit {len(batch)} Objekten. Jedes Objekt hat:
+- control_index: 1-basierter Index
+- domain: Fachgebiet-Kuerzel
+- category: Kategorie
+- target_audience: Liste der Zielgruppen
+
+{"".join(entries)}"""
+
+            try:
+                headers = {
+                    "x-api-key": ANTHROPIC_API_KEY,
+                    "anthropic-version": "2023-06-01",
+                    "content-type": "application/json",
+                }
+                payload = {
+                    "model": ANTHROPIC_MODEL,
+                    "max_tokens": 4096,
+                    "system": "Du bist ein Compliance-Experte. Klassifiziere Controls nach Fachgebiet und Zielgruppe. Antworte NUR mit validem JSON.",
+                    "messages": [{"role": "user", "content": prompt}],
+                }
+
+                async with httpx.AsyncClient(timeout=60.0) as client:
+                    resp = await client.post(
+                        "https://api.anthropic.com/v1/messages",
+                        headers=headers,
+                        json=payload,
+                    )
+                    if resp.status_code != 200:
+                        errors.append(f"Anthropic API {resp.status_code} at batch {batch_start}")
+                        continue
+
+                    raw = resp.json().get("content", [{}])[0].get("text", "")
+
+                # Parse response
+                import re
+                bracket_match = re.search(r"\[.*\]", raw, re.DOTALL)
+                if not bracket_match:
+                    errors.append(f"No JSON array in response at batch {batch_start}")
+                    continue
+
+                results_list = json.loads(bracket_match.group(0))
+
+                for item in results_list:
+                    idx = item.get("control_index", 0) - 1
+                    if idx < 0 or idx >= len(batch):
+                        continue
+                    ctrl = batch[idx]
+                    ctrl_id = str(ctrl["id"])
+
+                    new_domain = item.get("domain", "")
+                    new_category = item.get("category", "")
+                    new_audience = item.get("target_audience", [])
+
+                    if not isinstance(new_audience, list):
+                        new_audience = []
+
+                    # Build new control_id from domain if domain changed
+                    old_prefix = ctrl["control_id"].split("-")[0] if ctrl["control_id"] else ""
+                    new_prefix = new_domain.upper()[:4] if new_domain else old_prefix
+
+                    if not req.dry_run:
+                        update_parts = []
+                        update_params: dict = {"ctrl_id": ctrl_id}
+
+                        if new_category:
+                            update_parts.append("category = :category")
+                            update_params["category"] = new_category
+
+                        if new_audience:
+                            update_parts.append("target_audience = :target_audience")
+                            update_params["target_audience"] = json.dumps(new_audience)
+
+                        # Note: We do NOT rename control_ids here — that would
+                        # break references and cause unique constraint violations.
+
+                        if update_parts:
+                            update_parts.append("updated_at = NOW()")
+                            db.execute(
+                                text(f"UPDATE canonical_controls SET {', '.join(update_parts)} WHERE id = CAST(:ctrl_id AS uuid)"),
+                                update_params,
+                            )
+                            updated += 1
+
+                if not req.dry_run:
+                    db.commit()
+
+            except Exception as e:
+                errors.append(f"Batch {batch_start}: {str(e)}")
+                db.rollback()
+
+            _domain_backfill_status[backfill_id] = {
+                "status": "running", "total": total, "updated": updated,
+                "progress": f"{min(batch_start + BATCH_SIZE, total)}/{total}",
+                "errors": errors[-10:],
+            }
+
+        _domain_backfill_status[backfill_id] = {
+            "status": "completed", "total": total, "updated": updated,
+            "errors": errors[-50:],
+        }
+        logger.info("Domain backfill %s completed: %d/%d updated", backfill_id, updated, total)
+
+    except Exception as e:
+        logger.error("Domain backfill %s failed: %s", backfill_id, e)
+        _domain_backfill_status[backfill_id] = {"status": "failed", "error": str(e)}
+    finally:
+        db.close()
+
+
+@router.post("/generate/backfill-domain")
+async def start_domain_backfill(req: DomainBackfillRequest):
+    """Backfill domain, category, and target_audience for controls using Anthropic API.
+
+    Finds controls where target_audience is NULL and enriches them.
+    Default is dry_run=True (preview only).
+    """
+    import uuid
+    backfill_id = str(uuid.uuid4())[:8]
+    _domain_backfill_status[backfill_id] = {"status": "starting"}
+    asyncio.create_task(_run_domain_backfill(req, backfill_id))
+    return {"status": "running", "backfill_id": backfill_id,
+            "message": f"Domain backfill started. Poll /generate/backfill-status/{backfill_id}"}
+
+
+@router.get("/generate/domain-backfill-status/{backfill_id}")
+async def get_domain_backfill_status(backfill_id: str):
+    """Get status of a domain backfill job."""
+    status = _domain_backfill_status.get(backfill_id)
+    if not status:
+        raise HTTPException(status_code=404, detail="Domain backfill job not found")
+    return status
+
+
+# ---------------------------------------------------------------------------
+# Source-Type Backfill — Classify law vs guideline vs standard vs restricted
+# ---------------------------------------------------------------------------
+
+class SourceTypeBackfillRequest(BaseModel):
+    # Request body for POST /generate/backfill-source-type.
+    # When True (the default) the job issues no UPDATE statements and no commits.
+    dry_run: bool = True
+
+
+# Module-level status registry for source-type backfill jobs, keyed by backfill_id.
+_source_type_backfill_status: dict = {}
+
+
+async def _run_source_type_backfill(dry_run: bool, backfill_id: str):
+    """Backfill source_type into source_citation JSONB for all controls."""
+    db = SessionLocal()
+    try:
+        # Find controls with source_citation that lack source_type
+        rows = db.execute(text("""
+            SELECT control_id, source_citation, generation_metadata
+            FROM compliance.canonical_controls
+            WHERE source_citation IS NOT NULL
+              AND (source_citation->>'source_type' IS NULL
+                   OR source_citation->>'source_type' = '')
+        """)).fetchall()
+
+        total = len(rows)
+        updated = 0
+        already_correct = 0
+        errors = []
+
+        _source_type_backfill_status[backfill_id] = {
+            "status": "running", "total": total, "updated": 0, "dry_run": dry_run,
+        }
+
+        for row in rows:
+            cid = row[0]
+            citation = row[1] if isinstance(row[1], dict) else json.loads(row[1] or "{}")
+            metadata = row[2] if isinstance(row[2], dict) else json.loads(row[2] or "{}")
+
+            # Get regulation_code from metadata
+            reg_code = metadata.get("source_regulation", "")
+            if not reg_code:
+                # Try to infer from source name
+                errors.append(f"{cid}: no source_regulation in metadata")
+                continue
+
+            # Classify
+            license_info = _classify_regulation(reg_code)
+            source_type = license_info.get("source_type", "restricted")
+
+            # Update citation
+            citation["source_type"] = source_type
+
+            if not dry_run:
+                db.execute(text("""
+                    UPDATE compliance.canonical_controls
+                    SET source_citation = :citation
+                    WHERE control_id = :cid
+                """), {"citation": json.dumps(citation), "cid": cid})
+                if updated % 100 == 0:
+                    db.commit()
+            updated += 1
+
+        if not dry_run:
+            db.commit()
+
+        # Count distribution
+        dist_query = db.execute(text("""
+            SELECT source_citation->>'source_type' as st, COUNT(*)
+            FROM compliance.canonical_controls
+            WHERE source_citation IS NOT NULL
+              AND source_citation->>'source_type' IS NOT NULL
+            GROUP BY st
+        """)).fetchall() if not dry_run else []
+
+        distribution = {r[0]: r[1] for r in dist_query}
+
+        _source_type_backfill_status[backfill_id] = {
+            "status": "completed", "total": total, "updated": updated,
+            "dry_run": dry_run, "distribution": distribution,
+            "errors": errors[:50],
+        }
+        logger.info("Source-type backfill %s completed: %d/%d updated (dry_run=%s)",
+                     backfill_id, updated, total, dry_run)
+
+    except Exception as e:
+        logger.error("Source-type backfill %s failed: %s", backfill_id, e)
+        _source_type_backfill_status[backfill_id] = {"status": "failed", "error": str(e)}
+    finally:
+        db.close()
+
+
+@router.post("/generate/backfill-source-type")
+async def start_source_type_backfill(req: SourceTypeBackfillRequest):
+    """Backfill source_type (law/guideline/standard/restricted) into source_citation JSONB.
+
+    Classifies each control's source as binding law, authority guideline,
+    voluntary standard, or restricted norm based on regulation_code.
+    Default is dry_run=True (preview only).
+    """
+    import uuid
+    backfill_id = str(uuid.uuid4())[:8]
+    _source_type_backfill_status[backfill_id] = {"status": "starting"}
+    asyncio.create_task(_run_source_type_backfill(req.dry_run, backfill_id))
+    return {
+        "status": "running",
+        "backfill_id": backfill_id,
+        "message": f"Source-type backfill started. Poll /generate/source-type-backfill-status/{backfill_id}",
+    }
+
+
+@router.get("/generate/source-type-backfill-status/{backfill_id}")
+async def get_source_type_backfill_status(backfill_id: str):
+    """Get status of a source-type backfill job."""
+    status = _source_type_backfill_status.get(backfill_id)
+    if not status:
+        raise HTTPException(status_code=404, detail="Source-type backfill job not found")
+    return status
@@ -0,0 +1,856 @@
+"""
+FastAPI routes for the Multi-Layer Control Architecture.
+
+Pattern Library, Obligation Extraction, Crosswalk Matrix, and Migration endpoints.
+
+Endpoints:
+  GET  /v1/canonical/patterns                          — All patterns (with filters)
+  GET  /v1/canonical/patterns/{pattern_id}             — Single pattern
+  GET  /v1/canonical/patterns/{pattern_id}/controls    — Controls for a pattern
+
+  POST /v1/canonical/obligations/extract               — Extract obligations from text
+  GET  /v1/canonical/crosswalk                         — Query crosswalk matrix
+  GET  /v1/canonical/crosswalk/stats                   — Coverage statistics
+
+  POST /v1/canonical/migrate/decompose                 — Pass 0a: Obligation extraction
+  POST /v1/canonical/migrate/merge-obligations         — Merge implementation-level dupes
+  POST /v1/canonical/migrate/enrich-obligations        — Add trigger_type, impl metadata
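+  POST /v1/canonical/migrate/compose-atomic            — Pass 0b: Atomic control composition
+  POST /v1/canonical/migrate/batch-submit-0a           — Pass 0a via Anthropic Batch API
+  POST /v1/canonical/migrate/batch-submit-0b           — Pass 0b via Anthropic Batch API
+  GET  /v1/canonical/migrate/batch-status/{batch_id}   — Batch job status
+  POST /v1/canonical/migrate/batch-process             — Process completed batch results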
+  POST /v1/canonical/migrate/link-obligations          — Pass 1: Obligation linkage
+  POST /v1/canonical/migrate/classify-patterns         — Pass 2: Pattern classification
+  POST /v1/canonical/migrate/triage                    — Pass 3: Quality triage
+  POST /v1/canonical/migrate/backfill-crosswalk        — Pass 4: Crosswalk backfill
+  POST /v1/canonical/migrate/deduplicate               — Pass 5: Deduplication
+  GET  /v1/canonical/migrate/status                    — Migration progress
+  GET  /v1/canonical/migrate/decomposition-status      — Decomposition progress
+"""
+
+import json
+import logging
+from typing import Optional, List
+
+from fastapi import APIRouter, HTTPException, Query
+from pydantic import BaseModel
+from sqlalchemy import text
+
+from database import SessionLocal
+
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/v1/canonical", tags=["crosswalk"])
+
+
+# =============================================================================
+# REQUEST / RESPONSE MODELS
+# =============================================================================
+
+
+class PatternResponse(BaseModel):
+    # Summary representation of a control pattern, as produced by the
+    # GET /patterns list endpoint from the PatternMatcher's pattern objects.
+    id: str
+    name: str
+    name_de: str
+    domain: str
+    category: str
+    description: str
+    objective_template: str
+    severity_default: str
+    implementation_effort_default: str = "m"
+    tags: list = []
+    composable_with: list = []
+    open_anchor_refs: list = []
+    # Filled from _get_pattern_control_counts(); 0 when the pattern has no entry.
+    controls_count: int = 0
+
+
+class PatternListResponse(BaseModel):
+    # Envelope for GET /patterns.
+    patterns: List[PatternResponse]
+    # Number of patterns in `patterns` (after filters were applied).
+    total: int
+
+
+class PatternDetailResponse(PatternResponse):
+    # Full pattern view returned by GET /patterns/{pattern_id}: the summary
+    # fields of PatternResponse plus the pattern's templates and match keywords.
+    rationale_template: str = ""
+    requirements_template: list = []
+    test_procedure_template: list = []
+    evidence_template: list = []
+    obligation_match_keywords: list = []
+
+
+class ObligationExtractRequest(BaseModel):
+    # Request body for POST /obligations/extract.
+    # Text chunk handed to the extractor as `chunk_text`.
+    text: str
+    # Passed to the extractor as "" when omitted.
+    regulation_code: Optional[str] = None
+    article: Optional[str] = None
+    paragraph: Optional[str] = None
+
+
+class ObligationExtractResponse(BaseModel):
+    # Result of POST /obligations/extract: the extracted obligation plus the
+    # outcome of the keyword-based pattern match.
+    obligation_id: Optional[str] = None
+    obligation_title: Optional[str] = None
+    obligation_text: Optional[str] = None
+    method: str = "none"
+    confidence: float = 0.0
+    regulation_id: Optional[str] = None
+    # None / 0 when the keyword matcher returned no result.
+    pattern_id: Optional[str] = None
+    pattern_confidence: float = 0.0
+
+
+class CrosswalkRow(BaseModel):
+    # One row of the crosswalk_matrix table as returned by GET /crosswalk.
+    # The query handler maps NULL regulation_code to "", NULL confidence to 0
+    # and NULL source to "auto".
+    regulation_code: str = ""
+    article: Optional[str] = None
+    obligation_id: Optional[str] = None
+    pattern_id: Optional[str] = None
+    master_control_id: Optional[str] = None
+    confidence: float = 0.0
+    source: str = "auto"
+
+
+class CrosswalkQueryResponse(BaseModel):
+    # Envelope for GET /crosswalk.
+    # The requested page of rows (limit/offset applied).
+    rows: List[CrosswalkRow]
+    # Count of all rows matching the filters, independent of limit/offset.
+    total: int
+
+
+class CrosswalkStatsResponse(BaseModel):
+    # Aggregates over crosswalk_matrix returned by GET /crosswalk/stats.
+    total_rows: int = 0
+    # The following four are DISTINCT counts of non-empty / non-NULL values.
+    regulations_covered: int = 0
+    obligations_linked: int = 0
+    patterns_used: int = 0
+    controls_linked: int = 0
+    # Maps regulation_code -> number of crosswalk rows for that regulation.
+    coverage_by_regulation: dict = {}
+
+
+class MigrationRequest(BaseModel):
+    # Shared request body of the /migrate/* pass endpoints. Not every pass
+    # reads every field: compose-atomic forwards only limit, batch_size and
+    # use_anthropic; link-obligations forwards only limit.
+    limit: int = 0  # 0 = no limit
+    batch_size: int = 0  # 0 = auto (5 for Anthropic, 1 for Ollama)
+    use_anthropic: bool = False  # Use Anthropic API instead of Ollama
+    category_filter: Optional[str] = None  # Comma-separated categories
+    source_filter: Optional[str] = None  # Comma-separated source regulations (ILIKE match)
+
+
+class BatchSubmitRequest(BaseModel):
+    # Request body of the /migrate/batch-submit-0a and -0b endpoints.
+    # The 0b endpoint forwards only limit and batch_size; the two filters are
+    # used by 0a only.
+    limit: int = 0
+    batch_size: int = 5
+    category_filter: Optional[str] = None
+    source_filter: Optional[str] = None
+
+
+class BatchProcessRequest(BaseModel):
+    # Request body of POST /migrate/batch-process.
+    # Identifier of the batch job whose results should be processed.
+    batch_id: str
+    pass_type: str = "0a"  # "0a" or "0b"
+
+
+class MigrationResponse(BaseModel):
+    # Common response of the /migrate/* POST endpoints.
+    # "completed" for the synchronous passes; the batch endpoints take the
+    # value from the service result's "status" key when present.
+    status: str = "completed"
+    # Statistics dict returned by the executed pass, forwarded as-is.
+    stats: dict = {}
+
+
+class MigrationStatusResponse(BaseModel):
+    # Migration progress counters and coverage percentages.
+    # NOTE(review): presumably the response of GET /migrate/status listed in the
+    # module docstring — the handler is not visible here, confirm.
+    total_controls: int = 0
+    has_obligation: int = 0
+    has_pattern: int = 0
+    fully_linked: int = 0
+    deprecated: int = 0
+    coverage_obligation_pct: float = 0.0
+    coverage_pattern_pct: float = 0.0
+    coverage_full_pct: float = 0.0
+
+
+class DecompositionStatusResponse(BaseModel):
+    # Decomposition / composition progress counters and percentages.
+    # NOTE(review): presumably the response of GET /migrate/decomposition-status
+    # listed in the module docstring — the handler is not visible here, confirm.
+    rich_controls: int = 0
+    decomposed_controls: int = 0
+    total_candidates: int = 0
+    validated: int = 0
+    rejected: int = 0
+    composed: int = 0
+    atomic_controls: int = 0
+    merged: int = 0
+    enriched: int = 0
+    ready_for_pass0b: int = 0
+    decomposition_pct: float = 0.0
+    composition_pct: float = 0.0
+
+
+# =============================================================================
+# PATTERN LIBRARY ENDPOINTS
+# =============================================================================
+
+
+@router.get("/patterns", response_model=PatternListResponse)
+async def list_patterns(
+    domain: Optional[str] = Query(None, description="Filter by domain (e.g. AUTH, CRYP)"),
+    category: Optional[str] = Query(None, description="Filter by category"),
+    tag: Optional[str] = Query(None, description="Filter by tag"),
+):
+    """List all control patterns with optional filters."""
+    from compliance.services.pattern_matcher import PatternMatcher
+
+    matcher = PatternMatcher()
+    matcher._load_patterns()
+    matcher._build_keyword_index()
+
+    patterns = matcher._patterns
+
+    if domain:
+        patterns = [p for p in patterns if p.domain == domain.upper()]
+    if category:
+        patterns = [p for p in patterns if p.category == category.lower()]
+    if tag:
+        patterns = [p for p in patterns if tag.lower() in [t.lower() for t in p.tags]]
+
+    # Count controls per pattern from DB
+    control_counts = _get_pattern_control_counts()
+
+    response_patterns = []
+    for p in patterns:
+        response_patterns.append(PatternResponse(
+            id=p.id,
+            name=p.name,
+            name_de=p.name_de,
+            domain=p.domain,
+            category=p.category,
+            description=p.description,
+            objective_template=p.objective_template,
+            severity_default=p.severity_default,
+            implementation_effort_default=p.implementation_effort_default,
+            tags=p.tags,
+            composable_with=p.composable_with,
+            open_anchor_refs=p.open_anchor_refs,
+            controls_count=control_counts.get(p.id, 0),
+        ))
+
+    return PatternListResponse(patterns=response_patterns, total=len(response_patterns))
+
+
+@router.get("/patterns/{pattern_id}", response_model=PatternDetailResponse)
+async def get_pattern(pattern_id: str):
+    """Get a single control pattern by ID."""
+    from compliance.services.pattern_matcher import PatternMatcher
+
+    matcher = PatternMatcher()
+    matcher._load_patterns()
+
+    pattern = matcher.get_pattern(pattern_id)
+    if not pattern:
+        raise HTTPException(status_code=404, detail=f"Pattern {pattern_id} not found")
+
+    control_counts = _get_pattern_control_counts()
+
+    return PatternDetailResponse(
+        id=pattern.id,
+        name=pattern.name,
+        name_de=pattern.name_de,
+        domain=pattern.domain,
+        category=pattern.category,
+        description=pattern.description,
+        objective_template=pattern.objective_template,
+        rationale_template=pattern.rationale_template,
+        requirements_template=pattern.requirements_template,
+        test_procedure_template=pattern.test_procedure_template,
+        evidence_template=pattern.evidence_template,
+        severity_default=pattern.severity_default,
+        implementation_effort_default=pattern.implementation_effort_default,
+        tags=pattern.tags,
+        composable_with=pattern.composable_with,
+        open_anchor_refs=pattern.open_anchor_refs,
+        obligation_match_keywords=pattern.obligation_match_keywords,
+        controls_count=control_counts.get(pattern.id, 0),
+    )
+
+
+@router.get("/patterns/{pattern_id}/controls")
+async def get_pattern_controls(
+    pattern_id: str,
+    limit: int = Query(50, ge=1, le=500),
+    offset: int = Query(0, ge=0),
+):
+    """Get controls generated from a specific pattern."""
+    db = SessionLocal()
+    try:
+        result = db.execute(
+            text("""
+                SELECT id, control_id, title, objective, severity,
+                       release_state, category, obligation_ids
+                FROM canonical_controls
+                WHERE pattern_id = :pattern_id
+                  AND release_state NOT IN ('deprecated')
+                ORDER BY control_id
+                LIMIT :limit OFFSET :offset
+            """),
+            {"pattern_id": pattern_id.upper(), "limit": limit, "offset": offset},
+        )
+        rows = result.fetchall()
+
+        count_result = db.execute(
+            text("""
+                SELECT count(*) FROM canonical_controls
+                WHERE pattern_id = :pattern_id
+                  AND release_state NOT IN ('deprecated')
+            """),
+            {"pattern_id": pattern_id.upper()},
+        )
+        total = count_result.fetchone()[0]
+
+        controls = []
+        for row in rows:
+            obl_ids = row[7]
+            if isinstance(obl_ids, str):
+                try:
+                    obl_ids = json.loads(obl_ids)
+                except (json.JSONDecodeError, TypeError):
+                    obl_ids = []
+            controls.append({
+                "id": str(row[0]),
+                "control_id": row[1],
+                "title": row[2],
+                "objective": row[3],
+                "severity": row[4],
+                "release_state": row[5],
+                "category": row[6],
+                "obligation_ids": obl_ids or [],
+            })
+
+        return {"controls": controls, "total": total}
+    finally:
+        db.close()
+
+
+# =============================================================================
+# OBLIGATION EXTRACTION ENDPOINT
+# =============================================================================
+
+
+@router.post("/obligations/extract", response_model=ObligationExtractResponse)
+async def extract_obligation(req: ObligationExtractRequest):
+    """Extract obligation from text using 3-tier strategy, then match to pattern."""
+    from compliance.services.obligation_extractor import ObligationExtractor
+    from compliance.services.pattern_matcher import PatternMatcher
+
+    extractor = ObligationExtractor()
+    await extractor.initialize()
+
+    obligation = await extractor.extract(
+        chunk_text=req.text,
+        regulation_code=req.regulation_code or "",
+        article=req.article,
+        paragraph=req.paragraph,
+    )
+
+    # Also match to pattern
+    matcher = PatternMatcher()
+    matcher._load_patterns()
+    matcher._build_keyword_index()
+
+    pattern_text = obligation.obligation_text or obligation.obligation_title or req.text[:500]
+    pattern_result = matcher._tier1_keyword(pattern_text, obligation.regulation_id)
+
+    return ObligationExtractResponse(
+        obligation_id=obligation.obligation_id,
+        obligation_title=obligation.obligation_title,
+        obligation_text=obligation.obligation_text,
+        method=obligation.method,
+        confidence=obligation.confidence,
+        regulation_id=obligation.regulation_id,
+        pattern_id=pattern_result.pattern_id if pattern_result else None,
+        pattern_confidence=pattern_result.confidence if pattern_result else 0,
+    )
+
+
+# =============================================================================
+# CROSSWALK MATRIX ENDPOINTS
+# =============================================================================
+
+
+@router.get("/crosswalk", response_model=CrosswalkQueryResponse)
+async def query_crosswalk(
+    regulation_code: Optional[str] = Query(None),
+    article: Optional[str] = Query(None),
+    obligation_id: Optional[str] = Query(None),
+    pattern_id: Optional[str] = Query(None),
+    limit: int = Query(100, ge=1, le=1000),
+    offset: int = Query(0, ge=0),
+):
+    """Query the crosswalk matrix with filters."""
+    db = SessionLocal()
+    try:
+        conditions = ["1=1"]
+        params = {"limit": limit, "offset": offset}
+
+        if regulation_code:
+            conditions.append("regulation_code = :reg")
+            params["reg"] = regulation_code
+        if article:
+            conditions.append("article = :art")
+            params["art"] = article
+        if obligation_id:
+            conditions.append("obligation_id = :obl")
+            params["obl"] = obligation_id
+        if pattern_id:
+            conditions.append("pattern_id = :pat")
+            params["pat"] = pattern_id
+
+        where = " AND ".join(conditions)
+
+        result = db.execute(
+            text(f"""
+                SELECT regulation_code, article, obligation_id,
+                       pattern_id, master_control_id, confidence, source
+                FROM crosswalk_matrix
+                WHERE {where}
+                ORDER BY regulation_code, article
+                LIMIT :limit OFFSET :offset
+            """),
+            params,
+        )
+        rows = result.fetchall()
+
+        count_result = db.execute(
+            text(f"SELECT count(*) FROM crosswalk_matrix WHERE {where}"),
+            params,
+        )
+        total = count_result.fetchone()[0]
+
+        crosswalk_rows = [
+            CrosswalkRow(
+                regulation_code=r[0] or "",
+                article=r[1],
+                obligation_id=r[2],
+                pattern_id=r[3],
+                master_control_id=r[4],
+                confidence=float(r[5] or 0),
+                source=r[6] or "auto",
+            )
+            for r in rows
+        ]
+
+        return CrosswalkQueryResponse(rows=crosswalk_rows, total=total)
+    finally:
+        db.close()
+
+
+@router.get("/crosswalk/stats", response_model=CrosswalkStatsResponse)
+async def crosswalk_stats():
+    """Get crosswalk coverage statistics."""
+    db = SessionLocal()
+    try:
+        row = db.execute(text("""
+            SELECT
+                count(*) AS total,
+                count(DISTINCT regulation_code) FILTER (WHERE regulation_code != '') AS regs,
+                count(DISTINCT obligation_id) FILTER (WHERE obligation_id IS NOT NULL) AS obls,
+                count(DISTINCT pattern_id) FILTER (WHERE pattern_id IS NOT NULL) AS pats,
+                count(DISTINCT master_control_id) FILTER (WHERE master_control_id IS NOT NULL) AS ctrls
+            FROM crosswalk_matrix
+        """)).fetchone()
+
+        # Coverage by regulation
+        reg_rows = db.execute(text("""
+            SELECT regulation_code, count(*) AS cnt
+            FROM crosswalk_matrix
+            WHERE regulation_code != ''
+            GROUP BY regulation_code
+            ORDER BY cnt DESC
+        """)).fetchall()
+
+        coverage = {r[0]: r[1] for r in reg_rows}
+
+        return CrosswalkStatsResponse(
+            total_rows=row[0],
+            regulations_covered=row[1],
+            obligations_linked=row[2],
+            patterns_used=row[3],
+            controls_linked=row[4],
+            coverage_by_regulation=coverage,
+        )
+    finally:
+        db.close()
+
+
+# =============================================================================
+# MIGRATION ENDPOINTS
+# =============================================================================
+
+
+@router.post("/migrate/decompose", response_model=MigrationResponse)
+async def migrate_decompose(req: MigrationRequest):
+    """Pass 0a: Extract obligation candidates from rich controls.
+
+    With use_anthropic=true, uses Anthropic API with prompt caching
+    and content batching (multiple controls per API call).
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    db = SessionLocal()
+    try:
+        decomp = DecompositionPass(db=db)
+        stats = await decomp.run_pass0a(
+            limit=req.limit,
+            batch_size=req.batch_size,
+            use_anthropic=req.use_anthropic,
+            category_filter=req.category_filter,
+            source_filter=req.source_filter,
+        )
+        return MigrationResponse(status="completed", stats=stats)
+    except Exception as e:
+        logger.error("Decomposition pass 0a failed: %s", e)
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        db.close()
+
+
+@router.post("/migrate/merge-obligations", response_model=MigrationResponse)
+async def migrate_merge_obligations():
+    """Merge implementation-level duplicate obligations within each parent.
+
+    Run AFTER Pass 0a, BEFORE Pass 0b. No LLM calls — rule-based.
+    Merges obligations that share similar action+object into the more
+    abstract survivor, marking the concrete duplicate as 'merged'.
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    db = SessionLocal()
+    try:
+        decomp = DecompositionPass(db=db)
+        stats = decomp.run_merge_pass()
+        return MigrationResponse(status="completed", stats=stats)
+    except Exception as e:
+        logger.error("Merge pass failed: %s", e)
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        db.close()
+
+
+@router.post("/migrate/enrich-obligations", response_model=MigrationResponse)
+async def migrate_enrich_obligations():
+    """Add trigger_type and is_implementation_specific metadata.
+
+    Run AFTER merge pass, BEFORE Pass 0b. No LLM calls — rule-based.
+    Classifies trigger_type (event/periodic/continuous) from obligation text
+    and detects implementation-specific obligations (concrete tools/protocols).
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    db = SessionLocal()
+    try:
+        decomp = DecompositionPass(db=db)
+        stats = decomp.enrich_obligations()
+        return MigrationResponse(status="completed", stats=stats)
+    except Exception as e:
+        logger.error("Enrich pass failed: %s", e)
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        db.close()
+
+
+@router.post("/migrate/compose-atomic", response_model=MigrationResponse)
+async def migrate_compose_atomic(req: MigrationRequest):
+    """Pass 0b: Compose atomic controls from obligation candidates.
+
+    With use_anthropic=true, uses Anthropic API with prompt caching
+    and content batching (multiple obligations per API call).
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    db = SessionLocal()
+    try:
+        decomp = DecompositionPass(db=db)
+        stats = await decomp.run_pass0b(
+            limit=req.limit,
+            batch_size=req.batch_size,
+            use_anthropic=req.use_anthropic,
+        )
+        return MigrationResponse(status="completed", stats=stats)
+    except Exception as e:
+        logger.error("Decomposition pass 0b failed: %s", e)
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        db.close()
+
+
+@router.post("/migrate/batch-submit-0a", response_model=MigrationResponse)
+async def batch_submit_pass0a(req: BatchSubmitRequest):
+    """Submit Pass 0a as Anthropic Batch API job (50% cost reduction).
+
+    Returns a batch_id for polling. Results are processed asynchronously
+    within 24 hours by Anthropic.
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    db = SessionLocal()
+    try:
+        decomp = DecompositionPass(db=db)
+        result = await decomp.submit_batch_pass0a(
+            limit=req.limit,
+            batch_size=req.batch_size,
+            category_filter=req.category_filter,
+            source_filter=req.source_filter,
+        )
+        return MigrationResponse(status=result.pop("status", "submitted"), stats=result)
+    except Exception as e:
+        logger.error("Batch submit 0a failed: %s", e)
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        db.close()
+
+
+@router.post("/migrate/batch-submit-0b", response_model=MigrationResponse)
+async def batch_submit_pass0b(req: BatchSubmitRequest):
+    """Submit Pass 0b as Anthropic Batch API job (50% cost reduction)."""
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    db = SessionLocal()
+    try:
+        decomp = DecompositionPass(db=db)
+        result = await decomp.submit_batch_pass0b(
+            limit=req.limit,
+            batch_size=req.batch_size,
+        )
+        return MigrationResponse(status=result.pop("status", "submitted"), stats=result)
+    except Exception as e:
+        logger.error("Batch submit 0b failed: %s", e)
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        db.close()
+
+
+@router.get("/migrate/batch-status/{batch_id}")
+async def batch_check_status(batch_id: str):
+    """Check processing status of an Anthropic batch job.
+
+    Args:
+        batch_id: Identifier of the batch, taken from the URL path.
+
+    Returns:
+        Whatever ``check_batch_status`` returns for the batch, unmodified.
+
+    Raises:
+        HTTPException: 500 with the error text when the lookup fails.
+    """
+    from compliance.services.decomposition_pass import check_batch_status
+
+    try:
+        return await check_batch_status(batch_id)
+    except Exception as e:
+        # Log before converting to HTTP 500, like the sibling batch endpoints,
+        # so a failed status poll leaves a trace in the server log.
+        logger.error("Batch status check failed for %s: %s", batch_id, e)
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+@router.post("/migrate/batch-process", response_model=MigrationResponse)
+async def batch_process_results(req: BatchProcessRequest):
+    """Fetch and process the results of a completed Anthropic batch.
+
+    Meant to be called once batch-status shows processing_status='ended'.
+    ``batch_id`` and ``pass_type`` are forwarded to
+    ``process_batch_results``; the ``status`` entry of its result (default
+    ``"completed"``) becomes the response status and the rest is ``stats``.
+
+    Raises:
+        HTTPException: 500 with the error text when processing fails.
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    session = SessionLocal()
+    try:
+        decomposer = DecompositionPass(db=session)
+        outcome = await decomposer.process_batch_results(
+            batch_id=req.batch_id,
+            pass_type=req.pass_type,
+        )
+        final_status = outcome.pop("status", "completed")
+        return MigrationResponse(status=final_status, stats=outcome)
+    except Exception as exc:
+        logger.error("Batch process failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/link-obligations", response_model=MigrationResponse)
+async def migrate_link_obligations(req: MigrationRequest):
+    """Pass 1: link controls to obligations via source_citation article.
+
+    Initializes a ``MigrationPasses`` instance, runs pass 1 limited by
+    ``req.limit`` and returns its stats with status ``"completed"``.
+
+    Raises:
+        HTTPException: 500 with the error text when the pass fails.
+    """
+    from compliance.services.pipeline_adapter import MigrationPasses
+
+    session = SessionLocal()
+    try:
+        passes = MigrationPasses(db=session)
+        await passes.initialize()
+        pass_stats = await passes.run_pass1_obligation_linkage(limit=req.limit)
+        return MigrationResponse(status="completed", stats=pass_stats)
+    except Exception as exc:
+        logger.error("Migration pass 1 failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/classify-patterns", response_model=MigrationResponse)
+async def migrate_classify_patterns(req: MigrationRequest):
+    """Pass 2: classify controls into patterns via keyword matching.
+
+    Initializes a ``MigrationPasses`` instance, runs pass 2 limited by
+    ``req.limit`` and returns its stats with status ``"completed"``.
+
+    Raises:
+        HTTPException: 500 with the error text when the pass fails.
+    """
+    from compliance.services.pipeline_adapter import MigrationPasses
+
+    session = SessionLocal()
+    try:
+        passes = MigrationPasses(db=session)
+        await passes.initialize()
+        pass_stats = await passes.run_pass2_pattern_classification(limit=req.limit)
+        return MigrationResponse(status="completed", stats=pass_stats)
+    except Exception as exc:
+        logger.error("Migration pass 2 failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/triage", response_model=MigrationResponse)
+async def migrate_triage():
+    """Pass 3: quality triage — categorize by linkage completeness.
+
+    Runs the pass synchronously on a fresh session and returns its stats
+    with status ``"completed"``.
+
+    Raises:
+        HTTPException: 500 with the error text when the pass fails.
+    """
+    from compliance.services.pipeline_adapter import MigrationPasses
+
+    session = SessionLocal()
+    try:
+        pass_stats = MigrationPasses(db=session).run_pass3_quality_triage()
+        return MigrationResponse(status="completed", stats=pass_stats)
+    except Exception as exc:
+        logger.error("Migration pass 3 failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/backfill-crosswalk", response_model=MigrationResponse)
+async def migrate_backfill_crosswalk():
+    """Pass 4: create crosswalk rows for linked controls.
+
+    Runs the pass synchronously on a fresh session and returns its stats
+    with status ``"completed"``.
+
+    Raises:
+        HTTPException: 500 with the error text when the pass fails.
+    """
+    from compliance.services.pipeline_adapter import MigrationPasses
+
+    session = SessionLocal()
+    try:
+        pass_stats = MigrationPasses(db=session).run_pass4_crosswalk_backfill()
+        return MigrationResponse(status="completed", stats=pass_stats)
+    except Exception as exc:
+        logger.error("Migration pass 4 failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/deduplicate", response_model=MigrationResponse)
+async def migrate_deduplicate():
+    """Pass 5: mark duplicate controls (same obligation + pattern).
+
+    Runs the pass synchronously on a fresh session and returns its stats
+    with status ``"completed"``.
+
+    Raises:
+        HTTPException: 500 with the error text when the pass fails.
+    """
+    from compliance.services.pipeline_adapter import MigrationPasses
+
+    session = SessionLocal()
+    try:
+        pass_stats = MigrationPasses(db=session).run_pass5_deduplication()
+        return MigrationResponse(status="completed", stats=pass_stats)
+    except Exception as exc:
+        logger.error("Migration pass 5 failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.get("/migrate/status", response_model=MigrationStatusResponse)
+async def migration_status():
+    """Report overall migration progress.
+
+    The dict returned by ``MigrationPasses.migration_status()`` is expanded
+    into a ``MigrationStatusResponse``.
+
+    Raises:
+        HTTPException: 500 with the error text when the lookup fails.
+    """
+    from compliance.services.pipeline_adapter import MigrationPasses
+
+    session = SessionLocal()
+    try:
+        progress = MigrationPasses(db=session).migration_status()
+        return MigrationStatusResponse(**progress)
+    except Exception as exc:
+        logger.error("Migration status failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.get("/migrate/decomposition-status", response_model=DecompositionStatusResponse)
+async def decomposition_status():
+    """Report decomposition progress (Pass 0a/0b).
+
+    The dict returned by ``DecompositionPass.decomposition_status()`` is
+    expanded into a ``DecompositionStatusResponse``.
+
+    Raises:
+        HTTPException: 500 with the error text when the lookup fails.
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    session = SessionLocal()
+    try:
+        progress = DecompositionPass(db=session).decomposition_status()
+        return DecompositionStatusResponse(**progress)
+    except Exception as exc:
+        logger.error("Decomposition status failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+# =============================================================================
+# BATCH DEDUP ENDPOINTS
+# =============================================================================
+
+
+# Module-level runner reference for status polling
+_batch_dedup_runner = None
+
+
+@router.post("/migrate/batch-dedup", response_model=MigrationResponse)
+async def migrate_batch_dedup(
+    dry_run: bool = Query(False, description="Preview mode — no DB changes"),
+    hint_filter: Optional[str] = Query(None, description="Only process hints matching this prefix"),
+):
+    """Batch dedup: reduce ~85k Pass 0b controls to ~18-25k masters.
+
+    Phase 1: Groups by merge_group_hint, picks best quality master, links rest.
+    Phase 2: Cross-group embedding search for semantically similar masters.
+
+    While the run is in flight the runner is published in the module-level
+    ``_batch_dedup_runner`` so the status endpoint can poll its progress.
+
+    Args:
+        dry_run: Forwarded to the runner; preview mode without DB changes.
+        hint_filter: Forwarded to the runner; optional hint prefix filter.
+
+    Returns:
+        MigrationResponse with status ``"completed"`` and the runner's stats.
+
+    Raises:
+        HTTPException: 500 with the error text when the run fails.
+    """
+    global _batch_dedup_runner
+    from compliance.services.batch_dedup_runner import BatchDedupRunner
+
+    db = SessionLocal()
+    runner = None
+    try:
+        runner = BatchDedupRunner(db=db)
+        _batch_dedup_runner = runner
+        stats = await runner.run(dry_run=dry_run, hint_filter=hint_filter)
+        return MigrationResponse(status="completed", stats=stats)
+    except Exception as e:
+        logger.error("Batch dedup failed: %s", e)
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        # Only clear the published reference if it is still ours; an
+        # overlapping request may have replaced it with its own runner, and
+        # wiping that would make the status endpoint report "not running"
+        # while that run is still in progress.
+        if _batch_dedup_runner is runner:
+            _batch_dedup_runner = None
+        db.close()
+
+
+@router.get("/migrate/batch-dedup/status")
+async def batch_dedup_status():
+    """Report batch dedup progress.
+
+    While a run is active, returns ``running: True`` merged with the
+    runner's own ``get_status()`` dict. Otherwise returns ``running: False``
+    together with Pass 0b totals from ``canonical_controls`` and the number
+    of pending rows in ``control_dedup_reviews``.
+    """
+    active_runner = _batch_dedup_runner
+    if active_runner is not None:
+        return {"running": True, **active_runner.get_status()}
+
+    # No run in flight — fall back to aggregate counts from the database.
+    session = SessionLocal()
+    try:
+        counts = session.execute(text("""
+            SELECT
+                count(*) FILTER (WHERE decomposition_method = 'pass0b') AS total_pass0b,
+                count(*) FILTER (WHERE decomposition_method = 'pass0b'
+                                   AND release_state = 'duplicate') AS duplicates,
+                count(*) FILTER (WHERE decomposition_method = 'pass0b'
+                                   AND release_state != 'duplicate'
+                                   AND release_state != 'deprecated') AS masters
+            FROM canonical_controls
+        """)).fetchone()
+        pending_row = session.execute(text(
+            "SELECT count(*) FROM control_dedup_reviews WHERE review_status = 'pending'"
+        )).fetchone()
+        pending_reviews = pending_row[0]
+
+        payload = {"running": False}
+        payload["total_pass0b"] = counts[0]
+        payload["duplicates"] = counts[1]
+        payload["masters"] = counts[2]
+        payload["pending_reviews"] = pending_reviews
+        return payload
+    finally:
+        session.close()
+
+
+# =============================================================================
+# HELPERS
+# =============================================================================
+
+
+def _get_pattern_control_counts() -> dict[str, int]:
+    """Get count of controls per pattern_id from DB.
+
+    Rows with a NULL or empty pattern_id and rows whose release_state is
+    'deprecated' are excluded.
+
+    Returns:
+        Mapping of pattern_id to control count. An empty dict is returned
+        when the query fails; the lookup is best-effort, so the failure is
+        logged rather than raised.
+    """
+    db = SessionLocal()
+    try:
+        result = db.execute(text("""
+            SELECT pattern_id, count(*) AS cnt
+            FROM canonical_controls
+            WHERE pattern_id IS NOT NULL AND pattern_id != ''
+              AND release_state NOT IN ('deprecated')
+            GROUP BY pattern_id
+        """))
+        return {row[0]: row[1] for row in result.fetchall()}
+    except Exception as e:
+        # Keep the fallback, but make the failure visible: an empty result
+        # is otherwise indistinguishable from "no controls have a pattern".
+        logger.warning("Failed to load pattern control counts: %s", e)
+        return {}
+    finally:
+        db.close()
@@ -5,16 +5,23 @@ Endpoints:
 - /dashboard: Main compliance dashboard
 - /dashboard/executive: Executive summary for managers
 - /dashboard/trend: Compliance score trend over time
+- /dashboard/roadmap: Prioritised controls in 4 buckets
+- /dashboard/module-status: Completion status of each SDK module
+- /dashboard/next-actions: Top 5 most important actions
+- /dashboard/snapshot: Save / query compliance score snapshots
 - /score: Quick compliance score
 - /reports: Report generation
 """

 import logging
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, date, timedelta
 from calendar import month_abbr
-from typing import Optional
+from typing import Optional, Dict, Any, List
+from decimal import Decimal

 from fastapi import APIRouter, Depends, HTTPException, Query
+from pydantic import BaseModel
+from sqlalchemy import text
 from sqlalchemy.orm import Session

 from classroom_engine.database import get_db
@@ -25,15 +32,24 @@ from ..db import (
    ControlRepository,
    EvidenceRepository,
    RiskRepository,
+    AssertionDB,
 )
 from .schemas import (
    DashboardResponse,
+    MultiDimensionalScore,
    ExecutiveDashboardResponse,
    TrendDataPoint,
    RiskSummary,
    DeadlineItem,
    TeamWorkloadItem,
+    TraceabilityAssertion,
+    TraceabilityEvidence,
+    TraceabilityCoverage,
+    TraceabilityControl,
+    TraceabilityMatrixResponse,
 )
+from .tenant_utils import get_tenant_id as _get_tenant_id
+from .db_utils import row_to_dict as _row_to_dict

 logger = logging.getLogger(__name__)
 router = APIRouter(tags=["compliance-dashboard"])
@@ -86,6 +102,14 @@ async def get_dashboard(db: Session = Depends(get_db)):
    # or compute from by_status dict
    score = ctrl_stats.get("compliance_score", 0.0)

+    # Multi-dimensional score (Anti-Fake-Evidence)
+    try:
+        ms = ctrl_repo.get_multi_dimensional_score()
+        multi_score = MultiDimensionalScore(**ms)
+    except Exception as e:
+        logger.warning(f"Failed to compute multi-dimensional score: {e}")
+        multi_score = None
+
    return DashboardResponse(
        compliance_score=round(score, 1),
        total_regulations=len(regulations),
@@ -98,6 +122,7 @@ async def get_dashboard(db: Session = Depends(get_db)):
        total_risks=len(risks),
        risks_by_level=risks_by_level,
        recent_activity=[],
+        multi_score=multi_score,
    )


@@ -116,11 +141,18 @@ async def get_compliance_score(db: Session = Depends(get_db)):
    else:
        score = 0

+    # Multi-dimensional score (Anti-Fake-Evidence)
+    try:
+        multi_score = ctrl_repo.get_multi_dimensional_score()
+    except Exception:
+        multi_score = None
+
    return {
        "score": round(score, 1),
        "total_controls": total,
        "passing_controls": passing,
        "partial_controls": partial,
+        "multi_score": multi_score,
    }


@@ -322,6 +354,424 @@ async def get_compliance_trend(
    }


+# ============================================================================
+# Dashboard Extended — Roadmap, Module-Status, Next-Actions, Snapshots
+# ============================================================================
+
+# Weight map for control prioritisation, keyed by control category.
+# Used by the roadmap and next-actions endpoints; a category that is not
+# listed here falls back to weight 1 at the lookup sites.
+_PRIORITY_WEIGHTS = {"legal": 5, "security": 3, "best_practice": 1, "operational": 2}
+
+# SDK module definitions → DB table used for counting completion.
+# "key" and "label" are echoed in the module-status response; "table" is
+# interpolated directly into a COUNT(*) query filtered by tenant_id, so
+# table names here must stay trusted, hard-coded identifiers.
+_MODULE_DEFS: List[Dict[str, str]] = [
+    {"key": "vvt", "label": "VVT", "table": "compliance_vvt_activities"},
+    {"key": "tom", "label": "TOM", "table": "compliance_toms"},
+    {"key": "dsfa", "label": "DSFA", "table": "compliance_dsfa_assessments"},
+    {"key": "loeschfristen", "label": "Loeschfristen", "table": "compliance_loeschfristen"},
+    {"key": "risks", "label": "Risiken", "table": "compliance_risks"},
+    {"key": "controls", "label": "Controls", "table": "compliance_controls"},
+    {"key": "evidence", "label": "Nachweise", "table": "compliance_evidence"},
+    {"key": "obligations", "label": "Pflichten", "table": "compliance_obligations"},
+    {"key": "incidents", "label": "Vorfaelle", "table": "compliance_notfallplan_incidents"},
+    {"key": "vendor", "label": "Auftragsverarbeiter", "table": "compliance_vendor_assessments"},
+    {"key": "legal_templates", "label": "Rechtl. Dokumente", "table": "compliance_legal_templates"},
+    {"key": "training", "label": "Schulungen", "table": "training_modules"},
+    {"key": "audit", "label": "Audit", "table": "compliance_audit_sessions"},
+    {"key": "security_backlog", "label": "Security-Backlog", "table": "compliance_security_backlog"},
+    {"key": "quality", "label": "Qualitaet", "table": "compliance_quality_items"},
+]
+
+
+@router.get("/dashboard/roadmap")
+async def get_dashboard_roadmap(
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """Prioritised controls in 4 buckets: Quick Wins, Must Have, Should Have, Nice to Have.
+
+    Controls whose status is "pass" are skipped. Each remaining control gets
+    a category weight from ``_PRIORITY_WEIGHTS`` (default 1) and is placed as
+    follows:
+
+    - quick_wins: weight >= 5 and the review date is in the past
+    - must_have: weight >= 4
+    - should_have: weight >= 2
+    - nice_to_have: everything else
+
+    Within a bucket, items are ordered by urgency (weight * 2, plus 1 when
+    overdue), most urgent first; ties are broken by days overdue.
+
+    Returns:
+        Dict with ``buckets`` (bucket name → list of control dicts),
+        ``counts`` (bucket name → size) and ``generated_at`` (UTC ISO string).
+    """
+    # NOTE(review): tenant_id is resolved by the dependency but is not passed
+    # to the repository query below — confirm whether get_all() is tenant-scoped.
+    ctrl_repo = ControlRepository(db)
+    controls = ctrl_repo.get_all()
+    today = datetime.utcnow().date()
+
+    # Items are collected together with their urgency so the final sort can
+    # use it without adding a field to the response payload.
+    ranked: Dict[str, list] = {
+        "quick_wins": [],
+        "must_have": [],
+        "should_have": [],
+        "nice_to_have": [],
+    }
+
+    for ctrl in controls:
+        status = ctrl.status.value if ctrl.status else "planned"
+        if status == "pass":
+            continue  # already done
+
+        weight = _PRIORITY_WEIGHTS.get(getattr(ctrl, "category", "best_practice"), 1)
+        days_overdue = 0
+        if ctrl.next_review_at:
+            # next_review_at may be a datetime or already a plain date.
+            review_date = ctrl.next_review_at.date() if hasattr(ctrl.next_review_at, "date") else ctrl.next_review_at
+            days_overdue = (today - review_date).days
+
+        urgency = weight * 2 + (1 if days_overdue > 0 else 0)
+
+        item = {
+            "id": str(ctrl.id),
+            "control_id": ctrl.control_id,
+            "title": ctrl.title,
+            "status": status,
+            "domain": ctrl.domain.value if ctrl.domain else "unknown",
+            "owner": ctrl.owner,
+            "next_review_at": ctrl.next_review_at.isoformat() if ctrl.next_review_at else None,
+            "days_overdue": max(0, days_overdue),
+            "weight": weight,
+        }
+
+        if weight >= 5 and days_overdue > 0:
+            bucket_name = "quick_wins"
+        elif weight >= 4:
+            bucket_name = "must_have"
+        elif weight >= 2:
+            bucket_name = "should_have"
+        else:
+            bucket_name = "nice_to_have"
+        ranked[bucket_name].append((urgency, item))
+
+    # Sort each bucket by urgency desc, then by days overdue desc.
+    buckets: Dict[str, list] = {
+        name: [
+            item
+            for _, item in sorted(
+                entries,
+                key=lambda entry: (entry[0], entry[1]["days_overdue"]),
+                reverse=True,
+            )
+        ]
+        for name, entries in ranked.items()
+    }
+
+    return {
+        "buckets": buckets,
+        "counts": {k: len(v) for k, v in buckets.items()},
+        "generated_at": datetime.utcnow().isoformat(),
+    }
+
+
+@router.get("/dashboard/module-status")
+async def get_module_status(
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """Completion status for each SDK module based on DB record counts.
+
+    For every entry in ``_MODULE_DEFS`` the tenant's rows in the module's
+    table are counted. A module whose count query fails is reported with
+    count 0.
+
+    Returns:
+        Dict with the per-module list (``key``, ``label``, ``count``,
+        ``status``, ``progress``), the number of modules ``total``,
+        ``started`` and ``complete``, and ``overall_progress`` as the
+        percentage of complete modules.
+    """
+    modules = []
+    for mod in _MODULE_DEFS:
+        try:
+            # The table name comes from the hard-coded _MODULE_DEFS constant,
+            # never from request input; the tenant id is a bound parameter.
+            row = db.execute(
+                text(f"SELECT COUNT(*) FROM {mod['table']} WHERE tenant_id = :tid"),
+                {"tid": tenant_id},
+            ).fetchone()
+            count = int(row[0]) if row else 0
+        except Exception as e:
+            logger.warning("Module status count failed for %s: %s", mod["table"], e)
+            # A failed statement (e.g. a missing table) can leave the session's
+            # transaction in an aborted state; roll back so the remaining
+            # modules are still counted instead of all failing in turn.
+            db.rollback()
+            count = 0
+
+        # Simple heuristic: 0 = not started, 1-2 = in progress, 3+ = complete
+        if count == 0:
+            status = "not_started"
+            progress = 0
+        elif count < 3:
+            status = "in_progress"
+            progress = min(60, count * 30)
+        else:
+            status = "complete"
+            progress = 100
+
+        modules.append({
+            "key": mod["key"],
+            "label": mod["label"],
+            "count": count,
+            "status": status,
+            "progress": progress,
+        })
+
+    started = sum(1 for m in modules if m["status"] != "not_started")
+    complete = sum(1 for m in modules if m["status"] == "complete")
+
+    return {
+        "modules": modules,
+        "total": len(modules),
+        "started": started,
+        "complete": complete,
+        "overall_progress": round((complete / len(modules)) * 100, 1) if modules else 0,
+    }
+
+
+@router.get("/dashboard/next-actions")
+async def get_next_actions(
+    limit: int = Query(5, ge=1, le=20),
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """Return the ``limit`` most urgent open controls.
+
+    Controls with status "pass" are ignored. The urgency score of a control
+    is its category weight (from ``_PRIORITY_WEIGHTS``, default 1) times 10
+    plus the number of days its review is overdue. The highest scores come
+    first.
+    """
+    repository = ControlRepository(db)
+    all_controls = repository.get_all()
+    current_day = datetime.utcnow().date()
+
+    open_actions = []
+    for control in all_controls:
+        state = control.status.value if control.status else "planned"
+        if state == "pass":
+            continue
+
+        overdue_days = 0
+        review_at = control.next_review_at
+        if review_at:
+            # Accept both datetime and plain date values.
+            due_day = review_at.date() if hasattr(review_at, "date") else review_at
+            overdue_days = max(0, (current_day - due_day).days)
+
+        category = control.category if hasattr(control, "category") else "best_practice"
+        category_weight = _PRIORITY_WEIGHTS.get(category, 1)
+
+        entry = {
+            "id": str(control.id),
+            "control_id": control.control_id,
+            "title": control.title,
+            "status": state,
+            "domain": control.domain.value if control.domain else "unknown",
+            "owner": control.owner,
+            "days_overdue": overdue_days,
+            "urgency_score": category_weight * 10 + overdue_days,
+            "reason": "Ueberfaellig" if overdue_days > 0 else "Offen",
+        }
+        open_actions.append(entry)
+
+    ordered = sorted(open_actions, key=lambda entry: entry["urgency_score"], reverse=True)
+    return {"actions": ordered[:limit]}
+
+
+@router.post("/dashboard/snapshot")
+async def create_score_snapshot(
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """Persist today's compliance score as a historical snapshot.
+
+    The score is ``(pass + 0.5 * partial) / total * 100`` rounded to two
+    decimals, or 0 when there are no controls. The row is upserted into
+    ``compliance_score_snapshots`` and the stored row is returned as a dict.
+    """
+    control_stats = ControlRepository(db).get_statistics()
+    evidence_stats = EvidenceRepository(db).get_statistics()
+    all_risks = RiskRepository(db).get_all()
+
+    controls_total = control_stats.get("total", 0)
+    controls_pass = control_stats.get("pass", 0)
+    controls_partial = control_stats.get("partial", 0)
+    if controls_total > 0:
+        score_value = round(((controls_pass + controls_partial * 0.5) / controls_total) * 100, 2)
+    else:
+        score_value = 0
+
+    # A risk without an inherent_risk value is treated as "low".
+    high_risk_count = 0
+    for risk in all_risks:
+        level = risk.inherent_risk.value if risk.inherent_risk else "low"
+        if level in ("high", "critical"):
+            high_risk_count += 1
+
+    snapshot_params = {
+        "tenant_id": tenant_id,
+        "score": score_value,
+        "controls_total": controls_total,
+        "controls_pass": controls_pass,
+        "controls_partial": controls_partial,
+        "evidence_total": evidence_stats.get("total", 0),
+        "evidence_valid": evidence_stats.get("by_status", {}).get("valid", 0),
+        "risks_total": len(all_risks),
+        "risks_high": high_risk_count,
+        "snapshot_date": date.today(),
+    }
+
+    # NOTE(review): the conflict target includes project_id, which this
+    # INSERT never sets — confirm the column default makes the upsert match.
+    upsert_sql = text("""
+        INSERT INTO compliance_score_snapshots (
+            tenant_id, score, controls_total, controls_pass, controls_partial,
+            evidence_total, evidence_valid, risks_total, risks_high, snapshot_date
+        ) VALUES (
+            :tenant_id, :score, :controls_total, :controls_pass, :controls_partial,
+            :evidence_total, :evidence_valid, :risks_total, :risks_high, :snapshot_date
+        )
+        ON CONFLICT (tenant_id, project_id, snapshot_date) DO UPDATE SET
+            score = EXCLUDED.score,
+            controls_total = EXCLUDED.controls_total,
+            controls_pass = EXCLUDED.controls_pass,
+            controls_partial = EXCLUDED.controls_partial,
+            evidence_total = EXCLUDED.evidence_total,
+            evidence_valid = EXCLUDED.evidence_valid,
+            risks_total = EXCLUDED.risks_total,
+            risks_high = EXCLUDED.risks_high
+        RETURNING *
+    """)
+    stored_row = db.execute(upsert_sql, snapshot_params).fetchone()
+    db.commit()
+
+    return _row_to_dict(stored_row)
+
+
+@router.get("/dashboard/score-history")
+async def get_score_history(
+    months: int = Query(12, ge=1, le=36),
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """Return the tenant's stored score snapshots, oldest first.
+
+    The look-back window is approximated as ``months * 30`` days before
+    today. Each row is converted to a dict; a ``Decimal`` score is turned
+    into a float.
+    """
+    window_start = date.today() - timedelta(days=months * 30)
+
+    history_sql = text("""
+        SELECT * FROM compliance_score_snapshots
+        WHERE tenant_id = :tenant_id AND snapshot_date >= :since
+        ORDER BY snapshot_date ASC
+    """)
+    records = db.execute(history_sql, {"tenant_id": tenant_id, "since": window_start}).fetchall()
+
+    snapshots = []
+    for record in records:
+        entry = _row_to_dict(record)
+        score_value = entry.get("score")
+        # Convert Decimal to float for JSON
+        if isinstance(score_value, Decimal):
+            entry["score"] = float(score_value)
+        snapshots.append(entry)
+
+    return {
+        "snapshots": snapshots,
+        "total": len(snapshots),
+        "period_months": months,
+    }
+
+
+# ============================================================================
+# Evidence Distribution (Anti-Fake-Evidence Phase 3)
+# ============================================================================
+
+@router.get("/dashboard/evidence-distribution")
+async def get_evidence_distribution(
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """Count evidence per confidence level and pending four-eyes approvals.
+
+    Evidence without a confidence level is counted as "E1"; levels outside
+    E0–E4 are not counted in ``by_confidence``. An item counts as pending
+    when it requires four eyes and its approval status is neither
+    "approved" nor "rejected".
+    """
+    evidence_items = EvidenceRepository(db).get_all()
+
+    level_counts = dict.fromkeys(("E0", "E1", "E2", "E3", "E4"), 0)
+    pending_four_eyes = 0
+
+    for item in evidence_items:
+        if item.confidence_level:
+            level_name = item.confidence_level.value
+        else:
+            level_name = "E1"
+        if level_name in level_counts:
+            level_counts[level_name] += 1
+
+        if item.requires_four_eyes and item.approval_status not in ("approved", "rejected"):
+            pending_four_eyes += 1
+
+    return {
+        "by_confidence": level_counts,
+        "four_eyes_pending": pending_four_eyes,
+        "total": len(evidence_items),
+    }
+
+
+# ============================================================================
+# Traceability Matrix (Anti-Fake-Evidence Phase 4a)
+# ============================================================================
+
+@router.get("/dashboard/traceability-matrix", response_model=TraceabilityMatrixResponse)
+async def get_traceability_matrix(
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """
+    Full traceability chain: Control → Evidence → Assertions.
+
+    Loads each entity set once, builds in-memory indices, and nests
+    the result so the frontend can render a matrix view.
+
+    Per control, the coverage block reports whether it has evidence, whether
+    any of that evidence has assertions, whether all of those assertions are
+    verified (False when there are none), and the lowest evidence confidence
+    level (evidence without a level counts as "E1").
+
+    Returns:
+        TraceabilityMatrixResponse with the nested controls and a summary of
+        total, covered, fully verified and uncovered control counts.
+    """
+    ctrl_repo = ControlRepository(db)
+    evidence_repo = EvidenceRepository(db)
+
+    # 1. Load all three entity sets
+    controls = ctrl_repo.get_all()
+    all_evidence = evidence_repo.get_all()
+    all_assertions = db.query(AssertionDB).filter(
+        AssertionDB.entity_type == "evidence",
+    ).all()
+
+    # 2. Index assertions by evidence_id (entity_id).
+    # The key is normalised to str because the lookup below uses str(e.id);
+    # a non-string entity_id would otherwise never match.
+    assertions_by_evidence: Dict[str, list] = {}
+    for a in all_assertions:
+        assertions_by_evidence.setdefault(str(a.entity_id), []).append(a)
+
+    # 3. Index evidence by control_id
+    evidence_by_control: Dict[str, list] = {}
+    for e in all_evidence:
+        evidence_by_control.setdefault(str(e.control_id), []).append(e)
+
+    # Ordering used to find the weakest confidence level per control.
+    conf_order = {"E0": 0, "E1": 1, "E2": 2, "E3": 3, "E4": 4}
+
+    # 4. Build nested response
+    result_controls: list = []
+    total_controls = 0
+    covered_controls = 0
+    fully_verified = 0
+
+    for ctrl in controls:
+        total_controls += 1
+        ctrl_id = str(ctrl.id)
+        ctrl_evidence = evidence_by_control.get(ctrl_id, [])
+
+        nested_evidence: list = []
+        has_evidence = len(ctrl_evidence) > 0
+        has_assertions = False
+        all_verified = True
+        min_conf: Optional[str] = None
+
+        for e in ctrl_evidence:
+            ev_id = str(e.id)
+            ev_assertions = assertions_by_evidence.get(ev_id, [])
+
+            nested_assertions = [
+                TraceabilityAssertion(
+                    id=str(a.id),
+                    sentence_text=a.sentence_text,
+                    assertion_type=a.assertion_type or "assertion",
+                    confidence=a.confidence or 0.0,
+                    verified=a.verified_by is not None,
+                )
+                for a in ev_assertions
+            ]
+
+            if nested_assertions:
+                has_assertions = True
+            # A single unverified assertion marks the whole control unverified.
+            if not all(na.verified for na in nested_assertions):
+                all_verified = False
+
+            conf = e.confidence_level.value if e.confidence_level else "E1"
+            if min_conf is None or conf_order.get(conf, 1) < conf_order.get(min_conf, 1):
+                min_conf = conf
+
+            nested_evidence.append(TraceabilityEvidence(
+                id=ev_id,
+                title=e.title,
+                evidence_type=e.evidence_type,
+                confidence_level=conf,
+                status=e.status.value if e.status else "valid",
+                assertions=nested_assertions,
+            ))
+
+        # Without any assertion there is nothing that could count as verified.
+        if not has_assertions:
+            all_verified = False
+
+        if has_evidence:
+            covered_controls += 1
+        if has_evidence and has_assertions and all_verified:
+            fully_verified += 1
+
+        coverage = TraceabilityCoverage(
+            has_evidence=has_evidence,
+            has_assertions=has_assertions,
+            all_assertions_verified=all_verified,
+            min_confidence_level=min_conf,
+        )
+
+        result_controls.append(TraceabilityControl(
+            id=ctrl_id,
+            control_id=ctrl.control_id,
+            title=ctrl.title,
+            status=ctrl.status.value if ctrl.status else "planned",
+            domain=ctrl.domain.value if ctrl.domain else "unknown",
+            evidence=nested_evidence,
+            coverage=coverage,
+        ))
+
+    summary = {
+        "total_controls": total_controls,
+        "covered_controls": covered_controls,
+        "fully_verified": fully_verified,
+        "uncovered_controls": total_controls - covered_controls,
+    }
+
+    return TraceabilityMatrixResponse(controls=result_controls, summary=summary)
+
+
 # ============================================================================
 # Reports
 # ============================================================================
@@ -60,10 +60,314 @@ def get_dsfa_service(db: Session = Depends(get_db)) -> DSFAService:
    return DSFAService(db)


-def get_workflow_service(
-    db: Session = Depends(get_db),
-) -> DSFAWorkflowService:
-    return DSFAWorkflowService(db)
+# =============================================================================
+# Pydantic Schemas
+# =============================================================================
+
+class DSFACreate(BaseModel):
+    """Payload schema for creating a DSFA record.
+
+    Only ``title`` is required. The other core fields have non-None defaults,
+    and every section, metadata and JSONB field is optional (defaults to None).
+
+    NOTE(review): the INSERT in ``create_dsfa`` visible in this file writes only
+    the core fields (``title`` through ``created_by``); confirm whether the
+    optional fields below are persisted by another code path.
+    """
+    # Core fields
+    title: str
+    description: str = ""
+    status: str = "draft"
+    risk_level: str = "low"
+    processing_activity: str = ""
+    data_categories: List[str] = []
+    recipients: List[str] = []
+    measures: List[str] = []
+    created_by: str = "system"
+    # Section 1
+    processing_description: Optional[str] = None
+    processing_purpose: Optional[str] = None
+    legal_basis: Optional[str] = None
+    legal_basis_details: Optional[str] = None
+    # Section 2
+    necessity_assessment: Optional[str] = None
+    proportionality_assessment: Optional[str] = None
+    data_minimization: Optional[str] = None
+    alternatives_considered: Optional[str] = None
+    retention_justification: Optional[str] = None
+    # Section 3
+    involves_ai: Optional[bool] = None
+    overall_risk_level: Optional[str] = None
+    risk_score: Optional[int] = None
+    # Section 6
+    dpo_consulted: Optional[bool] = None
+    dpo_name: Optional[str] = None
+    dpo_opinion: Optional[str] = None
+    dpo_approved: Optional[bool] = None
+    authority_consulted: Optional[bool] = None
+    authority_reference: Optional[str] = None
+    authority_decision: Optional[str] = None
+    # Metadata
+    version: Optional[int] = None
+    conclusion: Optional[str] = None
+    federal_state: Optional[str] = None
+    authority_resource_id: Optional[str] = None
+    submitted_by: Optional[str] = None
+    # JSONB Arrays (lists of strings or of free-form dicts)
+    data_subjects: Optional[List[str]] = None
+    affected_rights: Optional[List[str]] = None
+    triggered_rule_codes: Optional[List[str]] = None
+    ai_trigger_ids: Optional[List[str]] = None
+    wp248_criteria_met: Optional[List[str]] = None
+    art35_abs3_triggered: Optional[List[str]] = None
+    tom_references: Optional[List[str]] = None
+    risks: Optional[List[dict]] = None
+    mitigations: Optional[List[dict]] = None
+    stakeholder_consultations: Optional[List[dict]] = None
+    review_triggers: Optional[List[dict]] = None
+    review_comments: Optional[List[dict]] = None
+    ai_use_case_modules: Optional[List[dict]] = None
+    section_8_complete: Optional[bool] = None
+    # JSONB Objects (free-form dicts; no nested schema is enforced here)
+    threshold_analysis: Optional[dict] = None
+    consultation_requirement: Optional[dict] = None
+    review_schedule: Optional[dict] = None
+    section_progress: Optional[dict] = None
+    metadata: Optional[dict] = None
+
+
+class DSFAUpdate(BaseModel):
+    """Payload schema for updating a DSFA record.
+
+    Every field is optional and defaults to None, so a client can send any
+    subset. Compared with ``DSFACreate`` it has ``approved_by`` and lacks
+    ``created_by``.
+
+    NOTE(review): how None values are treated (skipped vs. written as NULL)
+    is decided by the update handler, which is not visible here.
+    """
+    # Core fields
+    title: Optional[str] = None
+    description: Optional[str] = None
+    status: Optional[str] = None
+    risk_level: Optional[str] = None
+    processing_activity: Optional[str] = None
+    data_categories: Optional[List[str]] = None
+    recipients: Optional[List[str]] = None
+    measures: Optional[List[str]] = None
+    approved_by: Optional[str] = None
+    # Section 1
+    processing_description: Optional[str] = None
+    processing_purpose: Optional[str] = None
+    legal_basis: Optional[str] = None
+    legal_basis_details: Optional[str] = None
+    # Section 2
+    necessity_assessment: Optional[str] = None
+    proportionality_assessment: Optional[str] = None
+    data_minimization: Optional[str] = None
+    alternatives_considered: Optional[str] = None
+    retention_justification: Optional[str] = None
+    # Section 3
+    involves_ai: Optional[bool] = None
+    overall_risk_level: Optional[str] = None
+    risk_score: Optional[int] = None
+    # Section 6
+    dpo_consulted: Optional[bool] = None
+    dpo_name: Optional[str] = None
+    dpo_opinion: Optional[str] = None
+    dpo_approved: Optional[bool] = None
+    authority_consulted: Optional[bool] = None
+    authority_reference: Optional[str] = None
+    authority_decision: Optional[str] = None
+    # Metadata
+    version: Optional[int] = None
+    conclusion: Optional[str] = None
+    federal_state: Optional[str] = None
+    authority_resource_id: Optional[str] = None
+    submitted_by: Optional[str] = None
+    # JSONB Arrays (lists of strings or of free-form dicts)
+    data_subjects: Optional[List[str]] = None
+    affected_rights: Optional[List[str]] = None
+    triggered_rule_codes: Optional[List[str]] = None
+    ai_trigger_ids: Optional[List[str]] = None
+    wp248_criteria_met: Optional[List[str]] = None
+    art35_abs3_triggered: Optional[List[str]] = None
+    tom_references: Optional[List[str]] = None
+    risks: Optional[List[dict]] = None
+    mitigations: Optional[List[dict]] = None
+    stakeholder_consultations: Optional[List[dict]] = None
+    review_triggers: Optional[List[dict]] = None
+    review_comments: Optional[List[dict]] = None
+    ai_use_case_modules: Optional[List[dict]] = None
+    section_8_complete: Optional[bool] = None
+    # JSONB Objects (free-form dicts; no nested schema is enforced here)
+    threshold_analysis: Optional[dict] = None
+    consultation_requirement: Optional[dict] = None
+    review_schedule: Optional[dict] = None
+    section_progress: Optional[dict] = None
+    metadata: Optional[dict] = None
+
+
+class DSFAStatusUpdate(BaseModel):
+    """Payload carrying a new DSFA status and, optionally, who approved it."""
+    # Required. The schema accepts any string; NOTE(review): validation against
+    # the allowed status values presumably happens in the handler — confirm.
+    status: str
+    approved_by: Optional[str] = None
+
+
+class DSFASectionUpdate(BaseModel):
+    """Body for PUT /dsfa/{id}/sections/{section_number}."""
+    content: Optional[str] = None
+    # Section-specific data from the frontend travels under this single
+    # ``extra`` key. NOTE(review): this is an ordinary optional dict field; it
+    # does not by itself make the model accept unknown top-level fields.
+    extra: Optional[dict] = None
+
+
+class DSFAApproveRequest(BaseModel):
+    """Body for POST /dsfa/{id}/approve."""
+    # Required decision flag; presumably False means rejection — verify
+    # against the approve handler.
+    approved: bool
+    # Optional free-text remark accompanying the decision.
+    comments: Optional[str] = None
+    # Optional identifier of the person making the decision.
+    approved_by: Optional[str] = None
+
+
+# =============================================================================
+# Helpers
+# =============================================================================
+
+def _get_tenant_id(tenant_id: Optional[str]) -> str:
+    """Return ``tenant_id``, or ``DEFAULT_TENANT_ID`` when it is None or empty."""
+    if tenant_id:
+        return tenant_id
+    return DEFAULT_TENANT_ID
+
+
+def _dsfa_to_response(row) -> dict:
+    """Convert a DSFA database row to a JSON-serializable dict.
+
+    Args:
+        row: A SQLAlchemy ``Row`` (unwrapped through ``._mapping``) or any
+            mapping that supports string-key access. The core columns must be
+            present; the remaining columns are read through ``_get`` and fall
+            back to a default when the key is missing.
+
+    Returns:
+        A dict with one entry per DSFA field. JSONB array fields are lists,
+        JSONB object fields are dicts, and timestamps are ISO strings or None.
+
+    Raises:
+        KeyError: if one of the core columns is missing from ``row``.
+    """
+    import json
+
+    # SQLAlchemy 2.0: Row objects need ._mapping for string-key access
+    if hasattr(row, "_mapping"):
+        row = row._mapping
+
+    def _parse_json(val, expected):
+        """Coerce a JSONB value to ``expected`` (``list`` or ``dict``).
+
+        None, undecodable strings and decoded values of the wrong type all
+        become an empty ``expected()``. A value that is neither None, a str
+        nor already of the expected type is returned unchanged.
+        """
+        if val is None:
+            return expected()
+        if isinstance(val, expected):
+            return val
+        if isinstance(val, str):
+            try:
+                parsed = json.loads(val)
+            except (ValueError, RecursionError):
+                # json.JSONDecodeError is a ValueError; bad JSON is best-effort
+                return expected()
+            return parsed if isinstance(parsed, expected) else expected()
+        return val
+
+    def _parse_arr(val):
+        """Parse a JSONB array field → list."""
+        return _parse_json(val, list)
+
+    def _parse_obj(val):
+        """Parse a JSONB object field → dict."""
+        return _parse_json(val, dict)
+
+    def _ts(val):
+        """Timestamp → ISO string or None."""
+        if not val:
+            return None
+        if isinstance(val, str):
+            return val
+        return val.isoformat()
+
+    def _get(key, default=None):
+        """Safe row access — returns default if key missing or NULL (handles old rows)."""
+        try:
+            v = row[key]
+        except (KeyError, IndexError):
+            return default
+        return default if v is None else v
+
+    # Looked up once; a falsy id (None, missing column) maps to None.
+    previous_version_id = _get("previous_version_id")
+
+    return {
+        # Core fields (always present since Migration 024)
+        "id": str(row["id"]),
+        "tenant_id": row["tenant_id"],
+        "title": row["title"],
+        "description": row["description"] or "",
+        "status": row["status"] or "draft",
+        "risk_level": row["risk_level"] or "low",
+        "processing_activity": row["processing_activity"] or "",
+        "data_categories": _parse_arr(row["data_categories"]),
+        "recipients": _parse_arr(row["recipients"]),
+        "measures": _parse_arr(row["measures"]),
+        "approved_by": row["approved_by"],
+        "approved_at": _ts(row["approved_at"]),
+        "created_by": row["created_by"] or "system",
+        "created_at": _ts(row["created_at"]),
+        "updated_at": _ts(row["updated_at"]),
+        # Section 1 (Migration 030)
+        "processing_description": _get("processing_description"),
+        "processing_purpose": _get("processing_purpose"),
+        "legal_basis": _get("legal_basis"),
+        "legal_basis_details": _get("legal_basis_details"),
+        # Section 2
+        "necessity_assessment": _get("necessity_assessment"),
+        "proportionality_assessment": _get("proportionality_assessment"),
+        "data_minimization": _get("data_minimization"),
+        "alternatives_considered": _get("alternatives_considered"),
+        "retention_justification": _get("retention_justification"),
+        # Section 3
+        "involves_ai": _get("involves_ai", False),
+        "overall_risk_level": _get("overall_risk_level"),
+        "risk_score": _get("risk_score", 0),
+        # Section 6
+        "dpo_consulted": _get("dpo_consulted", False),
+        "dpo_consulted_at": _ts(_get("dpo_consulted_at")),
+        "dpo_name": _get("dpo_name"),
+        "dpo_opinion": _get("dpo_opinion"),
+        "dpo_approved": _get("dpo_approved"),
+        "authority_consulted": _get("authority_consulted", False),
+        "authority_consulted_at": _ts(_get("authority_consulted_at")),
+        "authority_reference": _get("authority_reference"),
+        "authority_decision": _get("authority_decision"),
+        # Metadata / Versioning
+        "version": _get("version", 1),
+        "previous_version_id": str(previous_version_id) if previous_version_id else None,
+        "conclusion": _get("conclusion"),
+        "federal_state": _get("federal_state"),
+        "authority_resource_id": _get("authority_resource_id"),
+        "submitted_for_review_at": _ts(_get("submitted_for_review_at")),
+        "submitted_by": _get("submitted_by"),
+        # JSONB Arrays
+        "data_subjects": _parse_arr(_get("data_subjects")),
+        "affected_rights": _parse_arr(_get("affected_rights")),
+        "triggered_rule_codes": _parse_arr(_get("triggered_rule_codes")),
+        "ai_trigger_ids": _parse_arr(_get("ai_trigger_ids")),
+        "wp248_criteria_met": _parse_arr(_get("wp248_criteria_met")),
+        "art35_abs3_triggered": _parse_arr(_get("art35_abs3_triggered")),
+        "tom_references": _parse_arr(_get("tom_references")),
+        "risks": _parse_arr(_get("risks")),
+        "mitigations": _parse_arr(_get("mitigations")),
+        "stakeholder_consultations": _parse_arr(_get("stakeholder_consultations")),
+        "review_triggers": _parse_arr(_get("review_triggers")),
+        "review_comments": _parse_arr(_get("review_comments")),
+        # Section 8 / AI (Migration 028)
+        "ai_use_case_modules": _parse_arr(_get("ai_use_case_modules")),
+        "section_8_complete": _get("section_8_complete", False),
+        # JSONB Objects
+        "threshold_analysis": _parse_obj(_get("threshold_analysis")),
+        "consultation_requirement": _parse_obj(_get("consultation_requirement")),
+        "review_schedule": _parse_obj(_get("review_schedule")),
+        "section_progress": _parse_obj(_get("section_progress")),
+        "metadata": _parse_obj(_get("metadata")),
+    }
+
+
+def _log_audit(
+    db: Session,
+    tenant_id: str,
+    dsfa_id,
+    action: str,
+    changed_by: str = "system",
+    old_values=None,
+    new_values=None,
+):
+    """Insert one row into ``compliance_dsfa_audit_log``.
+
+    ``old_values`` and ``new_values`` are serialised to JSON for the jsonb
+    columns; a falsy value (None or empty) is stored as NULL. ``dsfa_id`` is
+    stringified when truthy. This function only executes the INSERT — it does
+    not commit, so the caller owns the transaction.
+    """
+    import json
+
+    def _as_json(values):
+        # Falsy payloads become SQL NULL rather than the JSON text "null"/"{}".
+        return json.dumps(values) if values else None
+
+    params = {
+        "tenant_id": tenant_id,
+        "dsfa_id": str(dsfa_id) if dsfa_id else None,
+        "action": action,
+        "changed_by": changed_by,
+        "old_values": _as_json(old_values),
+        "new_values": _as_json(new_values),
+    }
+    db.execute(
+        text("""
+            INSERT INTO compliance_dsfa_audit_log
+                (tenant_id, dsfa_id, action, changed_by, old_values, new_values)
+            VALUES
+                (:tenant_id, :dsfa_id, :action, :changed_by,
+                 CAST(:old_values AS jsonb), CAST(:new_values AS jsonb))
+        """),
+        params,
+    )


 # =============================================================================
@@ -177,8 +481,51 @@ async def create_dsfa(
    service: DSFAService = Depends(get_dsfa_service),
 ) -> dict[str, Any]:
    """Neue DSFA erstellen."""
-    with translate_domain_errors():
-        return service.create(tenant_id, request)
+    import json
+
+    if request.status not in VALID_STATUSES:
+        raise HTTPException(status_code=422, detail=f"Ungültiger Status: {request.status}")
+    if request.risk_level not in VALID_RISK_LEVELS:
+        raise HTTPException(status_code=422, detail=f"Ungültiges Risiko-Level: {request.risk_level}")
+
+    tid = _get_tenant_id(tenant_id)
+
+    row = db.execute(
+        text("""
+            INSERT INTO compliance_dsfas
+                (tenant_id, title, description, status, risk_level,
+                 processing_activity, data_categories, recipients, measures, created_by)
+            VALUES
+                (:tenant_id, :title, :description, :status, :risk_level,
+                 :processing_activity,
+                 CAST(:data_categories AS jsonb),
+                 CAST(:recipients AS jsonb),
+                 CAST(:measures AS jsonb),
+                 :created_by)
+            RETURNING *
+        """),
+        {
+            "tenant_id": tid,
+            "title": request.title,
+            "description": request.description,
+            "status": request.status,
+            "risk_level": request.risk_level,
+            "processing_activity": request.processing_activity,
+            "data_categories": json.dumps(request.data_categories),
+            "recipients": json.dumps(request.recipients),
+            "measures": json.dumps(request.measures),
+            "created_by": request.created_by,
+        },
+    ).fetchone()
+
+    db.flush()
+    row_id = row._mapping["id"] if hasattr(row, "_mapping") else row[0]
+    _log_audit(
+        db, tid, row_id, "CREATE", request.created_by,
+        new_values={"title": request.title, "status": request.status},
+    )
+    db.commit()
+    return _dsfa_to_response(row)


 # =============================================================================
@@ -22,23 +22,21 @@ from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile
 from sqlalchemy.orm import Session

 from classroom_engine.database import get_db
-from compliance.api._http_errors import translate_domain_errors
-from compliance.db import ControlRepository, EvidenceRepository
-from compliance.schemas.evidence import (
-    EvidenceCreate,
-    EvidenceListResponse,
-    EvidenceResponse,
+
+from ..db import (
+    ControlRepository,
+    EvidenceRepository,
+    EvidenceStatusEnum,
+    EvidenceConfidenceEnum,
+    EvidenceTruthStatusEnum,
 )
-from compliance.services.auto_risk_updater import AutoRiskUpdater
-from compliance.domain import NotFoundError, ValidationError
-from compliance.services.evidence_service import (
-    SOURCE_CONTROL_MAP,
-    EvidenceService,
-    _extract_findings_detail,  # re-exported for legacy test imports
-    _parse_ci_evidence,  # re-exported for legacy test imports
-    _store_evidence,  # re-exported for legacy test imports
-    _update_risks as _update_risks_impl,
+from ..db.models import EvidenceDB, ControlDB, AuditTrailDB
+from ..services.auto_risk_updater import AutoRiskUpdater
+from .schemas import (
+    EvidenceCreate, EvidenceResponse, EvidenceListResponse,
+    EvidenceRejectRequest,
 )
+from .audit_trail_utils import log_audit_trail

 logger = logging.getLogger(__name__)
 router = APIRouter(tags=["compliance-evidence"])
@@ -56,7 +54,88 @@ def get_evidence_service(db: Session = Depends(get_db)) -> EvidenceService:


 # ============================================================================
-# Evidence CRUD
+# Anti-Fake-Evidence: Four-Eyes Domain Check
+# ============================================================================
+
+# Control-domain values whose evidence goes through four-eyes review.
+FOUR_EYES_DOMAINS = {"gov", "priv"}
+
+
+def _requires_four_eyes(control_domain: str) -> bool:
+    """Controls in governance/privacy domains require two independent reviewers.
+
+    Returns True exactly when ``control_domain`` is a member of
+    ``FOUR_EYES_DOMAINS``; the comparison is case-sensitive, and an empty
+    string yields False.
+    """
+    return control_domain in FOUR_EYES_DOMAINS
+
+
+# ============================================================================
+# Anti-Fake-Evidence: Auto-Classification Helpers
+# ============================================================================
+
+def _classify_confidence(source: Optional[str], evidence_type: Optional[str] = None, artifact_hash: Optional[str] = None) -> EvidenceConfidenceEnum:
+    """Derive the evidence confidence level from its source.
+
+    Mapping: ``ci_pipeline`` and ``api`` → E3, ``generated`` → E0, everything
+    else (including ``manual``, ``upload``, None and unknown sources) → E1.
+
+    ``evidence_type`` and ``artifact_hash`` are accepted but do not influence
+    the result.
+    NOTE(review): ``api`` evidence is rated E3 with or without an
+    ``artifact_hash``; if hash-less API evidence was meant to rank lower, that
+    distinction is not implemented.
+    """
+    if source in ("ci_pipeline", "api"):
+        return EvidenceConfidenceEnum.E3
+    if source == "generated":
+        return EvidenceConfidenceEnum.E0
+    # "manual", "upload" and any unrecognised source share the same level.
+    return EvidenceConfidenceEnum.E1
+
+
+def _classify_truth_status(source: Optional[str]) -> EvidenceTruthStatusEnum:
+    """Derive the evidence truth status from its source.
+
+    Mapping: ``ci_pipeline`` and ``api`` → OBSERVED, ``generated`` →
+    GENERATED, everything else (including ``manual``, ``upload``, None and
+    unknown sources) → UPLOADED.
+    """
+    if source in ("ci_pipeline", "api"):
+        return EvidenceTruthStatusEnum.OBSERVED
+    if source == "generated":
+        return EvidenceTruthStatusEnum.GENERATED
+    # "manual", "upload" and any unrecognised source are treated as uploads.
+    return EvidenceTruthStatusEnum.UPLOADED
+
+
+def _build_evidence_response(e: EvidenceDB) -> EvidenceResponse:
+    """Map an EvidenceDB record to an EvidenceResponse, anti-fake fields included.
+
+    Most attributes are copied verbatim; the enum-typed ``status``,
+    ``confidence_level`` and ``truth_status`` are reduced to their ``.value``
+    (or None when unset).
+    """
+    # Attributes copied onto the response unchanged.
+    passthrough = (
+        "id", "control_id", "evidence_type", "title", "description",
+        "artifact_path", "artifact_url", "artifact_hash", "file_size_bytes",
+        "mime_type", "valid_from", "valid_until", "source", "ci_job_id",
+        "uploaded_by", "collected_at", "created_at", "generation_mode",
+        "may_be_used_as_evidence", "reviewed_by", "reviewed_at",
+        "approval_status", "first_reviewer", "first_reviewed_at",
+        "second_reviewer", "second_reviewed_at", "requires_four_eyes",
+    )
+    payload = {name: getattr(e, name) for name in passthrough}
+
+    # Enum-typed attributes are flattened to their raw value.
+    for name in ("status", "confidence_level", "truth_status"):
+        member = getattr(e, name)
+        payload[name] = member.value if member else None
+
+    return EvidenceResponse(**payload)
+
+
+# ============================================================================
+# Evidence
 # ============================================================================

@router.get("/evidence", response_model=EvidenceListResponse)
@@ -69,8 +148,38 @@ async def list_evidence(
    service: EvidenceService = Depends(get_evidence_service),
 ) -> EvidenceListResponse:
    """List evidence with optional filters and pagination."""
-    with translate_domain_errors():
-        return service.list_evidence(control_id, evidence_type, status, page, limit)
+    repo = EvidenceRepository(db)
+
+    if control_id:
+        # First get the control UUID
+        ctrl_repo = ControlRepository(db)
+        control = ctrl_repo.get_by_control_id(control_id)
+        if not control:
+            raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
+        evidence = repo.get_by_control(control.id)
+    else:
+        evidence = repo.get_all()
+
+    if evidence_type:
+        evidence = [e for e in evidence if e.evidence_type == evidence_type]
+
+    if status:
+        try:
+            status_enum = EvidenceStatusEnum(status)
+            evidence = [e for e in evidence if e.status == status_enum]
+        except ValueError:
+            pass
+
+    total = len(evidence)
+
+    # Apply pagination if requested
+    if page is not None and limit is not None:
+        offset = (page - 1) * limit
+        evidence = evidence[offset:offset + limit]
+
+    results = [_build_evidence_response(e) for e in evidence]
+
+    return EvidenceListResponse(evidence=results, total=total)


@router.post("/evidence", response_model=EvidenceResponse)
@@ -79,8 +188,66 @@ async def create_evidence(
    service: EvidenceService = Depends(get_evidence_service),
 ) -> EvidenceResponse:
    """Create new evidence record."""
-    with translate_domain_errors():
-        return service.create_evidence(evidence_data)
+    repo = EvidenceRepository(db)
+
+    # Get control UUID
+    ctrl_repo = ControlRepository(db)
+    control = ctrl_repo.get_by_control_id(evidence_data.control_id)
+    if not control:
+        raise HTTPException(status_code=404, detail=f"Control {evidence_data.control_id} not found")
+
+    source = evidence_data.source or "api"
+    confidence = _classify_confidence(source, evidence_data.evidence_type)
+    truth = _classify_truth_status(source)
+
+    # Allow explicit override from request
+    if evidence_data.confidence_level:
+        try:
+            confidence = EvidenceConfidenceEnum(evidence_data.confidence_level)
+        except ValueError:
+            pass
+    if evidence_data.truth_status:
+        try:
+            truth = EvidenceTruthStatusEnum(evidence_data.truth_status)
+        except ValueError:
+            pass
+
+    evidence = repo.create(
+        control_id=control.id,
+        evidence_type=evidence_data.evidence_type,
+        title=evidence_data.title,
+        description=evidence_data.description,
+        artifact_url=evidence_data.artifact_url,
+        valid_from=evidence_data.valid_from,
+        valid_until=evidence_data.valid_until,
+        source=source,
+        ci_job_id=evidence_data.ci_job_id,
+    )
+
+    # Set anti-fake-evidence fields
+    evidence.confidence_level = confidence
+    evidence.truth_status = truth
+    # Generated evidence should not be used as evidence by default
+    if truth == EvidenceTruthStatusEnum.GENERATED:
+        evidence.may_be_used_as_evidence = False
+
+    # Four-Eyes: check if the linked control's domain requires it
+    control_domain = control.domain.value if control.domain else ""
+    if _requires_four_eyes(control_domain):
+        evidence.requires_four_eyes = True
+        evidence.approval_status = "pending_first"
+
+    db.commit()
+
+    # Audit trail
+    log_audit_trail(
+        db, "evidence", evidence.id, evidence.title, "create",
+        performed_by=evidence_data.source or "api",
+        change_summary=f"Evidence created with confidence={confidence.value}, truth={truth.value}",
+    )
+    db.commit()
+
+    return _build_evidence_response(evidence)


@router.delete("/evidence/{evidence_id}")
@@ -107,9 +274,271 @@ async def upload_evidence(
    service: EvidenceService = Depends(get_evidence_service),
 ) -> EvidenceResponse:
    """Upload evidence file."""
-    with translate_domain_errors():
-        return await service.upload_evidence(
-            control_id, evidence_type, title, file, description
+    # Get control UUID
+    ctrl_repo = ControlRepository(db)
+    control = ctrl_repo.get_by_control_id(control_id)
+    if not control:
+        raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
+
+    # Create upload directory
+    upload_dir = f"/tmp/compliance_evidence/{control_id}"
+    os.makedirs(upload_dir, exist_ok=True)
+
+    # Save file
+    file_path = os.path.join(upload_dir, file.filename)
+    content = await file.read()
+
+    with open(file_path, "wb") as f:
+        f.write(content)
+
+    # Calculate hash
+    file_hash = hashlib.sha256(content).hexdigest()
+
+    # Create evidence record
+    repo = EvidenceRepository(db)
+    evidence = repo.create(
+        control_id=control.id,
+        evidence_type=evidence_type,
+        title=title,
+        description=description,
+        artifact_path=file_path,
+        artifact_hash=file_hash,
+        file_size_bytes=len(content),
+        mime_type=file.content_type,
+        source="upload",
+    )
+
+    # Upload evidence → E1 + uploaded
+    evidence.confidence_level = EvidenceConfidenceEnum.E1
+    evidence.truth_status = EvidenceTruthStatusEnum.UPLOADED
+
+    # Four-Eyes: check if the linked control's domain requires it
+    control_domain = control.domain.value if control.domain else ""
+    if _requires_four_eyes(control_domain):
+        evidence.requires_four_eyes = True
+        evidence.approval_status = "pending_first"
+
+    db.commit()
+
+    return _build_evidence_response(evidence)
+
+
+# ============================================================================
+# CI/CD Evidence Collection — helpers
+# ============================================================================
+
+# Map CI source names to the corresponding control IDs.
+# Several scan types map to SDLC-00x controls; test results map to AUD-001.
+# NOTE(review): presumably the keys are the `source` values accepted by the
+# CI evidence collection endpoint — verify against the caller.
+SOURCE_CONTROL_MAP = {
+    "sast": "SDLC-001",
+    "dependency_scan": "SDLC-002",
+    "secret_scan": "SDLC-003",
+    "code_review": "SDLC-004",
+    "sbom": "SDLC-005",
+    "container_scan": "SDLC-006",
+    "test_results": "AUD-001",
+}
+
+
+def _parse_ci_evidence(data: dict) -> dict:
+    """
+    Parse and validate incoming CI evidence data.
+
+    The report layout is detected from its top-level key: ``results``
+    (Semgrep), ``Results`` (Trivy), ``findings`` (generic) or ``components``
+    (SBOM). Null collections and null or non-string severities are treated
+    as empty instead of raising.
+
+    Returns a dict with:
+      - report_json: str  (serialised JSON, "{}" for falsy input)
+      - report_hash: str  (SHA-256 hex digest of report_json)
+      - evidence_status: str  ("failed" if any CRITICAL/HIGH finding, else "valid")
+      - findings_count: int
+      - critical_findings: int  (CRITICAL or HIGH findings)
+
+    NOTE(review): the generic ``findings`` layout only contributes to
+    ``findings_count``; its severities never mark the evidence as failed.
+    Confirm this is intended.
+    """
+    report_json = json.dumps(data) if data else "{}"
+    report_hash = hashlib.sha256(report_json.encode()).hexdigest()
+
+    severe = ("CRITICAL", "HIGH")
+
+    def _severity(value) -> str:
+        # A null or non-string severity counts as "no severity"
+        return value.upper() if isinstance(value, str) else ""
+
+    findings_count = 0
+    critical_findings = 0
+
+    if data and isinstance(data, dict):
+        # Semgrep format
+        if "results" in data:
+            results = data.get("results") or []
+            findings_count = len(results)
+            critical_findings = sum(
+                1 for r in results
+                if _severity((r.get("extra") or {}).get("severity")) in severe
+            )
+
+        # Trivy format
+        elif "Results" in data:
+            for result in data.get("Results") or []:
+                # A target without vulnerabilities may carry an explicit null
+                vulns = result.get("Vulnerabilities") or []
+                findings_count += len(vulns)
+                critical_findings += sum(
+                    1 for v in vulns
+                    if _severity(v.get("Severity")) in severe
+                )
+
+        # Generic findings array
+        elif "findings" in data:
+            findings_count = len(data.get("findings") or [])
+
+        # SBOM format - just count components
+        elif "components" in data:
+            findings_count = len(data.get("components") or [])
+
+    evidence_status = "failed" if critical_findings > 0 else "valid"
+
+    return {
+        "report_json": report_json,
+        "report_hash": report_hash,
+        "evidence_status": evidence_status,
+        "findings_count": findings_count,
+        "critical_findings": critical_findings,
+    }
+
+
+def _store_evidence(
+    db: Session,
+    *,
+    control_db_id: str,
+    source: str,
+    parsed: dict,
+    ci_job_id: str,
+    ci_job_url: str,
+    report_data: dict,
+) -> EvidenceDB:
+    """
+    Persist a CI evidence item to the database and write the report file.
+
+    Args:
+        db: Session used to add, commit and refresh the new record.
+        control_db_id: Stored as ``control_id`` on the new evidence row.
+        source: CI source name; used in the title, evidence type and file path.
+        parsed: Output of ``_parse_ci_evidence`` (reads ``findings_count``,
+            ``critical_findings``, ``report_hash``, ``report_json`` and
+            ``evidence_status``).
+        ci_job_id: CI job identifier, stored and added to the description.
+        ci_job_url: CI job URL, added to the description only.
+        report_data: Raw report written to disk as indented JSON.
+
+    Returns the created EvidenceDB instance (already committed).
+
+    NOTE(review): ``artifact_hash`` and ``file_size_bytes`` describe the
+    compact ``parsed["report_json"]`` text, while the file at
+    ``artifact_path`` is an ``indent=2`` dump of ``report_data``; a hash check
+    against the file on disk will therefore not match. Confirm which artefact
+    the hash is meant to cover.
+    NOTE(review): ``source`` is interpolated into a path under /tmp; this
+    assumes the caller has validated it against a known set of sources.
+    """
+    findings_count = parsed["findings_count"]
+    critical_findings = parsed["critical_findings"]
+
+    # Read each clock once so the title and file name agree, and so that
+    # valid_until is exactly 90 days after valid_from.
+    local_now = datetime.now()
+    utc_now = datetime.utcnow()
+
+    # Build title and description
+    title = f"{source.upper()} Report - {local_now.strftime('%Y-%m-%d %H:%M')}"
+    description_lines = ["Automatically collected from CI/CD pipeline"]
+    if findings_count > 0:
+        description_lines.append(f"- Total findings: {findings_count}")
+    if critical_findings > 0:
+        description_lines.append(f"- Critical/High findings: {critical_findings}")
+    if ci_job_id:
+        description_lines.append(f"- CI Job ID: {ci_job_id}")
+    if ci_job_url:
+        description_lines.append(f"- CI Job URL: {ci_job_url}")
+    description = "\n".join(description_lines)
+
+    # Store report file
+    upload_dir = f"/tmp/compliance_evidence/ci/{source}"
+    os.makedirs(upload_dir, exist_ok=True)
+    file_name = f"{source}_{local_now.strftime('%Y%m%d_%H%M%S')}_{parsed['report_hash'][:8]}.json"
+    file_path = os.path.join(upload_dir, file_name)
+
+    # Explicit encoding keeps the output independent of the host locale
+    with open(file_path, "w", encoding="utf-8") as f:
+        json.dump(report_data or {}, f, indent=2)
+
+    # Create evidence record with anti-fake-evidence classification
+    evidence = EvidenceDB(
+        id=str(uuid_module.uuid4()),
+        control_id=control_db_id,
+        evidence_type=f"ci_{source}",
+        title=title,
+        description=description,
+        artifact_path=file_path,
+        artifact_hash=parsed["report_hash"],
+        file_size_bytes=len(parsed["report_json"]),
+        mime_type="application/json",
+        source="ci_pipeline",
+        ci_job_id=ci_job_id,
+        valid_from=utc_now,
+        valid_until=utc_now + timedelta(days=90),
+        status=EvidenceStatusEnum(parsed["evidence_status"]),
+        # CI pipeline evidence → E3 observed (system-observed, hash-verified)
+        confidence_level=EvidenceConfidenceEnum.E3,
+        truth_status=EvidenceTruthStatusEnum.OBSERVED,
+        may_be_used_as_evidence=True,
+    )
+    db.add(evidence)
+    db.commit()
+    db.refresh(evidence)
+
+    return evidence
+
+
+def _extract_findings_detail(report_data: dict) -> dict:
+    """
+    Extract severity-bucketed finding counts from report data.
+
+    The report layout is detected from its top-level key: ``results``
+    (Semgrep), ``Results`` (Trivy) or ``findings`` (generic). Falsy or
+    non-dict input, null collections and null or non-string severities are
+    tolerated instead of raising.
+
+    Bucketing differs per layout:
+      - Semgrep: LOW and INFO count as low; other unknown severities are ignored.
+      - Trivy: only LOW counts as low; other unknown severities are ignored.
+      - Generic: anything that is not CRITICAL/HIGH/MEDIUM counts as low.
+
+    Returns dict with keys: critical, high, medium, low.
+    """
+    findings_detail = {
+        "critical": 0,
+        "high": 0,
+        "medium": 0,
+        "low": 0,
+    }
+
+    if not report_data or not isinstance(report_data, dict):
+        return findings_detail
+
+    def _tally(raw_severity, low_names=(), low_is_default=False):
+        """Add one finding to the bucket matching ``raw_severity``."""
+        severity = raw_severity.upper() if isinstance(raw_severity, str) else ""
+        if severity in ("CRITICAL", "HIGH", "MEDIUM"):
+            findings_detail[severity.lower()] += 1
+        elif low_is_default or severity in low_names:
+            findings_detail["low"] += 1
+
+    # Semgrep format
+    if "results" in report_data:
+        for r in report_data.get("results") or []:
+            _tally((r.get("extra") or {}).get("severity"), low_names=("LOW", "INFO"))
+
+    # Trivy format
+    elif "Results" in report_data:
+        for result in report_data.get("Results") or []:
+            # A target without vulnerabilities may carry an explicit null
+            for v in result.get("Vulnerabilities") or []:
+                _tally(v.get("Severity"), low_names=("LOW",))
+
+    # Generic findings with severity
+    elif "findings" in report_data:
+        for f in report_data.get("findings") or []:
+            _tally(f.get("severity"), low_is_default=True)
+
+    return findings_detail
+
+
+def _update_risks(db: Session, *, source: str, control_id: str, ci_job_id: str, report_data: dict):
+    """
+    Update risk status based on new evidence.
+
+    Uses AutoRiskUpdater to update Control status and linked Risks based on
+    severity-bucketed findings.  Returns the update result or None on error.
+    """
+    findings_detail = _extract_findings_detail(report_data)
+
+    try:
+        auto_updater = AutoRiskUpdater(db)
+        risk_update_result = auto_updater.process_evidence_collect_request(
+            tool=source,
+            control_id=control_id,
+            evidence_type=f"ci_{source}",
+            timestamp=datetime.utcnow().isoformat(),
+            commit_sha=report_data.get("commit_sha", "unknown") if report_data else "unknown",
+            ci_job_id=ci_job_id,
+            findings=findings_detail,
        )


@@ -227,14 +656,229 @@ async def get_ci_evidence_status(
 # Legacy re-exports for tests that import helpers directly.
 # ----------------------------------------------------------------------------

-__all__ = [
-    "router",
-    "SOURCE_CONTROL_MAP",
-    "EvidenceRepository",
-    "ControlRepository",
-    "AutoRiskUpdater",
-    "_parse_ci_evidence",
-    "_extract_findings_detail",
-    "_store_evidence",
-    "_update_risks",
-]
+    if control_id:
+        ctrl_repo = ControlRepository(db)
+        control = ctrl_repo.get_by_control_id(control_id)
+        if control:
+            query = query.filter(EvidenceDB.control_id == control.id)
+
+    evidence_list = query.order_by(EvidenceDB.collected_at.desc()).limit(100).all()
+
+    # Group by control and calculate stats
+    control_stats = defaultdict(lambda: {
+        "total": 0,
+        "valid": 0,
+        "failed": 0,
+        "last_collected": None,
+        "evidence": [],
+    })
+
+    for e in evidence_list:
+        # Get control_id string
+        control = db.query(ControlDB).filter(ControlDB.id == e.control_id).first()
+        ctrl_id = control.control_id if control else "unknown"
+
+        stats = control_stats[ctrl_id]
+        stats["total"] += 1
+        if e.status:
+            if e.status.value == "valid":
+                stats["valid"] += 1
+            elif e.status.value == "failed":
+                stats["failed"] += 1
+        if not stats["last_collected"] or e.collected_at > stats["last_collected"]:
+            stats["last_collected"] = e.collected_at
+
+        # Add evidence summary
+        stats["evidence"].append({
+            "id": e.id,
+            "type": e.evidence_type,
+            "status": e.status.value if e.status else None,
+            "collected_at": e.collected_at.isoformat() if e.collected_at else None,
+            "ci_job_id": e.ci_job_id,
+        })
+
+    # Convert to list and sort
+    result = []
+    for ctrl_id, stats in control_stats.items():
+        result.append({
+            "control_id": ctrl_id,
+            "total_evidence": stats["total"],
+            "valid_count": stats["valid"],
+            "failed_count": stats["failed"],
+            "last_collected": stats["last_collected"].isoformat() if stats["last_collected"] else None,
+            "recent_evidence": stats["evidence"][:5],
+        })
+
+    result.sort(key=lambda x: x["last_collected"] or "", reverse=True)
+
+    return {
+        "period_days": days,
+        "total_evidence": len(evidence_list),
+        "controls": result,
+    }
+
+
+# ============================================================================
+# Evidence Review (Anti-Fake-Evidence)
+# ============================================================================
+
+from pydantic import BaseModel as _BaseModel
+
+class _EvidenceReviewRequest(_BaseModel):
+    # Request body for PATCH /evidence/{evidence_id}/review.
+    # Optional fields that are unset (or empty) are left untouched by the handler.
+    confidence_level: Optional[str] = None  # converted with EvidenceConfidenceEnum by the handler
+    truth_status: Optional[str] = None  # converted with EvidenceTruthStatusEnum by the handler
+    reviewed_by: str  # reviewer identifier from the request body; compared against first_reviewer for Four-Eyes
+
+
+@router.patch("/evidence/{evidence_id}/review", response_model=EvidenceResponse)
+async def review_evidence(
+    evidence_id: str,
+    review: _EvidenceReviewRequest,
+    db: Session = Depends(get_db),
+):
+    """
+    Review evidence: upgrade confidence level and/or change truth status.
+
+    For Four-Eyes evidence, the first reviewer sets first_reviewer and
+    approval_status='first_approved'. A second (different) reviewer then
+    sets second_reviewer and approval_status='approved'.
+    """
+    evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
+    if not evidence:
+        raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")
+
+    old_confidence = evidence.confidence_level.value if evidence.confidence_level else None
+    old_truth = evidence.truth_status.value if evidence.truth_status else None
+
+    if review.confidence_level:
+        try:
+            evidence.confidence_level = EvidenceConfidenceEnum(review.confidence_level)
+        except ValueError:
+            raise HTTPException(status_code=400, detail=f"Invalid confidence_level: {review.confidence_level}")
+
+    if review.truth_status:
+        try:
+            evidence.truth_status = EvidenceTruthStatusEnum(review.truth_status)
+        except ValueError:
+            raise HTTPException(status_code=400, detail=f"Invalid truth_status: {review.truth_status}")
+
+    # Four-Eyes branching
+    if evidence.requires_four_eyes:
+        status = evidence.approval_status or "none"
+        if status in ("none", "pending_first"):
+            evidence.first_reviewer = review.reviewed_by
+            evidence.first_reviewed_at = datetime.utcnow()
+            evidence.approval_status = "first_approved"
+        elif status == "first_approved":
+            if review.reviewed_by == evidence.first_reviewer:
+                raise HTTPException(
+                    status_code=400,
+                    detail="Four-Eyes: second reviewer must be different from first reviewer",
+                )
+            evidence.second_reviewer = review.reviewed_by
+            evidence.second_reviewed_at = datetime.utcnow()
+            evidence.approval_status = "approved"
+        elif status == "approved":
+            raise HTTPException(status_code=400, detail="Evidence already approved")
+        elif status == "rejected":
+            raise HTTPException(status_code=400, detail="Evidence was rejected — create new evidence instead")
+
+    evidence.reviewed_by = review.reviewed_by
+    evidence.reviewed_at = datetime.utcnow()
+    db.commit()
+
+    # Audit trail
+    new_confidence = evidence.confidence_level.value if evidence.confidence_level else None
+    if old_confidence != new_confidence:
+        log_audit_trail(
+            db, "evidence", evidence_id, evidence.title, "review",
+            performed_by=review.reviewed_by,
+            field_changed="confidence_level",
+            old_value=old_confidence,
+            new_value=new_confidence,
+        )
+    new_truth = evidence.truth_status.value if evidence.truth_status else None
+    if old_truth != new_truth:
+        log_audit_trail(
+            db, "evidence", evidence_id, evidence.title, "review",
+            performed_by=review.reviewed_by,
+            field_changed="truth_status",
+            old_value=old_truth,
+            new_value=new_truth,
+        )
+    db.commit()
+
+    db.refresh(evidence)
+    return _build_evidence_response(evidence)
+
+
+@router.patch("/evidence/{evidence_id}/reject", response_model=EvidenceResponse)
+async def reject_evidence(
+    evidence_id: str,
+    body: EvidenceRejectRequest,
+    db: Session = Depends(get_db),
+):
+    """Reject evidence (sets approval_status='rejected')."""
+    evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
+    if not evidence:
+        raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")
+
+    evidence.approval_status = "rejected"
+    evidence.reviewed_by = body.reviewed_by
+    evidence.reviewed_at = datetime.utcnow()
+    db.commit()
+
+    log_audit_trail(
+        db, "evidence", evidence_id, evidence.title, "reject",
+        performed_by=body.reviewed_by,
+        change_summary=body.rejection_reason or "Evidence rejected",
+    )
+    db.commit()
+
+    db.refresh(evidence)
+    return _build_evidence_response(evidence)
+
+
+# ============================================================================
+# Audit Trail Query
+# ============================================================================
+
+@router.get("/audit-trail")
+async def get_audit_trail(
+    entity_type: Optional[str] = Query(None),
+    entity_id: Optional[str] = Query(None),
+    action: Optional[str] = Query(None),
+    limit: int = Query(50, ge=1, le=200),
+    db: Session = Depends(get_db),
+):
+    """Query audit trail entries for an entity."""
+    query = db.query(AuditTrailDB)
+    if entity_type:
+        query = query.filter(AuditTrailDB.entity_type == entity_type)
+    if entity_id:
+        query = query.filter(AuditTrailDB.entity_id == entity_id)
+    if action:
+        query = query.filter(AuditTrailDB.action == action)
+
+    records = query.order_by(AuditTrailDB.performed_at.desc()).limit(limit).all()
+
+    return {
+        "entries": [
+            {
+                "id": r.id,
+                "entity_type": r.entity_type,
+                "entity_id": r.entity_id,
+                "entity_name": r.entity_name,
+                "action": r.action,
+                "field_changed": r.field_changed,
+                "old_value": r.old_value,
+                "new_value": r.new_value,
+                "change_summary": r.change_summary,
+                "performed_by": r.performed_by,
+                "performed_at": r.performed_at.isoformat() if r.performed_at else None,
+                "checksum": r.checksum,
+            }
+            for r in records
+        ],
+        "total": len(records),
+    }
@@ -39,7 +39,6 @@ router = APIRouter(tags=["extraction"])

 ALL_COLLECTIONS = [
    "bp_compliance_ce",          # BSI-TR documents — primary Prüfaspekte source
-    "bp_compliance_recht",       # Legal texts (GDPR, AI Act, ...)
    "bp_compliance_gesetze",     # German laws
    "bp_compliance_datenschutz", # Data protection documents
    "bp_dsfa_corpus",            # DSFA corpus
@@ -80,9 +80,13 @@ def _handle(func, *args, **kwargs):  # type: ignore[no-untyped-def]
        raise HTTPException(status_code=400, detail=str(exc))


-# ============================================================================
-# ISMS Scope (ISO 27001 4.3)
-# ============================================================================
+# Shared audit trail utilities — canonical implementation in audit_trail_utils.py
+from .audit_trail_utils import log_audit_trail, create_signature  # noqa: E402
+
+
+# =============================================================================
+# ISMS SCOPE (ISO 27001 4.3)
+# =============================================================================

@router.get("/scope", response_model=ISMSScopeResponse)
 async def get_isms_scope(db: Session = Depends(get_db)):
@@ -50,6 +50,57 @@ VALID_DOCUMENT_TYPES = {
    "cookie_banner",
    "agb",
    "clause",
+    # Security document templates (Migration 051)
+    "it_security_concept",
+    "data_protection_concept",
+    "backup_recovery_concept",
+    "logging_concept",
+    "incident_response_plan",
+    "access_control_concept",
+    "risk_management_concept",
+    # Policy templates — IT Security (Migration 054)
+    "information_security_policy",
+    "access_control_policy",
+    "password_policy",
+    "encryption_policy",
+    "logging_policy",
+    "backup_policy",
+    "incident_response_policy",
+    "change_management_policy",
+    "patch_management_policy",
+    "asset_management_policy",
+    "cloud_security_policy",
+    "devsecops_policy",
+    "secrets_management_policy",
+    "vulnerability_management_policy",
+    # Policy templates — Data (Migration 054)
+    "data_protection_policy",
+    "data_classification_policy",
+    "data_retention_policy",
+    "data_transfer_policy",
+    "privacy_incident_policy",
+    # Policy templates — Personnel (Migration 054)
+    "employee_security_policy",
+    "security_awareness_policy",
+    "remote_work_policy",
+    "offboarding_policy",
+    # Policy templates — Vendor/Supply Chain (Migration 054)
+    "vendor_risk_management_policy",
+    "third_party_security_policy",
+    "supplier_security_policy",
+    # Policy templates — BCM (Migration 054)
+    "business_continuity_policy",
+    "disaster_recovery_policy",
+    "crisis_management_policy",
+    # CRA Cybersecurity (Migration 056)
+    "cybersecurity_policy",
+    # DSFA template
+    "dsfa",
+    # Module document templates (Migration 073)
+    "vvt_register",
+    "tom_documentation",
+    "loeschkonzept",
+    "pflichtenregister",
 }
 VALID_STATUSES = {"published", "draft", "archived"}

@@ -0,0 +1,162 @@
+"""
+FastAPI routes for LLM Generation Audit Trail.
+
+Endpoints:
+- POST /llm-audit: Record an LLM generation event
+- GET  /llm-audit: List audit records with filters
+"""
+
+import logging
+import uuid as uuid_module
+from datetime import datetime
+from typing import Optional
+
+from fastapi import APIRouter, Depends, Query
+from pydantic import BaseModel
+from sqlalchemy.orm import Session
+
+from classroom_engine.database import get_db
+from ..db.models import LLMGenerationAuditDB
+
+logger = logging.getLogger(__name__)
+router = APIRouter(tags=["compliance-llm-audit"])
+
+
+# ============================================================================
+# Schemas
+# ============================================================================
+
+class LLMAuditCreate(BaseModel):
+    # Request body for POST /llm-audit: describes one LLM generation event.
+    entity_type: str
+    entity_id: Optional[str] = None
+    generation_mode: str
+    # Parsed as EvidenceTruthStatusEnum by the create handler; values the enum
+    # does not accept are stored as EvidenceTruthStatusEnum.GENERATED.
+    truth_status: str = "generated"
+    may_be_used_as_evidence: bool = False
+    llm_model: Optional[str] = None
+    llm_provider: Optional[str] = None
+    prompt_hash: Optional[str] = None
+    # Both summaries are cut to their first 500 characters before being stored.
+    input_summary: Optional[str] = None
+    output_summary: Optional[str] = None
+    # Persisted in the record's `extra_metadata` column ({} when omitted).
+    metadata: Optional[dict] = None
+    tenant_id: Optional[str] = None
+
+
+class LLMAuditResponse(BaseModel):
+    # API representation of one stored LLM generation audit record.
+    id: str
+    tenant_id: Optional[str] = None
+    entity_type: str
+    entity_id: Optional[str] = None
+    generation_mode: str
+    # Handlers fill this with the enum's `.value`, or "generated" when the
+    # stored truth_status is empty.
+    truth_status: str
+    may_be_used_as_evidence: bool
+    llm_model: Optional[str] = None
+    llm_provider: Optional[str] = None
+    prompt_hash: Optional[str] = None
+    input_summary: Optional[str] = None
+    output_summary: Optional[str] = None
+    # Filled from the record's `extra_metadata` attribute by the handlers.
+    metadata: Optional[dict] = None
+    created_at: datetime
+
+    class Config:
+        # Pydantic option for building the model from attribute-bearing objects.
+        # NOTE(review): the handlers in this file build responses field by
+        # field, so this may be unused here — confirm before removing.
+        from_attributes = True
+
+
+# ============================================================================
+# Routes
+# ============================================================================
+
+@router.post("/llm-audit", response_model=LLMAuditResponse)
+async def create_llm_audit(
+    data: LLMAuditCreate,
+    db: Session = Depends(get_db),
+):
+    """Record an LLM generation event for audit trail."""
+    from ..db.models import EvidenceTruthStatusEnum
+
+    # Validate truth_status
+    try:
+        truth_enum = EvidenceTruthStatusEnum(data.truth_status)
+    except ValueError:
+        truth_enum = EvidenceTruthStatusEnum.GENERATED
+
+    record = LLMGenerationAuditDB(
+        id=str(uuid_module.uuid4()),
+        tenant_id=data.tenant_id,
+        entity_type=data.entity_type,
+        entity_id=data.entity_id,
+        generation_mode=data.generation_mode,
+        truth_status=truth_enum,
+        may_be_used_as_evidence=data.may_be_used_as_evidence,
+        llm_model=data.llm_model,
+        llm_provider=data.llm_provider,
+        prompt_hash=data.prompt_hash,
+        input_summary=data.input_summary[:500] if data.input_summary else None,
+        output_summary=data.output_summary[:500] if data.output_summary else None,
+        extra_metadata=data.metadata or {},
+    )
+    db.add(record)
+    db.commit()
+    db.refresh(record)
+
+    return LLMAuditResponse(
+        id=record.id,
+        tenant_id=record.tenant_id,
+        entity_type=record.entity_type,
+        entity_id=record.entity_id,
+        generation_mode=record.generation_mode,
+        truth_status=record.truth_status.value if record.truth_status else "generated",
+        may_be_used_as_evidence=record.may_be_used_as_evidence,
+        llm_model=record.llm_model,
+        llm_provider=record.llm_provider,
+        prompt_hash=record.prompt_hash,
+        input_summary=record.input_summary,
+        output_summary=record.output_summary,
+        metadata=record.extra_metadata,
+        created_at=record.created_at,
+    )
+
+
+@router.get("/llm-audit")
+async def list_llm_audit(
+    entity_type: Optional[str] = Query(None),
+    entity_id: Optional[str] = Query(None),
+    page: int = Query(1, ge=1),
+    limit: int = Query(50, ge=1, le=200),
+    db: Session = Depends(get_db),
+):
+    """List LLM generation audit records with optional filters."""
+    query = db.query(LLMGenerationAuditDB)
+
+    if entity_type:
+        query = query.filter(LLMGenerationAuditDB.entity_type == entity_type)
+    if entity_id:
+        query = query.filter(LLMGenerationAuditDB.entity_id == entity_id)
+
+    total = query.count()
+    offset = (page - 1) * limit
+    records = query.order_by(LLMGenerationAuditDB.created_at.desc()).offset(offset).limit(limit).all()
+
+    return {
+        "records": [
+            LLMAuditResponse(
+                id=r.id,
+                tenant_id=r.tenant_id,
+                entity_type=r.entity_type,
+                entity_id=r.entity_id,
+                generation_mode=r.generation_mode,
+                truth_status=r.truth_status.value if r.truth_status else "generated",
+                may_be_used_as_evidence=r.may_be_used_as_evidence,
+                llm_model=r.llm_model,
+                llm_provider=r.llm_provider,
+                prompt_hash=r.prompt_hash,
+                input_summary=r.input_summary,
+                output_summary=r.output_summary,
+                metadata=r.extra_metadata,
+                created_at=r.created_at,
+            )
+            for r in records
+        ],
+        "total": total,
+        "page": page,
+        "limit": limit,
+    }
@@ -56,6 +56,7 @@ class LoeschfristCreate(BaseModel):
    responsible_person: Optional[str] = None
    release_process: Optional[str] = None
    linked_vvt_activity_ids: Optional[List[Any]] = None
+    linked_vendor_ids: Optional[List[Any]] = None
    status: str = "DRAFT"
    last_review_date: Optional[datetime] = None
    next_review_date: Optional[datetime] = None
@@ -86,6 +87,7 @@ class LoeschfristUpdate(BaseModel):
    responsible_person: Optional[str] = None
    release_process: Optional[str] = None
    linked_vvt_activity_ids: Optional[List[Any]] = None
+    linked_vendor_ids: Optional[List[Any]] = None
    status: Optional[str] = None
    last_review_date: Optional[datetime] = None
    next_review_date: Optional[datetime] = None
@@ -100,7 +102,7 @@ class StatusUpdate(BaseModel):
 # JSONB fields that need CAST
 JSONB_FIELDS = {
    "affected_groups", "data_categories", "legal_holds",
-    "storage_locations", "linked_vvt_activity_ids", "tags"
+    "storage_locations", "linked_vvt_activity_ids", "linked_vendor_ids", "tags"
 }


@@ -42,6 +42,7 @@ class ObligationCreate(BaseModel):
    priority: str = "medium"
    responsible: Optional[str] = None
    linked_systems: Optional[List[str]] = None
+    linked_vendor_ids: Optional[List[str]] = None
    assessment_id: Optional[str] = None
    rule_code: Optional[str] = None
    notes: Optional[str] = None
@@ -57,6 +58,7 @@ class ObligationUpdate(BaseModel):
    priority: Optional[str] = None
    responsible: Optional[str] = None
    linked_systems: Optional[List[str]] = None
+    linked_vendor_ids: Optional[List[str]] = None
    notes: Optional[str] = None


@@ -173,14 +175,15 @@ async def create_obligation(

    import json
    linked_systems = json.dumps(payload.linked_systems or [])
+    linked_vendor_ids = json.dumps(payload.linked_vendor_ids or [])

    row = db.execute(text("""
        INSERT INTO compliance_obligations
            (tenant_id, title, description, source, source_article, deadline,
-             status, priority, responsible, linked_systems, assessment_id, rule_code, notes)
+             status, priority, responsible, linked_systems, linked_vendor_ids, assessment_id, rule_code, notes)
        VALUES
            (:tenant_id, :title, :description, :source, :source_article, :deadline,
-             :status, :priority, :responsible, CAST(:linked_systems AS jsonb), :assessment_id, :rule_code, :notes)
+             :status, :priority, :responsible, CAST(:linked_systems AS jsonb), CAST(:linked_vendor_ids AS jsonb), :assessment_id, :rule_code, :notes)
        RETURNING *
    """), {
        "tenant_id": tenant_id,
@@ -193,6 +196,7 @@ async def create_obligation(
        "priority": payload.priority,
        "responsible": payload.responsible,
        "linked_systems": linked_systems,
+        "linked_vendor_ids": linked_vendor_ids,
        "assessment_id": payload.assessment_id,
        "rule_code": payload.rule_code,
        "notes": payload.notes,
@@ -235,6 +239,9 @@ async def update_obligation(
        if field == "linked_systems":
            updates["linked_systems"] = json.dumps(value or [])
            set_clauses.append("linked_systems = CAST(:linked_systems AS jsonb)")
+        elif field == "linked_vendor_ids":
+            updates["linked_vendor_ids"] = json.dumps(value or [])
+            set_clauses.append("linked_vendor_ids = CAST(:linked_vendor_ids AS jsonb)")
        else:
            updates[field] = value
            set_clauses.append(f"{field} = :{field}")
@@ -25,6 +25,7 @@ from sqlalchemy.orm import Session

 from classroom_engine.database import get_db

+from .audit_trail_utils import log_audit_trail
 from ..db import (
    ControlDomainEnum,
    ControlRepository,
@@ -312,8 +313,39 @@ async def get_control(
    svc: ControlExportService = Depends(get_ctrl_export_service),
 ) -> ControlResponse:
    """Get a specific control by control_id."""
-    with translate_domain_errors():
-        return svc.get_control(control_id)
+    repo = ControlRepository(db)
+    control = repo.get_by_control_id(control_id)
+    if not control:
+        raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
+
+    evidence_repo = EvidenceRepository(db)
+    evidence = evidence_repo.get_by_control(control.id)
+
+    return ControlResponse(
+        id=control.id,
+        control_id=control.control_id,
+        domain=control.domain.value if control.domain else None,
+        control_type=control.control_type.value if control.control_type else None,
+        title=control.title,
+        description=control.description,
+        pass_criteria=control.pass_criteria,
+        implementation_guidance=control.implementation_guidance,
+        code_reference=control.code_reference,
+        documentation_url=control.documentation_url,
+        is_automated=control.is_automated,
+        automation_tool=control.automation_tool,
+        automation_config=control.automation_config,
+        owner=control.owner,
+        review_frequency_days=control.review_frequency_days,
+        status=control.status.value if control.status else None,
+        status_notes=control.status_notes,
+        status_justification=control.status_justification,
+        last_reviewed_at=control.last_reviewed_at,
+        next_review_at=control.next_review_at,
+        created_at=control.created_at,
+        updated_at=control.updated_at,
+        evidence_count=len(evidence),
+    )


@router.put(
@@ -325,8 +357,83 @@ async def update_control(
    svc: ControlExportService = Depends(get_ctrl_export_service),
 ) -> ControlResponse:
    """Update a control."""
-    with translate_domain_errors():
-        return svc.update_control(control_id, update)
+    repo = ControlRepository(db)
+    control = repo.get_by_control_id(control_id)
+    if not control:
+        raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
+
+    update_data = update.model_dump(exclude_unset=True)
+
+    # Convert status string to enum and validate transition
+    if "status" in update_data:
+        try:
+            new_status_enum = ControlStatusEnum(update_data["status"])
+        except ValueError:
+            raise HTTPException(status_code=400, detail=f"Invalid status: {update_data['status']}")
+
+        # Validate status transition (Anti-Fake-Evidence)
+        from ..services.control_status_machine import validate_transition
+        current_status = control.status.value if control.status else "planned"
+        evidence_list = db.query(EvidenceDB).filter(EvidenceDB.control_id == control.id).all()
+        allowed, violations = validate_transition(
+            current_status=current_status,
+            new_status=update_data["status"],
+            evidence_list=evidence_list,
+            status_justification=update_data.get("status_justification") or update_data.get("status_notes"),
+        )
+        if not allowed:
+            raise HTTPException(
+                status_code=409,
+                detail={
+                    "error": "Status transition not allowed",
+                    "current_status": current_status,
+                    "requested_status": update_data["status"],
+                    "violations": violations,
+                }
+            )
+
+        update_data["status"] = new_status_enum
+
+    updated = repo.update(control.id, **update_data)
+    db.commit()
+
+    # Audit trail for status changes
+    new_status = updated.status.value if updated.status else None
+    if "status" in update.model_dump(exclude_unset=True) and current_status != new_status:
+        log_audit_trail(
+            db, "control", control.id, updated.control_id or updated.title,
+            "status_change",
+            performed_by=update.owner or "system",
+            field_changed="status",
+            old_value=current_status,
+            new_value=new_status,
+        )
+        db.commit()
+
+    return ControlResponse(
+        id=updated.id,
+        control_id=updated.control_id,
+        domain=updated.domain.value if updated.domain else None,
+        control_type=updated.control_type.value if updated.control_type else None,
+        title=updated.title,
+        description=updated.description,
+        pass_criteria=updated.pass_criteria,
+        implementation_guidance=updated.implementation_guidance,
+        code_reference=updated.code_reference,
+        documentation_url=updated.documentation_url,
+        is_automated=updated.is_automated,
+        automation_tool=updated.automation_tool,
+        automation_config=updated.automation_config,
+        owner=updated.owner,
+        review_frequency_days=updated.review_frequency_days,
+        status=updated.status.value if updated.status else None,
+        status_notes=updated.status_notes,
+        status_justification=updated.status_justification,
+        last_reviewed_at=updated.last_reviewed_at,
+        next_review_at=updated.next_review_at,
+        created_at=updated.created_at,
+        updated_at=updated.updated_at,
+    )


@router.put(
@@ -339,8 +446,43 @@ async def review_control(
    svc: ControlExportService = Depends(get_ctrl_export_service),
 ) -> ControlResponse:
    """Mark a control as reviewed with new status."""
-    with translate_domain_errors():
-        return svc.review_control(control_id, review)
+    repo = ControlRepository(db)
+    control = repo.get_by_control_id(control_id)
+    if not control:
+        raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
+
+    try:
+        status_enum = ControlStatusEnum(review.status)
+    except ValueError:
+        raise HTTPException(status_code=400, detail=f"Invalid status: {review.status}")
+
+    updated = repo.mark_reviewed(control.id, status_enum, review.status_notes)
+    db.commit()
+
+    return ControlResponse(
+        id=updated.id,
+        control_id=updated.control_id,
+        domain=updated.domain.value if updated.domain else None,
+        control_type=updated.control_type.value if updated.control_type else None,
+        title=updated.title,
+        description=updated.description,
+        pass_criteria=updated.pass_criteria,
+        implementation_guidance=updated.implementation_guidance,
+        code_reference=updated.code_reference,
+        documentation_url=updated.documentation_url,
+        is_automated=updated.is_automated,
+        automation_tool=updated.automation_tool,
+        automation_config=updated.automation_config,
+        owner=updated.owner,
+        review_frequency_days=updated.review_frequency_days,
+        status=updated.status.value if updated.status else None,
+        status_notes=updated.status_notes,
+        status_justification=updated.status_justification,
+        last_reviewed_at=updated.last_reviewed_at,
+        next_review_at=updated.next_review_at,
+        created_at=updated.created_at,
+        updated_at=updated.updated_at,
+    )


@router.get(
@@ -22,7 +22,9 @@ import uuid
 from datetime import datetime, timezone
 from typing import Any

-from fastapi import APIRouter, File, Form, HTTPException, UploadFile
+import httpx
+from fastapi import APIRouter, File, Form, UploadFile, HTTPException
+from pydantic import BaseModel
 from sqlalchemy import text

 from database import SessionLocal  # re-exported below for legacy test patches
@@ -96,15 +98,13 @@ async def scan_dependencies(
    db = SessionLocal()
    try:
        db.execute(
-            text(
-                "INSERT INTO compliance_screenings "
-                "(id, tenant_id, status, sbom_format, sbom_version, "
-                "total_components, total_issues, critical_issues, high_issues, "
-                "medium_issues, low_issues, sbom_data, started_at, completed_at) "
-                "VALUES (:id, :tenant_id, 'completed', 'CycloneDX', '1.5', "
-                ":total_components, :total_issues, :critical, :high, :medium, :low, "
-                ":sbom_data::jsonb, :started_at, :completed_at)"
-            ),
+            text("""INSERT INTO compliance_screenings
+               (id, tenant_id, status, sbom_format, sbom_version,
+                total_components, total_issues, critical_issues, high_issues, medium_issues, low_issues,
+                sbom_data, started_at, completed_at)
+               VALUES (:id, :tenant_id, 'completed', 'CycloneDX', '1.5',
+                       :total_components, :total_issues, :critical, :high, :medium, :low,
+                       :sbom_data::jsonb, :started_at, :completed_at)"""),
            {
                "id": screening_id,
                "tenant_id": tenant_id,
@@ -121,13 +121,11 @@ async def scan_dependencies(
        )
        for issue in issues:
            db.execute(
-                text(
-                    "INSERT INTO compliance_security_issues "
-                    "(id, screening_id, severity, title, description, cve, cvss, "
-                    "affected_component, affected_version, fixed_in, remediation, status) "
-                    "VALUES (:id, :screening_id, :severity, :title, :description, :cve, :cvss, "
-                    ":component, :version, :fixed_in, :remediation, :status)"
-                ),
+                text("""INSERT INTO compliance_security_issues
+                   (id, screening_id, severity, title, description, cve, cvss,
+                    affected_component, affected_version, fixed_in, remediation, status)
+                   VALUES (:id, :screening_id, :severity, :title, :description, :cve, :cvss,
+                           :component, :version, :fixed_in, :remediation, :status)"""),
                {
                    "id": issue["id"],
                    "screening_id": screening_id,
@@ -214,8 +212,77 @@ async def get_screening(screening_id: str) -> ScreeningResponse:
    """Get a screening result by ID."""
    db = SessionLocal()
    try:
-        with translate_domain_errors():
-            return ScreeningService(db).get_screening(screening_id)
+        result = db.execute(
+            text("""SELECT id, status, sbom_format, sbom_version,
+                      total_components, total_issues, critical_issues, high_issues,
+                      medium_issues, low_issues, sbom_data, started_at, completed_at
+               FROM compliance_screenings WHERE id = :id"""),
+            {"id": screening_id},
+        )
+        row = result.fetchone()
+        if not row:
+            raise HTTPException(status_code=404, detail="Screening not found")
+
+        # Fetch issues
+        issues_result = db.execute(
+            text("""SELECT id, severity, title, description, cve, cvss,
+                      affected_component, affected_version, fixed_in, remediation, status
+               FROM compliance_security_issues WHERE screening_id = :id"""),
+            {"id": screening_id},
+        )
+        issues_rows = issues_result.fetchall()
+
+        issues = [
+            SecurityIssueResponse(
+                id=str(r[0]), severity=r[1], title=r[2], description=r[3],
+                cve=r[4], cvss=r[5], affected_component=r[6],
+                affected_version=r[7], fixed_in=r[8], remediation=r[9], status=r[10],
+            )
+            for r in issues_rows
+        ]
+
+        # Reconstruct components from SBOM data
+        sbom_data = row[10] or {}
+        components = []
+        comp_vulns: dict[str, list[dict]] = {}
+        for issue in issues:
+            if issue.affected_component not in comp_vulns:
+                comp_vulns[issue.affected_component] = []
+            comp_vulns[issue.affected_component].append({
+                "id": issue.cve or issue.id,
+                "cve": issue.cve,
+                "severity": issue.severity,
+                "title": issue.title,
+                "cvss": issue.cvss,
+                "fixedIn": issue.fixed_in,
+            })
+
+        for sc in sbom_data.get("components", []):
+            components.append(SBOMComponentResponse(
+                name=sc["name"],
+                version=sc["version"],
+                type=sc.get("type", "library"),
+                purl=sc.get("purl", ""),
+                licenses=sc.get("licenses", []),
+                vulnerabilities=comp_vulns.get(sc["name"], []),
+            ))
+
+        return ScreeningResponse(
+            id=str(row[0]),
+            status=row[1],
+            sbom_format=row[2] or "CycloneDX",
+            sbom_version=row[3] or "1.5",
+            total_components=row[4] or 0,
+            total_issues=row[5] or 0,
+            critical_issues=row[6] or 0,
+            high_issues=row[7] or 0,
+            medium_issues=row[8] or 0,
+            low_issues=row[9] or 0,
+            components=components,
+            issues=issues,
+            started_at=str(row[11]) if row[11] else None,
+            completed_at=str(row[12]) if row[12] else None,
+        )
    finally:
        db.close()

@@ -225,8 +292,33 @@ async def list_screenings(tenant_id: str = "default") -> ScreeningListResponse:
    """List all screenings for a tenant."""
    db = SessionLocal()
    try:
-        with translate_domain_errors():
-            return ScreeningService(db).list_screenings(tenant_id)
+        result = db.execute(
+            text("""SELECT id, status, total_components, total_issues,
+                      critical_issues, high_issues, medium_issues, low_issues,
+                      started_at, completed_at, created_at
+               FROM compliance_screenings
+               WHERE tenant_id = :tenant_id
+               ORDER BY created_at DESC"""),
+            {"tenant_id": tenant_id},
+        )
+        rows = result.fetchall()
+        screenings = [
+            {
+                "id": str(r[0]),
+                "status": r[1],
+                "total_components": r[2],
+                "total_issues": r[3],
+                "critical_issues": r[4],
+                "high_issues": r[5],
+                "medium_issues": r[6],
+                "low_issues": r[7],
+                "started_at": str(r[8]) if r[8] else None,
+                "completed_at": str(r[9]) if r[9] else None,
+                "created_at": str(r[10]),
+            }
+            for r in rows
+        ]
+        return ScreeningListResponse(screenings=screenings, total=len(screenings))
    finally:
        db.close()

@@ -0,0 +1,537 @@
+"""
+TOM ↔ Canonical Control Mapping Routes.
+
+Three-layer architecture:
+  TOM Measures (~88, audit-level) → Mapping Bridge → Canonical Controls (10,000+)
+
+Endpoints:
+  POST /v1/tom-mappings/sync         — Sync canonical controls for company profile
+  GET  /v1/tom-mappings              — List all mappings for tenant/project
+  GET  /v1/tom-mappings/by-tom/{code} — Mappings for a specific TOM control
+  GET  /v1/tom-mappings/stats        — Coverage statistics
+  POST /v1/tom-mappings/manual       — Manually add a mapping
+  DELETE /v1/tom-mappings/{id}       — Remove a mapping
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+from typing import Any, Optional
+
+from fastapi import APIRouter, HTTPException, Query, Header
+from pydantic import BaseModel
+from sqlalchemy import text
+
+from database import SessionLocal
+
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/tom-mappings", tags=["tom-control-mappings"])
+
+
+# =============================================================================
+# TOM CATEGORY → CANONICAL CATEGORY MAPPING
+# =============================================================================
+
+# Maps each of the 13 TOM control categories to the canonical control
+# categories whose controls are attached to it during sync. The values are
+# compared against the `category` column of canonical_controls.
+# Each TOM category maps to 1-3 canonical categories for broad coverage; the
+# same canonical category may appear under several TOM categories.
+TOM_TO_CANONICAL_CATEGORIES: dict[str, list[str]] = {
+    "ACCESS_CONTROL":       ["authentication", "identity", "physical"],
+    "ADMISSION_CONTROL":    ["authentication", "identity", "system"],
+    "ACCESS_AUTHORIZATION": ["authentication", "identity"],
+    "TRANSFER_CONTROL":     ["network", "data_protection", "encryption"],
+    "INPUT_CONTROL":        ["application", "data_protection"],
+    "ORDER_CONTROL":        ["supply_chain", "compliance"],
+    "AVAILABILITY":         ["continuity", "system"],
+    "SEPARATION":           ["network", "data_protection"],
+    "ENCRYPTION":           ["encryption"],
+    "PSEUDONYMIZATION":     ["data_protection", "encryption"],
+    "RESILIENCE":           ["continuity", "system"],
+    "RECOVERY":             ["continuity"],
+    "REVIEW":               ["compliance", "governance", "risk"],
+}
+
+
+# =============================================================================
+# REQUEST / RESPONSE MODELS
+# =============================================================================
+
+class SyncRequest(BaseModel):
+    """Request body for the sync endpoint: the company profile used to select canonical controls."""
+    # Industry used to filter controls on applicable_industries; when unset or
+    # empty, no industry filter is applied.
+    industry: Optional[str] = None
+    # Company size used to filter controls on applicable_company_size; when
+    # unset or empty, no size filter is applied.
+    company_size: Optional[str] = None
+    # When True, the sync runs even if the stored profile hash is unchanged.
+    force: bool = False
+
+
+class ManualMappingRequest(BaseModel):
+    """Request body for manually attaching a canonical control to a TOM measure."""
+    # TOM measure code and category the canonical control is attached to.
+    tom_control_code: str
+    tom_category: str
+    # ID of the canonical control; the endpoint casts it to a UUID in SQL and
+    # verifies that the control exists before inserting.
+    canonical_control_id: str
+    # Human-readable control code, stored as supplied by the caller.
+    canonical_control_code: str
+    # Optional override; the endpoint falls back to the control's own category.
+    canonical_category: Optional[str] = None
+    # Relevance of the mapping; defaults to 1.0.
+    relevance_score: float = 1.0
+
+
+# =============================================================================
+# HELPERS
+# =============================================================================
+
+def _get_tenant_id(x_tenant_id: Optional[str]) -> str:
+    """Return the tenant ID carried by the X-Tenant-ID header value.
+
+    Raises:
+        HTTPException: 400 when the header value is missing or empty.
+    """
+    if x_tenant_id:
+        return x_tenant_id
+    raise HTTPException(status_code=400, detail="X-Tenant-ID header required")
+
+
+def _compute_profile_hash(industry: Optional[str], company_size: Optional[str]) -> str:
+    """Return a short fingerprint of the profile, used to detect profile changes.
+
+    The profile is serialized as key-sorted JSON and hashed with SHA-256; only
+    the first 16 hex characters of the digest are returned.
+    """
+    profile = {"industry": industry, "company_size": company_size}
+    serialized = json.dumps(profile, sort_keys=True)
+    digest = hashlib.sha256(serialized.encode())
+    return digest.hexdigest()[:16]
+
+
+def _mapping_row_to_dict(r) -> dict[str, Any]:
+    """Convert a tom_control_mappings row into the API response dict.
+
+    Args:
+        r: Row object exposing the mapping columns as attributes.
+
+    Returns:
+        A JSON-friendly dict: identifiers are stringified, ``created_at`` is
+        ISO-8601 formatted, and ``project_id`` / ``created_at`` are ``None``
+        when absent.
+    """
+    # Only a missing (NULL) score falls back to the default of 1.0. An explicit
+    # score of 0 is a real value and must be reported as 0.0, so test against
+    # None rather than truthiness.
+    score = 1.0 if r.relevance_score is None else float(r.relevance_score)
+    return {
+        "id": str(r.id),
+        "tenant_id": str(r.tenant_id),
+        "project_id": str(r.project_id) if r.project_id else None,
+        "tom_control_code": r.tom_control_code,
+        "tom_category": r.tom_category,
+        "canonical_control_id": str(r.canonical_control_id),
+        "canonical_control_code": r.canonical_control_code,
+        "canonical_category": r.canonical_category,
+        "mapping_type": r.mapping_type,
+        "relevance_score": score,
+        "created_at": r.created_at.isoformat() if r.created_at else None,
+    }
+
+
+# =============================================================================
+# SYNC ENDPOINT
+# =============================================================================
+
+@router.post("/sync")
+async def sync_mappings(
+    body: SyncRequest,
+    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
+    project_id: Optional[str] = Query(None),
+):
+    """
+    Sync canonical controls to TOM measures based on company profile.
+
+    Algorithm:
+    1. Compute profile hash → skip if unchanged (unless force=True)
+    2. Delete the existing auto-mappings for this tenant/project
+       (manual mappings are left untouched)
+    3. For each TOM category, find matching canonical controls by:
+       - Category mapping (TOM category → canonical categories)
+       - Industry filter (applicable_industries JSONB containment)
+       - Company size filter (applicable_company_size JSONB containment)
+       - Only approved + customer_visible controls
+       and insert one auto-mapping per matching control
+    4. Upsert the sync state and commit
+
+    Returns a dict with status "unchanged" (plus message and profile_hash) when
+    the sync is skipped, otherwise status "synced" with the mapping counters.
+
+    Raises HTTPException 400 when the X-Tenant-ID header is missing.
+    """
+    tenant_id = _get_tenant_id(x_tenant_id)
+    profile_hash = _compute_profile_hash(body.industry, body.company_size)
+
+    with SessionLocal() as db:
+        # Check if sync is needed (profile unchanged)
+        if not body.force:
+            # A NULL project_id is matched explicitly because `project_id = NULL`
+            # is never true in SQL.
+            existing = db.execute(
+                text("""
+                    SELECT profile_hash FROM tom_control_sync_state
+                    WHERE tenant_id = :tid AND (project_id = :pid OR (project_id IS NULL AND :pid IS NULL))
+                """),
+                {"tid": tenant_id, "pid": project_id},
+            ).fetchone()
+            if existing and existing.profile_hash == profile_hash:
+                return {
+                    "status": "unchanged",
+                    "message": "Profile unchanged since last sync",
+                    "profile_hash": profile_hash,
+                }
+
+        # Delete old auto-mappings for this tenant+project
+        db.execute(
+            text("""
+                DELETE FROM tom_control_mappings
+                WHERE tenant_id = :tid
+                  AND (project_id = :pid OR (project_id IS NULL AND :pid IS NULL))
+                  AND mapping_type = 'auto'
+            """),
+            {"tid": tenant_id, "pid": project_id},
+        )
+
+        total_mappings = 0
+        canonical_ids_matched = set()
+        tom_codes_covered = set()
+
+        # For each TOM category, find matching canonical controls
+        for tom_category, canonical_categories in TOM_TO_CANONICAL_CATEGORIES.items():
+            # Build an OR-ed equality filter over the canonical categories; the
+            # category values themselves are bound as :cat_0, :cat_1, ...
+            cat_conditions = " OR ".join(
+                f"category = :cat_{i}" for i in range(len(canonical_categories))
+            )
+            cat_params = {f"cat_{i}": c for i, c in enumerate(canonical_categories)}
+
+            # Build industry filter: controls with no industry restriction, with
+            # "all", or containing the requested industry are accepted.
+            industry_filter = ""
+            if body.industry:
+                industry_filter = """
+                    AND (
+                        applicable_industries IS NULL
+                        OR applicable_industries @> '"all"'::jsonb
+                        OR applicable_industries @> (:industry)::jsonb
+                    )
+                """
+                # Bound as a one-element JSON array for the @> containment test.
+                cat_params["industry"] = json.dumps([body.industry])
+
+            # Build company size filter (same shape as the industry filter)
+            size_filter = ""
+            if body.company_size:
+                size_filter = """
+                    AND (
+                        applicable_company_size IS NULL
+                        OR applicable_company_size @> '"all"'::jsonb
+                        OR applicable_company_size @> (:csize)::jsonb
+                    )
+                """
+                cat_params["csize"] = json.dumps([body.company_size])
+
+            # Only the fragments assembled above are interpolated into the SQL
+            # text; all request-supplied values are passed as bind parameters.
+            query = f"""
+                SELECT id, control_id, category
+                FROM canonical_controls
+                WHERE ({cat_conditions})
+                  AND release_state = 'approved'
+                  AND customer_visible = true
+                  {industry_filter}
+                  {size_filter}
+                ORDER BY control_id
+            """
+
+            rows = db.execute(text(query), cat_params).fetchall()
+
+            # No per-measure TOM codes are resolved here: the TOM category name
+            # is stored in both tom_control_code and tom_category, which gives
+            # one mapping per canonical control per TOM category.
+            # (Individual TOM codes reportedly follow the pattern TOM-XX-NN,
+            # where XX is a category abbreviation; they are not used in this loop.)
+            for row in rows:
+                # NOTE(review): with a NULL project_id a plain unique constraint
+                # treats rows as distinct, so ON CONFLICT may never trigger —
+                # confirm against the constraint definition.
+                db.execute(
+                    text("""
+                        INSERT INTO tom_control_mappings (
+                            tenant_id, project_id, tom_control_code, tom_category,
+                            canonical_control_id, canonical_control_code, canonical_category,
+                            mapping_type, relevance_score
+                        ) VALUES (
+                            :tid, :pid, :tom_cat, :tom_cat,
+                            :cc_id, :cc_code, :cc_category,
+                            'auto', 1.00
+                        )
+                        ON CONFLICT (tenant_id, project_id, tom_control_code, canonical_control_id)
+                        DO NOTHING
+                    """),
+                    {
+                        "tid": tenant_id,
+                        "pid": project_id,
+                        "tom_cat": tom_category,
+                        "cc_id": str(row.id),
+                        "cc_code": row.control_id,
+                        "cc_category": row.category,
+                    },
+                )
+                # NOTE(review): counted per matched control, even if the insert
+                # was skipped by ON CONFLICT DO NOTHING (e.g. an existing manual
+                # mapping with the same key).
+                total_mappings += 1
+                canonical_ids_matched.add(str(row.id))
+                tom_codes_covered.add(tom_category)
+
+        # Upsert sync state
+        db.execute(
+            text("""
+                INSERT INTO tom_control_sync_state (
+                    tenant_id, project_id, profile_hash,
+                    total_mappings, canonical_controls_matched, tom_controls_covered,
+                    last_synced_at
+                ) VALUES (
+                    :tid, :pid, :hash,
+                    :total, :matched, :covered,
+                    NOW()
+                )
+                ON CONFLICT (tenant_id, project_id)
+                DO UPDATE SET
+                    profile_hash = :hash,
+                    total_mappings = :total,
+                    canonical_controls_matched = :matched,
+                    tom_controls_covered = :covered,
+                    last_synced_at = NOW()
+            """),
+            {
+                "tid": tenant_id,
+                "pid": project_id,
+                "hash": profile_hash,
+                "total": total_mappings,
+                "matched": len(canonical_ids_matched),
+                "covered": len(tom_codes_covered),
+            },
+        )
+
+        # Delete, inserts and sync-state upsert are committed together.
+        db.commit()
+
+    return {
+        "status": "synced",
+        "profile_hash": profile_hash,
+        "total_mappings": total_mappings,
+        "canonical_controls_matched": len(canonical_ids_matched),
+        "tom_categories_covered": len(tom_codes_covered),
+    }
+
+
+# =============================================================================
+# LIST MAPPINGS
+# =============================================================================
+
+@router.get("")
+async def list_mappings(
+    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
+    project_id: Optional[str] = Query(None),
+    tom_category: Optional[str] = Query(None),
+    mapping_type: Optional[str] = Query(None),
+    limit: int = Query(500, ge=1, le=5000),
+    offset: int = Query(0, ge=0),
+):
+    """List all TOM ↔ canonical control mappings for tenant/project.
+
+    Optional ``tom_category`` and ``mapping_type`` filters narrow the result;
+    ``limit`` / ``offset`` paginate it.
+
+    Returns a dict with ``mappings`` (the requested page, each entry enriched
+    with the canonical control's title and severity) and ``total`` (the number
+    of mappings matching the same filters, ignoring pagination).
+
+    Raises HTTPException 400 when the X-Tenant-ID header is missing.
+    """
+    tenant_id = _get_tenant_id(x_tenant_id)
+
+    # The filters are built once and shared by the page query and the count
+    # query, so `total` always describes the result set that is being paginated
+    # (previously the count ignored the mapping_type filter).
+    filter_sql = ""
+    filter_params: dict[str, Any] = {"tid": tenant_id, "pid": project_id}
+    if tom_category:
+        filter_sql += " AND m.tom_category = :tcat"
+        filter_params["tcat"] = tom_category
+    if mapping_type:
+        filter_sql += " AND m.mapping_type = :mtype"
+        filter_params["mtype"] = mapping_type
+
+    # A NULL project_id is matched explicitly because `project_id = NULL` is
+    # never true in SQL.
+    where_sql = """
+        WHERE m.tenant_id = :tid
+          AND (m.project_id = :pid OR (m.project_id IS NULL AND :pid IS NULL))
+    """ + filter_sql
+
+    query = (
+        """
+        SELECT m.*, cc.title as canonical_title, cc.severity as canonical_severity
+        FROM tom_control_mappings m
+        LEFT JOIN canonical_controls cc ON cc.id = m.canonical_control_id
+        """
+        + where_sql
+        + " ORDER BY m.tom_category, m.canonical_control_code"
+        + " LIMIT :lim OFFSET :off"
+    )
+    count_query = "SELECT count(*) FROM tom_control_mappings m" + where_sql
+
+    # Pagination parameters are bound only for the page query.
+    page_params: dict[str, Any] = {**filter_params, "lim": limit, "off": offset}
+
+    with SessionLocal() as db:
+        rows = db.execute(text(query), page_params).fetchall()
+        total = db.execute(text(count_query), filter_params).scalar()
+
+    mappings = []
+    for r in rows:
+        d = _mapping_row_to_dict(r)
+        # Joined columns come from a LEFT JOIN, so they may be absent (None).
+        d["canonical_title"] = getattr(r, "canonical_title", None)
+        d["canonical_severity"] = getattr(r, "canonical_severity", None)
+        mappings.append(d)
+
+    return {"mappings": mappings, "total": total}
+
+
+# =============================================================================
+# MAPPINGS BY TOM CONTROL
+# =============================================================================
+
+@router.get("/by-tom/{tom_code}")
+async def get_mappings_by_tom(
+    tom_code: str,
+    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
+    project_id: Optional[str] = Query(None),
+):
+    """Get all canonical controls mapped to a specific TOM control code or category.
+
+    ``tom_code`` is compared against both ``tom_control_code`` and
+    ``tom_category``, so either a control code or a category name matches.
+
+    Returns a dict with the requested ``tom_code``, the ``mappings`` (each
+    enriched with the canonical control's title, severity and objective) and
+    ``total``, the number of mappings returned.
+
+    Raises HTTPException 400 when the X-Tenant-ID header is missing.
+    """
+    tenant_id = _get_tenant_id(x_tenant_id)
+
+    with SessionLocal() as db:
+        # A NULL project_id is matched explicitly because `project_id = NULL`
+        # is never true in SQL.
+        rows = db.execute(
+            text("""
+                SELECT m.*, cc.title as canonical_title, cc.severity as canonical_severity,
+                       cc.objective as canonical_objective
+                FROM tom_control_mappings m
+                LEFT JOIN canonical_controls cc ON cc.id = m.canonical_control_id
+                WHERE m.tenant_id = :tid
+                  AND (m.project_id = :pid OR (m.project_id IS NULL AND :pid IS NULL))
+                  AND (m.tom_control_code = :code OR m.tom_category = :code)
+                ORDER BY m.canonical_control_code
+            """),
+            {"tid": tenant_id, "pid": project_id, "code": tom_code},
+        ).fetchall()
+
+    mappings = []
+    for r in rows:
+        d = _mapping_row_to_dict(r)
+        # Joined columns come from a LEFT JOIN, so they may be None.
+        d["canonical_title"] = getattr(r, "canonical_title", None)
+        d["canonical_severity"] = getattr(r, "canonical_severity", None)
+        d["canonical_objective"] = getattr(r, "canonical_objective", None)
+        mappings.append(d)
+
+    return {"tom_code": tom_code, "mappings": mappings, "total": len(mappings)}
+
+
+# =============================================================================
+# STATS
+# =============================================================================
+
+@router.get("/stats")
+async def get_mapping_stats(
+    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
+    project_id: Optional[str] = Query(None),
+):
+    """Coverage statistics for TOM ↔ canonical control mappings.
+
+    Returns a dict with:
+      - ``sync_state``: values recorded by the last sync (None / 0 defaults
+        when no sync state row exists for the tenant/project)
+      - ``category_breakdown``: per TOM category counts of total, distinct,
+        auto and manual mappings
+      - ``total_canonical_controls_available``: number of approved,
+        customer-visible canonical controls (not restricted to the tenant)
+
+    Raises HTTPException 400 when the X-Tenant-ID header is missing.
+    """
+    tenant_id = _get_tenant_id(x_tenant_id)
+
+    with SessionLocal() as db:
+        # Sync state
+        sync_state = db.execute(
+            text("""
+                SELECT * FROM tom_control_sync_state
+                WHERE tenant_id = :tid
+                  AND (project_id = :pid OR (project_id IS NULL AND :pid IS NULL))
+            """),
+            {"tid": tenant_id, "pid": project_id},
+        ).fetchone()
+
+        # Per-category breakdown
+        category_stats = db.execute(
+            text("""
+                SELECT tom_category,
+                       count(*) as total_mappings,
+                       count(DISTINCT canonical_control_id) as unique_controls,
+                       count(*) FILTER (WHERE mapping_type = 'auto') as auto_count,
+                       count(*) FILTER (WHERE mapping_type = 'manual') as manual_count
+                FROM tom_control_mappings
+                WHERE tenant_id = :tid
+                  AND (project_id = :pid OR (project_id IS NULL AND :pid IS NULL))
+                GROUP BY tom_category
+                ORDER BY tom_category
+            """),
+            {"tid": tenant_id, "pid": project_id},
+        ).fetchall()
+
+        # Total canonical controls in DB (approved + visible)
+        total_canonical = db.execute(
+            text("""
+                SELECT count(*) FROM canonical_controls
+                WHERE release_state = 'approved' AND customer_visible = true
+            """)
+        ).scalar()
+
+    # Every sync_state field falls back to a default when no row was found.
+    return {
+        "sync_state": {
+            "profile_hash": sync_state.profile_hash if sync_state else None,
+            "total_mappings": sync_state.total_mappings if sync_state else 0,
+            "canonical_controls_matched": sync_state.canonical_controls_matched if sync_state else 0,
+            "tom_controls_covered": sync_state.tom_controls_covered if sync_state else 0,
+            "last_synced_at": sync_state.last_synced_at.isoformat() if sync_state and sync_state.last_synced_at else None,
+        },
+        "category_breakdown": [
+            {
+                "tom_category": r.tom_category,
+                "total_mappings": r.total_mappings,
+                "unique_controls": r.unique_controls,
+                "auto_count": r.auto_count,
+                "manual_count": r.manual_count,
+            }
+            for r in category_stats
+        ],
+        "total_canonical_controls_available": total_canonical or 0,
+    }
+
+
+# =============================================================================
+# MANUAL MAPPING
+# =============================================================================
+
+@router.post("/manual", status_code=201)
+async def add_manual_mapping(
+    body: ManualMappingRequest,
+    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
+    project_id: Optional[str] = Query(None),
+):
+    """Manually add a canonical control to a TOM measure.
+
+    Verifies that the referenced canonical control exists, inserts a mapping
+    with mapping_type 'manual' and returns the created row as a dict.
+
+    Raises:
+        HTTPException 400: the X-Tenant-ID header is missing.
+        HTTPException 404: no canonical control with the given ID exists.
+        HTTPException 409: the insert failed with a unique/duplicate error.
+    """
+    tenant_id = _get_tenant_id(x_tenant_id)
+
+    with SessionLocal() as db:
+        # Verify canonical control exists
+        cc = db.execute(
+            text("SELECT id, control_id, category FROM canonical_controls WHERE id = CAST(:cid AS uuid)"),
+            {"cid": body.canonical_control_id},
+        ).fetchone()
+        if not cc:
+            raise HTTPException(status_code=404, detail="Canonical control not found")
+
+        try:
+            # NOTE(review): canonical_control_code is stored as supplied in the
+            # request body, not taken from the verified control row — confirm
+            # this is intended.
+            row = db.execute(
+                text("""
+                    INSERT INTO tom_control_mappings (
+                        tenant_id, project_id, tom_control_code, tom_category,
+                        canonical_control_id, canonical_control_code, canonical_category,
+                        mapping_type, relevance_score
+                    ) VALUES (
+                        :tid, :pid, :tom_code, :tom_cat,
+                        CAST(:cc_id AS uuid), :cc_code, :cc_category,
+                        'manual', :score
+                    )
+                    RETURNING *
+                """),
+                {
+                    "tid": tenant_id,
+                    "pid": project_id,
+                    "tom_code": body.tom_control_code,
+                    "tom_cat": body.tom_category,
+                    "cc_id": body.canonical_control_id,
+                    "cc_code": body.canonical_control_code,
+                    # Fall back to the control's own category when none is given.
+                    "cc_category": body.canonical_category or cc.category,
+                    "score": body.relevance_score,
+                },
+            ).fetchone()
+            db.commit()
+        except Exception as e:
+            # Duplicate detection relies on the error message text; any other
+            # failure is re-raised unchanged.
+            if "unique" in str(e).lower() or "duplicate" in str(e).lower():
+                raise HTTPException(status_code=409, detail="Mapping already exists")
+            raise
+
+    return _mapping_row_to_dict(row)
+
+
+# =============================================================================
+# DELETE MAPPING
+# =============================================================================
+
+@router.delete("/{mapping_id}", status_code=204)
+async def delete_mapping(
+    mapping_id: str,
+    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
+):
+    """Delete one mapping (auto or manual) that belongs to the calling tenant.
+
+    Raises:
+        HTTPException 400: the X-Tenant-ID header is missing.
+        HTTPException 404: no mapping with this ID exists for the tenant.
+    """
+    tenant_id = _get_tenant_id(x_tenant_id)
+    delete_params = {"mid": mapping_id, "tid": tenant_id}
+
+    with SessionLocal() as db:
+        # The tenant filter keeps one tenant from deleting another's mapping.
+        outcome = db.execute(
+            text("""
+                DELETE FROM tom_control_mappings
+                WHERE id = CAST(:mid AS uuid) AND tenant_id = :tid
+            """),
+            delete_params,
+        )
+        nothing_deleted = outcome.rowcount == 0
+        if nothing_deleted:
+            raise HTTPException(status_code=404, detail="Mapping not found")
+        db.commit()
+
+    return None
@@ -0,0 +1,427 @@
+"""
+FastAPI routes for VVT Master Libraries + Process Templates.
+
+Library endpoints (read-only, global):
+  GET /vvt/libraries                       — Overview: all library types + counts
+  GET /vvt/libraries/data-subjects         — Data subjects (filter: typical_for)
+  GET /vvt/libraries/data-categories       — Hierarchical (filter: parent_id, is_art9, flat)
+  GET /vvt/libraries/recipients            — Recipients (filter: type)
+  GET /vvt/libraries/legal-bases           — Legal bases (filter: is_art9, type)
+  GET /vvt/libraries/retention-rules       — Retention rules
+  GET /vvt/libraries/transfer-mechanisms   — Transfer mechanisms
+  GET /vvt/libraries/purposes              — Purposes (filter: typical_for)
+  GET /vvt/libraries/toms                  — TOMs (filter: category)
+
+Template endpoints:
+  GET  /vvt/templates                      — List templates (filter: business_function, search)
+  GET  /vvt/templates/{id}                 — Single template with resolved labels
+  POST /vvt/templates/{id}/instantiate     — Create VVT activity from template
+"""
+
+import logging
+import uuid
+from datetime import datetime
+from typing import Optional
+
+from fastapi import APIRouter, Depends, HTTPException, Query, Request
+from sqlalchemy.orm import Session
+
+from classroom_engine.database import get_db
+
+from ..db.vvt_library_models import (
+    VVTLibDataSubjectDB,
+    VVTLibDataCategoryDB,
+    VVTLibRecipientDB,
+    VVTLibLegalBasisDB,
+    VVTLibRetentionRuleDB,
+    VVTLibTransferMechanismDB,
+    VVTLibPurposeDB,
+    VVTLibTomDB,
+    VVTProcessTemplateDB,
+)
+from ..db.vvt_models import VVTActivityDB, VVTAuditLogDB
+from .tenant_utils import get_tenant_id
+
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/vvt", tags=["compliance-vvt-libraries"])
+
+
+# ============================================================================
+# Helper: row → dict
+# ============================================================================
+
+def _row_to_dict(row, extra_fields=None):
+    """Serialize a library row into a plain dict.
+
+    ``id`` and ``label_de`` are always included. ``description_de`` is added
+    only when it is truthy, ``sort_order`` whenever the attribute exists
+    (even if it is None), and each name in ``extra_fields`` only when the
+    row has that attribute with a non-None value.
+    """
+    result = {"id": row.id, "label_de": row.label_de}
+
+    description = getattr(row, 'description_de', None)
+    if description:
+        result["description_de"] = description
+
+    # A sentinel distinguishes "attribute missing" from "attribute is None".
+    absent = object()
+    sort_order = getattr(row, 'sort_order', absent)
+    if sort_order is not absent:
+        result["sort_order"] = sort_order
+
+    for field_name in extra_fields or ():
+        value = getattr(row, field_name, None)
+        if value is not None:
+            result[field_name] = value
+    return result
+
+
+# ============================================================================
+# Library Overview
+# ============================================================================
+
+@router.get("/libraries")
+async def get_libraries_overview(db: Session = Depends(get_db)):
+    """Return every library type together with its row count."""
+    # (type slug, ORM model) pairs; one COUNT query is issued per pair, in this order.
+    library_models = (
+        ("data-subjects", VVTLibDataSubjectDB),
+        ("data-categories", VVTLibDataCategoryDB),
+        ("recipients", VVTLibRecipientDB),
+        ("legal-bases", VVTLibLegalBasisDB),
+        ("retention-rules", VVTLibRetentionRuleDB),
+        ("transfer-mechanisms", VVTLibTransferMechanismDB),
+        ("purposes", VVTLibPurposeDB),
+        ("toms", VVTLibTomDB),
+    )
+    overview = []
+    for slug, model in library_models:
+        overview.append({"type": slug, "count": db.query(model).count()})
+    return {"libraries": overview}
+
+
+# ============================================================================
+# Data Subjects
+# ============================================================================
+
+@router.get("/libraries/data-subjects")
+async def list_data_subjects(
+    typical_for: Optional[str] = Query(None, description="Filter by business function"),
+    db: Session = Depends(get_db),
+):
+    """List data-subject library entries ordered by ``sort_order``.
+
+    When ``typical_for`` is given, only entries whose ``typical_for`` value
+    contains it are returned; this filter runs in Python after the query.
+    """
+    ordered_rows = (
+        db.query(VVTLibDataSubjectDB)
+        .order_by(VVTLibDataSubjectDB.sort_order)
+        .all()
+    )
+    entries = [_row_to_dict(row, ["art9_relevant", "typical_for"]) for row in ordered_rows]
+    if not typical_for:
+        return entries
+    return [entry for entry in entries if typical_for in (entry.get("typical_for") or [])]
+
+
+# ============================================================================
+# Data Categories (hierarchical)
+# ============================================================================
+
+@router.get("/libraries/data-categories")
+async def list_data_categories(
+    flat: Optional[bool] = Query(False, description="Return flat list instead of tree"),
+    parent_id: Optional[str] = Query(None),
+    is_art9: Optional[bool] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """List data categories, as a tree by default.
+
+    A flat list ordered by ``sort_order`` is returned when ``flat`` is set or
+    when either filter (``parent_id``, ``is_art9``) is used. Otherwise the
+    rows are nested through a ``children`` key at every depth, starting from
+    the entries without a parent. Entries whose parent is not part of the
+    result set are not reachable from a root and are therefore omitted.
+    """
+    query = db.query(VVTLibDataCategoryDB).order_by(VVTLibDataCategoryDB.sort_order)
+    if parent_id is not None:
+        query = query.filter(VVTLibDataCategoryDB.parent_id == parent_id)
+    if is_art9 is not None:
+        query = query.filter(VVTLibDataCategoryDB.is_art9 == is_art9)
+    rows = query.all()
+
+    extra = ["parent_id", "is_art9", "is_art10", "risk_weight", "default_retention_rule", "default_legal_basis"]
+    items = [_row_to_dict(r, extra) for r in rows]
+
+    if flat or parent_id is not None or is_art9 is not None:
+        return items
+
+    # Group by parent id; _row_to_dict omits a None parent_id, so roots land under None.
+    by_parent: dict = {}
+    for item in items:
+        by_parent.setdefault(item.get("parent_id"), []).append(item)
+
+    # Attach children to every node (not just roots) so deeper levels are kept.
+    # The grouped lists hold the same dict objects, so nesting follows automatically.
+    for item in items:
+        children = by_parent.get(item["id"])
+        if children:
+            item["children"] = children
+
+    return by_parent.get(None, [])
+
+
+# ============================================================================
+# Recipients
+# ============================================================================
+
+@router.get("/libraries/recipients")
+async def list_recipients(
+    type: Optional[str] = Query(None, description="INTERNAL, PROCESSOR, CONTROLLER, AUTHORITY"),
+    db: Session = Depends(get_db),
+):
+    """List recipient library entries ordered by ``sort_order``, optionally restricted to one ``type``."""
+    model = VVTLibRecipientDB
+    recipients = db.query(model)
+    if type:
+        recipients = recipients.filter(model.type == type)
+    exposed = ["type", "is_third_country", "country"]
+    return [_row_to_dict(row, exposed) for row in recipients.order_by(model.sort_order).all()]
+
+
+# ============================================================================
+# Legal Bases
+# ============================================================================
+
+@router.get("/libraries/legal-bases")
+async def list_legal_bases(
+    is_art9: Optional[bool] = Query(None),
+    type: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """List legal-basis library entries ordered by ``sort_order``.
+
+    ``is_art9`` filters on the flag when it is explicitly true or false;
+    ``type`` filters on an exact type match when non-empty.
+    """
+    model = VVTLibLegalBasisDB
+    legal_bases = db.query(model)
+    if is_art9 is not None:
+        legal_bases = legal_bases.filter(model.is_art9 == is_art9)
+    if type:
+        legal_bases = legal_bases.filter(model.type == type)
+    exposed = ["article", "type", "is_art9", "typical_national_law"]
+    return [_row_to_dict(row, exposed) for row in legal_bases.order_by(model.sort_order).all()]
+
+
+# ============================================================================
+# Retention Rules
+# ============================================================================
+
+@router.get("/libraries/retention-rules")
+async def list_retention_rules(db: Session = Depends(get_db)):
+    """List all retention-rule library entries ordered by ``sort_order``."""
+    exposed = ["legal_basis", "duration", "duration_unit", "start_event", "deletion_procedure"]
+    ordered = db.query(VVTLibRetentionRuleDB).order_by(VVTLibRetentionRuleDB.sort_order)
+    return [_row_to_dict(rule, exposed) for rule in ordered.all()]
+
+
+# ============================================================================
+# Transfer Mechanisms
+# ============================================================================
+
+@router.get("/libraries/transfer-mechanisms")
+async def list_transfer_mechanisms(db: Session = Depends(get_db)):
+    """List all transfer-mechanism library entries ordered by ``sort_order``."""
+    exposed = ["article", "requires_tia"]
+    ordered = db.query(VVTLibTransferMechanismDB).order_by(VVTLibTransferMechanismDB.sort_order)
+    return [_row_to_dict(mechanism, exposed) for mechanism in ordered.all()]
+
+
+# ============================================================================
+# Purposes
+# ============================================================================
+
+@router.get("/libraries/purposes")
+async def list_purposes(
+    typical_for: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """List purpose library entries ordered by ``sort_order``.
+
+    When ``typical_for`` is given, only entries whose ``typical_for`` value
+    contains it are returned; this filter runs in Python after the query.
+    """
+    ordered_rows = db.query(VVTLibPurposeDB).order_by(VVTLibPurposeDB.sort_order).all()
+    entries = [_row_to_dict(row, ["typical_legal_basis", "typical_for"]) for row in ordered_rows]
+    if not typical_for:
+        return entries
+    return [entry for entry in entries if typical_for in (entry.get("typical_for") or [])]
+
+
+# ============================================================================
+# TOMs
+# ============================================================================
+
+@router.get("/libraries/toms")
+async def list_toms(
+    category: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """List TOM library entries ordered by ``sort_order``, optionally restricted to one ``category``."""
+    model = VVTLibTomDB
+    toms = db.query(model)
+    if category:
+        toms = toms.filter(model.category == category)
+    exposed = ["category", "art32_reference"]
+    return [_row_to_dict(row, exposed) for row in toms.order_by(model.sort_order).all()]
+
+
+# ============================================================================
+# Process Templates
+# ============================================================================
+
+def _template_to_dict(t: VVTProcessTemplateDB) -> dict:
+    """Serialize a process template into a plain dict.
+
+    Reference lists, ``typical_systems`` and ``tags`` fall back to an empty
+    list, ``protection_level`` to "MEDIUM" and ``dpia_required`` to False
+    when the stored value is falsy. All other attributes are copied as-is.
+    """
+    # (attribute, fallback factory) in output key order; None means "copy unchanged".
+    spec = (
+        ("id", None),
+        ("name", None),
+        ("description", None),
+        ("business_function", None),
+        ("purpose_refs", list),
+        ("legal_basis_refs", list),
+        ("data_subject_refs", list),
+        ("data_category_refs", list),
+        ("recipient_refs", list),
+        ("tom_refs", list),
+        ("transfer_mechanism_refs", list),
+        ("retention_rule_ref", None),
+        ("typical_systems", list),
+        ("protection_level", lambda: "MEDIUM"),
+        ("dpia_required", lambda: False),
+        ("risk_score", None),
+        ("tags", list),
+        ("is_system", None),
+        ("sort_order", None),
+    )
+    payload = {}
+    for attr, fallback in spec:
+        value = getattr(t, attr)
+        if fallback is not None and not value:
+            value = fallback()
+        payload[attr] = value
+    return payload
+
+
+def _resolve_labels(template_dict: dict, db: Session) -> dict:
+    """Add ``*_labels`` mappings (library id -> ``label_de``) to a template dict.
+
+    For each non-empty reference list a ``{id: label}`` dict is stored under
+    the matching labels key; ids without a library row map to themselves.
+    A ``retention_rule_label`` is added when the referenced rule exists.
+    The dict is modified in place and also returned.
+    """
+    lookups = (
+        ("purpose_refs", VVTLibPurposeDB, "purpose_labels"),
+        ("legal_basis_refs", VVTLibLegalBasisDB, "legal_basis_labels"),
+        ("data_subject_refs", VVTLibDataSubjectDB, "data_subject_labels"),
+        ("data_category_refs", VVTLibDataCategoryDB, "data_category_labels"),
+        ("recipient_refs", VVTLibRecipientDB, "recipient_labels"),
+        ("tom_refs", VVTLibTomDB, "tom_labels"),
+        ("transfer_mechanism_refs", VVTLibTransferMechanismDB, "transfer_mechanism_labels"),
+    )
+    for refs_key, model, labels_key in lookups:
+        ref_ids = template_dict.get(refs_key) or []
+        if not ref_ids:
+            continue
+        known = {
+            entry.id: entry.label_de
+            for entry in db.query(model).filter(model.id.in_(ref_ids)).all()
+        }
+        resolved = {}
+        for ref_id in ref_ids:
+            resolved[ref_id] = known.get(ref_id, ref_id)
+        template_dict[labels_key] = resolved
+
+    # The retention rule is a single reference rather than a list.
+    rule_id = template_dict.get("retention_rule_ref")
+    if rule_id:
+        rule = (
+            db.query(VVTLibRetentionRuleDB)
+            .filter(VVTLibRetentionRuleDB.id == rule_id)
+            .first()
+        )
+        if rule:
+            template_dict["retention_rule_label"] = rule.label_de
+
+    return template_dict
+
+
+@router.get("/templates")
+async def list_templates(
+    business_function: Optional[str] = Query(None),
+    search: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """List process templates ordered by ``sort_order``.
+
+    ``business_function`` filters on an exact match; ``search`` does a
+    case-insensitive substring match against name or description.
+    """
+    model = VVTProcessTemplateDB
+    selection = db.query(model).order_by(model.sort_order)
+    if business_function:
+        selection = selection.filter(model.business_function == business_function)
+    if search:
+        pattern = f"%{search}%"
+        name_hit = model.name.ilike(pattern)
+        description_hit = model.description.ilike(pattern)
+        selection = selection.filter(name_hit | description_hit)
+    return [_template_to_dict(template) for template in selection.all()]
+
+
+@router.get("/templates/{template_id}")
+async def get_template(
+    template_id: str,
+    db: Session = Depends(get_db),
+):
+    """Return one template including label mappings for its library references.
+
+    Responds with 404 when no template has the given id.
+    """
+    template = (
+        db.query(VVTProcessTemplateDB)
+        .filter(VVTProcessTemplateDB.id == template_id)
+        .first()
+    )
+    if template:
+        return _resolve_labels(_template_to_dict(template), db)
+    raise HTTPException(status_code=404, detail=f"Template '{template_id}' not found")
+
+
+@router.post("/templates/{template_id}/instantiate", status_code=201)
+async def instantiate_template(
+    template_id: str,
+    http_request: Request,
+    tid: str = Depends(get_tenant_id),
+    db: Session = Depends(get_db),
+):
+    """Create a new VVT activity from a process template.
+
+    The template's library references are copied onto a new DRAFT activity
+    and additionally resolved to ``label_de`` texts for the free-text
+    fields. A CREATE audit-log entry is written and both rows are persisted
+    with a single commit.
+
+    Raises:
+        HTTPException: 404 when no template with ``template_id`` exists.
+    """
+    t = db.query(VVTProcessTemplateDB).filter(VVTProcessTemplateDB.id == template_id).first()
+    if not t:
+        raise HTTPException(status_code=404, detail=f"Template '{template_id}' not found")
+
+    # A plain count + 1 would reuse an existing id once an activity has been deleted.
+    vvt_id = _next_vvt_id(db, tid)
+    user_id = http_request.headers.get("X-User-ID", "system")
+
+    # Resolve library IDs to freetext labels for backward-compat fields
+    purpose_labels = _resolve_ids(db, VVTLibPurposeDB, t.purpose_refs or [])
+    legal_labels = _resolve_ids(db, VVTLibLegalBasisDB, t.legal_basis_refs or [])
+    subject_labels = _resolve_ids(db, VVTLibDataSubjectDB, t.data_subject_refs or [])
+    category_labels = _resolve_ids(db, VVTLibDataCategoryDB, t.data_category_refs or [])
+    recipient_labels = _resolve_ids(db, VVTLibRecipientDB, t.recipient_refs or [])
+
+    # Resolve retention rule; stays empty when unset or the rule row is missing
+    retention_period = {}
+    if t.retention_rule_ref:
+        rr = db.query(VVTLibRetentionRuleDB).filter(VVTLibRetentionRuleDB.id == t.retention_rule_ref).first()
+        if rr:
+            retention_period = {
+                "description": rr.label_de,
+                "legalBasis": rr.legal_basis or "",
+                "deletionProcedure": rr.deletion_procedure or "",
+                "duration": rr.duration,
+                "durationUnit": rr.duration_unit,
+            }
+
+    # Build structured TOMs from tom_refs; TOMs whose category is not one of
+    # the five keys below are skipped.
+    structured_toms = {"accessControl": [], "confidentiality": [], "integrity": [], "availability": [], "separation": []}
+    if t.tom_refs:
+        tom_rows = db.query(VVTLibTomDB).filter(VVTLibTomDB.id.in_(t.tom_refs)).all()
+        for tr in tom_rows:
+            cat = tr.category
+            if cat in structured_toms:
+                structured_toms[cat].append(tr.label_de)
+
+    act = VVTActivityDB(
+        tenant_id=tid,
+        vvt_id=vvt_id,
+        name=t.name,
+        description=t.description or "",
+        purposes=purpose_labels,
+        legal_bases=[{"type": lid, "description": lbl} for lid, lbl in zip(t.legal_basis_refs or [], legal_labels)],
+        data_subject_categories=subject_labels,
+        personal_data_categories=category_labels,
+        recipient_categories=[{"type": "unknown", "name": lbl} for lbl in recipient_labels],
+        retention_period=retention_period,
+        business_function=t.business_function,
+        systems=[{"systemId": s, "name": s} for s in (t.typical_systems or [])],
+        protection_level=t.protection_level or "MEDIUM",
+        dpia_required=t.dpia_required or False,
+        structured_toms=structured_toms,
+        status="DRAFT",
+        created_by=user_id,
+        # Library refs
+        purpose_refs=t.purpose_refs,
+        legal_basis_refs=t.legal_basis_refs,
+        data_subject_refs=t.data_subject_refs,
+        data_category_refs=t.data_category_refs,
+        recipient_refs=t.recipient_refs,
+        retention_rule_ref=t.retention_rule_ref,
+        transfer_mechanism_refs=t.transfer_mechanism_refs,
+        tom_refs=t.tom_refs,
+        source_template_id=t.id,
+        risk_score=t.risk_score,
+    )
+    db.add(act)
+    # Flush before building the audit entry, which reads act.id.
+    db.flush()
+
+    # Audit log
+    audit = VVTAuditLogDB(
+        tenant_id=tid,
+        action="CREATE",
+        entity_type="activity",
+        entity_id=act.id,
+        changed_by=user_id,
+        new_values={"vvt_id": vvt_id, "source_template_id": t.id, "name": t.name},
+    )
+    db.add(audit)
+    db.commit()
+    db.refresh(act)
+
+    # Return full response
+    from .vvt_routes import _activity_to_response
+    return _activity_to_response(act)
+
+
+def _next_vvt_id(db: Session, tid: str) -> str:
+    """Return the first unused ``VVT-NNNN`` id for a tenant, starting at count + 1.
+
+    NOTE(review): this is a check-then-insert, so two concurrent requests can
+    still pick the same id; a per-tenant unique constraint on ``vvt_id`` would
+    be needed to rule that out — confirm against the schema.
+    """
+    sequence = db.query(VVTActivityDB).filter(VVTActivityDB.tenant_id == tid).count() + 1
+    while True:
+        candidate = f"VVT-{sequence:04d}"
+        taken = db.query(VVTActivityDB).filter(
+            VVTActivityDB.tenant_id == tid,
+            VVTActivityDB.vvt_id == candidate,
+        ).first()
+        if taken is None:
+            return candidate
+        sequence += 1
+
+
+def _resolve_ids(db: Session, model, ids: list) -> list:
+    """Map library ids to their ``label_de`` texts, keeping the input order.
+
+    Ids without a matching row are returned unchanged. An empty input yields
+    an empty list without querying.
+    """
+    if not ids:
+        return []
+    labels_by_id = {}
+    for entry in db.query(model).filter(model.id.in_(ids)).all():
+        labels_by_id[entry.id] = entry.label_de
+    return [labels_by_id.get(ref_id, ref_id) for ref_id in ids]
@@ -81,6 +81,54 @@ async def upsert_organization(
 # Activities
 # ============================================================================

+def _activity_to_response(act: VVTActivityDB) -> VVTActivityResponse:
+    """Convert a VVT activity row into its API response model.
+
+    ``id`` and ``dsfa_id`` are stringified, list- and dict-valued fields fall
+    back to empty containers, ``protection_level`` to 'MEDIUM',
+    ``dpia_required`` to False and ``status`` to 'DRAFT' when the stored
+    value is falsy. Everything else is copied unchanged.
+    """
+    # Attributes handed through exactly as stored.
+    copied_as_is = (
+        "vvt_id", "name", "description", "tom_description", "business_function",
+        "deployment_model", "responsible", "owner", "last_reviewed_at",
+        "next_review_at", "created_by",
+        # Library refs
+        "purpose_refs", "legal_basis_refs", "data_subject_refs",
+        "data_category_refs", "recipient_refs", "retention_rule_ref",
+        "transfer_mechanism_refs", "tom_refs", "source_template_id", "risk_score",
+        "linked_loeschfristen_ids", "linked_tom_measure_ids", "art30_completeness",
+        "created_at", "updated_at",
+    )
+    # Attributes where a falsy stored value becomes an empty list.
+    list_valued = (
+        "purposes", "legal_bases", "data_subject_categories",
+        "personal_data_categories", "recipient_categories",
+        "third_country_transfers", "systems", "data_sources", "data_flows",
+    )
+
+    fields = {attr: getattr(act, attr) for attr in copied_as_is}
+    for attr in list_valued:
+        fields[attr] = getattr(act, attr) or []
+
+    fields["id"] = str(act.id)
+    fields["retention_period"] = act.retention_period or {}
+    fields["structured_toms"] = act.structured_toms or {}
+    fields["protection_level"] = act.protection_level or 'MEDIUM'
+    fields["dpia_required"] = act.dpia_required or False
+    fields["status"] = act.status or 'DRAFT'
+    fields["dsfa_id"] = str(act.dsfa_id) if act.dsfa_id else None
+
+    return VVTActivityResponse(**fields)
+
+
@router.get("/activities", response_model=List[VVTActivityResponse])
 async def list_activities(
    status: Optional[str] = Query(None),
@@ -145,6 +193,107 @@ async def delete_activity(
        return service.delete_activity(tid, activity_id)


+# ============================================================================
+# Art. 30 Completeness Check
+# ============================================================================
+
+@router.get("/activities/{activity_id}/completeness")
+async def get_activity_completeness(
+    activity_id: str,
+    tid: str = Depends(get_tenant_id),
+    db: Session = Depends(get_db),
+):
+    """Return the Art. 30 completeness result for one activity of the calling tenant.
+
+    Responds with 404 when the activity does not exist for this tenant.
+    """
+    activity = (
+        db.query(VVTActivityDB)
+        .filter(VVTActivityDB.id == activity_id, VVTActivityDB.tenant_id == tid)
+        .first()
+    )
+    if activity:
+        return _calculate_completeness(activity)
+    raise HTTPException(status_code=404, detail=f"Activity {activity_id} not found")
+
+
+def _calculate_completeness(act: VVTActivityDB) -> dict:
+    """Calculate Art. 30 completeness — required fields per GDPR Art. 30(1).
+
+    Ten checks are evaluated; each is satisfied by either the free-text field
+    or the corresponding library reference. The third-country check always
+    passes because "no transfer" is a valid state.
+
+    Returns:
+        A dict with ``score`` (integer percentage, rounded down), ``missing``
+        (names of failed checks in check order), ``warnings``, ``passed`` and
+        ``total``.
+    """
+    retention = act.retention_period
+
+    # structured_toms may hold every category key with an empty list (that is
+    # what template instantiation stores), so a non-empty dict alone proves
+    # nothing; at least one category must actually contain an entry.
+    toms = act.structured_toms
+    if isinstance(toms, dict):
+        has_structured_toms = any(toms.values())
+    else:
+        has_structured_toms = bool(toms)
+
+    # (name reported in "missing", check result), in Art. 30 order.
+    checks = [
+        # 1. Name / purpose
+        ("name", bool(act.name)),
+        # 2. Processing purposes
+        ("purposes", bool(act.purposes) or bool(act.purpose_refs)),
+        # 3. Legal basis
+        ("legal_bases", bool(act.legal_bases) or bool(act.legal_basis_refs)),
+        # 4. Categories of data subjects
+        ("data_subjects", bool(act.data_subject_categories) or bool(act.data_subject_refs)),
+        # 5. Categories of personal data
+        ("data_categories", bool(act.personal_data_categories) or bool(act.data_category_refs)),
+        # 6. Recipients
+        ("recipients", bool(act.recipient_categories) or bool(act.recipient_refs)),
+        # 7. Third-country transfer — always passes, no transfer is a valid state
+        ("third_country_transfers", True),
+        # 8. Retention periods
+        (
+            "retention_period",
+            bool(retention and retention.get('description')) or bool(act.retention_rule_ref),
+        ),
+        # 9. TOM description
+        (
+            "tom_description",
+            bool(act.tom_description) or bool(act.tom_refs) or has_structured_toms,
+        ),
+        # 10. Responsible person
+        ("responsible", bool(act.responsible)),
+    ]
+
+    total_checks = len(checks)
+    missing = [name for name, ok in checks if not ok]
+    passed = total_checks - len(missing)
+
+    # Warnings do not affect the score.
+    warnings = []
+    if act.dpia_required and not act.dsfa_id:
+        warnings.append("dpia_required_but_no_dsfa_linked")
+    if act.third_country_transfers and not act.transfer_mechanism_refs:
+        warnings.append("third_country_transfer_without_mechanism")
+
+    score = int((passed / total_checks) * 100)
+    return {"score": score, "missing": missing, "warnings": warnings, "passed": passed, "total": total_checks}
+
+
 # ============================================================================
 # Audit Log
 # ============================================================================
@@ -0,0 +1,443 @@
+{
+  "framework_id": "CSA_CCM",
+  "display_name": "Cloud Security Alliance CCM v4",
+  "license": {
+    "type": "restricted",
+    "rag_allowed": false,
+    "use_as_metadata": true,
+    "note": "Abstrahierte Struktur — keine Originaltexte uebernommen"
+  },
+  "domains": [
+    {
+      "domain_id": "AIS",
+      "title": "Application and Interface Security",
+      "aliases": ["ais", "application and interface security", "anwendungssicherheit", "schnittstellensicherheit"],
+      "keywords": ["application", "anwendung", "interface", "schnittstelle", "api", "web", "eingabevalidierung"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "AIS-01",
+          "title": "Application Security Policy",
+          "statement": "Sicherheitsrichtlinien fuer Anwendungsentwicklung und Schnittstellenmanagement muessen definiert und angewendet werden.",
+          "keywords": ["policy", "richtlinie", "entwicklung"],
+          "action_hint": "document",
+          "object_hint": "Anwendungssicherheitsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "AIS-02",
+          "title": "Application Security Design",
+          "statement": "Sicherheitsanforderungen muessen in den Entwurf jeder Anwendung integriert werden.",
+          "keywords": ["design", "entwurf", "security by design"],
+          "action_hint": "implement",
+          "object_hint": "Sicherheitsanforderungen im Anwendungsentwurf",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "AIS-03",
+          "title": "Application Security Testing",
+          "statement": "Anwendungen muessen vor dem Deployment und regelmaessig auf Sicherheitsschwachstellen getestet werden.",
+          "keywords": ["testing", "test", "sast", "dast", "penetration"],
+          "action_hint": "test",
+          "object_hint": "Anwendungssicherheitstests",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "AIS-04",
+          "title": "Secure Development Practices",
+          "statement": "Sichere Entwicklungspraktiken (Code Review, Pair Programming, SAST) muessen fuer alle Entwicklungsprojekte gelten.",
+          "keywords": ["development", "entwicklung", "code review", "sast", "praktiken"],
+          "action_hint": "implement",
+          "object_hint": "Sichere Entwicklungspraktiken",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "AIS-05",
+          "title": "API Security",
+          "statement": "APIs muessen authentifiziert, autorisiert und gegen Missbrauch geschuetzt werden.",
+          "keywords": ["api", "schnittstelle", "authentifizierung", "rate limiting"],
+          "action_hint": "implement",
+          "object_hint": "API-Sicherheitskontrollen",
+          "object_class": "interface"
+        },
+        {
+          "subcontrol_id": "AIS-06",
+          "title": "Automated Application Security Testing",
+          "statement": "Automatisierte Sicherheitstests muessen in die CI/CD-Pipeline integriert werden.",
+          "keywords": ["automatisiert", "ci/cd", "pipeline", "sast", "dast"],
+          "action_hint": "configure",
+          "object_hint": "Automatisierte Sicherheitstests in CI/CD",
+          "object_class": "configuration"
+        }
+      ]
+    },
+    {
+      "domain_id": "BCR",
+      "title": "Business Continuity and Resilience",
+      "aliases": ["bcr", "business continuity", "resilience", "geschaeftskontinuitaet", "resilienz"],
+      "keywords": ["continuity", "kontinuitaet", "resilience", "resilienz", "disaster", "recovery", "backup"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "BCR-01",
+          "title": "Business Continuity Planning",
+          "statement": "Ein Geschaeftskontinuitaetsplan muss erstellt, dokumentiert und regelmaessig getestet werden.",
+          "keywords": ["plan", "kontinuitaet", "geschaeft"],
+          "action_hint": "document",
+          "object_hint": "Geschaeftskontinuitaetsplan",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "BCR-02",
+          "title": "Risk Assessment for BCM",
+          "statement": "Risikobewertungen muessen fuer geschaeftskritische Prozesse durchgefuehrt werden.",
+          "keywords": ["risiko", "bewertung", "kritisch"],
+          "action_hint": "assess",
+          "object_hint": "BCM-Risikobewertung",
+          "object_class": "risk_artifact"
+        },
+        {
+          "subcontrol_id": "BCR-03",
+          "title": "Backup and Recovery",
+          "statement": "Datensicherungen muessen regelmaessig erstellt und Wiederherstellungstests durchgefuehrt werden.",
+          "keywords": ["backup", "sicherung", "wiederherstellung", "recovery"],
+          "action_hint": "maintain",
+          "object_hint": "Datensicherung und Wiederherstellung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "BCR-04",
+          "title": "Disaster Recovery Planning",
+          "statement": "Ein Disaster-Recovery-Plan muss dokumentiert und jaehrlich getestet werden.",
+          "keywords": ["disaster", "recovery", "katastrophe"],
+          "action_hint": "document",
+          "object_hint": "Disaster-Recovery-Plan",
+          "object_class": "policy"
+        }
+      ]
+    },
+    {
+      "domain_id": "CCC",
+      "title": "Change Control and Configuration Management",
+      "aliases": ["ccc", "change control", "configuration management", "aenderungsmanagement", "konfigurationsmanagement"],
+      "keywords": ["change", "aenderung", "konfiguration", "configuration", "release", "deployment"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "CCC-01",
+          "title": "Change Management Policy",
+          "statement": "Ein Aenderungsmanagement-Prozess muss definiert und fuer alle Aenderungen angewendet werden.",
+          "keywords": ["policy", "richtlinie", "aenderung"],
+          "action_hint": "document",
+          "object_hint": "Aenderungsmanagement-Richtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "CCC-02",
+          "title": "Change Testing",
+          "statement": "Aenderungen muessen vor der Produktivsetzung getestet und genehmigt werden.",
+          "keywords": ["test", "genehmigung", "approval"],
+          "action_hint": "test",
+          "object_hint": "Aenderungstests",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "CCC-03",
+          "title": "Configuration Baseline",
+          "statement": "Basiskonfigurationen fuer alle Systeme muessen definiert und dokumentiert werden.",
+          "keywords": ["baseline", "basis", "standard"],
+          "action_hint": "define",
+          "object_hint": "Konfigurationsbaseline",
+          "object_class": "configuration"
+        }
+      ]
+    },
+    {
+      "domain_id": "CEK",
+      "title": "Cryptography, Encryption and Key Management",
+      "aliases": ["cek", "cryptography", "encryption", "key management", "kryptographie", "verschluesselung", "schluesselverwaltung"],
+      "keywords": ["kryptographie", "verschluesselung", "schluessel", "key", "encryption", "certificate", "zertifikat"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "CEK-01",
+          "title": "Encryption Policy",
+          "statement": "Verschluesselungsrichtlinien muessen definiert werden, die Algorithmen, Schluessellaengen und Einsatzbereiche festlegen.",
+          "keywords": ["policy", "richtlinie", "algorithmus"],
+          "action_hint": "document",
+          "object_hint": "Verschluesselungsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "CEK-02",
+          "title": "Key Management",
+          "statement": "Kryptographische Schluessel muessen ueber ihren Lebenszyklus sicher verwaltet werden.",
+          "keywords": ["key", "schluessel", "management", "lebenszyklus"],
+          "action_hint": "maintain",
+          "object_hint": "Schluesselverwaltung",
+          "object_class": "cryptographic_control"
+        },
+        {
+          "subcontrol_id": "CEK-03",
+          "title": "Data Encryption",
+          "statement": "Sensible Daten muessen bei Speicherung und Uebertragung verschluesselt werden.",
+          "keywords": ["data", "daten", "speicherung", "uebertragung"],
+          "action_hint": "encrypt",
+          "object_hint": "Datenverschluesselung",
+          "object_class": "cryptographic_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "DSP",
+      "title": "Data Security and Privacy",
+      "aliases": ["dsp", "data security", "privacy", "datensicherheit", "datenschutz"],
+      "keywords": ["datenschutz", "datensicherheit", "privacy", "data security", "pii", "personenbezogen", "dsgvo"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "DSP-01",
+          "title": "Data Classification",
+          "statement": "Daten muessen nach Sensibilitaet klassifiziert und entsprechend geschuetzt werden.",
+          "keywords": ["klassifizierung", "sensibilitaet", "classification"],
+          "action_hint": "define",
+          "object_hint": "Datenklassifizierung",
+          "object_class": "data"
+        },
+        {
+          "subcontrol_id": "DSP-02",
+          "title": "Data Inventory",
+          "statement": "Ein Dateninventar muss gefuehrt werden, das alle Verarbeitungen personenbezogener Daten dokumentiert.",
+          "keywords": ["inventar", "verzeichnis", "verarbeitung", "vvt"],
+          "action_hint": "maintain",
+          "object_hint": "Dateninventar",
+          "object_class": "register"
+        },
+        {
+          "subcontrol_id": "DSP-03",
+          "title": "Data Retention and Deletion",
+          "statement": "Aufbewahrungsfristen muessen definiert und Daten nach Ablauf sicher geloescht werden.",
+          "keywords": ["retention", "aufbewahrung", "loeschung", "frist"],
+          "action_hint": "delete",
+          "object_hint": "Datenloeschung nach Frist",
+          "object_class": "data"
+        },
+        {
+          "subcontrol_id": "DSP-04",
+          "title": "Privacy Impact Assessment",
+          "statement": "Datenschutz-Folgenabschaetzungen muessen fuer risikoreiche Verarbeitungen durchgefuehrt werden.",
+          "keywords": ["dsfa", "pia", "folgenabschaetzung", "impact"],
+          "action_hint": "assess",
+          "object_hint": "Datenschutz-Folgenabschaetzung",
+          "object_class": "risk_artifact"
+        },
+        {
+          "subcontrol_id": "DSP-05",
+          "title": "Data Subject Rights",
+          "statement": "Verfahren zur Bearbeitung von Betroffenenrechten muessen implementiert werden.",
+          "keywords": ["betroffenenrechte", "auskunft", "loeschung", "data subject"],
+          "action_hint": "implement",
+          "object_hint": "Betroffenenrechte-Verfahren",
+          "object_class": "process"
+        }
+      ]
+    },
+    {
+      "domain_id": "GRC",
+      "title": "Governance, Risk and Compliance",
+      "aliases": ["grc", "governance", "risk", "compliance", "risikomanagement"],
+      "keywords": ["governance", "risiko", "compliance", "management", "policy", "richtlinie"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "GRC-01",
+          "title": "Information Security Program",
+          "statement": "Ein umfassendes Informationssicherheitsprogramm muss etabliert und aufrechterhalten werden.",
+          "keywords": ["programm", "sicherheit", "information"],
+          "action_hint": "maintain",
+          "object_hint": "Informationssicherheitsprogramm",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "GRC-02",
+          "title": "Risk Management Program",
+          "statement": "Ein Risikomanagement-Programm muss implementiert werden, das Identifikation, Bewertung und Behandlung umfasst.",
+          "keywords": ["risiko", "management", "bewertung", "behandlung"],
+          "action_hint": "implement",
+          "object_hint": "Risikomanagement-Programm",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "GRC-03",
+          "title": "Compliance Monitoring",
+          "statement": "Die Einhaltung regulatorischer und vertraglicher Anforderungen muss ueberwacht werden.",
+          "keywords": ["compliance", "einhaltung", "regulatorisch", "ueberwachung"],
+          "action_hint": "monitor",
+          "object_hint": "Compliance-Ueberwachung",
+          "object_class": "process"
+        }
+      ]
+    },
+    {
+      "domain_id": "IAM",
+      "title": "Identity and Access Management",
+      "aliases": ["iam", "identity", "access management", "identitaetsmanagement", "zugriffsverwaltung"],
+      "keywords": ["identitaet", "zugriff", "identity", "access", "authentifizierung", "autorisierung", "sso"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "IAM-01",
+          "title": "Identity and Access Policy",
+          "statement": "Identitaets- und Zugriffsmanagement-Richtlinien muessen definiert werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "IAM-Richtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "IAM-02",
+          "title": "Strong Authentication",
+          "statement": "Starke Authentifizierung (MFA) muss fuer administrative und sicherheitskritische Zugriffe gefordert werden.",
+          "keywords": ["mfa", "stark", "authentifizierung", "admin"],
+          "action_hint": "implement",
+          "object_hint": "Starke Authentifizierung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "IAM-03",
+          "title": "Identity Lifecycle Management",
+          "statement": "Identitaeten muessen ueber ihren gesamten Lebenszyklus verwaltet werden.",
+          "keywords": ["lifecycle", "lebenszyklus", "onboarding", "offboarding"],
+          "action_hint": "maintain",
+          "object_hint": "Identitaets-Lebenszyklus",
+          "object_class": "account"
+        },
+        {
+          "subcontrol_id": "IAM-04",
+          "title": "Access Review",
+          "statement": "Zugriffsrechte muessen regelmaessig ueberprueft und ueberschuessige Rechte entzogen werden.",
+          "keywords": ["review", "ueberpruefen", "rechte", "rezertifizierung"],
+          "action_hint": "review",
+          "object_hint": "Zugriffsrechte-Review",
+          "object_class": "access_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "LOG",
+      "title": "Logging and Monitoring",
+      "aliases": ["log", "logging", "monitoring", "protokollierung", "ueberwachung"],
+      "keywords": ["logging", "monitoring", "protokollierung", "ueberwachung", "siem", "alarm"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "LOG-01",
+          "title": "Logging Policy",
+          "statement": "Protokollierungs-Richtlinien muessen definiert werden, die Umfang und Aufbewahrung festlegen.",
+          "keywords": ["policy", "richtlinie", "umfang", "aufbewahrung"],
+          "action_hint": "document",
+          "object_hint": "Protokollierungsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "LOG-02",
+          "title": "Security Event Logging",
+          "statement": "Sicherheitsrelevante Ereignisse muessen erfasst und zentral gespeichert werden.",
+          "keywords": ["event", "ereignis", "sicherheit", "zentral"],
+          "action_hint": "configure",
+          "object_hint": "Sicherheits-Event-Logging",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "LOG-03",
+          "title": "Monitoring and Alerting",
+          "statement": "Sicherheitsrelevante Logs muessen ueberwacht und bei Anomalien Alarme ausgeloest werden.",
+          "keywords": ["monitoring", "alerting", "alarm", "anomalie"],
+          "action_hint": "monitor",
+          "object_hint": "Log-Ueberwachung und Alarmierung",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "SEF",
+      "title": "Security Incident Management",
+      "aliases": ["sef", "security incident", "incident management", "vorfallmanagement", "sicherheitsvorfall"],
+      "keywords": ["vorfall", "incident", "sicherheitsvorfall", "reaktion", "response", "meldung"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "SEF-01",
+          "title": "Incident Management Policy",
+          "statement": "Ein Vorfallmanagement-Prozess muss definiert, dokumentiert und getestet werden.",
+          "keywords": ["policy", "richtlinie", "prozess"],
+          "action_hint": "document",
+          "object_hint": "Vorfallmanagement-Richtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "SEF-02",
+          "title": "Incident Response Team",
+          "statement": "Ein Incident-Response-Team muss benannt und geschult werden.",
+          "keywords": ["team", "response", "schulung"],
+          "action_hint": "define",
+          "object_hint": "Incident-Response-Team",
+          "object_class": "role"
+        },
+        {
+          "subcontrol_id": "SEF-03",
+          "title": "Incident Reporting",
+          "statement": "Sicherheitsvorfaelle muessen innerhalb definierter Fristen an zustaendige Stellen gemeldet werden.",
+          "keywords": ["reporting", "meldung", "frist", "behoerde"],
+          "action_hint": "report",
+          "object_hint": "Vorfallmeldung",
+          "object_class": "incident"
+        },
+        {
+          "subcontrol_id": "SEF-04",
+          "title": "Incident Lessons Learned",
+          "statement": "Nach jedem Vorfall muss eine Nachbereitung mit Lessons Learned durchgefuehrt werden.",
+          "keywords": ["lessons learned", "nachbereitung", "verbesserung"],
+          "action_hint": "review",
+          "object_hint": "Vorfall-Nachbereitung",
+          "object_class": "record"
+        }
+      ]
+    },
+    {
+      "domain_id": "TVM",
+      "title": "Threat and Vulnerability Management",
+      "aliases": ["tvm", "threat", "vulnerability", "schwachstelle", "bedrohung", "schwachstellenmanagement"],
+      "keywords": ["schwachstelle", "vulnerability", "threat", "bedrohung", "patch", "scan"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "TVM-01",
+          "title": "Vulnerability Management Policy",
+          "statement": "Schwachstellenmanagement-Richtlinien muessen definiert und umgesetzt werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Schwachstellenmanagement-Richtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "TVM-02",
+          "title": "Vulnerability Scanning",
+          "statement": "Systeme muessen regelmaessig auf Schwachstellen gescannt werden.",
+          "keywords": ["scan", "scanning", "regelmaessig"],
+          "action_hint": "test",
+          "object_hint": "Schwachstellenscan",
+          "object_class": "system"
+        },
+        {
+          "subcontrol_id": "TVM-03",
+          "title": "Vulnerability Remediation",
+          "statement": "Erkannte Schwachstellen muessen priorisiert und innerhalb definierter Fristen behoben werden.",
+          "keywords": ["remediation", "behebung", "frist", "priorisierung"],
+          "action_hint": "remediate",
+          "object_hint": "Schwachstellenbehebung",
+          "object_class": "system"
+        },
+        {
+          "subcontrol_id": "TVM-04",
+          "title": "Penetration Testing",
+          "statement": "Regelmaessige Penetrationstests muessen durchgefuehrt werden.",
+          "keywords": ["penetration", "pentest", "test"],
+          "action_hint": "test",
+          "object_hint": "Penetrationstest",
+          "object_class": "system"
+        }
+      ]
+    }
+  ]
+}
@@ -0,0 +1,514 @@
+{
+  "framework_id": "NIST_SP800_53",
+  "display_name": "NIST SP 800-53 Rev. 5",
+  "license": {
+    "type": "public_domain",
+    "rag_allowed": true,
+    "use_as_metadata": true
+  },
+  "domains": [
+    {
+      "domain_id": "AC",
+      "title": "Access Control",
+      "aliases": ["access control", "zugriffskontrolle", "zugriffssteuerung"],
+      "keywords": ["access", "zugriff", "berechtigung", "authorization", "autorisierung"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "AC-1",
+          "title": "Access Control Policy and Procedures",
+          "statement": "Zugriffskontrollrichtlinien und -verfahren muessen definiert, dokumentiert und regelmaessig ueberprueft werden.",
+          "keywords": ["policy", "richtlinie", "verfahren", "procedures"],
+          "action_hint": "document",
+          "object_hint": "Zugriffskontrollrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "AC-2",
+          "title": "Account Management",
+          "statement": "Benutzerkonten muessen ueber ihren gesamten Lebenszyklus verwaltet werden: Erstellung, Aktivierung, Aenderung, Deaktivierung und Loeschung.",
+          "keywords": ["account", "konto", "benutzer", "lifecycle", "lebenszyklus"],
+          "action_hint": "maintain",
+          "object_hint": "Benutzerkontenverwaltung",
+          "object_class": "account"
+        },
+        {
+          "subcontrol_id": "AC-3",
+          "title": "Access Enforcement",
+          "statement": "Der Zugriff auf Systemressourcen muss gemaess der definierten Zugriffskontrollrichtlinie durchgesetzt werden.",
+          "keywords": ["enforcement", "durchsetzung", "ressourcen", "system"],
+          "action_hint": "restrict_access",
+          "object_hint": "Zugriffsdurchsetzung",
+          "object_class": "access_control"
+        },
+        {
+          "subcontrol_id": "AC-5",
+          "title": "Separation of Duties",
+          "statement": "Aufgabentrennung muss definiert und durchgesetzt werden, um Interessenkonflikte und Missbrauch zu verhindern.",
+          "keywords": ["separation", "trennung", "duties", "aufgaben", "funktionstrennung"],
+          "action_hint": "define",
+          "object_hint": "Aufgabentrennung",
+          "object_class": "role"
+        },
+        {
+          "subcontrol_id": "AC-6",
+          "title": "Least Privilege",
+          "statement": "Zugriffsrechte muessen nach dem Prinzip der minimalen Rechte vergeben werden.",
+          "keywords": ["least privilege", "minimal", "rechte", "privileg"],
+          "action_hint": "restrict_access",
+          "object_hint": "Minimale Rechtevergabe",
+          "object_class": "access_control"
+        },
+        {
+          "subcontrol_id": "AC-7",
+          "title": "Unsuccessful Logon Attempts",
+          "statement": "Fehlgeschlagene Anmeldeversuche muessen begrenzt und ueberwacht werden.",
+          "keywords": ["logon", "anmeldung", "fehlgeschlagen", "sperre", "lockout"],
+          "action_hint": "monitor",
+          "object_hint": "Anmeldeversuchsueberwachung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "AC-17",
+          "title": "Remote Access",
+          "statement": "Fernzugriff muss autorisiert, ueberwacht und verschluesselt werden.",
+          "keywords": ["remote", "fern", "vpn", "fernzugriff"],
+          "action_hint": "configure",
+          "object_hint": "Fernzugriffskonfiguration",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "AU",
+      "title": "Audit and Accountability",
+      "aliases": ["audit", "protokollierung", "accountability", "rechenschaftspflicht"],
+      "keywords": ["audit", "log", "protokoll", "nachvollziehbarkeit", "logging"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "AU-1",
+          "title": "Audit Policy and Procedures",
+          "statement": "Audit- und Protokollierungsrichtlinien muessen definiert und regelmaessig ueberprueft werden.",
+          "keywords": ["policy", "richtlinie", "audit"],
+          "action_hint": "document",
+          "object_hint": "Auditrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "AU-2",
+          "title": "Event Logging",
+          "statement": "Sicherheitsrelevante Ereignisse muessen identifiziert und protokolliert werden.",
+          "keywords": ["event", "ereignis", "logging", "protokollierung"],
+          "action_hint": "configure",
+          "object_hint": "Ereignisprotokollierung",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "AU-3",
+          "title": "Content of Audit Records",
+          "statement": "Audit-Eintraege muessen ausreichende Informationen enthalten: Zeitstempel, Quelle, Ergebnis, Identitaet.",
+          "keywords": ["content", "inhalt", "record", "eintrag"],
+          "action_hint": "define",
+          "object_hint": "Audit-Eintragsformat",
+          "object_class": "record"
+        },
+        {
+          "subcontrol_id": "AU-6",
+          "title": "Audit Record Review and Reporting",
+          "statement": "Audit-Eintraege muessen regelmaessig ueberprueft und bei Anomalien berichtet werden.",
+          "keywords": ["review", "ueberpruefen", "reporting", "anomalie"],
+          "action_hint": "review",
+          "object_hint": "Audit-Ueberpruefung",
+          "object_class": "record"
+        },
+        {
+          "subcontrol_id": "AU-9",
+          "title": "Protection of Audit Information",
+          "statement": "Audit-Daten muessen vor unbefugtem Zugriff, Aenderung und Loeschung geschuetzt werden.",
+          "keywords": ["schutz", "protection", "integritaet", "integrity"],
+          "action_hint": "implement",
+          "object_hint": "Schutz der Audit-Daten",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "AT",
+      "title": "Awareness and Training",
+      "aliases": ["awareness", "training", "schulung", "sensibilisierung"],
+      "keywords": ["training", "schulung", "awareness", "sensibilisierung", "weiterbildung"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "AT-1",
+          "title": "Policy and Procedures",
+          "statement": "Schulungs- und Sensibilisierungsrichtlinien muessen definiert und regelmaessig aktualisiert werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Schulungsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "AT-2",
+          "title": "Literacy Training and Awareness",
+          "statement": "Alle Mitarbeiter muessen regelmaessig Sicherheitsschulungen erhalten.",
+          "keywords": ["mitarbeiter", "schulung", "sicherheit"],
+          "action_hint": "train",
+          "object_hint": "Sicherheitsschulung",
+          "object_class": "training"
+        },
+        {
+          "subcontrol_id": "AT-3",
+          "title": "Role-Based Training",
+          "statement": "Rollenbasierte Sicherheitsschulungen muessen fuer Mitarbeiter mit besonderen Sicherheitsaufgaben durchgefuehrt werden.",
+          "keywords": ["rollenbasiert", "role-based", "speziell"],
+          "action_hint": "train",
+          "object_hint": "Rollenbasierte Sicherheitsschulung",
+          "object_class": "training"
+        }
+      ]
+    },
+    {
+      "domain_id": "CM",
+      "title": "Configuration Management",
+      "aliases": ["configuration management", "konfigurationsmanagement", "konfiguration"],
+      "keywords": ["konfiguration", "configuration", "baseline", "haertung", "hardening"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "CM-1",
+          "title": "Policy and Procedures",
+          "statement": "Konfigurationsmanagement-Richtlinien muessen dokumentiert und gepflegt werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Konfigurationsmanagement-Richtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "CM-2",
+          "title": "Baseline Configuration",
+          "statement": "Basiskonfigurationen fuer Systeme muessen definiert, dokumentiert und gepflegt werden.",
+          "keywords": ["baseline", "basis", "standard"],
+          "action_hint": "define",
+          "object_hint": "Basiskonfiguration",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "CM-6",
+          "title": "Configuration Settings",
+          "statement": "Sicherheitsrelevante Konfigurationseinstellungen muessen definiert und durchgesetzt werden.",
+          "keywords": ["settings", "einstellungen", "sicherheit"],
+          "action_hint": "configure",
+          "object_hint": "Sicherheitskonfiguration",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "CM-7",
+          "title": "Least Functionality",
+          "statement": "Systeme muessen so konfiguriert werden, dass nur notwendige Funktionen aktiv sind.",
+          "keywords": ["least functionality", "minimal", "dienste", "ports"],
+          "action_hint": "configure",
+          "object_hint": "Minimalkonfiguration",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "CM-8",
+          "title": "System Component Inventory",
+          "statement": "Ein Inventar aller Systemkomponenten muss gefuehrt und aktuell gehalten werden.",
+          "keywords": ["inventar", "inventory", "komponenten", "assets"],
+          "action_hint": "maintain",
+          "object_hint": "Systemkomponenten-Inventar",
+          "object_class": "register"
+        }
+      ]
+    },
+    {
+      "domain_id": "IA",
+      "title": "Identification and Authentication",
+      "aliases": ["identification", "authentication", "identifikation", "authentifizierung"],
+      "keywords": ["authentifizierung", "identifikation", "identity", "passwort", "mfa", "credential"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "IA-1",
+          "title": "Policy and Procedures",
+          "statement": "Identifikations- und Authentifizierungsrichtlinien muessen dokumentiert und regelmaessig ueberprueft werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Authentifizierungsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "IA-2",
+          "title": "Identification and Authentication",
+          "statement": "Benutzer und Geraete muessen eindeutig identifiziert und authentifiziert werden.",
+          "keywords": ["benutzer", "geraete", "identifizierung"],
+          "action_hint": "implement",
+          "object_hint": "Benutzerauthentifizierung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "IA-2(1)",
+          "title": "Multi-Factor Authentication",
+          "statement": "Multi-Faktor-Authentifizierung muss fuer privilegierte Konten implementiert werden.",
+          "keywords": ["mfa", "multi-faktor", "zwei-faktor", "2fa"],
+          "action_hint": "implement",
+          "object_hint": "Multi-Faktor-Authentifizierung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "IA-5",
+          "title": "Authenticator Management",
+          "statement": "Authentifizierungsmittel (Passwoerter, Token, Zertifikate) muessen sicher verwaltet werden.",
+          "keywords": ["passwort", "token", "zertifikat", "credential"],
+          "action_hint": "maintain",
+          "object_hint": "Authentifizierungsmittel-Verwaltung",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "IR",
+      "title": "Incident Response",
+      "aliases": ["incident response", "vorfallbehandlung", "vorfallreaktion", "incident management"],
+      "keywords": ["vorfall", "incident", "reaktion", "response", "breach", "sicherheitsvorfall"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "IR-1",
+          "title": "Policy and Procedures",
+          "statement": "Vorfallreaktionsrichtlinien und -verfahren muessen definiert und regelmaessig aktualisiert werden.",
+          "keywords": ["policy", "richtlinie", "verfahren"],
+          "action_hint": "document",
+          "object_hint": "Vorfallreaktionsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "IR-2",
+          "title": "Incident Response Training",
+          "statement": "Mitarbeiter muessen regelmaessig in der Vorfallreaktion geschult werden.",
+          "keywords": ["training", "schulung"],
+          "action_hint": "train",
+          "object_hint": "Vorfallreaktionsschulung",
+          "object_class": "training"
+        },
+        {
+          "subcontrol_id": "IR-4",
+          "title": "Incident Handling",
+          "statement": "Ein strukturierter Prozess fuer die Vorfallbehandlung muss implementiert werden: Erkennung, Analyse, Eindaemmung, Behebung.",
+          "keywords": ["handling", "behandlung", "erkennung", "eindaemmung"],
+          "action_hint": "implement",
+          "object_hint": "Vorfallbehandlungsprozess",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "IR-5",
+          "title": "Incident Monitoring",
+          "statement": "Sicherheitsvorfaelle muessen kontinuierlich ueberwacht und verfolgt werden.",
+          "keywords": ["monitoring", "ueberwachung", "tracking"],
+          "action_hint": "monitor",
+          "object_hint": "Vorfallsueberwachung",
+          "object_class": "incident"
+        },
+        {
+          "subcontrol_id": "IR-6",
+          "title": "Incident Reporting",
+          "statement": "Sicherheitsvorfaelle muessen innerhalb definierter Fristen an die zustaendigen Stellen gemeldet werden.",
+          "keywords": ["reporting", "meldung", "melden", "frist"],
+          "action_hint": "report",
+          "object_hint": "Vorfallmeldung",
+          "object_class": "incident"
+        },
+        {
+          "subcontrol_id": "IR-8",
+          "title": "Incident Response Plan",
+          "statement": "Ein Vorfallreaktionsplan muss dokumentiert und regelmaessig getestet werden.",
+          "keywords": ["plan", "dokumentation", "test"],
+          "action_hint": "document",
+          "object_hint": "Vorfallreaktionsplan",
+          "object_class": "policy"
+        }
+      ]
+    },
+    {
+      "domain_id": "RA",
+      "title": "Risk Assessment",
+      "aliases": ["risk assessment", "risikobewertung", "risikoanalyse"],
+      "keywords": ["risiko", "risk", "bewertung", "assessment", "analyse", "bedrohung", "threat"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "RA-1",
+          "title": "Policy and Procedures",
+          "statement": "Risikobewertungsrichtlinien muessen dokumentiert und regelmaessig aktualisiert werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Risikobewertungsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "RA-3",
+          "title": "Risk Assessment",
+          "statement": "Regelmaessige Risikobewertungen muessen durchgefuehrt und dokumentiert werden.",
+          "keywords": ["bewertung", "assessment", "regelmaessig"],
+          "action_hint": "assess",
+          "object_hint": "Risikobewertung",
+          "object_class": "risk_artifact"
+        },
+        {
+          "subcontrol_id": "RA-5",
+          "title": "Vulnerability Monitoring and Scanning",
+          "statement": "Systeme muessen regelmaessig auf Schwachstellen gescannt und ueberwacht werden.",
+          "keywords": ["vulnerability", "schwachstelle", "scan", "monitoring"],
+          "action_hint": "monitor",
+          "object_hint": "Schwachstellenueberwachung",
+          "object_class": "system"
+        }
+      ]
+    },
+    {
+      "domain_id": "SC",
+      "title": "System and Communications Protection",
+      "aliases": ["system protection", "communications protection", "kommunikationsschutz", "systemschutz"],
+      "keywords": ["verschluesselung", "encryption", "tls", "netzwerk", "network", "kommunikation", "firewall"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "SC-1",
+          "title": "Policy and Procedures",
+          "statement": "System- und Kommunikationsschutzrichtlinien muessen dokumentiert und aktuell gehalten werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Kommunikationsschutzrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "SC-7",
+          "title": "Boundary Protection",
+          "statement": "Netzwerkgrenzen muessen durch Firewall-Regeln und Zugangskontrollen geschuetzt werden.",
+          "keywords": ["boundary", "grenze", "firewall", "netzwerk"],
+          "action_hint": "implement",
+          "object_hint": "Netzwerkgrenzschutz",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "SC-8",
+          "title": "Transmission Confidentiality and Integrity",
+          "statement": "Daten muessen bei der Uebertragung durch Verschluesselung geschuetzt werden.",
+          "keywords": ["transmission", "uebertragung", "verschluesselung", "tls"],
+          "action_hint": "encrypt",
+          "object_hint": "Uebertragungsverschluesselung",
+          "object_class": "cryptographic_control"
+        },
+        {
+          "subcontrol_id": "SC-12",
+          "title": "Cryptographic Key Establishment and Management",
+          "statement": "Kryptographische Schluessel muessen sicher erzeugt, verteilt, gespeichert und widerrufen werden.",
+          "keywords": ["key", "schluessel", "kryptographie", "management"],
+          "action_hint": "maintain",
+          "object_hint": "Schluesselverwaltung",
+          "object_class": "cryptographic_control"
+        },
+        {
+          "subcontrol_id": "SC-13",
+          "title": "Cryptographic Protection",
+          "statement": "Kryptographische Mechanismen muessen gemaess anerkannten Standards implementiert werden.",
+          "keywords": ["kryptographie", "verschluesselung", "standard"],
+          "action_hint": "implement",
+          "object_hint": "Kryptographischer Schutz",
+          "object_class": "cryptographic_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "SI",
+      "title": "System and Information Integrity",
+      "aliases": ["system integrity", "information integrity", "systemintegritaet", "informationsintegritaet"],
+      "keywords": ["integritaet", "integrity", "malware", "patch", "flaw", "schwachstelle"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "SI-1",
+          "title": "Policy and Procedures",
+          "statement": "System- und Informationsintegritaetsrichtlinien muessen dokumentiert und regelmaessig ueberprueft werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Integritaetsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "SI-2",
+          "title": "Flaw Remediation",
+          "statement": "Bekannte Schwachstellen muessen innerhalb definierter Fristen behoben werden.",
+          "keywords": ["flaw", "schwachstelle", "patch", "behebung", "remediation"],
+          "action_hint": "remediate",
+          "object_hint": "Schwachstellenbehebung",
+          "object_class": "system"
+        },
+        {
+          "subcontrol_id": "SI-3",
+          "title": "Malicious Code Protection",
+          "statement": "Systeme muessen vor Schadsoftware geschuetzt werden durch Erkennung und Abwehrmechanismen.",
+          "keywords": ["malware", "schadsoftware", "antivirus", "erkennung"],
+          "action_hint": "implement",
+          "object_hint": "Schadsoftwareschutz",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "SI-4",
+          "title": "System Monitoring",
+          "statement": "Systeme muessen kontinuierlich auf Sicherheitsereignisse und Anomalien ueberwacht werden.",
+          "keywords": ["monitoring", "ueberwachung", "anomalie", "siem"],
+          "action_hint": "monitor",
+          "object_hint": "Systemueberwachung",
+          "object_class": "system"
+        },
+        {
+          "subcontrol_id": "SI-5",
+          "title": "Security Alerts and Advisories",
+          "statement": "Sicherheitswarnungen muessen empfangen und bewertet werden, und es muss darauf reagiert werden.",
+          "keywords": ["alert", "warnung", "advisory", "cve"],
+          "action_hint": "monitor",
+          "object_hint": "Sicherheitswarnungen",
+          "object_class": "incident"
+        }
+      ]
+    },
+    {
+      "domain_id": "SA",
+      "title": "System and Services Acquisition",
+      "aliases": ["system acquisition", "services acquisition", "systembeschaffung", "secure development"],
+      "keywords": ["beschaffung", "acquisition", "entwicklung", "development", "lieferkette", "supply chain"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "SA-1",
+          "title": "Policy and Procedures",
+          "statement": "Beschaffungsrichtlinien mit Sicherheitsanforderungen muessen dokumentiert werden.",
+          "keywords": ["policy", "richtlinie", "beschaffung"],
+          "action_hint": "document",
+          "object_hint": "Beschaffungsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "SA-8",
+          "title": "Security and Privacy Engineering Principles",
+          "statement": "Sicherheits- und Datenschutzprinzipien muessen in die Systementwicklung integriert werden.",
+          "keywords": ["engineering", "development", "prinzipien", "design"],
+          "action_hint": "implement",
+          "object_hint": "Security-by-Design-Prinzipien",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "SA-11",
+          "title": "Developer Testing and Evaluation",
+          "statement": "Entwickler muessen Sicherheitstests und Code-Reviews durchfuehren.",
+          "keywords": ["testing", "test", "code review", "evaluation"],
+          "action_hint": "test",
+          "object_hint": "Entwickler-Sicherheitstests",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "SA-12",
+          "title": "Supply Chain Protection",
+          "statement": "Lieferkettenrisiken muessen bewertet und Schutzmassnahmen implementiert werden.",
+          "keywords": ["supply chain", "lieferkette", "third party", "drittanbieter"],
+          "action_hint": "assess",
+          "object_hint": "Lieferkettenrisikobewertung",
+          "object_class": "risk_artifact"
+        }
+      ]
+    }
+  ]
+}
@@ -0,0 +1,353 @@
+{
+  "framework_id": "OWASP_ASVS",
+  "display_name": "OWASP Application Security Verification Standard 4.0",
+  "license": {
+    "type": "cc_by_sa_4",
+    "rag_allowed": true,
+    "use_as_metadata": true
+  },
+  "domains": [
+    {
+      "domain_id": "V1",
+      "title": "Architecture, Design and Threat Modeling",
+      "aliases": ["architecture", "architektur", "design", "threat modeling", "bedrohungsmodellierung"],
+      "keywords": ["architektur", "design", "threat model", "bedrohung", "modellierung"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V1.1",
+          "title": "Secure Software Development Lifecycle",
+          "statement": "Ein sicherer Softwareentwicklungs-Lebenszyklus (SSDLC) muss definiert und angewendet werden.",
+          "keywords": ["sdlc", "lifecycle", "lebenszyklus", "entwicklung"],
+          "action_hint": "implement",
+          "object_hint": "Sicherer Entwicklungs-Lebenszyklus",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "V1.2",
+          "title": "Authentication Architecture",
+          "statement": "Die Authentifizierungsarchitektur muss dokumentiert und regelmaessig ueberprueft werden.",
+          "keywords": ["authentication", "authentifizierung", "architektur"],
+          "action_hint": "document",
+          "object_hint": "Authentifizierungsarchitektur",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "V1.4",
+          "title": "Access Control Architecture",
+          "statement": "Die Zugriffskontrollarchitektur muss dokumentiert und zentral durchgesetzt werden.",
+          "keywords": ["access control", "zugriffskontrolle", "architektur"],
+          "action_hint": "document",
+          "object_hint": "Zugriffskontrollarchitektur",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "V1.5",
+          "title": "Input and Output Architecture",
+          "statement": "Eingabe- und Ausgabevalidierung muss architektonisch verankert und durchgaengig angewendet werden.",
+          "keywords": ["input", "output", "eingabe", "ausgabe", "validierung"],
+          "action_hint": "implement",
+          "object_hint": "Ein-/Ausgabevalidierung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V1.6",
+          "title": "Cryptographic Architecture",
+          "statement": "Kryptographische Mechanismen muessen architektonisch definiert und standardisiert sein.",
+          "keywords": ["crypto", "kryptographie", "verschluesselung"],
+          "action_hint": "define",
+          "object_hint": "Kryptographie-Architektur",
+          "object_class": "cryptographic_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "V2",
+      "title": "Authentication",
+      "aliases": ["authentication", "authentifizierung", "anmeldung", "login"],
+      "keywords": ["authentication", "authentifizierung", "passwort", "login", "anmeldung", "credential"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V2.1",
+          "title": "Password Security",
+          "statement": "Passwortrichtlinien muessen Mindestlaenge, Komplexitaet und Sperrmechanismen definieren.",
+          "keywords": ["passwort", "password", "laenge", "komplexitaet"],
+          "action_hint": "define",
+          "object_hint": "Passwortrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "V2.2",
+          "title": "General Authenticator Security",
+          "statement": "Authentifizierungsmittel muessen sicher gespeichert und uebertragen werden.",
+          "keywords": ["authenticator", "credential", "speicherung"],
+          "action_hint": "implement",
+          "object_hint": "Sichere Credential-Verwaltung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V2.7",
+          "title": "Out-of-Band Verification",
+          "statement": "Out-of-Band-Verifikationsmechanismen muessen sicher implementiert werden.",
+          "keywords": ["oob", "out-of-band", "sms", "push"],
+          "action_hint": "implement",
+          "object_hint": "Out-of-Band-Verifikation",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V2.8",
+          "title": "Multi-Factor Authentication",
+          "statement": "Multi-Faktor-Authentifizierung muss fuer sicherheitskritische Funktionen verfuegbar sein.",
+          "keywords": ["mfa", "multi-faktor", "totp", "fido"],
+          "action_hint": "implement",
+          "object_hint": "Multi-Faktor-Authentifizierung",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "V3",
+      "title": "Session Management",
+      "aliases": ["session", "sitzung", "session management", "sitzungsverwaltung"],
+      "keywords": ["session", "sitzung", "token", "cookie", "timeout"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V3.1",
+          "title": "Session Management Security",
+          "statement": "Sitzungstoken muessen sicher erzeugt, uebertragen und invalidiert werden.",
+          "keywords": ["token", "sitzung", "sicherheit"],
+          "action_hint": "implement",
+          "object_hint": "Sichere Sitzungsverwaltung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V3.3",
+          "title": "Session Termination",
+          "statement": "Sitzungen muessen nach Inaktivitaet und bei Abmeldung zuverlaessig beendet werden.",
+          "keywords": ["termination", "timeout", "abmeldung", "beenden"],
+          "action_hint": "configure",
+          "object_hint": "Sitzungstimeout",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "V3.5",
+          "title": "Token-Based Session Management",
+          "statement": "Tokenbasierte Sitzungsmechanismen muessen gegen Diebstahl und Replay geschuetzt sein.",
+          "keywords": ["jwt", "token", "replay", "diebstahl"],
+          "action_hint": "implement",
+          "object_hint": "Token-Schutz",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "V5",
+      "title": "Validation, Sanitization and Encoding",
+      "aliases": ["validation", "validierung", "sanitization", "encoding", "eingabevalidierung"],
+      "keywords": ["validierung", "sanitization", "encoding", "xss", "injection", "eingabe"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V5.1",
+          "title": "Input Validation",
+          "statement": "Alle Eingabedaten muessen serverseitig validiert werden.",
+          "keywords": ["input", "eingabe", "validierung", "serverseitig"],
+          "action_hint": "implement",
+          "object_hint": "Eingabevalidierung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V5.2",
+          "title": "Sanitization and Sandboxing",
+          "statement": "Eingaben muessen bereinigt und in sicherer Umgebung verarbeitet werden.",
+          "keywords": ["sanitization", "bereinigung", "sandbox"],
+          "action_hint": "implement",
+          "object_hint": "Eingabebereinigung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V5.3",
+          "title": "Output Encoding and Injection Prevention",
+          "statement": "Ausgaben muessen kontextabhaengig kodiert werden, um Injection-Angriffe zu verhindern.",
+          "keywords": ["output", "encoding", "injection", "xss", "sql"],
+          "action_hint": "implement",
+          "object_hint": "Ausgabe-Encoding",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "V6",
+      "title": "Stored Cryptography",
+      "aliases": ["cryptography", "kryptographie", "verschluesselung", "stored cryptography"],
+      "keywords": ["kryptographie", "verschluesselung", "hashing", "schluessel", "key management"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V6.1",
+          "title": "Data Classification",
+          "statement": "Daten muessen klassifiziert und entsprechend ihrer Schutzklasse behandelt werden.",
+          "keywords": ["klassifizierung", "classification", "schutzklasse"],
+          "action_hint": "define",
+          "object_hint": "Datenklassifizierung",
+          "object_class": "data"
+        },
+        {
+          "subcontrol_id": "V6.2",
+          "title": "Algorithms",
+          "statement": "Nur zugelassene und aktuelle kryptographische Algorithmen duerfen verwendet werden.",
+          "keywords": ["algorithmus", "algorithm", "aes", "rsa"],
+          "action_hint": "configure",
+          "object_hint": "Kryptographische Algorithmen",
+          "object_class": "cryptographic_control"
+        },
+        {
+          "subcontrol_id": "V6.4",
+          "title": "Secret Management",
+          "statement": "Geheimnisse (Schluessel, Passwoerter, Tokens) muessen in einem Secret-Management-System verwaltet werden.",
+          "keywords": ["secret", "geheimnis", "vault", "key management"],
+          "action_hint": "maintain",
+          "object_hint": "Secret-Management",
+          "object_class": "cryptographic_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "V8",
+      "title": "Data Protection",
+      "aliases": ["data protection", "datenschutz", "datenverarbeitung"],
+      "keywords": ["datenschutz", "data protection", "pii", "personenbezogen", "privacy"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V8.1",
+          "title": "General Data Protection",
+          "statement": "Personenbezogene Daten muessen gemaess Datenschutzanforderungen geschuetzt werden.",
+          "keywords": ["personenbezogen", "pii", "datenschutz"],
+          "action_hint": "implement",
+          "object_hint": "Datenschutzmassnahmen",
+          "object_class": "data"
+        },
+        {
+          "subcontrol_id": "V8.2",
+          "title": "Client-Side Data Protection",
+          "statement": "Clientseitig gespeicherte sensible Daten muessen geschuetzt und minimiert werden.",
+          "keywords": ["client", "browser", "localstorage", "cookie"],
+          "action_hint": "implement",
+          "object_hint": "Clientseitiger Datenschutz",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V8.3",
+          "title": "Sensitive Private Data",
+          "statement": "Sensible Daten muessen bei Speicherung und Verarbeitung besonders geschuetzt werden.",
+          "keywords": ["sensibel", "vertraulich", "speicherung"],
+          "action_hint": "encrypt",
+          "object_hint": "Verschluesselung sensibler Daten",
+          "object_class": "data"
+        }
+      ]
+    },
+    {
+      "domain_id": "V9",
+      "title": "Communication",
+      "aliases": ["communication", "kommunikation", "tls", "transport"],
+      "keywords": ["tls", "ssl", "https", "transport", "kommunikation", "verschluesselung"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V9.1",
+          "title": "Client Communication Security",
+          "statement": "Alle Client-Server-Kommunikation muss ueber TLS verschluesselt werden.",
+          "keywords": ["tls", "https", "client", "server"],
+          "action_hint": "encrypt",
+          "object_hint": "TLS-Transportverschluesselung",
+          "object_class": "cryptographic_control"
+        },
+        {
+          "subcontrol_id": "V9.2",
+          "title": "Server Communication Security",
+          "statement": "Server-zu-Server-Kommunikation muss authentifiziert und verschluesselt erfolgen.",
+          "keywords": ["server", "mtls", "backend"],
+          "action_hint": "encrypt",
+          "object_hint": "Server-Kommunikationsverschluesselung",
+          "object_class": "cryptographic_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "V13",
+      "title": "API and Web Service",
+      "aliases": ["api", "web service", "rest", "graphql", "webservice"],
+      "keywords": ["api", "rest", "graphql", "webservice", "endpoint", "schnittstelle"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V13.1",
+          "title": "Generic Web Service Security",
+          "statement": "Web-Services muessen gegen gaengige Angriffe abgesichert werden.",
+          "keywords": ["web service", "sicherheit", "angriff"],
+          "action_hint": "implement",
+          "object_hint": "Web-Service-Absicherung",
+          "object_class": "interface"
+        },
+        {
+          "subcontrol_id": "V13.2",
+          "title": "RESTful Web Service",
+          "statement": "REST-APIs muessen Input-Validierung, Rate Limiting und sichere Authentifizierung implementieren.",
+          "keywords": ["rest", "api", "rate limiting", "input"],
+          "action_hint": "implement",
+          "object_hint": "REST-API-Absicherung",
+          "object_class": "interface"
+        },
+        {
+          "subcontrol_id": "V13.4",
+          "title": "GraphQL and Web Services",
+          "statement": "GraphQL-Endpoints muessen gegen Query-Complexity-Angriffe und Introspection geschuetzt werden.",
+          "keywords": ["graphql", "query", "complexity", "introspection"],
+          "action_hint": "configure",
+          "object_hint": "GraphQL-Absicherung",
+          "object_class": "interface"
+        }
+      ]
+    },
+    {
+      "domain_id": "V14",
+      "title": "Configuration",
+      "aliases": ["configuration", "konfiguration", "hardening", "haertung"],
+      "keywords": ["konfiguration", "hardening", "haertung", "header", "deployment"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V14.1",
+          "title": "Build and Deploy",
+          "statement": "Build- und Deployment-Prozesse muessen sicher konfiguriert und reproduzierbar sein.",
+          "keywords": ["build", "deploy", "ci/cd", "pipeline"],
+          "action_hint": "configure",
+          "object_hint": "Sichere Build-Pipeline",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "V14.2",
+          "title": "Dependency Management",
+          "statement": "Abhaengigkeiten muessen auf Schwachstellen geprueft und aktuell gehalten werden.",
+          "keywords": ["dependency", "abhaengigkeit", "sca", "sbom"],
+          "action_hint": "maintain",
+          "object_hint": "Abhaengigkeitsverwaltung",
+          "object_class": "system"
+        },
+        {
+          "subcontrol_id": "V14.3",
+          "title": "Unintended Security Disclosure",
+          "statement": "Fehlermeldungen und Debug-Informationen duerfen keine sicherheitsrelevanten Details preisgeben.",
+          "keywords": ["disclosure", "fehlermeldung", "debug", "information leakage"],
+          "action_hint": "configure",
+          "object_hint": "Fehlerbehandlung",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "V14.4",
+          "title": "HTTP Security Headers",
+          "statement": "HTTP-Sicherheitsheader muessen korrekt konfiguriert sein.",
+          "keywords": ["header", "csp", "hsts", "x-frame"],
+          "action_hint": "configure",
+          "object_hint": "HTTP-Sicherheitsheader",
+          "object_class": "configuration"
+        }
+      ]
+    }
+  ]
+}
@@ -0,0 +1,205 @@
+"""
+Source-Type-Klassifikation fuer Regulierungen und Frameworks.
+
+Dreistufiges Modell der normativen Verbindlichkeit:
+
+  Stufe 1 — GESETZ (law):
+    Rechtlich bindend. Bussgeld bei Verstoss.
+    Beispiele: DSGVO, NIS2, AI Act, CRA
+
+  Stufe 2 — LEITLINIE (guideline):
+    Offizielle Auslegungshilfe von Aufsichtsbehoerden.
+    Beweislastumkehr: Wer abweicht, muss begruenden warum.
+    Beispiele: EDPB-Leitlinien, BSI-Standards, WP29-Dokumente
+
+  Stufe 3 — FRAMEWORK (framework):
+    Freiwillige Best Practices, nicht rechtsverbindlich.
+    Aber: Koennen als "Stand der Technik" herangezogen werden.
+    Beispiele: ENISA, NIST, OWASP, OECD, CISA
+
+Mapping: source_regulation (aus control_parent_links) -> source_type
+"""
+
+# --- Type definitions ---
+SOURCE_TYPE_LAW = "law"           # law / regulation / directive — normative_strength is kept as-is
+SOURCE_TYPE_GUIDELINE = "guideline"  # guideline / standard — at most "should"
+SOURCE_TYPE_FRAMEWORK = "framework"  # framework / best practice — at most "may"
+
+# Maximum normative_strength allowed per source_type.
+# The DB constraint allows: must, should, may (NOT "can").
+NORMATIVE_STRENGTH_CAP: dict[str, str] = {
+    SOURCE_TYPE_LAW: "must",       # no restriction
+    SOURCE_TYPE_GUIDELINE: "should",  # at most "should"
+    SOURCE_TYPE_FRAMEWORK: "may",     # at most "may" (German: "kann")
+}
+
+# Ordering used for comparisons (higher = stronger).
+STRENGTH_ORDER: dict[str, int] = {
+    "may": 1,        # "may" — the value stored in the DB
+    "can": 1,        # alias ranked like "may"; not a valid DB value (see the constraint note above)
+    "should": 2,
+    "must": 3,
+}
+
+
+def cap_normative_strength(original: str, source_type: str) -> str:
+    """
+    Begrenzt die normative_strength basierend auf dem source_type.
+
+    Beispiel:
+        cap_normative_strength("must", "framework") -> "may"
+        cap_normative_strength("should", "law") -> "should"
+        cap_normative_strength("must", "guideline") -> "should"
+    """
+    cap = NORMATIVE_STRENGTH_CAP.get(source_type, "must")
+    cap_level = STRENGTH_ORDER.get(cap, 3)
+    original_level = STRENGTH_ORDER.get(original, 3)
+    if original_level > cap_level:
+        return cap
+    return original
+
+
+def get_highest_source_type(source_types: list[str]) -> str:
+    """
+    Bestimmt den hoechsten source_type aus einer Liste.
+    Ein Gesetz uebertrumpft alles.
+
+    Beispiel:
+        get_highest_source_type(["framework", "law"]) -> "law"
+        get_highest_source_type(["framework", "guideline"]) -> "guideline"
+    """
+    type_order = {SOURCE_TYPE_FRAMEWORK: 1, SOURCE_TYPE_GUIDELINE: 2, SOURCE_TYPE_LAW: 3}
+    if not source_types:
+        return SOURCE_TYPE_FRAMEWORK
+    return max(source_types, key=lambda t: type_order.get(t, 0))
+
+
+# ============================================================================
+# Classification: source_regulation -> source_type
+#
+# This map is used for the backfill and for future pipeline runs.
+# Register new regulations here!
+# ============================================================================
+
+SOURCE_REGULATION_CLASSIFICATION: dict[str, str] = {
+    # --- EU regulations (directly binding) ---
+    # NOTE(review): the NIS2 entry is named a directive ("Richtlinie") but is
+    # listed here; the classification is "law" either way.
+    "DSGVO (EU) 2016/679": SOURCE_TYPE_LAW,
+    "KI-Verordnung (EU) 2024/1689": SOURCE_TYPE_LAW,
+    "Cyber Resilience Act (CRA)": SOURCE_TYPE_LAW,
+    "NIS2-Richtlinie (EU) 2022/2555": SOURCE_TYPE_LAW,
+    "Data Act": SOURCE_TYPE_LAW,
+    "Data Governance Act (DGA)": SOURCE_TYPE_LAW,
+    "Markets in Crypto-Assets (MiCA)": SOURCE_TYPE_LAW,
+    "Maschinenverordnung (EU) 2023/1230": SOURCE_TYPE_LAW,
+    "Batterieverordnung (EU) 2023/1542": SOURCE_TYPE_LAW,
+    "AML-Verordnung": SOURCE_TYPE_LAW,
+
+    # --- EU directives (binding after national transposition) ---
+    # Treated like laws for compliance purposes (no entries in this section yet).
+
+    # --- National laws ---
+    # NOTE(review): "EU Blue Guide 2022" is, as far as I know, a Commission
+    # guidance notice rather than a statute — confirm that "law" is intended.
+    "Bundesdatenschutzgesetz (BDSG)": SOURCE_TYPE_LAW,
+    "Telekommunikationsgesetz": SOURCE_TYPE_LAW,
+    "Telekommunikationsgesetz Oesterreich": SOURCE_TYPE_LAW,
+    "Gewerbeordnung (GewO)": SOURCE_TYPE_LAW,
+    "Handelsgesetzbuch (HGB)": SOURCE_TYPE_LAW,
+    "Abgabenordnung (AO)": SOURCE_TYPE_LAW,
+    "IFRS-Übernahmeverordnung": SOURCE_TYPE_LAW,
+    "Österreichisches Datenschutzgesetz (DSG)": SOURCE_TYPE_LAW,
+    "LOPDGDD - Ley Orgánica de Protección de Datos (Spanien)": SOURCE_TYPE_LAW,
+    "Loi Informatique et Libertés (Frankreich)": SOURCE_TYPE_LAW,
+    "Információs önrendelkezési jog törvény (Ungarn)": SOURCE_TYPE_LAW,
+    "EU Blue Guide 2022": SOURCE_TYPE_LAW,
+
+    # --- EDPB/WP29 guidelines (official interpretation aids) ---
+    "EDPB Leitlinien 01/2019 (Zertifizierung)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 01/2020 (Datentransfers)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 01/2020 (Vernetzte Fahrzeuge)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 01/2022 (BCR)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 01/2024 (Berechtigtes Interesse)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 04/2019 (Data Protection by Design)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 05/2020 - Einwilligung": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 07/2020 (Datentransfers)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 08/2020 (Social Media)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 09/2022 (Data Breach)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 09/2022 - Meldung von Datenschutzverletzungen": SOURCE_TYPE_GUIDELINE,
+    "EDPB Empfehlungen 01/2020 - Ergaenzende Massnahmen fuer Datentransfers": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien - Berechtigtes Interesse (Art. 6(1)(f))": SOURCE_TYPE_GUIDELINE,
+    "WP244 Leitlinien (Profiling)": SOURCE_TYPE_GUIDELINE,
+    "WP251 Leitlinien (Profiling)": SOURCE_TYPE_GUIDELINE,
+    "WP260 Leitlinien (Transparenz)": SOURCE_TYPE_GUIDELINE,
+
+    # --- BSI standards (technical guidelines issued by a public authority) ---
+    "BSI-TR-03161-1": SOURCE_TYPE_GUIDELINE,
+    "BSI-TR-03161-2": SOURCE_TYPE_GUIDELINE,
+    "BSI-TR-03161-3": SOURCE_TYPE_GUIDELINE,
+
+    # --- ENISA (EU agency, but its recommendations are not legally binding) ---
+    "ENISA Cybersecurity State 2024": SOURCE_TYPE_FRAMEWORK,
+    "ENISA ICS/SCADA Dependencies": SOURCE_TYPE_FRAMEWORK,
+    "ENISA Supply Chain Good Practices": SOURCE_TYPE_FRAMEWORK,
+    "ENISA Threat Landscape Supply Chain": SOURCE_TYPE_FRAMEWORK,
+
+    # --- NIST (US standards, used internationally as best practice) ---
+    "NIST AI Risk Management Framework": SOURCE_TYPE_FRAMEWORK,
+    "NIST Cybersecurity Framework 2.0": SOURCE_TYPE_FRAMEWORK,
+    "NIST SP 800-207 (Zero Trust)": SOURCE_TYPE_FRAMEWORK,
+    "NIST SP 800-218 (SSDF)": SOURCE_TYPE_FRAMEWORK,
+    "NIST SP 800-53 Rev. 5": SOURCE_TYPE_FRAMEWORK,
+    "NIST SP 800-63-3": SOURCE_TYPE_FRAMEWORK,
+
+    # --- OWASP (community standards) ---
+    "OWASP API Security Top 10 (2023)": SOURCE_TYPE_FRAMEWORK,
+    "OWASP ASVS 4.0": SOURCE_TYPE_FRAMEWORK,
+    "OWASP MASVS 2.0": SOURCE_TYPE_FRAMEWORK,
+    "OWASP SAMM 2.0": SOURCE_TYPE_FRAMEWORK,
+    "OWASP Top 10 (2021)": SOURCE_TYPE_FRAMEWORK,
+
+    # --- Other frameworks ---
+    "OECD KI-Empfehlung": SOURCE_TYPE_FRAMEWORK,
+    "CISA Secure by Design": SOURCE_TYPE_FRAMEWORK,
+}
+
+
+def classify_source_regulation(source_regulation: str) -> str:
+    """
+    Klassifiziert eine source_regulation als law, guideline oder framework.
+
+    Verwendet exaktes Matching gegen die Map. Bei unbekannten Quellen
+    wird anhand von Schluesselwoertern geraten, Fallback ist 'framework'
+    (konservativstes Ergebnis).
+    """
+    if not source_regulation:
+        return SOURCE_TYPE_FRAMEWORK
+
+    # Exaktes Match
+    if source_regulation in SOURCE_REGULATION_CLASSIFICATION:
+        return SOURCE_REGULATION_CLASSIFICATION[source_regulation]
+
+    # Heuristik fuer unbekannte Quellen
+    lower = source_regulation.lower()
+
+    # Gesetze erkennen
+    law_indicators = [
+        "verordnung", "richtlinie", "gesetz", "directive", "regulation",
+        "(eu)", "(eg)", "act", "ley", "loi", "törvény", "código",
+    ]
+    if any(ind in lower for ind in law_indicators):
+        return SOURCE_TYPE_LAW
+
+    # Leitlinien erkennen
+    guideline_indicators = [
+        "edpb", "leitlinie", "guideline", "wp2", "bsi", "empfehlung",
+    ]
+    if any(ind in lower for ind in guideline_indicators):
+        return SOURCE_TYPE_GUIDELINE
+
+    # Frameworks erkennen
+    framework_indicators = [
+        "enisa", "nist", "owasp", "oecd", "cisa", "framework", "iso",
+    ]
+    if any(ind in lower for ind in framework_indicators):
+        return SOURCE_TYPE_FRAMEWORK
+
+    # Konservativ: unbekannt = framework (geringste Verbindlichkeit)
+    return SOURCE_TYPE_FRAMEWORK
@@ -8,12 +8,16 @@ from .models import (
    EvidenceDB,
    RiskDB,
    AuditExportDB,
+    LLMGenerationAuditDB,
+    AssertionDB,
    RegulationTypeEnum,
    ControlTypeEnum,
    ControlDomainEnum,
    RiskLevelEnum,
    EvidenceStatusEnum,
    ControlStatusEnum,
+    EvidenceConfidenceEnum,
+    EvidenceTruthStatusEnum,
 )
 from .repository import (
    RegulationRepository,
@@ -33,6 +37,8 @@ __all__ = [
    "EvidenceDB",
    "RiskDB",
    "AuditExportDB",
+    "LLMGenerationAuditDB",
+    "AssertionDB",
    # Enums
    "RegulationTypeEnum",
    "ControlTypeEnum",
@@ -40,6 +46,8 @@ __all__ = [
    "RiskLevelEnum",
    "EvidenceStatusEnum",
    "ControlStatusEnum",
+    "EvidenceConfidenceEnum",
+    "EvidenceTruthStatusEnum",
    # Repositories
    "RegulationRepository",
    "RequirementRepository",
@@ -0,0 +1,164 @@
+"""
+SQLAlchemy models for VVT Master Libraries + Process Templates.
+
+Tables (global, no tenant_id):
+- vvt_lib_data_subjects
+- vvt_lib_data_categories (hierarchical, self-referencing)
+- vvt_lib_recipients
+- vvt_lib_legal_bases
+- vvt_lib_retention_rules
+- vvt_lib_transfer_mechanisms
+- vvt_lib_purposes
+- vvt_lib_toms
+
+Tenant-scoped:
+- vvt_process_templates (system + tenant-specific)
+"""
+
+from datetime import datetime, timezone
+
+from sqlalchemy import (
+    Column, String, Text, Boolean, Integer, DateTime, JSON, Index,
+    ForeignKey,
+)
+from sqlalchemy.dialects.postgresql import UUID
+
+from classroom_engine.database import Base
+
+
+class VVTLibDataSubjectDB(Base):
+    __tablename__ = 'vvt_lib_data_subjects'
+
+    id = Column(String(50), primary_key=True)
+    label_de = Column(String(200), nullable=False)
+    description_de = Column(Text)
+    art9_relevant = Column(Boolean, default=False)
+    typical_for = Column(JSON, default=list)
+    sort_order = Column(Integer, default=0)
+    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+
+
+class VVTLibDataCategoryDB(Base):
+    __tablename__ = 'vvt_lib_data_categories'
+
+    id = Column(String(50), primary_key=True)
+    parent_id = Column(String(50), ForeignKey('vvt_lib_data_categories.id', ondelete='SET NULL'), nullable=True)
+    label_de = Column(String(200), nullable=False)
+    description_de = Column(Text)
+    is_art9 = Column(Boolean, default=False)
+    is_art10 = Column(Boolean, default=False)
+    risk_weight = Column(Integer, default=1)
+    default_retention_rule = Column(String(50))
+    default_legal_basis = Column(String(50))
+    sort_order = Column(Integer, default=0)
+    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+
+
+class VVTLibRecipientDB(Base):
+    __tablename__ = 'vvt_lib_recipients'
+
+    id = Column(String(50), primary_key=True)
+    type = Column(String(20), nullable=False)
+    label_de = Column(String(200), nullable=False)
+    description_de = Column(Text)
+    is_third_country = Column(Boolean, default=False)
+    country = Column(String(5))
+    sort_order = Column(Integer, default=0)
+    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+
+
+class VVTLibLegalBasisDB(Base):
+    __tablename__ = 'vvt_lib_legal_bases'
+
+    id = Column(String(50), primary_key=True)
+    article = Column(String(50), nullable=False)
+    type = Column(String(30), nullable=False)
+    label_de = Column(String(300), nullable=False)
+    description_de = Column(Text)
+    is_art9 = Column(Boolean, default=False)
+    typical_national_law = Column(String(100))
+    sort_order = Column(Integer, default=0)
+    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+
+
+class VVTLibRetentionRuleDB(Base):
+    __tablename__ = 'vvt_lib_retention_rules'
+
+    id = Column(String(50), primary_key=True)
+    label_de = Column(String(300), nullable=False)
+    description_de = Column(Text)
+    legal_basis = Column(String(200))
+    duration = Column(Integer, nullable=False)
+    duration_unit = Column(String(10), nullable=False)
+    start_event = Column(String(200))
+    deletion_procedure = Column(String(500))
+    sort_order = Column(Integer, default=0)
+    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+
+
+class VVTLibTransferMechanismDB(Base):
+    __tablename__ = 'vvt_lib_transfer_mechanisms'
+
+    id = Column(String(50), primary_key=True)
+    label_de = Column(String(300), nullable=False)
+    description_de = Column(Text)
+    article = Column(String(50))
+    requires_tia = Column(Boolean, default=False)
+    sort_order = Column(Integer, default=0)
+    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+
+
+class VVTLibPurposeDB(Base):
+    __tablename__ = 'vvt_lib_purposes'
+
+    id = Column(String(50), primary_key=True)
+    label_de = Column(String(300), nullable=False)
+    description_de = Column(Text)
+    typical_legal_basis = Column(String(50))
+    typical_for = Column(JSON, default=list)
+    sort_order = Column(Integer, default=0)
+    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+
+
+class VVTLibTomDB(Base):
+    __tablename__ = 'vvt_lib_toms'
+
+    id = Column(String(50), primary_key=True)
+    category = Column(String(30), nullable=False)
+    label_de = Column(String(300), nullable=False)
+    description_de = Column(Text)
+    art32_reference = Column(String(100))
+    sort_order = Column(Integer, default=0)
+    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+
+
+class VVTProcessTemplateDB(Base):
+    __tablename__ = 'vvt_process_templates'
+
+    id = Column(String(80), primary_key=True)
+    name = Column(String(300), nullable=False)
+    description = Column(Text)
+    business_function = Column(String(50))
+    purpose_refs = Column(JSON, default=list)
+    legal_basis_refs = Column(JSON, default=list)
+    data_subject_refs = Column(JSON, default=list)
+    data_category_refs = Column(JSON, default=list)
+    recipient_refs = Column(JSON, default=list)
+    tom_refs = Column(JSON, default=list)
+    transfer_mechanism_refs = Column(JSON, default=list)
+    retention_rule_ref = Column(String(50))
+    typical_systems = Column(JSON, default=list)
+    protection_level = Column(String(10), default='MEDIUM')
+    dpia_required = Column(Boolean, default=False)
+    risk_score = Column(Integer)
+    tags = Column(JSON, default=list)
+    is_system = Column(Boolean, default=True)
+    tenant_id = Column(UUID(as_uuid=True), nullable=True)
+    sort_order = Column(Integer, default=0)
+    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+    updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
+
+    __table_args__ = (
+        Index('idx_vvt_process_templates_bf', 'business_function'),
+        Index('idx_vvt_process_templates_system', 'is_system'),
+    )
@@ -79,6 +79,26 @@ class VVTActivityDB(Base):
    next_review_at = Column(DateTime(timezone=True), nullable=True)
    created_by = Column(String(200), default='system')
    dsfa_id = Column(UUID(as_uuid=True), nullable=True)
+
+    # Library refs (Phase 1 — parallel to freetext fields)
+    purpose_refs = Column(JSON, nullable=True)
+    legal_basis_refs = Column(JSON, nullable=True)
+    data_subject_refs = Column(JSON, nullable=True)
+    data_category_refs = Column(JSON, nullable=True)
+    recipient_refs = Column(JSON, nullable=True)
+    retention_rule_ref = Column(String(50), nullable=True)
+    transfer_mechanism_refs = Column(JSON, nullable=True)
+    tom_refs = Column(JSON, nullable=True)
+
+    # Cross-module links
+    linked_loeschfristen_ids = Column(JSON, nullable=True)
+    linked_tom_measure_ids = Column(JSON, nullable=True)
+
+    # Template + risk
+    source_template_id = Column(String(80), nullable=True)
+    risk_score = Column(Integer, nullable=True)
+    art30_completeness = Column(JSON, nullable=True)
+
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

@@ -69,7 +69,7 @@ class AnchorFinder:
        tags_str = " ".join(control.tags[:3]) if control.tags else ""
        query = f"{control.title} {tags_str}".strip()

-        results = await self.rag.search(
+        results = await self.rag.search_with_rerank(
            query=query,
            collection="bp_compliance_ce",
            top_k=15,
@@ -0,0 +1,80 @@
+"""Assertion Engine — splits text into sentences and classifies each.
+
+Each sentence is tagged as:
+- assertion: normative statement (pflicht / empfehlung / kann)
+- fact: references concrete evidence artifacts
+- rationale: explains why something is required
+"""
+
+import re
+from typing import Optional
+
+from .normative_patterns import (
+    PFLICHT_RE, EMPFEHLUNG_RE, KANN_RE, RATIONALE_RE, EVIDENCE_RE,
+)
+
+# Sentence splitter. Splits (a) at whitespace that follows ".", "!" or "?" and
+# precedes an uppercase letter (A-Z, Ä, Ö, Ü), and (b) at blank lines, i.e. a
+# newline, optional whitespace, and another newline. A lone newline therefore
+# only splits when it also satisfies rule (a).
+_SENTENCE_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ])|(?:\n\s*\n)')
+
+
+def extract_assertions(
+    text: str,
+    entity_type: str,
+    entity_id: str,
+    tenant_id: Optional[str] = None,
+) -> list[dict]:
+    """Break *text* into sentences and build one classified record per sentence.
+
+    Fragments shorter than five characters (after stripping) are dropped.
+    ``sentence_index`` is the fragment's position in the raw split, so the
+    indices of the returned records may contain gaps.
+
+    Args:
+        text: Free text to analyse; empty or whitespace-only input yields ``[]``.
+        entity_type: Copied verbatim into every record.
+        entity_id: Copied verbatim into every record.
+        tenant_id: Optional tenant identifier, copied verbatim into every record.
+
+    Returns:
+        A list of dicts (one per kept sentence) with ``evidence_ids`` set to
+        an empty list and ``confidence`` set to ``0.0``.
+    """
+    stripped = text.strip() if text else ""
+    if not stripped:
+        return []
+
+    records: list[dict] = []
+    for position, fragment in enumerate(_SENTENCE_SPLIT.split(stripped)):
+        sentence = fragment.strip()
+        # Too short to be a meaningful sentence (also covers empty fragments).
+        if len(sentence) < 5:
+            continue
+
+        kind, tier = _classify_sentence(sentence)
+        record = {
+            "tenant_id": tenant_id,
+            "entity_type": entity_type,
+            "entity_id": entity_id,
+            "sentence_text": sentence,
+            "sentence_index": position,
+            "assertion_type": kind,
+            "normative_tier": tier,
+            "evidence_ids": [],
+            "confidence": 0.0,
+        }
+        records.append(record)
+
+    return records
+
+
+def _classify_sentence(sentence: str) -> tuple[str, Optional[str]]:
+    """Classify one sentence as fact, rationale, or (tiered) assertion.
+
+    Decision order:
+      1. Any evidence keyword match -> ``("fact", None)``.
+      2. At least one rationale match, and at least as many rationale matches
+         as normative matches (pflicht + empfehlung + kann) ->
+         ``("rationale", None)``.
+      3. First matching normative tier, checked in the order pflicht,
+         empfehlung, kann -> ``("assertion", <tier>)``.
+      4. Otherwise ``("assertion", None)``.
+    """
+    # Evidence wording wins over every other signal.
+    if EVIDENCE_RE.search(sentence):
+        return ("fact", None)
+
+    # Tier name -> pattern, in priority order.
+    tiered_patterns = (
+        ("pflicht", PFLICHT_RE),
+        ("empfehlung", EMPFEHLUNG_RE),
+        ("kann", KANN_RE),
+    )
+
+    normative_hits = sum(len(pattern.findall(sentence)) for _, pattern in tiered_patterns)
+    rationale_hits = len(RATIONALE_RE.findall(sentence))
+    if rationale_hits > 0 and rationale_hits >= normative_hits:
+        return ("rationale", None)
+
+    for tier, pattern in tiered_patterns:
+        if pattern.search(sentence):
+            return ("assertion", tier)
+
+    # No normative keyword at all: unclassified assertion.
+    return ("assertion", None)
@@ -0,0 +1,618 @@
+"""Batch Dedup Runner — orchestrates deduplication of ~85k atomic controls.
+
+Reduces Pass 0b controls from ~85k to ~18-25k unique Master Controls via:
+  Phase 1: Intra-Group Dedup — same merge_group_hint → pick best, link rest
+           (85k → ~52k, mostly title-identical short-circuit, no embeddings)
+  Phase 2: Cross-Group Dedup — embed masters, search Qdrant for similar
+           masters with different hints (52k → ~18-25k)
+
+All Pass 0b controls have pattern_id=NULL. The primary grouping key is
+merge_group_hint (format: "action_type:norm_obj:trigger_key"), which
+encodes the normalized action, object, and trigger.
+
+Usage:
+    runner = BatchDedupRunner(db)
+    stats = await runner.run(dry_run=True)       # preview
+    stats = await runner.run(dry_run=False)       # execute
+    stats = await runner.run(hint_filter="implement:multi_factor_auth:none")
+"""
+
+import json
+import logging
+import time
+from collections import defaultdict
+
+from sqlalchemy import text
+
+from compliance.services.control_dedup import (
+    canonicalize_text,
+    ensure_qdrant_collection,
+    get_embedding,
+    normalize_action,
+    normalize_object,
+    qdrant_search_cross_regulation,
+    qdrant_upsert,
+    LINK_THRESHOLD,
+    REVIEW_THRESHOLD,
+)
+
+logger = logging.getLogger(__name__)
+
+# Default collection name used by BatchDedupRunner for all Qdrant calls
+# (ensure/upsert/search) unless the caller passes a different one.
+DEDUP_COLLECTION = "atomic_controls_dedup"
+
+
+# ── Quality Score ────────────────────────────────────────────────────────
+
+
+def _count_json_items(value) -> int:
+    """Return the number of entries in a list-like control field.
+
+    *value* may already be a collection or a JSON-encoded string. Strings that
+    are not valid JSON, and values that do not decode to a collection (for
+    example JSON ``null`` or a bare number), count as zero instead of raising.
+    """
+    if isinstance(value, str):
+        try:
+            value = json.loads(value)
+        except (json.JSONDecodeError, TypeError):
+            return 0
+    if isinstance(value, (list, tuple, dict, set, frozenset)):
+        return len(value)
+    return 0
+
+
+def quality_score(control: dict) -> float:
+    """Score a control by richness of requirements, tests, evidence, and objective.
+
+    Weights: 2.0 per requirement, 1.5 per test procedure entry, 1.0 per
+    evidence entry, plus ``len(objective) / 200`` capped at 3.0.
+
+    Missing, empty, unparsable, or non-collection fields contribute nothing.
+
+    Args:
+        control: Control dict; the keys ``requirements``, ``test_procedure``,
+            ``evidence`` and ``objective`` are read if present.
+
+    Returns:
+        The score; a higher score means a better candidate for master control.
+    """
+    weights = (
+        ("requirements", 2.0),
+        ("test_procedure", 1.5),
+        ("evidence", 1.0),
+    )
+    score = 0.0
+    for field_name, weight in weights:
+        score += _count_json_items(control.get(field_name)) * weight
+
+    objective = control.get("objective") or ""
+    score += min(len(objective) / 200, 3.0)
+
+    return score
+
+
+# ── Batch Dedup Runner ───────────────────────────────────────────────────
+
+
+class BatchDedupRunner:
+    """Batch dedup orchestrator for existing Pass 0b atomic controls."""
+
+    def __init__(self, db, collection: str = DEDUP_COLLECTION):
+        """Store the DB handle and target collection and zero every counter.
+
+        Args:
+            db: Database handle; the runner calls ``execute``, ``commit`` and
+                ``rollback`` on it.
+            collection: Name of the Qdrant collection to use.
+        """
+        self.db = db
+        self.collection = collection
+
+        # All run counters start at zero; the tuple fixes the key order.
+        counter_names = (
+            "total_controls",
+            "unique_hints",
+            "phase1_groups_processed",
+            "masters",
+            "linked",
+            "review",
+            "new_controls",
+            "parent_links_transferred",
+            "cross_group_linked",
+            "cross_group_review",
+            "errors",
+            "skipped_title_identical",
+        )
+        self.stats = dict.fromkeys(counter_names, 0)
+
+        # Progress state exposed through get_status().
+        self._progress_phase = ""
+        self._progress_count = 0
+        self._progress_total = 0
+
+    async def run(
+        self,
+        dry_run: bool = False,
+        hint_filter: str = None,
+    ) -> dict:
+        """Run the full batch dedup pipeline.
+
+        Phase 1 processes every merge_group_hint group; a failing group is
+        logged, counted in ``stats["errors"]`` and rolled back, and the run
+        continues with the next group. Phase 2 (cross-group pass) only runs
+        when ``dry_run`` is False and is not restricted by ``hint_filter``.
+
+        Args:
+            dry_run: If True, compute stats but don't modify DB/Qdrant.
+                Phase 1 may still request embeddings and search Qdrant for
+                candidates whose title differs from their master's.
+            hint_filter: If set, only process groups matching this hint prefix.
+
+        Returns:
+            Stats dict with counts, plus ``elapsed_seconds``.
+        """
+        start = time.monotonic()
+        logger.info("BatchDedup starting (dry_run=%s, hint_filter=%s)",
+                     dry_run, hint_filter)
+
+        if not dry_run:
+            await ensure_qdrant_collection(collection=self.collection)
+
+        # Phase 1: Intra-group dedup (same merge_group_hint)
+        self._progress_phase = "phase1"
+        groups = self._load_merge_groups(hint_filter)
+        self._progress_total = self.stats["total_controls"]
+
+        for hint, controls in groups:
+            try:
+                await self._process_hint_group(hint, controls, dry_run)
+                self.stats["phase1_groups_processed"] += 1
+            except Exception as e:
+                # One bad group must not abort the batch; keep the traceback
+                # so the failure can be diagnosed afterwards.
+                logger.error("BatchDedup Phase 1 error on hint %s: %s", hint, e,
+                             exc_info=True)
+                self.stats["errors"] += 1
+                try:
+                    self.db.rollback()
+                except Exception as rollback_error:
+                    # Best effort: report instead of silently ignoring, because
+                    # a failed rollback can poison the following groups.
+                    logger.warning(
+                        "BatchDedup rollback failed after Phase 1 error on hint %s: %s",
+                        hint, rollback_error,
+                    )
+
+        logger.info(
+            "BatchDedup Phase 1 done: %d masters, %d linked, %d review",
+            self.stats["masters"], self.stats["linked"], self.stats["review"],
+        )
+
+        # Phase 2: Cross-group dedup via embeddings
+        if not dry_run:
+            self._progress_phase = "phase2"
+            await self._run_cross_group_pass()
+
+        elapsed = time.monotonic() - start
+        self.stats["elapsed_seconds"] = round(elapsed, 1)
+        logger.info("BatchDedup completed in %.1fs: %s", elapsed, self.stats)
+        return self.stats
+
+    def _load_merge_groups(self, hint_filter: str = None) -> list:
+        """Fetch Pass 0b controls and bucket them by merge_group_hint.
+
+        Deprecated and duplicate controls are excluded. Controls without a
+        hint share the bucket keyed by the empty string. As a side effect,
+        ``stats["total_controls"]`` and ``stats["unique_hints"]`` are set.
+
+        Args:
+            hint_filter: Optional hint prefix. It is applied with SQL ``LIKE``,
+                so ``%`` and ``_`` inside the value act as wildcards.
+
+        Returns:
+            A list of ``(hint, controls)`` tuples, largest group first.
+        """
+        where_parts = [
+            "decomposition_method = 'pass0b'",
+            "release_state != 'deprecated'",
+            "release_state != 'duplicate'",
+        ]
+        bind_values = {}
+
+        if hint_filter:
+            where_parts.append("generation_metadata->>'merge_group_hint' LIKE :hf")
+            bind_values["hf"] = f"{hint_filter}%"
+
+        where = " AND ".join(where_parts)
+        query = text(f"""
+            SELECT id::text, control_id, title, objective,
+                   pattern_id, requirements::text, test_procedure::text,
+                   evidence::text, release_state,
+                   generation_metadata->>'merge_group_hint' as merge_group_hint,
+                   generation_metadata->>'action_object_class' as action_object_class
+            FROM canonical_controls
+            WHERE {where}
+            ORDER BY control_id
+        """)
+        rows = self.db.execute(query, bind_values).fetchall()
+
+        # Names for the first nine selected columns, in SELECT order.
+        leading_columns = (
+            "uuid", "control_id", "title", "objective", "pattern_id",
+            "requirements", "test_procedure", "evidence", "release_state",
+        )
+        grouped = defaultdict(list)
+        for row in rows:
+            hint_value = row[9] or ""
+            record = dict(zip(leading_columns, row))
+            record["merge_group_hint"] = hint_value
+            record["action_object_class"] = row[10] or ""
+            grouped[hint_value].append(record)
+
+        self.stats["total_controls"] = len(rows)
+        self.stats["unique_hints"] = len(grouped)
+
+        def group_size(item):
+            return len(item[1])
+
+        sorted_groups = sorted(grouped.items(), key=group_size, reverse=True)
+        logger.info("BatchDedup loaded %d controls in %d hint groups",
+                     len(rows), len(sorted_groups))
+        return sorted_groups
+
+    def _sub_group_by_merge_hint(self, controls: list) -> dict:
+        """Bucket controls by their merge_group_hint.
+
+        A control with an empty hint gets its own bucket keyed
+        ``__no_hint_<uuid>``, so hint-less controls are never grouped together.
+
+        Returns:
+            A plain dict mapping bucket key to the list of controls, in
+            first-seen order.
+        """
+        buckets: dict = {}
+        for control in controls:
+            bucket_key = control["merge_group_hint"] or f"__no_hint_{control['uuid']}"
+            buckets.setdefault(bucket_key, []).append(control)
+        return buckets
+
+    async def _process_hint_group(
+        self,
+        hint: str,
+        controls: list,
+        dry_run: bool,
+    ):
+        """Elect a master for one merge_group_hint group and link the rest.
+
+        The control with the highest ``quality_score`` becomes the master (a
+        single control is always its own master). Every other control is
+        linked directly when its title equals the master's (case-insensitive,
+        surrounding whitespace ignored); otherwise it is handed to
+        ``_check_and_link_within_group``.
+
+        Progress advances once per control, master included, so that
+        ``_progress_count`` can reach the total number of loaded controls.
+
+        Args:
+            hint: The group's merge_group_hint; passed on to progress logging.
+            controls: Control dicts sharing that hint. An empty list is a no-op.
+            dry_run: If True, this method updates counters only and issues no
+                DB or Qdrant writes itself.
+        """
+        if not controls:
+            return
+
+        if len(controls) == 1:
+            # Singleton → always master; no scoring needed.
+            master, others = controls[0], []
+        else:
+            # Sort by quality score (best first)
+            ranked = sorted(controls, key=quality_score, reverse=True)
+            master, others = ranked[0], ranked[1:]
+
+        self.stats["masters"] += 1
+        if not dry_run:
+            await self._embed_and_index(master)
+        # The master counts towards progress as well.
+        self._progress_count += 1
+        self._log_progress(hint)
+
+        if not others:
+            return
+
+        master_title = master["title"].strip().lower()
+        for candidate in others:
+            if candidate["title"].strip().lower() == master_title:
+                # Identical title → direct link (no embedding needed)
+                self.stats["linked"] += 1
+                self.stats["skipped_title_identical"] += 1
+                if not dry_run:
+                    await self._mark_duplicate(master, candidate, confidence=1.0)
+            else:
+                # Different title within same hint → still likely duplicate;
+                # use the embedding to verify.
+                await self._check_and_link_within_group(master, candidate, dry_run)
+
+            self._progress_count += 1
+            self._log_progress(hint)
+
+    async def _check_and_link_within_group(
+        self,
+        master: dict,
+        candidate: dict,
+        dry_run: bool,
+    ):
+        """Decide via embedding similarity what to do with a same-hint candidate.
+
+        Outcomes:
+          * no embedding, or no usable Qdrant hit → link to *master* (0.90)
+          * best usable hit scores above LINK_THRESHOLD → link to that hit
+          * best usable hit scores above REVIEW_THRESHOLD → write a review entry
+          * otherwise → index the candidate as an additional master
+
+        A hit is usable when its payload carries a ``control_uuid`` that is
+        not the candidate's own. The candidate can already be in the
+        collection from an earlier run; without this filter it would match
+        itself and be marked a duplicate of itself.
+
+        The embedding request and the Qdrant search are made even when
+        ``dry_run`` is True; only the writes are skipped.
+
+        Args:
+            master: The group's master control.
+            candidate: The control being checked.
+            dry_run: If True, only counters are updated.
+        """
+        parts = candidate["merge_group_hint"].split(":", 2)
+        action = parts[0]
+        obj = parts[1] if len(parts) > 1 else ""
+
+        canonical = canonicalize_text(action, obj, candidate["title"])
+        embedding = await get_embedding(canonical)
+
+        if not embedding:
+            # Can't embed → link anyway (same hint = same action+object)
+            self.stats["linked"] += 1
+            if not dry_run:
+                await self._mark_duplicate(master, candidate, confidence=0.90)
+            return
+
+        # Search the dedup collection (unfiltered — pattern_id is NULL)
+        results = await qdrant_search_cross_regulation(
+            embedding, top_k=3, collection=self.collection,
+        )
+
+        # Keep only hits that can serve as a merge target.
+        usable = []
+        for hit in results or []:
+            hit_uuid = (hit.get("payload") or {}).get("control_uuid")
+            if hit_uuid and hit_uuid != candidate["uuid"]:
+                usable.append(hit)
+
+        if not usable:
+            # No usable Qdrant match (master might not be indexed) → link to master
+            self.stats["linked"] += 1
+            if not dry_run:
+                await self._mark_duplicate(master, candidate, confidence=0.90)
+            return
+
+        # Results are taken in the order returned by the search, best first.
+        best = usable[0]
+        best_score = best.get("score", 0.0)
+        best_payload = best.get("payload") or {}
+        best_uuid = best_payload["control_uuid"]
+
+        if best_score > LINK_THRESHOLD:
+            self.stats["linked"] += 1
+            if not dry_run:
+                await self._mark_duplicate_to(best_uuid, candidate, confidence=best_score)
+        elif best_score > REVIEW_THRESHOLD:
+            self.stats["review"] += 1
+            if not dry_run:
+                self._write_review(candidate, best_payload, best_score)
+        else:
+            # Very different despite same hint → new master
+            self.stats["new_controls"] += 1
+            if not dry_run:
+                await self._index_with_embedding(candidate, embedding)
+
+    async def _run_cross_group_pass(self):
+        """Phase 2: Find cross-group duplicates among surviving masters.
+
+        Every non-duplicate, non-deprecated Pass 0b control is embedded and
+        looked up in the dedup collection. The first eligible hit decides:
+        above LINK_THRESHOLD the current control is merged into the hit, above
+        REVIEW_THRESHOLD a review entry is written, otherwise the next hit is
+        tried.
+
+        A hit is eligible only if it refers to a different control that is
+        still active in this pass. Qdrant points of merged controls are not
+        removed, so without this check control A could be merged into B and B
+        later merged back into A, eliminating both.
+
+        Sets ``stats["cross_group_linked"]`` and ``stats["cross_group_review"]``
+        when the pass finishes; failures are counted in ``stats["errors"]``.
+        """
+        logger.info("BatchDedup Phase 2: Cross-group pass starting...")
+
+        rows = self.db.execute(text("""
+            SELECT id::text, control_id, title,
+                   generation_metadata->>'merge_group_hint' as merge_group_hint
+            FROM canonical_controls
+            WHERE decomposition_method = 'pass0b'
+              AND release_state != 'duplicate'
+              AND release_state != 'deprecated'
+            ORDER BY control_id
+        """)).fetchall()
+
+        self._progress_total = len(rows)
+        self._progress_count = 0
+        logger.info("BatchDedup Cross-group: %d masters to check", len(rows))
+        cross_linked = 0
+        cross_review = 0
+
+        # Controls that may still act as a merge target. A control leaves the
+        # set as soon as it has been merged into another one.
+        active_uuids = {row[0] for row in rows}
+
+        for i, r in enumerate(rows):
+            control_uuid = r[0]
+            hint = r[3] or ""
+            parts = hint.split(":", 2)
+            action = parts[0]
+            obj = parts[1] if len(parts) > 1 else ""
+
+            canonical = canonicalize_text(action, obj, r[2])
+            embedding = await get_embedding(canonical)
+            results = None
+            if embedding:
+                results = await qdrant_search_cross_regulation(
+                    embedding, top_k=5, collection=self.collection,
+                )
+
+            for match in results or []:
+                match_score = match.get("score", 0.0)
+                match_payload = match.get("payload") or {}
+                match_uuid = match_payload.get("control_uuid", "")
+
+                # Skip self-matches and anything that is not an active master
+                # (missing UUID, stale point, control merged earlier in this pass).
+                if match_uuid == control_uuid or match_uuid not in active_uuids:
+                    continue
+
+                if match_score > LINK_THRESHOLD:
+                    if self._link_cross_group_duplicate(match_uuid, control_uuid, match_score):
+                        cross_linked += 1
+                        active_uuids.discard(control_uuid)
+                    break  # Only one cross-link attempt per control
+                if match_score > REVIEW_THRESHOLD:
+                    try:
+                        self._write_review(
+                            {"control_id": r[1], "title": r[2], "objective": "",
+                             "merge_group_hint": hint, "pattern_id": None},
+                            match_payload, match_score,
+                        )
+                        cross_review += 1
+                    except Exception:
+                        # _write_review has already logged and rolled back;
+                        # one failed review must not abort the whole pass.
+                        self.stats["errors"] += 1
+                    break
+
+            # Advance progress for every row, including skipped ones.
+            self._progress_count = i + 1
+            if (i + 1) % 500 == 0:
+                logger.info("BatchDedup Cross-group: %d/%d checked, %d linked, %d review",
+                            i + 1, len(rows), cross_linked, cross_review)
+
+        self.stats["cross_group_linked"] = cross_linked
+        self.stats["cross_group_review"] = cross_review
+        logger.info("BatchDedup Cross-group complete: %d linked, %d review",
+                     cross_linked, cross_review)
+
+    def _link_cross_group_duplicate(
+        self,
+        master_uuid: str,
+        duplicate_uuid: str,
+        score: float,
+    ) -> bool:
+        """Persist one cross-group merge of *duplicate_uuid* into *master_uuid*.
+
+        Returns:
+            True when the merge was committed; False when it failed, in which
+            case the transaction is rolled back and ``stats["errors"]`` is
+            incremented.
+        """
+        try:
+            self.db.execute(text("""
+                UPDATE canonical_controls
+                SET release_state = 'duplicate', merged_into_uuid = CAST(:master AS uuid)
+                WHERE id = CAST(:dup AS uuid)
+                  AND release_state != 'duplicate'
+            """), {"master": master_uuid, "dup": duplicate_uuid})
+
+            self.db.execute(text("""
+                INSERT INTO control_parent_links
+                    (control_uuid, parent_control_uuid, link_type, confidence)
+                VALUES (CAST(:cu AS uuid), CAST(:pu AS uuid), 'cross_regulation', :conf)
+                ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
+            """), {"cu": master_uuid, "pu": duplicate_uuid, "conf": score})
+
+            # Transfer parent links
+            transferred = self._transfer_parent_links(master_uuid, duplicate_uuid)
+
+            self.db.commit()
+        except Exception as e:
+            logger.error("BatchDedup cross-group link error %s→%s: %s",
+                         duplicate_uuid, master_uuid, e)
+            self.db.rollback()
+            self.stats["errors"] += 1
+            return False
+
+        # Count transferred links only once the transaction is durable.
+        self.stats["parent_links_transferred"] += transferred
+        return True
+
+    # ── Qdrant Helpers ───────────────────────────────────────────────────
+
+    async def _embed_and_index(self, control: dict):
+        """Compute the control's embedding and index it in the dedup collection.
+
+        The embedding is computed from the canonical text built from the
+        action and object parts of ``merge_group_hint`` plus the title.
+        Payload construction and the upsert are delegated to
+        ``_index_with_embedding`` so both indexing paths stay identical.
+
+        If no embedding is returned, the control is not indexed and a warning
+        is logged.
+
+        Args:
+            control: Control dict with at least ``uuid``, ``control_id``,
+                ``title`` and ``merge_group_hint``.
+        """
+        parts = control["merge_group_hint"].split(":", 2)
+        action = parts[0]
+        obj = parts[1] if len(parts) > 1 else ""
+
+        canonical = canonicalize_text(action, obj, control["title"])
+        embedding = await get_embedding(canonical)
+
+        if not embedding:
+            # Without a vector the control cannot be found by later searches.
+            logger.warning("BatchDedup: no embedding for control %s — not indexed",
+                           control["control_id"])
+            return
+
+        await self._index_with_embedding(control, embedding)
+
+    async def _index_with_embedding(self, control: dict, embedding: list):
+        """Upsert *control* into the dedup collection using a ready-made vector.
+
+        The point id is the control's UUID. The payload carries identifiers,
+        title, normalized action/object, the canonical text, and the hint.
+
+        Args:
+            control: Control dict with ``uuid``, ``control_id``, ``title`` and
+                ``merge_group_hint`` (``pattern_id`` is optional).
+            embedding: Pre-computed embedding for the control.
+        """
+        hint = control["merge_group_hint"]
+        # Hint format is "action:object:trigger"; missing object → "".
+        action, obj = (hint.split(":", 2) + [""])[:2]
+
+        action_normalized = normalize_action(action)
+        object_normalized = normalize_object(obj)
+        canonical_text = canonicalize_text(action, obj, control["title"])
+
+        point_payload = {
+            "control_uuid": control["uuid"],
+            "control_id": control["control_id"],
+            "title": control["title"],
+            "pattern_id": control.get("pattern_id"),
+            "action_normalized": action_normalized,
+            "object_normalized": object_normalized,
+            "canonical_text": canonical_text,
+            "merge_group_hint": hint,
+        }
+        await qdrant_upsert(
+            point_id=control["uuid"],
+            embedding=embedding,
+            payload=point_payload,
+            collection=self.collection,
+        )
+
+    # ── DB Write Helpers ─────────────────────────────────────────────────
+
+    async def _mark_duplicate(self, master: dict, candidate: dict, confidence: float):
+        """Mark *candidate* as a duplicate of *master* and transfer parent links.
+
+        Thin wrapper around ``_mark_duplicate_to`` so that the UPDATE/INSERT
+        statements exist in one place only. That method commits on success
+        and, on failure, logs, rolls back and re-raises.
+
+        Args:
+            master: Control dict of the surviving master (``uuid`` is read).
+            candidate: Control dict of the control to mark as duplicate.
+            confidence: Confidence stored on the 'dedup_merge' link.
+        """
+        await self._mark_duplicate_to(master["uuid"], candidate, confidence)
+
+    async def _mark_duplicate_to(self, master_uuid: str, candidate: dict, confidence: float):
+        """Merge *candidate* into the master identified by *master_uuid*.
+
+        In one transaction: sets the candidate's release_state to 'duplicate'
+        with merged_into_uuid pointing at the master, records a 'dedup_merge'
+        link, and transfers the candidate's parent links. On any error the
+        failure is logged, the transaction is rolled back, and the exception
+        is re-raised.
+        """
+        try:
+            flag_duplicate = text("""
+                UPDATE canonical_controls
+                SET release_state = 'duplicate', merged_into_uuid = CAST(:master AS uuid)
+                WHERE id = CAST(:cand AS uuid)
+            """)
+            self.db.execute(
+                flag_duplicate,
+                {"master": master_uuid, "cand": candidate["uuid"]},
+            )
+
+            record_merge_link = text("""
+                INSERT INTO control_parent_links
+                    (control_uuid, parent_control_uuid, link_type, confidence)
+                VALUES (CAST(:master AS uuid), CAST(:cand_parent AS uuid), 'dedup_merge', :conf)
+                ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
+            """)
+            self.db.execute(
+                record_merge_link,
+                {"master": master_uuid, "cand_parent": candidate["uuid"], "conf": confidence},
+            )
+
+            moved = self._transfer_parent_links(master_uuid, candidate["uuid"])
+            self.stats["parent_links_transferred"] += moved
+
+            self.db.commit()
+        except Exception as exc:
+            logger.error("BatchDedup _mark_duplicate_to error %s→%s: %s",
+                         candidate["uuid"], master_uuid, exc)
+            self.db.rollback()
+            raise
+
+    def _transfer_parent_links(self, master_uuid: str, duplicate_uuid: str) -> int:
+        """Copy the duplicate's 'decomposition' parent links onto the master.
+
+        The duplicate's own link rows are left in place. A link whose parent
+        is the master itself is skipped. Inserts use ON CONFLICT DO NOTHING,
+        so links the master already has are not duplicated. This method does
+        not commit; the caller owns the transaction.
+
+        Args:
+            master_uuid: UUID of the control that receives the links.
+            duplicate_uuid: UUID of the control whose links are copied.
+
+        Returns:
+            Number of INSERT statements issued (conflicting rows included).
+        """
+        rows = self.db.execute(text("""
+            SELECT parent_control_uuid::text, link_type, confidence,
+                   source_regulation, source_article, obligation_candidate_id::text
+            FROM control_parent_links
+            WHERE control_uuid = CAST(:dup AS uuid)
+              AND link_type = 'decomposition'
+        """), {"dup": duplicate_uuid}).fetchall()
+
+        transferred = 0
+        for r in rows:
+            parent_uuid = r[0]
+            if parent_uuid == master_uuid:
+                continue
+            self.db.execute(text("""
+                INSERT INTO control_parent_links
+                    (control_uuid, parent_control_uuid, link_type, confidence,
+                     source_regulation, source_article, obligation_candidate_id)
+                VALUES (CAST(:cu AS uuid), CAST(:pu AS uuid), :lt, :conf,
+                        :sr, :sa, CAST(:oci AS uuid))
+                ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
+            """), {
+                "cu": master_uuid,
+                "pu": parent_uuid,
+                "lt": r[1],
+                # Only a missing confidence defaults to 1.0; a stored 0.0 is kept.
+                "conf": 1.0 if r[2] is None else float(r[2]),
+                "sr": r[3],
+                "sa": r[4],
+                "oci": r[5],
+            })
+            transferred += 1
+
+        return transferred
+
+    def _write_review(self, candidate: dict, matched_payload: dict, score: float):
+        """Insert and commit one row in control_dedup_reviews for a borderline match.
+
+        Args:
+            candidate: Control dict; ``control_id`` and ``title`` are required,
+                ``objective``, ``merge_group_hint`` and ``pattern_id`` optional.
+            matched_payload: Payload of the matched point (``control_uuid`` and
+                ``control_id`` are read).
+            score: Similarity score of the match.
+
+        Raises:
+            Exception: Whatever the insert or commit raised, after logging and
+                rolling back.
+        """
+        try:
+            insert_review = text("""
+                INSERT INTO control_dedup_reviews
+                    (candidate_control_id, candidate_title, candidate_objective,
+                     matched_control_uuid, matched_control_id,
+                     similarity_score, dedup_stage, dedup_details)
+                VALUES (:ccid, :ct, :co, CAST(:mcu AS uuid), :mci,
+                        :ss, 'batch_dedup', CAST(:dd AS jsonb))
+            """)
+            details = {
+                "merge_group_hint": candidate.get("merge_group_hint", ""),
+                "pattern_id": candidate.get("pattern_id"),
+            }
+            values = {
+                "ccid": candidate["control_id"],
+                "ct": candidate["title"],
+                "co": candidate.get("objective", ""),
+                "mcu": matched_payload.get("control_uuid"),
+                "mci": matched_payload.get("control_id"),
+                "ss": score,
+                "dd": json.dumps(details),
+            }
+            self.db.execute(insert_review, values)
+            self.db.commit()
+        except Exception as exc:
+            logger.error("BatchDedup _write_review error: %s", exc)
+            self.db.rollback()
+            raise
+
+    # ── Progress ─────────────────────────────────────────────────────────
+
+    def _log_progress(self, hint: str):
+        """Emit a progress line whenever the count reaches a positive multiple of 500."""
+        done = self._progress_count
+        if done <= 0 or done % 500 != 0:
+            return
+        counters = self.stats
+        logger.info(
+            "BatchDedup [%s] %d/%d — masters=%d, linked=%d, review=%d",
+            self._progress_phase, done, self._progress_total,
+            counters["masters"], counters["linked"], counters["review"],
+        )
+
+    def get_status(self) -> dict:
+        """Snapshot the current phase, progress counters and stats as one dict.
+
+        The stats are merged in last, so a stats key named ``phase``,
+        ``progress`` or ``total`` would override the progress value.
+        """
+        status = {
+            "phase": self._progress_phase,
+            "progress": self._progress_count,
+            "total": self._progress_total,
+        }
+        status.update(self.stats)
+        return status
@@ -0,0 +1,438 @@
+"""
+Citation Backfill Service — enrich existing controls with article/paragraph provenance.
+
+3-tier matching strategy:
+  Tier 1 — Hash match:  sha256(source_original_text) → RAG chunk lookup
+  Tier 2 — Regex parse: split concatenated "DSGVO Art. 35" → regulation + article
+  Tier 3 — Ollama LLM:  ask local LLM to identify article/paragraph from text
+"""
+
+import hashlib
+import json
+import logging
+import os
+import re
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Optional
+
+import httpx
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+
+from .rag_client import ComplianceRAGClient, RAGSearchResult
+
+logger = logging.getLogger(__name__)
+
+OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
+OLLAMA_MODEL = os.getenv("CONTROL_GEN_OLLAMA_MODEL", "qwen3.5:35b-a3b")
+LLM_TIMEOUT = float(os.getenv("CONTROL_GEN_LLM_TIMEOUT", "180"))
+
+ALL_COLLECTIONS = [
+    "bp_compliance_ce",
+    "bp_compliance_gesetze",
+    "bp_compliance_datenschutz",
+    "bp_dsfa_corpus",
+    "bp_legal_templates",
+]
+
+BACKFILL_SYSTEM_PROMPT = (
+    "Du bist ein Rechtsexperte. Deine Aufgabe ist es, aus einem Gesetzestext "
+    "den genauen Artikel und Absatz zu bestimmen. Antworte NUR mit validem JSON."
+)
+
+# Regex to split concatenated source like "DSGVO Art. 35" or "NIS2 Artikel 21 Abs. 2"
+_SOURCE_ARTICLE_RE = re.compile(
+    r"^(.+?)\s+(Art(?:ikel)?\.?\s*\d+.*)$", re.IGNORECASE
+)
+
+
+@dataclass
+class MatchResult:
+    """Article/paragraph provenance found for one control, plus the tier that produced it."""
+
+    # Article reference, e.g. "Art. 35" or "§ 42"; may be "" when only a paragraph was found.
+    article: str
+    # Paragraph reference; the regex tier always leaves this as "".
+    paragraph: str
+    # Matching tier that produced the result.
+    method: str  # "hash", "regex", "llm"
+
+
+@dataclass
+class BackfillResult:
+    """Counters and error messages collected over one backfill run."""
+
+    # Number of controls loaded as needing backfill.
+    total_controls: int = 0
+    # Matches per tier (see MatchResult.method).
+    matched_hash: int = 0
+    matched_regex: int = 0
+    matched_llm: int = 0
+    # Controls for which no tier produced a match.
+    unmatched: int = 0
+    # Controls for which an UPDATE was issued (stays 0 in dry-run mode).
+    updated: int = 0
+    # Human-readable messages for per-control failures and a failed commit.
+    errors: list = field(default_factory=list)
+
+
+class CitationBackfill:
+    """Backfill article/paragraph into existing control source_citations."""
+
+    def __init__(self, db: Session, rag_client: ComplianceRAGClient):
+        """Store the DB session and RAG client; the hash index starts empty.
+
+        Args:
+            db: Session used to read and update canonical_controls.
+            rag_client: Client used to scroll RAG collections when building the index.
+        """
+        self.db = db
+        self.rag = rag_client
+        # sha256 hex digest of a chunk's text -> that chunk (filled by _build_rag_index_targeted)
+        self._rag_index: dict[str, RAGSearchResult] = {}
+
+    async def run(self, dry_run: bool = True, limit: int = 0) -> BackfillResult:
+        """Backfill article/paragraph for controls whose citation lacks an article.
+
+        Args:
+            dry_run: When True (default) matches are counted and logged at DEBUG
+                level only; no UPDATE is issued and nothing is committed.
+            limit: Maximum number of controls to load; 0 means no limit.
+
+        Returns:
+            BackfillResult with per-tier match counts, the number of persisted
+            updates and the error messages collected along the way.
+        """
+        result = BackfillResult()
+
+        # Load controls needing backfill
+        controls = self._load_controls_needing_backfill(limit)
+        result.total_controls = len(controls)
+        logger.info("Backfill: %d controls need article/paragraph enrichment", len(controls))
+
+        if not controls:
+            return result
+
+        # Hashes of every source text we hope to find in the RAG index;
+        # the index is only built when at least one control has source text.
+        needed_hashes: set[str] = {
+            hashlib.sha256(src.encode()).hexdigest()
+            for ctrl in controls
+            if (src := ctrl.get("source_original_text"))
+        }
+
+        if needed_hashes:
+            # Build targeted RAG index — only scroll collections that our controls reference
+            logger.info("Building targeted RAG hash index for %d source texts...", len(needed_hashes))
+            await self._build_rag_index_targeted(controls)
+            logger.info("RAG index built: %d chunks indexed, %d hashes needed", len(self._rag_index), len(needed_hashes))
+        else:
+            logger.info("No source_original_text found — skipping RAG index build")
+
+        # Process each control
+        for i, ctrl in enumerate(controls):
+            if i > 0 and i % 100 == 0:
+                logger.info("Backfill progress: %d/%d processed", i, result.total_controls)
+
+            try:
+                match = await self._match_control(ctrl)
+                if not match:
+                    result.unmatched += 1
+                    continue
+
+                if match.method == "hash":
+                    result.matched_hash += 1
+                elif match.method == "regex":
+                    result.matched_regex += 1
+                elif match.method == "llm":
+                    result.matched_llm += 1
+
+                if dry_run:
+                    logger.debug(
+                        "DRY RUN: Would update %s with article=%s paragraph=%s (method=%s)",
+                        ctrl["control_id"], match.article, match.paragraph, match.method,
+                    )
+                else:
+                    # One SAVEPOINT per control: a failing UPDATE is rolled back
+                    # on its own instead of leaving the whole transaction in a
+                    # failed state that would break every following control.
+                    with self.db.begin_nested():
+                        self._update_control(ctrl, match)
+                    result.updated += 1
+
+            except Exception as e:
+                error_msg = f"Error backfilling {ctrl.get('control_id', '?')}: {e}"
+                logger.error(error_msg)
+                result.errors.append(error_msg)
+
+        if not dry_run:
+            try:
+                self.db.commit()
+            except Exception as e:
+                logger.error("Backfill commit failed: %s", e)
+                result.errors.append(f"Commit failed: {e}")
+                # Nothing was persisted: release the failed transaction and do
+                # not report the pending updates as done.
+                self.db.rollback()
+                result.updated = 0
+
+        logger.info(
+            "Backfill complete: %d total, hash=%d regex=%d llm=%d unmatched=%d updated=%d",
+            result.total_controls, result.matched_hash, result.matched_regex,
+            result.matched_llm, result.unmatched, result.updated,
+        )
+        return result
+
+    def _load_controls_needing_backfill(self, limit: int = 0) -> list[dict]:
+        """Load rule-1/2 controls whose source_citation has no 'article' value.
+
+        Args:
+            limit: Maximum number of rows to return; values <= 0 disable the limit.
+
+        Returns:
+            One dict per row (ordered by control_id) with ``id`` as a string and
+            ``source_citation`` / ``generation_metadata`` decoded from JSON when
+            they arrive as text (``{}`` if decoding fails).
+        """
+        query = """
+            SELECT id, control_id, source_citation, source_original_text,
+                   generation_metadata, license_rule
+            FROM canonical_controls
+            WHERE license_rule IN (1, 2)
+              AND source_citation IS NOT NULL
+              AND (
+                  source_citation->>'article' IS NULL
+                  OR source_citation->>'article' = ''
+              )
+            ORDER BY control_id
+        """
+        params: dict = {}
+        if limit > 0:
+            # Bound parameter instead of f-string interpolation: the value never
+            # becomes part of the SQL text, whatever the caller passes in.
+            query += " LIMIT :limit"
+            params["limit"] = int(limit)
+
+        result = self.db.execute(text(query), params)
+        cols = result.keys()
+        controls = []
+        for row in result:
+            ctrl = dict(zip(cols, row))
+            ctrl["id"] = str(ctrl["id"])
+            # Parse JSON fields that were returned as text
+            for jf in ("source_citation", "generation_metadata"):
+                if isinstance(ctrl.get(jf), str):
+                    try:
+                        ctrl[jf] = json.loads(ctrl[jf])
+                    except (json.JSONDecodeError, TypeError):
+                        ctrl[jf] = {}
+            controls.append(ctrl)
+        return controls
+
+    async def _build_rag_index_targeted(self, controls: list[dict]):
+        """Fill ``self._rag_index`` by scrolling the collections these controls point to.
+
+        The collections are derived from the controls' regulation codes; when
+        that yields nothing, every collection in ALL_COLLECTIONS is scrolled.
+        Chunks whose stripped text is shorter than 50 characters are not indexed.
+        """
+        targets = set(self._map_regulations_to_collections(controls).values())
+        if not targets:
+            targets = set(ALL_COLLECTIONS)
+
+        logger.info("Targeted index: searching %d collections: %s",
+                     len(targets), ", ".join(targets))
+
+        for coll_name in targets:
+            cursor = None
+            pages_done = 0
+            visited: set[str] = set()
+            while True:
+                batch, cursor_next = await self.rag.scroll(
+                    collection=coll_name, offset=cursor, limit=200,
+                )
+                if not batch:
+                    break
+                # Index key is the sha256 of the raw (unstripped) chunk text.
+                self._rag_index.update(
+                    (hashlib.sha256(item.text.encode()).hexdigest(), item)
+                    for item in batch
+                    if item.text and len(item.text.strip()) >= 50
+                )
+                pages_done += 1
+                if pages_done % 50 == 0:
+                    logger.info("Indexing %s: page %d (%d chunks so far)",
+                                coll_name, pages_done, len(self._rag_index))
+                if not cursor_next:
+                    break
+                # A repeated offset means the scroll is cycling; stop this collection.
+                if cursor_next in visited:
+                    logger.warning("Scroll loop in %s at page %d — stopping", coll_name, pages_done)
+                    break
+                visited.add(cursor_next)
+                cursor = cursor_next
+
+            logger.info("Indexed collection %s: %d pages", coll_name, pages_done)
+
+    def _map_regulations_to_collections(self, controls: list[dict]) -> dict[str, str]:
+        """Resolve each control's regulation code to the collection it likely lives in.
+
+        Returns a dict of regulation code -> collection name. A code matching no
+        known prefix adds one ``_all_<collection>`` entry per collection in
+        ALL_COLLECTIONS, so that every collection gets searched.
+        """
+        # Heuristic prefix routing; the first matching prefix wins.
+        prefix_routes = (
+            ("eu_", "bp_compliance_gesetze"),
+            ("dsgvo", "bp_compliance_datenschutz"),
+            ("bdsg", "bp_compliance_gesetze"),
+            ("ttdsg", "bp_compliance_gesetze"),
+            ("nist_", "bp_compliance_ce"),
+            ("owasp", "bp_compliance_ce"),
+            ("bsi_", "bp_compliance_ce"),
+            ("enisa", "bp_compliance_ce"),
+            ("at_", "bp_compliance_recht"),
+            ("fr_", "bp_compliance_recht"),
+            ("es_", "bp_compliance_recht"),
+        )
+        mapping: dict[str, str] = {}
+        for ctrl in controls:
+            reg = (ctrl.get("generation_metadata") or {}).get("source_regulation", "")
+            if not reg:
+                continue
+            target = next(
+                (coll for prefix, coll in prefix_routes if reg.startswith(prefix)),
+                None,
+            )
+            if target is not None:
+                mapping[reg] = target
+            else:
+                # Unknown regulation — search all
+                mapping.update({f"_all_{coll}": coll for coll in ALL_COLLECTIONS})
+        return mapping
+
+    async def _match_control(self, ctrl: dict) -> Optional[MatchResult]:
+        """Try the three tiers in order (hash, regex, LLM) and return the first hit.
+
+        Returns None when no tier matches; the LLM tier is only attempted for
+        controls that have source text.
+        """
+        original_text = ctrl.get("source_original_text")
+
+        # Tier 1: the control's source text hashes to an indexed RAG chunk
+        if original_text:
+            digest = hashlib.sha256(original_text.encode()).hexdigest()
+            hit = self._rag_index.get(digest)
+            if hit and (hit.article or hit.paragraph):
+                return MatchResult(
+                    article=hit.article or "",
+                    paragraph=hit.paragraph or "",
+                    method="hash",
+                )
+
+        # Tier 2: the citation's source string has the article appended
+        source_field = (ctrl.get("source_citation") or {}).get("source", "")
+        split_source = _parse_concatenated_source(source_field)
+        if split_source and split_source["article"]:
+            # The concatenated format carries no separate paragraph.
+            return MatchResult(article=split_source["article"], paragraph="", method="regex")
+
+        # Tier 3: ask the local LLM
+        return await self._llm_match(ctrl) if original_text else None
+
+    async def _llm_match(self, ctrl: dict) -> Optional[MatchResult]:
+        """Ask Ollama to identify article/paragraph from the control's source text.
+
+        Returns a MatchResult with method "llm" when the model names an article
+        or a paragraph, otherwise None. Failures are logged as warnings and
+        never raised to the caller.
+        """
+        citation = ctrl.get("source_citation") or {}
+        regulation_name = citation.get("source", "")
+        metadata = ctrl.get("generation_metadata") or {}
+        regulation_code = metadata.get("source_regulation", "")
+        # The key may be present with a NULL value; slicing None would raise.
+        source_text = ctrl.get("source_original_text") or ""
+
+        prompt = f"""Analysiere den folgenden Gesetzestext und bestimme den genauen Artikel und Absatz.
+
+Gesetz: {regulation_name} (Code: {regulation_code})
+
+Text:
+---
+{source_text[:2000]}
+---
+
+Antworte NUR mit JSON:
+{{"article": "Art. XX", "paragraph": "Abs. Y"}}
+
+Falls kein spezifischer Absatz erkennbar ist, setze paragraph auf "".
+Falls kein Artikel erkennbar ist, setze article auf "".
+Bei deutschen Gesetzen mit § verwende: "§ XX" statt "Art. XX"."""
+
+        try:
+            raw = await _llm_ollama(prompt, BACKFILL_SYSTEM_PROMPT)
+            data = _parse_json(raw)
+            if not isinstance(data, dict):
+                return None
+            # The model may answer with null or a bare number; normalise both
+            # fields to stripped strings so MatchResult keeps its str contract.
+            article = str(data.get("article") or "").strip()
+            paragraph = str(data.get("paragraph") or "").strip()
+            if article or paragraph:
+                return MatchResult(article=article, paragraph=paragraph, method="llm")
+        except Exception as e:
+            logger.warning("LLM match failed for %s: %s", ctrl.get("control_id"), e)
+
+        return None
+
+    def _update_control(self, ctrl: dict, match: MatchResult):
+        """Write the matched article/paragraph into the control's row (no commit here).
+
+        Both ``source_citation`` and ``generation_metadata`` are rewritten; the
+        dicts held by ``ctrl`` are modified in place when they are non-empty.
+        """
+        citation = ctrl.get("source_citation") or {}
+
+        # "DSGVO Art. 35" style sources keep only the regulation name
+        split_source = _parse_concatenated_source(citation.get("source", ""))
+        if split_source:
+            citation["source"] = split_source["name"]
+
+        citation.update(article=match.article, paragraph=match.paragraph)
+
+        metadata = ctrl.get("generation_metadata") or {}
+        if match.article:
+            metadata["source_article"] = match.article
+        metadata.update(
+            source_paragraph=match.paragraph,
+            backfill_method=match.method,
+            backfill_at=datetime.now(timezone.utc).isoformat(),
+        )
+
+        statement = text("""
+                UPDATE canonical_controls
+                SET source_citation = :citation,
+                    generation_metadata = :metadata,
+                    updated_at = NOW()
+                WHERE id = CAST(:id AS uuid)
+            """)
+        bind_values = {
+            "id": ctrl["id"],
+            "citation": json.dumps(citation),
+            "metadata": json.dumps(metadata),
+        }
+        self.db.execute(statement, bind_values)
+
+
+def _parse_concatenated_source(source: str) -> Optional[dict]:
+    """Split a source string with an appended article into name and article.
+
+    'DSGVO Art. 35' → {name: 'DSGVO', article: 'Art. 35'}
+    'BDSG § 42'     → {name: 'BDSG', article: '§ 42'}
+
+    Returns None for empty input, for non-string input, and for strings that
+    contain neither an Art./Artikel nor a § reference.
+    """
+    # The value comes out of stored JSON, so it may be null or a non-string;
+    # re.match would raise TypeError on anything that is not a str.
+    if not source or not isinstance(source, str):
+        return None
+
+    # Art./Artikel pattern first, then the § pattern
+    m = _SOURCE_ARTICLE_RE.match(source) or re.match(r"^(.+?)\s+(§\s*\d+.*)$", source)
+    if m is None:
+        return None
+    return {"name": m.group(1).strip(), "article": m.group(2).strip()}
+
+
+async def _llm_ollama(prompt: str, system_prompt: Optional[str] = None) -> str:
+    """Send one non-streaming chat request to Ollama and return the reply text.
+
+    Returns "" on any non-200 response or request error (both are logged).
+    """
+    system_part = [{"role": "system", "content": system_prompt}] if system_prompt else []
+    messages = system_part + [{"role": "user", "content": prompt}]
+
+    payload = dict(
+        model=OLLAMA_MODEL,
+        messages=messages,
+        stream=False,
+        format="json",
+        options={"num_predict": 256},
+        think=False,
+    )
+
+    try:
+        async with httpx.AsyncClient(timeout=LLM_TIMEOUT) as client:
+            resp = await client.post(f"{OLLAMA_URL}/api/chat", json=payload)
+            if resp.status_code != 200:
+                logger.error("Ollama backfill failed %d: %s", resp.status_code, resp.text[:300])
+                return ""
+            body = resp.json()
+            message = body.get("message", {})
+            if not isinstance(message, dict):
+                # Non-dict message: prefer a top-level "response" field
+                return body.get("response", str(message))
+            return message.get("content", "")
+    except Exception as e:
+        logger.error("Ollama backfill request failed: %s", e)
+        return ""
+
+
+def _parse_json(raw: str) -> Optional[dict]:
+    """Extract a JSON object from LLM output.
+
+    Three candidates are tried in order: the whole string, the first object
+    inside a markdown code fence, and the first brace-delimited span without
+    nested braces. Only a candidate that decodes to a dict is accepted, so a
+    top-level list, string or number never reaches callers that go on to
+    call ``.get`` on the result.
+
+    Returns:
+        The decoded dict, or None if no candidate yields one.
+    """
+    if not raw:
+        return None
+
+    def _load_dict(candidate: str) -> Optional[dict]:
+        """Decode *candidate*; return it only if it is a JSON object."""
+        try:
+            value = json.loads(candidate)
+        except json.JSONDecodeError:
+            return None
+        return value if isinstance(value, dict) else None
+
+    # Try direct parse
+    parsed = _load_dict(raw)
+    if parsed is not None:
+        return parsed
+
+    # Try extracting from markdown code block
+    fenced = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", raw, re.DOTALL)
+    if fenced:
+        parsed = _load_dict(fenced.group(1))
+        if parsed is not None:
+            return parsed
+
+    # Try finding first { ... }
+    bare = re.search(r"\{[^{}]*\}", raw)
+    if bare:
+        return _load_dict(bare.group(0))
+    return None
@@ -0,0 +1,546 @@
+"""Control Composer — Pattern + Obligation → Master Control.
+
+Takes an obligation (from ObligationExtractor) and a matched control pattern
+(from PatternMatcher), then uses LLM to compose a structured, actionable
+Master Control. Replaces the old Stage 3 (STRUCTURE/REFORM) with a
+pattern-guided approach.
+
+Three composition modes based on license rules:
+    Rule 1: Obligation + Pattern + original text → full control
+    Rule 2: Obligation + Pattern + original text + citation → control
+    Rule 3: Obligation + Pattern (NO original text) → reformulated control
+
+Fallback: No pattern match → basic generation (tagged needs_pattern_assignment)
+
+Part of the Multi-Layer Control Architecture (Phase 6 of 8).
+"""
+
+import json
+import logging
+import os
+from dataclasses import dataclass, field
+from typing import Optional
+
+from compliance.services.obligation_extractor import (
+    ObligationMatch,
+    _llm_ollama,
+    _parse_json,
+)
+from compliance.services.pattern_matcher import (
+    ControlPattern,
+    PatternMatchResult,
+)
+
+logger = logging.getLogger(__name__)
+
+OLLAMA_MODEL = os.getenv("CONTROL_GEN_OLLAMA_MODEL", "qwen3.5:35b-a3b")
+
+# Valid values for generated control fields
+VALID_SEVERITIES = {"low", "medium", "high", "critical"}
+VALID_EFFORTS = {"s", "m", "l", "xl"}
+VALID_VERIFICATION = {"code_review", "document", "tool", "hybrid"}
+
+
+@dataclass
+class ComposedControl:
+    """Master Control produced by composing an obligation with a control pattern."""
+
+    # --- Core fields (mirror the canonical_controls schema) ---
+    control_id: str = ""
+    title: str = ""
+    objective: str = ""
+    rationale: str = ""
+    scope: dict = field(default_factory=dict)
+    requirements: list = field(default_factory=list)
+    test_procedure: list = field(default_factory=list)
+    evidence: list = field(default_factory=list)
+    severity: str = "medium"
+    risk_score: float = 5.0
+    implementation_effort: str = "m"
+    open_anchors: list = field(default_factory=list)
+    release_state: str = "draft"
+    tags: list = field(default_factory=list)
+    # --- 3-rule license handling ---
+    license_rule: Optional[int] = None
+    source_original_text: Optional[str] = None
+    source_citation: Optional[dict] = None
+    customer_visible: bool = True
+    # --- Classification ---
+    verification_method: Optional[str] = None
+    category: Optional[str] = None
+    target_audience: Optional[list] = None
+    # --- Links back to the pattern and obligation(s) ---
+    pattern_id: Optional[str] = None
+    obligation_ids: list = field(default_factory=list)
+    # --- Generation metadata ---
+    generation_metadata: dict = field(default_factory=dict)
+    # Values set in this module: pattern_guided | template_only | fallback
+    composition_method: str = "pattern_guided"
+
+    def to_dict(self) -> dict:
+        """Return every field as a plain dict for DB storage or an API response.
+
+        Keys follow the field declaration order; values are the live attribute
+        objects, not copies.
+        """
+        return {name: getattr(self, name) for name in self.__dataclass_fields__}
+
+
+class ControlComposer:
+    """Composes Master Controls from obligations + patterns.
+
+    Usage::
+
+        composer = ControlComposer()
+
+        control = await composer.compose(
+            obligation=obligation_match,
+            pattern_result=pattern_match_result,
+            chunk_text="...",
+            license_rule=1,
+            source_citation={...},
+        )
+    """
+
+    async def compose(
+        self,
+        obligation: ObligationMatch,
+        pattern_result: PatternMatchResult,
+        chunk_text: Optional[str] = None,
+        license_rule: int = 3,
+        source_citation: Optional[dict] = None,
+        regulation_code: Optional[str] = None,
+    ) -> ComposedControl:
+        """Turn one obligation (plus its matched pattern, if any) into a control.
+
+        Args:
+            obligation: Extracted obligation (from ObligationExtractor).
+            pattern_result: Pattern match (from PatternMatcher); without a
+                pattern the fallback composition is used.
+            chunk_text: Original RAG chunk text; stored only for rules 1 and 2.
+            license_rule: 1=free, 2=citation, 3=restricted.
+            source_citation: Citation metadata; stored only for rule 2.
+            regulation_code: Source regulation code, recorded in the metadata.
+
+        Returns:
+            ComposedControl with linkage, license fields and generation
+            metadata filled in, after validation.
+        """
+        pattern = pattern_result.pattern if pattern_result else None
+
+        # Compose, then link the control to the pattern it came from
+        if pattern:
+            control = await self._compose_with_pattern(
+                obligation, pattern, chunk_text, license_rule, source_citation,
+            )
+            control.pattern_id = pattern.id
+        else:
+            control = await self._compose_fallback(
+                obligation, chunk_text, license_rule, source_citation,
+            )
+            control.pattern_id = None
+
+        obligation_id = obligation.obligation_id
+        if obligation_id:
+            control.obligation_ids = [obligation_id]
+
+        # License handling
+        control.license_rule = license_rule
+        if license_rule == 3:
+            # Restricted sources: hidden from customers, no text, no citation
+            control.customer_visible = False
+            control.source_original_text = None
+            control.source_citation = None
+        else:
+            if chunk_text and license_rule in (1, 2):
+                control.source_original_text = chunk_text
+            if source_citation and license_rule == 2:
+                control.source_citation = source_citation
+
+        # Provenance of this composition
+        if pattern_result:
+            pattern_confidence = round(pattern_result.confidence, 3)
+            pattern_method = pattern_result.method
+        else:
+            pattern_confidence = 0
+            pattern_method = "none"
+
+        control.generation_metadata = {
+            "composition_method": control.composition_method,
+            "pattern_id": control.pattern_id,
+            "pattern_confidence": pattern_confidence,
+            "pattern_method": pattern_method,
+            "obligation_id": obligation_id,
+            "obligation_method": obligation.method,
+            "obligation_confidence": round(obligation.confidence, 3),
+            "license_rule": license_rule,
+            "regulation_code": regulation_code,
+        }
+
+        _validate_control(control)
+        return control
+
+    async def compose_batch(
+        self,
+        items: list[dict],
+    ) -> list[ComposedControl]:
+        """Compose one control per item, sequentially and in input order.
+
+        Args:
+            items: Dicts with the required key ``obligation`` and the optional
+                keys ``pattern_result``, ``chunk_text``, ``license_rule``
+                (default 3), ``source_citation`` and ``regulation_code``.
+
+        Returns:
+            The composed controls, one per input item.
+        """
+        return [
+            await self.compose(
+                obligation=item["obligation"],
+                pattern_result=item.get("pattern_result", PatternMatchResult()),
+                chunk_text=item.get("chunk_text"),
+                license_rule=item.get("license_rule", 3),
+                source_citation=item.get("source_citation"),
+                regulation_code=item.get("regulation_code"),
+            )
+            for item in items
+        ]
+
+    # -----------------------------------------------------------------------
+    # Pattern-guided composition
+    # -----------------------------------------------------------------------
+
+    async def _compose_with_pattern(
+        self,
+        obligation: ObligationMatch,
+        pattern: ControlPattern,
+        chunk_text: Optional[str],
+        license_rule: int,
+        source_citation: Optional[dict],
+    ) -> ComposedControl:
+        """Use the LLM to fill the pattern template with obligation-specific details.
+
+        Falls back to ``_compose_from_template`` when the LLM returns nothing
+        or no JSON object. Within a usable answer, any field that is missing,
+        null or empty takes the pattern's template value, so a partial answer
+        cannot blank out the template or crash the title truncation.
+
+        ``source_citation`` is accepted for signature symmetry and is not used here.
+        """
+        prompt = _build_compose_prompt(obligation, pattern, chunk_text, license_rule)
+        system_prompt = _compose_system_prompt(license_rule)
+
+        llm_result = await _llm_ollama(prompt, system_prompt)
+        parsed = _parse_json(llm_result) if llm_result else None
+        # Anything but a non-empty JSON object is treated as "LLM failed".
+        if not isinstance(parsed, dict) or not parsed:
+            return self._compose_from_template(obligation, pattern)
+
+        def pick(key: str, default):
+            """Return the LLM value for *key*, or *default* if missing/null/empty."""
+            value = parsed.get(key)
+            return value if value else default
+
+        control = ComposedControl(
+            # str() guards the slice against a non-string title from the model
+            title=str(pick("title", pattern.name_de))[:255],
+            objective=pick("objective", pattern.objective_template),
+            rationale=pick("rationale", pattern.rationale_template),
+            requirements=_ensure_list(pick("requirements", pattern.requirements_template)),
+            test_procedure=_ensure_list(pick("test_procedure", pattern.test_procedure_template)),
+            evidence=_ensure_list(pick("evidence", pattern.evidence_template)),
+            severity=pick("severity", pattern.severity_default),
+            implementation_effort=pick("implementation_effort", pattern.implementation_effort_default),
+            category=pick("category", pattern.category),
+            tags=_ensure_list(pick("tags", pattern.tags)),
+            target_audience=_ensure_list(pick("target_audience", [])),
+            verification_method=parsed.get("verification_method"),
+            open_anchors=_anchors_from_pattern(pattern),
+            composition_method="pattern_guided",
+        )
+
+        return control
+
+    def _compose_from_template(
+        self,
+        obligation: ObligationMatch,
+        pattern: ControlPattern,
+    ) -> ComposedControl:
+        """Build a control straight from the pattern template, without the LLM.
+
+        The obligation title (if any) is appended to the pattern name, and an
+        obligation text longer than 20 characters is referenced in the
+        objective, cut to its first 200 characters.
+        """
+        obligation_title = obligation.obligation_title or ""
+        obligation_text = obligation.obligation_text or ""
+
+        if obligation_title:
+            title = f"{pattern.name_de} — {obligation_title}"
+        else:
+            title = f"{pattern.name_de}"
+
+        if obligation_text and len(obligation_text) > 20:
+            objective = f"{pattern.objective_template} Bezug: {obligation_text[:200]}"
+        else:
+            objective = pattern.objective_template
+
+        # List-valued template fields are copied, not shared with the pattern.
+        copied_lists = {
+            "requirements": list(pattern.requirements_template),
+            "test_procedure": list(pattern.test_procedure_template),
+            "evidence": list(pattern.evidence_template),
+            "tags": list(pattern.tags),
+        }
+
+        return ComposedControl(
+            title=title[:255],
+            objective=objective,
+            rationale=pattern.rationale_template,
+            severity=pattern.severity_default,
+            implementation_effort=pattern.implementation_effort_default,
+            category=pattern.category,
+            open_anchors=_anchors_from_pattern(pattern),
+            composition_method="template_only",
+            **copied_lists,
+        )
+
+    # -----------------------------------------------------------------------
+    # Fallback (no pattern)
+    # -----------------------------------------------------------------------
+
+    async def _compose_fallback(
+        self,
+        obligation: ObligationMatch,
+        chunk_text: Optional[str],
+        license_rule: int,
+        source_citation: Optional[dict],
+    ) -> ComposedControl:
+        """Generate a control without a pattern template (old-style).
+
+        The result is always tagged ``composition_method="fallback"`` and
+        ``release_state="needs_review"``. If the LLM returns nothing, or
+        something that is not a JSON object, the control is built from the
+        obligation text and fixed defaults instead of raising. Fields the LLM
+        leaves missing, null or empty take the same defaults.
+
+        ``source_citation`` is accepted for signature symmetry and is not used here.
+        """
+        prompt = _build_fallback_prompt(obligation, chunk_text, license_rule)
+        system_prompt = _compose_system_prompt(license_rule)
+
+        llm_result = await _llm_ollama(prompt, system_prompt)
+        parsed = _parse_json(llm_result) if llm_result else None
+        # The parser may yield None (or a non-object) for unusable output;
+        # calling .get on that would raise, so fall back to an empty dict.
+        if not isinstance(parsed, dict):
+            parsed = {}
+
+        obl_text = obligation.obligation_text or ""
+        default_title = obl_text[:100] if obl_text else "Untitled Control"
+
+        control = ComposedControl(
+            # str() guards the slice against a non-string title from the model
+            title=str(parsed.get("title") or default_title)[:255],
+            objective=parsed.get("objective") or obl_text[:500],
+            rationale=parsed.get("rationale") or "Aus gesetzlicher Pflicht abgeleitet.",
+            requirements=_ensure_list(parsed.get("requirements") or []),
+            test_procedure=_ensure_list(parsed.get("test_procedure") or []),
+            evidence=_ensure_list(parsed.get("evidence") or []),
+            severity=parsed.get("severity") or "medium",
+            implementation_effort=parsed.get("implementation_effort") or "m",
+            category=parsed.get("category"),
+            tags=_ensure_list(parsed.get("tags") or []),
+            target_audience=_ensure_list(parsed.get("target_audience") or []),
+            verification_method=parsed.get("verification_method"),
+            composition_method="fallback",
+            release_state="needs_review",
+        )
+
+        return control
+
+
+# ---------------------------------------------------------------------------
+# Prompt builders
+# ---------------------------------------------------------------------------
+
+
+def _compose_system_prompt(license_rule: int) -> str:
+    """Return the system prompt for the given license rule.
+
+    Rule 3 gets the strict prompt (own wording only, no source naming, no
+    proprietary identifiers); every other rule gets the short generic prompt.
+    """
+    if license_rule != 3:
+        sentences = [
+            "Du bist ein Security-Compliance-Experte.",
+            "Erstelle ein praxisorientiertes, umsetzbares Security Control.",
+            "Antworte NUR mit validem JSON.",
+        ]
+    else:
+        sentences = [
+            "Du bist ein Security-Compliance-Experte.",
+            "Deine Aufgabe ist es, eigenstaendige Security Controls zu formulieren.",
+            "Du formulierst IMMER in eigenen Worten.",
+            "KOPIERE KEINE Saetze aus dem Quelltext.",
+            "Verwende eigene Begriffe und Struktur.",
+            "NENNE NICHT die Quelle.",
+            "Keine proprietaeren Bezeichner.",
+            "Antworte NUR mit validem JSON.",
+        ]
+    return " ".join(sentences)
+
+
+def _build_compose_prompt(
+    obligation: ObligationMatch,
+    pattern: ControlPattern,
+    chunk_text: Optional[str],
+    license_rule: int,
+) -> str:
+    """Assemble the user prompt for pattern-guided composition.
+
+    The prompt holds the obligation section, the pattern section, a context
+    section and the JSON answer schema. The original text (first 2000
+    characters) is only included for rules other than 3.
+    """
+    if license_rule != 3 and chunk_text:
+        context_section = f"KONTEXT (Originaltext):\n{chunk_text[:2000]}"
+    elif license_rule == 3:
+        # Restricted sources: never show the source text to the model
+        context_section = "KONTEXT: Intern analysiert (keine Quellenangabe)."
+    else:
+        context_section = "KONTEXT: Kein Originaltext verfuegbar."
+
+    obl_section = _obligation_section(obligation)
+    pattern_section = _pattern_section(pattern)
+
+    prompt = f"""Erstelle ein PRAXISORIENTIERTES Security Control.
+
+{obl_section}
+
+{pattern_section}
+
+{context_section}
+
+AUFGABE:
+Fuelle das Muster mit pflicht-spezifischen Details.
+Das Ergebnis muss UMSETZBAR sein — keine Gesetzesparaphrase.
+Formuliere konkret und handlungsorientiert.
+
+Antworte als JSON:
+{{
+  "title": "Kurzer praegnanter Titel (max 100 Zeichen, deutsch)",
+  "objective": "Was soll erreicht werden? (1-3 Saetze)",
+  "rationale": "Warum ist das wichtig? (1-2 Saetze)",
+  "requirements": ["Konkrete Anforderung 1", "Anforderung 2", ...],
+  "test_procedure": ["Pruefschritt 1", "Pruefschritt 2", ...],
+  "evidence": ["Nachweis 1", "Nachweis 2", ...],
+  "severity": "low|medium|high|critical",
+  "implementation_effort": "s|m|l|xl",
+  "category": "{pattern.category}",
+  "tags": ["tag1", "tag2"],
+  "target_audience": ["unternehmen", "behoerden", "entwickler"],
+  "verification_method": "code_review|document|tool|hybrid"
+}}"""
+    return prompt
+
+
+def _build_fallback_prompt(
+    obligation: ObligationMatch,
+    chunk_text: Optional[str],
+    license_rule: int,
+) -> str:
+    """Assemble the user prompt for composition without a matching pattern.
+
+    The prompt consists of an intro line, the formatted obligation, a context
+    block and the task description with the JSON schema, each separated by one
+    blank line. For license rule 3 the original text is never embedded;
+    otherwise at most the first 2000 characters of ``chunk_text`` are included.
+    """
+    obligation_block = _obligation_section(obligation)
+
+    # Rule 3 takes precedence over an available chunk: no source text is shown.
+    if license_rule == 3:
+        context_block = "KONTEXT: Intern analysiert (keine Quellenangabe)."
+    elif not chunk_text:
+        context_block = "KONTEXT: Kein Originaltext verfuegbar."
+    else:
+        context_block = f"KONTEXT (Originaltext):\n{chunk_text[:2000]}"
+
+    # No interpolation happens in this part; the doubled braces render as
+    # literal JSON braces.
+    task_and_schema = f"""AUFGABE:
+Formuliere ein umsetzbares Security Control.
+Keine Gesetzesparaphrase — konkrete Massnahmen beschreiben.
+
+Antworte als JSON:
+{{
+  "title": "Kurzer praegnanter Titel (max 100 Zeichen, deutsch)",
+  "objective": "Was soll erreicht werden? (1-3 Saetze)",
+  "rationale": "Warum ist das wichtig? (1-2 Saetze)",
+  "requirements": ["Konkrete Anforderung 1", "Anforderung 2", ...],
+  "test_procedure": ["Pruefschritt 1", "Pruefschritt 2", ...],
+  "evidence": ["Nachweis 1", "Nachweis 2", ...],
+  "severity": "low|medium|high|critical",
+  "implementation_effort": "s|m|l|xl",
+  "category": "one of: authentication, encryption, data_protection, etc.",
+  "tags": ["tag1", "tag2"],
+  "target_audience": ["unternehmen"],
+  "verification_method": "code_review|document|tool|hybrid"
+}}"""
+
+    return "\n\n".join([
+        "Erstelle ein Security Control aus der folgenden Pflicht.",
+        obligation_block,
+        context_block,
+        task_and_schema,
+    ])
+
+
+def _obligation_section(obligation: ObligationMatch) -> str:
+    """Render the obligation as a labelled, indented block for the prompt.
+
+    Only fields with a truthy value produce a line. The description is cut to
+    its first 500 characters. When neither title nor description is present, a
+    placeholder line is appended after any ID/regulation lines.
+    """
+    title = obligation.obligation_title
+    description = obligation.obligation_text
+
+    labelled_values = (
+        ("Titel", title),
+        ("Beschreibung", description[:500] if description else description),
+        ("ID", obligation.obligation_id),
+        ("Rechtsgrundlage", obligation.regulation_id),
+    )
+
+    lines = ["PFLICHT (was das Gesetz verlangt):"]
+    for label, value in labelled_values:
+        if value:
+            lines.append(f"  {label}: {value}")
+
+    if not (description or title):
+        lines.append("  (Keine spezifische Pflicht extrahiert)")
+    return "\n".join(lines)
+
+
+def _pattern_section(pattern: ControlPattern) -> str:
+    """Render the control pattern as an indented block for the prompt.
+
+    At most the first five requirement templates and the first three test
+    procedure templates are listed, each as a ``- `` bullet.
+    """
+    bullet_separator = "\n    "
+    requirement_bullets = bullet_separator.join(
+        [f"- {item}" for item in pattern.requirements_template[:5]]
+    )
+    test_bullets = bullet_separator.join(
+        [f"- {item}" for item in pattern.test_procedure_template[:3]]
+    )
+
+    lines = [
+        "MUSTER (wie man es typischerweise umsetzt):",
+        f"  Pattern: {pattern.name_de} ({pattern.id})",
+        f"  Domain: {pattern.domain}",
+        f"  Ziel-Template: {pattern.objective_template}",
+        "  Anforderungs-Template:",
+        f"    {requirement_bullets}",
+        "  Pruefverfahren-Template:",
+        f"    {test_bullets}",
+    ]
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _ensure_list(value) -> list:
+    """Coerce an arbitrary value into a list of non-empty strings.
+
+    Args:
+        value: Typically a list or a single string; anything else is treated
+            as "no content".
+
+    Returns:
+        For a list: its truthy items, each converted with ``str``. For a
+        non-empty string: a one-element list. For an empty string or any other
+        type: an empty list.
+    """
+    if isinstance(value, list):
+        return [str(item) for item in value if item]
+    if isinstance(value, str):
+        # An empty string carries no content. Returning [""] would produce a
+        # truthy list holding an empty entry, inconsistent with the list
+        # branch, which drops empty items.
+        return [value] if value else []
+    return []
+
+
+def _anchors_from_pattern(pattern: ControlPattern) -> list:
+    """Translate the pattern's ``open_anchor_refs`` into control anchor dicts.
+
+    Each reference contributes its ``framework`` and ``ref`` values (empty
+    string when missing); the title is left blank and the alignment score is
+    fixed at 0.8.
+    """
+    return [
+        {
+            "framework": anchor_ref.get("framework", ""),
+            "control_id": anchor_ref.get("ref", ""),
+            "title": "",
+            "alignment_score": 0.8,
+        }
+        for anchor_ref in pattern.open_anchor_refs
+    ]
+
+
+def _validate_control(control: ComposedControl) -> None:
+    """Repair a composed control in place so every checked field is usable.
+
+    Unknown enum-like values are replaced by defaults, an out-of-range risk
+    score is derived from the severity, an over-long title is truncated, and
+    empty content fields receive generic placeholder text.
+    """
+    # Enum-like fields: fall back to a neutral default for unknown values.
+    if control.severity not in VALID_SEVERITIES:
+        control.severity = "medium"
+    if control.implementation_effort not in VALID_EFFORTS:
+        control.implementation_effort = "m"
+
+    # An empty verification method is left alone; only unknown values are cleared.
+    method = control.verification_method
+    if method and method not in VALID_VERIFICATION:
+        control.verification_method = None
+
+    # Risk score must lie in 0..10; otherwise use the severity-based default
+    # (severity has already been normalised above).
+    score_in_range = 0 <= control.risk_score <= 10
+    if not score_in_range:
+        control.risk_score = _severity_to_risk(control.severity)
+
+    # Cap the title at 255 characters, marking the cut with an ellipsis.
+    if len(control.title) > 255:
+        control.title = control.title[:252] + "..."
+
+    # Minimum content: the objective mirrors the (possibly truncated) title,
+    # the remaining fields get generic placeholders.
+    if not control.objective:
+        control.objective = control.title
+    placeholders = (
+        ("rationale", "Aus regulatorischer Anforderung abgeleitet."),
+        ("requirements", ["Anforderung gemaess Pflichtbeschreibung umsetzen"]),
+        ("test_procedure", ["Umsetzung der Anforderungen pruefen"]),
+        ("evidence", ["Dokumentation der Umsetzung"]),
+    )
+    for attr_name, placeholder in placeholders:
+        if not getattr(control, attr_name):
+            setattr(control, attr_name, placeholder)
+
+
+def _severity_to_risk(severity: str) -> float:
+    """Return the default risk score for a severity label.
+
+    Unknown labels map to 5.0, the same score as "medium".
+    """
+    default_scores = {
+        "critical": 9.0,
+        "high": 7.0,
+        "medium": 5.0,
+        "low": 3.0,
+    }
+    if severity in default_scores:
+        return default_scores[severity]
+    return 5.0
@@ -0,0 +1,745 @@
+"""Control Deduplication Engine — 4-Stage Matching Pipeline.
+
+Prevents duplicate atomic controls during Pass 0b by checking candidates
+against existing controls before insertion.
+
+Stages:
+    1. Pattern-Gate:  pattern_id must match (hard gate)
+    2. Action-Check:  normalized action verb must match (hard gate)
+    3. Object-Norm:   normalized object must match (soft gate with high threshold)
+    4. Embedding:     cosine similarity with tiered thresholds (Qdrant)
+
+Verdicts:
+    - NEW:    create a new atomic control
+    - LINK:   add parent link to existing control (similarity > LINK_THRESHOLD)
+    - REVIEW: queue for human review (REVIEW_THRESHOLD < sim <= LINK_THRESHOLD)
+"""
+
+import logging
+import os
+import re
+from dataclasses import dataclass, field
+from typing import Optional, Callable, Awaitable
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# ── Configuration ────────────────────────────────────────────────────
+
+# All settings are read once, at import time, from environment variables; the
+# second argument of each os.getenv call is the default used when unset.
+
+# True only when the variable equals "true" (compared case-insensitively).
+DEDUP_ENABLED = os.getenv("DEDUP_ENABLED", "true").lower() == "true"
+# Similarity above which a candidate with matching action/object is linked.
+LINK_THRESHOLD = float(os.getenv("DEDUP_LINK_THRESHOLD", "0.92"))
+# Similarity above which (up to LINK_THRESHOLD) a candidate is queued for review.
+REVIEW_THRESHOLD = float(os.getenv("DEDUP_REVIEW_THRESHOLD", "0.85"))
+# Stricter link threshold applied when the normalized objects differ.
+LINK_THRESHOLD_DIFF_OBJECT = float(os.getenv("DEDUP_LINK_THRESHOLD_DIFF_OBJ", "0.95"))
+# Link threshold for the cross-regulation search (no pattern_id filter).
+CROSS_REG_LINK_THRESHOLD = float(os.getenv("DEDUP_CROSS_REG_THRESHOLD", "0.95"))
+# Qdrant collection used when a helper is called without an explicit collection.
+QDRANT_COLLECTION = os.getenv("DEDUP_QDRANT_COLLECTION", "atomic_controls")
+# Base URL of the Qdrant HTTP API.
+QDRANT_URL = os.getenv("QDRANT_URL", "http://host.docker.internal:6333")
+# Base URL of the embedding service; embeddings are requested via POST /embed.
+EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://embedding-service:8087")
+
+
+# ── Result Dataclass ─────────────────────────────────────────────────
+
+@dataclass
+class DedupResult:
+    """Outcome of the dedup check.
+
+    Holds the verdict and, when an existing control was matched, that
+    control's identifiers together with the similarity score and the name of
+    the pipeline stage that produced the verdict.
+    """
+    # One of "new", "link" or "review".
+    verdict: str  # "new" | "link" | "review"
+    # Identifiers of the matched existing control; None when nothing matched.
+    matched_control_uuid: Optional[str] = None
+    matched_control_id: Optional[str] = None
+    matched_title: Optional[str] = None
+    # Free-form label of the stage that decided (e.g. "pattern_gate").
+    stage: str = ""  # which stage decided
+    # Score of the best match as reported by the vector search; 0.0 if none.
+    similarity_score: float = 0.0
+    # Kind of link to create when the verdict is "link".
+    link_type: str = "dedup_merge"  # "dedup_merge" | "cross_regulation"
+    # Stage-specific diagnostic data; default_factory gives each instance its own dict.
+    details: dict = field(default_factory=dict)
+
+
+# ── Action Normalization ─────────────────────────────────────────────
+
+# Lookup table from lower-cased action verbs (German infinitives and English
+# verbs) to the canonical English verb used for dedup comparisons. Insertion
+# order matters: the prefix scan in normalize_action returns the first hit.
+_ACTION_SYNONYMS: dict[str, str] = {
+    # German → canonical English
+    "implementieren": "implement",
+    "umsetzen": "implement",
+    "einrichten": "implement",
+    "einführen": "implement",
+    "aufbauen": "implement",
+    "bereitstellen": "implement",
+    "aktivieren": "implement",
+    "konfigurieren": "configure",
+    "einstellen": "configure",
+    "parametrieren": "configure",
+    "testen": "test",
+    "prüfen": "test",
+    "überprüfen": "test",
+    "verifizieren": "test",
+    "validieren": "test",
+    "kontrollieren": "test",
+    "auditieren": "audit",
+    "dokumentieren": "document",
+    "protokollieren": "log",
+    "aufzeichnen": "log",
+    "loggen": "log",
+    "überwachen": "monitor",
+    "monitoring": "monitor",
+    "beobachten": "monitor",
+    "schulen": "train",
+    "trainieren": "train",
+    "sensibilisieren": "train",
+    "löschen": "delete",
+    "entfernen": "delete",
+    "verschlüsseln": "encrypt",
+    "sperren": "block",
+    "beschränken": "restrict",
+    "einschränken": "restrict",
+    "begrenzen": "restrict",
+    "autorisieren": "authorize",
+    "genehmigen": "authorize",
+    "freigeben": "authorize",
+    "authentifizieren": "authenticate",
+    "identifizieren": "identify",
+    "melden": "report",
+    "benachrichtigen": "notify",
+    "informieren": "notify",
+    "aktualisieren": "update",
+    "erneuern": "update",
+    "sichern": "backup",
+    "wiederherstellen": "restore",
+    # English passthrough
+    "implement": "implement",
+    "configure": "configure",
+    "test": "test",
+    "verify": "test",
+    "validate": "test",
+    "audit": "audit",
+    "document": "document",
+    "log": "log",
+    "monitor": "monitor",
+    "train": "train",
+    "delete": "delete",
+    "encrypt": "encrypt",
+    "restrict": "restrict",
+    "authorize": "authorize",
+    "authenticate": "authenticate",
+    "report": "report",
+    "update": "update",
+    "backup": "backup",
+    "restore": "restore",
+}
+
+
+def normalize_action(action: str) -> str:
+    """Normalize an action verb to a canonical English form.
+
+    Lookup order:
+        1. exact match of the stripped, lower-cased verb;
+        2. exact match after removing one trailing suffix
+           (en, t, st, e, te, tet, end);
+        3. prefix match in either direction against the known verbs, first
+           hit in table order wins.
+
+    Args:
+        action: Raw verb, German or English; may be empty.
+
+    Returns:
+        The canonical verb, or the stripped, lower-cased input when nothing
+        matches. Empty or whitespace-only input yields "".
+    """
+    if not action:
+        return ""
+    action = action.strip().lower()
+    if not action:
+        # Whitespace-only input. Without this guard the prefix scan below
+        # would return the first table entry, because every verb starts
+        # with the empty string.
+        return ""
+    # Try exact match first, then base form
+    if action in _ACTION_SYNONYMS:
+        return _ACTION_SYNONYMS[action]
+    # Strip German infinitive/conjugation suffixes for lookup
+    action_base = re.sub(r"(en|t|st|e|te|tet|end)$", "", action)
+    if action_base in _ACTION_SYNONYMS:
+        return _ACTION_SYNONYMS[action_base]
+    # Fuzzy: check if action starts with any known verb (or vice versa)
+    for verb, canonical in _ACTION_SYNONYMS.items():
+        if action.startswith(verb) or verb.startswith(action):
+            return canonical
+    return action  # fallback: return as-is
+
+
+# ── Object Normalization ─────────────────────────────────────────────
+
+# Lookup table from lower-cased object phrases (German and English) to the
+# canonical object token used for dedup comparisons. Keys are matched both
+# exactly and as substrings of the input (see normalize_object).
+_OBJECT_SYNONYMS: dict[str, str] = {
+    # Authentication / Access
+    "mfa": "multi_factor_auth",
+    "multi-faktor-authentifizierung": "multi_factor_auth",
+    "mehrfaktorauthentifizierung": "multi_factor_auth",
+    "multi-factor authentication": "multi_factor_auth",
+    "two-factor": "multi_factor_auth",
+    "2fa": "multi_factor_auth",
+    "passwort": "password_policy",
+    "kennwort": "password_policy",
+    "password": "password_policy",
+    "zugangsdaten": "credentials",
+    "credentials": "credentials",
+    "admin-konten": "privileged_access",
+    "admin accounts": "privileged_access",
+    "administratorkonten": "privileged_access",
+    "privilegierte zugriffe": "privileged_access",
+    "privileged accounts": "privileged_access",
+    "remote-zugriff": "remote_access",
+    "fernzugriff": "remote_access",
+    "remote access": "remote_access",
+    "session": "session_management",
+    "sitzung": "session_management",
+    "sitzungsverwaltung": "session_management",
+    # Encryption
+    "verschlüsselung": "encryption",
+    "encryption": "encryption",
+    "kryptografie": "encryption",
+    "kryptografische verfahren": "encryption",
+    "schlüssel": "key_management",
+    "key management": "key_management",
+    "schlüsselverwaltung": "key_management",
+    "zertifikat": "certificate_management",
+    "certificate": "certificate_management",
+    "tls": "transport_encryption",
+    "ssl": "transport_encryption",
+    "https": "transport_encryption",
+    # Network
+    "firewall": "firewall",
+    "netzwerk": "network_security",
+    "network": "network_security",
+    "vpn": "vpn",
+    "segmentierung": "network_segmentation",
+    "segmentation": "network_segmentation",
+    # Logging / Monitoring
+    "audit-log": "audit_logging",
+    "audit log": "audit_logging",
+    "protokoll": "audit_logging",
+    "logging": "audit_logging",
+    "monitoring": "monitoring",
+    "überwachung": "monitoring",
+    "alerting": "alerting",
+    "alarmierung": "alerting",
+    "siem": "siem",
+    # Data
+    "personenbezogene daten": "personal_data",
+    "personal data": "personal_data",
+    "sensible daten": "sensitive_data",
+    "sensitive data": "sensitive_data",
+    "datensicherung": "backup",
+    "backup": "backup",
+    "wiederherstellung": "disaster_recovery",
+    "disaster recovery": "disaster_recovery",
+    # Policy / Process
+    "richtlinie": "policy",
+    "policy": "policy",
+    "verfahrensanweisung": "procedure",
+    "procedure": "procedure",
+    "prozess": "process",
+    "schulung": "training",
+    "training": "training",
+    "awareness": "awareness",
+    "sensibilisierung": "awareness",
+    # Incident
+    "vorfall": "incident",
+    "incident": "incident",
+    "sicherheitsvorfall": "security_incident",
+    "security incident": "security_incident",
+    # Vulnerability
+    "schwachstelle": "vulnerability",
+    "vulnerability": "vulnerability",
+    "patch": "patch_management",
+    "update": "patch_management",
+    "patching": "patch_management",
+}
+
+# Keys ordered longest first, computed once at import time, so that substring
+# matching prefers the most specific phrase (e.g. "sicherheitsvorfall" before
+# "vorfall").
+_OBJECT_KEYS_SORTED = sorted(_OBJECT_SYNONYMS.keys(), key=len, reverse=True)
+
+
+def normalize_object(obj: str) -> str:
+    """Map a compliance object phrase onto a canonical token.
+
+    Resolution order: exact synonym match, then the longest synonym contained
+    in the phrase, then a generic fallback that removes German/English
+    articles and prepositions and joins up to four remaining words (longer
+    than two characters) with underscores. Empty input yields "".
+    """
+    if not obj:
+        return ""
+    phrase = obj.strip().lower()
+
+    exact = _OBJECT_SYNONYMS.get(phrase)
+    if exact is not None:
+        return exact
+
+    # Keys are pre-sorted longest first, so the first hit is the longest one.
+    contained = next((key for key in _OBJECT_KEYS_SORTED if key in phrase), None)
+    if contained is not None:
+        return _OBJECT_SYNONYMS[contained]
+
+    without_stopwords = re.sub(r"\b(der|die|das|den|dem|des|ein|eine|eines|einem|einen"
+                               r"|für|von|zu|auf|in|an|bei|mit|nach|über|unter|the|a|an"
+                               r"|for|of|to|on|in|at|by|with)\b", "", phrase)
+    significant = [word for word in without_stopwords.split() if len(word) > 2]
+    if significant:
+        return "_".join(significant[:4])
+    # Nothing significant left: fall back to the phrase itself.
+    return phrase.replace(" ", "_")
+
+
+# ── Canonicalization ─────────────────────────────────────────────────
+
+def canonicalize_text(action: str, obj: str, title: str = "") -> str:
+    """Build the canonical text that is sent to the embedding service.
+
+    The text starts with the normalized action and the normalized object. If a
+    title is given, up to five of its words (lower-cased, longer than three
+    characters, German filler words removed) are appended after the word
+    "for". All parts are joined with single spaces.
+    """
+    words = [normalize_action(action), normalize_object(obj)]
+    if not title:
+        return " ".join(words)
+
+    # Remove common German filler words before picking title keywords.
+    stripped_title = re.sub(
+        r"\b(und|oder|für|von|zu|der|die|das|den|dem|des|ein|eine"
+        r"|bei|mit|nach|gemäß|gem\.|laut|entsprechend)\b",
+        "", title.lower()
+    )
+    keywords = [word for word in stripped_title.split() if len(word) > 3][:5]
+    if keywords:
+        words += ["for", *keywords]
+    return " ".join(words)
+
+
+# ── Embedding Helper ─────────────────────────────────────────────────
+
+async def get_embedding(text: str) -> list[float]:
+    """Fetch the embedding vector for one text from the embedding service.
+
+    Posts ``{"texts": [text]}`` to ``{EMBEDDING_URL}/embed`` and returns the
+    first entry of the response's ``embeddings`` list. Any failure (request
+    error, unparsable body, missing or empty list) is logged as a warning and
+    results in an empty list.
+    """
+    request_body = {"texts": [text]}
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as http:
+            response = await http.post(f"{EMBEDDING_URL}/embed", json=request_body)
+            vectors = response.json().get("embeddings", [])
+            if vectors:
+                return vectors[0]
+            return []
+    except Exception as exc:
+        logger.warning("Embedding failed: %s", exc)
+        return []
+
+
+def cosine_similarity(a: list[float], b: list[float]) -> float:
+    """Return the cosine similarity of two equally long vectors.
+
+    Yields 0.0 when either vector is empty, the lengths differ, or either
+    vector has zero magnitude.
+    """
+    if not a or not b:
+        return 0.0
+    if len(a) != len(b):
+        return 0.0
+
+    magnitude_a = sum(x * x for x in a) ** 0.5
+    magnitude_b = sum(y * y for y in b) ** 0.5
+    if norm_is_zero := (magnitude_a == 0 or magnitude_b == 0):
+        return 0.0
+
+    dot_product = sum(x * y for x, y in zip(a, b))
+    return dot_product / (magnitude_a * magnitude_b)
+
+
+# ── Qdrant Helpers ───────────────────────────────────────────────────
+
+async def qdrant_search(
+    embedding: list[float],
+    pattern_id: str,
+    top_k: int = 10,
+    collection: Optional[str] = None,
+) -> list[dict]:
+    """Query Qdrant for the nearest atomic controls that share ``pattern_id``.
+
+    Returns the ``result`` list of the search response. An empty list is
+    returned when the embedding is empty, Qdrant answers with a non-200
+    status, or the request raises; the latter two cases are logged as
+    warnings.
+    """
+    if not embedding:
+        return []
+
+    target = collection or QDRANT_COLLECTION
+    # Restrict hits to points whose payload carries the same pattern_id.
+    pattern_filter = {
+        "must": [
+            {"key": "pattern_id", "match": {"value": pattern_id}}
+        ]
+    }
+    request_body: dict = {
+        "vector": embedding,
+        "limit": top_k,
+        "with_payload": True,
+        "filter": pattern_filter,
+    }
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as http:
+            response = await http.post(
+                f"{QDRANT_URL}/collections/{target}/points/search",
+                json=request_body,
+            )
+            if response.status_code == 200:
+                return response.json().get("result", [])
+            logger.warning("Qdrant search failed: %d", response.status_code)
+            return []
+    except Exception as exc:
+        logger.warning("Qdrant search error: %s", exc)
+        return []
+
+
+async def qdrant_search_cross_regulation(
+    embedding: list[float],
+    top_k: int = 5,
+    collection: Optional[str] = None,
+) -> list[dict]:
+    """Query Qdrant for the nearest controls without any pattern_id filter.
+
+    Used for cross-regulation linking (e.g. DSGVO Art. 25 ↔ NIS2 Art. 21).
+    Returns the ``result`` list of the search response, or an empty list when
+    the embedding is empty, Qdrant answers with a non-200 status, or the
+    request raises.
+    """
+    if not embedding:
+        return []
+
+    target = collection or QDRANT_COLLECTION
+    request_body: dict = {
+        "vector": embedding,
+        "limit": top_k,
+        "with_payload": True,
+    }
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as http:
+            response = await http.post(
+                f"{QDRANT_URL}/collections/{target}/points/search",
+                json=request_body,
+            )
+            if response.status_code == 200:
+                return response.json().get("result", [])
+            logger.warning("Qdrant cross-reg search failed: %d", response.status_code)
+            return []
+    except Exception as exc:
+        logger.warning("Qdrant cross-reg search error: %s", exc)
+        return []
+
+
+async def qdrant_upsert(
+    point_id: str,
+    embedding: list[float],
+    payload: dict,
+    collection: Optional[str] = None,
+) -> bool:
+    """Write one point (id, vector, payload) into a Qdrant collection.
+
+    Returns True only when Qdrant answers with status 200. An empty embedding
+    short-circuits to False without any request; a request error is logged as
+    a warning and also yields False.
+    """
+    if not embedding:
+        return False
+
+    target = collection or QDRANT_COLLECTION
+    point = {
+        "id": point_id,
+        "vector": embedding,
+        "payload": payload,
+    }
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as http:
+            response = await http.put(
+                f"{QDRANT_URL}/collections/{target}/points",
+                json={"points": [point]},
+            )
+            return response.status_code == 200
+    except Exception as exc:
+        logger.warning("Qdrant upsert error: %s", exc)
+        return False
+
+
+async def ensure_qdrant_collection(
+    vector_size: int = 1024,
+    collection: Optional[str] = None,
+) -> bool:
+    """Make sure the Qdrant collection exists, creating it when necessary.
+
+    An existing collection (GET returns 200) is left untouched. Otherwise the
+    collection is created with cosine distance and keyword payload indexes are
+    requested for the fields used by the dedup pipeline. Returns True when the
+    collection exists or was created, False on a failed creation or request
+    error. The responses of the index requests are not inspected.
+    """
+    target = collection or QDRANT_COLLECTION
+    indexed_fields = ["pattern_id", "action_normalized", "object_normalized", "control_id"]
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as http:
+            # Already there? Then nothing to do.
+            probe = await http.get(f"{QDRANT_URL}/collections/{target}")
+            if probe.status_code == 200:
+                return True
+
+            created = await http.put(
+                f"{QDRANT_URL}/collections/{target}",
+                json={
+                    "vectors": {"size": vector_size, "distance": "Cosine"},
+                },
+            )
+            if created.status_code != 200:
+                logger.error("Failed to create Qdrant collection: %d", created.status_code)
+                return False
+
+            logger.info("Created Qdrant collection: %s", target)
+            for field_name in indexed_fields:
+                await http.put(
+                    f"{QDRANT_URL}/collections/{target}/index",
+                    json={"field_name": field_name, "field_schema": "keyword"},
+                )
+            return True
+    except Exception as exc:
+        logger.warning("Qdrant collection check error: %s", exc)
+        return False
+
+
+# ── Main Dedup Checker ───────────────────────────────────────────────
+
+class ControlDedupChecker:
+    """4-stage dedup checker for atomic controls.
+
+    Usage:
+        checker = ControlDedupChecker(db_session)
+        result = await checker.check_duplicate(action, obj, title, pattern_id)
+        if result.verdict == "link":
+            checker.add_parent_link(
+                result.matched_control_uuid, parent_uuid,
+                link_type=result.link_type,
+                confidence=result.similarity_score,
+            )
+        elif result.verdict == "review":
+            checker.write_review(control_id, title, objective, result)
+        else:
+            # Insert the new control, then make it findable for later checks:
+            await checker.index_control(
+                control_uuid, control_id, title, action, obj, pattern_id,
+            )
+
+    ``db`` must provide ``execute()`` and ``commit()``; statements are built
+    with ``sqlalchemy.text``. ``embed_fn`` and ``search_fn`` replace the
+    default embedding call and the pattern-filtered Qdrant search.
+    """
+
+    def __init__(
+        self,
+        db,
+        embed_fn: Optional[Callable[[str], Awaitable[list[float]]]] = None,
+        search_fn: Optional[Callable] = None,
+    ):
+        self.db = db
+        self._embed = embed_fn or get_embedding
+        self._search = search_fn or qdrant_search
+        self._cache: dict[str, list[dict]] = {}  # pattern_id → existing controls
+
+    def _load_existing(self, pattern_id: str) -> list[dict]:
+        """Load existing atomic controls with same pattern_id from DB.
+
+        Results are cached per pattern_id until ``index_control`` drops the
+        entry for that pattern.
+        """
+        if pattern_id in self._cache:
+            return self._cache[pattern_id]
+        from sqlalchemy import text
+        rows = self.db.execute(text("""
+            SELECT id::text, control_id, title, objective,
+                   pattern_id,
+                   generation_metadata->>'obligation_type' as obligation_type
+            FROM canonical_controls
+            WHERE parent_control_uuid IS NOT NULL
+              AND release_state != 'deprecated'
+              AND pattern_id = :pid
+        """), {"pid": pattern_id}).fetchall()
+        result = [
+            {
+                "uuid": r[0], "control_id": r[1], "title": r[2],
+                "objective": r[3], "pattern_id": r[4],
+                "obligation_type": r[5],
+            }
+            for r in rows
+        ]
+        self._cache[pattern_id] = result
+        return result
+
+    async def check_duplicate(
+        self,
+        action: str,
+        obj: str,
+        title: str,
+        pattern_id: Optional[str],
+    ) -> DedupResult:
+        """Run the 4-stage dedup pipeline + cross-regulation linking.
+
+        Args:
+            action: Raw action verb of the candidate.
+            obj: Raw object phrase of the candidate.
+            title: Candidate title; contributes keywords to the embedded text.
+            pattern_id: Pattern of the candidate; without it no dedup is done.
+
+        Returns:
+            DedupResult with verdict "new", "link" or "review". Every "new"
+            verdict reached after an embedding was computed is passed through
+            the cross-regulation check, which may turn it into a "link".
+        """
+        # No pattern_id → can't dedup meaningfully
+        if not pattern_id:
+            return DedupResult(verdict="new", stage="no_pattern")
+
+        # Stage 1: Pattern-Gate — only the emptiness of the DB result matters.
+        existing = self._load_existing(pattern_id)
+        if not existing:
+            return DedupResult(
+                verdict="new", stage="pattern_gate",
+                details={"reason": "no existing controls with this pattern_id"},
+            )
+
+        # Stages 2 + 3: the DB rows carry no action/object, so both values are
+        # compared further below against the payload of the best Qdrant hit.
+        norm_action = normalize_action(action)
+        norm_object = normalize_object(obj)
+
+        # Stage 4: Embedding Similarity
+        canonical = canonicalize_text(action, obj, title)
+        embedding = await self._embed(canonical)
+        if not embedding:
+            # Can't compute embedding → default to new
+            return DedupResult(
+                verdict="new", stage="embedding_unavailable",
+                details={"canonical_text": canonical},
+            )
+
+        results = await self._search(embedding, pattern_id, top_k=5)
+
+        if not results:
+            # No intra-pattern matches → try cross-regulation
+            return await self._check_cross_regulation(embedding, DedupResult(
+                verdict="new", stage="no_qdrant_matches",
+                details={"canonical_text": canonical, "action": norm_action, "object": norm_object},
+            ))
+
+        # Only the top hit is evaluated.
+        best = results[0]
+        best_score = best.get("score", 0.0)
+        best_payload = best.get("payload", {})
+        best_action = best_payload.get("action_normalized", "")
+        best_object = best_payload.get("object_normalized", "")
+        # Identifier fields shared by all link/review verdicts below.
+        matched = {
+            "matched_control_uuid": best_payload.get("control_uuid"),
+            "matched_control_id": best_payload.get("control_id"),
+            "matched_title": best_payload.get("title"),
+        }
+
+        # Action differs → NEW (even if embedding is high)
+        if best_action and norm_action and best_action != norm_action:
+            return await self._check_cross_regulation(embedding, DedupResult(
+                verdict="new", stage="action_mismatch",
+                similarity_score=best_score,
+                matched_control_id=best_payload.get("control_id"),
+                details={
+                    "candidate_action": norm_action,
+                    "existing_action": best_action,
+                    "similarity": best_score,
+                },
+            ))
+
+        # Object differs → use higher threshold
+        if best_object and norm_object and best_object != norm_object:
+            if best_score > LINK_THRESHOLD_DIFF_OBJECT:
+                return DedupResult(
+                    verdict="link", stage="embedding_diff_object",
+                    similarity_score=best_score,
+                    details={"candidate_object": norm_object, "existing_object": best_object},
+                    **matched,
+                )
+            return await self._check_cross_regulation(embedding, DedupResult(
+                verdict="new", stage="object_mismatch_below_threshold",
+                similarity_score=best_score,
+                matched_control_id=best_payload.get("control_id"),
+                details={
+                    "candidate_object": norm_object,
+                    "existing_object": best_object,
+                    "threshold": LINK_THRESHOLD_DIFF_OBJECT,
+                },
+            ))
+
+        # Same action + same object → tiered thresholds
+        if best_score > LINK_THRESHOLD:
+            return DedupResult(
+                verdict="link", stage="embedding_match",
+                similarity_score=best_score,
+                **matched,
+            )
+        if best_score > REVIEW_THRESHOLD:
+            return DedupResult(
+                verdict="review", stage="embedding_review",
+                similarity_score=best_score,
+                **matched,
+            )
+        return await self._check_cross_regulation(embedding, DedupResult(
+            verdict="new", stage="embedding_below_threshold",
+            similarity_score=best_score,
+            details={"threshold": REVIEW_THRESHOLD},
+        ))
+
+    async def _check_cross_regulation(
+        self,
+        embedding: list[float],
+        intra_result: DedupResult,
+    ) -> DedupResult:
+        """Second pass: cross-regulation linking for controls deemed 'new'.
+
+        Searches Qdrant WITHOUT pattern_id filter and links when the best
+        score exceeds CROSS_REG_LINK_THRESHOLD. In every other case the
+        intra-pattern result is returned unchanged.
+
+        NOTE(review): this always calls ``qdrant_search_cross_regulation`` on
+        the default collection, bypassing an injected ``search_fn``. Because
+        the search is unfiltered it can also return the same-pattern control
+        that was just rejected (e.g. for an action mismatch) — confirm whether
+        that is intended.
+        """
+        if intra_result.verdict != "new" or not embedding:
+            return intra_result
+
+        cross_results = await qdrant_search_cross_regulation(embedding, top_k=5)
+        if not cross_results:
+            return intra_result
+
+        best = cross_results[0]
+        best_score = best.get("score", 0.0)
+        if best_score > CROSS_REG_LINK_THRESHOLD:
+            best_payload = best.get("payload", {})
+            return DedupResult(
+                verdict="link",
+                stage="cross_regulation",
+                matched_control_uuid=best_payload.get("control_uuid"),
+                matched_control_id=best_payload.get("control_id"),
+                matched_title=best_payload.get("title"),
+                similarity_score=best_score,
+                link_type="cross_regulation",
+                details={
+                    "cross_reg_score": best_score,
+                    "cross_reg_threshold": CROSS_REG_LINK_THRESHOLD,
+                },
+            )
+
+        return intra_result
+
+    def add_parent_link(
+        self,
+        control_uuid: str,
+        parent_control_uuid: str,
+        link_type: str = "dedup_merge",
+        confidence: float = 0.0,
+        source_regulation: Optional[str] = None,
+        source_article: Optional[str] = None,
+        obligation_candidate_id: Optional[str] = None,
+    ) -> None:
+        """Add a parent link to an existing atomic control.
+
+        Inserts one row into ``control_parent_links`` and commits. An existing
+        link for the same (control_uuid, parent_control_uuid) pair is left
+        untouched.
+        """
+        from sqlalchemy import text
+        # CAST(... AS uuid) instead of ":oci::uuid": a bind parameter directly
+        # followed by "::" is not reliably recognised by sqlalchemy.text().
+        self.db.execute(text("""
+            INSERT INTO control_parent_links
+                (control_uuid, parent_control_uuid, link_type, confidence,
+                 source_regulation, source_article, obligation_candidate_id)
+            VALUES (:cu, :pu, :lt, :conf, :sr, :sa, CAST(:oci AS uuid))
+            ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
+        """), {
+            "cu": control_uuid,
+            "pu": parent_control_uuid,
+            "lt": link_type,
+            "conf": confidence,
+            "sr": source_regulation,
+            "sa": source_article,
+            "oci": obligation_candidate_id,
+        })
+        self.db.commit()
+
+    def write_review(
+        self,
+        candidate_control_id: str,
+        candidate_title: str,
+        candidate_objective: str,
+        result: DedupResult,
+        parent_control_uuid: Optional[str] = None,
+        obligation_candidate_id: Optional[str] = None,
+    ) -> None:
+        """Write a dedup review queue entry.
+
+        Inserts one row into ``control_dedup_reviews`` describing the
+        candidate and the match stored in ``result`` (``result.details`` is
+        serialised as JSON), then commits.
+        """
+        import json
+
+        from sqlalchemy import text
+        # CAST(...) instead of ":param::type": a bind parameter directly
+        # followed by "::" is not reliably recognised by sqlalchemy.text().
+        self.db.execute(text("""
+            INSERT INTO control_dedup_reviews
+                (candidate_control_id, candidate_title, candidate_objective,
+                 matched_control_uuid, matched_control_id,
+                 similarity_score, dedup_stage, dedup_details,
+                 parent_control_uuid, obligation_candidate_id)
+            VALUES (:ccid, :ct, :co, CAST(:mcu AS uuid), :mci, :ss, :ds,
+                    CAST(:dd AS jsonb), CAST(:pcu AS uuid), :oci)
+        """), {
+            "ccid": candidate_control_id,
+            "ct": candidate_title,
+            "co": candidate_objective,
+            "mcu": result.matched_control_uuid,
+            "mci": result.matched_control_id,
+            "ss": result.similarity_score,
+            "ds": result.stage,
+            "dd": json.dumps(result.details),
+            "pcu": parent_control_uuid,
+            "oci": obligation_candidate_id,
+        })
+        self.db.commit()
+
+    async def index_control(
+        self,
+        control_uuid: str,
+        control_id: str,
+        title: str,
+        action: str,
+        obj: str,
+        pattern_id: str,
+        collection: Optional[str] = None,
+    ) -> bool:
+        """Index a new atomic control in Qdrant for future dedup checks.
+
+        Returns True when the point was upserted, False when no embedding
+        could be computed or the upsert failed.
+        """
+        # A control is being added for this pattern, so rows cached earlier
+        # (possibly an empty list) are stale. Dropping them makes the next
+        # check_duplicate re-read the DB instead of short-circuiting to "new".
+        self._cache.pop(pattern_id, None)
+
+        norm_action = normalize_action(action)
+        norm_object = normalize_object(obj)
+        canonical = canonicalize_text(action, obj, title)
+        embedding = await self._embed(canonical)
+        if not embedding:
+            return False
+        return await qdrant_upsert(
+            point_id=control_uuid,
+            embedding=embedding,
+            payload={
+                "control_uuid": control_uuid,
+                "control_id": control_id,
+                "title": title,
+                "pattern_id": pattern_id,
+                "action_normalized": norm_action,
+                "object_normalized": norm_object,
+                "canonical_text": canonical,
+            },
+            collection=collection,
+        )
@@ -0,0 +1,152 @@
+"""
+Control Status Transition State Machine.
+
+Enforces that controls cannot be set to "pass" without sufficient evidence.
+Prevents Compliance-Theater where controls claim compliance without real proof.
+
+Transition rules:
+  planned     → in_progress : always allowed
+  in_progress → pass        : requires ≥1 evidence with confidence ≥ E2 and
+                              truth_status in (uploaded, observed, validated_internal,
+                              accepted_by_auditor, provided_to_auditor)
+  in_progress → partial     : requires ≥1 evidence (any level)
+  pass        → fail        : always allowed (degradation)
+  any         → n/a         : requires status_justification
+  any         → planned     : always allowed (reset)
+"""
+
+from typing import List, Optional, Tuple
+
+from ..db.models import EvidenceDB
+
+
+# Confidence level ordering for comparisons
+CONFIDENCE_ORDER = {"E0": 0, "E1": 1, "E2": 2, "E3": 3, "E4": 4}
+
+# Truth statuses that qualify as "real" evidence for pass transitions
+VALID_TRUTH_STATUSES = {"uploaded", "observed", "validated_internal", "accepted_by_auditor", "provided_to_auditor"}
+
+
+def validate_transition(
+    current_status: str,
+    new_status: str,
+    evidence_list: Optional[List[EvidenceDB]] = None,
+    status_justification: Optional[str] = None,
+    bypass_for_auto_updater: bool = False,
+) -> Tuple[bool, List[str]]:
+    """
+    Validate whether a control status transition is allowed.
+
+    Args:
+        current_status: Current control status value (e.g. "planned", "pass")
+        new_status: Requested new status
+        evidence_list: List of EvidenceDB objects linked to this control
+        status_justification: Text justification (required for n/a transitions)
+        bypass_for_auto_updater: If True, skip evidence checks (used by CI/CD auto-updater
+                                 which creates evidence atomically with status change)
+
+    Returns:
+        Tuple of (allowed: bool, violations: list[str])
+    """
+    violations: List[str] = []
+    evidence_list = evidence_list or []
+
+    # Same status → no-op, always allowed
+    if current_status == new_status:
+        return True, []
+
+    # Reset to planned is always allowed
+    if new_status == "planned":
+        return True, []
+
+    # n/a requires justification
+    if new_status == "n/a":
+        if not status_justification or not status_justification.strip():
+            violations.append("Transition to 'n/a' requires a status_justification explaining why this control is not applicable.")
+        return len(violations) == 0, violations
+
+    # Degradation: pass → fail is always allowed
+    if current_status == "pass" and new_status == "fail":
+        return True, []
+
+    # planned → in_progress: always allowed
+    if current_status == "planned" and new_status == "in_progress":
+        return True, []
+
+    # in_progress → partial: needs at least 1 evidence
+    if new_status == "partial":
+        if not bypass_for_auto_updater and len(evidence_list) == 0:
+            violations.append("Transition to 'partial' requires at least 1 evidence record.")
+        return len(violations) == 0, violations
+
+    # in_progress → pass: strict requirements
+    if new_status == "pass":
+        if bypass_for_auto_updater:
+            return True, []
+
+        if len(evidence_list) == 0:
+            violations.append("Transition to 'pass' requires at least 1 evidence record.")
+            return False, violations
+
+        # Check for at least one qualifying evidence
+        has_qualifying = False
+        for e in evidence_list:
+            conf = getattr(e, "confidence_level", None)
+            truth = getattr(e, "truth_status", None)
+
+            # Get string values from enum or string
+            conf_val = conf.value if hasattr(conf, "value") else str(conf) if conf else "E1"
+            truth_val = truth.value if hasattr(truth, "value") else str(truth) if truth else "uploaded"
+
+            if CONFIDENCE_ORDER.get(conf_val, 1) >= CONFIDENCE_ORDER["E2"] and truth_val in VALID_TRUTH_STATUSES:
+                has_qualifying = True
+                break
+
+        if not has_qualifying:
+            violations.append(
+                "Transition to 'pass' requires at least 1 evidence with confidence >= E2 "
+                "and truth_status in (uploaded, observed, validated_internal, accepted_by_auditor). "
+                "Current evidence does not meet this threshold."
+            )
+
+        return len(violations) == 0, violations
+
+    # in_progress → fail: always allowed
+    if new_status == "fail":
+        return True, []
+
+    # Any other transition from planned/fail to pass requires going through in_progress
+    if current_status in ("planned", "fail") and new_status == "pass":
+        if bypass_for_auto_updater:
+            return True, []
+        violations.append(
+            f"Direct transition from '{current_status}' to 'pass' is not allowed. "
+            f"Move to 'in_progress' first, then to 'pass' with qualifying evidence."
+        )
+        return False, violations
+
+    # Default: allow other transitions (e.g. fail → partial, partial → pass)
+    # For partial → pass, apply the same evidence checks
+    if current_status == "partial" and new_status == "pass":
+        if bypass_for_auto_updater:
+            return True, []
+
+        has_qualifying = False
+        for e in evidence_list:
+            conf = getattr(e, "confidence_level", None)
+            truth = getattr(e, "truth_status", None)
+            conf_val = conf.value if hasattr(conf, "value") else str(conf) if conf else "E1"
+            truth_val = truth.value if hasattr(truth, "value") else str(truth) if truth else "uploaded"
+
+            if CONFIDENCE_ORDER.get(conf_val, 1) >= CONFIDENCE_ORDER["E2"] and truth_val in VALID_TRUTH_STATUSES:
+                has_qualifying = True
+                break
+
+        if not has_qualifying:
+            violations.append(
+                "Transition from 'partial' to 'pass' requires at least 1 evidence with confidence >= E2 "
+                "and truth_status in (uploaded, observed, validated_internal, accepted_by_auditor)."
+            )
+        return len(violations) == 0, violations
+
+    # All other transitions allowed
+    return True, []
@@ -0,0 +1,714 @@
+"""Framework Decomposition Engine — decomposes framework-container obligations.
+
+Sits between Pass 0a (obligation extraction) and Pass 0b (atomic control
+composition).  Detects obligations that reference a framework domain (e.g.
+"CCM-Praktiken fuer AIS") and decomposes them into concrete sub-obligations
+using an internal framework registry.
+
+Three routing types:
+    atomic              → pass through to Pass 0b unchanged
+    compound            → split compound verbs, then Pass 0b
+    framework_container → decompose via registry, then Pass 0b
+
+The registry is a set of JSON files under compliance/data/frameworks/.
+"""
+
+import json
+import logging
+import os
+import re
+import uuid
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Registry loading
+# ---------------------------------------------------------------------------
+
+_REGISTRY_DIR = Path(__file__).resolve().parent.parent / "data" / "frameworks"
+_REGISTRY: dict[str, dict] = {}  # framework_id → framework dict
+
+
+def _load_registry() -> dict[str, dict]:
+    """Load all framework JSON files from the registry directory."""
+    registry: dict[str, dict] = {}
+    if not _REGISTRY_DIR.is_dir():
+        logger.warning("Framework registry dir not found: %s", _REGISTRY_DIR)
+        return registry
+
+    for fpath in sorted(_REGISTRY_DIR.glob("*.json")):
+        try:
+            with open(fpath, encoding="utf-8") as f:
+                fw = json.load(f)
+            fw_id = fw.get("framework_id", fpath.stem)
+            registry[fw_id] = fw
+            logger.info(
+                "Loaded framework: %s (%d domains)",
+                fw_id,
+                len(fw.get("domains", [])),
+            )
+        except Exception:
+            logger.exception("Failed to load framework file: %s", fpath)
+    return registry
+
+
+def get_registry() -> dict[str, dict]:
+    """Return the global framework registry (lazy-loaded)."""
+    global _REGISTRY
+    if not _REGISTRY:
+        _REGISTRY = _load_registry()
+    return _REGISTRY
+
+
+def reload_registry() -> dict[str, dict]:
+    """Force-reload the framework registry from disk."""
+    global _REGISTRY
+    _REGISTRY = _load_registry()
+    return _REGISTRY
+
+
+# ---------------------------------------------------------------------------
+# Framework alias index (built from registry)
+# ---------------------------------------------------------------------------
+
+def _build_alias_index(registry: dict[str, dict]) -> dict[str, str]:
+    """Build a lowercase alias → framework_id lookup."""
+    idx: dict[str, str] = {}
+    for fw_id, fw in registry.items():
+        # Framework-level aliases
+        idx[fw_id.lower()] = fw_id
+        name = fw.get("display_name", "")
+        if name:
+            idx[name.lower()] = fw_id
+        # Common short forms
+        for part in fw_id.lower().replace("_", " ").split():
+            if len(part) >= 3:
+                idx[part] = fw_id
+    return idx
+
+
+# ---------------------------------------------------------------------------
+# Routing — classify obligation type
+# ---------------------------------------------------------------------------
+
+# Extended pattern for framework detection (beyond the simple _COMPOSITE_RE
+# in decomposition_pass.py — here we also capture the framework name).
+# Shape: <collective noun> <preposition> <reference>, German or English,
+# e.g. "Praktiken fuer X" / "controls from X". Group 1 is the reference text,
+# matched lazily up to a following verb, a period, a comma or end of string.
+_FRAMEWORK_PATTERN = re.compile(
+    r"(?:praktiken|kontrollen|ma(?:ss|ß)nahmen|anforderungen|vorgaben|controls|practices|measures|requirements)"
+    r"\s+(?:f(?:ue|ü)r|aus|gem(?:ae|ä)(?:ss|ß)|nach|from|of|for|per)\s+"
+    r"(.+?)(?:\s+(?:m(?:ue|ü)ssen|sollen|sind|werden|implementieren|umsetzen|einf(?:ue|ü)hren)|\.|,|$)",
+    re.IGNORECASE,
+)
+
+# Direct references to a fixed list of framework names (case-insensitive,
+# optional whitespace inside a name, whole-word match only)
+_DIRECT_FRAMEWORK_RE = re.compile(
+    r"\b(?:CSA\s*CCM|NIST\s*(?:SP\s*)?800-53|OWASP\s*(?:ASVS|SAMM|Top\s*10)"
+    r"|CIS\s*Controls|BSI\s*(?:IT-)?Grundschutz|ENISA|ISO\s*2700[12]"
+    r"|COBIT|SOX|PCI\s*DSS|HITRUST|SOC\s*2|KRITIS)\b",
+    re.IGNORECASE,
+)
+
+# Conjunctions (German and English) that may join several main verbs;
+# a hit is only a hint that an action could be compound
+_COMPOUND_VERB_RE = re.compile(
+    r"\b(?:und|sowie|als\s+auch|or|and)\b",
+    re.IGNORECASE,
+)
+
+# Fixed verb pairs that look compound but describe a single activity and must
+# not be split. All lowercase: they are compared against lowercased action text.
+_NO_SPLIT_PHRASES = [
+    "pflegen und aufrechterhalten",
+    "dokumentieren und pflegen",
+    "definieren und dokumentieren",
+    "erstellen und freigeben",
+    "pruefen und genehmigen",
+    "identifizieren und bewerten",
+    "erkennen und melden",
+    "define and maintain",
+    "create and maintain",
+    "establish and maintain",
+    "monitor and review",
+    "detect and respond",
+]
+
+
+@dataclass
+class RoutingResult:
+    """Result of obligation routing classification.
+
+    Produced by classify_routing / _detect_framework. Only ``routing_type`` is
+    mandatory; the framework fields are filled for framework_container
+    results when the framework (and optionally a domain) could be resolved.
+    """
+    routing_type: str  # atomic | compound | framework_container | unknown_review
+    # Registry ID of the referenced framework; None if not resolved
+    framework_ref: Optional[str] = None
+    # ID of the matched domain within that framework; None if not resolved
+    framework_domain: Optional[str] = None
+    # Title of the matched domain, if any
+    domain_title: Optional[str] = None
+    # Heuristic certainty of the classification
+    confidence: float = 0.0
+    # Short machine-readable tag naming the rule that fired
+    reason: str = ""
+
+
+def classify_routing(
+    obligation_text: str,
+    action_raw: str,
+    object_raw: str,
+    condition_raw: Optional[str] = None,
+) -> RoutingResult:
+    """Classify an obligation into atomic / compound / framework_container."""
+    combined = f"{obligation_text} {object_raw}".lower()
+
+    # --- Step 1: Framework container detection ---
+    fw_result = _detect_framework(obligation_text, object_raw)
+    if fw_result.routing_type == "framework_container":
+        return fw_result
+
+    # --- Step 2: Compound verb detection ---
+    if _is_compound_obligation(action_raw, obligation_text):
+        return RoutingResult(
+            routing_type="compound",
+            confidence=0.7,
+            reason="multiple_main_verbs",
+        )
+
+    # --- Step 3: Default = atomic ---
+    return RoutingResult(
+        routing_type="atomic",
+        confidence=0.9,
+        reason="single_action_single_object",
+    )
+
+
+def _detect_framework(
+    obligation_text: str, object_raw: str,
+) -> RoutingResult:
+    """Detect if obligation references a framework domain."""
+    combined = f"{obligation_text} {object_raw}"
+    registry = get_registry()
+    alias_idx = _build_alias_index(registry)
+
+    # Strategy 1: direct framework name match
+    m = _DIRECT_FRAMEWORK_RE.search(combined)
+    if m:
+        fw_name = m.group(0).strip()
+        fw_id = _resolve_framework_id(fw_name, alias_idx, registry)
+        if fw_id:
+            domain_id, domain_title = _match_domain(
+                combined, registry[fw_id],
+            )
+            return RoutingResult(
+                routing_type="framework_container",
+                framework_ref=fw_id,
+                framework_domain=domain_id,
+                domain_title=domain_title,
+                confidence=0.95 if domain_id else 0.75,
+                reason=f"direct_framework_match:{fw_name}",
+            )
+        else:
+            # Framework name recognized but not in registry
+            return RoutingResult(
+                routing_type="framework_container",
+                framework_ref=None,
+                framework_domain=None,
+                confidence=0.6,
+                reason=f"direct_framework_match_no_registry:{fw_name}",
+            )
+
+    # Strategy 2: pattern match ("Praktiken fuer X")
+    m2 = _FRAMEWORK_PATTERN.search(combined)
+    if m2:
+        ref_text = m2.group(1).strip()
+        fw_id, domain_id, domain_title = _resolve_from_ref_text(
+            ref_text, registry, alias_idx,
+        )
+        if fw_id:
+            return RoutingResult(
+                routing_type="framework_container",
+                framework_ref=fw_id,
+                framework_domain=domain_id,
+                domain_title=domain_title,
+                confidence=0.85 if domain_id else 0.65,
+                reason=f"pattern_match:{ref_text}",
+            )
+
+    # Strategy 3: keyword-heavy object
+    if _has_framework_keywords(object_raw):
+        return RoutingResult(
+            routing_type="framework_container",
+            framework_ref=None,
+            framework_domain=None,
+            confidence=0.5,
+            reason="framework_keywords_in_object",
+        )
+
+    return RoutingResult(routing_type="atomic", confidence=0.0)
+
+
+def _resolve_framework_id(
+    name: str,
+    alias_idx: dict[str, str],
+    registry: dict[str, dict],
+) -> Optional[str]:
+    """Resolve a framework name to its registry ID."""
+    normalized = re.sub(r"\s+", " ", name.strip().lower())
+    # Direct alias match
+    if normalized in alias_idx:
+        return alias_idx[normalized]
+    # Try compact form (strip spaces, hyphens, underscores)
+    compact = re.sub(r"[\s_\-]+", "", normalized)
+    for alias, fw_id in alias_idx.items():
+        if re.sub(r"[\s_\-]+", "", alias) == compact:
+            return fw_id
+    # Substring match in display names
+    for fw_id, fw in registry.items():
+        display = fw.get("display_name", "").lower()
+        if normalized in display or display in normalized:
+            return fw_id
+    # Partial match: check if normalized contains any alias (for multi-word refs)
+    for alias, fw_id in alias_idx.items():
+        if len(alias) >= 4 and alias in normalized:
+            return fw_id
+    return None
+
+
+def _match_domain(
+    text: str, framework: dict,
+) -> tuple[Optional[str], Optional[str]]:
+    """Match a domain within a framework from text references."""
+    text_lower = text.lower()
+    best_id: Optional[str] = None
+    best_title: Optional[str] = None
+    best_score = 0
+
+    for domain in framework.get("domains", []):
+        score = 0
+        domain_id = domain["domain_id"]
+        title = domain.get("title", "")
+
+        # Exact domain ID match (e.g. "AIS")
+        if re.search(rf"\b{re.escape(domain_id)}\b", text, re.IGNORECASE):
+            score += 10
+
+        # Full title match
+        if title.lower() in text_lower:
+            score += 8
+
+        # Alias match
+        for alias in domain.get("aliases", []):
+            if alias.lower() in text_lower:
+                score += 6
+                break
+
+        # Keyword overlap
+        kw_hits = sum(
+            1 for kw in domain.get("keywords", [])
+            if kw.lower() in text_lower
+        )
+        score += kw_hits
+
+        if score > best_score:
+            best_score = score
+            best_id = domain_id
+            best_title = title
+
+    if best_score >= 3:
+        return best_id, best_title
+    return None, None
+
+
+def _resolve_from_ref_text(
+    ref_text: str,
+    registry: dict[str, dict],
+    alias_idx: dict[str, str],
+) -> tuple[Optional[str], Optional[str], Optional[str]]:
+    """Resolve framework + domain from a reference text like 'AIS' or 'Application Security'."""
+    ref_lower = ref_text.lower()
+
+    for fw_id, fw in registry.items():
+        for domain in fw.get("domains", []):
+            # Check domain ID
+            if domain["domain_id"].lower() in ref_lower:
+                return fw_id, domain["domain_id"], domain.get("title")
+            # Check title
+            if domain.get("title", "").lower() in ref_lower:
+                return fw_id, domain["domain_id"], domain.get("title")
+            # Check aliases
+            for alias in domain.get("aliases", []):
+                if alias.lower() in ref_lower or ref_lower in alias.lower():
+                    return fw_id, domain["domain_id"], domain.get("title")
+
+    return None, None, None
+
+
+_FRAMEWORK_KW_SET = {
+    "praktiken", "kontrollen", "massnahmen", "maßnahmen",
+    "anforderungen", "vorgaben", "framework", "standard",
+    "baseline", "katalog", "domain", "family", "category",
+    "practices", "controls", "measures", "requirements",
+}
+
+
+def _has_framework_keywords(text: str) -> bool:
+    """Check if text contains framework-indicator keywords."""
+    words = set(re.findall(r"[a-zäöüß]+", text.lower()))
+    return len(words & _FRAMEWORK_KW_SET) >= 2
+
+
+def _is_compound_obligation(action_raw: str, obligation_text: str) -> bool:
+    """Detect if the obligation has multiple competing main verbs."""
+    if not action_raw:
+        return False
+
+    action_lower = action_raw.lower().strip()
+
+    # Check no-split phrases first
+    for phrase in _NO_SPLIT_PHRASES:
+        if phrase in action_lower:
+            return False
+
+    # Must have a conjunction
+    if not _COMPOUND_VERB_RE.search(action_lower):
+        return False
+
+    # Split by conjunctions and check if we get 2+ meaningful verbs
+    parts = re.split(r"\b(?:und|sowie|als\s+auch|or|and)\b", action_lower)
+    meaningful = [p.strip() for p in parts if len(p.strip()) >= 3]
+    return len(meaningful) >= 2
+
+
+# ---------------------------------------------------------------------------
+# Framework Decomposition
+# ---------------------------------------------------------------------------
+
+@dataclass
+class DecomposedObligation:
+    """A concrete obligation derived from a framework container.
+
+    The field notes below describe how decompose_framework_container fills
+    an instance; one instance is created per selected subcontrol.
+    """
+    # "<source obligation candidate id>-<subcontrol id>"
+    obligation_candidate_id: str
+    # Control the source obligation belongs to (passed through unchanged)
+    parent_control_id: str
+    # Generated "FWC-…" ID shared by all obligations of one decomposition
+    parent_framework_container_id: str
+    # Display name of the framework
+    source_ref_law: str
+    # Subcontrol ID within the framework
+    source_ref_article: str
+    # Statement text of the subcontrol
+    obligation_text: str
+    actor: str
+    # Subcontrol action hint, or an action inferred from the statement
+    action_raw: str
+    # Subcontrol object hint, or an object inferred from the statement
+    object_raw: str
+    condition_raw: Optional[str] = None
+    trigger_raw: Optional[str] = None
+    routing_type: str = "atomic"
+    release_state: str = "decomposed"
+    subcontrol_id: str = ""
+    # Metadata copied from the registry subcontrol (empty when absent there)
+    action_hint: str = ""
+    object_hint: str = ""
+    object_class: str = ""
+    keywords: list[str] = field(default_factory=list)
+
+
+@dataclass
+class FrameworkDecompositionResult:
+    """Result of framework decomposition.
+
+    Returned by decompose_framework_container for both successful and
+    unmatched decompositions.
+    """
+    # Generated "FWC-…" ID of this decomposition run
+    framework_container_id: str
+    # ID of the obligation candidate that was decomposed
+    source_obligation_candidate_id: str
+    # Registry ID of the resolved framework, if any
+    framework_ref: Optional[str]
+    # ID of the resolved domain, if any
+    framework_domain: Optional[str]
+    domain_title: Optional[str]
+    # Subcontrol IDs, in the same order as decomposed_obligations
+    matched_subcontrols: list[str]
+    # Heuristic score between 0.0 and 1.0
+    decomposition_confidence: float
+    release_state: str  # decomposed | unmatched | error
+    decomposed_obligations: list[DecomposedObligation]
+    # Human-readable notes prefixed with "WARN:" or "ERROR:"
+    issues: list[str]
+
+
+def decompose_framework_container(
+    obligation_candidate_id: str,
+    parent_control_id: str,
+    obligation_text: str,
+    framework_ref: Optional[str],
+    framework_domain: Optional[str],
+    actor: str = "organization",
+) -> FrameworkDecompositionResult:
+    """Decompose a framework-container obligation into concrete sub-obligations.
+
+    Steps:
+    1. Resolve framework from registry
+    2. Resolve domain within framework
+    3. Select relevant subcontrols (keyword filter or full domain)
+    4. Generate decomposed obligations
+    """
+    container_id = f"FWC-{uuid.uuid4().hex[:8]}"
+    registry = get_registry()
+    issues: list[str] = []
+
+    # Step 1: Resolve framework
+    fw = None
+    if framework_ref and framework_ref in registry:
+        fw = registry[framework_ref]
+    else:
+        # Try to find by name in text
+        fw, framework_ref = _find_framework_in_text(obligation_text, registry)
+
+    if not fw:
+        issues.append("ERROR: framework_not_matched")
+        return FrameworkDecompositionResult(
+            framework_container_id=container_id,
+            source_obligation_candidate_id=obligation_candidate_id,
+            framework_ref=framework_ref,
+            framework_domain=framework_domain,
+            domain_title=None,
+            matched_subcontrols=[],
+            decomposition_confidence=0.0,
+            release_state="unmatched",
+            decomposed_obligations=[],
+            issues=issues,
+        )
+
+    # Step 2: Resolve domain
+    domain_data = None
+    domain_title = None
+    if framework_domain:
+        for d in fw.get("domains", []):
+            if d["domain_id"].lower() == framework_domain.lower():
+                domain_data = d
+                domain_title = d.get("title")
+                break
+    if not domain_data:
+        # Try matching from text
+        domain_id, domain_title = _match_domain(obligation_text, fw)
+        if domain_id:
+            for d in fw.get("domains", []):
+                if d["domain_id"] == domain_id:
+                    domain_data = d
+                    framework_domain = domain_id
+                    break
+
+    if not domain_data:
+        issues.append("WARN: domain_not_matched — using all domains")
+        # Fall back to all subcontrols across all domains
+        all_subcontrols = []
+        for d in fw.get("domains", []):
+            for sc in d.get("subcontrols", []):
+                sc["_domain_id"] = d["domain_id"]
+                all_subcontrols.append(sc)
+        subcontrols = _select_subcontrols(obligation_text, all_subcontrols)
+        if not subcontrols:
+            issues.append("ERROR: no_subcontrols_matched")
+            return FrameworkDecompositionResult(
+                framework_container_id=container_id,
+                source_obligation_candidate_id=obligation_candidate_id,
+                framework_ref=framework_ref,
+                framework_domain=framework_domain,
+                domain_title=None,
+                matched_subcontrols=[],
+                decomposition_confidence=0.0,
+                release_state="unmatched",
+                decomposed_obligations=[],
+                issues=issues,
+            )
+    else:
+        # Step 3: Select subcontrols from domain
+        raw_subcontrols = domain_data.get("subcontrols", [])
+        subcontrols = _select_subcontrols(obligation_text, raw_subcontrols)
+        if not subcontrols:
+            # Full domain decomposition
+            subcontrols = raw_subcontrols
+
+    # Quality check: too many subcontrols
+    if len(subcontrols) > 25:
+        issues.append(f"WARN: {len(subcontrols)} subcontrols — may be too broad")
+
+    # Step 4: Generate decomposed obligations
+    display_name = fw.get("display_name", framework_ref or "Unknown")
+    decomposed: list[DecomposedObligation] = []
+    matched_ids: list[str] = []
+
+    for sc in subcontrols:
+        sc_id = sc.get("subcontrol_id", "")
+        matched_ids.append(sc_id)
+
+        action_hint = sc.get("action_hint", "")
+        object_hint = sc.get("object_hint", "")
+
+        # Quality warnings
+        if not action_hint:
+            issues.append(f"WARN: {sc_id} missing action_hint")
+        if not object_hint:
+            issues.append(f"WARN: {sc_id} missing object_hint")
+
+        obl_id = f"{obligation_candidate_id}-{sc_id}"
+
+        decomposed.append(DecomposedObligation(
+            obligation_candidate_id=obl_id,
+            parent_control_id=parent_control_id,
+            parent_framework_container_id=container_id,
+            source_ref_law=display_name,
+            source_ref_article=sc_id,
+            obligation_text=sc.get("statement", ""),
+            actor=actor,
+            action_raw=action_hint or _infer_action(sc.get("statement", "")),
+            object_raw=object_hint or _infer_object(sc.get("statement", "")),
+            routing_type="atomic",
+            release_state="decomposed",
+            subcontrol_id=sc_id,
+            action_hint=action_hint,
+            object_hint=object_hint,
+            object_class=sc.get("object_class", ""),
+            keywords=sc.get("keywords", []),
+        ))
+
+    # Check if decomposed are identical to container
+    for d in decomposed:
+        if d.obligation_text.strip() == obligation_text.strip():
+            issues.append(f"WARN: {d.subcontrol_id} identical to container text")
+
+    confidence = _compute_decomposition_confidence(
+        framework_ref, framework_domain, domain_data, len(subcontrols), issues,
+    )
+
+    return FrameworkDecompositionResult(
+        framework_container_id=container_id,
+        source_obligation_candidate_id=obligation_candidate_id,
+        framework_ref=framework_ref,
+        framework_domain=framework_domain,
+        domain_title=domain_title,
+        matched_subcontrols=matched_ids,
+        decomposition_confidence=confidence,
+        release_state="decomposed",
+        decomposed_obligations=decomposed,
+        issues=issues,
+    )
+
+
+def _find_framework_in_text(
+    text: str, registry: dict[str, dict],
+) -> tuple[Optional[dict], Optional[str]]:
+    """Try to find a framework by searching text for known names."""
+    alias_idx = _build_alias_index(registry)
+    m = _DIRECT_FRAMEWORK_RE.search(text)
+    if m:
+        fw_id = _resolve_framework_id(m.group(0), alias_idx, registry)
+        if fw_id and fw_id in registry:
+            return registry[fw_id], fw_id
+    return None, None
+
+
+def _select_subcontrols(
+    obligation_text: str, subcontrols: list[dict],
+) -> list[dict]:
+    """Select relevant subcontrols based on keyword matching.
+
+    Returns empty list if no targeted match found (caller falls back to
+    full domain).
+    """
+    text_lower = obligation_text.lower()
+    scored: list[tuple[int, dict]] = []
+
+    for sc in subcontrols:
+        score = 0
+        for kw in sc.get("keywords", []):
+            if kw.lower() in text_lower:
+                score += 1
+        # Title match
+        title = sc.get("title", "").lower()
+        if title and title in text_lower:
+            score += 3
+        # Object hint in text
+        obj = sc.get("object_hint", "").lower()
+        if obj and obj in text_lower:
+            score += 2
+
+        if score > 0:
+            scored.append((score, sc))
+
+    if not scored:
+        return []
+
+    # Only return those with meaningful overlap (score >= 2)
+    scored.sort(key=lambda x: x[0], reverse=True)
+    return [sc for score, sc in scored if score >= 2]
+
+
+def _infer_action(statement: str) -> str:
+    """Infer a basic action verb from a statement."""
+    s = statement.lower()
+    if any(w in s for w in ["definiert", "definieren", "define"]):
+        return "definieren"
+    if any(w in s for w in ["implementiert", "implementieren", "implement"]):
+        return "implementieren"
+    if any(w in s for w in ["dokumentiert", "dokumentieren", "document"]):
+        return "dokumentieren"
+    if any(w in s for w in ["ueberwacht", "ueberwachen", "monitor"]):
+        return "ueberwachen"
+    if any(w in s for w in ["getestet", "testen", "test"]):
+        return "testen"
+    if any(w in s for w in ["geschuetzt", "schuetzen", "protect"]):
+        return "implementieren"
+    if any(w in s for w in ["verwaltet", "verwalten", "manage"]):
+        return "pflegen"
+    if any(w in s for w in ["gemeldet", "melden", "report"]):
+        return "melden"
+    return "implementieren"
+
+
+def _infer_object(statement: str) -> str:
+    """Infer the primary object from a statement (first noun phrase)."""
+    # Simple heuristic: take the text after "muessen"/"muss" up to the verb
+    m = re.search(
+        r"(?:muessen|muss|m(?:ü|ue)ssen)\s+(.+?)(?:\s+werden|\s+sein|\.|,|$)",
+        statement,
+        re.IGNORECASE,
+    )
+    if m:
+        return m.group(1).strip()[:80]
+    # Fallback: first 80 chars
+    return statement[:80] if statement else ""
+
+
+def _compute_decomposition_confidence(
+    framework_ref: Optional[str],
+    domain: Optional[str],
+    domain_data: Optional[dict],
+    num_subcontrols: int,
+    issues: list[str],
+) -> float:
+    """Compute confidence score for the decomposition."""
+    score = 0.3
+    if framework_ref:
+        score += 0.25
+    if domain:
+        score += 0.20
+    if domain_data:
+        score += 0.10
+    if 1 <= num_subcontrols <= 15:
+        score += 0.10
+    elif num_subcontrols > 15:
+        score += 0.05  # less confident with too many
+
+    # Penalize errors
+    errors = sum(1 for i in issues if i.startswith("ERROR:"))
+    score -= errors * 0.15
+    return round(max(min(score, 1.0), 0.0), 2)
+
+
+# ---------------------------------------------------------------------------
+# Registry statistics (for admin/debugging)
+# ---------------------------------------------------------------------------
+
+def registry_stats() -> dict:
+    """Return summary statistics about the loaded registry."""
+    reg = get_registry()
+    stats = {
+        "frameworks": len(reg),
+        "details": [],
+    }
+    total_domains = 0
+    total_subcontrols = 0
+    for fw_id, fw in reg.items():
+        domains = fw.get("domains", [])
+        n_sc = sum(len(d.get("subcontrols", [])) for d in domains)
+        total_domains += len(domains)
+        total_subcontrols += n_sc
+        stats["details"].append({
+            "framework_id": fw_id,
+            "display_name": fw.get("display_name", ""),
+            "domains": len(domains),
+            "subcontrols": n_sc,
+        })
+    stats["total_domains"] = total_domains
+    stats["total_subcontrols"] = total_subcontrols
+    return stats
@@ -173,6 +173,7 @@ class LLMProviderType(str, Enum):
    """Supported LLM provider types."""
    ANTHROPIC = "anthropic"
    SELF_HOSTED = "self_hosted"
+    OLLAMA = "ollama"  # Alias for self_hosted (Ollama-specific)
    MOCK = "mock"  # For testing


@@ -392,6 +393,7 @@ class SelfHostedProvider(LLMProvider):
                "model": self.model,
                "prompt": full_prompt,
                "stream": False,
+                "think": False,  # Disable thinking mode (qwen3.5 etc.)
                "options": {}
            }

@@ -549,7 +551,7 @@ def get_llm_config() -> LLMConfig:
            vault_path="breakpilot/api_keys/anthropic",
            env_var="ANTHROPIC_API_KEY"
        )
-    elif provider_type == LLMProviderType.SELF_HOSTED:
+    elif provider_type in (LLMProviderType.SELF_HOSTED, LLMProviderType.OLLAMA):
        api_key = get_secret_from_vault_or_env(
            vault_path="breakpilot/api_keys/self_hosted_llm",
            env_var="SELF_HOSTED_LLM_KEY"
@@ -558,7 +560,7 @@ def get_llm_config() -> LLMConfig:
    # Select model based on provider type
    if provider_type == LLMProviderType.ANTHROPIC:
        model = os.getenv("ANTHROPIC_MODEL", "claude-sonnet-4-20250514")
-    elif provider_type == LLMProviderType.SELF_HOSTED:
+    elif provider_type in (LLMProviderType.SELF_HOSTED, LLMProviderType.OLLAMA):
        model = os.getenv("SELF_HOSTED_LLM_MODEL", "qwen2.5:14b")
    else:
        model = "mock-model"
@@ -591,7 +593,7 @@ def get_llm_provider(config: Optional[LLMConfig] = None) -> LLMProvider:
            return MockProvider(config)
        return AnthropicProvider(config)

-    elif config.provider_type == LLMProviderType.SELF_HOSTED:
+    elif config.provider_type in (LLMProviderType.SELF_HOSTED, LLMProviderType.OLLAMA):
        if not config.base_url:
            logger.warning("No self-hosted LLM URL found, using mock provider")
            return MockProvider(config)
@@ -0,0 +1,59 @@
+"""Shared normative language patterns for assertion classification.
+
+Extracted from decomposition_pass.py for reuse in the assertion engine.
+"""
+
+import re
+
+# Mandatory-obligation ("Pflicht") signals: German modal verbs and
+# "ist/sind/hat/haben ... zu <verb>en" constructions plus English
+# shall/must/required.
+_PFLICHT_SIGNALS = [
+    r"\bmüssen\b", r"\bmuss\b", r"\bhat\s+sicherzustellen\b",
+    r"\bhaben\s+sicherzustellen\b", r"\bsind\s+verpflichtet\b",
+    r"\bist\s+verpflichtet\b",
+    r"\bist\s+zu\s+\w+en\b", r"\bsind\s+zu\s+\w+en\b",
+    r"\bhat\s+zu\s+\w+en\b", r"\bhaben\s+zu\s+\w+en\b",
+    r"\bist\s+\w+zu\w+en\b", r"\bsind\s+\w+zu\w+en\b",
+    r"\bist\s+\w+\s+zu\s+\w+en\b", r"\bsind\s+\w+\s+zu\s+\w+en\b",
+    r"\bhat\s+\w+\s+zu\s+\w+en\b", r"\bhaben\s+\w+\s+zu\s+\w+en\b",
+    r"\bshall\b", r"\bmust\b", r"\brequired\b",
+    # Verbs with an embedded "zu" particle (e.g. "...zuteilen", "...zuweisen").
+    r"\b\w+zuteilen\b", r"\b\w+zuwenden\b", r"\b\w+zustellen\b", r"\b\w+zulegen\b",
+    r"\b\w+zunehmen\b", r"\b\w+zuführen\b", r"\b\w+zuhalten\b", r"\b\w+zusetzen\b",
+    r"\b\w+zuweisen\b", r"\b\w+zuordnen\b", r"\b\w+zufügen\b", r"\b\w+zugeben\b",
+    # Wide fallback: "ist"/"sind" followed within 1-80 characters by "zu <verb>en".
+    r"\bist\b.{1,80}\bzu\s+\w+en\b", r"\bsind\b.{1,80}\bzu\s+\w+en\b",
+]
+# All Pflicht signals as one case-insensitive alternation.
+PFLICHT_RE = re.compile("|".join(_PFLICHT_SIGNALS), re.IGNORECASE)
+
+# Recommendation ("Empfehlung") signals: soll/sollte forms, should/ensure/
+# recommend*, and a set of German action verbs.
+_EMPFEHLUNG_SIGNALS = [
+    r"\bsoll\b", r"\bsollen\b", r"\bsollte\b", r"\bsollten\b",
+    r"\bgewährleisten\b", r"\bsicherstellen\b",
+    r"\bshould\b", r"\bensure\b", r"\brecommend\w*\b",
+    r"\bnachweisen\b", r"\beinhalten\b", r"\bunterlassen\b", r"\bwahren\b",
+    r"\bdokumentieren\b", r"\bimplementieren\b", r"\büberprüfen\b", r"\büberwachen\b",
+    r"\bprüfen,\s+ob\b", r"\bkontrollieren,\s+ob\b",
+]
+# All Empfehlung signals as one case-insensitive alternation.
+EMPFEHLUNG_RE = re.compile("|".join(_EMPFEHLUNG_SIGNALS), re.IGNORECASE)
+
+# Permission/optional ("Kann") signals: kann/können/darf/dürfen, may, optional.
+_KANN_SIGNALS = [
+    r"\bkann\b", r"\bkönnen\b", r"\bdarf\b", r"\bdürfen\b",
+    r"\bmay\b", r"\boptional\b",
+]
+# All Kann signals as one case-insensitive alternation.
+KANN_RE = re.compile("|".join(_KANN_SIGNALS), re.IGNORECASE)
+
+# Matches any normative signal: the union of the three lists above, in the
+# order Pflicht, Empfehlung, Kann.
+NORMATIVE_RE = re.compile(
+    "|".join(_PFLICHT_SIGNALS + _EMPFEHLUNG_SIGNALS + _KANN_SIGNALS),
+    re.IGNORECASE,
+)
+
+# Rationale markers (explanatory rather than normative wording).
+# Note: r"\bda\s+" and r"\berwägung" carry no trailing word boundary, so the
+# latter also matches words that merely start with "erwägung".
+_RATIONALE_SIGNALS = [
+    r"\bda\s+", r"\bweil\b", r"\bgrund\b", r"\berwägung",
+    r"\bbecause\b", r"\breason\b", r"\brationale\b",
+    r"\bkönnen\s+.*\s+verursachen\b", r"\bführt\s+zu\b",
+]
+# All rationale markers as one case-insensitive alternation.
+RATIONALE_RE = re.compile("|".join(_RATIONALE_SIGNALS), re.IGNORECASE)
+
+# Evidence-related keywords (for fact detection)
+# Note: the "." in r"\baudit.report\b" is an unescaped wildcard, so any single
+# character between "audit" and "report" matches (space, hyphen, ...).
+_EVIDENCE_KEYWORDS = [
+    r"\bnachweis\b", r"\bzertifikat\b", r"\baudit.report\b",
+    r"\bprotokoll\b", r"\bdokumentation\b", r"\bbericht\b",
+    r"\bcertificate\b", r"\bevidence\b", r"\bproof\b",
+]
+# All evidence keywords as one case-insensitive alternation.
+EVIDENCE_RE = re.compile("|".join(_EVIDENCE_KEYWORDS), re.IGNORECASE)
@@ -0,0 +1,563 @@
+"""Obligation Extractor — 3-Tier Chunk-to-Obligation Linking.
+
+Maps RAG chunks to obligations from the v2 obligation framework using
+three tiers (fastest first):
+
+    Tier 1: EXACT MATCH  — regulation_code + article → obligation_id  (~40%)
+    Tier 2: EMBEDDING    — chunk text vs. obligation descriptions     (~30%)
+    Tier 3: LLM EXTRACT  — local Ollama extracts obligation text      (~25%)
+
+Part of the Multi-Layer Control Architecture (Phase 4 of 8).
+"""
+
+import json
+import logging
+import os
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# Service endpoints and LLM settings; each is read from the environment once
+# at import time and falls back to the default shown here.
+EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://embedding-service:8087")
+OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
+OLLAMA_MODEL = os.getenv("CONTROL_GEN_OLLAMA_MODEL", "qwen3.5:35b-a3b")
+# Timeout passed to the HTTP client for LLM calls.
+LLM_TIMEOUT = float(os.getenv("CONTROL_GEN_LLM_TIMEOUT", "180"))
+
+# Embedding similarity thresholds for Tier 2
+# A Tier 2 match is accepted only when the raw cosine similarity reaches
+# EMBEDDING_MATCH_THRESHOLD.
+EMBEDDING_MATCH_THRESHOLD = 0.80
+# NOTE(review): not referenced in the visible part of this module —
+# presumably reserved for "candidate" matches; confirm before relying on it.
+EMBEDDING_CANDIDATE_THRESHOLD = 0.60
+
+# ---------------------------------------------------------------------------
+# Regulation code mapping: RAG chunk codes → obligation file regulation IDs
+# ---------------------------------------------------------------------------
+
+# Keys are lowercase regulation codes; values are the regulation IDs used as
+# the first part of the article lookup keys (e.g. "dsgvo/art. 30").
+_REGULATION_CODE_TO_ID = {
+    # DSGVO
+    "eu_2016_679": "dsgvo",
+    "dsgvo": "dsgvo",
+    "gdpr": "dsgvo",
+    # AI Act
+    "eu_2024_1689": "ai_act",
+    "ai_act": "ai_act",
+    "aiact": "ai_act",
+    # NIS2
+    "eu_2022_2555": "nis2",
+    "nis2": "nis2",
+    "bsig": "nis2",
+    # BDSG
+    "bdsg": "bdsg",
+    # TTDSG
+    "ttdsg": "ttdsg",
+    # DSA
+    "eu_2022_2065": "dsa",
+    "dsa": "dsa",
+    # Data Act
+    "eu_2023_2854": "data_act",
+    "data_act": "data_act",
+    # EU Machinery
+    "eu_2023_1230": "eu_machinery",
+    "eu_machinery": "eu_machinery",
+    # DORA
+    "eu_2022_2554": "dora",
+    "dora": "dora",
+}
+
+
+@dataclass
+class ObligationMatch:
+    """Outcome of linking a chunk to an obligation.
+
+    Every field has a default, so an "empty" instance describes the case in
+    which nothing was matched.
+    """
+
+    obligation_id: Optional[str] = None
+    obligation_title: Optional[str] = None
+    obligation_text: Optional[str] = None
+    method: str = "none"  # exact_match | embedding_match | llm_extracted | inferred
+    confidence: float = 0.0
+    regulation_id: Optional[str] = None  # e.g. "dsgvo"
+
+    def to_dict(self) -> dict:
+        """Return the match as a plain dict keyed by field name."""
+        exported_fields = (
+            "obligation_id",
+            "obligation_title",
+            "obligation_text",
+            "method",
+            "confidence",
+            "regulation_id",
+        )
+        return {name: getattr(self, name) for name in exported_fields}
+
+
+@dataclass
+class _ObligationEntry:
+    """Internal representation of a loaded obligation.
+
+    One entry is built per item of an obligation file's "obligations" list.
+    """
+
+    # Obligation identifier; also the key in the extractor's obligation map.
+    id: str
+    # Title and description; both feed the text that gets embedded.
+    title: str
+    description: str
+    # ID of the regulation whose file this obligation was loaded from.
+    regulation_id: str
+    articles: list[str] = field(default_factory=list)  # normalized: ["art. 30", "§ 38"]
+    # NOTE(review): not assigned anywhere in the visible code; the extractor
+    # keeps embeddings in a separate parallel list — confirm whether this
+    # field is still needed.
+    embedding: list[float] = field(default_factory=list)
+
+
+class ObligationExtractor:
+    """3-Tier obligation extraction from RAG chunks.
+
+    The tiers are tried in order and the first one that yields a match wins:
+    exact regulation/article lookup, embedding similarity against the loaded
+    obligations, and finally free-text extraction by the local LLM.
+
+    Usage::
+
+        extractor = ObligationExtractor()
+        await extractor.initialize()  # loads obligations + embeddings
+
+        match = await extractor.extract(
+            chunk_text="...",
+            regulation_code="eu_2016_679",
+            article="Art. 30",
+            paragraph="Abs. 1",
+        )
+    """
+
+    # Ranking bonus for obligations that belong to the chunk's own regulation
+    # (Tier 2). It influences which candidate wins, never the threshold check.
+    _SAME_REGULATION_BONUS = 0.05
+
+    def __init__(self):
+        """Create an empty, uninitialized extractor (no I/O happens here)."""
+        self._article_lookup: dict[str, list[str]] = {}  # "dsgvo/art. 30" → ["DSGVO-OBL-001"]
+        self._obligations: dict[str, _ObligationEntry] = {}  # id → entry
+        # Parallel lists: _obligation_embeddings[i] belongs to _obligation_ids[i].
+        self._obligation_embeddings: list[list[float]] = []
+        self._obligation_ids: list[str] = []
+        self._initialized = False
+
+    async def initialize(self) -> None:
+        """Load all obligations from v2 JSON files and compute embeddings.
+
+        Returns immediately when the extractor has already been initialized.
+        """
+        if self._initialized:
+            return
+
+        self._load_obligations()
+        await self._compute_embeddings()
+        self._initialized = True
+        logger.info(
+            "ObligationExtractor initialized: %d obligations, %d article lookups, %d embeddings",
+            len(self._obligations),
+            len(self._article_lookup),
+            sum(1 for e in self._obligation_embeddings if e),
+        )
+
+    async def extract(
+        self,
+        chunk_text: str,
+        regulation_code: str,
+        article: Optional[str] = None,
+        paragraph: Optional[str] = None,
+    ) -> ObligationMatch:
+        """Extract obligation from a chunk using 3-tier strategy.
+
+        Args:
+            chunk_text: Text of the RAG chunk.
+            regulation_code: Regulation code attached to the chunk.
+            article: Article reference of the chunk, if known (enables Tier 1).
+            paragraph: Accepted for interface compatibility; currently unused.
+
+        Returns:
+            The first match produced by Tier 1, Tier 2 or Tier 3. Tier 3
+            always returns an ``ObligationMatch`` (with confidence 0.0 when
+            the LLM produced no output), so this method never returns None.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        reg_id = _normalize_regulation(regulation_code)
+
+        # Tier 1: Exact match via article lookup
+        if article:
+            match = self._tier1_exact(reg_id, article)
+            if match:
+                return match
+
+        # Tier 2: Embedding similarity
+        match = await self._tier2_embedding(chunk_text, reg_id)
+        if match:
+            return match
+
+        # Tier 3: LLM extraction
+        return await self._tier3_llm(chunk_text, regulation_code, article)
+
+    # -----------------------------------------------------------------------
+    # Tier 1: Exact Match
+    # -----------------------------------------------------------------------
+
+    def _tier1_exact(self, reg_id: Optional[str], article: str) -> Optional[ObligationMatch]:
+        """Look up obligation by regulation + article.
+
+        Returns None when the regulation is unknown, when no obligation is
+        registered for the normalized article, or when the registered ID has
+        no loaded entry.
+        """
+        if not reg_id:
+            return None
+
+        norm_article = _normalize_article(article)
+        key = f"{reg_id}/{norm_article}"
+
+        obl_ids = self._article_lookup.get(key)
+        if not obl_ids:
+            return None
+
+        # Take the first match (the obligation that was loaded first for this key)
+        obl_id = obl_ids[0]
+        entry = self._obligations.get(obl_id)
+        if not entry:
+            return None
+
+        return ObligationMatch(
+            obligation_id=entry.id,
+            obligation_title=entry.title,
+            obligation_text=entry.description,
+            method="exact_match",
+            confidence=1.0,
+            regulation_id=reg_id,
+        )
+
+    # -----------------------------------------------------------------------
+    # Tier 2: Embedding Match
+    # -----------------------------------------------------------------------
+
+    async def _tier2_embedding(
+        self, chunk_text: str, reg_id: Optional[str]
+    ) -> Optional[ObligationMatch]:
+        """Find nearest obligation by embedding similarity.
+
+        Candidates are ranked by cosine similarity plus a small bonus for
+        obligations of the same regulation. The winner is accepted only if
+        its raw similarity (without the bonus) reaches
+        ``EMBEDDING_MATCH_THRESHOLD``.
+        """
+        if not self._obligation_embeddings:
+            return None
+
+        # Only the first 2000 characters of the chunk are embedded.
+        chunk_embedding = await _get_embedding(chunk_text[:2000])
+        if not chunk_embedding:
+            return None
+
+        best_id: Optional[str] = None
+        best_entry: Optional[_ObligationEntry] = None
+        best_ranked = 0.0
+        best_raw = 0.0
+
+        # zip() keeps the two parallel lists in step even if their lengths
+        # ever differ, instead of raising IndexError.
+        for obl_id, obl_emb in zip(self._obligation_ids, self._obligation_embeddings):
+            if not obl_emb:
+                continue
+            entry = self._obligations.get(obl_id)
+            raw = _cosine_sim(chunk_embedding, obl_emb)
+
+            # Prefer same-regulation matches while ranking. The raw score is
+            # tracked separately so the threshold check below never depends
+            # on a float add/subtract round trip.
+            ranked = raw
+            if entry and reg_id and entry.regulation_id == reg_id:
+                ranked += self._SAME_REGULATION_BONUS
+
+            if ranked > best_ranked:
+                best_ranked = ranked
+                best_raw = raw
+                best_id = obl_id
+                best_entry = entry
+
+        if best_id is None or best_raw < EMBEDDING_MATCH_THRESHOLD:
+            return None
+
+        return ObligationMatch(
+            obligation_id=best_entry.id if best_entry else best_id,
+            obligation_title=best_entry.title if best_entry else None,
+            obligation_text=best_entry.description if best_entry else None,
+            method="embedding_match",
+            confidence=round(min(best_raw, 1.0), 3),
+            regulation_id=best_entry.regulation_id if best_entry else reg_id,
+        )
+
+    # -----------------------------------------------------------------------
+    # Tier 3: LLM Extraction
+    # -----------------------------------------------------------------------
+
+    async def _tier3_llm(
+        self, chunk_text: str, regulation_code: str, article: Optional[str]
+    ) -> ObligationMatch:
+        """Use local LLM to extract the obligation from the chunk.
+
+        Returns a match with confidence 0.0 and no text when the LLM returned
+        nothing. Otherwise the extracted ``obligation_text`` is returned with
+        confidence 0.60; if the response has no usable ``obligation_text``
+        the first 500 characters of the raw response are used instead.
+        """
+        prompt = f"""Analysiere den folgenden Gesetzestext und extrahiere die zentrale rechtliche Pflicht.
+
+Text:
+{chunk_text[:3000]}
+
+Quelle: {regulation_code} {article or ''}
+
+Antworte NUR als JSON:
+{{
+  "obligation_text": "Die zentrale Pflicht in einem Satz",
+  "actor": "Wer muss handeln (z.B. Verantwortlicher, Auftragsverarbeiter)",
+  "action": "Was muss getan werden",
+  "normative_strength": "muss|soll|kann"
+}}"""
+
+        system_prompt = (
+            "Du bist ein Rechtsexperte fuer EU-Datenschutz- und Digitalrecht. "
+            "Extrahiere die zentrale rechtliche Pflicht aus Gesetzestexten. "
+            "Antworte ausschliesslich als JSON."
+        )
+
+        reg_id = _normalize_regulation(regulation_code)
+
+        result_text = await _llm_ollama(prompt, system_prompt)
+        if not result_text:
+            return ObligationMatch(
+                method="llm_extracted",
+                confidence=0.0,
+                regulation_id=reg_id,
+            )
+
+        parsed = _parse_json(result_text)
+        # `or` (rather than a .get default) also covers a key that is present
+        # but null/empty, which would otherwise yield a match without text.
+        obligation_text = parsed.get("obligation_text") or result_text[:500]
+
+        return ObligationMatch(
+            obligation_id=None,
+            obligation_title=None,
+            obligation_text=obligation_text,
+            method="llm_extracted",
+            confidence=0.60,
+            regulation_id=reg_id,
+        )
+
+    # -----------------------------------------------------------------------
+    # Initialization helpers
+    # -----------------------------------------------------------------------
+
+    def _load_obligations(self) -> None:
+        """Load all obligation files from v2 framework.
+
+        Reads ``_manifest.json`` and every regulation file it lists, filling
+        the obligation map and the "<regulation>/<article>" lookup. A missing
+        directory, manifest or regulation file is logged and skipped;
+        malformed JSON or missing "id"/"file" keys still raise.
+        """
+        v2_dir = _find_obligations_dir()
+        if not v2_dir:
+            logger.warning("Obligations v2 directory not found — Tier 1 disabled")
+            return
+
+        manifest_path = v2_dir / "_manifest.json"
+        if not manifest_path.exists():
+            logger.warning("Manifest not found at %s", manifest_path)
+            return
+
+        # JSON is UTF-8 by specification; do not rely on the platform's
+        # default encoding (the obligation texts contain non-ASCII characters).
+        with open(manifest_path, encoding="utf-8") as f:
+            manifest = json.load(f)
+
+        for reg_info in manifest.get("regulations", []):
+            reg_id = reg_info["id"]
+            reg_file = v2_dir / reg_info["file"]
+            if not reg_file.exists():
+                logger.warning("Regulation file not found: %s", reg_file)
+                continue
+
+            with open(reg_file, encoding="utf-8") as f:
+                data = json.load(f)
+
+            for obl in data.get("obligations", []):
+                obl_id = obl["id"]
+                entry = _ObligationEntry(
+                    id=obl_id,
+                    title=obl.get("title", ""),
+                    description=obl.get("description", ""),
+                    regulation_id=reg_id,
+                )
+
+                # Build article lookup from legal_basis
+                for basis in obl.get("legal_basis", []):
+                    article_raw = basis.get("article", "")
+                    if article_raw:
+                        norm_art = _normalize_article(article_raw)
+                        key = f"{reg_id}/{norm_art}"
+                        self._article_lookup.setdefault(key, []).append(obl_id)
+                        entry.articles.append(norm_art)
+
+                self._obligations[obl_id] = entry
+
+        logger.info(
+            "Loaded %d obligations from %d regulations",
+            len(self._obligations),
+            len(manifest.get("regulations", [])),
+        )
+
+    async def _compute_embeddings(self) -> None:
+        """Compute embeddings for all obligation descriptions.
+
+        The embedded text is "<title>: <description>". Results are stored in
+        the same order as ``_obligation_ids``.
+        """
+        if not self._obligations:
+            return
+
+        self._obligation_ids = list(self._obligations.keys())
+        texts = [
+            f"{self._obligations[oid].title}: {self._obligations[oid].description}"
+            for oid in self._obligation_ids
+        ]
+
+        logger.info("Computing embeddings for %d obligations...", len(texts))
+        self._obligation_embeddings = await _get_embeddings_batch(texts)
+        valid = sum(1 for e in self._obligation_embeddings if e)
+        logger.info("Got %d/%d valid embeddings", valid, len(texts))
+
+    # -----------------------------------------------------------------------
+    # Stats
+    # -----------------------------------------------------------------------
+
+    def stats(self) -> dict:
+        """Return initialization statistics.
+
+        The ``regulations`` list is sorted so that repeated calls (and
+        different processes) report the same order.
+        """
+        return {
+            "total_obligations": len(self._obligations),
+            "article_lookups": len(self._article_lookup),
+            "embeddings_valid": sum(1 for e in self._obligation_embeddings if e),
+            "regulations": sorted(
+                {e.regulation_id for e in self._obligations.values()}
+            ),
+            "initialized": self._initialized,
+        }
+
+
+# ---------------------------------------------------------------------------
+# Module-level helpers (reusable by other modules)
+# ---------------------------------------------------------------------------
+
+
+def _normalize_regulation(regulation_code: str) -> Optional[str]:
+    """Translate a RAG regulation code into an obligation-framework regulation ID.
+
+    The code is lowercased and stripped, then resolved through the direct
+    mapping table; if that fails, known EU regulation-number prefixes are
+    tried. Returns None for empty or unknown codes.
+    """
+    if not regulation_code:
+        return None
+
+    normalized = regulation_code.lower().strip()
+
+    # Exact table hit
+    direct_hit = _REGULATION_CODE_TO_ID.get(normalized)
+    if direct_hit is not None:
+        return direct_hit
+
+    # Family match: codes that merely start with a known regulation number
+    family_prefixes = (
+        ("eu_2016_679", "dsgvo"),
+        ("eu_2024_1689", "ai_act"),
+        ("eu_2022_2555", "nis2"),
+        ("eu_2022_2065", "dsa"),
+        ("eu_2023_2854", "data_act"),
+        ("eu_2023_1230", "eu_machinery"),
+        ("eu_2022_2554", "dora"),
+    )
+    return next(
+        (
+            target
+            for prefix, target in family_prefixes
+            if normalized.startswith(prefix)
+        ),
+        None,
+    )
+
+
+def _normalize_article(article: str) -> str:
+    """Normalize article references for consistent lookup.
+
+    The same function is applied to the articles stored in the lookup table
+    and to the articles being looked up, so spelling variants of one
+    reference must collapse to a single key.
+
+    Examples:
+        "Art. 30"       → "art. 30"
+        "§ 38 BDSG"     → "§ 38"
+        "Article 10"    → "art. 10"
+        "Art. 30 Abs. 1" → "art. 30"
+        "Artikel 35"    → "art. 35"
+        "Art.30"        → "art. 30"
+        "Art 30"        → "art. 30"
+        "§38"           → "§ 38"
+
+    Returns:
+        The normalized, lowercased reference, or "" for empty input.
+    """
+    if not article:
+        return ""
+
+    # Collapse whitespace runs (including non-breaking spaces) to one space.
+    s = re.sub(r"\s+", " ", article.strip())
+
+    # Remove trailing law name: "§ 38 BDSG" → "§ 38"
+    s = re.sub(r"\s+(DSGVO|BDSG|TTDSG|DSA|NIS2|DORA|AI.?Act)\s*$", "", s, flags=re.IGNORECASE)
+
+    # Remove paragraph references: "Art. 30 Abs. 1" → "Art. 30"
+    s = re.sub(r"\s+(Abs|Absatz|para|paragraph|lit|Satz)\.?\s+.*$", "", s, flags=re.IGNORECASE)
+
+    # Normalize "Article" / "Artikel" → "Art."
+    s = re.sub(r"^(Article|Artikel)\s+", "Art. ", s, flags=re.IGNORECASE)
+
+    # Normalize "Art30" / "Art 30" / "Art.30" → "Art. 30". The digit lookahead
+    # keeps unrelated words that merely start with "art" untouched.
+    s = re.sub(r"^Art\.?\s*(?=\d)", "Art. ", s, flags=re.IGNORECASE)
+
+    # Normalize "§38" → "§ 38"
+    s = re.sub(r"^§\s*(?=\d)", "§ ", s)
+
+    return s.lower().strip()
+
+
+def _cosine_sim(a: list[float], b: list[float]) -> float:
+    """Return the cosine similarity of two equally long vectors.
+
+    Yields 0.0 when either vector is empty, when the lengths differ, or when
+    one of the vectors has zero magnitude.
+    """
+    if a and b and len(a) == len(b):
+        magnitude_a = sum(v * v for v in a) ** 0.5
+        magnitude_b = sum(v * v for v in b) ** 0.5
+        if magnitude_a != 0 and magnitude_b != 0:
+            inner_product = sum(p * q for p, q in zip(a, b))
+            return inner_product / (magnitude_a * magnitude_b)
+    return 0.0
+
+
+def _find_obligations_dir() -> Optional[Path]:
+    """Return the first existing obligations v2 directory, or None.
+
+    A candidate qualifies only if it is a directory that contains
+    ``_manifest.json``. Candidates are probed in this order: relative to this
+    file (four levels up), the fixed ``/app`` location, and a path relative
+    to the current working directory.
+    """
+    four_levels_up = Path(__file__).resolve().parent.parent.parent.parent
+    search_order = (
+        four_levels_up / "ai-compliance-sdk" / "policies" / "obligations" / "v2",
+        Path("/app/ai-compliance-sdk/policies/obligations/v2"),
+        Path("ai-compliance-sdk/policies/obligations/v2"),
+    )
+    return next(
+        (
+            location
+            for location in search_order
+            if location.is_dir() and (location / "_manifest.json").exists()
+        ),
+        None,
+    )
+
+
+async def _get_embedding(text: str) -> list[float]:
+    """Get embedding vector for a single text.
+
+    Posts the text to ``{EMBEDDING_URL}/embed`` and returns the first vector
+    of the response. This is best-effort: any failure (network error, non-2xx
+    status, unexpected payload) is logged and reported as an empty list so
+    that callers can fall through to their next strategy.
+    """
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            resp = await client.post(
+                f"{EMBEDDING_URL}/embed",
+                json={"texts": [text]},
+            )
+            resp.raise_for_status()
+            embeddings = resp.json().get("embeddings", [])
+            return embeddings[0] if embeddings else []
+    except Exception as e:
+        # Log instead of failing silently, matching the batch helper; an
+        # unreachable embedding service would otherwise be invisible.
+        logger.warning("Embedding request failed: %s", e)
+        return []
+
+
+async def _get_embeddings_batch(
+    texts: list[str], batch_size: int = 32
+) -> list[list[float]]:
+    """Get embeddings for multiple texts in batches.
+
+    The result always has exactly one entry per input text, in input order,
+    because callers index it in parallel with their own ID lists. A batch
+    that fails — or whose response does not contain exactly one vector per
+    text — contributes empty lists for all of its texts.
+
+    Args:
+        texts: Texts to embed.
+        batch_size: Maximum number of texts sent per request.
+
+    Returns:
+        One embedding per text; ``[]`` marks a text without a valid embedding.
+    """
+    all_embeddings: list[list[float]] = []
+    for i in range(0, len(texts), batch_size):
+        batch = texts[i : i + batch_size]
+        try:
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                resp = await client.post(
+                    f"{EMBEDDING_URL}/embed",
+                    json={"texts": batch},
+                )
+                resp.raise_for_status()
+                embeddings = resp.json().get("embeddings", [])
+            # A short or long response cannot be aligned with the inputs;
+            # treat it like a failed batch rather than shifting every
+            # following embedding onto the wrong text.
+            if len(embeddings) != len(batch):
+                raise ValueError(
+                    f"expected {len(batch)} embeddings, got {len(embeddings)}"
+                )
+            # Normalize null entries to the "no embedding" marker.
+            batch_embeddings = [emb or [] for emb in embeddings]
+        except Exception as e:
+            logger.warning("Batch embedding failed for %d texts: %s", len(batch), e)
+            batch_embeddings = [[] for _ in batch]
+        all_embeddings.extend(batch_embeddings)
+    return all_embeddings
+
+
+async def _llm_ollama(prompt: str, system_prompt: Optional[str] = None) -> str:
+    """Send a non-streaming chat request to the local Ollama server.
+
+    Args:
+        prompt: User message.
+        system_prompt: Optional system message placed before the user message.
+
+    Returns:
+        The ``message.content`` field of the response, or "" when the server
+        answers with a non-200 status or the call raises.
+    """
+    user_message = {"role": "user", "content": prompt}
+    if system_prompt:
+        conversation = [{"role": "system", "content": system_prompt}, user_message]
+    else:
+        conversation = [user_message]
+
+    request_body = {
+        "model": OLLAMA_MODEL,
+        "messages": conversation,
+        "stream": False,
+        "format": "json",
+        "options": {"num_predict": 512},
+        "think": False,
+    }
+
+    try:
+        async with httpx.AsyncClient(timeout=LLM_TIMEOUT) as client:
+            response = await client.post(f"{OLLAMA_URL}/api/chat", json=request_body)
+            if response.status_code == 200:
+                return response.json().get("message", {}).get("content", "")
+            logger.error(
+                "Ollama chat failed %d: %s", response.status_code, response.text[:300]
+            )
+    except Exception as exc:
+        logger.warning("Ollama call failed: %s", exc)
+    return ""
+
+
+def _parse_json(text: str) -> dict:
+    """Extract a JSON object from LLM response text.
+
+    The whole text is tried first. If it is not a JSON object, the text is
+    scanned for the first position from which a complete JSON object can be
+    decoded, so objects wrapped in prose and objects with nested braces are
+    both handled.
+
+    Returns:
+        The decoded object, or ``{}`` when no JSON object is found. The
+        result is always a dict — valid JSON that is a list, string or
+        number never leaks through to callers that use ``.get``.
+    """
+    if not text:
+        return {}
+
+    # Try direct parse
+    try:
+        parsed = json.loads(text)
+    except json.JSONDecodeError:
+        parsed = None
+    if isinstance(parsed, dict):
+        return parsed
+
+    # Try decoding an object starting at each "{" in turn
+    decoder = json.JSONDecoder()
+    start = text.find("{")
+    while start != -1:
+        try:
+            candidate, _ = decoder.raw_decode(text[start:])
+        except json.JSONDecodeError:
+            candidate = None
+        if isinstance(candidate, dict):
+            return candidate
+        start = text.find("{", start + 1)
+
+    return {}
@@ -0,0 +1,532 @@
+"""Pattern Matcher — Obligation-to-Control-Pattern Linking.
+
+Maps obligations (from the ObligationExtractor) to control patterns
+using two tiers:
+
+    Tier 1: KEYWORD MATCH  — obligation_match_keywords from patterns  (~70%)
+    Tier 2: EMBEDDING      — cosine similarity with domain bonus      (~25%)
+
+Part of the Multi-Layer Control Architecture (Phase 5 of 8).
+"""
+
+import logging
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+import yaml
+
+from compliance.services.obligation_extractor import (
+    _cosine_sim,
+    _get_embedding,
+    _get_embeddings_batch,
+)
+
+logger = logging.getLogger(__name__)
+
+# Minimum number of keyword hits a pattern needs for a Tier 1 match to be
+# accepted (this is a hit count, not a ratio).
+KEYWORD_MATCH_MIN_HITS = 2
+# Embedding threshold for Tier 2
+EMBEDDING_PATTERN_THRESHOLD = 0.75
+# Domain bonus when regulation maps to the pattern's domain; added to the
+# confidence, which is then capped at 1.0.
+DOMAIN_BONUS = 0.10
+
+# Map regulation IDs to pattern domains that are likely relevant
+# (keys are the regulation IDs produced by the obligation extractor).
+_REGULATION_DOMAIN_AFFINITY = {
+    "dsgvo": ["DATA", "COMP", "GOV"],
+    "bdsg": ["DATA", "COMP"],
+    "ttdsg": ["DATA"],
+    "ai_act": ["AI", "COMP", "DATA"],
+    "nis2": ["SEC", "INC", "NET", "LOG", "CRYP"],
+    "dsa": ["DATA", "COMP"],
+    "data_act": ["DATA", "COMP"],
+    "eu_machinery": ["SEC", "COMP"],
+    "dora": ["SEC", "INC", "FIN", "COMP"],
+}
+
+
+@dataclass
+class ControlPattern:
+    """Python representation of a control pattern from YAML.
+
+    The first eight fields are required; every remaining field has a default.
+    """
+
+    # Pattern identifier, e.g. "CP-COMP-001".
+    id: str
+    name: str
+    name_de: str
+    # Domain code compared against the regulation's affine domains for the
+    # domain bonus.
+    domain: str
+    category: str
+    description: str
+    objective_template: str
+    rationale_template: str
+    requirements_template: list[str] = field(default_factory=list)
+    test_procedure_template: list[str] = field(default_factory=list)
+    evidence_template: list[str] = field(default_factory=list)
+    severity_default: str = "medium"
+    implementation_effort_default: str = "m"
+    # Keywords searched in the obligation text (Tier 1); the list length is
+    # the denominator of the keyword confidence.
+    obligation_match_keywords: list[str] = field(default_factory=list)
+    tags: list[str] = field(default_factory=list)
+    # IDs of patterns this one can be combined with; unknown IDs are dropped
+    # when a match result is built.
+    composable_with: list[str] = field(default_factory=list)
+    open_anchor_refs: list[dict] = field(default_factory=list)
+
+
+@dataclass
+class PatternMatchResult:
+    """Outcome of matching an obligation against the control patterns.
+
+    The defaults describe "no match": no pattern, method "none" and a
+    confidence of 0.0.
+    """
+
+    pattern: Optional[ControlPattern] = None
+    pattern_id: Optional[str] = None
+    method: str = "none"  # keyword | embedding | combined | none
+    confidence: float = 0.0
+    keyword_hits: int = 0
+    total_keywords: int = 0
+    embedding_score: float = 0.0
+    domain_bonus_applied: bool = False
+    composable_patterns: list[str] = field(default_factory=list)
+
+    def to_dict(self) -> dict:
+        """Return a serializable summary of the result.
+
+        The ``pattern`` object itself is left out; confidence and embedding
+        score are rounded to three decimals.
+        """
+        summary = {"pattern_id": self.pattern_id, "method": self.method}
+        summary["confidence"] = round(self.confidence, 3)
+        summary["keyword_hits"] = self.keyword_hits
+        summary["total_keywords"] = self.total_keywords
+        summary["embedding_score"] = round(self.embedding_score, 3)
+        summary["domain_bonus_applied"] = self.domain_bonus_applied
+        summary["composable_patterns"] = self.composable_patterns
+        return summary
+
+
+class PatternMatcher:
+    """Links obligations to control patterns using keyword + embedding matching.
+
+    Usage::
+
+        matcher = PatternMatcher()
+        await matcher.initialize()
+
+        result = await matcher.match(
+            obligation_text="Fuehrung eines Verarbeitungsverzeichnisses...",
+            regulation_id="dsgvo",
+        )
+        print(result.pattern_id)   # e.g. "CP-COMP-001"
+        print(result.confidence)   # e.g. 0.85
+    """
+
+    def __init__(self):
+        """Create an empty, uninitialized matcher; nothing is loaded here."""
+        self._patterns: list[ControlPattern] = []
+        # Lookup tables over the loaded patterns.
+        self._by_id: dict[str, ControlPattern] = {}
+        self._by_domain: dict[str, list[ControlPattern]] = {}
+        self._keyword_index: dict[str, list[str]] = {}  # keyword → [pattern_ids]
+        # NOTE(review): presumably parallel lists (embedding i belongs to
+        # pattern id i) — they are filled outside the visible code; confirm.
+        self._pattern_embeddings: list[list[float]] = []
+        self._pattern_ids: list[str] = []
+        # Set to True at the end of initialize().
+        self._initialized = False
+
+    async def initialize(self) -> None:
+        """Prepare the matcher: load patterns, index keywords, embed patterns.
+
+        Calling it again after a successful run is a no-op.
+        """
+        if not self._initialized:
+            self._load_patterns()
+            self._build_keyword_index()
+            await self._compute_embeddings()
+            self._initialized = True
+
+            usable_embeddings = sum(1 for e in self._pattern_embeddings if e)
+            logger.info(
+                "PatternMatcher initialized: %d patterns, %d keywords, %d embeddings",
+                len(self._patterns),
+                len(self._keyword_index),
+                usable_embeddings,
+            )
+
+    async def match(
+        self,
+        obligation_text: str,
+        regulation_id: Optional[str] = None,
+        top_n: int = 1,
+    ) -> PatternMatchResult:
+        """Find the single best control pattern for an obligation text.
+
+        Args:
+            obligation_text: The obligation description to match against.
+            regulation_id: Source regulation (for domain bonus).
+            top_n: Accepted for interface compatibility; not used by this
+                method.
+
+        Returns:
+            The combined keyword/embedding result. An empty
+            ``PatternMatchResult`` is returned when the text is empty or no
+            patterns are loaded.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        if not (obligation_text and self._patterns):
+            return PatternMatchResult()
+
+        # Run both tiers, then let _combine_results pick the winner.
+        by_keyword = self._tier1_keyword(obligation_text, regulation_id)
+        by_embedding = await self._tier2_embedding(obligation_text, regulation_id)
+        winner = self._combine_results(by_keyword, by_embedding)
+
+        # Report only composable patterns that are actually loaded.
+        if winner.pattern:
+            known_patterns = self._by_id
+            winner.composable_patterns = [
+                candidate_id
+                for candidate_id in winner.pattern.composable_with
+                if candidate_id in known_patterns
+            ]
+
+        return winner
+
+    async def match_top_n(
+        self,
+        obligation_text: str,
+        regulation_id: Optional[str] = None,
+        n: int = 3,
+    ) -> list[PatternMatchResult]:
+        """Return top-N pattern matches sorted by confidence descending.
+
+        Every pattern that received a keyword or an embedding score becomes a
+        candidate. Its confidence is the larger of the two scores, plus 0.05
+        when both are positive, capped at 1.0.
+
+        Args:
+            obligation_text: The obligation description to match against.
+            regulation_id: Source regulation, forwarded to both scorers.
+            n: Maximum number of results; values <= 0 yield an empty list.
+
+        Returns:
+            At most ``n`` results, best first. Results with equal confidence
+            are ordered by pattern ID so the output is reproducible.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        # A negative n would otherwise slice from the end of the list.
+        if n <= 0 or not obligation_text or not self._patterns:
+            return []
+
+        keyword_scores = self._keyword_scores(obligation_text, regulation_id)
+        embedding_scores = await self._embedding_scores(obligation_text, regulation_id)
+
+        # Merge scores
+        all_pattern_ids = set(keyword_scores.keys()) | set(embedding_scores.keys())
+        results: list[PatternMatchResult] = []
+
+        for pid in all_pattern_ids:
+            pattern = self._by_id.get(pid)
+            if not pattern:
+                continue
+
+            kw_score = keyword_scores.get(pid, (0, 0, 0.0))  # (hits, total, score)
+            emb_score = embedding_scores.get(pid, (0.0, False))  # (score, bonus_applied)
+
+            kw_hits, kw_total, kw_confidence = kw_score
+            emb_confidence, bonus_applied = emb_score
+
+            # Combined confidence: max of keyword and embedding, with boost if both
+            if kw_confidence > 0 and emb_confidence > 0:
+                combined = max(kw_confidence, emb_confidence) + 0.05
+                method = "combined"
+            elif kw_confidence > 0:
+                combined = kw_confidence
+                method = "keyword"
+            else:
+                combined = emb_confidence
+                method = "embedding"
+
+            results.append(PatternMatchResult(
+                pattern=pattern,
+                pattern_id=pid,
+                method=method,
+                confidence=min(combined, 1.0),
+                keyword_hits=kw_hits,
+                total_keywords=kw_total,
+                embedding_score=emb_confidence,
+                domain_bonus_applied=bonus_applied,
+                composable_patterns=[
+                    p for p in pattern.composable_with if p in self._by_id
+                ],
+            ))
+
+        # Sort by confidence descending. The candidates come from a set, whose
+        # iteration order varies between processes, so ties are broken by
+        # pattern ID to keep the ranking deterministic.
+        results.sort(key=lambda r: (-r.confidence, r.pattern_id))
+        return results[:n]
+
+    # -----------------------------------------------------------------------
+    # Tier 1: Keyword Match
+    # -----------------------------------------------------------------------
+
+    def _tier1_keyword(
+        self, obligation_text: str, regulation_id: Optional[str]
+    ) -> Optional[PatternMatchResult]:
+        """Match by counting keyword hits in the obligation text.
+
+        Only patterns with at least ``KEYWORD_MATCH_MIN_HITS`` hits are
+        eligible; among those, the one with the highest hit ratio wins. The
+        domain bonus is added to the winner's confidence afterwards and does
+        not influence the selection.
+
+        Returns:
+            The best keyword match, or None when no pattern has enough hits.
+        """
+        scores = self._keyword_scores(obligation_text, regulation_id)
+
+        # Apply the minimum-hit rule before ranking. Ranking first would let
+        # a pattern with a single keyword (1/1 = 1.0) win and then be
+        # rejected, hiding valid multi-hit patterns with a lower ratio.
+        eligible = {
+            pid: score
+            for pid, score in scores.items()
+            if score[0] >= KEYWORD_MATCH_MIN_HITS
+        }
+        if not eligible:
+            return None
+
+        # Find best match
+        best_pid = max(eligible, key=lambda pid: eligible[pid][2])
+        hits, total, confidence = eligible[best_pid]
+
+        pattern = self._by_id.get(best_pid)
+        if not pattern:
+            return None
+
+        # Check domain bonus
+        bonus_applied = False
+        if regulation_id and self._domain_matches(pattern.domain, regulation_id):
+            confidence = min(confidence + DOMAIN_BONUS, 1.0)
+            bonus_applied = True
+
+        return PatternMatchResult(
+            pattern=pattern,
+            pattern_id=best_pid,
+            method="keyword",
+            confidence=confidence,
+            keyword_hits=hits,
+            total_keywords=total,
+            domain_bonus_applied=bonus_applied,
+        )
+
+    def _keyword_scores(
+        self, text: str, regulation_id: Optional[str]
+    ) -> dict[str, tuple[int, int, float]]:
+        """Score every pattern by how many of its keywords occur in *text*.
+
+        Matching is a case-insensitive substring test against the keyword
+        index. ``regulation_id`` is accepted but not used by this method.
+
+        Returns dict: pattern_id → (hits, total_keywords, confidence), where
+        confidence is hits / total_keywords (0.0 when a pattern has no
+        keywords). Patterns without any hit are absent from the result.
+        """
+        haystack = text.lower()
+
+        # Tally keyword occurrences per pattern id.
+        tally: dict[str, int] = {}
+        matching_id_lists = (
+            ids for kw, ids in self._keyword_index.items() if kw in haystack
+        )
+        for ids in matching_id_lists:
+            for pid in ids:
+                tally[pid] = tally.get(pid, 0) + 1
+
+        # Turn raw tallies into (hits, total, ratio) for known patterns only.
+        scored: dict[str, tuple[int, int, float]] = {}
+        for pid, hit_count in tally.items():
+            pattern = self._by_id.get(pid)
+            if pattern:
+                keyword_total = len(pattern.obligation_match_keywords)
+                ratio = hit_count / keyword_total if keyword_total > 0 else 0.0
+                scored[pid] = (hit_count, keyword_total, ratio)
+
+        return scored
+
+    # -----------------------------------------------------------------------
+    # Tier 2: Embedding Match
+    # -----------------------------------------------------------------------
+
+    async def _tier2_embedding(
+        self, obligation_text: str, regulation_id: Optional[str]
+    ) -> Optional[PatternMatchResult]:
+        """Return the pattern with the highest embedding score, or ``None``.
+
+        Scores (already including any domain bonus) come from
+        ``_embedding_scores``. The top-scoring pattern is returned only if
+        its score reaches ``EMBEDDING_PATTERN_THRESHOLD``.
+        """
+        scores = await self._embedding_scores(obligation_text, regulation_id)
+        if not scores:
+            return None
+
+        # Highest score wins; on ties the first entry is kept.
+        top_pid, (similarity, had_bonus) = max(
+            scores.items(), key=lambda item: item[1][0]
+        )
+        if similarity < EMBEDDING_PATTERN_THRESHOLD:
+            return None
+
+        matched = self._by_id.get(top_pid)
+        if not matched:
+            return None
+
+        # The raw score may exceed 1.0 after the bonus; only the reported
+        # confidence is capped.
+        return PatternMatchResult(
+            pattern=matched,
+            pattern_id=top_pid,
+            method="embedding",
+            confidence=min(similarity, 1.0),
+            embedding_score=similarity,
+            domain_bonus_applied=had_bonus,
+        )
+
+    async def _embedding_scores(
+        self, obligation_text: str, regulation_id: Optional[str]
+    ) -> dict[str, tuple[float, bool]]:
+        """Score every pattern by embedding similarity to the obligation text.
+
+        Only the first 2000 characters of the text are embedded. An empty
+        dict is returned when no pattern embeddings are loaded or the text
+        embedding comes back empty.
+
+        Returns dict: pattern_id → (score, domain_bonus_applied).
+        """
+        if not self._pattern_embeddings:
+            return {}
+
+        query_vec = await _get_embedding(obligation_text[:2000])
+        if not query_vec:
+            return {}
+
+        scored: dict[str, tuple[float, bool]] = {}
+        for idx, vector in enumerate(self._pattern_embeddings):
+            if not vector:
+                # No embedding available for this pattern.
+                continue
+
+            # Embeddings and ids are parallel lists: same position, same pattern.
+            pid = self._pattern_ids[idx]
+            pattern = self._by_id.get(pid)
+            if not pattern:
+                continue
+
+            similarity = _cosine_sim(query_vec, vector)
+
+            has_affinity = bool(
+                regulation_id
+                and self._domain_matches(pattern.domain, regulation_id)
+            )
+            if has_affinity:
+                similarity += DOMAIN_BONUS
+
+            scored[pid] = (similarity, has_affinity)
+
+        return scored
+
+    # -----------------------------------------------------------------------
+    # Score combination
+    # -----------------------------------------------------------------------
+
+    def _combine_results(
+        self,
+        keyword_result: Optional[PatternMatchResult],
+        embedding_result: Optional[PatternMatchResult],
+    ) -> PatternMatchResult:
+        """Merge the keyword and embedding matches into one result.
+
+        With no match at all an empty ``PatternMatchResult`` is returned;
+        with a single match that match is returned unchanged. When both
+        tiers picked the same pattern, a ``"combined"`` result is built with
+        the higher confidence plus 0.05 (capped at 1.0). When they disagree,
+        the more confident one wins and the keyword result wins ties.
+        """
+        if keyword_result and embedding_result:
+            if keyword_result.pattern_id != embedding_result.pattern_id:
+                # Tiers disagree: keep the more confident one.
+                return (
+                    keyword_result
+                    if keyword_result.confidence >= embedding_result.confidence
+                    else embedding_result
+                )
+
+            # Tiers agree: reward the agreement with a small boost.
+            boosted = max(keyword_result.confidence, embedding_result.confidence) + 0.05
+            either_bonus = (
+                keyword_result.domain_bonus_applied
+                or embedding_result.domain_bonus_applied
+            )
+            return PatternMatchResult(
+                pattern=keyword_result.pattern,
+                pattern_id=keyword_result.pattern_id,
+                method="combined",
+                confidence=min(boosted, 1.0),
+                keyword_hits=keyword_result.keyword_hits,
+                total_keywords=keyword_result.total_keywords,
+                embedding_score=embedding_result.embedding_score,
+                domain_bonus_applied=either_bonus,
+            )
+
+        if keyword_result:
+            return keyword_result
+        if embedding_result:
+            return embedding_result
+        return PatternMatchResult()
+
+    # -----------------------------------------------------------------------
+    # Domain affinity
+    # -----------------------------------------------------------------------
+
+    @staticmethod
+    def _domain_matches(pattern_domain: str, regulation_id: str) -> bool:
+        """Tell whether *pattern_domain* is listed as affine to *regulation_id*.
+
+        Regulations missing from ``_REGULATION_DOMAIN_AFFINITY`` have no
+        affine domains, so the answer for them is always ``False``.
+        """
+        return pattern_domain in _REGULATION_DOMAIN_AFFINITY.get(regulation_id, [])
+
+    # -----------------------------------------------------------------------
+    # Initialization helpers
+    # -----------------------------------------------------------------------
+
+    def _load_patterns(self) -> None:
+        """Load control patterns from the YAML files in the patterns directory.
+
+        Every ``*.yaml`` file (in sorted order, skipping names that start
+        with ``_``) that has a top-level ``patterns`` key is parsed, and each
+        entry is registered in ``self._patterns``, ``self._by_id`` and
+        ``self._by_domain``. If reading or parsing a file fails, the error is
+        logged and loading continues with the next file; entries registered
+        from that file before the failure stay registered.
+        """
+        patterns_dir = _find_patterns_dir()
+        if not patterns_dir:
+            logger.warning("Control patterns directory not found")
+            return
+
+        for yaml_file in sorted(patterns_dir.glob("*.yaml")):
+            if yaml_file.name.startswith("_"):
+                continue
+            try:
+                # Read as UTF-8 explicitly: the files carry non-ASCII text
+                # (e.g. ``name_de``) and must not depend on the locale's
+                # default encoding.
+                with open(yaml_file, encoding="utf-8") as f:
+                    data = yaml.safe_load(f)
+                if not data or "patterns" not in data:
+                    continue
+                for p in data["patterns"]:
+                    # Required keys are indexed directly (a missing one
+                    # raises and aborts this file); optional keys fall back
+                    # to defaults.
+                    pattern = ControlPattern(
+                        id=p["id"],
+                        name=p["name"],
+                        name_de=p["name_de"],
+                        domain=p["domain"],
+                        category=p["category"],
+                        description=p["description"],
+                        objective_template=p["objective_template"],
+                        rationale_template=p["rationale_template"],
+                        requirements_template=p.get("requirements_template", []),
+                        test_procedure_template=p.get("test_procedure_template", []),
+                        evidence_template=p.get("evidence_template", []),
+                        severity_default=p.get("severity_default", "medium"),
+                        implementation_effort_default=p.get("implementation_effort_default", "m"),
+                        obligation_match_keywords=p.get("obligation_match_keywords", []),
+                        tags=p.get("tags", []),
+                        composable_with=p.get("composable_with", []),
+                        open_anchor_refs=p.get("open_anchor_refs", []),
+                    )
+                    self._patterns.append(pattern)
+                    self._by_id[pattern.id] = pattern
+                    domain_list = self._by_domain.setdefault(pattern.domain, [])
+                    domain_list.append(pattern)
+            except Exception as e:
+                # Best effort: one broken file must not prevent the others
+                # from loading.
+                logger.error("Failed to load %s: %s", yaml_file.name, e)
+
+        logger.info("Loaded %d patterns from %s", len(self._patterns), patterns_dir)
+
+    def _build_keyword_index(self) -> None:
+        """Populate the reverse lookup keyword → [pattern_ids].
+
+        Keywords are lower-cased before indexing; a pattern id is appended
+        once per keyword occurrence in the pattern's keyword list.
+        """
+        index = self._keyword_index
+        for pattern in self._patterns:
+            for raw_keyword in pattern.obligation_match_keywords:
+                index.setdefault(raw_keyword.lower(), []).append(pattern.id)
+
+    async def _compute_embeddings(self) -> None:
+        """Embed each pattern's "<name_de>: <objective_template>" text.
+
+        Stores the pattern ids in ``self._pattern_ids`` and the batch result
+        in ``self._pattern_embeddings``. Does nothing when no patterns are
+        loaded.
+        """
+        if not self._patterns:
+            return
+
+        # Ids are recorded first, in the same order as the texts sent for
+        # embedding.
+        self._pattern_ids = [pattern.id for pattern in self._patterns]
+        texts = []
+        for pattern in self._patterns:
+            texts.append(f"{pattern.name_de}: {pattern.objective_template}")
+        requested = len(texts)
+
+        logger.info("Computing embeddings for %d patterns...", requested)
+        self._pattern_embeddings = await _get_embeddings_batch(texts)
+
+        # Falsy entries count as missing embeddings.
+        usable = len([emb for emb in self._pattern_embeddings if emb])
+        logger.info("Got %d/%d valid pattern embeddings", usable, requested)
+
+    # -----------------------------------------------------------------------
+    # Public helpers
+    # -----------------------------------------------------------------------
+
+    def get_pattern(self, pattern_id: str) -> Optional[ControlPattern]:
+        """Look up a pattern by ID; the ID is upper-cased before the lookup.
+
+        Returns ``None`` when no pattern is registered under that ID.
+        """
+        normalized_id = pattern_id.upper()
+        return self._by_id.get(normalized_id)
+
+    def get_patterns_by_domain(self, domain: str) -> list[ControlPattern]:
+        """Return the patterns registered for a domain (upper-cased for lookup).
+
+        Unknown domains yield an empty list. For a known domain the stored
+        list itself is returned, not a copy.
+        """
+        domain_key = domain.upper()
+        return self._by_domain.get(domain_key, [])
+
+    def stats(self) -> dict:
+        """Summarize the matcher: pattern count, domains, keyword count,
+        number of non-empty embeddings, and the initialized flag."""
+        usable_embeddings = sum(bool(emb) for emb in self._pattern_embeddings)
+        domain_names = list(self._by_domain.keys())
+        return {
+            "total_patterns": len(self._patterns),
+            "domains": domain_names,
+            "keywords": len(self._keyword_index),
+            "embeddings_valid": usable_embeddings,
+            "initialized": self._initialized,
+        }
+
+
+def _find_patterns_dir() -> Optional[Path]:
+    """Return the first existing control_patterns directory, or ``None``.
+
+    Candidates are tried in order: a path four levels above this file, the
+    absolute ``/app`` location, then a path relative to the current working
+    directory.
+    """
+    four_levels_up = Path(__file__).resolve().parent.parent.parent.parent
+    search_order = (
+        four_levels_up / "ai-compliance-sdk" / "policies" / "control_patterns",
+        Path("/app/ai-compliance-sdk/policies/control_patterns"),
+        Path("ai-compliance-sdk/policies/control_patterns"),
+    )
+    return next((path for path in search_order if path.is_dir()), None)
@@ -0,0 +1,670 @@
+"""Pipeline Adapter — New 10-Stage Pipeline Integration.
+
+Bridges the existing 7-stage control_generator pipeline with the new
+multi-layer components (ObligationExtractor, PatternMatcher, ControlComposer).
+
+New pipeline flow:
+    chunk → license_classify
+          → obligation_extract (Stage 4 — NEW)
+          → pattern_match      (Stage 5 — NEW)
+          → control_compose    (Stage 6 — replaces old Stage 3)
+          → harmonize → anchor → store + crosswalk → mark processed
+
+Can be used in two modes:
+    1. INLINE: Called from _process_batch() to enrich the pipeline
+    2. STANDALONE: Process chunks directly through new stages
+
+Part of the Multi-Layer Control Architecture (Phase 7 of 8).
+"""
+
+import hashlib
+import json
+import logging
+from dataclasses import dataclass, field
+from typing import Optional
+
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+
+from compliance.services.control_composer import ComposedControl, ControlComposer
+from compliance.services.obligation_extractor import ObligationExtractor, ObligationMatch
+from compliance.services.pattern_matcher import PatternMatcher, PatternMatchResult
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class PipelineChunk:
+    """One chunk of source text plus the metadata the new stages consume."""
+
+    # Raw chunk text; the only required field.
+    text: str
+    collection: str = ""
+    regulation_code: str = ""
+    article: Optional[str] = None
+    paragraph: Optional[str] = None
+    # Defaults to 3; PipelineAdapter forwards the chunk text to the composer
+    # only for rules 1 and 2.
+    license_rule: int = 3
+    license_info: dict = field(default_factory=dict)
+    source_citation: Optional[dict] = None
+    # Filled lazily by compute_hash() when left empty.
+    chunk_hash: str = ""
+
+    def compute_hash(self) -> str:
+        """Return ``chunk_hash``, computing it on first use.
+
+        When no hash is set yet, the SHA-256 hex digest of the encoded text
+        is stored in ``chunk_hash``; a hash that is already present is
+        returned untouched.
+        """
+        if self.chunk_hash:
+            return self.chunk_hash
+        digest = hashlib.sha256(self.text.encode()).hexdigest()
+        self.chunk_hash = digest
+        return digest
+
+
+@dataclass
+class PipelineResult:
+    """Outcome of running one chunk through the new pipeline stages."""
+
+    chunk: PipelineChunk
+    obligation: ObligationMatch = field(default_factory=ObligationMatch)
+    pattern_result: PatternMatchResult = field(default_factory=PatternMatchResult)
+    control: Optional[ComposedControl] = None
+    crosswalk_written: bool = False
+    # Error message recorded when processing failed; None otherwise.
+    error: Optional[str] = None
+
+    def to_dict(self) -> dict:
+        """Return a plain-dict view; falsy sub-results are emitted as None."""
+        obligation_view = self.obligation.to_dict() if self.obligation else None
+        pattern_view = self.pattern_result.to_dict() if self.pattern_result else None
+        control_view = self.control.to_dict() if self.control else None
+        return {
+            "chunk_hash": self.chunk.chunk_hash,
+            "obligation": obligation_view,
+            "pattern": pattern_view,
+            "control": control_view,
+            "crosswalk_written": self.crosswalk_written,
+            "error": self.error,
+        }
+
+
+class PipelineAdapter:
+    """Integrates ObligationExtractor + PatternMatcher + ControlComposer.
+
+    Usage::
+
+        adapter = PipelineAdapter(db)
+        await adapter.initialize()
+
+        result = await adapter.process_chunk(PipelineChunk(
+            text="...",
+            regulation_code="eu_2016_679",
+            article="Art. 30",
+            license_rule=1,
+        ))
+    """
+
+    def __init__(self, db: Optional[Session] = None):
+        """Create the sub-components.
+
+        Args:
+            db: Optional SQLAlchemy session. Without one, ``write_crosswalk``
+                does nothing and returns ``False``.
+        """
+        self.db = db
+        self._extractor = ObligationExtractor()
+        self._matcher = PatternMatcher()
+        self._composer = ControlComposer()
+        self._initialized = False
+
+    async def initialize(self) -> None:
+        """Initialize the extractor and the matcher (no-op after the first call)."""
+        if self._initialized:
+            return
+        await self._extractor.initialize()
+        await self._matcher.initialize()
+        self._initialized = True
+        logger.info("PipelineAdapter initialized")
+
+    async def process_chunk(self, chunk: PipelineChunk) -> PipelineResult:
+        """Process a single chunk through the new 3-stage pipeline.
+
+        Stage 4: Obligation Extract
+        Stage 5: Pattern Match
+        Stage 6: Control Compose
+
+        Initializes the adapter on first use. Exceptions raised by a stage
+        are not propagated: they are logged and stored in ``result.error``,
+        and the result keeps whatever earlier stages produced.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        chunk.compute_hash()
+        result = PipelineResult(chunk=chunk)
+
+        try:
+            # Stage 4: Obligation Extract
+            result.obligation = await self._extractor.extract(
+                chunk_text=chunk.text,
+                regulation_code=chunk.regulation_code,
+                article=chunk.article,
+                paragraph=chunk.paragraph,
+            )
+
+            # Stage 5: Pattern Match
+            # Prefer the extracted obligation text, then its title, and fall
+            # back to the first 500 characters of the chunk.
+            obligation_text = (
+                result.obligation.obligation_text
+                or result.obligation.obligation_title
+                or chunk.text[:500]
+            )
+            result.pattern_result = await self._matcher.match(
+                obligation_text=obligation_text,
+                regulation_id=result.obligation.regulation_id,
+            )
+
+            # Stage 6: Control Compose
+            # The chunk text is handed to the composer only for license
+            # rules 1 and 2; any other rule gets None.
+            result.control = await self._composer.compose(
+                obligation=result.obligation,
+                pattern_result=result.pattern_result,
+                chunk_text=chunk.text if chunk.license_rule in (1, 2) else None,
+                license_rule=chunk.license_rule,
+                source_citation=chunk.source_citation,
+                regulation_code=chunk.regulation_code,
+            )
+
+        except Exception as e:
+            logger.error("Pipeline processing failed: %s", e)
+            result.error = str(e)
+
+        return result
+
+    async def process_batch(self, chunks: list[PipelineChunk]) -> list[PipelineResult]:
+        """Process chunks one after another; results keep the input order."""
+        results = []
+        for chunk in chunks:
+            result = await self.process_chunk(chunk)
+            results.append(result)
+        return results
+
+    def write_crosswalk(self, result: PipelineResult, control_uuid: str) -> bool:
+        """Write obligation_extraction + crosswalk_matrix rows for a processed chunk.
+
+        Called AFTER the control is stored in canonical_controls.
+
+        Args:
+            result: Pipeline result whose ``control`` has been stored.
+            control_uuid: UUID (as string) of the stored canonical control.
+
+        Returns:
+            ``True`` when all statements were committed; ``False`` when there
+            is no session, no control, or a statement failed (the transaction
+            is rolled back in that case).
+        """
+        if not self.db or not result.control:
+            return False
+
+        chunk = result.chunk
+        obligation = result.obligation
+        pattern = result.pattern_result
+
+        try:
+            # 1. Write obligation_extraction row
+            self.db.execute(
+                text("""
+                    INSERT INTO obligation_extractions (
+                        chunk_hash, collection, regulation_code,
+                        article, paragraph, obligation_id,
+                        obligation_text, confidence, extraction_method,
+                        pattern_id, pattern_match_score, control_uuid
+                    ) VALUES (
+                        :chunk_hash, :collection, :regulation_code,
+                        :article, :paragraph, :obligation_id,
+                        :obligation_text, :confidence, :extraction_method,
+                        :pattern_id, :pattern_match_score,
+                        CAST(:control_uuid AS uuid)
+                    )
+                """),
+                {
+                    "chunk_hash": chunk.chunk_hash,
+                    "collection": chunk.collection,
+                    "regulation_code": chunk.regulation_code,
+                    "article": chunk.article,
+                    "paragraph": chunk.paragraph,
+                    "obligation_id": obligation.obligation_id if obligation else None,
+                    # Stored text is truncated to 2000 characters.
+                    "obligation_text": (
+                        obligation.obligation_text[:2000]
+                        if obligation and obligation.obligation_text
+                        else None
+                    ),
+                    "confidence": obligation.confidence if obligation else 0,
+                    "extraction_method": obligation.method if obligation else "none",
+                    "pattern_id": pattern.pattern_id if pattern else None,
+                    "pattern_match_score": pattern.confidence if pattern else 0,
+                    "control_uuid": control_uuid,
+                },
+            )
+
+            # 2. Write crosswalk_matrix row
+            self.db.execute(
+                text("""
+                    INSERT INTO crosswalk_matrix (
+                        regulation_code, article, paragraph,
+                        obligation_id, pattern_id,
+                        master_control_id, master_control_uuid,
+                        confidence, source
+                    ) VALUES (
+                        :regulation_code, :article, :paragraph,
+                        :obligation_id, :pattern_id,
+                        :master_control_id,
+                        CAST(:master_control_uuid AS uuid),
+                        :confidence, :source
+                    )
+                """),
+                {
+                    "regulation_code": chunk.regulation_code,
+                    "article": chunk.article,
+                    "paragraph": chunk.paragraph,
+                    "obligation_id": obligation.obligation_id if obligation else None,
+                    "pattern_id": pattern.pattern_id if pattern else None,
+                    "master_control_id": result.control.control_id,
+                    "master_control_uuid": control_uuid,
+                    # The link is only as trustworthy as its weaker side.
+                    "confidence": min(
+                        obligation.confidence if obligation else 0,
+                        pattern.confidence if pattern else 0,
+                    ),
+                    "source": "auto",
+                },
+            )
+
+            # 3. Update canonical_controls with pattern_id + obligation_ids
+            if result.control.pattern_id or result.control.obligation_ids:
+                self.db.execute(
+                    text("""
+                        UPDATE canonical_controls
+                        SET pattern_id = COALESCE(:pattern_id, pattern_id),
+                            obligation_ids = COALESCE(:obligation_ids, obligation_ids)
+                        WHERE id = CAST(:control_uuid AS uuid)
+                    """),
+                    {
+                        "pattern_id": result.control.pattern_id,
+                        # Bind NULL when there are no obligation ids.
+                        # json.dumps of an empty value yields "[]"/"null",
+                        # which is not SQL NULL, so COALESCE would overwrite
+                        # the stored obligation_ids instead of keeping them.
+                        "obligation_ids": (
+                            json.dumps(result.control.obligation_ids)
+                            if result.control.obligation_ids
+                            else None
+                        ),
+                        "control_uuid": control_uuid,
+                    },
+                )
+
+            self.db.commit()
+            result.crosswalk_written = True
+            return True
+
+        except Exception as e:
+            logger.error("Failed to write crosswalk: %s", e)
+            self.db.rollback()
+            return False
+
+    def stats(self) -> dict:
+        """Return extractor and matcher statistics plus the initialized flag."""
+        return {
+            "extractor": self._extractor.stats(),
+            "matcher": self._matcher.stats(),
+            "initialized": self._initialized,
+        }
+
+
+# ---------------------------------------------------------------------------
+# Migration Passes — Backfill existing 4,800+ controls
+# ---------------------------------------------------------------------------
+
+
+class MigrationPasses:
+    """Non-destructive migration passes for existing controls.
+
+    Pass 1: Obligation Linkage (deterministic, article→obligation lookup)
+    Pass 2: Pattern Classification (keyword-based matching)
+    Pass 3: Quality Triage (categorize by linkage completeness)
+    Pass 4: Crosswalk Backfill (write crosswalk rows for linked controls)
+    Pass 5: Deduplication (mark duplicate controls)
+
+    Usage::
+
+        migration = MigrationPasses(db)
+        await migration.initialize()
+
+        result = await migration.run_pass1_obligation_linkage(limit=100)
+        result = await migration.run_pass2_pattern_classification(limit=100)
+        result = migration.run_pass3_quality_triage()
+        result = migration.run_pass4_crosswalk_backfill()
+        result = migration.run_pass5_deduplication()
+    """
+
+    def __init__(self, db: Session):
+        """Keep the session and create an uninitialized extractor and matcher."""
+        self._initialized = False
+        self.db = db
+        self._extractor = ObligationExtractor()
+        self._matcher = PatternMatcher()
+
+    async def initialize(self) -> None:
+        """Load obligations and patterns and build the keyword index, once.
+
+        Calls the components' loading helpers directly rather than their
+        ``initialize()`` coroutines; later calls are no-ops.
+        """
+        if not self._initialized:
+            self._extractor._load_obligations()
+            self._matcher._load_patterns()
+            self._matcher._build_keyword_index()
+            self._initialized = True
+
+    # -------------------------------------------------------------------
+    # Pass 1: Obligation Linkage (deterministic)
+    # -------------------------------------------------------------------
+
+    async def run_pass1_obligation_linkage(self, limit: int = 0) -> dict:
+        """Link existing controls to obligations via source_citation article.
+
+        For each control with source_citation → extract regulation + article
+        → look up in obligation framework → set obligation_ids.
+
+        Args:
+            limit: Maximum number of controls to process; 0 (or less) means
+                no limit.
+
+        Returns:
+            Counters: ``total`` rows fetched, ``linked``, ``no_match`` and
+            ``no_citation``.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        query = """
+            SELECT id, control_id, source_citation, generation_metadata
+            FROM canonical_controls
+            WHERE release_state NOT IN ('deprecated')
+              AND (obligation_ids IS NULL OR obligation_ids = '[]')
+        """
+        # Bind LIMIT as a parameter instead of formatting it into the SQL.
+        # NOTE(review): there is no ORDER BY, so which rows a limited run
+        # picks is up to the database — confirm that is acceptable.
+        params: dict = {}
+        if limit > 0:
+            query += " LIMIT :limit"
+            params["limit"] = limit
+
+        rows = self.db.execute(text(query), params).fetchall()
+
+        stats = {"total": len(rows), "linked": 0, "no_match": 0, "no_citation": 0}
+
+        for row in rows:
+            control_uuid = str(row[0])
+            citation = row[2]
+            metadata = row[3]
+
+            # Extract regulation + article from citation or metadata
+            reg_code, article = _extract_regulation_article(citation, metadata)
+            if not reg_code:
+                stats["no_citation"] += 1
+                continue
+
+            # Tier 1: Exact match
+            match = self._extractor._tier1_exact(reg_code, article or "")
+            if match and match.obligation_id:
+                self.db.execute(
+                    text("""
+                        UPDATE canonical_controls
+                        SET obligation_ids = :obl_ids
+                        WHERE id = CAST(:uuid AS uuid)
+                    """),
+                    {
+                        "obl_ids": json.dumps([match.obligation_id]),
+                        "uuid": control_uuid,
+                    },
+                )
+                stats["linked"] += 1
+            else:
+                stats["no_match"] += 1
+
+        # All updates of the pass are committed together.
+        self.db.commit()
+        logger.info("Pass 1: %s", stats)
+        return stats
+
+    # -------------------------------------------------------------------
+    # Pass 2: Pattern Classification (keyword-based)
+    # -------------------------------------------------------------------
+
+    async def run_pass2_pattern_classification(self, limit: int = 0) -> dict:
+        """Classify existing controls into patterns via keyword matching.
+
+        For each control without pattern_id → keyword-match title+objective
+        against pattern library → assign best match.
+
+        Args:
+            limit: Maximum number of controls to process; 0 (or less) means
+                no limit.
+
+        Returns:
+            Counters: ``total`` rows fetched, ``classified`` and ``no_match``.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        query = """
+            SELECT id, control_id, title, objective
+            FROM canonical_controls
+            WHERE release_state NOT IN ('deprecated')
+              AND (pattern_id IS NULL OR pattern_id = '')
+        """
+        # Bind LIMIT as a parameter instead of formatting it into the SQL.
+        # NOTE(review): there is no ORDER BY, so which rows a limited run
+        # picks is up to the database — confirm that is acceptable.
+        params: dict = {}
+        if limit > 0:
+            query += " LIMIT :limit"
+            params["limit"] = limit
+
+        rows = self.db.execute(text(query), params).fetchall()
+
+        stats = {"total": len(rows), "classified": 0, "no_match": 0}
+
+        for row in rows:
+            control_uuid = str(row[0])
+            title = row[2] or ""
+            objective = row[3] or ""
+
+            # Keyword match (no regulation id, so no domain bonus applies)
+            match_text = f"{title} {objective}"
+            result = self._matcher._tier1_keyword(match_text, None)
+
+            # Require at least two keyword hits before assigning a pattern.
+            if result and result.pattern_id and result.keyword_hits >= 2:
+                self.db.execute(
+                    text("""
+                        UPDATE canonical_controls
+                        SET pattern_id = :pattern_id
+                        WHERE id = CAST(:uuid AS uuid)
+                    """),
+                    {
+                        "pattern_id": result.pattern_id,
+                        "uuid": control_uuid,
+                    },
+                )
+                stats["classified"] += 1
+            else:
+                stats["no_match"] += 1
+
+        # All updates of the pass are committed together.
+        self.db.commit()
+        logger.info("Pass 2: %s", stats)
+        return stats
+
+    # -------------------------------------------------------------------
+    # Pass 3: Quality Triage
+    # -------------------------------------------------------------------
+
+    def run_pass3_quality_triage(self) -> dict:
+        """Tag every non-deprecated control with a triage status.
+
+        Writes generation_metadata.triage_status:
+            - "review": obligation_ids and pattern_id are both set
+            - "needs_obligation": pattern_id set, obligation_ids missing
+            - "needs_pattern": obligation_ids set, pattern_id missing
+            - "legacy_unlinked": neither is set
+
+        Returns a dict mapping each status to the number of rows updated.
+        """
+        # One UPDATE per status; the WHERE clauses do not overlap.
+        statements = {
+            "review": """
+                UPDATE canonical_controls
+                SET generation_metadata = jsonb_set(
+                    COALESCE(generation_metadata::jsonb, '{}'::jsonb),
+                    '{triage_status}', '"review"'
+                )
+                WHERE release_state NOT IN ('deprecated')
+                  AND obligation_ids IS NOT NULL AND obligation_ids != '[]'
+                  AND pattern_id IS NOT NULL AND pattern_id != ''
+            """,
+            "needs_obligation": """
+                UPDATE canonical_controls
+                SET generation_metadata = jsonb_set(
+                    COALESCE(generation_metadata::jsonb, '{}'::jsonb),
+                    '{triage_status}', '"needs_obligation"'
+                )
+                WHERE release_state NOT IN ('deprecated')
+                  AND (obligation_ids IS NULL OR obligation_ids = '[]')
+                  AND pattern_id IS NOT NULL AND pattern_id != ''
+            """,
+            "needs_pattern": """
+                UPDATE canonical_controls
+                SET generation_metadata = jsonb_set(
+                    COALESCE(generation_metadata::jsonb, '{}'::jsonb),
+                    '{triage_status}', '"needs_pattern"'
+                )
+                WHERE release_state NOT IN ('deprecated')
+                  AND obligation_ids IS NOT NULL AND obligation_ids != '[]'
+                  AND (pattern_id IS NULL OR pattern_id = '')
+            """,
+            "legacy_unlinked": """
+                UPDATE canonical_controls
+                SET generation_metadata = jsonb_set(
+                    COALESCE(generation_metadata::jsonb, '{}'::jsonb),
+                    '{triage_status}', '"legacy_unlinked"'
+                )
+                WHERE release_state NOT IN ('deprecated')
+                  AND (obligation_ids IS NULL OR obligation_ids = '[]')
+                  AND (pattern_id IS NULL OR pattern_id = '')
+            """,
+        }
+
+        # Statements run in declaration order; rowcount is the number of
+        # controls that received the status.
+        stats = {
+            status: self.db.execute(text(statement)).rowcount
+            for status, statement in statements.items()
+        }
+
+        self.db.commit()
+        logger.info("Pass 3: %s", stats)
+        return stats
+
+    # -------------------------------------------------------------------
+    # Pass 4: Crosswalk Backfill
+    # -------------------------------------------------------------------
+
+    def run_pass4_crosswalk_backfill(self) -> dict:
+        """Insert missing crosswalk_matrix rows for fully linked controls.
+
+        One row is inserted per (control, obligation id) pair for
+        non-deprecated controls that have a pattern_id and obligation_ids,
+        unless a row for that control UUID and obligation id already exists.
+        Inserted rows carry confidence 0.80 and source 'migrated'.
+
+        Returns ``{"rows_inserted": <count>}``.
+        """
+        backfill_sql = text("""
+            INSERT INTO crosswalk_matrix (
+                regulation_code, obligation_id, pattern_id,
+                master_control_id, master_control_uuid,
+                confidence, source
+            )
+            SELECT
+                COALESCE(
+                    (generation_metadata::jsonb->>'source_regulation'),
+                    ''
+                ) AS regulation_code,
+                obl.value::text AS obligation_id,
+                cc.pattern_id,
+                cc.control_id,
+                cc.id,
+                0.80,
+                'migrated'
+            FROM canonical_controls cc,
+                 jsonb_array_elements_text(
+                     COALESCE(cc.obligation_ids::jsonb, '[]'::jsonb)
+                 ) AS obl(value)
+            WHERE cc.release_state NOT IN ('deprecated')
+              AND cc.pattern_id IS NOT NULL AND cc.pattern_id != ''
+              AND cc.obligation_ids IS NOT NULL AND cc.obligation_ids != '[]'
+              AND NOT EXISTS (
+                  SELECT 1 FROM crosswalk_matrix cw
+                  WHERE cw.master_control_uuid = cc.id
+                    AND cw.obligation_id = obl.value::text
+              )
+        """)
+        outcome = self.db.execute(backfill_sql)
+
+        # Read the count before committing, as the original flow does.
+        rows_inserted = outcome.rowcount
+        self.db.commit()
+        logger.info("Pass 4: %d crosswalk rows inserted", rows_inserted)
+        return {"rows_inserted": rows_inserted}
+
+    # -------------------------------------------------------------------
+    # Pass 5: Deduplication
+    # -------------------------------------------------------------------
+
+    def run_pass5_deduplication(self) -> dict:
+        """Deprecate duplicate controls that share an obligation and a pattern.
+
+        Non-deprecated controls are grouped by (pattern_id, obligation_id).
+        Within each group the control with the highest evidence_confidence is
+        kept (ties: newest created_at, then id, so the keeper is the same in
+        every group the tied controls share). All other controls in the group
+        are set to release_state 'deprecated' and tagged with
+        deprecated_reason 'duplicate_same_obligation_pattern'.
+
+        Returns:
+            ``{"groups_found": g, "controls_deprecated": d}`` where d counts
+            controls whose state actually changed in this run.
+        """
+        # Find groups with duplicates. The trailing cc.id sort key makes the
+        # ordering total, so a pair of tied controls cannot each be chosen as
+        # the "other" one in two different groups.
+        groups = self.db.execute(text("""
+            SELECT cc.pattern_id,
+                   obl.value::text AS obligation_id,
+                   array_agg(cc.id ORDER BY cc.evidence_confidence DESC NULLS LAST, cc.created_at DESC, cc.id) AS ids,
+                   count(*) AS cnt
+            FROM canonical_controls cc,
+                 jsonb_array_elements_text(
+                     COALESCE(cc.obligation_ids::jsonb, '[]'::jsonb)
+                 ) AS obl(value)
+            WHERE cc.release_state NOT IN ('deprecated')
+              AND cc.pattern_id IS NOT NULL AND cc.pattern_id != ''
+            GROUP BY cc.pattern_id, obl.value::text
+            HAVING count(*) > 1
+        """)).fetchall()
+
+        stats = {"groups_found": len(groups), "controls_deprecated": 0}
+
+        # NOTE(review): a control kept in one group can still be deprecated as
+        # a duplicate in another group (it has several obligations) — confirm
+        # this is the intended outcome.
+        for group in groups:
+            ids = group[2]  # Array of UUIDs, first is the keeper
+            if len(ids) <= 1:
+                continue
+
+            # Keep first (highest confidence), deprecate rest
+            for dep_id in ids[1:]:
+                result = self.db.execute(
+                    text("""
+                        UPDATE canonical_controls
+                        SET release_state = 'deprecated',
+                            generation_metadata = jsonb_set(
+                                COALESCE(generation_metadata::jsonb, '{}'::jsonb),
+                                '{deprecated_reason}', '"duplicate_same_obligation_pattern"'
+                            )
+                        WHERE id = CAST(:uuid AS uuid)
+                          AND release_state != 'deprecated'
+                    """),
+                    {"uuid": str(dep_id)},
+                )
+                # A control listed in several groups is only updated once; the
+                # later UPDATEs match no row and must not be counted again.
+                if result.rowcount != 0:
+                    stats["controls_deprecated"] += 1
+
+        self.db.commit()
+        logger.info("Pass 5: %s", stats)
+        return stats
+
+    def migration_status(self) -> dict:
+        """Summarise how many canonical controls are linked so far.
+
+        Returns:
+            Dict with absolute counts (total, with obligation, with pattern,
+            fully linked, deprecated) and the three coverage percentages,
+            each rounded to one decimal place.
+        """
+        counts = self.db.execute(text("""
+            SELECT
+                count(*) AS total,
+                count(*) FILTER (WHERE obligation_ids IS NOT NULL AND obligation_ids != '[]') AS has_obligation,
+                count(*) FILTER (WHERE pattern_id IS NOT NULL AND pattern_id != '') AS has_pattern,
+                count(*) FILTER (
+                    WHERE obligation_ids IS NOT NULL AND obligation_ids != '[]'
+                      AND pattern_id IS NOT NULL AND pattern_id != ''
+                ) AS fully_linked,
+                count(*) FILTER (WHERE release_state = 'deprecated') AS deprecated
+            FROM canonical_controls
+        """)).fetchone()
+
+        total = counts[0]
+        with_obligation = counts[1]
+        with_pattern = counts[2]
+        linked = counts[3]
+
+        def pct(part):
+            # Divide by at least 1 so an empty table yields 0.0 instead of an error.
+            return round(part / max(total, 1) * 100, 1)
+
+        return {
+            "total_controls": total,
+            "has_obligation": with_obligation,
+            "has_pattern": with_pattern,
+            "fully_linked": linked,
+            "deprecated": counts[4],
+            "coverage_obligation_pct": pct(with_obligation),
+            "coverage_pattern_pct": pct(with_pattern),
+            "coverage_full_pct": pct(linked),
+        }
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _extract_regulation_article(
+    citation: Optional[str], metadata: Optional[str]
+) -> tuple[Optional[str], Optional[str]]:
+    """Derive (regulation_code, article) from a control's citation and metadata.
+
+    The citation is consulted first; metadata is only used when the citation
+    did not yield a regulation code, and then also fills a still-missing
+    article. Both inputs may be JSON strings or already-decoded values;
+    anything that does not decode to a dict is ignored.
+
+    Returns:
+        Tuple ``(regulation_code, article)``; either element may be None.
+    """
+    from compliance.services.obligation_extractor import _normalize_regulation
+
+    regulation = None
+    article_ref = None
+
+    # Citation takes precedence (JSON string or dict).
+    if citation:
+        try:
+            cited = citation if not isinstance(citation, str) else json.loads(citation)
+            if isinstance(cited, dict):
+                article_ref = cited.get("article") or cited.get("source_article")
+                # The regulation is derived from the citation's source field.
+                cited_source = cited.get("source", "")
+                if cited_source:
+                    regulation = _normalize_regulation(cited_source)
+        except (json.JSONDecodeError, TypeError):
+            pass
+
+    # Metadata is only a fallback for a missing regulation code.
+    if regulation or not metadata:
+        return regulation, article_ref
+
+    try:
+        meta = metadata if not isinstance(metadata, str) else json.loads(metadata)
+        if isinstance(meta, dict):
+            meta_regulation = meta.get("source_regulation", "")
+            if meta_regulation:
+                regulation = _normalize_regulation(meta_regulation)
+            if not article_ref:
+                article_ref = meta.get("source_article")
+    except (json.JSONDecodeError, TypeError):
+        pass
+
+    return regulation, article_ref
@@ -33,6 +33,7 @@ class RAGSearchResult:
    paragraph: str
    source_url: str
    score: float
+    collection: str = ""


 class ComplianceRAGClient:
@@ -91,6 +92,7 @@ class ComplianceRAGClient:
                    paragraph=r.get("paragraph", ""),
                    source_url=r.get("source_url", ""),
                    score=r.get("score", 0.0),
+                    collection=collection,
                ))
            return results

@@ -98,6 +100,88 @@ class ComplianceRAGClient:
            logger.warning("RAG search failed: %s", e)
            return []

+    async def search_with_rerank(
+        self,
+        query: str,
+        collection: str = "bp_compliance_ce",
+        regulations: Optional[List[str]] = None,
+        top_k: int = 5,
+    ) -> List[RAGSearchResult]:
+        """
+        Search, then optionally re-order the hits with the cross-encoder.
+
+        When get_reranker() returns None this is a plain search(). Otherwise
+        max(top_k * 4, 20) candidates are fetched, re-ranked and cut to top_k.
+        If re-ranking raises, a warning is logged and the first top_k
+        candidates are returned in their original order.
+        """
+        from .reranker import get_reranker
+
+        cross_encoder = get_reranker()
+        if cross_encoder is None:
+            return await self.search(query, collection, regulations, top_k)
+
+        # Over-fetch so the re-ranker has a wider pool to choose from.
+        pool_size = max(top_k * 4, 20)
+        pool = await self.search(query, collection, regulations, top_k=pool_size)
+        if not pool:
+            return []
+
+        passages = [hit.text for hit in pool]
+        try:
+            # rerank() is a regular (non-async) call and runs inline here.
+            order = cross_encoder.rerank(query, passages, top_k=top_k)
+            return [pool[position] for position in order]
+        except Exception as e:
+            logger.warning("Reranking failed, returning unranked: %s", e)
+            return pool[:top_k]
+
+    async def scroll(
+        self,
+        collection: str,
+        offset: Optional[str] = None,
+        limit: int = 100,
+    ) -> tuple[List[RAGSearchResult], Optional[str]]:
+        """
+        Fetch one page of chunks from a collection.
+
+        The scroll endpoint URL is derived from the search URL by replacing
+        "/search" with "/scroll". Returns (chunks, next_offset); next_offset
+        is None when the service reports no further page. On a non-200
+        response or any exception a warning is logged and ([], None) is
+        returned. Scrolled chunks carry no relevance score (score is 0.0).
+        """
+        endpoint = self._search_url.replace("/search", "/scroll")
+        query_params = {"collection": collection, "limit": str(limit)}
+        if offset:
+            query_params["offset"] = offset
+
+        try:
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                resp = await client.get(endpoint, params=query_params)
+
+            if resp.status_code != 200:
+                logger.warning(
+                    "RAG scroll returned %d: %s", resp.status_code, resp.text[:200]
+                )
+                return [], None
+
+            body = resp.json()
+            page = [
+                RAGSearchResult(
+                    text=chunk.get("text", ""),
+                    regulation_code=chunk.get("regulation_code", ""),
+                    regulation_name=chunk.get("regulation_name", ""),
+                    regulation_short=chunk.get("regulation_short", ""),
+                    category=chunk.get("category", ""),
+                    article=chunk.get("article", ""),
+                    paragraph=chunk.get("paragraph", ""),
+                    source_url=chunk.get("source_url", ""),
+                    score=0.0,
+                    collection=collection,
+                )
+                for chunk in body.get("chunks", [])
+            ]
+            # Normalise an empty/falsy offset to None so callers can stop on it.
+            return page, body.get("next_offset") or None
+
+        except Exception as e:
+            logger.warning("RAG scroll failed: %s", e)
+            return [], None
+
    def format_for_prompt(
        self, results: List[RAGSearchResult], max_results: int = 5
    ) -> str:
@@ -0,0 +1,85 @@
+"""
+Cross-Encoder Re-Ranking for RAG Search Results.
+
+Uses BGE Reranker v2 (BAAI/bge-reranker-v2-m3, MIT license) to re-rank
+search results from Qdrant for improved retrieval quality.
+
+Lazy-loads the model on first use. Disabled by default (RERANK_ENABLED=false).
+"""
+
+import logging
+import os
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+RERANK_ENABLED = os.getenv("RERANK_ENABLED", "false").lower() == "true"
+RERANK_MODEL = os.getenv("RERANK_MODEL", "BAAI/bge-reranker-v2-m3")
+
+
+class Reranker:
+    """Re-ranks texts against a query with a sentence-transformers CrossEncoder."""
+
+    def __init__(self, model_name: str = RERANK_MODEL):
+        self._model_name = model_name
+        # The model is created by _ensure_model() on the first rerank() call.
+        self._model = None
+
+    def _ensure_model(self) -> None:
+        """Instantiate the CrossEncoder once; log and re-raise on failure."""
+        if self._model is None:
+            try:
+                from sentence_transformers import CrossEncoder
+
+                logger.info("Loading reranker model: %s", self._model_name)
+                self._model = CrossEncoder(self._model_name)
+                logger.info("Reranker model loaded successfully")
+            except ImportError:
+                logger.error(
+                    "sentence-transformers not installed. "
+                    "Install with: pip install sentence-transformers"
+                )
+                raise
+            except Exception as e:
+                logger.error("Failed to load reranker model: %s", e)
+                raise
+
+    def rerank(
+        self, query: str, texts: list[str], top_k: int = 5
+    ) -> list[int]:
+        """
+        Rank candidate texts by relevance to the query.
+
+        Args:
+            query: The search query.
+            texts: Candidate texts to score against the query.
+            top_k: Maximum number of indices to return.
+
+        Returns:
+            Indices into ``texts``, best score first, truncated to top_k.
+            An empty ``texts`` yields an empty list without loading the model.
+        """
+        if not texts:
+            return []
+
+        self._ensure_model()
+
+        scores = self._model.predict([[query, candidate] for candidate in texts])
+
+        # Order positions by descending score.
+        by_score = sorted(
+            enumerate(scores), key=lambda scored: scored[1], reverse=True
+        )
+        order = [position for position, _ in by_score]
+        return order[:top_k]
+
+
+# Module-level singleton
+_reranker: Optional[Reranker] = None
+
+
+def get_reranker() -> Optional[Reranker]:
+    """Return the module-wide Reranker, creating it on first use.
+
+    Returns None when RERANK_ENABLED is false.
+    """
+    global _reranker
+    if RERANK_ENABLED:
+        if _reranker is None:
+            _reranker = Reranker()
+        return _reranker
+    return None
@@ -0,0 +1,331 @@
+"""V1 Control Enrichment Service — Match Eigenentwicklung controls to regulations.
+
+Finds regulatory coverage for v1 controls (generation_strategy='ungrouped',
+pipeline_version=1, no source_citation) by embedding similarity search.
+
+Reuses embedding + Qdrant helpers from control_dedup.py.
+"""
+
+import logging
+from typing import Optional
+
+from sqlalchemy import text
+
+from database import SessionLocal
+from compliance.services.control_dedup import (
+    get_embedding,
+    qdrant_search_cross_regulation,
+)
+
+logger = logging.getLogger(__name__)
+
+# Similarity threshold — lower than dedup (0.85) since we want informational matches
+# Typical top scores for v1 controls are 0.70-0.77
+V1_MATCH_THRESHOLD = 0.70
+V1_MAX_MATCHES = 5
+
+
+def _is_eigenentwicklung_query() -> str:
+    """Return the SQL WHERE-clause body that selects v1 Eigenentwicklung controls.
+
+    The fragment uses unqualified column names and is meant to be embedded
+    after ``WHERE`` in a query on canonical_controls.
+    """
+    conditions = (
+        "generation_strategy = 'ungrouped'",
+        "(pipeline_version = '1' OR pipeline_version IS NULL)",
+        "source_citation IS NULL",
+        "parent_control_uuid IS NULL",
+        "release_state NOT IN ('rejected', 'merged', 'deprecated')",
+    )
+    # One condition per line, indented like the surrounding SQL.
+    return "\n        " + "\n        AND ".join(conditions) + "\n    "
+
+
+async def count_v1_controls() -> int:
+    """Return the number of controls matching the v1 Eigenentwicklung filter."""
+    with SessionLocal() as db:
+        result = db.execute(text(f"""
+            SELECT COUNT(*) AS cnt
+            FROM canonical_controls
+            WHERE {_is_eigenentwicklung_query()}
+        """)).fetchone()
+        if not result:
+            return 0
+        return result.cnt
+
+
+def _resolve_regulatory_control(db, matched_uuid: str):
+    """Resolve a search hit to the control row that carries a source_citation.
+
+    Looks up ``matched_uuid`` in canonical_controls. If that row has no
+    source_citation but has a parent_control_uuid, the parent is used instead,
+    provided the parent has a source_citation.
+
+    Args:
+        db: Open database session used for the lookups.
+        matched_uuid: The ``control_uuid`` taken from the hit payload.
+
+    Returns:
+        The resolved row, or None when the control does not exist or neither
+        it nor its parent has a source_citation.
+    """
+    matched_row = db.execute(text("""
+        SELECT c.id, c.control_id, c.title, c.source_citation,
+               c.severity, c.category, c.parent_control_uuid
+        FROM canonical_controls c
+        WHERE c.id = CAST(:uuid AS uuid)
+    """), {"uuid": matched_uuid}).fetchone()
+
+    if not matched_row:
+        return None
+
+    reg_row = matched_row
+    if not reg_row.source_citation and reg_row.parent_control_uuid:
+        # Look up parent — the parent has the source_citation
+        parent_row = db.execute(text("""
+            SELECT id, control_id, title, source_citation,
+                   severity, category, parent_control_uuid
+            FROM canonical_controls
+            WHERE id = CAST(:uuid AS uuid)
+              AND source_citation IS NOT NULL
+        """), {"uuid": str(reg_row.parent_control_uuid)}).fetchone()
+        if parent_row:
+            reg_row = parent_row
+
+    if not reg_row.source_citation:
+        return None
+    return reg_row
+
+
+async def enrich_v1_matches(
+    dry_run: bool = True,
+    batch_size: int = 100,
+    offset: int = 0,
+) -> dict:
+    """Find regulatory matches for v1 Eigenentwicklung controls.
+
+    For every v1 control in the batch, the title and objective are embedded,
+    similar controls are searched in the ``atomic_controls_dedup`` collection,
+    each hit is resolved to a control with a source_citation, and up to
+    V1_MAX_MATCHES distinct matches scoring at least V1_MATCH_THRESHOLD are
+    upserted into v1_control_matches.
+
+    The database work of each control runs inside its own SAVEPOINT. A failure
+    for one control rolls back only that control's rows and is reported in
+    ``errors``; the rest of the batch is still committed.
+
+    Args:
+        dry_run: If True, nothing is searched or written; the total count and
+            a sample of the batch's controls are returned.
+        batch_size: Number of v1 controls to process per call.
+        offset: Pagination offset (v1 control index).
+
+    Returns:
+        Stats dict with counts, sample matches, and pagination info.
+        ``matches_inserted`` and ``sample_matches`` only include controls
+        whose savepoint was released successfully.
+    """
+    with SessionLocal() as db:
+        # 1. Load v1 controls (paginated)
+        v1_controls = db.execute(text(f"""
+            SELECT id, control_id, title, objective, category
+            FROM canonical_controls
+            WHERE {_is_eigenentwicklung_query()}
+            ORDER BY control_id
+            LIMIT :limit OFFSET :offset
+        """), {"limit": batch_size, "offset": offset}).fetchall()
+
+        # Count total for pagination
+        total_row = db.execute(text(f"""
+            SELECT COUNT(*) AS cnt
+            FROM canonical_controls
+            WHERE {_is_eigenentwicklung_query()}
+        """)).fetchone()
+        total_v1 = total_row.cnt if total_row else 0
+
+        if not v1_controls:
+            return {
+                "dry_run": dry_run,
+                "processed": 0,
+                "total_v1": total_v1,
+                "message": "Kein weiterer Batch — alle v1 Controls verarbeitet.",
+            }
+
+        if dry_run:
+            return {
+                "dry_run": True,
+                "total_v1": total_v1,
+                "offset": offset,
+                "batch_size": batch_size,
+                "sample_controls": [
+                    {
+                        "control_id": r.control_id,
+                        "title": r.title,
+                        "category": r.category,
+                    }
+                    for r in v1_controls[:20]
+                ],
+            }
+
+        # 2. Process each v1 control
+        processed = 0
+        matches_inserted = 0
+        errors = []
+        sample_matches = []
+
+        for v1 in v1_controls:
+            # Matches written for this control; folded into the batch totals
+            # only after the control's savepoint has been released.
+            control_matches: list[dict] = []
+            try:
+                # Build search text
+                search_text = f"{v1.title} — {v1.objective}"
+
+                # Get embedding
+                embedding = await get_embedding(search_text)
+                if not embedding:
+                    errors.append({
+                        "control_id": v1.control_id,
+                        "error": "Embedding fehlgeschlagen",
+                    })
+                    continue
+
+                # Search Qdrant (cross-regulation, no pattern filter)
+                # Collection is atomic_controls_dedup (contains ~51k atomic controls)
+                results = await qdrant_search_cross_regulation(
+                    embedding, top_k=20,
+                    collection="atomic_controls_dedup",
+                )
+
+                # SAVEPOINT: a failing statement would otherwise leave the
+                # whole transaction aborted, so every later control would fail
+                # and the final commit would persist nothing.
+                with db.begin_nested():
+                    # We deduplicate by resolved control to avoid listing the
+                    # same regulation multiple times.
+                    rank = 0
+                    seen_parents: set[str] = set()
+
+                    for hit in results:
+                        score = hit.get("score", 0)
+                        if score < V1_MATCH_THRESHOLD:
+                            continue
+
+                        payload = hit.get("payload", {})
+                        matched_uuid = payload.get("control_uuid")
+                        if not matched_uuid or matched_uuid == str(v1.id):
+                            continue
+
+                        # Resolve to regulatory control (one with source_citation)
+                        reg_row = _resolve_regulatory_control(db, matched_uuid)
+                        if reg_row is None:
+                            continue
+
+                        # Deduplicate by parent UUID
+                        parent_key = str(reg_row.id)
+                        if parent_key in seen_parents:
+                            continue
+                        seen_parents.add(parent_key)
+
+                        rank += 1
+                        if rank > V1_MAX_MATCHES:
+                            break
+
+                        # Extract source info
+                        source_citation = reg_row.source_citation or {}
+                        is_mapping = isinstance(source_citation, dict)
+                        matched_source = source_citation.get("source") if is_mapping else None
+                        matched_article = source_citation.get("article") if is_mapping else None
+
+                        # Insert match — link to the regulatory parent (not the atomic child)
+                        db.execute(text("""
+                            INSERT INTO v1_control_matches
+                                (v1_control_uuid, matched_control_uuid, similarity_score,
+                                 match_rank, matched_source, matched_article, match_method)
+                            VALUES
+                                (CAST(:v1_uuid AS uuid), CAST(:matched_uuid AS uuid), :score,
+                                 :rank, :source, :article, 'embedding')
+                            ON CONFLICT (v1_control_uuid, matched_control_uuid) DO UPDATE
+                            SET similarity_score = EXCLUDED.similarity_score,
+                                match_rank = EXCLUDED.match_rank
+                        """), {
+                            "v1_uuid": str(v1.id),
+                            "matched_uuid": str(reg_row.id),
+                            "score": round(score, 3),
+                            "rank": rank,
+                            "source": matched_source,
+                            "article": matched_article,
+                        })
+
+                        control_matches.append({
+                            "v1_control_id": v1.control_id,
+                            "v1_title": v1.title,
+                            "matched_control_id": reg_row.control_id,
+                            "matched_title": reg_row.title,
+                            "matched_source": matched_source,
+                            "matched_article": matched_article,
+                            "similarity_score": round(score, 3),
+                            "match_rank": rank,
+                        })
+
+                # Savepoint released: the control's rows are part of the batch.
+                processed += 1
+                matches_inserted += len(control_matches)
+
+                # Collect sample (capped at 20 entries overall)
+                free_slots = 20 - len(sample_matches)
+                if free_slots > 0:
+                    sample_matches.extend(control_matches[:free_slots])
+
+            except Exception as e:
+                logger.warning("V1 enrichment error for %s: %s", v1.control_id, e)
+                errors.append({
+                    "control_id": v1.control_id,
+                    "error": str(e),
+                })
+
+        db.commit()
+
+    # Pagination: a full batch means there may be another page.
+    next_offset = offset + batch_size if len(v1_controls) == batch_size else None
+
+    return {
+        "dry_run": False,
+        "offset": offset,
+        "batch_size": batch_size,
+        "next_offset": next_offset,
+        "total_v1": total_v1,
+        "processed": processed,
+        "matches_inserted": matches_inserted,
+        "errors": errors[:10],
+        "sample_matches": sample_matches,
+    }
+
+
+async def get_v1_matches(control_uuid: str) -> list[dict]:
+    """Return the stored regulatory matches of one v1 control, best rank first.
+
+    Args:
+        control_uuid: The UUID of the v1 control.
+
+    Returns:
+        One dict per row in v1_control_matches, joined with the matched
+        control's details from canonical_controls.
+    """
+    with SessionLocal() as db:
+        rows = db.execute(text("""
+            SELECT
+                m.similarity_score,
+                m.match_rank,
+                m.matched_source,
+                m.matched_article,
+                m.match_method,
+                c.control_id AS matched_control_id,
+                c.title AS matched_title,
+                c.objective AS matched_objective,
+                c.severity AS matched_severity,
+                c.category AS matched_category,
+                c.source_citation AS matched_source_citation
+            FROM v1_control_matches m
+            JOIN canonical_controls c ON c.id = m.matched_control_uuid
+            WHERE m.v1_control_uuid = CAST(:uuid AS uuid)
+            ORDER BY m.match_rank
+        """), {"uuid": control_uuid}).fetchall()
+
+        matches: list[dict] = []
+        for row in rows:
+            matches.append({
+                "matched_control_id": row.matched_control_id,
+                "matched_title": row.matched_title,
+                "matched_objective": row.matched_objective,
+                "matched_severity": row.matched_severity,
+                "matched_category": row.matched_category,
+                "matched_source": row.matched_source,
+                "matched_article": row.matched_article,
+                "matched_source_citation": row.matched_source_citation,
+                # Converted so the value is a plain float in the response.
+                "similarity_score": float(row.similarity_score),
+                "match_rank": row.match_rank,
+                "match_method": row.match_method,
+            })
+        return matches
+
+
+async def get_v1_enrichment_stats() -> dict:
+    """Return overview counters for the v1 enrichment.
+
+    Reports how many v1 controls exist, how many of them have at least one
+    stored match, the total number of matches and their average similarity
+    score (None when there is no average to report).
+    """
+    with SessionLocal() as db:
+        total_row = db.execute(text(f"""
+            SELECT COUNT(*) AS cnt FROM canonical_controls
+            WHERE {_is_eigenentwicklung_query()}
+        """)).fetchone()
+
+        # Same filter, with every column qualified by the join alias "c".
+        aliased_filter = (
+            _is_eigenentwicklung_query()
+            .replace('release_state', 'c.release_state')
+            .replace('generation_strategy', 'c.generation_strategy')
+            .replace('pipeline_version', 'c.pipeline_version')
+            .replace('source_citation', 'c.source_citation')
+            .replace('parent_control_uuid', 'c.parent_control_uuid')
+        )
+        matched_row = db.execute(text(f"""
+            SELECT COUNT(DISTINCT m.v1_control_uuid) AS cnt
+            FROM v1_control_matches m
+            JOIN canonical_controls c ON c.id = m.v1_control_uuid
+            WHERE {aliased_filter}
+        """)).fetchone()
+
+        matches_row = db.execute(text("""
+            SELECT COUNT(*) AS cnt FROM v1_control_matches
+        """)).fetchone()
+
+        avg_row = db.execute(text("""
+            SELECT AVG(similarity_score) AS avg_score FROM v1_control_matches
+        """)).fetchone()
+
+        total_count = total_row.cnt if total_row else 0
+        matched_count = matched_row.cnt if matched_row else 0
+
+        mean_score = None
+        if avg_row and avg_row.avg_score:
+            mean_score = round(float(avg_row.avg_score), 3)
+
+        return {
+            "total_v1_controls": total_count,
+            "v1_with_matches": matched_count,
+            "v1_without_matches": total_count - matched_count,
+            "total_matches": matches_row.cnt if matches_row else 0,
+            "avg_similarity_score": mean_score,
+        }
@@ -14,6 +14,12 @@ from contextlib import asynccontextmanager
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware

+# Configure root logging so all modules' logger.info() etc. are visible
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(levelname)s:%(name)s: %(message)s",
+)
+
 logger = logging.getLogger(__name__)

 # Compliance-specific API routers
@@ -86,6 +92,18 @@ async def health():
    }


+@app.get("/debug/routers", tags=["system"])
+async def debug_routers():
+    """Diagnostic endpoint: report how many sub-routers loaded and which failed.
+
+    The ``failed`` entry maps each module name to the recorded import error text.
+    """
+    # Imported per call so the response reflects the package's current values.
+    from compliance.api import _ROUTER_MODULES, _failed_routers, _loaded_count
+
+    report = {
+        "total": len(_ROUTER_MODULES),
+        "loaded": _loaded_count,
+        "failed_count": len(_failed_routers),
+        "failed": _failed_routers,
+    }
+    return report
+
+
 # --- Compliance-specific Routers ---

 # Consent (user-facing)
@@ -79,11 +79,14 @@ def run_migrations():

        logger.info("%d pending migrations (of %d total)", len(pending), len(migration_files))

+        failed = []
        for migration_file in pending:
            logger.info("Applying migration: %s", migration_file.name)
            try:
                sql = migration_file.read_text(encoding="utf-8")
-                # Execute the full SQL file as-is (supports BEGIN/COMMIT)
+                # Strip explicit BEGIN/COMMIT — we manage transactions ourselves
+                sql = re.sub(r'(?mi)^\s*BEGIN\s*;\s*$', '', sql)
+                sql = re.sub(r'(?mi)^\s*COMMIT\s*;\s*$', '', sql)
                cursor.execute(sql)
                raw_conn.commit()
                # Record successful application
@@ -96,11 +99,14 @@ def run_migrations():
            except Exception as e:
                raw_conn.rollback()
                logger.error("  FAILED: %s — %s", migration_file.name, e)
-                raise RuntimeError(
-                    f"Migration {migration_file.name} failed: {e}"
-                ) from e
+                failed.append((migration_file.name, str(e)))
+                # Continue with remaining migrations instead of aborting

-        logger.info("All migrations applied successfully")
+        if failed:
+            names = ", ".join(f[0] for f in failed)
+            logger.error("Some migrations failed: %s", names)
+        else:
+            logger.info("All migrations applied successfully")
    finally:
        raw_conn.close()

@@ -2,7 +2,7 @@
 -- Adds job tracking, chunk tracking, blocked sources, and extends canonical_controls
 -- for the 3-license-rule system (free_use, citation_required, restricted).

-BEGIN;
+-- Transaction managed by migration_runner

 -- =============================================================================
 -- 1. Job-Tracking for Generator Runs
@@ -69,35 +69,21 @@ CREATE TABLE IF NOT EXISTS canonical_blocked_sources (

 -- =============================================================================
 -- 4. Extend canonical_controls: release_state + 3-rule columns
+-- Safe: only runs if canonical_controls exists
 -- =============================================================================

-- Expand release_state enum to include generator states
-ALTER TABLE canonical_controls DROP CONSTRAINT IF EXISTS canonical_controls_release_state_check;
-ALTER TABLE canonical_controls ADD CONSTRAINT canonical_controls_release_state_check
-    CHECK (release_state IN ('draft', 'review', 'approved', 'deprecated', 'needs_review', 'too_close', 'duplicate'));
-
-- License rule: 1 = free_use, 2 = citation_required, 3 = restricted
-ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
-    license_rule INTEGER DEFAULT NULL;
-
-- Original text from source (Rule 1+2 only; Rule 3 = always NULL)
-ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
-    source_original_text TEXT DEFAULT NULL;
-
-- Citation info (Rule 1+2 only; Rule 3 = always NULL)
-ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
-    source_citation JSONB DEFAULT NULL;
-
-- Whether source info may be shown to customers
-ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
-    customer_visible BOOLEAN DEFAULT true;
-
-- Generation metadata (internal only, never shown to customers)
-ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
-    generation_metadata JSONB DEFAULT NULL;
-
-- Index for filtering by license rule and customer visibility
-CREATE INDEX IF NOT EXISTS idx_canonical_controls_license_rule ON canonical_controls(license_rule);
-CREATE INDEX IF NOT EXISTS idx_canonical_controls_customer_visible ON canonical_controls(customer_visible);
-
-COMMIT;
+-- All canonical_controls changes are applied only when the table exists, so
+-- this section is a no-op on databases that have not created it.
+-- NOTE(review): the information_schema lookup matches on table_name only (no
+-- table_schema filter) — confirm no other schema holds a table of this name.
+DO $$
+BEGIN
+    IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'canonical_controls') THEN
+        -- Replace the release_state check so the generator states are allowed.
+        ALTER TABLE canonical_controls DROP CONSTRAINT IF EXISTS canonical_controls_release_state_check;
+        ALTER TABLE canonical_controls ADD CONSTRAINT canonical_controls_release_state_check
+            CHECK (release_state IN ('draft', 'review', 'approved', 'deprecated', 'needs_review', 'too_close', 'duplicate'));
+        -- Columns for the 3-license-rule system; all nullable except customer_visible (default true).
+        ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS license_rule INTEGER DEFAULT NULL;
+        ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS source_original_text TEXT DEFAULT NULL;
+        ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS source_citation JSONB DEFAULT NULL;
+        ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS customer_visible BOOLEAN DEFAULT true;
+        ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS generation_metadata JSONB DEFAULT NULL;
+        -- Indexes for filtering by license rule and customer visibility.
+        CREATE INDEX IF NOT EXISTS idx_canonical_controls_license_rule ON canonical_controls(license_rule);
+        CREATE INDEX IF NOT EXISTS idx_canonical_controls_customer_visible ON canonical_controls(customer_visible);
+    END IF;
+END $$;
@@ -0,0 +1,44 @@
+-- Migration 047: Add verification_method and category to canonical_controls
+-- verification_method: How a control is verified (code_review, document, tool, hybrid)
+-- category: Thematic grouping for customer-facing filters
+-- Safe: skipped unless canonical_controls resolves on the search_path (to_regclass), matching how the unqualified ALTERs look it up
+
+DO $$
+BEGIN
+    IF to_regclass('canonical_controls') IS NOT NULL THEN
+        ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
+            verification_method VARCHAR(20) DEFAULT NULL
+            CHECK (verification_method IN ('code_review', 'document', 'tool', 'hybrid'));  -- NULL (the default) still satisfies the CHECK
+        ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
+            category VARCHAR(50) DEFAULT NULL;  -- free-form here: no CHECK or foreign key constrains the value
+        CREATE INDEX IF NOT EXISTS idx_cc_verification ON canonical_controls(verification_method);
+        CREATE INDEX IF NOT EXISTS idx_cc_category ON canonical_controls(category);
+    END IF;
+END $$;
+
+CREATE TABLE IF NOT EXISTS canonical_control_categories (  -- category lookup table; created only when missing
+    category_id character varying(50)  PRIMARY KEY,  -- category identifier (e.g. 'encryption')
+    label_de    character varying(100) NOT NULL,     -- German display label
+    label_en    character varying(100) NOT NULL,     -- English display label
+    sort_order  int DEFAULT 0                        -- display position; 0 when not supplied
+);
+
+INSERT INTO canonical_control_categories (category_id, label_de, label_en, sort_order) VALUES  -- seed rows; explicit column list so a pre-existing table with another column order is not mis-filled
+    ('encryption',      'Verschluesselung & Kryptographie',       'Encryption & Cryptography', 1),
+    ('authentication',  'Authentisierung & Zugriffskontrolle',    'Authentication & Access Control', 2),
+    ('network',         'Netzwerksicherheit',                     'Network Security', 3),
+    ('data_protection', 'Datenschutz & Datensicherheit',          'Data Protection & Security', 4),
+    ('logging',         'Logging & Monitoring',                   'Logging & Monitoring', 5),
+    ('incident',        'Vorfallmanagement',                      'Incident Management', 6),
+    ('continuity',      'Notfall & Wiederherstellung',            'Continuity & Recovery', 7),
+    ('compliance',      'Compliance & Audit',                     'Compliance & Audit', 8),
+    ('supply_chain',    'Lieferkettenmanagement',                 'Supply Chain Management', 9),
+    ('physical',        'Physische Sicherheit',                   'Physical Security', 10),
+    ('personnel',       'Personal & Schulung',                    'Personnel & Training', 11),
+    ('application',     'Anwendungssicherheit',                   'Application Security', 12),
+    ('system',          'Systemhaertung & -betrieb',              'System Hardening & Operations', 13),
+    ('risk',            'Risikomanagement',                       'Risk Management', 14),
+    ('governance',      'Sicherheitsorganisation',                'Security Governance', 15),
+    ('hardware',        'Hardware & Plattformsicherheit',         'Hardware & Platform Security', 16),
+    ('identity',        'Identitaetsmanagement',                  'Identity Management', 17)
+ON CONFLICT DO NOTHING;  -- rows that already exist are left untouched, so the seed can be re-run
@@ -0,0 +1,22 @@
+-- 048: Expand processing_path CHECK constraint for new pipeline paths
+-- New values: prefilter_skip, no_control, store_failed, error
+-- Safe: runs only if canonical_processed_chunks resolves on the search_path (to_regclass); a same-named table in an unreachable schema can no longer trigger a failing ALTER
+
+DO $$
+BEGIN
+    IF to_regclass('canonical_processed_chunks') IS NOT NULL THEN
+        ALTER TABLE canonical_processed_chunks
+            DROP CONSTRAINT IF EXISTS canonical_processed_chunks_processing_path_check;  -- drop first so the constraint can be redefined on every run
+        ALTER TABLE canonical_processed_chunks
+            ADD CONSTRAINT canonical_processed_chunks_processing_path_check
+            CHECK (processing_path IN (
+                'structured',
+                'llm_reform',
+                'skipped',
+                'prefilter_skip',
+                'no_control',
+                'store_failed',
+                'error'
+            ));
+    END IF;
+END $$;
@@ -0,0 +1,13 @@
+-- 049: Add target_audience field to canonical_controls
+-- Distinguishes who a control is relevant for: enterprises, authorities, providers, or all.
+-- Safe: runs only if canonical_controls resolves on the search_path (to_regclass), matching how the unqualified ALTER looks it up
+
+DO $$
+BEGIN
+    IF to_regclass('canonical_controls') IS NOT NULL THEN
+        ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
+            target_audience VARCHAR(20) DEFAULT NULL
+            CHECK (target_audience IN ('enterprise', 'authority', 'provider', 'all'));  -- NULL (the default) still satisfies the CHECK
+        CREATE INDEX IF NOT EXISTS idx_cc_target_audience ON canonical_controls(target_audience);
+    END IF;
+END $$;
@@ -0,0 +1,22 @@
+-- Migration 050 — Score Snapshots: historical compliance score tracking
+-- Re-runnable: the table and both indexes are created with IF NOT EXISTS
+
+CREATE TABLE IF NOT EXISTS compliance_score_snapshots (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    tenant_id UUID NOT NULL,
+    project_id UUID,  -- nullable; see the review note on the UNIQUE constraint below
+    score DECIMAL(5,2) NOT NULL,  -- at most three integer digits and two decimals
+    controls_total INTEGER DEFAULT 0,
+    controls_pass INTEGER DEFAULT 0,
+    controls_partial INTEGER DEFAULT 0,
+    evidence_total INTEGER DEFAULT 0,
+    evidence_valid INTEGER DEFAULT 0,
+    risks_total INTEGER DEFAULT 0,
+    risks_high INTEGER DEFAULT 0,
+    snapshot_date DATE NOT NULL,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    UNIQUE (tenant_id, project_id, snapshot_date)  -- NOTE(review): NULLs are distinct here, so rows with NULL project_id are not deduplicated per day — confirm intended
+);
+
+CREATE INDEX IF NOT EXISTS idx_score_snap_tenant ON compliance_score_snapshots(tenant_id);
+CREATE INDEX IF NOT EXISTS idx_score_snap_date ON compliance_score_snapshots(snapshot_date);
@@ -0,0 +1,53 @@
+-- Migration 052 — Process Manager: recurring compliance tasks with audit trail
+-- Idempotent: tables and indexes use IF NOT EXISTS, so a re-run is a no-op instead of an "already exists" error
+
+CREATE TABLE IF NOT EXISTS compliance_process_tasks (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    tenant_id UUID NOT NULL,
+    project_id UUID,
+    task_code VARCHAR(50) NOT NULL,
+    title VARCHAR(500) NOT NULL,
+    description TEXT,
+    category VARCHAR(50) NOT NULL
+        CHECK (category IN ('dsgvo','nis2','bsi','iso27001','ai_act','internal')),
+    priority VARCHAR(20) NOT NULL DEFAULT 'medium'
+        CHECK (priority IN ('critical','high','medium','low')),
+    frequency VARCHAR(20) NOT NULL DEFAULT 'yearly'
+        CHECK (frequency IN ('weekly','monthly','quarterly','semi_annual','yearly','once')),
+    assigned_to VARCHAR(255),
+    responsible_team VARCHAR(255),
+    linked_control_ids JSONB DEFAULT '[]',
+    linked_module VARCHAR(100),
+    last_completed_at TIMESTAMPTZ,
+    next_due_date DATE,
+    due_reminder_days INTEGER DEFAULT 14,
+    status VARCHAR(20) NOT NULL DEFAULT 'pending'
+        CHECK (status IN ('pending','in_progress','completed','overdue','skipped')),
+    completion_date TIMESTAMPTZ,
+    completion_result TEXT,
+    completion_evidence_id UUID,
+    follow_up_actions JSONB DEFAULT '[]',
+    is_seed BOOLEAN DEFAULT FALSE,
+    notes TEXT,
+    tags JSONB DEFAULT '[]',
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),  -- only defaulted at insert; no trigger in this migration refreshes it
+    UNIQUE (tenant_id, project_id, task_code)  -- NOTE(review): NULL project_id values never conflict (NULLs are distinct) — confirm intended
+);
+
+CREATE TABLE IF NOT EXISTS compliance_process_task_history (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    task_id UUID NOT NULL REFERENCES compliance_process_tasks(id) ON DELETE CASCADE,  -- history rows are deleted together with their task
+    completed_by VARCHAR(255),
+    completed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    result TEXT,
+    evidence_id UUID,
+    notes TEXT,
+    status VARCHAR(20) NOT NULL  -- no CHECK here, unlike compliance_process_tasks.status
+);
+
+CREATE INDEX IF NOT EXISTS idx_process_tasks_tenant ON compliance_process_tasks(tenant_id);
+CREATE INDEX IF NOT EXISTS idx_process_tasks_status ON compliance_process_tasks(status);
+CREATE INDEX IF NOT EXISTS idx_process_tasks_due ON compliance_process_tasks(next_due_date);
+CREATE INDEX IF NOT EXISTS idx_process_tasks_category ON compliance_process_tasks(category);
+CREATE INDEX IF NOT EXISTS idx_task_history_task ON compliance_process_task_history(task_id);
@@ -0,0 +1,62 @@
+-- Migration 053 — Evidence Checks: automated compliance verification
+-- Idempotent: tables and indexes use IF NOT EXISTS, so a re-run is a no-op instead of an "already exists" error
+
+CREATE TABLE IF NOT EXISTS compliance_evidence_checks (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    tenant_id UUID NOT NULL,
+    project_id UUID,
+    check_code VARCHAR(50) NOT NULL,
+    title VARCHAR(500) NOT NULL,
+    description TEXT,
+    check_type VARCHAR(30) NOT NULL
+        CHECK (check_type IN ('tls_scan','header_check','certificate_check',
+               'config_scan','api_scan','dns_check','port_scan')),
+    target_url TEXT,
+    target_config JSONB DEFAULT '{}',
+    linked_control_ids JSONB DEFAULT '[]',
+    frequency VARCHAR(20) DEFAULT 'monthly'
+        CHECK (frequency IN ('daily','weekly','monthly','quarterly','manual')),
+    last_run_at TIMESTAMPTZ,
+    next_run_at TIMESTAMPTZ,
+    is_active BOOLEAN DEFAULT TRUE,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),  -- only defaulted at insert; no trigger in this migration refreshes it
+    UNIQUE (tenant_id, project_id, check_code)  -- NOTE(review): NULL project_id values never conflict (NULLs are distinct) — confirm intended
+);
+
+CREATE TABLE IF NOT EXISTS compliance_evidence_check_results (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    check_id UUID NOT NULL REFERENCES compliance_evidence_checks(id) ON DELETE CASCADE,  -- results are deleted together with their check
+    tenant_id UUID NOT NULL,
+    run_status VARCHAR(20) NOT NULL DEFAULT 'running'
+        CHECK (run_status IN ('running','passed','failed','warning','error')),
+    result_data JSONB NOT NULL DEFAULT '{}',
+    summary TEXT,
+    findings_count INTEGER DEFAULT 0,
+    critical_findings INTEGER DEFAULT 0,
+    evidence_id UUID,
+    duration_ms INTEGER,
+    run_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+CREATE TABLE IF NOT EXISTS compliance_evidence_control_map (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    tenant_id UUID NOT NULL,
+    evidence_id UUID NOT NULL,  -- plain UUID: this migration declares no foreign key for it
+    control_code VARCHAR(50) NOT NULL,
+    mapping_type VARCHAR(20) DEFAULT 'supports'
+        CHECK (mapping_type IN ('supports','partially_supports','required')),
+    verified_at TIMESTAMPTZ,
+    verified_by VARCHAR(255),
+    notes TEXT,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    UNIQUE (tenant_id, evidence_id, control_code)  -- one mapping per evidence/control pair within a tenant
+);
+
+CREATE INDEX IF NOT EXISTS idx_evidence_checks_tenant ON compliance_evidence_checks(tenant_id);
+CREATE INDEX IF NOT EXISTS idx_evidence_checks_type ON compliance_evidence_checks(check_type);
+CREATE INDEX IF NOT EXISTS idx_evidence_checks_active ON compliance_evidence_checks(is_active);
+CREATE INDEX IF NOT EXISTS idx_check_results_check ON compliance_evidence_check_results(check_id);
+CREATE INDEX IF NOT EXISTS idx_check_results_status ON compliance_evidence_check_results(run_status);
+CREATE INDEX IF NOT EXISTS idx_evidence_control_map_tenant ON compliance_evidence_control_map(tenant_id);
+CREATE INDEX IF NOT EXISTS idx_evidence_control_map_control ON compliance_evidence_control_map(control_code);
@@ -0,0 +1,340 @@
+-- Migration 054: Erweiterte HinSchG-Wiki-Artikel
+-- Ergaenzt die bestehende HinSchG-Kategorie um detaillierte Artikel
+
+-- Bestehenden Grundlagen-Artikel mit umfassenderem Inhalt aktualisieren
+UPDATE compliance_wiki_articles
+SET content = '## Ueberblick
+
+Das **Hinweisgeberschutzgesetz (HinSchG)** setzt die EU-Whistleblowing-Richtlinie (EU) 2019/1937 in deutsches Recht um. Es schuetzt Personen, die auf Missstaende in Unternehmen und Behoerden hinweisen und ist seit dem **2. Juli 2023** in Kraft.
+
+- Ab 02.07.2023: Pflicht fuer Unternehmen ab **250 Beschaeftigten**
+- Ab 17.12.2023: Pflicht fuer Unternehmen ab **50 Beschaeftigten** (§ 12 HinSchG)
+
+## Kernpflichten
+
+### Interne Meldestelle einrichten (§ 12 HinSchG)
+- Kann eine **interne Person** (Ombudsperson) oder ein **externer Dienstleister** sein
+- Meldungen muessen **muendlich, schriftlich und persoenlich** moeglich sein
+- Die Meldestelle muss **unabhaengig** und **fachkundig** sein
+- **Gemeinsame Meldestellen** sind fuer Unternehmen mit 50–249 Beschaeftigten zulaessig
+
+### Gesetzliche Fristen (§ 17 HinSchG)
+- Eingangsbestaetigung innerhalb von **7 Tagen** nach Meldungseingang (§ 17 Abs. 1 S. 2)
+- Rueckmeldung ueber Folgemaßnahmen innerhalb von **3 Monaten** nach Eingangsbestaetigung (§ 17 Abs. 2)
+- Dokumentation muss **3 Jahre** nach Abschluss aufbewahrt werden (§ 11 Abs. 5)
+
+### Vertraulichkeitsgebot (§ 8 HinSchG)
+- Die **Identitaet des Hinweisgebers** darf nur den zustaendigen Personen bekannt sein
+- Offenlegung nur mit **Einwilligung** oder bei **gesetzlicher Verpflichtung**
+- Verstoss ist bussgeld-bewehrt (bis 50.000 EUR)
+
+## Welche Daten fallen an?
+- Identitaet des Hinweisgebers (besonders schuetzenswert!)
+- Beschuldigte Personen
+- Zeugen und weitere Beteiligte
+- Inhalt der Meldung (kann sensible Daten enthalten)
+- Kommunikationsverlauf
+
+## Datenschutz-Anforderungen
+- **Eigene Verarbeitungstaetigkeit** im VVT anlegen
+- Rechtsgrundlage: Art. 6 Abs. 1c DSGVO (rechtliche Verpflichtung)
+- **Zugriffsbeschraenkung:** Nur die benannte Meldestelle darf auf die Daten zugreifen
+- **Loeschfrist:** 3 Jahre nach Abschluss des Verfahrens (§ 11 Abs. 5 HinSchG)
+- Bei Art.-9-Daten in Meldungen: besondere Schutzmassnahmen erforderlich
+
+## Sanktionen (§ 40 HinSchG)
+
+| Verstoss | Bussgeld |
+|----------|----------|
+| Keine Meldestelle eingerichtet | Bis 20.000 EUR |
+| Behinderung einer Meldung | Bis 50.000 EUR |
+| Verstoss gegen Vertraulichkeitsgebot | Bis 50.000 EUR |
+| Repressalien gegen Hinweisgeber | Bis 50.000 EUR |
+
+## Praxis-Tipp
+Pruefen Sie bei externen Meldestellen-Anbietern, ob ein **AVV** erforderlich ist. In den meisten Faellen ja — der Anbieter verarbeitet personenbezogene Daten in Ihrem Auftrag.',
+    summary = 'Das HinSchG setzt die EU-Whistleblowing-Richtlinie um und verpflichtet seit Dezember 2023 alle Unternehmen ab 50 Beschaeftigten zur Einrichtung einer internen Meldestelle. Verstoesse koennen mit bis zu 50.000 EUR geahndet werden.',
+    legal_refs = ARRAY['§ 2 HinSchG', '§ 8 HinSchG', '§ 11 Abs. 5 HinSchG', '§ 12 HinSchG', '§ 17 HinSchG', '§ 36 HinSchG', '§ 40 HinSchG', 'Art. 6 Abs. 1c DSGVO', 'EU-RL 2019/1937'],
+    tags = ARRAY['hinweisgeberschutz', 'whistleblower', 'meldestelle', 'vertraulichkeit', 'fristen', 'bussgelder'],
+    version = 2,
+    updated_at = NOW()
+WHERE id = 'hinschg-grundlagen';
+
+-- Neuer Artikel: Sachlicher Anwendungsbereich
+INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
+('hinschg-anwendungsbereich', 'hinschg',
+ 'Sachlicher Anwendungsbereich — Welche Verstoesse sind meldbar?',
+ 'Das HinSchG schuetzt Meldungen ueber Verstoesse gegen EU-Recht und nationales Recht. Der Anwendungsbereich geht weit ueber rein strafrechtliche Verstoesse hinaus.',
+ '## Ueberblick
+
+Der sachliche Anwendungsbereich des HinSchG (§ 2) ist bewusst weit gefasst. Geschuetzt werden Meldungen ueber Verstoesse, die **strafbewehrt** sind oder **bussgeld-bewehrt**, sowie Verstoesse gegen bestimmte **EU-Rechtsakte** und deren nationale Umsetzungsgesetze.
+
+## Meldbare Verstoesse (§ 2 HinSchG)
+
+### Strafvorschriften
+- Alle Straftaten nach dem **StGB** (Betrug, Untreue, Korruption, Urkundenfaelschung)
+- Straftaten nach **Nebenstrafrecht** (Umweltstrafrecht, Wirtschaftsstrafrecht)
+
+### Bussgeld-bewehrte Vorschriften
+- Verstoesse gegen **Ordnungswidrigkeiten-Vorschriften**, soweit die verletzte Norm dem Schutz von Leben, Leib, Gesundheit oder Rechten von Beschaeftigten dient
+
+### EU-Rechtsakte und nationale Umsetzung
+| Rechtsgebiet | Beispiele |
+|-------------|-----------|
+| Datenschutz | DSGVO, BDSG — z.B. unrechtmaessige Datenweitergabe |
+| Geldwaesche | GwG — z.B. fehlende Verdachtsmeldungen |
+| Produktsicherheit | ProdSG — z.B. mangelhafte Produkte im Verkehr |
+| Umweltschutz | BImSchG, KrWG — z.B. illegale Entsorgung |
+| Lebensmittelsicherheit | LFGB — z.B. Hygienemaengel |
+| Arbeitsschutz | ArbSchG, ArbZG — z.B. ueberlange Arbeitszeiten |
+| Verbraucherschutz | UWG — z.B. irrefuehrende Werbung |
+| Wettbewerbsrecht | GWB — z.B. Preisabsprachen, Kartelle |
+| Steuerrecht | AO — z.B. Steuerhinterziehung bei Unternehmen |
+| Vergaberecht | GWB Teil 4 — z.B. Manipulationen bei oeffentlichen Auftraegen |
+
+## Nicht erfasste Bereiche
+
+- **Rein privatrechtliche Streitigkeiten** (z.B. Vertragskonflikte)
+- **Nationale Sicherheit** — Informationen, die der nationalen Sicherheit unterliegen
+- **Berufsgeheimnisse** — Anwalts-, Arzt- oder Seelsorgegeheimnis (mit Ausnahmen)
+
+## Praxis-Tipp
+
+Im Zweifelsfall sollte eine Meldung **immer entgegengenommen** und geprueft werden. Die Meldestelle entscheidet erst bei der Sachverhaltspruefung, ob ein meldepflichtiger Verstoss vorliegt.',
+ ARRAY['§ 2 HinSchG', '§ 3 HinSchG', '§ 5 HinSchG'],
+ ARRAY['anwendungsbereich', 'verstoesse', 'strafrecht', 'bussgeld', 'eu-recht', 'meldepflicht'],
+ 'important',
+ ARRAY[]::text[])
+ON CONFLICT (id) DO NOTHING;
+
+-- Neuer Artikel: Schutz des Hinweisgebers
+INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
+('hinschg-hinweisgeberschutz', 'hinschg',
+ 'Schutz des Hinweisgebers — Repressalienverbot und Beweislastumkehr',
+ 'Das HinSchG verbietet jede Form der Benachteiligung von Hinweisgebern. Bei Verstoessen greift eine Beweislastumkehr zugunsten des Hinweisgebers.',
+ '## Ueberblick
+
+Der Schutz hinweisgebender Personen ist das **Kernziel des HinSchG**. Das Gesetz sieht ein umfassendes Verbot von Repressalien, eine Beweislastumkehr und einen Schadensersatzanspruch vor.
+
+## Repressalienverbot (§ 36 HinSchG)
+
+Verboten ist jede Form der **Benachteiligung** aufgrund einer Meldung:
+- **Kuendigung** oder Nichterneuerung eines befristeten Vertrags
+- **Abmahnung** oder negative Leistungsbewertung
+- **Versetzung**, Degradierung oder Befoerderungsverweigerung
+- **Gehaltsreduktion** oder Entzug von Verguenstigungen
+- **Mobbing**, Ausgrenzung, Einschuechterung
+- **Aufnahme in schwarze Listen** oder Branchenregister
+- **Entzug einer Lizenz** oder Genehmigung
+- **Anordnung einer psychiatrischen Untersuchung**
+
+## Beweislastumkehr (§ 36 Abs. 2 HinSchG)
+
+Erleidet ein Hinweisgeber nach einer Meldung eine Benachteiligung, wird **vermutet**, dass diese Benachteiligung eine Repressalie ist. Der **Arbeitgeber** muss beweisen, dass die Massnahme:
+- Auf hinreichend gerechtfertigten Gruenden beruht
+- **Keinen Zusammenhang** mit der Meldung hat
+
+## Schadensersatz (§ 37 HinSchG)
+
+- Hinweisgeber hat Anspruch auf **Ersatz des erlittenen Schadens**
+- Umfasst **materielle** Schaeden (Gehaltsverlust) und **immaterielle** Schaeden (Mobbing)
+- Kein **Mitverschulden** des Hinweisgebers, wenn die Meldung in gutem Glauben erfolgte
+
+## Geschuetzte Personengruppen (§ 1 HinSchG)
+
+- Arbeitnehmerinnen und Arbeitnehmer
+- Beamtinnen und Beamte
+- Auszubildende und Praktikanten
+- Selbststaendige und Anteilseigner
+- Mitglieder von Leitungs- und Aufsichtsorganen
+- Ehrenamtlich Taetige und Freiwillige
+- Bewerberinnen und Bewerber (bei Informationen im Bewerbungsprozess)
+
+## Voraussetzungen fuer den Schutz (§ 33 HinSchG)
+
+Der Schutz greift, wenn:
+- der Hinweisgeber **hinreichenden Grund** hatte anzunehmen, dass die gemeldeten Informationen der Wahrheit entsprechen
+- die Meldung ueber einen **vorgesehenen Kanal** (intern oder extern) erfolgte
+- der Verstoß in den **sachlichen Anwendungsbereich** faellt
+
+**Achtung:** Wissentlich **falsche Meldungen** sind nicht geschuetzt und koennen eigene Schadensersatzpflichten ausloesen (§ 38 HinSchG).',
+ ARRAY['§ 1 HinSchG', '§ 33 HinSchG', '§ 36 HinSchG', '§ 37 HinSchG', '§ 38 HinSchG'],
+ ARRAY['repressalienverbot', 'beweislastumkehr', 'schadensersatz', 'hinweisgeberschutz', 'kuendigungsschutz'],
+ 'critical',
+ ARRAY[]::text[])
+ON CONFLICT (id) DO NOTHING;
+
+-- Neuer Artikel: Interne vs. Externe Meldestelle
+INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
+('hinschg-meldestellen', 'hinschg',
+ 'Interne vs. Externe Meldestelle — Was ist der Unterschied?',
+ 'Das HinSchG sieht interne und externe Meldestellen vor. Hinweisgeber koennen frei waehlen, an wen sie sich wenden. Die Einrichtung einer internen Meldestelle ist Pflicht.',
+ '## Ueberblick
+
+Das HinSchG unterscheidet zwischen **internen Meldestellen** (beim Unternehmen) und **externen Meldestellen** (bei Behoerden). Hinweisgeber haben ein **Wahlrecht** — sie koennen sich direkt an die externe Meldestelle wenden, ohne den internen Weg vorher beschritten zu haben.
+
+## Interne Meldestelle (§§ 12–18 HinSchG)
+
+### Einrichtungspflicht
+- **Ab 50 Beschaeftigten**: Pflicht zur Einrichtung (seit 17.12.2023)
+- Unternehmen mit **50–249 Beschaeftigten** duerfen eine gemeinsame Meldestelle nutzen
+- Ab **250 Beschaeftigten**: eigene Meldestelle erforderlich
+
+### Anforderungen
+- **Unabhaengigkeit** — keine Interessenkonflikte
+- **Fachkunde** — geschultes Personal
+- Meldekanal muss **muendliche, schriftliche und persoenliche** Meldungen ermoeglichen
+- **Anonyme Meldungen** sollen ermoeglicht werden (keine Pflicht, aber empfohlen)
+
+### Besetzung
+Die Meldestelle kann besetzt werden durch:
+- Interne **Ombudsperson** (z.B. Compliance Officer; eine Personalunion mit dem Datenschutzbeauftragten ist kritisch)
+- **Externer Dienstleister** (Kanzlei, spezialisierter Anbieter) — erfordert AVV
+- **Gremium** aus mehreren Personen
+
+## Externe Meldestelle (§§ 19–31 HinSchG)
+
+Die wichtigsten externen Meldestellen:
+
+| Meldestelle | Zustaendigkeit |
+|-------------|---------------|
+| **BfJ (Bundesamt fuer Justiz)** | Auffangmeldestelle fuer alle Verstoesse |
+| **BaFin** | Finanzaufsicht, Geldwaesche, Wertpapierrecht |
+| **Bundeskartellamt** | Wettbewerbsrecht, Kartelle |
+
+## Wahlrecht des Hinweisgebers
+
+- Hinweisgeber duerfen **frei waehlen** zwischen intern und extern
+- Die interne Meldung ist **nicht vorrangig** — anders als bei vielen Unternehmenspolicies
+- Ein Unternehmen darf **nicht verbieten**, sich an die externe Stelle zu wenden
+
+## Praxis-Tipp
+
+Gestalten Sie die interne Meldestelle **niedrigschwellig und vertrauenswuerdig**, damit Mitarbeiter sie bevorzugt nutzen. Unternehmen erfahren frueh von Problemen und koennen schneller reagieren.',
+ ARRAY['§ 12 HinSchG', '§ 13 HinSchG', '§ 14 HinSchG', '§ 16 HinSchG', '§ 17 HinSchG', '§ 19 HinSchG', '§ 27 HinSchG'],
+ ARRAY['meldestelle', 'intern', 'extern', 'ombudsperson', 'bfj', 'bafin', 'wahlrecht'],
+ 'critical',
+ ARRAY[]::text[])
+ON CONFLICT (id) DO NOTHING;
+
+-- Neuer Artikel: Verfahrensablauf bei einer Meldung
+INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
+('hinschg-verfahrensablauf', 'hinschg',
+ 'Verfahrensablauf — Von der Meldung bis zur Rueckmeldung',
+ 'Der gesetzlich vorgeschriebene Ablauf einer Meldung umfasst Eingangsbestaetigung, Sachverhaltspruefung, Folgemaßnahmen und Rueckmeldung an den Hinweisgeber.',
+ '## Ueberblick
+
+Das HinSchG schreibt einen strukturierten Verfahrensablauf fuer jede eingehende Meldung vor (§ 17 HinSchG). Dieser Ablauf ist nicht verhandelbar — die Fristen sind gesetzlich bindend.
+
+## Schritt-fuer-Schritt-Verfahren
+
+### 1. Meldungseingang
+- Meldung wird ueber den internen Meldekanal eingereicht
+- Das System vergibt automatisch eine **Referenznummer** und einen **Zugangscode**
+- Der Zugangscode ermoeglicht dem Hinweisgeber die anonyme Statusabfrage
+
+### 2. Eingangsbestaetigung (Frist: 7 Tage)
+- Innerhalb von **7 Tagen** nach Eingang muss die Meldestelle den Eingang bestaetigen (§ 17 Abs. 1 S. 2)
+- Bei anonymen Meldungen: Bestaetigung ueber den anonymen Kommunikationskanal
+- **Wichtig:** Die Bestaetigung darf keine inhaltliche Bewertung enthalten
+
+### 3. Sachverhaltspruefung
+- Die Meldestelle prueft, ob ein **meldepflichtiger Verstoss** vorliegt (§ 2 HinSchG)
+- Stichhaltigkeitspruefung der gemeldeten Informationen
+- Gegebenenfalls Rueckfragen an den Hinweisgeber (ueber anonymen Kanal)
+
+### 4. Folgemaßnahmen (§ 18 HinSchG)
+Moegliche Maßnahmen umfassen:
+- **Interne Untersuchung** (ggf. mit externen Gutachtern)
+- **Abstellung des Verstosses** durch organisatorische Aenderungen
+- Weiterleitung an eine **zustaendige Behoerde**
+- **Disziplinarmaßnahmen** gegen Verantwortliche
+- **Einstellung** des Verfahrens bei unbegruendeten Meldungen
+
+### 5. Rueckmeldung (Frist: 3 Monate)
+- Innerhalb von **3 Monaten** nach Eingangsbestaetigung muss dem Hinweisgeber eine Rueckmeldung ueber ergriffene oder geplante Folgemaßnahmen gegeben werden (§ 17 Abs. 2)
+- Die Rueckmeldung soll den Hinweisgeber informieren, **ohne laufende Ermittlungen zu gefaehrden**
+
+### 6. Abschluss und Dokumentation
+- Abschließende Dokumentation des gesamten Verfahrens
+- Aufbewahrung fuer **3 Jahre** nach Abschluss (§ 11 Abs. 5 HinSchG)
+- Danach: Loeschung aller personenbezogenen Daten
+
+## Fristen-Uebersicht
+
+| Schritt | Frist | Ab wann |
+|---------|-------|---------|
+| Eingangsbestaetigung | 7 Tage | Ab Meldungseingang |
+| Rueckmeldung | 3 Monate | Ab Eingangsbestaetigung |
+| Aufbewahrung | 3 Jahre | Ab Verfahrensabschluss |
+
+## Praxis-Tipp
+
+Richten Sie ein **automatisches Fristen-Monitoring** ein. Das BreakPilot Hinweisgebersystem berechnet die Fristen automatisch und warnt rechtzeitig vor drohender Ueberschreitung.',
+ ARRAY['§ 11 Abs. 5 HinSchG', '§ 17 Abs. 1 HinSchG', '§ 17 Abs. 2 HinSchG', '§ 18 HinSchG'],
+ ARRAY['verfahren', 'ablauf', 'fristen', 'eingangsbestaetigung', 'rueckmeldung', 'folgemaßnahmen', 'dokumentation'],
+ 'important',
+ ARRAY[]::text[])
+ON CONFLICT (id) DO NOTHING;
+
+-- Neuer Artikel: Datenschutz-Anforderungen
+INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
+('hinschg-datenschutz', 'hinschg',
+ 'Datenschutz im Hinweisgebersystem — DSGVO-Konformitaet sicherstellen',
+ 'Das Hinweisgebersystem verarbeitet besonders sensible personenbezogene Daten. Die DSGVO-Anforderungen an Datenschutz, Loeschfristen und Zugriffskontrollen sind strikt einzuhalten.',
+ '## Ueberblick
+
+Ein Hinweisgebersystem verarbeitet **hochsensible personenbezogene Daten**: die Identitaet des Hinweisgebers, Beschuldigter, Zeugen und den Inhalt der Meldung. Die DSGVO-Anforderungen muessen mit den HinSchG-Pflichten in Einklang gebracht werden.
+
+## Rechtsgrundlage
+
+Die Verarbeitung stuetzt sich auf:
+- **Art. 6 Abs. 1c DSGVO** — Erfuellung einer rechtlichen Verpflichtung (HinSchG)
+- **Art. 6 Abs. 1f DSGVO** — Berechtigtes Interesse (fuer nicht-verpflichtete Unternehmen)
+- **Art. 9 Abs. 2b DSGVO** — Fuer besondere Datenkategorien im Beschaeftigungskontext
+
+## VVT-Eintrag (Pflicht)
+
+Erstellen Sie einen eigenen VVT-Eintrag fuer das Hinweisgebersystem:
+
+| Feld | Inhalt |
+|------|--------|
+| Bezeichnung | Betrieb des internen Hinweisgebersystems |
+| Rechtsgrundlage | Art. 6 Abs. 1c DSGVO i.V.m. §§ 12 ff. HinSchG |
+| Kategorien betroffener Personen | Hinweisgeber, Beschuldigte, Zeugen |
+| Datenkategorien | Identitaetsdaten, Kommunikationsdaten, Meldungsinhalt |
+| Loeschfrist | 3 Jahre nach Verfahrensabschluss |
+| Empfaenger | Interne Meldestelle, ggf. externe Meldestelle |
+
+## Technisch-organisatorische Massnahmen (TOM)
+
+- **Verschluesselung** — Alle Meldungsdaten at-rest und in-transit verschluesselt
+- **Zugriffsbeschraenkung** — Nur die benannte Meldestelle darf auf Daten zugreifen
+- **Protokollierung** — Revisionssicherer Audit-Trail aller Zugriffe
+- **Pseudonymisierung** — Anonyme Meldungen ohne Zuordnung zu Klarnamen
+- **Trennung** — Meldungsdaten getrennt von sonstigen HR-Daten speichern
+
+## Loeschkonzept
+
+| Daten | Loeschfrist | Rechtsgrundlage |
+|-------|-------------|-----------------|
+| Meldungsdaten | 3 Jahre nach Abschluss | § 11 Abs. 5 HinSchG |
+| Audit-Trail | 3 Jahre nach Abschluss | § 11 Abs. 5 HinSchG |
+| Kommunikationsdaten | 3 Jahre nach Abschluss | § 11 Abs. 5 HinSchG |
+| Zugangscodes | Nach Verfahrensabschluss | Zweckerfuellung |
+
+## DSFA-Pflicht?
+
+Eine **Datenschutz-Folgenabschaetzung** (Art. 35 DSGVO) ist in vielen Faellen erforderlich, da:
+- **Systematische Ueberwachung** von Beschaeftigten (potenziell)
+- Verarbeitung **besonderer Datenkategorien** moeglich (Art. 9 DSGVO)
+- **Verletzliche Personengruppen** betroffen (Hinweisgeber, Beschuldigte)
+
+## Praxis-Tipp
+
+Fuehren Sie eine DSFA durch und dokumentieren Sie die Abwaegung. Dies dient auch als Nachweis der Rechenschaftspflicht (Art. 5 Abs. 2 DSGVO).',
+ ARRAY['Art. 5 Abs. 2 DSGVO', 'Art. 6 Abs. 1c DSGVO', 'Art. 9 Abs. 2b DSGVO', 'Art. 28 DSGVO', 'Art. 35 DSGVO', '§ 8 HinSchG', '§ 11 Abs. 5 HinSchG', '§ 26 BDSG'],
+ ARRAY['datenschutz', 'dsgvo', 'vvt', 'dsfa', 'loeschfristen', 'tom', 'verschluesselung', 'audit-trail'],
+ 'critical',
+ ARRAY[]::text[])
+ON CONFLICT (id) DO NOTHING;
@@ -0,0 +1,230 @@
+-- Migration 055: CRA (Cyber Resilience Act) Wiki-Kategorie und Artikel
+-- Neue Kategorie + 3 Artikel zum EU Cyber Resilience Act
+
+-- Kategorie: CRA
+INSERT INTO compliance_wiki_categories (id, name, description, icon, sort_order) VALUES
+('cra', 'Cyber Resilience Act (CRA)', 'EU-Verordnung fuer Cybersicherheit von Produkten mit digitalen Elementen', 'Shield', 75)
+ON CONFLICT (id) DO NOTHING;
+
+-- Artikel 1: CRA Grundlagen
+INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
+('cra-grundlagen', 'cra',
+ 'Cyber Resilience Act — Ueberblick und Pflichten',
+ 'Der CRA (EU) 2024/2847 verpflichtet Hersteller von Produkten mit digitalen Elementen zu umfassenden Cybersicherheits-Massnahmen ueber den gesamten Produktlebenszyklus.',
+ '## Ueberblick
+
+Der **EU Cyber Resilience Act (CRA)**, Verordnung (EU) 2024/2847, ist am **10. Dezember 2024** in Kraft getreten. Er etabliert horizontale Cybersicherheitsanforderungen fuer alle **Produkte mit digitalen Elementen**, die in der EU in Verkehr gebracht werden.
+
+## Zeitplan
+
+| Datum | Meilenstein |
+|-------|------------|
+| 10.12.2024 | Inkrafttreten |
+| 11.06.2026 | Konformitaetsbewertungsstellen muessen benannt sein |
+| 11.09.2026 | Meldepflicht fuer Schwachstellen und Vorfaelle |
+| 11.12.2027 | Volle Anwendung — CE-Kennzeichnung erforderlich |
+
+## Was sind "Produkte mit digitalen Elementen"?
+
+Jedes Software- oder Hardware-Produkt, das:
+- Eine **Datenverbindung** (direkt oder indirekt) zu einem Geraet oder Netzwerk hat
+- **Software** enthaelt, die bestimmungsgemaess genutzt wird
+
+**Beispiele:** IoT-Geraete, Firmware, eigenstaendige Software, Betriebssysteme, Router, Smart-Home-Geraete, industrielle Steuerungssysteme.
+
+## Kernpflichten fuer Hersteller
+
+### 1. Cybersecurity-Risikobewertung
+- Systematische Bewertung der Cybersecurity-Risiken des Produkts
+- Dokumentation der Risikoanalyse
+- Regelmaessige Aktualisierung
+
+### 2. Secure Development (SSDLC)
+- Sichere Entwicklungsprozesse etablieren
+- Code Reviews und Security Testing
+- Supply-Chain-Security pruefen
+
+### 3. Vulnerability Management
+- Aktives CVE-Monitoring
+- Coordinated Vulnerability Disclosure (CVD)
+- Patch-Bereitstellung waehrend des gesamten Support-Zeitraums
+
+### 4. Security Updates
+- Sichere Update-Mechanismen (signiert, integritaetsgeprueft)
+- Automatische oder einfache Update-Moeglichkeit fuer Nutzer
+- Mindest-Support-Zeitraum: 5 Jahre oder erwartete Produktlebensdauer
+
+### 5. Software Bill of Materials (SBOM)
+- Dokumentation aller Software-Komponenten
+- Top-Level-Abhaengigkeiten
+- Maschinenlesbares Format
+
+### 6. Incident Reporting
+- **24 Stunden:** Fruehwarnung an ENISA/nationale Behoerde
+- **72 Stunden:** Detaillierter Incident Report
+- Meldepflicht fuer aktiv ausgenutzte Schwachstellen
+
+## CE-Kennzeichnung
+
+Der CRA wird Teil der **CE-Konformitaet**. Ab Dezember 2027 duerfen Produkte ohne Cybersecurity-Konformitaet **nicht mehr in der EU verkauft werden**.
+
+## Sanktionen
+
+| Verstoss | Bussgeld |
+|----------|----------|
+| Wesentliche Anforderungen (Annex I) | Bis 15 Mio. EUR oder 2,5% des Jahresumsatzes |
+| Sonstige Pflichten | Bis 10 Mio. EUR oder 2% des Jahresumsatzes |
+| Falsche Informationen | Bis 5 Mio. EUR oder 1% des Jahresumsatzes |',
+ ARRAY['Art. 13 CRA', 'Art. 14 CRA', 'Annex I CRA', 'Annex II CRA', '(EU) 2024/2847'],
+ ARRAY['cra', 'cybersecurity', 'ce-kennzeichnung', 'iot', 'software', 'sbom', 'vulnerability', 'incident-reporting'],
+ 'critical',
+ ARRAY['https://eur-lex.europa.eu/eli/reg/2024/2847/oj/eng'])
+ON CONFLICT (id) DO NOTHING;
+
+-- Artikel 2: CRA Security Controls (Annex I)
+INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
+('cra-security-controls', 'cra',
+ 'CRA Annex I — 35 Essential Cybersecurity Requirements',
+ 'Der CRA definiert in Annex I die wesentlichen Cybersicherheitsanforderungen. Daraus ergeben sich etwa 35 konkrete Security-Controls fuer den gesamten Produktlebenszyklus.',
+ '## Ueberblick
+
+Annex I des CRA enthaelt die **Essential Cybersecurity Requirements**. Sie lassen sich in 7 Themenbereiche mit insgesamt etwa 35 konkreten Controls aufteilen.
+
+## 1. Secure-by-Design / Architektur
+
+| # | Control | Beschreibung |
+|---|---------|-------------|
+| 1 | Secure-by-default | Produkte mit sicheren Standardeinstellungen ausliefern |
+| 2 | Minimale Angriffsflaeche | Nur notwendige Dienste und Schnittstellen aktivieren |
+| 3 | Sichere Systemarchitektur | Sicherheitskritische Komponenten isolieren und schuetzen |
+| 4 | Least-Privilege-Prinzip | Minimale Berechtigungen fuer Komponenten und Nutzer |
+| 5 | Trennung kritischer Funktionen | Isolation sicherheitskritischer Funktionen |
+| 6 | System-Haertung | Deaktivierung unnoetiger Services und Ports |
+| 7 | Manipulationsschutz | Schutz vor unautorisierter Software-Aenderung |
+| 8 | Integritaetspruefung | Signaturen und Integritaetschecks |
+| 9 | Zugriffsschutz | Zugriffskontrollen implementieren |
+
+## 2. Authentifizierung & Zugriffskontrolle
+
+| # | Control | Beschreibung |
+|---|---------|-------------|
+| 10 | Starke Authentifizierung | Sichere Authentifizierungsmechanismen |
+| 11 | Keine Default-Passwoerter | Keine universellen Standardpasswoerter |
+| 12 | Credential-Management | Sichere Verwaltung von Zugangsdaten |
+| 13 | Sitzungsmanagement | Sichere Session-Verwaltung |
+| 14 | Brute-Force-Schutz | Schutz vor Brute-Force-Angriffen |
+| 15 | Autorisierung | Rollenbasierte Zugriffskontrolle |
+
+## 3. Kryptografie & Datenschutz
+
+| # | Control | Beschreibung |
+|---|---------|-------------|
+| 16 | Datenverschluesselung | Verschluesselung sensibler Daten |
+| 17 | Speicher-Schutz | Schutz gespeicherter Daten (at-rest) |
+| 18 | Transport-Schutz | Schutz uebertragener Daten (in-transit) |
+| 19 | Schluesselmanagement | Sicheres kryptografisches Schluesselmanagement |
+| 20 | Schluesselschutz | Schutz kryptografischer Schluessel vor Zugriff |
+
+## 4. Software-Lifecycle-Security
+
+| # | Control | Beschreibung |
+|---|---------|-------------|
+| 21 | Secure Development Lifecycle | Strukturierter SSDLC-Prozess |
+| 22 | Code Reviews | Systematische Code-Ueberpruefungen |
+| 23 | Sichere Entwicklungspraktiken | Static Analysis, SAST, DAST |
+| 24 | Supply-Chain-Security | Pruefung von Drittanbieter-Komponenten |
+| 25 | Dependency-Monitoring | Ueberwachung von Abhaengigkeiten |
+| 26 | SBOM | Software Bill of Materials fuehren |
+
+## 5. Logging, Monitoring & Incident Detection
+
+| # | Control | Beschreibung |
+|---|---------|-------------|
+| 27 | Security-Logging | Protokollierung sicherheitsrelevanter Ereignisse |
+| 28 | Ereignis-Monitoring | Ueberwachung sicherheitsrelevanter Events |
+| 29 | Anomalie-Erkennung | Erkennung von Angriffen oder Anomalien |
+| 30 | Log-Integritaet | Schutz der Protokoll-Integritaet |
+
+## 6. Update- und Patch-Management
+
+| # | Control | Beschreibung |
+|---|---------|-------------|
+| 31 | Sichere Update-Mechanismen | Sichere Verfahren fuer Software-Updates |
+| 32 | Update-Authentizitaet | Signaturen fuer Updates |
+| 33 | Update-Integritaet | Integritaetspruefung bei Updates |
+| 34 | Lifecycle-Support | Security-Updates waehrend des gesamten Lebenszyklus |
+
+## 7. Vulnerability-Handling
+
+| # | Control | Beschreibung |
+|---|---------|-------------|
+| 35 | Vulnerability-Management | Strukturierter Prozess fuer Schwachstellen-Behandlung |
+
+Dazu gehoert:
+- Koordinierte Offenlegung (Coordinated Vulnerability Disclosure)
+- CVE-Monitoring
+- Patch-Bereitstellung innerhalb angemessener Frist
+
+## Automatisierungspotential
+
+Diese 35 Controls koennen automatisch zu folgenden Dokumenten fuehren:
+- **Cybersecurity Policy** (Grundsatzdokument)
+- **Secure Development Policy** (SSDLC)
+- **Vulnerability Management Policy** (CVD, Patching)
+- **Incident Response Policy** (24h/72h Meldung)
+- **SBOM-Dokumentation** (Komponentenliste)',
+ ARRAY['Annex I CRA', 'Art. 13 CRA', 'Art. 14 CRA', 'Art. 15 CRA'],
+ ARRAY['security-controls', 'annex-i', 'secure-by-design', 'authentifizierung', 'kryptografie', 'sbom', 'vulnerability', 'patching'],
+ 'critical',
+ ARRAY['https://eur-lex.europa.eu/eli/reg/2024/2847/oj/eng'])
+ON CONFLICT (id) DO NOTHING;
+
+-- Artikel 3: CRA + NIS2 + AI Act Zusammenspiel
+INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
+('cra-regulierungsrahmen', 'cra',
+ 'CRA + NIS2 + AI Act — Das neue EU-Security-Framework',
+ 'CRA, NIS2-Richtlinie und AI Act bilden zusammen ein umfassendes EU-Sicherheitsframework fuer digitale Produkte, Infrastrukturen und KI-Systeme.',
+ '## Ueberblick
+
+Die EU hat mit drei zentralen Rechtsakten ein zusammenhaengendes Framework fuer Cybersicherheit und KI-Regulierung geschaffen. Fuer Softwarehersteller, die KI einsetzen, sind alle drei relevant.
+
+## Die drei Saeulen
+
+| Verordnung | Fokus | Zielgruppe | Anwendung ab |
+|-----------|-------|-----------|-------------|
+| **CRA** (2024/2847) | Produkt-Cybersecurity | Hersteller von Hardware/Software | 12/2027 |
+| **NIS2** (2022/2555) | Infrastruktur-Security | Betreiber wesentlicher Dienste | 10/2024 (national) |
+| **AI Act** (2024/1689) | KI-Regulierung | Anbieter/Betreiber von KI-Systemen | 08/2025 (stufenweise) |
+
+## Abgrenzung
+
+### CRA vs. NIS2
+- **CRA**: Regelt die **Sicherheit des Produkts** selbst (Design, Updates, Vulnerability Handling)
+- **NIS2**: Regelt die **Sicherheit der Organisation** (Risikomanagement, Incident Response, Supply Chain)
+- **Ueberschneidung**: Beide fordern Incident Reporting und Supply-Chain-Security
+
+### CRA vs. AI Act
+- **CRA**: Cybersecurity-Anforderungen an **alle** digitalen Produkte
+- **AI Act**: Zusaetzliche Anforderungen fuer Produkte, die **KI enthalten** (Transparenz, Erklaerbarkeit, Risikobewertung)
+- **Ueberschneidung**: Hochrisiko-KI-Systeme muessen sowohl CRA als auch AI Act erfuellen
+
+## Synergien nutzen
+
+Ein Unternehmen, das alle drei Verordnungen erfuellen muss, kann Synergien nutzen:
+
+| Thema | CRA | NIS2 | AI Act |
+|-------|-----|------|--------|
+| Risikobewertung | Produkt-Risiko | Org-Risiko | KI-Risiko |
+| Incident Reporting | 24h/72h | 24h/72h | Meldepflicht |
+| Supply Chain | SBOM | Lieferantenpruefung | Drittanbieter-KI |
+| Dokumentation | Tech. Doku | Policies | KI-Registrierung |
+| Audit/Konformitaet | CE-Kennzeichnung | Zertifizierung | Konformitaetsbewertung |
+
+## Empfehlung
+
+Bauen Sie ein **integriertes Compliance-Management-System** auf, das alle drei Verordnungen abdeckt. Gemeinsame Policies (Security, Incident Response, Risk Management) koennen fuer alle drei Regelwerke genutzt werden.',
+ ARRAY['(EU) 2024/2847', '(EU) 2022/2555', '(EU) 2024/1689', 'Art. 13 CRA', 'Art. 21 NIS2', 'Art. 9 AI Act'],
+ ARRAY['cra', 'nis2', 'ai-act', 'security-framework', 'compliance', 'synergien', 'ce-kennzeichnung'],
+ 'important',
+ ARRAY[]::text[])
+ON CONFLICT (id) DO NOTHING;
@@ -0,0 +1,515 @@
+-- Migration 056: CRA Cybersecurity Policy Template
+-- Unternehmensrichtlinie Cybersecurity basierend auf EU Cyber Resilience Act, ISO 27001 Best Practices
+
+INSERT INTO compliance_legal_templates (
+    id, tenant_id, document_type, title, description, content,
+    placeholders, language, jurisdiction,
+    license_id, license_name, source_name,
+    attribution_required, is_complete_document, version, status,
+    created_at, updated_at
+) VALUES (
+    gen_random_uuid(),
+    '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e',
+    'cybersecurity_policy',
+    'Unternehmensrichtlinie Cybersecurity (CRA-konform)',
+    'Umfassende Cybersecurity-Richtlinie basierend auf dem EU Cyber Resilience Act (EU) 2024/2847, ISO 27001 und Secure-Development-Standards. Deckt Governance, Risikomanagement, Secure Development, Vulnerability Management, Incident Response und Compliance ab.',
+    $template$# Unternehmensrichtlinie Cybersecurity
+
+**{{COMPANY_NAME}}**
+
+*(Cybersecurity Policy — CRA-konform)*
+
+| Feld | Inhalt |
+|------|--------|
+| Dokumenttyp | Unternehmensrichtlinie |
+| Version | {{DOCUMENT_VERSION}} |
+| Datum | {{VERSION_DATE}} |
+| Naechste Ueberpruefung | {{NEXT_REVIEW_DATE}} |
+| Verantwortlich | {{ISB_NAME}} (CISO/ISB) |
+| Freigabe | {{GF_NAME}} (Geschaeftsfuehrung) |
+| Vertraulichkeit | Intern |
+
+---
+
+## 1. Zweck der Richtlinie
+
+Diese Cybersecurity-Richtlinie legt die organisatorischen und technischen Massnahmen fest, mit denen {{COMPANY_NAME}}:
+
+- Informationssysteme schuetzt
+- Cyberrisiken systematisch reduziert
+- Gesetzliche Anforderungen erfuellt (insb. EU Cyber Resilience Act, NIS2, DSGVO)
+- Sicherheitsvorfaelle erkennt, behandelt und meldet
+
+Die Richtlinie gilt fuer alle:
+
+- Mitarbeiterinnen und Mitarbeiter von {{COMPANY_NAME}}
+- Externe Dienstleister und Auftragnehmer
+- IT-Systeme, Software und Cloud-Services
+- Produkte mit digitalen Elementen im Sinne des CRA
+
+---
+
+## 2. Geltungsbereich
+
+Diese Richtlinie gilt fuer:
+
+- Unternehmens-IT und Netzwerkinfrastruktur
+- Interne Softwareentwicklung
+- Cloud-Infrastruktur und SaaS-Dienste
+- Datenverarbeitungssysteme
+- Produkte mit digitalen Elementen (Software, IoT, Firmware)
+- Lieferanten und Dienstleister mit Zugang zu Systemen von {{COMPANY_NAME}}
+
+Betroffene Assets:
+
+- Server und Netzwerkkomponenten
+- Endgeraete (Laptops, Mobilgeraete)
+- Software und Firmware
+- Datenbanken und APIs
+- Kryptografische Schluessel und Zertifikate
+
+---
+
+## 3. Sicherheitsziele
+
+Die Cybersecurity-Strategie von {{COMPANY_NAME}} verfolgt folgende Ziele:
+
+### Vertraulichkeit
+Schutz sensibler Daten vor unbefugtem Zugriff. Klassifizierung von Daten nach Schutzbedarf.
+
+### Integritaet
+Sicherstellung, dass Daten und Systeme nicht unautorisiert veraendert werden. Einsatz von Integritaetspruefungen und Signaturen.
+
+### Verfuegbarkeit
+Systeme und Dienste muessen gemaess den vereinbarten SLAs verfuegbar sein. Redundanz und Wiederherstellungsfaehigkeit sicherstellen.
+
+### Nachvollziehbarkeit
+Sicherheitsrelevante Ereignisse muessen lueckenlos dokumentiert und fuer Audits nachvollziehbar sein.
+
+---
+
+## 4. Governance und Verantwortlichkeiten
+
+### 4.1 Geschaeftsfuehrung
+
+{{GF_NAME}} ist verantwortlich fuer:
+
+- Festlegung der Sicherheitsstrategie
+- Bereitstellung angemessener Ressourcen
+- Ueberwachung der Compliance-Einhaltung
+- Jaehrliche Freigabe dieser Richtlinie
+
+### 4.2 Chief Information Security Officer (CISO/ISB)
+
+{{ISB_NAME}} ist verantwortlich fuer:
+
+- Umsetzung der Sicherheitsstrategie
+- Risikomanagement und Risikoberichterstattung
+- Security-Monitoring und Threat Intelligence
+- Koordination des Incident-Response-Teams
+- Kontaktperson fuer Behoerden bei Sicherheitsvorfaellen
+
+### 4.3 Datenschutzbeauftragter
+
+{{DPO_NAME}} ({{DPO_EMAIL}}) wird bei sicherheitsrelevanten Vorfaellen einbezogen, die personenbezogene Daten betreffen.
+
+### 4.4 IT-Abteilung
+
+Verantwortlich fuer:
+
+- Sichere Infrastruktur und Systemhaertung
+- Patch-Management und Update-Bereitstellung
+- Netzwerksegmentierung und Firewall-Management
+- Monitoring und Log-Management
+
+### 4.5 Entwicklerteams
+
+Verantwortlich fuer:
+
+- Secure Coding und Code Reviews
+- Dependency Management und SBOM-Pflege
+- Security Testing (SAST, DAST, SCA)
+- Vulnerability Remediation
+
+### 4.6 Alle Mitarbeiter
+
+Alle Mitarbeiter von {{COMPANY_NAME}} muessen:
+
+- Sicherheitsrichtlinien einhalten
+- Sicherheitsvorfaelle unverzueglich melden
+- An jaehrlichen Security-Schulungen teilnehmen
+- Phishing-Versuche und verdaechtige Aktivitaeten melden
+
+---
+
+## 5. Risikomanagement
+
+{{COMPANY_NAME}} fuehrt regelmaessig eine Cyber-Risikoanalyse durch.
+
+### Prozess
+
+1. **Identifikation** kritischer Assets und Daten
+2. **Bedrohungsanalyse** (Threat Modeling, STRIDE)
+3. **Schwachstellenanalyse** (CVE-Monitoring, Vulnerability Scanning)
+4. **Risikobewertung** (Eintrittswahrscheinlichkeit x Auswirkung)
+5. **Risikobehandlung** (Vermeiden, Reduzieren, Uebertragen, Akzeptieren)
+
+### Frequenz
+
+Risikobewertungen erfolgen:
+
+- Mindestens jaehrlich
+- Bei wesentlichen Systemaenderungen
+- Bei neuen Produkten oder Dienstleistungen
+- Nach Sicherheitsvorfaellen
+
+### Dokumentation
+
+Alle Risikoanalysen werden dokumentiert und fuer mindestens 3 Jahre aufbewahrt. Die Ergebnisse werden der Geschaeftsfuehrung in Form eines Risikoberichts vorgelegt.
+
+---
+
+## 6. Secure System Architecture
+
+Systeme von {{COMPANY_NAME}} muessen nach folgenden Prinzipien entwickelt und betrieben werden:
+
+### Security by Design
+Sicherheitsanforderungen werden bereits in der Architekturphase beruecksichtigt. Jedes neue System durchlaeuft ein Security Architecture Review.
+
+### Security by Default
+Systeme werden mit sicheren Grundeinstellungen ausgeliefert. Keine Dienste oder Ports sind standardmaessig aktiviert, die nicht benoetigt werden.
+
+### Least Privilege
+Benutzer und Systeme erhalten nur die minimal notwendigen Berechtigungen. Privilegierte Zugriffe werden gesondert protokolliert.
+
+### Segmentierung
+Kritische Systeme werden durch Netzwerksegmentierung isoliert. Produktiv-, Entwicklungs- und Testumgebungen sind strikt getrennt.
+
+### Haertung
+Alle Systeme werden gemaess anerkannter Haertungsrichtlinien (CIS Benchmarks, BSI IT-Grundschutz) konfiguriert.
+
+---
+
+## 7. Zugriffskontrollen
+
+### Anforderungen
+
+- Eindeutige, personalisierte Benutzerkonten
+- Starke Passwortrichtlinie (mind. 12 Zeichen, Komplexitaet)
+- Multi-Faktor-Authentifizierung (MFA) fuer alle administrativen Zugriffe und externe Zugaenge
+- Rollenbasierte Zugriffskontrolle (RBAC) mit regelmaessiger Rezertifizierung
+- Automatische Sperrung nach 5 fehlgeschlagenen Login-Versuchen
+
+### Verboten
+
+- Gemeinsam genutzte Accounts (Shared Accounts)
+- Universal-Default-Passwoerter
+- Unverschluesselte Speicherung von Zugangsdaten
+- Weitergabe von Zugangsdaten per E-Mail
+
+### Privileged Access Management
+
+Administratorzugriffe muessen:
+
+- Gesondert beantragt und genehmigt werden
+- Zeitlich begrenzt sein (Just-in-Time Access)
+- Vollstaendig protokolliert werden
+
+---
+
+## 8. Kryptografie
+
+{{COMPANY_NAME}} verwendet ausschliesslich moderne, anerkannte kryptografische Verfahren.
+
+### Verschluesselung erforderlich fuer
+
+- Gespeicherte sensible Daten (at rest) — AES-256
+- Datenuebertragung (in transit) — TLS 1.2+, vorzugsweise TLS 1.3
+- Backups — vollstaendig verschluesselt
+- Konfigurationsdaten und Secrets — Vault oder vergleichbar
+
+### Schluesselmanagement
+
+- Schluessel muessen sicher gespeichert werden (HSM oder Vault)
+- Regelmaessige Rotation (mind. jaehrlich, bei Kompromittierung sofort)
+- Zugriff nur fuer autorisierte Personen
+- Dokumentation der Schluessel-Lebenszyklen
+
+### Verbotene Verfahren
+
+- MD5 und SHA-1 fuer kryptografische Zwecke
+- DES und 3DES
+- SSL und TLS < 1.2
+
+---
+
+## 9. Secure Software Development Lifecycle (SSDLC)
+
+Alle Softwareprodukte von {{COMPANY_NAME}} muessen einen sicheren Entwicklungsprozess durchlaufen. Dies entspricht den Anforderungen des CRA Annex I.
+
+### Entwicklungsprozess
+
+1. **Security Requirements** — Sicherheitsanforderungen in User Stories und Epics
+2. **Threat Modeling** — Bedrohungsanalyse in der Designphase
+3. **Secure Coding** — Einhaltung von Secure-Coding-Standards
+4. **Code Review** — Peer Review mit Security-Fokus
+5. **Security Testing** — Automatisierte und manuelle Tests
+6. **Release-Freigabe** — Security Sign-off vor Deployment
+
+### Pflichtmassnahmen
+
+- **Static Application Security Testing (SAST)** — in der CI/CD-Pipeline
+- **Software Composition Analysis (SCA)** — Dependency Scanning
+- **Dynamic Application Security Testing (DAST)** — vor jedem Major Release
+- **Secrets Detection** — Automatische Pruefung auf eingebettete Zugangsdaten
+- **Penetration Testing** — mindestens jaehrlich durch externe Tester
+
+---
+
+## 10. Software-Supply-Chain-Security
+
+{{COMPANY_NAME}} kontrolliert externe Softwarekomponenten systematisch.
+
+### Software Bill of Materials (SBOM)
+
+Fuer alle Produkte wird ein SBOM gefuehrt, das mindestens folgende Informationen enthaelt:
+
+- Name und Version aller Software-Komponenten
+- Lizenzinformationen
+- Bekannte Schwachstellen (CVE)
+
+Das SBOM wird bei jedem Release aktualisiert und in maschinenlesbarem Format (CycloneDX oder SPDX) bereitgestellt.
+
+### Open-Source-Kontrolle
+
+- Lizenzpruefung vor Aufnahme neuer Abhaengigkeiten
+- Monitoring auf bekannte Schwachstellen (CVE)
+- Regelmaessige Updates von Abhaengigkeiten
+
+---
+
+## 11. Logging und Monitoring
+
+### Logging umfasst
+
+- Erfolgreiche und fehlgeschlagene Login-Versuche
+- Administrative Systemaenderungen
+- Zugriffe auf sensible Daten
+- Sicherheitsrelevante Konfigurationsaenderungen
+- API-Zugriffe und Fehler
+
+### Anforderungen an Logs
+
+- Manipulationssicher (append-only, signiert oder WORM)
+- Zentral gesammelt (SIEM oder vergleichbar)
+- Aufbewahrung mindestens 12 Monate
+- Zugriff nur fuer autorisiertes Security-Personal
+
+### Monitoring
+
+- Echtzeit-Ueberwachung sicherheitsrelevanter Ereignisse
+- Automatische Alarmierung bei Anomalien
+- Korrelation von Events aus verschiedenen Quellen
+
+---
+
+## 12. Vulnerability Management
+
+{{COMPANY_NAME}} betreibt ein strukturiertes Schwachstellenmanagement.
+
+### Prozess
+
+1. **Identifikation** — Automatische Scans, Bug Bounty, CVE-Monitoring
+2. **Bewertung** — Risikobewertung nach CVSS
+3. **Priorisierung** — Kritische Schwachstellen zuerst
+4. **Behebung** — Patch-Entwicklung und Deployment
+5. **Verifizierung** — Bestaetigung der Behebung
+6. **Kommunikation** — Information betroffener Kunden und Behoerden
+
+### Coordinated Vulnerability Disclosure (CVD)
+
+{{COMPANY_NAME}} veroeffentlicht eine CVD-Policy. Sicherheitsforscher koennen Schwachstellen an {{SECURITY_EMAIL}} melden. Meldungen werden innerhalb von 5 Werktagen bestaetigt.
+
+---
+
+## 13. Patch- und Update-Management
+
+Alle Systeme muessen regelmaessig aktualisiert werden.
+
+### Patchzyklen
+
+| Risikostufe | Reaktionszeit |
+|-------------|---------------|
+| Kritisch (CVSS >= 9.0) | 24-72 Stunden |
+| Hoch (CVSS 7.0-8.9) | 7 Tage |
+| Mittel (CVSS 4.0-6.9) | 30 Tage |
+| Niedrig (CVSS < 4.0) | Naechster regulaerer Update-Zyklus |
+
+### Anforderungen an Updates
+
+- Alle Updates muessen **digital signiert** sein
+- Integritaetspruefung vor Installation
+- Rollback-Moeglichkeit bei fehlerhaften Updates
+- Automatische Update-Benachrichtigung fuer Kunden
+- **Mindest-Support-Zeitraum: 5 Jahre** (gemaess CRA)
+
+---
+
+## 14. Incident Response
+
+{{COMPANY_NAME}} betreibt einen dokumentierten Incident-Response-Prozess.
+
+### Schritte
+
+1. **Detection** — Erkennung durch Monitoring, Meldung oder externe Information
+2. **Classification** — Einstufung nach Schweregrad (P1-P4)
+3. **Containment** — Sofortige Eindaemmung des Vorfalls
+4. **Investigation** — Forensische Analyse und Ursachenermittlung
+5. **Recovery** — Wiederherstellung des Normalbetriebs
+6. **Reporting** — Dokumentation und Meldung an Behoerden
+7. **Lessons Learned** — Nachbereitung und Verbesserung
+
+### Meldepflichten (CRA-konform)
+
+| Meldung | Frist | Empfaenger |
+|---------|-------|-----------|
+| **Fruehwarnung** | 24 Stunden | ENISA / nationale Behoerde |
+| **Detaillierter Bericht** | 72 Stunden | ENISA / nationale Behoerde |
+| **Abschlussbericht** | 1 Monat | ENISA / nationale Behoerde |
+
+Bei personenbezogenen Daten gelten zusaetzlich die Fristen nach Art. 33/34 DSGVO (72 Stunden an Aufsichtsbehoerde).
+
+### Kontakte
+
+| Rolle | Person | Kontakt |
+|-------|--------|---------|
+| CISO/ISB | {{ISB_NAME}} | {{ISB_EMAIL}} |
+| DSB | {{DPO_NAME}} | {{DPO_EMAIL}} |
+| GF | {{GF_NAME}} | {{GF_EMAIL}} |
+
+---
+
+## 15. Security Testing
+
+Folgende Tests werden regelmaessig durchgefuehrt:
+
+| Test | Frequenz | Durchfuehrung |
+|------|----------|--------------|
+| Vulnerability Scans | Woechentlich | Automatisiert (CI/CD) |
+| SAST/SCA | Bei jedem Commit | Automatisiert (CI/CD) |
+| DAST | Vor Major Releases | Automatisiert + manuell |
+| Penetration Tests | Jaehrlich | Externer Dienstleister |
+| Red-Team-Tests | Alle 2 Jahre | Externer Dienstleister |
+| Social Engineering | Jaehrlich | Externer Dienstleister |
+
+---
+
+## 16. Backup und Wiederherstellung
+
+### Anforderungen
+
+- **Taegliche Backups** aller kritischen Systeme und Daten
+- **Off-Site-Backups** an geografisch getrenntem Standort
+- **Verschluesselung** aller Backup-Daten
+- **Wiederherstellungstests** mindestens vierteljaehrlich
+
+### Recovery-Ziele
+
+| Metrik | Ziel |
+|--------|------|
+| Recovery Time Objective (RTO) | {{RTO_HOURS}} Stunden |
+| Recovery Point Objective (RPO) | {{RPO_HOURS}} Stunden |
+
+---
+
+## 17. Lieferanten- und Drittanbieter-Management
+
+Lieferanten mit Zugang zu Systemen oder Daten von {{COMPANY_NAME}} muessen Sicherheitsanforderungen erfuellen.
+
+### Anforderungen
+
+- Sicherheitspruefung vor Vertragsabschluss (Security Assessment)
+- Sicherheitsanforderungen im Vertrag (Auftragsverarbeitung, SLA)
+- Regelmaessige Audits und Compliance-Nachweise
+- Incident-Notification-Pflicht innerhalb von 24 Stunden
+- Nachweis ueber eigenes Vulnerability Management
+
+---
+
+## 18. Schulungen und Awareness
+
+Alle Mitarbeiter von {{COMPANY_NAME}} erhalten:
+
+- **Jaehrliche Security-Awareness-Trainings**
+- **Phishing-Simulationen** (mind. 2x jaehrlich)
+- **Rollenspezifische Schulungen** (Entwickler: Secure Coding, IT: Incident Response)
+- **Onboarding-Schulung** fuer neue Mitarbeiter
+
+Teilnahme ist verpflichtend. Die Teilnahme wird dokumentiert.
+
+---
+
+## 19. Dokumentation und Compliance
+
+{{COMPANY_NAME}} dokumentiert:
+
+- Risikoanalysen und Risikobehandlungsplaene
+- Sicherheitskontrollen und deren Wirksamkeit
+- Sicherheitsvorfaelle und deren Behandlung
+- Software-Updates und Patches
+- SBOM fuer alle Produkte
+- Audit-Ergebnisse
+
+Die Dokumentation muss jederzeit fuer Audits und behoerdliche Anfragen verfuegbar sein.
+
+### Regulatorische Compliance
+
+Diese Richtlinie dient der Einhaltung folgender Vorschriften:
+
+- **EU Cyber Resilience Act** (EU) 2024/2847
+- **NIS2-Richtlinie** (EU) 2022/2555
+- **DSGVO** (EU) 2016/679 — technische und organisatorische Massnahmen
+- **ISO/IEC 27001** — Best Practices fuer Informationssicherheit
+
+---
+
+## 20. Durchsetzung
+
+Verstoesse gegen diese Richtlinie koennen je nach Schwere folgende Konsequenzen haben:
+
+- Disziplinarmassnahmen
+- Vertragsstrafen (bei externen Dienstleistern)
+- Rechtliche Konsequenzen (bei vorsaetzlichen Verstoessen)
+
+---
+
+## 21. Ueberpruefung und Aktualisierung
+
+Diese Cybersecurity-Richtlinie wird ueberprueft:
+
+- **Jaehrlich** durch {{ISB_NAME}} (CISO/ISB)
+- Bei **regulatorischen Aenderungen** (neue EU-Verordnungen, nationale Gesetze)
+- Nach **groesseren Sicherheitsvorfaellen**
+- Bei **wesentlichen Aenderungen** der IT-Infrastruktur oder Produktlandschaft
+
+Die naechste planmaessige Ueberpruefung ist am **{{NEXT_REVIEW_DATE}}**.
+
+---
+
+## Freigabe
+
+| | Name | Datum | Unterschrift |
+|--|------|-------|-------------|
+| Erstellt von | {{ISB_NAME}} (CISO/ISB) | {{VERSION_DATE}} | _________________ |
+| Freigegeben von | {{GF_NAME}} (Geschaeftsfuehrung) | {{VERSION_DATE}} | _________________ |
+
+---
+
+*Dieses Dokument ist Eigentum von {{COMPANY_NAME}} und unterliegt der Vertraulichkeitsstufe "Intern".*
+$template$,
+    CAST('["COMPANY_NAME","COMPANY_ADDRESS","COMPANY_CITY","GF_NAME","GF_EMAIL","ISB_NAME","ISB_EMAIL","DPO_NAME","DPO_EMAIL","SECURITY_EMAIL","DOCUMENT_VERSION","VERSION_DATE","NEXT_REVIEW_DATE","RTO_HOURS","RPO_HOURS"]' AS jsonb),
+    'de', 'DE',
+    'mit', 'MIT License', 'BreakPilot Compliance',
+    false, true, '1.0.0', 'published',
+    NOW(), NOW()
+) ON CONFLICT DO NOTHING;
@@ -0,0 +1,23 @@
+-- 057: Add batch processing paths to canonical_processed_chunks
+-- New values: structured_batch, llm_reform_batch (used by batch control generation)
+-- Guard uses to_regclass() so the table is resolved via search_path, exactly like the ALTER TABLE statements below.
+DO $$
+BEGIN
+    IF to_regclass('canonical_processed_chunks') IS NOT NULL THEN
+        ALTER TABLE canonical_processed_chunks
+            DROP CONSTRAINT IF EXISTS canonical_processed_chunks_processing_path_check;
+        ALTER TABLE canonical_processed_chunks
+            ADD CONSTRAINT canonical_processed_chunks_processing_path_check
+            CHECK (processing_path IN (
+                'structured',
+                'llm_reform',
+                'skipped',
+                'prefilter_skip',
+                'no_control',
+                'store_failed',
+                'error',
+                'structured_batch',
+                'llm_reform_batch'
+            ));
+    END IF;
+END $$;
@@ -0,0 +1,8 @@
+-- Migration 058: canonical_controls.generation_strategy
+-- Records which batching mode produced a control (document-grouped vs. ungrouped).
+-- Existing rows receive the default 'ungrouped'; ADD COLUMN IF NOT EXISTS makes a re-run a no-op.
+ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
+    generation_strategy TEXT NOT NULL DEFAULT 'ungrouped';
+
+COMMENT ON COLUMN canonical_controls.generation_strategy
+    IS 'How chunks were batched during generation: ungrouped (random), document_grouped (by regulation+article)';
@@ -0,0 +1,292 @@
+-- Migration 059: CRA Annex I — Detaillierte Essential Cybersecurity Requirements
+-- Erweitert den bestehenden Wiki-Artikel 'cra-security-controls' um Part 1 + Part 2,
+-- Produktklassifizierung und ISO 27001 Mapping.
+-- Zusaetzlich: Neuer Artikel fuer CRA-Produktklassifizierung und Konformitaetsbewertung.
+
+-- ============================================================================
+-- 1) Update: CRA Security Controls (Annex I) — Vollstaendige 8-Kategorien-Struktur
+-- ============================================================================
+UPDATE compliance_wiki_articles
+SET
+    title   = 'CRA Annex I — Essential Cybersecurity Requirements (Vollstaendig)',
+    summary = 'Annex I des CRA definiert die wesentlichen Cybersicherheitsanforderungen in zwei Teilen: Teil 1 (Produktsicherheit, 11 Anforderungen) und Teil 2 (Schwachstellenbehandlung, 8 Anforderungen). Daraus ergeben sich 40 konkrete Security-Controls in 8 Kategorien.',
+    content = '## Ueberblick
+
+Der **EU Cyber Resilience Act (CRA)**, Verordnung (EU) 2024/2847, legt in **Annex I** die **Essential Cybersecurity Requirements** fest, die alle Produkte mit digitalen Elementen erfuellen muessen. Annex I besteht aus zwei Teilen:
+
+- **Teil 1 — Sicherheitsanforderungen an Produkte** (11 Kernanforderungen)
+- **Teil 2 — Anforderungen an die Schwachstellenbehandlung** (8 Prozessanforderungen)
+
+Daraus lassen sich **40 konkrete Security-Controls** in **8 thematischen Kategorien** ableiten. Diese Controls bilden die Grundlage fuer eine Cybersecurity-Compliance-Strategie.
+
+---
+
+## Teil 1: Sicherheitsanforderungen an Produkte
+
+### Kategorie 1 — Secure-by-Design und Architektur
+
+Diese Controls stellen sicher, dass Sicherheit von Anfang an in die Produktarchitektur integriert wird.
+
+| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
+|---|---------|-------------|-------------|-------------------|
+| 1 | **Secure-by-Default-Konfiguration** | Annex I, 1(1) | Produkte muessen mit sicheren Standardeinstellungen ausgeliefert werden. Keine offenen Ports, keine aktivierten Debug-Schnittstellen, keine unnoetig laufenden Dienste. | A.8.9 |
+| 2 | **Minimale Angriffsflaeche** | Annex I, 1(2) | Nur notwendige Schnittstellen, Dienste und Protokolle aktivieren. Jede zusaetzliche Funktionalitaet vergroessert die Angriffsflaeche und muss einzeln gerechtfertigt werden. | A.8.9, A.8.20 |
+| 3 | **Sichere Systemarchitektur** | Annex I, 1(3) | Sicherheitskritische Komponenten muessen isoliert werden (Sandboxing, Containerisierung, Privilege Separation). Defense-in-Depth-Prinzip anwenden. | A.8.27 |
+| 4 | **Least-Privilege-Prinzip** | Annex I, 1(3)(d) | Jede Komponente, jeder Prozess und jeder Benutzer erhaelt nur die minimal notwendigen Berechtigungen. Privilegien-Eskalation muss verhindert werden. | A.8.2, A.8.3 |
+| 5 | **Manipulationsschutz** | Annex I, 1(3)(c) | Schutz vor unautorisierter Aenderung von Software und Konfiguration durch Integritaetsmechanismen (Code Signing, Secure Boot, TPM). | A.8.24 |
+| 6 | **Integritaetspruefung** | Annex I, 1(3)(c) | Automatische Ueberpruefung der Integritaet von Software, Firmware und Konfigurationsdaten bei Start und Laufzeit. Hash-basierte Validierung und digitale Signaturen. | A.8.24 |
+
+### Kategorie 2 — Authentifizierung und Zugriffskontrolle
+
+Controls zur Sicherstellung, dass nur autorisierte Personen und Systeme Zugriff erhalten.
+
+| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
+|---|---------|-------------|-------------|-------------------|
+| 7 | **Starke Authentifizierung** | Annex I, 1(3)(d) | Implementierung sicherer Authentifizierungsmechanismen. Multi-Faktor-Authentifizierung fuer administrative Zugriffe. Unterstuetzung moderner Standards (FIDO2, WebAuthn). | A.8.5 |
+| 8 | **Keine Default-Passwoerter** | Annex I, 1(3)(d) | Produkte duerfen keine universellen Standardpasswoerter verwenden. Jedes Geraet muss ein individuelles Passwort erhalten oder den Benutzer zur Aenderung bei Ersteinrichtung zwingen. | A.8.5 |
+| 9 | **Sicheres Credential-Management** | Annex I, 1(3)(d) | Zugangsdaten muessen verschluesselt gespeichert werden (bcrypt, Argon2id). Keine Klartextspeicherung. API-Keys und Tokens regelmaessig rotieren. | A.8.5 |
+| 10 | **Sitzungsmanagement** | Annex I, 1(3)(d) | Sichere Session-Verwaltung mit Timeout, Token-Binding und Session-Invalidierung bei Logout oder Passwortwechsel. CSRF-Schutz implementieren. | A.8.5 |
+| 11 | **Brute-Force-Schutz** | Annex I, 1(3)(d) | Schutz vor Brute-Force- und Credential-Stuffing-Angriffen durch Rate Limiting, Account Lockout und CAPTCHA-Mechanismen. | A.8.5, A.8.16 |
+| 12 | **Rollenbasierte Autorisierung** | Annex I, 1(3)(d) | Implementierung von RBAC (Role-Based Access Control). Trennung von administrativen und Nutzerfunktionen. Prinzip der geringsten Privilegien durchsetzen. | A.8.2, A.8.3 |
+
+### Kategorie 3 — Kryptografie und Datenschutz
+
+Controls zum Schutz von Daten durch kryptografische Verfahren.
+
+| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
+|---|---------|-------------|-------------|-------------------|
+| 13 | **Verschluesselung sensibler Daten** | Annex I, 1(3)(e) | Alle sensiblen Daten muessen verschluesselt werden — sowohl bei der Speicherung (at rest, AES-256) als auch bei der Uebertragung (in transit, TLS 1.2+). | A.8.24 |
+| 14 | **Speicher-Schutz (Data at Rest)** | Annex I, 1(3)(e) | Verschluesselung gespeicherter Daten auf Festplatten, in Datenbanken und Backups. Schluessel getrennt von Daten speichern. | A.8.24 |
+| 15 | **Transport-Schutz (Data in Transit)** | Annex I, 1(3)(e) | Alle Netzwerkkommunikation ueber TLS 1.2 oder hoeher. Veraltete Protokolle (SSL, TLS 1.0/1.1) deaktivieren. Certificate Pinning fuer kritische Verbindungen. | A.8.24 |
+| 16 | **Sicheres Schluesselmanagement** | Annex I, 1(3)(e) | Kryptografische Schluessel in HSM oder Vault speichern. Regelmaessige Rotation (mind. jaehrlich). Dokumentation der Schluessel-Lebenszyklen. Sofortige Rotation bei Kompromittierungsverdacht. | A.8.24 |
+| 17 | **Datenminimierung** | Annex I, 1(3)(f) | Nur Daten erfassen und verarbeiten, die fuer die Produktfunktion erforderlich sind. Personenbezogene Daten gemaess DSGVO-Grundsaetzen behandeln. | A.8.10, A.8.11 |
+
+### Kategorie 4 — Secure Software Development Lifecycle
+
+Controls fuer sichere Softwareentwicklung ueber den gesamten Lebenszyklus.
+
+| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
+|---|---------|-------------|-------------|-------------------|
+| 18 | **Strukturierter SSDLC** | Annex I, 1(1) | Implementierung eines formalen Secure Software Development Lifecycle mit definierten Security Gates in jeder Phase (Requirements, Design, Implementation, Test, Release). | A.8.25, A.8.26 |
+| 19 | **Systematische Code Reviews** | Annex I, 1(1) | Peer Reviews mit Security-Fokus fuer jeden Code-Commit. Einsatz von Checklisten fuer OWASP Top 10 und CWE Top 25. Security Champions in jedem Entwicklerteam. | A.8.25 |
+| 20 | **Automatisierte Sicherheitstests** | Annex I, 1(1) | Static Application Security Testing (SAST), Dynamic Application Security Testing (DAST) und Software Composition Analysis (SCA) in der CI/CD-Pipeline. Secrets Detection fuer eingebettete Zugangsdaten. | A.8.25 |
+| 21 | **Supply-Chain-Security** | Annex I, 1(5) | Systematische Pruefung aller Drittanbieter-Komponenten auf Schwachstellen und Lizenz-Compliance. Vertrauenswuerdigkeit von Lieferanten bewerten. | A.5.19, A.5.21 |
+| 22 | **Dependency-Monitoring** | Annex I, 1(5) | Kontinuierliche Ueberwachung aller Abhaengigkeiten auf bekannte Schwachstellen (CVE). Automatische Benachrichtigung bei neuen CVEs in verwendeten Bibliotheken. | A.8.8, A.8.25 |
+| 23 | **Software Bill of Materials (SBOM)** | Annex I, 1(5) | Fuer jedes Produkt ein maschinenlesbares SBOM fuehren (CycloneDX oder SPDX). Mindestens Top-Level-Abhaengigkeiten mit Name, Version und Lizenz dokumentieren. SBOM bei jedem Release aktualisieren. | A.8.25 |
+
+### Kategorie 5 — Logging, Monitoring und Anomalie-Erkennung
+
+Controls zur Erkennung und Nachverfolgung von Sicherheitsereignissen.
+
+| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
+|---|---------|-------------|-------------|-------------------|
+| 24 | **Security-Logging** | Annex I, 1(3)(g) | Protokollierung aller sicherheitsrelevanten Ereignisse: Login-Versuche, Berechtigungsaenderungen, administrative Aktionen, API-Zugriffe, Fehler und Ausnahmen. Logs muessen Zeitstempel, Akteur, Aktion und Ergebnis enthalten. | A.8.15 |
+| 25 | **Ereignis-Monitoring** | Annex I, 1(3)(g) | Zentrale Sammlung und Echtzeit-Ueberwachung sicherheitsrelevanter Events. Einsatz eines SIEM-Systems oder vergleichbarer Loesung. Korrelation von Events aus verschiedenen Quellen. | A.8.16 |
+| 26 | **Anomalie-Erkennung** | Annex I, 1(3)(g) | Automatische Erkennung von Angriffsmustern und ungewoehnlichem Verhalten. Alarmierung bei Abweichungen von Baseline-Verhalten. Integration von Threat Intelligence Feeds. | A.8.16 |
+| 27 | **Log-Integritaet und -Aufbewahrung** | Annex I, 1(3)(g) | Logs muessen manipulationssicher gespeichert werden (append-only, signiert oder WORM). Aufbewahrung mindestens 12 Monate. Zugriff auf Logs nur fuer autorisiertes Security-Personal. | A.8.15 |
+
+### Kategorie 6 — Update- und Patch-Management
+
+Controls fuer die sichere Bereitstellung und Installation von Updates.
+
+| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
+|---|---------|-------------|-------------|-------------------|
+| 28 | **Sichere Update-Mechanismen** | Annex I, 1(4) | Updates muessen ueber sichere Kanaele verteilt werden (HTTPS, signierte Pakete). Automatische oder einfach zugaengliche Update-Moeglichkeit fuer Endnutzer. Rollback-Faehigkeit bei fehlerhaften Updates. | A.8.8, A.8.19 |
+| 29 | **Update-Authentizitaet** | Annex I, 1(4) | Alle Updates muessen digital signiert sein. Signaturpruefung vor Installation erzwingen. Verwendung vertrauenswuerdiger Signaturschluessel mit dokumentierter Key Ceremony. | A.8.24 |
+| 30 | **Update-Integritaet** | Annex I, 1(4) | Integritaetspruefung jedes Update-Pakets vor und nach Installation (Hash-Vergleich, Signatur-Verifikation). Manipulation waehrend der Uebertragung erkennen und ablehnen. | A.8.24 |
+| 31 | **Lifecycle-Support** | Annex I, 1(4) | Security-Updates waehrend des gesamten erwarteten Produktlebenszyklus bereitstellen — mindestens **5 Jahre** ab Inverkehrbringen oder die erwartete Nutzungsdauer, je nachdem welcher Zeitraum laenger ist. End-of-Life klar kommunizieren. | A.8.8 |
+
+---
+
+## Teil 2: Anforderungen an die Schwachstellenbehandlung
+
+### Kategorie 7 — Vulnerability Management
+
+Controls fuer die systematische Identifikation, Bewertung und Behebung von Schwachstellen.
+
+| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
+|---|---------|-------------|-------------|-------------------|
+| 32 | **Schwachstellen-Identifikation** | Annex I, 2(1) | Kontinuierliches CVE-Monitoring aller eingesetzten Komponenten. Regelmaessige Vulnerability Scans (woechentlich automatisiert). Bug-Bounty-Programme oder Responsible-Disclosure-Kanaele einrichten. | A.8.8 |
+| 33 | **SBOM-Pflege und Analyse** | Annex I, 2(1) | SBOM aktuell halten und kontinuierlich gegen CVE-Datenbanken pruefen. Automatische Alarmierung bei neu entdeckten Schwachstellen in verwendeten Komponenten. | A.8.8, A.8.25 |
+| 34 | **Risikobasierte Priorisierung** | Annex I, 2(2) | Schwachstellen nach CVSS-Score und tatsaechlichem Risiko priorisieren. Reaktionszeiten nach Schweregrad: Kritisch (24–72h), Hoch (7 Tage), Mittel (30 Tage), Niedrig (naechster Zyklus). | A.8.8 |
+| 35 | **Coordinated Vulnerability Disclosure** | Annex I, 2(5) | Veroeffentlichung einer CVD-Policy mit klarem Meldeprozess. Kontaktadresse fuer Sicherheitsforscher bereitstellen. Eingangsbestaetigung innerhalb von 5 Werktagen. Koordinierte Veroeffentlichung nach Patch-Verfuegbarkeit. | A.5.5, A.5.6 |
+
+### Kategorie 8 — Incident Response und Meldepflichten
+
+Controls fuer die Erkennung, Behandlung und Meldung von Sicherheitsvorfaellen.
+
+| # | Control | CRA-Referenz | Beschreibung | ISO 27001 Mapping |
+|---|---------|-------------|-------------|-------------------|
+| 36 | **Incident-Response-Prozess** | Annex I, 2(5) | Dokumentierter Prozess mit definierten Phasen: Detection → Classification → Containment → Investigation → Recovery → Reporting → Lessons Learned. Regelmaessige Uebungen (Tabletop Exercises). | A.5.24, A.5.25, A.5.26 |
+| 37 | **Fruehwarnung (24h)** | Annex I, 2(7) + Art. 14(2)(a) | Bei aktiv ausgenutzten Schwachstellen oder schweren Vorfaellen: Fruehwarnung an ENISA und/oder zustaendige nationale Behoerde innerhalb von **24 Stunden** nach Kenntniserlangung. | A.5.24, A.5.26 |
+| 38 | **Detaillierter Vorfallsbericht (72h)** | Annex I, 2(7) + Art. 14(2)(b) | Innerhalb von **72 Stunden**: Detaillierter Bericht mit Umfang, Auswirkung, Ursachenanalyse und eingeleiteten Gegenmassnahmen. Bei personenbezogenen Daten zusaetzlich Art. 33/34 DSGVO beachten. | A.5.24, A.5.26 |
+| 39 | **Patch-Bereitstellung** | Annex I, 2(3) | Patches fuer gemeldete und bestaetigte Schwachstellen so schnell wie moeglich bereitstellen. Sicherheitshinweise (Security Advisories) an Kunden veroeffentlichen. CSAF-Format fuer maschinenlesbare Advisories empfohlen. | A.8.8 |
+| 40 | **Dokumentation und Nachbereitung** | Annex I, 2(6) | Alle Schwachstellen und Vorfaelle lueckenlos dokumentieren und fuer mindestens 10 Jahre aufbewahren. Lessons-Learned-Prozess nach jedem bedeutenden Vorfall. Ergebnisse in Risikobewertung einfliessen lassen. | A.5.27 |
+
+---
+
+## Produktklassifizierung nach CRA
+
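+Der CRA unterscheidet drei Produktkategorien (Standard, wichtig, kritisch) mit unterschiedlichen Konformitaetsanforderungen; wichtige Produkte sind zusaetzlich in Klasse I und Klasse II unterteilt: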
+
+### Standardprodukte (Default)
+
+**Beispiele:** einfache Apps, Desktop-Software, Spiele, Foto-Editoren
+
+- **Konformitaetsbewertung:** Selbstbewertung (Modul A)
+- **Anforderungen:** Alle Annex-I-Anforderungen, aber einfachster Nachweis
+- **Betrifft:** ca. 90% aller Produkte
+
+### Wichtige Produkte (Annex III) — Klasse I
+
+**Beispiele:** Passwort-Manager, VPN-Software, Firewalls, Router, Smart-Home-Systeme, IoT-Geraete mit Sensorfunktion, SIEM-Systeme
+
+- **Konformitaetsbewertung:** Harmonisierte Standards oder Drittanbieter-Bewertung
+- **Anforderungen:** Alle Annex-I-Anforderungen + erhoehte Nachweispflichten
+- **Betrifft:** ca. 8% aller Produkte
+
+### Wichtige Produkte — Klasse II
+
+**Beispiele:** Betriebssysteme, Hypervisoren, Container-Runtimes, Public-Key-Infrastruktur, industrielle Steuerungssysteme (ICS/SCADA)
+
+- **Konformitaetsbewertung:** Verpflichtende Drittanbieter-Bewertung durch benannte Stelle
+- **Anforderungen:** Alle Annex-I-Anforderungen + strengste Nachweispflichten
+- **Betrifft:** ca. 2% aller Produkte
+
+### Kritische Produkte (Annex IV)
+
+**Beispiele:** Hardware-Security-Module (HSM), Smartcard-Chips, Secure Elements, Smart-Meter-Gateways
+
+- **Konformitaetsbewertung:** Europaeisches Cybersicherheitszertifikat erforderlich (EUCC)
+- **Anforderungen:** Hoechste Stufe — europaeische Zertifizierung obligatorisch
+
+---
+
+## Zuordnung der Controls zu Dokumenten
+
+Diese 40 Controls koennen automatisiert zu folgenden Compliance-Dokumenten fuehren:
+
+| Dokument | Controls | Beschreibung |
+|----------|----------|-------------|
+| **Cybersecurity Policy** | 1–40 | Uebergreifendes Grundsatzdokument fuer Cybersicherheit |
+| **Secure Development Policy** | 18–23 | Richtlinie fuer den sicheren Entwicklungsprozess (SSDLC) |
+| **Vulnerability Management Policy** | 32–35, 39 | CVD, Patching, SBOM-Analyse |
+| **Incident Response Plan** | 36–38, 40 | 24h/72h Meldung, Eskalation, Nachbereitung |
+| **Access Control Policy** | 7–12 | Authentifizierung, Autorisierung, Passwort-Richtlinie |
+| **Cryptographic Policy** | 13–17 | Verschluesselung, Schluesselmanagement, Datenschutz |
+| **Update/Patch Policy** | 28–31 | Update-Mechanismen, Signierung, Lifecycle-Support |
+| **Logging & Monitoring Policy** | 24–27 | Security-Logging, SIEM, Anomalie-Erkennung |
+
+---
+
+## Zeitplan fuer die Umsetzung
+
+| Datum | Meilenstein |
+|-------|------------|
+| 10.12.2024 | CRA in Kraft getreten |
+| 11.06.2026 | Konformitaetsbewertungsstellen muessen benannt sein |
+| 11.09.2026 | **Meldepflichten aktiv** (Controls 37, 38) |
+| 11.12.2027 | **Volle Anwendung** — alle 40 Controls muessen umgesetzt sein, CE-Kennzeichnung erforderlich |
+
+---
+
+## Sanktionen bei Nicht-Einhaltung
+
+| Verstoss | Maximales Bussgeld |
+|----------|-------------------|
+| Wesentliche Anforderungen (Annex I) | 15 Mio. EUR oder 2,5% des weltweiten Jahresumsatzes |
+| Sonstige Pflichten | 10 Mio. EUR oder 2% des weltweiten Jahresumsatzes |
+| Falsche/unvollstaendige Informationen | 5 Mio. EUR oder 1% des weltweiten Jahresumsatzes |',
+    legal_refs = ARRAY['Annex I CRA', 'Annex III CRA', 'Annex IV CRA', 'Art. 13 CRA', 'Art. 14 CRA', 'Art. 15 CRA', 'Art. 64 CRA', '(EU) 2024/2847'],
+    tags = ARRAY['security-controls', 'annex-i', 'secure-by-design', 'authentifizierung', 'kryptografie', 'sbom', 'vulnerability', 'patching', 'incident-response', 'produktklassifizierung', 'iso-27001', 'ssdlc'],
+    relevance = 'critical',
+    updated_at = NOW()
+WHERE id = 'cra-security-controls';
+
+-- ============================================================================
+-- 2) Neuer Artikel: CRA-Konformitaetsbewertung — Praktischer Leitfaden
+-- ============================================================================
+INSERT INTO compliance_wiki_articles (id, category_id, title, summary, content, legal_refs, tags, relevance, source_urls) VALUES
+('cra-konformitaet', 'cra',
+ 'CRA-Konformitaetsbewertung — Praktischer Leitfaden',
+ 'Schritt-fuer-Schritt-Anleitung zur CRA-Konformitaetsbewertung: Produktklassifizierung, Dokumentation, Self-Assessment vs. Drittanbieter-Pruefung, CE-Kennzeichnung.',
+ '## Ueberblick
+
+Jeder Hersteller muss vor dem Inverkehrbringen eine **Konformitaetsbewertung** durchfuehren, um nachzuweisen, dass sein Produkt die Essential Cybersecurity Requirements (Annex I) erfuellt. Der Aufwand haengt von der Produktkategorie ab.
+
+## Schritt 1: Produkt klassifizieren
+
+Bestimmen Sie, ob Ihr Produkt unter eine der Sonderkategorien faellt:
+
+### Entscheidungsbaum
+
+```
+Ist das Produkt in Annex IV gelistet?
+  → Ja: Kritisches Produkt → Europaeische Zertifizierung (EUCC)
+  → Nein: Weiter
+
+Ist das Produkt in Annex III, Klasse II gelistet?
+  → Ja: Wichtig Klasse II → Drittanbieter-Bewertung (Pflicht)
+  → Nein: Weiter
+
+Ist das Produkt in Annex III, Klasse I gelistet?
+  → Ja: Wichtig Klasse I → Harmonisierte Standards ODER Drittanbieter
+  → Nein: Standardprodukt → Selbstbewertung (Modul A)
+```
+
+## Schritt 2: Cybersecurity-Risikobewertung
+
+Fuehren Sie eine systematische Risikoanalyse durch:
+
+1. **Assets identifizieren** — Welche Daten verarbeitet das Produkt? Welche Schnittstellen hat es?
+2. **Bedrohungen analysieren** — STRIDE-Methodik oder vergleichbar anwenden
+3. **Schwachstellen bewerten** — Bekannte CVEs, Design-Schwaechen, Konfigurationsfehler
+4. **Risiken priorisieren** — Eintrittswahrscheinlichkeit × Auswirkung
+5. **Massnahmen definieren** — Welche Controls aus Annex I adressieren welches Risiko?
+
+## Schritt 3: Controls implementieren
+
+Setzen Sie die relevanten Controls aus den 8 Kategorien um (siehe Artikel „CRA Annex I — Essential Cybersecurity Requirements"). Dokumentieren Sie fuer jeden Control:
+
+- **Status**: Implementiert / In Bearbeitung / Nicht anwendbar
+- **Nachweis**: Wie wird die Umsetzung belegt? (Code, Konfiguration, Test, Policy)
+- **Verantwortlich**: Wer ist zustaendig?
+
+## Schritt 4: Technische Dokumentation
+
+Die technische Dokumentation muss enthalten:
+
+- Beschreibung des Produkts und seiner Funktionen
+- Cybersecurity-Risikobewertung
+- Angewandte harmonisierte Normen
+- Nachweis der Einhaltung jeder Annex-I-Anforderung
+- SBOM (Software Bill of Materials)
+- Informationen zum Support-Zeitraum
+
+## Schritt 5: Konformitaetserklaerung und CE-Kennzeichnung
+
+Nach erfolgreicher Bewertung:
+
+1. **EU-Konformitaetserklaerung** ausstellen
+2. **CE-Kennzeichnung** anbringen
+3. **Dokumentation** mindestens 10 Jahre aufbewahren
+4. Produkt darf in der EU vertrieben werden
+
+## Haeufige Fehler
+
+| Fehler | Konsequenz |
+|--------|-----------|
+| Default-Passwoerter nicht entfernt | Verstoss gegen Annex I, 1(3)(d) |
+| Kein SBOM erstellt | Verstoss gegen Annex I, 1(5) |
+| Kein Update-Mechanismus | Verstoss gegen Annex I, 1(4) |
+| Keine CVD-Policy | Verstoss gegen Annex I, 2(5) |
+| Support-Zeitraum nicht definiert | Verstoss gegen Art. 13(8) |
+
+## Empfehlung
+
+Nutzen Sie die **BreakPilot Compliance SDK Control Library**, um den Umsetzungsstand Ihrer CRA-Controls systematisch zu tracken und automatisiert Nachweise zu generieren.',
+ ARRAY['Annex I CRA', 'Annex II CRA', 'Annex III CRA', 'Annex IV CRA', 'Annex V CRA', 'Art. 13 CRA', 'Art. 24 CRA', 'Art. 25 CRA', 'Art. 26 CRA', 'Art. 27 CRA'],
+ ARRAY['konformitaet', 'ce-kennzeichnung', 'self-assessment', 'technische-dokumentation', 'sbom', 'risikobewertung'],
+ 'important',
+ ARRAY['https://eur-lex.europa.eu/eli/reg/2024/2847/oj/eng'])
+ON CONFLICT (id) DO NOTHING;
@@ -0,0 +1,120 @@
+-- Migration 060: Multi-Layer Control Architecture — DB Schema
+-- Adds obligation_extractions, control_patterns, and crosswalk_matrix tables.
+-- Extends canonical_controls with pattern_id and obligation_ids columns.
+--
+-- Part of the Multi-Layer Control Architecture (Phase 1 of 8).
+-- See: Legal Source → Obligation → Control Pattern → Master Control → Customer Instance
+
+-- =============================================================================
+-- 1. Obligation Extractions
+--    One row per chunk-to-obligation link; extraction_method records how the link was found.
+-- =============================================================================
+
+CREATE TABLE IF NOT EXISTS obligation_extractions (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    chunk_hash VARCHAR(64) NOT NULL,  -- presumably a hex digest of the chunk (64 chars fits SHA-256) — confirm
+    collection VARCHAR(100) NOT NULL,
+    regulation_code VARCHAR(100) NOT NULL,
+    article VARCHAR(100),
+    paragraph VARCHAR(100),
+    obligation_id VARCHAR(50),  -- nullable and without FK: no obligations table is referenced here
+    obligation_text TEXT,
+    confidence NUMERIC(3,2) CHECK (confidence >= 0 AND confidence <= 1),  -- two decimals, 0.00 to 1.00; NULL allowed
+    extraction_method VARCHAR(30) NOT NULL
+        CHECK (extraction_method IN ('exact_match', 'embedding_match', 'llm_extracted', 'inferred')),  -- 'inferred' is a 4th value beside the 3 tiers
+    pattern_id VARCHAR(50),  -- no FK declared; presumably control_patterns.pattern_id — confirm
+    pattern_match_score NUMERIC(3,2) CHECK (pattern_match_score >= 0 AND pattern_match_score <= 1),
+    control_uuid UUID REFERENCES canonical_controls(id),  -- no ON DELETE clause: default NO ACTION blocks deleting a referenced control
+    job_id UUID REFERENCES canonical_generation_jobs(id),  -- same default NO ACTION towards the generation job
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+-- Single-column lookup indexes; job_id is the only foreign key left without one.
+CREATE INDEX IF NOT EXISTS idx_oe_obligation ON obligation_extractions(obligation_id);
+CREATE INDEX IF NOT EXISTS idx_oe_pattern ON obligation_extractions(pattern_id);
+CREATE INDEX IF NOT EXISTS idx_oe_control ON obligation_extractions(control_uuid);
+CREATE INDEX IF NOT EXISTS idx_oe_regulation ON obligation_extractions(regulation_code);
+CREATE INDEX IF NOT EXISTS idx_oe_chunk ON obligation_extractions(chunk_hash);
+CREATE INDEX IF NOT EXISTS idx_oe_method ON obligation_extractions(extraction_method);
+
+COMMENT ON TABLE obligation_extractions IS
+    'Tracks chunk-to-obligation linkage from the 3-tier extraction pipeline (exact/embedding/LLM)';
+
+-- =============================================================================
+-- 2. Control Patterns Registry
+--    DB mirror of the YAML pattern library for SQL queries and joins.
+-- =============================================================================
+
+CREATE TABLE IF NOT EXISTS control_patterns (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    pattern_id VARCHAR(50) UNIQUE NOT NULL,  -- business key, format CP-{DOMAIN}-{NNN}; UNIQUE already provides its index
+    name VARCHAR(255) NOT NULL,
+    name_de VARCHAR(255),  -- presumably the German display name — confirm
+    domain VARCHAR(10) NOT NULL,
+    category VARCHAR(50),
+    description TEXT,
+    template_objective TEXT,
+    template_rationale TEXT,
+    template_requirements JSONB DEFAULT '[]',
+    template_test_procedure JSONB DEFAULT '[]',
+    template_evidence JSONB DEFAULT '[]',
+    severity_default VARCHAR(20)
+        CHECK (severity_default IN ('low', 'medium', 'high', 'critical')),  -- nullable: NULL passes the CHECK
+    implementation_effort_default VARCHAR(2)
+        CHECK (implementation_effort_default IN ('s', 'm', 'l', 'xl')),  -- nullable: NULL passes the CHECK
+    obligation_match_keywords JSONB DEFAULT '[]',
+    tags JSONB DEFAULT '[]',
+    open_anchor_refs JSONB DEFAULT '[]',
+    composable_with JSONB DEFAULT '[]',
+    version VARCHAR(10) DEFAULT '1.0',
+    created_at TIMESTAMPTZ DEFAULT NOW(),
+    updated_at TIMESTAMPTZ DEFAULT NOW()  -- no trigger in this migration maintains it; writers must set it
+);
+
+CREATE INDEX IF NOT EXISTS idx_cp_domain ON control_patterns(domain);
+CREATE INDEX IF NOT EXISTS idx_cp_category ON control_patterns(category);
+-- No extra index on pattern_id: the UNIQUE constraint above is already backed by a unique index.
+
+COMMENT ON TABLE control_patterns IS
+    'Registry of control patterns (DB mirror of YAML library). Pattern ID format: CP-{DOMAIN}-{NNN}';
+
+-- =============================================================================
+-- 3. Crosswalk Matrix
+--    One row per mapping from a legal source through obligation and pattern to a control/TOM.
+-- =============================================================================
+
+CREATE TABLE IF NOT EXISTS crosswalk_matrix (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    regulation_code VARCHAR(100) NOT NULL,  -- the only mandatory link; every later stage may be NULL
+    article VARCHAR(100),
+    paragraph VARCHAR(100),
+    obligation_id VARCHAR(50),
+    pattern_id VARCHAR(50),  -- no FK declared; presumably control_patterns.pattern_id — confirm
+    master_control_id VARCHAR(20),  -- textual control identifier kept next to the UUID below
+    master_control_uuid UUID REFERENCES canonical_controls(id),  -- the only FK-enforced link in this table
+    tom_control_id VARCHAR(30),
+    confidence NUMERIC(3,2) CHECK (confidence >= 0 AND confidence <= 1),  -- two decimals, 0.00 to 1.00; NULL allowed
+    source VARCHAR(30) DEFAULT 'auto'
+        CHECK (source IN ('manual', 'auto', 'migrated')),  -- nullable: an explicit NULL passes the CHECK
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+-- NOTE(review): idx_cw_control covers the textual master_control_id; the FK column master_control_uuid has no index.
+CREATE INDEX IF NOT EXISTS idx_cw_regulation ON crosswalk_matrix(regulation_code, article);
+CREATE INDEX IF NOT EXISTS idx_cw_obligation ON crosswalk_matrix(obligation_id);
+CREATE INDEX IF NOT EXISTS idx_cw_pattern ON crosswalk_matrix(pattern_id);
+CREATE INDEX IF NOT EXISTS idx_cw_control ON crosswalk_matrix(master_control_id);
+CREATE INDEX IF NOT EXISTS idx_cw_tom ON crosswalk_matrix(tom_control_id);
+
+COMMENT ON TABLE crosswalk_matrix IS
+    'Golden thread: regulation → article → obligation → pattern → master control → TOM';
+
+-- =============================================================================
+-- 4. canonical_controls: link each control to its pattern and its obligations
+-- =============================================================================
+
+-- Both linkage columns are added in one ALTER; IF NOT EXISTS keeps re-runs harmless.
+ALTER TABLE canonical_controls
+    ADD COLUMN IF NOT EXISTS pattern_id VARCHAR(50),
+    ADD COLUMN IF NOT EXISTS obligation_ids JSONB DEFAULT '[]';
+
+-- Supports looking up controls by pattern.
+CREATE INDEX IF NOT EXISTS idx_cc_pattern ON canonical_controls (pattern_id);
@@ -0,0 +1,49 @@
+-- Migration 061: Obligation Candidates + Decomposition Tracking
+-- Supports Pass 0a (Obligation Extraction from Rich Controls) and
+-- Pass 0b (Atomic Control Composition).
+--
+-- Part of the Multi-Layer Control Architecture — Decomposition Pass.
+
+-- =============================================================================
+-- 1. Obligation Candidates
+--    Individual normative obligations extracted from Rich Controls (Pass 0a).
+-- =============================================================================
+
+CREATE TABLE IF NOT EXISTS obligation_candidates (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    parent_control_uuid UUID NOT NULL REFERENCES canonical_controls(id),  -- the control this candidate was extracted from
+    candidate_id VARCHAR(30) NOT NULL,  -- NOTE(review): indexed below but not UNIQUE — confirm duplicates are intended
+    obligation_text TEXT NOT NULL,
+    action VARCHAR(500),
+    object TEXT,
+    condition TEXT,
+    normative_strength VARCHAR(20) DEFAULT 'must'
+        CHECK (normative_strength IN ('must', 'should', 'may')),  -- nullable: an explicit NULL passes the CHECK
+    is_test_obligation BOOLEAN DEFAULT FALSE,
+    is_reporting_obligation BOOLEAN DEFAULT FALSE,
+    extraction_confidence NUMERIC(3,2) DEFAULT 0.0
+        CHECK (extraction_confidence >= 0 AND extraction_confidence <= 1),  -- two decimals, 0.00 to 1.00
+    quality_flags JSONB DEFAULT '{}',  -- JSON object (default empty); key schema is not defined in this migration
+    release_state VARCHAR(30) DEFAULT 'extracted'
+        CHECK (release_state IN ('extracted', 'validated', 'rejected', 'composed')),  -- nullable: an explicit NULL passes the CHECK
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+-- Lookup indexes: by parent control, by release state, by candidate identifier.
+CREATE INDEX IF NOT EXISTS idx_oc_parent ON obligation_candidates(parent_control_uuid);
+CREATE INDEX IF NOT EXISTS idx_oc_state ON obligation_candidates(release_state);
+CREATE INDEX IF NOT EXISTS idx_oc_candidate ON obligation_candidates(candidate_id);
+
+COMMENT ON TABLE obligation_candidates IS
+    'Individual normative obligations extracted from Rich Controls via Pass 0a decomposition';
+
+-- =============================================================================
+-- 2. canonical_controls: columns for decomposition tracking
+-- =============================================================================
+
+-- Self-referencing parent link plus the method used; both added in one ALTER.
+ALTER TABLE canonical_controls
+    ADD COLUMN IF NOT EXISTS parent_control_uuid UUID REFERENCES canonical_controls(id),
+    ADD COLUMN IF NOT EXISTS decomposition_method VARCHAR(30);
+
+-- Supports finding the child controls of a given parent.
+CREATE INDEX IF NOT EXISTS idx_cc_parent ON canonical_controls (parent_control_uuid);
@@ -0,0 +1,22 @@
+-- Migration 062: pipeline_version on canonical_controls and canonical_processed_chunks
+--
+-- Records which generation rules produced each control/chunk:
+--   1 = original pipeline (local LLM prefilter, old prompt without null-skip)
+--   2 = improved pipeline (skip_prefilter, Anthropic decides relevance, annexes protected)
+-- Makes it possible to find controls that may need reprocessing when pipeline rules change.
+
+-- canonical_controls: column, version lookup index, column description
+ALTER TABLE canonical_controls
+    ADD COLUMN IF NOT EXISTS pipeline_version smallint NOT NULL DEFAULT 1;
+CREATE INDEX IF NOT EXISTS idx_canonical_controls_pipeline_version
+    ON canonical_controls (pipeline_version);
+COMMENT ON COLUMN canonical_controls.pipeline_version
+    IS 'Generation pipeline version: 1=original (local prefilter), 2=improved (Anthropic decides relevance, annexes protected)';
+
+-- canonical_processed_chunks: column, version lookup index, column description
+ALTER TABLE canonical_processed_chunks
+    ADD COLUMN IF NOT EXISTS pipeline_version smallint NOT NULL DEFAULT 1;
+CREATE INDEX IF NOT EXISTS idx_canonical_processed_chunks_pipeline_version
+    ON canonical_processed_chunks (pipeline_version);
+COMMENT ON COLUMN canonical_processed_chunks.pipeline_version
+    IS 'Pipeline version used when this chunk was processed';
@@ -0,0 +1,23 @@
+-- Migration 063: Scoped Control Applicability
+--
+-- Adds 3 new JSONB columns to canonical_controls for filtering controls
+-- based on customer industry, company size, and compliance scope.
+-- NULL means "not yet classified" for the first two columns but "no conditions" for scope_conditions.
+-- v3 pipeline generates these fields automatically via LLM.
+-- Old controls (v1/v2) will be backfilled separately; until then their values stay NULL.
+
+ALTER TABLE canonical_controls
+    ADD COLUMN IF NOT EXISTS applicable_industries JSONB DEFAULT NULL,  -- JSON array of industry names or ["all"]
+    ADD COLUMN IF NOT EXISTS applicable_company_size JSONB DEFAULT NULL,  -- JSON array of size classes or ["all"]
+    ADD COLUMN IF NOT EXISTS scope_conditions JSONB DEFAULT NULL;  -- JSON object; receives no index in this migration
+
+-- GIN index for JSONB containment queries (e.g. applicable_industries @> '"Telekommunikation"')
+CREATE INDEX IF NOT EXISTS idx_cc_applicable_industries
+    ON canonical_controls USING gin (applicable_industries);
+-- Same containment-query support for the company-size filter.
+CREATE INDEX IF NOT EXISTS idx_cc_applicable_company_size
+    ON canonical_controls USING gin (applicable_company_size);
+
+COMMENT ON COLUMN canonical_controls.applicable_industries IS 'Industries this control applies to, e.g. ["all"] or ["Telekommunikation", "Energie"]. NULL = not yet classified.';
+COMMENT ON COLUMN canonical_controls.applicable_company_size IS 'Company sizes this control applies to, e.g. ["all"] or ["medium", "large", "enterprise"]. NULL = not yet classified.';
+COMMENT ON COLUMN canonical_controls.scope_conditions IS 'Optional scope conditions, e.g. {"requires_any": ["uses_ai"], "description": "..."}. NULL = no conditions.';
@@ -0,0 +1,105 @@
+-- Migration 064: VVT Master Libraries — 8 global reference tables
+-- These are shared across all tenants (no tenant_id).
+
+BEGIN;
+
+-- 1. Data subjects (German: Betroffenenkategorien)
+CREATE TABLE IF NOT EXISTS vvt_lib_data_subjects (
+    id VARCHAR(50) PRIMARY KEY,  -- string code; seed migration 065 uses upper-case keys such as 'EMPLOYEES'
+    label_de VARCHAR(200) NOT NULL,  -- German display label
+    description_de TEXT,
+    art9_relevant BOOLEAN DEFAULT FALSE,  -- presumably flags GDPR Art. 9 relevance (confirm); nullable, no NOT NULL
+    typical_for JSONB DEFAULT '[]'::jsonb,  -- JSON array of tags; the seed uses values like "hr", "marketing"
+    sort_order INTEGER DEFAULT 0,  -- presumably display order (confirm); the seed assigns ascending integers
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- 2. Data categories (German: Datenkategorien), hierarchical via parent_id
+CREATE TABLE IF NOT EXISTS vvt_lib_data_categories (
+    id VARCHAR(50) PRIMARY KEY,
+    parent_id VARCHAR(50) REFERENCES vvt_lib_data_categories(id) ON DELETE SET NULL,  -- self-reference; deleting a parent detaches its children instead of deleting them
+    label_de VARCHAR(200) NOT NULL,
+    description_de TEXT,
+    is_art9 BOOLEAN DEFAULT FALSE,  -- TRUE in the seed for the "Art. 9" special-category rows
+    is_art10 BOOLEAN DEFAULT FALSE,  -- TRUE in the seed for the "Art. 10" criminal-data rows
+    risk_weight INTEGER DEFAULT 1 CHECK (risk_weight BETWEEN 1 AND 5),  -- constrained to 1..5; a NULL still passes the CHECK
+    default_retention_rule VARCHAR(50),  -- holds a vvt_lib_retention_rules id in the seed; no FK is declared
+    default_legal_basis VARCHAR(50),  -- holds a vvt_lib_legal_bases id in the seed; no FK is declared
+    sort_order INTEGER DEFAULT 0,
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+CREATE INDEX IF NOT EXISTS idx_vvt_lib_data_categories_parent ON vvt_lib_data_categories(parent_id);  -- index on the self-referencing FK column
+
+-- 3. Recipients (German: Empfaengerkategorien)
+CREATE TABLE IF NOT EXISTS vvt_lib_recipients (
+    id VARCHAR(50) PRIMARY KEY,
+    type VARCHAR(20) NOT NULL CHECK (type IN ('INTERNAL', 'PROCESSOR', 'CONTROLLER', 'AUTHORITY')),  -- closed set enforced by the CHECK
+    label_de VARCHAR(200) NOT NULL,
+    description_de TEXT,
+    is_third_country BOOLEAN DEFAULT FALSE,
+    country VARCHAR(5),  -- the seed stores 'DE' or NULL
+    sort_order INTEGER DEFAULT 0,
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- 4. Legal bases (German: Rechtsgrundlagen)
+CREATE TABLE IF NOT EXISTS vvt_lib_legal_bases (
+    id VARCHAR(50) PRIMARY KEY,  -- e.g. 'ART6_1A', 'BDSG_26' in the seed
+    article VARCHAR(50) NOT NULL,  -- human-readable citation, e.g. 'Art. 6 Abs. 1 lit. a'
+    type VARCHAR(30) NOT NULL CHECK (type IN ('CONSENT', 'CONTRACT', 'LEGAL_OBLIGATION', 'VITAL_INTEREST', 'PUBLIC_TASK', 'LEGITIMATE_INTEREST', 'ART9', 'NATIONAL')),  -- closed set enforced by the CHECK
+    label_de VARCHAR(300) NOT NULL,
+    description_de TEXT,
+    is_art9 BOOLEAN DEFAULT FALSE,
+    typical_national_law VARCHAR(100),  -- free text such as 'BDSG § 26'; NULL for most seed rows
+    sort_order INTEGER DEFAULT 0,
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- 5. Retention rules (German: Aufbewahrungsfristen)
+CREATE TABLE IF NOT EXISTS vvt_lib_retention_rules (
+    id VARCHAR(50) PRIMARY KEY,
+    label_de VARCHAR(300) NOT NULL,
+    description_de TEXT,
+    legal_basis VARCHAR(200),  -- free-text citation (e.g. 'HGB § 257'), not an id from vvt_lib_legal_bases
+    duration INTEGER NOT NULL,  -- no range CHECK; the seed stores 0 for event-driven rules such as 'CONSENT_REVOKE'
+    duration_unit VARCHAR(10) NOT NULL CHECK (duration_unit IN ('DAYS', 'MONTHS', 'YEARS')),  -- unit that duration is expressed in
+    start_event VARCHAR(200),  -- free-text description of the event that starts the period
+    deletion_procedure VARCHAR(500),
+    sort_order INTEGER DEFAULT 0,
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- 6. Transfer mechanisms (German: Uebermittlungsmechanismen)
+CREATE TABLE IF NOT EXISTS vvt_lib_transfer_mechanisms (
+    id VARCHAR(50) PRIMARY KEY,
+    label_de VARCHAR(300) NOT NULL,
+    description_de TEXT,
+    article VARCHAR(50),  -- free-text citation, e.g. 'Art. 45 DSGVO'
+    requires_tia BOOLEAN DEFAULT FALSE,  -- TIA = Transfer Impact Assessment (per the seed row 'TIA')
+    sort_order INTEGER DEFAULT 0,
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- 7. Purposes (German: Verarbeitungszwecke)
+CREATE TABLE IF NOT EXISTS vvt_lib_purposes (
+    id VARCHAR(50) PRIMARY KEY,
+    label_de VARCHAR(300) NOT NULL,
+    description_de TEXT,
+    typical_legal_basis VARCHAR(50),  -- holds a vvt_lib_legal_bases id in the seed; no FK is declared
+    typical_for JSONB DEFAULT '[]'::jsonb,  -- JSON array of tags; the seed uses values like "hr", "finance"
+    sort_order INTEGER DEFAULT 0,
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- 8. TOMs: technical and organisational measures (German: Technisch-Organisatorische Massnahmen)
+CREATE TABLE IF NOT EXISTS vvt_lib_toms (
+    id VARCHAR(50) PRIMARY KEY,
+    category VARCHAR(30) NOT NULL CHECK (category IN ('accessControl', 'confidentiality', 'integrity', 'availability', 'separation')),  -- camelCase values, unlike the upper-case enums in the other tables
+    label_de VARCHAR(300) NOT NULL,
+    description_de TEXT,
+    art32_reference VARCHAR(100),  -- free-text citation, e.g. 'Art. 32 Abs. 1 lit. b'
+    sort_order INTEGER DEFAULT 0,
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+COMMIT;
@@ -0,0 +1,200 @@
+-- Migration 065: VVT Library Seed Data (~150 entries)
+-- All content self-authored, MIT-compatible.
+
+BEGIN;
+
+-- =============================================================================
+-- Data Subjects (15 rows); only PATIENTS is flagged art9_relevant
+-- =============================================================================
+INSERT INTO vvt_lib_data_subjects (id, label_de, description_de, art9_relevant, typical_for, sort_order) VALUES
+('EMPLOYEES', 'Beschaeftigte', 'Aktuelle Mitarbeiterinnen und Mitarbeiter', FALSE, '["hr","it_operations"]', 1),
+('APPLICANTS', 'Bewerber', 'Stellenbewerberinnen und -bewerber', FALSE, '["hr"]', 2),
+('CUSTOMERS', 'Kunden', 'Aktive Kundinnen und Kunden', FALSE, '["sales_crm","support","finance"]', 3),
+('PROSPECTIVE_CUSTOMERS', 'Interessenten', 'Potenzielle Kundinnen und Kunden', FALSE, '["marketing","sales_crm"]', 4),
+('SUPPLIERS', 'Lieferanten', 'Geschaeftspartner als Lieferanten', FALSE, '["finance"]', 5),
+('BUSINESS_PARTNERS', 'Geschaeftspartner', 'Kooperationspartner, Berater, Dienstleister', FALSE, '["management","finance"]', 6),
+('VISITORS', 'Besucher', 'Betriebsbesucher und Gaeste', FALSE, '["management"]', 7),
+('WEBSITE_USERS', 'Website-Nutzer', 'Besucher der Unternehmenswebsite', FALSE, '["marketing","it_operations"]', 8),
+('APP_USERS', 'App-Nutzer', 'Nutzer mobiler Anwendungen', FALSE, '["product_engineering"]', 9),
+('NEWSLETTER_SUBSCRIBERS', 'Newsletter-Abonnenten', 'Empfaenger von Newslettern', FALSE, '["marketing"]', 10),
+('MEMBERS', 'Mitglieder', 'Vereins- oder Verbandsmitglieder', FALSE, '["management"]', 11),
+('PATIENTS', 'Patienten', 'Patientinnen und Patienten', TRUE, '["other"]', 12),
+('STUDENTS', 'Schueler/Studierende', 'Lernende in Bildungseinrichtungen', FALSE, '["other"]', 13),
+('MINORS', 'Minderjaehrige', 'Personen unter 16 Jahren (Art. 8 DSGVO)', FALSE, '["other"]', 14),
+('OTHER', 'Sonstige', 'Andere Betroffenenkategorien', FALSE, '[]', 15)
+ON CONFLICT (id) DO NOTHING;  -- idempotent: rows whose id already exists are skipped, not updated
+
+-- =============================================================================
+-- Data Categories — Parent categories (9); parent_id is NULL, retention/legal-basis defaults are left unset
+-- =============================================================================
+INSERT INTO vvt_lib_data_categories (id, parent_id, label_de, description_de, is_art9, is_art10, risk_weight, sort_order) VALUES
+('IDENTIFICATION', NULL, 'Identifikationsdaten', 'Daten zur Identifizierung natuerlicher Personen', FALSE, FALSE, 2, 1),
+('CONTACT_DATA', NULL, 'Kontaktdaten', 'Kommunikationsdaten und Adressen', FALSE, FALSE, 1, 2),
+('FINANCIAL', NULL, 'Finanzdaten', 'Bank-, Gehalts- und Zahlungsdaten', FALSE, FALSE, 3, 3),
+('EMPLOYMENT', NULL, 'Beschaeftigungsdaten', 'Arbeitsverhaeltnis und Qualifikation', FALSE, FALSE, 2, 4),
+('DIGITAL_IDENTITY', NULL, 'Digitale Identitaet', 'Online-Kennungen und Zugangsdaten', FALSE, FALSE, 2, 5),
+('COMMUNICATION', NULL, 'Kommunikationsdaten', 'Nachrichten und Vertragsdaten', FALSE, FALSE, 2, 6),
+('MEDIA', NULL, 'Medien- und Standortdaten', 'Bild, Video, Standort', FALSE, FALSE, 3, 7),
+('ART9_SPECIAL', NULL, 'Besondere Kategorien (Art. 9)', 'Besonders schuetzenswerte Daten', TRUE, FALSE, 5, 8),
+('ART10', NULL, 'Strafrechtliche Daten (Art. 10)', 'Daten ueber strafrechtliche Verurteilungen', FALSE, TRUE, 5, 9)
+ON CONFLICT (id) DO NOTHING;  -- idempotent: rows whose id already exists are skipped, not updated
+
+-- =============================================================================
+-- Data Categories — Child categories (29); every parent_id refers to a parent row inserted by the previous statement
+-- =============================================================================
+INSERT INTO vvt_lib_data_categories (id, parent_id, label_de, description_de, is_art9, is_art10, risk_weight, default_retention_rule, default_legal_basis, sort_order) VALUES
+('NAME', 'IDENTIFICATION', 'Name', 'Vor- und Nachname, Geburtsname', FALSE, FALSE, 1, NULL, NULL, 10),
+('DOB', 'IDENTIFICATION', 'Geburtsdatum', 'Geburtstag und -ort', FALSE, FALSE, 2, NULL, NULL, 11),
+('ADDRESS', 'CONTACT_DATA', 'Anschrift', 'Wohn- und Postadresse', FALSE, FALSE, 1, NULL, NULL, 20),
+('CONTACT', 'CONTACT_DATA', 'Kontaktinformationen', 'Telefon, E-Mail, Fax', FALSE, FALSE, 1, NULL, NULL, 21),
+('ID_NUMBER', 'IDENTIFICATION', 'Ausweisnummer', 'Personalausweis-, Reisepassnummer', FALSE, FALSE, 3, NULL, NULL, 12),
+('SOCIAL_SECURITY', 'IDENTIFICATION', 'Sozialversicherungsnummer', 'SV-Nummer', FALSE, FALSE, 4, 'BDSG_35_DELETE', 'ART6_1C', 13),
+('TAX_ID', 'FINANCIAL', 'Steuer-ID', 'Steueridentifikationsnummer', FALSE, FALSE, 3, 'AO_147_10Y', 'ART6_1C', 30),
+('BANK_ACCOUNT', 'FINANCIAL', 'Bankverbindung', 'IBAN, BIC, Kontonummer', FALSE, FALSE, 3, 'HGB_257_10Y', 'ART6_1B', 31),
+('PAYMENT_DATA', 'FINANCIAL', 'Zahlungsdaten', 'Kreditkartendaten, Zahlungshistorie', FALSE, FALSE, 4, 'HGB_257_10Y', 'ART6_1B', 32),
+('SALARY_DATA', 'FINANCIAL', 'Gehaltsdaten', 'Brutto/Netto, Zulagen, Abzuege', FALSE, FALSE, 4, 'AO_147_10Y', 'BDSG_26', 33),
+('EMPLOYMENT_DATA', 'EMPLOYMENT', 'Arbeitsvertragsdaten', 'Vertragsdetails, Position, Abteilung', FALSE, FALSE, 2, 'HGB_257_10Y', 'BDSG_26', 40),
+('EDUCATION_DATA', 'EMPLOYMENT', 'Ausbildungsdaten', 'Zeugnisse, Qualifikationen, Zertifikate', FALSE, FALSE, 2, 'AGG_15_6M', 'BDSG_26', 41),
+('IP_ADDRESS', 'DIGITAL_IDENTITY', 'IP-Adresse', 'IPv4/IPv6 Adressen', FALSE, FALSE, 2, 'CUSTOM_90D', 'ART6_1F', 50),
+('DEVICE_ID', 'DIGITAL_IDENTITY', 'Geraete-ID', 'Browser-Fingerprint, Device-ID', FALSE, FALSE, 2, 'CUSTOM_14M', 'ART6_1A', 51),
+('LOGIN_DATA', 'DIGITAL_IDENTITY', 'Zugangsdaten', 'Benutzername, Passwort-Hash', FALSE, FALSE, 3, NULL, 'ART6_1B', 52),
+('USAGE_DATA', 'DIGITAL_IDENTITY', 'Nutzungsdaten', 'Klickverhalten, Seitenaufrufe, Sessions', FALSE, FALSE, 2, 'CUSTOM_14M', 'ART6_1A', 53),
+('COMMUNICATION_DATA', 'COMMUNICATION', 'Korrespondenz', 'E-Mails, Chat-Nachrichten, Briefe', FALSE, FALSE, 2, 'BGB_195_3Y', NULL, 60),
+('CONTRACT_DATA', 'COMMUNICATION', 'Vertragsdaten', 'Vertragsdetails, Bestellungen', FALSE, FALSE, 2, 'HGB_257_10Y', 'ART6_1B', 61),
+('PHOTO_VIDEO', 'MEDIA', 'Bild-/Videodaten', 'Fotos, Videos von Personen', FALSE, FALSE, 3, 'CONSENT_REVOKE', 'ART6_1A', 70),
+('LOCATION_DATA', 'MEDIA', 'Standortdaten', 'GPS-Koordinaten, Aufenthaltsorte', FALSE, FALSE, 3, 'CUSTOM_90D', 'ART6_1A', 71),
+('HEALTH_DATA', 'ART9_SPECIAL', 'Gesundheitsdaten', 'Krankheitsdaten, Atteste, Behinderung', TRUE, FALSE, 5, 'BDSG_35_DELETE', 'ART9_2H', 80),
+('GENETIC_DATA', 'ART9_SPECIAL', 'Genetische Daten', 'DNA-Analysen, genetische Merkmale', TRUE, FALSE, 5, 'BDSG_35_DELETE', 'ART9_2A', 81),
+('BIOMETRIC_DATA', 'ART9_SPECIAL', 'Biometrische Daten', 'Fingerabdruck, Gesichtserkennung', TRUE, FALSE, 5, 'BDSG_35_DELETE', 'ART9_2A', 82),
+('RACIAL_ETHNIC', 'ART9_SPECIAL', 'Rassische/ethnische Herkunft', 'Ethnische Zugehoerigkeit', TRUE, FALSE, 5, NULL, 'ART9_2A', 83),
+('POLITICAL_OPINIONS', 'ART9_SPECIAL', 'Politische Meinungen', 'Parteizugehoerigkeit, politische Haltung', TRUE, FALSE, 5, NULL, 'ART9_2A', 84),
+('RELIGIOUS_BELIEFS', 'ART9_SPECIAL', 'Religioese Ueberzeugungen', 'Konfession, religioese Praktiken', TRUE, FALSE, 5, NULL, 'ART9_2A', 85),
+('TRADE_UNION', 'ART9_SPECIAL', 'Gewerkschaftszugehoerigkeit', 'Mitgliedschaft in Gewerkschaften', TRUE, FALSE, 5, NULL, 'ART9_2A', 86),
+('SEX_LIFE', 'ART9_SPECIAL', 'Sexualleben/Orientierung', 'Sexuelle Orientierung', TRUE, FALSE, 5, NULL, 'ART9_2A', 87),
+('CRIMINAL_DATA', 'ART10', 'Strafrechtliche Daten', 'Verurteilungen, Straftaten, Fuehrungszeugnis', FALSE, TRUE, 5, 'BDSG_35_DELETE', 'BDSG_24', 90)
+ON CONFLICT (id) DO NOTHING;  -- idempotent: rows whose id already exists are skipped, not updated
+
+-- =============================================================================
+-- Legal Bases (12); these ids are the values used for default_legal_basis / typical_legal_basis in this seed
+-- =============================================================================
+INSERT INTO vvt_lib_legal_bases (id, article, type, label_de, description_de, is_art9, typical_national_law, sort_order) VALUES
+('ART6_1A', 'Art. 6 Abs. 1 lit. a', 'CONSENT', 'Einwilligung', 'Die betroffene Person hat ihre Einwilligung gegeben', FALSE, NULL, 1),
+('ART6_1B', 'Art. 6 Abs. 1 lit. b', 'CONTRACT', 'Vertragserfuellung', 'Erforderlich fuer die Erfuellung eines Vertrags', FALSE, NULL, 2),
+('ART6_1C', 'Art. 6 Abs. 1 lit. c', 'LEGAL_OBLIGATION', 'Rechtliche Verpflichtung', 'Erforderlich zur Erfuellung einer rechtlichen Verpflichtung', FALSE, NULL, 3),
+('ART6_1D', 'Art. 6 Abs. 1 lit. d', 'VITAL_INTEREST', 'Lebenswichtige Interessen', 'Schutz lebenswichtiger Interessen', FALSE, NULL, 4),
+('ART6_1E', 'Art. 6 Abs. 1 lit. e', 'PUBLIC_TASK', 'Oeffentliches Interesse', 'Wahrnehmung einer Aufgabe im oeffentlichen Interesse', FALSE, NULL, 5),
+('ART6_1F', 'Art. 6 Abs. 1 lit. f', 'LEGITIMATE_INTEREST', 'Berechtigtes Interesse', 'Wahrung berechtigter Interessen des Verantwortlichen', FALSE, NULL, 6),
+('ART9_2A', 'Art. 9 Abs. 2 lit. a', 'ART9', 'Ausdrueckliche Einwilligung (Art. 9)', 'Ausdrueckliche Einwilligung fuer besondere Kategorien', TRUE, NULL, 7),
+('ART9_2B', 'Art. 9 Abs. 2 lit. b', 'ART9', 'Arbeitsrecht (Art. 9)', 'Erforderlich im Arbeitsrecht', TRUE, 'BDSG § 26', 8),
+('ART9_2H', 'Art. 9 Abs. 2 lit. h', 'ART9', 'Gesundheitsvorsorge (Art. 9)', 'Gesundheitsvorsorge oder Arbeitsmedizin', TRUE, NULL, 9),
+('BDSG_26', '§ 26 BDSG', 'NATIONAL', 'Beschaeftigtenverhaeltnis', 'Datenverarbeitung fuer Zwecke des Beschaeftigungsverhaeltnisses', FALSE, 'BDSG § 26', 10),
+('BDSG_24', '§ 24 BDSG', 'NATIONAL', 'Strafrechtliche Daten', 'Verarbeitung strafrechtlicher Daten (Art. 10 DSGVO)', FALSE, 'BDSG § 24', 11),
+('UWG_7', '§ 7 UWG', 'NATIONAL', 'Werbung mit Einwilligung', 'Werbliche Ansprache nach UWG', FALSE, 'UWG § 7', 12)
+ON CONFLICT (id) DO NOTHING;  -- existing ids are skipped, not updated: a label correction here only reaches databases that have not run this seed yet
+
+-- =============================================================================
+-- Retention Rules (12); rows with duration 0 are the event-driven rules (see their start_event)
+-- =============================================================================
+INSERT INTO vvt_lib_retention_rules (id, label_de, description_de, legal_basis, duration, duration_unit, start_event, deletion_procedure, sort_order) VALUES
+('HGB_257_10Y', '10 Jahre (HGB § 257)', 'Handelsrechtliche Aufbewahrungspflicht fuer Handelsbuecher, Jahresabschluesse, Buchungsbelege', 'HGB § 257', 10, 'YEARS', 'Ende des Kalenderjahres', 'Vernichtung nach Ablauf der Aufbewahrungsfrist', 1),
+('AO_147_10Y', '10 Jahre (AO § 147)', 'Steuerrechtliche Aufbewahrungspflicht fuer Buchungsbelege', 'AO § 147', 10, 'YEARS', 'Ende des Kalenderjahres', 'Vernichtung nach Ablauf der Aufbewahrungsfrist', 2),
+('AO_147_6Y', '6 Jahre (AO § 147)', 'Steuerrechtliche Aufbewahrungspflicht fuer Geschaeftsbriefe', 'AO § 147', 6, 'YEARS', 'Ende des Kalenderjahres', 'Vernichtung nach Ablauf der Aufbewahrungsfrist', 3),
+('AGG_15_6M', '6 Monate (AGG § 15)', 'Frist fuer Schadensersatzansprueche nach AGG', 'AGG § 15', 6, 'MONTHS', 'Ablehnung / Ende des Verfahrens', 'Loeschung personenbezogener Bewerbungsdaten', 4),
+('ARBZG_16_2Y', '2 Jahre (ArbZG § 16)', 'Aufzeichnungspflicht der Arbeitszeiten', 'ArbZG § 16', 2, 'YEARS', 'Ende des Aufzeichnungszeitraums', 'Vernichtung der Arbeitszeitaufzeichnungen', 5),
+('BGB_195_3Y', '3 Jahre (BGB § 195)', 'Regelverjaehrungsfrist fuer vertragliche Ansprueche', 'BGB § 195', 3, 'YEARS', 'Ende des Jahres der Anspruchsentstehung', 'Loeschung nach Ablauf der Verjaehrungsfrist', 6),
+('CONSENT_REVOKE', 'Bis Widerruf', 'Speicherung bis zum Widerruf der Einwilligung', 'Art. 7 Abs. 3 DSGVO', 0, 'DAYS', 'Widerruf der Einwilligung', 'Unverzuegliche Loeschung nach Widerruf', 7),
+('PURPOSE_END', 'Bis Zweckerfuellung', 'Speicherung bis der Verarbeitungszweck erreicht ist', 'Art. 5 Abs. 1 lit. e DSGVO', 0, 'DAYS', 'Zweckerfuellung', 'Loeschung nach Zweckerfuellung', 8),
+('BDSG_35_DELETE', 'Unverzuegliche Loeschung', 'Loeschung sobald Speicherung nicht mehr erforderlich', 'BDSG § 35', 0, 'DAYS', 'Wegfall der Erforderlichkeit', 'Unverzuegliche Loeschung', 9),
+('CUSTOM_90D', '90 Tage', 'Benutzerdefinierte Aufbewahrungsfrist von 90 Tagen', NULL, 90, 'DAYS', 'Erstellung des Datensatzes', 'Automatische Loeschung nach 90 Tagen', 10),
+('CUSTOM_14M', '14 Monate', 'Benutzerdefinierte Aufbewahrungsfrist von 14 Monaten (z.B. Analytics)', NULL, 14, 'MONTHS', 'Erstellung des Datensatzes', 'Automatische Loeschung nach 14 Monaten', 11),
+('CUSTOM_30D', '30 Tage', 'Benutzerdefinierte Aufbewahrungsfrist von 30 Tagen', NULL, 30, 'DAYS', 'Erstellung des Datensatzes', 'Automatische Loeschung nach 30 Tagen', 12)
+ON CONFLICT (id) DO NOTHING;  -- idempotent: rows whose id already exists are skipped, not updated
+
+-- =============================================================================
+-- Recipients (15); no CONTROLLER-type row and no third-country row is seeded
+-- =============================================================================
+INSERT INTO vvt_lib_recipients (id, type, label_de, description_de, is_third_country, country, sort_order) VALUES
+('INTERNAL_HR', 'INTERNAL', 'Personalabteilung', 'Interne HR-Abteilung', FALSE, 'DE', 1),
+('INTERNAL_FINANCE', 'INTERNAL', 'Finanzabteilung', 'Interne Buchhaltung und Finanzen', FALSE, 'DE', 2),
+('INTERNAL_IT', 'INTERNAL', 'IT-Abteilung', 'Interne IT-Administration', FALSE, 'DE', 3),
+('INTERNAL_MANAGEMENT', 'INTERNAL', 'Geschaeftsfuehrung', 'Geschaeftsfuehrung und Vorstand', FALSE, 'DE', 4),
+('INTERNAL_MARKETING', 'INTERNAL', 'Marketingabteilung', 'Internes Marketing-Team', FALSE, 'DE', 5),
+('INTERNAL_SUPPORT', 'INTERNAL', 'Kundenservice', 'Interner Support und Service', FALSE, 'DE', 6),
+('PROCESSOR_PAYROLL', 'PROCESSOR', 'Lohnabrechnungsdienstleister', 'Externer Gehaltsabrechnungs-Dienstleister', FALSE, 'DE', 7),
+('PROCESSOR_HOSTING', 'PROCESSOR', 'Hosting-Provider', 'Cloud- oder Server-Hosting-Anbieter', FALSE, NULL, 8),
+('PROCESSOR_ANALYTICS', 'PROCESSOR', 'Analytics-Anbieter', 'Web-Analytics und Tracking-Dienstleister', FALSE, NULL, 9),
+('PROCESSOR_EMAIL', 'PROCESSOR', 'E-Mail-Dienstleister', 'Newsletter- und E-Mail-Versand-Anbieter', FALSE, NULL, 10),
+('PROCESSOR_HELPDESK', 'PROCESSOR', 'Helpdesk-Anbieter', 'Ticketsystem- und Support-Plattform', FALSE, NULL, 11),
+('AUTHORITY_FINANZAMT', 'AUTHORITY', 'Finanzamt', 'Zustaendiges Finanzamt', FALSE, 'DE', 12),
+('AUTHORITY_SOZIALVERSICHERUNG', 'AUTHORITY', 'Sozialversicherungstraeger', 'Renten-, Kranken-, Arbeitslosen-, Pflegeversicherung', FALSE, 'DE', 13),
+('AUTHORITY_KRANKENKASSE', 'AUTHORITY', 'Krankenkasse', 'Gesetzliche oder private Krankenkasse', FALSE, 'DE', 14),
+('AUTHORITY_DATENSCHUTZ', 'AUTHORITY', 'Datenschutzbehoerde', 'Zustaendige Datenschutz-Aufsichtsbehoerde', FALSE, 'DE', 15)
+ON CONFLICT (id) DO NOTHING;  -- idempotent: rows whose id already exists are skipped, not updated
+
+-- =============================================================================
+-- Transfer Mechanisms (8); requires_tia is TRUE for both SCC rows and for the TIA row
+-- =============================================================================
+INSERT INTO vvt_lib_transfer_mechanisms (id, label_de, description_de, article, requires_tia, sort_order) VALUES
+('ADEQUACY_DECISION', 'Angemessenheitsbeschluss', 'EU-Angemessenheitsbeschluss gemaess Art. 45 DSGVO', 'Art. 45 DSGVO', FALSE, 1),
+('SCC_CONTROLLER', 'Standardvertragsklauseln (C2C)', 'Standardvertragsklauseln Controller-zu-Controller', 'Art. 46 Abs. 2 lit. c DSGVO', TRUE, 2),
+('SCC_PROCESSOR', 'Standardvertragsklauseln (C2P)', 'Standardvertragsklauseln Controller-zu-Processor', 'Art. 46 Abs. 2 lit. c DSGVO', TRUE, 3),
+('BCR', 'Binding Corporate Rules', 'Verbindliche interne Datenschutzvorschriften', 'Art. 47 DSGVO', FALSE, 4),
+('CONSENT_49A', 'Einwilligung (Art. 49)', 'Ausdrueckliche Einwilligung der betroffenen Person', 'Art. 49 Abs. 1 lit. a DSGVO', FALSE, 5),
+('DEROGATION_49', 'Ausnahme (Art. 49)', 'Ausnahme fuer bestimmte Faelle gemaess Art. 49', 'Art. 49 DSGVO', FALSE, 6),
+('DPF', 'EU-US Data Privacy Framework', 'Zertifizierung unter dem EU-US Data Privacy Framework', 'Art. 45 DSGVO (DPF)', FALSE, 7),
+('TIA', 'Transfer Impact Assessment', 'Einzelfallbezogene Risikobewertung fuer Drittlandtransfers', 'Art. 46 DSGVO + Schrems II', TRUE, 8)
+ON CONFLICT (id) DO NOTHING;  -- idempotent: rows whose id already exists are skipped, not updated
+
+-- =============================================================================
+-- Purposes (20); typical_legal_basis values are ids seeded into vvt_lib_legal_bases above
+-- =============================================================================
+INSERT INTO vvt_lib_purposes (id, label_de, description_de, typical_legal_basis, typical_for, sort_order) VALUES
+('EMPLOYMENT_ADMIN', 'Personalverwaltung', 'Verwaltung des Beschaeftigungsverhaeltnisses', 'BDSG_26', '["hr"]', 1),
+('PAYROLL', 'Gehaltsabrechnung', 'Durchfuehrung der Lohn- und Gehaltsabrechnung', 'BDSG_26', '["hr","finance"]', 2),
+('RECRUITING', 'Bewerbermanagement', 'Durchfuehrung von Bewerbungsverfahren', 'BDSG_26', '["hr"]', 3),
+('TIME_TRACKING', 'Zeiterfassung', 'Erfassung und Verwaltung von Arbeitszeiten', 'ART6_1C', '["hr"]', 4),
+('ACCOUNTING', 'Buchhaltung', 'Fuehrung der Handelsbuecher und Finanzberichterstattung', 'ART6_1C', '["finance"]', 5),
+('INVOICING', 'Rechnungsstellung', 'Erstellung und Verwaltung von Rechnungen', 'ART6_1B', '["finance"]', 6),
+('CRM', 'Kundenbeziehungsmanagement', 'Verwaltung und Pflege von Kundenbeziehungen', 'ART6_1B', '["sales_crm"]', 7),
+('DIRECT_MARKETING', 'Direktmarketing', 'Newsletter-Versand und Werbemassnahmen', 'ART6_1A', '["marketing"]', 8),
+('WEBSITE_ANALYTICS', 'Web-Analyse', 'Analyse des Nutzerverhaltens auf der Website', 'ART6_1A', '["marketing","it_operations"]', 9),
+('CUSTOMER_SUPPORT', 'Kundenbetreuung', 'Bearbeitung von Kundenanfragen und Support-Tickets', 'ART6_1B', '["support"]', 10),
+('IT_ADMIN', 'IT-Administration', 'Verwaltung der IT-Infrastruktur und Benutzerkonten', 'ART6_1F', '["it_operations"]', 11),
+('BACKUP_RECOVERY', 'Datensicherung', 'Backup-Erstellung und Wiederherstellung', 'ART6_1F', '["it_operations"]', 12),
+('SECURITY_MONITORING', 'Sicherheitsueberwachung', 'Log-Analyse und Intrusion Detection', 'ART6_1F', '["it_operations"]', 13),
+('IAM', 'Identitaets- und Zugriffsmanagement', 'Verwaltung von Benutzeridentitaeten und Berechtigungen', 'ART6_1F', '["it_operations"]', 14),
+('VIDEO_CONFERENCING', 'Videokonferenz', 'Durchfuehrung von Online-Meetings und Videokonferenzen', 'ART6_1B', '["other"]', 15),
+('VISITOR_MANAGEMENT', 'Besucherverwaltung', 'Erfassung und Verwaltung von Betriebsbesuchern', 'ART6_1F', '["management"]', 16),
+('PAYMENT_PROCESSING', 'Zahlungsabwicklung', 'Verarbeitung und Abwicklung von Zahlungen', 'ART6_1B', '["finance"]', 17),
+('SOCIAL_MEDIA', 'Social-Media-Marketing', 'Betrieb von Social-Media-Praesenzen', 'ART6_1A', '["marketing"]', 18),
+('SALES_REPORTING', 'Vertriebssteuerung', 'Vertriebsanalysen und Berichterstattung', 'ART6_1F', '["sales_crm"]', 19),
+('COMPLIANCE_DOCS', 'Compliance-Dokumentation', 'Erstellung und Pflege von Compliance-Dokumenten', 'ART6_1C', '["legal","management"]', 20)
+ON CONFLICT (id) DO NOTHING;  -- idempotent: rows whose id already exists are skipped, not updated
+
+-- =============================================================================
+-- TOMs (20); four rows for each of the five category values
+-- =============================================================================
+INSERT INTO vvt_lib_toms (id, category, label_de, description_de, art32_reference, sort_order) VALUES
+('AC_RBAC', 'accessControl', 'Rollenbasierte Zugriffskontrolle (RBAC)', 'Zugriff nur nach Rolle und Berechtigung', 'Art. 32 Abs. 1 lit. b', 1),
+('AC_MFA', 'accessControl', 'Multi-Faktor-Authentifizierung', 'Zwei- oder mehrstufige Anmeldung', 'Art. 32 Abs. 1 lit. b', 2),
+('AC_NEED_TO_KNOW', 'accessControl', 'Need-to-Know-Prinzip', 'Zugriff nur auf fuer die Aufgabe erforderliche Daten', 'Art. 32 Abs. 1 lit. b', 3),
+('AC_PAM', 'accessControl', 'Privileged Access Management', 'Verwaltung und Ueberwachung privilegierter Zugaenge', 'Art. 32 Abs. 1 lit. b', 4),
+('CONF_ENCRYPTION_REST', 'confidentiality', 'Verschluesselung ruhender Daten', 'AES-256 Verschluesselung fuer gespeicherte Daten', 'Art. 32 Abs. 1 lit. a', 5),
+('CONF_ENCRYPTION_TRANSIT', 'confidentiality', 'Transportverschluesselung', 'TLS 1.3 fuer alle Datenuebertragungen', 'Art. 32 Abs. 1 lit. a', 6),
+('CONF_PSEUDONYMIZATION', 'confidentiality', 'Pseudonymisierung', 'Verarbeitung ohne direkten Personenbezug', 'Art. 32 Abs. 1 lit. a', 7),
+('CONF_NDA', 'confidentiality', 'Vertraulichkeitsvereinbarungen', 'NDAs fuer Mitarbeiter und Auftragnehmer', 'Art. 32 Abs. 1 lit. b', 8),
+('INT_AUDIT_LOG', 'integrity', 'Audit-Logging', 'Lueckenlose Protokollierung aller Datenzugriffe', 'Art. 32 Abs. 1 lit. b', 9),
+('INT_FOUR_EYES', 'integrity', 'Vier-Augen-Prinzip', 'Kritische Aenderungen nur mit Freigabe durch zweite Person', 'Art. 32 Abs. 1 lit. b', 10),
+('INT_CHECKSUMS', 'integrity', 'Pruefsummen und Hashing', 'Integritaetspruefung durch kryptographische Hashes', 'Art. 32 Abs. 1 lit. b', 11),
+('INT_CHANGE_MGMT', 'integrity', 'Change Management', 'Dokumentierter Aenderungsprozess fuer IT-Systeme', 'Art. 32 Abs. 1 lit. b', 12),
+('AVAIL_BACKUP', 'availability', 'Regelmaessige Backups', 'Taegliche und woechentliche Datensicherungen', 'Art. 32 Abs. 1 lit. c', 13),
+('AVAIL_REDUNDANCY', 'availability', 'Redundante Systeme', 'Hochverfuegbarkeit durch Systemredundanz', 'Art. 32 Abs. 1 lit. c', 14),
+('AVAIL_321_RULE', 'availability', '3-2-1 Backup-Regel', 'Drei Kopien, zwei Medien, ein externer Standort', 'Art. 32 Abs. 1 lit. c', 15),
+('AVAIL_MONITORING', 'availability', 'System-Monitoring', 'Kontinuierliche Ueberwachung der Systemverfuegbarkeit', 'Art. 32 Abs. 1 lit. c', 16),
+('SEP_TENANT_ISOLATION', 'separation', 'Mandantentrennung', 'Logische Trennung der Daten verschiedener Mandanten', 'Art. 32 Abs. 1 lit. b', 17),
+('SEP_NETWORK_SEG', 'separation', 'Netzwerksegmentierung', 'Trennung von Netzwerkbereichen (VLANs, Firewalls)', 'Art. 32 Abs. 1 lit. b', 18),
+('SEP_DATA_SEPARATION', 'separation', 'Datentrennung', 'Separate Datenbanken oder Schemas pro Zweck', 'Art. 32 Abs. 1 lit. b', 19),
+('SEP_ENV_SEPARATION', 'separation', 'Umgebungstrennung', 'Getrennte Entwicklungs-, Test- und Produktionsumgebungen', 'Art. 32 Abs. 1 lit. b', 20)
+ON CONFLICT (id) DO NOTHING;  -- idempotent: rows whose id already exists are skipped, not updated
+
+COMMIT;
@@ -0,0 +1,54 @@
+-- Migration 066: VVT Process Templates + Activity extensions
+-- Template table + new ref columns on compliance_vvt_activities
+
+BEGIN;
+
+-- =============================================================================
+-- Process Templates: one row per reusable processing-activity template
+-- =============================================================================
+CREATE TABLE IF NOT EXISTS vvt_process_templates (
+    id VARCHAR(80) PRIMARY KEY,  -- slug-style key in seed migration 067, e.g. 'hr-mitarbeiterverwaltung'
+    name VARCHAR(300) NOT NULL,
+    description TEXT,
+    business_function VARCHAR(50),  -- e.g. 'hr' in the seed; indexed below
+    purpose_refs JSONB DEFAULT '[]'::jsonb,  -- the *_refs columns hold JSON arrays of vvt_lib_* ids in the seed; nothing enforces them as FKs
+    legal_basis_refs JSONB DEFAULT '[]'::jsonb,
+    data_subject_refs JSONB DEFAULT '[]'::jsonb,
+    data_category_refs JSONB DEFAULT '[]'::jsonb,
+    recipient_refs JSONB DEFAULT '[]'::jsonb,
+    tom_refs JSONB DEFAULT '[]'::jsonb,
+    transfer_mechanism_refs JSONB DEFAULT '[]'::jsonb,
+    retention_rule_ref VARCHAR(50),  -- single id (e.g. 'HGB_257_10Y'), not an array; no FK is declared
+    typical_systems JSONB DEFAULT '[]'::jsonb,  -- JSON array of free-text system names
+    protection_level VARCHAR(10) DEFAULT 'MEDIUM',  -- no CHECK constraint; the seed also uses 'HIGH'
+    dpia_required BOOLEAN DEFAULT FALSE,
+    risk_score INTEGER,  -- no range constraint
+    tags JSONB DEFAULT '[]'::jsonb,
+    is_system BOOLEAN DEFAULT TRUE,  -- NOTE(review): presumably TRUE = shipped template with tenant_id NULL; confirm
+    tenant_id UUID,  -- nullable, no FK and no index in this migration
+    sort_order INTEGER DEFAULT 0,
+    created_at TIMESTAMPTZ DEFAULT NOW(),
+    updated_at TIMESTAMPTZ DEFAULT NOW()  -- only a default; no trigger is defined here to refresh it on UPDATE
+);
+
+CREATE INDEX IF NOT EXISTS idx_vvt_process_templates_bf ON vvt_process_templates(business_function);
+CREATE INDEX IF NOT EXISTS idx_vvt_process_templates_system ON vvt_process_templates(is_system);
+
+-- =============================================================================
+-- New columns on compliance_vvt_activities (all DEFAULT NULL for backward compat)
+-- =============================================================================
+ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS purpose_refs JSONB DEFAULT NULL;  -- the *_refs columns share names with those on vvt_process_templates
+ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS legal_basis_refs JSONB DEFAULT NULL;
+ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS data_subject_refs JSONB DEFAULT NULL;
+ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS data_category_refs JSONB DEFAULT NULL;
+ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS recipient_refs JSONB DEFAULT NULL;
+ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS retention_rule_ref VARCHAR(50) DEFAULT NULL;  -- scalar, same width as vvt_lib_retention_rules.id
+ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS transfer_mechanism_refs JSONB DEFAULT NULL;
+ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS tom_refs JSONB DEFAULT NULL;
+ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS linked_loeschfristen_ids JSONB DEFAULT NULL;  -- "Loeschfristen" is German for deletion periods
+ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS linked_tom_measure_ids JSONB DEFAULT NULL;
+ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS source_template_id VARCHAR(80) DEFAULT NULL;  -- presumably a vvt_process_templates.id (same width); no FK is declared
+ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS risk_score INTEGER DEFAULT NULL;
+ALTER TABLE compliance_vvt_activities ADD COLUMN IF NOT EXISTS art30_completeness JSONB DEFAULT NULL;
+
+COMMIT;
@@ -0,0 +1,305 @@
+-- Migration 067: VVT Process Templates Seed — 18 templates from vvt-baseline-catalog
+-- All content self-authored, MIT-compatible.
+--
+-- How to read the tuples below: values are positional and follow the column
+-- list of the INSERT exactly:
+--    1 id                  2 name               3 description
+--    4 business_function   5 purpose_refs       6 legal_basis_refs
+--    7 data_subject_refs   8 data_category_refs 9 recipient_refs
+--   10 tom_refs           11 retention_rule_ref 12 typical_systems
+--   13 protection_level   14 dpia_required      15 risk_score
+--   16 tags               17 sort_order
+-- Positions 5-10, 12 and 16 are written as JSON array text.
+-- Columns not named in the list (e.g. is_system) take their column defaults.
+-- The statement ends with ON CONFLICT (id) DO NOTHING, so rows whose id is
+-- already present are left untouched when the migration is re-run.
+
+BEGIN;
+
+INSERT INTO vvt_process_templates (id, name, description, business_function, purpose_refs, legal_basis_refs, data_subject_refs, data_category_refs, recipient_refs, tom_refs, retention_rule_ref, typical_systems, protection_level, dpia_required, risk_score, tags, sort_order) VALUES
+
+-- HR Templates
+('hr-mitarbeiterverwaltung',
+ 'Mitarbeiterverwaltung',
+ 'Verwaltung des Beschaeftigungsverhaeltnisses inkl. Personalakte, Urlaub, Krankmeldungen',
+ 'hr',
+ '["EMPLOYMENT_ADMIN", "PAYROLL"]',
+ '["BDSG_26", "ART6_1B"]',
+ '["EMPLOYEES"]',
+ '["NAME", "DOB", "ADDRESS", "CONTACT", "SOCIAL_SECURITY", "BANK_ACCOUNT", "EMPLOYMENT_DATA", "HEALTH_DATA"]',
+ '["INTERNAL_HR", "INTERNAL_FINANCE", "PROCESSOR_PAYROLL", "AUTHORITY_SOZIALVERSICHERUNG", "AUTHORITY_KRANKENKASSE"]',
+ '["AC_RBAC", "AC_NEED_TO_KNOW", "CONF_ENCRYPTION_REST", "CONF_ENCRYPTION_TRANSIT", "INT_AUDIT_LOG", "SEP_TENANT_ISOLATION"]',
+ 'HGB_257_10Y',
+ '["HR-Software", "Personalakte (digital)"]',
+ 'HIGH', TRUE, 3,
+ '["personal", "pflicht"]',
+ 1),
+
+('hr-gehaltsabrechnung',
+ 'Gehaltsabrechnung',
+ 'Monatliche Lohn- und Gehaltsabrechnung inkl. Steuer- und Sozialversicherungsmeldungen',
+ 'hr',
+ '["PAYROLL"]',
+ '["BDSG_26", "ART6_1C"]',
+ '["EMPLOYEES"]',
+ '["NAME", "ADDRESS", "SOCIAL_SECURITY", "TAX_ID", "BANK_ACCOUNT", "SALARY_DATA"]',
+ '["INTERNAL_HR", "INTERNAL_FINANCE", "PROCESSOR_PAYROLL", "AUTHORITY_FINANZAMT", "AUTHORITY_SOZIALVERSICHERUNG"]',
+ '["AC_RBAC", "AC_NEED_TO_KNOW", "CONF_ENCRYPTION_REST", "CONF_ENCRYPTION_TRANSIT", "INT_AUDIT_LOG", "INT_FOUR_EYES"]',
+ 'AO_147_10Y',
+ '["Lohnabrechnungssoftware", "DATEV"]',
+ 'HIGH', FALSE, 3,
+ '["personal", "finanzen", "pflicht"]',
+ 2),
+
+('hr-bewerbermanagement',
+ 'Bewerbermanagement',
+ 'Durchfuehrung von Bewerbungsverfahren vom Eingang bis zur Zu-/Absage',
+ 'hr',
+ '["RECRUITING"]',
+ '["BDSG_26", "ART6_1B"]',
+ '["APPLICANTS"]',
+ '["NAME", "DOB", "ADDRESS", "CONTACT", "EDUCATION_DATA", "PHOTO_VIDEO"]',
+ '["INTERNAL_HR", "INTERNAL_MANAGEMENT"]',
+ '["AC_RBAC", "AC_NEED_TO_KNOW", "CONF_ENCRYPTION_REST", "CONF_NDA"]',
+ 'AGG_15_6M',
+ '["Bewerbermanagement-Software", "E-Mail"]',
+ 'MEDIUM', FALSE, 2,
+ '["personal", "recruiting"]',
+ 3),
+
+('hr-zeiterfassung',
+ 'Zeiterfassung',
+ 'Erfassung und Verwaltung von Arbeitszeiten gemaess ArbZG',
+ 'hr',
+ '["TIME_TRACKING"]',
+ '["ART6_1C", "BDSG_26"]',
+ '["EMPLOYEES"]',
+ '["NAME", "EMPLOYMENT_DATA"]',
+ '["INTERNAL_HR", "INTERNAL_MANAGEMENT"]',
+ '["AC_RBAC", "INT_AUDIT_LOG", "CONF_ENCRYPTION_TRANSIT"]',
+ 'ARBZG_16_2Y',
+ '["Zeiterfassungssystem", "Stempeluhr"]',
+ 'LOW', FALSE, 1,
+ '["personal", "pflicht"]',
+ 4),
+
+-- Finance Templates
+('finance-buchhaltung',
+ 'Buchhaltung',
+ 'Fuehrung der Handelsbuecher und steuerrechtliche Dokumentation',
+ 'finance',
+ '["ACCOUNTING", "INVOICING"]',
+ '["ART6_1C", "ART6_1B"]',
+ '["CUSTOMERS", "SUPPLIERS", "EMPLOYEES"]',
+ '["NAME", "ADDRESS", "CONTACT", "BANK_ACCOUNT", "PAYMENT_DATA", "CONTRACT_DATA", "TAX_ID"]',
+ '["INTERNAL_FINANCE", "AUTHORITY_FINANZAMT", "PROCESSOR_HOSTING"]',
+ '["AC_RBAC", "INT_AUDIT_LOG", "INT_FOUR_EYES", "CONF_ENCRYPTION_REST", "AVAIL_BACKUP"]',
+ 'HGB_257_10Y',
+ '["Buchhaltungssoftware", "DATEV", "ERP-System"]',
+ 'HIGH', FALSE, 2,
+ '["finanzen", "pflicht"]',
+ 5),
+
+('finance-zahlungsverkehr',
+ 'Zahlungsverkehr',
+ 'Verarbeitung und Abwicklung von ein- und ausgehenden Zahlungen',
+ 'finance',
+ '["PAYMENT_PROCESSING"]',
+ '["ART6_1B", "ART6_1C"]',
+ '["CUSTOMERS", "SUPPLIERS"]',
+ '["NAME", "BANK_ACCOUNT", "PAYMENT_DATA", "CONTRACT_DATA"]',
+ '["INTERNAL_FINANCE", "PROCESSOR_HOSTING"]',
+ '["AC_RBAC", "AC_MFA", "CONF_ENCRYPTION_REST", "CONF_ENCRYPTION_TRANSIT", "INT_AUDIT_LOG"]',
+ 'HGB_257_10Y',
+ '["Online-Banking", "Payment-Gateway"]',
+ 'HIGH', FALSE, 3,
+ '["finanzen"]',
+ 6),
+
+-- Sales/CRM Templates
+('sales-kundenverwaltung',
+ 'Kundenverwaltung',
+ 'Verwaltung und Pflege der Kundenbeziehungen im CRM-System',
+ 'sales_crm',
+ '["CRM"]',
+ '["ART6_1B", "ART6_1F"]',
+ '["CUSTOMERS", "PROSPECTIVE_CUSTOMERS"]',
+ '["NAME", "ADDRESS", "CONTACT", "CONTRACT_DATA", "COMMUNICATION_DATA"]',
+ '["INTERNAL_MARKETING", "INTERNAL_SUPPORT", "PROCESSOR_HOSTING"]',
+ '["AC_RBAC", "CONF_ENCRYPTION_REST", "CONF_ENCRYPTION_TRANSIT", "INT_AUDIT_LOG", "SEP_TENANT_ISOLATION"]',
+ 'BGB_195_3Y',
+ '["CRM-System", "E-Mail-Client"]',
+ 'MEDIUM', FALSE, 2,
+ '["vertrieb", "kunden"]',
+ 7),
+
+('sales-vertriebssteuerung',
+ 'Vertriebssteuerung',
+ 'Vertriebsanalysen, Forecasting und Berichterstattung',
+ 'sales_crm',
+ '["SALES_REPORTING"]',
+ '["ART6_1F"]',
+ '["CUSTOMERS", "PROSPECTIVE_CUSTOMERS"]',
+ '["NAME", "CONTACT", "CONTRACT_DATA"]',
+ '["INTERNAL_MANAGEMENT", "INTERNAL_MARKETING"]',
+ '["AC_RBAC", "AC_NEED_TO_KNOW", "CONF_PSEUDONYMIZATION"]',
+ 'BGB_195_3Y',
+ '["CRM-System", "BI-Tool"]',
+ 'LOW', FALSE, 1,
+ '["vertrieb", "reporting"]',
+ 8),
+
+-- Marketing Templates
+('marketing-newsletter',
+ 'Newsletter-Versand',
+ 'Versand von Newslettern und Werbemails an Abonnenten',
+ 'marketing',
+ '["DIRECT_MARKETING"]',
+ '["ART6_1A", "UWG_7"]',
+ '["NEWSLETTER_SUBSCRIBERS", "CUSTOMERS"]',
+ '["NAME", "CONTACT", "USAGE_DATA"]',
+ '["INTERNAL_MARKETING", "PROCESSOR_EMAIL"]',
+ '["AC_RBAC", "CONF_ENCRYPTION_TRANSIT", "SEP_DATA_SEPARATION"]',
+ 'CONSENT_REVOKE',
+ '["Newsletter-Tool", "E-Mail-Marketing-Plattform"]',
+ 'LOW', FALSE, 1,
+ '["marketing", "einwilligung"]',
+ 9),
+
+('marketing-website-analytics',
+ 'Website-Analyse',
+ 'Analyse des Nutzerverhaltens auf der Unternehmenswebsite',
+ 'marketing',
+ '["WEBSITE_ANALYTICS"]',
+ '["ART6_1A"]',
+ '["WEBSITE_USERS"]',
+ '["IP_ADDRESS", "DEVICE_ID", "USAGE_DATA"]',
+ '["INTERNAL_MARKETING", "PROCESSOR_ANALYTICS"]',
+ '["CONF_PSEUDONYMIZATION", "CONF_ENCRYPTION_TRANSIT", "SEP_DATA_SEPARATION"]',
+ 'CUSTOM_14M',
+ '["Web-Analytics-Tool", "Tag-Manager"]',
+ 'LOW', FALSE, 1,
+ '["marketing", "einwilligung", "tracking"]',
+ 10),
+
+('marketing-social-media',
+ 'Social-Media-Marketing',
+ 'Betrieb und Verwaltung von Social-Media-Praesenzen',
+ 'marketing',
+ '["SOCIAL_MEDIA"]',
+ '["ART6_1A", "ART6_1F"]',
+ '["WEBSITE_USERS", "CUSTOMERS"]',
+ '["NAME", "CONTACT", "USAGE_DATA", "PHOTO_VIDEO"]',
+ '["INTERNAL_MARKETING", "PROCESSOR_ANALYTICS"]',
+ '["AC_RBAC", "CONF_ENCRYPTION_TRANSIT"]',
+ 'PURPOSE_END',
+ '["Social-Media-Plattformen", "Social-Media-Management-Tool"]',
+ 'LOW', FALSE, 1,
+ '["marketing", "social-media"]',
+ 11),
+
+-- Support Templates
+('support-ticketsystem',
+ 'Ticketsystem / Kundenservice',
+ 'Bearbeitung von Kundenanfragen ueber das Ticketsystem',
+ 'support',
+ '["CUSTOMER_SUPPORT"]',
+ '["ART6_1B"]',
+ '["CUSTOMERS"]',
+ '["NAME", "CONTACT", "COMMUNICATION_DATA", "CONTRACT_DATA"]',
+ '["INTERNAL_SUPPORT", "PROCESSOR_HELPDESK"]',
+ '["AC_RBAC", "CONF_ENCRYPTION_TRANSIT", "INT_AUDIT_LOG"]',
+ 'BGB_195_3Y',
+ '["Ticketsystem", "Help-Desk-Software"]',
+ 'MEDIUM', FALSE, 1,
+ '["support", "kunden"]',
+ 12),
+
+-- IT Templates
+('it-systemadministration',
+ 'IT-Systemadministration',
+ 'Verwaltung der IT-Infrastruktur, Benutzerkonten und Berechtigungen',
+ 'it_operations',
+ '["IT_ADMIN"]',
+ '["ART6_1F", "ART6_1B"]',
+ '["EMPLOYEES"]',
+ '["NAME", "LOGIN_DATA", "IP_ADDRESS", "DEVICE_ID"]',
+ '["INTERNAL_IT", "PROCESSOR_HOSTING"]',
+ '["AC_RBAC", "AC_MFA", "AC_PAM", "CONF_ENCRYPTION_REST", "CONF_ENCRYPTION_TRANSIT", "INT_AUDIT_LOG", "SEP_NETWORK_SEG", "SEP_ENV_SEPARATION"]',
+ 'CUSTOM_90D',
+ '["Active Directory", "LDAP", "IT-Management-Tool"]',
+ 'HIGH', FALSE, 2,
+ '["it", "infrastruktur"]',
+ 13),
+
+('it-backup',
+ 'Datensicherung und Recovery',
+ 'Regelmaessige Backups und Wiederherstellungsverfahren',
+ 'it_operations',
+ '["BACKUP_RECOVERY"]',
+ '["ART6_1F"]',
+ '["EMPLOYEES", "CUSTOMERS"]',
+ '["NAME", "ADDRESS", "CONTACT", "CONTRACT_DATA", "LOGIN_DATA"]',
+ '["INTERNAL_IT", "PROCESSOR_HOSTING"]',
+ '["AVAIL_BACKUP", "AVAIL_321_RULE", "AVAIL_REDUNDANCY", "CONF_ENCRYPTION_REST", "INT_CHECKSUMS"]',
+ 'CUSTOM_90D',
+ '["Backup-Software", "Cloud-Backup", "NAS"]',
+ 'HIGH', FALSE, 2,
+ '["it", "verfuegbarkeit"]',
+ 14),
+
+('it-logging',
+ 'Logging und Sicherheitsueberwachung',
+ 'Protokollierung von System- und Sicherheitsereignissen',
+ 'it_operations',
+ '["SECURITY_MONITORING"]',
+ '["ART6_1F"]',
+ '["EMPLOYEES", "CUSTOMERS", "WEBSITE_USERS"]',
+ '["IP_ADDRESS", "LOGIN_DATA", "USAGE_DATA", "DEVICE_ID"]',
+ '["INTERNAL_IT"]',
+ '["CONF_ENCRYPTION_REST", "INT_AUDIT_LOG", "INT_CHECKSUMS", "AVAIL_MONITORING", "SEP_DATA_SEPARATION"]',
+ 'CUSTOM_90D',
+ '["SIEM-System", "Log-Management", "Monitoring-Tool"]',
+ 'MEDIUM', FALSE, 2,
+ '["it", "sicherheit"]',
+ 15),
+
+('it-iam',
+ 'Identitaets- und Zugriffsmanagement',
+ 'Verwaltung von Benutzeridentitaeten, Rollen und Berechtigungen',
+ 'it_operations',
+ '["IAM"]',
+ '["ART6_1F", "BDSG_26"]',
+ '["EMPLOYEES"]',
+ '["NAME", "LOGIN_DATA", "EMPLOYMENT_DATA"]',
+ '["INTERNAL_IT", "INTERNAL_HR"]',
+ '["AC_RBAC", "AC_MFA", "AC_PAM", "AC_NEED_TO_KNOW", "INT_AUDIT_LOG", "CONF_ENCRYPTION_REST"]',
+ 'AGG_15_6M',
+ '["IAM-System", "SSO-Provider", "Active Directory"]',
+ 'HIGH', FALSE, 2,
+ '["it", "sicherheit", "zugriffskontrolle"]',
+ 16),
+
+-- Other Templates
+('other-videokonferenz',
+ 'Videokonferenz',
+ 'Durchfuehrung von Online-Meetings und Videokonferenzen',
+ 'other',
+ '["VIDEO_CONFERENCING"]',
+ '["ART6_1B", "ART6_1F"]',
+ '["EMPLOYEES", "CUSTOMERS", "BUSINESS_PARTNERS"]',
+ '["NAME", "CONTACT", "PHOTO_VIDEO", "IP_ADDRESS"]',
+ '["INTERNAL_IT", "PROCESSOR_HOSTING"]',
+ '["CONF_ENCRYPTION_TRANSIT", "AC_RBAC"]',
+ 'PURPOSE_END',
+ '["Videokonferenz-Tool", "Webinar-Plattform"]',
+ 'LOW', FALSE, 1,
+ '["kommunikation"]',
+ 17),
+
+('other-besuchermanagement',
+ 'Besuchermanagement',
+ 'Erfassung und Verwaltung von Betriebsbesuchern',
+ 'other',
+ '["VISITOR_MANAGEMENT"]',
+ '["ART6_1F"]',
+ '["VISITORS"]',
+ '["NAME", "CONTACT", "PHOTO_VIDEO"]',
+ '["INTERNAL_MANAGEMENT"]',
+ '["AC_RBAC", "CONF_ENCRYPTION_REST"]',
+ 'CUSTOM_30D',
+ '["Besuchermanagement-System", "Empfangsterminal"]',
+ 'LOW', FALSE, 1,
+ '["sonstiges", "besucher"]',
+ 18)
+
+ON CONFLICT (id) DO NOTHING;
+
+COMMIT;
@@ -0,0 +1,65 @@
+-- Migration 068: TOM ↔ Canonical Control Mappings
+-- Bridge table connecting TOM measures (88) to Canonical Controls (10,000+)
+-- Enables three-layer architecture: TOM → Mapping → Canonical Controls
+--
+-- Uniqueness and NULL project_id:
+-- project_id is nullable in both tables. PostgreSQL treats NULLs as distinct
+-- inside a UNIQUE constraint, so the table-level UNIQUE constraints only
+-- deduplicate rows that carry a project_id. Each table therefore also gets a
+-- partial unique index covering the rows where project_id IS NULL.
+
+-- ============================================================================
+-- 1. Mapping table (TOM control code → Canonical control)
+-- ============================================================================
+
+CREATE TABLE IF NOT EXISTS tom_control_mappings (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    tenant_id UUID NOT NULL,
+    project_id UUID,
+
+    -- TOM side (references the embedded TOM control code, e.g. 'TOM-AC-01')
+    tom_control_code VARCHAR(20) NOT NULL,
+    tom_category VARCHAR(50) NOT NULL,
+
+    -- Canonical control side
+    canonical_control_id UUID NOT NULL,
+    canonical_control_code VARCHAR(20) NOT NULL,
+    canonical_category VARCHAR(50),
+
+    -- Mapping metadata
+    mapping_type VARCHAR(20) NOT NULL DEFAULT 'auto'
+        CHECK (mapping_type IN ('auto', 'manual')),
+    relevance_score NUMERIC(3,2) DEFAULT 1.00
+        CHECK (relevance_score >= 0 AND relevance_score <= 1),
+
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+
+    -- No duplicate mappings per tenant+project+TOM+canonical.
+    -- Only effective for rows with a non-NULL project_id; the NULL case is
+    -- enforced by uq_tcm_mapping_no_project below.
+    UNIQUE (tenant_id, project_id, tom_control_code, canonical_control_id)
+);
+
+CREATE INDEX IF NOT EXISTS idx_tcm_tenant_project
+    ON tom_control_mappings (tenant_id, project_id);
+CREATE INDEX IF NOT EXISTS idx_tcm_tom_code
+    ON tom_control_mappings (tom_control_code);
+CREATE INDEX IF NOT EXISTS idx_tcm_canonical_id
+    ON tom_control_mappings (canonical_control_id);
+CREATE INDEX IF NOT EXISTS idx_tcm_tom_category
+    ON tom_control_mappings (tom_category);
+
+-- No duplicate mappings per tenant+TOM+canonical when no project is set.
+-- NOTE: an upsert aimed at these rows has to name the index predicate, i.e.
+-- ON CONFLICT (tenant_id, tom_control_code, canonical_control_id)
+--     WHERE project_id IS NULL
+-- (or use ON CONFLICT DO NOTHING without a target).
+-- NOTE: on a database where the table already holds duplicate NULL-project
+-- rows, this statement fails until those duplicates are removed.
+CREATE UNIQUE INDEX IF NOT EXISTS uq_tcm_mapping_no_project
+    ON tom_control_mappings (tenant_id, tom_control_code, canonical_control_id)
+    WHERE project_id IS NULL;
+
+-- ============================================================================
+-- 2. Sync state (tracks when the last sync ran + profile hash)
+-- ============================================================================
+
+CREATE TABLE IF NOT EXISTS tom_control_sync_state (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    tenant_id UUID NOT NULL,
+    project_id UUID,
+
+    -- Profile hash to detect changes (SHA-256 of serialized company profile)
+    profile_hash VARCHAR(64),
+
+    -- Stats from last sync
+    total_mappings INTEGER DEFAULT 0,
+    canonical_controls_matched INTEGER DEFAULT 0,
+    tom_controls_covered INTEGER DEFAULT 0,
+
+    last_synced_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+
+    -- One sync state per tenant+project.
+    -- Only effective for rows with a non-NULL project_id; the NULL case is
+    -- enforced by uq_tcss_tenant_no_project below.
+    UNIQUE (tenant_id, project_id)
+);
+
+-- One sync state per tenant when no project is set.
+-- Upserts for these rows need: ON CONFLICT (tenant_id) WHERE project_id IS NULL
+CREATE UNIQUE INDEX IF NOT EXISTS uq_tcss_tenant_no_project
+    ON tom_control_sync_state (tenant_id)
+    WHERE project_id IS NULL;
@@ -0,0 +1,3 @@
+-- Obligations: link to vendors for Art. 28 GDPR (JSONB array, empty by default)
+ALTER TABLE compliance_obligations
+  ADD COLUMN IF NOT EXISTS linked_vendor_ids JSONB DEFAULT '[]'::jsonb;
@@ -0,0 +1,3 @@
+-- Loeschfristen (retention periods): link to vendors (JSONB array, empty by default)
+ALTER TABLE compliance_loeschfristen
+  ADD COLUMN IF NOT EXISTS linked_vendor_ids JSONB DEFAULT '[]'::jsonb;
@@ -0,0 +1,873 @@
+-- Migration 073: Module Document Templates
+-- Reference templates for VVT, TOM, Loeschfristen and Pflichten modules
+-- These match the structure of the module-specific document generators
+-- and enable versioning in the document-generator
+
+-- ===========================================================================
+-- Template 1: VVT — record of processing activities (Art. 30 GDPR)
+--
+-- Inserts a single 'vvt_register' row for the hard-coded tenant id. The
+-- WHERE NOT EXISTS guard at the end of the statement skips the insert when
+-- that tenant already has a 'vvt_register' row, so a re-run adds no copy.
+-- The content value is a dollar-quoted Markdown document; the {{NAME}} tokens
+-- it uses are listed in the placeholders value that follows it.
+-- ===========================================================================
+INSERT INTO compliance_legal_templates (
+    id, tenant_id, document_type, title, description, content,
+    placeholders, language, jurisdiction,
+    license_id, license_name, source_name,
+    attribution_required, is_complete_document, version, status,
+    created_at, updated_at
+) SELECT
+    gen_random_uuid(),
+    '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e',
+    'vvt_register',
+    'Verarbeitungsverzeichnis (Art. 30 DSGVO)',
+    'Vollstaendiges Verzeichnis von Verarbeitungstaetigkeiten gemaess Art. 30 Abs. 1 DSGVO. Dokumentiert alle Verarbeitungen mit Rechtsgrundlagen, Datenkategorien, Empfaengern, Drittlandtransfers und Loeschfristen.',
+    $template$# Verarbeitungsverzeichnis (Art. 30 DSGVO)
+
+## Dokumentenkontrolle
+
+| Feld | Wert |
+|------|------|
+| Unternehmen | {{COMPANY_NAME}} |
+| Dokumenttyp | Verzeichnis von Verarbeitungstaetigkeiten |
+| Version | {{DOCUMENT_VERSION}} |
+| Datum | {{VERSION_DATE}} |
+| Klassifizierung | Vertraulich |
+| Datenschutzbeauftragter | {{DPO_NAME}} |
+| Kontakt DSB | {{DPO_CONTACT}} |
+| Verantwortlicher | {{RESPONSIBLE_PERSON}} |
+| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
+
+### Aenderungshistorie
+
+| Version | Datum | Autor | Aenderung |
+|---------|-------|-------|-----------|
+| {{DOCUMENT_VERSION}} | {{VERSION_DATE}} | {{DPO_NAME}} | Erstfassung |
+
+---
+
+## 1. Ziel und Zweck
+
+Dieses Verarbeitungsverzeichnis dient der Dokumentation aller Verarbeitungstaetigkeiten von **{{COMPANY_NAME}}** gemaess Art. 30 Abs. 1 DSGVO. Es enthaelt saemtliche Pflichtangaben und wird regelmaessig auf Vollstaendigkeit und Aktualitaet geprueft.
+
+### Gesetzliche Grundlage
+
+| Rechtsgrundlage | Inhalt |
+|-----------------|--------|
+| **Art. 30 Abs. 1 DSGVO** | Pflicht des Verantwortlichen, ein Verzeichnis aller Verarbeitungstaetigkeiten zu fuehren |
+| **Art. 30 Abs. 2 DSGVO** | Pflicht des Auftragsverarbeiters, ein Verzeichnis aller Kategorien von Verarbeitungstaetigkeiten zu fuehren |
+| **Art. 30 Abs. 4 DSGVO** | Bereitstellungspflicht gegenueber der Aufsichtsbehoerde |
+| **Art. 5 Abs. 2 DSGVO** | Rechenschaftspflicht — Nachweis der Einhaltung der DSGVO-Grundsaetze |
+
+---
+
+## 2. Organisation und Verantwortlichkeiten
+
+| Rolle | Person / Abteilung |
+|-------|--------------------|
+| Verantwortlicher (Art. 4 Nr. 7) | {{RESPONSIBLE_PERSON}} |
+| Datenschutzbeauftragter (Art. 37-39) | {{DPO_NAME}} ({{DPO_CONTACT}}) |
+| VVT-Pflege | Fachabteilungen in Abstimmung mit DSB |
+
+**Hinweis:** Jede Fachabteilung ist verpflichtet, neue Verarbeitungstaetigkeiten vor deren Beginn beim DSB zu melden. Aenderungen an bestehenden Verarbeitungen sind unverzueglich zu kommunizieren.
+
+---
+
+## 3. Verarbeitungstaetigkeiten (Art. 30 Abs. 1)
+
+### Pflichtangaben je Verarbeitungstaetigkeit
+
+Fuer jede Verarbeitungstaetigkeit werden folgende Pflichtfelder nach Art. 30 DSGVO dokumentiert:
+
+| Pflichtfeld (Art. 30) | Beschreibung |
+|------------------------|-------------|
+| **VVT-Nr.** | Eindeutige Kennung der Verarbeitungstaetigkeit |
+| **Bezeichnung** | Bezeichnung der Verarbeitungstaetigkeit |
+| **Verantwortlicher** | Name und Kontaktdaten des Verantwortlichen |
+| **Geschaeftsbereich** | Zustaendige Organisationseinheit |
+| **Zwecke der Verarbeitung** | Beschreibung aller Verarbeitungszwecke |
+| **Rechtsgrundlage(n)** | Art. 6 Abs. 1 lit. a-f DSGVO; ggf. Art. 9 Abs. 2 DSGVO |
+| **Kategorien betroffener Personen** | z.B. Mitarbeiter, Kunden, Lieferanten, Schueler |
+| **Kategorien personenbezogener Daten** | z.B. Stammdaten, Kontaktdaten, Vertragsdaten; Art. 9-Kategorien gesondert kennzeichnen |
+| **Empfaengerkategorien** | Intern, extern, Auftragsverarbeiter, Behoerden |
+| **Uebermittlung an Drittlaender** | Zielland, Empfaenger, Transfermechanismus (Art. 44-49) |
+| **Loeschfristen** | Vorgesehene Fristen fuer die Loeschung, Rechtsgrundlage, Verfahren |
+| **TOM (Art. 32)** | Beschreibung der technischen und organisatorischen Massnahmen |
+
+### Verarbeitungsuebersicht
+
+*Die konkreten Verarbeitungstaetigkeiten werden vom VVT-Modul automatisch in das Dokument eingefuegt. Jede Verarbeitungstaetigkeit wird als separate Detailkarte mit allen Pflichtfeldern dargestellt.*
+
+| VVT-Nr. | Bezeichnung | Geschaeftsbereich | Rechtsgrundlage | Status |
+|----------|-------------|-------------------|-----------------|--------|
+| *Wird automatisch befuellt* | | | | |
+
+### Detailkarten
+
+Fuer jede Verarbeitungstaetigkeit wird eine Detailkarte erstellt mit:
+
+- Alle Pflichtangaben nach Art. 30 in tabellarischer Form
+- Kennzeichnung besonderer Kategorien (Art. 9 DSGVO)
+- Kennzeichnung DSFA-Pflicht (Art. 35 DSGVO)
+- Kennzeichnung Drittlanduebermittlung (Art. 44-49 DSGVO)
+- Strukturierte TOMs nach Kategorie (Zugriffskontrolle, Vertraulichkeit, Integritaet, Verfuegbarkeit, Trennbarkeit)
+- Schutzniveau und Deployment-Modell
+
+---
+
+## 4. Auftragsverarbeiter (Art. 30 Abs. 2)
+
+Sofern **{{COMPANY_NAME}}** als Auftragsverarbeiter taetig ist, wird ein separates Verzeichnis nach Art. 30 Abs. 2 DSGVO gefuehrt. Dieses enthaelt:
+
+| Pflichtfeld (Art. 30 Abs. 2) | Beschreibung |
+|-------------------------------|-------------|
+| Name und Kontaktdaten des Auftragsverarbeiters | {{COMPANY_NAME}} |
+| Kategorien von Verarbeitungen | Art der im Auftrag durchgefuehrten Verarbeitungen |
+| Name und Kontaktdaten des Verantwortlichen | Auftraggeber |
+| Uebermittlungen in Drittlaender | Zielland, Empfaenger, Garantien |
+| Technische und organisatorische Massnahmen | Art. 32 DSGVO |
+
+---
+
+## 5. TOM-Beschreibung (Art. 32 DSGVO)
+
+Fuer jede Verarbeitungstaetigkeit werden die technischen und organisatorischen Massnahmen dokumentiert:
+
+| Kategorie | Beschreibung |
+|-----------|-------------|
+| **Zugriffskontrolle** | Massnahmen zur Steuerung des Zugriffs auf personenbezogene Daten |
+| **Vertraulichkeit** | Verschluesselung, Pseudonymisierung, Zutrittskontrolle |
+| **Integritaet** | Eingabekontrolle, Weitergabekontrolle, Protokollierung |
+| **Verfuegbarkeit** | Backup, Redundanz, Disaster Recovery |
+| **Trennbarkeit** | Mandantentrennung, Zweckbindung |
+
+**Verweis:** Die vollstaendige TOM-Dokumentation wird im separaten TOM-Modul gefuehrt und hier je Verarbeitungstaetigkeit referenziert.
+
+---
+
+## 6. Pruefverfahren und Revision
+
+| Eigenschaft | Wert |
+|-------------|------|
+| Pruefintervall | Jaehrlich |
+| Letzte Pruefung | {{VERSION_DATE}} |
+| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
+| Aktuelle Version | {{DOCUMENT_VERSION}} |
+
+### Pruefpunkte
+
+Bei jeder Pruefung wird das VVT auf folgende Punkte ueberprueft:
+
+- Vollstaendigkeit: Sind alle Verarbeitungstaetigkeiten erfasst?
+- Aktualitaet: Stimmen die Angaben noch mit der Praxis ueberein?
+- Art. 30-Konformitaet: Enthalten alle Eintraege die Pflichtangaben?
+- Art. 9-Kennzeichnung: Sind besondere Kategorien korrekt markiert?
+- Drittlandtransfers: Sind Transfermechanismen dokumentiert?
+- Loeschfristen: Sind Aufbewahrungsfristen definiert und aktuell?
+- TOM-Verweise: Sind Massnahmen je Verarbeitung beschrieben?
+
+---
+
+*Dieses Dokument wird automatisch vom VVT-Modul generiert und enthaelt alle erfassten Verarbeitungstaetigkeiten mit vollstaendigen Pflichtangaben nach Art. 30 DSGVO.*
+
+*Erstellt mit BreakPilot Compliance — {{COMPANY_NAME}} | Stand: {{VERSION_DATE}} | Version {{DOCUMENT_VERSION}}*
+$template$,
+    '["COMPANY_NAME","DPO_NAME","DPO_CONTACT","RESPONSIBLE_PERSON","DOCUMENT_VERSION","VERSION_DATE","NEXT_REVIEW_DATE"]'::jsonb,
+    'de', 'DE',
+    'mit', 'MIT License', 'BreakPilot Compliance',
+    false, true, '1.0.0', 'published',
+    NOW(), NOW()
+WHERE NOT EXISTS (
+    SELECT 1 FROM compliance_legal_templates
+    WHERE document_type = 'vvt_register'
+    AND tenant_id = '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e'
+);
+
+-- ===========================================================================
+-- Template 2: TOM — documentation of technical and organisational measures
+-- (Art. 32 GDPR)
+--
+-- Inserts a single 'tom_documentation' row for the hard-coded tenant id. The
+-- WHERE NOT EXISTS guard at the end of the statement skips the insert when
+-- that tenant already has a 'tom_documentation' row, so a re-run adds no copy.
+-- The content value is a dollar-quoted Markdown document; the {{NAME}} tokens
+-- it uses are listed in the placeholders value that follows it.
+-- ===========================================================================
+INSERT INTO compliance_legal_templates (
+    id, tenant_id, document_type, title, description, content,
+    placeholders, language, jurisdiction,
+    license_id, license_name, source_name,
+    attribution_required, is_complete_document, version, status,
+    created_at, updated_at
+) SELECT
+    gen_random_uuid(),
+    '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e',
+    'tom_documentation',
+    'TOM-Dokumentation (Art. 32 DSGVO)',
+    'Dokumentation aller technischen und organisatorischen Massnahmen gemaess Art. 32 DSGVO. Umfasst Schutzbedarf, Risikoprofil, Massnahmenkatalog nach Kategorie, SDM-Gewaehrleistungsziele und Compliance-Status.',
+    $template$# TOM-Dokumentation (Art. 32 DSGVO)
+
+## Dokumentenkontrolle
+
+| Feld | Wert |
+|------|------|
+| Unternehmen | {{COMPANY_NAME}} |
+| Dokumenttyp | Technische und Organisatorische Massnahmen |
+| Version | {{DOCUMENT_VERSION}} |
+| Datum | {{VERSION_DATE}} |
+| Klassifizierung | Vertraulich |
+| IT-Sicherheitsbeauftragter | {{ISB_NAME}} |
+| Datenschutzbeauftragter | {{DPO_NAME}} |
+| Geschaeftsfuehrung | {{GF_NAME}} |
+| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
+
+### Aenderungshistorie
+
+| Version | Datum | Autor | Aenderung |
+|---------|-------|-------|-----------|
+| {{DOCUMENT_VERSION}} | {{VERSION_DATE}} | {{ISB_NAME}} | Erstfassung |
+
+---
+
+## 1. Ziel und Zweck
+
+Diese TOM-Dokumentation beschreibt die technischen und organisatorischen Massnahmen zum Schutz personenbezogener Daten bei **{{COMPANY_NAME}}**. Sie dient der Umsetzung folgender DSGVO-Anforderungen:
+
+| Rechtsgrundlage | Inhalt |
+|-----------------|--------|
+| **Art. 32 Abs. 1 lit. a DSGVO** | Pseudonymisierung und Verschluesselung personenbezogener Daten |
+| **Art. 32 Abs. 1 lit. b DSGVO** | Vertraulichkeit, Integritaet, Verfuegbarkeit und Belastbarkeit der Systeme auf Dauer sicherstellen |
+| **Art. 32 Abs. 1 lit. c DSGVO** | Rasche Wiederherstellung der Verfuegbarkeit bei physischem oder technischem Zwischenfall |
+| **Art. 32 Abs. 1 lit. d DSGVO** | Regelmaessige Ueberpruefung, Bewertung und Evaluierung der Wirksamkeit der Massnahmen |
+
+Die TOM-Dokumentation ist fester Bestandteil des Datenschutz-Managementsystems und wird regelmaessig ueberprueft und aktualisiert.
+
+---
+
+## 2. Geltungsbereich
+
+Diese TOM-Dokumentation gilt fuer alle IT-Systeme, Anwendungen und Verarbeitungsprozesse von **{{COMPANY_NAME}}**. Die dokumentierten Massnahmen stammen aus zwei Quellen:
+
+- **Embedded Library (TOM-xxx):** Integrierte Kontrollbibliothek mit spezifischen Massnahmen fuer Art. 32 DSGVO
+- **Canonical Control Library (CP-CLIB):** Uebergreifende Kontrollbibliothek mit framework-uebergreifenden Massnahmen
+
+---
+
+## 3. Grundprinzipien Art. 32
+
+- **Vertraulichkeit:** Schutz personenbezogener Daten vor unbefugter Kenntnisnahme durch Zutrittskontrolle, Zugangskontrolle, Zugriffskontrolle und Verschluesselung (Art. 32 Abs. 1 lit. b DSGVO).
+- **Integritaet:** Sicherstellung, dass personenbezogene Daten nicht unbefugt oder unbeabsichtigt veraendert werden koennen, durch Eingabekontrolle, Weitergabekontrolle und Protokollierung (Art. 32 Abs. 1 lit. b DSGVO).
+- **Verfuegbarkeit und Belastbarkeit:** Gewaehrleistung, dass Systeme und Dienste bei Lastspitzen und Stoerungen zuverlaessig funktionieren, durch Backup, Redundanz und Disaster Recovery (Art. 32 Abs. 1 lit. b DSGVO).
+- **Rasche Wiederherstellbarkeit:** Faehigkeit, nach einem physischen oder technischen Zwischenfall Daten und Systeme schnell wiederherzustellen, durch getestete Recovery-Prozesse (Art. 32 Abs. 1 lit. c DSGVO).
+- **Regelmaessige Wirksamkeitspruefung:** Verfahren zur regelmaessigen Ueberpruefung, Bewertung und Evaluierung der Wirksamkeit aller technischen und organisatorischen Massnahmen (Art. 32 Abs. 1 lit. d DSGVO).
+
+---
+
+## 4. Schutzbedarf und Risikoanalyse
+
+Die Schutzbedarfsanalyse bildet die Grundlage fuer die Auswahl und Priorisierung der Massnahmen.
+
+| Kriterium | Bewertung |
+|-----------|-----------|
+| Vertraulichkeit | *Wird vom TOM-Generator automatisch ermittelt* |
+| Integritaet | *Wird vom TOM-Generator automatisch ermittelt* |
+| Verfuegbarkeit | *Wird vom TOM-Generator automatisch ermittelt* |
+| Schutzniveau | *Basiert auf CIA-Bewertung* |
+| DSFA-Pflicht | *Wird automatisch berechnet* |
+
+**Hinweis:** Die detaillierte Schutzbedarfsanalyse wird im TOM-Modul ueber den Risiko-Wizard durchgefuehrt. Die Ergebnisse fliessen automatisch in die Massnahmenauswahl ein.
+
+---
+
+## 5. Massnahmenkatalog
+
+### 5.1 Zutrittskontrolle
+
+Massnahmen zur Verhinderung des unbefugten Zutritts zu Datenverarbeitungsanlagen.
+
+| Massnahme | Typ | Status | Verantwortlich |
+|-----------|-----|--------|----------------|
+| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
+
+### 5.2 Zugangskontrolle
+
+Massnahmen zur Verhinderung der unbefugten Nutzung von Datenverarbeitungssystemen.
+
+| Massnahme | Typ | Status | Verantwortlich |
+|-----------|-----|--------|----------------|
+| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
+
+### 5.3 Zugriffskontrolle
+
+Massnahmen, die gewaehrleisten, dass ausschliesslich berechtigte Personen auf Daten zugreifen koennen.
+
+| Massnahme | Typ | Status | Verantwortlich |
+|-----------|-----|--------|----------------|
+| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
+
+### 5.4 Weitergabekontrolle
+
+Massnahmen zum Schutz personenbezogener Daten bei elektronischer Uebertragung und Transport.
+
+| Massnahme | Typ | Status | Verantwortlich |
+|-----------|-----|--------|----------------|
+| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
+
+### 5.5 Eingabekontrolle
+
+Massnahmen zur nachtraeglichen Ueberpruefung, ob und von wem Daten eingegeben, veraendert oder entfernt worden sind.
+
+| Massnahme | Typ | Status | Verantwortlich |
+|-----------|-----|--------|----------------|
+| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
+
+### 5.6 Auftragskontrolle
+
+Massnahmen, die gewaehrleisten, dass personenbezogene Daten nur entsprechend den Weisungen des Auftraggebers verarbeitet werden.
+
+| Massnahme | Typ | Status | Verantwortlich |
+|-----------|-----|--------|----------------|
+| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
+
+### 5.7 Verschluesselung und Pseudonymisierung
+
+Massnahmen zur Pseudonymisierung und Verschluesselung personenbezogener Daten (Art. 32 Abs. 1 lit. a DSGVO).
+
+| Massnahme | Typ | Status | Verantwortlich |
+|-----------|-----|--------|----------------|
+| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
+
+### 5.8 Verfuegbarkeit und Belastbarkeit
+
+Massnahmen zur Gewaehrleistung der Verfuegbarkeit und Belastbarkeit der Systeme (Art. 32 Abs. 1 lit. b DSGVO).
+
+| Massnahme | Typ | Status | Verantwortlich |
+|-----------|-----|--------|----------------|
+| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
+
+### 5.9 Wiederherstellbarkeit
+
+Massnahmen zur raschen Wiederherstellung der Verfuegbarkeit nach einem Zwischenfall (Art. 32 Abs. 1 lit. c DSGVO).
+
+| Massnahme | Typ | Status | Verantwortlich |
+|-----------|-----|--------|----------------|
+| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
+
+### 5.10 Ueberpruefung und Bewertung
+
+Verfahren zur regelmaessigen Ueberpruefung, Bewertung und Evaluierung (Art. 32 Abs. 1 lit. d DSGVO).
+
+| Massnahme | Typ | Status | Verantwortlich |
+|-----------|-----|--------|----------------|
+| *Wird automatisch aus dem TOM-Modul befuellt* | | | |
+
+---
+
+## 6. SDM Gewaehrleistungsziele
+
+Das Standard-Datenschutzmodell (SDM) definiert sieben Gewaehrleistungsziele. Die implementierten Massnahmen decken folgende Ziele ab:
+
+| Gewaehrleistungsziel | Abgedeckt | Gesamt | Abdeckung (%) |
+|----------------------|-----------|--------|---------------|
+| Verfuegbarkeit | *automatisch* | | |
+| Integritaet | *automatisch* | | |
+| Vertraulichkeit | *automatisch* | | |
+| Nichtverkettung | *automatisch* | | |
+| Intervenierbarkeit | *automatisch* | | |
+| Transparenz | *automatisch* | | |
+| Datenminimierung | *automatisch* | | |
+
+---
+
+## 7. Verantwortlichkeiten
+
+| Rolle | Aufgabe |
+|-------|---------|
+| Geschaeftsfuehrung ({{GF_NAME}}) | Gesamtverantwortung, Freigabe der TOM-Dokumentation |
+| IT-Sicherheitsbeauftragter ({{ISB_NAME}}) | Pflege und Umsetzung technischer Massnahmen |
+| Datenschutzbeauftragter ({{DPO_NAME}}) | Ueberwachung, Beratung, Compliance-Check |
+| Fachabteilungen | Umsetzung organisatorischer Massnahmen, Meldepflicht |
+
+---
+
+## 8. Compliance-Status
+
+*Der aktuelle Compliance-Score wird vom TOM-Modul automatisch berechnet und enthaelt Befunde nach Schweregrad (Kritisch, Hoch, Mittel, Niedrig).*
+
+| Kennzahl | Wert |
+|----------|------|
+| Gepruefte Massnahmen | *automatisch* |
+| Bestanden | *automatisch* |
+| Beanstandungen | *automatisch* |
+
+---
+
+## 9. Pruef- und Revisionszyklus
+
+| Eigenschaft | Wert |
+|-------------|------|
+| Pruefintervall | Jaehrlich |
+| Letzte Pruefung | {{VERSION_DATE}} |
+| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
+| Aktuelle Version | {{DOCUMENT_VERSION}} |
+
+### Pruefpunkte
+
+- Vollstaendigkeit aller Massnahmen (neue Systeme oder Verarbeitungen erfasst?)
+- Aktualitaet des Umsetzungsstatus (Aenderungen seit letzter Pruefung?)
+- Wirksamkeit der technischen Massnahmen (Penetration-Tests, Audit-Ergebnisse)
+- Angemessenheit der organisatorischen Massnahmen (Schulungen, Richtlinien aktuell?)
+- Abdeckung aller SDM-Gewaehrleistungsziele
+- Zuordnung von Verantwortlichkeiten zu allen Massnahmen
+
+---
+
+*Dieses Dokument wird automatisch vom TOM-Modul generiert und enthaelt alle erfassten technischen und organisatorischen Massnahmen nach Art. 32 DSGVO.*
+
+*Erstellt mit BreakPilot Compliance — {{COMPANY_NAME}} | Stand: {{VERSION_DATE}} | Version {{DOCUMENT_VERSION}}*
+$template$,
+    '["COMPANY_NAME","ISB_NAME","GF_NAME","DPO_NAME","DOCUMENT_VERSION","VERSION_DATE","NEXT_REVIEW_DATE"]'::jsonb,
+    'de', 'DE',
+    'mit', 'MIT License', 'BreakPilot Compliance',
+    false, true, '1.0.0', 'published',
+    NOW(), NOW()
+WHERE NOT EXISTS (
+    SELECT 1 FROM compliance_legal_templates
+    WHERE document_type = 'tom_documentation'
+    AND tenant_id = '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e'
+);
+
+-- ===========================================================================
+-- Template 3: Loeschkonzept (Art. 5/17 DSGVO)
+-- ===========================================================================
+INSERT INTO compliance_legal_templates (
+    id, tenant_id, document_type, title, description, content,
+    placeholders, language, jurisdiction,
+    license_id, license_name, source_name,
+    attribution_required, is_complete_document, version, status,
+    created_at, updated_at
+) SELECT
+    gen_random_uuid(),
+    '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e',
+    'loeschkonzept',
+    'Loeschkonzept (Art. 5/17 DSGVO)',
+    'Systematisches Loeschkonzept gemaess Art. 5 Abs. 1 lit. e und Art. 17 DSGVO. Dokumentiert Loeschregeln, Aufbewahrungstreiber, Loeschmethoden, Legal Holds und Auftragsverarbeiter-Verknuepfungen.',
+    $template$# Loeschkonzept (Art. 5/17 DSGVO)
+
+## Dokumentenkontrolle
+
+| Feld | Wert |
+|------|------|
+| Unternehmen | {{COMPANY_NAME}} |
+| Dokumenttyp | Loeschkonzept |
+| Version | {{DOCUMENT_VERSION}} |
+| Datum | {{VERSION_DATE}} |
+| Klassifizierung | Vertraulich |
+| Datenschutzbeauftragter | {{DPO_NAME}} |
+| Kontakt DSB | {{DPO_CONTACT}} |
+| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
+
+### Aenderungshistorie
+
+| Version | Datum | Autor | Aenderung |
+|---------|-------|-------|-----------|
+| {{DOCUMENT_VERSION}} | {{VERSION_DATE}} | {{DPO_NAME}} | Erstfassung |
+
+---
+
+## 1. Ziel und Zweck
+
+Dieses Loeschkonzept definiert die systematischen Regeln und Verfahren fuer die Loeschung personenbezogener Daten bei **{{COMPANY_NAME}}**. Es dient der Umsetzung folgender DSGVO-Anforderungen:
+
+| Rechtsgrundlage | Inhalt |
+|-----------------|--------|
+| **Art. 5 Abs. 1 lit. e DSGVO** | Grundsatz der Speicherbegrenzung — Daten nur so lange speichern, wie fuer den Zweck erforderlich |
+| **Art. 17 DSGVO** | Recht auf Loeschung ("Recht auf Vergessenwerden") — Betroffene koennen Loeschung verlangen |
+| **Art. 30 DSGVO** | Verzeichnis von Verarbeitungstaetigkeiten — Loeschfristen muessen dokumentiert werden |
+| **Art. 25 DSGVO** | Datenschutz durch Technikgestaltung — Loeschmechanismen moeglichst automatisiert |
+
+Das Loeschkonzept ist fester Bestandteil des Datenschutz-Managementsystems und wird regelmaessig ueberprueft und aktualisiert.
+
+---
+
+## 2. Rechtsgrundlagen und Aufbewahrungstreiber
+
+### Gesetzliche Aufbewahrungspflichten
+
+| Aufbewahrungstreiber | Gesetz / Vorschrift | Frist |
+|----------------------|---------------------|-------|
+| Handelsrechtliche Aufbewahrung | § 257 HGB | 6 Jahre (Handelsbriefe), 8 Jahre (Buchungsbelege), 10 Jahre (Handelsbuecher, Jahresabschluesse) |
+| Steuerrechtliche Aufbewahrung | § 147 AO | 6 Jahre (Geschaeftsbriefe), 8 Jahre (Buchungsbelege), 10 Jahre (Buecher, Jahresabschluesse) |
+| Arbeitsrechtliche Aufbewahrung | Diverse arbeitsrechtliche Vorschriften | 3-10 Jahre je nach Dokumenttyp |
+| Sozialversicherungsrechtlich | §§ 28f, 110 SGB IV | 5 Jahre |
+| Produkthaftung | § 13 ProdHaftG | 10 Jahre |
+| Beweissicherung | §§ 195-199 BGB | 3 Jahre (regelmaessige Verjaehrung) |
+
+### 3-Level-Loeschlogik
+
+Die Loeschung folgt einer dreistufigen Priorisierung:
+
+1. **Zweckende:** Daten werden geloescht, sobald der Verarbeitungszweck entfaellt
+2. **Gesetzliche Aufbewahrungspflichten:** Laengere Fristen aus HGB, AO etc. ueberschreiben Zweckende
+3. **Legal Hold:** Aufbewahrungspflicht aufgrund rechtlicher Verfahren setzt alle anderen Fristen aus
+
+---
+
+## 3. Datenkategorien und Fristen
+
+### Loeschregeln-Uebersicht
+
+| LF-Nr. | Datenobjekt | Loeschtrigger | Aufbewahrungsfrist | Loeschmethode | Status |
+|--------|-------------|---------------|--------------------|--------------:|--------|
+| *Wird automatisch vom Loeschfristen-Modul befuellt* | | | | | |
+
+### Detaillierte Loeschregeln
+
+Fuer jede Loeschregel werden folgende Informationen dokumentiert:
+
+| Feld | Beschreibung |
+|------|-------------|
+| Beschreibung | Detaillierte Beschreibung der betroffenen Daten |
+| Betroffenengruppen | Kategorien betroffener Personen |
+| Datenkategorien | Art der personenbezogenen Daten |
+| Verarbeitungszweck | Primaerer Zweck der Datenverarbeitung |
+| Loeschtrigger | Ereignis, das die Loeschfrist ausloest |
+| Aufbewahrungstreiber | Gesetzliche Grundlage fuer die Aufbewahrung |
+| Aufbewahrungsfrist | Dauer der Aufbewahrung mit Einheit |
+| Startereignis | Beginn der Fristberechnung |
+| Loeschmethode | Technisches Verfahren (Loeschung, Anonymisierung, Vernichtung) |
+| Speicherorte | Betroffene Systeme und Datenbanken |
+| Verantwortlich | Person oder Rolle |
+| Pruefintervall | Frequenz der Kontrolle |
+
+---
+
+## 4. Loeschmethoden
+
+| Methode | Beschreibung | Anwendung |
+|---------|-------------|-----------|
+| **Physische Loeschung** | Unwiderrufliches Entfernen der Daten aus allen Systemen | Standard fuer nicht mehr benoetigte Daten |
+| **Anonymisierung** | Entfernen des Personenbezugs, sodass Daten nicht mehr zuordenbar sind | Statistik, Forschung, Archivierung |
+| **Pseudonymisierung** | Ersetzen identifizierender Merkmale durch Pseudonyme | Zwischenschritt, kein Ersatz fuer Loeschung |
+| **Physische Vernichtung** | Physische Zerstoerung der Datentraeger (Shredding, Degaussing) | Datentraeger-Entsorgung |
+| **Kryptographische Loeschung** | Vernichtung der Schluessel bei verschluesselten Daten | Cloud-Umgebungen, verschluesselte Backups |
+
+---
+
+## 5. Verantwortlichkeiten
+
+| Rolle | Aufgabe |
+|-------|---------|
+| Datenschutzbeauftragter ({{DPO_NAME}}) | Ueberwachung, Beratung, Compliance-Pruefung |
+| Fachabteilungen | Definition der Zweckende, Meldung neuer Datenkategorien |
+| IT-Abteilung | Technische Umsetzung der Loeschmechanismen |
+| Rechtsabteilung | Bewertung gesetzlicher Aufbewahrungspflichten, Legal Hold |
+
+---
+
+## 6. Legal Hold Verfahren
+
+Ein Legal Hold setzt die regulaere Loeschung aus. Betroffene Daten duerfen trotz abgelaufener Frist nicht geloescht werden, bis der Hold aufgehoben wird.
+
+### Verfahrensschritte
+
+1. Rechtsabteilung / DSB identifiziert betroffene Datenkategorien
+2. Legal Hold wird im System aktiviert (Status: Aktiv)
+3. Automatische Loeschung wird fuer betroffene Policies ausgesetzt
+4. Regelmaessige Pruefung, ob der Legal Hold noch erforderlich ist
+5. Nach Aufhebung: Regulaere Loeschfristen greifen wieder
+
+### Aktive Legal Holds
+
+*Wird automatisch vom Loeschfristen-Modul befuellt. Enthaelt: Datenobjekt, Grund, Rechtsgrundlage, Beginn, voraussichtliches Ende.*
+
+---
+
+## 7. Auftragsverarbeiter mit Loeschpflichten
+
+Loeschregeln, die mit Auftragsverarbeitern verknuepft sind, stellen sicher, dass auch bei extern verarbeiteten Daten die Loeschpflichten eingehalten werden (Art. 28 DSGVO).
+
+| Loeschregel | LF-Nr. | Auftragsverarbeiter | Aufbewahrungsfrist |
+|-------------|--------|--------------------|--------------------|
+| *Wird automatisch vom Loeschfristen-Modul befuellt* | | | |
+
+**Hinweis:** Die vollstaendige Auftragsverarbeiter-Dokumentation wird im Vendor-Compliance-Modul gefuehrt.
+
+---
+
+## 8. VVT-Verknuepfung
+
+Die Loeschregeln sind mit den Verarbeitungstaetigkeiten im Verarbeitungsverzeichnis (Art. 30 DSGVO) verknuepft:
+
+| Loeschregel | LF-Nr. | VVT-Nr. | Verarbeitungstaetigkeit |
+|-------------|--------|---------|-------------------------|
+| *Wird automatisch vom Loeschfristen-Modul befuellt* | | | |
+
+---
+
+## 9. Compliance-Status
+
+*Der aktuelle Compliance-Score wird vom Loeschfristen-Modul automatisch berechnet und enthaelt Befunde nach Schweregrad (Kritisch, Hoch, Mittel, Niedrig).*
+
+| Kennzahl | Wert |
+|----------|------|
+| Gepruefte Policies | *automatisch* |
+| Bestanden | *automatisch* |
+| Beanstandungen | *automatisch* |
+
+---
+
+## 10. Pruef- und Revisionszyklus
+
+| Eigenschaft | Wert |
+|-------------|------|
+| Pruefintervall | Jaehrlich |
+| Letzte Pruefung | {{VERSION_DATE}} |
+| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
+| Aktuelle Version | {{DOCUMENT_VERSION}} |
+
+### Pruefpunkte
+
+- Vollstaendigkeit aller Loeschregeln (neue Verarbeitungen erfasst?)
+- Aktualitaet der gesetzlichen Aufbewahrungsfristen
+- Wirksamkeit der technischen Loeschmechanismen
+- Einhaltung der definierten Loeschfristen
+- Angemessenheit der Verantwortlichkeiten
+- VVT-Verknuepfung vollstaendig?
+
+---
+
+*Dieses Dokument wird automatisch vom Loeschfristen-Modul generiert und enthaelt alle erfassten Loeschregeln mit Aufbewahrungstreibern, Fristen und Verantwortlichkeiten.*
+
+*Erstellt mit BreakPilot Compliance — {{COMPANY_NAME}} | Stand: {{VERSION_DATE}} | Version {{DOCUMENT_VERSION}}*
+$template$,
+    '["COMPANY_NAME","DPO_NAME","DPO_CONTACT","DOCUMENT_VERSION","VERSION_DATE","NEXT_REVIEW_DATE"]'::jsonb,
+    'de', 'DE',
+    'mit', 'MIT License', 'BreakPilot Compliance',
+    false, true, '1.0.0', 'published',
+    NOW(), NOW()
+WHERE NOT EXISTS (
+    SELECT 1 FROM compliance_legal_templates
+    WHERE document_type = 'loeschkonzept'
+    AND tenant_id = '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e'
+);
+
+-- ===========================================================================
+-- Template 4: Pflichtenregister (DSGVO/AI-Act)
+-- ===========================================================================
+INSERT INTO compliance_legal_templates (
+    id, tenant_id, document_type, title, description, content,
+    placeholders, language, jurisdiction,
+    license_id, license_name, source_name,
+    attribution_required, is_complete_document, version, status,
+    created_at, updated_at
+) SELECT
+    gen_random_uuid(),
+    '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e',
+    'pflichtenregister',
+    'Pflichtenregister (DSGVO/AI-Act)',
+    'Vollstaendiges Pflichtenregister fuer alle regulatorischen Pflichten aus DSGVO, AI Act, NIS2 und BDSG. Dokumentiert Pflichten, Verantwortlichkeiten, Fristen, Nachweise und Compliance-Status.',
+    $template$# Pflichtenregister (DSGVO / AI Act / NIS2)
+
+## Dokumentenkontrolle
+
+| Feld | Wert |
+|------|------|
+| Unternehmen | {{COMPANY_NAME}} |
+| Dokumenttyp | Pflichtenregister |
+| Version | {{DOCUMENT_VERSION}} |
+| Datum | {{VERSION_DATE}} |
+| Klassifizierung | Vertraulich |
+| Datenschutzbeauftragter | {{DPO_NAME}} |
+| Kontakt DSB | {{DPO_CONTACT}} |
+| Verantwortlicher | {{RESPONSIBLE_PERSON}} |
+| Rechtsabteilung | {{LEGAL_DEPARTMENT}} |
+| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
+
+### Aenderungshistorie
+
+| Version | Datum | Autor | Aenderung |
+|---------|-------|-------|-----------|
+| {{DOCUMENT_VERSION}} | {{VERSION_DATE}} | {{DPO_NAME}} | Erstfassung |
+
+---
+
+## 1. Ziel und Zweck
+
+Dieses Pflichtenregister dokumentiert alle regulatorischen Pflichten, denen **{{COMPANY_NAME}}** unterliegt. Es dient der systematischen Erfassung, Ueberwachung und Nachverfolgung aller Compliance-Anforderungen aus den anwendbaren Regulierungen.
+
+### Zwecke des Registers
+
+- Vollstaendige Erfassung aller anwendbaren regulatorischen Pflichten
+- Zuordnung von Verantwortlichkeiten und Fristen
+- Nachverfolgung des Umsetzungsstatus
+- Dokumentation von Nachweisen fuer Audits
+- Identifikation von Compliance-Luecken und Handlungsbedarf
+
+### Rechtsrahmen
+
+| Rechtsrahmen | Relevanz |
+|-------------|----------|
+| **DSGVO (EU) 2016/679** | Datenschutz-Grundverordnung — Kernregulierung fuer personenbezogene Daten |
+| **AI Act (EU) 2024/1689** | KI-Verordnung — Anforderungen an KI-Systeme nach Risikoklasse |
+| **NIS2 (EU) 2022/2555** | Netzwerk- und Informationssicherheit — Cybersicherheitspflichten |
+| **BDSG** | Bundesdatenschutzgesetz — Nationale Ergaenzung zur DSGVO |
+
+---
+
+## 2. Geltungsbereich
+
+Dieses Pflichtenregister gilt fuer alle Geschaeftsprozesse und IT-Systeme von **{{COMPANY_NAME}}**. Es umfasst Pflichten aus allen anwendbaren Regulierungen, gruppiert nach Rechtsquelle.
+
+### Anwendbare Regulierungen
+
+| Regulierung | Anzahl Pflichten | Status |
+|-------------|-----------------|--------|
+| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | |
+
+---
+
+## 3. Methodik
+
+Die Identifikation und Bewertung der Pflichten erfolgt in drei Schritten:
+
+1. **Pflicht-Identifikation:** Systematische Analyse aller anwendbaren Regulierungen und Extraktion der einzelnen Pflichten mit Artikel-Referenz, Beschreibung und Zielgruppe.
+2. **Bewertung und Priorisierung:** Jede Pflicht wird nach Prioritaet (kritisch, hoch, mittel, niedrig) und Dringlichkeit (Frist) bewertet. Die Bewertung basiert auf dem Risikopotenzial bei Nichterfuellung.
+3. **Ueberwachung und Nachverfolgung:** Regelmaessige Pruefung des Umsetzungsstatus, Aktualisierung der Fristen und Dokumentation von Nachweisen.
+
+Die Pflichten werden ueber einen automatisierten Compliance-Check geprueft, der 11 Kriterien umfasst (siehe Abschnitt 10: Compliance-Status).
+
+---
+
+## 4. Regulatorische Grundlagen
+
+| Regulierung | Pflichten | Kritisch | Hoch | Mittel | Niedrig | Abgeschlossen |
+|-------------|----------|----------|------|--------|---------|---------------|
+| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | | | | | |
+
+---
+
+## 5. Pflichtenuebersicht
+
+Uebersicht aller Pflichten nach Regulierung und Status:
+
+| Regulierung | Gesamt | Ausstehend | In Bearbeitung | Abgeschlossen | Ueberfaellig |
+|-------------|--------|------------|----------------|---------------|--------------|
+| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | | | | |
+
+---
+
+## 6. Detaillierte Pflichten
+
+Fuer jede Pflicht werden folgende Informationen als Detailkarte dokumentiert:
+
+| Feld | Beschreibung |
+|------|-------------|
+| Rechtsquelle | Regulierung und Artikel-Referenz |
+| Beschreibung | Detaillierte Beschreibung der Pflicht |
+| Prioritaet | Kritisch / Hoch / Mittel / Niedrig |
+| Status | Ausstehend / In Bearbeitung / Abgeschlossen / Ueberfaellig |
+| Verantwortlich | Person oder Abteilung |
+| Frist | Umsetzungsfrist |
+| Nachweise | Dokumentierte Belege fuer die Umsetzung |
+| Betroffene Systeme | IT-Systeme, die von der Pflicht betroffen sind |
+| Notizen | Zusaetzliche Anmerkungen und Handlungsempfehlungen |
+
+### Pflichten nach Regulierung
+
+*Die einzelnen Pflichten werden vom Pflichtenregister-Modul automatisch nach Rechtsquelle gruppiert und als Detailkarten mit allen Feldern in das Dokument eingefuegt. Die Sortierung erfolgt nach Prioritaet (kritisch zuerst).*
+
+---
+
+## 7. Verantwortlichkeiten
+
+| Verantwortlich | Pflichten | Anzahl | Davon offen |
+|----------------|----------|--------|-------------|
+| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | | |
+
+### Rollenmatrix
+
+| Rolle | Aufgabe |
+|-------|---------|
+| Verantwortlicher ({{RESPONSIBLE_PERSON}}) | Gesamtverantwortung fuer Compliance |
+| Datenschutzbeauftragter ({{DPO_NAME}}) | Ueberwachung DSGVO-Pflichten, Beratung |
+| Rechtsabteilung ({{LEGAL_DEPARTMENT}}) | Bewertung regulatorischer Aenderungen, NIS2/AI-Act |
+| Fachabteilungen | Umsetzung zugewiesener Pflichten |
+| IT-Abteilung | Umsetzung technischer Anforderungen |
+
+---
+
+## 8. Fristen-Uebersicht
+
+### Ueberfaellige Pflichten
+
+| Pflicht | Regulierung | Frist | Tage ueberfaellig | Prioritaet |
+|---------|-------------|-------|--------------------:|-----------|
+| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | | | |
+
+### Anstehende Fristen
+
+| Pflicht | Regulierung | Frist | Verbleibend | Verantwortlich |
+|---------|-------------|-------|-------------|----------------|
+| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | | | |
+
+---
+
+## 9. Nachweisregister
+
+Dokumentation der Nachweise (Evidence) fuer die Umsetzung der Pflichten:
+
+| Pflicht | Regulierung | Nachweise | Status |
+|---------|-------------|-----------|--------|
+| *Wird automatisch vom Pflichtenregister-Modul befuellt* | | | |
+
+### Pflichten ohne Nachweise
+
+*Das Modul identifiziert automatisch alle Pflichten, fuer die noch keine Nachweise hinterlegt wurden, und listet diese als Handlungsbedarf auf.*
+
+---
+
+## 10. Compliance-Status
+
+*Der aktuelle Compliance-Score wird vom Pflichtenregister-Modul automatisch berechnet. Der Check umfasst 11 Kriterien und bewertet Befunde nach Schweregrad (Kritisch, Hoch, Mittel, Niedrig).*
+
+| Kennzahl | Wert |
+|----------|------|
+| Compliance-Score | *automatisch (0-100)* |
+| Befunde gesamt | *automatisch* |
+| Kritisch | *automatisch* |
+| Hoch | *automatisch* |
+| Mittel | *automatisch* |
+| Niedrig | *automatisch* |
+
+### Befunde und Empfehlungen
+
+| Schweregrad | Befund | Betroffene Pflichten | Empfehlung |
+|-------------|--------|---------------------|------------|
+| *Wird automatisch vom Compliance-Check befuellt* | | | |
+
+---
+
+## 11. Pruef- und Revisionszyklus
+
+| Eigenschaft | Wert |
+|-------------|------|
+| Pruefintervall | Jaehrlich |
+| Letzte Pruefung | {{VERSION_DATE}} |
+| Naechste Pruefung | {{NEXT_REVIEW_DATE}} |
+| Aktuelle Version | {{DOCUMENT_VERSION}} |
+
+### Pruefpunkte
+
+- Vollstaendigkeit: Sind alle anwendbaren Pflichten erfasst?
+- Aktualitaet: Gibt es neue Regulierungen oder Gesetzesaenderungen?
+- Umsetzungsstatus: Sind ueberfaellige Pflichten eskaliert?
+- Nachweise: Sind fuer alle abgeschlossenen Pflichten Belege hinterlegt?
+- Verantwortlichkeiten: Sind alle Pflichten zugewiesen?
+- Fristen: Sind neue Fristen aus Gesetzesaenderungen beruecksichtigt?
+
+---
+
+*Dieses Dokument wird automatisch vom Pflichtenregister-Modul generiert und enthaelt alle erfassten regulatorischen Pflichten mit Verantwortlichkeiten, Fristen und Nachweisen.*
+
+*Erstellt mit BreakPilot Compliance — {{COMPANY_NAME}} | Stand: {{VERSION_DATE}} | Version {{DOCUMENT_VERSION}}*
+$template$,
+    '["COMPANY_NAME","DPO_NAME","DPO_CONTACT","RESPONSIBLE_PERSON","LEGAL_DEPARTMENT","DOCUMENT_VERSION","VERSION_DATE","NEXT_REVIEW_DATE"]'::jsonb,
+    'de', 'DE',
+    'mit', 'MIT License', 'BreakPilot Compliance',
+    false, true, '1.0.0', 'published',
+    NOW(), NOW()
+WHERE NOT EXISTS (
+    SELECT 1 FROM compliance_legal_templates
+    WHERE document_type = 'pflichtenregister'
+    AND tenant_id = '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e'
+);
@@ -0,0 +1,73 @@
+-- Migration 074: Control Dedup Engine — DB Schema
+-- Supports the 4-stage dedup pipeline for atomic controls (Pass 0b).
+--
+-- Tables:
+--   1. control_parent_links    — M:N parent linking (one control → many regulations)
+--   2. control_dedup_reviews   — Review queue for borderline matches (0.85-0.92)
+
+BEGIN;
+
+-- =============================================================================
+-- 1. Control Parent Links (M:N)
+--    Enables "1 Control erfuellt 5 Gesetze" — the biggest USP.
+--    An atomic control can have multiple parent controls from different
+--    regulations/obligations. This replaces the 1:1 parent_control_uuid FK.
+-- =============================================================================
+
+-- Link table between a control and each of its parent controls.
+-- Both sides reference canonical_controls(id) with ON DELETE CASCADE, so
+-- deleting either control also removes the link row.
+CREATE TABLE IF NOT EXISTS control_parent_links (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
+    parent_control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
+    -- Inline, unnamed CHECK: migration 078 later drops and re-adds it under the
+    -- name control_parent_links_link_type_check to also allow 'cross_regulation'.
+    link_type VARCHAR(30) NOT NULL DEFAULT 'decomposition'
+        CHECK (link_type IN ('decomposition', 'dedup_merge', 'manual', 'crosswalk')),
+    -- Value in [0, 1] stored with two decimals; defaults to 1.0.
+    confidence NUMERIC(3,2) DEFAULT 1.0
+        CHECK (confidence >= 0 AND confidence <= 1),
+    source_regulation VARCHAR(100),
+    source_article VARCHAR(100),
+    -- Optional reference to an obligation candidate; no ON DELETE action is specified.
+    obligation_candidate_id UUID REFERENCES obligation_candidates(id),
+    created_at TIMESTAMPTZ DEFAULT NOW(),
+    -- At most one link per (control, parent) pair, regardless of link_type.
+    CONSTRAINT uq_parent_link UNIQUE (control_uuid, parent_control_uuid)
+);
+
+-- Lookup indexes for each side of the link and for filtering by link type.
+-- NOTE(review): uq_parent_link already leads with control_uuid, so
+-- idx_cpl_control may be redundant — confirm before relying on or dropping it.
+CREATE INDEX IF NOT EXISTS idx_cpl_control ON control_parent_links(control_uuid);
+CREATE INDEX IF NOT EXISTS idx_cpl_parent ON control_parent_links(parent_control_uuid);
+CREATE INDEX IF NOT EXISTS idx_cpl_type ON control_parent_links(link_type);
+
+COMMENT ON TABLE control_parent_links IS
+    'M:N parent links — one atomic control can fulfill multiple regulations/obligations. USP: "1 Control erfuellt 5 Gesetze"';
+
+-- =============================================================================
+-- 2. Control Dedup Reviews
+--    Queue for borderline matches (similarity 0.85-0.92) that need human review.
+--    Reviewed entries get status updated to accepted/rejected.
+-- =============================================================================
+
+-- Review queue rows: one row per candidate control that matched an existing
+-- control closely enough to need a human decision.
+-- Unlike control_parent_links, the references to canonical_controls here
+-- declare no ON DELETE action, so a referenced control cannot be deleted while
+-- a review row still points at it.
+CREATE TABLE IF NOT EXISTS control_dedup_reviews (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    -- Candidate is identified by its textual control id, not by a UUID FK.
+    candidate_control_id VARCHAR(30) NOT NULL,
+    candidate_title TEXT NOT NULL,
+    candidate_objective TEXT,
+    -- Existing control the candidate was matched against (UUID and textual id).
+    matched_control_uuid UUID REFERENCES canonical_controls(id),
+    matched_control_id VARCHAR(30),
+    -- Three decimals of precision; no range CHECK is enforced at the DB level.
+    similarity_score NUMERIC(4,3) DEFAULT 0.0,
+    dedup_stage VARCHAR(40) NOT NULL,
+    dedup_details JSONB DEFAULT '{}',
+    parent_control_uuid UUID REFERENCES canonical_controls(id),
+    obligation_candidate_id UUID REFERENCES obligation_candidates(id),
+    -- Workflow state; new rows start as 'pending'.
+    review_status VARCHAR(20) DEFAULT 'pending'
+        CHECK (review_status IN ('pending', 'accepted_link', 'accepted_new', 'rejected')),
+    reviewed_by VARCHAR(100),
+    reviewed_at TIMESTAMPTZ,
+    review_notes TEXT,
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- Indexes for filtering the queue by status and stage, and for looking up
+-- reviews by the matched or parent control.
+CREATE INDEX IF NOT EXISTS idx_cdr_status ON control_dedup_reviews(review_status);
+CREATE INDEX IF NOT EXISTS idx_cdr_matched ON control_dedup_reviews(matched_control_uuid);
+CREATE INDEX IF NOT EXISTS idx_cdr_parent ON control_dedup_reviews(parent_control_uuid);
+CREATE INDEX IF NOT EXISTS idx_cdr_stage ON control_dedup_reviews(dedup_stage);
+
+COMMENT ON TABLE control_dedup_reviews IS
+    'Review queue for borderline dedup matches (similarity 0.85-0.92). Human decides: link or new control.';
+
+COMMIT;
@@ -0,0 +1,38 @@
+-- Migration 075: Obligation Refinement Fields
+-- Supports Merge Pass (implementation-level dedup) and metadata enrichment.
+--
+-- New fields:
+--   merged_into_id    — points to survivor obligation when merged
+--   trigger_type      — event / periodic / continuous
+--   is_implementation_specific — true if obligation references concrete tool/protocol
+
+-- =============================================================================
+-- 1. Add merge tracking
+-- =============================================================================
+
+-- Self-referencing pointer from a merged obligation to its survivor row.
+-- The FK declares no ON DELETE action, so a survivor cannot be deleted while
+-- merged rows still reference it.
+ALTER TABLE obligation_candidates
+    ADD COLUMN IF NOT EXISTS merged_into_id UUID
+        REFERENCES obligation_candidates(id);
+
+-- Partial index: only rows that carry a merge pointer are indexed.
+CREATE INDEX IF NOT EXISTS idx_oc_merged_into
+    ON obligation_candidates(merged_into_id)
+    WHERE merged_into_id IS NOT NULL;
+
+-- Allow 'merged' as release_state.
+-- The old CHECK is dropped and the widened one added in a single ALTER TABLE
+-- statement, so the swap is atomic: if the new CHECK is rejected (e.g. an
+-- existing row holds an unexpected release_state), the old constraint stays in
+-- place instead of leaving the table unconstrained. This migration has no
+-- BEGIN/COMMIT, so two separate statements would not give that guarantee.
+-- DROP ... IF EXISTS keeps the statement safe to re-run.
+ALTER TABLE obligation_candidates
+    DROP CONSTRAINT IF EXISTS obligation_candidates_release_state_check,
+    ADD CONSTRAINT obligation_candidates_release_state_check
+        CHECK (release_state IN ('extracted', 'validated', 'rejected', 'composed', 'merged'));
+
+-- =============================================================================
+-- 2. Add enrichment metadata
+-- =============================================================================
+
+-- Enrichment columns, added in one multi-action ALTER TABLE:
+--   trigger_type               — NULL or one of 'event' / 'periodic' / 'continuous'
+--   is_implementation_specific — boolean flag, FALSE by default
+-- Each ADD COLUMN IF NOT EXISTS is skipped independently when its column
+-- already exists.
+ALTER TABLE obligation_candidates
+    ADD COLUMN IF NOT EXISTS trigger_type VARCHAR(20) DEFAULT NULL
+        CHECK (trigger_type IS NULL OR trigger_type IN ('event', 'periodic', 'continuous')),
+    ADD COLUMN IF NOT EXISTS is_implementation_specific BOOLEAN DEFAULT FALSE;
@@ -0,0 +1,125 @@
+-- Migration 076: Anti-Fake-Evidence Guardrails (Phase 1)
+--
+-- Prevents "Compliance-Theater": generated content passed off as real evidence,
+-- controls without evidence marked as "pass", unvalidated 100% compliance claims.
+--
+-- Changes:
+--   1. New ENUM types for evidence confidence + truth status
+--   2. New value 'in_progress' for controlstatusenum
+--   3. New columns on compliance_evidence (confidence, truth, review tracking)
+--   4. status_justification column on compliance_controls
+--   5. New table compliance_llm_generation_audit
+--   6. Backfill existing evidence based on source
+--   7. Indexes on new columns
+
+-- ============================================================================
+-- 1. New ENUM types
+-- ============================================================================
+
+-- NOTE: PostgreSQL has no CREATE TYPE ... IF NOT EXISTS. Each CREATE TYPE is
+-- therefore wrapped in a DO block that ignores duplicate_object, so this
+-- migration can be re-run like the IF NOT EXISTS statements further down.
+-- CREATE TYPE itself is transaction-safe; only the ALTER TYPE ... ADD VALUE
+-- in section 2 is sensitive to transaction blocks, which is why this script
+-- is meant to be executed outside an explicit transaction.
+
+-- Evidence confidence ladder, from weakest (E0) to strongest (E4).
+DO $$
+BEGIN
+    CREATE TYPE evidence_confidence_level AS ENUM (
+        'E0',   -- Generated / no real evidence (LLM output, placeholder)
+        'E1',   -- Uploaded but unreviewed (manual upload, no hash, no reviewer)
+        'E2',   -- Reviewed internally (human reviewed, hash verified)
+        'E3',   -- Observed by system (CI/CD pipeline, API with hash)
+        'E4'    -- Validated by external auditor
+    );
+EXCEPTION
+    WHEN duplicate_object THEN
+        NULL;   -- type already exists from a previous run
+END
+$$;
+
+-- Provenance / lifecycle state of a piece of evidence.
+DO $$
+BEGIN
+    CREATE TYPE evidence_truth_status AS ENUM (
+        'generated',              -- Created by LLM / system generation
+        'uploaded',               -- Manually uploaded by user
+        'observed',               -- Automatically observed (CI/CD, monitoring)
+        'validated_internal',     -- Reviewed + approved by internal reviewer
+        'rejected',               -- Reviewed and rejected
+        'provided_to_auditor',    -- Shared with external auditor
+        'accepted_by_auditor'     -- Accepted by external auditor
+    );
+EXCEPTION
+    WHEN duplicate_object THEN
+        NULL;   -- type already exists from a previous run
+END
+$$;
+
+-- ============================================================================
+-- 2. Add 'in_progress' to controlstatusenum
+-- ============================================================================
+-- ALTER TYPE ... ADD VALUE cannot run inside a transaction block on
+-- PostgreSQL versions before 12. On 12 and later it may, but the new value
+-- cannot be used until that transaction has committed. This file has no
+-- BEGIN/COMMIT so the statement is committed on its own.
+-- IF NOT EXISTS makes the statement a no-op when the value is already present.
+
+ALTER TYPE controlstatusenum ADD VALUE IF NOT EXISTS 'in_progress';
+
+-- ============================================================================
+-- 3. New columns on compliance_evidence
+-- ============================================================================
+
+-- Classification and review-tracking columns. Rows that already exist pick up
+-- the column defaults (E1 / uploaded / may_be_used_as_evidence = TRUE); the
+-- backfill in section 6 then reclassifies them based on their source.
+-- generation_mode, reviewed_by and reviewed_at are nullable with no default.
+ALTER TABLE compliance_evidence
+    ADD COLUMN IF NOT EXISTS confidence_level evidence_confidence_level DEFAULT 'E1',
+    ADD COLUMN IF NOT EXISTS truth_status evidence_truth_status DEFAULT 'uploaded',
+    ADD COLUMN IF NOT EXISTS generation_mode VARCHAR(100),
+    ADD COLUMN IF NOT EXISTS may_be_used_as_evidence BOOLEAN DEFAULT TRUE,
+    ADD COLUMN IF NOT EXISTS reviewed_by VARCHAR(200),
+    ADD COLUMN IF NOT EXISTS reviewed_at TIMESTAMPTZ;
+
+-- ============================================================================
+-- 4. status_justification on compliance_controls
+-- ============================================================================
+
+-- Nullable free-text column with no default; existing rows get NULL.
+-- Presumably holds the rationale for a control's current status — confirm
+-- against the application model.
+ALTER TABLE compliance_controls
+    ADD COLUMN IF NOT EXISTS status_justification TEXT;
+
+-- ============================================================================
+-- 5. LLM Generation Audit table
+-- ============================================================================
+
+-- Audit log of LLM generations. Rows default to truth_status 'generated' and
+-- may_be_used_as_evidence = FALSE, i.e. generated output is not evidence
+-- unless a row explicitly says otherwise.
+-- entity_type + entity_id form a polymorphic reference; no foreign keys are
+-- declared on this table.
+CREATE TABLE IF NOT EXISTS compliance_llm_generation_audit (
+    id              VARCHAR(36) PRIMARY KEY DEFAULT gen_random_uuid()::text,
+    tenant_id       VARCHAR(36),
+    entity_type     VARCHAR(50) NOT NULL,      -- 'evidence', 'control', 'document', ...
+    entity_id       VARCHAR(36),               -- ID of the generated entity (no FK constraint is declared)
+    generation_mode VARCHAR(100) NOT NULL,      -- 'draft_assistance', 'auto_generation', ...
+    truth_status    evidence_truth_status NOT NULL DEFAULT 'generated',
+    may_be_used_as_evidence BOOLEAN NOT NULL DEFAULT FALSE,
+    llm_model       VARCHAR(100),
+    llm_provider    VARCHAR(50),               -- 'ollama', 'anthropic', ...
+    prompt_hash     VARCHAR(64),               -- SHA-256 of the prompt
+    input_summary   TEXT,                      -- Truncated input for auditability
+    output_summary  TEXT,                      -- Truncated output for auditability
+    metadata        JSONB DEFAULT '{}'::jsonb,
+    created_at      TIMESTAMPTZ DEFAULT NOW(),
+    updated_at      TIMESTAMPTZ DEFAULT NOW()  -- set on insert only; no trigger in this migration maintains it
+);
+
+-- ============================================================================
+-- 6. Backfill existing evidence based on source
+-- ============================================================================
+
+-- Only rows still at the column default (E1) are touched, so rows that were
+-- already reclassified are left alone when the backfill is re-run.
+
+-- System-observed sources (CI pipeline, API) → E3 + observed
+UPDATE compliance_evidence
+   SET truth_status = 'observed',
+       confidence_level = 'E3'
+ WHERE confidence_level = 'E1'
+   AND source IN ('ci_pipeline', 'api');
+
+-- Manual/upload evidence keeps the column defaults (E1 + uploaded)
+
+-- Generated evidence → E0 + generated, and is barred from use as evidence
+UPDATE compliance_evidence
+   SET may_be_used_as_evidence = FALSE,
+       truth_status = 'generated',
+       confidence_level = 'E0'
+ WHERE confidence_level = 'E1'
+   AND source = 'generated';
+
+-- ============================================================================
+-- 7. Indexes
+-- ============================================================================
+
+-- Single-column indexes on the new evidence classification columns, plus
+-- lookup indexes on the audit table by (entity_type, entity_id) and by tenant.
+-- IF NOT EXISTS makes every statement a no-op when the index is already there.
+CREATE INDEX IF NOT EXISTS ix_evidence_confidence ON compliance_evidence (confidence_level);
+CREATE INDEX IF NOT EXISTS ix_evidence_truth_status ON compliance_evidence (truth_status);
+CREATE INDEX IF NOT EXISTS ix_evidence_may_be_used ON compliance_evidence (may_be_used_as_evidence);
+CREATE INDEX IF NOT EXISTS ix_llm_audit_entity ON compliance_llm_generation_audit (entity_type, entity_id);
+CREATE INDEX IF NOT EXISTS ix_llm_audit_tenant ON compliance_llm_generation_audit (tenant_id);
@@ -0,0 +1,37 @@
+-- Migration 077: Anti-Fake-Evidence Phase 2
+-- Assertions table, Four-Eyes columns on Evidence, Audit-Trail performance index
+
+-- 1A. Assertions table
+-- One row per sentence of an entity, addressed polymorphically through
+-- (entity_type, entity_id); no foreign keys are declared on this table.
+CREATE TABLE IF NOT EXISTS compliance_assertions (
+    id              VARCHAR(36) PRIMARY KEY DEFAULT gen_random_uuid()::text,
+    tenant_id       VARCHAR(36),
+    entity_type     VARCHAR(50) NOT NULL,
+    entity_id       VARCHAR(36) NOT NULL,
+    sentence_text   TEXT NOT NULL,
+    sentence_index  INTEGER NOT NULL DEFAULT 0,      -- presumably the sentence position within the entity text — confirm
+    assertion_type  VARCHAR(20) NOT NULL DEFAULT 'assertion',   -- allowed values are not constrained at the DB level
+    evidence_ids    JSONB DEFAULT '[]'::jsonb,       -- JSON array, empty by default; referenced IDs are not FK-checked
+    confidence      FLOAT DEFAULT 0.0,               -- no range CHECK is enforced
+    normative_tier  VARCHAR(20),
+    verified_by     VARCHAR(200),
+    verified_at     TIMESTAMPTZ,
+    created_at      TIMESTAMPTZ DEFAULT NOW(),
+    updated_at      TIMESTAMPTZ DEFAULT NOW()
+);
+-- Lookup indexes: by owning entity, by assertion type, and by tenant.
+CREATE INDEX IF NOT EXISTS ix_assertion_entity ON compliance_assertions (entity_type, entity_id);
+CREATE INDEX IF NOT EXISTS ix_assertion_type ON compliance_assertions (assertion_type);
+CREATE INDEX IF NOT EXISTS ix_assertion_tenant ON compliance_assertions (tenant_id);
+
+-- 1B. Four-Eyes columns on Evidence
+-- Adds two reviewer slots (name + timestamp each), an approval_status that
+-- defaults to 'none', and a requires_four_eyes flag that defaults to FALSE.
+-- Existing rows receive those defaults; the reviewer columns stay NULL.
+-- approval_status is a free VARCHAR: its allowed values are not constrained
+-- at the DB level.
+ALTER TABLE compliance_evidence
+    ADD COLUMN IF NOT EXISTS approval_status VARCHAR(30) DEFAULT 'none',
+    ADD COLUMN IF NOT EXISTS first_reviewer VARCHAR(200),
+    ADD COLUMN IF NOT EXISTS first_reviewed_at TIMESTAMPTZ,
+    ADD COLUMN IF NOT EXISTS second_reviewer VARCHAR(200),
+    ADD COLUMN IF NOT EXISTS second_reviewed_at TIMESTAMPTZ,
+    ADD COLUMN IF NOT EXISTS requires_four_eyes BOOLEAN DEFAULT FALSE;
+CREATE INDEX IF NOT EXISTS ix_evidence_approval_status ON compliance_evidence (approval_status);
+
+-- 1C. Audit-Trail performance index
+-- Composite index on (entity_type, action, performed_at).
+-- compliance_audit_trail is not created by this migration; it must already
+-- exist with these three columns or the statement fails.
+CREATE INDEX IF NOT EXISTS ix_audit_trail_entity_action
+    ON compliance_audit_trail (entity_type, action, performed_at);
@@ -0,0 +1,42 @@
+-- Migration 078: Batch Dedup — Schema extensions for 85k→~18-25k reduction
+-- Adds merged_into_uuid tracking, performance indexes for batch dedup,
+-- and extends link_type CHECK to include 'cross_regulation'.
+
+-- All changes below are applied inside one transaction (BEGIN ... COMMIT).
+BEGIN;
+
+-- =============================================================================
+-- 1. merged_into_uuid: Track which master a duplicate was merged into
+-- =============================================================================
+
+-- Self-referencing foreign key into canonical_controls(id). No ON DELETE
+-- action is given, so the database default applies.
+ALTER TABLE canonical_controls
+    ADD COLUMN IF NOT EXISTS merged_into_uuid UUID REFERENCES canonical_controls(id);
+
+-- Partial index: only rows that actually point at a master are indexed.
+CREATE INDEX IF NOT EXISTS idx_cc_merged_into
+    ON canonical_controls(merged_into_uuid) WHERE merged_into_uuid IS NOT NULL;
+
+-- =============================================================================
+-- 2. Performance indexes for batch dedup queries
+-- =============================================================================
+
+-- Index on merge_group_hint inside generation_metadata (for sub-grouping)
+-- Expression index over the text value of the JSON key, restricted to rows
+-- with decomposition_method = 'pass0b'.
+CREATE INDEX IF NOT EXISTS idx_cc_merge_group_hint
+    ON canonical_controls ((generation_metadata->>'merge_group_hint'))
+    WHERE decomposition_method = 'pass0b';
+
+-- Composite index for pattern-based dedup loading
+-- Same 'pass0b' restriction as the index above.
+CREATE INDEX IF NOT EXISTS idx_cc_pattern_dedup
+    ON canonical_controls (pattern_id, release_state)
+    WHERE decomposition_method = 'pass0b';
+
+-- =============================================================================
+-- 3. Extend link_type CHECK to include 'cross_regulation'
+-- =============================================================================
+
+-- NOTE(review): assumes the existing CHECK is named
+-- control_parent_links_link_type_check. If it was created under another name,
+-- DROP ... IF EXISTS skips it silently and the old constraint stays in force
+-- next to the new one — confirm against the migration that created the table.
+ALTER TABLE control_parent_links
+    DROP CONSTRAINT IF EXISTS control_parent_links_link_type_check;
+
+-- Re-adding the CHECK validates existing rows against the extended value list.
+ALTER TABLE control_parent_links
+    ADD CONSTRAINT control_parent_links_link_type_check
+        CHECK (link_type IN ('decomposition', 'dedup_merge', 'manual', 'crosswalk', 'cross_regulation'));
+
+COMMIT;
@@ -0,0 +1,16 @@
+-- Migration 079: Add evidence_type to canonical_controls
+-- Classifies HOW a control is evidenced:
+--   code    = Technical control, verifiable in source code / IaC / CI-CD
+--   process = Organizational / governance control, verified via documents / policies
+--   hybrid  = Both code and process evidence required
+--
+-- The DO block makes the migration a no-op when the table is not present.
+-- NOTE(review): the existence check is schema-qualified ('compliance'), but
+-- the ALTER TABLE and CREATE INDEX below are not, so they resolve through
+-- search_path — confirm both refer to the same table.
+
+DO $$
+BEGIN
+    IF EXISTS (SELECT 1 FROM information_schema.tables
+               WHERE table_schema = 'compliance' AND table_name = 'canonical_controls') THEN
+        -- DEFAULT NULL leaves existing rows unclassified; the CHECK only
+        -- restricts non-NULL values to the three listed types.
+        ALTER TABLE canonical_controls ADD COLUMN IF NOT EXISTS
+            evidence_type VARCHAR(20) DEFAULT NULL
+            CHECK (evidence_type IN ('code', 'process', 'hybrid'));
+        CREATE INDEX IF NOT EXISTS idx_cc_evidence_type ON canonical_controls(evidence_type);
+    END IF;
+END $$;
@@ -0,0 +1,18 @@
+-- V1 Control Enrichment: Cross-reference table for matching
+-- in-house developed controls (v1, ungrouped, no source) → regulatory controls
+--
+-- Both foreign keys cascade on delete: removing either referenced control
+-- also removes the match row. uq_v1_match allows at most one row per
+-- (v1 control, matched control) pair.
+
+CREATE TABLE IF NOT EXISTS v1_control_matches (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    v1_control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
+    matched_control_uuid UUID NOT NULL REFERENCES canonical_controls(id) ON DELETE CASCADE,
+    -- three decimal places; presumably a 0.000-1.000 score, although the
+    -- type itself accepts values up to 9.999 — TODO confirm
+    similarity_score NUMERIC(4,3) NOT NULL,
+    match_rank SMALLINT NOT NULL DEFAULT 1,
+    matched_source TEXT,           -- e.g. "DSGVO (EU) 2016/679"
+    matched_article TEXT,          -- e.g. "Art. 32"
+    match_method VARCHAR(30) NOT NULL DEFAULT 'embedding',
+    created_at TIMESTAMPTZ DEFAULT NOW(),
+    CONSTRAINT uq_v1_match UNIQUE (v1_control_uuid, matched_control_uuid)
+);
+
+-- NOTE(review): the index backing uq_v1_match already leads with
+-- v1_control_uuid, so idx_v1m_v1 may be redundant — confirm before keeping both.
+CREATE INDEX IF NOT EXISTS idx_v1m_v1 ON v1_control_matches(v1_control_uuid);
+CREATE INDEX IF NOT EXISTS idx_v1m_matched ON v1_control_matches(matched_control_uuid);
@@ -0,0 +1,11 @@
+-- Migration 081: Add 'duplicate' release_state for obligation deduplication
+--
+-- Allows marking duplicate obligation_candidates as 'duplicate' instead of
+-- deleting them, preserving traceability via merged_into_id.
+--
+-- The old CHECK is dropped and the extended one added in a single ALTER TABLE
+-- statement. A single statement is atomic: if adding the new CHECK fails
+-- (for example because an existing row holds a release_state outside the
+-- list), the drop is undone as well and the table is never left without the
+-- constraint.
+
+ALTER TABLE obligation_candidates
+    DROP CONSTRAINT IF EXISTS obligation_candidates_release_state_check,
+    ADD CONSTRAINT obligation_candidates_release_state_check
+        CHECK (release_state IN ('extracted', 'validated', 'rejected', 'composed', 'merged', 'duplicate'));
@@ -0,0 +1,4 @@
+-- Widen source_article and source_regulation to TEXT to handle long NIST references
+-- e.g. "SC-22 (und weitere redaktionelle Änderungen SC-7, SC-14, SC-17, ...)"
+-- Both columns are changed in one ALTER TABLE statement so the table lock is
+-- acquired once and the two type changes succeed or fail together.
+ALTER TABLE control_parent_links
+    ALTER COLUMN source_article TYPE TEXT,
+    ALTER COLUMN source_regulation TYPE TEXT;
@@ -0,0 +1,6 @@
+# Optional: Cross-Encoder Re-Ranking (CPU-only PyTorch)
+# Install separately: pip install -r requirements-reranker.txt
+# Enable at runtime: RERANK_ENABLED=true
+--extra-index-url https://download.pytorch.org/whl/cpu
+torch
+sentence-transformers>=3.0.0
@@ -22,6 +22,8 @@ python-multipart>=0.0.22
 # AI / Anthropic (compliance AI assistant)
 anthropic==0.75.0

+# Re-Ranking: see requirements-reranker.txt (optional, CPU-only PyTorch)
+
 # PDF Generation (GDPR export, audit reports)
 weasyprint>=68.0
 reportlab==4.2.5
@@ -0,0 +1,562 @@
+"""Tests for Anti-Fake-Evidence Phase 1 guardrails.
+
+~45 tests covering:
+- Evidence confidence classification
+- Evidence truth status classification
+- Control status transition state machine
+- Multi-dimensional compliance score
+- LLM generation audit
+- Evidence review endpoint
+"""
+
+from datetime import datetime, timedelta
+from unittest.mock import MagicMock, patch
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from compliance.api.evidence_routes import router as evidence_router
+from compliance.api.llm_audit_routes import router as llm_audit_router
+from compliance.api.evidence_routes import _classify_confidence, _classify_truth_status
+from compliance.services.control_status_machine import validate_transition
+from compliance.db.models import (
+    EvidenceConfidenceEnum,
+    EvidenceTruthStatusEnum,
+    ControlStatusEnum,
+)
+from classroom_engine.database import get_db
+
+# ---------------------------------------------------------------------------
+# App setup with mocked DB dependency
+# ---------------------------------------------------------------------------
+
+app = FastAPI()
+app.include_router(evidence_router)
+app.include_router(llm_audit_router, prefix="/compliance")
+
+mock_db = MagicMock()
+
+
+def override_get_db():
+    """Generator used in place of ``get_db``; yields the module-level ``mock_db``."""
+    yield mock_db
+
+
+app.dependency_overrides[get_db] = override_get_db
+client = TestClient(app)
+
+EVIDENCE_UUID = "eeeeeeee-aaaa-bbbb-cccc-ffffffffffff"
+CONTROL_UUID = "cccccccc-aaaa-bbbb-cccc-dddddddddddd"
+NOW = datetime(2026, 3, 23, 12, 0, 0)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_evidence(overrides=None):
+    """Build a ``MagicMock`` evidence record pre-filled with test defaults.
+
+    Args:
+        overrides: Optional mapping of attribute name to value. Each entry is
+            set on the mock after the defaults and therefore replaces them.
+
+    Returns:
+        The configured ``MagicMock``.
+    """
+    status = MagicMock()
+    status.value = "valid"
+
+    defaults = {
+        "id": EVIDENCE_UUID,
+        "control_id": CONTROL_UUID,
+        "evidence_type": "test_results",
+        "title": "Pytest Test Report",
+        "description": "All tests passing",
+        "artifact_url": "https://ci.example.com/job/123/artifact",
+        "artifact_path": None,
+        "artifact_hash": "abc123def456",
+        "file_size_bytes": None,
+        "mime_type": None,
+        "status": status,
+        "uploaded_by": None,
+        "source": "ci_pipeline",
+        "ci_job_id": "job-123",
+        "valid_from": NOW,
+        "valid_until": NOW + timedelta(days=90),
+        "collected_at": NOW,
+        "created_at": NOW,
+        # Anti-fake-evidence fields
+        "confidence_level": EvidenceConfidenceEnum.E3,
+        "truth_status": EvidenceTruthStatusEnum.OBSERVED,
+        "generation_mode": None,
+        "may_be_used_as_evidence": True,
+        "reviewed_by": None,
+        "reviewed_at": None,
+        # Phase 2 fields
+        "approval_status": "none",
+        "first_reviewer": None,
+        "first_reviewed_at": None,
+        "second_reviewer": None,
+        "second_reviewed_at": None,
+        "requires_four_eyes": False,
+    }
+
+    evidence = MagicMock()
+    for attr_name, attr_value in defaults.items():
+        setattr(evidence, attr_name, attr_value)
+    # Caller-supplied values are applied last so they win over the defaults.
+    for attr_name, attr_value in (overrides or {}).items():
+        setattr(evidence, attr_name, attr_value)
+    return evidence
+
+
+def make_control(overrides=None):
+    """Build a ``MagicMock`` control with test defaults.
+
+    Args:
+        overrides: Optional mapping of attribute name to value, applied after
+            the defaults.
+
+    Returns:
+        The configured ``MagicMock``.
+    """
+    defaults = {
+        "id": CONTROL_UUID,
+        "control_id": "GOV-001",
+        "title": "Access Control",
+        "status": ControlStatusEnum.PLANNED,
+    }
+    control = MagicMock()
+    for attr_name, attr_value in defaults.items():
+        setattr(control, attr_name, attr_value)
+    for attr_name, attr_value in (overrides or {}).items():
+        setattr(control, attr_name, attr_value)
+    return control
+
+
+# ===========================================================================
+# 1. TestEvidenceConfidenceClassification
+# ===========================================================================
+
+class TestEvidenceConfidenceClassification:
+    """Checks the source -> confidence level mapping of ``_classify_confidence``."""
+
+    def test_ci_pipeline_returns_e3(self):
+        """Source ``ci_pipeline`` is rated E3."""
+        level = _classify_confidence("ci_pipeline")
+        assert level == EvidenceConfidenceEnum.E3
+
+    def test_api_with_hash_returns_e3(self):
+        """Source ``api`` with an artifact hash is rated E3."""
+        level = _classify_confidence("api", artifact_hash="sha256:abc")
+        assert level == EvidenceConfidenceEnum.E3
+
+    def test_api_without_hash_returns_e3(self):
+        """Source ``api`` without an artifact hash is still rated E3."""
+        level = _classify_confidence("api")
+        assert level == EvidenceConfidenceEnum.E3
+
+    def test_manual_returns_e1(self):
+        """Source ``manual`` is rated E1."""
+        level = _classify_confidence("manual")
+        assert level == EvidenceConfidenceEnum.E1
+
+    def test_upload_returns_e1(self):
+        """Source ``upload`` is rated E1."""
+        level = _classify_confidence("upload")
+        assert level == EvidenceConfidenceEnum.E1
+
+    def test_generated_returns_e0(self):
+        """Source ``generated`` is rated E0."""
+        level = _classify_confidence("generated")
+        assert level == EvidenceConfidenceEnum.E0
+
+    def test_unknown_source_returns_e1(self):
+        """An unrecognised source string is rated E1."""
+        level = _classify_confidence("some_random_source")
+        assert level == EvidenceConfidenceEnum.E1
+
+    def test_none_source_returns_e1(self):
+        """A ``None`` source is rated E1."""
+        level = _classify_confidence(None)
+        assert level == EvidenceConfidenceEnum.E1
+
+
+# ===========================================================================
+# 2. TestEvidenceTruthStatus
+# ===========================================================================
+
+class TestEvidenceTruthStatus:
+    """Checks the source -> truth status mapping of ``_classify_truth_status``."""
+
+    def test_ci_pipeline_returns_observed(self):
+        """Source ``ci_pipeline`` maps to OBSERVED."""
+        truth = _classify_truth_status("ci_pipeline")
+        assert truth == EvidenceTruthStatusEnum.OBSERVED
+
+    def test_manual_returns_uploaded(self):
+        """Source ``manual`` maps to UPLOADED."""
+        truth = _classify_truth_status("manual")
+        assert truth == EvidenceTruthStatusEnum.UPLOADED
+
+    def test_upload_returns_uploaded(self):
+        """Source ``upload`` maps to UPLOADED."""
+        truth = _classify_truth_status("upload")
+        assert truth == EvidenceTruthStatusEnum.UPLOADED
+
+    def test_generated_returns_generated(self):
+        """Source ``generated`` maps to GENERATED."""
+        truth = _classify_truth_status("generated")
+        assert truth == EvidenceTruthStatusEnum.GENERATED
+
+    def test_api_returns_observed(self):
+        """Source ``api`` maps to OBSERVED."""
+        truth = _classify_truth_status("api")
+        assert truth == EvidenceTruthStatusEnum.OBSERVED
+
+    def test_none_returns_uploaded(self):
+        """A ``None`` source maps to UPLOADED."""
+        truth = _classify_truth_status(None)
+        assert truth == EvidenceTruthStatusEnum.UPLOADED
+
+
+# ===========================================================================
+# 3. TestControlStatusTransitions
+# ===========================================================================
+
+class TestControlStatusTransitions:
+    """Checks the ``(allowed, violations)`` result of ``validate_transition``."""
+
+    def test_planned_to_in_progress_allowed(self):
+        """planned -> in_progress is allowed and reports no violations."""
+        ok, problems = validate_transition("planned", "in_progress")
+        assert ok is True
+        assert problems == []
+
+    def test_in_progress_to_pass_without_evidence_blocked(self):
+        """in_progress -> pass with an empty evidence list is rejected."""
+        ok, problems = validate_transition("in_progress", "pass", evidence_list=[])
+        assert ok is False
+        assert len(problems) > 0
+        assert "pass" in problems[0].lower()
+
+    def test_in_progress_to_pass_with_e2_evidence_allowed(self):
+        """in_progress -> pass is allowed with E2 / validated_internal evidence."""
+        evidence = make_evidence({
+            "confidence_level": EvidenceConfidenceEnum.E2,
+            "truth_status": EvidenceTruthStatusEnum.VALIDATED_INTERNAL,
+        })
+        ok, problems = validate_transition("in_progress", "pass", evidence_list=[evidence])
+        assert ok is True
+        assert problems == []
+
+    def test_in_progress_to_pass_with_e1_evidence_blocked(self):
+        """in_progress -> pass with only E1 evidence is rejected, naming E2."""
+        evidence = make_evidence({
+            "confidence_level": EvidenceConfidenceEnum.E1,
+            "truth_status": EvidenceTruthStatusEnum.UPLOADED,
+        })
+        ok, problems = validate_transition("in_progress", "pass", evidence_list=[evidence])
+        assert ok is False
+        assert "E2" in problems[0]
+
+    def test_in_progress_to_partial_with_evidence_allowed(self):
+        """in_progress -> partial is allowed with E0 evidence."""
+        evidence = make_evidence({"confidence_level": EvidenceConfidenceEnum.E0})
+        ok, _ = validate_transition("in_progress", "partial", evidence_list=[evidence])
+        assert ok is True
+
+    def test_in_progress_to_partial_without_evidence_blocked(self):
+        """in_progress -> partial with an empty evidence list is rejected."""
+        ok, _ = validate_transition("in_progress", "partial", evidence_list=[])
+        assert ok is False
+
+    def test_pass_to_fail_always_allowed(self):
+        """pass -> fail is allowed without passing any evidence."""
+        ok, _ = validate_transition("pass", "fail")
+        assert ok is True
+
+    def test_any_to_na_requires_justification(self):
+        """-> n/a without a justification is rejected and says so."""
+        ok, problems = validate_transition("in_progress", "n/a", status_justification=None)
+        assert ok is False
+        assert "justification" in problems[0].lower()
+
+    def test_any_to_na_with_justification_allowed(self):
+        """-> n/a with a justification text is allowed."""
+        reason = "Not applicable for this project"
+        ok, _ = validate_transition("in_progress", "n/a", status_justification=reason)
+        assert ok is True
+
+    def test_any_to_planned_always_allowed(self):
+        """pass -> planned is allowed."""
+        ok, _ = validate_transition("pass", "planned")
+        assert ok is True
+
+    def test_same_status_noop_allowed(self):
+        """Requesting the current status again is allowed."""
+        ok, _ = validate_transition("pass", "pass")
+        assert ok is True
+
+    def test_bypass_for_auto_updater(self):
+        """With ``bypass_for_auto_updater`` an evidence-less pass is allowed."""
+        ok, _ = validate_transition(
+            "in_progress",
+            "pass",
+            evidence_list=[],
+            bypass_for_auto_updater=True,
+        )
+        assert ok is True
+
+    def test_partial_to_pass_needs_e2(self):
+        """partial -> pass with only E1 evidence is rejected."""
+        evidence = make_evidence({
+            "confidence_level": EvidenceConfidenceEnum.E1,
+            "truth_status": EvidenceTruthStatusEnum.UPLOADED,
+        })
+        ok, _ = validate_transition("partial", "pass", evidence_list=[evidence])
+        assert ok is False
+
+    def test_partial_to_pass_with_e3_allowed(self):
+        """partial -> pass is allowed with E3 / observed evidence."""
+        evidence = make_evidence({
+            "confidence_level": EvidenceConfidenceEnum.E3,
+            "truth_status": EvidenceTruthStatusEnum.OBSERVED,
+        })
+        ok, _ = validate_transition("partial", "pass", evidence_list=[evidence])
+        assert ok is True
+
+    def test_in_progress_to_fail_allowed(self):
+        """in_progress -> fail is allowed without passing any evidence."""
+        ok, _ = validate_transition("in_progress", "fail")
+        assert ok is True
+
+
+# ===========================================================================
+# 4. TestMultiDimensionalScore
+# ===========================================================================
+
+class TestMultiDimensionalScore:
+    """Checks ``ControlRepository.get_multi_dimensional_score`` and ``get_statistics``."""
+
+    def test_score_structure(self):
+        """The score dict exposes every dimension key plus ``hard_blocks``."""
+        from compliance.db.repository import ControlRepository
+        repository = ControlRepository(mock_db)
+
+        with patch.object(repository, 'get_all', return_value=[]):
+            score = repository.get_multi_dimensional_score()
+
+        expected_keys = (
+            "requirement_coverage",
+            "evidence_strength",
+            "validation_quality",
+            "evidence_freshness",
+            "control_effectiveness",
+            "overall_readiness",
+            "hard_blocks",
+        )
+        for key in expected_keys:
+            assert key in score
+
+    def test_empty_controls_returns_zeros(self):
+        """With no controls, readiness is 0.0 and the first hard block says so."""
+        from compliance.db.repository import ControlRepository
+        repository = ControlRepository(mock_db)
+
+        with patch.object(repository, 'get_all', return_value=[]):
+            score = repository.get_multi_dimensional_score()
+
+        assert score["overall_readiness"] == 0.0
+        first_block = score["hard_blocks"][0]
+        assert "Keine Controls" in first_block
+
+    def test_hard_blocks_pass_without_evidence(self):
+        """A control on 'pass' with no evidence rows yields an evidence hard block."""
+        from compliance.db.repository import ControlRepository
+        repository = ControlRepository(mock_db)
+
+        passing_control = make_control({"status": ControlStatusEnum.PASS})
+        query_result = mock_db.query.return_value
+        query_result.all.return_value = []  # no evidence
+        query_result.scalar.return_value = 0
+
+        with patch.object(repository, 'get_all', return_value=[passing_control]):
+            score = repository.get_multi_dimensional_score()
+
+        # Case-insensitive match; this also covers the capitalised "Evidence".
+        assert any("evidence" in block.lower() for block in score["hard_blocks"])
+
+    def test_all_dimensions_are_floats(self):
+        """Every numeric dimension is returned as a ``float``."""
+        from compliance.db.repository import ControlRepository
+        repository = ControlRepository(mock_db)
+
+        with patch.object(repository, 'get_all', return_value=[]):
+            score = repository.get_multi_dimensional_score()
+
+        numeric_keys = (
+            "requirement_coverage",
+            "evidence_strength",
+            "validation_quality",
+            "evidence_freshness",
+            "control_effectiveness",
+            "overall_readiness",
+        )
+        for key in numeric_keys:
+            assert isinstance(score[key], float), f"{key} should be float"
+
+    def test_hard_blocks_is_list(self):
+        """``hard_blocks`` is a list."""
+        from compliance.db.repository import ControlRepository
+        repository = ControlRepository(mock_db)
+
+        with patch.object(repository, 'get_all', return_value=[]):
+            score = repository.get_multi_dimensional_score()
+
+        assert isinstance(score["hard_blocks"], list)
+
+    def test_backwards_compatibility_with_old_score(self):
+        """``get_statistics`` still returns ``compliance_score`` and ``total``."""
+        from compliance.db.repository import ControlRepository
+        repository = ControlRepository(mock_db)
+
+        query_result = mock_db.query.return_value
+        query_result.scalar.return_value = 0
+        query_result.group_by.return_value.all.return_value = []
+
+        stats = repository.get_statistics()
+        assert "compliance_score" in stats
+        assert "total" in stats
+
+
+# ===========================================================================
+# 5. TestForbiddenFormulations
+# ===========================================================================
+
+class TestForbiddenFormulations:
+    """Schema-level checks for the anti-fake-evidence API models.
+
+    Despite the class name, no forbidden-formulation check is exercised here;
+    the tests only construct and inspect models from ``compliance.api.schemas``.
+    """
+
+    def test_import_works(self):
+        """The score and transition-error schemas import and build with defaults."""
+        # Only the Python-side schemas are covered; per the original note the
+        # forbidden-pattern check itself lives in TypeScript.
+        from compliance.api.schemas import MultiDimensionalScore, StatusTransitionError
+        score = MultiDimensionalScore()
+        assert score.overall_readiness == 0.0
+        err = StatusTransitionError(current_status="planned", requested_status="pass")
+        assert err.allowed is False
+
+    def test_status_transition_error_schema(self):
+        """``StatusTransitionError`` keeps the violations it was given."""
+        from compliance.api.schemas import StatusTransitionError
+        err = StatusTransitionError(
+            allowed=False,
+            current_status="in_progress",
+            requested_status="pass",
+            violations=["Need E2 evidence"],
+        )
+        assert err.violations == ["Need E2 evidence"]
+
+    def test_multi_dimensional_score_defaults(self):
+        """A default score has 0.0 requirement coverage and no hard blocks."""
+        from compliance.api.schemas import MultiDimensionalScore
+        score = MultiDimensionalScore()
+        assert score.requirement_coverage == 0.0
+        assert score.hard_blocks == []
+
+    def test_multi_dimensional_score_with_data(self):
+        """Explicit values passed to the score schema are stored unchanged."""
+        from compliance.api.schemas import MultiDimensionalScore
+        score = MultiDimensionalScore(
+            requirement_coverage=80.0,
+            evidence_strength=60.0,
+            validation_quality=40.0,
+            evidence_freshness=90.0,
+            control_effectiveness=70.0,
+            overall_readiness=65.0,
+            hard_blocks=["3 Controls ohne Evidence"],
+        )
+        assert score.overall_readiness == 65.0
+        assert len(score.hard_blocks) == 1
+
+    def test_evidence_response_has_anti_fake_fields(self):
+        """``EvidenceResponse`` declares all six anti-fake-evidence fields."""
+        from compliance.api.schemas import EvidenceResponse
+        fields = EvidenceResponse.model_fields
+        assert "confidence_level" in fields
+        assert "truth_status" in fields
+        assert "generation_mode" in fields
+        assert "may_be_used_as_evidence" in fields
+        assert "reviewed_by" in fields
+        assert "reviewed_at" in fields
+
+
+# ===========================================================================
+# 6. TestLLMGenerationAudit
+# ===========================================================================
+
+class TestLLMGenerationAudit:
+    """Test LLM generation audit trail.
+
+    NOTE(review): these tests reassign attributes on the module-level
+    ``mock_db`` (``add``, ``commit``, ``refresh``, ``query.return_value``).
+    Nothing here restores them, so the values stay in place for tests that run
+    afterwards — confirm no later test depends on that.
+    """
+
+    def test_create_audit_record(self):
+        """POST /compliance/llm-audit should create a record."""
+        # Stand-in for the stored row; its attributes mirror the request below.
+        mock_record = MagicMock()
+        mock_record.id = "audit-001"
+        mock_record.tenant_id = None
+        mock_record.entity_type = "document"
+        mock_record.entity_id = None
+        mock_record.generation_mode = "draft_assistance"
+        mock_record.truth_status = EvidenceTruthStatusEnum.GENERATED
+        mock_record.may_be_used_as_evidence = False
+        mock_record.llm_model = "qwen2.5vl:32b"
+        mock_record.llm_provider = "ollama"
+        mock_record.prompt_hash = None
+        mock_record.input_summary = "Test input"
+        mock_record.output_summary = "Test output"
+        mock_record.extra_metadata = {}
+        mock_record.created_at = NOW
+
+        # refresh() assigns the id on whatever object it is called with.
+        mock_db.add = MagicMock()
+        mock_db.commit = MagicMock()
+        mock_db.refresh = MagicMock(side_effect=lambda r: setattr(r, 'id', 'audit-001'))
+
+        # Replace the model class inside the route module so that constructing
+        # it returns mock_record instead of a real ORM instance.
+        with patch('compliance.api.llm_audit_routes.LLMGenerationAuditDB', return_value=mock_record):
+            resp = client.post("/compliance/llm-audit", json={
+                "entity_type": "document",
+                "generation_mode": "draft_assistance",
+                "truth_status": "generated",
+                "may_be_used_as_evidence": False,
+                "llm_model": "qwen2.5vl:32b",
+                "llm_provider": "ollama",
+            })
+
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["entity_type"] == "document"
+        assert data["truth_status"] == "generated"
+        assert data["may_be_used_as_evidence"] is False
+
+    def test_truth_status_always_generated_for_llm(self):
+        """A freshly constructed audit row has truth_status None or GENERATED."""
+        from compliance.db.models import LLMGenerationAuditDB, EvidenceTruthStatusEnum
+        audit = LLMGenerationAuditDB()
+        # None is accepted as well as GENERATED — presumably because the column
+        # default is not populated on a bare instance; confirm against the model.
+        assert audit.truth_status is None or audit.truth_status == EvidenceTruthStatusEnum.GENERATED
+
+    def test_may_be_used_as_evidence_defaults_false(self):
+        """A freshly constructed audit row has may_be_used_as_evidence False or None."""
+        from compliance.db.models import LLMGenerationAuditDB
+        audit = LLMGenerationAuditDB()
+        assert audit.may_be_used_as_evidence is False or audit.may_be_used_as_evidence is None
+
+    def test_list_audit_records(self):
+        """GET /compliance/llm-audit should return records."""
+        # Self-returning query mock: filter/order_by/offset/limit all chain back
+        # to the same object, which reports zero rows.
+        mock_query = MagicMock()
+        mock_query.count.return_value = 0
+        mock_query.filter.return_value = mock_query
+        mock_query.order_by.return_value = mock_query
+        mock_query.offset.return_value = mock_query
+        mock_query.limit.return_value = mock_query
+        mock_query.all.return_value = []
+        mock_db.query.return_value = mock_query
+
+        resp = client.get("/compliance/llm-audit")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "records" in data
+        assert "total" in data
+        assert data["total"] == 0
+
+
+# ===========================================================================
+# 7. TestEvidenceReview
+# ===========================================================================
+
+class TestEvidenceReview:
+    """Test evidence review endpoint (PATCH /evidence/{id}/review)."""
+
+    def test_review_upgrades_confidence(self):
+        """PATCH /evidence/{id}/review should update confidence and set reviewer."""
+        evidence = make_evidence({
+            "confidence_level": EvidenceConfidenceEnum.E1,
+            "truth_status": EvidenceTruthStatusEnum.UPLOADED,
+        })
+        # query(...).filter(...).first() hands the endpoint this mock record.
+        mock_db.query.return_value.filter.return_value.first.return_value = evidence
+        mock_db.commit = MagicMock()
+        mock_db.refresh = MagicMock()
+
+        resp = client.patch(f"/evidence/{EVIDENCE_UUID}/review", json={
+            "confidence_level": "E2",
+            "truth_status": "validated_internal",
+            "reviewed_by": "auditor@example.com",
+        })
+
+        assert resp.status_code == 200
+        # The endpoint is expected to have mutated the mock record in place.
+        assert evidence.confidence_level == EvidenceConfidenceEnum.E2
+        assert evidence.truth_status == EvidenceTruthStatusEnum.VALIDATED_INTERNAL
+        assert evidence.reviewed_by == "auditor@example.com"
+        assert evidence.reviewed_at is not None
+
+    def test_review_nonexistent_evidence_returns_404(self):
+        """A lookup that yields no record answers with HTTP 404."""
+        mock_db.query.return_value.filter.return_value.first.return_value = None
+        resp = client.patch("/evidence/nonexistent-id/review", json={
+            "reviewed_by": "someone",
+        })
+        assert resp.status_code == 404
+
+    def test_review_invalid_confidence_returns_400(self):
+        """An unknown confidence_level value answers with HTTP 400."""
+        evidence = make_evidence()
+        mock_db.query.return_value.filter.return_value.first.return_value = evidence
+
+        resp = client.patch(f"/evidence/{EVIDENCE_UUID}/review", json={
+            "confidence_level": "INVALID",
+            "reviewed_by": "someone",
+        })
+        assert resp.status_code == 400
+
+
+# ===========================================================================
+# 8. TestControlUpdateIntegration
+# ===========================================================================
+
+class TestControlUpdateIntegration:
+    """Checks the ``status_justification`` schema field and the ``in_progress`` status value."""
+
+    def test_control_update_has_status_justification(self):
+        """``ControlUpdate`` declares ``status_justification``."""
+        from compliance.api.schemas import ControlUpdate
+        assert "status_justification" in ControlUpdate.model_fields
+
+    def test_control_response_has_status_justification(self):
+        """``ControlResponse`` declares ``status_justification``."""
+        from compliance.api.schemas import ControlResponse
+        assert "status_justification" in ControlResponse.model_fields
+
+    def test_control_status_enum_has_in_progress(self):
+        """``ControlStatusEnum.IN_PROGRESS`` carries the value ``in_progress``."""
+        in_progress_value = ControlStatusEnum.IN_PROGRESS.value
+        assert in_progress_value == "in_progress"
+
+
+# ===========================================================================
+# 9. TestEvidenceEnums
+# ===========================================================================
+
+class TestEvidenceEnums:
+    """Pins the string values of the evidence confidence and truth-status enums."""
+
+    def test_confidence_enum_values(self):
+        """Each confidence member's value equals its member name (E0..E4)."""
+        for label in ("E0", "E1", "E2", "E3", "E4"):
+            assert getattr(EvidenceConfidenceEnum, label).value == label
+
+    def test_truth_status_enum_values(self):
+        """Each truth-status member carries its expected string value."""
+        expected_values = {
+            "GENERATED": "generated",
+            "UPLOADED": "uploaded",
+            "OBSERVED": "observed",
+            "VALIDATED_INTERNAL": "validated_internal",
+            "REJECTED": "rejected",
+            "PROVIDED_TO_AUDITOR": "provided_to_auditor",
+            "ACCEPTED_BY_AUDITOR": "accepted_by_auditor",
+        }
+        for member_name, value in expected_values.items():
+            assert getattr(EvidenceTruthStatusEnum, member_name).value == value
@@ -0,0 +1,528 @@
+"""Tests for Anti-Fake-Evidence Phase 2.
+
+~35 tests covering:
+- Audit trail extension (evidence review/create logging)
+- Assertion engine (extraction, CRUD, verify, summary)
+- Four-Eyes review (domain check, first/second review, same-person reject)
+- UI badge data (response schema includes new fields)
+"""
+
+from datetime import datetime, timedelta
+from unittest.mock import MagicMock, patch
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from compliance.api.evidence_routes import (
+    router as evidence_router,
+    _requires_four_eyes,
+    _classify_confidence,
+    _classify_truth_status,
+)
+from compliance.api.assertion_routes import router as assertion_router
+from compliance.services.assertion_engine import extract_assertions, _classify_sentence
+from compliance.db.models import (
+    EvidenceConfidenceEnum,
+    EvidenceTruthStatusEnum,
+    ControlStatusEnum,
+    AssertionDB,
+)
+from classroom_engine.database import get_db
+
+# ---------------------------------------------------------------------------
+# App setup with mocked DB dependency
+# ---------------------------------------------------------------------------
+
+# Minimal FastAPI app that mounts only the two routers exercised in this module.
+app = FastAPI()
+app.include_router(evidence_router)
+app.include_router(assertion_router)
+
+# Single MagicMock session shared by all tests here. Most tests begin with
+# mock_db.reset_mock(); by default that clears recorded calls but keeps any
+# explicitly assigned return_value / side_effect configuration.
+mock_db = MagicMock()
+
+
+def override_get_db():
+    """Yield the shared ``mock_db`` in place of the real ``get_db`` dependency."""
+    yield mock_db
+
+
+# Handlers that depend on get_db now receive mock_db instead.
+app.dependency_overrides[get_db] = override_get_db
+client = TestClient(app)
+
+# Fixed identifiers and timestamp reused by the helpers and tests in this module.
+EVIDENCE_UUID = "eeee0002-aaaa-bbbb-cccc-ffffffffffff"
+CONTROL_UUID = "cccc0002-aaaa-bbbb-cccc-dddddddddddd"
+ASSERTION_UUID = "aaaa0002-bbbb-cccc-dddd-eeeeeeeeeeee"
+NOW = datetime(2026, 3, 23, 14, 0, 0)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_evidence(overrides=None):
+    """Build a MagicMock standing in for an evidence row.
+
+    The defaults describe a valid, uploaded E1 artifact with no reviewers and
+    no four-eyes requirement. Each key in ``overrides`` replaces the default
+    attribute of the same name.
+    """
+    status = MagicMock()
+    status.value = "valid"
+    attributes = {
+        "id": EVIDENCE_UUID,
+        "control_id": CONTROL_UUID,
+        "evidence_type": "test_results",
+        "title": "Phase 2 Test Evidence",
+        "description": "Testing four-eyes",
+        "artifact_url": "https://ci.example.com/artifact",
+        "artifact_path": None,
+        "artifact_hash": "abc123",
+        "file_size_bytes": None,
+        "mime_type": None,
+        "status": status,
+        "uploaded_by": None,
+        "source": "api",
+        "ci_job_id": None,
+        "valid_from": NOW,
+        "valid_until": NOW + timedelta(days=90),
+        "collected_at": NOW,
+        "created_at": NOW,
+        "confidence_level": EvidenceConfidenceEnum.E1,
+        "truth_status": EvidenceTruthStatusEnum.UPLOADED,
+        "generation_mode": None,
+        "may_be_used_as_evidence": True,
+        "reviewed_by": None,
+        "reviewed_at": None,
+        # Phase 2 fields
+        "approval_status": "none",
+        "first_reviewer": None,
+        "first_reviewed_at": None,
+        "second_reviewer": None,
+        "second_reviewed_at": None,
+        "requires_four_eyes": False,
+    }
+    # Overrides win over the defaults above.
+    attributes.update(overrides or {})
+
+    record = MagicMock()
+    for attr_name, attr_value in attributes.items():
+        setattr(record, attr_name, attr_value)
+    return record
+
+
+def make_assertion(overrides=None):
+    """Build a MagicMock standing in for an assertion row.
+
+    The defaults describe an unverified "pflicht" assertion attached to the
+    test control. Each key in ``overrides`` replaces the default attribute of
+    the same name.
+    """
+    attributes = {
+        "id": ASSERTION_UUID,
+        "tenant_id": "tenant-001",
+        "entity_type": "control",
+        "entity_id": CONTROL_UUID,
+        "sentence_text": "Test assertion sentence",
+        "sentence_index": 0,
+        "assertion_type": "assertion",
+        "evidence_ids": [],
+        "confidence": 0.0,
+        "normative_tier": "pflicht",
+        "verified_by": None,
+        "verified_at": None,
+        "created_at": NOW,
+        "updated_at": NOW,
+    }
+    # Overrides win over the defaults above.
+    attributes.update(overrides or {})
+
+    record = MagicMock()
+    for attr_name, attr_value in attributes.items():
+        setattr(record, attr_name, attr_value)
+    return record
+
+
+# ===========================================================================
+# 1. TestAuditTrailExtension
+# ===========================================================================
+
+class TestAuditTrailExtension:
+    """Evidence review/reject endpoints and the audit-trail query endpoint."""
+
+    @staticmethod
+    def _stub_lookup(row):
+        """Reset the shared session mock; ``query().filter().first()`` then returns ``row``."""
+        mock_db.reset_mock()
+        mock_db.query.return_value.filter.return_value.first.return_value = row
+
+    def test_review_evidence_logs_audit_trail(self):
+        """A successful review adds at least one object to the session."""
+        self._stub_lookup(make_evidence())
+        mock_db.refresh.return_value = None
+
+        response = client.patch(
+            f"/evidence/{EVIDENCE_UUID}/review",
+            json={"confidence_level": "E2", "reviewed_by": "auditor@test.com"},
+        )
+
+        assert response.status_code == 200
+        # db.add should be called for audit trail entries
+        assert mock_db.add.called
+
+    def test_review_evidence_records_old_and_new_confidence(self):
+        """Reviewing with a new confidence level (E1 -> E3) succeeds.
+
+        NOTE(review): only the status code is asserted; the recorded old/new
+        values named in the test title are not inspected.
+        """
+        self._stub_lookup(make_evidence({"confidence_level": EvidenceConfidenceEnum.E1}))
+        mock_db.refresh.return_value = None
+
+        response = client.patch(
+            f"/evidence/{EVIDENCE_UUID}/review",
+            json={"confidence_level": "E3", "reviewed_by": "reviewer@test.com"},
+        )
+
+        assert response.status_code == 200
+
+    def test_review_evidence_records_truth_status_change(self):
+        """Reviewing with a new truth status succeeds.
+
+        NOTE(review): only the status code is asserted; the recorded change is
+        not inspected.
+        """
+        self._stub_lookup(make_evidence({"truth_status": EvidenceTruthStatusEnum.UPLOADED}))
+        mock_db.refresh.return_value = None
+
+        response = client.patch(
+            f"/evidence/{EVIDENCE_UUID}/review",
+            json={"truth_status": "validated_internal", "reviewed_by": "reviewer@test.com"},
+        )
+
+        assert response.status_code == 200
+
+    def test_review_nonexistent_evidence_returns_404(self):
+        """Review answers 404 when the lookup returns no row."""
+        self._stub_lookup(None)
+
+        response = client.patch(
+            "/evidence/nonexistent/review",
+            json={"reviewed_by": "someone"},
+        )
+
+        assert response.status_code == 404
+
+    def test_reject_evidence_logs_audit_trail(self):
+        """Rejecting evidence returns 200 with ``approval_status == "rejected"``.
+
+        NOTE(review): despite the name, no audit-trail call is asserted here.
+        """
+        self._stub_lookup(make_evidence())
+        mock_db.refresh.return_value = None
+
+        response = client.patch(
+            f"/evidence/{EVIDENCE_UUID}/reject",
+            json={"reviewed_by": "auditor@test.com", "rejection_reason": "Fake evidence"},
+        )
+
+        assert response.status_code == 200
+        assert response.json()["approval_status"] == "rejected"
+
+    def test_reject_nonexistent_evidence_returns_404(self):
+        """Reject answers 404 when the lookup returns no row."""
+        self._stub_lookup(None)
+
+        response = client.patch(
+            "/evidence/nonexistent/reject",
+            json={"reviewed_by": "someone"},
+        )
+
+        assert response.status_code == 404
+
+    def test_audit_trail_query_endpoint(self):
+        """GET /audit-trail returns at least the one stubbed entry."""
+        mock_db.reset_mock()
+        entry = MagicMock(
+            id="trail-001",
+            entity_type="evidence",
+            entity_id=EVIDENCE_UUID,
+            entity_name="Test",
+            action="review",
+            field_changed="confidence_level",
+            old_value="E1",
+            new_value="E2",
+            change_summary=None,
+            performed_by="auditor",
+            performed_at=NOW,
+            checksum="abc",
+        )
+        # Stub the query(...).filter().filter().order_by().limit().all() chain.
+        filtered = mock_db.query.return_value.filter.return_value.filter.return_value
+        filtered.order_by.return_value.limit.return_value.all.return_value = [entry]
+
+        response = client.get(f"/audit-trail?entity_type=evidence&entity_id={EVIDENCE_UUID}")
+
+        assert response.status_code == 200
+        assert response.json()["total"] >= 1
+
+    def test_audit_trail_checksum_present(self):
+        """Audit trail entries should have a checksum for integrity."""
+        from compliance.api.audit_trail_utils import create_signature
+        signature = create_signature("evidence|123|review|user@test.com")
+        assert len(signature) == 64  # SHA-256 hex digest
+
+
+# ===========================================================================
+# 2. TestAssertionEngine
+# ===========================================================================
+
+class TestAssertionEngine:
+    """Test assertion extraction, classification and the assertion endpoints."""
+
+    def test_pflicht_sentence_classified_as_assertion(self):
+        """A "muss" sentence is an assertion of tier "pflicht"."""
+        result = _classify_sentence("Die Organisation muss ein ISMS implementieren.")
+        assert result == ("assertion", "pflicht")
+
+    def test_empfehlung_sentence_classified(self):
+        """A "sollte" sentence is an assertion of tier "empfehlung"."""
+        result = _classify_sentence("Die Organisation sollte regelmäßige Audits durchführen.")
+        assert result == ("assertion", "empfehlung")
+
+    def test_kann_sentence_classified(self):
+        """A "kann" sentence is an assertion of tier "kann"."""
+        result = _classify_sentence("Optional kann ein externes Audit durchgeführt werden.")
+        assert result == ("assertion", "kann")
+
+    def test_rationale_sentence_classified(self):
+        """A "weil" sentence is classified as rationale without a tier."""
+        result = _classify_sentence("Dies ist erforderlich, weil Datenverlust schwere Folgen hat.")
+        assert result == ("rationale", None)
+
+    def test_fact_sentence_with_evidence_keyword(self):
+        """A sentence about an issued certificate is classified as a fact."""
+        result = _classify_sentence("Das Zertifikat wurde am 15.03.2026 ausgestellt.")
+        assert result == ("fact", None)
+
+    def test_extract_assertions_splits_sentences(self):
+        """Two sentences yield two results, classified independently."""
+        text = "Die Organisation muss Daten schützen. Sie sollte regelmäßig prüfen."
+        results = extract_assertions(text, "control", "ctrl-001")
+        assert len(results) == 2
+        assert results[0]["assertion_type"] == "assertion"
+        assert results[0]["normative_tier"] == "pflicht"
+        assert results[1]["normative_tier"] == "empfehlung"
+
+    def test_extract_assertions_empty_text(self):
+        """Empty text yields an empty result list."""
+        results = extract_assertions("", "control", "ctrl-001")
+        assert results == []
+
+    def test_extract_assertions_single_sentence(self):
+        """A single sentence yields exactly one result."""
+        results = extract_assertions("Der Betreiber muss ein Audit durchführen.", "control", "ctrl-001")
+        assert len(results) == 1
+        assert results[0]["normative_tier"] == "pflicht"
+
+    def test_mixed_text_with_rationale(self):
+        """Assertion and rationale sentences are both reported."""
+        text = "Die Organisation muss ein ISMS implementieren. Dies ist notwendig, weil Compliance gefordert ist."
+        results = extract_assertions(text, "control", "ctrl-001")
+        assert len(results) == 2
+        types = [r["assertion_type"] for r in results]
+        assert "assertion" in types
+        assert "rationale" in types
+
+    def test_assertion_crud_create(self):
+        """POST /assertions succeeds when the session fills in generated fields."""
+        mock_db.reset_mock()
+        mock_db.refresh.return_value = None
+        # Mock the added object to return proper values
+        def side_effect_add(obj):
+            obj.id = ASSERTION_UUID
+            obj.created_at = NOW
+            obj.updated_at = NOW
+            obj.sentence_index = 0
+            obj.confidence = 0.0
+        mock_db.add.side_effect = side_effect_add
+
+        try:
+            resp = client.post(
+                "/assertions?tenant_id=tenant-001",
+                json={
+                    "entity_type": "control",
+                    "entity_id": CONTROL_UUID,
+                    "sentence_text": "Die Organisation muss ein ISMS implementieren.",
+                    "assertion_type": "assertion",
+                    "normative_tier": "pflicht",
+                },
+            )
+        finally:
+            # mock_db is shared module-wide and reset_mock() keeps side_effect
+            # by default, so remove it here; otherwise every later db.add()
+            # would keep stamping assertion fields onto unrelated objects.
+            mock_db.add.side_effect = None
+        assert resp.status_code == 200
+
+    def test_assertion_verify_endpoint(self):
+        """Verifying marks the assertion as a fact and records the verifier."""
+        a = make_assertion()
+        mock_db.reset_mock()
+        mock_db.query.return_value.filter.return_value.first.return_value = a
+        mock_db.refresh.return_value = None
+
+        resp = client.post(f"/assertions/{ASSERTION_UUID}/verify?verified_by=auditor@test.com")
+        assert resp.status_code == 200
+        assert a.assertion_type == "fact"
+        assert a.verified_by == "auditor@test.com"
+
+    def test_assertion_summary(self):
+        """The summary counts totals, facts, rationale and unverified entries."""
+        mock_db.reset_mock()
+        a1 = make_assertion({"assertion_type": "assertion", "verified_by": None})
+        a2 = make_assertion({"assertion_type": "fact", "verified_by": "user"})
+        a3 = make_assertion({"assertion_type": "rationale", "verified_by": None})
+        mock_db.query.return_value.filter.return_value.filter.return_value.filter.return_value.all.return_value = [a1, a2, a3]
+        # Direct .all() for no-filter case
+        mock_db.query.return_value.all.return_value = [a1, a2, a3]
+
+        resp = client.get("/assertions/summary")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["total_assertions"] == 3
+        assert data["total_facts"] == 1
+        assert data["total_rationale"] == 1
+        assert data["unverified_count"] == 1
+
+
+# ===========================================================================
+# 3. TestFourEyesReview
+# ===========================================================================
+
+class TestFourEyesReview:
+    """Four-eyes review: domain rule, review sequence and rejection."""
+
+    @staticmethod
+    def _stub_lookup(row):
+        """Reset the shared session mock; ``query().filter().first()`` then returns ``row``."""
+        mock_db.reset_mock()
+        mock_db.query.return_value.filter.return_value.first.return_value = row
+
+    def test_gov_domain_requires_four_eyes(self):
+        """The "gov" domain requires four-eyes review."""
+        required = _requires_four_eyes("gov")
+        assert required is True
+
+    def test_priv_domain_requires_four_eyes(self):
+        """The "priv" domain requires four-eyes review."""
+        required = _requires_four_eyes("priv")
+        assert required is True
+
+    def test_ops_domain_does_not_require_four_eyes(self):
+        """The "ops" domain does not require four-eyes review."""
+        required = _requires_four_eyes("ops")
+        assert required is False
+
+    def test_sdlc_domain_does_not_require_four_eyes(self):
+        """The "sdlc" domain does not require four-eyes review."""
+        required = _requires_four_eyes("sdlc")
+        assert required is False
+
+    def test_first_review_sets_first_approved(self):
+        """The first review records the reviewer and moves to "first_approved"."""
+        record = make_evidence({
+            "requires_four_eyes": True,
+            "approval_status": "pending_first",
+        })
+        self._stub_lookup(record)
+        mock_db.refresh.return_value = None
+
+        response = client.patch(
+            f"/evidence/{EVIDENCE_UUID}/review",
+            json={"reviewed_by": "reviewer1@test.com"},
+        )
+
+        assert response.status_code == 200
+        assert record.first_reviewer == "reviewer1@test.com"
+        assert record.approval_status == "first_approved"
+
+    def test_second_review_different_person_approves(self):
+        """A second review by a different person moves to "approved"."""
+        record = make_evidence({
+            "requires_four_eyes": True,
+            "approval_status": "first_approved",
+            "first_reviewer": "reviewer1@test.com",
+        })
+        self._stub_lookup(record)
+        mock_db.refresh.return_value = None
+
+        response = client.patch(
+            f"/evidence/{EVIDENCE_UUID}/review",
+            json={"reviewed_by": "reviewer2@test.com"},
+        )
+
+        assert response.status_code == 200
+        assert record.second_reviewer == "reviewer2@test.com"
+        assert record.approval_status == "approved"
+
+    def test_same_person_second_review_rejected(self):
+        """The first reviewer cannot also perform the second review (400)."""
+        record = make_evidence({
+            "requires_four_eyes": True,
+            "approval_status": "first_approved",
+            "first_reviewer": "reviewer1@test.com",
+        })
+        self._stub_lookup(record)
+
+        response = client.patch(
+            f"/evidence/{EVIDENCE_UUID}/review",
+            json={"reviewed_by": "reviewer1@test.com"},
+        )
+
+        assert response.status_code == 400
+        detail = response.json()["detail"]
+        assert "different" in detail.lower()
+
+    def test_already_approved_blocked(self):
+        """Reviewing already approved evidence answers 400."""
+        record = make_evidence({
+            "requires_four_eyes": True,
+            "approval_status": "approved",
+        })
+        self._stub_lookup(record)
+
+        response = client.patch(
+            f"/evidence/{EVIDENCE_UUID}/review",
+            json={"reviewed_by": "reviewer3@test.com"},
+        )
+
+        assert response.status_code == 400
+        detail = response.json()["detail"]
+        assert "already" in detail.lower()
+
+    def test_rejected_evidence_cannot_be_reviewed(self):
+        """Reviewing rejected evidence answers 400."""
+        record = make_evidence({
+            "requires_four_eyes": True,
+            "approval_status": "rejected",
+        })
+        self._stub_lookup(record)
+
+        response = client.patch(
+            f"/evidence/{EVIDENCE_UUID}/review",
+            json={"reviewed_by": "reviewer@test.com"},
+        )
+
+        assert response.status_code == 400
+
+    def test_reject_endpoint(self):
+        """The reject endpoint sets ``approval_status`` to "rejected"."""
+        record = make_evidence({"requires_four_eyes": True})
+        self._stub_lookup(record)
+        mock_db.refresh.return_value = None
+
+        response = client.patch(
+            f"/evidence/{EVIDENCE_UUID}/reject",
+            json={"reviewed_by": "auditor@test.com", "rejection_reason": "Not authentic"},
+        )
+
+        assert response.status_code == 200
+        assert record.approval_status == "rejected"
+
+
+# ===========================================================================
+# 4. TestUIBadgeData
+# ===========================================================================
+
+class TestUIBadgeData:
+    """Test that evidence and assertion responses expose the Phase 2 fields."""
+
+    def test_evidence_response_includes_approval_status(self):
+        """The review response carries approval_status and requires_four_eyes."""
+        evidence = make_evidence({
+            "approval_status": "first_approved",
+            "first_reviewer": "reviewer1@test.com",
+            "first_reviewed_at": NOW,
+            "requires_four_eyes": True,
+        })
+        mock_db.reset_mock()
+        mock_db.query.return_value.filter.return_value.first.return_value = evidence
+        mock_db.refresh.return_value = None
+
+        resp = client.patch(
+            f"/evidence/{EVIDENCE_UUID}/review",
+            json={"reviewed_by": "reviewer2@test.com"},
+        )
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "approval_status" in data
+        assert "requires_four_eyes" in data
+        assert data["requires_four_eyes"] is True
+
+    def test_evidence_response_includes_four_eyes_fields(self):
+        """_build_evidence_response copies the four-eyes fields from the row."""
+        evidence = make_evidence({
+            "requires_four_eyes": True,
+            "approval_status": "approved",
+            "first_reviewer": "r1@test.com",
+            "first_reviewed_at": NOW,
+            "second_reviewer": "r2@test.com",
+            "second_reviewed_at": NOW,
+        })
+
+        # The response builder is called directly, so no endpoint or session
+        # stubbing is needed for this test.
+        from compliance.api.evidence_routes import _build_evidence_response
+        resp = _build_evidence_response(evidence)
+        assert resp.approval_status == "approved"
+        assert resp.first_reviewer == "r1@test.com"
+        assert resp.second_reviewer == "r2@test.com"
+        assert resp.requires_four_eyes is True
+
+    def test_assertion_response_schema(self):
+        """GET /assertions/{id} returns the assertion fields used by the UI."""
+        a = make_assertion()
+        mock_db.reset_mock()
+        mock_db.query.return_value.filter.return_value.first.return_value = a
+
+        resp = client.get(f"/assertions/{ASSERTION_UUID}")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "assertion_type" in data
+        assert "normative_tier" in data
+        assert "evidence_ids" in data
+        assert "verified_by" in data
+
+    def test_evidence_response_includes_confidence_and_truth(self):
+        """Confidence level and truth status are exposed as plain strings."""
+        evidence = make_evidence({
+            "confidence_level": EvidenceConfidenceEnum.E3,
+            "truth_status": EvidenceTruthStatusEnum.OBSERVED,
+        })
+        from compliance.api.evidence_routes import _build_evidence_response
+        resp = _build_evidence_response(evidence)
+        assert resp.confidence_level == "E3"
+        assert resp.truth_status == "observed"
+
+    def test_evidence_response_none_four_eyes_fields_default(self):
+        """Default evidence reports no approval, no four-eyes and no reviewer."""
+        evidence = make_evidence()
+        from compliance.api.evidence_routes import _build_evidence_response
+        resp = _build_evidence_response(evidence)
+        assert resp.approval_status == "none"
+        assert resp.requires_four_eyes is False
+        assert resp.first_reviewer is None
@@ -0,0 +1,191 @@
+"""Tests for Anti-Fake-Evidence Phase 3: Enforcement.
+
+~8 tests covering:
+- Evidence distribution endpoint (confidence counts, four-eyes pending)
+- Dashboard multi-score presence
+"""
+
+from datetime import datetime, timedelta
+from unittest.mock import MagicMock, patch, PropertyMock
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from compliance.api.dashboard_routes import router as dashboard_router
+from compliance.db.models import EvidenceConfidenceEnum, EvidenceTruthStatusEnum
+from classroom_engine.database import get_db
+
+# ---------------------------------------------------------------------------
+# App setup with mocked DB dependency
+# ---------------------------------------------------------------------------
+
+# Minimal FastAPI app that mounts only the dashboard router under test.
+app = FastAPI()
+app.include_router(dashboard_router)
+
+# Single MagicMock session shared by all tests in this module.
+mock_db = MagicMock()
+
+
+def override_get_db():
+    """Yield the shared ``mock_db`` in place of the real ``get_db`` dependency."""
+    yield mock_db
+
+
+# Handlers that depend on get_db now receive mock_db instead.
+app.dependency_overrides[get_db] = override_get_db
+client = TestClient(app)
+
+# Fixed timestamp available to the tests in this module.
+NOW = datetime(2026, 3, 23, 14, 0, 0)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_evidence(confidence="E1", requires_four_eyes=False, approval_status="none"):
+    """Return a MagicMock evidence row carrying only the three attributes set here.
+
+    ``confidence`` is exposed as ``confidence_level.value``, mirroring an enum
+    member; the other two arguments are stored as plain attributes.
+    """
+    level = MagicMock(value=confidence)
+    return MagicMock(
+        confidence_level=level,
+        requires_four_eyes=requires_four_eyes,
+        approval_status=approval_status,
+    )
+
+
+# ===========================================================================
+# 1. TestEvidenceDistributionEndpoint
+# ===========================================================================
+
+class TestEvidenceDistributionEndpoint:
+    """Test GET /dashboard/evidence-distribution endpoint."""
+
+    def _setup_evidence(self, evidence_list, mock_repo_cls=None):
+        """Reset the shared session mock and stub the patched repository.
+
+        When ``mock_repo_cls`` (the patched ``EvidenceRepository`` class) is
+        given, the instance it produces returns ``evidence_list`` from
+        ``get_all()``. The list is returned unchanged for convenience.
+        """
+        mock_db.reset_mock()
+        if mock_repo_cls is not None:
+            mock_repo_cls.return_value.get_all.return_value = evidence_list
+        return evidence_list
+
+    @patch("compliance.api.dashboard_routes.EvidenceRepository")
+    def test_empty_db_returns_zero_counts(self, mock_repo_cls):
+        """No evidence yields zero totals and a zero bucket per confidence level."""
+        self._setup_evidence([], mock_repo_cls)
+
+        resp = client.get("/dashboard/evidence-distribution")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["total"] == 0
+        assert data["four_eyes_pending"] == 0
+        assert data["by_confidence"] == {"E0": 0, "E1": 0, "E2": 0, "E3": 0, "E4": 0}
+
+    @patch("compliance.api.dashboard_routes.EvidenceRepository")
+    def test_counts_by_confidence_level(self, mock_repo_cls):
+        """Evidence is counted per confidence level."""
+        evidence = [
+            make_evidence("E0"),
+            make_evidence("E1"),
+            make_evidence("E1"),
+            make_evidence("E2"),
+            make_evidence("E3"),
+            make_evidence("E3"),
+            make_evidence("E3"),
+            make_evidence("E4"),
+        ]
+        self._setup_evidence(evidence, mock_repo_cls)
+
+        resp = client.get("/dashboard/evidence-distribution")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["total"] == 8
+        assert data["by_confidence"]["E0"] == 1
+        assert data["by_confidence"]["E1"] == 2
+        assert data["by_confidence"]["E2"] == 1
+        assert data["by_confidence"]["E3"] == 3
+        assert data["by_confidence"]["E4"] == 1
+
+    @patch("compliance.api.dashboard_routes.EvidenceRepository")
+    def test_four_eyes_pending_count(self, mock_repo_cls):
+        """Only unfinished four-eyes reviews count as pending."""
+        evidence = [
+            make_evidence("E1", requires_four_eyes=True, approval_status="pending_first"),
+            make_evidence("E2", requires_four_eyes=True, approval_status="first_approved"),
+            make_evidence("E2", requires_four_eyes=True, approval_status="approved"),
+            make_evidence("E1", requires_four_eyes=True, approval_status="rejected"),
+            make_evidence("E1", requires_four_eyes=False, approval_status="none"),
+        ]
+        self._setup_evidence(evidence, mock_repo_cls)
+
+        resp = client.get("/dashboard/evidence-distribution")
+        assert resp.status_code == 200
+        data = resp.json()
+        # pending_first and first_approved are pending; approved and rejected are not
+        assert data["four_eyes_pending"] == 2
+        assert data["total"] == 5
+
+    @patch("compliance.api.dashboard_routes.EvidenceRepository")
+    def test_null_confidence_defaults_to_e1(self, mock_repo_cls):
+        """Evidence without a confidence level is counted under E1."""
+        e = MagicMock()
+        e.confidence_level = None
+        e.requires_four_eyes = False
+        e.approval_status = "none"
+        self._setup_evidence([e], mock_repo_cls)
+
+        resp = client.get("/dashboard/evidence-distribution")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["by_confidence"]["E1"] == 1
+        assert data["total"] == 1
+
+    @patch("compliance.api.dashboard_routes.EvidenceRepository")
+    def test_all_four_eyes_approved_zero_pending(self, mock_repo_cls):
+        """Fully approved four-eyes evidence leaves nothing pending."""
+        evidence = [
+            make_evidence("E2", requires_four_eyes=True, approval_status="approved"),
+            make_evidence("E3", requires_four_eyes=True, approval_status="approved"),
+        ]
+        self._setup_evidence(evidence, mock_repo_cls)
+
+        resp = client.get("/dashboard/evidence-distribution")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["four_eyes_pending"] == 0
+
+
+# ===========================================================================
+# 2. TestDashboardMultiScore
+# ===========================================================================
+
+class TestDashboardMultiScore:
+    """Schema-level checks for the dashboard's multi-dimensional score."""
+
+    def test_dashboard_response_schema_includes_multi_score(self):
+        """DashboardResponse must declare a ``multi_score`` field."""
+        from compliance.api.schemas import DashboardResponse
+        declared = DashboardResponse.model_fields
+        assert "multi_score" in declared, "DashboardResponse must have multi_score field"
+
+    def test_multi_score_schema_has_required_fields(self):
+        """MultiDimensionalScore must declare all seven score fields."""
+        from compliance.api.schemas import MultiDimensionalScore
+        declared = MultiDimensionalScore.model_fields
+        expected_names = (
+            "requirement_coverage",
+            "evidence_strength",
+            "validation_quality",
+            "evidence_freshness",
+            "control_effectiveness",
+            "overall_readiness",
+            "hard_blocks",
+        )
+        for field in expected_names:
+            assert field in declared, f"Missing field: {field}"
+
+    def test_multi_score_default_values(self):
+        """A score built without arguments is all-zero with no hard blocks."""
+        from compliance.api.schemas import MultiDimensionalScore
+        blank = MultiDimensionalScore()
+        assert blank.requirement_coverage == 0.0
+        assert blank.overall_readiness == 0.0
+        assert blank.hard_blocks == []
@@ -0,0 +1,277 @@
+"""Tests for Anti-Fake-Evidence Phase 4a: Traceability Matrix.
+
+6 tests covering:
+- Empty DB returns empty controls + zero summary
+- Nested structure: Control → Evidence → Assertions
+- Assertions appear under correct evidence
+- Coverage flags computed correctly
+- Control without evidence has correct coverage
+- Summary counts match
+"""
+
+from datetime import datetime
+from unittest.mock import MagicMock, patch
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from compliance.api.dashboard_routes import router as dashboard_router
+from classroom_engine.database import get_db
+
+# ---------------------------------------------------------------------------
+# App setup with mocked DB dependency
+# ---------------------------------------------------------------------------
+
+app = FastAPI()  # minimal app that hosts only the dashboard router under test
+app.include_router(dashboard_router)
+
+mock_db = MagicMock()  # module-level stand-in for the DB dependency; shared and re-configured by every test
+
+
+def override_get_db():  # generator dependency that yields the shared mock_db
+    yield mock_db
+
+
+app.dependency_overrides[get_db] = override_get_db  # handlers depending on get_db receive mock_db instead
+client = TestClient(app)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_control(id="c1", control_id="CTRL-001", title="Test Control", status="pass", domain="gov"):  # MagicMock control stub
+    ctrl = MagicMock()
+    ctrl.id = id  # the tests link evidence to a control by passing this value as the evidence's control_id
+    ctrl.control_id = control_id  # separate, human-readable identifier (e.g. "PRIV-001" in the tests)
+    ctrl.title = title
+    ctrl.status = MagicMock()  # nested mock so the value is read as ctrl.status.value
+    ctrl.status.value = status
+    ctrl.domain = MagicMock()  # same .value wrapper as status
+    ctrl.domain.value = domain
+    return ctrl
+
+
+def make_evidence(id="e1", control_id="c1", title="Evidence 1", evidence_type="scan_report",
+                  confidence="E2", status="valid"):  # MagicMock evidence stub
+    e = MagicMock()
+    e.id = id  # assertion stubs point at this value through their entity_id
+    e.control_id = control_id  # matches make_control(id=...) of the owning control in the tests
+    e.title = title
+    e.evidence_type = evidence_type  # stored as a plain string, unlike confidence_level/status below
+    e.confidence_level = MagicMock()  # value is read as e.confidence_level.value
+    e.confidence_level.value = confidence
+    e.status = MagicMock()  # value is read as e.status.value
+    e.status.value = status
+    return e
+
+
+def make_assertion(id="a1", entity_id="e1", sentence_text="System encrypts data at rest.",
+                   assertion_type="assertion", confidence=0.85, verified_by=None):  # MagicMock assertion stub
+    a = MagicMock()
+    a.id = id
+    a.entity_id = entity_id  # id of the evidence stub this assertion is attached to in the tests
+    a.sentence_text = sentence_text
+    a.assertion_type = assertion_type
+    a.confidence = confidence
+    a.verified_by = verified_by  # None is used for "unverified"; a name/email for "verified" in the tests
+    return a
+
+
+# ===========================================================================
+# Tests
+# ===========================================================================
+
+class TestTraceabilityMatrix:
+    """Test GET /dashboard/traceability-matrix with both repositories patched and the DB query mocked."""
+
+    @patch("compliance.api.dashboard_routes.EvidenceRepository")
+    @patch("compliance.api.dashboard_routes.ControlRepository")
+    def test_empty_db_returns_empty_matrix(self, mock_ctrl_cls, mock_ev_cls):  # patches apply bottom-up: ControlRepository first
+        """Empty DB should return zero controls and zero summary counts."""
+        mock_ctrl = MagicMock()
+        mock_ctrl.get_all.return_value = []
+        mock_ctrl_cls.return_value = mock_ctrl  # instantiating the patched class yields this repo mock
+
+        mock_ev = MagicMock()
+        mock_ev.get_all.return_value = []
+        mock_ev_cls.return_value = mock_ev
+
+        # Mock db.query(AssertionDB).filter(...).all() on the shared module-level mock_db
+        mock_db.reset_mock()
+        mock_query = MagicMock()
+        mock_query.filter.return_value.all.return_value = []
+        mock_db.query.return_value = mock_query
+
+        resp = client.get("/dashboard/traceability-matrix")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["controls"] == []
+        assert data["summary"]["total_controls"] == 0
+        assert data["summary"]["covered_controls"] == 0
+        assert data["summary"]["fully_verified"] == 0
+        assert data["summary"]["uncovered_controls"] == 0
+
+    @patch("compliance.api.dashboard_routes.EvidenceRepository")
+    @patch("compliance.api.dashboard_routes.ControlRepository")
+    def test_nested_structure(self, mock_ctrl_cls, mock_ev_cls):
+        """Control with evidence and assertions should return nested structure."""
+        ctrl = make_control(id="c1", control_id="PRIV-001", title="Privacy Control")
+        ev = make_evidence(id="e1", control_id="c1", confidence="E3")  # attached to ctrl via control_id="c1"
+        assertion = make_assertion(id="a1", entity_id="e1", verified_by="auditor@example.com")
+
+        mock_ctrl = MagicMock()
+        mock_ctrl.get_all.return_value = [ctrl]
+        mock_ctrl_cls.return_value = mock_ctrl
+
+        mock_ev = MagicMock()
+        mock_ev.get_all.return_value = [ev]
+        mock_ev_cls.return_value = mock_ev
+
+        mock_db.reset_mock()  # drop call history left on the shared mock by earlier tests
+        mock_query = MagicMock()
+        mock_query.filter.return_value.all.return_value = [assertion]
+        mock_db.query.return_value = mock_query
+
+        resp = client.get("/dashboard/traceability-matrix")
+        assert resp.status_code == 200
+        data = resp.json()
+
+        assert len(data["controls"]) == 1
+        c = data["controls"][0]
+        assert c["control_id"] == "PRIV-001"
+        assert len(c["evidence"]) == 1
+        assert c["evidence"][0]["confidence_level"] == "E3"
+        assert len(c["evidence"][0]["assertions"]) == 1
+        assert c["evidence"][0]["assertions"][0]["verified"] is True  # the stub was built with verified_by set
+
+    @patch("compliance.api.dashboard_routes.EvidenceRepository")
+    @patch("compliance.api.dashboard_routes.ControlRepository")
+    def test_assertions_grouped_under_correct_evidence(self, mock_ctrl_cls, mock_ev_cls):
+        """Assertions should only appear under the evidence they reference."""
+        ctrl = make_control(id="c1")
+        ev1 = make_evidence(id="e1", control_id="c1", title="Evidence A")
+        ev2 = make_evidence(id="e2", control_id="c1", title="Evidence B")
+        a1 = make_assertion(id="a1", entity_id="e1", sentence_text="Assertion for E1")
+        a2 = make_assertion(id="a2", entity_id="e2", sentence_text="Assertion for E2")
+        a3 = make_assertion(id="a3", entity_id="e2", sentence_text="Second assertion for E2")
+
+        mock_ctrl = MagicMock()
+        mock_ctrl.get_all.return_value = [ctrl]
+        mock_ctrl_cls.return_value = mock_ctrl
+
+        mock_ev = MagicMock()
+        mock_ev.get_all.return_value = [ev1, ev2]
+        mock_ev_cls.return_value = mock_ev
+
+        mock_db.reset_mock()
+        mock_query = MagicMock()
+        mock_query.filter.return_value.all.return_value = [a1, a2, a3]  # one flat list; the endpoint must split it by entity_id
+        mock_db.query.return_value = mock_query
+
+        resp = client.get("/dashboard/traceability-matrix")
+        assert resp.status_code == 200
+        data = resp.json()
+
+        c = data["controls"][0]
+        ev1_data = next(e for e in c["evidence"] if e["id"] == "e1")  # look up by id rather than relying on order
+        ev2_data = next(e for e in c["evidence"] if e["id"] == "e2")
+        assert len(ev1_data["assertions"]) == 1
+        assert len(ev2_data["assertions"]) == 2
+
+    @patch("compliance.api.dashboard_routes.EvidenceRepository")
+    @patch("compliance.api.dashboard_routes.ControlRepository")
+    def test_coverage_flags_correct(self, mock_ctrl_cls, mock_ev_cls):
+        """Coverage flags should reflect evidence, assertions, and verification state."""
+        ctrl = make_control(id="c1")
+        ev = make_evidence(id="e1", control_id="c1", confidence="E2")
+        # One verified, one not (both attached to the same evidence e1)
+        a1 = make_assertion(id="a1", entity_id="e1", verified_by="alice")
+        a2 = make_assertion(id="a2", entity_id="e1", verified_by=None)
+
+        mock_ctrl = MagicMock()
+        mock_ctrl.get_all.return_value = [ctrl]
+        mock_ctrl_cls.return_value = mock_ctrl
+
+        mock_ev = MagicMock()
+        mock_ev.get_all.return_value = [ev]
+        mock_ev_cls.return_value = mock_ev
+
+        mock_db.reset_mock()
+        mock_query = MagicMock()
+        mock_query.filter.return_value.all.return_value = [a1, a2]
+        mock_db.query.return_value = mock_query
+
+        resp = client.get("/dashboard/traceability-matrix")
+        assert resp.status_code == 200
+
+        cov = resp.json()["controls"][0]["coverage"]
+        assert cov["has_evidence"] is True
+        assert cov["has_assertions"] is True
+        assert cov["all_assertions_verified"] is False  # a2 not verified
+        assert cov["min_confidence_level"] == "E2"  # the only evidence has confidence E2
+
+    @patch("compliance.api.dashboard_routes.EvidenceRepository")
+    @patch("compliance.api.dashboard_routes.ControlRepository")
+    def test_coverage_without_evidence(self, mock_ctrl_cls, mock_ev_cls):
+        """Control with no evidence should have all coverage flags False/None."""
+        ctrl = make_control(id="c1")
+
+        mock_ctrl = MagicMock()
+        mock_ctrl.get_all.return_value = [ctrl]
+        mock_ctrl_cls.return_value = mock_ctrl
+
+        mock_ev = MagicMock()
+        mock_ev.get_all.return_value = []  # no evidence at all for c1
+        mock_ev_cls.return_value = mock_ev
+
+        mock_db.reset_mock()
+        mock_query = MagicMock()
+        mock_query.filter.return_value.all.return_value = []
+        mock_db.query.return_value = mock_query
+
+        resp = client.get("/dashboard/traceability-matrix")
+        assert resp.status_code == 200
+
+        cov = resp.json()["controls"][0]["coverage"]
+        assert cov["has_evidence"] is False
+        assert cov["has_assertions"] is False
+        assert cov["all_assertions_verified"] is False  # expected False (not vacuously True) when nothing exists
+        assert cov["min_confidence_level"] is None
+
+    @patch("compliance.api.dashboard_routes.EvidenceRepository")
+    @patch("compliance.api.dashboard_routes.ControlRepository")
+    def test_summary_counts(self, mock_ctrl_cls, mock_ev_cls):
+        """Summary should count total, covered, fully verified, and uncovered controls."""
+        # c1: has evidence + verified assertions → fully verified
+        # c2: has evidence but no assertions → covered, not fully verified
+        # c3: no evidence → uncovered
+        c1 = make_control(id="c1", control_id="C-001")
+        c2 = make_control(id="c2", control_id="C-002")
+        c3 = make_control(id="c3", control_id="C-003")
+
+        ev1 = make_evidence(id="e1", control_id="c1", confidence="E3")
+        ev2 = make_evidence(id="e2", control_id="c2", confidence="E1")
+
+        a1 = make_assertion(id="a1", entity_id="e1", verified_by="auditor")  # the only assertion; belongs to c1's evidence
+
+        mock_ctrl = MagicMock()
+        mock_ctrl.get_all.return_value = [c1, c2, c3]
+        mock_ctrl_cls.return_value = mock_ctrl
+
+        mock_ev = MagicMock()
+        mock_ev.get_all.return_value = [ev1, ev2]
+        mock_ev_cls.return_value = mock_ev
+
+        mock_db.reset_mock()
+        mock_query = MagicMock()
+        mock_query.filter.return_value.all.return_value = [a1]
+        mock_db.query.return_value = mock_query
+
+        resp = client.get("/dashboard/traceability-matrix")
+        assert resp.status_code == 200
+
+        summary = resp.json()["summary"]
+        assert summary["total_controls"] == 3
+        assert summary["covered_controls"] == 2  # c1 and c2 have evidence
+        assert summary["fully_verified"] == 1  # only c1 has verified assertions
+        assert summary["uncovered_controls"] == 1  # c3
@@ -0,0 +1,440 @@
+"""Tests for Batch Dedup Runner (batch_dedup_runner.py).
+
+Covers:
+- quality_score(): Richness ranking
+- BatchDedupRunner._sub_group_by_merge_hint(): Composite key grouping
+- Master selection (highest quality score wins)
+- Duplicate linking (mark + parent-link transfer)
+- Dry run mode (no DB changes)
+- Cross-group pass
+- Progress reporting / stats
+"""
+
+import json
+import pytest
+from unittest.mock import MagicMock, AsyncMock, patch, call
+
+from compliance.services.batch_dedup_runner import (
+    quality_score,
+    BatchDedupRunner,
+    DEDUP_COLLECTION,
+)
+
+
+# ---------------------------------------------------------------------------
+# quality_score TESTS
+# ---------------------------------------------------------------------------
+
+
+class TestQualityScore:
+    """Quality scoring: richer controls should score higher."""
+
+    def test_empty_control(self):  # a dict with no fields at all scores zero
+        score = quality_score({})
+        assert score == 0.0
+
+    def test_requirements_weight(self):  # fields are passed as JSON-encoded strings
+        score = quality_score({"requirements": json.dumps(["r1", "r2", "r3"])})
+        assert score == pytest.approx(6.0)  # 3 * 2.0
+
+    def test_test_procedure_weight(self):
+        score = quality_score({"test_procedure": json.dumps(["t1", "t2"])})
+        assert score == pytest.approx(3.0)  # 2 * 1.5
+
+    def test_evidence_weight(self):
+        score = quality_score({"evidence": json.dumps(["e1"])})
+        assert score == pytest.approx(1.0)  # 1 * 1.0
+
+    def test_objective_weight_capped(self):  # objective contributes len/200, at most 3.0
+        short = quality_score({"objective": "x" * 100})
+        long = quality_score({"objective": "x" * 1000})
+        assert short == pytest.approx(0.5)  # 100/200
+        assert long == pytest.approx(3.0)   # capped at 3.0
+
+    def test_combined_score(self):  # the per-field contributions are expected to add up
+        control = {
+            "requirements": json.dumps(["r1", "r2"]),
+            "test_procedure": json.dumps(["t1"]),
+            "evidence": json.dumps(["e1", "e2"]),
+            "objective": "x" * 400,
+        }
+        # 2*2 + 1*1.5 + 2*1.0 + min(400/200, 3) = 4 + 1.5 + 2 + 2 = 9.5
+        assert quality_score(control) == pytest.approx(9.5)
+
+    def test_json_string_vs_list(self):
+        """json.dumps output and the equivalent hand-written JSON string should score the same."""
+        a = quality_score({"requirements": json.dumps(["r1", "r2"])})
+        b = quality_score({"requirements": '["r1", "r2"]'})  # identical text to what json.dumps produces above
+        assert a == b  # NOTE(review): an already-parsed list is never passed here, despite the test name
+
+    def test_null_fields(self):
+        """None values should not crash."""
+        score = quality_score({
+            "requirements": None,
+            "test_procedure": None,
+            "evidence": None,
+            "objective": None,
+        })
+        assert score == 0.0  # None fields are expected to contribute nothing
+
+    def test_ranking_order(self):
+        """Rich control should rank above sparse control."""
+        rich = {
+            "requirements": json.dumps(["r1", "r2", "r3"]),
+            "test_procedure": json.dumps(["t1", "t2"]),
+            "evidence": json.dumps(["e1"]),
+            "objective": "A comprehensive objective for this control.",
+        }
+        sparse = {
+            "requirements": json.dumps(["r1"]),
+            "objective": "Short",
+        }
+        assert quality_score(rich) > quality_score(sparse)  # only the ordering is checked, not exact scores
+
+
+# ---------------------------------------------------------------------------
+# Sub-grouping TESTS
+# ---------------------------------------------------------------------------
+
+
+class TestSubGrouping:  # BatchDedupRunner._sub_group_by_merge_hint(): grouping controls by merge_group_hint
+    def _make_runner(self):  # runner backed by a MagicMock db; these tests make no assertions on the db
+        db = MagicMock()
+        return BatchDedupRunner(db=db)
+
+    def test_groups_by_merge_hint(self):  # equal hints share a group that is keyed by the hint string
+        runner = self._make_runner()
+        controls = [
+            {"uuid": "a", "merge_group_hint": "implement:mfa:none"},
+            {"uuid": "b", "merge_group_hint": "implement:mfa:none"},
+            {"uuid": "c", "merge_group_hint": "test:firewall:periodic"},
+        ]
+        groups = runner._sub_group_by_merge_hint(controls)
+        assert len(groups) == 2
+        assert len(groups["implement:mfa:none"]) == 2
+        assert len(groups["test:firewall:periodic"]) == 1
+
+    def test_empty_hint_gets_own_group(self):  # empty hints must not be merged with each other
+        runner = self._make_runner()
+        controls = [
+            {"uuid": "x", "merge_group_hint": ""},
+            {"uuid": "y", "merge_group_hint": ""},
+        ]
+        groups = runner._sub_group_by_merge_hint(controls)
+        # Each empty-hint control gets its own group (the group keys themselves are not asserted)
+        assert len(groups) == 2
+
+    def test_single_control_single_group(self):  # one control in, one group out
+        runner = self._make_runner()
+        controls = [
+            {"uuid": "a", "merge_group_hint": "implement:mfa:none"},
+        ]
+        groups = runner._sub_group_by_merge_hint(controls)
+        assert len(groups) == 1
+
+
+# ---------------------------------------------------------------------------
+# Master Selection TESTS
+# ---------------------------------------------------------------------------
+
+
+class TestMasterSelection:
+    """Best quality score should become master."""
+
+    @pytest.mark.asyncio
+    async def test_highest_score_is_master(self):
+        """In a group, the control with highest quality_score is master."""
+        db = MagicMock()
+        db.execute = MagicMock()
+        db.commit = MagicMock()
+        # Mock parent link transfer query: no parent links exist for any control
+        db.execute.return_value.fetchall.return_value = []
+
+        runner = BatchDedupRunner(db=db)
+
+        sparse = _make_control("s1", reqs=1, hint="implement:mfa:none",  # _make_control is defined at the end of this module
+                               title="MFA implementiert")
+        rich = _make_control("r1", reqs=5, tests=3, evidence=2,
+                             hint="implement:mfa:none", title="MFA implementiert")
+        medium = _make_control("m1", reqs=2, tests=1,
+                               hint="implement:mfa:none", title="MFA implementiert")
+
+        controls = [sparse, medium, rich]  # richest control is deliberately last in the input
+
+        # All have same title → all should be title-identical linked; embedding and Qdrant helpers are stubbed
+        with patch("compliance.services.batch_dedup_runner.get_embedding",
+                    new_callable=AsyncMock, return_value=[0.1] * 1024), \
+             patch("compliance.services.batch_dedup_runner.qdrant_upsert",
+                    new_callable=AsyncMock, return_value=True):
+            await runner._process_hint_group("implement:mfa:none", controls, dry_run=True)
+
+        # Rich should be master (1 master), others linked (2 linked); NOTE(review): only counts are asserted, not which control is master
+        assert runner.stats["masters"] == 1
+        assert runner.stats["linked"] == 2
+        assert runner.stats["skipped_title_identical"] == 2
+
+
+# ---------------------------------------------------------------------------
+# Dry Run TESTS
+# ---------------------------------------------------------------------------
+
+
+class TestDryRun:
+    """Dry run should compute stats but NOT modify DB."""
+
+    @pytest.mark.asyncio
+    async def test_dry_run_no_db_writes(self):  # stats are still updated while commit stays untouched
+        db = MagicMock()
+        db.execute = MagicMock()
+        db.commit = MagicMock()
+
+        runner = BatchDedupRunner(db=db)
+
+        controls = [
+            _make_control("a", reqs=3, hint="implement:mfa:none", title="MFA impl"),
+            _make_control("b", reqs=1, hint="implement:mfa:none", title="MFA impl"),
+        ]
+
+        with patch("compliance.services.batch_dedup_runner.get_embedding",
+                    new_callable=AsyncMock, return_value=[0.1] * 1024), \
+             patch("compliance.services.batch_dedup_runner.qdrant_upsert",
+                    new_callable=AsyncMock, return_value=True):
+            await runner._process_hint_group("implement:mfa:none", controls, dry_run=True)
+
+        assert runner.stats["masters"] == 1
+        assert runner.stats["linked"] == 1
+        # No commit for dedup operations in dry_run; NOTE(review): db.execute calls are not checked here
+        db.commit.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# Parent Link Transfer TESTS
+# ---------------------------------------------------------------------------
+
+
+class TestParentLinkTransfer:
+    """Parent links should migrate from duplicate to master."""
+
+    def test_transfer_parent_links(self):  # the return value is the number of links transferred
+        db = MagicMock()
+        # Mock: duplicate has 2 parent links (6-column rows; first column is presumably the parent uuid - confirm in the runner)
+        db.execute.return_value.fetchall.return_value = [
+            ("parent-1", "decomposition", 1.0, "DSGVO", "Art. 32", "obl-1"),
+            ("parent-2", "decomposition", 0.9, "NIS2", "Art. 21", "obl-2"),
+        ]
+
+        runner = BatchDedupRunner(db=db)
+        count = runner._transfer_parent_links("master-uuid", "dup-uuid")
+
+        assert count == 2
+        # Two INSERT calls for the transferred links, plus the initial lookup
+        assert db.execute.call_count == 3  # 1 SELECT + 2 INSERTs
+
+    def test_transfer_skips_self_reference(self):  # a link whose parent equals the master must not be transferred
+        db = MagicMock()
+        # Parent link points to master itself → should be skipped
+        db.execute.return_value.fetchall.return_value = [
+            ("master-uuid", "decomposition", 1.0, "DSGVO", "Art. 32", "obl-1"),
+        ]
+
+        runner = BatchDedupRunner(db=db)
+        count = runner._transfer_parent_links("master-uuid", "dup-uuid")
+
+        assert count == 0
+
+
+# ---------------------------------------------------------------------------
+# Title-identical Short-circuit TESTS
+# ---------------------------------------------------------------------------
+
+
+class TestTitleIdenticalShortCircuit:  # identical titles link directly; differing titles go through the embedding path
+
+    @pytest.mark.asyncio
+    async def test_identical_titles_skip_embedding(self):
+        """Controls with identical titles in same hint group → direct link."""
+        db = MagicMock()
+        db.execute = MagicMock()
+        db.commit = MagicMock()
+        db.execute.return_value.fetchall.return_value = []  # no parent links to transfer
+
+        runner = BatchDedupRunner(db=db)
+
+        controls = [
+            _make_control("m", reqs=3, hint="implement:mfa:none",
+                          title="MFA implementieren"),
+            _make_control("c", reqs=1, hint="implement:mfa:none",
+                          title="MFA implementieren"),
+        ]
+
+        with patch("compliance.services.batch_dedup_runner.get_embedding",
+                    new_callable=AsyncMock) as mock_embed, \
+             patch("compliance.services.batch_dedup_runner.qdrant_upsert",
+                    new_callable=AsyncMock, return_value=True):
+            await runner._process_hint_group("implement:mfa:none", controls, dry_run=False)
+
+        # Embedding should only be called for the master (indexing), not for linking; NOTE(review): mock_embed is never asserted
+        assert runner.stats["linked"] == 1
+        assert runner.stats["skipped_title_identical"] == 1
+
+    @pytest.mark.asyncio
+    async def test_different_titles_use_embedding(self):
+        """Controls with different titles should use embedding check."""
+        db = MagicMock()
+        db.execute = MagicMock()
+        db.commit = MagicMock()
+        db.execute.return_value.fetchall.return_value = []  # no parent links to transfer
+
+        runner = BatchDedupRunner(db=db)
+
+        controls = [
+            _make_control("m", reqs=3, hint="implement:mfa:none",
+                          title="MFA implementieren fuer Admins"),
+            _make_control("c", reqs=1, hint="implement:mfa:none",
+                          title="MFA einrichten fuer alle Benutzer"),
+        ]
+
+        with patch("compliance.services.batch_dedup_runner.get_embedding",
+                    new_callable=AsyncMock, return_value=[0.1] * 1024) as mock_embed, \
+             patch("compliance.services.batch_dedup_runner.qdrant_upsert",
+                    new_callable=AsyncMock, return_value=True), \
+             patch("compliance.services.batch_dedup_runner.qdrant_search_cross_regulation",
+                    new_callable=AsyncMock, return_value=[]):
+            await runner._process_hint_group("implement:mfa:none", controls, dry_run=False)
+
+        # Different titles → embedding was called for both (master + candidate); lower bound only
+        assert mock_embed.call_count >= 2
+        # No Qdrant results → linked anyway (same hint = same action+object)
+        assert runner.stats["linked"] == 1
+
+
+# ---------------------------------------------------------------------------
+# Cross-Group Pass TESTS
+# ---------------------------------------------------------------------------
+
+
+class TestCrossGroupPass:  # _run_cross_group_pass(): linking a master to a match from a different hint group
+
+    @pytest.mark.asyncio
+    async def test_cross_group_creates_link(self):  # one master plus one search hit yields one cross-group link
+        db = MagicMock()
+        db.commit = MagicMock()
+
+        # First call returns masters, subsequent calls return empty (for transfer)
+        master_rows = [
+            ("uuid-1", "CTRL-001", "MFA implementieren",
+             "implement:multi_factor_auth:none"),
+        ]
+        call_count = {"n": 0}  # mutable counter so the nested function can update it
+
+        def mock_execute(stmt, params=None):  # replaces db.execute; the result depends on call order only
+            result = MagicMock()
+            call_count["n"] += 1
+            if call_count["n"] == 1:
+                result.fetchall.return_value = master_rows
+            else:
+                result.fetchall.return_value = []
+            return result
+
+        db.execute = mock_execute
+
+        runner = BatchDedupRunner(db=db)
+
+        cross_result = [{  # single search hit whose merge_group_hint differs from the master's
+            "score": 0.95,
+            "payload": {
+                "control_uuid": "uuid-2",
+                "control_id": "CTRL-002",
+                "merge_group_hint": "implement:mfa:continuous",
+            },
+        }]
+
+        with patch("compliance.services.batch_dedup_runner.get_embedding",
+                    new_callable=AsyncMock, return_value=[0.1] * 1024), \
+             patch("compliance.services.batch_dedup_runner.qdrant_search_cross_regulation",
+                    new_callable=AsyncMock, return_value=cross_result):
+            await runner._run_cross_group_pass()
+
+        assert runner.stats["cross_group_linked"] == 1
+
+
+# ---------------------------------------------------------------------------
+# Progress Stats TESTS
+# ---------------------------------------------------------------------------
+
+
+class TestProgressStats:  # get_status() should mirror the runner's progress fields and stats counters
+
+    def test_get_status(self):
+        db = MagicMock()
+        runner = BatchDedupRunner(db=db)
+        runner.stats["masters"] = 42  # counters and progress attributes are set directly, no run is performed
+        runner.stats["linked"] = 100
+        runner._progress_phase = "phase1"
+        runner._progress_count = 500
+        runner._progress_total = 85000
+
+        status = runner.get_status()
+        assert status["phase"] == "phase1"  # from _progress_phase
+        assert status["progress"] == 500  # from _progress_count
+        assert status["total"] == 85000  # from _progress_total
+        assert status["masters"] == 42
+        assert status["linked"] == 100
+
+
+# ---------------------------------------------------------------------------
+# Route endpoint TESTS
+# ---------------------------------------------------------------------------
+
+
+class TestBatchDedupRoutes:
+    """Test the batch-dedup status endpoint exposed by crosswalk_routes."""
+
+    def test_status_endpoint_not_running(self):  # without a started run the endpoint reports running == False
+        from fastapi import FastAPI
+        from fastapi.testclient import TestClient
+        from compliance.api.crosswalk_routes import router
+
+        app = FastAPI()  # throwaway app local to this test
+        app.include_router(router, prefix="/api/compliance")
+        client = TestClient(app)
+
+        with patch("compliance.api.crosswalk_routes.SessionLocal") as mock_session:
+            mock_db = MagicMock()
+            mock_session.return_value = mock_db  # SessionLocal() inside the route yields this mock
+            mock_db.execute.return_value.fetchone.return_value = (85000, 0, 85000)  # 3-column count row; column meaning not visible here
+
+            resp = client.get("/api/compliance/v1/canonical/migrate/batch-dedup/status")
+            assert resp.status_code == 200
+            data = resp.json()
+            assert data["running"] is False
+
+
+# ---------------------------------------------------------------------------
+# HELPERS
+# ---------------------------------------------------------------------------
+
+
+def _make_control(
+    prefix: str,
+    reqs: int = 0,
+    tests: int = 0,
+    evidence: int = 0,
+    hint: str = "",
+    title: str = None,
+    pattern_id: str = None,
+) -> dict:
+    """Build a control dict for tests; reqs/tests/evidence set how many items each JSON-encoded list holds."""
+    return {
+        "uuid": f"{prefix}-uuid",
+        "control_id": f"CTRL-{prefix}",
+        "title": title or f"Control {prefix}",  # falls back to a prefix-based title when none (or an empty one) is given
+        "objective": f"Objective for {prefix}",
+        "pattern_id": pattern_id,
+        "requirements": json.dumps([f"r{i}" for i in range(reqs)]),  # list fields are stored as JSON strings
+        "test_procedure": json.dumps([f"t{i}" for i in range(tests)]),
+        "evidence": json.dumps([f"e{i}" for i in range(evidence)]),
+        "release_state": "draft",
+        "merge_group_hint": hint,
+        "action_object_class": "",
+    }
@@ -1,17 +1,36 @@
-"""Tests for Canonical Control Library routes (canonical_control_routes.py)."""
+"""Tests for Canonical Control Library routes (canonical_control_routes.py).
+
+Includes:
+- Model validation tests (FrameworkResponse, ControlResponse, etc.)
+- _control_row conversion tests
+- Server-side pagination, sorting, search, source filter tests
+- /controls-count and /controls-meta endpoint tests
+"""

 import pytest
 from unittest.mock import MagicMock, patch
 from datetime import datetime, timezone

+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
 from compliance.api.canonical_control_routes import (
    FrameworkResponse,
    ControlResponse,
    SimilarityCheckRequest,
    SimilarityCheckResponse,
    _control_row,
+    router,
 )

+# ---------------------------------------------------------------------------
+# TestClient setup for endpoint tests
+# ---------------------------------------------------------------------------
+
+_app = FastAPI()  # module-level app hosting only the canonical-control router
+_app.include_router(router, prefix="/api/compliance")  # routes are therefore reached under /api/compliance/...
+_client = TestClient(_app)  # shared client for the endpoint tests in this module
+

 class TestFrameworkResponse:
    """Tests for FrameworkResponse model."""
@@ -175,6 +194,12 @@ class TestControlRowConversion:
            ],
            "release_state": "draft",
            "tags": ["mfa"],
+            "generation_strategy": "ungrouped",
+            "parent_control_uuid": None,
+            "parent_control_id": None,
+            "parent_control_title": None,
+            "decomposition_method": None,
+            "pipeline_version": None,
            "created_at": now,
            "updated_at": now,
        }
@@ -223,3 +248,300 @@ class TestControlRowConversion:
        result = _control_row(row)
        assert result["created_at"] is None
        assert result["updated_at"] is None
+
+    def test_generation_strategy_default(self):
+        """A row built without overrides converts with strategy "ungrouped"."""
+        converted = _control_row(self._make_row())
+        assert converted["generation_strategy"] == "ungrouped"
+
+    def test_generation_strategy_document_grouped(self):
+        """An overridden generation_strategy is passed through unchanged."""
+        converted = _control_row(self._make_row(generation_strategy="document_grouped"))
+        assert converted["generation_strategy"] == "document_grouped"
+
+
+# =============================================================================
+# ENDPOINT TESTS — Server-Side Pagination, Sort, Search, Source Filter
+# =============================================================================
+
+def _make_mock_row(**overrides):
+    """Build a mock DB row exposing canonical_controls columns as attributes.
+
+    Args:
+        **overrides: Column values that replace (or extend) the defaults.
+
+    Returns:
+        A MagicMock carrying one attribute per key of the merged defaults.
+    """
+    now = datetime.now(timezone.utc)
+    defaults = {
+        "id": "uuid-ctrl-1",
+        "framework_id": "uuid-fw-1",
+        "control_id": "AUTH-001",
+        "title": "Test Control",
+        "objective": "Test obj",
+        "rationale": "Test rat",
+        "scope": {},
+        "requirements": ["Req 1"],
+        "test_procedure": ["Test 1"],
+        "evidence": [],
+        "severity": "high",
+        "risk_score": 3.0,
+        "implementation_effort": "m",
+        "evidence_confidence": None,
+        "open_anchors": [],
+        "release_state": "draft",
+        "tags": [],
+        "license_rule": 1,
+        "source_original_text": None,
+        "source_citation": None,
+        "customer_visible": True,
+        "verification_method": "automated",
+        "category": "authentication",
+        "target_audience": "developer",
+        "generation_metadata": {},
+        "generation_strategy": "ungrouped",
+        # Pin the parent/decomposition columns explicitly: an attribute that
+        # is never set on a MagicMock is auto-created as a truthy child mock
+        # rather than None, which would leak mock objects into converted rows.
+        "parent_control_uuid": None,
+        "parent_control_id": None,
+        "parent_control_title": None,
+        "decomposition_method": None,
+        "pipeline_version": None,
+        "created_at": now,
+        "updated_at": now,
+    }
+    defaults.update(overrides)
+    mock = MagicMock()
+    # setattr (not MagicMock(**defaults)) so keys can never collide with
+    # MagicMock constructor parameters such as "name" or "spec".
+    for k, v in defaults.items():
+        setattr(mock, k, v)
+    return mock
+
+
+def _session_returning(rows=None, scalar=None):
+    """Build a MagicMock session whose execute() yields one canned result.
+
+    The result's fetchall() returns ``rows`` and its scalar() returns
+    ``scalar``; whichever is passed as None is left as a default mock. The
+    session doubles as its own context manager (``__enter__`` returns it).
+    """
+    query_result = MagicMock()
+    if scalar is not None:
+        query_result.scalar.return_value = scalar
+    if rows is not None:
+        query_result.fetchall.return_value = rows
+
+    session = MagicMock()
+    session.__enter__ = MagicMock(return_value=session)
+    session.__exit__ = MagicMock(return_value=False)
+    session.execute.return_value = query_result
+    return session
+
+
+class TestListControlsPagination:
+    """Pagination behaviour of GET /controls (limit/offset query params)."""
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_limit_param_in_sql(self, mock_cls):
+        session = _session_returning(rows=[_make_mock_row()])
+        mock_cls.return_value = session
+        resp = _client.get("/api/compliance/v1/canonical/controls?limit=10&offset=20")
+        assert resp.status_code == 200
+        # The session is its own context manager, so the executed statement
+        # can be read straight off the session mock.
+        statement = session.execute.call_args[0][0]
+        sql = str(statement.text)
+        assert "LIMIT" in sql
+        assert "OFFSET" in sql
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_no_limit_by_default(self, mock_cls):
+        session = _session_returning(rows=[])
+        mock_cls.return_value = session
+        resp = _client.get("/api/compliance/v1/canonical/controls")
+        assert resp.status_code == 200
+        statement = session.execute.call_args[0][0]
+        sql = str(statement.text)
+        assert "LIMIT" not in sql
+
+
+class TestListControlsSorting:
+    """Ordering behaviour of GET /controls (sort/order query params)."""
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_sort_created_at_desc(self, mock_cls):
+        session = _session_returning(rows=[])
+        mock_cls.return_value = session
+        resp = _client.get("/api/compliance/v1/canonical/controls?sort=created_at&order=desc")
+        assert resp.status_code == 200
+        sql = str(session.execute.call_args[0][0].text)
+        assert "created_at DESC" in sql
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_default_sort_control_id_asc(self, mock_cls):
+        session = _session_returning(rows=[])
+        mock_cls.return_value = session
+        resp = _client.get("/api/compliance/v1/canonical/controls")
+        assert resp.status_code == 200
+        sql = str(session.execute.call_args[0][0].text)
+        assert "control_id ASC" in sql
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_sql_injection_in_sort_blocked(self, mock_cls):
+        session = _session_returning(rows=[])
+        mock_cls.return_value = session
+        resp = _client.get("/api/compliance/v1/canonical/controls?sort=1;DROP+TABLE")
+        assert resp.status_code == 200
+        sql = str(session.execute.call_args[0][0].text)
+        # The hostile sort value must not reach the statement; the query is
+        # expected to order by control_id instead.
+        assert "DROP" not in sql
+        assert "control_id" in sql
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_sort_by_source(self, mock_cls):
+        session = _session_returning(rows=[])
+        mock_cls.return_value = session
+        resp = _client.get("/api/compliance/v1/canonical/controls?sort=source&order=asc")
+        assert resp.status_code == 200
+        sql = str(session.execute.call_args[0][0].text)
+        assert "source_citation" in sql
+        assert "control_id ASC" in sql  # secondary sort within source group
+
+
+class TestListControlsSearch:
+    """Free-text search behaviour of GET /controls (search query param)."""
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_search_uses_ilike(self, mock_cls):
+        session = _session_returning(rows=[])
+        mock_cls.return_value = session
+        resp = _client.get("/api/compliance/v1/canonical/controls?search=encryption")
+        assert resp.status_code == 200
+        positional = session.execute.call_args[0]
+        sql = str(positional[0].text)
+        assert "ILIKE" in sql
+        # The search term travels as a bound parameter wrapped in wildcards.
+        bound = positional[1]
+        assert bound["q"] == "%encryption%"
+
+
+class TestListControlsSourceFilter:
+    """Source filtering behaviour of GET /controls (source query param)."""
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_specific_source(self, mock_cls):
+        session = _session_returning(rows=[])
+        mock_cls.return_value = session
+        resp = _client.get("/api/compliance/v1/canonical/controls?source=DSGVO")
+        assert resp.status_code == 200
+        positional = session.execute.call_args[0]
+        sql = str(positional[0].text)
+        assert "source_citation" in sql
+        bound = positional[1]
+        assert bound["src"] == "DSGVO"
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_no_source_filter(self, mock_cls):
+        session = _session_returning(rows=[])
+        mock_cls.return_value = session
+        # "__none__" is the sentinel value this test sends to select
+        # controls without a source; the statement must then test IS NULL.
+        resp = _client.get("/api/compliance/v1/canonical/controls?source=__none__")
+        assert resp.status_code == 200
+        sql = str(session.execute.call_args[0][0].text)
+        assert "IS NULL" in sql
+
+
+class TestControlsCount:
+    """Behaviour of GET /controls-count."""
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_returns_total(self, mock_cls):
+        mock_cls.return_value = _session_returning(scalar=42)
+        resp = _client.get("/api/compliance/v1/canonical/controls-count")
+        assert resp.status_code == 200
+        payload = resp.json()
+        assert payload == {"total": 42}
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_with_filters(self, mock_cls):
+        session = _session_returning(scalar=5)
+        mock_cls.return_value = session
+        resp = _client.get("/api/compliance/v1/canonical/controls-count?severity=critical&search=mfa")
+        assert resp.status_code == 200
+        payload = resp.json()
+        assert payload == {"total": 5}
+        # Both filters must show up in the executed count statement.
+        sql = str(session.execute.call_args[0][0].text)
+        assert "severity" in sql
+        assert "ILIKE" in sql
+
+
+class TestControlsMeta:
+    """Behaviour of GET /controls-meta."""
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_returns_structure(self, mock_cls):
+        # Faceted meta does many execute() calls — every one of them gets the
+        # same canned result: scalar() -> 100, fetchall() -> [].
+        canned = MagicMock()
+        canned.fetchall.return_value = []
+        canned.scalar.return_value = 100
+
+        session = MagicMock()
+        session.__enter__ = MagicMock(return_value=session)
+        session.__exit__ = MagicMock(return_value=False)
+        session.execute.return_value = canned
+        mock_cls.return_value = session
+
+        resp = _client.get("/api/compliance/v1/canonical/controls-meta")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["total"] == 100
+        for list_key in ("domains", "sources"):
+            assert isinstance(data[list_key], list)
+        for facet_key in (
+            "type_counts",
+            "severity_counts",
+            "verification_method_counts",
+            "category_counts",
+            "evidence_type_counts",
+            "release_state_counts",
+        ):
+            assert facet_key in data
+
+
+class TestObligationDedup:
+    """Tests for obligation deduplication endpoints."""
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_dedup_dry_run(self, mock_cls):
+        """POST /obligations/dedup?dry_run=true reports stats and never commits."""
+        db = MagicMock()
+        db.__enter__ = MagicMock(return_value=db)
+        db.__exit__ = MagicMock(return_value=False)
+        mock_cls.return_value = db
+
+        # Mock: 2 duplicate groups
+        dup_row1 = MagicMock(candidate_id="OC-AUTH-001-01", cnt=3)
+        dup_row2 = MagicMock(candidate_id="OC-AUTH-001-02", cnt=2)
+
+        # Entries for group 1
+        import uuid
+        uid1 = uuid.uuid4()
+        uid2 = uuid.uuid4()
+        uid3 = uuid.uuid4()
+        entry1 = MagicMock(id=uid1, candidate_id="OC-AUTH-001-01", obligation_text="Text A", release_state="composed", created_at=datetime(2026, 1, 1, tzinfo=timezone.utc))
+        entry2 = MagicMock(id=uid2, candidate_id="OC-AUTH-001-01", obligation_text="Text B", release_state="composed", created_at=datetime(2026, 1, 2, tzinfo=timezone.utc))
+        entry3 = MagicMock(id=uid3, candidate_id="OC-AUTH-001-01", obligation_text="Text C", release_state="composed", created_at=datetime(2026, 1, 3, tzinfo=timezone.utc))
+
+        # Entries for group 2
+        uid4 = uuid.uuid4()
+        uid5 = uuid.uuid4()
+        entry4 = MagicMock(id=uid4, candidate_id="OC-AUTH-001-02", obligation_text="Text D", release_state="composed", created_at=datetime(2026, 1, 1, tzinfo=timezone.utc))
+        entry5 = MagicMock(id=uid5, candidate_id="OC-AUTH-001-02", obligation_text="Text E", release_state="composed", created_at=datetime(2026, 1, 2, tzinfo=timezone.utc))
+
+        # Side effects: 1) dup groups, 2) total count, 3) entries grp1, 4) entries grp2
+        mock_result_groups = MagicMock()
+        mock_result_groups.fetchall.return_value = [dup_row1, dup_row2]
+        mock_result_total = MagicMock()
+        mock_result_total.scalar.return_value = 2
+        mock_result_entries1 = MagicMock()
+        mock_result_entries1.fetchall.return_value = [entry1, entry2, entry3]
+        mock_result_entries2 = MagicMock()
+        mock_result_entries2.fetchall.return_value = [entry4, entry5]
+
+        # A list side_effect hands out one result per execute() call, in list
+        # order, so this queue must match the endpoint's query sequence.
+        db.execute.side_effect = [mock_result_groups, mock_result_total, mock_result_entries1, mock_result_entries2]
+
+        resp = _client.post("/api/compliance/v1/canonical/obligations/dedup?dry_run=true")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["dry_run"] is True
+        assert data["stats"]["total_duplicate_groups"] == 2
+        assert data["stats"]["kept"] == 2
+        assert data["stats"]["marked_duplicate"] == 3  # 2 from grp1 + 1 from grp2
+        # Dry run: no commit
+        db.commit.assert_not_called()
+
+    @patch("compliance.api.canonical_control_routes.SessionLocal")
+    def test_dedup_stats(self, mock_cls):
+        """GET /obligations/dedup-stats returns totals, per-state counts and pending work."""
+        db = MagicMock()
+        db.__enter__ = MagicMock(return_value=db)
+        db.__exit__ = MagicMock(return_value=False)
+        mock_cls.return_value = db
+
+        # total, by_state, dup_groups, removable
+        mock_total = MagicMock()
+        mock_total.scalar.return_value = 76046
+        mock_states = MagicMock()
+        mock_states.fetchall.return_value = [
+            MagicMock(release_state="composed", cnt=41217),
+            MagicMock(release_state="duplicate", cnt=34829),
+        ]
+        mock_dup_groups = MagicMock()
+        mock_dup_groups.scalar.return_value = 0
+        mock_removable = MagicMock()
+        mock_removable.scalar.return_value = 0
+
+        # Consumed one per execute() call, in the order listed above.
+        db.execute.side_effect = [mock_total, mock_states, mock_dup_groups, mock_removable]
+
+        resp = _client.get("/api/compliance/v1/canonical/obligations/dedup-stats")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["total_obligations"] == 76046
+        assert data["by_state"]["composed"] == 41217
+        assert data["by_state"]["duplicate"] == 34829
+        assert data["pending_duplicate_groups"] == 0
+        assert data["pending_removable_duplicates"] == 0
--- a/Show More
+++ b/Show More