merge: sync with origin/main, take upstream on conflicts

# Conflicts: # admin-compliance/lib/sdk/types.ts # admin-compliance/lib/sdk/vendor-compliance/types.ts
2026-04-16 16:26:48 +02:00
parent e04816cfe5 712fa8cb74
commit c43d9da6d0
352 changed files with 181673 additions and 2188 deletions
--- a/backend-compliance/compliance/api/init.py
+++ b/backend-compliance/compliance/api/init.py
@@ -6,6 +6,8 @@ from .routes import router

 logger = logging.getLogger(__name__)

+_failed_routers: dict[str, str] = {}
+

 def _safe_import_router(module_name: str, attr: str = "router"):
    """Import a router module safely — log error but don't crash the whole app."""
@@ -14,6 +16,7 @@ def _safe_import_router(module_name: str, attr: str = "router"):
        return getattr(mod, attr)
    except Exception as e:
        logger.error("Failed to import %s: %s", module_name, e)
+        _failed_routers[module_name] = str(e)
        return None


@@ -53,6 +56,13 @@ _ROUTER_MODULES = [
    "wiki_routes",
    "canonical_control_routes",
    "control_generator_routes",
+    "crosswalk_routes",
+    "process_task_routes",
+    "evidence_check_routes",
+    "vvt_library_routes",
+    "tom_mapping_routes",
+    "llm_audit_routes",
+    "assertion_routes",
 ]

 _loaded_count = 0
--- a/backend-compliance/compliance/api/assertion_routes.py
+++ b/backend-compliance/compliance/api/assertion_routes.py
@@ -0,0 +1,227 @@
+"""
+API routes for Assertion Engine (Anti-Fake-Evidence Phase 2).
+
+Endpoints:
+- /assertions: CRUD for assertions
+- /assertions/extract: Automatic extraction from entity text
+- /assertions/summary: Stats (total assertions, facts, unverified)
+"""
+
+import logging
+from datetime import datetime
+from typing import Optional
+
+from fastapi import APIRouter, Depends, HTTPException, Query
+from sqlalchemy.orm import Session
+
+from classroom_engine.database import get_db
+
+from ..db.models import AssertionDB
+from ..services.assertion_engine import extract_assertions
+from .schemas import (
+    AssertionCreate,
+    AssertionUpdate,
+    AssertionResponse,
+    AssertionListResponse,
+    AssertionSummaryResponse,
+    AssertionExtractRequest,
+)
+from .audit_trail_utils import log_audit_trail, generate_id
+
+logger = logging.getLogger(__name__)
+router = APIRouter(tags=["compliance-assertions"])
+
+
+def _build_assertion_response(a: AssertionDB) -> AssertionResponse:
+    """Convert an ``AssertionDB`` row into its ``AssertionResponse`` schema.
+
+    Most columns are copied verbatim. ``evidence_ids`` and ``confidence``
+    fall back to ``[]`` and ``0.0`` when the stored value is falsy.
+    """
+    copied_fields = (
+        "id",
+        "tenant_id",
+        "entity_type",
+        "entity_id",
+        "sentence_text",
+        "sentence_index",
+        "assertion_type",
+        "normative_tier",
+        "verified_by",
+        "verified_at",
+        "created_at",
+        "updated_at",
+    )
+    payload = {name: getattr(a, name) for name in copied_fields}
+    payload["evidence_ids"] = a.evidence_ids or []
+    payload["confidence"] = a.confidence or 0.0
+    return AssertionResponse(**payload)
+
+
+@router.post("/assertions", response_model=AssertionResponse)
+async def create_assertion(
+    data: AssertionCreate,
+    tenant_id: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """Persist one manually supplied assertion and return the stored row.
+
+    The tenant comes from the ``tenant_id`` query parameter, everything else
+    from the request body. A missing/empty ``assertion_type`` is stored as
+    ``"assertion"`` and missing ``evidence_ids`` as an empty list.
+    """
+    # Resolve the body defaults up front so the constructor call stays flat.
+    kind = data.assertion_type or "assertion"
+    linked_evidence = data.evidence_ids or []
+
+    record = AssertionDB(
+        id=generate_id(),
+        tenant_id=tenant_id,
+        entity_type=data.entity_type,
+        entity_id=data.entity_id,
+        sentence_text=data.sentence_text,
+        assertion_type=kind,
+        evidence_ids=linked_evidence,
+        normative_tier=data.normative_tier,
+    )
+    db.add(record)
+    db.commit()
+    # Reload after commit so the response carries the persisted column values.
+    db.refresh(record)
+    return _build_assertion_response(record)
+
+
+@router.get("/assertions", response_model=AssertionListResponse)
+async def list_assertions(
+    entity_type: Optional[str] = Query(None),
+    entity_id: Optional[str] = Query(None),
+    assertion_type: Optional[str] = Query(None),
+    tenant_id: Optional[str] = Query(None),
+    limit: int = Query(100, ge=1, le=500),
+    db: Session = Depends(get_db),
+):
+    """Return assertions matching the supplied equality filters.
+
+    Filters that are omitted or empty are not applied. ``total`` counts every
+    matching row, while ``assertions`` holds at most ``limit`` rows ordered by
+    ascending ``sentence_index``.
+    """
+    equality_filters = (
+        (AssertionDB.entity_type, entity_type),
+        (AssertionDB.entity_id, entity_id),
+        (AssertionDB.assertion_type, assertion_type),
+        (AssertionDB.tenant_id, tenant_id),
+    )
+
+    query = db.query(AssertionDB)
+    for column, wanted in equality_filters:
+        if wanted:
+            query = query.filter(column == wanted)
+
+    # Count before applying the page limit so "total" reflects the full match set.
+    total = query.count()
+    page = query.order_by(AssertionDB.sentence_index.asc()).limit(limit).all()
+
+    return AssertionListResponse(
+        assertions=[_build_assertion_response(row) for row in page],
+        total=total,
+    )
+
+
+@router.get("/assertions/summary", response_model=AssertionSummaryResponse)
+async def assertion_summary(
+    tenant_id: Optional[str] = Query(None),
+    entity_type: Optional[str] = Query(None),
+    entity_id: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """Summary stats: total assertions, facts, rationale, unverified.
+
+    Omitted or empty filters are not applied. "Unverified" means rows whose
+    type is still ``"assertion"`` and whose ``verified_by`` is empty.
+    """
+    query = db.query(AssertionDB)
+    if tenant_id:
+        query = query.filter(AssertionDB.tenant_id == tenant_id)
+    if entity_type:
+        query = query.filter(AssertionDB.entity_type == entity_type)
+    if entity_id:
+        query = query.filter(AssertionDB.entity_id == entity_id)
+
+    # Only two columns feed the tallies; selecting just those avoids loading
+    # every full ORM row (sentence text, evidence lists, ...) merely to count.
+    rows = query.with_entities(
+        AssertionDB.assertion_type,
+        AssertionDB.verified_by,
+    ).all()
+
+    total = len(rows)
+    facts = sum(1 for kind, _ in rows if kind == "fact")
+    rationale = sum(1 for kind, _ in rows if kind == "rationale")
+    unverified = sum(
+        1 for kind, verifier in rows if kind == "assertion" and not verifier
+    )
+
+    return AssertionSummaryResponse(
+        total_assertions=total,
+        total_facts=facts,
+        total_rationale=rationale,
+        unverified_count=unverified,
+    )
+
+
+@router.get("/assertions/{assertion_id}", response_model=AssertionResponse)
+async def get_assertion(
+    assertion_id: str,
+    db: Session = Depends(get_db),
+):
+    """Look up one assertion by primary key; respond 404 when it does not exist."""
+    match = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
+    if match:
+        return _build_assertion_response(match)
+    raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")
+
+
+@router.put("/assertions/{assertion_id}", response_model=AssertionResponse)
+async def update_assertion(
+    assertion_id: str,
+    data: AssertionUpdate,
+    db: Session = Depends(get_db),
+):
+    """Apply a partial update to an assertion (e.g. link evidence, change type).
+
+    Only fields explicitly present in the request body are written;
+    ``updated_at`` is always refreshed. Responds 404 for an unknown id.
+    """
+    target = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
+    if not target:
+        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")
+
+    # exclude_unset keeps omitted fields from overwriting stored values.
+    changes = data.model_dump(exclude_unset=True)
+    for field_name in changes:
+        setattr(target, field_name, changes[field_name])
+    target.updated_at = datetime.utcnow()
+
+    db.commit()
+    db.refresh(target)
+    return _build_assertion_response(target)
+
+
+@router.post("/assertions/{assertion_id}/verify", response_model=AssertionResponse)
+async def verify_assertion(
+    assertion_id: str,
+    verified_by: str = Query(...),
+    db: Session = Depends(get_db),
+):
+    """Mark an assertion as verified fact.
+
+    Sets ``assertion_type`` to ``"fact"`` and records who verified it and when.
+
+    Raises:
+        HTTPException 400: ``verified_by`` is empty or whitespace only.
+        HTTPException 404: no assertion with ``assertion_id`` exists.
+    """
+    # Query(...) only enforces presence; a blank verifier would promote the
+    # row to "fact" without recording who vouched for it.
+    if not verified_by.strip():
+        raise HTTPException(status_code=400, detail="verified_by must not be empty")
+
+    a = db.query(AssertionDB).filter(AssertionDB.id == assertion_id).first()
+    if not a:
+        raise HTTPException(status_code=404, detail=f"Assertion {assertion_id} not found")
+
+    # One timestamp for both columns so verified_at and updated_at agree exactly.
+    now = datetime.utcnow()
+    a.assertion_type = "fact"
+    a.verified_by = verified_by
+    a.verified_at = now
+    a.updated_at = now
+    db.commit()
+    db.refresh(a)
+    return _build_assertion_response(a)
+
+
+@router.post("/assertions/extract", response_model=AssertionListResponse)
+async def extract_assertions_endpoint(
+    data: AssertionExtractRequest,
+    tenant_id: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """Run the assertion extractor over free text and store every hit.
+
+    Each item returned by ``extract_assertions`` becomes one ``AssertionDB``
+    row; all rows are committed together and returned with their count.
+    """
+    hits = extract_assertions(
+        text=data.text,
+        entity_type=data.entity_type,
+        entity_id=data.entity_id,
+        tenant_id=tenant_id,
+    )
+
+    # normative_tier and confidence are optional in the extractor output;
+    # the remaining keys are required.
+    created = [
+        AssertionDB(
+            id=generate_id(),
+            tenant_id=hit["tenant_id"],
+            entity_type=hit["entity_type"],
+            entity_id=hit["entity_id"],
+            sentence_text=hit["sentence_text"],
+            sentence_index=hit["sentence_index"],
+            assertion_type=hit["assertion_type"],
+            evidence_ids=hit["evidence_ids"],
+            normative_tier=hit.get("normative_tier"),
+            confidence=hit.get("confidence", 0.0),
+        )
+        for hit in hits
+    ]
+    db.add_all(created)
+    db.commit()
+
+    for record in created:
+        db.refresh(record)
+
+    serialized = [_build_assertion_response(record) for record in created]
+    return AssertionListResponse(
+        assertions=serialized,
+        total=len(created),
+    )
--- a/backend-compliance/compliance/api/audit_trail_utils.py
+++ b/backend-compliance/compliance/api/audit_trail_utils.py
@@ -0,0 +1,53 @@
+"""Shared audit trail utilities.
+
+Extracted from isms_routes.py for reuse across evidence, control,
+and assertion routes.
+"""
+
+import hashlib
+import uuid
+from datetime import datetime
+
+from sqlalchemy.orm import Session
+
+from ..db.models import AuditTrailDB
+
+
+def generate_id() -> str:
+    """Return a freshly generated random (version 4) UUID as a string."""
+    fresh_uuid = uuid.uuid4()
+    return str(fresh_uuid)
+
+
+def create_signature(data: str) -> str:
+    """Return the hex-encoded SHA-256 digest of ``data`` (encoded as UTF-8)."""
+    digest = hashlib.sha256()
+    digest.update(data.encode())
+    return digest.hexdigest()
+
+
+def log_audit_trail(
+    db: Session,
+    entity_type: str,
+    entity_id: str,
+    entity_name: str,
+    action: str,
+    performed_by: str,
+    field_changed: str = None,
+    old_value: str = None,
+    new_value: str = None,
+    change_summary: str = None,
+):
+    """Stage one audit-trail row on ``db``.
+
+    The row is only added to the session; committing is left to the caller.
+    The checksum covers entity type, entity id, action and actor only.
+    """
+    checksum = create_signature(f"{entity_type}|{entity_id}|{action}|{performed_by}")
+    entry_fields = {
+        "id": generate_id(),
+        "entity_type": entity_type,
+        "entity_id": entity_id,
+        "entity_name": entity_name,
+        "action": action,
+        "field_changed": field_changed,
+        "old_value": old_value,
+        "new_value": new_value,
+        "change_summary": change_summary,
+        "performed_by": performed_by,
+        "performed_at": datetime.utcnow(),
+        "checksum": checksum,
+    }
+    db.add(AuditTrailDB(**entry_fields))
--- a/backend-compliance/compliance/api/canonical_control_routes.py
+++ b/backend-compliance/compliance/api/canonical_control_routes.py
--- a/backend-compliance/compliance/api/control_generator_routes.py
+++ b/backend-compliance/compliance/api/control_generator_routes.py
@@ -12,6 +12,7 @@ Endpoints:
  POST /v1/canonical/blocked-sources/cleanup      — Start cleanup workflow
 """

+import asyncio
 import json
 import logging
 from typing import Optional, List
@@ -25,7 +26,16 @@ from compliance.services.control_generator import (
    ControlGeneratorPipeline,
    GeneratorConfig,
    ALL_COLLECTIONS,
+    VALID_CATEGORIES,
+    VALID_DOMAINS,
+    _classify_regulation,
+    _detect_category,
+    _detect_domain,
+    _llm_local,
+    _parse_llm_json,
+    CATEGORY_LIST_STR,
 )
+from compliance.services.citation_backfill import CitationBackfill, BackfillResult
 from compliance.services.rag_client import get_rag_client

 logger = logging.getLogger(__name__)
@@ -40,9 +50,12 @@ class GenerateRequest(BaseModel):
    domain: Optional[str] = None
    collections: Optional[List[str]] = None
    max_controls: int = 50
+    max_chunks: int = 1000  # Default: process max 1000 chunks per job (respects document boundaries)
    batch_size: int = 5
    skip_web_search: bool = False
    dry_run: bool = False
+    regulation_filter: Optional[List[str]] = None  # Only process these regulation_code prefixes
+    skip_prefilter: bool = False  # Skip local LLM pre-filter, send all chunks to API


 class GenerateResponse(BaseModel):
@@ -55,6 +68,7 @@ class GenerateResponse(BaseModel):
    controls_needs_review: int = 0
    controls_too_close: int = 0
    controls_duplicates_found: int = 0
+    controls_qa_fixed: int = 0
    errors: list = []
    controls: list = []

@@ -89,42 +103,111 @@ class BlockedSourceResponse(BaseModel):
 # ENDPOINTS
 # =============================================================================

+async def _run_pipeline_background(config: GeneratorConfig, job_id: str):
+    """Run the pipeline in the background. Uses its own DB session.
+
+    ``job_id`` is written to ``config.existing_job_id`` before the run. Any
+    exception is logged and the job row is marked 'failed' on a best-effort
+    basis; nothing is re-raised because no caller awaits this coroutine's
+    result.
+    """
+    db = SessionLocal()
+    try:
+        config.existing_job_id = job_id
+        pipeline = ControlGeneratorPipeline(db=db, rag_client=get_rag_client())
+        result = await pipeline.run(config)
+        logger.info(
+            "Background generation job %s completed: %d controls from %d chunks",
+            job_id, result.controls_generated, result.total_chunks_scanned,
+        )
+    except Exception as e:
+        logger.exception("Background generation job %s failed: %s", job_id, e)
+        # Update job as failed
+        try:
+            # If the pipeline died on a database error the session's
+            # transaction is unusable until rolled back; without this the
+            # UPDATE below fails too and the job stays 'running' forever.
+            db.rollback()
+            db.execute(
+                text("""
+                    UPDATE canonical_generation_jobs
+                    SET status = 'failed', errors = :errors, completed_at = NOW()
+                    WHERE id = CAST(:job_id AS uuid)
+                """),
+                {"job_id": job_id, "errors": json.dumps([str(e)])},
+            )
+            db.commit()
+        except Exception:
+            # Still best-effort, but leave a trace instead of failing silently.
+            logger.exception("Could not mark generation job %s as failed", job_id)
+    finally:
+        db.close()
+
+
+# Strong references to in-flight generation tasks. The event loop itself only
+# keeps weak references, so an unreferenced task may be garbage-collected
+# before it finishes.
+_generation_tasks: set = set()
+
+
@router.post("/generate", response_model=GenerateResponse)
 async def start_generation(req: GenerateRequest):
-    """Start a control generation run."""
+    """Start a control generation run (runs in background).
+
+    Returns immediately with job_id. Use GET /generate/status/{job_id} to poll progress.
+    With dry_run=True the pipeline runs synchronously instead and the generated
+    controls are returned directly in the response.
+    """
    config = GeneratorConfig(
        collections=req.collections,
        domain=req.domain,
        batch_size=req.batch_size,
        max_controls=req.max_controls,
+        max_chunks=req.max_chunks,
        skip_web_search=req.skip_web_search,
        dry_run=req.dry_run,
+        regulation_filter=req.regulation_filter,
+        skip_prefilter=req.skip_prefilter,
    )

+    if req.dry_run:
+        # Dry run: execute synchronously and return controls
+        db = SessionLocal()
+        try:
+            pipeline = ControlGeneratorPipeline(db=db, rag_client=get_rag_client())
+            result = await pipeline.run(config)
+            return GenerateResponse(
+                job_id=result.job_id,
+                status=result.status,
+                message=f"Dry run: {result.controls_generated} controls from {result.total_chunks_scanned} chunks",
+                total_chunks_scanned=result.total_chunks_scanned,
+                controls_generated=result.controls_generated,
+                controls_verified=result.controls_verified,
+                controls_needs_review=result.controls_needs_review,
+                controls_too_close=result.controls_too_close,
+                controls_duplicates_found=result.controls_duplicates_found,
+                errors=result.errors,
+                controls=result.controls,
+            )
+        except Exception as e:
+            logger.error("Dry run failed: %s", e)
+            raise HTTPException(status_code=500, detail=str(e))
+        finally:
+            db.close()
+
+    # Create job record first so we can return the ID
    db = SessionLocal()
    try:
-        pipeline = ControlGeneratorPipeline(db=db, rag_client=get_rag_client())
-        result = await pipeline.run(config)
-
-        return GenerateResponse(
-            job_id=result.job_id,
-            status=result.status,
-            message=f"Generated {result.controls_generated} controls from {result.total_chunks_scanned} chunks",
-            total_chunks_scanned=result.total_chunks_scanned,
-            controls_generated=result.controls_generated,
-            controls_verified=result.controls_verified,
-            controls_needs_review=result.controls_needs_review,
-            controls_too_close=result.controls_too_close,
-            controls_duplicates_found=result.controls_duplicates_found,
-            errors=result.errors,
-            controls=result.controls if req.dry_run else [],
+        result = db.execute(
+            text("""
+                INSERT INTO canonical_generation_jobs (status, config)
+                VALUES ('running', :config)
+                RETURNING id
+            """),
+            {"config": json.dumps(config.model_dump())},
        )
+        # Read the RETURNING row while the transaction is still open; commit()
+        # releases the connection the result cursor belongs to.
+        row = result.fetchone()
+        db.commit()
+        job_id = str(row[0]) if row else None
    except Exception as e:
-        logger.error("Generation failed: %s", e)
-        raise HTTPException(status_code=500, detail=str(e))
+        logger.error("Failed to create job: %s", e)
+        raise HTTPException(status_code=500, detail=f"Failed to create job: {e}")
    finally:
        db.close()

+    if not job_id:
+        raise HTTPException(status_code=500, detail="Failed to create job record")
+
+    # Launch pipeline in background, holding a strong reference until it is done
+    task = asyncio.create_task(_run_pipeline_background(config, job_id))
+    _generation_tasks.add(task)
+    task.add_done_callback(_generation_tasks.discard)
+
+    return GenerateResponse(
+        job_id=job_id,
+        status="running",
+        message="Generation started in background. Poll /generate/status/{job_id} for progress.",
+    )
+

@router.get("/generate/status/{job_id}")
 async def get_job_status(job_id: str):
@@ -132,7 +215,7 @@ async def get_job_status(job_id: str):
    db = SessionLocal()
    try:
        result = db.execute(
-            text("SELECT * FROM canonical_generation_jobs WHERE id = :id::uuid"),
+            text("SELECT * FROM canonical_generation_jobs WHERE id = CAST(:id AS uuid)"),
            {"id": job_id},
        )
        row = result.fetchone()
@@ -270,6 +353,188 @@ async def review_control(control_id: str, req: ReviewRequest):
        db.close()


+# Request body for POST /generate/bulk-review.
+class BulkReviewRequest(BaseModel):
+    # Current release_state of the controls to update; the endpoint accepts only
+    # "needs_review", "too_close" or "duplicate".
+    release_state: str  # Filter: which controls to bulk-review
+    action: str  # "approve" or "reject"
+    # Only consulted for "approve" (falls back to "draft" when omitted);
+    # "reject" always moves the controls to "deprecated".
+    new_state: Optional[str] = None  # Override target state
+
+
+@router.post("/generate/bulk-review")
+async def bulk_review(req: BulkReviewRequest):
+    """Move every control in one release state to a new state in a single UPDATE.
+
+    "reject" always targets "deprecated"; "approve" targets ``new_state`` or
+    "draft" when none is given. Invalid filter states, actions or target
+    states are answered with HTTP 400.
+    """
+    reviewable_states = ("needs_review", "too_close", "duplicate")
+    allowed_targets = ("draft", "review", "approved", "deprecated", "needs_review")
+
+    if req.release_state not in reviewable_states:
+        raise HTTPException(status_code=400, detail=f"Invalid filter state: {req.release_state}")
+
+    if req.action not in ("approve", "reject"):
+        raise HTTPException(status_code=400, detail=f"Unknown action: {req.action}")
+    target = "deprecated" if req.action == "reject" else (req.new_state or "draft")
+
+    if target not in allowed_targets:
+        raise HTTPException(status_code=400, detail=f"Invalid target state: {target}")
+
+    db = SessionLocal()
+    try:
+        statement = text("""
+                UPDATE canonical_controls
+                SET release_state = :target, updated_at = NOW()
+                WHERE release_state = :source
+                RETURNING control_id
+            """)
+        # Consume the RETURNING rows before committing.
+        touched_ids = [
+            returned[0]
+            for returned in db.execute(
+                statement,
+                {"source": req.release_state, "target": target},
+            )
+        ]
+        db.commit()
+
+        return {
+            "action": req.action,
+            "source_state": req.release_state,
+            "target_state": target,
+            "affected_count": len(touched_ids),
+        }
+    finally:
+        db.close()
+
+
+# Request body for POST /generate/qa-reclassify.
+class QAReclassifyRequest(BaseModel):
+    # Used as the SQL LIMIT on the newest-first control selection.
+    limit: int = 100  # How many controls to reclassify per run
+    # When True no UPDATE is issued and nothing is committed.
+    dry_run: bool = True  # Preview only by default
+    # Exact match on the control's category column.
+    filter_category: Optional[str] = None  # Only reclassify controls of this category
+    # Matched as control_id LIKE '<prefix>-%'.
+    filter_domain_prefix: Optional[str] = None  # Only reclassify controls with this prefix
+
+
+@router.post("/generate/qa-reclassify")
+async def qa_reclassify(req: QAReclassifyRequest):
+    """Run QA reclassification on existing controls using local LLM.
+
+    Finds controls where keyword-detection disagrees with current category/domain,
+    then uses Ollama to determine the correct classification.
+
+    Only the category column is ever written; the suggested domain is reported
+    in the result but not applied. With ``dry_run`` nothing is written at all.
+
+    Returns a dict with ``checked``, ``mismatches``, ``fixes`` (one entry per
+    arbitrated control) and ``errors`` (one entry per control whose
+    arbitration raised).
+    """
+    db = SessionLocal()
+    try:
+        # Load controls to check
+        where_clauses = ["release_state NOT IN ('deprecated')"]
+        params = {"limit": req.limit}
+        if req.filter_category:
+            where_clauses.append("category = :cat")
+            params["cat"] = req.filter_category
+        if req.filter_domain_prefix:
+            where_clauses.append("control_id LIKE :prefix")
+            params["prefix"] = f"{req.filter_domain_prefix}-%"
+
+        # Only fixed SQL fragments are interpolated; user values go through bind params.
+        where_sql = " AND ".join(where_clauses)
+        rows = db.execute(
+            text(f"""
+                SELECT id, control_id, title, objective, category,
+                       COALESCE(requirements::text, '[]') as requirements,
+                       COALESCE(source_original_text, '') as source_text
+                FROM canonical_controls
+                WHERE {where_sql}
+                ORDER BY created_at DESC
+                LIMIT :limit
+            """),
+            params,
+        ).fetchall()
+
+        results = {"checked": 0, "mismatches": 0, "fixes": [], "errors": []}
+
+        for row in rows:
+            results["checked"] += 1
+            control_id = row[1]
+            # Guard NULL titles the same way as objective; slicing None below
+            # would otherwise turn the control into an error entry.
+            title = row[2] or ""
+            objective = row[3] or ""
+            current_category = row[4]
+            source_text = row[6] or objective
+
+            # Keyword detection on source text
+            kw_category = _detect_category(source_text) or _detect_category(objective)
+            kw_domain = _detect_domain(source_text)
+            current_prefix = control_id.split("-")[0] if "-" in control_id else ""
+
+            # Skip if keyword detection agrees with current classification
+            if kw_category == current_category and kw_domain == current_prefix:
+                continue
+
+            results["mismatches"] += 1
+
+            # Ask Ollama to arbitrate
+            try:
+                reqs_text = ""
+                try:
+                    reqs = json.loads(row[5])
+                    if isinstance(reqs, list):
+                        reqs_text = ", ".join(str(r) for r in reqs[:3])
+                except Exception:
+                    # Requirements only enrich the prompt; unparsable JSON is ignored.
+                    pass
+
+                prompt = f"""Pruefe dieses Compliance-Control auf korrekte Klassifizierung.
+
+Titel: {title[:100]}
+Ziel: {objective[:200]}
+Anforderungen: {reqs_text[:200]}
+
+Aktuelle Zuordnung: domain={current_prefix}, category={current_category}
+Keyword-Erkennung: domain={kw_domain}, category={kw_category}
+
+Welche Zuordnung ist korrekt? Antworte NUR als JSON:
+{{"domain": "KUERZEL", "category": "kategorie_name", "reason": "kurze Begruendung"}}
+
+Domains: AUTH=Authentifizierung, CRYP=Kryptographie, NET=Netzwerk, DATA=Datenschutz, LOG=Logging, ACC=Zugriffskontrolle, SEC=IT-Sicherheit, INC=Vorfallmanagement, AI=KI, COMP=Compliance, GOV=Behoerden, LAB=Arbeitsrecht, FIN=Finanzregulierung, TRD=Gewerbe, ENV=Umwelt, HLT=Gesundheit
+Kategorien: {CATEGORY_LIST_STR}"""
+
+                raw = await _llm_local(prompt)
+                data = _parse_llm_json(raw)
+                if not data:
+                    continue
+
+                # The model may answer null or a non-string for a field;
+                # normalise to str so such an answer is treated as "no valid
+                # suggestion" rather than raising.
+                qa_domain = str(data.get("domain") or "").upper()
+                qa_category = str(data.get("category") or "")
+                reason = data.get("reason", "")
+
+                fix_entry = {
+                    "control_id": control_id,
+                    "title": title[:80],
+                    "old_category": current_category,
+                    "old_domain": current_prefix,
+                    "new_category": qa_category if qa_category in VALID_CATEGORIES else current_category,
+                    "new_domain": qa_domain if qa_domain in VALID_DOMAINS else current_prefix,
+                    "reason": reason,
+                }
+
+                category_changed = qa_category in VALID_CATEGORIES and qa_category != current_category
+
+                if category_changed and not req.dry_run:
+                    db.execute(
+                        text("""
+                            UPDATE canonical_controls
+                            SET category = :category, updated_at = NOW()
+                            WHERE id = :id
+                        """),
+                        {"id": row[0], "category": qa_category},
+                    )
+                    fix_entry["applied"] = True
+                else:
+                    fix_entry["applied"] = False
+
+                results["fixes"].append(fix_entry)
+
+            except Exception as e:
+                results["errors"].append({"control_id": control_id, "error": str(e)})
+
+        if not req.dry_run:
+            db.commit()
+
+        return results
+    finally:
+        db.close()
+
+
@router.get("/generate/processed-stats")
 async def get_processed_stats():
    """Get processing statistics per collection."""
@@ -429,3 +694,407 @@ async def get_controls_customer_view(
        return {"controls": controls, "total": len(controls)}
    finally:
        db.close()
+
+
+# =============================================================================
+# CITATION BACKFILL
+# =============================================================================
+
+# Request body for POST /generate/backfill-citations; both values are passed
+# through to CitationBackfill.run().
+class BackfillRequest(BaseModel):
+    dry_run: bool = True  # Default to dry_run for safety
+    limit: int = 0  # 0 = all controls
+
+
+# Response model of POST /generate/backfill-citations. That endpoint fills in
+# only `status` (including the backfill id); the counters keep their defaults.
+# The field names mirror the counters stored in the in-memory status entry once
+# a run completes.
+class BackfillResponse(BaseModel):
+    status: str
+    total_controls: int = 0
+    matched_hash: int = 0
+    matched_regex: int = 0
+    matched_llm: int = 0
+    unmatched: int = 0
+    updated: int = 0
+    errors: list = []
+
+
+_backfill_status: dict = {}
+
+
+async def _run_backfill_background(dry_run: bool, limit: int, backfill_id: str):
+    """Execute a citation backfill on a dedicated DB session and record the outcome.
+
+    The result (or the failure) is stored in ``_backfill_status`` under
+    ``backfill_id``; at most the first 50 error entries are kept.
+    """
+    counter_fields = (
+        "total_controls",
+        "matched_hash",
+        "matched_regex",
+        "matched_llm",
+        "unmatched",
+        "updated",
+    )
+    db = SessionLocal()
+    try:
+        runner = CitationBackfill(db=db, rag_client=get_rag_client())
+        outcome = await runner.run(dry_run=dry_run, limit=limit)
+
+        summary = {"status": "completed"}
+        summary.update((name, getattr(outcome, name)) for name in counter_fields)
+        summary["errors"] = outcome.errors[:50]
+        _backfill_status[backfill_id] = summary
+
+        logger.info("Backfill %s completed: %d updated", backfill_id, outcome.updated)
+    except Exception as e:
+        logger.error("Backfill %s failed: %s", backfill_id, e)
+        _backfill_status[backfill_id] = {"status": "failed", "errors": [str(e)]}
+    finally:
+        db.close()
+
+
+# Strong references to in-flight backfill tasks. The event loop itself only
+# keeps weak references, so an unreferenced task may be garbage-collected
+# before it finishes.
+_backfill_tasks: set = set()
+
+
+@router.post("/generate/backfill-citations", response_model=BackfillResponse)
+async def start_backfill(req: BackfillRequest):
+    """Backfill article/paragraph into existing control source_citations.
+
+    Uses 3-tier matching: hash lookup → regex parse → Ollama LLM.
+    Default is dry_run=True (preview only, no DB changes).
+
+    Returns immediately; the response's ``status`` string carries the backfill
+    id to use with GET /generate/backfill-status/{backfill_id}.
+    """
+    import uuid
+    backfill_id = str(uuid.uuid4())[:8]
+    _backfill_status[backfill_id] = {"status": "running"}
+
+    # Always run in background (RAG index build takes minutes)
+    task = asyncio.create_task(_run_backfill_background(req.dry_run, req.limit, backfill_id))
+    _backfill_tasks.add(task)
+    task.add_done_callback(_backfill_tasks.discard)
+    return BackfillResponse(
+        status=f"running (id={backfill_id})",
+    )
+
+
+@router.get("/generate/backfill-status/{backfill_id}")
+async def get_backfill_status(backfill_id: str):
+    """Return the recorded state of a backfill run, or 404 for an unknown id."""
+    entry = _backfill_status.get(backfill_id)
+    if entry:
+        return entry
+    raise HTTPException(status_code=404, detail="Backfill job not found")
+
+
+# =============================================================================
+# DOMAIN + TARGET AUDIENCE BACKFILL
+# =============================================================================
+
+# Parameters for the domain / category / target-audience backfill.
+class DomainBackfillRequest(BaseModel):
+    dry_run: bool = True
+    # Matched against generation_metadata->>'job_id' when set.
+    job_id: Optional[str] = None  # Only backfill controls from this job
+    # A value > 0 is appended to the selection query as a SQL LIMIT.
+    limit: int = 0  # 0 = all
+
+_domain_backfill_status: dict = {}
+
+
+async def _run_domain_backfill(req: DomainBackfillRequest, backfill_id: str):
+    """Backfill domain, category, and target_audience for existing controls using Anthropic."""
+    import os
+    import httpx
+
+    ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
+    ANTHROPIC_MODEL = os.getenv("CONTROL_GEN_ANTHROPIC_MODEL", "claude-sonnet-4-6")
+
+    if not ANTHROPIC_API_KEY:
+        _domain_backfill_status[backfill_id] = {
+            "status": "failed", "error": "ANTHROPIC_API_KEY not set"
+        }
+        return
+
+    db = SessionLocal()
+    try:
+        # Find controls needing backfill
+        where_clauses = ["(target_audience IS NULL OR target_audience = '[]' OR target_audience = 'null')"]
+        params: dict = {}
+        if req.job_id:
+            where_clauses.append("generation_metadata->>'job_id' = :job_id")
+            params["job_id"] = req.job_id
+
+        query = f"""
+            SELECT id, control_id, title, objective, category, source_original_text, tags
+            FROM canonical_controls
+            WHERE {' AND '.join(where_clauses)}
+            ORDER BY control_id
+        """
+        if req.limit > 0:
+            query += f" LIMIT {req.limit}"
+
+        result = db.execute(text(query), params)
+        controls = [dict(zip(result.keys(), row)) for row in result]
+
+        total = len(controls)
+        updated = 0
+        errors = []
+
+        _domain_backfill_status[backfill_id] = {
+            "status": "running", "total": total, "updated": 0, "errors": []
+        }
+
+        # Process in batches of 10
+        BATCH_SIZE = 10
+        for batch_start in range(0, total, BATCH_SIZE):
+            batch = controls[batch_start:batch_start + BATCH_SIZE]
+
+            entries = []
+            for idx, ctrl in enumerate(batch):
+                text_for_analysis = ctrl.get("objective") or ctrl.get("title") or ""
+                original = ctrl.get("source_original_text") or ""
+                if original:
+                    text_for_analysis += f"\n\nQuelltext-Auszug: {original[:500]}"
+                entries.append(
+                    f"--- CONTROL {idx + 1}: {ctrl['control_id']} ---\n"
+                    f"Titel: {ctrl.get('title', '')}\n"
+                    f"Objective: {text_for_analysis[:800]}\n"
+                    f"Tags: {json.dumps(ctrl.get('tags', []))}"
+                )
+
+            prompt = f"""Analysiere die folgenden {len(batch)} Controls und bestimme fuer jedes:
+1. domain: Das Fachgebiet (AUTH, CRYP, NET, DATA, LOG, ACC, SEC, INC, AI, COMP, GOV, LAB, FIN, TRD, ENV, HLT)
+2. category: Die Kategorie (encryption, authentication, network, data_protection, logging, incident, continuity, compliance, supply_chain, physical, personnel, application, system, risk, governance, hardware, identity, public_administration, labor_law, finance, trade_regulation, environmental, health)
+3. target_audience: Liste der Zielgruppen (moegliche Werte: "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "vertrieb", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst")
+
+Antworte mit einem JSON-Array mit {len(batch)} Objekten. Jedes Objekt hat:
+- control_index: 1-basierter Index
+- domain: Fachgebiet-Kuerzel
+- category: Kategorie
+- target_audience: Liste der Zielgruppen
+
+{"".join(entries)}"""
+
+            try:
+                headers = {
+                    "x-api-key": ANTHROPIC_API_KEY,
+                    "anthropic-version": "2023-06-01",
+                    "content-type": "application/json",
+                }
+                payload = {
+                    "model": ANTHROPIC_MODEL,
+                    "max_tokens": 4096,
+                    "system": "Du bist ein Compliance-Experte. Klassifiziere Controls nach Fachgebiet und Zielgruppe. Antworte NUR mit validem JSON.",
+                    "messages": [{"role": "user", "content": prompt}],
+                }
+
+                async with httpx.AsyncClient(timeout=60.0) as client:
+                    resp = await client.post(
+                        "https://api.anthropic.com/v1/messages",
+                        headers=headers,
+                        json=payload,
+                    )
+                    if resp.status_code != 200:
+                        errors.append(f"Anthropic API {resp.status_code} at batch {batch_start}")
+                        continue
+
+                    raw = resp.json().get("content", [{}])[0].get("text", "")
+
+                # Parse response
+                import re
+                bracket_match = re.search(r"\[.*\]", raw, re.DOTALL)
+                if not bracket_match:
+                    errors.append(f"No JSON array in response at batch {batch_start}")
+                    continue
+
+                results_list = json.loads(bracket_match.group(0))
+
+                for item in results_list:
+                    idx = item.get("control_index", 0) - 1
+                    if idx < 0 or idx >= len(batch):
+                        continue
+                    ctrl = batch[idx]
+                    ctrl_id = str(ctrl["id"])
+
+                    new_domain = item.get("domain", "")
+                    new_category = item.get("category", "")
+                    new_audience = item.get("target_audience", [])
+
+                    if not isinstance(new_audience, list):
+                        new_audience = []
+
+                    # Build new control_id from domain if domain changed
+                    old_prefix = ctrl["control_id"].split("-")[0] if ctrl["control_id"] else ""
+                    new_prefix = new_domain.upper()[:4] if new_domain else old_prefix
+
+                    if not req.dry_run:
+                        update_parts = []
+                        update_params: dict = {"ctrl_id": ctrl_id}
+
+                        if new_category:
+                            update_parts.append("category = :category")
+                            update_params["category"] = new_category
+
+                        if new_audience:
+                            update_parts.append("target_audience = :target_audience")
+                            update_params["target_audience"] = json.dumps(new_audience)
+
+                        # Note: We do NOT rename control_ids here — that would
+                        # break references and cause unique constraint violations.
+
+                        if update_parts:
+                            update_parts.append("updated_at = NOW()")
+                            db.execute(
+                                text(f"UPDATE canonical_controls SET {', '.join(update_parts)} WHERE id = CAST(:ctrl_id AS uuid)"),
+                                update_params,
+                            )
+                            updated += 1
+
+                if not req.dry_run:
+                    db.commit()
+
+            except Exception as e:
+                errors.append(f"Batch {batch_start}: {str(e)}")
+                db.rollback()
+
+            _domain_backfill_status[backfill_id] = {
+                "status": "running", "total": total, "updated": updated,
+                "progress": f"{min(batch_start + BATCH_SIZE, total)}/{total}",
+                "errors": errors[-10:],
+            }
+
+        _domain_backfill_status[backfill_id] = {
+            "status": "completed", "total": total, "updated": updated,
+            "errors": errors[-50:],
+        }
+        logger.info("Domain backfill %s completed: %d/%d updated", backfill_id, updated, total)
+
+    except Exception as e:
+        logger.error("Domain backfill %s failed: %s", backfill_id, e)
+        _domain_backfill_status[backfill_id] = {"status": "failed", "error": str(e)}
+    finally:
+        db.close()
+
+
+# Strong references to in-flight domain backfill tasks. The event loop keeps
+# only weak references to tasks, so a task nobody else references could be
+# garbage collected before it finishes.
+_domain_backfill_tasks: set = set()
+
+
+@router.post("/generate/backfill-domain")
+async def start_domain_backfill(req: DomainBackfillRequest):
+    """Backfill domain, category, and target_audience for controls using Anthropic API.
+
+    Finds controls where target_audience is NULL and enriches them.
+    Default is dry_run=True (preview only).
+
+    The work runs as a background task (``_run_domain_backfill``); this
+    handler returns immediately with a short ``backfill_id`` that can be
+    polled via ``/generate/domain-backfill-status/{backfill_id}``.
+
+    Returns:
+        Dict with ``status``, ``backfill_id`` and a human-readable ``message``.
+    """
+    import uuid
+    backfill_id = str(uuid.uuid4())[:8]
+    _domain_backfill_status[backfill_id] = {"status": "starting"}
+    task = asyncio.create_task(_run_domain_backfill(req, backfill_id))
+    # Hold the task until it completes, then drop the reference again.
+    _domain_backfill_tasks.add(task)
+    task.add_done_callback(_domain_backfill_tasks.discard)
+    return {"status": "running", "backfill_id": backfill_id,
+            "message": f"Domain backfill started. Poll /generate/domain-backfill-status/{backfill_id}"}
+
+
+@router.get("/generate/domain-backfill-status/{backfill_id}")
+async def get_domain_backfill_status(backfill_id: str):
+    """Return the stored status dict of one domain backfill job.
+
+    Raises:
+        HTTPException: 404 when nothing (or an empty value) is stored under
+            ``backfill_id``.
+    """
+    job_state = _domain_backfill_status.get(backfill_id)
+    if job_state:
+        return job_state
+    raise HTTPException(status_code=404, detail="Domain backfill job not found")
+
+
+# ---------------------------------------------------------------------------
+# Source-Type Backfill — Classify law vs guideline vs standard vs restricted
+# ---------------------------------------------------------------------------
+
+# Request body of POST /generate/backfill-source-type.
+class SourceTypeBackfillRequest(BaseModel):
+    # True (the default) means preview only: the backfill job skips its
+    # UPDATE statements and commits.
+    dry_run: bool = True
+
+
+# In-memory job registry: backfill_id -> latest status dict of that job.
+# Written by start_source_type_backfill and _run_source_type_backfill, read by
+# the status endpoint; nothing in the code shown here ever removes entries.
+_source_type_backfill_status: dict = {}
+
+
+async def _run_source_type_backfill(dry_run: bool, backfill_id: str):
+    """Backfill source_type into source_citation JSONB for all controls.
+
+    Selects every control whose ``source_citation`` has no (or an empty)
+    ``source_type``, classifies it with ``_classify_regulation`` based on the
+    ``source_regulation`` entry of ``generation_metadata`` and writes the
+    resulting type back into the citation. The job state is published in
+    ``_source_type_backfill_status[backfill_id]`` ("running", then
+    "completed" or "failed").
+
+    Args:
+        dry_run: When True nothing is written or committed; ``updated`` then
+            counts the rows that would have been changed and the reported
+            ``distribution`` is empty.
+        backfill_id: Key under which the job status is stored.
+
+    Note:
+        The body only issues synchronous DB calls and never awaits, so the
+        event loop is occupied for the whole run.
+        NOTE(review): consider moving the work to a worker thread.
+    """
+    db = SessionLocal()
+    try:
+        # Find controls with source_citation that lack source_type
+        rows = db.execute(text("""
+            SELECT control_id, source_citation, generation_metadata
+            FROM compliance.canonical_controls
+            WHERE source_citation IS NOT NULL
+              AND (source_citation->>'source_type' IS NULL
+                   OR source_citation->>'source_type' = '')
+        """)).fetchall()
+
+        total = len(rows)
+        updated = 0
+        errors = []
+
+        _source_type_backfill_status[backfill_id] = {
+            "status": "running", "total": total, "updated": 0, "dry_run": dry_run,
+        }
+
+        for row in rows:
+            cid = row[0]
+            # Both JSON columns may arrive already decoded (dict) or as text.
+            citation = row[1] if isinstance(row[1], dict) else json.loads(row[1] or "{}")
+            metadata = row[2] if isinstance(row[2], dict) else json.loads(row[2] or "{}")
+
+            # Get regulation_code from metadata
+            reg_code = metadata.get("source_regulation", "")
+            if not reg_code:
+                # Without a regulation code there is nothing to classify:
+                # record the control and move on.
+                errors.append(f"{cid}: no source_regulation in metadata")
+                continue
+
+            # Classify
+            license_info = _classify_regulation(reg_code)
+            source_type = license_info.get("source_type", "restricted")
+
+            # Update citation
+            citation["source_type"] = source_type
+
+            if not dry_run:
+                db.execute(text("""
+                    UPDATE compliance.canonical_controls
+                    SET source_citation = :citation
+                    WHERE control_id = :cid
+                """), {"citation": json.dumps(citation), "cid": cid})
+            updated += 1
+
+            # Commit in batches of 100 rows; the remainder is committed
+            # after the loop.
+            if not dry_run and updated % 100 == 0:
+                db.commit()
+
+        if not dry_run:
+            db.commit()
+
+        # Count distribution
+        dist_query = db.execute(text("""
+            SELECT source_citation->>'source_type' as st, COUNT(*)
+            FROM compliance.canonical_controls
+            WHERE source_citation IS NOT NULL
+              AND source_citation->>'source_type' IS NOT NULL
+            GROUP BY st
+        """)).fetchall() if not dry_run else []
+
+        distribution = {r[0]: r[1] for r in dist_query}
+
+        _source_type_backfill_status[backfill_id] = {
+            "status": "completed", "total": total, "updated": updated,
+            "dry_run": dry_run, "distribution": distribution,
+            "errors": errors[:50],
+        }
+        logger.info("Source-type backfill %s completed: %d/%d updated (dry_run=%s)",
+                     backfill_id, updated, total, dry_run)
+
+    except Exception as e:
+        # logger.exception keeps the traceback, which the status dict lacks.
+        logger.exception("Source-type backfill %s failed: %s", backfill_id, e)
+        _source_type_backfill_status[backfill_id] = {"status": "failed", "error": str(e)}
+    finally:
+        db.close()
+
+
+# Strong references to in-flight source-type backfill tasks. The event loop
+# keeps only weak references to tasks, so a task nobody else references could
+# be garbage collected before it finishes.
+_source_type_backfill_tasks: set = set()
+
+
+@router.post("/generate/backfill-source-type")
+async def start_source_type_backfill(req: SourceTypeBackfillRequest):
+    """Backfill source_type (law/guideline/standard/restricted) into source_citation JSONB.
+
+    Classifies each control's source as binding law, authority guideline,
+    voluntary standard, or restricted norm based on regulation_code.
+    Default is dry_run=True (preview only).
+
+    The work runs as a background task (``_run_source_type_backfill``); this
+    handler returns immediately.
+
+    Returns:
+        Dict with ``status``, the short ``backfill_id`` to poll, and a
+        human-readable ``message``.
+    """
+    import uuid
+    backfill_id = str(uuid.uuid4())[:8]
+    _source_type_backfill_status[backfill_id] = {"status": "starting"}
+    task = asyncio.create_task(_run_source_type_backfill(req.dry_run, backfill_id))
+    # Hold the task until it completes, then drop the reference again.
+    _source_type_backfill_tasks.add(task)
+    task.add_done_callback(_source_type_backfill_tasks.discard)
+    return {
+        "status": "running",
+        "backfill_id": backfill_id,
+        "message": f"Source-type backfill started. Poll /generate/source-type-backfill-status/{backfill_id}",
+    }
+
+
+@router.get("/generate/source-type-backfill-status/{backfill_id}")
+async def get_source_type_backfill_status(backfill_id: str):
+    """Return the stored status dict of one source-type backfill job.
+
+    Raises:
+        HTTPException: 404 when nothing (or an empty value) is stored under
+            ``backfill_id``.
+    """
+    job_state = _source_type_backfill_status.get(backfill_id)
+    if job_state:
+        return job_state
+    raise HTTPException(status_code=404, detail="Source-type backfill job not found")
--- a/backend-compliance/compliance/api/crosswalk_routes.py
+++ b/backend-compliance/compliance/api/crosswalk_routes.py
@@ -0,0 +1,856 @@
+"""
+FastAPI routes for the Multi-Layer Control Architecture.
+
+Pattern Library, Obligation Extraction, Crosswalk Matrix, and Migration endpoints.
+
+Endpoints:
+  GET  /v1/canonical/patterns                          — All patterns (with filters)
+  GET  /v1/canonical/patterns/{pattern_id}             — Single pattern
+  GET  /v1/canonical/patterns/{pattern_id}/controls    — Controls for a pattern
+
+  POST /v1/canonical/obligations/extract               — Extract obligations from text
+  GET  /v1/canonical/crosswalk                         — Query crosswalk matrix
+  GET  /v1/canonical/crosswalk/stats                   — Coverage statistics
+
+  POST /v1/canonical/migrate/decompose                 — Pass 0a: Obligation extraction
+  POST /v1/canonical/migrate/merge-obligations         — Merge implementation-level dupes
+  POST /v1/canonical/migrate/enrich-obligations        — Add trigger_type, impl metadata
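+  POST /v1/canonical/migrate/compose-atomic            — Pass 0b: Atomic control composition
+  POST /v1/canonical/migrate/batch-submit-0a           — Pass 0a via Anthropic Batch API
+  POST /v1/canonical/migrate/batch-submit-0b           — Pass 0b via Anthropic Batch API
+  GET  /v1/canonical/migrate/batch-status/{batch_id}   — Anthropic batch job status
+  POST /v1/canonical/migrate/batch-process             — Process results of a finished batch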
+  POST /v1/canonical/migrate/link-obligations          — Pass 1: Obligation linkage
+  POST /v1/canonical/migrate/classify-patterns         — Pass 2: Pattern classification
+  POST /v1/canonical/migrate/triage                    — Pass 3: Quality triage
+  POST /v1/canonical/migrate/backfill-crosswalk        — Pass 4: Crosswalk backfill
+  POST /v1/canonical/migrate/deduplicate               — Pass 5: Deduplication
+  GET  /v1/canonical/migrate/status                    — Migration progress
+  GET  /v1/canonical/migrate/decomposition-status      — Decomposition progress
+"""
+
+import json
+import logging
+from typing import Optional, List
+
+from fastapi import APIRouter, HTTPException, Query
+from pydantic import BaseModel
+from sqlalchemy import text
+
+from database import SessionLocal
+
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/v1/canonical", tags=["crosswalk"])
+
+
+# =============================================================================
+# REQUEST / RESPONSE MODELS
+# =============================================================================
+
+
+# Summary view of one control pattern, as returned by GET /patterns.
+# list_patterns copies every field except controls_count from the
+# PatternMatcher pattern attribute of the same name.
+class PatternResponse(BaseModel):
+    id: str
+    name: str
+    name_de: str
+    domain: str
+    category: str
+    description: str
+    objective_template: str
+    severity_default: str
+    implementation_effort_default: str = "m"
+    tags: list = []
+    composable_with: list = []
+    open_anchor_refs: list = []
+    # Looked up by pattern id in _get_pattern_control_counts(); 0 when absent.
+    controls_count: int = 0
+
+
+# Envelope returned by GET /patterns.
+class PatternListResponse(BaseModel):
+    patterns: List[PatternResponse]
+    # Number of entries in `patterns` (after filtering); there is no paging.
+    total: int
+
+
+# Full view of one control pattern, as returned by GET /patterns/{pattern_id}.
+# Extends the summary fields with the pattern's template attributes and its
+# obligation match keywords.
+class PatternDetailResponse(PatternResponse):
+    rationale_template: str = ""
+    requirements_template: list = []
+    test_procedure_template: list = []
+    evidence_template: list = []
+    obligation_match_keywords: list = []
+
+
+# Request body of POST /obligations/extract.
+class ObligationExtractRequest(BaseModel):
+    # Passed to the extractor as chunk_text; its first 500 characters are the
+    # last-resort input for pattern matching.
+    text: str
+    # Forwarded as "" when omitted.
+    regulation_code: Optional[str] = None
+    article: Optional[str] = None
+    paragraph: Optional[str] = None
+
+
+# Result of POST /obligations/extract: the extracted obligation plus the
+# outcome of the keyword pattern match.
+class ObligationExtractResponse(BaseModel):
+    obligation_id: Optional[str] = None
+    obligation_title: Optional[str] = None
+    obligation_text: Optional[str] = None
+    method: str = "none"
+    confidence: float = 0.0
+    regulation_id: Optional[str] = None
+    # pattern_id stays None and pattern_confidence 0 when no pattern matched.
+    pattern_id: Optional[str] = None
+    pattern_confidence: float = 0.0
+
+
+# One row of the crosswalk_matrix table as exposed by GET /crosswalk.
+# query_crosswalk maps NULL regulation_code to "", NULL confidence to 0.0 and
+# NULL source to "auto".
+class CrosswalkRow(BaseModel):
+    regulation_code: str = ""
+    article: Optional[str] = None
+    obligation_id: Optional[str] = None
+    pattern_id: Optional[str] = None
+    master_control_id: Optional[str] = None
+    confidence: float = 0.0
+    source: str = "auto"
+
+
+# Envelope returned by GET /crosswalk.
+class CrosswalkQueryResponse(BaseModel):
+    # The requested page (limit/offset applied).
+    rows: List[CrosswalkRow]
+    # Count of all rows matching the filters, independent of the page size.
+    total: int
+
+
+# Aggregates over crosswalk_matrix returned by GET /crosswalk/stats.
+class CrosswalkStatsResponse(BaseModel):
+    total_rows: int = 0
+    # The next four fields are DISTINCT counts; empty regulation codes and
+    # NULL ids are excluded.
+    regulations_covered: int = 0
+    obligations_linked: int = 0
+    patterns_used: int = 0
+    controls_linked: int = 0
+    # regulation_code -> number of crosswalk rows for that regulation.
+    coverage_by_regulation: dict = {}
+
+
+# Request body shared by the /migrate/* pass endpoints. Not every pass uses
+# every field: of the handlers visible here, decompose forwards all five,
+# compose-atomic forwards limit/batch_size/use_anthropic, and
+# link-obligations forwards only limit.
+class MigrationRequest(BaseModel):
+    limit: int = 0  # 0 = no limit
+    batch_size: int = 0  # 0 = auto (5 for Anthropic, 1 for Ollama)
+    use_anthropic: bool = False  # Use Anthropic API instead of Ollama
+    category_filter: Optional[str] = None  # Comma-separated categories
+    source_filter: Optional[str] = None  # Comma-separated source regulations (ILIKE match)
+
+
+# Request body of the /migrate/batch-submit-0a and -0b endpoints.
+# batch-submit-0b forwards only limit and batch_size; the two filters are
+# used by batch-submit-0a alone.
+class BatchSubmitRequest(BaseModel):
+    limit: int = 0
+    batch_size: int = 5
+    category_filter: Optional[str] = None
+    source_filter: Optional[str] = None
+
+
+# Request body of POST /migrate/batch-process.
+class BatchProcessRequest(BaseModel):
+    # Id of the batch whose results should be fetched and processed.
+    batch_id: str
+    pass_type: str = "0a"  # "0a" or "0b"
+
+
+# Common response of the /migrate/* POST endpoints.
+class MigrationResponse(BaseModel):
+    # "completed" for the synchronous passes; the batch endpoints instead use
+    # the "status" entry popped from the service result.
+    status: str = "completed"
+    # Statistics dict exactly as returned by the executed pass.
+    stats: dict = {}
+
+
+# Migration progress counters and coverage percentages.
+# NOTE(review): presumably the response of GET /migrate/status listed in the
+# module docstring; that handler is not visible in this part of the file.
+class MigrationStatusResponse(BaseModel):
+    total_controls: int = 0
+    has_obligation: int = 0
+    has_pattern: int = 0
+    fully_linked: int = 0
+    deprecated: int = 0
+    coverage_obligation_pct: float = 0.0
+    coverage_pattern_pct: float = 0.0
+    coverage_full_pct: float = 0.0
+
+
+# Decomposition (Pass 0a/0b) progress counters and percentages.
+# NOTE(review): presumably the response of GET /migrate/decomposition-status
+# listed in the module docstring; that handler is not visible in this part of
+# the file.
+class DecompositionStatusResponse(BaseModel):
+    rich_controls: int = 0
+    decomposed_controls: int = 0
+    total_candidates: int = 0
+    validated: int = 0
+    rejected: int = 0
+    composed: int = 0
+    atomic_controls: int = 0
+    merged: int = 0
+    enriched: int = 0
+    ready_for_pass0b: int = 0
+    decomposition_pct: float = 0.0
+    composition_pct: float = 0.0
+
+
+# =============================================================================
+# PATTERN LIBRARY ENDPOINTS
+# =============================================================================
+
+
+@router.get("/patterns", response_model=PatternListResponse)
+async def list_patterns(
+    domain: Optional[str] = Query(None, description="Filter by domain (e.g. AUTH, CRYP)"),
+    category: Optional[str] = Query(None, description="Filter by category"),
+    tag: Optional[str] = Query(None, description="Filter by tag"),
+):
+    """Return the control pattern library, optionally narrowed by filters.
+
+    ``domain`` is upper-cased and ``category`` lower-cased before the exact
+    comparison; ``tag`` matches case-insensitively against each pattern's
+    tags. Every returned pattern carries the number of controls recorded for
+    it by ``_get_pattern_control_counts``.
+    """
+    from compliance.services.pattern_matcher import PatternMatcher
+
+    library = PatternMatcher()
+    library._load_patterns()
+    library._build_keyword_index()
+
+    selected = library._patterns
+
+    if domain:
+        wanted_domain = domain.upper()
+        selected = [pat for pat in selected if pat.domain == wanted_domain]
+    if category:
+        wanted_category = category.lower()
+        selected = [pat for pat in selected if pat.category == wanted_category]
+    if tag:
+        wanted_tag = tag.lower()
+        selected = [
+            pat for pat in selected
+            if any(candidate.lower() == wanted_tag for candidate in pat.tags)
+        ]
+
+    # Controls per pattern, read from the DB
+    counts_by_pattern = _get_pattern_control_counts()
+
+    items = [
+        PatternResponse(
+            id=pat.id,
+            name=pat.name,
+            name_de=pat.name_de,
+            domain=pat.domain,
+            category=pat.category,
+            description=pat.description,
+            objective_template=pat.objective_template,
+            severity_default=pat.severity_default,
+            implementation_effort_default=pat.implementation_effort_default,
+            tags=pat.tags,
+            composable_with=pat.composable_with,
+            open_anchor_refs=pat.open_anchor_refs,
+            controls_count=counts_by_pattern.get(pat.id, 0),
+        )
+        for pat in selected
+    ]
+
+    return PatternListResponse(patterns=items, total=len(items))
+
+
+@router.get("/patterns/{pattern_id}", response_model=PatternDetailResponse)
+async def get_pattern(pattern_id: str):
+    """Return the full definition of one control pattern.
+
+    Raises:
+        HTTPException: 404 when the pattern library has no entry for
+            ``pattern_id``.
+    """
+    from compliance.services.pattern_matcher import PatternMatcher
+
+    library = PatternMatcher()
+    library._load_patterns()
+
+    found = library.get_pattern(pattern_id)
+    if not found:
+        raise HTTPException(status_code=404, detail=f"Pattern {pattern_id} not found")
+
+    counts_by_pattern = _get_pattern_control_counts()
+    linked_controls = counts_by_pattern.get(found.id, 0)
+
+    return PatternDetailResponse(
+        # identity and classification
+        id=found.id,
+        name=found.name,
+        name_de=found.name_de,
+        domain=found.domain,
+        category=found.category,
+        description=found.description,
+        # templates
+        objective_template=found.objective_template,
+        rationale_template=found.rationale_template,
+        requirements_template=found.requirements_template,
+        test_procedure_template=found.test_procedure_template,
+        evidence_template=found.evidence_template,
+        # defaults and relations
+        severity_default=found.severity_default,
+        implementation_effort_default=found.implementation_effort_default,
+        tags=found.tags,
+        composable_with=found.composable_with,
+        open_anchor_refs=found.open_anchor_refs,
+        obligation_match_keywords=found.obligation_match_keywords,
+        controls_count=linked_controls,
+    )
+
+
+@router.get("/patterns/{pattern_id}/controls")
+async def get_pattern_controls(
+    pattern_id: str,
+    limit: int = Query(50, ge=1, le=500),
+    offset: int = Query(0, ge=0),
+):
+    """Return one page of the non-deprecated controls linked to a pattern.
+
+    The pattern id is upper-cased before the lookup. ``total`` counts all
+    matching controls regardless of ``limit`` and ``offset``.
+    """
+
+    def _decode_obligation_ids(raw):
+        # The column may arrive as JSON text; undecodable text counts as empty.
+        if isinstance(raw, str):
+            try:
+                raw = json.loads(raw)
+            except (json.JSONDecodeError, TypeError):
+                raw = []
+        return raw or []
+
+    db = SessionLocal()
+    try:
+        page = db.execute(
+            text("""
+                SELECT id, control_id, title, objective, severity,
+                       release_state, category, obligation_ids
+                FROM canonical_controls
+                WHERE pattern_id = :pattern_id
+                  AND release_state NOT IN ('deprecated')
+                ORDER BY control_id
+                LIMIT :limit OFFSET :offset
+            """),
+            {"pattern_id": pattern_id.upper(), "limit": limit, "offset": offset},
+        ).fetchall()
+
+        total = db.execute(
+            text("""
+                SELECT count(*) FROM canonical_controls
+                WHERE pattern_id = :pattern_id
+                  AND release_state NOT IN ('deprecated')
+            """),
+            {"pattern_id": pattern_id.upper()},
+        ).fetchone()[0]
+
+        controls = [
+            {
+                "id": str(record[0]),
+                "control_id": record[1],
+                "title": record[2],
+                "objective": record[3],
+                "severity": record[4],
+                "release_state": record[5],
+                "category": record[6],
+                "obligation_ids": _decode_obligation_ids(record[7]),
+            }
+            for record in page
+        ]
+
+        return {"controls": controls, "total": total}
+    finally:
+        db.close()
+
+
+# =============================================================================
+# OBLIGATION EXTRACTION ENDPOINT
+# =============================================================================
+
+
+@router.post("/obligations/extract", response_model=ObligationExtractResponse)
+async def extract_obligation(req: ObligationExtractRequest):
+    """Extract an obligation from the submitted text and keyword-match a pattern.
+
+    The pattern match runs on the obligation text, falling back to the
+    obligation title and finally to the first 500 characters of the request
+    text. Without a match ``pattern_id`` is None and ``pattern_confidence`` 0.
+    """
+    from compliance.services.obligation_extractor import ObligationExtractor
+    from compliance.services.pattern_matcher import PatternMatcher
+
+    extractor = ObligationExtractor()
+    await extractor.initialize()
+
+    found = await extractor.extract(
+        chunk_text=req.text,
+        regulation_code=req.regulation_code or "",
+        article=req.article,
+        paragraph=req.paragraph,
+    )
+
+    # Pattern lookup via the keyword tier only
+    library = PatternMatcher()
+    library._load_patterns()
+    library._build_keyword_index()
+
+    match_input = found.obligation_text or found.obligation_title or req.text[:500]
+    match = library._tier1_keyword(match_input, found.regulation_id)
+
+    if match:
+        matched_pattern, matched_confidence = match.pattern_id, match.confidence
+    else:
+        matched_pattern, matched_confidence = None, 0
+
+    return ObligationExtractResponse(
+        obligation_id=found.obligation_id,
+        obligation_title=found.obligation_title,
+        obligation_text=found.obligation_text,
+        method=found.method,
+        confidence=found.confidence,
+        regulation_id=found.regulation_id,
+        pattern_id=matched_pattern,
+        pattern_confidence=matched_confidence,
+    )
+
+
+# =============================================================================
+# CROSSWALK MATRIX ENDPOINTS
+# =============================================================================
+
+
+@router.get("/crosswalk", response_model=CrosswalkQueryResponse)
+async def query_crosswalk(
+    regulation_code: Optional[str] = Query(None),
+    article: Optional[str] = Query(None),
+    obligation_id: Optional[str] = Query(None),
+    pattern_id: Optional[str] = Query(None),
+    limit: int = Query(100, ge=1, le=1000),
+    offset: int = Query(0, ge=0),
+):
+    """Return one page of crosswalk_matrix rows matching the given filters.
+
+    Each non-empty filter adds an equality condition on the column of the
+    same name. ``total`` is the number of all matching rows, independent of
+    ``limit`` and ``offset``.
+    """
+    # (column, bind name, requested value); column names are fixed here, only
+    # the values come from the request and they are always bound.
+    equality_filters = (
+        ("regulation_code", "reg", regulation_code),
+        ("article", "art", article),
+        ("obligation_id", "obl", obligation_id),
+        ("pattern_id", "pat", pattern_id),
+    )
+
+    db = SessionLocal()
+    try:
+        conditions = ["1=1"]
+        params = {"limit": limit, "offset": offset}
+        for column, bind_name, wanted in equality_filters:
+            if wanted:
+                conditions.append(f"{column} = :{bind_name}")
+                params[bind_name] = wanted
+
+        where = " AND ".join(conditions)
+
+        page = db.execute(
+            text(f"""
+                SELECT regulation_code, article, obligation_id,
+                       pattern_id, master_control_id, confidence, source
+                FROM crosswalk_matrix
+                WHERE {where}
+                ORDER BY regulation_code, article
+                LIMIT :limit OFFSET :offset
+            """),
+            params,
+        ).fetchall()
+
+        total = db.execute(
+            text(f"SELECT count(*) FROM crosswalk_matrix WHERE {where}"),
+            params,
+        ).fetchone()[0]
+
+        crosswalk_rows = []
+        for record in page:
+            crosswalk_rows.append(CrosswalkRow(
+                regulation_code=record[0] or "",
+                article=record[1],
+                obligation_id=record[2],
+                pattern_id=record[3],
+                master_control_id=record[4],
+                confidence=float(record[5] or 0),
+                source=record[6] or "auto",
+            ))
+
+        return CrosswalkQueryResponse(rows=crosswalk_rows, total=total)
+    finally:
+        db.close()
+
+
+@router.get("/crosswalk/stats", response_model=CrosswalkStatsResponse)
+async def crosswalk_stats():
+    """Return aggregate coverage figures for the crosswalk matrix.
+
+    Reports the total row count, the distinct numbers of regulations,
+    obligations, patterns and master controls, and the row count per
+    regulation code.
+    """
+    db = SessionLocal()
+    try:
+        totals = db.execute(text("""
+            SELECT
+                count(*) AS total,
+                count(DISTINCT regulation_code) FILTER (WHERE regulation_code != '') AS regs,
+                count(DISTINCT obligation_id) FILTER (WHERE obligation_id IS NOT NULL) AS obls,
+                count(DISTINCT pattern_id) FILTER (WHERE pattern_id IS NOT NULL) AS pats,
+                count(DISTINCT master_control_id) FILTER (WHERE master_control_id IS NOT NULL) AS ctrls
+            FROM crosswalk_matrix
+        """)).fetchone()
+
+        # Rows per regulation, largest first
+        per_regulation = db.execute(text("""
+            SELECT regulation_code, count(*) AS cnt
+            FROM crosswalk_matrix
+            WHERE regulation_code != ''
+            GROUP BY regulation_code
+            ORDER BY cnt DESC
+        """)).fetchall()
+
+        coverage = {}
+        for entry in per_regulation:
+            coverage[entry[0]] = entry[1]
+
+        return CrosswalkStatsResponse(
+            total_rows=totals[0],
+            regulations_covered=totals[1],
+            obligations_linked=totals[2],
+            patterns_used=totals[3],
+            controls_linked=totals[4],
+            coverage_by_regulation=coverage,
+        )
+    finally:
+        db.close()
+
+
+# =============================================================================
+# MIGRATION ENDPOINTS
+# =============================================================================
+
+
+@router.post("/migrate/decompose", response_model=MigrationResponse)
+async def migrate_decompose(req: MigrationRequest):
+    """Pass 0a: extract obligation candidates from rich controls.
+
+    With use_anthropic=true the pass uses the Anthropic API with prompt
+    caching and content batching (multiple controls per API call).
+
+    Raises:
+        HTTPException: 500 carrying the error text when the pass fails.
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    session = SessionLocal()
+    try:
+        pass_stats = await DecompositionPass(db=session).run_pass0a(
+            limit=req.limit,
+            batch_size=req.batch_size,
+            use_anthropic=req.use_anthropic,
+            category_filter=req.category_filter,
+            source_filter=req.source_filter,
+        )
+        return MigrationResponse(status="completed", stats=pass_stats)
+    except Exception as exc:
+        logger.error("Decomposition pass 0a failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/merge-obligations", response_model=MigrationResponse)
+async def migrate_merge_obligations():
+    """Merge implementation-level duplicate obligations within each parent.
+
+    Meant to run AFTER Pass 0a and BEFORE Pass 0b. Rule-based, no LLM calls:
+    obligations sharing a similar action+object are merged into the more
+    abstract survivor and the concrete duplicate is marked as 'merged'.
+
+    Raises:
+        HTTPException: 500 carrying the error text when the pass fails.
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    session = SessionLocal()
+    try:
+        pass_stats = DecompositionPass(db=session).run_merge_pass()
+        return MigrationResponse(status="completed", stats=pass_stats)
+    except Exception as exc:
+        logger.error("Merge pass failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/enrich-obligations", response_model=MigrationResponse)
+async def migrate_enrich_obligations():
+    """Add trigger_type and is_implementation_specific metadata to obligations.
+
+    Meant to run AFTER the merge pass and BEFORE Pass 0b. Rule-based, no LLM
+    calls: trigger_type (event/periodic/continuous) is classified from the
+    obligation text and implementation-specific obligations (concrete
+    tools/protocols) are detected.
+
+    Raises:
+        HTTPException: 500 carrying the error text when the pass fails.
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    session = SessionLocal()
+    try:
+        pass_stats = DecompositionPass(db=session).enrich_obligations()
+        return MigrationResponse(status="completed", stats=pass_stats)
+    except Exception as exc:
+        logger.error("Enrich pass failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/compose-atomic", response_model=MigrationResponse)
+async def migrate_compose_atomic(req: MigrationRequest):
+    """Pass 0b: compose atomic controls from obligation candidates.
+
+    With use_anthropic=true the pass uses the Anthropic API with prompt
+    caching and content batching (multiple obligations per API call). Only
+    limit, batch_size and use_anthropic of the request are forwarded.
+
+    Raises:
+        HTTPException: 500 carrying the error text when the pass fails.
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    session = SessionLocal()
+    try:
+        pass_stats = await DecompositionPass(db=session).run_pass0b(
+            limit=req.limit,
+            batch_size=req.batch_size,
+            use_anthropic=req.use_anthropic,
+        )
+        return MigrationResponse(status="completed", stats=pass_stats)
+    except Exception as exc:
+        logger.error("Decomposition pass 0b failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/batch-submit-0a", response_model=MigrationResponse)
+async def batch_submit_pass0a(req: BatchSubmitRequest):
+    """Submit Pass 0a as an Anthropic Batch API job (50% cost reduction).
+
+    Returns a batch_id for polling; Anthropic processes the results
+    asynchronously within 24 hours. The response status is the "status"
+    entry of the submission result ("submitted" when absent); the remaining
+    entries are returned as stats.
+
+    Raises:
+        HTTPException: 500 carrying the error text when the submission fails.
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    session = SessionLocal()
+    try:
+        submission = await DecompositionPass(db=session).submit_batch_pass0a(
+            limit=req.limit,
+            batch_size=req.batch_size,
+            category_filter=req.category_filter,
+            source_filter=req.source_filter,
+        )
+        state = submission.pop("status", "submitted")
+        return MigrationResponse(status=state, stats=submission)
+    except Exception as exc:
+        logger.error("Batch submit 0a failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/batch-submit-0b", response_model=MigrationResponse)
+async def batch_submit_pass0b(req: BatchSubmitRequest):
+    """Submit Pass 0b as an Anthropic Batch API job (50% cost reduction).
+
+    Only limit and batch_size of the request are forwarded. The response
+    status is the "status" entry of the submission result ("submitted" when
+    absent); the remaining entries are returned as stats.
+
+    Raises:
+        HTTPException: 500 carrying the error text when the submission fails.
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    session = SessionLocal()
+    try:
+        submission = await DecompositionPass(db=session).submit_batch_pass0b(
+            limit=req.limit,
+            batch_size=req.batch_size,
+        )
+        state = submission.pop("status", "submitted")
+        return MigrationResponse(status=state, stats=submission)
+    except Exception as exc:
+        logger.error("Batch submit 0b failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.get("/migrate/batch-status/{batch_id}")
+async def batch_check_status(batch_id: str):
+    """Check processing status of an Anthropic batch job.
+
+    Returns:
+        Whatever ``check_batch_status`` reports for ``batch_id``, unchanged.
+
+    Raises:
+        HTTPException: 500 carrying the error text when the lookup fails.
+    """
+    from compliance.services.decomposition_pass import check_batch_status
+
+    try:
+        return await check_batch_status(batch_id)
+    except Exception as e:
+        # Log like the sibling migration endpoints do; previously the failure
+        # only reached the client and left no trace on the server.
+        logger.error("Batch status check failed: %s", e)
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+@router.post("/migrate/batch-process", response_model=MigrationResponse)
+async def batch_process_results(req: BatchProcessRequest):
+    """Fetch and process the results of a completed Anthropic batch.
+
+    Intended to be called once batch-status shows processing_status='ended'.
+    The result's "status" entry (default "completed") becomes the response
+    status and the remaining entries are returned as stats.
+    """
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    session = SessionLocal()
+    try:
+        worker = DecompositionPass(db=session)
+        outcome = await worker.process_batch_results(batch_id=req.batch_id, pass_type=req.pass_type)
+        state = outcome.pop("status", "completed")
+        return MigrationResponse(status=state, stats=outcome)
+    except Exception as exc:
+        logger.error("Batch process failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/link-obligations", response_model=MigrationResponse)
+async def migrate_link_obligations(req: MigrationRequest):
+    """Run migration pass 1: link controls to obligations via source_citation article."""
+    from compliance.services.pipeline_adapter import MigrationPasses
+
+    session = SessionLocal()
+    try:
+        passes = MigrationPasses(db=session)
+        await passes.initialize()
+        linkage_stats = await passes.run_pass1_obligation_linkage(limit=req.limit)
+        return MigrationResponse(stats=linkage_stats, status="completed")
+    except Exception as exc:
+        logger.error("Migration pass 1 failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/classify-patterns", response_model=MigrationResponse)
+async def migrate_classify_patterns(req: MigrationRequest):
+    """Run migration pass 2: classify controls into patterns via keyword matching."""
+    from compliance.services.pipeline_adapter import MigrationPasses
+
+    session = SessionLocal()
+    try:
+        passes = MigrationPasses(db=session)
+        await passes.initialize()
+        pattern_stats = await passes.run_pass2_pattern_classification(limit=req.limit)
+        return MigrationResponse(stats=pattern_stats, status="completed")
+    except Exception as exc:
+        logger.error("Migration pass 2 failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/triage", response_model=MigrationResponse)
+async def migrate_triage():
+    """Run migration pass 3 (quality triage): categorize by linkage completeness."""
+    from compliance.services.pipeline_adapter import MigrationPasses
+
+    session = SessionLocal()
+    try:
+        passes = MigrationPasses(db=session)
+        triage_stats = passes.run_pass3_quality_triage()
+        return MigrationResponse(stats=triage_stats, status="completed")
+    except Exception as exc:
+        logger.error("Migration pass 3 failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/backfill-crosswalk", response_model=MigrationResponse)
+async def migrate_backfill_crosswalk():
+    """Run migration pass 4: create crosswalk rows for linked controls."""
+    from compliance.services.pipeline_adapter import MigrationPasses
+
+    session = SessionLocal()
+    try:
+        passes = MigrationPasses(db=session)
+        backfill_stats = passes.run_pass4_crosswalk_backfill()
+        return MigrationResponse(stats=backfill_stats, status="completed")
+    except Exception as exc:
+        logger.error("Migration pass 4 failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.post("/migrate/deduplicate", response_model=MigrationResponse)
+async def migrate_deduplicate():
+    """Run migration pass 5: mark duplicate controls (same obligation + pattern)."""
+    from compliance.services.pipeline_adapter import MigrationPasses
+
+    session = SessionLocal()
+    try:
+        passes = MigrationPasses(db=session)
+        dedup_stats = passes.run_pass5_deduplication()
+        return MigrationResponse(stats=dedup_stats, status="completed")
+    except Exception as exc:
+        logger.error("Migration pass 5 failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.get("/migrate/status", response_model=MigrationStatusResponse)
+async def migration_status():
+    """Report overall migration progress as a MigrationStatusResponse."""
+    from compliance.services.pipeline_adapter import MigrationPasses
+
+    session = SessionLocal()
+    try:
+        passes = MigrationPasses(db=session)
+        progress = passes.migration_status()
+        return MigrationStatusResponse(**progress)
+    except Exception as exc:
+        logger.error("Migration status failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+@router.get("/migrate/decomposition-status", response_model=DecompositionStatusResponse)
+async def decomposition_status():
+    """Report decomposition progress (Pass 0a/0b) as a DecompositionStatusResponse."""
+    from compliance.services.decomposition_pass import DecompositionPass
+
+    session = SessionLocal()
+    try:
+        worker = DecompositionPass(db=session)
+        progress = worker.decomposition_status()
+        return DecompositionStatusResponse(**progress)
+    except Exception as exc:
+        logger.error("Decomposition status failed: %s", exc)
+        raise HTTPException(status_code=500, detail=str(exc))
+    finally:
+        session.close()
+
+
+# =============================================================================
+# BATCH DEDUP ENDPOINTS
+# =============================================================================
+
+
+# Runner of the batch-dedup request currently in flight (None when idle); read by the status endpoint
+_batch_dedup_runner = None
+
+
+@router.post("/migrate/batch-dedup", response_model=MigrationResponse)
+async def migrate_batch_dedup(
+    dry_run: bool = Query(False, description="Preview mode — no DB changes"),
+    hint_filter: Optional[str] = Query(None, description="Only process hints matching this prefix"),
+):
+    """Batch dedup: reduce ~85k Pass 0b controls to ~18-25k masters.
+
+    Phase 1: Groups by merge_group_hint, picks best quality master, links rest.
+    Phase 2: Cross-group embedding search for semantically similar masters.
+    """
+    global _batch_dedup_runner
+    from compliance.services.batch_dedup_runner import BatchDedupRunner
+    db = SessionLocal()
+    runner = None
+    try:
+        runner = _batch_dedup_runner = BatchDedupRunner(db=db)  # published for status polling
+        stats = await runner.run(dry_run=dry_run, hint_filter=hint_filter)
+        return MigrationResponse(status="completed", stats=stats)
+    except Exception as e:
+        logger.error("Batch dedup failed: %s", e)
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        if _batch_dedup_runner is runner:  # leave an overlapping request's runner in place
+            _batch_dedup_runner = None
+        db.close()
+
+
+@router.get("/migrate/batch-dedup/status")
+async def batch_dedup_status():
+    """Report live batch-dedup progress, or aggregate DB counts when no run is active."""
+    active_runner = _batch_dedup_runner
+    if active_runner is not None:
+        return {"running": True, **active_runner.get_status()}
+    # Idle: fall back to aggregate counts from the database
+    session = SessionLocal()
+    try:
+        totals = session.execute(text("""
+            SELECT
+                count(*) FILTER (WHERE decomposition_method = 'pass0b') AS total_pass0b,
+                count(*) FILTER (WHERE decomposition_method = 'pass0b'
+                                   AND release_state = 'duplicate') AS duplicates,
+                count(*) FILTER (WHERE decomposition_method = 'pass0b'
+                                   AND release_state != 'duplicate'
+                                   AND release_state != 'deprecated') AS masters
+            FROM canonical_controls
+        """)).fetchone()
+        pending_row = session.execute(text(
+            "SELECT count(*) FROM control_dedup_reviews WHERE review_status = 'pending'"
+        )).fetchone()
+        return {
+            "running": False,
+            "total_pass0b": totals[0],
+            "duplicates": totals[1],
+            "masters": totals[2],
+            "pending_reviews": pending_row[0],
+        }
+    finally:
+        session.close()
+
+
+# =============================================================================
+# HELPERS
+# =============================================================================
+
+
+def _get_pattern_control_counts() -> dict[str, int]:
+    """Map pattern_id -> number of non-deprecated controls; returns {} if the query fails."""
+    db = SessionLocal()
+    try:
+        return {row[0]: row[1] for row in db.execute(text("""
+            SELECT pattern_id, count(*) AS cnt
+            FROM canonical_controls
+            WHERE pattern_id IS NOT NULL AND pattern_id != ''
+              AND release_state NOT IN ('deprecated')
+            GROUP BY pattern_id
+        """)).fetchall()}
+    except Exception as e:
+        logger.warning("Pattern control count query failed: %s", e)  # best effort, but not silent
+        return {}
+    finally:
+        db.close()
--- a/backend-compliance/compliance/api/dashboard_routes.py
+++ b/backend-compliance/compliance/api/dashboard_routes.py
@@ -5,16 +5,23 @@ Endpoints:
 - /dashboard: Main compliance dashboard
 - /dashboard/executive: Executive summary for managers
 - /dashboard/trend: Compliance score trend over time
+- /dashboard/roadmap: Prioritised controls in 4 buckets
+- /dashboard/module-status: Completion status of each SDK module
+- /dashboard/next-actions: Top 5 most important actions
+- /dashboard/snapshot: Save / query compliance score snapshots
 - /score: Quick compliance score
 - /reports: Report generation
 """

 import logging
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, date, timedelta
 from calendar import month_abbr
-from typing import Optional
+from typing import Optional, Dict, Any, List
+from decimal import Decimal

 from fastapi import APIRouter, Depends, HTTPException, Query
+from pydantic import BaseModel
+from sqlalchemy import text
 from sqlalchemy.orm import Session

 from classroom_engine.database import get_db
@@ -25,15 +32,24 @@ from ..db import (
    ControlRepository,
    EvidenceRepository,
    RiskRepository,
+    AssertionDB,
 )
 from .schemas import (
    DashboardResponse,
+    MultiDimensionalScore,
    ExecutiveDashboardResponse,
    TrendDataPoint,
    RiskSummary,
    DeadlineItem,
    TeamWorkloadItem,
+    TraceabilityAssertion,
+    TraceabilityEvidence,
+    TraceabilityCoverage,
+    TraceabilityControl,
+    TraceabilityMatrixResponse,
 )
+from .tenant_utils import get_tenant_id as _get_tenant_id
+from .db_utils import row_to_dict as _row_to_dict

 logger = logging.getLogger(__name__)
 router = APIRouter(tags=["compliance-dashboard"])
@@ -86,6 +102,14 @@ async def get_dashboard(db: Session = Depends(get_db)):
    # or compute from by_status dict
    score = ctrl_stats.get("compliance_score", 0.0)

+    # Multi-dimensional score (Anti-Fake-Evidence)
+    try:
+        ms = ctrl_repo.get_multi_dimensional_score()
+        multi_score = MultiDimensionalScore(**ms)
+    except Exception as e:
+        logger.warning(f"Failed to compute multi-dimensional score: {e}")
+        multi_score = None
+
    return DashboardResponse(
        compliance_score=round(score, 1),
        total_regulations=len(regulations),
@@ -98,6 +122,7 @@ async def get_dashboard(db: Session = Depends(get_db)):
        total_risks=len(risks),
        risks_by_level=risks_by_level,
        recent_activity=[],
+        multi_score=multi_score,
    )


@@ -116,11 +141,18 @@ async def get_compliance_score(db: Session = Depends(get_db)):
    else:
        score = 0

+    # Multi-dimensional score (Anti-Fake-Evidence)
+    try:
+        multi_score = ctrl_repo.get_multi_dimensional_score()
+    except Exception:
+        multi_score = None
+
    return {
        "score": round(score, 1),
        "total_controls": total,
        "passing_controls": passing,
        "partial_controls": partial,
+        "multi_score": multi_score,
    }


@@ -322,6 +354,424 @@ async def get_compliance_trend(
    }


+# ============================================================================
+# Dashboard Extended — Roadmap, Module-Status, Next-Actions, Snapshots
+# ============================================================================
+
+# Priority weight per control category; the call sites fall back to 1 for categories not listed here
+_PRIORITY_WEIGHTS = {"legal": 5, "security": 3, "best_practice": 1, "operational": 2}
+
+# SDK modules for /dashboard/module-status: key, display label, and the table whose per-tenant rows are counted
+_MODULE_DEFS: List[Dict[str, str]] = [
+    {"key": "vvt", "label": "VVT", "table": "compliance_vvt_activities"},
+    {"key": "tom", "label": "TOM", "table": "compliance_toms"},
+    {"key": "dsfa", "label": "DSFA", "table": "compliance_dsfa_assessments"},
+    {"key": "loeschfristen", "label": "Loeschfristen", "table": "compliance_loeschfristen"},
+    {"key": "risks", "label": "Risiken", "table": "compliance_risks"},
+    {"key": "controls", "label": "Controls", "table": "compliance_controls"},
+    {"key": "evidence", "label": "Nachweise", "table": "compliance_evidence"},
+    {"key": "obligations", "label": "Pflichten", "table": "compliance_obligations"},
+    {"key": "incidents", "label": "Vorfaelle", "table": "compliance_notfallplan_incidents"},
+    {"key": "vendor", "label": "Auftragsverarbeiter", "table": "compliance_vendor_assessments"},
+    {"key": "legal_templates", "label": "Rechtl. Dokumente", "table": "compliance_legal_templates"},
+    {"key": "training", "label": "Schulungen", "table": "training_modules"},
+    {"key": "audit", "label": "Audit", "table": "compliance_audit_sessions"},
+    {"key": "security_backlog", "label": "Security-Backlog", "table": "compliance_security_backlog"},
+    {"key": "quality", "label": "Qualitaet", "table": "compliance_quality_items"},
+]
+
+
+@router.get("/dashboard/roadmap")
+async def get_dashboard_roadmap(
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """Prioritised controls in 4 buckets: Quick Wins, Must Have, Should Have, Nice to Have.
+
+    Controls with status "pass" are skipped. The bucket follows from the category
+    weight and from whether next_review_at is in the past; each bucket is ordered
+    by days overdue, most overdue first.
+    """
+    # NOTE(review): tenant_id is resolved but not used to filter the controls — confirm.
+    ctrl_repo = ControlRepository(db)
+    controls = ctrl_repo.get_all()
+    today = datetime.utcnow().date()
+
+    buckets: Dict[str, list] = {name: [] for name in ("quick_wins", "must_have", "should_have", "nice_to_have")}
+
+    for ctrl in controls:
+        status = ctrl.status.value if ctrl.status else "planned"
+        if status == "pass":
+            continue  # already done
+
+        # Unknown or missing categories weigh 1
+        weight = _PRIORITY_WEIGHTS.get(getattr(ctrl, "category", "best_practice"), 1)
+        days_overdue = 0  # negative while the review date is still ahead
+        if ctrl.next_review_at:
+            review_date = ctrl.next_review_at.date() if hasattr(ctrl.next_review_at, "date") else ctrl.next_review_at
+            days_overdue = (today - review_date).days
+
+        item = {
+            "id": str(ctrl.id),
+            "control_id": ctrl.control_id,
+            "title": ctrl.title,
+            "status": status,
+            "domain": ctrl.domain.value if ctrl.domain else "unknown",
+            "owner": ctrl.owner,
+            "next_review_at": ctrl.next_review_at.isoformat() if ctrl.next_review_at else None,
+            "days_overdue": max(0, days_overdue),
+            "weight": weight,
+        }
+
+        if weight >= 5 and days_overdue > 0:
+            buckets["quick_wins"].append(item)
+        elif weight >= 4:
+            buckets["must_have"].append(item)
+        elif weight >= 2:
+            buckets["should_have"].append(item)
+        else:
+            buckets["nice_to_have"].append(item)
+
+    # Sort each bucket by days overdue, descending
+    for key in buckets:
+        buckets[key].sort(key=lambda x: x["days_overdue"], reverse=True)
+
+    return {
+        "buckets": buckets,
+        "counts": {k: len(v) for k, v in buckets.items()},
+        "generated_at": datetime.utcnow().isoformat(),
+    }
+
+
+@router.get("/dashboard/module-status")
+async def get_module_status(
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """Completion status for each SDK module based on DB record counts.
+
+    A module whose count query fails is logged and reported with count 0.
+    """
+    modules = []
+    for mod in _MODULE_DEFS:
+        try:
+            query = text(f"SELECT COUNT(*) FROM {mod['table']} WHERE tenant_id = :tid")
+            row = db.execute(query, {"tid": tenant_id}).fetchone()
+            count = int(row[0]) if row else 0
+        except Exception as e:
+            # Roll back: a failed statement can abort the transaction and break every later count.
+            logger.warning("Module status count failed for %s: %s", mod["table"], e)
+            db.rollback()
+            count = 0
+
+        # Simple heuristic: 0 = not started, 1-2 = in progress, 3+ = complete
+        if count == 0:
+            status, progress = "not_started", 0
+        elif count < 3:
+            status, progress = "in_progress", min(60, count * 30)
+        else:
+            status, progress = "complete", 100
+        modules.append({
+            "key": mod["key"],
+            "label": mod["label"],
+            "count": count,
+            "status": status,
+            "progress": progress,
+        })
+
+    started = sum(1 for m in modules if m["status"] != "not_started")
+    complete = sum(1 for m in modules if m["status"] == "complete")
+
+    return {
+        "modules": modules,
+        "total": len(modules),
+        "started": started,
+        "complete": complete,
+        "overall_progress": round((complete / len(modules)) * 100, 1) if modules else 0,
+    }
+
+
+@router.get("/dashboard/next-actions")
+async def get_next_actions(
+    limit: int = Query(5, ge=1, le=20),
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """Return the ``limit`` most urgent controls that are not yet passing.
+
+    urgency_score = category weight * 10 + days overdue; highest score first.
+    """
+    all_controls = ControlRepository(db).get_all()
+    current_day = datetime.utcnow().date()
+    open_actions = []
+    for control in all_controls:
+        state = control.status.value if control.status else "planned"
+        if state == "pass":
+            continue
+
+        overdue = 0
+        review_at = control.next_review_at
+        if review_at:
+            due = review_at.date() if hasattr(review_at, "date") else review_at
+            overdue = max(0, (current_day - due).days)
+
+        priority = _PRIORITY_WEIGHTS.get(getattr(control, "category", "best_practice"), 1)
+        open_actions.append({
+            "id": str(control.id),
+            "control_id": control.control_id,
+            "title": control.title,
+            "status": state,
+            "domain": control.domain.value if control.domain else "unknown",
+            "owner": control.owner,
+            "days_overdue": overdue,
+            "urgency_score": priority * 10 + overdue,
+            "reason": "Ueberfaellig" if overdue > 0 else "Offen",
+        })
+
+    ranked = sorted(open_actions, key=lambda entry: entry["urgency_score"], reverse=True)
+    return {"actions": ranked[:limit]}
+
+
+@router.post("/dashboard/snapshot")
+async def create_score_snapshot(
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """Store today's compliance score as a snapshot row (insert, or update on conflict).
+
+    score = (pass + 0.5 * partial) / total * 100, rounded to 2 places; 0 without controls.
+    """
+    control_repo = ControlRepository(db)
+    evidence_repo = EvidenceRepository(db)
+    risk_repo = RiskRepository(db)
+    control_stats = control_repo.get_statistics()
+    evidence_stats = evidence_repo.get_statistics()
+    all_risks = risk_repo.get_all()
+
+    total = control_stats.get("total", 0)
+    passing = control_stats.get("pass", 0)
+    partial = control_stats.get("partial", 0)
+    score = round(((passing + partial * 0.5) / total) * 100, 2) if total > 0 else 0
+    high_risk_count = len([r for r in all_risks if r.inherent_risk and r.inherent_risk.value in ("high", "critical")])
+
+    params = {
+        "tenant_id": tenant_id,
+        "score": score,
+        "controls_total": total,
+        "controls_pass": passing,
+        "controls_partial": partial,
+        "evidence_total": evidence_stats.get("total", 0),
+        "evidence_valid": evidence_stats.get("by_status", {}).get("valid", 0),
+        "risks_total": len(all_risks),
+        "risks_high": high_risk_count,
+        "snapshot_date": date.today(),
+    }
+    # NOTE(review): project_id is in the conflict target but is not inserted here — confirm its default.
+    stored = db.execute(text("""
+        INSERT INTO compliance_score_snapshots (
+            tenant_id, score, controls_total, controls_pass, controls_partial,
+            evidence_total, evidence_valid, risks_total, risks_high, snapshot_date
+        ) VALUES (
+            :tenant_id, :score, :controls_total, :controls_pass, :controls_partial,
+            :evidence_total, :evidence_valid, :risks_total, :risks_high, :snapshot_date
+        )
+        ON CONFLICT (tenant_id, project_id, snapshot_date) DO UPDATE SET
+            score = EXCLUDED.score,
+            controls_total = EXCLUDED.controls_total,
+            controls_pass = EXCLUDED.controls_pass,
+            controls_partial = EXCLUDED.controls_partial,
+            evidence_total = EXCLUDED.evidence_total,
+            evidence_valid = EXCLUDED.evidence_valid,
+            risks_total = EXCLUDED.risks_total,
+            risks_high = EXCLUDED.risks_high
+        RETURNING *
+    """), params).fetchone()
+    db.commit()
+    return _row_to_dict(stored)
+
+
+@router.get("/dashboard/score-history")
+async def get_score_history(
+    months: int = Query(12, ge=1, le=36),
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """Return the tenant's score snapshots of the last ``months`` * 30 days, oldest first."""
+    cutoff = date.today() - timedelta(days=months * 30)
+    params = {"tenant_id": tenant_id, "since": cutoff}
+    records = db.execute(text("""
+        SELECT * FROM compliance_score_snapshots
+        WHERE tenant_id = :tenant_id AND snapshot_date >= :since
+        ORDER BY snapshot_date ASC
+    """), params).fetchall()
+
+    history = []
+    for record in records:
+        entry = _row_to_dict(record)
+        score = entry.get("score")
+        if isinstance(score, Decimal):
+            entry["score"] = float(score)  # JSON output: Decimal -> float
+        history.append(entry)
+
+    return {
+        "snapshots": history,
+        "total": len(history),
+        "period_months": months,
+    }
+
+
+# ============================================================================
+# Evidence Distribution (Anti-Fake-Evidence Phase 3)
+# ============================================================================
+
+@router.get("/dashboard/evidence-distribution")
+async def get_evidence_distribution(
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """Count evidence per confidence level (E0-E4) and evidence still awaiting four-eyes approval."""
+    records = EvidenceRepository(db).get_all()
+
+    level_counts = dict.fromkeys(("E0", "E1", "E2", "E3", "E4"), 0)
+    awaiting_review = 0
+    for record in records:
+        # Evidence without a confidence level is counted as E1
+        label = record.confidence_level.value if record.confidence_level else "E1"
+        if label in level_counts:
+            level_counts[label] += 1
+        needs_review = record.requires_four_eyes and record.approval_status not in ("approved", "rejected")
+        if needs_review:
+            awaiting_review += 1
+
+    return {
+        "by_confidence": level_counts,
+        "four_eyes_pending": awaiting_review,
+        "total": len(records),
+    }
+
+
+# ============================================================================
+# Traceability Matrix (Anti-Fake-Evidence Phase 4a)
+# ============================================================================
+
+@router.get("/dashboard/traceability-matrix", response_model=TraceabilityMatrixResponse)
+async def get_traceability_matrix(
+    db: Session = Depends(get_db),
+    tenant_id: str = Depends(_get_tenant_id),
+):
+    """
+    Full traceability chain: Control → Evidence → Assertions.
+
+    Loads each entity set once, builds in-memory indices, and nests
+    the result so the frontend can render a matrix view.
+
+    A control is "covered" when it has evidence, and "fully verified" when it
+    also has assertions and every one of them has verified_by set.
+    """
+    # 1. Load all three entity sets
+    controls = ControlRepository(db).get_all()
+    all_evidence = EvidenceRepository(db).get_all()
+    all_assertions = db.query(AssertionDB).filter(
+        AssertionDB.entity_type == "evidence",
+    ).all()
+
+    # 2. Index assertions by evidence_id (entity_id). Keys are normalised to str
+    #    because the lookup below uses str(evidence.id).
+    assertions_by_evidence: Dict[str, list] = {}
+    for a in all_assertions:
+        assertions_by_evidence.setdefault(str(a.entity_id), []).append(a)
+
+    # 3. Index evidence by control_id
+    evidence_by_control: Dict[str, list] = {}
+    for e in all_evidence:
+        evidence_by_control.setdefault(str(e.control_id), []).append(e)
+
+    # 4. Build nested response
+    conf_order = {"E0": 0, "E1": 1, "E2": 2, "E3": 3, "E4": 4}  # rank used to pick the minimum level
+    result_controls: list = []
+    total_controls = 0
+    covered_controls = 0
+    fully_verified = 0
+
+    for ctrl in controls:
+        total_controls += 1
+        ctrl_id = str(ctrl.id)
+        ctrl_evidence = evidence_by_control.get(ctrl_id, [])
+
+        nested_evidence: list = []
+        has_evidence = len(ctrl_evidence) > 0
+        has_assertions = False
+        all_verified = True
+        min_conf: Optional[str] = None
+
+        for e in ctrl_evidence:
+            ev_id = str(e.id)
+            ev_assertions = assertions_by_evidence.get(ev_id, [])
+
+            nested_assertions = [
+                TraceabilityAssertion(
+                    id=str(a.id),
+                    sentence_text=a.sentence_text,
+                    assertion_type=a.assertion_type or "assertion",
+                    confidence=a.confidence or 0.0,
+                    verified=a.verified_by is not None,
+                )
+                for a in ev_assertions
+            ]
+
+            if nested_assertions:
+                has_assertions = True
+            if not all(na.verified for na in nested_assertions):
+                all_verified = False
+
+            conf = e.confidence_level.value if e.confidence_level else "E1"
+            if min_conf is None or conf_order.get(conf, 1) < conf_order.get(min_conf, 1):
+                min_conf = conf
+
+            nested_evidence.append(TraceabilityEvidence(
+                id=ev_id,
+                title=e.title,
+                evidence_type=e.evidence_type,
+                confidence_level=conf,
+                status=e.status.value if e.status else "valid",
+                assertions=nested_assertions,
+            ))
+
+        if not has_assertions:
+            all_verified = False
+
+        if has_evidence:
+            covered_controls += 1
+        if has_evidence and has_assertions and all_verified:
+            fully_verified += 1
+
+        coverage = TraceabilityCoverage(
+            has_evidence=has_evidence,
+            has_assertions=has_assertions,
+            all_assertions_verified=all_verified,
+            min_confidence_level=min_conf,
+        )
+
+        result_controls.append(TraceabilityControl(
+            id=ctrl_id,
+            control_id=ctrl.control_id,
+            title=ctrl.title,
+            status=ctrl.status.value if ctrl.status else "planned",
+            domain=ctrl.domain.value if ctrl.domain else "unknown",
+            evidence=nested_evidence,
+            coverage=coverage,
+        ))
+
+    summary = {
+        "total_controls": total_controls,
+        "covered_controls": covered_controls,
+        "fully_verified": fully_verified,
+        "uncovered_controls": total_controls - covered_controls,
+    }
+
+    return TraceabilityMatrixResponse(controls=result_controls, summary=summary)
+
+
 # ============================================================================
 # Reports
 # ============================================================================
--- a/backend-compliance/compliance/api/dsfa_routes.py
+++ b/backend-compliance/compliance/api/dsfa_routes.py
@@ -60,10 +60,314 @@ def get_dsfa_service(db: Session = Depends(get_db)) -> DSFAService:
    return DSFAService(db)


-def get_workflow_service(
-    db: Session = Depends(get_db),
-) -> DSFAWorkflowService:
-    return DSFAWorkflowService(db)
+# =============================================================================
+# Pydantic Schemas
+# =============================================================================
+
+class DSFACreate(BaseModel):
+    """Request body for creating a DSFA record.
+
+    Only ``title`` is required.  The fields up to ``created_by`` carry concrete
+    defaults; every field from "Section 1" onwards defaults to ``None``.
+
+    NOTE(review): the ``create_dsfa`` INSERT visible in this diff only writes
+    ``title`` through ``created_by`` (plus the tenant id); the remaining fields
+    appear not to be persisted on create — confirm against the full handler.
+    """
+    title: str
+    description: str = ""
+    status: str = "draft"
+    risk_level: str = "low"
+    processing_activity: str = ""
+    data_categories: List[str] = []
+    recipients: List[str] = []
+    measures: List[str] = []
+    created_by: str = "system"
+    # Section 1
+    processing_description: Optional[str] = None
+    processing_purpose: Optional[str] = None
+    legal_basis: Optional[str] = None
+    legal_basis_details: Optional[str] = None
+    # Section 2
+    necessity_assessment: Optional[str] = None
+    proportionality_assessment: Optional[str] = None
+    data_minimization: Optional[str] = None
+    alternatives_considered: Optional[str] = None
+    retention_justification: Optional[str] = None
+    # Section 3
+    involves_ai: Optional[bool] = None
+    overall_risk_level: Optional[str] = None
+    risk_score: Optional[int] = None
+    # Section 6
+    dpo_consulted: Optional[bool] = None
+    dpo_name: Optional[str] = None
+    dpo_opinion: Optional[str] = None
+    dpo_approved: Optional[bool] = None
+    authority_consulted: Optional[bool] = None
+    authority_reference: Optional[str] = None
+    authority_decision: Optional[str] = None
+    # Metadata
+    version: Optional[int] = None
+    conclusion: Optional[str] = None
+    federal_state: Optional[str] = None
+    authority_resource_id: Optional[str] = None
+    submitted_by: Optional[str] = None
+    # JSONB Arrays (lists of strings or of free-form dicts)
+    data_subjects: Optional[List[str]] = None
+    affected_rights: Optional[List[str]] = None
+    triggered_rule_codes: Optional[List[str]] = None
+    ai_trigger_ids: Optional[List[str]] = None
+    wp248_criteria_met: Optional[List[str]] = None
+    art35_abs3_triggered: Optional[List[str]] = None
+    tom_references: Optional[List[str]] = None
+    risks: Optional[List[dict]] = None
+    mitigations: Optional[List[dict]] = None
+    stakeholder_consultations: Optional[List[dict]] = None
+    review_triggers: Optional[List[dict]] = None
+    review_comments: Optional[List[dict]] = None
+    ai_use_case_modules: Optional[List[dict]] = None
+    section_8_complete: Optional[bool] = None
+    # JSONB Objects (free-form dicts; no schema is enforced here)
+    threshold_analysis: Optional[dict] = None
+    consultation_requirement: Optional[dict] = None
+    review_schedule: Optional[dict] = None
+    section_progress: Optional[dict] = None
+    metadata: Optional[dict] = None
+
+
+class DSFAUpdate(BaseModel):
+    """Request body for updating a DSFA record.
+
+    Every field is optional and defaults to ``None``.  Compared with
+    ``DSFACreate`` it has ``approved_by`` instead of ``created_by``.
+
+    NOTE(review): presumably ``None`` means "leave the column unchanged" —
+    confirm against the update handler, which is not part of this view.
+    """
+    title: Optional[str] = None
+    description: Optional[str] = None
+    status: Optional[str] = None
+    risk_level: Optional[str] = None
+    processing_activity: Optional[str] = None
+    data_categories: Optional[List[str]] = None
+    recipients: Optional[List[str]] = None
+    measures: Optional[List[str]] = None
+    approved_by: Optional[str] = None
+    # Section 1
+    processing_description: Optional[str] = None
+    processing_purpose: Optional[str] = None
+    legal_basis: Optional[str] = None
+    legal_basis_details: Optional[str] = None
+    # Section 2
+    necessity_assessment: Optional[str] = None
+    proportionality_assessment: Optional[str] = None
+    data_minimization: Optional[str] = None
+    alternatives_considered: Optional[str] = None
+    retention_justification: Optional[str] = None
+    # Section 3
+    involves_ai: Optional[bool] = None
+    overall_risk_level: Optional[str] = None
+    risk_score: Optional[int] = None
+    # Section 6
+    dpo_consulted: Optional[bool] = None
+    dpo_name: Optional[str] = None
+    dpo_opinion: Optional[str] = None
+    dpo_approved: Optional[bool] = None
+    authority_consulted: Optional[bool] = None
+    authority_reference: Optional[str] = None
+    authority_decision: Optional[str] = None
+    # Metadata
+    version: Optional[int] = None
+    conclusion: Optional[str] = None
+    federal_state: Optional[str] = None
+    authority_resource_id: Optional[str] = None
+    submitted_by: Optional[str] = None
+    # JSONB Arrays (lists of strings or of free-form dicts)
+    data_subjects: Optional[List[str]] = None
+    affected_rights: Optional[List[str]] = None
+    triggered_rule_codes: Optional[List[str]] = None
+    ai_trigger_ids: Optional[List[str]] = None
+    wp248_criteria_met: Optional[List[str]] = None
+    art35_abs3_triggered: Optional[List[str]] = None
+    tom_references: Optional[List[str]] = None
+    risks: Optional[List[dict]] = None
+    mitigations: Optional[List[dict]] = None
+    stakeholder_consultations: Optional[List[dict]] = None
+    review_triggers: Optional[List[dict]] = None
+    review_comments: Optional[List[dict]] = None
+    ai_use_case_modules: Optional[List[dict]] = None
+    section_8_complete: Optional[bool] = None
+    # JSONB Objects (free-form dicts; no schema is enforced here)
+    threshold_analysis: Optional[dict] = None
+    consultation_requirement: Optional[dict] = None
+    review_schedule: Optional[dict] = None
+    section_progress: Optional[dict] = None
+    metadata: Optional[dict] = None
+
+
+class DSFAStatusUpdate(BaseModel):
+    """Request body carrying a new DSFA status and, optionally, who approved it."""
+    # Required; this model does not itself restrict the value to a known status.
+    status: str
+    approved_by: Optional[str] = None
+
+
+class DSFASectionUpdate(BaseModel):
+    """Body for PUT /dsfa/{id}/sections/{section_number}."""
+    content: Optional[str] = None
+    # Free-form, section-specific data goes into this single ``extra`` dict.
+    # NOTE(review): no model config is set here, so unknown top-level keys are
+    # handled per the Pydantic default rather than collected automatically —
+    # confirm the frontend nests its data under "extra".
+    extra: Optional[dict] = None
+
+
+class DSFAApproveRequest(BaseModel):
+    """Body for POST /dsfa/{id}/approve."""
+    # Required decision flag; presumably False records a rejection — confirm
+    # against the approve handler.
+    approved: bool
+    comments: Optional[str] = None
+    approved_by: Optional[str] = None
+
+
+# =============================================================================
+# Helpers
+# =============================================================================
+
+def _get_tenant_id(tenant_id: Optional[str]) -> str:
+    """Return ``tenant_id``, or ``DEFAULT_TENANT_ID`` when it is ``None`` or empty."""
+    if tenant_id:
+        return tenant_id
+    return DEFAULT_TENANT_ID
+
+
+def _dsfa_to_response(row) -> dict:
+    """Serialise one DSFA database row into a JSON-friendly dict.
+
+    ``row`` may be a SQLAlchemy ``Row`` (its ``_mapping`` is used) or any
+    object supporting string-key access.  The core columns are read directly
+    and raise if absent; all later columns are read leniently so rows that
+    lack them still serialise, using ``None`` or the stated fallback.
+    """
+    import json
+
+    # SQLAlchemy 2.0: Row objects need ._mapping for string-key access
+    record = row._mapping if hasattr(row, "_mapping") else row
+
+    def _coerce_json(raw, expected):
+        """Return ``raw`` as an ``expected`` (list/dict), decoding JSON text.
+
+        ``None``, undecodable text, and text that decodes to another type all
+        yield an empty ``expected()``.  Any other non-string value is passed
+        through unchanged.
+        """
+        if raw is None:
+            return expected()
+        if isinstance(raw, expected):
+            return raw
+        if not isinstance(raw, str):
+            return raw
+        try:
+            decoded = json.loads(raw)
+        except Exception:
+            return expected()
+        return decoded if isinstance(decoded, expected) else expected()
+
+    def _iso(stamp):
+        """Timestamp → ISO string; falsy → None; strings pass through."""
+        if not stamp:
+            return None
+        return stamp if isinstance(stamp, str) else stamp.isoformat()
+
+    def _opt(column, fallback=None):
+        """Lenient column read: missing or NULL columns yield ``fallback``."""
+        try:
+            value = record[column]
+        except (KeyError, IndexError):
+            return fallback
+        return fallback if value is None else value
+
+    # Core fields (always present since Migration 024)
+    payload = {
+        "id": str(record["id"]),
+        "tenant_id": record["tenant_id"],
+        "title": record["title"],
+        "description": record["description"] or "",
+        "status": record["status"] or "draft",
+        "risk_level": record["risk_level"] or "low",
+        "processing_activity": record["processing_activity"] or "",
+        "data_categories": _coerce_json(record["data_categories"], list),
+        "recipients": _coerce_json(record["recipients"], list),
+        "measures": _coerce_json(record["measures"], list),
+        "approved_by": record["approved_by"],
+        "approved_at": _iso(record["approved_at"]),
+        "created_by": record["created_by"] or "system",
+        "created_at": _iso(record["created_at"]),
+        "updated_at": _iso(record["updated_at"]),
+    }
+
+    # Section 1 (Migration 030) and Section 2: plain optional columns
+    for column in (
+        "processing_description",
+        "processing_purpose",
+        "legal_basis",
+        "legal_basis_details",
+        "necessity_assessment",
+        "proportionality_assessment",
+        "data_minimization",
+        "alternatives_considered",
+        "retention_justification",
+    ):
+        payload[column] = _opt(column)
+
+    # Section 3
+    payload["involves_ai"] = _opt("involves_ai", False)
+    payload["overall_risk_level"] = _opt("overall_risk_level")
+    payload["risk_score"] = _opt("risk_score", 0)
+
+    # Section 6
+    payload.update({
+        "dpo_consulted": _opt("dpo_consulted", False),
+        "dpo_consulted_at": _iso(_opt("dpo_consulted_at")),
+        "dpo_name": _opt("dpo_name"),
+        "dpo_opinion": _opt("dpo_opinion"),
+        "dpo_approved": _opt("dpo_approved"),
+        "authority_consulted": _opt("authority_consulted", False),
+        "authority_consulted_at": _iso(_opt("authority_consulted_at")),
+        "authority_reference": _opt("authority_reference"),
+        "authority_decision": _opt("authority_decision"),
+    })
+
+    # Metadata / Versioning
+    previous_version = _opt("previous_version_id")
+    payload.update({
+        "version": _opt("version", 1),
+        "previous_version_id": str(previous_version) if previous_version else None,
+        "conclusion": _opt("conclusion"),
+        "federal_state": _opt("federal_state"),
+        "authority_resource_id": _opt("authority_resource_id"),
+        "submitted_for_review_at": _iso(_opt("submitted_for_review_at")),
+        "submitted_by": _opt("submitted_by"),
+    })
+
+    # JSONB Arrays; ai_use_case_modules belongs to Section 8 / AI (Migration 028)
+    for column in (
+        "data_subjects",
+        "affected_rights",
+        "triggered_rule_codes",
+        "ai_trigger_ids",
+        "wp248_criteria_met",
+        "art35_abs3_triggered",
+        "tom_references",
+        "risks",
+        "mitigations",
+        "stakeholder_consultations",
+        "review_triggers",
+        "review_comments",
+        "ai_use_case_modules",
+    ):
+        payload[column] = _coerce_json(_opt(column), list)
+    payload["section_8_complete"] = _opt("section_8_complete", False)
+
+    # JSONB Objects
+    for column in (
+        "threshold_analysis",
+        "consultation_requirement",
+        "review_schedule",
+        "section_progress",
+        "metadata",
+    ):
+        payload[column] = _coerce_json(_opt(column), dict)
+
+    return payload
+
+
+def _log_audit(
+    db: Session,
+    tenant_id: str,
+    dsfa_id,
+    action: str,
+    changed_by: str = "system",
+    old_values=None,
+    new_values=None,
+):
+    """Insert one row into ``compliance_dsfa_audit_log``.
+
+    ``old_values`` / ``new_values`` are JSON-encoded when truthy and stored as
+    SQL NULL otherwise; ``dsfa_id`` is stringified when truthy.  The statement
+    is only executed — this function does not commit.
+    """
+    import json
+
+    statement = text("""
+            INSERT INTO compliance_dsfa_audit_log
+                (tenant_id, dsfa_id, action, changed_by, old_values, new_values)
+            VALUES
+                (:tenant_id, :dsfa_id, :action, :changed_by,
+                 CAST(:old_values AS jsonb), CAST(:new_values AS jsonb))
+        """)
+
+    # Falsy inputs (None, {} or an empty id) are bound as NULL.
+    encoded_old = json.dumps(old_values) if old_values else None
+    encoded_new = json.dumps(new_values) if new_values else None
+    audit_target = str(dsfa_id) if dsfa_id else None
+
+    bind_values = {
+        "tenant_id": tenant_id,
+        "dsfa_id": audit_target,
+        "action": action,
+        "changed_by": changed_by,
+        "old_values": encoded_old,
+        "new_values": encoded_new,
+    }
+    db.execute(statement, bind_values)


 # =============================================================================
@@ -177,8 +481,51 @@ async def create_dsfa(
    service: DSFAService = Depends(get_dsfa_service),
 ) -> dict[str, Any]:
    """Neue DSFA erstellen."""
-    with translate_domain_errors():
-        return service.create(tenant_id, request)
+    import json
+
+    if request.status not in VALID_STATUSES:
+        raise HTTPException(status_code=422, detail=f"Ungültiger Status: {request.status}")
+    if request.risk_level not in VALID_RISK_LEVELS:
+        raise HTTPException(status_code=422, detail=f"Ungültiges Risiko-Level: {request.risk_level}")
+
+    tid = _get_tenant_id(tenant_id)
+
+    row = db.execute(
+        text("""
+            INSERT INTO compliance_dsfas
+                (tenant_id, title, description, status, risk_level,
+                 processing_activity, data_categories, recipients, measures, created_by)
+            VALUES
+                (:tenant_id, :title, :description, :status, :risk_level,
+                 :processing_activity,
+                 CAST(:data_categories AS jsonb),
+                 CAST(:recipients AS jsonb),
+                 CAST(:measures AS jsonb),
+                 :created_by)
+            RETURNING *
+        """),
+        {
+            "tenant_id": tid,
+            "title": request.title,
+            "description": request.description,
+            "status": request.status,
+            "risk_level": request.risk_level,
+            "processing_activity": request.processing_activity,
+            "data_categories": json.dumps(request.data_categories),
+            "recipients": json.dumps(request.recipients),
+            "measures": json.dumps(request.measures),
+            "created_by": request.created_by,
+        },
+    ).fetchone()
+
+    db.flush()
+    row_id = row._mapping["id"] if hasattr(row, "_mapping") else row[0]
+    _log_audit(
+        db, tid, row_id, "CREATE", request.created_by,
+        new_values={"title": request.title, "status": request.status},
+    )
+    db.commit()
+    return _dsfa_to_response(row)


 # =============================================================================
--- a/backend-compliance/compliance/api/evidence_check_routes.py
+++ b/backend-compliance/compliance/api/evidence_check_routes.py
--- a/backend-compliance/compliance/api/evidence_routes.py
+++ b/backend-compliance/compliance/api/evidence_routes.py
@@ -22,23 +22,21 @@ from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile
 from sqlalchemy.orm import Session

 from classroom_engine.database import get_db
-from compliance.api._http_errors import translate_domain_errors
-from compliance.db import ControlRepository, EvidenceRepository
-from compliance.schemas.evidence import (
-    EvidenceCreate,
-    EvidenceListResponse,
-    EvidenceResponse,
+
+from ..db import (
+    ControlRepository,
+    EvidenceRepository,
+    EvidenceStatusEnum,
+    EvidenceConfidenceEnum,
+    EvidenceTruthStatusEnum,
 )
-from compliance.services.auto_risk_updater import AutoRiskUpdater
-from compliance.domain import NotFoundError, ValidationError
-from compliance.services.evidence_service import (
-    SOURCE_CONTROL_MAP,
-    EvidenceService,
-    _extract_findings_detail,  # re-exported for legacy test imports
-    _parse_ci_evidence,  # re-exported for legacy test imports
-    _store_evidence,  # re-exported for legacy test imports
-    _update_risks as _update_risks_impl,
+from ..db.models import EvidenceDB, ControlDB, AuditTrailDB
+from ..services.auto_risk_updater import AutoRiskUpdater
+from .schemas import (
+    EvidenceCreate, EvidenceResponse, EvidenceListResponse,
+    EvidenceRejectRequest,
 )
+from .audit_trail_utils import log_audit_trail

 logger = logging.getLogger(__name__)
 router = APIRouter(tags=["compliance-evidence"])
@@ -56,7 +54,88 @@ def get_evidence_service(db: Session = Depends(get_db)) -> EvidenceService:


 # ============================================================================
-# Evidence CRUD
+# Anti-Fake-Evidence: Four-Eyes Domain Check
+# ============================================================================
+
+FOUR_EYES_DOMAINS = {"gov", "priv"}
+
+
+def _requires_four_eyes(control_domain: str) -> bool:
+    """Tell whether evidence for a control in ``control_domain`` needs four-eyes review.
+
+    True only when the domain value is listed in ``FOUR_EYES_DOMAINS``.
+    """
+    needs_second_reviewer = control_domain in FOUR_EYES_DOMAINS
+    return needs_second_reviewer
+
+
+# ============================================================================
+# Anti-Fake-Evidence: Auto-Classification Helpers
+# ============================================================================
+
+def _classify_confidence(source: Optional[str], evidence_type: Optional[str] = None, artifact_hash: Optional[str] = None) -> EvidenceConfidenceEnum:
+    """Map an evidence ``source`` to its confidence level.
+
+    ``ci_pipeline`` and ``api`` give E3, ``generated`` gives E0, and everything
+    else — including ``manual``, ``upload`` and unknown sources — gives E1.
+    ``evidence_type`` and ``artifact_hash`` are accepted but do not currently
+    change the result.
+    """
+    if source in ("ci_pipeline", "api"):
+        return EvidenceConfidenceEnum.E3
+    if source == "generated":
+        return EvidenceConfidenceEnum.E0
+    # manual / upload and any unrecognised source
+    return EvidenceConfidenceEnum.E1
+
+
+def _classify_truth_status(source: Optional[str]) -> EvidenceTruthStatusEnum:
+    """Map an evidence ``source`` to its truth status.
+
+    ``ci_pipeline`` and ``api`` are OBSERVED, ``generated`` is GENERATED, and
+    everything else — ``manual``, ``upload`` and unknown sources — is UPLOADED.
+    """
+    if source in ("ci_pipeline", "api"):
+        return EvidenceTruthStatusEnum.OBSERVED
+    if source == "generated":
+        return EvidenceTruthStatusEnum.GENERATED
+    return EvidenceTruthStatusEnum.UPLOADED
+
+
+def _build_evidence_response(e: EvidenceDB) -> EvidenceResponse:
+    """Convert an ``EvidenceDB`` row into an ``EvidenceResponse``.
+
+    Most attributes are copied verbatim; the enum columns ``status``,
+    ``confidence_level`` and ``truth_status`` are reduced to their ``.value``
+    (or ``None`` when unset).
+    """
+    def _enum_text(member):
+        return member.value if member else None
+
+    copied_as_is = (
+        "id", "control_id", "evidence_type", "title", "description",
+        "artifact_path", "artifact_url", "artifact_hash",
+        "file_size_bytes", "mime_type", "valid_from", "valid_until",
+        "source", "ci_job_id", "uploaded_by", "collected_at", "created_at",
+        # anti-fake-evidence / review fields
+        "generation_mode", "may_be_used_as_evidence",
+        "reviewed_by", "reviewed_at", "approval_status",
+        "first_reviewer", "first_reviewed_at",
+        "second_reviewer", "second_reviewed_at",
+        "requires_four_eyes",
+    )
+    fields = {attr: getattr(e, attr) for attr in copied_as_is}
+    fields["status"] = _enum_text(e.status)
+    fields["confidence_level"] = _enum_text(e.confidence_level)
+    fields["truth_status"] = _enum_text(e.truth_status)
+    return EvidenceResponse(**fields)
+
+
+# ============================================================================
+# Evidence
 # ============================================================================

@router.get("/evidence", response_model=EvidenceListResponse)
@@ -69,8 +148,38 @@ async def list_evidence(
    service: EvidenceService = Depends(get_evidence_service),
 ) -> EvidenceListResponse:
    """List evidence with optional filters and pagination."""
-    with translate_domain_errors():
-        return service.list_evidence(control_id, evidence_type, status, page, limit)
+    repo = EvidenceRepository(db)
+
+    if control_id:
+        # First get the control UUID
+        ctrl_repo = ControlRepository(db)
+        control = ctrl_repo.get_by_control_id(control_id)
+        if not control:
+            raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
+        evidence = repo.get_by_control(control.id)
+    else:
+        evidence = repo.get_all()
+
+    if evidence_type:
+        evidence = [e for e in evidence if e.evidence_type == evidence_type]
+
+    if status:
+        try:
+            status_enum = EvidenceStatusEnum(status)
+            evidence = [e for e in evidence if e.status == status_enum]
+        except ValueError:
+            pass
+
+    total = len(evidence)
+
+    # Apply pagination if requested
+    if page is not None and limit is not None:
+        offset = (page - 1) * limit
+        evidence = evidence[offset:offset + limit]
+
+    results = [_build_evidence_response(e) for e in evidence]
+
+    return EvidenceListResponse(evidence=results, total=total)


@router.post("/evidence", response_model=EvidenceResponse)
@@ -79,8 +188,66 @@ async def create_evidence(
    service: EvidenceService = Depends(get_evidence_service),
 ) -> EvidenceResponse:
    """Create new evidence record."""
-    with translate_domain_errors():
-        return service.create_evidence(evidence_data)
+    repo = EvidenceRepository(db)
+
+    # Get control UUID
+    ctrl_repo = ControlRepository(db)
+    control = ctrl_repo.get_by_control_id(evidence_data.control_id)
+    if not control:
+        raise HTTPException(status_code=404, detail=f"Control {evidence_data.control_id} not found")
+
+    source = evidence_data.source or "api"
+    confidence = _classify_confidence(source, evidence_data.evidence_type)
+    truth = _classify_truth_status(source)
+
+    # Allow explicit override from request
+    if evidence_data.confidence_level:
+        try:
+            confidence = EvidenceConfidenceEnum(evidence_data.confidence_level)
+        except ValueError:
+            pass
+    if evidence_data.truth_status:
+        try:
+            truth = EvidenceTruthStatusEnum(evidence_data.truth_status)
+        except ValueError:
+            pass
+
+    evidence = repo.create(
+        control_id=control.id,
+        evidence_type=evidence_data.evidence_type,
+        title=evidence_data.title,
+        description=evidence_data.description,
+        artifact_url=evidence_data.artifact_url,
+        valid_from=evidence_data.valid_from,
+        valid_until=evidence_data.valid_until,
+        source=source,
+        ci_job_id=evidence_data.ci_job_id,
+    )
+
+    # Set anti-fake-evidence fields
+    evidence.confidence_level = confidence
+    evidence.truth_status = truth
+    # Generated evidence should not be used as evidence by default
+    if truth == EvidenceTruthStatusEnum.GENERATED:
+        evidence.may_be_used_as_evidence = False
+
+    # Four-Eyes: check if the linked control's domain requires it
+    control_domain = control.domain.value if control.domain else ""
+    if _requires_four_eyes(control_domain):
+        evidence.requires_four_eyes = True
+        evidence.approval_status = "pending_first"
+
+    db.commit()
+
+    # Audit trail
+    log_audit_trail(
+        db, "evidence", evidence.id, evidence.title, "create",
+        performed_by=evidence_data.source or "api",
+        change_summary=f"Evidence created with confidence={confidence.value}, truth={truth.value}",
+    )
+    db.commit()
+
+    return _build_evidence_response(evidence)


@router.delete("/evidence/{evidence_id}")
@@ -107,9 +274,271 @@ async def upload_evidence(
    service: EvidenceService = Depends(get_evidence_service),
 ) -> EvidenceResponse:
    """Upload evidence file."""
-    with translate_domain_errors():
-        return await service.upload_evidence(
-            control_id, evidence_type, title, file, description
+    # Get control UUID
+    ctrl_repo = ControlRepository(db)
+    control = ctrl_repo.get_by_control_id(control_id)
+    if not control:
+        raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
+
+    # Create upload directory
+    upload_dir = f"/tmp/compliance_evidence/{control_id}"
+    os.makedirs(upload_dir, exist_ok=True)
+
+    # Save file
+    file_path = os.path.join(upload_dir, file.filename)
+    content = await file.read()
+
+    with open(file_path, "wb") as f:
+        f.write(content)
+
+    # Calculate hash
+    file_hash = hashlib.sha256(content).hexdigest()
+
+    # Create evidence record
+    repo = EvidenceRepository(db)
+    evidence = repo.create(
+        control_id=control.id,
+        evidence_type=evidence_type,
+        title=title,
+        description=description,
+        artifact_path=file_path,
+        artifact_hash=file_hash,
+        file_size_bytes=len(content),
+        mime_type=file.content_type,
+        source="upload",
+    )
+
+    # Upload evidence → E1 + uploaded
+    evidence.confidence_level = EvidenceConfidenceEnum.E1
+    evidence.truth_status = EvidenceTruthStatusEnum.UPLOADED
+
+    # Four-Eyes: check if the linked control's domain requires it
+    control_domain = control.domain.value if control.domain else ""
+    if _requires_four_eyes(control_domain):
+        evidence.requires_four_eyes = True
+        evidence.approval_status = "pending_first"
+
+    db.commit()
+
+    return _build_evidence_response(evidence)
+
+
+# ============================================================================
+# CI/CD Evidence Collection — helpers
+# ============================================================================
+
+# Map CI source names to the corresponding control IDs
+SOURCE_CONTROL_MAP = {
+    "sast": "SDLC-001",
+    "dependency_scan": "SDLC-002",
+    "secret_scan": "SDLC-003",
+    "code_review": "SDLC-004",
+    "sbom": "SDLC-005",
+    "container_scan": "SDLC-006",
+    "test_results": "AUD-001",
+}
+
+
+def _parse_ci_evidence(data: dict) -> dict:
+    """
+    Parse and validate incoming CI evidence data.
+
+    The report format is detected from the first matching top-level key:
+    ``results`` (Semgrep), ``Results`` (Trivy), ``findings`` (generic) or
+    ``components`` (SBOM, where only the component count is reported).
+    Null lists, null severities and non-dict entries are tolerated instead of
+    raising.  CRITICAL/HIGH severities are counted for the Semgrep, Trivy and
+    generic formats alike, matching ``_extract_findings_detail``.
+
+    Returns a dict with:
+      - report_json: str  (serialised JSON, "{}" for empty input)
+      - report_hash: str  (SHA-256 hex digest of report_json)
+      - evidence_status: str  ("failed" if any CRITICAL/HIGH finding, else "valid")
+      - findings_count: int
+      - critical_findings: int  (CRITICAL and HIGH combined)
+    """
+    def _as_list(value) -> list:
+        # Scanners may emit null (or a wrong type) where a list is expected.
+        return value if isinstance(value, list) else []
+
+    def _is_blocking(entry, *path) -> bool:
+        # Walk ``path`` through nested dicts; anything malformed counts as
+        # "no severity" rather than raising.
+        node = entry
+        for key in path:
+            if not isinstance(node, dict):
+                return False
+            node = node.get(key)
+        return str(node or "").upper() in ("CRITICAL", "HIGH")
+
+    report_json = json.dumps(data) if data else "{}"
+    report_hash = hashlib.sha256(report_json.encode()).hexdigest()
+
+    findings_count = 0
+    critical_findings = 0
+
+    if data and isinstance(data, dict):
+        # Semgrep format
+        if "results" in data:
+            results = _as_list(data.get("results"))
+            findings_count = len(results)
+            critical_findings = sum(
+                1 for r in results if _is_blocking(r, "extra", "severity")
+            )
+
+        # Trivy format
+        elif "Results" in data:
+            for result in _as_list(data.get("Results")):
+                if not isinstance(result, dict):
+                    continue
+                # "Vulnerabilities" can be null for targets without findings.
+                vulns = _as_list(result.get("Vulnerabilities"))
+                findings_count += len(vulns)
+                critical_findings += sum(
+                    1 for v in vulns if _is_blocking(v, "Severity")
+                )
+
+        # Generic findings array
+        elif "findings" in data:
+            findings = _as_list(data.get("findings"))
+            findings_count = len(findings)
+            critical_findings = sum(
+                1 for f in findings if _is_blocking(f, "severity")
+            )
+
+        # SBOM format - just count components
+        elif "components" in data:
+            findings_count = len(_as_list(data.get("components")))
+
+    evidence_status = "failed" if critical_findings > 0 else "valid"
+
+    return {
+        "report_json": report_json,
+        "report_hash": report_hash,
+        "evidence_status": evidence_status,
+        "findings_count": findings_count,
+        "critical_findings": critical_findings,
+    }
+
+
+def _store_evidence(
+    db: Session,
+    *,
+    control_db_id: str,
+    source: str,
+    parsed: dict,
+    ci_job_id: str,
+    ci_job_url: str,
+    report_data: dict,
+) -> EvidenceDB:
+    """
+    Persist a CI evidence item to the database and write the report file.
+
+    The file written to disk is exactly the serialised report that was hashed
+    (``parsed["report_json"]``), so ``artifact_hash`` and ``file_size_bytes``
+    describe the stored artifact and the hash can be re-verified against the
+    file later.
+
+    Args:
+        db: Session used to add, commit and refresh the new row.
+        control_db_id: Primary key of the control the evidence belongs to.
+        source: CI source name; used in the title, the evidence type and the
+            storage directory.
+        parsed: Result dict with ``report_json``, ``report_hash``,
+            ``evidence_status``, ``findings_count`` and ``critical_findings``.
+        ci_job_id: CI job identifier (added to the description when truthy).
+        ci_job_url: CI job URL (added to the description when truthy).
+        report_data: Raw report; kept for interface compatibility.
+            NOTE(review): assumed to be the data ``parsed`` was derived from —
+            confirm at the call site.
+
+    Returns the created EvidenceDB instance (already committed).
+    """
+    findings_count = parsed["findings_count"]
+    critical_findings = parsed["critical_findings"]
+    report_hash = parsed["report_hash"]
+    # Same bytes that were hashed: json.dumps output encoded with the default
+    # (UTF-8) codec.
+    payload = parsed["report_json"].encode()
+
+    # Read each clock once so title/file name and validity window are
+    # internally consistent.
+    collected_local = datetime.now()
+    collected_utc = datetime.utcnow()
+
+    # Build title and description
+    title = f"{source.upper()} Report - {collected_local.strftime('%Y-%m-%d %H:%M')}"
+    description_lines = ["Automatically collected from CI/CD pipeline"]
+    if findings_count > 0:
+        description_lines.append(f"- Total findings: {findings_count}")
+    if critical_findings > 0:
+        description_lines.append(f"- Critical/High findings: {critical_findings}")
+    if ci_job_id:
+        description_lines.append(f"- CI Job ID: {ci_job_id}")
+    if ci_job_url:
+        description_lines.append(f"- CI Job URL: {ci_job_url}")
+    description = "\n".join(description_lines)
+
+    # Store report file
+    upload_dir = f"/tmp/compliance_evidence/ci/{source}"
+    os.makedirs(upload_dir, exist_ok=True)
+    file_name = f"{source}_{collected_local.strftime('%Y%m%d_%H%M%S')}_{report_hash[:8]}.json"
+    file_path = os.path.join(upload_dir, file_name)
+
+    with open(file_path, "wb") as f:
+        f.write(payload)
+
+    # Create evidence record with anti-fake-evidence classification
+    evidence = EvidenceDB(
+        id=str(uuid_module.uuid4()),
+        control_id=control_db_id,
+        evidence_type=f"ci_{source}",
+        title=title,
+        description=description,
+        artifact_path=file_path,
+        artifact_hash=report_hash,
+        file_size_bytes=len(payload),
+        mime_type="application/json",
+        source="ci_pipeline",
+        ci_job_id=ci_job_id,
+        valid_from=collected_utc,
+        valid_until=collected_utc + timedelta(days=90),
+        status=EvidenceStatusEnum(parsed["evidence_status"]),
+        # CI pipeline evidence → E3 observed (system-observed, hash-verified)
+        confidence_level=EvidenceConfidenceEnum.E3,
+        truth_status=EvidenceTruthStatusEnum.OBSERVED,
+        may_be_used_as_evidence=True,
+    )
+    db.add(evidence)
+    db.commit()
+    db.refresh(evidence)
+
+    return evidence
+
+
+def _extract_findings_detail(report_data: dict) -> dict:
+    """
+    Extract severity-bucketed finding counts from report data.
+
+    The format is chosen by the first matching top-level key: ``results``
+    (Semgrep; INFO counts as low, other unknown severities are ignored),
+    ``Results`` (Trivy; unknown severities are ignored) or ``findings``
+    (generic; anything that is not CRITICAL/HIGH/MEDIUM counts as low).
+    Null lists, null severities and non-dict entries are tolerated instead of
+    raising.
+
+    Returns dict with keys: critical, high, medium, low.
+    """
+    findings_detail = {
+        "critical": 0,
+        "high": 0,
+        "medium": 0,
+        "low": 0,
+    }
+
+    if not report_data or not isinstance(report_data, dict):
+        return findings_detail
+
+    def _as_list(value) -> list:
+        # Scanners may emit null (or a wrong type) where a list is expected.
+        return value if isinstance(value, list) else []
+
+    def _tally(severity, low_labels, fallback=None):
+        # Normalise once; None and non-string severities become "".
+        label = str(severity or "").upper()
+        if label in ("CRITICAL", "HIGH", "MEDIUM"):
+            bucket = label.lower()
+        elif label in low_labels:
+            bucket = "low"
+        else:
+            bucket = fallback
+        if bucket is not None:
+            findings_detail[bucket] += 1
+
+    # Semgrep format
+    if "results" in report_data:
+        for r in _as_list(report_data.get("results")):
+            extra = r.get("extra") if isinstance(r, dict) else None
+            severity = extra.get("severity") if isinstance(extra, dict) else None
+            _tally(severity, ("LOW", "INFO"))
+
+    # Trivy format
+    elif "Results" in report_data:
+        for result in _as_list(report_data.get("Results")):
+            if not isinstance(result, dict):
+                continue
+            # "Vulnerabilities" can be null for targets without findings.
+            for v in _as_list(result.get("Vulnerabilities")):
+                severity = v.get("Severity") if isinstance(v, dict) else None
+                _tally(severity, ("LOW",))
+
+    # Generic findings with severity
+    elif "findings" in report_data:
+        for f in _as_list(report_data.get("findings")):
+            severity = f.get("severity") if isinstance(f, dict) else None
+            _tally(severity, ("LOW",), fallback="low")
+
+    return findings_detail
+
+
+def _update_risks(db: Session, *, source: str, control_id: str, ci_job_id: str, report_data: dict):
+    """
+    Update risk status based on new evidence.
+
+    Uses AutoRiskUpdater to update Control status and linked Risks based on
+    severity-bucketed findings.  Returns the update result or None on error.
+    """
+    findings_detail = _extract_findings_detail(report_data)
+
+    try:
+        auto_updater = AutoRiskUpdater(db)
+        risk_update_result = auto_updater.process_evidence_collect_request(
+            tool=source,
+            control_id=control_id,
+            evidence_type=f"ci_{source}",
+            timestamp=datetime.utcnow().isoformat(),
+            commit_sha=report_data.get("commit_sha", "unknown") if report_data else "unknown",
+            ci_job_id=ci_job_id,
+            findings=findings_detail,
        )


@@ -227,14 +656,229 @@ async def get_ci_evidence_status(
 # Legacy re-exports for tests that import helpers directly.
 # ----------------------------------------------------------------------------

-__all__ = [
-    "router",
-    "SOURCE_CONTROL_MAP",
-    "EvidenceRepository",
-    "ControlRepository",
-    "AutoRiskUpdater",
-    "_parse_ci_evidence",
-    "_extract_findings_detail",
-    "_store_evidence",
-    "_update_risks",
-]
+    if control_id:
+        ctrl_repo = ControlRepository(db)
+        control = ctrl_repo.get_by_control_id(control_id)
+        if control:
+            query = query.filter(EvidenceDB.control_id == control.id)
+
+    evidence_list = query.order_by(EvidenceDB.collected_at.desc()).limit(100).all()
+
+    # Group by control and calculate stats
+    control_stats = defaultdict(lambda: {
+        "total": 0,
+        "valid": 0,
+        "failed": 0,
+        "last_collected": None,
+        "evidence": [],
+    })
+
+    for e in evidence_list:
+        # Get control_id string
+        control = db.query(ControlDB).filter(ControlDB.id == e.control_id).first()
+        ctrl_id = control.control_id if control else "unknown"
+
+        stats = control_stats[ctrl_id]
+        stats["total"] += 1
+        if e.status:
+            if e.status.value == "valid":
+                stats["valid"] += 1
+            elif e.status.value == "failed":
+                stats["failed"] += 1
+        if not stats["last_collected"] or e.collected_at > stats["last_collected"]:
+            stats["last_collected"] = e.collected_at
+
+        # Add evidence summary
+        stats["evidence"].append({
+            "id": e.id,
+            "type": e.evidence_type,
+            "status": e.status.value if e.status else None,
+            "collected_at": e.collected_at.isoformat() if e.collected_at else None,
+            "ci_job_id": e.ci_job_id,
+        })
+
+    # Convert to list and sort
+    result = []
+    for ctrl_id, stats in control_stats.items():
+        result.append({
+            "control_id": ctrl_id,
+            "total_evidence": stats["total"],
+            "valid_count": stats["valid"],
+            "failed_count": stats["failed"],
+            "last_collected": stats["last_collected"].isoformat() if stats["last_collected"] else None,
+            "recent_evidence": stats["evidence"][:5],
+        })
+
+    result.sort(key=lambda x: x["last_collected"] or "", reverse=True)
+
+    return {
+        "period_days": days,
+        "total_evidence": len(evidence_list),
+        "controls": result,
+    }
+
+
+# ============================================================================
+# Evidence Review (Anti-Fake-Evidence)
+# ============================================================================
+
+from pydantic import BaseModel as _BaseModel
+
+class _EvidenceReviewRequest(_BaseModel):
+    """Request body for ``PATCH /evidence/{evidence_id}/review``."""
+
+    # Optional new confidence level; review_evidence parses it into
+    # EvidenceConfidenceEnum and answers 400 when the value is not a member.
+    confidence_level: Optional[str] = None
+    # Optional new truth status; review_evidence parses it into
+    # EvidenceTruthStatusEnum and answers 400 when the value is not a member.
+    truth_status: Optional[str] = None
+    # Required reviewer identifier as supplied by the caller; stored on the
+    # evidence row and used for the Four-Eyes "different reviewer" comparison.
+    reviewed_by: str
+
+
+@router.patch("/evidence/{evidence_id}/review", response_model=EvidenceResponse)
+async def review_evidence(
+    evidence_id: str,
+    review: _EvidenceReviewRequest,
+    db: Session = Depends(get_db),
+):
+    """
+    Review evidence: upgrade confidence level and/or change truth status.
+
+    For Four-Eyes evidence, the first reviewer sets first_reviewer and
+    approval_status='first_approved'. A second (different) reviewer then
+    sets second_reviewer and approval_status='approved'.
+
+    Raises:
+        HTTPException 404: no evidence row has the given ``evidence_id``.
+        HTTPException 400: ``confidence_level`` or ``truth_status`` is not a
+            valid enum value, the second Four-Eyes reviewer equals the first,
+            or the Four-Eyes evidence is already approved or rejected.
+
+    Returns:
+        The refreshed evidence row converted by ``_build_evidence_response``.
+    """
+    evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
+    if not evidence:
+        raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")
+
+    # Snapshot the current values so the audit trail below can tell whether
+    # this review actually changed them.
+    old_confidence = evidence.confidence_level.value if evidence.confidence_level else None
+    old_truth = evidence.truth_status.value if evidence.truth_status else None
+
+    # Empty/None fields in the request are skipped; only supplied values are
+    # validated against the enums and applied.
+    if review.confidence_level:
+        try:
+            evidence.confidence_level = EvidenceConfidenceEnum(review.confidence_level)
+        except ValueError:
+            raise HTTPException(status_code=400, detail=f"Invalid confidence_level: {review.confidence_level}")
+
+    if review.truth_status:
+        try:
+            evidence.truth_status = EvidenceTruthStatusEnum(review.truth_status)
+        except ValueError:
+            raise HTTPException(status_code=400, detail=f"Invalid truth_status: {review.truth_status}")
+
+    # Four-Eyes state machine: none/pending_first -> first_approved -> approved.
+    # The attribute changes above are only in memory at this point; any
+    # HTTPException raised here leaves the function before the first commit.
+    # NOTE(review): reviewer identity is the caller-supplied reviewed_by string;
+    # confirm that an upstream layer authenticates it.
+    if evidence.requires_four_eyes:
+        status = evidence.approval_status or "none"
+        if status in ("none", "pending_first"):
+            evidence.first_reviewer = review.reviewed_by
+            evidence.first_reviewed_at = datetime.utcnow()
+            evidence.approval_status = "first_approved"
+        elif status == "first_approved":
+            if review.reviewed_by == evidence.first_reviewer:
+                raise HTTPException(
+                    status_code=400,
+                    detail="Four-Eyes: second reviewer must be different from first reviewer",
+                )
+            evidence.second_reviewer = review.reviewed_by
+            evidence.second_reviewed_at = datetime.utcnow()
+            evidence.approval_status = "approved"
+        elif status == "approved":
+            raise HTTPException(status_code=400, detail="Evidence already approved")
+        elif status == "rejected":
+            raise HTTPException(status_code=400, detail="Evidence was rejected — create new evidence instead")
+        # Any other approval_status value falls through without a state change.
+
+    # The most recent reviewer is always recorded, with or without Four-Eyes.
+    evidence.reviewed_by = review.reviewed_by
+    evidence.reviewed_at = datetime.utcnow()
+    db.commit()
+
+    # Audit trail: one entry per field whose value actually changed, written
+    # after the review itself has been committed and committed separately.
+    new_confidence = evidence.confidence_level.value if evidence.confidence_level else None
+    if old_confidence != new_confidence:
+        log_audit_trail(
+            db, "evidence", evidence_id, evidence.title, "review",
+            performed_by=review.reviewed_by,
+            field_changed="confidence_level",
+            old_value=old_confidence,
+            new_value=new_confidence,
+        )
+    new_truth = evidence.truth_status.value if evidence.truth_status else None
+    if old_truth != new_truth:
+        log_audit_trail(
+            db, "evidence", evidence_id, evidence.title, "review",
+            performed_by=review.reviewed_by,
+            field_changed="truth_status",
+            old_value=old_truth,
+            new_value=new_truth,
+        )
+    db.commit()
+
+    db.refresh(evidence)
+    return _build_evidence_response(evidence)
+
+
+@router.patch("/evidence/{evidence_id}/reject", response_model=EvidenceResponse)
+async def reject_evidence(
+    evidence_id: str,
+    body: EvidenceRejectRequest,
+    db: Session = Depends(get_db),
+):
+    """
+    Mark an evidence record as rejected and write an audit trail entry.
+
+    Sets ``approval_status`` to ``'rejected'`` and records the reviewer and
+    review time. Answers 404 when no evidence row matches ``evidence_id``.
+    """
+    record = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first()
+    if not record:
+        raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found")
+
+    # The rejection is committed first; the audit entry follows in its own commit.
+    record.approval_status = "rejected"
+    record.reviewed_by = body.reviewed_by
+    record.reviewed_at = datetime.utcnow()
+    db.commit()
+
+    # Fall back to a generic summary when the caller gave no reason.
+    summary = body.rejection_reason or "Evidence rejected"
+    log_audit_trail(
+        db,
+        "evidence",
+        evidence_id,
+        record.title,
+        "reject",
+        performed_by=body.reviewed_by,
+        change_summary=summary,
+    )
+    db.commit()
+
+    db.refresh(record)
+    return _build_evidence_response(record)
+
+
+# ============================================================================
+# Audit Trail Query
+# ============================================================================
+
+@router.get("/audit-trail")
+async def get_audit_trail(
+    entity_type: Optional[str] = Query(None),
+    entity_id: Optional[str] = Query(None),
+    action: Optional[str] = Query(None),
+    limit: int = Query(50, ge=1, le=200),
+    db: Session = Depends(get_db),
+):
+    """
+    Return audit trail entries, newest first, optionally filtered.
+
+    ``entity_type``, ``entity_id`` and ``action`` each add an equality filter
+    when supplied. ``total`` in the response is the number of entries
+    returned (at most ``limit``).
+    """
+    query = db.query(AuditTrailDB)
+
+    # Only filters with a truthy value are applied, in this fixed order.
+    requested_filters = (
+        (AuditTrailDB.entity_type, entity_type),
+        (AuditTrailDB.entity_id, entity_id),
+        (AuditTrailDB.action, action),
+    )
+    for column, wanted in requested_filters:
+        if wanted:
+            query = query.filter(column == wanted)
+
+    newest_first = query.order_by(AuditTrailDB.performed_at.desc())
+    rows = newest_first.limit(limit).all()
+
+    entries = []
+    for row in rows:
+        entries.append({
+            "id": row.id,
+            "entity_type": row.entity_type,
+            "entity_id": row.entity_id,
+            "entity_name": row.entity_name,
+            "action": row.action,
+            "field_changed": row.field_changed,
+            "old_value": row.old_value,
+            "new_value": row.new_value,
+            "change_summary": row.change_summary,
+            "performed_by": row.performed_by,
+            "performed_at": row.performed_at.isoformat() if row.performed_at else None,
+            "checksum": row.checksum,
+        })
+
+    return {"entries": entries, "total": len(rows)}
--- a/backend-compliance/compliance/api/extraction_routes.py
+++ b/backend-compliance/compliance/api/extraction_routes.py
@@ -39,7 +39,6 @@ router = APIRouter(tags=["extraction"])

 ALL_COLLECTIONS = [
    "bp_compliance_ce",          # BSI-TR documents — primary Prüfaspekte source
-    "bp_compliance_recht",       # Legal texts (GDPR, AI Act, ...)
    "bp_compliance_gesetze",     # German laws
    "bp_compliance_datenschutz", # Data protection documents
    "bp_dsfa_corpus",            # DSFA corpus
--- a/backend-compliance/compliance/api/isms_routes.py
+++ b/backend-compliance/compliance/api/isms_routes.py
@@ -80,9 +80,13 @@ def _handle(func, *args, **kwargs):  # type: ignore[no-untyped-def]
        raise HTTPException(status_code=400, detail=str(exc))


-# ============================================================================
-# ISMS Scope (ISO 27001 4.3)
-# ============================================================================
+# Shared audit trail utilities — canonical implementation in audit_trail_utils.py
+from .audit_trail_utils import log_audit_trail, create_signature  # noqa: E402
+
+
+# =============================================================================
+# ISMS SCOPE (ISO 27001 4.3)
+# =============================================================================

@router.get("/scope", response_model=ISMSScopeResponse)
 async def get_isms_scope(db: Session = Depends(get_db)):
--- a/backend-compliance/compliance/api/legal_template_routes.py
+++ b/backend-compliance/compliance/api/legal_template_routes.py
@@ -50,6 +50,57 @@ VALID_DOCUMENT_TYPES = {
    "cookie_banner",
    "agb",
    "clause",
+    # Security document templates (Migration 051)
+    "it_security_concept",
+    "data_protection_concept",
+    "backup_recovery_concept",
+    "logging_concept",
+    "incident_response_plan",
+    "access_control_concept",
+    "risk_management_concept",
+    # Policy templates — IT Security (Migration 054)
+    "information_security_policy",
+    "access_control_policy",
+    "password_policy",
+    "encryption_policy",
+    "logging_policy",
+    "backup_policy",
+    "incident_response_policy",
+    "change_management_policy",
+    "patch_management_policy",
+    "asset_management_policy",
+    "cloud_security_policy",
+    "devsecops_policy",
+    "secrets_management_policy",
+    "vulnerability_management_policy",
+    # Policy templates — Data (Migration 054)
+    "data_protection_policy",
+    "data_classification_policy",
+    "data_retention_policy",
+    "data_transfer_policy",
+    "privacy_incident_policy",
+    # Policy templates — Personnel (Migration 054)
+    "employee_security_policy",
+    "security_awareness_policy",
+    "remote_work_policy",
+    "offboarding_policy",
+    # Policy templates — Vendor/Supply Chain (Migration 054)
+    "vendor_risk_management_policy",
+    "third_party_security_policy",
+    "supplier_security_policy",
+    # Policy templates — BCM (Migration 054)
+    "business_continuity_policy",
+    "disaster_recovery_policy",
+    "crisis_management_policy",
+    # CRA Cybersecurity (Migration 056)
+    "cybersecurity_policy",
+    # DSFA template
+    "dsfa",
+    # Module document templates (Migration 073)
+    "vvt_register",
+    "tom_documentation",
+    "loeschkonzept",
+    "pflichtenregister",
 }
 VALID_STATUSES = {"published", "draft", "archived"}

--- a/backend-compliance/compliance/api/llm_audit_routes.py
+++ b/backend-compliance/compliance/api/llm_audit_routes.py
@@ -0,0 +1,162 @@
+"""
+FastAPI routes for LLM Generation Audit Trail.
+
+Endpoints:
+- POST /llm-audit: Record an LLM generation event
+- GET  /llm-audit: List audit records with filters
+"""
+
+import logging
+import uuid as uuid_module
+from datetime import datetime
+from typing import Optional
+
+from fastapi import APIRouter, Depends, Query
+from pydantic import BaseModel
+from sqlalchemy.orm import Session
+
+from classroom_engine.database import get_db
+from ..db.models import LLMGenerationAuditDB
+
+logger = logging.getLogger(__name__)
+router = APIRouter(tags=["compliance-llm-audit"])
+
+
+# ============================================================================
+# Schemas
+# ============================================================================
+
+class LLMAuditCreate(BaseModel):
+    """Request body for ``POST /llm-audit``: one LLM generation event to record."""
+
+    # Kind and (optional) identifier of the entity the generation belongs to.
+    entity_type: str
+    entity_id: Optional[str] = None
+    generation_mode: str
+    # Parsed into EvidenceTruthStatusEnum by create_llm_audit; values that are
+    # not enum members are stored as GENERATED.
+    truth_status: str = "generated"
+    may_be_used_as_evidence: bool = False
+    llm_model: Optional[str] = None
+    llm_provider: Optional[str] = None
+    prompt_hash: Optional[str] = None
+    # Both summaries are cut to their first 500 characters before being stored.
+    input_summary: Optional[str] = None
+    output_summary: Optional[str] = None
+    # Persisted in the record's ``extra_metadata`` attribute ({} when omitted).
+    metadata: Optional[dict] = None
+    tenant_id: Optional[str] = None
+
+
+class LLMAuditResponse(BaseModel):
+    """Serialized form of one LLM generation audit record."""
+
+    id: str
+    tenant_id: Optional[str] = None
+    entity_type: str
+    entity_id: Optional[str] = None
+    generation_mode: str
+    # String value of the stored truth-status enum; the routes in this module
+    # report "generated" when the record has no truth status.
+    truth_status: str
+    may_be_used_as_evidence: bool
+    llm_model: Optional[str] = None
+    llm_provider: Optional[str] = None
+    prompt_hash: Optional[str] = None
+    input_summary: Optional[str] = None
+    output_summary: Optional[str] = None
+    # Filled from the record's ``extra_metadata`` attribute by the routes.
+    metadata: Optional[dict] = None
+    created_at: datetime
+
+    class Config:
+        # Pydantic option that allows building the model from attribute-bearing
+        # objects (e.g. ORM rows) rather than dicts only.
+        from_attributes = True
+
+
+# ============================================================================
+# Routes
+# ============================================================================
+
+@router.post("/llm-audit", response_model=LLMAuditResponse)
+async def create_llm_audit(
+    data: LLMAuditCreate,
+    db: Session = Depends(get_db),
+):
+    """
+    Record an LLM generation event for audit trail.
+
+    The submitted ``truth_status`` is mapped onto ``EvidenceTruthStatusEnum``.
+    A value that is not an enum member does not fail the request; the record
+    is stored as GENERATED and the substitution is logged as a warning.
+    ``input_summary`` and ``output_summary`` are cut to 500 characters.
+
+    Returns:
+        The persisted record as an ``LLMAuditResponse``.
+    """
+    from ..db.models import EvidenceTruthStatusEnum
+
+    # Validate truth_status. The fallback is deliberate best effort, but it
+    # must leave a trace: otherwise the stored audit row silently disagrees
+    # with what the caller sent.
+    try:
+        truth_enum = EvidenceTruthStatusEnum(data.truth_status)
+    except ValueError:
+        logger.warning(
+            "Unknown truth_status %r for LLM audit record "
+            "(entity_type=%r, entity_id=%r); storing GENERATED instead",
+            data.truth_status, data.entity_type, data.entity_id,
+        )
+        truth_enum = EvidenceTruthStatusEnum.GENERATED
+
+    record = LLMGenerationAuditDB(
+        id=str(uuid_module.uuid4()),
+        tenant_id=data.tenant_id,
+        entity_type=data.entity_type,
+        entity_id=data.entity_id,
+        generation_mode=data.generation_mode,
+        truth_status=truth_enum,
+        may_be_used_as_evidence=data.may_be_used_as_evidence,
+        llm_model=data.llm_model,
+        llm_provider=data.llm_provider,
+        prompt_hash=data.prompt_hash,
+        # Summaries are capped at 500 characters; empty/None becomes None.
+        input_summary=data.input_summary[:500] if data.input_summary else None,
+        output_summary=data.output_summary[:500] if data.output_summary else None,
+        # The request field is called "metadata"; the model attribute is extra_metadata.
+        extra_metadata=data.metadata or {},
+    )
+    db.add(record)
+    db.commit()
+    # Reload so values read back from the row (e.g. created_at) are populated.
+    db.refresh(record)
+
+    return LLMAuditResponse(
+        id=record.id,
+        tenant_id=record.tenant_id,
+        entity_type=record.entity_type,
+        entity_id=record.entity_id,
+        generation_mode=record.generation_mode,
+        truth_status=record.truth_status.value if record.truth_status else "generated",
+        may_be_used_as_evidence=record.may_be_used_as_evidence,
+        llm_model=record.llm_model,
+        llm_provider=record.llm_provider,
+        prompt_hash=record.prompt_hash,
+        input_summary=record.input_summary,
+        output_summary=record.output_summary,
+        metadata=record.extra_metadata,
+        created_at=record.created_at,
+    )
+
+
+@router.get("/llm-audit")
+async def list_llm_audit(
+    entity_type: Optional[str] = Query(None),
+    entity_id: Optional[str] = Query(None),
+    page: int = Query(1, ge=1),
+    limit: int = Query(50, ge=1, le=200),
+    db: Session = Depends(get_db),
+):
+    """
+    Return one page of LLM generation audit records, newest first.
+
+    ``entity_type`` and ``entity_id`` each add an equality filter when given.
+    ``total`` in the response counts every matching row, not only the page.
+    """
+    def _to_response(row) -> LLMAuditResponse:
+        # A record without a truth status is reported as "generated".
+        return LLMAuditResponse(
+            id=row.id,
+            tenant_id=row.tenant_id,
+            entity_type=row.entity_type,
+            entity_id=row.entity_id,
+            generation_mode=row.generation_mode,
+            truth_status=row.truth_status.value if row.truth_status else "generated",
+            may_be_used_as_evidence=row.may_be_used_as_evidence,
+            llm_model=row.llm_model,
+            llm_provider=row.llm_provider,
+            prompt_hash=row.prompt_hash,
+            input_summary=row.input_summary,
+            output_summary=row.output_summary,
+            metadata=row.extra_metadata,
+            created_at=row.created_at,
+        )
+
+    query = db.query(LLMGenerationAuditDB)
+    requested_filters = (
+        (LLMGenerationAuditDB.entity_type, entity_type),
+        (LLMGenerationAuditDB.entity_id, entity_id),
+    )
+    for column, wanted in requested_filters:
+        if wanted:
+            query = query.filter(column == wanted)
+
+    # Count before paging so the total reflects the whole filtered set.
+    total = query.count()
+    skip = (page - 1) * limit
+    newest_first = query.order_by(LLMGenerationAuditDB.created_at.desc())
+    page_rows = newest_first.offset(skip).limit(limit).all()
+
+    payload = {
+        "records": [_to_response(row) for row in page_rows],
+        "total": total,
+        "page": page,
+        "limit": limit,
+    }
+    return payload
--- a/backend-compliance/compliance/api/loeschfristen_routes.py
+++ b/backend-compliance/compliance/api/loeschfristen_routes.py
@@ -56,6 +56,7 @@ class LoeschfristCreate(BaseModel):
    responsible_person: Optional[str] = None
    release_process: Optional[str] = None
    linked_vvt_activity_ids: Optional[List[Any]] = None
+    linked_vendor_ids: Optional[List[Any]] = None
    status: str = "DRAFT"
    last_review_date: Optional[datetime] = None
    next_review_date: Optional[datetime] = None
@@ -86,6 +87,7 @@ class LoeschfristUpdate(BaseModel):
    responsible_person: Optional[str] = None
    release_process: Optional[str] = None
    linked_vvt_activity_ids: Optional[List[Any]] = None
+    linked_vendor_ids: Optional[List[Any]] = None
    status: Optional[str] = None
    last_review_date: Optional[datetime] = None
    next_review_date: Optional[datetime] = None
@@ -100,7 +102,7 @@ class StatusUpdate(BaseModel):
 # JSONB fields that need CAST
 JSONB_FIELDS = {
    "affected_groups", "data_categories", "legal_holds",
-    "storage_locations", "linked_vvt_activity_ids", "tags"
+    "storage_locations", "linked_vvt_activity_ids", "linked_vendor_ids", "tags"
 }


--- a/backend-compliance/compliance/api/obligation_routes.py
+++ b/backend-compliance/compliance/api/obligation_routes.py
@@ -42,6 +42,7 @@ class ObligationCreate(BaseModel):
    priority: str = "medium"
    responsible: Optional[str] = None
    linked_systems: Optional[List[str]] = None
+    linked_vendor_ids: Optional[List[str]] = None
    assessment_id: Optional[str] = None
    rule_code: Optional[str] = None
    notes: Optional[str] = None
@@ -57,6 +58,7 @@ class ObligationUpdate(BaseModel):
    priority: Optional[str] = None
    responsible: Optional[str] = None
    linked_systems: Optional[List[str]] = None
+    linked_vendor_ids: Optional[List[str]] = None
    notes: Optional[str] = None


@@ -173,14 +175,15 @@ async def create_obligation(

    import json
    linked_systems = json.dumps(payload.linked_systems or [])
+    linked_vendor_ids = json.dumps(payload.linked_vendor_ids or [])

    row = db.execute(text("""
        INSERT INTO compliance_obligations
            (tenant_id, title, description, source, source_article, deadline,
-             status, priority, responsible, linked_systems, assessment_id, rule_code, notes)
+             status, priority, responsible, linked_systems, linked_vendor_ids, assessment_id, rule_code, notes)
        VALUES
            (:tenant_id, :title, :description, :source, :source_article, :deadline,
-             :status, :priority, :responsible, CAST(:linked_systems AS jsonb), :assessment_id, :rule_code, :notes)
+             :status, :priority, :responsible, CAST(:linked_systems AS jsonb), CAST(:linked_vendor_ids AS jsonb), :assessment_id, :rule_code, :notes)
        RETURNING *
    """), {
        "tenant_id": tenant_id,
@@ -193,6 +196,7 @@ async def create_obligation(
        "priority": payload.priority,
        "responsible": payload.responsible,
        "linked_systems": linked_systems,
+        "linked_vendor_ids": linked_vendor_ids,
        "assessment_id": payload.assessment_id,
        "rule_code": payload.rule_code,
        "notes": payload.notes,
@@ -235,6 +239,9 @@ async def update_obligation(
        if field == "linked_systems":
            updates["linked_systems"] = json.dumps(value or [])
            set_clauses.append("linked_systems = CAST(:linked_systems AS jsonb)")
+        elif field == "linked_vendor_ids":
+            updates["linked_vendor_ids"] = json.dumps(value or [])
+            set_clauses.append("linked_vendor_ids = CAST(:linked_vendor_ids AS jsonb)")
        else:
            updates[field] = value
            set_clauses.append(f"{field} = :{field}")
--- a/backend-compliance/compliance/api/process_task_routes.py
+++ b/backend-compliance/compliance/api/process_task_routes.py
--- a/backend-compliance/compliance/api/routes.py
+++ b/backend-compliance/compliance/api/routes.py
@@ -25,6 +25,7 @@ from sqlalchemy.orm import Session

 from classroom_engine.database import get_db

+from .audit_trail_utils import log_audit_trail
 from ..db import (
    ControlDomainEnum,
    ControlRepository,
@@ -312,8 +313,39 @@ async def get_control(
    svc: ControlExportService = Depends(get_ctrl_export_service),
 ) -> ControlResponse:
    """Get a specific control by control_id."""
-    with translate_domain_errors():
-        return svc.get_control(control_id)
+    repo = ControlRepository(db)
+    control = repo.get_by_control_id(control_id)
+    if not control:
+        raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
+
+    evidence_repo = EvidenceRepository(db)
+    evidence = evidence_repo.get_by_control(control.id)
+
+    return ControlResponse(
+        id=control.id,
+        control_id=control.control_id,
+        domain=control.domain.value if control.domain else None,
+        control_type=control.control_type.value if control.control_type else None,
+        title=control.title,
+        description=control.description,
+        pass_criteria=control.pass_criteria,
+        implementation_guidance=control.implementation_guidance,
+        code_reference=control.code_reference,
+        documentation_url=control.documentation_url,
+        is_automated=control.is_automated,
+        automation_tool=control.automation_tool,
+        automation_config=control.automation_config,
+        owner=control.owner,
+        review_frequency_days=control.review_frequency_days,
+        status=control.status.value if control.status else None,
+        status_notes=control.status_notes,
+        status_justification=control.status_justification,
+        last_reviewed_at=control.last_reviewed_at,
+        next_review_at=control.next_review_at,
+        created_at=control.created_at,
+        updated_at=control.updated_at,
+        evidence_count=len(evidence),
+    )


@router.put(
@@ -325,8 +357,83 @@ async def update_control(
    svc: ControlExportService = Depends(get_ctrl_export_service),
 ) -> ControlResponse:
    """Update a control."""
-    with translate_domain_errors():
-        return svc.update_control(control_id, update)
+    repo = ControlRepository(db)
+    control = repo.get_by_control_id(control_id)
+    if not control:
+        raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
+
+    update_data = update.model_dump(exclude_unset=True)
+
+    # Convert status string to enum and validate transition
+    if "status" in update_data:
+        try:
+            new_status_enum = ControlStatusEnum(update_data["status"])
+        except ValueError:
+            raise HTTPException(status_code=400, detail=f"Invalid status: {update_data['status']}")
+
+        # Validate status transition (Anti-Fake-Evidence)
+        from ..services.control_status_machine import validate_transition
+        current_status = control.status.value if control.status else "planned"
+        evidence_list = db.query(EvidenceDB).filter(EvidenceDB.control_id == control.id).all()
+        allowed, violations = validate_transition(
+            current_status=current_status,
+            new_status=update_data["status"],
+            evidence_list=evidence_list,
+            status_justification=update_data.get("status_justification") or update_data.get("status_notes"),
+        )
+        if not allowed:
+            raise HTTPException(
+                status_code=409,
+                detail={
+                    "error": "Status transition not allowed",
+                    "current_status": current_status,
+                    "requested_status": update_data["status"],
+                    "violations": violations,
+                }
+            )
+
+        update_data["status"] = new_status_enum
+
+    updated = repo.update(control.id, **update_data)
+    db.commit()
+
+    # Audit trail for status changes
+    new_status = updated.status.value if updated.status else None
+    if "status" in update.model_dump(exclude_unset=True) and current_status != new_status:
+        log_audit_trail(
+            db, "control", control.id, updated.control_id or updated.title,
+            "status_change",
+            performed_by=update.owner or "system",
+            field_changed="status",
+            old_value=current_status,
+            new_value=new_status,
+        )
+        db.commit()
+
+    return ControlResponse(
+        id=updated.id,
+        control_id=updated.control_id,
+        domain=updated.domain.value if updated.domain else None,
+        control_type=updated.control_type.value if updated.control_type else None,
+        title=updated.title,
+        description=updated.description,
+        pass_criteria=updated.pass_criteria,
+        implementation_guidance=updated.implementation_guidance,
+        code_reference=updated.code_reference,
+        documentation_url=updated.documentation_url,
+        is_automated=updated.is_automated,
+        automation_tool=updated.automation_tool,
+        automation_config=updated.automation_config,
+        owner=updated.owner,
+        review_frequency_days=updated.review_frequency_days,
+        status=updated.status.value if updated.status else None,
+        status_notes=updated.status_notes,
+        status_justification=updated.status_justification,
+        last_reviewed_at=updated.last_reviewed_at,
+        next_review_at=updated.next_review_at,
+        created_at=updated.created_at,
+        updated_at=updated.updated_at,
+    )


@router.put(
@@ -339,8 +446,43 @@ async def review_control(
    svc: ControlExportService = Depends(get_ctrl_export_service),
 ) -> ControlResponse:
    """Mark a control as reviewed with new status."""
-    with translate_domain_errors():
-        return svc.review_control(control_id, review)
+    repo = ControlRepository(db)
+    control = repo.get_by_control_id(control_id)
+    if not control:
+        raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
+
+    try:
+        status_enum = ControlStatusEnum(review.status)
+    except ValueError:
+        raise HTTPException(status_code=400, detail=f"Invalid status: {review.status}")
+
+    updated = repo.mark_reviewed(control.id, status_enum, review.status_notes)
+    db.commit()
+
+    return ControlResponse(
+        id=updated.id,
+        control_id=updated.control_id,
+        domain=updated.domain.value if updated.domain else None,
+        control_type=updated.control_type.value if updated.control_type else None,
+        title=updated.title,
+        description=updated.description,
+        pass_criteria=updated.pass_criteria,
+        implementation_guidance=updated.implementation_guidance,
+        code_reference=updated.code_reference,
+        documentation_url=updated.documentation_url,
+        is_automated=updated.is_automated,
+        automation_tool=updated.automation_tool,
+        automation_config=updated.automation_config,
+        owner=updated.owner,
+        review_frequency_days=updated.review_frequency_days,
+        status=updated.status.value if updated.status else None,
+        status_notes=updated.status_notes,
+        status_justification=updated.status_justification,
+        last_reviewed_at=updated.last_reviewed_at,
+        next_review_at=updated.next_review_at,
+        created_at=updated.created_at,
+        updated_at=updated.updated_at,
+    )


@router.get(
--- a/backend-compliance/compliance/api/schemas.py
+++ b/backend-compliance/compliance/api/schemas.py
--- a/backend-compliance/compliance/api/screening_routes.py
+++ b/backend-compliance/compliance/api/screening_routes.py
@@ -22,7 +22,9 @@ import uuid
 from datetime import datetime, timezone
 from typing import Any

-from fastapi import APIRouter, File, Form, HTTPException, UploadFile
+import httpx
+from fastapi import APIRouter, File, Form, UploadFile, HTTPException
+from pydantic import BaseModel
 from sqlalchemy import text

 from database import SessionLocal  # re-exported below for legacy test patches
@@ -96,15 +98,13 @@ async def scan_dependencies(
    db = SessionLocal()
    try:
        db.execute(
-            text(
-                "INSERT INTO compliance_screenings "
-                "(id, tenant_id, status, sbom_format, sbom_version, "
-                "total_components, total_issues, critical_issues, high_issues, "
-                "medium_issues, low_issues, sbom_data, started_at, completed_at) "
-                "VALUES (:id, :tenant_id, 'completed', 'CycloneDX', '1.5', "
-                ":total_components, :total_issues, :critical, :high, :medium, :low, "
-                ":sbom_data::jsonb, :started_at, :completed_at)"
-            ),
+            text("""INSERT INTO compliance_screenings
+               (id, tenant_id, status, sbom_format, sbom_version,
+                total_components, total_issues, critical_issues, high_issues, medium_issues, low_issues,
+                sbom_data, started_at, completed_at)
+               VALUES (:id, :tenant_id, 'completed', 'CycloneDX', '1.5',
+                       :total_components, :total_issues, :critical, :high, :medium, :low,
+                       :sbom_data::jsonb, :started_at, :completed_at)"""),
            {
                "id": screening_id,
                "tenant_id": tenant_id,
@@ -121,13 +121,11 @@ async def scan_dependencies(
        )
        for issue in issues:
            db.execute(
-                text(
-                    "INSERT INTO compliance_security_issues "
-                    "(id, screening_id, severity, title, description, cve, cvss, "
-                    "affected_component, affected_version, fixed_in, remediation, status) "
-                    "VALUES (:id, :screening_id, :severity, :title, :description, :cve, :cvss, "
-                    ":component, :version, :fixed_in, :remediation, :status)"
-                ),
+                text("""INSERT INTO compliance_security_issues
+                   (id, screening_id, severity, title, description, cve, cvss,
+                    affected_component, affected_version, fixed_in, remediation, status)
+                   VALUES (:id, :screening_id, :severity, :title, :description, :cve, :cvss,
+                           :component, :version, :fixed_in, :remediation, :status)"""),
                {
                    "id": issue["id"],
                    "screening_id": screening_id,
@@ -214,8 +212,77 @@ async def get_screening(screening_id: str) -> ScreeningResponse:
    """Get a screening result by ID."""
    db = SessionLocal()
    try:
-        with translate_domain_errors():
-            return ScreeningService(db).get_screening(screening_id)
+        result = db.execute(
+            text("""SELECT id, status, sbom_format, sbom_version,
+                      total_components, total_issues, critical_issues, high_issues,
+                      medium_issues, low_issues, sbom_data, started_at, completed_at
+               FROM compliance_screenings WHERE id = :id"""),
+            {"id": screening_id},
+        )
+        row = result.fetchone()
+        if not row:
+            raise HTTPException(status_code=404, detail="Screening not found")
+
+        # Fetch issues
+        issues_result = db.execute(
+            text("""SELECT id, severity, title, description, cve, cvss,
+                      affected_component, affected_version, fixed_in, remediation, status
+               FROM compliance_security_issues WHERE screening_id = :id"""),
+            {"id": screening_id},
+        )
+        issues_rows = issues_result.fetchall()
+
+        issues = [
+            SecurityIssueResponse(
+                id=str(r[0]), severity=r[1], title=r[2], description=r[3],
+                cve=r[4], cvss=r[5], affected_component=r[6],
+                affected_version=r[7], fixed_in=r[8], remediation=r[9], status=r[10],
+            )
+            for r in issues_rows
+        ]
+
+        # Reconstruct components from SBOM data
+        sbom_data = row[10] or {}
+        components = []
+        comp_vulns: dict[str, list[dict]] = {}
+        for issue in issues:
+            if issue.affected_component not in comp_vulns:
+                comp_vulns[issue.affected_component] = []
+            comp_vulns[issue.affected_component].append({
+                "id": issue.cve or issue.id,
+                "cve": issue.cve,
+                "severity": issue.severity,
+                "title": issue.title,
+                "cvss": issue.cvss,
+                "fixedIn": issue.fixed_in,
+            })
+
+        for sc in sbom_data.get("components", []):
+            components.append(SBOMComponentResponse(
+                name=sc["name"],
+                version=sc["version"],
+                type=sc.get("type", "library"),
+                purl=sc.get("purl", ""),
+                licenses=sc.get("licenses", []),
+                vulnerabilities=comp_vulns.get(sc["name"], []),
+            ))
+
+        return ScreeningResponse(
+            id=str(row[0]),
+            status=row[1],
+            sbom_format=row[2] or "CycloneDX",
+            sbom_version=row[3] or "1.5",
+            total_components=row[4] or 0,
+            total_issues=row[5] or 0,
+            critical_issues=row[6] or 0,
+            high_issues=row[7] or 0,
+            medium_issues=row[8] or 0,
+            low_issues=row[9] or 0,
+            components=components,
+            issues=issues,
+            started_at=str(row[11]) if row[11] else None,
+            completed_at=str(row[12]) if row[12] else None,
+        )
    finally:
        db.close()

@@ -225,8 +292,33 @@ async def list_screenings(tenant_id: str = "default") -> ScreeningListResponse:
    """List all screenings for a tenant."""
    db = SessionLocal()
    try:
-        with translate_domain_errors():
-            return ScreeningService(db).list_screenings(tenant_id)
+        result = db.execute(
+            text("""SELECT id, status, total_components, total_issues,
+                      critical_issues, high_issues, medium_issues, low_issues,
+                      started_at, completed_at, created_at
+               FROM compliance_screenings
+               WHERE tenant_id = :tenant_id
+               ORDER BY created_at DESC"""),
+            {"tenant_id": tenant_id},
+        )
+        rows = result.fetchall()
+        screenings = [
+            {
+                "id": str(r[0]),
+                "status": r[1],
+                "total_components": r[2],
+                "total_issues": r[3],
+                "critical_issues": r[4],
+                "high_issues": r[5],
+                "medium_issues": r[6],
+                "low_issues": r[7],
+                "started_at": str(r[8]) if r[8] else None,
+                "completed_at": str(r[9]) if r[9] else None,
+                "created_at": str(r[10]),
+            }
+            for r in rows
+        ]
+        return ScreeningListResponse(screenings=screenings, total=len(screenings))
    finally:
        db.close()

--- a/backend-compliance/compliance/api/tom_mapping_routes.py
+++ b/backend-compliance/compliance/api/tom_mapping_routes.py
@@ -0,0 +1,537 @@
+"""
+TOM ↔ Canonical Control Mapping Routes.
+
+Three-layer architecture:
+  TOM Measures (~88, audit-level) → Mapping Bridge → Canonical Controls (10,000+)
+
+Endpoints:
+  POST /v1/tom-mappings/sync         — Sync canonical controls for company profile
+  GET  /v1/tom-mappings              — List all mappings for tenant/project
+  GET  /v1/tom-mappings/by-tom/{code} — Mappings for a specific TOM control
+  GET  /v1/tom-mappings/stats        — Coverage statistics
+  POST /v1/tom-mappings/manual       — Manually add a mapping
+  DELETE /v1/tom-mappings/{id}       — Remove a mapping
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+from typing import Any, Optional
+
+from fastapi import APIRouter, HTTPException, Query, Header
+from pydantic import BaseModel
+from sqlalchemy import text
+
+from database import SessionLocal
+
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/tom-mappings", tags=["tom-control-mappings"])
+
+
+# =============================================================================
+# TOM CATEGORY → CANONICAL CATEGORY MAPPING
+# =============================================================================
+
+# Maps 13 TOM control categories to canonical_control_categories
+# Each TOM category maps to 1-3 canonical categories for broad coverage
+# The sync endpoint iterates this table: for every TOM category (key) it
+# selects canonical controls whose `category` equals one of the listed values.
+TOM_TO_CANONICAL_CATEGORIES: dict[str, list[str]] = {
+    "ACCESS_CONTROL":       ["authentication", "identity", "physical"],
+    "ADMISSION_CONTROL":    ["authentication", "identity", "system"],
+    "ACCESS_AUTHORIZATION": ["authentication", "identity"],
+    "TRANSFER_CONTROL":     ["network", "data_protection", "encryption"],
+    "INPUT_CONTROL":        ["application", "data_protection"],
+    "ORDER_CONTROL":        ["supply_chain", "compliance"],
+    "AVAILABILITY":         ["continuity", "system"],
+    "SEPARATION":           ["network", "data_protection"],
+    "ENCRYPTION":           ["encryption"],
+    "PSEUDONYMIZATION":     ["data_protection", "encryption"],
+    "RESILIENCE":           ["continuity", "system"],
+    "RECOVERY":             ["continuity"],
+    "REVIEW":               ["compliance", "governance", "risk"],
+}
+
+
+# =============================================================================
+# REQUEST / RESPONSE MODELS
+# =============================================================================
+
+class SyncRequest(BaseModel):
+    """Request body for the sync endpoint (canonical controls → TOM measures).
+
+    ``industry`` and ``company_size`` together form the company profile that
+    the sync endpoint hashes for change detection and uses to filter
+    canonical controls.
+    """
+    # Optional industry; when set, sync only keeps controls whose
+    # applicable_industries is NULL, contains "all", or contains this value.
+    industry: Optional[str] = None
+    # Optional company size; filtered the same way against
+    # applicable_company_size.
+    company_size: Optional[str] = None
+    # When True the sync runs even if the profile hash is unchanged.
+    force: bool = False
+
+
+class ManualMappingRequest(BaseModel):
+    """Request body for manually adding a canonical control to a TOM measure."""
+    # TOM control code and TOM category stored on the mapping row as given.
+    tom_control_code: str
+    tom_category: str
+    # ID of the canonical control; the endpoint casts it to a UUID and checks
+    # that the control exists before inserting.
+    canonical_control_id: str
+    # Control code stored on the mapping row as supplied by the client.
+    canonical_control_code: str
+    # Optional; the endpoint falls back to the control's own category when
+    # this is not provided.
+    canonical_category: Optional[str] = None
+    # Relevance of the mapping; defaults to 1.0.
+    relevance_score: float = 1.0
+
+
+# =============================================================================
+# HELPERS
+# =============================================================================
+
+def _get_tenant_id(x_tenant_id: Optional[str]) -> str:
+    """Return the tenant ID taken from the X-Tenant-ID header value.
+
+    Raises:
+        HTTPException: 400 when the header value is missing or empty.
+    """
+    if x_tenant_id:
+        return x_tenant_id
+    raise HTTPException(status_code=400, detail="X-Tenant-ID header required")
+
+
+def _compute_profile_hash(industry: Optional[str], company_size: Optional[str]) -> str:
+    """Return a 16-hex-character fingerprint of the company profile.
+
+    The profile is serialized as JSON with sorted keys and hashed with
+    SHA-256; only the first 16 characters of the hex digest are kept.
+    """
+    profile = {"industry": industry, "company_size": company_size}
+    serialized = json.dumps(profile, sort_keys=True).encode()
+    digest = hashlib.sha256(serialized).hexdigest()
+    return digest[:16]
+
+
+def _mapping_row_to_dict(r) -> dict[str, Any]:
+    """Convert a mapping row to the API response dict.
+
+    Args:
+        r: A result row exposing the mapping columns as attributes
+            (id, tenant_id, project_id, tom_control_code, tom_category,
+            canonical_control_id, canonical_control_code, canonical_category,
+            mapping_type, relevance_score, created_at).
+
+    Returns:
+        A JSON-serializable dict. IDs are stringified, ``created_at`` is an
+        ISO-8601 string (or None), and ``relevance_score`` falls back to 1.0
+        only when the stored value is NULL.
+    """
+    # Compare against None explicitly: a stored score of 0 is a legitimate
+    # value and must not be silently reported as 1.0.
+    score = 1.0 if r.relevance_score is None else float(r.relevance_score)
+    return {
+        "id": str(r.id),
+        "tenant_id": str(r.tenant_id),
+        "project_id": str(r.project_id) if r.project_id else None,
+        "tom_control_code": r.tom_control_code,
+        "tom_category": r.tom_category,
+        "canonical_control_id": str(r.canonical_control_id),
+        "canonical_control_code": r.canonical_control_code,
+        "canonical_category": r.canonical_category,
+        "mapping_type": r.mapping_type,
+        "relevance_score": score,
+        "created_at": r.created_at.isoformat() if r.created_at else None,
+    }
+
+
+# =============================================================================
+# SYNC ENDPOINT
+# =============================================================================
+
+@router.post("/sync")
+async def sync_mappings(
+    body: SyncRequest,
+    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
+    project_id: Optional[str] = Query(None),
+):
+    """
+    Sync canonical controls to TOM measures based on company profile.
+
+    Algorithm:
+    1. Compute profile hash → skip if unchanged (unless force=True)
+    2. For each TOM category, find matching canonical controls by:
+       - Category mapping (TOM category → canonical categories)
+       - Industry filter (applicable_industries JSONB containment)
+       - Company size filter (applicable_company_size JSONB containment)
+       - Only approved + customer_visible controls
+    3. Delete old auto-mappings, insert new ones
+    4. Update sync state
+
+    Args:
+        body: Company profile (industry, company_size) and the force flag.
+        x_tenant_id: Tenant ID from the X-Tenant-ID header (required).
+        project_id: Optional project scope; None addresses the rows whose
+            project_id is NULL.
+
+    Returns:
+        ``{"status": "unchanged", ...}`` when the stored profile hash matches
+        and force is False, otherwise ``{"status": "synced", ...}`` with the
+        mapping counters.
+
+    Raises:
+        HTTPException: 400 when the X-Tenant-ID header is missing.
+    """
+    tenant_id = _get_tenant_id(x_tenant_id)
+    profile_hash = _compute_profile_hash(body.industry, body.company_size)
+
+    # The profile filters do not depend on the TOM category, so build the SQL
+    # fragment and its bind parameters once instead of once per category.
+    profile_filter = ""
+    profile_params: dict[str, Any] = {}
+    if body.industry:
+        profile_filter += """
+                    AND (
+                        applicable_industries IS NULL
+                        OR applicable_industries @> '"all"'::jsonb
+                        OR applicable_industries @> (:industry)::jsonb
+                    )
+        """
+        # Bound as a one-element JSON array for the containment operator.
+        profile_params["industry"] = json.dumps([body.industry])
+    if body.company_size:
+        profile_filter += """
+                    AND (
+                        applicable_company_size IS NULL
+                        OR applicable_company_size @> '"all"'::jsonb
+                        OR applicable_company_size @> (:csize)::jsonb
+                    )
+        """
+        profile_params["csize"] = json.dumps([body.company_size])
+
+    # TOM codes follow pattern: TOM-XX-NN where XX is category abbreviation.
+    # Auto-mappings are stored per TOM category, so the category name is
+    # written to both tom_control_code and tom_category.
+    insert_stmt = text("""
+        INSERT INTO tom_control_mappings (
+            tenant_id, project_id, tom_control_code, tom_category,
+            canonical_control_id, canonical_control_code, canonical_category,
+            mapping_type, relevance_score
+        ) VALUES (
+            :tid, :pid, :tom_cat, :tom_cat,
+            :cc_id, :cc_code, :cc_category,
+            'auto', 1.00
+        )
+        ON CONFLICT (tenant_id, project_id, tom_control_code, canonical_control_id)
+        DO NOTHING
+    """)
+
+    with SessionLocal() as db:
+        # Check if sync is needed (profile unchanged)
+        if not body.force:
+            existing = db.execute(
+                text("""
+                    SELECT profile_hash FROM tom_control_sync_state
+                    WHERE tenant_id = :tid AND (project_id = :pid OR (project_id IS NULL AND :pid IS NULL))
+                """),
+                {"tid": tenant_id, "pid": project_id},
+            ).fetchone()
+            if existing and existing.profile_hash == profile_hash:
+                return {
+                    "status": "unchanged",
+                    "message": "Profile unchanged since last sync",
+                    "profile_hash": profile_hash,
+                }
+
+        # Delete old auto-mappings for this tenant+project (manual ones stay)
+        db.execute(
+            text("""
+                DELETE FROM tom_control_mappings
+                WHERE tenant_id = :tid
+                  AND (project_id = :pid OR (project_id IS NULL AND :pid IS NULL))
+                  AND mapping_type = 'auto'
+            """),
+            {"tid": tenant_id, "pid": project_id},
+        )
+
+        total_mappings = 0
+        canonical_ids_matched: set[str] = set()
+        tom_codes_covered: set[str] = set()
+
+        # For each TOM category, find matching canonical controls
+        for tom_category, canonical_categories in TOM_TO_CANONICAL_CATEGORIES.items():
+            # Plain equality match on the control's category column, one bind
+            # parameter per canonical category.
+            cat_params = {f"cat_{i}": c for i, c in enumerate(canonical_categories)}
+            cat_conditions = " OR ".join(f"category = :{name}" for name in cat_params)
+
+            query = f"""
+                SELECT id, control_id, category
+                FROM canonical_controls
+                WHERE ({cat_conditions})
+                  AND release_state = 'approved'
+                  AND customer_visible = true
+                  {profile_filter}
+                ORDER BY control_id
+            """
+
+            rows = db.execute(text(query), {**cat_params, **profile_params}).fetchall()
+            if not rows:
+                # Nothing to insert; an empty parameter list must not be
+                # passed to execute().
+                continue
+
+            # One executemany call per category instead of one execute per row.
+            db.execute(
+                insert_stmt,
+                [
+                    {
+                        "tid": tenant_id,
+                        "pid": project_id,
+                        "tom_cat": tom_category,
+                        "cc_id": str(row.id),
+                        "cc_code": row.control_id,
+                        "cc_category": row.category,
+                    }
+                    for row in rows
+                ],
+            )
+            total_mappings += len(rows)
+            canonical_ids_matched.update(str(row.id) for row in rows)
+            tom_codes_covered.add(tom_category)
+
+        # Upsert sync state
+        db.execute(
+            text("""
+                INSERT INTO tom_control_sync_state (
+                    tenant_id, project_id, profile_hash,
+                    total_mappings, canonical_controls_matched, tom_controls_covered,
+                    last_synced_at
+                ) VALUES (
+                    :tid, :pid, :hash,
+                    :total, :matched, :covered,
+                    NOW()
+                )
+                ON CONFLICT (tenant_id, project_id)
+                DO UPDATE SET
+                    profile_hash = :hash,
+                    total_mappings = :total,
+                    canonical_controls_matched = :matched,
+                    tom_controls_covered = :covered,
+                    last_synced_at = NOW()
+            """),
+            {
+                "tid": tenant_id,
+                "pid": project_id,
+                "hash": profile_hash,
+                "total": total_mappings,
+                "matched": len(canonical_ids_matched),
+                "covered": len(tom_codes_covered),
+            },
+        )
+
+        # Delete, inserts and sync-state upsert are committed together.
+        db.commit()
+
+    return {
+        "status": "synced",
+        "profile_hash": profile_hash,
+        "total_mappings": total_mappings,
+        "canonical_controls_matched": len(canonical_ids_matched),
+        "tom_categories_covered": len(tom_codes_covered),
+    }
+
+
+# =============================================================================
+# LIST MAPPINGS
+# =============================================================================
+
+@router.get("")
+async def list_mappings(
+    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
+    project_id: Optional[str] = Query(None),
+    tom_category: Optional[str] = Query(None),
+    mapping_type: Optional[str] = Query(None),
+    limit: int = Query(500, ge=1, le=5000),
+    offset: int = Query(0, ge=0),
+):
+    """List all TOM ↔ canonical control mappings for tenant/project.
+
+    Args:
+        x_tenant_id: Tenant ID from the X-Tenant-ID header (required).
+        project_id: Optional project scope; None addresses the rows whose
+            project_id is NULL.
+        tom_category: Optional filter on the TOM category.
+        mapping_type: Optional filter on the mapping type.
+        limit: Page size (1-5000).
+        offset: Number of rows to skip.
+
+    Returns:
+        ``{"mappings": [...], "total": n}`` where ``total`` counts every row
+        matching the same filters as the page, ignoring limit/offset.
+
+    Raises:
+        HTTPException: 400 when the X-Tenant-ID header is missing.
+    """
+    tenant_id = _get_tenant_id(x_tenant_id)
+
+    # One WHERE clause shared by the page query and the count query, so that
+    # `total` always describes the same filtered set as `mappings` (including
+    # the mapping_type filter).
+    where_clause = """
+        WHERE m.tenant_id = :tid
+          AND (m.project_id = :pid OR (m.project_id IS NULL AND :pid IS NULL))
+    """
+    filter_params: dict[str, Any] = {"tid": tenant_id, "pid": project_id}
+
+    if tom_category:
+        where_clause += " AND m.tom_category = :tcat"
+        filter_params["tcat"] = tom_category
+    if mapping_type:
+        where_clause += " AND m.mapping_type = :mtype"
+        filter_params["mtype"] = mapping_type
+
+    query = (
+        """
+        SELECT m.*, cc.title as canonical_title, cc.severity as canonical_severity
+        FROM tom_control_mappings m
+        LEFT JOIN canonical_controls cc ON cc.id = m.canonical_control_id
+        """
+        + where_clause
+        + " ORDER BY m.tom_category, m.canonical_control_code"
+        + " LIMIT :lim OFFSET :off"
+    )
+    page_params: dict[str, Any] = {**filter_params, "lim": limit, "off": offset}
+
+    count_query = "SELECT count(*) FROM tom_control_mappings m" + where_clause
+
+    with SessionLocal() as db:
+        rows = db.execute(text(query), page_params).fetchall()
+        total = db.execute(text(count_query), filter_params).scalar()
+
+    mappings = []
+    for r in rows:
+        d = _mapping_row_to_dict(r)
+        # The joined columns are absent from the base mapping dict.
+        d["canonical_title"] = getattr(r, "canonical_title", None)
+        d["canonical_severity"] = getattr(r, "canonical_severity", None)
+        mappings.append(d)
+
+    return {"mappings": mappings, "total": total}
+
+
+# =============================================================================
+# MAPPINGS BY TOM CONTROL
+# =============================================================================
+
+@router.get("/by-tom/{tom_code}")
+async def get_mappings_by_tom(
+    tom_code: str,
+    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
+    project_id: Optional[str] = Query(None),
+):
+    """Return the canonical controls mapped to one TOM control code or category.
+
+    ``tom_code`` is matched against both the mapping's tom_control_code and
+    its tom_category. Each mapping is enriched with the title, severity and
+    objective of the joined canonical control.
+    """
+    tenant_id = _get_tenant_id(x_tenant_id)
+    bind_values = {"tid": tenant_id, "pid": project_id, "code": tom_code}
+
+    with SessionLocal() as db:
+        result_rows = db.execute(
+            text("""
+                SELECT m.*, cc.title as canonical_title, cc.severity as canonical_severity,
+                       cc.objective as canonical_objective
+                FROM tom_control_mappings m
+                LEFT JOIN canonical_controls cc ON cc.id = m.canonical_control_id
+                WHERE m.tenant_id = :tid
+                  AND (m.project_id = :pid OR (m.project_id IS NULL AND :pid IS NULL))
+                  AND (m.tom_control_code = :code OR m.tom_category = :code)
+                ORDER BY m.canonical_control_code
+            """),
+            bind_values,
+        ).fetchall()
+
+    # Base mapping fields first, then the columns joined from canonical_controls.
+    mappings = [
+        {
+            **_mapping_row_to_dict(row),
+            "canonical_title": getattr(row, "canonical_title", None),
+            "canonical_severity": getattr(row, "canonical_severity", None),
+            "canonical_objective": getattr(row, "canonical_objective", None),
+        }
+        for row in result_rows
+    ]
+
+    return {"tom_code": tom_code, "mappings": mappings, "total": len(mappings)}
+
+
+# =============================================================================
+# STATS
+# =============================================================================
+
+@router.get("/stats")
+async def get_mapping_stats(
+    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
+    project_id: Optional[str] = Query(None),
+):
+    """Report coverage statistics for TOM ↔ canonical control mappings.
+
+    The response combines the stored sync state for the tenant/project, a
+    per-TOM-category breakdown of the mappings, and the number of approved,
+    customer-visible canonical controls.
+    """
+    tenant_id = _get_tenant_id(x_tenant_id)
+    scope = {"tid": tenant_id, "pid": project_id}
+
+    with SessionLocal() as db:
+        # Sync state
+        sync_state = db.execute(
+            text("""
+                SELECT * FROM tom_control_sync_state
+                WHERE tenant_id = :tid
+                  AND (project_id = :pid OR (project_id IS NULL AND :pid IS NULL))
+            """),
+            scope,
+        ).fetchone()
+
+        # Per-category breakdown
+        category_stats = db.execute(
+            text("""
+                SELECT tom_category,
+                       count(*) as total_mappings,
+                       count(DISTINCT canonical_control_id) as unique_controls,
+                       count(*) FILTER (WHERE mapping_type = 'auto') as auto_count,
+                       count(*) FILTER (WHERE mapping_type = 'manual') as manual_count
+                FROM tom_control_mappings
+                WHERE tenant_id = :tid
+                  AND (project_id = :pid OR (project_id IS NULL AND :pid IS NULL))
+                GROUP BY tom_category
+                ORDER BY tom_category
+            """),
+            scope,
+        ).fetchall()
+
+        # Total canonical controls in DB (approved + visible)
+        total_canonical = db.execute(
+            text("""
+                SELECT count(*) FROM canonical_controls
+                WHERE release_state = 'approved' AND customer_visible = true
+            """)
+        ).scalar()
+
+    if sync_state:
+        synced_at = sync_state.last_synced_at
+        state_summary = {
+            "profile_hash": sync_state.profile_hash,
+            "total_mappings": sync_state.total_mappings,
+            "canonical_controls_matched": sync_state.canonical_controls_matched,
+            "tom_controls_covered": sync_state.tom_controls_covered,
+            "last_synced_at": synced_at.isoformat() if synced_at else None,
+        }
+    else:
+        # No sync has been recorded for this tenant/project yet.
+        state_summary = {
+            "profile_hash": None,
+            "total_mappings": 0,
+            "canonical_controls_matched": 0,
+            "tom_controls_covered": 0,
+            "last_synced_at": None,
+        }
+
+    breakdown = []
+    for stat in category_stats:
+        breakdown.append({
+            "tom_category": stat.tom_category,
+            "total_mappings": stat.total_mappings,
+            "unique_controls": stat.unique_controls,
+            "auto_count": stat.auto_count,
+            "manual_count": stat.manual_count,
+        })
+
+    return {
+        "sync_state": state_summary,
+        "category_breakdown": breakdown,
+        "total_canonical_controls_available": total_canonical or 0,
+    }
+
+
+# =============================================================================
+# MANUAL MAPPING
+# =============================================================================
+
+@router.post("/manual", status_code=201)
+async def add_manual_mapping(
+    body: ManualMappingRequest,
+    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
+    project_id: Optional[str] = Query(None),
+):
+    """Manually add a canonical control to a TOM measure.
+
+    Args:
+        body: The mapping to create.
+        x_tenant_id: Tenant ID from the X-Tenant-ID header (required).
+        project_id: Optional project scope stored on the mapping row.
+
+    Returns:
+        The created mapping as an API response dict.
+
+    Raises:
+        HTTPException: 400 when the X-Tenant-ID header is missing, 422 when
+            canonical_control_id is not a valid UUID, 404 when the canonical
+            control does not exist, 409 when the mapping already exists.
+    """
+    import uuid
+
+    from sqlalchemy.exc import IntegrityError
+
+    tenant_id = _get_tenant_id(x_tenant_id)
+
+    # Reject malformed IDs up front; otherwise the uuid cast fails inside the
+    # database and surfaces as an unhandled server error.
+    try:
+        canonical_control_id = str(uuid.UUID(body.canonical_control_id))
+    except ValueError as exc:
+        raise HTTPException(
+            status_code=422, detail="canonical_control_id must be a valid UUID"
+        ) from exc
+
+    with SessionLocal() as db:
+        # Verify canonical control exists
+        cc = db.execute(
+            text("SELECT id, control_id, category FROM canonical_controls WHERE id = CAST(:cid AS uuid)"),
+            {"cid": canonical_control_id},
+        ).fetchone()
+        if not cc:
+            raise HTTPException(status_code=404, detail="Canonical control not found")
+
+        try:
+            row = db.execute(
+                text("""
+                    INSERT INTO tom_control_mappings (
+                        tenant_id, project_id, tom_control_code, tom_category,
+                        canonical_control_id, canonical_control_code, canonical_category,
+                        mapping_type, relevance_score
+                    ) VALUES (
+                        :tid, :pid, :tom_code, :tom_cat,
+                        CAST(:cc_id AS uuid), :cc_code, :cc_category,
+                        'manual', :score
+                    )
+                    RETURNING *
+                """),
+                {
+                    "tid": tenant_id,
+                    "pid": project_id,
+                    "tom_code": body.tom_control_code,
+                    "tom_cat": body.tom_category,
+                    "cc_id": canonical_control_id,
+                    "cc_code": body.canonical_control_code,
+                    # Fall back to the control's own category when none given.
+                    "cc_category": body.canonical_category or cc.category,
+                    "score": body.relevance_score,
+                },
+            ).fetchone()
+            db.commit()
+        except IntegrityError as exc:
+            # Leave the session clean before translating the error.
+            db.rollback()
+            message = str(exc).lower()
+            if "unique" in message or "duplicate" in message:
+                raise HTTPException(status_code=409, detail="Mapping already exists") from exc
+            # Other integrity failures are not "already exists"; re-raise.
+            raise
+
+    return _mapping_row_to_dict(row)
+
+
+# =============================================================================
+# DELETE MAPPING
+# =============================================================================
+
+@router.delete("/{mapping_id}", status_code=204)
+async def delete_mapping(
+    mapping_id: str,
+    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
+):
+    """Remove a mapping (manual or auto).
+
+    The delete is scoped to the calling tenant, so a mapping that belongs to
+    another tenant is reported as not found.
+
+    Raises:
+        HTTPException: 400 when the X-Tenant-ID header is missing, 422 when
+            mapping_id is not a valid UUID, 404 when no mapping was deleted.
+    """
+    import uuid
+
+    tenant_id = _get_tenant_id(x_tenant_id)
+
+    # Reject malformed IDs up front; otherwise the uuid cast fails inside the
+    # database and surfaces as an unhandled server error.
+    try:
+        mapping_uuid = str(uuid.UUID(mapping_id))
+    except ValueError as exc:
+        raise HTTPException(
+            status_code=422, detail="mapping_id must be a valid UUID"
+        ) from exc
+
+    with SessionLocal() as db:
+        result = db.execute(
+            text("""
+                DELETE FROM tom_control_mappings
+                WHERE id = CAST(:mid AS uuid) AND tenant_id = :tid
+            """),
+            {"mid": mapping_uuid, "tid": tenant_id},
+        )
+        if result.rowcount == 0:
+            raise HTTPException(status_code=404, detail="Mapping not found")
+        db.commit()
+
+    return None
--- a/backend-compliance/compliance/api/vvt_library_routes.py
+++ b/backend-compliance/compliance/api/vvt_library_routes.py
@@ -0,0 +1,427 @@
+"""
+FastAPI routes for VVT Master Libraries + Process Templates.
+
+Library endpoints (read-only, global):
+  GET /vvt/libraries                       — Overview: all library types + counts
+  GET /vvt/libraries/data-subjects         — Data subjects (filter: typical_for)
+  GET /vvt/libraries/data-categories       — Hierarchical (filter: parent_id, is_art9, flat)
+  GET /vvt/libraries/recipients            — Recipients (filter: type)
+  GET /vvt/libraries/legal-bases           — Legal bases (filter: is_art9, type)
+  GET /vvt/libraries/retention-rules       — Retention rules
+  GET /vvt/libraries/transfer-mechanisms   — Transfer mechanisms
+  GET /vvt/libraries/purposes              — Purposes (filter: typical_for)
+  GET /vvt/libraries/toms                  — TOMs (filter: category)
+
+Template endpoints:
+  GET  /vvt/templates                      — List templates (filter: business_function, search)
+  GET  /vvt/templates/{id}                 — Single template with resolved labels
+  POST /vvt/templates/{id}/instantiate     — Create VVT activity from template
+"""
+
+import logging
+import uuid
+from datetime import datetime
+from typing import Optional
+
+from fastapi import APIRouter, Depends, HTTPException, Query, Request
+from sqlalchemy.orm import Session
+
+from classroom_engine.database import get_db
+
+from ..db.vvt_library_models import (
+    VVTLibDataSubjectDB,
+    VVTLibDataCategoryDB,
+    VVTLibRecipientDB,
+    VVTLibLegalBasisDB,
+    VVTLibRetentionRuleDB,
+    VVTLibTransferMechanismDB,
+    VVTLibPurposeDB,
+    VVTLibTomDB,
+    VVTProcessTemplateDB,
+)
+from ..db.vvt_models import VVTActivityDB, VVTAuditLogDB
+from .tenant_utils import get_tenant_id
+
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/vvt", tags=["compliance-vvt-libraries"])
+
+
+# ============================================================================
+# Helper: row → dict
+# ============================================================================
+
+def _row_to_dict(row, extra_fields=None):
+    """Serialise a library row into a plain dict.
+
+    ``id`` and ``label_de`` are always present. ``description_de`` is added
+    only when it is truthy, ``sort_order`` whenever the row has that
+    attribute (even if it is ``None``), and each name in ``extra_fields``
+    only when the row has the attribute and its value is not ``None``.
+    """
+    result = {"id": row.id, "label_de": row.label_de}
+
+    description = getattr(row, "description_de", None)
+    if description:
+        result["description_de"] = description
+
+    # A sentinel distinguishes "attribute absent" from "attribute is None".
+    absent = object()
+    sort_order = getattr(row, "sort_order", absent)
+    if sort_order is not absent:
+        result["sort_order"] = sort_order
+
+    for field_name in extra_fields or ():
+        value = getattr(row, field_name, None)
+        if value is not None:
+            result[field_name] = value
+    return result
+
+
+# ============================================================================
+# Library Overview
+# ============================================================================
+
+@router.get("/libraries")
+async def get_libraries_overview(db: Session = Depends(get_db)):
+    """Return every library type together with the number of rows it holds."""
+    # (type slug, ORM model) pairs, in the order they appear in the response.
+    library_models = (
+        ("data-subjects", VVTLibDataSubjectDB),
+        ("data-categories", VVTLibDataCategoryDB),
+        ("recipients", VVTLibRecipientDB),
+        ("legal-bases", VVTLibLegalBasisDB),
+        ("retention-rules", VVTLibRetentionRuleDB),
+        ("transfer-mechanisms", VVTLibTransferMechanismDB),
+        ("purposes", VVTLibPurposeDB),
+        ("toms", VVTLibTomDB),
+    )
+    overview = [
+        {"type": library_type, "count": db.query(model).count()}
+        for library_type, model in library_models
+    ]
+    return {"libraries": overview}
+
+
+# ============================================================================
+# Data Subjects
+# ============================================================================
+
+@router.get("/libraries/data-subjects")
+async def list_data_subjects(
+    typical_for: Optional[str] = Query(None, description="Filter by business function"),
+    db: Session = Depends(get_db),
+):
+    """List data subjects ordered by ``sort_order``.
+
+    When ``typical_for`` is given, only entries whose ``typical_for`` list
+    contains that value are returned; the filter is applied in Python after
+    the rows have been loaded.
+    """
+    ordered = db.query(VVTLibDataSubjectDB).order_by(VVTLibDataSubjectDB.sort_order)
+    result = []
+    for row in ordered.all():
+        entry = _row_to_dict(row, ["art9_relevant", "typical_for"])
+        if typical_for and typical_for not in (entry.get("typical_for") or []):
+            continue
+        result.append(entry)
+    return result
+
+
+# ============================================================================
+# Data Categories (hierarchical)
+# ============================================================================
+
+@router.get("/libraries/data-categories")
+async def list_data_categories(
+    flat: Optional[bool] = Query(False, description="Return flat list instead of tree"),
+    parent_id: Optional[str] = Query(None),
+    is_art9: Optional[bool] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """List data categories, either flat or as a nested tree.
+
+    A flat list is returned when ``flat`` is set or when any filter
+    (``parent_id``, ``is_art9``) is active. Otherwise the items are nested
+    under a ``children`` key, starting from the items without a parent.
+    """
+    query = db.query(VVTLibDataCategoryDB).order_by(VVTLibDataCategoryDB.sort_order)
+    if parent_id is not None:
+        query = query.filter(VVTLibDataCategoryDB.parent_id == parent_id)
+    if is_art9 is not None:
+        query = query.filter(VVTLibDataCategoryDB.is_art9 == is_art9)
+    rows = query.all()
+
+    extra = ["parent_id", "is_art9", "is_art10", "risk_weight", "default_retention_rule", "default_legal_basis"]
+    items = [_row_to_dict(r, extra) for r in rows]
+
+    if flat or parent_id is not None or is_art9 is not None:
+        return items
+
+    # Group by parent. Root items carry no "parent_id" key (None values are
+    # omitted by _row_to_dict), so they are collected under the None key.
+    by_parent: dict = {}
+    for item in items:
+        by_parent.setdefault(item.get("parent_id"), []).append(item)
+
+    # Attach children at every level, not only below the roots. The child
+    # lists hold the same dict objects, so grandchildren and deeper levels
+    # end up nested instead of being dropped.
+    for item in items:
+        children = by_parent.get(item["id"])
+        if children:
+            item["children"] = children
+
+    return by_parent.get(None, [])
+
+
+# ============================================================================
+# Recipients
+# ============================================================================
+
+@router.get("/libraries/recipients")
+async def list_recipients(
+    type: Optional[str] = Query(None, description="INTERNAL, PROCESSOR, CONTROLLER, AUTHORITY"),
+    db: Session = Depends(get_db),
+):
+    """List recipients ordered by ``sort_order``, optionally limited to one ``type``."""
+    recipients = db.query(VVTLibRecipientDB).order_by(VVTLibRecipientDB.sort_order)
+    if type:
+        recipients = recipients.filter(VVTLibRecipientDB.type == type)
+    fields = ["type", "is_third_country", "country"]
+    return [_row_to_dict(row, fields) for row in recipients.all()]
+
+
+# ============================================================================
+# Legal Bases
+# ============================================================================
+
+@router.get("/libraries/legal-bases")
+async def list_legal_bases(
+    is_art9: Optional[bool] = Query(None),
+    type: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """List legal bases ordered by ``sort_order``.
+
+    ``is_art9`` and ``type`` are optional filters; both are applied when both
+    are given.
+    """
+    legal_bases = db.query(VVTLibLegalBasisDB).order_by(VVTLibLegalBasisDB.sort_order)
+    if is_art9 is not None:
+        legal_bases = legal_bases.filter(VVTLibLegalBasisDB.is_art9 == is_art9)
+    if type:
+        legal_bases = legal_bases.filter(VVTLibLegalBasisDB.type == type)
+    fields = ["article", "type", "is_art9", "typical_national_law"]
+    return [_row_to_dict(row, fields) for row in legal_bases.all()]
+
+
+# ============================================================================
+# Retention Rules
+# ============================================================================
+
+@router.get("/libraries/retention-rules")
+async def list_retention_rules(db: Session = Depends(get_db)):
+    """List all retention rules ordered by ``sort_order``."""
+    fields = ["legal_basis", "duration", "duration_unit", "start_event", "deletion_procedure"]
+    ordered = db.query(VVTLibRetentionRuleDB).order_by(VVTLibRetentionRuleDB.sort_order)
+    return [_row_to_dict(rule, fields) for rule in ordered.all()]
+
+
+# ============================================================================
+# Transfer Mechanisms
+# ============================================================================
+
+@router.get("/libraries/transfer-mechanisms")
+async def list_transfer_mechanisms(db: Session = Depends(get_db)):
+    """List all transfer mechanisms ordered by ``sort_order``."""
+    fields = ["article", "requires_tia"]
+    ordered = db.query(VVTLibTransferMechanismDB).order_by(VVTLibTransferMechanismDB.sort_order)
+    return [_row_to_dict(mechanism, fields) for mechanism in ordered.all()]
+
+
+# ============================================================================
+# Purposes
+# ============================================================================
+
+@router.get("/libraries/purposes")
+async def list_purposes(
+    typical_for: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """List purposes ordered by ``sort_order``.
+
+    When ``typical_for`` is given, only entries whose ``typical_for`` list
+    contains that value are kept; the filter runs in Python on the loaded rows.
+    """
+    ordered = db.query(VVTLibPurposeDB).order_by(VVTLibPurposeDB.sort_order)
+    result = []
+    for row in ordered.all():
+        entry = _row_to_dict(row, ["typical_legal_basis", "typical_for"])
+        if typical_for and typical_for not in (entry.get("typical_for") or []):
+            continue
+        result.append(entry)
+    return result
+
+
+# ============================================================================
+# TOMs
+# ============================================================================
+
+@router.get("/libraries/toms")
+async def list_toms(
+    category: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """List TOMs ordered by ``sort_order``, optionally limited to one ``category``."""
+    toms = db.query(VVTLibTomDB).order_by(VVTLibTomDB.sort_order)
+    if category:
+        toms = toms.filter(VVTLibTomDB.category == category)
+    fields = ["category", "art32_reference"]
+    return [_row_to_dict(tom, fields) for tom in toms.all()]
+
+
+# ============================================================================
+# Process Templates
+# ============================================================================
+
+def _template_to_dict(t: VVTProcessTemplateDB) -> dict:
+    """Serialise a process template row into a plain dict.
+
+    Falsy list columns become ``[]``, a falsy ``protection_level`` becomes
+    ``"MEDIUM"`` and a falsy ``dpia_required`` becomes ``False``; all other
+    columns are passed through unchanged.
+    """
+    def _as_list(value):
+        # Collapse None / empty values to a fresh empty list.
+        return value or []
+
+    payload = {
+        "id": t.id,
+        "name": t.name,
+        "description": t.description,
+        "business_function": t.business_function,
+        "purpose_refs": _as_list(t.purpose_refs),
+        "legal_basis_refs": _as_list(t.legal_basis_refs),
+        "data_subject_refs": _as_list(t.data_subject_refs),
+        "data_category_refs": _as_list(t.data_category_refs),
+        "recipient_refs": _as_list(t.recipient_refs),
+        "tom_refs": _as_list(t.tom_refs),
+        "transfer_mechanism_refs": _as_list(t.transfer_mechanism_refs),
+        "retention_rule_ref": t.retention_rule_ref,
+        "typical_systems": _as_list(t.typical_systems),
+        "protection_level": t.protection_level or "MEDIUM",
+        "dpia_required": t.dpia_required or False,
+        "risk_score": t.risk_score,
+        "tags": _as_list(t.tags),
+        "is_system": t.is_system,
+        "sort_order": t.sort_order,
+    }
+    return payload
+
+
+def _resolve_labels(template_dict: dict, db: Session) -> dict:
+    """Add human-readable labels for the library ids held in ``template_dict``.
+
+    For each non-empty ``*_refs`` list a ``*_labels`` dict (id -> ``label_de``)
+    is added; ids without a matching row map to themselves. A
+    ``retention_rule_label`` is added when the referenced rule exists. The
+    dict is modified in place and also returned.
+    """
+    # (refs key, library model, labels key), resolved in this order.
+    lookups = (
+        ("purpose_refs", VVTLibPurposeDB, "purpose_labels"),
+        ("legal_basis_refs", VVTLibLegalBasisDB, "legal_basis_labels"),
+        ("data_subject_refs", VVTLibDataSubjectDB, "data_subject_labels"),
+        ("data_category_refs", VVTLibDataCategoryDB, "data_category_labels"),
+        ("recipient_refs", VVTLibRecipientDB, "recipient_labels"),
+        ("tom_refs", VVTLibTomDB, "tom_labels"),
+        ("transfer_mechanism_refs", VVTLibTransferMechanismDB, "transfer_mechanism_labels"),
+    )
+    for refs_key, model, labels_key in lookups:
+        ref_ids = template_dict.get(refs_key) or []
+        if not ref_ids:
+            continue
+        found = db.query(model).filter(model.id.in_(ref_ids)).all()
+        labels_by_id = {entry.id: entry.label_de for entry in found}
+        template_dict[labels_key] = {
+            ref_id: labels_by_id.get(ref_id, ref_id) for ref_id in ref_ids
+        }
+
+    # The retention rule is a single reference rather than a list.
+    retention_ref = template_dict.get("retention_rule_ref")
+    if retention_ref:
+        rule = (
+            db.query(VVTLibRetentionRuleDB)
+            .filter(VVTLibRetentionRuleDB.id == retention_ref)
+            .first()
+        )
+        if rule:
+            template_dict["retention_rule_label"] = rule.label_de
+
+    return template_dict
+
+
+@router.get("/templates")
+async def list_templates(
+    business_function: Optional[str] = Query(None),
+    search: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """List process templates ordered by ``sort_order``.
+
+    ``business_function`` restricts the result to one function; ``search`` is
+    a case-insensitive substring match against name or description.
+    """
+    templates = db.query(VVTProcessTemplateDB).order_by(VVTProcessTemplateDB.sort_order)
+    if business_function:
+        templates = templates.filter(
+            VVTProcessTemplateDB.business_function == business_function
+        )
+    if search:
+        pattern = f"%{search}%"
+        name_matches = VVTProcessTemplateDB.name.ilike(pattern)
+        description_matches = VVTProcessTemplateDB.description.ilike(pattern)
+        templates = templates.filter((name_matches) | (description_matches))
+    return [_template_to_dict(template) for template in templates.all()]
+
+
+@router.get("/templates/{template_id}")
+async def get_template(
+    template_id: str,
+    db: Session = Depends(get_db),
+):
+    """Return one template, enriched with the labels of its library references.
+
+    Raises:
+        HTTPException: 404 when no template with ``template_id`` exists.
+    """
+    template = (
+        db.query(VVTProcessTemplateDB)
+        .filter(VVTProcessTemplateDB.id == template_id)
+        .first()
+    )
+    if not template:
+        raise HTTPException(status_code=404, detail=f"Template '{template_id}' not found")
+    return _resolve_labels(_template_to_dict(template), db)
+
+
+@router.post("/templates/{template_id}/instantiate", status_code=201)
+async def instantiate_template(
+    template_id: str,
+    http_request: Request,
+    tid: str = Depends(get_tenant_id),
+    db: Session = Depends(get_db),
+):
+    """Create a new VVT activity from a process template.
+
+    The activity is created in status ``DRAFT`` for tenant ``tid``. Library
+    references are copied from the template and additionally resolved to
+    their labels for the free-text fields. A ``CREATE`` audit log entry is
+    written in the same transaction.
+
+    Raises:
+        HTTPException: 404 when no template with ``template_id`` exists.
+    """
+    t = db.query(VVTProcessTemplateDB).filter(VVTProcessTemplateDB.id == template_id).first()
+    if not t:
+        raise HTTPException(status_code=404, detail=f"Template '{template_id}' not found")
+
+    # Generate a VVT-ID that is unique within the tenant. The row count alone
+    # is not enough: once an activity has been deleted the count drops and
+    # count + 1 can collide with an id that is still in use, so probe upwards
+    # until a free id is found.
+    # NOTE(review): two concurrent requests can still pick the same id; that
+    # needs a database-level unique constraint or sequence.
+    sequence = db.query(VVTActivityDB).filter(VVTActivityDB.tenant_id == tid).count() + 1
+    vvt_id = f"VVT-{sequence:04d}"
+    while db.query(VVTActivityDB).filter(
+        VVTActivityDB.tenant_id == tid,
+        VVTActivityDB.vvt_id == vvt_id,
+    ).first() is not None:
+        sequence += 1
+        vvt_id = f"VVT-{sequence:04d}"
+
+    # The same actor is recorded on the activity and on the audit entry.
+    user_id = http_request.headers.get("X-User-ID", "system")
+
+    # Resolve library IDs to freetext labels for backward-compat fields
+    purpose_labels = _resolve_ids(db, VVTLibPurposeDB, t.purpose_refs or [])
+    legal_labels = _resolve_ids(db, VVTLibLegalBasisDB, t.legal_basis_refs or [])
+    subject_labels = _resolve_ids(db, VVTLibDataSubjectDB, t.data_subject_refs or [])
+    category_labels = _resolve_ids(db, VVTLibDataCategoryDB, t.data_category_refs or [])
+    recipient_labels = _resolve_ids(db, VVTLibRecipientDB, t.recipient_refs or [])
+
+    # Resolve retention rule
+    retention_period = {}
+    if t.retention_rule_ref:
+        rr = db.query(VVTLibRetentionRuleDB).filter(VVTLibRetentionRuleDB.id == t.retention_rule_ref).first()
+        if rr:
+            retention_period = {
+                "description": rr.label_de,
+                "legalBasis": rr.legal_basis or "",
+                "deletionProcedure": rr.deletion_procedure or "",
+                "duration": rr.duration,
+                "durationUnit": rr.duration_unit,
+            }
+
+    # Build structured TOMs from tom_refs; TOMs whose category is not one of
+    # the known buckets are left out.
+    structured_toms = {"accessControl": [], "confidentiality": [], "integrity": [], "availability": [], "separation": []}
+    if t.tom_refs:
+        tom_rows = db.query(VVTLibTomDB).filter(VVTLibTomDB.id.in_(t.tom_refs)).all()
+        for tr in tom_rows:
+            cat = tr.category
+            if cat in structured_toms:
+                structured_toms[cat].append(tr.label_de)
+
+    act = VVTActivityDB(
+        tenant_id=tid,
+        vvt_id=vvt_id,
+        name=t.name,
+        description=t.description or "",
+        purposes=purpose_labels,
+        legal_bases=[{"type": lid, "description": lbl} for lid, lbl in zip(t.legal_basis_refs or [], legal_labels)],
+        data_subject_categories=subject_labels,
+        personal_data_categories=category_labels,
+        recipient_categories=[{"type": "unknown", "name": lbl} for lbl in recipient_labels],
+        retention_period=retention_period,
+        business_function=t.business_function,
+        systems=[{"systemId": s, "name": s} for s in (t.typical_systems or [])],
+        protection_level=t.protection_level or "MEDIUM",
+        dpia_required=t.dpia_required or False,
+        structured_toms=structured_toms,
+        status="DRAFT",
+        created_by=user_id,
+        # Library refs
+        purpose_refs=t.purpose_refs,
+        legal_basis_refs=t.legal_basis_refs,
+        data_subject_refs=t.data_subject_refs,
+        data_category_refs=t.data_category_refs,
+        recipient_refs=t.recipient_refs,
+        retention_rule_ref=t.retention_rule_ref,
+        transfer_mechanism_refs=t.transfer_mechanism_refs,
+        tom_refs=t.tom_refs,
+        source_template_id=t.id,
+        risk_score=t.risk_score,
+    )
+    db.add(act)
+    # Flush so act.id is available for the audit entry below.
+    db.flush()
+
+    # Audit log
+    audit = VVTAuditLogDB(
+        tenant_id=tid,
+        action="CREATE",
+        entity_type="activity",
+        entity_id=act.id,
+        changed_by=user_id,
+        new_values={"vvt_id": vvt_id, "source_template_id": t.id, "name": t.name},
+    )
+    db.add(audit)
+    db.commit()
+    db.refresh(act)
+
+    # Return full response
+    from .vvt_routes import _activity_to_response
+    return _activity_to_response(act)
+
+
+def _resolve_ids(db: Session, model, ids: list) -> list:
+    """Map library ids to their ``label_de`` strings, preserving input order.
+
+    Ids without a matching row are returned unchanged. An empty ``ids`` list
+    yields an empty list without querying the database.
+    """
+    if not ids:
+        return []
+    labels_by_id = {}
+    for entry in db.query(model).filter(model.id.in_(ids)).all():
+        labels_by_id[entry.id] = entry.label_de
+    return [labels_by_id.get(ref_id, ref_id) for ref_id in ids]
--- a/backend-compliance/compliance/api/vvt_routes.py
+++ b/backend-compliance/compliance/api/vvt_routes.py
@@ -81,6 +81,54 @@ async def upsert_organization(
 # Activities
 # ============================================================================

+def _activity_to_response(act: VVTActivityDB) -> VVTActivityResponse:
+    """Convert a ``VVTActivityDB`` row into a ``VVTActivityResponse``.
+
+    Falsy list/dict columns are replaced by ``[]``/``{}``, ``id`` and
+    ``dsfa_id`` are rendered as strings, and ``protection_level`` / ``status``
+    / ``dpia_required`` fall back to ``'MEDIUM'`` / ``'DRAFT'`` / ``False``.
+    Library reference columns are passed through unchanged.
+    """
+    payload = dict(
+        id=str(act.id),
+        vvt_id=act.vvt_id,
+        name=act.name,
+        description=act.description,
+        purposes=act.purposes or [],
+        legal_bases=act.legal_bases or [],
+        data_subject_categories=act.data_subject_categories or [],
+        personal_data_categories=act.personal_data_categories or [],
+        recipient_categories=act.recipient_categories or [],
+        third_country_transfers=act.third_country_transfers or [],
+        retention_period=act.retention_period or {},
+        tom_description=act.tom_description,
+        business_function=act.business_function,
+        systems=act.systems or [],
+        deployment_model=act.deployment_model,
+        data_sources=act.data_sources or [],
+        data_flows=act.data_flows or [],
+        protection_level=act.protection_level or 'MEDIUM',
+        dpia_required=act.dpia_required or False,
+        structured_toms=act.structured_toms or {},
+        status=act.status or 'DRAFT',
+        responsible=act.responsible,
+        owner=act.owner,
+        last_reviewed_at=act.last_reviewed_at,
+        next_review_at=act.next_review_at,
+        created_by=act.created_by,
+        dsfa_id=str(act.dsfa_id) if act.dsfa_id else None,
+    )
+    # Library references and derived fields are copied as stored.
+    payload.update(
+        purpose_refs=act.purpose_refs,
+        legal_basis_refs=act.legal_basis_refs,
+        data_subject_refs=act.data_subject_refs,
+        data_category_refs=act.data_category_refs,
+        recipient_refs=act.recipient_refs,
+        retention_rule_ref=act.retention_rule_ref,
+        transfer_mechanism_refs=act.transfer_mechanism_refs,
+        tom_refs=act.tom_refs,
+        source_template_id=act.source_template_id,
+        risk_score=act.risk_score,
+        linked_loeschfristen_ids=act.linked_loeschfristen_ids,
+        linked_tom_measure_ids=act.linked_tom_measure_ids,
+        art30_completeness=act.art30_completeness,
+        created_at=act.created_at,
+        updated_at=act.updated_at,
+    )
+    return VVTActivityResponse(**payload)
+
+
@router.get("/activities", response_model=List[VVTActivityResponse])
 async def list_activities(
    status: Optional[str] = Query(None),
@@ -145,6 +193,107 @@ async def delete_activity(
        return service.delete_activity(tid, activity_id)


+# ============================================================================
+# Art. 30 Completeness Check
+# ============================================================================
+
+@router.get("/activities/{activity_id}/completeness")
+async def get_activity_completeness(
+    activity_id: str,
+    tid: str = Depends(get_tenant_id),
+    db: Session = Depends(get_db),
+):
+    """Return the Art. 30 completeness report for one activity of the tenant.
+
+    Raises:
+        HTTPException: 404 when the activity does not exist for tenant ``tid``.
+    """
+    activity = (
+        db.query(VVTActivityDB)
+        .filter(
+            VVTActivityDB.id == activity_id,
+            VVTActivityDB.tenant_id == tid,
+        )
+        .first()
+    )
+    if not activity:
+        raise HTTPException(status_code=404, detail=f"Activity {activity_id} not found")
+    return _calculate_completeness(activity)
+
+
+def _calculate_completeness(act: VVTActivityDB) -> dict:
+    """Calculate Art. 30 completeness — required fields per GDPR Art. 30(1).
+
+    Ten checks are evaluated; most accept either the free-text field or the
+    corresponding library reference. The third-country transfer check always
+    passes because "no transfer" is a valid state.
+
+    Returns:
+        A dict with ``score`` (integer percentage), ``missing`` (names of the
+        failed checks, in check order), ``warnings``, ``passed`` and ``total``.
+    """
+    # Structured TOMs are stored as a dict of category -> list. A dict whose
+    # lists are all empty is truthy but holds no TOM, so look at the values
+    # instead of the dict itself.
+    structured = act.structured_toms
+    if isinstance(structured, dict):
+        has_structured_toms = any(structured.values())
+    else:
+        has_structured_toms = bool(structured)
+
+    # (name reported in "missing", whether the check passed), in check order.
+    checks = [
+        # 1. Name
+        ("name", bool(act.name)),
+        # 2. Purposes of processing
+        ("purposes", bool(act.purposes) or bool(act.purpose_refs)),
+        # 3. Legal basis
+        ("legal_bases", bool(act.legal_bases) or bool(act.legal_basis_refs)),
+        # 4. Categories of data subjects
+        ("data_subjects", bool(act.data_subject_categories) or bool(act.data_subject_refs)),
+        # 5. Categories of personal data
+        ("data_categories", bool(act.personal_data_categories) or bool(act.data_category_refs)),
+        # 6. Recipients
+        ("recipients", bool(act.recipient_categories) or bool(act.recipient_refs)),
+        # 8. Retention periods
+        (
+            "retention_period",
+            bool(act.retention_period and act.retention_period.get('description'))
+            or bool(act.retention_rule_ref),
+        ),
+        # 9. TOM description
+        (
+            "tom_description",
+            bool(act.tom_description) or bool(act.tom_refs) or has_structured_toms,
+        ),
+        # 10. Responsible person
+        ("responsible", bool(act.responsible)),
+    ]
+
+    missing = [name for name, ok in checks if not ok]
+
+    # 7. Third-country transfer: checked but not strictly required, so it is
+    # counted as passed unconditionally.
+    always_passed = 1
+    total_checks = len(checks) + always_passed
+    passed = sum(1 for _, ok in checks if ok) + always_passed
+
+    # Warnings
+    warnings = []
+    if act.dpia_required and not act.dsfa_id:
+        warnings.append("dpia_required_but_no_dsfa_linked")
+    if act.third_country_transfers and not act.transfer_mechanism_refs:
+        warnings.append("third_country_transfer_without_mechanism")
+
+    # Integer arithmetic avoids float rounding in the percentage.
+    score = (passed * 100) // total_checks
+    return {"score": score, "missing": missing, "warnings": warnings, "passed": passed, "total": total_checks}
+
+
 # ============================================================================
 # Audit Log
 # ============================================================================
--- a/backend-compliance/compliance/data/frameworks/init.py
+++ b/backend-compliance/compliance/data/frameworks/init.py
--- a/backend-compliance/compliance/data/frameworks/csa_ccm.json
+++ b/backend-compliance/compliance/data/frameworks/csa_ccm.json
@@ -0,0 +1,443 @@
+{
+  "framework_id": "CSA_CCM",
+  "display_name": "Cloud Security Alliance CCM v4",
+  "license": {
+    "type": "restricted",
+    "rag_allowed": false,
+    "use_as_metadata": true,
+    "note": "Abstrahierte Struktur — keine Originaltexte uebernommen"
+  },
+  "domains": [
+    {
+      "domain_id": "AIS",
+      "title": "Application and Interface Security",
+      "aliases": ["ais", "application and interface security", "anwendungssicherheit", "schnittstellensicherheit"],
+      "keywords": ["application", "anwendung", "interface", "schnittstelle", "api", "web", "eingabevalidierung"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "AIS-01",
+          "title": "Application Security Policy",
+          "statement": "Sicherheitsrichtlinien fuer Anwendungsentwicklung und Schnittstellenmanagement muessen definiert und angewendet werden.",
+          "keywords": ["policy", "richtlinie", "entwicklung"],
+          "action_hint": "document",
+          "object_hint": "Anwendungssicherheitsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "AIS-02",
+          "title": "Application Security Design",
+          "statement": "Sicherheitsanforderungen muessen in den Entwurf jeder Anwendung integriert werden.",
+          "keywords": ["design", "entwurf", "security by design"],
+          "action_hint": "implement",
+          "object_hint": "Sicherheitsanforderungen im Anwendungsentwurf",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "AIS-03",
+          "title": "Application Security Testing",
+          "statement": "Anwendungen muessen vor dem Deployment und regelmaessig auf Sicherheitsschwachstellen getestet werden.",
+          "keywords": ["testing", "test", "sast", "dast", "penetration"],
+          "action_hint": "test",
+          "object_hint": "Anwendungssicherheitstests",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "AIS-04",
+          "title": "Secure Development Practices",
+          "statement": "Sichere Entwicklungspraktiken (Code Review, Pair Programming, SAST) muessen fuer alle Entwicklungsprojekte gelten.",
+          "keywords": ["development", "entwicklung", "code review", "sast", "praktiken"],
+          "action_hint": "implement",
+          "object_hint": "Sichere Entwicklungspraktiken",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "AIS-05",
+          "title": "API Security",
+          "statement": "APIs muessen authentifiziert, autorisiert und gegen Missbrauch geschuetzt werden.",
+          "keywords": ["api", "schnittstelle", "authentifizierung", "rate limiting"],
+          "action_hint": "implement",
+          "object_hint": "API-Sicherheitskontrollen",
+          "object_class": "interface"
+        },
+        {
+          "subcontrol_id": "AIS-06",
+          "title": "Automated Application Security Testing",
+          "statement": "Automatisierte Sicherheitstests muessen in die CI/CD-Pipeline integriert werden.",
+          "keywords": ["automatisiert", "ci/cd", "pipeline", "sast", "dast"],
+          "action_hint": "configure",
+          "object_hint": "Automatisierte Sicherheitstests in CI/CD",
+          "object_class": "configuration"
+        }
+      ]
+    },
+    {
+      "domain_id": "BCR",
+      "title": "Business Continuity and Resilience",
+      "aliases": ["bcr", "business continuity", "resilience", "geschaeftskontinuitaet", "resilienz"],
+      "keywords": ["continuity", "kontinuitaet", "resilience", "resilienz", "disaster", "recovery", "backup"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "BCR-01",
+          "title": "Business Continuity Planning",
+          "statement": "Ein Geschaeftskontinuitaetsplan muss erstellt, dokumentiert und regelmaessig getestet werden.",
+          "keywords": ["plan", "kontinuitaet", "geschaeft"],
+          "action_hint": "document",
+          "object_hint": "Geschaeftskontinuitaetsplan",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "BCR-02",
+          "title": "Risk Assessment for BCM",
+          "statement": "Risikobewertungen muessen fuer geschaeftskritische Prozesse durchgefuehrt werden.",
+          "keywords": ["risiko", "bewertung", "kritisch"],
+          "action_hint": "assess",
+          "object_hint": "BCM-Risikobewertung",
+          "object_class": "risk_artifact"
+        },
+        {
+          "subcontrol_id": "BCR-03",
+          "title": "Backup and Recovery",
+          "statement": "Datensicherungen muessen regelmaessig erstellt und Wiederherstellungstests durchgefuehrt werden.",
+          "keywords": ["backup", "sicherung", "wiederherstellung", "recovery"],
+          "action_hint": "maintain",
+          "object_hint": "Datensicherung und Wiederherstellung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "BCR-04",
+          "title": "Disaster Recovery Planning",
+          "statement": "Ein Disaster-Recovery-Plan muss dokumentiert und jaehrlich getestet werden.",
+          "keywords": ["disaster", "recovery", "katastrophe"],
+          "action_hint": "document",
+          "object_hint": "Disaster-Recovery-Plan",
+          "object_class": "policy"
+        }
+      ]
+    },
+    {
+      "domain_id": "CCC",
+      "title": "Change Control and Configuration Management",
+      "aliases": ["ccc", "change control", "configuration management", "aenderungsmanagement", "konfigurationsmanagement"],
+      "keywords": ["change", "aenderung", "konfiguration", "configuration", "release", "deployment"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "CCC-01",
+          "title": "Change Management Policy",
+          "statement": "Ein Aenderungsmanagement-Prozess muss definiert und fuer alle Aenderungen angewendet werden.",
+          "keywords": ["policy", "richtlinie", "aenderung"],
+          "action_hint": "document",
+          "object_hint": "Aenderungsmanagement-Richtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "CCC-02",
+          "title": "Change Testing",
+          "statement": "Aenderungen muessen vor der Produktivsetzung getestet und genehmigt werden.",
+          "keywords": ["test", "genehmigung", "approval"],
+          "action_hint": "test",
+          "object_hint": "Aenderungstests",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "CCC-03",
+          "title": "Configuration Baseline",
+          "statement": "Basiskonfigurationen fuer alle Systeme muessen definiert und dokumentiert werden.",
+          "keywords": ["baseline", "basis", "standard"],
+          "action_hint": "define",
+          "object_hint": "Konfigurationsbaseline",
+          "object_class": "configuration"
+        }
+      ]
+    },
+    {
+      "domain_id": "CEK",
+      "title": "Cryptography, Encryption and Key Management",
+      "aliases": ["cek", "cryptography", "encryption", "key management", "kryptographie", "verschluesselung", "schluesselverwaltung"],
+      "keywords": ["kryptographie", "verschluesselung", "schluessel", "key", "encryption", "certificate", "zertifikat"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "CEK-01",
+          "title": "Encryption Policy",
+          "statement": "Verschluesselungsrichtlinien muessen definiert werden, die Algorithmen, Schluessellaengen und Einsatzbereiche festlegen.",
+          "keywords": ["policy", "richtlinie", "algorithmus"],
+          "action_hint": "document",
+          "object_hint": "Verschluesselungsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "CEK-02",
+          "title": "Key Management",
+          "statement": "Kryptographische Schluessel muessen ueber ihren Lebenszyklus sicher verwaltet werden.",
+          "keywords": ["key", "schluessel", "management", "lebenszyklus"],
+          "action_hint": "maintain",
+          "object_hint": "Schluesselverwaltung",
+          "object_class": "cryptographic_control"
+        },
+        {
+          "subcontrol_id": "CEK-03",
+          "title": "Data Encryption",
+          "statement": "Sensible Daten muessen bei Speicherung und Uebertragung verschluesselt werden.",
+          "keywords": ["data", "daten", "speicherung", "uebertragung"],
+          "action_hint": "encrypt",
+          "object_hint": "Datenverschluesselung",
+          "object_class": "cryptographic_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "DSP",
+      "title": "Data Security and Privacy",
+      "aliases": ["dsp", "data security", "privacy", "datensicherheit", "datenschutz"],
+      "keywords": ["datenschutz", "datensicherheit", "privacy", "data security", "pii", "personenbezogen", "dsgvo"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "DSP-01",
+          "title": "Data Classification",
+          "statement": "Daten muessen nach Sensibilitaet klassifiziert und entsprechend geschuetzt werden.",
+          "keywords": ["klassifizierung", "sensibilitaet", "classification"],
+          "action_hint": "define",
+          "object_hint": "Datenklassifizierung",
+          "object_class": "data"
+        },
+        {
+          "subcontrol_id": "DSP-02",
+          "title": "Data Inventory",
+          "statement": "Ein Dateninventar muss gefuehrt werden, das alle Verarbeitungen personenbezogener Daten dokumentiert.",
+          "keywords": ["inventar", "verzeichnis", "verarbeitung", "vvt"],
+          "action_hint": "maintain",
+          "object_hint": "Dateninventar",
+          "object_class": "register"
+        },
+        {
+          "subcontrol_id": "DSP-03",
+          "title": "Data Retention and Deletion",
+          "statement": "Aufbewahrungsfristen muessen definiert und Daten nach Ablauf sicher geloescht werden.",
+          "keywords": ["retention", "aufbewahrung", "loeschung", "frist"],
+          "action_hint": "delete",
+          "object_hint": "Datenloeschung nach Frist",
+          "object_class": "data"
+        },
+        {
+          "subcontrol_id": "DSP-04",
+          "title": "Privacy Impact Assessment",
+          "statement": "Datenschutz-Folgenabschaetzungen muessen fuer risikoreiche Verarbeitungen durchgefuehrt werden.",
+          "keywords": ["dsfa", "pia", "folgenabschaetzung", "impact"],
+          "action_hint": "assess",
+          "object_hint": "Datenschutz-Folgenabschaetzung",
+          "object_class": "risk_artifact"
+        },
+        {
+          "subcontrol_id": "DSP-05",
+          "title": "Data Subject Rights",
+          "statement": "Verfahren zur Bearbeitung von Betroffenenrechten muessen implementiert werden.",
+          "keywords": ["betroffenenrechte", "auskunft", "loeschung", "data subject"],
+          "action_hint": "implement",
+          "object_hint": "Betroffenenrechte-Verfahren",
+          "object_class": "process"
+        }
+      ]
+    },
+    {
+      "domain_id": "GRC",
+      "title": "Governance, Risk and Compliance",
+      "aliases": ["grc", "governance", "risk", "compliance", "risikomanagement"],
+      "keywords": ["governance", "risiko", "compliance", "management", "policy", "richtlinie"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "GRC-01",
+          "title": "Information Security Program",
+          "statement": "Ein umfassendes Informationssicherheitsprogramm muss etabliert und aufrechterhalten werden.",
+          "keywords": ["programm", "sicherheit", "information"],
+          "action_hint": "maintain",
+          "object_hint": "Informationssicherheitsprogramm",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "GRC-02",
+          "title": "Risk Management Program",
+          "statement": "Ein Risikomanagement-Programm muss implementiert werden, das Identifikation, Bewertung und Behandlung umfasst.",
+          "keywords": ["risiko", "management", "bewertung", "behandlung"],
+          "action_hint": "implement",
+          "object_hint": "Risikomanagement-Programm",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "GRC-03",
+          "title": "Compliance Monitoring",
+          "statement": "Die Einhaltung regulatorischer und vertraglicher Anforderungen muss ueberwacht werden.",
+          "keywords": ["compliance", "einhaltung", "regulatorisch", "ueberwachung"],
+          "action_hint": "monitor",
+          "object_hint": "Compliance-Ueberwachung",
+          "object_class": "process"
+        }
+      ]
+    },
+    {
+      "domain_id": "IAM",
+      "title": "Identity and Access Management",
+      "aliases": ["iam", "identity", "access management", "identitaetsmanagement", "zugriffsverwaltung"],
+      "keywords": ["identitaet", "zugriff", "identity", "access", "authentifizierung", "autorisierung", "sso"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "IAM-01",
+          "title": "Identity and Access Policy",
+          "statement": "Identitaets- und Zugriffsmanagement-Richtlinien muessen definiert werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "IAM-Richtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "IAM-02",
+          "title": "Strong Authentication",
+          "statement": "Starke Authentifizierung (MFA) muss fuer administrative und sicherheitskritische Zugriffe gefordert werden.",
+          "keywords": ["mfa", "stark", "authentifizierung", "admin"],
+          "action_hint": "implement",
+          "object_hint": "Starke Authentifizierung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "IAM-03",
+          "title": "Identity Lifecycle Management",
+          "statement": "Identitaeten muessen ueber ihren gesamten Lebenszyklus verwaltet werden.",
+          "keywords": ["lifecycle", "lebenszyklus", "onboarding", "offboarding"],
+          "action_hint": "maintain",
+          "object_hint": "Identitaets-Lebenszyklus",
+          "object_class": "account"
+        },
+        {
+          "subcontrol_id": "IAM-04",
+          "title": "Access Review",
+          "statement": "Zugriffsrechte muessen regelmaessig ueberprueft und ueberschuessige Rechte entzogen werden.",
+          "keywords": ["review", "ueberpruefen", "rechte", "rezertifizierung"],
+          "action_hint": "review",
+          "object_hint": "Zugriffsrechte-Review",
+          "object_class": "access_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "LOG",
+      "title": "Logging and Monitoring",
+      "aliases": ["log", "logging", "monitoring", "protokollierung", "ueberwachung"],
+      "keywords": ["logging", "monitoring", "protokollierung", "ueberwachung", "siem", "alarm"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "LOG-01",
+          "title": "Logging Policy",
+          "statement": "Protokollierungs-Richtlinien muessen definiert werden, die Umfang und Aufbewahrung festlegen.",
+          "keywords": ["policy", "richtlinie", "umfang", "aufbewahrung"],
+          "action_hint": "document",
+          "object_hint": "Protokollierungsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "LOG-02",
+          "title": "Security Event Logging",
+          "statement": "Sicherheitsrelevante Ereignisse muessen erfasst und zentral gespeichert werden.",
+          "keywords": ["event", "ereignis", "sicherheit", "zentral"],
+          "action_hint": "configure",
+          "object_hint": "Sicherheits-Event-Logging",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "LOG-03",
+          "title": "Monitoring and Alerting",
+          "statement": "Sicherheitsrelevante Logs muessen ueberwacht und bei Anomalien Alarme ausgeloest werden.",
+          "keywords": ["monitoring", "alerting", "alarm", "anomalie"],
+          "action_hint": "monitor",
+          "object_hint": "Log-Ueberwachung und Alarmierung",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "SEF",
+      "title": "Security Incident Management",
+      "aliases": ["sef", "security incident", "incident management", "vorfallmanagement", "sicherheitsvorfall"],
+      "keywords": ["vorfall", "incident", "sicherheitsvorfall", "reaktion", "response", "meldung"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "SEF-01",
+          "title": "Incident Management Policy",
+          "statement": "Ein Vorfallmanagement-Prozess muss definiert, dokumentiert und getestet werden.",
+          "keywords": ["policy", "richtlinie", "prozess"],
+          "action_hint": "document",
+          "object_hint": "Vorfallmanagement-Richtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "SEF-02",
+          "title": "Incident Response Team",
+          "statement": "Ein Incident-Response-Team muss benannt und geschult werden.",
+          "keywords": ["team", "response", "schulung"],
+          "action_hint": "define",
+          "object_hint": "Incident-Response-Team",
+          "object_class": "role"
+        },
+        {
+          "subcontrol_id": "SEF-03",
+          "title": "Incident Reporting",
+          "statement": "Sicherheitsvorfaelle muessen innerhalb definierter Fristen an zustaendige Stellen gemeldet werden.",
+          "keywords": ["reporting", "meldung", "frist", "behoerde"],
+          "action_hint": "report",
+          "object_hint": "Vorfallmeldung",
+          "object_class": "incident"
+        },
+        {
+          "subcontrol_id": "SEF-04",
+          "title": "Incident Lessons Learned",
+          "statement": "Nach jedem Vorfall muss eine Nachbereitung mit Lessons Learned durchgefuehrt werden.",
+          "keywords": ["lessons learned", "nachbereitung", "verbesserung"],
+          "action_hint": "review",
+          "object_hint": "Vorfall-Nachbereitung",
+          "object_class": "record"
+        }
+      ]
+    },
+    {
+      "domain_id": "TVM",
+      "title": "Threat and Vulnerability Management",
+      "aliases": ["tvm", "threat", "vulnerability", "schwachstelle", "bedrohung", "schwachstellenmanagement"],
+      "keywords": ["schwachstelle", "vulnerability", "threat", "bedrohung", "patch", "scan"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "TVM-01",
+          "title": "Vulnerability Management Policy",
+          "statement": "Schwachstellenmanagement-Richtlinien muessen definiert und umgesetzt werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Schwachstellenmanagement-Richtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "TVM-02",
+          "title": "Vulnerability Scanning",
+          "statement": "Systeme muessen regelmaessig auf Schwachstellen gescannt werden.",
+          "keywords": ["scan", "scanning", "regelmaessig"],
+          "action_hint": "test",
+          "object_hint": "Schwachstellenscan",
+          "object_class": "system"
+        },
+        {
+          "subcontrol_id": "TVM-03",
+          "title": "Vulnerability Remediation",
+          "statement": "Erkannte Schwachstellen muessen priorisiert und innerhalb definierter Fristen behoben werden.",
+          "keywords": ["remediation", "behebung", "frist", "priorisierung"],
+          "action_hint": "remediate",
+          "object_hint": "Schwachstellenbehebung",
+          "object_class": "system"
+        },
+        {
+          "subcontrol_id": "TVM-04",
+          "title": "Penetration Testing",
+          "statement": "Regelmaessige Penetrationstests muessen durchgefuehrt werden.",
+          "keywords": ["penetration", "pentest", "test"],
+          "action_hint": "test",
+          "object_hint": "Penetrationstest",
+          "object_class": "system"
+        }
+      ]
+    }
+  ]
+}
--- a/backend-compliance/compliance/data/frameworks/nist_sp800_53.json
+++ b/backend-compliance/compliance/data/frameworks/nist_sp800_53.json
@@ -0,0 +1,514 @@
+{
+  "framework_id": "NIST_SP800_53",
+  "display_name": "NIST SP 800-53 Rev. 5",
+  "license": {
+    "type": "public_domain",
+    "rag_allowed": true,
+    "use_as_metadata": true
+  },
+  "domains": [
+    {
+      "domain_id": "AC",
+      "title": "Access Control",
+      "aliases": ["access control", "zugriffskontrolle", "zugriffssteuerung"],
+      "keywords": ["access", "zugriff", "berechtigung", "authorization", "autorisierung"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "AC-1",
+          "title": "Access Control Policy and Procedures",
+          "statement": "Zugriffskontrollrichtlinien und -verfahren muessen definiert, dokumentiert und regelmaessig ueberprueft werden.",
+          "keywords": ["policy", "richtlinie", "verfahren", "procedures"],
+          "action_hint": "document",
+          "object_hint": "Zugriffskontrollrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "AC-2",
+          "title": "Account Management",
+          "statement": "Benutzerkonten muessen ueber ihren gesamten Lebenszyklus verwaltet werden: Erstellung, Aktivierung, Aenderung, Deaktivierung und Loeschung.",
+          "keywords": ["account", "konto", "benutzer", "lifecycle", "lebenszyklus"],
+          "action_hint": "maintain",
+          "object_hint": "Benutzerkontenverwaltung",
+          "object_class": "account"
+        },
+        {
+          "subcontrol_id": "AC-3",
+          "title": "Access Enforcement",
+          "statement": "Der Zugriff auf Systemressourcen muss gemaess der definierten Zugriffskontrollrichtlinie durchgesetzt werden.",
+          "keywords": ["enforcement", "durchsetzung", "ressourcen", "system"],
+          "action_hint": "restrict_access",
+          "object_hint": "Zugriffsdurchsetzung",
+          "object_class": "access_control"
+        },
+        {
+          "subcontrol_id": "AC-5",
+          "title": "Separation of Duties",
+          "statement": "Aufgabentrennung muss definiert und durchgesetzt werden, um Interessenkonflikte und Missbrauch zu verhindern.",
+          "keywords": ["separation", "trennung", "duties", "aufgaben", "funktionstrennung"],
+          "action_hint": "define",
+          "object_hint": "Aufgabentrennung",
+          "object_class": "role"
+        },
+        {
+          "subcontrol_id": "AC-6",
+          "title": "Least Privilege",
+          "statement": "Zugriffsrechte muessen nach dem Prinzip der minimalen Rechte vergeben werden.",
+          "keywords": ["least privilege", "minimal", "rechte", "privileg"],
+          "action_hint": "restrict_access",
+          "object_hint": "Minimale Rechtevergabe",
+          "object_class": "access_control"
+        },
+        {
+          "subcontrol_id": "AC-7",
+          "title": "Unsuccessful Logon Attempts",
+          "statement": "Fehlgeschlagene Anmeldeversuche muessen begrenzt und ueberwacht werden.",
+          "keywords": ["logon", "anmeldung", "fehlgeschlagen", "sperre", "lockout"],
+          "action_hint": "monitor",
+          "object_hint": "Anmeldeversuchsueberwachung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "AC-17",
+          "title": "Remote Access",
+          "statement": "Fernzugriff muss autorisiert, ueberwacht und verschluesselt werden.",
+          "keywords": ["remote", "fern", "vpn", "fernzugriff"],
+          "action_hint": "configure",
+          "object_hint": "Fernzugriffskonfiguration",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "AU",
+      "title": "Audit and Accountability",
+      "aliases": ["audit", "protokollierung", "accountability", "rechenschaftspflicht"],
+      "keywords": ["audit", "log", "protokoll", "nachvollziehbarkeit", "logging"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "AU-1",
+          "title": "Audit Policy and Procedures",
+          "statement": "Audit- und Protokollierungsrichtlinien muessen definiert und regelmaessig ueberprueft werden.",
+          "keywords": ["policy", "richtlinie", "audit"],
+          "action_hint": "document",
+          "object_hint": "Auditrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "AU-2",
+          "title": "Event Logging",
+          "statement": "Sicherheitsrelevante Ereignisse muessen identifiziert und protokolliert werden.",
+          "keywords": ["event", "ereignis", "logging", "protokollierung"],
+          "action_hint": "configure",
+          "object_hint": "Ereignisprotokollierung",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "AU-3",
+          "title": "Content of Audit Records",
+          "statement": "Audit-Eintraege muessen ausreichende Informationen enthalten: Zeitstempel, Quelle, Ergebnis, Identitaet.",
+          "keywords": ["content", "inhalt", "record", "eintrag"],
+          "action_hint": "define",
+          "object_hint": "Audit-Eintragsformat",
+          "object_class": "record"
+        },
+        {
+          "subcontrol_id": "AU-6",
+          "title": "Audit Record Review and Reporting",
+          "statement": "Audit-Eintraege muessen regelmaessig ueberprueft und festgestellte Anomalien berichtet werden.",
+          "keywords": ["review", "ueberpruefen", "reporting", "anomalie"],
+          "action_hint": "review",
+          "object_hint": "Audit-Ueberpruefung",
+          "object_class": "record"
+        },
+        {
+          "subcontrol_id": "AU-9",
+          "title": "Protection of Audit Information",
+          "statement": "Audit-Daten muessen vor unbefugtem Zugriff, Aenderung und Loeschung geschuetzt werden.",
+          "keywords": ["schutz", "protection", "integritaet", "integrity"],
+          "action_hint": "implement",
+          "object_hint": "Audit-Datenschutz",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "AT",
+      "title": "Awareness and Training",
+      "aliases": ["awareness", "training", "schulung", "sensibilisierung"],
+      "keywords": ["training", "schulung", "awareness", "sensibilisierung", "weiterbildung"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "AT-1",
+          "title": "Policy and Procedures",
+          "statement": "Schulungs- und Sensibilisierungsrichtlinien muessen definiert und regelmaessig aktualisiert werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Schulungsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "AT-2",
+          "title": "Literacy Training and Awareness",
+          "statement": "Alle Mitarbeiter muessen regelmaessig Sicherheitsschulungen erhalten.",
+          "keywords": ["mitarbeiter", "schulung", "sicherheit"],
+          "action_hint": "train",
+          "object_hint": "Sicherheitsschulung",
+          "object_class": "training"
+        },
+        {
+          "subcontrol_id": "AT-3",
+          "title": "Role-Based Training",
+          "statement": "Rollenbasierte Sicherheitsschulungen muessen fuer Mitarbeiter mit besonderen Sicherheitsaufgaben durchgefuehrt werden.",
+          "keywords": ["rollenbasiert", "role-based", "speziell"],
+          "action_hint": "train",
+          "object_hint": "Rollenbasierte Sicherheitsschulung",
+          "object_class": "training"
+        }
+      ]
+    },
+    {
+      "domain_id": "CM",
+      "title": "Configuration Management",
+      "aliases": ["configuration management", "konfigurationsmanagement", "konfiguration"],
+      "keywords": ["konfiguration", "configuration", "baseline", "haertung", "hardening"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "CM-1",
+          "title": "Policy and Procedures",
+          "statement": "Konfigurationsmanagement-Richtlinien muessen dokumentiert und gepflegt werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Konfigurationsmanagement-Richtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "CM-2",
+          "title": "Baseline Configuration",
+          "statement": "Basiskonfigurationen fuer Systeme muessen definiert, dokumentiert und gepflegt werden.",
+          "keywords": ["baseline", "basis", "standard"],
+          "action_hint": "define",
+          "object_hint": "Basiskonfiguration",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "CM-6",
+          "title": "Configuration Settings",
+          "statement": "Sicherheitsrelevante Konfigurationseinstellungen muessen definiert und durchgesetzt werden.",
+          "keywords": ["settings", "einstellungen", "sicherheit"],
+          "action_hint": "configure",
+          "object_hint": "Sicherheitskonfiguration",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "CM-7",
+          "title": "Least Functionality",
+          "statement": "Systeme muessen so konfiguriert werden, dass nur notwendige Funktionen aktiv sind.",
+          "keywords": ["least functionality", "minimal", "dienste", "ports"],
+          "action_hint": "configure",
+          "object_hint": "Minimalkonfiguration",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "CM-8",
+          "title": "System Component Inventory",
+          "statement": "Ein Inventar aller Systemkomponenten muss gefuehrt und aktuell gehalten werden.",
+          "keywords": ["inventar", "inventory", "komponenten", "assets"],
+          "action_hint": "maintain",
+          "object_hint": "Systemkomponenten-Inventar",
+          "object_class": "register"
+        }
+      ]
+    },
+    {
+      "domain_id": "IA",
+      "title": "Identification and Authentication",
+      "aliases": ["identification", "authentication", "identifikation", "authentifizierung"],
+      "keywords": ["authentifizierung", "identifikation", "identity", "passwort", "mfa", "credential"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "IA-1",
+          "title": "Policy and Procedures",
+          "statement": "Identifikations- und Authentifizierungsrichtlinien muessen dokumentiert und regelmaessig ueberprueft werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Authentifizierungsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "IA-2",
+          "title": "Identification and Authentication",
+          "statement": "Benutzer und Geraete muessen eindeutig identifiziert und authentifiziert werden.",
+          "keywords": ["benutzer", "geraete", "identifizierung"],
+          "action_hint": "implement",
+          "object_hint": "Benutzerauthentifizierung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "IA-2(1)",
+          "title": "Multi-Factor Authentication",
+          "statement": "Multi-Faktor-Authentifizierung muss fuer privilegierte Konten implementiert werden.",
+          "keywords": ["mfa", "multi-faktor", "zwei-faktor", "2fa"],
+          "action_hint": "implement",
+          "object_hint": "Multi-Faktor-Authentifizierung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "IA-5",
+          "title": "Authenticator Management",
+          "statement": "Authentifizierungsmittel (Passwoerter, Token, Zertifikate) muessen sicher verwaltet werden.",
+          "keywords": ["passwort", "token", "zertifikat", "credential"],
+          "action_hint": "maintain",
+          "object_hint": "Authentifizierungsmittel-Verwaltung",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "IR",
+      "title": "Incident Response",
+      "aliases": ["incident response", "vorfallbehandlung", "vorfallreaktion", "incident management"],
+      "keywords": ["vorfall", "incident", "reaktion", "response", "breach", "sicherheitsvorfall"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "IR-1",
+          "title": "Policy and Procedures",
+          "statement": "Vorfallreaktionsrichtlinien und -verfahren muessen definiert und regelmaessig aktualisiert werden.",
+          "keywords": ["policy", "richtlinie", "verfahren"],
+          "action_hint": "document",
+          "object_hint": "Vorfallreaktionsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "IR-2",
+          "title": "Incident Response Training",
+          "statement": "Mitarbeiter muessen regelmaessig in der Vorfallreaktion geschult werden.",
+          "keywords": ["training", "schulung"],
+          "action_hint": "train",
+          "object_hint": "Vorfallreaktionsschulung",
+          "object_class": "training"
+        },
+        {
+          "subcontrol_id": "IR-4",
+          "title": "Incident Handling",
+          "statement": "Ein strukturierter Prozess fuer die Vorfallbehandlung muss implementiert werden: Erkennung, Analyse, Eindaemmung, Behebung.",
+          "keywords": ["handling", "behandlung", "erkennung", "eindaemmung"],
+          "action_hint": "implement",
+          "object_hint": "Vorfallbehandlungsprozess",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "IR-5",
+          "title": "Incident Monitoring",
+          "statement": "Sicherheitsvorfaelle muessen kontinuierlich ueberwacht und verfolgt werden.",
+          "keywords": ["monitoring", "ueberwachung", "tracking"],
+          "action_hint": "monitor",
+          "object_hint": "Vorfallsueberwachung",
+          "object_class": "incident"
+        },
+        {
+          "subcontrol_id": "IR-6",
+          "title": "Incident Reporting",
+          "statement": "Sicherheitsvorfaelle muessen innerhalb definierter Fristen an die zustaendigen Stellen gemeldet werden.",
+          "keywords": ["reporting", "meldung", "melden", "frist"],
+          "action_hint": "report",
+          "object_hint": "Vorfallmeldung",
+          "object_class": "incident"
+        },
+        {
+          "subcontrol_id": "IR-8",
+          "title": "Incident Response Plan",
+          "statement": "Ein Vorfallreaktionsplan muss dokumentiert und regelmaessig getestet werden.",
+          "keywords": ["plan", "dokumentation", "test"],
+          "action_hint": "document",
+          "object_hint": "Vorfallreaktionsplan",
+          "object_class": "policy"
+        }
+      ]
+    },
+    {
+      "domain_id": "RA",
+      "title": "Risk Assessment",
+      "aliases": ["risk assessment", "risikobewertung", "risikoanalyse"],
+      "keywords": ["risiko", "risk", "bewertung", "assessment", "analyse", "bedrohung", "threat"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "RA-1",
+          "title": "Policy and Procedures",
+          "statement": "Risikobewertungsrichtlinien muessen dokumentiert und regelmaessig aktualisiert werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Risikobewertungsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "RA-3",
+          "title": "Risk Assessment",
+          "statement": "Regelmaessige Risikobewertungen muessen durchgefuehrt und dokumentiert werden.",
+          "keywords": ["bewertung", "assessment", "regelmaessig"],
+          "action_hint": "assess",
+          "object_hint": "Risikobewertung",
+          "object_class": "risk_artifact"
+        },
+        {
+          "subcontrol_id": "RA-5",
+          "title": "Vulnerability Monitoring and Scanning",
+          "statement": "Systeme muessen regelmaessig auf Schwachstellen gescannt und ueberwacht werden.",
+          "keywords": ["vulnerability", "schwachstelle", "scan", "monitoring"],
+          "action_hint": "monitor",
+          "object_hint": "Schwachstellenueberwachung",
+          "object_class": "system"
+        }
+      ]
+    },
+    {
+      "domain_id": "SC",
+      "title": "System and Communications Protection",
+      "aliases": ["system protection", "communications protection", "kommunikationsschutz", "systemschutz"],
+      "keywords": ["verschluesselung", "encryption", "tls", "netzwerk", "network", "kommunikation", "firewall"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "SC-1",
+          "title": "Policy and Procedures",
+          "statement": "System- und Kommunikationsschutzrichtlinien muessen dokumentiert und aktuell gehalten werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Kommunikationsschutzrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "SC-7",
+          "title": "Boundary Protection",
+          "statement": "Netzwerkgrenzen muessen durch Firewall-Regeln und Zugangskontrollen geschuetzt werden.",
+          "keywords": ["boundary", "grenze", "firewall", "netzwerk"],
+          "action_hint": "implement",
+          "object_hint": "Netzwerkgrenzschutz",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "SC-8",
+          "title": "Transmission Confidentiality and Integrity",
+          "statement": "Daten muessen bei der Uebertragung durch Verschluesselung geschuetzt werden.",
+          "keywords": ["transmission", "uebertragung", "verschluesselung", "tls"],
+          "action_hint": "encrypt",
+          "object_hint": "Uebertragungsverschluesselung",
+          "object_class": "cryptographic_control"
+        },
+        {
+          "subcontrol_id": "SC-12",
+          "title": "Cryptographic Key Establishment and Management",
+          "statement": "Kryptographische Schluessel muessen sicher erzeugt, verteilt, gespeichert und widerrufen werden.",
+          "keywords": ["key", "schluessel", "kryptographie", "management"],
+          "action_hint": "maintain",
+          "object_hint": "Schluesselverwaltung",
+          "object_class": "cryptographic_control"
+        },
+        {
+          "subcontrol_id": "SC-13",
+          "title": "Cryptographic Protection",
+          "statement": "Kryptographische Mechanismen muessen gemaess anerkannten Standards implementiert werden.",
+          "keywords": ["kryptographie", "verschluesselung", "standard"],
+          "action_hint": "implement",
+          "object_hint": "Kryptographischer Schutz",
+          "object_class": "cryptographic_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "SI",
+      "title": "System and Information Integrity",
+      "aliases": ["system integrity", "information integrity", "systemintegritaet", "informationsintegritaet"],
+      "keywords": ["integritaet", "integrity", "malware", "patch", "flaw", "schwachstelle"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "SI-1",
+          "title": "Policy and Procedures",
+          "statement": "System- und Informationsintegritaetsrichtlinien muessen dokumentiert und regelmaessig ueberprueft werden.",
+          "keywords": ["policy", "richtlinie"],
+          "action_hint": "document",
+          "object_hint": "Integritaetsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "SI-2",
+          "title": "Flaw Remediation",
+          "statement": "Bekannte Schwachstellen muessen innerhalb definierter Fristen behoben werden.",
+          "keywords": ["flaw", "schwachstelle", "patch", "behebung", "remediation"],
+          "action_hint": "remediate",
+          "object_hint": "Schwachstellenbehebung",
+          "object_class": "system"
+        },
+        {
+          "subcontrol_id": "SI-3",
+          "title": "Malicious Code Protection",
+          "statement": "Systeme muessen vor Schadsoftware geschuetzt werden durch Erkennung und Abwehrmechanismen.",
+          "keywords": ["malware", "schadsoftware", "antivirus", "erkennung"],
+          "action_hint": "implement",
+          "object_hint": "Schadsoftwareschutz",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "SI-4",
+          "title": "System Monitoring",
+          "statement": "Systeme muessen kontinuierlich auf Sicherheitsereignisse und Anomalien ueberwacht werden.",
+          "keywords": ["monitoring", "ueberwachung", "anomalie", "siem"],
+          "action_hint": "monitor",
+          "object_hint": "Systemueberwachung",
+          "object_class": "system"
+        },
+        {
+          "subcontrol_id": "SI-5",
+          "title": "Security Alerts and Advisories",
+          "statement": "Sicherheitswarnungen muessen empfangen und bewertet werden, und es muss darauf reagiert werden.",
+          "keywords": ["alert", "warnung", "advisory", "cve"],
+          "action_hint": "monitor",
+          "object_hint": "Sicherheitswarnungen",
+          "object_class": "incident"
+        }
+      ]
+    },
+    {
+      "domain_id": "SA",
+      "title": "System and Services Acquisition",
+      "aliases": ["system acquisition", "services acquisition", "systembeschaffung", "secure development"],
+      "keywords": ["beschaffung", "acquisition", "entwicklung", "development", "lieferkette", "supply chain"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "SA-1",
+          "title": "Policy and Procedures",
+          "statement": "Beschaffungsrichtlinien mit Sicherheitsanforderungen muessen dokumentiert werden.",
+          "keywords": ["policy", "richtlinie", "beschaffung"],
+          "action_hint": "document",
+          "object_hint": "Beschaffungsrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "SA-8",
+          "title": "Security and Privacy Engineering Principles",
+          "statement": "Sicherheits- und Datenschutzprinzipien muessen in die Systementwicklung integriert werden.",
+          "keywords": ["engineering", "development", "prinzipien", "design"],
+          "action_hint": "implement",
+          "object_hint": "Security-by-Design-Prinzipien",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "SA-11",
+          "title": "Developer Testing and Evaluation",
+          "statement": "Entwickler muessen Sicherheitstests und Code-Reviews durchfuehren.",
+          "keywords": ["testing", "test", "code review", "evaluation"],
+          "action_hint": "test",
+          "object_hint": "Entwickler-Sicherheitstests",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "SA-12",
+          "title": "Supply Chain Protection",
+          "statement": "Lieferkettenrisiken muessen bewertet und Schutzmassnahmen implementiert werden.",
+          "keywords": ["supply chain", "lieferkette", "third party", "drittanbieter"],
+          "action_hint": "assess",
+          "object_hint": "Lieferkettenrisikobewertung",
+          "object_class": "risk_artifact"
+        }
+      ]
+    }
+  ]
+}
--- a/backend-compliance/compliance/data/frameworks/owasp_asvs.json
+++ b/backend-compliance/compliance/data/frameworks/owasp_asvs.json
@@ -0,0 +1,353 @@
+{
+  "framework_id": "OWASP_ASVS",
+  "display_name": "OWASP Application Security Verification Standard 4.0",
+  "license": {
+    "type": "cc_by_sa_4",
+    "rag_allowed": true,
+    "use_as_metadata": true
+  },
+  "domains": [
+    {
+      "domain_id": "V1",
+      "title": "Architecture, Design and Threat Modeling",
+      "aliases": ["architecture", "architektur", "design", "threat modeling", "bedrohungsmodellierung"],
+      "keywords": ["architektur", "design", "threat model", "bedrohung", "modellierung"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V1.1",
+          "title": "Secure Software Development Lifecycle",
+          "statement": "Ein sicherer Softwareentwicklungs-Lebenszyklus (SSDLC) muss definiert und angewendet werden.",
+          "keywords": ["sdlc", "lifecycle", "lebenszyklus", "entwicklung"],
+          "action_hint": "implement",
+          "object_hint": "Sicherer Entwicklungs-Lebenszyklus",
+          "object_class": "process"
+        },
+        {
+          "subcontrol_id": "V1.2",
+          "title": "Authentication Architecture",
+          "statement": "Die Authentifizierungsarchitektur muss dokumentiert und regelmaessig ueberprueft werden.",
+          "keywords": ["authentication", "authentifizierung", "architektur"],
+          "action_hint": "document",
+          "object_hint": "Authentifizierungsarchitektur",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "V1.4",
+          "title": "Access Control Architecture",
+          "statement": "Die Zugriffskontrollarchitektur muss dokumentiert und zentral durchgesetzt werden.",
+          "keywords": ["access control", "zugriffskontrolle", "architektur"],
+          "action_hint": "document",
+          "object_hint": "Zugriffskontrollarchitektur",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "V1.5",
+          "title": "Input and Output Architecture",
+          "statement": "Eingabe- und Ausgabevalidierung muss architektonisch verankert und durchgaengig angewendet werden.",
+          "keywords": ["input", "output", "eingabe", "ausgabe", "validierung"],
+          "action_hint": "implement",
+          "object_hint": "Ein-/Ausgabevalidierung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V1.6",
+          "title": "Cryptographic Architecture",
+          "statement": "Kryptographische Mechanismen muessen architektonisch definiert und standardisiert sein.",
+          "keywords": ["crypto", "kryptographie", "verschluesselung"],
+          "action_hint": "define",
+          "object_hint": "Kryptographie-Architektur",
+          "object_class": "cryptographic_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "V2",
+      "title": "Authentication",
+      "aliases": ["authentication", "authentifizierung", "anmeldung", "login"],
+      "keywords": ["authentication", "authentifizierung", "passwort", "login", "anmeldung", "credential"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V2.1",
+          "title": "Password Security",
+          "statement": "Passwortrichtlinien muessen Mindestlaenge, Komplexitaet und Sperrmechanismen definieren.",
+          "keywords": ["passwort", "password", "laenge", "komplexitaet"],
+          "action_hint": "define",
+          "object_hint": "Passwortrichtlinie",
+          "object_class": "policy"
+        },
+        {
+          "subcontrol_id": "V2.2",
+          "title": "General Authenticator Security",
+          "statement": "Authentifizierungsmittel muessen sicher gespeichert und uebertragen werden.",
+          "keywords": ["authenticator", "credential", "speicherung"],
+          "action_hint": "implement",
+          "object_hint": "Sichere Credential-Verwaltung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V2.7",
+          "title": "Out-of-Band Verification",
+          "statement": "Out-of-Band-Verifikationsmechanismen muessen sicher implementiert werden.",
+          "keywords": ["oob", "out-of-band", "sms", "push"],
+          "action_hint": "implement",
+          "object_hint": "Out-of-Band-Verifikation",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V2.8",
+          "title": "Multi-Factor Authentication",
+          "statement": "Multi-Faktor-Authentifizierung muss fuer sicherheitskritische Funktionen verfuegbar sein.",
+          "keywords": ["mfa", "multi-faktor", "totp", "fido"],
+          "action_hint": "implement",
+          "object_hint": "Multi-Faktor-Authentifizierung",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "V3",
+      "title": "Session Management",
+      "aliases": ["session", "sitzung", "session management", "sitzungsverwaltung"],
+      "keywords": ["session", "sitzung", "token", "cookie", "timeout"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V3.1",
+          "title": "Session Management Security",
+          "statement": "Sitzungstoken muessen sicher erzeugt, uebertragen und invalidiert werden.",
+          "keywords": ["token", "sitzung", "sicherheit"],
+          "action_hint": "implement",
+          "object_hint": "Sichere Sitzungsverwaltung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V3.3",
+          "title": "Session Termination",
+          "statement": "Sitzungen muessen nach Inaktivitaet und bei Abmeldung zuverlaessig beendet werden.",
+          "keywords": ["termination", "timeout", "abmeldung", "beenden"],
+          "action_hint": "configure",
+          "object_hint": "Sitzungstimeout",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "V3.5",
+          "title": "Token-Based Session Management",
+          "statement": "Tokenbasierte Sitzungsmechanismen muessen gegen Diebstahl und Replay geschuetzt sein.",
+          "keywords": ["jwt", "token", "replay", "diebstahl"],
+          "action_hint": "implement",
+          "object_hint": "Token-Schutz",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "V5",
+      "title": "Validation, Sanitization and Encoding",
+      "aliases": ["validation", "validierung", "sanitization", "encoding", "eingabevalidierung"],
+      "keywords": ["validierung", "sanitization", "encoding", "xss", "injection", "eingabe"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V5.1",
+          "title": "Input Validation",
+          "statement": "Alle Eingabedaten muessen serverseitig validiert werden.",
+          "keywords": ["input", "eingabe", "validierung", "serverseitig"],
+          "action_hint": "implement",
+          "object_hint": "Eingabevalidierung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V5.2",
+          "title": "Sanitization and Sandboxing",
+          "statement": "Eingaben muessen bereinigt und in sicherer Umgebung verarbeitet werden.",
+          "keywords": ["sanitization", "bereinigung", "sandbox"],
+          "action_hint": "implement",
+          "object_hint": "Eingabebereinigung",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V5.3",
+          "title": "Output Encoding and Injection Prevention",
+          "statement": "Ausgaben muessen kontextabhaengig kodiert werden, um Injection-Angriffe zu verhindern.",
+          "keywords": ["output", "encoding", "injection", "xss", "sql"],
+          "action_hint": "implement",
+          "object_hint": "Ausgabe-Encoding",
+          "object_class": "technical_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "V6",
+      "title": "Stored Cryptography",
+      "aliases": ["cryptography", "kryptographie", "verschluesselung", "stored cryptography"],
+      "keywords": ["kryptographie", "verschluesselung", "hashing", "schluessel", "key management"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V6.1",
+          "title": "Data Classification",
+          "statement": "Daten muessen klassifiziert und entsprechend ihrer Schutzklasse behandelt werden.",
+          "keywords": ["klassifizierung", "classification", "schutzklasse"],
+          "action_hint": "define",
+          "object_hint": "Datenklassifizierung",
+          "object_class": "data"
+        },
+        {
+          "subcontrol_id": "V6.2",
+          "title": "Algorithms",
+          "statement": "Nur zugelassene und aktuelle kryptographische Algorithmen duerfen verwendet werden.",
+          "keywords": ["algorithmus", "algorithm", "aes", "rsa"],
+          "action_hint": "configure",
+          "object_hint": "Kryptographische Algorithmen",
+          "object_class": "cryptographic_control"
+        },
+        {
+          "subcontrol_id": "V6.4",
+          "title": "Secret Management",
+          "statement": "Geheimnisse (Schluessel, Passwoerter, Tokens) muessen in einem Secret-Management-System verwaltet werden.",
+          "keywords": ["secret", "geheimnis", "vault", "key management"],
+          "action_hint": "maintain",
+          "object_hint": "Secret-Management",
+          "object_class": "cryptographic_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "V8",
+      "title": "Data Protection",
+      "aliases": ["data protection", "datenschutz", "datenverarbeitung"],
+      "keywords": ["datenschutz", "data protection", "pii", "personenbezogen", "privacy"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V8.1",
+          "title": "General Data Protection",
+          "statement": "Personenbezogene Daten muessen gemaess Datenschutzanforderungen geschuetzt werden.",
+          "keywords": ["personenbezogen", "pii", "datenschutz"],
+          "action_hint": "implement",
+          "object_hint": "Datenschutzmassnahmen",
+          "object_class": "data"
+        },
+        {
+          "subcontrol_id": "V8.2",
+          "title": "Client-Side Data Protection",
+          "statement": "Clientseitig gespeicherte sensible Daten muessen geschuetzt und minimiert werden.",
+          "keywords": ["client", "browser", "localstorage", "cookie"],
+          "action_hint": "implement",
+          "object_hint": "Clientseitiger Datenschutz",
+          "object_class": "technical_control"
+        },
+        {
+          "subcontrol_id": "V8.3",
+          "title": "Sensitive Private Data",
+          "statement": "Sensible Daten muessen bei Speicherung und Verarbeitung besonders geschuetzt werden.",
+          "keywords": ["sensibel", "vertraulich", "speicherung"],
+          "action_hint": "encrypt",
+          "object_hint": "Verschluesselung sensibler Daten",
+          "object_class": "data"
+        }
+      ]
+    },
+    {
+      "domain_id": "V9",
+      "title": "Communication",
+      "aliases": ["communication", "kommunikation", "tls", "transport"],
+      "keywords": ["tls", "ssl", "https", "transport", "kommunikation", "verschluesselung"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V9.1",
+          "title": "Client Communication Security",
+          "statement": "Alle Client-Server-Kommunikation muss ueber TLS verschluesselt werden.",
+          "keywords": ["tls", "https", "client", "server"],
+          "action_hint": "encrypt",
+          "object_hint": "TLS-Transportverschluesselung",
+          "object_class": "cryptographic_control"
+        },
+        {
+          "subcontrol_id": "V9.2",
+          "title": "Server Communication Security",
+          "statement": "Server-zu-Server-Kommunikation muss authentifiziert und verschluesselt erfolgen.",
+          "keywords": ["server", "mtls", "backend"],
+          "action_hint": "encrypt",
+          "object_hint": "Server-Kommunikationsverschluesselung",
+          "object_class": "cryptographic_control"
+        }
+      ]
+    },
+    {
+      "domain_id": "V13",
+      "title": "API and Web Service",
+      "aliases": ["api", "web service", "rest", "graphql", "webservice"],
+      "keywords": ["api", "rest", "graphql", "webservice", "endpoint", "schnittstelle"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V13.1",
+          "title": "Generic Web Service Security",
+          "statement": "Web-Services muessen gegen gaengige Angriffe abgesichert werden.",
+          "keywords": ["web service", "sicherheit", "angriff"],
+          "action_hint": "implement",
+          "object_hint": "Web-Service-Absicherung",
+          "object_class": "interface"
+        },
+        {
+          "subcontrol_id": "V13.2",
+          "title": "RESTful Web Service",
+          "statement": "REST-APIs muessen Input-Validierung, Rate Limiting und sichere Authentifizierung implementieren.",
+          "keywords": ["rest", "api", "rate limiting", "input"],
+          "action_hint": "implement",
+          "object_hint": "REST-API-Absicherung",
+          "object_class": "interface"
+        },
+        {
+          "subcontrol_id": "V13.4",
+          "title": "GraphQL and Web Services",
+          "statement": "GraphQL-Endpoints muessen gegen Query-Complexity-Angriffe und Introspection geschuetzt werden.",
+          "keywords": ["graphql", "query", "complexity", "introspection"],
+          "action_hint": "configure",
+          "object_hint": "GraphQL-Absicherung",
+          "object_class": "interface"
+        }
+      ]
+    },
+    {
+      "domain_id": "V14",
+      "title": "Configuration",
+      "aliases": ["configuration", "konfiguration", "hardening", "haertung"],
+      "keywords": ["konfiguration", "hardening", "haertung", "header", "deployment"],
+      "subcontrols": [
+        {
+          "subcontrol_id": "V14.1",
+          "title": "Build and Deploy",
+          "statement": "Build- und Deployment-Prozesse muessen sicher konfiguriert und reproduzierbar sein.",
+          "keywords": ["build", "deploy", "ci/cd", "pipeline"],
+          "action_hint": "configure",
+          "object_hint": "Sichere Build-Pipeline",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "V14.2",
+          "title": "Dependency Management",
+          "statement": "Abhaengigkeiten muessen auf Schwachstellen geprueft und aktuell gehalten werden.",
+          "keywords": ["dependency", "abhaengigkeit", "sca", "sbom"],
+          "action_hint": "maintain",
+          "object_hint": "Abhaengigkeitsverwaltung",
+          "object_class": "system"
+        },
+        {
+          "subcontrol_id": "V14.3",
+          "title": "Unintended Security Disclosure",
+          "statement": "Fehlermeldungen und Debug-Informationen duerfen keine sicherheitsrelevanten Details preisgeben.",
+          "keywords": ["disclosure", "fehlermeldung", "debug", "information leakage"],
+          "action_hint": "configure",
+          "object_hint": "Fehlerbehandlung",
+          "object_class": "configuration"
+        },
+        {
+          "subcontrol_id": "V14.4",
+          "title": "HTTP Security Headers",
+          "statement": "HTTP-Sicherheitsheader muessen korrekt konfiguriert sein.",
+          "keywords": ["header", "csp", "hsts", "x-frame"],
+          "action_hint": "configure",
+          "object_hint": "HTTP-Sicherheitsheader",
+          "object_class": "configuration"
+        }
+      ]
+    }
+  ]
+}
--- a/backend-compliance/compliance/data/source_type_classification.py
+++ b/backend-compliance/compliance/data/source_type_classification.py
@@ -0,0 +1,205 @@
+"""
+Source-Type-Klassifikation fuer Regulierungen und Frameworks.
+
+Dreistufiges Modell der normativen Verbindlichkeit:
+
+  Stufe 1 — GESETZ (law):
+    Rechtlich bindend. Bussgeld bei Verstoss.
+    Beispiele: DSGVO, NIS2, AI Act, CRA
+
+  Stufe 2 — LEITLINIE (guideline):
+    Offizielle Auslegungshilfe von Aufsichtsbehoerden.
+    Beweislastumkehr: Wer abweicht, muss begruenden warum.
+    Beispiele: EDPB-Leitlinien, BSI-Standards, WP29-Dokumente
+
+  Stufe 3 — FRAMEWORK (framework):
+    Freiwillige Best Practices, nicht rechtsverbindlich.
+    Aber: Koennen als "Stand der Technik" herangezogen werden.
+    Beispiele: ENISA, NIST, OWASP, OECD, CISA
+
+Mapping: source_regulation (aus control_parent_links) -> source_type
+"""
+
+# --- Source type identifiers ---
+SOURCE_TYPE_LAW = "law"              # statute / regulation / directive: strength is not capped
+SOURCE_TYPE_GUIDELINE = "guideline"  # guideline / standard: capped at "should"
+SOURCE_TYPE_FRAMEWORK = "framework"  # framework / best practice: capped at "may"
+
+# Highest normative_strength allowed per source_type.
+# The DB constraint only accepts: must, should, may (NOT "can").
+NORMATIVE_STRENGTH_CAP: dict[str, str] = {
+    SOURCE_TYPE_LAW: "must",          # effectively no cap
+    SOURCE_TYPE_GUIDELINE: "should",
+    SOURCE_TYPE_FRAMEWORK: "may",
+}
+
+# Rank used for comparisons (higher = stronger).
+STRENGTH_ORDER: dict[str, int] = {
+    "may": 1,
+    "can": 1,        # input alias of "may"; cap_normative_strength() rewrites it to "may"
+    "should": 2,
+    "must": 3,
+}
+
+
+def cap_normative_strength(original: str, source_type: str) -> str:
+    """
+    Limit a normative_strength according to the source_type it stems from.
+
+    Args:
+        original: Strength as extracted ("must", "should", "may" or the
+            alias "can").
+        source_type: One of the SOURCE_TYPE_* values. Unknown types are
+            treated like a law, i.e. they are not capped.
+
+    Returns:
+        The cap of the source type if ``original`` is stronger than the cap,
+        otherwise ``original``. The alias "can" is always returned as "may",
+        because "can" is not a valid DB value. A strength that is not listed
+        in STRENGTH_ORDER is ranked like "must": it is capped for guidelines
+        and frameworks and passed through unchanged otherwise.
+
+    Examples:
+        cap_normative_strength("must", "framework") -> "may"
+        cap_normative_strength("should", "law") -> "should"
+        cap_normative_strength("must", "guideline") -> "should"
+        cap_normative_strength("can", "law") -> "may"
+    """
+    # Normalise the alias first; otherwise "can" would pass the comparison
+    # below (rank 1 is never above a cap) and be returned as-is.
+    if original == "can":
+        original = "may"
+    cap = NORMATIVE_STRENGTH_CAP.get(source_type, "must")
+    cap_level = STRENGTH_ORDER.get(cap, 3)
+    original_level = STRENGTH_ORDER.get(original, 3)
+    if original_level > cap_level:
+        return cap
+    return original
+
+
+def get_highest_source_type(source_types: list[str]) -> str:
+    """
+    Pick the most binding source_type out of a list.
+
+    Ranking is framework < guideline < law, so a law beats everything else.
+    Values that are not one of the three known types rank below framework.
+    An empty list yields the framework type.
+
+    Examples:
+        get_highest_source_type(["framework", "law"]) -> "law"
+        get_highest_source_type(["framework", "guideline"]) -> "guideline"
+    """
+    if not source_types:
+        return SOURCE_TYPE_FRAMEWORK
+
+    rank_by_type = {
+        SOURCE_TYPE_FRAMEWORK: 1,
+        SOURCE_TYPE_GUIDELINE: 2,
+        SOURCE_TYPE_LAW: 3,
+    }
+
+    def _rank(candidate: str) -> int:
+        # Unknown types get rank 0 and therefore never win against a known one.
+        return rank_by_type.get(candidate, 0)
+
+    return max(source_types, key=_rank)
+
+
+# ============================================================================
+# Classification: source_regulation -> source_type
+#
+# This map is used for the backfill and for future pipeline runs.
+# Register new regulations here!
+# Lookups are exact string matches, so keys must be spelled exactly like the
+# source_regulation values they are meant to match.
+# ============================================================================
+
+SOURCE_REGULATION_CLASSIFICATION: dict[str, str] = {
+    # --- EU regulations (directly binding) ---
+    # NOTE(review): the NIS2 entry below is a directive ("Richtlinie") but is
+    # listed in this group; it is classified as law either way.
+    "DSGVO (EU) 2016/679": SOURCE_TYPE_LAW,
+    "KI-Verordnung (EU) 2024/1689": SOURCE_TYPE_LAW,
+    "Cyber Resilience Act (CRA)": SOURCE_TYPE_LAW,
+    "NIS2-Richtlinie (EU) 2022/2555": SOURCE_TYPE_LAW,
+    "Data Act": SOURCE_TYPE_LAW,
+    "Data Governance Act (DGA)": SOURCE_TYPE_LAW,
+    "Markets in Crypto-Assets (MiCA)": SOURCE_TYPE_LAW,
+    "Maschinenverordnung (EU) 2023/1230": SOURCE_TYPE_LAW,
+    "Batterieverordnung (EU) 2023/1542": SOURCE_TYPE_LAW,
+    "AML-Verordnung": SOURCE_TYPE_LAW,
+
+    # --- EU directives (binding after national transposition) ---
+    # Treated like laws for compliance purposes. (No entries in this group yet.)
+
+    # --- National laws ---
+    "Bundesdatenschutzgesetz (BDSG)": SOURCE_TYPE_LAW,
+    "Telekommunikationsgesetz": SOURCE_TYPE_LAW,
+    "Telekommunikationsgesetz Oesterreich": SOURCE_TYPE_LAW,
+    "Gewerbeordnung (GewO)": SOURCE_TYPE_LAW,
+    "Handelsgesetzbuch (HGB)": SOURCE_TYPE_LAW,
+    "Abgabenordnung (AO)": SOURCE_TYPE_LAW,
+    "IFRS-Übernahmeverordnung": SOURCE_TYPE_LAW,
+    "Österreichisches Datenschutzgesetz (DSG)": SOURCE_TYPE_LAW,
+    "LOPDGDD - Ley Orgánica de Protección de Datos (Spanien)": SOURCE_TYPE_LAW,
+    "Loi Informatique et Libertés (Frankreich)": SOURCE_TYPE_LAW,
+    "Információs önrendelkezési jog törvény (Ungarn)": SOURCE_TYPE_LAW,
+    # NOTE(review): the Blue Guide looks like a guidance document rather than
+    # a national law — confirm that "law" is the intended classification.
+    "EU Blue Guide 2022": SOURCE_TYPE_LAW,
+
+    # --- EDPB/WP29 guidelines (official interpretation aids) ---
+    "EDPB Leitlinien 01/2019 (Zertifizierung)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 01/2020 (Datentransfers)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 01/2020 (Vernetzte Fahrzeuge)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 01/2022 (BCR)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 01/2024 (Berechtigtes Interesse)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 04/2019 (Data Protection by Design)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 05/2020 - Einwilligung": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 07/2020 (Datentransfers)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 08/2020 (Social Media)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 09/2022 (Data Breach)": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien 09/2022 - Meldung von Datenschutzverletzungen": SOURCE_TYPE_GUIDELINE,
+    "EDPB Empfehlungen 01/2020 - Ergaenzende Massnahmen fuer Datentransfers": SOURCE_TYPE_GUIDELINE,
+    "EDPB Leitlinien - Berechtigtes Interesse (Art. 6(1)(f))": SOURCE_TYPE_GUIDELINE,
+    # NOTE(review): WP244 and WP251 carry the same "(Profiling)" label —
+    # verify the WP244 key against the stored source_regulation values.
+    "WP244 Leitlinien (Profiling)": SOURCE_TYPE_GUIDELINE,
+    "WP251 Leitlinien (Profiling)": SOURCE_TYPE_GUIDELINE,
+    "WP260 Leitlinien (Transparenz)": SOURCE_TYPE_GUIDELINE,
+
+    # --- BSI standards (technical guidelines issued by a public authority) ---
+    "BSI-TR-03161-1": SOURCE_TYPE_GUIDELINE,
+    "BSI-TR-03161-2": SOURCE_TYPE_GUIDELINE,
+    "BSI-TR-03161-3": SOURCE_TYPE_GUIDELINE,
+
+    # --- ENISA (EU agency, but its recommendations are not legally binding) ---
+    "ENISA Cybersecurity State 2024": SOURCE_TYPE_FRAMEWORK,
+    "ENISA ICS/SCADA Dependencies": SOURCE_TYPE_FRAMEWORK,
+    "ENISA Supply Chain Good Practices": SOURCE_TYPE_FRAMEWORK,
+    "ENISA Threat Landscape Supply Chain": SOURCE_TYPE_FRAMEWORK,
+
+    # --- NIST (US standards, used internationally as best practice) ---
+    "NIST AI Risk Management Framework": SOURCE_TYPE_FRAMEWORK,
+    "NIST Cybersecurity Framework 2.0": SOURCE_TYPE_FRAMEWORK,
+    "NIST SP 800-207 (Zero Trust)": SOURCE_TYPE_FRAMEWORK,
+    "NIST SP 800-218 (SSDF)": SOURCE_TYPE_FRAMEWORK,
+    "NIST SP 800-53 Rev. 5": SOURCE_TYPE_FRAMEWORK,
+    "NIST SP 800-63-3": SOURCE_TYPE_FRAMEWORK,
+
+    # --- OWASP (community standards) ---
+    "OWASP API Security Top 10 (2023)": SOURCE_TYPE_FRAMEWORK,
+    "OWASP ASVS 4.0": SOURCE_TYPE_FRAMEWORK,
+    "OWASP MASVS 2.0": SOURCE_TYPE_FRAMEWORK,
+    "OWASP SAMM 2.0": SOURCE_TYPE_FRAMEWORK,
+    "OWASP Top 10 (2021)": SOURCE_TYPE_FRAMEWORK,
+
+    # --- Other frameworks ---
+    "OECD KI-Empfehlung": SOURCE_TYPE_FRAMEWORK,
+    "CISA Secure by Design": SOURCE_TYPE_FRAMEWORK,
+}
+
+
+def classify_source_regulation(source_regulation: str) -> str:
+    """
+    Classify a source_regulation string as law, guideline or framework.
+
+    An exact match against SOURCE_REGULATION_CLASSIFICATION wins. Unknown
+    sources are guessed from keywords, checked in the order law, then
+    guideline. Everything else, including empty input, falls back to
+    'framework', the least binding type.
+
+    Long keywords are matched as substrings so that German compounds such as
+    "Datenschutzgesetz" are recognised. The short keywords "act", "ley" and
+    "loi" must occur as whole words: as plain substrings they also appear in
+    words like "Practices", "Berkeley" or "Exploit" and would wrongly promote
+    a framework to a law.
+
+    Args:
+        source_regulation: Name of the regulation or framework; may be empty.
+
+    Returns:
+        One of SOURCE_TYPE_LAW, SOURCE_TYPE_GUIDELINE, SOURCE_TYPE_FRAMEWORK.
+    """
+    if not source_regulation:
+        return SOURCE_TYPE_FRAMEWORK
+
+    # Exact match against the curated map.
+    known = SOURCE_REGULATION_CLASSIFICATION.get(source_regulation)
+    if known is not None:
+        return known
+
+    # Heuristic for unknown sources.
+    lower = source_regulation.lower()
+    # Word set for the whole-word checks: every non-alphanumeric character
+    # (hyphen, bracket, slash, ...) acts as a separator.
+    words = set("".join(ch if ch.isalnum() else " " for ch in lower).split())
+
+    # Laws: long or bracketed markers as substrings, short ones as words.
+    law_substrings = (
+        "verordnung", "richtlinie", "gesetz", "directive", "regulation",
+        "(eu)", "(eg)", "törvény", "código",
+    )
+    law_words = {"act", "ley", "loi"}
+    if any(ind in lower for ind in law_substrings) or not law_words.isdisjoint(words):
+        return SOURCE_TYPE_LAW
+
+    # Guidelines.
+    guideline_indicators = (
+        "edpb", "leitlinie", "guideline", "wp2", "bsi", "empfehlung",
+    )
+    if any(ind in lower for ind in guideline_indicators):
+        return SOURCE_TYPE_GUIDELINE
+
+    # Framework issuers (ENISA, NIST, OWASP, OECD, CISA, ISO) need no check
+    # of their own: they share the result of the conservative fallback,
+    # which treats anything unrecognised as a framework.
+    return SOURCE_TYPE_FRAMEWORK
--- a/backend-compliance/compliance/db/init.py
+++ b/backend-compliance/compliance/db/init.py
@@ -8,12 +8,16 @@ from .models import (
    EvidenceDB,
    RiskDB,
    AuditExportDB,
+    LLMGenerationAuditDB,
+    AssertionDB,
    RegulationTypeEnum,
    ControlTypeEnum,
    ControlDomainEnum,
    RiskLevelEnum,
    EvidenceStatusEnum,
    ControlStatusEnum,
+    EvidenceConfidenceEnum,
+    EvidenceTruthStatusEnum,
 )
 from .repository import (
    RegulationRepository,
@@ -33,6 +37,8 @@ __all__ = [
    "EvidenceDB",
    "RiskDB",
    "AuditExportDB",
+    "LLMGenerationAuditDB",
+    "AssertionDB",
    # Enums
    "RegulationTypeEnum",
    "ControlTypeEnum",
@@ -40,6 +46,8 @@ __all__ = [
    "RiskLevelEnum",
    "EvidenceStatusEnum",
    "ControlStatusEnum",
+    "EvidenceConfidenceEnum",
+    "EvidenceTruthStatusEnum",
    # Repositories
    "RegulationRepository",
    "RequirementRepository",
--- a/backend-compliance/compliance/db/models.py
+++ b/backend-compliance/compliance/db/models.py
--- a/backend-compliance/compliance/db/repository.py
+++ b/backend-compliance/compliance/db/repository.py
--- a/backend-compliance/compliance/db/vvt_library_models.py
+++ b/backend-compliance/compliance/db/vvt_library_models.py
@@ -0,0 +1,164 @@
+"""
+SQLAlchemy models for VVT Master Libraries + Process Templates.
+
+Tables (global, no tenant_id):
+- vvt_lib_data_subjects
+- vvt_lib_data_categories (hierarchical, self-referencing)
+- vvt_lib_recipients
+- vvt_lib_legal_bases
+- vvt_lib_retention_rules
+- vvt_lib_transfer_mechanisms
+- vvt_lib_purposes
+- vvt_lib_toms
+
+Tenant-scoped:
+- vvt_process_templates (system + tenant-specific)
+"""
+
+from datetime import datetime, timezone
+
+from sqlalchemy import (
+    Column, String, Text, Boolean, Integer, DateTime, JSON, Index,
+    ForeignKey,
+)
+from sqlalchemy.dialects.postgresql import UUID
+
+from classroom_engine.database import Base
+
+
+class VVTLibDataSubjectDB(Base):
+    """ORM model for the ``vvt_lib_data_subjects`` master-library table."""
+
+    __tablename__ = 'vvt_lib_data_subjects'
+
+    id = Column(String(50), primary_key=True)
+    label_de = Column(String(200), nullable=False)
+    description_de = Column(Text)
+    art9_relevant = Column(Boolean, default=False)
+    typical_for = Column(JSON, default=list)  # callable default: a fresh list per row
+    sort_order = Column(Integer, default=0)
+    # Timezone-aware UTC default to match the timezone=True column;
+    # datetime.utcnow() would produce a naive value.
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+
+
+class VVTLibDataCategoryDB(Base):
+    """ORM model for the hierarchical ``vvt_lib_data_categories`` table."""
+
+    __tablename__ = 'vvt_lib_data_categories'
+
+    id = Column(String(50), primary_key=True)
+    # Self-reference to the parent category; deleting the parent sets this to NULL.
+    parent_id = Column(String(50), ForeignKey('vvt_lib_data_categories.id', ondelete='SET NULL'), nullable=True)
+    label_de = Column(String(200), nullable=False)
+    description_de = Column(Text)
+    is_art9 = Column(Boolean, default=False)
+    is_art10 = Column(Boolean, default=False)
+    risk_weight = Column(Integer, default=1)
+    # Plain strings without FK constraints — presumably ids of rows in the
+    # retention-rule / legal-basis libraries; verify against the seed data.
+    default_retention_rule = Column(String(50))
+    default_legal_basis = Column(String(50))
+    sort_order = Column(Integer, default=0)
+    # Timezone-aware UTC default to match the timezone=True column;
+    # datetime.utcnow() would produce a naive value.
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+
+
+class VVTLibRecipientDB(Base):
+    """ORM model for the ``vvt_lib_recipients`` master-library table."""
+
+    __tablename__ = 'vvt_lib_recipients'
+
+    id = Column(String(50), primary_key=True)
+    type = Column(String(20), nullable=False)
+    label_de = Column(String(200), nullable=False)
+    description_de = Column(Text)
+    is_third_country = Column(Boolean, default=False)
+    country = Column(String(5))  # presumably a country code — confirm the format
+    sort_order = Column(Integer, default=0)
+    # Timezone-aware UTC default to match the timezone=True column;
+    # datetime.utcnow() would produce a naive value.
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+
+
+class VVTLibLegalBasisDB(Base):
+    """ORM model for the ``vvt_lib_legal_bases`` master-library table."""
+
+    __tablename__ = 'vvt_lib_legal_bases'
+
+    id = Column(String(50), primary_key=True)
+    article = Column(String(50), nullable=False)
+    type = Column(String(30), nullable=False)
+    label_de = Column(String(300), nullable=False)
+    description_de = Column(Text)
+    is_art9 = Column(Boolean, default=False)
+    typical_national_law = Column(String(100))
+    sort_order = Column(Integer, default=0)
+    # Timezone-aware UTC default to match the timezone=True column;
+    # datetime.utcnow() would produce a naive value.
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+
+
+class VVTLibRetentionRuleDB(Base):
+    """ORM model for the ``vvt_lib_retention_rules`` master-library table."""
+
+    __tablename__ = 'vvt_lib_retention_rules'
+
+    id = Column(String(50), primary_key=True)
+    label_de = Column(String(300), nullable=False)
+    description_de = Column(Text)
+    legal_basis = Column(String(200))
+    # Retention period as a number plus its unit; both are mandatory.
+    duration = Column(Integer, nullable=False)
+    duration_unit = Column(String(10), nullable=False)
+    start_event = Column(String(200))
+    deletion_procedure = Column(String(500))
+    sort_order = Column(Integer, default=0)
+    # Timezone-aware UTC default to match the timezone=True column;
+    # datetime.utcnow() would produce a naive value.
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+
+
+class VVTLibTransferMechanismDB(Base):
+    """ORM model for the ``vvt_lib_transfer_mechanisms`` master-library table."""
+
+    __tablename__ = 'vvt_lib_transfer_mechanisms'
+
+    id = Column(String(50), primary_key=True)
+    label_de = Column(String(300), nullable=False)
+    description_de = Column(Text)
+    article = Column(String(50))
+    requires_tia = Column(Boolean, default=False)
+    sort_order = Column(Integer, default=0)
+    # Timezone-aware UTC default to match the timezone=True column;
+    # datetime.utcnow() would produce a naive value.
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+
+
+class VVTLibPurposeDB(Base):
+    """ORM model for the ``vvt_lib_purposes`` master-library table."""
+
+    __tablename__ = 'vvt_lib_purposes'
+
+    id = Column(String(50), primary_key=True)
+    label_de = Column(String(300), nullable=False)
+    description_de = Column(Text)
+    typical_legal_basis = Column(String(50))
+    typical_for = Column(JSON, default=list)  # callable default: a fresh list per row
+    sort_order = Column(Integer, default=0)
+    # Timezone-aware UTC default to match the timezone=True column;
+    # datetime.utcnow() would produce a naive value.
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+
+
+class VVTLibTomDB(Base):
+    """ORM model for the ``vvt_lib_toms`` master-library table."""
+
+    __tablename__ = 'vvt_lib_toms'
+
+    id = Column(String(50), primary_key=True)
+    category = Column(String(30), nullable=False)
+    label_de = Column(String(300), nullable=False)
+    description_de = Column(Text)
+    art32_reference = Column(String(100))
+    sort_order = Column(Integer, default=0)
+    # Timezone-aware UTC default to match the timezone=True column;
+    # datetime.utcnow() would produce a naive value.
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+
+
+class VVTProcessTemplateDB(Base):
+    """ORM model for the ``vvt_process_templates`` table.
+
+    Unlike the library tables, this one has a nullable ``tenant_id`` next to
+    the ``is_system`` flag, so a row can be a system template or belong to a
+    tenant.
+    """
+
+    __tablename__ = 'vvt_process_templates'
+
+    id = Column(String(80), primary_key=True)
+    name = Column(String(300), nullable=False)
+    description = Column(Text)
+    business_function = Column(String(50))
+    # Reference lists stored as JSON (no FK constraints) — presumably ids of
+    # rows in the matching library tables; verify against the seed data.
+    purpose_refs = Column(JSON, default=list)
+    legal_basis_refs = Column(JSON, default=list)
+    data_subject_refs = Column(JSON, default=list)
+    data_category_refs = Column(JSON, default=list)
+    recipient_refs = Column(JSON, default=list)
+    tom_refs = Column(JSON, default=list)
+    transfer_mechanism_refs = Column(JSON, default=list)
+    retention_rule_ref = Column(String(50))
+    typical_systems = Column(JSON, default=list)
+    protection_level = Column(String(10), default='MEDIUM')
+    dpia_required = Column(Boolean, default=False)
+    risk_score = Column(Integer)
+    tags = Column(JSON, default=list)
+    is_system = Column(Boolean, default=True)
+    tenant_id = Column(UUID(as_uuid=True), nullable=True)
+    sort_order = Column(Integer, default=0)
+    # Timezone-aware UTC values to match the timezone=True columns;
+    # datetime.utcnow() would produce naive values.
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+    updated_at = Column(
+        DateTime(timezone=True),
+        default=lambda: datetime.now(timezone.utc),
+        onupdate=lambda: datetime.now(timezone.utc),
+    )
+
+    __table_args__ = (
+        Index('idx_vvt_process_templates_bf', 'business_function'),
+        Index('idx_vvt_process_templates_system', 'is_system'),
+    )
--- a/backend-compliance/compliance/db/vvt_models.py
+++ b/backend-compliance/compliance/db/vvt_models.py
@@ -79,6 +79,26 @@ class VVTActivityDB(Base):
    next_review_at = Column(DateTime(timezone=True), nullable=True)
    created_by = Column(String(200), default='system')
    dsfa_id = Column(UUID(as_uuid=True), nullable=True)
+
+    # Library refs (Phase 1 — parallel to freetext fields)
+    purpose_refs = Column(JSON, nullable=True)
+    legal_basis_refs = Column(JSON, nullable=True)
+    data_subject_refs = Column(JSON, nullable=True)
+    data_category_refs = Column(JSON, nullable=True)
+    recipient_refs = Column(JSON, nullable=True)
+    retention_rule_ref = Column(String(50), nullable=True)
+    transfer_mechanism_refs = Column(JSON, nullable=True)
+    tom_refs = Column(JSON, nullable=True)
+
+    # Cross-module links
+    linked_loeschfristen_ids = Column(JSON, nullable=True)
+    linked_tom_measure_ids = Column(JSON, nullable=True)
+
+    # Template + risk
+    source_template_id = Column(String(80), nullable=True)
+    risk_score = Column(Integer, nullable=True)
+    art30_completeness = Column(JSON, nullable=True)
+
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

--- a/backend-compliance/compliance/services/anchor_finder.py
+++ b/backend-compliance/compliance/services/anchor_finder.py
@@ -69,7 +69,7 @@ class AnchorFinder:
        tags_str = " ".join(control.tags[:3]) if control.tags else ""
        query = f"{control.title} {tags_str}".strip()

-        results = await self.rag.search(
+        results = await self.rag.search_with_rerank(
            query=query,
            collection="bp_compliance_ce",
            top_k=15,
--- a/backend-compliance/compliance/services/assertion_engine.py
+++ b/backend-compliance/compliance/services/assertion_engine.py
@@ -0,0 +1,80 @@
+"""Assertion Engine — splits text into sentences and classifies each.
+
+Each sentence is tagged as:
+- assertion: normative statement (pflicht / empfehlung / kann)
+- fact: references concrete evidence artifacts
+- rationale: explains why something is required
+"""
+
+import re
+from typing import Optional
+
+from .normative_patterns import (
+    PFLICHT_RE, EMPFEHLUNG_RE, KANN_RE, RATIONALE_RE, EVIDENCE_RE,
+)
+
+# Sentence splitter: split on whitespace that follows ./!/? and precedes an
+# uppercase letter (A-Z, Ä, Ö, Ü), or on a blank line (paragraph break).
+_SENTENCE_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ])|(?:\n\s*\n)')
+
+
+def extract_assertions(
+    text: str,
+    entity_type: str,
+    entity_id: str,
+    tenant_id: Optional[str] = None,
+) -> list[dict]:
+    """Break *text* into sentences and classify every usable fragment.
+
+    Args:
+        text: Free text to analyse; empty or whitespace-only text yields ``[]``.
+        entity_type: Copied unchanged into every result dict.
+        entity_id: Copied unchanged into every result dict.
+        tenant_id: Optional tenant identifier, copied into every result dict.
+
+    Returns:
+        One dict per fragment of at least 5 characters, in document order.
+        ``sentence_index`` is the fragment's position in the raw split, so
+        indices of dropped fragments are not reused.
+    """
+    stripped = text.strip() if text else ""
+    if not stripped:
+        return []
+
+    records: list[dict] = []
+    for position, fragment in enumerate(_SENTENCE_SPLIT.split(stripped)):
+        candidate = fragment.strip()
+        # Fragments shorter than 5 characters are dropped (this covers empty ones).
+        if len(candidate) < 5:
+            continue
+
+        kind, tier = _classify_sentence(candidate)
+        record = {
+            "tenant_id": tenant_id,
+            "entity_type": entity_type,
+            "entity_id": entity_id,
+            "sentence_text": candidate,
+            "sentence_index": position,
+            "assertion_type": kind,
+            "normative_tier": tier,
+            "evidence_ids": [],
+            "confidence": 0.0,
+        }
+        records.append(record)
+
+    return records
+
+
+def _classify_sentence(sentence: str) -> tuple[str, Optional[str]]:
+    """Classify one sentence as fact, rationale or (tiered) assertion.
+
+    Precedence:
+      1. Any evidence keyword            -> ("fact", None)
+      2. Rationale markers present and at least as numerous as all
+         normative markers               -> ("rationale", None)
+      3. First matching normative tier   -> ("assertion", tier), checked in
+         the order pflicht, empfehlung, kann
+      4. Nothing matched                 -> ("assertion", None)
+    """
+    if EVIDENCE_RE.search(sentence):
+        return ("fact", None)
+
+    # Ordered from strongest to weakest tier; the order decides step 3.
+    tiers = (
+        (PFLICHT_RE, "pflicht"),
+        (EMPFEHLUNG_RE, "empfehlung"),
+        (KANN_RE, "kann"),
+    )
+
+    normative_hits = sum(len(pattern.findall(sentence)) for pattern, _ in tiers)
+    rationale_hits = len(RATIONALE_RE.findall(sentence))
+    if rationale_hits > 0 and rationale_hits >= normative_hits:
+        return ("rationale", None)
+
+    for pattern, tier in tiers:
+        if pattern.search(sentence):
+            return ("assertion", tier)
+
+    return ("assertion", None)
--- a/backend-compliance/compliance/services/batch_dedup_runner.py
+++ b/backend-compliance/compliance/services/batch_dedup_runner.py
@@ -0,0 +1,618 @@
+"""Batch Dedup Runner — Orchestrates deduplication of ~85k atomare Controls.
+
+Reduces Pass 0b controls from ~85k to ~18-25k unique Master Controls via:
+  Phase 1: Intra-Group Dedup — same merge_group_hint → pick best, link rest
+           (85k → ~52k, mostly title-identical short-circuit, no embeddings)
+  Phase 2: Cross-Group Dedup — embed masters, search Qdrant for similar
+           masters with different hints (52k → ~18-25k)
+
+All Pass 0b controls have pattern_id=NULL. The primary grouping key is
+merge_group_hint (format: "action_type:norm_obj:trigger_key"), which
+encodes the normalized action, object, and trigger.
+
+Usage:
+    runner = BatchDedupRunner(db)
+    stats = await runner.run(dry_run=True)       # preview
+    stats = await runner.run(dry_run=False)       # execute
+    stats = await runner.run(hint_filter="implement:multi_factor_auth:none")
+"""
+
+import json
+import logging
+import time
+from collections import defaultdict
+
+from sqlalchemy import text
+
+from compliance.services.control_dedup import (
+    canonicalize_text,
+    ensure_qdrant_collection,
+    get_embedding,
+    normalize_action,
+    normalize_object,
+    qdrant_search_cross_regulation,
+    qdrant_upsert,
+    LINK_THRESHOLD,
+    REVIEW_THRESHOLD,
+)
+
+logger = logging.getLogger(__name__)
+
+DEDUP_COLLECTION = "atomic_controls_dedup"
+
+
+# ── Quality Score ────────────────────────────────────────────────────────
+
+
+def _count_list_items(value) -> int:
+    """Return the number of entries if *value* is, or JSON-decodes to, a list.
+
+    Anything else (None, undecodable text, JSON null/number/string/object)
+    counts as zero instead of raising or counting characters.
+    """
+    if isinstance(value, str):
+        try:
+            value = json.loads(value)
+        except (json.JSONDecodeError, TypeError):
+            return 0
+    return len(value) if isinstance(value, (list, tuple)) else 0
+
+
+def quality_score(control: dict) -> float:
+    """Score a control by richness of requirements, tests, evidence, and objective.
+
+    Higher score = better candidate for master control.
+
+    Args:
+        control: Control dict. ``requirements``, ``test_procedure`` and
+            ``evidence`` may each be a list or a JSON-encoded list; missing,
+            undecodable or non-list values contribute nothing.
+
+    Returns:
+        2.0 per requirement + 1.5 per test step + 1.0 per evidence item,
+        plus ``len(objective) / 200`` capped at 3.0.
+    """
+    weights = (
+        ("requirements", 2.0),
+        ("test_procedure", 1.5),
+        ("evidence", 1.0),
+    )
+    score = 0.0
+    for field_name, weight in weights:
+        score += _count_list_items(control.get(field_name)) * weight
+
+    objective = control.get("objective") or ""
+    score += min(len(objective) / 200, 3.0)
+
+    return score
+
+
+# ── Batch Dedup Runner ───────────────────────────────────────────────────
+
+
+class BatchDedupRunner:
+    """Batch dedup orchestrator for existing Pass 0b atomic controls."""
+
+    def __init__(self, db, collection: str = DEDUP_COLLECTION):
+        """Store the DB handle and target collection and zero all counters.
+
+        Args:
+            db: Database session used for all reads and writes.
+            collection: Name of the Qdrant collection used for dedup vectors.
+        """
+        self.db = db
+        self.collection = collection
+
+        # Every counter starts at 0; key order is kept for stable log output.
+        counter_names = (
+            "total_controls",
+            "unique_hints",
+            "phase1_groups_processed",
+            "masters",
+            "linked",
+            "review",
+            "new_controls",
+            "parent_links_transferred",
+            "cross_group_linked",
+            "cross_group_review",
+            "errors",
+            "skipped_title_identical",
+        )
+        self.stats = dict.fromkeys(counter_names, 0)
+
+        # Progress state exposed through get_status().
+        self._progress_phase, self._progress_count, self._progress_total = "", 0, 0
+
+    async def run(
+        self,
+        dry_run: bool = False,
+        hint_filter: str = None,
+    ) -> dict:
+        """Run the full batch dedup pipeline.
+
+        Phase 1 processes every merge-hint group; a failure in one group is
+        logged and counted in ``stats["errors"]`` and the run continues with
+        the next group. Phase 2 (cross-group pass) only runs when ``dry_run``
+        is False.
+
+        Args:
+            dry_run: If True, compute stats but don't modify DB/Qdrant.
+            hint_filter: If set, only process groups matching this hint prefix.
+
+        Returns:
+            Stats dict with counts, plus ``elapsed_seconds``.
+        """
+        start = time.monotonic()
+        logger.info("BatchDedup starting (dry_run=%s, hint_filter=%s)",
+                     dry_run, hint_filter)
+
+        if not dry_run:
+            await ensure_qdrant_collection(collection=self.collection)
+
+        # Phase 1: Intra-group dedup (same merge_group_hint)
+        self._progress_phase = "phase1"
+        groups = self._load_merge_groups(hint_filter)
+        self._progress_total = self.stats["total_controls"]
+
+        for hint, controls in groups:
+            try:
+                await self._process_hint_group(hint, controls, dry_run)
+                self.stats["phase1_groups_processed"] += 1
+            except Exception as e:
+                logger.error("BatchDedup Phase 1 error on hint %s: %s", hint, e)
+                self.stats["errors"] += 1
+                try:
+                    self.db.rollback()
+                except Exception as rollback_err:
+                    # Rollback stays best-effort, but a failure here means the
+                    # session may be unusable, so it must at least be visible.
+                    logger.warning(
+                        "BatchDedup rollback failed after error on hint %s: %s",
+                        hint, rollback_err,
+                    )
+
+        logger.info(
+            "BatchDedup Phase 1 done: %d masters, %d linked, %d review",
+            self.stats["masters"], self.stats["linked"], self.stats["review"],
+        )
+
+        # Phase 2: Cross-group dedup via embeddings
+        if not dry_run:
+            self._progress_phase = "phase2"
+            await self._run_cross_group_pass()
+
+        elapsed = time.monotonic() - start
+        self.stats["elapsed_seconds"] = round(elapsed, 1)
+        logger.info("BatchDedup completed in %.1fs: %s", elapsed, self.stats)
+        return self.stats
+
+    def _load_merge_groups(self, hint_filter: str = None) -> list:
+        """Load all Pass 0b controls grouped by merge_group_hint, largest first.
+
+        Args:
+            hint_filter: Optional literal prefix; only controls whose
+                merge_group_hint starts with it are loaded.
+
+        Returns:
+            List of ``(hint, [control dict, ...])`` tuples sorted by group size
+            (descending). Controls without a hint share the ``""`` group.
+            Also updates ``stats["total_controls"]`` and ``stats["unique_hints"]``.
+        """
+        conditions = [
+            "decomposition_method = 'pass0b'",
+            "release_state != 'deprecated'",
+            "release_state != 'duplicate'",
+        ]
+        params = {}
+
+        if hint_filter:
+            # Hints routinely contain "_" (e.g. "multi_factor_auth"), which LIKE
+            # treats as a single-character wildcard. Escape the LIKE
+            # metacharacters so the filter is a literal prefix match.
+            escaped = (
+                hint_filter.replace("!", "!!")
+                .replace("%", "!%")
+                .replace("_", "!_")
+            )
+            conditions.append(
+                "generation_metadata->>'merge_group_hint' LIKE :hf ESCAPE '!'"
+            )
+            params["hf"] = f"{escaped}%"
+
+        # Only the fixed condition strings above are interpolated; user input
+        # travels exclusively through bind parameters.
+        where = " AND ".join(conditions)
+        rows = self.db.execute(text(f"""
+            SELECT id::text, control_id, title, objective,
+                   pattern_id, requirements::text, test_procedure::text,
+                   evidence::text, release_state,
+                   generation_metadata->>'merge_group_hint' as merge_group_hint,
+                   generation_metadata->>'action_object_class' as action_object_class
+            FROM canonical_controls
+            WHERE {where}
+            ORDER BY control_id
+        """), params).fetchall()
+
+        by_hint = defaultdict(list)
+        for r in rows:
+            by_hint[r[9] or ""].append({
+                "uuid": r[0],
+                "control_id": r[1],
+                "title": r[2],
+                "objective": r[3],
+                "pattern_id": r[4],
+                "requirements": r[5],
+                "test_procedure": r[6],
+                "evidence": r[7],
+                "release_state": r[8],
+                "merge_group_hint": r[9] or "",
+                "action_object_class": r[10] or "",
+            })
+
+        self.stats["total_controls"] = len(rows)
+        self.stats["unique_hints"] = len(by_hint)
+
+        sorted_groups = sorted(by_hint.items(), key=lambda x: len(x[1]), reverse=True)
+        logger.info("BatchDedup loaded %d controls in %d hint groups",
+                     len(rows), len(sorted_groups))
+        return sorted_groups
+
+    def _sub_group_by_merge_hint(self, controls: list) -> dict:
+        """Bucket controls by their merge_group_hint.
+
+        Controls with an empty hint each land in their own bucket keyed
+        ``__no_hint_<uuid>``, so they are never grouped with one another.
+        """
+        buckets = {}
+        for ctrl in controls:
+            key = ctrl["merge_group_hint"] or f"__no_hint_{ctrl['uuid']}"
+            buckets.setdefault(key, []).append(ctrl)
+        return buckets
+
+    async def _process_hint_group(
+        self,
+        hint: str,
+        controls: list,
+        dry_run: bool,
+    ):
+        """Process all controls sharing the same merge_group_hint.
+
+        Within a hint group, all controls share action+object+trigger.
+        The best-quality control becomes master, rest are linked as duplicates.
+
+        Args:
+            hint: The shared merge_group_hint (used for progress logging).
+            controls: Control dicts of this group; an empty list is a no-op.
+            dry_run: If True, only counters are updated; no DB/Qdrant writes.
+        """
+        if not controls:
+            return
+
+        if len(controls) < 2:
+            # Singleton → always master
+            self.stats["masters"] += 1
+            if not dry_run:
+                await self._embed_and_index(controls[0])
+            self._progress_count += 1
+            self._log_progress(hint)
+            return
+
+        # Sort by quality score (best first)
+        sorted_group = sorted(controls, key=quality_score, reverse=True)
+        master = sorted_group[0]
+        self.stats["masters"] += 1
+
+        if not dry_run:
+            await self._embed_and_index(master)
+
+        # The master is a processed control too; without this the progress
+        # counter can never reach the total for multi-control groups.
+        self._progress_count += 1
+        self._log_progress(hint)
+
+        # A NULL title is treated like an empty one instead of crashing.
+        master_title = (master["title"] or "").strip().lower()
+
+        for candidate in sorted_group[1:]:
+            # All share the same hint → check title similarity
+            if (candidate["title"] or "").strip().lower() == master_title:
+                # Identical title → direct link (no embedding needed)
+                self.stats["linked"] += 1
+                self.stats["skipped_title_identical"] += 1
+                if not dry_run:
+                    await self._mark_duplicate(master, candidate, confidence=1.0)
+            else:
+                # Different title within same hint → still likely duplicate
+                # Use embedding to verify
+                await self._check_and_link_within_group(master, candidate, dry_run)
+
+            self._progress_count += 1
+            self._log_progress(hint)
+
+    async def _check_and_link_within_group(
+        self,
+        master: dict,
+        candidate: dict,
+        dry_run: bool,
+    ):
+        """Check if candidate (same hint group) is duplicate of master via embedding.
+
+        Outcomes, each reflected in ``self.stats``:
+          * no embedding / no usable Qdrant hit → linked to ``master`` (0.90)
+          * best score > LINK_THRESHOLD         → linked to the matched control
+          * best score > REVIEW_THRESHOLD       → review entry written
+          * otherwise                           → candidate indexed as new master
+
+        With ``dry_run`` the embedding and search still run so the counters
+        stay meaningful, but nothing is written.
+        """
+        parts = candidate["merge_group_hint"].split(":", 2)
+        action = parts[0]
+        obj = parts[1] if len(parts) > 1 else ""
+
+        canonical = canonicalize_text(action, obj, candidate["title"])
+        embedding = await get_embedding(canonical)
+
+        hits = []
+        if embedding:
+            # Search the dedup collection (unfiltered — pattern_id is NULL)
+            hits = await qdrant_search_cross_regulation(
+                embedding, top_k=3, collection=self.collection,
+            ) or []
+
+        # Ignore a hit on the candidate itself (possible when the collection
+        # still holds points from an earlier run); linking a control to itself
+        # would make it a duplicate without any master.
+        best = next(
+            (
+                hit for hit in hits
+                if (hit.get("payload") or {}).get("control_uuid") != candidate["uuid"]
+            ),
+            None,
+        )
+
+        if best is None:
+            # Can't embed or nothing indexed yet → link anyway
+            # (same hint = same action+object)
+            self.stats["linked"] += 1
+            if not dry_run:
+                await self._mark_duplicate(master, candidate, confidence=0.90)
+            return
+
+        best_score = best.get("score", 0.0)
+        best_payload = best.get("payload") or {}
+        # A payload without control_uuid cannot be cast to uuid in SQL; fall
+        # back to the group's master instead of failing the whole group.
+        target_uuid = best_payload.get("control_uuid") or master["uuid"]
+
+        if best_score > LINK_THRESHOLD:
+            self.stats["linked"] += 1
+            if not dry_run:
+                await self._mark_duplicate_to(target_uuid, candidate, confidence=best_score)
+        elif best_score > REVIEW_THRESHOLD:
+            self.stats["review"] += 1
+            if not dry_run:
+                self._write_review(candidate, best_payload, best_score)
+        else:
+            # Very different despite same hint → new master
+            self.stats["new_controls"] += 1
+            if not dry_run:
+                await self._index_with_embedding(candidate, embedding)
+
+    async def _run_cross_group_pass(self):
+        """Phase 2: Find cross-group duplicates among surviving masters.
+
+        After Phase 1, ~52k masters remain. Many have similar semantics
+        despite different merge_group_hints (e.g. different German spellings).
+        This pass embeds all masters and finds near-duplicates via Qdrant.
+
+        For each surviving control the Qdrant hits are scanned in order and the
+        first usable one decides the outcome: above LINK_THRESHOLD the current
+        control is merged into the hit, above REVIEW_THRESHOLD a review entry
+        is written. Failures are logged and counted in ``stats["errors"]``
+        without aborting the pass.
+        """
+        logger.info("BatchDedup Phase 2: Cross-group pass starting...")
+
+        rows = self.db.execute(text("""
+            SELECT id::text, control_id, title,
+                   generation_metadata->>'merge_group_hint' as merge_group_hint
+            FROM canonical_controls
+            WHERE decomposition_method = 'pass0b'
+              AND release_state != 'duplicate'
+              AND release_state != 'deprecated'
+            ORDER BY control_id
+        """)).fetchall()
+
+        self._progress_total = len(rows)
+        self._progress_count = 0
+        logger.info("BatchDedup Cross-group: %d masters to check", len(rows))
+        cross_linked = 0
+        cross_review = 0
+
+        # Controls merged away during this pass. Their vectors stay in Qdrant,
+        # so without this set A could be merged into B and later B into A,
+        # leaving both as duplicates and no master at all.
+        merged_away = set()
+
+        for i, r in enumerate(rows):
+            dup_uuid = r[0]
+            hint = r[3] or ""
+            parts = hint.split(":", 2)
+            action = parts[0]
+            obj = parts[1] if len(parts) > 1 else ""
+
+            canonical = canonicalize_text(action, obj, r[2])
+            embedding = await get_embedding(canonical)
+
+            results = []
+            if embedding:
+                results = await qdrant_search_cross_regulation(
+                    embedding, top_k=5, collection=self.collection,
+                ) or []
+
+            for match in results:
+                match_score = match.get("score", 0.0)
+                match_payload = match.get("payload") or {}
+                match_uuid = match_payload.get("control_uuid", "")
+
+                # Skip self-matches, hits without a usable UUID, and controls
+                # that were already merged away earlier in this pass.
+                if not match_uuid or match_uuid == dup_uuid or match_uuid in merged_away:
+                    continue
+
+                # Simple check: different control UUID is enough
+                if match_score > LINK_THRESHOLD:
+                    # The control currently being checked becomes the duplicate;
+                    # the matched control is kept as master.
+                    try:
+                        self.db.execute(text("""
+                            UPDATE canonical_controls
+                            SET release_state = 'duplicate', merged_into_uuid = CAST(:master AS uuid)
+                            WHERE id = CAST(:dup AS uuid)
+                              AND release_state != 'duplicate'
+                        """), {"master": match_uuid, "dup": dup_uuid})
+
+                        self.db.execute(text("""
+                            INSERT INTO control_parent_links
+                                (control_uuid, parent_control_uuid, link_type, confidence)
+                            VALUES (CAST(:cu AS uuid), CAST(:pu AS uuid), 'cross_regulation', :conf)
+                            ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
+                        """), {"cu": match_uuid, "pu": dup_uuid, "conf": match_score})
+
+                        # Transfer parent links
+                        transferred = self._transfer_parent_links(match_uuid, dup_uuid)
+
+                        self.db.commit()
+                        # Only count once the transaction is durable.
+                        self.stats["parent_links_transferred"] += transferred
+                        cross_linked += 1
+                        merged_away.add(dup_uuid)
+                    except Exception as e:
+                        logger.error("BatchDedup cross-group link error %s→%s: %s",
+                                     dup_uuid, match_uuid, e)
+                        self.db.rollback()
+                        self.stats["errors"] += 1
+                    break  # Only one cross-link per control
+                elif match_score > REVIEW_THRESHOLD:
+                    try:
+                        self._write_review(
+                            {"control_id": r[1], "title": r[2], "objective": "",
+                             "merge_group_hint": hint, "pattern_id": None},
+                            match_payload, match_score,
+                        )
+                        cross_review += 1
+                    except Exception:
+                        # _write_review already logged and rolled back; one
+                        # failed review must not abort the remaining controls.
+                        self.stats["errors"] += 1
+                    break
+
+            self._progress_count = i + 1
+            if (i + 1) % 500 == 0:
+                logger.info("BatchDedup Cross-group: %d/%d checked, %d linked, %d review",
+                            i + 1, len(rows), cross_linked, cross_review)
+
+        self.stats["cross_group_linked"] = cross_linked
+        self.stats["cross_group_review"] = cross_review
+        logger.info("BatchDedup Cross-group complete: %d linked, %d review",
+                     cross_linked, cross_review)
+
+    # ── Qdrant Helpers ───────────────────────────────────────────────────
+
+    async def _embed_and_index(self, control: dict):
+        """Embed a control's canonical text and upsert it into the dedup collection.
+
+        Does nothing when no embedding could be obtained.
+        """
+        hint = control["merge_group_hint"]
+        # Hint layout is "action:object:trigger"; only the first two parts are used.
+        action, _, remainder = hint.partition(":")
+        obj = remainder.partition(":")[0]
+
+        action_norm = normalize_action(action)
+        object_norm = normalize_object(obj)
+        canonical_text = canonicalize_text(action, obj, control["title"])
+
+        vector = await get_embedding(canonical_text)
+        if not vector:
+            return
+
+        point_payload = {
+            "control_uuid": control["uuid"],
+            "control_id": control["control_id"],
+            "title": control["title"],
+            "pattern_id": control.get("pattern_id"),
+            "action_normalized": action_norm,
+            "object_normalized": object_norm,
+            "canonical_text": canonical_text,
+            "merge_group_hint": hint,
+        }
+        await qdrant_upsert(
+            point_id=control["uuid"],
+            embedding=vector,
+            payload=point_payload,
+            collection=self.collection,
+        )
+
+    async def _index_with_embedding(self, control: dict, embedding: list):
+        """Upsert a control into the dedup collection using a ready-made vector.
+
+        Same payload as ``_embed_and_index`` but without computing the
+        embedding again.
+        """
+        hint = control["merge_group_hint"]
+        # Hint layout is "action:object:trigger"; only the first two parts are used.
+        action, _, remainder = hint.partition(":")
+        obj = remainder.partition(":")[0]
+
+        action_norm = normalize_action(action)
+        object_norm = normalize_object(obj)
+        canonical_text = canonicalize_text(action, obj, control["title"])
+
+        point_payload = {
+            "control_uuid": control["uuid"],
+            "control_id": control["control_id"],
+            "title": control["title"],
+            "pattern_id": control.get("pattern_id"),
+            "action_normalized": action_norm,
+            "object_normalized": object_norm,
+            "canonical_text": canonical_text,
+            "merge_group_hint": hint,
+        }
+        await qdrant_upsert(
+            point_id=control["uuid"],
+            embedding=embedding,
+            payload=point_payload,
+            collection=self.collection,
+        )
+
+    # ── DB Write Helpers ─────────────────────────────────────────────────
+
+    async def _mark_duplicate(self, master: dict, candidate: dict, confidence: float):
+        """Mark candidate as duplicate of master, transfer parent links.
+
+        Steps, followed by a single commit:
+          1. Set the candidate row to ``release_state = 'duplicate'`` with
+             ``merged_into_uuid`` pointing at the master.
+          2. Insert a ``'dedup_merge'`` row into ``control_parent_links`` with
+             the master as ``control_uuid`` and the candidate as
+             ``parent_control_uuid`` (no-op if that pair already exists).
+          3. Copy the candidate's parent links to the master.
+
+        Args:
+            master: Control dict of the surviving control (needs ``"uuid"``).
+            candidate: Control dict of the duplicate (needs ``"uuid"``).
+            confidence: Confidence stored on the ``'dedup_merge'`` link.
+
+        Raises:
+            Exception: Any DB error is logged, rolled back and re-raised.
+        """
+        try:
+            self.db.execute(text("""
+                UPDATE canonical_controls
+                SET release_state = 'duplicate', merged_into_uuid = CAST(:master AS uuid)
+                WHERE id = CAST(:cand AS uuid)
+            """), {"master": master["uuid"], "cand": candidate["uuid"]})
+
+            # Note the direction: the duplicate is stored as the *parent* side
+            # of the link row, the master as control_uuid.
+            self.db.execute(text("""
+                INSERT INTO control_parent_links
+                    (control_uuid, parent_control_uuid, link_type, confidence)
+                VALUES (CAST(:master AS uuid), CAST(:cand_parent AS uuid), 'dedup_merge', :conf)
+                ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
+            """), {"master": master["uuid"], "cand_parent": candidate["uuid"], "conf": confidence})
+
+            transferred = self._transfer_parent_links(master["uuid"], candidate["uuid"])
+            self.stats["parent_links_transferred"] += transferred
+
+            self.db.commit()
+        except Exception as e:
+            logger.error("BatchDedup _mark_duplicate error %s→%s: %s",
+                         candidate["uuid"], master["uuid"], e)
+            # Undo the partial writes, then let the caller decide how to proceed.
+            self.db.rollback()
+            raise
+
+    async def _mark_duplicate_to(self, master_uuid: str, candidate: dict, confidence: float):
+        """Mark candidate as duplicate of a Qdrant-matched master.
+
+        Performs the same three writes as ``_mark_duplicate`` (flag the
+        candidate, insert a ``'dedup_merge'`` link, copy parent links) and
+        commits, but takes the master as a bare UUID string because only the
+        Qdrant payload of the match is known.
+
+        Args:
+            master_uuid: UUID (as text) of the surviving control.
+            candidate: Control dict of the duplicate (needs ``"uuid"``).
+            confidence: Confidence stored on the ``'dedup_merge'`` link.
+
+        Raises:
+            Exception: Any DB error is logged, rolled back and re-raised.
+        """
+        try:
+            self.db.execute(text("""
+                UPDATE canonical_controls
+                SET release_state = 'duplicate', merged_into_uuid = CAST(:master AS uuid)
+                WHERE id = CAST(:cand AS uuid)
+            """), {"master": master_uuid, "cand": candidate["uuid"]})
+
+            # The duplicate is stored as the *parent* side of the link row.
+            self.db.execute(text("""
+                INSERT INTO control_parent_links
+                    (control_uuid, parent_control_uuid, link_type, confidence)
+                VALUES (CAST(:master AS uuid), CAST(:cand_parent AS uuid), 'dedup_merge', :conf)
+                ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
+            """), {"master": master_uuid, "cand_parent": candidate["uuid"], "conf": confidence})
+
+            transferred = self._transfer_parent_links(master_uuid, candidate["uuid"])
+            self.stats["parent_links_transferred"] += transferred
+
+            self.db.commit()
+        except Exception as e:
+            logger.error("BatchDedup _mark_duplicate_to error %s→%s: %s",
+                         candidate["uuid"], master_uuid, e)
+            # Undo the partial writes, then let the caller decide how to proceed.
+            self.db.rollback()
+            raise
+
+    def _transfer_parent_links(self, master_uuid: str, duplicate_uuid: str) -> int:
+        """Copy the duplicate's 'decomposition' parent links onto the master.
+
+        The duplicate's own link rows are left in place. Links whose parent is
+        the master itself are skipped. Nothing is committed here; the caller
+        owns the transaction.
+
+        Args:
+            master_uuid: UUID (as text) of the control receiving the links.
+            duplicate_uuid: UUID (as text) of the control whose links are copied.
+
+        Returns:
+            Number of insert attempts. Rows skipped by ``ON CONFLICT DO
+            NOTHING`` are included, so this is an upper bound on new rows.
+        """
+        rows = self.db.execute(text("""
+            SELECT parent_control_uuid::text, link_type, confidence,
+                   source_regulation, source_article, obligation_candidate_id::text
+            FROM control_parent_links
+            WHERE control_uuid = CAST(:dup AS uuid)
+              AND link_type = 'decomposition'
+        """), {"dup": duplicate_uuid}).fetchall()
+
+        transferred = 0
+        for r in rows:
+            parent_uuid = r[0]
+            if parent_uuid == master_uuid:
+                continue
+            self.db.execute(text("""
+                INSERT INTO control_parent_links
+                    (control_uuid, parent_control_uuid, link_type, confidence,
+                     source_regulation, source_article, obligation_candidate_id)
+                VALUES (CAST(:cu AS uuid), CAST(:pu AS uuid), :lt, :conf,
+                        :sr, :sa, CAST(:oci AS uuid))
+                ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
+            """), {
+                "cu": master_uuid,
+                "pu": parent_uuid,
+                "lt": r[1],
+                # Only a missing confidence defaults to 1.0; a stored 0.0 is a
+                # real value and must not be promoted to full confidence.
+                "conf": float(r[2]) if r[2] is not None else 1.0,
+                "sr": r[3],
+                "sa": r[4],
+                "oci": r[5],
+            })
+            transferred += 1
+
+        return transferred
+
+    def _write_review(self, candidate: dict, matched_payload: dict, score: float):
+        """Write a dedup review entry for borderline matches.
+
+        Inserts one row into ``control_dedup_reviews`` with
+        ``dedup_stage = 'batch_dedup'`` and commits immediately.
+
+        Args:
+            candidate: Control dict; ``"control_id"`` and ``"title"`` are
+                required, ``"objective"``, ``"merge_group_hint"`` and
+                ``"pattern_id"`` are optional.
+            matched_payload: Qdrant payload of the matched control; its
+                ``control_uuid`` and ``control_id`` are stored (may be None).
+            score: Similarity score of the match.
+
+        Raises:
+            Exception: Any DB error is logged, rolled back and re-raised.
+        """
+        try:
+            self.db.execute(text("""
+                INSERT INTO control_dedup_reviews
+                    (candidate_control_id, candidate_title, candidate_objective,
+                     matched_control_uuid, matched_control_id,
+                     similarity_score, dedup_stage, dedup_details)
+                VALUES (:ccid, :ct, :co, CAST(:mcu AS uuid), :mci,
+                        :ss, 'batch_dedup', CAST(:dd AS jsonb))
+            """), {
+                "ccid": candidate["control_id"],
+                "ct": candidate["title"],
+                "co": candidate.get("objective", ""),
+                "mcu": matched_payload.get("control_uuid"),
+                "mci": matched_payload.get("control_id"),
+                "ss": score,
+                # Serialized here and cast to jsonb inside the statement.
+                "dd": json.dumps({
+                    "merge_group_hint": candidate.get("merge_group_hint", ""),
+                    "pattern_id": candidate.get("pattern_id"),
+                }),
+            })
+            self.db.commit()
+        except Exception as e:
+            logger.error("BatchDedup _write_review error: %s", e)
+            self.db.rollback()
+            raise
+
+    # ── Progress ─────────────────────────────────────────────────────────
+
+    def _log_progress(self, hint: str):
+        """Emit a progress line whenever the counter hits a positive multiple of 500."""
+        done = self._progress_count
+        if done <= 0 or done % 500 != 0:
+            return
+
+        counters = self.stats
+        logger.info(
+            "BatchDedup [%s] %d/%d — masters=%d, linked=%d, review=%d",
+            self._progress_phase, done, self._progress_total,
+            counters["masters"], counters["linked"], counters["review"],
+        )
+
+    def get_status(self) -> dict:
+        """Snapshot of phase, progress and all counters (for the status endpoint).
+
+        Counter entries are merged last, so a counter named like one of the
+        progress keys would override it.
+        """
+        snapshot = {
+            "phase": self._progress_phase,
+            "progress": self._progress_count,
+            "total": self._progress_total,
+        }
+        snapshot.update(self.stats)
+        return snapshot
--- a/backend-compliance/compliance/services/citation_backfill.py
+++ b/backend-compliance/compliance/services/citation_backfill.py
@@ -0,0 +1,438 @@
+"""
+Citation Backfill Service — enrich existing controls with article/paragraph provenance.
+
+3-tier matching strategy:
+  Tier 1 — Hash match:  sha256(source_original_text) → RAG chunk lookup
+  Tier 2 — Regex parse: split concatenated "DSGVO Art. 35" → regulation + article
+  Tier 3 — Ollama LLM:  ask local LLM to identify article/paragraph from text
+"""
+
+import hashlib
+import json
+import logging
+import os
+import re
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Optional
+
+import httpx
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+
+from .rag_client import ComplianceRAGClient, RAGSearchResult
+
+logger = logging.getLogger(__name__)
+
+# Ollama endpoint, model name and request timeout (seconds); each can be
+# overridden through the environment variable named in the getenv call.
+OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
+OLLAMA_MODEL = os.getenv("CONTROL_GEN_OLLAMA_MODEL", "qwen3.5:35b-a3b")
+LLM_TIMEOUT = float(os.getenv("CONTROL_GEN_LLM_TIMEOUT", "180"))
+
+# Collections scanned when no control yields a regulation-specific collection,
+# and for any control whose regulation code matches no known prefix.
+ALL_COLLECTIONS = [
+    "bp_compliance_ce",
+    "bp_compliance_gesetze",
+    "bp_compliance_datenschutz",
+    "bp_dsfa_corpus",
+    "bp_legal_templates",
+]
+
+# System prompt for the LLM tier (German). Roughly: "You are a legal expert. Your
+# task is to determine the exact article and paragraph from a legal text.
+# Answer ONLY with valid JSON."
+BACKFILL_SYSTEM_PROMPT = (
+    "Du bist ein Rechtsexperte. Deine Aufgabe ist es, aus einem Gesetzestext "
+    "den genauen Artikel und Absatz zu bestimmen. Antworte NUR mit validem JSON."
+)
+
+# Regex to split concatenated source like "DSGVO Art. 35" or "NIS2 Artikel 21 Abs. 2"
+# Group 1 is the regulation name, group 2 everything from "Art"/"Artikel" onward.
+_SOURCE_ARTICLE_RE = re.compile(
+    r"^(.+?)\s+(Art(?:ikel)?\.?\s*\d+.*)$", re.IGNORECASE
+)
+
+
+@dataclass
+class MatchResult:
+    """Article/paragraph provenance found for one control, plus the tier that found it."""
+
+    # Article designation, e.g. "Art. 35" or "§ 42"; may be empty.
+    article: str
+    # Paragraph designation; the regex tier always leaves this empty.
+    paragraph: str
+    method: str  # "hash", "regex", "llm"
+
+
+@dataclass
+class BackfillResult:
+    """Counters and error messages collected during one CitationBackfill.run() call."""
+
+    # Number of controls loaded for backfill.
+    total_controls: int = 0
+    # Matches per tier, keyed off MatchResult.method.
+    matched_hash: int = 0
+    matched_regex: int = 0
+    matched_llm: int = 0
+    # Controls for which no tier produced a match.
+    unmatched: int = 0
+    # Controls for which an UPDATE was issued (stays 0 in dry-run mode).
+    updated: int = 0
+    # Human-readable messages for per-control failures and a failed commit.
+    errors: list = field(default_factory=list)
+
+
+class CitationBackfill:
+    """Backfill article/paragraph into existing control source_citations."""
+
+    def __init__(self, db: Session, rag_client: ComplianceRAGClient):
+        """Store the DB session and RAG client and start with an empty hash index.
+
+        Args:
+            db: Session used to read and update ``canonical_controls``.
+            rag_client: Client whose ``scroll`` method is used to page through collections.
+        """
+        self.db = db
+        self.rag = rag_client
+        # sha256 hex digest of chunk text -> chunk; filled by _build_rag_index_targeted.
+        self._rag_index: dict[str, RAGSearchResult] = {}
+
+    async def run(self, dry_run: bool = True, limit: int = 0) -> BackfillResult:
+        """Main entry: iterate controls missing article/paragraph, match to RAG, update.
+
+        Args:
+            dry_run: When True, matches are only counted and logged; nothing is written.
+            limit: Maximum number of controls to load; 0 (or less) means no limit.
+
+        Returns:
+            BackfillResult with per-tier match counters, the number of updated
+            controls and any error messages. Per-control failures are recorded
+            in ``errors`` and do not abort the run.
+        """
+        result = BackfillResult()
+
+        # Load controls needing backfill
+        controls = self._load_controls_needing_backfill(limit)
+        result.total_controls = len(controls)
+        logger.info("Backfill: %d controls need article/paragraph enrichment", len(controls))
+
+        if not controls:
+            return result
+
+        # Collect hashes we need to find — only build index for controls with source text
+        needed_hashes: set[str] = {
+            hashlib.sha256(src.encode()).hexdigest()
+            for src in (ctrl.get("source_original_text") for ctrl in controls)
+            if src
+        }
+
+        if needed_hashes:
+            # Build targeted RAG index — only scroll collections that our controls reference
+            logger.info("Building targeted RAG hash index for %d source texts...", len(needed_hashes))
+            await self._build_rag_index_targeted(controls)
+            logger.info("RAG index built: %d chunks indexed, %d hashes needed", len(self._rag_index), len(needed_hashes))
+        else:
+            logger.info("No source_original_text found — skipping RAG index build")
+
+        # Process each control
+        for i, ctrl in enumerate(controls):
+            if i > 0 and i % 100 == 0:
+                logger.info("Backfill progress: %d/%d processed", i, result.total_controls)
+
+            try:
+                match = await self._match_control(ctrl)
+                if not match:
+                    result.unmatched += 1
+                    continue
+
+                if match.method == "hash":
+                    result.matched_hash += 1
+                elif match.method == "regex":
+                    result.matched_regex += 1
+                elif match.method == "llm":
+                    result.matched_llm += 1
+
+                if dry_run:
+                    logger.debug(
+                        "DRY RUN: Would update %s with article=%s paragraph=%s (method=%s)",
+                        ctrl["control_id"], match.article, match.paragraph, match.method,
+                    )
+                    continue
+
+                # Run each UPDATE inside its own SAVEPOINT: a failing statement is
+                # rolled back on its own instead of aborting the surrounding
+                # transaction and making every later update (and the commit) fail.
+                with self.db.begin_nested():
+                    self._update_control(ctrl, match)
+                result.updated += 1
+
+            except Exception as e:
+                error_msg = f"Error backfilling {ctrl.get('control_id', '?')}: {e}"
+                logger.error(error_msg)
+                result.errors.append(error_msg)
+
+        if not dry_run:
+            try:
+                self.db.commit()
+            except Exception as e:
+                logger.error("Backfill commit failed: %s", e)
+                result.errors.append(f"Commit failed: {e}")
+                # Nothing was persisted: reset the session and do not report
+                # updates that never reached the database.
+                self.db.rollback()
+                result.updated = 0
+
+        logger.info(
+            "Backfill complete: %d total, hash=%d regex=%d llm=%d unmatched=%d updated=%d",
+            result.total_controls, result.matched_hash, result.matched_regex,
+            result.matched_llm, result.unmatched, result.updated,
+        )
+        return result
+
+    def _load_controls_needing_backfill(self, limit: int = 0) -> list[dict]:
+        """Load controls where source_citation exists but lacks separate 'article' key.
+
+        Only rows with ``license_rule`` 1 or 2 are selected, ordered by ``control_id``.
+
+        Args:
+            limit: Maximum number of rows; values <= 0 disable the limit.
+
+        Returns:
+            One dict per row. ``id`` is converted to ``str``; ``source_citation``
+            and ``generation_metadata`` are JSON-decoded when the driver returns
+            them as strings (undecodable values become ``{}``).
+        """
+        query = """
+            SELECT id, control_id, source_citation, source_original_text,
+                   generation_metadata, license_rule
+            FROM canonical_controls
+            WHERE license_rule IN (1, 2)
+              AND source_citation IS NOT NULL
+              AND (
+                  source_citation->>'article' IS NULL
+                  OR source_citation->>'article' = ''
+              )
+            ORDER BY control_id
+        """
+        params: dict = {}
+        if limit > 0:
+            # Bind the limit instead of formatting it into the SQL text.
+            query += " LIMIT :limit"
+            params["limit"] = int(limit)
+
+        result = self.db.execute(text(query), params)
+        cols = list(result.keys())
+        controls = []
+        for row in result:
+            ctrl = dict(zip(cols, row))
+            ctrl["id"] = str(ctrl["id"])
+            # Parse JSON fields
+            for jf in ("source_citation", "generation_metadata"):
+                if isinstance(ctrl.get(jf), str):
+                    try:
+                        ctrl[jf] = json.loads(ctrl[jf])
+                    except (json.JSONDecodeError, TypeError):
+                        ctrl[jf] = {}
+            controls.append(ctrl)
+        return controls
+
+    async def _build_rag_index_targeted(self, controls: list[dict]):
+        """Fill ``self._rag_index`` by paging through the relevant collections.
+
+        The collections come from ``_map_regulations_to_collections``; when that
+        mapping is empty, every entry of ``ALL_COLLECTIONS`` is scanned. Chunks
+        whose stripped text is shorter than 50 characters are not indexed.
+        """
+        mapping = self._map_regulations_to_collections(controls)
+        targets = set(mapping.values()) or set(ALL_COLLECTIONS)
+
+        logger.info("Targeted index: searching %d collections: %s",
+                     len(targets), ", ".join(targets))
+
+        for collection in targets:
+            cursor = None
+            pages_done = 0
+            visited: set[str] = set()
+            while True:
+                batch, following = await self.rag.scroll(
+                    collection=collection, offset=cursor, limit=200,
+                )
+                if not batch:
+                    break
+                for item in batch:
+                    if not item.text or len(item.text.strip()) < 50:
+                        continue
+                    digest = hashlib.sha256(item.text.encode()).hexdigest()
+                    self._rag_index[digest] = item
+                pages_done += 1
+                if pages_done % 50 == 0:
+                    logger.info("Indexing %s: page %d (%d chunks so far)",
+                                collection, pages_done, len(self._rag_index))
+                if not following:
+                    break
+                # A repeated offset means the scroll is cycling; stop this collection.
+                if following in visited:
+                    logger.warning("Scroll loop in %s at page %d — stopping", collection, pages_done)
+                    break
+                visited.add(following)
+                cursor = following
+
+            logger.info("Indexed collection %s: %d pages", collection, pages_done)
+
+    def _map_regulations_to_collections(self, controls: list[dict]) -> dict[str, str]:
+        """Derive the collections to scan from each control's ``source_regulation``.
+
+        Returns:
+            Dict of regulation code -> collection for codes matching a known prefix.
+            A code matching no prefix adds one ``_all_<collection>`` entry per
+            ``ALL_COLLECTIONS`` item. Controls without a regulation code add nothing.
+        """
+        # Heuristic lookup; the first matching prefix (in this order) wins.
+        prefix_to_collection = {
+            "eu_": "bp_compliance_gesetze",
+            "dsgvo": "bp_compliance_datenschutz",
+            "bdsg": "bp_compliance_gesetze",
+            "ttdsg": "bp_compliance_gesetze",
+            "nist_": "bp_compliance_ce",
+            "owasp": "bp_compliance_ce",
+            "bsi_": "bp_compliance_ce",
+            "enisa": "bp_compliance_ce",
+            "at_": "bp_compliance_recht",
+            "fr_": "bp_compliance_recht",
+            "es_": "bp_compliance_recht",
+        }
+        mapping: dict[str, str] = {}
+        for ctrl in controls:
+            reg = (ctrl.get("generation_metadata") or {}).get("source_regulation", "")
+            if not reg:
+                continue
+            target = next(
+                (coll for prefix, coll in prefix_to_collection.items() if reg.startswith(prefix)),
+                None,
+            )
+            if target is not None:
+                mapping[reg] = target
+            else:
+                # Unknown regulation: fall back to scanning every collection.
+                mapping.update({f"_all_{coll}": coll for coll in ALL_COLLECTIONS})
+        return mapping
+
+    async def _match_control(self, ctrl: dict) -> Optional[MatchResult]:
+        """Try the hash, regex and LLM tiers in that order; return the first hit.
+
+        Returns:
+            MatchResult tagged with the tier that produced it, or None when no
+            tier matched. The LLM tier is attempted only if the control has source text.
+        """
+        source_text = ctrl.get("source_original_text")
+
+        # Tier 1: exact hash of the source text against the RAG index.
+        if source_text:
+            digest = hashlib.sha256(source_text.encode()).hexdigest()
+            hit = self._rag_index.get(digest)
+            if hit and (hit.article or hit.paragraph):
+                return MatchResult(
+                    article=hit.article or "",
+                    paragraph=hit.paragraph or "",
+                    method="hash",
+                )
+
+        # Tier 2: split a concatenated source string such as "DSGVO Art. 35".
+        citation = ctrl.get("source_citation") or {}
+        parsed = _parse_concatenated_source(citation.get("source", ""))
+        if parsed and parsed["article"]:
+            # This tier never fills the paragraph.
+            return MatchResult(article=parsed["article"], paragraph="", method="regex")
+
+        # Tier 3: ask the LLM, which needs the source text to work from.
+        if not source_text:
+            return None
+        return await self._llm_match(ctrl)
+
+    async def _llm_match(self, ctrl: dict) -> Optional[MatchResult]:
+        """Use Ollama to identify article/paragraph from source text.
+
+        Only the first 2000 characters of the source text are sent. Values in
+        the LLM answer are normalised to stripped strings, so ``null`` or
+        numeric values never reach the MatchResult.
+
+        Returns:
+            MatchResult with method "llm", or None when the call fails, the
+            answer is not a JSON object, or both fields are empty.
+        """
+        citation = ctrl.get("source_citation") or {}
+        regulation_name = citation.get("source", "")
+        metadata = ctrl.get("generation_metadata") or {}
+        regulation_code = metadata.get("source_regulation", "")
+        # `or ""` also covers a key that is present with a None value.
+        source_text = ctrl.get("source_original_text") or ""
+
+        prompt = f"""Analysiere den folgenden Gesetzestext und bestimme den genauen Artikel und Absatz.
+
+Gesetz: {regulation_name} (Code: {regulation_code})
+
+Text:
+---
+{source_text[:2000]}
+---
+
+Antworte NUR mit JSON:
+{{"article": "Art. XX", "paragraph": "Abs. Y"}}
+
+Falls kein spezifischer Absatz erkennbar ist, setze paragraph auf "".
+Falls kein Artikel erkennbar ist, setze article auf "".
+Bei deutschen Gesetzen mit § verwende: "§ XX" statt "Art. XX"."""
+
+        try:
+            raw = await _llm_ollama(prompt, BACKFILL_SYSTEM_PROMPT)
+            data = _parse_json(raw)
+        except Exception as e:
+            logger.warning("LLM match failed for %s: %s", ctrl.get("control_id"), e)
+            return None
+
+        # The model may answer with a list or a bare scalar; only an object is usable.
+        if not isinstance(data, dict):
+            return None
+
+        article = str(data.get("article") or "").strip()
+        paragraph = str(data.get("paragraph") or "").strip()
+        if not (article or paragraph):
+            return None
+
+        return MatchResult(article=article, paragraph=paragraph, method="llm")
+
+    def _update_control(self, ctrl: dict, match: MatchResult):
+        """Write the matched article/paragraph into the control's row.
+
+        Mutates the ``source_citation`` and ``generation_metadata`` dicts held
+        by ``ctrl`` (when present) and issues an UPDATE; committing is left to
+        the caller.
+        """
+        citation = ctrl.get("source_citation") or {}
+
+        # Keep only the regulation name in "source" when an article was glued onto it.
+        split = _parse_concatenated_source(citation.get("source", ""))
+        if split:
+            citation["source"] = split["name"]
+        citation.update(article=match.article, paragraph=match.paragraph)
+
+        metadata = ctrl.get("generation_metadata") or {}
+        # An empty article must not overwrite an existing source_article.
+        if match.article:
+            metadata["source_article"] = match.article
+        metadata.update({
+            "source_paragraph": match.paragraph,
+            "backfill_method": match.method,
+            "backfill_at": datetime.now(timezone.utc).isoformat(),
+        })
+
+        statement = text("""
+                UPDATE canonical_controls
+                SET source_citation = :citation,
+                    generation_metadata = :metadata,
+                    updated_at = NOW()
+                WHERE id = CAST(:id AS uuid)
+            """)
+        bind_values = {
+            "id": ctrl["id"],
+            "citation": json.dumps(citation),
+            "metadata": json.dumps(metadata),
+        }
+        self.db.execute(statement, bind_values)
+
+
+def _parse_concatenated_source(source: str) -> Optional[dict]:
+    """Split a combined source string into regulation name and article.
+
+    Examples: 'DSGVO Art. 35' -> {name: 'DSGVO', article: 'Art. 35'};
+    'BDSG § 42' -> {name: 'BDSG', article: '§ 42'}.
+
+    Returns:
+        Dict with stripped ``name`` and ``article``, or None when the input is
+        empty or matches neither the Art./Artikel nor the § form.
+    """
+    if not source:
+        return None
+
+    # The Art./Artikel form is tried first, then the § form.
+    hit = _SOURCE_ARTICLE_RE.match(source) or re.match(r"^(.+?)\s+(§\s*\d+.*)$", source)
+    if hit is None:
+        return None
+
+    name, article = hit.groups()
+    return {"name": name.strip(), "article": article.strip()}
+
+
+async def _llm_ollama(prompt: str, system_prompt: Optional[str] = None) -> str:
+    """Send one non-streaming chat request to Ollama and return the reply text.
+
+    Returns:
+        The message content, or "" when the HTTP status is not 200 or the
+        request raises; failures are logged, never propagated.
+    """
+    messages = [{"role": "system", "content": system_prompt}] if system_prompt else []
+    messages.append({"role": "user", "content": prompt})
+
+    payload = {
+        "model": OLLAMA_MODEL,
+        "messages": messages,
+        "stream": False,
+        "format": "json",
+        "options": {"num_predict": 256},
+        "think": False,
+    }
+
+    try:
+        async with httpx.AsyncClient(timeout=LLM_TIMEOUT) as client:
+            resp = await client.post(f"{OLLAMA_URL}/api/chat", json=payload)
+            if resp.status_code == 200:
+                data = resp.json()
+                msg = data.get("message", {})
+                # Prefer message.content; otherwise fall back to a top-level "response".
+                if not isinstance(msg, dict):
+                    return data.get("response", str(msg))
+                return msg.get("content", "")
+            logger.error("Ollama backfill failed %d: %s", resp.status_code, resp.text[:300])
+            return ""
+    except Exception as e:
+        logger.error("Ollama backfill request failed: %s", e)
+        return ""
+
+
+def _parse_json(raw: str) -> Optional[dict]:
+    """Extract a JSON object from LLM output.
+
+    Tries, in order: the whole string, a fenced markdown code block, and
+    finally the first ``{`` position from which a complete JSON object can be
+    decoded (nested objects are returned whole).
+
+    Returns:
+        The decoded object, or None when the input is empty or contains no
+        JSON object. Non-object JSON (lists, strings, numbers) is never returned.
+    """
+    if not raw:
+        return None
+
+    # Try direct parse; valid JSON that is not an object falls through.
+    try:
+        parsed = json.loads(raw)
+    except json.JSONDecodeError:
+        pass
+    else:
+        if isinstance(parsed, dict):
+            return parsed
+
+    # Try extracting from markdown code block
+    m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", raw, re.DOTALL)
+    if m:
+        try:
+            parsed = json.loads(m.group(1))
+        except json.JSONDecodeError:
+            pass
+        else:
+            if isinstance(parsed, dict):
+                return parsed
+
+    # Scan for an embedded object: decode from each "{" until one succeeds.
+    decoder = json.JSONDecoder()
+    start = raw.find("{")
+    while start != -1:
+        try:
+            parsed, _ = decoder.raw_decode(raw, start)
+        except json.JSONDecodeError:
+            pass
+        else:
+            if isinstance(parsed, dict):
+                return parsed
+        start = raw.find("{", start + 1)
+    return None
--- a/backend-compliance/compliance/services/control_composer.py
+++ b/backend-compliance/compliance/services/control_composer.py
@@ -0,0 +1,546 @@
+"""Control Composer — Pattern + Obligation → Master Control.
+
+Takes an obligation (from ObligationExtractor) and a matched control pattern
+(from PatternMatcher), then uses LLM to compose a structured, actionable
+Master Control. Replaces the old Stage 3 (STRUCTURE/REFORM) with a
+pattern-guided approach.
+
+Three composition modes based on license rules:
+    Rule 1: Obligation + Pattern + original text → full control
+    Rule 2: Obligation + Pattern + original text + citation → control
+    Rule 3: Obligation + Pattern (NO original text) → reformulated control
+
+Fallback: No pattern match → basic generation (tagged needs_pattern_assignment)
+
+Part of the Multi-Layer Control Architecture (Phase 6 of 8).
+"""
+
+import json
+import logging
+import os
+from dataclasses import dataclass, field
+from typing import Optional
+
+from compliance.services.obligation_extractor import (
+    ObligationMatch,
+    _llm_ollama,
+    _parse_json,
+)
+from compliance.services.pattern_matcher import (
+    ControlPattern,
+    PatternMatchResult,
+)
+
+logger = logging.getLogger(__name__)
+
+# Model name, overridable via CONTROL_GEN_OLLAMA_MODEL.
+# NOTE(review): no use of this constant is visible in this part of the module — confirm it is needed.
+OLLAMA_MODEL = os.getenv("CONTROL_GEN_OLLAMA_MODEL", "qwen3.5:35b-a3b")
+
+# Valid values for generated control fields
+# (the same value sets are spelled out in the JSON templates of the LLM prompts).
+VALID_SEVERITIES = {"low", "medium", "high", "critical"}
+VALID_EFFORTS = {"s", "m", "l", "xl"}
+VALID_VERIFICATION = {"code_review", "document", "tool", "hybrid"}
+
+
+@dataclass
+class ComposedControl:
+    """Master Control produced by combining an obligation with a control pattern."""
+
+    # Core fields (match canonical_controls schema)
+    control_id: str = ""
+    title: str = ""
+    objective: str = ""
+    rationale: str = ""
+    scope: dict = field(default_factory=dict)
+    requirements: list = field(default_factory=list)
+    test_procedure: list = field(default_factory=list)
+    evidence: list = field(default_factory=list)
+    severity: str = "medium"
+    risk_score: float = 5.0
+    implementation_effort: str = "m"
+    open_anchors: list = field(default_factory=list)
+    release_state: str = "draft"
+    tags: list = field(default_factory=list)
+    # 3-Rule License fields
+    license_rule: Optional[int] = None
+    source_original_text: Optional[str] = None
+    source_citation: Optional[dict] = None
+    customer_visible: bool = True
+    # Classification
+    verification_method: Optional[str] = None
+    category: Optional[str] = None
+    target_audience: Optional[list] = None
+    # Pattern + Obligation linkage
+    pattern_id: Optional[str] = None
+    obligation_ids: list = field(default_factory=list)
+    # Metadata
+    generation_metadata: dict = field(default_factory=dict)
+    composition_method: str = "pattern_guided"  # pattern_guided | template_only | fallback
+
+    def to_dict(self) -> dict:
+        """Return all fields as a plain dict for DB storage or an API response.
+
+        Values are not copied: list and dict fields in the result are the same
+        objects the instance holds.
+        """
+        exported = (
+            "control_id",
+            "title",
+            "objective",
+            "rationale",
+            "scope",
+            "requirements",
+            "test_procedure",
+            "evidence",
+            "severity",
+            "risk_score",
+            "implementation_effort",
+            "open_anchors",
+            "release_state",
+            "tags",
+            "license_rule",
+            "source_original_text",
+            "source_citation",
+            "customer_visible",
+            "verification_method",
+            "category",
+            "target_audience",
+            "pattern_id",
+            "obligation_ids",
+            "generation_metadata",
+            "composition_method",
+        )
+        return {key: getattr(self, key) for key in exported}
+
+
+class ControlComposer:
+    """Composes Master Controls from obligations + patterns.
+
+    Usage::
+
+        composer = ControlComposer()
+
+        control = await composer.compose(
+            obligation=obligation_match,
+            pattern_result=pattern_match_result,
+            chunk_text="...",
+            license_rule=1,
+            source_citation={...},
+        )
+    """
+
+    async def compose(
+        self,
+        obligation: ObligationMatch,
+        pattern_result: PatternMatchResult,
+        chunk_text: Optional[str] = None,
+        license_rule: int = 3,
+        source_citation: Optional[dict] = None,
+        regulation_code: Optional[str] = None,
+    ) -> ComposedControl:
+        """Build one Master Control for an obligation, guided by a pattern if available.
+
+        Args:
+            obligation: The extracted obligation (from ObligationExtractor).
+            pattern_result: The pattern match (from PatternMatcher); a falsy
+                result or one without a pattern selects the fallback path.
+            chunk_text: Original RAG chunk text; stored on the control only for rules 1-2.
+            license_rule: 1=free, 2=citation, 3=restricted.
+            source_citation: Citation metadata; stored on the control only for rule 2.
+            regulation_code: Source regulation code, recorded in the metadata.
+
+        Returns:
+            ComposedControl with linkage, license and metadata fields set, after
+            being passed through ``_validate_control``.
+        """
+        has_result = bool(pattern_result)
+        pattern = pattern_result.pattern if has_result else None
+
+        if not pattern:
+            control = await self._compose_fallback(
+                obligation, chunk_text, license_rule, source_citation,
+            )
+        else:
+            control = await self._compose_with_pattern(
+                obligation, pattern, chunk_text, license_rule, source_citation,
+            )
+
+        # Link the control back to the pattern and obligation it came from.
+        control.pattern_id = pattern.id if pattern else None
+        if obligation.obligation_id:
+            control.obligation_ids = [obligation.obligation_id]
+
+        # License handling: rule 3 hides the control and strips all source material.
+        control.license_rule = license_rule
+        if license_rule == 3:
+            control.customer_visible = False
+            control.source_original_text = None
+            control.source_citation = None
+        elif license_rule in (1, 2):
+            if chunk_text:
+                control.source_original_text = chunk_text
+            if license_rule == 2 and source_citation:
+                control.source_citation = source_citation
+
+        confidence = round(pattern_result.confidence, 3) if has_result else 0
+        match_method = pattern_result.method if has_result else "none"
+        control.generation_metadata = {
+            "composition_method": control.composition_method,
+            "pattern_id": control.pattern_id,
+            "pattern_confidence": confidence,
+            "pattern_method": match_method,
+            "obligation_id": obligation.obligation_id,
+            "obligation_method": obligation.method,
+            "obligation_confidence": round(obligation.confidence, 3),
+            "license_rule": license_rule,
+            "regulation_code": regulation_code,
+        }
+
+        # Validate and fix fields in place.
+        _validate_control(control)
+
+        return control
+
+    async def compose_batch(
+        self,
+        items: list[dict],
+    ) -> list[ComposedControl]:
+        """Compose one control per item, sequentially and in input order.
+
+        Args:
+            items: Dicts with the required key ``obligation`` and the optional
+                keys ``pattern_result``, ``chunk_text``, ``license_rule``
+                (default 3), ``source_citation`` and ``regulation_code``.
+
+        Returns:
+            List of ComposedControl instances, one per item.
+        """
+        return [
+            await self.compose(
+                obligation=item["obligation"],
+                pattern_result=item.get("pattern_result", PatternMatchResult()),
+                chunk_text=item.get("chunk_text"),
+                license_rule=item.get("license_rule", 3),
+                source_citation=item.get("source_citation"),
+                regulation_code=item.get("regulation_code"),
+            )
+            for item in items
+        ]
+
+    # -----------------------------------------------------------------------
+    # Pattern-guided composition
+    # -----------------------------------------------------------------------
+
+    async def _compose_with_pattern(
+        self,
+        obligation: ObligationMatch,
+        pattern: ControlPattern,
+        chunk_text: Optional[str],
+        license_rule: int,
+        source_citation: Optional[dict],
+    ) -> ComposedControl:
+        """Use LLM to fill the pattern template with obligation-specific details.
+
+        Falls back to ``_compose_from_template`` when the LLM returns nothing
+        or its output is not a non-empty JSON object. A missing, null, blank or
+        non-string title falls back to the pattern name; a missing or empty
+        objective/rationale falls back to the pattern template.
+
+        Args:
+            obligation: Obligation to turn into a control.
+            pattern: Matched pattern supplying the defaults.
+            chunk_text: Original text passed to the prompt builder.
+            license_rule: Selects the system prompt and prompt context.
+            source_citation: Not used by this method.
+        """
+        prompt = _build_compose_prompt(obligation, pattern, chunk_text, license_rule)
+        system_prompt = _compose_system_prompt(license_rule)
+
+        llm_result = await _llm_ollama(prompt, system_prompt)
+        parsed = _parse_json(llm_result) if llm_result else None
+        # A list or scalar is as unusable as no answer at all.
+        if not isinstance(parsed, dict) or not parsed:
+            return self._compose_from_template(obligation, pattern)
+
+        # LLMs emit explicit nulls and wrong types; slicing those would raise.
+        title = parsed.get("title")
+        if not isinstance(title, str) or not title.strip():
+            title = pattern.name_de
+
+        control = ComposedControl(
+            title=title[:255],
+            objective=parsed.get("objective") or pattern.objective_template,
+            rationale=parsed.get("rationale") or pattern.rationale_template,
+            requirements=_ensure_list(parsed.get("requirements", pattern.requirements_template)),
+            test_procedure=_ensure_list(parsed.get("test_procedure", pattern.test_procedure_template)),
+            evidence=_ensure_list(parsed.get("evidence", pattern.evidence_template)),
+            severity=parsed.get("severity", pattern.severity_default),
+            implementation_effort=parsed.get("implementation_effort", pattern.implementation_effort_default),
+            category=parsed.get("category", pattern.category),
+            tags=_ensure_list(parsed.get("tags", pattern.tags)),
+            target_audience=_ensure_list(parsed.get("target_audience", [])),
+            verification_method=parsed.get("verification_method"),
+            open_anchors=_anchors_from_pattern(pattern),
+            composition_method="pattern_guided",
+        )
+
+        return control
+
+    def _compose_from_template(
+        self,
+        obligation: ObligationMatch,
+        pattern: ControlPattern,
+    ) -> ComposedControl:
+        """Build a control straight from the pattern template, without any LLM call.
+
+        The obligation title, when present, is appended to the pattern name;
+        obligation text longer than 20 characters is appended to the objective,
+        cut to 200 characters. The result is tagged ``template_only``.
+        """
+        obl_title = obligation.obligation_title or ""
+        obl_text = obligation.obligation_text or ""
+
+        title = f"{pattern.name_de} — {obl_title}" if obl_title else f"{pattern.name_de}"
+
+        if obl_text and len(obl_text) > 20:
+            objective = f"{pattern.objective_template} Bezug: {obl_text[:200]}"
+        else:
+            objective = pattern.objective_template
+
+        # Template lists are copied so the control never shares them with the pattern.
+        return ComposedControl(
+            title=title[:255],
+            objective=objective,
+            rationale=pattern.rationale_template,
+            requirements=list(pattern.requirements_template),
+            test_procedure=list(pattern.test_procedure_template),
+            evidence=list(pattern.evidence_template),
+            severity=pattern.severity_default,
+            implementation_effort=pattern.implementation_effort_default,
+            category=pattern.category,
+            tags=list(pattern.tags),
+            open_anchors=_anchors_from_pattern(pattern),
+            composition_method="template_only",
+        )
+
+    # -----------------------------------------------------------------------
+    # Fallback (no pattern)
+    # -----------------------------------------------------------------------
+
+    async def _compose_fallback(
+        self,
+        obligation: ObligationMatch,
+        chunk_text: Optional[str],
+        license_rule: int,
+        source_citation: Optional[dict],
+    ) -> ComposedControl:
+        """Generate a control without a pattern template (old-style).
+
+        If the LLM returns nothing or unparseable output, the control is built
+        from the obligation text alone. The result is tagged ``fallback`` and
+        gets release state ``needs_review``.
+
+        Args:
+            obligation: Obligation to turn into a control.
+            chunk_text: Original text passed to the prompt builder.
+            license_rule: Selects the system prompt and prompt context.
+            source_citation: Not used by this method.
+        """
+        prompt = _build_fallback_prompt(obligation, chunk_text, license_rule)
+        system_prompt = _compose_system_prompt(license_rule)
+
+        llm_result = await _llm_ollama(prompt, system_prompt)
+        parsed = _parse_json(llm_result) if llm_result else None
+        # _parse_json returns None for unparseable output; treat that (and any
+        # non-object JSON) like an empty answer rather than failing on .get().
+        if not isinstance(parsed, dict):
+            parsed = {}
+
+        obl_text = obligation.obligation_text or ""
+
+        # LLMs emit explicit nulls and wrong types; slicing those would raise.
+        title = parsed.get("title")
+        if not isinstance(title, str) or not title.strip():
+            title = obl_text[:100] if obl_text else "Untitled Control"
+
+        control = ComposedControl(
+            title=title[:255],
+            objective=parsed.get("objective") or obl_text[:500],
+            rationale=parsed.get("rationale") or "Aus gesetzlicher Pflicht abgeleitet.",
+            requirements=_ensure_list(parsed.get("requirements", [])),
+            test_procedure=_ensure_list(parsed.get("test_procedure", [])),
+            evidence=_ensure_list(parsed.get("evidence", [])),
+            severity=parsed.get("severity", "medium"),
+            implementation_effort=parsed.get("implementation_effort", "m"),
+            category=parsed.get("category"),
+            tags=_ensure_list(parsed.get("tags", [])),
+            target_audience=_ensure_list(parsed.get("target_audience", [])),
+            verification_method=parsed.get("verification_method"),
+            composition_method="fallback",
+            release_state="needs_review",
+        )
+
+        return control
+
+
+# ---------------------------------------------------------------------------
+# Prompt builders
+# ---------------------------------------------------------------------------
+
+
+def _compose_system_prompt(license_rule: int) -> str:
+    """Return the German system prompt for the given license rule.
+
+    Rule 3 gets the stricter prompt that forbids copying from or naming the
+    source; every other value gets the generic prompt.
+    """
+    if license_rule != 3:
+        return " ".join([
+            "Du bist ein Security-Compliance-Experte.",
+            "Erstelle ein praxisorientiertes, umsetzbares Security Control.",
+            "Antworte NUR mit validem JSON.",
+        ])
+
+    return " ".join([
+        "Du bist ein Security-Compliance-Experte.",
+        "Deine Aufgabe ist es, eigenstaendige Security Controls zu formulieren.",
+        "Du formulierst IMMER in eigenen Worten.",
+        "KOPIERE KEINE Saetze aus dem Quelltext.",
+        "Verwende eigene Begriffe und Struktur.",
+        "NENNE NICHT die Quelle.",
+        "Keine proprietaeren Bezeichner.",
+        "Antworte NUR mit validem JSON.",
+    ])
+
+
+def _build_compose_prompt(
+    obligation: ObligationMatch,
+    pattern: ControlPattern,
+    chunk_text: Optional[str],
+    license_rule: int,
+) -> str:
+    """Assemble the user prompt for pattern-guided composition.
+
+    The prompt combines the obligation section, the pattern section and a
+    context section. Under rule 3 the original text is never included;
+    otherwise at most its first 2000 characters are.
+    """
+    obl_section = _obligation_section(obligation)
+    pattern_section = _pattern_section(pattern)
+
+    # Default applies when there is no original text and the rule is not 3.
+    context_section = "KONTEXT: Kein Originaltext verfuegbar."
+    if license_rule == 3:
+        context_section = "KONTEXT: Intern analysiert (keine Quellenangabe)."
+    elif chunk_text:
+        context_section = f"KONTEXT (Originaltext):\n{chunk_text[:2000]}"
+
+    return f"""Erstelle ein PRAXISORIENTIERTES Security Control.
+
+{obl_section}
+
+{pattern_section}
+
+{context_section}
+
+AUFGABE:
+Fuelle das Muster mit pflicht-spezifischen Details.
+Das Ergebnis muss UMSETZBAR sein — keine Gesetzesparaphrase.
+Formuliere konkret und handlungsorientiert.
+
+Antworte als JSON:
+{{
+  "title": "Kurzer praegnanter Titel (max 100 Zeichen, deutsch)",
+  "objective": "Was soll erreicht werden? (1-3 Saetze)",
+  "rationale": "Warum ist das wichtig? (1-2 Saetze)",
+  "requirements": ["Konkrete Anforderung 1", "Anforderung 2", ...],
+  "test_procedure": ["Pruefschritt 1", "Pruefschritt 2", ...],
+  "evidence": ["Nachweis 1", "Nachweis 2", ...],
+  "severity": "low|medium|high|critical",
+  "implementation_effort": "s|m|l|xl",
+  "category": "{pattern.category}",
+  "tags": ["tag1", "tag2"],
+  "target_audience": ["unternehmen", "behoerden", "entwickler"],
+  "verification_method": "code_review|document|tool|hybrid"
+}}"""
+
+
+def _build_fallback_prompt(
+    obligation: ObligationMatch,
+    chunk_text: Optional[str],
+    license_rule: int,
+) -> str:
+    """Assemble the user prompt used when no control pattern is available.
+
+    Contains the formatted obligation, a context paragraph and the JSON
+    answer schema. For license rule 3 the source text is never embedded;
+    otherwise at most the first 2000 characters of ``chunk_text`` are
+    included when it is non-empty.
+    """
+    duty_block = _obligation_section(obligation)
+
+    # Rule 3 takes precedence over any available chunk text.
+    if license_rule == 3:
+        context_block = "KONTEXT: Intern analysiert (keine Quellenangabe)."
+    else:
+        context_block = (
+            f"KONTEXT (Originaltext):\n{chunk_text[:2000]}"
+            if chunk_text
+            else "KONTEXT: Kein Originaltext verfuegbar."
+        )
+
+    return f"""Erstelle ein Security Control aus der folgenden Pflicht.
+
+{duty_block}
+
+{context_block}
+
+AUFGABE:
+Formuliere ein umsetzbares Security Control.
+Keine Gesetzesparaphrase — konkrete Massnahmen beschreiben.
+
+Antworte als JSON:
+{{
+  "title": "Kurzer praegnanter Titel (max 100 Zeichen, deutsch)",
+  "objective": "Was soll erreicht werden? (1-3 Saetze)",
+  "rationale": "Warum ist das wichtig? (1-2 Saetze)",
+  "requirements": ["Konkrete Anforderung 1", "Anforderung 2", ...],
+  "test_procedure": ["Pruefschritt 1", "Pruefschritt 2", ...],
+  "evidence": ["Nachweis 1", "Nachweis 2", ...],
+  "severity": "low|medium|high|critical",
+  "implementation_effort": "s|m|l|xl",
+  "category": "one of: authentication, encryption, data_protection, etc.",
+  "tags": ["tag1", "tag2"],
+  "target_audience": ["unternehmen"],
+  "verification_method": "code_review|document|tool|hybrid"
+}}"""
+
+
+def _obligation_section(obligation: ObligationMatch) -> str:
+    """Render the obligation as the PFLICHT section of the prompt.
+
+    Only non-empty fields are listed; the obligation text is cut to its
+    first 500 characters. When neither title nor text is present, a
+    placeholder line is appended instead.
+    """
+    title = obligation.obligation_title
+    text = obligation.obligation_text
+
+    # (label, value deciding whether the row is shown, value actually printed)
+    rows = (
+        ("Titel", title, title),
+        ("Beschreibung", text, text[:500] if text else text),
+        ("ID", obligation.obligation_id, obligation.obligation_id),
+        ("Rechtsgrundlage", obligation.regulation_id, obligation.regulation_id),
+    )
+
+    lines = ["PFLICHT (was das Gesetz verlangt):"]
+    lines.extend(f"  {label}: {shown}" for label, present, shown in rows if present)
+    if not (text or title):
+        lines.append("  (Keine spezifische Pflicht extrahiert)")
+    return "\n".join(lines)
+
+
+def _pattern_section(pattern: ControlPattern) -> str:
+    """Render a control pattern as the MUSTER section of the prompt.
+
+    Lists at most the first five requirement templates and the first three
+    test-procedure templates, each as an indented "- " bullet.
+    """
+    bullet_separator = "\n    "
+    requirement_bullets = bullet_separator.join(
+        [f"- {entry}" for entry in pattern.requirements_template[:5]]
+    )
+    test_bullets = bullet_separator.join(
+        [f"- {entry}" for entry in pattern.test_procedure_template[:3]]
+    )
+    return f"""MUSTER (wie man es typischerweise umsetzt):
+  Pattern: {pattern.name_de} ({pattern.id})
+  Domain: {pattern.domain}
+  Ziel-Template: {pattern.objective_template}
+  Anforderungs-Template:
+    {requirement_bullets}
+  Pruefverfahren-Template:
+    {test_bullets}"""
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _ensure_list(value) -> list:
+    """Coerce a value into a list of non-empty strings.
+
+    Args:
+        value: A list, a string, or anything else (e.g. ``None`` or a dict).
+
+    Returns:
+        - for a list: its truthy items, each converted with ``str()``;
+        - for a non-empty string: a one-element list holding that string;
+        - for an empty string or any other type: an empty list.
+
+    An empty string yields ``[]`` rather than ``[""]`` so that it is treated
+    the same way as an empty item inside a list and callers checking
+    ``if not result`` see the field as missing.
+    """
+    if isinstance(value, list):
+        return [str(item) for item in value if item]
+    if isinstance(value, str):
+        return [value] if value else []
+    return []
+
+
+def _anchors_from_pattern(pattern: ControlPattern) -> list:
+    """Translate the pattern's ``open_anchor_refs`` into control anchors.
+
+    Each reference contributes its ``framework`` and ``ref`` values (empty
+    string when missing); the title is left empty and every anchor gets a
+    fixed alignment score of 0.8.
+    """
+    return [
+        {
+            "framework": anchor_ref.get("framework", ""),
+            "control_id": anchor_ref.get("ref", ""),
+            "title": "",
+            "alignment_score": 0.8,
+        }
+        for anchor_ref in pattern.open_anchor_refs
+    ]
+
+
+def _validate_control(control: ComposedControl) -> None:
+    """Repair a composed control in place so every field holds a usable value.
+
+    Out-of-set enum values are replaced by defaults, an out-of-range risk
+    score is derived from the (already repaired) severity, an overlong title
+    is shortened to 255 characters, and empty content fields receive generic
+    fallback texts.
+    """
+    # Enumerated fields: anything outside the allowed sets is replaced.
+    if control.severity not in VALID_SEVERITIES:
+        control.severity = "medium"
+    if control.implementation_effort not in VALID_EFFORTS:
+        control.implementation_effort = "m"
+    method = control.verification_method
+    if method and method not in VALID_VERIFICATION:
+        control.verification_method = None
+
+    # Risk score must lie in [0, 10]; otherwise fall back to the severity mapping.
+    score_in_range = 0 <= control.risk_score <= 10
+    if not score_in_range:
+        control.risk_score = _severity_to_risk(control.severity)
+
+    # Keep 252 characters plus an ellipsis when the title exceeds 255.
+    if len(control.title) > 255:
+        control.title = control.title[:252] + "..."
+
+    # Minimum content: the objective falls back to the (possibly shortened) title.
+    fallbacks = (
+        ("objective", control.title),
+        ("rationale", "Aus regulatorischer Anforderung abgeleitet."),
+        ("requirements", ["Anforderung gemaess Pflichtbeschreibung umsetzen"]),
+        ("test_procedure", ["Umsetzung der Anforderungen pruefen"]),
+        ("evidence", ["Dokumentation der Umsetzung"]),
+    )
+    for field_name, default in fallbacks:
+        if not getattr(control, field_name):
+            setattr(control, field_name, default)
+
+
+def _severity_to_risk(severity: str) -> float:
+    """Return the default risk score for a severity label.
+
+    Unknown labels are treated like "medium" and yield 5.0.
+    """
+    risk_by_severity = {
+        "low": 3.0,
+        "medium": 5.0,
+        "high": 7.0,
+        "critical": 9.0,
+    }
+    try:
+        return risk_by_severity[severity]
+    except KeyError:
+        return 5.0
--- a/backend-compliance/compliance/services/control_dedup.py
+++ b/backend-compliance/compliance/services/control_dedup.py
@@ -0,0 +1,745 @@
+"""Control Deduplication Engine — 4-Stage Matching Pipeline.
+
+Prevents duplicate atomic controls during Pass 0b by checking candidates
+against existing controls before insertion.
+
+Stages:
+    1. Pattern-Gate:  pattern_id must match (hard gate)
+    2. Action-Check:  normalized action verb must match (hard gate)
+    3. Object-Norm:   normalized object must match (soft gate with high threshold)
+    4. Embedding:     cosine similarity with tiered thresholds (Qdrant)
+
+Verdicts:
+    - NEW:    create a new atomic control
+    - LINK:   add parent link to existing control (similarity > LINK_THRESHOLD)
+    - REVIEW: queue for human review (REVIEW_THRESHOLD < sim <= LINK_THRESHOLD)
+"""
+
+import logging
+import os
+import re
+from dataclasses import dataclass, field
+from typing import Optional, Callable, Awaitable
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# ── Configuration ────────────────────────────────────────────────────
+
+# All settings are read from the environment once, at import time. The float()
+# conversions raise ValueError during import if a variable holds a non-numeric value.
+
+# Feature flag: true only when DEDUP_ENABLED equals "true" (case-insensitive).
+# NOTE(review): not referenced anywhere else in this module — presumably checked by callers.
+DEDUP_ENABLED = os.getenv("DEDUP_ENABLED", "true").lower() == "true"
+# Same action + same object: score above this value -> verdict "link".
+LINK_THRESHOLD = float(os.getenv("DEDUP_LINK_THRESHOLD", "0.92"))
+# Same action + same object: score above this value (but not above LINK_THRESHOLD) -> "review".
+REVIEW_THRESHOLD = float(os.getenv("DEDUP_REVIEW_THRESHOLD", "0.85"))
+# Stricter link threshold applied when the normalized objects differ.
+LINK_THRESHOLD_DIFF_OBJECT = float(os.getenv("DEDUP_LINK_THRESHOLD_DIFF_OBJ", "0.95"))
+# Link threshold for the cross-regulation pass (search without pattern_id filter).
+CROSS_REG_LINK_THRESHOLD = float(os.getenv("DEDUP_CROSS_REG_THRESHOLD", "0.95"))
+# Default Qdrant collection used by the helpers below when none is passed.
+QDRANT_COLLECTION = os.getenv("DEDUP_QDRANT_COLLECTION", "atomic_controls")
+# Base URL of the Qdrant REST API.
+QDRANT_URL = os.getenv("QDRANT_URL", "http://host.docker.internal:6333")
+# Base URL of the embedding service; get_embedding() posts to its /embed endpoint.
+EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://embedding-service:8087")
+
+
+# ── Result Dataclass ─────────────────────────────────────────────────
+
+@dataclass
+class DedupResult:
+    """Outcome of the dedup check.
+
+    Produced by ControlDedupChecker.check_duplicate. Only ``verdict`` is
+    mandatory; the ``matched_*`` fields are filled from the Qdrant payload of
+    the best hit when a hit was evaluated and stay ``None`` otherwise.
+    """
+    # Decision for the candidate control.
+    verdict: str  # "new" | "link" | "review"
+    # Payload value "control_uuid" of the best hit (set for link/review verdicts).
+    matched_control_uuid: Optional[str] = None
+    # Payload value "control_id" of the best hit.
+    matched_control_id: Optional[str] = None
+    # Payload value "title" of the best hit.
+    matched_title: Optional[str] = None
+    # Name of the pipeline step that produced the verdict, e.g. "pattern_gate".
+    stage: str = ""  # which stage decided
+    # Score of the best hit as reported by the vector search; 0.0 if none.
+    similarity_score: float = 0.0
+    # Kind of link to create when verdict is "link".
+    link_type: str = "dedup_merge"  # "dedup_merge" | "cross_regulation"
+    # Free-form diagnostic data (normalized values, thresholds, reasons).
+    details: dict = field(default_factory=dict)
+
+
+# ── Action Normalization ─────────────────────────────────────────────
+
+# Lookup table from lower-case action verbs to a canonical English verb.
+# Insertion order matters: normalize_action() scans it front to back in its
+# stem and prefix fallbacks and returns the first entry that matches.
+_ACTION_SYNONYMS: dict[str, str] = {
+    # German → canonical English
+    "implementieren": "implement",
+    "umsetzen": "implement",
+    "einrichten": "implement",
+    "einführen": "implement",
+    "aufbauen": "implement",
+    "bereitstellen": "implement",
+    "aktivieren": "implement",
+    "konfigurieren": "configure",
+    "einstellen": "configure",
+    "parametrieren": "configure",
+    "testen": "test",
+    "prüfen": "test",
+    "überprüfen": "test",
+    "verifizieren": "test",
+    "validieren": "test",
+    "kontrollieren": "test",
+    "auditieren": "audit",
+    "dokumentieren": "document",
+    "protokollieren": "log",
+    "aufzeichnen": "log",
+    "loggen": "log",
+    "überwachen": "monitor",
+    "monitoring": "monitor",
+    "beobachten": "monitor",
+    "schulen": "train",
+    "trainieren": "train",
+    "sensibilisieren": "train",
+    "löschen": "delete",
+    "entfernen": "delete",
+    "verschlüsseln": "encrypt",
+    "sperren": "block",
+    "beschränken": "restrict",
+    "einschränken": "restrict",
+    "begrenzen": "restrict",
+    "autorisieren": "authorize",
+    "genehmigen": "authorize",
+    "freigeben": "authorize",
+    "authentifizieren": "authenticate",
+    "identifizieren": "identify",
+    "melden": "report",
+    "benachrichtigen": "notify",
+    "informieren": "notify",
+    "aktualisieren": "update",
+    "erneuern": "update",
+    "sichern": "backup",
+    "wiederherstellen": "restore",
+    # English passthrough
+    "implement": "implement",
+    "configure": "configure",
+    "test": "test",
+    "verify": "test",
+    "validate": "test",
+    "audit": "audit",
+    "document": "document",
+    "log": "log",
+    "monitor": "monitor",
+    "train": "train",
+    "delete": "delete",
+    "encrypt": "encrypt",
+    "restrict": "restrict",
+    "authorize": "authorize",
+    "authenticate": "authenticate",
+    "report": "report",
+    "update": "update",
+    "backup": "backup",
+    "restore": "restore",
+}
+
+# German infinitive/conjugation endings removed to obtain a comparable stem.
+_ACTION_SUFFIX_RE = re.compile(r"(en|t|st|e|te|tet|end)$")
+# Stems shorter than this are too ambiguous to compare against table stems.
+_ACTION_MIN_STEM_LEN = 3
+# Inputs shorter than this may not match a table verb as a mere prefix;
+# otherwise "e" or "a" would resolve to whichever verb happens to come first.
+_ACTION_MIN_PREFIX_LEN = 4
+
+
+def _action_stem(word: str) -> str:
+    """Strip one trailing German verb ending from *word*."""
+    return _ACTION_SUFFIX_RE.sub("", word)
+
+
+def normalize_action(action: str) -> str:
+    """Normalize an action verb to a canonical English form.
+
+    Resolution order (first hit wins):
+        1. exact table lookup of the stripped, lower-cased input;
+        2. table lookup of the input with its verb ending removed;
+        3. stem-to-stem comparison, so conjugated forms such as
+           "implementiert" or "prüft" resolve like their infinitive;
+        4. prefix comparison in either direction (input starts with a table
+           verb, or a table verb starts with an input of at least
+           ``_ACTION_MIN_PREFIX_LEN`` characters).
+
+    Args:
+        action: Raw action verb, German or English; may be empty.
+
+    Returns:
+        The canonical verb, ``""`` for empty or whitespace-only input, or the
+        cleaned input itself when nothing in the table matches.
+    """
+    if not action:
+        return ""
+    action = action.strip().lower()
+    if not action:
+        # Whitespace only: without this guard every table verb would "start
+        # with" the empty string and the first entry would be returned.
+        return ""
+
+    # 1) Exact match
+    if action in _ACTION_SYNONYMS:
+        return _ACTION_SYNONYMS[action]
+
+    # 2) Base form of the input against the table keys
+    action_base = _action_stem(action)
+    if action_base in _ACTION_SYNONYMS:
+        return _ACTION_SYNONYMS[action_base]
+
+    # 3) Base form of the input against the base forms of the table keys
+    if len(action_base) >= _ACTION_MIN_STEM_LEN:
+        for verb, canonical in _ACTION_SYNONYMS.items():
+            if _action_stem(verb) == action_base:
+                return canonical
+
+    # 4) Fuzzy prefix match; very short inputs may not act as a prefix
+    allow_reverse_prefix = len(action) >= _ACTION_MIN_PREFIX_LEN
+    for verb, canonical in _ACTION_SYNONYMS.items():
+        if action.startswith(verb):
+            return canonical
+        if allow_reverse_prefix and verb.startswith(action):
+            return canonical
+
+    return action  # fallback: return as-is
+
+
+# ── Object Normalization ─────────────────────────────────────────────
+
+# Lookup table from lower-case object phrases (German and English) to a
+# canonical snake_case token. normalize_object() uses it both for exact
+# lookups and for substring matching.
+_OBJECT_SYNONYMS: dict[str, str] = {
+    # Authentication / Access
+    "mfa": "multi_factor_auth",
+    "multi-faktor-authentifizierung": "multi_factor_auth",
+    "mehrfaktorauthentifizierung": "multi_factor_auth",
+    "multi-factor authentication": "multi_factor_auth",
+    "two-factor": "multi_factor_auth",
+    "2fa": "multi_factor_auth",
+    "passwort": "password_policy",
+    "kennwort": "password_policy",
+    "password": "password_policy",
+    "zugangsdaten": "credentials",
+    "credentials": "credentials",
+    "admin-konten": "privileged_access",
+    "admin accounts": "privileged_access",
+    "administratorkonten": "privileged_access",
+    "privilegierte zugriffe": "privileged_access",
+    "privileged accounts": "privileged_access",
+    "remote-zugriff": "remote_access",
+    "fernzugriff": "remote_access",
+    "remote access": "remote_access",
+    "session": "session_management",
+    "sitzung": "session_management",
+    "sitzungsverwaltung": "session_management",
+    # Encryption
+    "verschlüsselung": "encryption",
+    "encryption": "encryption",
+    "kryptografie": "encryption",
+    "kryptografische verfahren": "encryption",
+    "schlüssel": "key_management",
+    "key management": "key_management",
+    "schlüsselverwaltung": "key_management",
+    "zertifikat": "certificate_management",
+    "certificate": "certificate_management",
+    "tls": "transport_encryption",
+    "ssl": "transport_encryption",
+    "https": "transport_encryption",
+    # Network
+    "firewall": "firewall",
+    "netzwerk": "network_security",
+    "network": "network_security",
+    "vpn": "vpn",
+    "segmentierung": "network_segmentation",
+    "segmentation": "network_segmentation",
+    # Logging / Monitoring
+    "audit-log": "audit_logging",
+    "audit log": "audit_logging",
+    "protokoll": "audit_logging",
+    "logging": "audit_logging",
+    "monitoring": "monitoring",
+    "überwachung": "monitoring",
+    "alerting": "alerting",
+    "alarmierung": "alerting",
+    "siem": "siem",
+    # Data
+    "personenbezogene daten": "personal_data",
+    "personal data": "personal_data",
+    "sensible daten": "sensitive_data",
+    "sensitive data": "sensitive_data",
+    "datensicherung": "backup",
+    "backup": "backup",
+    "wiederherstellung": "disaster_recovery",
+    "disaster recovery": "disaster_recovery",
+    # Policy / Process
+    "richtlinie": "policy",
+    "policy": "policy",
+    "verfahrensanweisung": "procedure",
+    "procedure": "procedure",
+    "prozess": "process",
+    "schulung": "training",
+    "training": "training",
+    "awareness": "awareness",
+    "sensibilisierung": "awareness",
+    # Incident
+    "vorfall": "incident",
+    "incident": "incident",
+    "sicherheitsvorfall": "security_incident",
+    "security incident": "security_incident",
+    # Vulnerability
+    "schwachstelle": "vulnerability",
+    "vulnerability": "vulnerability",
+    "patch": "patch_management",
+    "update": "patch_management",
+    "patching": "patch_management",
+}
+
+# Keys ordered longest first so that the substring scan in normalize_object()
+# prefers the most specific phrase (e.g. "sicherheitsvorfall" before "vorfall").
+# sorted() is stable, so keys of equal length keep their order from the table.
+_OBJECT_KEYS_SORTED = sorted(_OBJECT_SYNONYMS.keys(), key=len, reverse=True)
+
+
+def normalize_object(obj: str) -> str:
+    """Map a compliance object phrase to a canonical token.
+
+    Tries an exact table lookup first, then the longest table phrase that
+    occurs anywhere inside the input. If neither hits, common German and
+    English articles/prepositions are removed and up to four remaining words
+    longer than two characters are joined with underscores.
+    """
+    if not obj:
+        return ""
+    needle = obj.strip().lower()
+
+    # Exact phrase
+    try:
+        return _OBJECT_SYNONYMS[needle]
+    except KeyError:
+        pass
+
+    # Longest contained phrase
+    contained = next(
+        (phrase for phrase in _OBJECT_KEYS_SORTED if phrase in needle), None
+    )
+    if contained is not None:
+        return _OBJECT_SYNONYMS[contained]
+
+    # No table entry applies: derive a token from the remaining content words
+    without_fillers = re.sub(
+        r"\b(der|die|das|den|dem|des|ein|eine|eines|einem|einen"
+        r"|für|von|zu|auf|in|an|bei|mit|nach|über|unter|the|a|an"
+        r"|for|of|to|on|in|at|by|with)\b",
+        "",
+        needle,
+    )
+    content_words = [word for word in without_fillers.split() if len(word) > 2]
+    if content_words:
+        return "_".join(content_words[:4])
+    return needle.replace(" ", "_")
+
+
+# ── Canonicalization ─────────────────────────────────────────────────
+
+def canonicalize_text(action: str, obj: str, title: str = "") -> str:
+    """Build the canonical text that is sent to the embedding service.
+
+    The text starts with the normalized action and the normalized object.
+    When a title is given, up to five of its words (lower-cased, common
+    German filler words removed, longer than three characters) are appended
+    after the word "for". Title words are not translated.
+    """
+    pieces = [normalize_action(action), normalize_object(obj)]
+    if not title:
+        return " ".join(pieces)
+
+    # Drop filler words, then keep the first five sufficiently long words.
+    title_without_fillers = re.sub(
+        r"\b(und|oder|für|von|zu|der|die|das|den|dem|des|ein|eine"
+        r"|bei|mit|nach|gemäß|gem\.|laut|entsprechend)\b",
+        "",
+        title.lower(),
+    )
+    keywords = [word for word in title_without_fillers.split() if len(word) > 3][:5]
+    if keywords:
+        pieces += ["for", *keywords]
+    return " ".join(pieces)
+
+
+# ── Embedding Helper ─────────────────────────────────────────────────
+
+async def get_embedding(text: str) -> list[float]:
+    """Get the embedding vector for a single text via the embedding service.
+
+    Posts ``{"texts": [text]}`` to ``{EMBEDDING_URL}/embed`` and returns the
+    first entry of the ``embeddings`` list in the JSON reply.
+
+    Returns:
+        The embedding vector, or an empty list when the request fails, the
+        reply cannot be parsed, or it contains no embeddings. This function
+        never raises; every failure is logged as a warning so that a broken
+        embedding service does not disable deduplication unnoticed.
+    """
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            resp = await client.post(
+                f"{EMBEDDING_URL}/embed",
+                json={"texts": [text]},
+            )
+            embeddings = resp.json().get("embeddings", [])
+            if not embeddings:
+                # Typically an error reply or an unexpected payload shape.
+                logger.warning(
+                    "Embedding service returned no embeddings (HTTP %s)",
+                    resp.status_code,
+                )
+                return []
+            return embeddings[0]
+    except Exception as e:
+        # Best effort by design: callers treat [] as "embedding unavailable".
+        logger.warning("Embedding failed: %s", e)
+        return []
+
+
+def cosine_similarity(a: list[float], b: list[float]) -> float:
+    """Return the cosine of the angle between two vectors.
+
+    Yields 0.0 when either vector is empty, when the lengths differ, or
+    when one of the vectors has zero magnitude.
+    """
+    if not (a and b) or len(a) != len(b):
+        return 0.0
+    inner = sum(p * q for p, q in zip(a, b))
+    magnitude_a = sum(p * p for p in a) ** 0.5
+    magnitude_b = sum(q * q for q in b) ** 0.5
+    if magnitude_a == 0 or magnitude_b == 0:
+        return 0.0
+    return inner / (magnitude_a * magnitude_b)
+
+
+# ── Qdrant Helpers ───────────────────────────────────────────────────
+
+async def qdrant_search(
+    embedding: list[float],
+    pattern_id: str,
+    top_k: int = 10,
+    collection: Optional[str] = None,
+) -> list[dict]:
+    """Find the nearest atomic controls in Qdrant that share ``pattern_id``.
+
+    Returns the ``result`` list of the Qdrant reply, or an empty list when
+    the embedding is empty, Qdrant answers with a non-200 status, or the
+    request raises. Failures are logged as warnings, never propagated.
+    """
+    if not embedding:
+        return []
+    target = collection or QDRANT_COLLECTION
+    # Restrict hits to points whose payload carries the same pattern_id.
+    pattern_filter = {
+        "must": [
+            {"key": "pattern_id", "match": {"value": pattern_id}}
+        ]
+    }
+    query: dict = {
+        "vector": embedding,
+        "limit": top_k,
+        "with_payload": True,
+        "filter": pattern_filter,
+    }
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as http:
+            reply = await http.post(
+                f"{QDRANT_URL}/collections/{target}/points/search",
+                json=query,
+            )
+            if reply.status_code == 200:
+                return reply.json().get("result", [])
+            logger.warning("Qdrant search failed: %d", reply.status_code)
+            return []
+    except Exception as exc:
+        logger.warning("Qdrant search error: %s", exc)
+        return []
+
+
+async def qdrant_search_cross_regulation(
+    embedding: list[float],
+    top_k: int = 5,
+    collection: Optional[str] = None,
+) -> list[dict]:
+    """Find the nearest controls in Qdrant without any pattern_id filter.
+
+    Used for cross-regulation linking (e.g. DSGVO Art. 25 ↔ NIS2 Art. 21).
+    Returns the ``result`` list of the Qdrant reply, or an empty list when
+    the embedding is empty, Qdrant answers with a non-200 status, or the
+    request raises. Failures are logged as warnings, never propagated.
+    """
+    if not embedding:
+        return []
+    target = collection or QDRANT_COLLECTION
+    query: dict = {
+        "vector": embedding,
+        "limit": top_k,
+        "with_payload": True,
+    }
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as http:
+            reply = await http.post(
+                f"{QDRANT_URL}/collections/{target}/points/search",
+                json=query,
+            )
+            if reply.status_code == 200:
+                return reply.json().get("result", [])
+            logger.warning("Qdrant cross-reg search failed: %d", reply.status_code)
+            return []
+    except Exception as exc:
+        logger.warning("Qdrant cross-reg search error: %s", exc)
+        return []
+
+
+async def qdrant_upsert(
+    point_id: str,
+    embedding: list[float],
+    payload: dict,
+    collection: Optional[str] = None,
+) -> bool:
+    """Upsert a single point into a Qdrant collection.
+
+    Args:
+        point_id: Identifier of the point to create or overwrite.
+        embedding: Vector to store; an empty vector is rejected.
+        payload: Payload stored alongside the vector.
+        collection: Target collection; defaults to ``QDRANT_COLLECTION``.
+
+    Returns:
+        True when Qdrant answers with HTTP 200, False otherwise (empty
+        embedding, non-200 reply, or request error). Never raises; every
+        failed request is logged as a warning, matching the search helpers.
+    """
+    if not embedding:
+        return False
+    coll = collection or QDRANT_COLLECTION
+    body = {
+        "points": [{
+            "id": point_id,
+            "vector": embedding,
+            "payload": payload,
+        }]
+    }
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            resp = await client.put(
+                f"{QDRANT_URL}/collections/{coll}/points",
+                json=body,
+            )
+            if resp.status_code != 200:
+                # Surface rejected upserts; otherwise the control silently
+                # stays invisible to later dedup searches.
+                logger.warning("Qdrant upsert failed: %s", resp.status_code)
+                return False
+            return True
+    except Exception as e:
+        logger.warning("Qdrant upsert error: %s", e)
+        return False
+
+
+async def ensure_qdrant_collection(
+    vector_size: int = 1024,
+    collection: Optional[str] = None,
+) -> bool:
+    """Make sure the Qdrant collection exists, creating it when necessary.
+
+    An existing collection (GET answers 200) is left untouched. Otherwise the
+    collection is created with cosine distance and keyword payload indexes
+    are requested for the fields used by the dedup filters. Returns True when
+    the collection exists or was created, False on a failed creation or on
+    any request error.
+    """
+    target = collection or QDRANT_COLLECTION
+    indexed_fields = ("pattern_id", "action_normalized", "object_normalized", "control_id")
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as http:
+            # Already there? Then nothing to do.
+            probe = await http.get(f"{QDRANT_URL}/collections/{target}")
+            if probe.status_code == 200:
+                return True
+
+            creation = await http.put(
+                f"{QDRANT_URL}/collections/{target}",
+                json={
+                    "vectors": {"size": vector_size, "distance": "Cosine"},
+                },
+            )
+            if creation.status_code != 200:
+                logger.error("Failed to create Qdrant collection: %d", creation.status_code)
+                return False
+
+            logger.info("Created Qdrant collection: %s", target)
+            # Index requests are fire-and-forget: their replies are not checked.
+            for field_name in indexed_fields:
+                await http.put(
+                    f"{QDRANT_URL}/collections/{target}/index",
+                    json={"field_name": field_name, "field_schema": "keyword"},
+                )
+            return True
+    except Exception as exc:
+        logger.warning("Qdrant collection check error: %s", exc)
+        return False
+
+
+# ── Main Dedup Checker ───────────────────────────────────────────────
+
+class ControlDedupChecker:
+    """4-stage dedup checker for atomic controls.
+
+    Usage:
+        checker = ControlDedupChecker(db_session)
+        result = await checker.check_duplicate(candidate_action, candidate_object, candidate_title, pattern_id)
+        if result.verdict == "link":
+            checker.add_parent_link(result.matched_control_uuid, parent_uuid)
+        elif result.verdict == "review":
+            checker.write_review(candidate, result)
+        else:
+            # Insert new control
+
+    The embedding function and the intra-pattern search function can be
+    injected (e.g. for tests); the cross-regulation search always uses the
+    module-level ``qdrant_search_cross_regulation``.
+    """
+
+    def __init__(
+        self,
+        db,
+        embed_fn: Optional[Callable[[str], Awaitable[list[float]]]] = None,
+        search_fn: Optional[Callable] = None,
+    ):
+        """Create a checker.
+
+        Args:
+            db: Database session offering ``execute()`` and ``commit()``.
+            embed_fn: Async function text -> vector; defaults to ``get_embedding``.
+            search_fn: Async function (embedding, pattern_id, top_k=...) -> hits;
+                defaults to ``qdrant_search``.
+        """
+        self.db = db
+        self._embed = embed_fn or get_embedding
+        self._search = search_fn or qdrant_search
+        self._cache: dict[str, list[dict]] = {}  # pattern_id → existing controls
+
+    def _load_existing(self, pattern_id: str) -> list[dict]:
+        """Load existing atomic controls with the same pattern_id from the DB.
+
+        Results are cached per pattern_id for the lifetime of this checker,
+        so controls inserted later are not seen by the pattern gate.
+        """
+        if pattern_id in self._cache:
+            return self._cache[pattern_id]
+        from sqlalchemy import text
+        rows = self.db.execute(text("""
+            SELECT id::text, control_id, title, objective,
+                   pattern_id,
+                   generation_metadata->>'obligation_type' as obligation_type
+            FROM canonical_controls
+            WHERE parent_control_uuid IS NOT NULL
+              AND release_state != 'deprecated'
+              AND pattern_id = :pid
+        """), {"pid": pattern_id}).fetchall()
+        result = [
+            {
+                "uuid": r[0], "control_id": r[1], "title": r[2],
+                "objective": r[3], "pattern_id": r[4],
+                "obligation_type": r[5],
+            }
+            for r in rows
+        ]
+        self._cache[pattern_id] = result
+        return result
+
+    async def check_duplicate(
+        self,
+        action: str,
+        obj: str,
+        title: str,
+        pattern_id: Optional[str],
+    ) -> DedupResult:
+        """Run the 4-stage dedup pipeline + cross-regulation linking.
+
+        Args:
+            action: Raw action verb of the candidate control.
+            obj: Raw object phrase of the candidate control.
+            title: Candidate title; contributes keywords to the embedded text.
+            pattern_id: Pattern of the candidate; without it no dedup is done.
+
+        Returns:
+            DedupResult with verdict new/link/review. Every "new" verdict
+            reached after an embedding was computed is passed through the
+            cross-regulation check, which may turn it into a "link".
+        """
+        # No pattern_id → can't dedup meaningfully
+        if not pattern_id:
+            return DedupResult(verdict="new", stage="no_pattern")
+
+        # Stage 1: Pattern-Gate
+        existing = self._load_existing(pattern_id)
+        if not existing:
+            return DedupResult(
+                verdict="new", stage="pattern_gate",
+                details={"reason": "no existing controls with this pattern_id"},
+            )
+
+        # Stage 2: Action-Check
+        # The DB rows carry no normalized action, so the comparison happens
+        # further down against the payload of the best Qdrant hit.
+        norm_action = normalize_action(action)
+
+        # Stage 3: Object-Normalization (compared against the hit payload, too)
+        norm_object = normalize_object(obj)
+
+        # Stage 4: Embedding Similarity
+        canonical = canonicalize_text(action, obj, title)
+        embedding = await self._embed(canonical)
+        if not embedding:
+            # Can't compute embedding → default to new
+            return DedupResult(
+                verdict="new", stage="embedding_unavailable",
+                details={"canonical_text": canonical},
+            )
+
+        # Search Qdrant
+        results = await self._search(embedding, pattern_id, top_k=5)
+
+        if not results:
+            # No intra-pattern matches → try cross-regulation
+            return await self._check_cross_regulation(embedding, DedupResult(
+                verdict="new", stage="no_qdrant_matches",
+                details={"canonical_text": canonical, "action": norm_action, "object": norm_object},
+            ))
+
+        # Evaluate best match (the first hit is taken as the best one)
+        best = results[0]
+        best_score = best.get("score", 0.0)
+        best_payload = best.get("payload", {})
+        best_action = best_payload.get("action_normalized", "")
+        best_object = best_payload.get("object_normalized", "")
+
+        # Action differs → NEW (even if embedding is high)
+        if best_action and norm_action and best_action != norm_action:
+            return await self._check_cross_regulation(embedding, DedupResult(
+                verdict="new", stage="action_mismatch",
+                similarity_score=best_score,
+                matched_control_id=best_payload.get("control_id"),
+                details={
+                    "candidate_action": norm_action,
+                    "existing_action": best_action,
+                    "similarity": best_score,
+                },
+            ))
+
+        # Object differs → use higher threshold
+        if best_object and norm_object and best_object != norm_object:
+            if best_score > LINK_THRESHOLD_DIFF_OBJECT:
+                return DedupResult(
+                    verdict="link", stage="embedding_diff_object",
+                    matched_control_uuid=best_payload.get("control_uuid"),
+                    matched_control_id=best_payload.get("control_id"),
+                    matched_title=best_payload.get("title"),
+                    similarity_score=best_score,
+                    details={"candidate_object": norm_object, "existing_object": best_object},
+                )
+            return await self._check_cross_regulation(embedding, DedupResult(
+                verdict="new", stage="object_mismatch_below_threshold",
+                similarity_score=best_score,
+                matched_control_id=best_payload.get("control_id"),
+                details={
+                    "candidate_object": norm_object,
+                    "existing_object": best_object,
+                    "threshold": LINK_THRESHOLD_DIFF_OBJECT,
+                },
+            ))
+
+        # Same action + same object → tiered thresholds
+        if best_score > LINK_THRESHOLD:
+            return DedupResult(
+                verdict="link", stage="embedding_match",
+                matched_control_uuid=best_payload.get("control_uuid"),
+                matched_control_id=best_payload.get("control_id"),
+                matched_title=best_payload.get("title"),
+                similarity_score=best_score,
+            )
+        if best_score > REVIEW_THRESHOLD:
+            return DedupResult(
+                verdict="review", stage="embedding_review",
+                matched_control_uuid=best_payload.get("control_uuid"),
+                matched_control_id=best_payload.get("control_id"),
+                matched_title=best_payload.get("title"),
+                similarity_score=best_score,
+            )
+        return await self._check_cross_regulation(embedding, DedupResult(
+            verdict="new", stage="embedding_below_threshold",
+            similarity_score=best_score,
+            details={"threshold": REVIEW_THRESHOLD},
+        ))
+
+    async def _check_cross_regulation(
+        self,
+        embedding: list[float],
+        intra_result: DedupResult,
+    ) -> DedupResult:
+        """Second pass: cross-regulation linking for controls deemed 'new'.
+
+        Searches Qdrant WITHOUT pattern_id filter. Uses the separate
+        CROSS_REG_LINK_THRESHOLD (default 0.95) to avoid false positives
+        across regulation boundaries. Any result that is not "new" is
+        returned unchanged.
+
+        NOTE(review): because the search is unfiltered, its best hit can be a
+        control of the SAME pattern — including one the intra-pattern pass
+        just rejected for an action mismatch. Confirm whether such hits
+        should be excluded here.
+        """
+        if intra_result.verdict != "new" or not embedding:
+            return intra_result
+
+        cross_results = await qdrant_search_cross_regulation(embedding, top_k=5)
+        if not cross_results:
+            return intra_result
+
+        best = cross_results[0]
+        best_score = best.get("score", 0.0)
+        if best_score > CROSS_REG_LINK_THRESHOLD:
+            best_payload = best.get("payload", {})
+            return DedupResult(
+                verdict="link",
+                stage="cross_regulation",
+                matched_control_uuid=best_payload.get("control_uuid"),
+                matched_control_id=best_payload.get("control_id"),
+                matched_title=best_payload.get("title"),
+                similarity_score=best_score,
+                link_type="cross_regulation",
+                details={
+                    "cross_reg_score": best_score,
+                    "cross_reg_threshold": CROSS_REG_LINK_THRESHOLD,
+                },
+            )
+
+        return intra_result
+
+    def add_parent_link(
+        self,
+        control_uuid: str,
+        parent_control_uuid: str,
+        link_type: str = "dedup_merge",
+        confidence: float = 0.0,
+        source_regulation: Optional[str] = None,
+        source_article: Optional[str] = None,
+        obligation_candidate_id: Optional[str] = None,
+    ) -> None:
+        """Add a parent link to an existing atomic control and commit.
+
+        An already existing (control_uuid, parent_control_uuid) pair is left
+        untouched (ON CONFLICT DO NOTHING).
+        """
+        from sqlalchemy import text
+        # CAST(:name AS type) instead of ":name::type": text() does not treat
+        # ":name" as a bind parameter when it is directly followed by a colon.
+        self.db.execute(text("""
+            INSERT INTO control_parent_links
+                (control_uuid, parent_control_uuid, link_type, confidence,
+                 source_regulation, source_article, obligation_candidate_id)
+            VALUES (:cu, :pu, :lt, :conf, :sr, :sa, CAST(:oci AS uuid))
+            ON CONFLICT (control_uuid, parent_control_uuid) DO NOTHING
+        """), {
+            "cu": control_uuid,
+            "pu": parent_control_uuid,
+            "lt": link_type,
+            "conf": confidence,
+            "sr": source_regulation,
+            "sa": source_article,
+            "oci": obligation_candidate_id,
+        })
+        self.db.commit()
+
+    def write_review(
+        self,
+        candidate_control_id: str,
+        candidate_title: str,
+        candidate_objective: str,
+        result: DedupResult,
+        parent_control_uuid: Optional[str] = None,
+        obligation_candidate_id: Optional[str] = None,
+    ) -> None:
+        """Write a dedup review queue entry and commit.
+
+        The match information (uuid, id, score, stage, details) is taken from
+        ``result``; ``result.details`` is stored as JSON.
+        """
+        import json
+
+        from sqlalchemy import text
+        # CAST(:name AS type) instead of ":name::type": text() does not treat
+        # ":name" as a bind parameter when it is directly followed by a colon.
+        self.db.execute(text("""
+            INSERT INTO control_dedup_reviews
+                (candidate_control_id, candidate_title, candidate_objective,
+                 matched_control_uuid, matched_control_id,
+                 similarity_score, dedup_stage, dedup_details,
+                 parent_control_uuid, obligation_candidate_id)
+            VALUES (:ccid, :ct, :co, CAST(:mcu AS uuid), :mci, :ss, :ds,
+                    CAST(:dd AS jsonb), CAST(:pcu AS uuid), :oci)
+        """), {
+            "ccid": candidate_control_id,
+            "ct": candidate_title,
+            "co": candidate_objective,
+            "mcu": result.matched_control_uuid,
+            "mci": result.matched_control_id,
+            "ss": result.similarity_score,
+            "ds": result.stage,
+            "dd": json.dumps(result.details),
+            "pcu": parent_control_uuid,
+            "oci": obligation_candidate_id,
+        })
+        self.db.commit()
+
+    async def index_control(
+        self,
+        control_uuid: str,
+        control_id: str,
+        title: str,
+        action: str,
+        obj: str,
+        pattern_id: str,
+        collection: Optional[str] = None,
+    ) -> bool:
+        """Index a new atomic control in Qdrant for future dedup checks.
+
+        The stored vector is the embedding of the same canonical text that
+        check_duplicate() builds, and the payload carries the normalized
+        action/object that the action and object gates compare against.
+
+        Returns:
+            True when the point was upserted, False when no embedding could
+            be computed or the upsert failed.
+        """
+        norm_action = normalize_action(action)
+        norm_object = normalize_object(obj)
+        canonical = canonicalize_text(action, obj, title)
+        embedding = await self._embed(canonical)
+        if not embedding:
+            return False
+        return await qdrant_upsert(
+            point_id=control_uuid,
+            embedding=embedding,
+            payload={
+                "control_uuid": control_uuid,
+                "control_id": control_id,
+                "title": title,
+                "pattern_id": pattern_id,
+                "action_normalized": norm_action,
+                "object_normalized": norm_object,
+                "canonical_text": canonical,
+            },
+            collection=collection,
+        )
--- a/backend-compliance/compliance/services/control_generator.py
+++ b/backend-compliance/compliance/services/control_generator.py
--- a/backend-compliance/compliance/services/control_status_machine.py
+++ b/backend-compliance/compliance/services/control_status_machine.py
@@ -0,0 +1,152 @@
+"""
+Control Status Transition State Machine.
+
+Enforces that controls cannot be set to "pass" without sufficient evidence.
+Prevents Compliance-Theater where controls claim compliance without real proof.
+
+Transition rules:
+  planned     → in_progress : always allowed
+  in_progress → pass        : requires ≥1 evidence with confidence ≥ E2 and
+                              truth_status in (uploaded, observed, validated_internal,
+                              accepted_by_auditor, provided_to_auditor)
+  in_progress → partial     : requires ≥1 evidence (any level)
+  pass        → fail        : always allowed (degradation)
+  any         → n/a         : requires status_justification
+  any         → planned     : always allowed (reset)
+"""
+
+from typing import List, Optional, Tuple
+
+from ..db.models import EvidenceDB
+
+
+# Confidence level ordering for comparisons
+CONFIDENCE_ORDER = {"E0": 0, "E1": 1, "E2": 2, "E3": 3, "E4": 4}
+
+# Truth statuses that qualify as "real" evidence for pass transitions
+VALID_TRUTH_STATUSES = {"uploaded", "observed", "validated_internal", "accepted_by_auditor", "provided_to_auditor"}
+
+
+def _enum_or_str(raw, default: str) -> str:
+    """Return ``raw.value`` for enum-like objects, ``str(raw)`` if truthy, else *default*."""
+    if hasattr(raw, "value"):
+        return raw.value
+    return str(raw) if raw else default
+
+
+def _has_qualifying_evidence(evidence_list) -> bool:
+    """Return True if any evidence has confidence >= E2 and a valid truth status.
+
+    Missing attributes fall back to confidence "E1" and truth status
+    "uploaded"; unknown confidence labels are ranked like "E1".
+    """
+    minimum = CONFIDENCE_ORDER["E2"]
+    for e in evidence_list:
+        conf_val = _enum_or_str(getattr(e, "confidence_level", None), "E1")
+        truth_val = _enum_or_str(getattr(e, "truth_status", None), "uploaded")
+        if CONFIDENCE_ORDER.get(conf_val, 1) >= minimum and truth_val in VALID_TRUTH_STATUSES:
+            return True
+    return False
+
+
+def validate_transition(
+    current_status: str,
+    new_status: str,
+    evidence_list: Optional[List["EvidenceDB"]] = None,
+    status_justification: Optional[str] = None,
+    bypass_for_auto_updater: bool = False,
+) -> Tuple[bool, List[str]]:
+    """
+    Validate whether a control status transition is allowed.
+
+    Effective rules, evaluated in this order:
+      * same status, or target "planned" / "fail": always allowed
+      * target "n/a": requires a non-blank status_justification
+      * target "partial": requires >= 1 evidence record (unless bypassed)
+      * target "pass": requires >= 1 evidence record with confidence >= E2
+        and truth_status in VALID_TRUTH_STATUSES (unless bypassed)
+      * any other target (e.g. "in_progress"): allowed
+
+    NOTE(review): earlier revisions contained a guard rejecting direct
+    planned/fail -> pass transitions and a separate partial -> pass branch.
+    Both sat behind the generic "pass" branch and could never execute, so
+    they were removed without changing behaviour. If the direct-transition
+    rule is really wanted, it must be added ahead of the "pass" branch as a
+    deliberate, separately reviewed behaviour change.
+
+    Args:
+        current_status: Current control status value (e.g. "planned", "pass")
+        new_status: Requested new status
+        evidence_list: List of EvidenceDB objects linked to this control
+        status_justification: Text justification (required for n/a transitions)
+        bypass_for_auto_updater: If True, skip evidence checks (used by CI/CD auto-updater
+                                 which creates evidence atomically with status change)
+
+    Returns:
+        Tuple of (allowed: bool, violations: list[str])
+    """
+    evidence_list = evidence_list or []
+
+    # No-op, reset and degradation never need evidence.
+    if current_status == new_status or new_status in ("planned", "fail"):
+        return True, []
+
+    if new_status == "n/a":
+        if not status_justification or not status_justification.strip():
+            return False, [
+                "Transition to 'n/a' requires a status_justification explaining why this control is not applicable."
+            ]
+        return True, []
+
+    if new_status == "partial":
+        if not bypass_for_auto_updater and not evidence_list:
+            return False, ["Transition to 'partial' requires at least 1 evidence record."]
+        return True, []
+
+    if new_status == "pass":
+        if bypass_for_auto_updater:
+            return True, []
+        if not evidence_list:
+            return False, ["Transition to 'pass' requires at least 1 evidence record."]
+        if not _has_qualifying_evidence(evidence_list):
+            # The message lists every status in VALID_TRUTH_STATUSES so that it
+            # matches what the check above actually accepts.
+            return False, [
+                "Transition to 'pass' requires at least 1 evidence with confidence >= E2 "
+                "and truth_status in (uploaded, observed, validated_internal, "
+                "accepted_by_auditor, provided_to_auditor). "
+                "Current evidence does not meet this threshold."
+            ]
+        return True, []
+
+    # All other transitions (e.g. planned -> in_progress) are allowed.
+    return True, []
--- a/backend-compliance/compliance/services/decomposition_pass.py
+++ b/backend-compliance/compliance/services/decomposition_pass.py
--- a/backend-compliance/compliance/services/framework_decomposition.py
+++ b/backend-compliance/compliance/services/framework_decomposition.py
@@ -0,0 +1,714 @@
+"""Framework Decomposition Engine — decomposes framework-container obligations.
+
+Sits between Pass 0a (obligation extraction) and Pass 0b (atomic control
+composition).  Detects obligations that reference a framework domain (e.g.
+"CCM-Praktiken fuer AIS") and decomposes them into concrete sub-obligations
+using an internal framework registry.
+
+Three routing types:
+    atomic              → pass through to Pass 0b unchanged
+    compound            → split compound verbs, then Pass 0b
+    framework_container → decompose via registry, then Pass 0b
+
+The registry is a set of JSON files under compliance/data/frameworks/.
+"""
+
+import json
+import logging
+import os
+import re
+import uuid
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Registry loading
+# ---------------------------------------------------------------------------
+
+_REGISTRY_DIR = Path(__file__).resolve().parent.parent / "data" / "frameworks"
+_REGISTRY: dict[str, dict] = {}  # framework_id → framework dict
+
+
+def _load_registry() -> dict[str, dict]:
+    """Read every ``*.json`` file in the registry directory into a dict.
+
+    Files are processed in sorted order and keyed by their ``framework_id``
+    field, falling back to the file stem. A file that cannot be read or
+    parsed is logged and skipped; a missing directory yields an empty dict.
+    """
+    if not _REGISTRY_DIR.is_dir():
+        logger.warning("Framework registry dir not found: %s", _REGISTRY_DIR)
+        return {}
+
+    loaded: dict[str, dict] = {}
+    for json_path in sorted(_REGISTRY_DIR.glob("*.json")):
+        try:
+            framework = json.loads(json_path.read_text(encoding="utf-8"))
+            framework_id = framework.get("framework_id", json_path.stem)
+            loaded[framework_id] = framework
+            logger.info(
+                "Loaded framework: %s (%d domains)",
+                framework_id,
+                len(framework.get("domains", [])),
+            )
+        except Exception:
+            # Best effort: one broken file must not prevent loading the rest.
+            logger.exception("Failed to load framework file: %s", json_path)
+    return loaded
+
+
+def get_registry() -> dict[str, dict]:
+    """Return the module-level framework registry, loading it on first use.
+
+    An empty registry counts as "not loaded", so loading is attempted again
+    on every call until at least one framework has been read.
+    """
+    global _REGISTRY
+    if _REGISTRY:
+        return _REGISTRY
+    _REGISTRY = _load_registry()
+    return _REGISTRY
+
+
+def reload_registry() -> dict[str, dict]:
+    """Re-read the registry from disk, replace the cached copy and return it."""
+    global _REGISTRY
+    fresh = _load_registry()
+    _REGISTRY = fresh
+    return fresh
+
+
+# ---------------------------------------------------------------------------
+# Framework alias index (built from registry)
+# ---------------------------------------------------------------------------
+
+def _build_alias_index(registry: dict[str, dict]) -> dict[str, str]:
+    """Map lowercase framework aliases to their framework_id.
+
+    For each framework the index contains the lowercased ID, the lowercased
+    ``display_name`` (when present), and every underscore-separated part of
+    the ID that is at least three characters long. Later frameworks overwrite
+    earlier ones when aliases collide.
+    """
+    aliases: dict[str, str] = {}
+    for framework_id, framework in registry.items():
+        lowered_id = framework_id.lower()
+        aliases[lowered_id] = framework_id
+
+        display_name = framework.get("display_name", "")
+        if display_name:
+            aliases[display_name.lower()] = framework_id
+
+        # Short forms: individual tokens of the ID, e.g. "csa" and "ccm".
+        aliases.update(
+            (token, framework_id)
+            for token in lowered_id.replace("_", " ").split()
+            if len(token) >= 3
+        )
+    return aliases
+
+
+# ---------------------------------------------------------------------------
+# Routing — classify obligation type
+# ---------------------------------------------------------------------------
+
+# Extended patterns for framework detection (beyond the simple _COMPOSITE_RE
+# in decomposition_pass.py — here we also capture the framework name).
+# Shape: <indicator noun> <preposition> <captured reference>, where the
+# capture is lazy and ends at a following verb, a period, a comma or the
+# end of the text. German and English variants are both covered.
+_FRAMEWORK_PATTERN = re.compile(
+    r"(?:praktiken|kontrollen|ma(?:ss|ß)nahmen|anforderungen|vorgaben|controls|practices|measures|requirements)"
+    r"\s+(?:f(?:ue|ü)r|aus|gem(?:ae|ä)(?:ss|ß)|nach|from|of|for|per)\s+"
+    r"(.+?)(?:\s+(?:m(?:ue|ü)ssen|sollen|sind|werden|implementieren|umsetzen|einf(?:ue|ü)hren)|\.|,|$)",
+    re.IGNORECASE,
+)
+
+# Direct framework name references (case-insensitive, whole-word match)
+_DIRECT_FRAMEWORK_RE = re.compile(
+    r"\b(?:CSA\s*CCM|NIST\s*(?:SP\s*)?800-53|OWASP\s*(?:ASVS|SAMM|Top\s*10)"
+    r"|CIS\s*Controls|BSI\s*(?:IT-)?Grundschutz|ENISA|ISO\s*2700[12]"
+    r"|COBIT|SOX|PCI\s*DSS|HITRUST|SOC\s*2|KRITIS)\b",
+    re.IGNORECASE,
+)
+
+# Compound verb patterns (multiple main verbs): matches the German and
+# English coordinating conjunctions that may join two actions.
+_COMPOUND_VERB_RE = re.compile(
+    r"\b(?:und|sowie|als\s+auch|or|and)\b",
+    re.IGNORECASE,
+)
+
+# No-split phrases that look compound but aren't. Entries are compared as
+# lowercase substrings of the action, so they must stay lowercase.
+_NO_SPLIT_PHRASES = [
+    "pflegen und aufrechterhalten",
+    "dokumentieren und pflegen",
+    "definieren und dokumentieren",
+    "erstellen und freigeben",
+    "pruefen und genehmigen",
+    "identifizieren und bewerten",
+    "erkennen und melden",
+    "define and maintain",
+    "create and maintain",
+    "establish and maintain",
+    "monitor and review",
+    "detect and respond",
+]
+
+
+@dataclass
+class RoutingResult:
+    """Result of obligation routing classification.
+
+    Only ``routing_type`` is required; all other fields default to
+    "not determined" (``None``, ``0.0`` or an empty string).
+    """
+    routing_type: str  # atomic | compound | framework_container | unknown_review
+    # Registry ID of the referenced framework, or None if it was not resolved.
+    framework_ref: Optional[str] = None
+    # ID of the matched domain inside that framework, if any.
+    framework_domain: Optional[str] = None
+    # Title of the matched domain, if any.
+    domain_title: Optional[str] = None
+    # Heuristic confidence of the classification.
+    confidence: float = 0.0
+    # Short machine-readable explanation of which rule produced the result.
+    reason: str = ""
+
+
+def classify_routing(
+    obligation_text: str,
+    action_raw: str,
+    object_raw: str,
+    condition_raw: Optional[str] = None,
+) -> RoutingResult:
+    """Classify an obligation into atomic / compound / framework_container.
+
+    Framework detection takes precedence over compound-verb detection;
+    anything matching neither is treated as atomic.
+
+    Args:
+        obligation_text: Full text of the obligation.
+        action_raw: Extracted action phrase, checked for multiple verbs.
+        object_raw: Extracted object phrase, checked for framework references.
+        condition_raw: Accepted for interface compatibility; currently unused.
+
+    Returns:
+        The routing decision with a confidence and a reason code.
+    """
+    # --- Step 1: Framework container detection ---
+    fw_result = _detect_framework(obligation_text, object_raw)
+    if fw_result.routing_type == "framework_container":
+        return fw_result
+
+    # --- Step 2: Compound verb detection ---
+    if _is_compound_obligation(action_raw, obligation_text):
+        return RoutingResult(
+            routing_type="compound",
+            confidence=0.7,
+            reason="multiple_main_verbs",
+        )
+
+    # --- Step 3: Default = atomic ---
+    return RoutingResult(
+        routing_type="atomic",
+        confidence=0.9,
+        reason="single_action_single_object",
+    )
+
+
+def _detect_framework(
+    obligation_text: str, object_raw: str,
+) -> RoutingResult:
+    """Decide whether an obligation points at a framework or framework domain.
+
+    Three strategies are tried in order: a direct framework name, the
+    "<practices> for <reference>" pattern, and a keyword-heavy object. The
+    first one that hits produces a ``framework_container`` result; otherwise
+    an ``atomic`` result with confidence 0.0 is returned.
+    """
+    haystack = f"{obligation_text} {object_raw}"
+    registry = get_registry()
+    alias_idx = _build_alias_index(registry)
+
+    # Strategy 1: direct framework name match
+    direct = _DIRECT_FRAMEWORK_RE.search(haystack)
+    if direct:
+        fw_name = direct.group(0).strip()
+        fw_id = _resolve_framework_id(fw_name, alias_idx, registry)
+        if not fw_id:
+            # Framework name recognized but not in registry
+            return RoutingResult(
+                routing_type="framework_container",
+                framework_ref=None,
+                framework_domain=None,
+                confidence=0.6,
+                reason=f"direct_framework_match_no_registry:{fw_name}",
+            )
+        domain_id, domain_title = _match_domain(haystack, registry[fw_id])
+        return RoutingResult(
+            routing_type="framework_container",
+            framework_ref=fw_id,
+            framework_domain=domain_id,
+            domain_title=domain_title,
+            confidence=0.95 if domain_id else 0.75,
+            reason=f"direct_framework_match:{fw_name}",
+        )
+
+    # Strategy 2: pattern match ("Praktiken fuer X")
+    pattern_hit = _FRAMEWORK_PATTERN.search(haystack)
+    if pattern_hit:
+        ref_text = pattern_hit.group(1).strip()
+        fw_id, domain_id, domain_title = _resolve_from_ref_text(
+            ref_text, registry, alias_idx,
+        )
+        if fw_id:
+            return RoutingResult(
+                routing_type="framework_container",
+                framework_ref=fw_id,
+                framework_domain=domain_id,
+                domain_title=domain_title,
+                confidence=0.85 if domain_id else 0.65,
+                reason=f"pattern_match:{ref_text}",
+            )
+
+    # Strategy 3: keyword-heavy object
+    if _has_framework_keywords(object_raw):
+        return RoutingResult(
+            routing_type="framework_container",
+            framework_ref=None,
+            framework_domain=None,
+            confidence=0.5,
+            reason="framework_keywords_in_object",
+        )
+
+    return RoutingResult(routing_type="atomic", confidence=0.0)
+
+
+def _resolve_framework_id(
+    name: str,
+    alias_idx: dict[str, str],
+    registry: dict[str, dict],
+) -> Optional[str]:
+    """Resolve a framework name to its registry ID.
+
+    Matching is attempted from strictest to loosest: exact alias, alias
+    ignoring spaces/hyphens/underscores, substring relation with a display
+    name, and finally any alias of four or more characters contained in the
+    name.
+
+    Args:
+        name: Framework name as found in the text.
+        alias_idx: Lowercase alias → framework_id lookup.
+        registry: framework_id → framework dict.
+
+    Returns:
+        The framework_id, or ``None`` if nothing matches or *name* is blank.
+    """
+    normalized = re.sub(r"\s+", " ", name.strip().lower())
+    if not normalized:
+        # An empty string is a substring of everything; never match on it.
+        return None
+    # Direct alias match
+    if normalized in alias_idx:
+        return alias_idx[normalized]
+    # Try compact form (strip spaces, hyphens, underscores)
+    compact = re.sub(r"[\s_\-]+", "", normalized)
+    for alias, fw_id in alias_idx.items():
+        if re.sub(r"[\s_\-]+", "", alias) == compact:
+            return fw_id
+    # Substring match in display names
+    for fw_id, fw in registry.items():
+        display = fw.get("display_name", "").lower()
+        # Skip frameworks without a display name: "" is contained in any
+        # string and would make the first such framework match everything.
+        if display and (normalized in display or display in normalized):
+            return fw_id
+    # Partial match: check if normalized contains any alias (for multi-word refs)
+    for alias, fw_id in alias_idx.items():
+        if len(alias) >= 4 and alias in normalized:
+            return fw_id
+    return None
+
+
+def _match_domain(
+    text: str, framework: dict,
+) -> tuple[Optional[str], Optional[str]]:
+    """Match a domain within a framework from text references.
+
+    Each domain is scored: +10 for its ID as a whole word, +8 for its full
+    title, +6 if any alias occurs, and +1 per keyword found. The first
+    domain with the highest score wins, provided that score is at least 3.
+    Empty IDs, titles, aliases and keywords are ignored, because an empty
+    string would otherwise match every text.
+
+    Args:
+        text: Text to search (obligation text, optionally with object).
+        framework: Framework dict with a ``domains`` list.
+
+    Returns:
+        ``(domain_id, title)`` of the best domain, or ``(None, None)``.
+    """
+    text_lower = text.lower()
+    best_id: Optional[str] = None
+    best_title: Optional[str] = None
+    best_score = 0
+
+    for domain in framework.get("domains", []):
+        score = 0
+        domain_id = domain["domain_id"]
+        title = domain.get("title", "")
+
+        # Exact domain ID match (e.g. "AIS")
+        if domain_id and re.search(rf"\b{re.escape(domain_id)}\b", text, re.IGNORECASE):
+            score += 10
+
+        # Full title match
+        if title and title.lower() in text_lower:
+            score += 8
+
+        # Alias match (counted once, however many aliases occur)
+        if any(alias and alias.lower() in text_lower for alias in domain.get("aliases", [])):
+            score += 6
+
+        # Keyword overlap
+        score += sum(
+            1 for kw in domain.get("keywords", [])
+            if kw and kw.lower() in text_lower
+        )
+
+        if score > best_score:
+            best_score = score
+            best_id = domain_id
+            best_title = title
+
+    if best_score >= 3:
+        return best_id, best_title
+    return None, None
+
+
+def _resolve_from_ref_text(
+    ref_text: str,
+    registry: dict[str, dict],
+    alias_idx: dict[str, str],
+) -> tuple[Optional[str], Optional[str], Optional[str]]:
+    """Resolve framework + domain from a reference text like 'AIS' or 'Application Security'.
+
+    Domains are checked in registry order; the first one whose ID (as a
+    whole word), title, or alias relates to the reference wins.
+
+    Args:
+        ref_text: Reference captured from the obligation text.
+        registry: framework_id → framework dict.
+        alias_idx: Accepted for interface compatibility; currently unused.
+
+    Returns:
+        ``(framework_id, domain_id, domain_title)`` or ``(None, None, None)``.
+    """
+    ref_lower = ref_text.lower().strip()
+    if not ref_lower:
+        # A blank reference is contained in every alias; treat it as no match.
+        return None, None, None
+
+    for fw_id, fw in registry.items():
+        for domain in fw.get("domains", []):
+            domain_id = domain["domain_id"]
+            title = domain.get("title", "")
+            # Check domain ID as a whole word, consistent with _match_domain,
+            # so that e.g. "LOG" does not fire inside "Katalog".
+            if domain_id and re.search(
+                rf"\b{re.escape(domain_id.lower())}\b", ref_lower,
+            ):
+                return fw_id, domain_id, domain.get("title")
+            # Check title (an empty title would match every reference)
+            if title and title.lower() in ref_lower:
+                return fw_id, domain_id, domain.get("title")
+            # Check aliases in both directions
+            for alias in domain.get("aliases", []):
+                alias_lower = alias.lower()
+                if alias_lower and (alias_lower in ref_lower or ref_lower in alias_lower):
+                    return fw_id, domain_id, domain.get("title")
+
+    return None, None, None
+
+
+_FRAMEWORK_KW_SET = {
+    "praktiken", "kontrollen", "massnahmen", "maßnahmen",
+    "anforderungen", "vorgaben", "framework", "standard",
+    "baseline", "katalog", "domain", "family", "category",
+    "practices", "controls", "measures", "requirements",
+}
+
+
+def _has_framework_keywords(text: str) -> bool:
+    """Return True if *text* contains at least two distinct indicator keywords.
+
+    The text is lowercased and split into runs of letters (including German
+    umlauts and ß); repeated occurrences of one keyword count once.
+    """
+    tokens = re.findall(r"[a-zäöüß]+", text.lower())
+    distinct_hits = _FRAMEWORK_KW_SET.intersection(tokens)
+    return len(distinct_hits) >= 2
+
+
+def _is_compound_obligation(action_raw: str, obligation_text: str) -> bool:
+    """Tell whether the action phrase joins two or more separate verbs.
+
+    An empty action, or one containing a known no-split phrase, is never
+    compound. Otherwise the lowercased action is split on conjunctions and
+    counts as compound when at least two parts have three or more
+    characters. ``obligation_text`` is accepted but not inspected.
+    """
+    if not action_raw:
+        return False
+
+    action = action_raw.lower().strip()
+
+    # Fixed expressions such as "establish and maintain" are one action.
+    if any(phrase in action for phrase in _NO_SPLIT_PHRASES):
+        return False
+
+    # Without a conjunction there is nothing to split.
+    if _COMPOUND_VERB_RE.search(action) is None:
+        return False
+
+    fragments = re.split(r"\b(?:und|sowie|als\s+auch|or|and)\b", action)
+    substantial = 0
+    for fragment in fragments:
+        if len(fragment.strip()) >= 3:
+            substantial += 1
+    return substantial >= 2
+
+
+# ---------------------------------------------------------------------------
+# Framework Decomposition
+# ---------------------------------------------------------------------------
+
+@dataclass
+class DecomposedObligation:
+    """A concrete obligation derived from a framework container.
+
+    Instances are built by ``decompose_framework_container``, one per
+    selected subcontrol; the comments below describe what that function
+    stores in each field.
+    """
+    # "<source obligation candidate id>-<subcontrol id>"
+    obligation_candidate_id: str
+    # ID of the control the source obligation belongs to (passed through).
+    parent_control_id: str
+    # Generated "FWC-…" ID shared by all obligations of one decomposition.
+    parent_framework_container_id: str
+    # Display name of the framework (or its ref, or "Unknown").
+    source_ref_law: str
+    # Subcontrol ID within the framework.
+    source_ref_article: str
+    # The subcontrol's "statement" text.
+    obligation_text: str
+    actor: str
+    # Subcontrol action hint, or an action inferred from the statement.
+    action_raw: str
+    # Subcontrol object hint, or an object inferred from the statement.
+    object_raw: str
+    condition_raw: Optional[str] = None
+    trigger_raw: Optional[str] = None
+    routing_type: str = "atomic"
+    release_state: str = "decomposed"
+    subcontrol_id: str = ""
+    # Metadata
+    action_hint: str = ""
+    object_hint: str = ""
+    object_class: str = ""
+    keywords: list[str] = field(default_factory=list)
+
+
+@dataclass
+class FrameworkDecompositionResult:
+    """Result of framework decomposition.
+
+    Returned by ``decompose_framework_container`` both on success and when
+    no framework or subcontrol could be matched.
+    """
+    # Generated "FWC-…" ID for this decomposition run.
+    framework_container_id: str
+    # ID of the obligation candidate that was decomposed.
+    source_obligation_candidate_id: str
+    # Registry ID of the resolved framework, if any.
+    framework_ref: Optional[str]
+    # ID of the resolved domain, if any.
+    framework_domain: Optional[str]
+    domain_title: Optional[str]
+    # Subcontrol IDs, in the same order as decomposed_obligations.
+    matched_subcontrols: list[str]
+    decomposition_confidence: float
+    release_state: str  # decomposed | unmatched | error
+    decomposed_obligations: list[DecomposedObligation]
+    # Messages prefixed with "ERROR:" or "WARN:".
+    issues: list[str]
+
+
+def decompose_framework_container(
+    obligation_candidate_id: str,
+    parent_control_id: str,
+    obligation_text: str,
+    framework_ref: Optional[str],
+    framework_domain: Optional[str],
+    actor: str = "organization",
+) -> FrameworkDecompositionResult:
+    """Decompose a framework-container obligation into concrete sub-obligations.
+
+    Steps:
+    1. Resolve framework from registry
+    2. Resolve domain within framework
+    3. Select relevant subcontrols (keyword filter or full domain)
+    4. Generate decomposed obligations
+
+    The registry is treated as read-only: subcontrol dicts are copied before
+    being annotated, and keyword lists are copied into the results, so the
+    cached registry is never modified by a decomposition.
+
+    Args:
+        obligation_candidate_id: ID of the obligation being decomposed.
+        parent_control_id: ID of the control the obligation belongs to.
+        obligation_text: Full obligation text, used for matching.
+        framework_ref: Registry ID of the framework, if already known.
+        framework_domain: Domain ID within the framework, if already known.
+        actor: Actor assigned to every decomposed obligation.
+
+    Returns:
+        A result with ``release_state`` "decomposed" on success, or
+        "unmatched" (and no obligations) when no framework or no subcontrol
+        could be matched.
+    """
+    container_id = f"FWC-{uuid.uuid4().hex[:8]}"
+    registry = get_registry()
+    issues: list[str] = []
+
+    # Step 1: Resolve framework
+    fw = None
+    if framework_ref and framework_ref in registry:
+        fw = registry[framework_ref]
+    else:
+        # Try to find by name in text
+        fw, framework_ref = _find_framework_in_text(obligation_text, registry)
+
+    if not fw:
+        issues.append("ERROR: framework_not_matched")
+        return FrameworkDecompositionResult(
+            framework_container_id=container_id,
+            source_obligation_candidate_id=obligation_candidate_id,
+            framework_ref=framework_ref,
+            framework_domain=framework_domain,
+            domain_title=None,
+            matched_subcontrols=[],
+            decomposition_confidence=0.0,
+            release_state="unmatched",
+            decomposed_obligations=[],
+            issues=issues,
+        )
+
+    # Step 2: Resolve domain
+    domain_data = None
+    domain_title = None
+    if framework_domain:
+        for d in fw.get("domains", []):
+            if d["domain_id"].lower() == framework_domain.lower():
+                domain_data = d
+                domain_title = d.get("title")
+                break
+    if not domain_data:
+        # Try matching from text
+        domain_id, domain_title = _match_domain(obligation_text, fw)
+        if domain_id:
+            for d in fw.get("domains", []):
+                if d["domain_id"] == domain_id:
+                    domain_data = d
+                    framework_domain = domain_id
+                    break
+
+    if not domain_data:
+        issues.append("WARN: domain_not_matched — using all domains")
+        # Fall back to all subcontrols across all domains. Annotate copies:
+        # writing "_domain_id" into the originals would alter the shared,
+        # cached registry for every later caller.
+        all_subcontrols = [
+            {**sc, "_domain_id": d["domain_id"]}
+            for d in fw.get("domains", [])
+            for sc in d.get("subcontrols", [])
+        ]
+        subcontrols = _select_subcontrols(obligation_text, all_subcontrols)
+        if not subcontrols:
+            issues.append("ERROR: no_subcontrols_matched")
+            return FrameworkDecompositionResult(
+                framework_container_id=container_id,
+                source_obligation_candidate_id=obligation_candidate_id,
+                framework_ref=framework_ref,
+                framework_domain=framework_domain,
+                domain_title=None,
+                matched_subcontrols=[],
+                decomposition_confidence=0.0,
+                release_state="unmatched",
+                decomposed_obligations=[],
+                issues=issues,
+            )
+    else:
+        # Step 3: Select subcontrols from domain
+        raw_subcontrols = domain_data.get("subcontrols", [])
+        subcontrols = _select_subcontrols(obligation_text, raw_subcontrols)
+        if not subcontrols:
+            # Full domain decomposition
+            subcontrols = raw_subcontrols
+
+    # Quality check: too many subcontrols
+    if len(subcontrols) > 25:
+        issues.append(f"WARN: {len(subcontrols)} subcontrols — may be too broad")
+
+    # Step 4: Generate decomposed obligations
+    display_name = fw.get("display_name", framework_ref or "Unknown")
+    decomposed: list[DecomposedObligation] = []
+    matched_ids: list[str] = []
+
+    for sc in subcontrols:
+        sc_id = sc.get("subcontrol_id", "")
+        matched_ids.append(sc_id)
+
+        action_hint = sc.get("action_hint", "")
+        object_hint = sc.get("object_hint", "")
+        statement = sc.get("statement", "")
+
+        # Quality warnings
+        if not action_hint:
+            issues.append(f"WARN: {sc_id} missing action_hint")
+        if not object_hint:
+            issues.append(f"WARN: {sc_id} missing object_hint")
+
+        obl_id = f"{obligation_candidate_id}-{sc_id}"
+
+        decomposed.append(DecomposedObligation(
+            obligation_candidate_id=obl_id,
+            parent_control_id=parent_control_id,
+            parent_framework_container_id=container_id,
+            source_ref_law=display_name,
+            source_ref_article=sc_id,
+            obligation_text=statement,
+            actor=actor,
+            action_raw=action_hint or _infer_action(statement),
+            object_raw=object_hint or _infer_object(statement),
+            routing_type="atomic",
+            release_state="decomposed",
+            subcontrol_id=sc_id,
+            action_hint=action_hint,
+            object_hint=object_hint,
+            object_class=sc.get("object_class", ""),
+            # Copy so that callers editing the result cannot change the registry.
+            keywords=list(sc.get("keywords", [])),
+        ))
+
+    # Check if decomposed are identical to container
+    for d in decomposed:
+        if d.obligation_text.strip() == obligation_text.strip():
+            issues.append(f"WARN: {d.subcontrol_id} identical to container text")
+
+    confidence = _compute_decomposition_confidence(
+        framework_ref, framework_domain, domain_data, len(subcontrols), issues,
+    )
+
+    return FrameworkDecompositionResult(
+        framework_container_id=container_id,
+        source_obligation_candidate_id=obligation_candidate_id,
+        framework_ref=framework_ref,
+        framework_domain=framework_domain,
+        domain_title=domain_title,
+        matched_subcontrols=matched_ids,
+        decomposition_confidence=confidence,
+        release_state="decomposed",
+        decomposed_obligations=decomposed,
+        issues=issues,
+    )
+
+
+def _find_framework_in_text(
+    text: str, registry: dict[str, dict],
+) -> tuple[Optional[dict], Optional[str]]:
+    """Look for a known framework name in *text* and resolve it.
+
+    Returns:
+        ``(framework_dict, framework_id)`` when a directly named framework
+        resolves to a registry entry, otherwise ``(None, None)``.
+    """
+    alias_idx = _build_alias_index(registry)
+    hit = _DIRECT_FRAMEWORK_RE.search(text)
+    if not hit:
+        return None, None
+
+    resolved_id = _resolve_framework_id(hit.group(0), alias_idx, registry)
+    if not resolved_id or resolved_id not in registry:
+        return None, None
+    return registry[resolved_id], resolved_id
+
+
+def _select_subcontrols(
+    obligation_text: str, subcontrols: list[dict],
+) -> list[dict]:
+    """Pick the subcontrols that overlap meaningfully with the obligation text.
+
+    Scoring per subcontrol: +1 for each keyword found in the text, +3 if the
+    title occurs in it, +2 if the object hint occurs in it. Only subcontrols
+    scoring 2 or more are returned, highest score first, with ties kept in
+    input order.
+
+    Returns empty list if no targeted match found (caller falls back to
+    full domain).
+    """
+    haystack = obligation_text.lower()
+
+    def relevance(subcontrol: dict) -> int:
+        points = sum(
+            1 for kw in subcontrol.get("keywords", []) if kw.lower() in haystack
+        )
+        title = subcontrol.get("title", "").lower()
+        if title and title in haystack:
+            points += 3
+        object_hint = subcontrol.get("object_hint", "").lower()
+        if object_hint and object_hint in haystack:
+            points += 2
+        return points
+
+    ranked = [(relevance(sc), sc) for sc in subcontrols]
+    # Only those with meaningful overlap (score >= 2) are kept.
+    relevant = [pair for pair in ranked if pair[0] >= 2]
+    relevant.sort(key=lambda pair: pair[0], reverse=True)
+    return [sc for _, sc in relevant]
+
+
+def _infer_action(statement: str) -> str:
+    """Derive a canonical German action verb from a statement.
+
+    Rules are checked in order and the first one with a marker contained in
+    the lowercased statement wins; "implementieren" is the fallback.
+    """
+    rules = (
+        (("definiert", "definieren", "define"), "definieren"),
+        (("implementiert", "implementieren", "implement"), "implementieren"),
+        (("dokumentiert", "dokumentieren", "document"), "dokumentieren"),
+        (("ueberwacht", "ueberwachen", "monitor"), "ueberwachen"),
+        (("getestet", "testen", "test"), "testen"),
+        (("geschuetzt", "schuetzen", "protect"), "implementieren"),
+        (("verwaltet", "verwalten", "manage"), "pflegen"),
+        (("gemeldet", "melden", "report"), "melden"),
+    )
+    lowered = statement.lower()
+    for markers, verb in rules:
+        for marker in markers:
+            if marker in lowered:
+                return verb
+    return "implementieren"
+
+
+def _infer_object(statement: str) -> str:
+    """Guess the primary object of a statement.
+
+    Takes the text following "muessen" / "muss" / "müssen" up to "werden",
+    "sein", a period, a comma or the end, limited to 80 characters. Without
+    such a modal verb the first 80 characters of the statement are returned.
+    """
+    modal_match = re.search(
+        r"(?:muessen|muss|m(?:ü|ue)ssen)\s+(.+?)(?:\s+werden|\s+sein|\.|,|$)",
+        statement,
+        re.IGNORECASE,
+    )
+    if modal_match is not None:
+        captured = modal_match.group(1).strip()
+        return captured[:80]
+    if not statement:
+        return ""
+    # Fallback: no modal verb found.
+    return statement[:80]
+
+
+def _compute_decomposition_confidence(
+    framework_ref: Optional[str],
+    domain: Optional[str],
+    domain_data: Optional[dict],
+    num_subcontrols: int,
+    issues: list[str],
+) -> float:
+    """Score a decomposition between 0.0 and 1.0, rounded to two decimals.
+
+    Starts at 0.3 and adds 0.25 for a framework, 0.20 for a domain ID, 0.10
+    for resolved domain data, and 0.10 for 1-15 subcontrols (0.05 for more).
+    Each issue starting with "ERROR:" subtracts 0.15.
+    """
+    if 1 <= num_subcontrols <= 15:
+        size_bonus = 0.10
+    elif num_subcontrols > 15:
+        size_bonus = 0.05  # less confident with too many
+    else:
+        size_bonus = 0.0
+
+    total = 0.3
+    total += 0.25 if framework_ref else 0.0
+    total += 0.20 if domain else 0.0
+    total += 0.10 if domain_data else 0.0
+    total += size_bonus
+
+    # Penalize errors
+    error_count = len([msg for msg in issues if msg.startswith("ERROR:")])
+    total -= error_count * 0.15
+
+    clamped = max(min(total, 1.0), 0.0)
+    return round(clamped, 2)
+
+
+# ---------------------------------------------------------------------------
+# Registry statistics (for admin/debugging)
+# ---------------------------------------------------------------------------
+
+def registry_stats() -> dict:
+    """Summarize the loaded registry for admin and debugging use.
+
+    Returns:
+        A dict with the number of frameworks, a per-framework ``details``
+        list (ID, display name, domain count, subcontrol count) and the
+        overall ``total_domains`` / ``total_subcontrols``.
+    """
+    details = []
+    for framework_id, framework in get_registry().items():
+        domains = framework.get("domains", [])
+        details.append({
+            "framework_id": framework_id,
+            "display_name": framework.get("display_name", ""),
+            "domains": len(domains),
+            "subcontrols": sum(len(d.get("subcontrols", [])) for d in domains),
+        })
+
+    return {
+        "frameworks": len(details),
+        "details": details,
+        "total_domains": sum(entry["domains"] for entry in details),
+        "total_subcontrols": sum(entry["subcontrols"] for entry in details),
+    }
--- a/backend-compliance/compliance/services/llm_provider.py
+++ b/backend-compliance/compliance/services/llm_provider.py
@@ -173,6 +173,7 @@ class LLMProviderType(str, Enum):
    """Supported LLM provider types."""
    ANTHROPIC = "anthropic"
    SELF_HOSTED = "self_hosted"
+    OLLAMA = "ollama"  # Alias for self_hosted (Ollama-specific)
    MOCK = "mock"  # For testing


@@ -392,6 +393,7 @@ class SelfHostedProvider(LLMProvider):
                "model": self.model,
                "prompt": full_prompt,
                "stream": False,
+                "think": False,  # Disable thinking mode (qwen3.5 etc.)
                "options": {}
            }

@@ -549,7 +551,7 @@ def get_llm_config() -> LLMConfig:
            vault_path="breakpilot/api_keys/anthropic",
            env_var="ANTHROPIC_API_KEY"
        )
-    elif provider_type == LLMProviderType.SELF_HOSTED:
+    elif provider_type in (LLMProviderType.SELF_HOSTED, LLMProviderType.OLLAMA):
        api_key = get_secret_from_vault_or_env(
            vault_path="breakpilot/api_keys/self_hosted_llm",
            env_var="SELF_HOSTED_LLM_KEY"
@@ -558,7 +560,7 @@ def get_llm_config() -> LLMConfig:
    # Select model based on provider type
    if provider_type == LLMProviderType.ANTHROPIC:
        model = os.getenv("ANTHROPIC_MODEL", "claude-sonnet-4-20250514")
-    elif provider_type == LLMProviderType.SELF_HOSTED:
+    elif provider_type in (LLMProviderType.SELF_HOSTED, LLMProviderType.OLLAMA):
        model = os.getenv("SELF_HOSTED_LLM_MODEL", "qwen2.5:14b")
    else:
        model = "mock-model"
@@ -591,7 +593,7 @@ def get_llm_provider(config: Optional[LLMConfig] = None) -> LLMProvider:
            return MockProvider(config)
        return AnthropicProvider(config)

-    elif config.provider_type == LLMProviderType.SELF_HOSTED:
+    elif config.provider_type in (LLMProviderType.SELF_HOSTED, LLMProviderType.OLLAMA):
        if not config.base_url:
            logger.warning("No self-hosted LLM URL found, using mock provider")
            return MockProvider(config)
--- a/backend-compliance/compliance/services/normative_patterns.py
+++ b/backend-compliance/compliance/services/normative_patterns.py
@@ -0,0 +1,59 @@
+"""Shared normative language patterns for assertion classification.
+
+Extracted from decomposition_pass.py for reuse in the assertion engine.
+"""
+
+import re
+
+# Signals for mandatory wording ("Pflicht" = obligation): German modal verbs
+# and "ist/sind/hat/haben ... zu <verb>en" constructions, the English
+# shall/must/required, and separable-prefix infinitives ending in
+# "zu<stem>" such as "...zuweisen".
+_PFLICHT_SIGNALS = [
+    r"\bmüssen\b", r"\bmuss\b", r"\bhat\s+sicherzustellen\b",
+    r"\bhaben\s+sicherzustellen\b", r"\bsind\s+verpflichtet\b",
+    r"\bist\s+verpflichtet\b",
+    r"\bist\s+zu\s+\w+en\b", r"\bsind\s+zu\s+\w+en\b",
+    r"\bhat\s+zu\s+\w+en\b", r"\bhaben\s+zu\s+\w+en\b",
+    r"\bist\s+\w+zu\w+en\b", r"\bsind\s+\w+zu\w+en\b",
+    r"\bist\s+\w+\s+zu\s+\w+en\b", r"\bsind\s+\w+\s+zu\s+\w+en\b",
+    r"\bhat\s+\w+\s+zu\s+\w+en\b", r"\bhaben\s+\w+\s+zu\s+\w+en\b",
+    r"\bshall\b", r"\bmust\b", r"\brequired\b",
+    r"\b\w+zuteilen\b", r"\b\w+zuwenden\b", r"\b\w+zustellen\b", r"\b\w+zulegen\b",
+    r"\b\w+zunehmen\b", r"\b\w+zuführen\b", r"\b\w+zuhalten\b", r"\b\w+zusetzen\b",
+    r"\b\w+zuweisen\b", r"\b\w+zuordnen\b", r"\b\w+zufügen\b", r"\b\w+zugeben\b",
+    # "ist/sind" followed within 1-80 characters by "zu <verb>en"
+    r"\bist\b.{1,80}\bzu\s+\w+en\b", r"\bsind\b.{1,80}\bzu\s+\w+en\b",
+]
+# Case-insensitive alternation of all mandatory-wording signals.
+PFLICHT_RE = re.compile("|".join(_PFLICHT_SIGNALS), re.IGNORECASE)
+
+# Signals for recommendation wording ("Empfehlung" = recommendation):
+# soll/sollte/should plus a set of German and English action verbs.
+_EMPFEHLUNG_SIGNALS = [
+    r"\bsoll\b", r"\bsollen\b", r"\bsollte\b", r"\bsollten\b",
+    r"\bgewährleisten\b", r"\bsicherstellen\b",
+    r"\bshould\b", r"\bensure\b", r"\brecommend\w*\b",
+    r"\bnachweisen\b", r"\beinhalten\b", r"\bunterlassen\b", r"\bwahren\b",
+    r"\bdokumentieren\b", r"\bimplementieren\b", r"\büberprüfen\b", r"\büberwachen\b",
+    r"\bprüfen,\s+ob\b", r"\bkontrollieren,\s+ob\b",
+]
+# Case-insensitive alternation of all recommendation signals.
+EMPFEHLUNG_RE = re.compile("|".join(_EMPFEHLUNG_SIGNALS), re.IGNORECASE)
+
+# Signals for permissive wording ("kann" = may/can).
+_KANN_SIGNALS = [
+    r"\bkann\b", r"\bkönnen\b", r"\bdarf\b", r"\bdürfen\b",
+    r"\bmay\b", r"\boptional\b",
+]
+# Case-insensitive alternation of all permissive signals.
+KANN_RE = re.compile("|".join(_KANN_SIGNALS), re.IGNORECASE)
+
+# Matches any signal from the three lists above (mandatory, recommendation,
+# permissive) in a single pattern.
+NORMATIVE_RE = re.compile(
+    "|".join(_PFLICHT_SIGNALS + _EMPFEHLUNG_SIGNALS + _KANN_SIGNALS),
+    re.IGNORECASE,
+)
+
+# Signals for explanatory / rationale wording. Note that "\bda\s+" matches the
+# word "da" followed by whitespace and "\berwägung" has no trailing word
+# boundary, so it also matches longer words starting with "erwägung".
+_RATIONALE_SIGNALS = [
+    r"\bda\s+", r"\bweil\b", r"\bgrund\b", r"\berwägung",
+    r"\bbecause\b", r"\breason\b", r"\brationale\b",
+    r"\bkönnen\s+.*\s+verursachen\b", r"\bführt\s+zu\b",
+]
+# Case-insensitive alternation of all rationale signals.
+RATIONALE_RE = re.compile("|".join(_RATIONALE_SIGNALS), re.IGNORECASE)
+
+# Evidence-related keywords (for fact detection).
+# In "audit.report" the dot is an unescaped regex wildcard, so any single
+# character between "audit" and "report" matches (space, hyphen, ...).
+_EVIDENCE_KEYWORDS = [
+    r"\bnachweis\b", r"\bzertifikat\b", r"\baudit.report\b",
+    r"\bprotokoll\b", r"\bdokumentation\b", r"\bbericht\b",
+    r"\bcertificate\b", r"\bevidence\b", r"\bproof\b",
+]
+# Case-insensitive alternation of all evidence keywords.
+EVIDENCE_RE = re.compile("|".join(_EVIDENCE_KEYWORDS), re.IGNORECASE)
--- a/backend-compliance/compliance/services/obligation_extractor.py
+++ b/backend-compliance/compliance/services/obligation_extractor.py
@@ -0,0 +1,563 @@
+"""Obligation Extractor — 3-Tier Chunk-to-Obligation Linking.
+
+Maps RAG chunks to obligations from the v2 obligation framework using
+three tiers (fastest first):
+
+    Tier 1: EXACT MATCH  — regulation_code + article → obligation_id  (~40%)
+    Tier 2: EMBEDDING    — chunk text vs. obligation descriptions     (~30%)
+    Tier 3: LLM EXTRACT  — local Ollama extracts obligation text      (~25%)
+
+Part of the Multi-Layer Control Architecture (Phase 4 of 8).
+"""
+
+import json
+import logging
+import os
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# Service endpoints and model settings; each can be overridden through the
+# environment variable named in the os.getenv() call.
+EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://embedding-service:8087")
+OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
+OLLAMA_MODEL = os.getenv("CONTROL_GEN_OLLAMA_MODEL", "qwen3.5:35b-a3b")
+# Passed as the httpx client timeout for the Ollama call.
+LLM_TIMEOUT = float(os.getenv("CONTROL_GEN_LLM_TIMEOUT", "180"))
+
+# Embedding similarity thresholds for Tier 2
+# Minimum cosine similarity (without the same-regulation bonus) for Tier 2 to
+# accept a match.
+EMBEDDING_MATCH_THRESHOLD = 0.80
+# NOTE(review): not referenced in the visible part of this module — confirm
+# whether another module uses it.
+EMBEDDING_CANDIDATE_THRESHOLD = 0.60
+
+# ---------------------------------------------------------------------------
+# Regulation code mapping: RAG chunk codes → obligation file regulation IDs
+# ---------------------------------------------------------------------------
+
+# Keys are compared against the lower-cased, stripped regulation code; several
+# spellings may map to the same regulation ID.
+_REGULATION_CODE_TO_ID = {
+    # DSGVO
+    "eu_2016_679": "dsgvo",
+    "dsgvo": "dsgvo",
+    "gdpr": "dsgvo",
+    # AI Act
+    "eu_2024_1689": "ai_act",
+    "ai_act": "ai_act",
+    "aiact": "ai_act",
+    # NIS2
+    "eu_2022_2555": "nis2",
+    "nis2": "nis2",
+    "bsig": "nis2",
+    # BDSG
+    "bdsg": "bdsg",
+    # TTDSG
+    "ttdsg": "ttdsg",
+    # DSA
+    "eu_2022_2065": "dsa",
+    "dsa": "dsa",
+    # Data Act
+    "eu_2023_2854": "data_act",
+    "data_act": "data_act",
+    # EU Machinery
+    "eu_2023_1230": "eu_machinery",
+    "eu_machinery": "eu_machinery",
+    # DORA
+    "eu_2022_2554": "dora",
+    "dora": "dora",
+}
+
+
+@dataclass
+class ObligationMatch:
+    """Outcome of an obligation extraction attempt.
+
+    All fields default to "no match": ``method`` is ``"none"`` and
+    ``confidence`` is ``0.0``.
+    """
+
+    obligation_id: Optional[str] = None
+    obligation_title: Optional[str] = None
+    obligation_text: Optional[str] = None
+    method: str = "none"  # exact_match | embedding_match | llm_extracted | inferred
+    confidence: float = 0.0
+    regulation_id: Optional[str] = None  # e.g. "dsgvo"
+
+    def to_dict(self) -> dict:
+        """Return the six fields as a plain dict, in declaration order."""
+        exported = (
+            "obligation_id",
+            "obligation_title",
+            "obligation_text",
+            "method",
+            "confidence",
+            "regulation_id",
+        )
+        return {name: getattr(self, name) for name in exported}
+
+
+@dataclass
+class _ObligationEntry:
+    """Internal representation of a loaded obligation.
+
+    Instances are built by ``ObligationExtractor._load_obligations`` from the
+    obligation JSON files.
+    """
+
+    id: str
+    title: str
+    description: str
+    # ID of the regulation whose file this obligation was loaded from
+    regulation_id: str
+    articles: list[str] = field(default_factory=list)  # normalized: ["art. 30", "§ 38"]
+    # NOTE(review): not assigned anywhere in the visible code; the extractor
+    # keeps embeddings in its own parallel list — confirm whether this is used.
+    embedding: list[float] = field(default_factory=list)
+
+
+class ObligationExtractor:
+    """3-Tier obligation extraction from RAG chunks.
+
+    Usage::
+
+        extractor = ObligationExtractor()
+        await extractor.initialize()  # loads obligations + embeddings
+
+        match = await extractor.extract(
+            chunk_text="...",
+            regulation_code="eu_2016_679",
+            article="Art. 30",
+            paragraph="Abs. 1",
+        )
+    """
+
+    def __init__(self):
+        """Create an empty extractor; data is loaded later by ``initialize()``."""
+        self._article_lookup: dict[str, list[str]] = {}  # "dsgvo/art. 30" → ["DSGVO-OBL-001"]
+        self._obligations: dict[str, _ObligationEntry] = {}  # id → entry
+        # Parallel lists: _obligation_embeddings[i] belongs to _obligation_ids[i]
+        self._obligation_embeddings: list[list[float]] = []
+        self._obligation_ids: list[str] = []
+        # Set to True at the end of initialize()
+        self._initialized = False
+
+    async def initialize(self) -> None:
+        """Load the v2 obligations and compute their embeddings.
+
+        Does nothing when the extractor has already been initialized.
+        """
+        if not self._initialized:
+            self._load_obligations()
+            await self._compute_embeddings()
+            self._initialized = True
+
+            # Failed embeddings are stored as empty lists, so count truthy ones
+            embedded = sum(1 for vec in self._obligation_embeddings if vec)
+            logger.info(
+                "ObligationExtractor initialized: %d obligations, %d article lookups, %d embeddings",
+                len(self._obligations),
+                len(self._article_lookup),
+                embedded,
+            )
+
+    async def extract(
+        self,
+        chunk_text: str,
+        regulation_code: str,
+        article: Optional[str] = None,
+        paragraph: Optional[str] = None,
+    ) -> ObligationMatch:
+        """Run the three tiers in order and return the first match.
+
+        Initializes the extractor on first use. ``paragraph`` is accepted but
+        not used by this method.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        reg_id = _normalize_regulation(regulation_code)
+
+        # Tier 1 only applies when the chunk carries an article reference
+        exact = self._tier1_exact(reg_id, article) if article else None
+        if exact:
+            return exact
+
+        # Tier 2: nearest obligation by embedding similarity
+        similar = await self._tier2_embedding(chunk_text, reg_id)
+        if similar:
+            return similar
+
+        # Tier 3 always yields a result object, possibly with confidence 0.0
+        return await self._tier3_llm(chunk_text, regulation_code, article)
+
+    # -----------------------------------------------------------------------
+    # Tier 1: Exact Match
+    # -----------------------------------------------------------------------
+
+    def _tier1_exact(self, reg_id: Optional[str], article: str) -> Optional[ObligationMatch]:
+        """Resolve an obligation from the regulation ID and article reference.
+
+        Returns ``None`` when the regulation is unknown, the normalized
+        article has no lookup entry, or the referenced obligation is missing.
+        """
+        if not reg_id:
+            return None
+
+        lookup_key = f"{reg_id}/{_normalize_article(article)}"
+        candidate_ids = self._article_lookup.get(lookup_key)
+        if not candidate_ids:
+            return None
+
+        # Use the first obligation that was registered for this article
+        entry = self._obligations.get(candidate_ids[0])
+        if not entry:
+            return None
+
+        return ObligationMatch(
+            obligation_id=entry.id,
+            obligation_title=entry.title,
+            obligation_text=entry.description,
+            method="exact_match",
+            confidence=1.0,
+            regulation_id=reg_id,
+        )
+
+    # -----------------------------------------------------------------------
+    # Tier 2: Embedding Match
+    # -----------------------------------------------------------------------
+
+    async def _tier2_embedding(
+        self, chunk_text: str, reg_id: Optional[str]
+    ) -> Optional[ObligationMatch]:
+        """Pick the obligation whose embedding is closest to the chunk.
+
+        Only the first 2000 characters of the chunk are embedded. Obligations
+        of the same regulation get a +0.05 bonus while ranking; the bonus is
+        taken off again before the score is compared with
+        ``EMBEDDING_MATCH_THRESHOLD``. Returns ``None`` when no embeddings are
+        available or the best candidate stays below the threshold.
+        """
+        if not self._obligation_embeddings:
+            return None
+
+        query_vec = await _get_embedding(chunk_text[:2000])
+        if not query_vec:
+            return None
+
+        winner = -1
+        winner_score = 0.0
+        for pos, candidate_vec in enumerate(self._obligation_embeddings):
+            if not candidate_vec:
+                # Embedding could not be computed for this obligation
+                continue
+            candidate = self._obligations.get(self._obligation_ids[pos])
+            ranked = _cosine_sim(query_vec, candidate_vec)
+
+            # Prefer obligations from the same regulation
+            if candidate and reg_id and candidate.regulation_id == reg_id:
+                ranked += 0.05
+
+            if ranked > winner_score:
+                winner = pos
+                winner_score = ranked
+
+        if winner < 0:
+            return None
+
+        obl_id = self._obligation_ids[winner]
+        entry = self._obligations.get(obl_id)
+
+        # Undo the ranking bonus so the threshold applies to the plain score
+        raw_score = winner_score
+        if entry and reg_id and entry.regulation_id == reg_id:
+            raw_score -= 0.05
+
+        if raw_score >= EMBEDDING_MATCH_THRESHOLD:
+            confidence = round(min(raw_score, 1.0), 3)
+            if entry:
+                return ObligationMatch(
+                    obligation_id=entry.id,
+                    obligation_title=entry.title,
+                    obligation_text=entry.description,
+                    method="embedding_match",
+                    confidence=confidence,
+                    regulation_id=entry.regulation_id,
+                )
+            return ObligationMatch(
+                obligation_id=obl_id,
+                obligation_title=None,
+                obligation_text=None,
+                method="embedding_match",
+                confidence=confidence,
+                regulation_id=reg_id,
+            )
+
+        return None
+
+    # -----------------------------------------------------------------------
+    # Tier 3: LLM Extraction
+    # -----------------------------------------------------------------------
+
+    async def _tier3_llm(
+        self, chunk_text: str, regulation_code: str, article: Optional[str]
+    ) -> ObligationMatch:
+        """Ask the local LLM for the central obligation in the chunk.
+
+        Only the first 3000 characters of the chunk go into the prompt. The
+        result always has ``method="llm_extracted"``: confidence 0.0 and no
+        text when the LLM returned nothing, otherwise confidence 0.60 with the
+        parsed ``obligation_text`` (or the first 500 characters of the raw
+        reply when that key is absent).
+        """
+        prompt = f"""Analysiere den folgenden Gesetzestext und extrahiere die zentrale rechtliche Pflicht.
+
+Text:
+{chunk_text[:3000]}
+
+Quelle: {regulation_code} {article or ''}
+
+Antworte NUR als JSON:
+{{
+  "obligation_text": "Die zentrale Pflicht in einem Satz",
+  "actor": "Wer muss handeln (z.B. Verantwortlicher, Auftragsverarbeiter)",
+  "action": "Was muss getan werden",
+  "normative_strength": "muss|soll|kann"
+}}"""
+
+        system_prompt = (
+            "Du bist ein Rechtsexperte fuer EU-Datenschutz- und Digitalrecht. "
+            "Extrahiere die zentrale rechtliche Pflicht aus Gesetzestexten. "
+            "Antworte ausschliesslich als JSON."
+        )
+
+        raw_reply = await _llm_ollama(prompt, system_prompt)
+        if not raw_reply:
+            # LLM unavailable or empty answer: report an empty extraction
+            return ObligationMatch(
+                method="llm_extracted",
+                confidence=0.0,
+                regulation_id=_normalize_regulation(regulation_code),
+            )
+
+        reply_fields = _parse_json(raw_reply)
+        extracted = reply_fields.get("obligation_text", raw_reply[:500])
+
+        # obligation_id / obligation_title keep their None defaults
+        return ObligationMatch(
+            obligation_text=extracted,
+            method="llm_extracted",
+            confidence=0.60,
+            regulation_id=_normalize_regulation(regulation_code),
+        )
+
+    # -----------------------------------------------------------------------
+    # Initialization helpers
+    # -----------------------------------------------------------------------
+
+    def _load_obligations(self) -> None:
+        """Load all obligation files listed in the v2 manifest.
+
+        Fills ``self._obligations`` (id → entry) and ``self._article_lookup``
+        ("<regulation>/<normalized article>" → obligation ids). Returns without
+        loading anything when the obligations directory or its manifest cannot
+        be found; regulation files missing on disk are skipped with a warning.
+        """
+        v2_dir = _find_obligations_dir()
+        if not v2_dir:
+            logger.warning("Obligations v2 directory not found — Tier 1 disabled")
+            return
+
+        manifest_path = v2_dir / "_manifest.json"
+        if not manifest_path.exists():
+            logger.warning("Manifest not found at %s", manifest_path)
+            return
+
+        # Read as UTF-8 explicitly: the locale default encoding would garble
+        # or reject non-ASCII text on platforms where it is not UTF-8.
+        with open(manifest_path, encoding="utf-8") as f:
+            manifest = json.load(f)
+
+        regulations = manifest.get("regulations", [])
+        for reg_info in regulations:
+            reg_id = reg_info["id"]
+            reg_file = v2_dir / reg_info["file"]
+            if not reg_file.exists():
+                logger.warning("Regulation file not found: %s", reg_file)
+                continue
+
+            with open(reg_file, encoding="utf-8") as f:
+                data = json.load(f)
+
+            for obl in data.get("obligations", []):
+                obl_id = obl["id"]
+                entry = _ObligationEntry(
+                    id=obl_id,
+                    title=obl.get("title", ""),
+                    description=obl.get("description", ""),
+                    regulation_id=reg_id,
+                )
+
+                # Build article lookup from legal_basis
+                for basis in obl.get("legal_basis", []):
+                    article_raw = basis.get("article", "")
+                    if not article_raw:
+                        continue
+                    norm_art = _normalize_article(article_raw)
+                    key = f"{reg_id}/{norm_art}"
+                    self._article_lookup.setdefault(key, []).append(obl_id)
+                    entry.articles.append(norm_art)
+
+                self._obligations[obl_id] = entry
+
+        logger.info(
+            "Loaded %d obligations from %d regulations",
+            len(self._obligations),
+            len(regulations),
+        )
+
+    async def _compute_embeddings(self) -> None:
+        """Embed "<title>: <description>" for every loaded obligation.
+
+        Stores the obligation ids and their embeddings as two parallel lists.
+        Does nothing when no obligations are loaded.
+        """
+        if not self._obligations:
+            return
+
+        ordered_ids = []
+        texts = []
+        for oid, entry in self._obligations.items():
+            ordered_ids.append(oid)
+            texts.append(f"{entry.title}: {entry.description}")
+        self._obligation_ids = ordered_ids
+
+        logger.info("Computing embeddings for %d obligations...", len(texts))
+        self._obligation_embeddings = await _get_embeddings_batch(texts)
+
+        # Failed embeddings come back as empty lists
+        usable = sum(1 for vec in self._obligation_embeddings if vec)
+        logger.info("Got %d/%d valid embeddings", usable, len(texts))
+
+    # -----------------------------------------------------------------------
+    # Stats
+    # -----------------------------------------------------------------------
+
+    def stats(self) -> dict:
+        """Report how much data the extractor currently holds."""
+        usable_embeddings = 0
+        for vec in self._obligation_embeddings:
+            if vec:
+                usable_embeddings += 1
+
+        # Distinct regulation ids across all loaded obligations
+        regulation_ids = {entry.regulation_id for entry in self._obligations.values()}
+
+        return {
+            "total_obligations": len(self._obligations),
+            "article_lookups": len(self._article_lookup),
+            "embeddings_valid": usable_embeddings,
+            "regulations": list(regulation_ids),
+            "initialized": self._initialized,
+        }
+
+
+# ---------------------------------------------------------------------------
+# Module-level helpers (reusable by other modules)
+# ---------------------------------------------------------------------------
+
+
+def _normalize_regulation(regulation_code: str) -> Optional[str]:
+    """Translate a RAG regulation code into an obligation-framework regulation ID.
+
+    The code is lower-cased and stripped, looked up directly in
+    ``_REGULATION_CODE_TO_ID`` and, failing that, matched by EU-number prefix.
+    Returns ``None`` for an empty or unrecognized code.
+    """
+    if not regulation_code:
+        return None
+    key = regulation_code.lower().strip()
+
+    # Exact code
+    known = _REGULATION_CODE_TO_ID.get(key)
+    if known is not None:
+        return known
+
+    # Codes that merely start with a known EU regulation number
+    family_prefixes = (
+        ("eu_2016_679", "dsgvo"),
+        ("eu_2024_1689", "ai_act"),
+        ("eu_2022_2555", "nis2"),
+        ("eu_2022_2065", "dsa"),
+        ("eu_2023_2854", "data_act"),
+        ("eu_2023_1230", "eu_machinery"),
+        ("eu_2022_2554", "dora"),
+    )
+    return next(
+        (reg_id for prefix, reg_id in family_prefixes if key.startswith(prefix)),
+        None,
+    )
+
+
+def _normalize_article(article: str) -> str:
+    """Normalize article references for consistent lookup.
+
+    Strips a trailing law name and any paragraph-level suffix, then rewrites
+    the article marker to one canonical spelling so that differently typed
+    references produce the same lookup key.
+
+    Examples:
+        "Art. 30"       → "art. 30"
+        "§ 38 BDSG"     → "§ 38"
+        "Article 10"    → "art. 10"
+        "Art. 30 Abs. 1" → "art. 30"
+        "Artikel 35"    → "art. 35"
+        "Art 30"        → "art. 30"
+        "Art.30"        → "art. 30"
+        "§38"           → "§ 38"
+
+    Returns:
+        The lower-cased normalized reference, or "" for empty input.
+    """
+    if not article:
+        return ""
+    s = article.strip()
+
+    # Remove trailing law name: "§ 38 BDSG" → "§ 38"
+    s = re.sub(r"\s+(DSGVO|BDSG|TTDSG|DSA|NIS2|DORA|AI.?Act)\s*$", "", s, flags=re.IGNORECASE)
+
+    # Remove paragraph references: "Art. 30 Abs. 1" → "Art. 30"
+    s = re.sub(r"\s+(Abs|Absatz|para|paragraph|lit|Satz)\.?\s+.*$", "", s, flags=re.IGNORECASE)
+
+    # Normalize "Article" / "Artikel" / "Art" / "Art." (with or without a
+    # following space) → "Art. ". The lookahead leaves a bare marker alone.
+    s = re.sub(r"^(Article|Artikel|Art)\b\.?\s*(?=\S)", "Art. ", s, flags=re.IGNORECASE)
+
+    # Normalize spacing after a single section sign: "§38" → "§ 38"
+    s = re.sub(r"^§\s*(?=\d)", "§ ", s)
+
+    return s.lower().strip()
+
+
+def _cosine_sim(a: list[float], b: list[float]) -> float:
+    """Return the cosine similarity of two equally long vectors.
+
+    Yields 0.0 when either vector is empty, the lengths differ, or one of the
+    vectors has zero magnitude.
+    """
+    if not a or not b:
+        return 0.0
+    if len(a) != len(b):
+        return 0.0
+
+    length_a = sum(v * v for v in a) ** 0.5
+    length_b = sum(v * v for v in b) ** 0.5
+    inner = sum(left * right for left, right in zip(a, b))
+
+    if length_a == 0 or length_b == 0:
+        return 0.0
+    return inner / (length_a * length_b)
+
+
+def _find_obligations_dir() -> Optional[Path]:
+    """Return the first obligations v2 directory that contains a manifest.
+
+    Checks, in order: four directory levels above this file, the ``/app``
+    root, and a path relative to the current working directory. Returns
+    ``None`` when none of them is a directory holding ``_manifest.json``.
+    """
+    relative_v2 = Path("ai-compliance-sdk") / "policies" / "obligations" / "v2"
+    search_roots = (
+        Path(__file__).resolve().parent.parent.parent.parent,
+        Path("/app"),
+        Path(),
+    )
+    candidates = [root / relative_v2 for root in search_roots]
+    return next(
+        (
+            path
+            for path in candidates
+            if path.is_dir() and (path / "_manifest.json").exists()
+        ),
+        None,
+    )
+
+
+async def _get_embedding(text: str) -> list[float]:
+    """Get the embedding vector for a single text.
+
+    Posts ``{"texts": [text]}`` to ``{EMBEDDING_URL}/embed`` and returns the
+    first entry of the response's ``"embeddings"`` list.
+
+    Returns:
+        The embedding, or ``[]`` when the request fails or the response holds
+        no embeddings. Failures are logged rather than raised so the caller
+        can treat the embedding as unavailable.
+    """
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            resp = await client.post(
+                f"{EMBEDDING_URL}/embed",
+                json={"texts": [text]},
+            )
+            resp.raise_for_status()
+            embeddings = resp.json().get("embeddings", [])
+            return embeddings[0] if embeddings else []
+    except Exception as e:
+        # Best effort, but leave a trace like the batch variant does
+        logger.warning("Embedding request failed: %s", e)
+        return []
+
+
+async def _get_embeddings_batch(
+    texts: list[str], batch_size: int = 32
+) -> list[list[float]]:
+    """Get embeddings for multiple texts in batches.
+
+    Args:
+        texts: Texts to embed.
+        batch_size: Number of texts sent per request.
+
+    Returns:
+        One entry per input text, in input order. Every text of a batch that
+        failed — request error, or a response whose number of vectors does not
+        match the batch — is represented by an empty list, so the result always
+        stays index-aligned with ``texts``.
+    """
+    all_embeddings: list[list[float]] = []
+    for i in range(0, len(texts), batch_size):
+        batch = texts[i : i + batch_size]
+        try:
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                resp = await client.post(
+                    f"{EMBEDDING_URL}/embed",
+                    json={"texts": batch},
+                )
+                resp.raise_for_status()
+                embeddings = resp.json().get("embeddings", [])
+                # A short or long response would shift every later vector onto
+                # the wrong text, so treat a count mismatch as a failed batch.
+                if len(embeddings) != len(batch):
+                    raise ValueError(
+                        f"expected {len(batch)} embeddings, got {len(embeddings)}"
+                    )
+                all_embeddings.extend(embeddings)
+        except Exception as e:
+            logger.warning("Batch embedding failed for %d texts: %s", len(batch), e)
+            all_embeddings.extend([[] for _ in batch])
+    return all_embeddings
+
+
+async def _llm_ollama(prompt: str, system_prompt: Optional[str] = None) -> str:
+    """Send a non-streaming chat request to the local Ollama server.
+
+    Returns:
+        The ``message.content`` string of the response, or "" when the server
+        answers with a non-200 status or the call raises.
+    """
+    messages = [{"role": "user", "content": prompt}]
+    if system_prompt:
+        # The system message goes ahead of the user message
+        messages.insert(0, {"role": "system", "content": system_prompt})
+
+    payload = {
+        "model": OLLAMA_MODEL,
+        "messages": messages,
+        "stream": False,
+        "format": "json",
+        "options": {"num_predict": 512},
+        "think": False,
+    }
+
+    try:
+        async with httpx.AsyncClient(timeout=LLM_TIMEOUT) as client:
+            resp = await client.post(f"{OLLAMA_URL}/api/chat", json=payload)
+            if resp.status_code == 200:
+                body = resp.json()
+                return body.get("message", {}).get("content", "")
+            logger.error(
+                "Ollama chat failed %d: %s", resp.status_code, resp.text[:300]
+            )
+            return ""
+    except Exception as e:
+        logger.warning("Ollama call failed: %s", e)
+        return ""
+
+
+def _parse_json(text: str) -> dict:
+    """Extract a JSON object from LLM response text.
+
+    Tries to parse the whole text first, then the first brace-delimited span
+    that contains no nested braces.
+
+    Returns:
+        The parsed object. ``{}`` when the text is empty, nothing parses, or
+        the parsed value is not a JSON object (a list, string or number),
+        so callers can always use ``.get`` on the result.
+    """
+    if not text:
+        return {}
+
+    # Try direct parse
+    try:
+        parsed = json.loads(text)
+    except json.JSONDecodeError:
+        parsed = None
+    if isinstance(parsed, dict):
+        return parsed
+
+    # Try extracting JSON block
+    match = re.search(r"\{[^{}]*\}", text, re.DOTALL)
+    if match:
+        try:
+            parsed = json.loads(match.group())
+        except json.JSONDecodeError:
+            return {}
+        if isinstance(parsed, dict):
+            return parsed
+
+    return {}
--- a/backend-compliance/compliance/services/pattern_matcher.py
+++ b/backend-compliance/compliance/services/pattern_matcher.py
@@ -0,0 +1,532 @@
+"""Pattern Matcher — Obligation-to-Control-Pattern Linking.
+
+Maps obligations (from the ObligationExtractor) to control patterns
+using two tiers:
+
+    Tier 1: KEYWORD MATCH  — obligation_match_keywords from patterns  (~70%)
+    Tier 2: EMBEDDING      — cosine similarity with domain bonus      (~25%)
+
+Part of the Multi-Layer Control Architecture (Phase 5 of 8).
+"""
+
+import logging
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+import yaml
+
+from compliance.services.obligation_extractor import (
+    _cosine_sim,
+    _get_embedding,
+    _get_embeddings_batch,
+)
+
+logger = logging.getLogger(__name__)
+
+# Minimum number of keyword hits the best-scoring pattern needs before Tier 1
+# accepts it as a match
+KEYWORD_MATCH_MIN_HITS = 2
+# Embedding threshold for Tier 2
+# NOTE(review): the code applying this threshold is outside the visible part
+# of the file — confirm how it is compared.
+EMBEDDING_PATTERN_THRESHOLD = 0.75
+# Domain bonus when regulation maps to the pattern's domain; added to the
+# keyword confidence, with the sum capped at 1.0
+DOMAIN_BONUS = 0.10
+
+# Map regulation IDs to pattern domains that are likely relevant
+# NOTE(review): presumably consulted by PatternMatcher._domain_matches, which
+# is not visible here — confirm.
+_REGULATION_DOMAIN_AFFINITY = {
+    "dsgvo": ["DATA", "COMP", "GOV"],
+    "bdsg": ["DATA", "COMP"],
+    "ttdsg": ["DATA"],
+    "ai_act": ["AI", "COMP", "DATA"],
+    "nis2": ["SEC", "INC", "NET", "LOG", "CRYP"],
+    "dsa": ["DATA", "COMP"],
+    "data_act": ["DATA", "COMP"],
+    "eu_machinery": ["SEC", "COMP"],
+    "dora": ["SEC", "INC", "FIN", "COMP"],
+}
+
+
+@dataclass
+class ControlPattern:
+    """Python representation of a control pattern from YAML.
+
+    The first eight fields are required; all remaining fields have defaults.
+    """
+
+    id: str
+    name: str
+    # NOTE(review): presumably the German display name — confirm against the YAML
+    name_de: str
+    # Domain code compared against the source regulation for the domain bonus
+    domain: str
+    category: str
+    description: str
+    objective_template: str
+    rationale_template: str
+    requirements_template: list[str] = field(default_factory=list)
+    test_procedure_template: list[str] = field(default_factory=list)
+    evidence_template: list[str] = field(default_factory=list)
+    severity_default: str = "medium"
+    implementation_effort_default: str = "m"
+    # Keywords used by the Tier 1 keyword match against obligation text
+    obligation_match_keywords: list[str] = field(default_factory=list)
+    tags: list[str] = field(default_factory=list)
+    # IDs of other patterns; the matcher reports those that are actually loaded
+    composable_with: list[str] = field(default_factory=list)
+    open_anchor_refs: list[dict] = field(default_factory=list)
+
+
+@dataclass
+class PatternMatchResult:
+    """Outcome of matching an obligation against the control patterns.
+
+    The defaults describe "no match": no pattern, method ``"none"`` and
+    zero confidence.
+    """
+
+    pattern: Optional[ControlPattern] = None
+    pattern_id: Optional[str] = None
+    method: str = "none"  # keyword | embedding | combined | none
+    confidence: float = 0.0
+    keyword_hits: int = 0
+    total_keywords: int = 0
+    embedding_score: float = 0.0
+    domain_bonus_applied: bool = False
+    composable_patterns: list[str] = field(default_factory=list)
+
+    def to_dict(self) -> dict:
+        """Return a plain-dict summary without the pattern object itself.
+
+        ``confidence`` and ``embedding_score`` are rounded to three decimals.
+        """
+        summary = {"pattern_id": self.pattern_id, "method": self.method}
+        summary["confidence"] = round(self.confidence, 3)
+        summary["keyword_hits"] = self.keyword_hits
+        summary["total_keywords"] = self.total_keywords
+        summary["embedding_score"] = round(self.embedding_score, 3)
+        summary["domain_bonus_applied"] = self.domain_bonus_applied
+        summary["composable_patterns"] = self.composable_patterns
+        return summary
+
+
+class PatternMatcher:
+    """Links obligations to control patterns using keyword + embedding matching.
+
+    Usage::
+
+        matcher = PatternMatcher()
+        await matcher.initialize()
+
+        result = await matcher.match(
+            obligation_text="Fuehrung eines Verarbeitungsverzeichnisses...",
+            regulation_id="dsgvo",
+        )
+        print(result.pattern_id)   # e.g. "CP-COMP-001"
+        print(result.confidence)   # e.g. 0.85
+    """
+
+    def __init__(self):
+        """Create an empty matcher; data is loaded later by ``initialize()``."""
+        self._patterns: list[ControlPattern] = []
+        self._by_id: dict[str, ControlPattern] = {}
+        self._by_domain: dict[str, list[ControlPattern]] = {}
+        self._keyword_index: dict[str, list[str]] = {}  # keyword → [pattern_ids]
+        # NOTE(review): presumably parallel lists (embedding i ↔ pattern id i),
+        # mirroring ObligationExtractor — the code filling them is not visible.
+        self._pattern_embeddings: list[list[float]] = []
+        self._pattern_ids: list[str] = []
+        # Set to True at the end of initialize()
+        self._initialized = False
+
+    async def initialize(self) -> None:
+        """Load the patterns, build the keyword index and compute embeddings.
+
+        Does nothing when the matcher has already been initialized.
+        """
+        if not self._initialized:
+            self._load_patterns()
+            self._build_keyword_index()
+            await self._compute_embeddings()
+            self._initialized = True
+
+            # Only non-empty embedding vectors are counted
+            embedded = sum(1 for vec in self._pattern_embeddings if vec)
+            logger.info(
+                "PatternMatcher initialized: %d patterns, %d keywords, %d embeddings",
+                len(self._patterns),
+                len(self._keyword_index),
+                embedded,
+            )
+
+    async def match(
+        self,
+        obligation_text: str,
+        regulation_id: Optional[str] = None,
+        top_n: int = 1,
+    ) -> PatternMatchResult:
+        """Find the single best control pattern for an obligation text.
+
+        Initializes the matcher on first use.
+
+        Args:
+            obligation_text: The obligation description to match against.
+            regulation_id: Source regulation (for domain bonus).
+            top_n: Accepted for interface compatibility; this method does not
+                read it.
+
+        Returns:
+            The combined keyword/embedding result. An empty
+            PatternMatchResult when the text is empty or no patterns are
+            loaded.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        if not obligation_text or not self._patterns:
+            return PatternMatchResult()
+
+        # Run both tiers, then let _combine_results pick / merge
+        by_keyword = self._tier1_keyword(obligation_text, regulation_id)
+        by_embedding = await self._tier2_embedding(obligation_text, regulation_id)
+        best = self._combine_results(by_keyword, by_embedding)
+
+        # Report only composable patterns that are actually loaded
+        if best.pattern:
+            loaded = []
+            for pid in best.pattern.composable_with:
+                if pid in self._by_id:
+                    loaded.append(pid)
+            best.composable_patterns = loaded
+
+        return best
+
+    async def match_top_n(
+        self,
+        obligation_text: str,
+        regulation_id: Optional[str] = None,
+        n: int = 3,
+    ) -> list[PatternMatchResult]:
+        """Rank all scored patterns and return the first ``n``.
+
+        Every pattern that has a keyword or an embedding score gets a result.
+        Its confidence is the keyword or embedding confidence, or — when both
+        are positive — the larger of the two plus 0.05, capped at 1.0. Returns
+        an empty list when the text is empty or no patterns are loaded.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        if not obligation_text or not self._patterns:
+            return []
+
+        keyword_scores = self._keyword_scores(obligation_text, regulation_id)
+        embedding_scores = await self._embedding_scores(obligation_text, regulation_id)
+
+        ranked: list[PatternMatchResult] = []
+        for pid in set(keyword_scores) | set(embedding_scores):
+            pattern = self._by_id.get(pid)
+            if not pattern:
+                continue
+
+            # Missing scores fall back to all-zero placeholders
+            kw_hits, kw_total, kw_confidence = keyword_scores.get(pid, (0, 0, 0.0))
+            emb_confidence, bonus_applied = embedding_scores.get(pid, (0.0, False))
+
+            has_keyword = kw_confidence > 0
+            if has_keyword and emb_confidence > 0:
+                # Agreement of both tiers earns a small boost
+                method = "combined"
+                combined = max(kw_confidence, emb_confidence) + 0.05
+            elif has_keyword:
+                method = "keyword"
+                combined = kw_confidence
+            else:
+                method = "embedding"
+                combined = emb_confidence
+
+            composable = [p for p in pattern.composable_with if p in self._by_id]
+            ranked.append(PatternMatchResult(
+                pattern=pattern,
+                pattern_id=pid,
+                method=method,
+                confidence=min(combined, 1.0),
+                keyword_hits=kw_hits,
+                total_keywords=kw_total,
+                embedding_score=emb_confidence,
+                domain_bonus_applied=bonus_applied,
+                composable_patterns=composable,
+            ))
+
+        # Highest confidence first
+        ranked = sorted(ranked, key=lambda result: result.confidence, reverse=True)
+        return ranked[:n]
+
+    # -----------------------------------------------------------------------
+    # Tier 1: Keyword Match
+    # -----------------------------------------------------------------------
+
+    def _tier1_keyword(
+        self, obligation_text: str, regulation_id: Optional[str]
+    ) -> Optional[PatternMatchResult]:
+        """Return the best keyword-based pattern match, or ``None``.
+
+        Patterns with fewer than ``KEYWORD_MATCH_MIN_HITS`` keyword hits are
+        discarded *before* the best candidate is chosen. Selecting first and
+        filtering afterwards would let a pattern with very few keywords
+        (e.g. 1 of 1 matched, ratio 1.0) shadow a pattern that has enough
+        hits to qualify, turning a valid match into ``None``.
+
+        Args:
+            obligation_text: Text that is scanned for pattern keywords.
+            regulation_id: Optional regulation identifier. When the winning
+                pattern's domain has affinity with it, ``DOMAIN_BONUS`` is
+                added to the confidence (capped at 1.0).
+
+        Returns:
+            A ``PatternMatchResult`` with ``method="keyword"``, or ``None``
+            when no pattern reaches the minimum hit count.
+        """
+        scores = self._keyword_scores(obligation_text, regulation_id)
+
+        # Only patterns with enough hits may compete for "best match".
+        eligible = {
+            pid: entry
+            for pid, entry in scores.items()
+            if entry[0] >= KEYWORD_MATCH_MIN_HITS
+        }
+        if not eligible:
+            return None
+
+        # Highest hit ratio wins; ties keep the first candidate encountered.
+        best_pid = max(eligible, key=lambda pid: eligible[pid][2])
+        hits, total, confidence = eligible[best_pid]
+
+        pattern = self._by_id.get(best_pid)
+        if not pattern:
+            return None
+
+        # The domain bonus is applied after selection, so it never changes
+        # which pattern wins, only how confident the result is.
+        bonus_applied = False
+        if regulation_id and self._domain_matches(pattern.domain, regulation_id):
+            confidence = min(confidence + DOMAIN_BONUS, 1.0)
+            bonus_applied = True
+
+        return PatternMatchResult(
+            pattern=pattern,
+            pattern_id=best_pid,
+            method="keyword",
+            confidence=confidence,
+            keyword_hits=hits,
+            total_keywords=total,
+            domain_bonus_applied=bonus_applied,
+        )
+
+    def _keyword_scores(
+        self, text: str, regulation_id: Optional[str]
+    ) -> dict[str, tuple[int, int, float]]:
+        """Score every pattern by how many of its keywords occur in *text*.
+
+        Matching is a case-insensitive substring test against the keyword
+        index. ``regulation_id`` is part of the signature but is not used
+        by this scorer.
+
+        Returns dict: pattern_id → (hits, total_keywords, confidence), where
+        confidence is hits / total_keywords (0.0 for a pattern without
+        keywords).
+        """
+        haystack = text.lower()
+
+        # Pass 1: count, per pattern, how many indexed keywords appear.
+        hit_counts: dict[str, int] = {}
+        for kw, owners in self._keyword_index.items():
+            if kw not in haystack:
+                continue
+            for owner_id in owners:
+                hit_counts[owner_id] = hit_counts.get(owner_id, 0) + 1
+
+        # Pass 2: turn raw counts into (hits, total, ratio) triples.
+        scored: dict[str, tuple[int, int, float]] = {}
+        for owner_id, n_hits in hit_counts.items():
+            owner = self._by_id.get(owner_id)
+            if not owner:
+                continue
+            n_keywords = len(owner.obligation_match_keywords)
+            ratio = n_hits / n_keywords if n_keywords > 0 else 0.0
+            scored[owner_id] = (n_hits, n_keywords, ratio)
+
+        return scored
+
+    # -----------------------------------------------------------------------
+    # Tier 2: Embedding Match
+    # -----------------------------------------------------------------------
+
+    async def _tier2_embedding(
+        self, obligation_text: str, regulation_id: Optional[str]
+    ) -> Optional[PatternMatchResult]:
+        """Pick the pattern whose embedding is most similar to the obligation.
+
+        Returns ``None`` when no scores are available, when the top score is
+        below ``EMBEDDING_PATTERN_THRESHOLD``, or when the winning ID is not
+        a known pattern.
+        """
+        candidates = await self._embedding_scores(obligation_text, regulation_id)
+        if not candidates:
+            return None
+
+        top_pid = max(candidates, key=lambda pid: candidates[pid][0])
+        similarity, has_bonus = candidates[top_pid]
+        if similarity < EMBEDDING_PATTERN_THRESHOLD:
+            return None
+
+        matched = self._by_id.get(top_pid)
+        if not matched:
+            return None
+
+        # The score may already include the domain bonus, so cap confidence.
+        capped = min(similarity, 1.0)
+        return PatternMatchResult(
+            pattern=matched,
+            pattern_id=top_pid,
+            method="embedding",
+            confidence=capped,
+            embedding_score=similarity,
+            domain_bonus_applied=has_bonus,
+        )
+
+    async def _embedding_scores(
+        self, obligation_text: str, regulation_id: Optional[str]
+    ) -> dict[str, tuple[float, bool]]:
+        """Compare the obligation text against every pattern embedding.
+
+        Only the first 2000 characters of the text are embedded. Patterns
+        without an embedding are skipped.
+
+        Returns dict: pattern_id → (score, domain_bonus_applied); empty when
+        there are no pattern embeddings or the text could not be embedded.
+        """
+        if not self._pattern_embeddings:
+            return {}
+
+        query_vec = await _get_embedding(obligation_text[:2000])
+        if not query_vec:
+            return {}
+
+        scored: dict[str, tuple[float, bool]] = {}
+        for idx, pattern_vec in enumerate(self._pattern_embeddings):
+            if not pattern_vec:
+                continue
+
+            # Embeddings are positionally aligned with self._pattern_ids.
+            pid = self._pattern_ids[idx]
+            candidate = self._by_id.get(pid)
+            if not candidate:
+                continue
+
+            similarity = _cosine_sim(query_vec, pattern_vec)
+
+            # Reward patterns whose domain fits the regulation.
+            has_bonus = bool(
+                regulation_id
+                and self._domain_matches(candidate.domain, regulation_id)
+            )
+            if has_bonus:
+                similarity += DOMAIN_BONUS
+
+            scored[pid] = (similarity, has_bonus)
+
+        return scored
+
+    # -----------------------------------------------------------------------
+    # Score combination
+    # -----------------------------------------------------------------------
+
+    def _combine_results(
+        self,
+        keyword_result: Optional[PatternMatchResult],
+        embedding_result: Optional[PatternMatchResult],
+    ) -> PatternMatchResult:
+        """Merge the keyword and embedding tiers into a single result.
+
+        - Neither tier matched: an empty ``PatternMatchResult``.
+        - One tier matched: that tier's result unchanged.
+        - Both matched the same pattern: a "combined" result whose confidence
+          is the higher of the two plus 0.05, capped at 1.0.
+        - Both matched different patterns: the more confident one, with the
+          keyword result winning ties.
+        """
+        if keyword_result and embedding_result:
+            if keyword_result.pattern_id != embedding_result.pattern_id:
+                # Disagreement: trust whichever tier is more confident.
+                keyword_wins = (
+                    keyword_result.confidence >= embedding_result.confidence
+                )
+                return keyword_result if keyword_wins else embedding_result
+
+            # Agreement: both tiers point at the same pattern, boost it.
+            strongest = max(keyword_result.confidence, embedding_result.confidence)
+            any_bonus = (
+                keyword_result.domain_bonus_applied
+                or embedding_result.domain_bonus_applied
+            )
+            return PatternMatchResult(
+                pattern=keyword_result.pattern,
+                pattern_id=keyword_result.pattern_id,
+                method="combined",
+                confidence=min(strongest + 0.05, 1.0),
+                keyword_hits=keyword_result.keyword_hits,
+                total_keywords=keyword_result.total_keywords,
+                embedding_score=embedding_result.embedding_score,
+                domain_bonus_applied=any_bonus,
+            )
+
+        # At most one tier produced a result; fall back to an empty match.
+        return keyword_result or embedding_result or PatternMatchResult()
+
+    # -----------------------------------------------------------------------
+    # Domain affinity
+    # -----------------------------------------------------------------------
+
+    @staticmethod
+    def _domain_matches(pattern_domain: str, regulation_id: str) -> bool:
+        """Tell whether *pattern_domain* is listed as affine to *regulation_id*.
+
+        Regulations without an entry in the affinity table never match.
+        """
+        return pattern_domain in _REGULATION_DOMAIN_AFFINITY.get(regulation_id, [])
+
+    # -----------------------------------------------------------------------
+    # Initialization helpers
+    # -----------------------------------------------------------------------
+
+    def _load_patterns(self) -> None:
+        """Load control patterns from the YAML files in the patterns directory.
+
+        Files whose name starts with ``_`` are skipped, as are files without
+        a top-level ``patterns`` key. Each loaded pattern is appended to
+        ``self._patterns`` and indexed in ``self._by_id`` and
+        ``self._by_domain``.
+
+        A file that fails to parse or lacks a required field is logged and
+        skipped; patterns already read from that file stay loaded.
+        """
+        patterns_dir = _find_patterns_dir()
+        if not patterns_dir:
+            logger.warning("Control patterns directory not found")
+            return
+
+        for yaml_file in sorted(patterns_dir.glob("*.yaml")):
+            if yaml_file.name.startswith("_"):
+                continue
+            try:
+                # Read as UTF-8 explicitly: the pattern files contain German
+                # text, and the platform default encoding is locale dependent.
+                with open(yaml_file, encoding="utf-8") as f:
+                    data = yaml.safe_load(f)
+                if not data or "patterns" not in data:
+                    continue
+                for p in data["patterns"]:
+                    pattern = ControlPattern(
+                        id=p["id"],
+                        name=p["name"],
+                        name_de=p["name_de"],
+                        domain=p["domain"],
+                        category=p["category"],
+                        description=p["description"],
+                        objective_template=p["objective_template"],
+                        rationale_template=p["rationale_template"],
+                        requirements_template=p.get("requirements_template", []),
+                        test_procedure_template=p.get("test_procedure_template", []),
+                        evidence_template=p.get("evidence_template", []),
+                        severity_default=p.get("severity_default", "medium"),
+                        implementation_effort_default=p.get("implementation_effort_default", "m"),
+                        obligation_match_keywords=p.get("obligation_match_keywords", []),
+                        tags=p.get("tags", []),
+                        composable_with=p.get("composable_with", []),
+                        open_anchor_refs=p.get("open_anchor_refs", []),
+                    )
+                    self._patterns.append(pattern)
+                    self._by_id[pattern.id] = pattern
+                    domain_list = self._by_domain.setdefault(pattern.domain, [])
+                    domain_list.append(pattern)
+            except Exception as e:
+                # Best effort: one broken file must not prevent the rest of
+                # the pattern library from loading.
+                logger.error("Failed to load %s: %s", yaml_file.name, e)
+
+        logger.info("Loaded %d patterns from %s", len(self._patterns), patterns_dir)
+
+    def _build_keyword_index(self) -> None:
+        """Populate the reverse index: lower-cased keyword → [pattern_ids].
+
+        Entries are appended to whatever ``self._keyword_index`` already
+        holds, in pattern order.
+        """
+        for entry in self._patterns:
+            for raw_keyword in entry.obligation_match_keywords:
+                owners = self._keyword_index.setdefault(raw_keyword.lower(), [])
+                owners.append(entry.id)
+
+    async def _compute_embeddings(self) -> None:
+        """Embed "<name_de>: <objective_template>" for every loaded pattern.
+
+        Stores the pattern IDs and their embeddings in parallel lists
+        (``self._pattern_ids`` / ``self._pattern_embeddings``). Does nothing
+        when no patterns are loaded.
+        """
+        if not self._patterns:
+            return
+
+        ids: list[str] = []
+        texts: list[str] = []
+        for pat in self._patterns:
+            ids.append(pat.id)
+            texts.append(f"{pat.name_de}: {pat.objective_template}")
+        self._pattern_ids = ids
+
+        logger.info("Computing embeddings for %d patterns...", len(texts))
+        self._pattern_embeddings = await _get_embeddings_batch(texts)
+
+        # Falsy entries mark patterns for which no embedding was obtained.
+        usable = sum(1 for vec in self._pattern_embeddings if vec)
+        logger.info("Got %d/%d valid pattern embeddings", usable, len(texts))
+
+    # -----------------------------------------------------------------------
+    # Public helpers
+    # -----------------------------------------------------------------------
+
+    def get_pattern(self, pattern_id: str) -> Optional[ControlPattern]:
+        """Look up a pattern by ID (the ID is upper-cased before the lookup).
+
+        Returns ``None`` when no pattern has that ID.
+        """
+        lookup_key = pattern_id.upper()
+        return self._by_id.get(lookup_key)
+
+    def get_patterns_by_domain(self, domain: str) -> list[ControlPattern]:
+        """List the patterns registered for a domain.
+
+        The domain name is upper-cased before the lookup; an unknown domain
+        yields an empty list.
+        """
+        lookup_key = domain.upper()
+        return self._by_domain.get(lookup_key, [])
+
+    def stats(self) -> dict:
+        """Summarize the matcher state: pattern, domain, keyword and embedding counts."""
+        usable_embeddings = sum(1 for vec in self._pattern_embeddings if vec)
+        domain_names = list(self._by_domain.keys())
+        return {
+            "total_patterns": len(self._patterns),
+            "domains": domain_names,
+            "keywords": len(self._keyword_index),
+            "embeddings_valid": usable_embeddings,
+            "initialized": self._initialized,
+        }
+
+
+def _find_patterns_dir() -> Optional[Path]:
+    """Return the first existing control_patterns directory, or ``None``.
+
+    Candidates are tried in order: four levels above this file, the
+    ``/app`` location, and a path relative to the current working
+    directory.
+    """
+    relative = Path("ai-compliance-sdk") / "policies" / "control_patterns"
+    four_levels_up = Path(__file__).resolve().parent.parent.parent.parent
+
+    for candidate in (four_levels_up / relative, Path("/app") / relative, relative):
+        if candidate.is_dir():
+            return candidate
+    return None
--- a/backend-compliance/compliance/services/pipeline_adapter.py
+++ b/backend-compliance/compliance/services/pipeline_adapter.py
@@ -0,0 +1,670 @@
+"""Pipeline Adapter — New 10-Stage Pipeline Integration.
+
+Bridges the existing 7-stage control_generator pipeline with the new
+multi-layer components (ObligationExtractor, PatternMatcher, ControlComposer).
+
+New pipeline flow:
+    chunk → license_classify
+          → obligation_extract (Stage 4 — NEW)
+          → pattern_match      (Stage 5 — NEW)
+          → control_compose    (Stage 6 — replaces old Stage 3)
+          → harmonize → anchor → store + crosswalk → mark processed
+
+Can be used in two modes:
+    1. INLINE: Called from _process_batch() to enrich the pipeline
+    2. STANDALONE: Process chunks directly through new stages
+
+Part of the Multi-Layer Control Architecture (Phase 7 of 8).
+"""
+
+import hashlib
+import json
+import logging
+from dataclasses import dataclass, field
+from typing import Optional
+
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+
+from compliance.services.control_composer import ComposedControl, ControlComposer
+from compliance.services.obligation_extractor import ObligationExtractor, ObligationMatch
+from compliance.services.pattern_matcher import PatternMatcher, PatternMatchResult
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class PipelineChunk:
+    """One unit of regulation text fed into the new pipeline stages."""
+
+    # Raw chunk text; the only required field.
+    text: str
+    collection: str = ""
+    regulation_code: str = ""
+    article: Optional[str] = None
+    paragraph: Optional[str] = None
+    license_rule: int = 3
+    license_info: dict = field(default_factory=dict)
+    source_citation: Optional[dict] = None
+    # SHA-256 hex digest of ``text``; filled in by compute_hash() when empty.
+    chunk_hash: str = ""
+
+    def compute_hash(self) -> str:
+        """Return the chunk hash, computing and caching it on first use.
+
+        A hash that is already set (including one supplied by the caller)
+        is returned as-is.
+        """
+        if self.chunk_hash:
+            return self.chunk_hash
+        digest = hashlib.sha256(self.text.encode()).hexdigest()
+        self.chunk_hash = digest
+        return digest
+
+
+@dataclass
+class PipelineResult:
+    """Outcome of running one chunk through the new pipeline stages."""
+
+    chunk: PipelineChunk
+    obligation: ObligationMatch = field(default_factory=ObligationMatch)
+    pattern_result: PatternMatchResult = field(default_factory=PatternMatchResult)
+    control: Optional[ComposedControl] = None
+    crosswalk_written: bool = False
+    # Error message when processing failed, otherwise None.
+    error: Optional[str] = None
+
+    def to_dict(self) -> dict:
+        """Serialize the result; falsy sub-results are emitted as ``None``."""
+        obligation_dict = self.obligation.to_dict() if self.obligation else None
+        pattern_dict = self.pattern_result.to_dict() if self.pattern_result else None
+        control_dict = self.control.to_dict() if self.control else None
+        return {
+            "chunk_hash": self.chunk.chunk_hash,
+            "obligation": obligation_dict,
+            "pattern": pattern_dict,
+            "control": control_dict,
+            "crosswalk_written": self.crosswalk_written,
+            "error": self.error,
+        }
+
+
+class PipelineAdapter:
+    """Integrates ObligationExtractor + PatternMatcher + ControlComposer.
+
+    Usage::
+
+        adapter = PipelineAdapter(db)
+        await adapter.initialize()
+
+        result = await adapter.process_chunk(PipelineChunk(
+            text="...",
+            regulation_code="eu_2016_679",
+            article="Art. 30",
+            license_rule=1,
+        ))
+    """
+
+    def __init__(self, db: Optional[Session] = None):
+        """Create the adapter.
+
+        Args:
+            db: Optional database session. Without it the adapter can still
+                process chunks, but ``write_crosswalk`` is a no-op.
+        """
+        self.db = db
+        self._extractor = ObligationExtractor()
+        self._matcher = PatternMatcher()
+        self._composer = ControlComposer()
+        self._initialized = False
+
+    async def initialize(self) -> None:
+        """Initialize the extractor and matcher (idempotent)."""
+        if self._initialized:
+            return
+        await self._extractor.initialize()
+        await self._matcher.initialize()
+        self._initialized = True
+        logger.info("PipelineAdapter initialized")
+
+    async def process_chunk(self, chunk: PipelineChunk) -> PipelineResult:
+        """Process a single chunk through the new 3-stage pipeline.
+
+        Stage 4: Obligation Extract
+        Stage 5: Pattern Match
+        Stage 6: Control Compose
+
+        Failures in any stage are caught: the returned result carries the
+        message in ``error`` and whatever the earlier stages produced.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        chunk.compute_hash()
+        result = PipelineResult(chunk=chunk)
+
+        try:
+            # Stage 4: Obligation Extract
+            result.obligation = await self._extractor.extract(
+                chunk_text=chunk.text,
+                regulation_code=chunk.regulation_code,
+                article=chunk.article,
+                paragraph=chunk.paragraph,
+            )
+
+            # Stage 5: Pattern Match — prefer the extracted obligation text,
+            # then its title, and finally the head of the raw chunk.
+            obligation_text = (
+                result.obligation.obligation_text
+                or result.obligation.obligation_title
+                or chunk.text[:500]
+            )
+            result.pattern_result = await self._matcher.match(
+                obligation_text=obligation_text,
+                regulation_id=result.obligation.regulation_id,
+            )
+
+            # Stage 6: Control Compose — the raw chunk text is only handed
+            # to the composer for license rules 1 and 2.
+            result.control = await self._composer.compose(
+                obligation=result.obligation,
+                pattern_result=result.pattern_result,
+                chunk_text=chunk.text if chunk.license_rule in (1, 2) else None,
+                license_rule=chunk.license_rule,
+                source_citation=chunk.source_citation,
+                regulation_code=chunk.regulation_code,
+            )
+
+        except Exception as e:
+            # Keep the batch going; logger.exception records the traceback.
+            logger.exception("Pipeline processing failed: %s", e)
+            result.error = str(e)
+
+        return result
+
+    async def process_batch(self, chunks: list[PipelineChunk]) -> list[PipelineResult]:
+        """Process multiple chunks sequentially, preserving input order."""
+        return [await self.process_chunk(chunk) for chunk in chunks]
+
+    def write_crosswalk(self, result: PipelineResult, control_uuid: str) -> bool:
+        """Write obligation_extraction + crosswalk_matrix rows for a processed chunk.
+
+        Called AFTER the control is stored in canonical_controls.
+
+        All three statements run in one transaction: on any error the
+        session is rolled back and ``False`` is returned.
+
+        Args:
+            result: Pipeline result whose ``control`` has been stored.
+            control_uuid: UUID of the stored canonical control.
+
+        Returns:
+            ``True`` when the rows were committed, ``False`` when there is no
+            database session, no control, or the write failed.
+        """
+        if not self.db or not result.control:
+            return False
+
+        chunk = result.chunk
+        obligation = result.obligation
+        pattern = result.pattern_result
+
+        try:
+            # 1. Write obligation_extraction row
+            self.db.execute(
+                text("""
+                    INSERT INTO obligation_extractions (
+                        chunk_hash, collection, regulation_code,
+                        article, paragraph, obligation_id,
+                        obligation_text, confidence, extraction_method,
+                        pattern_id, pattern_match_score, control_uuid
+                    ) VALUES (
+                        :chunk_hash, :collection, :regulation_code,
+                        :article, :paragraph, :obligation_id,
+                        :obligation_text, :confidence, :extraction_method,
+                        :pattern_id, :pattern_match_score,
+                        CAST(:control_uuid AS uuid)
+                    )
+                """),
+                {
+                    "chunk_hash": chunk.chunk_hash,
+                    "collection": chunk.collection,
+                    "regulation_code": chunk.regulation_code,
+                    "article": chunk.article,
+                    "paragraph": chunk.paragraph,
+                    "obligation_id": obligation.obligation_id if obligation else None,
+                    "obligation_text": (
+                        obligation.obligation_text[:2000]
+                        if obligation and obligation.obligation_text
+                        else None
+                    ),
+                    "confidence": obligation.confidence if obligation else 0,
+                    "extraction_method": obligation.method if obligation else "none",
+                    "pattern_id": pattern.pattern_id if pattern else None,
+                    "pattern_match_score": pattern.confidence if pattern else 0,
+                    "control_uuid": control_uuid,
+                },
+            )
+
+            # 2. Write crosswalk_matrix row
+            self.db.execute(
+                text("""
+                    INSERT INTO crosswalk_matrix (
+                        regulation_code, article, paragraph,
+                        obligation_id, pattern_id,
+                        master_control_id, master_control_uuid,
+                        confidence, source
+                    ) VALUES (
+                        :regulation_code, :article, :paragraph,
+                        :obligation_id, :pattern_id,
+                        :master_control_id,
+                        CAST(:master_control_uuid AS uuid),
+                        :confidence, :source
+                    )
+                """),
+                {
+                    "regulation_code": chunk.regulation_code,
+                    "article": chunk.article,
+                    "paragraph": chunk.paragraph,
+                    "obligation_id": obligation.obligation_id if obligation else None,
+                    "pattern_id": pattern.pattern_id if pattern else None,
+                    "master_control_id": result.control.control_id,
+                    "master_control_uuid": control_uuid,
+                    # The link is only as trustworthy as its weakest stage.
+                    "confidence": min(
+                        obligation.confidence if obligation else 0,
+                        pattern.confidence if pattern else 0,
+                    ),
+                    "source": "auto",
+                },
+            )
+
+            # 3. Update canonical_controls with pattern_id + obligation_ids
+            new_pattern_id = result.control.pattern_id
+            new_obligation_ids = result.control.obligation_ids
+            if new_pattern_id or new_obligation_ids:
+                self.db.execute(
+                    text("""
+                        UPDATE canonical_controls
+                        SET pattern_id = COALESCE(:pattern_id, pattern_id),
+                            obligation_ids = COALESCE(:obligation_ids, obligation_ids)
+                        WHERE id = CAST(:control_uuid AS uuid)
+                    """),
+                    {
+                        # Bind NULL for absent values so COALESCE keeps what
+                        # is stored. json.dumps() of an empty list or None
+                        # yields the non-NULL strings "[]" / "null", which
+                        # would overwrite existing obligation links.
+                        "pattern_id": new_pattern_id or None,
+                        "obligation_ids": (
+                            json.dumps(new_obligation_ids)
+                            if new_obligation_ids
+                            else None
+                        ),
+                        "control_uuid": control_uuid,
+                    },
+                )
+
+            self.db.commit()
+            result.crosswalk_written = True
+            return True
+
+        except Exception as e:
+            logger.exception("Failed to write crosswalk: %s", e)
+            self.db.rollback()
+            return False
+
+    def stats(self) -> dict:
+        """Return component statistics."""
+        return {
+            "extractor": self._extractor.stats(),
+            "matcher": self._matcher.stats(),
+            "initialized": self._initialized,
+        }
+
+
+# ---------------------------------------------------------------------------
+# Migration Passes — Backfill existing 4,800+ controls
+# ---------------------------------------------------------------------------
+
+
+class MigrationPasses:
+    """Non-destructive migration passes for existing controls.
+
+    Pass 1: Obligation Linkage (deterministic, article→obligation lookup)
+    Pass 2: Pattern Classification (keyword-based matching)
+    Pass 3: Quality Triage (categorize by linkage completeness)
+    Pass 4: Crosswalk Backfill (write crosswalk rows for linked controls)
+    Pass 5: Deduplication (mark duplicate controls)
+
+    Usage::
+
+        migration = MigrationPasses(db)
+        await migration.initialize()
+
+        result = await migration.run_pass1_obligation_linkage(limit=100)
+        result = await migration.run_pass2_pattern_classification(limit=100)
+        result = migration.run_pass3_quality_triage()
+        result = migration.run_pass4_crosswalk_backfill()
+        result = migration.run_pass5_deduplication()
+    """
+
+    def __init__(self, db: Session):
+        """Bind the migration passes to a database session.
+
+        The extractor and matcher are created here but only loaded by
+        ``initialize()``.
+        """
+        self._initialized = False
+        self._matcher = PatternMatcher()
+        self._extractor = ObligationExtractor()
+        self.db = db
+
+    async def initialize(self) -> None:
+        """Load obligations and patterns and build the keyword index.
+
+        Calls the components' loader methods directly instead of their
+        ``initialize()`` coroutines. Safe to call repeatedly.
+        """
+        if not self._initialized:
+            self._extractor._load_obligations()
+            self._matcher._load_patterns()
+            self._matcher._build_keyword_index()
+            self._initialized = True
+
+    # -------------------------------------------------------------------
+    # Pass 1: Obligation Linkage (deterministic)
+    # -------------------------------------------------------------------
+
+    async def run_pass1_obligation_linkage(self, limit: int = 0) -> dict:
+        """Link existing controls to obligations via source_citation article.
+
+        For each non-deprecated control without obligation_ids → extract
+        regulation + article from its citation/metadata → look up in the
+        obligation framework (exact match) → set obligation_ids.
+
+        Args:
+            limit: Maximum number of controls to process; 0 (or negative)
+                means no limit.
+
+        Returns:
+            Counters: ``total``, ``linked``, ``no_match``, ``no_citation``.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        query = """
+            SELECT id, control_id, source_citation, generation_metadata
+            FROM canonical_controls
+            WHERE release_state NOT IN ('deprecated')
+              AND (obligation_ids IS NULL OR obligation_ids = '[]')
+        """
+        # Bind LIMIT as a parameter instead of formatting it into the SQL.
+        params: dict = {}
+        if limit > 0:
+            query += " LIMIT :limit"
+            params["limit"] = int(limit)
+
+        rows = self.db.execute(text(query), params).fetchall()
+
+        stats = {"total": len(rows), "linked": 0, "no_match": 0, "no_citation": 0}
+
+        for row in rows:
+            control_uuid = str(row[0])
+            citation = row[2]
+            metadata = row[3]
+
+            # Extract regulation + article from citation or metadata
+            reg_code, article = _extract_regulation_article(citation, metadata)
+            if not reg_code:
+                stats["no_citation"] += 1
+                continue
+
+            # Tier 1: Exact match
+            match = self._extractor._tier1_exact(reg_code, article or "")
+            if match and match.obligation_id:
+                self.db.execute(
+                    text("""
+                        UPDATE canonical_controls
+                        SET obligation_ids = :obl_ids
+                        WHERE id = CAST(:uuid AS uuid)
+                    """),
+                    {
+                        "obl_ids": json.dumps([match.obligation_id]),
+                        "uuid": control_uuid,
+                    },
+                )
+                stats["linked"] += 1
+            else:
+                stats["no_match"] += 1
+
+        # Single commit for the whole pass.
+        self.db.commit()
+        logger.info("Pass 1: %s", stats)
+        return stats
+
+    # -------------------------------------------------------------------
+    # Pass 2: Pattern Classification (keyword-based)
+    # -------------------------------------------------------------------
+
+    async def run_pass2_pattern_classification(self, limit: int = 0) -> dict:
+        """Classify existing controls into patterns via keyword matching.
+
+        For each non-deprecated control without pattern_id → keyword-match
+        title+objective against the pattern library → assign the best match
+        when it has at least two keyword hits.
+
+        Args:
+            limit: Maximum number of controls to process; 0 (or negative)
+                means no limit.
+
+        Returns:
+            Counters: ``total``, ``classified``, ``no_match``.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        query = """
+            SELECT id, control_id, title, objective
+            FROM canonical_controls
+            WHERE release_state NOT IN ('deprecated')
+              AND (pattern_id IS NULL OR pattern_id = '')
+        """
+        # Bind LIMIT as a parameter instead of formatting it into the SQL.
+        params: dict = {}
+        if limit > 0:
+            query += " LIMIT :limit"
+            params["limit"] = int(limit)
+
+        rows = self.db.execute(text(query), params).fetchall()
+
+        stats = {"total": len(rows), "classified": 0, "no_match": 0}
+
+        for row in rows:
+            control_uuid = str(row[0])
+            title = row[2] or ""
+            objective = row[3] or ""
+
+            # Keyword match (no regulation context, so no domain bonus)
+            match_text = f"{title} {objective}"
+            result = self._matcher._tier1_keyword(match_text, None)
+
+            if result and result.pattern_id and result.keyword_hits >= 2:
+                self.db.execute(
+                    text("""
+                        UPDATE canonical_controls
+                        SET pattern_id = :pattern_id
+                        WHERE id = CAST(:uuid AS uuid)
+                    """),
+                    {
+                        "pattern_id": result.pattern_id,
+                        "uuid": control_uuid,
+                    },
+                )
+                stats["classified"] += 1
+            else:
+                stats["no_match"] += 1
+
+        # Single commit for the whole pass.
+        self.db.commit()
+        logger.info("Pass 2: %s", stats)
+        return stats
+
+    # -------------------------------------------------------------------
+    # Pass 3: Quality Triage
+    # -------------------------------------------------------------------
+
+    def run_pass3_quality_triage(self) -> dict:
+        """Tag every non-deprecated control with a triage status.
+
+        Writes generation_metadata.triage_status according to which links
+        the control already has:
+            - "review": obligation_ids and pattern_id are both set
+            - "needs_obligation": pattern_id set, obligation_ids missing
+            - "needs_pattern": obligation_ids set, pattern_id missing
+            - "legacy_unlinked": neither is set
+
+        Returns a dict mapping each status to the number of rows updated.
+        """
+        triage_updates = {
+            "review": """
+                UPDATE canonical_controls
+                SET generation_metadata = jsonb_set(
+                    COALESCE(generation_metadata::jsonb, '{}'::jsonb),
+                    '{triage_status}', '"review"'
+                )
+                WHERE release_state NOT IN ('deprecated')
+                  AND obligation_ids IS NOT NULL AND obligation_ids != '[]'
+                  AND pattern_id IS NOT NULL AND pattern_id != ''
+            """,
+            "needs_obligation": """
+                UPDATE canonical_controls
+                SET generation_metadata = jsonb_set(
+                    COALESCE(generation_metadata::jsonb, '{}'::jsonb),
+                    '{triage_status}', '"needs_obligation"'
+                )
+                WHERE release_state NOT IN ('deprecated')
+                  AND (obligation_ids IS NULL OR obligation_ids = '[]')
+                  AND pattern_id IS NOT NULL AND pattern_id != ''
+            """,
+            "needs_pattern": """
+                UPDATE canonical_controls
+                SET generation_metadata = jsonb_set(
+                    COALESCE(generation_metadata::jsonb, '{}'::jsonb),
+                    '{triage_status}', '"needs_pattern"'
+                )
+                WHERE release_state NOT IN ('deprecated')
+                  AND obligation_ids IS NOT NULL AND obligation_ids != '[]'
+                  AND (pattern_id IS NULL OR pattern_id = '')
+            """,
+            "legacy_unlinked": """
+                UPDATE canonical_controls
+                SET generation_metadata = jsonb_set(
+                    COALESCE(generation_metadata::jsonb, '{}'::jsonb),
+                    '{triage_status}', '"legacy_unlinked"'
+                )
+                WHERE release_state NOT IN ('deprecated')
+                  AND (obligation_ids IS NULL OR obligation_ids = '[]')
+                  AND (pattern_id IS NULL OR pattern_id = '')
+            """,
+        }
+
+        # Run the statements in declaration order and record rows touched.
+        stats = {
+            status: self.db.execute(text(statement)).rowcount
+            for status, statement in triage_updates.items()
+        }
+
+        self.db.commit()
+        logger.info("Pass 3: %s", stats)
+        return stats
+
+    # -------------------------------------------------------------------
+    # Pass 4: Crosswalk Backfill
+    # -------------------------------------------------------------------
+
+    def run_pass4_crosswalk_backfill(self) -> dict:
+        """Backfill crosswalk_matrix from controls that have both links.
+
+        Inserts one row per (control, obligation_id) for non-deprecated
+        controls with a pattern_id and obligation_ids, skipping pairs that
+        already exist. Rows get a fixed confidence of 0.80 and the source
+        'migrated'.
+
+        Returns ``{"rows_inserted": <count>}``.
+        """
+        backfill_sql = text("""
+            INSERT INTO crosswalk_matrix (
+                regulation_code, obligation_id, pattern_id,
+                master_control_id, master_control_uuid,
+                confidence, source
+            )
+            SELECT
+                COALESCE(
+                    (generation_metadata::jsonb->>'source_regulation'),
+                    ''
+                ) AS regulation_code,
+                obl.value::text AS obligation_id,
+                cc.pattern_id,
+                cc.control_id,
+                cc.id,
+                0.80,
+                'migrated'
+            FROM canonical_controls cc,
+                 jsonb_array_elements_text(
+                     COALESCE(cc.obligation_ids::jsonb, '[]'::jsonb)
+                 ) AS obl(value)
+            WHERE cc.release_state NOT IN ('deprecated')
+              AND cc.pattern_id IS NOT NULL AND cc.pattern_id != ''
+              AND cc.obligation_ids IS NOT NULL AND cc.obligation_ids != '[]'
+              AND NOT EXISTS (
+                  SELECT 1 FROM crosswalk_matrix cw
+                  WHERE cw.master_control_uuid = cc.id
+                    AND cw.obligation_id = obl.value::text
+              )
+        """)
+
+        inserted = self.db.execute(backfill_sql).rowcount
+        self.db.commit()
+        logger.info("Pass 4: %d crosswalk rows inserted", inserted)
+        return {"rows_inserted": inserted}
+
+    # -------------------------------------------------------------------
+    # Pass 5: Deduplication
+    # -------------------------------------------------------------------
+
+    def run_pass5_deduplication(self) -> dict:
+        """Deprecate duplicate controls sharing an obligation and a pattern.
+
+        Non-deprecated controls with a pattern_id are grouped by
+        (pattern_id, obligation id). Within each group the control with the
+        highest evidence_confidence (ties: newest created_at) is kept; every
+        other member is set to release_state 'deprecated' and tagged with
+        deprecated_reason = 'duplicate_same_obligation_pattern'.
+
+        Returns:
+            Dict with ``groups_found`` (groups with more than one member) and
+            ``controls_deprecated`` (rows actually switched to deprecated).
+        """
+        # Find groups with duplicates; ids are ordered best-first.
+        groups = self.db.execute(text("""
+            SELECT cc.pattern_id,
+                   obl.value::text AS obligation_id,
+                   array_agg(cc.id ORDER BY cc.evidence_confidence DESC NULLS LAST, cc.created_at DESC) AS ids,
+                   count(*) AS cnt
+            FROM canonical_controls cc,
+                 jsonb_array_elements_text(
+                     COALESCE(cc.obligation_ids::jsonb, '[]'::jsonb)
+                 ) AS obl(value)
+            WHERE cc.release_state NOT IN ('deprecated')
+              AND cc.pattern_id IS NOT NULL AND cc.pattern_id != ''
+            GROUP BY cc.pattern_id, obl.value::text
+            HAVING count(*) > 1
+        """)).fetchall()
+
+        stats = {"groups_found": len(groups), "controls_deprecated": 0}
+
+        # Built once; only the bound uuid changes per execution.
+        deprecate_stmt = text("""
+            UPDATE canonical_controls
+            SET release_state = 'deprecated',
+                generation_metadata = jsonb_set(
+                    COALESCE(generation_metadata::jsonb, '{}'::jsonb),
+                    '{deprecated_reason}', '"duplicate_same_obligation_pattern"'
+                )
+            WHERE id = CAST(:uuid AS uuid)
+              AND release_state != 'deprecated'
+        """)
+
+        for group in groups:
+            # Index 0 is the keeper (highest confidence); the rest lose.
+            # NOTE(review): a control listed under several obligations can be
+            # the keeper of one group and a loser in another - confirm that
+            # deprecating it is intended in that case.
+            for dep_id in group[2][1:]:
+                result = self.db.execute(deprecate_stmt, {"uuid": str(dep_id)})
+                # The UPDATE skips rows that are already deprecated (e.g. the
+                # same control lost in an earlier group), so count affected
+                # rows rather than attempts to keep the statistic accurate.
+                stats["controls_deprecated"] += result.rowcount
+
+        self.db.commit()
+        logger.info("Pass 5: %s", stats)
+        return stats
+
+    def migration_status(self) -> dict:
+        """Summarise how far the canonical controls have been linked.
+
+        Returns:
+            Dict with the total number of controls, how many have
+            obligations, a pattern, or both, how many are deprecated, and
+            the three coverage percentages (one decimal, relative to the
+            total; a total of 0 is treated as 1 to avoid dividing by zero).
+        """
+        counts = self.db.execute(text("""
+            SELECT
+                count(*) AS total,
+                count(*) FILTER (WHERE obligation_ids IS NOT NULL AND obligation_ids != '[]') AS has_obligation,
+                count(*) FILTER (WHERE pattern_id IS NOT NULL AND pattern_id != '') AS has_pattern,
+                count(*) FILTER (
+                    WHERE obligation_ids IS NOT NULL AND obligation_ids != '[]'
+                      AND pattern_id IS NOT NULL AND pattern_id != ''
+                ) AS fully_linked,
+                count(*) FILTER (WHERE release_state = 'deprecated') AS deprecated
+            FROM canonical_controls
+        """)).fetchone()
+
+        total, with_obligation, with_pattern, linked, deprecated = counts
+        denominator = max(total, 1)
+
+        def pct(part):
+            # Share of all controls, as a percentage with one decimal.
+            return round(part / denominator * 100, 1)
+
+        return {
+            "total_controls": total,
+            "has_obligation": with_obligation,
+            "has_pattern": with_pattern,
+            "fully_linked": linked,
+            "deprecated": deprecated,
+            "coverage_obligation_pct": pct(with_obligation),
+            "coverage_pattern_pct": pct(with_pattern),
+            "coverage_full_pct": pct(linked),
+        }
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _extract_regulation_article(
+    citation: Optional[str], metadata: Optional[str]
+) -> tuple[Optional[str], Optional[str]]:
+    """Derive (regulation_code, article) for a control.
+
+    The citation is consulted first: its ``article`` (or ``source_article``)
+    and its ``source``, normalised via ``_normalize_regulation``. The
+    metadata is only consulted when the citation yielded no regulation code;
+    it may supply ``source_regulation`` and, if the article is still
+    missing, ``source_article``. Either input may be a JSON string or an
+    already-decoded object; values that fail to decode or are not dicts are
+    ignored.
+    """
+    from compliance.services.obligation_extractor import _normalize_regulation
+
+    def _decode(raw):
+        # Strings are parsed as JSON; anything else is passed through as-is.
+        return json.loads(raw) if isinstance(raw, str) else raw
+
+    regulation = None
+    art = None
+
+    # Citation takes precedence over metadata.
+    if citation:
+        try:
+            parsed = _decode(citation)
+            if isinstance(parsed, dict):
+                art = parsed.get("article") or parsed.get("source_article")
+                raw_source = parsed.get("source", "")
+                if raw_source:
+                    regulation = _normalize_regulation(raw_source)
+        except (json.JSONDecodeError, TypeError):
+            pass
+
+    # Metadata only fills the gaps the citation left open.
+    if metadata and not regulation:
+        try:
+            meta = _decode(metadata)
+            if isinstance(meta, dict):
+                raw_regulation = meta.get("source_regulation", "")
+                if raw_regulation:
+                    regulation = _normalize_regulation(raw_regulation)
+                if not art:
+                    art = meta.get("source_article")
+        except (json.JSONDecodeError, TypeError):
+            pass
+
+    return regulation, art
--- a/backend-compliance/compliance/services/rag_client.py
+++ b/backend-compliance/compliance/services/rag_client.py
@@ -33,6 +33,7 @@ class RAGSearchResult:
    paragraph: str
    source_url: str
    score: float
+    collection: str = ""


 class ComplianceRAGClient:
@@ -91,6 +92,7 @@ class ComplianceRAGClient:
                    paragraph=r.get("paragraph", ""),
                    source_url=r.get("source_url", ""),
                    score=r.get("score", 0.0),
+                    collection=collection,
                ))
            return results

@@ -98,6 +100,88 @@ class ComplianceRAGClient:
            logger.warning("RAG search failed: %s", e)
            return []

+    async def search_with_rerank(
+        self,
+        query: str,
+        collection: str = "bp_compliance_ce",
+        regulations: Optional[List[str]] = None,
+        top_k: int = 5,
+    ) -> List[RAGSearchResult]:
+        """
+        Search, then re-rank the hits with the cross-encoder when enabled.
+
+        Without a reranker this is a plain ``search`` call. Otherwise
+        max(top_k * 4, 20) candidates are fetched, re-ranked, and the best
+        top_k are returned. If re-ranking raises, the first top_k candidates
+        are returned in their original order.
+        """
+        from .reranker import get_reranker
+
+        cross_encoder = get_reranker()
+        if cross_encoder is None:
+            return await self.search(query, collection, regulations, top_k)
+
+        # Over-fetch so the reranker has a meaningful pool to choose from.
+        pool_size = max(top_k * 4, 20)
+        pool = await self.search(
+            query, collection, regulations, top_k=pool_size
+        )
+        if not pool:
+            return []
+
+        passages = [hit.text for hit in pool]
+        try:
+            order = cross_encoder.rerank(query, passages, top_k=top_k)
+            return [pool[idx] for idx in order]
+        except Exception as e:
+            logger.warning("Reranking failed, returning unranked: %s", e)
+            return pool[:top_k]
+
+    async def scroll(
+        self,
+        collection: str,
+        offset: Optional[str] = None,
+        limit: int = 100,
+    ) -> tuple[List[RAGSearchResult], Optional[str]]:
+        """
+        Fetch one page of chunks from a collection.
+
+        Returns (chunks, next_offset). next_offset is None when the service
+        reports no further page; a non-200 response or any exception yields
+        ([], None).
+        """
+        # The scroll endpoint is derived from the configured search URL.
+        endpoint = self._search_url.replace("/search", "/scroll")
+        query_params = {"collection": collection, "limit": str(limit)}
+        if offset:
+            query_params["offset"] = offset
+
+        try:
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                resp = await client.get(endpoint, params=query_params)
+
+            if resp.status_code != 200:
+                logger.warning(
+                    "RAG scroll returned %d: %s", resp.status_code, resp.text[:200]
+                )
+                return [], None
+
+            body = resp.json()
+            chunks = [
+                RAGSearchResult(
+                    text=item.get("text", ""),
+                    regulation_code=item.get("regulation_code", ""),
+                    regulation_name=item.get("regulation_name", ""),
+                    regulation_short=item.get("regulation_short", ""),
+                    category=item.get("category", ""),
+                    article=item.get("article", ""),
+                    paragraph=item.get("paragraph", ""),
+                    source_url=item.get("source_url", ""),
+                    score=0.0,  # scrolling carries no relevance score
+                    collection=collection,
+                )
+                for item in body.get("chunks", [])
+            ]
+            # An empty/falsy offset from the service means "no more pages".
+            return chunks, body.get("next_offset") or None
+
+        except Exception as e:
+            logger.warning("RAG scroll failed: %s", e)
+            return [], None
+
    def format_for_prompt(
        self, results: List[RAGSearchResult], max_results: int = 5
    ) -> str:
--- a/backend-compliance/compliance/services/reranker.py
+++ b/backend-compliance/compliance/services/reranker.py
@@ -0,0 +1,85 @@
+"""
+Cross-Encoder Re-Ranking for RAG Search Results.
+
+Uses BGE Reranker v2 (BAAI/bge-reranker-v2-m3, MIT license) to re-rank
+search results from Qdrant for improved retrieval quality.
+
+Lazy-loads the model on first use. Disabled by default (RERANK_ENABLED=false).
+"""
+
+import logging
+import os
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+RERANK_ENABLED = os.getenv("RERANK_ENABLED", "false").lower() == "true"
+RERANK_MODEL = os.getenv("RERANK_MODEL", "BAAI/bge-reranker-v2-m3")
+
+
+class Reranker:
+    """Cross-encoder reranker backed by sentence-transformers."""
+
+    def __init__(self, model_name: str = RERANK_MODEL):
+        self._model_name = model_name
+        self._model = None  # created by _ensure_model() on first rerank
+
+    def _ensure_model(self) -> None:
+        """Instantiate the CrossEncoder once; later calls are no-ops.
+
+        Raises:
+            ImportError: sentence-transformers is not installed.
+            Exception: whatever the model constructor raised (re-raised
+                after logging).
+        """
+        if self._model is None:
+            try:
+                from sentence_transformers import CrossEncoder
+
+                logger.info("Loading reranker model: %s", self._model_name)
+                self._model = CrossEncoder(self._model_name)
+                logger.info("Reranker model loaded successfully")
+            except ImportError:
+                logger.error(
+                    "sentence-transformers not installed. "
+                    "Install with: pip install sentence-transformers"
+                )
+                raise
+            except Exception as e:
+                logger.error("Failed to load reranker model: %s", e)
+                raise
+
+    def rerank(
+        self, query: str, texts: list[str], top_k: int = 5
+    ) -> list[int]:
+        """
+        Rank candidate texts against the query.
+
+        Args:
+            query: The search query.
+            texts: Candidate texts to score.
+            top_k: Maximum number of indices to return.
+
+        Returns:
+            Indices into ``texts``, best score first, truncated to top_k.
+            An empty ``texts`` returns [] without loading the model.
+        """
+        if not texts:
+            return []
+
+        self._ensure_model()
+
+        scores = self._model.predict([[query, candidate] for candidate in texts])
+
+        # Order positions by their score, highest first.
+        positions = list(range(len(scores)))
+        positions.sort(key=lambda pos: scores[pos], reverse=True)
+        return positions[:top_k]
+
+
+# Module-level singleton
+_reranker: Optional[Reranker] = None
+
+
+def get_reranker() -> Optional[Reranker]:
+    """Return the process-wide Reranker, or None when RERANK_ENABLED is off.
+
+    The instance is created on the first enabled call and reused afterwards.
+    """
+    global _reranker
+    if RERANK_ENABLED:
+        if _reranker is None:
+            _reranker = Reranker()
+        return _reranker
+    return None
--- a/backend-compliance/compliance/services/v1_enrichment.py
+++ b/backend-compliance/compliance/services/v1_enrichment.py
@@ -0,0 +1,331 @@
+"""V1 Control Enrichment Service — Match Eigenentwicklung controls to regulations.
+
+Finds regulatory coverage for v1 controls (generation_strategy='ungrouped',
+pipeline_version=1, no source_citation) by embedding similarity search.
+
+Reuses embedding + Qdrant helpers from control_dedup.py.
+"""
+
+import logging
+from typing import Optional
+
+from sqlalchemy import text
+
+from database import SessionLocal
+from compliance.services.control_dedup import (
+    get_embedding,
+    qdrant_search_cross_regulation,
+)
+
+logger = logging.getLogger(__name__)
+
+# Similarity threshold — lower than dedup (0.85) since we want informational matches
+# Typical top scores for v1 controls are 0.70-0.77
+V1_MATCH_THRESHOLD = 0.70
+V1_MAX_MATCHES = 5
+
+
+def _is_eigenentwicklung_query() -> str:
+    """Return the SQL condition identifying v1 Eigenentwicklung controls.
+
+    The result is a bare condition (no leading ``WHERE``) with unqualified
+    column names, meant to be interpolated into a query on
+    canonical_controls. It selects controls with generation_strategy
+    'ungrouped', pipeline_version '1' or NULL, no source_citation, no
+    parent, and a release_state other than rejected/merged/deprecated.
+    """
+    return """
+        generation_strategy = 'ungrouped'
+        AND (pipeline_version = '1' OR pipeline_version IS NULL)
+        AND source_citation IS NULL
+        AND parent_control_uuid IS NULL
+        AND release_state NOT IN ('rejected', 'merged', 'deprecated')
+    """
+
+
+async def count_v1_controls() -> int:
+    """Return the number of v1 Eigenentwicklung controls (0 if no row)."""
+    with SessionLocal() as db:
+        result = db.execute(text(f"""
+            SELECT COUNT(*) AS cnt
+            FROM canonical_controls
+            WHERE {_is_eigenentwicklung_query()}
+        """))
+        first = result.fetchone()
+        if not first:
+            return 0
+        return first.cnt
+
+
+async def enrich_v1_matches(
+    dry_run: bool = True,
+    batch_size: int = 100,
+    offset: int = 0,
+) -> dict:
+    """Find regulatory matches for v1 Eigenentwicklung controls.
+
+    Each control of the batch is embedded, searched against the
+    ``atomic_controls_dedup`` collection and linked to at most
+    ``V1_MAX_MATCHES`` distinct regulatory controls (controls carrying a
+    source_citation, resolved via the hit itself or its parent) whose score
+    reaches ``V1_MATCH_THRESHOLD``.
+
+    The database work of every control runs inside its own SAVEPOINT, so a
+    failure for one control rolls back only that control's rows and leaves
+    the session usable for the rest of the batch.
+
+    Args:
+        dry_run: If True, only report the batch — don't write matches.
+        batch_size: Number of v1 controls to process per call.
+        offset: Pagination offset (v1 control index).
+
+    Returns:
+        Stats dict with counts, sample matches, and pagination info.
+    """
+    with SessionLocal() as db:
+        # 1. Load v1 controls (paginated)
+        v1_controls = db.execute(text(f"""
+            SELECT id, control_id, title, objective, category
+            FROM canonical_controls
+            WHERE {_is_eigenentwicklung_query()}
+            ORDER BY control_id
+            LIMIT :limit OFFSET :offset
+        """), {"limit": batch_size, "offset": offset}).fetchall()
+
+        # Count total for pagination
+        total_row = db.execute(text(f"""
+            SELECT COUNT(*) AS cnt
+            FROM canonical_controls
+            WHERE {_is_eigenentwicklung_query()}
+        """)).fetchone()
+        total_v1 = total_row.cnt if total_row else 0
+
+        if not v1_controls:
+            return {
+                "dry_run": dry_run,
+                "processed": 0,
+                "total_v1": total_v1,
+                "message": "Kein weiterer Batch — alle v1 Controls verarbeitet.",
+            }
+
+        if dry_run:
+            return {
+                "dry_run": True,
+                "total_v1": total_v1,
+                "offset": offset,
+                "batch_size": batch_size,
+                "sample_controls": [
+                    {
+                        "control_id": r.control_id,
+                        "title": r.title,
+                        "category": r.category,
+                    }
+                    for r in v1_controls[:20]
+                ],
+            }
+
+        # Upsert linking a v1 control to the regulatory control (not the
+        # atomic child that produced the hit). Built once for the batch.
+        insert_stmt = text("""
+            INSERT INTO v1_control_matches
+                (v1_control_uuid, matched_control_uuid, similarity_score,
+                 match_rank, matched_source, matched_article, match_method)
+            VALUES
+                (CAST(:v1_uuid AS uuid), CAST(:matched_uuid AS uuid), :score,
+                 :rank, :source, :article, 'embedding')
+            ON CONFLICT (v1_control_uuid, matched_control_uuid) DO UPDATE
+            SET similarity_score = EXCLUDED.similarity_score,
+                match_rank = EXCLUDED.match_rank
+        """)
+
+        # 2. Process each v1 control
+        processed = 0
+        matches_inserted = 0
+        errors = []
+        sample_matches = []
+
+        for v1 in v1_controls:
+            # Matches of this control only; merged into the batch totals
+            # after the control has been handled without error.
+            control_matches = []
+            try:
+                search_text = f"{v1.title} — {v1.objective}"
+
+                embedding = await get_embedding(search_text)
+                if not embedding:
+                    errors.append({
+                        "control_id": v1.control_id,
+                        "error": "Embedding fehlgeschlagen",
+                    })
+                    continue
+
+                # Search Qdrant (cross-regulation, no pattern filter)
+                results = await qdrant_search_cross_regulation(
+                    embedding, top_k=20,
+                    collection="atomic_controls_dedup",
+                )
+
+                # SAVEPOINT: a failing statement must not poison the outer
+                # transaction, and partial rows of a failed control must not
+                # be committed with the rest of the batch.
+                with db.begin_nested():
+                    seen_parents: set[str] = set()
+
+                    for hit in results:
+                        if len(control_matches) >= V1_MAX_MATCHES:
+                            break
+
+                        score = hit.get("score", 0)
+                        if score < V1_MATCH_THRESHOLD:
+                            continue
+
+                        payload = hit.get("payload") or {}
+                        matched_uuid = payload.get("control_uuid")
+                        if not matched_uuid or matched_uuid == str(v1.id):
+                            continue
+
+                        reg_row = _resolve_regulatory_control(db, matched_uuid)
+                        if reg_row is None:
+                            continue
+
+                        # Several atomic hits may share one regulatory
+                        # control; list each regulatory control only once.
+                        parent_key = str(reg_row.id)
+                        if parent_key in seen_parents:
+                            continue
+                        seen_parents.add(parent_key)
+
+                        rank = len(control_matches) + 1
+
+                        citation = reg_row.source_citation
+                        if isinstance(citation, dict):
+                            matched_source = citation.get("source")
+                            matched_article = citation.get("article")
+                        else:
+                            matched_source = None
+                            matched_article = None
+
+                        db.execute(insert_stmt, {
+                            "v1_uuid": str(v1.id),
+                            "matched_uuid": parent_key,
+                            "score": round(score, 3),
+                            "rank": rank,
+                            "source": matched_source,
+                            "article": matched_article,
+                        })
+
+                        control_matches.append({
+                            "v1_control_id": v1.control_id,
+                            "v1_title": v1.title,
+                            "matched_control_id": reg_row.control_id,
+                            "matched_title": reg_row.title,
+                            "matched_source": matched_source,
+                            "matched_article": matched_article,
+                            "similarity_score": round(score, 3),
+                            "match_rank": rank,
+                        })
+
+            except Exception as e:
+                logger.warning("V1 enrichment error for %s: %s", v1.control_id, e)
+                errors.append({
+                    "control_id": v1.control_id,
+                    "error": str(e),
+                })
+                continue
+
+            processed += 1
+            matches_inserted += len(control_matches)
+            # Keep at most 20 sample matches across the whole batch.
+            sample_matches.extend(control_matches[: 20 - len(sample_matches)])
+
+        db.commit()
+
+    # Pagination: a full batch suggests there may be more controls.
+    next_offset = offset + batch_size if len(v1_controls) == batch_size else None
+
+    return {
+        "dry_run": False,
+        "offset": offset,
+        "batch_size": batch_size,
+        "next_offset": next_offset,
+        "total_v1": total_v1,
+        "processed": processed,
+        "matches_inserted": matches_inserted,
+        "errors": errors[:10],
+        "sample_matches": sample_matches,
+    }
+
+
+def _resolve_regulatory_control(db, control_uuid: str):
+    """Return the control, or its direct parent, that has a source_citation.
+
+    Returns None when the control does not exist or neither it nor its
+    direct parent carries a (non-empty) source_citation.
+    """
+    row = db.execute(text("""
+        SELECT c.id, c.control_id, c.title, c.source_citation,
+               c.severity, c.category, c.parent_control_uuid
+        FROM canonical_controls c
+        WHERE c.id = CAST(:uuid AS uuid)
+    """), {"uuid": control_uuid}).fetchone()
+
+    if not row:
+        return None
+    if row.source_citation:
+        return row
+    if not row.parent_control_uuid:
+        return None
+
+    parent = db.execute(text("""
+        SELECT id, control_id, title, source_citation,
+               severity, category, parent_control_uuid
+        FROM canonical_controls
+        WHERE id = CAST(:uuid AS uuid)
+          AND source_citation IS NOT NULL
+    """), {"uuid": str(row.parent_control_uuid)}).fetchone()
+
+    if parent and parent.source_citation:
+        return parent
+    return None
+
+
+async def get_v1_matches(control_uuid: str) -> list[dict]:
+    """Return the stored regulatory matches of one v1 control.
+
+    Args:
+        control_uuid: The UUID of the v1 control.
+
+    Returns:
+        One dict per match, ordered by match_rank, combining the match row
+        with details of the matched control.
+    """
+    with SessionLocal() as db:
+        matches_sql = text("""
+            SELECT
+                m.similarity_score,
+                m.match_rank,
+                m.matched_source,
+                m.matched_article,
+                m.match_method,
+                c.control_id AS matched_control_id,
+                c.title AS matched_title,
+                c.objective AS matched_objective,
+                c.severity AS matched_severity,
+                c.category AS matched_category,
+                c.source_citation AS matched_source_citation
+            FROM v1_control_matches m
+            JOIN canonical_controls c ON c.id = m.matched_control_uuid
+            WHERE m.v1_control_uuid = CAST(:uuid AS uuid)
+            ORDER BY m.match_rank
+        """)
+        found = db.execute(matches_sql, {"uuid": control_uuid}).fetchall()
+
+        matches = []
+        for match in found:
+            matches.append({
+                "matched_control_id": match.matched_control_id,
+                "matched_title": match.matched_title,
+                "matched_objective": match.matched_objective,
+                "matched_severity": match.matched_severity,
+                "matched_category": match.matched_category,
+                "matched_source": match.matched_source,
+                "matched_article": match.matched_article,
+                "matched_source_citation": match.matched_source_citation,
+                "similarity_score": float(match.similarity_score),
+                "match_rank": match.match_rank,
+                "match_method": match.match_method,
+            })
+        return matches
+
+
+async def get_v1_enrichment_stats() -> dict:
+    """Get overview stats for v1 enrichment.
+
+    Returns:
+        Dict with the number of v1 controls, how many of them have at least
+        one stored match, how many have none, the total number of match
+        rows, and the average similarity score (None when there are no
+        match rows).
+    """
+    v1_filter = _is_eigenentwicklung_query()
+
+    with SessionLocal() as db:
+        total_v1 = db.execute(text(f"""
+            SELECT COUNT(*) AS cnt FROM canonical_controls
+            WHERE {v1_filter}
+        """)).scalar() or 0
+
+        # The v1 filter uses unqualified column names, so it is applied in a
+        # subquery on canonical_controls instead of rewriting the generated
+        # SQL text to add a table alias.
+        v1_with_matches = db.execute(text(f"""
+            SELECT COUNT(DISTINCT m.v1_control_uuid) AS cnt
+            FROM v1_control_matches m
+            WHERE m.v1_control_uuid IN (
+                SELECT id FROM canonical_controls
+                WHERE {v1_filter}
+            )
+        """)).scalar() or 0
+
+        total_matches = db.execute(text("""
+            SELECT COUNT(*) AS cnt FROM v1_control_matches
+        """)).scalar() or 0
+
+        avg_score = db.execute(text("""
+            SELECT AVG(similarity_score) AS avg_score FROM v1_control_matches
+        """)).scalar()
+
+        return {
+            "total_v1_controls": total_v1,
+            "v1_with_matches": v1_with_matches,
+            "v1_without_matches": total_v1 - v1_with_matches,
+            "total_matches": total_matches,
+            # AVG is NULL only when the table is empty; an average of 0 is
+            # still a valid number and must not be reported as None.
+            "avg_similarity_score": (
+                round(float(avg_score), 3) if avg_score is not None else None
+            ),
+        }