feat: Vorbereitung-Module auf 100% — Persistenz, Backend-Services, UCCA Frontend
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 37s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 18s

Phase A: PostgreSQL State Store (sdk_states Tabelle, InMemory-Fallback)
Phase B: Modules dynamisch vom Backend, Scope DB-Persistenz, Source Policy State
Phase C: UCCA Frontend (3 Seiten, Wizard, RiskScoreGauge), Obligations Live-Daten
Phase D: Document Import (PDF/LLM/Gap-Analyse), System Screening (SBOM/OSV.dev)
Phase E: Company Profile CRUD mit Audit-Logging
Phase F: Tests (Python + TypeScript), flow-data.ts DB-Tabellen aktualisiert

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-02 11:04:31 +01:00
parent cd15ab0932
commit e6d666b89b
38 changed files with 4195 additions and 420 deletions

View File

@@ -0,0 +1,344 @@
"""
FastAPI routes for Company Profile CRUD with audit logging.
Endpoints:
- GET /v1/company-profile: Get company profile for a tenant
- POST /v1/company-profile: Create or update company profile
- GET /v1/company-profile/audit: Get audit log for a tenant
"""
import json
import logging
import uuid
from typing import Optional
from fastapi import APIRouter, HTTPException, Header
from pydantic import BaseModel
from database import SessionLocal
# Module-level logger for the company-profile routes.
logger = logging.getLogger(__name__)
# All endpoints below are mounted under /v1/company-profile.
router = APIRouter(prefix="/v1/company-profile", tags=["company-profile"])
# =============================================================================
# REQUEST/RESPONSE MODELS
# =============================================================================
class CompanyProfileRequest(BaseModel):
    """Payload for creating or updating a tenant's company profile.

    Every field carries a default so partial wizard submissions still
    validate. The mutable list defaults are safe here: pydantic copies
    field defaults per instance.
    """

    # Basic company facts
    company_name: str = ""
    legal_form: str = "GmbH"
    industry: str = ""
    founded_year: Optional[int] = None
    business_model: str = "B2B"
    offerings: list[str] = []
    # Size classification
    company_size: str = "small"
    employee_count: str = "1-9"
    annual_revenue: str = "< 2 Mio"
    # Locations and markets
    headquarters_country: str = "DE"
    headquarters_city: str = ""
    has_international_locations: bool = False
    international_countries: list[str] = []
    target_markets: list[str] = ["DE"]
    primary_jurisdiction: str = "DE"
    # Data-protection / AI roles
    is_data_controller: bool = True
    is_data_processor: bool = False
    uses_ai: bool = False
    ai_use_cases: list[str] = []
    # Contact persons (all optional)
    dpo_name: Optional[str] = None
    dpo_email: Optional[str] = None
    legal_contact_name: Optional[str] = None
    legal_contact_email: Optional[str] = None
    # Free-form extension block for machine-builder specifics (schema not fixed here)
    machine_builder: Optional[dict] = None
    # True once the profile wizard finished; drives the completed_at column
    is_complete: bool = False
class CompanyProfileResponse(BaseModel):
    """Full company profile as stored in compliance_company_profiles.

    Field order mirrors the SELECT column order used by the routes in this
    module; timestamps are serialized to strings by row_to_response.
    """

    id: str
    tenant_id: str
    company_name: str
    legal_form: str
    industry: str
    founded_year: Optional[int]
    business_model: str
    offerings: list[str]
    company_size: str
    employee_count: str
    annual_revenue: str
    headquarters_country: str
    headquarters_city: str
    has_international_locations: bool
    international_countries: list[str]
    target_markets: list[str]
    primary_jurisdiction: str
    is_data_controller: bool
    is_data_processor: bool
    uses_ai: bool
    ai_use_cases: list[str]
    dpo_name: Optional[str]
    dpo_email: Optional[str]
    legal_contact_name: Optional[str]
    legal_contact_email: Optional[str]
    machine_builder: Optional[dict]
    is_complete: bool
    # Stringified timestamps; completed_at is None while the wizard is unfinished
    completed_at: Optional[str]
    created_at: str
    updated_at: str
class AuditEntryResponse(BaseModel):
    """One row of the company-profile audit log."""

    id: str
    # "create" or "update", as determined by the upsert route
    action: str
    # Snapshot of the submitted payload; None when it could not be decoded as a dict
    changed_fields: Optional[dict]
    changed_by: Optional[str]
    created_at: str
class AuditListResponse(BaseModel):
    """Audit entries for one tenant plus the count of returned entries."""

    entries: list[AuditEntryResponse]
    total: int
# =============================================================================
# HELPERS
# =============================================================================
def row_to_response(row) -> CompanyProfileResponse:
    """Map one compliance_company_profiles row (positional) to the response model.

    The column order must match the SELECT lists used by the routes in this
    module. NULL text/bool columns are normalized to the same defaults the
    request model uses; JSON columns are only trusted when they decoded to
    the expected list/dict shape.
    """
    (row_id, tenant, name, form, industry, founded, model, offerings, size,
     employees, revenue, hq_country, hq_city, has_intl, intl_countries,
     markets, jurisdiction, controller, processor, uses_ai_flag, ai_cases,
     dpo_name, dpo_email, legal_name, legal_email, machine, complete,
     completed, created, updated) = row
    return CompanyProfileResponse(
        id=str(row_id),
        tenant_id=tenant,
        company_name=name or "",
        legal_form=form or "GmbH",
        industry=industry or "",
        founded_year=founded,
        business_model=model or "B2B",
        offerings=offerings if isinstance(offerings, list) else [],
        company_size=size or "small",
        employee_count=employees or "1-9",
        annual_revenue=revenue or "< 2 Mio",
        headquarters_country=hq_country or "DE",
        headquarters_city=hq_city or "",
        has_international_locations=has_intl or False,
        international_countries=intl_countries if isinstance(intl_countries, list) else [],
        target_markets=markets if isinstance(markets, list) else ["DE"],
        primary_jurisdiction=jurisdiction or "DE",
        # NULL means "never answered" and defaults to True (matches the request model)
        is_data_controller=controller if controller is not None else True,
        is_data_processor=processor or False,
        uses_ai=uses_ai_flag or False,
        ai_use_cases=ai_cases if isinstance(ai_cases, list) else [],
        dpo_name=dpo_name,
        dpo_email=dpo_email,
        legal_contact_name=legal_name,
        legal_contact_email=legal_email,
        machine_builder=machine if isinstance(machine, dict) else None,
        is_complete=complete or False,
        completed_at=str(completed) if completed else None,
        created_at=str(created),
        updated_at=str(updated),
    )
def log_audit(db, tenant_id: str, action: str, changed_fields: dict | None, changed_by: str | None):
"""Write an audit log entry."""
try:
db.execute(
"""INSERT INTO compliance_company_profile_audit
(tenant_id, action, changed_fields, changed_by)
VALUES (:tenant_id, :action, :fields::jsonb, :changed_by)""",
{
"tenant_id": tenant_id,
"action": action,
"fields": json.dumps(changed_fields) if changed_fields else None,
"changed_by": changed_by,
},
)
except Exception as e:
logger.warning(f"Failed to write audit log: {e}")
# =============================================================================
# ROUTES
# =============================================================================
@router.get("", response_model=CompanyProfileResponse)
async def get_company_profile(
tenant_id: str = "default",
x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
):
"""Get company profile for a tenant."""
tid = x_tenant_id or tenant_id
db = SessionLocal()
try:
result = db.execute(
"""SELECT id, tenant_id, company_name, legal_form, industry, founded_year,
business_model, offerings, company_size, employee_count, annual_revenue,
headquarters_country, headquarters_city, has_international_locations,
international_countries, target_markets, primary_jurisdiction,
is_data_controller, is_data_processor, uses_ai, ai_use_cases,
dpo_name, dpo_email, legal_contact_name, legal_contact_email,
machine_builder, is_complete, completed_at, created_at, updated_at
FROM compliance_company_profiles WHERE tenant_id = :tenant_id""",
{"tenant_id": tid},
)
row = result.fetchone()
if not row:
raise HTTPException(status_code=404, detail="Company profile not found")
return row_to_response(row)
finally:
db.close()
@router.post("", response_model=CompanyProfileResponse)
async def upsert_company_profile(
profile: CompanyProfileRequest,
tenant_id: str = "default",
x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
):
"""Create or update company profile (upsert)."""
tid = x_tenant_id or tenant_id
db = SessionLocal()
try:
# Check if profile exists
existing = db.execute(
"SELECT id FROM compliance_company_profiles WHERE tenant_id = :tid",
{"tid": tid},
).fetchone()
action = "update" if existing else "create"
completed_at_clause = ", completed_at = NOW()" if profile.is_complete else ", completed_at = NULL"
db.execute(
f"""INSERT INTO compliance_company_profiles
(tenant_id, company_name, legal_form, industry, founded_year,
business_model, offerings, company_size, employee_count, annual_revenue,
headquarters_country, headquarters_city, has_international_locations,
international_countries, target_markets, primary_jurisdiction,
is_data_controller, is_data_processor, uses_ai, ai_use_cases,
dpo_name, dpo_email, legal_contact_name, legal_contact_email,
machine_builder, is_complete)
VALUES (:tid, :company_name, :legal_form, :industry, :founded_year,
:business_model, :offerings::jsonb, :company_size, :employee_count, :annual_revenue,
:hq_country, :hq_city, :has_intl, :intl_countries::jsonb,
:target_markets::jsonb, :jurisdiction,
:is_controller, :is_processor, :uses_ai, :ai_use_cases::jsonb,
:dpo_name, :dpo_email, :legal_name, :legal_email,
:machine_builder::jsonb, :is_complete)
ON CONFLICT (tenant_id) DO UPDATE SET
company_name = EXCLUDED.company_name,
legal_form = EXCLUDED.legal_form,
industry = EXCLUDED.industry,
founded_year = EXCLUDED.founded_year,
business_model = EXCLUDED.business_model,
offerings = EXCLUDED.offerings,
company_size = EXCLUDED.company_size,
employee_count = EXCLUDED.employee_count,
annual_revenue = EXCLUDED.annual_revenue,
headquarters_country = EXCLUDED.headquarters_country,
headquarters_city = EXCLUDED.headquarters_city,
has_international_locations = EXCLUDED.has_international_locations,
international_countries = EXCLUDED.international_countries,
target_markets = EXCLUDED.target_markets,
primary_jurisdiction = EXCLUDED.primary_jurisdiction,
is_data_controller = EXCLUDED.is_data_controller,
is_data_processor = EXCLUDED.is_data_processor,
uses_ai = EXCLUDED.uses_ai,
ai_use_cases = EXCLUDED.ai_use_cases,
dpo_name = EXCLUDED.dpo_name,
dpo_email = EXCLUDED.dpo_email,
legal_contact_name = EXCLUDED.legal_contact_name,
legal_contact_email = EXCLUDED.legal_contact_email,
machine_builder = EXCLUDED.machine_builder,
is_complete = EXCLUDED.is_complete,
updated_at = NOW()
{completed_at_clause}""",
{
"tid": tid,
"company_name": profile.company_name,
"legal_form": profile.legal_form,
"industry": profile.industry,
"founded_year": profile.founded_year,
"business_model": profile.business_model,
"offerings": json.dumps(profile.offerings),
"company_size": profile.company_size,
"employee_count": profile.employee_count,
"annual_revenue": profile.annual_revenue,
"hq_country": profile.headquarters_country,
"hq_city": profile.headquarters_city,
"has_intl": profile.has_international_locations,
"intl_countries": json.dumps(profile.international_countries),
"target_markets": json.dumps(profile.target_markets),
"jurisdiction": profile.primary_jurisdiction,
"is_controller": profile.is_data_controller,
"is_processor": profile.is_data_processor,
"uses_ai": profile.uses_ai,
"ai_use_cases": json.dumps(profile.ai_use_cases),
"dpo_name": profile.dpo_name,
"dpo_email": profile.dpo_email,
"legal_name": profile.legal_contact_name,
"legal_email": profile.legal_contact_email,
"machine_builder": json.dumps(profile.machine_builder) if profile.machine_builder else None,
"is_complete": profile.is_complete,
},
)
# Audit log
log_audit(db, tid, action, profile.model_dump(), None)
db.commit()
# Fetch and return
result = db.execute(
"""SELECT id, tenant_id, company_name, legal_form, industry, founded_year,
business_model, offerings, company_size, employee_count, annual_revenue,
headquarters_country, headquarters_city, has_international_locations,
international_countries, target_markets, primary_jurisdiction,
is_data_controller, is_data_processor, uses_ai, ai_use_cases,
dpo_name, dpo_email, legal_contact_name, legal_contact_email,
machine_builder, is_complete, completed_at, created_at, updated_at
FROM compliance_company_profiles WHERE tenant_id = :tid""",
{"tid": tid},
)
row = result.fetchone()
return row_to_response(row)
except Exception as e:
db.rollback()
logger.error(f"Failed to upsert company profile: {e}")
raise HTTPException(status_code=500, detail="Failed to save company profile")
finally:
db.close()
@router.get("/audit", response_model=AuditListResponse)
async def get_audit_log(
tenant_id: str = "default",
x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
):
"""Get audit log for company profile changes."""
tid = x_tenant_id or tenant_id
db = SessionLocal()
try:
result = db.execute(
"""SELECT id, action, changed_fields, changed_by, created_at
FROM compliance_company_profile_audit
WHERE tenant_id = :tid
ORDER BY created_at DESC
LIMIT 100""",
{"tid": tid},
)
rows = result.fetchall()
entries = [
AuditEntryResponse(
id=str(r[0]),
action=r[1],
changed_fields=r[2] if isinstance(r[2], dict) else None,
changed_by=r[3],
created_at=str(r[4]),
)
for r in rows
]
return AuditListResponse(entries=entries, total=len(entries))
finally:
db.close()

View File

@@ -0,0 +1,380 @@
"""
FastAPI routes for Document Import and Gap Analysis.
Endpoints:
- POST /v1/import/analyze: Upload and analyze a compliance document
- GET /v1/import/documents: List imported documents for a tenant
- GET /v1/import/gap-analysis/{document_id}: Get gap analysis for a document
"""
import logging
import os
import uuid
from typing import Optional
import httpx
from fastapi import APIRouter, File, Form, UploadFile, HTTPException
from pydantic import BaseModel
from database import SessionLocal
# Module-level logger for the document-import routes.
logger = logging.getLogger(__name__)
# All endpoints below are mounted under /v1/import.
router = APIRouter(prefix="/v1/import", tags=["document-import"])
# Ollama endpoint and model for the optional LLM document classification;
# both overridable via environment variables.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
LLM_MODEL = os.getenv("COMPLIANCE_LLM_MODEL", "qwen3:30b-a3b")
# =============================================================================
# DOCUMENT TYPE DETECTION
# =============================================================================
# Keyword lists per document category; matching is case-insensitive substring.
DOCUMENT_TYPE_KEYWORDS = {
    "DSFA": ["datenschutz-folgenabschaetzung", "dsfa", "dpia", "privacy impact"],
    "TOM": ["technisch-organisatorische", "tom", "massnahmen", "technical measures"],
    "VVT": ["verarbeitungsverzeichnis", "vvt", "processing activities", "art. 30"],
    "PRIVACY_POLICY": ["datenschutzerklaerung", "privacy policy", "datenschutzhinweis"],
    "AGB": ["allgemeine geschaeftsbedingungen", "agb", "terms and conditions"],
    "COOKIE_POLICY": ["cookie", "tracking", "einwilligung"],
    "RISK_ASSESSMENT": ["risikobewertung", "risk assessment", "risikoanalyse"],
    "AUDIT_REPORT": ["audit", "pruefbericht", "zertifizierung"],
}


def detect_document_type(text: str) -> tuple[str, float]:
    """Detect the document type via keyword hits.

    Returns (type, confidence): confidence grows by 0.15 per keyword hit
    starting from 0.5, capped at 0.95; ("OTHER", 0.3) when nothing matches.
    """
    haystack = text.lower()
    hits = {
        doc_type: sum(kw in haystack for kw in keywords)
        for doc_type, keywords in DOCUMENT_TYPE_KEYWORDS.items()
    }
    hits = {doc_type: n for doc_type, n in hits.items() if n}
    if not hits:
        return "OTHER", 0.3
    best = max(hits, key=hits.get)
    return best, min(0.95, 0.5 + hits[best] * 0.15)
# =============================================================================
# GAP ANALYSIS
# =============================================================================
# Rule table: a rule applies when any check_keyword is present in the text;
# it reports a gap when none of its gap_if_missing terms appear.
GAP_RULES = [
    {
        "category": "AI Act Compliance",
        "regulation": "EU AI Act Art. 6",
        "check_keywords": ["ki", "ai", "kuenstliche intelligenz", "machine learning"],
        "gap_if_missing": ["risikoklassifizierung", "risk classification", "risikokategorie"],
        "severity": "CRITICAL",
        "action": "Risikoklassifizierung fuer KI-Systeme durchfuehren",
    },
    {
        "category": "Transparenz",
        "regulation": "DSGVO Art. 13, 14, 22",
        "check_keywords": ["automatisiert", "automated", "profiling"],
        "gap_if_missing": ["informationspflicht", "information obligation", "transparenz"],
        "severity": "HIGH",
        "action": "Informationspflichten bei automatisierten Entscheidungen ergaenzen",
    },
    {
        "category": "TOMs",
        "regulation": "DSGVO Art. 32",
        "check_keywords": ["ki", "ai", "cloud", "saas"],
        "gap_if_missing": ["technische massnahmen", "verschluesselung", "encryption"],
        "severity": "MEDIUM",
        "action": "Technisch-organisatorische Massnahmen um KI-Aspekte erweitern",
    },
    {
        "category": "VVT",
        "regulation": "DSGVO Art. 30",
        "check_keywords": ["verarbeitung", "processing", "daten"],
        "gap_if_missing": ["verarbeitungsverzeichnis", "vvt", "processing activities"],
        "severity": "HIGH",
        "action": "Verarbeitungsverzeichnis aktualisieren",
    },
    {
        "category": "Menschliche Aufsicht",
        "regulation": "EU AI Act Art. 14",
        "check_keywords": ["ki", "ai", "autonom", "autonomous"],
        "gap_if_missing": ["menschliche aufsicht", "human oversight", "human-in-the-loop"],
        "severity": "MEDIUM",
        "action": "Prozesse fuer menschliche Aufsicht definieren",
    },
]


def analyze_gaps(text: str, doc_type: str) -> list[dict]:
    """Run the GAP_RULES table over the document text.

    Returns one finding dict per triggered rule; findings carry random short
    ids and reference the document type as related_step_id.
    """
    lowered = text.lower()
    findings: list[dict] = []
    for rule in GAP_RULES:
        if not any(kw in lowered for kw in rule["check_keywords"]):
            continue  # rule not relevant to this document
        if any(kw in lowered for kw in rule["gap_if_missing"]):
            continue  # required element present — no gap
        findings.append({
            "id": f"gap-{uuid.uuid4().hex[:8]}",
            "category": rule["category"],
            "description": f"{rule['category']}: Luecke erkannt",
            "severity": rule["severity"],
            "regulation": rule["regulation"],
            "required_action": rule["action"],
            "related_step_id": doc_type.lower(),
        })
    return findings
# =============================================================================
# TEXT EXTRACTION
# =============================================================================
def extract_text_from_pdf(content: bytes) -> str:
    """Extract plain text from a PDF byte stream via PyMuPDF.

    Returns "" when PyMuPDF is not installed or when the document cannot be
    parsed; both cases are logged rather than raised.
    """
    try:
        import fitz  # PyMuPDF — optional dependency, imported lazily
    except ImportError:
        logger.warning("PyMuPDF not available, returning empty text")
        return ""
    try:
        doc = fitz.open(stream=content, filetype="pdf")
        pages = [page.get_text() for page in doc]
        doc.close()
        return "\n".join(pages)
    except Exception as e:
        logger.error(f"PDF extraction failed: {e}")
        return ""
# =============================================================================
# LLM CLASSIFICATION (optional enhancement)
# =============================================================================
async def classify_with_llm(text: str) -> Optional[tuple[str, float]]:
    """Use Ollama LLM to classify document type (optional, falls back to keywords).

    Returns (category, 0.85) when the model answers with one of the known
    category names; otherwise None so the caller can fall back to keyword
    detection. Any transport or model error is logged and treated as
    "no answer".
    """
    try:
        # The continuation lines are part of the prompt literal and therefore
        # intentionally not indented.
        prompt = f"""Klassifiziere das folgende Dokument in eine dieser Kategorien:
DSFA, TOM, VVT, PRIVACY_POLICY, AGB, COOKIE_POLICY, RISK_ASSESSMENT, AUDIT_REPORT, OTHER
Antworte NUR mit dem Kategorienamen, nichts anderes.
Dokumenttext (erste 2000 Zeichen):
{text[:2000]}"""
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{OLLAMA_URL}/api/generate",
                json={
                    "model": LLM_MODEL,
                    "prompt": prompt,
                    "stream": False,
                    # Low temperature + tiny output budget: we only want a label.
                    "options": {"temperature": 0.1, "num_predict": 20},
                },
            )
            if response.status_code == 200:
                result = response.json()
                answer = result.get("response", "").strip().upper()
                # Validate answer against the closed category set.
                valid_types = {"DSFA", "TOM", "VVT", "PRIVACY_POLICY", "AGB",
                               "COOKIE_POLICY", "RISK_ASSESSMENT", "AUDIT_REPORT", "OTHER"}
                if answer in valid_types:
                    return answer, 0.85
    except Exception as e:
        logger.warning(f"LLM classification failed, using keyword fallback: {e}")
    return None
# =============================================================================
# RESPONSE MODELS
# =============================================================================
class DocumentAnalysisResponse(BaseModel):
    """Result of analyzing one uploaded compliance document."""

    document_id: str
    filename: str
    detected_type: str
    # 1.0 when the caller supplied an explicit type; heuristic/LLM confidence otherwise
    confidence: float
    # Regulation keywords spotted in the text (e.g. "DSGVO", "AI Act")
    extracted_entities: list[str]
    recommendations: list[str]
    # Summary dict produced by the gap analysis (counts plus the gap list)
    gap_analysis: dict
class DocumentListResponse(BaseModel):
    """Imported documents of one tenant plus the count of returned entries."""

    documents: list[dict]
    total: int
# =============================================================================
# ROUTES
# =============================================================================
@router.post("/analyze", response_model=DocumentAnalysisResponse)
async def analyze_document(
file: UploadFile = File(...),
document_type: str = Form("OTHER"),
tenant_id: str = Form("default"),
):
"""Upload and analyze a compliance document."""
if not file.filename:
raise HTTPException(status_code=400, detail="No file provided")
# Read file content
content = await file.read()
file_size = len(content)
# Extract text
if file.content_type == "application/pdf" or (file.filename and file.filename.endswith(".pdf")):
text = extract_text_from_pdf(content)
else:
# Try to decode as text
try:
text = content.decode("utf-8")
except UnicodeDecodeError:
text = ""
# Detect document type
if document_type == "OTHER" and text:
# Try LLM first, fallback to keywords
llm_result = await classify_with_llm(text)
if llm_result:
detected_type, confidence = llm_result
else:
detected_type, confidence = detect_document_type(text)
else:
detected_type = document_type
confidence = 1.0
# Extract key entities
entities = []
entity_keywords = ["DSGVO", "AI Act", "ISO 27001", "NIS2", "BDSG",
"Personenbezogene Daten", "Auftragsverarbeitung", "DSFA"]
for kw in entity_keywords:
if kw.lower() in text.lower():
entities.append(kw)
# Analyze gaps
gaps = analyze_gaps(text, detected_type)
# Generate recommendations
recommendations = []
if gaps:
recommendations = [g["required_action"] for g in gaps[:5]]
if not recommendations:
recommendations = ["Dokument erscheint vollstaendig"]
# Persist to database
doc_id = str(uuid.uuid4())
db = SessionLocal()
try:
db.execute(
"""INSERT INTO compliance_imported_documents
(id, tenant_id, filename, file_type, file_size, detected_type, detection_confidence,
extracted_text, extracted_entities, recommendations, status, analyzed_at)
VALUES (:id, :tenant_id, :filename, :file_type, :file_size, :detected_type, :confidence,
:text, :entities::jsonb, :recommendations::jsonb, 'analyzed', NOW())""",
{
"id": doc_id,
"tenant_id": tenant_id,
"filename": file.filename,
"file_type": file.content_type or "unknown",
"file_size": file_size,
"detected_type": detected_type,
"confidence": confidence,
"text": text[:50000], # Limit stored text
"entities": str(entities).replace("'", '"'),
"recommendations": str(recommendations).replace("'", '"'),
},
)
# Save gap analysis
total_gaps = len(gaps)
gap_analysis_result = {
"id": f"analysis-{doc_id[:8]}",
"total_gaps": total_gaps,
"critical_gaps": len([g for g in gaps if g["severity"] == "CRITICAL"]),
"high_gaps": len([g for g in gaps if g["severity"] == "HIGH"]),
"medium_gaps": len([g for g in gaps if g["severity"] == "MEDIUM"]),
"low_gaps": len([g for g in gaps if g["severity"] == "LOW"]),
"gaps": gaps,
"recommended_packages": ["analyse", "dokumentation"] if total_gaps > 0 else [],
}
if total_gaps > 0:
import json
db.execute(
"""INSERT INTO compliance_gap_analyses
(tenant_id, document_id, total_gaps, critical_gaps, high_gaps, medium_gaps, low_gaps, gaps, recommended_packages)
VALUES (:tenant_id, :document_id, :total, :critical, :high, :medium, :low, :gaps::jsonb, :packages::jsonb)""",
{
"tenant_id": tenant_id,
"document_id": doc_id,
"total": gap_analysis_result["total_gaps"],
"critical": gap_analysis_result["critical_gaps"],
"high": gap_analysis_result["high_gaps"],
"medium": gap_analysis_result["medium_gaps"],
"low": gap_analysis_result["low_gaps"],
"gaps": json.dumps(gaps),
"packages": json.dumps(gap_analysis_result["recommended_packages"]),
},
)
db.commit()
except Exception as e:
db.rollback()
logger.error(f"Failed to persist document analysis: {e}")
finally:
db.close()
return DocumentAnalysisResponse(
document_id=doc_id,
filename=file.filename or "unknown",
detected_type=detected_type,
confidence=confidence,
extracted_entities=entities,
recommendations=recommendations,
gap_analysis=gap_analysis_result,
)
@router.get("/documents", response_model=DocumentListResponse)
async def list_documents(tenant_id: str = "default"):
"""List all imported documents for a tenant."""
db = SessionLocal()
try:
result = db.execute(
"""SELECT id, filename, file_type, file_size, detected_type, detection_confidence,
extracted_entities, recommendations, status, analyzed_at, created_at
FROM compliance_imported_documents
WHERE tenant_id = :tenant_id
ORDER BY created_at DESC""",
{"tenant_id": tenant_id},
)
rows = result.fetchall()
documents = []
for row in rows:
documents.append({
"id": str(row[0]),
"filename": row[1],
"file_type": row[2],
"file_size": row[3],
"detected_type": row[4],
"confidence": row[5],
"extracted_entities": row[6] or [],
"recommendations": row[7] or [],
"status": row[8],
"analyzed_at": str(row[9]) if row[9] else None,
"created_at": str(row[10]),
})
return DocumentListResponse(documents=documents, total=len(documents))
finally:
db.close()

View File

@@ -0,0 +1,608 @@
"""
FastAPI routes for System Screening (SBOM Generation + Vulnerability Scan).
Endpoints:
- POST /v1/screening/scan: Upload dependency file, generate SBOM, scan for vulnerabilities
- GET /v1/screening/{screening_id}: Get screening result by ID
- GET /v1/screening: List screenings for a tenant
"""
import json
import logging
import re
import uuid
from datetime import datetime, timezone
from typing import Optional
import httpx
from fastapi import APIRouter, File, Form, UploadFile, HTTPException
from pydantic import BaseModel
from database import SessionLocal
# Module-level logger for the system-screening routes.
logger = logging.getLogger(__name__)
# All endpoints below are mounted under /v1/screening.
router = APIRouter(prefix="/v1/screening", tags=["system-screening"])
# Public OSV.dev single-package vulnerability query endpoint.
OSV_API_URL = "https://api.osv.dev/v1/query"
# =============================================================================
# RESPONSE MODELS
# =============================================================================
class SecurityIssueResponse(BaseModel):
    """One vulnerability finding for a scanned component."""

    id: str
    # CRITICAL / HIGH / MEDIUM / LOW (derived from OSV data)
    severity: str
    title: str
    description: Optional[str] = None
    # CVE alias if OSV lists one
    cve: Optional[str] = None
    # Approximate CVSS score derived from the severity bucket
    cvss: Optional[float] = None
    affected_component: str
    affected_version: Optional[str] = None
    # First version that fixes the issue, when OSV records one
    fixed_in: Optional[str] = None
    remediation: Optional[str] = None
    status: str = "OPEN"
class SBOMComponentResponse(BaseModel):
    """One component entry of the generated SBOM."""

    name: str
    version: str
    type: str
    # Package URL, e.g. pkg:npm/lodash@4.17.21
    purl: str
    licenses: list[str]
    vulnerabilities: list[dict]
class ScreeningResponse(BaseModel):
    """Full result of one dependency screening run."""

    id: str
    status: str
    # SBOM format metadata (CycloneDX spec version)
    sbom_format: str
    sbom_version: str
    total_components: int
    # Issue counts broken down by severity
    total_issues: int
    critical_issues: int
    high_issues: int
    medium_issues: int
    low_issues: int
    components: list[SBOMComponentResponse]
    issues: list[SecurityIssueResponse]
    started_at: Optional[str] = None
    completed_at: Optional[str] = None
class ScreeningListResponse(BaseModel):
    """Screenings of one tenant plus the count of returned entries."""

    screenings: list[dict]
    total: int
# =============================================================================
# DEPENDENCY PARSING
# =============================================================================
def parse_package_lock(content: str) -> list[dict]:
    """Parse a package-lock.json (v1, v2 or v3) into component dicts.

    Returns [] for invalid JSON. v2/v3 "packages" entries are preferred;
    the flat v1 "dependencies" map is only consulted when the former yields
    nothing.
    """
    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        return []
    found: list[dict] = []
    # v2/v3 lockfiles list everything under "packages", keyed by install path.
    for path, info in data.get("packages", {}).items():
        if not path:  # "" is the root project itself
            continue
        name = path.split("node_modules/")[-1] if "node_modules/" in path else path
        version = info.get("version", "unknown")
        if name and version != "unknown":
            found.append({
                "name": name,
                "version": version,
                "type": "library",
                "ecosystem": "npm",
                "license": info.get("license", "unknown"),
            })
    if found:
        return found
    # Fallback: v1 format (flat "dependencies" map, no license info).
    for name, info in data.get("dependencies", {}).items():
        if isinstance(info, dict):
            found.append({
                "name": name,
                "version": info.get("version", "unknown"),
                "type": "library",
                "ecosystem": "npm",
                "license": "unknown",
            })
    return found
def parse_requirements_txt(content: str) -> list[dict]:
    """Parse requirements.txt lines into component dicts.

    Recognizes "name<op>version" pins (==, >=, <=, ~=, != …) and bare names
    (stored with version "latest"); blanks, comments and pip options are
    skipped.
    """
    pinned = re.compile(r'^([a-zA-Z0-9_.-]+)\s*([>=<~!]+)\s*([a-zA-Z0-9_.*-]+)')
    bare = re.compile(r'^[a-zA-Z0-9_.-]+$')
    found = []
    for raw in content.strip().split("\n"):
        entry = raw.strip()
        # Skip blanks, comments, and pip options such as "-r base.txt".
        if not entry or entry[0] in "#-":
            continue
        m = pinned.match(entry)
        if m:
            name, version = m.group(1), m.group(3)
        elif bare.match(entry):
            name, version = entry, "latest"
        else:
            continue
        found.append({
            "name": name,
            "version": version,
            "type": "library",
            "ecosystem": "PyPI",
            "license": "unknown",
        })
    return found
def parse_yarn_lock(content: str) -> list[dict]:
    """Parse yarn.lock (classic v1 format) into component dicts (basic).

    A descriptor line such as `"lodash@^4.17.0":` names the package; the
    version comes from the indented `version "x.y.z"` line that follows.
    Scoped packages (`"@babel/core@^7.0.0":`) are now handled — the previous
    regex `^"?([^@]+)@` backtracked on the leading "@" and captured the quote
    character as the name. Multi-descriptor keys (`"a@1", "a@2":`) are still
    not matched, as before.
    """
    components: list[dict] = []
    current_name: str | None = None
    # Optional quote, optional @scope/ prefix, then the bare name up to the
    # "@range" part of the descriptor.
    descriptor = re.compile(r'^"?((?:@[^/"]+/)?[^@"]+)@[^"]*"?:')
    version_line = re.compile(r'\s+version\s+"?([^"]+)"?')
    for line in content.split("\n"):
        match = descriptor.match(line)
        if match:
            current_name = match.group(1).strip()
        elif current_name and line.strip().startswith("version "):
            version_match = version_line.match(line)
            if version_match:
                components.append({
                    "name": current_name,
                    "version": version_match.group(1),
                    "type": "library",
                    "ecosystem": "npm",
                    "license": "unknown",
                })
                current_name = None
    return components
def detect_and_parse(filename: str, content: str) -> tuple[list[dict], str]:
    """Pick a parser from the filename and return (components, ecosystem).

    Unknown .json files are probed first as an npm lockfile, then with the
    requirements.txt syntax; anything unparseable yields ([], "unknown").
    """
    fname = filename.lower()
    if "package-lock" in fname or fname.endswith("package-lock.json"):
        return parse_package_lock(content), "npm"
    if fname == "requirements.txt" or fname.endswith("/requirements.txt"):
        return parse_requirements_txt(content), "PyPI"
    if "yarn.lock" in fname:
        return parse_yarn_lock(content), "npm"
    if fname.endswith(".json"):
        for parser, eco in ((parse_package_lock, "npm"), (parse_requirements_txt, "PyPI")):
            parsed = parser(content)
            if parsed:
                return parsed, eco
    return [], "unknown"
# =============================================================================
# SBOM GENERATION (CycloneDX format)
# =============================================================================
def generate_sbom(components: list[dict], ecosystem: str) -> dict:
    """Build a CycloneDX 1.5 SBOM document from parsed components.

    The purl is synthesized as pkg:<ecosystem>/<name>@<version>; "unknown"
    licenses are emitted as an empty license list.
    """
    eco = ecosystem.lower()
    entries = []
    for comp in components:
        if comp.get("license") != "unknown":
            licenses = [comp.get("license", "unknown")]
        else:
            licenses = []
        entries.append({
            "type": "library",
            "name": comp["name"],
            "version": comp["version"],
            "purl": f"pkg:{eco}/{comp['name']}@{comp['version']}",
            "licenses": licenses,
        })
    return {
        "bomFormat": "CycloneDX",
        "specVersion": "1.5",
        "version": 1,
        "metadata": {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "tools": [{"name": "breakpilot-screening", "version": "1.0.0"}],
        },
        "components": entries,
    }
# =============================================================================
# VULNERABILITY SCANNING (OSV.dev API)
# =============================================================================
async def query_osv(name: str, version: str, ecosystem: str) -> list[dict]:
    """Ask OSV.dev for known vulnerabilities of one package version.

    Returns the raw `vulns` list from the API; any transport error, non-200
    response or undecodable body results in [] (logged at WARNING level).
    """
    payload = {
        "package": {"name": name, "ecosystem": ecosystem},
        "version": version,
    }
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(OSV_API_URL, json=payload)
            if resp.status_code == 200:
                return resp.json().get("vulns", [])
    except Exception as e:
        logger.warning(f"OSV query failed for {name}@{version}: {e}")
    return []
def map_osv_severity(vuln: dict) -> tuple[str, float]:
    """Extract a severity bucket and an approximate CVSS score from OSV data.

    Only `database_specific.severity` is consulted: CVSS_V3 entries in the
    `severity` array carry a vector string, not a numeric base score, so the
    original parsing loop was dead code (unused variable, bare `pass`) and has
    been removed. Unknown/missing severities default to MEDIUM. The returned
    CVSS value is derived from the bucket, not from a real vector calculation.
    """
    severity = "MEDIUM"
    db_specific = vuln.get("database_specific", {})
    # str() guards against non-string values in third-party data.
    sev_str = str(db_specific.get("severity", "")).upper()
    if sev_str in ("CRITICAL", "HIGH", "MEDIUM", "LOW"):
        severity = sev_str
    # Representative mid-range score per bucket.
    cvss_map = {"CRITICAL": 9.5, "HIGH": 7.5, "MEDIUM": 5.0, "LOW": 2.5}
    return severity, cvss_map.get(severity, 5.0)
def extract_fix_version(vuln: dict, package_name: str) -> Optional[str]:
    """Return the first 'fixed' version recorded for *package_name*, else None.

    Package names are compared case-insensitively against the OSV `affected`
    entries.
    """
    target = package_name.lower()
    for affected in vuln.get("affected", []):
        if affected.get("package", {}).get("name", "").lower() != target:
            continue
        for rng in affected.get("ranges", []):
            for event in rng.get("events", []):
                if "fixed" in event:
                    return event["fixed"]
    return None
async def scan_vulnerabilities(components: list[dict], ecosystem: str) -> list[dict]:
    """Scan components for vulnerabilities via OSV.dev.

    At most 50 components are queried to keep total request time bounded,
    and components with unpinned versions ("latest", "unknown", "*") are
    skipped because OSV needs a concrete version to match against.
    """
    unpinned = ("latest", "unknown", "*")
    findings: list[dict] = []
    # Batch: scan up to 50 components to avoid timeouts
    for component in components[:50]:
        if component["version"] in unpinned:
            continue
        for vuln in await query_osv(component["name"], component["version"], ecosystem):
            vuln_id = vuln.get("id", f"OSV-{uuid.uuid4().hex[:8]}")
            cve = next((a for a in vuln.get("aliases", []) if a.startswith("CVE-")), None)
            severity, cvss = map_osv_severity(vuln)
            fixed_in = extract_fix_version(vuln, component["name"])
            if fixed_in:
                remediation = f"Upgrade {component['name']} to {fixed_in}"
            else:
                remediation = f"Check {vuln_id} for remediation steps"
            findings.append({
                "id": str(uuid.uuid4()),
                "severity": severity,
                "title": vuln.get("summary", vuln_id),
                "description": vuln.get("details", "")[:500],
                "cve": cve,
                "cvss": cvss,
                "affected_component": component["name"],
                "affected_version": component["version"],
                "fixed_in": fixed_in,
                "remediation": remediation,
                "status": "OPEN",
            })
    return findings
# =============================================================================
# ROUTES
# =============================================================================
@router.post("/scan", response_model=ScreeningResponse)
async def scan_dependencies(
    file: UploadFile = File(...),
    tenant_id: str = Form("default"),
):
    """Upload a dependency file, generate SBOM, and scan for vulnerabilities.

    Pipeline: decode the upload -> parse the manifest into components ->
    build a CycloneDX 1.5 SBOM -> query OSV.dev per pinned component ->
    persist the screening and its issues (best-effort) -> return the
    aggregated result.

    Raises:
        HTTPException 400: missing filename, non-UTF-8 content, or a
            manifest format that ``detect_and_parse`` cannot handle.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")
    content = await file.read()
    try:
        text = content.decode("utf-8")
    except UnicodeDecodeError:
        raise HTTPException(status_code=400, detail="File must be a text-based dependency file")
    # Parse dependencies: format is detected from the filename, yielding the
    # component list and its package ecosystem (e.g. npm, PyPI).
    components, ecosystem = detect_and_parse(file.filename, text)
    if not components:
        raise HTTPException(
            status_code=400,
            detail="Could not parse dependencies. Supported: package-lock.json, requirements.txt, yarn.lock",
        )
    # Generate SBOM (CycloneDX document built from the parsed components)
    sbom = generate_sbom(components, ecosystem)
    # Scan for vulnerabilities; timestamps bracket the OSV.dev queries
    started_at = datetime.now(timezone.utc)
    issues = await scan_vulnerabilities(components, ecosystem)
    completed_at = datetime.now(timezone.utc)
    # Count severities for the summary columns and response fields
    critical = len([i for i in issues if i["severity"] == "CRITICAL"])
    high = len([i for i in issues if i["severity"] == "HIGH"])
    medium = len([i for i in issues if i["severity"] == "MEDIUM"])
    low = len([i for i in issues if i["severity"] == "LOW"])
    # Persist to database. Best-effort: a failure is rolled back and logged,
    # but the scan result is still returned to the caller (with an id that
    # then has no DB row behind it).
    # NOTE(review): the ::jsonb cast assumes PostgreSQL — confirm the
    # InMemory/SQLite fallback never routes through here.
    screening_id = str(uuid.uuid4())
    db = SessionLocal()
    try:
        db.execute(
            """INSERT INTO compliance_screenings
               (id, tenant_id, status, sbom_format, sbom_version,
                total_components, total_issues, critical_issues, high_issues, medium_issues, low_issues,
                sbom_data, started_at, completed_at)
               VALUES (:id, :tenant_id, 'completed', 'CycloneDX', '1.5',
                       :total_components, :total_issues, :critical, :high, :medium, :low,
                       :sbom_data::jsonb, :started_at, :completed_at)""",
            {
                "id": screening_id,
                "tenant_id": tenant_id,
                "total_components": len(components),
                "total_issues": len(issues),
                "critical": critical,
                "high": high,
                "medium": medium,
                "low": low,
                "sbom_data": json.dumps(sbom),
                "started_at": started_at,
                "completed_at": completed_at,
            },
        )
        # Persist security issues (one row per finding, FK to the screening)
        for issue in issues:
            db.execute(
                """INSERT INTO compliance_security_issues
                   (id, screening_id, severity, title, description, cve, cvss,
                    affected_component, affected_version, fixed_in, remediation, status)
                   VALUES (:id, :screening_id, :severity, :title, :description, :cve, :cvss,
                           :component, :version, :fixed_in, :remediation, :status)""",
                {
                    "id": issue["id"],
                    "screening_id": screening_id,
                    "severity": issue["severity"],
                    # Truncate to fit the column sizes
                    "title": issue["title"][:500],
                    "description": issue.get("description", "")[:1000],
                    "cve": issue.get("cve"),
                    "cvss": issue.get("cvss"),
                    "component": issue["affected_component"],
                    "version": issue.get("affected_version"),
                    "fixed_in": issue.get("fixed_in"),
                    "remediation": issue.get("remediation"),
                    "status": issue["status"],
                },
            )
        db.commit()
    except Exception as e:
        db.rollback()
        logger.error(f"Failed to persist screening: {e}")
    finally:
        db.close()
    # Build response: group the flat issue list by component name so each
    # SBOM component carries its own vulnerability list.
    sbom_components = []
    comp_vulns: dict[str, list[dict]] = {}
    for issue in issues:
        comp_name = issue["affected_component"]
        if comp_name not in comp_vulns:
            comp_vulns[comp_name] = []
        comp_vulns[comp_name].append({
            # Prefer the CVE id for display; fall back to the internal id
            "id": issue.get("cve") or issue["id"],
            "cve": issue.get("cve"),
            "severity": issue["severity"],
            "title": issue["title"],
            "cvss": issue.get("cvss"),
            "fixedIn": issue.get("fixed_in"),
        })
    for sc in sbom["components"]:
        sbom_components.append(SBOMComponentResponse(
            name=sc["name"],
            version=sc["version"],
            type=sc["type"],
            purl=sc["purl"],
            licenses=sc.get("licenses", []),
            vulnerabilities=comp_vulns.get(sc["name"], []),
        ))
    issue_responses = [
        SecurityIssueResponse(
            id=i["id"],
            severity=i["severity"],
            title=i["title"],
            description=i.get("description"),
            cve=i.get("cve"),
            cvss=i.get("cvss"),
            affected_component=i["affected_component"],
            affected_version=i.get("affected_version"),
            fixed_in=i.get("fixed_in"),
            remediation=i.get("remediation"),
            status=i["status"],
        )
        for i in issues
    ]
    return ScreeningResponse(
        id=screening_id,
        status="completed",
        sbom_format="CycloneDX",
        sbom_version="1.5",
        total_components=len(components),
        total_issues=len(issues),
        critical_issues=critical,
        high_issues=high,
        medium_issues=medium,
        low_issues=low,
        components=sbom_components,
        issues=issue_responses,
        started_at=started_at.isoformat(),
        completed_at=completed_at.isoformat(),
    )
@router.get("/{screening_id}", response_model=ScreeningResponse)
async def get_screening(screening_id: str):
    """Get a screening result by ID.

    Loads the screening row and its security issues, then reconstructs the
    per-component vulnerability lists from the stored SBOM JSON.  Row values
    are accessed positionally, so the SELECT column order below is the
    contract for the index accesses further down.

    Raises:
        HTTPException 404: no screening with this ID exists.
    """
    db = SessionLocal()
    try:
        # Columns (by index): 0=id, 1=status, 2=sbom_format, 3=sbom_version,
        # 4=total_components, 5=total_issues, 6=critical, 7=high,
        # 8=medium, 9=low, 10=sbom_data, 11=started_at, 12=completed_at
        result = db.execute(
            """SELECT id, status, sbom_format, sbom_version,
                      total_components, total_issues, critical_issues, high_issues,
                      medium_issues, low_issues, sbom_data, started_at, completed_at
               FROM compliance_screenings WHERE id = :id""",
            {"id": screening_id},
        )
        row = result.fetchone()
        if not row:
            raise HTTPException(status_code=404, detail="Screening not found")
        # Fetch issues belonging to this screening
        issues_result = db.execute(
            """SELECT id, severity, title, description, cve, cvss,
                      affected_component, affected_version, fixed_in, remediation, status
               FROM compliance_security_issues WHERE screening_id = :id""",
            {"id": screening_id},
        )
        issues_rows = issues_result.fetchall()
        issues = [
            SecurityIssueResponse(
                id=str(r[0]), severity=r[1], title=r[2], description=r[3],
                cve=r[4], cvss=r[5], affected_component=r[6],
                affected_version=r[7], fixed_in=r[8], remediation=r[9], status=r[10],
            )
            for r in issues_rows
        ]
        # Reconstruct components from SBOM data.
        # NOTE(review): assumes the driver deserializes the jsonb column to a
        # dict (psycopg behavior) — confirm if another driver is used.
        sbom_data = row[10] or {}
        components = []
        # Group issues by component name, mirroring the scan response shape
        comp_vulns: dict[str, list[dict]] = {}
        for issue in issues:
            if issue.affected_component not in comp_vulns:
                comp_vulns[issue.affected_component] = []
            comp_vulns[issue.affected_component].append({
                # Prefer the CVE id for display; fall back to the internal id
                "id": issue.cve or issue.id,
                "cve": issue.cve,
                "severity": issue.severity,
                "title": issue.title,
                "cvss": issue.cvss,
                "fixedIn": issue.fixed_in,
            })
        for sc in sbom_data.get("components", []):
            components.append(SBOMComponentResponse(
                name=sc["name"],
                version=sc["version"],
                type=sc.get("type", "library"),
                purl=sc.get("purl", ""),
                licenses=sc.get("licenses", []),
                vulnerabilities=comp_vulns.get(sc["name"], []),
            ))
        return ScreeningResponse(
            id=str(row[0]),
            status=row[1],
            # Fall back to current defaults for rows persisted without format info
            sbom_format=row[2] or "CycloneDX",
            sbom_version=row[3] or "1.5",
            total_components=row[4] or 0,
            total_issues=row[5] or 0,
            critical_issues=row[6] or 0,
            high_issues=row[7] or 0,
            medium_issues=row[8] or 0,
            low_issues=row[9] or 0,
            components=components,
            issues=issues,
            started_at=str(row[11]) if row[11] else None,
            completed_at=str(row[12]) if row[12] else None,
        )
    finally:
        db.close()
@router.get("", response_model=ScreeningListResponse)
async def list_screenings(tenant_id: str = "default"):
    """Return every screening recorded for *tenant_id*, newest first."""
    session = SessionLocal()
    try:
        rows = session.execute(
            """SELECT id, status, total_components, total_issues,
                      critical_issues, high_issues, medium_issues, low_issues,
                      started_at, completed_at, created_at
               FROM compliance_screenings
               WHERE tenant_id = :tenant_id
               ORDER BY created_at DESC""",
            {"tenant_id": tenant_id},
        ).fetchall()
        # Column names in the same order as the SELECT above
        columns = (
            "id", "status", "total_components", "total_issues",
            "critical_issues", "high_issues", "medium_issues", "low_issues",
            "started_at", "completed_at", "created_at",
        )
        screenings = []
        for row in rows:
            record = dict(zip(columns, row))
            # Stringify UUID and timestamps for JSON serialization;
            # started_at/completed_at may be NULL for unfinished scans.
            record["id"] = str(record["id"])
            record["started_at"] = str(record["started_at"]) if record["started_at"] else None
            record["completed_at"] = str(record["completed_at"]) if record["completed_at"] else None
            record["created_at"] = str(record["created_at"])
            screenings.append(record)
        return ScreeningListResponse(screenings=screenings, total=len(screenings))
    finally:
        session.close()