feat: Vorbereitung-Module auf 100% — Persistenz, Backend-Services, UCCA Frontend
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 37s
CI / test-python-backend-compliance (push) Successful in 32s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 18s

Phase A: PostgreSQL State Store (sdk_states Tabelle, InMemory-Fallback)
Phase B: Modules dynamisch vom Backend, Scope DB-Persistenz, Source Policy State
Phase C: UCCA Frontend (3 Seiten, Wizard, RiskScoreGauge), Obligations Live-Daten
Phase D: Document Import (PDF/LLM/Gap-Analyse), System Screening (SBOM/OSV.dev)
Phase E: Company Profile CRUD mit Audit-Logging
Phase F: Tests (Python + TypeScript), flow-data.ts DB-Tabellen aktualisiert

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-02 11:04:31 +01:00
parent cd15ab0932
commit e6d666b89b
38 changed files with 4195 additions and 420 deletions

View File

@@ -0,0 +1,344 @@
"""
FastAPI routes for Company Profile CRUD with audit logging.
Endpoints:
- GET /v1/company-profile: Get company profile for a tenant
- POST /v1/company-profile: Create or update company profile
- GET /v1/company-profile/audit: Get audit log for a tenant
"""
import json
import logging
import uuid
from typing import Optional
from fastapi import APIRouter, HTTPException, Header
from pydantic import BaseModel
from database import SessionLocal
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/v1/company-profile", tags=["company-profile"])
# =============================================================================
# REQUEST/RESPONSE MODELS
# =============================================================================
class CompanyProfileRequest(BaseModel):
    """Client payload for creating or updating a tenant's company profile.

    Every field has a default so partial payloads validate; Pydantic deep-copies
    mutable defaults per instance, so the list defaults here are safe.
    """
    # Basic company data
    company_name: str = ""
    legal_form: str = "GmbH"
    industry: str = ""
    founded_year: Optional[int] = None
    business_model: str = "B2B"
    offerings: list[str] = []
    # Size / revenue classification
    company_size: str = "small"
    employee_count: str = "1-9"
    annual_revenue: str = "< 2 Mio"
    # Locations and markets
    headquarters_country: str = "DE"
    headquarters_city: str = ""
    has_international_locations: bool = False
    international_countries: list[str] = []
    target_markets: list[str] = ["DE"]
    primary_jurisdiction: str = "DE"
    # Data-protection roles and AI usage
    is_data_controller: bool = True
    is_data_processor: bool = False
    uses_ai: bool = False
    ai_use_cases: list[str] = []
    # Contact persons (optional)
    dpo_name: Optional[str] = None
    dpo_email: Optional[str] = None
    legal_contact_name: Optional[str] = None
    legal_contact_email: Optional[str] = None
    # Free-form machine-builder specific data; schema defined by the frontend
    machine_builder: Optional[dict] = None
    # Marks the profile wizard as finished; drives completed_at in the upsert SQL
    is_complete: bool = False
class CompanyProfileResponse(BaseModel):
    """Company profile as stored in compliance_company_profiles.

    Mirrors CompanyProfileRequest plus server-managed fields (id, tenant_id,
    timestamps).  Timestamps are serialized as strings by row_to_response.
    """
    id: str
    tenant_id: str
    company_name: str
    legal_form: str
    industry: str
    founded_year: Optional[int]
    business_model: str
    offerings: list[str]
    company_size: str
    employee_count: str
    annual_revenue: str
    headquarters_country: str
    headquarters_city: str
    has_international_locations: bool
    international_countries: list[str]
    target_markets: list[str]
    primary_jurisdiction: str
    is_data_controller: bool
    is_data_processor: bool
    uses_ai: bool
    ai_use_cases: list[str]
    dpo_name: Optional[str]
    dpo_email: Optional[str]
    legal_contact_name: Optional[str]
    legal_contact_email: Optional[str]
    machine_builder: Optional[dict]
    is_complete: bool
    # Set when is_complete was saved as True, cleared otherwise (see upsert)
    completed_at: Optional[str]
    created_at: str
    updated_at: str
class AuditEntryResponse(BaseModel):
    """One row of the company-profile audit log."""
    id: str
    # "create" or "update" (decided by upsert_company_profile)
    action: str
    # Full submitted payload at the time of the change, if recorded
    changed_fields: Optional[dict]
    changed_by: Optional[str]
    created_at: str
class AuditListResponse(BaseModel):
    """Audit log page: entries (newest first, capped at 100) plus count."""
    entries: list[AuditEntryResponse]
    total: int
# =============================================================================
# HELPERS
# =============================================================================
def row_to_response(row) -> CompanyProfileResponse:
    """Build a CompanyProfileResponse from a positionally-ordered DB row.

    The index order must match the SELECT column order used by the
    company-profile routes; NULL columns fall back to the same defaults
    used by CompanyProfileRequest.
    """
    def _text(i: int, fallback: str) -> str:
        # Empty string / NULL collapses to the fallback.
        return row[i] or fallback

    def _flag(i: int) -> bool:
        return row[i] or False

    def _items(i: int, fallback: list | None = None) -> list:
        return row[i] if isinstance(row[i], list) else (fallback if fallback is not None else [])

    return CompanyProfileResponse(
        id=str(row[0]),
        tenant_id=row[1],
        company_name=_text(2, ""),
        legal_form=_text(3, "GmbH"),
        industry=_text(4, ""),
        founded_year=row[5],
        business_model=_text(6, "B2B"),
        offerings=_items(7),
        company_size=_text(8, "small"),
        employee_count=_text(9, "1-9"),
        annual_revenue=_text(10, "< 2 Mio"),
        headquarters_country=_text(11, "DE"),
        headquarters_city=_text(12, ""),
        has_international_locations=_flag(13),
        international_countries=_items(14),
        target_markets=_items(15, ["DE"]),
        primary_jurisdiction=_text(16, "DE"),
        # Controller defaults to True when the column is NULL (request default).
        is_data_controller=True if row[17] is None else row[17],
        is_data_processor=_flag(18),
        uses_ai=_flag(19),
        ai_use_cases=_items(20),
        dpo_name=row[21],
        dpo_email=row[22],
        legal_contact_name=row[23],
        legal_contact_email=row[24],
        machine_builder=row[25] if isinstance(row[25], dict) else None,
        is_complete=_flag(26),
        completed_at=str(row[27]) if row[27] else None,
        created_at=str(row[28]),
        updated_at=str(row[29]),
    )
def log_audit(db, tenant_id: str, action: str, changed_fields: dict | None, changed_by: str | None):
    """Write an audit log entry.

    Best-effort by design: any failure is logged and swallowed so that an
    audit problem never blocks the profile write itself.  The caller owns the
    transaction and is responsible for commit().
    """
    try:
        db.execute(
            """INSERT INTO compliance_company_profile_audit
            (tenant_id, action, changed_fields, changed_by)
            VALUES (:tenant_id, :action, :fields::jsonb, :changed_by)""",
            {
                "tenant_id": tenant_id,
                "action": action,
                # Serialize to JSON here; the ::jsonb cast happens in SQL.
                "fields": json.dumps(changed_fields) if changed_fields else None,
                "changed_by": changed_by,
            },
        )
    except Exception as e:
        logger.warning(f"Failed to write audit log: {e}")
# =============================================================================
# ROUTES
# =============================================================================
@router.get("", response_model=CompanyProfileResponse)
async def get_company_profile(
    tenant_id: str = "default",
    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
):
    """Return the company profile for a tenant; 404 when none exists.

    The X-Tenant-ID header takes precedence over the query parameter.
    """
    tenant = x_tenant_id or tenant_id
    session = SessionLocal()
    try:
        record = session.execute(
            """SELECT id, tenant_id, company_name, legal_form, industry, founded_year,
            business_model, offerings, company_size, employee_count, annual_revenue,
            headquarters_country, headquarters_city, has_international_locations,
            international_countries, target_markets, primary_jurisdiction,
            is_data_controller, is_data_processor, uses_ai, ai_use_cases,
            dpo_name, dpo_email, legal_contact_name, legal_contact_email,
            machine_builder, is_complete, completed_at, created_at, updated_at
            FROM compliance_company_profiles WHERE tenant_id = :tenant_id""",
            {"tenant_id": tenant},
        ).fetchone()
        if record is None:
            raise HTTPException(status_code=404, detail="Company profile not found")
        return row_to_response(record)
    finally:
        session.close()
@router.post("", response_model=CompanyProfileResponse)
async def upsert_company_profile(
    profile: CompanyProfileRequest,
    tenant_id: str = "default",
    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
):
    """Create or update company profile (upsert).

    The X-Tenant-ID header overrides the query parameter.  The write is a
    single INSERT ... ON CONFLICT (tenant_id) DO UPDATE, an audit entry with
    the full submitted payload is written in the same transaction, and the
    stored row is re-read and returned.  Any failure rolls back and maps to
    HTTP 500.
    """
    tid = x_tenant_id or tenant_id
    db = SessionLocal()
    try:
        # The existence check only decides the audit action label
        # ("create" vs "update"); the write itself relies on ON CONFLICT.
        existing = db.execute(
            "SELECT id FROM compliance_company_profiles WHERE tenant_id = :tid",
            {"tid": tid},
        ).fetchone()
        action = "update" if existing else "create"
        # NOTE: f-string SQL below, but the interpolated clause is one of two
        # internal constants (never user input), so there is no injection risk.
        completed_at_clause = ", completed_at = NOW()" if profile.is_complete else ", completed_at = NULL"
        db.execute(
            f"""INSERT INTO compliance_company_profiles
            (tenant_id, company_name, legal_form, industry, founded_year,
            business_model, offerings, company_size, employee_count, annual_revenue,
            headquarters_country, headquarters_city, has_international_locations,
            international_countries, target_markets, primary_jurisdiction,
            is_data_controller, is_data_processor, uses_ai, ai_use_cases,
            dpo_name, dpo_email, legal_contact_name, legal_contact_email,
            machine_builder, is_complete)
            VALUES (:tid, :company_name, :legal_form, :industry, :founded_year,
            :business_model, :offerings::jsonb, :company_size, :employee_count, :annual_revenue,
            :hq_country, :hq_city, :has_intl, :intl_countries::jsonb,
            :target_markets::jsonb, :jurisdiction,
            :is_controller, :is_processor, :uses_ai, :ai_use_cases::jsonb,
            :dpo_name, :dpo_email, :legal_name, :legal_email,
            :machine_builder::jsonb, :is_complete)
            ON CONFLICT (tenant_id) DO UPDATE SET
            company_name = EXCLUDED.company_name,
            legal_form = EXCLUDED.legal_form,
            industry = EXCLUDED.industry,
            founded_year = EXCLUDED.founded_year,
            business_model = EXCLUDED.business_model,
            offerings = EXCLUDED.offerings,
            company_size = EXCLUDED.company_size,
            employee_count = EXCLUDED.employee_count,
            annual_revenue = EXCLUDED.annual_revenue,
            headquarters_country = EXCLUDED.headquarters_country,
            headquarters_city = EXCLUDED.headquarters_city,
            has_international_locations = EXCLUDED.has_international_locations,
            international_countries = EXCLUDED.international_countries,
            target_markets = EXCLUDED.target_markets,
            primary_jurisdiction = EXCLUDED.primary_jurisdiction,
            is_data_controller = EXCLUDED.is_data_controller,
            is_data_processor = EXCLUDED.is_data_processor,
            uses_ai = EXCLUDED.uses_ai,
            ai_use_cases = EXCLUDED.ai_use_cases,
            dpo_name = EXCLUDED.dpo_name,
            dpo_email = EXCLUDED.dpo_email,
            legal_contact_name = EXCLUDED.legal_contact_name,
            legal_contact_email = EXCLUDED.legal_contact_email,
            machine_builder = EXCLUDED.machine_builder,
            is_complete = EXCLUDED.is_complete,
            updated_at = NOW()
            {completed_at_clause}""",
            {
                "tid": tid,
                "company_name": profile.company_name,
                "legal_form": profile.legal_form,
                "industry": profile.industry,
                "founded_year": profile.founded_year,
                "business_model": profile.business_model,
                # list/dict fields are serialized to JSON; the ::jsonb cast is in SQL
                "offerings": json.dumps(profile.offerings),
                "company_size": profile.company_size,
                "employee_count": profile.employee_count,
                "annual_revenue": profile.annual_revenue,
                "hq_country": profile.headquarters_country,
                "hq_city": profile.headquarters_city,
                "has_intl": profile.has_international_locations,
                "intl_countries": json.dumps(profile.international_countries),
                "target_markets": json.dumps(profile.target_markets),
                "jurisdiction": profile.primary_jurisdiction,
                "is_controller": profile.is_data_controller,
                "is_processor": profile.is_data_processor,
                "uses_ai": profile.uses_ai,
                "ai_use_cases": json.dumps(profile.ai_use_cases),
                "dpo_name": profile.dpo_name,
                "dpo_email": profile.dpo_email,
                "legal_name": profile.legal_contact_name,
                "legal_email": profile.legal_contact_email,
                "machine_builder": json.dumps(profile.machine_builder) if profile.machine_builder else None,
                "is_complete": profile.is_complete,
            },
        )
        # Audit log (best-effort; shares this transaction's commit below).
        log_audit(db, tid, action, profile.model_dump(), None)
        db.commit()
        # Re-read the stored row so server-side values (timestamps) are returned.
        result = db.execute(
            """SELECT id, tenant_id, company_name, legal_form, industry, founded_year,
            business_model, offerings, company_size, employee_count, annual_revenue,
            headquarters_country, headquarters_city, has_international_locations,
            international_countries, target_markets, primary_jurisdiction,
            is_data_controller, is_data_processor, uses_ai, ai_use_cases,
            dpo_name, dpo_email, legal_contact_name, legal_contact_email,
            machine_builder, is_complete, completed_at, created_at, updated_at
            FROM compliance_company_profiles WHERE tenant_id = :tid""",
            {"tid": tid},
        )
        row = result.fetchone()
        return row_to_response(row)
    except Exception as e:
        db.rollback()
        logger.error(f"Failed to upsert company profile: {e}")
        raise HTTPException(status_code=500, detail="Failed to save company profile")
    finally:
        db.close()
@router.get("/audit", response_model=AuditListResponse)
async def get_audit_log(
    tenant_id: str = "default",
    x_tenant_id: Optional[str] = Header(None, alias="X-Tenant-ID"),
):
    """Return up to the 100 most recent audit entries for a tenant.

    The X-Tenant-ID header takes precedence over the query parameter.
    """
    tenant = x_tenant_id or tenant_id
    session = SessionLocal()
    try:
        rows = session.execute(
            """SELECT id, action, changed_fields, changed_by, created_at
            FROM compliance_company_profile_audit
            WHERE tenant_id = :tid
            ORDER BY created_at DESC
            LIMIT 100""",
            {"tid": tenant},
        ).fetchall()
        entries = []
        for record in rows:
            entries.append(
                AuditEntryResponse(
                    id=str(record[0]),
                    action=record[1],
                    changed_fields=record[2] if isinstance(record[2], dict) else None,
                    changed_by=record[3],
                    created_at=str(record[4]),
                )
            )
        return AuditListResponse(entries=entries, total=len(entries))
    finally:
        session.close()

View File

@@ -0,0 +1,380 @@
"""
FastAPI routes for Document Import and Gap Analysis.
Endpoints:
- POST /v1/import/analyze: Upload and analyze a compliance document
- GET /v1/import/documents: List imported documents for a tenant
- GET /v1/import/gap-analysis/{document_id}: Get gap analysis for a document
"""
import logging
import os
import uuid
from typing import Optional
import httpx
from fastapi import APIRouter, File, Form, UploadFile, HTTPException
from pydantic import BaseModel
from database import SessionLocal
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/v1/import", tags=["document-import"])
# Ollama endpoint and model used for optional LLM-based classification.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
LLM_MODEL = os.getenv("COMPLIANCE_LLM_MODEL", "qwen3:30b-a3b")
# =============================================================================
# DOCUMENT TYPE DETECTION
# =============================================================================
# Keyword heuristics per document type.  Keywords are lowercase (umlauts
# transliterated) and matched as substrings against the lowercased document
# text; see detect_document_type for the scoring.
DOCUMENT_TYPE_KEYWORDS = {
    "DSFA": ["datenschutz-folgenabschaetzung", "dsfa", "dpia", "privacy impact"],
    "TOM": ["technisch-organisatorische", "tom", "massnahmen", "technical measures"],
    "VVT": ["verarbeitungsverzeichnis", "vvt", "processing activities", "art. 30"],
    "PRIVACY_POLICY": ["datenschutzerklaerung", "privacy policy", "datenschutzhinweis"],
    "AGB": ["allgemeine geschaeftsbedingungen", "agb", "terms and conditions"],
    "COOKIE_POLICY": ["cookie", "tracking", "einwilligung"],
    "RISK_ASSESSMENT": ["risikobewertung", "risk assessment", "risikoanalyse"],
    "AUDIT_REPORT": ["audit", "pruefbericht", "zertifizierung"],
}
def detect_document_type(text: str) -> tuple[str, float]:
    """Classify a document by counting type keywords in the lowercased text.

    Returns (type, confidence).  Documents with no keyword hits yield
    ("OTHER", 0.3); otherwise confidence grows with the hit count of the
    best-matching type, capped at 0.95.
    """
    haystack = text.lower()
    hits = {
        doc_type: sum(kw in haystack for kw in keywords)
        for doc_type, keywords in DOCUMENT_TYPE_KEYWORDS.items()
    }
    hits = {doc_type: count for doc_type, count in hits.items() if count > 0}
    if not hits:
        return "OTHER", 0.3
    winner, count = max(hits.items(), key=lambda item: item[1])
    return winner, min(0.95, 0.5 + count * 0.15)
# =============================================================================
# GAP ANALYSIS
# =============================================================================
# Rule table for the gap analysis.  A rule fires ("gap found") when at least
# one check_keyword is present in the document but none of its
# gap_if_missing keywords are; see analyze_gaps.
GAP_RULES = [
    {
        "category": "AI Act Compliance",
        "regulation": "EU AI Act Art. 6",
        "check_keywords": ["ki", "ai", "kuenstliche intelligenz", "machine learning"],
        "gap_if_missing": ["risikoklassifizierung", "risk classification", "risikokategorie"],
        "severity": "CRITICAL",
        "action": "Risikoklassifizierung fuer KI-Systeme durchfuehren",
    },
    {
        "category": "Transparenz",
        "regulation": "DSGVO Art. 13, 14, 22",
        "check_keywords": ["automatisiert", "automated", "profiling"],
        "gap_if_missing": ["informationspflicht", "information obligation", "transparenz"],
        "severity": "HIGH",
        "action": "Informationspflichten bei automatisierten Entscheidungen ergaenzen",
    },
    {
        "category": "TOMs",
        "regulation": "DSGVO Art. 32",
        "check_keywords": ["ki", "ai", "cloud", "saas"],
        "gap_if_missing": ["technische massnahmen", "verschluesselung", "encryption"],
        "severity": "MEDIUM",
        "action": "Technisch-organisatorische Massnahmen um KI-Aspekte erweitern",
    },
    {
        "category": "VVT",
        "regulation": "DSGVO Art. 30",
        "check_keywords": ["verarbeitung", "processing", "daten"],
        "gap_if_missing": ["verarbeitungsverzeichnis", "vvt", "processing activities"],
        "severity": "HIGH",
        "action": "Verarbeitungsverzeichnis aktualisieren",
    },
    {
        "category": "Menschliche Aufsicht",
        "regulation": "EU AI Act Art. 14",
        "check_keywords": ["ki", "ai", "autonom", "autonomous"],
        "gap_if_missing": ["menschliche aufsicht", "human oversight", "human-in-the-loop"],
        "severity": "MEDIUM",
        "action": "Prozesse fuer menschliche Aufsicht definieren",
    },
]
def analyze_gaps(text: str, doc_type: str) -> list[dict]:
    """Run the rule-based gap analysis over the extracted document text.

    A rule fires when at least one of its trigger keywords appears in the
    document and none of its required ("gap_if_missing") keywords do.
    Returns one gap dict per fired rule, each with a fresh random id.
    """
    haystack = text.lower()
    findings: list[dict] = []
    for rule in GAP_RULES:
        triggered = any(kw in haystack for kw in rule["check_keywords"])
        satisfied = any(kw in haystack for kw in rule["gap_if_missing"])
        if triggered and not satisfied:
            findings.append({
                "id": f"gap-{uuid.uuid4().hex[:8]}",
                "category": rule["category"],
                "description": f"{rule['category']}: Luecke erkannt",
                "severity": rule["severity"],
                "regulation": rule["regulation"],
                "required_action": rule["action"],
                "related_step_id": doc_type.lower(),
            })
    return findings
# =============================================================================
# TEXT EXTRACTION
# =============================================================================
def extract_text_from_pdf(content: bytes) -> str:
    """Extract plain text from PDF bytes via PyMuPDF (fitz).

    Best-effort: returns "" when PyMuPDF is not installed or the bytes
    cannot be parsed as a PDF, so callers never have to handle errors.
    """
    log = logging.getLogger(__name__)
    try:
        import fitz
    except ImportError:
        log.warning("PyMuPDF not available, returning empty text")
        return ""
    try:
        pdf = fitz.open(stream=content, filetype="pdf")
        pages = [page.get_text() for page in pdf]
        pdf.close()
        return "\n".join(pages)
    except Exception as e:
        log.error(f"PDF extraction failed: {e}")
        return ""
# =============================================================================
# LLM CLASSIFICATION (optional enhancement)
# =============================================================================
async def classify_with_llm(text: str) -> Optional[tuple[str, float]]:
    """Use Ollama LLM to classify document type (optional, falls back to keywords).

    Sends the first 2000 characters of the document to the local Ollama
    server and expects a bare category name back.  Returns (category, 0.85)
    when the answer is one of the known labels; returns None on any error,
    non-200 status, or unexpected answer so the caller can fall back to
    keyword-based detection.
    """
    try:
        # Prompt is German by design: the classified documents are German.
        prompt = f"""Klassifiziere das folgende Dokument in eine dieser Kategorien:
DSFA, TOM, VVT, PRIVACY_POLICY, AGB, COOKIE_POLICY, RISK_ASSESSMENT, AUDIT_REPORT, OTHER
Antworte NUR mit dem Kategorienamen, nichts anderes.
Dokumenttext (erste 2000 Zeichen):
{text[:2000]}"""
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{OLLAMA_URL}/api/generate",
                json={
                    "model": LLM_MODEL,
                    "prompt": prompt,
                    "stream": False,
                    # Low temperature + tiny output budget: we want one label only.
                    "options": {"temperature": 0.1, "num_predict": 20},
                },
            )
            if response.status_code == 200:
                result = response.json()
                answer = result.get("response", "").strip().upper()
                # Validate answer: accept only exact labels, never chatty output.
                valid_types = {"DSFA", "TOM", "VVT", "PRIVACY_POLICY", "AGB",
                               "COOKIE_POLICY", "RISK_ASSESSMENT", "AUDIT_REPORT", "OTHER"}
                if answer in valid_types:
                    return answer, 0.85
    except Exception as e:
        logger.warning(f"LLM classification failed, using keyword fallback: {e}")
    return None
# =============================================================================
# RESPONSE MODELS
# =============================================================================
class DocumentAnalysisResponse(BaseModel):
    """Result of analyzing one uploaded compliance document."""
    document_id: str
    filename: str
    # One of the DOCUMENT_TYPE_KEYWORDS labels or "OTHER"
    detected_type: str
    # 1.0 when the type was supplied by the client, otherwise detector-derived
    confidence: float
    extracted_entities: list[str]
    recommendations: list[str]
    # Aggregated gap-analysis result (counts + individual gaps)
    gap_analysis: dict
class DocumentListResponse(BaseModel):
    """List of previously imported documents for a tenant, plus count."""
    documents: list[dict]
    total: int
# =============================================================================
# ROUTES
# =============================================================================
@router.post("/analyze", response_model=DocumentAnalysisResponse)
async def analyze_document(
    file: UploadFile = File(...),
    document_type: str = Form("OTHER"),
    tenant_id: str = Form("default"),
):
    """Upload and analyze a compliance document.

    Pipeline: extract text (PDF via PyMuPDF, otherwise UTF-8 decode) ->
    detect the document type (LLM first, keyword fallback) -> collect known
    regulation entities -> run the rule-based gap analysis -> persist.
    Persistence is best-effort: DB failures are logged and rolled back, but
    the analysis response is always returned.
    """
    import json  # local import: this module does not import json at top level

    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")
    content = await file.read()
    file_size = len(content)

    # --- Text extraction --------------------------------------------------
    if file.content_type == "application/pdf" or file.filename.endswith(".pdf"):
        text = extract_text_from_pdf(content)
    else:
        try:
            text = content.decode("utf-8")
        except UnicodeDecodeError:
            text = ""

    # --- Document type detection ------------------------------------------
    if document_type == "OTHER" and text:
        # Prefer the LLM classifier; fall back to keyword heuristics.
        llm_result = await classify_with_llm(text)
        if llm_result:
            detected_type, confidence = llm_result
        else:
            detected_type, confidence = detect_document_type(text)
    else:
        detected_type = document_type
        confidence = 1.0

    # --- Entity extraction (fixed regulation vocabulary) ------------------
    entity_keywords = ["DSGVO", "AI Act", "ISO 27001", "NIS2", "BDSG",
                       "Personenbezogene Daten", "Auftragsverarbeitung", "DSFA"]
    text_lower = text.lower()
    entities = [kw for kw in entity_keywords if kw.lower() in text_lower]

    # --- Gap analysis -----------------------------------------------------
    gaps = analyze_gaps(text, detected_type)
    recommendations = [g["required_action"] for g in gaps[:5]]
    if not recommendations:
        recommendations = ["Dokument erscheint vollstaendig"]

    # BUGFIX: build the full analysis result *before* any DB work.  It was
    # previously assigned inside the try block, so a failing INSERT (whose
    # exception is deliberately swallowed) made the response construction
    # below crash with NameError.
    doc_id = str(uuid.uuid4())
    total_gaps = len(gaps)
    gap_analysis_result = {
        "id": f"analysis-{doc_id[:8]}",
        "total_gaps": total_gaps,
        "critical_gaps": len([g for g in gaps if g["severity"] == "CRITICAL"]),
        "high_gaps": len([g for g in gaps if g["severity"] == "HIGH"]),
        "medium_gaps": len([g for g in gaps if g["severity"] == "MEDIUM"]),
        "low_gaps": len([g for g in gaps if g["severity"] == "LOW"]),
        "gaps": gaps,
        "recommended_packages": ["analyse", "dokumentation"] if total_gaps > 0 else [],
    }

    # --- Best-effort persistence ------------------------------------------
    db = SessionLocal()
    try:
        db.execute(
            """INSERT INTO compliance_imported_documents
            (id, tenant_id, filename, file_type, file_size, detected_type, detection_confidence,
            extracted_text, extracted_entities, recommendations, status, analyzed_at)
            VALUES (:id, :tenant_id, :filename, :file_type, :file_size, :detected_type, :confidence,
            :text, :entities::jsonb, :recommendations::jsonb, 'analyzed', NOW())""",
            {
                "id": doc_id,
                "tenant_id": tenant_id,
                "filename": file.filename,
                "file_type": file.content_type or "unknown",
                "file_size": file_size,
                "detected_type": detected_type,
                "confidence": confidence,
                "text": text[:50000],  # Limit stored text
                # Proper JSON serialization; the previous str().replace("'", '"')
                # hack produced invalid JSON for values containing quotes.
                "entities": json.dumps(entities),
                "recommendations": json.dumps(recommendations),
            },
        )
        if total_gaps > 0:
            db.execute(
                """INSERT INTO compliance_gap_analyses
                (tenant_id, document_id, total_gaps, critical_gaps, high_gaps, medium_gaps, low_gaps, gaps, recommended_packages)
                VALUES (:tenant_id, :document_id, :total, :critical, :high, :medium, :low, :gaps::jsonb, :packages::jsonb)""",
                {
                    "tenant_id": tenant_id,
                    "document_id": doc_id,
                    "total": gap_analysis_result["total_gaps"],
                    "critical": gap_analysis_result["critical_gaps"],
                    "high": gap_analysis_result["high_gaps"],
                    "medium": gap_analysis_result["medium_gaps"],
                    "low": gap_analysis_result["low_gaps"],
                    "gaps": json.dumps(gaps),
                    "packages": json.dumps(gap_analysis_result["recommended_packages"]),
                },
            )
        db.commit()
    except Exception as e:
        db.rollback()
        logger.error(f"Failed to persist document analysis: {e}")
    finally:
        db.close()

    return DocumentAnalysisResponse(
        document_id=doc_id,
        filename=file.filename or "unknown",
        detected_type=detected_type,
        confidence=confidence,
        extracted_entities=entities,
        recommendations=recommendations,
        gap_analysis=gap_analysis_result,
    )
@router.get("/documents", response_model=DocumentListResponse)
async def list_documents(tenant_id: str = "default"):
    """Return every imported document for a tenant, newest first."""
    session = SessionLocal()
    try:
        rows = session.execute(
            """SELECT id, filename, file_type, file_size, detected_type, detection_confidence,
            extracted_entities, recommendations, status, analyzed_at, created_at
            FROM compliance_imported_documents
            WHERE tenant_id = :tenant_id
            ORDER BY created_at DESC""",
            {"tenant_id": tenant_id},
        ).fetchall()
        documents = [
            {
                "id": str(r[0]),
                "filename": r[1],
                "file_type": r[2],
                "file_size": r[3],
                "detected_type": r[4],
                "confidence": r[5],
                "extracted_entities": r[6] or [],
                "recommendations": r[7] or [],
                "status": r[8],
                "analyzed_at": str(r[9]) if r[9] else None,
                "created_at": str(r[10]),
            }
            for r in rows
        ]
        return DocumentListResponse(documents=documents, total=len(documents))
    finally:
        session.close()

View File

@@ -0,0 +1,608 @@
"""
FastAPI routes for System Screening (SBOM Generation + Vulnerability Scan).
Endpoints:
- POST /v1/screening/scan: Upload dependency file, generate SBOM, scan for vulnerabilities
- GET /v1/screening/{screening_id}: Get screening result by ID
- GET /v1/screening: List screenings for a tenant
"""
import json
import logging
import re
import uuid
from datetime import datetime, timezone
from typing import Optional
import httpx
from fastapi import APIRouter, File, Form, UploadFile, HTTPException
from pydantic import BaseModel
from database import SessionLocal
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/v1/screening", tags=["system-screening"])
# OSV.dev vulnerability database: single-package POST query endpoint.
OSV_API_URL = "https://api.osv.dev/v1/query"
# =============================================================================
# RESPONSE MODELS
# =============================================================================
class SecurityIssueResponse(BaseModel):
    """One vulnerability finding for a scanned component."""
    id: str
    # CRITICAL / HIGH / MEDIUM / LOW (see map_osv_severity)
    severity: str
    title: str
    description: Optional[str] = None
    # CVE alias, when OSV lists one
    cve: Optional[str] = None
    # Nominal CVSS score derived from the severity bucket
    cvss: Optional[float] = None
    affected_component: str
    affected_version: Optional[str] = None
    # First fixed version reported by OSV, if any
    fixed_in: Optional[str] = None
    remediation: Optional[str] = None
    status: str = "OPEN"
class SBOMComponentResponse(BaseModel):
    """One SBOM component (CycloneDX-style) with its matched vulnerabilities."""
    name: str
    version: str
    type: str
    # Package URL, e.g. "pkg:npm/lodash@4.17.21"
    purl: str
    licenses: list[str]
    vulnerabilities: list[dict]
class ScreeningResponse(BaseModel):
    """Full result of a dependency screening (SBOM + vulnerability scan)."""
    id: str
    status: str
    # SBOM envelope info (CycloneDX + spec version)
    sbom_format: str
    sbom_version: str
    total_components: int
    # Issue counts broken down by severity bucket
    total_issues: int
    critical_issues: int
    high_issues: int
    medium_issues: int
    low_issues: int
    components: list[SBOMComponentResponse]
    issues: list[SecurityIssueResponse]
    started_at: Optional[str] = None
    completed_at: Optional[str] = None
class ScreeningListResponse(BaseModel):
    """List of screenings for a tenant, plus count."""
    screenings: list[dict]
    total: int
# =============================================================================
# DEPENDENCY PARSING
# =============================================================================
def parse_package_lock(content: str) -> list[dict]:
    """Parse a package-lock.json (v1/v2/v3) into component dicts.

    Prefers the v2/v3 "packages" map; falls back to the flat v1
    "dependencies" map.  Returns [] for invalid JSON.
    """
    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        return []

    components: list[dict] = []
    # v2/v3 format: "packages" keyed by install path ("" is the root project).
    for path, info in data.get("packages", {}).items():
        if not path:
            continue
        name = path.split("node_modules/")[-1] if "node_modules/" in path else path
        version = info.get("version", "unknown")
        if name and version != "unknown":
            components.append({
                "name": name,
                "version": version,
                "type": "library",
                "ecosystem": "npm",
                "license": info.get("license", "unknown"),
            })
    if components:
        return components

    # v1 format: flat "dependencies" map.
    return [
        {
            "name": name,
            "version": info.get("version", "unknown"),
            "type": "library",
            "ecosystem": "npm",
            "license": "unknown",
        }
        for name, info in data.get("dependencies", {}).items()
        if isinstance(info, dict)
    ]
def parse_requirements_txt(content: str) -> list[dict]:
    """Parse a requirements.txt into component dicts.

    Understands "name<op>version" pins (==, >=, <=, ~=, !=, ...) and bare
    names (recorded with version "latest").  Blank lines, comments, and
    option lines ("-r", "-e", "--hash", ...) are skipped.
    """
    pinned = re.compile(r'^([a-zA-Z0-9_.-]+)\s*([>=<~!]+)\s*([a-zA-Z0-9_.*-]+)')
    bare = re.compile(r'^[a-zA-Z0-9_.-]+$')

    def component(name: str, version: str) -> dict:
        return {
            "name": name,
            "version": version,
            "type": "library",
            "ecosystem": "PyPI",
            "license": "unknown",
        }

    components: list[dict] = []
    for raw in content.strip().split("\n"):
        entry = raw.strip()
        if not entry or entry.startswith(("#", "-")):
            continue
        spec = pinned.match(entry)
        if spec:
            components.append(component(spec.group(1), spec.group(3)))
        elif bare.match(entry):
            components.append(component(entry, "latest"))
    return components
def parse_yarn_lock(content: str) -> list[dict]:
    """Parse a yarn.lock (classic v1 format) into component dicts.

    An entry header line such as `lodash@^4.17.0:` or `"@babel/core@^7.0.0":`
    is followed by an indented `version "x.y.z"` line.  FIX: the previous
    header regex `[^@]+` could not match names starting with "@", so all
    scoped packages (@scope/name) were silently dropped; the optional `@?`
    prefix in the pattern below includes them.
    """
    # Header: optional quote, optional leading "@" (scoped package), then the
    # package name up to the version-selector "@".
    header_re = re.compile(r'^"?(@?[^@]+)@[^"]*"?:')
    version_re = re.compile(r'\s+version\s+"?([^"]+)"?')

    components: list[dict] = []
    current_name = None
    for line in content.split("\n"):
        match = header_re.match(line)
        if match:
            current_name = match.group(1).strip()
        elif current_name and line.strip().startswith("version "):
            version_match = version_re.match(line)
            if version_match:
                components.append({
                    "name": current_name,
                    "version": version_match.group(1),
                    "type": "library",
                    "ecosystem": "npm",
                    "license": "unknown",
                })
                # Each header consumes exactly one version line.
                current_name = None
    return components
def detect_and_parse(filename: str, content: str) -> tuple[list[dict], str]:
    """Route a dependency file to the matching parser based on its name.

    Unrecognised .json files are tried as package-lock first, then as a
    requirements list.  Returns (components, ecosystem), with ecosystem
    "unknown" and an empty list when nothing could be parsed.
    """
    fname = filename.lower()
    if "package-lock" in fname or fname.endswith("package-lock.json"):
        return parse_package_lock(content), "npm"
    if fname == "requirements.txt" or fname.endswith("/requirements.txt"):
        return parse_requirements_txt(content), "PyPI"
    if "yarn.lock" in fname:
        return parse_yarn_lock(content), "npm"
    if fname.endswith(".json"):
        # Unknown .json: try both supported formats in order.
        for parser, ecosystem in ((parse_package_lock, "npm"), (parse_requirements_txt, "PyPI")):
            comps = parser(content)
            if comps:
                return comps, ecosystem
    return [], "unknown"
# =============================================================================
# SBOM GENERATION (CycloneDX format)
# =============================================================================
def generate_sbom(components: list[dict], ecosystem: str) -> dict:
    """Generate a CycloneDX 1.5 SBOM document from parsed components.

    Each component gets a package URL (purl).  FIX: a component without a
    "license" key previously produced licenses=["unknown"] while an explicit
    "unknown" produced [] — a missing license is now treated the same as
    "unknown" (no license entry).
    """
    sbom_components = []
    for comp in components:
        license_name = comp.get("license", "unknown")
        sbom_components.append({
            "type": "library",
            "name": comp["name"],
            "version": comp["version"],
            "purl": f"pkg:{ecosystem.lower()}/{comp['name']}@{comp['version']}",
            # Only emit real license strings; "unknown" is a parser placeholder.
            "licenses": [] if license_name == "unknown" else [license_name],
        })
    return {
        "bomFormat": "CycloneDX",
        "specVersion": "1.5",
        "version": 1,
        "metadata": {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "tools": [{"name": "breakpilot-screening", "version": "1.0.0"}],
        },
        "components": sbom_components,
    }
# =============================================================================
# VULNERABILITY SCANNING (OSV.dev API)
# =============================================================================
async def query_osv(name: str, version: str, ecosystem: str) -> list[dict]:
    """Query the OSV.dev API for known vulnerabilities of one package version.

    Network or API errors are logged and yield an empty list, so one flaky
    lookup never aborts a whole scan.
    """
    payload = {
        "package": {"name": name, "ecosystem": ecosystem},
        "version": version,
    }
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(OSV_API_URL, json=payload)
            if resp.status_code == 200:
                return resp.json().get("vulns", [])
    except Exception as e:
        logger.warning(f"OSV query failed for {name}@{version}: {e}")
    return []
def map_osv_severity(vuln: dict) -> tuple[str, float]:
    """Derive a (severity, cvss) pair from an OSV vulnerability record.

    OSV "severity" entries carry CVSS *vector strings*, not numeric base
    scores, so the label is taken from database_specific.severity (emitted
    e.g. by GitHub advisories) and the CVSS value is a nominal midpoint for
    that bucket.  Defaults to ("MEDIUM", 5.0).

    FIX: removed the dead loop over vuln["severity"] — it assigned an unused
    score string, did an unused `import re as _re`, and then `pass`ed,
    affecting nothing.
    """
    severity = "MEDIUM"
    db_specific = vuln.get("database_specific", {})
    if "severity" in db_specific:
        candidate = db_specific["severity"].upper()
        if candidate in ("CRITICAL", "HIGH", "MEDIUM", "LOW"):
            severity = candidate
    # Nominal CVSS base score per severity bucket.
    cvss_map = {"CRITICAL": 9.5, "HIGH": 7.5, "MEDIUM": 5.0, "LOW": 2.5}
    return severity, cvss_map.get(severity, 5.0)
def extract_fix_version(vuln: dict, package_name: str) -> Optional[str]:
    """Return the first 'fixed' version OSV lists for the given package.

    Package names are compared case-insensitively.  Returns None when no
    matching package or no fix event is present.
    """
    wanted = package_name.lower()
    for affected in vuln.get("affected", []):
        if affected.get("package", {}).get("name", "").lower() != wanted:
            continue
        for version_range in affected.get("ranges", []):
            for event in version_range.get("events", []):
                if "fixed" in event:
                    return event["fixed"]
    return None
async def scan_vulnerabilities(components: list[dict], ecosystem: str) -> list[dict]:
    """Query OSV.dev for each component and return a flat list of issue dicts.

    At most the first 50 components are scanned to bound total request time,
    and components with placeholder versions ("latest", "unknown", "*") are
    skipped because OSV needs a concrete version to match against.
    """
    findings: list[dict] = []
    unscannable_versions = ("latest", "unknown", "*")
    for comp in components[:min(len(components), 50)]:
        if comp["version"] in unscannable_versions:
            continue
        for vuln in await query_osv(comp["name"], comp["version"], ecosystem):
            vuln_id = vuln.get("id", f"OSV-{uuid.uuid4().hex[:8]}")
            cve = next(
                (alias for alias in vuln.get("aliases", []) if alias.startswith("CVE-")),
                None,
            )
            severity, cvss = map_osv_severity(vuln)
            fixed_in = extract_fix_version(vuln, comp["name"])
            if fixed_in:
                remediation = f"Upgrade {comp['name']} to {fixed_in}"
            else:
                remediation = f"Check {vuln_id} for remediation steps"
            findings.append({
                "id": str(uuid.uuid4()),
                "severity": severity,
                "title": vuln.get("summary", vuln_id),
                "description": vuln.get("details", "")[:500],
                "cve": cve,
                "cvss": cvss,
                "affected_component": comp["name"],
                "affected_version": comp["version"],
                "fixed_in": fixed_in,
                "remediation": remediation,
                "status": "OPEN",
            })
    return findings
# =============================================================================
# ROUTES
# =============================================================================
@router.post("/scan", response_model=ScreeningResponse)
async def scan_dependencies(
    file: UploadFile = File(...),
    tenant_id: str = Form("default"),
):
    """Upload a dependency file, generate SBOM, and scan for vulnerabilities.

    Pipeline: decode upload -> parse dependencies -> build CycloneDX SBOM ->
    scan components against OSV.dev -> persist screening + issues (best
    effort) -> build and return the full ScreeningResponse.

    Raises:
        HTTPException 400: missing filename, non-UTF-8 content, or a file
            format none of the parsers recognise.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")
    content = await file.read()
    try:
        text = content.decode("utf-8")
    except UnicodeDecodeError:
        raise HTTPException(status_code=400, detail="File must be a text-based dependency file")
    # Parse dependencies (ecosystem inferred from the filename).
    components, ecosystem = detect_and_parse(file.filename, text)
    if not components:
        raise HTTPException(
            status_code=400,
            detail="Could not parse dependencies. Supported: package-lock.json, requirements.txt, yarn.lock",
        )
    # Generate SBOM
    sbom = generate_sbom(components, ecosystem)
    # Scan for vulnerabilities; timestamps bracket only the OSV scan phase.
    started_at = datetime.now(timezone.utc)
    issues = await scan_vulnerabilities(components, ecosystem)
    completed_at = datetime.now(timezone.utc)
    # Count severities
    critical = len([i for i in issues if i["severity"] == "CRITICAL"])
    high = len([i for i in issues if i["severity"] == "HIGH"])
    medium = len([i for i in issues if i["severity"] == "MEDIUM"])
    low = len([i for i in issues if i["severity"] == "LOW"])
    # Persist to database. NOTE: raw SQL strings passed to Session.execute —
    # assumes the session accepts plain strings (SQLAlchemy 1.x style or a
    # wrapper); TODO confirm against the database module. The `::jsonb` cast
    # is PostgreSQL-specific.
    screening_id = str(uuid.uuid4())
    db = SessionLocal()
    try:
        db.execute(
            """INSERT INTO compliance_screenings
            (id, tenant_id, status, sbom_format, sbom_version,
            total_components, total_issues, critical_issues, high_issues, medium_issues, low_issues,
            sbom_data, started_at, completed_at)
            VALUES (:id, :tenant_id, 'completed', 'CycloneDX', '1.5',
            :total_components, :total_issues, :critical, :high, :medium, :low,
            :sbom_data::jsonb, :started_at, :completed_at)""",
            {
                "id": screening_id,
                "tenant_id": tenant_id,
                "total_components": len(components),
                "total_issues": len(issues),
                "critical": critical,
                "high": high,
                "medium": medium,
                "low": low,
                "sbom_data": json.dumps(sbom),
                "started_at": started_at,
                "completed_at": completed_at,
            },
        )
        # Persist security issues (truncated to column widths: title 500, description 1000).
        for issue in issues:
            db.execute(
                """INSERT INTO compliance_security_issues
                (id, screening_id, severity, title, description, cve, cvss,
                affected_component, affected_version, fixed_in, remediation, status)
                VALUES (:id, :screening_id, :severity, :title, :description, :cve, :cvss,
                :component, :version, :fixed_in, :remediation, :status)""",
                {
                    "id": issue["id"],
                    "screening_id": screening_id,
                    "severity": issue["severity"],
                    "title": issue["title"][:500],
                    "description": issue.get("description", "")[:1000],
                    "cve": issue.get("cve"),
                    "cvss": issue.get("cvss"),
                    "component": issue["affected_component"],
                    "version": issue.get("affected_version"),
                    "fixed_in": issue.get("fixed_in"),
                    "remediation": issue.get("remediation"),
                    "status": issue["status"],
                },
            )
        db.commit()
    except Exception as e:
        db.rollback()
        # NOTE(review): persistence failure is swallowed — the response below
        # still reports this screening_id as "completed" even though nothing
        # was stored, so a later GET /{screening_id} will 404. Deliberate
        # best-effort? Confirm.
        logger.error(f"Failed to persist screening: {e}")
    finally:
        db.close()
    # Build response: group issues per component name so each SBOM component
    # carries its own vulnerability list.
    sbom_components = []
    comp_vulns: dict[str, list[dict]] = {}
    for issue in issues:
        comp_name = issue["affected_component"]
        if comp_name not in comp_vulns:
            comp_vulns[comp_name] = []
        comp_vulns[comp_name].append({
            "id": issue.get("cve") or issue["id"],
            "cve": issue.get("cve"),
            "severity": issue["severity"],
            "title": issue["title"],
            "cvss": issue.get("cvss"),
            "fixedIn": issue.get("fixed_in"),  # camelCase for the frontend
        })
    for sc in sbom["components"]:
        sbom_components.append(SBOMComponentResponse(
            name=sc["name"],
            version=sc["version"],
            type=sc["type"],
            purl=sc["purl"],
            licenses=sc.get("licenses", []),
            vulnerabilities=comp_vulns.get(sc["name"], []),
        ))
    issue_responses = [
        SecurityIssueResponse(
            id=i["id"],
            severity=i["severity"],
            title=i["title"],
            description=i.get("description"),
            cve=i.get("cve"),
            cvss=i.get("cvss"),
            affected_component=i["affected_component"],
            affected_version=i.get("affected_version"),
            fixed_in=i.get("fixed_in"),
            remediation=i.get("remediation"),
            status=i["status"],
        )
        for i in issues
    ]
    return ScreeningResponse(
        id=screening_id,
        status="completed",
        sbom_format="CycloneDX",
        sbom_version="1.5",
        total_components=len(components),
        total_issues=len(issues),
        critical_issues=critical,
        high_issues=high,
        medium_issues=medium,
        low_issues=low,
        components=sbom_components,
        issues=issue_responses,
        started_at=started_at.isoformat(),
        completed_at=completed_at.isoformat(),
    )
@router.get("/{screening_id}", response_model=ScreeningResponse)
async def get_screening(screening_id: str):
    """Get a screening result by ID.

    Reconstructs the full response from two tables: the screening row
    (including the stored SBOM JSON) and its security issues.

    Raises:
        HTTPException 404: no screening with this id.
    """
    db = SessionLocal()
    try:
        # Column order matters: positional row indices below must match this
        # SELECT list exactly (row[10] is sbom_data, row[11]/row[12] timestamps).
        result = db.execute(
            """SELECT id, status, sbom_format, sbom_version,
            total_components, total_issues, critical_issues, high_issues,
            medium_issues, low_issues, sbom_data, started_at, completed_at
            FROM compliance_screenings WHERE id = :id""",
            {"id": screening_id},
        )
        row = result.fetchone()
        if not row:
            raise HTTPException(status_code=404, detail="Screening not found")
        # Fetch issues
        issues_result = db.execute(
            """SELECT id, severity, title, description, cve, cvss,
            affected_component, affected_version, fixed_in, remediation, status
            FROM compliance_security_issues WHERE screening_id = :id""",
            {"id": screening_id},
        )
        issues_rows = issues_result.fetchall()
        issues = [
            SecurityIssueResponse(
                id=str(r[0]), severity=r[1], title=r[2], description=r[3],
                cve=r[4], cvss=r[5], affected_component=r[6],
                affected_version=r[7], fixed_in=r[8], remediation=r[9], status=r[10],
            )
            for r in issues_rows
        ]
        # Reconstruct components from SBOM data.
        # NOTE(review): assumes the driver deserializes the JSONB column to a
        # dict; if it comes back as a str, .get("components") would fail — confirm.
        sbom_data = row[10] or {}
        components = []
        # Group issues per component name so each component response carries
        # its own vulnerability list (mirrors the /scan response shape).
        comp_vulns: dict[str, list[dict]] = {}
        for issue in issues:
            if issue.affected_component not in comp_vulns:
                comp_vulns[issue.affected_component] = []
            comp_vulns[issue.affected_component].append({
                "id": issue.cve or issue.id,
                "cve": issue.cve,
                "severity": issue.severity,
                "title": issue.title,
                "cvss": issue.cvss,
                "fixedIn": issue.fixed_in,  # camelCase for the frontend
            })
        for sc in sbom_data.get("components", []):
            components.append(SBOMComponentResponse(
                name=sc["name"],
                version=sc["version"],
                type=sc.get("type", "library"),
                purl=sc.get("purl", ""),
                licenses=sc.get("licenses", []),
                vulnerabilities=comp_vulns.get(sc["name"], []),
            ))
        return ScreeningResponse(
            id=str(row[0]),
            status=row[1],
            sbom_format=row[2] or "CycloneDX",
            sbom_version=row[3] or "1.5",
            total_components=row[4] or 0,
            total_issues=row[5] or 0,
            critical_issues=row[6] or 0,
            high_issues=row[7] or 0,
            medium_issues=row[8] or 0,
            low_issues=row[9] or 0,
            components=components,
            issues=issues,
            started_at=str(row[11]) if row[11] else None,
            completed_at=str(row[12]) if row[12] else None,
        )
    finally:
        db.close()
@router.get("", response_model=ScreeningListResponse)
async def list_screenings(tenant_id: str = "default"):
    """Return all screenings recorded for *tenant_id*, newest first."""
    db = SessionLocal()
    try:
        rows = db.execute(
            """SELECT id, status, total_components, total_issues,
            critical_issues, high_issues, medium_issues, low_issues,
            started_at, completed_at, created_at
            FROM compliance_screenings
            WHERE tenant_id = :tenant_id
            ORDER BY created_at DESC""",
            {"tenant_id": tenant_id},
        ).fetchall()
        screenings = []
        for row in rows:
            screenings.append({
                "id": str(row[0]),
                "status": row[1],
                "total_components": row[2],
                "total_issues": row[3],
                "critical_issues": row[4],
                "high_issues": row[5],
                "medium_issues": row[6],
                "low_issues": row[7],
                # Timestamps are stringified for JSON; None stays None.
                "started_at": str(row[8]) if row[8] else None,
                "completed_at": str(row[9]) if row[9] else None,
                "created_at": str(row[10]),
            })
        return ScreeningListResponse(screenings=screenings, total=len(screenings))
    finally:
        db.close()

View File

@@ -24,6 +24,13 @@ from compliance.api import router as compliance_framework_router
# Source Policy
from compliance.api.source_policy_router import router as source_policy_router
# Document Import & Screening
from compliance.api.import_routes import router as import_router
from compliance.api.screening_routes import router as screening_router
# Company Profile
from compliance.api.company_profile_routes import router as company_profile_router
# Middleware
from middleware import (
RequestIDMiddleware,
@@ -91,6 +98,15 @@ app.include_router(compliance_framework_router, prefix="/api")
# Source Policy (allowed sources, PII rules, audit)
app.include_router(source_policy_router, prefix="/api")
# Document Import (PDF analysis, gap detection)
app.include_router(import_router, prefix="/api")
# System Screening (SBOM generation, vulnerability scan)
app.include_router(screening_router, prefix="/api")
# Company Profile (CRUD with audit logging)
app.include_router(company_profile_router, prefix="/api")
if __name__ == "__main__":
import uvicorn

View File

@@ -0,0 +1,19 @@
-- =============================================================================
-- Migration 002: SDK States Table
--
-- Persistent storage for SDK state management.
-- Replaces the in-memory store used during development.
-- =============================================================================
-- One state snapshot per tenant (tenant_id is UNIQUE); state itself is opaque JSONB.
CREATE TABLE IF NOT EXISTS sdk_states (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id VARCHAR(255) NOT NULL UNIQUE,
user_id VARCHAR(255),
state JSONB NOT NULL,
version INTEGER DEFAULT 1,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- NOTE(review): tenant_id is declared UNIQUE above, which already creates an
-- implicit unique index in PostgreSQL; this index duplicates it — consider dropping.
CREATE INDEX IF NOT EXISTS idx_sdk_states_tenant ON sdk_states(tenant_id);
-- Supports "recently updated" queries and cleanup jobs.
CREATE INDEX IF NOT EXISTS idx_sdk_states_updated ON sdk_states(updated_at);

View File

@@ -0,0 +1,41 @@
-- =============================================================================
-- Migration 003: Document Import Tables
--
-- Tables for imported compliance documents and gap analysis results.
-- =============================================================================
-- Uploaded compliance documents plus the results of their automated analysis.
CREATE TABLE IF NOT EXISTS compliance_imported_documents (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id VARCHAR(255) NOT NULL,
filename VARCHAR(500) NOT NULL,
file_type VARCHAR(50) NOT NULL,
file_size INTEGER,
-- Result of keyword-based type detection (e.g. DSFA/TOM/VVT) with confidence.
detected_type VARCHAR(50),
detection_confidence FLOAT,
extracted_text TEXT,
extracted_entities JSONB DEFAULT '[]',
recommendations JSONB DEFAULT '[]',
status VARCHAR(20) DEFAULT 'pending',
analyzed_at TIMESTAMPTZ,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_imported_docs_tenant ON compliance_imported_documents(tenant_id);
CREATE INDEX IF NOT EXISTS idx_imported_docs_status ON compliance_imported_documents(status);
-- Gap-analysis results; cascade-deleted with the source document.
CREATE TABLE IF NOT EXISTS compliance_gap_analyses (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id VARCHAR(255) NOT NULL,
document_id UUID REFERENCES compliance_imported_documents(id) ON DELETE CASCADE,
-- Denormalized per-severity counters for cheap dashboard queries.
total_gaps INTEGER DEFAULT 0,
critical_gaps INTEGER DEFAULT 0,
high_gaps INTEGER DEFAULT 0,
medium_gaps INTEGER DEFAULT 0,
low_gaps INTEGER DEFAULT 0,
gaps JSONB DEFAULT '[]',
recommended_packages JSONB DEFAULT '[]',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_gap_analyses_tenant ON compliance_gap_analyses(tenant_id);
CREATE INDEX IF NOT EXISTS idx_gap_analyses_document ON compliance_gap_analyses(document_id);

View File

@@ -0,0 +1,45 @@
-- =============================================================================
-- Migration 004: System Screening Tables
--
-- Tables for SBOM generation and vulnerability scanning results.
-- =============================================================================
-- One row per dependency scan; the full CycloneDX SBOM is stored in sbom_data.
CREATE TABLE IF NOT EXISTS compliance_screenings (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id VARCHAR(255) NOT NULL,
status VARCHAR(20) DEFAULT 'pending',
sbom_format VARCHAR(50) DEFAULT 'CycloneDX',
sbom_version VARCHAR(20) DEFAULT '1.5',
-- Denormalized per-severity counters for cheap list/dashboard queries.
total_components INTEGER DEFAULT 0,
total_issues INTEGER DEFAULT 0,
critical_issues INTEGER DEFAULT 0,
high_issues INTEGER DEFAULT 0,
medium_issues INTEGER DEFAULT 0,
low_issues INTEGER DEFAULT 0,
sbom_data JSONB,
-- started_at/completed_at bracket the vulnerability-scan phase.
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_screenings_tenant ON compliance_screenings(tenant_id);
CREATE INDEX IF NOT EXISTS idx_screenings_status ON compliance_screenings(status);
-- Individual vulnerabilities found in a screening; cascade-deleted with it.
CREATE TABLE IF NOT EXISTS compliance_security_issues (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
screening_id UUID NOT NULL REFERENCES compliance_screenings(id) ON DELETE CASCADE,
severity VARCHAR(20) NOT NULL,
title VARCHAR(500) NOT NULL,
description TEXT,
cve VARCHAR(50),
cvss FLOAT,
affected_component VARCHAR(255),
affected_version VARCHAR(100),
fixed_in VARCHAR(100),
remediation TEXT,
status VARCHAR(20) DEFAULT 'OPEN',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_security_issues_screening ON compliance_security_issues(screening_id);
CREATE INDEX IF NOT EXISTS idx_security_issues_severity ON compliance_security_issues(severity);

View File

@@ -0,0 +1,74 @@
-- =============================================================================
-- Migration 005: Company Profile Table
--
-- Dedicated table for company profiles with audit logging.
-- =============================================================================
-- One profile per tenant (tenant_id UNIQUE); upserted by the profile API.
CREATE TABLE IF NOT EXISTS compliance_company_profiles (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id VARCHAR(255) NOT NULL UNIQUE,
-- Basic Info
company_name VARCHAR(500) NOT NULL DEFAULT '',
legal_form VARCHAR(50) DEFAULT 'GmbH',
industry VARCHAR(255) DEFAULT '',
founded_year INTEGER,
-- Business Model
business_model VARCHAR(20) DEFAULT 'B2B',
offerings JSONB DEFAULT '[]'::jsonb,
-- Size & Scope
company_size VARCHAR(20) DEFAULT 'small',
employee_count VARCHAR(20) DEFAULT '1-9',
annual_revenue VARCHAR(50) DEFAULT '< 2 Mio',
-- Locations
headquarters_country VARCHAR(10) DEFAULT 'DE',
headquarters_city VARCHAR(255) DEFAULT '',
has_international_locations BOOLEAN DEFAULT FALSE,
international_countries JSONB DEFAULT '[]'::jsonb,
-- Target Markets & Legal Scope
target_markets JSONB DEFAULT '["DE"]'::jsonb,
primary_jurisdiction VARCHAR(10) DEFAULT 'DE',
-- Data Processing Role (GDPR controller/processor; a tenant can be both)
is_data_controller BOOLEAN DEFAULT TRUE,
is_data_processor BOOLEAN DEFAULT FALSE,
-- AI Usage
uses_ai BOOLEAN DEFAULT FALSE,
ai_use_cases JSONB DEFAULT '[]'::jsonb,
-- Contact Persons
dpo_name VARCHAR(255),
dpo_email VARCHAR(255),
legal_contact_name VARCHAR(255),
legal_contact_email VARCHAR(255),
-- Machine Builder Profile (optional)
machine_builder JSONB,
-- Completion
is_complete BOOLEAN DEFAULT FALSE,
completed_at TIMESTAMPTZ,
-- Timestamps
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- NOTE(review): tenant_id is UNIQUE above (implicit unique index); this index
-- duplicates it — consider dropping.
CREATE INDEX IF NOT EXISTS idx_company_profiles_tenant ON compliance_company_profiles(tenant_id);
-- Audit log for company profile changes (append-only; one row per mutation).
CREATE TABLE IF NOT EXISTS compliance_company_profile_audit (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id VARCHAR(255) NOT NULL,
action VARCHAR(20) NOT NULL,
changed_fields JSONB,
changed_by VARCHAR(255),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_company_profile_audit_tenant ON compliance_company_profile_audit(tenant_id);

View File

@@ -30,6 +30,9 @@ Jinja2==3.1.6
mammoth==1.11.0
Markdown==3.9
# PDF Text Extraction (document import analysis)
PyMuPDF==1.25.3
# Utilities
python-dateutil==2.9.0.post0

View File

@@ -0,0 +1,134 @@
"""Tests for Company Profile routes (company_profile_routes.py)."""
import json
import pytest
from unittest.mock import MagicMock, patch
from compliance.api.company_profile_routes import (
CompanyProfileRequest,
row_to_response,
log_audit,
)
class TestCompanyProfileRequest:
    """Tests for request model defaults."""

    def test_default_values(self):
        """A bare request carries the documented defaults."""
        request = CompanyProfileRequest()
        assert request.company_name == ""
        assert request.legal_form == "GmbH"
        assert request.business_model == "B2B"
        assert request.company_size == "small"
        assert request.headquarters_country == "DE"
        assert request.is_data_controller is True
        assert request.is_data_processor is False
        assert request.uses_ai is False
        assert request.is_complete is False

    def test_custom_values(self):
        """Explicit kwargs override the defaults."""
        overrides = dict(
            company_name="Test GmbH",
            industry="Software",
            uses_ai=True,
            ai_use_cases=["Chatbot", "Analytics"],
            offerings=["app_web", "software_saas"],
        )
        request = CompanyProfileRequest(**overrides)
        assert request.company_name == "Test GmbH"
        assert request.uses_ai is True
        assert len(request.ai_use_cases) == 2
        assert len(request.offerings) == 2

    def test_serialization(self):
        """model_dump() round-trips fields and keeps list types."""
        dumped = CompanyProfileRequest(company_name="Test").model_dump()
        assert dumped["company_name"] == "Test"
        assert isinstance(dumped["target_markets"], list)
class TestRowToResponse:
    """Tests for DB row to response conversion.

    The helper builds a tuple matching the 30-column SELECT order used by
    row_to_response; tests mutate specific indices to exercise edge cases.
    """

    def _make_row(self):
        """Create a mock DB row with 30 fields.

        Note: the previous ``**overrides`` parameter was accepted but never
        applied (dead parameter), so it has been removed; callers mutate the
        returned tuple's list form instead.
        """
        defaults = [
            "uuid-123",      # 0: id
            "default",       # 1: tenant_id
            "Test GmbH",     # 2: company_name
            "GmbH",          # 3: legal_form
            "IT",            # 4: industry
            2020,            # 5: founded_year
            "B2B",           # 6: business_model
            ["app_web"],     # 7: offerings
            "small",         # 8: company_size
            "10-49",         # 9: employee_count
            "2-10 Mio",      # 10: annual_revenue
            "DE",            # 11: headquarters_country
            "Berlin",        # 12: headquarters_city
            False,           # 13: has_international_locations
            [],              # 14: international_countries
            ["DE", "AT"],    # 15: target_markets
            "DE",            # 16: primary_jurisdiction
            True,            # 17: is_data_controller
            False,           # 18: is_data_processor
            False,           # 19: uses_ai
            [],              # 20: ai_use_cases
            "Max Muster",    # 21: dpo_name
            "dpo@test.de",   # 22: dpo_email
            None,            # 23: legal_contact_name
            None,            # 24: legal_contact_email
            None,            # 25: machine_builder
            True,            # 26: is_complete
            "2026-01-01",    # 27: completed_at
            "2026-01-01",    # 28: created_at
            "2026-01-01",    # 29: updated_at
        ]
        return tuple(defaults)

    def test_basic_conversion(self):
        row = self._make_row()
        response = row_to_response(row)
        assert response.id == "uuid-123"
        assert response.tenant_id == "default"
        assert response.company_name == "Test GmbH"
        assert response.is_complete is True

    def test_none_values_handled(self):
        """Nullable columns map to None on the response."""
        row = list(self._make_row())
        row[5] = None   # founded_year
        row[21] = None  # dpo_name
        row[25] = None  # machine_builder
        row[27] = None  # completed_at
        response = row_to_response(tuple(row))
        assert response.founded_year is None
        assert response.dpo_name is None
        assert response.machine_builder is None
        assert response.completed_at is None

    def test_non_list_jsonb_handled(self):
        """NULL JSONB list columns fall back to empty lists."""
        row = list(self._make_row())
        row[7] = None   # offerings (JSONB could be None)
        row[14] = None  # international_countries
        response = row_to_response(tuple(row))
        assert response.offerings == []
        assert response.international_countries == []
class TestLogAudit:
    """Tests for audit logging helper."""

    def test_log_audit_success(self):
        """A normal call issues exactly one DB statement."""
        session = MagicMock()
        log_audit(session, "tenant-1", "create", {"company_name": "Test"}, "admin")
        session.execute.assert_called_once()

    def test_log_audit_with_none_fields(self):
        """None changed_fields / changed_by are accepted."""
        session = MagicMock()
        log_audit(session, "tenant-1", "update", None, None)
        session.execute.assert_called_once()

    def test_log_audit_db_error_handled(self):
        """A failing execute must not propagate — audit is best-effort."""
        session = MagicMock()
        session.execute.side_effect = Exception("DB error")
        # Should not raise
        log_audit(session, "tenant-1", "create", {}, "admin")

View File

@@ -0,0 +1,123 @@
"""Tests for Document Import routes (import_routes.py)."""
import pytest
from unittest.mock import MagicMock, patch, AsyncMock
from compliance.api.import_routes import (
detect_document_type,
analyze_gaps,
extract_text_from_pdf,
)
class TestDetectDocumentType:
    """Tests for keyword-based document type detection."""

    def _detect(self, sample):
        """Shorthand: run detection on *sample* and return (type, confidence)."""
        return detect_document_type(sample)

    def test_dsfa_detection(self):
        doc_type, confidence = self._detect(
            "Dies ist eine Datenschutz-Folgenabschaetzung (DSFA) nach Art. 35 DSGVO"
        )
        assert doc_type == "DSFA"
        assert confidence >= 0.5

    def test_tom_detection(self):
        doc_type, confidence = self._detect(
            "Technisch-organisatorische Massnahmen (TOM) zum Schutz personenbezogener Daten"
        )
        assert doc_type == "TOM"
        assert confidence >= 0.5

    def test_vvt_detection(self):
        doc_type, confidence = self._detect(
            "Verarbeitungsverzeichnis nach Art. 30 DSGVO - VVT processing activities"
        )
        assert doc_type == "VVT"
        assert confidence >= 0.5

    def test_privacy_policy_detection(self):
        doc_type, confidence = self._detect(
            "Datenschutzerklaerung - Privacy Policy fuer unsere Nutzer"
        )
        assert doc_type == "PRIVACY_POLICY"
        assert confidence >= 0.5

    def test_unknown_document(self):
        doc_type, confidence = self._detect("Lorem ipsum dolor sit amet")
        assert doc_type == "OTHER"
        assert confidence == 0.3

    def test_empty_text(self):
        doc_type, confidence = self._detect("")
        assert doc_type == "OTHER"
        assert confidence == 0.3

    def test_confidence_increases_with_more_keywords(self):
        _, conf_single = self._detect("dsfa")
        _, conf_multi = self._detect(
            "dsfa dpia datenschutz-folgenabschaetzung privacy impact"
        )
        assert conf_multi > conf_single

    def test_confidence_capped_at_095(self):
        _, confidence = self._detect(
            "dsfa dpia datenschutz-folgenabschaetzung privacy impact assessment report analysis"
        )
        assert confidence <= 0.95
class TestAnalyzeGaps:
    """Tests for gap analysis rules."""

    @staticmethod
    def _by_category(findings, category):
        """Filter gap dicts down to one category."""
        return [gap for gap in findings if gap["category"] == category]

    def test_ai_gap_detected(self):
        """AI keywords without risk classification should flag an AI Act gap."""
        findings = analyze_gaps("Wir setzen KI und AI in unserer Anwendung ein", "OTHER")
        ai_gaps = self._by_category(findings, "AI Act Compliance")
        assert len(ai_gaps) > 0
        assert ai_gaps[0]["severity"] == "CRITICAL"

    def test_no_gap_when_requirement_present(self):
        findings = analyze_gaps("KI-System mit Risikoklassifizierung nach EU AI Act", "OTHER")
        assert self._by_category(findings, "AI Act Compliance") == []

    def test_tom_gap_detected(self):
        findings = analyze_gaps("Cloud-basiertes SaaS-System mit KI-Funktionen", "OTHER")
        assert len(self._by_category(findings, "TOMs")) > 0

    def test_no_gaps_for_irrelevant_text(self):
        findings = analyze_gaps("Ein einfacher Flyer ohne Datenbezug", "OTHER")
        assert findings == []

    def test_gap_has_required_fields(self):
        """Every gap dict must carry the fields the frontend renders."""
        findings = analyze_gaps("KI-System mit automatisierten Entscheidungen", "OTHER")
        assert len(findings) > 0
        required_keys = ("id", "category", "severity", "regulation", "required_action")
        for gap in findings:
            for key in required_keys:
                assert key in gap
class TestExtractTextFromPdf:
    """Tests for PDF text extraction.

    extract_text_from_pdf is expected to degrade to "" on any bad input
    rather than raise.
    """
    def test_empty_bytes_returns_empty(self):
        result = extract_text_from_pdf(b"")
        assert result == ""
    def test_invalid_pdf_returns_empty(self):
        result = extract_text_from_pdf(b"not a pdf")
        assert result == ""
    @patch("compliance.api.import_routes.fitz")
    def test_fitz_import_error(self, mock_fitz):
        """When fitz is not available, returns empty string."""
        mock_fitz.open.side_effect = ImportError("No module")
        # The actual function catches ImportError internally
        result = extract_text_from_pdf(b"test")
        # NOTE(review): this only asserts graceful degradation to *some* str,
        # because the mocked fitz raises on .open() rather than at import time;
        # presumably the function's broad except returns "" here — confirm and
        # tighten the assertion to == "" if so.
        # Since we mocked fitz at module level it will raise differently,
        # but the function should handle it gracefully
        assert isinstance(result, str)

View File

@@ -0,0 +1,191 @@
"""Tests for System Screening routes (screening_routes.py)."""
import json
import pytest
from unittest.mock import AsyncMock, patch
from compliance.api.screening_routes import (
parse_package_lock,
parse_requirements_txt,
parse_yarn_lock,
detect_and_parse,
generate_sbom,
map_osv_severity,
extract_fix_version,
)
class TestParsePackageLock:
    """Tests for package-lock.json parsing."""

    def test_v2_format(self):
        """Lockfile v2 'packages' entries are parsed; the root entry is not."""
        lockfile = json.dumps({
            "packages": {
                "": {"name": "my-app", "version": "1.0.0"},
                "node_modules/react": {"version": "18.3.0", "license": "MIT"},
                "node_modules/lodash": {"version": "4.17.21", "license": "MIT"},
            }
        })
        parsed = parse_package_lock(lockfile)
        assert len(parsed) == 2
        parsed_names = {entry["name"] for entry in parsed}
        assert "react" in parsed_names
        assert "lodash" in parsed_names

    def test_v1_format(self):
        """Legacy v1 'dependencies' layout is also supported."""
        lockfile = json.dumps({
            "dependencies": {
                "express": {"version": "4.18.2"},
                "cors": {"version": "2.8.5"},
            }
        })
        assert len(parse_package_lock(lockfile)) == 2

    def test_empty_json(self):
        assert parse_package_lock("{}") == []

    def test_invalid_json(self):
        assert parse_package_lock("not json") == []

    def test_root_package_skipped(self):
        lockfile = json.dumps({
            "packages": {
                "": {"name": "root", "version": "1.0.0"},
            }
        })
        assert len(parse_package_lock(lockfile)) == 0
class TestParseRequirementsTxt:
    """Tests for requirements.txt parsing."""

    def test_pinned_versions(self):
        """Exact pins (==) keep name, version, and PyPI ecosystem."""
        parsed = parse_requirements_txt(
            "fastapi==0.123.9\nuvicorn==0.38.0\npydantic==2.12.5"
        )
        assert len(parsed) == 3
        first = parsed[0]
        assert first["name"] == "fastapi"
        assert first["version"] == "0.123.9"
        assert first["ecosystem"] == "PyPI"

    def test_minimum_versions(self):
        """Lower-bound specifiers (>=) use the bound as the version."""
        parsed = parse_requirements_txt("idna>=3.7\ncryptography>=42.0.0")
        assert len(parsed) == 2
        assert parsed[0]["version"] == "3.7"

    def test_comments_and_blanks_ignored(self):
        """Comments, blank lines and -r includes are skipped."""
        parsed = parse_requirements_txt(
            "# Comment\n\nfastapi==1.0.0\n# Another comment\n-r base.txt"
        )
        assert len(parsed) == 1

    def test_bare_package_name(self):
        """An unversioned requirement gets the 'latest' placeholder."""
        parsed = parse_requirements_txt("requests")
        assert len(parsed) == 1
        assert parsed[0]["version"] == "latest"

    def test_empty_content(self):
        assert parse_requirements_txt("") == []
class TestParseYarnLock:
    """Tests for yarn.lock parsing (basic)."""

    def test_basic_format(self):
        """Two classic-yarn entries yield two components."""
        lockfile = (
            '"react@^18.0.0":\n version "18.3.0"\n'
            '"lodash@^4.17.0":\n version "4.17.21"'
        )
        assert len(parse_yarn_lock(lockfile)) == 2
class TestDetectAndParse:
    """Tests for file type detection and parsing."""

    def test_package_lock_detection(self):
        """package-lock.json routes to the npm parser."""
        lockfile = json.dumps({"packages": {"node_modules/x": {"version": "1.0"}}})
        parsed, detected_ecosystem = detect_and_parse("package-lock.json", lockfile)
        assert detected_ecosystem == "npm"
        assert len(parsed) == 1

    def test_requirements_detection(self):
        """requirements.txt routes to the PyPI parser."""
        parsed, detected_ecosystem = detect_and_parse("requirements.txt", "flask==2.0.0")
        assert detected_ecosystem == "PyPI"
        assert len(parsed) == 1

    def test_unknown_format(self):
        """Unsupported filenames yield no components."""
        parsed, detected_ecosystem = detect_and_parse("readme.md", "Hello World")
        assert len(parsed) == 0
class TestGenerateSbom:
    """Tests for CycloneDX SBOM generation."""

    def test_sbom_structure(self):
        """The SBOM carries format/spec headers and a purl per component."""
        component = {
            "name": "react",
            "version": "18.3.0",
            "type": "library",
            "ecosystem": "npm",
            "license": "MIT",
        }
        sbom = generate_sbom([component], "npm")
        assert sbom["bomFormat"] == "CycloneDX"
        assert sbom["specVersion"] == "1.5"
        assert len(sbom["components"]) == 1
        assert sbom["components"][0]["purl"] == "pkg:npm/react@18.3.0"

    def test_sbom_empty_components(self):
        assert generate_sbom([], "npm")["components"] == []

    def test_sbom_unknown_license_excluded(self):
        """'unknown' license placeholders are dropped from the SBOM."""
        component = {
            "name": "x",
            "version": "1.0",
            "type": "library",
            "ecosystem": "npm",
            "license": "unknown",
        }
        sbom = generate_sbom([component], "npm")
        assert sbom["components"][0]["licenses"] == []
class TestMapOsvSeverity:
    """Tests for OSV severity mapping (one case per severity band)."""

    def test_critical_severity(self):
        vuln = {"database_specific": {"severity": "CRITICAL"}}
        severity, cvss = map_osv_severity(vuln)
        assert severity == "CRITICAL"
        assert cvss == 9.5

    def test_high_severity(self):
        # Previously untested band: HIGH maps to CVSS 7.5.
        vuln = {"database_specific": {"severity": "HIGH"}}
        severity, cvss = map_osv_severity(vuln)
        assert severity == "HIGH"
        assert cvss == 7.5

    def test_medium_default(self):
        # No severity information at all falls back to MEDIUM/5.0.
        vuln = {}
        severity, cvss = map_osv_severity(vuln)
        assert severity == "MEDIUM"
        assert cvss == 5.0

    def test_low_severity(self):
        vuln = {"database_specific": {"severity": "LOW"}}
        severity, cvss = map_osv_severity(vuln)
        assert severity == "LOW"
        assert cvss == 2.5
class TestExtractFixVersion:
    """Tests for extracting fix version from OSV data."""

    @staticmethod
    def _vuln(package_name, events):
        """Build a minimal OSV record with one affected package and its events."""
        return {
            "affected": [{
                "package": {"name": package_name},
                "ranges": [{"events": events}],
            }]
        }

    def test_fix_version_found(self):
        record = self._vuln("lodash", [{"introduced": "0"}, {"fixed": "4.17.21"}])
        assert extract_fix_version(record, "lodash") == "4.17.21"

    def test_no_fix_version(self):
        record = self._vuln("x", [{"introduced": "0"}])
        assert extract_fix_version(record, "x") is None

    def test_wrong_package_name(self):
        record = self._vuln("other", [{"fixed": "1.0"}])
        assert extract_fix_version(record, "lodash") is None