fix(quality): Ruff/CVE/TS-Fixes, 104 neue Tests, Complexity-Refactoring
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Failing after 30s
CI / test-python-backend-compliance (push) Successful in 30s
CI / test-python-document-crawler (push) Successful in 21s
CI / test-python-dsms-gateway (push) Successful in 17s

- Ruff: 144 auto-fixes (unused imports, == None → is None), F821/F811/F841 manuell
- CVEs: python-multipart>=0.0.22, weasyprint>=68.0, pillow>=12.1.1, npm audit fix (0 vulns)
- TS: 5 tote Drafting-Engine-Dateien entfernt, allowed-facts/sanitizer/StepHeader/context fixes
- Tests: +104 (ISMS 58, Evidence 18, VVT 14, Generation 14) → 1449 passed
- Refactoring: collect_ci_evidence (F→A), row_to_response (E→A), extract_requirements (E→A)
- Dead Code: pca-platform, 7 Go-Handler, dsr_api.py, duplicate Schemas entfernt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-07 19:00:33 +01:00
parent 6509e64dd9
commit 95fcba34cd
124 changed files with 2533 additions and 15709 deletions

View File

@@ -248,7 +248,231 @@ async def upload_evidence(
# ============================================================================
# CI/CD Evidence Collection
# CI/CD Evidence Collection — helpers
# ============================================================================
# Map CI source names to the corresponding control IDs.
# The `source` field of incoming CI evidence must be one of these keys;
# the endpoint rejects anything else with a 400 (see collect_ci_evidence).
SOURCE_CONTROL_MAP = {
    "sast": "SDLC-001",             # static analysis (SAST) reports
    "dependency_scan": "SDLC-002",  # dependency vulnerability scans
    "secret_scan": "SDLC-003",      # secret detection (Gitleaks, TruffleHog)
    "code_review": "SDLC-004",      # code review metrics
    "sbom": "SDLC-005",             # software bill of materials
    "container_scan": "SDLC-006",   # container image scans
    "test_results": "AUD-001",      # test results (audit control)
}
def _parse_ci_evidence(data: dict) -> dict:
"""
Parse and validate incoming CI evidence data.
Returns a dict with:
- report_json: str (serialised JSON)
- report_hash: str (SHA-256 hex digest)
- evidence_status: str ("valid" or "failed")
- findings_count: int
- critical_findings: int
"""
report_json = json.dumps(data) if data else "{}"
report_hash = hashlib.sha256(report_json.encode()).hexdigest()
findings_count = 0
critical_findings = 0
if data and isinstance(data, dict):
# Semgrep format
if "results" in data:
findings_count = len(data.get("results", []))
critical_findings = len([
r for r in data.get("results", [])
if r.get("extra", {}).get("severity", "").upper() in ["CRITICAL", "HIGH"]
])
# Trivy format
elif "Results" in data:
for result in data.get("Results", []):
vulns = result.get("Vulnerabilities", [])
findings_count += len(vulns)
critical_findings += len([
v for v in vulns
if v.get("Severity", "").upper() in ["CRITICAL", "HIGH"]
])
# Generic findings array
elif "findings" in data:
findings_count = len(data.get("findings", []))
# SBOM format - just count components
elif "components" in data:
findings_count = len(data.get("components", []))
evidence_status = "failed" if critical_findings > 0 else "valid"
return {
"report_json": report_json,
"report_hash": report_hash,
"evidence_status": evidence_status,
"findings_count": findings_count,
"critical_findings": critical_findings,
}
def _store_evidence(
    db: Session,
    *,
    control_db_id: str,
    source: str,
    parsed: dict,
    ci_job_id: str,
    ci_job_url: str,
    report_data: dict,
) -> EvidenceDB:
    """
    Persist a CI evidence item to the database and write the report file.

    Args:
        db: Active session; the new record is added, committed and refreshed.
        control_db_id: DB primary key of the control the evidence belongs to.
        source: CI source name (e.g. "sast", "container_scan").
        parsed: Output of _parse_ci_evidence() for this report.
        ci_job_id: CI job identifier; empty values are omitted from the description.
        ci_job_url: CI job URL; empty values are omitted from the description.
        report_data: Raw report payload, written to disk as pretty-printed JSON.

    Returns the created EvidenceDB instance (already committed).
    """
    findings_count = parsed["findings_count"]
    critical_findings = parsed["critical_findings"]

    # Build title and description
    title = f"{source.upper()} Report - {datetime.now().strftime('%Y-%m-%d %H:%M')}"
    description = "Automatically collected from CI/CD pipeline"
    if findings_count > 0:
        description += f"\n- Total findings: {findings_count}"
    if critical_findings > 0:
        description += f"\n- Critical/High findings: {critical_findings}"
    if ci_job_id:
        description += f"\n- CI Job ID: {ci_job_id}"
    if ci_job_url:
        description += f"\n- CI Job URL: {ci_job_url}"

    # Store report file. The hash prefix in the name disambiguates reports
    # collected within the same second.
    upload_dir = f"/tmp/compliance_evidence/ci/{source}"
    os.makedirs(upload_dir, exist_ok=True)
    file_name = f"{source}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{parsed['report_hash'][:8]}.json"
    file_path = os.path.join(upload_dir, file_name)
    # Write UTF-8 explicitly so the artifact does not depend on the locale
    # default encoding of the host running the API.
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(report_data or {}, f, indent=2)

    # Create evidence record; evidence is considered valid for 90 days.
    evidence = EvidenceDB(
        id=str(uuid_module.uuid4()),
        control_id=control_db_id,
        evidence_type=f"ci_{source}",
        title=title,
        description=description,
        artifact_path=file_path,
        artifact_hash=parsed["report_hash"],
        file_size_bytes=len(parsed["report_json"]),
        mime_type="application/json",
        source="ci_pipeline",
        ci_job_id=ci_job_id,
        valid_from=datetime.utcnow(),
        valid_until=datetime.utcnow() + timedelta(days=90),
        status=EvidenceStatusEnum(parsed["evidence_status"]),
    )
    db.add(evidence)
    db.commit()
    db.refresh(evidence)
    return evidence
def _extract_findings_detail(report_data: dict) -> dict:
"""
Extract severity-bucketed finding counts from report data.
Returns dict with keys: critical, high, medium, low.
"""
findings_detail = {
"critical": 0,
"high": 0,
"medium": 0,
"low": 0,
}
if not report_data:
return findings_detail
# Semgrep format
if "results" in report_data:
for r in report_data.get("results", []):
severity = r.get("extra", {}).get("severity", "").upper()
if severity == "CRITICAL":
findings_detail["critical"] += 1
elif severity == "HIGH":
findings_detail["high"] += 1
elif severity == "MEDIUM":
findings_detail["medium"] += 1
elif severity in ["LOW", "INFO"]:
findings_detail["low"] += 1
# Trivy format
elif "Results" in report_data:
for result in report_data.get("Results", []):
for v in result.get("Vulnerabilities", []):
severity = v.get("Severity", "").upper()
if severity == "CRITICAL":
findings_detail["critical"] += 1
elif severity == "HIGH":
findings_detail["high"] += 1
elif severity == "MEDIUM":
findings_detail["medium"] += 1
elif severity == "LOW":
findings_detail["low"] += 1
# Generic findings with severity
elif "findings" in report_data:
for f in report_data.get("findings", []):
severity = f.get("severity", "").upper()
if severity == "CRITICAL":
findings_detail["critical"] += 1
elif severity == "HIGH":
findings_detail["high"] += 1
elif severity == "MEDIUM":
findings_detail["medium"] += 1
else:
findings_detail["low"] += 1
return findings_detail
def _update_risks(db: Session, *, source: str, control_id: str, ci_job_id: str, report_data: dict):
    """
    Update risk status based on new evidence.

    Buckets the report's findings by severity and hands them to an
    AutoRiskUpdater, which updates the Control status and any linked Risks.

    Returns the update result, or None if the updater raised.
    """
    severity_counts = _extract_findings_detail(report_data)
    commit_sha = report_data.get("commit_sha", "unknown") if report_data else "unknown"

    try:
        updater = AutoRiskUpdater(db)
        result = updater.process_evidence_collect_request(
            tool=source,
            control_id=control_id,
            evidence_type=f"ci_{source}",
            timestamp=datetime.utcnow().isoformat(),
            commit_sha=commit_sha,
            ci_job_id=ci_job_id,
            findings=severity_counts,
        )
        logger.info(f"Auto-risk update completed for {control_id}: "
                    f"control_updated={result.control_updated}, "
                    f"risks_affected={len(result.risks_affected)}")
        return result
    except Exception as e:
        # Risk updates are best-effort: evidence collection must not fail
        # just because the risk engine does.
        logger.error(f"Auto-risk update failed for {control_id}: {str(e)}")
        return None
# ============================================================================
# CI/CD Evidence Collection — endpoint
# ============================================================================
@router.post("/evidence/collect")
@@ -274,17 +498,6 @@ async def collect_ci_evidence(
- secret_scan: Secret detection (Gitleaks, TruffleHog)
- code_review: Code review metrics
"""
# Map source to control_id
SOURCE_CONTROL_MAP = {
"sast": "SDLC-001",
"dependency_scan": "SDLC-002",
"secret_scan": "SDLC-003",
"code_review": "SDLC-004",
"sbom": "SDLC-005",
"container_scan": "SDLC-006",
"test_results": "AUD-001",
}
if source not in SOURCE_CONTROL_MAP:
raise HTTPException(
status_code=400,
@@ -302,173 +515,38 @@ async def collect_ci_evidence(
detail=f"Control {control_id} not found. Please seed the database first."
)
# Parse and validate report data
report_json = json.dumps(report_data) if report_data else "{}"
report_hash = hashlib.sha256(report_json.encode()).hexdigest()
# --- 1. Parse and validate report data ---
parsed = _parse_ci_evidence(report_data)
# Determine evidence status based on report content
evidence_status = "valid"
findings_count = 0
critical_findings = 0
if report_data:
# Try to extract findings from common report formats
if isinstance(report_data, dict):
# Semgrep format
if "results" in report_data:
findings_count = len(report_data.get("results", []))
critical_findings = len([
r for r in report_data.get("results", [])
if r.get("extra", {}).get("severity", "").upper() in ["CRITICAL", "HIGH"]
])
# Trivy format
elif "Results" in report_data:
for result in report_data.get("Results", []):
vulns = result.get("Vulnerabilities", [])
findings_count += len(vulns)
critical_findings += len([
v for v in vulns
if v.get("Severity", "").upper() in ["CRITICAL", "HIGH"]
])
# Generic findings array
elif "findings" in report_data:
findings_count = len(report_data.get("findings", []))
# SBOM format - just count components
elif "components" in report_data:
findings_count = len(report_data.get("components", []))
# If critical findings exist, mark as failed
if critical_findings > 0:
evidence_status = "failed"
# Create evidence title
title = f"{source.upper()} Report - {datetime.now().strftime('%Y-%m-%d %H:%M')}"
description = f"Automatically collected from CI/CD pipeline"
if findings_count > 0:
description += f"\n- Total findings: {findings_count}"
if critical_findings > 0:
description += f"\n- Critical/High findings: {critical_findings}"
if ci_job_id:
description += f"\n- CI Job ID: {ci_job_id}"
if ci_job_url:
description += f"\n- CI Job URL: {ci_job_url}"
# Store report file
upload_dir = f"/tmp/compliance_evidence/ci/{source}"
os.makedirs(upload_dir, exist_ok=True)
file_name = f"{source}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{report_hash[:8]}.json"
file_path = os.path.join(upload_dir, file_name)
with open(file_path, "w") as f:
json.dump(report_data or {}, f, indent=2)
# Create evidence record directly
evidence = EvidenceDB(
id=str(uuid_module.uuid4()),
control_id=control.id,
evidence_type=f"ci_{source}",
title=title,
description=description,
artifact_path=file_path,
artifact_hash=report_hash,
file_size_bytes=len(report_json),
mime_type="application/json",
source="ci_pipeline",
# --- 2. Store evidence in DB and write report file ---
evidence = _store_evidence(
db,
control_db_id=control.id,
source=source,
parsed=parsed,
ci_job_id=ci_job_id,
valid_from=datetime.utcnow(),
valid_until=datetime.utcnow() + timedelta(days=90),
status=EvidenceStatusEnum(evidence_status),
ci_job_url=ci_job_url,
report_data=report_data,
)
db.add(evidence)
db.commit()
db.refresh(evidence)
# =========================================================================
# AUTOMATIC RISK UPDATE
# Update Control status and linked Risks based on findings
# =========================================================================
risk_update_result = None
try:
# Extract detailed findings for risk assessment
findings_detail = {
"critical": 0,
"high": 0,
"medium": 0,
"low": 0,
}
if report_data:
# Semgrep format
if "results" in report_data:
for r in report_data.get("results", []):
severity = r.get("extra", {}).get("severity", "").upper()
if severity == "CRITICAL":
findings_detail["critical"] += 1
elif severity == "HIGH":
findings_detail["high"] += 1
elif severity == "MEDIUM":
findings_detail["medium"] += 1
elif severity in ["LOW", "INFO"]:
findings_detail["low"] += 1
# Trivy format
elif "Results" in report_data:
for result in report_data.get("Results", []):
for v in result.get("Vulnerabilities", []):
severity = v.get("Severity", "").upper()
if severity == "CRITICAL":
findings_detail["critical"] += 1
elif severity == "HIGH":
findings_detail["high"] += 1
elif severity == "MEDIUM":
findings_detail["medium"] += 1
elif severity == "LOW":
findings_detail["low"] += 1
# Generic findings with severity
elif "findings" in report_data:
for f in report_data.get("findings", []):
severity = f.get("severity", "").upper()
if severity == "CRITICAL":
findings_detail["critical"] += 1
elif severity == "HIGH":
findings_detail["high"] += 1
elif severity == "MEDIUM":
findings_detail["medium"] += 1
else:
findings_detail["low"] += 1
# Use AutoRiskUpdater to update Control status and Risks
auto_updater = AutoRiskUpdater(db)
risk_update_result = auto_updater.process_evidence_collect_request(
tool=source,
control_id=control_id,
evidence_type=f"ci_{source}",
timestamp=datetime.utcnow().isoformat(),
commit_sha=report_data.get("commit_sha", "unknown") if report_data else "unknown",
ci_job_id=ci_job_id,
findings=findings_detail,
)
logger.info(f"Auto-risk update completed for {control_id}: "
f"control_updated={risk_update_result.control_updated}, "
f"risks_affected={len(risk_update_result.risks_affected)}")
except Exception as e:
logger.error(f"Auto-risk update failed for {control_id}: {str(e)}")
# --- 3. Automatic risk update ---
risk_update_result = _update_risks(
db,
source=source,
control_id=control_id,
ci_job_id=ci_job_id,
report_data=report_data,
)
return {
"success": True,
"evidence_id": evidence.id,
"control_id": control_id,
"source": source,
"status": evidence_status,
"findings_count": findings_count,
"critical_findings": critical_findings,
"artifact_path": file_path,
"status": parsed["evidence_status"],
"findings_count": parsed["findings_count"],
"critical_findings": parsed["critical_findings"],
"artifact_path": evidence.artifact_path,
"message": f"Evidence collected successfully for control {control_id}",
"auto_risk_update": {
"enabled": True,