breakpilot-compliance/backend-compliance/compliance/services/auto_risk_updater.py
Sharang Parnerkar 3320ef94fc refactor: phase 0 guardrails + phase 1 step 2 (models.py split)
Squash of branch refactor/phase0-guardrails-and-models-split — 4 commits,
81 files, 173/173 pytest green, OpenAPI contract preserved (360 paths /
484 operations).

## Phase 0 — Architecture guardrails

Three defense-in-depth layers to keep the architecture rules enforced
regardless of who opens Claude Code in this repo:

  1. .claude/settings.json PreToolUse hook on Write/Edit blocks any file
     that would exceed the 500-line hard cap. Auto-loads in every Claude
     session in this repo.
  2. scripts/githooks/pre-commit (install via scripts/install-hooks.sh)
     enforces the LOC cap locally, freezes migrations/ without
     [migration-approved], and protects guardrail files without
     [guardrail-change].
  3. .gitea/workflows/ci.yaml gains loc-budget + guardrail-integrity +
     sbom-scan (syft+grype) jobs, adds mypy --strict for the new Python
     packages (compliance/{services,repositories,domain,schemas}), and
     tsc --noEmit for admin-compliance + developer-portal.

Per-language conventions documented in AGENTS.python.md, AGENTS.go.md,
AGENTS.typescript.md at the repo root — layering, tooling, and explicit
"what you may NOT do" lists. Root CLAUDE.md is prepended with the six
non-negotiable rules. Each of the 10 services gets a README.md.

scripts/check-loc.sh enforces soft 300 / hard 500 and surfaces the
current baseline of 205 hard + 161 soft violations so Phases 1-4 can
drain it incrementally. CI gates only CHANGED files in PRs so the
legacy baseline does not block unrelated work.
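The changed-files-only gating can be sketched as follows. This is a minimal illustration of the idea, not the actual scripts/check-loc.sh; the function names and the `origin/main` base ref are assumptions, only the soft 300 / hard 500 thresholds come from the text above.

```python
# Hypothetical sketch of a LOC budget check that gates only files
# changed relative to the PR base branch (soft 300 / hard 500).
import subprocess
import sys

SOFT_CAP, HARD_CAP = 300, 500


def changed_files(base: str = "origin/main") -> list[str]:
    """Source files touched relative to the base branch (added/copied/modified)."""
    out = subprocess.run(
        ["git", "diff", "--name-only", "--diff-filter=ACM", base],
        capture_output=True, text=True, check=True,
    ).stdout
    return [f for f in out.splitlines() if f.endswith((".py", ".go", ".ts"))]


def check(paths: list[str]) -> int:
    """Return non-zero only when a changed file breaks the hard cap."""
    status = 0
    for path in paths:
        with open(path, encoding="utf-8") as fh:
            loc = sum(1 for _ in fh)
        if loc > HARD_CAP:
            print(f"FAIL {path}: {loc} LOC exceeds hard cap {HARD_CAP}")
            status = 1
        elif loc > SOFT_CAP:
            print(f"WARN {path}: {loc} LOC exceeds soft cap {SOFT_CAP}")
    return status
```

Because the legacy baseline (205 hard violations) is excluded by construction, only files a PR actually touches can fail the job.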

## Deprecation sweep

47 files. Pydantic V1 regex= -> pattern= (2 sites), class Config ->
ConfigDict in source_policy_router.py (schemas.py intentionally skipped;
it is the Phase 1 Step 3 split target). datetime.utcnow() ->
datetime.now(timezone.utc) everywhere including SQLAlchemy default=
callables. All DB columns already declare timezone=True, so this is a
latent-bug fix on the Python side, not a schema change.
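The shape of the datetime fix, including the SQLAlchemy `default=` case, looks roughly like this (illustrative, not a diff from the actual sweep):

```python
# datetime.utcnow() is deprecated and returns a *naive* datetime;
# datetime.now(timezone.utc) returns an aware one.
from datetime import datetime, timezone

# Before (deprecated, naive):
#   created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
# After (aware; note a callable is passed, not a call result):
#   created_at = Column(DateTime(timezone=True),
#                       default=lambda: datetime.now(timezone.utc))

now = datetime.now(timezone.utc)
assert now.tzinfo is not None  # aware, unlike utcnow()
```

Since the columns already declare `timezone=True`, only the Python-side default callables needed to change.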

DeprecationWarning count dropped from 158 to 35.

## Phase 1 Step 1 — Contract test harness

tests/contracts/test_openapi_baseline.py diffs the live FastAPI /openapi.json
against tests/contracts/openapi.baseline.json on every test run. Fails on
removed paths, removed status codes, or new required request body fields.
Regenerate only via tests/contracts/regenerate_baseline.py after a
consumer-updated contract change. This is the safety harness for all
subsequent refactor commits.
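The diff logic can be sketched as below. This is not the actual test file, only an illustration of the first two failure conditions (removed paths, removed status codes); the function name is made up.

```python
# Hedged sketch of an OpenAPI baseline diff: flag anything a consumer
# could observe as a removal between baseline and live specs.
def breaking_changes(baseline: dict, live: dict) -> list[str]:
    """Return human-readable breaking changes between two OpenAPI specs."""
    problems = []
    for path, ops in baseline.get("paths", {}).items():
        live_ops = live.get("paths", {}).get(path)
        if live_ops is None:
            problems.append(f"removed path: {path}")
            continue
        for method, op in ops.items():
            live_op = live_ops.get(method)
            if live_op is None:
                problems.append(f"removed operation: {method.upper()} {path}")
                continue
            for code in op.get("responses", {}):
                if code not in live_op.get("responses", {}):
                    problems.append(
                        f"removed status {code}: {method.upper()} {path}"
                    )
    return problems
```

The real harness additionally fails on new required request body fields, which needs a walk of the `requestBody` schemas.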

## Phase 1 Step 2 — models.py split (1466 -> 85 LOC shim)

compliance/db/models.py is decomposed into seven sibling aggregate modules
following the existing repo pattern (dsr_models.py, vvt_models.py, ...):

  regulation_models.py       (134) — Regulation, Requirement
  control_models.py          (279) — Control, Mapping, Evidence, Risk
  ai_system_models.py        (141) — AISystem, AuditExport
  service_module_models.py   (176) — ServiceModule, ModuleRegulation, ModuleRisk
  audit_session_models.py    (177) — AuditSession, AuditSignOff
  isms_governance_models.py  (323) — ISMSScope, Context, Policy, Objective, SoA
  isms_audit_models.py       (468) — Finding, CAPA, MgmtReview, InternalAudit,
                                     AuditTrail, Readiness

models.py becomes an 85-line re-export shim in dependency order so
existing imports continue to work unchanged. Schema is byte-identical:
__tablename__, column definitions, relationship strings, back_populates,
cascade directives all preserved.
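The shim's shape is roughly the following fragment (illustrative only; the actual file re-exports all seven aggregates and its exact contents may differ):

```python
# compliance/db/models.py — re-export shim, imports in dependency order
from .regulation_models import Regulation, Requirement        # no FK deps
from .control_models import Control, Mapping, Evidence, Risk  # -> Regulation
from .ai_system_models import AISystem, AuditExport

__all__ = [
    "Regulation", "Requirement",
    "Control", "Mapping", "Evidence", "Risk",
    "AISystem", "AuditExport",
    # ... remaining aggregates re-exported in dependency order
]
```

Any caller doing `from compliance.db.models import Control` keeps working with no import-site changes.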

All new sibling files are under the 500-line hard cap; largest is
isms_audit_models.py at 468. No file in compliance/db/ now exceeds
the hard cap.

## Phase 1 Step 3 — infrastructure only

backend-compliance/compliance/{schemas,domain,repositories}/ packages
are created as landing zones with docstrings. compliance/domain/
exports DomainError / NotFoundError / ConflictError / ValidationError /
PermissionError — the base classes services will use to raise
domain-level errors instead of HTTPException.
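A plausible shape of that hierarchy, from the class names listed above (the actual bodies may differ):

```python
# Domain-level error hierarchy; services raise these, routers translate
# them to HTTP responses at the edge.
class DomainError(Exception):
    """Base class for domain-level errors raised by services."""

class NotFoundError(DomainError):
    """Requested aggregate does not exist."""

class ConflictError(DomainError):
    """State conflict, e.g. a duplicate identifier."""

class ValidationError(DomainError):
    """Input violates a domain invariant."""

class PermissionError(DomainError):  # intentionally shadows the builtin here
    """Caller is not allowed to perform the operation."""

# A router layer would then translate uniformly, e.g.:
#   except NotFoundError as exc: raise HTTPException(404, str(exc))
```

Keeping HTTPException out of services is what makes them testable without FastAPI in the loop.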

PHASE1_RUNBOOK.md at backend-compliance/PHASE1_RUNBOOK.md documents
the nine-step execution plan for Phase 1: snapshot baseline,
characterization tests, split models.py (this commit), split schemas.py
(next), extract services, extract repositories, mypy --strict, coverage.

## Verification

  backend-compliance/.venv-phase1: uv python install 3.12 + pip -r requirements.txt
  PYTHONPATH=. pytest compliance/tests/ tests/contracts/
  -> 173 passed, 0 failed, 35 warnings, OpenAPI 360/484 unchanged

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 13:18:29 +02:00


"""
Automatic Risk Update Service for Compliance Framework.
This service processes CI/CD security scan results and automatically:
1. Updates Control status based on scan findings
2. Adjusts Risk levels when critical CVEs are found
3. Creates Evidence records from scan reports
4. Generates alerts for significant findings
Sprint 6: CI/CD Evidence Collection (2026-01-18)
"""
import logging
from datetime import datetime, timezone
from typing import Dict, List, Optional
from dataclasses import dataclass
from enum import Enum
from sqlalchemy.orm import Session
from ..db.models import (
    ControlDB, ControlStatusEnum,
    EvidenceDB, EvidenceStatusEnum,
    RiskDB,
)
from ..db.repository import ControlRepository, EvidenceRepository, RiskRepository
logger = logging.getLogger(__name__)


class ScanType(str, Enum):
    """Types of CI/CD security scans."""

    SAST = "sast"              # Static Application Security Testing
    DEPENDENCY = "dependency"  # Dependency/CVE scanning
    SECRET = "secret"          # Secret detection
    CONTAINER = "container"    # Container image scanning
    SBOM = "sbom"              # Software Bill of Materials


class FindingSeverity(str, Enum):
    """Severity levels for security findings."""

    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    INFO = "info"


@dataclass
class ScanResult:
    """Represents a CI/CD scan result."""

    scan_type: ScanType
    tool: str
    timestamp: datetime
    commit_sha: str
    branch: str
    control_id: str           # Mapped Control ID (e.g., SDLC-001)
    findings: Dict[str, int]  # {"critical": 0, "high": 2, ...}
    raw_report: Optional[Dict] = None
    ci_job_id: Optional[str] = None


@dataclass
class RiskUpdateResult:
    """Result of an automatic risk update."""

    control_id: str
    control_updated: bool
    old_status: Optional[str]
    new_status: Optional[str]
    evidence_created: bool
    evidence_id: Optional[str]
    risks_affected: List[str]
    alerts_generated: List[str]
    message: str


# Mapping from Control IDs to scan types
CONTROL_SCAN_MAPPING = {
    "SDLC-001": ScanType.SAST,        # SAST Scanning
    "SDLC-002": ScanType.DEPENDENCY,  # Dependency Scanning
    "SDLC-003": ScanType.SECRET,      # Secret Detection
    "SDLC-006": ScanType.CONTAINER,   # Container Scanning
    "CRA-001": ScanType.SBOM,         # SBOM Generation
}


class AutoRiskUpdater:
    """
    Automatically updates Controls and Risks based on CI/CD scan results.

    Flow:
    1. Receive scan result from CI/CD pipeline
    2. Determine Control status based on findings
    3. Create Evidence record
    4. Update linked Risks if necessary
    5. Generate alerts for critical findings
    """

    def __init__(self, db: Session):
        self.db = db
        self.control_repo = ControlRepository(db)
        self.evidence_repo = EvidenceRepository(db)
        self.risk_repo = RiskRepository(db)

    def process_scan_result(self, scan_result: ScanResult) -> RiskUpdateResult:
        """
        Process a CI/CD scan result and update Compliance status.

        Args:
            scan_result: The scan result from CI/CD pipeline

        Returns:
            RiskUpdateResult with details of all updates made
        """
        logger.info(
            f"Processing {scan_result.scan_type.value} scan "
            f"for control {scan_result.control_id}"
        )

        # Find the Control
        control = self.control_repo.get_by_control_id(scan_result.control_id)
        if not control:
            logger.warning(f"Control {scan_result.control_id} not found")
            return RiskUpdateResult(
                control_id=scan_result.control_id,
                control_updated=False,
                old_status=None,
                new_status=None,
                evidence_created=False,
                evidence_id=None,
                risks_affected=[],
                alerts_generated=[],
                message=f"Control {scan_result.control_id} not found",
            )

        old_status = control.status.value if control.status else "unknown"

        # Determine new Control status based on findings
        new_status = self._determine_control_status(scan_result.findings)

        # Update Control status
        control_updated = False
        if new_status != old_status:
            control.status = ControlStatusEnum(new_status)
            control.status_notes = self._generate_status_notes(scan_result)
            control.updated_at = datetime.now(timezone.utc)
            control_updated = True
            logger.info(
                f"Control {scan_result.control_id} status changed: "
                f"{old_status} -> {new_status}"
            )

        # Create Evidence record
        evidence = self._create_evidence(control, scan_result)

        # Update linked Risks
        risks_affected = self._update_linked_risks(control, new_status, scan_result.findings)

        # Generate alerts for critical findings
        alerts = self._generate_alerts(scan_result, new_status)

        # Commit all changes
        self.db.commit()

        return RiskUpdateResult(
            control_id=scan_result.control_id,
            control_updated=control_updated,
            old_status=old_status,
            new_status=new_status,
            evidence_created=True,
            evidence_id=evidence.id,
            risks_affected=risks_affected,
            alerts_generated=alerts,
            message=f"Processed {scan_result.scan_type.value} scan successfully",
        )
    def _determine_control_status(self, findings: Dict[str, int]) -> str:
        """
        Determine Control status based on security findings.

        Rules:
        - Any CRITICAL findings -> fail
        - >5 HIGH findings -> fail
        - 1-5 HIGH findings -> partial
        - >10 MEDIUM findings -> partial
        - Few MEDIUM/only LOW findings -> pass (with notes)
        - No findings -> pass
        """
        critical = findings.get("critical", 0)
        high = findings.get("high", 0)
        medium = findings.get("medium", 0)

        if critical > 0:
            return ControlStatusEnum.FAIL.value
        elif high > 5:
            return ControlStatusEnum.FAIL.value
        elif high > 0:
            return ControlStatusEnum.PARTIAL.value
        elif medium > 10:
            return ControlStatusEnum.PARTIAL.value
        else:
            return ControlStatusEnum.PASS.value

    def _generate_status_notes(self, scan_result: ScanResult) -> str:
        """Generate human-readable status notes from scan result."""
        findings = scan_result.findings
        parts = []
        if findings.get("critical", 0) > 0:
            parts.append(f"{findings['critical']} CRITICAL")
        if findings.get("high", 0) > 0:
            parts.append(f"{findings['high']} HIGH")
        if findings.get("medium", 0) > 0:
            parts.append(f"{findings['medium']} MEDIUM")

        scanned_at = scan_result.timestamp.strftime('%Y-%m-%d %H:%M')
        if parts:
            findings_str = ", ".join(parts)
            return f"Auto-updated from {scan_result.tool} scan ({scanned_at}): {findings_str} findings"
        else:
            return f"Auto-updated from {scan_result.tool} scan ({scanned_at}): No significant findings"

    def _create_evidence(self, control: ControlDB, scan_result: ScanResult) -> EvidenceDB:
        """Create an Evidence record from the scan result."""
        from uuid import uuid4

        evidence = EvidenceDB(
            id=str(uuid4()),
            control_id=control.id,
            evidence_type=f"{scan_result.scan_type.value}_report",
            title=f"{scan_result.tool} Scan - {scan_result.timestamp.strftime('%Y-%m-%d')}",
            description=self._generate_status_notes(scan_result),
            source="ci_pipeline",
            ci_job_id=scan_result.ci_job_id,
            status=EvidenceStatusEnum.VALID,
            valid_from=datetime.now(timezone.utc),
            collected_at=scan_result.timestamp,
        )
        self.db.add(evidence)
        logger.info(f"Created evidence {evidence.id} for control {control.control_id}")
        return evidence
    def _update_linked_risks(
        self,
        control: ControlDB,
        new_status: str,
        findings: Dict[str, int],
    ) -> List[str]:
        """
        Update Risks that are mitigated by this Control.

        When a Control fails:
        - Increase residual risk of linked Risks
        - Update risk status to "open" if it was "mitigated"
        When a Control passes:
        - Decrease residual risk if appropriate
        """
        affected_risks = []

        # Find all Risks that list this Control as a mitigating control
        all_risks = self.risk_repo.get_all()
        for risk in all_risks:
            if not risk.mitigating_controls:
                continue
            mitigating_ids = risk.mitigating_controls
            if control.control_id not in mitigating_ids:
                continue

            # This Risk is linked to the affected Control
            risk_updated = False

            if new_status == ControlStatusEnum.FAIL.value:
                # Control failed - increase risk
                if risk.status == "mitigated":
                    risk.status = "open"
                    risk_updated = True
                # Increase residual likelihood if critical findings
                if findings.get("critical", 0) > 0:
                    old_likelihood = risk.residual_likelihood or risk.likelihood
                    risk.residual_likelihood = min(5, old_likelihood + 1)
                    risk.residual_risk = RiskDB.calculate_risk_level(
                        risk.residual_likelihood,
                        risk.residual_impact or risk.impact,
                    )
                    risk_updated = True

            elif new_status == ControlStatusEnum.PASS.value:
                # Control passed - potentially reduce risk
                if risk.status == "open":
                    # Check if all mitigating controls are passing
                    all_passing = True
                    for ctrl_id in mitigating_ids:
                        other_ctrl = self.control_repo.get_by_control_id(ctrl_id)
                        if other_ctrl and other_ctrl.status != ControlStatusEnum.PASS:
                            all_passing = False
                            break
                    if all_passing:
                        risk.status = "mitigated"
                        risk_updated = True

            if risk_updated:
                risk.last_assessed_at = datetime.now(timezone.utc)
                risk.updated_at = datetime.now(timezone.utc)
                affected_risks.append(risk.risk_id)
                logger.info(
                    f"Updated risk {risk.risk_id} due to control "
                    f"{control.control_id} status change"
                )

        return affected_risks
    def _generate_alerts(self, scan_result: ScanResult, new_status: str) -> List[str]:
        """
        Generate alerts for significant findings.

        Alert conditions:
        - Any CRITICAL findings
        - Control status changed to FAIL
        - >10 HIGH findings in one scan
        """
        alerts = []
        findings = scan_result.findings

        if findings.get("critical", 0) > 0:
            alert_msg = f"CRITICAL: {findings['critical']} critical vulnerabilities found in {scan_result.tool} scan"
            alerts.append(alert_msg)
            logger.warning(alert_msg)

        if new_status == ControlStatusEnum.FAIL.value:
            alert_msg = f"Control {scan_result.control_id} status changed to FAIL"
            alerts.append(alert_msg)
            logger.warning(alert_msg)

        if findings.get("high", 0) > 10:
            alert_msg = f"HIGH: {findings['high']} high-severity findings in {scan_result.tool} scan"
            alerts.append(alert_msg)
            logger.warning(alert_msg)

        return alerts
    def process_evidence_collect_request(
        self,
        tool: str,
        control_id: str,
        evidence_type: str,
        timestamp: str,
        commit_sha: str,
        ci_job_id: Optional[str] = None,
        findings: Optional[Dict[str, int]] = None,
        **kwargs,
    ) -> RiskUpdateResult:
        """
        Process an evidence collection request from CI/CD.

        This is the main entry point for the /evidence/collect API endpoint.
        """
        # Parse timestamp, falling back to "now" on malformed input
        try:
            ts = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
        except (ValueError, AttributeError):
            ts = datetime.now(timezone.utc)

        # Determine scan type from the Control ID (default: SAST)
        scan_type = CONTROL_SCAN_MAPPING.get(control_id, ScanType.SAST)

        # Create ScanResult
        scan_result = ScanResult(
            scan_type=scan_type,
            tool=tool,
            timestamp=ts,
            commit_sha=commit_sha,
            branch=kwargs.get("branch", "unknown"),
            control_id=control_id,
            findings=findings or {"critical": 0, "high": 0, "medium": 0, "low": 0},
            ci_job_id=ci_job_id,
        )
        return self.process_scan_result(scan_result)


def create_auto_risk_updater(db: Session) -> AutoRiskUpdater:
    """Factory function for creating AutoRiskUpdater instances."""
    return AutoRiskUpdater(db)