""" FastAPI routes for Evidence management. Endpoints: - /evidence: Evidence listing and creation - /evidence/upload: Evidence file upload - /evidence/collect: CI/CD evidence collection - /evidence/ci-status: CI/CD evidence status """ import logging import os from datetime import datetime, timedelta from typing import Optional from collections import defaultdict import uuid as uuid_module import hashlib import json from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Query from sqlalchemy.orm import Session from classroom_engine.database import get_db from ..db import ( ControlRepository, EvidenceRepository, EvidenceStatusEnum, ) from ..db.models import EvidenceDB, ControlDB from ..services.auto_risk_updater import AutoRiskUpdater from .schemas import ( EvidenceCreate, EvidenceResponse, EvidenceListResponse, ) logger = logging.getLogger(__name__) router = APIRouter(tags=["compliance-evidence"]) # ============================================================================ # Evidence # ============================================================================ @router.get("/evidence", response_model=EvidenceListResponse) async def list_evidence( control_id: Optional[str] = None, evidence_type: Optional[str] = None, status: Optional[str] = None, page: Optional[int] = Query(None, ge=1, description="Page number (1-based)"), limit: Optional[int] = Query(None, ge=1, le=500, description="Items per page"), db: Session = Depends(get_db), ): """List evidence with optional filters and pagination.""" repo = EvidenceRepository(db) if control_id: # First get the control UUID ctrl_repo = ControlRepository(db) control = ctrl_repo.get_by_control_id(control_id) if not control: raise HTTPException(status_code=404, detail=f"Control {control_id} not found") evidence = repo.get_by_control(control.id) else: evidence = repo.get_all() if evidence_type: evidence = [e for e in evidence if e.evidence_type == evidence_type] if status: try: status_enum = EvidenceStatusEnum(status) evidence = [e for e in evidence if e.status == status_enum] except ValueError: pass total = len(evidence) # Apply pagination if requested if page is not None and limit is not None: offset = (page - 1) * limit evidence = evidence[offset:offset + limit] results = [ EvidenceResponse( id=e.id, control_id=e.control_id, evidence_type=e.evidence_type, title=e.title, description=e.description, artifact_path=e.artifact_path, artifact_url=e.artifact_url, artifact_hash=e.artifact_hash, file_size_bytes=e.file_size_bytes, mime_type=e.mime_type, valid_from=e.valid_from, valid_until=e.valid_until, status=e.status.value if e.status else None, source=e.source, ci_job_id=e.ci_job_id, uploaded_by=e.uploaded_by, collected_at=e.collected_at, created_at=e.created_at, ) for e in evidence ] return EvidenceListResponse(evidence=results, total=total) @router.post("/evidence", response_model=EvidenceResponse) async def create_evidence( evidence_data: EvidenceCreate, db: Session = Depends(get_db), ): """Create new evidence record.""" repo = EvidenceRepository(db) # Get control UUID ctrl_repo = ControlRepository(db) control = ctrl_repo.get_by_control_id(evidence_data.control_id) if not control: raise HTTPException(status_code=404, detail=f"Control {evidence_data.control_id} not found") evidence = repo.create( control_id=control.id, evidence_type=evidence_data.evidence_type, title=evidence_data.title, description=evidence_data.description, artifact_url=evidence_data.artifact_url, valid_from=evidence_data.valid_from, valid_until=evidence_data.valid_until, source=evidence_data.source or "api", ci_job_id=evidence_data.ci_job_id, ) db.commit() return EvidenceResponse( id=evidence.id, control_id=evidence.control_id, evidence_type=evidence.evidence_type, title=evidence.title, description=evidence.description, artifact_path=evidence.artifact_path, artifact_url=evidence.artifact_url, artifact_hash=evidence.artifact_hash, file_size_bytes=evidence.file_size_bytes, mime_type=evidence.mime_type, valid_from=evidence.valid_from, valid_until=evidence.valid_until, status=evidence.status.value if evidence.status else None, source=evidence.source, ci_job_id=evidence.ci_job_id, uploaded_by=evidence.uploaded_by, collected_at=evidence.collected_at, created_at=evidence.created_at, ) @router.delete("/evidence/{evidence_id}") async def delete_evidence( evidence_id: str, db: Session = Depends(get_db), ): """Delete an evidence record.""" evidence = db.query(EvidenceDB).filter(EvidenceDB.id == evidence_id).first() if not evidence: raise HTTPException(status_code=404, detail=f"Evidence {evidence_id} not found") # Remove artifact file if it exists if evidence.artifact_path and os.path.exists(evidence.artifact_path): try: os.remove(evidence.artifact_path) except OSError: logger.warning(f"Could not remove artifact file: {evidence.artifact_path}") db.delete(evidence) db.commit() logger.info(f"Evidence {evidence_id} deleted") return {"success": True, "message": f"Evidence {evidence_id} deleted"} @router.post("/evidence/upload") async def upload_evidence( control_id: str = Query(...), evidence_type: str = Query(...), title: str = Query(...), file: UploadFile = File(...), description: Optional[str] = Query(None), db: Session = Depends(get_db), ): """Upload evidence file.""" # Get control UUID ctrl_repo = ControlRepository(db) control = ctrl_repo.get_by_control_id(control_id) if not control: raise HTTPException(status_code=404, detail=f"Control {control_id} not found") # Create upload directory upload_dir = f"/tmp/compliance_evidence/{control_id}" os.makedirs(upload_dir, exist_ok=True) # Save file file_path = os.path.join(upload_dir, file.filename) content = await file.read() with open(file_path, "wb") as f: f.write(content) # Calculate hash file_hash = hashlib.sha256(content).hexdigest() # Create evidence record repo = EvidenceRepository(db) evidence = repo.create( control_id=control.id, evidence_type=evidence_type, title=title, description=description, artifact_path=file_path, artifact_hash=file_hash, file_size_bytes=len(content), mime_type=file.content_type, source="upload", ) db.commit() return EvidenceResponse( id=evidence.id, control_id=evidence.control_id, evidence_type=evidence.evidence_type, title=evidence.title, description=evidence.description, artifact_path=evidence.artifact_path, artifact_url=evidence.artifact_url, artifact_hash=evidence.artifact_hash, file_size_bytes=evidence.file_size_bytes, mime_type=evidence.mime_type, valid_from=evidence.valid_from, valid_until=evidence.valid_until, status=evidence.status.value if evidence.status else None, source=evidence.source, ci_job_id=evidence.ci_job_id, uploaded_by=evidence.uploaded_by, collected_at=evidence.collected_at, created_at=evidence.created_at, ) # ============================================================================ # CI/CD Evidence Collection — helpers # ============================================================================ # Map CI source names to the corresponding control IDs SOURCE_CONTROL_MAP = { "sast": "SDLC-001", "dependency_scan": "SDLC-002", "secret_scan": "SDLC-003", "code_review": "SDLC-004", "sbom": "SDLC-005", "container_scan": "SDLC-006", "test_results": "AUD-001", } def _parse_ci_evidence(data: dict) -> dict: """ Parse and validate incoming CI evidence data. Returns a dict with: - report_json: str (serialised JSON) - report_hash: str (SHA-256 hex digest) - evidence_status: str ("valid" or "failed") - findings_count: int - critical_findings: int """ report_json = json.dumps(data) if data else "{}" report_hash = hashlib.sha256(report_json.encode()).hexdigest() findings_count = 0 critical_findings = 0 if data and isinstance(data, dict): # Semgrep format if "results" in data: findings_count = len(data.get("results", [])) critical_findings = len([ r for r in data.get("results", []) if r.get("extra", {}).get("severity", "").upper() in ["CRITICAL", "HIGH"] ]) # Trivy format elif "Results" in data: for result in data.get("Results", []): vulns = result.get("Vulnerabilities", []) findings_count += len(vulns) critical_findings += len([ v for v in vulns if v.get("Severity", "").upper() in ["CRITICAL", "HIGH"] ]) # Generic findings array elif "findings" in data: findings_count = len(data.get("findings", [])) # SBOM format - just count components elif "components" in data: findings_count = len(data.get("components", [])) evidence_status = "failed" if critical_findings > 0 else "valid" return { "report_json": report_json, "report_hash": report_hash, "evidence_status": evidence_status, "findings_count": findings_count, "critical_findings": critical_findings, } def _store_evidence( db: Session, *, control_db_id: str, source: str, parsed: dict, ci_job_id: str, ci_job_url: str, report_data: dict, ) -> EvidenceDB: """ Persist a CI evidence item to the database and write the report file. Returns the created EvidenceDB instance (already committed). """ findings_count = parsed["findings_count"] critical_findings = parsed["critical_findings"] # Build title and description title = f"{source.upper()} Report - {datetime.now().strftime('%Y-%m-%d %H:%M')}" description = "Automatically collected from CI/CD pipeline" if findings_count > 0: description += f"\n- Total findings: {findings_count}" if critical_findings > 0: description += f"\n- Critical/High findings: {critical_findings}" if ci_job_id: description += f"\n- CI Job ID: {ci_job_id}" if ci_job_url: description += f"\n- CI Job URL: {ci_job_url}" # Store report file upload_dir = f"/tmp/compliance_evidence/ci/{source}" os.makedirs(upload_dir, exist_ok=True) file_name = f"{source}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{parsed['report_hash'][:8]}.json" file_path = os.path.join(upload_dir, file_name) with open(file_path, "w") as f: json.dump(report_data or {}, f, indent=2) # Create evidence record evidence = EvidenceDB( id=str(uuid_module.uuid4()), control_id=control_db_id, evidence_type=f"ci_{source}", title=title, description=description, artifact_path=file_path, artifact_hash=parsed["report_hash"], file_size_bytes=len(parsed["report_json"]), mime_type="application/json", source="ci_pipeline", ci_job_id=ci_job_id, valid_from=datetime.utcnow(), valid_until=datetime.utcnow() + timedelta(days=90), status=EvidenceStatusEnum(parsed["evidence_status"]), ) db.add(evidence) db.commit() db.refresh(evidence) return evidence def _extract_findings_detail(report_data: dict) -> dict: """ Extract severity-bucketed finding counts from report data. Returns dict with keys: critical, high, medium, low. """ findings_detail = { "critical": 0, "high": 0, "medium": 0, "low": 0, } if not report_data: return findings_detail # Semgrep format if "results" in report_data: for r in report_data.get("results", []): severity = r.get("extra", {}).get("severity", "").upper() if severity == "CRITICAL": findings_detail["critical"] += 1 elif severity == "HIGH": findings_detail["high"] += 1 elif severity == "MEDIUM": findings_detail["medium"] += 1 elif severity in ["LOW", "INFO"]: findings_detail["low"] += 1 # Trivy format elif "Results" in report_data: for result in report_data.get("Results", []): for v in result.get("Vulnerabilities", []): severity = v.get("Severity", "").upper() if severity == "CRITICAL": findings_detail["critical"] += 1 elif severity == "HIGH": findings_detail["high"] += 1 elif severity == "MEDIUM": findings_detail["medium"] += 1 elif severity == "LOW": findings_detail["low"] += 1 # Generic findings with severity elif "findings" in report_data: for f in report_data.get("findings", []): severity = f.get("severity", "").upper() if severity == "CRITICAL": findings_detail["critical"] += 1 elif severity == "HIGH": findings_detail["high"] += 1 elif severity == "MEDIUM": findings_detail["medium"] += 1 else: findings_detail["low"] += 1 return findings_detail def _update_risks(db: Session, *, source: str, control_id: str, ci_job_id: str, report_data: dict): """ Update risk status based on new evidence. Uses AutoRiskUpdater to update Control status and linked Risks based on severity-bucketed findings. Returns the update result or None on error. """ findings_detail = _extract_findings_detail(report_data) try: auto_updater = AutoRiskUpdater(db) risk_update_result = auto_updater.process_evidence_collect_request( tool=source, control_id=control_id, evidence_type=f"ci_{source}", timestamp=datetime.utcnow().isoformat(), commit_sha=report_data.get("commit_sha", "unknown") if report_data else "unknown", ci_job_id=ci_job_id, findings=findings_detail, ) logger.info(f"Auto-risk update completed for {control_id}: " f"control_updated={risk_update_result.control_updated}, " f"risks_affected={len(risk_update_result.risks_affected)}") return risk_update_result except Exception as e: logger.error(f"Auto-risk update failed for {control_id}: {str(e)}") return None # ============================================================================ # CI/CD Evidence Collection — endpoint # ============================================================================ @router.post("/evidence/collect") async def collect_ci_evidence( source: str = Query(..., description="Evidence source: sast, dependency_scan, sbom, container_scan, test_results"), ci_job_id: str = Query(None, description="CI/CD Job ID for traceability"), ci_job_url: str = Query(None, description="URL to CI/CD job"), report_data: dict = None, db: Session = Depends(get_db), ): """ Collect evidence from CI/CD pipeline. This endpoint is designed to be called from CI/CD workflows (GitHub Actions, GitLab CI, Jenkins, etc.) to automatically collect compliance evidence. Supported sources: - sast: Static Application Security Testing (Semgrep, SonarQube, etc.) - dependency_scan: Dependency vulnerability scanning (Trivy, Grype, Snyk) - sbom: Software Bill of Materials (CycloneDX, SPDX) - container_scan: Container image scanning (Trivy, Grype) - test_results: Test coverage and results - secret_scan: Secret detection (Gitleaks, TruffleHog) - code_review: Code review metrics """ if source not in SOURCE_CONTROL_MAP: raise HTTPException( status_code=400, detail=f"Unknown source '{source}'. Supported: {list(SOURCE_CONTROL_MAP.keys())}" ) control_id = SOURCE_CONTROL_MAP[source] # Get control ctrl_repo = ControlRepository(db) control = ctrl_repo.get_by_control_id(control_id) if not control: raise HTTPException( status_code=404, detail=f"Control {control_id} not found. Please seed the database first." ) # --- 1. Parse and validate report data --- parsed = _parse_ci_evidence(report_data) # --- 2. Store evidence in DB and write report file --- evidence = _store_evidence( db, control_db_id=control.id, source=source, parsed=parsed, ci_job_id=ci_job_id, ci_job_url=ci_job_url, report_data=report_data, ) # --- 3. Automatic risk update --- risk_update_result = _update_risks( db, source=source, control_id=control_id, ci_job_id=ci_job_id, report_data=report_data, ) return { "success": True, "evidence_id": evidence.id, "control_id": control_id, "source": source, "status": parsed["evidence_status"], "findings_count": parsed["findings_count"], "critical_findings": parsed["critical_findings"], "artifact_path": evidence.artifact_path, "message": f"Evidence collected successfully for control {control_id}", "auto_risk_update": { "enabled": True, "control_updated": risk_update_result.control_updated if risk_update_result else False, "old_status": risk_update_result.old_status if risk_update_result else None, "new_status": risk_update_result.new_status if risk_update_result else None, "risks_affected": risk_update_result.risks_affected if risk_update_result else [], "alerts_generated": risk_update_result.alerts_generated if risk_update_result else [], } if risk_update_result else {"enabled": False, "error": "Auto-update skipped"}, } @router.get("/evidence/ci-status") async def get_ci_evidence_status( control_id: str = Query(None, description="Filter by control ID"), days: int = Query(30, description="Look back N days"), db: Session = Depends(get_db), ): """ Get CI/CD evidence collection status. Returns overview of recent evidence collected from CI/CD pipelines, useful for dashboards and monitoring. """ cutoff_date = datetime.utcnow() - timedelta(days=days) # Build query query = db.query(EvidenceDB).filter( EvidenceDB.source == "ci_pipeline", EvidenceDB.collected_at >= cutoff_date, ) if control_id: ctrl_repo = ControlRepository(db) control = ctrl_repo.get_by_control_id(control_id) if control: query = query.filter(EvidenceDB.control_id == control.id) evidence_list = query.order_by(EvidenceDB.collected_at.desc()).limit(100).all() # Group by control and calculate stats control_stats = defaultdict(lambda: { "total": 0, "valid": 0, "failed": 0, "last_collected": None, "evidence": [], }) for e in evidence_list: # Get control_id string control = db.query(ControlDB).filter(ControlDB.id == e.control_id).first() ctrl_id = control.control_id if control else "unknown" stats = control_stats[ctrl_id] stats["total"] += 1 if e.status: if e.status.value == "valid": stats["valid"] += 1 elif e.status.value == "failed": stats["failed"] += 1 if not stats["last_collected"] or e.collected_at > stats["last_collected"]: stats["last_collected"] = e.collected_at # Add evidence summary stats["evidence"].append({ "id": e.id, "type": e.evidence_type, "status": e.status.value if e.status else None, "collected_at": e.collected_at.isoformat() if e.collected_at else None, "ci_job_id": e.ci_job_id, }) # Convert to list and sort result = [] for ctrl_id, stats in control_stats.items(): result.append({ "control_id": ctrl_id, "total_evidence": stats["total"], "valid_count": stats["valid"], "failed_count": stats["failed"], "last_collected": stats["last_collected"].isoformat() if stats["last_collected"] else None, "recent_evidence": stats["evidence"][:5], }) result.sort(key=lambda x: x["last_collected"] or "", reverse=True) return { "period_days": days, "total_evidence": len(evidence_list), "controls": result, }