Files
breakpilot-compliance/backend-compliance/compliance/api/evidence_routes.py
Benjamin Boenisch 4435e7ea0a Initial commit: breakpilot-compliance - Compliance SDK Platform
Services: Admin-Compliance, Backend-Compliance,
AI-Compliance-SDK, Consent-SDK, Developer-Portal,
PCA-Platform, DSMS

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:28 +01:00

531 lines
18 KiB
Python

"""
FastAPI routes for Evidence management.
Endpoints:
- /evidence: Evidence listing and creation
- /evidence/upload: Evidence file upload
- /evidence/collect: CI/CD evidence collection
- /evidence/ci-status: CI/CD evidence status
"""
import logging
import os
from datetime import datetime, timedelta
from typing import Optional
from collections import defaultdict
import uuid as uuid_module
import hashlib
import json
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Query
from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from ..db import (
ControlRepository,
EvidenceRepository,
EvidenceStatusEnum,
)
from ..db.models import EvidenceDB, ControlDB
from ..services.auto_risk_updater import AutoRiskUpdater
from .schemas import (
EvidenceCreate, EvidenceResponse, EvidenceListResponse,
)
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Router the evidence endpoints in this module register on; every route is
# tagged "compliance-evidence".
router = APIRouter(tags=["compliance-evidence"])
# ============================================================================
# Evidence
# ============================================================================
@router.get("/evidence", response_model=EvidenceListResponse)
async def list_evidence(
    control_id: Optional[str] = None,
    evidence_type: Optional[str] = None,
    status: Optional[str] = None,
    db: Session = Depends(get_db),
):
    """List evidence records, optionally narrowed by control, type and status.

    Args:
        control_id: Control identifier as accepted by
            ``ControlRepository.get_by_control_id``. When given, only evidence
            attached to that control is returned.
        evidence_type: Keep only records whose ``evidence_type`` equals this value.
        status: A value of ``EvidenceStatusEnum``. A value that is not a member
            of the enum is logged and ignored, i.e. no status filtering happens.
        db: Database session provided by ``get_db``.

    Returns:
        ``EvidenceListResponse`` holding the matching records and their count.

    Raises:
        HTTPException: 404 when ``control_id`` is given but no such control exists.
    """
    repo = EvidenceRepository(db)

    if control_id:
        # Evidence rows reference the control's internal id, so resolve it first.
        control = ControlRepository(db).get_by_control_id(control_id)
        if not control:
            raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
        evidence = repo.get_by_control(control.id)
    else:
        evidence = repo.get_all()

    if evidence_type:
        evidence = [e for e in evidence if e.evidence_type == evidence_type]

    if status:
        try:
            status_enum = EvidenceStatusEnum(status)
        except ValueError:
            # Keep the historical best-effort behaviour (filter is skipped), but
            # leave a trace instead of failing silently.
            logger.warning("Ignoring unknown evidence status filter %r", status)
        else:
            evidence = [e for e in evidence if e.status == status_enum]

    results = [
        EvidenceResponse(
            id=e.id,
            control_id=e.control_id,
            evidence_type=e.evidence_type,
            title=e.title,
            description=e.description,
            artifact_path=e.artifact_path,
            artifact_url=e.artifact_url,
            artifact_hash=e.artifact_hash,
            file_size_bytes=e.file_size_bytes,
            mime_type=e.mime_type,
            valid_from=e.valid_from,
            valid_until=e.valid_until,
            status=e.status.value if e.status else None,
            source=e.source,
            ci_job_id=e.ci_job_id,
            uploaded_by=e.uploaded_by,
            collected_at=e.collected_at,
            created_at=e.created_at,
        )
        for e in evidence
    ]
    return EvidenceListResponse(evidence=results, total=len(results))
@router.post("/evidence", response_model=EvidenceResponse)
async def create_evidence(
    evidence_data: EvidenceCreate,
    db: Session = Depends(get_db),
):
    """Persist a new evidence record for an existing control.

    The control named in ``evidence_data.control_id`` is looked up first; the
    record is stored against that control's internal id and committed. When the
    payload carries no ``source``, ``"api"`` is used.

    Raises:
        HTTPException: 404 when the referenced control does not exist.
    """
    evidence_repo = EvidenceRepository(db)

    # Resolve the control so the record can reference its internal id.
    requested_control = evidence_data.control_id
    control = ControlRepository(db).get_by_control_id(requested_control)
    if not control:
        raise HTTPException(status_code=404, detail=f"Control {evidence_data.control_id} not found")

    new_record = {
        "control_id": control.id,
        "evidence_type": evidence_data.evidence_type,
        "title": evidence_data.title,
        "description": evidence_data.description,
        "artifact_url": evidence_data.artifact_url,
        "valid_from": evidence_data.valid_from,
        "valid_until": evidence_data.valid_until,
        "source": evidence_data.source or "api",
        "ci_job_id": evidence_data.ci_job_id,
    }
    evidence = evidence_repo.create(**new_record)
    db.commit()

    # Attributes copied one-to-one from the stored record into the response.
    copied_fields = (
        "id",
        "control_id",
        "evidence_type",
        "title",
        "description",
        "artifact_path",
        "artifact_url",
        "artifact_hash",
        "file_size_bytes",
        "mime_type",
        "valid_from",
        "valid_until",
        "source",
        "ci_job_id",
        "uploaded_by",
        "collected_at",
        "created_at",
    )
    payload = {name: getattr(evidence, name) for name in copied_fields}
    # The status enum is exposed by its value; a missing status stays None.
    if evidence.status:
        payload["status"] = evidence.status.value
    else:
        payload["status"] = None
    return EvidenceResponse(**payload)
@router.post("/evidence/upload")
async def upload_evidence(
    control_id: str = Query(...),
    evidence_type: str = Query(...),
    title: str = Query(...),
    file: UploadFile = File(...),
    description: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Store an uploaded file and register it as evidence for a control.

    The file is written below ``/tmp/compliance_evidence/<control_id>/`` and an
    evidence record with its SHA-256 hash, size and MIME type is created with
    ``source="upload"``.

    Args:
        control_id: Identifier of the control the evidence belongs to.
        evidence_type: Evidence type stored on the record.
        title: Title stored on the record.
        file: The uploaded file.
        description: Optional free-text description.
        db: Database session provided by ``get_db``.

    Returns:
        ``EvidenceResponse`` describing the stored record.

    Raises:
        HTTPException: 404 when the control does not exist; 400 when the upload
            carries no usable filename.
    """
    # Resolve the control so the record can reference its internal id.
    control = ControlRepository(db).get_by_control_id(control_id)
    if not control:
        raise HTTPException(status_code=404, detail=f"Control {control_id} not found")

    # The filename is client-controlled: strip any directory part (both slash
    # styles) so "../x" or an absolute path cannot escape the upload directory.
    raw_name = file.filename or ""
    safe_name = os.path.basename(raw_name.replace("\\", "/"))
    if safe_name in ("", ".", "..") or "\x00" in safe_name:
        raise HTTPException(status_code=400, detail="Uploaded file must have a valid filename")

    # NOTE(review): a second upload with the same name for the same control
    # overwrites the earlier artifact on disk — confirm whether that is intended.
    upload_dir = f"/tmp/compliance_evidence/{control_id}"
    os.makedirs(upload_dir, exist_ok=True)
    file_path = os.path.join(upload_dir, safe_name)

    content = await file.read()
    with open(file_path, "wb") as f:
        f.write(content)

    # Hash exactly the bytes that were written to disk.
    file_hash = hashlib.sha256(content).hexdigest()

    repo = EvidenceRepository(db)
    evidence = repo.create(
        control_id=control.id,
        evidence_type=evidence_type,
        title=title,
        description=description,
        artifact_path=file_path,
        artifact_hash=file_hash,
        file_size_bytes=len(content),
        mime_type=file.content_type,
        source="upload",
    )
    db.commit()

    return EvidenceResponse(
        id=evidence.id,
        control_id=evidence.control_id,
        evidence_type=evidence.evidence_type,
        title=evidence.title,
        description=evidence.description,
        artifact_path=evidence.artifact_path,
        artifact_url=evidence.artifact_url,
        artifact_hash=evidence.artifact_hash,
        file_size_bytes=evidence.file_size_bytes,
        mime_type=evidence.mime_type,
        valid_from=evidence.valid_from,
        valid_until=evidence.valid_until,
        status=evidence.status.value if evidence.status else None,
        source=evidence.source,
        ci_job_id=evidence.ci_job_id,
        uploaded_by=evidence.uploaded_by,
        collected_at=evidence.collected_at,
        created_at=evidence.created_at,
    )
# ============================================================================
# CI/CD Evidence Collection
# ============================================================================
# Maps each supported CI evidence source to the control it provides evidence for.
_CI_SOURCE_CONTROL_MAP = {
    "sast": "SDLC-001",
    "dependency_scan": "SDLC-002",
    "secret_scan": "SDLC-003",
    "code_review": "SDLC-004",
    "sbom": "SDLC-005",
    "container_scan": "SDLC-006",
    "test_results": "AUD-001",
}

# Upper-cased severity label -> key in the severity tally.
_CI_SEVERITY_BUCKETS = {
    "CRITICAL": "critical",
    "HIGH": "high",
    "MEDIUM": "medium",
    "LOW": "low",
}


def _ci_as_list(value):
    """Return *value* when it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def _ci_severity_label(item, *path):
    """Follow *path* through nested dicts and return the upper-cased string found, or ""."""
    value = item
    for key in path:
        if not isinstance(value, dict):
            return ""
        value = value.get(key)
    return value.upper() if isinstance(value, str) else ""


def _summarize_ci_report(report_data):
    """Return ``(findings_count, severity_counts)`` for a CI report payload.

    The report layout is detected from its top-level key, checked in this
    order: ``results`` (Semgrep-style), ``Results`` (Trivy-style), ``findings``
    (generic list), ``components`` (SBOM; counted, no severities). Missing,
    null or malformed sections count as empty instead of raising.
    """
    severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0}
    findings_count = 0
    if not isinstance(report_data, dict):
        return findings_count, severity_counts

    if "results" in report_data:
        # NOTE(review): only CRITICAL/HIGH/MEDIUM/LOW/INFO labels are recognised
        # here — confirm these match the labels the SAST tool actually emits.
        results = _ci_as_list(report_data["results"])
        findings_count = len(results)
        for entry in results:
            label = _ci_severity_label(entry, "extra", "severity")
            bucket = "low" if label == "INFO" else _CI_SEVERITY_BUCKETS.get(label)
            if bucket:
                severity_counts[bucket] += 1
    elif "Results" in report_data:
        for target in _ci_as_list(report_data["Results"]):
            vulns = _ci_as_list(target.get("Vulnerabilities")) if isinstance(target, dict) else []
            findings_count += len(vulns)
            for vuln in vulns:
                bucket = _CI_SEVERITY_BUCKETS.get(_ci_severity_label(vuln, "Severity"))
                if bucket:
                    severity_counts[bucket] += 1
    elif "findings" in report_data:
        findings = _ci_as_list(report_data["findings"])
        findings_count = len(findings)
        for finding in findings:
            # Anything that is not critical/high/medium is tallied as low.
            bucket = _CI_SEVERITY_BUCKETS.get(_ci_severity_label(finding, "severity"), "low")
            severity_counts[bucket] += 1
    elif "components" in report_data:
        findings_count = len(_ci_as_list(report_data["components"]))

    return findings_count, severity_counts


@router.post("/evidence/collect")
async def collect_ci_evidence(
    source: str = Query(
        ...,
        description=(
            "Evidence source: sast, dependency_scan, secret_scan, code_review, "
            "sbom, container_scan, test_results"
        ),
    ),
    ci_job_id: Optional[str] = Query(None, description="CI/CD Job ID for traceability"),
    ci_job_url: Optional[str] = Query(None, description="URL to CI/CD job"),
    report_data: Optional[dict] = None,
    db: Session = Depends(get_db),
):
    """
    Collect evidence from CI/CD pipeline.

    This endpoint is designed to be called from CI/CD workflows (GitHub Actions,
    GitLab CI, Jenkins, etc.) to automatically collect compliance evidence.

    Supported sources:
    - sast: Static Application Security Testing (Semgrep, SonarQube, etc.)
    - dependency_scan: Dependency vulnerability scanning (Trivy, Grype, Snyk)
    - sbom: Software Bill of Materials (CycloneDX, SPDX)
    - container_scan: Container image scanning (Trivy, Grype)
    - test_results: Test coverage and results
    - secret_scan: Secret detection (Gitleaks, TruffleHog)
    - code_review: Code review metrics

    Processing steps:
    1. Map ``source`` to its control and load that control.
    2. Tally findings per severity; any critical/high finding marks the
       evidence ``failed``, otherwise ``valid``.
    3. Write the report as JSON below ``/tmp/compliance_evidence/ci/<source>/``
       and store an evidence record valid for 90 days. The stored hash and
       size describe exactly the bytes written to that file.
    4. Run ``AutoRiskUpdater`` on a best-effort basis; a failure there is
       logged and reported as ``auto_risk_update.enabled = False``.

    Raises:
        HTTPException: 400 for an unsupported ``source``; 404 when the mapped
            control is missing from the database.
    """
    if source not in _CI_SOURCE_CONTROL_MAP:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown source '{source}'. Supported: {list(_CI_SOURCE_CONTROL_MAP)}",
        )
    control_id = _CI_SOURCE_CONTROL_MAP[source]

    control = ControlRepository(db).get_by_control_id(control_id)
    if not control:
        raise HTTPException(
            status_code=404,
            detail=f"Control {control_id} not found. Please seed the database first.",
        )

    # One tally drives both the evidence status and the risk update, so the two
    # can no longer disagree about what the report contains.
    findings_count, severity_counts = _summarize_ci_report(report_data)
    critical_findings = severity_counts["critical"] + severity_counts["high"]
    evidence_status = "failed" if critical_findings > 0 else "valid"

    # Serialise once: the same bytes are hashed, measured and written to disk.
    report_bytes = json.dumps(report_data or {}, indent=2).encode("utf-8")
    report_hash = hashlib.sha256(report_bytes).hexdigest()

    # Title and file name use local time, as before; validity uses UTC.
    collected_local = datetime.now()
    title = f"{source.upper()} Report - {collected_local.strftime('%Y-%m-%d %H:%M')}"
    description_lines = ["Automatically collected from CI/CD pipeline"]
    if findings_count > 0:
        description_lines.append(f"- Total findings: {findings_count}")
    if critical_findings > 0:
        description_lines.append(f"- Critical/High findings: {critical_findings}")
    if ci_job_id:
        description_lines.append(f"- CI Job ID: {ci_job_id}")
    if ci_job_url:
        description_lines.append(f"- CI Job URL: {ci_job_url}")
    description = "\n".join(description_lines)

    upload_dir = f"/tmp/compliance_evidence/ci/{source}"
    os.makedirs(upload_dir, exist_ok=True)
    file_name = f"{source}_{collected_local.strftime('%Y%m%d_%H%M%S')}_{report_hash[:8]}.json"
    file_path = os.path.join(upload_dir, file_name)
    with open(file_path, "wb") as f:
        f.write(report_bytes)

    evidence_id = str(uuid_module.uuid4())
    valid_from = datetime.utcnow()
    evidence = EvidenceDB(
        id=evidence_id,
        control_id=control.id,
        evidence_type=f"ci_{source}",
        title=title,
        description=description,
        artifact_path=file_path,
        artifact_hash=report_hash,
        file_size_bytes=len(report_bytes),
        mime_type="application/json",
        source="ci_pipeline",
        ci_job_id=ci_job_id,
        valid_from=valid_from,
        valid_until=valid_from + timedelta(days=90),
        status=EvidenceStatusEnum(evidence_status),
    )
    db.add(evidence)
    db.commit()
    db.refresh(evidence)

    # Automatic risk update: best effort, must never fail the collection itself.
    risk_update_result = None
    try:
        commit_sha = (
            report_data.get("commit_sha", "unknown")
            if isinstance(report_data, dict)
            else "unknown"
        )
        risk_update_result = AutoRiskUpdater(db).process_evidence_collect_request(
            tool=source,
            control_id=control_id,
            evidence_type=f"ci_{source}",
            timestamp=datetime.utcnow().isoformat(),
            commit_sha=commit_sha,
            ci_job_id=ci_job_id,
            findings=severity_counts,
        )
        logger.info(
            "Auto-risk update completed for %s: control_updated=%s, risks_affected=%s",
            control_id,
            risk_update_result.control_updated,
            len(risk_update_result.risks_affected),
        )
    except Exception:
        # Boundary catch: log with traceback and report the update as skipped.
        logger.exception("Auto-risk update failed for %s", control_id)

    if risk_update_result:
        auto_risk_update = {
            "enabled": True,
            "control_updated": risk_update_result.control_updated,
            "old_status": risk_update_result.old_status,
            "new_status": risk_update_result.new_status,
            "risks_affected": risk_update_result.risks_affected,
            "alerts_generated": risk_update_result.alerts_generated,
        }
    else:
        auto_risk_update = {"enabled": False, "error": "Auto-update skipped"}

    return {
        "success": True,
        # Use the id generated above instead of reading it back from the ORM
        # object, so a failed risk update cannot break the response.
        "evidence_id": evidence_id,
        "control_id": control_id,
        "source": source,
        "status": evidence_status,
        "findings_count": findings_count,
        "critical_findings": critical_findings,
        "artifact_path": file_path,
        "message": f"Evidence collected successfully for control {control_id}",
        "auto_risk_update": auto_risk_update,
    }
@router.get("/evidence/ci-status")
async def get_ci_evidence_status(
    control_id: Optional[str] = Query(None, description="Filter by control ID"),
    days: int = Query(30, description="Look back N days"),
    db: Session = Depends(get_db),
):
    """
    Get CI/CD evidence collection status.

    Returns overview of recent evidence collected from CI/CD pipelines,
    useful for dashboards and monitoring.

    Only records with ``source == "ci_pipeline"`` collected within the last
    ``days`` days are considered, newest first, capped at 100 records. They are
    grouped per control with valid/failed counts, the latest collection time
    and up to five recent records each.

    Raises:
        HTTPException: 404 when ``control_id`` is given but no such control
            exists (previously the filter was silently dropped and evidence for
            all controls was returned).
    """
    cutoff_date = datetime.utcnow() - timedelta(days=days)

    query = db.query(EvidenceDB).filter(
        EvidenceDB.source == "ci_pipeline",
        EvidenceDB.collected_at >= cutoff_date,
    )
    if control_id:
        control = ControlRepository(db).get_by_control_id(control_id)
        if not control:
            raise HTTPException(status_code=404, detail=f"Control {control_id} not found")
        query = query.filter(EvidenceDB.control_id == control.id)

    evidence_list = query.order_by(EvidenceDB.collected_at.desc()).limit(100).all()

    control_stats = defaultdict(lambda: {
        "total": 0,
        "valid": 0,
        "failed": 0,
        "last_collected": None,
        "evidence": [],
    })
    # Internal control id -> control_id string, so each control is looked up
    # once instead of once per evidence row.
    control_labels = {}

    for e in evidence_list:
        if e.control_id not in control_labels:
            owner = db.query(ControlDB).filter(ControlDB.id == e.control_id).first()
            control_labels[e.control_id] = owner.control_id if owner else "unknown"
        ctrl_id = control_labels[e.control_id]

        stats = control_stats[ctrl_id]
        stats["total"] += 1

        status_value = e.status.value if e.status else None
        if status_value == "valid":
            stats["valid"] += 1
        elif status_value == "failed":
            stats["failed"] += 1

        # Guard against a missing timestamp before comparing.
        if e.collected_at is not None and (
            not stats["last_collected"] or e.collected_at > stats["last_collected"]
        ):
            stats["last_collected"] = e.collected_at

        stats["evidence"].append({
            "id": e.id,
            "type": e.evidence_type,
            "status": status_value,
            "collected_at": e.collected_at.isoformat() if e.collected_at else None,
            "ci_job_id": e.ci_job_id,
        })

    result = [
        {
            "control_id": ctrl_id,
            "total_evidence": stats["total"],
            "valid_count": stats["valid"],
            "failed_count": stats["failed"],
            "last_collected": stats["last_collected"].isoformat() if stats["last_collected"] else None,
            "recent_evidence": stats["evidence"][:5],
        }
        for ctrl_id, stats in control_stats.items()
    ]
    # Most recently active control first; controls without a timestamp sort last.
    result.sort(key=lambda x: x["last_collected"] or "", reverse=True)

    return {
        "period_days": days,
        "total_evidence": len(evidence_list),
        "controls": result,
    }