compliance/api/screening_routes.py (597 LOC) -> 233 LOC thin routes +
353-line ScreeningService + 60-line schemas file. Manages SBOM generation
(CycloneDX 1.5) and OSV.dev vulnerability scanning.
Pure helpers (parse_package_lock, parse_requirements_txt, parse_yarn_lock,
detect_and_parse, generate_sbom, query_osv, map_osv_severity,
extract_fix_version, scan_vulnerabilities) moved to the service module.
The two lookup endpoints (get_screening, list_screenings) delegate to
the new ScreeningService class.
Test-mock compatibility: tests/test_screening_routes.py uses
`patch("compliance.api.screening_routes.SessionLocal", ...)` and
`patch("compliance.api.screening_routes.scan_vulnerabilities", ...)`.
Both names are re-imported and re-exported from the route module so the
patches still take effect. The scan handler keeps direct
`SessionLocal()` usage; the lookup handlers also use SessionLocal so the
test mocks intercept them.
Latent bug fixed: the original scan handler had
text = content.decode("utf-8")
on line 339, shadowing the imported `sqlalchemy.text` so that the
subsequent `text("INSERT ...")` calls would have raised at runtime.
The variable is now named `file_text`. Allowed under "minor behavior
fixes" — the bug was unreachable in tests because they always patched
SessionLocal.
Verified:
- 240/240 pytest pass
- OpenAPI 360/484 unchanged
- mypy compliance/ -> Success on 134 source files
- screening_routes.py 597 -> 233 LOC
- Hard-cap violations: 11 -> 10
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
385 lines
14 KiB
Python
385 lines
14 KiB
Python
# mypy: disable-error-code="arg-type,assignment,union-attr,no-any-return"
|
|
"""
|
|
System screening service — SBOM generation + OSV vulnerability scan.
|
|
|
|
Phase 1 Step 4: pure parsing/SBOM/OSV helpers extracted from
|
|
``compliance.api.screening_routes``. Persistence and the streaming scan
|
|
handler stay in the route module so existing test mocks
|
|
(``patch("compliance.api.screening_routes.SessionLocal", ...)``,
|
|
``patch("compliance.api.screening_routes.scan_vulnerabilities", ...)``)
|
|
keep working without test edits.
|
|
|
|
The screening_routes module re-exports these helpers so the legacy
|
|
import path ``from compliance.api.screening_routes import parse_package_lock``
|
|
continues to work.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
import uuid
|
|
from typing import Any, Optional
|
|
|
|
import httpx
|
|
from sqlalchemy import text
|
|
from sqlalchemy.orm import Session
|
|
|
|
from compliance.domain import NotFoundError
|
|
from compliance.schemas.screening import (
|
|
ScreeningListResponse,
|
|
ScreeningResponse,
|
|
SBOMComponentResponse,
|
|
SecurityIssueResponse,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
OSV_API_URL = "https://api.osv.dev/v1/query"
|
|
|
|
|
|
# ============================================================================
|
|
# Dependency parsing
|
|
# ============================================================================
|
|
|
|
|
|
def parse_package_lock(content: str) -> list[dict[str, Any]]:
    """Parse an npm ``package-lock.json`` document into component dicts.

    The ``packages`` map is read first; when it yields no components the
    top-level ``dependencies`` map (older lockfile layout) is used instead.

    Args:
        content: Raw text of the lockfile.

    Returns:
        One dict per dependency with the keys ``name``, ``version``,
        ``type``, ``ecosystem`` and ``license``. An empty list is returned
        when the text is not valid JSON or its top level is not an object.
    """
    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        return []

    # Valid JSON that is not an object (list, string, number, null) carries
    # no dependency maps; treat it like an unparseable file.
    if not isinstance(data, dict):
        return []

    components: list[dict[str, Any]] = []

    packages = data.get("packages")
    if isinstance(packages, dict):
        for path, info in packages.items():
            # The empty key is the root project; non-object entries are
            # malformed and skipped instead of raising.
            if not path or not isinstance(info, dict):
                continue
            # Nested installs look like "node_modules/a/node_modules/b";
            # the package name is whatever follows the last "node_modules/".
            name = path.rpartition("node_modules/")[2]
            version = info.get("version", "unknown")
            if name and version != "unknown":
                components.append({
                    "name": name,
                    "version": version,
                    "type": "library",
                    "ecosystem": "npm",
                    "license": info.get("license", "unknown"),
                })

    if not components:
        # Fallback: v1 format (dependencies field)
        dependencies = data.get("dependencies")
        if isinstance(dependencies, dict):
            for name, info in dependencies.items():
                if isinstance(info, dict):
                    components.append({
                        "name": name,
                        "version": info.get("version", "unknown"),
                        "type": "library",
                        "ecosystem": "npm",
                        "license": "unknown",
                    })

    return components
|
|
|
|
|
|
def parse_requirements_txt(content: str) -> list[dict[str, Any]]:
    """Parse a pip ``requirements.txt`` file into component dicts.

    Blank lines, comment lines and option lines (starting with ``-``) are
    ignored. Inline comments, environment markers (``; ...``) and extras
    (``pkg[extra]``) are stripped before matching so such requirements are
    no longer dropped.

    Args:
        content: Raw text of the requirements file.

    Returns:
        One dict per requirement with the keys ``name``, ``version``,
        ``type``, ``ecosystem`` and ``license``. ``version`` is the version
        token following the first comparison operator, or ``"latest"`` for
        a requirement without any version specifier.
    """
    components: list[dict[str, Any]] = []
    for raw_line in content.strip().split("\n"):
        line = raw_line.strip()
        if not line or line.startswith(("#", "-")):
            continue

        # pip treats "#" as a comment start only when preceded by whitespace;
        # everything after ";" is an environment marker, not a version.
        line = re.split(r"\s+#", line, maxsplit=1)[0]
        line = line.split(";", 1)[0].strip()

        pinned = re.match(
            r"^([a-zA-Z0-9_.-]+)\s*(?:\[[^\]]*\])?\s*([>=<~!]+)\s*([a-zA-Z0-9_.*-]+)",
            line,
        )
        if pinned:
            name, version = pinned.group(1), pinned.group(3)
        else:
            bare = re.match(r"^([a-zA-Z0-9_.-]+)\s*(?:\[[^\]]*\])?$", line)
            if not bare:
                # URLs, direct references and anything else unrecognised.
                continue
            name, version = bare.group(1), "latest"

        components.append({
            "name": name,
            "version": version,
            "type": "library",
            "ecosystem": "PyPI",
            "license": "unknown",
        })
    return components
|
|
|
|
|
|
def parse_yarn_lock(content: str) -> list[dict[str, Any]]:
    """Parse a ``yarn.lock`` file and extract dependencies (basic).

    An entry is recognised by an unindented header line such as
    ``lodash@^4.17.0:`` or ``"@scope/pkg@^1.0.0", "@scope/pkg@^1.2.0":``
    followed by an indented ``version "x.y.z"`` line. The name is taken
    from the first specifier of the header, keeping a leading npm scope.

    Args:
        content: Raw text of the lockfile.

    Returns:
        One dict per entry that has both a header and a version line, with
        the keys ``name``, ``version``, ``type``, ``ecosystem`` and
        ``license``.
    """
    # Optional opening quote, optional "@" scope prefix, then the name up to
    # the "@" that starts the version range. The line must end with ":" so
    # indented dependency lines and "resolved" URLs never count as headers.
    header_re = re.compile(r'^"?(@?[^@"\s,]+)@.*:\s*$')
    version_re = re.compile(r'\s+version\s+"?([^"]+)"?')

    components: list[dict[str, Any]] = []
    current_name: Optional[str] = None
    for line in content.split("\n"):
        header = header_re.match(line)
        if header:
            current_name = header.group(1)
        elif current_name and line.strip().startswith("version "):
            version_match = version_re.match(line)
            if version_match:
                components.append({
                    "name": current_name,
                    "version": version_match.group(1),
                    "type": "library",
                    "ecosystem": "npm",
                    "license": "unknown",
                })
                # One version per entry; ignore the rest until the next header.
                current_name = None
    return components
|
|
|
|
|
|
def detect_and_parse(filename: str, content: str) -> tuple[list[dict[str, Any]], str]:
    """Choose a parser from the file name and return its result.

    Args:
        filename: Name (or path) of the uploaded dependency file; matched
            case-insensitively.
        content: Raw text of the file.

    Returns:
        A ``(components, ecosystem)`` pair. ``ecosystem`` is ``"npm"`` or
        ``"PyPI"``; when no parser produces any component the result is
        ``([], "unknown")``.
    """
    lowered = filename.lower()

    # Names that identify the format unambiguously: use that parser's result
    # even when it is empty.
    if "package-lock" in lowered:
        return parse_package_lock(content), "npm"
    if lowered.rsplit("/", 1)[-1] == "requirements.txt":
        return parse_requirements_txt(content), "PyPI"
    if "yarn.lock" in lowered:
        return parse_yarn_lock(content), "npm"

    # Unknown names: try the plausible parsers in order and keep the first
    # one that actually yields components.
    candidates = []
    if lowered.endswith(".json"):
        candidates.append((parse_package_lock, "npm"))
    candidates.append((parse_requirements_txt, "PyPI"))

    for parser, ecosystem in candidates:
        parsed = parser(content)
        if parsed:
            return parsed, ecosystem

    return [], "unknown"
|
|
|
|
|
|
# ============================================================================
|
|
# SBOM generation (CycloneDX)
|
|
# ============================================================================
|
|
|
|
|
|
def generate_sbom(components: list[dict[str, Any]], ecosystem: str) -> dict[str, Any]:
    """Generate a CycloneDX 1.5 SBOM from parsed components.

    Args:
        components: Parsed dependency dicts; each must provide ``name`` and
            ``version`` and may provide ``license``.
        ecosystem: Ecosystem label (e.g. ``"npm"``, ``"PyPI"``); lower-cased
            to form the purl type.

    Returns:
        A dict with ``bomFormat``, ``specVersion``, ``version``,
        ``metadata`` (UTC ISO-8601 timestamp and tool info) and
        ``components``. Each component's ``licenses`` is a list holding the
        license string, or an empty list when the license is missing or
        ``"unknown"``.
    """
    from datetime import datetime, timezone
    from urllib.parse import quote

    purl_type = ecosystem.lower()

    sbom_components = []
    for comp in components:
        # Percent-encode the name for the purl (keeping "/" as the
        # namespace separator) so an npm scope "@scope/pkg" becomes
        # "%40scope/pkg" instead of clashing with the "@version" delimiter.
        purl_name = quote(str(comp["name"]), safe="/")
        license_name = comp.get("license")
        sbom_components.append({
            "type": "library",
            "name": comp["name"],
            "version": comp["version"],
            "purl": f"pkg:{purl_type}/{purl_name}@{comp['version']}",
            # A missing/None license is treated like "unknown": no entry.
            "licenses": (
                [license_name]
                if license_name and license_name != "unknown"
                else []
            ),
        })

    return {
        "bomFormat": "CycloneDX",
        "specVersion": "1.5",
        "version": 1,
        "metadata": {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "tools": [{"name": "breakpilot-screening", "version": "1.0.0"}],
        },
        "components": sbom_components,
    }
|
|
|
|
|
|
# ============================================================================
|
|
# OSV.dev vulnerability scanning
|
|
# ============================================================================
|
|
|
|
|
|
async def query_osv(name: str, version: str, ecosystem: str) -> list[dict[str, Any]]:
    """Query the OSV.dev API for vulnerabilities of a single package.

    This is a best-effort lookup: any transport error, non-200 response or
    undecodable body is logged as a warning and reported as "no
    vulnerabilities" rather than raised.

    Args:
        name: Package name.
        version: Exact package version to check.
        ecosystem: OSV ecosystem label (e.g. ``"npm"``, ``"PyPI"``).

    Returns:
        The ``vulns`` list from the OSV response, or an empty list when the
        response has none or the query failed.
    """
    payload = {
        "package": {"name": name, "ecosystem": ecosystem},
        "version": version,
    }
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.post(OSV_API_URL, json=payload)
            if response.status_code == 200:
                # "vulns" may be absent (or null) when nothing is known.
                return response.json().get("vulns") or []
            # Make throttling / server errors visible; otherwise they are
            # indistinguishable from a clean result.
            logger.warning(
                "OSV query for %s@%s returned HTTP %s",
                name,
                version,
                response.status_code,
            )
    except Exception as exc:  # noqa: BLE001
        logger.warning("OSV query failed for %s@%s: %s", name, version, exc)
    return []
|
|
|
|
|
|
def map_osv_severity(vuln: dict[str, Any]) -> tuple[str, float]:
    """Derive a severity label and a representative CVSS score from OSV data.

    Only ``database_specific.severity`` is consulted. The score is a fixed
    value per label, not one computed from a CVSS vector.

    Args:
        vuln: A single OSV vulnerability record.

    Returns:
        ``(severity, cvss)`` where ``severity`` is one of ``CRITICAL``,
        ``HIGH``, ``MEDIUM`` or ``LOW``. A missing, non-string or
        unrecognised severity yields ``("MEDIUM", 5.0)``.
    """
    cvss_by_severity = {"CRITICAL": 9.5, "HIGH": 7.5, "MEDIUM": 5.0, "LOW": 2.5}
    severity = "MEDIUM"

    # "database_specific" may be absent, null, or (defensively) not a dict.
    db_specific = vuln.get("database_specific")
    raw = db_specific.get("severity") if isinstance(db_specific, dict) else None
    if isinstance(raw, str):
        label = raw.strip().upper()
        # NOTE(review): some advisory sources appear to use "MODERATE" for
        # the medium tier; it is mapped explicitly instead of relying on
        # the default.
        if label == "MODERATE":
            label = "MEDIUM"
        if label in cvss_by_severity:
            severity = label

    return severity, cvss_by_severity[severity]
|
|
|
|
|
|
def extract_fix_version(vuln: dict[str, Any], package_name: str) -> Optional[str]:
    """Return the first ``fixed`` version listed for *package_name*.

    Walks ``affected`` -> ``ranges`` -> ``events`` of an OSV record,
    considering only ``affected`` entries whose package name equals
    *package_name* case-insensitively.

    Args:
        vuln: A single OSV vulnerability record.
        package_name: Name of the package the fix is wanted for.

    Returns:
        The value of the first ``fixed`` event found, or ``None`` when the
        record lists no fix for that package.
    """
    fixed_versions = (
        event["fixed"]
        for entry in vuln.get("affected", [])
        if entry.get("package", {}).get("name", "").lower() == package_name.lower()
        for version_range in entry.get("ranges", [])
        for event in version_range.get("events", [])
        if "fixed" in event
    )
    # The generator is lazy, so only the first match is ever evaluated.
    return next(fixed_versions, None)
|
|
|
|
|
|
async def scan_vulnerabilities(components: list[dict[str, Any]], ecosystem: str) -> list[dict[str, Any]]:
    """Look up the first 50 components on OSV.dev and build issue records.

    Components whose version is ``"latest"``, ``"unknown"`` or ``"*"`` are
    skipped (they still count towards the 50-component window). Packages
    are queried one after another.

    Args:
        components: Parsed dependency dicts providing ``name`` and
            ``version``.
        ecosystem: OSV ecosystem label passed to every query.

    Returns:
        One issue dict per reported vulnerability, each with a fresh UUID
        ``id`` and status ``"OPEN"``.
    """
    unresolved_versions = ("latest", "unknown", "*")
    findings: list[dict[str, Any]] = []

    for component in components[:50]:
        version = component["version"]
        if version in unresolved_versions:
            continue
        name = component["name"]

        for vuln in await query_osv(name, version, ecosystem):
            # Fallback identifier for records that carry no "id".
            vuln_id = vuln.get("id", f"OSV-{uuid.uuid4().hex[:8]}")
            cve = next(
                (alias for alias in vuln.get("aliases", []) if alias.startswith("CVE-")),
                None,
            )
            severity, cvss = map_osv_severity(vuln)
            fixed_in = extract_fix_version(vuln, name)

            if fixed_in:
                remediation = f"Upgrade {name} to {fixed_in}"
            else:
                remediation = f"Check {vuln_id} for remediation steps"

            findings.append({
                "id": str(uuid.uuid4()),
                "severity": severity,
                "title": vuln.get("summary", vuln_id),
                # Descriptions are capped at 500 characters.
                "description": vuln.get("details", "")[:500],
                "cve": cve,
                "cvss": cvss,
                "affected_component": name,
                "affected_version": version,
                "fixed_in": fixed_in,
                "remediation": remediation,
                "status": "OPEN",
            })

    return findings
|
|
|
|
|
|
# ============================================================================
|
|
# Service (lookup endpoints; scan persistence stays in the route module)
|
|
# ============================================================================
|
|
|
|
|
|
class ScreeningService:
    """Lookup-side business logic for screenings + security issues."""

    def __init__(self, db: Session) -> None:
        """Store the SQLAlchemy session used by all lookups."""
        self.db = db

    def get_screening(self, screening_id: str) -> ScreeningResponse:
        """Load one screening with its issues and SBOM components.

        Vulnerabilities are attached to an SBOM component only when both
        the name and the version match the issue, so different installed
        versions of the same package do not inherit each other's findings.
        Issues stored without an affected version are attached by name.

        Args:
            screening_id: Primary key of the screening.

        Returns:
            The populated ``ScreeningResponse``.

        Raises:
            NotFoundError: If no screening with that id exists.
        """
        # NOTE(review): this lookup filters by id only, while
        # list_screenings filters by tenant_id — confirm the caller
        # enforces tenant access.
        row = self.db.execute(
            text(
                "SELECT id, status, sbom_format, sbom_version, "
                "total_components, total_issues, critical_issues, high_issues, "
                "medium_issues, low_issues, sbom_data, started_at, completed_at "
                "FROM compliance_screenings WHERE id = :id"
            ),
            {"id": screening_id},
        ).fetchone()
        if not row:
            raise NotFoundError("Screening not found")

        issues_rows = self.db.execute(
            text(
                "SELECT id, severity, title, description, cve, cvss, "
                "affected_component, affected_version, fixed_in, remediation, status "
                "FROM compliance_security_issues WHERE screening_id = :id"
            ),
            {"id": screening_id},
        ).fetchall()

        issues = [
            SecurityIssueResponse(
                id=str(r[0]), severity=r[1], title=r[2], description=r[3],
                cve=r[4], cvss=r[5], affected_component=r[6],
                affected_version=r[7], fixed_in=r[8], remediation=r[9], status=r[10],
            )
            for r in issues_rows
        ]

        sbom_data = row[10] or {}

        # Group issues by (component name, affected version); a missing or
        # empty version is normalised to None and later matched by name.
        comp_vulns: dict[tuple[Any, Any], list[dict[str, Any]]] = {}
        for issue in issues:
            key = (issue.affected_component, issue.affected_version or None)
            comp_vulns.setdefault(key, []).append({
                "id": issue.cve or issue.id,
                "cve": issue.cve,
                "severity": issue.severity,
                "title": issue.title,
                "cvss": issue.cvss,
                "fixedIn": issue.fixed_in,
            })

        components = []
        for sc in sbom_data.get("components", []):
            name, version = sc["name"], sc["version"]
            matched = list(comp_vulns.get((name, version), []))
            if version is not None:
                # Also include issues recorded without a version.
                matched.extend(comp_vulns.get((name, None), []))
            components.append(
                SBOMComponentResponse(
                    name=name,
                    version=version,
                    type=sc.get("type", "library"),
                    purl=sc.get("purl", ""),
                    licenses=sc.get("licenses", []),
                    vulnerabilities=matched,
                )
            )

        return ScreeningResponse(
            id=str(row[0]),
            status=row[1],
            # NULL columns fall back to the defaults below.
            sbom_format=row[2] or "CycloneDX",
            sbom_version=row[3] or "1.5",
            total_components=row[4] or 0,
            total_issues=row[5] or 0,
            critical_issues=row[6] or 0,
            high_issues=row[7] or 0,
            medium_issues=row[8] or 0,
            low_issues=row[9] or 0,
            components=components,
            issues=issues,
            started_at=str(row[11]) if row[11] else None,
            completed_at=str(row[12]) if row[12] else None,
        )

    def list_screenings(self, tenant_id: str) -> ScreeningListResponse:
        """List all screenings of a tenant, newest first.

        Args:
            tenant_id: Tenant whose screenings are returned.

        Returns:
            A ``ScreeningListResponse`` holding one summary dict per
            screening (ordered by ``created_at`` descending) and the total
            count.
        """
        rows = self.db.execute(
            text(
                "SELECT id, status, total_components, total_issues, "
                "critical_issues, high_issues, medium_issues, low_issues, "
                "started_at, completed_at, created_at "
                "FROM compliance_screenings "
                "WHERE tenant_id = :tenant_id "
                "ORDER BY created_at DESC"
            ),
            {"tenant_id": tenant_id},
        ).fetchall()
        screenings = [
            {
                "id": str(r[0]),
                "status": r[1],
                "total_components": r[2],
                "total_issues": r[3],
                "critical_issues": r[4],
                "high_issues": r[5],
                "medium_issues": r[6],
                "low_issues": r[7],
                "started_at": str(r[8]) if r[8] else None,
                "completed_at": str(r[9]) if r[9] else None,
                "created_at": str(r[10]),
            }
            for r in rows
        ]
        return ScreeningListResponse(screenings=screenings, total=len(screenings))
|