fix(quality): Ruff/CVE/TS-Fixes, 104 neue Tests, Complexity-Refactoring
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Failing after 30s
CI / test-python-backend-compliance (push) Successful in 30s
CI / test-python-document-crawler (push) Successful in 21s
CI / test-python-dsms-gateway (push) Successful in 17s

- Ruff: 144 auto-fixes (unused imports, == None → is None), F821/F811/F841 manuell
- CVEs: python-multipart>=0.0.22, weasyprint>=68.0, pillow>=12.1.1, npm audit fix (0 vulns)
- TS: 5 tote Drafting-Engine-Dateien entfernt, allowed-facts/sanitizer/StepHeader/context fixes
- Tests: +104 (ISMS 58, Evidence 18, VVT 14, Generation 14) → 1449 passed
- Refactoring: collect_ci_evidence (F→A), row_to_response (E→A), extract_requirements (E→A)
- Dead Code: pca-platform, 7 Go-Handler, dsr_api.py, duplicate Schemas entfernt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-07 19:00:33 +01:00
parent 6509e64dd9
commit 95fcba34cd
124 changed files with 2533 additions and 15709 deletions

View File

@@ -248,7 +248,231 @@ async def upload_evidence(
# ============================================================================
# CI/CD Evidence Collection
# CI/CD Evidence Collection — helpers
# ============================================================================
# Map CI source names to the corresponding control IDs.
# The `source` field of incoming CI evidence must be one of these keys;
# the endpoint rejects anything else with a 400 (see collect_ci_evidence).
SOURCE_CONTROL_MAP = {
    "sast": "SDLC-001",             # static analysis (SAST) reports
    "dependency_scan": "SDLC-002",  # dependency vulnerability scans
    "secret_scan": "SDLC-003",      # secret detection (Gitleaks, TruffleHog)
    "code_review": "SDLC-004",      # code review metrics
    "sbom": "SDLC-005",             # software bill of materials
    "container_scan": "SDLC-006",   # container image scans
    "test_results": "AUD-001",      # test results (audit control)
}
def _parse_ci_evidence(data: dict) -> dict:
"""
Parse and validate incoming CI evidence data.
Returns a dict with:
- report_json: str (serialised JSON)
- report_hash: str (SHA-256 hex digest)
- evidence_status: str ("valid" or "failed")
- findings_count: int
- critical_findings: int
"""
report_json = json.dumps(data) if data else "{}"
report_hash = hashlib.sha256(report_json.encode()).hexdigest()
findings_count = 0
critical_findings = 0
if data and isinstance(data, dict):
# Semgrep format
if "results" in data:
findings_count = len(data.get("results", []))
critical_findings = len([
r for r in data.get("results", [])
if r.get("extra", {}).get("severity", "").upper() in ["CRITICAL", "HIGH"]
])
# Trivy format
elif "Results" in data:
for result in data.get("Results", []):
vulns = result.get("Vulnerabilities", [])
findings_count += len(vulns)
critical_findings += len([
v for v in vulns
if v.get("Severity", "").upper() in ["CRITICAL", "HIGH"]
])
# Generic findings array
elif "findings" in data:
findings_count = len(data.get("findings", []))
# SBOM format - just count components
elif "components" in data:
findings_count = len(data.get("components", []))
evidence_status = "failed" if critical_findings > 0 else "valid"
return {
"report_json": report_json,
"report_hash": report_hash,
"evidence_status": evidence_status,
"findings_count": findings_count,
"critical_findings": critical_findings,
}
def _store_evidence(
    db: Session,
    *,
    control_db_id: str,
    source: str,
    parsed: dict,
    ci_job_id: str,
    ci_job_url: str,
    report_data: dict,
) -> EvidenceDB:
    """
    Persist a CI evidence item to the database and write the report file.

    Args:
        db: Active session; the new record is added, committed and refreshed.
        control_db_id: DB primary key of the control the evidence belongs to.
        source: CI source name (e.g. "sast", "container_scan").
        parsed: Output of _parse_ci_evidence() for this report.
        ci_job_id: CI job identifier; empty values are omitted from the description.
        ci_job_url: CI job URL; empty values are omitted from the description.
        report_data: Raw report payload, written to disk as pretty-printed JSON.

    Returns the created EvidenceDB instance (already committed).
    """
    findings_count = parsed["findings_count"]
    critical_findings = parsed["critical_findings"]

    # Build title and description
    title = f"{source.upper()} Report - {datetime.now().strftime('%Y-%m-%d %H:%M')}"
    description = "Automatically collected from CI/CD pipeline"
    if findings_count > 0:
        description += f"\n- Total findings: {findings_count}"
    if critical_findings > 0:
        description += f"\n- Critical/High findings: {critical_findings}"
    if ci_job_id:
        description += f"\n- CI Job ID: {ci_job_id}"
    if ci_job_url:
        description += f"\n- CI Job URL: {ci_job_url}"

    # Store report file. The hash prefix in the name disambiguates reports
    # collected within the same second.
    upload_dir = f"/tmp/compliance_evidence/ci/{source}"
    os.makedirs(upload_dir, exist_ok=True)
    file_name = f"{source}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{parsed['report_hash'][:8]}.json"
    file_path = os.path.join(upload_dir, file_name)
    # Write UTF-8 explicitly so the artifact does not depend on the locale
    # default encoding of the host running the API.
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(report_data or {}, f, indent=2)

    # Create evidence record; evidence is considered valid for 90 days.
    evidence = EvidenceDB(
        id=str(uuid_module.uuid4()),
        control_id=control_db_id,
        evidence_type=f"ci_{source}",
        title=title,
        description=description,
        artifact_path=file_path,
        artifact_hash=parsed["report_hash"],
        file_size_bytes=len(parsed["report_json"]),
        mime_type="application/json",
        source="ci_pipeline",
        ci_job_id=ci_job_id,
        valid_from=datetime.utcnow(),
        valid_until=datetime.utcnow() + timedelta(days=90),
        status=EvidenceStatusEnum(parsed["evidence_status"]),
    )
    db.add(evidence)
    db.commit()
    db.refresh(evidence)
    return evidence
def _extract_findings_detail(report_data: dict) -> dict:
"""
Extract severity-bucketed finding counts from report data.
Returns dict with keys: critical, high, medium, low.
"""
findings_detail = {
"critical": 0,
"high": 0,
"medium": 0,
"low": 0,
}
if not report_data:
return findings_detail
# Semgrep format
if "results" in report_data:
for r in report_data.get("results", []):
severity = r.get("extra", {}).get("severity", "").upper()
if severity == "CRITICAL":
findings_detail["critical"] += 1
elif severity == "HIGH":
findings_detail["high"] += 1
elif severity == "MEDIUM":
findings_detail["medium"] += 1
elif severity in ["LOW", "INFO"]:
findings_detail["low"] += 1
# Trivy format
elif "Results" in report_data:
for result in report_data.get("Results", []):
for v in result.get("Vulnerabilities", []):
severity = v.get("Severity", "").upper()
if severity == "CRITICAL":
findings_detail["critical"] += 1
elif severity == "HIGH":
findings_detail["high"] += 1
elif severity == "MEDIUM":
findings_detail["medium"] += 1
elif severity == "LOW":
findings_detail["low"] += 1
# Generic findings with severity
elif "findings" in report_data:
for f in report_data.get("findings", []):
severity = f.get("severity", "").upper()
if severity == "CRITICAL":
findings_detail["critical"] += 1
elif severity == "HIGH":
findings_detail["high"] += 1
elif severity == "MEDIUM":
findings_detail["medium"] += 1
else:
findings_detail["low"] += 1
return findings_detail
def _update_risks(db: Session, *, source: str, control_id: str, ci_job_id: str, report_data: dict):
    """
    Update risk status based on new evidence.

    Buckets the report's findings by severity and hands them to an
    AutoRiskUpdater, which updates the Control status and any linked Risks.

    Returns the update result, or None if the updater raised.
    """
    severity_counts = _extract_findings_detail(report_data)
    commit_sha = report_data.get("commit_sha", "unknown") if report_data else "unknown"

    try:
        updater = AutoRiskUpdater(db)
        result = updater.process_evidence_collect_request(
            tool=source,
            control_id=control_id,
            evidence_type=f"ci_{source}",
            timestamp=datetime.utcnow().isoformat(),
            commit_sha=commit_sha,
            ci_job_id=ci_job_id,
            findings=severity_counts,
        )
        logger.info(f"Auto-risk update completed for {control_id}: "
                    f"control_updated={result.control_updated}, "
                    f"risks_affected={len(result.risks_affected)}")
        return result
    except Exception as e:
        # Risk updates are best-effort: evidence collection must not fail
        # just because the risk engine does.
        logger.error(f"Auto-risk update failed for {control_id}: {str(e)}")
        return None
# ============================================================================
# CI/CD Evidence Collection — endpoint
# ============================================================================
@router.post("/evidence/collect")
@@ -274,17 +498,6 @@ async def collect_ci_evidence(
- secret_scan: Secret detection (Gitleaks, TruffleHog)
- code_review: Code review metrics
"""
# Map source to control_id
SOURCE_CONTROL_MAP = {
"sast": "SDLC-001",
"dependency_scan": "SDLC-002",
"secret_scan": "SDLC-003",
"code_review": "SDLC-004",
"sbom": "SDLC-005",
"container_scan": "SDLC-006",
"test_results": "AUD-001",
}
if source not in SOURCE_CONTROL_MAP:
raise HTTPException(
status_code=400,
@@ -302,173 +515,38 @@ async def collect_ci_evidence(
detail=f"Control {control_id} not found. Please seed the database first."
)
# Parse and validate report data
report_json = json.dumps(report_data) if report_data else "{}"
report_hash = hashlib.sha256(report_json.encode()).hexdigest()
# --- 1. Parse and validate report data ---
parsed = _parse_ci_evidence(report_data)
# Determine evidence status based on report content
evidence_status = "valid"
findings_count = 0
critical_findings = 0
if report_data:
# Try to extract findings from common report formats
if isinstance(report_data, dict):
# Semgrep format
if "results" in report_data:
findings_count = len(report_data.get("results", []))
critical_findings = len([
r for r in report_data.get("results", [])
if r.get("extra", {}).get("severity", "").upper() in ["CRITICAL", "HIGH"]
])
# Trivy format
elif "Results" in report_data:
for result in report_data.get("Results", []):
vulns = result.get("Vulnerabilities", [])
findings_count += len(vulns)
critical_findings += len([
v for v in vulns
if v.get("Severity", "").upper() in ["CRITICAL", "HIGH"]
])
# Generic findings array
elif "findings" in report_data:
findings_count = len(report_data.get("findings", []))
# SBOM format - just count components
elif "components" in report_data:
findings_count = len(report_data.get("components", []))
# If critical findings exist, mark as failed
if critical_findings > 0:
evidence_status = "failed"
# Create evidence title
title = f"{source.upper()} Report - {datetime.now().strftime('%Y-%m-%d %H:%M')}"
description = f"Automatically collected from CI/CD pipeline"
if findings_count > 0:
description += f"\n- Total findings: {findings_count}"
if critical_findings > 0:
description += f"\n- Critical/High findings: {critical_findings}"
if ci_job_id:
description += f"\n- CI Job ID: {ci_job_id}"
if ci_job_url:
description += f"\n- CI Job URL: {ci_job_url}"
# Store report file
upload_dir = f"/tmp/compliance_evidence/ci/{source}"
os.makedirs(upload_dir, exist_ok=True)
file_name = f"{source}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{report_hash[:8]}.json"
file_path = os.path.join(upload_dir, file_name)
with open(file_path, "w") as f:
json.dump(report_data or {}, f, indent=2)
# Create evidence record directly
evidence = EvidenceDB(
id=str(uuid_module.uuid4()),
control_id=control.id,
evidence_type=f"ci_{source}",
title=title,
description=description,
artifact_path=file_path,
artifact_hash=report_hash,
file_size_bytes=len(report_json),
mime_type="application/json",
source="ci_pipeline",
# --- 2. Store evidence in DB and write report file ---
evidence = _store_evidence(
db,
control_db_id=control.id,
source=source,
parsed=parsed,
ci_job_id=ci_job_id,
valid_from=datetime.utcnow(),
valid_until=datetime.utcnow() + timedelta(days=90),
status=EvidenceStatusEnum(evidence_status),
ci_job_url=ci_job_url,
report_data=report_data,
)
db.add(evidence)
db.commit()
db.refresh(evidence)
# =========================================================================
# AUTOMATIC RISK UPDATE
# Update Control status and linked Risks based on findings
# =========================================================================
risk_update_result = None
try:
# Extract detailed findings for risk assessment
findings_detail = {
"critical": 0,
"high": 0,
"medium": 0,
"low": 0,
}
if report_data:
# Semgrep format
if "results" in report_data:
for r in report_data.get("results", []):
severity = r.get("extra", {}).get("severity", "").upper()
if severity == "CRITICAL":
findings_detail["critical"] += 1
elif severity == "HIGH":
findings_detail["high"] += 1
elif severity == "MEDIUM":
findings_detail["medium"] += 1
elif severity in ["LOW", "INFO"]:
findings_detail["low"] += 1
# Trivy format
elif "Results" in report_data:
for result in report_data.get("Results", []):
for v in result.get("Vulnerabilities", []):
severity = v.get("Severity", "").upper()
if severity == "CRITICAL":
findings_detail["critical"] += 1
elif severity == "HIGH":
findings_detail["high"] += 1
elif severity == "MEDIUM":
findings_detail["medium"] += 1
elif severity == "LOW":
findings_detail["low"] += 1
# Generic findings with severity
elif "findings" in report_data:
for f in report_data.get("findings", []):
severity = f.get("severity", "").upper()
if severity == "CRITICAL":
findings_detail["critical"] += 1
elif severity == "HIGH":
findings_detail["high"] += 1
elif severity == "MEDIUM":
findings_detail["medium"] += 1
else:
findings_detail["low"] += 1
# Use AutoRiskUpdater to update Control status and Risks
auto_updater = AutoRiskUpdater(db)
risk_update_result = auto_updater.process_evidence_collect_request(
tool=source,
control_id=control_id,
evidence_type=f"ci_{source}",
timestamp=datetime.utcnow().isoformat(),
commit_sha=report_data.get("commit_sha", "unknown") if report_data else "unknown",
ci_job_id=ci_job_id,
findings=findings_detail,
)
logger.info(f"Auto-risk update completed for {control_id}: "
f"control_updated={risk_update_result.control_updated}, "
f"risks_affected={len(risk_update_result.risks_affected)}")
except Exception as e:
logger.error(f"Auto-risk update failed for {control_id}: {str(e)}")
# --- 3. Automatic risk update ---
risk_update_result = _update_risks(
db,
source=source,
control_id=control_id,
ci_job_id=ci_job_id,
report_data=report_data,
)
return {
"success": True,
"evidence_id": evidence.id,
"control_id": control_id,
"source": source,
"status": evidence_status,
"findings_count": findings_count,
"critical_findings": critical_findings,
"artifact_path": file_path,
"status": parsed["evidence_status"],
"findings_count": parsed["findings_count"],
"critical_findings": parsed["critical_findings"],
"artifact_path": evidence.artifact_path,
"message": f"Evidence collected successfully for control {control_id}",
"auto_risk_update": {
"enabled": True,