fix(quality): Ruff/CVE/TS-Fixes, 104 neue Tests, Complexity-Refactoring
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Failing after 30s
CI / test-python-backend-compliance (push) Successful in 30s
CI / test-python-document-crawler (push) Successful in 21s
CI / test-python-dsms-gateway (push) Successful in 17s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Failing after 30s
CI / test-python-backend-compliance (push) Successful in 30s
CI / test-python-document-crawler (push) Successful in 21s
CI / test-python-dsms-gateway (push) Successful in 17s
- Ruff: 144 auto-fixes (unused imports, == None → is None), F821/F811/F841 manuell - CVEs: python-multipart>=0.0.22, weasyprint>=68.0, pillow>=12.1.1, npm audit fix (0 vulns) - TS: 5 tote Drafting-Engine-Dateien entfernt, allowed-facts/sanitizer/StepHeader/context fixes - Tests: +104 (ISMS 58, Evidence 18, VVT 14, Generation 14) → 1449 passed - Refactoring: collect_ci_evidence (F→A), row_to_response (E→A), extract_requirements (E→A) - Dead Code: pca-platform, 7 Go-Handler, dsr_api.py, duplicate Schemas entfernt Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -36,7 +36,6 @@ async def list_ai_systems(
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""List all registered AI systems."""
|
||||
import uuid as _uuid
|
||||
query = db.query(AISystemDB)
|
||||
|
||||
if classification:
|
||||
@@ -88,7 +87,6 @@ async def create_ai_system(
|
||||
):
|
||||
"""Register a new AI system."""
|
||||
import uuid as _uuid
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
cls_enum = AIClassificationEnum(data.classification) if data.classification else AIClassificationEnum.UNCLASSIFIED
|
||||
|
||||
@@ -26,7 +26,7 @@ from ..db.models import (
|
||||
)
|
||||
from .schemas import (
|
||||
CreateAuditSessionRequest, AuditSessionResponse, AuditSessionSummary, AuditSessionDetailResponse,
|
||||
AuditSessionListResponse, SignOffRequest, SignOffResponse,
|
||||
SignOffRequest, SignOffResponse,
|
||||
AuditChecklistItem, AuditChecklistResponse, AuditStatistics,
|
||||
PaginationMeta,
|
||||
)
|
||||
@@ -164,7 +164,7 @@ async def get_audit_session(
|
||||
completion_percentage=session.completion_percentage,
|
||||
)
|
||||
|
||||
return AuditSessionDetail(
|
||||
return AuditSessionDetailResponse(
|
||||
id=session.id,
|
||||
name=session.name,
|
||||
description=session.description,
|
||||
|
||||
@@ -12,7 +12,6 @@ from typing import Optional, List
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Header
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from ..db.banner_models import (
|
||||
@@ -317,12 +316,12 @@ async def get_site_config(
|
||||
|
||||
categories = db.query(BannerCategoryConfigDB).filter(
|
||||
BannerCategoryConfigDB.site_config_id == config.id,
|
||||
BannerCategoryConfigDB.is_active == True,
|
||||
BannerCategoryConfigDB.is_active,
|
||||
).order_by(BannerCategoryConfigDB.sort_order).all()
|
||||
|
||||
vendors = db.query(BannerVendorConfigDB).filter(
|
||||
BannerVendorConfigDB.site_config_id == config.id,
|
||||
BannerVendorConfigDB.is_active == True,
|
||||
BannerVendorConfigDB.is_active,
|
||||
).all()
|
||||
|
||||
result = _site_config_to_dict(config)
|
||||
|
||||
@@ -96,8 +96,8 @@ def generate_change_requests_for_use_case(
|
||||
trigger_type="use_case_high_risk",
|
||||
target_document_type="dsfa",
|
||||
proposal_title=f"DSFA erstellen für '{title}' (Risiko: {risk_level})",
|
||||
proposal_body=f"Ein neuer Use Case mit hohem Risiko wurde erstellt. "
|
||||
f"Art. 35 DSGVO verlangt eine DSFA für Hochrisiko-Verarbeitungen.",
|
||||
proposal_body="Ein neuer Use Case mit hohem Risiko wurde erstellt. "
|
||||
"Art. 35 DSGVO verlangt eine DSFA für Hochrisiko-Verarbeitungen.",
|
||||
proposed_changes={
|
||||
"source": "use_case",
|
||||
"title": title,
|
||||
|
||||
@@ -14,8 +14,7 @@ Endpoints:
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Header
|
||||
from pydantic import BaseModel
|
||||
|
||||
@@ -11,7 +11,6 @@ Endpoints:
|
||||
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Header
|
||||
@@ -127,16 +126,68 @@ class AuditListResponse(BaseModel):
|
||||
# SQL column lists — keep in sync with SELECT/INSERT
|
||||
# =============================================================================
|
||||
|
||||
_BASE_COLUMNS = """id, tenant_id, company_name, legal_form, industry, founded_year,
|
||||
business_model, offerings, company_size, employee_count, annual_revenue,
|
||||
headquarters_country, headquarters_city, has_international_locations,
|
||||
international_countries, target_markets, primary_jurisdiction,
|
||||
is_data_controller, is_data_processor, uses_ai, ai_use_cases,
|
||||
dpo_name, dpo_email, legal_contact_name, legal_contact_email,
|
||||
machine_builder, is_complete, completed_at, created_at, updated_at,
|
||||
repos, document_sources, processing_systems, ai_systems, technical_contacts,
|
||||
subject_to_nis2, subject_to_ai_act, subject_to_iso27001,
|
||||
supervisory_authority, review_cycle_months"""
|
||||
_BASE_COLUMNS_LIST = [
|
||||
"id", "tenant_id", "company_name", "legal_form", "industry", "founded_year",
|
||||
"business_model", "offerings", "company_size", "employee_count", "annual_revenue",
|
||||
"headquarters_country", "headquarters_city", "has_international_locations",
|
||||
"international_countries", "target_markets", "primary_jurisdiction",
|
||||
"is_data_controller", "is_data_processor", "uses_ai", "ai_use_cases",
|
||||
"dpo_name", "dpo_email", "legal_contact_name", "legal_contact_email",
|
||||
"machine_builder", "is_complete", "completed_at", "created_at", "updated_at",
|
||||
"repos", "document_sources", "processing_systems", "ai_systems", "technical_contacts",
|
||||
"subject_to_nis2", "subject_to_ai_act", "subject_to_iso27001",
|
||||
"supervisory_authority", "review_cycle_months",
|
||||
]
|
||||
|
||||
_BASE_COLUMNS = ", ".join(_BASE_COLUMNS_LIST)
|
||||
|
||||
# Per-field defaults and type coercions for row_to_response.
|
||||
# Each entry is (field_name, default_value, expected_type_or_None).
|
||||
# - expected_type: if set, the value is checked with isinstance; if it fails,
|
||||
# default_value is used instead.
|
||||
# - Special sentinels: "STR" means str(value), "STR_OR_NONE" means str(v) if v else None.
|
||||
_FIELD_DEFAULTS = {
|
||||
"id": (None, "STR"),
|
||||
"tenant_id": (None, None),
|
||||
"company_name": ("", None),
|
||||
"legal_form": ("GmbH", None),
|
||||
"industry": ("", None),
|
||||
"founded_year": (None, None),
|
||||
"business_model": ("B2B", None),
|
||||
"offerings": ([], list),
|
||||
"company_size": ("small", None),
|
||||
"employee_count": ("1-9", None),
|
||||
"annual_revenue": ("< 2 Mio", None),
|
||||
"headquarters_country": ("DE", None),
|
||||
"headquarters_city": ("", None),
|
||||
"has_international_locations": (False, None),
|
||||
"international_countries": ([], list),
|
||||
"target_markets": (["DE"], list),
|
||||
"primary_jurisdiction": ("DE", None),
|
||||
"is_data_controller": (True, None),
|
||||
"is_data_processor": (False, None),
|
||||
"uses_ai": (False, None),
|
||||
"ai_use_cases": ([], list),
|
||||
"dpo_name": (None, None),
|
||||
"dpo_email": (None, None),
|
||||
"legal_contact_name": (None, None),
|
||||
"legal_contact_email": (None, None),
|
||||
"machine_builder": (None, dict),
|
||||
"is_complete": (False, None),
|
||||
"completed_at": (None, "STR_OR_NONE"),
|
||||
"created_at": (None, "STR"),
|
||||
"updated_at": (None, "STR"),
|
||||
"repos": ([], list),
|
||||
"document_sources": ([], list),
|
||||
"processing_systems": ([], list),
|
||||
"ai_systems": ([], list),
|
||||
"technical_contacts": ([], list),
|
||||
"subject_to_nis2": (False, None),
|
||||
"subject_to_ai_act": (False, None),
|
||||
"subject_to_iso27001": (False, None),
|
||||
"supervisory_authority": (None, None),
|
||||
"review_cycle_months": (12, None),
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -144,50 +195,29 @@ _BASE_COLUMNS = """id, tenant_id, company_name, legal_form, industry, founded_ye
|
||||
# =============================================================================
|
||||
|
||||
def row_to_response(row) -> CompanyProfileResponse:
|
||||
"""Convert a DB row to response model."""
|
||||
return CompanyProfileResponse(
|
||||
id=str(row[0]),
|
||||
tenant_id=row[1],
|
||||
company_name=row[2] or "",
|
||||
legal_form=row[3] or "GmbH",
|
||||
industry=row[4] or "",
|
||||
founded_year=row[5],
|
||||
business_model=row[6] or "B2B",
|
||||
offerings=row[7] if isinstance(row[7], list) else [],
|
||||
company_size=row[8] or "small",
|
||||
employee_count=row[9] or "1-9",
|
||||
annual_revenue=row[10] or "< 2 Mio",
|
||||
headquarters_country=row[11] or "DE",
|
||||
headquarters_city=row[12] or "",
|
||||
has_international_locations=row[13] or False,
|
||||
international_countries=row[14] if isinstance(row[14], list) else [],
|
||||
target_markets=row[15] if isinstance(row[15], list) else ["DE"],
|
||||
primary_jurisdiction=row[16] or "DE",
|
||||
is_data_controller=row[17] if row[17] is not None else True,
|
||||
is_data_processor=row[18] or False,
|
||||
uses_ai=row[19] or False,
|
||||
ai_use_cases=row[20] if isinstance(row[20], list) else [],
|
||||
dpo_name=row[21],
|
||||
dpo_email=row[22],
|
||||
legal_contact_name=row[23],
|
||||
legal_contact_email=row[24],
|
||||
machine_builder=row[25] if isinstance(row[25], dict) else None,
|
||||
is_complete=row[26] or False,
|
||||
completed_at=str(row[27]) if row[27] else None,
|
||||
created_at=str(row[28]),
|
||||
updated_at=str(row[29]),
|
||||
# Phase 2 fields (indices 30-39)
|
||||
repos=row[30] if isinstance(row[30], list) else [],
|
||||
document_sources=row[31] if isinstance(row[31], list) else [],
|
||||
processing_systems=row[32] if isinstance(row[32], list) else [],
|
||||
ai_systems=row[33] if isinstance(row[33], list) else [],
|
||||
technical_contacts=row[34] if isinstance(row[34], list) else [],
|
||||
subject_to_nis2=row[35] or False,
|
||||
subject_to_ai_act=row[36] or False,
|
||||
subject_to_iso27001=row[37] or False,
|
||||
supervisory_authority=row[38],
|
||||
review_cycle_months=row[39] or 12,
|
||||
)
|
||||
"""Convert a DB row to response model using zip-based column mapping."""
|
||||
raw = dict(zip(_BASE_COLUMNS_LIST, row))
|
||||
coerced: dict = {}
|
||||
|
||||
for col in _BASE_COLUMNS_LIST:
|
||||
default, expected_type = _FIELD_DEFAULTS[col]
|
||||
value = raw[col]
|
||||
|
||||
if expected_type == "STR":
|
||||
coerced[col] = str(value)
|
||||
elif expected_type == "STR_OR_NONE":
|
||||
coerced[col] = str(value) if value else None
|
||||
elif expected_type is not None:
|
||||
# Type-checked field (list / dict): use value only if it matches
|
||||
coerced[col] = value if isinstance(value, expected_type) else default
|
||||
else:
|
||||
# is_data_controller needs special None-check (True when NULL)
|
||||
if col == "is_data_controller":
|
||||
coerced[col] = value if value is not None else default
|
||||
else:
|
||||
coerced[col] = value or default if default is not None else value
|
||||
|
||||
return CompanyProfileResponse(**coerced)
|
||||
|
||||
|
||||
def log_audit(db, tenant_id: str, action: str, changed_fields: Optional[dict], changed_by: Optional[str]):
|
||||
|
||||
@@ -12,7 +12,7 @@ Endpoints:
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Header
|
||||
from pydantic import BaseModel
|
||||
|
||||
@@ -21,7 +21,7 @@ Usage:
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Callable
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy import text
|
||||
|
||||
@@ -42,7 +42,7 @@ def generate_loeschfristen_drafts(ctx: dict) -> list[dict]:
|
||||
"responsible": ctx.get("dpo_name", "DSB"),
|
||||
"status": "draft",
|
||||
"review_cycle_months": ctx.get("review_cycle_months", 12),
|
||||
"notes": f"Automatisch generiert aus Stammdaten. Bitte prüfen und anpassen.",
|
||||
"notes": "Automatisch generiert aus Stammdaten. Bitte prüfen und anpassen.",
|
||||
}
|
||||
policies.append(policy)
|
||||
|
||||
|
||||
@@ -51,7 +51,6 @@ def generate_tom_drafts(ctx: dict) -> list[dict]:
|
||||
measures.extend(_AI_ACT_TOMS)
|
||||
|
||||
# Enrich with metadata
|
||||
company = ctx.get("company_name", "")
|
||||
result = []
|
||||
for i, m in enumerate(measures, 1):
|
||||
result.append({
|
||||
|
||||
@@ -33,7 +33,6 @@ from classroom_engine.database import get_db
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/dsfa", tags=["compliance-dsfa"])
|
||||
|
||||
from .tenant_utils import get_tenant_id as _shared_get_tenant_id
|
||||
|
||||
# Legacy compat — still used by _get_tenant_id() below; will be removed once
|
||||
# all call-sites switch to Depends(get_tenant_id).
|
||||
@@ -855,7 +854,7 @@ async def approve_dsfa(
|
||||
|
||||
if request.approved:
|
||||
new_status = "approved"
|
||||
row = db.execute(
|
||||
db.execute(
|
||||
text("""
|
||||
UPDATE compliance_dsfas
|
||||
SET status = 'approved', approved_by = :approved_by, approved_at = NOW(), updated_at = NOW()
|
||||
@@ -866,7 +865,7 @@ async def approve_dsfa(
|
||||
).fetchone()
|
||||
else:
|
||||
new_status = "needs-update"
|
||||
row = db.execute(
|
||||
db.execute(
|
||||
text("""
|
||||
UPDATE compliance_dsfas
|
||||
SET status = 'needs-update', updated_at = NOW()
|
||||
|
||||
@@ -14,7 +14,7 @@ from fastapi import APIRouter, Depends, HTTPException, Query, Header
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import text, func, and_, or_, cast, String
|
||||
from sqlalchemy import text, func, and_, or_
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from ..db.dsr_models import (
|
||||
@@ -574,7 +574,7 @@ async def get_published_templates(
|
||||
"""Gibt publizierte Vorlagen zurueck."""
|
||||
query = db.query(DSRTemplateDB).filter(
|
||||
DSRTemplateDB.tenant_id == uuid.UUID(tenant_id),
|
||||
DSRTemplateDB.is_active == True,
|
||||
DSRTemplateDB.is_active,
|
||||
DSRTemplateDB.language == language,
|
||||
)
|
||||
if request_type:
|
||||
|
||||
@@ -6,14 +6,12 @@ Inklusive Versionierung, Approval-Workflow, Vorschau und Send-Logging.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Any
|
||||
from typing import Optional, Dict
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Header
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from ..db.email_template_models import (
|
||||
@@ -182,7 +180,7 @@ async def get_stats(
|
||||
base = db.query(EmailTemplateDB).filter(EmailTemplateDB.tenant_id == tid)
|
||||
|
||||
total = base.count()
|
||||
active = base.filter(EmailTemplateDB.is_active == True).count()
|
||||
active = base.filter(EmailTemplateDB.is_active).count()
|
||||
|
||||
# Count templates with published versions
|
||||
published_count = 0
|
||||
|
||||
@@ -248,7 +248,231 @@ async def upload_evidence(
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# CI/CD Evidence Collection
|
||||
# CI/CD Evidence Collection — helpers
|
||||
# ============================================================================
|
||||
|
||||
# Map CI source names to the corresponding control IDs
|
||||
SOURCE_CONTROL_MAP = {
|
||||
"sast": "SDLC-001",
|
||||
"dependency_scan": "SDLC-002",
|
||||
"secret_scan": "SDLC-003",
|
||||
"code_review": "SDLC-004",
|
||||
"sbom": "SDLC-005",
|
||||
"container_scan": "SDLC-006",
|
||||
"test_results": "AUD-001",
|
||||
}
|
||||
|
||||
|
||||
def _parse_ci_evidence(data: dict) -> dict:
|
||||
"""
|
||||
Parse and validate incoming CI evidence data.
|
||||
|
||||
Returns a dict with:
|
||||
- report_json: str (serialised JSON)
|
||||
- report_hash: str (SHA-256 hex digest)
|
||||
- evidence_status: str ("valid" or "failed")
|
||||
- findings_count: int
|
||||
- critical_findings: int
|
||||
"""
|
||||
report_json = json.dumps(data) if data else "{}"
|
||||
report_hash = hashlib.sha256(report_json.encode()).hexdigest()
|
||||
|
||||
findings_count = 0
|
||||
critical_findings = 0
|
||||
|
||||
if data and isinstance(data, dict):
|
||||
# Semgrep format
|
||||
if "results" in data:
|
||||
findings_count = len(data.get("results", []))
|
||||
critical_findings = len([
|
||||
r for r in data.get("results", [])
|
||||
if r.get("extra", {}).get("severity", "").upper() in ["CRITICAL", "HIGH"]
|
||||
])
|
||||
|
||||
# Trivy format
|
||||
elif "Results" in data:
|
||||
for result in data.get("Results", []):
|
||||
vulns = result.get("Vulnerabilities", [])
|
||||
findings_count += len(vulns)
|
||||
critical_findings += len([
|
||||
v for v in vulns
|
||||
if v.get("Severity", "").upper() in ["CRITICAL", "HIGH"]
|
||||
])
|
||||
|
||||
# Generic findings array
|
||||
elif "findings" in data:
|
||||
findings_count = len(data.get("findings", []))
|
||||
|
||||
# SBOM format - just count components
|
||||
elif "components" in data:
|
||||
findings_count = len(data.get("components", []))
|
||||
|
||||
evidence_status = "failed" if critical_findings > 0 else "valid"
|
||||
|
||||
return {
|
||||
"report_json": report_json,
|
||||
"report_hash": report_hash,
|
||||
"evidence_status": evidence_status,
|
||||
"findings_count": findings_count,
|
||||
"critical_findings": critical_findings,
|
||||
}
|
||||
|
||||
|
||||
def _store_evidence(
    db: Session,
    *,
    control_db_id: str,
    source: str,
    parsed: dict,
    ci_job_id: str,
    ci_job_url: str,
    report_data: dict,
) -> EvidenceDB:
    """
    Persist a CI evidence item to the database and write the report file.

    Returns the created EvidenceDB instance (already committed).
    """
    total = parsed["findings_count"]
    critical = parsed["critical_findings"]

    # Human-readable title/description assembled from the parsed summary.
    title = f"{source.upper()} Report - {datetime.now().strftime('%Y-%m-%d %H:%M')}"
    parts = ["Automatically collected from CI/CD pipeline"]
    if total > 0:
        parts.append(f"- Total findings: {total}")
    if critical > 0:
        parts.append(f"- Critical/High findings: {critical}")
    if ci_job_id:
        parts.append(f"- CI Job ID: {ci_job_id}")
    if ci_job_url:
        parts.append(f"- CI Job URL: {ci_job_url}")
    description = "\n".join(parts)

    # Write the raw report to disk; the hash prefix keeps file names unique
    # even for reports collected within the same second.
    target_dir = f"/tmp/compliance_evidence/ci/{source}"
    os.makedirs(target_dir, exist_ok=True)
    file_name = f"{source}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{parsed['report_hash'][:8]}.json"
    file_path = os.path.join(target_dir, file_name)

    with open(file_path, "w") as fh:
        json.dump(report_data or {}, fh, indent=2)

    # Create and commit the evidence record (valid for 90 days).
    evidence = EvidenceDB(
        id=str(uuid_module.uuid4()),
        control_id=control_db_id,
        evidence_type=f"ci_{source}",
        title=title,
        description=description,
        artifact_path=file_path,
        artifact_hash=parsed["report_hash"],
        file_size_bytes=len(parsed["report_json"]),
        mime_type="application/json",
        source="ci_pipeline",
        ci_job_id=ci_job_id,
        valid_from=datetime.utcnow(),
        valid_until=datetime.utcnow() + timedelta(days=90),
        status=EvidenceStatusEnum(parsed["evidence_status"]),
    )
    db.add(evidence)
    db.commit()
    db.refresh(evidence)

    return evidence
|
||||
|
||||
|
||||
def _extract_findings_detail(report_data: dict) -> dict:
|
||||
"""
|
||||
Extract severity-bucketed finding counts from report data.
|
||||
|
||||
Returns dict with keys: critical, high, medium, low.
|
||||
"""
|
||||
findings_detail = {
|
||||
"critical": 0,
|
||||
"high": 0,
|
||||
"medium": 0,
|
||||
"low": 0,
|
||||
}
|
||||
|
||||
if not report_data:
|
||||
return findings_detail
|
||||
|
||||
# Semgrep format
|
||||
if "results" in report_data:
|
||||
for r in report_data.get("results", []):
|
||||
severity = r.get("extra", {}).get("severity", "").upper()
|
||||
if severity == "CRITICAL":
|
||||
findings_detail["critical"] += 1
|
||||
elif severity == "HIGH":
|
||||
findings_detail["high"] += 1
|
||||
elif severity == "MEDIUM":
|
||||
findings_detail["medium"] += 1
|
||||
elif severity in ["LOW", "INFO"]:
|
||||
findings_detail["low"] += 1
|
||||
|
||||
# Trivy format
|
||||
elif "Results" in report_data:
|
||||
for result in report_data.get("Results", []):
|
||||
for v in result.get("Vulnerabilities", []):
|
||||
severity = v.get("Severity", "").upper()
|
||||
if severity == "CRITICAL":
|
||||
findings_detail["critical"] += 1
|
||||
elif severity == "HIGH":
|
||||
findings_detail["high"] += 1
|
||||
elif severity == "MEDIUM":
|
||||
findings_detail["medium"] += 1
|
||||
elif severity == "LOW":
|
||||
findings_detail["low"] += 1
|
||||
|
||||
# Generic findings with severity
|
||||
elif "findings" in report_data:
|
||||
for f in report_data.get("findings", []):
|
||||
severity = f.get("severity", "").upper()
|
||||
if severity == "CRITICAL":
|
||||
findings_detail["critical"] += 1
|
||||
elif severity == "HIGH":
|
||||
findings_detail["high"] += 1
|
||||
elif severity == "MEDIUM":
|
||||
findings_detail["medium"] += 1
|
||||
else:
|
||||
findings_detail["low"] += 1
|
||||
|
||||
return findings_detail
|
||||
|
||||
|
||||
def _update_risks(db: Session, *, source: str, control_id: str, ci_job_id: str, report_data: dict):
    """
    Update risk status based on new evidence.

    Uses AutoRiskUpdater to update Control status and linked Risks based on
    severity-bucketed findings. Returns the update result or None on error.
    """
    buckets = _extract_findings_detail(report_data)

    # Commit SHA is optional in the report payload; fall back to "unknown".
    commit_sha = report_data.get("commit_sha", "unknown") if report_data else "unknown"

    try:
        updater = AutoRiskUpdater(db)
        result = updater.process_evidence_collect_request(
            tool=source,
            control_id=control_id,
            evidence_type=f"ci_{source}",
            timestamp=datetime.utcnow().isoformat(),
            commit_sha=commit_sha,
            ci_job_id=ci_job_id,
            findings=buckets,
        )
        logger.info(f"Auto-risk update completed for {control_id}: "
                    f"control_updated={result.control_updated}, "
                    f"risks_affected={len(result.risks_affected)}")
        return result
    except Exception as e:
        # Best-effort: evidence collection must not fail because the
        # auto-risk update did.
        logger.error(f"Auto-risk update failed for {control_id}: {str(e)}")
        return None
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# CI/CD Evidence Collection — endpoint
|
||||
# ============================================================================
|
||||
|
||||
@router.post("/evidence/collect")
|
||||
@@ -274,17 +498,6 @@ async def collect_ci_evidence(
|
||||
- secret_scan: Secret detection (Gitleaks, TruffleHog)
|
||||
- code_review: Code review metrics
|
||||
"""
|
||||
# Map source to control_id
|
||||
SOURCE_CONTROL_MAP = {
|
||||
"sast": "SDLC-001",
|
||||
"dependency_scan": "SDLC-002",
|
||||
"secret_scan": "SDLC-003",
|
||||
"code_review": "SDLC-004",
|
||||
"sbom": "SDLC-005",
|
||||
"container_scan": "SDLC-006",
|
||||
"test_results": "AUD-001",
|
||||
}
|
||||
|
||||
if source not in SOURCE_CONTROL_MAP:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
@@ -302,173 +515,38 @@ async def collect_ci_evidence(
|
||||
detail=f"Control {control_id} not found. Please seed the database first."
|
||||
)
|
||||
|
||||
# Parse and validate report data
|
||||
report_json = json.dumps(report_data) if report_data else "{}"
|
||||
report_hash = hashlib.sha256(report_json.encode()).hexdigest()
|
||||
# --- 1. Parse and validate report data ---
|
||||
parsed = _parse_ci_evidence(report_data)
|
||||
|
||||
# Determine evidence status based on report content
|
||||
evidence_status = "valid"
|
||||
findings_count = 0
|
||||
critical_findings = 0
|
||||
|
||||
if report_data:
|
||||
# Try to extract findings from common report formats
|
||||
if isinstance(report_data, dict):
|
||||
# Semgrep format
|
||||
if "results" in report_data:
|
||||
findings_count = len(report_data.get("results", []))
|
||||
critical_findings = len([
|
||||
r for r in report_data.get("results", [])
|
||||
if r.get("extra", {}).get("severity", "").upper() in ["CRITICAL", "HIGH"]
|
||||
])
|
||||
|
||||
# Trivy format
|
||||
elif "Results" in report_data:
|
||||
for result in report_data.get("Results", []):
|
||||
vulns = result.get("Vulnerabilities", [])
|
||||
findings_count += len(vulns)
|
||||
critical_findings += len([
|
||||
v for v in vulns
|
||||
if v.get("Severity", "").upper() in ["CRITICAL", "HIGH"]
|
||||
])
|
||||
|
||||
# Generic findings array
|
||||
elif "findings" in report_data:
|
||||
findings_count = len(report_data.get("findings", []))
|
||||
|
||||
# SBOM format - just count components
|
||||
elif "components" in report_data:
|
||||
findings_count = len(report_data.get("components", []))
|
||||
|
||||
# If critical findings exist, mark as failed
|
||||
if critical_findings > 0:
|
||||
evidence_status = "failed"
|
||||
|
||||
# Create evidence title
|
||||
title = f"{source.upper()} Report - {datetime.now().strftime('%Y-%m-%d %H:%M')}"
|
||||
description = f"Automatically collected from CI/CD pipeline"
|
||||
if findings_count > 0:
|
||||
description += f"\n- Total findings: {findings_count}"
|
||||
if critical_findings > 0:
|
||||
description += f"\n- Critical/High findings: {critical_findings}"
|
||||
if ci_job_id:
|
||||
description += f"\n- CI Job ID: {ci_job_id}"
|
||||
if ci_job_url:
|
||||
description += f"\n- CI Job URL: {ci_job_url}"
|
||||
|
||||
# Store report file
|
||||
upload_dir = f"/tmp/compliance_evidence/ci/{source}"
|
||||
os.makedirs(upload_dir, exist_ok=True)
|
||||
file_name = f"{source}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{report_hash[:8]}.json"
|
||||
file_path = os.path.join(upload_dir, file_name)
|
||||
|
||||
with open(file_path, "w") as f:
|
||||
json.dump(report_data or {}, f, indent=2)
|
||||
|
||||
# Create evidence record directly
|
||||
evidence = EvidenceDB(
|
||||
id=str(uuid_module.uuid4()),
|
||||
control_id=control.id,
|
||||
evidence_type=f"ci_{source}",
|
||||
title=title,
|
||||
description=description,
|
||||
artifact_path=file_path,
|
||||
artifact_hash=report_hash,
|
||||
file_size_bytes=len(report_json),
|
||||
mime_type="application/json",
|
||||
source="ci_pipeline",
|
||||
# --- 2. Store evidence in DB and write report file ---
|
||||
evidence = _store_evidence(
|
||||
db,
|
||||
control_db_id=control.id,
|
||||
source=source,
|
||||
parsed=parsed,
|
||||
ci_job_id=ci_job_id,
|
||||
valid_from=datetime.utcnow(),
|
||||
valid_until=datetime.utcnow() + timedelta(days=90),
|
||||
status=EvidenceStatusEnum(evidence_status),
|
||||
ci_job_url=ci_job_url,
|
||||
report_data=report_data,
|
||||
)
|
||||
db.add(evidence)
|
||||
db.commit()
|
||||
db.refresh(evidence)
|
||||
|
||||
# =========================================================================
|
||||
# AUTOMATIC RISK UPDATE
|
||||
# Update Control status and linked Risks based on findings
|
||||
# =========================================================================
|
||||
risk_update_result = None
|
||||
try:
|
||||
# Extract detailed findings for risk assessment
|
||||
findings_detail = {
|
||||
"critical": 0,
|
||||
"high": 0,
|
||||
"medium": 0,
|
||||
"low": 0,
|
||||
}
|
||||
|
||||
if report_data:
|
||||
# Semgrep format
|
||||
if "results" in report_data:
|
||||
for r in report_data.get("results", []):
|
||||
severity = r.get("extra", {}).get("severity", "").upper()
|
||||
if severity == "CRITICAL":
|
||||
findings_detail["critical"] += 1
|
||||
elif severity == "HIGH":
|
||||
findings_detail["high"] += 1
|
||||
elif severity == "MEDIUM":
|
||||
findings_detail["medium"] += 1
|
||||
elif severity in ["LOW", "INFO"]:
|
||||
findings_detail["low"] += 1
|
||||
|
||||
# Trivy format
|
||||
elif "Results" in report_data:
|
||||
for result in report_data.get("Results", []):
|
||||
for v in result.get("Vulnerabilities", []):
|
||||
severity = v.get("Severity", "").upper()
|
||||
if severity == "CRITICAL":
|
||||
findings_detail["critical"] += 1
|
||||
elif severity == "HIGH":
|
||||
findings_detail["high"] += 1
|
||||
elif severity == "MEDIUM":
|
||||
findings_detail["medium"] += 1
|
||||
elif severity == "LOW":
|
||||
findings_detail["low"] += 1
|
||||
|
||||
# Generic findings with severity
|
||||
elif "findings" in report_data:
|
||||
for f in report_data.get("findings", []):
|
||||
severity = f.get("severity", "").upper()
|
||||
if severity == "CRITICAL":
|
||||
findings_detail["critical"] += 1
|
||||
elif severity == "HIGH":
|
||||
findings_detail["high"] += 1
|
||||
elif severity == "MEDIUM":
|
||||
findings_detail["medium"] += 1
|
||||
else:
|
||||
findings_detail["low"] += 1
|
||||
|
||||
# Use AutoRiskUpdater to update Control status and Risks
|
||||
auto_updater = AutoRiskUpdater(db)
|
||||
risk_update_result = auto_updater.process_evidence_collect_request(
|
||||
tool=source,
|
||||
control_id=control_id,
|
||||
evidence_type=f"ci_{source}",
|
||||
timestamp=datetime.utcnow().isoformat(),
|
||||
commit_sha=report_data.get("commit_sha", "unknown") if report_data else "unknown",
|
||||
ci_job_id=ci_job_id,
|
||||
findings=findings_detail,
|
||||
)
|
||||
|
||||
logger.info(f"Auto-risk update completed for {control_id}: "
|
||||
f"control_updated={risk_update_result.control_updated}, "
|
||||
f"risks_affected={len(risk_update_result.risks_affected)}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Auto-risk update failed for {control_id}: {str(e)}")
|
||||
# --- 3. Automatic risk update ---
|
||||
risk_update_result = _update_risks(
|
||||
db,
|
||||
source=source,
|
||||
control_id=control_id,
|
||||
ci_job_id=ci_job_id,
|
||||
report_data=report_data,
|
||||
)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"evidence_id": evidence.id,
|
||||
"control_id": control_id,
|
||||
"source": source,
|
||||
"status": evidence_status,
|
||||
"findings_count": findings_count,
|
||||
"critical_findings": critical_findings,
|
||||
"artifact_path": file_path,
|
||||
"status": parsed["evidence_status"],
|
||||
"findings_count": parsed["findings_count"],
|
||||
"critical_findings": parsed["critical_findings"],
|
||||
"artifact_path": evidence.artifact_path,
|
||||
"message": f"Evidence collected successfully for control {control_id}",
|
||||
"auto_risk_update": {
|
||||
"enabled": True,
|
||||
|
||||
@@ -20,13 +20,13 @@ import asyncio
|
||||
from typing import Optional, List, Dict
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from fastapi import APIRouter, Depends
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from ..db import RegulationRepository, RequirementRepository
|
||||
from ..db.models import RegulationDB, RequirementDB, RegulationTypeEnum
|
||||
from ..db.models import RegulationDB, RegulationTypeEnum
|
||||
from ..services.rag_client import get_rag_client, RAGSearchResult
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -185,6 +185,169 @@ def _build_existing_articles(
|
||||
return {r.article for r in existing}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Extraction helpers — independently testable
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _parse_rag_results(
|
||||
all_results: List[RAGSearchResult],
|
||||
regulation_codes: Optional[List[str]] = None,
|
||||
) -> dict:
|
||||
"""
|
||||
Filter, deduplicate, and group RAG search results by regulation code.
|
||||
|
||||
Returns a dict with:
|
||||
- deduped_by_reg: Dict[str, List[tuple[str, RAGSearchResult]]]
|
||||
- skipped_no_article: List[RAGSearchResult]
|
||||
- unique_count: int
|
||||
"""
|
||||
# Filter by regulation_codes if requested
|
||||
if regulation_codes:
|
||||
all_results = [
|
||||
r for r in all_results
|
||||
if r.regulation_code in regulation_codes
|
||||
]
|
||||
|
||||
# Deduplicate at result level (regulation_code + article)
|
||||
seen: set[tuple[str, str]] = set()
|
||||
unique_count = 0
|
||||
for r in sorted(all_results, key=lambda x: x.score, reverse=True):
|
||||
article = _normalize_article(r)
|
||||
if not article:
|
||||
continue
|
||||
key = (r.regulation_code, article)
|
||||
if key not in seen:
|
||||
seen.add(key)
|
||||
unique_count += 1
|
||||
|
||||
# Group by regulation_code
|
||||
by_reg: Dict[str, List[tuple[str, RAGSearchResult]]] = {}
|
||||
skipped_no_article: List[RAGSearchResult] = []
|
||||
|
||||
for r in all_results:
|
||||
article = _normalize_article(r)
|
||||
if not article:
|
||||
skipped_no_article.append(r)
|
||||
continue
|
||||
key_r = r.regulation_code or "UNKNOWN"
|
||||
if key_r not in by_reg:
|
||||
by_reg[key_r] = []
|
||||
by_reg[key_r].append((article, r))
|
||||
|
||||
# Deduplicate within groups
|
||||
deduped_by_reg: Dict[str, List[tuple[str, RAGSearchResult]]] = {}
|
||||
for reg_code, items in by_reg.items():
|
||||
seen_articles: set[str] = set()
|
||||
deduped: List[tuple[str, RAGSearchResult]] = []
|
||||
for art, r in sorted(items, key=lambda x: x[1].score, reverse=True):
|
||||
if art not in seen_articles:
|
||||
seen_articles.add(art)
|
||||
deduped.append((art, r))
|
||||
deduped_by_reg[reg_code] = deduped
|
||||
|
||||
return {
|
||||
"deduped_by_reg": deduped_by_reg,
|
||||
"skipped_no_article": skipped_no_article,
|
||||
"unique_count": unique_count,
|
||||
}
|
||||
|
||||
|
||||
def _store_requirements(
|
||||
db: Session,
|
||||
deduped_by_reg: Dict[str, List[tuple[str, "RAGSearchResult"]]],
|
||||
dry_run: bool,
|
||||
) -> dict:
|
||||
"""
|
||||
Persist extracted requirements to the database (or simulate in dry_run mode).
|
||||
|
||||
Returns a dict with:
|
||||
- created_count: int
|
||||
- skipped_dup_count: int
|
||||
- failed_count: int
|
||||
- result_items: List[ExtractedRequirement]
|
||||
"""
|
||||
req_repo = RequirementRepository(db)
|
||||
created_count = 0
|
||||
skipped_dup_count = 0
|
||||
failed_count = 0
|
||||
result_items: List[ExtractedRequirement] = []
|
||||
|
||||
for reg_code, items in deduped_by_reg.items():
|
||||
if not items:
|
||||
continue
|
||||
|
||||
# Find or create regulation
|
||||
try:
|
||||
first_result = items[0][1]
|
||||
regulation_name = first_result.regulation_name or first_result.regulation_short or reg_code
|
||||
if dry_run:
|
||||
# For dry_run, fake a regulation id
|
||||
regulation_id = f"dry-run-{reg_code}"
|
||||
existing_articles: set[str] = set()
|
||||
else:
|
||||
reg = _get_or_create_regulation(db, reg_code, regulation_name)
|
||||
regulation_id = reg.id
|
||||
existing_articles = _build_existing_articles(db, regulation_id)
|
||||
except Exception as e:
|
||||
logger.error("Failed to get/create regulation %s: %s", reg_code, e)
|
||||
failed_count += len(items)
|
||||
continue
|
||||
|
||||
for article, r in items:
|
||||
title = _derive_title(r.text, article)
|
||||
|
||||
if article in existing_articles:
|
||||
skipped_dup_count += 1
|
||||
result_items.append(ExtractedRequirement(
|
||||
regulation_code=reg_code,
|
||||
article=article,
|
||||
title=title,
|
||||
requirement_text=r.text[:1000],
|
||||
source_url=r.source_url,
|
||||
score=r.score,
|
||||
action="skipped_duplicate",
|
||||
))
|
||||
continue
|
||||
|
||||
if not dry_run:
|
||||
try:
|
||||
req_repo.create(
|
||||
regulation_id=regulation_id,
|
||||
article=article,
|
||||
title=title,
|
||||
description=f"Extrahiert aus RAG-Korpus (Collection: {r.category or r.regulation_code}). Score: {r.score:.2f}",
|
||||
requirement_text=r.text[:2000],
|
||||
breakpilot_interpretation=None,
|
||||
is_applicable=True,
|
||||
priority=2,
|
||||
)
|
||||
existing_articles.add(article) # prevent intra-batch duplication
|
||||
created_count += 1
|
||||
except Exception as e:
|
||||
logger.error("Failed to create requirement %s/%s: %s", reg_code, article, e)
|
||||
failed_count += 1
|
||||
continue
|
||||
else:
|
||||
created_count += 1 # dry_run: count as would-create
|
||||
|
||||
result_items.append(ExtractedRequirement(
|
||||
regulation_code=reg_code,
|
||||
article=article,
|
||||
title=title,
|
||||
requirement_text=r.text[:1000],
|
||||
source_url=r.source_url,
|
||||
score=r.score,
|
||||
action="created" if not dry_run else "would_create",
|
||||
))
|
||||
|
||||
return {
|
||||
"created_count": created_count,
|
||||
"skipped_dup_count": skipped_dup_count,
|
||||
"failed_count": failed_count,
|
||||
"result_items": result_items,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -225,126 +388,19 @@ async def extract_requirements_from_rag(
|
||||
|
||||
logger.info("RAG extraction: %d raw results from %d collections", len(all_results), len(collections))
|
||||
|
||||
# --- 2. Filter by regulation_codes if requested ---
|
||||
if body.regulation_codes:
|
||||
all_results = [
|
||||
r for r in all_results
|
||||
if r.regulation_code in body.regulation_codes
|
||||
]
|
||||
# --- 2. Parse, filter, deduplicate, and group ---
|
||||
parsed = _parse_rag_results(all_results, body.regulation_codes)
|
||||
deduped_by_reg = parsed["deduped_by_reg"]
|
||||
skipped_no_article = parsed["skipped_no_article"]
|
||||
|
||||
# --- 3. Deduplicate at result level (regulation_code + article) ---
|
||||
seen: set[tuple[str, str]] = set()
|
||||
unique_results: List[RAGSearchResult] = []
|
||||
for r in sorted(all_results, key=lambda x: x.score, reverse=True):
|
||||
article = _normalize_article(r)
|
||||
if not article:
|
||||
continue
|
||||
key = (r.regulation_code, article)
|
||||
if key not in seen:
|
||||
seen.add(key)
|
||||
unique_results.append(r)
|
||||
logger.info("RAG extraction: %d unique (regulation, article) pairs", parsed["unique_count"])
|
||||
|
||||
logger.info("RAG extraction: %d unique (regulation, article) pairs", len(unique_results))
|
||||
|
||||
# --- 4. Group by regulation_code and process ---
|
||||
by_reg: Dict[str, List[tuple[str, RAGSearchResult]]] = {}
|
||||
skipped_no_article: List[RAGSearchResult] = []
|
||||
|
||||
for r in all_results:
|
||||
article = _normalize_article(r)
|
||||
if not article:
|
||||
skipped_no_article.append(r)
|
||||
continue
|
||||
key_r = r.regulation_code or "UNKNOWN"
|
||||
if key_r not in by_reg:
|
||||
by_reg[key_r] = []
|
||||
by_reg[key_r].append((article, r))
|
||||
|
||||
# Deduplicate within groups
|
||||
deduped_by_reg: Dict[str, List[tuple[str, RAGSearchResult]]] = {}
|
||||
for reg_code, items in by_reg.items():
|
||||
seen_articles: set[str] = set()
|
||||
deduped: List[tuple[str, RAGSearchResult]] = []
|
||||
for art, r in sorted(items, key=lambda x: x[1].score, reverse=True):
|
||||
if art not in seen_articles:
|
||||
seen_articles.add(art)
|
||||
deduped.append((art, r))
|
||||
deduped_by_reg[reg_code] = deduped
|
||||
|
||||
# --- 5. Create requirements ---
|
||||
req_repo = RequirementRepository(db)
|
||||
created_count = 0
|
||||
skipped_dup_count = 0
|
||||
failed_count = 0
|
||||
result_items: List[ExtractedRequirement] = []
|
||||
|
||||
for reg_code, items in deduped_by_reg.items():
|
||||
if not items:
|
||||
continue
|
||||
|
||||
# Find or create regulation
|
||||
try:
|
||||
first_result = items[0][1]
|
||||
regulation_name = first_result.regulation_name or first_result.regulation_short or reg_code
|
||||
if body.dry_run:
|
||||
# For dry_run, fake a regulation id
|
||||
regulation_id = f"dry-run-{reg_code}"
|
||||
existing_articles: set[str] = set()
|
||||
else:
|
||||
reg = _get_or_create_regulation(db, reg_code, regulation_name)
|
||||
regulation_id = reg.id
|
||||
existing_articles = _build_existing_articles(db, regulation_id)
|
||||
except Exception as e:
|
||||
logger.error("Failed to get/create regulation %s: %s", reg_code, e)
|
||||
failed_count += len(items)
|
||||
continue
|
||||
|
||||
for article, r in items:
|
||||
title = _derive_title(r.text, article)
|
||||
|
||||
if article in existing_articles:
|
||||
skipped_dup_count += 1
|
||||
result_items.append(ExtractedRequirement(
|
||||
regulation_code=reg_code,
|
||||
article=article,
|
||||
title=title,
|
||||
requirement_text=r.text[:1000],
|
||||
source_url=r.source_url,
|
||||
score=r.score,
|
||||
action="skipped_duplicate",
|
||||
))
|
||||
continue
|
||||
|
||||
if not body.dry_run:
|
||||
try:
|
||||
req_repo.create(
|
||||
regulation_id=regulation_id,
|
||||
article=article,
|
||||
title=title,
|
||||
description=f"Extrahiert aus RAG-Korpus (Collection: {r.category or r.regulation_code}). Score: {r.score:.2f}",
|
||||
requirement_text=r.text[:2000],
|
||||
breakpilot_interpretation=None,
|
||||
is_applicable=True,
|
||||
priority=2,
|
||||
)
|
||||
existing_articles.add(article) # prevent intra-batch duplication
|
||||
created_count += 1
|
||||
except Exception as e:
|
||||
logger.error("Failed to create requirement %s/%s: %s", reg_code, article, e)
|
||||
failed_count += 1
|
||||
continue
|
||||
else:
|
||||
created_count += 1 # dry_run: count as would-create
|
||||
|
||||
result_items.append(ExtractedRequirement(
|
||||
regulation_code=reg_code,
|
||||
article=article,
|
||||
title=title,
|
||||
requirement_text=r.text[:1000],
|
||||
source_url=r.source_url,
|
||||
score=r.score,
|
||||
action="created" if not body.dry_run else "would_create",
|
||||
))
|
||||
# --- 3. Create requirements ---
|
||||
store_result = _store_requirements(db, deduped_by_reg, body.dry_run)
|
||||
created_count = store_result["created_count"]
|
||||
skipped_dup_count = store_result["skipped_dup_count"]
|
||||
failed_count = store_result["failed_count"]
|
||||
result_items = store_result["result_items"]
|
||||
|
||||
message = (
|
||||
f"{'[DRY RUN] ' if body.dry_run else ''}"
|
||||
|
||||
@@ -24,7 +24,7 @@ Endpoints:
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional, List, Any
|
||||
from typing import Optional, List
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Header
|
||||
|
||||
@@ -14,7 +14,7 @@ Provides endpoints for ISO 27001 certification-ready ISMS management:
|
||||
import uuid
|
||||
import hashlib
|
||||
from datetime import datetime, date
|
||||
from typing import Optional, List
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, Depends
|
||||
from sqlalchemy.orm import Session
|
||||
@@ -53,7 +53,7 @@ from .schemas import (
|
||||
# Readiness
|
||||
ISMSReadinessCheckResponse, ISMSReadinessCheckRequest, PotentialFinding,
|
||||
# Audit Trail
|
||||
AuditTrailResponse, AuditTrailEntry, PaginationMeta,
|
||||
AuditTrailResponse, PaginationMeta,
|
||||
# Overview
|
||||
ISO27001OverviewResponse, ISO27001ChapterStatus
|
||||
)
|
||||
@@ -673,10 +673,6 @@ async def list_findings(
|
||||
ofi_count = sum(1 for f in findings if f.finding_type == FindingTypeEnum.OFI)
|
||||
open_count = sum(1 for f in findings if f.status != FindingStatusEnum.CLOSED)
|
||||
|
||||
# Add is_blocking property to each finding
|
||||
for f in findings:
|
||||
f.is_blocking = f.finding_type == FindingTypeEnum.MAJOR and f.status != FindingStatusEnum.CLOSED
|
||||
|
||||
return AuditFindingListResponse(
|
||||
findings=findings,
|
||||
total=len(findings),
|
||||
@@ -746,7 +742,6 @@ async def create_finding(data: AuditFindingCreate, db: Session = Depends(get_db)
|
||||
db.commit()
|
||||
db.refresh(finding)
|
||||
|
||||
finding.is_blocking = finding.finding_type == FindingTypeEnum.MAJOR
|
||||
return finding
|
||||
|
||||
|
||||
@@ -775,7 +770,6 @@ async def update_finding(
|
||||
db.commit()
|
||||
db.refresh(finding)
|
||||
|
||||
finding.is_blocking = finding.finding_type == FindingTypeEnum.MAJOR and finding.status != FindingStatusEnum.CLOSED
|
||||
return finding
|
||||
|
||||
|
||||
@@ -824,7 +818,6 @@ async def close_finding(
|
||||
db.commit()
|
||||
db.refresh(finding)
|
||||
|
||||
finding.is_blocking = False
|
||||
return finding
|
||||
|
||||
|
||||
@@ -1271,10 +1264,9 @@ async def run_readiness_check(
|
||||
|
||||
# Chapter 6: Planning - Risk Assessment
|
||||
from ..db.models import RiskDB
|
||||
risks = db.query(RiskDB).filter(RiskDB.status == "open").count()
|
||||
risks_without_treatment = db.query(RiskDB).filter(
|
||||
RiskDB.status == "open",
|
||||
RiskDB.treatment_plan == None
|
||||
RiskDB.treatment_plan is None
|
||||
).count()
|
||||
if risks_without_treatment > 0:
|
||||
potential_majors.append(PotentialFinding(
|
||||
@@ -1299,7 +1291,7 @@ async def run_readiness_check(
|
||||
# SoA
|
||||
soa_total = db.query(StatementOfApplicabilityDB).count()
|
||||
soa_unapproved = db.query(StatementOfApplicabilityDB).filter(
|
||||
StatementOfApplicabilityDB.approved_at == None
|
||||
StatementOfApplicabilityDB.approved_at is None
|
||||
).count()
|
||||
if soa_total == 0:
|
||||
potential_majors.append(PotentialFinding(
|
||||
@@ -1525,7 +1517,7 @@ async def get_iso27001_overview(db: Session = Depends(get_db)):
|
||||
|
||||
soa_total = db.query(StatementOfApplicabilityDB).count()
|
||||
soa_approved = db.query(StatementOfApplicabilityDB).filter(
|
||||
StatementOfApplicabilityDB.approved_at != None
|
||||
StatementOfApplicabilityDB.approved_at is not None
|
||||
).count()
|
||||
soa_all_approved = soa_total > 0 and soa_approved == soa_total
|
||||
|
||||
|
||||
@@ -671,7 +671,7 @@ async def get_my_consents(
|
||||
.filter(
|
||||
UserConsentDB.tenant_id == tid,
|
||||
UserConsentDB.user_id == user_id,
|
||||
UserConsentDB.withdrawn_at == None,
|
||||
UserConsentDB.withdrawn_at is None,
|
||||
)
|
||||
.order_by(UserConsentDB.consented_at.desc())
|
||||
.all()
|
||||
@@ -694,8 +694,8 @@ async def check_consent(
|
||||
UserConsentDB.tenant_id == tid,
|
||||
UserConsentDB.user_id == user_id,
|
||||
UserConsentDB.document_type == document_type,
|
||||
UserConsentDB.consented == True,
|
||||
UserConsentDB.withdrawn_at == None,
|
||||
UserConsentDB.consented,
|
||||
UserConsentDB.withdrawn_at is None,
|
||||
)
|
||||
.order_by(UserConsentDB.consented_at.desc())
|
||||
.first()
|
||||
@@ -757,10 +757,10 @@ async def get_consent_stats(
|
||||
|
||||
total = base.count()
|
||||
active = base.filter(
|
||||
UserConsentDB.consented == True,
|
||||
UserConsentDB.withdrawn_at == None,
|
||||
UserConsentDB.consented,
|
||||
UserConsentDB.withdrawn_at is None,
|
||||
).count()
|
||||
withdrawn = base.filter(UserConsentDB.withdrawn_at != None).count()
|
||||
withdrawn = base.filter(UserConsentDB.withdrawn_at is not None).count()
|
||||
|
||||
# By document type
|
||||
by_type = {}
|
||||
|
||||
@@ -314,9 +314,9 @@ async def update_legal_template(
|
||||
raise HTTPException(status_code=400, detail="No fields to update")
|
||||
|
||||
if "document_type" in updates and updates["document_type"] not in VALID_DOCUMENT_TYPES:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid document_type")
|
||||
raise HTTPException(status_code=400, detail="Invalid document_type")
|
||||
if "status" in updates and updates["status"] not in VALID_STATUSES:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid status")
|
||||
raise HTTPException(status_code=400, detail="Invalid status")
|
||||
|
||||
set_clauses = ["updated_at = :updated_at"]
|
||||
params: Dict[str, Any] = {
|
||||
|
||||
@@ -16,11 +16,10 @@ import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
import os
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Query, BackgroundTasks
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -31,22 +30,16 @@ from ..db import (
|
||||
RequirementRepository,
|
||||
ControlRepository,
|
||||
EvidenceRepository,
|
||||
RiskRepository,
|
||||
AuditExportRepository,
|
||||
ControlStatusEnum,
|
||||
ControlDomainEnum,
|
||||
RiskLevelEnum,
|
||||
EvidenceStatusEnum,
|
||||
)
|
||||
from ..db.models import EvidenceDB, ControlDB
|
||||
from ..services.seeder import ComplianceSeeder
|
||||
from ..services.export_generator import AuditExportGenerator
|
||||
from ..services.auto_risk_updater import AutoRiskUpdater, ScanType
|
||||
from .schemas import (
|
||||
RegulationCreate, RegulationResponse, RegulationListResponse,
|
||||
RegulationResponse, RegulationListResponse,
|
||||
RequirementCreate, RequirementResponse, RequirementListResponse,
|
||||
ControlCreate, ControlUpdate, ControlResponse, ControlListResponse, ControlReviewRequest,
|
||||
MappingCreate, MappingResponse, MappingListResponse,
|
||||
ControlUpdate, ControlResponse, ControlListResponse, ControlReviewRequest,
|
||||
ExportRequest, ExportResponse, ExportListResponse,
|
||||
SeedRequest, SeedResponse,
|
||||
# Pagination schemas
|
||||
@@ -381,7 +374,6 @@ async def delete_requirement(requirement_id: str, db: Session = Depends(get_db))
|
||||
async def update_requirement(requirement_id: str, updates: dict, db: Session = Depends(get_db)):
|
||||
"""Update a requirement with implementation/audit details."""
|
||||
from ..db.models import RequirementDB
|
||||
from datetime import datetime
|
||||
|
||||
requirement = db.query(RequirementDB).filter(RequirementDB.id == requirement_id).first()
|
||||
if not requirement:
|
||||
@@ -870,8 +862,8 @@ async def init_tables(db: Session = Depends(get_db)):
|
||||
"""Create compliance tables if they don't exist."""
|
||||
from classroom_engine.database import engine
|
||||
from ..db.models import (
|
||||
RegulationDB, RequirementDB, ControlDB, ControlMappingDB,
|
||||
EvidenceDB, RiskDB, AuditExportDB, AISystemDB
|
||||
RegulationDB, RequirementDB, ControlMappingDB,
|
||||
RiskDB, AuditExportDB, AISystemDB
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -971,8 +963,8 @@ async def seed_database(
|
||||
"""Seed the compliance database with initial data."""
|
||||
from classroom_engine.database import engine
|
||||
from ..db.models import (
|
||||
RegulationDB, RequirementDB, ControlDB, ControlMappingDB,
|
||||
EvidenceDB, RiskDB, AuditExportDB
|
||||
RegulationDB, RequirementDB, ControlMappingDB,
|
||||
RiskDB, AuditExportDB
|
||||
)
|
||||
|
||||
try:
|
||||
|
||||
@@ -496,57 +496,6 @@ class SeedResponse(BaseModel):
|
||||
counts: Dict[str, int]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# PDF Extraction Schemas
|
||||
# ============================================================================
|
||||
|
||||
class BSIAspectResponse(BaseModel):
|
||||
"""Response schema for an extracted BSI-TR Pruefaspekt."""
|
||||
aspect_id: str
|
||||
title: str
|
||||
full_text: str
|
||||
category: str
|
||||
page_number: int
|
||||
section: str
|
||||
requirement_level: str
|
||||
source_document: str
|
||||
keywords: List[str] = []
|
||||
related_aspects: List[str] = []
|
||||
|
||||
|
||||
class PDFExtractionResponse(BaseModel):
|
||||
"""Response for PDF extraction operation."""
|
||||
success: bool
|
||||
source_document: str
|
||||
total_aspects: int
|
||||
aspects: List[BSIAspectResponse]
|
||||
statistics: Dict[str, Any]
|
||||
requirements_created: int = 0
|
||||
|
||||
|
||||
class PDFExtractionRequest(BaseModel):
|
||||
"""Request to extract requirements from a PDF."""
|
||||
document_code: str # e.g., "BSI-TR-03161-2"
|
||||
save_to_db: bool = True
|
||||
force: bool = False
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Paginated Response Schemas (after all Response classes are defined)
|
||||
# ============================================================================
|
||||
|
||||
class PaginatedRequirementResponse(BaseModel):
|
||||
"""Paginated response for requirements."""
|
||||
data: List[RequirementResponse]
|
||||
pagination: PaginationMeta
|
||||
|
||||
|
||||
class PaginatedControlResponse(BaseModel):
|
||||
"""Paginated response for controls."""
|
||||
data: List[ControlResponse]
|
||||
pagination: PaginationMeta
|
||||
|
||||
|
||||
class PaginatedEvidenceResponse(BaseModel):
|
||||
"""Paginated response for evidence."""
|
||||
data: List[EvidenceResponse]
|
||||
|
||||
@@ -257,18 +257,6 @@ def map_osv_severity(vuln: dict) -> tuple[str, float]:
|
||||
severity = "MEDIUM"
|
||||
cvss = 5.0
|
||||
|
||||
# Check severity array
|
||||
for sev in vuln.get("severity", []):
|
||||
if sev.get("type") == "CVSS_V3":
|
||||
score_str = sev.get("score", "")
|
||||
# Extract base score from CVSS vector
|
||||
try:
|
||||
import re as _re
|
||||
# CVSS vectors don't contain the score directly, try database_specific
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Check database_specific for severity
|
||||
db_specific = vuln.get("database_specific", {})
|
||||
if "severity" in db_specific:
|
||||
|
||||
@@ -21,9 +21,8 @@ Endpoints:
|
||||
GET /api/v1/admin/compliance-report — Compliance report
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Depends, Query
|
||||
from pydantic import BaseModel, Field
|
||||
@@ -155,7 +154,7 @@ async def list_sources(
|
||||
"""List all allowed sources with optional filters."""
|
||||
query = db.query(AllowedSourceDB)
|
||||
if active_only:
|
||||
query = query.filter(AllowedSourceDB.active == True)
|
||||
query = query.filter(AllowedSourceDB.active)
|
||||
if source_type:
|
||||
query = query.filter(AllowedSourceDB.source_type == source_type)
|
||||
if license:
|
||||
@@ -527,8 +526,8 @@ async def get_policy_audit(
|
||||
async def get_policy_stats(db: Session = Depends(get_db)):
|
||||
"""Get dashboard statistics for source policy."""
|
||||
total_sources = db.query(AllowedSourceDB).count()
|
||||
active_sources = db.query(AllowedSourceDB).filter(AllowedSourceDB.active == True).count()
|
||||
pii_rules = db.query(PIIRuleDB).filter(PIIRuleDB.active == True).count()
|
||||
active_sources = db.query(AllowedSourceDB).filter(AllowedSourceDB.active).count()
|
||||
pii_rules = db.query(PIIRuleDB).filter(PIIRuleDB.active).count()
|
||||
|
||||
# Count blocked content entries from today
|
||||
today_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
@@ -550,8 +549,8 @@ async def get_policy_stats(db: Session = Depends(get_db)):
|
||||
@router.get("/compliance-report")
|
||||
async def get_compliance_report(db: Session = Depends(get_db)):
|
||||
"""Generate a compliance report for source policies."""
|
||||
sources = db.query(AllowedSourceDB).filter(AllowedSourceDB.active == True).all()
|
||||
pii_rules = db.query(PIIRuleDB).filter(PIIRuleDB.active == True).all()
|
||||
sources = db.query(AllowedSourceDB).filter(AllowedSourceDB.active).all()
|
||||
pii_rules = db.query(PIIRuleDB).filter(PIIRuleDB.active).all()
|
||||
|
||||
return {
|
||||
"report_date": datetime.utcnow().isoformat(),
|
||||
|
||||
@@ -19,11 +19,11 @@ import json
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional, List, Any, Dict
|
||||
from uuid import UUID, uuid4
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
|
||||
@@ -50,10 +50,9 @@ import json
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@ with all 5 version tables (DSFA, VVT, TOM, Loeschfristen, Obligations).
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
|
||||
@@ -19,7 +19,6 @@ import io
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional, List
|
||||
from uuid import uuid4
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
Reference in New Issue
Block a user