Files
breakpilot-compliance/backend-compliance/compliance/db/evidence_repository.py
Sharang Parnerkar 482e8574ad refactor(backend/db): split repository.py + isms_repository.py per-aggregate
Phase 1 Step 5 of PHASE1_RUNBOOK.md.

compliance/db/repository.py (1547 LOC) decomposed into seven sibling
per-aggregate repository modules:

  regulation_repository.py     (268) — Regulation + Requirement
  control_repository.py        (291) — Control + ControlMapping
  evidence_repository.py       (143)
  risk_repository.py           (148)
  audit_export_repository.py   (110)
  service_module_repository.py (247)
  audit_session_repository.py  (478) — AuditSession + AuditSignOff

compliance/db/isms_repository.py (838 LOC) decomposed into two
sub-aggregate modules mirroring the models split:

  isms_governance_repository.py (354) — Scope, Policy, Objective, SoA
  isms_audit_repository.py      (499) — Finding, CAPA, Review, Internal Audit,
                                         Trail, Readiness

Both original files become thin re-export shims (37 and 25 LOC
respectively) so every existing import continues to work unchanged.
New code SHOULD import from the aggregate module directly.

All new sibling files are under the 500-line hard cap; the largest is
isms_audit_repository.py at 499 lines (on the edge). When the Phase 1
Step 4 router->service extraction lands, the audit_session repo may be
split further if its growth exceeds 500 lines.

Verified:
  - 173/173 pytest compliance/tests/ tests/contracts/ pass
  - OpenAPI 360 paths / 484 operations unchanged
  - All repo files under 500 LOC

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 18:08:39 +02:00

144 lines
4.9 KiB
Python

"""
Evidence repository — extracted from compliance/db/repository.py.
Phase 1 Step 5: the monolithic repository module is decomposed per
aggregate. Every repository class is still re-exported from
``compliance.db.repository`` for backwards compatibility.
"""
import uuid
from datetime import datetime, date, timezone
from typing import List, Optional, Dict, Any, Tuple
from sqlalchemy.orm import Session as DBSession, selectinload, joinedload
from sqlalchemy import func, and_, or_
from compliance.db.models import (
RegulationDB, RequirementDB, ControlDB, ControlMappingDB,
EvidenceDB, RiskDB, AuditExportDB,
AuditSessionDB, AuditSignOffDB, AuditResultEnum, AuditSessionStatusEnum,
RegulationTypeEnum, ControlDomainEnum, ControlStatusEnum,
RiskLevelEnum, EvidenceStatusEnum, ExportStatusEnum,
ServiceModuleDB, ModuleRegulationMappingDB,
)
class EvidenceRepository:
    """Data-access helper for ``EvidenceDB`` rows.

    Wraps a SQLAlchemy session and offers create / read / status-update
    operations plus aggregate statistics. The write methods commit on the
    session that was passed to the constructor.
    """

    def __init__(self, db: DBSession):
        """Bind the repository to an existing session.

        Args:
            db: Session used for every query and commit issued by this
                repository.
        """
        self.db = db

    def _find_control(self, control_id: str) -> Optional[ControlDB]:
        """Return the control whose ``control_id`` column matches, or ``None``."""
        return (
            self.db.query(ControlDB)
            .filter(ControlDB.control_id == control_id)
            .first()
        )

    def _commit_and_refresh(self, evidence: EvidenceDB) -> EvidenceDB:
        """Commit the session and reload ``evidence`` from the database.

        If the commit fails the session is rolled back before the original
        exception is re-raised, so a reused session is not left stuck in a
        failed transaction.
        """
        try:
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise
        self.db.refresh(evidence)
        return evidence

    def create(
        self,
        control_id: str,
        evidence_type: str,
        title: str,
        description: Optional[str] = None,
        artifact_path: Optional[str] = None,
        artifact_url: Optional[str] = None,
        artifact_hash: Optional[str] = None,
        file_size_bytes: Optional[int] = None,
        mime_type: Optional[str] = None,
        valid_until: Optional[datetime] = None,
        source: str = "manual",
        ci_job_id: Optional[str] = None,
        uploaded_by: Optional[str] = None,
    ) -> EvidenceDB:
        """Create and persist an evidence record for a control.

        Args:
            control_id: Value matched against ``ControlDB.control_id`` to
                locate the owning control.
            evidence_type: Stored on the record as ``evidence_type``.
            title: Stored on the record as ``title``.
            description, artifact_path, artifact_url, artifact_hash,
            file_size_bytes, mime_type, valid_until, source, ci_job_id,
            uploaded_by: Stored unchanged on the attribute of the same name.

        Returns:
            The newly created ``EvidenceDB`` instance, refreshed after commit.

        Raises:
            ValueError: If no control with the given ``control_id`` exists.
        """
        control = self._find_control(control_id)
        if control is None:
            raise ValueError(f"Control {control_id} not found")

        evidence = EvidenceDB(
            id=str(uuid.uuid4()),
            # Store the control row's ``id``, not the ``control_id`` value
            # the caller passed in.
            control_id=control.id,
            evidence_type=evidence_type,
            title=title,
            description=description,
            artifact_path=artifact_path,
            artifact_url=artifact_url,
            artifact_hash=artifact_hash,
            file_size_bytes=file_size_bytes,
            mime_type=mime_type,
            valid_until=valid_until,
            source=source,
            ci_job_id=ci_job_id,
            uploaded_by=uploaded_by,
        )
        self.db.add(evidence)
        return self._commit_and_refresh(evidence)

    def get_by_id(self, evidence_id: str) -> Optional[EvidenceDB]:
        """Return the evidence row whose ``id`` equals ``evidence_id``, or ``None``."""
        return (
            self.db.query(EvidenceDB)
            .filter(EvidenceDB.id == evidence_id)
            .first()
        )

    def get_by_control(
        self,
        control_id: str,
        status: Optional[EvidenceStatusEnum] = None,
    ) -> List[EvidenceDB]:
        """Return all evidence attached to a control, newest first.

        Args:
            control_id: Value matched against ``ControlDB.control_id``.
            status: When given, only evidence with this status is returned.

        Returns:
            Evidence rows ordered by ``collected_at`` descending; an empty
            list when the control does not exist.
        """
        control = self._find_control(control_id)
        if control is None:
            return []

        query = self.db.query(EvidenceDB).filter(
            EvidenceDB.control_id == control.id
        )
        # Compare against None explicitly so a falsy enum member would still
        # be applied as a filter.
        if status is not None:
            query = query.filter(EvidenceDB.status == status)
        return query.order_by(EvidenceDB.collected_at.desc()).all()

    def get_all(
        self,
        evidence_type: Optional[str] = None,
        status: Optional[EvidenceStatusEnum] = None,
        limit: int = 100,
    ) -> List[EvidenceDB]:
        """Return evidence rows matching the optional filters, newest first.

        Args:
            evidence_type: When non-empty, restrict to this evidence type.
            status: When given, restrict to this status.
            limit: Maximum number of rows to return.

        Returns:
            Evidence rows ordered by ``collected_at`` descending.
        """
        query = self.db.query(EvidenceDB)
        # An empty string is treated the same as "no type filter".
        if evidence_type:
            query = query.filter(EvidenceDB.evidence_type == evidence_type)
        if status is not None:
            query = query.filter(EvidenceDB.status == status)
        return query.order_by(EvidenceDB.collected_at.desc()).limit(limit).all()

    def update_status(
        self, evidence_id: str, status: EvidenceStatusEnum
    ) -> Optional[EvidenceDB]:
        """Set the status of an evidence record and stamp ``updated_at``.

        Args:
            evidence_id: ``id`` of the evidence row to update.
            status: New status value.

        Returns:
            The refreshed record, or ``None`` when no row has that id.
        """
        evidence = self.get_by_id(evidence_id)
        if evidence is None:
            return None

        evidence.status = status
        evidence.updated_at = datetime.now(timezone.utc)
        return self._commit_and_refresh(evidence)

    def get_statistics(self) -> Dict[str, Any]:
        """Return aggregate counts over all evidence.

        Returns:
            A dict with the keys:

            * ``total`` - number of evidence rows.
            * ``by_type`` - row count per ``evidence_type``.
            * ``by_status`` - row count per status, keyed by the enum's
              ``value`` as a string (``"none"`` for rows without a status).
            * ``coverage_percent`` - share of rows whose status is
              ``EvidenceStatusEnum.VALID``, as a percentage rounded to one
              decimal place (``0.0`` when there are no rows).
        """
        # Fall back to 0 so the arithmetic below never sees None.
        total = self.db.query(func.count(EvidenceDB.id)).scalar() or 0

        by_type = dict(
            self.db.query(EvidenceDB.evidence_type, func.count(EvidenceDB.id))
            .group_by(EvidenceDB.evidence_type)
            .all()
        )
        by_status = dict(
            self.db.query(EvidenceDB.status, func.count(EvidenceDB.id))
            .group_by(EvidenceDB.status)
            .all()
        )

        valid = by_status.get(EvidenceStatusEnum.VALID, 0)
        # Always a float, so the value has one type whether or not rows exist.
        coverage = (valid / total * 100) if total > 0 else 0.0

        return {
            "total": total,
            "by_type": by_type,
            "by_status": {
                (str(key.value) if key is not None else "none"): count
                for key, count in by_status.items()
            },
            "coverage_percent": round(coverage, 1),
        }