Files
breakpilot-compliance/backend-compliance/compliance/services/vvt_service.py
Sharang Parnerkar 4fa0dd6f6d refactor(backend/api): extract VVTService (Step 4 — file 5 of 18)
compliance/api/vvt_routes.py (550 LOC) -> 225 LOC thin routes + 475-line
VVTService. Covers the organization header, processing activities CRUD,
audit log, JSON/CSV export, stats, and version lookups for the Art. 30
DSGVO Verzeichnis.

Single-service split: organization + activities + audit + stats all
revolve around the same tenant's VVT document, and the existing test
suite (tests/test_vvt_routes.py — 768 LOC, tests/test_vvt_tenant_isolation.py
— 205 LOC) exercises them together.

Module-level helpers (_activity_to_response, _log_audit, _export_csv)
stay module-level in compliance.services.vvt_service and are re-exported
from compliance.api.vvt_routes so the two test files keep importing
from the old path.

Pydantic schemas already live in compliance.schemas.vvt from Step 3 —
no new schema file needed this round.

mypy.ini flips compliance.api.vvt_routes from ignore_errors=True to
False. Two SQLAlchemy Column[str] vs str dict-index errors fixed with
explicit str() casts on status/business_function in the stats loop.

Verified:
  - 242/242 pytest (173 core + 69 VVT integration) pass
  - OpenAPI 360/484 unchanged
  - mypy compliance/ -> Success on 128 source files
  - vvt_routes.py 550 -> 225 LOC
  - vvt_service.py 475 LOC (under 500 hard cap)
  - Hard-cap violations: 14 -> 13

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 19:50:40 +02:00

476 lines
17 KiB
Python

# mypy: disable-error-code="arg-type,assignment"
# SQLAlchemy 1.x Column() descriptors are Column[T] statically, T at runtime.
"""
VVT service — Verzeichnis von Verarbeitungstaetigkeiten (Art. 30 DSGVO).
Phase 1 Step 4: extracted from ``compliance.api.vvt_routes``. Covers the
organization header, processing activities CRUD, audit log, export
(JSON + CSV), stats, and versioning lookups.
The module-level helpers ``_activity_to_response``, ``_log_audit``, and
``_export_csv`` are also re-exported by ``compliance.api.vvt_routes`` so
the existing test suite (tests/test_vvt_routes.py,
tests/test_vvt_tenant_isolation.py) continues to import them from the
same path.
"""
import csv
import io
from datetime import datetime, timezone
from typing import Any, Optional
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from compliance.db.vvt_models import (
VVTActivityDB,
VVTAuditLogDB,
VVTOrganizationDB,
)
from compliance.domain import ConflictError, NotFoundError
from compliance.schemas.vvt import (
VVTActivityCreate,
VVTActivityResponse,
VVTActivityUpdate,
VVTAuditLogEntry,
VVTOrganizationResponse,
VVTOrganizationUpdate,
VVTStatsResponse,
)
# ============================================================================
# Module-level helpers (legacy-exported via compliance.api.vvt_routes)
# ============================================================================
def _log_audit(
    db: Session,
    tenant_id: str,
    action: str,
    entity_type: str,
    entity_id: Any = None,
    changed_by: str = "system",
    old_values: Optional[dict[str, Any]] = None,
    new_values: Optional[dict[str, Any]] = None,
) -> None:
    """Stage a single VVT audit-log row on the given session.

    The row is only added to ``db``; nothing is flushed or committed here,
    so the entry is persisted by whichever commit the caller issues next.

    Args:
        db: Session the audit row is added to.
        tenant_id: Tenant the audited action belongs to.
        action: Action label stored on the row (callers in this module use
            "CREATE", "UPDATE", "DELETE" and "EXPORT").
        entity_type: Kind of entity the action touched.
        entity_id: Identifier of the touched entity, if there is one.
        changed_by: Actor recorded on the row; defaults to "system".
        old_values: Snapshot of values before the change, if any.
        new_values: Snapshot of values after the change, if any.
    """
    entry = VVTAuditLogDB(
        tenant_id=tenant_id,
        action=action,
        entity_type=entity_type,
        entity_id=entity_id,
        changed_by=changed_by,
        old_values=old_values,
        new_values=new_values,
    )
    db.add(entry)
def _activity_to_response(act: VVTActivityDB) -> VVTActivityResponse:
    """Map a ``VVTActivityDB`` row onto the ``VVTActivityResponse`` schema.

    Falsy collection columns are replaced by empty containers, and falsy
    ``protection_level`` / ``status`` / ``dpia_required`` values fall back
    to "MEDIUM" / "DRAFT" / ``False``. Both identifiers are stringified;
    a missing ``dsfa_id`` stays ``None``.
    """
    # Only stringify the DSFA link when one is actually set.
    linked_dsfa = str(act.dsfa_id) if act.dsfa_id else None

    return VVTActivityResponse(
        # --- identity -------------------------------------------------
        id=str(act.id),
        vvt_id=act.vvt_id,
        name=act.name,
        description=act.description,
        # --- Art. 30 content (list/dict columns default to empty) -----
        purposes=act.purposes or [],
        legal_bases=act.legal_bases or [],
        data_subject_categories=act.data_subject_categories or [],
        personal_data_categories=act.personal_data_categories or [],
        recipient_categories=act.recipient_categories or [],
        third_country_transfers=act.third_country_transfers or [],
        retention_period=act.retention_period or {},
        tom_description=act.tom_description,
        # --- systems and data flows -----------------------------------
        business_function=act.business_function,
        systems=act.systems or [],
        deployment_model=act.deployment_model,
        data_sources=act.data_sources or [],
        data_flows=act.data_flows or [],
        # --- risk classification --------------------------------------
        protection_level=act.protection_level or "MEDIUM",
        dpia_required=act.dpia_required or False,
        structured_toms=act.structured_toms or {},
        # --- workflow and ownership -----------------------------------
        status=act.status or "DRAFT",
        responsible=act.responsible,
        owner=act.owner,
        last_reviewed_at=act.last_reviewed_at,
        next_review_at=act.next_review_at,
        created_by=act.created_by,
        dsfa_id=linked_dsfa,
        created_at=act.created_at,
        updated_at=act.updated_at,
    )
def _org_to_response(org: VVTOrganizationDB) -> VVTOrganizationResponse:
    """Map a ``VVTOrganizationDB`` row onto ``VVTOrganizationResponse``.

    Falsy values fall back to defaults: ``locations`` to an empty list,
    ``vvt_version`` to "1.0" and ``review_interval`` to "annual". The
    primary key is returned as a string.
    """
    # Resolve the defaulted fields first so the constructor call stays flat.
    locations = org.locations or []
    version = org.vvt_version or "1.0"
    interval = org.review_interval or "annual"

    return VVTOrganizationResponse(
        id=str(org.id),
        organization_name=org.organization_name,
        industry=org.industry,
        locations=locations,
        employee_count=org.employee_count,
        dpo_name=org.dpo_name,
        dpo_contact=org.dpo_contact,
        vvt_version=version,
        last_review_date=org.last_review_date,
        next_review_date=org.next_review_date,
        review_interval=interval,
        created_at=org.created_at,
        updated_at=org.updated_at,
    )
def _csv_safe(value: Any) -> str:
    """Return *value* as cell text that a spreadsheet will not run as a formula.

    ``None`` becomes an empty string. Text starting with ``=``, ``+``, ``-``,
    ``@``, tab or carriage return is prefixed with a single quote, which is
    the usual mitigation for CSV/formula injection when the file is opened
    in Excel or a similar application.
    """
    text = "" if value is None else str(value)
    if text.startswith(("=", "+", "-", "@", "\t", "\r")):
        return "'" + text
    return text


def _export_csv(activities: list[Any]) -> StreamingResponse:
    """Generate semicolon-separated CSV with UTF-8 BOM for German Excel compatibility.

    Args:
        activities: Activity rows to export. Each item is read through the
            attributes ``id``, ``vvt_id``, ``name``, ``purposes``,
            ``legal_bases``, ``personal_data_categories``,
            ``data_subject_categories``, ``recipient_categories``,
            ``third_country_transfers``, ``retention_period``, ``status``,
            ``responsible``, ``created_by`` and ``created_at``.

    Returns:
        A ``StreamingResponse`` with media type ``text/csv`` whose
        ``Content-Disposition`` names the file ``vvt_export_<YYYYMMDD>.csv``
        (UTC date). An empty ``activities`` list yields the header row only.
    """

    def joined(values: Any) -> str:
        # Items are stringified so a non-string entry in a list column
        # cannot make ``str.join`` raise TypeError.
        return "; ".join(str(item) for item in values or [])

    buffer = io.StringIO()
    buffer.write("\ufeff")  # UTF-8 BOM for Excel
    writer = csv.writer(buffer, delimiter=";", quoting=csv.QUOTE_MINIMAL)
    writer.writerow([
        "ID", "VVT-ID", "Name", "Zweck", "Rechtsgrundlage",
        "Datenkategorien", "Betroffene", "Empfaenger", "Drittland",
        "Aufbewahrung", "Status", "Verantwortlich", "Erstellt von",
        "Erstellt am",
    ])
    for a in activities:
        row = [
            str(a.id),
            a.vvt_id,
            a.name,
            joined(a.purposes),
            joined(a.legal_bases),
            joined(a.personal_data_categories),
            joined(a.data_subject_categories),
            joined(a.recipient_categories),
            "Ja" if a.third_country_transfers else "Nein",
            str(a.retention_period) if a.retention_period else "",
            a.status or "DRAFT",
            a.responsible or "",
            a.created_by or "system",
            a.created_at.strftime("%d.%m.%Y %H:%M") if a.created_at else "",
        ]
        # Every data cell is user-influenced text, so neutralise leading
        # formula characters before the row is written.
        writer.writerow([_csv_safe(cell) for cell in row])

    filename = f'vvt_export_{datetime.now(timezone.utc).strftime("%Y%m%d")}.csv'
    return StreamingResponse(
        iter([buffer.getvalue()]),
        media_type="text/csv; charset=utf-8",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
# ============================================================================
# Service
# ============================================================================
class VVTService:
    """Business logic for VVT organization, activities, audit, export, stats.

    Every query is filtered by the tenant id (``tid``) passed to the method.
    Mutating methods, and ``export`` (which writes an audit entry), commit
    on the session supplied at construction.
    """

    def __init__(self, db: Session) -> None:
        self.db = db

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _escape_like(text: str, escape: str = "/") -> str:
        """Escape LIKE wildcards in *text* so it is matched literally.

        The escape character itself is doubled first, then ``%`` and ``_``
        are prefixed with it.
        """
        for special in (escape, "%", "_"):
            text = text.replace(special, escape + special)
        return text

    @staticmethod
    def _as_aware_utc(value: datetime) -> datetime:
        """Return *value* unchanged if timezone-aware, else tagged as UTC.

        Comparing a naive datetime with an aware one raises ``TypeError``.
        NOTE(review): assumes naive timestamps coming from the database are
        stored in UTC — confirm against the column definition.
        """
        if value.tzinfo is None:
            return value.replace(tzinfo=timezone.utc)
        return value

    @staticmethod
    def _iso(value: Optional[datetime]) -> Optional[str]:
        """Return ``value.isoformat()``, or ``None`` when *value* is unset."""
        return value.isoformat() if value else None

    def _first_org(self, tid: str) -> Optional[VVTOrganizationDB]:
        """Return the tenant's earliest-created organization row, if any."""
        return (
            self.db.query(VVTOrganizationDB)
            .filter(VVTOrganizationDB.tenant_id == tid)
            .order_by(VVTOrganizationDB.created_at)
            .first()
        )

    # ------------------------------------------------------------------
    # Organization header
    # ------------------------------------------------------------------

    def get_organization(self, tid: str) -> Optional[VVTOrganizationResponse]:
        """Return the tenant's organization header, or ``None`` if none exists."""
        org = self._first_org(tid)
        if not org:
            return None
        return _org_to_response(org)

    def upsert_organization(
        self, tid: str, request: VVTOrganizationUpdate
    ) -> VVTOrganizationResponse:
        """Create or update the tenant's organization header and commit.

        Only fields that are not ``None`` in *request* are applied, so a
        field cannot be reset to ``None`` through this method. A new row
        without a name gets the placeholder "Meine Organisation".
        """
        org = self._first_org(tid)
        data = request.dict(exclude_none=True)
        if not org:
            data.setdefault("organization_name", "Meine Organisation")
            data["tenant_id"] = tid
            org = VVTOrganizationDB(**data)
            self.db.add(org)
        else:
            for field, value in data.items():
                setattr(org, field, value)
            org.updated_at = datetime.now(timezone.utc)
        self.db.commit()
        self.db.refresh(org)
        return _org_to_response(org)

    # ------------------------------------------------------------------
    # Activities
    # ------------------------------------------------------------------

    def list_activities(
        self,
        tid: str,
        status: Optional[str],
        business_function: Optional[str],
        search: Optional[str],
        review_overdue: Optional[bool],
    ) -> list[VVTActivityResponse]:
        """List the tenant's activities, newest first, with optional filters.

        Args:
            tid: Tenant id.
            status: Exact status to match, if given.
            business_function: Exact business function to match, if given.
            search: Case-insensitive substring matched against name,
                description and VVT-ID. ``%`` and ``_`` are matched
                literally rather than as wildcards.
            review_overdue: When truthy, keep only activities whose
                ``next_review_at`` is set and lies in the past.
        """
        q = self.db.query(VVTActivityDB).filter(VVTActivityDB.tenant_id == tid)
        if status:
            q = q.filter(VVTActivityDB.status == status)
        if business_function:
            q = q.filter(VVTActivityDB.business_function == business_function)
        if review_overdue:
            now = datetime.now(timezone.utc)
            q = q.filter(
                VVTActivityDB.next_review_at.isnot(None),
                VVTActivityDB.next_review_at < now,
            )
        if search:
            # Escape user input so "%" / "_" do not act as LIKE wildcards.
            term = f"%{self._escape_like(search)}%"
            q = q.filter(
                (VVTActivityDB.name.ilike(term, escape="/"))
                | (VVTActivityDB.description.ilike(term, escape="/"))
                | (VVTActivityDB.vvt_id.ilike(term, escape="/"))
            )
        rows = q.order_by(VVTActivityDB.created_at.desc()).all()
        return [_activity_to_response(a) for a in rows]

    def create_activity(
        self,
        tid: str,
        request: VVTActivityCreate,
        created_by_header: Optional[str],
    ) -> VVTActivityResponse:
        """Create an activity, write a CREATE audit entry and commit.

        Args:
            tid: Tenant id.
            request: Activity payload.
            created_by_header: Fallback creator used when the payload has
                no ``created_by``; "system" is used if this is empty too.

        Raises:
            ConflictError: The tenant already has an activity with the
                same ``vvt_id``.
        """
        # NOTE(review): check-then-insert is not atomic; concurrent requests
        # could both pass this check unless the table enforces uniqueness.
        existing = (
            self.db.query(VVTActivityDB)
            .filter(
                VVTActivityDB.tenant_id == tid,
                VVTActivityDB.vvt_id == request.vvt_id,
            )
            .first()
        )
        if existing:
            raise ConflictError(
                f"Activity with VVT-ID '{request.vvt_id}' already exists"
            )
        data = request.dict()
        data["tenant_id"] = tid
        if not data.get("created_by"):
            data["created_by"] = created_by_header or "system"
        act = VVTActivityDB(**data)
        self.db.add(act)
        # Flush before logging so the audit entry is written after the
        # INSERT has been sent and can reference act.id.
        self.db.flush()
        _log_audit(
            self.db,
            tenant_id=tid,
            action="CREATE",
            entity_type="activity",
            entity_id=act.id,
            new_values={"vvt_id": act.vvt_id, "name": act.name, "status": act.status},
        )
        self.db.commit()
        self.db.refresh(act)
        return _activity_to_response(act)

    def _activity_or_raise(self, tid: str, activity_id: str) -> VVTActivityDB:
        """Load one activity of the tenant by id.

        Raises:
            NotFoundError: No activity with that id exists for the tenant.
        """
        act = (
            self.db.query(VVTActivityDB)
            .filter(
                VVTActivityDB.id == activity_id,
                VVTActivityDB.tenant_id == tid,
            )
            .first()
        )
        if not act:
            raise NotFoundError(f"Activity {activity_id} not found")
        return act

    def get_activity(self, tid: str, activity_id: str) -> VVTActivityResponse:
        """Return one activity; raises ``NotFoundError`` if it does not exist."""
        return _activity_to_response(self._activity_or_raise(tid, activity_id))

    def update_activity(
        self, tid: str, activity_id: str, request: VVTActivityUpdate
    ) -> VVTActivityResponse:
        """Apply the non-``None`` fields of *request*, audit and commit.

        The audit entry records the previous name/status as ``old_values``
        and the applied field set as ``new_values``.

        Raises:
            NotFoundError: The activity does not exist for the tenant.
        """
        act = self._activity_or_raise(tid, activity_id)
        old_values: dict[str, Any] = {"name": act.name, "status": act.status}
        updates = request.dict(exclude_none=True)
        for field, value in updates.items():
            setattr(act, field, value)
        act.updated_at = datetime.now(timezone.utc)
        _log_audit(
            self.db,
            tenant_id=tid,
            action="UPDATE",
            entity_type="activity",
            entity_id=act.id,
            old_values=old_values,
            new_values=updates,
        )
        self.db.commit()
        self.db.refresh(act)
        return _activity_to_response(act)

    def delete_activity(self, tid: str, activity_id: str) -> dict[str, Any]:
        """Delete an activity, write a DELETE audit entry and commit.

        Raises:
            NotFoundError: The activity does not exist for the tenant.
        """
        act = self._activity_or_raise(tid, activity_id)
        _log_audit(
            self.db,
            tenant_id=tid,
            action="DELETE",
            entity_type="activity",
            entity_id=act.id,
            old_values={"vvt_id": act.vvt_id, "name": act.name},
        )
        self.db.delete(act)
        self.db.commit()
        return {"success": True, "message": f"Activity {activity_id} deleted"}

    # ------------------------------------------------------------------
    # Audit log
    # ------------------------------------------------------------------

    def audit_log(self, tid: str, limit: int, offset: int) -> list[VVTAuditLogEntry]:
        """Return one page of the tenant's audit log, newest entries first."""
        entries = (
            self.db.query(VVTAuditLogDB)
            .filter(VVTAuditLogDB.tenant_id == tid)
            .order_by(VVTAuditLogDB.created_at.desc())
            .offset(offset)
            .limit(limit)
            .all()
        )
        return [
            VVTAuditLogEntry(
                id=str(e.id),
                action=e.action,
                entity_type=e.entity_type,
                entity_id=str(e.entity_id) if e.entity_id else None,
                changed_by=e.changed_by,
                old_values=e.old_values,
                new_values=e.new_values,
                created_at=e.created_at,
            )
            for e in entries
        ]

    # ------------------------------------------------------------------
    # Export + stats
    # ------------------------------------------------------------------

    def export(self, tid: str, fmt: str) -> Any:
        """Export the tenant's VVT and record the export in the audit log.

        The EXPORT audit entry is committed before the payload is built.

        Args:
            tid: Tenant id.
            fmt: ``"csv"`` returns the CSV ``StreamingResponse``; any other
                value returns the JSON-style dict.

        Returns:
            Either the CSV response or a dict with ``exported_at``,
            ``organization`` (``None`` when the tenant has no organization
            row) and ``activities`` (oldest first).
        """
        org = self._first_org(tid)
        activities = (
            self.db.query(VVTActivityDB)
            .filter(VVTActivityDB.tenant_id == tid)
            .order_by(VVTActivityDB.created_at)
            .all()
        )
        _log_audit(
            self.db,
            tenant_id=tid,
            action="EXPORT",
            entity_type="all_activities",
            new_values={"count": len(activities), "format": fmt},
        )
        self.db.commit()
        if fmt == "csv":
            return _export_csv(activities)

        organization: Optional[dict[str, Any]] = None
        if org:
            organization = {
                "name": org.organization_name,
                "dpo_name": org.dpo_name,
                "dpo_contact": org.dpo_contact,
                "vvt_version": org.vvt_version,
            }
        return {
            "exported_at": datetime.now(timezone.utc).isoformat(),
            "organization": organization,
            "activities": [
                {
                    "id": str(a.id),
                    "vvt_id": a.vvt_id,
                    "name": a.name,
                    "description": a.description,
                    "status": a.status,
                    "purposes": a.purposes,
                    "legal_bases": a.legal_bases,
                    "data_subject_categories": a.data_subject_categories,
                    "personal_data_categories": a.personal_data_categories,
                    "recipient_categories": a.recipient_categories,
                    "third_country_transfers": a.third_country_transfers,
                    "retention_period": a.retention_period,
                    "dpia_required": a.dpia_required,
                    "protection_level": a.protection_level,
                    "business_function": a.business_function,
                    "responsible": a.responsible,
                    "created_by": a.created_by,
                    "dsfa_id": str(a.dsfa_id) if a.dsfa_id else None,
                    "last_reviewed_at": self._iso(a.last_reviewed_at),
                    "next_review_at": self._iso(a.next_review_at),
                    # Guarded like the other timestamps so a row without
                    # created_at cannot abort the whole export.
                    "created_at": self._iso(a.created_at),
                    "updated_at": self._iso(a.updated_at),
                }
                for a in activities
            ],
        }

    def stats(self, tid: str) -> VVTStatsResponse:
        """Aggregate counts over all of the tenant's activities.

        Activities without a status count as "DRAFT" and those without a
        business function as "unknown". A review is overdue when
        ``next_review_at`` is set and earlier than the current UTC time.
        """
        activities = (
            self.db.query(VVTActivityDB).filter(VVTActivityDB.tenant_id == tid).all()
        )
        by_status: dict[str, int] = {}
        by_bf: dict[str, int] = {}
        now = datetime.now(timezone.utc)
        overdue_count = 0
        for a in activities:
            # str() keeps the keys plain strings for the type checker.
            st: str = str(a.status or "DRAFT")
            bf: str = str(a.business_function or "unknown")
            by_status[st] = by_status.get(st, 0) + 1
            by_bf[bf] = by_bf.get(bf, 0) + 1
            due = a.next_review_at
            # Normalise first: a naive timestamp cannot be compared with
            # the aware ``now`` without raising TypeError.
            if due and self._as_aware_utc(due) < now:
                overdue_count += 1
        return VVTStatsResponse(
            total=len(activities),
            by_status=by_status,
            by_business_function=by_bf,
            dpia_required_count=sum(1 for a in activities if a.dpia_required),
            third_country_count=sum(1 for a in activities if a.third_country_transfers),
            draft_count=by_status.get("DRAFT", 0),
            approved_count=by_status.get("APPROVED", 0),
            overdue_review_count=overdue_count,
        )

    # ------------------------------------------------------------------
    # Versioning (delegates to shared versioning_utils)
    # ------------------------------------------------------------------

    def list_versions(self, tid: str, activity_id: str) -> Any:
        """Return the stored versions of an activity via ``versioning_utils``."""
        # Imported inside the method; presumably this avoids an import cycle
        # between compliance.api and compliance.services — confirm.
        from compliance.api.versioning_utils import list_versions

        return list_versions(self.db, "vvt_activity", activity_id, tid)

    def get_version(self, tid: str, activity_id: str, version_number: int) -> Any:
        """Return one stored version of an activity.

        Raises:
            NotFoundError: ``versioning_utils.get_version`` returned a
                falsy result for *version_number*.
        """
        from compliance.api.versioning_utils import get_version

        v = get_version(self.db, "vvt_activity", activity_id, version_number, tid)
        if not v:
            raise NotFoundError(f"Version {version_number} not found")
        return v