Files
breakpilot-compliance/backend-compliance/compliance/api/dsfa_routes.py
Benjamin Admin 789c215e5e
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 36s
CI / test-python-backend-compliance (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 22s
feat: DSFA vollständiges DB-Schema + PDF-Ingest + Tests
- Migration 030: alle fehlenden Spalten für compliance_dsfas (Sections 0-7)
  flat fields: processing_description, legal_basis, dpo_*, authority_*, ...
  JSONB arrays: risks, mitigations, wp248_criteria_met, ai_trigger_ids, ...
  JSONB objects: section_progress, threshold_analysis, review_schedule, metadata
- dsfa_routes.py: DSFACreate/DSFAUpdate erweitert (60+ neue Optional-Felder)
  _dsfa_to_response: alle neuen Felder mit safe _get() Helper
  PUT-Handler: vollständige JSONB_FIELDS-Liste (22 Felder)
- Tests: 101 (+49) Tests — TestAIUseCaseModules + TestDSFAFullSchema
- ingest-dsfa-bundesland.sh: KNOWN_PDF_URLS (15 direkte URLs), download_pdfs()
  find_pdf_for_state() Helper, PDF-first mit Text-Fallback in ingest_all()

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-05 10:03:09 +01:00

630 lines
23 KiB
Python

"""
FastAPI routes for DSFA — Datenschutz-Folgenabschaetzung (Art. 35 DSGVO).
Endpoints:
GET /v1/dsfa — Liste (tenant_id + status-filter + skip/limit)
POST /v1/dsfa — Neu erstellen → 201
GET /v1/dsfa/stats — Zähler nach Status
GET /v1/dsfa/audit-log — Audit-Log
GET /v1/dsfa/{id} — Detail
PUT /v1/dsfa/{id} — Update
DELETE /v1/dsfa/{id} — Löschen (Art. 17 DSGVO)
PATCH /v1/dsfa/{id}/status — Schnell-Statuswechsel
"""
import logging
from datetime import datetime
from typing import Optional, List
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel
from sqlalchemy import text
from sqlalchemy.orm import Session
from classroom_engine.database import get_db
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/dsfa", tags=["compliance-dsfa"])
# Fallback tenant used whenever a request omits the tenant_id query parameter.
DEFAULT_TENANT_ID = "default"
# Closed value sets for the status / risk_level columns; handlers return
# HTTP 422 for anything outside these sets.
VALID_STATUSES = {"draft", "in-review", "approved", "needs-update"}
VALID_RISK_LEVELS = {"low", "medium", "high", "critical"}
# =============================================================================
# Pydantic Schemas
# =============================================================================
class DSFACreate(BaseModel):
    """Request body for POST /v1/dsfa.

    Core fields (title .. created_by) are written by the create handler.
    NOTE(review): the Optional section fields below are accepted here but the
    POST handler's INSERT only persists the core columns — presumably the
    extended fields are meant to be filled in later via PUT; confirm intended.
    """
    title: str
    description: str = ""
    # Must be one of VALID_STATUSES / VALID_RISK_LEVELS (validated in handler).
    status: str = "draft"
    risk_level: str = "low"
    processing_activity: str = ""
    data_categories: List[str] = []
    recipients: List[str] = []
    measures: List[str] = []
    created_by: str = "system"
    # Section 1 — processing description and legal basis
    processing_description: Optional[str] = None
    processing_purpose: Optional[str] = None
    legal_basis: Optional[str] = None
    legal_basis_details: Optional[str] = None
    # Section 2 — necessity and proportionality assessment
    necessity_assessment: Optional[str] = None
    proportionality_assessment: Optional[str] = None
    data_minimization: Optional[str] = None
    alternatives_considered: Optional[str] = None
    retention_justification: Optional[str] = None
    # Section 3 — risk overview
    involves_ai: Optional[bool] = None
    overall_risk_level: Optional[str] = None
    risk_score: Optional[int] = None
    # Section 6 — DPO / supervisory-authority consultation
    dpo_consulted: Optional[bool] = None
    dpo_name: Optional[str] = None
    dpo_opinion: Optional[str] = None
    dpo_approved: Optional[bool] = None
    authority_consulted: Optional[bool] = None
    authority_reference: Optional[str] = None
    authority_decision: Optional[str] = None
    # Metadata / versioning
    version: Optional[int] = None
    conclusion: Optional[str] = None
    federal_state: Optional[str] = None
    authority_resource_id: Optional[str] = None
    submitted_by: Optional[str] = None
    # JSONB array columns (stored via CAST(... AS jsonb) in the PUT handler)
    data_subjects: Optional[List[str]] = None
    affected_rights: Optional[List[str]] = None
    triggered_rule_codes: Optional[List[str]] = None
    ai_trigger_ids: Optional[List[str]] = None
    wp248_criteria_met: Optional[List[str]] = None
    art35_abs3_triggered: Optional[List[str]] = None
    tom_references: Optional[List[str]] = None
    risks: Optional[List[dict]] = None
    mitigations: Optional[List[dict]] = None
    stakeholder_consultations: Optional[List[dict]] = None
    review_triggers: Optional[List[dict]] = None
    review_comments: Optional[List[dict]] = None
    ai_use_case_modules: Optional[List[dict]] = None
    section_8_complete: Optional[bool] = None
    # JSONB object columns
    threshold_analysis: Optional[dict] = None
    consultation_requirement: Optional[dict] = None
    review_schedule: Optional[dict] = None
    section_progress: Optional[dict] = None
    metadata: Optional[dict] = None
class DSFAUpdate(BaseModel):
    """Request body for PUT /v1/dsfa/{id} — every field is optional.

    The PUT handler calls model_dump(exclude_none=True), so only fields that
    are present and non-None are written; a column can therefore not be reset
    to NULL through this schema.
    """
    title: Optional[str] = None
    description: Optional[str] = None
    # Validated against VALID_STATUSES / VALID_RISK_LEVELS in the handler.
    status: Optional[str] = None
    risk_level: Optional[str] = None
    processing_activity: Optional[str] = None
    data_categories: Optional[List[str]] = None
    recipients: Optional[List[str]] = None
    measures: Optional[List[str]] = None
    approved_by: Optional[str] = None
    # Section 1 — processing description and legal basis
    processing_description: Optional[str] = None
    processing_purpose: Optional[str] = None
    legal_basis: Optional[str] = None
    legal_basis_details: Optional[str] = None
    # Section 2 — necessity and proportionality assessment
    necessity_assessment: Optional[str] = None
    proportionality_assessment: Optional[str] = None
    data_minimization: Optional[str] = None
    alternatives_considered: Optional[str] = None
    retention_justification: Optional[str] = None
    # Section 3 — risk overview
    involves_ai: Optional[bool] = None
    overall_risk_level: Optional[str] = None
    risk_score: Optional[int] = None
    # Section 6 — DPO / supervisory-authority consultation
    dpo_consulted: Optional[bool] = None
    dpo_name: Optional[str] = None
    dpo_opinion: Optional[str] = None
    dpo_approved: Optional[bool] = None
    authority_consulted: Optional[bool] = None
    authority_reference: Optional[str] = None
    authority_decision: Optional[str] = None
    # Metadata / versioning
    version: Optional[int] = None
    conclusion: Optional[str] = None
    federal_state: Optional[str] = None
    authority_resource_id: Optional[str] = None
    submitted_by: Optional[str] = None
    # JSONB array columns (json.dumps + CAST(... AS jsonb) in the handler)
    data_subjects: Optional[List[str]] = None
    affected_rights: Optional[List[str]] = None
    triggered_rule_codes: Optional[List[str]] = None
    ai_trigger_ids: Optional[List[str]] = None
    wp248_criteria_met: Optional[List[str]] = None
    art35_abs3_triggered: Optional[List[str]] = None
    tom_references: Optional[List[str]] = None
    risks: Optional[List[dict]] = None
    mitigations: Optional[List[dict]] = None
    stakeholder_consultations: Optional[List[dict]] = None
    review_triggers: Optional[List[dict]] = None
    review_comments: Optional[List[dict]] = None
    ai_use_case_modules: Optional[List[dict]] = None
    section_8_complete: Optional[bool] = None
    # JSONB object columns
    threshold_analysis: Optional[dict] = None
    consultation_requirement: Optional[dict] = None
    review_schedule: Optional[dict] = None
    section_progress: Optional[dict] = None
    metadata: Optional[dict] = None
class DSFAStatusUpdate(BaseModel):
    """Request body for PATCH /v1/dsfa/{id}/status (quick status change)."""
    # Must be one of VALID_STATUSES; the handler returns 422 otherwise.
    status: str
    # Written to approved_by on every status change — an omitted value (None)
    # overwrites any previously stored approver.
    approved_by: Optional[str] = None
# =============================================================================
# Helpers
# =============================================================================
def _get_tenant_id(tenant_id: Optional[str]) -> str:
    """Resolve the effective tenant id, falling back to DEFAULT_TENANT_ID
    when the caller supplied nothing (None or empty string)."""
    if tenant_id:
        return tenant_id
    return DEFAULT_TENANT_ID
def _dsfa_to_response(row) -> dict:
"""Convert a DB row to a JSON-serializable dict."""
import json
def _parse_arr(val):
"""Parse a JSONB array field → list."""
if val is None:
return []
if isinstance(val, list):
return val
if isinstance(val, str):
try:
parsed = json.loads(val)
return parsed if isinstance(parsed, list) else []
except Exception:
return []
return val
def _parse_obj(val):
"""Parse a JSONB object field → dict."""
if val is None:
return {}
if isinstance(val, dict):
return val
if isinstance(val, str):
try:
parsed = json.loads(val)
return parsed if isinstance(parsed, dict) else {}
except Exception:
return {}
return val
def _ts(val):
"""Timestamp → ISO string or None."""
return val.isoformat() if val else None
def _get(key, default=None):
"""Safe row access — returns default if key missing (handles old rows)."""
try:
v = row[key]
return default if v is None and default is not None else v
except (KeyError, IndexError):
return default
return {
# Core fields (always present since Migration 024)
"id": str(row["id"]),
"tenant_id": row["tenant_id"],
"title": row["title"],
"description": row["description"] or "",
"status": row["status"] or "draft",
"risk_level": row["risk_level"] or "low",
"processing_activity": row["processing_activity"] or "",
"data_categories": _parse_arr(row["data_categories"]),
"recipients": _parse_arr(row["recipients"]),
"measures": _parse_arr(row["measures"]),
"approved_by": row["approved_by"],
"approved_at": _ts(row["approved_at"]),
"created_by": row["created_by"] or "system",
"created_at": _ts(row["created_at"]),
"updated_at": _ts(row["updated_at"]),
# Section 1 (Migration 030)
"processing_description": _get("processing_description"),
"processing_purpose": _get("processing_purpose"),
"legal_basis": _get("legal_basis"),
"legal_basis_details": _get("legal_basis_details"),
# Section 2
"necessity_assessment": _get("necessity_assessment"),
"proportionality_assessment": _get("proportionality_assessment"),
"data_minimization": _get("data_minimization"),
"alternatives_considered": _get("alternatives_considered"),
"retention_justification": _get("retention_justification"),
# Section 3
"involves_ai": _get("involves_ai", False),
"overall_risk_level": _get("overall_risk_level"),
"risk_score": _get("risk_score", 0),
# Section 6
"dpo_consulted": _get("dpo_consulted", False),
"dpo_consulted_at": _ts(_get("dpo_consulted_at")),
"dpo_name": _get("dpo_name"),
"dpo_opinion": _get("dpo_opinion"),
"dpo_approved": _get("dpo_approved"),
"authority_consulted": _get("authority_consulted", False),
"authority_consulted_at": _ts(_get("authority_consulted_at")),
"authority_reference": _get("authority_reference"),
"authority_decision": _get("authority_decision"),
# Metadata / Versioning
"version": _get("version", 1),
"previous_version_id": str(_get("previous_version_id")) if _get("previous_version_id") else None,
"conclusion": _get("conclusion"),
"federal_state": _get("federal_state"),
"authority_resource_id": _get("authority_resource_id"),
"submitted_for_review_at": _ts(_get("submitted_for_review_at")),
"submitted_by": _get("submitted_by"),
# JSONB Arrays
"data_subjects": _parse_arr(_get("data_subjects")),
"affected_rights": _parse_arr(_get("affected_rights")),
"triggered_rule_codes": _parse_arr(_get("triggered_rule_codes")),
"ai_trigger_ids": _parse_arr(_get("ai_trigger_ids")),
"wp248_criteria_met": _parse_arr(_get("wp248_criteria_met")),
"art35_abs3_triggered": _parse_arr(_get("art35_abs3_triggered")),
"tom_references": _parse_arr(_get("tom_references")),
"risks": _parse_arr(_get("risks")),
"mitigations": _parse_arr(_get("mitigations")),
"stakeholder_consultations": _parse_arr(_get("stakeholder_consultations")),
"review_triggers": _parse_arr(_get("review_triggers")),
"review_comments": _parse_arr(_get("review_comments")),
# Section 8 / AI (Migration 028)
"ai_use_case_modules": _parse_arr(_get("ai_use_case_modules")),
"section_8_complete": _get("section_8_complete", False),
# JSONB Objects
"threshold_analysis": _parse_obj(_get("threshold_analysis")),
"consultation_requirement": _parse_obj(_get("consultation_requirement")),
"review_schedule": _parse_obj(_get("review_schedule")),
"section_progress": _parse_obj(_get("section_progress")),
"metadata": _parse_obj(_get("metadata")),
}
def _log_audit(
    db: Session,
    tenant_id: str,
    dsfa_id,
    action: str,
    changed_by: str = "system",
    old_values=None,
    new_values=None,
):
    """Append one entry to compliance_dsfa_audit_log.

    Serializes old/new value snapshots to JSON (NULL when empty/None).
    Does not commit — the caller owns the transaction.
    """
    import json

    def _jsonify(snapshot):
        # Empty dicts and None both map to SQL NULL.
        return json.dumps(snapshot) if snapshot else None

    params = {
        "tenant_id": tenant_id,
        "dsfa_id": str(dsfa_id) if dsfa_id else None,
        "action": action,
        "changed_by": changed_by,
        "old_values": _jsonify(old_values),
        "new_values": _jsonify(new_values),
    }
    db.execute(
        text("""
        INSERT INTO compliance_dsfa_audit_log
        (tenant_id, dsfa_id, action, changed_by, old_values, new_values)
        VALUES
        (:tenant_id, :dsfa_id, :action, :changed_by,
        CAST(:old_values AS jsonb), CAST(:new_values AS jsonb))
        """),
        params,
    )
# =============================================================================
# Stats (must be before /{id} to avoid route conflict)
# =============================================================================
@router.get("/stats")
async def get_stats(
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """DSFA counts per status and per risk level for one tenant."""
    tid = _get_tenant_id(tenant_id)
    records = db.execute(
        text("SELECT status, risk_level FROM compliance_dsfas WHERE tenant_id = :tid"),
        {"tid": tid},
    ).fetchall()
    status_counts: dict = {}
    risk_counts: dict = {}
    for rec in records:
        # NULL columns count as the defaults used everywhere else.
        status_key = rec["status"] or "draft"
        risk_key = rec["risk_level"] or "low"
        status_counts[status_key] = status_counts.get(status_key, 0) + 1
        risk_counts[risk_key] = risk_counts.get(risk_key, 0) + 1
    return {
        "total": len(records),
        "by_status": status_counts,
        "by_risk_level": risk_counts,
        "draft_count": status_counts.get("draft", 0),
        "in_review_count": status_counts.get("in-review", 0),
        "approved_count": status_counts.get("approved", 0),
        "needs_update_count": status_counts.get("needs-update", 0),
    }
# =============================================================================
# Audit Log (must be before /{id} to avoid route conflict)
# =============================================================================
@router.get("/audit-log")
async def get_audit_log(
    tenant_id: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    db: Session = Depends(get_db),
):
    """DSFA audit trail for one tenant, newest entries first (paginated)."""
    tid = _get_tenant_id(tenant_id)

    def _serialize(rec) -> dict:
        # UUID columns -> str, timestamp -> ISO-8601, NULLs preserved.
        return {
            "id": str(rec["id"]),
            "tenant_id": rec["tenant_id"],
            "dsfa_id": str(rec["dsfa_id"]) if rec["dsfa_id"] else None,
            "action": rec["action"],
            "changed_by": rec["changed_by"],
            "old_values": rec["old_values"],
            "new_values": rec["new_values"],
            "created_at": rec["created_at"].isoformat() if rec["created_at"] else None,
        }

    records = db.execute(
        text("""
        SELECT id, tenant_id, dsfa_id, action, changed_by, old_values, new_values, created_at
        FROM compliance_dsfa_audit_log
        WHERE tenant_id = :tid
        ORDER BY created_at DESC
        LIMIT :limit OFFSET :offset
        """),
        {"tid": tid, "limit": limit, "offset": offset},
    ).fetchall()
    return [_serialize(rec) for rec in records]
# =============================================================================
# List + Create
# =============================================================================
@router.get("")
async def list_dsfas(
    tenant_id: Optional[str] = Query(None),
    status: Optional[str] = Query(None),
    risk_level: Optional[str] = Query(None),
    skip: int = Query(0, ge=0),
    limit: int = Query(100, ge=1, le=500),
    db: Session = Depends(get_db),
):
    """List DSFAs of a tenant, optionally filtered by status and risk level."""
    tid = _get_tenant_id(tenant_id)
    # Assemble the statement from fixed clause fragments; all user-supplied
    # values travel as bound parameters.
    clauses = ["SELECT * FROM compliance_dsfas WHERE tenant_id = :tid"]
    params: dict = {"tid": tid, "limit": limit, "skip": skip}
    if status:
        clauses.append(" AND status = :status")
        params["status"] = status
    if risk_level:
        clauses.append(" AND risk_level = :risk_level")
        params["risk_level"] = risk_level
    clauses.append(" ORDER BY created_at DESC LIMIT :limit OFFSET :skip")
    records = db.execute(text("".join(clauses)), params).fetchall()
    return [_dsfa_to_response(rec) for rec in records]
@router.post("", status_code=201)
async def create_dsfa(
    request: DSFACreate,
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Create a new DSFA and return the full record with HTTP 201.

    Only the core columns are inserted here. NOTE(review): the extended
    Section-1..8 fields accepted by DSFACreate are NOT written by this
    INSERT — presumably they are filled in afterwards via PUT; confirm
    this is the intended flow.
    """
    import json
    # Validate enum values up front so the client gets a 422 instead of a
    # database constraint error.
    if request.status not in VALID_STATUSES:
        raise HTTPException(status_code=422, detail=f"Ungültiger Status: {request.status}")
    if request.risk_level not in VALID_RISK_LEVELS:
        raise HTTPException(status_code=422, detail=f"Ungültiges Risiko-Level: {request.risk_level}")
    tid = _get_tenant_id(tenant_id)
    row = db.execute(
        text("""
        INSERT INTO compliance_dsfas
        (tenant_id, title, description, status, risk_level,
        processing_activity, data_categories, recipients, measures, created_by)
        VALUES
        (:tenant_id, :title, :description, :status, :risk_level,
        :processing_activity,
        CAST(:data_categories AS jsonb),
        CAST(:recipients AS jsonb),
        CAST(:measures AS jsonb),
        :created_by)
        RETURNING *
        """),
        {
            "tenant_id": tid,
            "title": request.title,
            "description": request.description,
            "status": request.status,
            "risk_level": request.risk_level,
            "processing_activity": request.processing_activity,
            # Array fields are serialized to JSON and CAST to jsonb in SQL.
            "data_categories": json.dumps(request.data_categories),
            "recipients": json.dumps(request.recipients),
            "measures": json.dumps(request.measures),
            "created_by": request.created_by,
        },
    ).fetchone()
    # Flush before the audit insert so both land in the same transaction,
    # then commit once.
    db.flush()
    _log_audit(
        db, tid, row["id"], "CREATE", request.created_by,
        new_values={"title": request.title, "status": request.status},
    )
    db.commit()
    return _dsfa_to_response(row)
# =============================================================================
# Single Item (GET / PUT / DELETE / PATCH status)
# =============================================================================
@router.get("/{dsfa_id}")
async def get_dsfa(
    dsfa_id: str,
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Fetch one DSFA by id (scoped to the tenant) or raise 404."""
    tid = _get_tenant_id(tenant_id)
    record = db.execute(
        text("SELECT * FROM compliance_dsfas WHERE id = :id AND tenant_id = :tid"),
        {"id": dsfa_id, "tid": tid},
    ).fetchone()
    if record is None:
        raise HTTPException(status_code=404, detail=f"DSFA {dsfa_id} nicht gefunden")
    return _dsfa_to_response(record)
@router.put("/{dsfa_id}")
async def update_dsfa(
    dsfa_id: str,
    request: DSFAUpdate,
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Partially update a DSFA: only fields present (non-None) in the body
    are written.

    NOTE(review): because of exclude_none=True a column can never be reset
    to NULL through this endpoint — confirm that limitation is acceptable.
    """
    import json
    tid = _get_tenant_id(tenant_id)
    existing = db.execute(
        text("SELECT * FROM compliance_dsfas WHERE id = :id AND tenant_id = :tid"),
        {"id": dsfa_id, "tid": tid},
    ).fetchone()
    if not existing:
        raise HTTPException(status_code=404, detail=f"DSFA {dsfa_id} nicht gefunden")
    # Drop everything the client omitted (or sent as null).
    updates = request.model_dump(exclude_none=True)
    if "status" in updates and updates["status"] not in VALID_STATUSES:
        raise HTTPException(status_code=422, detail=f"Ungültiger Status: {updates['status']}")
    if "risk_level" in updates and updates["risk_level"] not in VALID_RISK_LEVELS:
        raise HTTPException(status_code=422, detail=f"Ungültiges Risiko-Level: {updates['risk_level']}")
    if not updates:
        # Nothing to change: echo the current record, no write, no audit entry.
        return _dsfa_to_response(existing)
    set_clauses = []
    params: dict = {"id": dsfa_id, "tid": tid}
    # Columns stored as JSONB: values must be json.dumps'ed and CAST in SQL.
    jsonb_fields = {
        "data_categories", "recipients", "measures",
        "data_subjects", "affected_rights", "triggered_rule_codes",
        "ai_trigger_ids", "wp248_criteria_met", "art35_abs3_triggered",
        "tom_references", "risks", "mitigations", "stakeholder_consultations",
        "review_triggers", "review_comments", "ai_use_case_modules",
        "threshold_analysis", "consultation_requirement", "review_schedule",
        "section_progress", "metadata",
    }
    # Interpolating field names into SQL is safe here: they come from the
    # DSFAUpdate model's attribute set, never from raw user input; the
    # values themselves are bound parameters.
    for field, value in updates.items():
        if field in jsonb_fields:
            set_clauses.append(f"{field} = CAST(:{field} AS jsonb)")
            params[field] = json.dumps(value)
        else:
            set_clauses.append(f"{field} = :{field}")
            params[field] = value
    set_clauses.append("updated_at = NOW()")
    sql = f"UPDATE compliance_dsfas SET {', '.join(set_clauses)} WHERE id = :id AND tenant_id = :tid RETURNING *"
    # Snapshot of the pre-update title/status for the audit trail.
    old_values = {"title": existing["title"], "status": existing["status"]}
    row = db.execute(text(sql), params).fetchone()
    # NOTE(review): changed_by falls back to "system" — the caller's identity
    # is not recorded here; confirm whether it should be.
    _log_audit(db, tid, dsfa_id, "UPDATE", new_values=updates, old_values=old_values)
    db.commit()
    return _dsfa_to_response(row)
@router.delete("/{dsfa_id}")
async def delete_dsfa(
    dsfa_id: str,
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Delete a DSFA (right to erasure, Art. 17 GDPR)."""
    tid = _get_tenant_id(tenant_id)
    key = {"id": dsfa_id, "tid": tid}
    victim = db.execute(
        text("SELECT id, title FROM compliance_dsfas WHERE id = :id AND tenant_id = :tid"),
        key,
    ).fetchone()
    if victim is None:
        raise HTTPException(status_code=404, detail=f"DSFA {dsfa_id} nicht gefunden")
    # Write the audit entry first, then delete; both share one commit.
    _log_audit(db, tid, dsfa_id, "DELETE", old_values={"title": victim["title"]})
    db.execute(
        text("DELETE FROM compliance_dsfas WHERE id = :id AND tenant_id = :tid"),
        key,
    )
    db.commit()
    return {"success": True, "message": f"DSFA {dsfa_id} gelöscht"}
@router.patch("/{dsfa_id}/status")
async def update_dsfa_status(
    dsfa_id: str,
    request: DSFAStatusUpdate,
    tenant_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Quick status change without a full PUT payload.

    NOTE(review): approved_at and approved_by are written unconditionally —
    any status other than "approved" (or an omitted approved_by) clears the
    previously stored values; confirm this reset is intended.
    """
    if request.status not in VALID_STATUSES:
        raise HTTPException(status_code=422, detail=f"Ungültiger Status: {request.status}")
    tid = _get_tenant_id(tenant_id)
    existing = db.execute(
        text("SELECT id, status FROM compliance_dsfas WHERE id = :id AND tenant_id = :tid"),
        {"id": dsfa_id, "tid": tid},
    ).fetchone()
    if not existing:
        raise HTTPException(status_code=404, detail=f"DSFA {dsfa_id} nicht gefunden")
    params: dict = {
        "id": dsfa_id, "tid": tid,
        "status": request.status,
        # approved_at is stamped only when moving to "approved".
        # NOTE(review): datetime.utcnow() is naive and deprecated since
        # Python 3.12 — consider datetime.now(timezone.utc); verify the
        # column's timezone handling first.
        "approved_at": datetime.utcnow() if request.status == "approved" else None,
        "approved_by": request.approved_by,
    }
    row = db.execute(
        text("""
        UPDATE compliance_dsfas
        SET status = :status, approved_at = :approved_at, approved_by = :approved_by, updated_at = NOW()
        WHERE id = :id AND tenant_id = :tid
        RETURNING *
        """),
        params,
    ).fetchone()
    # Audit records the status transition (old -> new).
    _log_audit(
        db, tid, dsfa_id, "STATUS_CHANGE",
        old_values={"status": existing["status"]},
        new_values={"status": request.status},
    )
    db.commit()
    return _dsfa_to_response(row)