feat: DSFA vollständiges DB-Schema + PDF-Ingest + Tests
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 36s
CI / test-python-backend-compliance (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 22s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 36s
CI / test-python-backend-compliance (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 22s
- Migration 030: alle fehlenden Spalten für compliance_dsfas (Sections 0-7)
  - flat fields: processing_description, legal_basis, dpo_*, authority_*, ...
  - JSONB arrays: risks, mitigations, wp248_criteria_met, ai_trigger_ids, ...
  - JSONB objects: section_progress, threshold_analysis, review_schedule, metadata
- dsfa_routes.py:
  - DSFACreate/DSFAUpdate erweitert (60+ neue Optional-Felder)
  - _dsfa_to_response: alle neuen Felder mit safe _get() Helper
  - PUT-Handler: vollständige jsonb_fields-Liste (21 Felder)
- Tests: 101 (+49) Tests — TestAIUseCaseModules + TestDSFAFullSchema
- ingest-dsfa-bundesland.sh:
  - KNOWN_PDF_URLS (15 direkte URLs), download_pdfs()
  - find_pdf_for_state() Helper, PDF-first mit Text-Fallback in ingest_all()

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -46,6 +46,56 @@ class DSFACreate(BaseModel):
|
||||
recipients: List[str] = []
|
||||
measures: List[str] = []
|
||||
created_by: str = "system"
|
||||
# Section 1
|
||||
processing_description: Optional[str] = None
|
||||
processing_purpose: Optional[str] = None
|
||||
legal_basis: Optional[str] = None
|
||||
legal_basis_details: Optional[str] = None
|
||||
# Section 2
|
||||
necessity_assessment: Optional[str] = None
|
||||
proportionality_assessment: Optional[str] = None
|
||||
data_minimization: Optional[str] = None
|
||||
alternatives_considered: Optional[str] = None
|
||||
retention_justification: Optional[str] = None
|
||||
# Section 3
|
||||
involves_ai: Optional[bool] = None
|
||||
overall_risk_level: Optional[str] = None
|
||||
risk_score: Optional[int] = None
|
||||
# Section 6
|
||||
dpo_consulted: Optional[bool] = None
|
||||
dpo_name: Optional[str] = None
|
||||
dpo_opinion: Optional[str] = None
|
||||
dpo_approved: Optional[bool] = None
|
||||
authority_consulted: Optional[bool] = None
|
||||
authority_reference: Optional[str] = None
|
||||
authority_decision: Optional[str] = None
|
||||
# Metadata
|
||||
version: Optional[int] = None
|
||||
conclusion: Optional[str] = None
|
||||
federal_state: Optional[str] = None
|
||||
authority_resource_id: Optional[str] = None
|
||||
submitted_by: Optional[str] = None
|
||||
# JSONB Arrays
|
||||
data_subjects: Optional[List[str]] = None
|
||||
affected_rights: Optional[List[str]] = None
|
||||
triggered_rule_codes: Optional[List[str]] = None
|
||||
ai_trigger_ids: Optional[List[str]] = None
|
||||
wp248_criteria_met: Optional[List[str]] = None
|
||||
art35_abs3_triggered: Optional[List[str]] = None
|
||||
tom_references: Optional[List[str]] = None
|
||||
risks: Optional[List[dict]] = None
|
||||
mitigations: Optional[List[dict]] = None
|
||||
stakeholder_consultations: Optional[List[dict]] = None
|
||||
review_triggers: Optional[List[dict]] = None
|
||||
review_comments: Optional[List[dict]] = None
|
||||
ai_use_case_modules: Optional[List[dict]] = None
|
||||
section_8_complete: Optional[bool] = None
|
||||
# JSONB Objects
|
||||
threshold_analysis: Optional[dict] = None
|
||||
consultation_requirement: Optional[dict] = None
|
||||
review_schedule: Optional[dict] = None
|
||||
section_progress: Optional[dict] = None
|
||||
metadata: Optional[dict] = None
|
||||
|
||||
|
||||
class DSFAUpdate(BaseModel):
|
||||
@@ -58,6 +108,56 @@ class DSFAUpdate(BaseModel):
|
||||
recipients: Optional[List[str]] = None
|
||||
measures: Optional[List[str]] = None
|
||||
approved_by: Optional[str] = None
|
||||
# Section 1
|
||||
processing_description: Optional[str] = None
|
||||
processing_purpose: Optional[str] = None
|
||||
legal_basis: Optional[str] = None
|
||||
legal_basis_details: Optional[str] = None
|
||||
# Section 2
|
||||
necessity_assessment: Optional[str] = None
|
||||
proportionality_assessment: Optional[str] = None
|
||||
data_minimization: Optional[str] = None
|
||||
alternatives_considered: Optional[str] = None
|
||||
retention_justification: Optional[str] = None
|
||||
# Section 3
|
||||
involves_ai: Optional[bool] = None
|
||||
overall_risk_level: Optional[str] = None
|
||||
risk_score: Optional[int] = None
|
||||
# Section 6
|
||||
dpo_consulted: Optional[bool] = None
|
||||
dpo_name: Optional[str] = None
|
||||
dpo_opinion: Optional[str] = None
|
||||
dpo_approved: Optional[bool] = None
|
||||
authority_consulted: Optional[bool] = None
|
||||
authority_reference: Optional[str] = None
|
||||
authority_decision: Optional[str] = None
|
||||
# Metadata
|
||||
version: Optional[int] = None
|
||||
conclusion: Optional[str] = None
|
||||
federal_state: Optional[str] = None
|
||||
authority_resource_id: Optional[str] = None
|
||||
submitted_by: Optional[str] = None
|
||||
# JSONB Arrays
|
||||
data_subjects: Optional[List[str]] = None
|
||||
affected_rights: Optional[List[str]] = None
|
||||
triggered_rule_codes: Optional[List[str]] = None
|
||||
ai_trigger_ids: Optional[List[str]] = None
|
||||
wp248_criteria_met: Optional[List[str]] = None
|
||||
art35_abs3_triggered: Optional[List[str]] = None
|
||||
tom_references: Optional[List[str]] = None
|
||||
risks: Optional[List[dict]] = None
|
||||
mitigations: Optional[List[dict]] = None
|
||||
stakeholder_consultations: Optional[List[dict]] = None
|
||||
review_triggers: Optional[List[dict]] = None
|
||||
review_comments: Optional[List[dict]] = None
|
||||
ai_use_case_modules: Optional[List[dict]] = None
|
||||
section_8_complete: Optional[bool] = None
|
||||
# JSONB Objects
|
||||
threshold_analysis: Optional[dict] = None
|
||||
consultation_requirement: Optional[dict] = None
|
||||
review_schedule: Optional[dict] = None
|
||||
section_progress: Optional[dict] = None
|
||||
metadata: Optional[dict] = None
|
||||
|
||||
|
||||
class DSFAStatusUpdate(BaseModel):
|
||||
@@ -77,19 +177,48 @@ def _dsfa_to_response(row) -> dict:
|
||||
"""Convert a DB row to a JSON-serializable dict."""
|
||||
import json
|
||||
|
||||
def parse_json(val):
|
||||
def _parse_arr(val):
|
||||
"""Parse a JSONB array field → list."""
|
||||
if val is None:
|
||||
return []
|
||||
if isinstance(val, list):
|
||||
return val
|
||||
if isinstance(val, str):
|
||||
try:
|
||||
return json.loads(val)
|
||||
parsed = json.loads(val)
|
||||
return parsed if isinstance(parsed, list) else []
|
||||
except Exception:
|
||||
return []
|
||||
return val
|
||||
|
||||
def _parse_obj(val):
|
||||
"""Parse a JSONB object field → dict."""
|
||||
if val is None:
|
||||
return {}
|
||||
if isinstance(val, dict):
|
||||
return val
|
||||
if isinstance(val, str):
|
||||
try:
|
||||
parsed = json.loads(val)
|
||||
return parsed if isinstance(parsed, dict) else {}
|
||||
except Exception:
|
||||
return {}
|
||||
return val
|
||||
|
||||
def _ts(val):
|
||||
"""Timestamp → ISO string or None."""
|
||||
return val.isoformat() if val else None
|
||||
|
||||
def _get(key, default=None):
|
||||
"""Safe row access — returns default if key missing (handles old rows)."""
|
||||
try:
|
||||
v = row[key]
|
||||
return default if v is None and default is not None else v
|
||||
except (KeyError, IndexError):
|
||||
return default
|
||||
|
||||
return {
|
||||
# Core fields (always present since Migration 024)
|
||||
"id": str(row["id"]),
|
||||
"tenant_id": row["tenant_id"],
|
||||
"title": row["title"],
|
||||
@@ -97,14 +226,69 @@ def _dsfa_to_response(row) -> dict:
|
||||
"status": row["status"] or "draft",
|
||||
"risk_level": row["risk_level"] or "low",
|
||||
"processing_activity": row["processing_activity"] or "",
|
||||
"data_categories": parse_json(row["data_categories"]),
|
||||
"recipients": parse_json(row["recipients"]),
|
||||
"measures": parse_json(row["measures"]),
|
||||
"data_categories": _parse_arr(row["data_categories"]),
|
||||
"recipients": _parse_arr(row["recipients"]),
|
||||
"measures": _parse_arr(row["measures"]),
|
||||
"approved_by": row["approved_by"],
|
||||
"approved_at": row["approved_at"].isoformat() if row["approved_at"] else None,
|
||||
"approved_at": _ts(row["approved_at"]),
|
||||
"created_by": row["created_by"] or "system",
|
||||
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
|
||||
"updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
|
||||
"created_at": _ts(row["created_at"]),
|
||||
"updated_at": _ts(row["updated_at"]),
|
||||
# Section 1 (Migration 030)
|
||||
"processing_description": _get("processing_description"),
|
||||
"processing_purpose": _get("processing_purpose"),
|
||||
"legal_basis": _get("legal_basis"),
|
||||
"legal_basis_details": _get("legal_basis_details"),
|
||||
# Section 2
|
||||
"necessity_assessment": _get("necessity_assessment"),
|
||||
"proportionality_assessment": _get("proportionality_assessment"),
|
||||
"data_minimization": _get("data_minimization"),
|
||||
"alternatives_considered": _get("alternatives_considered"),
|
||||
"retention_justification": _get("retention_justification"),
|
||||
# Section 3
|
||||
"involves_ai": _get("involves_ai", False),
|
||||
"overall_risk_level": _get("overall_risk_level"),
|
||||
"risk_score": _get("risk_score", 0),
|
||||
# Section 6
|
||||
"dpo_consulted": _get("dpo_consulted", False),
|
||||
"dpo_consulted_at": _ts(_get("dpo_consulted_at")),
|
||||
"dpo_name": _get("dpo_name"),
|
||||
"dpo_opinion": _get("dpo_opinion"),
|
||||
"dpo_approved": _get("dpo_approved"),
|
||||
"authority_consulted": _get("authority_consulted", False),
|
||||
"authority_consulted_at": _ts(_get("authority_consulted_at")),
|
||||
"authority_reference": _get("authority_reference"),
|
||||
"authority_decision": _get("authority_decision"),
|
||||
# Metadata / Versioning
|
||||
"version": _get("version", 1),
|
||||
"previous_version_id": str(_get("previous_version_id")) if _get("previous_version_id") else None,
|
||||
"conclusion": _get("conclusion"),
|
||||
"federal_state": _get("federal_state"),
|
||||
"authority_resource_id": _get("authority_resource_id"),
|
||||
"submitted_for_review_at": _ts(_get("submitted_for_review_at")),
|
||||
"submitted_by": _get("submitted_by"),
|
||||
# JSONB Arrays
|
||||
"data_subjects": _parse_arr(_get("data_subjects")),
|
||||
"affected_rights": _parse_arr(_get("affected_rights")),
|
||||
"triggered_rule_codes": _parse_arr(_get("triggered_rule_codes")),
|
||||
"ai_trigger_ids": _parse_arr(_get("ai_trigger_ids")),
|
||||
"wp248_criteria_met": _parse_arr(_get("wp248_criteria_met")),
|
||||
"art35_abs3_triggered": _parse_arr(_get("art35_abs3_triggered")),
|
||||
"tom_references": _parse_arr(_get("tom_references")),
|
||||
"risks": _parse_arr(_get("risks")),
|
||||
"mitigations": _parse_arr(_get("mitigations")),
|
||||
"stakeholder_consultations": _parse_arr(_get("stakeholder_consultations")),
|
||||
"review_triggers": _parse_arr(_get("review_triggers")),
|
||||
"review_comments": _parse_arr(_get("review_comments")),
|
||||
# Section 8 / AI (Migration 028)
|
||||
"ai_use_case_modules": _parse_arr(_get("ai_use_case_modules")),
|
||||
"section_8_complete": _get("section_8_complete", False),
|
||||
# JSONB Objects
|
||||
"threshold_analysis": _parse_obj(_get("threshold_analysis")),
|
||||
"consultation_requirement": _parse_obj(_get("consultation_requirement")),
|
||||
"review_schedule": _parse_obj(_get("review_schedule")),
|
||||
"section_progress": _parse_obj(_get("section_progress")),
|
||||
"metadata": _parse_obj(_get("metadata")),
|
||||
}
|
||||
|
||||
|
||||
@@ -350,7 +534,15 @@ async def update_dsfa(
|
||||
set_clauses = []
|
||||
params: dict = {"id": dsfa_id, "tid": tid}
|
||||
|
||||
jsonb_fields = {"data_categories", "recipients", "measures"}
|
||||
jsonb_fields = {
|
||||
"data_categories", "recipients", "measures",
|
||||
"data_subjects", "affected_rights", "triggered_rule_codes",
|
||||
"ai_trigger_ids", "wp248_criteria_met", "art35_abs3_triggered",
|
||||
"tom_references", "risks", "mitigations", "stakeholder_consultations",
|
||||
"review_triggers", "review_comments", "ai_use_case_modules",
|
||||
"threshold_analysis", "consultation_requirement", "review_schedule",
|
||||
"section_progress", "metadata",
|
||||
}
|
||||
for field, value in updates.items():
|
||||
if field in jsonb_fields:
|
||||
set_clauses.append(f"{field} = CAST(:{field} AS jsonb)")
|
||||
|
||||
Reference in New Issue
Block a user