feat: DSFA vollständiges DB-Schema + PDF-Ingest + Tests
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 36s
CI / test-python-backend-compliance (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 22s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 36s
CI / test-python-backend-compliance (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 22s
- Migration 030: alle fehlenden Spalten für compliance_dsfas (Sections 0-7)
  - flat fields: processing_description, legal_basis, dpo_*, authority_*, ...
  - JSONB arrays: risks, mitigations, wp248_criteria_met, ai_trigger_ids, ...
  - JSONB objects: section_progress, threshold_analysis, review_schedule, metadata
- dsfa_routes.py:
  - DSFACreate/DSFAUpdate erweitert (60+ neue Optional-Felder)
  - _dsfa_to_response: alle neuen Felder mit safe _get() Helper
  - PUT-Handler: vollständige jsonb_fields-Liste (21 Felder)
- Tests: 101 (+49) Tests — TestAIUseCaseModules + TestDSFAFullSchema
- ingest-dsfa-bundesland.sh: KNOWN_PDF_URLS (15 direkte URLs), download_pdfs() und find_pdf_for_state() Helper, PDF-first mit Text-Fallback in ingest_all()

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -46,6 +46,56 @@ class DSFACreate(BaseModel):
|
|||||||
recipients: List[str] = []
|
recipients: List[str] = []
|
||||||
measures: List[str] = []
|
measures: List[str] = []
|
||||||
created_by: str = "system"
|
created_by: str = "system"
|
||||||
|
# Section 1
|
||||||
|
processing_description: Optional[str] = None
|
||||||
|
processing_purpose: Optional[str] = None
|
||||||
|
legal_basis: Optional[str] = None
|
||||||
|
legal_basis_details: Optional[str] = None
|
||||||
|
# Section 2
|
||||||
|
necessity_assessment: Optional[str] = None
|
||||||
|
proportionality_assessment: Optional[str] = None
|
||||||
|
data_minimization: Optional[str] = None
|
||||||
|
alternatives_considered: Optional[str] = None
|
||||||
|
retention_justification: Optional[str] = None
|
||||||
|
# Section 3
|
||||||
|
involves_ai: Optional[bool] = None
|
||||||
|
overall_risk_level: Optional[str] = None
|
||||||
|
risk_score: Optional[int] = None
|
||||||
|
# Section 6
|
||||||
|
dpo_consulted: Optional[bool] = None
|
||||||
|
dpo_name: Optional[str] = None
|
||||||
|
dpo_opinion: Optional[str] = None
|
||||||
|
dpo_approved: Optional[bool] = None
|
||||||
|
authority_consulted: Optional[bool] = None
|
||||||
|
authority_reference: Optional[str] = None
|
||||||
|
authority_decision: Optional[str] = None
|
||||||
|
# Metadata
|
||||||
|
version: Optional[int] = None
|
||||||
|
conclusion: Optional[str] = None
|
||||||
|
federal_state: Optional[str] = None
|
||||||
|
authority_resource_id: Optional[str] = None
|
||||||
|
submitted_by: Optional[str] = None
|
||||||
|
# JSONB Arrays
|
||||||
|
data_subjects: Optional[List[str]] = None
|
||||||
|
affected_rights: Optional[List[str]] = None
|
||||||
|
triggered_rule_codes: Optional[List[str]] = None
|
||||||
|
ai_trigger_ids: Optional[List[str]] = None
|
||||||
|
wp248_criteria_met: Optional[List[str]] = None
|
||||||
|
art35_abs3_triggered: Optional[List[str]] = None
|
||||||
|
tom_references: Optional[List[str]] = None
|
||||||
|
risks: Optional[List[dict]] = None
|
||||||
|
mitigations: Optional[List[dict]] = None
|
||||||
|
stakeholder_consultations: Optional[List[dict]] = None
|
||||||
|
review_triggers: Optional[List[dict]] = None
|
||||||
|
review_comments: Optional[List[dict]] = None
|
||||||
|
ai_use_case_modules: Optional[List[dict]] = None
|
||||||
|
section_8_complete: Optional[bool] = None
|
||||||
|
# JSONB Objects
|
||||||
|
threshold_analysis: Optional[dict] = None
|
||||||
|
consultation_requirement: Optional[dict] = None
|
||||||
|
review_schedule: Optional[dict] = None
|
||||||
|
section_progress: Optional[dict] = None
|
||||||
|
metadata: Optional[dict] = None
|
||||||
|
|
||||||
|
|
||||||
class DSFAUpdate(BaseModel):
|
class DSFAUpdate(BaseModel):
|
||||||
@@ -58,6 +108,56 @@ class DSFAUpdate(BaseModel):
|
|||||||
recipients: Optional[List[str]] = None
|
recipients: Optional[List[str]] = None
|
||||||
measures: Optional[List[str]] = None
|
measures: Optional[List[str]] = None
|
||||||
approved_by: Optional[str] = None
|
approved_by: Optional[str] = None
|
||||||
|
# Section 1
|
||||||
|
processing_description: Optional[str] = None
|
||||||
|
processing_purpose: Optional[str] = None
|
||||||
|
legal_basis: Optional[str] = None
|
||||||
|
legal_basis_details: Optional[str] = None
|
||||||
|
# Section 2
|
||||||
|
necessity_assessment: Optional[str] = None
|
||||||
|
proportionality_assessment: Optional[str] = None
|
||||||
|
data_minimization: Optional[str] = None
|
||||||
|
alternatives_considered: Optional[str] = None
|
||||||
|
retention_justification: Optional[str] = None
|
||||||
|
# Section 3
|
||||||
|
involves_ai: Optional[bool] = None
|
||||||
|
overall_risk_level: Optional[str] = None
|
||||||
|
risk_score: Optional[int] = None
|
||||||
|
# Section 6
|
||||||
|
dpo_consulted: Optional[bool] = None
|
||||||
|
dpo_name: Optional[str] = None
|
||||||
|
dpo_opinion: Optional[str] = None
|
||||||
|
dpo_approved: Optional[bool] = None
|
||||||
|
authority_consulted: Optional[bool] = None
|
||||||
|
authority_reference: Optional[str] = None
|
||||||
|
authority_decision: Optional[str] = None
|
||||||
|
# Metadata
|
||||||
|
version: Optional[int] = None
|
||||||
|
conclusion: Optional[str] = None
|
||||||
|
federal_state: Optional[str] = None
|
||||||
|
authority_resource_id: Optional[str] = None
|
||||||
|
submitted_by: Optional[str] = None
|
||||||
|
# JSONB Arrays
|
||||||
|
data_subjects: Optional[List[str]] = None
|
||||||
|
affected_rights: Optional[List[str]] = None
|
||||||
|
triggered_rule_codes: Optional[List[str]] = None
|
||||||
|
ai_trigger_ids: Optional[List[str]] = None
|
||||||
|
wp248_criteria_met: Optional[List[str]] = None
|
||||||
|
art35_abs3_triggered: Optional[List[str]] = None
|
||||||
|
tom_references: Optional[List[str]] = None
|
||||||
|
risks: Optional[List[dict]] = None
|
||||||
|
mitigations: Optional[List[dict]] = None
|
||||||
|
stakeholder_consultations: Optional[List[dict]] = None
|
||||||
|
review_triggers: Optional[List[dict]] = None
|
||||||
|
review_comments: Optional[List[dict]] = None
|
||||||
|
ai_use_case_modules: Optional[List[dict]] = None
|
||||||
|
section_8_complete: Optional[bool] = None
|
||||||
|
# JSONB Objects
|
||||||
|
threshold_analysis: Optional[dict] = None
|
||||||
|
consultation_requirement: Optional[dict] = None
|
||||||
|
review_schedule: Optional[dict] = None
|
||||||
|
section_progress: Optional[dict] = None
|
||||||
|
metadata: Optional[dict] = None
|
||||||
|
|
||||||
|
|
||||||
class DSFAStatusUpdate(BaseModel):
|
class DSFAStatusUpdate(BaseModel):
|
||||||
@@ -77,19 +177,48 @@ def _dsfa_to_response(row) -> dict:
|
|||||||
"""Convert a DB row to a JSON-serializable dict."""
|
"""Convert a DB row to a JSON-serializable dict."""
|
||||||
import json
|
import json
|
||||||
|
|
||||||
def parse_json(val):
|
def _parse_arr(val):
    """Normalize a JSONB array field to a Python list.

    Args:
        val: The raw column value. May already be a decoded list, the JSON
            text of the column (str/bytes), or None.

    Returns:
        The decoded list. Missing values, malformed JSON, and anything that
        is not a JSON array all yield ``[]``, so the result is always a list.
    """
    if isinstance(val, list):
        return val
    if isinstance(val, tuple):
        # Keep the data, but hand back the advertised type.
        return list(val)
    if isinstance(val, (str, bytes, bytearray)):
        try:
            parsed = json.loads(val)
        except (ValueError, RecursionError):
            # JSONDecodeError and UnicodeDecodeError are ValueError subclasses;
            # parsing stays best-effort and never raises to the caller.
            return []
        return parsed if isinstance(parsed, list) else []
    # None, or an already-decoded non-array value (e.g. a dict): apply the
    # same rule as the text path instead of leaking the wrong type.
    return []
|
||||||
|
|
||||||
|
def _parse_obj(val):
    """Normalize a JSONB object field to a Python dict.

    Args:
        val: The raw column value. May already be a decoded dict, the JSON
            text of the column (str/bytes), or None.

    Returns:
        The decoded dict. Missing values, malformed JSON, and anything that
        is not a JSON object all yield ``{}``, so the result is always a dict.
    """
    if isinstance(val, dict):
        return val
    if isinstance(val, (str, bytes, bytearray)):
        try:
            parsed = json.loads(val)
        except (ValueError, RecursionError):
            # JSONDecodeError and UnicodeDecodeError are ValueError subclasses;
            # parsing stays best-effort and never raises to the caller.
            return {}
        return parsed if isinstance(parsed, dict) else {}
    # None, or an already-decoded non-object value (e.g. a list): apply the
    # same rule as the text path instead of leaking the wrong type.
    return {}
|
||||||
|
|
||||||
|
def _ts(val):
    """Render a timestamp-like value as an ISO 8601 string, or None.

    Args:
        val: A datetime/date-like object, a value already in text form, or
            a falsy placeholder such as None.

    Returns:
        None for falsy input; ``val.isoformat()`` when the object provides
        that method; otherwise ``str(val)``.
    """
    if not val:
        return None
    to_iso = getattr(val, "isoformat", None)
    # Stringify instead of raising AttributeError when the value is not a
    # datetime-like object (for instance, text that is already formatted).
    return to_iso() if callable(to_iso) else str(val)
|
||||||
|
|
||||||
|
def _get(key, default=None):
    """Read ``row[key]`` from the enclosing row, falling back to ``default``.

    Args:
        key: Column name to look up on ``row``.
        default: Value returned when the column is absent from the row or
            its stored value is None.

    Returns:
        The column value, or ``default`` if the lookup raises
        KeyError/IndexError or yields None.
    """
    try:
        # Only the lookup itself can raise; keep the guarded region minimal.
        value = row[key]
    except (KeyError, IndexError):
        return default
    return default if value is None else value
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
# Core fields (always present since Migration 024)
|
||||||
"id": str(row["id"]),
|
"id": str(row["id"]),
|
||||||
"tenant_id": row["tenant_id"],
|
"tenant_id": row["tenant_id"],
|
||||||
"title": row["title"],
|
"title": row["title"],
|
||||||
@@ -97,14 +226,69 @@ def _dsfa_to_response(row) -> dict:
|
|||||||
"status": row["status"] or "draft",
|
"status": row["status"] or "draft",
|
||||||
"risk_level": row["risk_level"] or "low",
|
"risk_level": row["risk_level"] or "low",
|
||||||
"processing_activity": row["processing_activity"] or "",
|
"processing_activity": row["processing_activity"] or "",
|
||||||
"data_categories": parse_json(row["data_categories"]),
|
"data_categories": _parse_arr(row["data_categories"]),
|
||||||
"recipients": parse_json(row["recipients"]),
|
"recipients": _parse_arr(row["recipients"]),
|
||||||
"measures": parse_json(row["measures"]),
|
"measures": _parse_arr(row["measures"]),
|
||||||
"approved_by": row["approved_by"],
|
"approved_by": row["approved_by"],
|
||||||
"approved_at": row["approved_at"].isoformat() if row["approved_at"] else None,
|
"approved_at": _ts(row["approved_at"]),
|
||||||
"created_by": row["created_by"] or "system",
|
"created_by": row["created_by"] or "system",
|
||||||
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
|
"created_at": _ts(row["created_at"]),
|
||||||
"updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
|
"updated_at": _ts(row["updated_at"]),
|
||||||
|
# Section 1 (Migration 030)
|
||||||
|
"processing_description": _get("processing_description"),
|
||||||
|
"processing_purpose": _get("processing_purpose"),
|
||||||
|
"legal_basis": _get("legal_basis"),
|
||||||
|
"legal_basis_details": _get("legal_basis_details"),
|
||||||
|
# Section 2
|
||||||
|
"necessity_assessment": _get("necessity_assessment"),
|
||||||
|
"proportionality_assessment": _get("proportionality_assessment"),
|
||||||
|
"data_minimization": _get("data_minimization"),
|
||||||
|
"alternatives_considered": _get("alternatives_considered"),
|
||||||
|
"retention_justification": _get("retention_justification"),
|
||||||
|
# Section 3
|
||||||
|
"involves_ai": _get("involves_ai", False),
|
||||||
|
"overall_risk_level": _get("overall_risk_level"),
|
||||||
|
"risk_score": _get("risk_score", 0),
|
||||||
|
# Section 6
|
||||||
|
"dpo_consulted": _get("dpo_consulted", False),
|
||||||
|
"dpo_consulted_at": _ts(_get("dpo_consulted_at")),
|
||||||
|
"dpo_name": _get("dpo_name"),
|
||||||
|
"dpo_opinion": _get("dpo_opinion"),
|
||||||
|
"dpo_approved": _get("dpo_approved"),
|
||||||
|
"authority_consulted": _get("authority_consulted", False),
|
||||||
|
"authority_consulted_at": _ts(_get("authority_consulted_at")),
|
||||||
|
"authority_reference": _get("authority_reference"),
|
||||||
|
"authority_decision": _get("authority_decision"),
|
||||||
|
# Metadata / Versioning
|
||||||
|
"version": _get("version", 1),
|
||||||
|
"previous_version_id": str(_get("previous_version_id")) if _get("previous_version_id") else None,
|
||||||
|
"conclusion": _get("conclusion"),
|
||||||
|
"federal_state": _get("federal_state"),
|
||||||
|
"authority_resource_id": _get("authority_resource_id"),
|
||||||
|
"submitted_for_review_at": _ts(_get("submitted_for_review_at")),
|
||||||
|
"submitted_by": _get("submitted_by"),
|
||||||
|
# JSONB Arrays
|
||||||
|
"data_subjects": _parse_arr(_get("data_subjects")),
|
||||||
|
"affected_rights": _parse_arr(_get("affected_rights")),
|
||||||
|
"triggered_rule_codes": _parse_arr(_get("triggered_rule_codes")),
|
||||||
|
"ai_trigger_ids": _parse_arr(_get("ai_trigger_ids")),
|
||||||
|
"wp248_criteria_met": _parse_arr(_get("wp248_criteria_met")),
|
||||||
|
"art35_abs3_triggered": _parse_arr(_get("art35_abs3_triggered")),
|
||||||
|
"tom_references": _parse_arr(_get("tom_references")),
|
||||||
|
"risks": _parse_arr(_get("risks")),
|
||||||
|
"mitigations": _parse_arr(_get("mitigations")),
|
||||||
|
"stakeholder_consultations": _parse_arr(_get("stakeholder_consultations")),
|
||||||
|
"review_triggers": _parse_arr(_get("review_triggers")),
|
||||||
|
"review_comments": _parse_arr(_get("review_comments")),
|
||||||
|
# Section 8 / AI (Migration 028)
|
||||||
|
"ai_use_case_modules": _parse_arr(_get("ai_use_case_modules")),
|
||||||
|
"section_8_complete": _get("section_8_complete", False),
|
||||||
|
# JSONB Objects
|
||||||
|
"threshold_analysis": _parse_obj(_get("threshold_analysis")),
|
||||||
|
"consultation_requirement": _parse_obj(_get("consultation_requirement")),
|
||||||
|
"review_schedule": _parse_obj(_get("review_schedule")),
|
||||||
|
"section_progress": _parse_obj(_get("section_progress")),
|
||||||
|
"metadata": _parse_obj(_get("metadata")),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -350,7 +534,15 @@ async def update_dsfa(
|
|||||||
set_clauses = []
|
set_clauses = []
|
||||||
params: dict = {"id": dsfa_id, "tid": tid}
|
params: dict = {"id": dsfa_id, "tid": tid}
|
||||||
|
|
||||||
jsonb_fields = {"data_categories", "recipients", "measures"}
|
jsonb_fields = {
|
||||||
|
"data_categories", "recipients", "measures",
|
||||||
|
"data_subjects", "affected_rights", "triggered_rule_codes",
|
||||||
|
"ai_trigger_ids", "wp248_criteria_met", "art35_abs3_triggered",
|
||||||
|
"tom_references", "risks", "mitigations", "stakeholder_consultations",
|
||||||
|
"review_triggers", "review_comments", "ai_use_case_modules",
|
||||||
|
"threshold_analysis", "consultation_requirement", "review_schedule",
|
||||||
|
"section_progress", "metadata",
|
||||||
|
}
|
||||||
for field, value in updates.items():
|
for field, value in updates.items():
|
||||||
if field in jsonb_fields:
|
if field in jsonb_fields:
|
||||||
set_clauses.append(f"{field} = CAST(:{field} AS jsonb)")
|
set_clauses.append(f"{field} = CAST(:{field} AS jsonb)")
|
||||||
|
|||||||
73
backend-compliance/migrations/030_dsfa_full_schema.sql
Normal file
73
backend-compliance/migrations/030_dsfa_full_schema.sql
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
-- Migration 030: complete DSFA schema — all fields for Sections 0–7.
-- Adds every column still missing from the compliance_dsfas table.
-- The TypeScript types in types.ts expect 60+ fields; only 15 existed so far.

-- Section 1: processing description + legal basis
ALTER TABLE compliance.compliance_dsfas
    ADD COLUMN IF NOT EXISTS processing_description TEXT,
    ADD COLUMN IF NOT EXISTS processing_purpose     TEXT,
    ADD COLUMN IF NOT EXISTS legal_basis            VARCHAR(500),
    ADD COLUMN IF NOT EXISTS legal_basis_details    TEXT;

-- Section 2: necessity & proportionality
ALTER TABLE compliance.compliance_dsfas
    ADD COLUMN IF NOT EXISTS necessity_assessment       TEXT,
    ADD COLUMN IF NOT EXISTS proportionality_assessment TEXT,
    ADD COLUMN IF NOT EXISTS data_minimization          TEXT,
    ADD COLUMN IF NOT EXISTS alternatives_considered    TEXT,
    ADD COLUMN IF NOT EXISTS retention_justification    TEXT;

-- Section 3: AI flags + risk assessment
ALTER TABLE compliance.compliance_dsfas
    ADD COLUMN IF NOT EXISTS involves_ai        BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS overall_risk_level VARCHAR(50),
    ADD COLUMN IF NOT EXISTS risk_score         INTEGER DEFAULT 0;

-- Section 6: data protection officer (DPO) & supervisory authority
ALTER TABLE compliance.compliance_dsfas
    ADD COLUMN IF NOT EXISTS dpo_consulted          BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS dpo_consulted_at       TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS dpo_name               VARCHAR(255),
    ADD COLUMN IF NOT EXISTS dpo_opinion            TEXT,
    ADD COLUMN IF NOT EXISTS dpo_approved           BOOLEAN,
    ADD COLUMN IF NOT EXISTS authority_consulted    BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS authority_consulted_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS authority_reference    VARCHAR(255),
    ADD COLUMN IF NOT EXISTS authority_decision     TEXT;

-- Versioning & metadata
ALTER TABLE compliance.compliance_dsfas
    ADD COLUMN IF NOT EXISTS version                 INTEGER DEFAULT 1,
    ADD COLUMN IF NOT EXISTS previous_version_id     UUID,
    ADD COLUMN IF NOT EXISTS conclusion              TEXT,
    ADD COLUMN IF NOT EXISTS federal_state           VARCHAR(100),
    ADD COLUMN IF NOT EXISTS authority_resource_id   VARCHAR(100),
    ADD COLUMN IF NOT EXISTS submitted_for_review_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS submitted_by            VARCHAR(255);

-- JSONB arrays (each defaults to an empty array)
ALTER TABLE compliance.compliance_dsfas
    ADD COLUMN IF NOT EXISTS data_subjects             JSONB DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS affected_rights           JSONB DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS triggered_rule_codes      JSONB DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS ai_trigger_ids            JSONB DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS wp248_criteria_met        JSONB DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS art35_abs3_triggered      JSONB DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS tom_references            JSONB DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS risks                     JSONB DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS mitigations               JSONB DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS stakeholder_consultations JSONB DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS review_triggers           JSONB DEFAULT '[]'::jsonb,
    ADD COLUMN IF NOT EXISTS review_comments           JSONB DEFAULT '[]'::jsonb;

-- JSONB objects (the first three have no default and stay NULL until set)
ALTER TABLE compliance.compliance_dsfas
    ADD COLUMN IF NOT EXISTS threshold_analysis       JSONB,
    ADD COLUMN IF NOT EXISTS consultation_requirement JSONB,
    ADD COLUMN IF NOT EXISTS review_schedule          JSONB,
    ADD COLUMN IF NOT EXISTS section_progress         JSONB DEFAULT '{}'::jsonb,
    ADD COLUMN IF NOT EXISTS metadata                 JSONB DEFAULT '{}'::jsonb;

-- Indexes for frequently filtered columns
CREATE INDEX IF NOT EXISTS idx_dsfas_federal_state
    ON compliance.compliance_dsfas(federal_state);
CREATE INDEX IF NOT EXISTS idx_dsfas_involves_ai
    ON compliance.compliance_dsfas(involves_ai);
|
||||||
@@ -15,6 +15,8 @@ from compliance.api.dsfa_routes import (
|
|||||||
VALID_RISK_LEVELS,
|
VALID_RISK_LEVELS,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
import json as _json
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Schema Tests — DSFACreate
|
# Schema Tests — DSFACreate
|
||||||
@@ -167,6 +169,7 @@ class TestGetTenantId:
|
|||||||
class TestDsfaToResponse:
|
class TestDsfaToResponse:
|
||||||
def _make_row(self, **overrides):
|
def _make_row(self, **overrides):
|
||||||
defaults = {
|
defaults = {
|
||||||
|
# Core fields
|
||||||
"id": "abc123",
|
"id": "abc123",
|
||||||
"tenant_id": "default",
|
"tenant_id": "default",
|
||||||
"title": "Test DSFA",
|
"title": "Test DSFA",
|
||||||
@@ -182,6 +185,61 @@ class TestDsfaToResponse:
|
|||||||
"created_by": "system",
|
"created_by": "system",
|
||||||
"created_at": datetime(2026, 1, 1, 12, 0, 0),
|
"created_at": datetime(2026, 1, 1, 12, 0, 0),
|
||||||
"updated_at": datetime(2026, 1, 2, 12, 0, 0),
|
"updated_at": datetime(2026, 1, 2, 12, 0, 0),
|
||||||
|
# Section 1 (Migration 030)
|
||||||
|
"processing_description": None,
|
||||||
|
"processing_purpose": None,
|
||||||
|
"legal_basis": None,
|
||||||
|
"legal_basis_details": None,
|
||||||
|
# Section 2
|
||||||
|
"necessity_assessment": None,
|
||||||
|
"proportionality_assessment": None,
|
||||||
|
"data_minimization": None,
|
||||||
|
"alternatives_considered": None,
|
||||||
|
"retention_justification": None,
|
||||||
|
# Section 3
|
||||||
|
"involves_ai": False,
|
||||||
|
"overall_risk_level": None,
|
||||||
|
"risk_score": 0,
|
||||||
|
# Section 6
|
||||||
|
"dpo_consulted": False,
|
||||||
|
"dpo_consulted_at": None,
|
||||||
|
"dpo_name": None,
|
||||||
|
"dpo_opinion": None,
|
||||||
|
"dpo_approved": None,
|
||||||
|
"authority_consulted": False,
|
||||||
|
"authority_consulted_at": None,
|
||||||
|
"authority_reference": None,
|
||||||
|
"authority_decision": None,
|
||||||
|
# Metadata
|
||||||
|
"version": 1,
|
||||||
|
"previous_version_id": None,
|
||||||
|
"conclusion": None,
|
||||||
|
"federal_state": None,
|
||||||
|
"authority_resource_id": None,
|
||||||
|
"submitted_for_review_at": None,
|
||||||
|
"submitted_by": None,
|
||||||
|
# JSONB Arrays
|
||||||
|
"data_subjects": [],
|
||||||
|
"affected_rights": [],
|
||||||
|
"triggered_rule_codes": [],
|
||||||
|
"ai_trigger_ids": [],
|
||||||
|
"wp248_criteria_met": [],
|
||||||
|
"art35_abs3_triggered": [],
|
||||||
|
"tom_references": [],
|
||||||
|
"risks": [],
|
||||||
|
"mitigations": [],
|
||||||
|
"stakeholder_consultations": [],
|
||||||
|
"review_triggers": [],
|
||||||
|
"review_comments": [],
|
||||||
|
# Section 8 (Migration 028)
|
||||||
|
"ai_use_case_modules": [],
|
||||||
|
"section_8_complete": False,
|
||||||
|
# JSONB Objects
|
||||||
|
"threshold_analysis": None,
|
||||||
|
"consultation_requirement": None,
|
||||||
|
"review_schedule": None,
|
||||||
|
"section_progress": {},
|
||||||
|
"metadata": {},
|
||||||
}
|
}
|
||||||
defaults.update(overrides)
|
defaults.update(overrides)
|
||||||
row = MagicMock()
|
row = MagicMock()
|
||||||
@@ -296,7 +354,8 @@ class TestValidRiskLevels:
|
|||||||
class TestDSFARouterConfig:
|
class TestDSFARouterConfig:
|
||||||
def test_router_prefix(self):
|
def test_router_prefix(self):
|
||||||
from compliance.api.dsfa_routes import router
|
from compliance.api.dsfa_routes import router
|
||||||
assert router.prefix == "/v1/dsfa"
|
# /v1 prefix is added when router is included in the main app
|
||||||
|
assert router.prefix == "/dsfa"
|
||||||
|
|
||||||
def test_router_has_tags(self):
|
def test_router_has_tags(self):
|
||||||
from compliance.api.dsfa_routes import router
|
from compliance.api.dsfa_routes import router
|
||||||
@@ -382,3 +441,328 @@ class TestAuditLogEntry:
|
|||||||
entry = {"old_values": None, "new_values": {"title": "Test"}}
|
entry = {"old_values": None, "new_values": {"title": "Test"}}
|
||||||
assert entry["old_values"] is None
|
assert entry["old_values"] is None
|
||||||
assert entry["new_values"] is not None
|
assert entry["new_values"] is not None
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# TestAIUseCaseModules — Section 8 KI-Anwendungsfälle (Migration 028)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
class TestAIUseCaseModules:
    """Schema and response checks for the Section 8 AI fields.

    Exercises ``ai_use_case_modules`` and ``section_8_complete`` on the
    DSFACreate / DSFAUpdate models, plus how ``_dsfa_to_response`` renders
    them.
    """

    @staticmethod
    def _response_for(**overrides):
        """Build a mock row via TestDsfaToResponse and convert it to a response."""
        from tests.test_dsfa_routes import TestDsfaToResponse

        row_factory = TestDsfaToResponse()
        mock_row = row_factory._make_row(**overrides)
        return _dsfa_to_response(mock_row)

    # --- DSFACreate / DSFAUpdate: ai_use_case_modules ---

    def test_ai_use_case_modules_field_accepted_in_create(self):
        created = DSFACreate(
            title="Test",
            ai_use_case_modules=[{"type": "generative_ai"}],
        )
        assert created.ai_use_case_modules == [{"type": "generative_ai"}]

    def test_ai_use_case_modules_default_none_in_create(self):
        assert DSFACreate(title="Test").ai_use_case_modules is None

    def test_ai_use_case_modules_field_accepted_in_update(self):
        update = DSFAUpdate(
            ai_use_case_modules=[{"type": "computer_vision", "name": "Bilderkennung"}]
        )
        assert update.ai_use_case_modules == [
            {"type": "computer_vision", "name": "Bilderkennung"}
        ]

    def test_ai_use_case_modules_empty_list_accepted(self):
        assert DSFAUpdate(ai_use_case_modules=[]).ai_use_case_modules == []

    def test_ai_use_case_modules_multiple_modules(self):
        update = DSFAUpdate(
            ai_use_case_modules=[
                {"type": "generative_ai", "name": "LLM-Assistent"},
                {"type": "predictive_analytics", "name": "Risikobewertung"},
            ]
        )
        assert len(update.ai_use_case_modules) == 2

    # --- Shape of individual module dicts ---

    def test_module_generative_ai_type(self):
        update = DSFAUpdate(
            ai_use_case_modules=[{"type": "generative_ai", "name": "Text-Generator"}]
        )
        first = update.ai_use_case_modules[0]
        assert first["type"] == "generative_ai"

    def test_module_art22_assessment_structure(self):
        payload = {
            "type": "decision_support",
            "art22_relevant": True,
            "art22_assessment": {"automated_decision": True, "human_oversight": True},
        }
        update = DSFAUpdate(ai_use_case_modules=[payload])
        first = update.ai_use_case_modules[0]
        assert first["art22_relevant"] is True

    def test_module_ai_act_risk_class_values(self):
        for level in ["minimal", "limited", "high", "unacceptable"]:
            update = DSFAUpdate(
                ai_use_case_modules=[{"type": "nlp", "ai_act_risk_class": level}]
            )
            assert update.ai_use_case_modules[0]["ai_act_risk_class"] == level

    def test_module_risk_criteria_structure(self):
        criteria = [
            {"criterion": "K1", "met": True, "justification": "Scoring vorhanden"},
            {"criterion": "K3", "met": True, "justification": "Systematische Überwachung"},
        ]
        update = DSFAUpdate(
            ai_use_case_modules=[{"type": "computer_vision", "risk_criteria": criteria}]
        )
        assert len(update.ai_use_case_modules[0]["risk_criteria"]) == 2

    def test_module_privacy_by_design_measures(self):
        payload = {
            "type": "recommendation",
            "privacy_by_design": ["data_minimization", "pseudonymization"],
        }
        update = DSFAUpdate(ai_use_case_modules=[payload])
        measures = update.ai_use_case_modules[0]["privacy_by_design"]
        assert "data_minimization" in measures

    def test_module_review_triggers(self):
        update = DSFAUpdate(
            review_triggers=[{"trigger": "model_update", "date": "2026-06-01"}]
        )
        assert update.review_triggers[0]["trigger"] == "model_update"

    # --- section_8_complete flag ---

    def test_section_8_complete_flag_in_create(self):
        created = DSFACreate(title="Test", section_8_complete=True)
        assert created.section_8_complete is True

    def test_section_8_complete_flag_in_update(self):
        dumped = DSFAUpdate(section_8_complete=True).model_dump(exclude_none=True)
        assert dumped["section_8_complete"] is True

    def test_section_8_complete_default_none(self):
        assert DSFAUpdate().section_8_complete is None

    # --- model_dump(exclude_none=True) behaviour ---

    def test_ai_use_case_modules_excluded_when_none(self):
        dumped = DSFAUpdate(title="Test").model_dump(exclude_none=True)
        assert "ai_use_case_modules" not in dumped

    def test_ai_use_case_modules_included_when_set(self):
        update = DSFAUpdate(ai_use_case_modules=[{"type": "nlp"}])
        dumped = update.model_dump(exclude_none=True)
        assert "ai_use_case_modules" in dumped

    def test_module_with_all_common_fields(self):
        payload = {
            "type": "predictive_analytics",
            "name": "Fraud Detection",
            "description": "Erkennung betrügerischer Aktivitäten",
            "data_inputs": ["Transaktionsdaten", "Verhaltensdaten"],
            "ai_act_risk_class": "high",
            "art22_relevant": True,
        }
        update = DSFAUpdate(ai_use_case_modules=[payload])
        stored = update.ai_use_case_modules[0]
        assert stored["name"] == "Fraud Detection"
        assert stored["ai_act_risk_class"] == "high"

    # --- _dsfa_to_response ---

    def test_response_ai_use_case_modules_list_from_list(self):
        """A list value for ai_use_case_modules is returned unchanged."""
        expected = [{"type": "nlp", "name": "Test"}]
        response = self._response_for(ai_use_case_modules=expected)
        assert response["ai_use_case_modules"] == expected

    def test_response_ai_use_case_modules_from_json_string(self):
        """A JSON string for ai_use_case_modules is decoded to a list."""
        expected = [{"type": "computer_vision"}]
        response = self._response_for(ai_use_case_modules=_json.dumps(expected))
        assert response["ai_use_case_modules"] == expected

    def test_response_ai_use_case_modules_null_becomes_empty_list(self):
        """None for ai_use_case_modules is rendered as an empty list."""
        response = self._response_for(ai_use_case_modules=None)
        assert response["ai_use_case_modules"] == []

    def test_response_section_8_complete_flag(self):
        """The section_8_complete boolean is carried into the response."""
        response = self._response_for(section_8_complete=True)
        assert response["section_8_complete"] is True
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# TestDSFAFullSchema — Migration 030 neue Felder
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
class TestDSFAFullSchema:
    """Cover the DSFA fields that were added with Migration 030.

    The first group of tests feeds the new fields into ``DSFAUpdate``; the
    second group checks how ``_dsfa_to_response`` renders them.
    """

    def _make_row(self, **overrides):
        """Build a row via the ``_make_row`` helper of ``TestDsfaToResponse``."""
        from tests.test_dsfa_routes import TestDsfaToResponse

        return TestDsfaToResponse()._make_row(**overrides)

    # --- Pydantic schema tests ---

    def test_processing_description_accepted(self):
        """``processing_description`` is stored unchanged on the model."""
        update = DSFAUpdate(processing_description="Verarbeitung von Kundendaten zur Risikoanalyse")
        assert update.processing_description == "Verarbeitung von Kundendaten zur Risikoanalyse"

    def test_legal_basis_accepted(self):
        """``legal_basis`` shows up in the dump when ``None`` values are excluded."""
        update = DSFAUpdate(legal_basis="Art. 6 Abs. 1f DSGVO")
        dumped = update.model_dump(exclude_none=True)
        assert dumped["legal_basis"] == "Art. 6 Abs. 1f DSGVO"

    def test_dpo_consulted_bool(self):
        """``dpo_consulted`` and ``dpo_name`` can be set together."""
        update = DSFAUpdate(dpo_consulted=True, dpo_name="Dr. Müller")
        assert update.dpo_consulted is True
        assert update.dpo_name == "Dr. Müller"

    def test_dpo_approved_bool(self):
        """``dpo_approved`` is dumped as the bool ``True``."""
        update = DSFAUpdate(dpo_approved=True)
        dumped = update.model_dump(exclude_none=True)
        assert dumped["dpo_approved"] is True

    def test_authority_consulted_bool(self):
        """``authority_consulted`` and ``authority_reference`` can be set together."""
        update = DSFAUpdate(authority_consulted=True, authority_reference="AZ-2026-001")
        assert update.authority_consulted is True
        assert update.authority_reference == "AZ-2026-001"

    def test_risks_jsonb_structure(self):
        """``risks`` accepts a list of dicts and keeps order and content."""
        risk_entries = [
            {"id": "R1", "title": "Datenpanne", "likelihood": "medium", "impact": "high"},
            {"id": "R2", "title": "Unbefugter Zugriff", "likelihood": "low", "impact": "critical"},
        ]
        update = DSFAUpdate(risks=risk_entries)
        assert len(update.risks) == 2
        assert update.risks[0]["title"] == "Datenpanne"

    def test_mitigations_jsonb_structure(self):
        """``mitigations`` accepts a list of dicts."""
        mitigation_entries = [
            {"id": "M1", "measure": "Verschlüsselung", "risk_ref": "R1"},
        ]
        update = DSFAUpdate(mitigations=mitigation_entries)
        assert update.mitigations[0]["measure"] == "Verschlüsselung"

    def test_review_schedule_jsonb(self):
        """``review_schedule`` accepts a dict."""
        plan = {"next_review": "2027-01-01", "frequency": "annual", "responsible": "DSB"}
        update = DSFAUpdate(review_schedule=plan)
        assert update.review_schedule["frequency"] == "annual"

    def test_section_progress_jsonb(self):
        """``section_progress`` accepts a dict of booleans."""
        completed = {"section_1": True, "section_2": False, "section_3": True}
        update = DSFAUpdate(section_progress=completed)
        assert update.section_progress["section_1"] is True

    def test_threshold_analysis_jsonb(self):
        """``threshold_analysis`` accepts a dict with mixed value types."""
        threshold = {"wp248_criteria_count": 3, "dsfa_required": True}
        update = DSFAUpdate(threshold_analysis=threshold)
        assert update.threshold_analysis["dsfa_required"] is True

    def test_involves_ai_bool(self):
        """``involves_ai`` is dumped as the bool ``True``."""
        update = DSFAUpdate(involves_ai=True)
        dumped = update.model_dump(exclude_none=True)
        assert dumped["involves_ai"] is True

    def test_federal_state_accepted(self):
        """``federal_state`` shows up in the dump when ``None`` values are excluded."""
        update = DSFAUpdate(federal_state="Bayern")
        dumped = update.model_dump(exclude_none=True)
        assert dumped["federal_state"] == "Bayern"

    def test_data_subjects_list(self):
        """``data_subjects`` keeps all three supplied entries."""
        update = DSFAUpdate(data_subjects=["Kunden", "Mitarbeiter", "Minderjährige"])
        assert len(update.data_subjects) == 3

    def test_wp248_criteria_met_list(self):
        """``wp248_criteria_met`` contains the supplied criterion ids."""
        update = DSFAUpdate(wp248_criteria_met=["K1", "K3", "K5"])
        assert "K3" in update.wp248_criteria_met

    def test_conclusion_text(self):
        """``conclusion`` keeps the supplied free text."""
        update = DSFAUpdate(conclusion="DSFA erforderlich — hohe Risiken verbleiben nach Maßnahmen.")
        assert "DSFA erforderlich" in update.conclusion

    def test_all_new_fields_optional_in_update(self):
        """An empty ``DSFAUpdate`` leaves every listed field at ``None``."""
        update = DSFAUpdate()
        optional_fields = (
            "processing_description", "processing_purpose", "legal_basis",
            "necessity_assessment", "proportionality_assessment",
            "involves_ai", "dpo_consulted", "dpo_opinion", "dpo_approved",
            "authority_consulted", "risks", "mitigations", "section_progress",
            "threshold_analysis", "federal_state", "conclusion",
        )
        for field in optional_fields:
            assert getattr(update, field) is None, f"{field} should default to None"

    # --- _dsfa_to_response tests ---

    def test_response_processing_description(self):
        """A plain text column is passed through to the response."""
        response = _dsfa_to_response(self._make_row(processing_description="Test-Beschreibung"))
        assert response["processing_description"] == "Test-Beschreibung"

    def test_response_risks_parsed_from_json_string(self):
        """``risks`` given as a JSON string comes back as the decoded list."""
        risk_entries = [{"id": "R1", "title": "Datenpanne"}]
        encoded = _json.dumps(risk_entries)
        response = _dsfa_to_response(self._make_row(risks=encoded))
        assert response["risks"] == risk_entries

    def test_response_section_progress_object(self):
        """``section_progress`` given as a dict is readable from the response."""
        completed = {"section_1": True, "section_3": False}
        response = _dsfa_to_response(self._make_row(section_progress=completed))
        assert response["section_progress"]["section_1"] is True

    def test_response_section_progress_from_json_string(self):
        """``section_progress`` given as a JSON string comes back as the decoded dict."""
        completed = {"section_2": True}
        encoded = _json.dumps(completed)
        response = _dsfa_to_response(self._make_row(section_progress=encoded))
        assert response["section_progress"] == completed

    def test_response_involves_ai_bool(self):
        """``involves_ai`` survives as the bool ``True``."""
        response = _dsfa_to_response(self._make_row(involves_ai=True))
        assert response["involves_ai"] is True

    def test_response_dpo_consulted_bool(self):
        """``dpo_consulted`` and ``dpo_name`` are both present in the response."""
        response = _dsfa_to_response(self._make_row(dpo_consulted=True, dpo_name="Dr. Müller"))
        assert response["dpo_consulted"] is True
        assert response["dpo_name"] == "Dr. Müller"

    def test_response_version_defaults_to_1(self):
        """A ``None`` version is rendered as ``1``."""
        response = _dsfa_to_response(self._make_row(version=None))
        assert response["version"] == 1

    def test_response_null_risks_becomes_empty_list(self):
        """A ``None`` value for ``risks`` is rendered as ``[]``."""
        response = _dsfa_to_response(self._make_row(risks=None))
        assert response["risks"] == []

    def test_response_null_section_progress_becomes_empty_dict(self):
        """A ``None`` value for ``section_progress`` is rendered as ``{}``."""
        response = _dsfa_to_response(self._make_row(section_progress=None))
        assert response["section_progress"] == {}

    def test_response_threshold_analysis_null_becomes_empty_dict(self):
        """A ``None`` value for ``threshold_analysis`` is rendered as ``{}``."""
        response = _dsfa_to_response(self._make_row(threshold_analysis=None))
        assert response["threshold_analysis"] == {}

    def test_response_federal_state(self):
        """``federal_state`` is passed through to the response."""
        response = _dsfa_to_response(self._make_row(federal_state="NRW"))
        assert response["federal_state"] == "NRW"

    def test_response_all_new_keys_present(self):
        """Every listed key exists in the response of a row built without overrides."""
        response = _dsfa_to_response(self._make_row())
        expected_keys = (
            "processing_description", "legal_basis", "necessity_assessment",
            "involves_ai", "dpo_consulted", "authority_consulted",
            "risks", "mitigations", "section_progress", "threshold_analysis",
            "ai_use_case_modules", "section_8_complete", "federal_state",
            "version", "conclusion",
        )
        for key in expected_keys:
            assert key in response, f"Missing key in response: {key}"
|
||||||
|
|||||||
@@ -77,18 +77,80 @@ declare -A STATE_NAMES=(
|
|||||||
["th"]="Thüringen"
|
["th"]="Thüringen"
|
||||||
)
|
)
|
||||||
|
|
||||||
# PDF-URLs der Muss-Listen (direkte Download-Links)
|
# Direkte PDF-Download-URLs der Behörden-Muss-Listen (Art. 35 Abs. 4 DSGVO)
|
||||||
declare -A PDF_URLS=(
|
# Quellen: DSFA_AUTHORITY_RESOURCES in admin-compliance/lib/sdk/dsfa/types.ts
|
||||||
["bw_privat"]="https://www.baden-wuerttemberg.datenschutz.de/dsfa-muss-liste/"
|
declare -A KNOWN_PDF_URLS=(
|
||||||
["hh_beide"]="https://datenschutz.hamburg.de/infothek/datenschutz-folgenabschaetzung"
|
["bfdi_public"]="https://www.bfdi.bund.de/SharedDocs/Downloads/DE/Muster/Liste_VerarbeitungsvorgaengeArt35.pdf"
|
||||||
["nw_oeffentlich"]="https://www.ldi.nrw.de/datenschutz/datenschutz-folgenabschaetzung"
|
["bw_privat"]="https://www.baden-wuerttemberg.datenschutz.de/wp-content/uploads/2018/05/Liste-von-Verarbeitungsvorg%C3%A4ngen-nach-Art.-35-Abs.-4-DS-GVO-LfDI-BW.pdf"
|
||||||
["ni_beide"]="https://lfd.niedersachsen.de/startseite/themen/datenschutzfolgenabschaetzung/"
|
["be_public"]="https://www.datenschutz-berlin.de/fileadmin/user_upload/pdf/dokumente/2018-BlnBDI_DSFA-oeffentlich.pdf"
|
||||||
["be_beide"]="https://www.datenschutz-berlin.de/themen/verarbeitungen-mit-hohem-risiko/datenschutz-folgenabschaetzung/"
|
["be_privat"]="https://www.datenschutz-berlin.de/fileadmin/user_upload/pdf/dokumente/2018-BlnBDI_DSFA-nicht-oeffentlich.pdf"
|
||||||
["bfdi_liste"]="https://www.bfdi.bund.de/DE/Fachthemen/Inhalte/Datenschutzbehoerden/DSFA.html"
|
["bb_public"]="https://www.lda.brandenburg.de/sixcms/media.php/9/DSFA-Liste_%C3%B6ffentlicher_Bereich.pdf"
|
||||||
|
["bb_privat"]="https://www.lda.brandenburg.de/sixcms/media.php/9/DSFA-Liste_nicht_%C3%B6ffentlicher_Bereich.pdf"
|
||||||
|
["hb_public"]="https://www.datenschutz.bremen.de/sixcms/media.php/13/Liste%20von%20Verarbeitungsvorg%C3%A4ngen%20nach%20Artikel%2035.pdf"
|
||||||
|
["hb_privat"]="https://www.datenschutz.bremen.de/sixcms/media.php/13/DSFA%20Muss-Liste%20LfDI%20HB.pdf"
|
||||||
|
["hh_public"]="https://datenschutz-hamburg.de/fileadmin/user_upload/HmbBfDI/Datenschutz/Informationen/Liste_Art_35-4_DSGVO_HmbBfDI-oeffentlicher_Bereich_v2.0a.pdf"
|
||||||
|
["hh_privat"]="https://datenschutz-hamburg.de/fileadmin/user_upload/HmbBfDI/Datenschutz/Informationen/DSFA_Muss-Liste_fuer_den_nicht-oeffentlicher_Bereich_-_Stand_17.10.2018.pdf"
|
||||||
|
["mv_public"]="https://www.datenschutz-mv.de/static/DS/Dateien/DS-GVO/HilfsmittelzurUmsetzung/MV-DSFA-Muss-Liste-Oeffentlicher-Bereich.pdf"
|
||||||
|
["ni_public"]="https://www.lfd.niedersachsen.de/download/134414/DSFA_Muss-Liste_fuer_den_oeffentlichen_Bereich.pdf"
|
||||||
|
["ni_privat"]="https://www.lfd.niedersachsen.de/download/131098/Liste_von_Verarbeitungsvorgaengen_nach_Art._35_Abs._4_DS-GVO.pdf"
|
||||||
|
["sl_privat"]="https://www.datenschutz.saarland.de/fileadmin/user_upload/uds/alle_Dateien_und_Ordner_bis_2025/Download/dsfa_muss_liste_dsk_de.pdf"
|
||||||
|
["st_public"]="https://datenschutz.sachsen-anhalt.de/fileadmin/Bibliothek/Landesaemter/LfD/Informationen/Internationales/Datenschutz-Grundverordnung/Liste_DSFA/Art-35-Liste-oeffentlicher_Bereich.pdf"
|
||||||
|
["st_privat"]="https://datenschutz.sachsen-anhalt.de/fileadmin/Bibliothek/Landesaemter/LfD/Informationen/Internationales/Datenschutz-Grundverordnung/Liste_DSFA/Art-35-Liste-nichtoeffentlicher_Bereich.pdf"
|
||||||
)
|
)
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Phase 2: Text-Zusammenfassungen (für Bundesländer ohne direkte PDFs)
|
# Phase 2a: PDF-Downloads
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Succeeds when $1 is a regular file larger than 1000 bytes whose first
# 1024 bytes contain the "%PDF-" marker. The marker check rejects HTML
# error or landing pages that were saved under a .pdf name.
_dsfa_pdf_looks_valid() {
    local candidate="$1"
    [[ -f "$candidate" ]] || return 1
    [[ $(wc -c < "$candidate") -gt 1000 ]] || return 1
    head -c 1024 "$candidate" | grep -aq '%PDF-'
}

# Download every entry of KNOWN_PDF_URLS to "$DOWNLOAD_DIR/<key>.pdf".
#
# Files that already exist and look like a PDF are kept and counted as
# success. A failed or invalid download is removed again so that later
# lookups do not find a broken file. Prints a summary via log at the end.
download_pdfs() {
    log "Lade Behörden-PDFs herunter (${#KNOWN_PDF_URLS[@]} URLs)..."
    local success=0
    local failed=0
    # Declared local so the loop does not clobber same-named variables of callers.
    local key url outfile

    for key in "${!KNOWN_PDF_URLS[@]}"; do
        url="${KNOWN_PDF_URLS[$key]}"
        outfile="$DOWNLOAD_DIR/${key}.pdf"

        if _dsfa_pdf_looks_valid "$outfile"; then
            ok "PDF bereits vorhanden: $key"
            ((success++)) || true
            continue
        fi

        # curl runs inside the `if` condition so that a failed download is
        # handled here instead of aborting the script when errexit is active
        # (presumably it is, given the `|| true` guards on the counters).
        # --fail turns HTTP errors (>= 400) into a non-zero exit status instead
        # of writing the error page to $outfile.
        # NOTE(review): -k disables TLS certificate verification; kept from the
        # original, confirm whether it is still required.
        if curl -sk --fail --max-time 30 -L -A "BreakPilot-Compliance/1.0" -o "$outfile" "$url" 2>/dev/null \
            && _dsfa_pdf_looks_valid "$outfile"; then
            ok "PDF heruntergeladen: $key"
            ((success++)) || true
        else
            warn "PDF fehlgeschlagen: $key — nutze Text-Fallback"
            rm -f "$outfile"
            ((failed++)) || true
        fi
    done

    log "PDF-Downloads: $success OK, $failed fehlgeschlagen"
}
|
||||||
|
|
||||||
|
# Gibt den Pfad zur ersten vorhandenen PDF-Datei für einen State-ID-Prefix zurück.
|
||||||
|
# Gibt leeren String zurück, wenn keine PDF gefunden.
|
||||||
|
# Print the path of a downloaded PDF for the given state id prefix.
#
# $1: state id (e.g. "bw"); matches the key "<id>" and keys starting with "<id>_".
# Output: path of the first matching file that exists and is larger than
#         1000 bytes, or an empty line when there is none.
#
# Keys are visited in sorted order because the iteration order of an
# associative array is unspecified; without sorting, states with several
# entries could yield a different file from run to run.
find_pdf_for_state() {
    local state_id="$1"
    # Declared local so the loop does not clobber same-named variables of callers.
    local key pdf
    local -a sorted_keys

    mapfile -t sorted_keys < <(printf '%s\n' "${!KNOWN_PDF_URLS[@]}" | LC_ALL=C sort)

    for key in "${sorted_keys[@]}"; do
        if [[ "$key" == "${state_id}_"* || "$key" == "${state_id}" ]]; then
            pdf="$DOWNLOAD_DIR/${key}.pdf"
            if [[ -f "$pdf" && $(wc -c < "$pdf") -gt 1000 ]]; then
                echo "$pdf"
                return 0
            fi
        fi
    done
    echo ""
}
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Phase 2b: Text-Zusammenfassungen (für Bundesländer ohne direkte PDFs)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
create_text_summaries() {
|
create_text_summaries() {
|
||||||
@@ -331,32 +393,40 @@ ingest_all() {
|
|||||||
log "Starte Ingest in Corpus: $COLLECTION"
|
log "Starte Ingest in Corpus: $COLLECTION"
|
||||||
log "RAG-URL: $RAG_URL"
|
log "RAG-URL: $RAG_URL"
|
||||||
|
|
||||||
# WP248-Dokument (für alle Bundesländer relevant)
|
# WP248-Dokument (für alle Bundesländer relevant — kein PDF verfügbar)
|
||||||
ingest_document \
|
ingest_document \
|
||||||
"$DOWNLOAD_DIR/dsfa_wpk248_kriterien.txt" \
|
"$DOWNLOAD_DIR/dsfa_wpk248_kriterien.txt" \
|
||||||
"wp248_rev01" "EU" "Article 29 Working Party / EDPB" "leitlinie"
|
"wp248_rev01" "EU" "Article 29 Working Party / EDPB" "leitlinie"
|
||||||
|
|
||||||
# BfDI
|
# BfDI — PDF bevorzugen, Text als Fallback
|
||||||
ingest_document \
|
local bfdi_pdf
|
||||||
"$DOWNLOAD_DIR/bfdi_muss_liste.txt" \
|
bfdi_pdf=$(find_pdf_for_state "bfdi")
|
||||||
"muss_liste_bfdi" "Bund" "BfDI" "muss_liste"
|
if [[ -n "$bfdi_pdf" ]]; then
|
||||||
|
ingest_document "$bfdi_pdf" "muss_liste_bfdi" "Bund" "BfDI" "muss_liste"
|
||||||
|
else
|
||||||
|
ingest_document "$DOWNLOAD_DIR/bfdi_muss_liste.txt" "muss_liste_bfdi" "Bund" "BfDI" "muss_liste"
|
||||||
|
fi
|
||||||
|
|
||||||
# Baden-Württemberg
|
# Baden-Württemberg — PDF bevorzugen, Text als Fallback
|
||||||
ingest_document \
|
local bw_pdf
|
||||||
"$DOWNLOAD_DIR/bw_dsfa_anforderungen.txt" \
|
bw_pdf=$(find_pdf_for_state "bw")
|
||||||
"muss_liste_bw" "Baden-Württemberg" "LfDI BW" "muss_liste"
|
if [[ -n "$bw_pdf" ]]; then
|
||||||
|
ingest_document "$bw_pdf" "muss_liste_bw" "Baden-Württemberg" "LfDI BW" "muss_liste"
|
||||||
|
else
|
||||||
|
ingest_document "$DOWNLOAD_DIR/bw_dsfa_anforderungen.txt" "muss_liste_bw" "Baden-Württemberg" "LfDI BW" "muss_liste"
|
||||||
|
fi
|
||||||
|
|
||||||
# Bayern
|
# Bayern — kein direktes PDF bekannt, Text
|
||||||
ingest_document \
|
ingest_document \
|
||||||
"$DOWNLOAD_DIR/by_dsfa_anforderungen.txt" \
|
"$DOWNLOAD_DIR/by_dsfa_anforderungen.txt" \
|
||||||
"muss_liste_by" "Bayern" "LDA Bayern" "muss_liste"
|
"muss_liste_by" "Bayern" "LDA Bayern" "muss_liste"
|
||||||
|
|
||||||
# NRW
|
# NRW — kein direktes PDF bekannt, Text
|
||||||
ingest_document \
|
ingest_document \
|
||||||
"$DOWNLOAD_DIR/nrw_dsfa_anforderungen.txt" \
|
"$DOWNLOAD_DIR/nrw_dsfa_anforderungen.txt" \
|
||||||
"muss_liste_nw" "Nordrhein-Westfalen" "LDI NRW" "muss_liste"
|
"muss_liste_nw" "Nordrhein-Westfalen" "LDI NRW" "muss_liste"
|
||||||
|
|
||||||
# Weitere Bundesländer aus DSFA_AUTHORITY_RESOURCES-Daten (als Text)
|
# Weitere Bundesländer — PDF bevorzugen, Text als Fallback
|
||||||
for state_id in be bb hb hh he mv ni rp sl sn st sh th; do
|
for state_id in be bb hb hh he mv ni rp sl sn st sh th; do
|
||||||
local txt_file="$DOWNLOAD_DIR/${state_id}_dsfa_anforderungen.txt"
|
local txt_file="$DOWNLOAD_DIR/${state_id}_dsfa_anforderungen.txt"
|
||||||
if [[ ! -f "$txt_file" ]]; then
|
if [[ ! -f "$txt_file" ]]; then
|
||||||
@@ -383,12 +453,24 @@ Quelle: DSK-Positionspapier, WP248, Art. 35 Abs. 4 DSGVO
|
|||||||
EOF
|
EOF
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# PDF bevorzugen, Text als Fallback
|
||||||
|
local state_pdf
|
||||||
|
state_pdf=$(find_pdf_for_state "$state_id")
|
||||||
|
if [[ -n "$state_pdf" ]]; then
|
||||||
|
ingest_document \
|
||||||
|
"$state_pdf" \
|
||||||
|
"muss_liste_${state_id}" \
|
||||||
|
"${STATE_NAMES[$state_id]:-$state_id}" \
|
||||||
|
"${AUTHORITY_LABELS[$state_id]:-Datenschutzbehörde $state_id}" \
|
||||||
|
"muss_liste"
|
||||||
|
else
|
||||||
ingest_document \
|
ingest_document \
|
||||||
"$txt_file" \
|
"$txt_file" \
|
||||||
"muss_liste_${state_id}" \
|
"muss_liste_${state_id}" \
|
||||||
"${STATE_NAMES[$state_id]:-$state_id}" \
|
"${STATE_NAMES[$state_id]:-$state_id}" \
|
||||||
"${AUTHORITY_LABELS[$state_id]:-Datenschutzbehörde $state_id}" \
|
"${AUTHORITY_LABELS[$state_id]:-Datenschutzbehörde $state_id}" \
|
||||||
"muss_liste"
|
"muss_liste"
|
||||||
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
log "Ingest abgeschlossen"
|
log "Ingest abgeschlossen"
|
||||||
@@ -427,16 +509,17 @@ main() {
|
|||||||
log "Download-Dir: $DOWNLOAD_DIR"
|
log "Download-Dir: $DOWNLOAD_DIR"
|
||||||
log "Skip-Download: $SKIP_DOWNLOAD"
|
log "Skip-Download: $SKIP_DOWNLOAD"
|
||||||
|
|
||||||
# Schritt 1: Text-Zusammenfassungen erstellen (immer)
|
# Schritt 1: Text-Zusammenfassungen erstellen (immer als Fallback)
|
||||||
create_text_summaries
|
create_text_summaries
|
||||||
|
|
||||||
# Schritt 2: PDFs herunterladen (wenn nicht --skip-download)
|
# Schritt 2: PDFs herunterladen (wenn nicht --skip-download oder --only-text)
|
||||||
if [[ "$SKIP_DOWNLOAD" == false && "$ONLY_TEXT" == false ]]; then
|
if [[ "$SKIP_DOWNLOAD" == false && "$ONLY_TEXT" == false ]]; then
|
||||||
log "PDF-Downloads übersprungen (direkte URLs zu Behörden-PDFs variieren) – nutze Text-Dateien"
|
download_pdfs
|
||||||
log "Tipp: Laden Sie PDFs manuell herunter und legen Sie sie in $DOWNLOAD_DIR ab"
|
else
|
||||||
|
log "PDF-Downloads übersprungen (--skip-download oder --only-text gesetzt)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Schritt 3: Ingest
|
# Schritt 3: Ingest (PDF bevorzugt, Text als Fallback)
|
||||||
ingest_all
|
ingest_all
|
||||||
|
|
||||||
# Schritt 4: Verifikation
|
# Schritt 4: Verifikation
|
||||||
|
|||||||
Reference in New Issue
Block a user