feat: Dreistufenmodell normative Verbindlichkeit + Duplikat-Filter + Auto-Deploy

- Source-Type-Klassifikation (58 Regulierungen: law/guideline/framework)
- Backfill-Endpoint POST /controls/backfill-normative-strength
- exclude_duplicates Filter fuer Control-Library (Backend + Proxy + UI-Toggle)
- MkDocs-Kapitel: Normative Verbindlichkeit mit Mermaid-Diagrammen
- scripts/deploy.sh: Auto-Push + Mac Mini rebuild + Coolify health monitoring
- 26 Unit Tests fuer Klassifikations-Logik

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-25 08:18:00 +01:00
parent 6d3bdf8e74
commit 230fbeb490
8 changed files with 796 additions and 4 deletions

View File

@@ -27,7 +27,7 @@ export async function GET(request: NextRequest) {
case 'controls': { case 'controls': {
const controlParams = new URLSearchParams() const controlParams = new URLSearchParams()
const passthrough = ['severity', 'domain', 'release_state', 'verification_method', 'category', const passthrough = ['severity', 'domain', 'release_state', 'verification_method', 'category',
'target_audience', 'source', 'search', 'control_type', 'sort', 'order', 'limit', 'offset'] 'target_audience', 'source', 'search', 'control_type', 'exclude_duplicates', 'sort', 'order', 'limit', 'offset']
for (const key of passthrough) { for (const key of passthrough) {
const val = searchParams.get(key) const val = searchParams.get(key)
if (val) controlParams.set(key, val) if (val) controlParams.set(key, val)
@@ -40,7 +40,7 @@ export async function GET(request: NextRequest) {
case 'controls-count': { case 'controls-count': {
const countParams = new URLSearchParams() const countParams = new URLSearchParams()
const countPassthrough = ['severity', 'domain', 'release_state', 'verification_method', 'category', const countPassthrough = ['severity', 'domain', 'release_state', 'verification_method', 'category',
'target_audience', 'source', 'search', 'control_type'] 'target_audience', 'source', 'search', 'control_type', 'exclude_duplicates']
for (const key of countPassthrough) { for (const key of countPassthrough) {
const val = searchParams.get(key) const val = searchParams.get(key)
if (val) countParams.set(key, val) if (val) countParams.set(key, val)

View File

@@ -54,6 +54,7 @@ export default function ControlLibraryPage() {
const [audienceFilter, setAudienceFilter] = useState<string>('') const [audienceFilter, setAudienceFilter] = useState<string>('')
const [sourceFilter, setSourceFilter] = useState<string>('') const [sourceFilter, setSourceFilter] = useState<string>('')
const [typeFilter, setTypeFilter] = useState<string>('') const [typeFilter, setTypeFilter] = useState<string>('')
const [hideDuplicates, setHideDuplicates] = useState(true)
const [sortBy, setSortBy] = useState<'id' | 'newest' | 'oldest' | 'source'>('id') const [sortBy, setSortBy] = useState<'id' | 'newest' | 'oldest' | 'source'>('id')
// CRUD state // CRUD state
@@ -96,10 +97,11 @@ export default function ControlLibraryPage() {
if (audienceFilter) p.set('target_audience', audienceFilter) if (audienceFilter) p.set('target_audience', audienceFilter)
if (sourceFilter) p.set('source', sourceFilter) if (sourceFilter) p.set('source', sourceFilter)
if (typeFilter) p.set('control_type', typeFilter) if (typeFilter) p.set('control_type', typeFilter)
if (hideDuplicates) p.set('exclude_duplicates', 'true')
if (debouncedSearch) p.set('search', debouncedSearch) if (debouncedSearch) p.set('search', debouncedSearch)
if (extra) for (const [k, v] of Object.entries(extra)) p.set(k, v) if (extra) for (const [k, v] of Object.entries(extra)) p.set(k, v)
return p.toString() return p.toString()
}, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, typeFilter, debouncedSearch]) }, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, typeFilter, hideDuplicates, debouncedSearch])
// Load metadata (domains, sources — once + on refresh) // Load metadata (domains, sources — once + on refresh)
const loadMeta = useCallback(async () => { const loadMeta = useCallback(async () => {
@@ -167,7 +169,7 @@ export default function ControlLibraryPage() {
useEffect(() => { loadControls() }, [loadControls]) useEffect(() => { loadControls() }, [loadControls])
// Reset page when filters change // Reset page when filters change
useEffect(() => { setCurrentPage(1) }, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, typeFilter, debouncedSearch, sortBy]) useEffect(() => { setCurrentPage(1) }, [severityFilter, domainFilter, stateFilter, verificationFilter, categoryFilter, audienceFilter, sourceFilter, typeFilter, hideDuplicates, debouncedSearch, sortBy])
// Pagination // Pagination
const totalPages = Math.max(1, Math.ceil(totalCount / PAGE_SIZE)) const totalPages = Math.max(1, Math.ceil(totalCount / PAGE_SIZE))
@@ -623,6 +625,15 @@ export default function ControlLibraryPage() {
<option value="duplicate">Duplikat</option> <option value="duplicate">Duplikat</option>
<option value="deprecated">Deprecated</option> <option value="deprecated">Deprecated</option>
</select> </select>
<label className="flex items-center gap-1.5 text-sm text-gray-600 cursor-pointer whitespace-nowrap">
<input
type="checkbox"
checked={hideDuplicates}
onChange={e => setHideDuplicates(e.target.checked)}
className="rounded border-gray-300 text-purple-600 focus:ring-purple-500"
/>
Duplikate ausblenden
</label>
<select <select
value={verificationFilter} value={verificationFilter}
onChange={e => setVerificationFilter(e.target.value)} onChange={e => setVerificationFilter(e.target.value)}

View File

@@ -316,6 +316,7 @@ async def list_controls(
source: Optional[str] = Query(None, description="Filter by source_citation->source"), source: Optional[str] = Query(None, description="Filter by source_citation->source"),
search: Optional[str] = Query(None, description="Full-text search in control_id, title, objective"), search: Optional[str] = Query(None, description="Full-text search in control_id, title, objective"),
control_type: Optional[str] = Query(None, description="Filter: atomic, rich, or all"), control_type: Optional[str] = Query(None, description="Filter: atomic, rich, or all"),
exclude_duplicates: bool = Query(False, description="Exclude controls with release_state='duplicate'"),
sort: Optional[str] = Query("control_id", description="Sort field: control_id, created_at, severity"), sort: Optional[str] = Query("control_id", description="Sort field: control_id, created_at, severity"),
order: Optional[str] = Query("asc", description="Sort order: asc or desc"), order: Optional[str] = Query("asc", description="Sort order: asc or desc"),
limit: Optional[int] = Query(None, ge=1, le=5000, description="Max results"), limit: Optional[int] = Query(None, ge=1, le=5000, description="Max results"),
@@ -329,6 +330,9 @@ async def list_controls(
""" """
params: dict[str, Any] = {} params: dict[str, Any] = {}
if exclude_duplicates:
query += " AND release_state != 'duplicate'"
if severity: if severity:
query += " AND severity = :sev" query += " AND severity = :sev"
params["sev"] = severity params["sev"] = severity
@@ -398,11 +402,15 @@ async def count_controls(
source: Optional[str] = Query(None), source: Optional[str] = Query(None),
search: Optional[str] = Query(None), search: Optional[str] = Query(None),
control_type: Optional[str] = Query(None), control_type: Optional[str] = Query(None),
exclude_duplicates: bool = Query(False, description="Exclude controls with release_state='duplicate'"),
): ):
"""Count controls matching filters (for pagination).""" """Count controls matching filters (for pagination)."""
query = "SELECT count(*) FROM canonical_controls WHERE 1=1" query = "SELECT count(*) FROM canonical_controls WHERE 1=1"
params: dict[str, Any] = {} params: dict[str, Any] = {}
if exclude_duplicates:
query += " AND release_state != 'duplicate'"
if severity: if severity:
query += " AND severity = :sev" query += " AND severity = :sev"
params["sev"] = severity params["sev"] = severity
@@ -908,6 +916,107 @@ async def get_control_provenance(control_id: str):
return result return result
# =============================================================================
# NORMATIVE STRENGTH BACKFILL
# =============================================================================
@router.post("/controls/backfill-normative-strength")
async def backfill_normative_strength(
    dry_run: bool = Query(True, description="Nur zaehlen, nicht aendern"),
):
    """
    Re-cap ``normative_strength`` on obligation candidates according to the
    source type of the regulation(s) linked to their parent control.

    Three-tier model:
      - law:       normative_strength is left unchanged
      - guideline: capped at 'should'
      - framework: capped at 'can'

    If a parent control has several parent links, the highest source type
    among them decides which cap applies.

    Args:
        dry_run: When true (the default) the changes are only computed and
            reported; nothing is written to the database.

    Returns:
        A dict with the ``dry_run`` flag, per-category ``stats``, the number
        of rows that would change / were changed (``total_changes``) and the
        first 20 change records (``sample_changes``).
    """
    from compliance.data.source_type_classification import (
        cap_normative_strength,
        classify_source_regulation,
        get_highest_source_type,
    )

    select_candidates = text("""
        SELECT oc.id, oc.candidate_id, oc.normative_strength,
        oc.parent_control_uuid
        FROM obligation_candidates oc
        WHERE oc.release_state NOT IN ('rejected', 'merged')
        AND oc.normative_strength IS NOT NULL
        ORDER BY oc.candidate_id
    """)
    select_links = text("""
        SELECT control_uuid::text, source_regulation
        FROM control_parent_links
        WHERE control_uuid::text = ANY(:uuids)
    """)
    update_strength = text("""
        UPDATE obligation_candidates
        SET normative_strength = :new_strength
        WHERE id = CAST(:oid AS uuid)
    """)

    with SessionLocal() as db:
        # Step 1: load every candidate that is neither rejected nor merged
        # and already carries a normative_strength.
        candidates = db.execute(select_candidates).fetchall()

        # Step 2: classify the source regulations of each distinct parent
        # control (one batched query for all parents).
        parent_ids = list({
            str(row.parent_control_uuid)
            for row in candidates
            if row.parent_control_uuid
        })
        types_per_parent: dict[str, list[str]] = {}
        if parent_ids:
            link_rows = db.execute(select_links, {"uuids": parent_ids}).fetchall()
            for link_row in link_rows:
                classified = classify_source_regulation(link_row.source_regulation or "")
                types_per_parent.setdefault(link_row.control_uuid, []).append(classified)

        # Step 3: work out which candidates need a lower strength.
        stats = {
            "total": len(candidates),
            "unchanged": 0,
            "capped_to_should": 0,
            "capped_to_can": 0,
            "no_parent_links": 0,
        }
        counter_for_cap = {"should": "capped_to_should", "can": "capped_to_can"}
        changes = []

        for row in candidates:
            parent_key = str(row.parent_control_uuid) if row.parent_control_uuid else None
            linked_types = types_per_parent.get(parent_key, []) if parent_key else []
            if not linked_types:
                # No classified parent link: the candidate is left alone.
                stats["no_parent_links"] += 1
                continue

            dominant_type = get_highest_source_type(linked_types)
            capped = cap_normative_strength(row.normative_strength, dominant_type)
            if capped == row.normative_strength:
                stats["unchanged"] += 1
                continue

            changes.append({
                "id": str(row.id),
                "candidate_id": row.candidate_id,
                "old_strength": row.normative_strength,
                "new_strength": capped,
                "source_type": dominant_type,
            })
            counter = counter_for_cap.get(capped)
            if counter is not None:
                stats[counter] += 1

        # Step 4: persist the changes unless this is a dry run.
        if changes and not dry_run:
            for change in changes:
                db.execute(
                    update_strength,
                    {"new_strength": change["new_strength"], "oid": change["id"]},
                )
            db.commit()

    return {
        "dry_run": dry_run,
        "stats": stats,
        "total_changes": len(changes),
        "sample_changes": changes[:20],
    }
# ============================================================================= # =============================================================================
# CONTROL CRUD (CREATE / UPDATE / DELETE) # CONTROL CRUD (CREATE / UPDATE / DELETE)
# ============================================================================= # =============================================================================

View File

@@ -0,0 +1,204 @@
"""
Source-Type-Klassifikation fuer Regulierungen und Frameworks.
Dreistufiges Modell der normativen Verbindlichkeit:
Stufe 1 — GESETZ (law):
Rechtlich bindend. Bussgeld bei Verstoss.
Beispiele: DSGVO, NIS2, AI Act, CRA
Stufe 2 — LEITLINIE (guideline):
Offizielle Auslegungshilfe von Aufsichtsbehoerden.
Beweislastumkehr: Wer abweicht, muss begruenden warum.
Beispiele: EDPB-Leitlinien, BSI-Standards, WP29-Dokumente
Stufe 3 — FRAMEWORK (framework):
Freiwillige Best Practices, nicht rechtsverbindlich.
Aber: Koennen als "Stand der Technik" herangezogen werden.
Beispiele: ENISA, NIST, OWASP, OECD, CISA
Mapping: source_regulation (aus control_parent_links) -> source_type
"""
# --- Type definitions ---
# The three source types of the tier model; the string values are what the
# classification functions in this module return.
SOURCE_TYPE_LAW = "law"  # law / regulation / directive — normative_strength is kept as-is
SOURCE_TYPE_GUIDELINE = "guideline"  # guideline / standard — capped at "should"
SOURCE_TYPE_FRAMEWORK = "framework"  # framework / best practice — capped at "can"
# Maximum normative_strength allowed per source_type
NORMATIVE_STRENGTH_CAP: dict[str, str] = {
    SOURCE_TYPE_LAW: "must",  # effectively no cap ("must" is the top level)
    SOURCE_TYPE_GUIDELINE: "should",  # at most "should"
    SOURCE_TYPE_FRAMEWORK: "can",  # at most "can"
}
# Ordering used for comparisons (higher number = stronger obligation)
STRENGTH_ORDER: dict[str, int] = {
    "can": 1,
    "may": 1,  # alias for "can" (same level)
    "should": 2,
    "must": 3,
}
def cap_normative_strength(original: str, source_type: str) -> str:
    """
    Limit a normative strength to the ceiling allowed for ``source_type``.

    The ceiling comes from ``NORMATIVE_STRENGTH_CAP``; a source type that is
    not listed there gets the ceiling "must". Strength values that are not in
    ``STRENGTH_ORDER`` are ranked like "must" (level 3) for the comparison.

    Args:
        original: The strength as extracted (e.g. "must", "should", "can").
        source_type: One of the SOURCE_TYPE_* values.

    Returns:
        The ceiling if ``original`` ranks above it, otherwise ``original``
        unchanged.

    Examples:
        cap_normative_strength("must", "framework") -> "can"
        cap_normative_strength("should", "law") -> "should"
        cap_normative_strength("must", "guideline") -> "should"
    """
    ceiling = NORMATIVE_STRENGTH_CAP.get(source_type, "must")
    exceeds_ceiling = STRENGTH_ORDER.get(original, 3) > STRENGTH_ORDER.get(ceiling, 3)
    return ceiling if exceeds_ceiling else original
def get_highest_source_type(source_types: list[str]) -> str:
    """
    Pick the most binding source type from a list (law > guideline > framework).

    Values that are not one of the three known types rank below "framework".
    On a tie the entry that appears first in the list is returned.

    Args:
        source_types: Source types of all parent links of one control.

    Returns:
        The highest-ranking entry, or "framework" for an empty list.

    Examples:
        get_highest_source_type(["framework", "law"]) -> "law"
        get_highest_source_type(["framework", "guideline"]) -> "guideline"
    """
    if not source_types:
        return SOURCE_TYPE_FRAMEWORK

    rank = {SOURCE_TYPE_FRAMEWORK: 1, SOURCE_TYPE_GUIDELINE: 2, SOURCE_TYPE_LAW: 3}
    winner = source_types[0]
    for candidate in source_types[1:]:
        # Strict ">" keeps the earliest entry when two types rank equally.
        if rank.get(candidate, 0) > rank.get(winner, 0):
            winner = candidate
    return winner
# ============================================================================
# Classification: source_regulation -> source_type
#
# This map is the exact-match lookup table for source_regulation values.
# Per the original note it is used for the backfill and for future pipeline
# runs. Add new regulations here!
# ============================================================================
SOURCE_REGULATION_CLASSIFICATION: dict[str, str] = {
    # --- EU regulations (directly binding) ---
    "DSGVO (EU) 2016/679": SOURCE_TYPE_LAW,
    "KI-Verordnung (EU) 2024/1689": SOURCE_TYPE_LAW,
    "Cyber Resilience Act (CRA)": SOURCE_TYPE_LAW,
    # NOTE(review): the NIS2 entry is a directive ("Richtlinie"), not a
    # regulation; it is listed in this section and treated as law.
    "NIS2-Richtlinie (EU) 2022/2555": SOURCE_TYPE_LAW,
    "Data Act": SOURCE_TYPE_LAW,
    "Data Governance Act (DGA)": SOURCE_TYPE_LAW,
    "Markets in Crypto-Assets (MiCA)": SOURCE_TYPE_LAW,
    "Maschinenverordnung (EU) 2023/1230": SOURCE_TYPE_LAW,
    "Batterieverordnung (EU) 2023/1542": SOURCE_TYPE_LAW,
    "AML-Verordnung": SOURCE_TYPE_LAW,
    # --- EU directives (binding after national transposition) ---
    # Treated like laws for compliance purposes (no entries in this section)
    # --- National laws ---
    "Bundesdatenschutzgesetz (BDSG)": SOURCE_TYPE_LAW,
    "Telekommunikationsgesetz": SOURCE_TYPE_LAW,
    "Telekommunikationsgesetz Oesterreich": SOURCE_TYPE_LAW,
    "Gewerbeordnung (GewO)": SOURCE_TYPE_LAW,
    "Handelsgesetzbuch (HGB)": SOURCE_TYPE_LAW,
    "Abgabenordnung (AO)": SOURCE_TYPE_LAW,
    "IFRS-Übernahmeverordnung": SOURCE_TYPE_LAW,
    "Österreichisches Datenschutzgesetz (DSG)": SOURCE_TYPE_LAW,
    "LOPDGDD - Ley Orgánica de Protección de Datos (Spanien)": SOURCE_TYPE_LAW,
    "Loi Informatique et Libertés (Frankreich)": SOURCE_TYPE_LAW,
    "Információs önrendelkezési jog törvény (Ungarn)": SOURCE_TYPE_LAW,
    # NOTE(review): the Blue Guide looks like European Commission guidance
    # rather than a national law — confirm that "law" (no cap) is intended.
    "EU Blue Guide 2022": SOURCE_TYPE_LAW,
    # --- EDPB/WP29 guidelines (official interpretive guidance) ---
    "EDPB Leitlinien 01/2019 (Zertifizierung)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 01/2020 (Datentransfers)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 01/2020 (Vernetzte Fahrzeuge)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 01/2022 (BCR)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 01/2024 (Berechtigtes Interesse)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 04/2019 (Data Protection by Design)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 05/2020 - Einwilligung": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 07/2020 (Datentransfers)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 08/2020 (Social Media)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 09/2022 (Data Breach)": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien 09/2022 - Meldung von Datenschutzverletzungen": SOURCE_TYPE_GUIDELINE,
    "EDPB Empfehlungen 01/2020 - Ergaenzende Massnahmen fuer Datentransfers": SOURCE_TYPE_GUIDELINE,
    "EDPB Leitlinien - Berechtigtes Interesse (Art. 6(1)(f))": SOURCE_TYPE_GUIDELINE,
    "WP244 Leitlinien (Profiling)": SOURCE_TYPE_GUIDELINE,
    "WP251 Leitlinien (Profiling)": SOURCE_TYPE_GUIDELINE,
    "WP260 Leitlinien (Transparenz)": SOURCE_TYPE_GUIDELINE,
    # --- BSI standards (technical guidelines issued by a public authority) ---
    "BSI-TR-03161-1": SOURCE_TYPE_GUIDELINE,
    "BSI-TR-03161-2": SOURCE_TYPE_GUIDELINE,
    "BSI-TR-03161-3": SOURCE_TYPE_GUIDELINE,
    # --- ENISA (EU agency, but its recommendations are not legally binding) ---
    "ENISA Cybersecurity State 2024": SOURCE_TYPE_FRAMEWORK,
    "ENISA ICS/SCADA Dependencies": SOURCE_TYPE_FRAMEWORK,
    "ENISA Supply Chain Good Practices": SOURCE_TYPE_FRAMEWORK,
    "ENISA Threat Landscape Supply Chain": SOURCE_TYPE_FRAMEWORK,
    # --- NIST (US standards, used internationally as best practice) ---
    "NIST AI Risk Management Framework": SOURCE_TYPE_FRAMEWORK,
    "NIST Cybersecurity Framework 2.0": SOURCE_TYPE_FRAMEWORK,
    "NIST SP 800-207 (Zero Trust)": SOURCE_TYPE_FRAMEWORK,
    "NIST SP 800-218 (SSDF)": SOURCE_TYPE_FRAMEWORK,
    "NIST SP 800-53 Rev. 5": SOURCE_TYPE_FRAMEWORK,
    "NIST SP 800-63-3": SOURCE_TYPE_FRAMEWORK,
    # --- OWASP (community standards) ---
    "OWASP API Security Top 10 (2023)": SOURCE_TYPE_FRAMEWORK,
    "OWASP ASVS 4.0": SOURCE_TYPE_FRAMEWORK,
    "OWASP MASVS 2.0": SOURCE_TYPE_FRAMEWORK,
    "OWASP SAMM 2.0": SOURCE_TYPE_FRAMEWORK,
    "OWASP Top 10 (2021)": SOURCE_TYPE_FRAMEWORK,
    # --- Other frameworks ---
    "OECD KI-Empfehlung": SOURCE_TYPE_FRAMEWORK,
    "CISA Secure by Design": SOURCE_TYPE_FRAMEWORK,
}
def classify_source_regulation(source_regulation: str) -> str:
    """
    Classify a ``source_regulation`` value as law, guideline or framework.

    Lookup order:
      1. Exact match against ``SOURCE_REGULATION_CLASSIFICATION``.
      2. Keyword heuristic for unknown sources (law keywords are checked
         before guideline keywords).
      3. Fallback ``framework`` — the least binding tier, so an unknown
         source never ends up with an uncapped normative strength.

    Short keywords ("act", "ley", "loi", "bsi") are matched as whole words
    and "wp2" as the start of a word. Matching them as plain substrings
    classified e.g. "ENISA Good Practices" or "Privacy Impact Assessment" as
    law (both contain "act") and anything containing "Websites" as guideline
    (contains "bsi"). Longer keywords stay substring matches because they
    occur inside German compounds ("Maschinenverordnung",
    "Bundesdatenschutzgesetz").

    Args:
        source_regulation: The regulation name as stored on a parent link.
            Empty or ``None`` is treated as unknown.

    Returns:
        One of SOURCE_TYPE_LAW, SOURCE_TYPE_GUIDELINE, SOURCE_TYPE_FRAMEWORK.
    """
    import re  # function-scope import: only the heuristic path needs it

    if not source_regulation:
        return SOURCE_TYPE_FRAMEWORK

    # Exact match against the curated map always wins.
    if source_regulation in SOURCE_REGULATION_CLASSIFICATION:
        return SOURCE_REGULATION_CLASSIFICATION[source_regulation]

    # Heuristic for sources that are not in the map.
    lower = source_regulation.lower()
    # Alphanumeric runs only, so "BSI-TR-03161" yields the word "bsi" and
    # "Cyber_Resilience_Act" yields the word "act".
    words = set(re.findall(r"[^\W_]+", lower))

    # Law indicators.
    law_substrings = (
        "verordnung", "richtlinie", "gesetz", "directive", "regulation",
        "(eu)", "(eg)", "törvény", "código",
    )
    law_words = ("act", "ley", "loi")
    if any(ind in lower for ind in law_substrings) or any(w in words for w in law_words):
        return SOURCE_TYPE_LAW

    # Guideline indicators.
    guideline_substrings = ("edpb", "leitlinie", "guideline", "empfehlung")
    if (
        any(ind in lower for ind in guideline_substrings)
        or "bsi" in words
        or any(w.startswith("wp2") for w in words)  # WP29 papers: WP244, WP260, ...
    ):
        return SOURCE_TYPE_GUIDELINE

    # Known framework issuers (ENISA, NIST, OWASP, OECD, CISA, ISO) and every
    # other unknown source share the same result: framework is the
    # conservative default (lowest bindingness).
    return SOURCE_TYPE_FRAMEWORK

View File

@@ -0,0 +1,102 @@
"""Tests for source_type_classification module."""
import sys
sys.path.insert(0, ".")
from compliance.data.source_type_classification import (
classify_source_regulation,
cap_normative_strength,
get_highest_source_type,
SOURCE_TYPE_LAW,
SOURCE_TYPE_GUIDELINE,
SOURCE_TYPE_FRAMEWORK,
)
class TestClassifySourceRegulation:
    """Exact-match and heuristic behaviour of classify_source_regulation()."""

    # --- exact matches from the classification map ---

    def test_eu_regulation(self):
        source_type = classify_source_regulation("DSGVO (EU) 2016/679")
        assert source_type == SOURCE_TYPE_LAW

    def test_eu_directive(self):
        source_type = classify_source_regulation("NIS2-Richtlinie (EU) 2022/2555")
        assert source_type == SOURCE_TYPE_LAW

    def test_national_law(self):
        source_type = classify_source_regulation("Bundesdatenschutzgesetz (BDSG)")
        assert source_type == SOURCE_TYPE_LAW

    def test_edpb_guideline(self):
        source_type = classify_source_regulation("EDPB Leitlinien 01/2020 (Datentransfers)")
        assert source_type == SOURCE_TYPE_GUIDELINE

    def test_bsi_standard(self):
        source_type = classify_source_regulation("BSI-TR-03161-1")
        assert source_type == SOURCE_TYPE_GUIDELINE

    def test_wp29_guideline(self):
        source_type = classify_source_regulation("WP260 Leitlinien (Transparenz)")
        assert source_type == SOURCE_TYPE_GUIDELINE

    def test_enisa_framework(self):
        source_type = classify_source_regulation("ENISA Supply Chain Good Practices")
        assert source_type == SOURCE_TYPE_FRAMEWORK

    def test_nist_framework(self):
        source_type = classify_source_regulation("NIST Cybersecurity Framework 2.0")
        assert source_type == SOURCE_TYPE_FRAMEWORK

    def test_owasp_framework(self):
        source_type = classify_source_regulation("OWASP Top 10 (2021)")
        assert source_type == SOURCE_TYPE_FRAMEWORK

    # --- fallbacks ---

    def test_unknown_defaults_to_framework(self):
        source_type = classify_source_regulation("Some Unknown Source")
        assert source_type == SOURCE_TYPE_FRAMEWORK

    def test_empty_string(self):
        source_type = classify_source_regulation("")
        assert source_type == SOURCE_TYPE_FRAMEWORK

    # --- keyword heuristic for names that are not in the map ---

    def test_heuristic_verordnung(self):
        source_type = classify_source_regulation("Neue Verordnung 2027")
        assert source_type == SOURCE_TYPE_LAW

    def test_heuristic_nist(self):
        source_type = classify_source_regulation("NIST Future Standard")
        assert source_type == SOURCE_TYPE_FRAMEWORK
class TestCapNormativeStrength:
    """Capping behaviour of cap_normative_strength() per source type."""

    # --- law: nothing is capped ---

    def test_must_from_law_stays(self):
        capped = cap_normative_strength("must", SOURCE_TYPE_LAW)
        assert capped == "must"

    def test_should_from_law_stays(self):
        capped = cap_normative_strength("should", SOURCE_TYPE_LAW)
        assert capped == "should"

    # --- guideline: ceiling is "should" ---

    def test_must_from_guideline_capped(self):
        capped = cap_normative_strength("must", SOURCE_TYPE_GUIDELINE)
        assert capped == "should"

    def test_should_from_guideline_stays(self):
        capped = cap_normative_strength("should", SOURCE_TYPE_GUIDELINE)
        assert capped == "should"

    # --- framework: ceiling is "can" ---

    def test_must_from_framework_capped(self):
        capped = cap_normative_strength("must", SOURCE_TYPE_FRAMEWORK)
        assert capped == "can"

    def test_should_from_framework_capped(self):
        capped = cap_normative_strength("should", SOURCE_TYPE_FRAMEWORK)
        assert capped == "can"

    def test_can_from_framework_stays(self):
        capped = cap_normative_strength("can", SOURCE_TYPE_FRAMEWORK)
        assert capped == "can"

    def test_can_from_law_stays(self):
        capped = cap_normative_strength("can", SOURCE_TYPE_LAW)
        assert capped == "can"
class TestGetHighestSourceType:
    """Ranking behaviour of get_highest_source_type()."""

    def test_law_wins(self):
        highest = get_highest_source_type(["framework", "law"])
        assert highest == "law"

    def test_guideline_over_framework(self):
        highest = get_highest_source_type(["framework", "guideline"])
        assert highest == "guideline"

    def test_single_framework(self):
        highest = get_highest_source_type(["framework"])
        assert highest == "framework"

    def test_empty_defaults_to_framework(self):
        highest = get_highest_source_type([])
        assert highest == "framework"

    def test_all_three(self):
        highest = get_highest_source_type(["framework", "guideline", "law"])
        assert highest == "law"

View File

@@ -0,0 +1,201 @@
# Normative Verbindlichkeit — Dreistufenmodell
## Uebersicht
Nicht jede Quelle, aus der Controls abgeleitet werden, hat die gleiche rechtliche
Verbindlichkeit. Ein Control, das aus einem EU-Gesetz stammt, hat ein anderes
Gewicht als eines aus einem freiwilligen Framework.
Das Dreistufenmodell klassifiziert jede Quell-Regulierung und leitet daraus die
**effektive normative Staerke** der daraus erzeugten Obligations ab.
## Die drei Stufen
```mermaid
graph TB
subgraph "Stufe 1 — GESETZ (law)"
direction LR
A1["DSGVO, NIS2, AI Act, CRA..."]
A2["Rechtlich bindend"]
A3["Bussgeld bei Verstoss"]
A4["normative_strength: must/should/may"]
end
subgraph "Stufe 2 — LEITLINIE (guideline)"
direction LR
B1["EDPB-Leitlinien, BSI-TR, WP29"]
B2["Offizielle Auslegungshilfe"]
B3["Beweislastumkehr"]
B4["max normative_strength: should"]
end
subgraph "Stufe 3 — FRAMEWORK (framework)"
direction LR
C1["ENISA, NIST, OWASP, OECD"]
C2["Freiwillige Best Practice"]
C3["Stand der Technik"]
C4["max normative_strength: can"]
end
A1 --> A2 --> A3 --> A4
B1 --> B2 --> B3 --> B4
C1 --> C2 --> C3 --> C4
```
### Stufe 1: Gesetz (law)
| Eigenschaft | Beschreibung |
|---|---|
| **Verbindlichkeit** | Rechtlich bindend, Bussgeld bei Verstoss |
| **normative_strength** | Bleibt wie im Gesetzestext: `must`, `should` oder `may` |
| **Beispiele** | DSGVO (EU) 2016/679, NIS2-Richtlinie, KI-Verordnung, CRA, BDSG |
| **Warum relevant** | "Sie MUESSEN angemessene technische Massnahmen ergreifen" (Art. 32 DSGVO) |
!!! warning "Wichtig"
Gesetze formulieren Pflichten **abstrakt**. Art. 32 DSGVO sagt:
"dem Stand der Technik entsprechende Massnahmen" — aber NICHT
"verwende AES-256". Das WAS ist Pflicht, das WIE bleibt offen.
### Stufe 2: Leitlinie (guideline)
| Eigenschaft | Beschreibung |
|---|---|
| **Verbindlichkeit** | Nicht direkt bindend, aber Beweislastumkehr |
| **normative_strength** | Maximal `should` — auch wenn die Leitlinie intern "must" schreibt |
| **Beispiele** | EDPB-Leitlinien, BSI Technische Richtlinien, WP29-Dokumente |
| **Warum relevant** | "Daten at rest muessen verschluesselt werden" (BSI-TR) → `should` |
!!! info "Beweislastumkehr"
Wenn eine Aufsichtsbehoerde fragt "Warum verschluesselt ihr nicht?",
muss die Firma begruenden, warum sie von der Leitlinie abweicht.
Die Firma muss aber nicht genau so verschluesseln, wie das BSI es vorschlaegt.
### Stufe 3: Framework (framework)
| Eigenschaft | Beschreibung |
|---|---|
| **Verbindlichkeit** | Freiwillig, nicht rechtsverbindlich |
| **normative_strength** | Maximal `can` — unabhaengig von interner Sprache |
| **Beispiele** | ENISA CCM, NIST CSF, OWASP Top 10, OECD KI-Empfehlung |
| **Warum relevant** | "Organizations SHALL implement..." (ENISA) → `can` fuer den Anwender |
!!! tip "Stand der Technik"
NIS2 Art. 21 verweist auf ENISA-Leitlinien als Referenz fuer den
"Stand der Technik". Das hebt ENISA-Controls faktisch auf Stufe 2 (`should`)
— aber nur im Kontext von NIS2-pflichtigen Unternehmen, nicht generell.
## Ableitungskette
Die vollstaendige Kette von der Rechtsquelle zum atomaren Control:
```mermaid
graph LR
R["Regulierung<br/>(DSGVO Art. 32)"] -->|"MUSS"| O["Obligation<br/>(Daten schuetzen)"]
O -->|decomposition| RC["Rich Control<br/>(Verschluesselung)"]
RC -->|pass0b| AC["Atomares Control<br/>(AES-256 at rest)"]
R2["Framework<br/>(ENISA CCM)"] -->|"KANN"| AC
style R fill:#fee2e2,stroke:#dc2626
style R2 fill:#dbeafe,stroke:#2563eb
style O fill:#fef3c7,stroke:#d97706
style RC fill:#e0e7ff,stroke:#4f46e5
style AC fill:#d1fae5,stroke:#059669
```
**Beispiel**: Das atomare Control "AES-256 Verschluesselung at rest"
- Aus DSGVO Art. 32 abgeleitet → Obligation "must secure data" → **MUSS** (die Pflicht zu schuetzen)
- Aus ENISA CCM konkretisiert → **KANN** (AES-256 ist *eine* moegliche Umsetzung)
- Resultat: Die Firma MUSS verschluesseln, KANN aber waehlen wie
## Multi-Parent-Links
Ein atomares Control kann aus mehreren Quellen stammen:
| Control | Parent 1 | Parent 2 | Parent 3 | Effektive Staerke |
|---|---|---|---|---|
| SEC-042 (Encrypt at rest) | DSGVO Art. 32 (law) | NIS2 Art. 21 (law) | ENISA CCM (framework) | **must** (Gesetz uebertrumpft) |
| NET-015 (Zero Trust) | NIST SP 800-207 (framework) | CISA (framework) | — | **can** (nur Frameworks) |
| AUTH-003 (MFA) | DSGVO Art. 32 (law) | BSI-TR (guideline) | OWASP ASVS (framework) | **must** (Gesetz vorhanden) |
**Regel**: Der hoechste source_type bestimmt, ob die normative_strength begrenzt wird.
Wenn mindestens ein Parent-Link ein Gesetz ist, bleibt die Staerke wie extrahiert.
## Technische Umsetzung
### Klassifikations-Map
Datei: `backend-compliance/compliance/data/source_type_classification.py`
Jeder `source_regulation`-Wert aus `control_parent_links` wird klassifiziert:
```python
SOURCE_REGULATION_CLASSIFICATION = {
"DSGVO (EU) 2016/679": "law",
"EDPB Leitlinien 01/2020 (Datentransfers)": "guideline",
"NIST Cybersecurity Framework 2.0": "framework",
# ... 55+ Eintraege
}
```
### Backfill-Endpoint
```
POST /api/compliance/v1/canonical/controls/backfill-normative-strength?dry_run=true
```
Ablauf:
1. Alle aktiven `obligation_candidates` laden
2. Fuer jede Obligation den Parent-Control finden
3. Ueber `control_parent_links` die source_regulations ermitteln
4. Hoechsten source_type bestimmen
5. `normative_strength` begrenzen falls noetig
6. Bei `dry_run=false`: Aenderungen in die DB schreiben
### Cap-Funktion
```python
def cap_normative_strength(original: str, source_type: str) -> str:
"""
cap_normative_strength("must", "framework") → "can"
cap_normative_strength("should", "law") → "should"
cap_normative_strength("must", "guideline") → "should"
"""
```
## Frontend-Anzeige
In der Control-Detail-Ansicht werden Obligations mit farbcodierten Badges angezeigt:
| normative_strength | Badge | Farbe | Bedeutung |
|---|---|---|---|
| `must` | **MUSS** | Rot | Gesetzliche Pflicht |
| `should` | **SOLL** | Gelb/Amber | Empfohlen, Begruendungspflicht bei Abweichung |
| `can` / `may` | **KANN** | Gruen | Freiwillige Best Practice |
## Haeufige Fragen
### Warum steht bei einem ENISA-Control "MUSS"?
**Vor dem Backfill**: Das System uebernahm die Sprache des Quelldokuments 1:1.
ENISA schreibt intern "shall/must" weil es innerhalb seines Frameworks
verbindlich formuliert. Fuer den Anwender ist das ENISA-Dokument aber nicht
rechtsverbindlich.
**Nach dem Backfill**: ENISA-Controls zeigen maximal "KANN", es sei denn
ein Gesetz (z.B. NIS2) referenziert dasselbe Control — dann gilt die
gesetzliche Verbindlichkeit.
### Was bedeutet "Stand der Technik"?
NIS2 und DSGVO verweisen auf den "Stand der Technik", ohne ihn zu definieren.
In der Praxis werden ENISA- und BSI-Dokumente als Referenz herangezogen.
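Das macht ihre Empfehlungen relevant, aber nicht zu Gesetzen ("MUSS"): BSI-Dokumente werden als Leitlinie maximal mit "SOLL" bewertet, ENISA-Dokumente als Framework maximal mit "KANN".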
### Wie gehe ich mit unbekannten Quellen um?
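Neue Regulierungen muessen in der `SOURCE_REGULATION_CLASSIFICATION` Map
eingetragen werden. Quellen ohne exakten Eintrag werden zunaechst per
Schluesselwort-Heuristik eingeordnet (z.B. "Verordnung" → `law`,
"Leitlinie" → `guideline`). Greift keine Heuristik, ist der Fallback `framework`
(konservativstes Ergebnis — geringste Verbindlichkeit zugewiesen).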

View File

@@ -109,6 +109,7 @@ nav:
- Control Generator Pipeline: services/sdk-modules/control-generator-pipeline.md - Control Generator Pipeline: services/sdk-modules/control-generator-pipeline.md
- Deduplizierungs-Engine: services/sdk-modules/dedup-engine.md - Deduplizierungs-Engine: services/sdk-modules/dedup-engine.md
- Control Provenance Wiki: services/sdk-modules/control-provenance.md - Control Provenance Wiki: services/sdk-modules/control-provenance.md
- Normative Verbindlichkeit (Dreistufenmodell): services/sdk-modules/normative-verbindlichkeit.md
- Anti-Fake-Evidence Architektur: services/sdk-modules/anti-fake-evidence.md - Anti-Fake-Evidence Architektur: services/sdk-modules/anti-fake-evidence.md
- Strategie: - Strategie:
- Wettbewerbsanalyse & Roadmap: strategy/wettbewerbsanalyse.md - Wettbewerbsanalyse & Roadmap: strategy/wettbewerbsanalyse.md

164
scripts/deploy.sh Executable file
View File

@@ -0,0 +1,164 @@
#!/usr/bin/env bash
# =========================================================
# BreakPilot Compliance — Deploy Script
# =========================================================
# Pushes to both remotes, rebuilds changed services on
# Mac Mini, and monitors Coolify production health.
#
# Usage: ./scripts/deploy.sh
# =========================================================
# Abort on the first failing command, on any use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# DIR_TO_SERVICE below is an associative array (declare -A), which needs
# bash >= 4. "#!/usr/bin/env bash" runs whichever bash is first on PATH; an
# older one would otherwise stop at the declare with a cryptic
# "invalid option" error, so fail early with an actionable message instead.
if [ "${BASH_VERSINFO[0]:-0}" -lt 4 ]; then
  printf "\033[1;31m[FAIL]\033[0m %s\n" \
    "bash >= 4 is required for associative arrays (found ${BASH_VERSION:-unknown})." >&2
  exit 1
fi

# --- Configuration ---
PROJECT="breakpilot-compliance"
# The same absolute path is used for the checkout on the Mac Mini (passed to
# git -C and docker compose -f over ssh further down).
PROJECT_DIR="/Users/benjaminadmin/Projekte/${PROJECT}"
COMPOSE_FILE="${PROJECT_DIR}/docker-compose.yml"
# Absolute path of the docker binary as invoked on the Mac Mini.
DOCKER="/usr/local/bin/docker"
# ssh host name/alias of the Mac Mini.
MAC_MINI="macmini"

# Coolify health endpoints
HEALTH_ENDPOINTS=(
  "https://api-dev.breakpilot.ai/health"
  "https://sdk-dev.breakpilot.ai/health"
)
HEALTH_INTERVAL=20   # seconds between polls
HEALTH_TIMEOUT=300   # 5 minutes

# Map top-level directories to docker-compose service names
declare -A DIR_TO_SERVICE=(
  [admin-compliance]=admin-compliance
  [backend-compliance]=backend-compliance
  [ai-compliance-sdk]=ai-compliance-sdk
  [developer-portal]=developer-portal
  [compliance-tts-service]=compliance-tts-service
  [document-crawler]=document-crawler
  [dsms-node]=dsms-node
  [dsms-gateway]=dsms-gateway
  [docs-src]=docs
)
# --- Helpers ---
# Colored status loggers. Each joins all of its arguments into one message
# and prints it to stdout after a bold, colored tag.

# Blue [INFO] tag: progress messages.
info() {
  local message="$*"
  printf "\033[1;34m[INFO]\033[0m %s\n" "${message}"
}

# Green [OK] tag: a step finished successfully.
ok() {
  local message="$*"
  printf "\033[1;32m[OK]\033[0m %s\n" "${message}"
}

# Yellow [WARN] tag: something noteworthy that does not stop the script.
warn() {
  local message="$*"
  printf "\033[1;33m[WARN]\033[0m %s\n" "${message}"
}

# Red [FAIL] tag: only prints; it does not exit.
fail() {
  local message="$*"
  printf "\033[1;31m[FAIL]\033[0m %s\n" "${message}"
}
# --- Step 1: Push to both remotes ---
# Push the main branch to one named remote, logging before and after.
# $1 = git remote name, $2 = short description shown in the log line.
# A failing push aborts the whole script because errexit is active.
push_main_to() {
  local remote="$1" description="$2"
  info "Pushing to ${remote} (${description})..."
  git push "${remote}" main
  ok "Pushed to ${remote}."
}

push_main_to origin "local Gitea"
push_main_to gitea "external"
# --- Step 2: Detect changed services ---
info "Detecting changed services since last deploy..."

# Print the unique first path components of all files that differ between
# commits $1 and $2, one per line. Prints nothing when the range is empty.
# Returns non-zero when git cannot compute the diff (e.g. a commit that does
# not exist in this repository) so the caller can react explicitly; without
# that, errexit + pipefail would end the script with no message at all.
changed_top_level_dirs() {
  local from="$1" to="$2" files
  files=$(git diff --name-only "${from}" "${to}" 2>/dev/null) || return 1
  [ -n "${files}" ] || return 0
  printf '%s\n' "${files}" | cut -d'/' -f1 | sort -u
}

# Get the commit before the push (what Mac Mini currently has).
# Empty when the ssh call or the remote git command fails.
REMOTE_HEAD=$(ssh "${MAC_MINI}" "git -C ${PROJECT_DIR} rev-parse HEAD" 2>/dev/null || echo "")
LOCAL_HEAD=$(git rev-parse HEAD)

CHANGED_SERVICES=()
CHANGED_DIRS=""
use_last_commit=true

if [ -n "${REMOTE_HEAD}" ] && [ "${REMOTE_HEAD}" != "${LOCAL_HEAD}" ]; then
  if CHANGED_DIRS=$(changed_top_level_dirs "${REMOTE_HEAD}" "${LOCAL_HEAD}"); then
    use_last_commit=false
  else
    # Typically the Mac Mini is on a commit this repository does not have.
    warn "Cannot diff ${REMOTE_HEAD}..${LOCAL_HEAD} locally. Falling back to last commit diff (earlier changes may be missed)."
  fi
else
  # Cannot determine diff or already up to date — check last 1 commit
  info "Cannot determine remote HEAD or already equal. Checking last commit diff..."
fi

if ${use_last_commit}; then
  if ! CHANGED_DIRS=$(changed_top_level_dirs HEAD~1 HEAD); then
    fail "Cannot diff HEAD~1..HEAD; unable to detect changed services."
    exit 1
  fi
fi

# Translate changed top-level entries into compose service names. Reading
# line by line avoids word splitting and glob expansion of path names.
while IFS= read -r dir; do
  # An empty CHANGED_DIRS still yields one empty line; an empty associative
  # array subscript would be an error, so skip it.
  [ -n "${dir}" ] || continue
  svc="${DIR_TO_SERVICE[${dir}]:-}"
  if [ -n "${svc}" ]; then
    CHANGED_SERVICES+=("${svc}")
  fi
done <<< "${CHANGED_DIRS}"

# Also check if docker-compose.yml itself changed. A here-string is used
# instead of "echo | grep -q" so an early grep exit cannot surface as a
# pipeline failure under pipefail.
if grep -q "^docker-compose" <<< "${CHANGED_DIRS}"; then
  info "docker-compose.yml changed — will rebuild all services."
  CHANGED_SERVICES=("${DIR_TO_SERVICE[@]}")
fi
# Run one command line on the Mac Mini over ssh.
run_on_mac_mini() {
  ssh "${MAC_MINI}" "$1"
}

if [ "${#CHANGED_SERVICES[@]}" -gt 0 ]; then
  # Deduplicate (sorted, unique service names)
  unique_services=$(printf '%s\n' "${CHANGED_SERVICES[@]}" | sort -u)
  CHANGED_SERVICES=(${unique_services})
  info "Changed services: ${CHANGED_SERVICES[*]}"

  # --- Step 3: Pull code on Mac Mini ---
  info "Pulling latest code on Mac Mini..."
  run_on_mac_mini "git -C ${PROJECT_DIR} pull --no-rebase origin main"
  ok "Code pulled on Mac Mini."

  # --- Step 4: Rebuild + restart changed services ---
  # Space-separated service list appended to the remote compose commands.
  SERVICES_STR="${CHANGED_SERVICES[*]}"
  compose_cmd="${DOCKER} compose -f ${COMPOSE_FILE}"

  info "Building changed services on Mac Mini: ${SERVICES_STR}"
  run_on_mac_mini "${compose_cmd} build ${SERVICES_STR}"
  ok "Build complete."

  # --no-deps: recreate only the listed services, not the ones they depend on.
  info "Restarting changed services on Mac Mini: ${SERVICES_STR}"
  run_on_mac_mini "${compose_cmd} up -d --no-deps ${SERVICES_STR}"
  ok "Services restarted on Mac Mini."
else
  warn "No service directories changed. Nothing to rebuild on Mac Mini."
  info "Coolify will still deploy from the gitea push."
fi
# --- Step 5: Monitor Coolify health in background ---
info "Monitoring Coolify production health in background (every ${HEALTH_INTERVAL}s, max ${HEALTH_TIMEOUT}s)..."

# The poll loop runs in a backgrounded subshell so the script itself can
# finish right away; the banner is printed later by that subshell.
# NOTE(review): an endpoint still served by the previous deployment would
# also count as healthy here — confirm this is acceptable.
(
  total=${#HEALTH_ENDPOINTS[@]}
  waited=0
  outcome="timeout"

  until [ "${waited}" -ge "${HEALTH_TIMEOUT}" ]; do
    sleep "${HEALTH_INTERVAL}"
    # Only the sleep interval is counted, not the time spent in curl.
    waited=$((waited + HEALTH_INTERVAL))

    up=0
    for url in "${HEALTH_ENDPOINTS[@]}"; do
      # -f turns HTTP error responses into a non-zero exit; 5s cap per probe.
      if curl -sf --max-time 5 "${url}" >/dev/null 2>&1; then
        up=$((up + 1))
      fi
    done

    if [ "${up}" -eq "${total}" ]; then
      outcome="healthy"
      break
    fi

    printf "\033[1;34m[HEALTH]\033[0m %d/%d endpoints healthy (%ds elapsed)\n" \
      "${up}" "${total}" "${waited}"
  done

  echo ""
  case "${outcome}" in
    healthy)
      printf "\033[1;32m========================================\033[0m\n"
      printf "\033[1;32m Coolify deploy complete! \033[0m\n"
      printf "\033[1;32m All health endpoints are healthy. \033[0m\n"
      printf "\033[1;32m Test at: https://admin-dev.breakpilot.ai\033[0m\n"
      printf "\033[1;32m========================================\033[0m\n"
      ;;
    *)
      printf "\033[1;31m========================================\033[0m\n"
      printf "\033[1;31m Coolify deploy may have failed! \033[0m\n"
      printf "\033[1;31m Not all endpoints healthy after %ds. \033[0m\n" "${HEALTH_TIMEOUT}"
      printf "\033[1;31m Check Coolify logs. \033[0m\n"
      printf "\033[1;31m========================================\033[0m\n"
      ;;
  esac
) &
# PID of the background monitor, reported below.
HEALTH_PID=$!

# --- Step 6: Report ---
echo ""
ok "Local deploy to Mac Mini: done."
info "Coolify health monitor running in background (PID ${HEALTH_PID})."
info "You will see a status banner when Coolify is ready (or after ${HEALTH_TIMEOUT}s timeout)."
echo ""