Merge remote-tracking branch 'origin/main' into feat/advisor-status

This commit is contained in:
Benjamin Admin
2026-06-25 19:31:20 +02:00
4 changed files with 1707 additions and 4 deletions
@@ -2,7 +2,7 @@
"schema_version": "controls_for_obligation_mapping_v1", "schema_version": "controls_for_obligation_mapping_v1",
"purpose": "Accepted CRA->Framework controls (Compliance Execution Graph) for the Obligation Registry to propose the SEMANTIC control->obligation_id, replacing the coarse citation_unit interim join. Fill proposed_obligation_id per control, then we adopt it into control_mapping.obligation_id.", "purpose": "Accepted CRA->Framework controls (Compliance Execution Graph) for the Obligation Registry to propose the SEMANTIC control->obligation_id, replacing the coarse citation_unit interim join. Fill proposed_obligation_id per control, then we adopt it into control_mapping.obligation_id.",
"source": "ai-compliance-sdk control_mappings, mapping_status=accepted, reviewed_by=benjamin 2026-06-25. OWASP ASVS (7, gefuellt) + NIST SP 800-53 (3, pending).", "source": "ai-compliance-sdk control_mappings, mapping_status=accepted, reviewed_by=benjamin 2026-06-25. OWASP ASVS (7, gefuellt) + NIST SP 800-53 (3, pending).",
"filled_by": "obligation-registry-session 2026-06-25 (OWASP 7/7: 4 auth/crypto + 3 logging). NIST 3 NEU + pending: SI-7/SI-2/CM-7. Notes auf updates-Familie (join_keys 93) ausgerichtet: SI-2->provide_security_updates (stark), SI-7->signed_update_integrity (partiell, SI-7 breiter), CM-7->remote_access_attack_surface_min (partiell, CM-7 breiter).", "filled_by": "obligation-registry-session 2026-06-25. OWASP 7/7 (4 auth/crypto + 3 logging). NIST 3/3 GEFUELLT (Obligation-Session): SI-2->provide_security_updates (stark, (2)(c)/Art.13) · SI-7->signed_update_integrity (update-scoped; SI-7 breiter) · CM-7->remote_access_attack_surface_min (remote-scoped; CM-7 breiter). GAP-BEFUND (Cross-Domain-Review): generische Parent-Obligations software_integrity_protection + attack_surface_minimization FEHLEN — SI-7/CM-7 sind breiter als die domaenen-scoped Treffer. Kandidaten fuer neue generische Obligations (User-Entscheidung). Damit 10/10 proposed_obligation_id gefuellt.",
"join_principle": "SEMANTISCH via obligation_id, NICHT via citation_unit/legal_basis-Anker. Die CRA-Anker sind im Registry teils approximativ (siehe anchor_quality_note) — daher ist obligation_id der stabile Primaerschluessel, nicht der Anker.", "join_principle": "SEMANTISCH via obligation_id, NICHT via citation_unit/legal_basis-Anker. Die CRA-Anker sind im Registry teils approximativ (siehe anchor_quality_note) — daher ist obligation_id der stabile Primaerschluessel, nicht der Anker.",
"anchor_quality_note": "Registry-legal_basis-Anker sind teils CRA-Part-I-fehlzugeordnet (Opus-Synthese): user_authentication_required steht auf (2)(d) statt (2)(c); Crypto-Obligations auf (2)(e) statt (2)(d). CRA Annex I Part I: (2)(c)=Zugriffsschutz, (2)(d)=Vertraulichkeit, (2)(e)=Integritaet. Korrektur kommt mit dem zitierfaehigen Re-Ingest (span-genau). Deshalb: NICHT auf Anker joinen. ABER: der Logging-Cut (V16.*) ist korrekt auf (2)(k) verankert (echte Logging-Subsektion, kein Fehl-Anker).", "anchor_quality_note": "Registry-legal_basis-Anker sind teils CRA-Part-I-fehlzugeordnet (Opus-Synthese): user_authentication_required steht auf (2)(d) statt (2)(c); Crypto-Obligations auf (2)(e) statt (2)(d). CRA Annex I Part I: (2)(c)=Zugriffsschutz, (2)(d)=Vertraulichkeit, (2)(e)=Integritaet. Korrektur kommt mit dem zitierfaehigen Re-Ingest (span-genau). Deshalb: NICHT auf Anker joinen. ABER: der Logging-Cut (V16.*) ist korrekt auf (2)(k) verankert (echte Logging-Subsektion, kein Fehl-Anker).",
"mapping_type_note": "NEU: mapping_type=primary_implementation = die kanonische Primaer-Control einer Anforderung (genau eine), staerker als implements/supports. related-Controls (SC-3(3), RA-5, AC-6, SI-16, SA-10, ...) folgen separat als supports. Eine Obligation kann mehrere Controls haben, aber genau einen primary_implementation-Einstieg.", "mapping_type_note": "NEU: mapping_type=primary_implementation = die kanonische Primaer-Control einer Anforderung (genau eine), staerker als implements/supports. related-Controls (SC-3(3), RA-5, AC-6, SI-16, SA-10, ...) folgen separat als supports. Eine Obligation kann mehrere Controls haben, aber genau einen primary_implementation-Einstieg.",
@@ -68,7 +68,7 @@
"framework": "NIST SP 800-53", "control": "SI-7", "framework": "NIST SP 800-53", "control": "SI-7",
"source_norm": "CRA Annex I Part I (2)(e) — Integritaet", "source_norm": "CRA Annex I Part I (2)(e) — Integritaet",
"citation_unit": "Annex I (2)(e)", "family": "integrity", "mapping_type": "primary_implementation", "citation_unit": "Annex I (2)(e)", "family": "integrity", "mapping_type": "primary_implementation",
"proposed_obligation_id": "", "proposed_obligation_id": "signed_update_integrity",
"mapping_method": "semantic", "mapping_method": "semantic",
"mapping_note": "NIST SI-7 = Software/Firmware/Information Integrity (Signaturpruefung, Manipulationserkennung, Secure Boot, Runtime-Integritaet). Naechster vorhandener Treffer (93-Stand): signed_update_integrity (updates-Familie, Annex I (1)(3)(f)) — deckt aber NUR Update-Signatur. SI-7 ist BREITER (gesamte Produkt-Integritaet). Falls keine generische Integritaets-Obligation existiert: neue noetig (Vorschlag software_integrity_protection); sonst SI-7 primary_implementation fuer signed_update_integrity (update-scoped) + supports fuers Breitere. NICHT log_integrity_immutability (Audit-Log-Schutz, andere Ebene)." "mapping_note": "NIST SI-7 = Software/Firmware/Information Integrity (Signaturpruefung, Manipulationserkennung, Secure Boot, Runtime-Integritaet). Naechster vorhandener Treffer (93-Stand): signed_update_integrity (updates-Familie, Annex I (1)(3)(f)) — deckt aber NUR Update-Signatur. SI-7 ist BREITER (gesamte Produkt-Integritaet). Falls keine generische Integritaets-Obligation existiert: neue noetig (Vorschlag software_integrity_protection); sonst SI-7 primary_implementation fuer signed_update_integrity (update-scoped) + supports fuers Breitere. NICHT log_integrity_immutability (Audit-Log-Schutz, andere Ebene)."
}, },
@@ -76,7 +76,7 @@
"framework": "NIST SP 800-53", "control": "SI-2", "framework": "NIST SP 800-53", "control": "SI-2",
"source_norm": "CRA Annex I Part I (2)(l) — Sichere Updates", "source_norm": "CRA Annex I Part I (2)(l) — Sichere Updates",
"citation_unit": "Annex I (2)(l)", "family": "update", "mapping_type": "primary_implementation", "citation_unit": "Annex I (2)(l)", "family": "update", "mapping_type": "primary_implementation",
"proposed_obligation_id": "", "proposed_obligation_id": "provide_security_updates",
"mapping_method": "semantic", "mapping_method": "semantic",
"mapping_note": "NIST SI-2 = Flaw Remediation. STARKER Treffer in eurer NEUEN updates-Familie (93-Stand): provide_security_updates (LEGAL_MINIMUM, Annex I (2)(c) + Art. 13) = DAS sichere-Update-LM. -> SI-2 primary_implementation = provide_security_updates. Verwandt (supports): vuln_remediation_patching (Part II Remediations-PROZESS), support_period_maintenance, update_testing_validation, update_rollback. Mein source_norm-Anker (2)(l) ist approximativ -> bitte (2)(c)/Art.13 via provide_security_updates nutzen." "mapping_note": "NIST SI-2 = Flaw Remediation. STARKER Treffer in eurer NEUEN updates-Familie (93-Stand): provide_security_updates (LEGAL_MINIMUM, Annex I (2)(c) + Art. 13) = DAS sichere-Update-LM. -> SI-2 primary_implementation = provide_security_updates. Verwandt (supports): vuln_remediation_patching (Part II Remediations-PROZESS), support_period_maintenance, update_testing_validation, update_rollback. Mein source_norm-Anker (2)(l) ist approximativ -> bitte (2)(c)/Art.13 via provide_security_updates nutzen."
}, },
@@ -84,7 +84,7 @@
"framework": "NIST SP 800-53", "control": "CM-7", "framework": "NIST SP 800-53", "control": "CM-7",
"source_norm": "CRA Annex I Part I (2)(i) — Angriffsflaeche minimieren", "source_norm": "CRA Annex I Part I (2)(i) — Angriffsflaeche minimieren",
"citation_unit": "Annex I (2)(i)", "family": "attack_surface", "mapping_type": "primary_implementation", "citation_unit": "Annex I (2)(i)", "family": "attack_surface", "mapping_type": "primary_implementation",
"proposed_obligation_id": "", "proposed_obligation_id": "remote_access_attack_surface_min",
"mapping_method": "semantic", "mapping_method": "semantic",
"mapping_note": "NIST CM-7 = Least Functionality (deaktivierte Ports/Dienste/Funktionen, GESAMTE Angriffsflaeche). Naechster vorhandener Treffer (93-Stand): remote_access_attack_surface_min (remote_access-Familie) — deckt aber NUR Remote-Access-Flaeche. CM-7 ist BREITER. Vermutlich generische Obligation noetig (Vorschlag attack_surface_minimization); sonst CM-7 supports fuer remote_access_attack_surface_min. related (supports): SC-3(3)/AC-6/SI-16." "mapping_note": "NIST CM-7 = Least Functionality (deaktivierte Ports/Dienste/Funktionen, GESAMTE Angriffsflaeche). Naechster vorhandener Treffer (93-Stand): remote_access_attack_surface_min (remote_access-Familie) — deckt aber NUR Remote-Access-Flaeche. CM-7 ist BREITER. Vermutlich generische Obligation noetig (Vorschlag attack_surface_minimization); sonst CM-7 supports fuer remote_access_attack_surface_min. related (supports): SC-3(3)/AC-6/SI-16."
} }
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,85 @@
"""Cross-Domain Relationship Discovery — Stufe 2: Opus klassifiziert jede Kandidaten-Beziehung
in GENAU EINE Kategorie. Liefert das Rohmaterial der Compliance-Ontologie (insb. SHARED_CAPABILITY
= Capability-Schicht). ANTHROPIC_API_KEY aus ENV (nie hartcodiert). Streaming.
ANTHROPIC_API_KEY=… python3 classify_relationships.py --pairs /tmp/cd_pairs.json \
--only-cross-family --out /tmp/cd_classified.json
"""
from __future__ import annotations
import argparse
import json
import os
import re
from collections import Counter
# System prompt for the classification request; main() passes it as `system=` to the
# streamed Messages API call. The text is runtime input to the model (kept in German) —
# changing a character changes behaviour, so do not edit it for cosmetic reasons.
# It lists eight relation categories in priority order and demands a JSON-only reply of the
# form {"results": [{"i", "relation", "direction", "capability_name", "evidence_name",
# "reason"}]}; main() reads exactly these fields back, matching answers to pairs via "i".
SYS = """Du bist Compliance-Ontologe. Gegeben Paare von Legal Obligations (CRA), bestimme fuer
JEDES Paar GENAU EINE Beziehung. Ziel ist NICHT Aehnlichkeit, sondern die STRUKTURELLE Beziehung.
Kategorien (genau EINE; bei Mehrdeutigkeit gilt diese Prioritaet):
1 SAME_OBLIGATION — dieselbe rechtliche Pflicht, nur pro Domaene anders formuliert -> MERGE-Kandidat.
2 SUPPORTED_BY — A ist domaenenspezifische Auspraegung/Teilfall von B ODER A traegt zur Erfuellung von B bei. RICHTUNG angeben.
3 SHARED_CAPABILITY — beide werden durch DIESELBE technische Faehigkeit erfuellt (z.B. MFA, TLS-Verschluesselung, digitale Signatur, Session-Management, Patch-Management, Logging-Pipeline). capability_name (snake_case) angeben.
4 SHARED_PROCEDURE — beide ueber denselben operativen Prozess erfuellt, ohne gemeinsames technisches Artefakt.
5 SHARED_EVIDENCE — beide erzeugen/nutzen denselben Nachweis (Audit-Log, SBOM, Release Notes). evidence_name angeben.
6 SHARED_GUIDANCE — beide berufen sich auf denselben externen Standard (NIST/OWASP/ISO), sonst distinkt.
7 OVERLAP_ONLY — nur oberflaechliche Wort-/Themenueberlappung, keine echte strukturelle Beziehung.
8 UNRELATED — Falsch-Positiv der Embedding-Naehe.
Gib AUSSCHLIESSLICH JSON aus:
{"results":[{"i":0,"relation":"SHARED_CAPABILITY","direction":"a->b|b->a|none","capability_name":"","evidence_name":"","reason":"max 18 Woerter"}]}
Regeln: relation = genau eine der 8 Strings. direction nur bei SUPPORTED_BY, sonst "none".
capability_name NUR bei SHARED_CAPABILITY (sonst ""), evidence_name NUR bei SHARED_EVIDENCE (sonst "").
Sei streng: SHARED_GUIDANCE/OVERLAP_ONLY/UNRELATED grosszuegig nutzen; SAME_OBLIGATION nur bei echter Deckungsgleichheit.
Gib fuer JEDES Paar (per Index i) genau ein Ergebnis."""
def build_user(pairs: list[dict]) -> str:
    """Render the candidate pairs as the user message for the classifier.

    Every pair becomes a two-line entry: the first line carries the pair's list index in
    square brackets plus side A, the second line carries side B and the similarity score.
    The index is what lets the caller match the model's answers back to the pairs.

    Args:
        pairs: dicts providing the keys "a", "fa", "ta", "da", "b", "fb", "tb", "db"
            and "sim".

    Returns:
        The header line "Paare:" followed by one entry per pair, newline-separated.

    Raises:
        KeyError: if a pair lacks one of the keys listed above.
    """

    def render(idx: int, pair: dict) -> str:
        # Side A first, then side B; the literal text is part of the prompt and must not change.
        side_a = f'[{idx}] A={pair["a"]} ({pair["fa"]}/{pair["ta"]}): {pair["da"]}'
        side_b = f' B={pair["b"]} ({pair["fb"]}/{pair["tb"]}): {pair["db"]} [sim={pair["sim"]}]'
        return side_a + "\n" + side_b

    entries = "\n".join(render(idx, pair) for idx, pair in enumerate(pairs))
    return "Paare:\n" + entries
def main() -> None:
    """Classify candidate obligation pairs with one streamed model call and write a JSON report.

    Command-line options:
        --pairs              input JSON file with a top-level "pairs" list (required)
        --only-cross-family  keep only pairs whose "cross_family" value is truthy
        --min-sim            keep only pairs whose "sim" is >= this value (default 0.0)
        --model              model name passed to the API
        --out                path of the JSON report to write (required)

    The report contains the number of pairs sent ("n_pairs"), the number of pairs that got
    a usable answer ("n_classified"), a count per relation ("distribution"), the model name
    and one entry per classified pair ("results").

    When the filters leave no pairs, no API call is made and an empty report is written.

    Raises:
        KeyError: if ANTHROPIC_API_KEY is unset while there are pairs to classify, or if
            the input lacks an expected key.
        json.JSONDecodeError: if no JSON document can be parsed from the model reply.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--pairs", required=True)
    ap.add_argument("--only-cross-family", action="store_true")
    ap.add_argument("--min-sim", type=float, default=0.0)
    ap.add_argument("--model", default="claude-opus-4-8")
    ap.add_argument("--out", required=True)
    a = ap.parse_args()

    # Context manager so the input handle is closed as soon as the JSON is loaded.
    with open(a.pairs, encoding="utf-8") as fh:
        d = json.load(fh)
    pairs = [p for p in d["pairs"]
             if (not a.only_cross_family or p["cross_family"]) and p["sim"] >= a.min_sim]

    res = []
    if pairs:  # nothing to classify -> no request; an empty report is still written below
        # Function-local import: the SDK is only needed when a request is actually sent.
        import anthropic

        client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
        with client.messages.stream(model=a.model, max_tokens=24000, system=SYS,
                                    messages=[{"role": "user", "content": build_user(pairs)}]) as st:
            msg = st.get_final_message()
        # All pairs go into one request; a reply cut off at the token limit yields broken or
        # partial JSON, so say so before the parse below possibly fails.
        if getattr(msg, "stop_reason", None) == "max_tokens":
            print("warning: reply stopped at max_tokens; results may be incomplete")
        # Collect the text blocks only — the first content block is not guaranteed to be text.
        txt = "".join(b.text for b in msg.content if getattr(b, "type", None) == "text")
        # Greedy match from the first "{" to the last "}" tolerates prose or code fences
        # around the JSON object.
        m = re.search(r"\{.*\}", txt, re.DOTALL)
        data = json.loads(m.group(0) if m else txt)
        results = data.get("results") if isinstance(data, dict) else None

        seen: set[int] = set()
        for r in results or []:
            if not isinstance(r, dict):
                continue
            i = r.get("i")
            # bool is a subclass of int, so exclude it explicitly; also drop out-of-range
            # indices and repeated answers for the same pair (the first answer wins).
            if isinstance(i, bool) or not isinstance(i, int) or not 0 <= i < len(pairs) or i in seen:
                continue
            seen.add(i)
            p = pairs[i]
            res.append({"a": p["a"], "fa": p["fa"], "b": p["b"], "fb": p["fb"], "sim": p["sim"],
                        "relation": r.get("relation", "?"), "direction": r.get("direction", "none"),
                        "capability_name": r.get("capability_name", ""),
                        "evidence_name": r.get("evidence_name", ""), "reason": r.get("reason", "")})

    dist = Counter(r["relation"] for r in res)
    out = {"n_pairs": len(pairs), "n_classified": len(res), "distribution": dict(dist),
           "model": a.model, "results": res}
    # Context manager so the report is flushed and closed before success is reported.
    with open(a.out, "w", encoding="utf-8") as fh:
        json.dump(out, fh, ensure_ascii=False, indent=1)
    print(f"classified {len(res)}/{len(pairs)} | {dict(dist)}")
    print("written:", a.out)


if __name__ == "__main__":
    main()
@@ -0,0 +1,66 @@
"""Cross-Domain Relationship Discovery — Stufe 1 (key-frei, im bp-compliance-backend-Container).
Alle Obligations mehrerer Registries -> BGE-M3-Embedding -> je Obligation Top-K Nachbarn ->
Kandidaten-Paare (cross- UND same-family) >= min-sim. KEIN Urteil hier — nur Kandidaten.
Stufe 2 (classify_relationships.py) klassifiziert die Beziehung per Opus.
python3 cross_domain_pairs.py /tmp/reg/cra.json /tmp/reg/cra_authentication.json ... \
--top-k 8 --min-sim 0.60 --out /tmp/cd_pairs.json
"""
from __future__ import annotations
import argparse
import asyncio
import json
from _core import cosine
async def run(paths: list[str], top_k: int, min_sim: float, out: str) -> None:
    """Embed all obligations from the given registries and write candidate pairs as JSON.

    Args:
        paths: registry JSON files. Each may carry a top-level "family" and an
            "obligations" list; every obligation needs an "id".
        top_k: number of most similar neighbours considered per obligation.
        min_sim: neighbours scoring below this value are dropped.
        out: path of the JSON file to write.

    The output holds the obligation count, the pair count, the number of cross-family
    pairs and the pairs themselves, sorted by descending similarity. Each pair carries
    id / family / tier / description (cut to 220 characters) for both sides, the
    similarity rounded to three decimals and a "cross_family" flag.

    When no obligations are found the embedding service is neither imported nor called and
    an empty result file is written.

    Raises:
        KeyError: if an obligation has no "id".
    """
    obls: list[dict] = []
    for p in paths:
        # Context manager so each registry handle is closed right after parsing.
        with open(p, encoding="utf-8") as fh:
            reg = json.load(fh)
        fam = reg.get("family", "")
        for o in reg.get("obligations", []):
            # `or` fallbacks also cover an explicit JSON null, which would otherwise end up
            # as the text "None" in the embedding input and break the [:220] slice below.
            obls.append({"id": o["id"], "family": o.get("family", "") or fam,
                         "tier": o.get("tier", ""), "name": o.get("name") or "",
                         "desc": o.get("description") or ""})

    n = len(obls)
    if obls:
        # Project-internal embedding helper, imported only when there is text to embed.
        # NOTE(review): assumed to return one vector per input text, in input order — confirm.
        from compliance.services.mc_embedding_matcher import _embed_texts

        vecs = await _embed_texts([f'{o["name"]}. {o["desc"]}' for o in obls])
    else:
        vecs = []
    print(f"obligations={n}")

    # Best score per unordered index pair. A pair is kept as soon as either side lists the
    # other among its top_k neighbours with a score of at least min_sim.
    best: dict[tuple[int, int], float] = {}
    for i in range(n):
        # `cosine` comes from the project's _core module; higher is treated as more similar.
        nbrs = sorted(((cosine(vecs[i], vecs[j]), j) for j in range(n) if j != i), reverse=True)[:top_k]
        for s, j in nbrs:
            if s < min_sim:
                continue
            a, b = sorted((i, j))
            if (a, b) not in best or s > best[(a, b)]:
                best[(a, b)] = s

    pairs = []
    for (a, b), s in sorted(best.items(), key=lambda x: -x[1]):
        pairs.append({
            "a": obls[a]["id"], "fa": obls[a]["family"], "ta": obls[a]["tier"], "da": obls[a]["desc"][:220],
            "b": obls[b]["id"], "fb": obls[b]["family"], "tb": obls[b]["tier"], "db": obls[b]["desc"][:220],
            "sim": round(s, 3), "cross_family": obls[a]["family"] != obls[b]["family"]})
    cf = sum(1 for p in pairs if p["cross_family"])
    # Context manager so the result is flushed and closed before the summary is printed.
    with open(out, "w", encoding="utf-8") as fh:
        json.dump({"n_obligations": n, "n_pairs": len(pairs), "cross_family": cf, "pairs": pairs},
                  fh, ensure_ascii=False, indent=1)
    print(f"pairs={len(pairs)} (cross-family={cf}, same-family={len(pairs) - cf}) written: {out}")
def main() -> None:
    """Parse the command line and run the candidate-pair discovery.

    Positional arguments are one or more registry JSON paths. Options: --top-k
    (int, default 8), --min-sim (float, default 0.60) and --out (default
    /tmp/cd_pairs.json). The parsed values are handed to run(), which is driven to
    completion with asyncio.run().
    """
    parser = argparse.ArgumentParser()
    # (flags, keyword settings) per argument, registered in this order.
    option_specs = (
        (("registries",), {"nargs": "+"}),
        (("--top-k",), {"type": int, "default": 8}),
        (("--min-sim",), {"type": float, "default": 0.60}),
        (("--out",), {"default": "/tmp/cd_pairs.json"}),
    )
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)
    opts = parser.parse_args()
    asyncio.run(run(paths=opts.registries, top_k=opts.top_k, min_sim=opts.min_sim, out=opts.out))


if __name__ == "__main__":
    main()