feat(citability): logischer norm_id-Join auf legal_basis (KB-v2 Zitier-Vertrag)
Wake-up #2 (Domaene 2): Zitierfaehigkeit ohne char-Level-Spans via logischem norm_id-Join auf KB-v2-Units (bp_compliance_kb_2026_1_build). Konvention (Board Compliance/KB-v2 2026-07-01): EU-<ACT>-Anhang<ROM> (Annex-Ebene, confirmed) / EU-<ACT>-Art<N> + EU-<ACT>-Kapitel<ROM> (verify_pending). Namensvariante EU-MaschVO-* (NICHT MaschinenVO). KEINE neue Klasse — norm_ids ist ein Attribut auf legal_basis (freeze-safe). - 65/65 legal_basis gejoint (CRA 40 + MaschVO 25), 0 unparsed; 64 Obligations citation_status -> norm_id_linked (BP/guidance-anchored bleiben ohne norm_id). - 53 annex_confirmed, 12 verify_pending; distinkt 5 Annex-IDs + 19 Art/Kapitel. - norm_id_manifest.json = KB-v2-Handoff (verify_pending Art-/Kapitel-IDs pruefen). - Granularitaet annex-grob (Part/Punkt = KB-Enhancement TBD); Artikel-norm_ids in KB-v2 noch zu verifizieren. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,75 @@
"""Citability join (wake-up #2): logical norm_id join on legal_basis.

KB-v2 convention (Board 2026-07-01, Compliance/KB-v2): `EU-<ACT>-Anhang<ROM>` (annex level, coarse) ·
`EU-<ACT>-Art<N>` (article, still to be verified in KB-v2) · chapter = convention TBD.
Naming variant: `EU-MaschVO-*` (NOT MaschinenVO). No char span needed — logical join on norm_id.
Adds `norm_ids` (list) and `norm_id_status` to each legal_basis entry; sets obligation.citation_status
to `norm_id_linked` (annex-coarse). NO new class (attribute only). Freeze-safe.
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import glob
|
||||
import json
|
||||
import re
|
||||
|
||||
# Maps the `source` field of a legal_basis entry to the act token used inside
# norm_ids. "MaschinenVO" is folded into "MaschVO" because the naming
# convention is EU-MaschVO-* (not MaschinenVO).
ACT = {"CRA": "CRA", "MaschVO": "MaschVO", "MaschinenVO": "MaschVO"}

# Obligation files that main() rewrites in place; sorted for a stable order.
# NOTE(review): the pattern only matches cra*.json — confirm that any MaschVO
# legal bases are stored in these files as well.
FILES = sorted(glob.glob("obligations/cra*.json"))

# Anchor patterns, compiled once. German and English spellings are accepted
# and matched case-insensitively so that "Artikel 13", "Article 13",
# "Art. 13" and "art 13" are all recognised the same way.
_ANNEX_RE = re.compile(r"An(?:hang|nex)\s+([IVX]+)", re.I)
_ARTICLE_RE = re.compile(r"\bArt(?:icle|ikel|\.)?\s*(\d+)", re.I)
_CHAPTER_RE = re.compile(r"(?:Kapitel|Chapter)\s+([IVX/]+)", re.I)


def derive(source: str, anchor: str) -> tuple[list[str], str]:
    """Derive norm_ids and a status from one legal_basis entry.

    Args:
        source: Act name of the legal basis. Known aliases are normalised
            via ``ACT``; unknown names are passed through unchanged.
        anchor: Free-text citation, e.g. ``"Annex I Part II (1)"``,
            ``"Art. 13"`` or ``"Kapitel II/III"``.

    Returns:
        ``(norm_ids, status)``. ``norm_ids`` is de-duplicated and lists
        annex IDs first, followed by article and chapter IDs. ``status`` is
        ``"annex_confirmed"`` as soon as at least one annex ID was found
        (even if article/chapter IDs are present too), ``"verify_pending"``
        when only article/chapter IDs were found, and ``"unparsed"`` when
        nothing could be derived.
    """
    act_key = (source or "").strip()
    act = ACT.get(act_key, act_key)
    text = anchor or ""
    # Without an act token or an anchor no valid ID can be built; report the
    # entry as unparsed instead of emitting something like "EU--AnhangI".
    if not act or not text:
        return [], "unparsed"

    confirmed = [
        f"EU-{act}-Anhang{rom.upper()}" for rom in _ANNEX_RE.findall(text)
    ]

    pending = [f"EU-{act}-Art{num}" for num in _ARTICLE_RE.findall(text)]
    for group in _CHAPTER_RE.findall(text):
        # "II/III" names several chapters at once; emit one ID per numeral.
        pending.extend(
            f"EU-{act}-Kapitel{rom.upper()}"
            for rom in group.split("/")
            if rom
        )

    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # confirmed annex IDs stay ahead of the verify-pending ones.
    ordered = list(dict.fromkeys(confirmed + pending))

    if confirmed:
        status = "annex_confirmed"
    elif pending:
        status = "verify_pending"
    else:
        status = "unparsed"
    return ordered, status
|
||||
|
||||
|
||||
def main() -> None:
    """Attach norm_ids to every legal_basis entry of the obligation files.

    For each path in ``FILES`` the JSON document is loaded, a top-level
    ``norm_id_contract`` entry is added if it is not present yet, and every
    ``legal_basis`` entry of every obligation receives ``norm_ids`` and
    ``norm_id_status`` as computed by ``derive``. An obligation with at
    least one linked legal basis gets ``citation_status = "norm_id_linked"``.
    The document is then written back to the same path. Unparsed anchors
    and summary counters are printed to stdout.
    """
    total_lb = linked = unparsed = 0
    obl_linked = 0
    for path in FILES:
        with open(path, encoding="utf-8") as src:
            doc = json.load(src)

        # setdefault keeps a contract that is already in the file untouched.
        doc.setdefault("norm_id_contract", {
            "convention": "EU-<ACT>-Anhang<ROM> (Annex-Ebene) / EU-<ACT>-Art<N> (verify) — KB-v2 bp_compliance_kb_2026_1_build",
            "act_naming": "EU-MaschVO-* (NICHT MaschinenVO)",
            "granularity": "annex-grob — 'Annex I Part II (1)' -> EU-CRA-AnhangI; Part/Punkt = KB-Enhancement TBD",
            "article_status": "EU-<ACT>-Art<N> in KB-v2 noch zu verifizieren; Annex-IDs confirmed",
            "source": "Board Compliance/KB-v2 2026-07-01",
        })

        for obligation in doc.get("obligations", []):
            has_link = False
            for basis in obligation.get("legal_basis", []):
                total_lb += 1
                norm_ids, status = derive(
                    basis.get("source", ""), basis.get("anchor", "")
                )
                basis["norm_ids"] = norm_ids
                basis["norm_id_status"] = status
                if norm_ids:
                    linked += 1
                    has_link = True
                if status == "unparsed":
                    unparsed += 1
                    print(f" UNPARSED: {basis.get('source')} \"{basis.get('anchor')}\"")
            if has_link:
                obligation["citation_status"] = "norm_id_linked"
                obl_linked += 1

        # Serialise before opening for writing, so the file is not truncated
        # if serialisation fails; the context manager guarantees the handle
        # is flushed and closed.
        payload = json.dumps(doc, ensure_ascii=False, indent=1)
        with open(path, "w", encoding="utf-8") as dst:
            dst.write(payload)

    print(f"legal_basis gesamt {total_lb} | mit norm_ids {linked} | unparsed {unparsed}")
    print(f"Obligations citation_status -> norm_id_linked: {obl_linked}")


# Run the join only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user