1. BAG-Urteile Ingestion Script (21 kuratierte Urteile zu §87 BetrVG) - Microsoft 365, SAP ERP, E-Mail, Standardsoftware, Video, SaaS/Cloud - 14 erfolgreich ingestiert (4.726 Chunks in bp_compliance_datenschutz) 2. Betriebsvereinbarung Template (6. Document Template) - SQL-Migration mit 13 Sektionen (A-M), ~30 Placeholders - Conditional Blocks fuer KI-Systeme, Video, HR - Python-Generator mit automatischer TOM-Befuellung Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
285 lines
11 KiB
Python
285 lines
11 KiB
Python
"""Ingest BAG (Bundesarbeitsgericht) court decisions into RAG.
|
|
|
|
Downloads decision pages (HTML) from bundesarbeitsgericht.de and uploads them to the
|
|
bp_compliance_datenschutz Qdrant collection via the RAG-Service API.
|
|
|
|
These decisions are curated for IT/KI-Mitbestimmung relevance (§87 BetrVG).
|
|
|
|
Usage:
|
|
python scripts/ingest_bag_urteile.py [--rag-url https://macmini:8097] [--dry-run]
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
import tempfile
|
|
import time
|
|
|
|
import httpx
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Curated BAG decisions for IT/AI works council co-determination
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Curated list of decisions to ingest. Each entry is a dict with the keys:
#   url         - decision page on bundesarbeitsgericht.de
#   case_number - court file number, e.g. "1 ABR 20/21"
#   date        - date string in YYYY-MM-DD form
#   subject     - short German description of the decision
#   keywords    - list of German tags
# main() reads all five keys from every entry.
BAG_DECISIONS = [
    # --- M365 / Copilot / standard software ---
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-20-21/",
        "case_number": "1 ABR 20/21",
        "date": "2022-03-08",
        "subject": "Microsoft Office 365 — Mitbestimmung",
        "keywords": ["Microsoft 365", "Standardsoftware", "Ueberwachung", "§87 BetrVG"],
    },
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abn-36-18/",
        "case_number": "1 ABN 36/18",
        "date": "2018-10-23",
        "subject": "Excel / Standardsoftware — keine Geringfuegigkeitsschwelle",
        "keywords": ["Excel", "Standardsoftware", "Geringfuegigkeit", "§87 BetrVG"],
    },
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-45-11/",
        "case_number": "1 ABR 45/11",
        "date": "2012-09-25",
        "subject": "SAP ERP im Personalwesen",
        "keywords": ["SAP", "ERP", "Personalwesen", "Verhaltenskontrolle", "§87 BetrVG"],
    },
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-31-19/",
        "case_number": "1 ABR 31/19",
        "date": "2021-01-27",
        "subject": "E-Mail-Kommunikationssoftware — Mitbestimmung",
        "keywords": ["E-Mail", "Kommunikation", "Software", "§87 BetrVG"],
    },
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-13-17/",
        "case_number": "1 ABR 13/17",
        "date": "2019-07-09",
        "subject": "IT-System fuer Mitarbeiterbefragung",
        "keywords": ["Mitarbeiterbefragung", "Feedback", "technische Einrichtung", "§87 BetrVG"],
    },
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-16-23/",
        "case_number": "1 ABR 16/23",
        "date": "2024-07-16",
        "subject": "Headset-System — Geraetenutzungsdaten",
        "keywords": ["Headset", "Geraetenutzung", "Ueberwachung", "§87 BetrVG"],
    },
    # --- Monitoring, social media, third-party platforms ---
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-7-15/",
        "case_number": "1 ABR 7/15",
        "date": "2016-12-13",
        "subject": "Facebook-Seite — indirekte Leistungsueberwachung",
        "keywords": ["Facebook", "Social Media", "Besucherbeitraege", "Ueberwachung", "§87 BetrVG"],
    },
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-43-12/",
        "case_number": "1 ABR 43/12",
        "date": "2013-12-10",
        "subject": "Google Maps — indirekte Ueberwachung / Definition Ueberwachung",
        "keywords": ["Google Maps", "Routenplaner", "indirekte Ueberwachung", "Definition", "§87 BetrVG"],
    },
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-68-13/",
        "case_number": "1 ABR 68/13",
        "date": "2015-07-21",
        "subject": "Ueberwachung durch technische Einrichtung eines Dritten (SaaS/Cloud)",
        "keywords": ["Drittsystem", "SaaS", "Cloud", "Ueberwachung", "§87 BetrVG"],
    },
    # --- Video, workload, performance metrics ---
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-78-11/",
        "case_number": "1 ABR 78/11",
        "date": "2012-12-11",
        "subject": "Videoueberwachung — Grundsatzentscheidung",
        "keywords": ["Videoueberwachung", "Kamera", "Arbeitsplatz", "§87 BetrVG"],
    },
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-46-15/",
        "case_number": "1 ABR 46/15",
        "date": "2017-04-25",
        "subject": "Belastungsstatistik — dauerhafte Kennzahlenueberwachung",
        "keywords": ["Belastungsstatistik", "Kennzahlen", "Analytics", "Persoenlichkeitsrecht", "§87 BetrVG"],
    },
    # --- Negative / delimiting cases ---
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-32-16/",
        "case_number": "1 ABR 32/16",
        "date": "2017-12-19",
        "subject": "Anti-Terror-Listen — keine Mitbestimmung",
        "keywords": ["Anti-Terror", "Sanktionsliste", "keine Mitbestimmung", "Abgrenzung", "§87 BetrVG"],
    },
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-22-21/",
        "case_number": "1 ABR 22/21",
        "date": "2022-09-13",
        "subject": "Elektronische Arbeitszeiterfassung — Initiativrecht",
        "keywords": ["Arbeitszeiterfassung", "Initiativrecht", "digitale Systeme", "§87 BetrVG"],
    },
    # --- Historical landmark decisions ---
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-43-81/",
        "case_number": "1 ABR 43/81",
        "date": "1983-12-06",
        "subject": "Grundsatz technische Ueberwachung — Eignung genuegt",
        "keywords": ["Grundsatz", "Eignung", "technische Einrichtung", "§87 BetrVG"],
    },
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-23-82/",
        "case_number": "1 ABR 23/82",
        "date": "1984-09-14",
        "subject": "Erste Grundlinie IT-Systeme",
        "keywords": ["IT-System", "Grundlinie", "technische Einrichtung", "§87 BetrVG"],
    },
    # --- E-mail / Internet ---
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-46-10/",
        "case_number": "1 ABR 46/10",
        "date": "2012-02-07",
        "subject": "Internet- und E-Mail-Nutzung — Kommunikationsdaten",
        "keywords": ["Internet", "E-Mail", "Kommunikationsdaten", "Auswertung", "§87 BetrVG"],
    },
    # --- HR / appraisal systems ---
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-40-07/",
        "case_number": "1 ABR 40/07",
        "date": "2008-07-22",
        "subject": "Beurteilungssysteme — §94/§95 BetrVG",
        "keywords": ["Beurteilung", "Bewertungssystem", "HR", "§94 BetrVG", "§95 BetrVG"],
    },
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-16-07/",
        "case_number": "1 ABR 16/07",
        "date": "2008-03-18",
        "subject": "Personalfrageboegen — Bewertung",
        "keywords": ["Personalfragebogen", "Bewertung", "HR-Tools", "§94 BetrVG"],
    },
    # --- Video / physical surveillance ---
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-21-03/",
        "case_number": "1 ABR 21/03",
        "date": "2004-06-29",
        "subject": "Videoueberwachung Arbeitsplatz",
        "keywords": ["Video", "Kamera", "Arbeitsplatz", "Ueberwachung", "§87 BetrVG"],
    },
    # --- Competence / jurisdiction ---
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-2-05/",
        "case_number": "1 ABR 2/05",
        "date": "2006-05-03",
        "subject": "Zustaendigkeit Betriebsrat bei konzernweiten Tools",
        "keywords": ["Zustaendigkeit", "Konzern", "Gesamtbetriebsrat", "§87 BetrVG"],
    },
    {
        "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-58-04/",
        "case_number": "1 ABR 58/04",
        "date": "2006-03-28",
        "subject": "Mitbestimmung bei Einfuehrung technischer Systeme",
        # NOTE(review): "Systemeinführung" is the only value in this list
        # written with an umlaut; every other entry transliterates
        # (ue/ae/oe). Confirm whether that is intended.
        "keywords": ["Systemeinführung", "technische Systeme", "Mitbestimmung", "§87 BetrVG"],
    },
]
|
|
|
|
|
|
def normalize_case_number(case_number: str) -> str:
    """Turn a court case number into a slug usable as a regulation_id.

    The input is lower-cased, every character other than ``a``-``z`` and
    ``0``-``9`` is replaced by an underscore, and underscores at either end
    are trimmed. Consecutive separators are not collapsed.

    Example: ``"1 ABR 20/21"`` becomes ``"1_abr_20_21"``.
    """
    lowered = case_number.lower()
    slug = re.sub(r"[^a-z0-9]", "_", lowered)
    return slug.strip("_")
|
|
|
|
|
|
def download_decision(url: str, client: httpx.Client) -> bytes:
    """Fetch a BAG decision page and return the raw response body.

    Redirects are followed. ``raise_for_status`` is called on the response,
    so an error status surfaces as an exception instead of being returned
    as page content.

    Args:
        url: Address of the decision page.
        client: HTTP client used to issue the GET request.

    Returns:
        The undecoded bytes of the response body.
    """
    response = client.get(url, follow_redirects=True)
    response.raise_for_status()
    page_bytes = response.content
    return page_bytes
|
|
|
|
|
|
def upload_to_rag(
    file_bytes: bytes,
    filename: str,
    metadata: dict,
    rag_url: str,
    client: httpx.Client,
) -> dict:
    """Upload one document to the RAG service and return its JSON reply.

    The document is sent as a multipart form POST to
    ``<rag_url>/api/v1/documents/upload`` with content type ``text/html``,
    together with a fixed set of form fields (collection, data type,
    chunking parameters) and the metadata serialized as JSON.

    Args:
        file_bytes: Raw document body to upload.
        filename: File name reported for the uploaded part.
        metadata: Document metadata, sent in the ``metadata_json`` field.
            The first four characters of its ``date`` entry become the
            ``year`` field; ``"2024"`` is used when the key is absent.
        rag_url: Base URL of the RAG service. Trailing slashes are ignored.
        client: HTTP client used to issue the request.

    Returns:
        The decoded JSON body of the response.

    Raises:
        httpx.HTTPStatusError: If the service answers with an error status.
    """
    # Strip trailing slashes so a base URL such as "https://host:8097/" does
    # not produce a "//api/..." path.
    endpoint = f"{rag_url.rstrip('/')}/api/v1/documents/upload"

    files = {"file": (filename, file_bytes, "text/html")}
    data = {
        "collection": "bp_compliance_datenschutz",
        "data_type": "compliance_datenschutz",
        "bundesland": "bund",
        "use_case": "court_decision",
        "year": metadata.get("date", "2024")[:4],
        "chunk_strategy": "legal",
        "chunk_size": "512",
        "chunk_overlap": "50",
        "metadata_json": json.dumps(metadata),
    }

    resp = client.post(endpoint, files=files, data=data)
    resp.raise_for_status()
    return resp.json()
|
|
|
|
|
|
def _build_metadata(decision: dict, case_id: str) -> dict:
    """Build the metadata payload uploaded alongside one decision."""
    return {
        "regulation_id": f"bag_{case_id}",
        "regulation_name_de": f"BAG {decision['case_number']} — {decision['subject']}",
        "category": "arbeitsrecht",
        "source": "bundesarbeitsgericht.de",
        "doc_type": "court_decision",
        "license": "public_domain_§5_UrhG",
        "court": "BAG",
        "case_number": decision["case_number"],
        "date": decision["date"],
        "subject_matter": decision["subject"],
        "keywords": decision["keywords"],
    }


def main():
    """Download every decision in BAG_DECISIONS and upload it to the RAG service.

    Command-line options:
        --rag-url: Base URL of the RAG service (default https://macmini:8097).
        --dry-run: Download only; skip the upload step.

    A failed download or upload is printed and counted but does not abort
    the run. A summary line with the counters is printed at the end.
    """
    parser = argparse.ArgumentParser(description="Ingest BAG court decisions into RAG")
    parser.add_argument("--rag-url", default="https://macmini:8097", help="RAG service URL")
    parser.add_argument("--dry-run", action="store_true", help="Download only, don't upload")
    args = parser.parse_args()

    stats = {"downloaded": 0, "uploaded": 0, "errors": 0}

    # Two clients, both closed on exit:
    # - source_client keeps TLS verification on, because the downloaded pages
    #   come from the public internet and end up in the knowledge base.
    # - rag_client keeps the original verify=False.
    #   NOTE(review): presumably the RAG host uses a self-signed certificate;
    #   confirm, and prefer passing a CA bundle instead.
    with httpx.Client(timeout=60) as source_client, httpx.Client(
        timeout=60, verify=False
    ) as rag_client:
        for index, decision in enumerate(BAG_DECISIONS):
            # Rate limiting: pause before every decision except the first, so
            # the delay also applies to dry runs and after failed downloads.
            if index:
                time.sleep(1)

            case_id = normalize_case_number(decision["case_number"])
            print(f"\n--- {decision['case_number']}: {decision['subject']} ---")

            # Download
            try:
                html_bytes = download_decision(decision["url"], source_client)
            except Exception as exc:
                # Deliberate best-effort: report and move on to the next one.
                print(f" ERROR downloading: {exc}")
                stats["errors"] += 1
                continue
            stats["downloaded"] += 1
            print(f" Downloaded: {len(html_bytes)} bytes")

            if args.dry_run:
                continue

            # Upload
            try:
                result = upload_to_rag(
                    html_bytes,
                    f"bag_{case_id}.html",
                    _build_metadata(decision, case_id),
                    args.rag_url,
                    rag_client,
                )
                stats["uploaded"] += 1
                print(f" Uploaded: {result.get('chunks_count', '?')} chunks, doc_id={result.get('document_id', '?')}")
            except Exception as exc:
                # Deliberate best-effort: one failed upload must not stop the batch.
                print(f" ERROR uploading: {exc}")
                stats["errors"] += 1

    print(f"\n=== Done: {stats['downloaded']} downloaded, {stats['uploaded']} uploaded, {stats['errors']} errors ===")
|
|
|
|
|
|
# Run the ingestion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|