"""Ingest BAG (Bundesarbeitsgericht) court decisions into RAG. Downloads PDFs from bundesarbeitsgericht.de and uploads them to the bp_compliance_datenschutz Qdrant collection via the RAG-Service API. These decisions are curated for IT/KI-Mitbestimmung relevance (§87 BetrVG). Usage: python scripts/ingest_bag_urteile.py [--rag-url https://macmini:8097] [--dry-run] """ import argparse import json import os import re import sys import tempfile import time import httpx # --------------------------------------------------------------------------- # Curated BAG decisions for IT/AI works council co-determination # --------------------------------------------------------------------------- BAG_DECISIONS = [ # --- M365 / Copilot / Standardsoftware --- { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-20-21/", "case_number": "1 ABR 20/21", "date": "2022-03-08", "subject": "Microsoft Office 365 — Mitbestimmung", "keywords": ["Microsoft 365", "Standardsoftware", "Ueberwachung", "§87 BetrVG"], }, { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abn-36-18/", "case_number": "1 ABN 36/18", "date": "2018-10-23", "subject": "Excel / Standardsoftware — keine Geringfuegigkeitsschwelle", "keywords": ["Excel", "Standardsoftware", "Geringfuegigkeit", "§87 BetrVG"], }, { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-45-11/", "case_number": "1 ABR 45/11", "date": "2012-09-25", "subject": "SAP ERP im Personalwesen", "keywords": ["SAP", "ERP", "Personalwesen", "Verhaltenskontrolle", "§87 BetrVG"], }, { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-31-19/", "case_number": "1 ABR 31/19", "date": "2021-01-27", "subject": "E-Mail-Kommunikationssoftware — Mitbestimmung", "keywords": ["E-Mail", "Kommunikation", "Software", "§87 BetrVG"], }, { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-13-17/", "case_number": "1 ABR 13/17", "date": "2019-07-09", "subject": "IT-System fuer Mitarbeiterbefragung", "keywords": ["Mitarbeiterbefragung", "Feedback", "technische Einrichtung", "§87 BetrVG"], }, { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-16-23/", "case_number": "1 ABR 16/23", "date": "2024-07-16", "subject": "Headset-System — Geraetenutzungsdaten", "keywords": ["Headset", "Geraetenutzung", "Ueberwachung", "§87 BetrVG"], }, # --- Ueberwachung, Social, Drittplattformen --- { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-7-15/", "case_number": "1 ABR 7/15", "date": "2016-12-13", "subject": "Facebook-Seite — indirekte Leistungsueberwachung", "keywords": ["Facebook", "Social Media", "Besucherbeitraege", "Ueberwachung", "§87 BetrVG"], }, { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-43-12/", "case_number": "1 ABR 43/12", "date": "2013-12-10", "subject": "Google Maps — indirekte Ueberwachung / Definition Ueberwachung", "keywords": ["Google Maps", "Routenplaner", "indirekte Ueberwachung", "Definition", "§87 BetrVG"], }, { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-68-13/", "case_number": "1 ABR 68/13", "date": "2015-07-21", "subject": "Ueberwachung durch technische Einrichtung eines Dritten (SaaS/Cloud)", "keywords": ["Drittsystem", "SaaS", "Cloud", "Ueberwachung", "§87 BetrVG"], }, # --- Video, Belastung, Leistungskennzahlen --- { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-78-11/", "case_number": "1 ABR 78/11", "date": "2012-12-11", "subject": "Videoueberwachung — Grundsatzentscheidung", "keywords": ["Videoueberwachung", "Kamera", "Arbeitsplatz", "§87 BetrVG"], }, { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-46-15/", "case_number": "1 ABR 46/15", "date": "2017-04-25", "subject": "Belastungsstatistik — dauerhafte Kennzahlenueberwachung", "keywords": ["Belastungsstatistik", "Kennzahlen", "Analytics", "Persoenlichkeitsrecht", "§87 BetrVG"], }, # --- Negative / abgrenzende Faelle --- { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-32-16/", "case_number": "1 ABR 32/16", "date": "2017-12-19", "subject": "Anti-Terror-Listen — keine Mitbestimmung", "keywords": ["Anti-Terror", "Sanktionsliste", "keine Mitbestimmung", "Abgrenzung", "§87 BetrVG"], }, { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-22-21/", "case_number": "1 ABR 22/21", "date": "2022-09-13", "subject": "Elektronische Arbeitszeiterfassung — Initiativrecht", "keywords": ["Arbeitszeiterfassung", "Initiativrecht", "digitale Systeme", "§87 BetrVG"], }, # --- Historische Grundsatzentscheidungen --- { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-43-81/", "case_number": "1 ABR 43/81", "date": "1983-12-06", "subject": "Grundsatz technische Ueberwachung — Eignung genuegt", "keywords": ["Grundsatz", "Eignung", "technische Einrichtung", "§87 BetrVG"], }, { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-23-82/", "case_number": "1 ABR 23/82", "date": "1984-09-14", "subject": "Erste Grundlinie IT-Systeme", "keywords": ["IT-System", "Grundlinie", "technische Einrichtung", "§87 BetrVG"], }, # --- E-Mail / Internet --- { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-46-10/", "case_number": "1 ABR 46/10", "date": "2012-02-07", "subject": "Internet- und E-Mail-Nutzung — Kommunikationsdaten", "keywords": ["Internet", "E-Mail", "Kommunikationsdaten", "Auswertung", "§87 BetrVG"], }, # --- HR / Bewertungssysteme --- { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-40-07/", "case_number": "1 ABR 40/07", "date": "2008-07-22", "subject": "Beurteilungssysteme — §94/§95 BetrVG", "keywords": ["Beurteilung", "Bewertungssystem", "HR", "§94 BetrVG", "§95 BetrVG"], }, { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-16-07/", "case_number": "1 ABR 16/07", "date": "2008-03-18", "subject": "Personalfrageboegen — Bewertung", "keywords": ["Personalfragebogen", "Bewertung", "HR-Tools", "§94 BetrVG"], }, # --- Video / physische Ueberwachung --- { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-21-03/", "case_number": "1 ABR 21/03", "date": "2004-06-29", "subject": "Videoueberwachung Arbeitsplatz", "keywords": ["Video", "Kamera", "Arbeitsplatz", "Ueberwachung", "§87 BetrVG"], }, # --- Zustaendigkeit --- { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-2-05/", "case_number": "1 ABR 2/05", "date": "2006-05-03", "subject": "Zustaendigkeit Betriebsrat bei konzernweiten Tools", "keywords": ["Zustaendigkeit", "Konzern", "Gesamtbetriebsrat", "§87 BetrVG"], }, { "url": "https://www.bundesarbeitsgericht.de/entscheidung/1-abr-58-04/", "case_number": "1 ABR 58/04", "date": "2006-03-28", "subject": "Mitbestimmung bei Einfuehrung technischer Systeme", "keywords": ["Systemeinführung", "technische Systeme", "Mitbestimmung", "§87 BetrVG"], }, ] def normalize_case_number(case_number: str) -> str: """Normalize case number for use as regulation_id.""" return re.sub(r"[^a-z0-9]", "_", case_number.lower()).strip("_") def download_decision(url: str, client: httpx.Client) -> bytes: """Download a BAG decision page as HTML.""" resp = client.get(url, follow_redirects=True) resp.raise_for_status() return resp.content def upload_to_rag( file_bytes: bytes, filename: str, metadata: dict, rag_url: str, client: httpx.Client, ) -> dict: """Upload a document to the RAG service.""" files = {"file": (filename, file_bytes, "text/html")} data = { "collection": "bp_compliance_datenschutz", "data_type": "compliance_datenschutz", "bundesland": "bund", "use_case": "court_decision", "year": metadata.get("date", "2024")[:4], "chunk_strategy": "legal", "chunk_size": "512", "chunk_overlap": "50", "metadata_json": json.dumps(metadata), } resp = client.post(f"{rag_url}/api/v1/documents/upload", files=files, data=data) resp.raise_for_status() return resp.json() def main(): parser = argparse.ArgumentParser(description="Ingest BAG court decisions into RAG") parser.add_argument("--rag-url", default="https://macmini:8097", help="RAG service URL") parser.add_argument("--dry-run", action="store_true", help="Download only, don't upload") args = parser.parse_args() client = httpx.Client(timeout=60, verify=False) stats = {"downloaded": 0, "uploaded": 0, "errors": 0} for decision in BAG_DECISIONS: case_id = normalize_case_number(decision["case_number"]) print(f"\n--- {decision['case_number']}: {decision['subject']} ---") # Download try: html_bytes = download_decision(decision["url"], client) stats["downloaded"] += 1 print(f" Downloaded: {len(html_bytes)} bytes") except Exception as e: print(f" ERROR downloading: {e}") stats["errors"] += 1 continue if args.dry_run: continue # Upload metadata = { "regulation_id": f"bag_{case_id}", "regulation_name_de": f"BAG {decision['case_number']} — {decision['subject']}", "category": "arbeitsrecht", "source": "bundesarbeitsgericht.de", "doc_type": "court_decision", "license": "public_domain_§5_UrhG", "court": "BAG", "case_number": decision["case_number"], "date": decision["date"], "subject_matter": decision["subject"], "keywords": decision["keywords"], } try: result = upload_to_rag( html_bytes, f"bag_{case_id}.html", metadata, args.rag_url, client, ) stats["uploaded"] += 1 print(f" Uploaded: {result.get('chunks_count', '?')} chunks, doc_id={result.get('document_id', '?')}") except Exception as e: print(f" ERROR uploading: {e}") stats["errors"] += 1 time.sleep(1) # Rate limiting print(f"\n=== Done: {stats['downloaded']} downloaded, {stats['uploaded']} uploaded, {stats['errors']} errors ===") if __name__ == "__main__": main()