""" NiBiS Filename Parsers Parses old and new naming conventions for NiBiS Abitur documents. """ import re from typing import Dict, Optional # Niveau-Mapping NIVEAU_MAPPING = { "ea": "eA", # erhoehtes Anforderungsniveau "ga": "gA", # grundlegendes Anforderungsniveau "neuga": "gA (neu einsetzend)", "neuea": "eA (neu einsetzend)", } def parse_filename_old_format(filename: str, file_path) -> Optional[Dict]: """ Parst alte Namenskonvention (2016, 2017): - {Jahr}{Fach}{Niveau}Lehrer/{Jahr}{Fach}{Niveau}A{Nr}L.pdf - Beispiel: 2016DeutschEALehrer/2016DeutschEAA1L.pdf """ # Pattern fuer Lehrer-Dateien pattern = r"(\d{4})([A-Za-z\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc]+)(EA|GA|NeuGA|NeuEA)(?:Lehrer)?.*?(?:A(\d+)|Aufg(\d+))?L?\.pdf$" match = re.search(pattern, filename, re.IGNORECASE) if not match: return None year = int(match.group(1)) subject_raw = match.group(2).lower() niveau = match.group(3).upper() task_num = match.group(4) or match.group(5) # Pruefe ob es ein Lehrer-Dokument ist (EWH) is_ewh = "lehrer" in str(file_path).lower() or filename.endswith("L.pdf") # Extrahiere Variante (Tech, Wirt, CAS, GTR, etc.) variant = None variant_patterns = ["Tech", "Wirt", "CAS", "GTR", "Pflicht", "BG", "mitExp", "ohneExp"] for v in variant_patterns: if v.lower() in str(file_path).lower(): variant = v break return { "year": year, "subject": subject_raw, "niveau": NIVEAU_MAPPING.get(niveau.lower(), niveau), "task_number": int(task_num) if task_num else None, "doc_type": "EWH" if is_ewh else "Aufgabe", "variant": variant, } def parse_filename_new_format(filename: str, file_path) -> Optional[Dict]: """ Parst neue Namenskonvention (2024, 2025): - {Jahr}_{Fach}_{niveau}_{Nr}_EWH.pdf - Beispiel: 2025_Deutsch_eA_I_EWH.pdf """ # Pattern fuer neue Dateien pattern = r"(\d{4})_([A-Za-z\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc]+)(?:BG)?_(eA|gA)(?:_([IVX\d]+))?(?:_(.+))?\.pdf$" match = re.search(pattern, filename, re.IGNORECASE) if not match: return None year = int(match.group(1)) subject_raw = match.group(2).lower() niveau = match.group(3) task_id = match.group(4) suffix = match.group(5) or "" # Task-Nummer aus roemischen Zahlen task_num = None if task_id: roman_map = {"I": 1, "II": 2, "III": 3, "IV": 4, "V": 5} task_num = roman_map.get(task_id) or (int(task_id) if task_id.isdigit() else None) # Dokumenttyp is_ewh = "EWH" in filename or "ewh" in filename.lower() # Spezielle Dokumenttypen doc_type = "EWH" if is_ewh else "Aufgabe" if "Material" in suffix: doc_type = "Material" elif "GBU" in suffix: doc_type = "GBU" elif "Ergebnis" in suffix: doc_type = "Ergebnis" elif "Bewertungsbogen" in suffix: doc_type = "Bewertungsbogen" elif "HV" in suffix: doc_type = "Hoerverstehen" elif "ME" in suffix: doc_type = "Mediation" # BG Variante variant = "BG" if "BG" in filename else None if "mitExp" in str(file_path): variant = "mitExp" return { "year": year, "subject": subject_raw, "niveau": NIVEAU_MAPPING.get(niveau.lower(), niveau), "task_number": task_num, "doc_type": doc_type, "variant": variant, }