This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/klausur-service/backend/country_metadata.py
BreakPilot Dev f927c0c205 feat(rag): Add DACH legal corpus ingestion (DE/AT/CH laws)
Add 29 new regulations (7 DE + 7 AT + 4 CH + 11 P2/P3) with country
metadata, legal corpus text excerpts, and updated RAG admin UI with
AT/CH type colors and labels. Fix module path in deploy script.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 09:24:33 +01:00

205 lines
9.2 KiB
Python

"""
Country metadata for EU/EEA (German: EWR) compliance coverage.
Provides entry points (DPA URLs, legal portals, languages) for the EU itself, selected
EU/EEA member states, Switzerland, and the United Kingdom.
Entries marked rag_coverage "full" (the EU entry and the DACH countries DE/AT/CH) are backed
by the RAG corpus; all other entries carry entry-point metadata for system prompt injection.
Usage:
    from country_metadata import COUNTRY_METADATA, get_country_context, DACH_COUNTRIES
"""
from typing import Dict, Optional
# Compliance-module tiers. Each tier extends the previous one. They are kept as
# tuples and copied with list(...) per entry so that no two countries share the
# same list object.
_MODULES_MINIMAL = ("DSI", "VVT")
_MODULES_CORE = _MODULES_MINIMAL + ("TOMs",)
_MODULES_EXTENDED = _MODULES_CORE + ("AGB", "Impressum", "Retention")
_MODULES_FULL = _MODULES_EXTENDED + ("DSFA", "AVV", "Cookies")

# Per-country metadata, keyed by upper-case two-letter code.
# Every entry has the same six keys:
#   country_name     - German display name (ASCII transliteration)
#   language_codes   - list of language codes
#   legal_portal     - {"name", "url"} of the official legislation portal
#   dpa              - {"name", "url"} of the data protection authority
#   rag_coverage     - "full" or "entry_point"
#   priority_modules - list of compliance module identifiers
COUNTRY_METADATA: Dict[str, dict] = {
    # ---- rag_coverage == "full" -------------------------------------------
    "EU": {
        "country_name": "Europaeische Union",
        "language_codes": ["de", "en", "fr"],
        "legal_portal": {"name": "EUR-Lex", "url": "https://eur-lex.europa.eu/"},
        "dpa": {"name": "European Data Protection Board (EDPB)", "url": "https://edpb.europa.eu/"},
        "rag_coverage": "full",
        "priority_modules": list(_MODULES_FULL),
    },
    "DE": {
        "country_name": "Deutschland",
        "language_codes": ["de"],
        "legal_portal": {"name": "Gesetze im Internet", "url": "https://www.gesetze-im-internet.de/"},
        "dpa": {"name": "Datenschutzkonferenz (DSK)", "url": "https://www.datenschutzkonferenz-online.de/"},
        "rag_coverage": "full",
        "priority_modules": list(_MODULES_FULL),
    },
    "AT": {
        "country_name": "Oesterreich",
        "language_codes": ["de"],
        "legal_portal": {"name": "Rechtsinformationssystem (RIS)", "url": "https://www.ris.bka.gv.at/"},
        "dpa": {"name": "Datenschutzbehoerde (DSB)", "url": "https://www.dsb.gv.at/"},
        "rag_coverage": "full",
        "priority_modules": list(_MODULES_FULL),
    },
    "CH": {
        "country_name": "Schweiz",
        "language_codes": ["de", "fr", "it"],
        "legal_portal": {"name": "Fedlex", "url": "https://www.fedlex.admin.ch/"},
        "dpa": {"name": "EDOEB", "url": "https://www.edoeb.admin.ch/"},
        "rag_coverage": "full",
        "priority_modules": list(_MODULES_FULL),
    },
    # ---- rag_coverage == "entry_point" ------------------------------------
    "FR": {
        "country_name": "Frankreich",
        "language_codes": ["fr"],
        "legal_portal": {"name": "Legifrance", "url": "https://www.legifrance.gouv.fr/"},
        "dpa": {"name": "CNIL", "url": "https://www.cnil.fr/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_EXTENDED),
    },
    "IT": {
        "country_name": "Italien",
        "language_codes": ["it"],
        "legal_portal": {"name": "Normattiva", "url": "https://www.normattiva.it/"},
        "dpa": {"name": "Garante per la protezione dei dati personali", "url": "https://www.garanteprivacy.it/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_EXTENDED),
    },
    "ES": {
        "country_name": "Spanien",
        "language_codes": ["es"],
        "legal_portal": {"name": "BOE", "url": "https://www.boe.es/"},
        "dpa": {"name": "AEPD", "url": "https://www.aepd.es/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_EXTENDED),
    },
    "NL": {
        "country_name": "Niederlande",
        "language_codes": ["nl"],
        "legal_portal": {"name": "Overheid.nl", "url": "https://wetten.overheid.nl/"},
        "dpa": {"name": "Autoriteit Persoonsgegevens", "url": "https://www.autoriteitpersoonsgegevens.nl/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_EXTENDED),
    },
    "BE": {
        "country_name": "Belgien",
        "language_codes": ["fr", "nl", "de"],
        "legal_portal": {"name": "eJustice Belgium", "url": "https://www.ejustice.just.fgov.be/"},
        "dpa": {"name": "Autorite de protection des donnees (APD)", "url": "https://www.autoriteprotectiondonnees.be/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_EXTENDED),
    },
    "LU": {
        "country_name": "Luxemburg",
        "language_codes": ["fr", "de"],
        "legal_portal": {"name": "Legilux", "url": "https://legilux.public.lu/"},
        "dpa": {"name": "CNPD", "url": "https://cnpd.public.lu/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_EXTENDED),
    },
    "IE": {
        "country_name": "Irland",
        "language_codes": ["en"],
        "legal_portal": {"name": "Irish Statute Book", "url": "https://www.irishstatutebook.ie/"},
        "dpa": {"name": "Data Protection Commission (DPC)", "url": "https://www.dataprotection.ie/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_EXTENDED),
    },
    "DK": {
        "country_name": "Daenemark",
        "language_codes": ["da"],
        "legal_portal": {"name": "Retsinformation", "url": "https://www.retsinformation.dk/"},
        "dpa": {"name": "Datatilsynet", "url": "https://www.datatilsynet.dk/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_CORE),
    },
    "SE": {
        "country_name": "Schweden",
        "language_codes": ["sv"],
        "legal_portal": {"name": "Riksdagen", "url": "https://www.riksdagen.se/"},
        "dpa": {"name": "IMY (Integritetsskyddsmyndigheten)", "url": "https://www.imy.se/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_CORE),
    },
    "FI": {
        "country_name": "Finnland",
        "language_codes": ["fi", "sv"],
        "legal_portal": {"name": "Finlex", "url": "https://www.finlex.fi/"},
        "dpa": {"name": "Tietosuojavaltuutetun toimisto", "url": "https://tietosuoja.fi/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_CORE),
    },
    "NO": {
        "country_name": "Norwegen",
        "language_codes": ["no"],
        "legal_portal": {"name": "Lovdata", "url": "https://lovdata.no/"},
        "dpa": {"name": "Datatilsynet", "url": "https://www.datatilsynet.no/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_CORE),
    },
    "IS": {
        "country_name": "Island",
        "language_codes": ["is"],
        "legal_portal": {"name": "Althingi", "url": "https://www.althingi.is/"},
        "dpa": {"name": "Personuvernd", "url": "https://www.personuvernd.is/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_MINIMAL),
    },
    "LI": {
        "country_name": "Liechtenstein",
        "language_codes": ["de"],
        "legal_portal": {"name": "Gesetze.li", "url": "https://www.gesetze.li/"},
        "dpa": {"name": "Datenschutzstelle", "url": "https://www.datenschutzstelle.li/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_CORE),
    },
    # NOTE(review): keyed as "UK"; the ISO 3166-1 alpha-2 code is "GB".
    # Confirm that callers really pass "UK" before relying on this key.
    "UK": {
        "country_name": "Vereinigtes Koenigreich",
        "language_codes": ["en"],
        "legal_portal": {"name": "Legislation.gov.uk", "url": "https://www.legislation.gov.uk/"},
        "dpa": {"name": "Information Commissioner's Office (ICO)", "url": "https://ico.org.uk/"},
        "rag_coverage": "entry_point",
        "priority_modules": list(_MODULES_EXTENDED),
    },
}

# Country codes of the German-speaking DACH region (Germany, Austria, Switzerland).
DACH_COUNTRIES = {"DE", "AT", "CH"}
def get_country_context(country_code: str) -> Optional[str]:
    """
    Generate a context string for system prompt injection.

    Entries whose ``rag_coverage`` is ``"full"`` get a hint that national laws
    can be searched in ``bp_legal_corpus`` plus their compliance modules.
    All other entries get an entry-point note that refers to the legal portal
    and the data protection authority (DPA).

    Args:
        country_code: Key of ``COUNTRY_METADATA`` (e.g. ``"DE"``). Matching is
            case-insensitive and surrounding whitespace is ignored.

    Returns:
        The newline-joined context block, or ``None`` when the code is not a
        string, is blank, or has no entry in ``COUNTRY_METADATA``.
    """
    # Guard against None / non-string input instead of raising AttributeError.
    if not isinstance(country_code, str):
        return None
    code = country_code.strip().upper()
    if not code:
        return None

    meta = COUNTRY_METADATA.get(code)
    if not meta:
        return None

    portal = meta["legal_portal"]
    dpa = meta["dpa"]
    lines = [
        f"Land: {meta['country_name']} ({code})",
        f"Sprachen: {', '.join(meta['language_codes'])}",
        # Name and URL need a separator; fused together they produce an
        # unusable token such as "EUR-Lexhttps://eur-lex.europa.eu/".
        f"Rechtsportal: {portal['name']} — {portal['url']}",
        f"Datenschutzbehoerde: {dpa['name']} — {dpa['url']}",
    ]

    if meta["rag_coverage"] == "full":
        lines.append(
            "RAG-Abdeckung: VOLLSTAENDIG — Suche in bp_legal_corpus fuer nationale Gesetze verfuegbar."
        )
        lines.append(f"Compliance-Module: {', '.join(meta['priority_modules'])}")
    else:
        lines.append(
            "RAG-Abdeckung: Einstiegspunkt — Keine nationalen Gesetze im RAG. "
            "Verweise auf das Rechtsportal und die Datenschutzbehoerde."
        )
        lines.append(
            f"Hinweis: Fuer detaillierte rechtliche Informationen zu {meta['country_name']} "
            "bitte das Rechtsportal oder die DPA konsultieren."
        )
    return "\n".join(lines)
def get_all_countries_summary() -> str:
    """
    Build the plain-text overview of all covered countries.

    The DACH countries are listed first under the full-coverage heading,
    followed by every other entry of ``COUNTRY_METADATA`` except ``"EU"``
    under the entry-point heading. Both groups are sorted by country code.

    Returns:
        The overview as a single newline-joined string.
    """

    def describe(code: str) -> str:
        # One overview row: code, display name and DPA name.
        entry = COUNTRY_METADATA[code]
        return f" {code}: {entry['country_name']} — DPA: {entry['dpa']['name']}"

    dach_rows = [describe(code) for code in sorted(DACH_COUNTRIES)]

    # Everything that is neither DACH nor the umbrella "EU" entry.
    other_codes = sorted(
        code
        for code in COUNTRY_METADATA
        if not (code in DACH_COUNTRIES or code == "EU")
    )
    other_rows = [describe(code) for code in other_codes]

    overview = [
        "Laenderabdeckung EU/EWR:",
        "",
        "VOLLSTAENDIG (DACH):",
        *dach_rows,
        "",
        "EINSTIEGSPUNKTE:",
        *other_rows,
    ]
    return "\n".join(overview)