# Files
# breakpilot-lehrer/edu-search-service/policies/bundeslaender.yaml
# Benjamin Boenisch 414e0f5ec0
# All checks were successful
# CI / go-lint (push) Has been skipped
# CI / python-lint (push) Has been skipped
# CI / nodejs-lint (push) Has been skipped
# CI / test-go-school (push) Successful in 28s
# CI / test-go-edu-search (push) Successful in 27s
# CI / test-python-klausur (push) Successful in 1m45s
# CI / test-python-agent-core (push) Successful in 16s
# CI / test-nodejs-website (push) Successful in 21s
# feat: edu-search-service migriert, voice-service/geo-service entfernt
# - edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor)
# - opensearch + edu-search-service in docker-compose.yml hinzugefuegt
# - voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core)
# - geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt)
# - CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt
#   (Go lint, test mit go mod download, build, SBOM)
#
# Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
# 2026-02-15 18:36:38 +01:00
#
# 348 lines
# 12 KiB
# YAML

# =============================================================================
# Source-Policy System - Initial Data Configuration
# =============================================================================
# This file contains the initial whitelist of allowed data sources for the
# edu-search-service. All sources must be official Open-Data portals or
# government sources under §5 UrhG (German Copyright Act).
#
# IMPORTANT:
# - Training with external data is FORBIDDEN (training: allowed: false)
# - All changes are logged in the audit trail
# - PII is blocked automatically
# =============================================================================
# =============================================================================
# FEDERAL / KMK (Bundesebene)
# =============================================================================
# Federal-level whitelist entries (KMK and national bodies).
# Each item under `sources` describes one allowed domain together with its
# license, the stated legal basis and the citation text to attach.
# NOTE(review): `{title}` / `{date}` look like placeholders filled in by the
# consumer, and `trust_boost` looks like a ranking weight - confirm against
# the service code.
federal:
  name: "KMK & Bundesebene"
  sources:
    # Kultusministerkonferenz (conference of the state education ministers)
    - domain: "kmk.org"
      name: "Kultusministerkonferenz"
      license: "§5 UrhG"
      legal_basis: "Amtliche Werke (§5 UrhG)"
      citation_template: "Quelle: KMK, {title}, {date}"
      trust_boost: 0.95

    # Deutscher Bildungsserver (German Education Server)
    - domain: "bildungsserver.de"
      name: "Deutscher Bildungsserver"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: Deutscher Bildungsserver, {title}, {date}"
      trust_boost: 0.90

    # IQB (Institut zur Qualitaetsentwicklung im Bildungswesen)
    - domain: "iqb.hu-berlin.de"
      name: "IQB Bildungstrends"
      license: "§5 UrhG"
      legal_basis: "Amtliche Werke (§5 UrhG)"
      citation_template: "Quelle: IQB, {title}, {date}"
      trust_boost: 0.90

    # BMBF (Federal Ministry of Education and Research)
    - domain: "bmbf.de"
      name: "Bundesministerium fuer Bildung und Forschung"
      license: "§5 UrhG"
      legal_basis: "Amtliche Werke (§5 UrhG)"
      citation_template: "Quelle: BMBF, {title}, {date}"
      trust_boost: 0.95
# =============================================================================
# NIEDERSACHSEN (NI)
# =============================================================================
# Whitelisted sources for Niedersachsen (Lower Saxony), keyed by the
# two-letter state code NI. One list item per allowed domain.
NI:
  name: "Niedersachsen"
  sources:
    # State education server
    - domain: "nibis.de"
      name: "NiBiS Bildungsserver"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: NiBiS, {title}, {date}"
      trust_boost: 0.85

    # State ministry of education
    - domain: "mk.niedersachsen.de"
      name: "Kultusministerium Niedersachsen"
      license: "§5 UrhG"
      legal_basis: "Amtliche Werke (§5 UrhG)"
      citation_template: "Quelle: MK Niedersachsen, {title}, {date}"
      trust_boost: 0.90

    # Core curricula (subdomain of nibis.de, listed as its own entry)
    - domain: "cuvo.nibis.de"
      name: "Kerncurricula Niedersachsen"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: Kerncurriculum Niedersachsen, {title}, {date}"
      trust_boost: 0.90

    # Online materials (subdomain of nibis.de, listed as its own entry)
    - domain: "nline.nibis.de"
      name: "NiBiS Online-Materialien"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: NiBiS, {title}, {date}"
      trust_boost: 0.80
# =============================================================================
# BAYERN (BY)
# =============================================================================
# Whitelisted sources for Bayern (Bavaria), keyed by the two-letter state
# code BY. One list item per allowed domain.
BY:
  name: "Bayern"
  sources:
    # State ministry of education
    - domain: "km.bayern.de"
      name: "Bayerisches Kultusministerium"
      license: "§5 UrhG"
      legal_basis: "Amtliche Werke (§5 UrhG)"
      citation_template: "Quelle: StMUK Bayern, {title}, {date}"
      trust_boost: 0.90

    - domain: "isb.bayern.de"
      name: "ISB Bayern"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: ISB Bayern, {title}, {date}"
      trust_boost: 0.90

    # Curriculum portal
    - domain: "lehrplanplus.bayern.de"
      name: "LehrplanPLUS"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: LehrplanPLUS Bayern, {title}, {date}"
      trust_boost: 0.90

    # Only Creative-Commons entry for this state; it also carries the lowest
    # trust_boost in this list.
    - domain: "mebis.bayern.de"
      name: "mebis Landesmedienzentrum"
      license: "CC-BY-SA"
      legal_basis: "Creative Commons"
      citation_template: "Quelle: mebis Bayern, {title}, {date}"
      trust_boost: 0.75
# =============================================================================
# BADEN-WUERTTEMBERG (BW)
# =============================================================================
# Whitelisted sources for Baden-Wuerttemberg, keyed by the two-letter state
# code BW. One list item per allowed domain.
BW:
  name: "Baden-Wuerttemberg"
  sources:
    # State ministry of education
    - domain: "km-bw.de"
      name: "Kultusministerium Baden-Wuerttemberg"
      license: "§5 UrhG"
      legal_basis: "Amtliche Werke (§5 UrhG)"
      citation_template: "Quelle: KM Baden-Wuerttemberg, {title}, {date}"
      trust_boost: 0.90

    # Education plans (curricula)
    - domain: "bildungsplaene-bw.de"
      name: "Bildungsplaene BW"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: Bildungsplan BW, {title}, {date}"
      trust_boost: 0.90

    # State education server
    - domain: "schule-bw.de"
      name: "Landesbildungsserver BW"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: Landesbildungsserver BW, {title}, {date}"
      trust_boost: 0.85
# =============================================================================
# NORDRHEIN-WESTFALEN (NW)
# =============================================================================
# Whitelisted sources for Nordrhein-Westfalen (North Rhine-Westphalia),
# keyed by the two-letter state code NW. One list item per allowed domain.
NW:
  name: "Nordrhein-Westfalen"
  sources:
    # State ministry for schools (note: bare ".nrw" top-level domain)
    - domain: "schulministerium.nrw"
      name: "Schulministerium NRW"
      license: "§5 UrhG"
      legal_basis: "Amtliche Werke (§5 UrhG)"
      citation_template: "Quelle: MSB NRW, {title}, {date}"
      trust_boost: 0.90

    - domain: "schulentwicklung.nrw.de"
      name: "QUA-LiS NRW"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: QUA-LiS NRW, {title}, {date}"
      trust_boost: 0.85

    # Only Creative-Commons entry for this state; it also carries the lowest
    # trust_boost in this list.
    - domain: "learn-line.nrw.de"
      name: "EDMOND NRW"
      license: "CC-BY-SA"
      legal_basis: "Creative Commons"
      citation_template: "Quelle: EDMOND NRW, {title}, {date}"
      trust_boost: 0.75
# =============================================================================
# HESSEN (HE)
# =============================================================================
# Whitelisted sources for Hessen (Hesse), keyed by the two-letter state
# code HE. One list item per allowed domain.
HE:
  name: "Hessen"
  sources:
    # State ministry of education
    - domain: "kultusministerium.hessen.de"
      name: "Kultusministerium Hessen"
      license: "§5 UrhG"
      legal_basis: "Amtliche Werke (§5 UrhG)"
      citation_template: "Quelle: HKM Hessen, {title}, {date}"
      trust_boost: 0.90

    # State school authority
    - domain: "lsa.hessen.de"
      name: "Landesschulamt Hessen"
      license: "§5 UrhG"
      legal_basis: "Amtliche Werke (§5 UrhG)"
      citation_template: "Quelle: LSA Hessen, {title}, {date}"
      trust_boost: 0.85

    # State education server
    - domain: "bildung.hessen.de"
      name: "Bildungsserver Hessen"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: Bildungsserver Hessen, {title}, {date}"
      trust_boost: 0.85
# =============================================================================
# SACHSEN (SN)
# =============================================================================
# Whitelisted sources for Sachsen (Saxony), keyed by the two-letter state
# code SN. One list item per allowed domain.
SN:
  name: "Sachsen"
  sources:
    # State ministry of education
    - domain: "smk.sachsen.de"
      name: "Kultusministerium Sachsen"
      license: "§5 UrhG"
      legal_basis: "Amtliche Werke (§5 UrhG)"
      citation_template: "Quelle: SMK Sachsen, {title}, {date}"
      trust_boost: 0.90

    # Curricula
    - domain: "lehrplaene.sachsen.de"
      name: "Lehrplaene Sachsen"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: Lehrplan Sachsen, {title}, {date}"
      trust_boost: 0.90

    # Subdomain of smk.sachsen.de, listed as its own entry with a different
    # license than the parent domain above.
    - domain: "sbi.smk.sachsen.de"
      name: "SBI Sachsen"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: SBI Sachsen, {title}, {date}"
      trust_boost: 0.85
# =============================================================================
# BERLIN (BE)
# =============================================================================
# Whitelisted sources for Berlin, keyed by the two-letter state code BE.
# One list item per allowed domain.
BE:
  name: "Berlin"
  sources:
    # Senate department for education.
    # NOTE(review): this `domain` value contains a URL path ("/sen/bildung"),
    # unlike most other entries in this file, which are bare host names.
    # Confirm the consumer matches on host + path prefix; a pure host-name
    # comparison would never match this value.
    - domain: "berlin.de/sen/bildung"
      name: "Senatsverwaltung fuer Bildung Berlin"
      license: "§5 UrhG"
      legal_basis: "Amtliche Werke (§5 UrhG)"
      citation_template: "Quelle: SenBJF Berlin, {title}, {date}"
      trust_boost: 0.90

    # Education server shared by Berlin and Brandenburg
    - domain: "bildungsserver.berlin-brandenburg.de"
      name: "Bildungsserver Berlin-Brandenburg"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: Bildungsserver Berlin-Brandenburg, {title}, {date}"
      trust_boost: 0.85
# =============================================================================
# HAMBURG (HH)
# =============================================================================
# Whitelisted sources for Hamburg, keyed by the two-letter state code HH.
# One list item per allowed domain.
HH:
  name: "Hamburg"
  sources:
    # School authority.
    # NOTE(review): this `domain` value contains a URL path ("/bsb"), unlike
    # most other entries in this file, which are bare host names. Confirm the
    # consumer matches on host + path prefix; a pure host-name comparison
    # would never match this value.
    - domain: "hamburg.de/bsb"
      name: "Schulbehoerde Hamburg"
      license: "§5 UrhG"
      legal_basis: "Amtliche Werke (§5 UrhG)"
      citation_template: "Quelle: BSB Hamburg, {title}, {date}"
      trust_boost: 0.90

    # State institute
    - domain: "li.hamburg.de"
      name: "Landesinstitut Hamburg"
      license: "DL-DE-BY-2.0"
      legal_basis: "Datenlizenz Deutschland"
      citation_template: "Quelle: LI Hamburg, {title}, {date}"
      trust_boost: 0.85
# =============================================================================
# DEFAULT OPERATIONS MATRIX
# =============================================================================
# IMPORTANT: Training is ALWAYS forbidden!
# Operations matrix: one mapping per operation. Every operation that is
# allowed here also sets `requires_citation: true`; `training` is the only
# operation switched off and therefore carries no citation flag.
default_operations:
  lookup:
    allowed: true
    requires_citation: true
  rag:
    allowed: true
    requires_citation: true
  training:
    allowed: false  # FORBIDDEN - training with external data is NOT allowed
  export:
    allowed: true
    requires_citation: true
# =============================================================================
# PII DETECTION RULES
# =============================================================================
# PII detection rules. Each list item has a display `name`, a `type` (always
# "regex" here), the `pattern` itself and a `severity` of "block" or "warn".
# The patterns are double-quoted YAML scalars, so every `\\` below reaches the
# regex engine as a single backslash (e.g. `\\d` is the regex `\d`).
# NOTE(review): what "block" vs "warn" triggers is decided by the consumer -
# confirm against the service code.
pii_rules:
  # Email addresses
  - name: "Email Addresses"
    type: "regex"
    pattern: "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}"
    severity: "block"

  # German phone numbers: "+49" or a leading "0", then digit groups that may
  # be separated by whitespace, "." or "-"
  - name: "German Phone Numbers"
    type: "regex"
    pattern: "(?:\\+49|0)[\\s.-]?\\d{2,4}[\\s.-]?\\d{3,}[\\s.-]?\\d{2,}"
    severity: "block"

  # German mobile numbers: prefix digits 15x / 16x / 17x directly after
  # "+49" or "0" (no separator is accepted between the two)
  - name: "German Mobile Numbers"
    type: "regex"
    pattern: "(?:\\+49|0)1[567]\\d[\\s.-]?\\d{3,}[\\s.-]?\\d{2,}"
    severity: "block"

  # German IBAN: "DE" followed by 20 digits, optionally space-grouped
  - name: "German IBAN"
    type: "regex"
    pattern: "DE\\d{2}\\s?\\d{4}\\s?\\d{4}\\s?\\d{4}\\s?\\d{4}\\s?\\d{2}"
    severity: "block"

  # German tax ID (Steuer-ID): 11 digits grouped 2-3-3-3.
  # NOTE(review): the pattern is unanchored, so it also matches any 11
  # consecutive digits inside a longer number. With severity "block" this can
  # over-match; consider word boundaries if false positives show up.
  - name: "German Tax ID"
    type: "regex"
    pattern: "\\d{2}\\s?\\d{3}\\s?\\d{3}\\s?\\d{3}"
    severity: "block"

  # Credit card numbers: four groups of four digits
  - name: "Credit Card Numbers"
    type: "regex"
    pattern: "(?:\\d{4}[\\s.-]?){3}\\d{4}"
    severity: "block"

  # German address pattern: five-digit postal code followed by a
  # capitalised word (city)
  - name: "German Address Pattern"
    type: "regex"
    pattern: "\\d{5}\\s+[A-ZÄÖÜ][a-zäöüß]+"
    severity: "warn"

  # Date of birth: a keyword (geboren / geb. / Geburtsdatum / DoB) followed
  # by a date written with "." or "/" separators
  - name: "Date of Birth"
    type: "regex"
    pattern: "(?:geboren|geb\\.|Geburtsdatum|DoB)[\\s:]*\\d{1,2}[\\./]\\d{1,2}[\\./]\\d{2,4}"
    severity: "warn"

  # Personal names introduced by a title (Herr, Frau, Dr., Prof.) followed by
  # two capitalised words
  - name: "Personal Names with Titles"
    type: "regex"
    pattern: "(?:Herr|Frau|Dr\\.|Prof\\.)\\s+[A-ZÄÖÜ][a-zäöüß]+\\s+[A-ZÄÖÜ][a-zäöüß]+"
    severity: "warn"

  # German health insurance number: one uppercase letter plus nine digits.
  # NOTE(review): unanchored, so any uppercase letter directly followed by
  # nine digits matches, even inside a longer identifier.
  - name: "Health Insurance Number"
    type: "regex"
    pattern: "[A-Z]\\d{9}"
    severity: "block"