feat(rag): Migrate national DPA laws from bp_dsfa_corpus to bp_legal_corpus
Move 23 sources (18 national data protection laws + 5 EDPB guidelines/SCC) from bp_dsfa_corpus to bp_legal_corpus with vector preservation. Extend REGULATIONS array with national_law and eu_guideline types. Mark migrated sources in dsfa_corpus_ingestion.py to prevent re-ingestion. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,7 @@ import { AIModuleSidebarResponsive } from '@/components/ai/AIModuleSidebar'
|
||||
|
||||
// API uses local proxy routes; the operation is selected with the `action` query parameter.

/** Local proxy route to the klausur-service legal corpus API. */
const API_PROXY = '/api/legal-corpus'

/** Local proxy route for the DSFA corpus API (queried with `?action=status` and `?action=sources`). */
const DSFA_API_PROXY = '/api/dsfa-corpus'
|
||||
|
||||
// Types
|
||||
interface RegulationStatus {
|
||||
@@ -45,6 +46,32 @@ interface SearchResult {
|
||||
score: number
|
||||
}
|
||||
|
||||
/**
 * One DSFA corpus source as delivered by `/api/dsfa-corpus?action=sources`.
 *
 * Field names mirror the JSON payload (snake_case). Fields marked optional
 * may be missing from the response.
 */
interface DsfaSource {
  source_code: string
  name: string
  full_name?: string
  organization?: string
  source_url?: string
  license_code: string
  attribution_text: string
  document_type: string
  language: string
  chunk_count?: number
}
|
||||
|
||||
/**
 * Status summary of the DSFA corpus as delivered by
 * `/api/dsfa-corpus?action=status`.
 *
 * `total_chunks` is the figure shown on the DSFA stats cards.
 */
interface DsfaCorpusStatus {
  qdrant_collection: string
  total_sources: number
  total_documents: number
  total_chunks: number
  qdrant_points_count: number
  qdrant_status: string
}
|
||||
|
||||
/**
 * RAG category filter for the Regulations tab.
 * Selects which collection's content is listed below the filter buttons.
 */
type RegulationCategory = 'regulations' | 'dsfa' | 'nibis' | 'templates'
|
||||
|
||||
/** Tab definitions: identifiers of the page's top-level tabs. */
type TabId = 'overview' | 'regulations' | 'map' | 'search' | 'data' | 'ingestion' | 'pipeline'
|
||||
|
||||
@@ -366,13 +393,331 @@ const REGULATIONS = [
|
||||
keyTopics: ['Patientenakte (MyHealth@EU)', 'Sekundaernutzung', 'Datenzugangsorgane', 'Gesundheitsdatenstandards', 'Forschungszugang'],
|
||||
effectiveDate: '2025 (gestaffelt bis 2029)'
|
||||
},
|
||||
// National Data Protection Laws (migrated from bp_dsfa_corpus)
|
||||
{
|
||||
code: 'AT_DSG',
|
||||
name: 'DSG Oesterreich',
|
||||
fullName: 'Datenschutzgesetz Oesterreich (DSG)',
|
||||
type: 'national_law',
|
||||
expected: 50,
|
||||
description: 'Oesterreichisches Datenschutzgesetz zur Ergaenzung der DSGVO. Regelt nationale Besonderheiten wie Bildverarbeitung, Datenschutzbehoerde und Strafbestimmungen.',
|
||||
relevantFor: ['Unternehmen in Oesterreich', 'DACH-Unternehmen', 'Auftragsverarbeiter'],
|
||||
keyTopics: ['Nationale DSGVO-Ergaenzung', 'Bildverarbeitung', 'Datenschutzbehoerde', 'Strafbestimmungen'],
|
||||
effectiveDate: '25. Mai 2018'
|
||||
},
|
||||
{
  // German federal law, hence type 'de_law' rather than 'national_law'.
  code: 'BDSG_FULL',
  name: 'BDSG',
  fullName: 'Bundesdatenschutzgesetz (BDSG) - Volltext',
  type: 'de_law',
  expected: 9,
  description: 'Deutsches Bundesdatenschutzgesetz als nationale Ergaenzung zur DSGVO. Regelt Beschaeftigtendatenschutz, Videoueberwachung, Scoring und Datenschutzbeauftragte.',
  relevantFor: ['Deutsche Unternehmen', 'Arbeitgeber', 'Auskunfteien', 'Oeffentliche Stellen'],
  keyTopics: ['Beschaeftigtendatenschutz', 'Videoueberwachung', 'Scoring', 'Datenschutzbeauftragter'],
  effectiveDate: '25. Mai 2018'
},
|
||||
{
|
||||
code: 'CH_DSG',
|
||||
name: 'DSG Schweiz',
|
||||
fullName: 'Datenschutzgesetz Schweiz (revDSG 2023)',
|
||||
type: 'national_law',
|
||||
expected: 2,
|
||||
description: 'Revidiertes Schweizer Datenschutzgesetz mit DSGVO-nahen Anforderungen. Gilt fuer Schweizer Unternehmen und solche, die Schweizer Daten verarbeiten.',
|
||||
relevantFor: ['Schweizer Unternehmen', 'DACH-Unternehmen', 'Internationale Dienstleister'],
|
||||
keyTopics: ['Datenschutz-Folgenabschaetzung', 'Meldepflichten', 'Profiling', 'Strafbestimmungen'],
|
||||
effectiveDate: '1. September 2023'
|
||||
},
|
||||
{
|
||||
code: 'LI_DSG',
|
||||
name: 'DSG Liechtenstein',
|
||||
fullName: 'Datenschutzgesetz Liechtenstein',
|
||||
type: 'national_law',
|
||||
expected: 1,
|
||||
description: 'Liechtensteinisches Datenschutzgesetz als EWR-Umsetzung der DSGVO.',
|
||||
relevantFor: ['Unternehmen in Liechtenstein', 'EWR-Dienstleister'],
|
||||
keyTopics: ['EWR-Datenschutz', 'Nationale Ergaenzung', 'Datenschutzstelle'],
|
||||
effectiveDate: '2018'
|
||||
},
|
||||
{
|
||||
code: 'BE_DPA_LAW',
|
||||
name: 'Datenschutzgesetz Belgien',
|
||||
fullName: 'Loi relative a la protection des donnees (Belgien)',
|
||||
type: 'national_law',
|
||||
expected: 153,
|
||||
description: 'Belgisches Datenschutzgesetz zur nationalen Umsetzung der DSGVO. Regelt die Autorite de protection des donnees (APD).',
|
||||
relevantFor: ['Unternehmen in Belgien', 'EU-Hauptsitz Bruessel'],
|
||||
keyTopics: ['APD', 'Nationale DSGVO-Umsetzung', 'Strafbestimmungen', 'Sektorale Regeln'],
|
||||
effectiveDate: '2018'
|
||||
},
|
||||
{
|
||||
code: 'NL_UAVG',
|
||||
name: 'UAVG Niederlande',
|
||||
fullName: 'Uitvoeringswet AVG (UAVG) Niederlande',
|
||||
type: 'national_law',
|
||||
expected: 138,
|
||||
description: 'Niederlaendisches Ausfuehrungsgesetz zur DSGVO. Regelt nationale Besonderheiten wie BSN-Verarbeitung und Gesundheitsdaten.',
|
||||
relevantFor: ['Unternehmen in den Niederlanden', 'Gesundheitssektor NL'],
|
||||
keyTopics: ['BSN-Nummer', 'Gesundheitsdaten', 'Autoriteit Persoonsgegevens', 'Nationale Ergaenzung'],
|
||||
effectiveDate: '25. Mai 2018'
|
||||
},
|
||||
{
|
||||
code: 'FR_CNIL_GUIDE',
|
||||
name: 'CNIL Guide RGPD',
|
||||
fullName: 'Guide pratique RGPD (CNIL Frankreich)',
|
||||
type: 'national_law',
|
||||
expected: 14,
|
||||
description: 'Praktischer DSGVO-Leitfaden der franzoesischen Datenschutzbehoerde CNIL. Wichtig fuer alle Unternehmen mit franzoesischen Kunden.',
|
||||
relevantFor: ['Unternehmen in Frankreich', 'Franzoesischsprachige Maerkte'],
|
||||
keyTopics: ['CNIL-Guidance', 'Cookies', 'Einwilligung', 'Sanktionen'],
|
||||
effectiveDate: '2018'
|
||||
},
|
||||
{
|
||||
code: 'ES_LOPDGDD',
|
||||
name: 'LOPDGDD Spanien',
|
||||
fullName: 'Ley Organica de Proteccion de Datos (LOPDGDD) Spanien',
|
||||
type: 'national_law',
|
||||
expected: 154,
|
||||
description: 'Spanisches organisches Datenschutzgesetz mit Garantien digitaler Rechte. Umfassende DSGVO-Umsetzung mit digitalen Grundrechten.',
|
||||
relevantFor: ['Unternehmen in Spanien', 'Spanischsprachige Maerkte'],
|
||||
keyTopics: ['Digitale Rechte', 'AEPD', 'Recht auf Vergessenwerden', 'Beschaeftigtendatenschutz'],
|
||||
effectiveDate: '7. Dezember 2018'
|
||||
},
|
||||
{
|
||||
code: 'IT_CODICE_PRIVACY',
|
||||
name: 'Codice Privacy Italien',
|
||||
fullName: 'Codice in materia di protezione dei dati personali (Italien)',
|
||||
type: 'national_law',
|
||||
expected: 3,
|
||||
description: 'Italienisches Datenschutzgesetzbuch, aktualisiert gemaess DSGVO. Umfassende nationale Regelung durch den Garante.',
|
||||
relevantFor: ['Unternehmen in Italien', 'Garante-regulierte Sektoren'],
|
||||
keyTopics: ['Garante Privacy', 'Codice Privacy', 'Gesundheitsdaten', 'Strafrecht'],
|
||||
effectiveDate: '2018 (aktualisiert)'
|
||||
},
|
||||
{
|
||||
code: 'IE_DPA_2018',
|
||||
name: 'DPA 2018 Ireland',
|
||||
fullName: 'Data Protection Act 2018 (Ireland)',
|
||||
type: 'national_law',
|
||||
expected: 28,
|
||||
description: 'Irisches Datenschutzgesetz. Besonders relevant da viele Tech-Konzerne (Google, Meta, Apple) ihren EU-Hauptsitz in Irland haben.',
|
||||
relevantFor: ['Tech-Konzerne mit EU-Sitz Irland', 'Irische Unternehmen', 'DPC-reguliert'],
|
||||
keyTopics: ['DPC Ireland', 'Big Tech Aufsicht', 'Nationale Ergaenzung', 'Strafbestimmungen'],
|
||||
effectiveDate: '24. Mai 2018'
|
||||
},
|
||||
{
|
||||
code: 'UK_DPA_2018',
|
||||
name: 'DPA 2018 UK',
|
||||
fullName: 'Data Protection Act 2018 (United Kingdom)',
|
||||
type: 'national_law',
|
||||
expected: 94,
|
||||
description: 'Britisches Datenschutzgesetz nach dem Brexit. Ergaenzt die UK GDPR mit nationalen Bestimmungen, reguliert durch das ICO.',
|
||||
relevantFor: ['Unternehmen mit UK-Kunden', 'UK-Datentransfers', 'ICO-regulierte Unternehmen'],
|
||||
keyTopics: ['ICO', 'UK Adequacy', 'Post-Brexit Datenschutz', 'Law Enforcement'],
|
||||
effectiveDate: '23. Mai 2018'
|
||||
},
|
||||
{
|
||||
code: 'UK_GDPR',
|
||||
name: 'UK GDPR',
|
||||
fullName: 'UK General Data Protection Regulation (retained EU law)',
|
||||
type: 'national_law',
|
||||
expected: 24,
|
||||
description: 'In UK-Recht ueberfuehrte DSGVO nach dem Brexit. Weitgehend identisch mit EU-DSGVO, aber unter britischer Aufsicht (ICO).',
|
||||
relevantFor: ['UK-Unternehmen', 'EU-UK Datentransfers', 'Internationale Konzerne'],
|
||||
keyTopics: ['Retained EU Law', 'UK-EU Adequacy', 'ICO Enforcement', 'UK-spezifische Anpassungen'],
|
||||
effectiveDate: '1. Januar 2021'
|
||||
},
|
||||
{
|
||||
code: 'NO_PERSONOPPLYSNINGSLOVEN',
|
||||
name: 'Personopplysningsloven',
|
||||
fullName: 'Personopplysningsloven (Norwegen)',
|
||||
type: 'national_law',
|
||||
expected: 18,
|
||||
description: 'Norwegisches Datenschutzgesetz als EWR-Umsetzung der DSGVO. Reguliert durch Datatilsynet.',
|
||||
relevantFor: ['Unternehmen in Norwegen', 'EWR-Dienstleister', 'Skandinavische Maerkte'],
|
||||
keyTopics: ['Datatilsynet', 'EWR-Datenschutz', 'Nationale Ergaenzung', 'Kameras'],
|
||||
effectiveDate: '20. Juli 2018'
|
||||
},
|
||||
{
|
||||
code: 'SE_DATASKYDDSLAG',
|
||||
name: 'Dataskyddslag Schweden',
|
||||
fullName: 'Dataskyddslag (2018:218) Schweden',
|
||||
type: 'national_law',
|
||||
expected: 30,
|
||||
description: 'Schwedisches Datenschutzgesetz als ergaenzende Bestimmungen zur DSGVO. Reguliert durch IMY.',
|
||||
relevantFor: ['Unternehmen in Schweden', 'Skandinavische Maerkte'],
|
||||
keyTopics: ['IMY', 'Personnummer', 'Forschungsdaten', 'Pressefreiheit'],
|
||||
effectiveDate: '25. Mai 2018'
|
||||
},
|
||||
{
|
||||
code: 'FI_TIETOSUOJALAKI',
|
||||
name: 'Tietosuojalaki Finnland',
|
||||
fullName: 'Tietosuojalaki (1050/2018) Finnland',
|
||||
type: 'national_law',
|
||||
expected: 1,
|
||||
description: 'Finnisches Datenschutzgesetz als nationale Ergaenzung zur DSGVO.',
|
||||
relevantFor: ['Unternehmen in Finnland', 'Nordische Maerkte'],
|
||||
keyTopics: ['Nationale Ergaenzung', 'Tietosuojavaltuutettu', 'Forschungsdaten'],
|
||||
effectiveDate: '1. Januar 2019'
|
||||
},
|
||||
{
|
||||
code: 'PL_UODO',
|
||||
name: 'UODO Polen',
|
||||
fullName: 'Ustawa o ochronie danych osobowych (Polen)',
|
||||
type: 'national_law',
|
||||
expected: 1,
|
||||
description: 'Polnisches Datenschutzgesetz als DSGVO-Umsetzung. Reguliert durch den UODO (Praesident des Amtes fuer den Schutz personenbezogener Daten).',
|
||||
relevantFor: ['Unternehmen in Polen', 'Osteuropaeische Maerkte'],
|
||||
keyTopics: ['UODO', 'Nationale Ergaenzung', 'Strafbestimmungen', 'Oeffentlicher Sektor'],
|
||||
effectiveDate: '25. Mai 2018'
|
||||
},
|
||||
{
|
||||
code: 'CZ_ZOU',
|
||||
name: 'Zakon Tschechien',
|
||||
fullName: 'Zakon o zpracovani osobnich udaju (Tschechien)',
|
||||
type: 'national_law',
|
||||
expected: 135,
|
||||
description: 'Tschechisches Datenschutzgesetz zur DSGVO-Umsetzung. Reguliert durch das UOOU.',
|
||||
relevantFor: ['Unternehmen in Tschechien', 'Mitteleuropaeische Maerkte'],
|
||||
keyTopics: ['UOOU', 'Nationale Ergaenzung', 'Kamerasysteme', 'Strafbestimmungen'],
|
||||
effectiveDate: '24. April 2019'
|
||||
},
|
||||
{
|
||||
code: 'HU_INFOTV',
|
||||
name: 'Infotv. Ungarn',
|
||||
fullName: 'Informacios torvenye (Infotv.) Ungarn',
|
||||
type: 'national_law',
|
||||
expected: 156,
|
||||
description: 'Ungarisches Informationsgesetz ueber Selbstbestimmung und Informationsfreiheit als DSGVO-Ergaenzung. Reguliert durch NAIH.',
|
||||
relevantFor: ['Unternehmen in Ungarn', 'Mitteleuropaeische Maerkte'],
|
||||
keyTopics: ['NAIH', 'Informationsfreiheit', 'Nationale Ergaenzung', 'Datensicherheit'],
|
||||
effectiveDate: '2018 (aktualisiert)'
|
||||
},
|
||||
{
|
||||
code: 'SCC_FULL_TEXT',
|
||||
name: 'SCC Volltext',
|
||||
fullName: 'Standardvertragsklauseln Volltext (2021/914/EU)',
|
||||
type: 'eu_regulation',
|
||||
expected: 154,
|
||||
description: 'Vollstaendiger Text der EU-Standardvertragsklauseln fuer internationale Datentransfers. Alle Module (C2C, C2P, P2C, P2P) mit Annexen.',
|
||||
relevantFor: ['Alle mit Drittlandtransfers', 'Cloud-Nutzer', 'Auftragsverarbeiter'],
|
||||
keyTopics: ['Module 1-4', 'TIA', 'Annexe', 'Technische Massnahmen'],
|
||||
effectiveDate: '27. Juni 2021'
|
||||
},
|
||||
{
  code: 'EDPB_GUIDELINES_2_2019',
  name: 'EDPB GL Art. 6(1)(b)',
  fullName: 'EDPB Leitlinien 2/2019 zu Art. 6(1)(b) DSGVO',
  type: 'eu_guideline',
  expected: 3,
  // Umlauts are transliterated (ae/oe/ue) throughout this file.
  description: 'EDPB-Leitlinien zur Verarbeitung personenbezogener Daten auf Grundlage der Vertragserfuellung gemaess Art. 6 Abs. 1 lit. b DSGVO.',
  relevantFor: ['Alle Verantwortlichen', 'Vertragsdatenverarbeitung', 'Online-Dienste'],
  keyTopics: ['Vertragserfuellung', 'Art. 6(1)(b)', 'Erforderlichkeit', 'Online-Dienste'],
  effectiveDate: '2019'
},
|
||||
{
|
||||
code: 'EDPB_GUIDELINES_3_2019',
|
||||
name: 'EDPB GL Videoueberwachung',
|
||||
fullName: 'EDPB Leitlinien 3/2019 Videoueberwachung',
|
||||
type: 'eu_guideline',
|
||||
expected: 3,
|
||||
description: 'EDPB-Leitlinien zur Verarbeitung personenbezogener Daten durch Videoueberwachungsgeraete.',
|
||||
relevantFor: ['Videoueberwachung', 'Sicherheitsdienste', 'Einzelhandel', 'Oeffentliche Stellen'],
|
||||
keyTopics: ['Videoueberwachung', 'Kameras', 'Speicherfristen', 'Hinweisschilder'],
|
||||
effectiveDate: '2020'
|
||||
},
|
||||
{
|
||||
code: 'EDPB_GUIDELINES_5_2020',
|
||||
name: 'EDPB GL Einwilligung',
|
||||
fullName: 'EDPB Leitlinien 5/2020 zur Einwilligung',
|
||||
type: 'eu_guideline',
|
||||
expected: 2,
|
||||
description: 'EDPB-Leitlinien zur Einwilligung gemaess DSGVO. Klaert Anforderungen an gueltige Einwilligungen, Widerruf und Cookie-Consent.',
|
||||
relevantFor: ['Website-Betreiber', 'Marketing', 'App-Entwickler', 'Consent-Management'],
|
||||
keyTopics: ['Einwilligung', 'Cookie-Consent', 'Widerruf', 'Freiwilligkeit'],
|
||||
effectiveDate: '2020'
|
||||
},
|
||||
{
|
||||
code: 'EDPB_GUIDELINES_7_2020',
|
||||
name: 'EDPB GL Controller/Processor',
|
||||
fullName: 'EDPB Leitlinien 7/2020 Controller und Processor',
|
||||
type: 'eu_guideline',
|
||||
expected: 2,
|
||||
description: 'EDPB-Leitlinien zu den Begriffen Verantwortlicher und Auftragsverarbeiter. Klaert Rollen, Pflichten und Joint Controllership.',
|
||||
relevantFor: ['Alle Verantwortlichen', 'Auftragsverarbeiter', 'Joint Controller'],
|
||||
keyTopics: ['Verantwortlicher', 'Auftragsverarbeiter', 'Joint Controller', 'AVV'],
|
||||
effectiveDate: '2021'
|
||||
},
|
||||
]
|
||||
|
||||
/**
 * License info for each regulation, keyed by the regulation's `code`.
 *
 * `license` is a license code that the UI translates through LICENSE_LABELS
 * (falling back to the raw code); `licenseNote` is shown verbatim next to it.
 * Regulations without an entry here simply render no license badge.
 */
const REGULATION_LICENSES: Record<string, { license: string; licenseNote: string }> = {
  GDPR: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk der EU — frei verwendbar' },
  EPRIVACY: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
  TDDDG: { license: 'PUBLIC_DOMAIN', licenseNote: 'Deutsches Bundesgesetz — amtliches Werk (§5 UrhG)' },
  SCC: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Durchfuehrungsbeschluss — amtliches Werk' },
  DPF: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Angemessenheitsbeschluss — amtliches Werk' },
  AIACT: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
  CRA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
  NIS2: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
  EUCSA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
  DATAACT: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
  DGA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
  DSA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
  EAA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
  DSM: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
  PLD: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
  GPSR: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
  'BSI-TR-03161-1': { license: 'DL-DE-BY-2.0', licenseNote: 'Datenlizenz Deutschland — Namensnennung 2.0' },
  'BSI-TR-03161-2': { license: 'DL-DE-BY-2.0', licenseNote: 'Datenlizenz Deutschland — Namensnennung 2.0' },
  'BSI-TR-03161-3': { license: 'DL-DE-BY-2.0', licenseNote: 'Datenlizenz Deutschland — Namensnennung 2.0' },
  DORA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
  PSD2: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Richtlinie — amtliches Werk' },
  AMLR: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
  MiCA: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },
  EHDS: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Verordnung — amtliches Werk' },

  // National Data Protection Laws
  AT_DSG: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Oesterreich — frei verwendbar' },
  BDSG_FULL: { license: 'PUBLIC_DOMAIN', licenseNote: 'Deutsches Bundesgesetz — amtliches Werk (§5 UrhG)' },
  CH_DSG: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Schweiz — frei verwendbar' },
  LI_DSG: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Liechtenstein — frei verwendbar' },
  BE_DPA_LAW: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Belgien — frei verwendbar' },
  NL_UAVG: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Niederlande — frei verwendbar' },
  FR_CNIL_GUIDE: { license: 'PUBLIC_DOMAIN', licenseNote: 'CNIL — oeffentliches Dokument' },
  ES_LOPDGDD: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Spanien (BOE) — frei verwendbar' },
  IT_CODICE_PRIVACY: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Italien — frei verwendbar' },
  // NOTE(review): the Open Government Licence v3.0 is a UK licence — confirm
  // that it is the correct licence for Irish legislation before relying on it.
  IE_DPA_2018: { license: 'OGL-3.0', licenseNote: 'Open Government Licence v3.0 — Ireland' },
  UK_DPA_2018: { license: 'OGL-3.0', licenseNote: 'Open Government Licence v3.0 — UK' },
  UK_GDPR: { license: 'OGL-3.0', licenseNote: 'Open Government Licence v3.0 — UK' },
  NO_PERSONOPPLYSNINGSLOVEN: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Norwegen — frei verwendbar' },
  SE_DATASKYDDSLAG: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Schweden — frei verwendbar' },
  FI_TIETOSUOJALAKI: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Finnland — frei verwendbar' },
  PL_UODO: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Polen — frei verwendbar' },
  CZ_ZOU: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Tschechien — frei verwendbar' },
  HU_INFOTV: { license: 'PUBLIC_DOMAIN', licenseNote: 'Amtliches Werk Ungarn — frei verwendbar' },

  // EU standard contractual clauses and EDPB guidelines
  SCC_FULL_TEXT: { license: 'PUBLIC_DOMAIN', licenseNote: 'EU-Durchfuehrungsbeschluss — amtliches Werk' },
  EDPB_GUIDELINES_2_2019: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
  EDPB_GUIDELINES_3_2019: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
  EDPB_GUIDELINES_5_2020: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
  EDPB_GUIDELINES_7_2020: { license: 'EDPB-LICENSE', licenseNote: 'EDPB Document License' },
}
|
||||
|
||||
/**
 * License display labels, keyed by license code.
 * Codes without an entry are displayed as the raw code by the caller.
 */
const LICENSE_LABELS: Record<string, string> = {
  PUBLIC_DOMAIN: 'Public Domain',
  'DL-DE-BY-2.0': 'DL-DE-BY 2.0',
  'CC-BY-4.0': 'CC BY 4.0',
  'EDPB-LICENSE': 'EDPB License',
  'OGL-3.0': 'OGL v3.0',
  PROPRIETARY: 'Proprietaer',
}
|
||||
|
||||
/**
 * CSS utility classes (background + text colour) for the badge of each
 * regulation `type`. Keys match the `type` values used in REGULATIONS.
 */
const TYPE_COLORS: Record<string, string> = {
  eu_regulation: 'bg-blue-100 text-blue-700',
  eu_directive: 'bg-purple-100 text-purple-700',
  de_law: 'bg-yellow-100 text-yellow-700',
  bsi_standard: 'bg-green-100 text-green-700',
  national_law: 'bg-orange-100 text-orange-700',
  eu_guideline: 'bg-teal-100 text-teal-700',
}
|
||||
|
||||
const TYPE_LABELS: Record<string, string> = {
|
||||
@@ -380,6 +725,8 @@ const TYPE_LABELS: Record<string, string> = {
|
||||
eu_directive: 'EU-RL',
|
||||
de_law: 'DE-Gesetz',
|
||||
bsi_standard: 'BSI',
|
||||
national_law: 'Nat. Gesetz',
|
||||
eu_guideline: 'EDPB-GL',
|
||||
}
|
||||
|
||||
// Industry/Sector definitions for the regulation map
|
||||
@@ -693,6 +1040,13 @@ export default function RAGPage() {
|
||||
const [autoRefresh, setAutoRefresh] = useState(true)
|
||||
const [elapsedTime, setElapsedTime] = useState<string>('')
|
||||
|
||||
// DSFA corpus state
// Source list and status summary, both filled by fetchDsfaStatus.
const [dsfaSources, setDsfaSources] = useState<DsfaSource[]>([])
const [dsfaStatus, setDsfaStatus] = useState<DsfaCorpusStatus | null>(null)
// True while a DSFA fetch is in flight; the cards show '-' during that time.
const [dsfaLoading, setDsfaLoading] = useState(false)
// Category currently selected in the Regulations tab filter.
const [regulationCategory, setRegulationCategory] = useState<RegulationCategory>('regulations')
// NOTE(review): presumably the source_code of the expanded DSFA row — usage is not visible here.
const [expandedDsfaSource, setExpandedDsfaSource] = useState<string | null>(null)
|
||||
|
||||
// Data tab state
|
||||
const [customDocuments, setCustomDocuments] = useState<CustomDocument[]>([])
|
||||
const [uploadFile, setUploadFile] = useState<File | null>(null)
|
||||
@@ -734,6 +1088,28 @@ export default function RAGPage() {
|
||||
}
|
||||
}, [])
|
||||
|
||||
/**
 * Loads the DSFA corpus status and source list in parallel through the DSFA
 * proxy route and stores both in component state.
 *
 * `dsfaLoading` is set for the duration of the request and always cleared
 * afterwards. Failures are logged to the console and leave the previous state
 * untouched; nothing is thrown to the caller.
 */
const fetchDsfaStatus = useCallback(async () => {
  setDsfaLoading(true)
  try {
    const [statusRes, sourcesRes] = await Promise.all([
      fetch(`${DSFA_API_PROXY}?action=status`),
      fetch(`${DSFA_API_PROXY}?action=sources`),
    ])

    if (statusRes.ok) {
      const data = await statusRes.json()
      setDsfaStatus(data)
    } else {
      console.error('Failed to fetch DSFA status: HTTP', statusRes.status)
    }

    if (sourcesRes.ok) {
      const data = await sourcesRes.json()
      // The payload is either `{ sources: [...] }` or a bare array. Anything
      // else (null, an error object, ...) must not end up in state, because
      // consumers rely on `dsfaSources` being an array.
      const candidate = data?.sources || data
      setDsfaSources(Array.isArray(candidate) ? candidate : [])
    } else {
      console.error('Failed to fetch DSFA sources: HTTP', sourcesRes.status)
    }
  } catch (error) {
    console.error('Failed to fetch DSFA status:', error)
  } finally {
    setDsfaLoading(false)
  }
}, [])
|
||||
|
||||
const fetchCustomDocuments = useCallback(async () => {
|
||||
try {
|
||||
const res = await fetch(`${API_PROXY}?action=custom-documents`)
|
||||
@@ -848,7 +1224,8 @@ export default function RAGPage() {
|
||||
|
||||
// Load the legal corpus status and the DSFA corpus data together. The effect
// re-runs only when one of the two fetch callbacks changes identity.
useEffect(() => {
  fetchStatus()
  fetchDsfaStatus()
}, [fetchStatus, fetchDsfaStatus])
|
||||
|
||||
useEffect(() => {
|
||||
if (activeTab === 'pipeline') {
|
||||
@@ -1023,47 +1400,45 @@ export default function RAGPage() {
|
||||
{/* Page Purpose */}
|
||||
<PagePurpose
|
||||
title="Daten & RAG"
|
||||
purpose="Verwalten und durchsuchen Sie indexierte Dokumente im RAG-System. Das Legal Corpus enthält 19+ Regulierungen (DSGVO, AI Act, CRA, BSI TR-03161, etc.) für semantische Suche. Teil der KI-Daten-Pipeline: Empfängt Embeddings von der RAG Pipeline und liefert Suchergebnisse an die Klausur-Korrektur."
|
||||
purpose="Verwalten und durchsuchen Sie 4 RAG-Collections: Legal Corpus (24 Regulierungen), DSFA Corpus (70+ Quellen inkl. internationaler Datenschutzgesetze), NiBiS EH (Bildungsinhalte) und Legal Templates (Dokumentvorlagen). Teil der KI-Daten-Pipeline fuer Compliance und Klausur-Korrektur."
|
||||
audience={['DSB', 'Compliance Officer', 'Entwickler']}
|
||||
gdprArticles={['§5 UrhG (Amtliche Werke)', 'Art. 5 DSGVO (Rechenschaftspflicht)']}
|
||||
architecture={{
|
||||
services: ['klausur-service (Python)', 'embedding-service (BGE-M3)', 'Qdrant (Vector DB)'],
|
||||
databases: ['Qdrant Collections: bp_legal_corpus, bp_nibis_eh, bp_eh'],
|
||||
databases: ['Qdrant: bp_legal_corpus, bp_dsfa_corpus, bp_nibis_eh, bp_legal_templates'],
|
||||
}}
|
||||
relatedPages={[
|
||||
{ name: 'RAG Pipeline', href: '/ai/rag-pipeline', description: 'Neue Dokumente indexieren' },
|
||||
{ name: 'Klausur-Korrektur', href: '/ai/klausur-korrektur', description: 'RAG-Suche nutzen' },
|
||||
{ name: 'OCR-Labeling', href: '/ai/ocr-labeling', description: 'Ground Truth erstellen' },
|
||||
{ name: 'Compliance Hub', href: '/compliance/hub', description: 'Compliance-Dashboard' },
|
||||
{ name: 'Compliance Hub', href: '/sdk/compliance-hub', description: 'Compliance-Dashboard' },
|
||||
]}
|
||||
/>
|
||||
|
||||
{/* AI Module Sidebar - Desktop: Fixed, Mobile: FAB + Drawer */}
|
||||
<AIModuleSidebarResponsive currentModule="rag" />
|
||||
|
||||
{/* Stats Cards */}
|
||||
{/* RAG Collections Stats */}
|
||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 mb-6">
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-sm text-slate-500">Regulierungen</p>
|
||||
<p className="text-2xl font-bold text-slate-900">{REGULATIONS.length}</p>
|
||||
<p className="text-xs font-medium text-blue-600 uppercase mb-1">Legal Corpus</p>
|
||||
<p className="text-2xl font-bold text-slate-900">{loading ? '-' : getTotalChunks().toLocaleString()}</p>
|
||||
<p className="text-xs text-slate-500">Chunks · {REGULATIONS.length} Regulierungen</p>
|
||||
</div>
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-sm text-slate-500">Chunks Total</p>
|
||||
<p className="text-2xl font-bold text-teal-600">{loading ? '-' : getTotalChunks().toLocaleString()}</p>
|
||||
<p className="text-xs font-medium text-purple-600 uppercase mb-1">DSFA Corpus</p>
|
||||
<p className="text-2xl font-bold text-slate-900">{dsfaLoading ? '-' : (dsfaStatus?.total_chunks || 0).toLocaleString()}</p>
|
||||
<p className="text-xs text-slate-500">Chunks · {dsfaSources.length || '~70'} Quellen</p>
|
||||
</div>
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-sm text-slate-500">Vector Size</p>
|
||||
<p className="text-2xl font-bold text-slate-700">{collectionStatus?.vectorSize || 1024}</p>
|
||||
<p className="text-xs font-medium text-emerald-600 uppercase mb-1">NiBiS EH</p>
|
||||
<p className="text-2xl font-bold text-slate-900">28.662</p>
|
||||
<p className="text-xs text-slate-500">Chunks · Bildungs-Erwartungshorizonte</p>
|
||||
</div>
|
||||
<div className={`bg-white rounded-xl p-4 border ${
|
||||
collectionStatus?.status === 'green' ? 'border-green-200' : 'border-slate-200'
|
||||
}`}>
|
||||
<p className="text-sm text-slate-500">Status</p>
|
||||
<p className={`text-2xl font-bold ${
|
||||
collectionStatus?.status === 'green' ? 'text-green-600' : 'text-slate-600'
|
||||
}`}>
|
||||
{collectionStatus?.status === 'green' ? '✓ Ready' : loading ? '-' : collectionStatus?.status || 'N/A'}
|
||||
</p>
|
||||
<div className="bg-white rounded-xl p-4 border border-slate-200">
|
||||
<p className="text-xs font-medium text-orange-600 uppercase mb-1">Legal Templates</p>
|
||||
<p className="text-2xl font-bold text-slate-900">824</p>
|
||||
<p className="text-xs text-slate-500">Chunks · Dokumentvorlagen</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -1088,6 +1463,39 @@ export default function RAGPage() {
|
||||
{/* Tab Content */}
|
||||
{activeTab === 'overview' && (
|
||||
<div className="space-y-6">
|
||||
{/* RAG Categories Overview */}
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h3 className="font-semibold text-slate-900 mb-4">RAG-Kategorien</h3>
|
||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
|
||||
<button
|
||||
onClick={() => { setRegulationCategory('regulations'); setActiveTab('regulations') }}
|
||||
className="p-4 rounded-lg border border-blue-200 bg-blue-50 hover:bg-blue-100 transition-colors text-left"
|
||||
>
|
||||
<p className="text-xs font-medium text-blue-600 uppercase">Gesetze & Regulierungen</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">{loading ? '-' : getTotalChunks().toLocaleString()}</p>
|
||||
<p className="text-xs text-slate-500 mt-1">{REGULATIONS.length} Regulierungen (EU, DE, BSI)</p>
|
||||
</button>
|
||||
<button
|
||||
onClick={() => { setRegulationCategory('dsfa'); setActiveTab('regulations') }}
|
||||
className="p-4 rounded-lg border border-purple-200 bg-purple-50 hover:bg-purple-100 transition-colors text-left"
|
||||
>
|
||||
<p className="text-xs font-medium text-purple-600 uppercase">DSFA Corpus</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">{dsfaLoading ? '-' : (dsfaStatus?.total_chunks || 0).toLocaleString()}</p>
|
||||
<p className="text-xs text-slate-500 mt-1">{dsfaSources.length || '~70'} Quellen (WP248, DSK, Gesetze)</p>
|
||||
</button>
|
||||
<div className="p-4 rounded-lg border border-emerald-200 bg-emerald-50 text-left">
|
||||
<p className="text-xs font-medium text-emerald-600 uppercase">NiBiS EH</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">28.662</p>
|
||||
<p className="text-xs text-slate-500 mt-1">Chunks · Bildungs-Erwartungshorizonte</p>
|
||||
</div>
|
||||
<div className="p-4 rounded-lg border border-orange-200 bg-orange-50 text-left">
|
||||
<p className="text-xs font-medium text-orange-600 uppercase">Legal Templates</p>
|
||||
<p className="text-2xl font-bold text-slate-900 mt-1">824</p>
|
||||
<p className="text-xs text-slate-500 mt-1">Chunks · Dokumentvorlagen (VVT, TOM, DSFA)</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Quick Stats per Type */}
|
||||
<div className="grid grid-cols-1 md:grid-cols-4 gap-4">
|
||||
{Object.entries(TYPE_LABELS).map(([type, label]) => {
|
||||
@@ -1134,6 +1542,53 @@ export default function RAGPage() {
|
||||
)}
|
||||
|
||||
{activeTab === 'regulations' && (
|
||||
<div className="space-y-4">
|
||||
{/* Category Filter */}
|
||||
<div className="flex items-center gap-2 flex-wrap">
|
||||
<button
|
||||
onClick={() => setRegulationCategory('regulations')}
|
||||
className={`px-3 py-1.5 text-sm font-medium rounded-lg transition-colors ${
|
||||
regulationCategory === 'regulations'
|
||||
? 'bg-blue-100 text-blue-700 ring-2 ring-blue-300'
|
||||
: 'bg-white text-slate-600 border border-slate-200 hover:bg-slate-50'
|
||||
}`}
|
||||
>
|
||||
Gesetze & Regulierungen ({REGULATIONS.length})
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setRegulationCategory('dsfa')}
|
||||
className={`px-3 py-1.5 text-sm font-medium rounded-lg transition-colors ${
|
||||
regulationCategory === 'dsfa'
|
||||
? 'bg-purple-100 text-purple-700 ring-2 ring-purple-300'
|
||||
: 'bg-white text-slate-600 border border-slate-200 hover:bg-slate-50'
|
||||
}`}
|
||||
>
|
||||
DSFA Quellen ({dsfaSources.length || '~70'})
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setRegulationCategory('nibis')}
|
||||
className={`px-3 py-1.5 text-sm font-medium rounded-lg transition-colors ${
|
||||
regulationCategory === 'nibis'
|
||||
? 'bg-emerald-100 text-emerald-700 ring-2 ring-emerald-300'
|
||||
: 'bg-white text-slate-600 border border-slate-200 hover:bg-slate-50'
|
||||
}`}
|
||||
>
|
||||
NiBiS Dokumente
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setRegulationCategory('templates')}
|
||||
className={`px-3 py-1.5 text-sm font-medium rounded-lg transition-colors ${
|
||||
regulationCategory === 'templates'
|
||||
? 'bg-orange-100 text-orange-700 ring-2 ring-orange-300'
|
||||
: 'bg-white text-slate-600 border border-slate-200 hover:bg-slate-50'
|
||||
}`}
|
||||
>
|
||||
Templates & Vorlagen
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Regulations Table (existing) */}
|
||||
{regulationCategory === 'regulations' && (
|
||||
<div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
|
||||
<div className="px-4 py-3 border-b bg-slate-50 flex items-center justify-between">
|
||||
<h3 className="font-semibold text-slate-900">Alle {REGULATIONS.length} Regulierungen</h3>
|
||||
@@ -1224,7 +1679,17 @@ export default function RAGPage() {
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center justify-between pt-2 border-t border-slate-100 text-xs text-slate-500">
|
||||
<span>In Kraft seit: {reg.effectiveDate}</span>
|
||||
<div className="flex items-center gap-4">
|
||||
<span>In Kraft seit: {reg.effectiveDate}</span>
|
||||
{REGULATION_LICENSES[reg.code] && (
|
||||
<span className="flex items-center gap-1">
|
||||
<span className="px-1.5 py-0.5 bg-slate-100 text-slate-600 rounded text-[10px] font-medium">
|
||||
{LICENSE_LABELS[REGULATION_LICENSES[reg.code].license] || REGULATION_LICENSES[reg.code].license}
|
||||
</span>
|
||||
<span className="text-slate-400">{REGULATION_LICENSES[reg.code].licenseNote}</span>
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
@@ -1247,6 +1712,172 @@ export default function RAGPage() {
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* DSFA Sources */}
|
||||
{regulationCategory === 'dsfa' && (
|
||||
<div className="bg-white rounded-xl border border-slate-200 overflow-hidden">
|
||||
<div className="px-4 py-3 border-b bg-slate-50 flex items-center justify-between">
|
||||
<div>
|
||||
<h3 className="font-semibold text-slate-900">DSFA Quellen ({dsfaSources.length || '~70'})</h3>
|
||||
<p className="text-xs text-slate-500">WP248, DSK Kurzpapiere, Muss-Listen, nationale Datenschutzgesetze</p>
|
||||
</div>
|
||||
<button
|
||||
onClick={fetchDsfaStatus}
|
||||
className="text-sm text-teal-600 hover:text-teal-700"
|
||||
>
|
||||
Aktualisieren
|
||||
</button>
|
||||
</div>
|
||||
{dsfaLoading ? (
|
||||
<div className="p-8 text-center text-slate-500">Lade DSFA-Quellen...</div>
|
||||
) : dsfaSources.length === 0 ? (
|
||||
<div className="p-8 text-center text-slate-500">
|
||||
<p className="mb-2">Keine DSFA-Quellen vom Backend geladen.</p>
|
||||
<p className="text-xs">Endpunkt: <code className="bg-slate-100 px-1 rounded">/api/dsfa-corpus?action=sources</code></p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="divide-y">
|
||||
{dsfaSources.map((source) => {
|
||||
const isExpanded = expandedDsfaSource === source.source_code
|
||||
const typeColors: Record<string, string> = {
|
||||
regulation: 'bg-blue-100 text-blue-700',
|
||||
legislation: 'bg-indigo-100 text-indigo-700',
|
||||
guideline: 'bg-teal-100 text-teal-700',
|
||||
checklist: 'bg-yellow-100 text-yellow-700',
|
||||
standard: 'bg-green-100 text-green-700',
|
||||
methodology: 'bg-purple-100 text-purple-700',
|
||||
specification: 'bg-orange-100 text-orange-700',
|
||||
catalog: 'bg-pink-100 text-pink-700',
|
||||
guidance: 'bg-cyan-100 text-cyan-700',
|
||||
}
|
||||
return (
|
||||
<React.Fragment key={source.source_code}>
|
||||
<div
|
||||
onClick={() => setExpandedDsfaSource(isExpanded ? null : source.source_code)}
|
||||
className="px-4 py-3 hover:bg-slate-50 cursor-pointer transition-colors flex items-center justify-between"
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
<span className={`transform transition-transform text-xs ${isExpanded ? 'rotate-90' : ''}`}>▶</span>
|
||||
<span className="font-mono text-sm text-purple-600 font-medium">{source.source_code}</span>
|
||||
<span className={`px-2 py-0.5 text-xs rounded ${typeColors[source.document_type] || 'bg-slate-100 text-slate-600'}`}>
|
||||
{source.document_type}
|
||||
</span>
|
||||
<span className="text-sm text-slate-900">{source.name}</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-3">
|
||||
<span className="px-1.5 py-0.5 text-[10px] font-medium bg-slate-100 text-slate-500 rounded uppercase">
|
||||
{source.language}
|
||||
</span>
|
||||
{source.chunk_count != null && (
|
||||
<span className="text-sm font-bold text-purple-600">{source.chunk_count} Chunks</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
{isExpanded && (
|
||||
<div className="px-4 pb-4 bg-slate-50">
|
||||
<div className="bg-white rounded-lg border border-slate-200 p-4 space-y-3">
|
||||
<div>
|
||||
<h4 className="font-semibold text-slate-900 mb-1">{source.full_name || source.name}</h4>
|
||||
{source.organization && (
|
||||
<p className="text-sm text-slate-600">Organisation: {source.organization}</p>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex items-center gap-4 pt-2 border-t border-slate-100 text-xs text-slate-500">
|
||||
<span className="flex items-center gap-1">
|
||||
<span className="px-1.5 py-0.5 bg-slate-100 text-slate-600 rounded text-[10px] font-medium">
|
||||
{LICENSE_LABELS[source.license_code] || source.license_code}
|
||||
</span>
|
||||
<span className="text-slate-400">{source.attribution_text}</span>
|
||||
</span>
|
||||
</div>
|
||||
{source.source_url && (
|
||||
<div className="text-xs">
|
||||
<a
|
||||
href={source.source_url}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-teal-600 hover:underline"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
Quelle: {source.source_url}
|
||||
</a>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</React.Fragment>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* NiBiS Dokumente (info only) */}
|
||||
{regulationCategory === 'nibis' && (
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<div className="w-10 h-10 rounded-lg bg-emerald-100 flex items-center justify-center text-xl">📚</div>
|
||||
<div>
|
||||
<h3 className="font-semibold text-slate-900">NiBiS Erwartungshorizonte</h3>
|
||||
<p className="text-sm text-slate-500">Collection: <code className="bg-slate-100 px-1 rounded">bp_nibis_eh</code></p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="grid grid-cols-3 gap-4 mb-4">
|
||||
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
|
||||
<p className="text-sm text-emerald-600 font-medium">Chunks</p>
|
||||
<p className="text-2xl font-bold text-slate-900">28.662</p>
|
||||
</div>
|
||||
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
|
||||
<p className="text-sm text-emerald-600 font-medium">Vector Size</p>
|
||||
<p className="text-2xl font-bold text-slate-900">1024</p>
|
||||
</div>
|
||||
<div className="bg-emerald-50 rounded-lg p-4 border border-emerald-200">
|
||||
<p className="text-sm text-emerald-600 font-medium">Typ</p>
|
||||
<p className="text-2xl font-bold text-slate-900">BGE-M3</p>
|
||||
</div>
|
||||
</div>
|
||||
<p className="text-sm text-slate-600">
|
||||
Bildungsinhalte aus dem Niedersaechsischen Bildungsserver (NiBiS). Enthaelt Erwartungshorizonte fuer
|
||||
verschiedene Faecher und Schulformen. Wird ueber die Klausur-Korrektur fuer EH-Matching genutzt.
|
||||
Diese Daten sind nicht direkt compliance-relevant.
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Templates (info only) */}
|
||||
{regulationCategory === 'templates' && (
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<div className="w-10 h-10 rounded-lg bg-orange-100 flex items-center justify-center text-xl">📋</div>
|
||||
<div>
|
||||
<h3 className="font-semibold text-slate-900">Legal Templates & Vorlagen</h3>
|
||||
<p className="text-sm text-slate-500">Collection: <code className="bg-slate-100 px-1 rounded">bp_legal_templates</code></p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="grid grid-cols-3 gap-4 mb-4">
|
||||
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
|
||||
<p className="text-sm text-orange-600 font-medium">Chunks</p>
|
||||
<p className="text-2xl font-bold text-slate-900">824</p>
|
||||
</div>
|
||||
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
|
||||
<p className="text-sm text-orange-600 font-medium">Vector Size</p>
|
||||
<p className="text-2xl font-bold text-slate-900">1024</p>
|
||||
</div>
|
||||
<div className="bg-orange-50 rounded-lg p-4 border border-orange-200">
|
||||
<p className="text-sm text-orange-600 font-medium">Typ</p>
|
||||
<p className="text-2xl font-bold text-slate-900">BGE-M3</p>
|
||||
</div>
|
||||
</div>
|
||||
<p className="text-sm text-slate-600">
|
||||
Vorlagen fuer VVT (Verzeichnis von Verarbeitungstaetigkeiten), TOM (Technisch-Organisatorische Massnahmen),
|
||||
DSFA-Berichte und weitere Compliance-Dokumente. Werden vom AI Compliance SDK fuer die Dokumentgenerierung genutzt.
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{activeTab === 'map' && (
|
||||
|
||||
@@ -766,6 +766,326 @@ DSFA_SOURCES = [
|
||||
"document_type": "standard",
|
||||
"language": "de"
|
||||
},
|
||||
|
||||
# === EDPB Ergaenzende Leitlinien ===
|
||||
# MIGRATED to bp_legal_corpus via migrate_rag_chunks.py (2026-02-10)
|
||||
{
|
||||
"source_code": "EDPB_GUIDELINES_2_2019",
|
||||
"name": "EDPB Leitlinien 2/2019 zu Art. 6(1)(b)",
|
||||
"full_name": "EDPB Leitlinien 2/2019 zur Verarbeitung personenbezogener Daten auf Grundlage von Art. 6 Abs. 1 lit. b DSGVO",
|
||||
"organization": "European Data Protection Board",
|
||||
"source_url": "https://edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-22019-processing-personal-data-under-article-61b_en",
|
||||
"license_code": "EDPB-LICENSE",
|
||||
"attribution_text": "Source: EDPB Guidelines 2/2019, European Data Protection Board",
|
||||
"document_type": "guideline",
|
||||
"language": "en",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "EDPB_GUIDELINES_3_2019",
|
||||
"name": "EDPB Leitlinien 3/2019 Videoueberwachung",
|
||||
"full_name": "EDPB Leitlinien 3/2019 zur Verarbeitung personenbezogener Daten durch Videoueberwachung",
|
||||
"organization": "European Data Protection Board",
|
||||
"source_url": "https://edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-32019-processing-personal-data-through-video_en",
|
||||
"license_code": "EDPB-LICENSE",
|
||||
"attribution_text": "Source: EDPB Guidelines 3/2019, European Data Protection Board",
|
||||
"document_type": "guideline",
|
||||
"language": "en",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "EDPB_GUIDELINES_5_2020",
|
||||
"name": "EDPB Leitlinien 5/2020 Einwilligung",
|
||||
"full_name": "EDPB Leitlinien 5/2020 zur Einwilligung gemaess Verordnung 2016/679",
|
||||
"organization": "European Data Protection Board",
|
||||
"source_url": "https://edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-052020-consent-under-regulation-2016679_en",
|
||||
"license_code": "EDPB-LICENSE",
|
||||
"attribution_text": "Source: EDPB Guidelines 5/2020, European Data Protection Board",
|
||||
"document_type": "guideline",
|
||||
"language": "en",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "EDPB_GUIDELINES_7_2020",
|
||||
"name": "EDPB Leitlinien 7/2020 Controller/Processor",
|
||||
"full_name": "EDPB Leitlinien 7/2020 zu den Begriffen Verantwortlicher und Auftragsverarbeiter",
|
||||
"organization": "European Data Protection Board",
|
||||
"source_url": "https://edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-072020-concepts-controller-and-processor-gdpr_en",
|
||||
"license_code": "EDPB-LICENSE",
|
||||
"attribution_text": "Source: EDPB Guidelines 7/2020, European Data Protection Board",
|
||||
"document_type": "guideline",
|
||||
"language": "en",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "EDPB_GUIDELINES_1_2022",
|
||||
"name": "EDPB Leitlinien 1/2022 Bussgelder",
|
||||
"full_name": "EDPB Leitlinien 04/2022 zur Berechnung von Bussgeldern nach der DSGVO",
|
||||
"organization": "European Data Protection Board",
|
||||
"source_url": "https://edpb.europa.eu/our-work-tools/our-documents/guidelines/guidelines-042022-calculation-administrative-fines-under-gdpr_en",
|
||||
"license_code": "EDPB-LICENSE",
|
||||
"attribution_text": "Source: EDPB Guidelines 04/2022, European Data Protection Board",
|
||||
"document_type": "guideline",
|
||||
"language": "en",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "SCC_FULL_TEXT",
|
||||
"name": "Standard Contractual Clauses Volltext",
|
||||
"full_name": "Standardvertragsklauseln fuer die Uebermittlung personenbezogener Daten an Drittlaender (2021/914/EU)",
|
||||
"organization": "Europaeische Kommission",
|
||||
"source_url": "https://eur-lex.europa.eu/eli/dec_impl/2021/914/oj",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Quelle: SCC Volltext, Europaeische Kommission (EUR-Lex)",
|
||||
"document_type": "regulation",
|
||||
"language": "de",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
|
||||
# === Nationale Datenschutzgesetze (DSGVO-Umsetzungen) ===
|
||||
# MIGRATED to bp_legal_corpus via migrate_rag_chunks.py (2026-02-10)
|
||||
# These sources are kept here for reference but will be skipped during ingestion.
|
||||
# Ingestion should target bp_legal_corpus for these source codes.
|
||||
{
|
||||
"source_code": "BDSG_FULL",
|
||||
"name": "BDSG Volltext (Deutschland)",
|
||||
"full_name": "Bundesdatenschutzgesetz (BDSG) - Volltext inkl. aller Teile",
|
||||
"organization": "Bundesrepublik Deutschland",
|
||||
"source_url": "https://www.gesetze-im-internet.de/bdsg_2018/",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Quelle: BDSG, Bundesrepublik Deutschland (gesetze-im-internet.de)",
|
||||
"document_type": "legislation",
|
||||
"language": "de",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "AT_DSG",
|
||||
"name": "DSG Oesterreich",
|
||||
"full_name": "Bundesgesetz zum Schutz natuerlicher Personen bei der Verarbeitung personenbezogener Daten (Datenschutzgesetz - DSG)",
|
||||
"organization": "Republik Oesterreich",
|
||||
"source_url": "https://www.ris.bka.gv.at/GeltendeFassung.wxe?Abfrage=Bundesnormen&Gesetzesnummer=10001597",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Quelle: DSG, Republik Oesterreich (RIS)",
|
||||
"document_type": "legislation",
|
||||
"language": "de",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "CH_DSG",
|
||||
"name": "DSG Schweiz (revDSG 2023)",
|
||||
"full_name": "Bundesgesetz ueber den Datenschutz (Datenschutzgesetz, DSG) - revidierte Fassung 2023",
|
||||
"organization": "Schweizerische Eidgenossenschaft",
|
||||
"source_url": "https://www.fedlex.admin.ch/eli/cc/2022/491/de",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Quelle: DSG, Schweizerische Eidgenossenschaft (Fedlex)",
|
||||
"document_type": "legislation",
|
||||
"language": "de",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "LI_DSG",
|
||||
"name": "DSG Liechtenstein",
|
||||
"full_name": "Datenschutzgesetz (DSG) Liechtenstein",
|
||||
"organization": "Fuerstentum Liechtenstein",
|
||||
"source_url": "https://www.gesetze.li/konso/2018.272",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Quelle: DSG, Fuerstentum Liechtenstein (gesetze.li)",
|
||||
"document_type": "legislation",
|
||||
"language": "de",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "FR_CNIL_GUIDE",
|
||||
"name": "CNIL Guide RGPD",
|
||||
"full_name": "Guide pratique RGPD - Commission Nationale de l'Informatique et des Libertes",
|
||||
"organization": "CNIL (France)",
|
||||
"source_url": "https://www.cnil.fr/fr/rgpd-de-quoi-parle-t-on",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Source: CNIL Guide RGPD, Commission Nationale de l'Informatique et des Libertes",
|
||||
"document_type": "guideline",
|
||||
"language": "fr",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "ES_LOPDGDD",
|
||||
"name": "LOPDGDD Spanien",
|
||||
"full_name": "Ley Organica de Proteccion de Datos Personales y garantia de los derechos digitales",
|
||||
"organization": "Reino de Espana",
|
||||
"source_url": "https://www.boe.es/buscar/act.php?id=BOE-A-2018-16673",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Fuente: LOPDGDD, Reino de Espana (BOE)",
|
||||
"document_type": "legislation",
|
||||
"language": "es",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "IT_CODICE_PRIVACY",
|
||||
"name": "Codice Privacy Italien",
|
||||
"full_name": "Codice in materia di protezione dei dati personali (D.Lgs. 196/2003, aggiornato D.Lgs. 101/2018)",
|
||||
"organization": "Repubblica Italiana",
|
||||
"source_url": "https://www.garanteprivacy.it/home/docweb/-/docweb-display/docweb/9042678",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Fonte: Codice Privacy, Garante per la protezione dei dati personali",
|
||||
"document_type": "legislation",
|
||||
"language": "it",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "NL_UAVG",
|
||||
"name": "UAVG Niederlande",
|
||||
"full_name": "Uitvoeringswet Algemene verordening gegevensbescherming (UAVG)",
|
||||
"organization": "Koninkrijk der Nederlanden",
|
||||
"source_url": "https://wetten.overheid.nl/BWBR0040940/",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Bron: UAVG, Koninkrijk der Nederlanden (wetten.overheid.nl)",
|
||||
"document_type": "legislation",
|
||||
"language": "nl",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "BE_DPA_LAW",
|
||||
"name": "Datenschutzgesetz Belgien",
|
||||
"full_name": "Loi relative a la protection des personnes physiques a l'egard des traitements de donnees a caractere personnel",
|
||||
"organization": "Royaume de Belgique",
|
||||
"source_url": "https://www.ejustice.just.fgov.be/cgi_loi/change_lg.pl?language=fr&la=F&cn=2018073046",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Source: Loi Protection des Donnees, Royaume de Belgique (eJustice)",
|
||||
"document_type": "legislation",
|
||||
"language": "fr",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "LU_DPA_LAW",
|
||||
"name": "Datenschutzgesetz Luxemburg",
|
||||
"full_name": "Loi du 1er aout 2018 portant organisation de la Commission nationale pour la protection des donnees",
|
||||
"organization": "Grand-Duche de Luxembourg",
|
||||
"source_url": "https://legilux.public.lu/eli/etat/leg/loi/2018/08/01/a686/jo",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Source: Loi Protection des Donnees, Grand-Duche de Luxembourg (Legilux)",
|
||||
"document_type": "legislation",
|
||||
"language": "fr",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "IE_DPA_2018",
|
||||
"name": "Data Protection Act 2018 Ireland",
|
||||
"full_name": "Data Protection Act 2018 (Act No. 7 of 2018) - Ireland",
|
||||
"organization": "Government of Ireland",
|
||||
"source_url": "https://www.irishstatutebook.ie/eli/2018/act/7/enacted/en/html",
|
||||
"license_code": "OGL-3.0",
|
||||
"attribution_text": "Contains public sector information licensed under the Open Government Licence v3.0. Source: Data Protection Act 2018, Ireland",
|
||||
"document_type": "legislation",
|
||||
"language": "en",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "UK_DPA_2018",
|
||||
"name": "Data Protection Act 2018 UK",
|
||||
"full_name": "Data Protection Act 2018 (c. 12) - United Kingdom",
|
||||
"organization": "Government of the United Kingdom",
|
||||
"source_url": "https://www.legislation.gov.uk/ukpga/2018/12/contents/enacted",
|
||||
"license_code": "OGL-3.0",
|
||||
"attribution_text": "Contains public sector information licensed under the Open Government Licence v3.0. Source: Data Protection Act 2018, UK",
|
||||
"document_type": "legislation",
|
||||
"language": "en",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "UK_GDPR",
|
||||
"name": "UK GDPR (retained EU law)",
|
||||
"full_name": "United Kingdom General Data Protection Regulation (UK GDPR) - retained EU law",
|
||||
"organization": "Government of the United Kingdom",
|
||||
"source_url": "https://www.legislation.gov.uk/eur/2016/679/contents",
|
||||
"license_code": "OGL-3.0",
|
||||
"attribution_text": "Contains public sector information licensed under the Open Government Licence v3.0. Source: UK GDPR, legislation.gov.uk",
|
||||
"document_type": "legislation",
|
||||
"language": "en",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "NO_PERSONOPPLYSNINGSLOVEN",
|
||||
"name": "Personopplysningsloven Norwegen",
|
||||
"full_name": "Lov om behandling av personopplysninger (personopplysningsloven)",
|
||||
"organization": "Kongeriket Norge",
|
||||
"source_url": "https://lovdata.no/dokument/NL/lov/2018-06-15-38",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Kilde: Personopplysningsloven, Kongeriket Norge (Lovdata)",
|
||||
"document_type": "legislation",
|
||||
"language": "no",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "SE_DATASKYDDSLAG",
|
||||
"name": "Dataskyddslag Schweden",
|
||||
"full_name": "Lag (2018:218) med kompletterande bestammelser till EU:s dataskyddsforordning",
|
||||
"organization": "Konungariket Sverige",
|
||||
"source_url": "https://www.riksdagen.se/sv/dokument-och-lagar/dokument/svensk-forfattningssamling/lag-2018218-med-kompletterande-bestammelser-till_sfs-2018-218/",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Kalla: Dataskyddslag (2018:218), Konungariket Sverige (Riksdagen)",
|
||||
"document_type": "legislation",
|
||||
"language": "sv",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "DK_DATABESKYTTELSESLOVEN",
|
||||
"name": "Databeskyttelsesloven Daenemark",
|
||||
"full_name": "Lov om supplerende bestemmelser til forordning om beskyttelse af fysiske personer i forbindelse med behandling af personoplysninger",
|
||||
"organization": "Kongeriget Danmark",
|
||||
"source_url": "https://www.retsinformation.dk/eli/lta/2018/502",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Kilde: Databeskyttelsesloven, Kongeriget Danmark (Retsinformation)",
|
||||
"document_type": "legislation",
|
||||
"language": "da",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "FI_TIETOSUOJALAKI",
|
||||
"name": "Tietosuojalaki Finnland",
|
||||
"full_name": "Tietosuojalaki (1050/2018) - Datenschutzgesetz Finnland",
|
||||
"organization": "Suomen tasavalta",
|
||||
"source_url": "https://www.finlex.fi/fi/laki/ajantasa/2018/20181050",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Lahde: Tietosuojalaki, Suomen tasavalta (Finlex)",
|
||||
"document_type": "legislation",
|
||||
"language": "fi",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "PL_UODO",
|
||||
"name": "UODO Polen",
|
||||
"full_name": "Ustawa o ochronie danych osobowych - Datenschutzgesetz Polen",
|
||||
"organization": "Rzeczpospolita Polska",
|
||||
"source_url": "https://isap.sejm.gov.pl/isap.nsf/DocDetails.xsp?id=WDU20180001000",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Zrodlo: Ustawa o ochronie danych osobowych, Rzeczpospolita Polska (ISAP)",
|
||||
"document_type": "legislation",
|
||||
"language": "pl",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "CZ_ZOU",
|
||||
"name": "Zakon o ochrane osobnich udaju Tschechien",
|
||||
"full_name": "Zakon c. 110/2019 Sb. o zpracovani osobnich udaju",
|
||||
"organization": "Ceska republika",
|
||||
"source_url": "https://www.zakonyprolidi.cz/cs/2019-110",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Zdroj: Zakon o ochrane osobnich udaju, Ceska republika (zakonyprolidi.cz)",
|
||||
"document_type": "legislation",
|
||||
"language": "cs",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
{
|
||||
"source_code": "HU_INFOTV",
|
||||
"name": "Informacios torvenye Ungarn",
|
||||
"full_name": "2011. evi CXII. torveny az informacios onrendelkezesi jogrol es az informacioszabadsagrol (Infotv.)",
|
||||
"organization": "Magyarorszag",
|
||||
"source_url": "https://njt.hu/jogszabaly/2011-112-00-00",
|
||||
"license_code": "PUBLIC_DOMAIN",
|
||||
"attribution_text": "Forras: Infotv., Magyarorszag (njt.hu)",
|
||||
"document_type": "legislation",
|
||||
"language": "hu",
|
||||
"migrated_to": "bp_legal_corpus"
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -1100,7 +1420,7 @@ class DSFAQdrantService:
|
||||
@property
def client(self) -> QdrantClient:
    """Return the cached QdrantClient, creating it on first access.

    The client is built from ``self.url`` and stored in ``self._client`` so
    later accesses reuse the same instance.

    Returns:
        The QdrantClient instance held in ``self._client``.
    """
    if self._client is None:
        # Construct the client exactly once. check_compatibility=False is
        # forwarded to QdrantClient (per the qdrant-client docs it turns off
        # the client/server version compatibility check).
        self._client = QdrantClient(url=self.url, check_compatibility=False)
    return self._client
|
||||
|
||||
async def ensure_collection(self) -> bool:
|
||||
@@ -1408,14 +1728,21 @@ async def init_dsfa_tables(pool: asyncpg.Pool):
|
||||
|
||||
|
||||
async def register_all_sources(pool: asyncpg.Pool):
    """Register all DSFA sources in the database, skipping migrated sources.

    Every entry of DSFA_SOURCES with a truthy ``"migrated_to"`` value is
    reported and skipped; all other entries are registered through
    ``DSFACorpusStore.register_source``. A summary with both counters is
    printed at the end.

    Args:
        pool: asyncpg connection pool handed to DSFACorpusStore.
    """
    store = DSFACorpusStore(pool)

    registered = 0
    skipped = 0
    for source in DSFA_SOURCES:
        if source.get("migrated_to"):
            # Source now lives in another collection; do not register it here.
            print(f"Skipping migrated source: {source['source_code']} -> {source['migrated_to']}")
            skipped += 1
            continue
        source_id = await store.register_source(source)
        print(f"Registered source: {source['source_code']} -> {source_id}")
        registered += 1

    # Report the real number of registrations, not len(DSFA_SOURCES), because
    # migrated sources are counted separately.
    print(f"\nTotal sources registered: {registered} (skipped {skipped} migrated)")
|
||||
|
||||
|
||||
async def get_ingestion_status(pool: asyncpg.Pool):
|
||||
|
||||
307
klausur-service/backend/migrate_rag_chunks.py
Normal file
307
klausur-service/backend/migrate_rag_chunks.py
Normal file
@@ -0,0 +1,307 @@
|
||||
"""
|
||||
RAG Chunk Migration: bp_dsfa_corpus -> bp_legal_corpus
|
||||
|
||||
Verschiebt nationale Datenschutzgesetze und EU-Dokumente aus bp_dsfa_corpus
|
||||
nach bp_legal_corpus. Vektoren werden 1:1 uebernommen (kein Re-Embedding).
|
||||
|
||||
Usage:
|
||||
python migrate_rag_chunks.py # Dry run (default)
|
||||
python migrate_rag_chunks.py --execute # Actually migrate
|
||||
python migrate_rag_chunks.py --verify # Verify after migration
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
from datetime import datetime, timezone
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import (
|
||||
PointStruct, Filter, FieldCondition, MatchAny, ScrollRequest
|
||||
)
|
||||
|
||||
# Configuration
# Qdrant endpoint; can be overridden through the QDRANT_URL environment variable.
QDRANT_URL = os.getenv("QDRANT_URL", "http://qdrant:6333")
SOURCE_COLLECTION = "bp_dsfa_corpus"
TARGET_COLLECTION = "bp_legal_corpus"

# Source codes to migrate from bp_dsfa_corpus -> bp_legal_corpus.
# This list must cover every DSFA_SOURCES entry flagged with
# "migrated_to": "bp_legal_corpus" (dsfa_corpus_ingestion.py); a flagged source
# that is missing here is skipped by ingestion but never moved.
SOURCES_TO_MIGRATE = [
    # National data protection laws
    "AT_DSG",
    "BDSG_FULL",
    "BE_DPA_LAW",
    "CH_DSG",
    "CZ_ZOU",
    "DK_DATABESKYTTELSESLOVEN",
    "ES_LOPDGDD",
    "FI_TIETOSUOJALAKI",
    "FR_CNIL_GUIDE",
    "HU_INFOTV",
    "IE_DPA_2018",
    "IT_CODICE_PRIVACY",
    "LI_DSG",
    "LU_DPA_LAW",
    "NL_UAVG",
    "NO_PERSONOPPLYSNINGSLOVEN",
    "PL_UODO",
    "SE_DATASKYDDSLAG",
    "UK_DPA_2018",
    "UK_GDPR",
    # EU documents
    "SCC_FULL_TEXT",
    "EDPB_GUIDELINES_2_2019",
    "EDPB_GUIDELINES_3_2019",
    "EDPB_GUIDELINES_5_2020",
    "EDPB_GUIDELINES_7_2020",
    "EDPB_GUIDELINES_1_2022",
]

# Mapping: source_code -> regulation_type for bp_legal_corpus
REGULATION_TYPE_MAP = {
    "AT_DSG": "national_law",
    "BDSG_FULL": "de_law",
    "BE_DPA_LAW": "national_law",
    "CH_DSG": "national_law",
    "CZ_ZOU": "national_law",
    "DK_DATABESKYTTELSESLOVEN": "national_law",
    "ES_LOPDGDD": "national_law",
    "FI_TIETOSUOJALAKI": "national_law",
    "FR_CNIL_GUIDE": "national_law",
    "HU_INFOTV": "national_law",
    "IE_DPA_2018": "national_law",
    "IT_CODICE_PRIVACY": "national_law",
    "LI_DSG": "national_law",
    "LU_DPA_LAW": "national_law",
    "NL_UAVG": "national_law",
    "NO_PERSONOPPLYSNINGSLOVEN": "national_law",
    "PL_UODO": "national_law",
    "SE_DATASKYDDSLAG": "national_law",
    "UK_DPA_2018": "national_law",
    "UK_GDPR": "national_law",
    "SCC_FULL_TEXT": "eu_regulation",
    "EDPB_GUIDELINES_2_2019": "eu_guideline",
    "EDPB_GUIDELINES_3_2019": "eu_guideline",
    "EDPB_GUIDELINES_5_2020": "eu_guideline",
    "EDPB_GUIDELINES_7_2020": "eu_guideline",
    "EDPB_GUIDELINES_1_2022": "eu_guideline",
}

# Mapping: source_code -> regulation_name for bp_legal_corpus
REGULATION_NAME_MAP = {
    "AT_DSG": "DSG Oesterreich",
    "BDSG_FULL": "BDSG",
    "BE_DPA_LAW": "Datenschutzgesetz Belgien",
    "CH_DSG": "DSG Schweiz",
    "CZ_ZOU": "Zakon Tschechien",
    "DK_DATABESKYTTELSESLOVEN": "Databeskyttelsesloven Daenemark",
    "ES_LOPDGDD": "LOPDGDD Spanien",
    "FI_TIETOSUOJALAKI": "Tietosuojalaki Finnland",
    "FR_CNIL_GUIDE": "CNIL Guide RGPD",
    "HU_INFOTV": "Infotv. Ungarn",
    "IE_DPA_2018": "DPA 2018 Ireland",
    "IT_CODICE_PRIVACY": "Codice Privacy Italien",
    "LI_DSG": "DSG Liechtenstein",
    "LU_DPA_LAW": "Datenschutzgesetz Luxemburg",
    "NL_UAVG": "UAVG Niederlande",
    "NO_PERSONOPPLYSNINGSLOVEN": "Personopplysningsloven",
    "PL_UODO": "UODO Polen",
    "SE_DATASKYDDSLAG": "Dataskyddslag Schweden",
    "UK_DPA_2018": "DPA 2018 UK",
    "UK_GDPR": "UK GDPR",
    "SCC_FULL_TEXT": "Standardvertragsklauseln",
    "EDPB_GUIDELINES_2_2019": "EDPB GL 2/2019",
    "EDPB_GUIDELINES_3_2019": "EDPB GL 3/2019",
    "EDPB_GUIDELINES_5_2020": "EDPB GL 5/2020",
    "EDPB_GUIDELINES_7_2020": "EDPB GL 7/2020",
    # The source code says 1_2022, but the registered full name and URL refer
    # to Guidelines 04/2022 (calculation of administrative fines).
    "EDPB_GUIDELINES_1_2022": "EDPB GL 04/2022",
}
|
||||
|
||||
|
||||
def transform_payload(dsfa_payload: Dict[str, Any]) -> Dict[str, Any]:
    """Build a bp_legal_corpus payload from a bp_dsfa_corpus payload.

    Args:
        dsfa_payload: Payload dict of a point read from bp_dsfa_corpus.

    Returns:
        A new dict in the bp_legal_corpus layout. Name and type are looked up
        in REGULATION_NAME_MAP / REGULATION_TYPE_MAP by ``source_code`` and
        fall back to the payload's ``source_name`` and ``"national_law"``.
        ``indexed_at`` is the current UTC time in ISO format and
        ``training_allowed`` is always False.
    """
    code = dsfa_payload.get("source_code", "")
    original_name = dsfa_payload.get("source_name", "")

    legal_payload: Dict[str, Any] = {
        "text": dsfa_payload.get("content", ""),
        "regulation_code": code,
        "regulation_name": REGULATION_NAME_MAP.get(code, original_name),
        "regulation_full_name": original_name,
        "regulation_type": REGULATION_TYPE_MAP.get(code, "national_law"),
        "source_url": dsfa_payload.get("source_url", ""),
    }

    # Fields copied over unchanged, each with the default used when absent.
    passthrough = (
        ("chunk_index", 0),
        ("chunk_position", 0),
        ("article", None),
        ("paragraph", None),
        ("language", "de"),
    )
    for field, fallback in passthrough:
        legal_payload[field] = dsfa_payload.get(field, fallback)

    legal_payload["indexed_at"] = datetime.now(timezone.utc).isoformat()
    legal_payload["training_allowed"] = False
    return legal_payload
|
||||
|
||||
|
||||
def scroll_all_points(
    client: QdrantClient,
    collection: str,
    source_codes: List[str],
    batch_size: int = 100,
) -> List:
    """Collect all points whose payload ``source_code`` is one of ``source_codes``.

    Pages through the collection with ``client.scroll`` and requests vectors
    and payloads for every point. All matches are accumulated in memory.

    Args:
        client: Qdrant client used for the scroll requests.
        collection: Name of the collection to read from.
        source_codes: Values of the ``source_code`` payload field to match.
        batch_size: Number of points requested per scroll page (default 100,
            the value that was previously hard-coded).

    Returns:
        List of the records returned by ``client.scroll``, in scroll order.
        An empty ``source_codes`` list yields an empty result without any
        request being sent.
    """
    if not source_codes:
        # Nothing can match an empty code list; skip the round trip.
        return []

    scroll_filter = Filter(
        must=[
            FieldCondition(
                key="source_code",
                match=MatchAny(any=source_codes),
            )
        ]
    )

    all_points = []
    offset = None
    while True:
        results, next_offset = client.scroll(
            collection_name=collection,
            scroll_filter=scroll_filter,
            limit=batch_size,
            offset=offset,
            with_vectors=True,
            with_payload=True,
        )
        all_points.extend(results)

        # scroll returns None as the next offset once the last page is reached.
        if next_offset is None:
            break
        offset = next_offset

    return all_points
|
||||
|
||||
|
||||
def migrate(execute: bool = False):
    """Move the points of SOURCES_TO_MIGRATE from the source to the target collection.

    Prints the point counts of both collections, fetches all matching points
    from SOURCE_COLLECTION and reports how many belong to each source code.
    In dry-run mode (the default) it stops there.

    Args:
        execute: When True, upsert the transformed points into
            TARGET_COLLECTION (same ids and vectors, payload rewritten by
            transform_payload) and then delete them from SOURCE_COLLECTION.
    """
    rule = f"{'=' * 60}"
    mode = 'EXECUTE' if execute else 'DRY RUN'
    print(rule)
    print(f"RAG Chunk Migration: {SOURCE_COLLECTION} -> {TARGET_COLLECTION}")
    print(f"Mode: {mode}")
    print(rule)
    print()

    client = QdrantClient(url=QDRANT_URL)

    def print_counts(heading: str) -> None:
        # Fetch both collection infos before printing anything.
        src_info = client.get_collection(SOURCE_COLLECTION)
        dst_info = client.get_collection(TARGET_COLLECTION)
        print(heading)
        print(f" {SOURCE_COLLECTION}: {src_info.points_count} points")
        print(f" {TARGET_COLLECTION}: {dst_info.points_count} points")

    print_counts(f"Before migration:")
    print()

    # Load every point that belongs to one of the migrated sources.
    print(f"Scrolling points for {len(SOURCES_TO_MIGRATE)} source codes...")
    points = scroll_all_points(client, SOURCE_COLLECTION, SOURCES_TO_MIGRATE)
    total = len(points)
    print(f" Found {total} points to migrate")
    print()

    if total == 0:
        print("No points found to migrate. Exiting.")
        return

    # Per-source tally, for the report only.
    tally: Dict[str, int] = {}
    for point in points:
        code = point.payload.get("source_code", "UNKNOWN")
        tally.setdefault(code, 0)
        tally[code] += 1

    print("Points per source:")
    for code, count in sorted(tally.items()):
        print(f" {code}: {count} chunks")
    print()

    if not execute:
        print("DRY RUN complete. Use --execute to actually migrate.")
        return

    # Step 1: write the transformed points to the target in batches of 50.
    # NOTE(review): ids are reused as-is; presumably an existing target point
    # with the same id would be overwritten by upsert - confirm ids cannot collide.
    upsert_batch = 50
    upserted = 0
    for start in range(0, total, upsert_batch):
        converted = [
            PointStruct(
                id=point.id,
                vector=point.vector,
                payload=transform_payload(point.payload),
            )
            for point in points[start:start + upsert_batch]
        ]
        client.upsert(collection_name=TARGET_COLLECTION, points=converted)
        upserted += len(converted)
        print(f" Upserted {upserted}/{total} points...")

    print(f"\nUpsert complete: {upserted} points added to {TARGET_COLLECTION}")

    # Step 2: only after all upserts, remove the originals in batches of 100.
    point_ids = [point.id for point in points]
    id_total = len(point_ids)
    delete_batch = 100
    for start in range(0, id_total, delete_batch):
        stop = min(start + delete_batch, id_total)
        client.delete(
            collection_name=SOURCE_COLLECTION,
            points_selector=point_ids[start:stop],
        )
        print(f" Deleted {stop}/{id_total} from {SOURCE_COLLECTION}...")

    print(f"\nDelete complete: {id_total} points removed from {SOURCE_COLLECTION}")

    print_counts(f"\nAfter migration:")
    print(f"\nMigration complete!")
|
||||
|
||||
|
||||
def verify():
    """Report whether the migration left both collections in the expected state.

    Prints the point counts of both collections, then checks that no point of
    SOURCES_TO_MIGRATE is left in SOURCE_COLLECTION, and finally checks for
    each source code that at least one point with that ``regulation_code``
    exists in TARGET_COLLECTION. Results are only printed; nothing is returned
    or raised by the checks themselves.
    """
    print(f"Verifying migration...")
    client = QdrantClient(url=QDRANT_URL)

    # Fetch both infos first, then print them.
    src_info = client.get_collection(SOURCE_COLLECTION)
    dst_info = client.get_collection(TARGET_COLLECTION)
    print(f" {SOURCE_COLLECTION}: {src_info.points_count} points")
    print(f" {TARGET_COLLECTION}: {dst_info.points_count} points")

    # Migrated sources must no longer appear in the source collection.
    leftovers = scroll_all_points(client, SOURCE_COLLECTION, SOURCES_TO_MIGRATE)
    if not leftovers:
        print(f"\n OK: No migrated sources remaining in {SOURCE_COLLECTION}")
    else:
        print(f"\n WARNING: {len(leftovers)} points still in {SOURCE_COLLECTION}!")
        tally: Dict[str, int] = {}
        for point in leftovers:
            key = point.payload.get("source_code", "UNKNOWN")
            tally[key] = tally.get(key, 0) + 1
        for key in sorted(tally):
            print(f" {key}: {tally[key]}")

    # Each migrated source must have at least one point in the target.
    for code in SOURCES_TO_MIGRATE:
        by_code = Filter(
            must=[FieldCondition(key="regulation_code", match=MatchAny(any=[code]))]
        )
        hits, _ = client.scroll(
            collection_name=TARGET_COLLECTION,
            scroll_filter=by_code,
            limit=1,
            with_payload=True,
            with_vectors=False,
        )
        status = "MISSING" if not hits else f"{len(hits)}+ chunks"
        print(f" {TARGET_COLLECTION}/{code}: {status}")
|
||||
|
||||
|
||||
def main():
    """Parse the command line and run either the verification or the migration.

    ``--verify`` runs verify() and takes precedence over ``--execute``.
    Otherwise migrate() is called, as a dry run unless ``--execute`` is given.
    """
    cli = argparse.ArgumentParser(description="Migrate RAG chunks between collections")
    cli.add_argument(
        "--execute",
        action="store_true",
        help="Actually execute the migration (default: dry run)",
    )
    cli.add_argument(
        "--verify",
        action="store_true",
        help="Verify migration results",
    )
    options = cli.parse_args()

    if not options.verify:
        migrate(execute=options.execute)
        return
    verify()
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user