feat: RAG Chunk Browser — alle Collections + 59 EDPB/WP29/DSFA Eintraege
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 1m56s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 18s

- rag-constants.ts: 11 → 59 EDPB/WP29/EDPS + 20 DSFA Muss-Listen
- ChunkBrowserQA: Dropdown von 3 auf 7 Collections erweitert
  (+ bp_dsfa_corpus, bp_compliance_recht, bp_legal_templates, bp_nibis_eh)
- page.tsx: Collection-Totals aktualisiert (datenschutz 17459, dsfa 8666)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-05 11:01:14 +01:00
parent 1cc69d6b5e
commit e6858010c2
3 changed files with 141 additions and 12 deletions

View File

@@ -32,6 +32,10 @@ const COLLECTIONS = [
'bp_compliance_gesetze',
'bp_compliance_ce',
'bp_compliance_datenschutz',
'bp_dsfa_corpus',
'bp_compliance_recht',
'bp_legal_templates',
'bp_nibis_eh',
]
export function ChunkBrowserQA({ apiProxy }: ChunkBrowserQAProps) {

View File

@@ -1384,20 +1384,19 @@ const isInRag = (code: string): boolean => code in REGULATIONS_IN_RAG
// Helper: Get known chunk count for a regulation
const getKnownChunks = (code: string): number => REGULATIONS_IN_RAG[code]?.chunks || 0
// Known collection totals (updated: 2026-02-28)
// Note: bp_compliance_ce and bp_compliance_datenschutz will increase after
// industry compliance ingestion (Machinery Reg, Blue Guide, ENISA, NIST, OECD).
// Update chunk counts after running ingest-industry-compliance.sh.
// Known collection totals (updated: 2026-03-05)
// Note: bp_compliance_datenschutz expanded via edpb-crawler.py (55 EDPB/WP29/EDPS documents).
// bp_dsfa_corpus expanded with 20 DSFA Muss-Listen (BfDI + DSK + 16 Bundeslaender).
const COLLECTION_TOTALS = {
bp_compliance_gesetze: 58304,
bp_compliance_ce: 18183,
bp_legal_templates: 7689,
bp_compliance_datenschutz: 2448,
bp_dsfa_corpus: 7867,
bp_compliance_datenschutz: 17459,
bp_dsfa_corpus: 8666,
bp_compliance_recht: 1425,
bp_nibis_eh: 7996,
total_legal: 76487, // gesetze + ce
total_all: 103912,
total_all: 119722,
}
// License display labels

View File

@@ -109,7 +109,7 @@ export const REGULATIONS_IN_RAG: Record<string, RagRegulationEntry> = {
HU_INFOTV: { collection: 'bp_compliance_gesetze', chunks: 747, qdrant_id: 'hu_info_tv' },
LU_DPA_LAW: { collection: 'bp_compliance_gesetze', chunks: 2, qdrant_id: 'lu_dpa_law' },
// === EDPB Guidelines (bp_compliance_datenschutz) ===
// === EDPB Guidelines (bp_compliance_datenschutz) — alt (ingest-legal-corpus.sh) ===
EDPB_GUIDELINES_5_2020: { collection: 'bp_compliance_datenschutz', chunks: 236, qdrant_id: 'edpb_05_2020' },
EDPB_GUIDELINES_7_2020: { collection: 'bp_compliance_datenschutz', chunks: 347, qdrant_id: 'edpb_guidelines_7_2020' },
EDPB_GUIDELINES_1_2020: { collection: 'bp_compliance_datenschutz', chunks: 337, qdrant_id: 'edpb_01_2020' },
@@ -119,8 +119,73 @@ export const REGULATIONS_IN_RAG: Record<string, RagRegulationEntry> = {
EDPB_GUIDELINES_4_2019: { collection: 'bp_compliance_datenschutz', chunks: 202, qdrant_id: 'edpb_04_2019' },
EDPB_GUIDELINES_9_2022: { collection: 'bp_compliance_datenschutz', chunks: 243, qdrant_id: 'edpb_09_2022' },
EDPB_DPIA_LIST: { collection: 'bp_compliance_datenschutz', chunks: 29, qdrant_id: 'edpb_dpia_list' },
EDPB_LEGITIMATE_INTEREST: { collection: 'bp_compliance_datenschutz', chunks: 336, qdrant_id: 'edpb_legitimate_interest' },
EDPS_DPIA_LIST: { collection: 'bp_compliance_datenschutz', chunks: 35, qdrant_id: 'edps_dpia_list' },
EDPB_LEGITIMATE_INTEREST: { collection: 'bp_compliance_datenschutz', chunks: 672, qdrant_id: 'edpb_legitimate_interest' },
EDPS_DPIA_LIST: { collection: 'bp_compliance_datenschutz', chunks: 73, qdrant_id: 'edps_dpia_list' },
// === EDPB Guidelines (bp_compliance_datenschutz) — neu (edpb-crawler.py) ===
EDPB_ACCESS_01_2022: { collection: 'bp_compliance_datenschutz', chunks: 1020, qdrant_id: 'edpb_access_01_2022' },
EDPB_ARTICLE48_02_2024: { collection: 'bp_compliance_datenschutz', chunks: 158, qdrant_id: 'edpb_article48_02_2024' },
EDPB_BCR_01_2022: { collection: 'bp_compliance_datenschutz', chunks: 384, qdrant_id: 'edpb_bcr_01_2022' },
EDPB_BREACH_09_2022: { collection: 'bp_compliance_datenschutz', chunks: 486, qdrant_id: 'edpb_breach_09_2022' },
EDPB_CERTIFICATION_01_2018: { collection: 'bp_compliance_datenschutz', chunks: 160, qdrant_id: 'edpb_certification_01_2018' },
EDPB_CERTIFICATION_01_2019: { collection: 'bp_compliance_datenschutz', chunks: 160, qdrant_id: 'edpb_certification_01_2019' },
EDPB_CONNECTED_VEHICLES_01_2020: { collection: 'bp_compliance_datenschutz', chunks: 482, qdrant_id: 'edpb_connected_vehicles_01_2020' },
EDPB_CONSENT_05_2020: { collection: 'bp_compliance_datenschutz', chunks: 247, qdrant_id: 'edpb_consent_05_2020' },
EDPB_CONTROLLER_PROCESSOR_07_2020: { collection: 'bp_compliance_datenschutz', chunks: 694, qdrant_id: 'edpb_controller_processor_07_2020' },
EDPB_COOKIE_TASKFORCE_2023: { collection: 'bp_compliance_datenschutz', chunks: 78, qdrant_id: 'edpb_cookie_taskforce_2023' },
EDPB_DARK_PATTERNS_03_2022: { collection: 'bp_compliance_datenschutz', chunks: 413, qdrant_id: 'edpb_dark_patterns_03_2022' },
EDPB_DPBD_04_2019: { collection: 'bp_compliance_datenschutz', chunks: 216, qdrant_id: 'edpb_dpbd_04_2019' },
EDPB_DPIA_LIST_RECOMMENDATION: { collection: 'bp_compliance_datenschutz', chunks: 31, qdrant_id: 'edpb_dpia_list_recommendation' },
EDPB_EPRIVACY_02_2023: { collection: 'bp_compliance_datenschutz', chunks: 188, qdrant_id: 'edpb_eprivacy_02_2023' },
EDPB_FACIAL_RECOGNITION_05_2022: { collection: 'bp_compliance_datenschutz', chunks: 396, qdrant_id: 'edpb_facial_recognition_05_2022' },
EDPB_FINES_04_2022: { collection: 'bp_compliance_datenschutz', chunks: 346, qdrant_id: 'edpb_fines_04_2022' },
EDPB_GEOLOCATION_04_2020: { collection: 'bp_compliance_datenschutz', chunks: 108, qdrant_id: 'edpb_geolocation_04_2020' },
EDPB_GL_2_2019: { collection: 'bp_compliance_datenschutz', chunks: 107, qdrant_id: 'edpb_gl_2_2019' },
EDPB_HEALTH_DATA_03_2020: { collection: 'bp_compliance_datenschutz', chunks: 182, qdrant_id: 'edpb_health_data_03_2020' },
EDPB_LEGAL_BASIS_02_2019: { collection: 'bp_compliance_datenschutz', chunks: 107, qdrant_id: 'edpb_legal_basis_02_2019' },
EDPB_LEGITIMATE_INTEREST_01_2024: { collection: 'bp_compliance_datenschutz', chunks: 336, qdrant_id: 'edpb_legitimate_interest_01_2024' },
EDPB_RTBF_05_2019: { collection: 'bp_compliance_datenschutz', chunks: 111, qdrant_id: 'edpb_rtbf_05_2019' },
EDPB_RRO_09_2020: { collection: 'bp_compliance_datenschutz', chunks: 82, qdrant_id: 'edpb_rro_09_2020' },
EDPB_SOCIAL_MEDIA_08_2020: { collection: 'bp_compliance_datenschutz', chunks: 333, qdrant_id: 'edpb_social_media_08_2020' },
EDPB_TRANSFERS_01_2020: { collection: 'bp_compliance_datenschutz', chunks: 337, qdrant_id: 'edpb_transfers_01_2020' },
EDPB_TRANSFERS_07_2020: { collection: 'bp_compliance_datenschutz', chunks: 337, qdrant_id: 'edpb_transfers_07_2020' },
EDPB_VIDEO_03_2019: { collection: 'bp_compliance_datenschutz', chunks: 204, qdrant_id: 'edpb_video_03_2019' },
EDPB_VVA_02_2021: { collection: 'bp_compliance_datenschutz', chunks: 273, qdrant_id: 'edpb_vva_02_2021' },
// === EDPS Guidance (bp_compliance_datenschutz) ===
EDPS_DIGITAL_ETHICS_2018: { collection: 'bp_compliance_datenschutz', chunks: 404, qdrant_id: 'edps_digital_ethics_2018' },
EDPS_GENAI_ORIENTATIONS_2024: { collection: 'bp_compliance_datenschutz', chunks: 274, qdrant_id: 'edps_genai_orientations_2024' },
// === WP29 Endorsed (bp_compliance_datenschutz) ===
WP242_PORTABILITY: { collection: 'bp_compliance_datenschutz', chunks: 141, qdrant_id: 'wp242_portability' },
WP243_DPO: { collection: 'bp_compliance_datenschutz', chunks: 54, qdrant_id: 'wp243_dpo' },
WP244_PROFILING: { collection: 'bp_compliance_datenschutz', chunks: 247, qdrant_id: 'wp244_profiling' },
WP248_DPIA: { collection: 'bp_compliance_datenschutz', chunks: 288, qdrant_id: 'wp248_dpia' },
WP250_BREACH: { collection: 'bp_compliance_datenschutz', chunks: 201, qdrant_id: 'wp250_breach' },
WP259_CONSENT: { collection: 'bp_compliance_datenschutz', chunks: 496, qdrant_id: 'wp259_consent' },
WP260_TRANSPARENCY: { collection: 'bp_compliance_datenschutz', chunks: 558, qdrant_id: 'wp260_transparency' },
// === DSFA Muss-Listen (bp_dsfa_corpus) ===
DSFA_BFDI_BUND: { collection: 'bp_dsfa_corpus', chunks: 17, qdrant_id: 'dsfa_bfdi_bund' },
DSFA_DSK_GEMEINSAM: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_dsk_gemeinsam' },
DSFA_BW: { collection: 'bp_dsfa_corpus', chunks: 41, qdrant_id: 'dsfa_bw' },
DSFA_BY: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_by' },
DSFA_BE_OE: { collection: 'bp_dsfa_corpus', chunks: 31, qdrant_id: 'dsfa_be_oe' },
DSFA_BE_NOE: { collection: 'bp_dsfa_corpus', chunks: 48, qdrant_id: 'dsfa_be_noe' },
DSFA_BB_OE: { collection: 'bp_dsfa_corpus', chunks: 43, qdrant_id: 'dsfa_bb_oe' },
DSFA_BB_NOE: { collection: 'bp_dsfa_corpus', chunks: 53, qdrant_id: 'dsfa_bb_noe' },
DSFA_HB: { collection: 'bp_dsfa_corpus', chunks: 44, qdrant_id: 'dsfa_hb' },
DSFA_HH_OE: { collection: 'bp_dsfa_corpus', chunks: 58, qdrant_id: 'dsfa_hh_oe' },
DSFA_HH_NOE: { collection: 'bp_dsfa_corpus', chunks: 53, qdrant_id: 'dsfa_hh_noe' },
DSFA_MV: { collection: 'bp_dsfa_corpus', chunks: 32, qdrant_id: 'dsfa_mv' },
DSFA_NI: { collection: 'bp_dsfa_corpus', chunks: 47, qdrant_id: 'dsfa_ni' },
DSFA_RP: { collection: 'bp_dsfa_corpus', chunks: 25, qdrant_id: 'dsfa_rp' },
DSFA_SL: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_sl' },
DSFA_SN: { collection: 'bp_dsfa_corpus', chunks: 18, qdrant_id: 'dsfa_sn' },
DSFA_ST_OE: { collection: 'bp_dsfa_corpus', chunks: 57, qdrant_id: 'dsfa_st_oe' },
DSFA_ST_NOE: { collection: 'bp_dsfa_corpus', chunks: 35, qdrant_id: 'dsfa_st_noe' },
DSFA_SH: { collection: 'bp_dsfa_corpus', chunks: 44, qdrant_id: 'dsfa_sh' },
DSFA_TH: { collection: 'bp_dsfa_corpus', chunks: 48, qdrant_id: 'dsfa_th' },
}
/**
@@ -217,18 +282,79 @@ export const REGULATION_INFO: RegulationInfo[] = [
{ code: 'CZ_ZOU', name: 'Zakon Tschechien', type: 'national_law' },
{ code: 'HU_INFOTV', name: 'Infotv. Ungarn', type: 'national_law' },
{ code: 'LU_DPA_LAW', name: 'Datenschutzgesetz Luxemburg', type: 'national_law' },
// EDPB
// EDPB Guidelines (alt)
{ code: 'EDPB_GUIDELINES_5_2020', name: 'EDPB GL Einwilligung', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_7_2020', name: 'EDPB GL C/P Konzepte', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_1_2020', name: 'EDPB GL Fahrzeuge', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_1_2022', name: 'EDPB GL Bussgelder', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_2_2023', name: 'EDPB GL Art. 37 Scope', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_2_2024', name: 'EDPB GL 2024', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_2_2024', name: 'EDPB GL Art. 48', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_4_2019', name: 'EDPB GL Art. 25 DPbD', type: 'eu_guideline' },
{ code: 'EDPB_GUIDELINES_9_2022', name: 'EDPB GL Datenschutzverletzung', type: 'eu_guideline' },
{ code: 'EDPB_DPIA_LIST', name: 'EDPB DPIA-Liste', type: 'eu_guideline' },
{ code: 'EDPB_LEGITIMATE_INTEREST', name: 'EDPB Berecht. Interesse', type: 'eu_guideline' },
{ code: 'EDPS_DPIA_LIST', name: 'EDPS DPIA-Liste', type: 'eu_guideline' },
// EDPB Guidelines (neu — Crawler)
{ code: 'EDPB_ACCESS_01_2022', name: 'EDPB GL Auskunftsrecht', type: 'eu_guideline' },
{ code: 'EDPB_ARTICLE48_02_2024', name: 'EDPB GL Art. 48', type: 'eu_guideline' },
{ code: 'EDPB_BCR_01_2022', name: 'EDPB GL BCR', type: 'eu_guideline' },
{ code: 'EDPB_BREACH_09_2022', name: 'EDPB GL Datenpannen', type: 'eu_guideline' },
{ code: 'EDPB_CERTIFICATION_01_2018', name: 'EDPB GL Zertifizierung', type: 'eu_guideline' },
{ code: 'EDPB_CERTIFICATION_01_2019', name: 'EDPB GL Zertifizierung 2019', type: 'eu_guideline' },
{ code: 'EDPB_CONNECTED_VEHICLES_01_2020', name: 'EDPB GL Vernetzte Fahrzeuge', type: 'eu_guideline' },
{ code: 'EDPB_CONSENT_05_2020', name: 'EDPB GL Consent', type: 'eu_guideline' },
{ code: 'EDPB_CONTROLLER_PROCESSOR_07_2020', name: 'EDPB GL Verantwortliche/Auftragsverarbeiter', type: 'eu_guideline' },
{ code: 'EDPB_COOKIE_TASKFORCE_2023', name: 'EDPB Cookie-Banner Taskforce', type: 'eu_guideline' },
{ code: 'EDPB_DARK_PATTERNS_03_2022', name: 'EDPB GL Dark Patterns', type: 'eu_guideline' },
{ code: 'EDPB_DPBD_04_2019', name: 'EDPB GL Data Protection by Design', type: 'eu_guideline' },
{ code: 'EDPB_DPIA_LIST_RECOMMENDATION', name: 'EDPB DPIA-Empfehlung', type: 'eu_guideline' },
{ code: 'EDPB_EPRIVACY_02_2023', name: 'EDPB GL ePrivacy', type: 'eu_guideline' },
{ code: 'EDPB_FACIAL_RECOGNITION_05_2022', name: 'EDPB GL Gesichtserkennung', type: 'eu_guideline' },
{ code: 'EDPB_FINES_04_2022', name: 'EDPB GL Bussgeldberechnung', type: 'eu_guideline' },
{ code: 'EDPB_GEOLOCATION_04_2020', name: 'EDPB GL Geolokalisierung', type: 'eu_guideline' },
{ code: 'EDPB_GL_2_2019', name: 'EDPB GL Video-Ueberwachung', type: 'eu_guideline' },
{ code: 'EDPB_HEALTH_DATA_03_2020', name: 'EDPB GL Gesundheitsdaten', type: 'eu_guideline' },
{ code: 'EDPB_LEGAL_BASIS_02_2019', name: 'EDPB GL Rechtsgrundlage Art. 6(1)(b)', type: 'eu_guideline' },
{ code: 'EDPB_LEGITIMATE_INTEREST_01_2024', name: 'EDPB GL Berecht. Interesse 2024', type: 'eu_guideline' },
{ code: 'EDPB_RTBF_05_2019', name: 'EDPB GL Recht auf Vergessenwerden', type: 'eu_guideline' },
{ code: 'EDPB_RRO_09_2020', name: 'EDPB GL Relevant & Reasoned Objection', type: 'eu_guideline' },
{ code: 'EDPB_SOCIAL_MEDIA_08_2020', name: 'EDPB GL Social Media Targeting', type: 'eu_guideline' },
{ code: 'EDPB_TRANSFERS_01_2020', name: 'EDPB GL Uebermittlungen Art. 49', type: 'eu_guideline' },
{ code: 'EDPB_TRANSFERS_07_2020', name: 'EDPB GL Drittlandtransfers', type: 'eu_guideline' },
{ code: 'EDPB_VIDEO_03_2019', name: 'EDPB GL Videoueberwachung', type: 'eu_guideline' },
{ code: 'EDPB_VVA_02_2021', name: 'EDPB GL Virtuelle Sprachassistenten', type: 'eu_guideline' },
// EDPS
{ code: 'EDPS_DIGITAL_ETHICS_2018', name: 'EDPS Digitale Ethik', type: 'eu_guideline' },
{ code: 'EDPS_GENAI_ORIENTATIONS_2024', name: 'EDPS GenAI Orientierungen', type: 'eu_guideline' },
// WP29 Endorsed
{ code: 'WP242_PORTABILITY', name: 'WP242 Datenportabilitaet', type: 'wp29_endorsed' },
{ code: 'WP243_DPO', name: 'WP243 Datenschutzbeauftragter', type: 'wp29_endorsed' },
{ code: 'WP244_PROFILING', name: 'WP244 Profiling', type: 'wp29_endorsed' },
{ code: 'WP248_DPIA', name: 'WP248 DSFA', type: 'wp29_endorsed' },
{ code: 'WP250_BREACH', name: 'WP250 Datenpannen', type: 'wp29_endorsed' },
{ code: 'WP259_CONSENT', name: 'WP259 Einwilligung', type: 'wp29_endorsed' },
{ code: 'WP260_TRANSPARENCY', name: 'WP260 Transparenz', type: 'wp29_endorsed' },
// DSFA Muss-Listen
{ code: 'DSFA_BFDI_BUND', name: 'DSFA BfDI Bund', type: 'dsfa_mussliste' },
{ code: 'DSFA_DSK_GEMEINSAM', name: 'DSFA DSK Gemeinsam', type: 'dsfa_mussliste' },
{ code: 'DSFA_BW', name: 'DSFA Baden-Wuerttemberg', type: 'dsfa_mussliste' },
{ code: 'DSFA_BY', name: 'DSFA Bayern', type: 'dsfa_mussliste' },
{ code: 'DSFA_BE_OE', name: 'DSFA Berlin oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_BE_NOE', name: 'DSFA Berlin nicht-oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_BB_OE', name: 'DSFA Brandenburg oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_BB_NOE', name: 'DSFA Brandenburg nicht-oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_HB', name: 'DSFA Bremen', type: 'dsfa_mussliste' },
{ code: 'DSFA_HH_OE', name: 'DSFA Hamburg oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_HH_NOE', name: 'DSFA Hamburg nicht-oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_MV', name: 'DSFA Mecklenburg-Vorpommern', type: 'dsfa_mussliste' },
{ code: 'DSFA_NI', name: 'DSFA Niedersachsen', type: 'dsfa_mussliste' },
{ code: 'DSFA_RP', name: 'DSFA Rheinland-Pfalz', type: 'dsfa_mussliste' },
{ code: 'DSFA_SL', name: 'DSFA Saarland', type: 'dsfa_mussliste' },
{ code: 'DSFA_SN', name: 'DSFA Sachsen', type: 'dsfa_mussliste' },
{ code: 'DSFA_ST_OE', name: 'DSFA Sachsen-Anhalt oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_ST_NOE', name: 'DSFA Sachsen-Anhalt nicht-oeffentlich', type: 'dsfa_mussliste' },
{ code: 'DSFA_SH', name: 'DSFA Schleswig-Holstein', type: 'dsfa_mussliste' },
{ code: 'DSFA_TH', name: 'DSFA Thueringen', type: 'dsfa_mussliste' },
// International Standards
{ code: 'NIST_SSDF', name: 'NIST SSDF', type: 'international_standard' },
{ code: 'NIST_CSF_2', name: 'NIST CSF 2.0', type: 'international_standard' },