Compare commits
54 Commits
572052285c
..
main
| Author | SHA1 | Date | |
|---|---|---|---|
| a7850a0296 | |||
| ec3b0e26fd | |||
| 19d1a56df4 | |||
| 3934bdf814 | |||
| dbd44ecc20 | |||
| 93687a32fe | |||
| 2d9fec3a6d | |||
| a6f4ca88a4 | |||
| 297eff949e | |||
| 01e2e0fc4b | |||
| b4043b20b2 | |||
| ad61fd3779 | |||
| d1b55cd65b | |||
| cb46372e52 | |||
| f1814fe8ec | |||
| 12a9fe1810 | |||
| 8b5b9905a7 | |||
| cd23ebc3ba | |||
| f30ac73b79 | |||
| bb85ee2e27 | |||
| 0d5ebcd27a | |||
| 7d721a6787 | |||
| 9a1ad87acd | |||
| 911697bab4 | |||
| 9783657da3 | |||
| 47d7beeb52 | |||
| 63b195c0aa | |||
| 77993d0ea0 | |||
| 9382d2a7a4 | |||
| b727f14011 | |||
| 084beed348 | |||
| 5510689710 | |||
| 49e594bf38 | |||
| 583e54fabc | |||
| 7f4b7da098 | |||
| f3e54180f0 | |||
| ae937a35d7 | |||
| edac3aca6c | |||
| fc4d5d8c56 | |||
| f5d4e3bd95 | |||
| 9e3604fe31 | |||
| 0c09b960b9 | |||
| cf18b1074a | |||
| 2e8cbfff3f | |||
| f6489e7748 | |||
| 519cc274bb | |||
| 79810f4eb8 | |||
| 5f193c8a72 | |||
| d13f4511cb | |||
| 937eca6b77 | |||
| 0c1561d6cc | |||
| 0bb9726ddd | |||
| 8510af46eb | |||
| 81db904b3e |
@@ -25,6 +25,7 @@ voice-service/bqas/** | owner=pipeline | reason=RAG Quality Assessment, produkti
|
||||
# Seed/Helper Scripts (keine Service-Logik)
|
||||
scripts/seed-demo-and-screenshot.py | owner=infra | reason=Einmaliges Seed-Script, kein Service-Code | review=permanent
|
||||
pitch-deck/scripts/import-finanzplan.py | owner=pitch-deck | reason=583 LOC, einmaliges Excel-Import-Script (9 Sheet-Importer), hardcodierte Row/Col-Mappings fuer eine Finanzplan-.xlsm-Datei, keine wiederverwendbare Logik | review=2027-01
|
||||
pitch-deck/scripts/export-finanzplan-excel.ts | owner=pitch-deck | reason=1254 LOC, Excel-Export-Script — analog zu import-finanzplan.py: 9 Sheets, ~80% Cell-Formatting/Styling-Boilerplate, keine wiederverwendbare Logik | review=2027-01
|
||||
|
||||
# PDF Templates (reine statische HTML/CSS Strings, keine Logik)
|
||||
backend-core/services/pdf_templates.py | owner=all | reason=519 LOC, rein statische Jinja2-HTML-Templates + CSS, keine Logik | review=2026-07
|
||||
@@ -33,3 +34,6 @@ backend-core/services/pdf_templates.py | owner=all | reason=519 LOC, rein statis
|
||||
pitch-deck/lib/presenter/presenter-faq.ts | owner=pitch-deck | reason=973 LOC, pure static FAQ array (questions/answers/keywords), no logic | review=2027-01
|
||||
pitch-deck/lib/presenter/presenter-script.ts | owner=pitch-deck | reason=608 LOC, pure static presenter script data + 3 trivial lookup functions | review=2027-01
|
||||
pitch-deck/lib/i18n.ts | owner=pitch-deck | reason=620 LOC, pure DE/EN translation dictionaries + 3 small format helpers | review=2027-01
|
||||
|
||||
# Marketing Website — adapted from pitch-deck USP slide (complex SVG animation, inline styles, no logic to split)
|
||||
marketing-website/components/sections/PlatformBridgeSection.tsx | owner=marketing | reason=816 LOC, adapted 1:1 from pitch-deck USPSlide with SVG animations, CSS keyframes, inline styles — splitting would break animation coherence | review=2027-01
|
||||
|
||||
@@ -41,6 +41,11 @@ backups/*.backup
|
||||
*.mp3
|
||||
*.wav
|
||||
|
||||
# Cloned external legal-source repos (gitignored; pulled fresh at ingest time)
|
||||
legal-sources/bsi-quaidal/
|
||||
legal-sources/bsi-quaidal-src/
|
||||
legal-sources/bsi-grundschutz-plus/
|
||||
|
||||
# Compiled binaries
|
||||
billing-service/billing-service
|
||||
consent-service/server
|
||||
@@ -62,3 +67,7 @@ consent-service/server
|
||||
# Coverage
|
||||
coverage/
|
||||
*.coverage
|
||||
controls_backup_*.dump
|
||||
|
||||
# Allow Finanzplan exports (generated by pitch-deck/scripts/export-finanzplan.sh)
|
||||
!pitch-deck/exports/*.xlsx
|
||||
|
||||
Generated
+2948
File diff suppressed because it is too large
Load Diff
@@ -10,7 +10,7 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"lucide-react": "^0.468.0",
|
||||
"next": "^15.1.0",
|
||||
"next": "^15.5.16",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1",
|
||||
"reactflow": "^11.11.4",
|
||||
|
||||
@@ -0,0 +1,158 @@
|
||||
# Controls nutzen — Anleitung für andere Sessions
|
||||
|
||||
**Stand:** 2026-05-07, wird laufend aktualisiert
|
||||
**Repo:** breakpilot-core (~/Projekte/breakpilot-core)
|
||||
|
||||
---
|
||||
|
||||
## Was sind die Controls?
|
||||
|
||||
174.497 atomare Compliance-Controls in der Datenbank. Jeder Control ist eine **einzelne prüfbare Anforderung** aus einer Rechtsquelle (DSGVO, NIS2, NIST, AI Act, etc.).
|
||||
|
||||
### Beispiel
|
||||
|
||||
```
|
||||
Control-ID: AUTH-2956-A14
|
||||
Titel: "Implementierung von Multi-Faktor-Authentifizierung prüfen"
|
||||
Objective: "Sicherstellen, dass MFA korrekt implementiert ist..."
|
||||
Merge-Key: "verify:multi_factor_auth:testing"
|
||||
Severity: high
|
||||
```
|
||||
|
||||
## Wo liegen die Controls?
|
||||
|
||||
### Datenbank (PostgreSQL auf Mac Mini)
|
||||
|
||||
```sql
|
||||
-- Alle Controls abfragen
|
||||
SELECT id, control_id, title, objective, severity,
|
||||
source_citation, -- Rechtsquelle (JSON)
|
||||
generation_metadata->>'merge_group_hint' AS merge_key
|
||||
FROM compliance.canonical_controls
|
||||
WHERE release_state NOT IN ('deprecated', 'rejected');
|
||||
```
|
||||
|
||||
**Verbindung:**
|
||||
```bash
|
||||
# Vom MacBook:
|
||||
ssh macmini "/usr/local/bin/docker exec bp-core-postgres psql -U breakpilot -d breakpilot_db"
|
||||
|
||||
# Oder via Control-Pipeline Container:
|
||||
ssh macmini "/usr/local/bin/docker exec bp-core-control-pipeline curl -sf http://127.0.0.1:8098/..."
|
||||
```
|
||||
|
||||
### API (Port 8098, nur via Docker exec erreichbar)
|
||||
|
||||
```bash
|
||||
# Master Controls auflisten
|
||||
ssh macmini "/usr/local/bin/docker exec bp-core-control-pipeline \
|
||||
curl -sf 'http://127.0.0.1:8098/v1/master-controls?limit=50&sort=total_controls'"
|
||||
|
||||
# Master Control Detail mit allen Membern
|
||||
ssh macmini "/usr/local/bin/docker exec bp-core-control-pipeline \
|
||||
curl -sf 'http://127.0.0.1:8098/v1/master-controls/MC-8292'"
|
||||
```
|
||||
|
||||
## Struktur der Controls
|
||||
|
||||
### merge_group_hint (Schlüsselfeld!)
|
||||
|
||||
Jeder Control hat einen `merge_group_hint` im Format `action:object:phase`:
|
||||
|
||||
```
|
||||
implement:encryption:implementation
|
||||
define:access_control:definition
|
||||
monitor:network_security:monitoring
|
||||
report:supervisory_authority:reporting
|
||||
```
|
||||
|
||||
**74 kanonische Object-Tokens** (Stand 2026-05-07):
|
||||
|
||||
| Kategorie | Tokens |
|
||||
|-----------|--------|
|
||||
| **Security** | multi_factor_auth, password_policy, credentials, session_management, privileged_access, access_control, encryption, transport_encryption, key_management, certificate_management, network_security, network_segmentation, firewall, vpn, remote_access, monitoring, audit_logging, siem, alerting, compliance_audit, vulnerability, patch_management, backup, disaster_recovery, physical_security, secure_development, api_security, input_validation, container_security, logging_configuration |
|
||||
| **Data Protection** | personal_data, sensitive_data, health_data, consent, data_subject_rights, data_retention, data_transfer, data_breach_notification, dpia, data_processing_agreement, privacy_by_design, data_processing_register, data_classification, cookie_consent, video_surveillance |
|
||||
| **Governance** | policy, procedure, process, training, awareness, incident, risk_management, third_party_management, change_management, documentation, records_management, compliance_reporting, asset_management, human_resources_security |
|
||||
| **Regulatory** | supervisory_authority, certification, product_safety, ai_system, financial_reporting, aml, whistleblowing, consumer_protection, ecommerce, telecommunications, medical_device, payment_services, critical_infrastructure, supply_chain_due_diligence, sustainability_reporting |
|
||||
|
||||
### Rechtsquellen (source_citation)
|
||||
|
||||
Die **Parent-Controls** (nicht die atomaren!) haben `source_citation`:
|
||||
|
||||
```sql
|
||||
-- Controls mit Rechtsquelle finden
|
||||
SELECT cc.control_id, cc.title,
|
||||
pc.source_citation->>'source' AS regulation,
|
||||
pc.source_citation->>'article' AS article
|
||||
FROM compliance.canonical_controls cc
|
||||
JOIN compliance.canonical_controls pc ON pc.id = cc.parent_control_uuid
|
||||
WHERE pc.source_citation IS NOT NULL
|
||||
AND pc.source_citation->>'source' LIKE '%DSGVO%';
|
||||
```
|
||||
|
||||
148 verschiedene Rechtsquellen (DSGVO, NIS2, NIST, OWASP, BSI, TKG, etc.)
|
||||
|
||||
## Controls filtern (Use Cases)
|
||||
|
||||
### Beispiel: Alle DSGVO Art. 13 Controls (für DSI-Prüfung)
|
||||
|
||||
```sql
|
||||
SELECT cc.control_id, cc.title, cc.objective,
|
||||
cc.generation_metadata->>'merge_group_hint' AS merge_key,
|
||||
pc.source_citation->>'article' AS article
|
||||
FROM compliance.canonical_controls cc
|
||||
JOIN compliance.canonical_controls pc ON pc.id = cc.parent_control_uuid
|
||||
WHERE pc.source_citation->>'source' = 'DSGVO (EU) 2016/679'
|
||||
AND pc.source_citation->>'article' LIKE '%13%'
|
||||
AND cc.release_state NOT IN ('deprecated', 'rejected')
|
||||
ORDER BY cc.control_id;
|
||||
```
|
||||
|
||||
### Beispiel: Alle Encryption-Controls
|
||||
|
||||
```sql
|
||||
SELECT control_id, title, objective
|
||||
FROM compliance.canonical_controls
|
||||
WHERE generation_metadata->>'merge_group_hint' LIKE '%:encryption:%'
|
||||
AND release_state NOT IN ('deprecated', 'rejected');
|
||||
```
|
||||
|
||||
### Beispiel: Controls nach Object-Token filtern
|
||||
|
||||
```sql
|
||||
-- Alle Controls zu einem bestimmten Thema
|
||||
SELECT control_id, title,
|
||||
generation_metadata->>'merge_group_hint' AS merge_key
|
||||
FROM compliance.canonical_controls
|
||||
WHERE generation_metadata->>'merge_group_hint' LIKE '%:data_retention:%'
|
||||
AND release_state NOT IN ('deprecated', 'rejected');
|
||||
```
|
||||
|
||||
## Wichtige Tabellen
|
||||
|
||||
| Tabelle | Rows | Beschreibung |
|
||||
|---------|------|-------------|
|
||||
| `compliance.canonical_controls` | ~294K | Alle Controls (Rich + Atomic) |
|
||||
| `compliance.master_controls` | ~5.329 | Gruppierte Master Controls |
|
||||
| `compliance.master_control_members` | ~172K | Zuordnung Control → MC |
|
||||
| `compliance.object_ontology` | 74 | Kanonische Object-Definitionen |
|
||||
| `compliance.regulation_registry` | 223 | Rechtsquellen-Register |
|
||||
|
||||
## Was gerade passiert (2026-05-07)
|
||||
|
||||
**Phase 2 läuft:** Alle 174K Controls werden per Claude Haiku re-klassifiziert. Die `merge_group_hint` werden von Freiform-LLM-Objekten auf 74 kanonische Tokens normalisiert. Danach:
|
||||
- Phase 3: Re-Clustering (gpre1 mit K=20000)
|
||||
- Phase 4: Neue Master Controls (gpre2)
|
||||
- Phase 5: Regulation-Source-Split (gpre3)
|
||||
|
||||
**NICHT ÄNDERN:** `canonical_controls`, `master_controls`, `object_ontology` Tabellen werden aktiv bearbeitet.
|
||||
|
||||
## DB-Zugang Quick Reference
|
||||
|
||||
```bash
|
||||
# Quick Query (eine Zeile)
|
||||
ssh macmini "/usr/local/bin/docker exec bp-core-postgres psql -U breakpilot -d breakpilot_db -c \"SELECT count(*) FROM compliance.canonical_controls\""
|
||||
|
||||
# Interaktive Session
|
||||
ssh macmini "/usr/local/bin/docker exec -it bp-core-postgres psql -U breakpilot -d breakpilot_db"
|
||||
```
|
||||
@@ -1,194 +1,117 @@
|
||||
# Session-Instruktionen: Master Control Qualitaet + Regulation-Source Split
|
||||
# Session-Handover: MC Quality + Gap-Analyse + RAG Ingestion
|
||||
|
||||
**Datum:** 2026-05-06
|
||||
**Fuer:** Naechste Claude-Session
|
||||
**Repo:** breakpilot-core (~/Projekte/breakpilot-core)
|
||||
**Datum:** 2026-05-07 bis 2026-05-11 (5 Tage Marathon)
|
||||
**Repo:** breakpilot-core + breakpilot-compliance
|
||||
|
||||
---
|
||||
|
||||
## NAECHSTER SCHRITT: 25 grosse Master Controls aufsplitten
|
||||
## ERLEDIGT
|
||||
|
||||
### Problem
|
||||
### Master Control Quality Overhaul (Core)
|
||||
- **74.5% → 92.8% Accuracy** (13.588 MCs, 83.073 Members)
|
||||
- Phase 0: Quality Audit mit Claude Sonnet ($3)
|
||||
- Phase 1: Ontologie 31 → 74 Tokens + LLM-Prompt fix
|
||||
- Phase 2: 174K Controls re-klassifiziert via Haiku (10 Batches, ~$50)
|
||||
- Phase 2b: Generic Tokens gefixt (documentation/procedure → echte Themen, $7.54)
|
||||
- Phase 2c: L2 Sub-Topics (2 Runden, 172K Controls, ~$32)
|
||||
- Phase 2d: Bad Subtopics gefixt (stakeholder_*, $0.50)
|
||||
- Phase 3: Re-Clustering K=18704
|
||||
- Phase 4: gpre2 Direct MC (13.588 MCs)
|
||||
- Phase 6: Golden Dataset (20 Controls) + 8 Quality Tests (alle grün)
|
||||
- **Production Sync:** MCs + Members + Hints + doc_check_controls
|
||||
|
||||
25 Master Controls sind zu generisch (>200 Atomic Controls pro MC). Sie basieren auf generischen Security-Domain-Keywords wie "monitoring", "encryption", "personal_data". Embedding-Clustering allein reicht nicht — die Controls handeln zwar alle von "monitoring", aber fuer unterschiedliche Regulierungen (DSGVO, NIS2, NIST, BSI etc.).
|
||||
### doc_check_controls (Core → Production)
|
||||
- **1.874 Controls** über 8 Dokumenttypen (DSE, Cookie, Impressum, AGB, Widerruf, DSFA, AVV, Löschkonzept)
|
||||
- Jeder mit check_question + pass_criteria + fail_criteria
|
||||
- Tabelle `compliance.doc_check_controls` lokal + Production
|
||||
|
||||
### Die 25 betroffenen MCs
|
||||
### RAG Ingestion (Core)
|
||||
- **126 BAuA PDFs** (TRBS/TRGS/ASR): 27.664 Chunks → `bp_compliance_ce`
|
||||
- **OSHA Technical Manual** (23 Kapitel): 7.241 Chunks → `bp_compliance_ce`
|
||||
- **OSHA 1910 Subpart O** (Volltext): 745 Chunks
|
||||
- **EuGH C-588/21 P**: 216 Chunks
|
||||
- **EU 2018/1725**: 842 Chunks → `bp_compliance`
|
||||
- **CE-Obligations extrahiert:** 6.141 Obligations → `/tmp/ce_obligations_v2.json`
|
||||
- Playwright-Crawler für BAuA + OSHA gebaut
|
||||
|
||||
| MC-ID | Name | Controls | Problem |
|
||||
|-------|------|----------|---------|
|
||||
| MC-8292 | monitoring | 6.157 | Alles von Video bis Vulnerability |
|
||||
| MC-2260 | procedure | 4.176 | Generisch |
|
||||
| MC-8302 | alerting | 3.126 | Meldepflichten aller Gesetze gemischt |
|
||||
| MC-8306 | personal_data | 3.057 | DSGVO + NIS2 + AT/CH gemischt |
|
||||
| MC-8312 | training | 2.572 | |
|
||||
| MC-7932 | certificate_management | 2.350 | |
|
||||
| MC-8317 | incident | 2.288 | |
|
||||
| MC-8329 | encryption | 1.790 | |
|
||||
| MC-8333 | audit_logging | 1.645 | |
|
||||
| MC-8321 | policy | 1.463 | |
|
||||
| MC-8325 | patch_management | 1.155 | |
|
||||
| MC-8338 | network_security | 1.071 | |
|
||||
| ... | (13 weitere) | 200-960 | |
|
||||
### Gap-Analyse Engine (Compliance)
|
||||
- **12 Regulierungen** automatisch klassifiziert (CRA, AI Act, NIS2, DSGVO, MiCA, PSD2, AML, etc.)
|
||||
- **IST-Zustand Assessment:** CE-Kennzeichnung, angewandte Normen, bestehende Prozesse, IACE-Projekt-Link
|
||||
- **Norm→Control Mapping:** 20 Normen → MC-Topic Coverage
|
||||
- **Prioritäts-Engine:** Severity × Deadline × Dependency
|
||||
- **5 Branchentemplates:** IoT, Exchange, Cobot, SaaS, Medical
|
||||
- **Frontend:** 2-Step Wizard (Produkt + IST-Zustand) + Dashboard mit Ampel-Status
|
||||
- **API:** 8 Endpoints unter `/sdk/v1/gap/`
|
||||
- **Persistente Projekte:** Speichern + wieder öffnen
|
||||
- **Getestet:** SmartFactory Gateway → 5 Regulierungen, 500 Gaps
|
||||
|
||||
### Loesung: Regulation-Source Split
|
||||
### Tenant Document Upload API (Core)
|
||||
- `POST/GET/DELETE /api/v1/tenant/documents`
|
||||
- Tenant-isolierte Qdrant-Collections
|
||||
- Code fertig, nicht deployed (RAG Service rebuild nötig)
|
||||
|
||||
Statt nur nach Embedding-Aehnlichkeit zu clustern, nach **Regulation-Quelle** aufteilen:
|
||||
|
||||
```
|
||||
MC "encryption" (1.790 Controls)
|
||||
→ encryption_dsgvo (DSGVO Art. 32, ~200)
|
||||
→ encryption_nis2 (NIS2 Art. 21, ~150)
|
||||
→ encryption_nist (NIST SC-13, ~300)
|
||||
→ encryption_bsi (BSI, ~200)
|
||||
→ encryption_owasp (OWASP, ~100)
|
||||
→ encryption_other (~840)
|
||||
```
|
||||
|
||||
### Script-Ansatz
|
||||
|
||||
```python
|
||||
# Fuer jeden der 25 grossen MCs:
|
||||
# 1. Hole alle member controls mit source_citation->>'source'
|
||||
# 2. Gruppiere nach source (Regulation)
|
||||
# 3. Erstelle Sub-MCs pro Regulation
|
||||
# 4. Controls ohne source → "general" Sub-MC
|
||||
```
|
||||
|
||||
### Qualitaetsanforderung (WICHTIG!)
|
||||
|
||||
**Nur "sehr gut" ist akzeptabel.** Mittlere MCs (30-100 Controls) sind bereits excellent:
|
||||
- MC-1082 (data_retention_policies, 52) → perfekt kohaerent
|
||||
- MC-5477 (austausch_von_cybersicherheitsinformationen, 5) → perfekt
|
||||
|
||||
Ziel: ALLE MCs sollen diese Qualitaet haben. Kein MC >100 Controls.
|
||||
### Master Controls Browser (Compliance)
|
||||
- **Neue Seite** `/sdk/master-controls` — reused Control Library UI
|
||||
- Sidebar-Eintrag zwischen Control Library und Provenance
|
||||
- 13.588 MCs mit allen Filtern, Paginierung, Klick-Detail
|
||||
- Verbindet sich mit Production-DB
|
||||
|
||||
---
|
||||
|
||||
## SESSION 03-06.05.2026 KOMPLETT ERLEDIGT
|
||||
## DB-Tabellen (neu/geändert)
|
||||
|
||||
### Block F (Hardcoded Knowledge → DB)
|
||||
- F1: regulation_registry (223 Eintraege) ✅
|
||||
- F2: action_types (34) + action_synonyms (368) ✅
|
||||
- F3: object_synonyms (320) ✅
|
||||
- F4: LLM Enrichment (+468 Synonyme via Ollama) ✅
|
||||
- F5: Validation (8 Tests, Dicts als Fallback) ✅
|
||||
|
||||
### Control Generation Pipeline
|
||||
- 1.599 Rich Controls aus E-Block Chunks (~$17 Anthropic)
|
||||
- 11.522 Obligations (Pass 0a, ~$4)
|
||||
- 1.147 Atomic Controls (Pass 0b, ~$4.60)
|
||||
- **Gesamtkosten: ~$25.60**
|
||||
|
||||
### Production Sync
|
||||
- 2.625 Controls + 11.522 Obligations auf Production synchronisiert
|
||||
- Production: 294.027 Controls total
|
||||
- Backups: lokal + production auf MacBook
|
||||
|
||||
### Block G-pre (Master Controls)
|
||||
- G-pre1: 144k Objects → 7.753 Gruppen (K-Means k=5000 + Sub-Cluster + Refinement)
|
||||
- G-pre2: 5.329 Master Controls, 172.504+ Members
|
||||
- G-pre3: Master Control API (list, stats, detail)
|
||||
- **Qualitaet:** Kleine/mittlere MCs excellent, 25 grosse MCs brauchen Regulation-Source Split
|
||||
|
||||
### Block G (Compliance Execution Layer)
|
||||
- G1: Decision Trace (decision_traces Tabelle + 6 API Endpoints) ✅
|
||||
- G2: Compliance Commit Ledger (compliance_commits + 5 Endpoints) ✅
|
||||
- G3: Full Decision Memory (decision_events + Timeline + 4 Endpoints) ✅
|
||||
- G4: Pre-Deployment Enforcement (deployment_checks + Override + 4 Endpoints) ✅
|
||||
|
||||
### Infrastruktur
|
||||
- Vault CPU-Fix committed (Marker-File + idempotente Checks)
|
||||
- Pass 0a Endpoint im Core Control-Pipeline registriert
|
||||
- Gitea Timezone-Fix (docker-compose.yml)
|
||||
- 61 neue regulation_ids in regulation_registry
|
||||
- Container-Cleanup (fewo-finance-agent, mediaanalysisd)
|
||||
| Tabelle | Repo | Rows (lokal) | Rows (Production) |
|
||||
|---------|------|-------------|-------------------|
|
||||
| compliance.master_controls | Core | 13.588 | 13.588 |
|
||||
| compliance.master_control_members | Core | 83.073 | 83.073 |
|
||||
| compliance.object_ontology | Core | 74 | 74 |
|
||||
| compliance.object_groups | Core | 16.683 | — |
|
||||
| compliance.doc_check_controls | Core | 1.874 | 1.874 |
|
||||
| compliance.gap_projects | Compliance | 1 | 0 |
|
||||
|
||||
---
|
||||
|
||||
## DB-Tabellen (alle Bloecke)
|
||||
## OFFEN / NÄCHSTE SESSION
|
||||
|
||||
| Tabelle | Rows | Migration |
|
||||
|---------|------|-----------|
|
||||
| compliance.regulation_registry | 223 | 002 |
|
||||
| compliance.action_types | 34 | 003 |
|
||||
| compliance.action_synonyms | 368 | 003 |
|
||||
| compliance.object_synonyms | 320 | 003 |
|
||||
| compliance.object_groups | 7.753 | 004 |
|
||||
| compliance.master_controls | 5.329 | 005 |
|
||||
| compliance.master_control_members | ~170k | 005 |
|
||||
| compliance.decision_traces | 0 (Schema ready) | 006 |
|
||||
| compliance.compliance_commits | 0 (Schema ready) | 007 |
|
||||
| compliance.decision_events | 0 (Schema ready) | 008 |
|
||||
| compliance.deployment_checks | 0 (Schema ready) | 009 |
|
||||
|
||||
---
|
||||
|
||||
## API Endpoints (Core Control-Pipeline, Port 8098)
|
||||
|
||||
### Bestehend
|
||||
- `/v1/canonical/generate/*` — Control Generation Pipeline
|
||||
- `/v1/canonical/generate/run-pass0a` — Pass 0a (NEU in dieser Session)
|
||||
- `/v1/canonical/generate/submit-pass0b` — Pass 0b Batch API
|
||||
|
||||
### Neu (diese Session)
|
||||
- `/v1/master-controls` — G-pre3: Liste, Stats, Detail
|
||||
- `/v1/decision-traces` — G1: CRUD + Stats
|
||||
- `/v1/controls/{id}/full-trace` — G1: Volle Kette
|
||||
- `/v1/compliance-commits` — G2: Commit Ledger
|
||||
- `/v1/decision-events` — G3: Lifecycle Events + Timeline
|
||||
- `/v1/deployment-checks` — G4: Pre-Deploy Gate + Override
|
||||
|
||||
### API-Zugriff (WICHTIG)
|
||||
```bash
|
||||
# Nur via Docker exec (Port 8098 blockiert durch document-crawler)
|
||||
ssh macmini "/usr/local/bin/docker exec bp-core-control-pipeline curl -sf http://127.0.0.1:8098/..."
|
||||
```
|
||||
1. **Orca Deploy-Fix** — Production deployed nicht automatisch (Webhook + docker pull Problem)
|
||||
2. **Gap-Analyse v2 IST-Zustand** — Frontend Step 2 deployed, Backend deployed, aber Orca blockiert
|
||||
3. **Tenant Document Upload** deployen (RAG Service rebuild)
|
||||
4. **Compliance-Repo auf gitea pushen** — aktuell "Everything up-to-date", Orca muss manuell redeployt werden
|
||||
5. **MC-Browser erweitern** — Detail-View mit Member-Controls verbessern
|
||||
|
||||
---
|
||||
|
||||
## BACKUPS (auf MacBook)
|
||||
|
||||
| Datei | Inhalt | Groesse |
|
||||
|-------|--------|---------|
|
||||
| controls_backup_20260505.csv | 1.599 neue Controls | 7.2 MB |
|
||||
| obligations_backup_20260505.csv | 11.522 Obligations | 6.2 MB |
|
||||
| production_backup_20260505.dump | Production komprimiert | 30 MB |
|
||||
| production_backup_20260505_plain.sql | Production plain | 1.3 GB |
|
||||
| local_backup_20260506.dump | Lokale DB komprimiert | ~30 MB |
|
||||
| production_backup_20260506.dump | Production komprimiert | ~30 MB |
|
||||
| Datei | Inhalt |
|
||||
|-------|--------|
|
||||
| `backup_pre_gpre3_20260510.dump` | Vor gpre3 Live-Run (171 MB) |
|
||||
| `backup_session_end_20260511.dump` | Session-Ende |
|
||||
| `production_backup_20260508.dump` | Production nach Phase 2 |
|
||||
| `gpre0_checkpoints_backup_20260508/` | 10 Corrections-JSONs |
|
||||
|
||||
---
|
||||
|
||||
## GESTOPPTE CONTAINER
|
||||
## API-Kosten (Anthropic)
|
||||
|
||||
```bash
|
||||
# Vault: Erst nach Fix-Deploy starten (Marker-File noetig)
|
||||
ssh macmini "/usr/local/bin/docker start bp-core-vault"
|
||||
|
||||
# OpenSearch: Bei Bedarf
|
||||
ssh macmini "/usr/local/bin/docker start bp-lehrer-opensearch"
|
||||
|
||||
# fewo-finance-agent: Fremder Container, nicht starten
|
||||
```
|
||||
| Phase | Modell | Kosten |
|
||||
|-------|--------|--------|
|
||||
| Phase 0: Quality Audit | Sonnet | $2.92 |
|
||||
| Phase 0b: Quality Audit v2 | Sonnet | $5.93 |
|
||||
| Phase 2: 174K Re-Klassifizierung | Haiku | ~$50 |
|
||||
| Phase 2b: Generic Token Fix | Haiku | $7.54 |
|
||||
| Phase 2c: Subtopics R1 | Haiku | $20.22 |
|
||||
| Phase 2c: Subtopics R2 | Haiku | $12.03 |
|
||||
| Phase 2d: Bad Subtopics | Haiku | ~$0.50 |
|
||||
| 5K Test-Run | Sonnet | $5.32 |
|
||||
| doc_check_controls | Haiku | ~$5 |
|
||||
| **Gesamt** | | **~$110** |
|
||||
|
||||
---
|
||||
|
||||
## TESTS
|
||||
## STRATEGISCHE ENTSCHEIDUNGEN (in Memory)
|
||||
|
||||
```bash
|
||||
# Pipeline (454 Tests)
|
||||
PYTHONPATH=control-pipeline python3 -m pytest control-pipeline/tests/ -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## OFFENE PUNKTE FUER ANDERE SESSIONS
|
||||
|
||||
1. **Qdrant API-Key** fuer Production (qdrant-dev.breakpilot.ai) ist ungueltig (401). Muss in Coolify erneuert werden.
|
||||
2. **DSI-Check False Positives**: Controls mischen interne Governance mit externen DSI-Anforderungen. Fix: nur Controls mit Art. 13/14 Referenz fuer DSI-Checks nutzen.
|
||||
3. **Spotlight + mediaanalysisd** auf Mac Mini deaktivieren (braucht sudo):
|
||||
```bash
|
||||
sudo mdutil -a -i off
|
||||
sudo launchctl disable system/com.apple.mediaanalysisd
|
||||
```
|
||||
4. **Production DB Sync** fuer neue G-Block Tabellen (decision_traces, compliance_commits, decision_events, deployment_checks) noch ausstehend — Tabellen sind leer, Schema muss auf Production deployed werden.
|
||||
1. **3 Use Cases:** Gap-Analyse (Prio 1), Vendor Risk (Prio 2), Web3/Crypto als Vertikal (Prio 3)
|
||||
2. **Keine Norm-Reproduktion:** Obligation Extraction statt ISO-Texte (juristisch sicher)
|
||||
3. **Regulatory Ingestion Engine:** BAuA/OSHA Crawler als Vorlage für automatisierte Source-Feeds
|
||||
4. **CE-Compliance Crossover:** IACE × Master Controls für Trigger-basierte Compliance-Hinweise
|
||||
|
||||
@@ -0,0 +1,132 @@
|
||||
# Lessons Learned — MC `check_type` Klassifikation (KRITISCH fuer CRA + alle neuen Frameworks)
|
||||
|
||||
Datum: 2026-05-17
|
||||
Auslöser: Compliance-Check BMW lieferte 0/381 Cookie-MCs, 3/75 Impressum-MCs, 43/571 DSE-MCs — alle Doc-Typen unter 20%.
|
||||
|
||||
## TL;DR
|
||||
|
||||
**Die heutigen Master-Controls (MCs) vermischen drei strukturell unterschiedliche Klassen von Pruefungen in einer einzigen Tabelle (`compliance.doc_check_controls`). Nur eine der drei Klassen lässt sich gegen Dokument-Text matchen. Die anderen zwei werden faelschlich als "failed" gezaehlt, weil sie ueberhaupt nicht ueber Text-Matching pruefbar sind.**
|
||||
|
||||
Bei der CRA-MC-Generierung (laeuft jetzt im Pass 0a mit Haiku) **MUSS** jeder MC ein **`check_type`-Feld** bekommen, bevor er in die Datenbank geht. Sonst wiederholt sich das Problem.
|
||||
|
||||
## Die drei Klassen
|
||||
|
||||
| `check_type` | Pruefungsfrage-Pattern | Beispiel | Wie pruefbar? |
|
||||
|---|---|---|---|
|
||||
| **`text`** | "Enthaelt das Dokument...", "Wird im X die Y benannt?", "Ist im Text aufgelistet..." | "Wird im Impressum die Aufsichtsbehoerde benannt?" | Regex / Embedding-Match gegen Doc-Text |
|
||||
| **`process`** | "Ist sichergestellt...", "Ist implementiert...", "Wird durchgefuehrt..." | "Ist sichergestellt, dass Cookies erst nach Einwilligung gespeichert werden?" | Evidence/TOM-Check — kein Doc-Text vorhanden |
|
||||
| **`review`** | "Sind ALLE / Werden ALLE / Stimmt X mit Y ueberein?" | "Sind alle Verarbeitungszwecke vollstaendig erfasst?" | Mensch (DSB) — Checkliste, nicht automatisch |
|
||||
|
||||
## Befund aus den BMW-Daten
|
||||
|
||||
| Doc-Type | TEXT (matchbar) | PROCESS | UNKLAR/REVIEW | Total | % TEXT |
|
||||
|---|---|---|---|---|---|
|
||||
| cookie | 30 | 49 | 302 | 381 | **8%** |
|
||||
| dse | 72 | 139 | 359 | 571 | **13%** |
|
||||
| impressum | 14 | 14 | 47 | 75 | **19%** |
|
||||
| agb | 24 | 20 | 69 | 113 | 21% |
|
||||
| widerruf | 29 | 26 | 96 | 153 | 19% |
|
||||
| loeschkonzept | 38 | 39 | 232 | 309 | 12% |
|
||||
|
||||
**Selbst mit perfektem Matching liegt die Obergrenze fuer doc_check bei 8-21%**, weil 79-92% der MCs nicht ueber Text-Matching pruefbar sind. Es sind keine "schlechten MCs" — sie sind in der falschen Schublade.
|
||||
|
||||
## Konsequenzen fuer CRA-Generation (Pass 0a)
|
||||
|
||||
### 1. Prompt-Aenderung (Hauptmassnahme)
|
||||
|
||||
Der Pass-0a-Prompt fuer Haiku/Sonnet MUSS pro generiertem Control ein `check_type`-Feld erzwingen. Vorschlag:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"control_id": "CRA-...-A01",
|
||||
"title": "...",
|
||||
"check_question": "...",
|
||||
"check_type": "text" | "process" | "review", // PFLICHT
|
||||
"rationale_for_check_type": "..."
|
||||
}
|
||||
```
|
||||
|
||||
Klassifikations-Regel im Prompt:
|
||||
|
||||
> Wenn deine `check_question` mit "Enthaelt", "Wird … genannt/aufgelistet/erwaehnt", "Steht im Text" beginnt -> `text`.
|
||||
> Wenn sie mit "Ist sichergestellt", "Ist implementiert", "Wird durchgefuehrt", "Existiert ein Prozess" beginnt -> `process`.
|
||||
> Wenn sie mit "Sind ALLE", "Werden ALLE", "Stimmt X mit Y ueberein" beginnt -> `review`.
|
||||
> Im Zweifel: lieber `review` als `text`.
|
||||
|
||||
### 2. Doc-Type-Zuordnung kritisch validieren
|
||||
|
||||
Bei den heutigen MCs sind viele falsch zugeordnet (z.B. "Bestellbestätigung implementieren" landet im `impressum`-doc_type, gehoert aber zu AGB/Widerruf). Fuer CRA:
|
||||
|
||||
- **`doc_type` darf nur Werte aus einer expliziten Liste annehmen** — pro Regulation festlegen.
|
||||
- Fuer CRA z.B.: `produkt_konformitaetserklaerung`, `risiko_management_dossier`, `sbom`, `cra_dse`, `meldepflichten_doku`.
|
||||
- Falsche Zuordnung im Prompt explizit verbieten: "Wenn der Control nicht eindeutig zu EINEM dieser Doc-Typen passt, setze `doc_type: 'unassigned'` und `check_type: 'review'`."
|
||||
|
||||
### 3. Zwei Tabellen statt einer
|
||||
|
||||
Heutige Architektur:
|
||||
- `compliance.doc_check_controls` <- alle 1874 MCs (mit allem vermischt)
|
||||
|
||||
Empfohlen fuer CRA + Refactor:
|
||||
- `compliance.text_check_controls` <- nur `check_type='text'`
|
||||
- `compliance.process_check_controls` <- nur `check_type='process'`, geprueft via Evidence/TOM
|
||||
- `compliance.review_checklist_controls` <- nur `check_type='review'`, geprueft via DSB-Workflow
|
||||
|
||||
Falls Schema-Aenderung nicht moeglich (CLAUDE.md: DB ist frozen), Sidecar-SQLite mit `mc_classification.db` oder neue Spalte als Add-only-Migration.
|
||||
|
||||
### 4. Dedupe-Phase respektieren
|
||||
|
||||
In Pass 0b (Dedup) muss `check_type` ein **Pflicht-Dedupe-Key** sein:
|
||||
- Zwei MCs mit gleicher Aussage, aber unterschiedlichem `check_type` sind **nicht** Duplikate — sie pruefen verschiedene Dinge ("ist im Text genannt" vs "ist technisch implementiert").
|
||||
- Heute werden solche faelschlich gemerged → noch mehr Vermischung.
|
||||
|
||||
### 5. Matching-Engine danach umbauen
|
||||
|
||||
Das eigentliche doc-check-Match-System muss nur noch `check_type='text'`-MCs verarbeiten. Andere werden in ihre eigenen Module geroutet:
|
||||
|
||||
- `text` MCs -> `rag_document_checker` (Regex + spaeter Embedding)
|
||||
- `process` MCs -> neuer `evidence_check_runner` (Kunde laedt Nachweise/TOM hoch)
|
||||
- `review` MCs -> neuer `review_checklist_ui` (DSB beantwortet manuell)
|
||||
|
||||
## Checkliste fuer CRA-Session
|
||||
|
||||
- [ ] Pass-0a-Prompt um `check_type`-Pflichtfeld erweitert (Wortlaut-Regel + Beispiele)
|
||||
- [ ] Pass-0a-Prompt zwingt `doc_type` aus expliziter Whitelist
|
||||
- [ ] Pass-0b-Dedup-Key enthaelt `check_type`
|
||||
- [ ] Output-Validator weist MCs ohne `check_type` zurueck
|
||||
- [ ] DB-Schema (oder Sidecar) hat `check_type`-Spalte mit Default `review` (sicherer Fallback)
|
||||
- [ ] Stichprobe von 50 generierten CRA-MCs vor Bulk-Run: TEXT-Anteil sollte 30-50% sein (mehr als bei den alten DSGVO-MCs, weil CRA stark dokument-fokussiert ist).
|
||||
|
||||
## Update 2026-05-17 — Parallel-CRA-Session-Findings
|
||||
|
||||
Die laufende CRA-Generation hat ein Feld `verification_method` (document/tool/hybrid/code_review/empty), das **NICHT identisch** mit `check_type` ist:
|
||||
|
||||
- `verification_method` fragt: **WAS schaust du dir an?** (Dokument, Tool-Output, Code)
|
||||
- `check_type` fragt: **KANN das per Text-Match geprueft werden?** (text/process/review)
|
||||
|
||||
Ein Control kann `verification_method=document` haben UND trotzdem `check_type=process` sein. Beispiel: "Wird die SBOM regelmaessig (mindestens monatlich) aktualisiert?" — Du schaust ins Dokument SBOM-Historie, prüfst aber einen Prozess. Text-Match findet das nie.
|
||||
|
||||
**Mapping-Heuristik (gut genug fuer 80% der Faelle, Rest LLM):**
|
||||
|
||||
| `verification_method` | Auto-Mapping `check_type` | LLM noetig? |
|
||||
|---|---|---|
|
||||
| `tool` | `process` | nein |
|
||||
| `code_review` | `process` | nein |
|
||||
| empty/null | `review` (sicherer Default) | nein |
|
||||
| `document` | erstmal `text`, Stichprobe pruefen | 10-20% sampling |
|
||||
| `hybrid` | LLM klassifizieren | ja, alle |
|
||||
|
||||
**Idealfall (fuer alle KUENFTIGEN Pass-0a-Generationen — auch CRA falls man nochmal generiert):** Beide Felder gleichzeitig generieren, nicht eins aus dem anderen ableiten.
|
||||
|
||||
## Backfill-Workflow fuer die laufende CRA-Generation
|
||||
|
||||
1. Aktueller Haiku-Job laeuft fertig (kein Restart, kein Verlust)
|
||||
2. Nach Job-Ende: Auto-Mapping fuer eindeutige Buckets (tool/code_review/empty)
|
||||
3. Sonnet-Klassifikation nur fuer `document`+`hybrid` Subset (~62 Calls fuer 1500 Controls, ~$0.05 statt $2)
|
||||
4. Wiederverwenden: `breakpilot-compliance/backend-compliance/scripts/classify_mc_check_type.py` — nur DB-Query anpassen (Source-Tabelle + WHERE-Filter)
|
||||
5. Validierung: TEXT-Anteil bei CRA sollte 40-60% sein (CRA ist dokument-zentrierter als DSGVO-Cookie)
|
||||
|
||||
## Querverweise
|
||||
|
||||
- BMW-Run-Befund: `breakpilot-compliance` E-Mail vom 2026-05-17, check_id `08bcc9dd`
|
||||
- Bestehendes Klassifikations-Skript fuer Retrofit der alten 1874: `backend-compliance/scripts/classify_mc_check_type.py`
|
||||
- Doc-Type-Audit-Query: dieselbe Datei, am Ende
|
||||
@@ -1553,6 +1553,7 @@ async def get_repair_backfill_status(backfill_id: str):
|
||||
class BatchDedupRequest(BaseModel):
|
||||
dry_run: bool = True
|
||||
hint_filter: Optional[str] = None # Only process groups matching this hint prefix
|
||||
since: Optional[str] = None # ISO datetime — scope to controls created at/after this
|
||||
|
||||
|
||||
_batch_dedup_status: dict = {}
|
||||
@@ -1567,7 +1568,15 @@ async def _run_batch_dedup(req: BatchDedupRequest, dedup_id: str):
|
||||
runner = BatchDedupRunner(db)
|
||||
_batch_dedup_status[dedup_id] = {"status": "running", "phase": "starting"}
|
||||
|
||||
stats = await runner.run(dry_run=req.dry_run, hint_filter=req.hint_filter)
|
||||
since_dt = None
|
||||
if req.since:
|
||||
from datetime import datetime
|
||||
since_dt = datetime.fromisoformat(req.since.replace("Z", "+00:00"))
|
||||
stats = await runner.run(
|
||||
dry_run=req.dry_run,
|
||||
hint_filter=req.hint_filter,
|
||||
since=since_dt,
|
||||
)
|
||||
|
||||
_batch_dedup_status[dedup_id] = {
|
||||
"status": "completed",
|
||||
|
||||
@@ -0,0 +1,430 @@
|
||||
source: Derived from BSI QUAIDAL (Clean-Room)
|
||||
source_url: https://github.com/BSI-Bund/QUAIDAL
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
plagiarism_limit_4gram: 0.2
|
||||
generated_by_model: qwen3.5:35b-a3b
|
||||
controls:
|
||||
- id: AC-AI-DATA-QB-01-syntaktische-genauigkeit
|
||||
canonical_name: Syntaktische Genauigkeit
|
||||
description: Das KI-Trainingsset muss syntaktisch konsistent sein, wobei alle definierten
|
||||
Grammatik- und Strukturregeln strikt einzuhalten sind. Eine fehlerfreie Datenstruktur
|
||||
ist zwingend erforderlich, um eine korrekte Verarbeitung durch Parser oder Sprachmodelle
|
||||
zu gewährleisten. Die Validierung der formalen Korrektheit ist vor jedem Training
|
||||
durchzuführen, um Verarbeitungsfehler auszuschließen.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-01
|
||||
- MA-02
|
||||
- MA-03
|
||||
- MA-04
|
||||
- MA-05
|
||||
- MA-27
|
||||
external_refs:
|
||||
- framework: BSI AIC4
|
||||
citation: null
|
||||
- framework: ISO/IEC 25012
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-01
|
||||
title_original_de: QB-01 Syntaktische Genauigkeit
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-01_Syntactic%20Accuracy.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: AC-AI-DATA-QB-02-semantische-genauigkeit
|
||||
canonical_name: Semantische Genauigkeit
|
||||
description: Die KI-Trainingsdaten müssen inhaltlich korrekt sein, sodass die zugewiesenen
|
||||
Werte dem tatsächlichen Sachverhalt entsprechen und nicht nur formal valide sind.
|
||||
Es ist sicherzustellen, dass semantische Zuordnungen keine logischen Fehler aufweisen,
|
||||
wie beispielsweise die Klassifizierung von Tieren als technische Geräte. Eine
|
||||
Prüfung muss verifizieren, dass die Bedeutung der Datenpunkte im Kontext der Anwendung
|
||||
eindeutig und fehlerfrei interpretiert werden kann.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-05
|
||||
- MA-06
|
||||
- MA-07
|
||||
- MA-27
|
||||
external_refs:
|
||||
- framework: BSI AIC4
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-02
|
||||
title_original_de: QB-02 Semantische Genauigkeit
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-02_Semantic%20Accuracy.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: AC-AI-DATA-QB-03-vielfalt
|
||||
canonical_name: Vielfalt
|
||||
description: Das KI-Trainingsdatenset muss eine maximale Varianz in den relevanten
|
||||
Merkmalen aufweisen, um die Heterogenität der Eingabewerte zu gewährleisten. Es
|
||||
ist sicherzustellen, dass das Spektrum der enthaltenen Werte breit genug ist,
|
||||
um das Variationspotential der Zielgruppe vollständig abzudecken. Eine Prüfung
|
||||
der Datenverteilung ist vor dem Training durchzuführen, um eine unzureichende
|
||||
Diversität auszuschließen.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-08
|
||||
- MA-09
|
||||
- MA-10
|
||||
- MA-12
|
||||
- MA-27
|
||||
- MA-28
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-03
|
||||
title_original_de: QB-03 Vielfalt
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-03_Diversity.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0204
|
||||
- id: AC-AI-DATA-QB-04-ausgewogenheit
|
||||
canonical_name: Ausgewogenheit
|
||||
description: Der Trainingsdatensatz ist so zu konzipieren, dass die Verteilung aller
|
||||
relevanten Klassen proportional zur Zielrealität erfolgt, um eine einseitige Dominanz
|
||||
einzelner Kategorien zu vermeiden. Es ist sicherzustellen, dass keine Gruppe systematisch
|
||||
unter- oder überrepräsentiert wird, um Verzerrungen im Modellverhalten auszuschließen.
|
||||
Die Datenqualität muss durch eine ausgewogene Varianz aller Merkmale gewährleistet
|
||||
werden, um Overfitting und Bias wirksam zu verhindern.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-08
|
||||
- MA-09
|
||||
- MA-10
|
||||
- MA-12
|
||||
- MA-14
|
||||
- MA-27
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-04
|
||||
title_original_de: QB-04 Ausgewogenheit
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-04_Balance.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0182
|
||||
- id: AC-AI-DATA-QB-05-umfang
|
||||
canonical_name: Umfang
|
||||
description: Der Trainingsdatensatz muss eine quantitativ ausreichende Anzahl an
|
||||
Datenpunkten aufweisen, um statistisch signifikante Muster zu erfassen und das
|
||||
Risiko von Overfitting zu minimieren. Die Größe der Datenbasis ist so zu dimensionieren,
|
||||
dass sie eine belastbare Analyse der zugrundeliegenden Verteilungen ermöglicht
|
||||
und die Generalisierungsfähigkeit des Modells stabilisiert. Eine Prüfung ist durchzuführen,
|
||||
um sicherzustellen, dass der reine quantitative Umfang die notwendige Basis für
|
||||
eine robuste Modellbildung bildet.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-11
|
||||
- MA-12
|
||||
- MA-15
|
||||
- MA-27
|
||||
external_refs:
|
||||
- framework: BSI AIC4
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-05
|
||||
title_original_de: QB-05 Umfang
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-05_Size.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0161
|
||||
- id: AC-AI-DATA-QB-06-verzerrung
|
||||
canonical_name: Verzerrung
|
||||
description: Das KI-System muss vor dem produktiven Einsatz auf systematische Verzerrungen
|
||||
in den Trainingsdaten und den daraus resultierenden Vorhersagen untersucht werden.
|
||||
Es ist sicherzustellen, dass latente Ungleichbehandlungen quantitativ erfasst
|
||||
und dokumentiert werden, um eine transparente Bewertung der Fairness zu ermöglichen.
|
||||
Die Prüfung umfasst die Identifikation von Abweichungen, die auf unausgewogene
|
||||
Datenverteilungen zurückzuführen sind, bevor das Modell für reale Anwendungen
|
||||
freigegeben wird.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-01
|
||||
- MA-02
|
||||
- MA-03
|
||||
- MA-04
|
||||
- MA-06
|
||||
- MA-07
|
||||
- MA-08
|
||||
- MA-09
|
||||
- MA-10
|
||||
- MA-11
|
||||
- MA-12
|
||||
- MA-13
|
||||
- MA-14
|
||||
- MA-15
|
||||
- MA-16
|
||||
- MA-17
|
||||
- MA-18
|
||||
- MA-20
|
||||
- MA-23
|
||||
- MA-24
|
||||
- MA-27
|
||||
- MA-28
|
||||
- QB-15
|
||||
- QM-11
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-06
|
||||
title_original_de: QB-06 Verzerrung
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-06_Bias-Detektion.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: AC-AI-DATA-QB-07-gesamtheit
|
||||
canonical_name: Gesamtheit
|
||||
description: Das Trainingsdatenset muss sämtliche für das spezifische Anwendungsszenario
|
||||
definierten Attribute und Entitätsinstanzen vollständig enthalten, um die Anforderung
|
||||
der Gesamtheit zu erfüllen. Diese Vollständigkeit ist auf der Ebene des gesamten
|
||||
Datensatzes, einzelner Spalten oder einzelner Datenpunkte nachweisbar zu prüfen.
|
||||
Die Bewertung der Datenqualität erfolgt stets kontextbezogen unter Berücksichtigung
|
||||
der jeweiligen Nutzungszwecke.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-12
|
||||
- MA-13
|
||||
- MA-27
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-07
|
||||
title_original_de: QB-07 Gesamtheit
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-07_Totality.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: AC-AI-DATA-QB-08-konsistenzsicherung
|
||||
canonical_name: Konsistenzsicherung
|
||||
description: Die Konsistenz der KI-Trainingsdaten ist durch standardisierte Datentypen
|
||||
und formatierte Attribute über den gesamten Lebenszyklus sicherzustellen. Automatisierte
|
||||
Prüfmechanismen müssen Abweichungen in den Datenwerten sowie zeitlichen Verläufen
|
||||
frühzeitig identifizieren, um nachvollziehbare Transformations- oder Imputationsmaßnahmen
|
||||
einzuleiten. Eine einheitliche Datenstruktur ist zwingend erforderlich, um die
|
||||
Integrität der Trainingsbasis für valide Modellentscheidungen zu gewährleisten.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-01
|
||||
- MA-02
|
||||
- MA-03
|
||||
external_refs:
|
||||
- framework: ISO/IEC 25012
|
||||
citation: null
|
||||
- framework: BSI AIC4
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-08
|
||||
title_original_de: QB-08 Konsistenzsicherung
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-08_ConsistencyAssurance.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: AC-AI-DATA-QB-09-quellenmanagement
|
||||
canonical_name: Quellenmanagement
|
||||
description: Die Organisation muss einen durchgängigen Mechanismus implementieren,
|
||||
der die Herkunft und den Verarbeitungsweg jeder Trainingsdaten-Einheit lückenlos
|
||||
dokumentiert. Es ist sicherzustellen, dass jeder Datenpunkt mit seinem Ursprung
|
||||
sowie allen nachfolgenden Transformationsschritten verknüpft bleibt, um die Integrität
|
||||
der KI-Datenbasis zu gewährleisten. Zusätzlich sind alle Zugriffe und Modifikationen
|
||||
in einem unveränderlichen Protokoll chronologisch festzuhalten, um einen vollständigen
|
||||
Audit-Trail für Compliance-Prüfungen zu schaffen.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-18
|
||||
- MA-19
|
||||
- MA-20
|
||||
- MA-22
|
||||
external_refs:
|
||||
- framework: BSI AIC4
|
||||
citation: null
|
||||
- framework: AI Act
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-09
|
||||
title_original_de: QB-09 Quellenmanagement
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-09_Sourcemanagement.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0167
|
||||
- id: AC-AI-DATA-QB-10-datenpruefung
|
||||
canonical_name: Datenprüfung
|
||||
description: Vor der Initialisierung des Trainingsprozesses ist eine systematische
|
||||
Validierung der Eingangsdaten auf Vollständigkeit, Konsistenz und Integrität durchzuführen.
|
||||
Dabei sind Unregelmäßigkeiten wie fehlende Werte, Formatinkonsistenzen oder statistische
|
||||
Ausreißer zu identifizieren und zu bereinigen. Das System muss sicherstellen,
|
||||
dass keine verzerrten oder fehlerhaften Datensätze das Modelltraining beeinträchtigen
|
||||
und die Datenqualität den definierten Qualitätsstandards entspricht.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-05
|
||||
- MA-20
|
||||
- MA-26
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-10
|
||||
title_original_de: QB-10_Datenprüfung
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-10_DataChecks.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0204
|
||||
- id: AC-AI-DATA-QB-11-prozesse
|
||||
canonical_name: Prozesse
|
||||
description: Es ist sicherzustellen, dass jeder Schritt der Datenvorbereitung und
|
||||
-verarbeitung für KI-Trainingszwecke lückenlos protokolliert wird, um die vollständige
|
||||
Nachvollziehbarkeit der Datenherkunft und aller Transformationen zu gewährleisten.
|
||||
Diese Dokumentation muss so strukturiert sein, dass sie eine valide Reproduzierbarkeit
|
||||
der Modelle sowie eine fundierte Qualitätssicherung der zugrundeliegenden Datensätze
|
||||
ermöglicht. Durch die Erfassung aller Änderungsereignisse wird die Integrität
|
||||
der Trainingsdaten über den gesamten Lebenszyklus hinweg verifiziert.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-18
|
||||
- MA-21
|
||||
external_refs:
|
||||
- framework: BSI Grundschutz
|
||||
citation: null
|
||||
- framework: ISO/IEC 23894
|
||||
citation: null
|
||||
- framework: ISO/IEC 42001
|
||||
citation: null
|
||||
- framework: AI Act
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-11
|
||||
title_original_de: QB-11 Prozesse
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-11_Processes.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: AC-AI-DATA-QB-12-merkmalsentwicklung
|
||||
canonical_name: Merkmalsentwicklung
|
||||
description: Die Erstellung und Auswahl von Eingangsmerkmalen für KI-Modelle ist
|
||||
so zu gestalten, dass sie signifikante Korrelationen zur Zielgröße aufweisen und
|
||||
redundante Informationen eliminieren. Es ist sicherzustellen, dass die transformierten
|
||||
Daten generalisierbar sind und eine hohe Informationsdichte für neue, unbekannte
|
||||
Datensätze bieten. Eine Validierung muss nachweisen, dass die abgeleiteten Merkmale
|
||||
die Interpretierbarkeit des Modells unterstützen und keine unnötige Komplexität
|
||||
verursachen.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-01
|
||||
- MA-02
|
||||
- MA-03
|
||||
- MA-06
|
||||
- MA-12
|
||||
- MA-14
|
||||
- MA-17
|
||||
- MA-23
|
||||
- MA-24
|
||||
- MA-27
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-12
|
||||
title_original_de: QB-12 Merkmalsentwicklung
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-12_FeatureEngineering.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: AC-AI-DATA-QB-13-datenvorbereitung
|
||||
canonical_name: Datenvorbereitung
|
||||
description: Vor der Initialisierung des Trainingsprozesses sind alle Rohdaten durch
|
||||
definierte Transformationen in eine qualitätsgeprüfte und für das Modell verarbeitbare
|
||||
Struktur zu überführen. Es ist sicherzustellen, dass jede angewandte Datenaufbereitung
|
||||
die Integrität der Trainingsmenge gewährleistet und keine nicht validierten Artefakte
|
||||
in das Lernsystem einfließen. Die Durchführbarkeit dieser Schritte ist vor dem
|
||||
Start der Modellkonvergenz durch systematische Prüfverfahren nachzuweisen.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-02
|
||||
- MA-03
|
||||
- MA-04
|
||||
- MA-13
|
||||
- MA-14
|
||||
- MA-16
|
||||
- MA-17
|
||||
- MA-23
|
||||
- MA-24
|
||||
- MA-25
|
||||
- MA-27
|
||||
- MA-29
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-13
|
||||
title_original_de: QB-13 Datenvorbereitung
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-13_DataPreparation.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: AC-AI-DATA-QB-14-expertanalysis
|
||||
canonical_name: Expertanalysis
|
||||
description: Die Qualität der KI-Trainingsdaten ist durch eine unabhängige, manuelle
|
||||
Begutachtung durch qualifiziertes Fachpersonal zu validieren. Dabei sind mehrere
|
||||
Prüfer eigenständig einzusetzen, um subjektive Verzerrungen und Gruppenkonformitätseffekte
|
||||
bei der Bewertung auszuschließen. Die Ergebnisse dieser fachlichen Analyse müssen
|
||||
anonymisiert zusammengeführt werden, um eine objektive Beurteilung der Datensatzqualität
|
||||
zu gewährleisten.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-06
|
||||
- MA-10
|
||||
- MA-14
|
||||
- MA-15
|
||||
- MA-21
|
||||
- MA-22
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-14
|
||||
title_original_de: QB-14_Expertanalysis
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-14_Expertanalysis.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: AC-AI-DATA-QB-15-bias-mitigation
|
||||
canonical_name: Bias-Mitigation
|
||||
description: Das System muss technische Mechanismen implementieren, um systematische
|
||||
Verzerrungen in den Trainingsdaten oder während des Lernprozesses zu identifizieren
|
||||
und zu kompensieren. Diese Maßnahmen sind unabhängig vom Entwicklungsstadium anzuwenden,
|
||||
wobei Datenanpassungen vor dem Training, Regularisierungsverfahren während des
|
||||
Lernens oder Korrekturen der Ausgabeergebnisse nach dem Training möglich sind.
|
||||
Eine Prüfung der Fairness-Kriterien ist vor der Freigabe des Modells durchzuführen,
|
||||
um sicherzustellen, dass keine diskriminierenden Muster in den Ergebnissen verbleiben.
|
||||
kind: building_block
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-30
|
||||
- QM-57
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QB-15
|
||||
title_original_de: QB-15 Bias-Mitigation
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0001_Qualitätsbausteine/QB-15_Bias-Mitigation.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
@@ -0,0 +1,280 @@
|
||||
source: Derived from BSI QUAIDAL (Clean-Room)
|
||||
source_url: https://github.com/BSI-Bund/QUAIDAL
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
plagiarism_limit_4gram: 0.2
|
||||
generated_by_model: qwen3.5:35b-a3b
|
||||
controls:
|
||||
- id: MC-AI-DATA-QKB-01-repraesentativitaet
|
||||
canonical_name: Repräsentativität
|
||||
description: Der Trainingsdatensatz muss die statistische Verteilung der Zielpopulation
|
||||
exakt abbilden, um systematische Verzerrungen im Modell zu vermeiden. Es ist sicherzustellen,
|
||||
dass alle relevanten Merkmalsausprägungen in ausreichender Häufigkeit und ohne
|
||||
Über- oder Unterrepräsentation vorliegen. Die Datenmenge ist so zu dimensionieren,
|
||||
dass eine robuste Generalisierungsfähigkeit für alle Subgruppen der Gesamtpopulation
|
||||
gewährleistet wird. Eine Prüfung auf Stichprobenqualität ist vor dem Training
|
||||
durchzuführen.
|
||||
kind: criterion
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QB-03
|
||||
- QB-04
|
||||
- QB-05
|
||||
- QB-06
|
||||
- QB-15
|
||||
external_refs:
|
||||
- framework: AI Act
|
||||
citation: Artikel 10
|
||||
- framework: ISO/IEC 25012
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QKB-01
|
||||
title_original_de: QKB-01 Repräsentativität
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0000_Qualitätskriterien/QKB-01_Representativity.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MC-AI-DATA-QKB-02-vollstaendigkeit
|
||||
canonical_name: Vollständigkeit
|
||||
description: Der Datensatz muss sämtliche für das spezifische KI-Modell erwarteten
|
||||
Attribute und Merkmalsausprägungen lückenlos beinhalten. Es ist sicherzustellen,
|
||||
dass keine Entitätsinstanzen fehlen und alle definierten Merkmale mit Werten belegt
|
||||
sind. Eine Prüfung auf fehlende Werte oder unvollständige Attributmengen ist vor
|
||||
dem Training zwingend durchzuführen, um Verzerrungen zu vermeiden.
|
||||
kind: criterion
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QB-07
|
||||
- QB-09
|
||||
external_refs:
|
||||
- framework: AI Act
|
||||
citation: Artikel 10
|
||||
- framework: BSI AIC4
|
||||
citation: null
|
||||
- framework: ISO/IEC 25012
|
||||
citation: null
|
||||
- framework: ISO/IEC 25024
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QKB-02
|
||||
title_original_de: QKB-02 Vollständigkeit
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0000_Qualitätskriterien/QKB-02_Completeness.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MC-AI-DATA-QKB-03-genauigkeit
|
||||
canonical_name: Genauigkeit
|
||||
description: Die Integrität der KI-Trainingsdaten erfordert, dass jeder einzelne
|
||||
Datenelementwert eine definierte numerische oder symbolische Übereinstimmung mit
|
||||
dem referenzierten Sollwert aufweist. Es ist sicherzustellen, dass Abweichungen
|
||||
innerhalb festgelegter Toleranzgrenzen bezüglich Rundung, Formatierung und Messauflösung
|
||||
bleiben. Die Einhaltung dieser Spezifikation ist durch automatisierte Prüfverfahren
|
||||
vor jedem Trainingslauf zu verifizieren.
|
||||
kind: criterion
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QB-01
|
||||
- QB-02
|
||||
external_refs:
|
||||
- framework: ISO/IEC 25012
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QKB-03
|
||||
title_original_de: QKB-03 Genauigkeit
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0000_Qualitätskriterien/QKB-03_Accuracy.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MC-AI-DATA-QKB-04-konsistenz
|
||||
canonical_name: Konsistenz
|
||||
description: Das System muss sicherstellen, dass alle Eingabedaten für das KI-Training
|
||||
logisch kohärent und frei von internen Widersprüchen sind. Einheitliche Kodierungen
|
||||
für Kategorien sowie konsistente Formatierungen sind zwingend erforderlich, um
|
||||
eine fehlerfreie Generalisierung durch das Modell zu ermöglichen. Jede Abweichung
|
||||
von den definierten Datenstandards ist durch automatische Prüfmechanismen zu identifizieren
|
||||
und zu unterbinden.
|
||||
kind: criterion
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QB-02
|
||||
- QB-07
|
||||
- QB-08
|
||||
- QB-10
|
||||
- QB-11
|
||||
- QB-12
|
||||
external_refs:
|
||||
- framework: ISO/IEC 25012
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QKB-04
|
||||
title_original_de: QKB-04 Konsistenz
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0000_Qualitätskriterien/QKB-04_Consistency.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MC-AI-DATA-QKB-05-korrektheit
|
||||
canonical_name: Korrektheit
|
||||
description: Das KI-Modell muss ausschließlich auf Datensätzen trainiert werden,
|
||||
die inhaltlich frei von Fehlern sind und den tatsächlichen Gegebenheiten oder
|
||||
definierten Referenzstandards exakt entsprechen. Es ist sicherzustellen, dass
|
||||
jede annotierte Information den als wahr geltenden Zustand im Anwendungskontext
|
||||
fehlerfrei abbildet. Die Validierung der Trainingsdaten ist vor Beginn des Lernprozesses
|
||||
durchzuführen, um sicherzustellen, dass keine inkorrekten Werte die Modellleistung
|
||||
beeinträchtigen.
|
||||
kind: criterion
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QB-09
|
||||
- QB-10
|
||||
- QB-12
|
||||
- QB-14
|
||||
external_refs:
|
||||
- framework: ISO/IEC 25012
|
||||
citation: null
|
||||
- framework: BSI AIC4
|
||||
citation: null
|
||||
- framework: AI Act
|
||||
citation: Artikel 10
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QKB-05
|
||||
title_original_de: QKB-05 Korrektheit
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0000_Qualitätskriterien/QKB-05_Correctness.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MC-AI-DATA-QKB-06-einheitlichkeit
|
||||
canonical_name: Einheitlichkeit
|
||||
description: Die Konsistenz der KI-Trainingsdaten ist durch die strikte Einhaltung
|
||||
definierter Syntaxregeln und Datenstrukturen sicherzustellen. Jedes Datenelement
|
||||
muss vor der Verarbeitung gemäß festgelegten Standards formatiert werden, um strukturelle
|
||||
Abweichungen auszuschließen. Eine Prüfung der formalen Einheitlichkeit ist unabhängig
|
||||
von der inhaltlichen Richtigkeit der Werte durchzuführen.
|
||||
kind: criterion
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QB-02
|
||||
- QB-08
|
||||
- QB-10
|
||||
- QB-12
|
||||
- QB-14
|
||||
external_refs:
|
||||
- framework: ISO/IEC 25012
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QKB-06
|
||||
title_original_de: QKB-06 Einheitlichkeit
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0000_Qualitätskriterien/QKB-06_Uniformity.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MC-AI-DATA-QKB-07-gueltigkeit
|
||||
canonical_name: Gültigkeit
|
||||
description: Das System muss sicherstellen, dass die für das KI-Training verwendeten
|
||||
Daten inhaltlich exakt das intendierte Zielkonstrukt abbilden und nicht nur oberflächliche
|
||||
Korrelationen erfassen. Es ist zu prüfen, ob die erfassten Merkmale den theoretischen
|
||||
Anforderungen an den Messgegenstand entsprechen, um eine valide Grundlage für
|
||||
Ableitungen zu gewährleisten. Eine Abweichung zwischen dem gemessenen Inhalt und
|
||||
dem definierten Zielkonzept ist als Fehlerzustand zu klassifizieren und muss ausgeschlossen
|
||||
werden.
|
||||
kind: criterion
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QB-02
|
||||
- QB-05
|
||||
- QB-09
|
||||
- QB-10
|
||||
- QB-14
|
||||
external_refs:
|
||||
- framework: ISO/IEC 25012
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QKB-07
|
||||
title_original_de: QKB-07 Gültigkeit
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0000_Qualitätskriterien/QKB-07_Validity.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MC-AI-DATA-QKB-08-eindeutigkeit
|
||||
canonical_name: Eindeutigkeit
|
||||
description: Jeder Datensatz im Trainingskorpus muss eine eindeutige Identität besitzen,
|
||||
um die Entstehung redundanter Instanzen auszuschließen. Es ist sicherzustellen,
|
||||
dass keine doppelten oder mehrdeutigen Einträge vorliegen, da diese die Modellgeneralisierung
|
||||
beeinträchtigen und zu Overfitting führen können. Die Validierung muss nachweisen,
|
||||
dass jede Dateneinheit eindeutig identifizierbar ist und logisch von anderen unterscheidbar
|
||||
bleibt.
|
||||
kind: criterion
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QB-05
|
||||
- QB-10
|
||||
- QB-13
|
||||
external_refs:
|
||||
- framework: ISO/IEC 25012
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QKB-08
|
||||
title_original_de: QKB-08 Eindeutigkeit
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0000_Qualitätskriterien/QKB-08_Uniqueness.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MC-AI-DATA-QKB-09-sichere-quellen
|
||||
canonical_name: Sichere Quellen
|
||||
description: Für KI-Trainingsdaten muss eine lückenlose Provenienz-Dokumentation
|
||||
etabliert werden, die jeden Verarbeitungsschritt von der Erfassung bis zur finalen
|
||||
Nutzung nachvollziehbar macht. Es ist sicherzustellen, dass alle Transformationen
|
||||
und Herkunftsinformationen vollständig erfasst sind, um die Datenintegrität und
|
||||
-qualität kontinuierlich verifizieren zu können. Die Nachprüfbarkeit dieser Metadaten
|
||||
ist zwingend erforderlich, um potenzielle Qualitätsmängel oder Manipulationen
|
||||
in den Trainingsbeständen frühzeitig zu identifizieren.
|
||||
kind: criterion
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QB-09
|
||||
- QB-11
|
||||
external_refs:
|
||||
- framework: ISO/IEC 25012
|
||||
citation: null
|
||||
- framework: BSI AIC4
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QKB-09
|
||||
title_original_de: QKB-09 Sichere Quellen
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0000_Qualitätskriterien/QKB-09_SecureSource.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MC-AI-DATA-QKB-10-daten-mit-personenbezug
|
||||
canonical_name: Daten mit Personenbezug
|
||||
description: Das System muss vor der Nutzung von Trainingsdaten eine automatisierte
|
||||
Prüfung durchführen, um personenbezogene Informationen zu identifizieren. Sind
|
||||
derartige Daten Bestandteil der Eingabedaten, ist deren vollständige und nachweisbare
|
||||
Entfernung sicherzustellen, bevor ein Modelltraining initiiert wird. Die Integrität
|
||||
der verbleibenden Datensätze ist durch technische Maßnahmen gegen unbeabsichtigte
|
||||
Wiederverwendung zu gewährleisten.
|
||||
kind: criterion
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QB-09
|
||||
- QB-10
|
||||
- QB-11
|
||||
- QB-14
|
||||
external_refs:
|
||||
- framework: EU GDPR
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: QKB-10
|
||||
title_original_de: QKB-10 Daten mit Personenbezug
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0000_Qualitätskriterien/QKB-10_PersonalDataCheck.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,753 @@
|
||||
source: Derived from BSI QUAIDAL (Clean-Room)
|
||||
source_url: https://github.com/BSI-Bund/QUAIDAL
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
plagiarism_limit_4gram: 0.2
|
||||
generated_by_model: qwen3.5:35b-a3b
|
||||
controls:
|
||||
- id: MIT-AI-DATA-MA-01-datentyp-validierung
|
||||
canonical_name: Datentyp Validierung
|
||||
description: Es ist sicherzustellen, dass alle Eingabedaten und Trainingsdatensätze
|
||||
vor der Verarbeitung auf Konformität mit den definierten Schemata und Datentypen
|
||||
des Modells geprüft werden. Abweichungen von den erwarteten Formaten sind automatisch
|
||||
zu identifizieren und müssen entweder bereinigt oder ausgeschlossen werden, um
|
||||
Inferenzfehler zu verhindern. Diese Validierung ist als automatisierter Schritt
|
||||
in den Datenpipelines zu implementieren, um die Integrität der KI-Systeme zu gewährleisten.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-32
|
||||
- QM-34
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-01
|
||||
title_original_de: MA-01 Datentyp Validierung
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-01_Datatype%20Validation.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-02-format-pruefung
|
||||
canonical_name: Format Prüfung
|
||||
description: Die Eingabedaten für KI-Trainingszwecke sind vor der Verarbeitung auf
|
||||
strukturelle Korrektheit zu validieren, wobei Datentypen wie Zeitstempel oder
|
||||
Textfelder exakt den definierten Schemata entsprechen müssen. Durch die Erzwingung
|
||||
einer einheitlichen Formatierung wird verhindert, dass regionale Abweichungen
|
||||
oder inkonsistente Darstellungen zu Fehlinterpretationen im Modell führen. Die
|
||||
Konformität ist automatisiert zu prüfen, um sicherzustellen, dass keine nicht
|
||||
konformen Datensätze in den Lernprozess eingehen.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-32
|
||||
- QM-34
|
||||
- QM-43
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-02
|
||||
title_original_de: MA-02 Format Prüfung
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-02_Format%20Check.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-03-bereichspruefung
|
||||
canonical_name: Bereichsprüfung
|
||||
description: Das System muss vor dem KI-Training eine automatische Validierung aller
|
||||
Eingangsmerkmale durchführen, um Werte außerhalb definierter physikalischer oder
|
||||
logischer Grenzen zu identifizieren. Dabei sind insbesondere inkonsistente Datentypen,
|
||||
fehlerhafte Maßeinheiten und statistisch unplausible Ausreißer zu detektieren
|
||||
und zu isolieren. Die Integrität des Trainingsdatensatzes ist erst dann gewährleistet,
|
||||
wenn alle nicht konformen Einträge ausgeschlossen oder korrigiert wurden, bevor
|
||||
der Lernprozess initiiert wird.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-51
|
||||
- QM-52
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-03
|
||||
title_original_de: MA-03 Bereichsprüfung
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-03_Range%20Check.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-04-over-undersampling
|
||||
canonical_name: Over-Undersampling
|
||||
description: Das Daten-Set für das KI-Training ist auf ein ausgewogenes Klassenverhältnis
|
||||
zu prüfen, wobei eine künstliche Aufstockung seltener Kategorien durch synthetische
|
||||
Generierung oder Duplizierung zulässig ist. Alternativ ist eine Reduktion der
|
||||
Datenpunkte der Mehrheitsklasse nach definierten Kriterien durchzuführen, um eine
|
||||
Verzerrung des Modells zu vermeiden. Die angewandte Methode zur Erreichung dieses
|
||||
Gleichgewichts ist zu dokumentieren und muss reproduzierbar sein.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-34
|
||||
- QM-38
|
||||
- QM-57
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-04
|
||||
title_original_de: MA-04 Over-Undersampling
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-04_Over-Undersampling.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-05-automatisierte-aufgaben
|
||||
canonical_name: Automatisierte Aufgaben
|
||||
description: Wiederkehrende Prozesse der Datenvorverarbeitung und Qualitätsprüfung
|
||||
im KI-Lebenszyklus sind durch automatisierte Mechanismen zu implementieren. Die
|
||||
Ausführung dieser Aufgaben muss so konfiguriert sein, dass eine konsistente Ergebnisqualität
|
||||
über alle Durchläufe hinweg sichergestellt wird. Es ist zu prüfen, dass die eingesetzten
|
||||
Automatisierungswerkzeuge spezifische Validierungsregeln für Trainingsdaten zuverlässig
|
||||
anwenden.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-02
|
||||
- MA-03
|
||||
- QM-10
|
||||
- QM-34
|
||||
- QM-64
|
||||
external_refs:
|
||||
- framework: AI Act
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-05
|
||||
title_original_de: MA-05 Automatisierte Aufgaben
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-05_Automated%20Tasks.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-06-experten-auswertung
|
||||
canonical_name: Experten Auswertung
|
||||
description: Für die Validierung von KI-Trainingsdaten ist eine manuelle Prüfung
|
||||
durch qualifizierte Fachexperten zwingend erforderlich. Diese Experten müssen
|
||||
die inhaltliche Gültigkeit, Relevanz und Korrektheit der Datensätze auf Basis
|
||||
domänenspezifischen Wissens systematisch evaluieren. Das Ergebnis dieser Begutachtung
|
||||
dient dazu, methodische Fehler oder qualitative Mängel frühzeitig zu identifizieren
|
||||
und konkrete Maßnahmen zur Datenbereinigung abzuleiten.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-16
|
||||
- QM-30
|
||||
- QM-43
|
||||
- QM-45
|
||||
- QM-59
|
||||
- QM-70
|
||||
external_refs:
|
||||
- framework: ISO/IEC 25012
|
||||
citation: null
|
||||
- framework: ISO/IEC 25024
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-06
|
||||
title_original_de: MA-06 Experten Auswertung
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-06_Expert%20Evaluation.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0204
|
||||
- id: MIT-AI-DATA-MA-07-massenbeteiligung
|
||||
canonical_name: Massenbeteiligung
|
||||
description: Das System muss Mechanismen implementieren, um die Qualität von Trainingsdaten
|
||||
durch dezentrale Validierung durch eine heterogene Gruppe externer Prüfer sicherzustellen.
|
||||
Es ist zwingend erforderlich, dass die Ergebnisse dieser kollektiven Überprüfung
|
||||
mit internen Qualitätsstandards abgeglichen werden, um systematische Fehler in
|
||||
den annotierten Datensätzen zu identifizieren. Die Integrität der KI-Modelle ist
|
||||
nur gewährleistet, wenn diese skalierbare Prüfprozedur für kritische Datenmengen
|
||||
routinemäßig angewendet wird.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-06
|
||||
- QM-03
|
||||
- QM-16
|
||||
- QM-43
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-07
|
||||
title_original_de: MA-07 Massenbeteiligung
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-07_Crowdsourcing.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-08-verteilungsanalyse
|
||||
canonical_name: Verteilungsanalyse
|
||||
description: Es ist sicherzustellen, dass die Verteilung der Trainingsdaten über
|
||||
alle relevanten Klassen und Merkmalsbereiche systematisch auf statistische Verzerrungen
|
||||
und Anomalien geprüft wird. Diese Analyse muss nachweisen, dass das Modell auf
|
||||
einer repräsentativen und ausgewogenen Datenbasis trainiert wurde, um die Generalisierungsfähigkeit
|
||||
der Vorhersagen zu gewährleisten. Die Ergebnisse der Verteilungsprüfung sind vor
|
||||
Beginn des Trainings zu dokumentieren und bei signifikanten Abweichungen sind
|
||||
Korrekturmaßnahmen einzuleiten.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-06
|
||||
- QM-10
|
||||
- QM-11
|
||||
- QM-51
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-08
|
||||
title_original_de: MA-08 Verteilungsanalyse
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-08_DistributionAnalysis.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0339
|
||||
- id: MIT-AI-DATA-MA-09-vergleichgrundgesamtheit
|
||||
canonical_name: VergleichGrundgesamtheit
|
||||
description: Das System muss eine repräsentative Referenzstichprobe aus der Zielverteilung
|
||||
bereitstellen, um die Validität von KI-Trainingsdaten zu verifizieren. Es ist
|
||||
sicherzustellen, dass diese Referenzdaten als Goldstandard dienen, um Abweichungen
|
||||
zwischen dem Trainingsset und der tatsächlichen Grundgesamtheit zu quantifizieren.
|
||||
Die Übereinstimmung ist durch einen automatisierten Abgleich mit den vorab definierten
|
||||
Verteilungsparametern zu prüfen.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-09
|
||||
- QM-51
|
||||
- QM-52
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-09
|
||||
title_original_de: MA-09 VergleichGrundgesamtheit
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-09_CompareGroundtruth.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-10-gewichtung-der-daten
|
||||
canonical_name: Gewichtung der Daten
|
||||
description: Für KI-Trainingsdatensätze ist eine manuelle Gewichtung der einzelnen
|
||||
Merkmale zwingend erforderlich, um systematische Verzerrungen zu minimieren. Diese
|
||||
Maßnahme dient der Sicherstellung einer ausgewogenen Datenrepräsentation und verbessert
|
||||
die Generalisierungsfähigkeit des Modells auf spezifische Anwendungsfälle. Die
|
||||
Zuordnung der Gewichtungsfaktoren ist vor dem Training durchzuführen und muss
|
||||
dokumentiert werden, um die Nachvollziehbarkeit der Datenqualität zu gewährleisten.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-10
|
||||
- QM-18
|
||||
- QM-28
|
||||
- QM-29
|
||||
- QM-37
|
||||
- QM-38
|
||||
- QM-39
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-10
|
||||
title_original_de: MA-10 Gewichtung der Daten
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-10_ManualWeights.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-11-stichprobengroesse
|
||||
canonical_name: Stichprobengröße
|
||||
description: Die Menge der für das Training verwendeten Daten ist so zu dimensionieren,
|
||||
dass statistisch signifikante Ergebnisse bei definiertem Konfidenzniveau und akzeptabler
|
||||
Fehlervarianz gewährleistet sind. Die Datengröße muss iterativ angepasst werden,
|
||||
wobei sowohl die Gesamtgröße der zugrundeliegenden Population als auch die spezifische
|
||||
Art der Datenerweiterung systematisch zu berücksichtigen sind. Eine Validierung
|
||||
der Datenqualität ist zwingend erforderlich, um Verzerrungen durch unterschiedliche
|
||||
Skalierungsmethoden auszuschließen.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-08
|
||||
- QM-09
|
||||
- QM-39
|
||||
- QM-41
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-11
|
||||
title_original_de: MA-11 Stichprobengröße
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-11_Trainingsdataset%20Size.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-12-abdeckung-relevanter-merkmale
|
||||
canonical_name: Abdeckung relevanter Merkmale
|
||||
description: Das Trainingsdatenset muss vollständig alle für die spezifische Problemstellung
|
||||
essenziellen Eingangsvariablen enthalten, um eine lückenlose Merkmalsabdeckung
|
||||
zu gewährleisten. Es ist sicherzustellen, dass keine kritischen Einflussgrößen
|
||||
fehlen, da sonst das Modell keine verlässlichen Korrelationen erlernen kann. Die
|
||||
Vollständigkeit des Merkmalsraums ist vor Beginn des Trainingsprozesses durch
|
||||
eine formale Prüfung zu verifizieren.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-06
|
||||
- MA-14
|
||||
- QM-10
|
||||
- QM-11
|
||||
- QM-13
|
||||
- QM-25
|
||||
- QM-26
|
||||
- QM-27
|
||||
- QM-28
|
||||
- QM-29
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-12
|
||||
title_original_de: MA-12 Abdeckung relevanter Merkmale
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-12_RelevantFeatureCoverage.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-13-vollstaendige-information-in-datensaetze
|
||||
canonical_name: Vollständige Information in Datensätzen
|
||||
description: Für die Validierung von KI-Trainingsdaten ist sicherzustellen, dass
|
||||
alle für die Analyse erforderlichen Attribute vollständig vorliegen und keine
|
||||
unbeabsichtigten Lücken existieren. Bei festgestellten Datenfehlern ist zwingend
|
||||
die Ursache zu ermitteln, um das passende Imputationsverfahren basierend auf dem
|
||||
spezifischen Fehlerschema auszuwählen. Eine unzureichende Datenbasis darf nicht
|
||||
zur Modellierung genutzt werden, solange die Integrität der relevanten Information
|
||||
nicht durch geeignete Maßnahmen wiederhergestellt wurde.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-12
|
||||
- QM-40
|
||||
- QM-53
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-13
|
||||
title_original_de: MA-13 Vollständige Information in Datensätzen
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-13_CompleteInformation.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-14-eda-explorative-daten-analyse
|
||||
canonical_name: EDA-Explorative Daten Analyse
|
||||
description: Vor Beginn des Modelltrainings ist eine explorative Datenanalyse durchzuführen,
|
||||
um Datenverteilungen, Korrelationen sowie Ausreißer und strukturelle Anomalien
|
||||
ohne vorab definierte Hypothesen zu identifizieren. Die gewonnenen Erkenntnisse
|
||||
sind systematisch zu dokumentieren, um die Qualität der Trainingsdaten zu validieren
|
||||
und fundierte Entscheidungen über notwendige Bereinigungs- oder Erweiterungsschritte
|
||||
abzuleiten. Auf Basis dieser Analyse ist der Datensatz so anzupassen, dass er
|
||||
die für die Zielfunktion erforderliche Repräsentativität und Integrität gewährleistet.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-10
|
||||
- QM-12
|
||||
- QM-24
|
||||
- QM-25
|
||||
- QM-26
|
||||
- QM-27
|
||||
- QM-28
|
||||
- QM-29
|
||||
- QM-36
|
||||
- QM-42
|
||||
- QM-54
|
||||
- QM-57
|
||||
- QM-61
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-14
|
||||
title_original_de: MA-14 EDA-Explorative Daten Analyse
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-14_EDA-ExplorativeDataAnalysis.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-15-empirische-evidenz
|
||||
canonical_name: Empirische Evidenz
|
||||
description: Es ist sicherzustellen, dass die Wirksamkeit von Schutzmaßnahmen gegen
|
||||
KI-gestützte Angriffe durch den systematischen Vergleich mit historischen Einsatzszenarien
|
||||
empirisch validiert wird. Dabei sind Leistungsdaten aus vergleichbaren Anwendungsfällen
|
||||
heranzuziehen, um die Angemessenheit der eingesetzten Trainingsdatensätze und
|
||||
Methoden für den spezifischen Kontext nachzuweisen. Die Analyse muss belegen,
|
||||
dass die gewählten Maßnahmen die identifizierten Risiken in der Praxis effektiv
|
||||
reduzieren und die Datenqualität den aktuellen Bedrohungsmodellen entspricht.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-16
|
||||
- QM-30
|
||||
- QM-61
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-15
|
||||
title_original_de: MA-15 Empirische Evidenz
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-15_EmpiricEvidence.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-16-daten-imputation
|
||||
canonical_name: Daten Imputation
|
||||
description: Für KI-Trainingsdatensätze ist eine systematische Analyse der Ursachen
|
||||
für fehlende Werte zwingend erforderlich, bevor eine Rekonstruktion erfolgt. Das
|
||||
gewählte Verfahren zur Datenergänzung muss sich strikt an den identifizierten
|
||||
Entstehungsgründen orientieren, um die statistische Integrität des Modells zu
|
||||
wahren. Eine unkritische Imputation ohne Ursachenanalyse ist unzulässig, da sie
|
||||
das Lernverhalten des Algorithmus verfälschen kann.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-13
|
||||
- QM-10
|
||||
- QM-22
|
||||
- QM-44
|
||||
- QM-53
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-16
|
||||
title_original_de: MA-16 Daten Imputation
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-16_DataImputation.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-17-metadatenverwaltung
|
||||
canonical_name: Metadatenverwaltung
|
||||
description: Für den KI-Trainingsprozess ist eine vollständige Dokumentation der
|
||||
Datenherkunft, der Qualitätsmetriken sowie der rechtlichen Klassifizierung jeder
|
||||
einzelnen Trainingsinstanz sicherzustellen. Diese strukturellen Begleitinformationen
|
||||
müssen maschinenlesbar vorliegen, um eine automatisierte Validierung der Datenintegrität
|
||||
und eine nachvollziehbare Auditierung des Datensatzes zu ermöglichen. Die Erfassung
|
||||
dieser Attribute ist zwingend erforderlich, um die Eignung der Daten für den spezifischen
|
||||
Trainingszweck zu gewährleisten und regulatorische Vorgaben einzuhalten.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-59
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-17
|
||||
title_original_de: MA-17 Metadatenverwaltung
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-17_MetadataManagement.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-18-provenienztracking
|
||||
canonical_name: ProvenienzTracking
|
||||
description: Die Herkunft und der Verarbeitungsweg von KI-Trainingsdaten sind lückenlos
|
||||
zu dokumentieren, um deren Integrität und Nachvollziehbarkeit sicherzustellen.
|
||||
Für jeden Datensatz ist eine eindeutige Identifikation des Ursprungs sowie aller
|
||||
Transformationsschritte im Lebenszyklus zu führen. Diese Metadaten müssen so strukturiert
|
||||
sein, dass eine Rückverfolgung zur ursprünglichen Quelle jederzeit möglich ist,
|
||||
ohne dass Datenverluste oder Manipulationen unentdeckt bleiben.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-59
|
||||
- QM-60
|
||||
- QM-61
|
||||
- QM-65
|
||||
- QM-67
|
||||
- QM-70
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-18
|
||||
title_original_de: MA-18 ProvenienzTracking
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-18_ProvenienzTracking.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-19-audit-trails
|
||||
canonical_name: Audit Trails
|
||||
description: Für die Nachvollziehbarkeit von KI-Trainingsprozessen ist ein lückenloses
|
||||
Protokollierungssystem zu implementieren, das alle Datenmanipulationen und Modellupdates
|
||||
zeitgestempelt erfasst. Jeder Zugriff auf Trainingsdatensätze sowie jede Änderung
|
||||
der Modellparameter muss mit eindeutigen Benutzeridentitäten verknüpft werden.
|
||||
Die gespeicherten Logs müssen so strukturiert sein, dass sie eine vollständige
|
||||
Rekonstruktion des Datenflusses und eine Rückführung auf frühere Datenqualitätszustände
|
||||
ermöglichen.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- MA-22
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-19
|
||||
title_original_de: MA-19 Audit Trails
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-19_AuditTrails.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-20-prozess-dokumentation
|
||||
canonical_name: Prozess Dokumentation
|
||||
description: Für die Sicherstellung der Datenqualität im KI-Trainingsprozess ist
|
||||
eine vollständige Dokumentation aller Phasen der Datenerstellung und -aufbereitung
|
||||
zwingend erforderlich. Diese Spezifikation muss verbindlich festlegen, welche
|
||||
Aktivitäten auszuführen sind, wer hierfür verantwortlich zeichnet, welche Ressourcen
|
||||
notwendig sind und welche qualitativen Ergebnisse zu erzielen sind. Insbesondere
|
||||
ist die Nachverfolgbarkeit der Datenherkunft innerhalb des Dokumentationsprozesses
|
||||
lückenlos zu gewährleisten, um die Integrität der Trainingsdaten zu validieren.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-15
|
||||
- QM-31
|
||||
- QM-62
|
||||
- QM-65
|
||||
external_refs:
|
||||
- framework: ISO/IEC 42001
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-20
|
||||
title_original_de: MA-20 Prozess Dokumentation
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-20_ProcessDocumentation.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-21-compliance
|
||||
canonical_name: Compliance
|
||||
description: Der Einsatz von KI-Modellen erfordert eine zwingende Prüfung der Trainingsdatensätze
|
||||
auf rechtliche Konformität und ethische Integrität, bevor diese zur Modellgenerierung
|
||||
verwendet werden. Es ist sicherzustellen, dass alle verarbeiteten Informationen
|
||||
die Vorgaben der DSGVO sowie branchenspezifische Regularien vollständig erfüllen
|
||||
und keine unrechtmäßig beschafften oder personenbezogenen Daten ohne explizite
|
||||
Einwilligung enthalten. Die Validierung dieser Datenqualität muss vor jedem Trainingslauf
|
||||
durch einen automatisierten oder manuellen Compliance-Check nachgewiesen werden.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-12
|
||||
- QM-15
|
||||
external_refs:
|
||||
- framework: EU GDPR
|
||||
citation: null
|
||||
- framework: AI Act
|
||||
citation: null
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-21
|
||||
title_original_de: MA-21 Compliance
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-21_Compliance.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-22-vertrauenswuerdigkeit
|
||||
canonical_name: Vertrauenswürdigkeit
|
||||
description: Die Integrität und Zuverlässigkeit der für das KI-Training verwendeten
|
||||
Datensätze ist im jeweiligen Anwendungskontext nachweislich zu verifizieren. Es
|
||||
ist sicherzustellen, dass potenzielle Manipulationen oder unbeabsichtigte Korruptionen
|
||||
des Datenflusses durch technische Prüfmechanismen ausgeschlossen werden. Bei der
|
||||
Anwendung von Korrekturverfahren zur Datenbereinigung muss die ursprüngliche Glaubwürdigkeit
|
||||
der Informationen gewahrt bleiben und darf nicht durch die Maßnahme beeinträchtigt
|
||||
werden.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-15
|
||||
- QM-43
|
||||
- QM-65
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-22
|
||||
title_original_de: MA-22 Vertrauenswürdigkeit
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-22_Credibility.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-23-merkmalsskalierung
|
||||
canonical_name: Merkmalsskalierung
|
||||
description: Für KI-Trainingsdatensätze ist eine Normalisierung der Merkmalswerte
|
||||
auf einen einheitlichen Wertebereich zwingend erforderlich, um Dominanzeffekte
|
||||
durch unterschiedliche Größenordnungen zu vermeiden. Diese Maßnahme stellt sicher,
|
||||
dass Algorithmen, die auf Distanzberechnungen oder Gradientenverfahren basieren,
|
||||
nicht durch skalenbedingte Verzerrungen beeinträchtigt werden. Die Wirksamkeit
|
||||
der Skalierung ist vor dem Training systematisch zu prüfen, um die Vorhersagegenauigkeit
|
||||
des Modells zu garantieren.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-10
|
||||
- QM-56
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-23
|
||||
title_original_de: MA-23 Merkmalsskalierung
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-23_FeatureScaling.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-24-merkmalserstellung
|
||||
canonical_name: Merkmalserstellung
|
||||
description: Es ist sicherzustellen, dass bei der Erstellung neuer Eingangsmerkmale
|
||||
für KI-Modelle ausschließlich validierte Transformationsverfahren angewendet werden,
|
||||
um die Datenqualität zu gewährleisten. Die Generierung neuer Features muss auf
|
||||
nachvollziehbaren Algorithmen basieren, die eine signifikante Verbesserung der
|
||||
Modellleistung gegenüber den Rohdaten nachweisen. Jede angewandte Methode zur
|
||||
Datenanreicherung oder -bereinigung ist vor dem Training auf ihre Eignung zur
|
||||
Mustererkennung und Vorhersagegenauigkeit zu prüfen.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-11
|
||||
- QM-25
|
||||
- QM-26
|
||||
- QM-27
|
||||
- QM-28
|
||||
- QM-51
|
||||
- QM-71
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-24
|
||||
title_original_de: MA-24 Merkmalserstellung
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-24_FeatureCreation.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-25-differential-privacy
|
||||
canonical_name: Differential Privacy
|
||||
description: Das System muss bei der Verarbeitung von KI-Trainingsdaten differenzielle
|
||||
Privatsphäre implementieren, indem statistisch signifikante, zufällige Störgrößen
|
||||
zu den Ergebnissen hinzugefügt werden. Es ist sicherzustellen, dass die An- oder
|
||||
Abwesenheit einzelner Datensätze im Trainingsset das Ausgabeergebnis nur marginal
|
||||
beeinflusst. Durch diese Maßnahme ist sicherzustellen, dass keine Rückschlüsse auf spezifische
|
||||
Personen aus den generierten Analysen gezogen werden können, während die allgemeine
|
||||
Datenqualität für das Modelltraining erhalten bleibt.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-58
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-25
|
||||
title_original_de: MA-25 Differential Privacy
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-25_Differential%20Privacy.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0625
|
||||
- id: MIT-AI-DATA-MA-26-federated-learning
|
||||
canonical_name: Federated Learning
|
||||
description: Für KI-Systeme, die auf verteilten Datenquellen basieren, ist ein Federated-Learning-Ansatz
|
||||
zwingend vorzusehen, um die Rohdaten dezentral zu belassen. Die lokalen Modelle
|
||||
müssen ausschließlich aggregierte Parameter an eine zentrale Instanz übermitteln,
|
||||
während die ursprünglichen Trainingsdaten niemals die lokale Umgebung verlassen.
|
||||
Es ist nachweislich sicherzustellen, dass durch diese Architektur keine sensiblen
|
||||
Informationen während des Lernprozesses zentralisiert oder übertragen werden.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-63
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-26
|
||||
title_original_de: MA-26 Federated Learning
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-26_Federated%20Learning%20Approach.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-27-statistische-grundlagenthemen
|
||||
canonical_name: Statistische Grundlagenthemen
|
||||
description: Für die Sicherstellung der Datenqualität im KI-Lebenszyklus sind statistische
|
||||
Basisverfahren systematisch zu implementieren und kontinuierlich zu validieren.
|
||||
Es ist sicherzustellen, dass alle relevanten Metriken zur Verteilungsanalyse und
|
||||
Datenintegrität konsistent in die Berechnungspipelines integriert werden. Diese
|
||||
fundamentalen Analysen müssen unabhängig von spezifischen Bausteinen als übergeordnete
|
||||
Prüfkriterien für die Modellgüte dienen.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-01
|
||||
- QM-02
|
||||
- QM-03
|
||||
- QM-04
|
||||
- QM-06
|
||||
- QM-07
|
||||
- QM-09
|
||||
- QM-23
|
||||
- QM-51
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-27
|
||||
title_original_de: MA-27 Statistische Grundlagenthemen
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-27_StatisticalBasis.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0213
|
||||
- id: MIT-AI-DATA-MA-28-diversitaetsindizes
|
||||
canonical_name: Diversitätsindizes
|
||||
description: Das System muss quantitative Metriken zur Erfassung der Heterogenität
|
||||
von KI-Trainingsdaten implementieren, um die Verteilung verschiedener Kategorien
|
||||
zu messen. Es ist sicherzustellen, dass diese Kennzahlen sowohl die Anzahl vorhandener
|
||||
Klassen als auch deren Gleichverteilung abbilden. Die Validierung der Datenqualität
|
||||
erfolgt durch die Berechnung von Diversitätsindizes, die statistische Unsicherheit
|
||||
oder Kollisionswahrscheinlichkeiten quantifizieren.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-68
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-28
|
||||
title_original_de: MA-28 Diversitätsindizes
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-28_Diversity-Indices.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-29-data-splitting
|
||||
canonical_name: Data-Splitting
|
||||
description: Die Aufteilung von KI-Trainingsdaten in disjunkte Teilmengen ist zwingend
|
||||
erforderlich, um eine unvoreingenommene Validierung der Modellgüte zu gewährleisten.
|
||||
Dabei müssen mindestens drei voneinander getrennte Bereiche für das Training,
|
||||
die Hyperparameter-Optimierung sowie die abschließende Leistungsbewertung definiert
|
||||
werden. Eine zufällige oder stratifizierte Trennung ist sicherzustellen, um Datenlecks
|
||||
zwischen den Phasen auszuschließen und die Generalisierungsfähigkeit des Systems
|
||||
nachweisbar zu prüfen.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-69
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-29
|
||||
title_original_de: MA-29 Data-Splitting
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-29_Data%20Splitting.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
- id: MIT-AI-DATA-MA-30-fairness
|
||||
canonical_name: Fairness
|
||||
description: Das System muss sicherstellen, dass KI-Trainingsdaten keine systematischen
|
||||
Verzerrungen bezüglich sensibler demografischer Merkmale aufweisen, um diskriminierende
|
||||
Vorhersagen zu vermeiden. Bei unzureichender Repräsentation von Teilgruppen sind
|
||||
präventive Aufbereitungsverfahren oder algorithmische Transformationsmethoden
|
||||
zur Bias-Korrektur zwingend anzuwenden. Die Wirksamkeit dieser Maßnahmen ist vor
|
||||
der Modellbereitstellung durch quantitative Prüfverfahren auf Gleichbehandlungsgrundsätze
|
||||
zu validieren.
|
||||
kind: measure
|
||||
regulation_anchor: EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)
|
||||
related_quaidal_ids:
|
||||
- QM-57
|
||||
external_refs: []
|
||||
source:
|
||||
framework: BSI QUAIDAL
|
||||
section: MA-30
|
||||
title_original_de: MA-30 Fairness
|
||||
url: https://github.com/BSI-Bund/QUAIDAL/blob/main/0000_Markdown/0001_Criteria,Measurements,Metrics/0002_Maßnahmen/MA-30_Fairness.md
|
||||
commit_sha: c39b75369841b359c6bf56d6588e3768c722842f
|
||||
license_note: § 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.
|
||||
plagiarism_score_at_generation: 0.0
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,83 @@
|
||||
# Lizenzregeln der Control-Pipeline
|
||||
|
||||
> **Stand:** 2026-05-21 — Mapping festgezurrt nach DB-Inspektion und IACE-Audit.
|
||||
>
|
||||
> Die Pipeline klassifiziert jede Regulation (und damit jedes daraus extrahierte
|
||||
> Chunk und jeden atomic_control) in eine von **drei Lizenzregeln**. Die Regel
|
||||
> entscheidet, ob der Volltext aufbewahrt werden darf und welche Attribution im
|
||||
> Ausgabe-Renderer Pflicht ist.
|
||||
|
||||
## Die drei Regeln
|
||||
|
||||
| Regel | Bedeutung | Volltext speichern? | Attribution Pflicht? | Beispiele |
|
||||
|-------|-----------|---------------------|----------------------|-----------|
|
||||
| **1** | Wörtlich — Hoheitsrecht / Public Domain | ✓ | nein (empfohlen für Audit) | EU-Recht (EUR-Lex), Bundesrecht, Satzungsrecht (DGUV UVV), TRBS, TRGS, ASR, US Federal Code (OSHA), NIST SP, EU-Leitfäden |
|
||||
| **2** | Wörtlich mit Attribution — freie Lizenzen | ✓ | **ja** | OWASP (CC-BY-SA-4.0), OECD AI Principles (OECD_PUBLIC), ENISA-Dokumente (CC-BY-4.0), Apache-2.0 Werke |
|
||||
| **3** | Nur zitieren — proprietäre Standards | ✗ | nicht anwendbar (kein Volltext) | DIN, EN, ISO, ANSI, UL, IEC, IEEE, DGUV Regeln/Informationen/Grundsätze, Bitkom-Leitfäden, BSI-Bausteine (urheberrechtlich) |
|
||||
|
||||
**Wichtige Klarstellung:** Regel 3 = "nur Identifier/Abschnitt zitieren", **nicht** "umformulieren". Die ursprüngliche Bezeichnung "neu formulieren" war irreführend. Korrekt: Bei Regel-3-Quellen darf die Pipeline den Volltext nicht speichern; sie bewahrt nur die Quellenreferenz (regulation_id + article/paragraph), und der Output-Renderer zeigt diese Referenz im Frontend/PDF.
|
||||
|
||||
## Mapping `license_type` → `license_rule`
|
||||
|
||||
| license_type | license_rule | Erklärung |
|
||||
|---|---|---|
|
||||
| `EU_LAW`, `EU_PUBLIC` | 1 | EU-Verordnungen, Richtlinien, OJ-Veröffentlichungen, EU-Leitfäden |
|
||||
| `DE_LAW`, `DE_PUBLIC` | 1 | Bundesgesetze, TRBS, TRGS, ASR, DGUV-UVV (Satzungsrecht) |
|
||||
| `AT_LAW`, `CH_LAW`, `FR_LAW`, `IT_LAW`, `ES_LAW`, `NL_LAW`, `HU_LAW` | 1 | Nationales Recht weiterer europäischer Staaten (EU-Mitgliedsstaaten sowie Schweiz) |
|
||||
| `US_GOV_PUBLIC`, `NIST_PUBLIC_DOMAIN`, `OSHA_PUBLIC` | 1 | Werke der US-Bundesregierung (Public Domain nach 17 U.S.C. §105) |
|
||||
| `CC-BY-4.0`, `CC-BY-SA-4.0`, `CC-BY-3.0`, `CC-BY-SA-3.0` | 2 | Creative-Commons mit Attribution-Pflicht |
|
||||
| `Apache-2.0`, `MIT` | 2 | Permissive OSS-Lizenzen — Copyright-/Lizenzhinweis muss erhalten bleiben (bei Apache-2.0 zusätzlich die NOTICE-Datei) |
|
||||
| `OECD_PUBLIC`, `ENISA_CC_BY_4.0` | 2 | Behörden-Publikationen mit Attribution-Auflage |
|
||||
| `DIN_COPYRIGHT`, `ISO_COPYRIGHT`, `ANSI_COPYRIGHT`, `UL_COPYRIGHT`, `IEC_COPYRIGHT` | 3 | Normungsorganisationen — nur Identifier-Zitat |
|
||||
| `DGUV_COPYRIGHT` | 3 | DGUV Regeln/Informationen/Grundsätze (nicht UVV) |
|
||||
| `BITKOM_COPYRIGHT`, `BSI_COPYRIGHT`, `VDMA_COPYRIGHT` | 3 | Verbands-/Behörden-Publikationen mit eigenständigem Urheberrecht |
|
||||
| `OWN_WORK` | 3 | BreakPilot-Eigentexte (Templates, eigene Patterns) — kein externes Lizenzrisiko, aber auch kein Public-Domain-Status |
|
||||
|
||||
**Sonderfall DGUV:** Die Klasse trennt sich nach Publikationstyp:
|
||||
- DGUV **Vorschriften / UVV** → `DE_LAW` → Regel 1
|
||||
- DGUV **Regeln, Informationen, Grundsätze** → `DGUV_COPYRIGHT` → Regel 3
|
||||
|
||||
## Auswirkung pro Pipeline-Stage
|
||||
|
||||
| Stage | Verhalten bei Regel 1 | Regel 2 | Regel 3 |
|
||||
|---|---|---|---|
|
||||
| Stage 6 ControlCompose (`pipeline_adapter.py:147`) | speichert `chunk_text` | speichert `chunk_text` | speichert `chunk_text = None` |
|
||||
| Atomic-Control-Bildung | Volltext als Quelle | Volltext + Attribution-Vermerk | nur regulation_id + article |
|
||||
| Output-Renderer (Frontend/PDF) | optionaler Quellen-Hinweis | **Pflicht-Attribution in Footer + Inline** | nur Identifier rendern |
|
||||
| Tech-File-Anhang | Quelle nennen | Quelle + Lizenz-URL | Identifier-Liste |
|
||||
|
||||
## Quellen ohne Klassifikation
|
||||
|
||||
Aktuell sind in `regulation_registry` **232 Regulationen** klassifiziert (Stand 2026-05-21). Die folgenden müssen noch ergänzt werden (Task #20 deckt den DGUV-Ingest ab):
|
||||
|
||||
| Quelle | Regel | Begründung |
|
||||
|---|---|---|
|
||||
| TRBS-Familie (24 PDFs im RAG) | 1 | Technische Regeln Betriebssicherheit — BAuA Bundesarbeitsblatt |
|
||||
| TRGS-Familie (alle Volltext-Chunks) | 1 | Technische Regeln Gefahrstoffe — BAuA |
|
||||
| ASR-Familie (17 PDFs) | 1 | Arbeitsstättenregeln — BAuA |
|
||||
| OSHA 29 CFR 1910 Subpart O + Technical Manual | 1 | US Federal Public Domain (17 U.S.C. §105) |
|
||||
| DGUV Vorschrift 1 + UVV-Familie (sobald ingestiert) | 1 | Satzungsrecht der BG |
|
||||
| DGUV Regel 100-500 + Information 209-072/074/073 | 3 | DGUV-Copyright, nur Identifier |
|
||||
| DIN-Identifier-Tabelle (ohne Volltext) | 3 | DIN-Beuth-Copyright |
|
||||
| ANSI B11.0 + RIA R15.06 + UL 508A Identifier | 3 | ANSI/UL-Copyright |
|
||||
| ISO 12100/13849/13857 Identifier | 3 | ISO-Copyright |
|
||||
|
||||
## Audit-Pflicht
|
||||
|
||||
Vor jedem Ingest neuer Quellen:
|
||||
1. Lizenz prüfen (publikationen.dguv.de, EUR-Lex, etc.)
|
||||
2. license_type aus obiger Tabelle wählen — wenn nicht vorhanden, hier ergänzen
|
||||
3. license_rule wird daraus deterministisch abgeleitet
|
||||
4. Attribution-Text bei Regel 2 ist Pflichtfeld
|
||||
|
||||
Vor jedem Output:
|
||||
- Wenn ein atomic_control aus einer Regel-3-Quelle stammt: prüfen, dass NUR der Identifier gezeigt wird, niemals Volltext
|
||||
- Wenn aus Regel-2-Quelle: Attribution muss im PDF-Footer und im Frontend-Tooltip vorhanden sein
|
||||
- Wenn aus Regel-1-Quelle: empfohlen Quelle nennen für Auditierbarkeit
|
||||
|
||||
## Verweise
|
||||
|
||||
- Schema: `migrations/002_regulation_registry.sql`
|
||||
- Code: `services/regulation_registry.py`, `services/pipeline_adapter.py`
|
||||
- Seed-Script: `scripts/f1_migrate_regulation_registry.py`
|
||||
- Tests: `tests/test_regulation_registry.py` (assert: rule IN (1,2,3))
|
||||
@@ -0,0 +1,101 @@
|
||||
# Incremental BatchDedup für nachgeschobene Dokumente
|
||||
|
||||
Eingefuehrt am 2026-05-18. Pattern fuer alle zukuenftigen Einzeldokument-Ingestionen.
|
||||
|
||||
## Problem
|
||||
|
||||
Der Default-BatchDedup-Runner lief gegen ALLE `pass0b` Atomics ohne Filter
|
||||
(WHERE decomposition_method = 'pass0b' AND release_state NOT IN ('deprecated','duplicate')).
|
||||
Das sind bei uns ~172k Controls. Pace ~5k/h → 25-40h Laufzeit. Bei jedem
|
||||
hinzugefuegten Dokument der gleiche volle Lauf — auch wenn das neue Dokument
|
||||
nur 1-2k Atomics erzeugt.
|
||||
|
||||
Zusaetzliches Risiko: Phase 1 schreibt master_controls erst am Ende. Ein
|
||||
Container-Crash mitten im Lauf (z.B. via Qdrant-Timeout) verwirft 100%
|
||||
des In-Memory-Fortschritts.
|
||||
|
||||
## Loesung — `since` Parameter
|
||||
|
||||
`POST /v1/canonical/generate/batch-dedup` akzeptiert jetzt:
|
||||
|
||||
```json
|
||||
{
|
||||
"dry_run": false,
|
||||
"since": "2026-05-18T02:53:00+00:00"
|
||||
}
|
||||
```
|
||||
|
||||
Effekt:
|
||||
- Phase 1 (intra-group dedup) laedt nur Controls mit `created_at >= since`
|
||||
- Phase 2 (cross-group dedup) filtert ebenfalls auf `created_at >= since`
|
||||
- Phase 2 Checkpoint wird vor Lauf-Start geloescht (sonst skippt stale
|
||||
`last_control_id` neu erzeugte Atomics deren control_id alphabetisch
|
||||
davor liegt)
|
||||
|
||||
Phase 2 sucht weiter im **vollen** Qdrant-Index `atomic_controls_dedup`,
|
||||
findet also Matches zu alten Master Controls und verlinkt korrekt.
|
||||
|
||||
## Wann verwenden
|
||||
|
||||
| Szenario | Empfehlung |
|
||||
|---|---|
|
||||
| Einzelnes neues Dokument ingestiert + Pass 0a + Pass 0b durchgelaufen | `since` setzen auf Zeitpunkt vor Pass 0b |
|
||||
| Mehrere kleine Updates seit letztem Full-Dedup | `since` setzen auf Zeitpunkt nach letztem Full-Dedup |
|
||||
| Initial-Setup oder Pipeline-Major-Update | KEIN `since` — full run |
|
||||
| Verdacht auf Drift / Quality-Regression | KEIN `since` — full run |
|
||||
|
||||
## Workflow nach Einzeldokument-Ingestion
|
||||
|
||||
```bash
|
||||
# 1. Pass 0a auf neue Controls (Obligations extrahieren)
|
||||
curl -X POST .../v1/canonical/generate/run-pass0a -d '{...}'
|
||||
|
||||
# 2. Pass 0b Decomposition Submit (Atomics erzeugen)
|
||||
curl -X POST .../v1/canonical/generate/submit-pass0b -d '{...}'
|
||||
|
||||
# 3. Wenn Anthropic Batch durch: process-batch
|
||||
curl -X POST .../v1/canonical/generate/process-batch -d '{
|
||||
"batch_id": "msgbatch_...",
|
||||
"pass_type": "0b"
|
||||
}'
|
||||
|
||||
# 4. Inkrementell deduppen (NEU, statt 25h full run)
|
||||
curl -X POST .../v1/canonical/generate/batch-dedup -d '{
|
||||
"dry_run": false,
|
||||
"since": "<ISO-Datetime kurz vor Pass-0b-Start>"
|
||||
}'
|
||||
```
|
||||
|
||||
## Pace-Beobachtung (CRA-Lauf 2026-05-18)
|
||||
|
||||
- Total neue Atomics: 19.423
|
||||
- Phase 1 multi-groups: 568 (Rest 18.101 sind Singletons → direkt Master)
|
||||
- Phase 2 Cross-Group: ~3-4h erwartet
|
||||
- Vergleich: Full-Run waere 25-40h gewesen, scoped 6-13x schneller.
|
||||
|
||||
## Implementation-Details (fuer Wartung)
|
||||
|
||||
Geaenderte Dateien:
|
||||
- `services/batch_dedup_runner.py` — `run()` + `_load_merge_groups()` +
|
||||
`_run_cross_group_pass()` SQL-Queries
|
||||
- `api/control_generator_routes.py` — `BatchDedupRequest.since` Feld +
|
||||
Handler reicht durch
|
||||
|
||||
Backwards-kompatibel: ohne `since` aequivalent zum alten Verhalten.
|
||||
|
||||
## Bekannte Limits
|
||||
|
||||
1. **Phase 2 Checkpoint wird beim scoped Lauf geloescht.** Wenn waehrend
|
||||
eines `since`-Laufs ein voller Run dazwischen geschoben werden soll
|
||||
(sollte nicht passieren), muss dieser von vorn neu gestartet werden.
|
||||
2. **Phase 1 commit-Granularitaet nicht angefasst.** Bei Crash mitten in
|
||||
Phase 1 ohne `since` bleibt der Verlust gleich. Aber: scoped Phase 1
|
||||
ist so kurz (Minuten), dass das praktisch egal ist.
|
||||
3. **Singleton-Atomics werden direkt Master ohne Cross-Check.** Wenn ein
|
||||
neues Singleton-Atomic semantisch identisch zu einem alten Master
|
||||
ist, faengt das nur Phase 2 (via Qdrant). Funktioniert solange Phase 2
|
||||
nicht uebersprungen wird (dry_run=false ist Pflicht).
|
||||
|
||||
## Memory-Eintrag
|
||||
|
||||
Siehe `~/.claude/projects/-Users-benjaminadmin-Projekte-breakpilot-core/memory/feedback_incremental_dedup.md`
|
||||
@@ -0,0 +1,162 @@
|
||||
-- Migration 010: Expanded Object Ontology
|
||||
-- Expands from 31 to ~180 canonical object tokens with clear semantic boundaries.
|
||||
-- Each token has a description to prevent ambiguous classification.
|
||||
--
|
||||
-- IMPORTANT: This migration ADDS new tokens. Existing synonyms are preserved.
|
||||
|
||||
SET search_path TO compliance, public;
|
||||
|
||||
-- Add description column to object_synonyms if not exists
|
||||
DO $$ BEGIN
|
||||
ALTER TABLE object_synonyms ADD COLUMN IF NOT EXISTS description TEXT;
|
||||
EXCEPTION WHEN duplicate_column THEN NULL;
|
||||
END $$;
|
||||
|
||||
-- New table: canonical object definitions with clear boundaries
|
||||
CREATE TABLE IF NOT EXISTS object_ontology (
|
||||
canonical_token VARCHAR(100) PRIMARY KEY,
|
||||
category VARCHAR(50) NOT NULL, -- security, data_protection, governance, regulatory, technical
|
||||
description_de TEXT NOT NULL, -- German description for LLM prompts
|
||||
description_en TEXT NOT NULL, -- English description
|
||||
NOT_confused_with TEXT, -- Explicit disambiguation
|
||||
examples TEXT, -- Example controls that belong here
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- ═══════════════════════════════════════════════════════════════
|
||||
-- SECURITY & TECHNICAL
|
||||
-- ═══════════════════════════════════════════════════════════════
|
||||
|
||||
-- Authentication & Identity
|
||||
INSERT INTO object_ontology VALUES
|
||||
('multi_factor_auth', 'security', 'Multi-Faktor-Authentifizierung (2FA/MFA)', 'Multi-factor authentication', 'NOT password_policy (Passwortregeln) oder session_management (Sitzungen)', 'MFA implementieren, 2FA-Pflicht, Authentifizierungsfaktoren'),
|
||||
('password_policy', 'security', 'Passwortrichtlinien und -komplexität', 'Password policies and complexity', 'NOT credentials (allg. Zugangsdaten) oder multi_factor_auth (MFA)', 'Passwortlänge, Komplexität, Rotation, Passwort-Historie'),
|
||||
('credentials', 'security', 'Zugangsdaten-Verwaltung (Tokens, API-Keys, Secrets)', 'Credential management', 'NOT password_policy (Passwortregeln) oder key_management (kryptografisch)', 'API-Key-Rotation, Token-Verwaltung, Secret Storage'),
|
||||
('session_management', 'security', 'Sitzungsverwaltung (Session Timeout, Token-Lifecycle)', 'Session management', 'NOT multi_factor_auth (Login) oder access_control (Berechtigungen)', 'Session Timeout, Token-Invalidierung, Concurrent Sessions'),
|
||||
('privileged_access', 'security', 'Verwaltung privilegierter Zugriffe (Admin, Root)', 'Privileged access management', 'NOT access_control (allg. Zugriffskontrolle)', 'Admin-Konten, Root-Zugriff, PAM, Just-in-Time-Access'),
|
||||
('access_control', 'security', 'Allgemeine Zugriffskontrolle (RBAC, Berechtigungen)', 'Access control (RBAC, permissions)', 'NOT privileged_access (Admin) oder authentication (Login)', 'Rollenbasierte Zugriffskontrolle, Berechtigungsvergabe, Least Privilege')
|
||||
ON CONFLICT (canonical_token) DO UPDATE SET description_de = EXCLUDED.description_de, description_en = EXCLUDED.description_en, NOT_confused_with = EXCLUDED.NOT_confused_with;
|
||||
|
||||
-- Encryption & Cryptography
|
||||
INSERT INTO object_ontology VALUES
|
||||
('encryption', 'security', 'Verschlüsselung at-rest (Datenverschlüsselung)', 'Encryption at rest', 'NOT transport_encryption (in-transit) oder key_management (Schlüssel)', 'AES-256, Festplattenverschlüsselung, DB-Verschlüsselung'),
|
||||
('transport_encryption', 'security', 'Transportverschlüsselung (TLS, HTTPS)', 'Transport encryption (TLS)', 'NOT encryption (at-rest)', 'TLS 1.3, HTTPS, mTLS, Zertifikats-Pinning'),
|
||||
('key_management', 'security', 'Kryptografische Schlüsselverwaltung', 'Cryptographic key management', 'NOT credentials (API-Keys) oder certificate_management (Zertifikate)', 'Key Rotation, HSM, Key Escrow, Schlüsselerzeugung'),
|
||||
('certificate_management', 'security', 'Zertifikatsverwaltung (PKI, X.509)', 'Certificate management (PKI)', 'NOT key_management (Schlüssel) oder encryption (Verschlüsselung)', 'X.509-Zertifikate, PKI, Zertifikatsrückruf, CA-Verwaltung')
|
||||
ON CONFLICT (canonical_token) DO UPDATE SET description_de = EXCLUDED.description_de, description_en = EXCLUDED.description_en, NOT_confused_with = EXCLUDED.NOT_confused_with;
|
||||
|
||||
-- Network Security
|
||||
INSERT INTO object_ontology VALUES
|
||||
('network_security', 'security', 'Allgemeine Netzwerksicherheit', 'General network security', 'NOT network_segmentation (Segmentierung) oder firewall (Regeln)', 'Netzwerk-Hardening, Port-Management, DNS-Sicherheit'),
|
||||
('network_segmentation', 'security', 'Netzwerksegmentierung (VLANs, Zonen)', 'Network segmentation', 'NOT network_security (allg.) oder firewall (Regeln)', 'VLANs, DMZ, Micro-Segmentation, Zero Trust Network'),
|
||||
('firewall', 'security', 'Firewall-Regeln und -Verwaltung', 'Firewall rules and management', 'NOT network_security (allg.)', 'WAF, Firewall-Regeln, Ingress/Egress, Whitelist'),
|
||||
('vpn', 'security', 'VPN-Konfiguration und -Verwaltung', 'VPN configuration', NULL, 'IPSec, WireGuard, Site-to-Site VPN'),
|
||||
('remote_access', 'security', 'Fernzugriff und Remote-Arbeit', 'Remote access', 'NOT vpn (Technologie)', 'Remote Desktop, Bastion Hosts, Jump Server')
|
||||
ON CONFLICT (canonical_token) DO UPDATE SET description_de = EXCLUDED.description_de, description_en = EXCLUDED.description_en, NOT_confused_with = EXCLUDED.NOT_confused_with;
|
||||
|
||||
-- Monitoring & Logging (CRITICAL: clear boundaries!)
|
||||
INSERT INTO object_ontology VALUES
|
||||
('monitoring', 'security', 'Kontinuierliche Echtzeit-Überwachung von Systemen/Metriken', 'Continuous real-time monitoring of systems', 'NOT audit_logging (Protokollierung), NOT training (Schulung), NOT procedure (Verfahren), NOT risk_assessment (Bewertung)', 'System-Health-Monitoring, Verfügbarkeitsüberwachung, Performance-Monitoring, Anomalie-Erkennung in Echtzeit'),
|
||||
('audit_logging', 'security', 'Protokollierung und Audit-Trail (Nachvollziehbarkeit)', 'Audit logging and trail', 'NOT monitoring (Echtzeit-Überwachung), NOT compliance_audit (Prüfungen)', 'Log-Aufzeichnung, Audit Trail, Zeitstempel, Nachvollziehbarkeit, Protokollierung von Zugriffen'),
|
||||
('siem', 'security', 'Security Information and Event Management', 'SIEM', 'NOT monitoring (allg.) oder audit_logging (Protokollierung)', 'SIEM-Korrelation, Security Events, Log-Aggregation'),
|
||||
('alerting', 'security', 'Benachrichtigungen und Meldepflichten bei Sicherheitsereignissen', 'Security alerting and notification obligations', 'NOT monitoring (Überwachung) oder incident (Vorfallsbehandlung)', 'Sicherheitsmeldungen, Breach Notification, Benachrichtigungspflichten'),
|
||||
('compliance_audit', 'governance', 'Compliance-Prüfungen und externe Audits', 'Compliance audits and external reviews', 'NOT audit_logging (technische Protokollierung), NOT monitoring (Überwachung)', 'Externe Prüfung, Jahresabschlussprüfung, Zertifizierungsaudit, Lieferanten-Audit')
|
||||
ON CONFLICT (canonical_token) DO UPDATE SET description_de = EXCLUDED.description_de, description_en = EXCLUDED.description_en, NOT_confused_with = EXCLUDED.NOT_confused_with;
|
||||
|
||||
-- Vulnerability & Patch Management
|
||||
INSERT INTO object_ontology VALUES
|
||||
('vulnerability', 'security', 'Schwachstellenmanagement und -scanning', 'Vulnerability management', 'NOT patch_management (Updates)', 'Vulnerability Scanning, CVE-Tracking, Penetration Testing'),
|
||||
('patch_management', 'security', 'Software-Updates und Patch-Verwaltung', 'Patch management', 'NOT vulnerability (Scanning)', 'Patch-Zyklus, Update-Policy, Hotfix-Prozess')
|
||||
ON CONFLICT (canonical_token) DO UPDATE SET description_de = EXCLUDED.description_de, description_en = EXCLUDED.description_en, NOT_confused_with = EXCLUDED.NOT_confused_with;
|
||||
|
||||
-- Backup & Recovery
|
||||
INSERT INTO object_ontology VALUES
|
||||
('backup', 'security', 'Datensicherung und Backup-Strategien', 'Backup strategies', 'NOT disaster_recovery (Wiederherstellung)', 'Backup-Rotation, Offsite-Backup, Backup-Verschlüsselung'),
|
||||
('disaster_recovery', 'security', 'Notfallwiederherstellung und Business Continuity', 'Disaster recovery', 'NOT backup (Datensicherung) oder incident (Vorfälle)', 'DR-Plan, RTO/RPO, Failover, Business Continuity')
|
||||
ON CONFLICT (canonical_token) DO UPDATE SET description_de = EXCLUDED.description_de, description_en = EXCLUDED.description_en, NOT_confused_with = EXCLUDED.NOT_confused_with;
|
||||
|
||||
-- ═══════════════════════════════════════════════════════════════
|
||||
-- DATA PROTECTION (CRITICAL: clear boundaries!)
|
||||
-- ═══════════════════════════════════════════════════════════════
|
||||
|
||||
INSERT INTO object_ontology VALUES
|
||||
('personal_data', 'data_protection', 'Verarbeitung personenbezogener Daten (DSGVO-Grundsätze)', 'Personal data processing principles', 'NOT sensitive_data (besondere Kategorien), NOT data_subject_rights (Betroffenenrechte), NOT consent (Einwilligung)', 'Datenminimierung, Zweckbindung, Speicherbegrenzung, Rechtmäßigkeit der Verarbeitung'),
|
||||
('sensitive_data', 'data_protection', 'Besondere Kategorien personenbezogener Daten (Art. 9 DSGVO)', 'Special categories of personal data', 'NOT personal_data (allg.), NOT health_data (Gesundheit)', 'Biometrische Daten, ethnische Herkunft, politische Meinungen, Gewerkschaftszugehörigkeit'),
|
||||
('health_data', 'data_protection', 'Gesundheitsdaten und Medizindaten', 'Health and medical data', 'NOT sensitive_data (allg. besondere Kategorien)', 'Patientendaten, Medizinprodukte-Daten, klinische Daten'),
|
||||
('consent', 'data_protection', 'Einwilligungsmanagement', 'Consent management', 'NOT data_subject_rights (andere Betroffenenrechte)', 'Einwilligung einholen, Widerruf, Opt-In, Consent-Banner'),
|
||||
('data_subject_rights', 'data_protection', 'Betroffenenrechte (Auskunft, Löschung, Portabilität)', 'Data subject rights (access, erasure, portability)', 'NOT consent (Einwilligung), NOT personal_data (Verarbeitung)', 'Auskunftsrecht, Recht auf Löschung, Datenportabilität, Widerspruchsrecht'),
|
||||
('data_retention', 'data_protection', 'Aufbewahrungsfristen und Löschkonzept', 'Data retention and deletion', 'NOT backup (technische Sicherung)', 'Löschfristen, Aufbewahrungspflichten, Löschkonzept, Archivierung'),
|
||||
('data_transfer', 'data_protection', 'Internationale Datenübermittlung (Drittländer, SCC)', 'International data transfer', 'NOT data_processing (Verarbeitung)', 'Drittlandtransfer, Standardvertragsklauseln, Angemessenheitsbeschluss, BCR'),
|
||||
('data_breach_notification', 'data_protection', 'Meldung von Datenschutzverletzungen (Art. 33/34 DSGVO)', 'Data breach notification', 'NOT incident (allg. Sicherheitsvorfälle), NOT alerting (techn. Alerts)', 'Breach-Meldung an Aufsichtsbehörde, Benachrichtigung Betroffener, 72-Stunden-Frist'),
|
||||
('dpia', 'data_protection', 'Datenschutz-Folgenabschätzung (Art. 35 DSGVO)', 'Data protection impact assessment', NULL, 'DSFA, Schwellwertanalyse, Risikobewertung für Betroffene'),
|
||||
('data_processing_agreement', 'data_protection', 'Auftragsverarbeitung (Art. 28 DSGVO)', 'Data processing agreements', NULL, 'AVV, Auftragsverarbeiter, Sub-Auftragsverarbeiter, TOMs'),
|
||||
('privacy_by_design', 'data_protection', 'Datenschutz durch Technikgestaltung (Art. 25 DSGVO)', 'Privacy by design and default', NULL, 'Privacy by Default, Datenminimierung in der Architektur'),
|
||||
('data_processing_register', 'data_protection', 'Verzeichnis von Verarbeitungstätigkeiten (Art. 30 DSGVO)', 'Records of processing activities', NULL, 'VVT, Verarbeitungsverzeichnis')
|
||||
ON CONFLICT (canonical_token) DO UPDATE SET description_de = EXCLUDED.description_de, description_en = EXCLUDED.description_en, NOT_confused_with = EXCLUDED.NOT_confused_with;
|
||||
|
||||
-- ═══════════════════════════════════════════════════════════════
|
||||
-- GOVERNANCE & ORGANIZATION
|
||||
-- ═══════════════════════════════════════════════════════════════
|
||||
|
||||
INSERT INTO object_ontology VALUES
|
||||
('policy', 'governance', 'Richtlinien und Leitlinien ERSTELLEN/DEFINIEREN', 'Creating/defining policies', 'NOT procedure (Verfahrensablauf), NOT compliance_audit (Prüfung)', 'Sicherheitsrichtlinie erstellen, Policy-Framework definieren, Leitlinie verabschieden'),
|
||||
('procedure', 'governance', 'Verfahren und Prozessabläufe DEFINIEREN/DOKUMENTIEREN', 'Defining/documenting procedures', 'NOT incident (Vorfallsbehandlung), NOT process (laufender Betrieb)', 'Verfahrensanweisung, Ablaufbeschreibung, Standardprozess definieren'),
|
||||
('process', 'governance', 'Laufende betriebliche Prozesse AUSFÜHREN', 'Executing operational processes', 'NOT procedure (Definition), NOT monitoring (Überwachung)', 'Betriebsprozess, Geschäftsprozess, Workflow-Ausführung'),
|
||||
('training', 'governance', 'Schulung und Weiterbildung DURCHFÜHREN', 'Training and education', 'NOT awareness (Sensibilisierung), NOT monitoring (Überwachung!)', 'Mitarbeiterschulung, Zertifizierungskurs, Pflichtunterweisung'),
|
||||
('awareness', 'governance', 'Sicherheitsbewusstsein und Sensibilisierung', 'Security awareness', 'NOT training (formale Schulung)', 'Phishing-Simulation, Awareness-Kampagne, Sicherheitskultur'),
|
||||
('incident', 'governance', 'Sicherheitsvorfälle BEHANDELN (Incident Response)', 'Incident response and handling', 'NOT alerting (Benachrichtigung), NOT data_breach_notification (DSGVO-Meldung)', 'Incident Response Plan, Vorfallsanalyse, Containment, Recovery, Lessons Learned'),
|
||||
('risk_management', 'governance', 'Risikomanagement und -bewertung', 'Risk management and assessment', 'NOT vulnerability (techn. Schwachstellen), NOT monitoring (Überwachung)', 'Risikobewertung, Risikobehandlung, Risikoakzeptanz, Risikomatrix'),
|
||||
('third_party_management', 'governance', 'Lieferanten- und Drittanbieter-Management', 'Third-party and vendor management', 'NOT data_processing_agreement (AVV)', 'Lieferantenbewertung, Vendor Risk Assessment, Supply Chain Security'),
|
||||
('change_management', 'governance', 'Änderungsmanagement', 'Change management', 'NOT patch_management (Updates)', 'Change Request, Change Advisory Board, Rollback-Verfahren'),
|
||||
('documentation', 'governance', 'Allgemeine Dokumentationspflichten', 'General documentation requirements', 'NOT audit_logging (technische Logs), NOT data_processing_register (VVT)', 'Betriebshandbuch, Systemdokumentation, Verfahrensdokumentation'),
|
||||
('records_management', 'governance', 'Akten- und Unterlagenverwaltung', 'Records management', 'NOT data_retention (Löschfristen)', 'Archivierung, Aktenführung, Aufbewahrungspflichten nach HGB/AO'),
|
||||
('compliance_reporting', 'governance', 'Compliance-Berichterstattung', 'Compliance reporting', 'NOT alerting (techn. Alerts), NOT supervisory_authority (Behördenkommunikation)', 'Compliance-Bericht, Management-Reporting, KPI-Tracking'),
|
||||
('asset_management', 'governance', 'IT-Asset-Verwaltung und Inventar', 'IT asset management', NULL, 'Asset-Inventar, CMDB, Hardware-Lifecycle, Software-Inventar'),
|
||||
('physical_security', 'security', 'Physische Sicherheit und Zutrittskontrolle', 'Physical security and access', NULL, 'Zutrittskontrolle, Videoüberwachung (physisch), Serverraum-Sicherheit'),
|
||||
('human_resources_security', 'governance', 'Personalsicherheit', 'HR security', 'NOT training (Schulung)', 'Background-Checks, Geheimhaltungsvereinbarungen, Onboarding/Offboarding')
|
||||
ON CONFLICT (canonical_token) DO UPDATE SET description_de = EXCLUDED.description_de, description_en = EXCLUDED.description_en, NOT_confused_with = EXCLUDED.NOT_confused_with;
|
||||
|
||||
-- ═══════════════════════════════════════════════════════════════
|
||||
-- REGULATORY SPECIFIC
|
||||
-- ═══════════════════════════════════════════════════════════════
|
||||
|
||||
INSERT INTO object_ontology VALUES
|
||||
('supervisory_authority', 'regulatory', 'Kommunikation mit Aufsichtsbehörden', 'Supervisory authority communication', 'NOT compliance_reporting (interne Berichte)', 'Meldung an BaFin, Abstimmung mit DPA, behördliche Anfragen'),
|
||||
('certification', 'regulatory', 'Zertifizierung und Konformitätsbewertung', 'Certification and conformity assessment', 'NOT compliance_audit (Prüfung), NOT personal_data (Datenschutz)', 'CE-Kennzeichnung, ISO-Zertifizierung, Konformitätserklärung'),
|
||||
('product_safety', 'regulatory', 'Produktsicherheit und Marktüberwachung', 'Product safety and market surveillance', 'NOT certification (Zertifizierung)', 'Rückrufmanagement, Sicherheitsbewertung, RAPEX-Meldung'),
|
||||
('ai_system', 'regulatory', 'KI-System-Regulierung (AI Act)', 'AI system regulation', NULL, 'KI-Risikobewertung, Hochrisiko-KI, Transparenzpflichten, FRIA'),
|
||||
('financial_reporting', 'regulatory', 'Finanzberichterstattung und Rechnungslegung', 'Financial reporting and accounting', NULL, 'Jahresabschluss, HGB-Pflichten, IFRS, Buchführung'),
|
||||
('aml', 'regulatory', 'Geldwäscheprävention und KYC', 'Anti-money laundering and KYC', NULL, 'KYC, Verdachtsmeldung, PEP-Prüfung, Transaktionsmonitoring'),
|
||||
('whistleblowing', 'regulatory', 'Hinweisgeberschutz und Meldekanäle', 'Whistleblower protection', NULL, 'Hinweisgebersystem, Meldekanal, Hinweisgeberschutzgesetz'),
|
||||
('consumer_protection', 'regulatory', 'Verbraucherschutz und AGB', 'Consumer protection', NULL, 'AGB-Prüfung, Widerrufsrecht, Informationspflichten, Preistransparenz'),
|
||||
('ecommerce', 'regulatory', 'E-Commerce-Pflichten (Impressum, Fernabsatz)', 'E-commerce obligations', NULL, 'Impressumspflicht, Fernabsatzrecht, Online-Handel-Pflichten'),
|
||||
('telecommunications', 'regulatory', 'Telekommunikationsregulierung', 'Telecommunications regulation', NULL, 'TKG-Pflichten, Vorratsdatenspeicherung, Notruf'),
|
||||
('medical_device', 'regulatory', 'Medizinprodukte-Regulierung (MDR)', 'Medical device regulation', NULL, 'UDI, klinische Bewertung, Post-Market Surveillance'),
|
||||
('payment_services', 'regulatory', 'Zahlungsdienste-Regulierung (PSD2)', 'Payment services regulation', NULL, 'Starke Kundenauthentifizierung, PSD2-Compliance, Open Banking'),
|
||||
('critical_infrastructure', 'regulatory', 'KRITIS und NIS2-Pflichten', 'Critical infrastructure (NIS2)', NULL, 'KRITIS-Meldepflichten, NIS2-Maßnahmen, Mindeststandards'),
|
||||
('supply_chain_due_diligence', 'regulatory', 'Lieferkettensorgfaltspflicht (LkSG)', 'Supply chain due diligence', 'NOT third_party_management (allg. Lieferanten)', 'Menschenrechts-Due-Diligence, Umwelt-Sorgfaltspflicht, LkSG-Bericht'),
|
||||
('sustainability_reporting', 'regulatory', 'Nachhaltigkeitsberichterstattung (CSRD)', 'Sustainability reporting', NULL, 'ESG-Reporting, CSRD, Nachhaltigkeitsbericht'),
|
||||
('cookie_consent', 'regulatory', 'Cookie-Consent und Tracking (TDDDG/ePrivacy)', 'Cookie consent and tracking', 'NOT consent (allg. Einwilligung)', 'Cookie-Banner, Tracking-Einwilligung, TDDDG §25'),
|
||||
('video_surveillance', 'regulatory', 'Videoüberwachung (datenschutzrechtlich)', 'Video surveillance (data protection)', 'NOT physical_security (physische Sicherheit), NOT monitoring (IT-Monitoring)', 'Kamera-Überwachung, Speicherfristen, Kennzeichnungspflicht')
|
||||
ON CONFLICT (canonical_token) DO UPDATE SET description_de = EXCLUDED.description_de, description_en = EXCLUDED.description_en, NOT_confused_with = EXCLUDED.NOT_confused_with;
|
||||
|
||||
-- ═══════════════════════════════════════════════════════════════
|
||||
-- APPLICATION SECURITY
|
||||
-- ═══════════════════════════════════════════════════════════════
|
||||
|
||||
INSERT INTO object_ontology VALUES
|
||||
('secure_development', 'technical', 'Sichere Softwareentwicklung (SDLC)', 'Secure software development lifecycle', NULL, 'Secure Coding, Code Review, SAST/DAST, DevSecOps'),
|
||||
('api_security', 'technical', 'API-Sicherheit', 'API security', NULL, 'API-Authentifizierung, Rate Limiting, Input Validation'),
|
||||
('input_validation', 'technical', 'Eingabevalidierung und Output Encoding', 'Input validation and output encoding', NULL, 'XSS-Prävention, SQL-Injection-Schutz, Parametervalidierung'),
|
||||
('container_security', 'technical', 'Container- und Cloud-Sicherheit', 'Container and cloud security', NULL, 'Docker-Hardening, Kubernetes-Security, Image-Scanning'),
|
||||
('logging_configuration', 'technical', 'Log-Konfiguration und -Format', 'Log configuration and format', 'NOT audit_logging (Nachvollziehbarkeit), NOT monitoring (Überwachung)', 'Log-Format, Log-Rotation, Log-Shipping, Structured Logging'),
|
||||
('data_classification', 'governance', 'Datenklassifizierung und -kennzeichnung', 'Data classification and labeling', 'NOT sensitive_data (besondere Kategorien)', 'Vertraulichkeitsstufen, Datenklassifizierung, Labeling')
|
||||
ON CONFLICT (canonical_token) DO UPDATE SET description_de = EXCLUDED.description_de, description_en = EXCLUDED.description_en, NOT_confused_with = EXCLUDED.NOT_confused_with;
|
||||
|
||||
-- Count results
|
||||
-- Report how many canonical tokens object_ontology holds after the seed ran.
DO $$
DECLARE
    token_total INTEGER;
BEGIN
    -- Scalar subquery assignment; equivalent to SELECT ... INTO.
    token_total := (SELECT count(*) FROM object_ontology);
    RAISE NOTICE 'object_ontology: % canonical tokens defined', token_total;
END
$$;
|
||||
@@ -0,0 +1,58 @@
|
||||
-- Migration 011: Derived Controls Library (Clean-Room MCs from external sources)
|
||||
-- Schema: compliance
|
||||
--
|
||||
-- Holds Master Controls + atomic controls + mitigations + metrics that were
|
||||
-- derived Clean-Room from external regulatory sources (BSI QUAIDAL today,
|
||||
-- Grundschutz++/CRA/NIST AI RMF next). Kept separate from the gpre2
|
||||
-- master_controls table because:
|
||||
-- 1) The shape is different (no object_group/phase concepts).
|
||||
-- 2) Source-layer separation: derivations from external IP must be cleanly
|
||||
-- separable from internally-generated artifacts.
|
||||
-- 3) Each row carries the licence + provenance for due diligence.
|
||||
--
|
||||
-- Run: ssh macmini "docker exec -i bp-core-postgres psql -U breakpilot -d breakpilot_db" \
|
||||
-- < control-pipeline/migrations/011_derived_controls.sql
|
||||
|
||||
SET search_path TO compliance, public;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS derived_controls (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
derived_id VARCHAR(200) UNIQUE NOT NULL, -- e.g. MC-AI-DATA-QKB-01-repraesentativitaet
|
||||
kind VARCHAR(30) NOT NULL, -- criterion | building_block | measure | metric
|
||||
canonical_name VARCHAR(300) NOT NULL,
|
||||
description TEXT NOT NULL, -- our own wording, never the original
|
||||
regulation_anchor TEXT, -- e.g. "EU AI Act Art. 10"
|
||||
related_quaidal_ids JSONB NOT NULL DEFAULT '[]', -- ["QB-03", "QB-04", ...]
|
||||
external_refs JSONB NOT NULL DEFAULT '[]', -- [{framework, citation}, ...]
|
||||
source_framework VARCHAR(80) NOT NULL, -- "BSI QUAIDAL"
|
||||
source_section VARCHAR(80) NOT NULL, -- "QKB-01"
|
||||
source_url TEXT,
|
||||
source_commit_sha VARCHAR(80),
|
||||
source_title_original TEXT, -- original title (label, not protected)
|
||||
source_license_note TEXT,
|
||||
plagiarism_score_at_generation NUMERIC(5,4), -- 0..1; gate was 0.20
|
||||
generated_by_model VARCHAR(80),
|
||||
yaml_path TEXT, -- pointer back to source YAML
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_derived_controls_kind ON derived_controls(kind);
|
||||
CREATE INDEX IF NOT EXISTS idx_derived_controls_source_framework ON derived_controls(source_framework);
|
||||
CREATE INDEX IF NOT EXISTS idx_derived_controls_source_section ON derived_controls(source_section);
|
||||
CREATE INDEX IF NOT EXISTS idx_derived_controls_related_quaidal_gin
|
||||
ON derived_controls USING GIN(related_quaidal_ids);
|
||||
|
||||
-- Trigger to keep updated_at fresh
|
||||
-- Row-level trigger function: overwrites updated_at on the row being written
-- with now() and hands the modified row back to the executor.
CREATE OR REPLACE FUNCTION trg_derived_controls_set_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.updated_at := now();
    RETURN NEW;
END;
$$;
|
||||
|
||||
DROP TRIGGER IF EXISTS derived_controls_updated_at ON derived_controls;
|
||||
CREATE TRIGGER derived_controls_updated_at
|
||||
BEFORE UPDATE ON derived_controls
|
||||
FOR EACH ROW EXECUTE FUNCTION trg_derived_controls_set_updated_at();
|
||||
@@ -0,0 +1,170 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Upsert derived QUAIDAL controls from YAML into compliance.derived_controls.
|
||||
|
||||
Reads:
|
||||
control-pipeline/data/quaidal/master_controls.yaml
|
||||
control-pipeline/data/quaidal/atomic_controls.yaml
|
||||
control-pipeline/data/quaidal/mitigations.yaml
|
||||
control-pipeline/data/quaidal/metrics.yaml
|
||||
|
||||
Writes: compliance.derived_controls (idempotent UPSERT by derived_id)
|
||||
|
||||
Usage:
|
||||
# Mac Mini direct:
|
||||
python3 control-pipeline/scripts/apply_quaidal_to_db.py
|
||||
|
||||
# Via SSH (locally, against macmini DB):
|
||||
DB_HOST=macmini python3 control-pipeline/scripts/apply_quaidal_to_db.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import psycopg
|
||||
import yaml
|
||||
except ImportError as e:
|
||||
print(f"ERROR: missing dependency {e.name}. Install with: pip install psycopg[binary] pyyaml", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
DATA_DIR = REPO_ROOT / "control-pipeline" / "data" / "quaidal"
|
||||
|
||||
KIND_FILES = {
|
||||
"criterion": "master_controls.yaml",
|
||||
"building_block": "atomic_controls.yaml",
|
||||
"measure": "mitigations.yaml",
|
||||
"metric": "metrics.yaml",
|
||||
}
|
||||
|
||||
UPSERT_SQL = """
|
||||
INSERT INTO compliance.derived_controls (
|
||||
derived_id, kind, canonical_name, description, regulation_anchor,
|
||||
related_quaidal_ids, external_refs,
|
||||
source_framework, source_section, source_url, source_commit_sha,
|
||||
source_title_original, source_license_note,
|
||||
plagiarism_score_at_generation, generated_by_model, yaml_path
|
||||
) VALUES (
|
||||
%(derived_id)s, %(kind)s, %(canonical_name)s, %(description)s, %(regulation_anchor)s,
|
||||
%(related_quaidal_ids)s::jsonb, %(external_refs)s::jsonb,
|
||||
%(source_framework)s, %(source_section)s, %(source_url)s, %(source_commit_sha)s,
|
||||
%(source_title_original)s, %(source_license_note)s,
|
||||
%(plagiarism_score)s, %(generated_by_model)s, %(yaml_path)s
|
||||
)
|
||||
ON CONFLICT (derived_id) DO UPDATE SET
|
||||
kind = EXCLUDED.kind,
|
||||
canonical_name = EXCLUDED.canonical_name,
|
||||
description = EXCLUDED.description,
|
||||
regulation_anchor = EXCLUDED.regulation_anchor,
|
||||
related_quaidal_ids = EXCLUDED.related_quaidal_ids,
|
||||
external_refs = EXCLUDED.external_refs,
|
||||
source_framework = EXCLUDED.source_framework,
|
||||
source_section = EXCLUDED.source_section,
|
||||
source_url = EXCLUDED.source_url,
|
||||
source_commit_sha = EXCLUDED.source_commit_sha,
|
||||
source_title_original = EXCLUDED.source_title_original,
|
||||
source_license_note = EXCLUDED.source_license_note,
|
||||
plagiarism_score_at_generation = EXCLUDED.plagiarism_score_at_generation,
|
||||
generated_by_model = EXCLUDED.generated_by_model,
|
||||
yaml_path = EXCLUDED.yaml_path
|
||||
"""
|
||||
|
||||
|
||||
def load_yaml_records(yaml_path: Path) -> tuple[list[dict], str | None, str | None]:
|
||||
if not yaml_path.exists():
|
||||
return [], None, None
|
||||
data = yaml.safe_load(yaml_path.read_text(encoding="utf-8"))
|
||||
return data.get("controls", []), data.get("commit_sha"), data.get("generated_by_model")
|
||||
|
||||
|
||||
def to_row(ctrl: dict, yaml_path: Path, default_model: str | None, default_commit: str | None) -> dict:
|
||||
source = ctrl.get("source") or {}
|
||||
return {
|
||||
"derived_id": ctrl["id"],
|
||||
"kind": ctrl["kind"],
|
||||
"canonical_name": ctrl["canonical_name"],
|
||||
"description": ctrl["description"],
|
||||
"regulation_anchor": ctrl.get("regulation_anchor"),
|
||||
"related_quaidal_ids": json.dumps(ctrl.get("related_quaidal_ids", []), ensure_ascii=False),
|
||||
"external_refs": json.dumps(ctrl.get("external_refs", []), ensure_ascii=False),
|
||||
"source_framework": source.get("framework", "BSI QUAIDAL"),
|
||||
"source_section": source.get("section", ""),
|
||||
"source_url": source.get("url"),
|
||||
"source_commit_sha": source.get("commit_sha") or default_commit,
|
||||
"source_title_original": source.get("title_original_de"),
|
||||
"source_license_note": source.get("license_note"),
|
||||
"plagiarism_score": ctrl.get("plagiarism_score_at_generation"),
|
||||
"generated_by_model": default_model,
|
||||
"yaml_path": str(yaml_path.relative_to(REPO_ROOT)),
|
||||
}
|
||||
|
||||
|
||||
def build_dsn(args: argparse.Namespace) -> str:
    """Build the PostgreSQL connection string from parsed CLI arguments.

    Args:
        args: Namespace carrying ``dsn``, ``db_host``, ``db_port``,
            ``db_name``, ``db_user`` and ``db_password``.

    Returns:
        ``args.dsn`` unchanged when it is set; otherwise a libpq
        ``keyword=value`` connection string assembled from the individual
        settings.
    """
    if args.dsn:
        return args.dsn

    def conninfo_value(value) -> str:
        # libpq conninfo syntax: empty values and values containing whitespace
        # must be wrapped in single quotes, and quotes/backslashes escaped with
        # a backslash. Plain values are emitted unquoted so the output for
        # ordinary settings is unchanged.
        text = str(value)
        if text and not any(ch.isspace() or ch in "'\\" for ch in text):
            return text
        escaped = text.replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    settings = (
        ("host", args.db_host),
        ("port", args.db_port),
        ("dbname", args.db_name),
        ("user", args.db_user),
        ("password", args.db_password),
    )
    return " ".join(f"{key}={conninfo_value(val)}" for key, val in settings)
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: read the QUAIDAL YAML files and upsert them into the DB.

    Every file listed in KIND_FILES is loaded from DATA_DIR, each record is
    converted with ``to_row`` and written with UPSERT_SQL. With ``--dry-run``
    only a sample row is printed and no connection is opened.

    Returns:
        0 on success (including a dry run), 2 when no YAML records were found.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--dsn", help="Full DSN; overrides individual flags")
    ap.add_argument("--db-host", default=os.environ.get("DB_HOST", "localhost"))
    ap.add_argument("--db-port", default=os.environ.get("DB_PORT", "5432"))
    ap.add_argument("--db-name", default=os.environ.get("DB_NAME", "breakpilot_db"))
    ap.add_argument("--db-user", default=os.environ.get("DB_USER", "breakpilot"))
    ap.add_argument("--db-password", default=os.environ.get("DB_PASSWORD", "breakpilot"))
    ap.add_argument("--dry-run", action="store_true")
    args = ap.parse_args()

    rows: list[dict] = []
    for fname in KIND_FILES.values():
        path = DATA_DIR / fname
        records, commit, model = load_yaml_records(path)
        rows.extend(to_row(rec, path, model, commit) for rec in records)
        if records:
            print(f" {fname}: {len(records)} entries", file=sys.stderr)

    if not rows:
        print("ERROR: no YAML records found; run derive_quaidal_mcs.py first", file=sys.stderr)
        return 2

    # One row is produced per record, so len(rows) is the total record count.
    print(f"Total rows: {len(rows)}", file=sys.stderr)
    if args.dry_run:
        print("Dry run — sample row:", file=sys.stderr)
        # Long strings are truncated to keep the sample readable.
        sample = {k: (v[:200] if isinstance(v, str) else v) for k, v in rows[0].items()}
        print(json.dumps(sample, indent=2, ensure_ascii=False))
        return 0

    dsn = build_dsn(args)
    if args.dsn:
        # --dsn overrides the individual flags, so echoing host/port/db here
        # would name the wrong target. The DSN itself may embed a password and
        # is deliberately not printed.
        print("Connecting using --dsn", file=sys.stderr)
    else:
        print(f"Connecting to {args.db_host}:{args.db_port}/{args.db_name}", file=sys.stderr)

    inserted = updated = 0
    with psycopg.connect(dsn) as conn:
        with conn.cursor() as cur:
            for row in rows:
                # Probe first so the summary can tell inserts from updates.
                cur.execute(
                    "SELECT 1 FROM compliance.derived_controls WHERE derived_id = %s",
                    (row["derived_id"],),
                )
                existed = cur.fetchone() is not None
                cur.execute(UPSERT_SQL, row)
                if existed:
                    updated += 1
                else:
                    inserted += 1
        conn.commit()
    print(f"Inserted: {inserted}, Updated: {updated}", file=sys.stderr)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -0,0 +1,256 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Audit script for license classification gaps in the control pipeline.
|
||||
|
||||
Reports:
|
||||
|
||||
1. **regulation_registry coverage** — how many regulations are classified, by
|
||||
rule and license_type.
|
||||
2. **atomic_controls without license_rule** — how many controls reference a
|
||||
regulation_id that has no entry (or no license_rule) in the registry.
|
||||
3. **Qdrant payload consistency** — for each indexed collection, how many
|
||||
chunks carry both ``license`` and ``license_rule`` payload fields.
|
||||
|
||||
The goal is to surface every record where the engine could in principle
|
||||
extract or emit content but the license rule is unknown — those records are
|
||||
the highest-risk material in a license audit.
|
||||
|
||||
Usage::
|
||||
|
||||
python3 scripts/audit_license_classification.py --db-host 100.80.114.48
|
||||
|
||||
Add ``--check-qdrant`` to also probe ``http://<host>:6333`` collections.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from urllib import request as urllib_request
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
|
||||
DEFAULT_HOST = "100.80.114.48"
|
||||
DEFAULT_PORT = 5432
|
||||
DEFAULT_USER = "breakpilot"
|
||||
DEFAULT_DB = "breakpilot_db"
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command line of the license classification audit.

    Returns:
        Namespace with the PostgreSQL settings (``db_*``), the Qdrant settings
        (``check_qdrant``, ``qdrant_host``, ``qdrant_port``) and the ``json``
        output switch.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # Declared as data so every option and its defaults sit in one table;
    # registration order is the order shown in --help.
    option_table = (
        ("--db-host", {"default": DEFAULT_HOST}),
        ("--db-port", {"type": int, "default": DEFAULT_PORT}),
        ("--db-user", {"default": DEFAULT_USER}),
        ("--db-name", {"default": DEFAULT_DB}),
        ("--db-password", {"default": ""}),
        ("--check-qdrant", {"action": "store_true"}),
        ("--qdrant-host", {"default": "100.80.114.48"}),
        ("--qdrant-port", {"type": int, "default": 6333}),
        ("--json", {"action": "store_true", "help": "Emit JSON result on stdout"}),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
|
||||
|
||||
|
||||
def audit_registry(conn) -> dict:
    """Summarise how regulation_registry rows are classified.

    Args:
        conn: Open DB-API connection; its search_path is switched to
            ``compliance, public`` by the first statement.

    Returns:
        Dict with ``total`` (row count), ``by_rule`` (count per license_rule),
        ``by_rule_and_type`` (list of ``(rule, type, count)`` tuples, with a
        NULL/empty type shown as ``"(empty)"``) and ``missing_license_type``
        (rows whose license_type is NULL or empty).
    """
    cursor = conn.cursor()

    grouped_sql = (
        "SET search_path TO compliance, public; "
        "SELECT license_rule, license_type, COUNT(*) "
        "FROM regulation_registry GROUP BY license_rule, license_type "
        "ORDER BY license_rule, license_type;"
    )
    cursor.execute(grouped_sql)
    breakdown = [
        (rule, license_type or "(empty)", n)
        for rule, license_type, n in cursor.fetchall()
    ]
    # Collapse the (rule, type) breakdown to per-rule totals.
    per_rule: Counter = Counter()
    for rule, _license_type, n in breakdown:
        per_rule[rule] += n

    untyped_sql = (
        "SELECT COUNT(*) FROM regulation_registry "
        "WHERE license_type IS NULL OR license_type = '';"
    )
    cursor.execute(untyped_sql)
    (untyped_rows,) = cursor.fetchone()[:1]

    cursor.execute("SELECT COUNT(*) FROM regulation_registry;")
    (row_total,) = cursor.fetchone()[:1]

    summary = {"total": row_total}
    summary["by_rule"] = dict(per_rule)
    summary["by_rule_and_type"] = breakdown
    summary["missing_license_type"] = untyped_rows
    return summary
|
||||
|
||||
|
||||
def audit_atomic_controls(conn) -> dict:
    """Count rows with and without a license_rule in the controls tables.

    Looks in the ``compliance`` schema for ``atomic_controls``,
    ``atomic_controls_dedup`` and ``canonical_controls``. Not every
    deployment has all of them, so only the tables that exist are inspected,
    and a table without a ``license_rule`` column is reported as skipped.

    Args:
        conn: Open DB-API connection.

    Returns:
        ``{"skipped": True, "reason": ...}`` when none of the tables exist;
        otherwise ``{"tables": {name: info}}`` where ``info`` is either a
        skip marker or a dict with ``total``, ``by_rule`` (keys stringified)
        and ``missing_license_rule``.
    """
    cursor = conn.cursor()
    cursor.execute(
        "SELECT table_name FROM information_schema.tables "
        "WHERE table_schema='compliance' AND table_name IN "
        "('atomic_controls','atomic_controls_dedup','canonical_controls');"
    )
    present = [record[0] for record in cursor.fetchall()]
    if len(present) == 0:
        return {"skipped": True, "reason": "no controls table found"}

    per_table: dict = {}
    for name in present:
        cursor.execute(
            f"SELECT column_name FROM information_schema.columns "
            f"WHERE table_schema='compliance' AND table_name='{name}';"
        )
        column_names = {record[0] for record in cursor.fetchall()}
        if "license_rule" in column_names:
            qualified = f"compliance.{name}"

            cursor.execute(f"SELECT COUNT(*) FROM {qualified};")
            row_total = cursor.fetchone()[0]

            cursor.execute(
                f"SELECT license_rule, COUNT(*) FROM {qualified} "
                f"GROUP BY license_rule ORDER BY license_rule;"
            )
            rule_counts = {}
            for rule, n in cursor.fetchall():
                # Keys are stringified, so a NULL rule appears as "None".
                rule_counts[str(rule)] = n

            cursor.execute(f"SELECT COUNT(*) FROM {qualified} WHERE license_rule IS NULL;")
            unclassified = cursor.fetchone()[0]

            per_table[name] = {
                "total": row_total,
                "by_rule": rule_counts,
                "missing_license_rule": unclassified,
            }
        else:
            per_table[name] = {"skipped": True, "reason": "no license_rule column"}
    return {"tables": per_table}
|
||||
|
||||
|
||||
def audit_qdrant(host: str, port: int) -> dict:
    """Check Qdrant collections for ``license`` / ``license_rule`` payload fields.

    Only collections whose name contains ``compliance`` or ``atomic_controls``
    are inspected. For each one a single scroll request with a limit of 500
    points is issued and the returned payloads are tallied.

    Args:
        host: Qdrant host name or address.
        port: Qdrant HTTP port.

    Returns:
        ``{"error": message}`` when the collection list cannot be fetched;
        otherwise ``{"collections": {name: stats}}`` where ``stats`` holds the
        sample size and the both/only-one/neither counts, or an ``error``
        entry when that collection's scroll request failed.
    """
    root_url = f"http://{host}:{port}"
    try:
        with urllib_request.urlopen(f"{root_url}/collections", timeout=10) as resp:
            listing = json.loads(resp.read())
        collections = listing.get("result", {}).get("collections", [])
    except Exception as exc:
        return {"error": str(exc)}

    report: dict = {"collections": {}}
    for entry in collections:
        coll_name = entry["name"]
        is_relevant = "compliance" in coll_name or "atomic_controls" in coll_name
        if not is_relevant:
            continue

        scroll_body = json.dumps(
            {"limit": 500, "with_payload": True, "with_vector": False}
        ).encode()
        scroll_req = urllib_request.Request(
            f"{root_url}/collections/{coll_name}/points/scroll",
            data=scroll_body,
            headers={"Content-Type": "application/json"},
        )
        try:
            with urllib_request.urlopen(scroll_req, timeout=15) as resp:
                batch = json.loads(resp.read()).get("result", {}).get("points", [])
        except Exception as exc:
            # A failing collection is recorded and the remaining ones still run.
            report["collections"][coll_name] = {"error": str(exc)}
            continue

        tally = {"both": 0, "license": 0, "rule": 0, "none": 0}
        for point in batch:
            fields = point.get("payload", {}) or {}
            # ``license`` counts only when truthy; ``license_rule`` counts
            # whenever it is present and not None (so 0 is a valid rule).
            lic_present = bool(fields.get("license"))
            rule_present = fields.get("license_rule") is not None
            if lic_present:
                tally["both" if rule_present else "license"] += 1
            else:
                tally["rule" if rule_present else "none"] += 1

        sample_size = len(batch)
        if sample_size:
            none_share = round(tally["none"] / sample_size * 100, 1)
        else:
            none_share = 0
        report["collections"][coll_name] = {
            "sampled": sample_size,
            "both_set": tally["both"],
            "only_license_field": tally["license"],
            "only_license_rule_field": tally["rule"],
            "neither_set": tally["none"],
            "neither_pct": none_share,
        }
    return report
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: run the audits and print the result.

    Runs the registry and controls audits against PostgreSQL and, when
    ``--check-qdrant`` is given, the Qdrant payload audit. Output is either a
    JSON document (``--json``) or a plain-text report on stdout.

    Returns:
        0 after the report was printed, 2 when psycopg2 is not installed.
    """
    args = parse_args()
    try:
        # Imported lazily so a missing driver yields a clear message.
        import psycopg2
    except ImportError:
        print("error: psycopg2 not installed (pip install psycopg2-binary)", file=sys.stderr)
        return 2

    conn = psycopg2.connect(
        host=args.db_host,
        port=args.db_port,
        user=args.db_user,
        dbname=args.db_name,
        # An empty password is passed as None rather than as an empty string.
        password=args.db_password or None,
    )
    try:
        registry = audit_registry(conn)
        controls = audit_atomic_controls(conn)
    finally:
        conn.close()

    qdrant: Optional[dict] = None
    if args.check_qdrant:
        qdrant = audit_qdrant(args.qdrant_host, args.qdrant_port)

    result = {"registry": registry, "atomic_controls": controls, "qdrant": qdrant}

    if args.json:
        print(json.dumps(result, indent=2, default=str))
        return 0

    print("=" * 60)
    print(" Audit — License Classification")
    print("=" * 60)
    print()
    print(f"## regulation_registry ({registry['total']} rows)")
    print(f" By rule: {registry['by_rule']}")
    print(f" Missing license_type: {registry['missing_license_type']}")
    print()
    print("## atomic_controls")
    if controls.get("skipped"):
        # The whole audit was skipped (no controls table); say so instead of
        # leaving the section empty.
        print(f" SKIPPED ({controls['reason']})")
    for tbl, info in controls.get("tables", {}).items():
        if info.get("skipped"):
            print(f" {tbl}: SKIPPED ({info['reason']})")
            continue
        print(f" {tbl}: {info['total']} rows")
        print(f" by_rule={info['by_rule']}")
        print(f" missing_license_rule={info['missing_license_rule']}")
    print()
    if qdrant:
        print("## qdrant")
        if "error" in qdrant:
            # Listing the collections failed; without this line the section
            # would be empty and look like "nothing to report".
            print(f" ERROR {qdrant['error']}")
        for name, info in qdrant.get("collections", {}).items():
            if "error" in info:
                print(f" {name}: ERROR {info['error']}")
                continue
            print(
                f" {name:30} sampled={info['sampled']:4} "
                f"both={info['both_set']:4} "
                f"neither={info['neither_set']:4} ({info['neither_pct']}%)"
            )
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -0,0 +1,184 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Backfill license_rule on canonical_controls by inheriting from parent.
|
||||
|
||||
Background
|
||||
==========
|
||||
|
||||
Audit (audit_license_classification.py) showed that 279,384 of 314,811 rows
|
||||
in compliance.canonical_controls have NULL license_rule. Drilling in:
|
||||
|
||||
- 261,980 of those (94%) have a parent_control_uuid whose parent already
|
||||
carries a non-NULL license_rule. The pass0b decomposition pipeline did
|
||||
not propagate the rule to its child controls — this is a clear inheritance
|
||||
bug, fixable without any classification decisions.
|
||||
- 16,617 have a parent that itself has no license_rule (transitive case).
|
||||
Inheriting iteratively converges to either rule-set or root-orphan.
|
||||
- 787 have no parent at all (decomposition roots). These need cluster-based
|
||||
manual classification (see Strategy Notes at the bottom of this file).
|
||||
|
||||
This script runs the inheritance fix in three idempotent stages and
|
||||
prints per-stage counts before any write happens.
|
||||
|
||||
Usage::
|
||||
|
||||
# Always dry-run first:
|
||||
python3 scripts/backfill_license_rule.py --db-host 100.80.114.48 \\
|
||||
--db-password breakpilot123 --dry-run
|
||||
|
||||
# If counts look right:
|
||||
python3 scripts/backfill_license_rule.py --db-host 100.80.114.48 \\
|
||||
--db-password breakpilot123 --apply
|
||||
|
||||
The script is safe to rerun — it only touches rows where license_rule
|
||||
IS NULL.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Build the CLI for the license_rule backfill and parse ``sys.argv``.

    Returns:
        Namespace with ``db_host``, ``db_port``, ``db_user``, ``db_name``,
        ``db_password``, ``dry_run``, ``apply`` and ``max_iterations``.

    Exactly one of ``--dry-run`` / ``--apply`` must be given, and
    ``--db-password`` is mandatory; argparse exits the process otherwise.
    """
    parser = argparse.ArgumentParser(description=__doc__)

    # Connection options, registered in the order they should show in --help.
    connection_options = (
        ("--db-host", {"default": "100.80.114.48"}),
        ("--db-port", {"type": int, "default": 5432}),
        ("--db-user", {"default": "breakpilot"}),
        ("--db-name", {"default": "breakpilot_db"}),
        ("--db-password", {"required": True}),
    )
    for flag, options in connection_options:
        parser.add_argument(flag, **options)

    # The run mode is an explicit, mutually exclusive choice.
    mode = parser.add_mutually_exclusive_group(required=True)
    for flag in ("--dry-run", "--apply"):
        mode.add_argument(flag, action="store_true")

    parser.add_argument(
        "--max-iterations",
        type=int,
        default=5,
        help="Cap on inheritance iterations to avoid loops",
    )
    return parser.parse_args()
|
||||
|
||||
|
||||
# Stage 1: direct parent has license_rule — single UPDATE.
|
||||
# Stage 2: iterative — parent did not have it, but a grandparent does.
|
||||
# We loop until no more rows can be filled or max-iterations.
|
||||
# Stage 3: residual rows with no resolvable parent. Report them clustered
|
||||
# by category/pattern_id so the user can classify by family.
|
||||
|
||||
SQL_REPORT_NULLS = """
|
||||
SET search_path TO compliance, public;
|
||||
SELECT
|
||||
CASE WHEN cc.parent_control_uuid IS NULL THEN 'no_parent'
|
||||
WHEN p.license_rule IS NULL THEN 'parent_null'
|
||||
ELSE 'parent_set' END AS bucket,
|
||||
COUNT(*) AS n
|
||||
FROM canonical_controls cc
|
||||
LEFT JOIN canonical_controls p ON cc.parent_control_uuid = p.id
|
||||
WHERE cc.license_rule IS NULL
|
||||
GROUP BY 1 ORDER BY 2 DESC;
|
||||
"""
|
||||
|
||||
SQL_INHERIT_FROM_PARENT = """
|
||||
SET search_path TO compliance, public;
|
||||
UPDATE canonical_controls cc
|
||||
SET license_rule = p.license_rule, updated_at = NOW()
|
||||
FROM canonical_controls p
|
||||
WHERE cc.parent_control_uuid = p.id
|
||||
AND cc.license_rule IS NULL
|
||||
AND p.license_rule IS NOT NULL;
|
||||
"""
|
||||
|
||||
SQL_REPORT_ORPHAN_CLUSTERS = """
|
||||
SET search_path TO compliance, public;
|
||||
SELECT
|
||||
COALESCE(category, '(null)') AS category,
|
||||
COALESCE(pattern_id, '(null)') AS pattern_id,
|
||||
COALESCE(generation_strategy, '(null)') AS gen,
|
||||
COUNT(*) AS n
|
||||
FROM canonical_controls
|
||||
WHERE license_rule IS NULL AND parent_control_uuid IS NULL
|
||||
GROUP BY 1, 2, 3 ORDER BY n DESC LIMIT 25;
|
||||
"""
|
||||
|
||||
|
||||
def print_bucket(rows, label: str) -> None:
    """Print a small two-column table of ``(bucket, count)`` rows plus a total.

    Args:
        rows: Iterable of ``(bucket_name, count)`` pairs.
        label: Heading printed above the table.
    """
    print(f"\n## {label}")
    running_total = 0
    for name, count in rows:
        # Bucket name padded to 12 columns, count right-aligned in 8.
        print(" {:12} {:>8}".format(name, count))
        running_total += count
    print(" {:12} {:>8}".format("TOTAL", running_total))
|
||||
|
||||
|
||||
def _print_orphan_clusters(rows) -> None:
    """Print one line per ``(category, pattern_id, generation_strategy, n)`` row."""
    for cat, pid, gen, n in rows:
        print(f" cat={cat[:20]:20} pat={pid[:20]:20} gen={gen[:20]:20} n={n}")


def main() -> int:
    """Run the license_rule backfill in dry-run or apply mode.

    Dry-run prints the current NULL distribution, what the first inheritance
    pass would fill, and the orphan clusters, then rolls back. Apply repeats
    the parent -> child UPDATE until a pass changes nothing or
    ``--max-iterations`` is reached, commits once, and prints what remains.

    Returns:
        0 on success, 2 when psycopg2 is not installed.
    """
    args = parse_args()
    try:
        import psycopg2
    except ImportError:
        print("error: psycopg2 not installed", file=sys.stderr)
        return 2

    conn = psycopg2.connect(
        host=args.db_host, port=args.db_port, user=args.db_user,
        dbname=args.db_name, password=args.db_password,
    )
    # Always release the connection. Closing without commit discards any
    # uncommitted UPDATEs, so a failure mid-run leaves the table untouched.
    try:
        conn.autocommit = False
        cur = conn.cursor()

        print("=" * 60)
        print(" Backfill — license_rule via parent inheritance")
        print(f" Mode: {'DRY-RUN' if args.dry_run else 'APPLY'}")
        print("=" * 60)

        # Initial bucket report
        cur.execute(SQL_REPORT_NULLS)
        print_bucket(cur.fetchall(), "Initial NULL distribution")

        if args.dry_run:
            # Print what the FIRST inherit pass would resolve (without writing)
            cur.execute(
                "SET search_path TO compliance, public; "
                "SELECT p.license_rule, COUNT(*) "
                "FROM canonical_controls cc "
                "JOIN canonical_controls p ON cc.parent_control_uuid = p.id "
                "WHERE cc.license_rule IS NULL AND p.license_rule IS NOT NULL "
                "GROUP BY 1 ORDER BY 1;"
            )
            print("\n## First inherit-pass would fill:")
            for rule, n in cur.fetchall():
                print(f" rule={rule} {n:>8} rows")

            # Show orphan clusters that would remain
            cur.execute(SQL_REPORT_ORPHAN_CLUSTERS)
            print("\n## Orphan clusters (no parent + no rule, top 25):")
            _print_orphan_clusters(cur.fetchall())
            print("\nNo writes performed. Use --apply to execute.")
            conn.rollback()
            return 0

        # Apply mode — iterative inheritance. Each pass can only fill rows
        # whose direct parent already has a rule, so deeper descendants need
        # further passes.
        total_updated = 0
        converged = False
        for i in range(1, args.max_iterations + 1):
            cur.execute(SQL_INHERIT_FROM_PARENT)
            updated = cur.rowcount
            total_updated += updated
            print(f"\n iteration {i}: {updated} rows updated")
            if updated == 0:
                converged = True
                break

        conn.commit()
        print(f"\n✓ Total rows backfilled: {total_updated}")
        if not converged:
            # The cap was reached before a pass with zero updates was seen.
            print(
                f" note: stopped at --max-iterations={args.max_iterations} "
                "before a pass with 0 updates; inheritance may be incomplete, "
                "rerun to continue"
            )

        # Final bucket report
        cur.execute(SQL_REPORT_NULLS)
        print_bucket(cur.fetchall(), "Remaining NULL distribution")

        cur.execute(SQL_REPORT_ORPHAN_CLUSTERS)
        rows = cur.fetchall()
        if rows:
            print("\n## Orphan clusters still need classification:")
            _print_orphan_clusters(rows)

        return 0
    finally:
        conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -0,0 +1,203 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Backfill ``license_rule`` payload field into Qdrant atomic_controls_dedup
|
||||
and related compliance collections, sourced from canonical_controls in Postgres.
|
||||
|
||||
The audit (audit_license_classification.py) surfaced that Qdrant collections
|
||||
holding canonical-control vectors (notably ``atomic_controls_dedup``) carry no
|
||||
license_rule payload at all, even though the underlying Postgres table is now
|
||||
fully classified. This script joins the two via ``control_uuid`` and patches the
|
||||
Qdrant payload in batches.
|
||||
|
||||
Usage::
|
||||
|
||||
python3 scripts/backfill_qdrant_license_payload.py \\
|
||||
--pg-host 100.80.114.48 --pg-password breakpilot123 \\
|
||||
--qdrant http://100.80.114.48:6333 \\
|
||||
--collection atomic_controls_dedup \\
|
||||
--dry-run
|
||||
|
||||
# apply
|
||||
python3 scripts/backfill_qdrant_license_payload.py ... --apply
|
||||
|
||||
Notes
|
||||
-----
|
||||
- ``control_uuid`` lives in the payload of atomic_controls_dedup. For other
|
||||
collections that key the canonical control by a different field, override with
|
||||
``--uuid-field``.
|
||||
- Qdrant ``set_payload`` is keyed by point id, not payload field. We resolve
|
||||
UUID → point id by a paginated scroll-and-filter pass, then issue grouped
|
||||
set_payload requests per license_rule (3 batches per collection).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from typing import Iterator
|
||||
from urllib import request as urllib_request
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Build the CLI for the Qdrant payload backfill and parse ``sys.argv``.

    Returns:
        Namespace with the Postgres settings (``pg_*``), the Qdrant settings
        (``qdrant``, ``qdrant_api_key``, ``collection``, ``uuid_field``),
        ``lookup``, ``batch_size`` and the mode flags ``dry_run`` / ``apply``.

    ``--pg-password`` and exactly one of ``--dry-run`` / ``--apply`` are
    required; argparse exits the process otherwise.
    """
    parser = argparse.ArgumentParser(description=__doc__)

    # Postgres connection options.
    postgres_options = (
        ("--pg-host", {"default": "100.80.114.48"}),
        ("--pg-port", {"type": int, "default": 5432}),
        ("--pg-user", {"default": "breakpilot"}),
        ("--pg-name", {"default": "breakpilot_db"}),
        ("--pg-password", {"required": True}),
    )
    for flag, options in postgres_options:
        parser.add_argument(flag, **options)

    # Qdrant endpoint and how points are matched against Postgres.
    parser.add_argument("--qdrant", default="http://100.80.114.48:6333")
    parser.add_argument(
        "--qdrant-api-key",
        default="",
        help="API key for managed Qdrant (Production)",
    )
    parser.add_argument("--collection", default="atomic_controls_dedup")
    parser.add_argument(
        "--uuid-field",
        default="control_uuid",
        help="Payload field used for lookup (control_uuid or regulation_id)",
    )
    parser.add_argument(
        "--lookup",
        choices=["canonical_controls", "regulation_registry"],
        default="canonical_controls",
        help="Postgres table to resolve the lookup against",
    )
    parser.add_argument("--batch-size", type=int, default=500)

    mode = parser.add_mutually_exclusive_group(required=True)
    for flag in ("--dry-run", "--apply"):
        mode.add_argument(flag, action="store_true")

    return parser.parse_args()
|
||||
|
||||
|
||||
def fetch_rule_by_uuid(args) -> dict[str, int]:
    """Pull lookup-key → license_rule mapping from Postgres.

    Source table is chosen by ``args.lookup``:
    - canonical_controls: id (UUID, cast to text) → license_rule
    - regulation_registry: regulation_id → license_rule

    Only rows with a non-NULL ``license_rule`` are returned.

    Args:
        args: Parsed CLI namespace; reads ``pg_host``, ``pg_port``,
            ``pg_user``, ``pg_name``, ``pg_password`` and ``lookup``.

    Returns:
        Dict mapping the lookup key to the rule converted with ``int()``.
    """
    import psycopg2

    if args.lookup == "regulation_registry":
        query = (
            "SELECT regulation_id, license_rule FROM regulation_registry "
            "WHERE license_rule IS NOT NULL"
        )
    else:
        query = (
            "SELECT id::text, license_rule FROM canonical_controls "
            "WHERE license_rule IS NOT NULL"
        )

    conn = psycopg2.connect(
        host=args.pg_host, port=args.pg_port, user=args.pg_user,
        dbname=args.pg_name, password=args.pg_password,
    )
    # Release the connection even when a query or the int() conversion fails.
    try:
        cur = conn.cursor()
        cur.execute("SET search_path TO compliance, public;")
        cur.execute(query)
        return {row[0]: int(row[1]) for row in cur.fetchall()}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _headers(api_key: str = "") -> dict:
|
||||
h = {"Content-Type": "application/json"}
|
||||
if api_key:
|
||||
h["api-key"] = api_key
|
||||
return h
|
||||
|
||||
|
||||
def scroll_collection(qdrant: str, collection: str, uuid_field: str, api_key: str = "") -> Iterator[dict]:
    """Iterate over every point of a Qdrant collection via the scroll endpoint.

    Pages of 1000 points are requested with payload and without vectors; the
    ``next_page_offset`` of each response is sent as ``offset`` of the next
    request until the server returns none.

    Args:
        qdrant: Base URL of the Qdrant instance.
        collection: Collection name, interpolated into the request path.
        uuid_field: Payload key whose value is reported as ``"uuid"``.
        api_key: Optional API key forwarded through ``_headers``.

    Yields:
        One dict per point with the keys ``"id"`` (the point id), ``"uuid"``
        (payload value of ``uuid_field`` or ``None``) and ``"has_rule"``
        (``True`` when the payload already contains a ``license_rule`` key).

    Raises:
        Whatever ``urllib.request.urlopen`` / ``json.loads`` raise on
        transport, HTTP or decoding errors.
    """
    next_offset = None
    while True:
        body = {"limit": 1000, "with_payload": True, "with_vector": False}
        if next_offset is not None:
            body["offset"] = next_offset
        req = urllib_request.Request(
            f"{qdrant}/collections/{collection}/points/scroll",
            data=json.dumps(body).encode(),
            headers=_headers(api_key),
            # Explicit for parity with set_payload_batch; a Request carrying
            # data would default to POST anyway.
            method="POST",
        )
        with urllib_request.urlopen(req, timeout=60) as r:
            payload = json.loads(r.read())
        # Tolerate an explicit null "result" as well as a missing one.
        result = payload.get("result") or {}
        for pt in result.get("points", []):
            pl = pt.get("payload", {}) or {}
            yield {
                "id": pt["id"],
                "uuid": pl.get(uuid_field),
                "has_rule": "license_rule" in pl,
            }
        next_offset = result.get("next_page_offset")
        if next_offset is None:
            break
|
||||
|
||||
|
||||
def set_payload_batch(qdrant: str, collection: str, point_ids: list, rule: int, api_key: str = "") -> int:
    """Set ``license_rule`` on a batch of Qdrant points with one POST.

    Args:
        qdrant: Base URL of the Qdrant instance.
        collection: Target collection name.
        point_ids: Point ids that receive the payload.
        rule: Value stored under the ``license_rule`` payload key.
        api_key: Optional API key forwarded through ``_headers``.

    Returns:
        ``len(point_ids)``, i.e. the number of ids submitted.

    Raises:
        RuntimeError: When the response ``status`` is not ``"ok"``.
    """
    endpoint = f"{qdrant}/collections/{collection}/points/payload?wait=true"
    encoded = json.dumps(
        {"payload": {"license_rule": rule}, "points": point_ids}
    ).encode()
    request = urllib_request.Request(
        endpoint, data=encoded, headers=_headers(api_key), method="POST"
    )
    with urllib_request.urlopen(request, timeout=120) as reply:
        answer = json.loads(reply.read())
    if answer.get("status") != "ok":
        raise RuntimeError(f"set_payload failed: {answer}")
    return len(point_ids)
|
||||
|
||||
|
||||
def main() -> int:
    """Backfill ``license_rule`` into a Qdrant collection from Postgres.

    Steps: load the key → rule mapping, scroll the whole collection, group
    the ids of points that still lack a ``license_rule`` by the rule they
    should get, print the statistics and, with ``--apply``, write the
    payload in batches of ``--batch-size``.

    Returns:
        0 on success, 2 when ``--batch-size`` is smaller than 1.
    """
    args = parse_args()
    if args.batch_size < 1:
        # range() with a step of 0 raises; a negative step would silently
        # write nothing.
        print("error: --batch-size must be >= 1", file=sys.stderr)
        return 2

    # Name the table that is actually queried (depends on --lookup).
    print(f"Loading {args.lookup} → license_rule mapping…")
    rule_by_uuid = fetch_rule_by_uuid(args)
    print(f" Postgres returned {len(rule_by_uuid)} classified controls")

    print(f"Scrolling Qdrant collection {args.collection!r}…")
    by_rule: dict[int, list] = {1: [], 2: [], 3: []}
    points_total = 0
    points_with_uuid = 0
    points_already_set = 0
    points_no_match = 0
    points_unknown_rule = 0

    for pt in scroll_collection(args.qdrant, args.collection, args.uuid_field, args.qdrant_api_key):
        points_total += 1
        key = pt["uuid"]
        if not key:
            continue
        points_with_uuid += 1
        if pt["has_rule"]:
            points_already_set += 1
            continue
        rule = rule_by_uuid.get(key)
        if rule is None:
            points_no_match += 1
            continue
        if rule not in by_rule:
            # Rule value outside 1..3: skip it, but count it so it shows up
            # in the report instead of disappearing.
            points_unknown_rule += 1
            continue
        by_rule[rule].append(pt["id"])

    print(f" total points scanned: {points_total}")
    print(f" with {args.uuid_field}: {points_with_uuid}")
    print(f" already had license_rule: {points_already_set}")
    print(f" uuid not found in Postgres: {points_no_match}")
    print(f" unexpected license_rule value (skipped): {points_unknown_rule}")
    print(f" to set per rule: rule1={len(by_rule[1])} rule2={len(by_rule[2])} rule3={len(by_rule[3])}")

    if args.dry_run:
        print("\nDRY-RUN: no writes performed. Use --apply to execute.")
        return 0

    total_written = 0
    for rule, ids in by_rule.items():
        if not ids:
            continue
        print(f"\nWriting license_rule={rule} to {len(ids)} points (batch {args.batch_size})…")
        for i in range(0, len(ids), args.batch_size):
            chunk = ids[i:i + args.batch_size]
            n = set_payload_batch(args.qdrant, args.collection, chunk, rule, args.qdrant_api_key)
            total_written += n
            print(f" batch {i // args.batch_size + 1}: {n} points (cumulative {total_written})")
            # Short pause between write requests.
            time.sleep(0.05)
    print(f"\nWrote license_rule on {total_written} Qdrant points in {args.collection}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -0,0 +1,310 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Derive doc_check_controls from existing Master Controls.
|
||||
|
||||
Filters MCs by document-relevant regulations, then uses Claude Haiku
|
||||
to generate check_question + pass_criteria + fail_criteria per control.
|
||||
|
||||
Usage:
|
||||
python3 /app/scripts/derive_doc_check_controls.py --dry-run
|
||||
python3 /app/scripts/derive_doc_check_controls.py
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
|
||||
)
|
||||
logger = logging.getLogger("doc-check-derive")
|
||||
|
||||
DB_URL = os.getenv(
|
||||
"DATABASE_URL",
|
||||
"postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db",
|
||||
)
|
||||
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
|
||||
ANTHROPIC_URL = "https://api.anthropic.com/v1/messages"
|
||||
|
||||
# Document types and their regulation sources
|
||||
DOC_TYPES = {
|
||||
"dse": {
|
||||
"name": "Datenschutzinformation",
|
||||
"sources": ["DSGVO (EU) 2016/679"],
|
||||
"articles": ["%13%", "%14%"],
|
||||
"extra_tokens": ["personal_data%", "data_subject_rights%", "consent%",
|
||||
"data_processing_register%", "data_transfer%"],
|
||||
},
|
||||
"cookie": {
|
||||
"name": "Cookie-Richtlinie",
|
||||
"sources": ["TDDDG", "ePrivacy-Richtlinie"],
|
||||
"articles": ["%25%", "%5%"],
|
||||
"extra_tokens": ["cookie_consent%", "consent%"],
|
||||
},
|
||||
"impressum": {
|
||||
"name": "Impressum",
|
||||
"sources": ["TMG"],
|
||||
"articles": ["%5%"],
|
||||
"extra_tokens": ["ecommerce%"],
|
||||
},
|
||||
"widerruf": {
|
||||
"name": "Widerrufsbelehrung",
|
||||
"sources": ["BGB"],
|
||||
"articles": ["%355%", "%312%"],
|
||||
"extra_tokens": ["consumer_protection%"],
|
||||
},
|
||||
"agb": {
|
||||
"name": "AGB",
|
||||
"sources": ["BGB"],
|
||||
"articles": ["%305%", "%307%", "%308%", "%309%"],
|
||||
"extra_tokens": ["consumer_protection%"],
|
||||
},
|
||||
"dsfa": {
|
||||
"name": "Datenschutz-Folgenabschaetzung",
|
||||
"sources": ["DSGVO (EU) 2016/679"],
|
||||
"articles": ["%35%"],
|
||||
"extra_tokens": ["dpia%"],
|
||||
},
|
||||
"avv": {
|
||||
"name": "Auftragsverarbeitung",
|
||||
"sources": ["DSGVO (EU) 2016/679"],
|
||||
"articles": ["%28%"],
|
||||
"extra_tokens": ["data_processing_agreement%"],
|
||||
},
|
||||
"loeschkonzept": {
|
||||
"name": "Loeschkonzept",
|
||||
"sources": ["DSGVO (EU) 2016/679"],
|
||||
"articles": ["%5%", "%17%"],
|
||||
"extra_tokens": ["data_retention%"],
|
||||
},
|
||||
}
|
||||
|
||||
SYSTEM_PROMPT = """Du erzeugst binäre Prüfkriterien für Compliance-Dokumente.
|
||||
|
||||
Für jeden Control erzeugst du:
|
||||
1. check_question: Eine JA/NEIN Frage die ein LLM anhand eines Dokuments beantworten kann
|
||||
2. pass_criteria: Konkrete Textinhalte die vorhanden sein MÜSSEN (3-5 Stück)
|
||||
3. fail_criteria: Typische Fehler/Mängel (2-3 Stück)
|
||||
4. severity: HIGH, MEDIUM oder LOW
|
||||
|
||||
REGELN:
|
||||
- check_question muss BINÄR beantwortbar sein (nicht "wie gut")
|
||||
- pass_criteria müssen KONKRET sein ("Name + Rechtsform + Anschrift", nicht "Angaben")
|
||||
- fail_criteria müssen TYPISCHE Fehler beschreiben
|
||||
- Alles auf Deutsch
|
||||
|
||||
Antworte als JSON-Array:
|
||||
[{"id":"...","check_question":"...","pass_criteria":["..."],"fail_criteria":["..."],"severity":"HIGH"}]"""
|
||||
|
||||
|
||||
def get_doc_controls(engine, doc_type: str, config: dict) -> list[dict]:
    """Collect the controls relevant for one document type.

    Two lookups are combined:
      1. children of controls whose ``source_citation`` matches one of
         ``config["sources"]`` and a LIKE pattern from ``config["articles"]``
         (at most 200 rows per source/article pair);
      2. members of master controls whose ``canonical_name`` matches a LIKE
         pattern from ``config["extra_tokens"]`` (at most 100 rows each).

    Controls in release state ``deprecated`` / ``rejected`` are excluded.
    The result is de-duplicated by ``control_id``; the first hit wins.

    Args:
        engine: SQLAlchemy engine used for the queries.
        doc_type: Key stored as ``doc_type`` on every returned control.
        config: Entry of ``DOC_TYPES`` (``sources``, ``articles`` and
            optionally ``extra_tokens``).

    Returns:
        List of dicts with ``uuid``, ``control_id``, ``title``,
        ``objective``, ``article`` and ``doc_type``.
    """
    # Statements are built once and reused for every parameter set.
    by_article_sql = text("""
        SELECT cc.id, cc.control_id, cc.title,
               COALESCE(cc.objective, '') as objective,
               pc.source_citation->>'article' as article
        FROM compliance.canonical_controls cc
        JOIN compliance.canonical_controls pc ON pc.id = cc.parent_control_uuid
        WHERE pc.source_citation->>'source' = :source
          AND pc.source_citation->>'article' LIKE :article
          AND cc.release_state NOT IN ('deprecated', 'rejected')
        LIMIT 200
    """)
    by_name_sql = text("""
        SELECT cc.id, cc.control_id, cc.title,
               COALESCE(cc.objective, '') as objective
        FROM compliance.master_controls mc
        JOIN compliance.master_control_members mcm ON mcm.master_control_uuid = mc.id
        JOIN compliance.canonical_controls cc ON cc.id = mcm.control_uuid
        WHERE mc.canonical_name LIKE :pattern
          AND cc.release_state NOT IN ('deprecated', 'rejected')
        LIMIT 100
    """)

    controls = []
    # One connection serves all lookups instead of one per query.
    with engine.connect() as c:
        # Strategy 1: By source + article
        for source in config["sources"]:
            for article in config["articles"]:
                rows = c.execute(
                    by_article_sql, {"source": source, "article": article}
                ).fetchall()
                for r in rows:
                    controls.append({
                        "uuid": str(r[0]), "control_id": r[1],
                        "title": r[2] or "", "objective": r[3] or "",
                        "article": r[4] or "", "doc_type": doc_type,
                    })

        # Strategy 2: By MC canonical_name
        for token_pattern in config.get("extra_tokens", []):
            rows = c.execute(by_name_sql, {"pattern": token_pattern}).fetchall()
            for r in rows:
                controls.append({
                    "uuid": str(r[0]), "control_id": r[1],
                    "title": r[2] or "", "objective": r[3] or "",
                    "article": "", "doc_type": doc_type,
                })

    # Deduplicate by control_id, keeping the first occurrence and its order.
    unique = {}
    for ctrl in controls:
        unique.setdefault(ctrl["control_id"], ctrl)
    return list(unique.values())
|
||||
|
||||
|
||||
def enrich_with_llm(controls: list[dict], doc_type_name: str) -> list[dict]:
    """Ask the Anthropic API for check criteria, five controls per request.

    For every control whose id appears in the model's JSON answer with a
    non-empty ``check_question``, the control dict is updated in place with
    ``check_question``, ``pass_criteria``, ``fail_criteria`` and ``severity``
    and appended to the result. A failing batch is logged and skipped.

    Args:
        controls: Control dicts with ``control_id``, ``title``, ``objective``.
        doc_type_name: Human-readable document type used in the prompt.

    Returns:
        The enriched subset of ``controls`` (same dict objects).
    """
    chunk = 5
    accepted = []
    request_headers = {
        "x-api-key": ANTHROPIC_API_KEY,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }

    for offset in range(0, len(controls), chunk):
        group = controls[offset:offset + chunk]

        lines = []
        for ctrl in group:
            lines.append(
                '- id="{}" doc="{}" t="{}" o="{}"'.format(
                    ctrl["control_id"], doc_type_name,
                    ctrl["title"], ctrl["objective"][:100],
                )
            )
        listing = "\n".join(lines)
        prompt = f"Dokumenttyp: {doc_type_name}\nErzeuge Prüfkriterien:\n{listing}"

        try:
            response = httpx.post(
                ANTHROPIC_URL,
                headers=request_headers,
                json={
                    "model": "claude-haiku-4-5-20251001",
                    "max_tokens": 2000, "temperature": 0.1,
                    "system": SYSTEM_PROMPT,
                    "messages": [{"role": "user", "content": prompt}],
                },
                timeout=45.0,
            )
            response.raise_for_status()
            answer = response.json().get("content", [{}])[0].get("text", "")

            # The JSON array may be wrapped in prose: cut from the first "["
            # to the last "]".
            first = answer.find("[")
            last = answer.rfind("]") + 1
            if first >= 0 and last > first:
                by_id = {}
                for entry in json.loads(answer[first:last]):
                    by_id[entry.get("id", "")] = entry
                for ctrl in group:
                    match = by_id.get(ctrl["control_id"], {})
                    if not match.get("check_question"):
                        continue
                    ctrl["check_question"] = match["check_question"]
                    ctrl["pass_criteria"] = match.get("pass_criteria", [])
                    ctrl["fail_criteria"] = match.get("fail_criteria", [])
                    ctrl["severity"] = match.get("severity", "MEDIUM")
                    accepted.append(ctrl)
        except Exception as exc:
            # Best effort: one bad batch must not abort the run.
            logger.error("Batch %d failed: %s", offset, exc)

        # Pause between requests.
        time.sleep(0.5)

    return accepted
|
||||
|
||||
|
||||
def main():
    """Derive doc_check_controls for the configured document types.

    Flow: select candidate controls per document type, enrich them through
    the LLM, then (unless ``--dry-run``) replace the contents of
    ``compliance.doc_check_controls`` and dump the result to
    ``/tmp/doc_check_controls.json``.

    Safety properties:
      * ``--dry-run`` performs no DDL and no DML.
      * The table is only emptied when at least one check was derived, so a
        failed LLM run cannot wipe existing data.
      * Table creation, DELETE and all INSERTs share one transaction.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--doc-type", choices=list(DOC_TYPES.keys()),
                        help="Only one doc type")
    args = parser.parse_args()

    if not ANTHROPIC_API_KEY:
        # Without a key every batch would fail and, in write mode, the run
        # would end with an empty result set.
        logger.error("ANTHROPIC_API_KEY is not set — aborting")
        return

    engine = create_engine(
        DB_URL, connect_args={"options": "-c search_path=compliance,public"}
    )

    doc_types = [args.doc_type] if args.doc_type else list(DOC_TYPES.keys())
    all_checks = []

    for dt in doc_types:
        config = DOC_TYPES[dt]
        logger.info("\n=== %s (%s) ===", dt, config["name"])

        controls = get_doc_controls(engine, dt, config)
        logger.info("Found %d relevant controls", len(controls))

        if not controls:
            continue

        enriched = enrich_with_llm(controls, config["name"])
        logger.info("Enriched %d with check criteria", len(enriched))
        all_checks.extend(enriched)

    logger.info("\nTotal: %d doc_check_controls across %d doc types",
                len(all_checks), len(doc_types))

    if args.dry_run:
        for dc in all_checks[:5]:
            logger.info(" [%s] %s: %s", dc["doc_type"], dc["control_id"],
                        dc.get("check_question", "?")[:80])
        logger.info("DRY RUN — not writing")
        return

    if not all_checks:
        # Never replace existing rows with nothing.
        logger.error("No doc_check_controls derived — existing table left untouched")
        return

    # Create table (if needed) and replace its contents atomically.
    with engine.begin() as c:
        c.execute(text("SET search_path TO compliance, public"))
        c.execute(text("""
            CREATE TABLE IF NOT EXISTS doc_check_controls (
                id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                control_id VARCHAR(500) NOT NULL,
                control_uuid UUID,
                doc_type VARCHAR(50) NOT NULL,
                title VARCHAR(500),
                regulation VARCHAR(200),
                article VARCHAR(100),
                check_question TEXT NOT NULL,
                pass_criteria JSONB DEFAULT '[]',
                fail_criteria JSONB DEFAULT '[]',
                severity VARCHAR(20) DEFAULT 'MEDIUM',
                created_at TIMESTAMPTZ DEFAULT NOW()
            )
        """))
        c.execute(text("""
            CREATE INDEX IF NOT EXISTS idx_doc_check_doc_type
            ON doc_check_controls(doc_type)
        """))
        c.execute(text("DELETE FROM doc_check_controls"))
        for dc in all_checks:
            c.execute(text("""
                INSERT INTO doc_check_controls
                    (control_id, control_uuid, doc_type, title,
                     check_question, pass_criteria, fail_criteria, severity)
                VALUES (:cid, CAST(:uuid AS uuid), :doc_type, :title,
                        :question, CAST(:pass AS jsonb),
                        CAST(:fail AS jsonb), :severity)
            """), {
                "cid": dc["control_id"],
                "uuid": dc["uuid"],
                "doc_type": dc["doc_type"],
                "title": dc["title"],
                "question": dc.get("check_question", ""),
                "pass": json.dumps(dc.get("pass_criteria", [])),
                "fail": json.dumps(dc.get("fail_criteria", [])),
                "severity": dc.get("severity", "MEDIUM"),
            })

    logger.info("Wrote %d doc_check_controls to DB", len(all_checks))

    # Save as JSON too
    Path("/tmp/doc_check_controls.json").write_text(
        json.dumps(all_checks, indent=2, ensure_ascii=False)
    )
    logger.info("Saved to /tmp/doc_check_controls.json")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,400 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Clean-Room MC derivation from BSI QUAIDAL.
|
||||
|
||||
For each QUAIDAL entry in the parsed index, ask a local LLM to produce our own
|
||||
wording for a Master Control / atomic control / mitigation / metric. Reject any
|
||||
output whose 4-gram overlap with the BSI source text exceeds PLAGIARISM_LIMIT.
|
||||
|
||||
We never store the BSI prose; only our own derived wording plus structural
|
||||
references (BSI section ID + URL + commit SHA).
|
||||
|
||||
Usage:
|
||||
# Single entry, prints to stdout for review:
|
||||
python3 control-pipeline/scripts/derive_quaidal_mcs.py --only QKB-01 --dry-run
|
||||
|
||||
# Full run, writes YAML:
|
||||
python3 control-pipeline/scripts/derive_quaidal_mcs.py --ollama-host macmini
|
||||
|
||||
Output: control-pipeline/data/quaidal/{master_controls,atomic_controls,mitigations,metrics}.yaml
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import httpx
|
||||
import yaml
|
||||
except ImportError as e:
|
||||
print(f"ERROR: missing dependency {e.name}. Install with: pip install httpx pyyaml", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
SOURCE_ROOT = REPO_ROOT / "legal-sources" / "bsi-quaidal"
|
||||
INDEX_FILE = REPO_ROOT / "control-pipeline" / "data" / "quaidal" / "quaidal_index.json"
|
||||
OUTPUT_DIR = REPO_ROOT / "control-pipeline" / "data" / "quaidal"
|
||||
|
||||
PLAGIARISM_LIMIT = 0.20 # max share of 4-grams that may appear in BSI source
|
||||
N_GRAM = 4
|
||||
MAX_RETRIES = 3
|
||||
|
||||
DEFAULT_OLLAMA_URL = "http://macmini:11434"
|
||||
OLLAMA_MODEL = "qwen3.5:35b-a3b"
|
||||
QUAIDAL_REPO_URL = "https://github.com/BSI-Bund/QUAIDAL"
|
||||
|
||||
KIND_TO_PROMPT_ROLE = {
|
||||
"criterion": "Master Control",
|
||||
"building_block": "atomarer technischer Control",
|
||||
"measure": "Schutzmaßnahme",
|
||||
"metric": "messbarer Qualitäts-Indikator",
|
||||
}
|
||||
|
||||
KIND_TO_OUTPUT_FILE = {
|
||||
"criterion": "master_controls.yaml",
|
||||
"building_block": "atomic_controls.yaml",
|
||||
"measure": "mitigations.yaml",
|
||||
"metric": "metrics.yaml",
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Source-side extraction (kept in memory, never written to disk)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
FRONTMATTER_RE = re.compile(r"^---\s*\n.*?\n---\s*\n", re.DOTALL)
|
||||
SECTION_RE = re.compile(r"^###?\s+(.+?)\s*$", re.MULTILINE)
|
||||
|
||||
|
||||
def load_source_extract(rel_path: str) -> dict:
    """Read one BSI source file and extract the parts used for prompting.

    Args:
        rel_path: Path of the entry relative to ``SOURCE_ROOT``.

    Returns:
        Dict with
        ``shortdesc`` – value of the ``shortdesc:`` frontmatter line ("" if
        absent), ``description_excerpt`` – at most the first two paragraphs
        below a "Beschreibung" heading (or of the first 1500 body characters
        when there is no such heading), and ``full_body`` – the text with
        the frontmatter removed.
    """
    raw = (SOURCE_ROOT / rel_path).read_text(encoding="utf-8")

    # Frontmatter is only inspected for the shortdesc line.
    header = re.match(r"^---\s*\n(.*?)\n---\s*\n", raw, re.DOTALL)
    shortdesc = ""
    if header:
        shortdesc = next(
            (
                entry.split(":", 1)[1].strip()
                for entry in header.group(1).splitlines()
                if entry.lower().startswith("shortdesc:")
            ),
            "",
        )
    body = FRONTMATTER_RE.sub("", raw, count=1)

    # Text below "Beschreibung" up to the next heading, else the body start.
    section = re.search(
        r"###?\s+Beschreibung\s*\n+(.+?)(?:\n###?\s|\Z)", body, re.DOTALL
    )
    if section:
        excerpt_source = section.group(1).strip()
    else:
        excerpt_source = body[:1500].strip()

    # Keep the first two non-empty paragraphs.
    blocks = list(filter(None, (part.strip() for part in excerpt_source.split("\n\n"))))

    return {
        "shortdesc": shortdesc,
        "description_excerpt": "\n\n".join(blocks[:2]),
        "full_body": body,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plagiarism gate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
WORD_RE = re.compile(r"\b[\wäöüÄÖÜß]+\b", re.UNICODE)
|
||||
|
||||
|
||||
def _tokenize(text: str) -> list[str]:
    """Return all ``WORD_RE`` matches of ``text``, lower-cased, in order."""
    return list(map(str.lower, WORD_RE.findall(text)))
|
||||
|
||||
|
||||
def ngram_overlap(produced: str, source: str, n: int = N_GRAM) -> float:
    """Fraction of the n-grams of ``produced`` that also occur in ``source``.

    Both texts are tokenized with ``_tokenize``. Repeated n-grams in
    ``produced`` count once per occurrence.

    Returns:
        A value in ``[0.0, 1.0]``; ``0.0`` when ``produced`` has fewer than
        ``n`` tokens or ``source`` yields no n-gram at all.
    """
    produced_tokens = _tokenize(produced)
    source_tokens = _tokenize(source)
    if len(produced_tokens) < n:
        return 0.0

    def windows(tokens):
        # Sliding window of width n over the token list.
        return (tuple(tokens[start:start + n]) for start in range(len(tokens) - n + 1))

    known = set(windows(source_tokens))
    if not known:
        return 0.0

    total = 0
    matched = 0
    for gram in windows(produced_tokens):
        total += 1
        if gram in known:
            matched += 1
    return matched / total
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# LLM prompt + call
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
PROMPT_TEMPLATE = """Du bist Compliance-Engineer bei BreakPilot. Schreibe eine eigenständige Anforderung im Stil einer technischen Kontroll-Spezifikation.
|
||||
|
||||
Quelle: BSI QUAIDAL Sektion {entry_id} ("{title_de}"). Die Quelle steht unter unklarer Lizenz (BSI-Veröffentlichung, § 5 UrhG anwendbar) — wir dürfen die Idee aufgreifen, aber NICHT abschreiben.
|
||||
|
||||
Aufgabe: Formuliere eine eigenständige Anforderung im Stil eines {role}. Anforderungen:
|
||||
- Eigene Formulierung in deutscher Sprache. Kein Satz darf aus der Quelle übernommen werden, auch nicht teilweise. Synonyme verwenden, Satzbau ändern, Inhalt strukturell anders aufbauen.
|
||||
- 2-4 Sätze (max 80 Wörter).
|
||||
- Sprachstil: nüchtern, technisch, normativ ("muss", "ist sicherzustellen", "ist zu prüfen").
|
||||
- Bezug auf KI-Trainingsdaten oder KI-Datenqualität, je nach Quelle.
|
||||
- Nicht die wörtlichen BSI-Beispiele kopieren.
|
||||
|
||||
Quellauszug (NUR zur Orientierung, NICHT abschreiben):
|
||||
---
|
||||
shortdesc: {shortdesc}
|
||||
|
||||
{description_excerpt}
|
||||
---
|
||||
|
||||
Antwort: Liefere AUSSCHLIESSLICH die fertige Beschreibung als reinen Text — kein JSON, keine Überschriften, keine Anführungszeichen, keine Quellenangabe."""
|
||||
|
||||
|
||||
def call_ollama(prompt: str, ollama_url: str, model: str, retries: int = 2) -> str:
    """Send a single-turn chat request to Ollama and return the stripped answer text.

    Args:
        prompt: User message content.
        ollama_url: Base URL of the Ollama server; ``/api/chat`` is appended.
        model: Model name passed through to Ollama.
        retries: Number of additional attempts after the first failure.

    Returns:
        ``message.content`` of the response with surrounding whitespace removed.

    Raises:
        RuntimeError: when every attempt failed; the last underlying error is
            attached as ``__cause__``.
    """
    last_err = None
    for attempt in range(retries + 1):
        try:
            resp = httpx.post(
                f"{ollama_url}/api/chat",
                json={
                    "model": model,
                    "messages": [{"role": "user", "content": prompt}],
                    "stream": False,
                    "options": {"temperature": 0.4},
                    "think": False,
                },
                timeout=180.0,
            )
            resp.raise_for_status()
            return resp.json()["message"]["content"].strip()
        # TypeError/AttributeError cover a well-formed JSON body with an
        # unexpected shape (e.g. a list, or a null "message"/"content"), so
        # such responses are retried instead of escaping the loop.
        except (httpx.HTTPError, KeyError, ValueError, TypeError, AttributeError) as e:
            last_err = e
            if attempt < retries:
                # Exponential backoff: 1s, 2s, 4s, ...
                time.sleep(2 ** attempt)
    raise RuntimeError(f"Ollama call failed after {retries+1} attempts: {last_err}") from last_err
|
||||
|
||||
|
||||
def strip_llm_artifacts(text: str) -> str:
    """Clean leading/trailing markdown, quotes and label prefixes from LLM output.

    Removes, in this order: surrounding whitespace, a surrounding code fence,
    a leading label such as "Beschreibung:", surrounding quote characters, and
    a label that only became visible once the quotes were gone. Handling the
    label on both sides of the quote stripping keeps ``Beschreibung: "text"``
    and ``"Beschreibung: text"`` from leaving a dangling quote behind.

    Args:
        text: Raw model answer.

    Returns:
        The cleaned text; an empty string stays empty.
    """
    # Straight double quote plus the German/English typographic variants.
    quote_chars = '"\u201e\u201c\u201d'
    label_re = r"^(Beschreibung|Description|Anforderung|Control):\s*"

    text = text.strip()
    # Strip surrounding code fences
    if text.startswith("```"):
        text = re.sub(r"^```[a-zA-Z]*\n?", "", text)
        text = re.sub(r"\n?```\s*$", "", text)
    # Whitespace left behind by the fence must go first, otherwise the quote
    # stripping below never reaches the quote characters.
    text = text.strip()
    # Label outside the quotes: Beschreibung: "..."
    text = re.sub(label_re, "", text, flags=re.IGNORECASE)
    # Strip surrounding quotes
    text = text.strip(quote_chars).strip()
    # Label inside the quotes: "Beschreibung: ..."
    text = re.sub(label_re, "", text, flags=re.IGNORECASE)
    return text.strip()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Derivation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class DerivedControl:
    """One clean-room control derived from a single QUAIDAL index entry."""

    # Identifier built by derived_id_for(): kind prefix, source ID and title slug.
    derived_id: str
    # ID of the originating index entry (entry["id"]).
    source_id: str
    # Entry kind as found in the index (entry["kind"]).
    kind: str
    # German title with the leading QUAIDAL ID prefix removed.
    canonical_name: str
    # Cleaned LLM answer that passed the plagiarism gate.
    description: str
    # n-gram overlap of `description` with the source text, rounded to 4 places.
    plagiarism_score: float
    # IDs referenced by the entry (entry["referenced_ids"]).
    related_quaidal_ids: list[str]
    # External references carried over unchanged (entry["external_refs"]).
    external_refs: list[dict]
    # Provenance record (framework, section, original title, URL, commit_sha,
    # license note); commit_sha starts as None and is filled in later.
    source: dict
|
||||
|
||||
|
||||
# Transliteration table for German umlauts and sharp s. Upper-case umlauts map
# straight to their lower-case digraphs.
_ASCII_FOLD = str.maketrans(
    dict(zip("äöüÄÖÜß", ("ae", "oe", "ue", "ae", "oe", "ue", "ss")))
)


def slug(text: str) -> str:
    """Turn ``text`` into a lower-case, hyphen-separated ASCII slug.

    Umlauts and sharp s are transliterated first; every remaining run of
    characters outside ``a-z0-9`` collapses into a single hyphen, and hyphens
    at either end are dropped.
    """
    folded = text.translate(_ASCII_FOLD)
    hyphenated = re.sub(r"[^a-z0-9]+", "-", folded.lower())
    return hyphenated.strip("-")
|
||||
|
||||
|
||||
def derived_id_for(entry: dict) -> str:
    """Build the derived control ID for an index entry.

    The ID has the form ``<kind prefix>-<entry id>-<title slug>``. The title
    loses a leading QUAIDAL ID (QKB/QB/MA/QM plus number) before slugging, the
    slug is cut to 40 characters, and trailing hyphens are removed from the
    final ID. Unknown kinds get the prefix ``X-AI-DATA``.
    """
    kind_prefixes = {
        "criterion": "MC-AI-DATA",
        "building_block": "AC-AI-DATA",
        "measure": "MIT-AI-DATA",
        "metric": "MET-AI-DATA",
    }
    prefix = kind_prefixes.get(entry["kind"], "X-AI-DATA")
    bare_title = re.sub(r"^\s*(QKB|QB|MA|QM)-\d+[a-zA-Z]?\s*", "", entry["title_de"])
    short_slug = slug(bare_title)[:40]
    candidate = f"{prefix}-{entry['id']}-{short_slug}"
    # Truncation (or an empty slug) can leave a hyphen at the end.
    return candidate.rstrip("-")
|
||||
|
||||
|
||||
def derive_one(entry: dict, source_extract: dict, ollama_url: str, model: str, *, verbose: bool = False) -> DerivedControl:
    """Derive one clean-room control from a QUAIDAL index entry via the LLM.

    The prompt is sent up to MAX_RETRIES times. Each answer is cleaned with
    strip_llm_artifacts() and scored with ngram_overlap() against the source
    short description plus excerpt. The first answer scoring below
    PLAGIARISM_LIMIT is accepted; after a rejected answer a notice with the
    measured overlap is appended to the prompt for the next attempt.

    Args:
        entry: Index entry; reads "id", "kind", "title_de", "source_path",
            "referenced_ids" and "external_refs".
        source_extract: Dict with "shortdesc" and "description_excerpt".
        ollama_url: Base URL handed to call_ollama().
        model: Model name handed to call_ollama().
        verbose: Print per-attempt diagnostics to stderr.

    Returns:
        The derived control; ``source["commit_sha"]`` is left as None.

    Raises:
        RuntimeError: if no attempt produced any text, or if the best answer
            still reaches PLAGIARISM_LIMIT. Errors raised by call_ollama()
            propagate unchanged.
    """
    role = KIND_TO_PROMPT_ROLE.get(entry["kind"], "Control")
    prompt = PROMPT_TEMPLATE.format(
        entry_id=entry["id"],
        title_de=entry["title_de"],
        role=role,
        shortdesc=source_extract["shortdesc"] or "(keiner)",
        description_excerpt=source_extract["description_excerpt"] or "(keine Beschreibung)",
    )

    source_corpus = "\n\n".join(filter(None, [source_extract["shortdesc"], source_extract["description_excerpt"]]))

    best: tuple[str, float] | None = None
    for attempt in range(1, MAX_RETRIES + 1):
        output = call_ollama(prompt, ollama_url, model)
        output = strip_llm_artifacts(output)
        if not output:
            # An empty answer has no n-grams, so its overlap would be 0.0 and
            # it would pass the gate as a "clean" description. Treat it as a
            # failed attempt instead.
            if verbose:
                print(f" attempt {attempt}: empty output", file=sys.stderr)
            continue
        score = ngram_overlap(output, source_corpus)
        if verbose:
            print(f" attempt {attempt}: overlap={score:.2%} len={len(output)}", file=sys.stderr)
        if score < PLAGIARISM_LIMIT:
            best = (output, score)
            break
        if best is None or score < best[1]:
            best = (output, score)
        # Strengthen the next prompt by appending a reject notice
        prompt += f"\n\n(Vorheriger Versuch hatte {score:.0%} Wortdeckung mit der Quelle. Verwende völlig andere Begriffe und Satzstruktur.)"

    if best is None:
        raise RuntimeError(f"Could not derive {entry['id']}: no output")
    output, score = best
    if score >= PLAGIARISM_LIMIT:
        raise RuntimeError(
            f"Plagiarism gate failed for {entry['id']}: best overlap {score:.2%} >= limit {PLAGIARISM_LIMIT:.0%}.\n"
            f"Output:\n{output}"
        )

    # Same ID-prefix removal as in derived_id_for(); fall back to the raw
    # title when nothing is left after stripping.
    title_de_clean = re.sub(r"^\s*(QKB|QB|MA|QM)-\d+[a-zA-Z]?\s*", "", entry["title_de"]).strip()
    return DerivedControl(
        derived_id=derived_id_for(entry),
        source_id=entry["id"],
        kind=entry["kind"],
        canonical_name=title_de_clean or entry["title_de"],
        description=output,
        plagiarism_score=round(score, 4),
        related_quaidal_ids=entry["referenced_ids"],
        external_refs=entry["external_refs"],
        source={
            "framework": "BSI QUAIDAL",
            "section": entry["id"],
            "title_original_de": entry["title_de"],
            "url": f"{QUAIDAL_REPO_URL}/blob/main/{entry['source_path'].replace(' ', '%20')}",
            "commit_sha": None,  # filled in by main()
            "license_note": "§ 5 UrhG anwendbar; share:true im Frontmatter; Clean-Room-Ableitung.",
        },
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Output writers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def control_to_dict(c: DerivedControl) -> dict:
    """Serialize a derived control into the plain dict written to YAML.

    Key order is fixed because the dict is dumped with ``sort_keys=False``.
    Lists and the ``source`` dict are passed through by reference, not copied.
    """
    leading_fields = (
        ("id", "derived_id"),
        ("canonical_name", "canonical_name"),
        ("description", "description"),
        ("kind", "kind"),
    )
    record = {key: getattr(c, attr) for key, attr in leading_fields}
    # Every derived control is anchored to the same regulation.
    record["regulation_anchor"] = "EU AI Act Art. 10 (Datenqualität für Hochrisiko-KI)"
    record["related_quaidal_ids"] = c.related_quaidal_ids
    record["external_refs"] = c.external_refs
    record["source"] = c.source
    record["plagiarism_score_at_generation"] = c.plagiarism_score
    return record
|
||||
|
||||
|
||||
def write_yaml_per_kind(controls: list[DerivedControl], commit_sha: str | None) -> dict[str, Path]:
    """Write the controls to one YAML file per kind and return the written paths.

    Each control gets ``commit_sha`` stamped into its ``source`` dict (in
    place) and is grouped by the file name KIND_TO_OUTPUT_FILE assigns to its
    kind; unknown kinds go to ``other.yaml``. Every file is written in full
    into OUTPUT_DIR with a shared header followed by the ``controls`` list.

    Returns:
        Mapping of file name to the path that was written.
    """
    grouped: dict[str, list[dict]] = {}
    for control in controls:
        control.source["commit_sha"] = commit_sha
        target = KIND_TO_OUTPUT_FILE.get(control.kind, "other.yaml")
        if target not in grouped:
            grouped[target] = []
        grouped[target].append(control_to_dict(control))

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Header fields are identical for every output file.
    # Note: generated_by_model records the module constant OLLAMA_MODEL.
    header = {
        "source": "Derived from BSI QUAIDAL (Clean-Room)",
        "source_url": QUAIDAL_REPO_URL,
        "commit_sha": commit_sha,
        "plagiarism_limit_4gram": PLAGIARISM_LIMIT,
        "generated_by_model": OLLAMA_MODEL,
    }

    written: dict[str, Path] = {}
    for target, items in grouped.items():
        destination = OUTPUT_DIR / target
        document = {**header, "controls": items}
        rendered = yaml.safe_dump(document, allow_unicode=True, sort_keys=False)
        destination.write_text(rendered, encoding="utf-8")
        written[target] = destination
    return written
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: derive controls for the selected index entries.

    Loads the index from INDEX_FILE, applies the --only/--kind/--limit
    filters, derives each entry via derive_one() and either prints the result
    (--dry-run) or writes per-kind YAML files. Failures of individual entries
    are collected and reported; they do not abort the run.

    Returns:
        0 on full success, 1 when no entry matched or any entry failed,
        2 when the index file is missing.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--only", help="Derive only this QUAIDAL ID (e.g. QKB-01)")
    ap.add_argument("--kind", help="Derive only entries of this kind (criterion/building_block/measure/metric)")
    ap.add_argument("--limit", type=int, help="Process at most N entries")
    ap.add_argument("--dry-run", action="store_true", help="Print derived controls instead of writing YAML")
    ap.add_argument("--ollama-host", default="macmini", help="Ollama host (default: macmini)")
    ap.add_argument("--model", default=OLLAMA_MODEL)
    ap.add_argument("--verbose", action="store_true")
    args = ap.parse_args()

    if not INDEX_FILE.exists():
        print(f"ERROR: missing index. Run ingest_bsi_quaidal.py first ({INDEX_FILE})", file=sys.stderr)
        return 2
    index = json.loads(INDEX_FILE.read_text(encoding="utf-8"))
    entries = index["entries"]
    if args.only:
        entries = [e for e in entries if e["id"].upper() == args.only.upper()]
    if args.kind:
        entries = [e for e in entries if e["kind"] == args.kind]
    if args.limit:
        entries = entries[: args.limit]

    if not entries:
        print("No entries match the filter.", file=sys.stderr)
        return 1

    # A bare host name is expanded to the default Ollama port; a full URL is
    # used as given.
    ollama_url = args.ollama_host if "://" in args.ollama_host else f"http://{args.ollama_host}:11434"
    print(f"Derivation: {len(entries)} entries, model={args.model}, ollama={ollama_url}, limit={PLAGIARISM_LIMIT:.0%}", file=sys.stderr)

    derived: list[DerivedControl] = []
    failed: list[tuple[str, str]] = []
    for i, entry in enumerate(entries, 1):
        if args.verbose:
            print(f"[{i}/{len(entries)}] {entry['id']} ({entry['kind']}): {entry['title_de']}", file=sys.stderr)
        try:
            extract = load_source_extract(entry["source_path"])
            ctrl = derive_one(entry, extract, ollama_url, args.model, verbose=args.verbose)
            derived.append(ctrl)
        except Exception as exc:  # noqa: BLE001
            # One bad entry must not stop the batch; it is reported at the end.
            failed.append((entry["id"], str(exc)))
            print(f" FAILED {entry['id']}: {exc}", file=sys.stderr)

    print(f"\nDerived: {len(derived)} | Failed: {len(failed)}", file=sys.stderr)

    if args.dry_run:
        for c in derived:
            c.source["commit_sha"] = index.get("commit_sha")
            print(yaml.safe_dump(control_to_dict(c), allow_unicode=True, sort_keys=False))
            print("---")
        return 0 if not failed else 1

    written = write_yaml_per_kind(derived, index.get("commit_sha"))
    for fname, path in written.items():
        # Use the same fallback as write_yaml_per_kind(); a plain lookup would
        # raise KeyError for kinds that were routed to other.yaml.
        count = sum(1 for c in derived if KIND_TO_OUTPUT_FILE.get(c.kind, "other.yaml") == fname)
        print(f"Wrote {path.relative_to(REPO_ROOT)} ({count} entries)", file=sys.stderr)

    if failed:
        print("\nFailures:", file=sys.stderr)
        for fid, msg in failed:
            # Only the first line: plagiarism failures carry the full output.
            print(f" - {fid}: {msg.splitlines()[0]}", file=sys.stderr)
        return 1
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -0,0 +1,214 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Extract CE-relevant obligations from TRBS/TRGS/ASR/OSHA chunks in Qdrant.
|
||||
|
||||
Searches for MUSS/SOLL patterns in chunk texts and classifies them.
|
||||
Output: JSON file with structured obligations for the CE session.
|
||||
|
||||
Usage:
|
||||
python3 /app/scripts/extract_ce_obligations.py
|
||||
python3 /app/scripts/extract_ce_obligations.py --output /tmp/ce_obligations.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
|
||||
)
|
||||
logger = logging.getLogger("ce-obligations")
|
||||
|
||||
QDRANT_URL = os.getenv("QDRANT_URL", "http://qdrant:6333")
|
||||
COLLECTION = "bp_compliance_ce"
|
||||
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
|
||||
LLM_MODEL = "qwen3.5:35b-a3b"
|
||||
|
||||
# Obligation patterns (DE + EN)
|
||||
OBLIGATION_PATTERNS = re.compile(
|
||||
r"(muss|müssen|hat\s+[\w\s]*zu\s|ist\s+[\w\s]*sicherzustellen|"
|
||||
r"ist\s+verpflichtet|sind\s+verpflichtet|darf\s+nicht|"
|
||||
r"shall|must|required\s+to|is\s+required|shall\s+not)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# CE relevance keywords
|
||||
CE_KEYWORDS = re.compile(
|
||||
r"(maschine|schutzeinrichtung|gefährdung|quetsch|scher|stoß|"
|
||||
r"schneid|fang|einzug|absturz|druck|explosion|brand|"
|
||||
r"elektrisch|spannung|erdung|schutzleiter|not-halt|"
|
||||
r"betriebsanleitung|kennzeichnung|prüfung|prüfpflicht|"
|
||||
r"instandhaltung|wartung|sicherheitsabstand|"
|
||||
r"schutzmaßnahme|persönliche schutzausrüstung|psa|"
|
||||
r"machine|guard|hazard|crush|shear|cut|entangle|"
|
||||
r"lockout|tagout|electrical|grounding|emergency stop|"
|
||||
r"safety distance|protective device|ppe|inspection)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
HAZARD_CATEGORIES = {
|
||||
"quetsch|crush|squeeze": "mechanical_crushing",
|
||||
"schneid|cut": "mechanical_cutting",
|
||||
"fang|einzug|entangle|draw": "mechanical_entanglement",
|
||||
"absturz|fall": "fall_hazard",
|
||||
"explosion|ex-bereich|atex": "explosion_hazard",
|
||||
"brand|fire|feuer": "fire_hazard",
|
||||
"elektrisch|electrical|spannung|voltage": "electrical_hazard",
|
||||
"lärm|noise|schall": "noise_hazard",
|
||||
"gefahrstoff|hazardous substance|chemical": "chemical_hazard",
|
||||
"ergonomie|ergonomic|heben|lift": "ergonomic_hazard",
|
||||
"temperatur|heat|hitze|kälte|cold": "thermal_hazard",
|
||||
"strahlung|radiation|laser": "radiation_hazard",
|
||||
"not-halt|emergency stop|e-stop": "emergency_stop",
|
||||
"lockout|tagout|loto": "lockout_tagout",
|
||||
"kennzeichnung|label|marking|sign": "safety_marking",
|
||||
"prüfung|inspection|test": "inspection_requirement",
|
||||
"instandhaltung|maintenance|wartung": "maintenance",
|
||||
"schutzeinrichtung|guard|protective device": "protective_device",
|
||||
"betriebsanleitung|instruction|manual": "operating_instructions",
|
||||
"druck|pressure|behälter|vessel|kessel|boiler": "pressure_hazard",
|
||||
}
|
||||
|
||||
# Source-based overrides: TRGS docs about chemicals/storage
|
||||
# should never be classified as mechanical hazards
|
||||
_CHEMICAL_SOURCES = re.compile(
|
||||
r"trgs\s*(5[0-9]{2}|7[0-9]{2}|9[0-9]{2}|4[0-9]{2}|6[0-9]{2})",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _classify_hazard(text: str, source: str) -> str:
    """Map a chunk to a hazard category, taking the source document into account.

    When ``source`` matches _CHEMICAL_SOURCES only explosion, pressure, fire
    or (as fallback) chemical hazards are returned. For every other source
    the first HAZARD_CATEGORIES pattern found in ``text`` wins, in dict
    order; "general" is returned when nothing matches. All matching is
    case-insensitive substring search.
    """
    if _CHEMICAL_SOURCES.search(source):
        # Checked in this order; the first hit decides.
        trgs_overrides = (
            (r"explosion|ex-bereich|atex|zündfähig", "explosion_hazard"),
            (r"druck|pressure|behälter|vessel", "pressure_hazard"),
            (r"brand|fire|feuer", "fire_hazard"),
        )
        for override_pattern, override_category in trgs_overrides:
            if re.search(override_pattern, text, re.IGNORECASE):
                return override_category
        return "chemical_hazard"

    # Lazy scan so evaluation stops at the first matching pattern.
    hits = (
        label
        for keyword_pattern, label in HAZARD_CATEGORIES.items()
        if re.search(keyword_pattern, text, re.IGNORECASE)
    )
    return next(hits, "general")
|
||||
|
||||
|
||||
def scroll_chunks(source_filter: str = None) -> list[dict]:
    """Scroll through the Qdrant collection and collect CE-relevant obligations.

    Pages through COLLECTION in batches of 100 points. A point is kept when
    its source name contains trbs/trgs/asr/osha and its chunk text matches
    both OBLIGATION_PATTERNS and CE_KEYWORDS.

    Args:
        source_filter: Currently unused; accepted for interface compatibility.

    Returns:
        One dict per kept chunk with source, section, paragraph,
        obligation_text (first 500 characters), hazard_category,
        obligation_type, ce_relevance and filename.

    Raises:
        httpx.HTTPStatusError: when Qdrant answers with an error status.
            Without this check an error response would look like an empty
            page and silently truncate the result.
    """
    chunks = []
    offset = None
    batch = 100
    pages = 0

    while True:
        scroll_body = {
            "limit": batch,
            "with_payload": True,
            "with_vector": False,
        }
        if offset is not None:
            scroll_body["offset"] = offset

        resp = httpx.post(
            f"{QDRANT_URL}/collections/{COLLECTION}/points/scroll",
            json=scroll_body,
            timeout=30.0,
        )
        resp.raise_for_status()
        # `or` guards against explicit nulls in the response body.
        result = resp.json().get("result") or {}
        points = result.get("points") or []
        next_offset = result.get("next_page_offset")

        for pt in points:
            payload = pt.get("payload") or {}
            # Fall back to the file name when "source" is missing or empty.
            source = payload.get("source") or payload.get("filename") or ""
            text = payload.get("chunk_text") or ""

            # Filter for TRBS/TRGS/ASR/OSHA
            source_lower = source.lower()
            is_relevant = any(k in source_lower for k in
                              ["trbs", "trgs", "asr", "osha"])
            if not is_relevant:
                continue

            # Check for obligation patterns
            if not OBLIGATION_PATTERNS.search(text):
                continue

            # Check CE relevance
            if not CE_KEYWORDS.search(text):
                continue

            # Classify hazard category (source-aware)
            hazard = _classify_hazard(text, source)

            # Determine obligation type
            if re.search(r"muss|müssen|shall|must|required", text, re.IGNORECASE):
                obl_type = "MUSS"
            elif re.search(r"soll|sollte|should", text, re.IGNORECASE):
                obl_type = "SOLL"
            else:
                # Remaining obligation patterns are treated as mandatory.
                obl_type = "MUSS"

            chunks.append({
                "source": source,
                "section": payload.get("section", ""),
                "paragraph": payload.get("paragraph", ""),
                "obligation_text": text.strip()[:500],
                "hazard_category": hazard,
                "obligation_type": obl_type,
                "ce_relevance": "high" if hazard != "general" else "medium",
                "filename": payload.get("filename", ""),
            })

        if next_offset is None or not points:
            break
        offset = next_offset

        # Report every 50 pages. Keying on len(chunks) % 500 logged on every
        # page while nothing had been found and almost never afterwards.
        pages += 1
        if pages % 50 == 0:
            logger.info(" Scanned... %d obligations found so far", len(chunks))

    return chunks
|
||||
|
||||
|
||||
def main():
    """CLI entry point: scan Qdrant, log summary statistics, write the JSON file.

    The output path comes from --output (default /tmp/ce_obligations.json).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output", default="/tmp/ce_obligations.json")
    args = parser.parse_args()

    logger.info("Scanning %s for CE obligations...", COLLECTION)
    obligations = scroll_chunks()

    logger.info("Found %d CE-relevant obligations", len(obligations))

    # Stats
    by_source = {}
    by_hazard = {}
    for o in obligations:
        # Source names are cut to 30 characters to keep the log readable.
        src = o["source"][:30]
        by_source[src] = by_source.get(src, 0) + 1
        by_hazard[o["hazard_category"]] = by_hazard.get(o["hazard_category"], 0) + 1

    logger.info("\nBy source:")
    for src, cnt in sorted(by_source.items(), key=lambda x: -x[1])[:20]:
        logger.info(" %4d %s", cnt, src)

    logger.info("\nBy hazard category:")
    for cat, cnt in sorted(by_hazard.items(), key=lambda x: -x[1]):
        logger.info(" %4d %s", cnt, cat)

    # Save
    # ensure_ascii=False emits umlauts verbatim, so the encoding must be
    # pinned; the platform default may be unable to encode them.
    Path(args.output).write_text(
        json.dumps(obligations, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
    logger.info("\nSaved to %s", args.output)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,289 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Add L2 sub-topics to broad tokens. Instead of just "incident",
|
||||
produces "incident:response", "incident:detection", etc.
|
||||
|
||||
Only processes tokens with >500 controls AND <90% audit accuracy.
|
||||
|
||||
Usage:
|
||||
python3 /app/scripts/gpre0_add_subtopics.py --dry-run
|
||||
python3 /app/scripts/gpre0_add_subtopics.py
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
|
||||
)
|
||||
logger = logging.getLogger("gpre0-subtopics")
|
||||
|
||||
# Database connection string; DATABASE_URL from the environment takes priority.
# NOTE(review): the fallback embeds a username and password in source control.
# Confirm these are throwaway development credentials.
DB_URL = os.getenv(
    "DATABASE_URL",
    "postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db",
)
# Empty string when ANTHROPIC_API_KEY is not set in the environment.
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
ANTHROPIC_MODEL = "claude-haiku-4-5-20251001"
ANTHROPIC_URL = "https://api.anthropic.com/v1/messages"
# Directory where main() stores the corrections JSON.
CHECKPOINT_DIR = Path("/tmp/gpre0_subtopic_checkpoints")
|
||||
|
||||
# Tokens that are too broad — need L2 sub-topics
|
||||
BROAD_TOKENS = {
|
||||
# Round 1 (already done)
|
||||
"risk_management", "policy", "audit_logging", "incident",
|
||||
"access_control", "compliance_audit", "asset_management",
|
||||
"key_management", "third_party_management", "monitoring",
|
||||
"financial_reporting", "data_classification", "change_management",
|
||||
"alerting", "multi_factor_auth", "api_security",
|
||||
"certificate_management", "human_resources_security",
|
||||
"training", "data_processing_agreement", "data_processing_register",
|
||||
"consumer_protection", "input_validation", "vulnerability",
|
||||
"dpia", "data_breach_notification", "backup",
|
||||
"supply_chain_due_diligence", "awareness",
|
||||
"privacy_by_design", "credentials", "logging_configuration",
|
||||
# Round 2 (remaining large tokens)
|
||||
"supervisory_authority", "certification", "secure_development",
|
||||
"product_safety", "personal_data", "data_subject_rights", "consent",
|
||||
"ai_system", "encryption", "data_retention", "disaster_recovery",
|
||||
"data_transfer", "aml", "transport_encryption", "network_security",
|
||||
"physical_security", "medical_device", "patch_management",
|
||||
"cookie_consent", "video_surveillance", "network_segmentation",
|
||||
"telecommunications", "privileged_access", "session_management",
|
||||
"password_policy", "governance", "whistleblowing", "payment_services",
|
||||
"health_data", "sensitive_data", "ecommerce", "sustainability_reporting",
|
||||
"critical_infrastructure", "regulatory",
|
||||
}
|
||||
|
||||
SYSTEM_PROMPT = """Du bist ein Compliance-Spezialist. Jeder Control hat bereits ein Hauptthema (L1 Token).
|
||||
Deine Aufgabe: Bestimme ein SPEZIFISCHES Sub-Thema (L2) innerhalb des Hauptthemas.
|
||||
|
||||
Das L2 Sub-Thema soll den KONKRETEN Aspekt beschreiben. Verwende kurze, klare englische Bezeichnungen.
|
||||
|
||||
Beispiele:
|
||||
- L1=incident, Titel="Incident Response Plan erstellen" → L2="response_plan"
|
||||
- L1=incident, Titel="Sicherheitsvorfälle erkennen" → L2="detection"
|
||||
- L1=incident, Titel="Recovery nach Vorfall dokumentieren" → L2="recovery"
|
||||
- L1=incident, Titel="Forensische Analyse durchführen" → L2="forensics"
|
||||
- L1=risk_management, Titel="Risikobewertung durchführen" → L2="assessment"
|
||||
- L1=risk_management, Titel="Risikominderungsmaßnahmen umsetzen" → L2="treatment"
|
||||
- L1=risk_management, Titel="Restrisiko akzeptieren" → L2="acceptance"
|
||||
- L1=access_control, Titel="Rollenbasierte Zugriffskontrolle" → L2="rbac"
|
||||
- L1=access_control, Titel="Zugriffsrechte regelmäßig prüfen" → L2="access_review"
|
||||
- L1=access_control, Titel="Identitätsmanagement implementieren" → L2="identity_management"
|
||||
- L1=monitoring, Titel="Systemverfügbarkeit überwachen" → L2="availability"
|
||||
- L1=monitoring, Titel="Sicherheitsereignisse überwachen" → L2="security_events"
|
||||
- L1=policy, Titel="Datenschutzrichtlinie erstellen" → L2="data_protection"
|
||||
- L1=policy, Titel="Acceptable Use Policy definieren" → L2="acceptable_use"
|
||||
- L1=policy, Titel="Passwortrichtlinie festlegen" → L2="password"
|
||||
- L1=financial_reporting, Titel="Jahresabschluss erstellen" → L2="annual_accounts"
|
||||
- L1=financial_reporting, Titel="Steuererklärung einreichen" → L2="tax"
|
||||
- L1=alerting, Titel="Datenpanne an Behörde melden" → L2="breach_notification"
|
||||
- L1=alerting, Titel="Sicherheitswarnung eskalieren" → L2="escalation"
|
||||
|
||||
REGELN:
|
||||
- L2 soll 1-3 Wörter sein, snake_case
|
||||
- L2 soll SPEZIFISCH sein (nicht das L1 wiederholen)
|
||||
- Verwende konsistente L2-Bezeichnungen für ähnliche Controls
|
||||
|
||||
Antworte NUR als JSON-Array: [{"id":"...","l2":"subtopic"}, ...]"""
|
||||
|
||||
|
||||
def call_claude(controls_batch: list[dict], max_retries: int = 2) -> tuple[list[dict], dict]:
    """Send one batch of controls to Claude for L2 sub-topic assignment.

    Args:
        controls_batch: Dicts with "control_id", "current_object", "title"
            and "objective"; the objective is cut to 80 characters.
        max_retries: Additional attempts after an HTTP 429 response. Other
            failures are not retried.

    Returns:
        ``(results, usage)``: the JSON array found in the answer and the
        ``usage`` object of the API response. ``results`` is empty on any
        failure; ``usage`` is still returned when the request succeeded but
        the answer could not be parsed, so token accounting stays correct.
    """
    items = [
        f'- id="{c["control_id"]}" '
        f'L1="{c["current_object"]}" '
        f't="{c["title"]}" '
        f'o="{c["objective"][:80]}"'
        for c in controls_batch
    ]

    prompt = "Bestimme L2 Sub-Topics:\n" + "\n".join(items)

    headers = {
        "x-api-key": ANTHROPIC_API_KEY,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    payload = {
        "model": ANTHROPIC_MODEL,
        "max_tokens": 1500,
        "temperature": 0.0,
        "system": SYSTEM_PROMPT,
        "messages": [{"role": "user", "content": prompt}],
    }

    for attempt in range(max_retries + 1):
        try:
            resp = httpx.post(
                ANTHROPIC_URL, headers=headers, json=payload, timeout=45.0
            )
            resp.raise_for_status()
            data = resp.json()
            content = data.get("content", [{}])[0].get("text", "")
            usage = data.get("usage", {})
        except httpx.TimeoutException:
            logger.error("TIMEOUT — skipping")
            return [], {}
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 429 and attempt < max_retries:
                # Waiting only helps if the request is sent again afterwards.
                logger.warning("Rate limited — waiting 60s")
                time.sleep(60)
                continue
            logger.error("API error %d", e.response.status_code)
            return [], {}
        except Exception as e:
            logger.error("Failed: %s", e)
            return [], {}

        # The model may wrap the array in prose; take the outermost [...] span.
        start = content.find("[")
        end = content.rfind("]") + 1
        if start < 0 or end <= start:
            return [], usage
        try:
            parsed = json.loads(content[start:end])
        except ValueError as e:
            logger.error("Failed: %s", e)
            return [], usage
        if not isinstance(parsed, list):
            return [], usage
        return parsed, usage

    return [], {}
|
||||
|
||||
|
||||
def _normalize_l2(value) -> str:
    """Coerce a model-supplied sub-topic into a snake_case token ('' if unusable)."""
    if not isinstance(value, str):
        return ""
    cleaned = "".join(ch if ch.isalnum() else "_" for ch in value.strip().lower())
    # Collapse runs of separators and drop leading/trailing ones.
    return "_".join(part for part in cleaned.split("_") if part)


def main():
    """CLI entry point: assign L2 sub-topics to controls filed under broad tokens.

    Steps:
      1. Select active controls whose merge_group_hint ("action:object:phase")
         has an object listed in BROAD_TOKENS.
      2. Ask Claude for an L2 sub-topic per control, in batches.
      3. Build the new hint "action:<L1>_<L2>:phase" and collect corrections.
      4. Log a report and save the corrections JSON to CHECKPOINT_DIR.
      5. Unless --dry-run is given, write the new hints to the database in
         one transaction.

    Model output is validated before use: items that are not JSON objects
    are ignored, and each L2 value is normalized to snake_case so that a
    stray ":" or space cannot break the three-part hint format.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch-size", type=int, default=20)
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    engine = create_engine(
        DB_URL, connect_args={"options": "-c search_path=compliance,public"}
    )

    # Build LIKE patterns for broad tokens
    # Tokens come from the BROAD_TOKENS constant, not from user input. "_" is
    # a LIKE wildcard, so the exact match is re-checked in Python below.
    like_clauses = " OR ".join(
        f"cc.generation_metadata->>'merge_group_hint' LIKE '%:{tok}:%'"
        for tok in BROAD_TOKENS
    )

    with engine.connect() as c:
        rows = c.execute(text(f"""
            SELECT cc.id, cc.control_id, cc.title,
                   COALESCE(cc.objective, '') as objective,
                   cc.generation_metadata->>'merge_group_hint' as hint
            FROM canonical_controls cc
            WHERE cc.generation_metadata->>'merge_group_hint' IS NOT NULL
              AND cc.release_state NOT IN ('deprecated', 'rejected')
              AND ({like_clauses})
        """)).fetchall()

    controls = []
    for uuid, cid, title, objective, hint in rows:
        parts = hint.split(":", 2) if hint else []
        obj = parts[1] if len(parts) > 1 else ""
        if obj in BROAD_TOKENS:
            controls.append({
                "uuid": str(uuid), "control_id": cid,
                "title": title or "", "objective": objective or "",
                "current_hint": hint, "current_object": obj,
            })

    logger.info("Found %d controls in broad tokens to add L2 sub-topics", len(controls))

    # Process
    total_tagged = 0
    total_skipped = 0
    total_input_tokens = 0
    total_output_tokens = 0
    corrections = []
    l2_stats: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))

    for i in range(0, len(controls), args.batch_size):
        batch = controls[i:i + args.batch_size]
        results, usage = call_claude(batch)

        total_input_tokens += usage.get("input_tokens", 0)
        total_output_tokens += usage.get("output_tokens", 0)

        if not results:
            total_skipped += len(batch)
            continue

        # Ignore anything in the array that is not a JSON object.
        result_map = {r.get("id", ""): r for r in results if isinstance(r, dict)}
        for ctrl in batch:
            r = result_map.get(ctrl["control_id"], {})
            l2 = _normalize_l2(r.get("l2", ""))
            if not l2:
                total_skipped += 1
                continue

            total_tagged += 1
            old_hint = ctrl["current_hint"]
            parts = old_hint.split(":", 2)
            action = parts[0] if parts else "implement"
            l1 = parts[1] if len(parts) > 1 else "unknown"
            phase = parts[2] if len(parts) > 2 else "implementation"
            # New format: action:L1_L2:phase
            new_obj = f"{l1}_{l2}"
            new_hint = f"{action}:{new_obj}:{phase}"
            corrections.append({
                "uuid": ctrl["uuid"],
                "old_hint": old_hint,
                "new_hint": new_hint,
            })
            l2_stats[l1][l2] += 1

        processed = min(i + args.batch_size, len(controls))
        # Log roughly every 5000 controls and once at the very end.
        if processed % 5000 < args.batch_size or processed >= len(controls):
            logger.info(
                "Progress: %d/%d (tagged=%d skip=%d)",
                processed, len(controls), total_tagged, total_skipped,
            )

        # Short pause between requests.
        time.sleep(0.3)

    # Report
    cost_in = total_input_tokens / 1_000_000 * 0.80
    cost_out = total_output_tokens / 1_000_000 * 4.00
    logger.info("\n" + "=" * 60)
    logger.info("SUBTOPIC REPORT")
    logger.info("=" * 60)
    logger.info("Total: %d | Tagged: %d | Skipped: %d", len(controls), total_tagged, total_skipped)
    logger.info("Cost: $%.2f (Haiku)", cost_in + cost_out)

    # Show L2 distribution per L1
    for l1, subs in sorted(l2_stats.items()):
        top_subs = sorted(subs.items(), key=lambda x: -x[1])[:10]
        logger.info("\n%s (%d unique L2):", l1, len(subs))
        for l2, cnt in top_subs:
            logger.info(" %4d %s_%s", cnt, l1, l2)

    # Save corrections
    CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)
    corr_file = CHECKPOINT_DIR / "corrections_subtopics.json"
    corr_file.write_text(json.dumps(corrections))
    logger.info("\nSaved %d corrections to %s", len(corrections), corr_file)

    if args.dry_run:
        logger.info("DRY RUN — not updating DB")
        return

    if corrections:
        logger.info("Applying %d corrections...", len(corrections))
        # engine.begin() commits on success and rolls back on error.
        with engine.begin() as c:
            c.execute(text("SET search_path TO compliance, public"))
            for corr in corrections:
                c.execute(text("""
                    UPDATE canonical_controls
                    SET generation_metadata = jsonb_set(
                        generation_metadata,
                        '{merge_group_hint}',
                        to_jsonb(CAST(:new_hint AS text))
                    )
                    WHERE id = CAST(:uuid AS uuid)
                """), {"uuid": corr["uuid"], "new_hint": corr["new_hint"]})
        logger.info("Done. %d hints updated.", len(corrections))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Apply saved corrections from JSON file to DB (crash recovery)."""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
||||
logger = logging.getLogger("apply-corrections")
|
||||
|
||||
DB_URL = os.getenv("DATABASE_URL", "postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: replay a saved corrections JSON file against the database.

    The file holds a list of objects with "uuid", "old_hint" and "new_hint".
    With --dry-run the first ten corrections are logged and nothing is
    written; otherwise every correction sets merge_group_hint of the matching
    canonical_controls row, all inside one transaction.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("file", help="Path to corrections JSON file")
    cli.add_argument("--dry-run", action="store_true")
    opts = cli.parse_args()

    corrections = json.loads(Path(opts.file).read_text())
    logger.info("Loaded %d corrections from %s", len(corrections), opts.file)

    if opts.dry_run:
        preview = corrections[:10]
        for item in preview:
            logger.info(" %s: %s → %s", item["uuid"][:8], item["old_hint"], item["new_hint"])
        logger.info("DRY RUN — not applying")
        return

    engine = create_engine(DB_URL, connect_args={"options": "-c search_path=compliance,public"})

    # engine.begin() commits on success and rolls back on error.
    applied = 0
    with engine.begin() as conn:
        conn.execute(text("SET search_path TO compliance, public"))
        for item in corrections:
            params = {"uuid": item["uuid"], "new_hint": item["new_hint"]}
            conn.execute(text("""
                UPDATE canonical_controls
                SET generation_metadata = jsonb_set(
                    generation_metadata,
                    '{merge_group_hint}',
                    to_jsonb(CAST(:new_hint AS text))
                )
                WHERE id = CAST(:uuid AS uuid)
            """), params)
            applied += 1
    logger.info("Applied %d corrections.", applied)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,153 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Fix bad L2 subtopics: stakeholder_*, escalation fragments, *_approval*, *_documentation."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
||||
logger = logging.getLogger("fix-subtopics")
|
||||
|
||||
DB_URL = os.getenv("DATABASE_URL", "postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db")
|
||||
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
|
||||
ANTHROPIC_URL = "https://api.anthropic.com/v1/messages"
|
||||
|
||||
SYSTEM_PROMPT = """Du klassifizierst Controls mit einem L1_L2 Token. Das L2 soll den KONKRETEN fachlichen Aspekt beschreiben.
|
||||
|
||||
VERBOTENE L2-Wörter (zu generisch):
|
||||
- stakeholder (zu vage — WER sind die Stakeholder? WAS wird getan?)
|
||||
- documentation (ist eine Handlung, kein Thema)
|
||||
- approval (ist eine Handlung)
|
||||
- communication (zu vage)
|
||||
|
||||
Stattdessen SPEZIFISCH:
|
||||
- "stakeholder_notification" bei Behördenmeldung → "authority_reporting"
|
||||
- "stakeholder_consultation" bei DSFA → "impact_consultation"
|
||||
- "stakeholder_engagement" bei Training → "participant_selection"
|
||||
- "escalation_procedure" → "severity_classification" oder "response_plan"
|
||||
- "access_documentation" → "access_policy" oder "permission_matrix"
|
||||
- "approval_process" → "authorization_workflow" oder "sign_off"
|
||||
|
||||
L2 = 1-3 Wörter, snake_case, FACHLICH SPEZIFISCH.
|
||||
|
||||
Antworte NUR als JSON-Array: [{"id":"...","token":"L1_L2"}, ...]"""
|
||||
|
||||
|
||||
def main():
    """Re-classify controls whose merge_group_hint carries a vague L2 subtopic.

    Selects non-deprecated, non-rejected controls whose hint matches one of
    the LIKE patterns in the query, sends them to the Anthropic Messages API
    in batches of 20, writes the accepted corrections to
    /tmp/corrections_bad_subtopics.json and then applies them to
    canonical_controls in a single transaction.

    A proposed token is rejected when it is empty, unchanged, not a string,
    still contains one of the words SYSTEM_PROMPT forbids, or contains a
    colon or whitespace (either would corrupt the ``action:object:phase``
    hint format). A failed batch is logged and skipped; it is not retried.
    """
    engine = create_engine(DB_URL, connect_args={"options": "-c search_path=compliance,public"})

    with engine.connect() as conn:
        rows = conn.execute(text("""
            SELECT cc.id, cc.control_id, cc.title,
                   COALESCE(cc.objective, '') as objective,
                   cc.generation_metadata->>'merge_group_hint' as hint
            FROM canonical_controls cc
            WHERE cc.release_state NOT IN ('deprecated', 'rejected')
              AND cc.generation_metadata->>'merge_group_hint' IS NOT NULL
              AND (
                cc.generation_metadata->>'merge_group_hint' LIKE '%stakeholder%'
                OR cc.generation_metadata->>'merge_group_hint' LIKE '%_escalation_%'
                OR cc.generation_metadata->>'merge_group_hint' LIKE '%_approval_%'
                OR cc.generation_metadata->>'merge_group_hint' LIKE '%response_time%'
                OR cc.generation_metadata->>'merge_group_hint' LIKE '%machine_re%'
                OR cc.generation_metadata->>'merge_group_hint' LIKE '%management_app%'
              )
        """)).fetchall()

    controls = []
    for uuid, cid, title, objective, hint in rows:
        # Hints have the shape "action:object:phase"; the middle part is the object token.
        parts = hint.split(":", 2) if hint else []
        controls.append({
            "uuid": str(uuid), "control_id": cid,
            "title": title or "", "objective": objective or "",
            "current_hint": hint,
            "current_object": parts[1] if len(parts) > 1 else "",
        })

    logger.info("Found %d controls with bad subtopics to fix", len(controls))

    headers = {
        "x-api-key": ANTHROPIC_API_KEY,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }

    # Every word SYSTEM_PROMPT lists as forbidden; a token containing one is still bad.
    banned_words = ("stakeholder", "documentation", "approval", "communication")

    corrections = []
    total_fixed = 0
    batch_size = 20

    for i in range(0, len(controls), batch_size):
        batch = controls[i:i + batch_size]
        items = [
            f'- id="{c["control_id"]}" cur="{c["current_object"]}" t="{c["title"]}" o="{c["objective"][:80]}"'
            for c in batch
        ]

        try:
            resp = httpx.post(ANTHROPIC_URL, headers=headers, json={
                "model": "claude-haiku-4-5-20251001",
                "max_tokens": 1500, "temperature": 0.0,
                "system": SYSTEM_PROMPT,
                "messages": [{"role": "user", "content": "Fix:\n" + "\n".join(items)}],
            }, timeout=45.0)
            resp.raise_for_status()
            content = resp.json().get("content", [{}])[0].get("text", "")
            # The model may wrap the array in prose; keep only the outermost [...] span.
            start = content.find("[")
            end = content.rfind("]") + 1
            results = json.loads(content[start:end]) if start >= 0 else []
        except Exception as e:
            logger.error("Batch %d failed: %s", i, e)
            continue

        # The model output is untrusted: tolerate a non-list payload and non-dict entries.
        if not isinstance(results, list):
            results = []
        result_map = {r.get("id", ""): r for r in results if isinstance(r, dict)}

        for ctrl in batch:
            r = result_map.get(ctrl["control_id"], {})
            new_token = r.get("token", "")
            if not isinstance(new_token, str):
                continue
            if not new_token or new_token == ctrl["current_object"]:
                continue
            if any(word in new_token for word in banned_words):
                continue  # Still bad
            if ":" in new_token or any(ch.isspace() for ch in new_token):
                continue  # Would break the action:object:phase format

            parts = ctrl["current_hint"].split(":", 2)
            action = parts[0] if parts else "implement"
            phase = parts[2] if len(parts) > 2 else "implementation"
            corrections.append({
                "uuid": ctrl["uuid"],
                "old_hint": ctrl["current_hint"],
                "new_hint": f"{action}:{new_token}:{phase}",
            })
            total_fixed += 1

        # Log roughly every 200 controls.
        if (i + batch_size) % 200 < batch_size:
            logger.info("Progress: %d/%d (fixed=%d)", min(i + batch_size, len(controls)), len(controls), total_fixed)
        time.sleep(0.3)

    logger.info("Fixed: %d of %d controls", total_fixed, len(controls))

    # Save + apply
    Path("/tmp/corrections_bad_subtopics.json").write_text(json.dumps(corrections))

    if corrections:
        logger.info("Applying %d corrections...", len(corrections))
        with engine.begin() as conn:
            conn.execute(text("SET search_path TO compliance, public"))
            for corr in corrections:
                conn.execute(text("""
                    UPDATE canonical_controls
                    SET generation_metadata = jsonb_set(
                        generation_metadata,
                        '{merge_group_hint}',
                        to_jsonb(CAST(:new_hint AS text))
                    )
                    WHERE id = CAST(:uuid AS uuid)
                """), {"uuid": corr["uuid"], "new_hint": corr["new_hint"]})
        logger.info("Done.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,284 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fix generic tokens: Re-classify controls that were assigned to
|
||||
action-based tokens (documentation, procedure, process, etc.)
|
||||
instead of topic-based tokens.
|
||||
|
||||
Runs sequentially in 5 batches. NO retry on timeout.
|
||||
|
||||
Usage:
|
||||
python3 /app/scripts/gpre0_fix_generic_tokens.py --dry-run
|
||||
python3 /app/scripts/gpre0_fix_generic_tokens.py
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
|
||||
)
|
||||
logger = logging.getLogger("gpre0-fix-generic")
|
||||
|
||||
DB_URL = os.getenv(
|
||||
"DATABASE_URL",
|
||||
"postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db",
|
||||
)
|
||||
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
|
||||
ANTHROPIC_MODEL = "claude-haiku-4-5-20251001"
|
||||
ANTHROPIC_URL = "https://api.anthropic.com/v1/messages"
|
||||
CHECKPOINT_DIR = Path("/tmp/gpre0_fix_checkpoints")
|
||||
|
||||
# Tokens that are ACTION-based, not TOPIC-based → must be re-classified
|
||||
FORBIDDEN_TOKENS = {
|
||||
"documentation", "procedure", "process",
|
||||
"compliance_reporting", "records_management",
|
||||
}
|
||||
|
||||
SYSTEM_PROMPT = """Du bist ein Compliance-Klassifizierer. Ordne jeden Control dem THEMA zu, nicht der Handlung.
|
||||
|
||||
KRITISCH: Die Tokens "documentation", "procedure", "process", "compliance_reporting",
|
||||
"records_management" sind VERBOTEN. Klassifiziere nach dem INHALTLICHEN THEMA.
|
||||
|
||||
Beispiele:
|
||||
- "Risikobewertung dokumentieren" → risk_management (NICHT documentation)
|
||||
- "Incident-Verfahren definieren" → incident (NICHT procedure)
|
||||
- "Verschlüsselungsprozess implementieren" → encryption (NICHT process)
|
||||
- "Audit-Ergebnisse berichten" → compliance_audit (NICHT compliance_reporting)
|
||||
- "Datenschutz-Unterlagen verwalten" → personal_data (NICHT records_management)
|
||||
- "Löschkonzept dokumentieren" → data_retention (NICHT documentation)
|
||||
- "Zertifizierungsverfahren definieren" → certification (NICHT procedure)
|
||||
- "Schulungsprozess durchführen" → training (NICHT process)
|
||||
|
||||
ERLAUBTE TOKENS:
|
||||
|
||||
SECURITY: multi_factor_auth, password_policy, credentials, session_management,
|
||||
privileged_access, access_control, encryption, transport_encryption,
|
||||
key_management, certificate_management, network_security, network_segmentation,
|
||||
firewall, vpn, remote_access, monitoring, audit_logging, siem, alerting,
|
||||
compliance_audit, vulnerability, patch_management, backup, disaster_recovery,
|
||||
physical_security, secure_development, api_security, input_validation,
|
||||
container_security, logging_configuration
|
||||
|
||||
DATA_PROTECTION: personal_data, sensitive_data, health_data, consent,
|
||||
data_subject_rights, data_retention, data_transfer, data_breach_notification,
|
||||
dpia, data_processing_agreement, privacy_by_design, data_processing_register,
|
||||
data_classification, cookie_consent, video_surveillance
|
||||
|
||||
GOVERNANCE: policy, training, awareness, incident, risk_management,
|
||||
third_party_management, change_management, asset_management,
|
||||
human_resources_security
|
||||
|
||||
REGULATORY: supervisory_authority, certification, product_safety, ai_system,
|
||||
financial_reporting, aml, whistleblowing, consumer_protection, ecommerce,
|
||||
telecommunications, medical_device, payment_services, critical_infrastructure,
|
||||
supply_chain_due_diligence, sustainability_reporting
|
||||
|
||||
Antworte NUR als JSON-Array: [{"id":"...","token":"...","conf":0.9}, ...]"""
|
||||
|
||||
|
||||
def call_claude(controls_batch: list[dict]) -> tuple[list[dict], dict]:
    """Classify one batch of controls with a single Messages API request.

    Returns ``(results, usage)``: the parsed JSON array from the model's
    reply and the ``usage`` object of the response. Any failure (timeout,
    HTTP error, unparsable reply) is logged and yields ``([], {})``; a reply
    without a JSON array yields ``([], usage)``. The request is never
    retried.
    """
    lines = [
        f'- id="{ctrl["control_id"]}" '
        f'cur="{ctrl["current_object"]}" '
        f't="{ctrl["title"]}" '
        f'o="{ctrl["objective"][:100]}"'
        for ctrl in controls_batch
    ]
    prompt = "Klassifiziere nach THEMA (nicht Handlung):\n" + "\n".join(lines)

    request_headers = {
        "x-api-key": ANTHROPIC_API_KEY,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    request_body = {
        "model": ANTHROPIC_MODEL,
        "max_tokens": 1500,
        "temperature": 0.0,
        "system": SYSTEM_PROMPT,
        "messages": [{"role": "user", "content": prompt}],
    }

    try:
        resp = httpx.post(
            ANTHROPIC_URL, headers=request_headers, json=request_body, timeout=45.0
        )
        resp.raise_for_status()
        data = resp.json()
        reply = data.get("content", [{}])[0].get("text", "")
        usage = data.get("usage", {})
        # Keep only the outermost [...] span in case the model added prose.
        first = reply.find("[")
        last = reply.rfind("]") + 1
        if first < 0 or last <= first:
            return [], usage
        return json.loads(reply[first:last]), usage
    except httpx.TimeoutException:
        logger.error("TIMEOUT — skipping batch")
        return [], {}
    except httpx.HTTPStatusError as e:
        status = e.response.status_code
        if status == 429:
            # Back off before the caller moves on to the next batch.
            logger.warning("Rate limited — waiting 60s")
            time.sleep(60)
        else:
            logger.error("API error %d", status)
        return [], {}
    except Exception as e:
        logger.error("Failed: %s", e)
        return [], {}
|
||||
|
||||
|
||||
def main():
    """Re-classify controls whose object token is one of FORBIDDEN_TOKENS.

    Loads the affected controls, asks the model for a topic-based token in
    batches of ``--batch-size``, prints a report, saves the corrections to
    CHECKPOINT_DIR/corrections_generic_fix.json and, unless ``--dry-run`` is
    given, applies them to canonical_controls in a single transaction.

    A control is counted as "kept" when the model returned no usable token
    for it: missing, not a string, again a forbidden token, unchanged, or
    containing a colon or whitespace (which would corrupt the
    ``action:object:phase`` hint format).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch-size", type=int, default=20)
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    engine = create_engine(
        DB_URL, connect_args={"options": "-c search_path=compliance,public"}
    )

    # Load only controls with forbidden tokens
    with engine.connect() as conn:
        rows = conn.execute(text("""
            SELECT cc.id, cc.control_id, cc.title,
                   COALESCE(cc.objective, '') as objective,
                   cc.generation_metadata->>'merge_group_hint' as hint
            FROM canonical_controls cc
            WHERE cc.generation_metadata->>'merge_group_hint' IS NOT NULL
              AND cc.release_state NOT IN ('deprecated', 'rejected')
              AND (
                cc.generation_metadata->>'merge_group_hint' LIKE '%:documentation:%'
                OR cc.generation_metadata->>'merge_group_hint' LIKE '%:procedure:%'
                OR cc.generation_metadata->>'merge_group_hint' LIKE '%:process:%'
                OR cc.generation_metadata->>'merge_group_hint' LIKE '%:compliance_reporting:%'
                OR cc.generation_metadata->>'merge_group_hint' LIKE '%:records_management:%'
              )
        """)).fetchall()

    controls = []
    for uuid, cid, title, objective, hint in rows:
        # Hints have the shape "action:object:phase"; the middle part is the object token.
        parts = hint.split(":", 2) if hint else []
        controls.append({
            "uuid": str(uuid), "control_id": cid,
            "title": title or "", "objective": objective or "",
            "current_hint": hint,
            "current_object": parts[1] if len(parts) > 1 else hint,
        })

    logger.info("Found %d controls with forbidden tokens to re-classify", len(controls))

    # Process
    total_fixed = 0
    total_kept = 0
    total_skipped = 0
    total_input_tokens = 0
    total_output_tokens = 0
    corrections = []
    change_stats: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))

    for i in range(0, len(controls), args.batch_size):
        batch = controls[i:i + args.batch_size]
        results, usage = call_claude(batch)

        total_input_tokens += usage.get("input_tokens", 0)
        total_output_tokens += usage.get("output_tokens", 0)

        # The model output is untrusted: ignore entries that are not JSON objects.
        result_map = (
            {r.get("id", ""): r for r in results if isinstance(r, dict)}
            if isinstance(results, list) else {}
        )
        if not result_map:
            total_skipped += len(batch)
            continue

        for ctrl in batch:
            r = result_map.get(ctrl["control_id"], {})
            new_token = r.get("token", "")
            usable = (
                isinstance(new_token, str)
                and bool(new_token)
                and new_token not in FORBIDDEN_TOKENS
                # A colon or whitespace would break the action:object:phase format.
                and ":" not in new_token
                and not any(ch.isspace() for ch in new_token)
            )
            if not usable:
                total_kept += 1
                continue

            old_obj = ctrl["current_object"]
            if new_token != old_obj:
                total_fixed += 1
                parts = ctrl["current_hint"].split(":", 2)
                action = parts[0] if parts else "implement"
                phase = parts[2] if len(parts) > 2 else "implementation"
                corrections.append({
                    "uuid": ctrl["uuid"],
                    "old_hint": ctrl["current_hint"],
                    "new_hint": f"{action}:{new_token}:{phase}",
                })
                change_stats[old_obj][new_token] += 1
            else:
                total_kept += 1

        processed = min(i + args.batch_size, len(controls))
        # Log roughly every 2000 controls and once at the end.
        if processed % 2000 < args.batch_size or processed >= len(controls):
            logger.info(
                "Progress: %d/%d (fixed=%d kept=%d skip=%d)",
                processed, len(controls), total_fixed, total_kept, total_skipped,
            )

        time.sleep(0.3)

    # Report
    cost_in = total_input_tokens / 1_000_000 * 0.80
    cost_out = total_output_tokens / 1_000_000 * 4.00
    logger.info("\n" + "=" * 60)
    logger.info("GENERIC TOKEN FIX REPORT")
    logger.info("=" * 60)
    logger.info("Total: %d controls", len(controls))
    logger.info("Fixed: %d", total_fixed)
    logger.info("Kept: %d (LLM also chose forbidden → kept as-is)", total_kept)
    logger.info("Skipped: %d", total_skipped)
    logger.info("Cost: $%.2f (Haiku)", cost_in + cost_out)

    logger.info("\nTop changes:")
    flat = []
    for old, news in change_stats.items():
        for new, cnt in news.items():
            flat.append((cnt, old, new))
    for cnt, old, new in sorted(flat, reverse=True)[:30]:
        logger.info(" %4d × %s → %s", cnt, old, new)

    # Save corrections
    CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)
    corr_file = CHECKPOINT_DIR / "corrections_generic_fix.json"
    corr_file.write_text(json.dumps(corrections))
    logger.info("Saved %d corrections to %s", len(corrections), corr_file)

    if args.dry_run:
        logger.info("DRY RUN — not updating DB")
        return

    if corrections:
        logger.info("Applying %d corrections...", len(corrections))
        with engine.begin() as conn:
            conn.execute(text("SET search_path TO compliance, public"))
            for corr in corrections:
                conn.execute(text("""
                    UPDATE canonical_controls
                    SET generation_metadata = jsonb_set(
                        generation_metadata,
                        '{merge_group_hint}',
                        to_jsonb(CAST(:new_hint AS text))
                    )
                    WHERE id = CAST(:uuid AS uuid)
                """), {"uuid": corr["uuid"], "new_hint": corr["new_hint"]})
        logger.info("Done. %d hints corrected.", len(corrections))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash
# Run all 10 batches sequentially. Safe: if one fails, the rest don't run.
# Each batch saves corrections to JSON before applying to DB.
#
# Usage: bash /app/scripts/gpre0_run_all.sh
#        bash /app/scripts/gpre0_run_all.sh 5   # start from batch 5

set -e

START=${1:-1}
TOTAL=10

echo "=== Starting from batch $START of $TOTAL ==="

for i in $(seq "$START" "$TOTAL"); do
    echo ""
    echo "================================================================"
    echo " BATCH $i/$TOTAL — $(date)"
    echo "================================================================"

    # With `set -e` a bare failing command would abort the script right here,
    # before the resume hint below could be printed. Capturing the status via
    # `|| EXIT_CODE=$?` keeps the failure handling reachable.
    EXIT_CODE=0
    PYTHONPATH=/app python3 /app/scripts/gpre0_validate_hints.py \
        --batch-id "$i" \
        --total-batches "$TOTAL" \
        --batch-size 20 || EXIT_CODE=$?

    if [ "$EXIT_CODE" -ne 0 ]; then
        echo "BATCH $i FAILED with exit code $EXIT_CODE"
        echo "Resume with: bash /app/scripts/gpre0_run_all.sh $i"
        exit "$EXIT_CODE"
    fi

    echo "BATCH $i DONE — $(date)"
done

echo ""
echo "ALL $TOTAL BATCHES COMPLETE!"
|
||||
@@ -0,0 +1,351 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Phase 2: Validate and correct merge_group_hints using Claude Haiku.
|
||||
|
||||
Re-classifies each control's object token against the expanded ontology
|
||||
(74 canonical tokens). Corrects wrong hints in the DB.
|
||||
|
||||
SAFETY: Split into 4 batches. NEVER retries on timeout (double-billing!).
|
||||
Writes checkpoint after each API call for safe resume.
|
||||
|
||||
Usage:
|
||||
python3 /app/scripts/gpre0_validate_hints.py --batch-id 1 --dry-run
|
||||
python3 /app/scripts/gpre0_validate_hints.py --batch-id 1
|
||||
python3 /app/scripts/gpre0_validate_hints.py --batch-id 2
|
||||
python3 /app/scripts/gpre0_validate_hints.py --batch-id 3
|
||||
python3 /app/scripts/gpre0_validate_hints.py --batch-id 4
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
|
||||
)
|
||||
logger = logging.getLogger("gpre0-validate")
|
||||
|
||||
DB_URL = os.getenv(
|
||||
"DATABASE_URL",
|
||||
"postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db",
|
||||
)
|
||||
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
|
||||
ANTHROPIC_MODEL = "claude-haiku-4-5-20251001"
|
||||
ANTHROPIC_URL = "https://api.anthropic.com/v1/messages"
|
||||
CHECKPOINT_DIR = Path("/tmp/gpre0_checkpoints")
|
||||
|
||||
SYSTEM_PROMPT = """Du bist ein Compliance-Klassifizierer. Ordne jeden Control GENAU EINEM Token zu.
|
||||
|
||||
REGEL: Waehle IMMER den naechstbesten Token aus der Liste. OTHER nur wenn ABSOLUT
|
||||
kein Token auch nur entfernt passt (<1% der Faelle). Im Zweifel: den breitesten
|
||||
passenden Token waehlen (z.B. "policy" fuer Governance-Dokumente, "procedure" fuer
|
||||
Ablauf-Definitionen, "risk_management" fuer Bewertungen).
|
||||
|
||||
TOKENS:
|
||||
|
||||
SECURITY: multi_factor_auth, password_policy, credentials, session_management,
|
||||
privileged_access, access_control, encryption, transport_encryption,
|
||||
key_management, certificate_management, network_security, network_segmentation,
|
||||
firewall, vpn, remote_access, monitoring (NUR Echtzeit-Systemueberwachung),
|
||||
audit_logging (Protokollierung/Audit Trail), siem, alerting (Meldepflichten),
|
||||
compliance_audit (externe Pruefungen), vulnerability, patch_management,
|
||||
backup, disaster_recovery, physical_security, secure_development,
|
||||
api_security, input_validation, container_security, logging_configuration
|
||||
|
||||
DATA_PROTECTION: personal_data (DSGVO-Verarbeitung), sensitive_data (Art.9),
|
||||
health_data, consent, data_subject_rights, data_retention, data_transfer,
|
||||
data_breach_notification, dpia, data_processing_agreement, privacy_by_design,
|
||||
data_processing_register, data_classification, cookie_consent, video_surveillance
|
||||
|
||||
GOVERNANCE: policy (Richtlinie definieren), procedure (Verfahren definieren),
|
||||
process (Betriebsprozess ausfuehren), training (Schulung), awareness,
|
||||
incident (Vorfallsbehandlung), risk_management, third_party_management,
|
||||
change_management, documentation, records_management, compliance_reporting,
|
||||
asset_management, human_resources_security
|
||||
|
||||
REGULATORY: supervisory_authority, certification (Zertifizierung/Konformitaet),
|
||||
product_safety, ai_system, financial_reporting, aml, whistleblowing,
|
||||
consumer_protection, ecommerce, telecommunications, medical_device,
|
||||
payment_services, critical_infrastructure, supply_chain_due_diligence,
|
||||
sustainability_reporting
|
||||
|
||||
ABGRENZUNGEN:
|
||||
- monitoring = NUR Echtzeit-Systemueberwachung, NICHT Audit/Schulung/Bewertung
|
||||
- audit_logging = Protokollierung, NICHT externe Pruefung (→ compliance_audit)
|
||||
- procedure = Verfahren DEFINIEREN, NICHT Vorfaelle behandeln (→ incident)
|
||||
- personal_data = DSGVO-Verarbeitung, NICHT Zertifizierung (→ certification)
|
||||
- alerting = Meldepflichten, NICHT Vorfallsbehandlung (→ incident)
|
||||
|
||||
Antworte NUR als JSON-Array: [{"id":"...","token":"...","conf":0.9}, ...]
|
||||
KEIN weiterer Text. Nur das Array."""
|
||||
|
||||
|
||||
def call_claude(controls_batch: list[dict]) -> tuple[list[dict], dict]:
    """Classify one batch of controls with a single Messages API request.

    Returns ``(results, usage)``: the parsed JSON array from the model's
    reply and the ``usage`` object of the response. A reply without a JSON
    array yields ``([], usage)``; a timeout, HTTP error or any other failure
    is logged and yields ``([], {})``. The request is deliberately never
    retried (double-billing risk).
    """
    lines = [
        f'- id="{ctrl["control_id"]}" '
        f'cur="{ctrl["current_object"]}" '
        f't="{ctrl["title"]}" '
        f'o="{ctrl["objective"][:100]}"'
        for ctrl in controls_batch
    ]
    prompt = "Klassifiziere:\n" + "\n".join(lines)

    request_headers = {
        "x-api-key": ANTHROPIC_API_KEY,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    request_body = {
        "model": ANTHROPIC_MODEL,
        "max_tokens": 1500,
        "temperature": 0.0,
        "system": SYSTEM_PROMPT,
        "messages": [{"role": "user", "content": prompt}],
    }

    try:
        resp = httpx.post(
            ANTHROPIC_URL, headers=request_headers, json=request_body, timeout=45.0
        )
        resp.raise_for_status()
        data = resp.json()
        reply = data.get("content", [{}])[0].get("text", "")
        usage = data.get("usage", {})
        # Keep only the outermost [...] span in case the model added prose.
        first = reply.find("[")
        last = reply.rfind("]") + 1
        if first < 0 or last <= first:
            logger.warning("No JSON array in response")
            return [], usage
        return json.loads(reply[first:last]), usage
    except httpx.TimeoutException:
        # CRITICAL: Do NOT retry! Log and skip.
        logger.error("TIMEOUT — skipping batch (NOT retrying to avoid double-billing)")
        return [], {}
    except httpx.HTTPStatusError as e:
        status = e.response.status_code
        if status == 429:
            # Back off so the next batch does not hit the limit straight away.
            logger.warning("Rate limited — waiting 60s then skipping")
            time.sleep(60)
        else:
            logger.error("API error %d — skipping batch", status)
        return [], {}
    except Exception as e:
        logger.error("Request failed — skipping: %s", e)
        return [], {}
|
||||
|
||||
|
||||
def load_checkpoint(batch_id: int) -> int:
    """Return the ``last_index`` stored in this batch's checkpoint file.

    Yields 0 when no checkpoint file exists or the file has no
    ``last_index`` entry.
    """
    checkpoint_path = CHECKPOINT_DIR / f"batch_{batch_id}.json"
    if not checkpoint_path.exists():
        return 0
    stored = json.loads(checkpoint_path.read_text())
    return stored.get("last_index", 0)
|
||||
|
||||
|
||||
def save_checkpoint(batch_id: int, last_index: int, stats: dict):
    """Write this batch's progress to its checkpoint file as JSON.

    The file holds ``batch_id``, ``last_index`` and every entry of
    ``stats``; CHECKPOINT_DIR is created if it does not exist yet.
    """
    CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)
    record = {"batch_id": batch_id, "last_index": last_index}
    # Merged last, so a same-named key in stats wins — as with ** unpacking.
    record.update(stats)
    checkpoint_path = CHECKPOINT_DIR / f"batch_{batch_id}.json"
    checkpoint_path.write_text(json.dumps(record))
|
||||
|
||||
|
||||
def main():
    """Validate and correct merge_group_hints for one slice of the controls.

    All eligible control ids are loaded in a deterministic order and split
    into ``--total-batches`` contiguous slices; ``--batch-id`` (1-based)
    selects the slice to process. The slice is sent to the model in groups
    of ``--batch-size`` controls. A checkpoint and the corrections collected
    so far are written after every API call, so ``--resume`` can continue
    without losing corrections from the interrupted run. Unless
    ``--dry-run`` is given, the corrections are applied to
    canonical_controls in a single transaction at the end.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch-id", type=int, required=True)
    parser.add_argument("--total-batches", type=int, default=10)
    parser.add_argument("--batch-size", type=int, default=20)
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--resume", action="store_true",
                        help="Resume from checkpoint")
    args = parser.parse_args()

    if args.total_batches < 1 or not 1 <= args.batch_id <= args.total_batches:
        parser.error("--batch-id must be between 1 and --total-batches")
    if args.batch_size < 1:
        parser.error("--batch-size must be at least 1")

    engine = create_engine(
        DB_URL, connect_args={"options": "-c search_path=compliance,public"}
    )

    # Load ALL control IDs ordered deterministically, then select this batch's slice
    with engine.connect() as conn:
        all_ids = conn.execute(text("""
            SELECT cc.id
            FROM canonical_controls cc
            WHERE cc.generation_metadata->>'merge_group_hint' IS NOT NULL
              AND cc.generation_metadata->>'merge_group_hint' != ''
              AND cc.release_state NOT IN ('deprecated', 'rejected')
            ORDER BY cc.id
        """)).fetchall()

    total = len(all_ids)
    chunk = total // args.total_batches
    start_idx = (args.batch_id - 1) * chunk
    # The last batch also takes the remainder of the integer division.
    end_idx = total if args.batch_id == args.total_batches else args.batch_id * chunk
    batch_ids = [str(r[0]) for r in all_ids[start_idx:end_idx]]

    logger.info("Batch %d/%d: controls %d-%d (%d controls of %d total)",
                args.batch_id, args.total_batches, start_idx, end_idx, len(batch_ids), total)

    if not batch_ids:
        # An empty slice would render "IN ()" below, which is a SQL syntax error.
        logger.info("No corrections needed.")
        return

    # Load full data for this batch. The ids were just read from the database
    # by the query above, so inlining them does not expose external input.
    id_list = ",".join(f"'{uid}'" for uid in batch_ids)
    with engine.connect() as conn:
        rows = conn.execute(text(f"""
            SELECT cc.id, cc.control_id, cc.title,
                   COALESCE(cc.objective, '') as objective,
                   cc.generation_metadata->>'merge_group_hint' as hint
            FROM canonical_controls cc
            WHERE cc.id IN ({id_list})
            ORDER BY cc.id
        """)).fetchall()

    controls = []
    for uuid, cid, title, objective, hint in rows:
        # Hints have the shape "action:object:phase"; the middle part is the object token.
        parts = hint.split(":", 2) if hint else []
        controls.append({
            "uuid": str(uuid), "control_id": cid,
            "title": title or "", "objective": objective or "",
            "current_hint": hint, "current_object": parts[1] if len(parts) > 1 else hint,
        })

    corr_file = CHECKPOINT_DIR / f"corrections_batch_{args.batch_id}.json"
    corrections: list[dict] = []

    # Resume from checkpoint?
    start_from = 0
    if args.resume:
        start_from = load_checkpoint(args.batch_id)
        if start_from > 0:
            logger.info("Resuming from index %d", start_from)
            # Controls before start_from are not re-sent to the model, so their
            # corrections must come from the file the interrupted run wrote.
            if corr_file.exists():
                corrections = json.loads(corr_file.read_text())
                logger.info("Reloaded %d corrections from %s", len(corrections), corr_file)

    # Process
    total_same = 0
    total_changed = 0
    total_other = 0
    total_skipped = 0
    total_input_tokens = 0
    total_output_tokens = 0
    change_stats: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))

    for i in range(start_from, len(controls), args.batch_size):
        batch = controls[i:i + args.batch_size]
        results, usage = call_claude(batch)

        total_input_tokens += usage.get("input_tokens", 0)
        total_output_tokens += usage.get("output_tokens", 0)

        # The model output is untrusted: ignore entries that are not JSON objects.
        result_map = (
            {r.get("id", ""): r for r in results if isinstance(r, dict)}
            if isinstance(results, list) else {}
        )
        if not result_map:
            total_skipped += len(batch)
            save_checkpoint(args.batch_id, i + args.batch_size, {
                "same": total_same, "changed": total_changed,
                "other": total_other, "skipped": total_skipped,
            })
            continue

        for ctrl in batch:
            r = result_map.get(ctrl["control_id"], {})
            new_token = r.get("token", "")
            # A colon would corrupt the action:object:phase format of the hint.
            if not isinstance(new_token, str) or not new_token or ":" in new_token:
                total_skipped += 1
                continue

            old_obj = ctrl["current_object"]
            if new_token == "OTHER":
                total_other += 1
            elif new_token == old_obj:
                total_same += 1
            else:
                total_changed += 1
                parts = ctrl["current_hint"].split(":", 2)
                action = parts[0] if parts else "implement"
                phase = parts[2] if len(parts) > 2 else "implementation"
                corrections.append({
                    "uuid": ctrl["uuid"],
                    "old_hint": ctrl["current_hint"],
                    "new_hint": f"{action}:{new_token}:{phase}",
                })
                change_stats[old_obj][new_token] += 1

        # Persist corrections before the checkpoint so a resumed run never
        # skips controls whose corrections were not written to disk.
        if corrections:
            CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)
            corr_file.write_text(json.dumps(corrections))

        # Checkpoint every batch
        save_checkpoint(args.batch_id, i + args.batch_size, {
            "same": total_same, "changed": total_changed,
            "other": total_other, "skipped": total_skipped,
        })

        processed = min(i + args.batch_size, len(controls))
        # Log roughly every 1000 controls and once at the end.
        if processed % 1000 < args.batch_size or processed >= len(controls):
            logger.info(
                "Batch %d: %d/%d (same=%d changed=%d other=%d skip=%d)",
                args.batch_id, processed, len(controls),
                total_same, total_changed, total_other, total_skipped,
            )

        time.sleep(0.3)

    # Report
    cost_in = total_input_tokens / 1_000_000 * 0.80  # Haiku
    cost_out = total_output_tokens / 1_000_000 * 4.00  # Haiku
    total_cost = cost_in + cost_out
    total_proc = total_same + total_changed + total_other

    logger.info("\n" + "=" * 60)
    logger.info("BATCH %d REPORT", args.batch_id)
    logger.info("=" * 60)
    logger.info("Processed: %d | Skipped: %d", total_proc, total_skipped)
    logger.info("Same: %d (%.1f%%)", total_same, total_same / max(total_proc, 1) * 100)
    logger.info("Changed: %d (%.1f%%)", total_changed, total_changed / max(total_proc, 1) * 100)
    logger.info("OTHER: %d (%.1f%%)", total_other, total_other / max(total_proc, 1) * 100)
    logger.info("Cost: $%.2f (Haiku)", total_cost)
    logger.info("Cost/ctrl: $%.5f", total_cost / max(total_proc, 1))

    # Top changes
    flat = []
    for old, news in change_stats.items():
        for new, cnt in news.items():
            flat.append((cnt, old, new))
    logger.info("\nTop Changes:")
    for cnt, old, new in sorted(flat, reverse=True)[:20]:
        logger.info(" %4d × %s → %s", cnt, old, new)

    # Always save corrections to file (recovery safety)
    if corrections:
        CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)
        corr_file.write_text(json.dumps(corrections))
        logger.info("Saved %d corrections to %s", len(corrections), corr_file)

    if args.dry_run:
        logger.info("\nDRY RUN — not updating DB")
        return

    # Apply corrections in single transaction
    if corrections:
        logger.info("\nApplying %d corrections...", len(corrections))
        with engine.begin() as conn:
            conn.execute(text("SET search_path TO compliance, public"))
            for corr in corrections:
                conn.execute(text("""
                    UPDATE canonical_controls
                    SET generation_metadata = jsonb_set(
                        generation_metadata,
                        '{merge_group_hint}',
                        to_jsonb(CAST(:new_hint AS text))
                    )
                    WHERE id = CAST(:uuid AS uuid)
                """), {"uuid": corr["uuid"], "new_hint": corr["new_hint"]})
        logger.info("Done. %d hints corrected.", len(corrections))
    else:
        logger.info("No corrections needed.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,203 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
G-pre1 INCREMENTAL: Append new objects to object_groups via embedding similarity.
|
||||
|
||||
Non-destructive alternative to gpre1_object_clustering.py (which DELETEs and
|
||||
rebuilds all groups via K-Means). This script:
|
||||
- Finds objects referenced in atomic controls that are NOT yet in
|
||||
object_groups.members
|
||||
- Embeds each unmatched object via bge-m3 (local embedding-service)
|
||||
- Nearest-neighbor search against existing object_groups.canonical_name
|
||||
- Cosine >= --threshold (default 0.85) → APPEND to existing group's members
|
||||
- Cosine < --threshold → CREATE new object_group with next free group_id
|
||||
|
||||
Existing groups stay; only members get appended and new groups get added.
|
||||
|
||||
Usage (inside control-pipeline container):
|
||||
python3 /app/scripts/gpre1_object_groups_incremental.py --since 2026-05-18T02:53:00+00:00 --dry-run
|
||||
python3 /app/scripts/gpre1_object_groups_incremental.py --since 2026-05-18T02:53:00+00:00
|
||||
python3 /app/scripts/gpre1_object_groups_incremental.py --since 2026-05-18T02:53:00+00:00 --threshold 0.82
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
import httpx
|
||||
import numpy as np
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
||||
logger = logging.getLogger("gpre1_inc")
|
||||
|
||||
DB_URL = os.getenv("DATABASE_URL", "postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db")
|
||||
EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://embedding-service:8087")
|
||||
BATCH_SIZE = 64
|
||||
|
||||
|
||||
def embed_batch(texts: list[str]) -> np.ndarray:
    """Send one batch of strings to the embedding service and return its vectors.

    POSTs ``{"texts": texts, "normalize": True}`` to ``{EMBEDDING_URL}/embed``
    and converts the ``"embeddings"`` field of the JSON reply into a float32
    array.

    Args:
        texts: the strings to embed in a single request.

    Returns:
        float32 array built from the service's ``"embeddings"`` payload.

    Raises:
        httpx.HTTPStatusError: if the service replies with an error status.
    """
    endpoint = f"{EMBEDDING_URL}/embed"
    payload = {"texts": texts, "normalize": True}
    # A fresh client per call; the generous timeout covers slow model warm-up.
    with httpx.Client(timeout=120.0) as client:
        response = client.post(endpoint, json=payload)
        response.raise_for_status()
        vectors = response.json()["embeddings"]
    return np.array(vectors, dtype=np.float32)
|
||||
|
||||
|
||||
def embed_many(texts: list[str], label: str = "") -> np.ndarray:
    """Embed many strings in batches of ``BATCH_SIZE``.

    Args:
        texts: strings to embed, in order.
        label: tag included in the progress log lines.

    Returns:
        float32 array with one row per input string. The row width is taken
        from the first batch returned by ``embed_batch`` rather than being
        hard-coded, so a model with a different dimension still works. For
        empty input an empty ``(0, 1024)`` array is returned, as before.
    """
    n = len(texts)
    if n == 0:
        # Nothing to send to the service; keep the historical empty shape.
        return np.zeros((0, 1024), dtype=np.float32)

    out = None
    for i in range(0, n, BATCH_SIZE):
        batch = texts[i:i + BATCH_SIZE]
        vectors = embed_batch(batch)
        if out is None:
            # Size the output buffer from the dimension the service actually
            # returns instead of assuming 1024.
            out = np.zeros((n, vectors.shape[1]), dtype=np.float32)
        out[i:i + len(batch)] = vectors
        # Progress line on every 20th batch only, to keep the log readable.
        if (i // BATCH_SIZE) % 20 == 0:
            logger.info(" %s: %d/%d embedded", label, i + len(batch), n)
    return out
|
||||
|
||||
|
||||
def main():
    """Append objects from new atomic controls to ``object_groups``.

    Steps:
      1. Load all existing ``object_groups`` rows and the union of their members.
      2. Collect the distinct object part of ``merge_group_hint`` for pass0b
         controls created at/after ``--since``, normalize them, and drop those
         already present in some group.
      3. Embed existing canonical names and the unmatched objects.
      4. For each unmatched object pick the most similar existing group; a
         score >= ``--threshold`` appends to that group, otherwise a new group
         is planned.
      5. Unless ``--dry-run`` is set, apply the plan in one transaction using
         UPDATE/INSERT only.

    If ``object_groups`` is empty there is nothing to compare against, so every
    unmatched object becomes a new group (previously this case crashed in
    ``argmax`` on a zero-width similarity matrix).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--since", required=True, help="ISO datetime — consider atomics from this date onwards")
    parser.add_argument("--threshold", type=float, default=0.85,
                        help="Cosine threshold for appending to existing group (default 0.85)")
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    since_dt = datetime.fromisoformat(args.since.replace("Z", "+00:00"))
    logger.info("Incremental object_groups update since %s, threshold=%.2f, dry_run=%s",
                since_dt.isoformat(), args.threshold, args.dry_run)

    engine = create_engine(DB_URL, connect_args={"options": "-c search_path=compliance,public"})

    # 1. Load existing object_groups (id, canonical_name, members)
    with engine.connect() as c:
        rows = c.execute(text("""
            SELECT group_id, canonical_name, members FROM object_groups
        """)).fetchall()
    # `members` may arrive as JSON text or already decoded; a NULL/`null`
    # value is treated as an empty member list instead of crashing below.
    existing_groups = [
        (r[0], r[1], (json.loads(r[2]) if isinstance(r[2], str) else r[2]) or [])
        for r in rows
    ]
    logger.info("Loaded %d existing object_groups", len(existing_groups))

    existing_members: set[str] = set()
    for _, _, members in existing_groups:
        existing_members.update(members)
    logger.info("Existing union of members: %d distinct strings", len(existing_members))

    # 2. Find unmatched objects from atomics since `since`
    from services.control_dedup import normalize_object
    with engine.connect() as c:
        rows = c.execute(text("""
            SELECT DISTINCT split_part(generation_metadata->>'merge_group_hint', ':', 2) AS obj
            FROM canonical_controls
            WHERE decomposition_method = 'pass0b'
              AND created_at >= :since
              AND generation_metadata->>'merge_group_hint' IS NOT NULL
              AND generation_metadata->>'merge_group_hint' != ''
              AND release_state NOT IN ('deprecated', 'rejected', 'duplicate')
        """), {"since": since_dt}).fetchall()
    new_objects_raw = [r[0] for r in rows if r[0]]
    logger.info("Distinct objects in new atomics: %d", len(new_objects_raw))

    # Normalize each + dedupe; track normalized → {normalized, originals}
    normed_to_originals: dict[str, set[str]] = {}
    for obj in new_objects_raw:
        normed = normalize_object(obj)
        if not normed:
            continue
        if normed in existing_members or obj in existing_members:
            continue  # already in some group
        normed_to_originals.setdefault(normed, set()).update([normed, obj])

    unmatched_normed = list(normed_to_originals.keys())
    logger.info("Unmatched normalized objects: %d", len(unmatched_normed))

    if not unmatched_normed:
        logger.info("Nothing to do — all objects already mapped.")
        return

    # 3 + 4. Embed and find the nearest existing group for each unmatched object.
    # Skipped entirely when there are no groups yet: argmax over a zero-width
    # axis raises ValueError, and there is nothing to match against anyway.
    best_idx = None
    best_score = None
    if existing_groups:
        logger.info("Embedding %d existing canonical_names...", len(existing_groups))
        existing_emb = embed_many([g[1] for g in existing_groups], label="existing")
        logger.info("Embedding %d unmatched objects...", len(unmatched_normed))
        unmatched_emb = embed_many(unmatched_normed, label="unmatched")

        # cosine = dot product (embeddings are requested with normalize=True)
        logger.info("Computing nearest-neighbor matches...")
        sims = unmatched_emb @ existing_emb.T  # (N_unmatched, N_existing)
        best_idx = sims.argmax(axis=1)
        best_score = sims.max(axis=1)
    else:
        logger.info("No existing object_groups — every unmatched object becomes a new group")

    appends: dict[int, list[str]] = {}  # group_id → list of new members
    new_groups: list[tuple[str, list[str]]] = []  # (canonical_name, members)

    for i, normed in enumerate(unmatched_normed):
        originals = sorted(normed_to_originals[normed])
        if best_score is not None and best_score[i] >= args.threshold:
            gid = existing_groups[int(best_idx[i])][0]
            appends.setdefault(gid, []).extend(originals)
        else:
            # Create a new group with this object as canonical
            new_groups.append((normed, originals))

    # Stats
    distinct_groups_to_extend = len(appends)
    total_appends = sum(len(v) for v in appends.values())
    logger.info("Plan: extend %d existing groups (+%d members), create %d new groups",
                distinct_groups_to_extend, total_appends, len(new_groups))

    if args.dry_run:
        logger.info("DRY RUN — no writes")
        # Show a small sample of the plan
        if appends:
            sample = list(appends.items())[:5]
            for gid, members in sample:
                gname = next((g[1] for g in existing_groups if g[0] == gid), "?")
                logger.info(" Extend group_id=%d (%s) with: %s", gid, gname, members[:3])
        if new_groups:
            for name, members in new_groups[:5]:
                logger.info(" NEW group: %s — members=%s", name, members[:3])
        return

    # 5. Write — pure INSERT/UPDATE, all in one transaction
    with engine.begin() as c:
        c.execute(text("SET search_path TO compliance, public"))

        # UPDATE existing groups (append to members JSONB, de-duplicated)
        for gid, new_members in appends.items():
            c.execute(text("""
                UPDATE object_groups
                SET members = (
                        SELECT jsonb_agg(DISTINCT m)
                        FROM jsonb_array_elements_text(members || CAST(:new_members AS jsonb)) AS x(m)
                    ),
                    member_count = (
                        SELECT count(DISTINCT m)
                        FROM jsonb_array_elements_text(members || CAST(:new_members AS jsonb)) AS x(m)
                    )
                WHERE group_id = :gid
            """), {"gid": gid, "new_members": json.dumps(new_members)})

        # INSERT new groups with next free group_id
        next_gid_row = c.execute(text("SELECT COALESCE(MAX(group_id), 0) + 1 FROM object_groups")).fetchone()
        next_gid = next_gid_row[0] if next_gid_row else 1
        for name, members in new_groups:
            c.execute(text("""
                INSERT INTO object_groups (group_id, canonical_name, member_count, members, top_controls_count)
                VALUES (:gid, :name, :count, CAST(:members AS jsonb), 0)
            """), {
                "gid": next_gid,
                "name": name[:200],  # truncated to 200 chars; presumably the column limit — verify against schema
                "count": len(members),
                "members": json.dumps(members),
            })
            next_gid += 1

    logger.info("DONE — extended %d existing groups (+%d members), created %d new groups",
                distinct_groups_to_extend, total_appends, len(new_groups))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,214 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
G-pre2 v2: Build Master Controls directly from canonical tokens.
|
||||
|
||||
No K-Means needed — Phase 2 already normalized merge_group_hints
|
||||
to 74 canonical tokens. Each token = one object group.
|
||||
|
||||
Groups controls by (canonical_token, phase) and creates MCs
|
||||
for tokens with >=2 distinct phases.
|
||||
|
||||
Usage:
|
||||
python3 /app/scripts/gpre2_direct_mc.py --dry-run
|
||||
python3 /app/scripts/gpre2_direct_mc.py --min-phases 2
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict
|
||||
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
|
||||
)
|
||||
logger = logging.getLogger("gpre2-direct")
|
||||
|
||||
DB_URL = os.getenv(
|
||||
"DATABASE_URL",
|
||||
"postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db",
|
||||
)
|
||||
|
||||
PHASE_ORDER = {
|
||||
"scope": 0, "definition": 1, "governance": 1,
|
||||
"design": 2, "implementation": 3, "configuration": 3,
|
||||
"operation": 4, "training": 4, "monitoring": 5,
|
||||
"testing": 6, "review": 7, "assessment": 8, "remediation": 8,
|
||||
"validation": 9, "reporting": 10, "evidence": 11,
|
||||
}
|
||||
|
||||
|
||||
def main():
    """Rebuild Master Controls directly from ``merge_group_hint`` tokens.

    Steps:
      1. Load every non-deprecated, non-rejected control that has a
         ``merge_group_hint``.
      2. Split each hint as ``action:object[:phase]`` (phase defaults to
         ``"implementation"``) and group controls by (object token, phase).
         Hints with an empty object token are skipped, since they would
         otherwise produce a Master Control without a name.
      3. Plan one Master Control per token that covers at least
         ``--min-phases`` distinct phases.
      4. Unless ``--dry-run`` is set, DELETE all rows of
         ``master_control_members`` and ``master_controls`` and insert the new
         plan in a single transaction.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--min-phases", type=int, default=2)
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    engine = create_engine(
        DB_URL, connect_args={"options": "-c search_path=compliance,public"}
    )

    # Step 1: Load all controls with merge_group_hint
    logger.info("Loading controls...")
    with engine.connect() as c:
        rows = c.execute(text("""
            SELECT id, control_id,
                   generation_metadata->>'merge_group_hint' AS hint
            FROM canonical_controls
            WHERE generation_metadata->>'merge_group_hint' IS NOT NULL
              AND generation_metadata->>'merge_group_hint' != ''
              AND release_state NOT IN ('deprecated', 'rejected')
        """)).fetchall()

    logger.info("Loaded %d controls", len(rows))

    # Step 2: Group by (object_token, phase)
    token_phases: dict[str, dict[str, list]] = defaultdict(
        lambda: defaultdict(list)
    )
    skipped_empty_token = 0

    for uuid, control_id, hint in rows:
        parts = hint.split(":", 2)
        if len(parts) < 2:
            continue
        action = parts[0]
        obj = parts[1]
        if not obj:
            # e.g. "action::phase" — no object to group under
            skipped_empty_token += 1
            continue
        phase = parts[2] if len(parts) > 2 else "implementation"
        token_phases[obj][phase].append((str(uuid), control_id, action))

    if skipped_empty_token:
        logger.warning("Skipped %d controls with an empty object token", skipped_empty_token)
    logger.info("Found %d unique object tokens", len(token_phases))

    # Step 3: Create Master Controls
    master_controls = []
    master_members = []

    for token, phases in token_phases.items():
        if len(phases) < args.min_phases:
            continue

        # Phases not listed in PHASE_ORDER sort last (rank 99).
        sorted_phases = sorted(
            phases.keys(), key=lambda p: PHASE_ORDER.get(p, 99)
        )
        phase_counts = {p: len(ctrls) for p, ctrls in phases.items()}
        total = sum(phase_counts.values())

        master_controls.append({
            "canonical_name": token,
            "phases_covered": json.dumps(sorted_phases),
            "phase_control_count": json.dumps(phase_counts),
            "total_controls": total,
        })

        for phase, controls in phases.items():
            for ctrl_uuid, ctrl_id, action in controls:
                master_members.append({
                    "canonical_name": token,
                    "control_uuid": ctrl_uuid,
                    "phase": phase,
                    "action": action,
                })

    logger.info(
        "Created %d Master Controls with %d members (min %d phases)",
        len(master_controls), len(master_members), args.min_phases,
    )

    # Stats
    if master_controls:
        counts = [mc["total_controls"] for mc in master_controls]
        phases_per = [
            len(json.loads(mc["phases_covered"])) for mc in master_controls
        ]
        logger.info(" Avg controls/MC: %.1f", sum(counts) / len(counts))
        logger.info(" Max controls/MC: %d", max(counts))
        logger.info(" Avg phases/MC: %.1f", sum(phases_per) / len(phases_per))
        logger.info(" Max phases/MC: %d", max(phases_per))

        # Size distribution
        logger.info("\n Size distribution:")
        logger.info(" ≤10: %d", sum(1 for c in counts if c <= 10))
        logger.info(" 11-50: %d", sum(1 for c in counts if 11 <= c <= 50))
        logger.info(" 51-200: %d", sum(1 for c in counts if 51 <= c <= 200))
        logger.info(" 201-500: %d", sum(1 for c in counts if 201 <= c <= 500))
        logger.info(" 501-2K: %d", sum(1 for c in counts if 501 <= c <= 2000))
        logger.info(" >2K: %d", sum(1 for c in counts if c > 2000))

        # Top 15
        top = sorted(master_controls, key=lambda x: -x["total_controls"])[:15]
        logger.info("\n Top 15 Master Controls:")
        for mc in top:
            logger.info(
                " %6d %s (%d phases)",
                mc["total_controls"],
                mc["canonical_name"],
                len(json.loads(mc["phases_covered"])),
            )

    if args.dry_run:
        logger.info("\nDRY RUN — not writing to DB")
        return

    # Step 4: Write to DB (destructive rebuild inside one transaction)
    with engine.begin() as c:
        c.execute(text("SET search_path TO compliance, public"))
        c.execute(text("DELETE FROM master_control_members"))
        c.execute(text("DELETE FROM master_controls"))

        # MC ids continue after the highest existing object_groups.group_id
        max_gid = c.execute(
            text("SELECT COALESCE(MAX(group_id), 0) FROM object_groups")
        ).scalar()
        next_gid = max_gid + 1

        mc_uuids = {}
        for mc in master_controls:
            gid = next_gid
            next_gid += 1
            mc_id = f"MC-{gid}"

            # RETURNING id avoids a second SELECT round trip per inserted MC.
            mc_uuid = c.execute(text("""
                INSERT INTO master_controls
                    (master_control_id, object_group_id, canonical_name,
                     phases_covered, phase_control_count, total_controls)
                VALUES (:mcid, :gid, :name,
                        CAST(:phases AS jsonb),
                        CAST(:pcounts AS jsonb), :total)
                RETURNING id
            """), {
                "mcid": mc_id, "gid": gid,
                "name": mc["canonical_name"],
                "phases": mc["phases_covered"],
                "pcounts": mc["phase_control_count"],
                "total": mc["total_controls"],
            }).scalar()
            mc_uuids[mc["canonical_name"]] = str(mc_uuid)

        # Insert members
        mem_count = 0
        for mem in master_members:
            mc_uuid = mc_uuids.get(mem["canonical_name"])
            if not mc_uuid:
                continue
            c.execute(text("""
                INSERT INTO master_control_members
                    (master_control_uuid, control_uuid, phase, action)
                VALUES (CAST(:mc AS uuid), CAST(:ctrl AS uuid),
                        :phase, :action)
            """), {
                "mc": mc_uuid,
                "ctrl": mem["control_uuid"],
                "phase": mem["phase"],
                "action": mem["action"],
            })
            mem_count += 1

    logger.info("Wrote %d MCs + %d members to DB", len(master_controls), mem_count)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,267 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
G-pre2 INCREMENTAL: Add new atomic controls to Master Controls without rebuild.
|
||||
|
||||
Unlike gpre2_master_controls.py which DELETEs and rebuilds the entire
|
||||
master_controls table, this script is non-destructive:
|
||||
- Existing master_controls stay untouched (same UUIDs, same MC-IDs)
|
||||
- For each object_group that gained new atomic controls:
|
||||
* If MC exists: append new members + update total_controls/phase_counts
|
||||
* If MC missing AND group now has >= min_phases: create new MC + all members
|
||||
|
||||
Usage:
|
||||
python3 /app/scripts/gpre2_master_controls_incremental.py --since 2026-05-18T02:53:00+00:00
|
||||
python3 /app/scripts/gpre2_master_controls_incremental.py --since 2026-05-18T02:53:00+00:00 --dry-run
|
||||
python3 /app/scripts/gpre2_master_controls_incremental.py --since 2026-05-18T02:53:00+00:00 --min-phases 2
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
||||
logger = logging.getLogger("gpre2_incremental")
|
||||
|
||||
DB_URL = os.getenv("DATABASE_URL", "postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db")
|
||||
|
||||
|
||||
def main():
    """Add new atomic controls to Master Controls without rebuilding them.

    Steps:
      1. Build a reverse index from every ``object_groups`` member to its
         ``(group_id, canonical_name)``.
      2. Load all controls that carry a ``merge_group_hint``.
      3. Map each control to an object group (by normalized object, then by
         raw object) and bucket it by phase; controls created at/after
         ``--since`` mark their group as touched.
      4. For each touched group: if ``MC-<group_id>`` exists, plan inserts for
         controls that are not yet members and refresh its counters; otherwise
         plan a new MC when the group covers at least ``--min-phases`` phases.
      5. Unless ``--dry-run`` is set, apply the plan in one transaction using
         INSERT/UPDATE only.

    Timestamps: a ``--since`` value or ``created_at`` without timezone info is
    interpreted as UTC so that aware and naive values can be compared
    (previously a mix raised ``TypeError``). NOTE(review): confirm UTC is the
    right assumption for naive ``created_at`` values in this database.
    """
    # Local import: the file only imports `datetime` from the datetime module.
    from datetime import timezone

    parser = argparse.ArgumentParser()
    parser.add_argument("--since", required=True, help="ISO datetime — only consider atomics created at/after this")
    parser.add_argument("--min-phases", type=int, default=2, help="Min distinct phases to form a new MC (default 2)")
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    since_dt = datetime.fromisoformat(args.since.replace("Z", "+00:00"))
    if since_dt.tzinfo is None:
        since_dt = since_dt.replace(tzinfo=timezone.utc)
    logger.info("Incremental run since %s, min_phases=%d, dry_run=%s",
                since_dt.isoformat(), args.min_phases, args.dry_run)

    engine = create_engine(DB_URL, connect_args={"options": "-c search_path=compliance,public"})

    # Phases not listed here sort last (rank 99) in phases_covered.
    PHASE_ORDER = {
        "scope": 0, "definition": 1, "governance": 1, "design": 2,
        "implementation": 3, "configuration": 3, "operation": 4, "training": 4,
        "monitoring": 5, "testing": 6, "review": 7, "assessment": 8,
        "remediation": 8, "validation": 9, "reporting": 10, "evidence": 11,
    }

    # Step 1: object → group_id reverse index
    object_to_group = {}
    with engine.connect() as c:
        groups = c.execute(text("SELECT group_id, canonical_name, members FROM object_groups")).fetchall()
    for gid, canonical, members_json in groups:
        members = json.loads(members_json) if isinstance(members_json, str) else members_json
        # A NULL/`null` members value is treated as an empty list.
        for member in members or []:
            object_to_group[member] = (gid, canonical)
    logger.info("Reverse index: %d objects → %d groups", len(object_to_group), len(groups))

    # Step 2: Load ALL atomics with merge_group_hint (we need full picture)
    with engine.connect() as c:
        all_rows = c.execute(text("""
            SELECT id, control_id,
                   generation_metadata->>'merge_group_hint' AS hint,
                   title,
                   created_at
            FROM canonical_controls
            WHERE generation_metadata->>'merge_group_hint' IS NOT NULL
              AND generation_metadata->>'merge_group_hint' != ''
              AND release_state NOT IN ('deprecated', 'rejected', 'duplicate')
        """)).fetchall()
    logger.info("Loaded %d atomic controls total", len(all_rows))

    # Step 3: Build group_phases (gid → phase → [(uuid, control_id, action, title, is_new)])
    from services.control_dedup import normalize_object
    group_phases: dict[int, dict[str, list]] = defaultdict(lambda: defaultdict(list))
    group_names: dict[int, str] = {}
    new_atomic_count = 0
    new_groups_touched: set[int] = set()
    unmatched = 0

    for uuid, control_id, hint, title, created_at in all_rows:
        parts = hint.split(":", 2)
        if len(parts) < 2:
            continue
        action = parts[0]
        obj = parts[1]
        phase = parts[2] if len(parts) > 2 else "implementation"
        normed = normalize_object(obj)
        if normed in object_to_group:
            gid, canonical = object_to_group[normed]
        elif obj in object_to_group:
            gid, canonical = object_to_group[obj]
        else:
            unmatched += 1
            continue

        if created_at is None:
            # No creation timestamp: cannot be attributed to this run.
            is_new = False
        else:
            if created_at.tzinfo is None:
                created_at = created_at.replace(tzinfo=timezone.utc)
            is_new = created_at >= since_dt

        group_phases[gid][phase].append((str(uuid), control_id, action, title, is_new))
        group_names[gid] = canonical
        if is_new:
            new_atomic_count += 1
            new_groups_touched.add(gid)

    logger.info("Total: %d new atomics across %d object_groups (%d unmatched)",
                new_atomic_count, len(new_groups_touched), unmatched)

    if not new_groups_touched:
        logger.info("Nothing to do — no new atomics matched to any object_group.")
        return

    # Step 4: For each touched object_group, decide action
    stats = {
        "groups_examined": len(new_groups_touched),
        "mcs_existing_updated": 0,
        "mcs_new_created": 0,
        "members_inserted": 0,
        "members_skipped_existing": 0,
        "groups_skipped_below_min_phases": 0,
        "groups_skipped_no_member_change": 0,
    }

    # Existing master_controls index: master_control_id → (uuid, total_controls)
    with engine.connect() as c:
        mc_index = {row[1]: (str(row[0]), row[2]) for row in c.execute(text(
            "SELECT id, master_control_id, total_controls FROM master_controls"
        )).fetchall()}
    logger.info("Existing master_controls: %d", len(mc_index))

    # Load existing members for touched MCs (avoid duplicate inserts)
    touched_mc_ids = ["MC-%d" % gid for gid in new_groups_touched]
    existing_members: dict[str, set[str]] = defaultdict(set)
    with engine.connect() as c:
        for mc_id_str in touched_mc_ids:
            mc_uuid_info = mc_index.get(mc_id_str)
            if not mc_uuid_info:
                continue
            mc_uuid = mc_uuid_info[0]
            for row in c.execute(text(
                "SELECT control_uuid FROM master_control_members WHERE master_control_uuid = CAST(:u AS uuid)"
            ), {"u": mc_uuid}).fetchall():
                existing_members[mc_id_str].add(str(row[0]))

    # Build INSERT/UPDATE plans
    inserts_new_mcs = []
    inserts_members = []
    updates_mcs = []

    for gid in new_groups_touched:
        mc_id_str = "MC-%d" % gid
        phases = group_phases[gid]
        canonical = group_names[gid]
        all_phases = sorted(phases.keys(), key=lambda p: PHASE_ORDER.get(p, 99))
        phase_counts = {p: len(ctrls) for p, ctrls in phases.items()}
        total = sum(phase_counts.values())

        existing_mc = mc_index.get(mc_id_str)

        if existing_mc:
            # MC exists — insert every control of the group that is not yet a
            # member (regardless of its is_new flag), then refresh counters.
            mc_uuid = existing_mc[0]
            existing_set = existing_members[mc_id_str]
            added_for_this_mc = 0
            for phase, controls in phases.items():
                for ctrl_uuid, ctrl_id, action, title, is_new in controls:
                    if ctrl_uuid in existing_set:
                        stats["members_skipped_existing"] += 1
                        continue
                    inserts_members.append({
                        "mc_uuid": mc_uuid, "control_uuid": ctrl_uuid,
                        "phase": phase, "action": action,
                    })
                    stats["members_inserted"] += 1
                    added_for_this_mc += 1
            if added_for_this_mc > 0:
                updates_mcs.append({
                    "mc_uuid": mc_uuid,
                    "phases_covered": json.dumps(all_phases),
                    "phase_control_count": json.dumps(phase_counts),
                    "total_controls": total,
                })
                stats["mcs_existing_updated"] += 1
            else:
                stats["groups_skipped_no_member_change"] += 1
        else:
            # MC missing — create only if group now meets min_phases threshold
            if len(phases) < args.min_phases:
                stats["groups_skipped_below_min_phases"] += 1
                continue
            inserts_new_mcs.append({
                "master_control_id": mc_id_str,
                "object_group_id": gid,
                "canonical_name": canonical,
                "phases_covered": json.dumps(all_phases),
                "phase_control_count": json.dumps(phase_counts),
                "total_controls": total,
                # "_members" is stripped before the INSERT and written separately.
                "_members": [
                    {"control_uuid": ctrl[0], "phase": p, "action": ctrl[2]}
                    for p, ctrls in phases.items() for ctrl in ctrls
                ],
            })
            stats["mcs_new_created"] += 1

    logger.info("Plan summary: %s", stats)

    if args.dry_run:
        logger.info("DRY RUN — no writes")
        # Show first few examples
        if inserts_new_mcs:
            logger.info("Sample NEW MCs (up to 5):")
            for mc in inserts_new_mcs[:5]:
                logger.info(" %s: %s — total=%d, phases=%s",
                            mc["master_control_id"], mc["canonical_name"],
                            mc["total_controls"], mc["phases_covered"])
        if updates_mcs:
            logger.info("Updates to existing MCs: %d", len(updates_mcs))
        return

    # Step 5: WRITE — strictly INSERT/UPDATE, no DELETE
    with engine.begin() as c:
        c.execute(text("SET search_path TO compliance, public"))

        # 5a: Insert new MCs + their members
        for mc in inserts_new_mcs:
            new_uuid_row = c.execute(text("""
                INSERT INTO master_controls
                    (master_control_id, object_group_id, canonical_name,
                     phases_covered, phase_control_count, total_controls)
                VALUES (:master_control_id, :object_group_id, :canonical_name,
                        CAST(:phases_covered AS jsonb), CAST(:phase_control_count AS jsonb),
                        :total_controls)
                RETURNING id
            """), {k: v for k, v in mc.items() if k != "_members"}).fetchone()
            new_mc_uuid = str(new_uuid_row[0])
            for mem in mc["_members"]:
                c.execute(text("""
                    INSERT INTO master_control_members
                        (master_control_uuid, control_uuid, phase, action)
                    VALUES (CAST(:mc_uuid AS uuid), CAST(:control_uuid AS uuid), :phase, :action)
                """), {"mc_uuid": new_mc_uuid, **mem})

        # 5b: Append new members to existing MCs
        for mem in inserts_members:
            c.execute(text("""
                INSERT INTO master_control_members
                    (master_control_uuid, control_uuid, phase, action)
                VALUES (CAST(:mc_uuid AS uuid), CAST(:control_uuid AS uuid), :phase, :action)
            """), mem)

        # 5c: Update phase counts / totals on touched existing MCs
        for upd in updates_mcs:
            c.execute(text("""
                UPDATE master_controls
                SET phases_covered = CAST(:phases_covered AS jsonb),
                    phase_control_count = CAST(:phase_control_count AS jsonb),
                    total_controls = :total_controls
                WHERE id = CAST(:mc_uuid AS uuid)
            """), upd)

    logger.info("DONE — wrote %d new MCs, updated %d existing MCs, %d members inserted",
                stats["mcs_new_created"], stats["mcs_existing_updated"], stats["members_inserted"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,298 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
G-pre3: Split large Master Controls by regulation source.
|
||||
|
||||
For each MC with >200 controls:
|
||||
1. Load member controls with parent's source_citation->>'source'
|
||||
2. Group by regulation source
|
||||
3. Sources with >= MIN_SOURCE_SIZE → new sub-MC
|
||||
4. Small sources → merge into "mixed" bucket
|
||||
5. UNKNOWN (no source_citation) → sub-cluster by embedding if >MAX_MC
|
||||
6. Delete original large MC, create new sub-MCs
|
||||
|
||||
Usage:
|
||||
python3 /app/scripts/gpre3_regulation_split.py --dry-run
|
||||
python3 /app/scripts/gpre3_regulation_split.py --min-source 15 --max-mc 100
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from collections import defaultdict
|
||||
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
from services.embedding_utils import subcluster_controls
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
|
||||
)
|
||||
logger = logging.getLogger("gpre3")
|
||||
|
||||
DB_URL = os.getenv(
|
||||
"DATABASE_URL",
|
||||
"postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db",
|
||||
)
|
||||
|
||||
# ── Source key normalization ────────────────────────────────────────
|
||||
# fmt: off
|
||||
# Exact regulation source name → short slug. Names not listed here are
# slugified generically by source_to_key().
_SOURCE_SHORT: dict[str, str] = {
    "DSGVO (EU) 2016/679": "dsgvo", "NIS2-Richtlinie (EU) 2022/2555": "nis2",
    "KI-Verordnung (EU) 2024/1689": "ai_act", "Cyber Resilience Act (CRA)": "cra",
    "Digital Services Act (DSA)": "dsa", "Digital Markets Act (DMA)": "dma",
    "Digital Operational Resilience Act": "dora", "Data Governance Act (DGA)": "dga",
    "Data Act": "data_act", "Maschinenverordnung (EU) 2023/1230": "machinery_reg",
    "Medizinprodukteverordnung (EU) 2017/745 (MDR)": "mdr",
    "European Health Data Space": "ehds", "European Accessibility Act": "eaa",
    "EU Cybersecurity Act": "eu_csa", "EU Blue Guide 2022": "eu_blue_guide",
    "EU-US Data Privacy Framework": "eu_us_dpf", "Markets in Crypto-Assets (MiCA)": "mica",
    "Standardvertragsklauseln (SCC)": "scc", "ePrivacy-Richtlinie": "eprivacy",
    "Batterieverordnung (EU) 2023/1542": "battery_reg",
    "Bundesdatenschutzgesetz (BDSG)": "bdsg",
    "BSI-Gesetz (BSIG 2025, NIS2-Umsetzung)": "bsig",
    "BSI-Kritisverordnung (BSI-KritisV)": "bsi_kritisv",
    "Geldwaeschegesetz (GwG)": "gwg", "Hinweisgeberschutzgesetz (HinSchG)": "hinschg",
    "Lieferkettensorgfaltspflichtengesetz (LkSG)": "lksg",
    "KRITIS-Dachgesetz (KRITISDachG)": "kritisdachg",
    "NIST SP 800-53 Rev. 5": "nist_800_53", "NIST Cybersecurity Framework 2.0": "nist_csf",
    "NIST Privacy Framework 1.0": "nist_privacy",
    "NIST SP 800-207 (Zero Trust)": "nist_zero_trust",
    "NIST SP 800-218 (SSDF)": "nist_ssdf", "NIST SP 800-63-3": "nist_800_63",
    "NIST AI Risk Management Framework": "nist_ai_rmf",
    "NISTIR 8259A IoT Security": "nist_iot",
    "OWASP Top 10 (2021)": "owasp_top10", "OWASP API Security Top 10 (2023)": "owasp_api",
    "OWASP ASVS 4.0": "owasp_asvs", "OWASP SAMM 2.0": "owasp_samm",
    "OWASP MASVS 2.0": "owasp_masvs", "OWASP Mobile Top 10": "owasp_mobile",
    "ENISA": "enisa", "TDDDG": "tdddg", "TKG": "tkg", "TMG": "tmg",
    "BGB": "bgb", "UWG": "uwg", "UrhG": "urhg",
    "BAIT (BaFin 2024)": "bait", "VAIT (BaFin 2022)": "vait",
    "AML-Verordnung": "aml_reg", "Zahlungsdiensterichtlinie 2": "psd2",
    "Telekommunikationsgesetz Oesterreich": "at_tkg",
    "Österreichisches Datenschutzgesetz (DSG)": "at_dsg",
    "Allgemeines Gleichbehandlungsgesetz (AGG)": "agg",
    "Aktiengesetz (AktG)": "aktg", "Handelsgesetzbuch (HGB)": "hgb",
    "GmbH-Gesetz (GmbHG)": "gmbhg", "Insolvenzordnung (InsO)": "inso",
    "Gewerbeordnung (GewO)": "gewo", "Abgabenordnung (AO)": "ao",
}
# fmt: on


def source_to_key(source: str) -> str:
    """Normalize a regulation source name to a short slug key.

    Known names are looked up in ``_SOURCE_SHORT``. Anything else is
    lower-cased, stripped of parenthesised parts, reduced to ``[a-z0-9äöüß]``
    runs joined by ``_``, and truncated to 40 characters.

    Args:
        source: regulation source name; ``None`` or an empty string is
            accepted and treated as "no source".

    Returns:
        The slug, or ``"unknown"`` when the input is missing or nothing
        usable remains after normalization.
    """
    if not source:
        # Missing source: previously None crashed on .lower().
        return "unknown"
    if source in _SOURCE_SHORT:
        return _SOURCE_SHORT[source]
    s = source.lower()
    s = re.sub(r"\(.*?\)", "", s)  # drop "(…)" qualifiers such as "(EU)"
    s = re.sub(r"[^a-z0-9äöüß]+", "_", s)
    s = re.sub(r"_+", "_", s).strip("_")
    return s[:40] if s else "unknown"
|
||||
|
||||
|
||||
# ── Main ───────────────────────────────────────────────────────────
|
||||
def main():
    """CLI entry point: plan splits for oversized master controls and apply them.

    Loads every master control whose ``total_controls`` exceeds ``--threshold``,
    builds one split plan per MC, logs a summary, and writes the plans to the
    database unless ``--dry-run`` is given.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--min-source", type=int, default=15,
                     help="Min controls per source for own sub-MC")
    cli.add_argument("--max-mc", type=int, default=100,
                     help="Max controls per sub-MC before sub-clustering")
    cli.add_argument("--threshold", type=int, default=200,
                     help="Only split MCs with more than N controls")
    cli.add_argument("--dry-run", action="store_true")
    args = cli.parse_args()

    engine = create_engine(
        DB_URL, connect_args={"options": "-c search_path=compliance,public"}
    )

    # Step 1: master controls above the size threshold, largest first.
    large_query = text("""
        SELECT mc.id, mc.master_control_id, mc.object_group_id,
        mc.canonical_name, mc.total_controls
        FROM master_controls mc
        WHERE mc.total_controls > :threshold
        ORDER BY mc.total_controls DESC
    """)
    with engine.connect() as conn:
        large_mcs = conn.execute(large_query, {"threshold": args.threshold}).fetchall()

    logger.info("Found %d MCs with >%d controls", len(large_mcs), args.threshold)
    if len(large_mcs) == 0:
        return

    # Step 2: one split plan per large MC.
    all_splits = [
        _build_split_plan(engine, mc_uuid, mc_id, og_id, canonical, total, args)
        for mc_uuid, mc_id, og_id, canonical, total in large_mcs
    ]

    total_new = 0
    total_covered = 0
    for split in all_splits:
        groups = split["sub_groups"]
        total_new += len(groups)
        total_covered += sum(len(group["controls"]) for group in groups)
    logger.info("SUMMARY: %d large MCs → %d sub-MCs (%d controls)", len(all_splits), total_new, total_covered)

    if args.dry_run:
        logger.info("DRY RUN — not writing to DB")
    else:
        _write_splits(engine, all_splits)
|
||||
|
||||
|
||||
def _build_split_plan(engine, mc_uuid, mc_id, og_id, canonical, total, args) -> dict:
    """Build a regulation-source split plan for one large MC.

    Members are grouped by the ``source`` of their parent control's
    ``source_citation`` (``'UNKNOWN'`` when absent). Sources with at least
    ``args.min_source`` controls get their own sub-group(s); smaller sources
    are pooled into ``<canonical>_mixed``; UNKNOWN members go to
    ``<canonical>_general``.

    Returns:
        Dict with keys ``mc_uuid``, ``mc_id``, ``og_id``, ``canonical``,
        ``total`` and ``sub_groups``.
    """
    logger.info("\n━━━ %s: %s (%d controls) ━━━", mc_id, canonical, total)

    member_query = text("""
        SELECT mcm.control_uuid, mcm.phase, mcm.action,
        cc.control_id, cc.title,
        COALESCE(pc.source_citation->>'source', 'UNKNOWN') AS src
        FROM master_control_members mcm
        JOIN canonical_controls cc ON cc.id = mcm.control_uuid
        LEFT JOIN canonical_controls pc ON pc.id = cc.parent_control_uuid
        WHERE mcm.master_control_uuid = CAST(:mc_uuid AS uuid)
    """)
    with engine.connect() as conn:
        rows = conn.execute(member_query, {"mc_uuid": str(mc_uuid)}).fetchall()

    by_source: dict[str, list[dict]] = defaultdict(list)
    for row in rows:
        ctrl_uuid, phase, action, cid, title, src = row
        by_source[src].append({
            "control_uuid": str(ctrl_uuid), "phase": phase,
            "action": action, "control_id": cid, "title": title,
        })

    # Largest sources first; log only the top eight.
    ranked = sorted(by_source.items(), key=lambda item: -len(item[1]))
    for src, ctrls in ranked[:8]:
        logger.info(" %4d %s", len(ctrls), src)
    hidden = len(ranked) - 8
    if hidden > 0:
        logger.info(" ... +%d more sources", hidden)

    plan = {
        "mc_uuid": str(mc_uuid), "mc_id": mc_id, "og_id": og_id,
        "canonical": canonical, "total": total, "sub_groups": [],
    }

    named = [(src, ctrls) for src, ctrls in ranked if src != "UNKNOWN"]
    own_mc_sources = [(src, ctrls) for src, ctrls in named
                      if len(ctrls) >= args.min_source]
    mixed_controls = [ctrl for _, ctrls in named
                      if len(ctrls) < args.min_source for ctrl in ctrls]
    unknown_controls = by_source.get("UNKNOWN", [])

    # (a) One named sub-MC (or numbered series) per sufficiently large source.
    for src, ctrls in own_mc_sources:
        _add_subgroups(plan, f"{canonical}_{source_to_key(src)}", src, ctrls, args.max_mc)

    # (b) Everything from small sources shares one bucket.
    if mixed_controls:
        _add_subgroups(plan, f"{canonical}_mixed", "mixed", mixed_controls, args.max_mc)

    # (c) Controls without a known source.
    if unknown_controls:
        _add_subgroups(plan, f"{canonical}_general", "general", unknown_controls, args.max_mc)

    logger.info(" → %d sub-groups:", len(plan["sub_groups"]))
    for group in sorted(plan["sub_groups"], key=lambda g: -len(g["controls"])):
        logger.info(" %4d %s", len(group["controls"]), group["name"])

    return plan
|
||||
|
||||
|
||||
def _add_subgroups(plan: dict, name: str, source: str,
|
||||
controls: list[dict], max_mc: int):
|
||||
"""Add controls as one or more sub-groups to the plan."""
|
||||
if len(controls) <= max_mc:
|
||||
plan["sub_groups"].append({"name": name, "source": source, "controls": controls})
|
||||
else:
|
||||
clusters = subcluster_controls(controls, max_mc)
|
||||
for i, cluster in enumerate(clusters):
|
||||
sub_name = f"{name}_{i+1}" if len(clusters) > 1 else name
|
||||
plan["sub_groups"].append({"name": sub_name, "source": source, "controls": cluster})
|
||||
|
||||
|
||||
def _write_splits(engine, splits: list[dict]):
    """Apply the split plans: delete old MCs, create new object_groups + MCs.

    All writes run in one transaction (``engine.begin()``). For each plan the
    old master control and its member rows are deleted, then every non-empty
    sub-group gets a new ``object_groups`` row, a ``master_controls`` row
    (``MC-<group_id>``) and its ``master_control_members`` rows. A plan with
    no non-empty sub-group is skipped so that an MC is never deleted without
    a replacement.

    Args:
        engine: SQLAlchemy engine.
        splits: Plans as produced by ``_build_split_plan``.
    """
    total_mc = 0
    total_mem = 0

    with engine.begin() as c:
        c.execute(text("SET search_path TO compliance, public"))
        max_gid = c.execute(
            text("SELECT COALESCE(MAX(group_id), 0) FROM object_groups")
        ).scalar()
        next_gid = max_gid + 1

        for sp in splits:
            sub_groups = [sg for sg in sp["sub_groups"] if sg["controls"]]
            if not sub_groups:
                # Deleting here would drop the MC with nothing created in its place.
                logger.warning("Skipping %s (%s): split plan contains no controls",
                               sp["mc_id"], sp["canonical"])
                continue

            c.execute(text(
                "DELETE FROM master_control_members "
                "WHERE master_control_uuid = CAST(:u AS uuid)"
            ), {"u": sp["mc_uuid"]})
            c.execute(text(
                "DELETE FROM master_controls WHERE id = CAST(:u AS uuid)"
            ), {"u": sp["mc_uuid"]})
            logger.info("Deleted %s (%s)", sp["mc_id"], sp["canonical"])

            for sg in sub_groups:
                gid = next_gid
                next_gid += 1

                # Distinct control IDs of this sub-group.
                members_list = list({ctrl["control_id"] for ctrl in sg["controls"]})
                c.execute(text("""
                    INSERT INTO object_groups
                    (group_id, canonical_name, member_count, members, top_controls_count)
                    VALUES (:gid, :name, :cnt, CAST(:members AS jsonb), 0)
                """), {"gid": gid, "name": sg["name"], "cnt": len(members_list),
                       "members": json.dumps(members_list)})

                by_phase: dict[str, list[dict]] = defaultdict(list)
                for ctrl in sg["controls"]:
                    by_phase[ctrl["phase"]].append(ctrl)

                sorted_phases = sorted(by_phase.keys())
                phase_counts = {p: len(v) for p, v in by_phase.items()}
                mc_id = f"MC-{gid}"

                # RETURNING yields the id of exactly the row just inserted,
                # instead of re-selecting by master_control_id afterwards.
                mc_uuid = c.execute(text("""
                    INSERT INTO master_controls
                    (master_control_id, object_group_id, canonical_name,
                    phases_covered, phase_control_count, total_controls)
                    VALUES (:mcid, :gid, :name,
                    CAST(:phases AS jsonb), CAST(:pcounts AS jsonb), :total)
                    RETURNING id
                """), {"mcid": mc_id, "gid": gid, "name": sg["name"],
                       "phases": json.dumps(sorted_phases),
                       "pcounts": json.dumps(phase_counts),
                       "total": sum(phase_counts.values())}).scalar()

                # One executemany call instead of one statement per member.
                member_rows = [
                    {"mc": str(mc_uuid), "ctrl": ctrl["control_uuid"],
                     "phase": ctrl["phase"], "action": ctrl["action"]}
                    for ctrl in sg["controls"]
                ]
                c.execute(text("""
                    INSERT INTO master_control_members
                    (master_control_uuid, control_uuid, phase, action)
                    VALUES (CAST(:mc AS uuid), CAST(:ctrl AS uuid), :phase, :action)
                """), member_rows)
                total_mem += len(member_rows)
                total_mc += 1

    # Reached only after the transaction has committed.
    logger.info("Created %d new MCs with %d members", total_mc, total_mem)

    with engine.connect() as c:
        stats = c.execute(text("""
            SELECT count(*), count(CASE WHEN total_controls > 200 THEN 1 END),
            AVG(total_controls)::int
            FROM compliance.master_controls
        """)).fetchone()
    logger.info("Final: %d MCs, %d still >200, avg %d controls/MC", stats[0], stats[1], stats[2])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,310 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Phase 0: Quality Audit for Master Control Assignments.
|
||||
|
||||
Uses Claude Sonnet to validate whether controls are correctly assigned
|
||||
to their Master Controls. Samples controls from large and small MCs.
|
||||
|
||||
Usage:
|
||||
python3 /app/scripts/gpre_quality_audit.py
|
||||
python3 /app/scripts/gpre_quality_audit.py --large-sample 50 --small-sample 10
|
||||
python3 /app/scripts/gpre_quality_audit.py --mc MC-8292 # single MC
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
from collections import defaultdict
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
|
||||
)
|
||||
logger = logging.getLogger("quality-audit")
|
||||
|
||||
DB_URL = os.getenv(
|
||||
"DATABASE_URL",
|
||||
"postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db",
|
||||
)
|
||||
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
|
||||
ANTHROPIC_MODEL = os.getenv("AUDIT_MODEL", "claude-sonnet-4-20250514")
|
||||
ANTHROPIC_URL = "https://api.anthropic.com/v1/messages"
|
||||
|
||||
SYSTEM_PROMPT = """Du bist ein Compliance-Experte der prüft ob Controls korrekt zu Master Controls zugeordnet sind.
|
||||
|
||||
Für jeden Control beantworte:
|
||||
1. MATCH: Gehört dieser Control thematisch zum Master Control Topic?
|
||||
2. CONFIDENCE: Wie sicher bist du? (0.0-1.0)
|
||||
3. REASON: Kurze Begründung (max 1 Satz)
|
||||
4. SUGGESTED_TOPIC: Falls MATCH=false, welches Topic wäre korrekt?
|
||||
|
||||
Wichtige Unterscheidungen:
|
||||
- "monitoring" = kontinuierliche Überwachung, Alerting, Log-Analyse
|
||||
- "training" = Schulung, Awareness, Lernmaterialien
|
||||
- "personal_data" = personenbezogene Daten, DSGVO-Betroffenenrechte
|
||||
- "procedure" = Verfahren, Prozesse (aber NICHT wenn es spezifisch um Incidents geht)
|
||||
- "incident" = Sicherheitsvorfälle, Breach Notification, Recovery
|
||||
- "policy" = Richtlinien, Regelwerke, Governance-Dokumente
|
||||
- "encryption" = Verschlüsselung, Kryptografie, Key Management
|
||||
- "audit_logging" = Protokollierung, Audit Trail, Nachvollziehbarkeit
|
||||
|
||||
Antworte NUR als JSON-Array, ein Objekt pro Control."""
|
||||
|
||||
|
||||
def call_claude(controls_batch: list[dict], mc_topic: str) -> tuple[list[dict], dict]:
    """Send a batch of controls to Claude for validation.

    Args:
        controls_batch: Control dicts with keys ``control_id``, ``title``,
            ``objective``, ``phase`` and ``action``.
        mc_topic: Topic of the master control the batch is assigned to.

    Returns:
        ``(results, usage)``. ``results`` holds the dict items of the JSON
        array found in the response text (empty if none was found or the
        request failed). ``usage`` is the response's ``usage`` object, or an
        empty dict when no response was obtained.
    """
    max_attempts = 3

    items = [
        f"- Control '{c['control_id']}': "
        f"Titel=\"{c['title']}\", "
        f"Objective=\"{c['objective'][:150]}...\", "
        f"Phase={c['phase']}, Action={c['action']}"
        for c in controls_batch
    ]

    prompt = (
        f"Master Control Topic: \"{mc_topic}\"\n\n"
        f"Prüfe diese {len(controls_batch)} Controls:\n\n"
        + "\n".join(items)
        + "\n\nAntwort als JSON-Array mit Feldern: "
        "control_id, match (bool), confidence (float), reason (str), "
        "suggested_topic (str, nur wenn match=false)."
    )

    headers = {
        "x-api-key": ANTHROPIC_API_KEY,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    payload = {
        "model": ANTHROPIC_MODEL,
        "max_tokens": 2048,
        "temperature": 0.1,
        "system": SYSTEM_PROMPT,
        "messages": [{"role": "user", "content": prompt}],
    }

    for attempt in range(max_attempts):
        is_last = attempt == max_attempts - 1
        try:
            resp = httpx.post(
                ANTHROPIC_URL,
                headers=headers,
                json=payload,
                timeout=60.0,
            )
            resp.raise_for_status()
            data = resp.json()
            content = data.get("content", [{}])[0].get("text", "")
            usage = data.get("usage", {})

            # The array may be wrapped in extra text; cut from the first "["
            # to the last "]".
            start = content.find("[")
            end = content.rfind("]") + 1
            if start >= 0 and end > start:
                parsed = json.loads(content[start:end])
                # Callers use .get() on every item, so keep only objects.
                results = [item for item in parsed if isinstance(item, dict)]
                return results, usage
            logger.warning("No JSON array in response: %s", content[:200])
            return [], usage
        except httpx.HTTPStatusError as e:
            if e.response.status_code != 429:
                logger.error("API error: %s", e)
                return [], {}
            if is_last:
                # No further attempt follows, so waiting would only delay the failure.
                logger.warning("Rate limited, giving up after %d attempts", max_attempts)
            else:
                wait = 30 * (attempt + 1)
                logger.warning("Rate limited, waiting %ds...", wait)
                time.sleep(wait)
        except Exception as e:
            # Covers transport errors as well as malformed response bodies.
            logger.error("Request failed (attempt %d): %s", attempt + 1, e)
            if not is_last:
                time.sleep(5)
    return [], {}
|
||||
|
||||
|
||||
def main():
    """CLI entry point: sample controls per master control and audit them.

    Audits either the single MC given by ``--mc`` or all MCs with more than
    200 controls plus a random selection of MCs with 10-200 controls. Sampled
    controls are sent to ``call_claude`` in batches, and the tallies are
    handed to ``_print_report``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--large-sample", type=int, default=50,
                     help="Controls to sample per large MC")
    cli.add_argument("--small-sample", type=int, default=10,
                     help="Controls to sample per small MC")
    cli.add_argument("--small-mc-count", type=int, default=50,
                     help="Number of small MCs to audit")
    cli.add_argument("--mc", type=str, default=None,
                     help="Audit a single MC by ID (e.g., MC-8292)")
    cli.add_argument("--batch-size", type=int, default=10,
                     help="Controls per API call")
    args = cli.parse_args()

    engine = create_engine(
        DB_URL, connect_args={"options": "-c search_path=compliance,public"}
    )

    # Pick the master controls to audit.
    with engine.connect() as conn:
        if not args.mc:
            # Every large MC (>200) plus a random selection of smaller ones.
            large = conn.execute(text("""
                SELECT id, master_control_id, canonical_name, total_controls
                FROM master_controls WHERE total_controls > 200
                ORDER BY total_controls DESC
            """)).fetchall()

            small = conn.execute(text("""
                SELECT id, master_control_id, canonical_name, total_controls
                FROM master_controls WHERE total_controls BETWEEN 10 AND 200
                ORDER BY RANDOM() LIMIT :cnt
            """), {"cnt": args.small_mc_count}).fetchall()

            mcs = list(large) + list(small)
        else:
            mcs = conn.execute(text("""
                SELECT id, master_control_id, canonical_name, total_controls
                FROM master_controls WHERE master_control_id = :mc
            """), {"mc": args.mc}).fetchall()

    logger.info("Auditing %d Master Controls", len(mcs))

    # Running totals across all MCs.
    total_checked = 0
    total_match = 0
    total_mismatch = 0
    total_input_tokens = 0
    total_output_tokens = 0
    mc_results: dict[str, dict] = {}
    all_mismatches: list[dict] = []

    sample_query = text("""
        SELECT mcm.control_uuid, mcm.phase, mcm.action,
        cc.control_id, cc.title,
        COALESCE(cc.objective, '') as objective
        FROM master_control_members mcm
        JOIN canonical_controls cc ON cc.id = mcm.control_uuid
        WHERE mcm.master_control_uuid = CAST(:mc AS uuid)
        ORDER BY RANDOM()
        LIMIT :n
    """)

    for mc_uuid, mc_id, canonical, total in mcs:
        sample_size = args.large_sample if total > 200 else args.small_sample

        with engine.connect() as conn:
            rows = conn.execute(
                sample_query, {"mc": str(mc_uuid), "n": sample_size}
            ).fetchall()

        if not rows:
            continue

        control_dicts = []
        for row in rows:
            control_dicts.append({
                "control_uuid": str(row[0]), "phase": row[1], "action": row[2],
                "control_id": row[3], "title": row[4] or "", "objective": row[5] or "",
            })

        logger.info("\n%s: %s (%d total, sampling %d)",
                    mc_id, canonical, total, len(control_dicts))

        mc_match = 0
        mc_mismatch = 0

        for offset in range(0, len(control_dicts), args.batch_size):
            batch = control_dicts[offset:offset + args.batch_size]
            results, usage = call_claude(batch, canonical)

            total_input_tokens += usage.get("input_tokens", 0)
            total_output_tokens += usage.get("output_tokens", 0)

            for verdict in results:
                # A missing "match" field counts as a match.
                if verdict.get("match", True):
                    mc_match += 1
                    total_match += 1
                    continue
                mc_mismatch += 1
                total_mismatch += 1
                all_mismatches.append({
                    "mc_id": mc_id,
                    "mc_topic": canonical,
                    "control_id": verdict.get("control_id", "?"),
                    "confidence": verdict.get("confidence", 0),
                    "reason": verdict.get("reason", ""),
                    "suggested_topic": verdict.get("suggested_topic", ""),
                })

            total_checked += len(results)

            # Pause between API calls.
            time.sleep(1)

        judged = mc_match + mc_mismatch
        accuracy = mc_match / judged if judged > 0 else 1.0
        mc_results[mc_id] = {
            "canonical": canonical, "total": total,
            "checked": judged,
            "match": mc_match, "mismatch": mc_mismatch,
            "accuracy": accuracy,
        }
        logger.info(" → %d/%d correct (%.1f%%)",
                    mc_match, judged, accuracy * 100)

    _print_report(mc_results, all_mismatches, total_checked, total_match,
                  total_mismatch, total_input_tokens, total_output_tokens)
|
||||
|
||||
|
||||
def _print_report(mc_results, mismatches, checked, match, mismatch,
                  input_tok, output_tok):
    """Log the audit report.

    Sections, in order: overall totals, token usage with a cost estimate,
    per-MC accuracy (lowest first), all mismatches (highest confidence first),
    and pooled accuracy for large (>200) versus small MCs.
    """
    rule = "=" * 70
    logger.info("\n" + rule)
    logger.info("QUALITY AUDIT REPORT")
    logger.info(rule)

    denominator = max(checked, 1)  # avoid dividing by zero when nothing was checked
    logger.info("Total controls checked: %d", checked)
    logger.info("Correct assignments: %d (%.1f%%)",
                match, match / denominator * 100)
    logger.info("Wrong assignments: %d (%.1f%%)",
                mismatch, mismatch / denominator * 100)

    # Cost estimate at $3 per million input tokens and $15 per million output tokens.
    cost_input = input_tok / 1_000_000 * 3.0
    cost_output = output_tok / 1_000_000 * 15.0
    logger.info("\nAPI Usage: %d input + %d output tokens",
                input_tok, output_tok)
    logger.info("Estimated cost: $%.2f", cost_input + cost_output)

    logger.info("\n--- Per-MC Accuracy (worst first) ---")
    for mc in sorted(mc_results.values(), key=lambda entry: entry["accuracy"]):
        acc = mc["accuracy"]
        if acc < 0.9:
            flag = "❌"
        elif acc < 0.95:
            flag = "⚠️"
        else:
            flag = "✅"
        size_class = "large" if mc["total"] > 200 else "small"
        logger.info(" %s %s (%s): %d/%d = %.1f%% [total: %d]",
                    flag, mc["canonical"][:30].ljust(30),
                    size_class,
                    mc["match"], mc["checked"],
                    acc * 100, mc["total"])

    if mismatches:
        logger.info("\n--- Mismatches (all %d) ---", len(mismatches))
        for m in sorted(mismatches, key=lambda entry: -entry.get("confidence", 0)):
            logger.info(" %s in %s (%s) → should be '%s': %s",
                        m["control_id"], m["mc_id"], m["mc_topic"],
                        m["suggested_topic"], m["reason"])

    def _pooled_accuracy(group):
        # Matches over checked controls, pooled across the whole group.
        return sum(m["match"] for m in group) / max(sum(m["checked"] for m in group), 1)

    large_mcs = []
    small_mcs = []
    for m in mc_results.values():
        (large_mcs if m["total"] > 200 else small_mcs).append(m)

    if large_mcs:
        logger.info("\nLarge MCs (>200): %.1f%% accuracy (%d MCs)",
                    _pooled_accuracy(large_mcs) * 100, len(large_mcs))
    if small_mcs:
        logger.info("Small MCs (≤200): %.1f%% accuracy (%d MCs)",
                    _pooled_accuracy(small_mcs) * 100, len(small_mcs))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,242 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Parse BSI QUAIDAL Markdown catalog into a structural index.
|
||||
|
||||
Clean-Room principle: this script does NOT persist any QUAIDAL prose to disk.
|
||||
It only extracts non-protectable structural facts (IDs, type, file paths,
|
||||
cross-references to other QUAIDAL entries, references to external norms).
|
||||
|
||||
The derivation step (derive_quaidal_mcs.py) reads the index plus the original
|
||||
.md files from the gitignored clone and asks the LLM to produce our own
|
||||
wordings, never copying the BSI prose into our own controls/database.
|
||||
|
||||
Input: legal-sources/bsi-quaidal/0000_Markdown/**/*.md (gitignored clone)
|
||||
Output: control-pipeline/data/quaidal/quaidal_index.json (structural only)
|
||||
|
||||
Usage:
|
||||
python3 control-pipeline/scripts/ingest_bsi_quaidal.py
|
||||
python3 control-pipeline/scripts/ingest_bsi_quaidal.py --check # validate only
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
print("ERROR: PyYAML missing. Install with: pip install pyyaml", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||
SOURCE_ROOT = REPO_ROOT / "legal-sources" / "bsi-quaidal"
|
||||
MARKDOWN_ROOT = SOURCE_ROOT / "0000_Markdown"
|
||||
OUTPUT_DIR = REPO_ROOT / "control-pipeline" / "data" / "quaidal"
|
||||
OUTPUT_FILE = OUTPUT_DIR / "quaidal_index.json"
|
||||
|
||||
# Map folder name -> our internal kind. Sub-folders inside the Methoden tree
|
||||
# (e.g. "QM-10_Dimension Reduction") are treated as method variants of their
|
||||
# parent QM.
|
||||
KIND_BY_PARENT_DIR = {
|
||||
"0000_Qualitätskriterien": "criterion", # QKB → Master Control candidates
|
||||
"0001_Qualitätsbausteine": "building_block", # QB → atomic controls
|
||||
"0002_Maßnahmen": "measure", # M → mitigations
|
||||
"0003_Qualitätsmetriken_methoden": "metric", # QM → runtime check / metric
|
||||
"0002_Referenz-Matrizen": "matrix", # cross-walk matrix
|
||||
"9998_CustomTemplates": "template",
|
||||
}
|
||||
|
||||
FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL)
|
||||
ID_RE = re.compile(r"\b((?:QKB|QB|MA|QM)-\d+[a-zA-Z]?)", re.IGNORECASE)
|
||||
|
||||
|
||||
@dataclass
class IndexEntry:
    """Structural index record for one QUAIDAL markdown file."""

    # Canonical upper-case ID, e.g. QKB-01, QB-03, QM-07.
    id: str
    # One of: criterion / building_block / measure / metric / matrix / template
    # (or "unknown" when no parent folder is mapped).
    kind: str
    title_de: str
    title_en: str
    # File path relative to SOURCE_ROOT.
    source_path: str
    # IDs of other QUAIDAL entries mentioned in this file.
    referenced_ids: list[str] = field(default_factory=list)
    # External norm references as dicts with keys "framework" and "citation".
    external_refs: list[dict] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)
    # Frontmatter "share" flag; None when the file does not set it.
    share: bool | None = None
|
||||
|
||||
|
||||
def parse_frontmatter(text: str) -> dict:
    """Parse the leading ``---`` delimited YAML frontmatter of a markdown file.

    Args:
        text: Full file content.

    Returns:
        The frontmatter mapping. An empty dict is returned when there is no
        frontmatter block, the YAML is invalid, or the YAML is valid but not
        a mapping.
    """
    m = FRONTMATTER_RE.match(text)
    if not m:
        return {}
    try:
        data = yaml.safe_load(m.group(1))
    except yaml.YAMLError:
        return {}
    # A bare scalar or list is valid YAML too; callers rely on dict.get().
    return data if isinstance(data, dict) else {}
|
||||
|
||||
|
||||
def canonical_id(raw_id: str | list | None, filename: str) -> str | None:
    """Return the first QUAIDAL ID found in the frontmatter ID or the filename.

    ``raw_id`` may be a string or a list of values; any other type is
    ignored. The filename is always tried last as a fallback. The matched ID
    is upper-cased. Returns None if no candidate contains an ID.
    """
    if isinstance(raw_id, list):
        candidates = [str(item) for item in raw_id]
    elif isinstance(raw_id, str):
        candidates = [raw_id]
    else:
        candidates = []
    candidates.append(filename)

    for candidate in candidates:
        hit = ID_RE.search(candidate)
        if hit is not None:
            return hit.group(1).upper().replace(" ", "-")
    return None
|
||||
|
||||
|
||||
def determine_kind(path: Path) -> str:
    """Return the kind mapped to the nearest ancestor folder in KIND_BY_PARENT_DIR.

    Ancestors are checked from the closest outward; ``"unknown"`` is returned
    when none of them is mapped.
    """
    mapped = (
        KIND_BY_PARENT_DIR[ancestor.name]
        for ancestor in path.parents
        if ancestor.name in KIND_BY_PARENT_DIR
    )
    return next(mapped, "unknown")
|
||||
|
||||
|
||||
def collect_referenced_ids(body: str, own_id: str) -> list[str]:
    """Return the sorted, upper-cased QUAIDAL IDs mentioned in ``body``, excluding ``own_id``."""
    others = set()
    for hit in ID_RE.finditer(body):
        ref = hit.group(1).upper()
        if ref != own_id:
            others.add(ref)
    return sorted(others)
|
||||
|
||||
|
||||
# (framework label, substrings whose case-insensitive presence in a line
# counts as a reference to that framework)
REF_FRAMEWORKS = [
    ("AI Act", ["AI-Act", "AI Act", "Verordnung (EU) 2024/1689", "KI-VO"]),
    ("EU GDPR", ["DSGVO", "Verordnung (EU) 2016/679", "GDPR"]),
    ("ISO/IEC 25012", ["ISO/IEC 25012", "ISO 25012"]),
    ("ISO/IEC 25024", ["ISO/IEC 25024", "ISO 25024"]),
    ("ISO/IEC 23894", ["ISO/IEC 23894", "ISO 23894"]),
    ("ISO/IEC 42001", ["ISO/IEC 42001", "ISO 42001"]),
    ("NIST AI RMF", ["NIST AI RMF", "AI Risk Management Framework"]),
    ("BSI Grundschutz", ["IT-Grundschutz", "Grundschutz"]),
    ("BSI AIC4", ["AIC4", "AI Cloud Service Compliance Criteria"]),
]

# Article/section marker followed by its number (e.g. "Artikel 10", "§ 5").
# The lookbehind stops "Art" from matching inside longer words such as
# "Part 2" or "Start 3".
_CITATION_RE = re.compile(
    r"(?<!\w)(Artikel|Art\.?|Section|§)\s*([0-9]+[a-z]?)", re.IGNORECASE
)


def detect_external_refs(body: str) -> list[dict]:
    """Find references to external frameworks in ``body``, line by line.

    A line references a framework when it contains any of the framework's
    patterns from ``REF_FRAMEWORKS`` (case-insensitive). The first
    article/section marker on that line, if any, becomes the citation. Only
    the framework label and the citation are kept; no other text from the
    line is stored.

    Args:
        body: Markdown body text.

    Returns:
        ``{"framework": ..., "citation": ...}`` dicts in order of first
        appearance, de-duplicated per (framework, citation) pair.
        ``citation`` is None when the line has no article/section marker.
    """
    refs: list[dict] = []
    seen: set[tuple[str, str]] = set()
    for line in body.splitlines():
        lowered = line.lower()
        frameworks = [
            framework
            for framework, patterns in REF_FRAMEWORKS
            if any(pat.lower() in lowered for pat in patterns)
        ]
        if not frameworks:
            continue
        # The citation depends only on the line, so compute it once.
        art = _CITATION_RE.search(line)
        citation = f"{art.group(1)} {art.group(2)}" if art else None
        for framework in frameworks:
            key = (framework, citation or "")
            if key in seen:
                continue
            seen.add(key)
            refs.append({"framework": framework, "citation": citation})
    return refs
|
||||
|
||||
|
||||
def parse_file(path: Path) -> IndexEntry | None:
    """Build the structural index entry for one markdown file.

    Args:
        path: Markdown file located under SOURCE_ROOT.

    Returns:
        The entry, or None when neither the frontmatter ``ID`` nor the file
        name contains a QUAIDAL ID.
    """
    text = path.read_text(encoding="utf-8")
    fm = parse_frontmatter(text)

    # Body = everything after the closing frontmatter fence. If the file
    # opens with "---" but never closes it, keep the whole text: find()
    # returns -1 there, and slicing from -1 + 3 would silently drop the
    # first two characters.
    body = text
    if text.startswith("---"):
        closing = text.find("---", 3)
        if closing != -1:
            body = text[closing + 3:]

    own_id = canonical_id(fm.get("ID"), path.stem)
    if not own_id:
        return None

    title_de = str(fm.get("TitleGer") or fm.get("Title") or path.stem).strip()
    title_en = str(fm.get("Title") or "").strip()

    # "tags" may be a single string or a list; drop empty items.
    tags_raw = fm.get("tags") or []
    if isinstance(tags_raw, str):
        tags_raw = [tags_raw]
    tags = [str(t).strip() for t in tags_raw if t]

    # Keep "not set" (None) distinct from an explicit false.
    share_val = fm.get("share")
    share = bool(share_val) if share_val is not None else None

    return IndexEntry(
        id=own_id,
        kind=determine_kind(path),
        title_de=title_de,
        title_en=title_en,
        source_path=str(path.relative_to(SOURCE_ROOT)),
        referenced_ids=collect_referenced_ids(body, own_id),
        external_refs=detect_external_refs(body),
        tags=tags,
        share=share,
    )
|
||||
|
||||
|
||||
def get_commit_sha() -> str | None:
    """Return the HEAD commit SHA of the clone at SOURCE_ROOT.

    Returns None when ``git rev-parse`` exits non-zero or the ``git``
    executable cannot be found.
    """
    command = ["git", "-C", str(SOURCE_ROOT), "rev-parse", "HEAD"]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None
    return result.stdout.strip()
|
||||
|
||||
|
||||
def main() -> int:
    """Parse every markdown file under MARKDOWN_ROOT and write the structural index.

    Prints a per-kind summary. With ``--check`` nothing is written.

    Returns:
        Process exit code: 0 on success, 2 when the clone is missing.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--check", action="store_true", help="Parse + validate, do not write output")
    args = ap.parse_args()

    if not MARKDOWN_ROOT.exists():
        print(f"ERROR: clone not found at {SOURCE_ROOT}", file=sys.stderr)
        print("Run: git clone --depth=1 https://github.com/BSI-Bund/QUAIDAL.git legal-sources/bsi-quaidal", file=sys.stderr)
        return 2

    entries: list[IndexEntry] = []
    skipped: list[Path] = []
    for md_path in sorted(MARKDOWN_ROOT.rglob("*.md")):
        parsed = parse_file(md_path)
        if parsed is not None:
            entries.append(parsed)
        else:
            # No QUAIDAL ID in frontmatter or file name.
            skipped.append(md_path)

    by_kind: dict[str, int] = {}
    for entry in entries:
        by_kind.setdefault(entry.kind, 0)
        by_kind[entry.kind] += 1

    print(f"Parsed {len(entries)} entries (skipped {len(skipped)} without ID):")
    for kind, count in sorted(by_kind.items()):
        print(f" {kind:18s} {count}")

    if args.check:
        return 0

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    license_note = (
        "BSI-Veroeffentlichung. Repo enthaelt keine SPDX-Lizenzdatei. "
        "Frontmatter share:true. Veroeffentlichung durch Bundesbehoerde, "
        "§ 5 UrhG (amtliche Werke) anwendbar. BSI hat 05/2026 die Annahme "
        "CC-BY-SA-4.0 in unserer Anfrage nicht widersprochen, aber auch "
        "nicht aktiv bestaetigt. Wir derivieren Clean-Room (eigene "
        "Formulierungen, nur Referenz auf BSI QUAIDAL Sektion)."
    )
    payload = {
        "source": "BSI QUAIDAL",
        "source_url": "https://github.com/BSI-Bund/QUAIDAL",
        "commit_sha": get_commit_sha(),
        "license_note": license_note,
        "entries": [asdict(entry) for entry in entries],
    }
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    OUTPUT_FILE.write_text(serialized, encoding="utf-8")
    print(f"\nWrote index: {OUTPUT_FILE.relative_to(REPO_ROOT)}")
    print(f"Commit SHA: {payload['commit_sha']}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -0,0 +1,414 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Ingest CRA-relevant ENISA documents into the RAG (collection `bp_compliance_ce`).
|
||||
|
||||
Source files live under `legal-sources/enisa/` in this repo. The script extracts
|
||||
PDF text with pdfplumber (HTML for the SRP FAQ), normalizes it, and uploads via
|
||||
the RAG service with `chunk_strategy='legal'` so that section metadata is
|
||||
attached to every chunk.
|
||||
|
||||
Each document carries a `requirement_strength` field so downstream consumers
|
||||
can distinguish normative material from guidance and consultation drafts:
|
||||
- mandatory — binding (none in this batch; CRA itself is the law)
|
||||
- guidance — official ENISA / EUCC guidance, citable
|
||||
- consultation_draft — public-consultation drafts (use with caveat)
|
||||
|
||||
Usage (run on Mac Mini after copying the legal-sources/enisa/ folder, or via SSH
|
||||
with the repo mounted):
|
||||
python3 control-pipeline/scripts/ingest_enisa_cra.py --dry-run
|
||||
python3 control-pipeline/scripts/ingest_enisa_cra.py
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import unicodedata
|
||||
from html.parser import HTMLParser
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import pdfplumber
|
||||
|
||||
# Service endpoints and upload settings used by the helpers below.
RAG_URL = "https://localhost:8097"
QDRANT_URL = "http://localhost:6333"
UPLOAD_TIMEOUT = 1800.0  # passed to httpx.Client(timeout=...) for uploads
COLLECTION = "bp_compliance_ce"

# Source documents are read from <repo>/legal-sources/enisa/.
REPO_ROOT = Path(__file__).resolve().parents[2]
SOURCE_DIR = REPO_ROOT / "legal-sources" / "enisa"


def _enisa_doc(regulation_id, short, name, doc_type, strength="guidance",
               *, year=None, source_file=None):
    """Build one DOCS entry.

    Metadata keys are inserted in a fixed order (publication_year, when
    given, sits between requirement_strength and license) so the JSON
    serialisation of ``extra_metadata`` is stable.
    """
    metadata = {
        "regulation_id": regulation_id,
        "regulation_short": short,
        "guideline_name": name,
        "doc_type": doc_type,
        "requirement_strength": strength,
    }
    if year is not None:
        metadata["publication_year"] = year
    metadata["license"] = "reuse_with_attribution"
    metadata["source"] = "enisa.europa.eu"
    metadata["attribution"] = "ENISA, CC BY 4.0"
    return {
        "regulation_id": regulation_id,
        # Most sources are PDFs named after their regulation_id.
        "filename": source_file if source_file is not None else f"{regulation_id}.pdf",
        "upload_filename": f"{regulation_id}.txt",
        "extra_metadata": metadata,
    }


# Documents to ingest, in upload order.
DOCS = [
    _enisa_doc(
        "enisa_cra_requirements_standards_mapping",
        "ENISA CRA Standards Mapping",
        "Cyber Resilience Act Requirements Standards Mapping",
        "standards_mapping",
        year="2024",
    ),
    _enisa_doc(
        "enisa_cra_implementation_via_eucc",
        "ENISA CRA via EUCC",
        "CRA Implementation via EUCC and its Applicable Technical Elements",
        "certification_guidance",
    ),
    _enisa_doc(
        "enisa_cra_implementation_via_eucc_annex",
        "ENISA CRA via EUCC (Annex)",
        "Annex — CRA Implementation via EUCC",
        "certification_guidance_annex",
    ),
    _enisa_doc(
        "enisa_eucc_vulnerability_management_disclosure",
        "EUCC Vuln Management & Disclosure",
        "EUCC Guidelines — Vulnerability Management and Disclosure v1.1",
        "vulnerability_guidance",
    ),
    _enisa_doc(
        "enisa_eccg_opinion_vulnerability_management",
        "ECCG Opinion Vuln Management",
        "Final ECCG Opinion — Guidance on Vulnerability Management",
        "eccg_opinion",
    ),
    _enisa_doc(
        "enisa_nis2_technical_implementation_guidance",
        "ENISA NIS2 TIG v1.0",
        "ENISA Technical Implementation Guidance on Cybersecurity Risk Management Measures v1.0",
        "technical_guidance",
        year="2025",
    ),
    _enisa_doc(
        "enisa_nis2_security_measures_consultation",
        "ENISA NIS2 Security Measures (Draft)",
        "Implementation Guidance on Security Measures — Public Consultation Draft",
        "consultation_draft",
        "consultation_draft",
        # The file on disk has a longer name than the regulation_id.
        source_file="enisa_nis2_security_measures_implementation_guidance_consultation.pdf",
    ),
    _enisa_doc(
        "enisa_cra_single_reporting_platform_faq",
        "ENISA SRP FAQ",
        "CRA Single Reporting Platform (SRP) FAQ",
        "faq",
        source_file="enisa_cra_single_reporting_platform_faq.html",
    ),
    _enisa_doc(
        "enisa_eucc_evaluation_methodology_product_series",
        "EUCC Eval Methodology Product Series",
        "EUCC Guidelines — Evaluation Methodology for Product Series v1.0",
        "evaluation_methodology",
        year="2025",
    ),
    _enisa_doc(
        "enisa_threat_landscape_2025",
        "ENISA Threat Landscape 2025",
        "ENISA Threat Landscape 2025 v1.2",
        "threat_landscape",
        "evidentiary",
        year="2025",
    ),
    _enisa_doc(
        "enisa_cvd_policies_eu_2022",
        "ENISA CVD Policies EU 2022",
        "Coordinated Vulnerability Disclosure Policies in the EU (2022)",
        "policy_study",
        year="2022",
    ),
]
|
||||
|
||||
|
||||
def normalize_text(text: str) -> str:
    """Normalize extracted PDF/HTML text before it is uploaded.

    Steps, in order:
      1. Unicode NFKC normalization (e.g. ligatures become plain letters).
      2. Removal of invisible formatting characters.
      3. Repeated until the text stops changing: re-join numbers split as
         "5 . 2", identifiers split as "ISO - 27001" and parenthesised
         numbers split as "( 3 )".
      4. Collapse every run of two or more non-newline whitespace
         characters into a single space.

    Args:
        text: Raw extracted text.

    Returns:
        The normalized text.
    """
    text = unicodedata.normalize("NFKC", text)

    # NOTE(review): the original literals here were empty/invisible strings,
    # which makes the calls either no-ops or unreadable. They are presumably
    # the soft hyphen and the zero-width space — confirm against the
    # upstream file. Escapes keep the intent visible in any editor.
    text = text.replace("\u00ad", "").replace("\u200b", "")

    # A single re.sub pass cannot fix overlapping matches ("1 . 2 . 3"
    # becomes "1.2 . 3" first), so iterate to a fixed point.
    prev = None
    while prev != text:
        prev = text
        text = re.sub(r"(\d+)\s+\.\s+(\d+)", r"\1.\2", text)
        text = re.sub(r"\b([A-Z]{2,4})\s+-\s+(\d+)\b", r"\1-\2", text)
        text = re.sub(r"\(\s+(\d+)\s+\)", r"(\1)", text)

    # [^\S\n] is "whitespace except newline": line structure is kept here.
    text = re.sub(r"[^\S\n]{2,}", " ", text)
    return text
|
||||
|
||||
|
||||
class _HTMLToText(HTMLParser):
|
||||
SKIP = {"script", "style", "nav", "header", "footer", "noscript"}
|
||||
BLOCK = {"p", "div", "li", "br", "h1", "h2", "h3", "h4", "h5", "h6", "tr", "section"}
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._buf: list[str] = []
|
||||
self._skip_depth = 0
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if tag in self.SKIP:
|
||||
self._skip_depth += 1
|
||||
if tag in self.BLOCK:
|
||||
self._buf.append("\n")
|
||||
|
||||
def handle_endtag(self, tag):
|
||||
if tag in self.SKIP and self._skip_depth > 0:
|
||||
self._skip_depth -= 1
|
||||
if tag in self.BLOCK:
|
||||
self._buf.append("\n")
|
||||
|
||||
def handle_data(self, data):
|
||||
if self._skip_depth == 0:
|
||||
self._buf.append(data)
|
||||
|
||||
def text(self) -> str:
|
||||
raw = "".join(self._buf)
|
||||
raw = re.sub(r"\n{3,}", "\n\n", raw)
|
||||
return raw.strip()
|
||||
|
||||
|
||||
def extract_pdf(path: Path) -> str:
    """Extract and normalize the text of a PDF via pdfplumber.

    Pages that yield no text are skipped; the remaining page texts are
    joined with blank lines and passed through ``normalize_text``.
    Progress is printed every 50 pages.
    """
    print(f" Extracting PDF: {path.name}")
    page_texts: list[str] = []
    with pdfplumber.open(path) as pdf:
        for page_no, page in enumerate(pdf.pages, start=1):
            extracted = page.extract_text(x_tolerance=3, y_tolerance=4)
            if extracted:
                page_texts.append(extracted)
            if page_no % 50 == 0:
                print(f" {page_no}/{len(pdf.pages)} pages...")
    joined = "\n\n".join(page_texts)
    return normalize_text(joined)
|
||||
|
||||
|
||||
def extract_html(path: Path) -> str:
    """Read an HTML file as UTF-8 and return its normalized visible text.

    Undecodable bytes are replaced rather than raising. Markup is
    stripped with ``_HTMLToText`` and the result is run through
    ``normalize_text``.
    """
    print(f" Extracting HTML: {path.name}")
    converter = _HTMLToText()
    converter.feed(path.read_text(encoding="utf-8", errors="replace"))
    return normalize_text(converter.text())
|
||||
|
||||
|
||||
def get_text(doc) -> str:
    """Return the normalized text for one DOCS entry.

    The source file is ``SOURCE_DIR / doc["filename"]``; the extractor is
    chosen by file suffix (case-insensitive).

    Raises:
        FileNotFoundError: the source file does not exist.
        ValueError: the suffix is neither .pdf nor .html/.htm.
    """
    path = SOURCE_DIR / doc["filename"]
    if not path.exists():
        raise FileNotFoundError(path)

    suffix = path.suffix.lower()
    if suffix == ".pdf":
        extractor = extract_pdf
    elif suffix in {".html", ".htm"}:
        extractor = extract_html
    else:
        raise ValueError(f"Unsupported file type: {path.suffix}")

    text = extractor(path)
    print(f" Extracted {len(text):,} chars")
    return text
|
||||
|
||||
|
||||
def upload_text_legal(text: str, filename: str, extra_metadata: dict) -> dict:
    """Upload ``text`` as a plain-text file to the RAG service.

    The document is posted to ``{RAG_URL}/api/v1/documents/upload`` with
    ``chunk_strategy='legal'``; ``extra_metadata`` is sent JSON-encoded in
    the ``metadata_json`` form field.

    Returns:
        The decoded JSON response body.

    Raises:
        httpx.HTTPStatusError: the service answered with an error status.
    """
    fields = dict(
        collection=COLLECTION,
        data_type="compliance",
        bundesland="bund",
        use_case="compliance",
        year="2026",
        chunk_strategy="legal",
        chunk_size="1500",
        chunk_overlap="100",
        metadata_json=json.dumps(extra_metadata, ensure_ascii=False),
    )
    upload = {"file": (filename, text.encode("utf-8"), "text/plain")}
    endpoint = f"{RAG_URL}/api/v1/documents/upload"

    # NOTE: TLS certificate verification is disabled for this client.
    with httpx.Client(timeout=UPLOAD_TIMEOUT, verify=False) as client:
        response = client.post(endpoint, files=upload, data=fields)
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
def count_chunks(regulation_id: str) -> int:
    """Return the exact number of Qdrant points stored for ``regulation_id``.

    Queries the points/count endpoint of COLLECTION with a ``must`` filter
    on the ``regulation_id`` payload key.

    Raises:
        httpx.HTTPStatusError: Qdrant answered with an error status.
    """
    query = {
        "filter": {
            "must": [
                {"key": "regulation_id", "match": {"value": regulation_id}}
            ]
        },
        "exact": True,
    }
    endpoint = f"{QDRANT_URL}/collections/{COLLECTION}/points/count"
    with httpx.Client(timeout=30) as client:
        response = client.post(endpoint, json=query)
    response.raise_for_status()
    return response.json()["result"]["count"]
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: extract and upload the ENISA documents.

    Flags:
        --dry-run  Extract text and report sizes, but do not contact
                   Qdrant or the RAG service.
        --only ID  Restrict the run to the given regulation_id (repeatable).

    A document that already has chunks in Qdrant is skipped. A failure on
    one document (extraction, Qdrant lookup or upload) is reported and the
    run continues with the next document, so the summary is always printed.

    Returns:
        0 when every selected document was processed without error,
        1 when at least one document failed,
        2 on usage errors (missing source dir, unknown regulation_id).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true",
                        help="Extract text and report sizes, but do not upload.")
    parser.add_argument("--only", action="append", default=[],
                        help="Limit run to one or more regulation_ids.")
    args = parser.parse_args()

    if not SOURCE_DIR.exists():
        print(f"ERROR: source dir not found: {SOURCE_DIR}")
        return 2

    docs = DOCS
    if args.only:
        wanted = set(args.only)
        docs = [d for d in DOCS if d["regulation_id"] in wanted]
        missing = wanted - {d["regulation_id"] for d in docs}
        if missing:
            print(f"ERROR: unknown regulation_id(s): {sorted(missing)}")
            return 2

    print("=" * 70)
    print(f"ENISA CRA ingestion → collection={COLLECTION}")
    print(f"Source dir: {SOURCE_DIR}")
    print(f"Documents: {len(docs)} Dry run: {args.dry_run}")
    print("=" * 70)

    results = []
    failures = 0

    def record(reg_id, strength, chars, new):
        # One summary row per document, whatever its outcome.
        results.append({"id": reg_id, "chars": chars, "new": new,
                        "strength": strength})

    for i, doc in enumerate(docs, 1):
        reg_id = doc["regulation_id"]
        strength = doc["extra_metadata"]["requirement_strength"]
        print(f"\n[{i}/{len(docs)}] {reg_id}")

        if args.dry_run:
            existing = "?"
        else:
            try:
                existing = count_chunks(reg_id)
            except (httpx.HTTPError, ValueError, KeyError) as e:
                # Without the count we cannot rule out a duplicate upload,
                # so treat the document as failed instead of ingesting it.
                print(f" ERROR querying Qdrant: {e}")
                failures += 1
                record(reg_id, strength, 0, 0)
                continue
        print(f" Existing chunks in Qdrant: {existing}")

        try:
            text = get_text(doc)
        except Exception as e:
            print(f" ERROR extracting text: {e}")
            failures += 1
            record(reg_id, strength, 0, 0)
            continue

        if args.dry_run:
            record(reg_id, strength, len(text), "?")
            continue

        if existing > 0:
            print(f" SKIP — {existing} chunks already present. "
                  f"Use Qdrant delete-by-filter before re-ingesting.")
            record(reg_id, strength, len(text), 0)
            continue

        print(" Uploading with chunk_strategy='legal'...")
        try:
            result = upload_text_legal(
                text, doc["upload_filename"], doc["extra_metadata"]
            )
        except (httpx.HTTPError, ValueError) as e:
            print(f" ERROR uploading: {e}")
            failures += 1
            record(reg_id, strength, len(text), 0)
            continue

        new_chunks = result.get("chunks_count", 0)
        new_doc_id = result.get("document_id", "")
        print(f" -> {new_chunks} chunks (doc_id={new_doc_id})")
        record(reg_id, strength, len(text), new_chunks)

        # Short pause between consecutive uploads.
        if i < len(docs):
            time.sleep(2)

    print("\n" + "=" * 70)
    print("SUMMARY")
    print("=" * 70)
    for r in results:
        print(f" {r['id']:<55} chars={r['chars']:<9} new={r['new']:<5} "
              f"strength={r['strength']}")
    # Dry-run rows carry "?" instead of a count and are left out of the sum.
    total_new = sum(r["new"] for r in results if isinstance(r["new"], int))
    print(f"\nTotal new chunks: {total_new}")
    if failures:
        print(f"Failed documents: {failures}")
        return 1
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -22,6 +22,7 @@ import json
|
||||
import logging
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
@@ -108,24 +109,37 @@ class BatchDedupRunner:
|
||||
self._progress_phase = ""
|
||||
self._progress_count = 0
|
||||
self._progress_total = 0
|
||||
self._since = None # set by run() when scoped run requested
|
||||
|
||||
async def run(
|
||||
self,
|
||||
dry_run: bool = False,
|
||||
hint_filter: str = None,
|
||||
since: datetime = None,
|
||||
) -> dict:
|
||||
"""Run the full batch dedup pipeline.
|
||||
|
||||
Args:
|
||||
dry_run: If True, compute stats but don't modify DB/Qdrant.
|
||||
hint_filter: If set, only process groups matching this hint prefix.
|
||||
since: If set, only process controls with created_at >= since.
|
||||
Useful for incremental dedup after single-document ingestion.
|
||||
|
||||
Returns:
|
||||
Stats dict with counts.
|
||||
"""
|
||||
start = time.monotonic()
|
||||
logger.info("BatchDedup starting (dry_run=%s, hint_filter=%s)",
|
||||
dry_run, hint_filter)
|
||||
logger.info("BatchDedup starting (dry_run=%s, hint_filter=%s, since=%s)",
|
||||
dry_run, hint_filter, since)
|
||||
|
||||
# Scoped runs reset checkpoint to avoid skipping new controls whose
|
||||
# control_id sorts before the stale last_id of a previous full run.
|
||||
self._since = since
|
||||
if since and not dry_run:
|
||||
self.db.execute(text(
|
||||
"DELETE FROM canonical_generation_jobs WHERE status = 'dedup_phase2_checkpoint'"
|
||||
))
|
||||
self.db.commit()
|
||||
|
||||
if not dry_run:
|
||||
await ensure_qdrant_collection(collection=self.collection)
|
||||
@@ -133,7 +147,7 @@ class BatchDedupRunner:
|
||||
# Phase 1: Intra-group dedup (same merge_group_hint)
|
||||
# Optimization: skip singleton groups (they're automatically masters)
|
||||
self._progress_phase = "phase1"
|
||||
groups = self._load_merge_groups(hint_filter)
|
||||
groups = self._load_merge_groups(hint_filter, since)
|
||||
self._progress_total = self.stats["total_controls"]
|
||||
|
||||
multi_groups = [(h, c) for h, c in groups if len(c) > 1]
|
||||
@@ -171,7 +185,7 @@ class BatchDedupRunner:
|
||||
logger.info("BatchDedup completed in %.1fs: %s", elapsed, self.stats)
|
||||
return self.stats
|
||||
|
||||
def _load_merge_groups(self, hint_filter: str = None) -> list:
|
||||
def _load_merge_groups(self, hint_filter: str = None, since: datetime = None) -> list:
|
||||
"""Load all Pass 0b controls grouped by merge_group_hint, largest first."""
|
||||
conditions = [
|
||||
"decomposition_method = 'pass0b'",
|
||||
@@ -184,6 +198,10 @@ class BatchDedupRunner:
|
||||
conditions.append("generation_metadata->>'merge_group_hint' LIKE :hf")
|
||||
params["hf"] = f"{hint_filter}%"
|
||||
|
||||
if since:
|
||||
conditions.append("created_at >= :since")
|
||||
params["since"] = since
|
||||
|
||||
where = " AND ".join(conditions)
|
||||
rows = self.db.execute(text(f"""
|
||||
SELECT id::text, control_id, title, objective,
|
||||
@@ -335,13 +353,15 @@ class BatchDedupRunner:
|
||||
"""
|
||||
logger.info("BatchDedup Phase 2: Cross-group pass starting...")
|
||||
|
||||
# Count total
|
||||
total_row = self.db.execute(text("""
|
||||
# Count total — respect scoped run if since is set
|
||||
since_clause = " AND created_at >= :since" if self._since else ""
|
||||
params = {"since": self._since} if self._since else {}
|
||||
total_row = self.db.execute(text(f"""
|
||||
SELECT COUNT(*) FROM canonical_controls
|
||||
WHERE decomposition_method = 'pass0b'
|
||||
AND release_state != 'duplicate'
|
||||
AND release_state != 'deprecated'
|
||||
""")).fetchone()
|
||||
AND release_state != 'deprecated'{since_clause}
|
||||
"""), params).fetchone()
|
||||
total = total_row[0] if total_row else 0
|
||||
|
||||
self._progress_total = total
|
||||
@@ -360,13 +380,16 @@ class BatchDedupRunner:
|
||||
last_control_id = checkpoint_row[0] if checkpoint_row else ""
|
||||
|
||||
if last_control_id:
|
||||
skip_row = self.db.execute(text("""
|
||||
skip_params = {"last_id": last_control_id}
|
||||
if self._since:
|
||||
skip_params["since"] = self._since
|
||||
skip_row = self.db.execute(text(f"""
|
||||
SELECT COUNT(*) FROM canonical_controls
|
||||
WHERE decomposition_method = 'pass0b'
|
||||
AND release_state != 'duplicate'
|
||||
AND release_state != 'deprecated'
|
||||
AND control_id <= :last_id
|
||||
"""), {"last_id": last_control_id}).fetchone()
|
||||
AND control_id <= :last_id{since_clause}
|
||||
"""), skip_params).fetchone()
|
||||
skipped = skip_row[0] if skip_row else 0
|
||||
self._progress_count = skipped
|
||||
logger.info("BatchDedup Cross-group: RESUMING from %s (skipping %d already processed)",
|
||||
@@ -382,17 +405,20 @@ class BatchDedupRunner:
|
||||
total, last_control_id or "beginning")
|
||||
|
||||
while True:
|
||||
rows = self.db.execute(text("""
|
||||
page_params = {"last_id": last_control_id, "page_size": DB_PAGE}
|
||||
if self._since:
|
||||
page_params["since"] = self._since
|
||||
rows = self.db.execute(text(f"""
|
||||
SELECT id::text, control_id, title,
|
||||
generation_metadata->>'merge_group_hint' as merge_group_hint
|
||||
FROM canonical_controls
|
||||
WHERE decomposition_method = 'pass0b'
|
||||
AND release_state != 'duplicate'
|
||||
AND release_state != 'deprecated'
|
||||
AND control_id > :last_id
|
||||
AND control_id > :last_id{since_clause}
|
||||
ORDER BY control_id
|
||||
LIMIT :page_size
|
||||
"""), {"last_id": last_control_id, "page_size": DB_PAGE}).fetchall()
|
||||
"""), page_params).fetchall()
|
||||
|
||||
if not rows:
|
||||
break
|
||||
|
||||
@@ -460,12 +460,50 @@ WICHTIGE REGELN:
|
||||
|
||||
7. MERGE-KEY: Erzeuge im JSON-Output ein zusaetzliches Feld "merge_key" mit
|
||||
dem Format: "action_type:normalized_object:control_phase"
|
||||
|
||||
WICHTIG: Waehle normalized_object NUR aus dieser Liste kanonischer Tokens:
|
||||
SECURITY: multi_factor_auth, password_policy, credentials, session_management,
|
||||
privileged_access, access_control, encryption, transport_encryption,
|
||||
key_management, certificate_management, network_security, network_segmentation,
|
||||
firewall, vpn, remote_access, monitoring, audit_logging, siem, alerting,
|
||||
compliance_audit, vulnerability, patch_management, backup, disaster_recovery,
|
||||
physical_security, secure_development, api_security, input_validation,
|
||||
container_security, logging_configuration
|
||||
DATA_PROTECTION: personal_data, sensitive_data, health_data, consent,
|
||||
data_subject_rights, data_retention, data_transfer, data_breach_notification,
|
||||
dpia, data_processing_agreement, privacy_by_design, data_processing_register,
|
||||
data_classification, cookie_consent, video_surveillance
|
||||
GOVERNANCE: policy, procedure, process, training, awareness, incident,
|
||||
risk_management, third_party_management, change_management, documentation,
|
||||
records_management, compliance_reporting, asset_management,
|
||||
human_resources_security
|
||||
REGULATORY: supervisory_authority, certification, product_safety, ai_system,
|
||||
financial_reporting, aml, whistleblowing, consumer_protection, ecommerce,
|
||||
telecommunications, medical_device, payment_services, critical_infrastructure,
|
||||
supply_chain_due_diligence, sustainability_reporting
|
||||
|
||||
Wenn KEIN Token passt: "OTHER:kurzbeschreibung" (z.B. "OTHER:battery_recycling")
|
||||
|
||||
ABGRENZUNGEN (haeufige Fehler vermeiden!):
|
||||
- monitoring = NUR kontinuierliche Echtzeit-Ueberwachung von Systemen
|
||||
- audit_logging = Protokollierung, Audit Trail, Nachvollziehbarkeit
|
||||
- compliance_audit = externe Pruefungen, Zertifizierungsaudits
|
||||
- training = Schulungen DURCHFUEHREN (nicht "ueberwachen")
|
||||
- procedure = Verfahren DEFINIEREN (nicht Incident-Behandlung)
|
||||
- incident = Sicherheitsvorfaelle BEHANDELN
|
||||
- alerting = Meldepflichten und Benachrichtigungen
|
||||
- personal_data = DSGVO-Verarbeitungsgrundsaetze (nicht Zertifizierung!)
|
||||
- certification = Zertifizierung/Konformitaet (nicht Datenschutz)
|
||||
|
||||
Beispiele:
|
||||
- "implement:api_rate_limiting:implementation"
|
||||
- "define:access_control_policy:definition"
|
||||
- "monitor:third_party_vulnerabilities:monitoring"
|
||||
- "test:authentication_mechanism:testing"
|
||||
- "implement:multi_factor_auth:implementation"
|
||||
- "define:access_control:definition"
|
||||
- "monitor:network_security:monitoring"
|
||||
- "test:vulnerability:testing"
|
||||
- "report:supervisory_authority:reporting"
|
||||
- "implement:audit_logging:implementation" (NICHT monitoring!)
|
||||
- "define:incident:definition" (Incident-Verfahren, NICHT procedure!)
|
||||
- "train:training:operation" (Schulung, NICHT monitoring!)
|
||||
|
||||
8. APPLICABILITY + SCANNER: Bestimme fuer jedes Control:
|
||||
- applicability: Unter welchen Bedingungen gilt dieses Control?
|
||||
@@ -2472,6 +2510,81 @@ def _ensure_list(val) -> list:
|
||||
return []
|
||||
|
||||
|
||||
# Canonical object tokens from object_ontology (loaded once)
|
||||
_CANONICAL_OBJECTS: set[str] | None = None
|
||||
|
||||
|
||||
def _load_canonical_objects() -> set[str]:
|
||||
"""Load canonical tokens from DB, fallback to hardcoded set."""
|
||||
global _CANONICAL_OBJECTS
|
||||
if _CANONICAL_OBJECTS is not None:
|
||||
return _CANONICAL_OBJECTS
|
||||
try:
|
||||
from db.session import get_engine
|
||||
from sqlalchemy import text
|
||||
engine = get_engine()
|
||||
with engine.connect() as c:
|
||||
rows = c.execute(text(
|
||||
"SELECT canonical_token FROM compliance.object_ontology"
|
||||
)).fetchall()
|
||||
_CANONICAL_OBJECTS = {r[0] for r in rows}
|
||||
except Exception:
|
||||
_CANONICAL_OBJECTS = set()
|
||||
if not _CANONICAL_OBJECTS:
|
||||
_CANONICAL_OBJECTS = {
|
||||
"multi_factor_auth", "password_policy", "credentials",
|
||||
"session_management", "privileged_access", "access_control",
|
||||
"encryption", "transport_encryption", "key_management",
|
||||
"certificate_management", "network_security",
|
||||
"network_segmentation", "firewall", "vpn", "remote_access",
|
||||
"monitoring", "audit_logging", "siem", "alerting",
|
||||
"compliance_audit", "vulnerability", "patch_management",
|
||||
"backup", "disaster_recovery", "personal_data",
|
||||
"sensitive_data", "consent", "data_subject_rights",
|
||||
"data_retention", "data_transfer", "data_breach_notification",
|
||||
"dpia", "data_processing_agreement", "privacy_by_design",
|
||||
"policy", "procedure", "process", "training", "awareness",
|
||||
"incident", "risk_management", "third_party_management",
|
||||
"change_management", "documentation", "supervisory_authority",
|
||||
"certification", "product_safety", "ai_system", "aml",
|
||||
"critical_infrastructure", "medical_device",
|
||||
}
|
||||
return _CANONICAL_OBJECTS
|
||||
|
||||
|
||||
def _validate_merge_key(merge_key: str) -> str:
|
||||
"""Validate merge_key object against canonical ontology.
|
||||
|
||||
Returns the merge_key (possibly corrected). Logs warnings for
|
||||
unknown objects so they can be tracked.
|
||||
"""
|
||||
parts = merge_key.split(":", 2)
|
||||
if len(parts) < 2:
|
||||
return merge_key
|
||||
|
||||
action, obj = parts[0], parts[1]
|
||||
phase = parts[2] if len(parts) > 2 else "implementation"
|
||||
|
||||
# Accept OTHER: prefix (LLM signaling unknown object)
|
||||
if obj.startswith("OTHER:"):
|
||||
return merge_key
|
||||
|
||||
# Check against canonical ontology
|
||||
canonical = _load_canonical_objects()
|
||||
if obj in canonical:
|
||||
return merge_key
|
||||
|
||||
# Try normalize_object() as fallback
|
||||
from services.control_dedup import normalize_object
|
||||
normed = normalize_object(obj)
|
||||
if normed in canonical:
|
||||
return f"{action}:{normed}:{phase}"
|
||||
|
||||
# Unknown object — log and keep as-is (will be clustered by embedding)
|
||||
logger.debug("merge_key unknown object: %s (normed: %s)", obj, normed)
|
||||
return merge_key
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Decomposition Pass
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -3025,10 +3138,10 @@ class DecompositionPass:
|
||||
evidence_type=parsed.get("evidence_type", ""),
|
||||
provides_context=_ensure_list(parsed.get("provides_context", [])),
|
||||
)
|
||||
# Store merge_key from LLM output in metadata
|
||||
# Store merge_key from LLM output in metadata — with validation
|
||||
llm_merge_key = parsed.get("merge_key", "")
|
||||
if llm_merge_key:
|
||||
atomic.merge_group_hint = llm_merge_key
|
||||
atomic.merge_group_hint = _validate_merge_key(llm_merge_key)
|
||||
|
||||
atomic.parent_control_uuid = obl["parent_uuid"]
|
||||
atomic.obligation_candidate_id = obl["candidate_id"]
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
"""Shared embedding + sub-clustering utilities for the control pipeline."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict
|
||||
|
||||
import httpx
|
||||
import numpy as np
|
||||
from sklearn.cluster import MiniBatchKMeans
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
EMBEDDING_URL = os.getenv(
|
||||
"EMBEDDING_SERVICE_URL", "http://embedding-service:8087"
|
||||
)
|
||||
|
||||
|
||||
def embed_texts(texts: list[str]) -> np.ndarray | None:
|
||||
"""Embed texts via the embedding-service in batches of 64."""
|
||||
try:
|
||||
result = np.zeros((len(texts), 1024), dtype=np.float32)
|
||||
batch_size = 64
|
||||
for i in range(0, len(texts), batch_size):
|
||||
batch = texts[i : i + batch_size]
|
||||
for attempt in range(3):
|
||||
try:
|
||||
with httpx.Client(
|
||||
timeout=httpx.Timeout(60.0, connect=10.0)
|
||||
) as client:
|
||||
resp = client.post(
|
||||
f"{EMBEDDING_URL}/embed", json={"texts": batch}
|
||||
)
|
||||
resp.raise_for_status()
|
||||
embs = resp.json().get("embeddings", [])
|
||||
end = min(i + len(embs), len(texts))
|
||||
result[i:end] = np.array(embs, dtype=np.float32)
|
||||
break
|
||||
except Exception as e:
|
||||
if attempt == 2:
|
||||
logger.error("Embed batch %d failed: %s", i, e)
|
||||
import time
|
||||
time.sleep(2)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error("Embedding failed: %s", e)
|
||||
return None
|
||||
|
||||
|
||||
def subcluster_controls(
    controls: list[dict], target_size: int = 50
) -> list[list[dict]]:
    """Split controls into clusters of similar items.

    Inputs with at most ``target_size`` controls are returned as a single
    cluster. Larger inputs are embedded (title, or control_id when the
    title is empty), L2-normalized and grouped with MiniBatchKMeans using
    ``k = max(2, min(len(controls) // target_size, 30))``. If embedding
    fails, the controls are cut into consecutive slices of ``target_size``.

    Returns:
        A list of clusters, each a list of the original control dicts.
    """
    total = len(controls)
    if total <= target_size:
        return [controls]

    titles = [c.get("title", "") or c.get("control_id", "") for c in controls]
    vectors = embed_texts(titles)
    if vectors is None:
        slices = []
        for offset in range(0, total, target_size):
            slices.append(controls[offset : offset + target_size])
        return slices

    # Scale to unit length; zero vectors are divided by 1 to avoid 0/0.
    lengths = np.linalg.norm(vectors, axis=1, keepdims=True)
    lengths[lengths == 0] = 1
    unit_vectors = vectors / lengths

    n_clusters = max(2, min(total // target_size, 30))
    model = MiniBatchKMeans(
        n_clusters=n_clusters,
        batch_size=min(100, total),
        max_iter=50,
        random_state=42,
    )
    assignments = model.fit_predict(unit_vectors)

    # Group in order of first appearance of each label.
    grouped: dict[int, list[dict]] = {}
    for label, ctrl in zip(assignments, controls):
        grouped.setdefault(int(label), []).append(ctrl)
    return list(grouped.values())
|
||||
@@ -0,0 +1,94 @@
|
||||
# Golden Dataset for MC Assignment Quality
|
||||
# Manually verified controls with their expected MC topics.
|
||||
# Used for regression testing after pipeline changes.
|
||||
# Created: 2026-05-10, verified by manual review (19/20 correct)
|
||||
|
||||
golden_controls:
|
||||
# ── Data Protection ──
|
||||
- control_id: "DATA-3291-A06"
|
||||
expected_topic_prefix: "data_retention"
|
||||
reason: "Speicherfristen für personenbezogene Daten definieren"
|
||||
|
||||
- control_id: "SEC-7449-A01"
|
||||
expected_topic_prefix: "personal_data"
|
||||
reason: "Fahrzeugnutzungsdaten in Telematikbox (Datenminimierung)"
|
||||
|
||||
- control_id: "DATA-3518-A06"
|
||||
expected_topic_prefix: "data_subject_rights"
|
||||
reason: "Betroffene über Lösch-Ausnahmen informieren"
|
||||
|
||||
- control_id: "GOV-963-A02"
|
||||
expected_topic_prefix: "consent"
|
||||
reason: "Zustimmung des Urhebers vor Veröffentlichung einholen"
|
||||
|
||||
# ── Security ──
|
||||
- control_id: "CRYP-1454-A07"
|
||||
expected_topic_prefix: "encryption"
|
||||
reason: "RSASSA-PSS in TLS 1.3 verifizieren"
|
||||
|
||||
- control_id: "NET-1141-A08"
|
||||
expected_topic_prefix: "monitoring"
|
||||
reason: "Sampling-Strategien konfigurieren"
|
||||
|
||||
- control_id: "SEC-2244-A05"
|
||||
expected_topic_prefix: "asset_management"
|
||||
reason: "Systeminventar kontinuierlich aktualisieren"
|
||||
|
||||
- control_id: "AUTH-3468-A06"
|
||||
expected_topic_prefix: "access_control"
|
||||
reason: "Rollenkonzept mit abgestuften Zugriffsrechten"
|
||||
|
||||
# ── Governance ──
|
||||
- control_id: "AUTH-2364-A09"
|
||||
expected_topic_prefix: "supervisory_authority"
|
||||
reason: "Zusammenarbeit mit Wirtschaftsakteuren dokumentieren"
|
||||
|
||||
- control_id: "SEC-5972-A14"
|
||||
expected_topic_prefix: "third_party_management"
|
||||
reason: "Cybersicherheitsrichtlinien kritischer Lieferanten prüfen"
|
||||
|
||||
- control_id: "SEC-3441-A02"
|
||||
expected_topic_prefix: "human_resources_security"
|
||||
reason: "Mitarbeiter vor Nachteil bei Verweigerung schützen"
|
||||
|
||||
- control_id: "SEC-3502-A06"
|
||||
expected_topic_prefix: "awareness"
|
||||
reason: "Organisationskultur für Sicherheitsverbesserung"
|
||||
|
||||
- control_id: "GOV-1748-A04"
|
||||
expected_topic_prefix: "policy"
|
||||
reason: "Annahme von Geschenken untersagen"
|
||||
|
||||
# ── Regulatory ──
|
||||
- control_id: "AI-1287-A01"
|
||||
expected_topic_prefix: "ai_system"
|
||||
reason: "Akteure des KI-Systems identifizieren"
|
||||
|
||||
- control_id: "AI-1732-A11"
|
||||
expected_topic_prefix: "ai_system"
|
||||
reason: "Menschliche Kontrolle für KI-Entscheidungen"
|
||||
|
||||
- control_id: "COMP-1352-A04"
|
||||
expected_topic_prefix: "certification"
|
||||
reason: "Amateurfunkprüfungszeugnis vorlegen"
|
||||
|
||||
- control_id: "FIN-1212-A02"
|
||||
expected_topic_prefix: "financial_reporting"
|
||||
reason: "Jahresabschluss gemäß EU-Richtlinie aufstellen"
|
||||
|
||||
- control_id: "AUTH-1165-A01"
|
||||
expected_topic_prefix: "data_classification"
|
||||
reason: "Öffentliche IP-Adressen als Stammdaten klassifizieren"
|
||||
|
||||
- control_id: "SEC-7367-A10"
|
||||
expected_topic_prefix: "audit_logging"
|
||||
reason: "Banner-Version Rückverfolgung testen"
|
||||
|
||||
- control_id: "LAB-034-A03"
|
||||
expected_topic_prefix: "third_party_management"
|
||||
reason: "Verträge auf unzulässige Klauseln prüfen"
|
||||
|
||||
quality_thresholds:
|
||||
min_accuracy: 0.90
|
||||
max_controls_per_mc: 300
|
||||
min_master_controls: 10000
|
||||
@@ -0,0 +1,166 @@
|
||||
"""
|
||||
Master Control Quality Tests.
|
||||
|
||||
Regression tests to ensure MC assignment quality stays above 90%.
|
||||
Uses golden dataset of manually verified controls.
|
||||
"""
|
||||
|
||||
import os
|
||||
import yaml
|
||||
import pytest
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
# Connection string for the database under test. Taken from the
# DATABASE_URL environment variable when set.
# NOTE(review): the fallback embeds a username/password; presumably this is a
# local docker-compose default only. Confirm it is never a real credential.
DB_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db",
)

# Lazily created engine shared by all tests in this module (see get_engine).
_engine = None
|
||||
|
||||
|
||||
def get_engine():
    """Return the module-wide SQLAlchemy engine, creating it on first call.

    The engine is built from ``DB_URL`` and passes a connection option that
    sets ``search_path`` to ``compliance,public``. Subsequent calls return
    the same cached object.
    """
    global _engine
    if _engine is not None:
        return _engine

    pg_connect_args = {"options": "-c search_path=compliance,public"}
    _engine = create_engine(DB_URL, connect_args=pg_connect_args)
    return _engine
|
||||
|
||||
|
||||
def load_golden():
    """Load the golden dataset that lives next to this module.

    Returns:
        Whatever ``yaml.safe_load`` produces for
        ``golden_mc_assignments.yaml`` in this file's directory.
    """
    path = os.path.join(os.path.dirname(__file__), "golden_mc_assignments.yaml")
    # Read as UTF-8 explicitly: the dataset contains non-ASCII text, and the
    # platform default encoding is not UTF-8 everywhere.
    with open(path, encoding="utf-8") as f:
        return yaml.safe_load(f)
|
||||
|
||||
|
||||
# ── Golden Dataset Tests ──
|
||||
|
||||
|
||||
class TestGoldenMCAssignments:
    """Checks that every golden control sits in the expected Master Control."""

    @pytest.fixture(autouse=True)
    def setup(self):
        # Runs before each test: load the golden dataset and grab the shared engine.
        self.golden = load_golden()
        self.engine = get_engine()

    def test_golden_controls_correctly_assigned(self):
        """Every golden control must belong to an MC whose canonical_name
        starts with the control's ``expected_topic_prefix``.

        All mismatches are collected and reported together in one failure.
        """
        # Built once and reused for every golden control.
        lookup = text("""
            SELECT mc.canonical_name
            FROM master_controls mc
            JOIN master_control_members mcm ON mcm.master_control_uuid = mc.id
            JOIN canonical_controls cc ON cc.id = mcm.control_uuid
            WHERE cc.control_id = :cid
            LIMIT 1
        """)

        errors = []
        with self.engine.connect() as c:
            for gc in self.golden["golden_controls"]:
                row = c.execute(lookup, {"cid": gc["control_id"]}).fetchone()

                if row is None:
                    errors.append(f"{gc['control_id']}: not found in any MC")
                # A NULL canonical_name counts as a mismatch instead of
                # raising AttributeError and aborting the remaining checks.
                elif row[0] is None or not row[0].startswith(
                    gc["expected_topic_prefix"]
                ):
                    errors.append(
                        f"{gc['control_id']}: expected {gc['expected_topic_prefix']}*, "
                        f"got {row[0]}"
                    )

        if errors:
            pytest.fail(
                f"{len(errors)} golden controls misassigned:\n"
                + "\n".join(f"  - {e}" for e in errors)
            )
|
||||
|
||||
|
||||
# ── Structural Quality Tests ──
|
||||
|
||||
|
||||
class TestMCStructuralQuality:
    """Structural invariants for the master_controls tables."""

    @pytest.fixture(autouse=True)
    def setup(self):
        # Runs before each test: thresholds come from the golden dataset's
        # ``quality_thresholds`` section.
        self.golden = load_golden()
        self.thresholds = self.golden["quality_thresholds"]
        self.engine = get_engine()

    def test_minimum_master_controls(self):
        """The number of Master Controls must reach ``min_master_controls``."""
        with self.engine.connect() as c:
            count = c.execute(
                text("SELECT count(*) FROM master_controls")
            ).scalar()
        assert count >= self.thresholds["min_master_controls"], (
            f"Only {count} MCs, expected >= {self.thresholds['min_master_controls']}"
        )

    def test_max_controls_per_mc(self):
        """No MC may have more than ``max_controls_per_mc`` controls."""
        with self.engine.connect() as c:
            max_mc = c.execute(
                text("SELECT max(total_controls) FROM master_controls")
            ).scalar()
        # max() yields NULL when there are no rows (or only NULL values);
        # report that clearly instead of failing with a TypeError below.
        assert max_mc is not None, (
            "max(total_controls) is NULL: master_controls has no rows with a "
            "total_controls value"
        )
        assert max_mc <= self.thresholds["max_controls_per_mc"], (
            f"Max MC has {max_mc} controls, limit is {self.thresholds['max_controls_per_mc']}"
        )

    def test_no_empty_master_controls(self):
        """No MC may have ``total_controls = 0``."""
        with self.engine.connect() as c:
            empty = c.execute(text("""
                SELECT count(*) FROM master_controls
                WHERE total_controls = 0
            """)).scalar()
        assert empty == 0, f"{empty} empty MCs found"

    def test_all_members_reference_valid_controls(self):
        """Every MC member row must reference an existing canonical control."""
        with self.engine.connect() as c:
            # Members whose control_uuid has no matching canonical_controls row.
            orphans = c.execute(text("""
                SELECT count(*) FROM master_control_members mcm
                LEFT JOIN canonical_controls cc ON cc.id = mcm.control_uuid
                WHERE cc.id IS NULL
            """)).scalar()
        assert orphans == 0, f"{orphans} orphan members found"
|
||||
|
||||
|
||||
# ── Doc Check Controls Tests ──
|
||||
|
||||
|
||||
class TestDocCheckControls:
    """Sanity checks for the doc_check_controls table."""

    @pytest.fixture(autouse=True)
    def setup(self):
        # Runs before each test: reuse the shared engine.
        self.engine = get_engine()

    def test_doc_check_controls_exist(self):
        """The table must contain more than 100 rows."""
        query = text("SELECT count(*) FROM doc_check_controls")
        with self.engine.connect() as c:
            count = c.execute(query).scalar()
        assert count > 100, f"Only {count} doc_check_controls"

    def test_all_doc_types_covered(self):
        """Each of the 8 expected document types must occur at least once."""
        expected = {
            "dse",
            "cookie",
            "impressum",
            "widerruf",
            "agb",
            "dsfa",
            "avv",
            "loeschkonzept",
        }
        query = text("SELECT DISTINCT doc_type FROM doc_check_controls")
        with self.engine.connect() as c:
            rows = c.execute(query).fetchall()
        actual = set()
        for r in rows:
            actual.add(r[0])
        missing = expected - actual
        assert not missing, f"Missing doc types: {missing}"

    def test_check_questions_not_empty(self):
        """No row may have a NULL or empty check_question."""
        query = text("""
            SELECT count(*) FROM doc_check_controls
            WHERE check_question IS NULL OR check_question = ''
        """)
        with self.engine.connect() as c:
            empty = c.execute(query).scalar()
        assert empty == 0, f"{empty} controls without check_question"
|
||||
@@ -909,3 +909,20 @@ services:
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
# =========================================================
|
||||
# MARKETING WEBSITE - BreakPilot Produktwebsite
|
||||
# =========================================================
|
||||
marketing-website:
|
||||
build:
|
||||
context: ./marketing-website
|
||||
dockerfile: Dockerfile
|
||||
container_name: bp-core-marketing-website
|
||||
platform: linux/arm64
|
||||
ports:
|
||||
- "3014:3000"
|
||||
environment:
|
||||
NODE_ENV: production
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- breakpilot-network
|
||||
|
||||
@@ -0,0 +1,860 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en" dir="ltr" class="h-100">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="MobileOptimized" content="width" />
|
||||
<meta name="HandheldFriendly" content="true" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<link rel="icon" href="/themes/custom/enisaweb/favicon.ico" type="image/png" />
|
||||
<link rel="alternate" type="application/rss+xml" title="Single Reporting Platform (SRP)" href="https://www.enisa.europa.eu/taxonomy/term/1317/feed" />
|
||||
<script>window.a2a_config=window.a2a_config||{};a2a_config.callbacks=[];a2a_config.overlays=[];a2a_config.templates={};</script>
|
||||
|
||||
<meta content="ENISA: Every day we experience the Information Society. Interconnected networks touch our everyday lives, at home and at work. It is therefore vital that computers, mobile phones, banking, and the Internet function, to support Europe’s digital economy. That is why ENISA is working with Cybersecurity for the EU and the Member States." name="DC.description">
|
||||
<meta name="description" content="ENISA is the EU agency dedicated to enhancing cybersecurity in Europe. They offer guidance, tools, and resources to safeguard citizens and businesses from cyber threats.">
|
||||
<meta name="keywords" content="Cybersecurity, EU, ENISA, computer security, Cyber Threats, EU Cyber Crisis, Incident Management, Market and Standards, Product Security, Security certification, Risk Management, Skills and competences, State of cybersecurity in the EU, Vulnerability Disclosure, Artificial Intelligence, Next Gen Technologies, Awareness, Cyber Hygiene, Digital Identity, Data Protection, Education and career path">
|
||||
<title>Single Reporting Platform (SRP) | ENISA</title>
|
||||
|
||||
<link rel="stylesheet" media="all" href="/sites/default/files/css/css_H8-PkuOemoNPxq-HW0ue4hGKWqBFO5KaLA29hyssQWk.css?delta=0&language=en&theme=enisaweb&include=eJxtj2sOwyAMgy9E4UhVCl6HRpKJ0FXd6Uf3lrY_kfPJkR1KqSnJFugp_KGqNBdPSLlpHSlGrSmrhLd6WCDJdYAgWplKvsJBstGKKUxkX9tcdKIyWNtKlvnDGWY0w5xpzFRG7pE0ds_JwqEndJMpw0flswp6qz_GX-TbEQxnmzXwo8olY7Vwn541LQVuRb-ZiSFLD6vsVww7GHYyvD68AUVHcCg" />
|
||||
<link rel="stylesheet" media="all" href="//cdnjs.cloudflare.com/ajax/libs/font-awesome/6.6.0/css/all.min.css" />
|
||||
<link rel="stylesheet" media="all" href="/sites/default/files/css/css_XfzkZLkUSSs_yRcqoRmh-VWG0krtdRIrQV-ENlV19ao.css?delta=2&language=en&theme=enisaweb&include=eJxtj2sOwyAMgy9E4UhVCl6HRpKJ0FXd6Uf3lrY_kfPJkR1KqSnJFugp_KGqNBdPSLlpHSlGrSmrhLd6WCDJdYAgWplKvsJBstGKKUxkX9tcdKIyWNtKlvnDGWY0w5xpzFRG7pE0ds_JwqEndJMpw0flswp6qz_GX-TbEQxnmzXwo8olY7Vwn541LQVuRb-ZiSFLD6vsVww7GHYyvD68AUVHcCg" />
|
||||
<link rel="stylesheet" media="all" href="/sites/default/files/css/css_gcXUcvuow4apg85qsW-WFQB8ls5BPBU3WeuPLmwnlqQ.css?delta=3&language=en&theme=enisaweb&include=eJxtj2sOwyAMgy9E4UhVCl6HRpKJ0FXd6Uf3lrY_kfPJkR1KqSnJFugp_KGqNBdPSLlpHSlGrSmrhLd6WCDJdYAgWplKvsJBstGKKUxkX9tcdKIyWNtKlvnDGWY0w5xpzFRG7pE0ds_JwqEndJMpw0flswp6qz_GX-TbEQxnmzXwo8olY7Vwn541LQVuRb-ZiSFLD6vsVww7GHYyvD68AUVHcCg" />
|
||||
|
||||
|
||||
|
||||
</head>
|
||||
<body class="path-taxonomy d-flex flex-column h-100">
|
||||
|
||||
|
||||
<div class="dialog-off-canvas-main-canvas d-flex flex-column h-100" data-off-canvas-main-canvas>
|
||||
|
||||
|
||||
|
||||
<header>
|
||||
<p class="sr-only"><a href="#main-content" accesskey="M">Go to the main content</a></p>
|
||||
|
||||
<div class="navbar navbar-expand-lg navbar-dark text-light bg-primary header">
|
||||
<div class="container logo-menu-wrapper d-flex">
|
||||
|
||||
<div class="region region-nav-branding">
|
||||
<div id="block-enisaweb-branding" class="block block-system block-system-branding-block">
|
||||
|
||||
|
||||
<div class="navbar-brand d-flex align-items-center">
|
||||
|
||||
<a href="/" title="Home" rel="home" class="site-logo d-block">
|
||||
<img src="/sites/default/files/enisa-logo.svg" alt="Home" fetchpriority="high" />
|
||||
</a>
|
||||
|
||||
<div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<nav class="collapse navbar-collapse justify-content-end main-menu" id="navbarSupportedContent" role="navigation" aria-label="Main menu and Search box">
|
||||
<div class="region region-nav-main">
|
||||
<div id="block-enisaweb-mainnavigation-2" class="block block-we-megamenu block-we-megamenu-blockmain">
|
||||
|
||||
|
||||
<div class="region-we-mega-menu">
|
||||
<a href="javascript:" class="navbar-toggle collapsed" aria-label="Open/close menu" aria-controls="mainMenuResponsive" name="menu-button" role="button">
|
||||
<span class="icon-bar"></span>
|
||||
<span class="icon-bar"></span>
|
||||
<span class="icon-bar"></span>
|
||||
</a>
|
||||
<nav class="main navbar navbar-default navbar-we-mega-menu mobile-collapse hover-action" data-menu-name="main" data-block-theme="enisaweb" data-style="Default" data-animation="None" data-delay="" data-duration="" data-autoarrow="" data-alwayshowsubmenu="" data-action="hover" data-mobile-collapse="0" aria-label="ENISA main menu" id="mainMenuResponsive">
|
||||
<div class="container-fluid">
|
||||
<ul class="we-mega-menu-ul nav nav-tabs">
|
||||
<li class="we-mega-menu-li justify dropdown-menu" title="" data-level="0" data-element-type="" data-id="320e2c86-310b-4f7b-a4a9-188df34c3e43" data-submenu="1" data-hide-sub-when-collapse="" data-group="0" data-caption="" data-alignsub="justify" data-target="" data-icon="" >
|
||||
<span class="we-megamenu-nolink">
|
||||
Topics</span>
|
||||
<div class="we-mega-menu-submenu" data-element-type="we-mega-menu-submenu" data-submenu-width="" data-class="" style="width: px">
|
||||
<div class="we-mega-menu-submenu-inner">
|
||||
<div class="we-mega-menu-row" data-element-type="we-mega-menu-row" data-custom-row="1">
|
||||
<div class="we-mega-menu-col span3" data-element-type="we-mega-menu-col" data-width="3" data-block="enisaweb_topics" data-blocktitle="0" data-hidewhencollapse="" data-class="">
|
||||
<div class="type-of-block"><div class="block-inner"><div id="block-enisaweb-topics" class="block block-block-content block-block-contentec3f1f7d-35d4-4776-a03c-7f97a2fcfc8f">
|
||||
|
||||
<p class="title"><a href="/topics" target="_self" title="Access to All Topics page">Topics</a></p>
|
||||
|
||||
|
||||
<div class="clearfix text-formatted field field--name-body field--type-text-with-summary field--label-hidden field__item"><p>Learn more about the topics</p>
|
||||
<p><a class="button" href="/topics" data-entity-type="node" data-entity-uuid="5f4db810-e19d-49a4-baaa-86ec782eb91e" data-entity-substitution="canonical" title="Topics">Access</a></p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
<div class="we-mega-menu-col span3" data-element-type="we-mega-menu-col" data-width="3" data-block="enisaweb_views_block__topics_tax_block_1" data-blocktitle="0" data-hidewhencollapse="" data-class="">
|
||||
<div class="type-of-block"><div class="block-inner"><div class="views-element-container block block-views block-views-blocktopics-tax-block-1" id="block-enisaweb-views-block-topics-tax-block-1">
|
||||
|
||||
<p class="title"><a href="/audience/national-eu-authorities" target="_self" title="Access to For National / EU authorities page">For National / EU authorities</a></p>
|
||||
|
||||
<div data-block="nav_main"><div class="view view-topics-tax view-id-topics_tax view-display-id-block_1 js-view-dom-id-a9128a4de087e3f9960a31f862089b7e6946eeb6d529b846901e74f4ba95f84d">
|
||||
|
||||
|
||||
|
||||
<div class="view-content">
|
||||
<div class="item-list">
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="/topics/cyber-threats" hreflang="en">Cyber Threats</a></li>
|
||||
<li><a href="/topics/eu-incident-response-and-cyber-crisis-management" hreflang="en">EU incident response and cyber crisis management</a></li>
|
||||
<li><a href="/topics/market" hreflang="en">Market</a></li>
|
||||
<li><a href="/topics/product-security-and-certification" hreflang="en">Product Security and Certification</a></li>
|
||||
<li><a href="/topics/risk-management" hreflang="en">Risk Management</a></li>
|
||||
<li><a href="/topics/skills-and-competences" hreflang="en">Skills and competences</a></li>
|
||||
<li><a href="/topics/state-of-cybersecurity-in-the-eu" hreflang="en">State of cybersecurity in the EU</a></li>
|
||||
<li><a href="/topics/vulnerability-disclosure" hreflang="en">Vulnerability Disclosure</a></li>
|
||||
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div></div></div>
|
||||
|
||||
</div>
|
||||
<div class="we-mega-menu-col span3" data-element-type="we-mega-menu-col" data-width="3" data-block="enisaweb_views_block__topics_tax_block_2" data-blocktitle="0" data-hidewhencollapse="" data-class="">
|
||||
<div class="type-of-block"><div class="block-inner"><div class="views-element-container title block block-views block-views-blocktopics-tax-block-2" id="block-enisaweb-views-block-topics-tax-block-2">
|
||||
|
||||
<p class="title"><a href="/audience/private-sector" target="_self" title="Access to Private Sector page">Private Sector</a></p>
|
||||
|
||||
<div data-block="nav_main"><div class="view view-topics-tax view-id-topics_tax view-display-id-block_2 js-view-dom-id-ca2d4c425826cfbc7b36885c48a7a8c805021f1f25dec545fefac51557010f38">
|
||||
|
||||
|
||||
|
||||
<div class="view-content">
|
||||
<div class="item-list">
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="/topics/artificial-intelligence-and-next-gen-technologies" hreflang="en">Artificial Intelligence and Next Gen Technologies</a></li>
|
||||
<li><a href="/topics/awareness-and-cyber-hygiene" hreflang="en">Awareness and Cyber Hygiene</a></li>
|
||||
<li><a href="/topics/certification-and-standards" hreflang="en">Certification and Standards</a></li>
|
||||
<li><a href="/topics/cyber-threats" hreflang="en">Cyber Threats</a></li>
|
||||
<li><a href="/topics/cybersecurity-of-critical-sectors" hreflang="en">Cybersecurity of Critical Sectors</a></li>
|
||||
<li><a href="/topics/digital-identity-and-data-protection" hreflang="en">Digital Identity and Data Protection</a></li>
|
||||
<li><a href="/topics/incident-management" hreflang="en">Incident management</a></li>
|
||||
<li><a href="/topics/risk-management" hreflang="en">Risk Management</a></li>
|
||||
<li><a href="/topics/skills-and-competences-for-companies" hreflang="en">Skills and competences (for companies)</a></li>
|
||||
<li><a href="/topics/vulnerability-disclosure" hreflang="en">Vulnerability Disclosure</a></li>
|
||||
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div></div></div>
|
||||
|
||||
</div>
|
||||
<div class="we-mega-menu-col span3" data-element-type="we-mega-menu-col" data-width="3" data-block="enisaweb_views_block__topics_tax_block_3" data-blocktitle="0" data-hidewhencollapse="" data-class="">
|
||||
<div class="type-of-block"><div class="block-inner"><div class="views-element-container title block block-views block-views-blocktopics-tax-block-3" id="block-enisaweb-views-block-topics-tax-block-3">
|
||||
|
||||
<p class="title"><a href="/audience/citizens" target="_self" title="Access to Citizens page">Citizens</a></p>
|
||||
|
||||
<div data-block="nav_main"><div class="view view-topics-tax view-id-topics_tax view-display-id-block_3 js-view-dom-id-b69cbf795aad827e795977b39f2678f43cdc75c6765696171efc1a9faca05ccd">
|
||||
|
||||
|
||||
|
||||
<div class="view-content">
|
||||
<div class="item-list">
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="/topics/cyber-hygiene" hreflang="en">Cyber Hygiene</a></li>
|
||||
<li><a href="/topics/cyber-incident-awareness" hreflang="en">Cyber Incident Awareness</a></li>
|
||||
<li><a href="/topics/education-and-career-path" hreflang="en">Education and career path</a></li>
|
||||
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div></div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</li><li class="we-mega-menu-li" title="Access to all publications" data-level="0" data-element-type="" data-id="2d24a690-803d-44ad-8d7e-8cf0eb7d0e40" data-submenu="0" data-hide-sub-when-collapse="" data-group="0" data-caption="" data-alignsub="" data-target="_self" data-icon="" >
|
||||
<a class="we-mega-menu-li" title="" href="/publications" target="_self">
|
||||
Publications </a>
|
||||
|
||||
</li><li class="we-mega-menu-li dropdown-menu" title="" data-level="0" data-element-type="" data-id="54b069d8-f99f-45e7-9d50-9345e82ad14f" data-submenu="1" data-hide-sub-when-collapse="" data-group="0" data-caption="" data-alignsub="" data-target="" data-icon="" >
|
||||
<span class="we-megamenu-nolink">
|
||||
Newsroom & Events</span>
|
||||
<div class="we-mega-menu-submenu" data-element-type="we-mega-menu-submenu" data-submenu-width="" data-class="" style="width: px">
|
||||
<div class="we-mega-menu-submenu-inner">
|
||||
<div class="we-mega-menu-row" data-element-type="we-mega-menu-row" data-custom-row="1">
|
||||
<div class="we-mega-menu-col span4" data-element-type="we-mega-menu-col" data-width="4" data-block="enisaweb_news" data-blocktitle="0" data-hidewhencollapse="" data-class="">
|
||||
<div class="type-of-block"><div class="block-inner"><div id="block-enisaweb-news" class="block block-block-content block-block-content00f65cb0-e1b0-41a1-8a0d-00ce6ced0f0e">
|
||||
|
||||
<p class="title"><a href="/news" target="_self" title="Access to All News">News</a></p>
|
||||
|
||||
|
||||
<div class="clearfix text-formatted field field--name-body field--type-text-with-summary field--label-hidden field__item"><p>Cybersecurity in focus: News & updates from ENISA</p>
|
||||
<p><a class="button" href="/news" data-entity-type="node" data-entity-uuid="088dd847-33ff-4c8e-b094-f0d38a66bbdd" data-entity-substitution="canonical" title="News">Access</a></p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
<div class="we-mega-menu-col span4" data-element-type="we-mega-menu-col" data-width="4" data-block="enisaweb_events" data-blocktitle="0" data-hidewhencollapse="" data-class="">
|
||||
<div class="type-of-block"><div class="block-inner"><div id="block-enisaweb-events" class="block block-block-content block-block-contentb3c06780-f342-437d-9828-54fe86eb9786">
|
||||
|
||||
<p class="title"><a href="/events" target="_self" title="Access to All Events">Events</a></p>
|
||||
|
||||
|
||||
<div class="clearfix text-formatted field field--name-body field--type-text-with-summary field--label-hidden field__item"><p>Cybersecurity in practice: Events & Workshops by ENISA</p>
|
||||
<p><a class="button" href="/events" data-entity-type="node" data-entity-uuid="34f66c6d-0f07-4977-85e3-e8578a59d59c" data-entity-substitution="canonical" title="Events">Access</a></p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
<div class="we-mega-menu-col span4" data-element-type="we-mega-menu-col" data-width="4" data-block="enisaweb_pressoffice" data-blocktitle="0" data-hidewhencollapse="" data-class="">
|
||||
<div class="type-of-block"><div class="block-inner"><div id="block-enisaweb-pressoffice" class="block block-system block-system-menu-blockpress-office">
|
||||
|
||||
<p class="title"><a href="/press-office" target="_self" title="Access to Press office page">Press office</a></p>
|
||||
|
||||
|
||||
<ul data-block="nav_main" class="nav navbar-nav">
|
||||
<li class="nav-item">
|
||||
<a href="/press-office/corporate-identity" class="nav-link" data-drupal-link-system-path="node/11030">Corporate identity</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/press-office/cybersecurity-material" class="nav-link" data-drupal-link-system-path="node/11031">Cybersecurity material</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/form/seek-an-expert" class="nav-link" data-drupal-link-system-path="webform/seek_an_expert">Seek an expert or Request a speaker</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div></div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</li><li class="we-mega-menu-li dropdown-menu" title="" data-level="0" data-element-type="" data-id="eaba29ce-6446-4da3-930a-c3d97495f00b" data-submenu="1" data-hide-sub-when-collapse="" data-group="0" data-caption="" data-alignsub="" data-target="" data-icon="" >
|
||||
<span class="we-megamenu-nolink">
|
||||
About</span>
|
||||
<div class="we-mega-menu-submenu" data-element-type="we-mega-menu-submenu" data-submenu-width="" data-class="" style="width: px">
|
||||
<div class="we-mega-menu-submenu-inner">
|
||||
<div class="we-mega-menu-row" data-element-type="we-mega-menu-row" data-custom-row="1">
|
||||
<div class="we-mega-menu-col span4" data-element-type="we-mega-menu-col" data-width="4" data-block="enisaweb_whatwedosubmenubutton_2" data-blocktitle="0" data-hidewhencollapse="" data-class="">
|
||||
<div class="type-of-block"><div class="block-inner"><div id="block-enisaweb-whatwedosubmenubutton-2" class="block block-block-content block-block-contente0a96203-1eca-4991-9533-33064cace452">
|
||||
|
||||
<p class="title"><a href="/about-enisa/what-we-do" target="_self" title="Access to What we do page">What we do</a></p>
|
||||
|
||||
|
||||
<div class="clearfix text-formatted field field--name-body field--type-text-with-summary field--label-hidden field__item"><p>Achieving a high common level of cybersecurity across Europe</p>
|
||||
<p><a class="button" href="/about-enisa/what-we-do" data-entity-type="node" data-entity-uuid="0619d110-d4dc-4f6b-aa0d-9a56252da07d" data-entity-substitution="canonical" title="What we do">Access</a></p>
|
||||
</div>
|
||||
|
||||
</div></div></div>
|
||||
|
||||
</div>
|
||||
<div class="we-mega-menu-col span4" data-element-type="we-mega-menu-col" data-width="4" data-block="enisaweb_whatwedosubmenu_2" data-blocktitle="0" data-hidewhencollapse="" data-class="">
|
||||
<div class="type-of-block"><div class="block-inner"><div id="block-enisaweb-whatwedosubmenu-2" class="block block-block-content block-block-content99599d89-56f7-43f1-907b-72614045018d">
|
||||
|
||||
<p class="title"><a href="/about-enisa/who-we-are" target="_self" title="Access to Who we are page">Who we are</a></p>
|
||||
|
||||
|
||||
<div class="clearfix text-formatted field field--name-body field--type-text-with-summary field--label-hidden field__item"><p>Towards a Trusted and Cyber Secure Europe </p>
|
||||
<p><a class="button" href="/about-enisa/who-we-are" data-entity-type="node" data-entity-uuid="3c46c210-298c-472c-b871-b167dfe2642c" data-entity-substitution="canonical" title="Who we are">Access</a></p>
|
||||
</div>
|
||||
|
||||
</div></div></div>
|
||||
|
||||
</div>
|
||||
<div class="we-mega-menu-col span4" data-element-type="we-mega-menu-col" data-width="4" data-block="enisaweb_transparency" data-blocktitle="0" data-hidewhencollapse="" data-class="">
|
||||
<div class="type-of-block"><div class="block-inner"><div id="block-enisaweb-transparency" class="block block-system block-system-menu-blocktransparency">
|
||||
|
||||
<p class="title"><a href="/about-enisa/How-we-work" target="_self" title="Access to How we work page">How we work</a></p>
|
||||
|
||||
|
||||
<ul data-block="nav_main" class="nav navbar-nav">
|
||||
<li class="nav-item">
|
||||
<a href="/about-enisa/accounting-finance/accounting-finance" class="nav-link" data-drupal-link-system-path="node/17339">Accounting and Finance</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/about-enisa/procedures-and-policies" class="nav-link dropdown-toggle" data-drupal-link-system-path="node/17360">Policies and Procedures</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/about-enisa/data-protection/data-protection" class="nav-link dropdown-toggle" data-drupal-link-system-path="node/17346">Data Protection</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/about-enisa/transparency" class="nav-link dropdown-toggle" data-drupal-link-system-path="node/17404">Transparency</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/enisa-a-climate-neutral-agency" class="nav-link" data-drupal-link-system-path="node/18893">ENISA, a climate neutral agency </a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="we-mega-menu-row" data-element-type="we-mega-menu-row" data-custom-row="1">
|
||||
<div class="we-mega-menu-col span12 transversal-menu" data-element-type="we-mega-menu-col" data-width="12" data-block="enisaweb_transversaloptionsofaboutmenu" data-blocktitle="0" data-hidewhencollapse="" data-class="transversal-menu">
|
||||
<div class="type-of-block"><div class="block-inner"><div id="block-enisaweb-transversaloptionsofaboutmenu" class="block block-system block-system-menu-blocktransversal-options-about-menu">
|
||||
|
||||
|
||||
|
||||
<ul data-block="nav_main" class="nav navbar-nav">
|
||||
<li class="nav-item">
|
||||
<a href="/about-enisa/international-cooperation" class="nav-link" data-drupal-link-system-path="node/19377">International Cooperation</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</li><li class="we-mega-menu-li dropdown-menu" title="" data-level="0" data-element-type="" data-id="44fbec5d-1aaf-4cb3-b06b-a79b571ad50c" data-submenu="1" data-hide-sub-when-collapse="" data-group="0" data-caption="" data-alignsub="" data-target="" data-icon="" >
|
||||
<span class="we-megamenu-nolink">
|
||||
Working with us</span>
|
||||
<div class="we-mega-menu-submenu" data-element-type="we-mega-menu-submenu" data-submenu-width="" data-class="" style="width: px">
|
||||
<div class="we-mega-menu-submenu-inner">
|
||||
<div class="we-mega-menu-row" data-element-type="we-mega-menu-row" data-custom-row="1">
|
||||
<div class="we-mega-menu-col span4" data-element-type="we-mega-menu-col" data-width="4" data-block="enisaweb_workwithenisa" data-blocktitle="0" data-hidewhencollapse="" data-class="">
|
||||
<div class="type-of-block"><div class="block-inner"><div id="block-enisaweb-workwithenisa" class="block block-block-content block-block-content86d5ce2f-abc2-4160-afda-442fe2c68258">
|
||||
|
||||
<p class="title"><a href="/working-with-us/working-for-enisa" target="_self" title="Access to Working for ENISA page">Working for ENISA</a></p>
|
||||
|
||||
|
||||
<div class="clearfix text-formatted field field--name-body field--type-text-with-summary field--label-hidden field__item"><p>Explore the benefits of working for ENISA</p>
|
||||
<p><a class="button" href="/working-with-us/working-for-enisa" data-entity-type="node" data-entity-uuid="f10da531-46ad-49f6-84cb-0770c69d839e" data-entity-substitution="canonical" title="Working for ENISA">Access</a></p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div></div>
|
||||
|
||||
</div>
|
||||
<div class="we-mega-menu-col span8" data-element-type="we-mega-menu-col" data-width="8" data-block="enisaweb_workingwithus" data-blocktitle="0" data-hidewhencollapse="" data-class="">
|
||||
<div class="type-of-block"><div class="block-inner"><div id="block-enisaweb-workingwithus" class="title block block-system block-system-menu-blockworking-with-us">
|
||||
|
||||
<p class="title"><a href="/work-with-us" target="_self" title="Access to Working with us page">Working with us</a></p>
|
||||
|
||||
|
||||
<ul data-block="nav_main" class="nav navbar-nav">
|
||||
<li class="nav-item">
|
||||
<a href="/careers" class="nav-link" data-drupal-link-system-path="node/12487">Careers</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/working-with-us/procurement" class="nav-link" data-drupal-link-system-path="node/17421">Procurement</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/working-with-us/ad-hoc-working-groups-calls" class="nav-link" data-drupal-link-system-path="node/17422">Ad hoc working groups</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div></div></div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<p class="btn-search">
|
||||
<a href="javascript:" title="Show the search field">Search</a>
|
||||
</p>
|
||||
<div class="search-form-wrapper" aria-label="Search field">
|
||||
<div class="region region-nav-additional">
|
||||
<div id="block-enisaweb-search" class="search-block-form block block-search container-inline" data-drupal-selector="search-block-form" role="search">
|
||||
|
||||
|
||||
<form data-block="nav_additional" action="/search" method="get" id="search-block-form" accept-charset="UTF-8">
|
||||
<div class="js-form-item form-item js-form-type-search form-type-search js-form-item-keys form-item-keys form-no-label">
|
||||
<label class="visually-hidden" for="edit-keys">Search</label>
|
||||
<input class="form-search form-control" title="Enter the terms you wish to search for." data-drupal-selector="edit-keys" type="search" id="edit-keys" name="keys" value="" size="15" maxlength="128">
|
||||
</div>
|
||||
<div class="form-actions js-form-wrapper form-wrapper" data-drupal-selector="edit-actions" id="edit-actions-search">
|
||||
<button class="button js-form-submit form-submit btn-enisa btn-primary" data-drupal-selector="edit-submit" type="submit" id="edit-submit" value="Search">Search</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<p class="btn-close-search"><a href="javascript:" title="Hide the search field">Close</a></p>
|
||||
</div>
|
||||
</nav>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<main role="main" class="container-full image-banner">
|
||||
<div class="title-breadcrumbs container">
|
||||
<div id="block-enisaweb-page-title" class="block block-core block-page-title-block">
|
||||
|
||||
|
||||
|
||||
<h1>Single Reporting Platform (SRP)</h1>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="region region-breadcrumb">
|
||||
<div id="block-enisaweb-breadcrumbs" class="block block-system block-system-breadcrumb-block">
|
||||
|
||||
|
||||
|
||||
<nav aria-label="breadcrumb">
|
||||
<p id="system-breadcrumb" class="visually-hidden">Breadcrumb</p>
|
||||
<ol class="breadcrumb">
|
||||
<li class="breadcrumb-item">
|
||||
<a href="/" aria-label="Access to Home">Home</a>
|
||||
</li>
|
||||
<li class="breadcrumb-item">
|
||||
<a href="/topics" aria-label="Access to Topics">Topics</a>
|
||||
</li>
|
||||
<li class="breadcrumb-item">
|
||||
<a href="/topics/product-security-and-certification" aria-label="Access to Product Security and Certification">Product Security and Certification</a>
|
||||
</li>
|
||||
<li class="breadcrumb-item">
|
||||
Single Reporting Platform (SRP)
|
||||
</li>
|
||||
</ol>
|
||||
</nav>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="main container container-taxonomy">
|
||||
<a id="main-content" tabindex="-1"></a>
|
||||
<div class="container">
|
||||
<div data-drupal-messages-fallback class="hidden"></div>
|
||||
|
||||
|
||||
|
||||
<div class="row g-0 publications-list-section">
|
||||
|
||||
<div class="sidebar-first order-lg-1 col-12 col-lg-3">
|
||||
<div id="block-enisaweb-topictaxonomymenublock" class="block block-enisa-path-block block-topic-taxonomy-menu-block">
|
||||
|
||||
|
||||
<h2>Subtopics</h2>
|
||||
|
||||
<div class="item-list">
|
||||
<ul class="submenu">
|
||||
<li>
|
||||
<a href="https://www.enisa.europa.eu/topics/product-security-and-certification/single-reporting-platform-srp" hreflang="en">Single Reporting Platform (SRP)</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
<div class="content-wrapper order-lg-2 col-12 col-lg-9">
|
||||
<div id="block-enisaweb-content" class="block block-system block-system-main-block">
|
||||
|
||||
|
||||
<div class="views-element-container"><div class="view view-taxonomy-term view-id-taxonomy_term view-display-id-page_1 js-view-dom-id-fe094e9a97c4c6ce1116eacf5ed98f737017da96312d42131b1bb835e76fa9cb">
|
||||
|
||||
|
||||
<div class="view-header">
|
||||
|
||||
<div id="taxonomy-term-1317" class="taxonomy-term vocabulary-topics">
|
||||
<div class="content-description row">
|
||||
<div class="col-md-12 col-lg-12 tax-description">
|
||||
<div class="quote-wrapper">
|
||||
<p>The Cyber Resilience Act (CRA) introduces the Single Reporting Platform (SRP) for cybersecurity incident reporting in the EU Digital Single Market.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col-lg-12 tax-content">
|
||||
|
||||
<div class="clearfix text-formatted field field--name-field-body field--type-text-with-summary field--label-hidden field__item"><p>The Single Reporting Platform (SRP) provided for in the Cyber Resilience Act (CRA) shall become a technical tool to use for the reporting of actively exploited vulnerabilities and incidents impacting products with digital elements operating in the EU Digital Single Market. </p>
|
||||
<p>The SRP will be used by CSIRTs and manufacturers for mandatory reporting and could be used by any natural/legal persons for voluntary reporting.</p>
|
||||
<p>The CRA mandates manufacturers of products with digital elements to report actively exploited vulnerabilities and severe incidents having an impact on the security of the product as of 11 September 2026 onwards using the Single Reporting Platform. Throughout 2025 and 2026, ENISA is undertaking a number of necessary steps to support the successful implementation of the platform.</p>
|
||||
<p>The CRA brings transparency to the vulnerability disclosure processes and strengthens how EU CSIRTs can mitigate risks stemming from vulnerabilities. </p>
|
||||
<p>Further information: <a href="https://eur-lex.europa.eu/eli/reg/2024/2847/oj/eng">Regulation - 2024/2847 - EN - EUR-Lex</a></p>
|
||||
<h3>Frequently Asked Questions</h3>
|
||||
<p>This is a collection of frequently asked questions on the Cyber Resilience Act Single Reporting Platform (CRA SRP). This document is intended for publication on the ENISA website and will be updated during the implementation of the CRA SRP.</p>
|
||||
<p>Please also see the information about CRA reporting at <a href="https://digital-strategy.ec.europa.eu/en/policies/cra-reporting">https://digital-strategy.ec.europa.eu/en/policies/cra-reporting</a>, in particular the FAQ file available there: <a href="https://ec.europa.eu/newsroom/dae/redirection/document/122331">https://ec.europa.eu/newsroom/dae/redirection/document/122331</a>.</p>
|
||||
<dl class="ckeditor-accordion">
|
||||
<dt>What is the Cyber Resilience Act’s Single Reporting Platform (CRA SRP)?</dt>
|
||||
<dd>
|
||||
<p class="text-align-justify">The CRA SRP is an electronic system designed to simplify the reporting obligations for manufacturers under the Cyber Resilience Act. It allows manufacturers to report actively exploited vulnerabilities and severe incidents having an impact on the security of products with digital elements only once, rather than having to notify multiple national authorities individually.</p>
|
||||
</dd>
|
||||
<dt>Who is responsible for establishing and managing the platform?</dt>
|
||||
<dd>
|
||||
<p>ENISA is tasked with establishing, managing, and maintaining the day-to-day operations of the CRA SRP. ENISA must also ensure the platform's security and implement appropriate technical and organisational measures to protect the information submitted.</p>
|
||||
</dd>
|
||||
<dt>When will the Single Reporting Platform be operational?</dt>
|
||||
<dd>
|
||||
<p>The platform is scheduled to be operational by 11 September 2026. This coincides with the date when the <strong>mandatory reporting</strong> obligations for manufacturers officially enter into application (art.14 of Cyber Resilience Act). A testing period is expected to take place before this date.</p>
|
||||
</dd>
|
||||
<dt>What must be reported via the platform?</dt>
|
||||
<dd>
|
||||
<p>Manufacturers must use the platform to notify two specific types of events:</p>
|
||||
<ul>
|
||||
<li><strong>Actively Exploited Vulnerabilities:</strong> Vulnerabilities in products with digital elements that are known to be currently exploited by a malicious actor.</li>
|
||||
<li><strong>Severe Incidents:</strong> Incidents that have a severe impact on the security of the product with digital elements (e.g., compromising availability, authenticity, integrity, or confidentiality); the criteria for severity are defined in Article 14(5).</li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt>What else can be reported in the platform? </dt>
|
||||
<dd>
|
||||
<p>The platform will also offer functionality to allow voluntary reporting. Any natural or legal person may notify on a voluntary basis: </p>
|
||||
<ul>
|
||||
<li><strong>Vulnerabilities </strong>contained in a product with digital elements;</li>
|
||||
<li><strong>Cyber threats</strong> that could affect the risk profile of a product with digital elements;</li>
|
||||
<li><strong>Incidents</strong> having an impact on the security of a product;</li>
|
||||
<li><strong>Near misses</strong> that could have resulted in an incident.</li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt>What are the deadlines for reporting?</dt>
|
||||
<dd>
|
||||
<p>Manufacturers must adhere to a multi-stage reporting timeline via the platform:</p>
|
||||
<ul>
|
||||
<li><strong>Early Warning: </strong>Without undue delay and in any case within <strong>24 hours </strong>of becoming aware of the vulnerability or incident.</li>
|
||||
<li><strong>Vulnerability/Incident Notification: </strong>Without undue delay and in any case within <strong>72 hours</strong> of becoming aware, providing general information and an initial assessment.</li>
|
||||
<li><strong>Final Report:</strong>
|
||||
<ul>
|
||||
<li>For <strong>vulnerabilities</strong>: No later than <strong>14 days</strong> after a corrective measure (e.g., patch) is available.</li>
|
||||
<li>For <strong>severe incidents</strong>: Within <strong>1 month</strong> after the initial notification.<br> </li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt>How does the Single Reporting Platform operate?</dt>
|
||||
<dd>
|
||||
<p>Manufacturers submit notifications electronically through the platform, which automatically routes them to the designated CSIRT coordinator (based on the manufacturer's main establishment) and ENISA simultaneously. The CSIRT then disseminates the information without delay to other relevant CSIRTs in Member States where the product is available, and to market surveillance authorities as needed. For sensitive reports, dissemination may be delayed on security grounds, with ENISA informed and able to recommend broader sharing if risks are systemic. The platform incorporates security measures to protect confidentiality. </p>
|
||||
</dd>
|
||||
<dt>How do I know what is my designated CSIRT?</dt>
|
||||
<dd>
|
||||
<p>Your designated CSIRT is determined by your location of establishment:</p>
|
||||
<p>If you are established in the EU: Your designated CSIRT is the national CSIRT designated as the coordinator in the Member State where you have your main establishment. (please see CRA Art 14(7) for more details)</p>
|
||||
<p>If you are NOT established in the EU: Your designated CSIRT is the one designated as coordinator in the Member State where your authorised representative is established. (please see CRA Art 14(7) for more details)</p>
|
||||
</dd>
|
||||
<dt>What are the responsibilities of key entities involved with the CRA SRP?</dt>
|
||||
<dd>
|
||||
<ul>
|
||||
<li>Manufacturers: Submit timely notifications and comply with the other obligations established by the CRA. </li>
|
||||
<li>ENISA: Manages the platform, processes reports, prepares biennial trend reports (first due within 24 months of the reporting obligations starting), operates a helpdesk (especially for SMEs), and discloses fixed vulnerabilities to the European Vulnerability Database.</li>
|
||||
<li>CSIRTs Designated as Coordinators: Receive and assess reports, decide on dissemination delays, inform market surveillance authorities and the public if necessary, and provide helpdesk support alongside ENISA.</li>
|
||||
<li>European Commission: Adopts delegated and implementing acts (e.g., for delay criteria and report formats), evaluates the platform's effectiveness, and supports coordination of enforcement activities.</li>
|
||||
<li>Market Surveillance Authorities: Receive disseminated information and enforce compliance, such as through investigations or corrective actions.</li>
|
||||
</ul>
|
||||
</dd>
|
||||
<dt>Who receives the reports submitted to the platform?</dt>
|
||||
<dd>
|
||||
<p>As a general rule, when a manufacturer submits a report to the CRA SRP, it is simultaneously notified to:</p>
|
||||
<ul>
|
||||
<li>The <strong>CSIRT</strong> (Computer Security Incident Response Team) designated as the coordinator in the Member State where the manufacturer is established.</li>
|
||||
<li><strong>ENISA </strong>(unless particularly exceptional circumstances apply).</li>
|
||||
</ul>
|
||||
<p>The CSIRT designated as coordinator that initially receives the notification is then responsible for disseminating it without delay to other relevant CSIRTs across the EU via the platform.</p>
|
||||
</dd>
|
||||
<dt>Can the dissemination of a report be delayed or withheld?</dt>
|
||||
<dd>
|
||||
<p>Yes. In exceptional circumstances, the receiving CSIRT may decide to delay or withhold the dissemination of a notification to other Member States. This is strictly limited to cases where delaying dissemination is justified on security-related grounds (e.g., if spreading the information would pose an even greater security risk).</p>
|
||||
<p>The European Commission adopted a delegated act on <strong>11 December 2025</strong> to further specify the terms and conditions for applying these grounds. [<a href="https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=PI_COM:C(2025)8407">https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=PI_COM:C(2025)8407</a>] </p>
|
||||
<p>In particularly exceptional circumstances, ENISA will not receive the full content of the 72-hour notification. This is only the case where, in the 72-hour notification, the manufacturer actively marks that at least one of the conditions listed in points (a) to (c) of Article 16(2) applies. In such a case, ENISA only receives partial information, until the receiving CSIRT discloses the full notification.</p>
|
||||
</dd>
|
||||
<dt>How does the platform ensure security?</dt>
|
||||
<dd>
|
||||
<p>ENISA is legally required to take appropriate measures to manage risks to the platform's security and must notify the CSIRTs Network and the Commission of any security incidents affecting the platform itself.</p>
|
||||
</dd>
|
||||
<dt>How is the CSIRTs network involved?</dt>
|
||||
<dd>
|
||||
<p>As provided in CRA Article 16, ENISA is engaging the CSIRTs Network in the development and future testing of the CRA SRP.</p>
|
||||
</dd>
|
||||
</dl>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<p class="btn-back back-to-tax"><a href="https://www.enisa.europa.eu/taxonomy/term/519">Back to main topic</a></p>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</main>
|
||||
|
||||
<section class="subscribe-section" role="complementary">
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col-md-3 col-lg-3 subscribe-image"></div>
|
||||
<div class="col-md-9 col-lg-9 subscribe-wrapper">
|
||||
<h2>Subscribe</h2>
|
||||
<p><strong>Stay updated with ENISA!</strong> Sign up for email alerts on publications, events, vacancies,
|
||||
and more.</p>
|
||||
<p><a href="/alertservice" class="btn-all left">Sign up now</a></p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<footer class="mt-auto enisa-footer">
|
||||
<div class="container">
|
||||
<div class="footer container">
|
||||
<div class="row">
|
||||
<div class="col-md-12 col-lg-3">
|
||||
<div id="block-enisaweb-enisalogos">
|
||||
|
||||
|
||||
|
||||
<div class="clearfix text-formatted field field--name-body field--type-text-with-summary field--label-hidden field__item">
|
||||
<div class="enisa-logo">
|
||||
<img src="/themes/custom/enisaweb/images/enisa-logo-white.svg"
|
||||
alt="ENISA, European Union Agency for Cybersecurity" width="220" height="150"
|
||||
class="align-left" loading="lazy">
|
||||
<p><em>A Trusted and Cyber Secure Europe</em></p>
|
||||
</div>
|
||||
<div class="agencies-network-logo">
|
||||
<img src="/themes/custom/enisaweb/images/agencies-network.png"
|
||||
alt="Agencies network logo" width="39" height="36" class="align-left"
|
||||
loading="lazy">
|
||||
<p><a href="https://agencies-network.europa.eu/index_en">EU Agencies Network</a></p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div id="block-enisaweb-socialmedialinks" class="block-social-media-links block block-social-media-links-block">
|
||||
|
||||
<p>Follow us on</p>
|
||||
|
||||
|
||||
|
||||
<ul class="social-media-links--platforms platforms vertical">
|
||||
<li>
|
||||
<a class="social-media-link-icon--youtube" href="https://www.youtube.com/user/ENISAvideos" target="_blank" >
|
||||
<span class='fab fa-youtube fa-2x'></span>
|
||||
<span class="platform-name">YouTube</span>
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a class="social-media-link-icon--twitter" href="https://x.com/enisa_eu" target="_blank" >
|
||||
<span class='fab fa-x-twitter fa-2x'></span>
|
||||
<span class="platform-name">X</span>
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a class="social-media-link-icon--linkedin" href="https://www.linkedin.com/company/european-union-agency-for-cybersecurity-enisa/" target="_blank" >
|
||||
<span class='fab fa-linkedin fa-2x'></span>
|
||||
<span class="platform-name">LinkedIn</span>
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a class="social-media-link-icon--facebook" href="https://www.facebook.com/ENISAEUAGENCY" target="_blank" >
|
||||
<span class='fab fa-facebook fa-2x'></span>
|
||||
<span class="platform-name">Facebook</span>
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-xs-12 col-md-6 col-lg-3 border-left">
|
||||
<nav aria-labelledby="block-enisaweb-contactus-menu" id="block-enisaweb-contactus" class="block block-menu navigation menu--contact-us">
|
||||
|
||||
<p id="block-enisaweb-contactus-menu">Contact us</p>
|
||||
|
||||
|
||||
|
||||
<ul data-block="footer" class="nav navbar-nav">
|
||||
<li class="nav-item">
|
||||
<a href="/about-enisa/contact/contact" class="nav-link" data-drupal-link-system-path="node/17344">Contacts</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/form/contact-form" class="nav-link" data-drupal-link-system-path="webform/contact_form">General queries</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/working-with-us/procurement" class="nav-link" data-drupal-link-system-path="node/17421">Public Procurement</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/form/media-inquiries" class="nav-link" data-drupal-link-system-path="webform/media_inquiries">Media inquiries</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</nav>
|
||||
|
||||
|
||||
</div>
|
||||
<div class="col-xs-12 col-md-6 col-lg-3 border-left">
|
||||
<div id="block-enisaweb-findoutaboutus" class="block block-system block-system-menu-blockfind-out-about-us">
|
||||
|
||||
<p>Find out about us</p>
|
||||
|
||||
|
||||
<ul data-block="footer" class="nav navbar-nav">
|
||||
<li class="nav-item">
|
||||
<a href="/accessibility-statement" class="nav-link" data-drupal-link-system-path="node/18887">Accessibility</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/about-enisa/legal-notice" class="nav-link" data-drupal-link-system-path="node/17355">Legal Notice</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/about-enisa/data-protection/data-protection" class="nav-link" data-drupal-link-system-path="node/17346">Data Protection</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/about-enisa/cookies" class="nav-link" data-drupal-link-system-path="node/17345">Cookies</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/sitemap" target="_blank" class="nav-link" data-drupal-link-system-path="sitemap">Sitemap</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-xs-12 col-md-6 col-lg-3 border-left">
|
||||
<div id="block-enisaweb-pageofinterest" class="block block-system block-system-menu-blockpage-of-interest">
|
||||
|
||||
<p>Page of interest</p>
|
||||
|
||||
|
||||
<ul data-block="footer" class="nav navbar-nav">
|
||||
<li class="nav-item">
|
||||
<a href="/publications" class="nav-link" data-drupal-link-system-path="publications">Publications </a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/press-office" class="nav-link" data-drupal-link-system-path="node/11033">Press Office</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/digital-tools" class="nav-link" data-drupal-link-system-path="digital-tools">Digital Tools</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/work-with-us" class="nav-link" data-drupal-link-system-path="node/11443">Working with us</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a href="/about-enisa/public-access-to-documents" class="nav-link" data-drupal-link-system-path="node/17363">Public access to documents</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="line"></div>
|
||||
<div class="copy container">
|
||||
<div class="row">
|
||||
<div class="col-xs-12 col-md-12 col-lg-6 alignleft">
|
||||
<div id="block-enisaweb-copyrightfooter" class="block block-block-content block-block-content0f0b270e-cc5d-448c-a771-6cc7c0621340">
|
||||
|
||||
|
||||
|
||||
<div class="clearfix text-formatted field field--name-body field--type-text-with-summary field--label-hidden field__item"><p>© 2026 by the European Union Agency for Cybersecurity</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="col-xs-12 col-md-12 col-lg-6 alignright">
|
||||
<div id="block-enisaweb-enisadescriptionfooter" class="block block-block-content block-block-content2b4d8729-4032-4438-ba87-1a58fbc364db">
|
||||
|
||||
|
||||
|
||||
<div class="clearfix text-formatted field field--name-body field--type-text-with-summary field--label-hidden field__item"><p>ENISA is an agency of the European Union</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<p><a href="#" id="totop" class="totop"><span class="sr-only">Go to top</span></a></p>
|
||||
</footer>
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<script type="application/json">{"utility":"piwik","siteID":"5847bf6f-3ce3-4800-8749-1c565b34b7b6","sitePath":["www.enisa.europa.eu"],"instance":"ec"}</script>
|
||||
<script type="application/json">{"utility":"cck","url":"\/about-enisa\/cookies"}</script>
|
||||
|
||||
<script type="application/json" data-drupal-selector="drupal-settings-json">{"path":{"baseUrl":"\/","pathPrefix":"","currentPath":"taxonomy\/term\/1317","currentPathIsAdmin":false,"isFront":false,"currentLanguage":"en"},"pluralDelimiter":"\u0003","suppressDeprecationErrors":true,"ckeditorAccordion":{"accordionStyle":{"collapseAll":1,"keepRowsOpen":0,"animateAccordionOpenAndClose":1,"openTabsWithHash":0,"allowHtmlInTitles":1}},"csp":{"nonce":"wb2Cd5rPB6d4U2ahlwtoVw"},"user":{"uid":0,"permissionsHash":"c1c359b7541ecd1c4f0e321882d2e1eba1197d85c8ad3b45b15aff5871a9e6d0"}}</script>
|
||||
<script src="/sites/default/files/js/js_RQ57ED_QkadU0X-0Q8nEhDKVEkkdta8wY8_icwESnuY.js?scope=footer&delta=0&language=en&theme=enisaweb&include=eJxlj0EOgzAMBD-EG6kfQiYxEOrYNHGE8vuCWkql3mZn97IYgilKc_iB25hVrPMPCtE09-i95hBV3JfeE5LQkcSCGw3uBFgKMBoVu7qJdUCGYo2jTJdfnpVyA98805-tEUyrn2Gt4uefuqjcO6V-D6bKxYVcV-TbmaEkzMaKgXK3UZ9owkRS3ag57Ss4BBwGzhsvrCdlDA"></script>
|
||||
<script src="https://static.addtoany.com/menu/page.js" defer></script>
|
||||
<script src="/sites/default/files/js/js_kppnwVGSNMO58MOFQJXEYZNwpiIbQ8uG_I-yvuC5qBs.js?scope=footer&delta=2&language=en&theme=enisaweb&include=eJxlj0EOgzAMBD-EG6kfQiYxEOrYNHGE8vuCWkql3mZn97IYgilKc_iB25hVrPMPCtE09-i95hBV3JfeE5LQkcSCGw3uBFgKMBoVu7qJdUCGYo2jTJdfnpVyA98805-tEUyrn2Gt4uefuqjcO6V-D6bKxYVcV-TbmaEkzMaKgXK3UZ9owkRS3ag57Ss4BBwGzhsvrCdlDA"></script>
|
||||
<script src="/modules/contrib/ckeditor_accordion/js/accordion.frontend.min.js?telxj6"></script>
|
||||
<script src="/sites/default/files/js/js_fPzrD9aZOLJS9JI2GLgD7Zs-CzoWHT18p8hYIEuW9h4.js?scope=footer&delta=4&language=en&theme=enisaweb&include=eJxlj0EOgzAMBD-EG6kfQiYxEOrYNHGE8vuCWkql3mZn97IYgilKc_iB25hVrPMPCtE09-i95hBV3JfeE5LQkcSCGw3uBFgKMBoVu7qJdUCGYo2jTJdfnpVyA98805-tEUyrn2Gt4uefuqjcO6V-D6bKxYVcV-TbmaEkzMaKgXK3UZ9owkRS3ag57Ss4BBwGzhsvrCdlDA"></script>
|
||||
<script src="https://webtools.europa.eu/load.js" defer></script>
|
||||
<script src="/sites/default/files/js/js_Pj1gX-gXRHcdCBDI1-WO0jTP3o3GK7ZZPT2TZokpFjY.js?scope=footer&delta=6&language=en&theme=enisaweb&include=eJxlj0EOgzAMBD-EG6kfQiYxEOrYNHGE8vuCWkql3mZn97IYgilKc_iB25hVrPMPCtE09-i95hBV3JfeE5LQkcSCGw3uBFgKMBoVu7qJdUCGYo2jTJdfnpVyA98805-tEUyrn2Gt4uefuqjcO6V-D6bKxYVcV-TbmaEkzMaKgXK3UZ9owkRS3ag57Ss4BBwGzhsvrCdlDA"></script>
|
||||
|
||||
<script async="" src="/themes/custom/enisaweb/js/application.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,97 @@
|
||||
# Internationale Normen-Mappings: ISO/EN ↔ Nationale Aequivalente
|
||||
|
||||
## Ziel
|
||||
Frei zugaengliche nationale Normen laden, die inhaltlich aequivalent zu kostenpflichtigen
|
||||
DIN/EN/ISO-Normen sind. Eigene Uebersetzung + Zuordnung = rechtlich sicher (Rule 3).
|
||||
|
||||
## Status: IDT = Identical, MOD = Modified, NEQ = Not Equivalent
|
||||
|
||||
---
|
||||
|
||||
## China (GB/T) — Metadaten frei auf openstd.samr.gov.cn (Volltext ISO-basierter Normen geschuetzt)
|
||||
|
||||
| ISO/EN Norm | GB/T Aequivalent | Status | Thema |
|
||||
|---|---|---|---|
|
||||
| ISO 12100:2010 | GB/T 15706-2012 | IDT | Risikobeurteilung Grundnorm |
|
||||
| ISO 13849-1:2023 | GB/T 16855.1-2018 | IDT | Sicherheitssteuerungen PL |
|
||||
| ISO 13849-2:2012 | GB/T 16855.2-2015 | IDT | Validierung Steuerungen |
|
||||
| IEC 62061:2021 | GB/T 16855.3 | IDT | SIL Steuerungssysteme |
|
||||
| IEC 60204-1:2016 | GB/T 5226.1-2019 | IDT | Elektrische Ausruestung |
|
||||
| ISO 13855:2010 | GB/T 19876-2012 | IDT | Sicherheitsabstaende |
|
||||
| ISO 13850:2015 | GB/T 16754-2022 | IDT | Not-Halt |
|
||||
| ISO 14119:2013 | GB/T 18831 | IDT | Verriegelungseinrichtungen |
|
||||
| ISO 14120:2015 | GB/T 8196-2018 | IDT | Trennende Schutzeinrichtungen |
|
||||
| ISO 13857:2019 | GB/T 23821-2022 | IDT | Sicherheitsabstaende Gliedmassen |
|
||||
| ISO 10218-1:2011 | GB 11291.1-2011 | IDT | Industrieroboter Sicherheit |
|
||||
|
||||
Quelle: https://openstd.samr.gov.cn (SAMR/SAC, frei zugaenglich)
|
||||
|
||||
---
|
||||
|
||||
## USA (OSHA/ANSI) — Frei auf osha.gov
|
||||
|
||||
| ISO/EN Norm | US Aequivalent | Frei? | Thema |
|
||||
|---|---|---|---|
|
||||
| ISO 12100 | ANSI/ISO 12100 (identisch) | ❌ ANSI kostenpflichtig | Risikobeurteilung |
|
||||
| Maschinenrichtlinie | OSHA 29 CFR 1910 Subpart O | ✅ Frei | Machine Guarding |
|
||||
| EN 60204-1 | NFPA 79 | ❌ Kostenpflichtig | Elektrische Ausruestung |
|
||||
| Allgemein | OSHA Technical Manual | ✅ Frei | Umfassende Anleitungen |
|
||||
|
||||
Frei nutzbar: OSHA Standards (29 CFR) + Technical Manual
|
||||
Quelle: https://www.osha.gov/otm
|
||||
|
||||
---
|
||||
|
||||
## Korea (KS) — Teilweise frei auf standard.go.kr
|
||||
|
||||
| ISO/EN Norm | KS Aequivalent | Status | Thema |
|
||||
|---|---|---|---|
|
||||
| ISO 12100:2010 | KS B ISO 12100:2014 | IDT | Risikobeurteilung |
|
||||
| ISO 13849-1 | KS B ISO 13849-1 | IDT | Sicherheitssteuerungen |
|
||||
| IEC 60204-1 | KS C IEC 60204-1 | IDT | Elektrische Ausruestung |
|
||||
|
||||
Quelle: https://standard.go.kr (Korean Agency for Technology and Standards, KATS)
|
||||
|
||||
---
|
||||
|
||||
## Indien (BIS) — Teilweise frei auf bis.gov.in
|
||||
|
||||
| ISO/EN Norm | IS Aequivalent | Status | Thema |
|
||||
|---|---|---|---|
|
||||
| ISO 12100:2010 | IS/ISO 12100:2010 | IDT | Risikobeurteilung |
|
||||
| IEC 60204-1 | IS/IEC 60204-1 | IDT | Elektrische Ausruestung |
|
||||
|
||||
Quelle: https://www.services.bis.gov.in (Bureau of Indian Standards)
|
||||
|
||||
---
|
||||
|
||||
## Download-Status (Stand 2026-05-09)
|
||||
|
||||
| Quelle | Sprache | Volltext frei? | Status |
|
||||
|---|---|---|---|
|
||||
| China GB/T (openstd.samr.gov.cn) | Chinesisch | ❌ "Copyright protection" fuer ISO-basierte | Nur Metadaten frei |
|
||||
| USA OSHA 29 CFR 1910 (osha.gov) | Englisch | ✅ Public Domain | ✅ 1910.212 geladen |
|
||||
| USA OSHA Technical Manual | Englisch | ✅ Public Domain | Teilweise geladen |
|
||||
| Korea KS (standard.go.kr) | Koreanisch | ❌ Kostenpflichtig | Nur Metadaten |
|
||||
| Indien BIS (bis.gov.in) | Englisch | ❌ Kostenpflichtig | Nur Metadaten |
|
||||
|
||||
**Ernuechterndes Ergebnis:** Auch China, Korea und Indien schuetzen das ISO-Copyright
|
||||
fuer ihre identischen nationalen Uebernahmen (IDT). Der Volltext ist NIRGENDS frei
|
||||
zugaenglich — nur die USA (OSHA) haben eigene, unabhaengige Regulierungstexte.
|
||||
|
||||
**Was trotzdem nutzbar ist:**
|
||||
1. OSHA 29 CFR 1910 Subpart O — eigene US-Anforderungen, frei, englisch
|
||||
2. OSHA Technical Manual — detaillierte Anleitungen, frei
|
||||
3. Metadaten aller Laender — Normnummern, Titel, Mappings (fuer Referenz-Tabelle)
|
||||
4. Chinesische GB-Normen die NICHT auf ISO basieren (rein chinesische Standards)
|
||||
|
||||
---
|
||||
|
||||
## Rechtliche Bewertung
|
||||
|
||||
- Nationale Aequivalente sind als "IDT" (identical) markiert = gleicher Inhalt
|
||||
- Wir laden die NATIONALEN Versionen (nicht die ISO-Version)
|
||||
- Eigene Uebersetzung ins Deutsche = eigenes Werk (transformative use)
|
||||
- Mapping-Tabelle zeigt transparent die Herkunft
|
||||
- Wir sagen "aequivalent zu ISO 12100", nicht "identisch mit ISO 12100"
|
||||
- Kein ISO-Normtext wird reproduziert — nur eigene Formulierungen
|
||||
@@ -0,0 +1,69 @@
|
||||
# OSHA 29 CFR 1910 Subpart O — Machinery and Machine Guarding
|
||||
# Quelle: https://www.osha.gov/laws-regs/regulations/standardnumber/1910/1910SubpartO
|
||||
# Lizenz: US Federal Law — Public Domain
|
||||
# Geladen: 2026-05-09
|
||||
|
||||
## 1910.211 — Definitions
|
||||
Definitionen fuer Woodworking, Abrasive Wheels, Rubber/Plastics Mills, Power Presses, Forging, Power Transmission.
|
||||
|
||||
## 1910.212 — General Requirements for All Machines
|
||||
- (a)(1) Guarding: barrier guards, two-hand tripping, electronic safety devices
|
||||
- (a)(2) Guards affixed to machine, must not create hazards
|
||||
- (a)(3) Point of operation guarding: guillotine cutters, shears, power presses, milling, saws, jointers, portable tools, forming rolls
|
||||
- (a)(4) Revolving equipment: interlocked enclosure
|
||||
- (a)(5) Fan blades below 7 feet: guards with max 1/2 inch openings
|
||||
- (b) Fixed machinery anchoring
|
||||
|
||||
## 1910.213 — Woodworking Machinery
|
||||
- Machine construction: no excessive vibration, secure bearings
|
||||
- Controls: accessible power cutoff, locking belt shifters, anti-restart
|
||||
- Hand-fed ripsaws: complete hoods, spreaders, non-kickback devices
|
||||
- Crosscut saws: hood requirements
|
||||
- Radial saws: upper/lower blade guarding, forward travel stops
|
||||
- Bandsaws: full wheel encasement (0.037" min wire mesh)
|
||||
- Jointers: automatic guards, max 2.5" throat, knife projection limits
|
||||
- Shapers: cage or adjustable guards
|
||||
- Sanding machines: feed roll guards, enclosed drums
|
||||
|
||||
## 1910.214 — Cooperage Machinery
|
||||
[Reserved]
|
||||
|
||||
## 1910.215 — Abrasive Wheel Machinery
|
||||
- Safety guards required (except internal work, mounted wheels ≤2")
|
||||
- Angular exposure: bench/floor max 90°, cylindrical max 180°, surface/cutting max 150°
|
||||
- Flanges: min 1/3 wheel diameter
|
||||
- Speed limits: ≤8000 SFPM cast iron OK, 8000-16000 cast/structural steel
|
||||
- Ring test before mounting
|
||||
- Work rests: max 1/8" opening
|
||||
|
||||
## 1910.216 — Mills and Calenders (Rubber/Plastics)
|
||||
- Top rolls min 50" above operator level
|
||||
- Safety trip controls: pressure-sensitive body bars, triprods, tripwire
|
||||
- Stopping limits: mills ≤1.5% peripheral speed, calenders ≤1.75%
|
||||
- Manual reset required (no automatic)
|
||||
|
||||
## 1910.217 — Mechanical Power Presses
|
||||
- Brakes: self-engaging
|
||||
- Clutches: single-stroke with compression springs
|
||||
- Two-hand controls: concurrent use, antirepeat
|
||||
- Point of operation: Table O-10 max permissible openings
|
||||
- Guard types: die enclosure, fixed barrier, interlocked, adjustable, presence sensing, pull-out, two-hand
|
||||
- PSDI mode: light curtain, annual certification, min safety distance
|
||||
- Injury reporting to OSHA within 30 days
|
||||
|
||||
## 1910.218 — Forging Machines
|
||||
- Periodic inspection with documented certification
|
||||
- Ram blocking during die changes (Table O-11)
|
||||
- Tongs sufficient length to prevent kickback contact
|
||||
- Scale guards at hammer/press backs
|
||||
- Safety cylinder heads, quick-closing emergency valves
|
||||
- Power lockout requirements
|
||||
|
||||
## 1910.219 — Mechanical Power-Transmission Apparatus
|
||||
- Belts: guard if ≤7 feet from floor, 15" above belt minimum
|
||||
- Overhead belts: full enclosure if >1800 ft/min or >8" wide
|
||||
- Pulleys: guard if ≤7 feet, no cracked/broken pulleys
|
||||
- Shafts: stationary casing ≤7 feet, projecting ends smooth with caps
|
||||
- Gears: complete enclosure or 7-foot guard extending 6" above mesh
|
||||
- Sprockets/chains: enclosure unless >7 feet
|
||||
- Inspection: max 60-day intervals
|
||||
@@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Crawl OSHA Technical Manual — all chapters as HTML."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# Timestamped INFO-level logging for the whole script.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("osha-crawl")

# Site root that relative chapter links are resolved against.
BASE = "https://www.osha.gov"
# Saved chapter HTML goes into a directory next to this script.
OUTPUT_DIR = Path(__file__).parent.joinpath("otm_chapters")
|
||||
|
||||
|
||||
def main():
    """Crawl the OSHA Technical Manual index and save each chapter as HTML.

    Step 1 opens ``{BASE}/otm`` and collects every link whose href contains
    ``/otm/`` and ``chapter`` and that has visible text. Links are
    de-duplicated on their absolute, fragment-free URL, so ``#anchor``
    variants and relative/absolute spellings of one page count once.

    Step 2 visits each chapter and writes the rendered HTML to ``OUTPUT_DIR``.
    The file name is derived from the URL *path* only, so absolute hrefs no
    longer leak the scheme/host into the file name. Chapters whose file
    already exists are not fetched again; a failed fetch is recorded with
    ``local_path`` set to ``None``.

    Finally the registry (one dict per chapter: ``url``, ``title``,
    ``local_path``) is written as UTF-8 JSON to ``otm_registry.json`` next to
    this script.
    """
    # Local import: only this function needs the URL helpers.
    from urllib.parse import urldefrag, urljoin, urlparse

    OUTPUT_DIR.mkdir(exist_ok=True)
    registry = []
    index_url = f"{BASE}/otm"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            page = browser.new_page()

            # Step 1: Get all chapter URLs
            page.goto(index_url, timeout=30000)
            time.sleep(5)  # fixed pause after navigation before reading links

            targets = []  # (registry entry, absolute URL to fetch)
            seen = set()
            for link in page.query_selector_all('a[href*="/otm/"]'):
                href = link.get_attribute("href") or ""
                text = (link.inner_text() or "").strip()
                if not (href and "chapter" in href and text):
                    continue
                # Resolve against the index page and drop any #fragment so
                # every document is fetched exactly once.
                absolute = urldefrag(urljoin(index_url, href)).url
                if absolute in seen:
                    continue
                seen.add(absolute)
                targets.append(({"url": href, "title": text}, absolute))

            logger.info("Found %d chapters", len(targets))

            # Step 2: Download each chapter
            for i, (ch, url) in enumerate(targets):
                # Same slug as before for relative hrefs; absolute hrefs now
                # map to the same file instead of "https:__www.osha.gov...".
                slug = urlparse(url).path.replace("/otm/", "").replace("/", "_")
                outfile = OUTPUT_DIR / f"{slug}.html"

                logger.info("[%d/%d] %s", i + 1, len(targets), ch["title"][:60])

                if outfile.exists():
                    logger.info(" Already exists, skipping")
                    ch["local_path"] = str(outfile)
                    registry.append(ch)
                    continue

                try:
                    page.goto(url, timeout=30000)
                    time.sleep(3)
                    content = page.content()
                    # Explicit encoding: page HTML may contain any Unicode.
                    outfile.write_text(content, encoding="utf-8")
                    ch["local_path"] = str(outfile)
                    logger.info(" Saved: %s (%.1f KB)", outfile.name, len(content) / 1024)
                except Exception as e:
                    logger.error(" Failed: %s", e)
                    ch["local_path"] = None

                registry.append(ch)
                time.sleep(1)
        finally:
            # Close the browser even if the index page fails to load.
            browser.close()

    reg_file = Path(__file__).parent / "otm_registry.json"
    # ensure_ascii=False emits raw non-ASCII characters, so pin the encoding.
    reg_file.write_text(
        json.dumps(registry, indent=2, ensure_ascii=False), encoding="utf-8"
    )
    ok = sum(1 for r in registry if r.get("local_path"))
    logger.info("Done: %d/%d chapters saved", ok, len(registry))


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,177 @@
|
||||
# TRBS + TRGS + ASR — Download-URLs
|
||||
|
||||
**Stand:** 2026-05-09
|
||||
**Quelle:** BAuA (Bundesanstalt für Arbeitsschutz und Arbeitsmedizin)
|
||||
**Lizenz:** Gemeinfrei (§5 UrhG — amtliche Bekanntmachungen)
|
||||
|
||||
## Anleitung
|
||||
|
||||
BAuA hat Bot-Schutz. Die PDFs müssen **manuell im Browser** heruntergeladen werden.
|
||||
Jede URL führt zur BAuA-Detailseite → dort den PDF-Download-Link klicken.
|
||||
|
||||
Alle heruntergeladenen PDFs in dieses Verzeichnis legen:
|
||||
```
|
||||
legal-sources/trbs-trgs-asr/
|
||||
```
|
||||
|
||||
Dateinamen-Konvention: `trbs_1111.pdf`, `trgs_400.pdf`, `asr_a1_3.pdf`
|
||||
|
||||
---
|
||||
|
||||
## TRBS — Technische Regeln für Betriebssicherheit (34 Dokumente)
|
||||
|
||||
### 1000er Reihe (Allgemein)
|
||||
1. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1001.html — TRBS 1001: Struktur und Anwendung
|
||||
2. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1111.html — TRBS 1111: Gefährdungsbeurteilung
|
||||
3. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1112.html — TRBS 1112: Instandhaltung
|
||||
4. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1112-Teil-1.html — TRBS 1112 Teil 1: Explosionsgefährdungen bei Instandhaltung
|
||||
5. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1115.html — TRBS 1115: Sicherheitsrelevante MSR-Einrichtungen
|
||||
6. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1115-Teil-1.html — TRBS 1115 Teil 1: Cybersicherheit für MSR
|
||||
7. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1116.html — TRBS 1116: Qualifikation und Unterweisung
|
||||
8. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1121.html — TRBS 1121: Änderungen an Aufzugsanlagen
|
||||
9. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1122.html — TRBS 1122: Änderungen an Anlagen (§1 Abs.2 Nr.4)
|
||||
10. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1123.html — TRBS 1123: Änderungen an Anlagen (§1 Abs.2 Nr.3)
|
||||
11. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1151.html — TRBS 1151: Mensch-Arbeitsmittel-Schnittstelle, Ergonomie
|
||||
12. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1201.html — TRBS 1201: Prüfungen von Arbeitsmitteln
|
||||
13. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1201-Teil-1.html — TRBS 1201 Teil 1: Prüfung in Ex-Bereichen
|
||||
14. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1201-Teil-2.html — TRBS 1201 Teil 2: Prüfung bei Dampf/Druck
|
||||
15. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1201-Teil-4.html — TRBS 1201 Teil 4: Prüfung von Aufzugsanlagen
|
||||
16. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1201-Teil-5.html — TRBS 1201 Teil 5: Prüfung Lager-/Tankstellen
|
||||
17. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-1203.html — TRBS 1203: Befähigte Personen
|
||||
|
||||
### 2000er Reihe (Gefährdungsbezogen)
|
||||
18. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2111.html — TRBS 2111: Mechanische Gefährdungen
|
||||
19. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2111-Teil-1.html — TRBS 2111 Teil 1: Kontrolliert bewegte Teile
|
||||
20. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2111-Teil-2.html — TRBS 2111 Teil 2: Unkontrolliert bewegte Teile
|
||||
21. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2111-Teil-3.html — TRBS 2111 Teil 3: Gefährliche Oberflächen
|
||||
22. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2111-Teil-4.html — TRBS 2111 Teil 4: Mobile Arbeitsmittel
|
||||
23. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2121.html — TRBS 2121: Absturzgefährdung
|
||||
24. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2141.html — TRBS 2141: Dampf und Druck
|
||||
25. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2141-Teil-1.html — TRBS 2141 Teil 1: Versagen drucktragender Wandung
|
||||
26. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2152.html — TRBS 2152: Explosionsfähige Atmosphäre
|
||||
27. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2152-Teil-1.html — TRBS 2152 Teil 1: Beurteilung Explosionsgefährdung
|
||||
28. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2152-Teil-2.html — TRBS 2152 Teil 2: Vermeidung Ex-Atmosphäre
|
||||
29. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2152-Teil-3.html — TRBS 2152 Teil 3: Vermeidung Entzündung
|
||||
30. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2152-Teil-4.html — TRBS 2152 Teil 4: Konstruktiver Explosionsschutz
|
||||
31. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2181.html — TRBS 2181: Eingeschlossensein in Personenaufnahmemitteln
|
||||
32. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-2210.html — TRBS 2210: Wechselwirkungen
|
||||
|
||||
### 3000er Reihe (Spezifisch)
|
||||
33. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-3121.html — TRBS 3121: Betrieb von Aufzugsanlagen
|
||||
34. https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS-3151.html — TRBS 3151: Brand-/Explosionsschutz Tankstellen
|
||||
|
||||
---
|
||||
|
||||
## TRGS — Technische Regeln für Gefahrstoffe (64 Dokumente)
|
||||
|
||||
### 200er Reihe (Einstufung/Kennzeichnung)
|
||||
35. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-200.html — TRGS 200: Einstufung und Kennzeichnung
|
||||
36. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-201.html — TRGS 201: Einstufung und Kennzeichnung bei Tätigkeiten
|
||||
37. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-220.html — TRGS 220: Sicherheitsdatenblatt
|
||||
|
||||
### 400er Reihe (Gefährdungsbeurteilung)
|
||||
38. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-400.html — TRGS 400: Gefährdungsbeurteilung Gefahrstoffe
|
||||
39. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-401.html — TRGS 401: Hautgefährdung
|
||||
40. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-402.html — TRGS 402: Inhalative Exposition
|
||||
41. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-406.html — TRGS 406: Sensibilisierende Stoffe
|
||||
42. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-407.html — TRGS 407: Tätigkeiten mit Gasen
|
||||
43. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-410.html — TRGS 410: Expositionsverzeichnis krebserzeugende Stoffe
|
||||
44. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-420.html — TRGS 420: Verfahrens- und stoffspezifische Kriterien
|
||||
45. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-430.html — TRGS 430: Isocyanate
|
||||
46. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-460.html — TRGS 460: Stand der Technik
|
||||
|
||||
### 500er Reihe (Schutzmaßnahmen)
|
||||
47. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-500.html — TRGS 500: Schutzmaßnahmen
|
||||
48. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-504.html — TRGS 504: Tätigkeiten mit Blei
|
||||
49. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-505.html — TRGS 505: Oberflächenbehandlung in Räumen
|
||||
50. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-507.html — TRGS 507: Oberflächenbehandlung in Räumen und Behältern
|
||||
51. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-509.html — TRGS 509: Lagern von flüssigen/festen Gefahrstoffen in ortsfesten Behältern
|
||||
52. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-510.html — TRGS 510: Lagerung von Gefahrstoffen in ortsbeweglichen Behältern
|
||||
53. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-512.html — TRGS 512: Begasungen
|
||||
54. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-513.html — TRGS 513: Tätigkeiten an Sterilisatoren mit ETO
|
||||
55. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-519.html — TRGS 519: Asbest
|
||||
56. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-520.html — TRGS 520: Errichtung und Betrieb von Sammelstellen
|
||||
57. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-521.html — TRGS 521: Abbruch/Sanierung alte Mineralwolle
|
||||
58. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-522.html — TRGS 522: Raumdesinfektion mit Formaldehyd
|
||||
59. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-523.html — TRGS 523: Schädlingsbekämpfung mit sehr giftigen/giftigen Stoffen
|
||||
60. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-524.html — TRGS 524: Schutzmaßnahmen bei kontaminierten Bereichen
|
||||
61. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-525.html — TRGS 525: Gefahrstoffe in Einrichtungen der medizinischen Versorgung
|
||||
62. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-526.html — TRGS 526: Laboratorien
|
||||
63. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-527.html — TRGS 527: Tätigkeiten mit Nanomaterialien
|
||||
64. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-528.html — TRGS 528: Schweißtechnische Arbeiten
|
||||
65. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-529.html — TRGS 529: Tätigkeiten bei Biogasanlagen
|
||||
66. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-530.html — TRGS 530: Friseurhandwerk
|
||||
67. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-551.html — TRGS 551: Teer und andere PAK-haltige Stoffe
|
||||
68. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-552.html — TRGS 552: N-Nitrosamine
|
||||
69. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-553.html — TRGS 553: Holzstaub
|
||||
70. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-554.html — TRGS 554: Abgase von Dieselmotoren
|
||||
71. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-555.html — TRGS 555: Betriebsanweisung und Information
|
||||
72. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-557.html — TRGS 557: Dioxine
|
||||
73. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-558.html — TRGS 558: Quarzfeinstaub
|
||||
74. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-559.html — TRGS 559: Mineralischer Staub
|
||||
75. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-561.html — TRGS 561: Krebserzeugende Metalle
|
||||
|
||||
### 600er Reihe (Substitution)
|
||||
76. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-600.html — TRGS 600: Substitution
|
||||
77. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-610.html — TRGS 610: Ersatzstoffe und Ersatzverfahren für chrysotilhaltigen Asbest
|
||||
78. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-617.html — TRGS 617: Ersatzstoffe für Kühlschmierstoffe
|
||||
79. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-619.html — TRGS 619: Substitution für chromat-haltige Beschichtungsstoffe
|
||||
|
||||
### 700er Reihe (Brand-/Explosionsschutz)
|
||||
80. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-720.html — TRGS 720: Gefährliche explosionsfähige Gemische
|
||||
81. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-721.html — TRGS 721: Beurteilung Explosionsgefährdung
|
||||
82. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-722.html — TRGS 722: Vermeidung explosionsfähiger Gemische
|
||||
83. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-723.html — TRGS 723: Gefährliche explosionsfähige Gemische – Vermeidung Entzündung
|
||||
84. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-724.html — TRGS 724: Gefährliche explosionsfähige Gemische – Konstruktiver Schutz
|
||||
85. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-725.html — TRGS 725: Gefährliche explosionsfähige Gemische – MSR-Einrichtungen
|
||||
86. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-726.html — TRGS 726: Sauerstoffgrenzkonzentration
|
||||
87. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-727.html — TRGS 727: Vermeidung von Zündgefahren (elektrostatisch)
|
||||
88. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-741.html — TRGS 741: Organische Peroxide
|
||||
89. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-745.html — TRGS 745: Ortsbewegliche Druckgasbehälter
|
||||
90. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-746.html — TRGS 746: Ortsfeste Druckanlagen für Gase
|
||||
91. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-751.html — TRGS 751: Vermeidung von Brand-/Explosionsgefahren Tankstellen
|
||||
92. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-800.html — TRGS 800: Brandschutzmaßnahmen
|
||||
|
||||
### 900er Reihe (Grenzwerte)
|
||||
93. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-900.html — TRGS 900: Arbeitsplatzgrenzwerte
|
||||
94. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-903.html — TRGS 903: Biologische Grenzwerte
|
||||
95. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-905.html — TRGS 905: Verzeichnis krebserzeugender Stoffe
|
||||
96. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-906.html — TRGS 906: Verzeichnis krebserzeugender Verfahren
|
||||
97. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-907.html — TRGS 907: Verzeichnis sensibilisierender Stoffe
|
||||
98. https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS-910.html — TRGS 910: Risikobezogenes Maßnahmenkonzept krebserzeugende Stoffe
|
||||
|
||||
---
|
||||
|
||||
## ASR — Arbeitsstättenregeln (22 Dokumente)
|
||||
|
||||
99. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-V3.html — ASR V3: Gefährdungsbeurteilung
|
||||
100. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-V3a-2.html — ASR V3a.2: Barrierefreie Gestaltung
|
||||
101. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A1-2.html — ASR A1.2: Raumabmessungen und Bewegungsflächen
|
||||
102. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A1-3.html — ASR A1.3: Sicherheits-/Gesundheitsschutzkennzeichnung
|
||||
103. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A1-5-1-2.html — ASR A1.5/1,2: Fußböden
|
||||
104. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A1-6.html — ASR A1.6: Fenster, Oberlichter
|
||||
105. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A1-7.html — ASR A1.7: Türen und Tore
|
||||
106. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A1-8.html — ASR A1.8: Verkehrswege
|
||||
107. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A2-1.html — ASR A2.1: Schutz vor Absturz
|
||||
108. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A2-2.html — ASR A2.2: Maßnahmen gegen Brände
|
||||
109. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A2-3.html — ASR A2.3: Fluchtwege und Notausgänge
|
||||
110. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A3-4.html — ASR A3.4: Beleuchtung und Sichtverbindung
|
||||
111. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A3-4-3.html — ASR A3.4/3: Sicherheitsbeleuchtung
|
||||
112. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A3-5.html — ASR A3.5: Raumtemperatur
|
||||
113. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A3-6.html — ASR A3.6: Lüftung
|
||||
114. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A3-7.html — ASR A3.7: Lärm
|
||||
115. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A4-1.html — ASR A4.1: Sanitärräume
|
||||
116. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A4-2.html — ASR A4.2: Pausen-/Bereitschaftsräume
|
||||
117. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A4-3.html — ASR A4.3: Erste-Hilfe-Räume
|
||||
118. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A4-4.html — ASR A4.4: Unterkünfte
|
||||
119. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A5-2.html — ASR A5.2: Baustellen
|
||||
120. https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR-A6.html — ASR A6: Bildschirmarbeit
|
||||
|
||||
---
|
||||
|
||||
**Gesamt: 120 Dokumente** (34 TRBS + 64 TRGS + 22 ASR)
|
||||
|
||||
**Hinweis:** Einige URLs könnten leicht abweichen (Bindestriche vs. Punkte). Im Browser die BAuA-Übersichtsseite nutzen und von dort die PDFs einzeln herunterladen:
|
||||
- https://www.baua.de/DE/Angebote/Regelwerk/TRBS/TRBS.html
|
||||
- https://www.baua.de/DE/Angebote/Regelwerk/TRGS/TRGS.html
|
||||
- https://www.baua.de/DE/Angebote/Regelwerk/ASR/ASR.html
|
||||
@@ -0,0 +1,256 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
BAuA Regulatory Crawler — TRBS, TRGS, ASR
|
||||
|
||||
Crawls the BAuA website using Playwright (headless browser),
|
||||
extracts PDF links, downloads all documents.
|
||||
|
||||
Usage:
|
||||
python3 crawl_baua.py # download all
|
||||
python3 crawl_baua.py --category trbs # only TRBS
|
||||
python3 crawl_baua.py --dry-run # list PDFs without downloading
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# Timestamped INFO-level logging for the whole crawler.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("baua-crawler")

BASE_URL = "https://www.baua.de"
# Downloads and the registry live next to this script.
OUTPUT_DIR = Path(__file__).parent.joinpath("pdfs")
REGISTRY_FILE = Path(__file__).parent.joinpath("source_registry.json")

# One entry per rule family, keyed by the lowercase code used on the CLI.
# Each index page lives at /DE/Angebote/Regelwerk/<CODE>/<CODE>.html.
CATEGORIES = {
    code: {
        "url": f"{BASE_URL}/DE/Angebote/Regelwerk/{code.upper()}/{code.upper()}.html",
        "name": name,
        "source_type": "technical_rule",
        "legal_basis": legal_basis,
    }
    for code, name, legal_basis in (
        ("trbs", "Technische Regeln für Betriebssicherheit", "BetrSichV"),
        ("trgs", "Technische Regeln für Gefahrstoffe", "GefStoffV"),
        ("asr", "Arbeitsstättenregeln", "ArbStättV"),
    )
}
|
||||
|
||||
|
||||
def crawl_index(page, category: str, config: dict) -> list[dict]:
    """Crawl a category index page and collect links to its detail pages.

    Args:
        page: Browser page object; ``goto`` and ``query_selector_all`` are
            called on it.
        category: Lowercase category code (e.g. ``"trbs"``); its upper-case
            form is the path segment that detail links must contain.
        config: Category settings; only ``config["url"]`` (the index page)
            is read here.

    Returns:
        One dict per distinct detail page with the keys ``detail_url``
        (absolute URL), ``title`` (link text capped at 200 characters, or the
        filename when the link has no text), ``filename`` (last path segment
        of the href) and ``category``.

    Links with a fragment or query string are ignored, as are links back to
    the category index itself (``…/TRBS``, ``…/TRBS/`` or ``…/TRBS.html``).
    Duplicates are detected on the absolute URL, so a relative and an
    absolute href to the same page yield a single entry.
    """
    code = category.upper()
    logger.info("Crawling %s index: %s", code, config["url"])
    page.goto(config["url"], wait_until="networkidle", timeout=30000)
    time.sleep(3)  # Wait for BunnyShield

    # Match pattern: /DE/Angebote/Regelwerk/TRBS/TRBS-1111 (no .html!)
    # ASR uses ASR-A1-3 (not ASR-ASR-A1-3)
    base_pattern = f"/DE/Angebote/Regelwerk/{code}/"  # loop-invariant
    detail_urls = []
    seen = set()

    for link in page.query_selector_all("a[href]"):
        href = link.get_attribute("href") or ""
        text = (link.inner_text() or "").strip()

        if base_pattern not in href or "#" in href or "?" in href:
            continue

        # Last non-empty path segment; stripping "/" keeps a trailing slash
        # from producing an empty filename.
        filename = href.rstrip("/").split("/")[-1]
        stem = filename[: -len(".html")] if filename.lower().endswith(".html") else filename
        if stem.upper() == code:
            continue  # link points at the category index, not a detail page

        full_url = urljoin(BASE_URL, href)
        key = full_url.rstrip("/")
        if key in seen:
            continue
        seen.add(key)

        detail_urls.append({
            "detail_url": full_url,
            "title": text[:200] if text else filename,
            "filename": filename,
            "category": category,
        })

    logger.info("Found %d detail pages for %s", len(detail_urls), code)
    return detail_urls
|
||||
|
||||
|
||||
def extract_pdf_url(page, detail: dict) -> dict:
    """Open a detail page and record the first PDF link candidate found.

    Four lookups are tried in order, each only if the previous ones found
    nothing: anchors whose href ends in ``.pdf``; elements carrying a
    ``data-download-url`` attribute that contains ``.pdf``; any anchor whose
    href contains ``.pdf`` or whose text contains "download"; anchors whose
    href contains ``blob`` or ``download``.

    Args:
        page: Browser page object; ``goto`` and ``query_selector_all`` are
            called on it.
        detail: Entry with ``detail_url`` and ``filename``; mutated in place.

    Returns:
        The same ``detail`` dict with ``pdf_url`` set to an absolute URL, or
        to ``None`` when nothing matched or any step raised.
    """

    def candidates():
        # Lazily yields raw link targets in priority order, so later
        # selectors are only queried when earlier ones produced nothing.

        # Strategy 1: Direct PDF link
        for anchor in page.query_selector_all('a[href$=".pdf"]'):
            target = anchor.get_attribute("href") or ""
            if target:
                yield target

        # Strategy 2: Download button with data attribute
        for button in page.query_selector_all("[data-download-url]"):
            target = button.get_attribute("data-download-url") or ""
            if target and ".pdf" in target:
                yield target

        # Strategy 3: Links containing "pdf" or "download"
        for anchor in page.query_selector_all("a[href]"):
            target = anchor.get_attribute("href") or ""
            label = (anchor.inner_text() or "").lower()
            if target and (".pdf" in target or "download" in label):
                yield target

        # Strategy 4: Check for blob/dynamic download
        for anchor in page.query_selector_all(
            'a[href*="blob"], a[href*="download"], a[href*="__blob"]'
        ):
            target = anchor.get_attribute("href") or ""
            if target:
                yield target

    try:
        page.goto(detail["detail_url"], wait_until="networkidle", timeout=30000)
        time.sleep(2)

        first_hit = next(candidates(), None)
        if first_hit is None:
            logger.warning("No PDF found for %s", detail["filename"])
            detail["pdf_url"] = None
        else:
            detail["pdf_url"] = urljoin(BASE_URL, first_hit)
    except Exception as exc:
        logger.error("Error on %s: %s", detail["detail_url"], exc)
        detail["pdf_url"] = None

    return detail
|
||||
|
||||
|
||||
def download_pdf(page, detail: dict, output_dir: Path) -> dict:
    """Download the PDF for one registry entry and record path, hash and size.

    Args:
        page: Browser page object; ``expect_download``, ``goto`` and
            ``request.get`` are used. Not touched when the file is cached.
        detail: Entry with ``pdf_url``, ``category`` and ``filename``;
            mutated in place.
        output_dir: Root directory; the file is stored at
            ``output_dir/<category>/<sanitised filename>.pdf``.

    Returns:
        The same ``detail`` dict. On success (fresh download or already
        existing file) it gains ``local_path``, ``sha256`` and
        ``size_bytes``. Without a ``pdf_url``, or when the download fails,
        it is returned without those keys.

    A freshly downloaded payload must carry the ``%PDF-`` marker within its
    first 1024 bytes. Anything else (for example an HTML page served with
    HTTP 200) is discarded instead of being stored under a ``.pdf`` name.
    Files left behind by a failed attempt are removed, so a later run does
    not mistake them for a completed download.
    """
    if not detail.get("pdf_url"):
        return detail

    cat = detail["category"]
    safe_name = re.sub(r"[^a-zA-Z0-9_\-]", "_", detail["filename"]).lower()
    pdf_path = output_dir / cat / f"{safe_name}.pdf"
    pdf_path.parent.mkdir(parents=True, exist_ok=True)

    cached = pdf_path.exists()
    if cached:
        logger.info(" Already exists: %s", pdf_path.name)
    else:
        try:
            with page.expect_download(timeout=60000) as download_info:
                page.goto(detail["pdf_url"], timeout=30000)
            download_info.value.save_as(str(pdf_path))
        except Exception:
            # Fallback: direct download via response
            try:
                response = page.request.get(detail["pdf_url"])
                if not response.ok:
                    logger.error(" Download failed: %s (HTTP %d)",
                                 detail["filename"], response.status)
                    pdf_path.unlink(missing_ok=True)
                    return detail
                pdf_path.write_bytes(response.body())
            except Exception as e:
                logger.error(" Download failed: %s — %s", detail["filename"], e)
                pdf_path.unlink(missing_ok=True)
                return detail

    # Read once; the same bytes feed the validation, the hash and the size.
    payload = pdf_path.read_bytes()

    if not cached and b"%PDF-" not in payload[:1024]:
        logger.error(" Download failed: %s — response is not a PDF",
                     detail["filename"])
        pdf_path.unlink(missing_ok=True)
        return detail

    detail["local_path"] = str(pdf_path)
    detail["sha256"] = hashlib.sha256(payload).hexdigest()
    # Set for cached files too, so every downloaded entry has the same keys.
    detail["size_bytes"] = len(payload)
    if not cached:
        logger.info(" Downloaded: %s (%.1f KB)", pdf_path.name, len(payload) / 1024)
    return detail
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: crawl BAuA indexes, download PDFs, write registry.

    For each selected category the index page is crawled, every detail page
    is visited to find its PDF link and, unless ``--dry-run`` is given, the
    PDFs are downloaded. Each entry is then tagged with source metadata and
    the combined list is written to ``REGISTRY_FILE`` as JSON, followed by a
    one-line summary in the log.

    Flags:
        --category {trbs,trgs,asr}: restrict the run to one category.
        --dry-run: resolve PDF links but do not download.
        --headless / --no-headless: run the browser without or with a
            window; headless is the default.
    """
    parser = argparse.ArgumentParser()
    # Choices follow CATEGORIES so the CLI cannot drift from the config.
    parser.add_argument("--category", choices=list(CATEGORIES),
                        help="Only crawl one category")
    parser.add_argument("--dry-run", action="store_true",
                        help="List PDFs without downloading")
    # One option provides both --headless and --no-headless. The previous
    # separate --headless store_true flag with default=True had no effect.
    parser.add_argument("--headless", action=argparse.BooleanOptionalAction,
                        default=True,
                        help="Run the browser headless")
    args = parser.parse_args()

    categories = [args.category] if args.category else list(CATEGORIES.keys())

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    registry = []

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=args.headless)
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                           "AppleWebKit/537.36 (KHTML, like Gecko) "
                           "Chrome/120.0.0.0 Safari/537.36"
            )
            page = context.new_page()

            for cat in categories:
                config = CATEGORIES[cat]
                logger.info("\n=== %s ===", cat.upper())

                # Step 1: Crawl index
                details = crawl_index(page, cat, config)

                # Step 2: Extract PDF URLs
                for i, detail in enumerate(details):
                    logger.info("[%d/%d] %s", i + 1, len(details), detail["filename"])
                    extract_pdf_url(page, detail)
                    time.sleep(1)  # Be polite

                # Step 3: Download PDFs
                if not args.dry_run:
                    for detail in details:
                        download_pdf(page, detail, OUTPUT_DIR)
                        time.sleep(0.5)

                # Add metadata
                for detail in details:
                    detail["source_type"] = config["source_type"]
                    detail["legal_basis"] = config["legal_basis"]
                    detail["license_rule"] = 1  # §5 UrhG, public domain
                    detail["jurisdiction"] = "DE"

                registry.extend(details)
        finally:
            # Close the browser even if a crawl step raises.
            browser.close()

    # Save registry
    # NOTE(review): written with the platform default encoding, which matches
    # how ingest_to_qdrant.py reads it back (read_text() without encoding).
    # Pin both sides to UTF-8 together if that is ever changed.
    REGISTRY_FILE.write_text(json.dumps(registry, indent=2, ensure_ascii=False))
    logger.info("\nRegistry saved: %s (%d entries)", REGISTRY_FILE, len(registry))

    # Summary
    total = len(registry)
    with_pdf = sum(1 for r in registry if r.get("pdf_url"))
    downloaded = sum(1 for r in registry if r.get("local_path"))
    logger.info("Total: %d | PDF found: %d | Downloaded: %d", total, with_pdf, downloaded)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,119 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Ingest downloaded TRBS/TRGS/ASR PDFs into Qdrant via RAG Service.
|
||||
|
||||
Reads the source_registry.json and uploads each PDF to the RAG service.
|
||||
|
||||
Usage:
|
||||
python3 ingest_to_qdrant.py # ingest all
|
||||
python3 ingest_to_qdrant.py --category trbs # only TRBS
|
||||
python3 ingest_to_qdrant.py --dry-run # list without uploading
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
|
||||
# Timestamped INFO-level logging for the ingest run.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("ingest-trbs")

# Upload endpoint of the RAG service.
RAG_URL = "https://macmini:8097/api/v1/documents/upload"
COLLECTION = "bp_compliance_ce"  # Same collection as other CE documents
# Registry file located next to this script.
REGISTRY_FILE = Path(__file__).parent.joinpath("source_registry.json")
|
||||
|
||||
|
||||
def ingest_pdf(entry: dict) -> dict:
    """Upload a single registry entry's PDF to the RAG service.

    Args:
        entry: One record of the source registry. The keys ``local_path``,
            ``category``, ``filename``, ``title`` and ``legal_basis`` are
            read; each may be missing, ``None`` or empty.

    Returns:
        A status dict, one of:

        * ``{"status": "skipped", "reason": "no local file"}`` when the entry
          has no ``local_path`` or the file does not exist on disk,
        * ``{"status": "ok", "document_id": ..., "chunks": ...}`` after a
          successful upload (``chunks`` is always an int),
        * ``{"status": "error", "reason": ...}`` when reading the file, the
          HTTP request or decoding the response raised.
    """
    local_path = entry.get("local_path", "")
    if not local_path or not Path(local_path).exists():
        return {"status": "skipped", "reason": "no local file"}

    pdf_path = Path(local_path)
    # Use `or` rather than a .get() default: a key that is present but holds
    # None/"" must fall back as well, otherwise e.g. a None filename ends up
    # as the literal "none" inside regulation_id.
    category = entry.get("category") or "unknown"
    filename = entry.get("filename") or pdf_path.name
    title = entry.get("title") or filename

    metadata = {
        "source": title,
        "regulation_id": f"{category}_{filename}".lower().replace("-", "_"),
        "jurisdiction": "DE",
        "source_type": "technical_rule",
        "license_rule": 1,
        "category": category,
        "legal_basis": entry.get("legal_basis") or "",
    }

    try:
        with open(pdf_path, "rb") as f:
            files = {"file": (pdf_path.name, f, "application/pdf")}
            data = {
                "collection": COLLECTION,
                "data_type": "legal",
                "use_case": "compliance",
                "year": "2026",
                "chunk_size": "512",
                "chunk_overlap": "50",
                "metadata_json": json.dumps(metadata),
            }
            # NOTE(security): verify=False disables TLS certificate checks for
            # this upload. Left unchanged here (presumably the RAG host uses a
            # self-signed certificate — confirm); prefer passing a CA bundle.
            resp = httpx.post(RAG_URL, files=files, data=data, timeout=300.0, verify=False)
        resp.raise_for_status()
        result = resp.json()
        return {
            "status": "ok",
            "document_id": result.get("document_id", ""),
            # A null/missing chunks_count must not reach the caller's integer sum.
            "chunks": result.get("chunks_count") or 0,
        }
    except Exception as e:
        # Deliberate best-effort: one failing document must not abort the run.
        return {"status": "error", "reason": str(e)}
|
||||
|
||||
|
||||
def main():
    """CLI entry point: upload every downloaded PDF listed in the registry.

    Reads ``REGISTRY_FILE``, optionally narrows it to one ``--category``, and
    calls ``ingest_pdf`` for each entry that has a ``local_path``. With
    ``--dry-run`` the entries are only listed. Per-document results and a final
    summary (OK count, chunk count, error count, registry size) are logged.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--category", choices=["trbs", "trgs", "asr"])
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    registry = json.loads(REGISTRY_FILE.read_text())
    if args.category:
        registry = [e for e in registry if e.get("category") == args.category]

    logger.info("Ingesting %d documents into Qdrant (%s)", len(registry), COLLECTION)

    total_ok = 0
    total_chunks = 0
    total_err = 0

    for position, entry in enumerate(registry, start=1):
        # Entries without a downloaded file are not ingestable; skip silently.
        if not entry.get("local_path"):
            continue

        # ingest_pdf tolerates a missing filename/title, so the log lines must
        # not crash on them either: fall back to the file name on disk.
        label = entry.get("filename") or Path(entry["local_path"]).name

        if args.dry_run:
            logger.info("[%d/%d] %s — %s (dry-run)",
                        position, len(registry), label, (entry.get("title") or "")[:60])
            continue

        logger.info("[%d/%d] %s", position, len(registry), label)
        result = ingest_pdf(entry)

        if result["status"] == "ok":
            total_ok += 1
            total_chunks += result["chunks"]
            logger.info("  → %d chunks indexed", result["chunks"])
        else:
            # Covers both "error" and "skipped" (file listed but not on disk).
            total_err += 1
            logger.error("  → %s: %s", result["status"], result.get("reason", ""))

        time.sleep(1)  # Be gentle

    logger.info("\nDone: %d OK (%d chunks), %d errors, %d total",
                total_ok, total_chunks, total_err, len(registry))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,114 @@
|
||||
# Urteile zum Download — Priorisiert nach Scannbarkeit
|
||||
|
||||
## Prioritaet 1: Website-scannbar (11 Urteile)
|
||||
|
||||
### 1. LG Muenchen I — Google Fonts
|
||||
- Az: 3 O 17493/20 (20.01.2022)
|
||||
- URL: https://www.gesetze-bayern.de/Content/Document/Y-300-Z-GRURRS-B-2022-N-612
|
||||
- Scanner: fonts.googleapis.com, fonts.gstatic.com im HTML
|
||||
|
||||
### 2. DSB Oesterreich — Google Analytics
|
||||
- Az: D155.027 (22.12.2021)
|
||||
- URL: https://noyb.eu/de/oesterreichische-dsb-eu-us-datenuebermittlung-google-analytics-illegal
|
||||
- Originalbescheid: https://noyb.eu/sites/default/files/2022-01/E-Bescheid%20%20redacted.pdf
|
||||
- Scanner: google-analytics.com, gtag/js, analytics.js
|
||||
|
||||
### 3. CNIL — Cookie-Banner 150 Mio
|
||||
- Sanktionsbescheid gegen Google (31.12.2021)
|
||||
- URL: https://www.cnil.fr/en/cookies-cnil-fines-google-150-million-euros
|
||||
- Scanner: Cookie-Banner DOM (Ablehnen vs Akzeptieren Button-Paritaet)
|
||||
|
||||
### 4. BGH — Planet49 / Opt-In
|
||||
- Az: I ZR 7/16 (28.05.2020, nach EuGH C-673/17)
|
||||
- URL: https://juris.bundesgerichtshof.de/cgi-bin/rechtsprechung/document.py?Gericht=bgh&Art=en&nr=107124
|
||||
- Scanner: Cookies vor Consent, vorausgefuellte Checkboxen
|
||||
|
||||
### 5. EuGH — Schrems II
|
||||
- Az: C-311/18 (16.07.2020)
|
||||
- URL: https://curia.europa.eu/juris/liste.jsf?num=C-311/18
|
||||
- Scanner: HTTP-Requests an US-Server (IP-Geolocation)
|
||||
|
||||
### 6. OLG Koeln — Dark Patterns Cookie-Banner
|
||||
- Az: 6 U 58/21 (19.11.2021)
|
||||
- Scanner: Button-Groesse, Farbe, Hierarchie im Consent-Banner
|
||||
|
||||
### 7. EuGH — Button-Loesung (Fuhrmann-2)
|
||||
- Az: C-249/21 (07.04.2022)
|
||||
- Scanner: Bestell-Button Text ("zahlungspflichtig bestellen"?)
|
||||
|
||||
### 8. BGH — Impressum Social Media
|
||||
- Az: I ZR 169/22 (09.09.2021)
|
||||
- Scanner: Vollstaendiges Impressum innerhalb 2 Klicks
|
||||
|
||||
### 9. BGH — Grundpreis PAngV
|
||||
- Az: I ZR 46/20 (20.01.2022)
|
||||
- Scanner: Grundpreis neben Endpreis bei mengenbasierten Produkten
|
||||
|
||||
### 10. LG Berlin — Datenschutzerklaerung Vollstaendigkeit
|
||||
- Az: 16 O 341/15
|
||||
- Scanner: Art. 13/14 DSGVO Pflichtangaben in DSE
|
||||
|
||||
### 11. DSK — Telemedien Orientierungshilfe
|
||||
- Bereits im RAG als: dsk_oh_telemedien (589 Chunks)
|
||||
- KEIN Download noetig ✅
|
||||
|
||||
## Prioritaet 2: Dokument/Prozess-Checks (9 Eintraege)
|
||||
|
||||
### 12. EuGH — SCHUFA Scoring / Art. 22
|
||||
- Az: C-634/21 (07.12.2023)
|
||||
- URL: https://curia.europa.eu/juris/liste.jsf?num=C-634/21
|
||||
|
||||
### 13. BAG — Zeiterfassung
|
||||
- Az: 1 ABR 22/21 (13.09.2022)
|
||||
- Bereits im RAG als: bag_1_abr_22_21 (237 Chunks)
|
||||
- KEIN Download noetig ✅
|
||||
|
||||
### 14. EuGH — Schadensersatz bei Datenleck (Befuerchtung reicht)
|
||||
- Az: C-340/21 (14.12.2023)
|
||||
- URL: https://curia.europa.eu/juris/liste.jsf?num=C-340/21
|
||||
|
||||
### 15. EuGH — Meta / Berechtigtes Interesse
|
||||
- Az: C-252/21 (04.07.2023)
|
||||
- URL: https://curia.europa.eu/juris/liste.jsf?num=C-252/21
|
||||
|
||||
### 16. LAG Hamm — Microsoft 365 Mitbestimmung
|
||||
- Az: 11 Sa 1108/22 (20.06.2023)
|
||||
|
||||
### 17. OLG Muenchen — Widerrufsbelehrung
|
||||
- Az: 29 U 2698/19
|
||||
|
||||
### 18. BVerfG — Recht auf Vergessenwerden
|
||||
- Az: 1 BvR 1547/19 (06.11.2019)
|
||||
|
||||
### 19. 1&1 Bussgeld (BfDI)
|
||||
- 9,55 Mio EUR (09.12.2019)
|
||||
- Unzureichende Authentifizierung im Kundenservice
|
||||
|
||||
### 20. BFSG/EAA
|
||||
- Bereits im RAG als: bfsg (219 Chunks)
|
||||
- KEIN Download noetig ✅
|
||||
|
||||
## Bereits im RAG vorhanden (kein Download):
|
||||
- dsk_oh_telemedien (589 Chunks) ✅
|
||||
- bag_1_abr_22_21 — Zeiterfassung (237 Chunks) ✅
|
||||
- bfsg (219 Chunks) ✅
|
||||
- 13 weitere BAG-Urteile ✅
|
||||
|
||||
## Download-Status:
|
||||
- [ ] 1. Google Fonts
|
||||
- [ ] 2. Google Analytics (DSB AT)
|
||||
- [ ] 3. CNIL Cookie-Banner
|
||||
- [ ] 4. BGH Planet49
|
||||
- [ ] 5. EuGH Schrems II
|
||||
- [ ] 6. OLG Koeln Dark Patterns
|
||||
- [ ] 7. EuGH Button-Loesung
|
||||
- [ ] 8. BGH Impressum
|
||||
- [ ] 9. BGH Grundpreis
|
||||
- [ ] 10. LG Berlin DSE
|
||||
- [ ] 12. EuGH SCHUFA
|
||||
- [ ] 14. EuGH Schadensersatz Datenleck
|
||||
- [ ] 15. EuGH Meta
|
||||
- [ ] 16. LAG Hamm M365
|
||||
- [ ] 17. OLG Muenchen Widerruf
|
||||
- [ ] 18. BVerfG Vergessenwerden
|
||||
- [ ] 19. 1&1 Bussgeld
|
||||
@@ -0,0 +1,28 @@
|
||||
URTEIL DES GERICHTSHOFS (Dritte Kammer)
|
||||
4. Mai 2023
|
||||
Rechtssache C-300/21 — UI gegen Oesterreichische Post AG
|
||||
|
||||
TENOR:
|
||||
|
||||
1. Art. 82 Abs. 1 der Verordnung (EU) 2016/679 (DSGVO) ist dahin auszulegen, dass der blosse Verstoss gegen die Bestimmungen dieser Verordnung nicht ausreicht, um einen Schadenersatzanspruch zu begruenden.
|
||||
|
||||
2. Art. 82 Abs. 1 der DSGVO ist dahin auszulegen, dass er einer nationalen Regelung oder Praxis entgegensteht, die den Ersatz eines immateriellen Schadens davon abhaengig macht, dass der der betroffenen Person entstandene Schaden einen bestimmten Grad an Erheblichkeit erreicht hat.
|
||||
|
||||
3. Art. 82 der DSGVO ist dahin auszulegen, dass die nationalen Gerichte bei der Festsetzung der Hoehe des Schadenersatzes die innerstaatlichen Vorschriften anzuwenden haben, sofern die unionsrechtlichen Grundsaetze der Aequivalenz und der Effektivitaet beachtet werden.
|
||||
|
||||
KERNAUSSAGEN:
|
||||
- DSGVO-Verstoss allein begruendet KEINEN Schadenersatzanspruch — es braucht einen konkreten Schaden
|
||||
- Aber: KEINE Erheblichkeitsschwelle fuer immateriellen Schaden (jeder nachweisbare Schaden genuegt)
|
||||
- 3 kumulative Voraussetzungen fuer Art. 82: Verstoss + Schaden + Kausalzusammenhang
|
||||
- "Schaden" ist weit auszulegen (146. Erwaegungsgrund DSGVO)
|
||||
- Kein Strafschadensersatz — nur Ausgleichsfunktion (vollstaendiger und wirksamer Ersatz)
|
||||
- Nationale Gerichte wenden nationales Recht fuer die Hoehe an (Verfahrensautonomie)
|
||||
- Grundsaetze der Aequivalenz und Effektivitaet muessen beachtet werden
|
||||
- Unangenehme Gefuehle koennen immateriellen Schaden darstellen (keine Bagatellgrenze)
|
||||
|
||||
RELEVANTE NORMEN:
|
||||
- Art. 82 DSGVO (Haftung und Recht auf Schadenersatz)
|
||||
- Art. 83 DSGVO (Geldbussen — ergaenzt Schadenersatz, aber eigenstaendig)
|
||||
- Art. 84 DSGVO (Sanktionen)
|
||||
- Erwaegungsgrund 146 DSGVO (weite Auslegung des Schadensbegriffs)
|
||||
- Erwaegungsgruende 75, 85 DSGVO (moegliche Schaeden)
|
||||
@@ -0,0 +1,44 @@
|
||||
URTEIL DES GERICHTSHOFS (Grosse Kammer)
|
||||
16. Juli 2020
|
||||
Rechtssache C-311/18 — Data Protection Commissioner gegen Facebook Ireland Ltd, Maximillian Schrems
|
||||
|
||||
TENOR:
|
||||
|
||||
1. Art. 2 Abs. 1 und 2 der Verordnung (EU) 2016/679 (DSGVO) ist dahin auszulegen, dass eine zu gewerblichen Zwecken erfolgende Uebermittlung personenbezogener Daten durch einen in einem Mitgliedstaat ansaessigen Wirtschaftsteilnehmer an einen anderen, in einem Drittland ansaessigen Wirtschaftsteilnehmer in den Anwendungsbereich dieser Verordnung faellt, ungeachtet dessen, ob die Daten bei ihrer Uebermittlung oder im Anschluss daran von den Behoerden des betreffenden Drittlands fuer Zwecke der oeffentlichen Sicherheit, der Landesverteidigung und der Sicherheit des Staates verarbeitet werden koennen.
|
||||
|
||||
2. Art. 46 Abs. 1 und Art. 46 Abs. 2 Buchst. c der DSGVO sind dahin auszulegen, dass die nach diesen Vorschriften erforderlichen geeigneten Garantien, durchsetzbaren Rechte und wirksamen Rechtsbehelfe gewaehrleisten muessen, dass die Rechte der Personen, deren personenbezogene Daten auf der Grundlage von Standarddatenschutzklauseln in ein Drittland uebermittelt werden, ein Schutzniveau geniessen, das dem in der EU durch die DSGVO im Licht der Charta garantierten Niveau der Sache nach gleichwertig ist.
|
||||
|
||||
3. Art. 58 Abs. 2 Buchst. f und j der DSGVO ist dahin auszulegen, dass die zustaendige Aufsichtsbehoerde verpflichtet ist, eine auf Standarddatenschutzklauseln gestuetzte Uebermittlung personenbezogener Daten in ein Drittland auszusetzen oder zu verbieten, wenn die Klauseln in diesem Drittland nicht eingehalten werden oder nicht eingehalten werden koennen und der nach dem Unionsrecht erforderliche Schutz nicht mit anderen Mitteln gewaehrleistet werden kann.
|
||||
|
||||
4. Die Pruefung des Beschlusses 2010/87/EU (Standardvertragsklauseln) anhand der Art. 7, 8 und 47 der Charta hat nichts ergeben, was seine Gueltigkeit beruehren koennte.
|
||||
|
||||
5. Der Durchfuehrungsbeschluss (EU) 2016/1250 (EU-US-Datenschutzschild / Privacy Shield) ist UNGUELTIG.
|
||||
|
||||
KERNAUSSAGEN:
|
||||
- Privacy Shield (EU-US-Datenschutzschild) ist ungueltig
|
||||
- US-Ueberwachungsprogramme (PRISM, UPSTREAM via Section 702 FISA + E.O. 12333) verstoessen gegen EU-Grundrechte
|
||||
- Weder Section 702 FISA noch E.O. 12333 genuegen dem Verhaeltnismaessigkeitsgrundsatz
|
||||
- PPD-28 verleiht betroffenen EU-Buergern keine durchsetzbaren Rechte
|
||||
- Die Ombudsperson des Datenschutzschilds ist KEIN unabhaengiges Gericht i.S.v. Art. 47 Charta
|
||||
- Standardvertragsklauseln (SCCs) bleiben gueltig, ABER:
|
||||
- Der Verantwortliche muss VOR der Uebermittlung pruefen ob das Drittland angemessenen Schutz bietet
|
||||
- Ggf. muessen zusaetzliche Massnahmen ergriffen werden
|
||||
- Wenn kein angemessener Schutz moeglich: Uebermittlung aussetzen/verbieten
|
||||
- Aufsichtsbehoerden sind VERPFLICHTET Uebermittlungen zu verbieten wenn Schutz nicht gewaehrleistet
|
||||
- DSGVO gilt auch wenn Drittland-Behoerden Daten fuer nationale Sicherheit nutzen koennten
|
||||
|
||||
RELEVANTE NORMEN:
|
||||
- Art. 44-49 DSGVO (Uebermittlungen in Drittlaender)
|
||||
- Art. 45 DSGVO (Angemessenheitsbeschluss)
|
||||
- Art. 46 DSGVO (Geeignete Garantien / Standardvertragsklauseln)
|
||||
- Art. 58 Abs. 2 DSGVO (Befugnisse der Aufsichtsbehoerden)
|
||||
- Art. 7, 8, 47 EU-Grundrechtecharta
|
||||
- Art. 52 Abs. 1 EU-Grundrechtecharta (Verhaeltnismaessigkeit)
|
||||
- Section 702 FISA (US-Auslandsaufklaerung)
|
||||
- Executive Order 12333 (US-Nachrichtendienste)
|
||||
- PPD-28 (Presidential Policy Directive)
|
||||
|
||||
AUSWIRKUNGEN:
|
||||
- Jede Datenuebermittlung in die USA muss einzeln geprueft werden (Transfer Impact Assessment)
|
||||
- Zusaetzliche technische Massnahmen (z.B. Verschluesselung) erforderlich
|
||||
- Nachfolger: EU-US Data Privacy Framework (2023)
|
||||
@@ -0,0 +1,30 @@
|
||||
URTEIL DES GERICHTSHOFS (Erste Kammer)
|
||||
|
||||
7. Dezember 2023
|
||||
|
||||
Vorlage zur Vorabentscheidung – Schutz natuerlicher Personen bei der Verarbeitung personenbezogener Daten – Verordnung (EU) 2016/679 – Art. 22 – Automatisierte Entscheidung im Einzelfall – Wirtschaftsauskunfteien – Automatisierte Erstellung eines Wahrscheinlichkeitswerts in Bezug auf die Faehigkeit einer Person zur Erfuellung kuenftiger Zahlungsverpflichtungen (Scoring) – Verwendung dieses Wahrscheinlichkeitswerts durch Dritte
|
||||
|
||||
In der Rechtssache C-634/21 — OQ gegen Land Hessen, Beteiligte: SCHUFA Holding AG
|
||||
|
||||
TENOR:
|
||||
|
||||
Art. 22 Abs. 1 der Verordnung (EU) 2016/679 (DSGVO) ist dahin auszulegen, dass eine automatisierte Entscheidung im Einzelfall im Sinne dieser Bestimmung vorliegt, wenn ein auf personenbezogene Daten zu einer Person gestuetzter Wahrscheinlichkeitswert in Bezug auf deren Faehigkeit zur Erfuellung kuenftiger Zahlungsverpflichtungen durch eine Wirtschaftsauskunftei automatisiert erstellt wird, sofern von diesem Wahrscheinlichkeitswert massgeblich abhaengt, ob ein Dritter, dem dieser Wahrscheinlichkeitswert uebermittelt wird, ein Vertragsverhaeltnis mit dieser Person begruendet, durchfuehrt oder beendet.
|
||||
|
||||
KERNAUSSAGEN:
|
||||
- SCHUFA-Scoring ist eine automatisierte Entscheidung im Einzelfall gemaess Art. 22 DSGVO
|
||||
- Der Score-Wert selbst ist bereits die "Entscheidung" (nicht erst die Handlung des Dritten)
|
||||
- Art. 22 DSGVO stellt ein grundsaetzliches VERBOT automatisierter Entscheidungen auf
|
||||
- Ausnahmen nur nach Art. 22 Abs. 2 DSGVO (Vertrag, Rechtsvorschrift, Einwilligung)
|
||||
- Betroffene haben Recht auf Auskunft ueber die involvierte Logik (Art. 15 Abs. 1 Buchst. h)
|
||||
- Nationale Regelungen (wie § 31 BDSG) muessen Art. 5, 6 und 22 DSGVO genuegen
|
||||
- Enge Auslegung wuerde zu Rechtsschutzluecke fuehren (3-Akteure-Problem)
|
||||
- Angemessene Massnahmen: Recht auf menschliches Eingreifen, Darlegung des Standpunkts, Anfechtung
|
||||
|
||||
RELEVANTE NORMEN:
|
||||
- Art. 22 DSGVO (Automatisierte Entscheidungen im Einzelfall)
|
||||
- Art. 4 Nr. 4 DSGVO (Definition Profiling)
|
||||
- Art. 15 Abs. 1 Buchst. h DSGVO (Auskunftsrecht bei automatisierter Entscheidung)
|
||||
- Art. 13 Abs. 2 Buchst. f DSGVO (Informationspflicht)
|
||||
- Art. 5 DSGVO (Grundsaetze der Verarbeitung)
|
||||
- Art. 6 DSGVO (Rechtmaessigkeit)
|
||||
- § 31 BDSG (Scoring — Vereinbarkeit mit EU-Recht zweifelhaft)
|
||||
@@ -0,0 +1,19 @@
|
||||
URTEIL DES GERICHTSHOFS (Große Kammer)
|
||||
|
||||
1. Oktober 2019
|
||||
|
||||
Vorlage zur Vorabentscheidung – Richtlinie 95/46/EG – Richtlinie 2002/58/EG – Verordnung (EU) 2016/679 – Verarbeitung personenbezogener Daten und Schutz der Privatsphäre in der elektronischen Kommunikation – Cookies – Begriff der Einwilligung der betroffenen Person – Einwilligungserklaerung mittels eines mit einem voreingestellten Haekchen versehenen Ankreuzkaestchens
|
||||
|
||||
In der Rechtssache C-673/17
|
||||
|
||||
Bundesverband der Verbraucherzentralen und Verbraucherverbände – Verbraucherzentrale Bundesverband e. V. gegen Planet49 GmbH
|
||||
|
||||
TENOR:
|
||||
|
||||
1. Art. 2 Buchst. f und Art. 5 Abs. 3 der Richtlinie 2002/58/EG in Verbindung mit Art. 2 Buchst. h der Richtlinie 95/46/EG bzw. mit Art. 4 Nr. 11 und Art. 6 Abs. 1 Buchst. a der Verordnung 2016/679 sind dahin auszulegen, dass keine wirksame Einwilligung im Sinne dieser Bestimmungen vorliegt, wenn die Speicherung von Informationen oder der Zugriff auf Informationen, die bereits im Endgeraet des Nutzers einer Website gespeichert sind, mittels Cookies durch ein voreingestelltes Ankreuzkaestchen erlaubt wird, das der Nutzer zur Verweigerung seiner Einwilligung abwaehlen muss.
|
||||
|
||||
2. Art. 2 Buchst. f und Art. 5 Abs. 3 der Richtlinie 2002/58 sind nicht unterschiedlich auszulegen, je nachdem, ob es sich bei den im Endgeraet des Nutzers einer Website gespeicherten oder abgerufenen Informationen um personenbezogene Daten handelt oder nicht.
|
||||
|
||||
3. Art. 5 Abs. 3 der Richtlinie 2002/58 ist dahin auszulegen, dass Angaben zur Funktionsdauer der Cookies und dazu, ob Dritte Zugriff auf die Cookies erhalten koennen, zu den Informationen zaehlen, die der Diensteanbieter dem Nutzer einer Website zu geben hat.
|
||||
|
||||
Verkuendet in oeffentlicher Sitzung in Luxemburg am 1. Oktober 2019.
|
||||
@@ -0,0 +1,29 @@
|
||||
# LG München I — Google Fonts Urteil
|
||||
# Az: 3 O 17493/20 (20.01.2022)
|
||||
# Quelle: gesetze-bayern.de
|
||||
|
||||
## Tenor (Entscheidung)
|
||||
|
||||
1. Die Beklagte wird verurteilt, es zu unterlassen, die dynamische IP-Adresse des Klägers an Google weiterzugeben, wenn der Kläger die Website der Beklagten aufruft, ohne dass der Kläger in die Weitergabe eingewilligt hat. Androhung: Ordnungsgeld bis 250.000 EUR oder Ordnungshaft bis 6 Monate.
|
||||
|
||||
2. Die Beklagte wird verurteilt, dem Kläger Auskunft zu erteilen, welche personenbezogenen Daten über ihn verarbeitet werden.
|
||||
|
||||
3. Die Beklagte wird verurteilt, 100 EUR Schmerzensgeld nebst Zinsen zu zahlen.
|
||||
|
||||
## Kernbegruendung
|
||||
|
||||
**DSGVO-Verstoss durch IP-Uebermittlung:** Das Gericht stellte fest, dass die automatische Uebermittlung dynamischer IP-Adressen an Google beim Laden von Google Fonts das Recht auf informationelle Selbstbestimmung (§ 823 Abs. 1 BGB) und Art. 6 Abs. 1 DSGVO verletzt.
|
||||
|
||||
**IP-Adressen = personenbezogene Daten:** Dynamische IP-Adressen sind personenbezogene Daten, weil der Website-Betreiber ueber den abstrakten rechtlichen Weg (Behoerden, Provider) die Identifikation der Person erreichen kann.
|
||||
|
||||
**Kein berechtigtes Interesse:** Das berechtigte Interesse der Beklagten scheitert, weil "Google Fonts auch genutzt werden kann, ohne dass beim Aufruf der Webseite eine Verbindung zu Google-Servern hergestellt wird und die IP-Adresse der Webseitenbesucher uebertragen wird." (lokales Hosting moeglich)
|
||||
|
||||
## Compliance-Anforderung
|
||||
|
||||
Website-Betreiber muessen Google Fonts lokal hosten oder Alternativen verwenden, die keine automatische IP-Uebermittlung an externe Server ohne explizite Einwilligung verursachen.
|
||||
|
||||
## Scanner-Pruefpunkte
|
||||
- HTML pruefen auf: fonts.googleapis.com, fonts.gstatic.com
|
||||
- CSS pruefen auf: @import url('https://fonts.googleapis.com/...')
|
||||
- JS pruefen auf: WebFont.load mit google-Provider
|
||||
- Wenn gefunden: FAIL — externe Google-Fonts-Einbindung ohne Consent
|
||||
Generated
+40
-40
@@ -10,7 +10,7 @@
|
||||
"dependencies": {
|
||||
"framer-motion": "^11.15.0",
|
||||
"lucide-react": "^0.468.0",
|
||||
"next": "^15.1.0",
|
||||
"next": "^15.5.16",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1"
|
||||
},
|
||||
@@ -552,15 +552,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "15.5.12",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-15.5.12.tgz",
|
||||
"integrity": "sha512-pUvdJN1on574wQHjaBfNGDt9Mz5utDSZFsIIQkMzPgNS8ZvT4H2mwOrOIClwsQOb6EGx5M76/CZr6G8i6pSpLg==",
|
||||
"version": "15.5.16",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-15.5.16.tgz",
|
||||
"integrity": "sha512-9QMKolCl+JnJtaRAQSXy4RQrhgfe8W7/G1+Hl3QSB/HZY7zQMzTwPDdTRwwio8BS96ps1MHpHhbS8qxoNV3JIQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "15.5.12",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-15.5.12.tgz",
|
||||
"integrity": "sha512-RnRjBtH8S8eXCpUNkQ+543DUc7ys8y15VxmFU9HRqlo9BG3CcBUiwNtF8SNoi2xvGCVJq1vl2yYq+3oISBS0Zg==",
|
||||
"version": "15.5.16",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-15.5.16.tgz",
|
||||
"integrity": "sha512-wzdER4JZj+31vNkhaZ1Ght3IsNI8DMwj7VqadfIOqJB5sh8FiOqNSopYADQn6mgEPomzDd/DHqBcfo2fmVMYtg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -574,9 +574,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "15.5.12",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-15.5.12.tgz",
|
||||
"integrity": "sha512-nqa9/7iQlboF1EFtNhWxQA0rQstmYRSBGxSM6g3GxvxHxcoeqVXfGNr9stJOme674m2V7r4E3+jEhhGvSQhJRA==",
|
||||
"version": "15.5.16",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-15.5.16.tgz",
|
||||
"integrity": "sha512-PPTo+cvcanxkuDEuDyZGk28ntmu0WjfkxqlG7hw9Mhsiribs4x1C6h2Culn0cJKqsne1gFjjZRK3ax7WYlSxgg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -590,9 +590,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "15.5.12",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-15.5.12.tgz",
|
||||
"integrity": "sha512-dCzAjqhDHwmoB2M4eYfVKqXs99QdQxNQVpftvP1eGVppamXh/OkDAwV737Zr0KPXEqRUMN4uCjh6mjO+XtF3Mw==",
|
||||
"version": "15.5.16",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-15.5.16.tgz",
|
||||
"integrity": "sha512-Jl0IL9P7S8uNl5oI1TqrQmfmLp7OqjWM58000pVnUVIsHrvPP6m9QDW/uNWYUbmd+8IYvc6MTeZKICstBMBpew==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -606,9 +606,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "15.5.12",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-15.5.12.tgz",
|
||||
"integrity": "sha512-+fpGWvQiITgf7PUtbWY1H7qUSnBZsPPLyyq03QuAKpVoTy/QUx1JptEDTQMVvQhvizCEuNLEeghrQUyXQOekuw==",
|
||||
"version": "15.5.16",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-15.5.16.tgz",
|
||||
"integrity": "sha512-Zf0BIqv/o5uOWfyRkzgGhyV2Tky7HLt0bG+w7XWdaU1JpyX0tltM3TrSfa/Y9c597SJG4CzN47+u2InhgZZ4vg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -622,9 +622,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "15.5.12",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-15.5.12.tgz",
|
||||
"integrity": "sha512-jSLvgdRRL/hrFAPqEjJf1fFguC719kmcptjNVDJl26BnJIpjL3KH5h6mzR4mAweociLQaqvt4UyzfbFjgAdDcw==",
|
||||
"version": "15.5.16",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-15.5.16.tgz",
|
||||
"integrity": "sha512-HCDDU1TRLeUDV180QQTWrs5Oa4lIcI7XH9nF0UVUVmYLN/boZ6LqyFtm3814gc1fv+lOVyKaw5B6bVC9BpXTSQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -638,9 +638,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "15.5.12",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-15.5.12.tgz",
|
||||
"integrity": "sha512-/uaF0WfmYqQgLfPmN6BvULwxY0dufI2mlN2JbOKqqceZh1G4hjREyi7pg03zjfyS6eqNemHAZPSoP84x17vo6w==",
|
||||
"version": "15.5.16",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-15.5.16.tgz",
|
||||
"integrity": "sha512-kvXUY1dn5wxKuMkXxQRUbPjEnKxW1PR9uKOm0zpIpj3574+cFfaePhYFmBVtrOuwt+w34OdDzNaJr5Iixf+HBQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -654,9 +654,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "15.5.12",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-15.5.12.tgz",
|
||||
"integrity": "sha512-xhsL1OvQSfGmlL5RbOmU+FV120urrgFpYLq+6U8C6KIym32gZT6XF/SDE92jKzzlPWskkbjOKCpqk5m4i8PEfg==",
|
||||
"version": "15.5.16",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-15.5.16.tgz",
|
||||
"integrity": "sha512-zpOQuF+eyENMXRjglp2hZCIrUjTdO37suEBnDn1mX4PXSuetXZDMLpjKOh4dYSw3SiDTnOoOUwBl5i5Elr6nnQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -670,9 +670,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "15.5.12",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-15.5.12.tgz",
|
||||
"integrity": "sha512-Z1Dh6lhFkxvBDH1FoW6OU/L6prYwPSlwjLiZkExIAh8fbP6iI/M7iGTQAJPYJ9YFlWobCZ1PHbchFhFYb2ADkw==",
|
||||
"version": "15.5.16",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-15.5.16.tgz",
|
||||
"integrity": "sha512-LnwKYpiSmIzXlTq76hMeeIzZoDcFwu848p6H+QBkGFJIbZphgzNUPdHruJcHM/bFnaFeco0l1Frie5I27VKglA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -1272,12 +1272,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "15.5.12",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-15.5.12.tgz",
|
||||
"integrity": "sha512-Fi/wQ4Etlrn60rz78bebG1i1SR20QxvV8tVp6iJspjLUSHcZoeUXCt+vmWoEcza85ElZzExK/jJ/F6SvtGktjA==",
|
||||
"version": "15.5.16",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-15.5.16.tgz",
|
||||
"integrity": "sha512-aZExBk/V6JCu3NCFc90twdj9L/M3y0+ukeQwUAZbOiqRhAX+h2oMEa0NZFhcpj6HYRYjVS3V2/3xvyOpNnmw7A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@next/env": "15.5.12",
|
||||
"@next/env": "15.5.16",
|
||||
"@swc/helpers": "0.5.15",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
"postcss": "8.4.31",
|
||||
@@ -1290,14 +1290,14 @@
|
||||
"node": "^18.18.0 || ^19.8.0 || >= 20.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "15.5.12",
|
||||
"@next/swc-darwin-x64": "15.5.12",
|
||||
"@next/swc-linux-arm64-gnu": "15.5.12",
|
||||
"@next/swc-linux-arm64-musl": "15.5.12",
|
||||
"@next/swc-linux-x64-gnu": "15.5.12",
|
||||
"@next/swc-linux-x64-musl": "15.5.12",
|
||||
"@next/swc-win32-arm64-msvc": "15.5.12",
|
||||
"@next/swc-win32-x64-msvc": "15.5.12",
|
||||
"@next/swc-darwin-arm64": "15.5.16",
|
||||
"@next/swc-darwin-x64": "15.5.16",
|
||||
"@next/swc-linux-arm64-gnu": "15.5.16",
|
||||
"@next/swc-linux-arm64-musl": "15.5.16",
|
||||
"@next/swc-linux-x64-gnu": "15.5.16",
|
||||
"@next/swc-linux-x64-musl": "15.5.16",
|
||||
"@next/swc-win32-arm64-msvc": "15.5.16",
|
||||
"@next/swc-win32-x64-msvc": "15.5.16",
|
||||
"sharp": "^0.34.3"
|
||||
},
|
||||
"peerDependencies": {
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
"dependencies": {
|
||||
"framer-motion": "^11.15.0",
|
||||
"lucide-react": "^0.468.0",
|
||||
"next": "^15.1.0",
|
||||
"next": "^15.5.16",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1"
|
||||
},
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"extends": "next/core-web-vitals"
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
node_modules/
|
||||
.next/
|
||||
.env.local
|
||||
*.tsbuildinfo
|
||||
@@ -0,0 +1,27 @@
|
||||
# Multi-stage image build for the Next.js app: deps -> builder -> runner.
# Shared base image for all stages.
FROM node:20-alpine AS base
|
||||
|
||||
# Stage "deps": install dependencies from the lockfile only.
FROM base AS deps
|
||||
WORKDIR /app
|
||||
COPY package.json package-lock.json* ./
|
||||
RUN npm ci
|
||||
|
||||
# Stage "builder": copy the sources and run the production build.
FROM base AS builder
|
||||
WORKDIR /app
|
||||
COPY --from=deps /app/node_modules ./node_modules
|
||||
COPY . .
|
||||
# Presumably guarantees /app/public exists so the COPY in the runner stage
# cannot fail for a project without a public/ directory.
RUN mkdir -p public
|
||||
RUN npm run build
|
||||
|
||||
# Stage "runner": runtime image holding only the build output.
FROM base AS runner
|
||||
WORKDIR /app
|
||||
ENV NODE_ENV=production
|
||||
# Dedicated system group/user (gid/uid 1001) so the server does not run as root.
RUN addgroup --system --gid 1001 nodejs
|
||||
RUN adduser --system --uid 1001 nextjs
|
||||
COPY --from=builder /app/public ./public
|
||||
# Expects the build to emit .next/standalone (incl. server.js) —
# NOTE(review): presumably requires output: 'standalone' in the Next.js config; confirm.
COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
|
||||
COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static
|
||||
USER nextjs
|
||||
EXPOSE 3000
|
||||
ENV PORT=3000
|
||||
# Bind to all interfaces so the server is reachable from outside the container.
ENV HOSTNAME="0.0.0.0"
|
||||
CMD ["node", "server.js"]
|
||||
@@ -0,0 +1,61 @@
|
||||
import { NextRequest } from 'next/server'
|
||||
|
||||
const SYSTEM_PROMPT = `Du bist der BreakPilot Compliance Agent — ein technischer Berater fuer die BreakPilot Plattform.
|
||||
|
||||
Kernbotschaften:
|
||||
- BreakPilot ist eine deterministische Regulatory Engineering Plattform
|
||||
- Keine Halluzinationen: Jedes Ergebnis verweist auf eine konkrete Rechtsquelle
|
||||
- EU-souveraen: Kein US-Cloud-Anbieter, on-premise deploybar
|
||||
- 294.000+ atomare Controls aus 380+ Rechtsquellen
|
||||
- Unterstuetzte Regulierungen: DSGVO, NIS2, EU AI Act, Maschinenverordnung, TDDDG, DORA, BSI IT-Grundschutz
|
||||
|
||||
Sage NIEMALS "ChatGPT fuer CE" oder "KI-Assistent". Sage stattdessen "Deterministic Analysis" oder "Compliance Engine".
|
||||
Antworte auf Deutsch, professionell und praezise. Halte Antworten kurz (max 200 Woerter).`
|
||||
|
||||
/**
 * Chat endpoint (placeholder implementation).
 *
 * Validates that the request body is JSON, then streams a fixed German
 * marketing answer word by word as `text/plain` to imitate a streaming
 * agent response. The request payload and SYSTEM_PROMPT are not used yet.
 *
 * @param req - Incoming request; its body must be parseable as JSON.
 * @returns A streaming 200 text response, or a 400 JSON error when the body
 *          is not valid JSON.
 */
export async function POST(req: NextRequest) {
  // A malformed body used to make req.json() throw and surface as a 500;
  // it is a client error, so answer with 400 instead.
  let payload: unknown
  try {
    payload = await req.json()
  } catch {
    return new Response(JSON.stringify({ error: 'Invalid JSON body' }), {
      status: 400,
      headers: { 'Content-Type': 'application/json' },
    })
  }

  // Placeholder: In production, connect to the actual Compliance Agent API
  // For now, return a static response as a stream
  const responses: Record<string, string> = {
    'default': `Vielen Dank fuer Ihre Frage.

BreakPilot ist eine deterministische Regulatory Engineering Plattform. Im Unterschied zu LLM-basierten Tools analysieren wir regulatorische Anforderungen regelbasiert — jedes Ergebnis verweist auf eine konkrete Rechtsquelle (Artikel, Absatz, Erwaegungs\u00ADgrund).

Unsere Plattform umfasst:
- 294.000+ atomare Compliance Controls
- 380+ Rechtsquellen (DSGVO, NIS2, AI Act, Maschinenverordnung u.a.)
- Vollstaendiger Decision Trail: Rechtsquelle → Obligation → Control → Massnahme
- EU-souveraene Infrastruktur ohne US-Cloud-Abhaengigkeit

Fuer eine persoenliche Demo kontaktieren Sie uns unter info@breakpilot.ai.`,
  }

  // Not consumed by the placeholder; referenced to keep them "used".
  void payload
  void SYSTEM_PROMPT

  const responseText = responses['default']

  // Simulate streaming by sending chunks
  const encoder = new TextEncoder()
  let cancelled = false
  const stream = new ReadableStream<Uint8Array>({
    async start(controller) {
      const words = responseText.split(' ')
      // Stop as soon as the consumer cancels (e.g. client disconnect):
      // enqueueing on a cancelled stream throws.
      for (let i = 0; i < words.length && !cancelled; i++) {
        const chunk = (i === 0 ? '' : ' ') + words[i]
        controller.enqueue(encoder.encode(chunk))
        await new Promise(resolve => setTimeout(resolve, 30))
      }
      if (!cancelled) {
        controller.close()
      }
    },
    cancel() {
      cancelled = true
    },
  })

  return new Response(stream, {
    headers: {
      'Content-Type': 'text/plain; charset=utf-8',
      'Cache-Control': 'no-cache',
    },
  })
}
|
||||
@@ -0,0 +1,21 @@
|
||||
import { NextRequest, NextResponse } from 'next/server'
|
||||
|
||||
// Base URL of the consent backend's banner API and the tenant sent with every
// proxied request. Both can be overridden via environment variables; the
// literals are only fallbacks.
const BACKEND_URL = process.env.CONSENT_BACKEND_URL || 'https://macmini:3007/api/sdk/v1/banner'
const TENANT_ID = process.env.CONSENT_TENANT_ID || '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e'

// Statuses for which the Response constructor rejects any non-null body.
const NULL_BODY_STATUSES = [204, 205, 304]

/**
 * DELETE handler: forwards the deletion of a consent record to the consent
 * backend and relays the backend's status and body.
 *
 * @param req    Incoming request (not inspected).
 * @param params Route parameters; `id` identifies the consent record.
 * @returns The backend's response, or 503 with a JSON error when the backend
 *          call fails.
 */
export async function DELETE(req: NextRequest, { params }: { params: Promise<{ id: string }> }) {
  try {
    const { id } = await params
    // Encode the id so characters such as `/`, `?` or `#` cannot change the
    // upstream path or query.
    const res = await fetch(`${BACKEND_URL}/consent/${encodeURIComponent(id)}`, {
      method: 'DELETE',
      headers: { 'X-Tenant-ID': TENANT_ID },
      signal: AbortSignal.timeout(10000),
    })
    // A successful delete is commonly answered with 204. Passing even an empty
    // string as body for such a status makes the Response constructor throw,
    // which previously turned the success into the 503 below.
    const data = NULL_BODY_STATUSES.includes(res.status) ? null : await res.text()
    return new NextResponse(data, {
      status: res.status,
      headers: { 'Content-Type': 'application/json' },
    })
  } catch (err) {
    console.error('Consent proxy error:', err)
    return NextResponse.json({ error: 'Consent service not reachable' }, { status: 503 })
  }
}
|
||||
@@ -0,0 +1,21 @@
|
||||
import { NextRequest, NextResponse } from 'next/server'
|
||||
|
||||
// Consent backend base URL, tenant and default site id. All three can be
// overridden via environment variables; the literals are only fallbacks.
const BACKEND_URL = process.env.CONSENT_BACKEND_URL || 'https://macmini:3007/api/sdk/v1/banner'
const TENANT_ID = process.env.CONSENT_TENANT_ID || '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e'
const SITE_ID = process.env.NEXT_PUBLIC_CONSENT_SITE_ID || 'breakpilot-marketing'

// Statuses for which the Response constructor rejects any non-null body.
const NULL_BODY_STATUSES = [204, 205, 304]

/**
 * GET handler: fetches the banner configuration for a site from the consent
 * backend and relays the backend's status and body.
 *
 * @param req Incoming request; the optional `site_id` query parameter selects
 *            the site, otherwise the configured default is used.
 * @returns The backend's response. When the backend call fails, an empty
 *          configuration (`categories` and `vendors` both empty) is returned
 *          with status 200.
 */
export async function GET(req: NextRequest) {
  try {
    const siteId = req.nextUrl.searchParams.get('site_id') || SITE_ID
    // site_id is caller-controlled: encode it so characters such as `/`, `?`
    // or `#` cannot change the upstream path or query.
    const res = await fetch(`${BACKEND_URL}/config/${encodeURIComponent(siteId)}`, {
      headers: { 'X-Tenant-ID': TENANT_ID },
      signal: AbortSignal.timeout(10000),
    })
    // Null-body statuses must be relayed without a body, otherwise the
    // Response constructor throws and the fallback below hides the real answer.
    const data = NULL_BODY_STATUSES.includes(res.status) ? null : await res.text()
    return new NextResponse(data, {
      status: res.status,
      headers: { 'Content-Type': 'application/json' },
    })
  } catch {
    // Deliberate best-effort fallback: answer with an empty configuration.
    return NextResponse.json({ categories: [], vendors: [] }, { status: 200 })
  }
}
|
||||
@@ -0,0 +1,34 @@
|
||||
import { NextRequest, NextResponse } from 'next/server'
|
||||
|
||||
// Base URL of the consent backend's banner API and the tenant sent with every
// proxied request. Both can be overridden via environment variables; the
// literals are only fallbacks.
const BACKEND_URL = process.env.CONSENT_BACKEND_URL || 'https://macmini:3007/api/sdk/v1/banner'
const TENANT_ID = process.env.CONSENT_TENANT_ID || '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e'

// Statuses for which the Response constructor rejects any non-null body.
const NULL_BODY_STATUSES = [204, 205, 304]

/**
 * POST handler: forwards a consent record to the consent backend, adding the
 * client IP taken from the proxy headers, and relays the backend's status and
 * body.
 *
 * @param req Incoming request; the body must be a JSON object.
 * @returns 400 when the body is not a JSON object, 503 when the backend call
 *          fails, otherwise the backend's response.
 */
export async function POST(req: NextRequest) {
  // Parse and validate the body separately so that client mistakes are
  // reported as 400 instead of "service not reachable".
  let payload: unknown
  try {
    payload = await req.json()
  } catch {
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
  }
  if (typeof payload !== 'object' || payload === null || Array.isArray(payload)) {
    return NextResponse.json({ error: 'Body must be a JSON object' }, { status: 400 })
  }

  try {
    // Inject client IP for backend GeoIP resolution
    const ip = req.headers.get('x-forwarded-for')?.split(',')[0]?.trim()
      || req.headers.get('x-real-ip')
      || null
    const record = payload as Record<string, unknown>
    // Copy instead of mutating the parsed body; ip_address is only set when a
    // header value was found.
    const data = ip ? { ...record, ip_address: ip } : record

    const res = await fetch(`${BACKEND_URL}/consent`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'X-Tenant-ID': TENANT_ID,
      },
      body: JSON.stringify(data),
      signal: AbortSignal.timeout(10000),
    })

    // Null-body statuses must be relayed without a body, otherwise the
    // Response constructor throws and the answer degrades to the 503 below.
    const resBody = NULL_BODY_STATUSES.includes(res.status) ? null : await res.text()
    return new NextResponse(resBody, {
      status: res.status,
      headers: { 'Content-Type': 'application/json' },
    })
  } catch (err) {
    console.error('Consent proxy error:', err)
    return NextResponse.json({ error: 'Consent service not reachable' }, { status: 503 })
  }
}
|
||||
@@ -0,0 +1,43 @@
|
||||
/**
|
||||
* POST /api/scan/start
|
||||
* Proxy to compliance backend /api/compliance/agent/saving-scan/start.
|
||||
*
|
||||
* Body: { url: string; email: string; consent?: boolean }
|
||||
*
|
||||
* Server-side proxy avoids cross-origin POST from breakpilot.ai to
|
||||
* api-dev.breakpilot.ai — same-origin from the browser, secure egress
|
||||
* from the Next.js server. Backend handles rate-limit + TDM + lead-DB.
|
||||
*/
|
||||
import { NextRequest, NextResponse } from 'next/server'
|
||||
|
||||
// Compliance backend origin; overridable via COMPLIANCE_BACKEND_URL.
const BACKEND_URL =
  process.env.COMPLIANCE_BACKEND_URL || 'https://api-dev.breakpilot.ai'

/**
 * Forwards the JSON body of the request to the backend's saving-scan start
 * endpoint and mirrors the backend's status code and JSON payload.
 *
 * @param request Incoming request carrying the JSON body to forward.
 * @returns 400 if the body is not JSON, 503 if the backend call fails,
 *          otherwise the backend's status with its JSON body (an empty object
 *          when the backend body is not JSON).
 */
export async function POST(request: NextRequest) {
  // Step 1: read the client payload; anything unparsable is a client error.
  let payload: unknown
  try {
    payload = await request.json()
  } catch {
    return NextResponse.json({ error: 'Body muss JSON sein' }, { status: 400 })
  }

  // Step 2: relay to the backend, giving up after 20 seconds.
  try {
    const target = `${BACKEND_URL}/api/compliance/agent/saving-scan/start`
    const upstream = await fetch(target, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(20000),
    })
    // A non-JSON backend body is replaced by an empty object.
    const json = await upstream.json().catch(() => ({}))
    return NextResponse.json(json, { status: upstream.status })
  } catch {
    return NextResponse.json({ error: 'Backend nicht erreichbar' }, { status: 503 })
  }
}
|
||||
@@ -0,0 +1,29 @@
|
||||
/**
|
||||
* GET /api/scan/status/<checkId>
|
||||
* Proxy to compliance backend /api/compliance/agent/compliance-check/<id>.
|
||||
*
|
||||
* Polled every ~5s by the savings-scan page until status==completed/failed.
|
||||
*/
|
||||
import { NextRequest, NextResponse } from 'next/server'
|
||||
|
||||
// Compliance backend origin; overridable via COMPLIANCE_BACKEND_URL.
const BACKEND_URL =
  process.env.COMPLIANCE_BACKEND_URL || 'https://api-dev.breakpilot.ai'

/**
 * Fetches the state of a compliance check from the backend and mirrors the
 * backend's status code and JSON payload.
 *
 * @param _request Incoming request (unused).
 * @param params   Route parameters; `checkId` identifies the check.
 * @returns 503 if the backend call fails, otherwise the backend's status with
 *          its JSON body (an empty object when the backend body is not JSON).
 */
export async function GET(
  _request: NextRequest,
  { params }: { params: Promise<{ checkId: string }> },
) {
  const { checkId } = await params
  try {
    const res = await fetch(
      // Encode the id so characters such as `/`, `?` or `#` cannot change the
      // upstream path or query.
      `${BACKEND_URL}/api/compliance/agent/compliance-check/${encodeURIComponent(checkId)}`,
      { signal: AbortSignal.timeout(15000) },
    )
    // A non-JSON backend body is replaced by an empty object.
    const data = await res.json().catch(() => ({}))
    return NextResponse.json(data, { status: res.status })
  } catch {
    return NextResponse.json(
      { error: 'Backend nicht erreichbar' }, { status: 503 },
    )
  }
}
|
||||
@@ -0,0 +1,18 @@
|
||||
import Navbar from '@/components/layout/Navbar'
|
||||
import Footer from '@/components/layout/Footer'
|
||||
import ChatFAB from '@/components/layout/ChatFAB'
|
||||
import ArchitectureSection from '@/components/sections/ArchitectureSection'
|
||||
import SovereignSection from '@/components/sections/SovereignSection'
|
||||
|
||||
/**
 * Architecture page: navbar, a top spacer, the architecture and sovereignty
 * sections, footer and the chat button.
 *
 * The page content is wrapped in a `<main>` landmark, as the other pages that
 * render sections between navbar and footer do, so assistive technology can
 * jump straight to it.
 */
export default function ArchitekturPage() {
  return (
    <>
      <Navbar />
      <main>
        {/* Empty spacer that adds top padding above the first section */}
        <div className="pt-16" />
        <ArchitectureSection />
        <SovereignSection />
      </main>
      <Footer />
      <ChatFAB />
    </>
  )
}
|
||||
@@ -0,0 +1,26 @@
|
||||
import Navbar from '@/components/layout/Navbar'
|
||||
import Footer from '@/components/layout/Footer'
|
||||
import ChatFAB from '@/components/layout/ChatFAB'
|
||||
import PageHeader from '@/components/ui/PageHeader'
|
||||
import CEFlowSection from '@/components/sections/CEFlowSection'
|
||||
|
||||
/**
 * CE process page: a width-constrained page header followed by the CE flow
 * section, framed by navbar, footer and the chat button.
 */
export default function CEProzessPage() {
  // Header block, kept inside the centered max-width container.
  const header = (
    <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
      <PageHeader
        tag="CE-PROZESS"
        title="Von der Maschinenbeschreibung"
        titleHighlight="zur CE-Akte."
        subtitle="6 deterministische Schritte — vom Textfeld zur vollständigen Technischen Dokumentation nach MVO 2023/1230."
      />
    </div>
  )

  return (
    <>
      <Navbar />
      <main>
        {header}
        <CEFlowSection />
      </main>
      <Footer />
      <ChatFAB />
    </>
  )
}
|
||||
@@ -0,0 +1,179 @@
|
||||
import Link from 'next/link'
|
||||
|
||||
export default function DatenschutzPage() {
|
||||
return (
|
||||
<div className="min-h-screen bg-enterprise-dark text-white">
|
||||
<div className="max-w-3xl mx-auto px-4 py-24">
|
||||
<Link href="/" className="text-sm text-white/40 hover:text-white/60 transition-colors mb-8 inline-block">
|
||||
← Zurueck zur Startseite
|
||||
</Link>
|
||||
|
||||
<h1 className="text-4xl font-bold mb-8">Datenschutzerklaerung</h1>
|
||||
|
||||
<div className="space-y-6 text-white/60 text-sm leading-relaxed">
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">1. Verantwortlicher</h2>
|
||||
<p>BreakPilot GmbH (i.Gr.)</p>
|
||||
<p>[Adresse wird nach Gruendung ergaenzt]</p>
|
||||
<p>E-Mail: datenschutz@breakpilot.ai</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">2. Datenschutzbeauftragter</h2>
|
||||
<p>
|
||||
[Wird nach Gruendung benannt]<br />
|
||||
E-Mail: datenschutz@breakpilot.ai
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">3. Hosting</h2>
|
||||
<p>
|
||||
Diese Website wird auf Servern der Hetzner Online GmbH in Deutschland gehostet.
|
||||
Es findet kein Drittlandtransfer fuer das Hosting statt.
|
||||
</p>
|
||||
<p className="mt-2">
|
||||
<span className="text-white font-medium">Rechtsgrundlage:</span> Art. 6 Abs. 1 lit. f DSGVO (berechtigtes Interesse).
|
||||
</p>
|
||||
<p className="mt-1">
|
||||
<span className="text-white font-medium">Interessenabwaegung:</span> Unser berechtigtes Interesse liegt im
|
||||
zuverlaessigen und sicheren Betrieb der Website. Ohne Hosting-Infrastruktur koennen wir unser Angebot
|
||||
nicht bereitstellen. Die Verarbeitung beschraenkt sich auf technisch notwendige Verbindungsdaten
|
||||
(IP-Adresse, Zeitstempel). Entgegenstehende Interessen der Betroffenen ueberwiegen nicht, da die
|
||||
Daten nur kurzzeitig (7 Tage) gespeichert, nicht mit anderen Datenquellen zusammengefuehrt und
|
||||
ausschliesslich zur Sicherstellung des Betriebs und der IT-Sicherheit verwendet werden.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">4. Cookies und Einwilligungsverwaltung</h2>
|
||||
<p>
|
||||
Diese Website verwendet ein Consent-Management-System (BreakPilot CMP), um Ihre Cookie-Einstellungen
|
||||
zu verwalten. Beim ersten Besuch wird Ihnen ein Cookie-Banner angezeigt, ueber das Sie Ihre
|
||||
Einwilligung fuer verschiedene Cookie-Kategorien erteilen oder verweigern koennen.
|
||||
</p>
|
||||
<p className="mt-2">
|
||||
<span className="text-white font-medium">Rechtsgrundlage:</span> Art. 6 Abs. 1 lit. a DSGVO (Einwilligung)
|
||||
i.V.m. Paragraph 25 Abs. 1 TDDDG fuer nicht-essenzielle Cookies.
|
||||
Fuer technisch notwendige Cookies: Paragraph 25 Abs. 2 TDDDG (unbedingt erforderlich).
|
||||
</p>
|
||||
<h3 className="text-sm font-semibold text-white mt-4 mb-2">Eingesetzte Cookies</h3>
|
||||
<div className="overflow-x-auto">
|
||||
<table className="w-full text-xs border border-white/10">
|
||||
<thead>
|
||||
<tr className="border-b border-white/10 text-white/80">
|
||||
<th className="text-left px-3 py-2">Name</th>
|
||||
<th className="text-left px-3 py-2">Anbieter</th>
|
||||
<th className="text-left px-3 py-2">Zweck</th>
|
||||
<th className="text-left px-3 py-2">Speicherdauer</th>
|
||||
<th className="text-left px-3 py-2">Typ</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody className="divide-y divide-white/5">
|
||||
<tr>
|
||||
<td className="px-3 py-2 font-mono">bp_consent</td>
|
||||
<td className="px-3 py-2">BreakPilot (First-Party)</td>
|
||||
<td className="px-3 py-2">Speichert Ihre Cookie-Einwilligung</td>
|
||||
<td className="px-3 py-2">12 Monate</td>
|
||||
<td className="px-3 py-2">Notwendig</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<p className="mt-2">
|
||||
Sie koennen Ihre Einwilligung jederzeit widerrufen, indem Sie den Link
|
||||
"Cookie-Einstellungen" im Seitenfuss klicken. Der Widerruf ist gemaess Art. 7 Abs. 3 DSGVO
|
||||
genauso einfach wie die Erteilung der Einwilligung.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">5. Server-Logfiles</h2>
|
||||
<p>
|
||||
Der Hosting-Provider erhebt technisch notwendige Logfiles (IP-Adresse, Browsertyp, Zeitstempel,
|
||||
aufgerufene Seite, HTTP-Statuscode).
|
||||
</p>
|
||||
<p className="mt-2">
|
||||
<span className="text-white font-medium">Rechtsgrundlage:</span> Art. 6 Abs. 1 lit. f DSGVO (berechtigtes Interesse).
|
||||
</p>
|
||||
<p className="mt-1">
|
||||
<span className="text-white font-medium">Interessenabwaegung:</span> Die Erhebung von Server-Logfiles ist
|
||||
fuer die Erkennung und Abwehr von Cyberangriffen, die Fehlerbehebung und die Gewaehrleistung der
|
||||
Systemstabilitaet unerlaesslich. Die Daten werden automatisiert nach 7 Tagen geloescht und nicht
|
||||
zur Profilbildung oder Identifizierung einzelner Nutzer verwendet. Eine Zusammenfuehrung mit anderen
|
||||
Datenquellen findet nicht statt. Das Interesse der Betroffenen am Schutz ihrer Daten wird durch die
|
||||
kurze Speicherdauer und die rein technische Nutzung angemessen gewahrt.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">6. Schriften</h2>
|
||||
<p>
|
||||
Diese Website verwendet die Schriftarten Inter, Plus Jakarta Sans und JetBrains Mono.
|
||||
Die Schriften werden lokal auf unserem Server gehostet — es findet kein Abruf von
|
||||
externen Servern (z.B. Google Fonts) statt. Es werden keine personenbezogenen Daten
|
||||
an Dritte uebermittelt.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">7. Zwecke der Verarbeitung</h2>
|
||||
<ul className="list-disc list-inside space-y-1">
|
||||
<li>Bereitstellung und Betrieb der Website (Art. 6 Abs. 1 lit. f DSGVO)</li>
|
||||
<li>Verwaltung Ihrer Cookie-Einwilligungen (Art. 6 Abs. 1 lit. c DSGVO — Nachweispflicht Art. 7 Abs. 1 DSGVO)</li>
|
||||
<li>Kontaktaufnahme per E-Mail (Art. 6 Abs. 1 lit. b DSGVO — vorvertragliche Massnahmen)</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">8. Empfaenger und Auftragsverarbeiter</h2>
|
||||
<ul className="list-disc list-inside space-y-1">
|
||||
<li>Hetzner Online GmbH, Industriestr. 25, 91710 Gunzenhausen — Hosting (AVV nach Art. 28 DSGVO)</li>
|
||||
</ul>
|
||||
<p className="mt-1">Schriftarten werden lokal gehostet — kein Drittanbieter-Transfer.</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">9. Speicherdauer</h2>
|
||||
<ul className="list-disc list-inside space-y-1">
|
||||
<li>Server-Logfiles: 7 Tage</li>
|
||||
<li>Cookie-Einwilligung (bp_consent): 12 Monate</li>
|
||||
<li>Consent-Nachweis (Backend): 13 Monate (CNIL-Empfehlung)</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">10. Ihre Rechte</h2>
|
||||
<p>Sie haben gegenueber uns folgende Rechte bezueglich Ihrer personenbezogenen Daten:</p>
|
||||
<ul className="list-disc list-inside space-y-1 mt-2">
|
||||
<li>Recht auf Auskunft (Art. 15 DSGVO)</li>
|
||||
<li>Recht auf Berichtigung (Art. 16 DSGVO)</li>
|
||||
<li>Recht auf Loeschung (Art. 17 DSGVO)</li>
|
||||
<li>Recht auf Einschraenkung der Verarbeitung (Art. 18 DSGVO)</li>
|
||||
<li>Recht auf Datenuebertragbarkeit (Art. 20 DSGVO)</li>
|
||||
<li>Recht auf Widerspruch (Art. 21 DSGVO)</li>
|
||||
</ul>
|
||||
<p className="mt-2">
|
||||
Zur Ausuebung Ihrer Rechte wenden Sie sich an: datenschutz@breakpilot.ai
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">11. Beschwerderecht</h2>
|
||||
<p>
|
||||
Sie haben das Recht, sich bei einer Aufsichtsbehoerde zu beschweren.
|
||||
Zustaendig fuer uns ist:
|
||||
</p>
|
||||
<p className="mt-1">
|
||||
Die Landesbeauftragte fuer den Datenschutz Niedersachsen<br />
|
||||
Prinzenstrasse 5, 30159 Hannover<br />
|
||||
<a href="https://www.lfd.niedersachsen.de" className="text-accent-electric hover:underline" target="_blank" rel="noopener noreferrer">
|
||||
www.lfd.niedersachsen.de
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,302 @@
|
||||
/* Self-hosted fonts — kein Drittlandtransfer zu Google */
|
||||
@font-face {
|
||||
font-family: 'Inter';
|
||||
font-style: normal;
|
||||
font-weight: 300 900;
|
||||
font-display: swap;
|
||||
src: url('/fonts/Inter-Latin.woff2') format('woff2');
|
||||
unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+0304, U+0308, U+0329, U+2000-206F, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
|
||||
}
|
||||
@font-face {
|
||||
font-family: 'Plus Jakarta Sans';
|
||||
font-style: normal;
|
||||
font-weight: 400 800;
|
||||
font-display: swap;
|
||||
src: url('/fonts/PlusJakartaSans-Latin.woff2') format('woff2');
|
||||
unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+0304, U+0308, U+0329, U+2000-206F, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
|
||||
}
|
||||
@font-face {
|
||||
font-family: 'JetBrains Mono';
|
||||
font-style: normal;
|
||||
font-weight: 400 600;
|
||||
font-display: swap;
|
||||
src: url('/fonts/JetBrainsMono-Latin.woff2') format('woff2');
|
||||
unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+0304, U+0308, U+0329, U+2000-206F, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
|
||||
}
|
||||
|
||||
@tailwind base;
|
||||
@tailwind components;
|
||||
@tailwind utilities;
|
||||
|
||||
:root {
|
||||
--bg-primary: #0a0a1a;
|
||||
--bg-secondary: #06060f;
|
||||
--bg-card: rgba(255, 255, 255, 0.06);
|
||||
--bg-card-hover: rgba(255, 255, 255, 0.10);
|
||||
--border-subtle: rgba(255, 255, 255, 0.08);
|
||||
--text-primary: #ffffff;
|
||||
--text-secondary: rgba(255, 255, 255, 0.6);
|
||||
--text-muted: rgba(255, 255, 255, 0.4);
|
||||
--accent-electric: #3b82f6;
|
||||
--accent-signal: #22c55e;
|
||||
--accent-indigo: #6366f1;
|
||||
--accent-purple: #a78bfa;
|
||||
--glass-bg: rgba(255, 255, 255, 0.06);
|
||||
--glass-border: rgba(255, 255, 255, 0.08);
|
||||
--glass-hover: rgba(255, 255, 255, 0.10);
|
||||
--scrollbar-thumb: rgba(255, 255, 255, 0.15);
|
||||
--scrollbar-hover: rgba(255, 255, 255, 0.25);
|
||||
}
|
||||
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
html {
|
||||
scroll-behavior: smooth;
|
||||
}
|
||||
|
||||
body {
|
||||
background: var(--bg-primary);
|
||||
color: var(--text-primary);
|
||||
font-family: 'Inter', 'Plus Jakarta Sans', system-ui, sans-serif;
|
||||
}
|
||||
|
||||
::selection {
|
||||
background: rgba(59, 130, 246, 0.3);
|
||||
color: white;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar {
|
||||
width: 6px;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar-track {
|
||||
background: transparent;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar-thumb {
|
||||
background: var(--scrollbar-thumb);
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar-thumb:hover {
|
||||
background: var(--scrollbar-hover);
|
||||
}
|
||||
|
||||
@layer utilities {
|
||||
.glass {
|
||||
background: var(--glass-bg);
|
||||
backdrop-filter: blur(20px);
|
||||
-webkit-backdrop-filter: blur(20px);
|
||||
border: 1px solid var(--glass-border);
|
||||
}
|
||||
|
||||
.glass-hover:hover {
|
||||
background: var(--glass-hover);
|
||||
}
|
||||
|
||||
.gradient-text {
|
||||
background: linear-gradient(135deg, var(--accent-electric), var(--accent-indigo), var(--accent-purple));
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
background-clip: text;
|
||||
}
|
||||
|
||||
.gradient-text-signal {
|
||||
background: linear-gradient(135deg, var(--accent-signal), #34d399, var(--accent-electric));
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
background-clip: text;
|
||||
}
|
||||
|
||||
.text-shadow-glow {
|
||||
text-shadow: 0 0 40px rgba(59, 130, 246, 0.3);
|
||||
}
|
||||
|
||||
.glow-blue {
|
||||
box-shadow: 0 0 30px rgba(59, 130, 246, 0.15), 0 0 60px rgba(59, 130, 246, 0.05);
|
||||
}
|
||||
|
||||
.glow-signal {
|
||||
box-shadow: 0 0 20px rgba(34, 197, 94, 0.15);
|
||||
}
|
||||
|
||||
.mono-label {
|
||||
font-family: 'JetBrains Mono', monospace;
|
||||
font-size: 0.75rem;
|
||||
letter-spacing: 0.1em;
|
||||
text-transform: uppercase;
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.enterprise-grid {
|
||||
background-image:
|
||||
linear-gradient(rgba(255, 255, 255, 0.02) 1px, transparent 1px),
|
||||
linear-gradient(90deg, rgba(255, 255, 255, 0.02) 1px, transparent 1px);
|
||||
background-size: 60px 60px;
|
||||
}
|
||||
|
||||
.section-alt {
|
||||
background: var(--bg-secondary);
|
||||
}
|
||||
}
|
||||
|
||||
/* === Light Mode === */
|
||||
.theme-light {
|
||||
--bg-primary: #ffffff;
|
||||
--bg-secondary: #f8fafc;
|
||||
--bg-card: #f8fafc;
|
||||
--bg-card-hover: #f1f5f9;
|
||||
--border-subtle: #e2e8f0;
|
||||
--text-primary: #0f172a;
|
||||
--text-secondary: #334155;
|
||||
--text-muted: #64748b;
|
||||
--accent-electric: #2563eb;
|
||||
--accent-signal: #059669;
|
||||
--accent-indigo: #4f46e5;
|
||||
--accent-purple: #7c3aed;
|
||||
--glass-bg: #f8fafc;
|
||||
--glass-border: #e2e8f0;
|
||||
--glass-hover: #f1f5f9;
|
||||
--scrollbar-thumb: #cbd5e1;
|
||||
--scrollbar-hover: #94a3b8;
|
||||
}
|
||||
|
||||
.theme-light body {
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
.theme-light ::selection {
|
||||
background: rgba(37, 99, 235, 0.15);
|
||||
color: #0f172a;
|
||||
}
|
||||
|
||||
/* Text overrides */
|
||||
.theme-light .text-white { color: #0f172a; }
|
||||
.theme-light .text-white\/80 { color: #1e293b; }
|
||||
.theme-light .text-white\/70 { color: #334155; }
|
||||
.theme-light .text-white\/60 { color: #475569; }
|
||||
.theme-light .text-white\/50 { color: #64748b; }
|
||||
.theme-light .text-white\/40 { color: #64748b; }
|
||||
.theme-light .text-white\/30 { color: #94a3b8; }
|
||||
.theme-light .text-white\/20 { color: #cbd5e1; }
|
||||
|
||||
/* Card backgrounds */
|
||||
.theme-light .bg-white\/\[0\.06\],
|
||||
.theme-light .bg-white\/\[0\.04\],
|
||||
.theme-light .bg-white\/\[0\.03\] {
|
||||
background-color: #f8fafc !important;
|
||||
}
|
||||
|
||||
.theme-light .border-white\/\[0\.08\],
|
||||
.theme-light .border-white\/\[0\.06\],
|
||||
.theme-light .border-white\/10 {
|
||||
border-color: #e2e8f0 !important;
|
||||
}
|
||||
|
||||
/* No blur in light mode */
|
||||
.theme-light .backdrop-blur-xl,
|
||||
.theme-light .backdrop-blur {
|
||||
backdrop-filter: none !important;
|
||||
-webkit-backdrop-filter: none !important;
|
||||
}
|
||||
|
||||
/* Navbar */
|
||||
.theme-light .bg-enterprise-dark\/80 {
|
||||
background-color: rgba(255, 255, 255, 0.9) !important;
|
||||
}
|
||||
|
||||
/* Enterprise grid */
|
||||
.theme-light .enterprise-grid {
|
||||
background-image:
|
||||
linear-gradient(rgba(0, 0, 0, 0.03) 1px, transparent 1px),
|
||||
linear-gradient(90deg, rgba(0, 0, 0, 0.03) 1px, transparent 1px);
|
||||
}
|
||||
|
||||
/* Gradient text — stronger in light */
|
||||
.theme-light .gradient-text {
|
||||
background: linear-gradient(135deg, #2563eb, #4f46e5, #7c3aed) !important;
|
||||
-webkit-background-clip: text !important;
|
||||
background-clip: text !important;
|
||||
}
|
||||
|
||||
.theme-light .gradient-text-signal {
|
||||
background: linear-gradient(135deg, #059669, #10b981, #2563eb) !important;
|
||||
-webkit-background-clip: text !important;
|
||||
background-clip: text !important;
|
||||
}
|
||||
|
||||
/* Mono label */
|
||||
.theme-light .mono-label { color: #64748b; }
|
||||
|
||||
/* Status dots */
|
||||
.theme-light .text-shadow-glow { text-shadow: none; }
|
||||
.theme-light .glow-blue { box-shadow: 0 4px 14px -3px rgba(37, 99, 235, 0.15); }
|
||||
|
||||
/* Accent backgrounds */
|
||||
.theme-light .bg-accent-electric\/10 { background-color: #eff6ff !important; }
|
||||
.theme-light .bg-accent-electric\/5 { background-color: #f0f9ff !important; }
|
||||
.theme-light .bg-accent-indigo\/10 { background-color: #eef2ff !important; }
|
||||
.theme-light .bg-accent-indigo\/5 { background-color: #eef2ff !important; }
|
||||
.theme-light .bg-accent-purple\/10 { background-color: #faf5ff !important; }
|
||||
.theme-light .bg-accent-purple\/\[0\.04\] { background-color: #faf5ff !important; }
|
||||
.theme-light .bg-amber-500\/10 { background-color: #fefce8 !important; }
|
||||
|
||||
/* Colored borders */
|
||||
.theme-light .border-red-500\/20 { border-color: #fecaca !important; }
|
||||
.theme-light .border-red-500\/15 { border-color: #fecaca !important; }
|
||||
.theme-light .border-green-500\/20 { border-color: #bbf7d0 !important; }
|
||||
.theme-light .border-green-500\/15 { border-color: #bbf7d0 !important; }
|
||||
.theme-light .border-accent-electric\/30 { border-color: #bfdbfe !important; }
|
||||
.theme-light .border-accent-indigo\/30 { border-color: #c7d2fe !important; }
|
||||
.theme-light .border-accent-purple\/30 { border-color: #ddd6fe !important; }
|
||||
.theme-light .border-accent-purple\/20 { border-color: #e9d5ff !important; }
|
||||
.theme-light .border-accent-electric\/20 { border-color: #bfdbfe !important; }
|
||||
|
||||
/* Colored text */
|
||||
.theme-light .text-red-400 { color: #dc2626 !important; }
|
||||
.theme-light .text-green-400 { color: #059669 !important; }
|
||||
.theme-light .text-amber-400 { color: #d97706 !important; }
|
||||
.theme-light .text-accent-electric { color: #2563eb !important; }
|
||||
.theme-light .text-accent-indigo { color: #4f46e5 !important; }
|
||||
.theme-light .text-accent-purple { color: #7c3aed !important; }
|
||||
.theme-light .text-accent-signal\/80 { color: #059669 !important; }
|
||||
|
||||
/* Colored backgrounds for tinted cards */
|
||||
.theme-light .bg-red-500\/\[0\.04\] { background-color: #fef2f2 !important; }
|
||||
.theme-light .bg-red-500\/\[0\.03\] { background-color: #fef2f2 !important; }
|
||||
.theme-light .bg-green-500\/\[0\.04\] { background-color: #f0fdf4 !important; }
|
||||
.theme-light .bg-green-500\/\[0\.03\] { background-color: #f0fdf4 !important; }
|
||||
.theme-light .bg-red-500\/10 { background-color: #fef2f2 !important; }
|
||||
.theme-light .bg-blue-500\/10 { background-color: #eff6ff !important; }
|
||||
.theme-light .bg-green-500\/10 { background-color: #f0fdf4 !important; }
|
||||
|
||||
/* Terminal / code blocks */
|
||||
.theme-light .bg-enterprise-darker {
|
||||
background-color: #f1f5f9 !important;
|
||||
}
|
||||
|
||||
/* Chat panel */
|
||||
.theme-light .bg-black\/90 {
|
||||
background-color: #ffffff !important;
|
||||
border: 1px solid #e2e8f0 !important;
|
||||
}
|
||||
.theme-light .bg-black\/60 {
|
||||
background-color: rgba(0, 0, 0, 0.1) !important;
|
||||
}
|
||||
|
||||
/* Hover states */
|
||||
.theme-light .hover\:bg-white\/\[0\.06\]:hover,
|
||||
.theme-light .hover\:bg-white\/\[0\.04\]:hover { background-color: #f1f5f9 !important; }
|
||||
.theme-light .hover\:bg-white\/20:hover { background-color: #e2e8f0 !important; }
|
||||
|
||||
/* Shadows */
|
||||
.theme-light .shadow-lg { box-shadow: 0 4px 6px -1px rgba(0,0,0,0.06) !important; }
|
||||
.theme-light .shadow-2xl { box-shadow: 0 10px 25px -5px rgba(0,0,0,0.08) !important; }
|
||||
|
||||
/* Table */
|
||||
.theme-light .hover\:bg-white\/\[0\.02\]:hover { background-color: #f8fafc !important; }
|
||||
.theme-light .bg-white\/\[0\.02\] { background-color: #f8fafc !important; }
|
||||
@@ -0,0 +1,10 @@
|
||||
<svg width="32" height="32" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<rect width="32" height="32" rx="8" fill="url(#g)"/>
|
||||
<text x="16" y="22" text-anchor="middle" font-family="Inter, sans-serif" font-weight="700" font-size="18" fill="white">B</text>
|
||||
<defs>
|
||||
<linearGradient id="g" x1="0" y1="0" x2="32" y2="32">
|
||||
<stop stop-color="#3b82f6"/>
|
||||
<stop offset="1" stop-color="#6366f1"/>
|
||||
</linearGradient>
|
||||
</defs>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 468 B |
@@ -0,0 +1,62 @@
|
||||
import Link from 'next/link'
|
||||
|
||||
export default function ImpressumPage() {
|
||||
return (
|
||||
<div className="min-h-screen bg-enterprise-dark text-white">
|
||||
<div className="max-w-3xl mx-auto px-4 py-24">
|
||||
<Link href="/" className="text-sm text-white/40 hover:text-white/60 transition-colors mb-8 inline-block">
|
||||
← Zurueck zur Startseite
|
||||
</Link>
|
||||
|
||||
<h1 className="text-4xl font-bold mb-8">Impressum</h1>
|
||||
|
||||
<div className="space-y-6 text-white/60 text-sm">
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">Angaben gemaess Paragraph 5 DDG</h2>
|
||||
<p>BreakPilot GmbH (i.Gr.)</p>
|
||||
<p>[Adresse wird nach Gruendung ergaenzt]</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">Kontakt</h2>
|
||||
<p>E-Mail: info@breakpilot.ai</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">Verantwortlich fuer den Inhalt nach Paragraph 18 Abs. 2 MStV</h2>
|
||||
<p>[Wird nach Gruendung ergaenzt]</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">EU-Streitschlichtung</h2>
|
||||
<p>
|
||||
Die Europaeische Kommission stellt eine Plattform zur Online-Streitbeilegung (OS) bereit.
|
||||
Unsere E-Mail-Adresse finden Sie oben im Impressum.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white mb-2">Quellen und Lizenzen der Compliance-Inhalte</h2>
|
||||
<p>
|
||||
Die BreakPilot Compliance-Plattform stuetzt sich auf rund 315.000 klassifizierte
|
||||
Controls aus oeffentlichen Quellen: EU-Recht (EUR-Lex), deutsches und oesterreichisches
|
||||
Bundesrecht, US Federal Code (OSHA, NIST), Behoerden-Leitfaeden (ENISA, EDPB, BAuA),
|
||||
freie Sicherheits-Frameworks unter CC-BY-SA (OWASP-Familie, OECD AI Principles) und
|
||||
eigene Texte. Jeder Control traegt eine deterministische Lizenzregel (R1 woertlich, R2
|
||||
mit Attribution, R3 nur Identifier-Verweis), die das Render-Verhalten in Berichten,
|
||||
PDF-Exports und Frontend steuert. Die vollstaendige Quellenliste mit Aufschluesselung
|
||||
pro Lizenzklasse ist im SDK unter <code className="text-white/80">/sdk/licenses</code>
|
||||
einsehbar. Pflicht-Attributionen fuer R2-Quellen erscheinen automatisch im
|
||||
Quellen-Footer jedes generierten Berichts.
|
||||
</p>
|
||||
<p className="mt-2 text-xs">
|
||||
Hinweis: Dieser Pauschalvermerk ersetzt nicht die werknahe Attribution. Jede
|
||||
Berichts- oder Frontend-Ausgabe nennt die konkret verwendeten Quellen direkt am
|
||||
Werk (Auto-Footer in PDFs, Inline-Citation im Frontend).
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
import type { Metadata } from 'next'
|
||||
import { AppProvider } from '@/lib/context'
|
||||
import ConsentBanner from '@/components/layout/ConsentBanner'
|
||||
import ScriptManager from '@/components/layout/ScriptManager'
|
||||
import './globals.css'
|
||||
|
||||
// Site-wide document metadata (title, description, keywords, robots and
// Open Graph fields) exported for the framework to pick up.
export const metadata: Metadata = {
  title: 'BreakPilot | Deterministic Regulatory Engineering Platform',
  description: 'Deterministische regulatorische Analyse für Maschinenbau, Fertigung und kritische Infrastruktur. Keine Halluzinationen. Keine US-Cloud. Volle Nachvollziehbarkeit.',
  keywords: ['Compliance', 'Regulatory Engineering', 'CE-Kennzeichnung', 'Maschinenverordnung', 'DSGVO', 'NIS2', 'AI Act', 'Sovereign AI', 'CRA', 'OTA'],
  robots: { index: true, follow: true },
  openGraph: {
    title: 'BreakPilot | Deterministic Regulatory Engineering',
    description: 'Deterministische regulatorische Analyse. Keine Halluzinationen. Keine Compliance-Lücken.',
    type: 'website',
    locale: 'de_DE',
  },
}

/**
 * Root layout: renders the German-language document shell and places the page
 * content, the consent banner and the script manager inside the app provider.
 *
 * @param props.children Page content to render inside the provider.
 */
export default function RootLayout(props: { children: React.ReactNode }) {
  const { children } = props

  return (
    <html lang="de">
      <body className="antialiased">
        <AppProvider>
          {children}
          <ConsentBanner />
          <ScriptManager />
        </AppProvider>
      </body>
    </html>
  )
}
|
||||
@@ -0,0 +1,25 @@
|
||||
import Navbar from '@/components/layout/Navbar'
|
||||
import Footer from '@/components/layout/Footer'
|
||||
import ChatFAB from '@/components/layout/ChatFAB'
|
||||
import HeroSection from '@/components/sections/HeroSection'
|
||||
import ProblemFlowSection from '@/components/sections/ProblemFlowSection'
|
||||
import SavingsSection from '@/components/sections/SavingsSection'
|
||||
import UseCaseCards from '@/components/sections/UseCaseCards'
|
||||
import TrustBar from '@/components/sections/TrustBar'
|
||||
|
||||
/**
 * Landing page: navbar, the five home sections inside `<main>`, then the
 * footer and the floating chat button.
 */
export default function HomePage() {
  // Sections are listed in their on-page order.
  const sections = (
    <main>
      <HeroSection />
      <ProblemFlowSection />
      <SavingsSection />
      <UseCaseCards />
      <TrustBar />
    </main>
  )

  return (
    <>
      <Navbar />
      {sections}
      <Footer />
      <ChatFAB />
    </>
  )
}
|
||||
@@ -0,0 +1,20 @@
|
||||
import Navbar from '@/components/layout/Navbar'
|
||||
import Footer from '@/components/layout/Footer'
|
||||
import ChatFAB from '@/components/layout/ChatFAB'
|
||||
import PlatformBridgeSection from '@/components/sections/PlatformBridgeSection'
|
||||
import ComparisonSection from '@/components/sections/ComparisonSection'
|
||||
import ContinuousSection from '@/components/sections/ContinuousSection'
|
||||
|
||||
/**
 * Platform page: navbar, a spacer, the three platform sections, then the
 * footer and the floating chat button.
 */
export default function PlattformPage() {
  // Empty block that only contributes top padding.
  // NOTE(review): presumably offsets the navbar — confirm against Navbar styling.
  const spacer = <div className="pt-20" />

  return (
    <>
      <Navbar />
      {spacer}
      <PlatformBridgeSection />
      <ComparisonSection />
      <ContinuousSection />
      <Footer />
      <ChatFAB />
    </>
  )
}
|
||||
@@ -0,0 +1,15 @@
|
||||
import Navbar from '@/components/layout/Navbar'
|
||||
import Footer from '@/components/layout/Footer'
|
||||
import ChatFAB from '@/components/layout/ChatFAB'
|
||||
import PricingSection from '@/components/sections/PricingSection'
|
||||
|
||||
/**
 * Pricing page: renders `PricingSection` between the shared navbar and
 * footer, followed by the floating chat button.
 */
export default function PreisePage() {
  const page = (
    <>
      <Navbar />
      <PricingSection />
      <Footer />
      <ChatFAB />
    </>
  )

  return page
}
|
||||
@@ -0,0 +1,34 @@
|
||||
import Navbar from '@/components/layout/Navbar'
|
||||
import Footer from '@/components/layout/Footer'
|
||||
import ChatFAB from '@/components/layout/ChatFAB'
|
||||
import PageHeader from '@/components/ui/PageHeader'
|
||||
import DeltaImpactSection from '@/components/sections/DeltaImpactSection'
|
||||
import SecurityToolchainSection from '@/components/sections/SecurityToolchainSection'
|
||||
import CRAFahrplanSection from '@/components/sections/CRAFahrplanSection'
|
||||
import SafetySection from '@/components/sections/SafetySection'
|
||||
import TargetSection from '@/components/sections/TargetSection'
|
||||
|
||||
/**
 * Product-compliance page: a width-constrained `PageHeader` followed by the
 * five product-compliance sections inside `<main>`, framed by the shared
 * navbar, footer and floating chat button.
 */
export default function ProductCompliancePage() {
  // Header copy is kept together so the JSX below stays structural.
  const header = (
    <PageHeader
      tag="PRODUCT COMPLIANCE"
      title="Muss ich mein Produkt"
      titleHighlight="redesignen?"
      subtitle="Delta-Impact-Analyse für bestehende Produkte. CRA, RED, Maschinenverordnung — priorisiert statt aufgelistet."
    />
  )

  return (
    <>
      <Navbar />
      <main>
        <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">{header}</div>
        <DeltaImpactSection />
        <SecurityToolchainSection />
        <CRAFahrplanSection />
        <SafetySection />
        <TargetSection />
      </main>
      <Footer />
      <ChatFAB />
    </>
  )
}
|
||||
@@ -0,0 +1,213 @@
|
||||
import Navbar from '@/components/layout/Navbar'
|
||||
import Footer from '@/components/layout/Footer'
|
||||
import ChatFAB from '@/components/layout/ChatFAB'
|
||||
import PageHeader from '@/components/ui/PageHeader'
|
||||
import GlassCard from '@/components/ui/GlassCard'
|
||||
import FadeInView from '@/components/ui/FadeInView'
|
||||
import { Database, Layers, Calculator, AlertTriangle, Globe, Cookie } from 'lucide-react'
|
||||
|
||||
/**
 * Methodology page for the savings scan. Renders three sections between the
 * shared navbar and footer:
 *
 * 1. the four analysis stages (one card per stage),
 * 2. the caveats ("Was wir NICHT versprechen"),
 * 3. the data sources.
 *
 * All copy lives in the local data arrays below. Running text is stored as
 * single strings on purpose: JSX joins wrapped text lines with a space, which
 * previously split the compound "Saving-Schaetzung" into "Saving- Schaetzung".
 */
export default function SavingsMethodikPage() {
  // One entry per stage card. `delay` is forwarded to FadeInView only when set,
  // so the first card keeps FadeInView's own default.
  const stages = [
    {
      Icon: Cookie,
      label: 'STUFE 1',
      title: 'Cookie-Footprint extrahieren',
      intro:
        'Playwright laedt die Webseite vollstaendig (inkl. JavaScript-Rendering) und erfasst jeden gesetzten Cookie + jeden CMP-Payload (ePaaS, OneTrust, Usercentrics, Cookiebot, Didomi, TrustArc).',
      bullets: [
        '• Cookie-Namen, Werte, Domains, Lifetimes',
        '• IAB TCF v2.2 Vendor-Liste auswerten (Vendor-IDs zur eindeutigen Zuordnung)',
        '• Drittanbieter-Quote pro Cookie',
        <>• Premium-Feature-Cookies erkennen (z.B. <code className="text-emerald-300">s_target_qa</code> = Adobe Target Enterprise)</>,
      ],
    },
    {
      delay: 0.1,
      Icon: Database,
      label: 'STUFE 2',
      title: 'Wissens-Datenbank-Abgleich',
      intro:
        'Jeder Cookie wird gegen unsere kuratierte Wissens-DB mit derzeit ~50 Top-Vendors abgeglichen. Pro Cookie wissen wir:',
      bullets: [
        '• Setzender Anbieter + Sitzland',
        '• Exakter funktionaler Zweck (nicht nur Kategorie)',
        '• Welche Datenfelder gesammelt werden (Client-ID, IP, etc.)',
        '• Re-Identifikations-Risiko (low/medium/high)',
        '• §25(2) TDDDG technische Notwendigkeit',
        '• Schrems-II-Status + relevante EuGH-/CNIL-Urteile',
        '• Konkreter EU-Alternativ-Cookie + EU-Alternativ-Vendor',
      ],
    },
    {
      delay: 0.2,
      Icon: Layers,
      label: 'STUFE 3',
      title: 'Tier-Inferenz + Funktionale Kategorisierung',
      intro: 'Pro Vendor leiten wir das Pricing-Tier aus dem Cookie-Footprint ab:',
      bullets: [
        <>• <strong className="text-white/80">&lt;10 Cookies</strong> = Starter-Plan</>,
        <>• <strong className="text-white/80">10-30 Cookies</strong> = Professional / Mid-Market</>,
        <>• <strong className="text-white/80">30-60 Cookies</strong> = Enterprise</>,
        <>• <strong className="text-white/80">&gt;60 Cookies + Premium-Features</strong> = Premier-Tier</>,
      ],
      // Only this stage has a closing paragraph after the bullet list.
      outro:
        'Parallel werden alle Vendors funktional klassifiziert (Web-Analytics, Werbung, CDN, Marketing-Automation, …). Mehrere Vendors in derselben Kategorie = Konsolidierungs-Kandidat.',
    },
    {
      delay: 0.3,
      Icon: Calculator,
      label: 'STUFE 4',
      title: 'Kosten-Schaetzung + EU-Konsolidierung',
      intro:
        'Pro Tier multiplizieren wir mit unseren Pricing-Lookups (Gartner/Forrester 2025 + oeffentliche Listpreise). Ergebnis: jaehrlicher Kostenbereich pro Vendor.',
      bullets: [
        '• Master-Vertrag-Dedupe (1 Adobe-Lizenz, viele Features)',
        '• EU-Alternative mit gleicher Funktion + Listpreis',
        '• Multi-Funktions-Tools die mehrere Kategorien gleichzeitig ersetzen',
        '• Sparpotenzial = Aktuelle Listpreise − EU-Tool-Listpreis',
      ],
    },
  ]

  // Caveat cards: heading plus one paragraph each.
  const caveats = [
    {
      title: 'Listpreise ≠ Vertragspreise',
      text: 'Konzern-Konditionen liegen ueblicherweise 30–50% unter Listpreis. Wir geben Bereiche an, nicht exakte Zahlen. Verifikation mit dem eigenen Einkauf ist Pflicht.',
    },
    {
      title: 'Funktionale Redundanz ≠ Strategische Redundanz',
      text: 'Mehrere Analytics-Tools koennen legitim sein (A/B-Test, regional split, Marketing vs Produkt). Wir nennen die bekannten Gruende explizit.',
    },
    {
      title: 'Media-Spend nicht enthalten',
      text: 'Google-Ads-/Meta-Ads-/Programmatic-Budget ist NICHT in der Saving-Schaetzung. Nur Tool-Lizenzen. Media-Optimierung ist ein separates Thema.',
    },
    {
      title: 'Migrations-Kosten nicht abgezogen',
      text: 'Tool-Wechsel kostet Zeit + interne Implementation. Faustregel: 3-6 Monate Amortisation einrechnen. Saving-Schaetzung ist Brutto.',
    },
  ]

  // Data-source list: bold label followed by a space and the description.
  const sources = [
    {
      label: 'Cookie-Wissen:',
      text: 'Cookiepedia, IAB Europe TCF v2.2 Vendor-Liste, Cookiebot Public DB, Vendor-eigene Dokumentation',
    },
    {
      label: 'Pricing:',
      text: 'Gartner Hype Cycle 2025, Forrester Wave MarTech 2025, oeffentliche Pricing-Pages, anonymisierte Kundengespraeche',
    },
    {
      label: 'Regulatorik:',
      text: 'EDPB Cookie Guidelines 2/2023, DSK-Orientierungshilfe Telemedien 2024, CNIL Cookies-Recommendations',
    },
    {
      label: 'Updates:',
      text: 'DB wird kontinuierlich gepflegt. Neue Kunden geben uns Ground-Truth fuer Kalibrierung.',
    },
  ]

  return (
    <>
      <Navbar />
      <main>
        <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
          <PageHeader
            tag="METHODIK"
            title="Wie Cookies"
            titleHighlight="Marketing-Budgets entlarven"
            subtitle="4-Stufen-Analyse: vom rohen Cookie-Footprint zur fundierten Saving-Schaetzung. Jede Stufe nachvollziehbar, jede Zahl mit Quelle, jede Annahme transparent."
          />
        </div>

        {/* Four stages */}
        <section className="py-12 sm:py-16">
          <div className="max-w-5xl mx-auto px-4 sm:px-6 lg:px-8 space-y-8">
            {stages.map((stage) => {
              const { Icon } = stage
              // Pass `delay` only when the stage defines one.
              const fadeProps = stage.delay === undefined ? {} : { delay: stage.delay }
              return (
                <FadeInView key={stage.label} {...fadeProps}>
                  <GlassCard>
                    <div className="flex items-start gap-4">
                      <div className="w-12 h-12 rounded-xl bg-emerald-500/10 flex items-center justify-center flex-shrink-0">
                        <Icon className="w-6 h-6 text-emerald-400" />
                      </div>
                      <div>
                        <div className="mono-label text-emerald-400 mb-1">{stage.label}</div>
                        <h3 className="text-xl font-bold mb-2">{stage.title}</h3>
                        <p className="text-sm text-white/60 mb-3">{stage.intro}</p>
                        <ul className="text-sm text-white/50 space-y-1">
                          {stage.bullets.map((bullet, idx) => (
                            <li key={idx}>{bullet}</li>
                          ))}
                        </ul>
                        {stage.outro && (
                          <p className="text-sm text-white/60 mt-3 mb-2">{stage.outro}</p>
                        )}
                      </div>
                    </div>
                  </GlassCard>
                </FadeInView>
              )
            })}
          </div>
        </section>

        {/* Caveats — stated honestly */}
        <section className="py-12 sm:py-16 bg-amber-500/[0.03]">
          <div className="max-w-5xl mx-auto px-4 sm:px-6 lg:px-8">
            <h2 className="text-2xl font-bold mb-6 flex items-center gap-3">
              <AlertTriangle className="w-6 h-6 text-amber-400" />
              Was wir NICHT versprechen
            </h2>
            <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
              {caveats.map((caveat) => (
                <GlassCard key={caveat.title}>
                  <h3 className="font-bold mb-2">{caveat.title}</h3>
                  <p className="text-sm text-white/60">{caveat.text}</p>
                </GlassCard>
              ))}
            </div>
          </div>
        </section>

        {/* Data sources */}
        <section className="py-12 sm:py-16">
          <div className="max-w-5xl mx-auto px-4 sm:px-6 lg:px-8">
            <h2 className="text-2xl font-bold mb-6 flex items-center gap-3">
              <Globe className="w-6 h-6 text-emerald-400" />
              Datenquellen + Updates
            </h2>
            <GlassCard>
              <ul className="text-sm text-white/60 space-y-3">
                {sources.map((source) => (
                  <li key={source.label}>
                    <strong className="text-white/90">{source.label}</strong>{' '}{source.text}
                  </li>
                ))}
              </ul>
            </GlassCard>
          </div>
        </section>
      </main>
      <Footer />
      <ChatFAB />
    </>
  )
}
|
||||
@@ -0,0 +1,249 @@
|
||||
'use client'
|
||||
|
||||
import { useEffect, useRef, useState } from 'react'
|
||||
import Navbar from '@/components/layout/Navbar'
|
||||
import Footer from '@/components/layout/Footer'
|
||||
import ChatFAB from '@/components/layout/ChatFAB'
|
||||
import PageHeader from '@/components/ui/PageHeader'
|
||||
import GlassCard from '@/components/ui/GlassCard'
|
||||
import FadeInView from '@/components/ui/FadeInView'
|
||||
import { Cookie, ShieldCheck, Mail, ArrowRight, CheckCircle2, AlertTriangle } from 'lucide-react'
|
||||
|
||||
/**
 * Savings-scan page: a form that starts a scan via `POST /api/scan/start`
 * and, once a `check_id` is returned, a status card that polls
 * `GET /api/scan/status/<check_id>` every 5 seconds (at most 60 times).
 *
 * State overview:
 * - `url`, `email`, `consent`: form inputs.
 * - `submitting`: true while the start request is in flight.
 * - `done`: switches the card from the form to the status view.
 * - `checkId`: id returned by the start endpoint; setting it starts polling.
 * - `progress`, `progressPct`: progress text and percentage from the status endpoint.
 * - `error`: message shown in the form or in the status view.
 */
export default function SavingsScanPage() {
  const POLL_INTERVAL_MS = 5000
  const MAX_POLLS = 60
  const TERMINAL_STATES = ['completed', 'failed', 'skipped_tdm']

  const [url, setUrl] = useState('')
  const [email, setEmail] = useState('')
  // Consent has to be an explicit opt-in: a pre-ticked box is not valid consent
  // (GDPR recital 32; CJEU C-673/17 "Planet49"), so the box starts unchecked.
  const [consent, setConsent] = useState(false)
  const [submitting, setSubmitting] = useState(false)
  const [done, setDone] = useState(false)
  const [checkId, setCheckId] = useState<string | null>(null)
  const [progress, setProgress] = useState<string>('')
  const [progressPct, setProgressPct] = useState<number>(0)
  const [error, setError] = useState<string | null>(null)
  const pollingRef = useRef<boolean>(false)

  /** Submits the form and, on success, stores the check id and shows the status view. */
  async function handleSubmit(e: React.FormEvent) {
    e.preventDefault()
    if (!url || !email) return
    setError(null)
    setSubmitting(true)
    try {
      const res = await fetch('/api/scan/start', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ url, email, consent }),
      })
      // An error response may not carry a JSON body (e.g. a proxy error page).
      // Treat an unparsable body as "no details" instead of a network error.
      const data = await res.json().catch(() => null)
      if (!res.ok || data === null) {
        setError(data?.detail || data?.error || 'Scan konnte nicht gestartet werden')
        return
      }
      setCheckId(data.check_id)
      setDone(true)
    } catch {
      setError('Netzwerkfehler — bitte erneut versuchen.')
    } finally {
      setSubmitting(false)
    }
  }

  // Poll the status endpoint once a check id is known.
  useEffect(() => {
    if (!checkId || pollingRef.current) return
    pollingRef.current = true
    // Aborting on cleanup cancels an in-flight request and stops the loop.
    const controller = new AbortController()
    const { signal } = controller

    const poll = async () => {
      for (let attempt = 0; attempt < MAX_POLLS && !signal.aborted; attempt++) {
        await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS))
        // The effect may have been cleaned up while we were sleeping.
        if (signal.aborted) return
        try {
          const res = await fetch(`/api/scan/status/${encodeURIComponent(checkId)}`, { signal })
          const data = await res.json()
          if (signal.aborted) return
          if (data.progress) setProgress(data.progress)
          if (typeof data.progress_pct === 'number') setProgressPct(data.progress_pct)
          if (TERMINAL_STATES.includes(data.status)) {
            if (data.status === 'completed') {
              // The view derives "abgeschlossen" from progressPct >= 100, so make
              // sure a completed scan reaches it even without a final percentage.
              setProgressPct(100)
            } else {
              setError(data.error || 'Scan abgebrochen')
            }
            return
          }
        } catch {
          // Transient failure or abort: the loop condition decides whether to retry.
        }
      }
    }

    // poll() handles its own errors and never rejects.
    void poll()
    return () => {
      controller.abort()
      pollingRef.current = false
    }
  }, [checkId])

  return (
    <>
      <Navbar />
      <main>
        <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
          <PageHeader
            tag="KOSTENLOSER SAVING-SCAN"
            title="In 5 Minuten zur"
            titleHighlight="sechsstelligen Saving-Schaetzung"
            subtitle="URL eingeben — wir analysieren alle Cookies, identifizieren redundante Anbieter und schaetzen jaehrliche Einsparung. Kostenlos, ohne Login, ohne Vertrieb-Termin."
          />
        </div>

        <section className="py-12 sm:py-16">
          <div className="max-w-3xl mx-auto px-4 sm:px-6 lg:px-8">
            {!done ? (
              <GlassCard>
                <form onSubmit={handleSubmit} className="space-y-5">
                  <div>
                    <label htmlFor="url" className="block text-sm font-medium text-white/70 mb-2">
                      Website-URL <span className="text-red-400">*</span>
                    </label>
                    <input
                      id="url"
                      type="url"
                      required
                      placeholder="https://www.ihre-firma.de"
                      value={url}
                      onChange={e => setUrl(e.target.value)}
                      className="w-full px-4 py-3 rounded-lg bg-white/[0.04] border border-white/10 text-white placeholder-white/30 focus:border-emerald-400 focus:outline-none"
                    />
                    <p className="mt-1 text-xs text-white/40">
                      Wir crawlen die Startseite + automatisch erkennbare Unterseiten
                      (DSI, Impressum, Cookie-Richtlinie).
                    </p>
                  </div>

                  <div>
                    <label htmlFor="email" className="block text-sm font-medium text-white/70 mb-2">
                      E-Mail fuer den Bericht <span className="text-red-400">*</span>
                    </label>
                    <input
                      id="email"
                      type="email"
                      required
                      placeholder="ihr.name@firma.de"
                      value={email}
                      onChange={e => setEmail(e.target.value)}
                      className="w-full px-4 py-3 rounded-lg bg-white/[0.04] border border-white/10 text-white placeholder-white/30 focus:border-emerald-400 focus:outline-none"
                    />
                    <p className="mt-1 text-xs text-white/40">
                      Bericht kommt als PDF + JSON. Die Mailadresse wird ausschliesslich
                      fuer diesen Scan verwendet.
                    </p>
                  </div>

                  <label className="flex items-start gap-2 text-xs text-white/60 cursor-pointer">
                    <input
                      type="checkbox"
                      checked={consent}
                      onChange={e => setConsent(e.target.checked)}
                      className="mt-0.5 accent-emerald-500"
                    />
                    <span>
                      Ich stimme zu, dass meine E-Mail fuer den Saving-Report + ein
                      einmaliges Sales-Follow-Up genutzt wird. Widerruf jederzeit per E-Mail.
                    </span>
                  </label>

                  {error && (
                    <p className="text-sm text-amber-300/80 bg-amber-500/10 border border-amber-400/30 rounded px-3 py-2">
                      {error}
                    </p>
                  )}

                  <button
                    type="submit"
                    disabled={submitting || !consent}
                    className="inline-flex items-center gap-2 px-6 py-3 rounded-full bg-emerald-500 hover:bg-emerald-400 transition-colors text-enterprise-dark font-semibold disabled:opacity-50"
                  >
                    {submitting ? 'Wird gestartet …' : 'Saving-Scan starten'}
                    <ArrowRight className="w-4 h-4" />
                  </button>

                  <p className="text-xs text-white/40 pt-2">
                    Wir analysieren ausschliesslich oeffentlich abrufbare Daten Ihrer Website
                    unter Beachtung maschinenlesbarer Nutzungsvorbehalte (§ 44b UrhG).
                    Pro Domain max. 1 Scan / 24h. Ergebnis innerhalb von ~3-5 Minuten per E-Mail.
                  </p>
                </form>
              </GlassCard>
            ) : (
              <GlassCard>
                <div className="text-center py-6">
                  {error ? (
                    <AlertTriangle className="w-12 h-12 text-amber-400 mx-auto mb-4" />
                  ) : (
                    <CheckCircle2 className="w-12 h-12 text-emerald-400 mx-auto mb-4" />
                  )}
                  <h3 className="text-xl font-bold mb-2">
                    {error ? 'Scan-Hinweis' : (progressPct >= 100 ? 'Scan abgeschlossen' : 'Scan laeuft')}
                  </h3>
                  {error ? (
                    <p className="text-amber-300/80 mb-4 text-sm">{error}</p>
                  ) : (
                    <>
                      <p className="text-white/60 mb-4">
                        {progressPct >= 100
                          ? <>Der Bericht ist unterwegs an <strong className="text-white/90">{email}</strong>.</>
                          : <>Wir analysieren <strong className="text-white/90">{url}</strong>. Bericht kommt in ~3-5 Min an <strong className="text-white/90">{email}</strong>.</>
                        }
                      </p>
                      {progress && progressPct < 100 && (
                        <div className="max-w-md mx-auto mt-4">
                          <div className="text-xs text-white/50 mb-2">{progress}</div>
                          <div className="w-full bg-white/10 rounded-full h-2 overflow-hidden">
                            <div className="bg-emerald-400 h-full transition-all" style={{ width: `${progressPct}%` }} />
                          </div>
                          <div className="text-xs text-white/40 mt-1">{progressPct}%</div>
                        </div>
                      )}
                      <p className="text-xs text-white/40 mt-4">
                        Pruefen Sie auch Ihren Spam-Ordner.
                      </p>
                    </>
                  )}
                </div>
              </GlassCard>
            )}
          </div>
        </section>

        <section className="py-16">
          <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
            <FadeInView>
              <div className="grid grid-cols-1 md:grid-cols-3 gap-6">
                <GlassCard>
                  <Cookie className="w-8 h-8 text-emerald-400 mb-4" />
                  <h3 className="text-lg font-bold mb-2">Was wir analysieren</h3>
                  <ul className="text-sm text-white/60 space-y-2">
                    <li>• Alle Cookies + Vendor-Identifikation</li>
                    <li>• Funktionale Kategorisierung (Analytics, Werbung, CDN, …)</li>
                    <li>• Redundanz-Detection ueber Kategorien</li>
                    <li>• Cookie-Tiefenanalyse mit Tier-Inferenz</li>
                  </ul>
                </GlassCard>
                <GlassCard delay={0.1}>
                  <ShieldCheck className="w-8 h-8 text-emerald-400 mb-4" />
                  <h3 className="text-lg font-bold mb-2">Was Sie bekommen</h3>
                  <ul className="text-sm text-white/60 space-y-2">
                    <li>• Geschaetzte jaehrliche Tooling-Kosten (Listpreis-Range)</li>
                    <li>• Sparpotenzial pro Konsolidierungs-Kandidat</li>
                    <li>• EU-Alternative pro US-Vendor</li>
                    <li>• Schrems-II-Risiko-Bewertung</li>
                  </ul>
                </GlassCard>
                <GlassCard delay={0.2}>
                  <Mail className="w-8 h-8 text-emerald-400 mb-4" />
                  <h3 className="text-lg font-bold mb-2">Was es kostet</h3>
                  <ul className="text-sm text-white/60 space-y-2">
                    <li>• <strong className="text-white">Erster Scan: kostenlos</strong></li>
                    <li>• Kein Login, kein Vertriebs-Termin</li>
                    <li>• Daten werden nicht gespeichert</li>
                    <li>• PDF + JSON zum Download</li>
                  </ul>
                </GlassCard>
              </div>
            </FadeInView>
          </div>
        </section>
      </main>
      <Footer />
      <ChatFAB />
    </>
  )
}
|
||||
@@ -0,0 +1,177 @@
|
||||
import Navbar from '@/components/layout/Navbar'
|
||||
import Footer from '@/components/layout/Footer'
|
||||
import ChatFAB from '@/components/layout/ChatFAB'
|
||||
|
||||
// Strengths / USP page — seven selling points from the IACE strategy
// (memory: project_marketing_website_3014_themes.md). Built as a long-form
// page with anchor targets — one numbered differentiator per section, so
// sales calls can work with deep links.

/**
 * The seven USP entries rendered by the page, in display order.
 * `id` is used as the element id (anchor target) and React key, `no` is the
 * displayed number, `sub` the tagline, `body` the running text and `proof`
 * the "Belegt durch" line. Body segments are joined without a separator, so
 * each segment carries its own trailing space.
 */
const usps = [
  {
    id: 'engine',
    no: '1',
    title: 'Engine, nicht Checkliste',
    sub: 'Wir leiten Gefährdungen ab. Wettbewerb fragt aus einer Liste.',
    body: [
      'Marktstandard (DesignSafe, Pilz, Sick) ist Excel-aufgewertete Checkliste: der Engineer wählt aus einer Hazard-Bibliothek aus. ',
      'BreakPilot betreibt eine deterministische Pattern-Engine mit über 1.200 Hazard-Patterns. Aus der Maschinenbeschreibung leitet sie ',
      'die Gefährdungen ab — keine Auswahllisten, keine vergessenen Punkte.',
    ].join(''),
    proof: 'Audit-Suite cmd/iace-audit erkennt eigene Lücken (Methode A–E)',
  },
  {
    id: 'multi-markt',
    no: '2',
    title: 'Eine Risikobeurteilung — alle Märkte',
    sub: 'CE + OSHA + ANSI + GB + JIS aus einem Datenmodell.',
    body: [
      'Die gleiche Pattern-Engine generiert pro Maschinenbeschreibung mehrere Compliance-Anhänge. Hersteller wählt seine Zielmärkte. ',
      'EU-Recht zitieren wir wörtlich (Rule 1). OWASP unter CC-BY-SA mit Pflicht-Attribution (Rule 2). DIN/EN nur per Identifier (Rule 3). ',
      'Norm-Cross-Reference-Bibliothek mappt ISO 12100 ↔ DIN EN ISO 12100 ↔ ANSI B11.0 ↔ GB/T 15706 ↔ JIS B 9700.',
    ].join(''),
    proof: '252 Regulationen klassifiziert · 314.811 Controls audited',
  },
  {
    id: 'folgegefahren',
    no: '3',
    title: 'Vom Bediener bis zum Endkunden',
    sub: 'Folgegefahren-Modell mit Sekundärschadens-Kette.',
    body: [
      'Klassische Risikobeurteilung schaut nur den Bediener an. Wir modellieren die Schadenskette weiter: Glasbruch in der Abfüllanlage ',
      'verletzt nicht nur den Bediener, sondern erreicht über Restsplitter den Endkunden. BreakPilot verbindet CE-Sicherheit mit ',
      'Produkthaftung nach ProdHaftG, Lebensmittelrecht nach VO 178/2002 und ISO 31000 Unternehmensrisiko in einem Datenmodell.',
    ].join(''),
    proof: 'SecondaryHarm-Modell live für consumer_safety, product_liability, food_safety, environmental, reputation, financial',
  },
  {
    id: 'public-domain',
    no: '4',
    title: 'Public Domain als Rechtsanker',
    sub: 'Werte aus OSHA, NIST, EUR-Lex, BAuA — auditfähig zitiert.',
    body: [
      'Mindestabstände der Maschinensicherheit kommen bei uns aus OSHA 29 CFR 1910 Subpart O — US Federal Public Domain, lizenzrechtlich ',
      'unbedenklich. Engineering-Rundung auf safe-side mm-Raster wird transparent dokumentiert. EU-Normen erscheinen nur als Identifier-Verweis ',
      'mit einer menschlich kuratierten "Strenger/Gleich/Weicher"-Annotation — kein Copyright-Risiko.',
    ].join(''),
    proof: 'OSHA Table O-10 + §1910.217 PSDI-Formel verbatim · DIN nur Identifier · 6 DGUV-Publikationen referenziert',
  },
  {
    id: 'audit-suite',
    no: '5',
    title: 'Audit findet Lücken, die der Fachmann übersieht',
    sub: 'Fünf deterministische Audits ohne Ground Truth.',
    body: [
      'Unsere Engine kennt ihre eigenen Lücken. Methode A bis E (Reachability, Consistency, Vocabulary, Echo, Hierarchy) finden Gaps ',
      'ohne Fachmann-Vergleich. Bei einem Test fanden wir 100 strukturell unerreichbare Patterns und 46 unvollständige Component-Tags — ',
      'Probleme, die ein menschlicher Auditor in einem Einzelfall nie gesehen hätte.',
    ].join(''),
    proof: 'cmd/iace-audit · 1.213 Patterns transparent · 99,94% Recall verifiziert',
  },
  {
    id: 'made-in-germany',
    no: '6',
    title: 'Made in Germany meets US Federal Public Domain',
    sub: 'Deutscher Maschinenbau, der gleichzeitig US-Compliance liefert.',
    body: [
      'Deutscher Exportweltmeister-Maschinenbau braucht UL/NRTL-Zulassung für die USA. Die gleichen Daten, die wir für CE generieren, ',
      'liefern dem US-Auditor 80 % der Vorarbeit. Risikobeurteilung in einer Sprache, Compliance in zwei Märkten — ohne Mehraufwand für den Hersteller.',
    ].join(''),
    proof: 'OSHA-Anker im RAG · NRTL-fähige Compliance-Spur · DesignSafe-Marktstandard wird hier erweitert, nicht imitiert',
  },
  {
    id: 'tooling',
    no: '7',
    title: 'LLM-Gap-Review als Co-Pilot, nicht als Roboter-Anwalt',
    sub: 'Pattern-Engine als Audit-Spur, LLM als Lücken-Suchhund.',
    body: [
      'Die deterministische Engine bleibt die auditfähige Quelle der Wahrheit. Ein nachgelagerter LLM-Gap-Review (Qwen / Claude) prüft, ',
      'was die Engine übersehen hat — mit klarer Quellen-Provenance (R3 LLM-Review) und Adopt/Reject-UX. Halluzinationen können nicht in ',
      'die finale Risikobeurteilung schlüpfen.',
    ].join(''),
    proof: 'POST /projects/:id/llm-gap-review · Konfidenz-Stufen · Fallback auf statische Checkliste',
  },
] as const
|
||||
|
||||
/**
 * Rows of the comparison table. `feature` is the row label; the remaining
 * keys hold the cell text per column: `bp` (BreakPilot), `ds` (DesignSafe),
 * `pilz` (Pilz PASS), `sick` (Sick SD) and `sphera` (Sphera).
 */
const competitors = [
  {
    feature: 'Pattern-Engine statt Checkliste',
    bp: '✓',
    ds: '—',
    pilz: '—',
    sick: '—',
    sphera: '—',
  },
  {
    feature: 'Multi-Markt CE / US / CN / JP',
    bp: '✓',
    ds: 'nur US',
    pilz: 'nur EU',
    sick: 'nur EU',
    sphera: 'enterprise',
  },
  {
    feature: 'Folgegefahren-Modell',
    bp: '✓',
    ds: '—',
    pilz: '—',
    sick: '—',
    sphera: 'Process',
  },
  {
    feature: 'Audit-Suite (Engine-Lücken-Erkennung)',
    bp: '✓',
    ds: '—',
    pilz: '—',
    sick: '—',
    sphera: '—',
  },
  {
    feature: 'OSHA-Anker (Public Domain Werte)',
    bp: '✓',
    ds: '✓',
    pilz: '—',
    sick: '—',
    sphera: '—',
  },
  {
    feature: 'LLM-Gap-Review (Co-Pilot)',
    bp: '✓',
    ds: '—',
    pilz: '—',
    sick: '—',
    sphera: '—',
  },
]
|
||||
|
||||
/**
 * "Strengths" page: a numbered list of the `usps` entries (each list item
 * carries the entry id as its element id), a comparison table built from
 * `competitors`, and a closing section on sources and licensing.
 */
export default function StaerkenPage() {
  // Column layout of the comparison table: which row key feeds the column,
  // the header text, and the header / cell class names.
  const columns = [
    { key: 'feature', heading: 'Feature', th: 'text-left p-3 font-medium', td: 'p-3 text-white/80' },
    { key: 'bp', heading: 'BreakPilot', th: 'text-left p-3 font-medium text-accent-electric', td: 'p-3 text-accent-electric font-medium' },
    { key: 'ds', heading: 'DesignSafe', th: 'text-left p-3 font-medium text-white/60', td: 'p-3 text-white/50' },
    { key: 'pilz', heading: 'Pilz PASS', th: 'text-left p-3 font-medium text-white/60', td: 'p-3 text-white/50' },
    { key: 'sick', heading: 'Sick SD', th: 'text-left p-3 font-medium text-white/60', td: 'p-3 text-white/50' },
    { key: 'sphera', heading: 'Sphera', th: 'text-left p-3 font-medium text-white/60', td: 'p-3 text-white/50' },
  ] as const

  return (
    <>
      <Navbar />
      <main className="bg-enterprise-dark text-white pt-32 pb-24">
        <div className="max-w-5xl mx-auto px-4">
          <header className="mb-16">
            <h1 className="text-5xl font-bold mb-4">Was uns differenziert</h1>
            <p className="text-white/60 text-lg max-w-3xl">
              Sieben konkrete Punkte, die BreakPilot von DesignSafe, Pilz, Sick, TÜV-Tools und Sphera trennen.
              Jede Differenzierung ist im Produkt umgesetzt — kein Marketing-Versprechen.
            </p>
          </header>

          <ol className="space-y-12">
            {usps.map((entry) => (
              <li id={entry.id} key={entry.id} className="border-l-2 border-accent-electric pl-6">
                <div className="flex items-baseline gap-3 mb-2">
                  <span className="text-accent-electric font-mono text-3xl font-bold">#{entry.no}</span>
                  <h2 className="text-2xl font-semibold">{entry.title}</h2>
                </div>
                <p className="text-accent-electric/80 text-sm mb-3">{entry.sub}</p>
                <p className="text-white/70 leading-relaxed mb-3">{entry.body}</p>
                <p className="text-white/40 text-xs">
                  <span className="text-white/60">Belegt durch:</span> {entry.proof}
                </p>
              </li>
            ))}
          </ol>

          <section className="mt-20">
            <h2 className="text-3xl font-bold mb-4">Direktvergleich</h2>
            <p className="text-white/60 mb-6 max-w-3xl">
              Stand 2026. Marktangaben basieren auf öffentlicher Produktinformation der genannten Anbieter.
            </p>
            <div className="overflow-x-auto border border-white/10 rounded-lg">
              <table className="w-full text-sm">
                <thead className="bg-white/[0.04] border-b border-white/10">
                  <tr>
                    {columns.map((col) => (
                      <th key={col.key} className={col.th}>{col.heading}</th>
                    ))}
                  </tr>
                </thead>
                <tbody>
                  {competitors.map((row) => (
                    <tr key={row.feature} className="border-t border-white/[0.06]">
                      {columns.map((col) => (
                        <td key={col.key} className={col.td}>{row[col.key]}</td>
                      ))}
                    </tr>
                  ))}
                </tbody>
              </table>
            </div>
          </section>

          <section className="mt-20 border-t border-white/10 pt-12">
            <h2 className="text-2xl font-bold mb-3">Quellen &amp; Lizenz-Architektur</h2>
            <p className="text-white/60 leading-relaxed">
              Die Plattform stützt sich auf öffentliche Quellen: EU-Recht (EUR-Lex), Bundesrecht (BetrSichV, ArbSchG),
              US Federal Code (OSHA, NIST), Behörden-Leitfäden (ENISA, EDPB, BAuA), freie Sicherheits-Frameworks unter
              CC-BY-SA (OWASP). Jeder Inhalt trägt eine deterministische Lizenzregel R1/R2/R3 und löst die
              entsprechende Attribution im Ausgabe-PDF und im Frontend automatisch aus. Vollständige Quellenliste
              im SDK unter <code className="bg-white/[0.06] px-1.5 py-0.5 rounded">/sdk/licenses</code>.
            </p>
          </section>
        </div>
      </main>
      <Footer />
      <ChatFAB />
    </>
  )
}
|
||||
@@ -0,0 +1,15 @@
|
||||
import Navbar from '@/components/layout/Navbar'
|
||||
import Footer from '@/components/layout/Footer'
|
||||
import ChatFAB from '@/components/layout/ChatFAB'
|
||||
import TeamSection from '@/components/sections/TeamSection'
|
||||
|
||||
/**
 * Team page: renders `TeamSection` between the shared navbar and footer,
 * followed by the floating chat button.
 */
export default function TeamPage() {
  const page = (
    <>
      <Navbar />
      <TeamSection />
      <Footer />
      <ChatFAB />
    </>
  )

  return page
}
|
||||
@@ -0,0 +1,295 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useRef, useEffect } from 'react'
|
||||
import { motion, AnimatePresence } from 'framer-motion'
|
||||
import { X, Send, Bot, User, Sparkles, Maximize2, Minimize2 } from 'lucide-react'
|
||||
import { t } from '@/lib/content'
|
||||
import { useApp } from '@/lib/context'
|
||||
|
||||
/** One entry of the chat transcript held in the ChatFAB component state. */
interface ChatMessage {
  /** Author of the entry: the visitor or the assistant reply. */
  role: 'user' | 'assistant'
  /** Message text; rendered with whitespace preserved. */
  content: string
}
|
||||
|
||||
/**
 * Floating chat button plus the chat panel it opens.
 *
 * Sends the visitor's message (and the last 10 transcript entries) to
 * `/api/chat` and renders the streamed plain-text reply incrementally.
 * A running reply can be cancelled with the stop button.
 */
export default function ChatFAB() {
  const { lang } = useApp()
  const i = t(lang)
  const [isOpen, setIsOpen] = useState(false)
  const [isExpanded, setIsExpanded] = useState(false)
  const [messages, setMessages] = useState<ChatMessage[]>([])
  const [input, setInput] = useState('')
  const [isStreaming, setIsStreaming] = useState(false)
  const [isWaiting, setIsWaiting] = useState(false)
  const messagesEndRef = useRef<HTMLDivElement>(null)
  const inputRef = useRef<HTMLInputElement>(null)
  const abortRef = useRef<AbortController | null>(null)

  // Accessible names for the icon-only controls (they have no visible text).
  const labels = lang === 'de'
    ? {
        open: 'Compliance Agent oeffnen',
        expand: 'Chat vergrößern',
        collapse: 'Chat verkleinern',
        close: 'Chat schließen',
        send: 'Nachricht senden',
      }
    : {
        open: 'Open compliance agent',
        expand: 'Expand chat',
        collapse: 'Collapse chat',
        close: 'Close chat',
        send: 'Send message',
      }

  // Keep the newest content (message or typing indicator) in view.
  useEffect(() => {
    messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' })
  }, [messages, isWaiting])

  // Focus the input once the open animation has had time to run.
  useEffect(() => {
    if (!isOpen) return undefined
    const timer = setTimeout(() => inputRef.current?.focus(), 200)
    return () => clearTimeout(timer)
  }, [isOpen])

  // Cancel a still-running request when the component goes away.
  useEffect(() => () => abortRef.current?.abort(), [])

  /**
   * Send `text` (or the current input) and stream the reply into the transcript.
   * Does nothing for empty input or while a reply is still streaming.
   */
  async function sendMessage(text?: string) {
    const message = text || input.trim()
    if (!message || isStreaming) return

    setInput('')
    setMessages(prev => [...prev, { role: 'user', content: message }])
    setIsStreaming(true)
    setIsWaiting(true)

    const controller = new AbortController()
    abortRef.current = controller

    try {
      const res = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          message,
          history: messages.slice(-10),
        }),
        signal: controller.signal,
      })

      if (!res.ok) throw new Error(`HTTP ${res.status}`)
      if (!res.body) throw new Error('Response has no body')

      const reader = res.body.getReader()
      const decoder = new TextDecoder()
      let content = ''
      let started = false

      // First snapshot appends the assistant entry, later ones replace it.
      const publish = (snapshot: string) => {
        if (!started) {
          started = true
          setIsWaiting(false)
          setMessages(prev => [...prev, { role: 'assistant', content: snapshot }])
          return
        }
        setMessages(prev => {
          const updated = [...prev]
          updated[updated.length - 1] = { role: 'assistant', content: snapshot }
          return updated
        })
      }

      while (true) {
        const { done, value } = await reader.read()
        if (done) break
        content += decoder.decode(value, { stream: true })
        publish(content)
      }

      // Flush bytes the streaming decoder may still be holding back
      // (e.g. a multi-byte character split across the final chunks).
      const tail = decoder.decode()
      if (tail) {
        content += tail
        publish(content)
      }
    } catch (err: unknown) {
      // A user-initiated stop is not an error; keep whatever was streamed.
      if (err instanceof Error && err.name === 'AbortError') return
      setIsWaiting(false)
      setMessages(prev => [
        ...prev,
        { role: 'assistant', content: i.chat.error },
      ])
    } finally {
      setIsStreaming(false)
      setIsWaiting(false)
      if (abortRef.current === controller) abortRef.current = null
    }
  }

  /** Abort the running request, if any. */
  function stopGeneration() {
    if (abortRef.current) {
      abortRef.current.abort()
      setIsStreaming(false)
    }
  }

  return (
    <>
      {/* FAB Button */}
      <AnimatePresence>
        {!isOpen && (
          <motion.button
            type="button"
            initial={{ scale: 0 }}
            animate={{ scale: 1 }}
            exit={{ scale: 0 }}
            whileHover={{ scale: 1.1 }}
            whileTap={{ scale: 0.95 }}
            onClick={() => setIsOpen(true)}
            className="fixed bottom-6 right-[5.5rem] z-50 w-14 h-14 rounded-full
              bg-accent-electric hover:bg-blue-500 text-white
              flex items-center justify-center shadow-lg shadow-blue-600/30
              transition-colors"
            aria-label={labels.open}
          >
            <svg width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
              <path d="M21 15a2 2 0 01-2 2H7l-4 4V5a2 2 0 012-2h14a2 2 0 012 2z" />
              <circle cx="9" cy="10" r="1" fill="currentColor" />
              <circle cx="12" cy="10" r="1" fill="currentColor" />
              <circle cx="15" cy="10" r="1" fill="currentColor" />
            </svg>
          </motion.button>
        )}
      </AnimatePresence>

      {/* Chat Panel */}
      <AnimatePresence>
        {isOpen && (
          <motion.div
            initial={{ opacity: 0, scale: 0.9, y: 20 }}
            animate={{ opacity: 1, scale: 1, y: 0 }}
            exit={{ opacity: 0, scale: 0.9, y: 20 }}
            transition={{ duration: 0.2 }}
            className={`fixed bottom-6 right-6 z-50
              ${isExpanded ? 'w-[700px] h-[80vh]' : 'w-[400px] h-[520px]'}
              rounded-2xl overflow-hidden
              bg-black/90 backdrop-blur-xl border border-white/10
              shadow-2xl shadow-black/50 flex flex-col
              transition-all duration-200`}
          >
            {/* Header */}
            <div className="flex items-center justify-between px-4 py-3 border-b border-white/10 shrink-0">
              <div className="flex items-center gap-2">
                <div className="w-7 h-7 rounded-full bg-accent-electric/20 flex items-center justify-center">
                  <Bot className="w-4 h-4 text-accent-electric" />
                </div>
                <div>
                  <span className="text-sm font-semibold text-white">{i.chat.title}</span>
                  <span className="text-xs text-white/30 ml-2">
                    {isStreaming ? i.chat.responding : i.chat.online}
                  </span>
                </div>
              </div>
              <div className="flex items-center gap-1">
                <button
                  type="button"
                  onClick={() => setIsExpanded(prev => !prev)}
                  aria-label={isExpanded ? labels.collapse : labels.expand}
                  className="w-7 h-7 rounded-full bg-white/10 flex items-center justify-center hover:bg-white/20 transition-colors"
                >
                  {isExpanded ? <Minimize2 className="w-3.5 h-3.5 text-white/60" /> : <Maximize2 className="w-3.5 h-3.5 text-white/60" />}
                </button>
                <button
                  type="button"
                  onClick={() => setIsOpen(false)}
                  aria-label={labels.close}
                  className="w-7 h-7 rounded-full bg-white/10 flex items-center justify-center hover:bg-white/20 transition-colors"
                >
                  <X className="w-4 h-4 text-white/60" />
                </button>
              </div>
            </div>

            {/* Messages */}
            <div className="flex-1 overflow-y-auto px-4 py-3 space-y-3">
              {messages.length === 0 && (
                <div className="space-y-2">
                  <div className="flex items-center gap-2 text-white/40 text-xs mb-3">
                    <Sparkles className="w-3.5 h-3.5" />
                    <span>{i.chat.ask}</span>
                  </div>
                  {i.chat.suggestions.map((q, idx) => (
                    <motion.button
                      type="button"
                      key={idx}
                      initial={{ opacity: 0, y: 8 }}
                      animate={{ opacity: 1, y: 0 }}
                      transition={{ delay: 0.1 + idx * 0.08 }}
                      onClick={() => sendMessage(q)}
                      className="block w-full text-left px-3 py-2.5 rounded-xl
                        bg-white/[0.05] border border-white/10
                        hover:bg-white/[0.1] transition-colors
                        text-xs text-white/70 hover:text-white"
                    >
                      {q}
                    </motion.button>
                  ))}
                </div>
              )}

              {messages.map((msg, idx) => (
                <div key={idx} className={`flex gap-2.5 ${msg.role === 'user' ? 'justify-end' : ''}`}>
                  {msg.role === 'assistant' && (
                    <div className="w-7 h-7 rounded-full bg-accent-electric/20 flex items-center justify-center shrink-0 mt-0.5">
                      <Bot className="w-3.5 h-3.5 text-accent-electric" />
                    </div>
                  )}
                  <div className={`max-w-[85%] rounded-2xl px-3.5 py-2.5 text-xs leading-relaxed ${
                    msg.role === 'user' ? 'bg-accent-electric/20 text-white' : 'bg-white/[0.06] text-white/80'
                  }`}>
                    <div className="whitespace-pre-wrap">{msg.content}</div>
                    {isStreaming && idx === messages.length - 1 && msg.role === 'assistant' && (
                      <span className="inline-block w-1.5 h-3.5 bg-accent-electric animate-pulse ml-0.5" />
                    )}
                  </div>
                  {msg.role === 'user' && (
                    <div className="w-7 h-7 rounded-full bg-white/10 flex items-center justify-center shrink-0 mt-0.5">
                      <User className="w-3.5 h-3.5 text-white/60" />
                    </div>
                  )}
                </div>
              ))}

              {/* Waiting indicator — placed after the transcript so it appears
                  below the message that is being answered */}
              <AnimatePresence>
                {isWaiting && (
                  <motion.div
                    initial={{ opacity: 0, y: 6 }}
                    animate={{ opacity: 1, y: 0 }}
                    exit={{ opacity: 0 }}
                    className="flex gap-2.5"
                  >
                    <div className="w-7 h-7 rounded-full bg-accent-electric/20 flex items-center justify-center shrink-0 mt-0.5">
                      <Bot className="w-3.5 h-3.5 text-accent-electric" />
                    </div>
                    <div className="bg-white/[0.06] rounded-2xl px-3.5 py-3 flex items-center gap-1">
                      {[0, 1, 2].map(dotIdx => (
                        <motion.span
                          key={dotIdx}
                          className="block w-1.5 h-1.5 rounded-full bg-accent-electric/70"
                          animate={{ opacity: [0.3, 1, 0.3], y: [0, -3, 0] }}
                          transition={{ duration: 0.7, repeat: Infinity, delay: dotIdx * 0.15 }}
                        />
                      ))}
                    </div>
                  </motion.div>
                )}
              </AnimatePresence>

              <div ref={messagesEndRef} />
            </div>

            {/* Input */}
            <div className="border-t border-white/10 px-4 py-3 shrink-0">
              {isStreaming && (
                <button
                  type="button"
                  onClick={stopGeneration}
                  className="w-full mb-2 px-3 py-1.5 rounded-lg bg-white/[0.06] hover:bg-white/[0.1]
                    text-xs text-white/50 transition-colors"
                >
                  {i.chat.stop}
                </button>
              )}
              <div className="flex gap-2">
                <input
                  ref={inputRef}
                  type="text"
                  value={input}
                  onChange={(e) => setInput(e.target.value)}
                  onKeyDown={(e) => {
                    // Enter that confirms an IME composition must not submit.
                    if (e.key === 'Enter' && !e.nativeEvent.isComposing) sendMessage()
                  }}
                  placeholder={i.chat.placeholder}
                  disabled={isStreaming}
                  className="flex-1 bg-white/[0.06] border border-white/10 rounded-xl px-3.5 py-2.5
                    text-xs text-white placeholder-white/30 outline-none
                    focus:border-accent-electric/50 focus:ring-1 focus:ring-accent-electric/20
                    disabled:opacity-50 transition-all"
                />
                <button
                  type="button"
                  onClick={() => sendMessage()}
                  disabled={isStreaming || !input.trim()}
                  aria-label={labels.send}
                  className="px-3.5 py-2.5 bg-accent-electric hover:bg-blue-600 disabled:opacity-30
                    rounded-xl transition-all text-white"
                >
                  <Send className="w-3.5 h-3.5" />
                </button>
              </div>
            </div>
          </motion.div>
        )}
      </AnimatePresence>
    </>
  )
}
|
||||
@@ -0,0 +1,342 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useEffect, useCallback } from 'react'
|
||||
import { motion, AnimatePresence } from 'framer-motion'
|
||||
import { Shield, ChevronDown, ChevronUp } from 'lucide-react'
|
||||
import { useApp } from '@/lib/context'
|
||||
|
||||
// localStorage key under which the visitor's category choices are stored.
const COOKIE_NAME = 'bp_consent'
// Site identifier sent with every consent record; overridable via environment.
const SITE_ID = process.env.NEXT_PUBLIC_CONSENT_SITE_ID || 'breakpilot-marketing'
|
||||
|
||||
/** Per-category consent flags as stored locally and broadcast via `consent-change`. */
interface ConsentState {
  essential: boolean
  functional: boolean
  analytics: boolean
}

/** Initial selection shown in the banner: only the essential category is on. */
const defaultConsent: ConsentState = {
  essential: true,
  functional: false,
  analytics: false,
}
|
||||
|
||||
/**
 * Banner copy per language. `description` is rendered directly before the
 * privacy-policy link, which is why it ends without punctuation.
 * Category entries marked `required` are shown checked and disabled.
 */
const texts = {
  de: {
    title: 'Cookie-Einwilligung',
    description: 'Wir verwenden Cookies, um unsere Website zu verbessern. Essenzielle Cookies sind für die Grundfunktionen erforderlich. Weitere Informationen finden Sie in unserer',
    privacyLink: 'Datenschutzerklärung',
    acceptAll: 'Alle akzeptieren',
    rejectAll: 'Nur notwendige',
    settings: 'Einstellungen',
    save: 'Auswahl speichern',
    categories: {
      essential: { name: 'Notwendig', description: 'Erforderlich für die Grundfunktionen der Website.', required: true },
      functional: { name: 'Funktional', description: 'Ermöglicht erweiterte Funktionen wie Spracheinstellungen und Theme-Präferenzen.' },
      analytics: { name: 'Analyse', description: 'Hilft uns zu verstehen, wie Besucher die Website nutzen.' },
    },
  },
  en: {
    title: 'Cookie Consent',
    description: 'We use cookies to improve our website. Essential cookies are required for basic functionality. For more information, please see our',
    privacyLink: 'Privacy Policy',
    acceptAll: 'Accept All',
    rejectAll: 'Essential Only',
    settings: 'Settings',
    save: 'Save Preferences',
    categories: {
      essential: { name: 'Essential', description: 'Required for basic website functionality.', required: true },
      functional: { name: 'Functional', description: 'Enables enhanced features like language settings and theme preferences.' },
      analytics: { name: 'Analytics', description: 'Helps us understand how visitors use the website.' },
    },
  },
}
|
||||
|
||||
/**
 * Return the per-tab session id, creating and storing one on first use.
 *
 * @returns the id kept in sessionStorage under `bp_session_id`, or an empty
 *   string on the server or when storage / UUID generation is unavailable.
 */
function getSessionId(): string {
  const storageKey = 'bp_session_id'

  if (typeof window === 'undefined') {
    return ''
  }

  try {
    const existing = sessionStorage.getItem(storageKey)
    if (existing) {
      return existing
    }
    const fresh = crypto.randomUUID()
    sessionStorage.setItem(storageKey, fresh)
    return fresh
  } catch {
    return ''
  }
}
|
||||
|
||||
/**
 * Build a coarse, non-cryptographic device fingerprint.
 *
 * Joins language, platform, screen size and timezone offset and folds the
 * string into a 32-bit hash (the classic `hash * 31 + charCode` scheme).
 * Missing globals contribute empty fields instead of throwing, so the
 * function is safe to call where `navigator` or `screen` do not exist.
 *
 * @returns `fp_` followed by the absolute hash in base 36.
 */
function getFingerprint(): string {
  const nav = typeof navigator !== 'undefined' ? navigator : null
  // `screen?.width` would still throw a ReferenceError when `screen` is not
  // declared at all, so it needs the same typeof guard as `navigator`.
  const scr = typeof screen !== 'undefined' ? screen : null
  const raw = [nav?.language, nav?.platform, scr?.width, scr?.height, new Date().getTimezoneOffset()].join('|')

  let hash = 0
  for (let i = 0; i < raw.length; i++) {
    // `| 0` keeps the running value in signed 32-bit range.
    hash = ((hash << 5) - hash + raw.charCodeAt(i)) | 0
  }
  return `fp_${Math.abs(hash).toString(36)}`
}
|
||||
|
||||
/**
 * Read the stored consent decision from localStorage.
 *
 * The stored value is validated: anything that is not an object with boolean
 * `functional` and `analytics` flags is treated as "no decision yet", so a
 * corrupted or foreign entry makes the banner reappear instead of being
 * passed on as a consent record. `essential` is always reported as true.
 *
 * @returns the saved state, or null on the server, when nothing valid is
 *   stored, or when storage cannot be read.
 */
function getSavedConsent(): ConsentState | null {
  if (typeof window === 'undefined') return null
  try {
    const stored = localStorage.getItem(COOKIE_NAME)
    if (!stored) return null

    const parsed: unknown = JSON.parse(stored)
    if (typeof parsed !== 'object' || parsed === null) return null

    const { functional, analytics } = parsed as Record<string, unknown>
    if (typeof functional !== 'boolean' || typeof analytics !== 'boolean') return null

    return { essential: true, functional, analytics }
  } catch {
    // Unreadable storage or malformed JSON: behave as if nothing was saved.
    return null
  }
}
|
||||
|
||||
/**
 * Derive device class, browser and operating system from the user agent.
 *
 * User agents list several product tokens (an Edge or Opera UA also contains
 * `Chrome/` and `Safari/`; an Android UA also contains `Linux`), so the rules
 * are checked from most to least specific instead of taking the first token
 * that appears in the string.
 *
 * @returns `device_type` ('desktop' | 'mobile' | 'tablet'), `browser` as
 *   `Name/version` and `os` as name plus version; 'Unknown' when nothing matches.
 */
function detectDevice(): { device_type: string; browser: string; os: string } {
  const ua = navigator.userAgent

  // Android tablets omit the "Mobile" token; iPads announce themselves by name.
  let device_type = 'desktop'
  if (/Tablet|iPad/i.test(ua) || (/Android/i.test(ua) && !/Mobi/i.test(ua))) {
    device_type = 'tablet'
  } else if (/Mobi|Android/i.test(ua)) {
    device_type = 'mobile'
  }

  // Ordered most specific first; capture group 1 is the version.
  const browserRules: Array<[string, RegExp]> = [
    ['Edge', /\bEdg(?:e|A|iOS)?\/([\d.]+)/],
    ['Opera', /\b(?:OPR|Opera)\/([\d.]+)/],
    ['Firefox', /\b(?:Firefox|FxiOS)\/([\d.]+)/],
    ['Chrome', /\b(?:Chrome|CriOS)\/([\d.]+)/],
    // Safari's own version lives in `Version/x`; `Safari/x` is the WebKit build.
    ['Safari', /\bVersion\/([\d.]+).*Safari\//],
    ['Safari', /\bSafari\/([\d.]+)/],
  ]
  let browser = 'Unknown'
  for (const [name, pattern] of browserRules) {
    const hit = ua.match(pattern)
    if (hit) {
      browser = `${name}/${hit[1]}`
      break
    }
  }

  let os = 'Unknown'
  const android = ua.match(/Android [\d.]+/)
  // iOS devices report e.g. "CPU iPhone OS 17_0 like Mac OS X".
  const ios = ua.match(/(?:iPhone|iPad|iPod)\b.*?\bOS ([\d_]+)/)
  if (android) {
    os = android[0]
  } else if (ios) {
    os = `iOS ${ios[1]}`
  } else {
    const other = ua.match(/(Windows NT [\d.]+|Mac OS X [\d_.]+|Linux|iOS [\d._]+)/)
    if (other) os = other[0]
  }
  os = os.replace(/_/g, '.')

  return { device_type, browser, os }
}
|
||||
|
||||
/** How the visitor reached the decision; sent to the backend as `consent_method`. */
type ConsentMethod = 'accept_all' | 'reject_all' | 'custom_selection'

/** A `<script src>` found on the page, tagged with a consent category. */
interface ScriptEntry { src: string; category: string }
/** A cookie visible to the page, tagged with a consent category. */
interface CookieEntry { name: string; domain: string; expiry_days: number; category: string }
|
||||
|
||||
/**
 * Classify the external scripts on the page for the consent record.
 *
 * A script whose `type` is `text/plain` is inert until consent is given, so
 * it is reported as blocked, using its `data-consent` attribute as category
 * when present. All other scripts are live and reported as released when
 * their URL matches a known analytics or marketing pattern. Scripts that
 * match no category are left out.
 *
 * @returns the blocked and the released script entries.
 */
function detectScripts(): { blocked: ScriptEntry[]; released: ScriptEntry[] } {
  const released: ScriptEntry[] = []
  const blocked: ScriptEntry[] = []

  for (const el of Array.from(document.querySelectorAll('script[src]'))) {
    const src = el.getAttribute('src') || ''

    let category: string | null = null
    if (/google.*tag|gtag|analytics/i.test(src)) category = 'analytics'
    else if (/facebook|fbevents|linkedin|tiktok/i.test(src)) category = 'marketing'

    const isBlocked = (el.getAttribute('type') || '').trim().toLowerCase() === 'text/plain'
    if (isBlocked) {
      // The explicit declaration on the element wins over URL guessing.
      category = el.getAttribute('data-consent') || category
    }

    if (!category) continue
    const bucket = isBlocked ? blocked : released
    bucket.push({ src, category })
  }

  return { blocked, released }
}
|
||||
|
||||
/**
 * List the cookies readable via `document.cookie`, each tagged with a consent
 * category derived from its name prefix ('functional' when no prefix matches).
 * `expiry_days` is always reported as 0.
 */
function detectCookies(): CookieEntry[] {
  const categorize = (cookieName: string): string => {
    if (/^_ga|^_gid|^_gat/i.test(cookieName)) return 'analytics'
    if (/^_fb|^_gcl|^_li/i.test(cookieName)) return 'marketing'
    if (/^bp_consent|^session|^csrf/i.test(cookieName)) return 'essential'
    return 'functional'
  }

  return document.cookie
    .split(';')
    .map(pair => pair.trim().split('=')[0])
    .filter(cookieName => Boolean(cookieName))
    .map(cookieName => ({
      name: cookieName,
      domain: window.location.hostname,
      expiry_days: 0,
      category: categorize(cookieName),
    }))
}
|
||||
|
||||
/**
 * Report a consent decision to `/api/consent` together with device, page and
 * script/cookie context, and remember the record id returned by the backend.
 *
 * Best effort: any failure (network, non-2xx status, unparsable body,
 * unavailable storage) yields null instead of throwing.
 *
 * @param consent category flags chosen by the visitor ('essential' is always sent)
 * @param method how the decision was made
 * @param vendorConsents optional per-vendor flags keyed by vendor name
 * @returns the backend record id (also stored under `bp_consent_id`), or null
 */
async function sendConsent(consent: ConsentState, method: ConsentMethod, vendorConsents?: Record<string, boolean>): Promise<string | null> {
  try {
    const { device_type, browser, os } = detectDevice()
    const { blocked, released } = detectScripts()
    const cookies_set = detectCookies()
    const vendor_consents = vendorConsents || {}

    const res = await fetch('/api/consent', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        site_id: SITE_ID,
        device_fingerprint: getFingerprint(),
        categories: [
          'essential',
          ...(consent.functional ? ['functional'] : []),
          ...(consent.analytics ? ['analytics'] : []),
        ],
        // Names of the vendors whose flag is true.
        vendors: Object.entries(vendor_consents)
          .filter(([, granted]) => granted)
          .map(([name]) => name),
        vendor_consents,
        user_agent: navigator.userAgent,
        consent_method: method,
        page_url: window.location.href,
        referrer: document.referrer || null,
        device_type,
        browser,
        os,
        screen_resolution: `${screen.width}x${screen.height}`,
        consent_scope: 'domain',
        session_id: getSessionId(),
        timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
        scripts_blocked: blocked,
        scripts_released: released,
        cookies_set,
      }),
    })

    // An error response must not be mistaken for a stored record.
    if (!res.ok) return null

    const result = await res.json().catch(() => null)
    if (result?.id) {
      localStorage.setItem('bp_consent_id', result.id)
      return result.id
    }
    return null
  } catch {
    return null
  }
}
|
||||
|
||||
/** One vendor entry as delivered in the `vendors` list of `/api/consent/config`. */
interface VendorConfig {
  /** Display name; also the key used for the per-vendor consent flag. */
  vendor_name: string
  /** Consent category the vendor is listed under in the banner. */
  category_key: string
  description_de?: string
  description_en?: string
  cookie_names?: string[]
  /** Shown next to the vendor name as "(Nd)" when present. */
  retention_days?: number
}
|
||||
|
||||
/**
 * Cookie consent banner.
 *
 * Shown until a decision is stored locally. Offers accept-all, reject-all and
 * a per-category selection (with per-vendor switches loaded from
 * `/api/consent/config`). A decision is written to localStorage, reported to
 * the backend and announced through a `consent-change` window event.
 */
export default function ConsentBanner() {
  const { lang } = useApp()
  const t = texts[lang]
  const [visible, setVisible] = useState(false)
  const [showDetails, setShowDetails] = useState(false)
  const [consent, setConsent] = useState<ConsentState>(defaultConsent)
  const [vendors, setVendors] = useState<VendorConfig[]>([])
  const [vendorConsents, setVendorConsents] = useState<Record<string, boolean>>({})

  useEffect(() => {
    if (!getSavedConsent()) {
      setVisible(true)
    }

    // Load vendor config from backend; skip state updates after unmount.
    let cancelled = false
    fetch('/api/consent/config')
      .then(r => r.json())
      .then(data => {
        if (cancelled) return
        const list: VendorConfig[] = Array.isArray(data?.vendors) ? data.vendors : []
        setVendors(list)
        // Vendor switches start enabled; they only take effect when the
        // vendor's category is granted (see `save`).
        const defaults: Record<string, boolean> = {}
        for (const vendor of list) defaults[vendor.vendor_name] = true
        setVendorConsents(defaults)
      })
      .catch(() => {
        // Best effort: without vendor config the banner still works per category.
      })
    return () => {
      cancelled = true
    }
  }, [])

  const save = useCallback((state: ConsentState, method: ConsentMethod) => {
    // A vendor is only consented when its category is granted. Without this,
    // the enabled-by-default vendor switches would be reported as granted
    // even after "reject all".
    const effectiveVendors: Record<string, boolean> = {}
    for (const v of vendors) {
      const categoryGranted = Boolean(state[v.category_key as keyof ConsentState])
      effectiveVendors[v.vendor_name] = method === 'accept_all'
        ? categoryGranted
        : categoryGranted && (vendorConsents[v.vendor_name] ?? true)
    }

    try {
      localStorage.setItem(COOKIE_NAME, JSON.stringify(state))
    } catch {
      // Storage unavailable: the decision still applies for this page view.
    }
    void sendConsent(state, method, effectiveVendors)
    setVisible(false)
    window.dispatchEvent(new CustomEvent('consent-change', { detail: state }))
  }, [vendors, vendorConsents])

  const acceptAll = () => save({ essential: true, functional: true, analytics: true }, 'accept_all')
  const rejectAll = () => save({ essential: true, functional: false, analytics: false }, 'reject_all')
  const saveSelection = () => save(consent, 'custom_selection')

  const categories = Object.entries(t.categories) as [string, { name: string; description: string; required?: boolean }][]

  // The banner is toggled inside AnimatePresence (rather than returning null
  // early) so the exit animation can run when it is dismissed.
  return (
    <AnimatePresence>
      {visible && (
        <motion.div
          key="consent-banner"
          initial={{ y: 100, opacity: 0 }}
          animate={{ y: 0, opacity: 1 }}
          exit={{ y: 100, opacity: 0 }}
          transition={{ duration: 0.3, ease: [0.22, 1, 0.36, 1] }}
          className="fixed bottom-0 left-0 right-0 z-[9999] p-4 md:p-6"
        >
          <div className="max-w-3xl mx-auto rounded-2xl bg-enterprise-dark/95 backdrop-blur-xl border border-white/[0.08] shadow-2xl shadow-black/40 p-6">
            {/* Header */}
            <div className="flex items-start gap-3 mb-4">
              <div className="w-8 h-8 rounded-lg bg-accent-electric/10 flex items-center justify-center shrink-0 mt-0.5">
                <Shield className="w-4 h-4 text-accent-electric" />
              </div>
              <div>
                <h3 className="text-sm font-bold text-white mb-1">{t.title}</h3>
                <p className="text-xs text-white/50 leading-relaxed">
                  {t.description}{' '}
                  <a href="/datenschutz" className="text-accent-electric hover:underline">{t.privacyLink}</a>.
                </p>
              </div>
            </div>

            {/* Category details */}
            <AnimatePresence>
              {showDetails && (
                <motion.div
                  initial={{ height: 0, opacity: 0 }}
                  animate={{ height: 'auto', opacity: 1 }}
                  exit={{ height: 0, opacity: 0 }}
                  transition={{ duration: 0.2 }}
                  className="overflow-hidden mb-4"
                >
                  <div className="space-y-2 pt-2 border-t border-white/[0.06]">
                    {categories.map(([key, cat]) => {
                      const catVendors = vendors.filter(v => v.category_key === key)
                      return (
                        <div key={key} className="rounded-xl bg-white/[0.03] border border-white/[0.06]">
                          <label className="flex items-center justify-between p-3 cursor-pointer hover:bg-white/[0.05] transition-colors">
                            <div className="flex-1 mr-4">
                              <span className="text-xs font-semibold text-white">{cat.name}</span>
                              <p className="text-xs text-white/40 mt-0.5">{cat.description}</p>
                            </div>
                            <input
                              type="checkbox"
                              checked={cat.required || consent[key as keyof ConsentState]}
                              disabled={cat.required}
                              onChange={(e) => {
                                const granted = e.target.checked
                                setConsent(prev => ({ ...prev, [key]: granted }))
                                // Switch all vendors of the category in one state update.
                                if (catVendors.length > 0) {
                                  setVendorConsents(prev => {
                                    const next = { ...prev }
                                    for (const v of catVendors) next[v.vendor_name] = granted
                                    return next
                                  })
                                }
                              }}
                              className="w-4 h-4 rounded accent-accent-electric"
                            />
                          </label>
                          {catVendors.length > 0 && consent[key as keyof ConsentState] && (
                            <div className="px-3 pb-3 space-y-1">
                              {catVendors.map(v => (
                                <label key={v.vendor_name} className="flex items-center justify-between pl-4 py-1 text-xs cursor-pointer">
                                  <div className="flex-1 mr-2">
                                    <span className="text-white/60">{v.vendor_name}</span>
                                    {/* Ternary instead of `&&` so a value of 0 is not rendered as text */}
                                    {v.retention_days ? <span className="text-white/30 ml-1">({v.retention_days}d)</span> : null}
                                  </div>
                                  <input
                                    type="checkbox"
                                    checked={vendorConsents[v.vendor_name] ?? true}
                                    onChange={(e) => setVendorConsents(prev => ({ ...prev, [v.vendor_name]: e.target.checked }))}
                                    className="w-3 h-3 rounded accent-accent-electric"
                                  />
                                </label>
                              ))}
                            </div>
                          )}
                        </div>
                      )
                    })}
                  </div>
                </motion.div>
              )}
            </AnimatePresence>

            {/* Buttons */}
            <div className="flex flex-col sm:flex-row items-stretch sm:items-center gap-2">
              <button
                type="button"
                onClick={acceptAll}
                className="flex-1 px-4 py-2.5 rounded-xl bg-accent-electric text-white text-xs font-semibold hover:bg-blue-500 transition-colors"
              >
                {t.acceptAll}
              </button>
              <button
                type="button"
                onClick={rejectAll}
                className="flex-1 px-4 py-2.5 rounded-xl bg-white/[0.06] border border-white/[0.08] text-white/70 text-xs font-semibold hover:bg-white/[0.1] transition-colors"
              >
                {t.rejectAll}
              </button>
              <button
                type="button"
                onClick={() => showDetails ? saveSelection() : setShowDetails(true)}
                className="flex-1 px-4 py-2.5 rounded-xl bg-white/[0.06] border border-white/[0.08] text-white/70 text-xs font-semibold hover:bg-white/[0.1] transition-colors flex items-center justify-center gap-1.5"
              >
                {showDetails ? t.save : t.settings}
                {!showDetails && <ChevronDown className="w-3 h-3" />}
                {showDetails && <ChevronUp className="w-3 h-3" />}
              </button>
            </div>

            {/* Powered by */}
            <p className="text-center mt-3 text-[10px] text-white/20 font-mono">
              Consent managed by BreakPilot CMP
            </p>
          </div>
        </motion.div>
      )}
    </AnimatePresence>
  )
}
|
||||
@@ -0,0 +1,89 @@
|
||||
'use client'
|
||||
|
||||
import { Cookie } from 'lucide-react'
|
||||
import { t } from '@/lib/content'
|
||||
import { useApp } from '@/lib/context'
|
||||
|
||||
/**
 * Withdraw the stored consent and reload so the banner is shown again.
 *
 * Local state is cleared first. The backend record (if one is known) is
 * deleted on a best-effort basis: the request is sent with `keepalive` so it
 * survives the navigation, and the reload waits for it for at most two
 * seconds — reloading right after firing the request could cancel it.
 */
async function reopenConsentBanner() {
  const consentId = localStorage.getItem('bp_consent_id')
  localStorage.removeItem('bp_consent_id')
  localStorage.removeItem('bp_consent')

  if (consentId) {
    let timer: ReturnType<typeof setTimeout> | undefined
    try {
      await Promise.race([
        fetch(`/api/consent/${encodeURIComponent(consentId)}`, { method: 'DELETE', keepalive: true }),
        new Promise(resolve => {
          timer = setTimeout(resolve, 2000)
        }),
      ])
    } catch {
      // Best effort: a failed withdrawal call must not block the reload.
    } finally {
      clearTimeout(timer)
    }
  }

  window.location.reload()
}
|
||||
|
||||
/**
 * Site footer: brand block, product and legal link columns, copyright line
 * and the button that re-opens the cookie consent banner.
 *
 * Fixed labels that are not part of the content dictionary are chosen by the
 * active language so the footer is not German-only.
 */
export default function Footer() {
  const { lang } = useApp()
  const i = t(lang)
  const year = new Date().getFullYear()

  const labels = lang === 'de'
    ? {
        product: 'Produkt',
        legal: 'Rechtliches',
        rights: 'Alle Rechte vorbehalten.',
        cookies: 'Cookie-Einstellungen',
      }
    : {
        product: 'Product',
        legal: 'Legal',
        rights: 'All rights reserved.',
        cookies: 'Cookie settings',
      }

  // NOTE(review): targets are keyed by the German link labels; links with any
  // other label fall back to '#'. Confirm the labels used for other languages.
  const legalHref = (link: string) =>
    link === 'Impressum' ? '/impressum' : link === 'Datenschutz' ? '/datenschutz' : '#'

  return (
    <footer className="border-t border-white/[0.06] bg-enterprise-darker">
      <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-16">
        <div className="grid grid-cols-1 md:grid-cols-4 gap-12">
          <div className="md:col-span-2">
            <div className="flex items-center gap-2 mb-4">
              <div className="w-8 h-8 rounded-lg bg-gradient-to-br from-accent-electric to-accent-indigo flex items-center justify-center">
                <span className="text-white font-bold text-sm">B</span>
              </div>
              <span className="font-bold text-white text-lg">BreakPilot</span>
            </div>
            <p className="mono-label mb-2">{i.footer.tagline}</p>
            <p className="text-white/30 text-sm max-w-sm">
              {i.footer.description}
            </p>
          </div>

          <div>
            <h4 className="font-semibold text-white/80 text-sm mb-4">{labels.product}</h4>
            <ul className="space-y-2">
              {i.footer.links.product.map(link => (
                <li key={link}>
                  <a href={`#${link.toLowerCase()}`} className="text-sm text-white/40 hover:text-white/70 transition-colors">
                    {link}
                  </a>
                </li>
              ))}
            </ul>
          </div>

          <div>
            <h4 className="font-semibold text-white/80 text-sm mb-4">{labels.legal}</h4>
            <ul className="space-y-2">
              {i.footer.links.legal.map(link => (
                <li key={link}>
                  <a
                    href={legalHref(link)}
                    className="text-sm text-white/40 hover:text-white/70 transition-colors"
                  >
                    {link}
                  </a>
                </li>
              ))}
            </ul>
          </div>
        </div>

        <div className="mt-12 pt-8 border-t border-white/[0.04] flex flex-col sm:flex-row justify-between items-center gap-4">
          <p className="text-xs text-white/20">
            © {year} {i.footer.copyright}. {labels.rights}
          </p>
          <div className="flex items-center gap-4">
            <button
              type="button"
              onClick={reopenConsentBanner}
              className="text-xs text-white/20 hover:text-white/50 transition-colors flex items-center gap-1.5"
            >
              <Cookie className="w-3 h-3" />
              {labels.cookies}
            </button>
            <p className="text-xs text-white/20 font-mono">
              {i.footer.madeIn}
            </p>
          </div>
        </div>
      </div>
    </footer>
  )
}
|
||||
@@ -0,0 +1,63 @@
|
||||
'use client'
|
||||
|
||||
import { motion, AnimatePresence } from 'framer-motion'
|
||||
import { X } from 'lucide-react'
|
||||
import Link from 'next/link'
|
||||
import { navLinks } from '@/lib/sections'
|
||||
import { useApp } from '@/lib/context'
|
||||
|
||||
/** Props of the slide-in mobile navigation. */
interface MobileMenuProps {
  /** Whether the menu (and its backdrop) is shown. */
  open: boolean
  /** Called when the backdrop, the close button or a link is activated. */
  onClose: () => void
}

/**
 * Slide-in navigation drawer for small screens with a click-to-close backdrop.
 * Link labels follow the active language.
 */
export default function MobileMenu({ open, onClose }: MobileMenuProps) {
  const { lang } = useApp()
  const isGerman = lang === 'de'

  return (
    <AnimatePresence>
      {open && (
        <>
          <motion.div
            initial={{ opacity: 0 }}
            animate={{ opacity: 1 }}
            exit={{ opacity: 0 }}
            onClick={onClose}
            className="fixed inset-0 z-50 bg-black/60"
          />
          <motion.div
            initial={{ x: '100%' }}
            animate={{ x: 0 }}
            exit={{ x: '100%' }}
            transition={{ type: 'spring', damping: 30, stiffness: 300 }}
            className="fixed right-0 top-0 bottom-0 z-50 w-72 bg-enterprise-dark border-l border-white/[0.08] p-6"
          >
            {/* Icon-only button: needs an explicit accessible name */}
            <button
              type="button"
              onClick={onClose}
              aria-label={isGerman ? 'Menü schließen' : 'Close menu'}
              className="absolute top-4 right-4 text-white/60 hover:text-white"
            >
              <X className="w-5 h-5" />
            </button>

            <nav className="mt-12 flex flex-col gap-1">
              <Link
                href="/"
                onClick={onClose}
                className="px-4 py-3 rounded-xl text-sm text-white/60 hover:text-white hover:bg-white/[0.06] transition-colors"
              >
                {isGerman ? 'Start' : 'Home'}
              </Link>
              {navLinks.map(link => (
                <Link
                  key={link.href}
                  href={link.href}
                  onClick={onClose}
                  className="px-4 py-3 rounded-xl text-sm text-white/60 hover:text-white hover:bg-white/[0.06] transition-colors"
                >
                  {isGerman ? link.labelDe : link.labelEn}
                </Link>
              ))}
            </nav>
          </motion.div>
        </>
      )}
    </AnimatePresence>
  )
}
|
||||
@@ -0,0 +1,111 @@
|
||||
'use client'
|
||||
|
||||
import { useState, useEffect } from 'react'
|
||||
import { motion } from 'framer-motion'
|
||||
import { Menu, Sun, Moon } from 'lucide-react'
|
||||
import Link from 'next/link'
|
||||
import { usePathname } from 'next/navigation'
|
||||
import { navLinks } from '@/lib/sections'
|
||||
import { t } from '@/lib/content'
|
||||
import { useApp } from '@/lib/context'
|
||||
import CTAButton from '@/components/ui/CTAButton'
|
||||
import MobileMenu from './MobileMenu'
|
||||
|
||||
/**
 * Fixed top navigation: logo, desktop links, language and theme toggles,
 * call-to-action and the trigger for the mobile menu.
 *
 * The bar is transparent at the top of the page and gets a blurred dark
 * background once the page is scrolled more than 50px.
 */
export default function Navbar() {
  const { lang, theme, toggleLang, toggleTheme } = useApp()
  const [scrolled, setScrolled] = useState(false)
  const [mobileOpen, setMobileOpen] = useState(false)
  const pathname = usePathname()
  const i = t(lang)
  const isGerman = lang === 'de'

  useEffect(() => {
    const handleScroll = () => setScrolled(window.scrollY > 50)
    // Sync once on mount: the page may already be scrolled (reload, anchor
    // link), in which case no scroll event fires until the user moves.
    handleScroll()
    window.addEventListener('scroll', handleScroll, { passive: true })
    return () => window.removeEventListener('scroll', handleScroll)
  }, [])

  return (
    <>
      <motion.nav
        initial={{ y: -100 }}
        animate={{ y: 0 }}
        transition={{ duration: 0.5, ease: [0.22, 1, 0.36, 1] }}
        className={`
          fixed top-0 left-0 right-0 z-50 transition-all duration-300
          ${scrolled
            ? 'bg-enterprise-dark/80 backdrop-blur-xl border-b border-white/[0.06]'
            : 'bg-transparent'
          }
        `}
      >
        <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
          <div className="flex items-center justify-between h-16">
            <Link href="/" className="flex items-center gap-2">
              <div className="w-8 h-8 rounded-lg bg-gradient-to-br from-accent-electric to-accent-indigo flex items-center justify-center">
                <span className="text-white font-bold text-sm">B</span>
              </div>
              <span className="font-bold text-white text-lg">BreakPilot</span>
            </Link>

            <div className="hidden md:flex items-center gap-1">
              {navLinks.map(link => {
                const isActive = pathname === link.href
                return (
                  <Link
                    key={link.href}
                    href={link.href}
                    aria-current={isActive ? 'page' : undefined}
                    className={`
                      px-4 py-2 rounded-lg text-sm font-medium transition-colors duration-200
                      ${isActive
                        ? 'text-white bg-white/[0.08]'
                        : 'text-white/50 hover:text-white hover:bg-white/[0.04]'
                      }
                    `}
                  >
                    {isGerman ? link.labelDe : link.labelEn}
                  </Link>
                )
              })}
            </div>

            <div className="flex items-center gap-2">
              {/* The visible "DE EN" text does not say what the button does */}
              <button
                type="button"
                onClick={toggleLang}
                aria-label={isGerman ? 'Switch to English' : 'Auf Deutsch umschalten'}
                className="flex items-center gap-0.5 rounded-lg bg-white/[0.06] border border-white/[0.08] overflow-hidden"
              >
                <span className={`px-2 py-1 text-xs font-medium transition-colors ${lang === 'de' ? 'bg-accent-electric text-white' : 'text-white/40'}`}>
                  DE
                </span>
                <span className={`px-2 py-1 text-xs font-medium transition-colors ${lang === 'en' ? 'bg-accent-electric text-white' : 'text-white/40'}`}>
                  EN
                </span>
              </button>

              <button
                type="button"
                onClick={toggleTheme}
                className="w-8 h-8 rounded-lg bg-white/[0.06] border border-white/[0.08] flex items-center justify-center
                  hover:bg-white/[0.1] transition-colors"
                aria-label={theme === 'dark' ? 'Light mode' : 'Dark mode'}
              >
                {theme === 'dark'
                  ? <Sun className="w-4 h-4 text-white/50" />
                  : <Moon className="w-4 h-4 text-white/50" />
                }
              </button>

              <CTAButton href="/preise" className="hidden sm:inline-flex text-xs px-4 py-2">
                {i.nav.cta}
              </CTAButton>

              {/* Icon-only button: needs an explicit accessible name */}
              <button
                type="button"
                onClick={() => setMobileOpen(true)}
                aria-label={isGerman ? 'Menü öffnen' : 'Open menu'}
                aria-expanded={mobileOpen}
                className="md:hidden p-2 text-white/60 hover:text-white"
              >
                <Menu className="w-5 h-5" />
              </button>
            </div>
          </div>
        </div>
      </motion.nav>

      <MobileMenu open={mobileOpen} onClose={() => setMobileOpen(false)} />
    </>
  )
}
|
||||
@@ -0,0 +1,115 @@
|
||||
'use client'
|
||||
|
||||
import { useEffect, useRef, useCallback } from 'react'
|
||||
|
||||
/**
|
||||
* ScriptManager — active consent-aware script blocking + injection.
|
||||
*
|
||||
* Two mechanisms:
|
||||
* 1. INJECTION: Scripts in CONSENT_SCRIPTS are only injected AFTER consent.
|
||||
* 2. BLOCKING: Existing <script data-consent="category" type="text/plain">
|
||||
* elements in the page are activated after consent by changing type to
|
||||
* "text/javascript". This is the standard CMP blocking pattern.
|
||||
*
|
||||
* Usage for inline scripts in pages:
|
||||
* <script type="text/plain" data-consent="analytics">
|
||||
* // This won't execute until analytics consent is given
|
||||
* gtag('config', 'G-XXXXXX');
|
||||
* </script>
|
||||
*
|
||||
* Usage for adding new third-party scripts:
|
||||
* Add to CONSENT_SCRIPTS below. They'll be injected only after consent.
|
||||
*/
|
||||
|
||||
interface ConsentScript {
|
||||
src: string
|
||||
async?: boolean
|
||||
id?: string
|
||||
}
|
||||
|
||||
const CONSENT_SCRIPTS: Record<string, ConsentScript[]> = {
|
||||
analytics: [
|
||||
// { src: 'https://www.googletagmanager.com/gtag/js?id=G-XXXXXX', async: true, id: 'gtag' },
|
||||
// { src: 'https://plausible.io/js/script.js', async: true, id: 'plausible' },
|
||||
],
|
||||
marketing: [
|
||||
// { src: 'https://connect.facebook.net/en_US/fbevents.js', async: true, id: 'fb-pixel' },
|
||||
// { src: 'https://snap.licdn.com/li.lms-analytics/insight.min.js', async: true, id: 'li-insight' },
|
||||
],
|
||||
functional: [
|
||||
// { src: 'https://widget.example.com/chat.js', async: true, id: 'chat-widget' },
|
||||
],
|
||||
}
|
||||
|
||||
/**
 * Consent flags per category, as stored under the `bp_consent` localStorage key
 * and delivered in the `detail` of `consent-change` window events.
 */
interface ConsentState {
  essential: boolean
  functional: boolean
  analytics: boolean
  /** Optional: CONSENT_SCRIPTS declares a `marketing` category, but a payload need not carry this flag. */
  marketing?: boolean
}

/** Narrows an arbitrary value to a non-null, non-array object so its consent flags can be read safely. */
function isConsentRecord(value: unknown): value is ConsentState {
  return typeof value === 'object' && value !== null && !Array.isArray(value)
}

/**
 * Reads the persisted consent decision from localStorage.
 *
 * @returns The parsed consent object, or `null` when running without a `window`,
 *          when nothing is stored, when the stored text is not valid JSON, or
 *          when the parsed value is not an object.
 */
function getStoredConsent(): ConsentState | null {
  if (typeof window === 'undefined') return null
  try {
    const raw = localStorage.getItem('bp_consent')
    if (!raw) return null
    const parsed: unknown = JSON.parse(raw)
    // Reject primitives/arrays so callers never treat e.g. `true` or `[]` as a consent record.
    return isConsentRecord(parsed) ? parsed : null
  } catch {
    // Storage access or JSON parsing failed: behave as if no decision was stored.
    return null
  }
}

/**
 * Renders nothing; applies the user's consent decision to third-party scripts.
 *
 * On mount it applies the stored consent (if any) and then listens for
 * `consent-change` events on `window`. For every accepted category it
 *   1. injects the scripts listed in CONSENT_SCRIPTS, and
 *   2. activates `<script type="text/plain" data-consent="...">` elements by
 *      replacing them with executable copies.
 *
 * Scripts are only ever added or activated here; nothing is removed when a
 * later decision no longer includes a category.
 */
export default function ScriptManager() {
  // Sources already handled by this component instance.
  const injected = useRef(new Set<string>())

  const applyConsent = useCallback((consent: ConsentState) => {
    const accepted = new Set<string>()
    accepted.add('essential') // always allowed
    if (consent.functional) accepted.add('functional')
    if (consent.analytics) accepted.add('analytics')
    // Without this, entries under CONSENT_SCRIPTS.marketing and
    // data-consent="marketing" scripts could never be enabled.
    if (consent.marketing) accepted.add('marketing')

    // 1. INJECT: add scripts from CONSENT_SCRIPTS for accepted categories
    for (const cat of accepted) {
      for (const script of CONSENT_SCRIPTS[cat] ?? []) {
        if (injected.current.has(script.src)) continue
        injected.current.add(script.src)

        const el = document.createElement('script')
        el.src = script.src

        // The ref is per component instance, so after a remount it is empty
        // again. Check the document too, ignoring still-blocked text/plain
        // placeholders, to avoid loading the same script twice.
        const alreadyInDocument = Array.from(document.scripts).some(
          existing => existing.type !== 'text/plain' && existing.src === el.src
        )
        if (alreadyInDocument) continue

        if (script.async) el.async = true
        if (script.id) el.id = script.id
        el.dataset.consent = cat
        document.head.appendChild(el)
      }
    }

    // 2. ACTIVATE: unblock <script type="text/plain" data-consent="...">
    const blocked = document.querySelectorAll('script[type="text/plain"][data-consent]')
    for (const el of blocked) {
      const cat = el.getAttribute('data-consent') || ''
      if (!accepted.has(cat)) continue

      // Changing `type` on an existing element does not run it, so build a
      // fresh <script> carrying the same attributes and inline code.
      const clone = document.createElement('script')
      for (const attr of el.attributes) {
        if (attr.name === 'type') continue // skip type="text/plain"
        clone.setAttribute(attr.name, attr.value)
      }
      clone.type = 'text/javascript'
      clone.textContent = el.textContent
      el.parentNode?.replaceChild(clone, el)
    }
  }, [])

  useEffect(() => {
    // On mount: apply saved consent (return visitors)
    const saved = getStoredConsent()
    if (saved) applyConsent(saved)

    // Listen for new consent decisions
    function onConsentChange(e: Event) {
      const detail = (e as CustomEvent<unknown>).detail
      if (isConsentRecord(detail)) applyConsent(detail)
    }

    window.addEventListener('consent-change', onConsentChange)
    return () => window.removeEventListener('consent-change', onConsentChange)
  }, [applyConsent])

  return null
}
|
||||
@@ -0,0 +1,65 @@
|
||||
'use client'
|
||||
|
||||
import { Check } from 'lucide-react'
|
||||
import { t } from '@/lib/content'
|
||||
import { useApp } from '@/lib/context'
|
||||
import SectionHeading from '@/components/ui/SectionHeading'
|
||||
import FadeInView from '@/components/ui/FadeInView'
|
||||
|
||||
/** Tailwind class sets for the risk-level cards, keyed by the `color` of a risk level. */
const riskColors = {
  red: { bg: 'bg-red-500/10', border: 'border-red-500/20', text: 'text-red-400', bar: 'bg-red-500' },
  amber: { bg: 'bg-amber-500/10', border: 'border-amber-500/20', text: 'text-amber-400', bar: 'bg-amber-500' },
  blue: { bg: 'bg-blue-500/10', border: 'border-blue-500/20', text: 'text-blue-400', bar: 'bg-blue-500' },
  green: { bg: 'bg-green-500/10', border: 'border-green-500/20', text: 'text-green-400', bar: 'bg-green-500' },
}

/**
 * "AI governance" page section: a list of risk-level cards on the left and a
 * feature checklist on the right, with all copy taken from `t(lang).aiGovernance`.
 */
export default function AIGovernanceSection() {
  const { lang } = useApp()
  const i = t(lang)

  // This heading was a fixed German string and therefore also shown to English
  // visitors. It is localized inline because no matching content key is visible
  // here. NOTE(review): move to the content dictionary if a key exists.
  const featuresHeading =
    lang === 'de' ? 'Deterministische AI-Act-Compliance' : 'Deterministic AI Act Compliance'

  return (
    <section id="ai-governance" className="py-24 sm:py-32 section-alt">
      <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
        <SectionHeading
          tag={i.aiGovernance.tag}
          title={i.aiGovernance.title}
          titleHighlight={i.aiGovernance.titleHighlight}
          subtitle={i.aiGovernance.subtitle}
        />

        <div className="grid grid-cols-1 lg:grid-cols-2 gap-8">
          <FadeInView direction="left">
            <div className="space-y-3">
              {i.aiGovernance.riskLevels.map((level, idx) => {
                const colors = riskColors[level.color]
                return (
                  <div key={idx} className={`rounded-xl border ${colors.border} ${colors.bg} p-4 flex items-center gap-4`}>
                    <div className={`w-1 h-10 rounded-full ${colors.bar} shrink-0`} />
                    <div>
                      <h4 className={`text-sm font-bold ${colors.text}`}>{level.level}</h4>
                      <p className="text-xs text-white/40">{level.description}</p>
                    </div>
                  </div>
                )
              })}
            </div>
          </FadeInView>

          <FadeInView direction="right">
            <div className="rounded-2xl border border-white/[0.06] bg-white/[0.03] p-6 h-full">
              <h3 className="text-lg font-bold mb-4">{featuresHeading}</h3>
              <ul className="space-y-3">
                {i.aiGovernance.features.map((feature, idx) => (
                  <li key={idx} className="flex items-start gap-3 text-sm text-white/60">
                    <Check className="w-4 h-4 text-accent-signal mt-0.5 shrink-0" />
                    {feature}
                  </li>
                ))}
              </ul>
            </div>
          </FadeInView>
        </div>
      </div>
    </section>
  )
}
|
||||
@@ -0,0 +1,73 @@
|
||||
'use client'
|
||||
|
||||
import { Layers, Server, Database } from 'lucide-react'
|
||||
import { t } from '@/lib/content'
|
||||
import { useApp } from '@/lib/context'
|
||||
import SectionHeading from '@/components/ui/SectionHeading'
|
||||
import FadeInView from '@/components/ui/FadeInView'
|
||||
import TechBadge from '@/components/ui/TechBadge'
|
||||
|
||||
// Per-layer presentation, looked up by the layer's position in `architecture.layers`.
const layerIcons = [Layers, Server, Database]
const layerColors = ['border-accent-electric/30', 'border-accent-indigo/30', 'border-accent-purple/30']
const layerBg = ['bg-accent-electric/5', 'bg-accent-indigo/5', 'bg-accent-purple/5']

/**
 * "Architecture" page section: one card per architecture layer (icon, name,
 * component chips, tech badge) followed by a centered row of badge pills.
 * All copy comes from `t(lang).architecture`.
 */
export default function ArchitectureSection() {
  const { lang } = useApp()
  const i = t(lang)
  const section = i.architecture

  // One card per layer; each card fades in 0.15 later than the previous one.
  const layerCards = section.layers.map((layer, layerIndex) => {
    const LayerIcon = layerIcons[layerIndex]
    const cardClasses = `rounded-2xl border ${layerColors[layerIndex]} ${layerBg[layerIndex]} p-6`

    const componentChips = layer.components.map((componentName, chipIndex) => (
      <span
        key={chipIndex}
        className="px-3 py-1.5 rounded-lg text-xs bg-white/[0.06] border border-white/[0.06] text-white/70"
      >
        {componentName}
      </span>
    ))

    return (
      <FadeInView key={layerIndex} delay={layerIndex * 0.15}>
        <div className={cardClasses}>
          <div className="flex flex-col md:flex-row md:items-center gap-4">
            <div className="flex items-center gap-3 md:w-64 shrink-0">
              <LayerIcon className="w-5 h-5 text-white/60" />
              <h3 className="font-bold text-sm">{layer.name}</h3>
            </div>
            <div className="flex-1 flex flex-wrap gap-2">{componentChips}</div>
            <TechBadge className="shrink-0">{layer.tech}</TechBadge>
          </div>
        </div>
      </FadeInView>
    )
  })

  const badgePills = section.badges.map((badgeLabel, badgeIndex) => (
    <span
      key={badgeIndex}
      className="inline-flex items-center gap-2 px-4 py-2 rounded-full border border-white/[0.08] bg-white/[0.03] text-xs text-white/60 font-medium"
    >
      <span className="w-1.5 h-1.5 rounded-full bg-accent-signal" />
      {badgeLabel}
    </span>
  ))

  return (
    <section id="architecture" className="py-24 sm:py-32 section-alt">
      <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
        <SectionHeading
          tag={section.tag}
          title={section.title}
          titleHighlight={section.titleHighlight}
          subtitle={section.subtitle}
        />

        <div className="space-y-4 mb-12">{layerCards}</div>

        <FadeInView delay={0.5}>
          <div className="flex flex-wrap justify-center gap-3">{badgePills}</div>
        </FadeInView>
      </div>
    </section>
  )
}
|
||||
@@ -0,0 +1,171 @@
|
||||
'use client'
|
||||
|
||||
import { motion } from 'framer-motion'
|
||||
import { FileText, Cpu, Shield, Search, ClipboardCheck, Download } from 'lucide-react'
|
||||
import { useApp } from '@/lib/context'
|
||||
import FadeInView from '@/components/ui/FadeInView'
|
||||
import TechBadge from '@/components/ui/TechBadge'
|
||||
import { ANIMATION } from '@/lib/constants'
|
||||
|
||||
const stepIcons = [FileText, Cpu, Shield, Search, ClipboardCheck, Download]
|
||||
|
||||
const flow = {
|
||||
de: {
|
||||
steps: [
|
||||
{
|
||||
num: '01',
|
||||
title: 'Grenzen & Verwendung',
|
||||
description: 'Der Nutzer füllt 14 Textfelder aus — Maschinenbeschreibung, bestimmungsgemäße Verwendung, Energiequellen, Betriebsarten, Personengruppen.',
|
||||
example: '"Kollaborativer 6-Achs-Roboter UR10e mit Kraft-/Momentsensorik, elektrischer Antrieb 48V DC, Ethernet/PROFINET..."',
|
||||
tags: [],
|
||||
},
|
||||
{
|
||||
num: '02',
|
||||
title: 'Automatische Analyse',
|
||||
description: '6 deterministische Schritte in Sekunden: Narrative Parser (200 Keywords) → Komponenten → Pattern Engine (1.058 Patterns) → Gefährdungen → Maßnahmen (225 Bibliothek) → Normen (751 A/B/C).',
|
||||
example: 'Pattern HP059 "Kollisionsgefahr Cobot": RequiredTags [cobot, rotating_joint] — PATTERN FEUERT ✓',
|
||||
tags: ['Deterministisch', 'Kein LLM', 'AND/NOT-Logik'],
|
||||
},
|
||||
{
|
||||
num: '03',
|
||||
title: 'Risikobewertung',
|
||||
description: 'Im Hazard Log erscheinen alle Gefährdungen mit Erstbewertung (S/E/P), RPZ-Berechnung und automatischer SIL/PL-Ableitung nach ISO-Risikograph.',
|
||||
example: 'Cobot-Ergebnis: 12 Gefährdungen, RPZ 4-48, SIL 0-2, PL a-d',
|
||||
tags: ['ISO 12100', 'SIL/PL', 'RPZ'],
|
||||
},
|
||||
{
|
||||
num: '04',
|
||||
title: 'Regulatorische Hinweise',
|
||||
description: 'On-Demand RAG-Suche in 36.708 Chunks: BAuA (TRBS/TRGS/ASR), OSHA Technical Manual, EU-Verordnungen. Keine ISO-Texte — nur gemeinfreie Quellen.',
|
||||
example: 'Gefährdung "Kollision" → TRBS 2111 Kap. 4.3, OSHA 1910.212(a)(1)',
|
||||
tags: ['RAG', 'BAuA', 'OSHA'],
|
||||
},
|
||||
{
|
||||
num: '05',
|
||||
title: 'Verifikation & Nachweise',
|
||||
description: '25 Evidenztypen werden automatisch vorgeschlagen. Der Nutzer erstellt Verifikationspläne und ordnet Prüfberichte zu.',
|
||||
example: 'E01 Hazard Analysis Report, E04 Electrical Safety Test, E14 Software Validation',
|
||||
tags: ['25 Evidenztypen', 'Traceability'],
|
||||
},
|
||||
{
|
||||
num: '06',
|
||||
title: 'CE-Akte generieren',
|
||||
description: 'Strukturiertes Dokument nach MVO 2023/1230 Anhang IV: Beschreibung, Risikobeurteilung, Normen, Maßnahmen, Nachweise, Konformitätserklärung. Export als PDF.',
|
||||
example: 'Vollständige Technische Dokumentation per Klick',
|
||||
tags: ['MVO 2023/1230', 'Anhang IV', 'PDF'],
|
||||
},
|
||||
],
|
||||
},
|
||||
en: {
|
||||
steps: [
|
||||
{
|
||||
num: '01',
|
||||
title: 'Limits & Intended Use',
|
||||
description: 'The user fills in 14 text fields — machine description, intended use, energy sources, operating modes, user groups.',
|
||||
example: '"Collaborative 6-axis robot UR10e with force/torque sensing, electric drive 48V DC, Ethernet/PROFINET..."',
|
||||
tags: [],
|
||||
},
|
||||
{
|
||||
num: '02',
|
||||
title: 'Automatic Analysis',
|
||||
description: '6 deterministic steps in seconds: Narrative Parser (200 keywords) → Components → Pattern Engine (1,058 patterns) → Hazards → Mitigations (225 library) → Norms (751 A/B/C).',
|
||||
example: 'Pattern HP059 "Collision hazard cobot": RequiredTags [cobot, rotating_joint] — PATTERN FIRES ✓',
|
||||
tags: ['Deterministic', 'No LLM', 'AND/NOT logic'],
|
||||
},
|
||||
{
|
||||
num: '03',
|
||||
title: 'Risk Assessment',
|
||||
description: 'The Hazard Log shows all hazards with initial assessment (S/E/P), RPZ calculation and automatic SIL/PL derivation per ISO risk graph.',
|
||||
example: 'Cobot result: 12 hazards, RPZ 4-48, SIL 0-2, PL a-d',
|
||||
tags: ['ISO 12100', 'SIL/PL', 'RPZ'],
|
||||
},
|
||||
{
|
||||
num: '04',
|
||||
title: 'Regulatory Guidance',
|
||||
description: 'On-demand RAG search across 36,708 chunks: BAuA (TRBS/TRGS/ASR), OSHA Technical Manual, EU regulations. No ISO texts — only public domain sources.',
|
||||
example: 'Hazard "Collision" → TRBS 2111 Ch. 4.3, OSHA 1910.212(a)(1)',
|
||||
tags: ['RAG', 'BAuA', 'OSHA'],
|
||||
},
|
||||
{
|
||||
num: '05',
|
||||
title: 'Verification & Evidence',
|
||||
description: '25 evidence types are automatically suggested. Users create verification plans and assign test reports.',
|
||||
example: 'E01 Hazard Analysis Report, E04 Electrical Safety Test, E14 Software Validation',
|
||||
tags: ['25 evidence types', 'Traceability'],
|
||||
},
|
||||
{
|
||||
num: '06',
|
||||
title: 'Generate CE File',
|
||||
description: 'Structured document per MR 2023/1230 Annex IV: Description, risk assessment, norms, mitigations, evidence, declaration of conformity. Export as PDF.',
|
||||
example: 'Complete technical documentation with one click',
|
||||
tags: ['MR 2023/1230', 'Annex IV', 'PDF'],
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
/**
 * Renders one step of the CE flow timeline: an icon tile on the left and a card
 * with the step number, title, description, a monospace example line and, when
 * the step has any, its tags as TechBadges.
 *
 * @param step One entry of `flow.<lang>.steps`.
 * @param Icon Icon component drawn inside the tile.
 */
function StepContent({ step, Icon }: { step: typeof flow.de.steps[0]; Icon: typeof FileText }) {
  const { num, title, description, example, tags } = step

  // Steps without tags render no badge row at all.
  const tagRow =
    tags.length > 0 ? (
      <div className="flex flex-wrap gap-1.5">
        {tags.map(label => (
          <TechBadge key={label}>{label}</TechBadge>
        ))}
      </div>
    ) : null

  return (
    <div className="relative pl-16">
      <div className="absolute left-0 w-12 h-12 rounded-xl bg-accent-electric/10 border border-accent-electric/20 flex items-center justify-center">
        <Icon className="w-5 h-5 text-accent-electric" />
      </div>
      <div className="rounded-2xl border border-white/[0.06] bg-white/[0.03] p-6">
        <div className="flex items-center gap-3 mb-3">
          <span className="font-mono text-xs text-accent-electric/60">{num}</span>
          <h3 className="text-lg font-bold">{title}</h3>
        </div>
        <p className="text-sm text-white/50 mb-3">{description}</p>
        <div className="rounded-lg bg-enterprise-darker border border-white/[0.04] px-4 py-3 mb-3">
          <p className="font-mono text-xs text-white/40">{example}</p>
        </div>
        {tagRow}
      </div>
    </div>
  )
}
|
||||
|
||||
/**
 * CE flow timeline section: renders the steps of `flow[lang]` along a vertical
 * line. The first two steps animate in on mount with a staggered delay; every
 * later step is wrapped in FadeInView instead.
 */
export default function CEFlowSection() {
  const { lang } = useApp()
  const localizedSteps = flow[lang].steps

  const timeline = localizedSteps.map((step, position) => {
    const StepIcon = stepIcons[position]
    const content = <StepContent step={step} Icon={StepIcon} />

    // Steps from the third onward use FadeInView with no extra delay.
    if (position >= 2) {
      return (
        <FadeInView key={position} delay={0}>
          {content}
        </FadeInView>
      )
    }

    // First two steps: mount animation, staggered by 0.15 per step.
    return (
      <motion.div
        key={position}
        initial={{ opacity: 0, y: 20 }}
        animate={{ opacity: 1, y: 0 }}
        transition={{ duration: ANIMATION.duration, delay: 0.3 + position * 0.15, ease: ANIMATION.ease }}
      >
        {content}
      </motion.div>
    )
  })

  return (
    <section className="py-16 sm:py-24">
      <div className="max-w-4xl mx-auto px-4 sm:px-6 lg:px-8">
        <div className="relative">
          {/* Vertical connector line behind the icon tiles */}
          <div className="absolute left-6 top-0 bottom-0 w-px bg-white/[0.06]" />

          <div className="space-y-8">{timeline}</div>
        </div>
      </div>
    </section>
  )
}
|
||||
@@ -0,0 +1,84 @@
|
||||
'use client'
|
||||
|
||||
import { Check, AlertTriangle, X } from 'lucide-react'
|
||||
import { useApp } from '@/lib/context'
|
||||
import FadeInView from '@/components/ui/FadeInView'
|
||||
import SectionHeading from '@/components/ui/SectionHeading'
|
||||
|
||||
type ReqStatus = 'done' | 'partial' | 'missing'
|
||||
const statusIcons = { done: Check, partial: AlertTriangle, missing: X }
|
||||
const statusColors = { done: 'text-green-400 bg-green-500/10', partial: 'text-amber-400 bg-amber-500/10', missing: 'text-red-400 bg-red-500/10' }
|
||||
|
||||
const heading = {
|
||||
de: {
|
||||
tag: 'CRA COMPLIANCE',
|
||||
title: 'Was muss ich tun um',
|
||||
titleHighlight: 'CRA-konform zu werden?',
|
||||
subtitle: 'Der Cyber Resilience Act (EU 2024/2847) gilt ab September 2027. BreakPilot zeigt den Status pro Anforderung.',
|
||||
},
|
||||
en: {
|
||||
tag: 'CRA COMPLIANCE',
|
||||
title: 'What do I need to do to become',
|
||||
titleHighlight: 'CRA-compliant?',
|
||||
subtitle: 'The Cyber Resilience Act (EU 2024/2847) applies from September 2027. BreakPilot shows the status per requirement.',
|
||||
},
|
||||
}
|
||||
|
||||
const requirements = {
|
||||
de: [
|
||||
{ req: 'Schwachstellenmanagement einrichten', detail: 'Prozess für Identifikation, Bewertung und Behebung von Schwachstellen', status: 'missing' as ReqStatus },
|
||||
{ req: 'SBOM erstellen und pflegen', detail: 'Software Bill of Materials für jedes Produkt mit digitalen Elementen', status: 'partial' as ReqStatus },
|
||||
{ req: 'Security-Updates ermöglichen (OTA/SOTA)', detail: 'Mechanismus für sichere Software-Updates über die gesamte Lebensdauer', status: 'missing' as ReqStatus },
|
||||
{ req: 'Meldepflichten etablieren (24h/72h)', detail: 'Aktiv ausgenutzte Schwachstellen innerhalb 24h an ENISA melden', status: 'missing' as ReqStatus },
|
||||
{ req: 'Koordinierte Offenlegung (PSIRT)', detail: 'Product Security Incident Response Team und Disclosure Policy', status: 'missing' as ReqStatus },
|
||||
{ req: 'Technische Dokumentation aktualisieren', detail: 'Risikoanalyse, Design-Entscheidungen, Test-Ergebnisse dokumentieren', status: 'partial' as ReqStatus },
|
||||
{ req: 'Secure by Design', detail: 'Standardmäßig sichere Konfiguration, minimale Angriffsfläche', status: 'done' as ReqStatus },
|
||||
{ req: 'Keine bekannten Schwachstellen ausliefern', detail: 'Vor Inverkehrbringen alle bekannten CVEs beheben', status: 'partial' as ReqStatus },
|
||||
],
|
||||
en: [
|
||||
{ req: 'Establish vulnerability management', detail: 'Process for identification, assessment and remediation of vulnerabilities', status: 'missing' as ReqStatus },
|
||||
{ req: 'Create and maintain SBOM', detail: 'Software Bill of Materials for every product with digital elements', status: 'partial' as ReqStatus },
|
||||
{ req: 'Enable security updates (OTA/SOTA)', detail: 'Mechanism for secure software updates throughout the product lifetime', status: 'missing' as ReqStatus },
|
||||
{ req: 'Establish reporting obligations (24h/72h)', detail: 'Report actively exploited vulnerabilities to ENISA within 24h', status: 'missing' as ReqStatus },
|
||||
{ req: 'Coordinated disclosure (PSIRT)', detail: 'Product Security Incident Response Team and disclosure policy', status: 'missing' as ReqStatus },
|
||||
{ req: 'Update technical documentation', detail: 'Document risk analysis, design decisions, test results', status: 'partial' as ReqStatus },
|
||||
{ req: 'Secure by Design', detail: 'Secure configuration by default, minimal attack surface', status: 'done' as ReqStatus },
|
||||
{ req: 'Ship without known vulnerabilities', detail: 'Remediate all known CVEs before placing on market', status: 'partial' as ReqStatus },
|
||||
],
|
||||
}
|
||||
|
||||
// Text color and glyph of the trailing status marker, per requirement status.
const statusMarkerColors: Record<ReqStatus, string> = {
  done: 'text-green-400',
  partial: 'text-amber-400',
  missing: 'text-red-400',
}
const statusMarkerGlyphs: Record<ReqStatus, string> = {
  done: '✓',
  partial: '◐',
  missing: '✗',
}

/**
 * CRA roadmap section: a localized heading followed by one row per requirement
 * showing a status icon tile, the requirement with its detail text, and a
 * trailing status glyph.
 */
export default function CRAFahrplanSection() {
  const { lang } = useApp()
  const { tag, title, titleHighlight, subtitle } = heading[lang]
  const localizedRequirements = requirements[lang]

  const rows = localizedRequirements.map((requirement, position) => {
    const { req, detail, status } = requirement
    const StatusIcon = statusIcons[status]

    return (
      <FadeInView key={position} delay={position * 0.05}>
        <div className="rounded-xl border border-white/[0.06] bg-white/[0.03] p-4 flex items-start gap-4">
          <div className={`w-8 h-8 rounded-lg ${statusColors[status]} flex items-center justify-center shrink-0 mt-0.5`}>
            <StatusIcon className="w-4 h-4" />
          </div>
          <div className="flex-1 min-w-0">
            <h4 className="text-sm font-bold mb-0.5">{req}</h4>
            <p className="text-xs text-white/40">{detail}</p>
          </div>
          <span className={`text-xs font-mono shrink-0 ${statusMarkerColors[status]}`}>
            {statusMarkerGlyphs[status]}
          </span>
        </div>
      </FadeInView>
    )
  })

  return (
    <section className="py-24 sm:py-32">
      <div className="max-w-4xl mx-auto px-4 sm:px-6 lg:px-8">
        <SectionHeading tag={tag} title={title} titleHighlight={titleHighlight} subtitle={subtitle} />

        <div className="space-y-3">{rows}</div>
      </div>
    </section>
  )
}
|
||||
@@ -0,0 +1,72 @@
|
||||
'use client'
|
||||
|
||||
import { Shield, GitBranch, Server, Scan, FileCheck, Layers } from 'lucide-react'
|
||||
import { useApp } from '@/lib/context'
|
||||
import SectionHeading from '@/components/ui/SectionHeading'
|
||||
import GlassCard from '@/components/ui/GlassCard'
|
||||
|
||||
const icons = [Shield, GitBranch, Scan, Server, FileCheck, Layers]
|
||||
|
||||
const content = {
|
||||
de: {
|
||||
tag: '05 / DIFFERENZIERUNG',
|
||||
title: 'Was BreakPilot',
|
||||
titleHighlight: 'einzigartig macht.',
|
||||
subtitle: 'Sechs Alleinstellungsmerkmale, die kein anderer Anbieter in einer Plattform vereint.',
|
||||
items: [
|
||||
{ title: 'Deterministisch, nicht generativ', description: 'Regelbasierte Analyse statt LLM-Interpretation. Jedes Ergebnis ist reproduzierbar und versioniert — unabhängig vom Modell.' },
|
||||
{ title: 'Lückenloser Decision Trail', description: 'Von der Rechtsquelle über die Obligation zum Control bis zur Maßnahme. Jeder Schritt ist auditierbar und dokumentiert.' },
|
||||
{ title: 'Code Security integriert', description: 'SAST, DAST, SBOM und Container Scanning als Teil der Compliance-Plattform — nicht als separates Tool.' },
|
||||
{ title: 'Vollständig on-premise deploybar', description: 'Kein US-Cloud-Anbieter in der gesamten Architektur. Betrieb auf eigener Hardware oder in BSI-zertifizierten Rechenzentren.' },
|
||||
{ title: 'Regulierungsübergreifend', description: 'DSGVO, NIS2, AI Act, Maschinenverordnung, TDDDG, DORA — eine Plattform statt sieben Einzellösungen.' },
|
||||
{ title: '294.000+ atomare Controls', description: 'Abgeleitet aus 380+ Rechtsquellen. Nicht manuell kuratiert, sondern systematisch aus Originaltext extrahiert und verifiziert.' },
|
||||
],
|
||||
},
|
||||
en: {
|
||||
tag: '05 / DIFFERENTIATION',
|
||||
title: 'What makes BreakPilot',
|
||||
titleHighlight: 'unique.',
|
||||
subtitle: 'Six unique selling points that no other provider combines in a single platform.',
|
||||
items: [
|
||||
{ title: 'Deterministic, not generative', description: 'Rule-based analysis instead of LLM interpretation. Every result is reproducible and versioned — independent of the model.' },
|
||||
{ title: 'Seamless decision trail', description: 'From legal source through obligation to control to action. Every step is auditable and documented.' },
|
||||
{ title: 'Code security integrated', description: 'SAST, DAST, SBOM and container scanning as part of the compliance platform — not as a separate tool.' },
|
||||
{ title: 'Fully on-premise deployable', description: 'No US cloud provider in the entire architecture. Operation on own hardware or in BSI-certified data centers.' },
|
||||
{ title: 'Cross-regulatory', description: 'GDPR, NIS2, AI Act, Machinery Regulation, TDDDG, DORA — one platform instead of seven individual solutions.' },
|
||||
{ title: '294,000+ atomic controls', description: 'Derived from 380+ legal sources. Not manually curated, but systematically extracted and verified from original text.' },
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
/**
 * "Differentiation" section: a localized heading and a responsive grid of
 * GlassCards, one per entry of `content[lang].items`. Each card takes its icon
 * from `icons` at the same index.
 */
export default function ComparisonSection() {
  const { lang } = useApp()
  const { tag, title, titleHighlight, subtitle, items } = content[lang]

  const cards = items.map((entry, position) => {
    const CardIcon = icons[position]
    return (
      <GlassCard key={position} delay={position * 0.08}>
        <div className="w-10 h-10 rounded-xl bg-accent-electric/10 flex items-center justify-center mb-4">
          <CardIcon className="w-5 h-5 text-accent-electric" />
        </div>
        <h3 className="text-sm font-bold mb-2">{entry.title}</h3>
        <p className="text-xs text-white/40 leading-relaxed">{entry.description}</p>
      </GlassCard>
    )
  })

  return (
    <section id="comparison" className="py-24 sm:py-32">
      <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
        <SectionHeading
          tag={tag}
          title={title}
          titleHighlight={titleHighlight}
          subtitle={subtitle}
        />

        <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-6">{cards}</div>
      </div>
    </section>
  )
}
|
||||
@@ -0,0 +1,62 @@
|
||||
'use client'
|
||||
|
||||
import { X, Check } from 'lucide-react'
|
||||
import { t } from '@/lib/content'
|
||||
import { useApp } from '@/lib/context'
|
||||
import SectionHeading from '@/components/ui/SectionHeading'
|
||||
import FadeInView from '@/components/ui/FadeInView'
|
||||
import StatusIndicator from '@/components/ui/StatusIndicator'
|
||||
|
||||
/**
 * "Continuous compliance" section: a heading plus two side-by-side cards taken
 * from `t(lang).continuous.comparison`. The red "Offline" card lists the annual
 * points with X marks; the green "Live" card lists the continuous points with
 * check marks.
 */
export default function ContinuousSection() {
  const { lang } = useApp()
  const i = t(lang)
  const section = i.continuous
  const { annual, continuous } = section.comparison

  const annualPoints = annual.points.map((text, position) => (
    <li key={position} className="flex items-start gap-3 text-sm text-white/40">
      <X className="w-4 h-4 text-red-400/50 mt-0.5 shrink-0" />
      {text}
    </li>
  ))

  const continuousPoints = continuous.points.map((text, position) => (
    <li key={position} className="flex items-start gap-3 text-sm text-white/60">
      <Check className="w-4 h-4 text-green-400 mt-0.5 shrink-0" />
      {text}
    </li>
  ))

  return (
    <section id="continuous" className="py-24 sm:py-32 section-alt">
      <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
        <SectionHeading
          tag={section.tag}
          title={section.title}
          titleHighlight={section.titleHighlight}
          subtitle={section.subtitle}
        />

        <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
          {/* Annual approach */}
          <FadeInView direction="left">
            <div className="rounded-2xl border border-red-500/15 bg-red-500/[0.03] p-6 h-full">
              <div className="flex items-center gap-3 mb-6">
                <StatusIndicator label="Offline" status="error" />
                <h3 className="text-sm font-bold text-red-400">{annual.title}</h3>
              </div>
              <ul className="space-y-3">{annualPoints}</ul>
            </div>
          </FadeInView>

          {/* Continuous approach */}
          <FadeInView direction="right">
            <div className="rounded-2xl border border-green-500/15 bg-green-500/[0.03] p-6 h-full">
              <div className="flex items-center gap-3 mb-6">
                <StatusIndicator label="Live" status="active" />
                <h3 className="text-sm font-bold text-green-400">{continuous.title}</h3>
              </div>
              <ul className="space-y-3">{continuousPoints}</ul>
            </div>
          </FadeInView>
        </div>
      </div>
    </section>
  )
}
|
||||
@@ -0,0 +1,129 @@
|
||||
'use client'
|
||||
|
||||
import { motion } from 'framer-motion'
|
||||
import { X } from 'lucide-react'
|
||||
import { useApp } from '@/lib/context'
|
||||
|
||||
const data = {
|
||||
de: {
|
||||
before: {
|
||||
title: 'Ausgangslage',
|
||||
items: [
|
||||
'15 Jahre altes Embedded Board',
|
||||
'Kein Secure Element / TPM',
|
||||
'Kein Secure Boot, kein OTA',
|
||||
'Hardcoded Credentials im Firmware',
|
||||
'Alter TCP/IP Stack ohne Patches',
|
||||
'Penetration-Test: 187 Findings',
|
||||
'"Was davon ist wirklich kritisch?"',
|
||||
],
|
||||
},
|
||||
after: {
|
||||
title: 'BreakPilot Delta-Analyse',
|
||||
items: [
|
||||
{ text: '3 Findings blockieren CE/CRA → sofort handeln', type: 'critical' as const },
|
||||
{ text: '12 Findings sind Software-only Fixes', type: 'fixable' as const },
|
||||
{ text: '172 Findings sind kosmetisch oder low-risk', type: 'ok' as const },
|
||||
{ text: 'Hardware-Redesign: wahrscheinlich NICHT nötig', type: 'ok' as const },
|
||||
{ text: 'RED-Re-Zertifizierung: nur bei Funkmodul-Änderung', type: 'fixable' as const },
|
||||
{ text: 'Geschätzter Aufwand: €15k statt €50k', type: 'ok' as const },
|
||||
{ text: 'Jira-Tickets mit Fix-Vorschlägen erstellt', type: 'ok' as const },
|
||||
],
|
||||
},
|
||||
},
|
||||
en: {
|
||||
before: {
|
||||
title: 'Starting Point',
|
||||
items: [
|
||||
'15-year-old embedded board',
|
||||
'No Secure Element / TPM',
|
||||
'No Secure Boot, no OTA',
|
||||
'Hardcoded credentials in firmware',
|
||||
'Legacy TCP/IP stack without patches',
|
||||
'Penetration test: 187 findings',
|
||||
'"Which ones actually matter?"',
|
||||
],
|
||||
},
|
||||
after: {
|
||||
title: 'BreakPilot Delta Analysis',
|
||||
items: [
|
||||
{ text: '3 findings block CE/CRA → act immediately', type: 'critical' as const },
|
||||
{ text: '12 findings are software-only fixes', type: 'fixable' as const },
|
||||
{ text: '172 findings are cosmetic or low-risk', type: 'ok' as const },
|
||||
{ text: 'Hardware redesign: probably NOT necessary', type: 'ok' as const },
|
||||
{ text: 'RED re-certification: only if RF module changes', type: 'fixable' as const },
|
||||
{ text: 'Estimated effort: €15k instead of €50k', type: 'ok' as const },
|
||||
{ text: 'Jira tickets with fix suggestions created', type: 'ok' as const },
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Text color of the bullet in the "after" list, per finding type.
const typeColors = {
  critical: 'text-red-400',
  fixable: 'text-amber-400',
  ok: 'text-green-400',
}

// Bullet glyph in the "after" list, per finding type.
const typeIcons = {
  critical: '●',
  fixable: '◐',
  ok: '●',
}

/**
 * Before/after comparison section built from `data[lang]`: a red "before" card
 * listing plain strings with X marks and a green "after" card whose items carry
 * a type-colored bullet. Both cards animate in on mount; the second starts 0.1
 * later.
 */
export default function DeltaImpactSection() {
  const { lang } = useApp()
  const { before, after } = data[lang]

  const beforeRows = before.items.map((label, position) => (
    <li key={position} className="flex items-start gap-3 text-sm text-white/40">
      <X className="w-4 h-4 text-red-400/50 mt-0.5 shrink-0" />
      {label}
    </li>
  ))

  const afterRows = after.items.map((finding, position) => {
    const bulletClasses = `mt-0.5 shrink-0 text-xs ${typeColors[finding.type]}`
    return (
      <li key={position} className="flex items-start gap-3 text-sm text-white/60">
        <span className={bulletClasses}>
          {typeIcons[finding.type]}
        </span>
        {finding.text}
      </li>
    )
  })

  return (
    <section className="py-16 sm:py-24">
      <div className="max-w-6xl mx-auto px-4 sm:px-6 lg:px-8">
        <div className="grid grid-cols-1 lg:grid-cols-2 gap-6 relative">
          {/* Before */}
          <motion.div
            initial={{ opacity: 0, y: 20 }}
            animate={{ opacity: 1, y: 0 }}
            transition={{ duration: 0.5, ease: [0.22, 1, 0.36, 1] }}
          >
            <div className="rounded-2xl border border-red-500/15 bg-red-500/[0.03] p-6 h-full">
              <h3 className="text-sm font-bold text-red-400 mb-5 font-mono uppercase tracking-wider">
                {before.title}
              </h3>
              <ul className="space-y-3">{beforeRows}</ul>
            </div>
          </motion.div>

          {/* After */}
          <motion.div
            initial={{ opacity: 0, y: 20 }}
            animate={{ opacity: 1, y: 0 }}
            transition={{ duration: 0.5, delay: 0.1, ease: [0.22, 1, 0.36, 1] }}
          >
            <div className="rounded-2xl border border-green-500/15 bg-green-500/[0.03] p-6 h-full">
              <h3 className="text-sm font-bold text-green-400 mb-5 font-mono uppercase tracking-wider">
                {after.title}
              </h3>
              <ul className="space-y-3">{afterRows}</ul>
            </div>
          </motion.div>
        </div>
      </div>
    </section>
  )
}
|
||||
@@ -0,0 +1,79 @@
|
||||
'use client'
|
||||
|
||||
import { Shield, FileCheck, ClipboardCheck, Check, X } from 'lucide-react'
|
||||
import { t } from '@/lib/content'
|
||||
import { useApp } from '@/lib/context'
|
||||
import SectionHeading from '@/components/ui/SectionHeading'
|
||||
import GlassCard from '@/components/ui/GlassCard'
|
||||
import FadeInView from '@/components/ui/FadeInView'
|
||||
|
||||
/** Resolves an icon name used by the pillar content to its lucide-react component. */
const iconMap: Record<string, typeof Shield> = { Shield, FileCheck, ClipboardCheck }
|
||||
|
||||
/**
 * "Deterministic" section: a heading, a grid of pillar cards and a two-column
 * comparison list (`comparison.llm` with X icons vs. `comparison.breakpilot`
 * with check icons).
 *
 * All copy comes from `t(lang).deterministic`, where `lang` is read from the
 * app context.
 */
export default function DeterministicSection() {
  const { lang } = useApp()
  const i = t(lang)

  return (
    <section id="deterministic" className="py-24 sm:py-32">
      <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
        <SectionHeading
          tag={i.deterministic.tag}
          title={i.deterministic.title}
          titleHighlight={i.deterministic.titleHighlight}
          subtitle={i.deterministic.subtitle}
        />

        <div className="grid grid-cols-1 md:grid-cols-3 gap-6 mb-16">
          {i.deterministic.pillars.map((pillar, idx) => {
            // An icon name missing from iconMap would yield `undefined`, and React
            // throws when asked to render an undefined element type. Fall back to
            // Shield so a content typo degrades to a default icon instead.
            const Icon = iconMap[pillar.icon] ?? Shield
            return (
              <GlassCard key={idx} delay={idx * 0.1}>
                <div className="w-12 h-12 rounded-xl bg-accent-indigo/10 flex items-center justify-center mb-4">
                  <Icon className="w-6 h-6 text-accent-indigo" />
                </div>
                <h3 className="text-lg font-bold mb-2">{pillar.title}</h3>
                <p className="text-sm text-white/50">{pillar.description}</p>
              </GlassCard>
            )
          })}
        </div>

        <FadeInView>
          <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
            <div className="rounded-2xl border border-red-500/20 bg-red-500/[0.04] p-6">
              <h4 className="text-sm font-bold text-red-400 mb-4 font-mono uppercase tracking-wider">
                {i.deterministic.comparison.llm.title}
              </h4>
              <ul className="space-y-3">
                {i.deterministic.comparison.llm.items.map((item, idx) => (
                  <li key={idx} className="flex items-start gap-3 text-sm text-white/50">
                    <X className="w-4 h-4 text-red-400/60 mt-0.5 shrink-0" />
                    {item}
                  </li>
                ))}
              </ul>
            </div>

            <div className="rounded-2xl border border-green-500/20 bg-green-500/[0.04] p-6">
              <h4 className="text-sm font-bold text-green-400 mb-4 font-mono uppercase tracking-wider">
                {i.deterministic.comparison.breakpilot.title}
              </h4>
              <ul className="space-y-3">
                {i.deterministic.comparison.breakpilot.items.map((item, idx) => (
                  <li key={idx} className="flex items-start gap-3 text-sm text-white/70">
                    <Check className="w-4 h-4 text-green-400 mt-0.5 shrink-0" />
                    {item}
                  </li>
                ))}
              </ul>
            </div>
          </div>
        </FadeInView>
      </div>
    </section>
  )
}
|
||||
@@ -0,0 +1,91 @@
|
||||
'use client'
|
||||
|
||||
import { motion } from 'framer-motion'
|
||||
import { ArrowRight, ChevronDown } from 'lucide-react'
|
||||
import { t } from '@/lib/content'
|
||||
import { useApp } from '@/lib/context'
|
||||
import GradientText from '@/components/ui/GradientText'
|
||||
import CTAButton from '@/components/ui/CTAButton'
|
||||
import StatusIndicator from '@/components/ui/StatusIndicator'
|
||||
import { ANIMATION } from '@/lib/constants'
|
||||
|
||||
/**
 * Full-height landing hero: status pill, badge, two-line headline, subtitle
 * and two call-to-action buttons that fade up one after another, plus a
 * bouncing chevron pinned to the bottom edge.
 *
 * Copy is taken from `t(lang).hero`; only the primary button label is chosen
 * inline from the current language.
 */
export default function HeroSection() {
  const { lang } = useApp()
  const { hero } = t(lang)

  // Every entrance animation shares the same start/end pose and base timing;
  // the individual elements differ only in their delay.
  const hiddenPose = { opacity: 0, y: 20 }
  const shownPose = { opacity: 1, y: 0 }
  const baseTiming = { duration: 0.6, ease: ANIMATION.ease }

  return (
    <section id="hero" className="relative min-h-screen flex items-center justify-center enterprise-grid overflow-hidden">
      <div className="absolute inset-0 bg-gradient-to-b from-transparent via-transparent to-enterprise-dark" />

      <div className="relative z-10 max-w-5xl mx-auto px-4 sm:px-6 lg:px-8 text-center">
        <motion.div initial={hiddenPose} animate={shownPose} transition={baseTiming}>
          <div className="inline-flex items-center gap-3 mb-8 px-4 py-2 rounded-full border border-white/[0.08] bg-white/[0.04]">
            <StatusIndicator label={hero.status} />
          </div>
        </motion.div>

        <motion.div
          initial={hiddenPose}
          animate={shownPose}
          transition={{ ...baseTiming, delay: 0.1 }}
          className="mb-3"
        >
          <span className="mono-label tracking-widest">{hero.badge}</span>
        </motion.div>

        <motion.h1
          initial={hiddenPose}
          animate={shownPose}
          transition={{ ...baseTiming, delay: 0.2 }}
          className="text-5xl sm:text-6xl lg:text-7xl font-bold mb-6 leading-tight text-shadow-glow"
        >
          {hero.title}
          <br />
          <GradientText>{hero.titleHighlight}</GradientText>
        </motion.h1>

        <motion.p
          initial={hiddenPose}
          animate={shownPose}
          transition={{ ...baseTiming, delay: 0.3 }}
          className="text-lg sm:text-xl text-white/50 max-w-2xl mx-auto mb-10"
        >
          {hero.subtitle}
        </motion.p>

        <motion.div
          initial={hiddenPose}
          animate={shownPose}
          transition={{ ...baseTiming, delay: 0.4 }}
          className="flex flex-col sm:flex-row items-center justify-center gap-4"
        >
          <CTAButton href="/plattform">
            {lang === 'de' ? 'Plattform entdecken' : 'Discover Platform'}
            <ArrowRight className="w-4 h-4" />
          </CTAButton>
          <CTAButton variant="ghost" href="/preise">
            {hero.cta}
          </CTAButton>
        </motion.div>
      </div>

      {/* Scroll hint: fades in after one second, then bobs up and down forever */}
      <motion.div
        initial={{ opacity: 0 }}
        animate={{ opacity: 1 }}
        transition={{ duration: 1, delay: 1 }}
        className="absolute bottom-8 left-1/2 -translate-x-1/2"
      >
        <motion.div
          animate={{ y: [0, 8, 0] }}
          transition={{ duration: 2, repeat: Infinity, ease: 'easeInOut' }}
        >
          <ChevronDown className="w-5 h-5 text-white/20" />
        </motion.div>
      </motion.div>
    </section>
  )
}
|
||||
@@ -0,0 +1,82 @@
|
||||
'use client'
|
||||
|
||||
import { t } from '@/lib/content'
|
||||
import { useApp } from '@/lib/context'
|
||||
import SectionHeading from '@/components/ui/SectionHeading'
|
||||
import FadeInView from '@/components/ui/FadeInView'
|
||||
|
||||
/** Tailwind text-colour class for an output tile's value, keyed by its `status`. */
const statusColors = { success: 'text-green-400', warning: 'text-amber-400', neutral: 'text-accent-electric' }
|
||||
|
||||
/** Tailwind background class for the dot next to an output value, keyed by its `status`. */
const statusDots = { success: 'bg-green-400', warning: 'bg-amber-400', neutral: 'bg-accent-electric' }
|
||||
|
||||
/**
 * Text colour per terminal line `type`. Types not listed here get no colour
 * class.
 */
const terminalLineColors: Record<string, string> = {
  input: 'text-white/70',
  output: 'text-white/40',
  signal: 'text-green-400',
}

/**
 * "Impact" section: a heading followed by a mock terminal window on the left
 * (one row per entry of `impact.terminalLines`) and a 2-column grid of output
 * tiles on the right (one per entry of `impact.outputs`, coloured through
 * `statusDots` / `statusColors`).
 *
 * All copy comes from `t(lang).impact`, where `lang` is read from the app
 * context.
 */
export default function ImpactSection() {
  const { lang } = useApp()
  const i = t(lang)

  return (
    <section id="impact" className="py-24 sm:py-32">
      <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
        <SectionHeading
          tag={i.impact.tag}
          title={i.impact.title}
          titleHighlight={i.impact.titleHighlight}
          subtitle={i.impact.subtitle}
        />

        <div className="grid grid-cols-1 lg:grid-cols-2 gap-8">
          <FadeInView direction="left">
            <div className="rounded-2xl bg-enterprise-darker border border-white/[0.06] p-6 font-mono text-sm overflow-hidden">
              {/* Window chrome: three coloured dots and a title */}
              <div className="flex items-center gap-2 mb-4 pb-3 border-b border-white/[0.06]">
                <div className="w-3 h-3 rounded-full bg-red-500/60" />
                <div className="w-3 h-3 rounded-full bg-amber-500/60" />
                <div className="w-3 h-3 rounded-full bg-green-500/60" />
                <span className="ml-2 text-xs text-white/30">regulatory-impact-analysis</span>
              </div>
              <div className="space-y-2">
                {i.impact.terminalLines.map((line, idx) => (
                  // A single lookup keeps the class attribute free of the newlines
                  // and blank entries a multi-line template literal would emit.
                  <div key={idx} className={terminalLineColors[line.type] ?? ''}>
                    {line.text}
                  </div>
                ))}
              </div>
            </div>
          </FadeInView>

          <FadeInView direction="right">
            <div className="grid grid-cols-2 gap-4 h-full">
              {i.impact.outputs.map((output, idx) => (
                <div
                  key={idx}
                  className="rounded-2xl bg-white/[0.04] border border-white/[0.06] p-5 flex flex-col justify-between"
                >
                  <p className="text-xs text-white/40 mb-2">{output.label}</p>
                  <div className="flex items-center gap-2">
                    <span className={`w-2 h-2 rounded-full ${statusDots[output.status]}`} />
                    <span className={`text-2xl font-bold ${statusColors[output.status]}`}>
                      {output.value}
                    </span>
                  </div>
                </div>
              ))}
            </div>
          </FadeInView>
        </div>
      </div>
    </section>
  )
}
|
||||
@@ -0,0 +1,46 @@
|
||||
'use client'
|
||||
|
||||
import { Check } from 'lucide-react'
|
||||
import { t } from '@/lib/content'
|
||||
import { useApp } from '@/lib/context'
|
||||
import SectionHeading from '@/components/ui/SectionHeading'
|
||||
import GlassCard from '@/components/ui/GlassCard'
|
||||
|
||||
/** Top-border accent classes handed out to the regulation cards by position. */
const accentColors = [
  'border-t-accent-electric',
  'border-t-accent-indigo',
  'border-t-accent-purple',
]
|
||||
|
||||
/**
 * "Legal" section: a heading followed by one card per entry of
 * `legal.regulations`, each showing the regulation's short name, full name
 * and a checklist of features.
 *
 * All copy comes from `t(lang).legal`, where `lang` is read from the app
 * context.
 */
export default function LegalSection() {
  const { lang } = useApp()
  const i = t(lang)

  return (
    <section id="legal" className="py-24 sm:py-32">
      <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
        <SectionHeading
          tag={i.legal.tag}
          title={i.legal.title}
          titleHighlight={i.legal.titleHighlight}
          subtitle={i.legal.subtitle}
        />

        <div className="grid grid-cols-1 md:grid-cols-3 gap-6">
          {i.legal.regulations.map((reg, idx) => (
            // Wrap around the palette so a card beyond the last accent colour
            // reuses one instead of receiving the literal class "undefined".
            <GlassCard
              key={idx}
              delay={idx * 0.1}
              className={`border-t-2 ${accentColors[idx % accentColors.length]}`}
            >
              <div className="mb-4">
                <h3 className="text-2xl font-bold font-mono">{reg.name}</h3>
                <p className="text-xs text-white/40 mt-1">{reg.fullName}</p>
              </div>
              <ul className="space-y-2">
                {reg.features.map((feature, fi) => (
                  <li key={fi} className="flex items-start gap-2 text-sm text-white/50">
                    <Check className="w-3.5 h-3.5 text-accent-signal mt-0.5 shrink-0" />
                    {feature}
                  </li>
                ))}
              </ul>
            </GlassCard>
          ))}
        </div>
      </div>
    </section>
  )
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user