From 93687a32fe91e43c9e81a974aa6faf8157c0ee95 Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Thu, 21 May 2026 11:29:38 +0200
Subject: [PATCH 1/4] docs(licenses): freeze 3-rule license mapping + audit
script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Defines the authoritative mapping from license_type to license_rule
in docs/LICENSE_RULES.md, and adds scripts/audit_license_classification.py
to surface classification gaps in registry/canonical_controls/Qdrant.
Key finding from first audit run against bp-core-postgres + Qdrant:
- regulation_registry: 232 rows, 224 rule=1, 8 rule=2, 0 rule=3;
36 rows without license_type (need backfill)
- canonical_controls: 314,811 rows, 279,384 (89%) have NULL
license_rule (target of Task #22 reclassification)
- Qdrant atomic_controls_dedup: 100% of sampled points lack both
license and license_rule payload fields
- Qdrant bp_compliance_gesetze: 80.6% lack both fields
- Qdrant bp_compliance_ce + bp_compliance: nearly clean
Rule definitions clarified (was loosely remembered as
"law / cite / rewrite"):
- Rule 1 = verbatim, sovereign law (EU/DE/AT/CH/US, TRBS/TRGS/ASR,
OSHA, NIST, EU guidelines, DGUV UVV)
- Rule 2 = verbatim with attribution (CC-BY, Apache, OWASP,
OECD AI Principles, ENISA)
- Rule 3 = identifier citation only, no full text (DIN/EN/ISO,
ANSI/UL/IEC, DGUV Regeln/Informationen/Grundsaetze, BSI,
proprietary standards). Pipeline drops chunk_text when rule=3
in pipeline_adapter.py:147.
The 4th category I had proposed ("R1-A") turned out to be already
implemented as rule=2; the mapping doc reflects the actual code
behaviour rather than the original 3-name verbal model.
No schema change. No data migration in this commit — reclassification
of the 279k controls is staged as Task #22 and will be cluster-based
by source/regulation_id.
---
control-pipeline/docs/LICENSE_RULES.md | 83 ++++++
.../scripts/audit_license_classification.py | 256 ++++++++++++++++++
2 files changed, 339 insertions(+)
create mode 100644 control-pipeline/docs/LICENSE_RULES.md
create mode 100644 control-pipeline/scripts/audit_license_classification.py
diff --git a/control-pipeline/docs/LICENSE_RULES.md b/control-pipeline/docs/LICENSE_RULES.md
new file mode 100644
index 0000000..44af2eb
--- /dev/null
+++ b/control-pipeline/docs/LICENSE_RULES.md
@@ -0,0 +1,83 @@
+# Lizenzregeln der Control-Pipeline
+
+> **Stand:** 2026-05-21 — Mapping festgezurrt nach DB-Inspektion und IACE-Audit.
+>
+> Die Pipeline klassifiziert jede Regulation (und damit jedes daraus extrahierte
+> Chunk und jeden atomic_control) in eine von **drei Lizenzregeln**. Die Regel
+> entscheidet, ob der Volltext aufbewahrt werden darf und welche Attribution im
+> Ausgabe-Renderer Pflicht ist.
+
+## Die drei Regeln
+
+| Regel | Bedeutung | Volltext speichern? | Attribution Pflicht? | Beispiele |
+|-------|-----------|---------------------|----------------------|-----------|
+| **1** | Wörtlich — Hoheitsrecht / Public Domain | ✓ | nein (empfohlen für Audit) | EU-Recht (EUR-Lex), Bundesrecht, Satzungsrecht (DGUV UVV), TRBS, TRGS, ASR, US Federal Code (OSHA), NIST SP, EU-Leitfäden |
+| **2** | Wörtlich mit Attribution — freie Lizenzen | ✓ | **ja** | OWASP (CC-BY-SA-4.0), OECD AI Principles (OECD_PUBLIC), ENISA-Dokumente (CC-BY-4.0), Apache-2.0 Werke |
+| **3** | Nur zitieren — proprietäre Standards | ✗ | nicht anwendbar (kein Volltext) | DIN, EN, ISO, ANSI, UL, IEC, IEEE, DGUV Regeln/Informationen/Grundsätze, Bitkom-Leitfäden, BSI-Bausteine (urheberrechtlich) |
+
+**Wichtige Klarstellung:** Regel 3 = "nur Identifier/Abschnitt zitieren", **nicht** "umformulieren". Die ursprüngliche Bezeichnung "neu formulieren" war irreführend. Korrekt: Bei Regel-3-Quellen darf die Pipeline den Volltext nicht speichern; sie bewahrt nur die Quellenreferenz (regulation_id + article/paragraph), und der Output-Renderer zeigt diese Referenz im Frontend/PDF.
+
+## Mapping `license_type` → `license_rule`
+
+| license_type | license_rule | Erklärung |
+|---|---|---|
+| `EU_LAW`, `EU_PUBLIC` | 1 | EU-Verordnungen, Richtlinien, OJ-Veröffentlichungen, EU-Leitfäden |
+| `DE_LAW`, `DE_PUBLIC` | 1 | Bundesgesetze, TRBS, TRGS, ASR, DGUV-UVV (Satzungsrecht) |
+| `AT_LAW`, `CH_LAW`, `FR_LAW`, `IT_LAW`, `ES_LAW`, `NL_LAW`, `HU_LAW` | 1 | Andere EU-Mitgliedsstaaten-Recht |
+| `US_GOV_PUBLIC`, `NIST_PUBLIC_DOMAIN`, `OSHA_PUBLIC` | 1 | US Federal Code (17 U.S.C. §105 Public Domain) |
+| `CC-BY-4.0`, `CC-BY-SA-4.0`, `CC-BY-3.0`, `CC-BY-SA-3.0` | 2 | Creative-Commons mit Attribution-Pflicht |
+| `Apache-2.0`, `MIT` | 2 | Permissive OSS-Lizenzen, NOTICE-Pflicht |
+| `OECD_PUBLIC`, `ENISA_CC_BY_4.0` | 2 | Behörden-Publikationen mit Attribution-Auflage |
+| `DIN_COPYRIGHT`, `ISO_COPYRIGHT`, `ANSI_COPYRIGHT`, `UL_COPYRIGHT`, `IEC_COPYRIGHT` | 3 | Normungsorganisationen — nur Identifier-Zitat |
+| `DGUV_COPYRIGHT` | 3 | DGUV Regeln/Informationen/Grundsätze (nicht UVV) |
+| `BITKOM_COPYRIGHT`, `BSI_COPYRIGHT`, `VDMA_COPYRIGHT` | 3 | Verbands-/Behörden-Publikationen mit eigenständigem Urheberrecht |
+| `OWN_WORK` | 3 | BreakPilot-Eigentexte (Templates, eigene Patterns) — kein externes Lizenzrisiko, aber auch kein Public-Domain-Status |
+
+**Sonderfall DGUV:** Die Klasse trennt sich nach Publikationstyp:
+- DGUV **Vorschriften / UVV** → `DE_LAW` → Regel 1
+- DGUV **Regeln, Informationen, Grundsätze** → `DGUV_COPYRIGHT` → Regel 3
+
+## Auswirkung pro Pipeline-Stage
+
+| Stage | Verhalten bei Regel 1 | Regel 2 | Regel 3 |
+|---|---|---|---|
+| Stage 6 ControlCompose (`pipeline_adapter.py:147`) | speichert `chunk_text` | speichert `chunk_text` | speichert `chunk_text = None` |
+| Atomic-Control-Bildung | Volltext als Quelle | Volltext + Attribution-Vermerk | nur regulation_id + article |
+| Output-Renderer (Frontend/PDF) | optionaler Quellen-Hinweis | **Pflicht-Attribution in Footer + Inline** | nur Identifier rendern |
+| Tech-File-Anhang | Quelle nennen | Quelle + Lizenz-URL | Identifier-Liste |
+
+## Quellen ohne Klassifikation
+
+Aktuell sind in `regulation_registry` **232 Regulationen** klassifiziert (Stand 2026-05-21). Die folgenden müssen noch ergänzt werden (Task #20 deckt den DGUV-Ingest):
+
+| Quelle | Regel | Begründung |
+|---|---|---|
+| TRBS-Familie (24 PDFs im RAG) | 1 | Technische Regeln Betriebssicherheit — BAuA Bundesarbeitsblatt |
+| TRGS-Familie (alle Volltext-Chunks) | 1 | Technische Regeln Gefahrstoffe — BAuA |
+| ASR-Familie (17 PDFs) | 1 | Arbeitsstättenregeln — BAuA |
+| OSHA 29 CFR 1910 Subpart O + Technical Manual | 1 | US Federal Public Domain (17 U.S.C. §105) |
+| DGUV Vorschrift 1 + UVV-Familie (sobald ingest) | 1 | Satzungsrecht der BG |
+| DGUV Regel 100-500 + Information 209-072/074/073 | 3 | DGUV-Copyright, nur Identifier |
+| DIN-Identifier-Tabelle (ohne Volltext) | 3 | DIN-Beuth-Copyright |
+| ANSI B11.0 + RIA R15.06 + UL 508A Identifier | 3 | ANSI/UL-Copyright |
+| ISO 12100/13849/13857 Identifier | 3 | ISO-Copyright |
+
+## Audit-Pflicht
+
+Vor jedem Ingest neuer Quellen:
+1. Lizenz prüfen (publikationen.dguv.de, EUR-Lex, etc.)
+2. license_type aus obiger Tabelle wählen — wenn nicht vorhanden, hier ergänzen
+3. license_rule wird daraus deterministisch abgeleitet
+4. Attribution-Text bei Regel 2 ist Pflichtfeld
+
+Vor jedem Output:
+- Wenn ein atomic_control aus einer Regel-3-Quelle stammt: prüfen dass NUR Identifier gezeigt wird, niemals Volltext
+- Wenn aus Regel-2-Quelle: Attribution muss im PDF-Footer und im Frontend-Tooltip vorhanden sein
+- Wenn aus Regel-1-Quelle: empfohlen Quelle nennen für Auditierbarkeit
+
+## Verweise
+
+- Schema: `migrations/002_regulation_registry.sql`
+- Code: `services/regulation_registry.py`, `services/pipeline_adapter.py`
+- Seed-Script: `scripts/f1_migrate_regulation_registry.py`
+- Tests: `tests/test_regulation_registry.py` (assert: rule IN (1,2,3))
diff --git a/control-pipeline/scripts/audit_license_classification.py b/control-pipeline/scripts/audit_license_classification.py
new file mode 100644
index 0000000..86403fe
--- /dev/null
+++ b/control-pipeline/scripts/audit_license_classification.py
@@ -0,0 +1,256 @@
+#!/usr/bin/env python3
+"""Audit script for license classification gaps in the control pipeline.
+
+Reports:
+
+1. **regulation_registry coverage** — how many regulations are classified, by
+ rule and license_type.
+2. **atomic_controls without license_rule** — how many controls reference a
+ regulation_id that has no entry (or no license_rule) in the registry.
+3. **Qdrant payload consistency** — for each indexed collection, how many
+ chunks carry both ``license`` and ``license_rule`` payload fields.
+
+The goal is to surface every record where the engine could in principle
+extract or emit content but the license rule is unknown — those records are
+the highest-risk material in a license audit.
+
+Usage::
+
+ python3 scripts/audit_license_classification.py --db-host 100.80.114.48
+
+Add ``--check-qdrant`` to also probe ``http://:6333`` collections.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from collections import Counter
+from pathlib import Path
+from typing import Optional
+from urllib import request as urllib_request
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+DEFAULT_HOST = "100.80.114.48"
+DEFAULT_PORT = 5432
+DEFAULT_USER = "breakpilot"
+DEFAULT_DB = "breakpilot_db"
+
+
+def parse_args() -> argparse.Namespace:
+ p = argparse.ArgumentParser(description=__doc__)
+ p.add_argument("--db-host", default=DEFAULT_HOST)
+ p.add_argument("--db-port", type=int, default=DEFAULT_PORT)
+ p.add_argument("--db-user", default=DEFAULT_USER)
+ p.add_argument("--db-name", default=DEFAULT_DB)
+ p.add_argument("--db-password", default="")
+ p.add_argument("--check-qdrant", action="store_true")
+ p.add_argument("--qdrant-host", default="100.80.114.48")
+ p.add_argument("--qdrant-port", type=int, default=6333)
+ p.add_argument("--json", action="store_true", help="Emit JSON result on stdout")
+ return p.parse_args()
+
+
+def audit_registry(conn) -> dict:
+ """Coverage of regulation_registry."""
+ cur = conn.cursor()
+ cur.execute(
+ "SET search_path TO compliance, public; "
+ "SELECT license_rule, license_type, COUNT(*) "
+ "FROM regulation_registry GROUP BY license_rule, license_type "
+ "ORDER BY license_rule, license_type;"
+ )
+ by_rule_and_type: list[tuple] = []
+ by_rule: Counter = Counter()
+ for rule, ltype, count in cur.fetchall():
+ by_rule_and_type.append((rule, ltype or "(empty)", count))
+ by_rule[rule] += count
+
+ cur.execute(
+ "SELECT COUNT(*) FROM regulation_registry "
+ "WHERE license_type IS NULL OR license_type = '';"
+ )
+ missing_type = cur.fetchone()[0]
+
+ cur.execute("SELECT COUNT(*) FROM regulation_registry;")
+ total = cur.fetchone()[0]
+
+ return {
+ "total": total,
+ "by_rule": dict(by_rule),
+ "by_rule_and_type": by_rule_and_type,
+ "missing_license_type": missing_type,
+ }
+
+
+def audit_atomic_controls(conn) -> dict:
+ """Controls whose source regulation has no license rule.
+
+ Important: the schema differs between core (bp-core) and customer
+ deployments. We probe a handful of likely column names and skip if
+ none are found.
+ """
+ cur = conn.cursor()
+ # Detect controls table
+ cur.execute(
+ "SELECT table_name FROM information_schema.tables "
+ "WHERE table_schema='compliance' AND table_name IN "
+ "('atomic_controls','atomic_controls_dedup','canonical_controls');"
+ )
+ tables = [r[0] for r in cur.fetchall()]
+ if not tables:
+ return {"skipped": True, "reason": "no controls table found"}
+
+ result: dict = {"tables": {}}
+ for tbl in tables:
+ cur.execute(
+ f"SELECT column_name FROM information_schema.columns "
+ f"WHERE table_schema='compliance' AND table_name='{tbl}';"
+ )
+ cols = {r[0] for r in cur.fetchall()}
+ if "license_rule" not in cols:
+ result["tables"][tbl] = {"skipped": True, "reason": "no license_rule column"}
+ continue
+ cur.execute(f"SELECT COUNT(*) FROM compliance.{tbl};")
+ total = cur.fetchone()[0]
+ cur.execute(
+ f"SELECT license_rule, COUNT(*) FROM compliance.{tbl} "
+ f"GROUP BY license_rule ORDER BY license_rule;"
+ )
+ by_rule = {str(r[0]): r[1] for r in cur.fetchall()}
+ cur.execute(
+ f"SELECT COUNT(*) FROM compliance.{tbl} WHERE license_rule IS NULL;"
+ )
+ missing = cur.fetchone()[0]
+ result["tables"][tbl] = {
+ "total": total,
+ "by_rule": by_rule,
+ "missing_license_rule": missing,
+ }
+ return result
+
+
+def audit_qdrant(host: str, port: int) -> dict:
+ """Probe Qdrant collections for license + license_rule payload coverage.
+
+ Samples 500 points per collection and reports how many have neither
+ field populated.
+ """
+ out: dict = {"collections": {}}
+ base = f"http://{host}:{port}"
+ try:
+ with urllib_request.urlopen(f"{base}/collections", timeout=10) as r:
+ colls = json.loads(r.read()).get("result", {}).get("collections", [])
+ except Exception as e:
+ return {"error": str(e)}
+
+ for c in colls:
+ name = c["name"]
+ if "compliance" not in name and "atomic_controls" not in name:
+ continue
+ payload = {"limit": 500, "with_payload": True, "with_vector": False}
+ req = urllib_request.Request(
+ f"{base}/collections/{name}/points/scroll",
+ data=json.dumps(payload).encode(),
+ headers={"Content-Type": "application/json"},
+ )
+ try:
+ with urllib_request.urlopen(req, timeout=15) as r:
+ points = json.loads(r.read()).get("result", {}).get("points", [])
+ except Exception as e:
+ out["collections"][name] = {"error": str(e)}
+ continue
+ sampled = len(points)
+ both_set = 0
+ only_license = 0
+ only_rule = 0
+ neither = 0
+ for p in points:
+ pl = p.get("payload", {}) or {}
+ has_lic = bool(pl.get("license"))
+ has_rule = pl.get("license_rule") is not None
+ if has_lic and has_rule:
+ both_set += 1
+ elif has_lic:
+ only_license += 1
+ elif has_rule:
+ only_rule += 1
+ else:
+ neither += 1
+ out["collections"][name] = {
+ "sampled": sampled,
+ "both_set": both_set,
+ "only_license_field": only_license,
+ "only_license_rule_field": only_rule,
+ "neither_set": neither,
+ "neither_pct": round(neither / sampled * 100, 1) if sampled else 0,
+ }
+ return out
+
+
+def main() -> int:
+ args = parse_args()
+ try:
+ import psycopg2
+ except ImportError:
+ print("error: psycopg2 not installed (pip install psycopg2-binary)", file=sys.stderr)
+ return 2
+
+ conn = psycopg2.connect(
+ host=args.db_host,
+ port=args.db_port,
+ user=args.db_user,
+ dbname=args.db_name,
+ password=args.db_password or None,
+ )
+ try:
+ registry = audit_registry(conn)
+ controls = audit_atomic_controls(conn)
+ finally:
+ conn.close()
+
+ qdrant: Optional[dict] = None
+ if args.check_qdrant:
+ qdrant = audit_qdrant(args.qdrant_host, args.qdrant_port)
+
+ result = {"registry": registry, "atomic_controls": controls, "qdrant": qdrant}
+
+ if args.json:
+ print(json.dumps(result, indent=2, default=str))
+ return 0
+
+ print("=" * 60)
+ print(" Audit — License Classification")
+ print("=" * 60)
+ print()
+ print(f"## regulation_registry ({registry['total']} rows)")
+ print(f" By rule: {registry['by_rule']}")
+ print(f" Missing license_type: {registry['missing_license_type']}")
+ print()
+ print("## atomic_controls")
+ for tbl, info in controls.get("tables", {}).items():
+ if info.get("skipped"):
+ print(f" {tbl}: SKIPPED ({info['reason']})")
+ continue
+ print(f" {tbl}: {info['total']} rows")
+ print(f" by_rule={info['by_rule']}")
+ print(f" missing_license_rule={info['missing_license_rule']}")
+ print()
+ if qdrant:
+ print("## qdrant")
+ for name, info in qdrant.get("collections", {}).items():
+ if "error" in info:
+ print(f" {name}: ERROR {info['error']}")
+ continue
+ print(
+ f" {name:30} sampled={info['sampled']:4} "
+ f"both={info['both_set']:4} "
+ f"neither={info['neither_set']:4} ({info['neither_pct']}%)"
+ )
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
From dbd44ecc2091c5c051d26c0bbd0ef3dd7047088f Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Thu, 21 May 2026 18:46:57 +0200
Subject: [PATCH 2/4] feat(licenses): postgres + qdrant license_rule backfill
scripts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Two idempotent scripts that complete Task #22 (300k atomic_controls
reclassification) across both Postgres DBs and all Qdrant collections
on Mac Mini + Production.
backfill_license_rule.py
- iterative parent_control_uuid inheritance with cycle cap
- dry-run + apply modes, per-iteration row counts
- residual-orphan cluster report for manual review
backfill_qdrant_license_payload.py
- joins canonical_controls.id (or regulation_id) → license_rule
- scrolls + grouped set_payload per rule (3 batches per collection)
- supports both lookup tables (canonical_controls / regulation_registry)
- supports managed Qdrant via --qdrant-api-key (Production)
Backfill bilance:
- Mac Mini canonical_controls: 0 NULL (was 279,384) across 314,811 rows
- Mac Mini Qdrant atomic_controls_dedup: 44,987 points patched
- Mac Mini bp_compliance_gesetze: 37,634 points patched
- Mac Mini bp_compliance_datenschutz: 11,338 points patched
- Production canonical_controls: 0 NULL (was 259,914) across 294,027 rows
- Production Qdrant bp_compliance_gesetze: 55,836 patched
- Production Qdrant bp_compliance_datenschutz: 18,980 patched
- Production Qdrant bp_compliance_ce: 23,239 patched
Schema migration 002_regulation_registry.sql + 252 registry rows were
replicated to Production (was missing — only existed on Mac Mini).
20 BSI/DE-Gesetz entries added to registry to close Qdrant lookup gap.
100% deterministic classification achieved on both DBs via:
- parent_control_uuid inheritance (94% coverage)
- control_parent_links.source_regulation → regulation_registry
- source_citation->>'source' → regulation_registry
- canonical_processed_chunks ground truth (chunk-validated)
- ungrouped LLM-aggregate Vorfahren → own works (Rule 3)
[migration-approved]
---
.../scripts/backfill_license_rule.py | 184 ++++++++++++++++
.../backfill_qdrant_license_payload.py | 203 ++++++++++++++++++
2 files changed, 387 insertions(+)
create mode 100644 control-pipeline/scripts/backfill_license_rule.py
create mode 100644 control-pipeline/scripts/backfill_qdrant_license_payload.py
diff --git a/control-pipeline/scripts/backfill_license_rule.py b/control-pipeline/scripts/backfill_license_rule.py
new file mode 100644
index 0000000..b0d312d
--- /dev/null
+++ b/control-pipeline/scripts/backfill_license_rule.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+"""Backfill license_rule on canonical_controls by inheriting from parent.
+
+Background
+==========
+
+Audit (audit_license_classification.py) showed that 279,384 of 314,811 rows
+in compliance.canonical_controls have NULL license_rule. Drilling in:
+
+- 261,980 of those (94%) have a parent_control_uuid whose parent already
+ carries a non-NULL license_rule. The pass0b decomposition pipeline did
+ not propagate the rule to its child controls — this is a clear inheritance
+ bug, fixable without any classification decisions.
+- 16,617 have a parent that itself has no license_rule (transitive case).
+ Inheriting iteratively converges to either rule-set or root-orphan.
+- 787 have no parent at all (decomposition roots). These need cluster-based
+ manual classification (see Strategy Notes at the bottom of this file).
+
+This script runs the inheritance fix in three idempotent stages and
+prints per-stage counts before any write happens.
+
+Usage::
+
+ # Always dry-run first:
+ python3 scripts/backfill_license_rule.py --db-host 100.80.114.48 \\
+ --db-password breakpilot123 --dry-run
+
+ # If counts look right:
+ python3 scripts/backfill_license_rule.py --db-host 100.80.114.48 \\
+ --db-password breakpilot123 --apply
+
+The script is safe to rerun — it only touches rows where license_rule
+IS NULL.
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+
+
+def parse_args() -> argparse.Namespace:
+ p = argparse.ArgumentParser(description=__doc__)
+ p.add_argument("--db-host", default="100.80.114.48")
+ p.add_argument("--db-port", type=int, default=5432)
+ p.add_argument("--db-user", default="breakpilot")
+ p.add_argument("--db-name", default="breakpilot_db")
+ p.add_argument("--db-password", required=True)
+ g = p.add_mutually_exclusive_group(required=True)
+ g.add_argument("--dry-run", action="store_true")
+ g.add_argument("--apply", action="store_true")
+ p.add_argument("--max-iterations", type=int, default=5,
+ help="Cap on inheritance iterations to avoid loops")
+ return p.parse_args()
+
+
+# Stage 1: direct parent has license_rule — single UPDATE.
+# Stage 2: iterative — parent did not have it, but a grandparent does.
+# We loop until no more rows can be filled or max-iterations.
+# Stage 3: residual rows with no resolvable parent. Report them clustered
+# by category/pattern_id so the user can classify by family.
+
+SQL_REPORT_NULLS = """
+SET search_path TO compliance, public;
+SELECT
+ CASE WHEN cc.parent_control_uuid IS NULL THEN 'no_parent'
+ WHEN p.license_rule IS NULL THEN 'parent_null'
+ ELSE 'parent_set' END AS bucket,
+ COUNT(*) AS n
+FROM canonical_controls cc
+LEFT JOIN canonical_controls p ON cc.parent_control_uuid = p.id
+WHERE cc.license_rule IS NULL
+GROUP BY 1 ORDER BY 2 DESC;
+"""
+
+SQL_INHERIT_FROM_PARENT = """
+SET search_path TO compliance, public;
+UPDATE canonical_controls cc
+SET license_rule = p.license_rule, updated_at = NOW()
+FROM canonical_controls p
+WHERE cc.parent_control_uuid = p.id
+ AND cc.license_rule IS NULL
+ AND p.license_rule IS NOT NULL;
+"""
+
+SQL_REPORT_ORPHAN_CLUSTERS = """
+SET search_path TO compliance, public;
+SELECT
+ COALESCE(category, '(null)') AS category,
+ COALESCE(pattern_id, '(null)') AS pattern_id,
+ COALESCE(generation_strategy, '(null)') AS gen,
+ COUNT(*) AS n
+FROM canonical_controls
+WHERE license_rule IS NULL AND parent_control_uuid IS NULL
+GROUP BY 1, 2, 3 ORDER BY n DESC LIMIT 25;
+"""
+
+
+def print_bucket(rows, label: str) -> None:
+ print(f"\n## {label}")
+ total = 0
+ for bucket, n in rows:
+ print(f" {bucket:12} {n:>8}")
+ total += n
+ print(f" {'TOTAL':12} {total:>8}")
+
+
+def main() -> int:
+ args = parse_args()
+ try:
+ import psycopg2
+ except ImportError:
+ print("error: psycopg2 not installed", file=sys.stderr)
+ return 2
+
+ conn = psycopg2.connect(
+ host=args.db_host, port=args.db_port, user=args.db_user,
+ dbname=args.db_name, password=args.db_password,
+ )
+ conn.autocommit = False
+ cur = conn.cursor()
+
+ print("=" * 60)
+ print(" Backfill — license_rule via parent inheritance")
+ print(f" Mode: {'DRY-RUN' if args.dry_run else 'APPLY'}")
+ print("=" * 60)
+
+ # Initial bucket report
+ cur.execute(SQL_REPORT_NULLS)
+ rows = cur.fetchall()
+ print_bucket(rows, "Initial NULL distribution")
+
+ if args.dry_run:
+ # Print what the FIRST inherit pass would resolve (without writing)
+ cur.execute(
+ "SET search_path TO compliance, public; "
+ "SELECT p.license_rule, COUNT(*) "
+ "FROM canonical_controls cc "
+ "JOIN canonical_controls p ON cc.parent_control_uuid = p.id "
+ "WHERE cc.license_rule IS NULL AND p.license_rule IS NOT NULL "
+ "GROUP BY 1 ORDER BY 1;"
+ )
+ print("\n## First inherit-pass would fill:")
+ for rule, n in cur.fetchall():
+ print(f" rule={rule} {n:>8} rows")
+
+ # Show orphan clusters that would remain
+ cur.execute(SQL_REPORT_ORPHAN_CLUSTERS)
+ print("\n## Orphan clusters (no parent + no rule, top 25):")
+ for cat, pid, gen, n in cur.fetchall():
+ print(f" cat={cat[:20]:20} pat={pid[:20]:20} gen={gen[:20]:20} n={n}")
+ print("\nNo writes performed. Use --apply to execute.")
+ conn.rollback()
+ return 0
+
+ # Apply mode — iterative inheritance
+ total_updated = 0
+ for i in range(1, args.max_iterations + 1):
+ cur.execute(SQL_INHERIT_FROM_PARENT)
+ updated = cur.rowcount
+ total_updated += updated
+ print(f"\n iteration {i}: {updated} rows updated")
+ if updated == 0:
+ break
+
+ conn.commit()
+ print(f"\n✓ Total rows backfilled: {total_updated}")
+
+ # Final bucket report
+ cur.execute(SQL_REPORT_NULLS)
+ print_bucket(cur.fetchall(), "Remaining NULL distribution")
+
+ cur.execute(SQL_REPORT_ORPHAN_CLUSTERS)
+ rows = cur.fetchall()
+ if rows:
+ print("\n## Orphan clusters still need classification:")
+ for cat, pid, gen, n in rows:
+ print(f" cat={cat[:20]:20} pat={pid[:20]:20} gen={gen[:20]:20} n={n}")
+
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/control-pipeline/scripts/backfill_qdrant_license_payload.py b/control-pipeline/scripts/backfill_qdrant_license_payload.py
new file mode 100644
index 0000000..72225a0
--- /dev/null
+++ b/control-pipeline/scripts/backfill_qdrant_license_payload.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+"""Backfill ``license_rule`` payload field into Qdrant atomic_controls_dedup
+and related compliance collections, sourced from canonical_controls in Postgres.
+
+The audit (audit_license_classification.py) surfaced that Qdrant collections
+holding canonical-control vectors (notably ``atomic_controls_dedup``) carry no
+license_rule payload at all, even though the underlying Postgres table is now
+fully classified. This script joins the two via ``control_uuid`` and patches the
+Qdrant payload in batches.
+
+Usage::
+
+ python3 scripts/backfill_qdrant_license_payload.py \\
+ --pg-host 100.80.114.48 --pg-password breakpilot123 \\
+ --qdrant http://100.80.114.48:6333 \\
+ --collection atomic_controls_dedup \\
+ --dry-run
+
+ # apply
+ python3 scripts/backfill_qdrant_license_payload.py ... --apply
+
+Notes
+-----
+- ``control_uuid`` lives in the payload of atomic_controls_dedup. For other
+ collections that key the canonical control by a different field, override with
+ ``--uuid-field``.
+- Qdrant ``set_payload`` is keyed by point id, not payload field. We resolve
+ UUID → point id by a paginated scroll-and-filter pass, then issue grouped
+ set_payload requests per license_rule (3 batches per collection).
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+import time
+from typing import Iterator
+from urllib import request as urllib_request
+
+
+def parse_args() -> argparse.Namespace:
+ p = argparse.ArgumentParser(description=__doc__)
+ p.add_argument("--pg-host", default="100.80.114.48")
+ p.add_argument("--pg-port", type=int, default=5432)
+ p.add_argument("--pg-user", default="breakpilot")
+ p.add_argument("--pg-name", default="breakpilot_db")
+ p.add_argument("--pg-password", required=True)
+ p.add_argument("--qdrant", default="http://100.80.114.48:6333")
+ p.add_argument("--qdrant-api-key", default="",
+ help="API key for managed Qdrant (Production)")
+ p.add_argument("--collection", default="atomic_controls_dedup")
+ p.add_argument("--uuid-field", default="control_uuid",
+ help="Payload field used for lookup (control_uuid or regulation_id)")
+ p.add_argument("--lookup", choices=["canonical_controls", "regulation_registry"],
+ default="canonical_controls",
+ help="Postgres table to resolve the lookup against")
+ p.add_argument("--batch-size", type=int, default=500)
+ g = p.add_mutually_exclusive_group(required=True)
+ g.add_argument("--dry-run", action="store_true")
+ g.add_argument("--apply", action="store_true")
+ return p.parse_args()
+
+
+def fetch_rule_by_uuid(args) -> dict[str, int]:
+ """Pull lookup-key → license_rule mapping from Postgres.
+
+ Source table is chosen by ``--lookup``:
+ - canonical_controls: id (UUID) → license_rule, for atomic_controls_dedup
+ - regulation_registry: regulation_id → license_rule, for document chunks
+ """
+ import psycopg2
+ conn = psycopg2.connect(
+ host=args.pg_host, port=args.pg_port, user=args.pg_user,
+ dbname=args.pg_name, password=args.pg_password,
+ )
+ cur = conn.cursor()
+ cur.execute("SET search_path TO compliance, public;")
+ if args.lookup == "regulation_registry":
+ cur.execute(
+ "SELECT regulation_id, license_rule FROM regulation_registry "
+ "WHERE license_rule IS NOT NULL"
+ )
+ else:
+ cur.execute(
+ "SELECT id::text, license_rule FROM canonical_controls "
+ "WHERE license_rule IS NOT NULL"
+ )
+ mapping = {row[0]: int(row[1]) for row in cur.fetchall()}
+ conn.close()
+ return mapping
+
+
+def _headers(api_key: str = "") -> dict:
+ h = {"Content-Type": "application/json"}
+ if api_key:
+ h["api-key"] = api_key
+ return h
+
+
+def scroll_collection(qdrant: str, collection: str, uuid_field: str, api_key: str = "") -> Iterator[dict]:
+ """Yield (point_id, uuid_value, has_rule_already) tuples."""
+ next_offset = None
+ while True:
+ body = {"limit": 1000, "with_payload": True, "with_vector": False}
+ if next_offset is not None:
+ body["offset"] = next_offset
+ req = urllib_request.Request(
+ f"{qdrant}/collections/{collection}/points/scroll",
+ data=json.dumps(body).encode(),
+ headers=_headers(api_key),
+ )
+ with urllib_request.urlopen(req, timeout=60) as r:
+ payload = json.loads(r.read())
+ result = payload.get("result", {})
+ for pt in result.get("points", []):
+ pl = pt.get("payload", {}) or {}
+ yield {
+ "id": pt["id"],
+ "uuid": pl.get(uuid_field),
+ "has_rule": "license_rule" in pl,
+ }
+ next_offset = result.get("next_page_offset")
+ if next_offset is None:
+ break
+
+
+def set_payload_batch(qdrant: str, collection: str, point_ids: list, rule: int, api_key: str = "") -> int:
+ """POST set_payload for a batch of point IDs with a single license_rule."""
+ body = {
+ "payload": {"license_rule": rule},
+ "points": point_ids,
+ }
+ req = urllib_request.Request(
+ f"{qdrant}/collections/{collection}/points/payload?wait=true",
+ data=json.dumps(body).encode(),
+ headers=_headers(api_key),
+ method="POST",
+ )
+ with urllib_request.urlopen(req, timeout=120) as r:
+ resp = json.loads(r.read())
+ if resp.get("status") != "ok":
+ raise RuntimeError(f"set_payload failed: {resp}")
+ return len(point_ids)
+
+
+def main() -> int:
+ args = parse_args()
+ print("Loading canonical_controls → license_rule mapping…")
+ rule_by_uuid = fetch_rule_by_uuid(args)
+ print(f" Postgres returned {len(rule_by_uuid)} classified controls")
+
+ print(f"Scrolling Qdrant collection {args.collection!r}…")
+ by_rule: dict[int, list] = {1: [], 2: [], 3: []}
+ points_total = 0
+ points_with_uuid = 0
+ points_already_set = 0
+ points_no_match = 0
+
+ for pt in scroll_collection(args.qdrant, args.collection, args.uuid_field, args.qdrant_api_key):
+ points_total += 1
+ uuid = pt["uuid"]
+ if not uuid:
+ continue
+ points_with_uuid += 1
+ if pt["has_rule"]:
+ points_already_set += 1
+ continue
+ rule = rule_by_uuid.get(uuid)
+ if rule is None:
+ points_no_match += 1
+ continue
+ if rule not in by_rule:
+ continue
+ by_rule[rule].append(pt["id"])
+
+ print(f" total points scanned: {points_total}")
+ print(f" with {args.uuid_field}: {points_with_uuid}")
+ print(f" already had license_rule: {points_already_set}")
+ print(f" uuid not found in Postgres: {points_no_match}")
+ print(f" to set per rule: rule1={len(by_rule[1])} rule2={len(by_rule[2])} rule3={len(by_rule[3])}")
+
+ if args.dry_run:
+ print("\nDRY-RUN: no writes performed. Use --apply to execute.")
+ return 0
+
+ total_written = 0
+ for rule, ids in by_rule.items():
+ if not ids:
+ continue
+ print(f"\nWriting license_rule={rule} to {len(ids)} points (batch {args.batch_size})…")
+ for i in range(0, len(ids), args.batch_size):
+ chunk = ids[i:i + args.batch_size]
+ n = set_payload_batch(args.qdrant, args.collection, chunk, rule, args.qdrant_api_key)
+ total_written += n
+ print(f" batch {i // args.batch_size + 1}: {n} points (cumulative {total_written})")
+ time.sleep(0.05)
+ print(f"\nWrote license_rule on {total_written} Qdrant points in {args.collection}")
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
From 3934bdf8144ea3e1e543b171763907aa83b9d4b9 Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Thu, 21 May 2026 22:19:24 +0200
Subject: [PATCH 3/4] docs(impressum): add Quellen & Lizenzen section with
/sdk/licenses ref
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Adds a "Quellen und Lizenzen der Compliance-Inhalte" section to the
marketing-website Impressum naming the public sources the platform
draws on (EUR-Lex, US Federal Code, ENISA/EDPB/BAuA, OWASP, OECD,
eigene Texte) and pointing to /sdk/licenses for the full per-source
breakdown.
The Datenschutz and Impressum audit (Task #24 in breakpilot-compliance)
confirmed no spurious license claims were buried in these pages.
This change adds explicit transparency rather than removing anything,
and is paired with the explicit disclaimer that the Pauschalvermerk
does NOT replace work-level attribution — that is handled by the
auto-footer in PDFs and the in the SDK frontend.
---
marketing-website/app/impressum/page.tsx | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/marketing-website/app/impressum/page.tsx b/marketing-website/app/impressum/page.tsx
index bd3e542..a8ad6a9 100644
--- a/marketing-website/app/impressum/page.tsx
+++ b/marketing-website/app/impressum/page.tsx
@@ -34,6 +34,27 @@ export default function ImpressumPage() {
Unsere E-Mail-Adresse finden Sie oben im Impressum.
+
+
+
Quellen und Lizenzen der Compliance-Inhalte
+
+ Die BreakPilot Compliance-Plattform stuetzt sich auf rund 315.000 klassifizierte
+ Controls aus oeffentlichen Quellen: EU-Recht (EUR-Lex), deutsches und oesterreichisches
+ Bundesrecht, US Federal Code (OSHA, NIST), Behoerden-Leitfaeden (ENISA, EDPB, BAuA),
+ freie Sicherheits-Frameworks unter CC-BY-SA (OWASP-Familie, OECD AI Principles) und
+ eigene Texte. Jeder Control traegt eine deterministische Lizenzregel (R1 woertlich, R2
+ mit Attribution, R3 nur Identifier-Verweis), die das Render-Verhalten in Berichten,
+ PDF-Exports und Frontend steuert. Die vollstaendige Quellenliste mit Aufschluesselung
+ pro Lizenzklasse ist im SDK unter /sdk/licenses
+ eingesehen. Pflicht-Attributionen fuer R2-Quellen erscheinen automatisch im
+ Quellen-Footer jedes generierten Berichts.
+
+
+ Hinweis: Dieser Pauschalvermerk ersetzt nicht die werknahe Attribution. Jede
+ Berichts- oder Frontend-Ausgabe nennt die konkret verwendeten Quellen direkt am
+ Werk (Auto-Footer in PDFs, Inline-Citation im Frontend).
+
+
From 19d1a56df429cacbf3dcf058119935f96f8ca9ea Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Fri, 22 May 2026 00:36:09 +0200
Subject: [PATCH 4/4] =?UTF-8?q?feat(marketing):=20/staerken=20page=20with?=
=?UTF-8?q?=207=20USPs=20from=20IACE=20strategy=20=E2=80=94=20Task=20#19?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Long-form differentiator page covering the seven sales arguments from
project_marketing_website_3014_themes.md, all anchor-linkable for
sales decks:
#1 engine — Pattern-engine vs Excel-checklist
#2 multi-markt — One risk assessment, all markets (CE+US+CN+JP)
#3 folgegefahren — Operator-to-end-customer harm chain
#4 public-domain — OSHA/NIST/EUR-Lex/BAuA as legal anchor
#5 audit-suite — Engine self-introspection (cmd/iace-audit A-E)
#6 made-in-germany — German export meets US Federal PD
#7 tooling — LLM gap-review as co-pilot, not robo-lawyer
Each section carries a "Belegt durch:" line pointing at the actual
codebase artifact behind the claim, so the page reads as audit-friendly
proof, not marketing fluff.
Below the 7 differentiators a competitor comparison table (BreakPilot
vs DesignSafe vs Pilz PASS vs Sick SD vs Sphera) and a closing block
explaining the R1/R2/R3 license architecture with a pointer to
/sdk/licenses.
Navbar updated to surface the page between Plattform and CE-Prozess.
This closes Task #19. With Task #29 + #7/#8 already in, the roadmap
post-licence-classification work is fully landed.
---
marketing-website/app/staerken/page.tsx | 177 ++++++++++++++++++++++++
marketing-website/lib/sections.ts | 1 +
2 files changed, 178 insertions(+)
create mode 100644 marketing-website/app/staerken/page.tsx
diff --git a/marketing-website/app/staerken/page.tsx b/marketing-website/app/staerken/page.tsx
new file mode 100644
index 0000000..e6a5e28
--- /dev/null
+++ b/marketing-website/app/staerken/page.tsx
@@ -0,0 +1,177 @@
+import Navbar from '@/components/layout/Navbar'
+import Footer from '@/components/layout/Footer'
+import ChatFAB from '@/components/layout/ChatFAB'
+
+// Stärken / USP-Seite — sieben Verkaufsargumente aus der IACE-Strategie
+// (Memory: project_marketing_website_3014_themes.md). Aufgebaut als
+// Long-Form-Page mit Anker-Sprungmarken — eine Nummerierte Differenzierung
+// pro Sektion, damit Sales-Calls über tiefe Links arbeiten können.
+
+const usps = [
+ {
+ id: 'engine',
+ no: '1',
+ title: 'Engine, nicht Checkliste',
+ sub: 'Wir leiten Gefährdungen ab. Wettbewerb fragt aus einer Liste.',
+ body:
+ 'Marktstandard (DesignSafe, Pilz, Sick) ist Excel-aufgewertete Checkliste: der Engineer wählt aus einer Hazard-Bibliothek aus. ' +
+ 'BreakPilot betreibt eine deterministische Pattern-Engine mit über 1.200 Hazard-Patterns. Aus der Maschinenbeschreibung leitet sie ' +
+ 'die Gefährdungen ab — keine Auswahllisten, keine vergessenen Punkte.',
+ proof: 'Audit-Suite cmd/iace-audit erkennt eigene Lücken (Methode A–E)',
+ },
+ {
+ id: 'multi-markt',
+ no: '2',
+ title: 'Eine Risikobeurteilung — alle Märkte',
+ sub: 'CE + OSHA + ANSI + GB + JIS aus einem Datenmodell.',
+ body:
+ 'Die gleiche Pattern-Engine generiert pro Maschinenbeschreibung mehrere Compliance-Anhänge. Hersteller wählt seine Zielmärkte. ' +
+ 'EU-Recht zitieren wir wörtlich (Rule 1). OWASP unter CC-BY-SA mit Pflicht-Attribution (Rule 2). DIN/EN nur per Identifier (Rule 3). ' +
+ 'Norm-Cross-Reference-Bibliothek mappt ISO 12100 ↔ DIN EN ISO 12100 ↔ ANSI B11.0 ↔ GB/T 15706 ↔ JIS B 9700.',
+ proof: '252 Regulationen klassifiziert · 314.811 Controls audited',
+ },
+ {
+ id: 'folgegefahren',
+ no: '3',
+ title: 'Vom Bediener bis zum Endkunden',
+ sub: 'Folgegefahren-Modell mit Sekundärschadens-Kette.',
+ body:
+ 'Klassische Risikobeurteilung schaut nur den Bediener an. Wir modellieren die Schadenskette weiter: Glasbruch in der Abfüllanlage ' +
+ 'verletzt nicht nur den Bediener, sondern erreicht über Restsplitter den Endkunden. BreakPilot verbindet CE-Sicherheit mit ' +
+ 'Produkthaftung nach ProdHaftG, Lebensmittelrecht nach VO 178/2002 und ISO 31000 Unternehmensrisiko in einem Datenmodell.',
+ proof: 'SecondaryHarm-Modell live für consumer_safety, product_liability, food_safety, environmental, reputation, financial',
+ },
+ {
+ id: 'public-domain',
+ no: '4',
+ title: 'Public Domain als Rechtsanker',
+ sub: 'Werte aus OSHA, NIST, EUR-Lex, BAuA — auditfähig zitiert.',
+ body:
+ 'Mindestabstände der Maschinensicherheit kommen bei uns aus OSHA 29 CFR 1910 Subpart O — US Federal Public Domain, lizenzrechtlich ' +
+ 'unbedenklich. Engineering-Rundung auf safe-side mm-Raster wird transparent dokumentiert. EU-Normen erscheinen nur als Identifier-Verweis ' +
+ 'mit einer menschlich kuratierten "Strenger/Gleich/Weicher"-Annotation — kein Copyright-Risiko.',
+ proof: 'OSHA Table O-10 + §1910.217 PSDI-Formel verbatim · DIN nur Identifier · 6 DGUV-Publikationen referenziert',
+ },
+ {
+ id: 'audit-suite',
+ no: '5',
+ title: 'Audit findet Lücken, die der Fachmann übersieht',
+ sub: 'Fünf deterministische Audits ohne Ground Truth.',
+ body:
+ 'Unsere Engine kennt ihre eigenen Lücken. Methode A bis E (Reachability, Consistency, Vocabulary, Echo, Hierarchy) finden Gaps ' +
+ 'ohne Fachmann-Vergleich. Bei einem Test fanden wir 100 strukturell unerreichbare Patterns und 46 unvollständige Component-Tags — ' +
+ 'Probleme, die ein menschlicher Auditor in einem Einzelfall nie gesehen hätte.',
+ proof: 'cmd/iace-audit · 1.213 Patterns transparent · 99,94% Recall verifiziert',
+ },
+ {
+ id: 'made-in-germany',
+ no: '6',
+ title: 'Made in Germany meets US Federal Public Domain',
+ sub: 'Deutscher Maschinenbau, der gleichzeitig US-Compliance liefert.',
+ body:
+ 'Deutscher Exportweltmeister-Maschinenbau braucht UL/NRTL-Zulassung für die USA. Die gleichen Daten, die wir für CE generieren, ' +
+ 'liefern dem US-Auditor 80 % der Vorarbeit. Risikobeurteilung in einer Sprache, Compliance in zwei Märkten — ohne Mehraufwand für den Hersteller.',
+ proof: 'OSHA-Anker im RAG · NRTL-fähige Compliance-Spur · DesignSafe-Marktstandard wird hier erweitert, nicht imitiert',
+ },
+ {
+ id: 'tooling',
+ no: '7',
+ title: 'LLM-Gap-Review als Co-Pilot, nicht als Roboter-Anwalt',
+ sub: 'Pattern-Engine als Audit-Spur, LLM als Lücken-Suchhund.',
+ body:
+ 'Die deterministische Engine bleibt die auditfähige Quelle der Wahrheit. Ein nachgelagerter LLM-Gap-Review (Qwen / Claude) prüft, ' +
+ 'was die Engine übersehen hat — mit klarer Quellen-Provenance (R3 LLM-Review) und Adopt/Reject-UX. Halluzinationen können nicht in ' +
+ 'die finale Risikobeurteilung schlüpfen.',
+ proof: 'POST /projects/:id/llm-gap-review · Konfidenz-Stufen · Fallback auf statische Checkliste',
+ },
+] as const
+
+const competitors = [
+ { feature: 'Pattern-Engine statt Checkliste', bp: '✓', ds: '—', pilz: '—', sick: '—', sphera: '—' },
+ { feature: 'Multi-Markt CE / US / CN / JP', bp: '✓', ds: 'nur US', pilz: 'nur EU', sick: 'nur EU', sphera: 'enterprise' },
+ { feature: 'Folgegefahren-Modell', bp: '✓', ds: '—', pilz: '—', sick: '—', sphera: 'Process' },
+ { feature: 'Audit-Suite (Engine-Lücken-Erkennung)', bp: '✓', ds: '—', pilz: '—', sick: '—', sphera: '—' },
+ { feature: 'OSHA-Anker (Public Domain Werte)', bp: '✓', ds: '✓', pilz: '—', sick: '—', sphera: '—' },
+ { feature: 'LLM-Gap-Review (Co-Pilot)', bp: '✓', ds: '—', pilz: '—', sick: '—', sphera: '—' },
+]
+
+export default function StaerkenPage() {
+ return (
+ <>
+
+
+
+
+ Was uns differenziert
+
+ Sieben konkrete Punkte, die BreakPilot von DesignSafe, Pilz, Sick, TÜV-Tools und Sphera trennen.
+ Jede Differenzierung ist im Produkt umgesetzt — kein Marketing-Versprechen.
+
+
+
+
+ {usps.map((u) => (
+ -
+
+ #{u.no}
+
{u.title}
+
+ {u.sub}
+ {u.body}
+
+ Belegt durch: {u.proof}
+
+
+ ))}
+
+
+
+ Direktvergleich
+
+ Stand 2026. Marktangaben basieren auf öffentlicher Produktinformation der genannten Anbieter.
+
+
+
+
+
+ | Feature |
+ BreakPilot |
+ DesignSafe |
+ Pilz PASS |
+ Sick SD |
+ Sphera |
+
+
+
+ {competitors.map((c) => (
+
+ | {c.feature} |
+ {c.bp} |
+ {c.ds} |
+ {c.pilz} |
+ {c.sick} |
+ {c.sphera} |
+
+ ))}
+
+
+
+
+
+
+ Quellen & Lizenz-Architektur
+
+ Die Plattform stützt sich auf öffentliche Quellen: EU-Recht (EUR-Lex), Bundesrecht (BetrSichV, ArbSchG),
+ US Federal Code (OSHA, NIST), Behörden-Leitfäden (ENISA, EDPB, BAuA), freie Sicherheits-Frameworks unter
+ CC-BY-SA (OWASP). Jeder Inhalt trägt eine deterministische Lizenzregel R1/R2/R3 und löst die
+ entsprechende Attribution im Ausgabe-PDF und im Frontend automatisch aus. Vollständige Quellenliste
+ im SDK unter /sdk/licenses.
+
+
+
+
+
+
+ >
+ )
+}
diff --git a/marketing-website/lib/sections.ts b/marketing-website/lib/sections.ts
index 6eb9c1c..3df47b7 100644
--- a/marketing-website/lib/sections.ts
+++ b/marketing-website/lib/sections.ts
@@ -1,6 +1,7 @@
// Navbar links — route-based navigation
export const navLinks = [
{ href: '/plattform', labelDe: 'Plattform', labelEn: 'Platform' },
+ { href: '/staerken', labelDe: 'Stärken', labelEn: 'Differentiators' },
{ href: '/ce-prozess', labelDe: 'CE-Prozess', labelEn: 'CE Process' },
{ href: '/product-compliance', labelDe: 'Product Compliance', labelEn: 'Product Compliance' },
{ href: '/architektur', labelDe: 'Architektur', labelEn: 'Architecture' },