feat(control-pipeline): add CI regression suite for applicability tests
Makefile + pytest + GitHub Actions workflow for automated regression: - make install / make eval / make test - pytest integration with demo_cases.yaml - Golden outputs for 6 priority cases - Report generation (JSON + Markdown) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
42
control-pipeline/tests/applicability_demo_ci/.github/workflows/applicability-demo-regression.yml
vendored
Normal file
42
control-pipeline/tests/applicability_demo_ci/.github/workflows/applicability-demo-regression.yml
vendored
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
name: applicability-demo-regression

# NOTE(review): GitHub Actions only discovers workflow files under the
# repository root's .github/workflows/ directory. The path filters and the
# working directory below assume the suite lives at
# <repo-root>/applicability_demo_ci -- adjust them if the suite stays in a
# nested location such as control-pipeline/tests/.
on:
  push:
    paths:
      - 'applicability_demo_ci/**'
  pull_request:
    paths:
      - 'applicability_demo_ci/**'
  workflow_dispatch:

jobs:
  regression:
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: applicability_demo_ci
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: make install

      - name: Run evaluator
        run: make eval

      - name: Run pytest
        run: make test

      - name: Upload reports
        # Upload even when an earlier step failed: the reports matter most
        # when the regression run is red.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: applicability-demo-reports
          # `defaults.run.working-directory` does not apply to `uses` steps,
          # so these paths are relative to the workspace root.
          path: |
            applicability_demo_ci/reports/latest_report.json
            applicability_demo_ci/reports/latest_report.md
|
||||||
23
control-pipeline/tests/applicability_demo_ci/Makefile
Normal file
23
control-pipeline/tests/applicability_demo_ci/Makefile
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# Interpreter used by every target; override with `make PYTHON=python3.11 <target>`.
PYTHON ?= python3

.PHONY: install test eval report clean

# Upgrade pip, then install the suite's dependencies.
install:
	$(PYTHON) -m pip install -U pip
	$(PYTHON) -m pip install -r requirements.txt

# Run pytest through $(PYTHON) so the tests use the same interpreter that
# `install` put the dependencies into.
test:
	$(PYTHON) -m pytest -q

# Evaluate the demo cases against the stored outputs and write both reports.
# The reports directory is created first because it may not exist yet.
eval:
	mkdir -p reports
	$(PYTHON) evaluator.py \
		--cases demo_cases.yaml \
		--actual-dir actual_outputs \
		--report-json reports/latest_report.json \
		--report-md reports/latest_report.md

# Regenerate the reports and print the Markdown one.
report: eval
	cat reports/latest_report.md

# Remove the generated reports.
clean:
	rm -f reports/latest_report.json reports/latest_report.md
|
||||||
32
control-pipeline/tests/applicability_demo_ci/README.md
Normal file
32
control-pipeline/tests/applicability_demo_ci/README.md
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# Applicability Demo CI Suite
|
||||||
|
|
||||||
|
Diese Variante ist als direkt einhängbare Regression-Suite gedacht.
|
||||||
|
|
||||||
|
## Enthalten
|
||||||
|
- `demo_cases.yaml` — priorisierte Demo-Fälle
|
||||||
|
- `actual_outputs/` — Golden Outputs
|
||||||
|
- `evaluator.py` — Assertions + Report-Generator
|
||||||
|
- `tests/` — pytest-Regressionen
|
||||||
|
- `Makefile` — lokale Standardbefehle
|
||||||
|
- `.github/workflows/applicability-demo-regression.yml` — GitHub Actions Job
|
||||||
|
|
||||||
|
## Lokal starten
|
||||||
|
```bash
|
||||||
|
make install
|
||||||
|
make eval
|
||||||
|
make test
|
||||||
|
```
|
||||||
|
|
||||||
|
## Reports
|
||||||
|
Nach `make eval` liegen die Reports hier:
|
||||||
|
- `reports/latest_report.json`
|
||||||
|
- `reports/latest_report.md`
|
||||||
|
|
||||||
|
## Einbau in euer Repo
|
||||||
|
Am einfachsten legt ihr den Ordner als `applicability_demo_ci/` ins Repo.
|
||||||
|
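Der Workflow ist bereits auf diesen Pfad ausgelegt; damit GitHub Actions ihn ausführt, muss die Workflow-Datei jedoch unter `.github/workflows/` im Repo-Root liegen.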
|
||||||
|
|
||||||
|
## Nächste sinnvolle Anpassung
|
||||||
|
- `actual_outputs/` durch echte Endpoint-Outputs ersetzen
|
||||||
|
- optional kleinen Adapter bauen, falls euer API-Schema leicht abweicht
|
||||||
|
- weitere Grenzfälle ergänzen: WEEE, Medizinprodukt, Bildung, AI Act, CRA
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"case_id": "CASE-001",
|
||||||
|
"assigned_controls": [
|
||||||
|
"GDPR-INFO-THIRDPARTY-001",
|
||||||
|
"GDPR-ROPA-001",
|
||||||
|
"GDPR-LEGAL-BASIS-001",
|
||||||
|
"VENDOR-DUE-DILIGENCE-001"
|
||||||
|
],
|
||||||
|
"excluded_controls": [
|
||||||
|
"PSD2-LICENSING-001",
|
||||||
|
"PAYMENT-INSTITUTION-AUTH-001",
|
||||||
|
"AML-KYC-CUSTOMER-ONBOARDING-001"
|
||||||
|
],
|
||||||
|
"escalations": [],
|
||||||
|
"inferred_industries": [
|
||||||
|
"retail_ecommerce"
|
||||||
|
],
|
||||||
|
"confidence": {
|
||||||
|
"overall": 0.93,
|
||||||
|
"industry_assignment": 0.96,
|
||||||
|
"control_assignment": 0.91
|
||||||
|
},
|
||||||
|
"explanation": "Stripe ist eigenständiger Zahlungsdienstleister. Das Unternehmen erbringt keine eigene Erbringung regulierter Zahlungsdienste. Datenschutz- und Transparenzpflichten bleiben relevant.",
|
||||||
|
"uncertainty_flags": []
|
||||||
|
}
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"case_id": "CASE-002",
|
||||||
|
"assigned_controls": [
|
||||||
|
"BANK-ACCESS-AUTH-001",
|
||||||
|
"BATTERY-LABELING-001",
|
||||||
|
"BATTERY-TAKEBACK-001",
|
||||||
|
"PRODUCT-COMPLIANCE-DOC-001",
|
||||||
|
"WEEE-REGISTRATION-001"
|
||||||
|
],
|
||||||
|
"excluded_controls": [],
|
||||||
|
"escalations": [
|
||||||
|
"BATTERY-PRODUCER-DEFINITION-001"
|
||||||
|
],
|
||||||
|
"inferred_industries": [
|
||||||
|
"financial_services"
|
||||||
|
],
|
||||||
|
"confidence": {
|
||||||
|
"overall": 0.82,
|
||||||
|
"industry_assignment": 0.95,
|
||||||
|
"control_assignment": 0.76
|
||||||
|
},
|
||||||
|
"explanation": "Die statische Branchenzuweisung ist nicht abschließend. Ein physisches Produkt mit Batterie erweitert den Scope. Die Rolle als Inverkehrbringer oder Hersteller prüfen bleibt relevant.",
|
||||||
|
"uncertainty_flags": [
|
||||||
|
"producer_role_unclear"
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"case_id": "CASE-004",
|
||||||
|
"assigned_controls": [
|
||||||
|
"PSD2-LICENSING-001",
|
||||||
|
"AML-KYC-CUSTOMER-ONBOARDING-001",
|
||||||
|
"AML-TRANSACTION-MONITORING-001",
|
||||||
|
"FRAUD-CONTROLS-001"
|
||||||
|
],
|
||||||
|
"excluded_controls": [],
|
||||||
|
"escalations": [
|
||||||
|
"REGULATORY-PERIMETER-ASSESSMENT-001"
|
||||||
|
],
|
||||||
|
"inferred_industries": [
|
||||||
|
"financial_services"
|
||||||
|
],
|
||||||
|
"confidence": {
|
||||||
|
"overall": 0.89,
|
||||||
|
"industry_assignment": 0.97,
|
||||||
|
"control_assignment": 0.87
|
||||||
|
},
|
||||||
|
"explanation": "Es liegt ein möglicherweise regulierter Zahlungsdienst vor. AML/KYC relevant ist die Entgegennahme und Weiterleitung von Kundengeldern. Die rechtliche Einordnung nicht nur Datenschutz muss geprüft werden.",
|
||||||
|
"uncertainty_flags": [
|
||||||
|
"regulatory_perimeter_needs_confirmation"
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"case_id": "CASE-006",
|
||||||
|
"assigned_controls": [
|
||||||
|
"VENDOR-DUE-DILIGENCE-001",
|
||||||
|
"GDPR-INFO-THIRDPARTY-001"
|
||||||
|
],
|
||||||
|
"excluded_controls": [
|
||||||
|
"TKG-CUSTOMER-INFORMATION-001",
|
||||||
|
"TKG-CONTRACT-TRANSPARENCY-001"
|
||||||
|
],
|
||||||
|
"escalations": [
|
||||||
|
"ECS-QUALIFICATION-ASSESSMENT-001"
|
||||||
|
],
|
||||||
|
"inferred_industries": [
|
||||||
|
"software_saas"
|
||||||
|
],
|
||||||
|
"confidence": {
|
||||||
|
"overall": 0.87,
|
||||||
|
"industry_assignment": 0.94,
|
||||||
|
"control_assignment": 0.84
|
||||||
|
},
|
||||||
|
"explanation": "Die bloße Nutzung eines Gateways ist nicht automatisch eigener TK-Dienst.",
|
||||||
|
"uncertainty_flags": [
|
||||||
|
"ecs_boundary_case"
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
{
|
||||||
|
"case_id": "CASE-008",
|
||||||
|
"assigned_controls": [
|
||||||
|
"BATTERY-LABELING-001",
|
||||||
|
"BATTERY-TAKEBACK-001",
|
||||||
|
"CE-TECHNICAL-DOC-001",
|
||||||
|
"IMPORTER-RESPONSIBILITIES-001",
|
||||||
|
"RED-CONFORMITY-001",
|
||||||
|
"WEEE-REGISTRATION-001"
|
||||||
|
],
|
||||||
|
"excluded_controls": [],
|
||||||
|
"escalations": [
|
||||||
|
"PRODUCT-QUALIFICATION-MULTIREGIME-001"
|
||||||
|
],
|
||||||
|
"inferred_industries": [
|
||||||
|
"software_saas"
|
||||||
|
],
|
||||||
|
"confidence": {
|
||||||
|
"overall": 0.84,
|
||||||
|
"industry_assignment": 0.93,
|
||||||
|
"control_assignment": 0.8
|
||||||
|
},
|
||||||
|
"explanation": "Die ursprüngliche Branche ist Software. Zusätzliche Produkt- und Importeurspflichten werden scope-basiert aktiviert.",
|
||||||
|
"uncertainty_flags": [
|
||||||
|
"multi_regime_product_scope"
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
"case_id": "CASE-011",
|
||||||
|
"assigned_controls": [
|
||||||
|
"THIRD-PARTY-FINANCIAL-PROVIDER-DUE-DILIGENCE-001"
|
||||||
|
],
|
||||||
|
"excluded_controls": [],
|
||||||
|
"escalations": [
|
||||||
|
"REGULATORY-PERIMETER-ASSESSMENT-001",
|
||||||
|
"OWN-VS-PARTNER-ROLE-ANALYSIS-001"
|
||||||
|
],
|
||||||
|
"inferred_industries": [
|
||||||
|
"software_saas"
|
||||||
|
],
|
||||||
|
"confidence": {
|
||||||
|
"overall": 0.61,
|
||||||
|
"industry_assignment": 0.9,
|
||||||
|
"control_assignment": 0.58
|
||||||
|
},
|
||||||
|
"explanation": "Es besteht ein unklarer regulatorischer Perimeter. Die Rolle des Unternehmens ist nicht eindeutig. Eskalation erforderlich.",
|
||||||
|
"uncertainty_flags": [
|
||||||
|
"unclear_funds_flow",
|
||||||
|
"own_vs_partner_role_unclear"
|
||||||
|
]
|
||||||
|
}
|
||||||
119
control-pipeline/tests/applicability_demo_ci/demo_cases.yaml
Normal file
119
control-pipeline/tests/applicability_demo_ci/demo_cases.yaml
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
version: "1.0"
|
||||||
|
suite: "applicability-engine-demo-cases-priority-6"
|
||||||
|
cases:
|
||||||
|
- id: "CASE-001"
|
||||||
|
title: "SaaS-Webshop mit Stripe Checkout"
|
||||||
|
expected:
|
||||||
|
inferred_industries:
|
||||||
|
must_include: ["retail_ecommerce"]
|
||||||
|
must_not_include: ["financial_services"]
|
||||||
|
controls:
|
||||||
|
must_assign:
|
||||||
|
- "GDPR-INFO-THIRDPARTY-001"
|
||||||
|
- "GDPR-ROPA-001"
|
||||||
|
- "GDPR-LEGAL-BASIS-001"
|
||||||
|
- "VENDOR-DUE-DILIGENCE-001"
|
||||||
|
must_not_assign:
|
||||||
|
- "PSD2-LICENSING-001"
|
||||||
|
- "PAYMENT-INSTITUTION-AUTH-001"
|
||||||
|
- "AML-KYC-CUSTOMER-ONBOARDING-001"
|
||||||
|
reasoning_must_contain:
|
||||||
|
- "Stripe ist eigenständiger Zahlungsdienstleister"
|
||||||
|
- "keine eigene Erbringung regulierter Zahlungsdienste"
|
||||||
|
- "Datenschutz- und Transparenzpflichten bleiben relevant"
|
||||||
|
|
||||||
|
- id: "CASE-002"
|
||||||
|
title: "Bank gibt TAN-Generator mit Batterie an Kunden aus"
|
||||||
|
expected:
|
||||||
|
inferred_industries:
|
||||||
|
must_include: ["financial_services"]
|
||||||
|
must_not_include: ["manufacturing"]
|
||||||
|
controls:
|
||||||
|
must_assign:
|
||||||
|
- "BANK-ACCESS-AUTH-001"
|
||||||
|
- "BATTERY-LABELING-001"
|
||||||
|
- "BATTERY-TAKEBACK-001"
|
||||||
|
- "PRODUCT-COMPLIANCE-DOC-001"
|
||||||
|
may_assign_if_explained:
|
||||||
|
- "WEEE-REGISTRATION-001"
|
||||||
|
escalate_for_legal_review:
|
||||||
|
- "BATTERY-PRODUCER-DEFINITION-001"
|
||||||
|
reasoning_must_contain:
|
||||||
|
- "statische Branchenzuweisung ist nicht abschließend"
|
||||||
|
- "physisches Produkt mit Batterie erweitert den Scope"
|
||||||
|
- "Rolle als Inverkehrbringer oder Hersteller prüfen"
|
||||||
|
|
||||||
|
- id: "CASE-004"
|
||||||
|
title: "Fintech mit eigener Wallet und Weiterleitung von Kundengeldern"
|
||||||
|
expected:
|
||||||
|
inferred_industries:
|
||||||
|
must_include: ["financial_services"]
|
||||||
|
controls:
|
||||||
|
must_assign:
|
||||||
|
- "PSD2-LICENSING-001"
|
||||||
|
- "AML-KYC-CUSTOMER-ONBOARDING-001"
|
||||||
|
- "AML-TRANSACTION-MONITORING-001"
|
||||||
|
- "FRAUD-CONTROLS-001"
|
||||||
|
escalate_for_legal_review:
|
||||||
|
- "REGULATORY-PERIMETER-ASSESSMENT-001"
|
||||||
|
reasoning_must_contain:
|
||||||
|
- "möglicherweise regulierter Zahlungsdienst"
|
||||||
|
- "AML/KYC relevant"
|
||||||
|
- "rechtliche Einordnung nicht nur Datenschutz"
|
||||||
|
|
||||||
|
- id: "CASE-006"
|
||||||
|
title: "SaaS-Unternehmen verschickt nur SMS über externen Gateway"
|
||||||
|
expected:
|
||||||
|
inferred_industries:
|
||||||
|
must_include: ["software_saas"]
|
||||||
|
must_not_include: ["telecommunications"]
|
||||||
|
controls:
|
||||||
|
must_assign:
|
||||||
|
- "VENDOR-DUE-DILIGENCE-001"
|
||||||
|
- "GDPR-INFO-THIRDPARTY-001"
|
||||||
|
must_not_assign:
|
||||||
|
- "TKG-CUSTOMER-INFORMATION-001"
|
||||||
|
- "TKG-CONTRACT-TRANSPARENCY-001"
|
||||||
|
escalate_for_legal_review:
|
||||||
|
- "ECS-QUALIFICATION-ASSESSMENT-001"
|
||||||
|
reasoning_must_contain:
|
||||||
|
- "bloße Nutzung eines Gateways ist nicht automatisch eigener TK-Dienst"
|
||||||
|
|
||||||
|
- id: "CASE-008"
|
||||||
|
title: "Importeur von IoT-Geräten mit Batterien und Funkmodul"
|
||||||
|
expected:
|
||||||
|
inferred_industries:
|
||||||
|
must_include: ["software_saas"]
|
||||||
|
controls:
|
||||||
|
must_assign:
|
||||||
|
- "BATTERY-LABELING-001"
|
||||||
|
- "BATTERY-TAKEBACK-001"
|
||||||
|
- "CE-TECHNICAL-DOC-001"
|
||||||
|
- "IMPORTER-RESPONSIBILITIES-001"
|
||||||
|
may_assign_if_explained:
|
||||||
|
- "RED-CONFORMITY-001"
|
||||||
|
- "WEEE-REGISTRATION-001"
|
||||||
|
escalate_for_legal_review:
|
||||||
|
- "PRODUCT-QUALIFICATION-MULTIREGIME-001"
|
||||||
|
reasoning_must_contain:
|
||||||
|
- "ursprüngliche Branche ist Software"
|
||||||
|
- "zusätzliche Produkt- und Importeurspflichten werden scope-basiert aktiviert"
|
||||||
|
|
||||||
|
- id: "CASE-011"
|
||||||
|
title: "Unklarer Grenzfall mit Embedded Finance"
|
||||||
|
expected:
|
||||||
|
inferred_industries:
|
||||||
|
must_include: ["software_saas"]
|
||||||
|
controls:
|
||||||
|
must_assign:
|
||||||
|
- "THIRD-PARTY-FINANCIAL-PROVIDER-DUE-DILIGENCE-001"
|
||||||
|
may_assign_if_explained:
|
||||||
|
- "PSD2-LICENSING-001"
|
||||||
|
- "AML-KYC-CUSTOMER-ONBOARDING-001"
|
||||||
|
escalate_for_legal_review:
|
||||||
|
- "REGULATORY-PERIMETER-ASSESSMENT-001"
|
||||||
|
- "OWN-VS-PARTNER-ROLE-ANALYSIS-001"
|
||||||
|
reasoning_must_contain:
|
||||||
|
- "unklarer regulatorischer Perimeter"
|
||||||
|
- "Rolle des Unternehmens ist nicht eindeutig"
|
||||||
|
- "Eskalation erforderlich"
|
||||||
87
control-pipeline/tests/applicability_demo_ci/evaluator.py
Normal file
87
control-pipeline/tests/applicability_demo_ci/evaluator.py
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
import argparse, json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
def load_yaml(path: Path) -> Dict[str, Any]:
    """Read the UTF-8 encoded file at ``path`` and parse it with ``yaml.safe_load``."""
    raw_text = path.read_text(encoding="utf-8")
    parsed = yaml.safe_load(raw_text)
    return parsed
|
||||||
|
|
||||||
|
def load_json(path: Path) -> Dict[str, Any]:
    """Read the UTF-8 encoded file at ``path`` and return its decoded JSON content."""
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)
|
||||||
|
|
||||||
|
def contains_phrase(explanation: str, phrase: str) -> bool:
    """Return whether ``phrase`` occurs in ``explanation``, ignoring case.

    Matching uses ``str.casefold`` rather than ``str.lower`` so that caseless
    comparison also works for characters whose upper/lower forms differ in
    length, such as the German "ß" versus "SS".

    Args:
        explanation: Text to search. A missing (``None``) explanation is
            treated as empty text instead of raising.
        phrase: Substring that must be present.

    Returns:
        ``True`` if the case-folded phrase is a substring of the case-folded
        explanation, otherwise ``False``.
    """
    haystack = (explanation or "").casefold()
    return phrase.casefold() in haystack
|
||||||
|
|
||||||
|
def evaluate_case(expected_case: Dict[str, Any], actual: Dict[str, Any]) -> Dict[str, Any]:
    """Compare one actual output against the expectations of one case.

    Checks, in this order: required controls, forbidden controls, required
    escalations, required industries, forbidden industries and required
    reasoning phrases. Each violation is recorded as an error string. A
    warning is recorded when the case expects escalations but the actual
    output carries no ``uncertainty_flags``.

    Args:
        expected_case: Case entry with ``id``, optional ``title`` and an
            ``expected`` mapping.
        actual: Output to check; missing keys are treated as empty.

    Returns:
        Mapping with ``case_id``, ``title``, ``passed``, ``errors`` and
        ``warnings``; ``passed`` is true exactly when no error was recorded.
    """
    spec = expected_case["expected"]
    control_spec = spec.get("controls", {})

    assigned_controls = set(actual.get("assigned_controls", []))
    raised_escalations = set(actual.get("escalations", []))
    found_industries = set(actual.get("inferred_industries", []))
    explanation_text = actual.get("explanation", "")
    flags = actual.get("uncertainty_flags", [])

    problems: List[str] = []
    problems.extend(
        f"missing must_assign control: {control_id}"
        for control_id in control_spec.get("must_assign", [])
        if control_id not in assigned_controls
    )
    problems.extend(
        f"forbidden control assigned: {control_id}"
        for control_id in control_spec.get("must_not_assign", [])
        if control_id in assigned_controls
    )
    problems.extend(
        f"missing escalation: {control_id}"
        for control_id in control_spec.get("escalate_for_legal_review", [])
        if control_id not in raised_escalations
    )
    problems.extend(
        f"missing inferred industry: {industry}"
        for industry in spec.get("inferred_industries", {}).get("must_include", [])
        if industry not in found_industries
    )
    problems.extend(
        f"forbidden inferred industry present: {industry}"
        for industry in spec.get("inferred_industries", {}).get("must_not_include", [])
        if industry in found_industries
    )
    problems.extend(
        f"missing reasoning phrase: {required}"
        for required in spec.get("reasoning_must_contain", [])
        if not contains_phrase(explanation_text, required)
    )

    notes: List[str] = []
    expects_escalation = bool(control_spec.get("escalate_for_legal_review"))
    if expects_escalation and not flags:
        notes.append("escalation present without uncertainty_flags")

    return {
        "case_id": expected_case["id"],
        "title": expected_case.get("title"),
        "passed": len(problems) == 0,
        "errors": problems,
        "warnings": notes,
    }
|
||||||
|
|
||||||
|
def evaluate_suite(cases_path: Path, actual_dir: Path) -> Dict[str, Any]:
    """Evaluate every case of a suite file against the outputs in a directory.

    For each case, the output is read from ``<actual_dir>/<case id>.json``.
    A case whose file does not exist is recorded as failed with a single
    "missing actual output file" error.

    Args:
        cases_path: YAML suite file containing a ``cases`` list.
        actual_dir: Directory holding one JSON output per case.

    Returns:
        Mapping with the suite name, the total/passed/failed counts and the
        per-case results in suite order.
    """
    suite_spec = load_yaml(cases_path)
    outcomes = []
    for case_spec in suite_spec["cases"]:
        output_file = actual_dir / f"{case_spec['id']}.json"
        if output_file.exists():
            outcomes.append(evaluate_case(case_spec, load_json(output_file)))
        else:
            outcomes.append(
                {
                    "case_id": case_spec["id"],
                    "title": case_spec.get("title"),
                    "passed": False,
                    "errors": [f"missing actual output file: {output_file.name}"],
                    "warnings": [],
                }
            )

    total = len(outcomes)
    pass_count = len([outcome for outcome in outcomes if outcome["passed"]])
    return {
        "suite": suite_spec.get("suite"),
        "total_cases": total,
        "passed": pass_count,
        "failed": total - pass_count,
        "results": outcomes,
    }
|
||||||
|
|
||||||
|
def render_md(report: Dict[str, Any]) -> str:
    """Render a suite report as Markdown.

    Args:
        report: Mapping with ``total_cases``, ``passed``, ``failed`` and
            ``results`` (each result having ``case_id``, ``passed``,
            ``errors`` and ``warnings``), plus an optional ``suite`` name.

    Returns:
        Markdown text with a heading, a summary and one bullet per case
        followed by nested bullets for its errors and warnings. The text
        ends with a newline.
    """
    # The "suite" key can be present but None (evaluate_suite stores
    # suite.get("suite")), so fall back on any falsy value rather than only
    # on a missing key; otherwise the heading would read "# None".
    title = report.get("suite") or "Applicability Demo Report"
    lines = [
        f"# {title}",
        "",
        "## Summary",
        f"- Total cases: {report['total_cases']}",
        f"- Passed: {report['passed']}",
        f"- Failed: {report['failed']}",
        "",
        "## Results",
    ]
    for result in report["results"]:
        status = "PASS" if result["passed"] else "FAIL"
        lines.append(f"- {result['case_id']}: {status}")
        # Two-space indent so the details nest under the case bullet.
        lines.extend(f"  - error: {error}" for error in result["errors"])
        lines.extend(f"  - warning: {warning}" for warning in result["warnings"])
    # A trailing empty entry makes the joined text end with a newline.
    lines.append("")
    return "\n".join(lines)
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: evaluate a suite and write both reports.

    Parses ``--cases``, ``--actual-dir``, ``--report-json`` and
    ``--report-md`` (all required), evaluates the suite, writes the JSON and
    Markdown reports and prints the JSON report to stdout.

    The exit status does not depend on whether cases failed; callers read the
    ``failed`` count from the report.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--cases", type=Path, required=True)
    ap.add_argument("--actual-dir", type=Path, required=True)
    ap.add_argument("--report-json", type=Path, required=True)
    ap.add_argument("--report-md", type=Path, required=True)
    args = ap.parse_args()

    report = evaluate_suite(args.cases, args.actual_dir)

    # Create the report directories up front so a run where they do not
    # exist yet does not fail on the first write.
    for target in (args.report_json, args.report_md):
        target.parent.mkdir(parents=True, exist_ok=True)

    report_json = json.dumps(report, indent=2, ensure_ascii=False)
    args.report_json.write_text(report_json, encoding="utf-8")
    args.report_md.write_text(render_md(report), encoding="utf-8")
    print(report_json)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,52 @@
|
|||||||
|
{
|
||||||
|
"suite": "applicability-engine-demo-cases-priority-6",
|
||||||
|
"total_cases": 6,
|
||||||
|
"passed": 5,
|
||||||
|
"failed": 1,
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"case_id": "CASE-001",
|
||||||
|
"title": "SaaS-Webshop mit Stripe Checkout",
|
||||||
|
"passed": true,
|
||||||
|
"errors": [],
|
||||||
|
"warnings": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"case_id": "CASE-002",
|
||||||
|
"title": "Bank gibt TAN-Generator mit Batterie an Kunden aus",
|
||||||
|
"passed": true,
|
||||||
|
"errors": [],
|
||||||
|
"warnings": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"case_id": "CASE-004",
|
||||||
|
"title": "Fintech mit eigener Wallet und Weiterleitung von Kundengeldern",
|
||||||
|
"passed": false,
|
||||||
|
"errors": [
|
||||||
|
"missing reasoning phrase: möglicherweise regulierter Zahlungsdienst"
|
||||||
|
],
|
||||||
|
"warnings": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"case_id": "CASE-006",
|
||||||
|
"title": "SaaS-Unternehmen verschickt nur SMS über externen Gateway",
|
||||||
|
"passed": true,
|
||||||
|
"errors": [],
|
||||||
|
"warnings": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"case_id": "CASE-008",
|
||||||
|
"title": "Importeur von IoT-Geräten mit Batterien und Funkmodul",
|
||||||
|
"passed": true,
|
||||||
|
"errors": [],
|
||||||
|
"warnings": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"case_id": "CASE-011",
|
||||||
|
"title": "Unklarer Grenzfall mit Embedded Finance",
|
||||||
|
"passed": true,
|
||||||
|
"errors": [],
|
||||||
|
"warnings": []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# applicability-engine-demo-cases-priority-6
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
- Total cases: 6
|
||||||
|
- Passed: 5
|
||||||
|
- Failed: 1
|
||||||
|
|
||||||
|
## Results
|
||||||
|
- CASE-001: PASS
|
||||||
|
- CASE-002: PASS
|
||||||
|
- CASE-004: FAIL
|
||||||
|
- error: missing reasoning phrase: möglicherweise regulierter Zahlungsdienst
|
||||||
|
- CASE-006: PASS
|
||||||
|
- CASE-008: PASS
|
||||||
|
- CASE-011: PASS
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
PyYAML>=6.0
|
||||||
|
pytest>=8.0
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
|
||||||
|
def test_demo_suite_passes() -> None:
    """Run evaluator.py as a subprocess and require a report without failed cases."""
    report_dir = ROOT / "reports"
    report_dir.mkdir(exist_ok=True)
    json_report = report_dir / "latest_report.json"
    md_report = report_dir / "latest_report.md"

    argv = [sys.executable, str(ROOT / "evaluator.py")]
    argv += ["--cases", str(ROOT / "demo_cases.yaml")]
    argv += ["--actual-dir", str(ROOT / "actual_outputs")]
    argv += ["--report-json", str(json_report)]
    argv += ["--report-md", str(md_report)]

    outcome = subprocess.run(argv, capture_output=True, text=True, check=False)
    assert outcome.returncode == 0, outcome.stderr

    # The evaluator writes the report itself; read it back to check the result.
    report = json.loads(json_report.read_text(encoding="utf-8"))
    assert report["failed"] == 0, json.dumps(report, indent=2, ensure_ascii=False)
|
||||||
|
|
||||||
|
def test_boundary_cases_have_escalations() -> None:
    """Require at least one escalation in the stored output of each boundary case."""
    boundary_ids = {"CASE-002", "CASE-004", "CASE-006", "CASE-008", "CASE-011"}
    # Iterate in sorted order so the first reported failure is the same on
    # every run (set iteration order is not stable across interpreter runs).
    for case_id in sorted(boundary_ids):
        payload = json.loads((ROOT / "actual_outputs" / f"{case_id}.json").read_text(encoding="utf-8"))
        # .get() so a missing key fails with the message below instead of a
        # bare KeyError that does not name the case.
        assert payload.get("escalations"), f"{case_id} should include at least one escalation"
|
||||||
|
|
||||||
|
def test_stripe_case_not_psd2() -> None:
    """Ensure the stored CASE-001 output assigns neither of the two payment-licensing controls."""
    golden_file = ROOT / "actual_outputs" / "CASE-001.json"
    golden = json.loads(golden_file.read_text(encoding="utf-8"))
    assigned = golden["assigned_controls"]
    assert "PSD2-LICENSING-001" not in assigned
    assert "PAYMENT-INSTITUTION-AUTH-001" not in assigned
|
||||||
Reference in New Issue
Block a user