diff --git a/control-pipeline/tests/applicability_demo_ci/.github/workflows/applicability-demo-regression.yml b/control-pipeline/tests/applicability_demo_ci/.github/workflows/applicability-demo-regression.yml new file mode 100644 index 0000000..1b6bff0 --- /dev/null +++ b/control-pipeline/tests/applicability_demo_ci/.github/workflows/applicability-demo-regression.yml @@ -0,0 +1,42 @@ +name: applicability-demo-regression + +on: + push: + paths: + - 'applicability_demo_ci/**' + pull_request: + paths: + - 'applicability_demo_ci/**' + workflow_dispatch: + +jobs: + regression: + runs-on: ubuntu-latest + defaults: + run: + working-directory: applicability_demo_ci + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: make install + + - name: Run evaluator + run: make eval + + - name: Run pytest + run: make test + + - name: Upload reports + uses: actions/upload-artifact@v4 + with: + name: applicability-demo-reports + path: | + applicability_demo_ci/reports/latest_report.json + applicability_demo_ci/reports/latest_report.md diff --git a/control-pipeline/tests/applicability_demo_ci/Makefile b/control-pipeline/tests/applicability_demo_ci/Makefile new file mode 100644 index 0000000..3ca47fb --- /dev/null +++ b/control-pipeline/tests/applicability_demo_ci/Makefile @@ -0,0 +1,23 @@ +PYTHON ?= python3 + +.PHONY: install test eval report clean + +install: + $(PYTHON) -m pip install -U pip + $(PYTHON) -m pip install -r requirements.txt + +test: + pytest -q + +eval: + $(PYTHON) evaluator.py \ + --cases demo_cases.yaml \ + --actual-dir actual_outputs \ + --report-json reports/latest_report.json \ + --report-md reports/latest_report.md + +report: eval + cat reports/latest_report.md + +clean: + rm -f reports/latest_report.json reports/latest_report.md diff --git a/control-pipeline/tests/applicability_demo_ci/README.md 
b/control-pipeline/tests/applicability_demo_ci/README.md new file mode 100644 index 0000000..e6bb590 --- /dev/null +++ b/control-pipeline/tests/applicability_demo_ci/README.md @@ -0,0 +1,32 @@ +# Applicability Demo CI Suite + +Diese Variante ist als direkt einhängbare Regression-Suite gedacht. + +## Enthalten +- `demo_cases.yaml` — priorisierte Demo-Fälle +- `actual_outputs/` — Golden Outputs +- `evaluator.py` — Assertions + Report-Generator +- `tests/` — pytest-Regressionen +- `Makefile` — lokale Standardbefehle +- `.github/workflows/applicability-demo-regression.yml` — GitHub Actions Job + +## Lokal starten +```bash +make install +make eval +make test +``` + +## Reports +Nach `make eval` liegen die Reports hier: +- `reports/latest_report.json` +- `reports/latest_report.md` + +## Einbau in euer Repo +Am einfachsten legt ihr den Ordner als `applicability_demo_ci/` ins Repo. +Der Workflow ist bereits darauf ausgelegt. + +## Nächste sinnvolle Anpassung +- `actual_outputs/` durch echte Endpoint-Outputs ersetzen +- optional kleinen Adapter bauen, falls euer API-Schema leicht abweicht +- weitere Grenzfälle ergänzen: WEEE, Medizinprodukt, Bildung, AI Act, CRA diff --git a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-001.json b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-001.json new file mode 100644 index 0000000..0170564 --- /dev/null +++ b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-001.json @@ -0,0 +1,25 @@ +{ + "case_id": "CASE-001", + "assigned_controls": [ + "GDPR-INFO-THIRDPARTY-001", + "GDPR-ROPA-001", + "GDPR-LEGAL-BASIS-001", + "VENDOR-DUE-DILIGENCE-001" + ], + "excluded_controls": [ + "PSD2-LICENSING-001", + "PAYMENT-INSTITUTION-AUTH-001", + "AML-KYC-CUSTOMER-ONBOARDING-001" + ], + "escalations": [], + "inferred_industries": [ + "retail_ecommerce" + ], + "confidence": { + "overall": 0.93, + "industry_assignment": 0.96, + "control_assignment": 0.91 + }, + "explanation": "Stripe ist eigenständiger 
Zahlungsdienstleister. Das Unternehmen erbringt keine eigene Erbringung regulierter Zahlungsdienste. Datenschutz- und Transparenzpflichten bleiben relevant.", + "uncertainty_flags": [] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-002.json b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-002.json new file mode 100644 index 0000000..bb73412 --- /dev/null +++ b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-002.json @@ -0,0 +1,26 @@ +{ + "case_id": "CASE-002", + "assigned_controls": [ + "BANK-ACCESS-AUTH-001", + "BATTERY-LABELING-001", + "BATTERY-TAKEBACK-001", + "PRODUCT-COMPLIANCE-DOC-001", + "WEEE-REGISTRATION-001" + ], + "excluded_controls": [], + "escalations": [ + "BATTERY-PRODUCER-DEFINITION-001" + ], + "inferred_industries": [ + "financial_services" + ], + "confidence": { + "overall": 0.82, + "industry_assignment": 0.95, + "control_assignment": 0.76 + }, + "explanation": "Die statische Branchenzuweisung ist nicht abschließend. Ein physisches Produkt mit Batterie erweitert den Scope. 
Die Rolle als Inverkehrbringer oder Hersteller prüfen bleibt relevant.", + "uncertainty_flags": [ + "producer_role_unclear" + ] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-004.json b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-004.json new file mode 100644 index 0000000..50d7f58 --- /dev/null +++ b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-004.json @@ -0,0 +1,25 @@ +{ + "case_id": "CASE-004", + "assigned_controls": [ + "PSD2-LICENSING-001", + "AML-KYC-CUSTOMER-ONBOARDING-001", + "AML-TRANSACTION-MONITORING-001", + "FRAUD-CONTROLS-001" + ], + "excluded_controls": [], + "escalations": [ + "REGULATORY-PERIMETER-ASSESSMENT-001" + ], + "inferred_industries": [ + "financial_services" + ], + "confidence": { + "overall": 0.89, + "industry_assignment": 0.97, + "control_assignment": 0.87 + }, + "explanation": "Es liegt ein möglicherweise regulierter Zahlungsdienst vor. AML/KYC relevant ist die Entgegennahme und Weiterleitung von Kundengeldern. 
Die rechtliche Einordnung nicht nur Datenschutz muss geprüft werden.", + "uncertainty_flags": [ + "regulatory_perimeter_needs_confirmation" + ] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-006.json b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-006.json new file mode 100644 index 0000000..c2a02d9 --- /dev/null +++ b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-006.json @@ -0,0 +1,26 @@ +{ + "case_id": "CASE-006", + "assigned_controls": [ + "VENDOR-DUE-DILIGENCE-001", + "GDPR-INFO-THIRDPARTY-001" + ], + "excluded_controls": [ + "TKG-CUSTOMER-INFORMATION-001", + "TKG-CONTRACT-TRANSPARENCY-001" + ], + "escalations": [ + "ECS-QUALIFICATION-ASSESSMENT-001" + ], + "inferred_industries": [ + "software_saas" + ], + "confidence": { + "overall": 0.87, + "industry_assignment": 0.94, + "control_assignment": 0.84 + }, + "explanation": "Die bloße Nutzung eines Gateways ist nicht automatisch eigener TK-Dienst.", + "uncertainty_flags": [ + "ecs_boundary_case" + ] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-008.json b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-008.json new file mode 100644 index 0000000..e382fe9 --- /dev/null +++ b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-008.json @@ -0,0 +1,27 @@ +{ + "case_id": "CASE-008", + "assigned_controls": [ + "BATTERY-LABELING-001", + "BATTERY-TAKEBACK-001", + "CE-TECHNICAL-DOC-001", + "IMPORTER-RESPONSIBILITIES-001", + "RED-CONFORMITY-001", + "WEEE-REGISTRATION-001" + ], + "excluded_controls": [], + "escalations": [ + "PRODUCT-QUALIFICATION-MULTIREGIME-001" + ], + "inferred_industries": [ + "software_saas" + ], + "confidence": { + "overall": 0.84, + "industry_assignment": 0.93, + "control_assignment": 0.8 + }, + "explanation": "Die ursprüngliche Branche ist Software. 
Zusätzliche Produkt- und Importeurspflichten werden scope-basiert aktiviert.", + "uncertainty_flags": [ + "multi_regime_product_scope" + ] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-011.json b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-011.json new file mode 100644 index 0000000..4aac981 --- /dev/null +++ b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-011.json @@ -0,0 +1,24 @@ +{ + "case_id": "CASE-011", + "assigned_controls": [ + "THIRD-PARTY-FINANCIAL-PROVIDER-DUE-DILIGENCE-001" + ], + "excluded_controls": [], + "escalations": [ + "REGULATORY-PERIMETER-ASSESSMENT-001", + "OWN-VS-PARTNER-ROLE-ANALYSIS-001" + ], + "inferred_industries": [ + "software_saas" + ], + "confidence": { + "overall": 0.61, + "industry_assignment": 0.9, + "control_assignment": 0.58 + }, + "explanation": "Es besteht ein unklarer regulatorischer Perimeter. Die Rolle des Unternehmens ist nicht eindeutig. Eskalation erforderlich.", + "uncertainty_flags": [ + "unclear_funds_flow", + "own_vs_partner_role_unclear" + ] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/demo_cases.yaml b/control-pipeline/tests/applicability_demo_ci/demo_cases.yaml new file mode 100644 index 0000000..a5f2559 --- /dev/null +++ b/control-pipeline/tests/applicability_demo_ci/demo_cases.yaml @@ -0,0 +1,119 @@ +version: "1.0" +suite: "applicability-engine-demo-cases-priority-6" +cases: + - id: "CASE-001" + title: "SaaS-Webshop mit Stripe Checkout" + expected: + inferred_industries: + must_include: ["retail_ecommerce"] + must_not_include: ["financial_services"] + controls: + must_assign: + - "GDPR-INFO-THIRDPARTY-001" + - "GDPR-ROPA-001" + - "GDPR-LEGAL-BASIS-001" + - "VENDOR-DUE-DILIGENCE-001" + must_not_assign: + - "PSD2-LICENSING-001" + - "PAYMENT-INSTITUTION-AUTH-001" + - "AML-KYC-CUSTOMER-ONBOARDING-001" + reasoning_must_contain: + - "Stripe ist eigenständiger 
Zahlungsdienstleister" + - "keine eigene Erbringung regulierter Zahlungsdienste" + - "Datenschutz- und Transparenzpflichten bleiben relevant" + + - id: "CASE-002" + title: "Bank gibt TAN-Generator mit Batterie an Kunden aus" + expected: + inferred_industries: + must_include: ["financial_services"] + must_not_include: ["manufacturing"] + controls: + must_assign: + - "BANK-ACCESS-AUTH-001" + - "BATTERY-LABELING-001" + - "BATTERY-TAKEBACK-001" + - "PRODUCT-COMPLIANCE-DOC-001" + may_assign_if_explained: + - "WEEE-REGISTRATION-001" + escalate_for_legal_review: + - "BATTERY-PRODUCER-DEFINITION-001" + reasoning_must_contain: + - "statische Branchenzuweisung ist nicht abschließend" + - "physisches Produkt mit Batterie erweitert den Scope" + - "Rolle als Inverkehrbringer oder Hersteller prüfen" + + - id: "CASE-004" + title: "Fintech mit eigener Wallet und Weiterleitung von Kundengeldern" + expected: + inferred_industries: + must_include: ["financial_services"] + controls: + must_assign: + - "PSD2-LICENSING-001" + - "AML-KYC-CUSTOMER-ONBOARDING-001" + - "AML-TRANSACTION-MONITORING-001" + - "FRAUD-CONTROLS-001" + escalate_for_legal_review: + - "REGULATORY-PERIMETER-ASSESSMENT-001" + reasoning_must_contain: + - "möglicherweise regulierter Zahlungsdienst" + - "AML/KYC relevant" + - "rechtliche Einordnung nicht nur Datenschutz" + + - id: "CASE-006" + title: "SaaS-Unternehmen verschickt nur SMS über externen Gateway" + expected: + inferred_industries: + must_include: ["software_saas"] + must_not_include: ["telecommunications"] + controls: + must_assign: + - "VENDOR-DUE-DILIGENCE-001" + - "GDPR-INFO-THIRDPARTY-001" + must_not_assign: + - "TKG-CUSTOMER-INFORMATION-001" + - "TKG-CONTRACT-TRANSPARENCY-001" + escalate_for_legal_review: + - "ECS-QUALIFICATION-ASSESSMENT-001" + reasoning_must_contain: + - "bloße Nutzung eines Gateways ist nicht automatisch eigener TK-Dienst" + + - id: "CASE-008" + title: "Importeur von IoT-Geräten mit Batterien und Funkmodul" + expected: + 
def load_yaml(path: Path) -> Dict[str, Any]:
    """Read *path* as UTF-8 text and parse it with ``yaml.safe_load``."""
    return yaml.safe_load(path.read_text(encoding="utf-8"))


def load_json(path: Path) -> Dict[str, Any]:
    """Read *path* as UTF-8 text and parse it as JSON."""
    return json.loads(path.read_text(encoding="utf-8"))


def contains_phrase(explanation: str, phrase: str) -> bool:
    """Return True if *phrase* occurs in *explanation*, ignoring letter case."""
    return phrase.lower() in explanation.lower()


def _as_list(value: Any) -> List[Any]:
    """Return *value* as a list; a missing, null or empty entry becomes ``[]``."""
    return list(value) if value else []


def _failed_result(case: Dict[str, Any], message: str) -> Dict[str, Any]:
    """Build the result record for a case that failed before evaluation."""
    return {
        "case_id": case["id"],
        "title": case.get("title"),
        "passed": False,
        "errors": [message],
        "warnings": [],
    }


def evaluate_case(expected_case: Dict[str, Any], actual: Dict[str, Any]) -> Dict[str, Any]:
    """Compare one actual output against the expectations of one case.

    Args:
        expected_case: A case entry with ``id``, optional ``title`` and an
            ``expected`` mapping (``controls``, ``inferred_industries``,
            ``reasoning_must_contain``).
        actual: The produced output (``assigned_controls``, ``escalations``,
            ``inferred_industries``, ``explanation``, ``uncertainty_flags``).

    Returns:
        A dict with ``case_id``, ``title``, ``passed``, ``errors`` and
        ``warnings``. ``passed`` is True exactly when ``errors`` is empty;
        warnings never fail a case.

    Raises:
        KeyError: If *expected_case* lacks ``id`` or ``expected``.
    """
    expected = expected_case["expected"]
    # ``or {}`` / ``_as_list`` keep explicit nulls (a YAML key without a value,
    # a JSON ``null``) from crashing the evaluation; they count as "nothing".
    controls = expected.get("controls") or {}
    industry_rules = expected.get("inferred_industries") or {}
    expected_escalations = _as_list(controls.get("escalate_for_legal_review"))

    assigned = set(_as_list(actual.get("assigned_controls")))
    escalations = set(_as_list(actual.get("escalations")))
    industries = set(_as_list(actual.get("inferred_industries")))
    explanation = actual.get("explanation") or ""
    uncertainty_flags = _as_list(actual.get("uncertainty_flags"))

    # The check order is fixed so that report contents stay stable.
    errors: List[str] = []
    errors += [
        f"missing must_assign control: {c}"
        for c in _as_list(controls.get("must_assign"))
        if c not in assigned
    ]
    errors += [
        f"forbidden control assigned: {c}"
        for c in _as_list(controls.get("must_not_assign"))
        if c in assigned
    ]
    errors += [
        f"missing escalation: {c}"
        for c in expected_escalations
        if c not in escalations
    ]
    errors += [
        f"missing inferred industry: {i}"
        for i in _as_list(industry_rules.get("must_include"))
        if i not in industries
    ]
    errors += [
        f"forbidden inferred industry present: {i}"
        for i in _as_list(industry_rules.get("must_not_include"))
        if i in industries
    ]
    errors += [
        f"missing reasoning phrase: {p}"
        for p in _as_list(expected.get("reasoning_must_contain"))
        if not contains_phrase(explanation, p)
    ]

    warnings: List[str] = []
    # Warn when an escalation is expected OR actually present in the output
    # while no uncertainty flag accompanies it.
    if (expected_escalations or escalations) and not uncertainty_flags:
        warnings.append("escalation present without uncertainty_flags")

    return {
        "case_id": expected_case["id"],
        "title": expected_case.get("title"),
        "passed": not errors,
        "errors": errors,
        "warnings": warnings,
    }


def evaluate_suite(cases_path: Path, actual_dir: Path) -> Dict[str, Any]:
    """Evaluate every case of the suite file against ``<actual_dir>/<id>.json``.

    A missing, unparsable or non-object output file is recorded as a failed
    case, so one broken file does not hide the results of the other cases.

    Args:
        cases_path: YAML file with a top-level ``cases`` list.
        actual_dir: Directory holding one ``<case id>.json`` per case.

    Returns:
        A report dict with ``suite``, ``total_cases``, ``passed``, ``failed``
        and the per-case ``results``.
    """
    suite = load_yaml(cases_path)
    results = []
    for case in suite["cases"]:
        actual_path = actual_dir / f"{case['id']}.json"
        if not actual_path.exists():
            results.append(_failed_result(case, f"missing actual output file: {actual_path.name}"))
            continue
        try:
            actual = load_json(actual_path)
        except ValueError as exc:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            results.append(_failed_result(case, f"invalid actual output file: {actual_path.name}: {exc}"))
            continue
        if not isinstance(actual, dict):
            results.append(_failed_result(case, f"actual output is not a JSON object: {actual_path.name}"))
            continue
        results.append(evaluate_case(case, actual))
    passed = sum(1 for r in results if r["passed"])
    return {
        "suite": suite.get("suite"),
        "total_cases": len(results),
        "passed": passed,
        "failed": len(results) - passed,
        "results": results,
    }


def render_md(report: Dict[str, Any]) -> str:
    """Render *report* (as returned by ``evaluate_suite``) as Markdown text."""
    # ``evaluate_suite`` always sets the "suite" key, possibly to None, so the
    # fallback title has to cover a null value and not only a missing key.
    title = report.get("suite") or "Applicability Demo Report"
    lines = [
        f"# {title}",
        "",
        "## Summary",
        f"- Total cases: {report['total_cases']}",
        f"- Passed: {report['passed']}",
        f"- Failed: {report['failed']}",
        "",
        "## Results",
    ]
    for r in report["results"]:
        status = "PASS" if r["passed"] else "FAIL"
        lines.append(f"- {r['case_id']}: {status}")
        # Two-space indent so the details nest under the case bullet.
        lines.extend(f"  - error: {e}" for e in r["errors"])
        lines.extend(f"  - warning: {w}" for w in r["warnings"])
    lines.append("")  # yields the trailing newline after the join
    return "\n".join(lines)


def main() -> None:
    """CLI entry point: evaluate the suite, write both reports, print the JSON.

    The exit status is 0 regardless of case failures unless ``--strict`` is
    given, in which case any failed case makes the process exit with 1.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--cases", type=Path, required=True)
    ap.add_argument("--actual-dir", type=Path, required=True)
    ap.add_argument("--report-json", type=Path, required=True)
    ap.add_argument("--report-md", type=Path, required=True)
    ap.add_argument(
        "--strict",
        action="store_true",
        help="exit with status 1 when at least one case failed",
    )
    args = ap.parse_args()
    report = evaluate_suite(args.cases, args.actual_dir)
    # Create the report directories so a fresh checkout or custom path works.
    for target in (args.report_json, args.report_md):
        target.parent.mkdir(parents=True, exist_ok=True)
    args.report_json.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
    args.report_md.write_text(render_md(report), encoding="utf-8")
    print(json.dumps(report, indent=2, ensure_ascii=False))
    if args.strict and report["failed"]:
        raise SystemExit(1)


if __name__ == "__main__":
    main()
"CASE-004", + "title": "Fintech mit eigener Wallet und Weiterleitung von Kundengeldern", + "passed": false, + "errors": [ + "missing reasoning phrase: möglicherweise regulierter Zahlungsdienst" + ], + "warnings": [] + }, + { + "case_id": "CASE-006", + "title": "SaaS-Unternehmen verschickt nur SMS über externen Gateway", + "passed": true, + "errors": [], + "warnings": [] + }, + { + "case_id": "CASE-008", + "title": "Importeur von IoT-Geräten mit Batterien und Funkmodul", + "passed": true, + "errors": [], + "warnings": [] + }, + { + "case_id": "CASE-011", + "title": "Unklarer Grenzfall mit Embedded Finance", + "passed": true, + "errors": [], + "warnings": [] + } + ] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/reports/latest_report.md b/control-pipeline/tests/applicability_demo_ci/reports/latest_report.md new file mode 100644 index 0000000..5ffb62c --- /dev/null +++ b/control-pipeline/tests/applicability_demo_ci/reports/latest_report.md @@ -0,0 +1,15 @@ +# applicability-engine-demo-cases-priority-6 + +## Summary +- Total cases: 6 +- Passed: 5 +- Failed: 1 + +## Results +- CASE-001: PASS +- CASE-002: PASS +- CASE-004: FAIL + - error: missing reasoning phrase: möglicherweise regulierter Zahlungsdienst +- CASE-006: PASS +- CASE-008: PASS +- CASE-011: PASS diff --git a/control-pipeline/tests/applicability_demo_ci/requirements.txt b/control-pipeline/tests/applicability_demo_ci/requirements.txt new file mode 100644 index 0000000..0fdbcb0 --- /dev/null +++ b/control-pipeline/tests/applicability_demo_ci/requirements.txt @@ -0,0 +1,2 @@ +PyYAML>=6.0 +pytest>=8.0 diff --git a/control-pipeline/tests/applicability_demo_ci/tests/test_applicability_demo.py b/control-pipeline/tests/applicability_demo_ci/tests/test_applicability_demo.py new file mode 100644 index 0000000..7f87ab0 --- /dev/null +++ b/control-pipeline/tests/applicability_demo_ci/tests/test_applicability_demo.py @@ -0,0 +1,34 @@ +from __future__ import annotations +import 
# Directory of the demo suite: the parent of this tests/ folder.
ROOT = Path(__file__).resolve().parents[1]


def _read_actual(case_id: str) -> dict:
    """Load the stored output JSON for *case_id* from ``actual_outputs/``."""
    source = ROOT / "actual_outputs" / f"{case_id}.json"
    return json.loads(source.read_text(encoding="utf-8"))


def test_demo_suite_passes() -> None:
    """Run the evaluator CLI on the bundled cases and require zero failures."""
    report_dir = ROOT / "reports"
    report_dir.mkdir(exist_ok=True)
    json_report = report_dir / "latest_report.json"
    markdown_report = report_dir / "latest_report.md"

    argv = [sys.executable, str(ROOT / "evaluator.py")]
    argv += ["--cases", str(ROOT / "demo_cases.yaml")]
    argv += ["--actual-dir", str(ROOT / "actual_outputs")]
    argv += ["--report-json", str(json_report)]
    argv += ["--report-md", str(markdown_report)]

    # check=False: the exit status is asserted explicitly so stderr can be shown.
    outcome = subprocess.run(argv, capture_output=True, text=True, check=False)
    assert outcome.returncode == 0, outcome.stderr

    summary = json.loads(json_report.read_text(encoding="utf-8"))
    assert summary["failed"] == 0, json.dumps(summary, indent=2, ensure_ascii=False)


def test_boundary_cases_have_escalations() -> None:
    """Each listed boundary case must carry at least one escalation."""
    boundary_ids = {"CASE-002", "CASE-004", "CASE-006", "CASE-008", "CASE-011"}
    for case_id in boundary_ids:
        escalations = _read_actual(case_id)["escalations"]
        assert escalations, f"{case_id} should include at least one escalation"


def test_stripe_case_not_psd2() -> None:
    """CASE-001 must not be assigned the two payment-licensing controls."""
    assigned = _read_actual("CASE-001")["assigned_controls"]
    assert "PSD2-LICENSING-001" not in assigned
    assert "PAYMENT-INSTITUTION-AUTH-001" not in assigned