diff --git a/control-pipeline/tests/applicability_demo/README.md b/control-pipeline/tests/applicability_demo/README.md new file mode 100644 index 0000000..1a659e4 --- /dev/null +++ b/control-pipeline/tests/applicability_demo/README.md @@ -0,0 +1,53 @@ +# Applicability Engine Demo Package + +## Inhalt +- `demo_cases.yaml` — 6 priorisierte Demo- und Regressionstestfälle +- `expected_outputs/CASE-*.json` — Golden Outputs für die 6 Fälle +- `evaluator.py` — vergleicht tatsächliche Engine-Outputs gegen die Assertions +- `run_demo.py` — einfacher Runner +- `reports/` — Zielordner für JSON- und Markdown-Reports + +## Schnellstart +```bash +python run_demo.py +``` + +Das nutzt `expected_outputs/` als Self-Test. + +## Gegen echte SDK-Outputs laufen lassen +Lege pro Fall eine Datei `CASE-XYZ.json` mit folgendem Schema in ein Verzeichnis: + +```json +{ + "case_id": "CASE-001", + "assigned_controls": [], + "excluded_controls": [], + "escalations": [], + "inferred_industries": [], + "confidence": { + "overall": 0.0, + "industry_assignment": 0.0, + "control_assignment": 0.0 + }, + "explanation": "", + "uncertainty_flags": [] +} +``` + +Dann: + +```bash +python run_demo.py --actual-dir /pfad/zu/deinen/outputs +``` + +## Testlogik +Der Evaluator prüft: +- `must_assign` +- `must_not_assign` +- `escalate_for_legal_review` +- `inferred_industries.must_include` +- `inferred_industries.must_not_include` +- `reasoning_must_contain` + +Zusätzlich gibt es Warnings, wenn Grenzfälle eskaliert sind, aber keine `uncertainty_flags` +gesetzt wurden oder die Confidence unplausibel hoch ist. 
diff --git a/control-pipeline/tests/applicability_demo/demo_cases.yaml b/control-pipeline/tests/applicability_demo/demo_cases.yaml new file mode 100644 index 0000000..ba64c97 --- /dev/null +++ b/control-pipeline/tests/applicability_demo/demo_cases.yaml @@ -0,0 +1,239 @@ +version: "1.0" +suite: "applicability-engine-demo-cases-priority-6" +description: > + Priorisierte Demo- und Regressionstestfälle für die Applicability Engine. + Ziel: False Positives vermeiden, Scope-Overrides korrekt aktivieren und + Unsicherheiten sauber eskalieren. + +defaults: + jurisdiction: "DE" + evaluation_mode: "strict" + require_explanation: true + require_uncertainty_flag: true + +cases: + + - id: "CASE-001" + title: "SaaS-Webshop mit Stripe Checkout" + objective: > + Prüfen, dass Stripe-Einbindung nicht fälschlich zu PSD2-/ZAG-/Zahlungsinstituts- + Controls führt, sondern zu Datenschutz-/Transparenz-/Drittanbieter-Controls. + profile: + company_type: "GmbH" + industry: "retail_ecommerce" + summary: > + Ein deutsches Unternehmen betreibt einen Webshop für physische Produkte. + Zahlungen werden über Stripe Checkout abgewickelt. Das Unternehmen hält + selbst keine Kundengelder, führt keine Zahlungskonten und bietet keine + eigenen Zahlungsdienste an. 
+ facts: + sells_physical_products: true + webshop: true + payment_provider: "Stripe" + stores_card_data: false + holds_customer_funds: false + operates_payment_service: false + processes_personal_data: true + sends_data_to_stripe: true + expected: + inferred_industries: + must_include: + - "retail_ecommerce" + must_not_include: + - "financial_services" + controls: + must_assign: + - "GDPR-INFO-THIRDPARTY-001" + - "GDPR-ROPA-001" + - "GDPR-LEGAL-BASIS-001" + - "VENDOR-DUE-DILIGENCE-001" + must_not_assign: + - "PSD2-LICENSING-001" + - "PAYMENT-INSTITUTION-AUTH-001" + - "AML-KYC-CUSTOMER-ONBOARDING-001" + escalate_for_legal_review: [] + reasoning_must_contain: + - "Stripe ist eigenständiger Zahlungsdienstleister" + - "keine eigene Erbringung regulierter Zahlungsdienste" + - "Datenschutz- und Transparenzpflichten bleiben relevant" + + - id: "CASE-002" + title: "Bank gibt TAN-Generator mit Batterie an Kunden aus" + objective: > + Prüfen, dass branchenfremd wirkende Produktpflichten über Scope aktiviert + werden können, obwohl die Hauptbranche Finanzdienstleistung ist. + profile: + company_type: "AG" + industry: "financial_services" + summary: > + Eine Bank gibt physische TAN-Generatoren mit eingebauter Batterie an + Endkunden aus. Die Geräte werden unter eigener Marke vertrieben. 
+ facts: + provides_banking_services: true + distributes_physical_products: true + product_contains_battery: true + product_under_own_brand: true + imports_product_from_non_eu: false + manufactures_product: false + expected: + inferred_industries: + must_include: + - "financial_services" + must_not_include: + - "manufacturing" + controls: + must_assign: + - "BANK-ACCESS-AUTH-001" + - "BATTERY-LABELING-001" + - "BATTERY-TAKEBACK-001" + - "PRODUCT-COMPLIANCE-DOC-001" + must_not_assign: [] + may_assign_if_explained: + - "WEEE-REGISTRATION-001" + escalate_for_legal_review: + - "BATTERY-PRODUCER-DEFINITION-001" + reasoning_must_contain: + - "statische Branchenzuweisung ist nicht abschließend" + - "physisches Produkt mit Batterie erweitert den Scope" + - "Rolle als Inverkehrbringer oder Hersteller prüfen" + + - id: "CASE-004" + title: "Fintech mit eigener Wallet und Weiterleitung von Kundengeldern" + objective: > + Positiver Gegentest: echte Finanzregulierung muss ausgelöst werden. + profile: + company_type: "GmbH" + industry: "financial_services" + summary: > + Ein Fintech bietet eine App mit Wallet-Funktion, Kundengelder werden + entgegengenommen und an Händler weitergeleitet. 
+ facts: + provides_wallet: true + holds_customer_funds: true + executes_payment_transactions: true + customer_onboarding: true + transaction_monitoring: true + expected: + inferred_industries: + must_include: + - "financial_services" + controls: + must_assign: + - "PSD2-LICENSING-001" + - "AML-KYC-CUSTOMER-ONBOARDING-001" + - "AML-TRANSACTION-MONITORING-001" + - "FRAUD-CONTROLS-001" + must_not_assign: [] + escalate_for_legal_review: + - "REGULATORY-PERIMETER-ASSESSMENT-001" + reasoning_must_contain: + - "möglicherweise regulierter Zahlungsdienst" + - "AML/KYC relevant" + - "rechtliche Einordnung nicht nur Datenschutz" + + - id: "CASE-006" + title: "SaaS-Unternehmen verschickt nur SMS über externen Gateway" + objective: > + Prüfen, dass reine Nutzung eines Kommunikationsdienstes nicht automatisch + zu voller TKG-Relevanz führt. + profile: + company_type: "UG" + industry: "software_saas" + summary: > + Eine SaaS-Plattform verschickt Login-Codes per Twilio/SMS-Gateway, + betreibt aber kein eigenes öffentliches Telekommunikationsnetz und + bietet keinen Telekommunikationsdienst am Markt an. + facts: + sends_sms_notifications: true + uses_external_gateway: true + provides_public_telecom_services: false + operates_network: false + expected: + inferred_industries: + must_include: + - "software_saas" + must_not_include: + - "telecommunications" + controls: + must_assign: + - "VENDOR-DUE-DILIGENCE-001" + - "GDPR-INFO-THIRDPARTY-001" + must_not_assign: + - "TKG-CUSTOMER-INFORMATION-001" + - "TKG-CONTRACT-TRANSPARENCY-001" + escalate_for_legal_review: + - "ECS-QUALIFICATION-ASSESSMENT-001" + reasoning_must_contain: + - "bloße Nutzung eines Gateways ist nicht automatisch eigener TK-Dienst" + + - id: "CASE-008" + title: "Importeur von IoT-Geräten mit Batterien und Funkmodul" + objective: > + Mehrfach-Scope: Batterie, Funk, Produktrecht, Importeurspflichten. 
+ profile: + company_type: "GmbH" + industry: "software_saas" + summary: > + Ein Softwareunternehmen verkauft nun zusätzlich eigene IoT-Sensoren + mit Batterie und Funkmodul unter eigener Marke. + facts: + imports_from_non_eu: true + sells_hardware: true + product_contains_battery: true + product_has_radio: true + own_brand: true + expected: + inferred_industries: + must_include: + - "software_saas" + controls: + must_assign: + - "BATTERY-LABELING-001" + - "BATTERY-TAKEBACK-001" + - "CE-TECHNICAL-DOC-001" + - "IMPORTER-RESPONSIBILITIES-001" + must_not_assign: [] + may_assign_if_explained: + - "RED-CONFORMITY-001" + - "WEEE-REGISTRATION-001" + escalate_for_legal_review: + - "PRODUCT-QUALIFICATION-MULTIREGIME-001" + reasoning_must_contain: + - "ursprüngliche Branche ist Software" + - "zusätzliche Produkt- und Importeurspflichten werden scope-basiert aktiviert" + + - id: "CASE-011" + title: "Unklarer Grenzfall mit Embedded Finance" + objective: > + Das System muss Unsicherheit erkennen und sauber eskalieren. + profile: + company_type: "GmbH" + industry: "software_saas" + summary: > + Eine Plattform ermöglicht Händlern Auszahlungen, virtuelle Konten, + Split Settlements und einen Finanzierungsvorschuss, teilweise über + Partnerbanken, teilweise über eigene Prozesse. 
+ facts: + virtual_accounts: true + split_settlements: true + advance_payments: true + partner_bank_involved: true + own_funds_flow_unclear: true + expected: + inferred_industries: + must_include: + - "software_saas" + controls: + must_assign: + - "THIRD-PARTY-FINANCIAL-PROVIDER-DUE-DILIGENCE-001" + must_not_assign: [] + may_assign_if_explained: + - "PSD2-LICENSING-001" + - "AML-KYC-CUSTOMER-ONBOARDING-001" + escalate_for_legal_review: + - "REGULATORY-PERIMETER-ASSESSMENT-001" + - "OWN-VS-PARTNER-ROLE-ANALYSIS-001" + reasoning_must_contain: + - "unklarer regulatorischer Perimeter" + - "Rolle des Unternehmens nicht eindeutig" + - "Eskalation erforderlich" diff --git a/control-pipeline/tests/applicability_demo/evaluator.py b/control-pipeline/tests/applicability_demo/evaluator.py new file mode 100644 index 0000000..29d38aa --- /dev/null +++ b/control-pipeline/tests/applicability_demo/evaluator.py @@ -0,0 +1,180 @@ +from __future__ import annotations + +import argparse +import json +from pathlib import Path +from typing import Any, Dict, List, Tuple + +try: + import yaml +except ImportError as exc: # pragma: no cover + raise SystemExit("PyYAML is required. 
def load_yaml(path: Path) -> Dict[str, Any]:
    """Parse the YAML document at *path* with ``yaml.safe_load``.

    The parsed content is returned unchanged; an empty document yields
    ``None``, so callers that need a mapping must guard for that.
    """
    with path.open("r", encoding="utf-8") as f:
        return yaml.safe_load(f)


def load_json(path: Path) -> Dict[str, Any]:
    """Parse the UTF-8 encoded JSON document at *path* and return it."""
    with path.open("r", encoding="utf-8") as f:
        return json.load(f)


def _contains_phrase(explanation: str, phrase: str) -> bool:
    """Return True when *phrase* occurs in *explanation*, ignoring case."""
    return phrase.lower() in explanation.lower()


def _list_field(mapping: Dict[str, Any], key: str) -> List[Any]:
    """Return ``mapping[key]``, treating a missing key or ``None`` as an empty list."""
    return mapping.get(key) or []


def _failed_result(case: Dict[str, Any], error: str) -> Dict[str, Any]:
    """Build a failed result entry for *case* carrying a single error message."""
    return {
        "case_id": case["id"],
        "title": case.get("title"),
        "passed": False,
        "errors": [error],
        "warnings": [],
    }


def evaluate_case(expected_case: Dict[str, Any], actual: Dict[str, Any]) -> Dict[str, Any]:
    """Compare one engine output against the assertions of one demo case.

    Args:
        expected_case: A case entry from the suite file. ``id`` is required;
            ``title`` and ``expected`` are optional.
        actual: The engine output for that case. Missing or ``null`` fields
            are treated as empty, so an incomplete output produces assertion
            errors instead of a crash.

    Returns:
        A dict with ``case_id``, ``title``, ``passed``, ``errors`` and
        ``warnings``. ``passed`` is True only when ``errors`` is empty;
        warnings never fail a case.
    """
    errors: List[str] = []
    warnings: List[str] = []

    # ``or {}`` also covers keys that are present but empty (YAML null).
    expected = expected_case.get("expected") or {}
    expected_controls = expected.get("controls") or {}
    expected_industries = expected.get("inferred_industries") or {}
    explanation = actual.get("explanation", "") or ""

    assigned_controls = set(_list_field(actual, "assigned_controls"))
    escalations = set(_list_field(actual, "escalations"))
    inferred_industries = set(_list_field(actual, "inferred_industries"))
    uncertainty_flags = _list_field(actual, "uncertainty_flags")
    expected_escalations = _list_field(expected_controls, "escalate_for_legal_review")

    for control in _list_field(expected_controls, "must_assign"):
        if control not in assigned_controls:
            errors.append(f"missing must_assign control: {control}")

    for control in _list_field(expected_controls, "must_not_assign"):
        if control in assigned_controls:
            errors.append(f"forbidden control assigned: {control}")

    for control in expected_escalations:
        if control not in escalations:
            errors.append(f"missing escalation: {control}")

    # Optional controls are only acceptable when the output explains itself.
    for control in _list_field(expected_controls, "may_assign_if_explained"):
        if control in assigned_controls and not explanation:
            errors.append(
                f"control assigned without explanation for may_assign_if_explained: {control}"
            )

    for industry in _list_field(expected_industries, "must_include"):
        if industry not in inferred_industries:
            errors.append(f"missing inferred industry: {industry}")

    for industry in _list_field(expected_industries, "must_not_include"):
        if industry in inferred_industries:
            errors.append(f"forbidden inferred industry present: {industry}")

    for phrase in _list_field(expected, "reasoning_must_contain"):
        if not _contains_phrase(explanation, phrase):
            errors.append(f"missing reasoning phrase: {phrase}")

    # Plausibility checks on escalated cases are advisory only.
    if expected_escalations and not uncertainty_flags:
        warnings.append("case includes escalations but actual output has no uncertainty_flags")

    overall_conf = (actual.get("confidence") or {}).get("overall")
    if expected_escalations and isinstance(overall_conf, (int, float)):
        if overall_conf >= 0.9:
            warnings.append(
                f"high confidence ({overall_conf}) on escalated boundary case; review whether too certain"
            )

    return {
        "case_id": expected_case["id"],
        "title": expected_case.get("title"),
        "passed": not errors,
        "errors": errors,
        "warnings": warnings,
    }


def evaluate_suite(demo_cases_path: Path, actual_outputs_dir: Path) -> Dict[str, Any]:
    """Evaluate every case of the suite file against ``<case id>.json`` outputs.

    A missing, unreadable, malformed or non-object output file is recorded as
    a failed case so that one bad file cannot abort the whole run.

    Args:
        demo_cases_path: Path to the YAML suite definition.
        actual_outputs_dir: Directory holding one JSON file per case id.

    Returns:
        A report dict with ``suite``, ``total_cases``, ``passed``, ``failed``
        and the per-case ``results`` list.
    """
    # An empty YAML document parses to None; treat it as a suite without cases.
    suite = load_yaml(demo_cases_path) or {}
    results: List[Dict[str, Any]] = []

    for case in suite.get("cases") or []:
        actual_path = actual_outputs_dir / f"{case['id']}.json"
        if not actual_path.exists():
            results.append(
                _failed_result(case, f"missing actual output file: {actual_path.name}")
            )
            continue
        try:
            actual = load_json(actual_path)
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            results.append(
                _failed_result(case, f"unreadable actual output file: {actual_path.name} ({exc})")
            )
            continue
        if not isinstance(actual, dict):
            results.append(
                _failed_result(case, f"actual output is not a JSON object: {actual_path.name}")
            )
            continue
        results.append(evaluate_case(case, actual))

    passed = sum(1 for r in results if r["passed"])

    return {
        "suite": suite.get("suite"),
        "total_cases": len(results),
        "passed": passed,
        "failed": len(results) - passed,
        "results": results,
    }


def render_markdown_report(report: Dict[str, Any]) -> str:
    """Render a report dict as produced by ``evaluate_suite`` to Markdown.

    A missing or ``None`` suite name falls back to a generic heading and a
    missing or ``None`` case title renders as empty text.
    """
    # ``or`` rather than a .get() default: the keys exist but may hold None.
    heading = report.get("suite") or "Applicability Engine Demo Report"
    lines: List[str] = [
        f"# {heading}",
        "",
        "## Summary",
        f"- Total cases: {report['total_cases']}",
        f"- Passed: {report['passed']}",
        f"- Failed: {report['failed']}",
        "",
    ]

    lines.append("## Failed cases")
    failed_cases = [r for r in report["results"] if not r["passed"]]
    if failed_cases:
        for case in failed_cases:
            lines.append(f"### {case['case_id']} — {case.get('title') or ''}")
            lines.extend(f"- {err}" for err in case["errors"])
            if case["warnings"]:
                lines.append("- Warnings:")
                # Two-space indent so the entries nest under the bullet above.
                lines.extend(f"  - {w}" for w in case["warnings"])
            lines.append("")
    else:
        lines.append("None.")
        lines.append("")

    lines.append("## All results")
    for case in report["results"]:
        status = "PASS" if case["passed"] else "FAIL"
        lines.append(f"- {case['case_id']}: {status}")
        lines.extend(f"  - warning: {w}" for w in case["warnings"])
    lines.append("")
    return "\n".join(lines)


def main() -> None:
    """Command-line entry point: evaluate the suite, write reports, print JSON.

    Raises:
        SystemExit: With status 1 when at least one case failed, so that
            wrappers and CI jobs relying on the exit status notice
            regressions. A fully passing run returns normally.
    """
    parser = argparse.ArgumentParser(description="Evaluate Applicability Engine demo cases.")
    parser.add_argument("--cases", type=Path, required=True, help="Path to demo_cases.yaml")
    parser.add_argument("--actual-dir", type=Path, required=True, help="Directory containing actual CASE-XXX.json outputs")
    parser.add_argument("--report-json", type=Path, required=False, help="Write machine-readable report JSON")
    parser.add_argument("--report-md", type=Path, required=False, help="Write markdown report")
    args = parser.parse_args()

    report = evaluate_suite(args.cases, args.actual_dir)
    report_json = json.dumps(report, indent=2, ensure_ascii=False)

    if args.report_json:
        # Create the target directory so a fresh checkout does not fail here.
        args.report_json.parent.mkdir(parents=True, exist_ok=True)
        args.report_json.write_text(report_json, encoding="utf-8")

    if args.report_md:
        args.report_md.parent.mkdir(parents=True, exist_ok=True)
        args.report_md.write_text(render_markdown_report(report), encoding="utf-8")

    print(report_json)

    if report["failed"]:
        raise SystemExit(1)


if __name__ == "__main__":
    main()
b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-001.json new file mode 100644 index 0000000..0d5975c --- /dev/null +++ b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-001.json @@ -0,0 +1,25 @@ +{ + "case_id": "CASE-001", + "assigned_controls": [ + "GDPR-INFO-THIRDPARTY-001", + "GDPR-ROPA-001", + "GDPR-LEGAL-BASIS-001", + "VENDOR-DUE-DILIGENCE-001" + ], + "excluded_controls": [ + "PSD2-LICENSING-001", + "PAYMENT-INSTITUTION-AUTH-001", + "AML-KYC-CUSTOMER-ONBOARDING-001" + ], + "escalations": [], + "inferred_industries": [ + "retail_ecommerce" + ], + "confidence": { + "overall": 0.93, + "industry_assignment": 0.96, + "control_assignment": 0.91 + }, + "explanation": "Stripe ist eigenständiger Zahlungsdienstleister. Das Unternehmen erbringt keine eigene Erbringung regulierter Zahlungsdienste. Datenschutz- und Transparenzpflichten bleiben relevant, insbesondere Informationspflichten, ROPA, Rechtsgrundlage und Vendor Due Diligence.", + "uncertainty_flags": [] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-002.json b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-002.json new file mode 100644 index 0000000..c090c53 --- /dev/null +++ b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-002.json @@ -0,0 +1,26 @@ +{ + "case_id": "CASE-002", + "assigned_controls": [ + "BANK-ACCESS-AUTH-001", + "BATTERY-LABELING-001", + "BATTERY-TAKEBACK-001", + "PRODUCT-COMPLIANCE-DOC-001", + "WEEE-REGISTRATION-001" + ], + "excluded_controls": [], + "escalations": [ + "BATTERY-PRODUCER-DEFINITION-001" + ], + "inferred_industries": [ + "financial_services" + ], + "confidence": { + "overall": 0.82, + "industry_assignment": 0.95, + "control_assignment": 0.76 + }, + "explanation": "Die statische Branchenzuweisung ist nicht abschließend. Ein physisches Produkt mit Batterie erweitert den Scope über die originäre Finanzbranche hinaus. 
Zusätzlich zu banktypischen Controls sind Batterie- und Produktpflichten relevant. Die Rolle als Inverkehrbringer oder Hersteller prüfen bleibt für die genaue Abgrenzung eskalationsbedürftig.", + "uncertainty_flags": [ + "producer_role_unclear" + ] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-004.json b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-004.json new file mode 100644 index 0000000..c78de94 --- /dev/null +++ b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-004.json @@ -0,0 +1,25 @@ +{ + "case_id": "CASE-004", + "assigned_controls": [ + "PSD2-LICENSING-001", + "AML-KYC-CUSTOMER-ONBOARDING-001", + "AML-TRANSACTION-MONITORING-001", + "FRAUD-CONTROLS-001" + ], + "excluded_controls": [], + "escalations": [ + "REGULATORY-PERIMETER-ASSESSMENT-001" + ], + "inferred_industries": [ + "financial_services" + ], + "confidence": { + "overall": 0.89, + "industry_assignment": 0.97, + "control_assignment": 0.87 + }, + "explanation": "Möglicherweise regulierter Zahlungsdienst. AML/KYC relevant aufgrund der Entgegennahme und Weiterleitung von Kundengeldern sowie des Onboardings. 
Die rechtliche Einordnung nicht nur Datenschutz, sondern aufsichtsrechtlicher Perimeter, muss zusätzlich rechtlich validiert werden.", + "uncertainty_flags": [ + "regulatory_perimeter_needs_confirmation" + ] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-006.json b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-006.json new file mode 100644 index 0000000..0534264 --- /dev/null +++ b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-006.json @@ -0,0 +1,26 @@ +{ + "case_id": "CASE-006", + "assigned_controls": [ + "VENDOR-DUE-DILIGENCE-001", + "GDPR-INFO-THIRDPARTY-001" + ], + "excluded_controls": [ + "TKG-CUSTOMER-INFORMATION-001", + "TKG-CONTRACT-TRANSPARENCY-001" + ], + "escalations": [ + "ECS-QUALIFICATION-ASSESSMENT-001" + ], + "inferred_industries": [ + "software_saas" + ], + "confidence": { + "overall": 0.87, + "industry_assignment": 0.94, + "control_assignment": 0.84 + }, + "explanation": "Die bloße Nutzung eines Gateways ist nicht automatisch eigener TK-Dienst. Relevanz besteht primär für Drittanbieter-Management und Datenschutzinformation. 
Zur Absicherung bleibt eine ECS-Qualifikationsprüfung als Eskalation sinnvoll.", + "uncertainty_flags": [ + "ecs_boundary_case" + ] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-008.json b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-008.json new file mode 100644 index 0000000..3c31338 --- /dev/null +++ b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-008.json @@ -0,0 +1,27 @@ +{ + "case_id": "CASE-008", + "assigned_controls": [ + "BATTERY-LABELING-001", + "BATTERY-TAKEBACK-001", + "CE-TECHNICAL-DOC-001", + "IMPORTER-RESPONSIBILITIES-001", + "RED-CONFORMITY-001", + "WEEE-REGISTRATION-001" + ], + "excluded_controls": [], + "escalations": [ + "PRODUCT-QUALIFICATION-MULTIREGIME-001" + ], + "inferred_industries": [ + "software_saas" + ], + "confidence": { + "overall": 0.84, + "industry_assignment": 0.93, + "control_assignment": 0.8 + }, + "explanation": "Die ursprüngliche Branche ist Software. Durch Hardwarevertrieb unter eigener Marke werden zusätzliche Produkt- und Importeurspflichten werden scope-basiert aktiviert. 
Batterie-, Funk-, CE- und Importeursthemen greifen kumulativ; die exakte Multi-Regime-Produktqualifikation wird eskaliert.", + "uncertainty_flags": [ + "multi_regime_product_scope" + ] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-011.json b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-011.json new file mode 100644 index 0000000..9501a89 --- /dev/null +++ b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-011.json @@ -0,0 +1,24 @@ +{ + "case_id": "CASE-011", + "assigned_controls": [ + "THIRD-PARTY-FINANCIAL-PROVIDER-DUE-DILIGENCE-001" + ], + "excluded_controls": [], + "escalations": [ + "REGULATORY-PERIMETER-ASSESSMENT-001", + "OWN-VS-PARTNER-ROLE-ANALYSIS-001" + ], + "inferred_industries": [ + "software_saas" + ], + "confidence": { + "overall": 0.61, + "industry_assignment": 0.9, + "control_assignment": 0.58 + }, + "explanation": "Unklarer regulatorischer Perimeter. Rolle des Unternehmens nicht eindeutig — Partnerbanken beteiligt, zugleich eigene Prozesse für Auszahlungen, Split Settlements und Vorschüsse denkbar. 
Eskalation erforderlich, bevor belastbar über PSD2- oder AML-Pflichten entschieden wird.", + "uncertainty_flags": [ + "unclear_funds_flow", + "own_vs_partner_role_unclear" + ] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/reports/latest_report.json b/control-pipeline/tests/applicability_demo/reports/latest_report.json new file mode 100644 index 0000000..55b11bb --- /dev/null +++ b/control-pipeline/tests/applicability_demo/reports/latest_report.json @@ -0,0 +1,50 @@ +{ + "suite": "applicability-engine-demo-cases-priority-6", + "total_cases": 6, + "passed": 6, + "failed": 0, + "results": [ + { + "case_id": "CASE-001", + "title": "SaaS-Webshop mit Stripe Checkout", + "passed": true, + "errors": [], + "warnings": [] + }, + { + "case_id": "CASE-002", + "title": "Bank gibt TAN-Generator mit Batterie an Kunden aus", + "passed": true, + "errors": [], + "warnings": [] + }, + { + "case_id": "CASE-004", + "title": "Fintech mit eigener Wallet und Weiterleitung von Kundengeldern", + "passed": true, + "errors": [], + "warnings": [] + }, + { + "case_id": "CASE-006", + "title": "SaaS-Unternehmen verschickt nur SMS über externen Gateway", + "passed": true, + "errors": [], + "warnings": [] + }, + { + "case_id": "CASE-008", + "title": "Importeur von IoT-Geräten mit Batterien und Funkmodul", + "passed": true, + "errors": [], + "warnings": [] + }, + { + "case_id": "CASE-011", + "title": "Unklarer Grenzfall mit Embedded Finance", + "passed": true, + "errors": [], + "warnings": [] + } + ] +} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/reports/latest_report.md b/control-pipeline/tests/applicability_demo/reports/latest_report.md new file mode 100644 index 0000000..eee688e --- /dev/null +++ b/control-pipeline/tests/applicability_demo/reports/latest_report.md @@ -0,0 +1,17 @@ +# applicability-engine-demo-cases-priority-6 + +## Summary +- Total cases: 6 +- Passed: 6 +- Failed: 0 + +## Failed cases +None. 
def main() -> None:
    """Run ``evaluator.py`` as a subprocess and exit with its return code.

    The suite file, the evaluator script and the ``reports`` directory are
    all resolved relative to ``--root`` (default: the directory containing
    this script). Without ``--actual-dir`` the golden files in
    ``expected_outputs`` are evaluated against themselves as a self-test.
    """
    cli = argparse.ArgumentParser(description="Run Applicability Engine demo evaluation.")
    cli.add_argument("--root", type=Path, default=Path(__file__).resolve().parent)
    cli.add_argument(
        "--actual-dir",
        type=Path,
        default=None,
        help="Directory with actual outputs. Defaults to expected_outputs for self-test.",
    )
    options = cli.parse_args()

    base_dir = options.root
    if options.actual_dir:
        outputs_dir = options.actual_dir
    else:
        outputs_dir = base_dir / "expected_outputs"

    report_dir = base_dir / "reports"
    report_dir.mkdir(parents=True, exist_ok=True)

    # Reuse the current interpreter so the evaluator sees the same environment.
    command = [sys.executable, str(base_dir / "evaluator.py")]
    command += ["--cases", str(base_dir / "demo_cases.yaml")]
    command += ["--actual-dir", str(outputs_dir)]
    command += ["--report-json", str(report_dir / "latest_report.json")]
    command += ["--report-md", str(report_dir / "latest_report.md")]

    # check=False: the evaluator's status is forwarded instead of raised here.
    outcome = subprocess.run(command, check=False)
    raise SystemExit(outcome.returncode)


if __name__ == "__main__":
    main()