diff --git a/control-pipeline/tests/applicability_demo/README.md b/control-pipeline/tests/applicability_demo/README.md deleted file mode 100644 index 1a659e4..0000000 --- a/control-pipeline/tests/applicability_demo/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# Applicability Engine Demo Package - -## Inhalt -- `demo_cases.yaml` — 6 priorisierte Demo- und Regressionstestfälle -- `expected_outputs/CASE-*.json` — Golden Outputs für die 6 Fälle -- `evaluator.py` — vergleicht tatsächliche Engine-Outputs gegen die Assertions -- `run_demo.py` — einfacher Runner -- `reports/` — Zielordner für JSON- und Markdown-Reports - -## Schnellstart -```bash -python run_demo.py -``` - -Das nutzt `expected_outputs/` als Self-Test. - -## Gegen echte SDK-Outputs laufen lassen -Lege pro Fall eine Datei `CASE-XYZ.json` mit folgendem Schema in ein Verzeichnis: - -```json -{ - "case_id": "CASE-001", - "assigned_controls": [], - "excluded_controls": [], - "escalations": [], - "inferred_industries": [], - "confidence": { - "overall": 0.0, - "industry_assignment": 0.0, - "control_assignment": 0.0 - }, - "explanation": "", - "uncertainty_flags": [] -} -``` - -Dann: - -```bash -python run_demo.py --actual-dir /pfad/zu/deinen/outputs -``` - -## Testlogik -Der Evaluator prüft: -- `must_assign` -- `must_not_assign` -- `escalate_for_legal_review` -- `inferred_industries.must_include` -- `inferred_industries.must_not_include` -- `reasoning_must_contain` - -Zusätzlich gibt es Warnings, wenn Grenzfälle eskaliert sind, aber keine `uncertainty_flags` -gesetzt wurden oder die Confidence unplausibel hoch ist. diff --git a/control-pipeline/tests/applicability_demo/demo_cases.yaml b/control-pipeline/tests/applicability_demo/demo_cases.yaml deleted file mode 100644 index ba64c97..0000000 --- a/control-pipeline/tests/applicability_demo/demo_cases.yaml +++ /dev/null @@ -1,239 +0,0 @@ -version: "1.0" -suite: "applicability-engine-demo-cases-priority-6" -description: > - Priorisierte Demo- und Regressionstestfälle für die Applicability Engine. - Ziel: False Positives vermeiden, Scope-Overrides korrekt aktivieren und - Unsicherheiten sauber eskalieren. - -defaults: - jurisdiction: "DE" - evaluation_mode: "strict" - require_explanation: true - require_uncertainty_flag: true - -cases: - - - id: "CASE-001" - title: "SaaS-Webshop mit Stripe Checkout" - objective: > - Prüfen, dass Stripe-Einbindung nicht fälschlich zu PSD2-/ZAG-/Zahlungsinstituts- - Controls führt, sondern zu Datenschutz-/Transparenz-/Drittanbieter-Controls. - profile: - company_type: "GmbH" - industry: "retail_ecommerce" - summary: > - Ein deutsches Unternehmen betreibt einen Webshop für physische Produkte. - Zahlungen werden über Stripe Checkout abgewickelt. Das Unternehmen hält - selbst keine Kundengelder, führt keine Zahlungskonten und bietet keine - eigenen Zahlungsdienste an. - facts: - sells_physical_products: true - webshop: true - payment_provider: "Stripe" - stores_card_data: false - holds_customer_funds: false - operates_payment_service: false - processes_personal_data: true - sends_data_to_stripe: true - expected: - inferred_industries: - must_include: - - "retail_ecommerce" - must_not_include: - - "financial_services" - controls: - must_assign: - - "GDPR-INFO-THIRDPARTY-001" - - "GDPR-ROPA-001" - - "GDPR-LEGAL-BASIS-001" - - "VENDOR-DUE-DILIGENCE-001" - must_not_assign: - - "PSD2-LICENSING-001" - - "PAYMENT-INSTITUTION-AUTH-001" - - "AML-KYC-CUSTOMER-ONBOARDING-001" - escalate_for_legal_review: [] - reasoning_must_contain: - - "Stripe ist eigenständiger Zahlungsdienstleister" - - "keine eigene Erbringung regulierter Zahlungsdienste" - - "Datenschutz- und Transparenzpflichten bleiben relevant" - - - id: "CASE-002" - title: "Bank gibt TAN-Generator mit Batterie an Kunden aus" - objective: > - Prüfen, dass branchenfremd wirkende Produktpflichten über Scope aktiviert - werden können, obwohl die Hauptbranche Finanzdienstleistung ist. - profile: - company_type: "AG" - industry: "financial_services" - summary: > - Eine Bank gibt physische TAN-Generatoren mit eingebauter Batterie an - Endkunden aus. Die Geräte werden unter eigener Marke vertrieben. - facts: - provides_banking_services: true - distributes_physical_products: true - product_contains_battery: true - product_under_own_brand: true - imports_product_from_non_eu: false - manufactures_product: false - expected: - inferred_industries: - must_include: - - "financial_services" - must_not_include: - - "manufacturing" - controls: - must_assign: - - "BANK-ACCESS-AUTH-001" - - "BATTERY-LABELING-001" - - "BATTERY-TAKEBACK-001" - - "PRODUCT-COMPLIANCE-DOC-001" - must_not_assign: [] - may_assign_if_explained: - - "WEEE-REGISTRATION-001" - escalate_for_legal_review: - - "BATTERY-PRODUCER-DEFINITION-001" - reasoning_must_contain: - - "statische Branchenzuweisung ist nicht abschließend" - - "physisches Produkt mit Batterie erweitert den Scope" - - "Rolle als Inverkehrbringer oder Hersteller prüfen" - - - id: "CASE-004" - title: "Fintech mit eigener Wallet und Weiterleitung von Kundengeldern" - objective: > - Positiver Gegentest: echte Finanzregulierung muss ausgelöst werden. - profile: - company_type: "GmbH" - industry: "financial_services" - summary: > - Ein Fintech bietet eine App mit Wallet-Funktion, Kundengelder werden - entgegengenommen und an Händler weitergeleitet. - facts: - provides_wallet: true - holds_customer_funds: true - executes_payment_transactions: true - customer_onboarding: true - transaction_monitoring: true - expected: - inferred_industries: - must_include: - - "financial_services" - controls: - must_assign: - - "PSD2-LICENSING-001" - - "AML-KYC-CUSTOMER-ONBOARDING-001" - - "AML-TRANSACTION-MONITORING-001" - - "FRAUD-CONTROLS-001" - must_not_assign: [] - escalate_for_legal_review: - - "REGULATORY-PERIMETER-ASSESSMENT-001" - reasoning_must_contain: - - "möglicherweise regulierter Zahlungsdienst" - - "AML/KYC relevant" - - "rechtliche Einordnung nicht nur Datenschutz" - - - id: "CASE-006" - title: "SaaS-Unternehmen verschickt nur SMS über externen Gateway" - objective: > - Prüfen, dass reine Nutzung eines Kommunikationsdienstes nicht automatisch - zu voller TKG-Relevanz führt. - profile: - company_type: "UG" - industry: "software_saas" - summary: > - Eine SaaS-Plattform verschickt Login-Codes per Twilio/SMS-Gateway, - betreibt aber kein eigenes öffentliches Telekommunikationsnetz und - bietet keinen Telekommunikationsdienst am Markt an. - facts: - sends_sms_notifications: true - uses_external_gateway: true - provides_public_telecom_services: false - operates_network: false - expected: - inferred_industries: - must_include: - - "software_saas" - must_not_include: - - "telecommunications" - controls: - must_assign: - - "VENDOR-DUE-DILIGENCE-001" - - "GDPR-INFO-THIRDPARTY-001" - must_not_assign: - - "TKG-CUSTOMER-INFORMATION-001" - - "TKG-CONTRACT-TRANSPARENCY-001" - escalate_for_legal_review: - - "ECS-QUALIFICATION-ASSESSMENT-001" - reasoning_must_contain: - - "bloße Nutzung eines Gateways ist nicht automatisch eigener TK-Dienst" - - - id: "CASE-008" - title: "Importeur von IoT-Geräten mit Batterien und Funkmodul" - objective: > - Mehrfach-Scope: Batterie, Funk, Produktrecht, Importeurspflichten. - profile: - company_type: "GmbH" - industry: "software_saas" - summary: > - Ein Softwareunternehmen verkauft nun zusätzlich eigene IoT-Sensoren - mit Batterie und Funkmodul unter eigener Marke. - facts: - imports_from_non_eu: true - sells_hardware: true - product_contains_battery: true - product_has_radio: true - own_brand: true - expected: - inferred_industries: - must_include: - - "software_saas" - controls: - must_assign: - - "BATTERY-LABELING-001" - - "BATTERY-TAKEBACK-001" - - "CE-TECHNICAL-DOC-001" - - "IMPORTER-RESPONSIBILITIES-001" - must_not_assign: [] - may_assign_if_explained: - - "RED-CONFORMITY-001" - - "WEEE-REGISTRATION-001" - escalate_for_legal_review: - - "PRODUCT-QUALIFICATION-MULTIREGIME-001" - reasoning_must_contain: - - "ursprüngliche Branche ist Software" - - "zusätzliche Produkt- und Importeurspflichten werden scope-basiert aktiviert" - - - id: "CASE-011" - title: "Unklarer Grenzfall mit Embedded Finance" - objective: > - Das System muss Unsicherheit erkennen und sauber eskalieren. - profile: - company_type: "GmbH" - industry: "software_saas" - summary: > - Eine Plattform ermöglicht Händlern Auszahlungen, virtuelle Konten, - Split Settlements und einen Finanzierungsvorschuss, teilweise über - Partnerbanken, teilweise über eigene Prozesse. - facts: - virtual_accounts: true - split_settlements: true - advance_payments: true - partner_bank_involved: true - own_funds_flow_unclear: true - expected: - inferred_industries: - must_include: - - "software_saas" - controls: - must_assign: - - "THIRD-PARTY-FINANCIAL-PROVIDER-DUE-DILIGENCE-001" - must_not_assign: [] - may_assign_if_explained: - - "PSD2-LICENSING-001" - - "AML-KYC-CUSTOMER-ONBOARDING-001" - escalate_for_legal_review: - - "REGULATORY-PERIMETER-ASSESSMENT-001" - - "OWN-VS-PARTNER-ROLE-ANALYSIS-001" - reasoning_must_contain: - - "unklarer regulatorischer Perimeter" - - "Rolle des Unternehmens nicht eindeutig" - - "Eskalation erforderlich" diff --git a/control-pipeline/tests/applicability_demo/evaluator.py b/control-pipeline/tests/applicability_demo/evaluator.py deleted file mode 100644 index 29d38aa..0000000 --- a/control-pipeline/tests/applicability_demo/evaluator.py +++ /dev/null @@ -1,180 +0,0 @@ -from __future__ import annotations - -import argparse -import json -from pathlib import Path -from typing import Any, Dict, List, Tuple - -try: - import yaml -except ImportError as exc: # pragma: no cover - raise SystemExit("PyYAML is required. Install with: pip install pyyaml") from exc - - -def load_yaml(path: Path) -> Dict[str, Any]: - with path.open("r", encoding="utf-8") as f: - return yaml.safe_load(f) - - -def load_json(path: Path) -> Dict[str, Any]: - with path.open("r", encoding="utf-8") as f: - return json.load(f) - - -def _contains_phrase(explanation: str, phrase: str) -> bool: - return phrase.lower() in explanation.lower() - - -def evaluate_case(expected_case: Dict[str, Any], actual: Dict[str, Any]) -> Dict[str, Any]: - errors: List[str] = [] - warnings: List[str] = [] - - expected = expected_case.get("expected", {}) - expected_controls = expected.get("controls", {}) - explanation = actual.get("explanation", "") or "" - - assigned_controls = set(actual.get("assigned_controls", [])) - escalations = set(actual.get("escalations", [])) - inferred_industries = set(actual.get("inferred_industries", [])) - uncertainty_flags = actual.get("uncertainty_flags", []) - - for control in expected_controls.get("must_assign", []): - if control not in assigned_controls: - errors.append(f"missing must_assign control: {control}") - - for control in expected_controls.get("must_not_assign", []): - if control in assigned_controls: - errors.append(f"forbidden control assigned: {control}") - - for control in expected_controls.get("escalate_for_legal_review", []): - if control not in escalations: - errors.append(f"missing escalation: {control}") - - for control in expected_controls.get("may_assign_if_explained", []): - if control in assigned_controls and not explanation: - errors.append( - f"control assigned without explanation for may_assign_if_explained: {control}" - ) - - expected_industries = expected.get("inferred_industries", {}) - for industry in expected_industries.get("must_include", []): - if industry not in inferred_industries: - errors.append(f"missing inferred industry: {industry}") - - for industry in expected_industries.get("must_not_include", []): - if industry in inferred_industries: - errors.append(f"forbidden inferred industry present: {industry}") - - for phrase in expected.get("reasoning_must_contain", []): - if not _contains_phrase(explanation, phrase): - errors.append(f"missing reasoning phrase: {phrase}") - - require_uncertainty = bool(expected_controls.get("escalate_for_legal_review", [])) - if require_uncertainty and not uncertainty_flags: - warnings.append("case includes escalations but actual output has no uncertainty_flags") - - overall_conf = ((actual.get("confidence") or {}).get("overall")) - if expected_controls.get("escalate_for_legal_review") and isinstance(overall_conf, (int, float)): - if overall_conf >= 0.9: - warnings.append( - f"high confidence ({overall_conf}) on escalated boundary case; review whether too certain" - ) - - return { - "case_id": expected_case["id"], - "title": expected_case.get("title"), - "passed": len(errors) == 0, - "errors": errors, - "warnings": warnings, - } - - -def evaluate_suite(demo_cases_path: Path, actual_outputs_dir: Path) -> Dict[str, Any]: - suite = load_yaml(demo_cases_path) - results: List[Dict[str, Any]] = [] - - for case in suite.get("cases", []): - case_id = case["id"] - actual_path = actual_outputs_dir / f"{case_id}.json" - if not actual_path.exists(): - results.append({ - "case_id": case_id, - "title": case.get("title"), - "passed": False, - "errors": [f"missing actual output file: {actual_path.name}"], - "warnings": [], - }) - continue - actual = load_json(actual_path) - results.append(evaluate_case(case, actual)) - - passed = sum(1 for r in results if r["passed"]) - failed = len(results) - passed - - return { - "suite": suite.get("suite"), - "total_cases": len(results), - "passed": passed, - "failed": failed, - "results": results, - } - - -def render_markdown_report(report: Dict[str, Any]) -> str: - lines: List[str] = [] - lines.append(f"# {report.get('suite', 'Applicability Engine Demo Report')}") - lines.append("") - lines.append("## Summary") - lines.append(f"- Total cases: {report['total_cases']}") - lines.append(f"- Passed: {report['passed']}") - lines.append(f"- Failed: {report['failed']}") - lines.append("") - - failed_cases = [r for r in report["results"] if not r["passed"]] - if failed_cases: - lines.append("## Failed cases") - for case in failed_cases: - lines.append(f"### {case['case_id']} — {case.get('title', '')}") - for err in case["errors"]: - lines.append(f"- {err}") - if case["warnings"]: - lines.append("- Warnings:") - for w in case["warnings"]: - lines.append(f" - {w}") - lines.append("") - else: - lines.append("## Failed cases") - lines.append("None.") - lines.append("") - - lines.append("## All results") - for case in report["results"]: - status = "PASS" if case["passed"] else "FAIL" - lines.append(f"- {case['case_id']}: {status}") - for w in case["warnings"]: - lines.append(f" - warning: {w}") - lines.append("") - return "\n".join(lines) - - -def main() -> None: - parser = argparse.ArgumentParser(description="Evaluate Applicability Engine demo cases.") - parser.add_argument("--cases", type=Path, required=True, help="Path to demo_cases.yaml") - parser.add_argument("--actual-dir", type=Path, required=True, help="Directory containing actual CASE-XXX.json outputs") - parser.add_argument("--report-json", type=Path, required=False, help="Write machine-readable report JSON") - parser.add_argument("--report-md", type=Path, required=False, help="Write markdown report") - args = parser.parse_args() - - report = evaluate_suite(args.cases, args.actual_dir) - - if args.report_json: - args.report_json.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8") - - if args.report_md: - args.report_md.write_text(render_markdown_report(report), encoding="utf-8") - - print(json.dumps(report, indent=2, ensure_ascii=False)) - - -if __name__ == "__main__": - main() diff --git a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-001.json b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-001.json deleted file mode 100644 index 0d5975c..0000000 --- a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-001.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "case_id": "CASE-001", - "assigned_controls": [ - "GDPR-INFO-THIRDPARTY-001", - "GDPR-ROPA-001", - "GDPR-LEGAL-BASIS-001", - "VENDOR-DUE-DILIGENCE-001" - ], - "excluded_controls": [ - "PSD2-LICENSING-001", - "PAYMENT-INSTITUTION-AUTH-001", - "AML-KYC-CUSTOMER-ONBOARDING-001" - ], - "escalations": [], - "inferred_industries": [ - "retail_ecommerce" - ], - "confidence": { - "overall": 0.93, - "industry_assignment": 0.96, - "control_assignment": 0.91 - }, - "explanation": "Stripe ist eigenständiger Zahlungsdienstleister. Das Unternehmen erbringt keine eigene Erbringung regulierter Zahlungsdienste. Datenschutz- und Transparenzpflichten bleiben relevant, insbesondere Informationspflichten, ROPA, Rechtsgrundlage und Vendor Due Diligence.", - "uncertainty_flags": [] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-002.json b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-002.json deleted file mode 100644 index c090c53..0000000 --- a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-002.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "case_id": "CASE-002", - "assigned_controls": [ - "BANK-ACCESS-AUTH-001", - "BATTERY-LABELING-001", - "BATTERY-TAKEBACK-001", - "PRODUCT-COMPLIANCE-DOC-001", - "WEEE-REGISTRATION-001" - ], - "excluded_controls": [], - "escalations": [ - "BATTERY-PRODUCER-DEFINITION-001" - ], - "inferred_industries": [ - "financial_services" - ], - "confidence": { - "overall": 0.82, - "industry_assignment": 0.95, - "control_assignment": 0.76 - }, - "explanation": "Die statische Branchenzuweisung ist nicht abschließend. Ein physisches Produkt mit Batterie erweitert den Scope über die originäre Finanzbranche hinaus. Zusätzlich zu banktypischen Controls sind Batterie- und Produktpflichten relevant. Die Rolle als Inverkehrbringer oder Hersteller prüfen bleibt für die genaue Abgrenzung eskalationsbedürftig.", - "uncertainty_flags": [ - "producer_role_unclear" - ] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-004.json b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-004.json deleted file mode 100644 index c78de94..0000000 --- a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-004.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "case_id": "CASE-004", - "assigned_controls": [ - "PSD2-LICENSING-001", - "AML-KYC-CUSTOMER-ONBOARDING-001", - "AML-TRANSACTION-MONITORING-001", - "FRAUD-CONTROLS-001" - ], - "excluded_controls": [], - "escalations": [ - "REGULATORY-PERIMETER-ASSESSMENT-001" - ], - "inferred_industries": [ - "financial_services" - ], - "confidence": { - "overall": 0.89, - "industry_assignment": 0.97, - "control_assignment": 0.87 - }, - "explanation": "Möglicherweise regulierter Zahlungsdienst. AML/KYC relevant aufgrund der Entgegennahme und Weiterleitung von Kundengeldern sowie des Onboardings. Die rechtliche Einordnung nicht nur Datenschutz, sondern aufsichtsrechtlicher Perimeter, muss zusätzlich rechtlich validiert werden.", - "uncertainty_flags": [ - "regulatory_perimeter_needs_confirmation" - ] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-006.json b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-006.json deleted file mode 100644 index 0534264..0000000 --- a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-006.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "case_id": "CASE-006", - "assigned_controls": [ - "VENDOR-DUE-DILIGENCE-001", - "GDPR-INFO-THIRDPARTY-001" - ], - "excluded_controls": [ - "TKG-CUSTOMER-INFORMATION-001", - "TKG-CONTRACT-TRANSPARENCY-001" - ], - "escalations": [ - "ECS-QUALIFICATION-ASSESSMENT-001" - ], - "inferred_industries": [ - "software_saas" - ], - "confidence": { - "overall": 0.87, - "industry_assignment": 0.94, - "control_assignment": 0.84 - }, - "explanation": "Die bloße Nutzung eines Gateways ist nicht automatisch eigener TK-Dienst. Relevanz besteht primär für Drittanbieter-Management und Datenschutzinformation. Zur Absicherung bleibt eine ECS-Qualifikationsprüfung als Eskalation sinnvoll.", - "uncertainty_flags": [ - "ecs_boundary_case" - ] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-008.json b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-008.json deleted file mode 100644 index 3c31338..0000000 --- a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-008.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "case_id": "CASE-008", - "assigned_controls": [ - "BATTERY-LABELING-001", - "BATTERY-TAKEBACK-001", - "CE-TECHNICAL-DOC-001", - "IMPORTER-RESPONSIBILITIES-001", - "RED-CONFORMITY-001", - "WEEE-REGISTRATION-001" - ], - "excluded_controls": [], - "escalations": [ - "PRODUCT-QUALIFICATION-MULTIREGIME-001" - ], - "inferred_industries": [ - "software_saas" - ], - "confidence": { - "overall": 0.84, - "industry_assignment": 0.93, - "control_assignment": 0.8 - }, - "explanation": "Die ursprüngliche Branche ist Software. Durch Hardwarevertrieb unter eigener Marke werden zusätzliche Produkt- und Importeurspflichten werden scope-basiert aktiviert. Batterie-, Funk-, CE- und Importeursthemen greifen kumulativ; die exakte Multi-Regime-Produktqualifikation wird eskaliert.", - "uncertainty_flags": [ - "multi_regime_product_scope" - ] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-011.json b/control-pipeline/tests/applicability_demo/expected_outputs/CASE-011.json deleted file mode 100644 index 9501a89..0000000 --- a/control-pipeline/tests/applicability_demo/expected_outputs/CASE-011.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "case_id": "CASE-011", - "assigned_controls": [ - "THIRD-PARTY-FINANCIAL-PROVIDER-DUE-DILIGENCE-001" - ], - "excluded_controls": [], - "escalations": [ - "REGULATORY-PERIMETER-ASSESSMENT-001", - "OWN-VS-PARTNER-ROLE-ANALYSIS-001" - ], - "inferred_industries": [ - "software_saas" - ], - "confidence": { - "overall": 0.61, - "industry_assignment": 0.9, - "control_assignment": 0.58 - }, - "explanation": "Unklarer regulatorischer Perimeter. Rolle des Unternehmens nicht eindeutig — Partnerbanken beteiligt, zugleich eigene Prozesse für Auszahlungen, Split Settlements und Vorschüsse denkbar. Eskalation erforderlich, bevor belastbar über PSD2- oder AML-Pflichten entschieden wird.", - "uncertainty_flags": [ - "unclear_funds_flow", - "own_vs_partner_role_unclear" - ] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/reports/latest_report.json b/control-pipeline/tests/applicability_demo/reports/latest_report.json deleted file mode 100644 index 55b11bb..0000000 --- a/control-pipeline/tests/applicability_demo/reports/latest_report.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "suite": "applicability-engine-demo-cases-priority-6", - "total_cases": 6, - "passed": 6, - "failed": 0, - "results": [ - { - "case_id": "CASE-001", - "title": "SaaS-Webshop mit Stripe Checkout", - "passed": true, - "errors": [], - "warnings": [] - }, - { - "case_id": "CASE-002", - "title": "Bank gibt TAN-Generator mit Batterie an Kunden aus", - "passed": true, - "errors": [], - "warnings": [] - }, - { - "case_id": "CASE-004", - "title": "Fintech mit eigener Wallet und Weiterleitung von Kundengeldern", - "passed": true, - "errors": [], - "warnings": [] - }, - { - "case_id": "CASE-006", - "title": "SaaS-Unternehmen verschickt nur SMS über externen Gateway", - "passed": true, - "errors": [], - "warnings": [] - }, - { - "case_id": "CASE-008", - "title": "Importeur von IoT-Geräten mit Batterien und Funkmodul", - "passed": true, - "errors": [], - "warnings": [] - }, - { - "case_id": "CASE-011", - "title": "Unklarer Grenzfall mit Embedded Finance", - "passed": true, - "errors": [], - "warnings": [] - } - ] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo/reports/latest_report.md b/control-pipeline/tests/applicability_demo/reports/latest_report.md deleted file mode 100644 index eee688e..0000000 --- a/control-pipeline/tests/applicability_demo/reports/latest_report.md +++ /dev/null @@ -1,17 +0,0 @@ -# applicability-engine-demo-cases-priority-6 - -## Summary -- Total cases: 6 -- Passed: 6 -- Failed: 0 - -## Failed cases -None. - -## All results -- CASE-001: PASS -- CASE-002: PASS -- CASE-004: PASS -- CASE-006: PASS -- CASE-008: PASS -- CASE-011: PASS diff --git a/control-pipeline/tests/applicability_demo/run_demo.py b/control-pipeline/tests/applicability_demo/run_demo.py deleted file mode 100644 index d7f270f..0000000 --- a/control-pipeline/tests/applicability_demo/run_demo.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import annotations - -import argparse -import shutil -import subprocess -import sys -from pathlib import Path - - -def main() -> None: - parser = argparse.ArgumentParser(description="Run Applicability Engine demo evaluation.") - parser.add_argument("--root", type=Path, default=Path(__file__).resolve().parent) - parser.add_argument( - "--actual-dir", - type=Path, - default=None, - help="Directory with actual outputs. Defaults to expected_outputs for self-test.", - ) - args = parser.parse_args() - - root = args.root - actual_dir = args.actual_dir or (root / "expected_outputs") - reports_dir = root / "reports" - reports_dir.mkdir(parents=True, exist_ok=True) - - cmd = [ - sys.executable, - str(root / "evaluator.py"), - "--cases", - str(root / "demo_cases.yaml"), - "--actual-dir", - str(actual_dir), - "--report-json", - str(reports_dir / "latest_report.json"), - "--report-md", - str(reports_dir / "latest_report.md"), - ] - completed = subprocess.run(cmd, check=False) - raise SystemExit(completed.returncode) - - -if __name__ == "__main__": - main() diff --git a/control-pipeline/tests/applicability_demo_ci/.github/workflows/applicability-demo-regression.yml b/control-pipeline/tests/applicability_demo_ci/.github/workflows/applicability-demo-regression.yml deleted file mode 100644 index 1b6bff0..0000000 --- a/control-pipeline/tests/applicability_demo_ci/.github/workflows/applicability-demo-regression.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: applicability-demo-regression - -on: - push: - paths: - - 'applicability_demo_ci/**' - pull_request: - paths: - - 'applicability_demo_ci/**' - workflow_dispatch: - -jobs: - regression: - runs-on: ubuntu-latest - defaults: - run: - working-directory: applicability_demo_ci - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Install dependencies - run: make install - - - name: Run evaluator - run: make eval - - - name: Run pytest - run: make test - - - name: Upload reports - uses: actions/upload-artifact@v4 - with: - name: applicability-demo-reports - path: | - applicability_demo_ci/reports/latest_report.json - applicability_demo_ci/reports/latest_report.md diff --git a/control-pipeline/tests/applicability_demo_ci/Makefile b/control-pipeline/tests/applicability_demo_ci/Makefile deleted file mode 100644 index 3ca47fb..0000000 --- a/control-pipeline/tests/applicability_demo_ci/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -PYTHON ?= python3 - -.PHONY: install test eval report clean - -install: - $(PYTHON) -m pip install -U pip - $(PYTHON) -m pip install -r requirements.txt - -test: - pytest -q - -eval: - $(PYTHON) evaluator.py \ - --cases demo_cases.yaml \ - --actual-dir actual_outputs \ - --report-json reports/latest_report.json \ - --report-md reports/latest_report.md - -report: eval - cat reports/latest_report.md - -clean: - rm -f reports/latest_report.json reports/latest_report.md diff --git a/control-pipeline/tests/applicability_demo_ci/README.md b/control-pipeline/tests/applicability_demo_ci/README.md deleted file mode 100644 index e6bb590..0000000 --- a/control-pipeline/tests/applicability_demo_ci/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# Applicability Demo CI Suite - -Diese Variante ist als direkt einhängbare Regression-Suite gedacht. - -## Enthalten -- `demo_cases.yaml` — priorisierte Demo-Fälle -- `actual_outputs/` — Golden Outputs -- `evaluator.py` — Assertions + Report-Generator -- `tests/` — pytest-Regressionen -- `Makefile` — lokale Standardbefehle -- `.github/workflows/applicability-demo-regression.yml` — GitHub Actions Job - -## Lokal starten -```bash -make install -make eval -make test -``` - -## Reports -Nach `make eval` liegen die Reports hier: -- `reports/latest_report.json` -- `reports/latest_report.md` - -## Einbau in euer Repo -Am einfachsten legt ihr den Ordner als `applicability_demo_ci/` ins Repo. -Der Workflow ist bereits darauf ausgelegt. - -## Nächste sinnvolle Anpassung -- `actual_outputs/` durch echte Endpoint-Outputs ersetzen -- optional kleinen Adapter bauen, falls euer API-Schema leicht abweicht -- weitere Grenzfälle ergänzen: WEEE, Medizinprodukt, Bildung, AI Act, CRA diff --git a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-001.json b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-001.json deleted file mode 100644 index 0170564..0000000 --- a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-001.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "case_id": "CASE-001", - "assigned_controls": [ - "GDPR-INFO-THIRDPARTY-001", - "GDPR-ROPA-001", - "GDPR-LEGAL-BASIS-001", - "VENDOR-DUE-DILIGENCE-001" - ], - "excluded_controls": [ - "PSD2-LICENSING-001", - "PAYMENT-INSTITUTION-AUTH-001", - "AML-KYC-CUSTOMER-ONBOARDING-001" - ], - "escalations": [], - "inferred_industries": [ - "retail_ecommerce" - ], - "confidence": { - "overall": 0.93, - "industry_assignment": 0.96, - "control_assignment": 0.91 - }, - "explanation": "Stripe ist eigenständiger Zahlungsdienstleister. Das Unternehmen erbringt keine eigene Erbringung regulierter Zahlungsdienste. Datenschutz- und Transparenzpflichten bleiben relevant.", - "uncertainty_flags": [] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-002.json b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-002.json deleted file mode 100644 index bb73412..0000000 --- a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-002.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "case_id": "CASE-002", - "assigned_controls": [ - "BANK-ACCESS-AUTH-001", - "BATTERY-LABELING-001", - "BATTERY-TAKEBACK-001", - "PRODUCT-COMPLIANCE-DOC-001", - "WEEE-REGISTRATION-001" - ], - "excluded_controls": [], - "escalations": [ - "BATTERY-PRODUCER-DEFINITION-001" - ], - "inferred_industries": [ - "financial_services" - ], - "confidence": { - "overall": 0.82, - "industry_assignment": 0.95, - "control_assignment": 0.76 - }, - "explanation": "Die statische Branchenzuweisung ist nicht abschließend. Ein physisches Produkt mit Batterie erweitert den Scope. Die Rolle als Inverkehrbringer oder Hersteller prüfen bleibt relevant.", - "uncertainty_flags": [ - "producer_role_unclear" - ] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-004.json b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-004.json deleted file mode 100644 index 50d7f58..0000000 --- a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-004.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "case_id": "CASE-004", - "assigned_controls": [ - "PSD2-LICENSING-001", - "AML-KYC-CUSTOMER-ONBOARDING-001", - "AML-TRANSACTION-MONITORING-001", - "FRAUD-CONTROLS-001" - ], - "excluded_controls": [], - "escalations": [ - "REGULATORY-PERIMETER-ASSESSMENT-001" - ], - "inferred_industries": [ - "financial_services" - ], - "confidence": { - "overall": 0.89, - "industry_assignment": 0.97, - "control_assignment": 0.87 - }, - "explanation": "Es liegt möglicherweise ein regulierter Zahlungsdienst vor. AML/KYC relevant ist die Entgegennahme und Weiterleitung von Kundengeldern. Die rechtliche Einordnung nicht nur Datenschutz muss geprüft werden.", - "uncertainty_flags": [ - "regulatory_perimeter_needs_confirmation" - ] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-006.json b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-006.json deleted file mode 100644 index c2a02d9..0000000 --- a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-006.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "case_id": "CASE-006", - "assigned_controls": [ - "VENDOR-DUE-DILIGENCE-001", - "GDPR-INFO-THIRDPARTY-001" - ], - "excluded_controls": [ - "TKG-CUSTOMER-INFORMATION-001", - "TKG-CONTRACT-TRANSPARENCY-001" - ], - "escalations": [ - "ECS-QUALIFICATION-ASSESSMENT-001" - ], - "inferred_industries": [ - "software_saas" - ], - "confidence": { - "overall": 0.87, - "industry_assignment": 0.94, - "control_assignment": 0.84 - }, - "explanation": "Die bloße Nutzung eines Gateways ist nicht automatisch eigener TK-Dienst.", - "uncertainty_flags": [ - "ecs_boundary_case" - ] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-008.json b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-008.json deleted file mode 100644 index e382fe9..0000000 --- a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-008.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "case_id": "CASE-008", - "assigned_controls": [ - "BATTERY-LABELING-001", - "BATTERY-TAKEBACK-001", - "CE-TECHNICAL-DOC-001", - "IMPORTER-RESPONSIBILITIES-001", - "RED-CONFORMITY-001", - "WEEE-REGISTRATION-001" - ], - "excluded_controls": [], - "escalations": [ - "PRODUCT-QUALIFICATION-MULTIREGIME-001" - ], - "inferred_industries": [ - "software_saas" - ], - "confidence": { - "overall": 0.84, - "industry_assignment": 0.93, - "control_assignment": 0.8 - }, - "explanation": "Die ursprüngliche Branche ist Software. Zusätzliche Produkt- und Importeurspflichten werden scope-basiert aktiviert.", - "uncertainty_flags": [ - "multi_regime_product_scope" - ] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-011.json b/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-011.json deleted file mode 100644 index 4aac981..0000000 --- a/control-pipeline/tests/applicability_demo_ci/actual_outputs/CASE-011.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "case_id": "CASE-011", - "assigned_controls": [ - "THIRD-PARTY-FINANCIAL-PROVIDER-DUE-DILIGENCE-001" - ], - "excluded_controls": [], - "escalations": [ - "REGULATORY-PERIMETER-ASSESSMENT-001", - "OWN-VS-PARTNER-ROLE-ANALYSIS-001" - ], - "inferred_industries": [ - "software_saas" - ], - "confidence": { - "overall": 0.61, - "industry_assignment": 0.9, - "control_assignment": 0.58 - }, - "explanation": "Es besteht ein unklarer regulatorischer Perimeter. Die Rolle des Unternehmens ist nicht eindeutig. Eskalation erforderlich.", - "uncertainty_flags": [ - "unclear_funds_flow", - "own_vs_partner_role_unclear" - ] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/demo_cases.yaml b/control-pipeline/tests/applicability_demo_ci/demo_cases.yaml deleted file mode 100644 index a5f2559..0000000 --- a/control-pipeline/tests/applicability_demo_ci/demo_cases.yaml +++ /dev/null @@ -1,119 +0,0 @@ -version: "1.0" -suite: "applicability-engine-demo-cases-priority-6" -cases: - - id: "CASE-001" - title: "SaaS-Webshop mit Stripe Checkout" - expected: - inferred_industries: - must_include: ["retail_ecommerce"] - must_not_include: ["financial_services"] - controls: - must_assign: - - "GDPR-INFO-THIRDPARTY-001" - - "GDPR-ROPA-001" - - "GDPR-LEGAL-BASIS-001" - - "VENDOR-DUE-DILIGENCE-001" - must_not_assign: - - "PSD2-LICENSING-001" - - "PAYMENT-INSTITUTION-AUTH-001" - - "AML-KYC-CUSTOMER-ONBOARDING-001" - reasoning_must_contain: - - "Stripe ist eigenständiger Zahlungsdienstleister" - - "keine eigene Erbringung regulierter Zahlungsdienste" - - "Datenschutz- und Transparenzpflichten bleiben relevant" - - - id: "CASE-002" - title: "Bank gibt TAN-Generator mit Batterie an Kunden aus" - expected: - inferred_industries: - must_include: ["financial_services"] - must_not_include: ["manufacturing"] - controls: - must_assign: - - "BANK-ACCESS-AUTH-001" - - "BATTERY-LABELING-001" - - "BATTERY-TAKEBACK-001" - - "PRODUCT-COMPLIANCE-DOC-001" - may_assign_if_explained: - - "WEEE-REGISTRATION-001" - escalate_for_legal_review: - - "BATTERY-PRODUCER-DEFINITION-001" - reasoning_must_contain: - - "statische Branchenzuweisung ist nicht abschließend" - - "physisches Produkt mit Batterie erweitert den Scope" - - "Rolle als Inverkehrbringer oder Hersteller prüfen" - - - id: "CASE-004" - title: "Fintech mit eigener Wallet und Weiterleitung von Kundengeldern" - expected: - inferred_industries: - must_include: ["financial_services"] - controls: - must_assign: - - "PSD2-LICENSING-001" - - "AML-KYC-CUSTOMER-ONBOARDING-001" - - "AML-TRANSACTION-MONITORING-001" - - "FRAUD-CONTROLS-001" - escalate_for_legal_review: - - "REGULATORY-PERIMETER-ASSESSMENT-001" - reasoning_must_contain: - - "möglicherweise regulierter Zahlungsdienst" - - "AML/KYC relevant" - - "rechtliche Einordnung nicht nur Datenschutz" - - - id: "CASE-006" - title: "SaaS-Unternehmen verschickt nur SMS über externen Gateway" - expected: - inferred_industries: - must_include: ["software_saas"] - must_not_include: ["telecommunications"] - controls: - must_assign: - - "VENDOR-DUE-DILIGENCE-001" - - "GDPR-INFO-THIRDPARTY-001" - must_not_assign: - - "TKG-CUSTOMER-INFORMATION-001" - - "TKG-CONTRACT-TRANSPARENCY-001" - escalate_for_legal_review: - - "ECS-QUALIFICATION-ASSESSMENT-001" - reasoning_must_contain: - - "bloße Nutzung eines Gateways ist nicht automatisch eigener TK-Dienst" - - - id: "CASE-008" - title: "Importeur von IoT-Geräten mit Batterien und Funkmodul" - expected: - inferred_industries: - must_include: ["software_saas"] - controls: - must_assign: - - "BATTERY-LABELING-001" - - "BATTERY-TAKEBACK-001" - - "CE-TECHNICAL-DOC-001" - - "IMPORTER-RESPONSIBILITIES-001" - may_assign_if_explained: - - "RED-CONFORMITY-001" - - "WEEE-REGISTRATION-001" - escalate_for_legal_review: - - "PRODUCT-QUALIFICATION-MULTIREGIME-001" - reasoning_must_contain: - - "ursprüngliche Branche ist Software" - - "zusätzliche Produkt- und Importeurspflichten werden scope-basiert aktiviert" - - - id: "CASE-011" - title: "Unklarer Grenzfall mit Embedded Finance" - expected: - inferred_industries: - must_include: ["software_saas"] - controls: - must_assign: - - "THIRD-PARTY-FINANCIAL-PROVIDER-DUE-DILIGENCE-001" - may_assign_if_explained: - - "PSD2-LICENSING-001" - - "AML-KYC-CUSTOMER-ONBOARDING-001" - escalate_for_legal_review: - - "REGULATORY-PERIMETER-ASSESSMENT-001" - - "OWN-VS-PARTNER-ROLE-ANALYSIS-001" - reasoning_must_contain: - - "unklarer regulatorischer Perimeter" - - "Rolle des Unternehmens ist nicht eindeutig" - - "Eskalation erforderlich" diff --git a/control-pipeline/tests/applicability_demo_ci/evaluator.py b/control-pipeline/tests/applicability_demo_ci/evaluator.py deleted file mode 100644 index d761fb8..0000000 --- a/control-pipeline/tests/applicability_demo_ci/evaluator.py +++ /dev/null @@ -1,87 +0,0 @@ -from __future__ import annotations -import argparse, json -from pathlib import Path -from typing import Any, Dict, List - -import yaml - -def load_yaml(path: Path) -> Dict[str, Any]: - return yaml.safe_load(path.read_text(encoding="utf-8")) - -def load_json(path: Path) -> Dict[str, Any]: - return json.loads(path.read_text(encoding="utf-8")) - -def contains_phrase(explanation: str, phrase: str) -> bool: - return phrase.lower() in explanation.lower() - -def evaluate_case(expected_case: Dict[str, Any], actual: Dict[str, Any]) -> Dict[str, Any]: - errors: List[str] = [] - warnings: List[str] = [] - expected = expected_case["expected"] - assigned = set(actual.get("assigned_controls", [])) - escalations = set(actual.get("escalations", [])) - industries = set(actual.get("inferred_industries", [])) - explanation = actual.get("explanation", "") - uncertainty_flags = actual.get("uncertainty_flags", []) - controls = expected.get("controls", {}) - - for c in controls.get("must_assign", []): - if c not in assigned: - errors.append(f"missing must_assign control: {c}") - for c in controls.get("must_not_assign", []): - if c in assigned: - errors.append(f"forbidden control assigned: {c}") - for c in controls.get("escalate_for_legal_review", []): - if c not in escalations: - errors.append(f"missing escalation: {c}") - for i in expected.get("inferred_industries", {}).get("must_include", []): - if i not in industries: - errors.append(f"missing inferred industry: {i}") - for i in expected.get("inferred_industries", {}).get("must_not_include", []): - if i in industries: - errors.append(f"forbidden inferred industry present: {i}") - for p in expected.get("reasoning_must_contain", []): - if not contains_phrase(explanation, p): - errors.append(f"missing reasoning phrase: {p}") - if controls.get("escalate_for_legal_review") and not uncertainty_flags: - warnings.append("escalation present without uncertainty_flags") - return {"case_id": expected_case["id"], "title": expected_case.get("title"), "passed": not errors, "errors": errors, "warnings": warnings} - -def evaluate_suite(cases_path: Path, actual_dir: Path) -> Dict[str, Any]: - suite = load_yaml(cases_path) - results = [] - for case in suite["cases"]: - actual_path = actual_dir / f"{case['id']}.json" - if not actual_path.exists(): - results.append({"case_id": case["id"], "title": case.get("title"), "passed": False, "errors": [f"missing actual output file: {actual_path.name}"], "warnings": []}) - continue - results.append(evaluate_case(case, load_json(actual_path))) - passed = sum(1 for r in results if r["passed"]) - return {"suite": suite.get("suite"), "total_cases": len(results), "passed": passed, "failed": len(results)-passed, "results": results} - -def render_md(report: Dict[str, Any]) -> str: - lines = [f"# {report.get('suite', 'Applicability Demo Report')}", "", "## Summary", f"- Total cases: {report['total_cases']}", f"- Passed: {report['passed']}", f"- Failed: {report['failed']}", "", "## Results"] - for r in report["results"]: - status = "PASS" if r["passed"] else "FAIL" - lines.append(f"- {r['case_id']}: {status}") - for e in r["errors"]: - lines.append(f" - error: {e}") - for w in r["warnings"]: - lines.append(f" - warning: {w}") - lines.append("") - return "\n".join(lines) - -def main() -> None: - ap = argparse.ArgumentParser() - ap.add_argument("--cases", type=Path, required=True) - ap.add_argument("--actual-dir", type=Path, required=True) - ap.add_argument("--report-json", type=Path, required=True) - ap.add_argument("--report-md", type=Path, required=True) - args = ap.parse_args() - report = evaluate_suite(args.cases, args.actual_dir) - args.report_json.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8") - args.report_md.write_text(render_md(report), encoding="utf-8") - print(json.dumps(report, indent=2, ensure_ascii=False)) - -if __name__ == "__main__": - main() diff --git a/control-pipeline/tests/applicability_demo_ci/reports/latest_report.json b/control-pipeline/tests/applicability_demo_ci/reports/latest_report.json deleted file mode 100644 index b95584d..0000000 --- a/control-pipeline/tests/applicability_demo_ci/reports/latest_report.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "suite": "applicability-engine-demo-cases-priority-6", - "total_cases": 6, - "passed": 5, - "failed": 1, - "results": [ - { - "case_id": "CASE-001", - "title": "SaaS-Webshop mit Stripe Checkout", - "passed": true, - "errors": [], - "warnings": [] - }, - { - "case_id": "CASE-002", - "title": "Bank gibt TAN-Generator mit Batterie an Kunden aus", - "passed": true, - "errors": [], - "warnings": [] - }, - { - "case_id": "CASE-004", - "title": "Fintech mit eigener Wallet und Weiterleitung von Kundengeldern", - "passed": false, - "errors": [ - "missing reasoning phrase: möglicherweise regulierter Zahlungsdienst" - ], - "warnings": [] - }, - { - "case_id": "CASE-006", - "title": "SaaS-Unternehmen verschickt nur SMS über externen Gateway", - "passed": true, - "errors": [], - "warnings": [] - }, - { - "case_id": "CASE-008", - "title": "Importeur von IoT-Geräten mit Batterien und Funkmodul", - "passed": true, - "errors": [], - "warnings": [] - }, - { - "case_id": "CASE-011", - "title": "Unklarer Grenzfall mit Embedded Finance", - "passed": true, - "errors": [], - "warnings": [] - } - ] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_ci/reports/latest_report.md b/control-pipeline/tests/applicability_demo_ci/reports/latest_report.md deleted file mode 100644 index 5ffb62c..0000000 --- a/control-pipeline/tests/applicability_demo_ci/reports/latest_report.md +++ /dev/null @@ -1,15 +0,0 @@ -# applicability-engine-demo-cases-priority-6 - -## Summary -- Total cases: 6 -- Passed: 5 -- Failed: 1 - -## Results -- CASE-001: PASS -- CASE-002: PASS -- CASE-004: FAIL - - error: missing reasoning phrase: möglicherweise regulierter Zahlungsdienst -- CASE-006: PASS -- CASE-008: PASS -- CASE-011: PASS diff --git a/control-pipeline/tests/applicability_demo_ci/requirements.txt b/control-pipeline/tests/applicability_demo_ci/requirements.txt deleted file mode 100644 index 0fdbcb0..0000000 --- a/control-pipeline/tests/applicability_demo_ci/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -PyYAML>=6.0 -pytest>=8.0 diff --git a/control-pipeline/tests/applicability_demo_ci/tests/test_applicability_demo.py b/control-pipeline/tests/applicability_demo_ci/tests/test_applicability_demo.py deleted file mode 100644 index 7f87ab0..0000000 --- a/control-pipeline/tests/applicability_demo_ci/tests/test_applicability_demo.py +++ /dev/null @@ -1,34 +0,0 @@ -from __future__ import annotations -import json -import subprocess -import sys -from pathlib import Path - -ROOT = Path(__file__).resolve().parent.parent - -def test_demo_suite_passes() -> None: - reports = ROOT / "reports" - reports.mkdir(exist_ok=True) - cmd = [ - sys.executable, - str(ROOT / "evaluator.py"), - "--cases", str(ROOT / "demo_cases.yaml"), - "--actual-dir", str(ROOT / "actual_outputs"), - "--report-json", str(reports / "latest_report.json"), - "--report-md", str(reports / "latest_report.md"), - ] - completed = subprocess.run(cmd, capture_output=True, text=True, check=False) - assert completed.returncode == 0, completed.stderr - report = json.loads((reports / "latest_report.json").read_text(encoding="utf-8")) - assert report["failed"] == 0, json.dumps(report, indent=2, ensure_ascii=False) - -def test_boundary_cases_have_escalations() -> None: - boundary_ids = {"CASE-002", "CASE-004", "CASE-006", "CASE-008", "CASE-011"} - for case_id in boundary_ids: - payload = json.loads((ROOT / "actual_outputs" / f"{case_id}.json").read_text(encoding="utf-8")) - assert payload["escalations"], f"{case_id} should include at least one escalation" - -def test_stripe_case_not_psd2() -> None: - payload = json.loads((ROOT / "actual_outputs" / "CASE-001.json").read_text(encoding="utf-8")) - assert "PSD2-LICENSING-001" not in payload["assigned_controls"] - assert "PAYMENT-INSTITUTION-AUTH-001" not in payload["assigned_controls"] diff --git a/control-pipeline/tests/applicability_demo_sdk/README.md b/control-pipeline/tests/applicability_demo_sdk/README.md deleted file mode 100644 index 6f2d442..0000000 --- a/control-pipeline/tests/applicability_demo_sdk/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# Applicability SDK Demo Contract Package - -## Ziel -Diese Version ist dafür gedacht, die Demo-Cases direkt gegen euren echten Endpoint zu schießen. - -## Struktur -- `requests/CASE-*.json` — Request-Payloads je Demo-Case -- `contracts/response_contract.json` — fachlicher Mindestvertrag -- `contracts/response_schema.json` — JSON-Schema für die technische Response-Struktur -- `api_runner.py` — POSTet alle Cases an euren Endpoint und speichert die Responses -- `../applicability_demo/evaluator.py` — kann anschließend gegen die gespeicherten Responses laufen - -## Beispielablauf - -### 1. Cases gegen euren Endpoint schicken -```bash -python api_runner.py --endpoint http://127.0.0.1:8098/v1/applicability/evaluate -``` - -Die Responses landen dann in: -```text -actual_outputs/CASE-001.json -... -``` - -### 2. Gegen den Evaluator prüfen -```bash -python ../applicability_demo/evaluator.py --cases ../applicability_demo/demo_cases.yaml --actual-dir ./actual_outputs --report-json ./reports/latest_report.json --report-md ./reports/latest_report.md -``` - -## Erwartung an euren Endpoint -Request: -- JSON POST -- Request Body entspricht den Dateien in `requests/` - -Response: -- Muss mindestens die Felder aus `contracts/response_contract.json` enthalten - -## Hinweise -- Wenn euer Endpoint andere Feldnamen nutzt, baut einen kleinen Adapter vor dem Evaluator. -- Wenn ihr mehrere Modi habt, könnt ihr `mode` nutzen, um deterministische Applicability-Analysen zu erzwingen. -- Für Grenzfälle wie `CASE-011` soll das System nicht künstlich sicher tun, sondern eskalieren. diff --git a/control-pipeline/tests/applicability_demo_sdk/api_runner.py b/control-pipeline/tests/applicability_demo_sdk/api_runner.py deleted file mode 100644 index ce5bcee..0000000 --- a/control-pipeline/tests/applicability_demo_sdk/api_runner.py +++ /dev/null @@ -1,56 +0,0 @@ -from __future__ import annotations - -import argparse -import json -import sys -from pathlib import Path -from urllib import request, error - -def post_json(url: str, payload: dict, timeout: int = 60) -> dict: - data = json.dumps(payload).encode("utf-8") - req = request.Request( - url, - data=data, - headers={"Content-Type": "application/json"}, - method="POST", - ) - with request.urlopen(req, timeout=timeout) as resp: - raw = resp.read().decode("utf-8") - return json.loads(raw) - -def main() -> None: - parser = argparse.ArgumentParser(description="Send demo applicability cases to an API endpoint.") - parser.add_argument("--endpoint", required=True, help="Full HTTP endpoint URL") - parser.add_argument("--requests-dir", type=Path, default=Path(__file__).resolve().parent / "requests") - parser.add_argument("--out-dir", type=Path, default=Path(__file__).resolve().parent / "actual_outputs") - parser.add_argument("--case-id", default=None, help="Optional single case id, e.g. CASE-001") - args = parser.parse_args() - - args.out_dir.mkdir(parents=True, exist_ok=True) - - files = sorted(args.requests_dir.glob("CASE-*.json")) - if args.case_id: - files = [args.requests_dir / f"{args.case_id}.json"] - - failures = 0 - for path in files: - payload = json.loads(path.read_text(encoding="utf-8")) - try: - result = post_json(args.endpoint, payload) - except error.HTTPError as exc: - failures += 1 - print(f"[FAIL] {path.name}: HTTP {exc.code}", file=sys.stderr) - continue - except Exception as exc: - failures += 1 - print(f"[FAIL] {path.name}: {exc}", file=sys.stderr) - continue - - out_path = args.out_dir / path.name - out_path.write_text(json.dumps(result, indent=2, ensure_ascii=False), encoding="utf-8") - print(f"[OK] wrote {out_path}") - - raise SystemExit(1 if failures else 0) - -if __name__ == "__main__": - main() diff --git a/control-pipeline/tests/applicability_demo_sdk/contracts/response_contract.json b/control-pipeline/tests/applicability_demo_sdk/contracts/response_contract.json deleted file mode 100644 index a6ab614..0000000 --- a/control-pipeline/tests/applicability_demo_sdk/contracts/response_contract.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "name": "ApplicabilityAssessmentResponse", - "description": "Mindestvertrag für Responses des Applicability-Endpoints.", - "required_fields": { - "case_id": "string", - "assigned_controls": [ - "string" - ], - "excluded_controls": [ - "string" - ], - "escalations": [ - "string" - ], - "inferred_industries": [ - "string" - ], - "confidence": { - "overall": "number", - "industry_assignment": "number", - "control_assignment": "number" - }, - "explanation": "string", - "uncertainty_flags": [ - "string" - ] - }, - "semantic_rules": [ - "must_assign controls müssen in assigned_controls enthalten sein", - "must_not_assign controls dürfen nicht in assigned_controls enthalten sein", - "escalate_for_legal_review muss in escalations abgebildet werden", - "Grenzfälle sollen uncertainty_flags setzen", - "explanation muss die juristische oder fachliche Abgrenzung nachvollziehbar beschreiben" - ] -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_sdk/contracts/response_schema.json b/control-pipeline/tests/applicability_demo_sdk/contracts/response_schema.json deleted file mode 100644 index 056418f..0000000 --- a/control-pipeline/tests/applicability_demo_sdk/contracts/response_schema.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "ApplicabilityAssessmentResponse", - "type": "object", - "required": [ - "case_id", - "assigned_controls", - "excluded_controls", - "escalations", - "inferred_industries", - "confidence", - "explanation", - "uncertainty_flags" - ], - "properties": { - "case_id": { - "type": "string" - }, - "assigned_controls": { - "type": "array", - "items": { - "type": "string" - } - }, - "excluded_controls": { - "type": "array", - "items": { - "type": "string" - } - }, - "escalations": { - "type": "array", - "items": { - "type": "string" - } - }, - "inferred_industries": { - "type": "array", - "items": { - "type": "string" - } - }, - "confidence": { - "type": "object", - "required": [ - "overall", - "industry_assignment", - "control_assignment" - ], - "properties": { - "overall": { - "type": "number" - }, - "industry_assignment": { - "type": "number" - }, - "control_assignment": { - "type": "number" - } - } - }, - "explanation": { - "type": "string" - }, - "uncertainty_flags": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "additionalProperties": true -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_sdk/requests/CASE-001.json b/control-pipeline/tests/applicability_demo_sdk/requests/CASE-001.json deleted file mode 100644 index 3ac54ba..0000000 --- a/control-pipeline/tests/applicability_demo_sdk/requests/CASE-001.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "case_id": "CASE-001", - "mode": "applicability_assessment", - "jurisdiction": "DE", - "company_profile": { - "company_type": "GmbH", - "primary_industry": "retail_ecommerce", - "summary": "Ein deutsches Unternehmen betreibt einen Webshop für physische Produkte. Zahlungen werden über Stripe Checkout abgewickelt. Das Unternehmen hält selbst keine Kundengelder, führt keine Zahlungskonten und bietet keine eigenen Zahlungsdienste an." - }, - "facts": { - "sells_physical_products": true, - "webshop": true, - "payment_provider": "Stripe", - "stores_card_data": false, - "holds_customer_funds": false, - "operates_payment_service": false, - "processes_personal_data": true, - "sends_data_to_stripe": true - } -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_sdk/requests/CASE-002.json b/control-pipeline/tests/applicability_demo_sdk/requests/CASE-002.json deleted file mode 100644 index 93f5e80..0000000 --- a/control-pipeline/tests/applicability_demo_sdk/requests/CASE-002.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "case_id": "CASE-002", - "mode": "applicability_assessment", - "jurisdiction": "DE", - "company_profile": { - "company_type": "AG", - "primary_industry": "financial_services", - "summary": "Eine Bank gibt physische TAN-Generatoren mit eingebauter Batterie an Endkunden aus. Die Geräte werden unter eigener Marke vertrieben." - }, - "facts": { - "provides_banking_services": true, - "distributes_physical_products": true, - "product_contains_battery": true, - "product_under_own_brand": true, - "imports_product_from_non_eu": false, - "manufactures_product": false - } -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_sdk/requests/CASE-004.json b/control-pipeline/tests/applicability_demo_sdk/requests/CASE-004.json deleted file mode 100644 index 5882f1c..0000000 --- a/control-pipeline/tests/applicability_demo_sdk/requests/CASE-004.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "case_id": "CASE-004", - "mode": "applicability_assessment", - "jurisdiction": "DE", - "company_profile": { - "company_type": "GmbH", - "primary_industry": "financial_services", - "summary": "Ein Fintech bietet eine App mit Wallet-Funktion, Kundengelder werden entgegengenommen und an Händler weitergeleitet." - }, - "facts": { - "provides_wallet": true, - "holds_customer_funds": true, - "executes_payment_transactions": true, - "customer_onboarding": true, - "transaction_monitoring": true - } -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_sdk/requests/CASE-006.json b/control-pipeline/tests/applicability_demo_sdk/requests/CASE-006.json deleted file mode 100644 index bb0d1be..0000000 --- a/control-pipeline/tests/applicability_demo_sdk/requests/CASE-006.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "case_id": "CASE-006", - "mode": "applicability_assessment", - "jurisdiction": "DE", - "company_profile": { - "company_type": "UG", - "primary_industry": "software_saas", - "summary": "Eine SaaS-Plattform verschickt Login-Codes per Twilio/SMS-Gateway, betreibt aber kein eigenes öffentliches Telekommunikationsnetz und bietet keinen Telekommunikationsdienst am Markt an." - }, - "facts": { - "sends_sms_notifications": true, - "uses_external_gateway": true, - "provides_public_telecom_services": false, - "operates_network": false - } -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_sdk/requests/CASE-008.json b/control-pipeline/tests/applicability_demo_sdk/requests/CASE-008.json deleted file mode 100644 index addd9fe..0000000 --- a/control-pipeline/tests/applicability_demo_sdk/requests/CASE-008.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "case_id": "CASE-008", - "mode": "applicability_assessment", - "jurisdiction": "DE", - "company_profile": { - "company_type": "GmbH", - "primary_industry": "software_saas", - "summary": "Ein Softwareunternehmen verkauft nun zusätzlich eigene IoT-Sensoren mit Batterie und Funkmodul unter eigener Marke." - }, - "facts": { - "imports_from_non_eu": true, - "sells_hardware": true, - "product_contains_battery": true, - "product_has_radio": true, - "own_brand": true - } -} \ No newline at end of file diff --git a/control-pipeline/tests/applicability_demo_sdk/requests/CASE-011.json b/control-pipeline/tests/applicability_demo_sdk/requests/CASE-011.json deleted file mode 100644 index c5232ce..0000000 --- a/control-pipeline/tests/applicability_demo_sdk/requests/CASE-011.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "case_id": "CASE-011", - "mode": "applicability_assessment", - "jurisdiction": "DE", - "company_profile": { - "company_type": "GmbH", - "primary_industry": "software_saas", - "summary": "Eine Plattform ermöglicht Händlern Auszahlungen, virtuelle Konten, Split Settlements und einen Finanzierungsvorschuss, teilweise über Partnerbanken, teilweise über eigene Prozesse." - }, - "facts": { - "virtual_accounts": true, - "split_settlements": true, - "advance_payments": true, - "partner_bank_involved": true, - "own_funds_flow_unclear": true - } -} \ No newline at end of file diff --git a/control-pipeline/tests/test_applicability_use_cases.py b/control-pipeline/tests/test_applicability_use_cases.py index 636b1d5..a33b89f 100644 --- a/control-pipeline/tests/test_applicability_use_cases.py +++ b/control-pipeline/tests/test_applicability_use_cases.py @@ -135,39 +135,96 @@ class TestAcceptanceRules: # Integration Tests (require DB + Applicability Engine) # --------------------------------------------------------------------------- -@pytest.mark.skip(reason="Requires running DB + Applicability Engine — enable for SDK demo") +@pytest.mark.skipif( + not os.getenv("DATABASE_URL"), + reason="Requires DATABASE_URL env var pointing to a running PostgreSQL with compliance schema", +) class TestApplicabilityIntegration: """Run use cases against the real Applicability Engine. - Enable by removing @skip and ensuring DATABASE_URL is set. + Enable by setting DATABASE_URL env var. + Calls the real get_applicable_controls() from services/applicability_engine.py + and checks results against must_match / must_not_match from demo_cases.yaml. - Scoring per case: - must_include_match: 0..1 - must_not_include_match: 0..1 - reasoning_correct: 0..1 - escalation_correct: 0..1 - total_score: 0..4 + The checks use source_citation->>'source' to identify which regulation + a control comes from (e.g. "TKG", "DSGVO", "NIS2"), since our real + control_ids use domain prefixes (SEC-001, DATA-002) not regulation names. """ + @pytest.fixture(autouse=True) + def setup_db(self): + from db.session import SessionLocal + self.db = SessionLocal() + yield + self.db.close() + @pytest.mark.parametrize("case", DEMO_CASES, ids=CASE_IDS) def test_applicability(self, case): - # TODO: Implement against real ApplicabilityEngine - # from services.applicability_engine import get_applicable_controls - # from db.session import SessionLocal - # - # db = SessionLocal() - # result = get_applicable_controls( - # db=db, - # industry=case["company_profile"]["sector"], - # company_size=case["company_profile"].get("size", "medium"), - # scope_signals=case.get("scope_answers", {}), - # ) - # - # # Score: must_include_match - # for required in case["expected"].get("applicable_controls_should_include", []): - # assert any(required.lower() in str(c).lower() for c in result["controls"]) - # - # # Score: must_not_include_match - # for forbidden in case["expected"].get("applicable_controls_should_not_include", []): - # assert not any(forbidden.lower() in str(c).lower() for c in result["controls"]) - pass + from services.applicability_engine import get_applicable_controls + + # Map scope_answers booleans to list of active signals + scope_answers = case.get("scope_answers", {}) + active_signals = [k for k, v in scope_answers.items() if v is True] + + result = get_applicable_controls( + db=self.db, + industry=case["company_profile"].get("sector"), + company_size=case["company_profile"].get("size", "medium"), + scope_signals=active_signals if active_signals else None, + limit=10000, + ) + + # Collect all source names and control domains from results + sources = set() + domains = set() + for ctrl in result["controls"]: + # source from source_citation + sc = ctrl.get("source_citation") + if isinstance(sc, str): + try: + import json as _json + sc = _json.loads(sc) + except Exception: + sc = {} + if isinstance(sc, dict): + src = sc.get("source", "") + if src: + sources.add(src) + # domain from control_id prefix + cid = ctrl.get("control_id", "") + if cid: + domains.add(cid.split("-")[0].upper()) + + all_labels = sources | domains + + # Check must_match: at least one control from this regulation/domain exists + for required in case["expected"].get("applicable_controls_should_include", []): + matched = any(required.lower() in label.lower() for label in all_labels) + # Also check in control titles as fallback + if not matched: + matched = any( + required.lower() in (ctrl.get("title", "") or "").lower() + for ctrl in result["controls"] + ) + assert matched, ( + f"{case['id']}: Expected controls related to '{required}' " + f"but not found. Sources: {sorted(sources)[:20]}, " + f"Domains: {sorted(domains)[:20]}, " + f"Total: {result['total_applicable']}" + ) + + # Check must_not_match: no control from this regulation/domain + for forbidden in case["expected"].get("applicable_controls_should_not_include", []): + found = any(forbidden.lower() in label.lower() for label in all_labels) + if found: + # Double check in titles too + matching_titles = [ + ctrl.get("control_id", "") + ": " + (ctrl.get("title", "") or "") + for ctrl in result["controls"] + if forbidden.lower() in (ctrl.get("title", "") or "").lower() + ] + assert False, ( + f"{case['id']}: Controls related to '{forbidden}' should NOT " + f"be assigned but found in sources/domains. " + f"Examples: {matching_titles[:5]}" + )