diff --git a/admin-compliance/app/sdk/control-library/components/ControlDetail.tsx b/admin-compliance/app/sdk/control-library/components/ControlDetail.tsx index 175241b..e899cde 100644 --- a/admin-compliance/app/sdk/control-library/components/ControlDetail.tsx +++ b/admin-compliance/app/sdk/control-library/components/ControlDetail.tsx @@ -164,17 +164,39 @@ export function ControlDetail({

{ctrl.rationale}

- {/* Gesetzliche Grundlage (Rule 1 + 2) */} + {/* Quellennachweis (Rule 1 + 2) — dynamic label based on source_type */} {ctrl.source_citation && ( -
+
- -

Gesetzliche Grundlage

- {ctrl.license_rule === 1 && ( + +

{ + ctrl.source_citation.source_type === 'law' ? 'Gesetzliche Grundlage' : + ctrl.source_citation.source_type === 'guideline' ? 'Behoerdliche Leitlinie' : + 'Standard / Best Practice' + }

+ {ctrl.source_citation.source_type === 'law' && ( Direkte gesetzliche Pflicht )} - {ctrl.license_rule === 2 && ( - Standard mit Zitationspflicht + {ctrl.source_citation.source_type === 'guideline' && ( + Aufsichtsbehoerdliche Empfehlung + )} + {(ctrl.source_citation.source_type === 'standard' || (!ctrl.source_citation.source_type && ctrl.license_rule === 2)) && ( + Freiwilliger Standard + )} + {(!ctrl.source_citation.source_type && ctrl.license_rule === 1) && ( + Noch nicht klassifiziert )}
diff --git a/backend-compliance/compliance/api/canonical_control_routes.py b/backend-compliance/compliance/api/canonical_control_routes.py index 2c57ee5..df44dda 100644 --- a/backend-compliance/compliance/api/canonical_control_routes.py +++ b/backend-compliance/compliance/api/canonical_control_routes.py @@ -82,6 +82,9 @@ class ControlResponse(BaseModel): target_audience: Optional[str] = None generation_metadata: Optional[dict] = None generation_strategy: Optional[str] = "ungrouped" + applicable_industries: Optional[list] = None + applicable_company_size: Optional[list] = None + scope_conditions: Optional[dict] = None created_at: str updated_at: str @@ -111,6 +114,9 @@ class ControlCreateRequest(BaseModel): category: Optional[str] = None target_audience: Optional[str] = None generation_metadata: Optional[dict] = None + applicable_industries: Optional[list] = None + applicable_company_size: Optional[list] = None + scope_conditions: Optional[dict] = None class ControlUpdateRequest(BaseModel): @@ -136,6 +142,9 @@ class ControlUpdateRequest(BaseModel): category: Optional[str] = None target_audience: Optional[str] = None generation_metadata: Optional[dict] = None + applicable_industries: Optional[list] = None + applicable_company_size: Optional[list] = None + scope_conditions: Optional[dict] = None class SimilarityCheckRequest(BaseModel): @@ -164,6 +173,7 @@ _CONTROL_COLS = """id, framework_id, control_id, title, objective, rationale, license_rule, source_original_text, source_citation, customer_visible, verification_method, category, target_audience, generation_metadata, generation_strategy, + applicable_industries, applicable_company_size, scope_conditions, created_at, updated_at""" @@ -511,7 +521,8 @@ async def create_control(body: ControlCreateRequest): open_anchors, release_state, tags, license_rule, source_original_text, source_citation, customer_visible, verification_method, category, - target_audience, generation_metadata + target_audience, generation_metadata, + 
applicable_industries, applicable_company_size, scope_conditions ) VALUES ( :fw_id, :cid, :title, :objective, :rationale, CAST(:scope AS jsonb), CAST(:requirements AS jsonb), @@ -521,7 +532,10 @@ async def create_control(body: ControlCreateRequest): :license_rule, :source_original_text, CAST(:source_citation AS jsonb), :customer_visible, :verification_method, :category, - :target_audience, CAST(:generation_metadata AS jsonb) + :target_audience, CAST(:generation_metadata AS jsonb), + CAST(:applicable_industries AS jsonb), + CAST(:applicable_company_size AS jsonb), + CAST(:scope_conditions AS jsonb) ) RETURNING {_CONTROL_COLS} """), @@ -550,6 +564,9 @@ async def create_control(body: ControlCreateRequest): "category": body.category, "target_audience": body.target_audience, "generation_metadata": _json.dumps(body.generation_metadata) if body.generation_metadata else None, + "applicable_industries": _json.dumps(body.applicable_industries) if body.applicable_industries else None, + "applicable_company_size": _json.dumps(body.applicable_company_size) if body.applicable_company_size else None, + "scope_conditions": _json.dumps(body.scope_conditions) if body.scope_conditions else None, }, ).fetchone() db.commit() @@ -778,6 +795,9 @@ def _control_row(r) -> dict: "target_audience": r.target_audience, "generation_metadata": r.generation_metadata, "generation_strategy": getattr(r, "generation_strategy", "ungrouped"), + "applicable_industries": getattr(r, "applicable_industries", None), + "applicable_company_size": getattr(r, "applicable_company_size", None), + "scope_conditions": getattr(r, "scope_conditions", None), "created_at": r.created_at.isoformat() if r.created_at else None, "updated_at": r.updated_at.isoformat() if r.updated_at else None, } diff --git a/backend-compliance/compliance/api/control_generator_routes.py b/backend-compliance/compliance/api/control_generator_routes.py index 65664d6..f2cecac 100644 --- 
a/backend-compliance/compliance/api/control_generator_routes.py
+++ b/backend-compliance/compliance/api/control_generator_routes.py
@@ -28,6 +28,7 @@ from compliance.services.control_generator import (
     ALL_COLLECTIONS,
     VALID_CATEGORIES,
     VALID_DOMAINS,
+    _classify_regulation,
     _detect_category,
     _detect_domain,
     _llm_local,
@@ -978,3 +979,156 @@ async def get_domain_backfill_status(backfill_id: str):
     if not status:
         raise HTTPException(status_code=404, detail="Domain backfill job not found")
     return status
+
+
+# ---------------------------------------------------------------------------
+# Source-Type Backfill — Classify law vs guideline vs standard vs restricted
+# ---------------------------------------------------------------------------
+
+class SourceTypeBackfillRequest(BaseModel):
+    # dry_run=True (the default) classifies and counts without writing.
+    dry_run: bool = True
+
+
+# Job registry: backfill_id -> latest status dict (in-memory only).
+_source_type_backfill_status: dict = {}
+# Strong references to running backfill tasks: the event loop keeps only weak
+# references, so an otherwise unreferenced task may be garbage-collected mid-run.
+_source_type_backfill_tasks: set = set()
+# Rows processed between commits / cooperative yields to the event loop.
+_SOURCE_TYPE_BACKFILL_BATCH = 100
+
+
+async def _run_source_type_backfill(dry_run: bool, backfill_id: str):
+    """Backfill source_type into source_citation JSONB for all controls.
+
+    Selects every control whose source_citation has no (or an empty)
+    source_type, classifies it with _classify_regulation() using the
+    "source_regulation" key of generation_metadata, and writes the result
+    back unless dry_run is set. Progress and the final result are published
+    in _source_type_backfill_status[backfill_id]; a failure is logged and
+    recorded there as status "failed" instead of being raised.
+
+    Args:
+        dry_run: When True, classify and count only; nothing is written.
+        backfill_id: Key under which the job status is published.
+    """
+    db = SessionLocal()
+    try:
+        # Find controls with source_citation that lack source_type
+        rows = db.execute(text("""
+            SELECT control_id, source_citation, generation_metadata
+            FROM compliance.canonical_controls
+            WHERE source_citation IS NOT NULL
+              AND (source_citation->>'source_type' IS NULL
+                   OR source_citation->>'source_type' = '')
+        """)).fetchall()
+
+        total = len(rows)
+        updated = 0
+        errors = []
+
+        _source_type_backfill_status[backfill_id] = {
+            "status": "running", "total": total, "updated": 0, "dry_run": dry_run,
+        }
+
+        for row in rows:
+            cid = row[0]
+            # JSONB values may arrive decoded (dict) or as raw JSON text.
+            citation = row[1] if isinstance(row[1], dict) else json.loads(row[1] or "{}")
+            metadata = row[2] if isinstance(row[2], dict) else json.loads(row[2] or "{}")
+            if not isinstance(citation, dict) or not isinstance(metadata, dict):
+                # A non-object value cannot be classified: report and skip the row.
+                errors.append(f"{cid}: source_citation/generation_metadata is not a JSON object")
+                continue
+
+            # Get regulation_code from metadata
+            reg_code = metadata.get("source_regulation", "")
+            if not reg_code:
+                # Cannot classify without a regulation code: report and skip.
+                errors.append(f"{cid}: no source_regulation in metadata")
+                continue
+
+            # Classify
+            license_info = _classify_regulation(reg_code)
+            source_type = license_info.get("source_type", "restricted")
+
+            # Update citation
+            citation["source_type"] = source_type
+
+            if not dry_run:
+                # Explicit cast: the value is bound as JSON text, the column is JSONB.
+                db.execute(text("""
+                    UPDATE compliance.canonical_controls
+                    SET source_citation = CAST(:citation AS jsonb)
+                    WHERE control_id = :cid
+                """), {"citation": json.dumps(citation), "cid": cid})
+            updated += 1
+
+            if updated % _SOURCE_TYPE_BACKFILL_BATCH == 0:
+                # Commit once per full batch of processed rows.
+                if not dry_run:
+                    db.commit()
+                _source_type_backfill_status[backfill_id]["updated"] = updated
+                # Yield so the event loop can serve other requests (status polls).
+                await asyncio.sleep(0)
+
+        if not dry_run:
+            db.commit()
+
+        # Count distribution
+        dist_query = db.execute(text("""
+            SELECT source_citation->>'source_type' as st, COUNT(*)
+            FROM compliance.canonical_controls
+            WHERE source_citation IS NOT NULL
+              AND source_citation->>'source_type' IS NOT NULL
+            GROUP BY st
+        """)).fetchall() if not dry_run else []
+
+        distribution = {r[0]: r[1] for r in dist_query}
+
+        _source_type_backfill_status[backfill_id] = {
+            "status": "completed", "total": total, "updated": updated,
+            "dry_run": dry_run, "distribution": distribution,
+            "errors": errors[:50],
+        }
+        logger.info("Source-type backfill %s completed: %d/%d updated (dry_run=%s)",
+                    backfill_id, updated, total, dry_run)
+
+    except Exception as e:
+        # Top-level boundary of a background task: log with traceback and report.
+        logger.exception("Source-type backfill %s failed: %s", backfill_id, e)
+        _source_type_backfill_status[backfill_id] = {"status": "failed", "error": str(e)}
+    finally:
+        db.close()
+
+
+@router.post("/generate/backfill-source-type")
+async def start_source_type_backfill(req: SourceTypeBackfillRequest):
+    """Backfill source_type (law/guideline/standard/restricted) into source_citation JSONB.
+
+    Classifies each control's source as binding law, authority guideline,
+    voluntary standard, or restricted norm based on regulation_code.
+    Default is dry_run=True (preview only).
+    """
+    import uuid
+    backfill_id = str(uuid.uuid4())[:8]
+    _source_type_backfill_status[backfill_id] = {"status": "starting"}
+    task = asyncio.create_task(_run_source_type_backfill(req.dry_run, backfill_id))
+    # Hold the task until it finishes; see the note on _source_type_backfill_tasks.
+    _source_type_backfill_tasks.add(task)
+    task.add_done_callback(_source_type_backfill_tasks.discard)
+    return {
+        "status": "running",
+        "backfill_id": backfill_id,
+        "message": f"Source-type backfill started. Poll /generate/source-type-backfill-status/{backfill_id}",
+    }
+
+
+@router.get("/generate/source-type-backfill-status/{backfill_id}")
+async def get_source_type_backfill_status(backfill_id: str):
+    """Get status of a source-type backfill job."""
+    status = _source_type_backfill_status.get(backfill_id)
+    if not status:
+        raise HTTPException(status_code=404, detail="Source-type backfill job not found")
+    return status
diff --git a/backend-compliance/compliance/services/control_generator.py b/backend-compliance/compliance/services/control_generator.py
index 1de79a4..447f89a 100644
--- a/backend-compliance/compliance/services/control_generator.py
+++ b/backend-compliance/compliance/services/control_generator.py
@@ -56,7 +56,8 @@ HARMONIZATION_THRESHOLD = 0.85  # Cosine similarity above this = duplicate
 # Pipeline version — increment when generation rules change materially.
# v1: Original (local LLM prefilter, old prompt) # v2: Anthropic decides relevance, null for non-requirement chunks, annexes protected -PIPELINE_VERSION = 2 +# v3: Scoped Control Applicability — applicable_industries, applicable_company_size, scope_conditions +PIPELINE_VERSION = 3 ALL_COLLECTIONS = [ "bp_compliance_ce", @@ -72,119 +73,121 @@ ALL_COLLECTIONS = [ REGULATION_LICENSE_MAP: dict[str, dict] = { # RULE 1: FREE USE — Laws, Public Domain + # source_type: "law" = binding legislation, "guideline" = authority guidance (soft law), + # "standard" = voluntary framework/best practice, "restricted" = protected norm # EU Regulations - "eu_2016_679": {"license": "EU_LAW", "rule": 1, "name": "DSGVO"}, - "eu_2024_1689": {"license": "EU_LAW", "rule": 1, "name": "AI Act (KI-Verordnung)"}, - "eu_2022_2555": {"license": "EU_LAW", "rule": 1, "name": "NIS2"}, - "eu_2024_2847": {"license": "EU_LAW", "rule": 1, "name": "Cyber Resilience Act (CRA)"}, - "eu_2023_1230": {"license": "EU_LAW", "rule": 1, "name": "Maschinenverordnung"}, - "eu_2022_2065": {"license": "EU_LAW", "rule": 1, "name": "Digital Services Act (DSA)"}, - "eu_2022_1925": {"license": "EU_LAW", "rule": 1, "name": "Digital Markets Act (DMA)"}, - "eu_2022_868": {"license": "EU_LAW", "rule": 1, "name": "Data Governance Act (DGA)"}, - "eu_2019_770": {"license": "EU_LAW", "rule": 1, "name": "Digitale-Inhalte-Richtlinie"}, - "eu_2021_914": {"license": "EU_LAW", "rule": 1, "name": "Standardvertragsklauseln (SCC)"}, - "eu_2002_58": {"license": "EU_LAW", "rule": 1, "name": "ePrivacy-Richtlinie"}, - "eu_2000_31": {"license": "EU_LAW", "rule": 1, "name": "E-Commerce-Richtlinie"}, - "eu_2023_1803": {"license": "EU_LAW", "rule": 1, "name": "IFRS-Uebernahmeverordnung"}, - "eucsa": {"license": "EU_LAW", "rule": 1, "name": "EU Cybersecurity Act"}, - "dataact": {"license": "EU_LAW", "rule": 1, "name": "Data Act"}, - "dora": {"license": "EU_LAW", "rule": 1, "name": "Digital Operational Resilience Act"}, - "ehds": {"license": 
"EU_LAW", "rule": 1, "name": "European Health Data Space"}, - "gpsr": {"license": "EU_LAW", "rule": 1, "name": "Allgemeine Produktsicherheitsverordnung"}, - "mica": {"license": "EU_LAW", "rule": 1, "name": "Markets in Crypto-Assets"}, - "psd2": {"license": "EU_LAW", "rule": 1, "name": "Zahlungsdiensterichtlinie 2"}, - "dpf": {"license": "EU_LAW", "rule": 1, "name": "EU-US Data Privacy Framework"}, - "dsm": {"license": "EU_LAW", "rule": 1, "name": "DSM-Urheberrechtsrichtlinie"}, - "amlr": {"license": "EU_LAW", "rule": 1, "name": "AML-Verordnung"}, - "eu_blue_guide_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "Blue Guide 2022"}, - # NIST (Public Domain — all variants) - "nist_sp_800_53": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-53"}, - "nist_sp800_53r5": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-53 Rev.5"}, - "nist_sp_800_63b": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-63B"}, - "nist_sp800_63_3": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-63-3"}, - "nist_csf_2_0": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST CSF 2.0"}, - "nist_sp_800_218": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SSDF"}, - "nist_sp800_207": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-207 Zero Trust"}, - "nist_ai_rmf": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST AI Risk Management Framework"}, - "nistir_8259a": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NISTIR 8259A IoT Security"}, - "cisa_secure_by_design": {"license": "US_GOV_PUBLIC", "rule": 1, "name": "CISA Secure by Design"}, + "eu_2016_679": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "DSGVO"}, + "eu_2024_1689": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "AI Act (KI-Verordnung)"}, + "eu_2022_2555": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "NIS2"}, + "eu_2024_2847": {"license": "EU_LAW", "rule": 1, 
"source_type": "law", "name": "Cyber Resilience Act (CRA)"}, + "eu_2023_1230": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Maschinenverordnung"}, + "eu_2022_2065": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Digital Services Act (DSA)"}, + "eu_2022_1925": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Digital Markets Act (DMA)"}, + "eu_2022_868": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Data Governance Act (DGA)"}, + "eu_2019_770": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Digitale-Inhalte-Richtlinie"}, + "eu_2021_914": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Standardvertragsklauseln (SCC)"}, + "eu_2002_58": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "ePrivacy-Richtlinie"}, + "eu_2000_31": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "E-Commerce-Richtlinie"}, + "eu_2023_1803": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "IFRS-Uebernahmeverordnung"}, + "eucsa": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "EU Cybersecurity Act"}, + "dataact": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Data Act"}, + "dora": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Digital Operational Resilience Act"}, + "ehds": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "European Health Data Space"}, + "gpsr": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Allgemeine Produktsicherheitsverordnung"}, + "mica": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Markets in Crypto-Assets"}, + "psd2": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Zahlungsdiensterichtlinie 2"}, + "dpf": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "EU-US Data Privacy Framework"}, + "dsm": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "DSM-Urheberrechtsrichtlinie"}, + "amlr": 
{"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "AML-Verordnung"}, + "eu_blue_guide_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "Blue Guide 2022"}, + # NIST (Public Domain — NOT laws, voluntary standards) + "nist_sp_800_53": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-53"}, + "nist_sp800_53r5": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-53 Rev.5"}, + "nist_sp_800_63b": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-63B"}, + "nist_sp800_63_3": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-63-3"}, + "nist_csf_2_0": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST CSF 2.0"}, + "nist_sp_800_218": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SSDF"}, + "nist_sp800_207": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-207 Zero Trust"}, + "nist_ai_rmf": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST AI Risk Management Framework"}, + "nistir_8259a": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NISTIR 8259A IoT Security"}, + "cisa_secure_by_design": {"license": "US_GOV_PUBLIC", "rule": 1, "source_type": "standard", "name": "CISA Secure by Design"}, # German Laws - "bdsg": {"license": "DE_LAW", "rule": 1, "name": "BDSG"}, - "bdsg_2018_komplett": {"license": "DE_LAW", "rule": 1, "name": "BDSG 2018"}, - "ttdsg": {"license": "DE_LAW", "rule": 1, "name": "TTDSG"}, - "tdddg_25": {"license": "DE_LAW", "rule": 1, "name": "TDDDG"}, - "tkg": {"license": "DE_LAW", "rule": 1, "name": "TKG"}, - "de_tkg": {"license": "DE_LAW", "rule": 1, "name": "TKG"}, - "bgb_komplett": {"license": "DE_LAW", "rule": 1, "name": "BGB"}, - "hgb": {"license": "DE_LAW", "rule": 
1, "name": "HGB"}, - "hgb_komplett": {"license": "DE_LAW", "rule": 1, "name": "HGB"}, - "urhg_komplett": {"license": "DE_LAW", "rule": 1, "name": "UrhG"}, - "uwg": {"license": "DE_LAW", "rule": 1, "name": "UWG"}, - "tmg_komplett": {"license": "DE_LAW", "rule": 1, "name": "TMG"}, - "gewo": {"license": "DE_LAW", "rule": 1, "name": "GewO"}, - "ao": {"license": "DE_LAW", "rule": 1, "name": "Abgabenordnung"}, - "ao_komplett": {"license": "DE_LAW", "rule": 1, "name": "Abgabenordnung"}, - "battdg": {"license": "DE_LAW", "rule": 1, "name": "Batteriegesetz"}, + "bdsg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "BDSG"}, + "bdsg_2018_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "BDSG 2018"}, + "ttdsg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TTDSG"}, + "tdddg_25": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TDDDG"}, + "tkg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TKG"}, + "de_tkg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TKG"}, + "bgb_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "BGB"}, + "hgb": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "HGB"}, + "hgb_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "HGB"}, + "urhg_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "UrhG"}, + "uwg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "UWG"}, + "tmg_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TMG"}, + "gewo": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "GewO"}, + "ao": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "Abgabenordnung"}, + "ao_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "Abgabenordnung"}, + "battdg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "Batteriegesetz"}, # Austrian Laws - "at_dsg": {"license": 
"AT_LAW", "rule": 1, "name": "AT DSG"}, - "at_abgb": {"license": "AT_LAW", "rule": 1, "name": "AT ABGB"}, - "at_abgb_agb": {"license": "AT_LAW", "rule": 1, "name": "AT ABGB AGB-Recht"}, - "at_bao": {"license": "AT_LAW", "rule": 1, "name": "AT BAO"}, - "at_bao_ret": {"license": "AT_LAW", "rule": 1, "name": "AT BAO Retention"}, - "at_ecg": {"license": "AT_LAW", "rule": 1, "name": "AT E-Commerce-Gesetz"}, - "at_kschg": {"license": "AT_LAW", "rule": 1, "name": "AT Konsumentenschutzgesetz"}, - "at_medieng": {"license": "AT_LAW", "rule": 1, "name": "AT Mediengesetz"}, - "at_tkg": {"license": "AT_LAW", "rule": 1, "name": "AT TKG"}, - "at_ugb": {"license": "AT_LAW", "rule": 1, "name": "AT UGB"}, - "at_ugb_ret": {"license": "AT_LAW", "rule": 1, "name": "AT UGB Retention"}, - "at_uwg": {"license": "AT_LAW", "rule": 1, "name": "AT UWG"}, + "at_dsg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT DSG"}, + "at_abgb": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT ABGB"}, + "at_abgb_agb": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT ABGB AGB-Recht"}, + "at_bao": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT BAO"}, + "at_bao_ret": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT BAO Retention"}, + "at_ecg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT E-Commerce-Gesetz"}, + "at_kschg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT Konsumentenschutzgesetz"}, + "at_medieng": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT Mediengesetz"}, + "at_tkg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT TKG"}, + "at_ugb": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT UGB"}, + "at_ugb_ret": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT UGB Retention"}, + "at_uwg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT UWG"}, # Other EU Member State Laws - 
"fr_loi_informatique": {"license": "FR_LAW", "rule": 1, "name": "FR Loi Informatique"}, - "es_lopdgdd": {"license": "ES_LAW", "rule": 1, "name": "ES LOPDGDD"}, - "nl_uavg": {"license": "NL_LAW", "rule": 1, "name": "NL UAVG"}, - "it_codice_privacy": {"license": "IT_LAW", "rule": 1, "name": "IT Codice Privacy"}, - "hu_info_tv": {"license": "HU_LAW", "rule": 1, "name": "HU Információs törvény"}, - # EDPB Guidelines (EU Public Authority) - "edpb_01_2020": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB 01/2020 Ergaenzende Massnahmen"}, - "edpb_02_2023": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB 02/2023 Technischer Anwendungsbereich"}, - "edpb_05_2020": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB 05/2020 Einwilligung"}, - "edpb_09_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB 09/2022 Datenschutzverletzungen"}, - "edpb_bcr_01_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB BCR Leitlinien"}, - "edpb_breach_09_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Breach Notification"}, - "edpb_connected_vehicles_01_2020": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Connected Vehicles"}, - "edpb_dpbd_04_2019": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Data Protection by Design"}, - "edpb_eprivacy_02_2023": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB ePrivacy"}, - "edpb_facial_recognition_05_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Facial Recognition"}, - "edpb_fines_04_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Fines Calculation"}, - "edpb_legitimate_interest": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Legitimate Interest"}, - "edpb_legitimate_interest_01_2024": {"license": "EU_PUBLIC","rule": 1, "name": "EDPB Legitimate Interest 2024"}, - "edpb_social_media_08_2020": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Social Media"}, - "edpb_transfers_01_2020":{"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Transfers 01/2020"}, - "edpb_transfers_07_2020":{"license": 
"EU_PUBLIC", "rule": 1, "name": "EDPB Transfers 07/2020"}, - "edpb_video_03_2019": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Video Surveillance"}, - "edps_dpia_list": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPS DPIA Liste"}, - "edpb_certification_01_2018": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Certification 01/2018"}, - "edpb_certification_01_2019": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Certification 01/2019"}, - "eaa": {"license": "EU_LAW", "rule": 1, "name": "European Accessibility Act"}, - # WP29 (pre-EDPB) Guidelines - "wp244_profiling": {"license": "EU_PUBLIC", "rule": 1, "name": "WP29 Profiling"}, - "wp251_profiling": {"license": "EU_PUBLIC", "rule": 1, "name": "WP29 Data Portability"}, - "wp260_transparency": {"license": "EU_PUBLIC", "rule": 1, "name": "WP29 Transparency"}, + "fr_loi_informatique": {"license": "FR_LAW", "rule": 1, "source_type": "law", "name": "FR Loi Informatique"}, + "es_lopdgdd": {"license": "ES_LAW", "rule": 1, "source_type": "law", "name": "ES LOPDGDD"}, + "nl_uavg": {"license": "NL_LAW", "rule": 1, "source_type": "law", "name": "NL UAVG"}, + "it_codice_privacy": {"license": "IT_LAW", "rule": 1, "source_type": "law", "name": "IT Codice Privacy"}, + "hu_info_tv": {"license": "HU_LAW", "rule": 1, "source_type": "law", "name": "HU Információs törvény"}, + # EDPB Guidelines (EU Public Authority — soft law, not binding legislation) + "edpb_01_2020": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB 01/2020 Ergaenzende Massnahmen"}, + "edpb_02_2023": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB 02/2023 Technischer Anwendungsbereich"}, + "edpb_05_2020": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB 05/2020 Einwilligung"}, + "edpb_09_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB 09/2022 Datenschutzverletzungen"}, + "edpb_bcr_01_2022": {"license": "EU_PUBLIC", "rule": 1, 
"source_type": "guideline", "name": "EDPB BCR Leitlinien"}, + "edpb_breach_09_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Breach Notification"}, + "edpb_connected_vehicles_01_2020": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Connected Vehicles"}, + "edpb_dpbd_04_2019": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Data Protection by Design"}, + "edpb_eprivacy_02_2023": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB ePrivacy"}, + "edpb_facial_recognition_05_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Facial Recognition"}, + "edpb_fines_04_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Fines Calculation"}, + "edpb_legitimate_interest": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Legitimate Interest"}, + "edpb_legitimate_interest_01_2024": {"license": "EU_PUBLIC","rule": 1, "source_type": "guideline", "name": "EDPB Legitimate Interest 2024"}, + "edpb_social_media_08_2020": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Social Media"}, + "edpb_transfers_01_2020":{"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Transfers 01/2020"}, + "edpb_transfers_07_2020":{"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Transfers 07/2020"}, + "edpb_video_03_2019": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Video Surveillance"}, + "edps_dpia_list": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPS DPIA Liste"}, + "edpb_certification_01_2018": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Certification 01/2018"}, + "edpb_certification_01_2019": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Certification 01/2019"}, + "eaa": 
{"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "European Accessibility Act"}, + # WP29 (pre-EDPB) Guidelines — soft law + "wp244_profiling": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "WP29 Profiling"}, + "wp251_profiling": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "WP29 Data Portability"}, + "wp260_transparency": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "WP29 Transparency"}, - # RULE 2: CITATION REQUIRED — CC-BY, CC-BY-SA - "owasp_asvs": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP ASVS", + # RULE 2: CITATION REQUIRED — CC-BY, CC-BY-SA (voluntary standards) + "owasp_asvs": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP ASVS", "attribution": "OWASP Foundation, CC BY-SA 4.0"}, - "owasp_masvs": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP MASVS", + "owasp_masvs": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP MASVS", "attribution": "OWASP Foundation, CC BY-SA 4.0"}, - "owasp_top10": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP Top 10", + "owasp_top10": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP Top 10", "attribution": "OWASP Foundation, CC BY-SA 4.0"}, - "owasp_top10_2021": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP Top 10 2021", + "owasp_top10_2021": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP Top 10 2021", "attribution": "OWASP Foundation, CC BY-SA 4.0"}, - "owasp_api_top10_2023": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP API Top 10 2023", + "owasp_api_top10_2023": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP API Top 10 2023", "attribution": "OWASP Foundation, CC BY-SA 4.0"}, - "owasp_samm": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP SAMM", + "owasp_samm": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", 
"name": "OWASP SAMM", "attribution": "OWASP Foundation, CC BY-SA 4.0"}, - "oecd_ai_principles": {"license": "OECD_PUBLIC", "rule": 2, "name": "OECD AI Principles", + "oecd_ai_principles": {"license": "OECD_PUBLIC", "rule": 2, "source_type": "standard", "name": "OECD AI Principles", "attribution": "OECD"}, # RULE 3: RESTRICTED — Full reformulation required @@ -197,28 +200,32 @@ _RULE2_PREFIXES = ["enisa_"] def _classify_regulation(regulation_code: str) -> dict: - """Determine license rule for a regulation_code.""" + """Determine license rule for a regulation_code. + + Returns dict with keys: license, rule, name, source_type. + source_type is one of: law, guideline, standard, restricted. + """ code = regulation_code.lower().strip() # Exact match first if code in REGULATION_LICENSE_MAP: return REGULATION_LICENSE_MAP[code] - # Prefix match for Rule 2 + # Prefix match for Rule 2 (ENISA = standard) for prefix in _RULE2_PREFIXES: if code.startswith(prefix): - return {"license": "CC-BY-4.0", "rule": 2, "name": "ENISA", - "attribution": "ENISA, CC BY 4.0"} + return {"license": "CC-BY-4.0", "rule": 2, "source_type": "standard", + "name": "ENISA", "attribution": "ENISA, CC BY 4.0"} - # Prefix match for Rule 3 + # Prefix match for Rule 3 (BSI/ISO/ETSI = restricted) for prefix in _RULE3_PREFIXES: if code.startswith(prefix): return {"license": f"{prefix.rstrip('_').upper()}_RESTRICTED", "rule": 3, - "name": "INTERNAL_ONLY"} + "source_type": "restricted", "name": "INTERNAL_ONLY"} # Unknown → treat as restricted (safe default) logger.warning("Unknown regulation_code %r — defaulting to Rule 3 (restricted)", code) - return {"license": "UNKNOWN", "rule": 3, "name": "INTERNAL_ONLY"} + return {"license": "UNKNOWN", "rule": 3, "source_type": "restricted", "name": "INTERNAL_ONLY"} # --------------------------------------------------------------------------- @@ -476,6 +483,10 @@ class GeneratedControl: verification_method: Optional[str] = None # code_review, document, tool, hybrid 
category: Optional[str] = None # one of 22 categories target_audience: Optional[list] = None # e.g. ["unternehmen", "behoerden", "entwickler"] + # Scoped Control Applicability (v3) + applicable_industries: Optional[list] = None # e.g. ["all"] or ["Telekommunikation", "Energie"] + applicable_company_size: Optional[list] = None # e.g. ["all"] or ["medium", "large", "enterprise"] + scope_conditions: Optional[dict] = None # e.g. {"requires_any": ["uses_ai"], "description": "..."} @dataclass @@ -769,6 +780,38 @@ STRUCTURE_SYSTEM_PROMPT = """Du bist ein Security-Compliance-Experte. Strukturie als praxisorientiertes Security Control. Erstelle eine verständliche, umsetzbare Formulierung. Antworte NUR mit validem JSON. Bei mehreren Controls antworte mit einem JSON-Array.""" +# Shared applicability prompt block — appended to all generation prompts (v3) +APPLICABILITY_PROMPT = """- applicable_industries: Liste der Branchen fuer die dieses Control relevant ist. + Verwende ["all"] wenn der Control branchenuebergreifend gilt. + Moegliche Werte: "all", "Technologie / IT", "IT Dienstleistungen", "E-Commerce / Handel", + "Finanzdienstleistungen", "Versicherungen", "Gesundheitswesen", "Pharma", "Bildung", + "Beratung / Consulting", "Marketing / Agentur", "Produktion / Industrie", + "Logistik / Transport", "Immobilien", "Bau", "Energie", "Automobil", + "Luft- / Raumfahrt", "Maschinenbau", "Anlagenbau", "Automatisierung", "Robotik", + "Messtechnik", "Agrar", "Chemie", "Minen / Bergbau", "Telekommunikation", + "Medien / Verlage", "Gastronomie / Hotellerie", "Recht / Kanzlei", + "Oeffentlicher Dienst", "Verteidigung / Ruestung", "Wasser- / Abwasserwirtschaft", + "Lebensmittel", "Digitale Infrastruktur", "Weltraum", "Post / Kurierdienste", + "Abfallwirtschaft", "Forschung" + Beispiel: TKG-Controls → ["Telekommunikation"] + Beispiel: DSGVO Art. 32 → ["all"] + Beispiel: NIS2 Art. 21 → ["Energie", "Gesundheitswesen", "Digitale Infrastruktur", "Logistik / Transport", ...] 
+- applicable_company_size: Ab welcher Unternehmensgroesse gilt dieses Control? + Verwende ["all"] wenn keine Groessenbeschraenkung. + Moegliche Werte: "all", "micro", "small", "medium", "large", "enterprise" + Groessen: micro (<10 MA), small (10-49), medium (50-249), large (250-999), enterprise (1000+) + Beispiel: NIS2 Art. 21 → ["medium", "large", "enterprise"] + Beispiel: DSGVO Art. 5 → ["all"] +- scope_conditions: Optionale Bedingungen aus dem Compliance-Scope des Unternehmens. + null wenn keine besonderen Bedingungen. Sonst JSON-Objekt: + {"requires_any": ["signal1", "signal2"], "description": "Kurze Erklaerung wann relevant"} + Moegliche Signale: "uses_ai", "third_country_transfer", "processes_health_data", + "processes_minors_data", "automated_decisions", "employee_monitoring", + "video_surveillance", "financial_data", "is_kritis_operator", "payment_services" + Beispiel AI Act: {"requires_any": ["uses_ai"], "description": "Nur bei KI-Einsatz relevant"} + Beispiel SCC: {"requires_any": ["third_country_transfer"], "description": "Nur bei Drittlandtransfer"} + Beispiel DSGVO Art. 32 (allgemein): null""" + class ControlGeneratorPipeline: """Orchestrates the 7-stage control generation pipeline.""" @@ -973,6 +1016,7 @@ Gib JSON zurück mit diesen Feldern: - target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst") - source_article: Artikel-/Paragraphen-Referenz aus dem Text (z.B. "Artikel 10", "§ 42"). Leer lassen wenn nicht erkennbar. - source_paragraph: Absatz-Referenz aus dem Text (z.B. "Absatz 5", "Nr. 2"). Leer lassen wenn nicht erkennbar. 
+{APPLICABILITY_PROMPT} Text: {chunk.text[:2000]} Quelle: {chunk.regulation_name} ({chunk.regulation_code}), {chunk.article}""" @@ -995,6 +1039,7 @@ Quelle: {chunk.regulation_name} ({chunk.regulation_code}), {chunk.article}""" "article": effective_article, "paragraph": effective_paragraph, "license": license_info.get("license", ""), + "source_type": license_info.get("source_type", "law"), "url": chunk.source_url or "", } control.customer_visible = True @@ -1036,6 +1081,7 @@ Gib JSON zurück mit diesen Feldern: - target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst") - source_article: Artikel-/Paragraphen-Referenz aus dem Text (z.B. "Artikel 10", "§ 42"). Leer lassen wenn nicht erkennbar. - source_paragraph: Absatz-Referenz aus dem Text (z.B. "Absatz 5", "Nr. 2"). Leer lassen wenn nicht erkennbar. +{APPLICABILITY_PROMPT} Text: {chunk.text[:2000]} Quelle: {chunk.regulation_name}, {chunk.article}""" @@ -1059,6 +1105,7 @@ Quelle: {chunk.regulation_name}, {chunk.article}""" "paragraph": effective_paragraph, "license": license_info.get("license", ""), "license_notice": attribution, + "source_type": license_info.get("source_type", "standard"), "url": chunk.source_url or "", } control.customer_visible = True @@ -1101,7 +1148,8 @@ Gib JSON zurück mit diesen Feldern: - tags: Liste von Tags (eigene Begriffe) - domain: Fachgebiet als Kuerzel (AUTH=Authentifizierung, CRYP=Kryptographie, NET=Netzwerk, DATA=Datenschutz, LOG=Logging, ACC=Zugriffskontrolle, SEC=IT-Sicherheit, INC=Vorfallmanagement, AI=KI, COMP=Compliance, GOV=Behoerden/Verwaltung, LAB=Arbeitsrecht, FIN=Finanzregulierung, TRD=Gewerbe/Handelsrecht, ENV=Umwelt, HLT=Gesundheit) - category: Inhaltliche Kategorie — MUSS zum domain passen. 
Moegliche Werte: {CATEGORY_LIST_STR} -- target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "oeffentlicher_dienst")""" +- target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "oeffentlicher_dienst") +{APPLICABILITY_PROMPT}""" raw = await _llm_chat(prompt, REFORM_SYSTEM_PROMPT) data = _parse_llm_json(raw) @@ -1186,6 +1234,7 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Chunks ohne A - target_audience: Liste der Zielgruppen fuer die dieses Control relevant ist. Moegliche Werte: "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "vertrieb", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst" - source_article: Artikel-/Paragraphen-Referenz aus dem Text extrahieren (z.B. "Artikel 10", "Art. 5", "§ 42", "Section 3"). Leer lassen wenn nicht erkennbar. - source_paragraph: Absatz-Referenz aus dem Text extrahieren (z.B. "Absatz 5", "Abs. 3", "Nr. 2", "(1)"). Leer lassen wenn nicht erkennbar. +{APPLICABILITY_PROMPT} {joined}""" @@ -1228,6 +1277,7 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Chunks ohne A "paragraph": effective_paragraph, "license": lic.get("license", ""), "license_notice": lic.get("attribution", ""), + "source_type": lic.get("source_type", "law"), "url": chunk.source_url or "", } control.customer_visible = True @@ -1289,6 +1339,7 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. 
Fuer Aspekte ohne - domain: Fachgebiet als Kuerzel (AUTH=Authentifizierung, CRYP=Kryptographie, NET=Netzwerk, DATA=Datenschutz, LOG=Logging, ACC=Zugriffskontrolle, SEC=IT-Sicherheit, INC=Vorfallmanagement, AI=KI, COMP=Compliance, GOV=Behoerden/Verwaltung, LAB=Arbeitsrecht, FIN=Finanzregulierung, TRD=Gewerbe/Handelsrecht, ENV=Umwelt, HLT=Gesundheit) - category: Inhaltliche Kategorie — MUSS zum domain passen. Moegliche Werte: {CATEGORY_LIST_STR} - target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst") +{APPLICABILITY_PROMPT} {joined}""" @@ -1522,6 +1573,29 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Aspekte ohne if not isinstance(target_audience, list): target_audience = None + # Parse applicability fields (v3) + applicable_industries = data.get("applicable_industries") + if isinstance(applicable_industries, str): + applicable_industries = [applicable_industries] + if not isinstance(applicable_industries, list): + applicable_industries = None + + applicable_company_size = data.get("applicable_company_size") + if isinstance(applicable_company_size, str): + applicable_company_size = [applicable_company_size] + if not isinstance(applicable_company_size, list): + applicable_company_size = None + # Validate size values + valid_sizes = {"all", "micro", "small", "medium", "large", "enterprise"} + if applicable_company_size: + applicable_company_size = [s for s in applicable_company_size if s in valid_sizes] + if not applicable_company_size: + applicable_company_size = None + + scope_conditions = data.get("scope_conditions") + if not isinstance(scope_conditions, dict): + scope_conditions = None + control = GeneratedControl( title=str(data.get("title", "Untitled Control"))[:255], objective=str(data.get("objective", 
"")), @@ -1536,6 +1610,9 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Aspekte ohne tags=tags[:20], target_audience=target_audience, category=category, + applicable_industries=applicable_industries, + applicable_company_size=applicable_company_size, + scope_conditions=scope_conditions, ) # Store effective domain for later control_id generation control.generation_metadata["_effective_domain"] = domain @@ -1738,7 +1815,8 @@ Kategorien: {CATEGORY_LIST_STR}""" license_rule, source_original_text, source_citation, customer_visible, generation_metadata, verification_method, category, generation_strategy, - target_audience, pipeline_version + target_audience, pipeline_version, + applicable_industries, applicable_company_size, scope_conditions ) VALUES ( :framework_id, :control_id, :title, :objective, :rationale, :scope, :requirements, :test_procedure, :evidence, @@ -1747,7 +1825,8 @@ Kategorien: {CATEGORY_LIST_STR}""" :license_rule, :source_original_text, :source_citation, :customer_visible, :generation_metadata, :verification_method, :category, :generation_strategy, - :target_audience, :pipeline_version + :target_audience, :pipeline_version, + :applicable_industries, :applicable_company_size, :scope_conditions ) ON CONFLICT (framework_id, control_id) DO NOTHING RETURNING id @@ -1778,6 +1857,9 @@ Kategorien: {CATEGORY_LIST_STR}""" "generation_strategy": control.generation_strategy, "target_audience": json.dumps(control.target_audience) if control.target_audience else None, "pipeline_version": PIPELINE_VERSION, + "applicable_industries": json.dumps(control.applicable_industries) if control.applicable_industries else None, + "applicable_company_size": json.dumps(control.applicable_company_size) if control.applicable_company_size else None, + "scope_conditions": json.dumps(control.scope_conditions) if control.scope_conditions else None, }, ) self.db.commit() diff --git a/backend-compliance/migrations/063_control_applicability.sql 
b/backend-compliance/migrations/063_control_applicability.sql new file mode 100644 index 0000000..9dc3aa7 --- /dev/null +++ b/backend-compliance/migrations/063_control_applicability.sql @@ -0,0 +1,23 @@ +-- Migration 063: Scoped Control Applicability +-- +-- Adds 3 new JSONB columns to canonical_controls for filtering controls +-- based on customer industry, company size, and compliance scope. +-- +-- v3 pipeline generates these fields automatically via LLM. +-- Old controls (v1/v2) will be backfilled separately. + +ALTER TABLE canonical_controls + ADD COLUMN IF NOT EXISTS applicable_industries JSONB DEFAULT NULL, + ADD COLUMN IF NOT EXISTS applicable_company_size JSONB DEFAULT NULL, + ADD COLUMN IF NOT EXISTS scope_conditions JSONB DEFAULT NULL; + +-- GIN index for JSONB containment queries (e.g. applicable_industries @> '"Telekommunikation"') +CREATE INDEX IF NOT EXISTS idx_cc_applicable_industries + ON canonical_controls USING gin (applicable_industries); + +CREATE INDEX IF NOT EXISTS idx_cc_applicable_company_size + ON canonical_controls USING gin (applicable_company_size); + +COMMENT ON COLUMN canonical_controls.applicable_industries IS 'Industries this control applies to, e.g. ["all"] or ["Telekommunikation", "Energie"]. NULL = not yet classified.'; +COMMENT ON COLUMN canonical_controls.applicable_company_size IS 'Company sizes this control applies to, e.g. ["all"] or ["medium", "large", "enterprise"]. NULL = not yet classified.'; +COMMENT ON COLUMN canonical_controls.scope_conditions IS 'Optional scope conditions, e.g. {"requires_any": ["uses_ai"], "description": "..."}. 
NULL = no conditions.'; diff --git a/backend-compliance/tests/test_control_generator.py b/backend-compliance/tests/test_control_generator.py index fc812f0..d55eb90 100644 --- a/backend-compliance/tests/test_control_generator.py +++ b/backend-compliance/tests/test_control_generator.py @@ -31,53 +31,69 @@ class TestLicenseMapping: info = _classify_regulation("eu_2016_679") assert info["rule"] == 1 assert info["name"] == "DSGVO" + assert info["source_type"] == "law" def test_rule1_nist(self): info = _classify_regulation("nist_sp_800_53") assert info["rule"] == 1 assert "NIST" in info["name"] + assert info["source_type"] == "standard" def test_rule1_german_law(self): info = _classify_regulation("bdsg") assert info["rule"] == 1 assert info["name"] == "BDSG" + assert info["source_type"] == "law" def test_rule2_owasp(self): info = _classify_regulation("owasp_asvs") assert info["rule"] == 2 assert "OWASP" in info["name"] assert "attribution" in info + assert info["source_type"] == "standard" def test_rule2_enisa_prefix(self): info = _classify_regulation("enisa_iot_security") assert info["rule"] == 2 assert "ENISA" in info["name"] + assert info["source_type"] == "standard" def test_rule3_bsi_prefix(self): info = _classify_regulation("bsi_tr03161") assert info["rule"] == 3 assert info["name"] == "INTERNAL_ONLY" + assert info["source_type"] == "restricted" def test_rule3_iso_prefix(self): info = _classify_regulation("iso_27001") assert info["rule"] == 3 + assert info["source_type"] == "restricted" def test_rule3_etsi_prefix(self): info = _classify_regulation("etsi_en_303_645") assert info["rule"] == 3 + assert info["source_type"] == "restricted" def test_unknown_defaults_to_rule3(self): info = _classify_regulation("some_unknown_source") assert info["rule"] == 3 + assert info["source_type"] == "restricted" def test_case_insensitive(self): info = _classify_regulation("EU_2016_679") assert info["rule"] == 1 + assert info["source_type"] == "law" def 
test_all_mapped_regulations_have_valid_rules(self): for code, info in REGULATION_LICENSE_MAP.items(): assert info["rule"] in (1, 2, 3), f"{code} has invalid rule {info['rule']}" + def test_all_mapped_regulations_have_source_type(self): + valid_types = {"law", "guideline", "standard", "restricted"} + for code, info in REGULATION_LICENSE_MAP.items(): + assert "source_type" in info, f"{code} missing source_type" + assert info["source_type"] in valid_types, f"{code} has invalid source_type {info['source_type']}" + def test_rule3_never_exposes_names(self): for prefix in ["bsi_test", "iso_test", "etsi_test"]: info = _classify_regulation(prefix) @@ -1125,8 +1141,8 @@ class TestRegulationFilter: class TestPipelineVersion: """Tests for pipeline_version propagation in DB writes and null handling.""" - def test_pipeline_version_constant_is_2(self): - assert PIPELINE_VERSION == 2 + def test_pipeline_version_constant_is_3(self): + assert PIPELINE_VERSION == 3 def test_store_control_includes_pipeline_version(self): """_store_control must pass pipeline_version=PIPELINE_VERSION to the INSERT.""" @@ -1396,3 +1412,259 @@ class TestRecitalDetection: assert result is not None assert "126" in result["recital_numbers"] assert "127" in result["recital_numbers"] + + +# ============================================================================= +# Source Type Classification Tests +# ============================================================================= + +class TestSourceTypeClassification: + """Tests that source_type correctly distinguishes law vs guideline vs standard vs restricted.""" + + def test_eu_regulations_are_law(self): + """All EU regulations (Verordnungen/Richtlinien) must be classified as 'law'.""" + eu_laws = ["eu_2016_679", "eu_2024_1689", "eu_2022_2555", "eu_2024_2847", + "eucsa", "dataact", "dora", "eaa"] + for code in eu_laws: + info = _classify_regulation(code) + assert info["source_type"] == "law", f"{code} should be law, got {info['source_type']}" + + def 
test_german_laws_are_law(self): + """German national laws must be classified as 'law'.""" + de_laws = ["bdsg", "ttdsg", "tkg", "bgb_komplett", "hgb", "gewo"] + for code in de_laws: + info = _classify_regulation(code) + assert info["source_type"] == "law", f"{code} should be law, got {info['source_type']}" + + def test_austrian_laws_are_law(self): + """Austrian laws must be classified as 'law'.""" + at_laws = ["at_dsg", "at_abgb", "at_ecg", "at_tkg"] + for code in at_laws: + info = _classify_regulation(code) + assert info["source_type"] == "law", f"{code} should be law, got {info['source_type']}" + + def test_nist_is_standard_not_law(self): + """NIST frameworks are US standards, NOT EU law — must be 'standard'.""" + nist_codes = ["nist_sp_800_53", "nist_csf_2_0", "nist_ai_rmf", "nistir_8259a"] + for code in nist_codes: + info = _classify_regulation(code) + assert info["source_type"] == "standard", f"{code} should be standard, got {info['source_type']}" + + def test_cisa_is_standard(self): + info = _classify_regulation("cisa_secure_by_design") + assert info["source_type"] == "standard" + + def test_owasp_is_standard(self): + """OWASP frameworks are voluntary standards, not law.""" + owasp_codes = ["owasp_asvs", "owasp_top10", "owasp_samm"] + for code in owasp_codes: + info = _classify_regulation(code) + assert info["source_type"] == "standard", f"{code} should be standard, got {info['source_type']}" + + def test_enisa_prefix_is_standard(self): + info = _classify_regulation("enisa_threat_landscape") + assert info["source_type"] == "standard" + + def test_oecd_is_standard(self): + info = _classify_regulation("oecd_ai_principles") + assert info["source_type"] == "standard" + + def test_edpb_is_guideline(self): + """EDPB guidelines are authoritative but non-binding soft law.""" + edpb_codes = ["edpb_01_2020", "edpb_dpbd_04_2019", "edpb_legitimate_interest"] + for code in edpb_codes: + info = _classify_regulation(code) + assert info["source_type"] == "guideline", f"{code} 
should be guideline, got {info['source_type']}" + + def test_wp29_is_guideline(self): + """WP29 (pre-EDPB) guidelines are soft law.""" + for code in ["wp244_profiling", "wp260_transparency"]: + info = _classify_regulation(code) + assert info["source_type"] == "guideline", f"{code} should be guideline, got {info['source_type']}" + + def test_blue_guide_is_guideline(self): + info = _classify_regulation("eu_blue_guide_2022") + assert info["source_type"] == "guideline" + + def test_bsi_is_restricted(self): + info = _classify_regulation("bsi_grundschutz") + assert info["source_type"] == "restricted" + + def test_iso_is_restricted(self): + info = _classify_regulation("iso_27001") + assert info["source_type"] == "restricted" + + def test_etsi_is_restricted(self): + info = _classify_regulation("etsi_en_303_645") + assert info["source_type"] == "restricted" + + def test_unknown_is_restricted(self): + info = _classify_regulation("totally_unknown") + assert info["source_type"] == "restricted" + + def test_source_type_and_license_rule_are_independent(self): + """source_type classifies legal authority; license_rule classifies copyright. 
+ NIST is Rule 1 (public domain, free use) but source_type='standard' (not a law).""" + nist = _classify_regulation("nist_sp_800_53") + assert nist["rule"] == 1 # free use (copyright) + assert nist["source_type"] == "standard" # NOT law (legal authority) + + edpb = _classify_regulation("edpb_01_2020") + assert edpb["rule"] == 1 # free use (public authority) + assert edpb["source_type"] == "guideline" # NOT law (soft law) + + +# ============================================================================= +# Scoped Control Applicability Tests (v3 Pipeline) +# ============================================================================= + +class TestApplicabilityFields: + """Tests for applicable_industries, applicable_company_size, scope_conditions parsing.""" + + def _make_pipeline(self): + """Create a pipeline with mocked DB.""" + db = MagicMock() + pipeline = ControlGeneratorPipeline(db=db, rag_client=MagicMock()) + return pipeline + + def test_all_industries_parsed(self): + pipeline = self._make_pipeline() + data = { + "title": "Test", + "objective": "Test objective", + "applicable_industries": ["all"], + "applicable_company_size": ["all"], + "scope_conditions": None, + } + control = pipeline._build_control_from_json(data, "SEC") + assert control.applicable_industries == ["all"] + assert control.applicable_company_size == ["all"] + assert control.scope_conditions is None + + def test_specific_industries_parsed(self): + pipeline = self._make_pipeline() + data = { + "title": "TKG Control", + "objective": "Telekommunikation", + "applicable_industries": ["Telekommunikation", "Energie"], + "applicable_company_size": ["medium", "large", "enterprise"], + "scope_conditions": None, + } + control = pipeline._build_control_from_json(data, "INC") + assert control.applicable_industries == ["Telekommunikation", "Energie"] + assert control.applicable_company_size == ["medium", "large", "enterprise"] + + def test_scope_conditions_parsed(self): + pipeline = self._make_pipeline() 
+ data = { + "title": "AI Act Control", + "objective": "KI-Risikomanagement", + "applicable_industries": ["all"], + "applicable_company_size": ["all"], + "scope_conditions": { + "requires_any": ["uses_ai"], + "description": "Nur bei KI-Einsatz relevant", + }, + } + control = pipeline._build_control_from_json(data, "AI") + assert control.scope_conditions is not None + assert control.scope_conditions["requires_any"] == ["uses_ai"] + assert "KI" in control.scope_conditions["description"] + + def test_missing_applicability_fields_are_none(self): + """Old-style LLM response without applicability fields.""" + pipeline = self._make_pipeline() + data = { + "title": "Legacy Control", + "objective": "Test", + } + control = pipeline._build_control_from_json(data, "SEC") + assert control.applicable_industries is None + assert control.applicable_company_size is None + assert control.scope_conditions is None + + def test_string_industry_converted_to_list(self): + """LLM sometimes returns a string instead of list.""" + pipeline = self._make_pipeline() + data = { + "title": "Test", + "objective": "Test", + "applicable_industries": "Telekommunikation", + "applicable_company_size": "all", + } + control = pipeline._build_control_from_json(data, "SEC") + assert control.applicable_industries == ["Telekommunikation"] + assert control.applicable_company_size == ["all"] + + def test_invalid_company_size_filtered(self): + """Invalid size values should be filtered out.""" + pipeline = self._make_pipeline() + data = { + "title": "Test", + "objective": "Test", + "applicable_company_size": ["medium", "huge", "large"], + } + control = pipeline._build_control_from_json(data, "SEC") + assert control.applicable_company_size == ["medium", "large"] + + def test_all_invalid_sizes_results_in_none(self): + pipeline = self._make_pipeline() + data = { + "title": "Test", + "objective": "Test", + "applicable_company_size": ["huge", "tiny"], + } + control = pipeline._build_control_from_json(data, "SEC") + 
assert control.applicable_company_size is None + + def test_scope_conditions_non_dict_ignored(self): + """If LLM returns a string for scope_conditions, ignore it.""" + pipeline = self._make_pipeline() + data = { + "title": "Test", + "objective": "Test", + "scope_conditions": "uses_ai", + } + control = pipeline._build_control_from_json(data, "SEC") + assert control.scope_conditions is None + + def test_multiple_scope_signals(self): + pipeline = self._make_pipeline() + data = { + "title": "EHDS Control", + "objective": "Gesundheitsdaten", + "applicable_industries": ["Gesundheitswesen", "Pharma"], + "applicable_company_size": ["all"], + "scope_conditions": { + "requires_any": ["processes_health_data", "uses_ai"], + "description": "Gesundheitsdaten mit KI-Verarbeitung", + }, + } + control = pipeline._build_control_from_json(data, "HLT") + assert len(control.scope_conditions["requires_any"]) == 2 + assert "processes_health_data" in control.scope_conditions["requires_any"] + + def test_pipeline_version_is_3(self): + """v3 pipeline includes applicability fields.""" + assert PIPELINE_VERSION == 3 + + def test_generated_control_dataclass_has_fields(self): + """Verify the dataclass has the new fields with correct defaults.""" + ctrl = GeneratedControl() + assert ctrl.applicable_industries is None + assert ctrl.applicable_company_size is None + assert ctrl.scope_conditions is None + + def test_applicability_in_generation_metadata_not_leaked(self): + """Applicability fields should be top-level, not in generation_metadata.""" + pipeline = self._make_pipeline() + data = { + "title": "Test", + "objective": "Test", + "applicable_industries": ["all"], + "applicable_company_size": ["all"], + "scope_conditions": None, + } + control = pipeline._build_control_from_json(data, "SEC") + assert "applicable_industries" not in control.generation_metadata + assert "applicable_company_size" not in control.generation_metadata diff --git a/docs-src/development/testing.md 
b/docs-src/development/testing.md index ea7dc6b..a80d537 100644 --- a/docs-src/development/testing.md +++ b/docs-src/development/testing.md @@ -214,13 +214,13 @@ Wenn du z.B. eine neue `GetUserStats()` Funktion im Go Service hinzufuegst: ## Modul-spezifische Tests -### Canonical Control Generator (81+ Tests) +### Canonical Control Generator (98+ Tests) Die Control Library hat eine umfangreiche Test-Suite ueber 6 Dateien. Siehe [Canonical Control Library — Tests](../services/sdk-modules/canonical-control-library.md#tests) und [Control Generator Pipeline](../services/sdk-modules/control-generator-pipeline.md) fuer Details. ```bash -# Alle Generator-Tests (81 Tests in 12 Klassen) +# Alle Generator-Tests (98 Tests in 13 Klassen) cd backend-compliance && pytest -v tests/test_control_generator.py # Similarity Detector Tests @@ -242,7 +242,7 @@ cd backend-compliance && pytest -v tests/test_validate_controls.py | Klasse | Tests | Prueft | |--------|-------|--------| -| `TestLicenseMapping` | 12 | Lizenz-Klassifikation (Rule 1/2/3), Case-Insensitivitaet | +| `TestLicenseMapping` | 13 | Lizenz-Klassifikation (Rule 1/2/3), Case-Insensitivitaet, source_type | | `TestDomainDetection` | 5 | Keyword-basierte Domain-Erkennung (AUTH, CRYP, NET, DATA) | | `TestJsonParsing` | 4 | JSON-Parser fuer LLM-Responses (Markdown-Fencing, Preamble) | | `TestGeneratedControlRules` | 3 | Rule-spezifische Felder (original_text, citation, source_info) | @@ -254,3 +254,4 @@ cd backend-compliance && pytest -v tests/test_validate_controls.py | `TestRegulationFilter` | 5 | regulation_filter Prefix-Matching, leere regulation_codes | | `TestPipelineVersion` | 5 | pipeline_version=2 in DB-Writes, null-Handling in Structure/Reform | | `TestRecitalDetection` | 10 | Erwaegungsgrund-Erkennung in Quelltexten (Regex, Phrasen, Kombiniert) | +| `TestSourceTypeClassification` | 16 | law/guideline/standard/restricted Klassifizierung aller Quellentypen | diff --git 
a/docs-src/services/sdk-modules/canonical-control-library.md b/docs-src/services/sdk-modules/canonical-control-library.md index 7fe2491..2aadfa6 100644 --- a/docs-src/services/sdk-modules/canonical-control-library.md +++ b/docs-src/services/sdk-modules/canonical-control-library.md @@ -98,6 +98,7 @@ erDiagram varchar generation_strategy smallint pipeline_version integer license_rule + jsonb source_citation jsonb open_anchors } canonical_control_mappings { @@ -316,7 +317,7 @@ Der Validator (`scripts/validate-controls.py`) prueft bei jedem Commit: - Ziel, Begruendung, Geltungsbereich - Anforderungen, Pruefverfahren, Nachweise -- **Gesetzliche Grundlage** (blaue Box): source_citation mit Artikel, Paragraph, Lizenz, Link +- **Quellennachweis** (dynamische Farbe): `source_type`-basiert — blau fuer Gesetze, indigo fuer Leitlinien, teal fuer Standards - **Open-Source-Referenzen** (gruener Kasten): Verlinkte Open Anchors - Generierungsdetails: processing_path, similarity_status - Tags, Risk Score, Implementation Effort @@ -613,15 +614,19 @@ Bei der Generierung werden automatisch zugewiesen: ### Architektur-Entscheidung: Gesetzesverweise -Controls leiten sich aus zwei Quellen ab: +Controls leiten sich aus vier Quellentypen ab (Feld `source_citation.source_type`): -1. **Direkte gesetzliche Pflichten (Rule 1):** z.B. DSGVO Art. 32 erzwingt "technische und organisatorische Massnahmen". Diese Controls haben `source_citation` mit exakter Gesetzesreferenz und Originaltext. 
+| source_type | Beschreibung | Beispiele | Frontend-Darstellung | +|-------------|-------------|-----------|---------------------| +| `law` | Bindendes EU/DE/AT-Recht | DSGVO, AI Act, BDSG, NIS2 | Blaue Box "Gesetzliche Grundlage" + Badge "Direkte gesetzliche Pflicht" | +| `guideline` | Behoerdliche Leitlinien (Soft Law) | EDPB, WP29, Blue Guide | Indigo Box "Behoerdliche Leitlinie" + Badge "Aufsichtsbehoerdliche Empfehlung" | +| `standard` | Freiwillige Standards/Frameworks | NIST, OWASP, ENISA, CISA, OECD | Teal Box "Standard / Best Practice" + Badge "Freiwilliger Standard" | +| `restricted` | Geschuetzte Normen (Rule 3) | BSI, ISO, ETSI | Amber Box "Abgeleitet aus regulatorischen Anforderungen" (kein Originaltext) | -2. **Implizite Umsetzung ueber Best Practices (Rule 2/3):** z.B. OWASP ASVS V2.7 fordert MFA — das ist keine gesetzliche Pflicht, aber eine Best Practice um NIS2 Art. 21 oder DSGVO Art. 32 zu erfuellen. Diese Controls haben Open-Source-Referenzen (Anchors). - -**Im Frontend:** -- Rule 1/2 Controls zeigen eine blaue "Gesetzliche Grundlage" Box mit Gesetz, Artikel und Link -- Rule 3 Controls zeigen einen Hinweis dass sie implizit Gesetze umsetzen, mit Verweis auf die Referenzen +!!! warning "source_type vs license_rule" + `source_type` klassifiziert die **rechtliche Verbindlichkeit** (Ist es ein Gesetz?). + `license_rule` klassifiziert das **Urheberrecht** (Darf man den Text zitieren?). + Beispiel: NIST ist Rule 1 (Public Domain = freie Nutzung) aber `source_type = "standard"` (kein EU-Gesetz). 
### API @@ -816,8 +821,8 @@ curl -X POST https://api-dev.breakpilot.ai/api/compliance/v1/canonical/controls | `backend-compliance/tests/test_canonical_control_routes.py` | Python | 14 Tests | REST API Endpoints | | `backend-compliance/tests/test_license_gate.py` | Python | 12 Tests | Lizenz-Klassifikation | | `backend-compliance/tests/test_validate_controls.py` | Python | 14 Tests | CI/CD Validator | -| `backend-compliance/tests/test_control_generator.py` | Python | 81 Tests | Pipeline, Batch, Lizenzregeln, QA, Recital | -| **Gesamt** | | **149+ Tests** | +| `backend-compliance/tests/test_control_generator.py` | Python | 98 Tests | Pipeline, Batch, Lizenzregeln, QA, Recital, Source-Type | +| **Gesamt** | | **166+ Tests** | ### Control Generator Tests (test_control_generator.py) @@ -825,7 +830,7 @@ Die Generator-Tests decken folgende Bereiche ab: | Klasse | Tests | Prueft | |--------|-------|--------| -| `TestLicenseMapping` | 12 | Lizenz-Klassifikation (Rule 1/2/3), Case-Insensitivitaet | +| `TestLicenseMapping` | 13 | Lizenz-Klassifikation (Rule 1/2/3), Case-Insensitivitaet, source_type | | `TestDomainDetection` | 5 | Keyword-basierte Domain-Erkennung (AUTH, CRYP, NET, DATA) | | `TestJsonParsing` | 4 | JSON-Parser fuer LLM-Responses (Markdown-Fencing, Preamble) | | `TestGeneratedControlRules` | 3 | Rule-spezifische Felder (original_text, citation, source_info) | @@ -837,6 +842,7 @@ Die Generator-Tests decken folgende Bereiche ab: | `TestRegulationFilter` | 5 | regulation_filter Prefix-Matching, leere regulation_codes | | `TestPipelineVersion` | 5 | pipeline_version=2 in DB-Writes, null-Handling in Structure/Reform | | `TestRecitalDetection` | 10 | Erwaegungsgrund-Erkennung in Quelltexten (Regex, Phrasen, Kombiniert) | +| `TestSourceTypeClassification` | 16 | law/guideline/standard/restricted Klassifizierung aller Quellentypen | --- diff --git a/docs-src/services/sdk-modules/control-generator-pipeline.md b/docs-src/services/sdk-modules/control-generator-pipeline.md 
index 8fc70a2..e9111ff 100644 --- a/docs-src/services/sdk-modules/control-generator-pipeline.md +++ b/docs-src/services/sdk-modules/control-generator-pipeline.md @@ -563,7 +563,7 @@ curl -X POST https://api-dev.breakpilot.ai/api/compliance/v1/canonical/generate/ | `backend-compliance/migrations/046_control_generator.sql` | Job-Tracking, Chunk-Tracking Tabellen | | `backend-compliance/migrations/048_processing_path_expand.sql` | Erweiterte Processing-Path-Werte | | `backend-compliance/migrations/062_pipeline_version.sql` | `pipeline_version` Spalte | -| `backend-compliance/tests/test_control_generator.py` | 81+ Tests (Lizenz, Domain, Batch, Pipeline, Recital) | +| `backend-compliance/tests/test_control_generator.py` | 98+ Tests (Lizenz, Domain, Batch, Pipeline, Recital, Source-Type) | ---