-
-
Gesetzliche Grundlage
- {ctrl.license_rule === 1 && (
+
+ {
+ ctrl.source_citation.source_type === 'law' ? 'Gesetzliche Grundlage' :
+ ctrl.source_citation.source_type === 'guideline' ? 'Behoerdliche Leitlinie' :
+ 'Standard / Best Practice'
+ }
+ {ctrl.source_citation.source_type === 'law' && (
Direkte gesetzliche Pflicht
)}
- {ctrl.license_rule === 2 && (
- Standard mit Zitationspflicht
+ {ctrl.source_citation.source_type === 'guideline' && (
+ Aufsichtsbehoerdliche Empfehlung
+ )}
+ {(ctrl.source_citation.source_type === 'standard' || (!ctrl.source_citation.source_type && ctrl.license_rule === 2)) && (
+ Freiwilliger Standard
+ )}
+ {(!ctrl.source_citation.source_type && ctrl.license_rule === 1) && (
+ Noch nicht klassifiziert
)}
diff --git a/backend-compliance/compliance/api/canonical_control_routes.py b/backend-compliance/compliance/api/canonical_control_routes.py
index 2c57ee5..df44dda 100644
--- a/backend-compliance/compliance/api/canonical_control_routes.py
+++ b/backend-compliance/compliance/api/canonical_control_routes.py
@@ -82,6 +82,9 @@ class ControlResponse(BaseModel):
target_audience: Optional[str] = None
generation_metadata: Optional[dict] = None
generation_strategy: Optional[str] = "ungrouped"
+ applicable_industries: Optional[list] = None
+ applicable_company_size: Optional[list] = None
+ scope_conditions: Optional[dict] = None
created_at: str
updated_at: str
@@ -111,6 +114,9 @@ class ControlCreateRequest(BaseModel):
category: Optional[str] = None
target_audience: Optional[str] = None
generation_metadata: Optional[dict] = None
+ applicable_industries: Optional[list] = None
+ applicable_company_size: Optional[list] = None
+ scope_conditions: Optional[dict] = None
class ControlUpdateRequest(BaseModel):
@@ -136,6 +142,9 @@ class ControlUpdateRequest(BaseModel):
category: Optional[str] = None
target_audience: Optional[str] = None
generation_metadata: Optional[dict] = None
+ applicable_industries: Optional[list] = None
+ applicable_company_size: Optional[list] = None
+ scope_conditions: Optional[dict] = None
class SimilarityCheckRequest(BaseModel):
@@ -164,6 +173,7 @@ _CONTROL_COLS = """id, framework_id, control_id, title, objective, rationale,
license_rule, source_original_text, source_citation,
customer_visible, verification_method, category,
target_audience, generation_metadata, generation_strategy,
+ applicable_industries, applicable_company_size, scope_conditions,
created_at, updated_at"""
@@ -511,7 +521,8 @@ async def create_control(body: ControlCreateRequest):
open_anchors, release_state, tags,
license_rule, source_original_text, source_citation,
customer_visible, verification_method, category,
- target_audience, generation_metadata
+ target_audience, generation_metadata,
+ applicable_industries, applicable_company_size, scope_conditions
) VALUES (
:fw_id, :cid, :title, :objective, :rationale,
CAST(:scope AS jsonb), CAST(:requirements AS jsonb),
@@ -521,7 +532,10 @@ async def create_control(body: ControlCreateRequest):
:license_rule, :source_original_text,
CAST(:source_citation AS jsonb),
:customer_visible, :verification_method, :category,
- :target_audience, CAST(:generation_metadata AS jsonb)
+ :target_audience, CAST(:generation_metadata AS jsonb),
+ CAST(:applicable_industries AS jsonb),
+ CAST(:applicable_company_size AS jsonb),
+ CAST(:scope_conditions AS jsonb)
)
RETURNING {_CONTROL_COLS}
"""),
@@ -550,6 +564,9 @@ async def create_control(body: ControlCreateRequest):
"category": body.category,
"target_audience": body.target_audience,
"generation_metadata": _json.dumps(body.generation_metadata) if body.generation_metadata else None,
+ "applicable_industries": _json.dumps(body.applicable_industries) if body.applicable_industries else None,
+ "applicable_company_size": _json.dumps(body.applicable_company_size) if body.applicable_company_size else None,
+ "scope_conditions": _json.dumps(body.scope_conditions) if body.scope_conditions else None,
},
).fetchone()
db.commit()
@@ -778,6 +795,9 @@ def _control_row(r) -> dict:
"target_audience": r.target_audience,
"generation_metadata": r.generation_metadata,
"generation_strategy": getattr(r, "generation_strategy", "ungrouped"),
+ "applicable_industries": getattr(r, "applicable_industries", None),
+ "applicable_company_size": getattr(r, "applicable_company_size", None),
+ "scope_conditions": getattr(r, "scope_conditions", None),
"created_at": r.created_at.isoformat() if r.created_at else None,
"updated_at": r.updated_at.isoformat() if r.updated_at else None,
}
diff --git a/backend-compliance/compliance/api/control_generator_routes.py b/backend-compliance/compliance/api/control_generator_routes.py
index 65664d6..f2cecac 100644
--- a/backend-compliance/compliance/api/control_generator_routes.py
+++ b/backend-compliance/compliance/api/control_generator_routes.py
@@ -28,6 +28,7 @@ from compliance.services.control_generator import (
ALL_COLLECTIONS,
VALID_CATEGORIES,
VALID_DOMAINS,
+ _classify_regulation,
_detect_category,
_detect_domain,
_llm_local,
@@ -978,3 +979,165 @@ async def get_domain_backfill_status(backfill_id: str):
if not status:
raise HTTPException(status_code=404, detail="Domain backfill job not found")
return status
+
+
+ # ---------------------------------------------------------------------------
+ # Source-Type Backfill — Classify law vs guideline vs standard vs restricted
+ # ---------------------------------------------------------------------------
+
+ class SourceTypeBackfillRequest(BaseModel):
+     """Request body for the source-type backfill endpoint."""
+
+     # True (the default) previews the classification without writing anything.
+     dry_run: bool = True
+
+
+ # Job status keyed by backfill_id; held only in this module-level dict.
+ _source_type_backfill_status: dict = {}
+
+ # Strong references to in-flight backfill tasks. The event loop keeps only weak
+ # references, so a task nobody else holds could be garbage-collected mid-run.
+ _source_type_backfill_tasks: set = set()
+
+ # Rows processed between intermediate commits and progress updates.
+ _SOURCE_TYPE_BACKFILL_BATCH = 100
+
+
+ def _source_type_backfill_sync(dry_run: bool, backfill_id: str) -> None:
+     """Classify unclassified controls and write source_type into their citation.
+
+     Blocking worker behind ``_run_source_type_backfill``. Progress and the final
+     result are published in ``_source_type_backfill_status[backfill_id]``; a
+     failure is recorded there as ``status="failed"`` rather than raised.
+
+     Args:
+         dry_run: If True, count what would change but execute no UPDATE.
+         backfill_id: Key under which the job status is stored.
+     """
+     db = SessionLocal()
+     try:
+         # Only controls whose citation has no (or an empty) source_type yet.
+         rows = db.execute(text("""
+             SELECT control_id, source_citation, generation_metadata
+             FROM compliance.canonical_controls
+             WHERE source_citation IS NOT NULL
+               AND (source_citation->>'source_type' IS NULL
+                    OR source_citation->>'source_type' = '')
+         """)).fetchall()
+
+         total = len(rows)
+         updated = 0
+         errors = []
+
+         _source_type_backfill_status[backfill_id] = {
+             "status": "running", "total": total, "updated": 0, "dry_run": dry_run,
+         }
+
+         for row in rows:
+             cid = row[0]
+             # JSONB may arrive already decoded (dict) or as JSON text.
+             citation = row[1] if isinstance(row[1], dict) else json.loads(row[1] or "{}")
+             metadata = row[2] if isinstance(row[2], dict) else json.loads(row[2] or "{}")
+             if not isinstance(citation, dict) or not isinstance(metadata, dict):
+                 # A JSON array/scalar cannot take a source_type key; record it and
+                 # move on instead of aborting the whole job.
+                 errors.append(f"{cid}: source_citation/generation_metadata is not a JSON object")
+                 continue
+
+             # The regulation code stored in the generation metadata drives the classification.
+             reg_code = metadata.get("source_regulation", "")
+             if not reg_code:
+                 errors.append(f"{cid}: no source_regulation in metadata")
+                 continue
+
+             # A classification without source_type falls back to "restricted".
+             license_info = _classify_regulation(reg_code)
+             source_type = license_info.get("source_type", "restricted")
+             citation["source_type"] = source_type
+
+             if not dry_run:
+                 # NOTE(review): matches on control_id only — confirm it is unique in this table.
+                 db.execute(text("""
+                     UPDATE compliance.canonical_controls
+                     SET source_citation = CAST(:citation AS jsonb)
+                     WHERE control_id = :cid
+                 """), {"citation": json.dumps(citation), "cid": cid})
+             updated += 1
+
+             # Checked after the increment so every intermediate commit covers a
+             # full batch (the pre-increment check committed after the very first row).
+             if updated % _SOURCE_TYPE_BACKFILL_BATCH == 0:
+                 if not dry_run:
+                     db.commit()
+                 _source_type_backfill_status[backfill_id]["updated"] = updated
+
+         if not dry_run:
+             db.commit()
+
+         # Distribution over all classified controls; skipped on dry runs.
+         dist_query = db.execute(text("""
+             SELECT source_citation->>'source_type' as st, COUNT(*)
+             FROM compliance.canonical_controls
+             WHERE source_citation IS NOT NULL
+               AND source_citation->>'source_type' IS NOT NULL
+             GROUP BY st
+         """)).fetchall() if not dry_run else []
+
+         distribution = {r[0]: r[1] for r in dist_query}
+
+         _source_type_backfill_status[backfill_id] = {
+             "status": "completed", "total": total, "updated": updated,
+             "dry_run": dry_run, "distribution": distribution,
+             "errors": errors[:50],
+         }
+         logger.info("Source-type backfill %s completed: %d/%d updated (dry_run=%s)",
+                     backfill_id, updated, total, dry_run)
+
+     except Exception as e:
+         # Top-level job boundary: keep the traceback in the log, expose the message to pollers.
+         logger.exception("Source-type backfill %s failed: %s", backfill_id, e)
+         _source_type_backfill_status[backfill_id] = {"status": "failed", "error": str(e)}
+     finally:
+         db.close()
+
+
+ async def _run_source_type_backfill(dry_run: bool, backfill_id: str):
+     """Backfill source_type into source_citation JSONB for all controls.
+
+     The work consists of synchronous database calls, so it is handed to the
+     default executor; running it inline would block the event loop (and every
+     other request) until the job finishes.
+     """
+     loop = asyncio.get_running_loop()
+     await loop.run_in_executor(None, _source_type_backfill_sync, dry_run, backfill_id)
+
+
+ @router.post("/generate/backfill-source-type")
+ async def start_source_type_backfill(req: SourceTypeBackfillRequest):
+     """Backfill source_type (law/guideline/standard/restricted) into source_citation JSONB.
+
+     Classifies each control's source as binding law, authority guideline,
+     voluntary standard, or restricted norm based on regulation_code.
+     Default is dry_run=True (preview only).
+     """
+     import uuid
+     backfill_id = str(uuid.uuid4())[:8]
+     _source_type_backfill_status[backfill_id] = {"status": "starting"}
+     task = asyncio.create_task(_run_source_type_backfill(req.dry_run, backfill_id))
+     # Hold the task until it finishes, then drop the reference again.
+     _source_type_backfill_tasks.add(task)
+     task.add_done_callback(_source_type_backfill_tasks.discard)
+     return {
+         "status": "running",
+         "backfill_id": backfill_id,
+         "message": f"Source-type backfill started. Poll /generate/source-type-backfill-status/{backfill_id}",
+     }
+
+
+ @router.get("/generate/source-type-backfill-status/{backfill_id}")
+ async def get_source_type_backfill_status(backfill_id: str):
+     """Return the stored status of a source-type backfill job, or 404 if the id is unknown."""
+     status = _source_type_backfill_status.get(backfill_id)
+     if not status:
+         raise HTTPException(status_code=404, detail="Source-type backfill job not found")
+     return status
diff --git a/backend-compliance/compliance/services/control_generator.py b/backend-compliance/compliance/services/control_generator.py
index 1de79a4..447f89a 100644
--- a/backend-compliance/compliance/services/control_generator.py
+++ b/backend-compliance/compliance/services/control_generator.py
@@ -56,7 +56,8 @@ HARMONIZATION_THRESHOLD = 0.85 # Cosine similarity above this = duplicate
# Pipeline version — increment when generation rules change materially.
# v1: Original (local LLM prefilter, old prompt)
# v2: Anthropic decides relevance, null for non-requirement chunks, annexes protected
-PIPELINE_VERSION = 2
+# v3: Scoped Control Applicability — applicable_industries, applicable_company_size, scope_conditions
+PIPELINE_VERSION = 3
ALL_COLLECTIONS = [
"bp_compliance_ce",
@@ -72,119 +73,121 @@ ALL_COLLECTIONS = [
REGULATION_LICENSE_MAP: dict[str, dict] = {
# RULE 1: FREE USE — Laws, Public Domain
+ # source_type: "law" = binding legislation, "guideline" = authority guidance (soft law),
+ # "standard" = voluntary framework/best practice, "restricted" = protected norm
# EU Regulations
- "eu_2016_679": {"license": "EU_LAW", "rule": 1, "name": "DSGVO"},
- "eu_2024_1689": {"license": "EU_LAW", "rule": 1, "name": "AI Act (KI-Verordnung)"},
- "eu_2022_2555": {"license": "EU_LAW", "rule": 1, "name": "NIS2"},
- "eu_2024_2847": {"license": "EU_LAW", "rule": 1, "name": "Cyber Resilience Act (CRA)"},
- "eu_2023_1230": {"license": "EU_LAW", "rule": 1, "name": "Maschinenverordnung"},
- "eu_2022_2065": {"license": "EU_LAW", "rule": 1, "name": "Digital Services Act (DSA)"},
- "eu_2022_1925": {"license": "EU_LAW", "rule": 1, "name": "Digital Markets Act (DMA)"},
- "eu_2022_868": {"license": "EU_LAW", "rule": 1, "name": "Data Governance Act (DGA)"},
- "eu_2019_770": {"license": "EU_LAW", "rule": 1, "name": "Digitale-Inhalte-Richtlinie"},
- "eu_2021_914": {"license": "EU_LAW", "rule": 1, "name": "Standardvertragsklauseln (SCC)"},
- "eu_2002_58": {"license": "EU_LAW", "rule": 1, "name": "ePrivacy-Richtlinie"},
- "eu_2000_31": {"license": "EU_LAW", "rule": 1, "name": "E-Commerce-Richtlinie"},
- "eu_2023_1803": {"license": "EU_LAW", "rule": 1, "name": "IFRS-Uebernahmeverordnung"},
- "eucsa": {"license": "EU_LAW", "rule": 1, "name": "EU Cybersecurity Act"},
- "dataact": {"license": "EU_LAW", "rule": 1, "name": "Data Act"},
- "dora": {"license": "EU_LAW", "rule": 1, "name": "Digital Operational Resilience Act"},
- "ehds": {"license": "EU_LAW", "rule": 1, "name": "European Health Data Space"},
- "gpsr": {"license": "EU_LAW", "rule": 1, "name": "Allgemeine Produktsicherheitsverordnung"},
- "mica": {"license": "EU_LAW", "rule": 1, "name": "Markets in Crypto-Assets"},
- "psd2": {"license": "EU_LAW", "rule": 1, "name": "Zahlungsdiensterichtlinie 2"},
- "dpf": {"license": "EU_LAW", "rule": 1, "name": "EU-US Data Privacy Framework"},
- "dsm": {"license": "EU_LAW", "rule": 1, "name": "DSM-Urheberrechtsrichtlinie"},
- "amlr": {"license": "EU_LAW", "rule": 1, "name": "AML-Verordnung"},
- "eu_blue_guide_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "Blue Guide 2022"},
- # NIST (Public Domain — all variants)
- "nist_sp_800_53": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-53"},
- "nist_sp800_53r5": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-53 Rev.5"},
- "nist_sp_800_63b": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-63B"},
- "nist_sp800_63_3": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-63-3"},
- "nist_csf_2_0": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST CSF 2.0"},
- "nist_sp_800_218": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SSDF"},
- "nist_sp800_207": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-207 Zero Trust"},
- "nist_ai_rmf": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST AI Risk Management Framework"},
- "nistir_8259a": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NISTIR 8259A IoT Security"},
- "cisa_secure_by_design": {"license": "US_GOV_PUBLIC", "rule": 1, "name": "CISA Secure by Design"},
+ "eu_2016_679": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "DSGVO"},
+ "eu_2024_1689": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "AI Act (KI-Verordnung)"},
+ "eu_2022_2555": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "NIS2"},
+ "eu_2024_2847": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Cyber Resilience Act (CRA)"},
+ "eu_2023_1230": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Maschinenverordnung"},
+ "eu_2022_2065": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Digital Services Act (DSA)"},
+ "eu_2022_1925": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Digital Markets Act (DMA)"},
+ "eu_2022_868": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Data Governance Act (DGA)"},
+ "eu_2019_770": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Digitale-Inhalte-Richtlinie"},
+ "eu_2021_914": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Standardvertragsklauseln (SCC)"},
+ "eu_2002_58": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "ePrivacy-Richtlinie"},
+ "eu_2000_31": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "E-Commerce-Richtlinie"},
+ "eu_2023_1803": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "IFRS-Uebernahmeverordnung"},
+ "eucsa": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "EU Cybersecurity Act"},
+ "dataact": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Data Act"},
+ "dora": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Digital Operational Resilience Act"},
+ "ehds": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "European Health Data Space"},
+ "gpsr": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Allgemeine Produktsicherheitsverordnung"},
+ "mica": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Markets in Crypto-Assets"},
+ "psd2": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Zahlungsdiensterichtlinie 2"},
+ "dpf": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "EU-US Data Privacy Framework"},
+ "dsm": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "DSM-Urheberrechtsrichtlinie"},
+ "amlr": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "AML-Verordnung"},
+ "eu_blue_guide_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "Blue Guide 2022"},
+ # NIST (Public Domain — NOT laws, voluntary standards)
+ "nist_sp_800_53": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-53"},
+ "nist_sp800_53r5": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-53 Rev.5"},
+ "nist_sp_800_63b": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-63B"},
+ "nist_sp800_63_3": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-63-3"},
+ "nist_csf_2_0": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST CSF 2.0"},
+ "nist_sp_800_218": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SSDF"},
+ "nist_sp800_207": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-207 Zero Trust"},
+ "nist_ai_rmf": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST AI Risk Management Framework"},
+ "nistir_8259a": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NISTIR 8259A IoT Security"},
+ "cisa_secure_by_design": {"license": "US_GOV_PUBLIC", "rule": 1, "source_type": "standard", "name": "CISA Secure by Design"},
# German Laws
- "bdsg": {"license": "DE_LAW", "rule": 1, "name": "BDSG"},
- "bdsg_2018_komplett": {"license": "DE_LAW", "rule": 1, "name": "BDSG 2018"},
- "ttdsg": {"license": "DE_LAW", "rule": 1, "name": "TTDSG"},
- "tdddg_25": {"license": "DE_LAW", "rule": 1, "name": "TDDDG"},
- "tkg": {"license": "DE_LAW", "rule": 1, "name": "TKG"},
- "de_tkg": {"license": "DE_LAW", "rule": 1, "name": "TKG"},
- "bgb_komplett": {"license": "DE_LAW", "rule": 1, "name": "BGB"},
- "hgb": {"license": "DE_LAW", "rule": 1, "name": "HGB"},
- "hgb_komplett": {"license": "DE_LAW", "rule": 1, "name": "HGB"},
- "urhg_komplett": {"license": "DE_LAW", "rule": 1, "name": "UrhG"},
- "uwg": {"license": "DE_LAW", "rule": 1, "name": "UWG"},
- "tmg_komplett": {"license": "DE_LAW", "rule": 1, "name": "TMG"},
- "gewo": {"license": "DE_LAW", "rule": 1, "name": "GewO"},
- "ao": {"license": "DE_LAW", "rule": 1, "name": "Abgabenordnung"},
- "ao_komplett": {"license": "DE_LAW", "rule": 1, "name": "Abgabenordnung"},
- "battdg": {"license": "DE_LAW", "rule": 1, "name": "Batteriegesetz"},
+ "bdsg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "BDSG"},
+ "bdsg_2018_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "BDSG 2018"},
+ "ttdsg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TTDSG"},
+ "tdddg_25": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TDDDG"},
+ "tkg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TKG"},
+ "de_tkg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TKG"},
+ "bgb_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "BGB"},
+ "hgb": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "HGB"},
+ "hgb_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "HGB"},
+ "urhg_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "UrhG"},
+ "uwg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "UWG"},
+ "tmg_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TMG"},
+ "gewo": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "GewO"},
+ "ao": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "Abgabenordnung"},
+ "ao_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "Abgabenordnung"},
+ "battdg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "Batteriegesetz"},
# Austrian Laws
- "at_dsg": {"license": "AT_LAW", "rule": 1, "name": "AT DSG"},
- "at_abgb": {"license": "AT_LAW", "rule": 1, "name": "AT ABGB"},
- "at_abgb_agb": {"license": "AT_LAW", "rule": 1, "name": "AT ABGB AGB-Recht"},
- "at_bao": {"license": "AT_LAW", "rule": 1, "name": "AT BAO"},
- "at_bao_ret": {"license": "AT_LAW", "rule": 1, "name": "AT BAO Retention"},
- "at_ecg": {"license": "AT_LAW", "rule": 1, "name": "AT E-Commerce-Gesetz"},
- "at_kschg": {"license": "AT_LAW", "rule": 1, "name": "AT Konsumentenschutzgesetz"},
- "at_medieng": {"license": "AT_LAW", "rule": 1, "name": "AT Mediengesetz"},
- "at_tkg": {"license": "AT_LAW", "rule": 1, "name": "AT TKG"},
- "at_ugb": {"license": "AT_LAW", "rule": 1, "name": "AT UGB"},
- "at_ugb_ret": {"license": "AT_LAW", "rule": 1, "name": "AT UGB Retention"},
- "at_uwg": {"license": "AT_LAW", "rule": 1, "name": "AT UWG"},
+ "at_dsg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT DSG"},
+ "at_abgb": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT ABGB"},
+ "at_abgb_agb": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT ABGB AGB-Recht"},
+ "at_bao": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT BAO"},
+ "at_bao_ret": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT BAO Retention"},
+ "at_ecg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT E-Commerce-Gesetz"},
+ "at_kschg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT Konsumentenschutzgesetz"},
+ "at_medieng": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT Mediengesetz"},
+ "at_tkg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT TKG"},
+ "at_ugb": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT UGB"},
+ "at_ugb_ret": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT UGB Retention"},
+ "at_uwg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT UWG"},
# Other EU Member State Laws
- "fr_loi_informatique": {"license": "FR_LAW", "rule": 1, "name": "FR Loi Informatique"},
- "es_lopdgdd": {"license": "ES_LAW", "rule": 1, "name": "ES LOPDGDD"},
- "nl_uavg": {"license": "NL_LAW", "rule": 1, "name": "NL UAVG"},
- "it_codice_privacy": {"license": "IT_LAW", "rule": 1, "name": "IT Codice Privacy"},
- "hu_info_tv": {"license": "HU_LAW", "rule": 1, "name": "HU Információs törvény"},
- # EDPB Guidelines (EU Public Authority)
- "edpb_01_2020": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB 01/2020 Ergaenzende Massnahmen"},
- "edpb_02_2023": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB 02/2023 Technischer Anwendungsbereich"},
- "edpb_05_2020": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB 05/2020 Einwilligung"},
- "edpb_09_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB 09/2022 Datenschutzverletzungen"},
- "edpb_bcr_01_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB BCR Leitlinien"},
- "edpb_breach_09_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Breach Notification"},
- "edpb_connected_vehicles_01_2020": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Connected Vehicles"},
- "edpb_dpbd_04_2019": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Data Protection by Design"},
- "edpb_eprivacy_02_2023": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB ePrivacy"},
- "edpb_facial_recognition_05_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Facial Recognition"},
- "edpb_fines_04_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Fines Calculation"},
- "edpb_legitimate_interest": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Legitimate Interest"},
- "edpb_legitimate_interest_01_2024": {"license": "EU_PUBLIC","rule": 1, "name": "EDPB Legitimate Interest 2024"},
- "edpb_social_media_08_2020": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Social Media"},
- "edpb_transfers_01_2020":{"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Transfers 01/2020"},
- "edpb_transfers_07_2020":{"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Transfers 07/2020"},
- "edpb_video_03_2019": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Video Surveillance"},
- "edps_dpia_list": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPS DPIA Liste"},
- "edpb_certification_01_2018": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Certification 01/2018"},
- "edpb_certification_01_2019": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Certification 01/2019"},
- "eaa": {"license": "EU_LAW", "rule": 1, "name": "European Accessibility Act"},
- # WP29 (pre-EDPB) Guidelines
- "wp244_profiling": {"license": "EU_PUBLIC", "rule": 1, "name": "WP29 Profiling"},
- "wp251_profiling": {"license": "EU_PUBLIC", "rule": 1, "name": "WP29 Data Portability"},
- "wp260_transparency": {"license": "EU_PUBLIC", "rule": 1, "name": "WP29 Transparency"},
+ "fr_loi_informatique": {"license": "FR_LAW", "rule": 1, "source_type": "law", "name": "FR Loi Informatique"},
+ "es_lopdgdd": {"license": "ES_LAW", "rule": 1, "source_type": "law", "name": "ES LOPDGDD"},
+ "nl_uavg": {"license": "NL_LAW", "rule": 1, "source_type": "law", "name": "NL UAVG"},
+ "it_codice_privacy": {"license": "IT_LAW", "rule": 1, "source_type": "law", "name": "IT Codice Privacy"},
+ "hu_info_tv": {"license": "HU_LAW", "rule": 1, "source_type": "law", "name": "HU Információs törvény"},
+ # EDPB Guidelines (EU Public Authority — soft law, not binding legislation)
+ "edpb_01_2020": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB 01/2020 Ergaenzende Massnahmen"},
+ "edpb_02_2023": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB 02/2023 Technischer Anwendungsbereich"},
+ "edpb_05_2020": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB 05/2020 Einwilligung"},
+ "edpb_09_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB 09/2022 Datenschutzverletzungen"},
+ "edpb_bcr_01_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB BCR Leitlinien"},
+ "edpb_breach_09_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Breach Notification"},
+ "edpb_connected_vehicles_01_2020": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Connected Vehicles"},
+ "edpb_dpbd_04_2019": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Data Protection by Design"},
+ "edpb_eprivacy_02_2023": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB ePrivacy"},
+ "edpb_facial_recognition_05_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Facial Recognition"},
+ "edpb_fines_04_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Fines Calculation"},
+ "edpb_legitimate_interest": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Legitimate Interest"},
+ "edpb_legitimate_interest_01_2024": {"license": "EU_PUBLIC","rule": 1, "source_type": "guideline", "name": "EDPB Legitimate Interest 2024"},
+ "edpb_social_media_08_2020": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Social Media"},
+ "edpb_transfers_01_2020":{"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Transfers 01/2020"},
+ "edpb_transfers_07_2020":{"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Transfers 07/2020"},
+ "edpb_video_03_2019": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Video Surveillance"},
+ "edps_dpia_list": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPS DPIA Liste"},
+ "edpb_certification_01_2018": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Certification 01/2018"},
+ "edpb_certification_01_2019": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Certification 01/2019"},
+ "eaa": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "European Accessibility Act"},
+ # WP29 (pre-EDPB) Guidelines — soft law
+ "wp244_profiling": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "WP29 Profiling"},
+ "wp251_profiling": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "WP29 Data Portability"},
+ "wp260_transparency": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "WP29 Transparency"},
- # RULE 2: CITATION REQUIRED — CC-BY, CC-BY-SA
- "owasp_asvs": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP ASVS",
+ # RULE 2: CITATION REQUIRED — CC-BY, CC-BY-SA (voluntary standards)
+ "owasp_asvs": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP ASVS",
"attribution": "OWASP Foundation, CC BY-SA 4.0"},
- "owasp_masvs": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP MASVS",
+ "owasp_masvs": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP MASVS",
"attribution": "OWASP Foundation, CC BY-SA 4.0"},
- "owasp_top10": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP Top 10",
+ "owasp_top10": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP Top 10",
"attribution": "OWASP Foundation, CC BY-SA 4.0"},
- "owasp_top10_2021": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP Top 10 2021",
+ "owasp_top10_2021": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP Top 10 2021",
"attribution": "OWASP Foundation, CC BY-SA 4.0"},
- "owasp_api_top10_2023": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP API Top 10 2023",
+ "owasp_api_top10_2023": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP API Top 10 2023",
"attribution": "OWASP Foundation, CC BY-SA 4.0"},
- "owasp_samm": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP SAMM",
+ "owasp_samm": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP SAMM",
"attribution": "OWASP Foundation, CC BY-SA 4.0"},
- "oecd_ai_principles": {"license": "OECD_PUBLIC", "rule": 2, "name": "OECD AI Principles",
+ "oecd_ai_principles": {"license": "OECD_PUBLIC", "rule": 2, "source_type": "standard", "name": "OECD AI Principles",
"attribution": "OECD"},
# RULE 3: RESTRICTED — Full reformulation required
@@ -197,28 +200,32 @@ _RULE2_PREFIXES = ["enisa_"]
def _classify_regulation(regulation_code: str) -> dict:
- """Determine license rule for a regulation_code."""
+ """Determine license rule for a regulation_code.
+
+ Returns dict with keys: license, rule, name, source_type.
+ source_type is one of: law, guideline, standard, restricted.
+ """
code = regulation_code.lower().strip()
# Exact match first
if code in REGULATION_LICENSE_MAP:
return REGULATION_LICENSE_MAP[code]
- # Prefix match for Rule 2
+ # Prefix match for Rule 2 (ENISA = standard)
for prefix in _RULE2_PREFIXES:
if code.startswith(prefix):
- return {"license": "CC-BY-4.0", "rule": 2, "name": "ENISA",
- "attribution": "ENISA, CC BY 4.0"}
+ return {"license": "CC-BY-4.0", "rule": 2, "source_type": "standard",
+ "name": "ENISA", "attribution": "ENISA, CC BY 4.0"}
- # Prefix match for Rule 3
+ # Prefix match for Rule 3 (BSI/ISO/ETSI = restricted)
for prefix in _RULE3_PREFIXES:
if code.startswith(prefix):
return {"license": f"{prefix.rstrip('_').upper()}_RESTRICTED", "rule": 3,
- "name": "INTERNAL_ONLY"}
+ "source_type": "restricted", "name": "INTERNAL_ONLY"}
# Unknown → treat as restricted (safe default)
logger.warning("Unknown regulation_code %r — defaulting to Rule 3 (restricted)", code)
- return {"license": "UNKNOWN", "rule": 3, "name": "INTERNAL_ONLY"}
+ return {"license": "UNKNOWN", "rule": 3, "source_type": "restricted", "name": "INTERNAL_ONLY"}
# ---------------------------------------------------------------------------
@@ -476,6 +483,10 @@ class GeneratedControl:
verification_method: Optional[str] = None # code_review, document, tool, hybrid
category: Optional[str] = None # one of 22 categories
target_audience: Optional[list] = None # e.g. ["unternehmen", "behoerden", "entwickler"]
+ # Scoped Control Applicability (v3)
+ applicable_industries: Optional[list] = None # e.g. ["all"] or ["Telekommunikation", "Energie"]
+ applicable_company_size: Optional[list] = None # e.g. ["all"] or ["medium", "large", "enterprise"]
+ scope_conditions: Optional[dict] = None # e.g. {"requires_any": ["uses_ai"], "description": "..."}
@dataclass
@@ -769,6 +780,38 @@ STRUCTURE_SYSTEM_PROMPT = """Du bist ein Security-Compliance-Experte. Strukturie
als praxisorientiertes Security Control. Erstelle eine verständliche, umsetzbare Formulierung.
Antworte NUR mit validem JSON. Bei mehreren Controls antworte mit einem JSON-Array."""
+# Shared applicability prompt block — appended to all generation prompts (v3)
+APPLICABILITY_PROMPT = """- applicable_industries: Liste der Branchen fuer die dieses Control relevant ist.
+ Verwende ["all"] wenn der Control branchenuebergreifend gilt.
+ Moegliche Werte: "all", "Technologie / IT", "IT Dienstleistungen", "E-Commerce / Handel",
+ "Finanzdienstleistungen", "Versicherungen", "Gesundheitswesen", "Pharma", "Bildung",
+ "Beratung / Consulting", "Marketing / Agentur", "Produktion / Industrie",
+ "Logistik / Transport", "Immobilien", "Bau", "Energie", "Automobil",
+ "Luft- / Raumfahrt", "Maschinenbau", "Anlagenbau", "Automatisierung", "Robotik",
+ "Messtechnik", "Agrar", "Chemie", "Minen / Bergbau", "Telekommunikation",
+ "Medien / Verlage", "Gastronomie / Hotellerie", "Recht / Kanzlei",
+ "Oeffentlicher Dienst", "Verteidigung / Ruestung", "Wasser- / Abwasserwirtschaft",
+ "Lebensmittel", "Digitale Infrastruktur", "Weltraum", "Post / Kurierdienste",
+ "Abfallwirtschaft", "Forschung"
+ Beispiel: TKG-Controls → ["Telekommunikation"]
+ Beispiel: DSGVO Art. 32 → ["all"]
+ Beispiel: NIS2 Art. 21 → ["Energie", "Gesundheitswesen", "Digitale Infrastruktur", "Logistik / Transport", ...]
+- applicable_company_size: Ab welcher Unternehmensgroesse gilt dieses Control?
+ Verwende ["all"] wenn keine Groessenbeschraenkung.
+ Moegliche Werte: "all", "micro", "small", "medium", "large", "enterprise"
+ Groessen: micro (<10 MA), small (10-49), medium (50-249), large (250-999), enterprise (1000+)
+ Beispiel: NIS2 Art. 21 → ["medium", "large", "enterprise"]
+ Beispiel: DSGVO Art. 5 → ["all"]
+- scope_conditions: Optionale Bedingungen aus dem Compliance-Scope des Unternehmens.
+ null wenn keine besonderen Bedingungen. Sonst JSON-Objekt:
+ {"requires_any": ["signal1", "signal2"], "description": "Kurze Erklaerung wann relevant"}
+ Moegliche Signale: "uses_ai", "third_country_transfer", "processes_health_data",
+ "processes_minors_data", "automated_decisions", "employee_monitoring",
+ "video_surveillance", "financial_data", "is_kritis_operator", "payment_services"
+ Beispiel AI Act: {"requires_any": ["uses_ai"], "description": "Nur bei KI-Einsatz relevant"}
+ Beispiel SCC: {"requires_any": ["third_country_transfer"], "description": "Nur bei Drittlandtransfer"}
+ Beispiel DSGVO Art. 32 (allgemein): null"""
+
class ControlGeneratorPipeline:
"""Orchestrates the 7-stage control generation pipeline."""
@@ -973,6 +1016,7 @@ Gib JSON zurück mit diesen Feldern:
- target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst")
- source_article: Artikel-/Paragraphen-Referenz aus dem Text (z.B. "Artikel 10", "§ 42"). Leer lassen wenn nicht erkennbar.
- source_paragraph: Absatz-Referenz aus dem Text (z.B. "Absatz 5", "Nr. 2"). Leer lassen wenn nicht erkennbar.
+{APPLICABILITY_PROMPT}
Text: {chunk.text[:2000]}
Quelle: {chunk.regulation_name} ({chunk.regulation_code}), {chunk.article}"""
@@ -995,6 +1039,7 @@ Quelle: {chunk.regulation_name} ({chunk.regulation_code}), {chunk.article}"""
"article": effective_article,
"paragraph": effective_paragraph,
"license": license_info.get("license", ""),
+ "source_type": license_info.get("source_type", "law"),
"url": chunk.source_url or "",
}
control.customer_visible = True
@@ -1036,6 +1081,7 @@ Gib JSON zurück mit diesen Feldern:
- target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst")
- source_article: Artikel-/Paragraphen-Referenz aus dem Text (z.B. "Artikel 10", "§ 42"). Leer lassen wenn nicht erkennbar.
- source_paragraph: Absatz-Referenz aus dem Text (z.B. "Absatz 5", "Nr. 2"). Leer lassen wenn nicht erkennbar.
+{APPLICABILITY_PROMPT}
Text: {chunk.text[:2000]}
Quelle: {chunk.regulation_name}, {chunk.article}"""
@@ -1059,6 +1105,7 @@ Quelle: {chunk.regulation_name}, {chunk.article}"""
"paragraph": effective_paragraph,
"license": license_info.get("license", ""),
"license_notice": attribution,
+ "source_type": license_info.get("source_type", "standard"),
"url": chunk.source_url or "",
}
control.customer_visible = True
@@ -1101,7 +1148,8 @@ Gib JSON zurück mit diesen Feldern:
- tags: Liste von Tags (eigene Begriffe)
- domain: Fachgebiet als Kuerzel (AUTH=Authentifizierung, CRYP=Kryptographie, NET=Netzwerk, DATA=Datenschutz, LOG=Logging, ACC=Zugriffskontrolle, SEC=IT-Sicherheit, INC=Vorfallmanagement, AI=KI, COMP=Compliance, GOV=Behoerden/Verwaltung, LAB=Arbeitsrecht, FIN=Finanzregulierung, TRD=Gewerbe/Handelsrecht, ENV=Umwelt, HLT=Gesundheit)
- category: Inhaltliche Kategorie — MUSS zum domain passen. Moegliche Werte: {CATEGORY_LIST_STR}
-- target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "oeffentlicher_dienst")"""
+- target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "oeffentlicher_dienst")
+{APPLICABILITY_PROMPT}"""
raw = await _llm_chat(prompt, REFORM_SYSTEM_PROMPT)
data = _parse_llm_json(raw)
@@ -1186,6 +1234,7 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Chunks ohne A
- target_audience: Liste der Zielgruppen fuer die dieses Control relevant ist. Moegliche Werte: "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "vertrieb", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst"
- source_article: Artikel-/Paragraphen-Referenz aus dem Text extrahieren (z.B. "Artikel 10", "Art. 5", "§ 42", "Section 3"). Leer lassen wenn nicht erkennbar.
- source_paragraph: Absatz-Referenz aus dem Text extrahieren (z.B. "Absatz 5", "Abs. 3", "Nr. 2", "(1)"). Leer lassen wenn nicht erkennbar.
+{APPLICABILITY_PROMPT}
{joined}"""
@@ -1228,6 +1277,7 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Chunks ohne A
"paragraph": effective_paragraph,
"license": lic.get("license", ""),
"license_notice": lic.get("attribution", ""),
+ "source_type": lic.get("source_type", "law"),
"url": chunk.source_url or "",
}
control.customer_visible = True
@@ -1289,6 +1339,7 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Aspekte ohne
- domain: Fachgebiet als Kuerzel (AUTH=Authentifizierung, CRYP=Kryptographie, NET=Netzwerk, DATA=Datenschutz, LOG=Logging, ACC=Zugriffskontrolle, SEC=IT-Sicherheit, INC=Vorfallmanagement, AI=KI, COMP=Compliance, GOV=Behoerden/Verwaltung, LAB=Arbeitsrecht, FIN=Finanzregulierung, TRD=Gewerbe/Handelsrecht, ENV=Umwelt, HLT=Gesundheit)
- category: Inhaltliche Kategorie — MUSS zum domain passen. Moegliche Werte: {CATEGORY_LIST_STR}
- target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst")
+{APPLICABILITY_PROMPT}
{joined}"""
@@ -1522,6 +1573,29 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Aspekte ohne
if not isinstance(target_audience, list):
target_audience = None
+ # Parse applicability fields (v3)
+ applicable_industries = data.get("applicable_industries")
+ if isinstance(applicable_industries, str):
+ applicable_industries = [applicable_industries]
+ if not isinstance(applicable_industries, list):
+ applicable_industries = None
+
+ applicable_company_size = data.get("applicable_company_size")
+ if isinstance(applicable_company_size, str):
+ applicable_company_size = [applicable_company_size]
+ if not isinstance(applicable_company_size, list):
+ applicable_company_size = None
+ # Validate size values
+ valid_sizes = {"all", "micro", "small", "medium", "large", "enterprise"}
+ if applicable_company_size:
+ applicable_company_size = [s for s in applicable_company_size if s in valid_sizes]
+ if not applicable_company_size:
+ applicable_company_size = None
+
+ scope_conditions = data.get("scope_conditions")
+ if not isinstance(scope_conditions, dict):
+ scope_conditions = None
+
control = GeneratedControl(
title=str(data.get("title", "Untitled Control"))[:255],
objective=str(data.get("objective", "")),
@@ -1536,6 +1610,9 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Aspekte ohne
tags=tags[:20],
target_audience=target_audience,
category=category,
+ applicable_industries=applicable_industries,
+ applicable_company_size=applicable_company_size,
+ scope_conditions=scope_conditions,
)
# Store effective domain for later control_id generation
control.generation_metadata["_effective_domain"] = domain
@@ -1738,7 +1815,8 @@ Kategorien: {CATEGORY_LIST_STR}"""
license_rule, source_original_text, source_citation,
customer_visible, generation_metadata,
verification_method, category, generation_strategy,
- target_audience, pipeline_version
+ target_audience, pipeline_version,
+ applicable_industries, applicable_company_size, scope_conditions
) VALUES (
:framework_id, :control_id, :title, :objective, :rationale,
:scope, :requirements, :test_procedure, :evidence,
@@ -1747,7 +1825,8 @@ Kategorien: {CATEGORY_LIST_STR}"""
:license_rule, :source_original_text, :source_citation,
:customer_visible, :generation_metadata,
:verification_method, :category, :generation_strategy,
- :target_audience, :pipeline_version
+ :target_audience, :pipeline_version,
+ :applicable_industries, :applicable_company_size, :scope_conditions
)
ON CONFLICT (framework_id, control_id) DO NOTHING
RETURNING id
@@ -1778,6 +1857,9 @@ Kategorien: {CATEGORY_LIST_STR}"""
"generation_strategy": control.generation_strategy,
"target_audience": json.dumps(control.target_audience) if control.target_audience else None,
"pipeline_version": PIPELINE_VERSION,
+ "applicable_industries": json.dumps(control.applicable_industries) if control.applicable_industries else None,
+ "applicable_company_size": json.dumps(control.applicable_company_size) if control.applicable_company_size else None,
+ "scope_conditions": json.dumps(control.scope_conditions) if control.scope_conditions else None,
},
)
self.db.commit()
diff --git a/backend-compliance/migrations/063_control_applicability.sql b/backend-compliance/migrations/063_control_applicability.sql
new file mode 100644
index 0000000..9dc3aa7
--- /dev/null
+++ b/backend-compliance/migrations/063_control_applicability.sql
@@ -0,0 +1,23 @@
+-- Migration 063: Scoped Control Applicability
+--
+-- Adds 3 new JSONB columns to canonical_controls for filtering controls
+-- based on customer industry, company size, and compliance scope.
+--
+-- v3 pipeline generates these fields automatically via LLM.
+-- Old controls (v1/v2) will be backfilled separately.
+
+ALTER TABLE canonical_controls
+ ADD COLUMN IF NOT EXISTS applicable_industries JSONB DEFAULT NULL,
+ ADD COLUMN IF NOT EXISTS applicable_company_size JSONB DEFAULT NULL,
+ ADD COLUMN IF NOT EXISTS scope_conditions JSONB DEFAULT NULL;
+
+-- GIN index for JSONB containment queries (e.g. applicable_industries @> '"Telekommunikation"')
+CREATE INDEX IF NOT EXISTS idx_cc_applicable_industries
+ ON canonical_controls USING gin (applicable_industries);
+
+CREATE INDEX IF NOT EXISTS idx_cc_applicable_company_size
+ ON canonical_controls USING gin (applicable_company_size);
+
+COMMENT ON COLUMN canonical_controls.applicable_industries IS 'Industries this control applies to, e.g. ["all"] or ["Telekommunikation", "Energie"]. NULL = not yet classified.';
+COMMENT ON COLUMN canonical_controls.applicable_company_size IS 'Company sizes this control applies to, e.g. ["all"] or ["medium", "large", "enterprise"]. NULL = not yet classified.';
+COMMENT ON COLUMN canonical_controls.scope_conditions IS 'Optional scope conditions, e.g. {"requires_any": ["uses_ai"], "description": "..."}. NULL = no conditions.';
diff --git a/backend-compliance/tests/test_control_generator.py b/backend-compliance/tests/test_control_generator.py
index fc812f0..d55eb90 100644
--- a/backend-compliance/tests/test_control_generator.py
+++ b/backend-compliance/tests/test_control_generator.py
@@ -31,53 +31,69 @@ class TestLicenseMapping:
info = _classify_regulation("eu_2016_679")
assert info["rule"] == 1
assert info["name"] == "DSGVO"
+ assert info["source_type"] == "law"
def test_rule1_nist(self):
info = _classify_regulation("nist_sp_800_53")
assert info["rule"] == 1
assert "NIST" in info["name"]
+ assert info["source_type"] == "standard"
def test_rule1_german_law(self):
info = _classify_regulation("bdsg")
assert info["rule"] == 1
assert info["name"] == "BDSG"
+ assert info["source_type"] == "law"
def test_rule2_owasp(self):
info = _classify_regulation("owasp_asvs")
assert info["rule"] == 2
assert "OWASP" in info["name"]
assert "attribution" in info
+ assert info["source_type"] == "standard"
def test_rule2_enisa_prefix(self):
info = _classify_regulation("enisa_iot_security")
assert info["rule"] == 2
assert "ENISA" in info["name"]
+ assert info["source_type"] == "standard"
def test_rule3_bsi_prefix(self):
info = _classify_regulation("bsi_tr03161")
assert info["rule"] == 3
assert info["name"] == "INTERNAL_ONLY"
+ assert info["source_type"] == "restricted"
def test_rule3_iso_prefix(self):
info = _classify_regulation("iso_27001")
assert info["rule"] == 3
+ assert info["source_type"] == "restricted"
def test_rule3_etsi_prefix(self):
info = _classify_regulation("etsi_en_303_645")
assert info["rule"] == 3
+ assert info["source_type"] == "restricted"
def test_unknown_defaults_to_rule3(self):
info = _classify_regulation("some_unknown_source")
assert info["rule"] == 3
+ assert info["source_type"] == "restricted"
def test_case_insensitive(self):
info = _classify_regulation("EU_2016_679")
assert info["rule"] == 1
+ assert info["source_type"] == "law"
def test_all_mapped_regulations_have_valid_rules(self):
for code, info in REGULATION_LICENSE_MAP.items():
assert info["rule"] in (1, 2, 3), f"{code} has invalid rule {info['rule']}"
+ def test_all_mapped_regulations_have_source_type(self):
+ valid_types = {"law", "guideline", "standard", "restricted"}
+ for code, info in REGULATION_LICENSE_MAP.items():
+ assert "source_type" in info, f"{code} missing source_type"
+ assert info["source_type"] in valid_types, f"{code} has invalid source_type {info['source_type']}"
+
def test_rule3_never_exposes_names(self):
for prefix in ["bsi_test", "iso_test", "etsi_test"]:
info = _classify_regulation(prefix)
@@ -1125,8 +1141,8 @@ class TestRegulationFilter:
class TestPipelineVersion:
"""Tests for pipeline_version propagation in DB writes and null handling."""
- def test_pipeline_version_constant_is_2(self):
- assert PIPELINE_VERSION == 2
+ def test_pipeline_version_constant_is_3(self):
+ assert PIPELINE_VERSION == 3
def test_store_control_includes_pipeline_version(self):
"""_store_control must pass pipeline_version=PIPELINE_VERSION to the INSERT."""
@@ -1396,3 +1412,259 @@ class TestRecitalDetection:
assert result is not None
assert "126" in result["recital_numbers"]
assert "127" in result["recital_numbers"]
+
+
+# =============================================================================
+# Source Type Classification Tests
+# =============================================================================
+
+class TestSourceTypeClassification:
+ """Tests that source_type correctly distinguishes law vs guideline vs standard vs restricted."""
+
+ def test_eu_regulations_are_law(self):
+ """All EU regulations (Verordnungen/Richtlinien) must be classified as 'law'."""
+ eu_laws = ["eu_2016_679", "eu_2024_1689", "eu_2022_2555", "eu_2024_2847",
+ "eucsa", "dataact", "dora", "eaa"]
+ for code in eu_laws:
+ info = _classify_regulation(code)
+ assert info["source_type"] == "law", f"{code} should be law, got {info['source_type']}"
+
+ def test_german_laws_are_law(self):
+ """German national laws must be classified as 'law'."""
+ de_laws = ["bdsg", "ttdsg", "tkg", "bgb_komplett", "hgb", "gewo"]
+ for code in de_laws:
+ info = _classify_regulation(code)
+ assert info["source_type"] == "law", f"{code} should be law, got {info['source_type']}"
+
+ def test_austrian_laws_are_law(self):
+ """Austrian laws must be classified as 'law'."""
+ at_laws = ["at_dsg", "at_abgb", "at_ecg", "at_tkg"]
+ for code in at_laws:
+ info = _classify_regulation(code)
+ assert info["source_type"] == "law", f"{code} should be law, got {info['source_type']}"
+
+ def test_nist_is_standard_not_law(self):
+ """NIST frameworks are US standards, NOT EU law — must be 'standard'."""
+ nist_codes = ["nist_sp_800_53", "nist_csf_2_0", "nist_ai_rmf", "nistir_8259a"]
+ for code in nist_codes:
+ info = _classify_regulation(code)
+ assert info["source_type"] == "standard", f"{code} should be standard, got {info['source_type']}"
+
+ def test_cisa_is_standard(self):
+ info = _classify_regulation("cisa_secure_by_design")
+ assert info["source_type"] == "standard"
+
+ def test_owasp_is_standard(self):
+ """OWASP frameworks are voluntary standards, not law."""
+ owasp_codes = ["owasp_asvs", "owasp_top10", "owasp_samm"]
+ for code in owasp_codes:
+ info = _classify_regulation(code)
+ assert info["source_type"] == "standard", f"{code} should be standard, got {info['source_type']}"
+
+ def test_enisa_prefix_is_standard(self):
+ info = _classify_regulation("enisa_threat_landscape")
+ assert info["source_type"] == "standard"
+
+ def test_oecd_is_standard(self):
+ info = _classify_regulation("oecd_ai_principles")
+ assert info["source_type"] == "standard"
+
+ def test_edpb_is_guideline(self):
+ """EDPB guidelines are authoritative but non-binding soft law."""
+ edpb_codes = ["edpb_01_2020", "edpb_dpbd_04_2019", "edpb_legitimate_interest"]
+ for code in edpb_codes:
+ info = _classify_regulation(code)
+ assert info["source_type"] == "guideline", f"{code} should be guideline, got {info['source_type']}"
+
+ def test_wp29_is_guideline(self):
+ """WP29 (pre-EDPB) guidelines are soft law."""
+ for code in ["wp244_profiling", "wp260_transparency"]:
+ info = _classify_regulation(code)
+ assert info["source_type"] == "guideline", f"{code} should be guideline, got {info['source_type']}"
+
+ def test_blue_guide_is_guideline(self):
+ info = _classify_regulation("eu_blue_guide_2022")
+ assert info["source_type"] == "guideline"
+
+ def test_bsi_is_restricted(self):
+ info = _classify_regulation("bsi_grundschutz")
+ assert info["source_type"] == "restricted"
+
+ def test_iso_is_restricted(self):
+ info = _classify_regulation("iso_27001")
+ assert info["source_type"] == "restricted"
+
+ def test_etsi_is_restricted(self):
+ info = _classify_regulation("etsi_en_303_645")
+ assert info["source_type"] == "restricted"
+
+ def test_unknown_is_restricted(self):
+ info = _classify_regulation("totally_unknown")
+ assert info["source_type"] == "restricted"
+
+ def test_source_type_and_license_rule_are_independent(self):
+ """source_type classifies legal authority; license_rule classifies copyright.
+ NIST is Rule 1 (public domain, free use) but source_type='standard' (not a law)."""
+ nist = _classify_regulation("nist_sp_800_53")
+ assert nist["rule"] == 1 # free use (copyright)
+ assert nist["source_type"] == "standard" # NOT law (legal authority)
+
+ edpb = _classify_regulation("edpb_01_2020")
+ assert edpb["rule"] == 1 # free use (public authority)
+ assert edpb["source_type"] == "guideline" # NOT law (soft law)
+
+
+# =============================================================================
+# Scoped Control Applicability Tests (v3 Pipeline)
+# =============================================================================
+
+class TestApplicabilityFields:
+ """Tests for applicable_industries, applicable_company_size, scope_conditions parsing."""
+
+ def _make_pipeline(self):
+ """Create a pipeline with mocked DB."""
+ db = MagicMock()
+ pipeline = ControlGeneratorPipeline(db=db, rag_client=MagicMock())
+ return pipeline
+
+ def test_all_industries_parsed(self):
+ pipeline = self._make_pipeline()
+ data = {
+ "title": "Test",
+ "objective": "Test objective",
+ "applicable_industries": ["all"],
+ "applicable_company_size": ["all"],
+ "scope_conditions": None,
+ }
+ control = pipeline._build_control_from_json(data, "SEC")
+ assert control.applicable_industries == ["all"]
+ assert control.applicable_company_size == ["all"]
+ assert control.scope_conditions is None
+
+ def test_specific_industries_parsed(self):
+ pipeline = self._make_pipeline()
+ data = {
+ "title": "TKG Control",
+ "objective": "Telekommunikation",
+ "applicable_industries": ["Telekommunikation", "Energie"],
+ "applicable_company_size": ["medium", "large", "enterprise"],
+ "scope_conditions": None,
+ }
+ control = pipeline._build_control_from_json(data, "INC")
+ assert control.applicable_industries == ["Telekommunikation", "Energie"]
+ assert control.applicable_company_size == ["medium", "large", "enterprise"]
+
+ def test_scope_conditions_parsed(self):
+ pipeline = self._make_pipeline()
+ data = {
+ "title": "AI Act Control",
+ "objective": "KI-Risikomanagement",
+ "applicable_industries": ["all"],
+ "applicable_company_size": ["all"],
+ "scope_conditions": {
+ "requires_any": ["uses_ai"],
+ "description": "Nur bei KI-Einsatz relevant",
+ },
+ }
+ control = pipeline._build_control_from_json(data, "AI")
+ assert control.scope_conditions is not None
+ assert control.scope_conditions["requires_any"] == ["uses_ai"]
+ assert "KI" in control.scope_conditions["description"]
+
+ def test_missing_applicability_fields_are_none(self):
+ """Old-style LLM response without applicability fields."""
+ pipeline = self._make_pipeline()
+ data = {
+ "title": "Legacy Control",
+ "objective": "Test",
+ }
+ control = pipeline._build_control_from_json(data, "SEC")
+ assert control.applicable_industries is None
+ assert control.applicable_company_size is None
+ assert control.scope_conditions is None
+
+ def test_string_industry_converted_to_list(self):
+ """LLM sometimes returns a string instead of list."""
+ pipeline = self._make_pipeline()
+ data = {
+ "title": "Test",
+ "objective": "Test",
+ "applicable_industries": "Telekommunikation",
+ "applicable_company_size": "all",
+ }
+ control = pipeline._build_control_from_json(data, "SEC")
+ assert control.applicable_industries == ["Telekommunikation"]
+ assert control.applicable_company_size == ["all"]
+
+ def test_invalid_company_size_filtered(self):
+ """Invalid size values should be filtered out."""
+ pipeline = self._make_pipeline()
+ data = {
+ "title": "Test",
+ "objective": "Test",
+ "applicable_company_size": ["medium", "huge", "large"],
+ }
+ control = pipeline._build_control_from_json(data, "SEC")
+ assert control.applicable_company_size == ["medium", "large"]
+
+ def test_all_invalid_sizes_results_in_none(self):
+ pipeline = self._make_pipeline()
+ data = {
+ "title": "Test",
+ "objective": "Test",
+ "applicable_company_size": ["huge", "tiny"],
+ }
+ control = pipeline._build_control_from_json(data, "SEC")
+ assert control.applicable_company_size is None
+
+ def test_scope_conditions_non_dict_ignored(self):
+ """If LLM returns a string for scope_conditions, ignore it."""
+ pipeline = self._make_pipeline()
+ data = {
+ "title": "Test",
+ "objective": "Test",
+ "scope_conditions": "uses_ai",
+ }
+ control = pipeline._build_control_from_json(data, "SEC")
+ assert control.scope_conditions is None
+
+ def test_multiple_scope_signals(self):
+ pipeline = self._make_pipeline()
+ data = {
+ "title": "EHDS Control",
+ "objective": "Gesundheitsdaten",
+ "applicable_industries": ["Gesundheitswesen", "Pharma"],
+ "applicable_company_size": ["all"],
+ "scope_conditions": {
+ "requires_any": ["processes_health_data", "uses_ai"],
+ "description": "Gesundheitsdaten mit KI-Verarbeitung",
+ },
+ }
+ control = pipeline._build_control_from_json(data, "HLT")
+ assert len(control.scope_conditions["requires_any"]) == 2
+ assert "processes_health_data" in control.scope_conditions["requires_any"]
+
+ def test_pipeline_version_is_3(self):
+ """v3 pipeline includes applicability fields."""
+ assert PIPELINE_VERSION == 3
+
+ def test_generated_control_dataclass_has_fields(self):
+ """Verify the dataclass has the new fields with correct defaults."""
+ ctrl = GeneratedControl()
+ assert ctrl.applicable_industries is None
+ assert ctrl.applicable_company_size is None
+ assert ctrl.scope_conditions is None
+
+ def test_applicability_in_generation_metadata_not_leaked(self):
+ """Applicability fields should be top-level, not in generation_metadata."""
+ pipeline = self._make_pipeline()
+ data = {
+ "title": "Test",
+ "objective": "Test",
+ "applicable_industries": ["all"],
+ "applicable_company_size": ["all"],
+ "scope_conditions": None,
+ }
+ control = pipeline._build_control_from_json(data, "SEC")
+ assert "applicable_industries" not in control.generation_metadata
+ assert "applicable_company_size" not in control.generation_metadata
diff --git a/docs-src/development/testing.md b/docs-src/development/testing.md
index ea7dc6b..a80d537 100644
--- a/docs-src/development/testing.md
+++ b/docs-src/development/testing.md
@@ -214,13 +214,13 @@ Wenn du z.B. eine neue `GetUserStats()` Funktion im Go Service hinzufuegst:
## Modul-spezifische Tests
-### Canonical Control Generator (81+ Tests)
+### Canonical Control Generator (110+ Tests)
Die Control Library hat eine umfangreiche Test-Suite ueber 6 Dateien.
Siehe [Canonical Control Library — Tests](../services/sdk-modules/canonical-control-library.md#tests) und [Control Generator Pipeline](../services/sdk-modules/control-generator-pipeline.md) fuer Details.
```bash
-# Alle Generator-Tests (81 Tests in 12 Klassen)
+# Alle Generator-Tests (110 Tests in 14 Klassen)
cd backend-compliance && pytest -v tests/test_control_generator.py
# Similarity Detector Tests
@@ -242,7 +242,7 @@ cd backend-compliance && pytest -v tests/test_validate_controls.py
| Klasse | Tests | Prueft |
|--------|-------|--------|
-| `TestLicenseMapping` | 12 | Lizenz-Klassifikation (Rule 1/2/3), Case-Insensitivitaet |
+| `TestLicenseMapping` | 13 | Lizenz-Klassifikation (Rule 1/2/3), Case-Insensitivitaet, source_type |
| `TestDomainDetection` | 5 | Keyword-basierte Domain-Erkennung (AUTH, CRYP, NET, DATA) |
| `TestJsonParsing` | 4 | JSON-Parser fuer LLM-Responses (Markdown-Fencing, Preamble) |
| `TestGeneratedControlRules` | 3 | Rule-spezifische Felder (original_text, citation, source_info) |
@@ -254,3 +254,5 @@ cd backend-compliance && pytest -v tests/test_validate_controls.py
| `TestRegulationFilter` | 5 | regulation_filter Prefix-Matching, leere regulation_codes |
| `TestPipelineVersion` | 5 | pipeline_version=2 in DB-Writes, null-Handling in Structure/Reform |
| `TestRecitalDetection` | 10 | Erwaegungsgrund-Erkennung in Quelltexten (Regex, Phrasen, Kombiniert) |
+| `TestSourceTypeClassification` | 16 | law/guideline/standard/restricted Klassifizierung aller Quellentypen |
+| `TestApplicabilityFields` | 12 | Parsing von applicable_industries, applicable_company_size, scope_conditions (v3) |
diff --git a/docs-src/services/sdk-modules/canonical-control-library.md b/docs-src/services/sdk-modules/canonical-control-library.md
index 7fe2491..2aadfa6 100644
--- a/docs-src/services/sdk-modules/canonical-control-library.md
+++ b/docs-src/services/sdk-modules/canonical-control-library.md
@@ -98,6 +98,7 @@ erDiagram
varchar generation_strategy
smallint pipeline_version
integer license_rule
+ jsonb source_citation
jsonb open_anchors
}
canonical_control_mappings {
@@ -316,7 +317,7 @@ Der Validator (`scripts/validate-controls.py`) prueft bei jedem Commit:
- Ziel, Begruendung, Geltungsbereich
- Anforderungen, Pruefverfahren, Nachweise
-- **Gesetzliche Grundlage** (blaue Box): source_citation mit Artikel, Paragraph, Lizenz, Link
+- **Quellennachweis** (dynamische Farbe): `source_type`-basiert — blau fuer Gesetze, indigo fuer Leitlinien, teal fuer Standards
- **Open-Source-Referenzen** (gruener Kasten): Verlinkte Open Anchors
- Generierungsdetails: processing_path, similarity_status
- Tags, Risk Score, Implementation Effort
@@ -613,15 +614,19 @@ Bei der Generierung werden automatisch zugewiesen:
### Architektur-Entscheidung: Gesetzesverweise
-Controls leiten sich aus zwei Quellen ab:
+Controls leiten sich aus vier Quellentypen ab (Feld `source_citation.source_type`):
-1. **Direkte gesetzliche Pflichten (Rule 1):** z.B. DSGVO Art. 32 erzwingt "technische und organisatorische Massnahmen". Diese Controls haben `source_citation` mit exakter Gesetzesreferenz und Originaltext.
+| source_type | Beschreibung | Beispiele | Frontend-Darstellung |
+|-------------|-------------|-----------|---------------------|
+| `law` | Bindendes EU/DE/AT-Recht | DSGVO, AI Act, BDSG, NIS2 | Blaue Box "Gesetzliche Grundlage" + Badge "Direkte gesetzliche Pflicht" |
+| `guideline` | Behoerdliche Leitlinien (Soft Law) | EDPB, WP29, Blue Guide | Indigo Box "Behoerdliche Leitlinie" + Badge "Aufsichtsbehoerdliche Empfehlung" |
+| `standard` | Freiwillige Standards/Frameworks | NIST, OWASP, ENISA, CISA, OECD | Teal Box "Standard / Best Practice" + Badge "Freiwilliger Standard" |
+| `restricted` | Geschuetzte Normen (Rule 3) | BSI, ISO, ETSI | Amber Box "Abgeleitet aus regulatorischen Anforderungen" (kein Originaltext) |
-2. **Implizite Umsetzung ueber Best Practices (Rule 2/3):** z.B. OWASP ASVS V2.7 fordert MFA — das ist keine gesetzliche Pflicht, aber eine Best Practice um NIS2 Art. 21 oder DSGVO Art. 32 zu erfuellen. Diese Controls haben Open-Source-Referenzen (Anchors).
-
-**Im Frontend:**
-- Rule 1/2 Controls zeigen eine blaue "Gesetzliche Grundlage" Box mit Gesetz, Artikel und Link
-- Rule 3 Controls zeigen einen Hinweis dass sie implizit Gesetze umsetzen, mit Verweis auf die Referenzen
+!!! warning "source_type vs license_rule"
+ `source_type` klassifiziert die **rechtliche Verbindlichkeit** (Ist es ein Gesetz?).
+ `license_rule` klassifiziert das **Urheberrecht** (Darf man den Text zitieren?).
+ Beispiel: NIST ist Rule 1 (Public Domain = freie Nutzung), aber `source_type = "standard"` (kein EU-Gesetz).
### API
@@ -816,8 +821,8 @@ curl -X POST https://api-dev.breakpilot.ai/api/compliance/v1/canonical/controls
| `backend-compliance/tests/test_canonical_control_routes.py` | Python | 14 Tests | REST API Endpoints |
| `backend-compliance/tests/test_license_gate.py` | Python | 12 Tests | Lizenz-Klassifikation |
| `backend-compliance/tests/test_validate_controls.py` | Python | 14 Tests | CI/CD Validator |
-| `backend-compliance/tests/test_control_generator.py` | Python | 81 Tests | Pipeline, Batch, Lizenzregeln, QA, Recital |
-| **Gesamt** | | **149+ Tests** |
+| `backend-compliance/tests/test_control_generator.py` | Python | 110 Tests | Pipeline, Batch, Lizenzregeln, QA, Recital, Source-Type, Applicability |
+| **Gesamt** | | **178+ Tests** |
### Control Generator Tests (test_control_generator.py)
@@ -825,7 +830,7 @@ Die Generator-Tests decken folgende Bereiche ab:
| Klasse | Tests | Prueft |
|--------|-------|--------|
-| `TestLicenseMapping` | 12 | Lizenz-Klassifikation (Rule 1/2/3), Case-Insensitivitaet |
+| `TestLicenseMapping` | 13 | Lizenz-Klassifikation (Rule 1/2/3), Case-Insensitivitaet, source_type |
| `TestDomainDetection` | 5 | Keyword-basierte Domain-Erkennung (AUTH, CRYP, NET, DATA) |
| `TestJsonParsing` | 4 | JSON-Parser fuer LLM-Responses (Markdown-Fencing, Preamble) |
| `TestGeneratedControlRules` | 3 | Rule-spezifische Felder (original_text, citation, source_info) |
@@ -837,6 +842,8 @@ Die Generator-Tests decken folgende Bereiche ab:
| `TestRegulationFilter` | 5 | regulation_filter Prefix-Matching, leere regulation_codes |
| `TestPipelineVersion` | 5 | pipeline_version=2 in DB-Writes, null-Handling in Structure/Reform |
| `TestRecitalDetection` | 10 | Erwaegungsgrund-Erkennung in Quelltexten (Regex, Phrasen, Kombiniert) |
+| `TestSourceTypeClassification` | 16 | law/guideline/standard/restricted Klassifizierung aller Quellentypen |
+| `TestApplicabilityFields` | 12 | Parsing von applicable_industries, applicable_company_size, scope_conditions (v3) |
---
diff --git a/docs-src/services/sdk-modules/control-generator-pipeline.md b/docs-src/services/sdk-modules/control-generator-pipeline.md
index 8fc70a2..e9111ff 100644
--- a/docs-src/services/sdk-modules/control-generator-pipeline.md
+++ b/docs-src/services/sdk-modules/control-generator-pipeline.md
@@ -563,7 +563,7 @@ curl -X POST https://api-dev.breakpilot.ai/api/compliance/v1/canonical/generate/
| `backend-compliance/migrations/046_control_generator.sql` | Job-Tracking, Chunk-Tracking Tabellen |
| `backend-compliance/migrations/048_processing_path_expand.sql` | Erweiterte Processing-Path-Werte |
| `backend-compliance/migrations/062_pipeline_version.sql` | `pipeline_version` Spalte |
-| `backend-compliance/tests/test_control_generator.py` | 81+ Tests (Lizenz, Domain, Batch, Pipeline, Recital) |
+| `backend-compliance/tests/test_control_generator.py` | 110+ Tests (Lizenz, Domain, Batch, Pipeline, Recital, Source-Type, Applicability) |
---