feat(pipeline): v3 — scoped control applicability + source_type classification
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 36s
CI/CD / test-python-backend-compliance (push) Successful in 36s
CI/CD / test-python-document-crawler (push) Successful in 27s
CI/CD / test-python-dsms-gateway (push) Successful in 18s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Has been skipped
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 36s
CI/CD / test-python-backend-compliance (push) Successful in 36s
CI/CD / test-python-document-crawler (push) Successful in 27s
CI/CD / test-python-dsms-gateway (push) Successful in 18s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Has been skipped
Phase 4: source_type (law/guideline/standard/restricted) on source_citation
- NIST/OWASP/ENISA correctly shown as "Standard" instead of "Gesetzliche Grundlage"
- Dynamic frontend labels based on source_type
- Backfill endpoint POST /v1/canonical/generate/backfill-source-type

Phase v3: Scoped Control Applicability
- 3 new fields: applicable_industries, applicable_company_size, scope_conditions
- LLM prompt extended with 39 industries, 5 company sizes, 10 scope signals
- All 5 generation paths (Rule 1/2/3, batch structure, batch reform) updated
- _build_control_from_json: parsing + validation (string→list, size validation)
- _store_control: writes 3 new JSONB columns
- API: response models, create/update requests, SELECT queries extended
- Migration 063: 3 new JSONB columns with GIN indexes
- 110 generator tests + 28 route tests = 138 total, all passing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -82,6 +82,9 @@ class ControlResponse(BaseModel):
|
||||
target_audience: Optional[str] = None
|
||||
generation_metadata: Optional[dict] = None
|
||||
generation_strategy: Optional[str] = "ungrouped"
|
||||
applicable_industries: Optional[list] = None
|
||||
applicable_company_size: Optional[list] = None
|
||||
scope_conditions: Optional[dict] = None
|
||||
created_at: str
|
||||
updated_at: str
|
||||
|
||||
@@ -111,6 +114,9 @@ class ControlCreateRequest(BaseModel):
|
||||
category: Optional[str] = None
|
||||
target_audience: Optional[str] = None
|
||||
generation_metadata: Optional[dict] = None
|
||||
applicable_industries: Optional[list] = None
|
||||
applicable_company_size: Optional[list] = None
|
||||
scope_conditions: Optional[dict] = None
|
||||
|
||||
|
||||
class ControlUpdateRequest(BaseModel):
|
||||
@@ -136,6 +142,9 @@ class ControlUpdateRequest(BaseModel):
|
||||
category: Optional[str] = None
|
||||
target_audience: Optional[str] = None
|
||||
generation_metadata: Optional[dict] = None
|
||||
applicable_industries: Optional[list] = None
|
||||
applicable_company_size: Optional[list] = None
|
||||
scope_conditions: Optional[dict] = None
|
||||
|
||||
|
||||
class SimilarityCheckRequest(BaseModel):
|
||||
@@ -164,6 +173,7 @@ _CONTROL_COLS = """id, framework_id, control_id, title, objective, rationale,
|
||||
license_rule, source_original_text, source_citation,
|
||||
customer_visible, verification_method, category,
|
||||
target_audience, generation_metadata, generation_strategy,
|
||||
applicable_industries, applicable_company_size, scope_conditions,
|
||||
created_at, updated_at"""
|
||||
|
||||
|
||||
@@ -511,7 +521,8 @@ async def create_control(body: ControlCreateRequest):
|
||||
open_anchors, release_state, tags,
|
||||
license_rule, source_original_text, source_citation,
|
||||
customer_visible, verification_method, category,
|
||||
target_audience, generation_metadata
|
||||
target_audience, generation_metadata,
|
||||
applicable_industries, applicable_company_size, scope_conditions
|
||||
) VALUES (
|
||||
:fw_id, :cid, :title, :objective, :rationale,
|
||||
CAST(:scope AS jsonb), CAST(:requirements AS jsonb),
|
||||
@@ -521,7 +532,10 @@ async def create_control(body: ControlCreateRequest):
|
||||
:license_rule, :source_original_text,
|
||||
CAST(:source_citation AS jsonb),
|
||||
:customer_visible, :verification_method, :category,
|
||||
:target_audience, CAST(:generation_metadata AS jsonb)
|
||||
:target_audience, CAST(:generation_metadata AS jsonb),
|
||||
CAST(:applicable_industries AS jsonb),
|
||||
CAST(:applicable_company_size AS jsonb),
|
||||
CAST(:scope_conditions AS jsonb)
|
||||
)
|
||||
RETURNING {_CONTROL_COLS}
|
||||
"""),
|
||||
@@ -550,6 +564,9 @@ async def create_control(body: ControlCreateRequest):
|
||||
"category": body.category,
|
||||
"target_audience": body.target_audience,
|
||||
"generation_metadata": _json.dumps(body.generation_metadata) if body.generation_metadata else None,
|
||||
"applicable_industries": _json.dumps(body.applicable_industries) if body.applicable_industries else None,
|
||||
"applicable_company_size": _json.dumps(body.applicable_company_size) if body.applicable_company_size else None,
|
||||
"scope_conditions": _json.dumps(body.scope_conditions) if body.scope_conditions else None,
|
||||
},
|
||||
).fetchone()
|
||||
db.commit()
|
||||
@@ -778,6 +795,9 @@ def _control_row(r) -> dict:
|
||||
"target_audience": r.target_audience,
|
||||
"generation_metadata": r.generation_metadata,
|
||||
"generation_strategy": getattr(r, "generation_strategy", "ungrouped"),
|
||||
"applicable_industries": getattr(r, "applicable_industries", None),
|
||||
"applicable_company_size": getattr(r, "applicable_company_size", None),
|
||||
"scope_conditions": getattr(r, "scope_conditions", None),
|
||||
"created_at": r.created_at.isoformat() if r.created_at else None,
|
||||
"updated_at": r.updated_at.isoformat() if r.updated_at else None,
|
||||
}
|
||||
|
||||
@@ -28,6 +28,7 @@ from compliance.services.control_generator import (
|
||||
ALL_COLLECTIONS,
|
||||
VALID_CATEGORIES,
|
||||
VALID_DOMAINS,
|
||||
_classify_regulation,
|
||||
_detect_category,
|
||||
_detect_domain,
|
||||
_llm_local,
|
||||
@@ -978,3 +979,122 @@ async def get_domain_backfill_status(backfill_id: str):
|
||||
if not status:
|
||||
raise HTTPException(status_code=404, detail="Domain backfill job not found")
|
||||
return status
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Source-Type Backfill — Classify law vs guideline vs standard vs restricted
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class SourceTypeBackfillRequest(BaseModel):
    """Request body for the source-type backfill endpoint."""

    # Preview mode is the default; callers must pass dry_run=False explicitly
    # before any row is written.
    dry_run: bool = True
|
||||
|
||||
|
||||
# In-memory registry of source-type backfill jobs, keyed by backfill_id.
# Written by the start endpoint and the background job, read by the status
# endpoint. It is a plain module-level dict, so entries live only as long as
# this process does.
_source_type_backfill_status: dict = {}
|
||||
|
||||
|
||||
async def _run_source_type_backfill(dry_run: bool, backfill_id: str):
    """Backfill ``source_type`` into ``source_citation`` JSONB for all controls.

    Selects every control whose ``source_citation`` is set but carries no (or
    an empty) ``source_type``, classifies it with ``_classify_regulation``
    using ``generation_metadata["source_regulation"]``, and, unless
    ``dry_run`` is set, writes the enriched citation back. Progress and the
    final result are published in ``_source_type_backfill_status[backfill_id]``.

    Args:
        dry_run: When True nothing is written; ``updated`` then counts the
            rows that would have been changed.
        backfill_id: Key under which this job reports its status.

    Returns:
        None. Failures are logged and recorded as status ``"failed"`` instead
        of being raised, because the coroutine runs as a detached task.
    """
    batch_size = 100  # rows per commit / progress update
    db = SessionLocal()
    try:
        # Find controls with source_citation that lack source_type
        rows = db.execute(text("""
            SELECT control_id, source_citation, generation_metadata
            FROM compliance.canonical_controls
            WHERE source_citation IS NOT NULL
            AND (source_citation->>'source_type' IS NULL
            OR source_citation->>'source_type' = '')
        """)).fetchall()

        total = len(rows)
        updated = 0
        errors = []

        _source_type_backfill_status[backfill_id] = {
            "status": "running", "total": total, "updated": 0, "dry_run": dry_run,
        }

        for cid, raw_citation, raw_metadata in rows:
            # The driver may hand JSONB back as a dict or as a JSON string.
            # A malformed value must only skip this row, not abort the job.
            try:
                citation = raw_citation if isinstance(raw_citation, dict) else json.loads(raw_citation or "{}")
                metadata = raw_metadata if isinstance(raw_metadata, dict) else json.loads(raw_metadata or "{}")
            except (TypeError, ValueError) as exc:
                errors.append(f"{cid}: unparseable JSON ({exc})")
                continue
            if not isinstance(citation, dict) or not isinstance(metadata, dict):
                errors.append(f"{cid}: source_citation/generation_metadata is not a JSON object")
                continue

            # The regulation code is the only classification input; without it
            # the row is reported as an error and left untouched.
            reg_code = metadata.get("source_regulation", "")
            if not reg_code:
                errors.append(f"{cid}: no source_regulation in metadata")
                continue

            # Classify
            license_info = _classify_regulation(reg_code)
            citation["source_type"] = license_info.get("source_type", "restricted")

            if not dry_run:
                # Explicit CAST keeps this consistent with the INSERT in
                # create_control and does not rely on implicit text->jsonb
                # coercion by the driver.
                # NOTE(review): rows are matched by control_id; confirm it is
                # unique across frameworks, otherwise match on id.
                db.execute(text("""
                    UPDATE compliance.canonical_controls
                    SET source_citation = CAST(:citation AS jsonb)
                    WHERE control_id = :cid
                """), {"citation": json.dumps(citation), "cid": cid})
            updated += 1

            if updated % batch_size == 0:
                if not dry_run:
                    db.commit()
                _source_type_backfill_status[backfill_id]["updated"] = updated
                # Yield to the event loop so status polls can be served
                # between batches. The DB calls themselves stay synchronous.
                await asyncio.sleep(0)

        if not dry_run:
            db.commit()

        # Count distribution (only meaningful once data was actually written)
        dist_query = db.execute(text("""
            SELECT source_citation->>'source_type' as st, COUNT(*)
            FROM compliance.canonical_controls
            WHERE source_citation IS NOT NULL
            AND source_citation->>'source_type' IS NOT NULL
            GROUP BY st
        """)).fetchall() if not dry_run else []

        distribution = {r[0]: r[1] for r in dist_query}

        _source_type_backfill_status[backfill_id] = {
            "status": "completed", "total": total, "updated": updated,
            "dry_run": dry_run, "distribution": distribution,
            "errors": errors[:50],
        }
        logger.info("Source-type backfill %s completed: %d/%d updated (dry_run=%s)",
                    backfill_id, updated, total, dry_run)

    except Exception as e:
        # Top-level boundary of a detached task: keep the traceback in the log
        # and surface the failure through the status registry.
        logger.exception("Source-type backfill %s failed: %s", backfill_id, e)
        _source_type_backfill_status[backfill_id] = {"status": "failed", "error": str(e)}
    finally:
        db.close()
|
||||
|
||||
|
||||
# Strong references to in-flight backfill tasks. The event loop keeps only a
# weak reference to a task, so a task whose result is discarded may be
# garbage-collected before it finishes.
_source_type_backfill_tasks: set = set()


@router.post("/generate/backfill-source-type")
async def start_source_type_backfill(req: SourceTypeBackfillRequest):
    """Backfill source_type (law/guideline/standard/restricted) into source_citation JSONB.

    Classifies each control's source as binding law, authority guideline,
    voluntary standard, or restricted norm based on regulation_code.
    Default is dry_run=True (preview only).

    The work runs as a background task; the response only carries the
    ``backfill_id`` to poll for status.
    """
    import uuid

    backfill_id = str(uuid.uuid4())[:8]
    _source_type_backfill_status[backfill_id] = {"status": "starting"}

    task = asyncio.create_task(_run_source_type_backfill(req.dry_run, backfill_id))
    # Hold the task until it completes, then drop the reference.
    _source_type_backfill_tasks.add(task)
    task.add_done_callback(_source_type_backfill_tasks.discard)

    return {
        "status": "running",
        "backfill_id": backfill_id,
        "message": f"Source-type backfill started. Poll /generate/source-type-backfill-status/{backfill_id}",
    }
|
||||
|
||||
|
||||
@router.get("/generate/source-type-backfill-status/{backfill_id}")
async def get_source_type_backfill_status(backfill_id: str):
    """Return the recorded status dict of a source-type backfill job.

    Raises:
        HTTPException: 404 when no status is recorded for ``backfill_id``.
    """
    job_state = _source_type_backfill_status.get(backfill_id)
    if job_state:
        return job_state
    raise HTTPException(status_code=404, detail="Source-type backfill job not found")
|
||||
|
||||
@@ -56,7 +56,8 @@ HARMONIZATION_THRESHOLD = 0.85 # Cosine similarity above this = duplicate
|
||||
# Pipeline version — increment when generation rules change materially.
|
||||
# v1: Original (local LLM prefilter, old prompt)
|
||||
# v2: Anthropic decides relevance, null for non-requirement chunks, annexes protected
|
||||
PIPELINE_VERSION = 2
|
||||
# v3: Scoped Control Applicability — applicable_industries, applicable_company_size, scope_conditions
|
||||
PIPELINE_VERSION = 3
|
||||
|
||||
ALL_COLLECTIONS = [
|
||||
"bp_compliance_ce",
|
||||
@@ -72,119 +73,121 @@ ALL_COLLECTIONS = [
|
||||
|
||||
REGULATION_LICENSE_MAP: dict[str, dict] = {
|
||||
# RULE 1: FREE USE — Laws, Public Domain
|
||||
# source_type: "law" = binding legislation, "guideline" = authority guidance (soft law),
|
||||
# "standard" = voluntary framework/best practice, "restricted" = protected norm
|
||||
# EU Regulations
|
||||
"eu_2016_679": {"license": "EU_LAW", "rule": 1, "name": "DSGVO"},
|
||||
"eu_2024_1689": {"license": "EU_LAW", "rule": 1, "name": "AI Act (KI-Verordnung)"},
|
||||
"eu_2022_2555": {"license": "EU_LAW", "rule": 1, "name": "NIS2"},
|
||||
"eu_2024_2847": {"license": "EU_LAW", "rule": 1, "name": "Cyber Resilience Act (CRA)"},
|
||||
"eu_2023_1230": {"license": "EU_LAW", "rule": 1, "name": "Maschinenverordnung"},
|
||||
"eu_2022_2065": {"license": "EU_LAW", "rule": 1, "name": "Digital Services Act (DSA)"},
|
||||
"eu_2022_1925": {"license": "EU_LAW", "rule": 1, "name": "Digital Markets Act (DMA)"},
|
||||
"eu_2022_868": {"license": "EU_LAW", "rule": 1, "name": "Data Governance Act (DGA)"},
|
||||
"eu_2019_770": {"license": "EU_LAW", "rule": 1, "name": "Digitale-Inhalte-Richtlinie"},
|
||||
"eu_2021_914": {"license": "EU_LAW", "rule": 1, "name": "Standardvertragsklauseln (SCC)"},
|
||||
"eu_2002_58": {"license": "EU_LAW", "rule": 1, "name": "ePrivacy-Richtlinie"},
|
||||
"eu_2000_31": {"license": "EU_LAW", "rule": 1, "name": "E-Commerce-Richtlinie"},
|
||||
"eu_2023_1803": {"license": "EU_LAW", "rule": 1, "name": "IFRS-Uebernahmeverordnung"},
|
||||
"eucsa": {"license": "EU_LAW", "rule": 1, "name": "EU Cybersecurity Act"},
|
||||
"dataact": {"license": "EU_LAW", "rule": 1, "name": "Data Act"},
|
||||
"dora": {"license": "EU_LAW", "rule": 1, "name": "Digital Operational Resilience Act"},
|
||||
"ehds": {"license": "EU_LAW", "rule": 1, "name": "European Health Data Space"},
|
||||
"gpsr": {"license": "EU_LAW", "rule": 1, "name": "Allgemeine Produktsicherheitsverordnung"},
|
||||
"mica": {"license": "EU_LAW", "rule": 1, "name": "Markets in Crypto-Assets"},
|
||||
"psd2": {"license": "EU_LAW", "rule": 1, "name": "Zahlungsdiensterichtlinie 2"},
|
||||
"dpf": {"license": "EU_LAW", "rule": 1, "name": "EU-US Data Privacy Framework"},
|
||||
"dsm": {"license": "EU_LAW", "rule": 1, "name": "DSM-Urheberrechtsrichtlinie"},
|
||||
"amlr": {"license": "EU_LAW", "rule": 1, "name": "AML-Verordnung"},
|
||||
"eu_blue_guide_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "Blue Guide 2022"},
|
||||
# NIST (Public Domain — all variants)
|
||||
"nist_sp_800_53": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-53"},
|
||||
"nist_sp800_53r5": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-53 Rev.5"},
|
||||
"nist_sp_800_63b": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-63B"},
|
||||
"nist_sp800_63_3": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-63-3"},
|
||||
"nist_csf_2_0": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST CSF 2.0"},
|
||||
"nist_sp_800_218": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SSDF"},
|
||||
"nist_sp800_207": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST SP 800-207 Zero Trust"},
|
||||
"nist_ai_rmf": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NIST AI Risk Management Framework"},
|
||||
"nistir_8259a": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "name": "NISTIR 8259A IoT Security"},
|
||||
"cisa_secure_by_design": {"license": "US_GOV_PUBLIC", "rule": 1, "name": "CISA Secure by Design"},
|
||||
"eu_2016_679": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "DSGVO"},
|
||||
"eu_2024_1689": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "AI Act (KI-Verordnung)"},
|
||||
"eu_2022_2555": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "NIS2"},
|
||||
"eu_2024_2847": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Cyber Resilience Act (CRA)"},
|
||||
"eu_2023_1230": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Maschinenverordnung"},
|
||||
"eu_2022_2065": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Digital Services Act (DSA)"},
|
||||
"eu_2022_1925": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Digital Markets Act (DMA)"},
|
||||
"eu_2022_868": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Data Governance Act (DGA)"},
|
||||
"eu_2019_770": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Digitale-Inhalte-Richtlinie"},
|
||||
"eu_2021_914": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Standardvertragsklauseln (SCC)"},
|
||||
"eu_2002_58": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "ePrivacy-Richtlinie"},
|
||||
"eu_2000_31": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "E-Commerce-Richtlinie"},
|
||||
"eu_2023_1803": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "IFRS-Uebernahmeverordnung"},
|
||||
"eucsa": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "EU Cybersecurity Act"},
|
||||
"dataact": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Data Act"},
|
||||
"dora": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Digital Operational Resilience Act"},
|
||||
"ehds": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "European Health Data Space"},
|
||||
"gpsr": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Allgemeine Produktsicherheitsverordnung"},
|
||||
"mica": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Markets in Crypto-Assets"},
|
||||
"psd2": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "Zahlungsdiensterichtlinie 2"},
|
||||
"dpf": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "EU-US Data Privacy Framework"},
|
||||
"dsm": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "DSM-Urheberrechtsrichtlinie"},
|
||||
"amlr": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "AML-Verordnung"},
|
||||
"eu_blue_guide_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "Blue Guide 2022"},
|
||||
# NIST (Public Domain — NOT laws, voluntary standards)
|
||||
"nist_sp_800_53": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-53"},
|
||||
"nist_sp800_53r5": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-53 Rev.5"},
|
||||
"nist_sp_800_63b": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-63B"},
|
||||
"nist_sp800_63_3": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-63-3"},
|
||||
"nist_csf_2_0": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST CSF 2.0"},
|
||||
"nist_sp_800_218": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SSDF"},
|
||||
"nist_sp800_207": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST SP 800-207 Zero Trust"},
|
||||
"nist_ai_rmf": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NIST AI Risk Management Framework"},
|
||||
"nistir_8259a": {"license": "NIST_PUBLIC_DOMAIN", "rule": 1, "source_type": "standard", "name": "NISTIR 8259A IoT Security"},
|
||||
"cisa_secure_by_design": {"license": "US_GOV_PUBLIC", "rule": 1, "source_type": "standard", "name": "CISA Secure by Design"},
|
||||
# German Laws
|
||||
"bdsg": {"license": "DE_LAW", "rule": 1, "name": "BDSG"},
|
||||
"bdsg_2018_komplett": {"license": "DE_LAW", "rule": 1, "name": "BDSG 2018"},
|
||||
"ttdsg": {"license": "DE_LAW", "rule": 1, "name": "TTDSG"},
|
||||
"tdddg_25": {"license": "DE_LAW", "rule": 1, "name": "TDDDG"},
|
||||
"tkg": {"license": "DE_LAW", "rule": 1, "name": "TKG"},
|
||||
"de_tkg": {"license": "DE_LAW", "rule": 1, "name": "TKG"},
|
||||
"bgb_komplett": {"license": "DE_LAW", "rule": 1, "name": "BGB"},
|
||||
"hgb": {"license": "DE_LAW", "rule": 1, "name": "HGB"},
|
||||
"hgb_komplett": {"license": "DE_LAW", "rule": 1, "name": "HGB"},
|
||||
"urhg_komplett": {"license": "DE_LAW", "rule": 1, "name": "UrhG"},
|
||||
"uwg": {"license": "DE_LAW", "rule": 1, "name": "UWG"},
|
||||
"tmg_komplett": {"license": "DE_LAW", "rule": 1, "name": "TMG"},
|
||||
"gewo": {"license": "DE_LAW", "rule": 1, "name": "GewO"},
|
||||
"ao": {"license": "DE_LAW", "rule": 1, "name": "Abgabenordnung"},
|
||||
"ao_komplett": {"license": "DE_LAW", "rule": 1, "name": "Abgabenordnung"},
|
||||
"battdg": {"license": "DE_LAW", "rule": 1, "name": "Batteriegesetz"},
|
||||
"bdsg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "BDSG"},
|
||||
"bdsg_2018_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "BDSG 2018"},
|
||||
"ttdsg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TTDSG"},
|
||||
"tdddg_25": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TDDDG"},
|
||||
"tkg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TKG"},
|
||||
"de_tkg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TKG"},
|
||||
"bgb_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "BGB"},
|
||||
"hgb": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "HGB"},
|
||||
"hgb_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "HGB"},
|
||||
"urhg_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "UrhG"},
|
||||
"uwg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "UWG"},
|
||||
"tmg_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "TMG"},
|
||||
"gewo": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "GewO"},
|
||||
"ao": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "Abgabenordnung"},
|
||||
"ao_komplett": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "Abgabenordnung"},
|
||||
"battdg": {"license": "DE_LAW", "rule": 1, "source_type": "law", "name": "Batteriegesetz"},
|
||||
# Austrian Laws
|
||||
"at_dsg": {"license": "AT_LAW", "rule": 1, "name": "AT DSG"},
|
||||
"at_abgb": {"license": "AT_LAW", "rule": 1, "name": "AT ABGB"},
|
||||
"at_abgb_agb": {"license": "AT_LAW", "rule": 1, "name": "AT ABGB AGB-Recht"},
|
||||
"at_bao": {"license": "AT_LAW", "rule": 1, "name": "AT BAO"},
|
||||
"at_bao_ret": {"license": "AT_LAW", "rule": 1, "name": "AT BAO Retention"},
|
||||
"at_ecg": {"license": "AT_LAW", "rule": 1, "name": "AT E-Commerce-Gesetz"},
|
||||
"at_kschg": {"license": "AT_LAW", "rule": 1, "name": "AT Konsumentenschutzgesetz"},
|
||||
"at_medieng": {"license": "AT_LAW", "rule": 1, "name": "AT Mediengesetz"},
|
||||
"at_tkg": {"license": "AT_LAW", "rule": 1, "name": "AT TKG"},
|
||||
"at_ugb": {"license": "AT_LAW", "rule": 1, "name": "AT UGB"},
|
||||
"at_ugb_ret": {"license": "AT_LAW", "rule": 1, "name": "AT UGB Retention"},
|
||||
"at_uwg": {"license": "AT_LAW", "rule": 1, "name": "AT UWG"},
|
||||
"at_dsg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT DSG"},
|
||||
"at_abgb": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT ABGB"},
|
||||
"at_abgb_agb": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT ABGB AGB-Recht"},
|
||||
"at_bao": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT BAO"},
|
||||
"at_bao_ret": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT BAO Retention"},
|
||||
"at_ecg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT E-Commerce-Gesetz"},
|
||||
"at_kschg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT Konsumentenschutzgesetz"},
|
||||
"at_medieng": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT Mediengesetz"},
|
||||
"at_tkg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT TKG"},
|
||||
"at_ugb": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT UGB"},
|
||||
"at_ugb_ret": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT UGB Retention"},
|
||||
"at_uwg": {"license": "AT_LAW", "rule": 1, "source_type": "law", "name": "AT UWG"},
|
||||
# Other EU Member State Laws
|
||||
"fr_loi_informatique": {"license": "FR_LAW", "rule": 1, "name": "FR Loi Informatique"},
|
||||
"es_lopdgdd": {"license": "ES_LAW", "rule": 1, "name": "ES LOPDGDD"},
|
||||
"nl_uavg": {"license": "NL_LAW", "rule": 1, "name": "NL UAVG"},
|
||||
"it_codice_privacy": {"license": "IT_LAW", "rule": 1, "name": "IT Codice Privacy"},
|
||||
"hu_info_tv": {"license": "HU_LAW", "rule": 1, "name": "HU Információs törvény"},
|
||||
# EDPB Guidelines (EU Public Authority)
|
||||
"edpb_01_2020": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB 01/2020 Ergaenzende Massnahmen"},
|
||||
"edpb_02_2023": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB 02/2023 Technischer Anwendungsbereich"},
|
||||
"edpb_05_2020": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB 05/2020 Einwilligung"},
|
||||
"edpb_09_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB 09/2022 Datenschutzverletzungen"},
|
||||
"edpb_bcr_01_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB BCR Leitlinien"},
|
||||
"edpb_breach_09_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Breach Notification"},
|
||||
"edpb_connected_vehicles_01_2020": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Connected Vehicles"},
|
||||
"edpb_dpbd_04_2019": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Data Protection by Design"},
|
||||
"edpb_eprivacy_02_2023": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB ePrivacy"},
|
||||
"edpb_facial_recognition_05_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Facial Recognition"},
|
||||
"edpb_fines_04_2022": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Fines Calculation"},
|
||||
"edpb_legitimate_interest": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Legitimate Interest"},
|
||||
"edpb_legitimate_interest_01_2024": {"license": "EU_PUBLIC","rule": 1, "name": "EDPB Legitimate Interest 2024"},
|
||||
"edpb_social_media_08_2020": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Social Media"},
|
||||
"edpb_transfers_01_2020":{"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Transfers 01/2020"},
|
||||
"edpb_transfers_07_2020":{"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Transfers 07/2020"},
|
||||
"edpb_video_03_2019": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Video Surveillance"},
|
||||
"edps_dpia_list": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPS DPIA Liste"},
|
||||
"edpb_certification_01_2018": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Certification 01/2018"},
|
||||
"edpb_certification_01_2019": {"license": "EU_PUBLIC", "rule": 1, "name": "EDPB Certification 01/2019"},
|
||||
"eaa": {"license": "EU_LAW", "rule": 1, "name": "European Accessibility Act"},
|
||||
# WP29 (pre-EDPB) Guidelines
|
||||
"wp244_profiling": {"license": "EU_PUBLIC", "rule": 1, "name": "WP29 Profiling"},
|
||||
"wp251_profiling": {"license": "EU_PUBLIC", "rule": 1, "name": "WP29 Data Portability"},
|
||||
"wp260_transparency": {"license": "EU_PUBLIC", "rule": 1, "name": "WP29 Transparency"},
|
||||
"fr_loi_informatique": {"license": "FR_LAW", "rule": 1, "source_type": "law", "name": "FR Loi Informatique"},
|
||||
"es_lopdgdd": {"license": "ES_LAW", "rule": 1, "source_type": "law", "name": "ES LOPDGDD"},
|
||||
"nl_uavg": {"license": "NL_LAW", "rule": 1, "source_type": "law", "name": "NL UAVG"},
|
||||
"it_codice_privacy": {"license": "IT_LAW", "rule": 1, "source_type": "law", "name": "IT Codice Privacy"},
|
||||
"hu_info_tv": {"license": "HU_LAW", "rule": 1, "source_type": "law", "name": "HU Információs törvény"},
|
||||
# EDPB Guidelines (EU Public Authority — soft law, not binding legislation)
|
||||
"edpb_01_2020": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB 01/2020 Ergaenzende Massnahmen"},
|
||||
"edpb_02_2023": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB 02/2023 Technischer Anwendungsbereich"},
|
||||
"edpb_05_2020": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB 05/2020 Einwilligung"},
|
||||
"edpb_09_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB 09/2022 Datenschutzverletzungen"},
|
||||
"edpb_bcr_01_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB BCR Leitlinien"},
|
||||
"edpb_breach_09_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Breach Notification"},
|
||||
"edpb_connected_vehicles_01_2020": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Connected Vehicles"},
|
||||
"edpb_dpbd_04_2019": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Data Protection by Design"},
|
||||
"edpb_eprivacy_02_2023": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB ePrivacy"},
|
||||
"edpb_facial_recognition_05_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Facial Recognition"},
|
||||
"edpb_fines_04_2022": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Fines Calculation"},
|
||||
"edpb_legitimate_interest": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Legitimate Interest"},
|
||||
"edpb_legitimate_interest_01_2024": {"license": "EU_PUBLIC","rule": 1, "source_type": "guideline", "name": "EDPB Legitimate Interest 2024"},
|
||||
"edpb_social_media_08_2020": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Social Media"},
|
||||
"edpb_transfers_01_2020":{"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Transfers 01/2020"},
|
||||
"edpb_transfers_07_2020":{"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Transfers 07/2020"},
|
||||
"edpb_video_03_2019": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Video Surveillance"},
|
||||
"edps_dpia_list": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPS DPIA Liste"},
|
||||
"edpb_certification_01_2018": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Certification 01/2018"},
|
||||
"edpb_certification_01_2019": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "EDPB Certification 01/2019"},
|
||||
"eaa": {"license": "EU_LAW", "rule": 1, "source_type": "law", "name": "European Accessibility Act"},
|
||||
# WP29 (pre-EDPB) Guidelines — soft law
|
||||
"wp244_profiling": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "WP29 Profiling"},
|
||||
"wp251_profiling": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "WP29 Data Portability"},
|
||||
"wp260_transparency": {"license": "EU_PUBLIC", "rule": 1, "source_type": "guideline", "name": "WP29 Transparency"},
|
||||
|
||||
# RULE 2: CITATION REQUIRED — CC-BY, CC-BY-SA
|
||||
"owasp_asvs": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP ASVS",
|
||||
# RULE 2: CITATION REQUIRED — CC-BY, CC-BY-SA (voluntary standards)
|
||||
"owasp_asvs": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP ASVS",
|
||||
"attribution": "OWASP Foundation, CC BY-SA 4.0"},
|
||||
"owasp_masvs": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP MASVS",
|
||||
"owasp_masvs": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP MASVS",
|
||||
"attribution": "OWASP Foundation, CC BY-SA 4.0"},
|
||||
"owasp_top10": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP Top 10",
|
||||
"owasp_top10": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP Top 10",
|
||||
"attribution": "OWASP Foundation, CC BY-SA 4.0"},
|
||||
"owasp_top10_2021": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP Top 10 2021",
|
||||
"owasp_top10_2021": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP Top 10 2021",
|
||||
"attribution": "OWASP Foundation, CC BY-SA 4.0"},
|
||||
"owasp_api_top10_2023": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP API Top 10 2023",
|
||||
"owasp_api_top10_2023": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP API Top 10 2023",
|
||||
"attribution": "OWASP Foundation, CC BY-SA 4.0"},
|
||||
"owasp_samm": {"license": "CC-BY-SA-4.0", "rule": 2, "name": "OWASP SAMM",
|
||||
"owasp_samm": {"license": "CC-BY-SA-4.0", "rule": 2, "source_type": "standard", "name": "OWASP SAMM",
|
||||
"attribution": "OWASP Foundation, CC BY-SA 4.0"},
|
||||
"oecd_ai_principles": {"license": "OECD_PUBLIC", "rule": 2, "name": "OECD AI Principles",
|
||||
"oecd_ai_principles": {"license": "OECD_PUBLIC", "rule": 2, "source_type": "standard", "name": "OECD AI Principles",
|
||||
"attribution": "OECD"},
|
||||
|
||||
# RULE 3: RESTRICTED — Full reformulation required
|
||||
@@ -197,28 +200,32 @@ _RULE2_PREFIXES = ["enisa_"]
|
||||
|
||||
|
||||
def _classify_regulation(regulation_code: str) -> dict:
    """Determine the license rule and source type for a regulation_code.

    The code is lower-cased and stripped, then resolved in this order:

    1. exact entry in ``REGULATION_LICENSE_MAP``,
    2. a prefix listed in ``_RULE2_PREFIXES`` (ENISA, classified as standard),
    3. a prefix listed in ``_RULE3_PREFIXES`` (classified as restricted),
    4. fallback for unknown codes: Rule 3 / restricted, with a warning logged.

    Returns:
        A dict describing the licensing of the source. The dicts built in
        this function carry the keys ``license``, ``rule``, ``source_type``
        and ``name``; the ENISA result additionally carries ``attribution``.
        For an exact match the result holds whatever keys the map entry has.
        ``source_type`` is one of: law, guideline, standard, restricted.
    """
    code = regulation_code.lower().strip()

    # Exact match first. Return a shallow copy so that a caller mutating the
    # result cannot alter the shared module-level table; the prefix and
    # fallback branches below already return fresh dicts.
    known = REGULATION_LICENSE_MAP.get(code)
    if known is not None:
        return dict(known)

    # Prefix match for Rule 2 (ENISA = standard)
    for prefix in _RULE2_PREFIXES:
        if code.startswith(prefix):
            return {
                "license": "CC-BY-4.0",
                "rule": 2,
                "source_type": "standard",
                "name": "ENISA",
                "attribution": "ENISA, CC BY 4.0",
            }

    # Prefix match for Rule 3 (BSI/ISO/ETSI = restricted)
    for prefix in _RULE3_PREFIXES:
        if code.startswith(prefix):
            return {
                # License label is derived from the prefix itself,
                # e.g. a prefix "xyz_" yields "XYZ_RESTRICTED".
                "license": f"{prefix.rstrip('_').upper()}_RESTRICTED",
                "rule": 3,
                "source_type": "restricted",
                "name": "INTERNAL_ONLY",
            }

    # Unknown → treat as restricted (safe default)
    logger.warning("Unknown regulation_code %r — defaulting to Rule 3 (restricted)", code)
    return {
        "license": "UNKNOWN",
        "rule": 3,
        "source_type": "restricted",
        "name": "INTERNAL_ONLY",
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -476,6 +483,10 @@ class GeneratedControl:
|
||||
verification_method: Optional[str] = None # code_review, document, tool, hybrid
|
||||
category: Optional[str] = None # one of 22 categories
|
||||
target_audience: Optional[list] = None # e.g. ["unternehmen", "behoerden", "entwickler"]
|
||||
# Scoped Control Applicability (v3)
|
||||
applicable_industries: Optional[list] = None # e.g. ["all"] or ["Telekommunikation", "Energie"]
|
||||
applicable_company_size: Optional[list] = None # e.g. ["all"] or ["medium", "large", "enterprise"]
|
||||
scope_conditions: Optional[dict] = None # e.g. {"requires_any": ["uses_ai"], "description": "..."}
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -769,6 +780,38 @@ STRUCTURE_SYSTEM_PROMPT = """Du bist ein Security-Compliance-Experte. Strukturie
|
||||
als praxisorientiertes Security Control. Erstelle eine verständliche, umsetzbare Formulierung.
|
||||
Antworte NUR mit validem JSON. Bei mehreren Controls antworte mit einem JSON-Array."""
|
||||
|
||||
# Shared applicability prompt block — appended to all generation prompts (v3)
|
||||
APPLICABILITY_PROMPT = """- applicable_industries: Liste der Branchen fuer die dieses Control relevant ist.
|
||||
Verwende ["all"] wenn der Control branchenuebergreifend gilt.
|
||||
Moegliche Werte: "all", "Technologie / IT", "IT Dienstleistungen", "E-Commerce / Handel",
|
||||
"Finanzdienstleistungen", "Versicherungen", "Gesundheitswesen", "Pharma", "Bildung",
|
||||
"Beratung / Consulting", "Marketing / Agentur", "Produktion / Industrie",
|
||||
"Logistik / Transport", "Immobilien", "Bau", "Energie", "Automobil",
|
||||
"Luft- / Raumfahrt", "Maschinenbau", "Anlagenbau", "Automatisierung", "Robotik",
|
||||
"Messtechnik", "Agrar", "Chemie", "Minen / Bergbau", "Telekommunikation",
|
||||
"Medien / Verlage", "Gastronomie / Hotellerie", "Recht / Kanzlei",
|
||||
"Oeffentlicher Dienst", "Verteidigung / Ruestung", "Wasser- / Abwasserwirtschaft",
|
||||
"Lebensmittel", "Digitale Infrastruktur", "Weltraum", "Post / Kurierdienste",
|
||||
"Abfallwirtschaft", "Forschung"
|
||||
Beispiel: TKG-Controls → ["Telekommunikation"]
|
||||
Beispiel: DSGVO Art. 32 → ["all"]
|
||||
Beispiel: NIS2 Art. 21 → ["Energie", "Gesundheitswesen", "Digitale Infrastruktur", "Logistik / Transport", ...]
|
||||
- applicable_company_size: Ab welcher Unternehmensgroesse gilt dieses Control?
|
||||
Verwende ["all"] wenn keine Groessenbeschraenkung.
|
||||
Moegliche Werte: "all", "micro", "small", "medium", "large", "enterprise"
|
||||
Groessen: micro (<10 MA), small (10-49), medium (50-249), large (250-999), enterprise (1000+)
|
||||
Beispiel: NIS2 Art. 21 → ["medium", "large", "enterprise"]
|
||||
Beispiel: DSGVO Art. 5 → ["all"]
|
||||
- scope_conditions: Optionale Bedingungen aus dem Compliance-Scope des Unternehmens.
|
||||
null wenn keine besonderen Bedingungen. Sonst JSON-Objekt:
|
||||
{"requires_any": ["signal1", "signal2"], "description": "Kurze Erklaerung wann relevant"}
|
||||
Moegliche Signale: "uses_ai", "third_country_transfer", "processes_health_data",
|
||||
"processes_minors_data", "automated_decisions", "employee_monitoring",
|
||||
"video_surveillance", "financial_data", "is_kritis_operator", "payment_services"
|
||||
Beispiel AI Act: {"requires_any": ["uses_ai"], "description": "Nur bei KI-Einsatz relevant"}
|
||||
Beispiel SCC: {"requires_any": ["third_country_transfer"], "description": "Nur bei Drittlandtransfer"}
|
||||
Beispiel DSGVO Art. 32 (allgemein): null"""
|
||||
|
||||
|
||||
class ControlGeneratorPipeline:
|
||||
"""Orchestrates the 7-stage control generation pipeline."""
|
||||
@@ -973,6 +1016,7 @@ Gib JSON zurück mit diesen Feldern:
|
||||
- target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst")
|
||||
- source_article: Artikel-/Paragraphen-Referenz aus dem Text (z.B. "Artikel 10", "§ 42"). Leer lassen wenn nicht erkennbar.
|
||||
- source_paragraph: Absatz-Referenz aus dem Text (z.B. "Absatz 5", "Nr. 2"). Leer lassen wenn nicht erkennbar.
|
||||
{APPLICABILITY_PROMPT}
|
||||
|
||||
Text: {chunk.text[:2000]}
|
||||
Quelle: {chunk.regulation_name} ({chunk.regulation_code}), {chunk.article}"""
|
||||
@@ -995,6 +1039,7 @@ Quelle: {chunk.regulation_name} ({chunk.regulation_code}), {chunk.article}"""
|
||||
"article": effective_article,
|
||||
"paragraph": effective_paragraph,
|
||||
"license": license_info.get("license", ""),
|
||||
"source_type": license_info.get("source_type", "law"),
|
||||
"url": chunk.source_url or "",
|
||||
}
|
||||
control.customer_visible = True
|
||||
@@ -1036,6 +1081,7 @@ Gib JSON zurück mit diesen Feldern:
|
||||
- target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst")
|
||||
- source_article: Artikel-/Paragraphen-Referenz aus dem Text (z.B. "Artikel 10", "§ 42"). Leer lassen wenn nicht erkennbar.
|
||||
- source_paragraph: Absatz-Referenz aus dem Text (z.B. "Absatz 5", "Nr. 2"). Leer lassen wenn nicht erkennbar.
|
||||
{APPLICABILITY_PROMPT}
|
||||
|
||||
Text: {chunk.text[:2000]}
|
||||
Quelle: {chunk.regulation_name}, {chunk.article}"""
|
||||
@@ -1059,6 +1105,7 @@ Quelle: {chunk.regulation_name}, {chunk.article}"""
|
||||
"paragraph": effective_paragraph,
|
||||
"license": license_info.get("license", ""),
|
||||
"license_notice": attribution,
|
||||
"source_type": license_info.get("source_type", "standard"),
|
||||
"url": chunk.source_url or "",
|
||||
}
|
||||
control.customer_visible = True
|
||||
@@ -1101,7 +1148,8 @@ Gib JSON zurück mit diesen Feldern:
|
||||
- tags: Liste von Tags (eigene Begriffe)
|
||||
- domain: Fachgebiet als Kuerzel (AUTH=Authentifizierung, CRYP=Kryptographie, NET=Netzwerk, DATA=Datenschutz, LOG=Logging, ACC=Zugriffskontrolle, SEC=IT-Sicherheit, INC=Vorfallmanagement, AI=KI, COMP=Compliance, GOV=Behoerden/Verwaltung, LAB=Arbeitsrecht, FIN=Finanzregulierung, TRD=Gewerbe/Handelsrecht, ENV=Umwelt, HLT=Gesundheit)
|
||||
- category: Inhaltliche Kategorie — MUSS zum domain passen. Moegliche Werte: {CATEGORY_LIST_STR}
|
||||
- target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "oeffentlicher_dienst")"""
|
||||
- target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "oeffentlicher_dienst")
|
||||
{APPLICABILITY_PROMPT}"""
|
||||
|
||||
raw = await _llm_chat(prompt, REFORM_SYSTEM_PROMPT)
|
||||
data = _parse_llm_json(raw)
|
||||
@@ -1186,6 +1234,7 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Chunks ohne A
|
||||
- target_audience: Liste der Zielgruppen fuer die dieses Control relevant ist. Moegliche Werte: "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "vertrieb", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst"
|
||||
- source_article: Artikel-/Paragraphen-Referenz aus dem Text extrahieren (z.B. "Artikel 10", "Art. 5", "§ 42", "Section 3"). Leer lassen wenn nicht erkennbar.
|
||||
- source_paragraph: Absatz-Referenz aus dem Text extrahieren (z.B. "Absatz 5", "Abs. 3", "Nr. 2", "(1)"). Leer lassen wenn nicht erkennbar.
|
||||
{APPLICABILITY_PROMPT}
|
||||
|
||||
{joined}"""
|
||||
|
||||
@@ -1228,6 +1277,7 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Chunks ohne A
|
||||
"paragraph": effective_paragraph,
|
||||
"license": lic.get("license", ""),
|
||||
"license_notice": lic.get("attribution", ""),
|
||||
"source_type": lic.get("source_type", "law"),
|
||||
"url": chunk.source_url or "",
|
||||
}
|
||||
control.customer_visible = True
|
||||
@@ -1289,6 +1339,7 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Aspekte ohne
|
||||
- domain: Fachgebiet als Kuerzel (AUTH=Authentifizierung, CRYP=Kryptographie, NET=Netzwerk, DATA=Datenschutz, LOG=Logging, ACC=Zugriffskontrolle, SEC=IT-Sicherheit, INC=Vorfallmanagement, AI=KI, COMP=Compliance, GOV=Behoerden/Verwaltung, LAB=Arbeitsrecht, FIN=Finanzregulierung, TRD=Gewerbe/Handelsrecht, ENV=Umwelt, HLT=Gesundheit)
|
||||
- category: Inhaltliche Kategorie — MUSS zum domain passen. Moegliche Werte: {CATEGORY_LIST_STR}
|
||||
- target_audience: Liste der Zielgruppen (z.B. "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst")
|
||||
{APPLICABILITY_PROMPT}
|
||||
|
||||
{joined}"""
|
||||
|
||||
@@ -1522,6 +1573,29 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Aspekte ohne
|
||||
if not isinstance(target_audience, list):
|
||||
target_audience = None
|
||||
|
||||
# Parse applicability fields (v3)
|
||||
applicable_industries = data.get("applicable_industries")
|
||||
if isinstance(applicable_industries, str):
|
||||
applicable_industries = [applicable_industries]
|
||||
if not isinstance(applicable_industries, list):
|
||||
applicable_industries = None
|
||||
|
||||
applicable_company_size = data.get("applicable_company_size")
|
||||
if isinstance(applicable_company_size, str):
|
||||
applicable_company_size = [applicable_company_size]
|
||||
if not isinstance(applicable_company_size, list):
|
||||
applicable_company_size = None
|
||||
# Validate size values
|
||||
valid_sizes = {"all", "micro", "small", "medium", "large", "enterprise"}
|
||||
if applicable_company_size:
|
||||
applicable_company_size = [s for s in applicable_company_size if s in valid_sizes]
|
||||
if not applicable_company_size:
|
||||
applicable_company_size = None
|
||||
|
||||
scope_conditions = data.get("scope_conditions")
|
||||
if not isinstance(scope_conditions, dict):
|
||||
scope_conditions = None
|
||||
|
||||
control = GeneratedControl(
|
||||
title=str(data.get("title", "Untitled Control"))[:255],
|
||||
objective=str(data.get("objective", "")),
|
||||
@@ -1536,6 +1610,9 @@ Gib ein JSON-Array zurueck mit GENAU {len(chunks)} Elementen. Fuer Aspekte ohne
|
||||
tags=tags[:20],
|
||||
target_audience=target_audience,
|
||||
category=category,
|
||||
applicable_industries=applicable_industries,
|
||||
applicable_company_size=applicable_company_size,
|
||||
scope_conditions=scope_conditions,
|
||||
)
|
||||
# Store effective domain for later control_id generation
|
||||
control.generation_metadata["_effective_domain"] = domain
|
||||
@@ -1738,7 +1815,8 @@ Kategorien: {CATEGORY_LIST_STR}"""
|
||||
license_rule, source_original_text, source_citation,
|
||||
customer_visible, generation_metadata,
|
||||
verification_method, category, generation_strategy,
|
||||
target_audience, pipeline_version
|
||||
target_audience, pipeline_version,
|
||||
applicable_industries, applicable_company_size, scope_conditions
|
||||
) VALUES (
|
||||
:framework_id, :control_id, :title, :objective, :rationale,
|
||||
:scope, :requirements, :test_procedure, :evidence,
|
||||
@@ -1747,7 +1825,8 @@ Kategorien: {CATEGORY_LIST_STR}"""
|
||||
:license_rule, :source_original_text, :source_citation,
|
||||
:customer_visible, :generation_metadata,
|
||||
:verification_method, :category, :generation_strategy,
|
||||
:target_audience, :pipeline_version
|
||||
:target_audience, :pipeline_version,
|
||||
:applicable_industries, :applicable_company_size, :scope_conditions
|
||||
)
|
||||
ON CONFLICT (framework_id, control_id) DO NOTHING
|
||||
RETURNING id
|
||||
@@ -1778,6 +1857,9 @@ Kategorien: {CATEGORY_LIST_STR}"""
|
||||
"generation_strategy": control.generation_strategy,
|
||||
"target_audience": json.dumps(control.target_audience) if control.target_audience else None,
|
||||
"pipeline_version": PIPELINE_VERSION,
|
||||
"applicable_industries": json.dumps(control.applicable_industries) if control.applicable_industries else None,
|
||||
"applicable_company_size": json.dumps(control.applicable_company_size) if control.applicable_company_size else None,
|
||||
"scope_conditions": json.dumps(control.scope_conditions) if control.scope_conditions else None,
|
||||
},
|
||||
)
|
||||
self.db.commit()
|
||||
|
||||
Reference in New Issue
Block a user