merge: sync with origin/main, take upstream on conflicts

# Conflicts:
#	admin-compliance/lib/sdk/types.ts
#	admin-compliance/lib/sdk/vendor-compliance/types.ts
Sharang Parnerkar, 2026-04-16 16:26:48 +02:00
352 changed files with 181673 additions and 2188 deletions

@@ -12,6 +12,7 @@ Endpoints:
POST /v1/canonical/blocked-sources/cleanup — Start cleanup workflow
"""
import asyncio
import json
import logging
from typing import Optional, List
@@ -25,7 +26,16 @@ from compliance.services.control_generator import (
ControlGeneratorPipeline,
GeneratorConfig,
ALL_COLLECTIONS,
VALID_CATEGORIES,
VALID_DOMAINS,
_classify_regulation,
_detect_category,
_detect_domain,
_llm_local,
_parse_llm_json,
CATEGORY_LIST_STR,
)
from compliance.services.citation_backfill import CitationBackfill, BackfillResult
from compliance.services.rag_client import get_rag_client
logger = logging.getLogger(__name__)
@@ -40,9 +50,12 @@ class GenerateRequest(BaseModel):
domain: Optional[str] = None
collections: Optional[List[str]] = None
max_controls: int = 50
max_chunks: int = 1000 # Default: process max 1000 chunks per job (respects document boundaries)
batch_size: int = 5
skip_web_search: bool = False
dry_run: bool = False
regulation_filter: Optional[List[str]] = None # Only process these regulation_code prefixes
skip_prefilter: bool = False # Skip local LLM pre-filter, send all chunks to API
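# Example payload (illustrative sketch; host-relative path per the module
# docstring, and the "GDPR" prefix is an assumption):
#   POST /v1/canonical/generate
#   {"domain": "DATA", "max_controls": 20, "max_chunks": 500,
#    "regulation_filter": ["GDPR"], "dry_run": false}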
class GenerateResponse(BaseModel):
@@ -55,6 +68,7 @@ class GenerateResponse(BaseModel):
controls_needs_review: int = 0
controls_too_close: int = 0
controls_duplicates_found: int = 0
controls_qa_fixed: int = 0
errors: list = []
controls: list = []
@@ -89,42 +103,111 @@ class BlockedSourceResponse(BaseModel):
# ENDPOINTS
# =============================================================================
async def _run_pipeline_background(config: GeneratorConfig, job_id: str):
"""Run the pipeline in the background. Uses its own DB session."""
db = SessionLocal()
try:
config.existing_job_id = job_id
pipeline = ControlGeneratorPipeline(db=db, rag_client=get_rag_client())
result = await pipeline.run(config)
logger.info(
"Background generation job %s completed: %d controls from %d chunks",
job_id, result.controls_generated, result.total_chunks_scanned,
)
except Exception as e:
logger.error("Background generation job %s failed: %s", job_id, e)
# Update job as failed
try:
db.execute(
text("""
UPDATE canonical_generation_jobs
SET status = 'failed', errors = :errors, completed_at = NOW()
WHERE id = CAST(:job_id AS uuid)
"""),
{"job_id": job_id, "errors": json.dumps([str(e)])},
)
db.commit()
except Exception:
pass
finally:
db.close()
@router.post("/generate", response_model=GenerateResponse)
async def start_generation(req: GenerateRequest):
"""Start a control generation run."""
"""Start a control generation run (runs in background).
Returns immediately with job_id. Use GET /generate/status/{job_id} to poll progress.
"""
config = GeneratorConfig(
collections=req.collections,
domain=req.domain,
batch_size=req.batch_size,
max_controls=req.max_controls,
max_chunks=req.max_chunks,
skip_web_search=req.skip_web_search,
dry_run=req.dry_run,
regulation_filter=req.regulation_filter,
skip_prefilter=req.skip_prefilter,
)
if req.dry_run:
# Dry run: execute synchronously and return controls
db = SessionLocal()
try:
pipeline = ControlGeneratorPipeline(db=db, rag_client=get_rag_client())
result = await pipeline.run(config)
return GenerateResponse(
job_id=result.job_id,
status=result.status,
message=f"Dry run: {result.controls_generated} controls from {result.total_chunks_scanned} chunks",
total_chunks_scanned=result.total_chunks_scanned,
controls_generated=result.controls_generated,
controls_verified=result.controls_verified,
controls_needs_review=result.controls_needs_review,
controls_too_close=result.controls_too_close,
controls_duplicates_found=result.controls_duplicates_found,
errors=result.errors,
controls=result.controls,
)
except Exception as e:
logger.error("Dry run failed: %s", e)
raise HTTPException(status_code=500, detail=str(e))
finally:
db.close()
# Create job record first so we can return the ID
db = SessionLocal()
try:
result = db.execute(
text("""
INSERT INTO canonical_generation_jobs (status, config)
VALUES ('running', :config)
RETURNING id
"""),
{"config": json.dumps(config.model_dump())},
)
db.commit()
row = result.fetchone()
job_id = str(row[0]) if row else None
except Exception as e:
logger.error("Failed to create job: %s", e)
raise HTTPException(status_code=500, detail=f"Failed to create job: {e}")
finally:
db.close()
if not job_id:
raise HTTPException(status_code=500, detail="Failed to create job record")
# Launch pipeline in background
asyncio.create_task(_run_pipeline_background(config, job_id))
return GenerateResponse(
job_id=job_id,
status="running",
message="Generation started in background. Poll /generate/status/{job_id} for progress.",
)
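# Client-side polling sketch (BASE, httpx usage, and the 5s interval are
# assumptions, not part of this module):
#   job = httpx.post(f"{BASE}/v1/canonical/generate", json=payload).json()
#   while True:
#       s = httpx.get(f"{BASE}/v1/canonical/generate/status/{job['job_id']}").json()
#       if s["status"] in ("completed", "failed"):
#           break
#       time.sleep(5)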
@router.get("/generate/status/{job_id}")
async def get_job_status(job_id: str):
@@ -132,7 +215,7 @@ async def get_job_status(job_id: str):
db = SessionLocal()
try:
result = db.execute(
text("SELECT * FROM canonical_generation_jobs WHERE id = :id::uuid"),
text("SELECT * FROM canonical_generation_jobs WHERE id = CAST(:id AS uuid)"),
{"id": job_id},
)
row = result.fetchone()
@@ -270,6 +353,188 @@ async def review_control(control_id: str, req: ReviewRequest):
db.close()
class BulkReviewRequest(BaseModel):
release_state: str # Filter: which controls to bulk-review
action: str # "approve" or "reject"
new_state: Optional[str] = None # Override target state
@router.post("/generate/bulk-review")
async def bulk_review(req: BulkReviewRequest):
"""Bulk review all controls matching a release_state filter.
Example: reject all needs_review → sets them to deprecated.
"""
if req.release_state not in ("needs_review", "too_close", "duplicate"):
raise HTTPException(status_code=400, detail=f"Invalid filter state: {req.release_state}")
if req.action == "approve":
target = req.new_state or "draft"
elif req.action == "reject":
target = "deprecated"
else:
raise HTTPException(status_code=400, detail=f"Unknown action: {req.action}")
if target not in ("draft", "review", "approved", "deprecated", "needs_review"):
raise HTTPException(status_code=400, detail=f"Invalid target state: {target}")
db = SessionLocal()
try:
result = db.execute(
text("""
UPDATE canonical_controls
SET release_state = :target, updated_at = NOW()
WHERE release_state = :source
RETURNING control_id
"""),
{"source": req.release_state, "target": target},
)
affected = [row[0] for row in result]
db.commit()
return {
"action": req.action,
"source_state": req.release_state,
"target_state": target,
"affected_count": len(affected),
}
finally:
db.close()
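# Example (sketch): reject everything stuck in needs_review.
#   POST /v1/canonical/generate/bulk-review
#   {"release_state": "needs_review", "action": "reject"}   -> target state "deprecated"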
class QAReclassifyRequest(BaseModel):
limit: int = 100 # How many controls to reclassify per run
dry_run: bool = True # Preview only by default
filter_category: Optional[str] = None # Only reclassify controls of this category
filter_domain_prefix: Optional[str] = None # Only reclassify controls whose control_id starts with this prefix
@router.post("/generate/qa-reclassify")
async def qa_reclassify(req: QAReclassifyRequest):
"""Run QA reclassification on existing controls using local LLM.
Finds controls where keyword detection disagrees with the current category/domain,
then uses Ollama to determine the correct classification.
"""
db = SessionLocal()
try:
# Load controls to check
where_clauses = ["release_state NOT IN ('deprecated')"]
params = {"limit": req.limit}
if req.filter_category:
where_clauses.append("category = :cat")
params["cat"] = req.filter_category
if req.filter_domain_prefix:
where_clauses.append("control_id LIKE :prefix")
params["prefix"] = f"{req.filter_domain_prefix}-%"
where_sql = " AND ".join(where_clauses)
rows = db.execute(
text(f"""
SELECT id, control_id, title, objective, category,
COALESCE(requirements::text, '[]') as requirements,
COALESCE(source_original_text, '') as source_text
FROM canonical_controls
WHERE {where_sql}
ORDER BY created_at DESC
LIMIT :limit
"""),
params,
).fetchall()
results = {"checked": 0, "mismatches": 0, "fixes": [], "errors": []}
for row in rows:
results["checked"] += 1
control_id = row[1]
title = row[2]
objective = row[3] or ""
current_category = row[4]
source_text = row[6] or objective
# Keyword detection on source text
kw_category = _detect_category(source_text) or _detect_category(objective)
kw_domain = _detect_domain(source_text)
current_prefix = control_id.split("-")[0] if "-" in control_id else ""
# Skip if keyword detection agrees with current classification
if kw_category == current_category and kw_domain == current_prefix:
continue
results["mismatches"] += 1
# Ask Ollama to arbitrate
try:
reqs_text = ""
try:
reqs = json.loads(row[5])
if isinstance(reqs, list):
reqs_text = ", ".join(str(r) for r in reqs[:3])
except Exception:
pass
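# The prompt below is deliberately German: the regulation corpus this
# pipeline classifies is German-language, so the local model is prompted in kind.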
prompt = f"""Pruefe dieses Compliance-Control auf korrekte Klassifizierung.
Titel: {title[:100]}
Ziel: {objective[:200]}
Anforderungen: {reqs_text[:200]}
Aktuelle Zuordnung: domain={current_prefix}, category={current_category}
Keyword-Erkennung: domain={kw_domain}, category={kw_category}
Welche Zuordnung ist korrekt? Antworte NUR als JSON:
{{"domain": "KUERZEL", "category": "kategorie_name", "reason": "kurze Begruendung"}}
Domains: AUTH=Authentifizierung, CRYP=Kryptographie, NET=Netzwerk, DATA=Datenschutz, LOG=Logging, ACC=Zugriffskontrolle, SEC=IT-Sicherheit, INC=Vorfallmanagement, AI=KI, COMP=Compliance, GOV=Behoerden, LAB=Arbeitsrecht, FIN=Finanzregulierung, TRD=Gewerbe, ENV=Umwelt, HLT=Gesundheit
Kategorien: {CATEGORY_LIST_STR}"""
raw = await _llm_local(prompt)
data = _parse_llm_json(raw)
if not data:
continue
qa_domain = data.get("domain", "").upper()
qa_category = data.get("category", "")
reason = data.get("reason", "")
fix_entry = {
"control_id": control_id,
"title": title[:80],
"old_category": current_category,
"old_domain": current_prefix,
"new_category": qa_category if qa_category in VALID_CATEGORIES else current_category,
"new_domain": qa_domain if qa_domain in VALID_DOMAINS else current_prefix,
"reason": reason,
}
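# Only the category is ever written back below; the domain prefix embedded in
# control_id is reported in fix_entry but never renamed (renames would break
# references to existing control IDs).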
category_changed = qa_category in VALID_CATEGORIES and qa_category != current_category
if category_changed and not req.dry_run:
db.execute(
text("""
UPDATE canonical_controls
SET category = :category, updated_at = NOW()
WHERE id = :id
"""),
{"id": row[0], "category": qa_category},
)
fix_entry["applied"] = True
else:
fix_entry["applied"] = False
results["fixes"].append(fix_entry)
except Exception as e:
results["errors"].append({"control_id": control_id, "error": str(e)})
if not req.dry_run:
db.commit()
return results
finally:
db.close()
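# Example dry run (sketch): preview AUTH-prefixed controls only.
#   POST /v1/canonical/generate/qa-reclassify
#   {"limit": 50, "dry_run": true, "filter_domain_prefix": "AUTH"}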
@router.get("/generate/processed-stats")
async def get_processed_stats():
"""Get processing statistics per collection."""
@@ -429,3 +694,407 @@ async def get_controls_customer_view(
return {"controls": controls, "total": len(controls)}
finally:
db.close()
# =============================================================================
# CITATION BACKFILL
# =============================================================================
class BackfillRequest(BaseModel):
dry_run: bool = True # Default to dry_run for safety
limit: int = 0 # 0 = all controls
class BackfillResponse(BaseModel):
status: str
total_controls: int = 0
matched_hash: int = 0
matched_regex: int = 0
matched_llm: int = 0
unmatched: int = 0
updated: int = 0
errors: list = []
_backfill_status: dict = {}
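# In-memory status registry: state lives only in this process (single-worker
# assumption) and is lost on restart.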
async def _run_backfill_background(dry_run: bool, limit: int, backfill_id: str):
"""Run backfill in background with own DB session."""
db = SessionLocal()
try:
backfill = CitationBackfill(db=db, rag_client=get_rag_client())
result = await backfill.run(dry_run=dry_run, limit=limit)
_backfill_status[backfill_id] = {
"status": "completed",
"total_controls": result.total_controls,
"matched_hash": result.matched_hash,
"matched_regex": result.matched_regex,
"matched_llm": result.matched_llm,
"unmatched": result.unmatched,
"updated": result.updated,
"errors": result.errors[:50],
}
logger.info("Backfill %s completed: %d updated", backfill_id, result.updated)
except Exception as e:
logger.error("Backfill %s failed: %s", backfill_id, e)
_backfill_status[backfill_id] = {"status": "failed", "errors": [str(e)]}
finally:
db.close()
@router.post("/generate/backfill-citations", response_model=BackfillResponse)
async def start_backfill(req: BackfillRequest):
"""Backfill article/paragraph into existing control source_citations.
Uses 3-tier matching: hash lookup → regex parse → Ollama LLM.
Default is dry_run=True (preview only, no DB changes).
"""
import uuid
backfill_id = str(uuid.uuid4())[:8]
_backfill_status[backfill_id] = {"status": "running"}
# Always run in background (RAG index build takes minutes)
asyncio.create_task(_run_backfill_background(req.dry_run, req.limit, backfill_id))
return BackfillResponse(
status=f"running (id={backfill_id})",
)
@router.get("/generate/backfill-status/{backfill_id}")
async def get_backfill_status(backfill_id: str):
"""Get status of a backfill job."""
status = _backfill_status.get(backfill_id)
if not status:
raise HTTPException(status_code=404, detail="Backfill job not found")
return status
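# Typical flow (sketch): preview, inspect, then apply.
#   POST /v1/canonical/generate/backfill-citations {"dry_run": true}
#   GET  /v1/canonical/generate/backfill-status/<backfill_id>
#   POST /v1/canonical/generate/backfill-citations {"dry_run": false}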
# =============================================================================
# DOMAIN + TARGET AUDIENCE BACKFILL
# =============================================================================
class DomainBackfillRequest(BaseModel):
dry_run: bool = True
job_id: Optional[str] = None # Only backfill controls from this job
limit: int = 0 # 0 = all
_domain_backfill_status: dict = {}
async def _run_domain_backfill(req: DomainBackfillRequest, backfill_id: str):
"""Backfill domain, category, and target_audience for existing controls using Anthropic."""
import os
import httpx
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
ANTHROPIC_MODEL = os.getenv("CONTROL_GEN_ANTHROPIC_MODEL", "claude-sonnet-4-6")
if not ANTHROPIC_API_KEY:
_domain_backfill_status[backfill_id] = {
"status": "failed", "error": "ANTHROPIC_API_KEY not set"
}
return
db = SessionLocal()
try:
# Find controls needing backfill
where_clauses = ["(target_audience IS NULL OR target_audience = '[]' OR target_audience = 'null')"]
params: dict = {}
if req.job_id:
where_clauses.append("generation_metadata->>'job_id' = :job_id")
params["job_id"] = req.job_id
query = f"""
SELECT id, control_id, title, objective, category, source_original_text, tags
FROM canonical_controls
WHERE {' AND '.join(where_clauses)}
ORDER BY control_id
"""
if req.limit > 0:
query += f" LIMIT {req.limit}"
result = db.execute(text(query), params)
controls = [dict(zip(result.keys(), row)) for row in result]
total = len(controls)
updated = 0
errors = []
_domain_backfill_status[backfill_id] = {
"status": "running", "total": total, "updated": 0, "errors": []
}
# Process in batches of 10
BATCH_SIZE = 10
for batch_start in range(0, total, BATCH_SIZE):
batch = controls[batch_start:batch_start + BATCH_SIZE]
entries = []
for idx, ctrl in enumerate(batch):
text_for_analysis = ctrl.get("objective") or ctrl.get("title") or ""
original = ctrl.get("source_original_text") or ""
if original:
text_for_analysis += f"\n\nQuelltext-Auszug: {original[:500]}"
entries.append(
f"--- CONTROL {idx + 1}: {ctrl['control_id']} ---\n"
f"Titel: {ctrl.get('title', '')}\n"
f"Objective: {text_for_analysis[:800]}\n"
f"Tags: {json.dumps(ctrl.get('tags', []))}"
)
prompt = f"""Analysiere die folgenden {len(batch)} Controls und bestimme fuer jedes:
1. domain: Das Fachgebiet (AUTH, CRYP, NET, DATA, LOG, ACC, SEC, INC, AI, COMP, GOV, LAB, FIN, TRD, ENV, HLT)
2. category: Die Kategorie (encryption, authentication, network, data_protection, logging, incident, continuity, compliance, supply_chain, physical, personnel, application, system, risk, governance, hardware, identity, public_administration, labor_law, finance, trade_regulation, environmental, health)
3. target_audience: Liste der Zielgruppen (moegliche Werte: "unternehmen", "behoerden", "entwickler", "datenschutzbeauftragte", "geschaeftsfuehrung", "it-abteilung", "rechtsabteilung", "compliance-officer", "personalwesen", "einkauf", "produktion", "vertrieb", "gesundheitswesen", "finanzwesen", "oeffentlicher_dienst")
Antworte mit einem JSON-Array mit {len(batch)} Objekten. Jedes Objekt hat:
- control_index: 1-basierter Index
- domain: Fachgebiet-Kuerzel
- category: Kategorie
- target_audience: Liste der Zielgruppen
{"".join(entries)}"""
try:
headers = {
"x-api-key": ANTHROPIC_API_KEY,
"anthropic-version": "2023-06-01",
"content-type": "application/json",
}
payload = {
"model": ANTHROPIC_MODEL,
"max_tokens": 4096,
"system": "Du bist ein Compliance-Experte. Klassifiziere Controls nach Fachgebiet und Zielgruppe. Antworte NUR mit validem JSON.",
"messages": [{"role": "user", "content": prompt}],
}
async with httpx.AsyncClient(timeout=60.0) as client:
resp = await client.post(
"https://api.anthropic.com/v1/messages",
headers=headers,
json=payload,
)
if resp.status_code != 200:
errors.append(f"Anthropic API {resp.status_code} at batch {batch_start}")
continue
raw = resp.json().get("content", [{}])[0].get("text", "")
# Parse response
import re
bracket_match = re.search(r"\[.*\]", raw, re.DOTALL)
if not bracket_match:
errors.append(f"No JSON array in response at batch {batch_start}")
continue
results_list = json.loads(bracket_match.group(0))
for item in results_list:
idx = item.get("control_index", 0) - 1
if idx < 0 or idx >= len(batch):
continue
ctrl = batch[idx]
ctrl_id = str(ctrl["id"])
new_domain = item.get("domain", "")
new_category = item.get("category", "")
new_audience = item.get("target_audience", [])
if not isinstance(new_audience, list):
new_audience = []
if not req.dry_run:
update_parts = []
update_params: dict = {"ctrl_id": ctrl_id}
if new_category:
update_parts.append("category = :category")
update_params["category"] = new_category
if new_audience:
update_parts.append("target_audience = :target_audience")
update_params["target_audience"] = json.dumps(new_audience)
# Note: We do NOT rename control_ids here — that would
# break references and cause unique constraint violations.
if update_parts:
update_parts.append("updated_at = NOW()")
db.execute(
text(f"UPDATE canonical_controls SET {', '.join(update_parts)} WHERE id = CAST(:ctrl_id AS uuid)"),
update_params,
)
updated += 1
if not req.dry_run:
db.commit()
except Exception as e:
errors.append(f"Batch {batch_start}: {str(e)}")
db.rollback()
_domain_backfill_status[backfill_id] = {
"status": "running", "total": total, "updated": updated,
"progress": f"{min(batch_start + BATCH_SIZE, total)}/{total}",
"errors": errors[-10:],
}
_domain_backfill_status[backfill_id] = {
"status": "completed", "total": total, "updated": updated,
"errors": errors[-50:],
}
logger.info("Domain backfill %s completed: %d/%d updated", backfill_id, updated, total)
except Exception as e:
logger.error("Domain backfill %s failed: %s", backfill_id, e)
_domain_backfill_status[backfill_id] = {"status": "failed", "error": str(e)}
finally:
db.close()
@router.post("/generate/backfill-domain")
async def start_domain_backfill(req: DomainBackfillRequest):
"""Backfill domain, category, and target_audience for controls using Anthropic API.
Finds controls where target_audience is NULL and enriches them.
Default is dry_run=True (preview only).
"""
import uuid
backfill_id = str(uuid.uuid4())[:8]
_domain_backfill_status[backfill_id] = {"status": "starting"}
asyncio.create_task(_run_domain_backfill(req, backfill_id))
return {"status": "running", "backfill_id": backfill_id,
"message": f"Domain backfill started. Poll /generate/backfill-status/{backfill_id}"}
@router.get("/generate/domain-backfill-status/{backfill_id}")
async def get_domain_backfill_status(backfill_id: str):
"""Get status of a domain backfill job."""
status = _domain_backfill_status.get(backfill_id)
if not status:
raise HTTPException(status_code=404, detail="Domain backfill job not found")
return status
# ---------------------------------------------------------------------------
# Source-Type Backfill — Classify law vs guideline vs standard vs restricted
# ---------------------------------------------------------------------------
class SourceTypeBackfillRequest(BaseModel):
dry_run: bool = True
_source_type_backfill_status: dict = {}
async def _run_source_type_backfill(dry_run: bool, backfill_id: str):
"""Backfill source_type into source_citation JSONB for all controls."""
db = SessionLocal()
try:
# Find controls with source_citation that lack source_type
rows = db.execute(text("""
SELECT control_id, source_citation, generation_metadata
FROM compliance.canonical_controls
WHERE source_citation IS NOT NULL
AND (source_citation->>'source_type' IS NULL
OR source_citation->>'source_type' = '')
""")).fetchall()
total = len(rows)
updated = 0
errors = []
_source_type_backfill_status[backfill_id] = {
"status": "running", "total": total, "updated": 0, "dry_run": dry_run,
}
for row in rows:
cid = row[0]
citation = row[1] if isinstance(row[1], dict) else json.loads(row[1] or "{}")
metadata = row[2] if isinstance(row[2], dict) else json.loads(row[2] or "{}")
# Get regulation_code from metadata
reg_code = metadata.get("source_regulation", "")
if not reg_code:
# Try to infer from source name
errors.append(f"{cid}: no source_regulation in metadata")
continue
# Classify
license_info = _classify_regulation(reg_code)
source_type = license_info.get("source_type", "restricted")
# Update citation
citation["source_type"] = source_type
if not dry_run:
db.execute(text("""
UPDATE compliance.canonical_controls
SET source_citation = :citation
WHERE control_id = :cid
"""), {"citation": json.dumps(citation), "cid": cid})
updated += 1
if not dry_run and updated % 100 == 0:
db.commit()  # commit every 100 updates to keep transactions small
if not dry_run:
db.commit()
# Count distribution
dist_query = db.execute(text("""
SELECT source_citation->>'source_type' as st, COUNT(*)
FROM compliance.canonical_controls
WHERE source_citation IS NOT NULL
AND source_citation->>'source_type' IS NOT NULL
GROUP BY st
""")).fetchall() if not dry_run else []
distribution = {r[0]: r[1] for r in dist_query}
_source_type_backfill_status[backfill_id] = {
"status": "completed", "total": total, "updated": updated,
"dry_run": dry_run, "distribution": distribution,
"errors": errors[:50],
}
logger.info("Source-type backfill %s completed: %d/%d updated (dry_run=%s)",
backfill_id, updated, total, dry_run)
except Exception as e:
logger.error("Source-type backfill %s failed: %s", backfill_id, e)
_source_type_backfill_status[backfill_id] = {"status": "failed", "error": str(e)}
finally:
db.close()
@router.post("/generate/backfill-source-type")
async def start_source_type_backfill(req: SourceTypeBackfillRequest):
"""Backfill source_type (law/guideline/standard/restricted) into source_citation JSONB.
Classifies each control's source as binding law, authority guideline,
voluntary standard, or restricted norm based on regulation_code.
Default is dry_run=True (preview only).
"""
import uuid
backfill_id = str(uuid.uuid4())[:8]
_source_type_backfill_status[backfill_id] = {"status": "starting"}
asyncio.create_task(_run_source_type_backfill(req.dry_run, backfill_id))
return {
"status": "running",
"backfill_id": backfill_id,
"message": f"Source-type backfill started. Poll /generate/source-type-backfill-status/{backfill_id}",
}
@router.get("/generate/source-type-backfill-status/{backfill_id}")
async def get_source_type_backfill_status(backfill_id: str):
"""Get status of a source-type backfill job."""
status = _source_type_backfill_status.get(backfill_id)
if not status:
raise HTTPException(status_code=404, detail="Source-type backfill job not found")
return status