feat(pipeline): Anthropic Batch API, source/regulation filter, cost optimization
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 35s
CI/CD / test-python-backend-compliance (push) Successful in 34s
CI/CD / test-python-document-crawler (push) Successful in 22s
CI/CD / test-python-dsms-gateway (push) Successful in 19s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Has been skipped
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 35s
CI/CD / test-python-backend-compliance (push) Successful in 34s
CI/CD / test-python-document-crawler (push) Successful in 22s
CI/CD / test-python-dsms-gateway (push) Successful in 19s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Has been skipped
- Add Anthropic API support to decomposition Pass 0a/0b (prompt caching, content batching)
- Add Anthropic Batch API (50% cost reduction, async 24h processing)
- Add source_filter (ILIKE on source_citation) for regulation-based filtering
- Add category_filter to Pass 0a for selective decomposition
- Add regulation_filter to control_generator for RAG scan phase filtering (prefix match on regulation_code — enables CE + Code Review focus)
- New API endpoints: batch-submit-0a, batch-submit-0b, batch-status, batch-process
- 83 new tests (all passing)

Cost reduction: $2,525 → ~$600-700 with all optimizations combined.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -53,6 +53,7 @@ class GenerateRequest(BaseModel):
|
||||
batch_size: int = 5
|
||||
skip_web_search: bool = False
|
||||
dry_run: bool = False
|
||||
regulation_filter: Optional[List[str]] = None # Only process these regulation_code prefixes
|
||||
|
||||
|
||||
class GenerateResponse(BaseModel):
|
||||
@@ -144,6 +145,7 @@ async def start_generation(req: GenerateRequest):
|
||||
max_chunks=req.max_chunks,
|
||||
skip_web_search=req.skip_web_search,
|
||||
dry_run=req.dry_run,
|
||||
regulation_filter=req.regulation_filter,
|
||||
)
|
||||
|
||||
if req.dry_run:
|
||||
|
||||
@@ -115,6 +115,22 @@ class CrosswalkStatsResponse(BaseModel):
|
||||
|
||||
class MigrationRequest(BaseModel):
    # Request body accepted by the /migrate/* pass endpoints in this module.

    # Upper bound on processed items; 0 = no limit.
    limit: int = 0

    # Items per LLM call; 0 = auto (5 for Anthropic, 1 for Ollama).
    batch_size: int = 0

    # Use Anthropic API instead of Ollama.
    use_anthropic: bool = False

    # Comma-separated categories.
    category_filter: Optional[str] = None

    # Comma-separated source regulations (ILIKE match).
    source_filter: Optional[str] = None
|
||||
|
||||
|
||||
class BatchSubmitRequest(BaseModel):
    # Request body for the batch-submit-0a / batch-submit-0b endpoints.

    # Forwarded as `limit` to the batch submission call.
    # NOTE(review): presumably 0 means "no limit" as in MigrationRequest — confirm.
    limit: int = 0

    # Forwarded as `batch_size` to the batch submission call.
    batch_size: int = 5

    # Only forwarded by the Pass 0a submit endpoint.
    category_filter: Optional[str] = None

    # Only forwarded by the Pass 0a submit endpoint.
    source_filter: Optional[str] = None
|
||||
|
||||
|
||||
class BatchProcessRequest(BaseModel):
    # Request body for the batch-process endpoint.

    # Identifier of the batch whose results should be fetched and processed.
    batch_id: str

    # Which pass the batch belongs to: "0a" or "0b".
    pass_type: str = "0a"
|
||||
|
||||
|
||||
class MigrationResponse(BaseModel):
|
||||
@@ -447,13 +463,23 @@ async def crosswalk_stats():
|
||||
|
||||
@router.post("/migrate/decompose", response_model=MigrationResponse)
|
||||
async def migrate_decompose(req: MigrationRequest):
|
||||
"""Pass 0a: Extract obligation candidates from rich controls."""
|
||||
"""Pass 0a: Extract obligation candidates from rich controls.
|
||||
|
||||
With use_anthropic=true, uses Anthropic API with prompt caching
|
||||
and content batching (multiple controls per API call).
|
||||
"""
|
||||
from compliance.services.decomposition_pass import DecompositionPass
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
decomp = DecompositionPass(db=db)
|
||||
stats = await decomp.run_pass0a(limit=req.limit)
|
||||
stats = await decomp.run_pass0a(
|
||||
limit=req.limit,
|
||||
batch_size=req.batch_size,
|
||||
use_anthropic=req.use_anthropic,
|
||||
category_filter=req.category_filter,
|
||||
source_filter=req.source_filter,
|
||||
)
|
||||
return MigrationResponse(status="completed", stats=stats)
|
||||
except Exception as e:
|
||||
logger.error("Decomposition pass 0a failed: %s", e)
|
||||
@@ -464,13 +490,21 @@ async def migrate_decompose(req: MigrationRequest):
|
||||
|
||||
@router.post("/migrate/compose-atomic", response_model=MigrationResponse)
|
||||
async def migrate_compose_atomic(req: MigrationRequest):
|
||||
"""Pass 0b: Compose atomic controls from obligation candidates."""
|
||||
"""Pass 0b: Compose atomic controls from obligation candidates.
|
||||
|
||||
With use_anthropic=true, uses Anthropic API with prompt caching
|
||||
and content batching (multiple obligations per API call).
|
||||
"""
|
||||
from compliance.services.decomposition_pass import DecompositionPass
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
decomp = DecompositionPass(db=db)
|
||||
stats = await decomp.run_pass0b(limit=req.limit)
|
||||
stats = await decomp.run_pass0b(
|
||||
limit=req.limit,
|
||||
batch_size=req.batch_size,
|
||||
use_anthropic=req.use_anthropic,
|
||||
)
|
||||
return MigrationResponse(status="completed", stats=stats)
|
||||
except Exception as e:
|
||||
logger.error("Decomposition pass 0b failed: %s", e)
|
||||
@@ -479,6 +513,87 @@ async def migrate_compose_atomic(req: MigrationRequest):
|
||||
db.close()
|
||||
|
||||
|
||||
@router.post("/migrate/batch-submit-0a", response_model=MigrationResponse)
async def batch_submit_pass0a(req: BatchSubmitRequest):
    """Submit Pass 0a as Anthropic Batch API job (50% cost reduction).

    Returns a batch_id for polling. Results are processed asynchronously
    within 24 hours by Anthropic.

    Args:
        req: Submission options. ``limit``, ``batch_size``,
            ``category_filter`` and ``source_filter`` are forwarded
            unchanged to ``DecompositionPass.submit_batch_pass0a``.

    Returns:
        A ``MigrationResponse`` whose ``status`` is the ``"status"`` entry
        popped from the service result (``"submitted"`` when absent) and
        whose ``stats`` is the remainder of that result.

    Raises:
        HTTPException: Status 500 carrying the error text when the
            submission fails for any reason.
    """
    from compliance.services.decomposition_pass import DecompositionPass

    db = SessionLocal()
    try:
        decomp = DecompositionPass(db=db)
        result = await decomp.submit_batch_pass0a(
            limit=req.limit,
            batch_size=req.batch_size,
            category_filter=req.category_filter,
            source_filter=req.source_filter,
        )
        # Pop first so "status" is not duplicated inside the stats payload.
        status = result.pop("status", "submitted")
        return MigrationResponse(status=status, stats=result)
    except Exception as e:
        # logger.exception records the traceback; logger.error dropped it.
        logger.exception("Batch submit 0a failed: %s", e)
        # Chain the cause so the original error stays attached for debugging.
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always release the session, on success and on failure alike.
        db.close()
|
||||
|
||||
|
||||
@router.post("/migrate/batch-submit-0b", response_model=MigrationResponse)
async def batch_submit_pass0b(req: BatchSubmitRequest):
    """Submit Pass 0b as Anthropic Batch API job (50% cost reduction).

    Args:
        req: Submission options. Only ``limit`` and ``batch_size`` are
            forwarded to ``DecompositionPass.submit_batch_pass0b``; the
            filter fields of the request are not used by this endpoint.

    Returns:
        A ``MigrationResponse`` whose ``status`` is the ``"status"`` entry
        popped from the service result (``"submitted"`` when absent) and
        whose ``stats`` is the remainder of that result.

    Raises:
        HTTPException: Status 500 carrying the error text when the
            submission fails for any reason.
    """
    from compliance.services.decomposition_pass import DecompositionPass

    db = SessionLocal()
    try:
        decomp = DecompositionPass(db=db)
        result = await decomp.submit_batch_pass0b(
            limit=req.limit,
            batch_size=req.batch_size,
        )
        # Pop first so "status" is not duplicated inside the stats payload.
        status = result.pop("status", "submitted")
        return MigrationResponse(status=status, stats=result)
    except Exception as e:
        # logger.exception records the traceback; logger.error dropped it.
        logger.exception("Batch submit 0b failed: %s", e)
        # Chain the cause so the original error stays attached for debugging.
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always release the session, on success and on failure alike.
        db.close()
|
||||
|
||||
|
||||
@router.get("/migrate/batch-status/{batch_id}")
async def batch_check_status(batch_id: str):
    """Check processing status of an Anthropic batch job.

    Args:
        batch_id: Identifier of the batch, taken from the URL path.

    Returns:
        Whatever ``check_batch_status`` returns, passed through unchanged.

    Raises:
        HTTPException: Status 500 carrying the error text when the status
            lookup fails for any reason.
    """
    from compliance.services.decomposition_pass import check_batch_status

    try:
        return await check_batch_status(batch_id)
    except Exception as e:
        # Log like the sibling batch endpoints do; previously the failure
        # was converted to a 500 without leaving any trace in the logs.
        logger.exception("Batch status check failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
|
||||
@router.post("/migrate/batch-process", response_model=MigrationResponse)
async def batch_process_results(req: BatchProcessRequest):
    """Fetch and process results from a completed Anthropic batch.

    Call this after batch-status shows processing_status='ended'.

    Args:
        req: Carries the ``batch_id`` to process and the ``pass_type``
            ("0a" or "0b"); both are forwarded unchanged to
            ``DecompositionPass.process_batch_results``.

    Returns:
        A ``MigrationResponse`` whose ``status`` is the ``"status"`` entry
        popped from the service result (``"completed"`` when absent) and
        whose ``stats`` is the remainder of that result.

    Raises:
        HTTPException: Status 500 carrying the error text when processing
            fails for any reason.
    """
    from compliance.services.decomposition_pass import DecompositionPass

    db = SessionLocal()
    try:
        decomp = DecompositionPass(db=db)
        stats = await decomp.process_batch_results(
            batch_id=req.batch_id,
            pass_type=req.pass_type,
        )
        # Pop first so "status" is not duplicated inside the stats payload.
        status = stats.pop("status", "completed")
        return MigrationResponse(status=status, stats=stats)
    except Exception as e:
        # logger.exception records the traceback; logger.error dropped it.
        logger.exception("Batch process failed: %s", e)
        # Chain the cause so the original error stays attached for debugging.
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always release the session, on success and on failure alike.
        db.close()
|
||||
|
||||
|
||||
@router.post("/migrate/link-obligations", response_model=MigrationResponse)
|
||||
async def migrate_link_obligations(req: MigrationRequest):
|
||||
"""Pass 1: Link controls to obligations via source_citation article."""
|
||||
|
||||
Reference in New Issue
Block a user