fix: handle new numeric batch custom_id format in Pass 0a result processing

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-24 07:21:50 +02:00
parent bbfcd44407
commit f1359d63ba

View File

@@ -3697,10 +3697,49 @@ class DecompositionPass:
def _handle_batch_result_0a(
self, custom_id: str, text_content: str, stats: dict,
) -> None:
"""Process a single Pass 0a batch result."""
# custom_id format: p0a_CTRL-001+CTRL-002+...
"""Process a single Pass 0a batch result.
custom_id formats:
- p0a_b00001 (numeric batch index — new format)
- p0a_CTRL-001+CTRL-002 (legacy format — control IDs joined with '+')
For numeric format, control IDs are extracted from the LLM response
(JSON object with control_id keys).
"""
prefix = "p0a_"
control_ids = custom_id[len(prefix):].split("+") if custom_id.startswith(prefix) else []
suffix = custom_id[len(prefix):] if custom_id.startswith(prefix) else custom_id
# New format: p0a_b00001 — extract control IDs from LLM response
if suffix.startswith("b") and suffix[1:].isdigit():
# Response is a JSON object: {control_id: [obligations], ...}
results_by_id = _parse_json_object(text_content)
if not results_by_id:
# Try as array (single-control response)
raw_obls = _parse_json_array(text_content)
if raw_obls:
logger.warning("Batch %s: got array instead of object, skipping", custom_id)
stats["errors"] += 1
return
for control_id, raw_obls in results_by_id.items():
uuid_row = self.db.execute(
text("SELECT id FROM canonical_controls WHERE control_id = :cid LIMIT 1"),
{"cid": control_id},
).fetchone()
if not uuid_row:
continue
control_uuid = str(uuid_row[0])
if not isinstance(raw_obls, list):
raw_obls = [raw_obls] if raw_obls else []
if not raw_obls:
raw_obls = [{"obligation_text": control_id, "action": "sicherstellen",
"object": control_id}]
self._process_pass0a_obligations(raw_obls, control_id, control_uuid, stats)
stats["controls_processed"] += 1
return
# Legacy format: p0a_CTRL-001+CTRL-002
control_ids = suffix.split("+")
if len(control_ids) == 1:
raw_obls = _parse_json_array(text_content)