From f1359d63bac560d07d16c847bcfece5ef49db977 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Fri, 24 Apr 2026 07:21:50 +0200 Subject: [PATCH] fix: handle new numeric batch custom_id format in Pass 0a result processing Co-Authored-By: Claude Opus 4.6 (1M context) --- .../services/decomposition_pass.py | 45 +++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/control-pipeline/services/decomposition_pass.py b/control-pipeline/services/decomposition_pass.py index c4a44f1..2ad8ce9 100644 --- a/control-pipeline/services/decomposition_pass.py +++ b/control-pipeline/services/decomposition_pass.py @@ -3697,10 +3697,49 @@ class DecompositionPass: def _handle_batch_result_0a( self, custom_id: str, text_content: str, stats: dict, ) -> None: - """Process a single Pass 0a batch result.""" - # custom_id format: p0a_CTRL-001+CTRL-002+... + """Process a single Pass 0a batch result. + + custom_id formats: + - p0a_b00001 (numeric batch index — new format) + - p0a_CTRL-001+CTRL-002 (legacy '+'-separated control IDs) + + For the numeric format, control IDs are extracted from the LLM response + (JSON object with control_id keys). 
+ """ prefix = "p0a_" - control_ids = custom_id[len(prefix):].split("+") if custom_id.startswith(prefix) else [] + suffix = custom_id[len(prefix):] if custom_id.startswith(prefix) else custom_id + + # New format: p0a_b00001 — extract control IDs from LLM response + if suffix.startswith("b") and suffix[1:].isdigit(): + # Response is a JSON object: {control_id: [obligations], ...} + results_by_id = _parse_json_object(text_content) + if not results_by_id: + # Try as array (single-control response) + raw_obls = _parse_json_array(text_content) + if raw_obls: + logger.warning("Batch %s: got array instead of object, skipping", custom_id) + stats["errors"] += 1 + return + + for control_id, raw_obls in results_by_id.items(): + uuid_row = self.db.execute( + text("SELECT id FROM canonical_controls WHERE control_id = :cid LIMIT 1"), + {"cid": control_id}, + ).fetchone() + if not uuid_row: + continue + control_uuid = str(uuid_row[0]) + if not isinstance(raw_obls, list): + raw_obls = [raw_obls] if raw_obls else [] + if not raw_obls: + raw_obls = [{"obligation_text": control_id, "action": "sicherstellen", + "object": control_id}] + self._process_pass0a_obligations(raw_obls, control_id, control_uuid, stats) + stats["controls_processed"] += 1 + return + + # Legacy format: p0a_CTRL-001+CTRL-002 + control_ids = suffix.split("+") if len(control_ids) == 1: raw_obls = _parse_json_array(text_content)