diff --git a/control-pipeline/services/decomposition_pass.py b/control-pipeline/services/decomposition_pass.py
index 14d3c13..efd21e0 100644
--- a/control-pipeline/services/decomposition_pass.py
+++ b/control-pipeline/services/decomposition_pass.py
@@ -3640,6 +3640,35 @@ class DecompositionPass:
         if not prepared:
             return {"status": "empty", "total_candidates": 0}
 
+        # Pre-LLM filter: skip evidence, containers, framework references
+        from services.control_ontology import classify_obligation
+        atomic_prepared = []
+        filtered_stats = {"evidence": 0, "composite": 0, "framework_container": 0, "atomic": 0}
+        for obl in prepared:
+            classification = classify_obligation(
+                obl["obligation_text"], obl.get("action", "")
+            )
+            routing = classification["routing"]
+            filtered_stats[routing] = filtered_stats.get(routing, 0) + 1
+            if routing == "atomic":
+                atomic_prepared.append(obl)
+            else:
+                logger.info("Pre-LLM filter: %s skipped (%s): %s",
+                            obl["candidate_id"], routing, obl["obligation_text"][:80])
+
+        logger.info("Pre-LLM filter: %d → %d atomic (skipped: %d evidence, %d composite, %d framework)",
+                    len(prepared), len(atomic_prepared),
+                    filtered_stats.get("evidence", 0),
+                    filtered_stats.get("composite", 0),
+                    filtered_stats.get("framework_container", 0))
+
+        prepared = atomic_prepared
+        if not prepared:
+            # NOTE(review): expose the filter stats under the same key
+            # ("pre_filter") as the success-path result, so callers see
+            # a single schema regardless of which path returned.
+            return {"status": "empty", "total_candidates": 0,
+                    "pre_filter": filtered_stats}
+
         requests = []
         for i in range(0, len(prepared), batch_size):
             batch = prepared[i : i + batch_size]
@@ -3686,6 +3715,7 @@
             "total_candidates": len(prepared),
             "total_requests": len(requests),
             "batch_size": batch_size,
+            "pre_filter": filtered_stats,
         }
 
     async def process_batch_results(