Add skip_prefilter option to control generator

The local LLM pre-filter (llama3.2 3B) was incorrectly skipping annex chunks
that contain concrete requirements. Added a skip_prefilter flag to bypass
the local pre-filter and send all chunks directly to the Anthropic API.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-17 16:30:57 +01:00
parent 567e82ddf5
commit a7f7e57dd7
2 changed files with 4 additions and 1 deletions

View File

@@ -54,6 +54,7 @@ class GenerateRequest(BaseModel):
skip_web_search: bool = False
dry_run: bool = False
regulation_filter: Optional[List[str]] = None # Only process these regulation_code prefixes
skip_prefilter: bool = False # Skip local LLM pre-filter, send all chunks to API
class GenerateResponse(BaseModel):
@@ -146,6 +147,7 @@ async def start_generation(req: GenerateRequest):
skip_web_search=req.skip_web_search,
dry_run=req.dry_run,
regulation_filter=req.regulation_filter,
skip_prefilter=req.skip_prefilter,
)
if req.dry_run:

View File

@@ -385,6 +385,7 @@ class GeneratorConfig(BaseModel):
dry_run: bool = False
existing_job_id: Optional[str] = None # If set, reuse this job instead of creating a new one
regulation_filter: Optional[List[str]] = None # Only process chunks matching these regulation_code prefixes
skip_prefilter: bool = False # If True, skip local LLM pre-filter (send all chunks to API)
@dataclass
@@ -1886,7 +1887,7 @@ Kategorien: {CATEGORY_LIST_STR}"""
self._update_job(job_id, result)
# Stage 1.5: Local LLM pre-filter — skip chunks without requirements
if not config.dry_run:
if not config.dry_run and not config.skip_prefilter:
is_relevant, prefilter_reason = await _prefilter_chunk(chunk.text)
if not is_relevant:
chunks_skipped_prefilter += 1