perf(pipeline): switch to v3 prompt for generation, v4 fields via Haiku backfill
Remove applicability/scanner_hint/manual_review_required_if/evidence_type/provides_context from the Pass 0b prompt to reduce output tokens (~40% less). The 6 v4 fields (these five plus check_type) are added afterwards via a cheap Haiku backfill (~$1.50 per 10k controls). Saves ~$200 over the remaining 160k obligations. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
118
control-pipeline/scripts/test_qwen_backfill.py
Normal file
118
control-pipeline/scripts/test_qwen_backfill.py
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
"""Test qwen3 backfill quality on 30 controls — compare with Haiku results."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import httpx
|
||||||
|
from db.session import SessionLocal
|
||||||
|
from sqlalchemy import text
|
||||||
|
|
||||||
|
# Fixed sample of 30 control IDs used for the backfill quality comparison.
CONTROL_IDS = [
    "GOV-1006-A07",
    "TRD-218-A02",
    "INC-649-A06",
    "INC-573-A02",
    "AUTH-1245-A07",
    "AUTH-1122-A01",
    "FIN-902-A07",
    "FIN-496-A04",
    "FIN-992-A02",
    "AUTH-1165-A03",
    "SEC-1594-A04",
    "INC-495-A04",
    "INC-379-A09",
    "COMP-1272-A07",
    "FIN-1024-A04",
    "INC-498-A03",
    "GOV-734-A07",
    "LOG-920-A04",
    "TRD-294-A05",
    "SEC-1735-A01",
    "FIN-996-A01",
    "AUTH-1128-A04",
    "TRD-294-A02",
    "FIN-915-A03",
    "DATA-1079-A03",
    "TRD-046-A01",
    "TRD-111-A02",
    "AUTH-1228-A03",
    "INC-532-A02",
    "FIN-624-A07",
]
|
||||||
|
|
||||||
|
# System prompt (written in German) sent with every backfill request. It tells
# the model to leave the existing control unchanged, to add only the six listed
# fields, and to answer with a bare JSON object keyed by control ID (no
# Markdown, no surrounding text).
SYSTEM_PROMPT = """Du ergaenzt fehlende Felder fuer Compliance Controls. Aendere NICHTS am bestehenden Control.
Ergaenze NUR diese 6 Felder:
1. applicability: {} wenn universell, sonst {"field": "context.SIGNAL", "op": "==", "value": true}
2. check_type: EINEN der 10 Werte: technical_config_check, code_pattern_check, runtime_security_test, document_policy_check, document_classification_check, document_contract_check, evidence_artifact_check, process_verification, training_verification, interview_assessment
3. scanner_hint: {"search_terms": [...], "negative_indicators": [...]}
4. manual_review_required_if: ["Bedingung 1", ...]
5. evidence_type: code|process|hybrid
6. provides_context: ["context.VARIABLE", ...] oder []
Antworte als JSON-Objekt mit Control-ID als Key. Kein Markdown, kein Text drumherum."""

# Chat endpoint that call_qwen() posts to.
# NOTE(review): host.docker.internal suggests the script runs inside a
# container and reaches an Ollama server on the host — confirm.
OLLAMA_URL = "http://host.docker.internal:11434/api/chat"
|
||||||
|
|
||||||
|
|
||||||
|
def load_controls():
    """Fetch the controls listed in ``CONTROL_IDS`` from ``canonical_controls``.

    Returns:
        A list of dicts ordered by control ID, each with the keys ``id``,
        ``title``, ``assertion``, ``cat``, ``sev`` and ``mk``. ``assertion``
        and ``mk`` are extracted from ``generation_metadata`` and fall back
        to ``""`` when the database returns NULL for them.
    """
    if not CONTROL_IDS:
        # An empty "IN ()" list is a syntax error on most databases, so skip
        # the query entirely when there is nothing to look up.
        return []

    # CONTROL_IDS is a hard-coded module constant, so inlining the quoted IDs
    # is acceptable here; do not reuse this pattern for external input.
    ids_str = ",".join(f"'{c}'" for c in CONTROL_IDS)

    db = SessionLocal()
    try:
        rows = db.execute(text(f"""
            SELECT control_id, title, generation_metadata->>'assertion' as assertion,
                   category, severity, generation_metadata->>'merge_group_hint' as merge_key
            FROM canonical_controls
            WHERE control_id IN ({ids_str})
            ORDER BY control_id
        """)).fetchall()
    finally:
        # Release the session even when the query raises.
        db.close()

    return [
        {"id": r[0], "title": r[1], "assertion": r[2] or "", "cat": r[3], "sev": r[4], "mk": r[5] or ""}
        for r in rows
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def call_qwen(controls: list[dict]) -> dict:
    """Ask the qwen3 model to backfill the six fields for a batch of controls.

    Args:
        controls: Control dicts with the keys ``id``, ``title``,
            ``assertion``, ``cat``, ``sev`` and ``mk``.

    Returns:
        The parsed JSON object from the model's answer. The system prompt
        asks for one entry per control, keyed by control ID.

    Raises:
        httpx.HTTPStatusError: If the endpoint answers with a 4xx/5xx status.
        json.JSONDecodeError: If the answer is not valid JSON.
        ValueError: If the answer is valid JSON but not a JSON object.
    """
    prompt = "Ergaenze die 6 Felder fuer diese Controls:\n" + "".join(
        f"\nControl-ID: {c['id']}\nTitel: {c['title']}\nAssertion: {c['assertion']}\nKategorie: {c['cat']}\nSeverity: {c['sev']}\nMerge-Key: {c['mk']}\n---"
        for c in controls
    )

    resp = httpx.post(OLLAMA_URL, json={
        "model": "qwen3:30b-a3b",
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            # NOTE(review): "/no_think" presumably asks qwen3 to skip its
            # reasoning phase — confirm against the model documentation.
            {"role": "user", "content": prompt + "\n\n/no_think"},
        ],
        "stream": False,
        "options": {"temperature": 0.1, "num_predict": 8192},
    }, timeout=180)
    # Surface HTTP failures directly instead of as a confusing JSON error later.
    resp.raise_for_status()

    content = resp.json().get("message", {}).get("content", "")

    # Strip thinking tags if present
    content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()

    # Strip markdown fencing: drop the opening fence line and everything from
    # the last closing fence on. partition() tolerates a fence with no newline.
    if content.startswith("```"):
        _, _, content = content.partition("\n")
        content = content.rsplit("```", 1)[0]

    parsed = json.loads(content)
    if not isinstance(parsed, dict):
        raise ValueError(
            f"expected a JSON object keyed by control ID, got {type(parsed).__name__}"
        )
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the backfill benchmark and print timings plus a few sample results.

    Loads the controls, sends them to ``call_qwen`` in batches of three,
    reports per-batch timing and errors, extrapolates the runtime to 20k
    controls, and prints selected fields of the first three results.
    """
    controls = load_controls()
    print(f"Loaded {len(controls)} controls")
    if not controls:
        # Nothing to benchmark; this also avoids dividing by zero below.
        return

    batch_size = 3
    all_results = {}
    errors = 0
    start = time.time()

    for i in range(0, len(controls), batch_size):
        batch = controls[i:i + batch_size]
        batch_num = i // batch_size + 1
        t0 = time.time()
        try:
            parsed = call_qwen(batch)
            all_results.update(parsed)
            dt = time.time() - t0
            print(f"Batch {batch_num}: {len(parsed)} controls OK ({dt:.1f}s)")
        except Exception as e:
            # A failed batch is counted and reported, but must not abort the
            # remaining batches of the benchmark.
            errors += 1
            dt = time.time() - t0
            print(f"Batch {batch_num}: ERROR ({dt:.1f}s) — {e}")

    elapsed = time.time() - start
    print(f"\nDone: {len(all_results)}/{len(controls)} results, {errors} errors")
    print(f"Total time: {elapsed:.1f}s")
    print(f"Avg per control: {elapsed / len(controls):.1f}s")
    print(f"Hochrechnung 20k Controls: {elapsed / len(controls) * 20000 / 3600:.1f}h")

    # Show 3 sample results
    print("\n=== Sample Results ===")
    for cid, r in list(all_results.items())[:3]:
        print(f"\n{cid}:")
        if not isinstance(r, dict):
            # Model output is unvalidated; report odd entries instead of crashing.
            print(f" unexpected result type: {type(r).__name__}")
            continue
        hint = r.get('scanner_hint')
        terms = hint.get('search_terms', []) if isinstance(hint, dict) else []
        print(f" applicability: {json.dumps(r.get('applicability', {}), ensure_ascii=False)[:100]}")
        print(f" check_type: {r.get('check_type', '')}")
        print(f" evidence_type: {r.get('evidence_type', '')}")
        print(f" scanner_hint terms: {(terms or [])[:3]}")
        print(f" provides_context: {(r.get('provides_context') or [])[:2]}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -2168,12 +2168,7 @@ Antworte als JSON:
|
|||||||
"check_type": "technical_config_check|code_pattern_check|runtime_security_test|document_policy_check|document_classification_check|document_contract_check|evidence_artifact_check|process_verification|training_verification|interview_assessment",
|
"check_type": "technical_config_check|code_pattern_check|runtime_security_test|document_policy_check|document_classification_check|document_contract_check|evidence_artifact_check|process_verification|training_verification|interview_assessment",
|
||||||
"merge_key": "action_type:normalized_object:control_phase",
|
"merge_key": "action_type:normalized_object:control_phase",
|
||||||
"dependency_hints": ["dependency_type:action_type:normalized_object (Voraussetzungen, Ersetzungen, Kompensationen)"],
|
"dependency_hints": ["dependency_type:action_type:normalized_object (Voraussetzungen, Ersetzungen, Kompensationen)"],
|
||||||
"lifecycle_phase_order": "1-13 (1=scope, 2=definition, 4=implementation, 7=monitoring, 8=testing, 12=reporting)",
|
"lifecycle_phase_order": "1-13 (1=scope, 2=definition, 4=implementation, 7=monitoring, 8=testing, 12=reporting)"
|
||||||
"applicability": {{}},
|
|
||||||
"scanner_hint": {{"search_terms": ["technischer Suchbegriff"], "negative_indicators": ["Negativindikator"]}},
|
|
||||||
"manual_review_required_if": ["Bedingung fuer manuelle Pruefung"],
|
|
||||||
"evidence_type": "code|process|hybrid",
|
|
||||||
"provides_context": ["context.VARIABLE die dieses Control bei Pruefung erzeugt"]
|
|
||||||
}}"""
|
}}"""
|
||||||
|
|
||||||
|
|
||||||
@@ -2271,12 +2266,7 @@ Jedes Control hat dieses Format:
|
|||||||
"check_type": "technical_config_check|code_pattern_check|runtime_security_test|document_policy_check|document_classification_check|document_contract_check|evidence_artifact_check|process_verification|training_verification|interview_assessment",
|
"check_type": "technical_config_check|code_pattern_check|runtime_security_test|document_policy_check|document_classification_check|document_contract_check|evidence_artifact_check|process_verification|training_verification|interview_assessment",
|
||||||
"merge_key": "action_type:normalized_object:control_phase",
|
"merge_key": "action_type:normalized_object:control_phase",
|
||||||
"dependency_hints": ["dependency_type:action_type:normalized_object (Voraussetzungen, Ersetzungen, Kompensationen)"],
|
"dependency_hints": ["dependency_type:action_type:normalized_object (Voraussetzungen, Ersetzungen, Kompensationen)"],
|
||||||
"lifecycle_phase_order": "1-13 (1=scope, 2=definition, 4=implementation, 7=monitoring, 8=testing, 12=reporting)",
|
"lifecycle_phase_order": "1-13 (1=scope, 2=definition, 4=implementation, 7=monitoring, 8=testing, 12=reporting)"
|
||||||
"applicability": {{}},
|
|
||||||
"scanner_hint": {{"search_terms": ["technischer Suchbegriff"], "negative_indicators": ["Negativindikator"]}},
|
|
||||||
"manual_review_required_if": ["Bedingung fuer manuelle Pruefung"],
|
|
||||||
"evidence_type": "code|process|hybrid",
|
|
||||||
"provides_context": ["context.VARIABLE die dieses Control bei Pruefung erzeugt"]
|
|
||||||
}}"""
|
}}"""
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user