"""Test qwen3 backfill quality on 30 controls — compare with Haiku results.""" import json import re import time import httpx from db.session import SessionLocal from sqlalchemy import text CONTROL_IDS = [ 'GOV-1006-A07', 'TRD-218-A02', 'INC-649-A06', 'INC-573-A02', 'AUTH-1245-A07', 'AUTH-1122-A01', 'FIN-902-A07', 'FIN-496-A04', 'FIN-992-A02', 'AUTH-1165-A03', 'SEC-1594-A04', 'INC-495-A04', 'INC-379-A09', 'COMP-1272-A07', 'FIN-1024-A04', 'INC-498-A03', 'GOV-734-A07', 'LOG-920-A04', 'TRD-294-A05', 'SEC-1735-A01', 'FIN-996-A01', 'AUTH-1128-A04', 'TRD-294-A02', 'FIN-915-A03', 'DATA-1079-A03', 'TRD-046-A01', 'TRD-111-A02', 'AUTH-1228-A03', 'INC-532-A02', 'FIN-624-A07', ] SYSTEM_PROMPT = """Du ergaenzt fehlende Felder fuer Compliance Controls. Aendere NICHTS am bestehenden Control. Ergaenze NUR diese 6 Felder: 1. applicability: {} wenn universell, sonst {"field": "context.SIGNAL", "op": "==", "value": true} 2. check_type: EINEN der 10 Werte: technical_config_check, code_pattern_check, runtime_security_test, document_policy_check, document_classification_check, document_contract_check, evidence_artifact_check, process_verification, training_verification, interview_assessment 3. scanner_hint: {"search_terms": [...], "negative_indicators": [...]} 4. manual_review_required_if: ["Bedingung 1", ...] 5. evidence_type: code|process|hybrid 6. provides_context: ["context.VARIABLE", ...] oder [] Antworte als JSON-Objekt mit Control-ID als Key. Kein Markdown, kein Text drumherum.""" OLLAMA_URL = "http://host.docker.internal:11434/api/chat" def load_controls(): db = SessionLocal() ids_str = ",".join(f"'{c}'" for c in CONTROL_IDS) rows = db.execute(text(f""" SELECT control_id, title, generation_metadata->>'assertion' as assertion, category, severity, generation_metadata->>'merge_group_hint' as merge_key FROM canonical_controls WHERE control_id IN ({ids_str}) ORDER BY control_id """)).fetchall() db.close() return [ {"id": r[0], "title": r[1], "assertion": r[2] or "", "cat": r[3], "sev": r[4], "mk": r[5] or ""} for r in rows ] def call_qwen(controls: list[dict]) -> dict: prompt = "Ergaenze die 6 Felder fuer diese Controls:\n" for c in controls: prompt += f"\nControl-ID: {c['id']}\nTitel: {c['title']}\nAssertion: {c['assertion']}\nKategorie: {c['cat']}\nSeverity: {c['sev']}\nMerge-Key: {c['mk']}\n---" resp = httpx.post(OLLAMA_URL, json={ "model": "qwen3:30b-a3b", "messages": [ {"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": prompt + "\n\n/no_think"}, ], "stream": False, "options": {"temperature": 0.1, "num_predict": 8192}, }, timeout=180) content = resp.json().get("message", {}).get("content", "") # Strip thinking tags if present content = re.sub(r".*?", "", content, flags=re.DOTALL).strip() # Strip markdown fencing if content.startswith("```"): content = content.split("\n", 1)[1].rsplit("```", 1)[0] return json.loads(content) def main(): controls = load_controls() print(f"Loaded {len(controls)} controls") batch_size = 3 all_results = {} errors = 0 start = time.time() for i in range(0, len(controls), batch_size): batch = controls[i:i + batch_size] batch_num = i // batch_size + 1 t0 = time.time() try: parsed = call_qwen(batch) all_results.update(parsed) dt = time.time() - t0 print(f"Batch {batch_num}: {len(parsed)} controls OK ({dt:.1f}s)") except Exception as e: errors += 1 dt = time.time() - t0 print(f"Batch {batch_num}: ERROR ({dt:.1f}s) — {e}") elapsed = time.time() - start print(f"\nDone: {len(all_results)}/{len(controls)} results, {errors} errors") print(f"Total time: {elapsed:.1f}s") print(f"Avg per control: {elapsed / len(controls):.1f}s") print(f"Hochrechnung 20k Controls: {elapsed / len(controls) * 20000 / 3600:.1f}h") # Show 3 sample results print("\n=== Sample Results ===") for cid in list(all_results.keys())[:3]: r = all_results[cid] print(f"\n{cid}:") print(f" applicability: {json.dumps(r.get('applicability', {}), ensure_ascii=False)[:100]}") print(f" check_type: {r.get('check_type', '')}") print(f" evidence_type: {r.get('evidence_type', '')}") print(f" scanner_hint terms: {r.get('scanner_hint', {}).get('search_terms', [])[:3]}") print(f" provides_context: {r.get('provides_context', [])[:2]}") if __name__ == "__main__": main()