feat: LLM-basierter Rationale-Backfill fuer atomare Controls
POST /controls/backfill-rationale — ersetzt Placeholder "Aus Obligation abgeleitet." durch LLM-generierte Begruendungen (Ollama/qwen3.5). Optimierung: gruppiert ~86k Controls nach ~7k Parents, ein LLM-Call pro Parent. Paginierung via batch_size/offset fuer kontrollierte Ausfuehrung. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1112,6 +1112,170 @@ async def backfill_evidence_type(
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# RATIONALE BACKFILL (LLM)
|
||||
# =============================================================================
|
||||
|
||||
def _build_rationale_prompt(parent) -> str:
    """Build the German LLM prompt asking why *parent* was split into atomic controls.

    ``parent`` is one row of the parent query in ``backfill_rationale`` and
    must expose ``control_id``, ``title``, ``category``, ``source_name`` and
    ``child_count``.
    """
    source = parent.source_name or "Regulierung"
    return (
        "Du bist Compliance-Experte. Erklaere in 1-2 Saetzen auf Deutsch, "
        "WARUM aus dem uebergeordneten Control atomare Teilmassnahmen "
        "abgeleitet wurden.\n\n"
        f"Uebergeordnetes Control: {parent.control_id} — {parent.title}\n"
        f"Regulierung: {source}\n"
        f"Kategorie: {parent.category or 'k.A.'}\n"
        f"Anzahl atomarer Controls: {parent.child_count}\n\n"
        "Schreibe NUR die Begruendung (1-2 Saetze). Kein Markdown, "
        "keine Aufzaehlung, kein Praefix. "
        "Erklaere den regulatorischen Hintergrund und warum die "
        "Zerlegung in atomare, testbare Massnahmen notwendig ist."
    )


def _clean_llm_rationale(raw: str) -> str:
    """Normalize a raw LLM answer: drop quotes and a leading label, cap at 500 chars."""
    cleaned = raw.strip().strip('"').strip("'").strip()
    if cleaned.startswith(("Begründung:", "Begruendung:")):
        cleaned = cleaned.split(":", 1)[1].strip()
        # Strip quotes again in case the quoted text followed the label.
        cleaned = cleaned.strip('"').strip("'").strip()
    if len(cleaned) > 500:
        cleaned = cleaned[:497] + "..."
    return cleaned


@router.post("/controls/backfill-rationale")
async def backfill_rationale(
    dry_run: bool = Query(True, description="Nur zaehlen, nicht aendern"),
    batch_size: int = Query(50, ge=1, description="Parent-Controls pro Durchlauf"),
    offset: int = Query(0, ge=0, description="Offset fuer Paginierung (Parent-Index)"),
):
    """Replace the placeholder rationale of atomic controls with LLM-generated text.

    Child controls whose ``rationale`` is still ``'Aus Obligation abgeleitet.'``
    are grouped by their parent control; ONE LLM call is made per parent and
    the resulting rationale is written to all matching children of that
    parent (children in release state ``rejected`` or ``merged`` are skipped).

    Workflow:
      1. ``dry_run=true``: only return statistics and a sample of parents.
      2. ``dry_run=false&batch_size=50&offset=0``: process the first batch.
      3. Repeat with the returned ``next_offset`` until it is ``None``.

    Pagination note: the parent list is recomputed on every call and only
    contains parents that still have placeholder children, so successfully
    processed parents disappear from it. ``next_offset`` therefore only
    advances past the entries of this batch that failed (and are still in the
    list); advancing by ``batch_size`` would skip unprocessed parents.

    Args:
        dry_run: When true, nothing is sent to the LLM and nothing is written.
        batch_size: Number of parent controls handled in this call (>= 1).
        offset: Index into the ordered list of still-pending parents (>= 0).

    Returns:
        A dict with statistics. For real runs it contains the processed and
        updated counts, ``next_offset`` (``None`` when nothing is left beyond
        the failed entries), ``remaining_parents``, up to 10 errors and up to
        5 sample rationales.
    """
    from compliance.services.llm_provider import get_llm_provider

    with SessionLocal() as db:
        # 1. Load parents that still have children with the placeholder rationale.
        parents = db.execute(text("""
            SELECT p.id AS parent_uuid, p.control_id, p.title, p.category,
                   p.source_citation->>'source' AS source_name,
                   COUNT(c.id) AS child_count
            FROM canonical_controls p
            JOIN canonical_controls c ON c.parent_control_uuid = p.id
            WHERE c.rationale = 'Aus Obligation abgeleitet.'
              AND c.release_state NOT IN ('rejected', 'merged')
            GROUP BY p.id, p.control_id, p.title, p.category,
                     p.source_citation->>'source'
            ORDER BY p.control_id
        """)).fetchall()

        total_parents = len(parents)
        total_children = sum(p.child_count for p in parents)

        if dry_run:
            return {
                "dry_run": True,
                "total_parents": total_parents,
                "total_children": total_children,
                "estimated_llm_calls": total_parents,
                "sample_parents": [
                    {
                        "control_id": p.control_id,
                        "title": p.title,
                        "source": p.source_name,
                        "child_count": p.child_count,
                    }
                    for p in parents[:10]
                ],
            }

        # 2. Select the batch for this call.
        batch = parents[offset : offset + batch_size]
        if not batch:
            return {
                "dry_run": False,
                "message": "Kein weiterer Batch — alle Parents verarbeitet.",
                "total_parents": total_parents,
                "offset": offset,
                "processed": 0,
            }

        provider = get_llm_provider()
        processed = 0
        children_updated = 0
        errors = []
        sample_rationales = []

        for parent in batch:
            parent_uuid = str(parent.parent_uuid)
            prompt = _build_rationale_prompt(parent)

            # 3a. Ask the LLM; a failure only affects this parent.
            try:
                response = await provider.complete(
                    prompt=prompt,
                    max_tokens=256,
                    temperature=0.3,
                )
                rationale = _clean_llm_rationale(response.content)
            except Exception as e:
                logger.error("LLM error for %s: %s", parent.control_id, e)
                errors.append({
                    "control_id": parent.control_id,
                    "error": str(e)[:200],
                })
                continue

            if len(rationale) < 10:
                errors.append({
                    "control_id": parent.control_id,
                    "error": "LLM-Antwort zu kurz oder leer",
                })
                continue

            # 3b. Update all matching children of this parent. Committing per
            # parent keeps a later failure from invalidating earlier updates;
            # the rollback returns the session to a usable state.
            try:
                result = db.execute(
                    text("""
                        UPDATE canonical_controls
                        SET rationale = :rationale
                        WHERE parent_control_uuid = CAST(:pid AS uuid)
                          AND rationale = 'Aus Obligation abgeleitet.'
                          AND release_state NOT IN ('rejected', 'merged')
                    """),
                    {"rationale": rationale, "pid": parent_uuid},
                )
                db.commit()
            except Exception as e:
                db.rollback()
                logger.error("DB error for %s: %s", parent.control_id, e)
                errors.append({
                    "control_id": parent.control_id,
                    "error": str(e)[:200],
                })
                continue

            children_updated += result.rowcount
            processed += 1

            if len(sample_rationales) < 5:
                sample_rationales.append({
                    "parent": parent.control_id,
                    "title": parent.title,
                    "rationale": rationale,
                    "children_updated": result.rowcount,
                })

        # Processed parents no longer match the placeholder filter, so the
        # pending list shrinks by `processed`; only the failed entries of this
        # batch still occupy positions starting at `offset`.
        failed_in_batch = len(batch) - processed
        remaining_parents = total_parents - processed
        next_offset = offset + failed_in_batch

        return {
            "dry_run": False,
            "offset": offset,
            "batch_size": batch_size,
            "next_offset": next_offset if next_offset < remaining_parents else None,
            "processed_parents": processed,
            "children_updated": children_updated,
            "total_parents": total_parents,
            "total_children": total_children,
            "remaining_parents": remaining_parents,
            "errors": errors[:10],
            "sample_rationales": sample_rationales,
        }
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CONTROL CRUD (CREATE / UPDATE / DELETE)
|
||||
# =============================================================================
|
||||
|
||||
Reference in New Issue
Block a user