feat(impressum): Snapshot-Modul-Tab — ImpressumAgent auf gespeichertem Text

Snapshot-Detailseite wird zu Modul-Tabs (Cookies & Tracking | Impressum).
Backend GET /snapshots/{id}/impressum-check fuehrt den v3 ImpressumAgent auf
dem gespeicherten Impressum-Text aus (kein Re-Crawl); die Input-Erzeugung ist
in impressum_input_from_snapshot() ausgelagert (pure + getestet: Text/Scope/
company_name-Fallback/None-Pfad). Das Frontend laedt lazy beim Tab-Wechsel und
rendert mit dem bestehenden AgentResultTab (keine zweite Engine).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-11 11:24:44 +02:00
parent 6846ca6b28
commit 5b36b3f367
5 changed files with 203 additions and 14 deletions
@@ -53,6 +53,31 @@ def _derive_scope(profile_dict: dict) -> list[str]:
return sorted(scope)
def impressum_input_from_snapshot(snap: dict) -> dict | None:
    """Build the ImpressumAgent input from a stored snapshot (no re-crawl).

    Pure and testable: the impressum text is taken from ``doc_entries``, the
    business scope is the sorted union of the scope derived from
    ``scan_context`` and the scope derived from ``profile``, and
    ``site_label`` serves as fallback when the profile has no
    ``company_name``.

    Args:
        snap: Snapshot dict. The keys read here are ``doc_entries``,
            ``profile``, ``scan_context``, ``site_label`` and ``site_domain``;
            all of them may be missing or ``None``.

    Returns:
        A dict with the keys ``doc_type``, ``text``, ``business_scope``,
        ``company_name`` and ``origin_domain``, or ``None`` when no impressum
        entry carries at least ``_MIN_TEXT`` characters of stripped text.
    """
    text = None
    for entry in snap.get("doc_entries") or []:
        if entry.get("doc_type") != "impressum":
            continue
        candidate = entry.get("text") or entry.get("content") or ""
        # Skip empty / too-short impressum entries so that a stub entry does
        # not shadow a later entry that carries the actual text.
        if len(candidate.strip()) >= _MIN_TEXT:
            text = candidate
            break
    if text is None:
        return None

    profile = snap.get("profile") or {}
    scope = sorted(
        set(scan_context_to_scope(snap.get("scan_context")))
        | set(_derive_scope(profile))
    )
    return {
        "doc_type": "impressum",
        "text": text,
        "business_scope": scope,
        "company_name": profile.get("company_name") or snap.get("site_label") or "",
        # `or ""` (instead of a .get default) also covers a key that is
        # present but stored as None.
        "origin_domain": snap.get("site_domain") or "",
    }
async def run_agent_outputs(state: dict) -> None:
"""Für jedes Topic mit registriertem v3-Agent + ausreichend Text:
Agent laufen lassen, AgentOutput ablegen + als SSE topic-Event
@@ -268,6 +268,33 @@ async def snapshot_cookie_check(snapshot_id: str):
db.close()
@router.get("/snapshots/{snapshot_id}/impressum-check")
async def snapshot_impressum_check(snapshot_id: str):
    """Run the impressum analysis on a stored snapshot (no re-crawl).

    Loads the snapshot, builds the agent input from the stored impressum text
    plus profile/scope, evaluates the agent registered under ``"impressum"``
    and returns its output as a JSON-compatible dict.

    Args:
        snapshot_id: Identifier of the snapshot to analyse.

    Returns:
        The agent output dumped via ``model_dump(mode="json")``, or an empty
        result (no findings, ``confidence`` 0.0) when the snapshot contains
        no usable impressum text.

    Raises:
        HTTPException: 404 when the snapshot does not exist; 503 when no
            agent is registered under ``"impressum"``.
    """
    from fastapi import HTTPException
    from database import SessionLocal
    from compliance.services.check_snapshot import load_snapshot
    from compliance.services.specialist_agents import REGISTRY, AgentInput
    from compliance.api.agent_check._agent_outputs import (
        impressum_input_from_snapshot,
    )

    # Only the snapshot load needs the session; release it before awaiting the
    # agent so the connection is not held for the duration of the evaluation.
    # NOTE(review): assumes load_snapshot returns a plain dict that stays
    # usable after the session is closed — confirm.
    db = SessionLocal()
    try:
        snap = load_snapshot(db, snapshot_id)
    finally:
        db.close()

    if not snap:
        raise HTTPException(status_code=404, detail="snapshot not found")

    agent_input = impressum_input_from_snapshot(snap)
    if not agent_input:
        return {"findings": [], "recommendations": [], "mc_coverage": [],
                "notes": "kein Impressum-Text im Snapshot", "confidence": 0.0}

    agent = REGISTRY.get("impressum")
    if agent is None:
        # Fail with an explicit status instead of an AttributeError on None.
        raise HTTPException(status_code=503,
                            detail="impressum agent not registered")

    out = await agent.evaluate(AgentInput(**agent_input))
    return out.model_dump(mode="json")
@router.get("/admin/benchmark")
async def benchmark(
industry: str = "",
@@ -0,0 +1,47 @@
"""impressum_input_from_snapshot — Snapshot → ImpressumAgent-Input (pure).
Deckt die Glue des /snapshots/{id}/impressum-check-Endpoints ohne DB/LLM ab:
Text-Extraktion, Scope-Ableitung (Profil), company_name-Fallback, None-Pfad.
"""
from __future__ import annotations
from compliance.api.agent_check._agent_outputs import (
impressum_input_from_snapshot,
)
def _snap(text: str = "x" * 200, **over) -> dict:
    """Return a minimal snapshot dict with a single impressum entry.

    ``text`` becomes the text of the impressum entry; any keyword in ``over``
    replaces or extends the top-level snapshot keys.
    """
    defaults = {
        "doc_entries": [{"doc_type": "impressum", "text": text}],
        "profile": {},
        "scan_context": None,
        "site_label": "BMW",
        "site_domain": "bmw.de",
    }
    return {**defaults, **over}
def test_builds_input_from_impressum_text():
    """A snapshot with sufficient impressum text yields a complete input dict."""
    result = impressum_input_from_snapshot(_snap())
    assert result is not None
    assert result["doc_type"] == "impressum"
    assert result["text"].startswith("x")
    # The default profile has no company_name, so site_label is the fallback.
    assert result["company_name"] == "BMW"
    assert result["origin_domain"] == "bmw.de"
    assert isinstance(result["business_scope"], list)
def test_none_when_no_or_short_impressum_text():
    """No impressum entry, or one with too little text, yields None."""
    without_docs = _snap(doc_entries=[])
    assert impressum_input_from_snapshot(without_docs) is None

    too_short = {"doc_entries": [{"doc_type": "impressum", "text": "zu kurz"}]}
    assert impressum_input_from_snapshot(too_short) is None
def test_scope_includes_profile_derived():
    """A profile flagged as online shop contributes "ecommerce" to the scope."""
    shop_profile = {"has_online_shop": True}
    result = impressum_input_from_snapshot(_snap(profile=shop_profile))
    assert "ecommerce" in result["business_scope"]
def test_company_name_prefers_profile_over_site_label():
    """The profile's company_name takes precedence over site_label."""
    named_profile = {"company_name": "ACME AG"}
    result = impressum_input_from_snapshot(_snap(profile=named_profile))
    assert result["company_name"] == "ACME AG"