feat: OCR pipeline step 8 — validation view with image detection & generation
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 2m4s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 19s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 2m4s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 19s
Replaces the stub StepGroundTruth with a full side-by-side Original vs Reconstruction view. Adds VLM-based image region detection (qwen2.5vl), mflux image generation proxy, sync scroll/zoom, manual region drawing, and score/notes persistence. New backend endpoints: detect-images, generate-image, validate, get validation. New standalone mflux-service (scripts/mflux-service.py) for Metal GPU generation. Dockerfile.base: adds fonts-liberation (Apache-2.0). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
121
scripts/mflux-service.py
Normal file
121
scripts/mflux-service.py
Normal file
@@ -0,0 +1,121 @@
|
||||
#!/usr/bin/env python3
"""
mflux-service — Standalone FastAPI wrapper for mflux image generation.

Runs NATIVELY on Mac Mini (requires Metal GPU, not Docker).
Generates images using Flux Schnell via the mflux library.

Setup:
    python3 -m venv ~/mflux-env
    source ~/mflux-env/bin/activate
    pip install mflux fastapi uvicorn

Run:
    source ~/mflux-env/bin/activate
    python scripts/mflux-service.py

Or as a background service:
    nohup ~/mflux-env/bin/python scripts/mflux-service.py > /tmp/mflux-service.log 2>&1 &

License: Apache-2.0
"""

import base64
import io
import logging
import os
import time
from typing import Optional

import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger("mflux-service")

app = FastAPI(title="mflux Image Generation Service", version="1.0.0")

# Lazily-initialized Flux generator (see _get_flux). Kept module-level so the
# ~12 GB model is loaded at most once per process, not on import.
_flux = None
|
||||
|
||||
|
||||
def _get_flux():
    """Return the shared Flux Schnell generator, loading it on first use.

    The heavyweight model load (and possible ~12 GB download) happens only
    on the first call; later calls return the cached module-level instance.
    """
    global _flux
    if _flux is not None:
        return _flux

    logger.info("Loading Flux Schnell model (first call, may download ~12 GB)...")
    # Imported lazily so the service can start (and /health can respond)
    # without mflux being importable yet.
    from mflux import Flux1

    _flux = Flux1(model_name="schnell", quantize=8)
    logger.info("Flux Schnell model loaded.")
    return _flux
|
||||
|
||||
|
||||
class GenerateRequest(BaseModel):
    """Request body for POST /generate."""

    # Text prompt forwarded to Flux Schnell.
    prompt: str
    # Requested dimensions in pixels. The endpoint clamps each to [256, 1024]
    # and snaps down to a multiple of 64 before generation.
    width: int = 512
    height: int = 512
    # Number of inference steps (Schnell is tuned for very few steps).
    steps: int = 4
    # Optional RNG seed; when omitted the endpoint derives one from the clock.
    seed: Optional[int] = None
|
||||
|
||||
|
||||
class GenerateResponse(BaseModel):
    """Response body for POST /generate."""

    # PNG encoded as a "data:image/png;base64,..." data URL; None on failure.
    image_b64: Optional[str] = None
    # True when generation succeeded; False when `error` is populated.
    success: bool = True
    # Human-readable failure message (stringified exception), if any.
    error: Optional[str] = None
    # Wall-clock duration of the request handling, in milliseconds.
    duration_ms: int = 0
|
||||
|
||||
|
||||
@app.get("/health")
async def health():
    """Liveness probe: report service status plus the backing model and GPU."""
    payload = {"status": "ok", "model": "flux-schnell", "gpu": "metal"}
    return payload
|
||||
|
||||
|
||||
@app.post("/generate", response_model=GenerateResponse)
async def generate_image(req: GenerateRequest):
    """Generate an image from a text prompt using Flux Schnell.

    On success returns a GenerateResponse whose ``image_b64`` is a PNG data
    URL; on any failure returns ``success=False`` with the error message
    instead of raising (so callers always get a structured 200 response).
    Requested dimensions are clamped to [256, 1024] and snapped down to the
    nearest multiple of 64, as Flux requires.
    """
    t0 = time.time()

    # Validate dimensions (must be multiples of 64 for Flux).
    width = max(256, min(1024, (req.width // 64) * 64))
    height = max(256, min(1024, (req.height // 64) * 64))

    try:
        from mflux import Config

        # NOTE(review): model loading and generation are synchronous and
        # GPU-bound, so this blocks the event loop for the whole call.
        # Acceptable for a single-user local service; consider offloading to
        # a thread if this ever serves concurrent clients.
        flux = _get_flux()

        # Bug fix: the previous `req.seed or <time-based>` expression treated
        # an explicit seed of 0 as "no seed" (0 is falsy), silently breaking
        # reproducibility for that value. Honor any caller-supplied seed.
        seed = req.seed if req.seed is not None else int(time.time()) % 2**31

        image = flux.generate_image(
            seed=seed,
            prompt=req.prompt,
            config=Config(
                num_inference_steps=req.steps,
                height=height,
                width=width,
            ),
        )

        # Convert PIL image to a base64 PNG data URL.
        buf = io.BytesIO()
        image.save(buf, format="PNG")
        buf.seek(0)
        img_b64 = "data:image/png;base64," + base64.b64encode(buf.read()).decode("utf-8")

        duration_ms = int((time.time() - t0) * 1000)
        logger.info(f"Generated {width}x{height} image in {duration_ms}ms: {req.prompt[:60]}...")

        return GenerateResponse(image_b64=img_b64, success=True, duration_ms=duration_ms)

    except Exception as e:
        # Broad catch is deliberate: this is the HTTP boundary, and any model
        # or import failure should surface as a structured error payload
        # rather than an unhandled 500.
        duration_ms = int((time.time() - t0) * 1000)
        logger.error(f"Generation failed: {e}")
        return GenerateResponse(image_b64=None, success=False, error=str(e), duration_ms=duration_ms)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Listen port is configurable via MFLUX_PORT (default 8095).
    listen_port = int(os.getenv("MFLUX_PORT", "8095"))
    logger.info(f"Starting mflux-service on port {listen_port}")
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
|
||||
Reference in New Issue
Block a user