Files
breakpilot-lehrer/scripts/mflux-service.py
Benjamin Admin 1cc69d6b5e
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 2m4s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 19s
feat: OCR pipeline step 8 — validation view with image detection & generation
Replaces the stub StepGroundTruth with a full side-by-side Original vs
Reconstruction view. Adds VLM-based image region detection (qwen2.5vl),
mflux image generation proxy, sync scroll/zoom, manual region drawing,
and score/notes persistence.

New backend endpoints: detect-images, generate-image, validate, get validation.
New standalone mflux-service (scripts/mflux-service.py) for Metal GPU generation.
Dockerfile.base: adds fonts-liberation (Apache-2.0).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 10:40:37 +01:00

122 lines
3.3 KiB
Python

#!/usr/bin/env python3
"""
mflux-service — Standalone FastAPI wrapper for mflux image generation.
Runs NATIVELY on Mac Mini (requires Metal GPU, not Docker).
Generates images using Flux Schnell via the mflux library.
Setup:
python3 -m venv ~/mflux-env
source ~/mflux-env/bin/activate
pip install mflux fastapi uvicorn
Run:
source ~/mflux-env/bin/activate
python scripts/mflux-service.py
Or as a background service:
nohup ~/mflux-env/bin/python scripts/mflux-service.py > /tmp/mflux-service.log 2>&1 &
License: Apache-2.0
"""
import base64
import io
import logging
import os
import time
from typing import Optional
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
# Root logger config for the whole process; uvicorn attaches its own handlers too.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger("mflux-service")
app = FastAPI(title="mflux Image Generation Service", version="1.0.0")
# Lazy-loaded generator: stays None until the first /generate call so the
# service starts instantly and the ~12 GB model download only happens on demand.
_flux = None
def _get_flux():
    """Return the process-wide Flux Schnell instance, loading it on first call.

    The import is deferred because mflux is only installed in the native
    Metal venv, not in every environment that imports this module.
    """
    global _flux
    if _flux is not None:
        return _flux
    logger.info("Loading Flux Schnell model (first call, may download ~12 GB)...")
    from mflux import Flux1
    _flux = Flux1(model_name="schnell", quantize=8)
    logger.info("Flux Schnell model loaded.")
    return _flux
class GenerateRequest(BaseModel):
    """Request body for POST /generate."""

    prompt: str  # text prompt forwarded to Flux
    width: int = 512  # requested width in px; server rounds to a multiple of 64 and clamps to [256, 1024]
    height: int = 512  # requested height in px; same rounding/clamping as width
    steps: int = 4  # number of inference steps passed straight through to Flux
    seed: Optional[int] = None  # RNG seed; None means the server derives one from the clock
class GenerateResponse(BaseModel):
    """Response body for POST /generate; errors are reported in-band, not as HTTP 5xx."""

    image_b64: Optional[str] = None  # PNG as a "data:image/png;base64,..." data URL, or None on failure
    success: bool = True  # False when generation raised; see `error`
    error: Optional[str] = None  # stringified exception when success is False
    duration_ms: int = 0  # wall-clock time of the generate call, including model load on first use
@app.get("/health")
async def health():
    """Liveness probe: a static descriptor of the service and its backend."""
    payload = {
        "status": "ok",
        "model": "flux-schnell",
        "gpu": "metal",
    }
    return payload
@app.post("/generate", response_model=GenerateResponse)
async def generate_image(req: GenerateRequest):
    """Generate an image from a text prompt using Flux Schnell.

    Dimensions are rounded down to multiples of 64 and clamped to
    [256, 1024], as Flux requires. On success the response carries the
    PNG as a base64 data URL; on any failure the response has
    success=False and the error message — the endpoint never raises,
    so callers always get a GenerateResponse.
    """
    t0 = time.time()
    # Validate dimensions (must be multiples of 64 for Flux).
    width = max(256, min(1024, (req.width // 64) * 64))
    height = max(256, min(1024, (req.height // 64) * 64))
    # BUG FIX: the previous `req.seed or <fallback>` treated an explicit
    # seed of 0 as falsy and silently replaced it with a clock-derived
    # value, breaking reproducibility. Compare against None instead so
    # every caller-supplied seed is honored.
    seed = req.seed if req.seed is not None else int(time.time()) % 2**31
    try:
        # Imported lazily (and inside the try) so a missing/broken mflux
        # install surfaces as an in-band error response, not a crash.
        from mflux import Config

        flux = _get_flux()
        image = flux.generate_image(
            seed=seed,
            prompt=req.prompt,
            config=Config(
                num_inference_steps=req.steps,
                height=height,
                width=width,
            ),
        )
        # Convert the PIL image to a base64 PNG data URL.
        buf = io.BytesIO()
        image.save(buf, format="PNG")
        img_b64 = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode("utf-8")
        duration_ms = int((time.time() - t0) * 1000)
        # Lazy %-style args: formatting is skipped if INFO is disabled.
        logger.info("Generated %dx%d image in %dms: %s...", width, height, duration_ms, req.prompt[:60])
        return GenerateResponse(image_b64=img_b64, success=True, duration_ms=duration_ms)
    except Exception as e:
        # Service boundary: report the failure (with traceback in the log)
        # to the caller instead of bubbling up as an opaque HTTP 500.
        duration_ms = int((time.time() - t0) * 1000)
        logger.exception("Generation failed: %s", e)
        return GenerateResponse(image_b64=None, success=False, error=str(e), duration_ms=duration_ms)
if __name__ == "__main__":
    # Port is overridable via MFLUX_PORT; defaults to 8095.
    serve_port = int(os.getenv("MFLUX_PORT", "8095"))
    logger.info(f"Starting mflux-service on port {serve_port}")
    uvicorn.run(app, host="0.0.0.0", port=serve_port)