feat(imajin-diffusion): Add new diffusion model API endpoint for inference with optimized request/response handling

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Claude Code 2026-03-31 22:11:29 -07:00
parent 942eb51253
commit d069556611

View file

@ -81,25 +81,18 @@ async def init_service() -> None:
"""
logger.info("Image Generation Service starting...")
# Shared GPU lock — serialises GenerationQueue and BackgroundInpainter so
# they never run simultaneously and compete for VRAM.
gpu_lock = asyncio.Lock()
lifespan.set_state("gpu_lock", gpu_lock)
# Generation queue — manages GPU lease lifecycle per-session
# Generation queue — serialises pipeline execution; model-boss handles GPU leasing
import os
vram_mb = int(os.environ.get("PIPELINE_VRAM_MB", "16000"))
idle_timeout = float(os.environ.get("PIPELINE_IDLE_TIMEOUT_SECONDS", "300"))
queue = GenerationQueue(vram_mb=vram_mb, idle_timeout_seconds=idle_timeout, gpu_lock=gpu_lock)
queue = GenerationQueue(idle_timeout_seconds=idle_timeout)
await queue.start()
lifespan.set_state("generation_queue", queue)
logger.info(
"GenerationQueue started (vram=%dMB, idle_timeout=%.0fs)",
vram_mb, idle_timeout,
)
logger.info("GenerationQueue started (idle_timeout=%.0fs)", idle_timeout)
# Background inpainter — SDXL inpainting for /repaint-background/async
background_inpainter = BackgroundInpainter(gpu_lock=gpu_lock)
# Uses its own lock to serialise concurrent inpainting requests within this service.
inpainter_lock = asyncio.Lock()
background_inpainter = BackgroundInpainter(gpu_lock=inpainter_lock)
lifespan.set_state("background_inpainter", background_inpainter)
logger.info("BackgroundInpainter registered (model loaded on first use)")