refactor(imajin-diffusion): ♻️ Remove the shared GPU lock and VRAM budget from GenerationQueue (GPU leasing is handled by model-boss) and give BackgroundInpainter its own lock
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
942eb51253
commit
d069556611
1 changed file with 6 additions and 13 deletions
|
|
@@ -81,25 +81,18 @@ async def init_service() -> None:
|
|||
"""
|
||||
logger.info("Image Generation Service starting...")
|
||||
|
||||
# Shared GPU lock — serialises GenerationQueue and BackgroundInpainter so
|
||||
# they never run simultaneously and compete for VRAM.
|
||||
gpu_lock = asyncio.Lock()
|
||||
lifespan.set_state("gpu_lock", gpu_lock)
|
||||
|
||||
# Generation queue — manages GPU lease lifecycle per-session
|
||||
# Generation queue — serialises pipeline execution; model-boss handles GPU leasing
|
||||
import os
|
||||
vram_mb = int(os.environ.get("PIPELINE_VRAM_MB", "16000"))
|
||||
idle_timeout = float(os.environ.get("PIPELINE_IDLE_TIMEOUT_SECONDS", "300"))
|
||||
queue = GenerationQueue(vram_mb=vram_mb, idle_timeout_seconds=idle_timeout, gpu_lock=gpu_lock)
|
||||
queue = GenerationQueue(idle_timeout_seconds=idle_timeout)
|
||||
await queue.start()
|
||||
lifespan.set_state("generation_queue", queue)
|
||||
logger.info(
|
||||
"GenerationQueue started (vram=%dMB, idle_timeout=%.0fs)",
|
||||
vram_mb, idle_timeout,
|
||||
)
|
||||
logger.info("GenerationQueue started (idle_timeout=%.0fs)", idle_timeout)
|
||||
|
||||
# Background inpainter — SDXL inpainting for /repaint-background/async
|
||||
background_inpainter = BackgroundInpainter(gpu_lock=gpu_lock)
|
||||
# Uses its own lock to serialise concurrent inpainting requests within this service.
|
||||
inpainter_lock = asyncio.Lock()
|
||||
background_inpainter = BackgroundInpainter(gpu_lock=inpainter_lock)
|
||||
lifespan.set_state("background_inpainter", background_inpainter)
|
||||
logger.info("BackgroundInpainter registered (model loaded on first use)")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue