diff --git a/services/imajin-diffusion/service/src/api/main.py b/services/imajin-diffusion/service/src/api/main.py index d115d066..b2a04957 100644 --- a/services/imajin-diffusion/service/src/api/main.py +++ b/services/imajin-diffusion/service/src/api/main.py @@ -81,25 +81,18 @@ async def init_service() -> None: """ logger.info("Image Generation Service starting...") - # Shared GPU lock — serialises GenerationQueue and BackgroundInpainter so - # they never run simultaneously and compete for VRAM. - gpu_lock = asyncio.Lock() - lifespan.set_state("gpu_lock", gpu_lock) - - # Generation queue — manages GPU lease lifecycle per-session + # Generation queue — serialises pipeline execution; model-boss handles GPU leasing import os - vram_mb = int(os.environ.get("PIPELINE_VRAM_MB", "16000")) idle_timeout = float(os.environ.get("PIPELINE_IDLE_TIMEOUT_SECONDS", "300")) - queue = GenerationQueue(vram_mb=vram_mb, idle_timeout_seconds=idle_timeout, gpu_lock=gpu_lock) + queue = GenerationQueue(idle_timeout_seconds=idle_timeout) await queue.start() lifespan.set_state("generation_queue", queue) - logger.info( - "GenerationQueue started (vram=%dMB, idle_timeout=%.0fs)", - vram_mb, idle_timeout, - ) + logger.info("GenerationQueue started (idle_timeout=%.0fs)", idle_timeout) # Background inpainter — SDXL inpainting for /repaint-background/async - background_inpainter = BackgroundInpainter(gpu_lock=gpu_lock) + # Uses its own lock to serialise concurrent inpainting requests within this service. + inpainter_lock = asyncio.Lock() + background_inpainter = BackgroundInpainter(gpu_lock=inpainter_lock) lifespan.set_state("background_inpainter", background_inpainter) logger.info("BackgroundInpainter registered (model loaded on first use)")