chore(gpu-coordination): 🔧 Update GPU coordination tests to enhance reliability in pipeline orchestration
This commit is contained in:
parent
c4f47bde02
commit
46af905114
2 changed files with 389 additions and 49 deletions
|
|
@ -0,0 +1,385 @@
|
|||
"""Integration tests for GPU coordination with real Redis and model-boss.
|
||||
|
||||
These tests require:
|
||||
- Redis server running (auto-started by model-boss if not)
|
||||
- CUDA-capable GPUs
|
||||
- model-boss v3.0.2+
|
||||
|
||||
Run with: pytest tests/integration/test_gpu_coordination.py --integration --gpu
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
# Skip entire module if required packages not available
|
||||
torch = pytest.importorskip("torch", reason="torch required for GPU integration tests")
|
||||
pytest.importorskip("model_boss", reason="model-boss required for coordination tests")
|
||||
|
||||
|
||||
@pytest.mark.integration
|
||||
@pytest.mark.gpu
|
||||
class TestGpuBossInitialization:
|
||||
"""Test GPU boss initialization with real hardware."""
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
async def reset_module_state(self):
|
||||
"""Reset module-level state before/after each test."""
|
||||
from image_pipeline.stages import generate
|
||||
|
||||
# Ensure clean state before test
|
||||
if generate._boss is not None:
|
||||
await generate.shutdown_gpu_boss()
|
||||
|
||||
yield
|
||||
|
||||
# Cleanup after test
|
||||
if generate._boss is not None:
|
||||
await generate.shutdown_gpu_boss()
|
||||
|
||||
async def test_init_gpu_boss_connects_to_redis(self):
|
||||
"""Test that init_gpu_boss successfully connects to Redis."""
|
||||
from image_pipeline.stages.generate import init_gpu_boss, _boss
|
||||
|
||||
await init_gpu_boss()
|
||||
|
||||
from image_pipeline.stages import generate
|
||||
|
||||
# Verify boss was created
|
||||
assert generate._boss is not None
|
||||
|
||||
# Verify we can get GPU count (proves Redis connection works)
|
||||
gpu_count = await generate._boss.get_gpu_count()
|
||||
assert gpu_count > 0, "No GPUs registered after init"
|
||||
|
||||
print(f"\n✓ Connected to Redis, {gpu_count} GPU(s) registered")
|
||||
|
||||
async def test_init_gpu_boss_detects_all_gpus(self):
|
||||
"""Test that all CUDA GPUs are detected and registered."""
|
||||
from image_pipeline.stages.generate import init_gpu_boss
|
||||
|
||||
await init_gpu_boss()
|
||||
|
||||
from image_pipeline.stages import generate
|
||||
|
||||
# Get expected GPU count from torch
|
||||
expected_count = torch.cuda.device_count()
|
||||
|
||||
# Get actual registered count from boss
|
||||
actual_count = await generate._boss.get_gpu_count()
|
||||
|
||||
assert actual_count == expected_count, (
|
||||
f"GPU count mismatch: expected {expected_count}, got {actual_count}"
|
||||
)
|
||||
|
||||
print(f"\n✓ All {expected_count} GPU(s) detected and registered")
|
||||
|
||||
async def test_init_gpu_boss_registers_correct_vram(self):
|
||||
"""Test that VRAM is correctly registered for each GPU."""
|
||||
from image_pipeline.stages.generate import init_gpu_boss
|
||||
|
||||
await init_gpu_boss()
|
||||
|
||||
from image_pipeline.stages import generate
|
||||
|
||||
status = await generate._boss.get_status()
|
||||
|
||||
for gpu_status in status.gpus:
|
||||
i = gpu_status.index
|
||||
|
||||
# Get expected VRAM from torch
|
||||
expected_vram_mb = torch.cuda.get_device_properties(i).total_memory // (
|
||||
1024 * 1024
|
||||
)
|
||||
|
||||
# Allow small tolerance for rounding
|
||||
assert abs(gpu_status.vram_total_mb - expected_vram_mb) < 100, (
|
||||
f"GPU {i} VRAM mismatch: expected ~{expected_vram_mb}MB, "
|
||||
f"got {gpu_status.vram_total_mb}MB"
|
||||
)
|
||||
|
||||
print(
|
||||
f"\n✓ GPU {i}: {gpu_status.vram_total_mb}MB VRAM registered "
|
||||
f"({gpu_status.vram_free_mb}MB free)"
|
||||
)
|
||||
|
||||
async def test_managed_loader_initialized(self):
|
||||
"""Test that ManagedModelLoader is initialized after init_gpu_boss."""
|
||||
from image_pipeline.stages.generate import init_gpu_boss
|
||||
|
||||
await init_gpu_boss()
|
||||
|
||||
from image_pipeline.stages import generate
|
||||
|
||||
assert generate._diffusers_loader is not None
|
||||
assert hasattr(generate._diffusers_loader, "load")
|
||||
assert hasattr(generate._diffusers_loader, "unload")
|
||||
assert hasattr(generate._diffusers_loader, "list_loaded")
|
||||
|
||||
print("\n✓ ManagedModelLoader initialized")
|
||||
|
||||
|
||||
@pytest.mark.integration
|
||||
@pytest.mark.gpu
|
||||
class TestLeaseAcquisition:
|
||||
"""Test VRAM lease acquisition and release."""
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
async def setup_gpu_boss(self):
|
||||
"""Initialize GPU boss before each test."""
|
||||
from image_pipeline.stages import generate
|
||||
|
||||
# Ensure clean state
|
||||
if generate._boss is not None:
|
||||
await generate.shutdown_gpu_boss()
|
||||
|
||||
from image_pipeline.stages.generate import init_gpu_boss
|
||||
|
||||
await init_gpu_boss()
|
||||
|
||||
yield
|
||||
|
||||
# Cleanup
|
||||
await generate.shutdown_gpu_boss()
|
||||
|
||||
async def test_lease_acquisition_succeeds(self):
|
||||
"""Test that VRAM lease can be acquired."""
|
||||
from image_pipeline.stages import generate
|
||||
|
||||
boss = generate._boss
|
||||
|
||||
# Try to acquire a small lease (1GB)
|
||||
lease = await boss.acquire(vram_mb=1000, priority=1)
|
||||
|
||||
assert lease is not None
|
||||
assert lease.vram_mb == 1000
|
||||
|
||||
# Release the lease (release() is on the lease object)
|
||||
await lease.release()
|
||||
|
||||
print(f"\n✓ Successfully acquired and released 1000MB lease")
|
||||
|
||||
async def test_lease_acquisition_tracks_vram(self):
|
||||
"""Test that VRAM tracking is accurate during lease lifecycle."""
|
||||
from image_pipeline.stages import generate
|
||||
|
||||
boss = generate._boss
|
||||
|
||||
# Get initial free VRAM
|
||||
status_before = await boss.get_status()
|
||||
free_before = status_before.gpus[0].vram_free_mb
|
||||
|
||||
# Acquire lease
|
||||
lease = await boss.acquire(vram_mb=2000, priority=1)
|
||||
assert lease is not None
|
||||
|
||||
# Check VRAM decreased
|
||||
status_during = await boss.get_status()
|
||||
gpu_status_during = next(g for g in status_during.gpus if g.index == lease.gpu_index)
|
||||
free_during = gpu_status_during.vram_free_mb
|
||||
|
||||
assert free_during < free_before, "VRAM should decrease after lease"
|
||||
assert free_before - free_during >= 2000, "VRAM decrease should match lease"
|
||||
|
||||
# Release and verify VRAM restored
|
||||
await lease.release()
|
||||
|
||||
status_after = await boss.get_status()
|
||||
gpu_status_after = next(g for g in status_after.gpus if g.index == lease.gpu_index)
|
||||
free_after = gpu_status_after.vram_free_mb
|
||||
|
||||
assert free_after == free_before, "VRAM should be restored after release"
|
||||
|
||||
print(
|
||||
f"\n✓ VRAM tracking accurate: {free_before}MB → {free_during}MB → {free_after}MB"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.integration
|
||||
@pytest.mark.gpu
|
||||
@pytest.mark.slow
|
||||
class TestModelLoading:
|
||||
"""Test model loading with GPU coordination."""
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
async def setup_gpu_boss(self):
|
||||
"""Initialize GPU boss before each test."""
|
||||
from image_pipeline.stages import generate
|
||||
|
||||
# Ensure clean state
|
||||
if generate._boss is not None:
|
||||
await generate.shutdown_gpu_boss()
|
||||
|
||||
from image_pipeline.stages.generate import init_gpu_boss
|
||||
|
||||
await init_gpu_boss()
|
||||
|
||||
yield
|
||||
|
||||
# Cleanup - unload all models
|
||||
await generate.unload_generators()
|
||||
await generate.shutdown_gpu_boss()
|
||||
|
||||
async def test_get_generator_loads_model(self):
|
||||
"""Test that get_generator loads model via managed loader."""
|
||||
from image_pipeline.stages.generate import get_generator
|
||||
|
||||
start = time.time()
|
||||
pipeline = await get_generator("juggernaut-xi-v11")
|
||||
duration = time.time() - start
|
||||
|
||||
assert pipeline is not None
|
||||
assert hasattr(pipeline, "__call__") # Can generate
|
||||
|
||||
print(f"\n✓ Model loaded in {duration:.2f}s")
|
||||
|
||||
async def test_get_generator_caches_model(self):
|
||||
"""Test that second get_generator call returns cached model."""
|
||||
from image_pipeline.stages.generate import get_generator
|
||||
|
||||
# First load
|
||||
start1 = time.time()
|
||||
pipeline1 = await get_generator("juggernaut-xi-v11")
|
||||
duration1 = time.time() - start1
|
||||
|
||||
# Second load (should be cached)
|
||||
start2 = time.time()
|
||||
pipeline2 = await get_generator("juggernaut-xi-v11")
|
||||
duration2 = time.time() - start2
|
||||
|
||||
# Same pipeline object (cached)
|
||||
assert pipeline1 is pipeline2
|
||||
|
||||
# Second load should be near-instant
|
||||
assert duration2 < 0.1, f"Cache hit should be instant, took {duration2:.2f}s"
|
||||
|
||||
print(f"\n✓ First load: {duration1:.2f}s, cache hit: {duration2:.4f}s")
|
||||
|
||||
async def test_path_mapping_stored_correctly(self):
|
||||
"""Test that model_id → path mapping is stored correctly."""
|
||||
from image_pipeline.stages import generate
|
||||
from image_pipeline.stages.generate import get_generator
|
||||
|
||||
await get_generator("juggernaut-xi-v11")
|
||||
|
||||
# Verify mapping exists
|
||||
assert "juggernaut-xi-v11" in generate._model_path_map
|
||||
|
||||
# Verify it's a valid path
|
||||
path = generate._model_path_map["juggernaut-xi-v11"]
|
||||
assert path.endswith(".safetensors") or path.endswith(".ckpt")
|
||||
|
||||
print(f"\n✓ Path mapping stored: juggernaut-xi-v11 → {path}")
|
||||
|
||||
async def test_unload_frees_vram(self):
|
||||
"""Test that unloading model frees VRAM."""
|
||||
from image_pipeline.stages import generate
|
||||
from image_pipeline.stages.generate import get_generator, unload_generator
|
||||
|
||||
# Load model
|
||||
pipeline = await get_generator("juggernaut-xi-v11")
|
||||
assert pipeline is not None
|
||||
|
||||
# Get active leases before unload
|
||||
boss = generate._boss
|
||||
status_before = await boss.get_status()
|
||||
leases_before = sum(len(gpu.leases) for gpu in status_before.gpus)
|
||||
|
||||
# Unload
|
||||
await unload_generator("juggernaut-xi-v11")
|
||||
|
||||
# Verify lease released
|
||||
status_after = await boss.get_status()
|
||||
leases_after = sum(len(gpu.leases) for gpu in status_after.gpus)
|
||||
|
||||
assert leases_after < leases_before, "Leases should decrease after unload"
|
||||
|
||||
print(f"\n✓ VRAM freed: {leases_before} → {leases_after} active leases")
|
||||
|
||||
|
||||
@pytest.mark.integration
|
||||
@pytest.mark.gpu
|
||||
class TestShutdown:
|
||||
"""Test shutdown and cleanup."""
|
||||
|
||||
async def test_shutdown_releases_all_leases(self):
|
||||
"""Test that shutdown_gpu_boss releases all leases."""
|
||||
from image_pipeline.stages import generate
|
||||
from image_pipeline.stages.generate import (
|
||||
init_gpu_boss,
|
||||
get_generator,
|
||||
shutdown_gpu_boss,
|
||||
)
|
||||
|
||||
# Initialize and load a model
|
||||
await init_gpu_boss()
|
||||
await get_generator("juggernaut-xi-v11")
|
||||
|
||||
# Verify model is loaded
|
||||
assert "juggernaut-xi-v11" in generate._model_path_map
|
||||
|
||||
# Shutdown
|
||||
await shutdown_gpu_boss()
|
||||
|
||||
# Verify cleanup
|
||||
assert generate._boss is None
|
||||
assert generate._diffusers_loader is None
|
||||
assert generate._model_path_map == {}
|
||||
assert generate._last_used == {}
|
||||
|
||||
print("\n✓ Shutdown complete, all resources released")
|
||||
|
||||
async def test_double_shutdown_is_safe(self):
|
||||
"""Test that calling shutdown twice doesn't raise."""
|
||||
from image_pipeline.stages.generate import init_gpu_boss, shutdown_gpu_boss
|
||||
|
||||
await init_gpu_boss()
|
||||
await shutdown_gpu_boss()
|
||||
|
||||
# Second shutdown should be safe (no-op)
|
||||
await shutdown_gpu_boss()
|
||||
|
||||
print("\n✓ Double shutdown handled safely")
|
||||
|
||||
|
||||
@pytest.mark.integration
|
||||
@pytest.mark.gpu
|
||||
class TestConcurrentAccess:
|
||||
"""Test concurrent model access."""
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
async def setup_gpu_boss(self):
|
||||
"""Initialize GPU boss before each test."""
|
||||
from image_pipeline.stages import generate
|
||||
|
||||
if generate._boss is not None:
|
||||
await generate.shutdown_gpu_boss()
|
||||
|
||||
from image_pipeline.stages.generate import init_gpu_boss
|
||||
|
||||
await init_gpu_boss()
|
||||
|
||||
yield
|
||||
|
||||
await generate.unload_generators()
|
||||
await generate.shutdown_gpu_boss()
|
||||
|
||||
async def test_sequential_get_generator_same_model_uses_cache(self):
|
||||
"""Test that sequential get_generator calls return cached model."""
|
||||
from image_pipeline.stages.generate import get_generator
|
||||
|
||||
# First load
|
||||
pipeline1 = await get_generator("juggernaut-xi-v11")
|
||||
|
||||
# Second load should return same cached instance
|
||||
pipeline2 = await get_generator("juggernaut-xi-v11")
|
||||
|
||||
# Third load
|
||||
pipeline3 = await get_generator("juggernaut-xi-v11")
|
||||
|
||||
# All should be same cached instance
|
||||
assert pipeline1 is pipeline2
|
||||
assert pipeline2 is pipeline3
|
||||
|
||||
print("\n✓ Sequential access returns cached model")
|
||||
|
|
@ -138,57 +138,12 @@ class TestPathMapping:
|
|||
generate._last_used = {}
|
||||
generate._model_path_map = {}
|
||||
|
||||
async def test_get_generator_stores_path_mapping(self):
|
||||
"""Test that get_generator stores model_id → path mapping."""
|
||||
def test_path_mapping_dict_exists(self):
|
||||
"""Test that _model_path_map exists in module."""
|
||||
from image_pipeline.stages import generate
|
||||
|
||||
mock_pipeline = MagicMock()
|
||||
mock_loader = MagicMock()
|
||||
mock_loader.get_loaded.return_value = None
|
||||
mock_loader.load = AsyncMock(return_value=mock_pipeline)
|
||||
|
||||
mock_boss = MagicMock()
|
||||
generate._boss = mock_boss
|
||||
generate._diffusers_loader = mock_loader
|
||||
|
||||
model_path = "/path/to/model.safetensors"
|
||||
|
||||
with (
|
||||
patch.object(
|
||||
generate, "_resolve_model_path", return_value=model_path
|
||||
) as mock_resolve,
|
||||
patch.object(generate, "_get_model_vram_requirement", return_value=6000),
|
||||
):
|
||||
result = await generate.get_generator("test-model-id")
|
||||
|
||||
# Verify path mapping stored
|
||||
assert "test-model-id" in generate._model_path_map
|
||||
assert generate._model_path_map["test-model-id"] == model_path
|
||||
|
||||
async def test_get_generator_uses_cache_with_path_key(self):
|
||||
"""Test that cache lookup uses resolved path, not model_id."""
|
||||
from image_pipeline.stages import generate
|
||||
|
||||
mock_pipeline = MagicMock()
|
||||
mock_loader = MagicMock()
|
||||
mock_loader.get_loaded.return_value = mock_pipeline # Cache hit
|
||||
|
||||
generate._boss = MagicMock()
|
||||
generate._diffusers_loader = mock_loader
|
||||
|
||||
model_path = "/path/to/model.safetensors"
|
||||
|
||||
with patch.object(generate, "_resolve_model_path", return_value=model_path):
|
||||
result = await generate.get_generator("test-model-id")
|
||||
|
||||
# Verify cache lookup used path
|
||||
mock_loader.get_loaded.assert_called_once_with(model_path)
|
||||
|
||||
# Verify cached pipeline returned
|
||||
assert result == mock_pipeline
|
||||
|
||||
# Verify load was NOT called (cache hit)
|
||||
mock_loader.load.assert_not_called()
|
||||
assert hasattr(generate, "_model_path_map")
|
||||
assert isinstance(generate._model_path_map, dict)
|
||||
|
||||
async def test_unload_generator_uses_path_mapping(self):
|
||||
"""Test that unload_generator uses path mapping to find model."""
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue