Switch to using cuda:N instead of N (integer device ids)

2025-06-24 20:01:42 +02:00 · 2022-11-11 14:46:05 +05:30 · 2022-11-11 14:46:05 +05:30 · bd56795c62
commit bd56795c62
parent 2c54b7f289
3 changed files with 31 additions and 26 deletions
--- a/ui/sd_internal/runtime.py
+++ b/ui/sd_internal/runtime.py
@ -64,9 +64,13 @@ def get_processor_name():
        print(traceback.format_exc())
        return "cpu"

+def validate_device_id(device, allow_auto=False, log_prefix=''):
+    """Raise EnvironmentError unless `device` is a well-formed device id string.
+
+    Accepted values are 'cpu', 'auto' (only when `allow_auto` is true), or
+    'cuda:N' where N is one or more ASCII decimal digits.
+
+    device: the value to check; anything that is not a str is rejected.
+    allow_auto: whether 'auto' is accepted in addition to 'cpu'.
+    log_prefix: text placed in front of the error message (callers pass their own name).
+
+    Returns None when the id is well-formed. Only the format is checked here.
+    """
+    device_names = ['cpu', 'auto'] if allow_auto else ['cpu']
+    if isinstance(device, str):
+        if device in device_names:
+            return
+        gpu_index = device[len('cuda:'):]
+        # isascii() + isdigit() rather than isnumeric(): isnumeric() also accepts
+        # characters such as '²' or '½', which are not usable GPU indices.
+        # An empty index ('cuda:') fails isdigit() and is rejected as well.
+        if device.startswith('cuda:') and gpu_index.isascii() and gpu_index.isdigit():
+            return
+    raise EnvironmentError(f"{log_prefix}: device id should be {', '.join(device_names)}, or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")
+
 def device_would_fail(device):
-    if device != 'cpu' and not isinstance(device, int):
-        raise EnvironmentError(f"device_would_fail() only accepts 'cpu', or an integer index for the GPU. Got: {device}")
+    validate_device_id(device, allow_auto=False, log_prefix='device_would_fail')

    if device == 'cpu': return None
    # Returns None when no issues found, otherwise returns the detected error str.
@ -81,15 +85,14 @@ def device_would_fail(device):
    return None

 def device_select(device):
-    if device != 'cpu' and not isinstance(device, int):
-        raise EnvironmentError(f"device_select() only accepts 'cpu', or an integer index for the GPU. Got: {device}")
+    validate_device_id(device, allow_auto=False, log_prefix='device_select')

    if device == 'cpu': return True
    if not torch.cuda.is_available(): return False
    failure_msg = device_would_fail(device)
    if failure_msg:
        if 'invalid device' in failure_msg:
-            raise NameError(f'GPU "{device}" could not be found. Remove this device from config.render_devices or use "auto".')
+            raise NameError(f'{device} could not be found. Remove this device from config.render_devices or use "auto".')
        print(failure_msg)
        return False

@ -107,8 +110,7 @@ def device_select(device):
    return True

 def device_init(device_selection):
-    if device_selection not in ['cpu', 'auto'] and not isinstance(device_selection, int):
-        raise EnvironmentError(f"device_init() only accepts 'cpu', 'auto', or an integer index for the GPU. Got: {device_selection}")
+    validate_device_id(device_selection, allow_auto=True, log_prefix='device_init')

    # Thread bound properties
    thread_data.stop_processing = False
@ -155,37 +157,38 @@ def device_init(device_selection):
    if device_selection == 'auto':
        device_count = torch.cuda.device_count()
        if device_count == 1:
-            device_select(0)
-            torch.cuda.device(0)
+            device_select('cuda:0')
+            torch.cuda.device('cuda:0')
            return

        print('Autoselecting GPU. Using most free memory.')
        max_mem_free = 0
        best_device = None
        for device in range(device_count):
+            device = f'cuda:{device}'
            mem_free, mem_total = torch.cuda.mem_get_info(device)
            mem_free /= float(10**9)
            mem_total /= float(10**9)
            device_name = torch.cuda.get_device_name(device)
-            print(f'GPU {device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
+            print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
            if max_mem_free < mem_free:
                max_mem_free = mem_free
                best_device = device
        if best_device and device_select(best_device):
-            print(f'Setting GPU {device} as active')
+            print(f'Setting {device} as active')
            torch.cuda.device(device)
            return

    if device_select(device_selection):
-        print(f'Setting GPU {device_selection} as active')
+        print(f'Setting {device_selection} as active')
        torch.cuda.device(device_selection)
        return

    # By default use current device.
    print('Checking current GPU...')
-    device = torch.cuda.current_device()
+    device = f'cuda:{torch.cuda.current_device()}'
    device_name = torch.cuda.get_device_name(device)
-    print(f'GPU {device} detected: {device_name}')
+    print(f'{device} detected: {device_name}')
    if device_select(device):
        return
    print('WARNING: No compatible GPU found. Using the CPU, but this will be very slow!')
@ -325,7 +328,7 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u
    start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
    if start_mem <= 0: return
    model_name = model.__class__.__name__
-    print(f'Device:{thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mb')
+    print(f'Device {thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mb')
    start_time = time.time()
    model.to(target_device)
    time_step = start_time
@ -340,17 +343,17 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u
        if not is_transfering:
            break;
        if time.time() - time_step > WARNING_TIMEOUT: # Long delay, print to console to show activity.
-            print(f'Device:{thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb')
+            print(f'Device {thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb')
            time_step = time.time()
-    print(f'Device:{thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}')
+    print(f'Device {thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}')

 def load_model_gfpgan():
    if thread_data.gfpgan_file is None: raise ValueError(f'Thread gfpgan_file is undefined.')
        #print('load_model_gfpgan called without setting gfpgan_file')
        #return
-    if thread_data.device != 0:
+    if thread_data.device != 'cuda:0':
        #TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
-        raise Exception(f'Current device {torch.device(thread_data.device)} is not {torch.device(0)}. Cannot run GFPGANer.')
+        raise Exception(f'Current device {torch.device(thread_data.device)} is not {torch.device("cuda:0")}. Cannot run GFPGANer.')
    model_path = thread_data.gfpgan_file + ".pth"
    thread_data.model_gfpgan = GFPGANer(device=torch.device(thread_data.device), model_path=model_path, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None)
    print('loaded', thread_data.gfpgan_file, 'to', thread_data.model_gfpgan.device, 'precision', thread_data.precision)
--- a/ui/sd_internal/task_manager.py
+++ b/ui/sd_internal/task_manager.py
@ -219,7 +219,7 @@ def thread_get_next_task():
    try:  # Select a render task.
        for queued_task in tasks_queue:
            if queued_task.request.use_face_correction:  # TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
-                if is_alive(0) <= 0:  # Allows GFPGANer only on cuda:0.
+                if is_alive('cuda:0') <= 0:  # Allows GFPGANer only on cuda:0.
                    queued_task.error = Exception('cuda:0 is not available with the current config. Remove GFPGANer filter to run task.')
                    task = queued_task
                    break
@ -227,7 +227,7 @@ def thread_get_next_task():
                    queued_task.error = Exception('Cpu cannot be used to run this task. Remove GFPGANer filter to run task.')
                    task = queued_task
                    break
-                if runtime.thread_data.device != 0:
+                if runtime.thread_data.device != 'cuda:0':
                    continue  # Wait for cuda:0
            if queued_task.render_device and runtime.thread_data.device != queued_task.render_device:
                # Is asking for a specific render device.
@ -235,7 +235,7 @@ def thread_get_next_task():
                    continue  # requested device alive, skip current one.
                else:
                    # Requested device is not active, return error to UI.
-                    queued_task.error = Exception(str(queued_task.render_device) + ' is not currently active.')
+                    queued_task.error = Exception(queued_task.render_device + ' is not currently active.')
                    task = queued_task
                    break
            if not queued_task.render_device and runtime.thread_data.device == 'cpu' and is_alive() > 1:
@ -365,6 +365,7 @@ def get_devices():
    # list the compatible devices
    gpu_count = torch.cuda.device_count()
    for device in range(gpu_count):
+        device = f'cuda:{device}'
        if runtime.device_would_fail(device):
            continue

--- a/ui/server.py
+++ b/ui/server.py
@ -22,7 +22,7 @@ OUTPUT_DIRNAME = "Stable Diffusion UI" # in the user's home folder
 TASK_TTL = 15 * 60 # Discard last session's task timeout
 APP_CONFIG_DEFAULTS = {
    # auto: selects the cuda device with the most free memory, cuda: use the currently active cuda device.
-    'render_devices': ['auto'], # ['cuda'] or ['CPU', 'GPU:0', 'GPU:1', ...] or ['cpu']
+    'render_devices': ['auto'], # valid entries: 'auto', 'cpu' or 'cuda:N' (where N is a GPU index)
    'update_branch': 'main',
 }
 APP_CONFIG_DEFAULT_MODELS = [
@ -281,7 +281,7 @@ def render(req : task_manager.ImageRequest):
    # if req.render_device != 'cpu':
    #     req.render_device = int(req.render_device)
    if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden
-    if req.use_face_correction and task_manager.is_alive(0) <= 0: #TODO Remove when GFPGANer is fixed upstream.
+    if req.use_face_correction and task_manager.is_alive('cuda:0') <= 0: #TODO Remove when GFPGANer is fixed upstream.
        raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed
    try:
        save_model_to_config(req.use_stable_diffusion_model, req.use_vae_model)
@ -369,6 +369,7 @@ task_manager.default_vae_to_load = resolve_vae_to_use()
 if 'render_devices' in config:  # Start a new thread for each device.
    if not isinstance(config['render_devices'], list):
        raise Exception('Invalid render_devices value in config. Should be a list')
+    config['render_devices'] = set(config['render_devices']) # de-duplicate
    for device in config['render_devices']:
        if task_manager.is_alive(device) >= 1:
            print(device, 'already registered.')
@ -383,7 +384,7 @@ if task_manager.is_alive() <= 0: # Either no defaults or no devices after loadin
    # Select best GPU device using free memory, if more than one device.
    if task_manager.start_render_thread('auto'): # Detect best device for renders
        # if cuda:0 is missing, another cuda device is better. try to start it...
-        if task_manager.is_alive(0) <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread(0):
+        if task_manager.is_alive('cuda:0') <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cuda:0'):
            print('Failed to start GPU:0...')
    else:
        print('Failed to start gpu device.')
@ -391,7 +392,7 @@ if task_manager.is_alive() <= 0: # Either no defaults or no devices after loadin
        print('Failed to start CPU render device...')

 is_using_a_gpu = (task_manager.is_alive() > task_manager.is_alive('cpu'))
-if is_using_a_gpu and task_manager.is_alive(0) <= 0:
+if is_using_a_gpu and task_manager.is_alive('cuda:0') <= 0:
    print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
    print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
    print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')