From bd56795c6234a92970619bd8812dd7d752b2395f Mon Sep 17 00:00:00 2001
From: cmdr2 <secondary.cmdr2@gmail.com>
Date: Fri, 11 Nov 2022 14:46:05 +0530
Subject: [PATCH] Switch to using cuda:N instead of N (integer device ids)

---
 ui/sd_internal/runtime.py      | 41 ++++++++++++++++++----------------
 ui/sd_internal/task_manager.py |  7 +++---
 ui/server.py                   |  9 ++++----
 3 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/ui/sd_internal/runtime.py b/ui/sd_internal/runtime.py
index 74bd8434..f30a4fe0 100644
--- a/ui/sd_internal/runtime.py
+++ b/ui/sd_internal/runtime.py
@@ -64,9 +64,13 @@ def get_processor_name():
         print(traceback.format_exc())
         return "cpu"
 
+def validate_device_id(device, allow_auto=False, log_prefix=''):
+    device_names = ['cpu', 'auto'] if allow_auto else ['cpu']
+    if not isinstance(device, str) or (device not in device_names and (len(device) <= len('cuda:') or device[:5] != 'cuda:' or not device[5:].isnumeric())):
+        raise EnvironmentError(f"{log_prefix}: device id should be {', '.join(device_names)}, or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")
+
 def device_would_fail(device):
-    if device != 'cpu' and not isinstance(device, int):
-        raise EnvironmentError(f"device_would_fail() only accepts 'cpu', or an integer index for the GPU. Got: {device}")
+    validate_device_id(device, allow_auto=False, log_prefix='device_would_fail')
 
     if device == 'cpu': return None
     # Returns None when no issues found, otherwise returns the detected error str.
@@ -81,15 +85,14 @@ def device_would_fail(device):
     return None
 
 def device_select(device):
-    if device != 'cpu' and not isinstance(device, int):
-        raise EnvironmentError(f"device_select() only accepts 'cpu', or an integer index for the GPU. Got: {device}")
+    validate_device_id(device, allow_auto=False, log_prefix='device_select')
 
     if device == 'cpu': return True
     if not torch.cuda.is_available(): return False
     failure_msg = device_would_fail(device)
     if failure_msg:
         if 'invalid device' in failure_msg:
-            raise NameError(f'GPU "{device}" could not be found. Remove this device from config.render_devices or use "auto".')
+            raise NameError(f'{device} could not be found. Remove this device from config.render_devices or use "auto".')
         print(failure_msg)
         return False
 
@@ -107,8 +110,7 @@ def device_select(device):
     return True
 
 def device_init(device_selection):
-    if device_selection not in ['cpu', 'auto'] and not isinstance(device_selection, int):
-        raise EnvironmentError(f"device_init() only accepts 'cpu', 'auto', or an integer index for the GPU. Got: {device_selection}")
+    validate_device_id(device_selection, allow_auto=True, log_prefix='device_init')
 
     # Thread bound properties
     thread_data.stop_processing = False
@@ -155,37 +157,38 @@ def device_init(device_selection):
     if device_selection == 'auto':
         device_count = torch.cuda.device_count()
         if device_count == 1:
-            device_select(0)
-            torch.cuda.device(0)
+            device_select('cuda:0')
+            torch.cuda.device('cuda:0')
             return
 
         print('Autoselecting GPU. Using most free memory.')
         max_mem_free = 0
         best_device = None
         for device in range(device_count):
+            device = f'cuda:{device}'
             mem_free, mem_total = torch.cuda.mem_get_info(device)
             mem_free /= float(10**9)
             mem_total /= float(10**9)
             device_name = torch.cuda.get_device_name(device)
-            print(f'GPU {device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
+            print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
             if max_mem_free < mem_free:
                 max_mem_free = mem_free
                 best_device = device
         if best_device and device_select(best_device):
-            print(f'Setting GPU {device} as active')
-            torch.cuda.device(device)
+            print(f'Setting {best_device} as active')  # best_device, not the loop's last 'device'
+            torch.cuda.device(best_device)
             return
 
     if device_select(device_selection):
-        print(f'Setting GPU {device_selection} as active')
+        print(f'Setting {device_selection} as active')
         torch.cuda.device(device_selection)
         return
 
     # By default use current device.
     print('Checking current GPU...')
-    device = torch.cuda.current_device()
+    device = f'cuda:{torch.cuda.current_device()}'
     device_name = torch.cuda.get_device_name(device)
-    print(f'GPU {device} detected: {device_name}')
+    print(f'{device} detected: {device_name}')
     if device_select(device):
         return
     print('WARNING: No compatible GPU found. Using the CPU, but this will be very slow!')
@@ -325,7 +328,7 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u
     start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
     if start_mem <= 0: return
     model_name = model.__class__.__name__
-    print(f'Device:{thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mb')
+    print(f'Device {thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mb')
     start_time = time.time()
     model.to(target_device)
     time_step = start_time
@@ -340,17 +343,17 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u
         if not is_transfering:
             break;
         if time.time() - time_step > WARNING_TIMEOUT: # Long delay, print to console to show activity.
-            print(f'Device:{thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb')
+            print(f'Device {thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb')
             time_step = time.time()
-    print(f'Device:{thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}')
+    print(f'Device {thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}')
 
 def load_model_gfpgan():
     if thread_data.gfpgan_file is None: raise ValueError(f'Thread gfpgan_file is undefined.')
         #print('load_model_gfpgan called without setting gfpgan_file')
         #return
-    if thread_data.device != 0:
+    if thread_data.device != 'cuda:0':
         #TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
-        raise Exception(f'Current device {torch.device(thread_data.device)} is not {torch.device(0)}. Cannot run GFPGANer.')
+        raise Exception(f'Current device {torch.device(thread_data.device)} is not {torch.device("cuda:0")}. Cannot run GFPGANer.')
     model_path = thread_data.gfpgan_file + ".pth"
     thread_data.model_gfpgan = GFPGANer(device=torch.device(thread_data.device), model_path=model_path, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None)
     print('loaded', thread_data.gfpgan_file, 'to', thread_data.model_gfpgan.device, 'precision', thread_data.precision)
diff --git a/ui/sd_internal/task_manager.py b/ui/sd_internal/task_manager.py
index f4fcf03b..30e12d7c 100644
--- a/ui/sd_internal/task_manager.py
+++ b/ui/sd_internal/task_manager.py
@@ -219,7 +219,7 @@ def thread_get_next_task():
     try:  # Select a render task.
         for queued_task in tasks_queue:
             if queued_task.request.use_face_correction:  # TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
-                if is_alive(0) <= 0:  # Allows GFPGANer only on cuda:0.
+                if is_alive('cuda:0') <= 0:  # Allows GFPGANer only on cuda:0.
                     queued_task.error = Exception('cuda:0 is not available with the current config. Remove GFPGANer filter to run task.')
                     task = queued_task
                     break
@@ -227,7 +227,7 @@ def thread_get_next_task():
                     queued_task.error = Exception('Cpu cannot be used to run this task. Remove GFPGANer filter to run task.')
                     task = queued_task
                     break
-                if runtime.thread_data.device != 0:
+                if runtime.thread_data.device != 'cuda:0':
                     continue  # Wait for cuda:0
             if queued_task.render_device and runtime.thread_data.device != queued_task.render_device:
                 # Is asking for a specific render device.
@@ -235,7 +235,7 @@ def thread_get_next_task():
                     continue  # requested device alive, skip current one.
                 else:
                     # Requested device is not active, return error to UI.
-                    queued_task.error = Exception(str(queued_task.render_device) + ' is not currently active.')
+                    queued_task.error = Exception(queued_task.render_device + ' is not currently active.')
                     task = queued_task
                     break
             if not queued_task.render_device and runtime.thread_data.device == 'cpu' and is_alive() > 1:
@@ -365,6 +365,7 @@ def get_devices():
     # list the compatible devices
     gpu_count = torch.cuda.device_count()
     for device in range(gpu_count):
+        device = f'cuda:{device}'
         if runtime.device_would_fail(device):
             continue
 
diff --git a/ui/server.py b/ui/server.py
index fbe9a2d3..ea5b98fc 100644
--- a/ui/server.py
+++ b/ui/server.py
@@ -22,7 +22,7 @@ OUTPUT_DIRNAME = "Stable Diffusion UI" # in the user's home folder
 TASK_TTL = 15 * 60 # Discard last session's task timeout
 APP_CONFIG_DEFAULTS = {
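-    # auto: selects the cuda device with the most free memory, cuda: use the currently active cuda device.
+    # auto: selects the cuda device with the most free memory, cuda:N: use the cuda device with index N.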
-    'render_devices': ['auto'], # ['cuda'] or ['CPU', 'GPU:0', 'GPU:1', ...] or ['cpu']
+    'render_devices': ['auto'], # valid entries: 'auto', 'cpu' or 'cuda:N' (where N is a GPU index)
     'update_branch': 'main',
 }
 APP_CONFIG_DEFAULT_MODELS = [
@@ -281,7 +281,7 @@ def render(req : task_manager.ImageRequest):
     # if req.render_device != 'cpu':
     #     req.render_device = int(req.render_device)
     if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden
-    if req.use_face_correction and task_manager.is_alive(0) <= 0: #TODO Remove when GFPGANer is fixed upstream.
+    if req.use_face_correction and task_manager.is_alive('cuda:0') <= 0: #TODO Remove when GFPGANer is fixed upstream.
         raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed
     try:
         save_model_to_config(req.use_stable_diffusion_model, req.use_vae_model)
@@ -369,6 +369,7 @@ task_manager.default_vae_to_load = resolve_vae_to_use()
 if 'render_devices' in config:  # Start a new thread for each device.
     if not isinstance(config['render_devices'], list):
         raise Exception('Invalid render_devices value in config. Should be a list')
+    config['render_devices'] = list(dict.fromkeys(config['render_devices'])) # de-duplicate; stays a list, keeps configured order
     for device in config['render_devices']:
         if task_manager.is_alive(device) >= 1:
             print(device, 'already registered.')
@@ -383,7 +384,7 @@ if task_manager.is_alive() <= 0: # Either no defaults or no devices after loadin
     # Select best GPU device using free memory, if more than one device.
     if task_manager.start_render_thread('auto'): # Detect best device for renders
         # if cuda:0 is missing, another cuda device is better. try to start it...
-        if task_manager.is_alive(0) <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread(0):
+        if task_manager.is_alive('cuda:0') <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cuda:0'):
             print('Failed to start GPU:0...')
     else:
         print('Failed to start gpu device.')
@@ -391,7 +392,7 @@ if task_manager.is_alive() <= 0: # Either no defaults or no devices after loadin
         print('Failed to start CPU render device...')
 
 is_using_a_gpu = (task_manager.is_alive() > task_manager.is_alive('cpu'))
-if is_using_a_gpu and task_manager.is_alive(0) <= 0:
+if is_using_a_gpu and task_manager.is_alive('cuda:0') <= 0:
     print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
     print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
     print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')