From bd56795c6234a92970619bd8812dd7d752b2395f Mon Sep 17 00:00:00 2001 From: cmdr2 Date: Fri, 11 Nov 2022 14:46:05 +0530 Subject: [PATCH] Switch to using cuda:N instead of N (integer device ids) --- ui/sd_internal/runtime.py | 41 ++++++++++++++++++---------------- ui/sd_internal/task_manager.py | 7 +++--- ui/server.py | 9 ++++---- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/ui/sd_internal/runtime.py b/ui/sd_internal/runtime.py index 74bd8434..f30a4fe0 100644 --- a/ui/sd_internal/runtime.py +++ b/ui/sd_internal/runtime.py @@ -64,9 +64,13 @@ def get_processor_name(): print(traceback.format_exc()) return "cpu" +def validate_device_id(device, allow_auto=False, log_prefix=''): + device_names = ['cpu', 'auto'] if allow_auto else ['cpu'] + if not isinstance(device, str) or (device not in device_names and (len(device) <= len('cuda:') or device[:5] != 'cuda:' or not device[5:].isnumeric())): + raise EnvironmentError(f"{log_prefix}: device id should be {', '.join(device_names)}, or 'cuda:N' (where N is an integer index for the GPU). Got: {device}") + def device_would_fail(device): - if device != 'cpu' and not isinstance(device, int): - raise EnvironmentError(f"device_would_fail() only accepts 'cpu', or an integer index for the GPU. Got: {device}") + validate_device_id(device, allow_auto=False, log_prefix='device_would_fail') if device == 'cpu': return None # Returns None when no issues found, otherwise returns the detected error str. @@ -81,15 +85,14 @@ def device_would_fail(device): return None def device_select(device): - if device != 'cpu' and not isinstance(device, int): - raise EnvironmentError(f"device_select() only accepts 'cpu', or an integer index for the GPU. 
Got: {device}") + validate_device_id(device, allow_auto=False, log_prefix='device_select') if device == 'cpu': return True if not torch.cuda.is_available(): return False failure_msg = device_would_fail(device) if failure_msg: if 'invalid device' in failure_msg: - raise NameError(f'GPU "{device}" could not be found. Remove this device from config.render_devices or use "auto".') + raise NameError(f'{device} could not be found. Remove this device from config.render_devices or use "auto".') print(failure_msg) return False @@ -107,8 +110,7 @@ def device_select(device): return True def device_init(device_selection): - if device_selection not in ['cpu', 'auto'] and not isinstance(device_selection, int): - raise EnvironmentError(f"device_init() only accepts 'cpu', 'auto', or an integer index for the GPU. Got: {device_selection}") + validate_device_id(device_selection, allow_auto=True, log_prefix='device_init') # Thread bound properties thread_data.stop_processing = False @@ -155,37 +157,38 @@ def device_init(device_selection): if device_selection == 'auto': device_count = torch.cuda.device_count() if device_count == 1: - device_select(0) - torch.cuda.device(0) + device_select('cuda:0') + torch.cuda.device('cuda:0') return print('Autoselecting GPU. 
Using most free memory.') max_mem_free = 0 best_device = None for device in range(device_count): + device = f'cuda:{device}' mem_free, mem_total = torch.cuda.mem_get_info(device) mem_free /= float(10**9) mem_total /= float(10**9) device_name = torch.cuda.get_device_name(device) - print(f'GPU {device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb') + print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb') if max_mem_free < mem_free: max_mem_free = mem_free best_device = device if best_device and device_select(best_device): - print(f'Setting GPU {device} as active') + print(f'Setting {best_device} as active') torch.cuda.device(device) return if device_select(device_selection): - print(f'Setting GPU {device_selection} as active') + print(f'Setting {device_selection} as active') torch.cuda.device(device_selection) return # By default use current device. print('Checking current GPU...') - device = torch.cuda.current_device() + device = f'cuda:{torch.cuda.current_device()}' device_name = torch.cuda.get_device_name(device) - print(f'GPU {device} detected: {device_name}') + print(f'{device} detected: {device_name}') if device_select(device): return print('WARNING: No compatible GPU found. Using the CPU, but this will be very slow!') @@ -325,7 +328,7 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6 if start_mem <= 0: return model_name = model.__class__.__name__ - print(f'Device:{thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mb') + print(f'Device {thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting.
Memory Used: {round(start_mem)}Mb') start_time = time.time() model.to(target_device) time_step = start_time @@ -340,17 +343,17 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u if not is_transfering: break; if time.time() - time_step > WARNING_TIMEOUT: # Long delay, print to console to show activity. - print(f'Device:{thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb') + print(f'Device {thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb') time_step = time.time() - print(f'Device:{thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}') + print(f'Device {thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}') def load_model_gfpgan(): if thread_data.gfpgan_file is None: raise ValueError(f'Thread gfpgan_file is undefined.') #print('load_model_gfpgan called without setting gfpgan_file') #return - if thread_data.device != 0: + if thread_data.device != 'cuda:0': #TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices. - raise Exception(f'Current device {torch.device(thread_data.device)} is not {torch.device(0)}. Cannot run GFPGANer.') + raise Exception(f'Current device {torch.device(thread_data.device)} is not {torch.device("cuda:0")}. 
Cannot run GFPGANer.') model_path = thread_data.gfpgan_file + ".pth" thread_data.model_gfpgan = GFPGANer(device=torch.device(thread_data.device), model_path=model_path, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None) print('loaded', thread_data.gfpgan_file, 'to', thread_data.model_gfpgan.device, 'precision', thread_data.precision) diff --git a/ui/sd_internal/task_manager.py b/ui/sd_internal/task_manager.py index f4fcf03b..30e12d7c 100644 --- a/ui/sd_internal/task_manager.py +++ b/ui/sd_internal/task_manager.py @@ -219,7 +219,7 @@ def thread_get_next_task(): try: # Select a render task. for queued_task in tasks_queue: if queued_task.request.use_face_correction: # TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices. - if is_alive(0) <= 0: # Allows GFPGANer only on cuda:0. + if is_alive('cuda:0') <= 0: # Allows GFPGANer only on cuda:0. queued_task.error = Exception('cuda:0 is not available with the current config. Remove GFPGANer filter to run task.') task = queued_task break @@ -227,7 +227,7 @@ def thread_get_next_task(): queued_task.error = Exception('Cpu cannot be used to run this task. Remove GFPGANer filter to run task.') task = queued_task break - if runtime.thread_data.device != 0: + if runtime.thread_data.device != 'cuda:0': continue # Wait for cuda:0 if queued_task.render_device and runtime.thread_data.device != queued_task.render_device: # Is asking for a specific render device. @@ -235,7 +235,7 @@ def thread_get_next_task(): continue # requested device alive, skip current one. else: # Requested device is not active, return error to UI. 
- queued_task.error = Exception(str(queued_task.render_device) + ' is not currently active.') + queued_task.error = Exception(queued_task.render_device + ' is not currently active.') task = queued_task break if not queued_task.render_device and runtime.thread_data.device == 'cpu' and is_alive() > 1: @@ -365,6 +365,7 @@ def get_devices(): # list the compatible devices gpu_count = torch.cuda.device_count() for device in range(gpu_count): + device = f'cuda:{device}' if runtime.device_would_fail(device): continue diff --git a/ui/server.py b/ui/server.py index fbe9a2d3..ea5b98fc 100644 --- a/ui/server.py +++ b/ui/server.py @@ -22,7 +22,7 @@ OUTPUT_DIRNAME = "Stable Diffusion UI" # in the user's home folder TASK_TTL = 15 * 60 # Discard last session's task timeout APP_CONFIG_DEFAULTS = { # auto: selects the cuda device with the most free memory, cuda: use the currently active cuda device. - 'render_devices': ['auto'], # ['cuda'] or ['CPU', 'GPU:0', 'GPU:1', ...] or ['cpu'] + 'render_devices': ['auto'], # valid entries: 'auto', 'cpu' or 'cuda:N' (where N is a GPU index) 'update_branch': 'main', } APP_CONFIG_DEFAULT_MODELS = [ @@ -281,7 +281,7 @@ def render(req : task_manager.ImageRequest): # if req.render_device != 'cpu': # req.render_device = int(req.render_device) if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden - if req.use_face_correction and task_manager.is_alive(0) <= 0: #TODO Remove when GFPGANer is fixed upstream. + if req.use_face_correction and task_manager.is_alive('cuda:0') <= 0: #TODO Remove when GFPGANer is fixed upstream. 
raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed try: save_model_to_config(req.use_stable_diffusion_model, req.use_vae_model) @@ -369,6 +369,7 @@ task_manager.default_vae_to_load = resolve_vae_to_use() if 'render_devices' in config: # Start a new thread for each device. if not isinstance(config['render_devices'], list): raise Exception('Invalid render_devices value in config. Should be a list') + config['render_devices'] = list(dict.fromkeys(config['render_devices'])) # de-duplicate, keeping the configured order (and the list type) for device in config['render_devices']: if task_manager.is_alive(device) >= 1: print(device, 'already registered.') @@ -383,7 +384,7 @@ if task_manager.is_alive() <= 0: # Either no defaults or no devices after loadin # Select best GPU device using free memory, if more than one device. if task_manager.start_render_thread('auto'): # Detect best device for renders # if cuda:0 is missing, another cuda device is better. try to start it...
- if task_manager.is_alive(0) <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread(0): + if task_manager.is_alive('cuda:0') <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cuda:0'): print('Failed to start GPU:0...') else: print('Failed to start gpu device.') @@ -391,7 +392,7 @@ if task_manager.is_alive() <= 0: # Either no defaults or no devices after loadin print('Failed to start CPU render device...') is_using_a_gpu = (task_manager.is_alive() > task_manager.is_alive('cpu')) -if is_using_a_gpu and task_manager.is_alive(0) <= 0: +if is_using_a_gpu and task_manager.is_alive('cuda:0') <= 0: print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer') print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')