Switch to using cuda:N instead of N (integer device ids)

This commit is contained in:
cmdr2 2022-11-11 14:46:05 +05:30
parent 2c54b7f289
commit bd56795c62
3 changed files with 31 additions and 26 deletions

View File

@ -64,9 +64,13 @@ def get_processor_name():
print(traceback.format_exc()) print(traceback.format_exc())
return "cpu" return "cpu"
def validate_device_id(device, allow_auto=False, log_prefix=''):
    """Check that *device* is a well-formed render-device identifier.

    Accepted values are 'cpu', 'auto' (only when *allow_auto* is true), or
    'cuda:N' where N is a non-negative integer written with ASCII digits.

    Args:
        device: The identifier to validate. Anything that is not a ``str``
            is rejected.
        allow_auto: Whether 'auto' counts as a valid identifier.
        log_prefix: Text placed at the start of the error message, so the
            caller can identify itself.

    Returns:
        None when the identifier is valid.

    Raises:
        EnvironmentError: If *device* is not one of the accepted forms.
    """
    device_names = ['cpu', 'auto'] if allow_auto else ['cpu']
    if isinstance(device, str):
        if device in device_names:
            return
        if device.startswith('cuda:'):
            gpu_index = device[len('cuda:'):]
            # str.isnumeric() would also accept characters such as '²' or
            # '½', which are not valid integer indices; require plain ASCII
            # digits (and at least one of them).
            if gpu_index and all(ch in '0123456789' for ch in gpu_index):
                return
    raise EnvironmentError(f"{log_prefix}: device id should be {', '.join(device_names)}, or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")
def device_would_fail(device): def device_would_fail(device):
if device != 'cpu' and not isinstance(device, int): validate_device_id(device, allow_auto=False, log_prefix='device_would_fail')
raise EnvironmentError(f"device_would_fail() only accepts 'cpu', or an integer index for the GPU. Got: {device}")
if device == 'cpu': return None if device == 'cpu': return None
# Returns None when no issues found, otherwise returns the detected error str. # Returns None when no issues found, otherwise returns the detected error str.
@ -81,15 +85,14 @@ def device_would_fail(device):
return None return None
def device_select(device): def device_select(device):
if device != 'cpu' and not isinstance(device, int): validate_device_id(device, allow_auto=False, log_prefix='device_select')
raise EnvironmentError(f"device_select() only accepts 'cpu', or an integer index for the GPU. Got: {device}")
if device == 'cpu': return True if device == 'cpu': return True
if not torch.cuda.is_available(): return False if not torch.cuda.is_available(): return False
failure_msg = device_would_fail(device) failure_msg = device_would_fail(device)
if failure_msg: if failure_msg:
if 'invalid device' in failure_msg: if 'invalid device' in failure_msg:
raise NameError(f'GPU "{device}" could not be found. Remove this device from config.render_devices or use "auto".') raise NameError(f'{device} could not be found. Remove this device from config.render_devices or use "auto".')
print(failure_msg) print(failure_msg)
return False return False
@ -107,8 +110,7 @@ def device_select(device):
return True return True
def device_init(device_selection): def device_init(device_selection):
if device_selection not in ['cpu', 'auto'] and not isinstance(device_selection, int): validate_device_id(device_selection, allow_auto=True, log_prefix='device_init')
raise EnvironmentError(f"device_init() only accepts 'cpu', 'auto', or an integer index for the GPU. Got: {device_selection}")
# Thread bound properties # Thread bound properties
thread_data.stop_processing = False thread_data.stop_processing = False
@ -155,37 +157,38 @@ def device_init(device_selection):
if device_selection == 'auto': if device_selection == 'auto':
device_count = torch.cuda.device_count() device_count = torch.cuda.device_count()
if device_count == 1: if device_count == 1:
device_select(0) device_select('cuda:0')
torch.cuda.device(0) torch.cuda.device('cuda:0')
return return
print('Autoselecting GPU. Using most free memory.') print('Autoselecting GPU. Using most free memory.')
max_mem_free = 0 max_mem_free = 0
best_device = None best_device = None
for device in range(device_count): for device in range(device_count):
device = f'cuda:{device}'
mem_free, mem_total = torch.cuda.mem_get_info(device) mem_free, mem_total = torch.cuda.mem_get_info(device)
mem_free /= float(10**9) mem_free /= float(10**9)
mem_total /= float(10**9) mem_total /= float(10**9)
device_name = torch.cuda.get_device_name(device) device_name = torch.cuda.get_device_name(device)
print(f'GPU {device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb') print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
if max_mem_free < mem_free: if max_mem_free < mem_free:
max_mem_free = mem_free max_mem_free = mem_free
best_device = device best_device = device
if best_device and device_select(best_device): if best_device and device_select(best_device):
print(f'Setting GPU {device} as active') print(f'Setting {device} as active')
torch.cuda.device(device) torch.cuda.device(device)
return return
if device_select(device_selection): if device_select(device_selection):
print(f'Setting GPU {device_selection} as active') print(f'Setting {device_selection} as active')
torch.cuda.device(device_selection) torch.cuda.device(device_selection)
return return
# By default use current device. # By default use current device.
print('Checking current GPU...') print('Checking current GPU...')
device = torch.cuda.current_device() device = f'cuda:{torch.cuda.current_device()}'
device_name = torch.cuda.get_device_name(device) device_name = torch.cuda.get_device_name(device)
print(f'GPU {device} detected: {device_name}') print(f'{device} detected: {device_name}')
if device_select(device): if device_select(device):
return return
print('WARNING: No compatible GPU found. Using the CPU, but this will be very slow!') print('WARNING: No compatible GPU found. Using the CPU, but this will be very slow!')
@ -325,7 +328,7 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u
start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6 start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
if start_mem <= 0: return if start_mem <= 0: return
model_name = model.__class__.__name__ model_name = model.__class__.__name__
print(f'Device:{thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mb') print(f'Device {thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mb')
start_time = time.time() start_time = time.time()
model.to(target_device) model.to(target_device)
time_step = start_time time_step = start_time
@ -340,17 +343,17 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u
if not is_transfering: if not is_transfering:
break; break;
if time.time() - time_step > WARNING_TIMEOUT: # Long delay, print to console to show activity. if time.time() - time_step > WARNING_TIMEOUT: # Long delay, print to console to show activity.
print(f'Device:{thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb') print(f'Device {thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb')
time_step = time.time() time_step = time.time()
print(f'Device:{thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}') print(f'Device {thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}')
def load_model_gfpgan(): def load_model_gfpgan():
if thread_data.gfpgan_file is None: raise ValueError(f'Thread gfpgan_file is undefined.') if thread_data.gfpgan_file is None: raise ValueError(f'Thread gfpgan_file is undefined.')
#print('load_model_gfpgan called without setting gfpgan_file') #print('load_model_gfpgan called without setting gfpgan_file')
#return #return
if thread_data.device != 0: if thread_data.device != 'cuda:0':
#TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices. #TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
raise Exception(f'Current device {torch.device(thread_data.device)} is not {torch.device(0)}. Cannot run GFPGANer.') raise Exception(f'Current device {torch.device(thread_data.device)} is not {torch.device("cuda:0")}. Cannot run GFPGANer.')
model_path = thread_data.gfpgan_file + ".pth" model_path = thread_data.gfpgan_file + ".pth"
thread_data.model_gfpgan = GFPGANer(device=torch.device(thread_data.device), model_path=model_path, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None) thread_data.model_gfpgan = GFPGANer(device=torch.device(thread_data.device), model_path=model_path, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None)
print('loaded', thread_data.gfpgan_file, 'to', thread_data.model_gfpgan.device, 'precision', thread_data.precision) print('loaded', thread_data.gfpgan_file, 'to', thread_data.model_gfpgan.device, 'precision', thread_data.precision)

View File

@ -219,7 +219,7 @@ def thread_get_next_task():
try: # Select a render task. try: # Select a render task.
for queued_task in tasks_queue: for queued_task in tasks_queue:
if queued_task.request.use_face_correction: # TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices. if queued_task.request.use_face_correction: # TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
if is_alive(0) <= 0: # Allows GFPGANer only on cuda:0. if is_alive('cuda:0') <= 0: # Allows GFPGANer only on cuda:0.
queued_task.error = Exception('cuda:0 is not available with the current config. Remove GFPGANer filter to run task.') queued_task.error = Exception('cuda:0 is not available with the current config. Remove GFPGANer filter to run task.')
task = queued_task task = queued_task
break break
@ -227,7 +227,7 @@ def thread_get_next_task():
queued_task.error = Exception('Cpu cannot be used to run this task. Remove GFPGANer filter to run task.') queued_task.error = Exception('Cpu cannot be used to run this task. Remove GFPGANer filter to run task.')
task = queued_task task = queued_task
break break
if runtime.thread_data.device != 0: if runtime.thread_data.device != 'cuda:0':
continue # Wait for cuda:0 continue # Wait for cuda:0
if queued_task.render_device and runtime.thread_data.device != queued_task.render_device: if queued_task.render_device and runtime.thread_data.device != queued_task.render_device:
# Is asking for a specific render device. # Is asking for a specific render device.
@ -235,7 +235,7 @@ def thread_get_next_task():
continue # requested device alive, skip current one. continue # requested device alive, skip current one.
else: else:
# Requested device is not active, return error to UI. # Requested device is not active, return error to UI.
queued_task.error = Exception(str(queued_task.render_device) + ' is not currently active.') queued_task.error = Exception(queued_task.render_device + ' is not currently active.')
task = queued_task task = queued_task
break break
if not queued_task.render_device and runtime.thread_data.device == 'cpu' and is_alive() > 1: if not queued_task.render_device and runtime.thread_data.device == 'cpu' and is_alive() > 1:
@ -365,6 +365,7 @@ def get_devices():
# list the compatible devices # list the compatible devices
gpu_count = torch.cuda.device_count() gpu_count = torch.cuda.device_count()
for device in range(gpu_count): for device in range(gpu_count):
device = f'cuda:{device}'
if runtime.device_would_fail(device): if runtime.device_would_fail(device):
continue continue

View File

@ -22,7 +22,7 @@ OUTPUT_DIRNAME = "Stable Diffusion UI" # in the user's home folder
TASK_TTL = 15 * 60 # Discard last session's task timeout TASK_TTL = 15 * 60 # Discard last session's task timeout
APP_CONFIG_DEFAULTS = { APP_CONFIG_DEFAULTS = {
# auto: selects the cuda device with the most free memory, cuda: use the currently active cuda device. # auto: selects the cuda device with the most free memory, cuda: use the currently active cuda device.
'render_devices': ['auto'], # ['cuda'] or ['CPU', 'GPU:0', 'GPU:1', ...] or ['cpu'] 'render_devices': ['auto'], # valid entries: 'auto', 'cpu' or 'cuda:N' (where N is a GPU index)
'update_branch': 'main', 'update_branch': 'main',
} }
APP_CONFIG_DEFAULT_MODELS = [ APP_CONFIG_DEFAULT_MODELS = [
@ -281,7 +281,7 @@ def render(req : task_manager.ImageRequest):
# if req.render_device != 'cpu': # if req.render_device != 'cpu':
# req.render_device = int(req.render_device) # req.render_device = int(req.render_device)
if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden
if req.use_face_correction and task_manager.is_alive(0) <= 0: #TODO Remove when GFPGANer is fixed upstream. if req.use_face_correction and task_manager.is_alive('cuda:0') <= 0: #TODO Remove when GFPGANer is fixed upstream.
raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed
try: try:
save_model_to_config(req.use_stable_diffusion_model, req.use_vae_model) save_model_to_config(req.use_stable_diffusion_model, req.use_vae_model)
@ -369,6 +369,7 @@ task_manager.default_vae_to_load = resolve_vae_to_use()
if 'render_devices' in config: # Start a new thread for each device. if 'render_devices' in config: # Start a new thread for each device.
if not isinstance(config['render_devices'], list): if not isinstance(config['render_devices'], list):
raise Exception('Invalid render_devices value in config. Should be a list') raise Exception('Invalid render_devices value in config. Should be a list')
config['render_devices'] = set(config['render_devices']) # de-duplicate
for device in config['render_devices']: for device in config['render_devices']:
if task_manager.is_alive(device) >= 1: if task_manager.is_alive(device) >= 1:
print(device, 'already registered.') print(device, 'already registered.')
@ -383,7 +384,7 @@ if task_manager.is_alive() <= 0: # Either no defaults or no devices after loadin
# Select best GPU device using free memory, if more than one device. # Select best GPU device using free memory, if more than one device.
if task_manager.start_render_thread('auto'): # Detect best device for renders if task_manager.start_render_thread('auto'): # Detect best device for renders
# if cuda:0 is missing, another cuda device is better. try to start it... # if cuda:0 is missing, another cuda device is better. try to start it...
if task_manager.is_alive(0) <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread(0): if task_manager.is_alive('cuda:0') <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cuda:0'):
print('Failed to start GPU:0...') print('Failed to start GPU:0...')
else: else:
print('Failed to start gpu device.') print('Failed to start gpu device.')
@ -391,7 +392,7 @@ if task_manager.is_alive() <= 0: # Either no defaults or no devices after loadin
print('Failed to start CPU render device...') print('Failed to start CPU render device...')
is_using_a_gpu = (task_manager.is_alive() > task_manager.is_alive('cpu')) is_using_a_gpu = (task_manager.is_alive() > task_manager.is_alive('cpu'))
if is_using_a_gpu and task_manager.is_alive(0) <= 0: if is_using_a_gpu and task_manager.is_alive('cuda:0') <= 0:
print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer') print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat') print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')