mirror of
https://github.com/easydiffusion/easydiffusion.git
synced 2024-11-23 00:33:28 +01:00
Switch to using cuda:N instead of N (integer device ids)
This commit is contained in:
parent
2c54b7f289
commit
bd56795c62
@ -64,9 +64,13 @@ def get_processor_name():
|
|||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
return "cpu"
|
return "cpu"
|
||||||
|
|
||||||
|
def validate_device_id(device, allow_auto=False, log_prefix=''):
    """Raise if `device` is not an accepted render-device identifier.

    Accepted values are 'cpu', 'auto' (only when `allow_auto` is true) and
    'cuda:N', where N is a GPU index written with plain ASCII digits.

    Args:
        device: Candidate device id. Anything that is not a str is rejected.
        allow_auto: Whether the 'auto' placeholder is an acceptable value.
        log_prefix: Text prepended to the error message (callers pass their
            own function name).

    Raises:
        EnvironmentError: If `device` is not one of the accepted forms.
    """
    device_names = ['cpu', 'auto'] if allow_auto else ['cpu']

    if isinstance(device, str):
        if device in device_names:
            return

        prefix, separator, index = device.partition(':')
        # str.isnumeric() also accepts characters such as '²' or '½', which
        # are not usable GPU indexes, so require one or more ASCII digits.
        index_is_valid = index != '' and all(ch in '0123456789' for ch in index)
        if prefix == 'cuda' and separator == ':' and index_is_valid:
            return

    raise EnvironmentError(f"{log_prefix}: device id should be {', '.join(device_names)}, or 'cuda:N' (where N is an integer index for the GPU). Got: {device}")
|
||||||
|
|
||||||
def device_would_fail(device):
|
def device_would_fail(device):
|
||||||
if device != 'cpu' and not isinstance(device, int):
|
validate_device_id(device, allow_auto=False, log_prefix='device_would_fail')
|
||||||
raise EnvironmentError(f"device_would_fail() only accepts 'cpu', or an integer index for the GPU. Got: {device}")
|
|
||||||
|
|
||||||
if device == 'cpu': return None
|
if device == 'cpu': return None
|
||||||
# Returns None when no issues found, otherwise returns the detected error str.
|
# Returns None when no issues found, otherwise returns the detected error str.
|
||||||
@ -81,15 +85,14 @@ def device_would_fail(device):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def device_select(device):
|
def device_select(device):
|
||||||
if device != 'cpu' and not isinstance(device, int):
|
validate_device_id(device, allow_auto=False, log_prefix='device_select')
|
||||||
raise EnvironmentError(f"device_select() only accepts 'cpu', or an integer index for the GPU. Got: {device}")
|
|
||||||
|
|
||||||
if device == 'cpu': return True
|
if device == 'cpu': return True
|
||||||
if not torch.cuda.is_available(): return False
|
if not torch.cuda.is_available(): return False
|
||||||
failure_msg = device_would_fail(device)
|
failure_msg = device_would_fail(device)
|
||||||
if failure_msg:
|
if failure_msg:
|
||||||
if 'invalid device' in failure_msg:
|
if 'invalid device' in failure_msg:
|
||||||
raise NameError(f'GPU "{device}" could not be found. Remove this device from config.render_devices or use "auto".')
|
raise NameError(f'{device} could not be found. Remove this device from config.render_devices or use "auto".')
|
||||||
print(failure_msg)
|
print(failure_msg)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -107,8 +110,7 @@ def device_select(device):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
def device_init(device_selection):
|
def device_init(device_selection):
|
||||||
if device_selection not in ['cpu', 'auto'] and not isinstance(device_selection, int):
|
validate_device_id(device_selection, allow_auto=True, log_prefix='device_init')
|
||||||
raise EnvironmentError(f"device_init() only accepts 'cpu', 'auto', or an integer index for the GPU. Got: {device_selection}")
|
|
||||||
|
|
||||||
# Thread bound properties
|
# Thread bound properties
|
||||||
thread_data.stop_processing = False
|
thread_data.stop_processing = False
|
||||||
@ -155,37 +157,38 @@ def device_init(device_selection):
|
|||||||
if device_selection == 'auto':
|
if device_selection == 'auto':
|
||||||
device_count = torch.cuda.device_count()
|
device_count = torch.cuda.device_count()
|
||||||
if device_count == 1:
|
if device_count == 1:
|
||||||
device_select(0)
|
device_select('cuda:0')
|
||||||
torch.cuda.device(0)
|
torch.cuda.device('cuda:0')
|
||||||
return
|
return
|
||||||
|
|
||||||
print('Autoselecting GPU. Using most free memory.')
|
print('Autoselecting GPU. Using most free memory.')
|
||||||
max_mem_free = 0
|
max_mem_free = 0
|
||||||
best_device = None
|
best_device = None
|
||||||
for device in range(device_count):
|
for device in range(device_count):
|
||||||
|
device = f'cuda:{device}'
|
||||||
mem_free, mem_total = torch.cuda.mem_get_info(device)
|
mem_free, mem_total = torch.cuda.mem_get_info(device)
|
||||||
mem_free /= float(10**9)
|
mem_free /= float(10**9)
|
||||||
mem_total /= float(10**9)
|
mem_total /= float(10**9)
|
||||||
device_name = torch.cuda.get_device_name(device)
|
device_name = torch.cuda.get_device_name(device)
|
||||||
print(f'GPU {device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
|
print(f'{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
|
||||||
if max_mem_free < mem_free:
|
if max_mem_free < mem_free:
|
||||||
max_mem_free = mem_free
|
max_mem_free = mem_free
|
||||||
best_device = device
|
best_device = device
|
||||||
if best_device and device_select(best_device):
|
if best_device and device_select(best_device):
|
||||||
print(f'Setting GPU {device} as active')
|
print(f'Setting {device} as active')
|
||||||
torch.cuda.device(device)
|
torch.cuda.device(device)
|
||||||
return
|
return
|
||||||
|
|
||||||
if device_select(device_selection):
|
if device_select(device_selection):
|
||||||
print(f'Setting GPU {device_selection} as active')
|
print(f'Setting {device_selection} as active')
|
||||||
torch.cuda.device(device_selection)
|
torch.cuda.device(device_selection)
|
||||||
return
|
return
|
||||||
|
|
||||||
# By default use current device.
|
# By default use current device.
|
||||||
print('Checking current GPU...')
|
print('Checking current GPU...')
|
||||||
device = torch.cuda.current_device()
|
device = f'cuda:{torch.cuda.current_device()}'
|
||||||
device_name = torch.cuda.get_device_name(device)
|
device_name = torch.cuda.get_device_name(device)
|
||||||
print(f'GPU {device} detected: {device_name}')
|
print(f'{device} detected: {device_name}')
|
||||||
if device_select(device):
|
if device_select(device):
|
||||||
return
|
return
|
||||||
print('WARNING: No compatible GPU found. Using the CPU, but this will be very slow!')
|
print('WARNING: No compatible GPU found. Using the CPU, but this will be very slow!')
|
||||||
@ -325,7 +328,7 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u
|
|||||||
start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
|
start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
|
||||||
if start_mem <= 0: return
|
if start_mem <= 0: return
|
||||||
model_name = model.__class__.__name__
|
model_name = model.__class__.__name__
|
||||||
print(f'Device:{thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mb')
|
print(f'Device {thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mb')
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
model.to(target_device)
|
model.to(target_device)
|
||||||
time_step = start_time
|
time_step = start_time
|
||||||
@ -340,17 +343,17 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u
|
|||||||
if not is_transfering:
|
if not is_transfering:
|
||||||
break;
|
break;
|
||||||
if time.time() - time_step > WARNING_TIMEOUT: # Long delay, print to console to show activity.
|
if time.time() - time_step > WARNING_TIMEOUT: # Long delay, print to console to show activity.
|
||||||
print(f'Device:{thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb')
|
print(f'Device {thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb')
|
||||||
time_step = time.time()
|
time_step = time.time()
|
||||||
print(f'Device:{thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}')
|
print(f'Device {thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}')
|
||||||
|
|
||||||
def load_model_gfpgan():
|
def load_model_gfpgan():
|
||||||
if thread_data.gfpgan_file is None: raise ValueError(f'Thread gfpgan_file is undefined.')
|
if thread_data.gfpgan_file is None: raise ValueError(f'Thread gfpgan_file is undefined.')
|
||||||
#print('load_model_gfpgan called without setting gfpgan_file')
|
#print('load_model_gfpgan called without setting gfpgan_file')
|
||||||
#return
|
#return
|
||||||
if thread_data.device != 0:
|
if thread_data.device != 'cuda:0':
|
||||||
#TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
|
#TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
|
||||||
raise Exception(f'Current device {torch.device(thread_data.device)} is not {torch.device(0)}. Cannot run GFPGANer.')
|
raise Exception(f'Current device {torch.device(thread_data.device)} is not {torch.device("cuda:0")}. Cannot run GFPGANer.')
|
||||||
model_path = thread_data.gfpgan_file + ".pth"
|
model_path = thread_data.gfpgan_file + ".pth"
|
||||||
thread_data.model_gfpgan = GFPGANer(device=torch.device(thread_data.device), model_path=model_path, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None)
|
thread_data.model_gfpgan = GFPGANer(device=torch.device(thread_data.device), model_path=model_path, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None)
|
||||||
print('loaded', thread_data.gfpgan_file, 'to', thread_data.model_gfpgan.device, 'precision', thread_data.precision)
|
print('loaded', thread_data.gfpgan_file, 'to', thread_data.model_gfpgan.device, 'precision', thread_data.precision)
|
||||||
|
@ -219,7 +219,7 @@ def thread_get_next_task():
|
|||||||
try: # Select a render task.
|
try: # Select a render task.
|
||||||
for queued_task in tasks_queue:
|
for queued_task in tasks_queue:
|
||||||
if queued_task.request.use_face_correction: # TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
|
if queued_task.request.use_face_correction: # TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
|
||||||
if is_alive(0) <= 0: # Allows GFPGANer only on cuda:0.
|
if is_alive('cuda:0') <= 0: # Allows GFPGANer only on cuda:0.
|
||||||
queued_task.error = Exception('cuda:0 is not available with the current config. Remove GFPGANer filter to run task.')
|
queued_task.error = Exception('cuda:0 is not available with the current config. Remove GFPGANer filter to run task.')
|
||||||
task = queued_task
|
task = queued_task
|
||||||
break
|
break
|
||||||
@ -227,7 +227,7 @@ def thread_get_next_task():
|
|||||||
queued_task.error = Exception('Cpu cannot be used to run this task. Remove GFPGANer filter to run task.')
|
queued_task.error = Exception('Cpu cannot be used to run this task. Remove GFPGANer filter to run task.')
|
||||||
task = queued_task
|
task = queued_task
|
||||||
break
|
break
|
||||||
if runtime.thread_data.device != 0:
|
if runtime.thread_data.device != 'cuda:0':
|
||||||
continue # Wait for cuda:0
|
continue # Wait for cuda:0
|
||||||
if queued_task.render_device and runtime.thread_data.device != queued_task.render_device:
|
if queued_task.render_device and runtime.thread_data.device != queued_task.render_device:
|
||||||
# Is asking for a specific render device.
|
# Is asking for a specific render device.
|
||||||
@ -235,7 +235,7 @@ def thread_get_next_task():
|
|||||||
continue # requested device alive, skip current one.
|
continue # requested device alive, skip current one.
|
||||||
else:
|
else:
|
||||||
# Requested device is not active, return error to UI.
|
# Requested device is not active, return error to UI.
|
||||||
queued_task.error = Exception(str(queued_task.render_device) + ' is not currently active.')
|
queued_task.error = Exception(queued_task.render_device + ' is not currently active.')
|
||||||
task = queued_task
|
task = queued_task
|
||||||
break
|
break
|
||||||
if not queued_task.render_device and runtime.thread_data.device == 'cpu' and is_alive() > 1:
|
if not queued_task.render_device and runtime.thread_data.device == 'cpu' and is_alive() > 1:
|
||||||
@ -365,6 +365,7 @@ def get_devices():
|
|||||||
# list the compatible devices
|
# list the compatible devices
|
||||||
gpu_count = torch.cuda.device_count()
|
gpu_count = torch.cuda.device_count()
|
||||||
for device in range(gpu_count):
|
for device in range(gpu_count):
|
||||||
|
device = f'cuda:{device}'
|
||||||
if runtime.device_would_fail(device):
|
if runtime.device_would_fail(device):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ OUTPUT_DIRNAME = "Stable Diffusion UI" # in the user's home folder
|
|||||||
TASK_TTL = 15 * 60 # Discard last session's task timeout
|
TASK_TTL = 15 * 60 # Discard last session's task timeout
|
||||||
APP_CONFIG_DEFAULTS = {
|
APP_CONFIG_DEFAULTS = {
|
||||||
# auto: selects the cuda device with the most free memory, cuda: use the currently active cuda device.
|
# auto: selects the cuda device with the most free memory, cuda: use the currently active cuda device.
|
||||||
'render_devices': ['auto'], # ['cuda'] or ['CPU', 'GPU:0', 'GPU:1', ...] or ['cpu']
|
'render_devices': ['auto'], # valid entries: 'auto', 'cpu' or 'cuda:N' (where N is a GPU index)
|
||||||
'update_branch': 'main',
|
'update_branch': 'main',
|
||||||
}
|
}
|
||||||
APP_CONFIG_DEFAULT_MODELS = [
|
APP_CONFIG_DEFAULT_MODELS = [
|
||||||
@ -281,7 +281,7 @@ def render(req : task_manager.ImageRequest):
|
|||||||
# if req.render_device != 'cpu':
|
# if req.render_device != 'cpu':
|
||||||
# req.render_device = int(req.render_device)
|
# req.render_device = int(req.render_device)
|
||||||
if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden
|
if req.render_device and task_manager.is_alive(req.render_device) <= 0: raise HTTPException(status_code=403, detail=f'{req.render_device} rendering is not enabled in config.json or the thread has died...') # HTTP403 Forbidden
|
||||||
if req.use_face_correction and task_manager.is_alive(0) <= 0: #TODO Remove when GFPGANer is fixed upstream.
|
if req.use_face_correction and task_manager.is_alive('cuda:0') <= 0: #TODO Remove when GFPGANer is fixed upstream.
|
||||||
raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed
|
raise HTTPException(status_code=412, detail=f'GFPGANer only works GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.') # HTTP412 Precondition Failed
|
||||||
try:
|
try:
|
||||||
save_model_to_config(req.use_stable_diffusion_model, req.use_vae_model)
|
save_model_to_config(req.use_stable_diffusion_model, req.use_vae_model)
|
||||||
@ -369,6 +369,7 @@ task_manager.default_vae_to_load = resolve_vae_to_use()
|
|||||||
if 'render_devices' in config: # Start a new thread for each device.
|
if 'render_devices' in config: # Start a new thread for each device.
|
||||||
if not isinstance(config['render_devices'], list):
|
if not isinstance(config['render_devices'], list):
|
||||||
raise Exception('Invalid render_devices value in config. Should be a list')
|
raise Exception('Invalid render_devices value in config. Should be a list')
|
||||||
|
config['render_devices'] = set(config['render_devices']) # de-duplicate
|
||||||
for device in config['render_devices']:
|
for device in config['render_devices']:
|
||||||
if task_manager.is_alive(device) >= 1:
|
if task_manager.is_alive(device) >= 1:
|
||||||
print(device, 'already registered.')
|
print(device, 'already registered.')
|
||||||
@ -383,7 +384,7 @@ if task_manager.is_alive() <= 0: # Either no defaults or no devices after loadin
|
|||||||
# Select best GPU device using free memory, if more than one device.
|
# Select best GPU device using free memory, if more than one device.
|
||||||
if task_manager.start_render_thread('auto'): # Detect best device for renders
|
if task_manager.start_render_thread('auto'): # Detect best device for renders
|
||||||
# if cuda:0 is missing, another cuda device is better. try to start it...
|
# if cuda:0 is missing, another cuda device is better. try to start it...
|
||||||
if task_manager.is_alive(0) <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread(0):
|
if task_manager.is_alive('cuda:0') <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cuda:0'):
|
||||||
print('Failed to start GPU:0...')
|
print('Failed to start GPU:0...')
|
||||||
else:
|
else:
|
||||||
print('Failed to start gpu device.')
|
print('Failed to start gpu device.')
|
||||||
@ -391,7 +392,7 @@ if task_manager.is_alive() <= 0: # Either no defaults or no devices after loadin
|
|||||||
print('Failed to start CPU render device...')
|
print('Failed to start CPU render device...')
|
||||||
|
|
||||||
is_using_a_gpu = (task_manager.is_alive() > task_manager.is_alive('cpu'))
|
is_using_a_gpu = (task_manager.is_alive() > task_manager.is_alive('cpu'))
|
||||||
if is_using_a_gpu and task_manager.is_alive(0) <= 0:
|
if is_using_a_gpu and task_manager.is_alive('cuda:0') <= 0:
|
||||||
print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
|
print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
|
||||||
print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
|
print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
|
||||||
print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
|
print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
|
||||||
|
Loading…
Reference in New Issue
Block a user