Restrict device selection id to 'cpu' or integers (and 'auto' in the initial device selection functions)

This commit is contained in:
cmdr2 2022-11-10 20:03:11 +05:30
parent 3f26d03166
commit b9a12d1562
3 changed files with 60 additions and 124 deletions

View File

@ -52,19 +52,22 @@ def get_processor_name():
return platform.processor()
elif platform.system() == "Darwin":
os.environ['PATH'] = os.environ['PATH'] + os.pathsep + '/usr/sbin'
command ="sysctl -n machdep.cpu.brand_string"
command = "sysctl -n machdep.cpu.brand_string"
return subprocess.check_output(command).strip()
elif platform.system() == "Linux":
command = "cat /proc/cpuinfo"
all_info = subprocess.check_output(command, shell=True).decode().strip()
for line in all_info.split("\n"):
if "model name" in line:
return re.sub( ".*model name.*:", "", line,1).strip()
return re.sub(".*model name.*:", "", line, 1).strip()
except:
print(traceback.format_exc())
return "cpu"
def device_would_fail(device):
if device != 'cpu' and not isinstance(device, int):
raise EnvironmentError(f"device_would_fail() only accepts 'cpu', or an integer index for the GPU. Got: {device}")
if device == 'cpu': return None
# Returns None when no issues found, otherwise returns the detected error str.
# Memory check
@ -78,12 +81,15 @@ def device_would_fail(device):
return None
def device_select(device):
if device != 'cpu' and not isinstance(device, int):
raise EnvironmentError(f"device_select() only accepts 'cpu', or an integer index for the GPU. Got: {device}")
if device == 'cpu': return True
if not torch.cuda.is_available(): return False
failure_msg = device_would_fail(device)
if failure_msg:
if 'invalid device' in failure_msg:
raise NameError(f'GPU "{device}" could not be found. Remove this device from config.render_devices or use one of "auto" or "cuda".')
raise NameError(f'GPU "{device}" could not be found. Remove this device from config.render_devices or use "auto".')
print(failure_msg)
return False
@ -100,7 +106,10 @@ def device_select(device):
return True
def device_init(device_selection=None):
def device_init(device_selection):
if device_selection not in ['cpu', 'auto'] and not isinstance(device_selection, int):
raise EnvironmentError(f"device_init() only accepts 'cpu', 'auto', or an integer index for the GPU. Got: {device_selection}")
# Thread bound properties
thread_data.stop_processing = False
thread_data.temp_images = {}
@ -129,25 +138,27 @@ def device_init(device_selection=None):
thread_data.force_full_precision = False
thread_data.reduced_memory = True
device_selection = device_selection.lower()
if device_selection == 'cpu':
thread_data.device = 'cpu'
thread_data.device_name = get_processor_name()
print('Render device CPU available as', thread_data.device_name)
return
if not torch.cuda.is_available():
if device_selection == 'auto' or device_selection == 'current':
if device_selection == 'auto':
print('WARNING: Could not find a compatible GPU. Using the CPU, but this will be very slow!')
thread_data.device = 'cpu'
thread_data.device_name = get_processor_name()
return
else:
raise EnvironmentError(f'Could not find a compatible GPU for the requested device_selection: {device_selection}!')
device_count = torch.cuda.device_count()
if device_count <= 1 and device_selection == 'auto':
device_selection = 'current' # Use 'auto' only when there is more than one compatible device found.
if device_selection == 'auto':
device_count = torch.cuda.device_count()
if device_count == 1:
device_select(0)
torch.cuda.device(0)
return
print('Autoselecting GPU. Using most free memory.')
max_mem_free = 0
best_device = None
@ -156,46 +167,31 @@ def device_init(device_selection=None):
mem_free /= float(10**9)
mem_total /= float(10**9)
device_name = torch.cuda.get_device_name(device)
print(f'GPU:{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Go / {round(mem_total, 2)}Go')
print(f'GPU {device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
if max_mem_free < mem_free:
max_mem_free = mem_free
best_device = device
if best_device and device_select(device):
print(f'Setting GPU:{device} as active')
if best_device and device_select(best_device):
print(f'Setting GPU {device} as active')
torch.cuda.device(device)
return
if device_selection.startswith('gpu:'):
device_selection = int(device_selection[4:])
if device_select(device_selection):
print(f'Setting GPU {device_selection} as active')
torch.cuda.device(device_selection)
return
if device_selection != 'cuda' and device_selection != 'current' and device_selection != 'gpu':
if device_select(device_selection):
if isinstance(device_selection, int):
print(f'Setting GPU:{device_selection} as active')
else:
print(f'Setting {device_selection} as active')
torch.cuda.device(device_selection)
return
# By default use current device.
print('Checking current GPU...')
device = torch.cuda.current_device()
device_name = torch.cuda.get_device_name(device)
print(f'GPU:{device} detected: {device_name}')
print(f'GPU {device} detected: {device_name}')
if device_select(device):
return
print('WARNING: No compatible GPU found. Using the CPU, but this will be very slow!')
thread_data.device = 'cpu'
thread_data.device_name = get_processor_name()
def is_first_cuda_device(device):
    """Tell whether ``device`` is one of the accepted spellings of the first CUDA device.

    Accepted spellings are the integer ``0``, the strings ``'0'``, ``'cuda'``,
    ``'cuda:0'``, ``'gpu'``, ``'gpu:0'`` and ``'current'``, or any value that
    compares equal to ``torch.device(0)``.

    Returns ``False`` for ``None`` and for every other value.
    """
    if device is None:
        return False

    # Plain int/str aliases are checked first, in the same order as before,
    # so torch.device(0) is only built when none of them matched.
    first_device_aliases = (0, '0', 'cuda', 'cuda:0', 'gpu', 'gpu:0', 'current')
    for alias in first_device_aliases:
        if device == alias:
            return True

    return bool(device == torch.device(0))
def load_model_ckpt():
if not thread_data.ckpt_file: raise ValueError(f'Thread ckpt_file is undefined.')
if not os.path.exists(thread_data.ckpt_file + '.ckpt'): raise FileNotFoundError(f'Cannot find {thread_data.ckpt_file}.ckpt')
@ -209,7 +205,7 @@ def load_model_ckpt():
if thread_data.device == 'cpu':
thread_data.precision = 'full'
print('loading', thread_data.ckpt_file + '.ckpt', 'to', thread_data.device, 'using precision', thread_data.precision)
print('loading', thread_data.ckpt_file + '.ckpt', 'to device', thread_data.device, 'using precision', thread_data.precision)
sd = load_model_from_config(thread_data.ckpt_file + '.ckpt')
li, lo = [], []
for key, value in sd.items():
@ -296,16 +292,26 @@ def load_model_ckpt():
def unload_filters():
    """Drop the GFPGAN and Real-ESRGAN filter models held on ``thread_data``.

    For each filter slot that is not ``None``: when ``thread_data.device`` is
    not ``'cpu'`` the wrapped network is first sent to ``'cpu'``, then the
    attribute is deleted. Afterwards the slot is set to ``None``.
    """
    # (attribute on thread_data, attribute on that object holding the network)
    filter_slots = (
        ('model_gfpgan', 'gfpgan'),
        ('model_real_esrgan', 'model'),
    )
    for slot_name, network_attr in filter_slots:
        loaded_filter = getattr(thread_data, slot_name)
        if loaded_filter is not None:
            if thread_data.device != 'cpu':
                getattr(loaded_filter, network_attr).to('cpu')
            # Drop the local alias as well, so the reference goes away at the
            # same point it did when the attribute was deleted directly.
            del loaded_filter
            delattr(thread_data, slot_name)
        setattr(thread_data, slot_name, None)
def unload_models():
if thread_data.model is not None:
print('Unloading models...')
if thread_data.device != 'cpu':
thread_data.modelFS.to('cpu')
thread_data.modelCS.to('cpu')
thread_data.model.model1.to("cpu")
thread_data.model.model2.to("cpu")
del thread_data.model
del thread_data.modelCS
del thread_data.modelFS
@ -319,7 +325,7 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u
start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
if start_mem <= 0: return
model_name = model.__class__.__name__
print(f'Device:{thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mo')
print(f'Device:{thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mb')
start_time = time.time()
model.to(target_device)
time_step = start_time
@ -334,15 +340,15 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u
if not is_transfering:
break;
if time.time() - time_step > WARNING_TIMEOUT: # Long delay, print to console to show activity.
print(f'Device:{thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mo, Transfered: {round(start_mem - mem)}Mo')
print(f'Device:{thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb')
time_step = time.time()
print(f'Device:{thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mo in {round(time.time() - start_time, 3)} seconds to {target_device}')
print(f'Device:{thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}')
def load_model_gfpgan():
if thread_data.gfpgan_file is None: raise ValueError(f'Thread gfpgan_file is undefined.')
#print('load_model_gfpgan called without setting gfpgan_file')
#return
if not is_first_cuda_device(thread_data.device):
if thread_data.device != 0:
#TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
raise Exception(f'Current device {torch.device(thread_data.device)} is not {torch.device(0)}. Cannot run GFPGANer.')
model_path = thread_data.gfpgan_file + ".pth"
@ -431,15 +437,11 @@ def mk_img(req: Request):
except Exception as e:
print(traceback.format_exc())
if thread_data.reduced_memory:
if thread_data.device != 'cpu':
thread_data.modelFS.to('cpu')
thread_data.modelCS.to('cpu')
thread_data.model.model1.to("cpu")
thread_data.model.model2.to("cpu")
else:
# Model crashed, release all resources in unknown state.
unload_models()
unload_filters()
gc() # Release from memory.
yield json.dumps({
@ -523,6 +525,7 @@ def do_mk_img(req: Request):
if needs_model_reload:
unload_models()
unload_filters()
gc()
load_model_ckpt()
if thread_data.turbo != req.turbo:
@ -715,12 +718,12 @@ def do_mk_img(req: Request):
# Filter Applied, move to next seed
opt_seed += 1
if thread_data.reduced_memory:
unload_filters()
# if thread_data.reduced_memory:
# unload_filters()
del img_data
gc()
if thread_data.device != 'cpu':
print(f'memory_final = {round(torch.cuda.memory_allocated(thread_data.device) / 1e6, 2)}Mo')
print(f'memory_final = {round(torch.cuda.memory_allocated(thread_data.device) / 1e6, 2)}Mb')
print('Task completed')
yield json.dumps(res.json())

View File

@ -227,7 +227,7 @@ def thread_get_next_task():
queued_task.error = Exception('Cpu cannot be used to run this task. Remove GFPGANer filter to run task.')
task = queued_task
break
if not runtime.is_first_cuda_device(runtime.thread_data.device):
if runtime.thread_data.device != 0:
continue # Wait for cuda:0
if queued_task.render_device and runtime.thread_data.device != queued_task.render_device:
# Is asking for a specific render device.
@ -387,24 +387,17 @@ def get_devices():
return devices
def is_first_cuda_device(device):
    """Forward ``device`` to ``runtime.is_first_cuda_device`` and return its answer."""
    # When calling runtime from outside thread_render DO NOT USE thread specific attributes or functions.
    from . import runtime as render_runtime
    is_first = render_runtime.is_first_cuda_device(device)
    return is_first
def is_alive(name=None):
def is_alive(device=None):
if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('is_alive' + ERR_LOCK_FAILED)
nbr_alive = 0
try:
for rthread in render_threads:
if name is not None:
if device is not None:
weak_data = weak_thread_data.get(rthread)
if weak_data is None or not 'device' in weak_data or weak_data['device'] is None:
continue
thread_name = str(weak_data['device']).lower()
if is_first_cuda_device(name):
if not is_first_cuda_device(thread_name):
continue
elif thread_name != name:
thread_device = weak_data['device']
if thread_device != device:
continue
if rthread.is_alive():
nbr_alive += 1
@ -418,7 +411,7 @@ def start_render_thread(device='auto'):
try:
rthread = threading.Thread(target=thread_render, kwargs={'device': device})
rthread.daemon = True
rthread.name = THREAD_NAME_PREFIX + device
rthread.name = THREAD_NAME_PREFIX + str(device)
rthread.start()
render_threads.append(rthread)
finally:

View File

@ -56,23 +56,13 @@ NOCACHE_HEADERS={"Cache-Control": "no-cache, no-store, must-revalidate", "Pragma
app.mount('/media', StaticFiles(directory=os.path.join(SD_UI_DIR, 'media')), name="media")
app.mount('/plugins', StaticFiles(directory=UI_PLUGINS_DIR), name="plugins")
config_cached = None
config_last_mod_time = 0
def getConfig(default_val=APP_CONFIG_DEFAULTS):
global config_cached, config_last_mod_time
try:
config_json_path = os.path.join(CONFIG_DIR, 'config.json')
if not os.path.exists(config_json_path):
return default_val
if config_last_mod_time > 0 and config_cached is not None:
# Don't read if file was not modified
mtime = os.path.getmtime(config_json_path)
if mtime <= config_last_mod_time:
return config_cached
with open(config_json_path, 'r', encoding='utf-8') as f:
config_cached = json.load(f)
config_last_mod_time = os.path.getmtime(config_json_path)
return config_cached
return json.load(f)
except Exception as e:
print(str(e))
print(traceback.format_exc())
@ -86,34 +76,10 @@ def setConfig(config):
except:
print(traceback.format_exc())
if 'render_devices' in config:
gpu_devices = list(filter(lambda dev: dev.lower().startswith('gpu') or dev.lower().startswith('cuda'), config['render_devices']))
else:
gpu_devices = []
has_first_cuda_device = False
for device in gpu_devices:
if not task_manager.is_first_cuda_device(device.lower()): continue
has_first_cuda_device = True
break
if len(gpu_devices) > 0 and not has_first_cuda_device:
print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
try: # config.bat
config_bat = [
f"@set update_branch={config['update_branch']}"
]
if os.getenv('CUDA_VISIBLE_DEVICES') is None:
if len(gpu_devices) > 0 and not has_first_cuda_device:
config_bat.append('::Set the devices visible inside SD-UI here')
config_bat.append(f"::@set CUDA_VISIBLE_DEVICES={','.join(gpu_devices)}") # Needs better detection for edge cases, add as a comment for now.
print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
else:
config_bat.append(f"@set CUDA_VISIBLE_DEVICES={os.getenv('CUDA_VISIBLE_DEVICES')}")
if len(gpu_devices) > 0 and not has_first_cuda_device:
print('GPU:0 seems to be missing! Validate that CUDA_VISIBLE_DEVICES is set properly.')
config_bat_path = os.path.join(CONFIG_DIR, 'config.bat')
if os.getenv('SD_UI_BIND_PORT') is not None:
@ -121,7 +87,6 @@ def setConfig(config):
if os.getenv('SD_UI_BIND_IP') is not None:
config_bat.append(f"@set SD_UI_BIND_IP={os.getenv('SD_UI_BIND_IP')}")
with open(config_bat_path, 'w', encoding='utf-8') as f:
f.write('\r\n'.join(config_bat))
except Exception as e:
@ -132,22 +97,13 @@ def setConfig(config):
'#!/bin/bash',
f"export update_branch={config['update_branch']}"
]
if os.getenv('CUDA_VISIBLE_DEVICES') is None:
if len(gpu_devices) > 0 and not has_first_cuda_device:
config_sh.append('#Set the devices visible inside SD-UI here')
config_sh.append(f"#CUDA_VISIBLE_DEVICES={','.join(gpu_devices)}") # Needs better detection for edge cases, add as a comment for now.
print('Add the line "CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.sh')
else:
config_sh.append(f"export CUDA_VISIBLE_DEVICES=\"{os.getenv('CUDA_VISIBLE_DEVICES')}\"")
if len(gpu_devices) > 0 and not has_first_cuda_device:
print('GPU:0 seems to be missing! Validate that CUDA_VISIBLE_DEVICES is set properly.')
config_sh_path = os.path.join(CONFIG_DIR, 'config.sh')
if os.getenv('SD_UI_BIND_PORT') is not None:
config_sh.append(f"export SD_UI_BIND_PORT={os.getenv('SD_UI_BIND_PORT')}")
if os.getenv('SD_UI_BIND_IP') is not None:
config_sh.append(f"export SD_UI_BIND_IP={os.getenv('SD_UI_BIND_IP')}")
config_sh_path = os.path.join(CONFIG_DIR, 'config.sh')
with open(config_sh_path, 'w', encoding='utf-8') as f:
f.write('\n'.join(config_sh))
except Exception as e:
@ -205,20 +161,6 @@ async def setAppConfig(req : SetAppConfigRequest):
config = getConfig()
if req.update_branch:
config['update_branch'] = req.update_branch
if req.render_devices and hasattr(req.render_devices, "__len__"): # strings, array of strings or numbers.
render_devices = []
if isinstance(req.render_devices, str):
req.render_devices = req.render_devices.split(',')
if isinstance(req.render_devices, list):
for gpu in req.render_devices:
if isinstance(req.render_devices, int):
render_devices.append('GPU:' + gpu)
else:
render_devices.append(gpu)
if isinstance(req.render_devices, int):
render_devices.append('GPU:' + req.render_devices)
if len(render_devices) > 0:
config['render_devices'] = render_devices
try:
setConfig(config)
return JSONResponse({'status': 'OK'}, headers=NOCACHE_HEADERS)
@ -425,10 +367,8 @@ config = getConfig()
task_manager.default_model_to_load = resolve_ckpt_to_use()
task_manager.default_vae_to_load = resolve_vae_to_use()
if 'render_devices' in config: # Start a new thread for each device.
if isinstance(config['render_devices'], str):
config['render_devices'] = config['render_devices'].split(',')
if not isinstance(config['render_devices'], list):
raise Exception('Invalid render_devices value in config.')
raise Exception('Invalid render_devices value in config. Should be a list')
for device in config['render_devices']:
if task_manager.is_alive(device) >= 1:
print(device, 'already registered.')
@ -443,7 +383,7 @@ if task_manager.is_alive() <= 0: # Either no defaults or no devices after loadin
# Select best GPU device using free memory, if more than one device.
if task_manager.start_render_thread('auto'): # Detect best device for renders
# if cuda:0 is missing, another cuda device is better. try to start it...
if task_manager.is_alive(0) <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cuda'):
if task_manager.is_alive(0) <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread(0):
print('Failed to start GPU:0...')
else:
print('Failed to start gpu device.')
@ -457,7 +397,7 @@ if is_using_a_gpu and task_manager.is_alive(0) <= 0:
print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
print('Add the line "CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.sh')
# print('active devices', task_manager.get_devices())
print('active devices', task_manager.get_devices()['active'])
# start the browser ui
import webbrowser; webbrowser.open('http://localhost:9000')