Restrict device selection id to 'cpu' or integers (and 'auto' in the initial device selection functions)

2025-08-09 07:45:01 +02:00 · 2022-11-10 20:03:11 +05:30
parent 3f26d03166
commit b9a12d1562
3 changed files with 60 additions and 124 deletions
--- a/ui/sd_internal/runtime.py
+++ b/ui/sd_internal/runtime.py
@ -52,19 +52,22 @@ def get_processor_name():
            return platform.processor()
        elif platform.system() == "Darwin":
            os.environ['PATH'] = os.environ['PATH'] + os.pathsep + '/usr/sbin'
-            command ="sysctl -n machdep.cpu.brand_string"
-            return subprocess.check_output(command).strip()
+            command = ["sysctl", "-n", "machdep.cpu.brand_string"]  # argv list: without shell=True a plain string is taken as the program name on POSIX
+            return subprocess.check_output(command).decode().strip()  # decode so a str is returned, like the Linux branch
        elif platform.system() == "Linux":
            command = "cat /proc/cpuinfo"
            all_info = subprocess.check_output(command, shell=True).decode().strip()
            for line in all_info.split("\n"):
                if "model name" in line:
-                    return re.sub( ".*model name.*:", "", line,1).strip()
+                    return re.sub(".*model name.*:", "", line, 1).strip()
    except:
        print(traceback.format_exc())
        return "cpu"

 def device_would_fail(device):
+    if device != 'cpu' and not isinstance(device, int):
+        raise EnvironmentError(f"device_would_fail() only accepts 'cpu', or an integer index for the GPU. Got: {device}")
+
    if device == 'cpu': return None
    # Returns None when no issues found, otherwise returns the detected error str.
    # Memory check
@ -78,12 +81,15 @@ def device_would_fail(device):
    return None

 def device_select(device):
+    if device != 'cpu' and not isinstance(device, int):
+        raise EnvironmentError(f"device_select() only accepts 'cpu', or an integer index for the GPU. Got: {device}")
+
    if device == 'cpu': return True
    if not torch.cuda.is_available(): return False
    failure_msg = device_would_fail(device)
    if failure_msg:
        if 'invalid device' in failure_msg:
-            raise NameError(f'GPU "{device}" could not be found. Remove this device from config.render_devices or use one of "auto" or "cuda".')
+            raise NameError(f'GPU "{device}" could not be found. Remove this device from config.render_devices or use "auto".')
        print(failure_msg)
        return False

@ -100,7 +106,10 @@ def device_select(device):

    return True

-def device_init(device_selection=None):
+def device_init(device_selection):
+    if device_selection not in ['cpu', 'auto'] and not isinstance(device_selection, int):
+        raise EnvironmentError(f"device_init() only accepts 'cpu', 'auto', or an integer index for the GPU. Got: {device_selection}")
+
    # Thread bound properties
    thread_data.stop_processing = False
    thread_data.temp_images = {}
@ -129,25 +138,27 @@ def device_init(device_selection=None):
    thread_data.force_full_precision = False
    thread_data.reduced_memory = True

-    device_selection = device_selection.lower()
-
    if device_selection == 'cpu':
        thread_data.device = 'cpu'
        thread_data.device_name = get_processor_name()
        print('Render device CPU available as', thread_data.device_name)
        return
    if not torch.cuda.is_available():
-        if device_selection == 'auto' or device_selection == 'current':
+        if device_selection == 'auto':
            print('WARNING: Could not find a compatible GPU. Using the CPU, but this will be very slow!')
            thread_data.device = 'cpu'
            thread_data.device_name = get_processor_name()
            return
        else:
            raise EnvironmentError(f'Could not find a compatible GPU for the requested device_selection: {device_selection}!')
-    device_count = torch.cuda.device_count()
-    if device_count <= 1 and device_selection == 'auto':
-        device_selection = 'current' # Use 'auto' only when there is more than one compatible device found.
+
    if device_selection == 'auto':
+        device_count = torch.cuda.device_count()
+        if device_count == 1:
+            device_select(0)
+            torch.cuda.device(0)
+            return
+
        print('Autoselecting GPU. Using most free memory.')
        max_mem_free = 0
        best_device = None
@ -156,46 +167,31 @@ def device_init(device_selection=None):
            mem_free /= float(10**9)
            mem_total /= float(10**9)
            device_name = torch.cuda.get_device_name(device)
-            print(f'GPU:{device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Go / {round(mem_total, 2)}Go')
+            print(f'GPU {device} detected: {device_name} - Memory: {round(mem_total - mem_free, 2)}Gb / {round(mem_total, 2)}Gb')
            if max_mem_free < mem_free:
                max_mem_free = mem_free
                best_device = device
-        if best_device and device_select(device):
-            print(f'Setting GPU:{device} as active')
-            torch.cuda.device(device)
+        if best_device is not None and device_select(best_device):  # index 0 is a valid GPU, so test against None rather than truthiness
+            print(f'Setting GPU {best_device} as active')
+            torch.cuda.device(best_device)  # use the chosen GPU, not the loop variable left over from the scan
            return

-    if device_selection.startswith('gpu:'):
-        device_selection = int(device_selection[4:])
+    if device_selection != 'auto' and device_select(device_selection):  # device_select() rejects 'auto'; a failed auto-pick falls through to the current-device check
+        print(f'Setting GPU {device_selection} as active')
+        torch.cuda.device(device_selection)
+        return

-    if device_selection != 'cuda' and device_selection != 'current' and device_selection != 'gpu':
-        if device_select(device_selection):
-            if isinstance(device_selection, int):
-                print(f'Setting GPU:{device_selection} as active')
-            else:
-                print(f'Setting {device_selection} as active')
-            torch.cuda.device(device_selection)
-            return
    # By default use current device.
    print('Checking current GPU...')
    device = torch.cuda.current_device()
    device_name = torch.cuda.get_device_name(device)
-    print(f'GPU:{device} detected: {device_name}')
+    print(f'GPU {device} detected: {device_name}')
    if device_select(device):
        return
    print('WARNING: No compatible GPU found. Using the CPU, but this will be very slow!')
    thread_data.device = 'cpu'
    thread_data.device_name = get_processor_name()

-def is_first_cuda_device(device):
-    if device is None: return False
-    if device == 0 or device == '0': return True
-    if device == 'cuda' or device == 'cuda:0': return True
-    if device == 'gpu' or device == 'gpu:0': return True
-    if device == 'current': return True
-    if device == torch.device(0): return True
-    return False
-
 def load_model_ckpt():
    if not thread_data.ckpt_file: raise ValueError(f'Thread ckpt_file is undefined.')
    if not os.path.exists(thread_data.ckpt_file + '.ckpt'): raise FileNotFoundError(f'Cannot find {thread_data.ckpt_file}.ckpt')
@ -209,7 +205,7 @@ def load_model_ckpt():
    if thread_data.device == 'cpu':
        thread_data.precision = 'full'

-    print('loading', thread_data.ckpt_file + '.ckpt', 'to', thread_data.device, 'using precision', thread_data.precision)
+    print('loading', thread_data.ckpt_file + '.ckpt', 'to device', thread_data.device, 'using precision', thread_data.precision)
    sd = load_model_from_config(thread_data.ckpt_file + '.ckpt')
    li, lo = [], []
    for key, value in sd.items():
@ -296,16 +292,26 @@ def load_model_ckpt():

 def unload_filters():
    if thread_data.model_gfpgan is not None:
+        if thread_data.device != 'cpu': thread_data.model_gfpgan.gfpgan.to('cpu')
+
        del thread_data.model_gfpgan
    thread_data.model_gfpgan = None

    if thread_data.model_real_esrgan is not None:
+        if thread_data.device != 'cpu': thread_data.model_real_esrgan.model.to('cpu')
+
        del thread_data.model_real_esrgan
    thread_data.model_real_esrgan = None

 def unload_models():
    if thread_data.model is not None:
        print('Unloading models...')
+        if thread_data.device != 'cpu':
+            thread_data.modelFS.to('cpu')
+            thread_data.modelCS.to('cpu')
+            thread_data.model.model1.to("cpu")
+            thread_data.model.model2.to("cpu")
+
        del thread_data.model
        del thread_data.modelCS
        del thread_data.modelFS
@ -319,7 +325,7 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u
    start_mem = torch.cuda.memory_allocated(thread_data.device) / 1e6
    if start_mem <= 0: return
    model_name = model.__class__.__name__
-    print(f'Device:{thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mo')
+    print(f'Device:{thread_data.device} - Sending model {model_name} to {target_device} | Memory transfer starting. Memory Used: {round(start_mem)}Mb')
    start_time = time.time()
    model.to(target_device)
    time_step = start_time
@ -334,15 +340,15 @@ def wait_model_move_to(model, target_device): # Send to target_device and wait u
        if not is_transfering:
            break;
        if time.time() - time_step > WARNING_TIMEOUT: # Long delay, print to console to show activity.
-            print(f'Device:{thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mo, Transfered: {round(start_mem - mem)}Mo')
+            print(f'Device:{thread_data.device} - Waiting for Memory transfer. Memory Used: {round(mem)}Mb, Transfered: {round(start_mem - mem)}Mb')
            time_step = time.time()
-    print(f'Device:{thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mo in {round(time.time() - start_time, 3)} seconds to {target_device}')
+    print(f'Device:{thread_data.device} - {model_name} Moved: {round(start_mem - last_mem)}Mb in {round(time.time() - start_time, 3)} seconds to {target_device}')

 def load_model_gfpgan():
    if thread_data.gfpgan_file is None: raise ValueError(f'Thread gfpgan_file is undefined.')
        #print('load_model_gfpgan called without setting gfpgan_file')
        #return
-    if not is_first_cuda_device(thread_data.device):
+    if thread_data.device != 0:
        #TODO Remove when fixed - A bug with GFPGANer and facexlib needs to be fixed before use on other devices.
        raise Exception(f'Current device {torch.device(thread_data.device)} is not {torch.device(0)}. Cannot run GFPGANer.')
    model_path = thread_data.gfpgan_file + ".pth"
@ -431,15 +437,11 @@ def mk_img(req: Request):
    except Exception as e:
        print(traceback.format_exc())

-        if thread_data.reduced_memory:
+        if thread_data.device != 'cpu':
            thread_data.modelFS.to('cpu')
            thread_data.modelCS.to('cpu')
            thread_data.model.model1.to("cpu")
            thread_data.model.model2.to("cpu")
-        else:
-            # Model crashed, release all resources in unknown state.
-            unload_models()
-            unload_filters()

        gc() # Release from memory.
        yield json.dumps({
@ -523,6 +525,7 @@ def do_mk_img(req: Request):
    if needs_model_reload:
        unload_models()
        unload_filters()
+        gc()
        load_model_ckpt()

    if thread_data.turbo != req.turbo:
@ -715,12 +718,12 @@ def do_mk_img(req: Request):
                        # Filter Applied, move to next seed
                        opt_seed += 1

-                    if thread_data.reduced_memory:
-                        unload_filters()
+                    # if thread_data.reduced_memory:
+                    #     unload_filters()
                    del img_data
                    gc()
                    if thread_data.device != 'cpu':
-                        print(f'memory_final = {round(torch.cuda.memory_allocated(thread_data.device) / 1e6, 2)}Mo')
+                        print(f'memory_final = {round(torch.cuda.memory_allocated(thread_data.device) / 1e6, 2)}Mb')

    print('Task completed')
    yield json.dumps(res.json())
--- a/ui/sd_internal/task_manager.py
+++ b/ui/sd_internal/task_manager.py
@ -227,7 +227,7 @@ def thread_get_next_task():
                    queued_task.error = Exception('Cpu cannot be used to run this task. Remove GFPGANer filter to run task.')
                    task = queued_task
                    break
-                if not runtime.is_first_cuda_device(runtime.thread_data.device):
+                if runtime.thread_data.device != 0:
                    continue  # Wait for cuda:0
            if queued_task.render_device and runtime.thread_data.device != queued_task.render_device:
                # Is asking for a specific render device.
@ -387,24 +387,17 @@ def get_devices():

    return devices

-def is_first_cuda_device(device):
-    from . import runtime # When calling runtime from outside thread_render DO NOT USE thread specific attributes or functions.
-    return runtime.is_first_cuda_device(device)
-
-def is_alive(name=None):
+def is_alive(device=None):
    if not manager_lock.acquire(blocking=True, timeout=LOCK_TIMEOUT): raise Exception('is_alive' + ERR_LOCK_FAILED)
    nbr_alive = 0
    try:
        for rthread in render_threads:
-            if name is not None:
+            if device is not None:
                weak_data = weak_thread_data.get(rthread)
                if weak_data is None or not 'device' in weak_data or weak_data['device'] is None:
                    continue
-                thread_name = str(weak_data['device']).lower()
-                if is_first_cuda_device(name):
-                    if not is_first_cuda_device(thread_name):
-                        continue
-                elif thread_name != name:
+                thread_device = weak_data['device']
+                if thread_device != device:
                    continue
            if rthread.is_alive():
                nbr_alive += 1
@ -418,7 +411,7 @@ def start_render_thread(device='auto'):
    try:
        rthread = threading.Thread(target=thread_render, kwargs={'device': device})
        rthread.daemon = True
-        rthread.name = THREAD_NAME_PREFIX + device
+        rthread.name = THREAD_NAME_PREFIX + str(device)
        rthread.start()
        render_threads.append(rthread)
    finally:
--- a/ui/server.py
+++ b/ui/server.py
@ -56,23 +56,13 @@ NOCACHE_HEADERS={"Cache-Control": "no-cache, no-store, must-revalidate", "Pragma
 app.mount('/media', StaticFiles(directory=os.path.join(SD_UI_DIR, 'media')), name="media")
 app.mount('/plugins', StaticFiles(directory=UI_PLUGINS_DIR), name="plugins")

-config_cached = None
-config_last_mod_time = 0
 def getConfig(default_val=APP_CONFIG_DEFAULTS):
-    global config_cached, config_last_mod_time
    try:
        config_json_path = os.path.join(CONFIG_DIR, 'config.json')
        if not os.path.exists(config_json_path):
            return default_val
-        if config_last_mod_time > 0 and config_cached is not None:
-            # Don't read if file was not modified
-            mtime = os.path.getmtime(config_json_path)
-            if mtime <= config_last_mod_time:
-                return config_cached
        with open(config_json_path, 'r', encoding='utf-8') as f:
-            config_cached = json.load(f)
-        config_last_mod_time = os.path.getmtime(config_json_path)
-        return config_cached
+            return json.load(f)
    except Exception as e:
        print(str(e))
        print(traceback.format_exc())
@ -86,34 +76,10 @@ def setConfig(config):
    except:
        print(traceback.format_exc())

-    if 'render_devices' in config:
-        gpu_devices = list(filter(lambda dev: dev.lower().startswith('gpu') or dev.lower().startswith('cuda'), config['render_devices']))
-    else:
-        gpu_devices = []
-
-    has_first_cuda_device = False
-    for device in gpu_devices:
-        if not task_manager.is_first_cuda_device(device.lower()): continue
-        has_first_cuda_device = True
-        break
-    if len(gpu_devices) > 0 and not has_first_cuda_device:
-        print('WARNING: GFPGANer only works on GPU:0, use CUDA_VISIBLE_DEVICES if GFPGANer is needed on a specific GPU.')
-        print('Using CUDA_VISIBLE_DEVICES will remap the selected devices starting at GPU:0 fixing GFPGANer')
-
    try: # config.bat
        config_bat = [
            f"@set update_branch={config['update_branch']}"
        ]
-
-        if os.getenv('CUDA_VISIBLE_DEVICES') is None:
-            if len(gpu_devices) > 0 and not has_first_cuda_device:
-                config_bat.append('::Set the devices visible inside SD-UI here')
-                config_bat.append(f"::@set CUDA_VISIBLE_DEVICES={','.join(gpu_devices)}") # Needs better detection for edge cases, add as a comment for now.
-                print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
-        else:
-            config_bat.append(f"@set CUDA_VISIBLE_DEVICES={os.getenv('CUDA_VISIBLE_DEVICES')}")
-            if len(gpu_devices) > 0 and not has_first_cuda_device:
-                print('GPU:0 seems to be missing! Validate that CUDA_VISIBLE_DEVICES is set properly.')
        config_bat_path = os.path.join(CONFIG_DIR, 'config.bat')

        if os.getenv('SD_UI_BIND_PORT') is not None:
@ -121,7 +87,6 @@ def setConfig(config):
        if os.getenv('SD_UI_BIND_IP') is not None:
            config_bat.append(f"@set SD_UI_BIND_IP={os.getenv('SD_UI_BIND_IP')}")

-
        with open(config_bat_path, 'w', encoding='utf-8') as f:
            f.write('\r\n'.join(config_bat))
    except Exception as e:
@ -132,22 +97,13 @@ def setConfig(config):
            '#!/bin/bash',
            f"export update_branch={config['update_branch']}"
        ]
-        if os.getenv('CUDA_VISIBLE_DEVICES') is None:
-            if len(gpu_devices) > 0 and not has_first_cuda_device:
-                config_sh.append('#Set the devices visible inside SD-UI here')
-                config_sh.append(f"#CUDA_VISIBLE_DEVICES={','.join(gpu_devices)}") # Needs better detection for edge cases, add as a comment for now.
-                print('Add the line "CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.sh')
-        else:
-            config_sh.append(f"export CUDA_VISIBLE_DEVICES=\"{os.getenv('CUDA_VISIBLE_DEVICES')}\"")
-            if len(gpu_devices) > 0 and not has_first_cuda_device:
-                print('GPU:0 seems to be missing! Validate that CUDA_VISIBLE_DEVICES is set properly.')
+        config_sh_path = os.path.join(CONFIG_DIR, 'config.sh')

        if os.getenv('SD_UI_BIND_PORT') is not None:
            config_sh.append(f"export SD_UI_BIND_PORT={os.getenv('SD_UI_BIND_PORT')}")
        if os.getenv('SD_UI_BIND_IP') is not None:
            config_sh.append(f"export SD_UI_BIND_IP={os.getenv('SD_UI_BIND_IP')}")

-        config_sh_path = os.path.join(CONFIG_DIR, 'config.sh')
        with open(config_sh_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(config_sh))
    except Exception as e:
@ -205,20 +161,6 @@ async def setAppConfig(req : SetAppConfigRequest):
    config = getConfig()
    if req.update_branch:
        config['update_branch'] = req.update_branch
-    if req.render_devices and hasattr(req.render_devices, "__len__"): # strings, array of strings or numbers.
-        render_devices = []
-        if isinstance(req.render_devices, str):
-            req.render_devices = req.render_devices.split(',')
-        if isinstance(req.render_devices, list):
-            for gpu in req.render_devices:
-                if isinstance(req.render_devices, int):
-                    render_devices.append('GPU:' + gpu)
-                else:
-                    render_devices.append(gpu)
-        if isinstance(req.render_devices, int):
-            render_devices.append('GPU:' + req.render_devices)
-        if len(render_devices) > 0:
-            config['render_devices'] = render_devices
    try:
        setConfig(config)
        return JSONResponse({'status': 'OK'}, headers=NOCACHE_HEADERS)
@ -425,10 +367,8 @@ config = getConfig()
 task_manager.default_model_to_load = resolve_ckpt_to_use()
 task_manager.default_vae_to_load = resolve_vae_to_use()
 if 'render_devices' in config:  # Start a new thread for each device.
-    if isinstance(config['render_devices'], str):
-        config['render_devices'] = config['render_devices'].split(',')
    if not isinstance(config['render_devices'], list):
-        raise Exception('Invalid render_devices value in config.')
+        raise Exception('Invalid render_devices value in config. Should be a list')
    for device in config['render_devices']:
        if task_manager.is_alive(device) >= 1:
            print(device, 'already registered.')
@ -443,7 +383,7 @@ if task_manager.is_alive() <= 0: # Either no defaults or no devices after loadin
    # Select best GPU device using free memory, if more than one device.
    if task_manager.start_render_thread('auto'): # Detect best device for renders
        # if cuda:0 is missing, another cuda device is better. try to start it...
-        if task_manager.is_alive(0) <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread('cuda'):
+        if task_manager.is_alive(0) <= 0 and task_manager.is_alive('cpu') <= 0 and not task_manager.start_render_thread(0):
            print('Failed to start GPU:0...')
    else:
        print('Failed to start gpu device.')
@ -457,7 +397,7 @@ if is_using_a_gpu and task_manager.is_alive(0) <= 0:
    print('Add the line "@set CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.bat')
    print('Add the line "CUDA_VISIBLE_DEVICES=N" where N is the GPUs to use to config.sh')

-# print('active devices', task_manager.get_devices())
+print('active devices', task_manager.get_devices()['active'])

 # start the browser ui
 import webbrowser; webbrowser.open('http://localhost:9000')